Diffstat (limited to 'block/ll_rw_blk.c')
-rw-r--r--  block/ll_rw_blk.c | 536
1 file changed, 327 insertions, 209 deletions
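With this rework, blk_queue_ordered() no longer allocates a per-queue flush request; the driver declares which QUEUE_ORDERED_* mode it supports together with its prepare_flush_fn, and the call can now fail, so callers should check the return value. A minimal sketch of a caller under those assumptions follows; the mydrv_* names and the flush opcode are invented for illustration, only the blk_queue_ordered()/prepare_flush_fn interface comes from this patch.

/*
 * Hypothetical driver code; only the blk_queue_ordered() call and the
 * prepare_flush_fn callback shape come from this patch.
 */
static void mydrv_prepare_flush(request_queue_t *q, struct request *rq)
{
	/* turn the proxy request into a cache-flush command */
	memset(rq->cmd, 0, sizeof(rq->cmd));
	rq->cmd[0] = MYDRV_CMD_FLUSH_CACHE;	/* invented opcode */
	rq->flags |= REQ_BLOCK_PC;
}

static int mydrv_setup_barriers(request_queue_t *q)
{
	/* drain the queue, and flush the write cache around the barrier */
	return blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH,
				 mydrv_prepare_flush);
}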
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index d4beb9a89ee0..91d3b4828c49 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -36,6 +36,8 @@
 static void blk_unplug_work(void *data);
 static void blk_unplug_timeout(unsigned long data);
 static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io);
+static void init_request_from_bio(struct request *req, struct bio *bio);
+static int __make_request(request_queue_t *q, struct bio *bio);
 
 /*
  * For the allocated request tables
@@ -288,8 +290,8 @@ static inline void rq_init(request_queue_t *q, struct request *rq)
 
 /**
  * blk_queue_ordered - does this queue support ordered writes
  * @q:        the request queue
- * @flag:     see below
+ * @ordered:  one of QUEUE_ORDERED_*
  *
  * Description:
  *   For journalled file systems, doing ordered writes on a commit
@@ -298,28 +300,30 @@ static inline void rq_init(request_queue_t *q, struct request *rq)
  *   feature should call this function and indicate so.
  *
  **/
-void blk_queue_ordered(request_queue_t *q, int flag)
-{
-	switch (flag) {
-		case QUEUE_ORDERED_NONE:
-			if (q->flush_rq)
-				kmem_cache_free(request_cachep, q->flush_rq);
-			q->flush_rq = NULL;
-			q->ordered = flag;
-			break;
-		case QUEUE_ORDERED_TAG:
-			q->ordered = flag;
-			break;
-		case QUEUE_ORDERED_FLUSH:
-			q->ordered = flag;
-			if (!q->flush_rq)
-				q->flush_rq = kmem_cache_alloc(request_cachep,
-								GFP_KERNEL);
-			break;
-		default:
-			printk("blk_queue_ordered: bad value %d\n", flag);
-			break;
-	}
+int blk_queue_ordered(request_queue_t *q, unsigned ordered,
+		      prepare_flush_fn *prepare_flush_fn)
+{
+	if (ordered & (QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH) &&
+	    prepare_flush_fn == NULL) {
+		printk(KERN_ERR "blk_queue_ordered: prepare_flush_fn required\n");
+		return -EINVAL;
+	}
+
+	if (ordered != QUEUE_ORDERED_NONE &&
+	    ordered != QUEUE_ORDERED_DRAIN &&
+	    ordered != QUEUE_ORDERED_DRAIN_FLUSH &&
+	    ordered != QUEUE_ORDERED_DRAIN_FUA &&
+	    ordered != QUEUE_ORDERED_TAG &&
+	    ordered != QUEUE_ORDERED_TAG_FLUSH &&
+	    ordered != QUEUE_ORDERED_TAG_FUA) {
+		printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered);
+		return -EINVAL;
+	}
+
+	q->next_ordered = ordered;
+	q->prepare_flush_fn = prepare_flush_fn;
+
+	return 0;
 }
 
 EXPORT_SYMBOL(blk_queue_ordered);
@@ -344,167 +348,265 @@ EXPORT_SYMBOL(blk_queue_issue_flush_fn)
 /*
  * Cache flushing for ordered writes handling
  */
-static void blk_pre_flush_end_io(struct request *flush_rq)
+inline unsigned blk_ordered_cur_seq(request_queue_t *q)
 {
-	struct request *rq = flush_rq->end_io_data;
-	request_queue_t *q = rq->q;
-
-	elv_completed_request(q, flush_rq);
-
-	rq->flags |= REQ_BAR_PREFLUSH;
-
-	if (!flush_rq->errors)
-		elv_requeue_request(q, rq);
-	else {
-		q->end_flush_fn(q, flush_rq);
-		clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);
-		q->request_fn(q);
-	}
+	if (!q->ordseq)
+		return 0;
+	return 1 << ffz(q->ordseq);
 }
 
-static void blk_post_flush_end_io(struct request *flush_rq)
+unsigned blk_ordered_req_seq(struct request *rq)
 {
-	struct request *rq = flush_rq->end_io_data;
 	request_queue_t *q = rq->q;
 
-	elv_completed_request(q, flush_rq);
+	BUG_ON(q->ordseq == 0);
 
-	rq->flags |= REQ_BAR_POSTFLUSH;
+	if (rq == &q->pre_flush_rq)
+		return QUEUE_ORDSEQ_PREFLUSH;
+	if (rq == &q->bar_rq)
+		return QUEUE_ORDSEQ_BAR;
+	if (rq == &q->post_flush_rq)
+		return QUEUE_ORDSEQ_POSTFLUSH;
 
-	q->end_flush_fn(q, flush_rq);
-	clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);
-	q->request_fn(q);
+	if ((rq->flags & REQ_ORDERED_COLOR) ==
+	    (q->orig_bar_rq->flags & REQ_ORDERED_COLOR))
+		return QUEUE_ORDSEQ_DRAIN;
+	else
+		return QUEUE_ORDSEQ_DONE;
 }
 
-struct request *blk_start_pre_flush(request_queue_t *q, struct request *rq)
+void blk_ordered_complete_seq(request_queue_t *q, unsigned seq, int error)
 {
-	struct request *flush_rq = q->flush_rq;
-
-	BUG_ON(!blk_barrier_rq(rq));
+	struct request *rq;
+	int uptodate;
 
-	if (test_and_set_bit(QUEUE_FLAG_FLUSH, &q->queue_flags))
-		return NULL;
+	if (error && !q->orderr)
+		q->orderr = error;
 
-	rq_init(q, flush_rq);
-	flush_rq->elevator_private = NULL;
-	flush_rq->flags = REQ_BAR_FLUSH;
-	flush_rq->rq_disk = rq->rq_disk;
-	flush_rq->rl = NULL;
+	BUG_ON(q->ordseq & seq);
+	q->ordseq |= seq;
 
-	/*
-	 * prepare_flush returns 0 if no flush is needed, just mark both
-	 * pre and post flush as done in that case
-	 */
-	if (!q->prepare_flush_fn(q, flush_rq)) {
-		rq->flags |= REQ_BAR_PREFLUSH | REQ_BAR_POSTFLUSH;
-		clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);
-		return rq;
-	}
+	if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE)
+		return;
 
 	/*
-	 * some drivers dequeue requests right away, some only after io
-	 * completion. make sure the request is dequeued.
+	 * Okay, sequence complete.
 	 */
-	if (!list_empty(&rq->queuelist))
-		blkdev_dequeue_request(rq);
+	rq = q->orig_bar_rq;
+	uptodate = q->orderr ? q->orderr : 1;
 
-	flush_rq->end_io_data = rq;
-	flush_rq->end_io = blk_pre_flush_end_io;
+	q->ordseq = 0;
 
-	__elv_add_request(q, flush_rq, ELEVATOR_INSERT_FRONT, 0);
-	return flush_rq;
+	end_that_request_first(rq, uptodate, rq->hard_nr_sectors);
+	end_that_request_last(rq, uptodate);
 }
 
-static void blk_start_post_flush(request_queue_t *q, struct request *rq)
+static void pre_flush_end_io(struct request *rq, int error)
 {
-	struct request *flush_rq = q->flush_rq;
+	elv_completed_request(rq->q, rq);
+	blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error);
+}
 
-	BUG_ON(!blk_barrier_rq(rq));
+static void bar_end_io(struct request *rq, int error)
+{
+	elv_completed_request(rq->q, rq);
+	blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error);
+}
 
-	rq_init(q, flush_rq);
-	flush_rq->elevator_private = NULL;
-	flush_rq->flags = REQ_BAR_FLUSH;
-	flush_rq->rq_disk = rq->rq_disk;
-	flush_rq->rl = NULL;
+static void post_flush_end_io(struct request *rq, int error)
+{
+	elv_completed_request(rq->q, rq);
+	blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error);
+}
 
-	if (q->prepare_flush_fn(q, flush_rq)) {
-		flush_rq->end_io_data = rq;
-		flush_rq->end_io = blk_post_flush_end_io;
+static void queue_flush(request_queue_t *q, unsigned which)
+{
+	struct request *rq;
+	rq_end_io_fn *end_io;
 
-		__elv_add_request(q, flush_rq, ELEVATOR_INSERT_FRONT, 0);
-		q->request_fn(q);
+	if (which == QUEUE_ORDERED_PREFLUSH) {
+		rq = &q->pre_flush_rq;
+		end_io = pre_flush_end_io;
+	} else {
+		rq = &q->post_flush_rq;
+		end_io = post_flush_end_io;
 	}
+
+	rq_init(q, rq);
+	rq->flags = REQ_HARDBARRIER;
+	rq->elevator_private = NULL;
+	rq->rq_disk = q->bar_rq.rq_disk;
+	rq->rl = NULL;
+	rq->end_io = end_io;
+	q->prepare_flush_fn(q, rq);
+
+	__elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0);
 }
 
-static inline int blk_check_end_barrier(request_queue_t *q, struct request *rq,
-					int sectors)
+static inline struct request *start_ordered(request_queue_t *q,
+					    struct request *rq)
 {
-	if (sectors > rq->nr_sectors)
-		sectors = rq->nr_sectors;
+	q->bi_size = 0;
+	q->orderr = 0;
+	q->ordered = q->next_ordered;
+	q->ordseq |= QUEUE_ORDSEQ_STARTED;
+
+	/*
+	 * Prep proxy barrier request.
+	 */
+	blkdev_dequeue_request(rq);
+	q->orig_bar_rq = rq;
+	rq = &q->bar_rq;
+	rq_init(q, rq);
+	rq->flags = bio_data_dir(q->orig_bar_rq->bio);
+	rq->flags |= q->ordered & QUEUE_ORDERED_FUA ? REQ_FUA : 0;
+	rq->elevator_private = NULL;
+	rq->rl = NULL;
+	init_request_from_bio(rq, q->orig_bar_rq->bio);
+	rq->end_io = bar_end_io;
+
+	/*
+	 * Queue ordered sequence.  As we stack them at the head, we
+	 * need to queue in reverse order.  Note that we rely on that
+	 * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs
+	 * request gets inbetween ordered sequence.
+	 */
+	if (q->ordered & QUEUE_ORDERED_POSTFLUSH)
+		queue_flush(q, QUEUE_ORDERED_POSTFLUSH);
+	else
+		q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH;
+
+	__elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0);
+
+	if (q->ordered & QUEUE_ORDERED_PREFLUSH) {
+		queue_flush(q, QUEUE_ORDERED_PREFLUSH);
+		rq = &q->pre_flush_rq;
+	} else
+		q->ordseq |= QUEUE_ORDSEQ_PREFLUSH;
 
-	rq->nr_sectors -= sectors;
-	return rq->nr_sectors;
+	if ((q->ordered & QUEUE_ORDERED_TAG) || q->in_flight == 0)
+		q->ordseq |= QUEUE_ORDSEQ_DRAIN;
+	else
+		rq = NULL;
+
+	return rq;
 }
 
-static int __blk_complete_barrier_rq(request_queue_t *q, struct request *rq,
-				     int sectors, int queue_locked)
+int blk_do_ordered(request_queue_t *q, struct request **rqp)
 {
-	if (q->ordered != QUEUE_ORDERED_FLUSH)
-		return 0;
-	if (!blk_fs_request(rq) || !blk_barrier_rq(rq))
-		return 0;
-	if (blk_barrier_postflush(rq))
-		return 0;
+	struct request *rq = *rqp, *allowed_rq;
+	int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq);
 
-	if (!blk_check_end_barrier(q, rq, sectors)) {
-		unsigned long flags = 0;
+	if (!q->ordseq) {
+		if (!is_barrier)
+			return 1;
 
-		if (!queue_locked)
-			spin_lock_irqsave(q->queue_lock, flags);
+		if (q->next_ordered != QUEUE_ORDERED_NONE) {
+			*rqp = start_ordered(q, rq);
+			return 1;
+		} else {
+			/*
+			 * This can happen when the queue switches to
+			 * ORDERED_NONE while this request is on it.
+			 */
+			blkdev_dequeue_request(rq);
+			end_that_request_first(rq, -EOPNOTSUPP,
+					       rq->hard_nr_sectors);
+			end_that_request_last(rq, -EOPNOTSUPP);
+			*rqp = NULL;
+			return 0;
+		}
+	}
 
-		blk_start_post_flush(q, rq);
+	if (q->ordered & QUEUE_ORDERED_TAG) {
+		if (is_barrier && rq != &q->bar_rq)
+			*rqp = NULL;
+		return 1;
+	}
 
-		if (!queue_locked)
-			spin_unlock_irqrestore(q->queue_lock, flags);
+	switch (blk_ordered_cur_seq(q)) {
+	case QUEUE_ORDSEQ_PREFLUSH:
+		allowed_rq = &q->pre_flush_rq;
+		break;
+	case QUEUE_ORDSEQ_BAR:
+		allowed_rq = &q->bar_rq;
+		break;
+	case QUEUE_ORDSEQ_POSTFLUSH:
+		allowed_rq = &q->post_flush_rq;
+		break;
+	default:
+		allowed_rq = NULL;
+		break;
 	}
 
+	if (rq != allowed_rq &&
+	    (blk_fs_request(rq) || rq == &q->pre_flush_rq ||
+	     rq == &q->post_flush_rq))
+		*rqp = NULL;
+
 	return 1;
 }
 
-/**
- * blk_complete_barrier_rq - complete possible barrier request
- * @q: the request queue for the device
- * @rq: the request
- * @sectors: number of sectors to complete
- *
- * Description:
- *   Used in driver end_io handling to determine whether to postpone
- *   completion of a barrier request until a post flush has been done. This
- *   is the unlocked variant, used if the caller doesn't already hold the
- *   queue lock.
- **/
-int blk_complete_barrier_rq(request_queue_t *q, struct request *rq, int sectors)
+static int flush_dry_bio_endio(struct bio *bio, unsigned int bytes, int error)
 {
-	return __blk_complete_barrier_rq(q, rq, sectors, 0);
+	request_queue_t *q = bio->bi_private;
+	struct bio_vec *bvec;
+	int i;
+
+	/*
+	 * This is dry run, restore bio_sector and size.  We'll finish
+	 * this request again with the original bi_end_io after an
+	 * error occurs or post flush is complete.
+	 */
+	q->bi_size += bytes;
+
+	if (bio->bi_size)
+		return 1;
+
+	/* Rewind bvec's */
+	bio->bi_idx = 0;
+	bio_for_each_segment(bvec, bio, i) {
+		bvec->bv_len += bvec->bv_offset;
+		bvec->bv_offset = 0;
+	}
+
+	/* Reset bio */
+	set_bit(BIO_UPTODATE, &bio->bi_flags);
+	bio->bi_size = q->bi_size;
+	bio->bi_sector -= (q->bi_size >> 9);
+	q->bi_size = 0;
+
+	return 0;
 }
-EXPORT_SYMBOL(blk_complete_barrier_rq);
 
-/**
- * blk_complete_barrier_rq_locked - complete possible barrier request
- * @q: the request queue for the device
- * @rq: the request
- * @sectors: number of sectors to complete
- *
- * Description:
- *   See blk_complete_barrier_rq(). This variant must be used if the caller
- *   holds the queue lock.
- **/
-int blk_complete_barrier_rq_locked(request_queue_t *q, struct request *rq,
-				   int sectors)
+static inline int ordered_bio_endio(struct request *rq, struct bio *bio,
+				    unsigned int nbytes, int error)
 {
-	return __blk_complete_barrier_rq(q, rq, sectors, 1);
+	request_queue_t *q = rq->q;
+	bio_end_io_t *endio;
+	void *private;
+
+	if (&q->bar_rq != rq)
+		return 0;
+
+	/*
+	 * Okay, this is the barrier request in progress, dry finish it.
+	 */
+	if (error && !q->orderr)
+		q->orderr = error;
+
+	endio = bio->bi_end_io;
+	private = bio->bi_private;
+	bio->bi_end_io = flush_dry_bio_endio;
+	bio->bi_private = q;
+
+	bio_endio(bio, nbytes, error);
+
+	bio->bi_end_io = endio;
+	bio->bi_private = private;
+
+	return 1;
 }
-EXPORT_SYMBOL(blk_complete_barrier_rq_locked);
 
 /**
  * blk_queue_bounce_limit - set bounce buffer limit for queue
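The hunk above replaces the old pre/post-flush special cases with a per-queue sequence: start_ordered() queues post-flush, barrier, and pre-flush proxies at the queue head, each proxy's end_io calls blk_ordered_complete_seq() to mark its stage, and blk_ordered_cur_seq() reports the lowest unfinished stage via ffz(). A rough sketch of that progression is below; the bit values are assumptions mirroring the QUEUE_ORDSEQ_* definitions that accompany this patch in the headers, not something defined in this file.

/*
 * Assumed values, for illustration only; the real definitions live in
 * include/linux/blkdev.h alongside this patch.
 */
enum {
	QUEUE_ORDSEQ_STARTED	= 0x01,
	QUEUE_ORDSEQ_DRAIN	= 0x02,
	QUEUE_ORDSEQ_PREFLUSH	= 0x04,
	QUEUE_ORDSEQ_BAR	= 0x08,
	QUEUE_ORDSEQ_POSTFLUSH	= 0x10,
	QUEUE_ORDSEQ_DONE	= 0x20,
};

/*
 * With q->ordseq == STARTED | DRAIN, ffz() finds bit 2, so
 * blk_ordered_cur_seq() returns QUEUE_ORDSEQ_PREFLUSH: the pre-flush is
 * the stage currently in flight.  Once every bit below DONE is set,
 * blk_ordered_complete_seq() ends the original barrier request.
 */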
@@ -1039,12 +1141,13 @@ void blk_queue_invalidate_tags(request_queue_t *q)
 
 EXPORT_SYMBOL(blk_queue_invalidate_tags);
 
-static char *rq_flags[] = {
+static const char * const rq_flags[] = {
 	"REQ_RW",
 	"REQ_FAILFAST",
 	"REQ_SORTED",
 	"REQ_SOFTBARRIER",
 	"REQ_HARDBARRIER",
+	"REQ_FUA",
 	"REQ_CMD",
 	"REQ_NOMERGE",
 	"REQ_STARTED",
@@ -1064,6 +1167,7 @@ static char *rq_flags[] = {
 	"REQ_PM_SUSPEND",
 	"REQ_PM_RESUME",
 	"REQ_PM_SHUTDOWN",
+	"REQ_ORDERED_COLOR",
 };
 
 void blk_dump_rq_flags(struct request *rq, char *msg)
@@ -1641,8 +1745,6 @@ void blk_cleanup_queue(request_queue_t * q)
 	if (q->queue_tags)
 		__blk_queue_free_tags(q);
 
-	blk_queue_ordered(q, QUEUE_ORDERED_NONE);
-
 	kmem_cache_free(requestq_cachep, q);
 }
 
@@ -1667,8 +1769,6 @@ static int blk_init_free_list(request_queue_t *q)
 	return 0;
 }
 
-static int __make_request(request_queue_t *, struct bio *);
-
 request_queue_t *blk_alloc_queue(gfp_t gfp_mask)
 {
 	return blk_alloc_queue_node(gfp_mask, -1);
@@ -1908,40 +2008,40 @@ static struct request *get_request(request_queue_t *q, int rw, struct bio *bio,
 {
 	struct request *rq = NULL;
 	struct request_list *rl = &q->rq;
-	struct io_context *ioc = current_io_context(GFP_ATOMIC);
-	int priv;
+	struct io_context *ioc = NULL;
+	int may_queue, priv;
 
-	if (rl->count[rw]+1 >= q->nr_requests) {
-		/*
-		 * The queue will fill after this allocation, so set it as
-		 * full, and mark this process as "batching". This process
-		 * will be allowed to complete a batch of requests, others
-		 * will be blocked.
-		 */
-		if (!blk_queue_full(q, rw)) {
-			ioc_set_batching(q, ioc);
-			blk_set_queue_full(q, rw);
-		}
-	}
+	may_queue = elv_may_queue(q, rw, bio);
+	if (may_queue == ELV_MQUEUE_NO)
+		goto rq_starved;
 
-	switch (elv_may_queue(q, rw, bio)) {
-		case ELV_MQUEUE_NO:
-			goto rq_starved;
-		case ELV_MQUEUE_MAY:
-			break;
-		case ELV_MQUEUE_MUST:
-			goto get_rq;
-	}
-
-	if (blk_queue_full(q, rw) && !ioc_batching(q, ioc)) {
-		/*
-		 * The queue is full and the allocating process is not a
-		 * "batcher", and not exempted by the IO scheduler
-		 */
-		goto out;
+	if (rl->count[rw]+1 >= queue_congestion_on_threshold(q)) {
+		if (rl->count[rw]+1 >= q->nr_requests) {
+			ioc = current_io_context(GFP_ATOMIC);
+			/*
+			 * The queue will fill after this allocation, so set
+			 * it as full, and mark this process as "batching".
+			 * This process will be allowed to complete a batch of
+			 * requests, others will be blocked.
+			 */
+			if (!blk_queue_full(q, rw)) {
+				ioc_set_batching(q, ioc);
+				blk_set_queue_full(q, rw);
+			} else {
+				if (may_queue != ELV_MQUEUE_MUST
+						&& !ioc_batching(q, ioc)) {
+					/*
+					 * The queue is full and the allocating
+					 * process is not a "batcher", and not
+					 * exempted by the IO scheduler
+					 */
+					goto out;
+				}
+			}
+		}
+		set_queue_congested(q, rw);
 	}
 
-get_rq:
 	/*
 	 * Only allow batching queuers to allocate up to 50% over the defined
 	 * limit of requests, otherwise we could have thousands of requests
@@ -1952,8 +2052,6 @@ get_rq:
 
 	rl->count[rw]++;
 	rl->starved[rw] = 0;
-	if (rl->count[rw] >= queue_congestion_on_threshold(q))
-		set_queue_congested(q, rw);
 
 	priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
 	if (priv)
@@ -1962,7 +2060,7 @@ get_rq:
 	spin_unlock_irq(q->queue_lock);
 
 	rq = blk_alloc_request(q, rw, bio, priv, gfp_mask);
-	if (!rq) {
+	if (unlikely(!rq)) {
 		/*
 		 * Allocation failed presumably due to memory. Undo anything
 		 * we might have messed up.
@@ -1987,6 +2085,12 @@ rq_starved:
 		goto out;
 	}
 
+	/*
+	 * ioc may be NULL here, and ioc_batching will be false. That's
+	 * OK, if the queue is under the request limit then requests need
+	 * not count toward the nr_batch_requests limit. There will always
+	 * be some limit enforced by BLK_BATCH_TIME.
+	 */
 	if (ioc_batching(q, ioc))
 		ioc->nr_batch_requests--;
 
@@ -2313,7 +2417,7 @@ EXPORT_SYMBOL(blk_rq_map_kern);
  */
 void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk,
 			   struct request *rq, int at_head,
-			   void (*done)(struct request *))
+			   rq_end_io_fn *done)
 {
 	int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
 
@@ -2517,7 +2621,7 @@ EXPORT_SYMBOL(blk_put_request);
  * blk_end_sync_rq - executes a completion event on a request
  * @rq: request to complete
  */
-void blk_end_sync_rq(struct request *rq)
+void blk_end_sync_rq(struct request *rq, int error)
 {
 	struct completion *waiting = rq->waiting;
 
@@ -2655,6 +2759,36 @@ void blk_attempt_remerge(request_queue_t *q, struct request *rq)
 
 EXPORT_SYMBOL(blk_attempt_remerge);
 
+static void init_request_from_bio(struct request *req, struct bio *bio)
+{
+	req->flags |= REQ_CMD;
+
+	/*
+	 * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST)
+	 */
+	if (bio_rw_ahead(bio) || bio_failfast(bio))
+		req->flags |= REQ_FAILFAST;
+
+	/*
+	 * REQ_BARRIER implies no merging, but lets make it explicit
+	 */
+	if (unlikely(bio_barrier(bio)))
+		req->flags |= (REQ_HARDBARRIER | REQ_NOMERGE);
+
+	req->errors = 0;
+	req->hard_sector = req->sector = bio->bi_sector;
+	req->hard_nr_sectors = req->nr_sectors = bio_sectors(bio);
+	req->current_nr_sectors = req->hard_cur_sectors = bio_cur_sectors(bio);
+	req->nr_phys_segments = bio_phys_segments(req->q, bio);
+	req->nr_hw_segments = bio_hw_segments(req->q, bio);
+	req->buffer = bio_data(bio);	/* see ->buffer comment above */
+	req->waiting = NULL;
+	req->bio = req->biotail = bio;
+	req->ioprio = bio_prio(bio);
+	req->rq_disk = bio->bi_bdev->bd_disk;
+	req->start_time = jiffies;
+}
+
 static int __make_request(request_queue_t *q, struct bio *bio)
 {
 	struct request *req;
@@ -2680,7 +2814,7 @@ static int __make_request(request_queue_t *q, struct bio *bio)
 	spin_lock_prefetch(q->queue_lock);
 
 	barrier = bio_barrier(bio);
-	if (unlikely(barrier) && (q->ordered == QUEUE_ORDERED_NONE)) {
+	if (unlikely(barrier) && (q->next_ordered == QUEUE_ORDERED_NONE)) {
 		err = -EOPNOTSUPP;
 		goto end_io;
 	}
@@ -2750,33 +2884,7 @@ get_rq:
 	 * We don't worry about that case for efficiency. It won't happen
 	 * often, and the elevators are able to handle it.
 	 */
-
-	req->flags |= REQ_CMD;
-
-	/*
-	 * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST)
-	 */
-	if (bio_rw_ahead(bio) || bio_failfast(bio))
-		req->flags |= REQ_FAILFAST;
-
-	/*
-	 * REQ_BARRIER implies no merging, but lets make it explicit
-	 */
-	if (unlikely(barrier))
-		req->flags |= (REQ_HARDBARRIER | REQ_NOMERGE);
-
-	req->errors = 0;
-	req->hard_sector = req->sector = sector;
-	req->hard_nr_sectors = req->nr_sectors = nr_sectors;
-	req->current_nr_sectors = req->hard_cur_sectors = cur_nr_sectors;
-	req->nr_phys_segments = bio_phys_segments(q, bio);
-	req->nr_hw_segments = bio_hw_segments(q, bio);
-	req->buffer = bio_data(bio);	/* see ->buffer comment above */
-	req->waiting = NULL;
-	req->bio = req->biotail = bio;
-	req->ioprio = prio;
-	req->rq_disk = bio->bi_bdev->bd_disk;
-	req->start_time = jiffies;
+	init_request_from_bio(req, bio);
 
 	spin_lock_irq(q->queue_lock);
 	if (elv_queue_empty(q))
@@ -3067,7 +3175,8 @@ static int __end_that_request_first(struct request *req, int uptodate,
 		if (nr_bytes >= bio->bi_size) {
 			req->bio = bio->bi_next;
 			nbytes = bio->bi_size;
-			bio_endio(bio, nbytes, error);
+			if (!ordered_bio_endio(req, bio, nbytes, error))
+				bio_endio(bio, nbytes, error);
 			next_idx = 0;
 			bio_nbytes = 0;
 		} else {
@@ -3122,7 +3231,8 @@ static int __end_that_request_first(struct request *req, int uptodate,
 	 * if the request wasn't completed, update state
 	 */
 	if (bio_nbytes) {
-		bio_endio(bio, bio_nbytes, error);
+		if (!ordered_bio_endio(req, bio, bio_nbytes, error))
+			bio_endio(bio, bio_nbytes, error);
 		bio->bi_idx += next_idx;
 		bio_iovec(bio)->bv_offset += nr_bytes;
 		bio_iovec(bio)->bv_len -= nr_bytes;
@@ -3179,9 +3289,17 @@ EXPORT_SYMBOL(end_that_request_chunk);
 /*
  * queue lock must be held
  */
-void end_that_request_last(struct request *req)
+void end_that_request_last(struct request *req, int uptodate)
 {
 	struct gendisk *disk = req->rq_disk;
+	int error;
+
+	/*
+	 * extend uptodate bool to allow < 0 value to be direct io error
+	 */
+	error = 0;
+	if (end_io_error(uptodate))
+		error = !uptodate ? -EIO : uptodate;
 
 	if (unlikely(laptop_mode) && blk_fs_request(req))
 		laptop_io_completion();
@@ -3196,7 +3314,7 @@ void end_that_request_last(struct request *req)
 		disk->in_flight--;
 	}
 	if (req->end_io)
-		req->end_io(req);
+		req->end_io(req, error);
 	else
 		__blk_put_request(req->q, req);
 }
@@ -3208,7 +3326,7 @@ void end_request(struct request *req, int uptodate)
 	if (!end_that_request_first(req, uptodate, req->hard_cur_sectors)) {
 		add_disk_randomness(req->rq_disk);
 		blkdev_dequeue_request(req);
-		end_that_request_last(req);
+		end_that_request_last(req, uptodate);
 	}
 }
 
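Note the completion-path signature changes that run through the whole file: rq_end_io_fn and end_that_request_last() now take an error/uptodate argument, where a value below zero carries a direct I/O error. A minimal driver-side sketch under those assumptions follows; the mydrv_* wrapper is invented, only the two end_that_request_*() calls are the interfaces changed above.

/*
 * Hypothetical completion helper; mirrors the end_request() pattern at
 * the end of this patch, with a negative errno allowed in 'uptodate'.
 */
static void mydrv_end_request(struct request *rq, int error)
{
	int uptodate = error ? error : 1;

	if (!end_that_request_first(rq, uptodate, rq->hard_nr_sectors)) {
		blkdev_dequeue_request(rq);
		end_that_request_last(rq, uptodate);
	}
}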