Diffstat (limited to 'block')
-rw-r--r--   block/as-iosched.c   |  34
-rw-r--r--   block/cfq-iosched.c  | 425
-rw-r--r--   block/ll_rw_blk.c    | 407
3 files changed, 524 insertions, 342 deletions
diff --git a/block/as-iosched.c b/block/as-iosched.c
index cb5e53b05c7c..b201d16a7102 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -170,9 +170,11 @@ static void free_as_io_context(struct as_io_context *aic) | |||
170 | 170 | ||
171 | static void as_trim(struct io_context *ioc) | 171 | static void as_trim(struct io_context *ioc) |
172 | { | 172 | { |
173 | spin_lock(&ioc->lock); | ||
173 | if (ioc->aic) | 174 | if (ioc->aic) |
174 | free_as_io_context(ioc->aic); | 175 | free_as_io_context(ioc->aic); |
175 | ioc->aic = NULL; | 176 | ioc->aic = NULL; |
177 | spin_unlock(&ioc->lock); | ||
176 | } | 178 | } |
177 | 179 | ||
178 | /* Called when the task exits */ | 180 | /* Called when the task exits */ |
@@ -462,7 +464,9 @@ static void as_antic_timeout(unsigned long data) | |||
462 | spin_lock_irqsave(q->queue_lock, flags); | 464 | spin_lock_irqsave(q->queue_lock, flags); |
463 | if (ad->antic_status == ANTIC_WAIT_REQ | 465 | if (ad->antic_status == ANTIC_WAIT_REQ |
464 | || ad->antic_status == ANTIC_WAIT_NEXT) { | 466 | || ad->antic_status == ANTIC_WAIT_NEXT) { |
465 | struct as_io_context *aic = ad->io_context->aic; | 467 | struct as_io_context *aic; |
468 | spin_lock(&ad->io_context->lock); | ||
469 | aic = ad->io_context->aic; | ||
466 | 470 | ||
467 | ad->antic_status = ANTIC_FINISHED; | 471 | ad->antic_status = ANTIC_FINISHED; |
468 | kblockd_schedule_work(&ad->antic_work); | 472 | kblockd_schedule_work(&ad->antic_work); |
@@ -475,6 +479,7 @@ static void as_antic_timeout(unsigned long data) | |||
475 | /* process not "saved" by a cooperating request */ | 479 | /* process not "saved" by a cooperating request */ |
476 | ad->exit_no_coop = (7*ad->exit_no_coop + 256)/8; | 480 | ad->exit_no_coop = (7*ad->exit_no_coop + 256)/8; |
477 | } | 481 | } |
482 | spin_unlock(&ad->io_context->lock); | ||
478 | } | 483 | } |
479 | spin_unlock_irqrestore(q->queue_lock, flags); | 484 | spin_unlock_irqrestore(q->queue_lock, flags); |
480 | } | 485 | } |
@@ -635,9 +640,11 @@ static int as_can_break_anticipation(struct as_data *ad, struct request *rq) | |||
635 | 640 | ||
636 | ioc = ad->io_context; | 641 | ioc = ad->io_context; |
637 | BUG_ON(!ioc); | 642 | BUG_ON(!ioc); |
643 | spin_lock(&ioc->lock); | ||
638 | 644 | ||
639 | if (rq && ioc == RQ_IOC(rq)) { | 645 | if (rq && ioc == RQ_IOC(rq)) { |
640 | /* request from same process */ | 646 | /* request from same process */ |
647 | spin_unlock(&ioc->lock); | ||
641 | return 1; | 648 | return 1; |
642 | } | 649 | } |
643 | 650 | ||
@@ -646,20 +653,25 @@ static int as_can_break_anticipation(struct as_data *ad, struct request *rq) | |||
646 | * In this situation status should really be FINISHED, | 653 | * In this situation status should really be FINISHED, |
647 | * however the timer hasn't had the chance to run yet. | 654 | * however the timer hasn't had the chance to run yet. |
648 | */ | 655 | */ |
656 | spin_unlock(&ioc->lock); | ||
649 | return 1; | 657 | return 1; |
650 | } | 658 | } |
651 | 659 | ||
652 | aic = ioc->aic; | 660 | aic = ioc->aic; |
653 | if (!aic) | 661 | if (!aic) { |
662 | spin_unlock(&ioc->lock); | ||
654 | return 0; | 663 | return 0; |
664 | } | ||
655 | 665 | ||
656 | if (atomic_read(&aic->nr_queued) > 0) { | 666 | if (atomic_read(&aic->nr_queued) > 0) { |
657 | /* process has more requests queued */ | 667 | /* process has more requests queued */ |
668 | spin_unlock(&ioc->lock); | ||
658 | return 1; | 669 | return 1; |
659 | } | 670 | } |
660 | 671 | ||
661 | if (atomic_read(&aic->nr_dispatched) > 0) { | 672 | if (atomic_read(&aic->nr_dispatched) > 0) { |
662 | /* process has more requests dispatched */ | 673 | /* process has more requests dispatched */ |
674 | spin_unlock(&ioc->lock); | ||
663 | return 1; | 675 | return 1; |
664 | } | 676 | } |
665 | 677 | ||
@@ -680,6 +692,7 @@ static int as_can_break_anticipation(struct as_data *ad, struct request *rq) | |||
680 | } | 692 | } |
681 | 693 | ||
682 | as_update_iohist(ad, aic, rq); | 694 | as_update_iohist(ad, aic, rq); |
695 | spin_unlock(&ioc->lock); | ||
683 | return 1; | 696 | return 1; |
684 | } | 697 | } |
685 | 698 | ||
@@ -688,20 +701,27 @@ static int as_can_break_anticipation(struct as_data *ad, struct request *rq) | |||
688 | if (aic->ttime_samples == 0) | 701 | if (aic->ttime_samples == 0) |
689 | ad->exit_prob = (7*ad->exit_prob + 256)/8; | 702 | ad->exit_prob = (7*ad->exit_prob + 256)/8; |
690 | 703 | ||
691 | if (ad->exit_no_coop > 128) | 704 | if (ad->exit_no_coop > 128) { |
705 | spin_unlock(&ioc->lock); | ||
692 | return 1; | 706 | return 1; |
707 | } | ||
693 | } | 708 | } |
694 | 709 | ||
695 | if (aic->ttime_samples == 0) { | 710 | if (aic->ttime_samples == 0) { |
696 | if (ad->new_ttime_mean > ad->antic_expire) | 711 | if (ad->new_ttime_mean > ad->antic_expire) { |
712 | spin_unlock(&ioc->lock); | ||
697 | return 1; | 713 | return 1; |
698 | if (ad->exit_prob * ad->exit_no_coop > 128*256) | 714 | } |
715 | if (ad->exit_prob * ad->exit_no_coop > 128*256) { | ||
716 | spin_unlock(&ioc->lock); | ||
699 | return 1; | 717 | return 1; |
718 | } | ||
700 | } else if (aic->ttime_mean > ad->antic_expire) { | 719 | } else if (aic->ttime_mean > ad->antic_expire) { |
701 | /* the process thinks too much between requests */ | 720 | /* the process thinks too much between requests */ |
721 | spin_unlock(&ioc->lock); | ||
702 | return 1; | 722 | return 1; |
703 | } | 723 | } |
704 | 724 | spin_unlock(&ioc->lock); | |
705 | return 0; | 725 | return 0; |
706 | } | 726 | } |
707 | 727 | ||
@@ -1255,7 +1275,9 @@ static void as_merged_requests(struct request_queue *q, struct request *req, | |||
1255 | * Don't copy here but swap, because when anext is | 1275 | * Don't copy here but swap, because when anext is |
1256 | * removed below, it must contain the unused context | 1276 | * removed below, it must contain the unused context |
1257 | */ | 1277 | */ |
1278 | double_spin_lock(&rioc->lock, &nioc->lock, rioc < nioc); | ||
1258 | swap_io_context(&rioc, &nioc); | 1279 | swap_io_context(&rioc, &nioc); |
1280 | double_spin_unlock(&rioc->lock, &nioc->lock, rioc < nioc); | ||
1259 | } | 1281 | } |
1260 | } | 1282 | } |
1261 | 1283 | ||
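The as-iosched.c hunks above add io_context locking to as_trim(), as_antic_timeout() and as_can_break_anticipation(), and take both contexts' locks around swap_io_context() in as_merged_requests() via double_spin_lock(&rioc->lock, &nioc->lock, rioc < nioc). The sketch below illustrates the pointer-ordered double-lock idiom that call relies on; it is a hedged illustration of the pattern, not the kernel helper itself, and the example_* names are placeholders.

#include <linux/spinlock.h>

/*
 * Illustration only: take two locks in an order fixed by the caller
 * (as_merged_requests() passes "rioc < nioc", i.e. lowest pointer first)
 * so that two CPUs merging requests in opposite directions cannot
 * deadlock ABBA-style on the pair of io_context locks.
 */
static inline void example_double_spin_lock(spinlock_t *l1, spinlock_t *l2,
                                            int l1_first)
{
        if (l1_first) {
                spin_lock(l1);
                spin_lock(l2);
        } else {
                spin_lock(l2);
                spin_lock(l1);
        }
}

/* Release in the reverse order of acquisition. */
static inline void example_double_spin_unlock(spinlock_t *l1, spinlock_t *l2,
                                              int l1_first)
{
        if (l1_first) {
                spin_unlock(l2);
                spin_unlock(l1);
        } else {
                spin_unlock(l1);
                spin_unlock(l2);
        }
}

In a real build the inner lock of such a pair would usually also want a lockdep nesting annotation (spin_lock_nested()), since both locks belong to the same lock class.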
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 13553e015d72..f28d1fb30608 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -26,9 +26,9 @@ static const int cfq_slice_async_rq = 2; | |||
26 | static int cfq_slice_idle = HZ / 125; | 26 | static int cfq_slice_idle = HZ / 125; |
27 | 27 | ||
28 | /* | 28 | /* |
29 | * grace period before allowing idle class to get disk access | 29 | * offset from end of service tree |
30 | */ | 30 | */ |
31 | #define CFQ_IDLE_GRACE (HZ / 10) | 31 | #define CFQ_IDLE_DELAY (HZ / 5) |
32 | 32 | ||
33 | /* | 33 | /* |
34 | * below this threshold, we consider thinktime immediate | 34 | * below this threshold, we consider thinktime immediate |
@@ -98,8 +98,6 @@ struct cfq_data { | |||
98 | struct cfq_queue *async_cfqq[2][IOPRIO_BE_NR]; | 98 | struct cfq_queue *async_cfqq[2][IOPRIO_BE_NR]; |
99 | struct cfq_queue *async_idle_cfqq; | 99 | struct cfq_queue *async_idle_cfqq; |
100 | 100 | ||
101 | struct timer_list idle_class_timer; | ||
102 | |||
103 | sector_t last_position; | 101 | sector_t last_position; |
104 | unsigned long last_end_request; | 102 | unsigned long last_end_request; |
105 | 103 | ||
@@ -199,8 +197,8 @@ CFQ_CFQQ_FNS(sync); | |||
199 | 197 | ||
200 | static void cfq_dispatch_insert(struct request_queue *, struct request *); | 198 | static void cfq_dispatch_insert(struct request_queue *, struct request *); |
201 | static struct cfq_queue *cfq_get_queue(struct cfq_data *, int, | 199 | static struct cfq_queue *cfq_get_queue(struct cfq_data *, int, |
202 | struct task_struct *, gfp_t); | 200 | struct io_context *, gfp_t); |
203 | static struct cfq_io_context *cfq_cic_rb_lookup(struct cfq_data *, | 201 | static struct cfq_io_context *cfq_cic_lookup(struct cfq_data *, |
204 | struct io_context *); | 202 | struct io_context *); |
205 | 203 | ||
206 | static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_context *cic, | 204 | static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_context *cic, |
@@ -384,12 +382,15 @@ cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2) | |||
384 | /* | 382 | /* |
385 | * The below is leftmost cache rbtree addon | 383 | * The below is leftmost cache rbtree addon |
386 | */ | 384 | */ |
387 | static struct rb_node *cfq_rb_first(struct cfq_rb_root *root) | 385 | static struct cfq_queue *cfq_rb_first(struct cfq_rb_root *root) |
388 | { | 386 | { |
389 | if (!root->left) | 387 | if (!root->left) |
390 | root->left = rb_first(&root->rb); | 388 | root->left = rb_first(&root->rb); |
391 | 389 | ||
392 | return root->left; | 390 | if (root->left) |
391 | return rb_entry(root->left, struct cfq_queue, rb_node); | ||
392 | |||
393 | return NULL; | ||
393 | } | 394 | } |
394 | 395 | ||
395 | static void cfq_rb_erase(struct rb_node *n, struct cfq_rb_root *root) | 396 | static void cfq_rb_erase(struct rb_node *n, struct cfq_rb_root *root) |
@@ -446,12 +447,20 @@ static unsigned long cfq_slice_offset(struct cfq_data *cfqd, | |||
446 | static void cfq_service_tree_add(struct cfq_data *cfqd, | 447 | static void cfq_service_tree_add(struct cfq_data *cfqd, |
447 | struct cfq_queue *cfqq, int add_front) | 448 | struct cfq_queue *cfqq, int add_front) |
448 | { | 449 | { |
449 | struct rb_node **p = &cfqd->service_tree.rb.rb_node; | 450 | struct rb_node **p, *parent; |
450 | struct rb_node *parent = NULL; | 451 | struct cfq_queue *__cfqq; |
451 | unsigned long rb_key; | 452 | unsigned long rb_key; |
452 | int left; | 453 | int left; |
453 | 454 | ||
454 | if (!add_front) { | 455 | if (cfq_class_idle(cfqq)) { |
456 | rb_key = CFQ_IDLE_DELAY; | ||
457 | parent = rb_last(&cfqd->service_tree.rb); | ||
458 | if (parent && parent != &cfqq->rb_node) { | ||
459 | __cfqq = rb_entry(parent, struct cfq_queue, rb_node); | ||
460 | rb_key += __cfqq->rb_key; | ||
461 | } else | ||
462 | rb_key += jiffies; | ||
463 | } else if (!add_front) { | ||
455 | rb_key = cfq_slice_offset(cfqd, cfqq) + jiffies; | 464 | rb_key = cfq_slice_offset(cfqd, cfqq) + jiffies; |
456 | rb_key += cfqq->slice_resid; | 465 | rb_key += cfqq->slice_resid; |
457 | cfqq->slice_resid = 0; | 466 | cfqq->slice_resid = 0; |
@@ -469,8 +478,9 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, | |||
469 | } | 478 | } |
470 | 479 | ||
471 | left = 1; | 480 | left = 1; |
481 | parent = NULL; | ||
482 | p = &cfqd->service_tree.rb.rb_node; | ||
472 | while (*p) { | 483 | while (*p) { |
473 | struct cfq_queue *__cfqq; | ||
474 | struct rb_node **n; | 484 | struct rb_node **n; |
475 | 485 | ||
476 | parent = *p; | 486 | parent = *p; |
@@ -524,8 +534,7 @@ static void cfq_resort_rr_list(struct cfq_data *cfqd, struct cfq_queue *cfqq) | |||
524 | * add to busy list of queues for service, trying to be fair in ordering | 534 | * add to busy list of queues for service, trying to be fair in ordering |
525 | * the pending list according to last request service | 535 | * the pending list according to last request service |
526 | */ | 536 | */ |
527 | static inline void | 537 | static void cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) |
528 | cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) | ||
529 | { | 538 | { |
530 | BUG_ON(cfq_cfqq_on_rr(cfqq)); | 539 | BUG_ON(cfq_cfqq_on_rr(cfqq)); |
531 | cfq_mark_cfqq_on_rr(cfqq); | 540 | cfq_mark_cfqq_on_rr(cfqq); |
@@ -538,8 +547,7 @@ cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) | |||
538 | * Called when the cfqq no longer has requests pending, remove it from | 547 | * Called when the cfqq no longer has requests pending, remove it from |
539 | * the service tree. | 548 | * the service tree. |
540 | */ | 549 | */ |
541 | static inline void | 550 | static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) |
542 | cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) | ||
543 | { | 551 | { |
544 | BUG_ON(!cfq_cfqq_on_rr(cfqq)); | 552 | BUG_ON(!cfq_cfqq_on_rr(cfqq)); |
545 | cfq_clear_cfqq_on_rr(cfqq); | 553 | cfq_clear_cfqq_on_rr(cfqq); |
@@ -554,7 +562,7 @@ cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) | |||
554 | /* | 562 | /* |
555 | * rb tree support functions | 563 | * rb tree support functions |
556 | */ | 564 | */ |
557 | static inline void cfq_del_rq_rb(struct request *rq) | 565 | static void cfq_del_rq_rb(struct request *rq) |
558 | { | 566 | { |
559 | struct cfq_queue *cfqq = RQ_CFQQ(rq); | 567 | struct cfq_queue *cfqq = RQ_CFQQ(rq); |
560 | struct cfq_data *cfqd = cfqq->cfqd; | 568 | struct cfq_data *cfqd = cfqq->cfqd; |
@@ -594,8 +602,7 @@ static void cfq_add_rq_rb(struct request *rq) | |||
594 | BUG_ON(!cfqq->next_rq); | 602 | BUG_ON(!cfqq->next_rq); |
595 | } | 603 | } |
596 | 604 | ||
597 | static inline void | 605 | static void cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq) |
598 | cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq) | ||
599 | { | 606 | { |
600 | elv_rb_del(&cfqq->sort_list, rq); | 607 | elv_rb_del(&cfqq->sort_list, rq); |
601 | cfqq->queued[rq_is_sync(rq)]--; | 608 | cfqq->queued[rq_is_sync(rq)]--; |
@@ -609,7 +616,7 @@ cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio) | |||
609 | struct cfq_io_context *cic; | 616 | struct cfq_io_context *cic; |
610 | struct cfq_queue *cfqq; | 617 | struct cfq_queue *cfqq; |
611 | 618 | ||
612 | cic = cfq_cic_rb_lookup(cfqd, tsk->io_context); | 619 | cic = cfq_cic_lookup(cfqd, tsk->io_context); |
613 | if (!cic) | 620 | if (!cic) |
614 | return NULL; | 621 | return NULL; |
615 | 622 | ||
@@ -721,7 +728,7 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq, | |||
721 | * Lookup the cfqq that this bio will be queued with. Allow | 728 | * Lookup the cfqq that this bio will be queued with. Allow |
722 | * merge only if rq is queued there. | 729 | * merge only if rq is queued there. |
723 | */ | 730 | */ |
724 | cic = cfq_cic_rb_lookup(cfqd, current->io_context); | 731 | cic = cfq_cic_lookup(cfqd, current->io_context); |
725 | if (!cic) | 732 | if (!cic) |
726 | return 0; | 733 | return 0; |
727 | 734 | ||
@@ -732,15 +739,10 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq, | |||
732 | return 0; | 739 | return 0; |
733 | } | 740 | } |
734 | 741 | ||
735 | static inline void | 742 | static void __cfq_set_active_queue(struct cfq_data *cfqd, |
736 | __cfq_set_active_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) | 743 | struct cfq_queue *cfqq) |
737 | { | 744 | { |
738 | if (cfqq) { | 745 | if (cfqq) { |
739 | /* | ||
740 | * stop potential idle class queues waiting service | ||
741 | */ | ||
742 | del_timer(&cfqd->idle_class_timer); | ||
743 | |||
744 | cfqq->slice_end = 0; | 746 | cfqq->slice_end = 0; |
745 | cfq_clear_cfqq_must_alloc_slice(cfqq); | 747 | cfq_clear_cfqq_must_alloc_slice(cfqq); |
746 | cfq_clear_cfqq_fifo_expire(cfqq); | 748 | cfq_clear_cfqq_fifo_expire(cfqq); |
@@ -789,47 +791,16 @@ static inline void cfq_slice_expired(struct cfq_data *cfqd, int timed_out) | |||
789 | __cfq_slice_expired(cfqd, cfqq, timed_out); | 791 | __cfq_slice_expired(cfqd, cfqq, timed_out); |
790 | } | 792 | } |
791 | 793 | ||
792 | static int start_idle_class_timer(struct cfq_data *cfqd) | ||
793 | { | ||
794 | unsigned long end = cfqd->last_end_request + CFQ_IDLE_GRACE; | ||
795 | unsigned long now = jiffies; | ||
796 | |||
797 | if (time_before(now, end) && | ||
798 | time_after_eq(now, cfqd->last_end_request)) { | ||
799 | mod_timer(&cfqd->idle_class_timer, end); | ||
800 | return 1; | ||
801 | } | ||
802 | |||
803 | return 0; | ||
804 | } | ||
805 | |||
806 | /* | 794 | /* |
807 | * Get next queue for service. Unless we have a queue preemption, | 795 | * Get next queue for service. Unless we have a queue preemption, |
808 | * we'll simply select the first cfqq in the service tree. | 796 | * we'll simply select the first cfqq in the service tree. |
809 | */ | 797 | */ |
810 | static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd) | 798 | static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd) |
811 | { | 799 | { |
812 | struct cfq_queue *cfqq; | ||
813 | struct rb_node *n; | ||
814 | |||
815 | if (RB_EMPTY_ROOT(&cfqd->service_tree.rb)) | 800 | if (RB_EMPTY_ROOT(&cfqd->service_tree.rb)) |
816 | return NULL; | 801 | return NULL; |
817 | 802 | ||
818 | n = cfq_rb_first(&cfqd->service_tree); | 803 | return cfq_rb_first(&cfqd->service_tree); |
819 | cfqq = rb_entry(n, struct cfq_queue, rb_node); | ||
820 | |||
821 | if (cfq_class_idle(cfqq)) { | ||
822 | /* | ||
823 | * if we have idle queues and no rt or be queues had | ||
824 | * pending requests, either allow immediate service if | ||
825 | * the grace period has passed or arm the idle grace | ||
826 | * timer | ||
827 | */ | ||
828 | if (start_idle_class_timer(cfqd)) | ||
829 | cfqq = NULL; | ||
830 | } | ||
831 | |||
832 | return cfqq; | ||
833 | } | 804 | } |
834 | 805 | ||
835 | /* | 806 | /* |
@@ -895,7 +866,7 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd) | |||
895 | * task has exited, don't wait | 866 | * task has exited, don't wait |
896 | */ | 867 | */ |
897 | cic = cfqd->active_cic; | 868 | cic = cfqd->active_cic; |
898 | if (!cic || !cic->ioc->task) | 869 | if (!cic || !atomic_read(&cic->ioc->nr_tasks)) |
899 | return; | 870 | return; |
900 | 871 | ||
901 | /* | 872 | /* |
@@ -939,7 +910,7 @@ static void cfq_dispatch_insert(struct request_queue *q, struct request *rq) | |||
939 | /* | 910 | /* |
940 | * return expired entry, or NULL to just start from scratch in rbtree | 911 | * return expired entry, or NULL to just start from scratch in rbtree |
941 | */ | 912 | */ |
942 | static inline struct request *cfq_check_fifo(struct cfq_queue *cfqq) | 913 | static struct request *cfq_check_fifo(struct cfq_queue *cfqq) |
943 | { | 914 | { |
944 | struct cfq_data *cfqd = cfqq->cfqd; | 915 | struct cfq_data *cfqd = cfqq->cfqd; |
945 | struct request *rq; | 916 | struct request *rq; |
@@ -1068,7 +1039,7 @@ __cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
1068 | return dispatched; | 1039 | return dispatched; |
1069 | } | 1040 | } |
1070 | 1041 | ||
1071 | static inline int __cfq_forced_dispatch_cfqq(struct cfq_queue *cfqq) | 1042 | static int __cfq_forced_dispatch_cfqq(struct cfq_queue *cfqq) |
1072 | { | 1043 | { |
1073 | int dispatched = 0; | 1044 | int dispatched = 0; |
1074 | 1045 | ||
@@ -1087,14 +1058,11 @@ static inline int __cfq_forced_dispatch_cfqq(struct cfq_queue *cfqq) | |||
1087 | */ | 1058 | */ |
1088 | static int cfq_forced_dispatch(struct cfq_data *cfqd) | 1059 | static int cfq_forced_dispatch(struct cfq_data *cfqd) |
1089 | { | 1060 | { |
1061 | struct cfq_queue *cfqq; | ||
1090 | int dispatched = 0; | 1062 | int dispatched = 0; |
1091 | struct rb_node *n; | ||
1092 | |||
1093 | while ((n = cfq_rb_first(&cfqd->service_tree)) != NULL) { | ||
1094 | struct cfq_queue *cfqq = rb_entry(n, struct cfq_queue, rb_node); | ||
1095 | 1063 | ||
1064 | while ((cfqq = cfq_rb_first(&cfqd->service_tree)) != NULL) | ||
1096 | dispatched += __cfq_forced_dispatch_cfqq(cfqq); | 1065 | dispatched += __cfq_forced_dispatch_cfqq(cfqq); |
1097 | } | ||
1098 | 1066 | ||
1099 | cfq_slice_expired(cfqd, 0); | 1067 | cfq_slice_expired(cfqd, 0); |
1100 | 1068 | ||
@@ -1170,20 +1138,69 @@ static void cfq_put_queue(struct cfq_queue *cfqq) | |||
1170 | kmem_cache_free(cfq_pool, cfqq); | 1138 | kmem_cache_free(cfq_pool, cfqq); |
1171 | } | 1139 | } |
1172 | 1140 | ||
1173 | static void cfq_free_io_context(struct io_context *ioc) | 1141 | /* |
1142 | * Call func for each cic attached to this ioc. Returns number of cic's seen. | ||
1143 | */ | ||
1144 | #define CIC_GANG_NR 16 | ||
1145 | static unsigned int | ||
1146 | call_for_each_cic(struct io_context *ioc, | ||
1147 | void (*func)(struct io_context *, struct cfq_io_context *)) | ||
1174 | { | 1148 | { |
1175 | struct cfq_io_context *__cic; | 1149 | struct cfq_io_context *cics[CIC_GANG_NR]; |
1176 | struct rb_node *n; | 1150 | unsigned long index = 0; |
1177 | int freed = 0; | 1151 | unsigned int called = 0; |
1152 | int nr; | ||
1178 | 1153 | ||
1179 | ioc->ioc_data = NULL; | 1154 | rcu_read_lock(); |
1180 | 1155 | ||
1181 | while ((n = rb_first(&ioc->cic_root)) != NULL) { | 1156 | do { |
1182 | __cic = rb_entry(n, struct cfq_io_context, rb_node); | 1157 | int i; |
1183 | rb_erase(&__cic->rb_node, &ioc->cic_root); | 1158 | |
1184 | kmem_cache_free(cfq_ioc_pool, __cic); | 1159 | /* |
1185 | freed++; | 1160 | * Perhaps there's a better way - this just gang lookups from |
1186 | } | 1161 | * 0 to the end, restarting after each CIC_GANG_NR from the |
1162 | * last key + 1. | ||
1163 | */ | ||
1164 | nr = radix_tree_gang_lookup(&ioc->radix_root, (void **) cics, | ||
1165 | index, CIC_GANG_NR); | ||
1166 | if (!nr) | ||
1167 | break; | ||
1168 | |||
1169 | called += nr; | ||
1170 | index = 1 + (unsigned long) cics[nr - 1]->key; | ||
1171 | |||
1172 | for (i = 0; i < nr; i++) | ||
1173 | func(ioc, cics[i]); | ||
1174 | } while (nr == CIC_GANG_NR); | ||
1175 | |||
1176 | rcu_read_unlock(); | ||
1177 | |||
1178 | return called; | ||
1179 | } | ||
1180 | |||
1181 | static void cic_free_func(struct io_context *ioc, struct cfq_io_context *cic) | ||
1182 | { | ||
1183 | unsigned long flags; | ||
1184 | |||
1185 | BUG_ON(!cic->dead_key); | ||
1186 | |||
1187 | spin_lock_irqsave(&ioc->lock, flags); | ||
1188 | radix_tree_delete(&ioc->radix_root, cic->dead_key); | ||
1189 | spin_unlock_irqrestore(&ioc->lock, flags); | ||
1190 | |||
1191 | kmem_cache_free(cfq_ioc_pool, cic); | ||
1192 | } | ||
1193 | |||
1194 | static void cfq_free_io_context(struct io_context *ioc) | ||
1195 | { | ||
1196 | int freed; | ||
1197 | |||
1198 | /* | ||
1199 | * ioc->refcount is zero here, so no more cic's are allowed to be | ||
1200 | * linked into this ioc. So it should be ok to iterate over the known | ||
1201 | * list, we will see all cic's since no new ones are added. | ||
1202 | */ | ||
1203 | freed = call_for_each_cic(ioc, cic_free_func); | ||
1187 | 1204 | ||
1188 | elv_ioc_count_mod(ioc_count, -freed); | 1205 | elv_ioc_count_mod(ioc_count, -freed); |
1189 | 1206 | ||
@@ -1205,7 +1222,12 @@ static void __cfq_exit_single_io_context(struct cfq_data *cfqd, | |||
1205 | struct cfq_io_context *cic) | 1222 | struct cfq_io_context *cic) |
1206 | { | 1223 | { |
1207 | list_del_init(&cic->queue_list); | 1224 | list_del_init(&cic->queue_list); |
1225 | |||
1226 | /* | ||
1227 | * Make sure key == NULL is seen for dead queues | ||
1228 | */ | ||
1208 | smp_wmb(); | 1229 | smp_wmb(); |
1230 | cic->dead_key = (unsigned long) cic->key; | ||
1209 | cic->key = NULL; | 1231 | cic->key = NULL; |
1210 | 1232 | ||
1211 | if (cic->cfqq[ASYNC]) { | 1233 | if (cic->cfqq[ASYNC]) { |
@@ -1219,16 +1241,18 @@ static void __cfq_exit_single_io_context(struct cfq_data *cfqd, | |||
1219 | } | 1241 | } |
1220 | } | 1242 | } |
1221 | 1243 | ||
1222 | static void cfq_exit_single_io_context(struct cfq_io_context *cic) | 1244 | static void cfq_exit_single_io_context(struct io_context *ioc, |
1245 | struct cfq_io_context *cic) | ||
1223 | { | 1246 | { |
1224 | struct cfq_data *cfqd = cic->key; | 1247 | struct cfq_data *cfqd = cic->key; |
1225 | 1248 | ||
1226 | if (cfqd) { | 1249 | if (cfqd) { |
1227 | struct request_queue *q = cfqd->queue; | 1250 | struct request_queue *q = cfqd->queue; |
1251 | unsigned long flags; | ||
1228 | 1252 | ||
1229 | spin_lock_irq(q->queue_lock); | 1253 | spin_lock_irqsave(q->queue_lock, flags); |
1230 | __cfq_exit_single_io_context(cfqd, cic); | 1254 | __cfq_exit_single_io_context(cfqd, cic); |
1231 | spin_unlock_irq(q->queue_lock); | 1255 | spin_unlock_irqrestore(q->queue_lock, flags); |
1232 | } | 1256 | } |
1233 | } | 1257 | } |
1234 | 1258 | ||
@@ -1238,21 +1262,8 @@ static void cfq_exit_single_io_context(struct cfq_io_context *cic) | |||
1238 | */ | 1262 | */ |
1239 | static void cfq_exit_io_context(struct io_context *ioc) | 1263 | static void cfq_exit_io_context(struct io_context *ioc) |
1240 | { | 1264 | { |
1241 | struct cfq_io_context *__cic; | 1265 | rcu_assign_pointer(ioc->ioc_data, NULL); |
1242 | struct rb_node *n; | 1266 | call_for_each_cic(ioc, cfq_exit_single_io_context); |
1243 | |||
1244 | ioc->ioc_data = NULL; | ||
1245 | |||
1246 | /* | ||
1247 | * put the reference this task is holding to the various queues | ||
1248 | */ | ||
1249 | n = rb_first(&ioc->cic_root); | ||
1250 | while (n != NULL) { | ||
1251 | __cic = rb_entry(n, struct cfq_io_context, rb_node); | ||
1252 | |||
1253 | cfq_exit_single_io_context(__cic); | ||
1254 | n = rb_next(n); | ||
1255 | } | ||
1256 | } | 1267 | } |
1257 | 1268 | ||
1258 | static struct cfq_io_context * | 1269 | static struct cfq_io_context * |
@@ -1273,7 +1284,7 @@ cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) | |||
1273 | return cic; | 1284 | return cic; |
1274 | } | 1285 | } |
1275 | 1286 | ||
1276 | static void cfq_init_prio_data(struct cfq_queue *cfqq) | 1287 | static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc) |
1277 | { | 1288 | { |
1278 | struct task_struct *tsk = current; | 1289 | struct task_struct *tsk = current; |
1279 | int ioprio_class; | 1290 | int ioprio_class; |
@@ -1281,7 +1292,7 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq) | |||
1281 | if (!cfq_cfqq_prio_changed(cfqq)) | 1292 | if (!cfq_cfqq_prio_changed(cfqq)) |
1282 | return; | 1293 | return; |
1283 | 1294 | ||
1284 | ioprio_class = IOPRIO_PRIO_CLASS(tsk->ioprio); | 1295 | ioprio_class = IOPRIO_PRIO_CLASS(ioc->ioprio); |
1285 | switch (ioprio_class) { | 1296 | switch (ioprio_class) { |
1286 | default: | 1297 | default: |
1287 | printk(KERN_ERR "cfq: bad prio %x\n", ioprio_class); | 1298 | printk(KERN_ERR "cfq: bad prio %x\n", ioprio_class); |
@@ -1293,11 +1304,11 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq) | |||
1293 | cfqq->ioprio_class = IOPRIO_CLASS_BE; | 1304 | cfqq->ioprio_class = IOPRIO_CLASS_BE; |
1294 | break; | 1305 | break; |
1295 | case IOPRIO_CLASS_RT: | 1306 | case IOPRIO_CLASS_RT: |
1296 | cfqq->ioprio = task_ioprio(tsk); | 1307 | cfqq->ioprio = task_ioprio(ioc); |
1297 | cfqq->ioprio_class = IOPRIO_CLASS_RT; | 1308 | cfqq->ioprio_class = IOPRIO_CLASS_RT; |
1298 | break; | 1309 | break; |
1299 | case IOPRIO_CLASS_BE: | 1310 | case IOPRIO_CLASS_BE: |
1300 | cfqq->ioprio = task_ioprio(tsk); | 1311 | cfqq->ioprio = task_ioprio(ioc); |
1301 | cfqq->ioprio_class = IOPRIO_CLASS_BE; | 1312 | cfqq->ioprio_class = IOPRIO_CLASS_BE; |
1302 | break; | 1313 | break; |
1303 | case IOPRIO_CLASS_IDLE: | 1314 | case IOPRIO_CLASS_IDLE: |
@@ -1316,7 +1327,7 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq) | |||
1316 | cfq_clear_cfqq_prio_changed(cfqq); | 1327 | cfq_clear_cfqq_prio_changed(cfqq); |
1317 | } | 1328 | } |
1318 | 1329 | ||
1319 | static inline void changed_ioprio(struct cfq_io_context *cic) | 1330 | static void changed_ioprio(struct io_context *ioc, struct cfq_io_context *cic) |
1320 | { | 1331 | { |
1321 | struct cfq_data *cfqd = cic->key; | 1332 | struct cfq_data *cfqd = cic->key; |
1322 | struct cfq_queue *cfqq; | 1333 | struct cfq_queue *cfqq; |
@@ -1330,8 +1341,7 @@ static inline void changed_ioprio(struct cfq_io_context *cic) | |||
1330 | cfqq = cic->cfqq[ASYNC]; | 1341 | cfqq = cic->cfqq[ASYNC]; |
1331 | if (cfqq) { | 1342 | if (cfqq) { |
1332 | struct cfq_queue *new_cfqq; | 1343 | struct cfq_queue *new_cfqq; |
1333 | new_cfqq = cfq_get_queue(cfqd, ASYNC, cic->ioc->task, | 1344 | new_cfqq = cfq_get_queue(cfqd, ASYNC, cic->ioc, GFP_ATOMIC); |
1334 | GFP_ATOMIC); | ||
1335 | if (new_cfqq) { | 1345 | if (new_cfqq) { |
1336 | cic->cfqq[ASYNC] = new_cfqq; | 1346 | cic->cfqq[ASYNC] = new_cfqq; |
1337 | cfq_put_queue(cfqq); | 1347 | cfq_put_queue(cfqq); |
@@ -1347,29 +1357,19 @@ static inline void changed_ioprio(struct cfq_io_context *cic) | |||
1347 | 1357 | ||
1348 | static void cfq_ioc_set_ioprio(struct io_context *ioc) | 1358 | static void cfq_ioc_set_ioprio(struct io_context *ioc) |
1349 | { | 1359 | { |
1350 | struct cfq_io_context *cic; | 1360 | call_for_each_cic(ioc, changed_ioprio); |
1351 | struct rb_node *n; | ||
1352 | |||
1353 | ioc->ioprio_changed = 0; | 1361 | ioc->ioprio_changed = 0; |
1354 | |||
1355 | n = rb_first(&ioc->cic_root); | ||
1356 | while (n != NULL) { | ||
1357 | cic = rb_entry(n, struct cfq_io_context, rb_node); | ||
1358 | |||
1359 | changed_ioprio(cic); | ||
1360 | n = rb_next(n); | ||
1361 | } | ||
1362 | } | 1362 | } |
1363 | 1363 | ||
1364 | static struct cfq_queue * | 1364 | static struct cfq_queue * |
1365 | cfq_find_alloc_queue(struct cfq_data *cfqd, int is_sync, | 1365 | cfq_find_alloc_queue(struct cfq_data *cfqd, int is_sync, |
1366 | struct task_struct *tsk, gfp_t gfp_mask) | 1366 | struct io_context *ioc, gfp_t gfp_mask) |
1367 | { | 1367 | { |
1368 | struct cfq_queue *cfqq, *new_cfqq = NULL; | 1368 | struct cfq_queue *cfqq, *new_cfqq = NULL; |
1369 | struct cfq_io_context *cic; | 1369 | struct cfq_io_context *cic; |
1370 | 1370 | ||
1371 | retry: | 1371 | retry: |
1372 | cic = cfq_cic_rb_lookup(cfqd, tsk->io_context); | 1372 | cic = cfq_cic_lookup(cfqd, ioc); |
1373 | /* cic always exists here */ | 1373 | /* cic always exists here */ |
1374 | cfqq = cic_to_cfqq(cic, is_sync); | 1374 | cfqq = cic_to_cfqq(cic, is_sync); |
1375 | 1375 | ||
@@ -1404,15 +1404,16 @@ retry: | |||
1404 | atomic_set(&cfqq->ref, 0); | 1404 | atomic_set(&cfqq->ref, 0); |
1405 | cfqq->cfqd = cfqd; | 1405 | cfqq->cfqd = cfqd; |
1406 | 1406 | ||
1407 | if (is_sync) { | ||
1408 | cfq_mark_cfqq_idle_window(cfqq); | ||
1409 | cfq_mark_cfqq_sync(cfqq); | ||
1410 | } | ||
1411 | |||
1412 | cfq_mark_cfqq_prio_changed(cfqq); | 1407 | cfq_mark_cfqq_prio_changed(cfqq); |
1413 | cfq_mark_cfqq_queue_new(cfqq); | 1408 | cfq_mark_cfqq_queue_new(cfqq); |
1414 | 1409 | ||
1415 | cfq_init_prio_data(cfqq); | 1410 | cfq_init_prio_data(cfqq, ioc); |
1411 | |||
1412 | if (is_sync) { | ||
1413 | if (!cfq_class_idle(cfqq)) | ||
1414 | cfq_mark_cfqq_idle_window(cfqq); | ||
1415 | cfq_mark_cfqq_sync(cfqq); | ||
1416 | } | ||
1416 | } | 1417 | } |
1417 | 1418 | ||
1418 | if (new_cfqq) | 1419 | if (new_cfqq) |
@@ -1439,11 +1440,11 @@ cfq_async_queue_prio(struct cfq_data *cfqd, int ioprio_class, int ioprio) | |||
1439 | } | 1440 | } |
1440 | 1441 | ||
1441 | static struct cfq_queue * | 1442 | static struct cfq_queue * |
1442 | cfq_get_queue(struct cfq_data *cfqd, int is_sync, struct task_struct *tsk, | 1443 | cfq_get_queue(struct cfq_data *cfqd, int is_sync, struct io_context *ioc, |
1443 | gfp_t gfp_mask) | 1444 | gfp_t gfp_mask) |
1444 | { | 1445 | { |
1445 | const int ioprio = task_ioprio(tsk); | 1446 | const int ioprio = task_ioprio(ioc); |
1446 | const int ioprio_class = task_ioprio_class(tsk); | 1447 | const int ioprio_class = task_ioprio_class(ioc); |
1447 | struct cfq_queue **async_cfqq = NULL; | 1448 | struct cfq_queue **async_cfqq = NULL; |
1448 | struct cfq_queue *cfqq = NULL; | 1449 | struct cfq_queue *cfqq = NULL; |
1449 | 1450 | ||
@@ -1453,7 +1454,7 @@ cfq_get_queue(struct cfq_data *cfqd, int is_sync, struct task_struct *tsk, | |||
1453 | } | 1454 | } |
1454 | 1455 | ||
1455 | if (!cfqq) { | 1456 | if (!cfqq) { |
1456 | cfqq = cfq_find_alloc_queue(cfqd, is_sync, tsk, gfp_mask); | 1457 | cfqq = cfq_find_alloc_queue(cfqd, is_sync, ioc, gfp_mask); |
1457 | if (!cfqq) | 1458 | if (!cfqq) |
1458 | return NULL; | 1459 | return NULL; |
1459 | } | 1460 | } |
@@ -1470,28 +1471,42 @@ cfq_get_queue(struct cfq_data *cfqd, int is_sync, struct task_struct *tsk, | |||
1470 | return cfqq; | 1471 | return cfqq; |
1471 | } | 1472 | } |
1472 | 1473 | ||
1474 | static void cfq_cic_free(struct cfq_io_context *cic) | ||
1475 | { | ||
1476 | kmem_cache_free(cfq_ioc_pool, cic); | ||
1477 | elv_ioc_count_dec(ioc_count); | ||
1478 | |||
1479 | if (ioc_gone && !elv_ioc_count_read(ioc_count)) | ||
1480 | complete(ioc_gone); | ||
1481 | } | ||
1482 | |||
1473 | /* | 1483 | /* |
1474 | * We drop cfq io contexts lazily, so we may find a dead one. | 1484 | * We drop cfq io contexts lazily, so we may find a dead one. |
1475 | */ | 1485 | */ |
1476 | static void | 1486 | static void |
1477 | cfq_drop_dead_cic(struct io_context *ioc, struct cfq_io_context *cic) | 1487 | cfq_drop_dead_cic(struct cfq_data *cfqd, struct io_context *ioc, |
1488 | struct cfq_io_context *cic) | ||
1478 | { | 1489 | { |
1490 | unsigned long flags; | ||
1491 | |||
1479 | WARN_ON(!list_empty(&cic->queue_list)); | 1492 | WARN_ON(!list_empty(&cic->queue_list)); |
1480 | 1493 | ||
1494 | spin_lock_irqsave(&ioc->lock, flags); | ||
1495 | |||
1481 | if (ioc->ioc_data == cic) | 1496 | if (ioc->ioc_data == cic) |
1482 | ioc->ioc_data = NULL; | 1497 | rcu_assign_pointer(ioc->ioc_data, NULL); |
1483 | 1498 | ||
1484 | rb_erase(&cic->rb_node, &ioc->cic_root); | 1499 | radix_tree_delete(&ioc->radix_root, (unsigned long) cfqd); |
1485 | kmem_cache_free(cfq_ioc_pool, cic); | 1500 | spin_unlock_irqrestore(&ioc->lock, flags); |
1486 | elv_ioc_count_dec(ioc_count); | 1501 | |
1502 | cfq_cic_free(cic); | ||
1487 | } | 1503 | } |
1488 | 1504 | ||
1489 | static struct cfq_io_context * | 1505 | static struct cfq_io_context * |
1490 | cfq_cic_rb_lookup(struct cfq_data *cfqd, struct io_context *ioc) | 1506 | cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc) |
1491 | { | 1507 | { |
1492 | struct rb_node *n; | ||
1493 | struct cfq_io_context *cic; | 1508 | struct cfq_io_context *cic; |
1494 | void *k, *key = cfqd; | 1509 | void *k; |
1495 | 1510 | ||
1496 | if (unlikely(!ioc)) | 1511 | if (unlikely(!ioc)) |
1497 | return NULL; | 1512 | return NULL; |
@@ -1499,74 +1514,64 @@ cfq_cic_rb_lookup(struct cfq_data *cfqd, struct io_context *ioc) | |||
1499 | /* | 1514 | /* |
1500 | * we maintain a last-hit cache, to avoid browsing over the tree | 1515 | * we maintain a last-hit cache, to avoid browsing over the tree |
1501 | */ | 1516 | */ |
1502 | cic = ioc->ioc_data; | 1517 | cic = rcu_dereference(ioc->ioc_data); |
1503 | if (cic && cic->key == cfqd) | 1518 | if (cic && cic->key == cfqd) |
1504 | return cic; | 1519 | return cic; |
1505 | 1520 | ||
1506 | restart: | 1521 | do { |
1507 | n = ioc->cic_root.rb_node; | 1522 | rcu_read_lock(); |
1508 | while (n) { | 1523 | cic = radix_tree_lookup(&ioc->radix_root, (unsigned long) cfqd); |
1509 | cic = rb_entry(n, struct cfq_io_context, rb_node); | 1524 | rcu_read_unlock(); |
1525 | if (!cic) | ||
1526 | break; | ||
1510 | /* ->key must be copied to avoid race with cfq_exit_queue() */ | 1527 | /* ->key must be copied to avoid race with cfq_exit_queue() */ |
1511 | k = cic->key; | 1528 | k = cic->key; |
1512 | if (unlikely(!k)) { | 1529 | if (unlikely(!k)) { |
1513 | cfq_drop_dead_cic(ioc, cic); | 1530 | cfq_drop_dead_cic(cfqd, ioc, cic); |
1514 | goto restart; | 1531 | continue; |
1515 | } | 1532 | } |
1516 | 1533 | ||
1517 | if (key < k) | 1534 | rcu_assign_pointer(ioc->ioc_data, cic); |
1518 | n = n->rb_left; | 1535 | break; |
1519 | else if (key > k) | 1536 | } while (1); |
1520 | n = n->rb_right; | ||
1521 | else { | ||
1522 | ioc->ioc_data = cic; | ||
1523 | return cic; | ||
1524 | } | ||
1525 | } | ||
1526 | 1537 | ||
1527 | return NULL; | 1538 | return cic; |
1528 | } | 1539 | } |
1529 | 1540 | ||
1530 | static inline void | 1541 | /* |
1531 | cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc, | 1542 | * Add cic into ioc, using cfqd as the search key. This enables us to lookup |
1532 | struct cfq_io_context *cic) | 1543 | * the process specific cfq io context when entered from the block layer. |
1544 | * Also adds the cic to a per-cfqd list, used when this queue is removed. | ||
1545 | */ | ||
1546 | static int cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc, | ||
1547 | struct cfq_io_context *cic, gfp_t gfp_mask) | ||
1533 | { | 1548 | { |
1534 | struct rb_node **p; | ||
1535 | struct rb_node *parent; | ||
1536 | struct cfq_io_context *__cic; | ||
1537 | unsigned long flags; | 1549 | unsigned long flags; |
1538 | void *k; | 1550 | int ret; |
1539 | 1551 | ||
1540 | cic->ioc = ioc; | 1552 | ret = radix_tree_preload(gfp_mask); |
1541 | cic->key = cfqd; | 1553 | if (!ret) { |
1554 | cic->ioc = ioc; | ||
1555 | cic->key = cfqd; | ||
1542 | 1556 | ||
1543 | restart: | 1557 | spin_lock_irqsave(&ioc->lock, flags); |
1544 | parent = NULL; | 1558 | ret = radix_tree_insert(&ioc->radix_root, |
1545 | p = &ioc->cic_root.rb_node; | 1559 | (unsigned long) cfqd, cic); |
1546 | while (*p) { | 1560 | spin_unlock_irqrestore(&ioc->lock, flags); |
1547 | parent = *p; | ||
1548 | __cic = rb_entry(parent, struct cfq_io_context, rb_node); | ||
1549 | /* ->key must be copied to avoid race with cfq_exit_queue() */ | ||
1550 | k = __cic->key; | ||
1551 | if (unlikely(!k)) { | ||
1552 | cfq_drop_dead_cic(ioc, __cic); | ||
1553 | goto restart; | ||
1554 | } | ||
1555 | 1561 | ||
1556 | if (cic->key < k) | 1562 | radix_tree_preload_end(); |
1557 | p = &(*p)->rb_left; | 1563 | |
1558 | else if (cic->key > k) | 1564 | if (!ret) { |
1559 | p = &(*p)->rb_right; | 1565 | spin_lock_irqsave(cfqd->queue->queue_lock, flags); |
1560 | else | 1566 | list_add(&cic->queue_list, &cfqd->cic_list); |
1561 | BUG(); | 1567 | spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); |
1568 | } | ||
1562 | } | 1569 | } |
1563 | 1570 | ||
1564 | rb_link_node(&cic->rb_node, parent, p); | 1571 | if (ret) |
1565 | rb_insert_color(&cic->rb_node, &ioc->cic_root); | 1572 | printk(KERN_ERR "cfq: cic link failed!\n"); |
1566 | 1573 | ||
1567 | spin_lock_irqsave(cfqd->queue->queue_lock, flags); | 1574 | return ret; |
1568 | list_add(&cic->queue_list, &cfqd->cic_list); | ||
1569 | spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); | ||
1570 | } | 1575 | } |
1571 | 1576 | ||
1572 | /* | 1577 | /* |
@@ -1586,7 +1591,7 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) | |||
1586 | if (!ioc) | 1591 | if (!ioc) |
1587 | return NULL; | 1592 | return NULL; |
1588 | 1593 | ||
1589 | cic = cfq_cic_rb_lookup(cfqd, ioc); | 1594 | cic = cfq_cic_lookup(cfqd, ioc); |
1590 | if (cic) | 1595 | if (cic) |
1591 | goto out; | 1596 | goto out; |
1592 | 1597 | ||
@@ -1594,13 +1599,17 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) | |||
1594 | if (cic == NULL) | 1599 | if (cic == NULL) |
1595 | goto err; | 1600 | goto err; |
1596 | 1601 | ||
1597 | cfq_cic_link(cfqd, ioc, cic); | 1602 | if (cfq_cic_link(cfqd, ioc, cic, gfp_mask)) |
1603 | goto err_free; | ||
1604 | |||
1598 | out: | 1605 | out: |
1599 | smp_read_barrier_depends(); | 1606 | smp_read_barrier_depends(); |
1600 | if (unlikely(ioc->ioprio_changed)) | 1607 | if (unlikely(ioc->ioprio_changed)) |
1601 | cfq_ioc_set_ioprio(ioc); | 1608 | cfq_ioc_set_ioprio(ioc); |
1602 | 1609 | ||
1603 | return cic; | 1610 | return cic; |
1611 | err_free: | ||
1612 | cfq_cic_free(cic); | ||
1604 | err: | 1613 | err: |
1605 | put_io_context(ioc); | 1614 | put_io_context(ioc); |
1606 | return NULL; | 1615 | return NULL; |
@@ -1655,12 +1664,15 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
1655 | { | 1664 | { |
1656 | int enable_idle; | 1665 | int enable_idle; |
1657 | 1666 | ||
1658 | if (!cfq_cfqq_sync(cfqq)) | 1667 | /* |
1668 | * Don't idle for async or idle io prio class | ||
1669 | */ | ||
1670 | if (!cfq_cfqq_sync(cfqq) || cfq_class_idle(cfqq)) | ||
1659 | return; | 1671 | return; |
1660 | 1672 | ||
1661 | enable_idle = cfq_cfqq_idle_window(cfqq); | 1673 | enable_idle = cfq_cfqq_idle_window(cfqq); |
1662 | 1674 | ||
1663 | if (!cic->ioc->task || !cfqd->cfq_slice_idle || | 1675 | if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle || |
1664 | (cfqd->hw_tag && CIC_SEEKY(cic))) | 1676 | (cfqd->hw_tag && CIC_SEEKY(cic))) |
1665 | enable_idle = 0; | 1677 | enable_idle = 0; |
1666 | else if (sample_valid(cic->ttime_samples)) { | 1678 | else if (sample_valid(cic->ttime_samples)) { |
@@ -1793,7 +1805,7 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq) | |||
1793 | struct cfq_data *cfqd = q->elevator->elevator_data; | 1805 | struct cfq_data *cfqd = q->elevator->elevator_data; |
1794 | struct cfq_queue *cfqq = RQ_CFQQ(rq); | 1806 | struct cfq_queue *cfqq = RQ_CFQQ(rq); |
1795 | 1807 | ||
1796 | cfq_init_prio_data(cfqq); | 1808 | cfq_init_prio_data(cfqq, RQ_CIC(rq)->ioc); |
1797 | 1809 | ||
1798 | cfq_add_rq_rb(rq); | 1810 | cfq_add_rq_rb(rq); |
1799 | 1811 | ||
@@ -1834,7 +1846,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) | |||
1834 | cfq_set_prio_slice(cfqd, cfqq); | 1846 | cfq_set_prio_slice(cfqd, cfqq); |
1835 | cfq_clear_cfqq_slice_new(cfqq); | 1847 | cfq_clear_cfqq_slice_new(cfqq); |
1836 | } | 1848 | } |
1837 | if (cfq_slice_used(cfqq)) | 1849 | if (cfq_slice_used(cfqq) || cfq_class_idle(cfqq)) |
1838 | cfq_slice_expired(cfqd, 1); | 1850 | cfq_slice_expired(cfqd, 1); |
1839 | else if (sync && RB_EMPTY_ROOT(&cfqq->sort_list)) | 1851 | else if (sync && RB_EMPTY_ROOT(&cfqq->sort_list)) |
1840 | cfq_arm_slice_timer(cfqd); | 1852 | cfq_arm_slice_timer(cfqd); |
@@ -1894,13 +1906,13 @@ static int cfq_may_queue(struct request_queue *q, int rw) | |||
1894 | * so just lookup a possibly existing queue, or return 'may queue' | 1906 | * so just lookup a possibly existing queue, or return 'may queue' |
1895 | * if that fails | 1907 | * if that fails |
1896 | */ | 1908 | */ |
1897 | cic = cfq_cic_rb_lookup(cfqd, tsk->io_context); | 1909 | cic = cfq_cic_lookup(cfqd, tsk->io_context); |
1898 | if (!cic) | 1910 | if (!cic) |
1899 | return ELV_MQUEUE_MAY; | 1911 | return ELV_MQUEUE_MAY; |
1900 | 1912 | ||
1901 | cfqq = cic_to_cfqq(cic, rw & REQ_RW_SYNC); | 1913 | cfqq = cic_to_cfqq(cic, rw & REQ_RW_SYNC); |
1902 | if (cfqq) { | 1914 | if (cfqq) { |
1903 | cfq_init_prio_data(cfqq); | 1915 | cfq_init_prio_data(cfqq, cic->ioc); |
1904 | cfq_prio_boost(cfqq); | 1916 | cfq_prio_boost(cfqq); |
1905 | 1917 | ||
1906 | return __cfq_may_queue(cfqq); | 1918 | return __cfq_may_queue(cfqq); |
@@ -1938,7 +1950,6 @@ static int | |||
1938 | cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask) | 1950 | cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask) |
1939 | { | 1951 | { |
1940 | struct cfq_data *cfqd = q->elevator->elevator_data; | 1952 | struct cfq_data *cfqd = q->elevator->elevator_data; |
1941 | struct task_struct *tsk = current; | ||
1942 | struct cfq_io_context *cic; | 1953 | struct cfq_io_context *cic; |
1943 | const int rw = rq_data_dir(rq); | 1954 | const int rw = rq_data_dir(rq); |
1944 | const int is_sync = rq_is_sync(rq); | 1955 | const int is_sync = rq_is_sync(rq); |
@@ -1956,7 +1967,7 @@ cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask) | |||
1956 | 1967 | ||
1957 | cfqq = cic_to_cfqq(cic, is_sync); | 1968 | cfqq = cic_to_cfqq(cic, is_sync); |
1958 | if (!cfqq) { | 1969 | if (!cfqq) { |
1959 | cfqq = cfq_get_queue(cfqd, is_sync, tsk, gfp_mask); | 1970 | cfqq = cfq_get_queue(cfqd, is_sync, cic->ioc, gfp_mask); |
1960 | 1971 | ||
1961 | if (!cfqq) | 1972 | if (!cfqq) |
1962 | goto queue_fail; | 1973 | goto queue_fail; |
@@ -2039,29 +2050,9 @@ out_cont: | |||
2039 | spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); | 2050 | spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); |
2040 | } | 2051 | } |
2041 | 2052 | ||
2042 | /* | ||
2043 | * Timer running if an idle class queue is waiting for service | ||
2044 | */ | ||
2045 | static void cfq_idle_class_timer(unsigned long data) | ||
2046 | { | ||
2047 | struct cfq_data *cfqd = (struct cfq_data *) data; | ||
2048 | unsigned long flags; | ||
2049 | |||
2050 | spin_lock_irqsave(cfqd->queue->queue_lock, flags); | ||
2051 | |||
2052 | /* | ||
2053 | * race with a non-idle queue, reset timer | ||
2054 | */ | ||
2055 | if (!start_idle_class_timer(cfqd)) | ||
2056 | cfq_schedule_dispatch(cfqd); | ||
2057 | |||
2058 | spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); | ||
2059 | } | ||
2060 | |||
2061 | static void cfq_shutdown_timer_wq(struct cfq_data *cfqd) | 2053 | static void cfq_shutdown_timer_wq(struct cfq_data *cfqd) |
2062 | { | 2054 | { |
2063 | del_timer_sync(&cfqd->idle_slice_timer); | 2055 | del_timer_sync(&cfqd->idle_slice_timer); |
2064 | del_timer_sync(&cfqd->idle_class_timer); | ||
2065 | kblockd_flush_work(&cfqd->unplug_work); | 2056 | kblockd_flush_work(&cfqd->unplug_work); |
2066 | } | 2057 | } |
2067 | 2058 | ||
@@ -2126,10 +2117,6 @@ static void *cfq_init_queue(struct request_queue *q) | |||
2126 | cfqd->idle_slice_timer.function = cfq_idle_slice_timer; | 2117 | cfqd->idle_slice_timer.function = cfq_idle_slice_timer; |
2127 | cfqd->idle_slice_timer.data = (unsigned long) cfqd; | 2118 | cfqd->idle_slice_timer.data = (unsigned long) cfqd; |
2128 | 2119 | ||
2129 | init_timer(&cfqd->idle_class_timer); | ||
2130 | cfqd->idle_class_timer.function = cfq_idle_class_timer; | ||
2131 | cfqd->idle_class_timer.data = (unsigned long) cfqd; | ||
2132 | |||
2133 | INIT_WORK(&cfqd->unplug_work, cfq_kick_queue); | 2120 | INIT_WORK(&cfqd->unplug_work, cfq_kick_queue); |
2134 | 2121 | ||
2135 | cfqd->last_end_request = jiffies; | 2122 | cfqd->last_end_request = jiffies; |
@@ -2160,7 +2147,7 @@ static int __init cfq_slab_setup(void) | |||
2160 | if (!cfq_pool) | 2147 | if (!cfq_pool) |
2161 | goto fail; | 2148 | goto fail; |
2162 | 2149 | ||
2163 | cfq_ioc_pool = KMEM_CACHE(cfq_io_context, 0); | 2150 | cfq_ioc_pool = KMEM_CACHE(cfq_io_context, SLAB_DESTROY_BY_RCU); |
2164 | if (!cfq_ioc_pool) | 2151 | if (!cfq_ioc_pool) |
2165 | goto fail; | 2152 | goto fail; |
2166 | 2153 | ||
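Taken together, the cfq-iosched.c hunks above move per-process cfq_io_context storage from an rbtree hanging off the io_context to a radix tree keyed by the cfq_data pointer, drop the idle-class grace timer in favour of parking idle-class queues CFQ_IDLE_DELAY beyond the end of the service tree, and allocate cics from a SLAB_DESTROY_BY_RCU cache so lookups can run under rcu_read_lock(). The sketch below restates the resulting lookup pattern in isolation; it assumes the fields introduced by this patch (ioc->radix_root, cic->key) and is an illustration, not a drop-in replacement for cfq_cic_lookup().

#include <linux/radix-tree.h>
#include <linux/rcupdate.h>

/*
 * Sketch of the RCU-side lookup: find the cic for this (ioc, cfqd) pair
 * by using the cfq_data pointer value as the radix tree index.  Because
 * the cic slab is SLAB_DESTROY_BY_RCU, the object found stays a cic for
 * the duration of the RCU read section, but it may have been recycled,
 * so ->key must be re-checked before trusting the result.
 */
static struct cfq_io_context *
example_cic_lookup(struct io_context *ioc, struct cfq_data *cfqd)
{
        struct cfq_io_context *cic;

        rcu_read_lock();
        cic = radix_tree_lookup(&ioc->radix_root, (unsigned long) cfqd);
        rcu_read_unlock();

        /* ->key is cleared when the queue exits; treat such cics as dead */
        if (cic && cic->key != cfqd)
                cic = NULL;

        return cic;
}

The last-hit cache (ioc->ioc_data) follows the same rules, which is why the hunks switch its accesses to rcu_assign_pointer() and rcu_dereference().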
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 768987dc2697..1932a56f5e4b 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -347,7 +347,6 @@ unsigned blk_ordered_req_seq(struct request *rq) | |||
347 | void blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error) | 347 | void blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error) |
348 | { | 348 | { |
349 | struct request *rq; | 349 | struct request *rq; |
350 | int uptodate; | ||
351 | 350 | ||
352 | if (error && !q->orderr) | 351 | if (error && !q->orderr) |
353 | q->orderr = error; | 352 | q->orderr = error; |
@@ -361,15 +360,11 @@ void blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error) | |||
361 | /* | 360 | /* |
362 | * Okay, sequence complete. | 361 | * Okay, sequence complete. |
363 | */ | 362 | */ |
364 | uptodate = 1; | ||
365 | if (q->orderr) | ||
366 | uptodate = q->orderr; | ||
367 | |||
368 | q->ordseq = 0; | 363 | q->ordseq = 0; |
369 | rq = q->orig_bar_rq; | 364 | rq = q->orig_bar_rq; |
370 | 365 | ||
371 | end_that_request_first(rq, uptodate, rq->hard_nr_sectors); | 366 | if (__blk_end_request(rq, q->orderr, blk_rq_bytes(rq))) |
372 | end_that_request_last(rq, uptodate); | 367 | BUG(); |
373 | } | 368 | } |
374 | 369 | ||
375 | static void pre_flush_end_io(struct request *rq, int error) | 370 | static void pre_flush_end_io(struct request *rq, int error) |
@@ -486,9 +481,9 @@ int blk_do_ordered(struct request_queue *q, struct request **rqp) | |||
486 | * ORDERED_NONE while this request is on it. | 481 | * ORDERED_NONE while this request is on it. |
487 | */ | 482 | */ |
488 | blkdev_dequeue_request(rq); | 483 | blkdev_dequeue_request(rq); |
489 | end_that_request_first(rq, -EOPNOTSUPP, | 484 | if (__blk_end_request(rq, -EOPNOTSUPP, |
490 | rq->hard_nr_sectors); | 485 | blk_rq_bytes(rq))) |
491 | end_that_request_last(rq, -EOPNOTSUPP); | 486 | BUG(); |
492 | *rqp = NULL; | 487 | *rqp = NULL; |
493 | return 0; | 488 | return 0; |
494 | } | 489 | } |
@@ -3486,29 +3481,36 @@ static void blk_recalc_rq_sectors(struct request *rq, int nsect) | |||
3486 | } | 3481 | } |
3487 | } | 3482 | } |
3488 | 3483 | ||
3489 | static int __end_that_request_first(struct request *req, int uptodate, | 3484 | /** |
3485 | * __end_that_request_first - end I/O on a request | ||
3486 | * @req: the request being processed | ||
3487 | * @error: 0 for success, < 0 for error | ||
3488 | * @nr_bytes: number of bytes to complete | ||
3489 | * | ||
3490 | * Description: | ||
3491 | * Ends I/O on a number of bytes attached to @req, and sets it up | ||
3492 | * for the next range of segments (if any) in the cluster. | ||
3493 | * | ||
3494 | * Return: | ||
3495 | * 0 - we are done with this request, call end_that_request_last() | ||
3496 | * 1 - still buffers pending for this request | ||
3497 | **/ | ||
3498 | static int __end_that_request_first(struct request *req, int error, | ||
3490 | int nr_bytes) | 3499 | int nr_bytes) |
3491 | { | 3500 | { |
3492 | int total_bytes, bio_nbytes, error, next_idx = 0; | 3501 | int total_bytes, bio_nbytes, next_idx = 0; |
3493 | struct bio *bio; | 3502 | struct bio *bio; |
3494 | 3503 | ||
3495 | blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE); | 3504 | blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE); |
3496 | 3505 | ||
3497 | /* | 3506 | /* |
3498 | * extend uptodate bool to allow < 0 value to be direct io error | ||
3499 | */ | ||
3500 | error = 0; | ||
3501 | if (end_io_error(uptodate)) | ||
3502 | error = !uptodate ? -EIO : uptodate; | ||
3503 | |||
3504 | /* | ||
3505 | * for a REQ_BLOCK_PC request, we want to carry any eventual | 3507 | * for a REQ_BLOCK_PC request, we want to carry any eventual |
3506 | * sense key with us all the way through | 3508 | * sense key with us all the way through |
3507 | */ | 3509 | */ |
3508 | if (!blk_pc_request(req)) | 3510 | if (!blk_pc_request(req)) |
3509 | req->errors = 0; | 3511 | req->errors = 0; |
3510 | 3512 | ||
3511 | if (!uptodate) { | 3513 | if (error) { |
3512 | if (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET)) | 3514 | if (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET)) |
3513 | printk("end_request: I/O error, dev %s, sector %llu\n", | 3515 | printk("end_request: I/O error, dev %s, sector %llu\n", |
3514 | req->rq_disk ? req->rq_disk->disk_name : "?", | 3516 | req->rq_disk ? req->rq_disk->disk_name : "?", |
@@ -3602,49 +3604,6 @@ static int __end_that_request_first(struct request *req, int uptodate, | |||
3602 | return 1; | 3604 | return 1; |
3603 | } | 3605 | } |
3604 | 3606 | ||
3605 | /** | ||
3606 | * end_that_request_first - end I/O on a request | ||
3607 | * @req: the request being processed | ||
3608 | * @uptodate: 1 for success, 0 for I/O error, < 0 for specific error | ||
3609 | * @nr_sectors: number of sectors to end I/O on | ||
3610 | * | ||
3611 | * Description: | ||
3612 | * Ends I/O on a number of sectors attached to @req, and sets it up | ||
3613 | * for the next range of segments (if any) in the cluster. | ||
3614 | * | ||
3615 | * Return: | ||
3616 | * 0 - we are done with this request, call end_that_request_last() | ||
3617 | * 1 - still buffers pending for this request | ||
3618 | **/ | ||
3619 | int end_that_request_first(struct request *req, int uptodate, int nr_sectors) | ||
3620 | { | ||
3621 | return __end_that_request_first(req, uptodate, nr_sectors << 9); | ||
3622 | } | ||
3623 | |||
3624 | EXPORT_SYMBOL(end_that_request_first); | ||
3625 | |||
3626 | /** | ||
3627 | * end_that_request_chunk - end I/O on a request | ||
3628 | * @req: the request being processed | ||
3629 | * @uptodate: 1 for success, 0 for I/O error, < 0 for specific error | ||
3630 | * @nr_bytes: number of bytes to complete | ||
3631 | * | ||
3632 | * Description: | ||
3633 | * Ends I/O on a number of bytes attached to @req, and sets it up | ||
3634 | * for the next range of segments (if any). Like end_that_request_first(), | ||
3635 | * but deals with bytes instead of sectors. | ||
3636 | * | ||
3637 | * Return: | ||
3638 | * 0 - we are done with this request, call end_that_request_last() | ||
3639 | * 1 - still buffers pending for this request | ||
3640 | **/ | ||
3641 | int end_that_request_chunk(struct request *req, int uptodate, int nr_bytes) | ||
3642 | { | ||
3643 | return __end_that_request_first(req, uptodate, nr_bytes); | ||
3644 | } | ||
3645 | |||
3646 | EXPORT_SYMBOL(end_that_request_chunk); | ||
3647 | |||
3648 | /* | 3607 | /* |
3649 | * splice the completion data to a local structure and hand off to | 3608 | * splice the completion data to a local structure and hand off to |
3650 | * process_completion_queue() to complete the requests | 3609 | * process_completion_queue() to complete the requests |
@@ -3724,17 +3683,15 @@ EXPORT_SYMBOL(blk_complete_request); | |||
3724 | /* | 3683 | /* |
3725 | * queue lock must be held | 3684 | * queue lock must be held |
3726 | */ | 3685 | */ |
3727 | void end_that_request_last(struct request *req, int uptodate) | 3686 | static void end_that_request_last(struct request *req, int error) |
3728 | { | 3687 | { |
3729 | struct gendisk *disk = req->rq_disk; | 3688 | struct gendisk *disk = req->rq_disk; |
3730 | int error; | ||
3731 | 3689 | ||
3732 | /* | 3690 | if (blk_rq_tagged(req)) |
3733 | * extend uptodate bool to allow < 0 value to be direct io error | 3691 | blk_queue_end_tag(req->q, req); |
3734 | */ | 3692 | |
3735 | error = 0; | 3693 | if (blk_queued_rq(req)) |
3736 | if (end_io_error(uptodate)) | 3694 | blkdev_dequeue_request(req); |
3737 | error = !uptodate ? -EIO : uptodate; | ||
3738 | 3695 | ||
3739 | if (unlikely(laptop_mode) && blk_fs_request(req)) | 3696 | if (unlikely(laptop_mode) && blk_fs_request(req)) |
3740 | laptop_io_completion(); | 3697 | laptop_io_completion(); |
@@ -3753,32 +3710,54 @@ void end_that_request_last(struct request *req, int uptodate) | |||
3753 | disk_round_stats(disk); | 3710 | disk_round_stats(disk); |
3754 | disk->in_flight--; | 3711 | disk->in_flight--; |
3755 | } | 3712 | } |
3713 | |||
3756 | if (req->end_io) | 3714 | if (req->end_io) |
3757 | req->end_io(req, error); | 3715 | req->end_io(req, error); |
3758 | else | 3716 | else { |
3717 | if (blk_bidi_rq(req)) | ||
3718 | __blk_put_request(req->next_rq->q, req->next_rq); | ||
3719 | |||
3759 | __blk_put_request(req->q, req); | 3720 | __blk_put_request(req->q, req); |
3721 | } | ||
3760 | } | 3722 | } |
3761 | 3723 | ||
3762 | EXPORT_SYMBOL(end_that_request_last); | ||
3763 | |||
3764 | static inline void __end_request(struct request *rq, int uptodate, | 3724 | static inline void __end_request(struct request *rq, int uptodate, |
3765 | unsigned int nr_bytes, int dequeue) | 3725 | unsigned int nr_bytes) |
3766 | { | 3726 | { |
3767 | if (!end_that_request_chunk(rq, uptodate, nr_bytes)) { | 3727 | int error = 0; |
3768 | if (dequeue) | 3728 | |
3769 | blkdev_dequeue_request(rq); | 3729 | if (uptodate <= 0) |
3770 | add_disk_randomness(rq->rq_disk); | 3730 | error = uptodate ? uptodate : -EIO; |
3771 | end_that_request_last(rq, uptodate); | 3731 | |
3772 | } | 3732 | __blk_end_request(rq, error, nr_bytes); |
3773 | } | 3733 | } |
3774 | 3734 | ||
3775 | static unsigned int rq_byte_size(struct request *rq) | 3735 | /** |
3736 | * blk_rq_bytes - Returns bytes left to complete in the entire request | ||
3737 | **/ | ||
3738 | unsigned int blk_rq_bytes(struct request *rq) | ||
3776 | { | 3739 | { |
3777 | if (blk_fs_request(rq)) | 3740 | if (blk_fs_request(rq)) |
3778 | return rq->hard_nr_sectors << 9; | 3741 | return rq->hard_nr_sectors << 9; |
3779 | 3742 | ||
3780 | return rq->data_len; | 3743 | return rq->data_len; |
3781 | } | 3744 | } |
3745 | EXPORT_SYMBOL_GPL(blk_rq_bytes); | ||
3746 | |||
3747 | /** | ||
3748 | * blk_rq_cur_bytes - Returns bytes left to complete in the current segment | ||
3749 | **/ | ||
3750 | unsigned int blk_rq_cur_bytes(struct request *rq) | ||
3751 | { | ||
3752 | if (blk_fs_request(rq)) | ||
3753 | return rq->current_nr_sectors << 9; | ||
3754 | |||
3755 | if (rq->bio) | ||
3756 | return rq->bio->bi_size; | ||
3757 | |||
3758 | return rq->data_len; | ||
3759 | } | ||
3760 | EXPORT_SYMBOL_GPL(blk_rq_cur_bytes); | ||
3782 | 3761 | ||
3783 | /** | 3762 | /** |
3784 | * end_queued_request - end all I/O on a queued request | 3763 | * end_queued_request - end all I/O on a queued request |
@@ -3793,7 +3772,7 @@ static unsigned int rq_byte_size(struct request *rq) | |||
3793 | **/ | 3772 | **/ |
3794 | void end_queued_request(struct request *rq, int uptodate) | 3773 | void end_queued_request(struct request *rq, int uptodate) |
3795 | { | 3774 | { |
3796 | __end_request(rq, uptodate, rq_byte_size(rq), 1); | 3775 | __end_request(rq, uptodate, blk_rq_bytes(rq)); |
3797 | } | 3776 | } |
3798 | EXPORT_SYMBOL(end_queued_request); | 3777 | EXPORT_SYMBOL(end_queued_request); |
3799 | 3778 | ||
@@ -3810,7 +3789,7 @@ EXPORT_SYMBOL(end_queued_request); | |||
3810 | **/ | 3789 | **/ |
3811 | void end_dequeued_request(struct request *rq, int uptodate) | 3790 | void end_dequeued_request(struct request *rq, int uptodate) |
3812 | { | 3791 | { |
3813 | __end_request(rq, uptodate, rq_byte_size(rq), 0); | 3792 | __end_request(rq, uptodate, blk_rq_bytes(rq)); |
3814 | } | 3793 | } |
3815 | EXPORT_SYMBOL(end_dequeued_request); | 3794 | EXPORT_SYMBOL(end_dequeued_request); |
3816 | 3795 | ||
@@ -3836,10 +3815,159 @@ EXPORT_SYMBOL(end_dequeued_request); | |||
3836 | **/ | 3815 | **/ |
3837 | void end_request(struct request *req, int uptodate) | 3816 | void end_request(struct request *req, int uptodate) |
3838 | { | 3817 | { |
3839 | __end_request(req, uptodate, req->hard_cur_sectors << 9, 1); | 3818 | __end_request(req, uptodate, req->hard_cur_sectors << 9); |
3840 | } | 3819 | } |
3841 | EXPORT_SYMBOL(end_request); | 3820 | EXPORT_SYMBOL(end_request); |
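For comparison, the legacy end_request() interface kept above (now routed through __blk_end_request()) completes only the current segment and translates a boolean "uptodate" into an error code. A hedged sketch of the traditional per-segment pattern; the handler name is illustrative:

    #include <linux/blkdev.h>

    /*
     * Old-style per-segment completion: end_request(rq, 1) on success,
     * end_request(rq, 0) on failure (mapped to -EIO internally).
     */
    static void legacy_segment_done(struct request *rq, int success)
    {
            end_request(rq, success ? 1 : 0);
    }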
3842 | 3821 | ||
3822 | /** | ||
3823 | * blk_end_io - Generic end_io function to complete a request. | ||
3824 | * @rq: the request being processed | ||
3825 | * @error: 0 for success, < 0 for error | ||
3826 | * @nr_bytes: number of bytes to complete @rq | ||
3827 | * @bidi_bytes: number of bytes to complete @rq->next_rq | ||
3828 | * @drv_callback: function called between completion of bios in the request | ||
3829 | * and completion of the request. | ||
3830 | * If the callback returns non 0, this helper returns without | ||
3831 | * completion of the request. | ||
3832 | * | ||
3833 | * Description: | ||
3834 | * Ends I/O on a number of bytes attached to @rq and @rq->next_rq. | ||
3835 | * If @rq has leftover, sets it up for the next range of segments. | ||
3836 | * | ||
3837 | * Return: | ||
3838 | * 0 - we are done with this request | ||
3839 | * 1 - this request is not freed yet, it still has pending buffers. | ||
3840 | **/ | ||
3841 | static int blk_end_io(struct request *rq, int error, int nr_bytes, | ||
3842 | int bidi_bytes, int (drv_callback)(struct request *)) | ||
3843 | { | ||
3844 | struct request_queue *q = rq->q; | ||
3845 | unsigned long flags = 0UL; | ||
3846 | |||
3847 | if (blk_fs_request(rq) || blk_pc_request(rq)) { | ||
3848 | if (__end_that_request_first(rq, error, nr_bytes)) | ||
3849 | return 1; | ||
3850 | |||
3851 | /* Bidi request must be completed as a whole */ | ||
3852 | if (blk_bidi_rq(rq) && | ||
3853 | __end_that_request_first(rq->next_rq, error, bidi_bytes)) | ||
3854 | return 1; | ||
3855 | } | ||
3856 | |||
3857 | /* Special feature for tricky drivers */ | ||
3858 | if (drv_callback && drv_callback(rq)) | ||
3859 | return 1; | ||
3860 | |||
3861 | add_disk_randomness(rq->rq_disk); | ||
3862 | |||
3863 | spin_lock_irqsave(q->queue_lock, flags); | ||
3864 | end_that_request_last(rq, error); | ||
3865 | spin_unlock_irqrestore(q->queue_lock, flags); | ||
3866 | |||
3867 | return 0; | ||
3868 | } | ||
3869 | |||
3870 | /** | ||
3871 | * blk_end_request - Helper function for drivers to complete the request. | ||
3872 | * @rq: the request being processed | ||
3873 | * @error: 0 for success, < 0 for error | ||
3874 | * @nr_bytes: number of bytes to complete | ||
3875 | * | ||
3876 | * Description: | ||
3877 | * Ends I/O on a number of bytes attached to @rq. | ||
3878 | * If @rq has leftover, sets it up for the next range of segments. | ||
3879 | * | ||
3880 | * Return: | ||
3881 | * 0 - we are done with this request | ||
3882 | * 1 - still buffers pending for this request | ||
3883 | **/ | ||
3884 | int blk_end_request(struct request *rq, int error, int nr_bytes) | ||
3885 | { | ||
3886 | return blk_end_io(rq, error, nr_bytes, 0, NULL); | ||
3887 | } | ||
3888 | EXPORT_SYMBOL_GPL(blk_end_request); | ||
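blk_end_request() is the primary new completion helper: it handles partial completion, disk randomness, and takes the queue lock itself for the final teardown. A minimal sketch of a driver completing a request from interrupt/softirq context without the queue lock held; the function name is hypothetical:

    #include <linux/blkdev.h>

    /* Called with q->queue_lock NOT held. */
    static void my_done(struct request *rq, int error, unsigned int bytes_done)
    {
            if (blk_end_request(rq, error, bytes_done)) {
                    /*
                     * 1: bytes are still outstanding; the request has been
                     * set up for the next range of segments and remains
                     * owned by the driver until a later completion.
                     */
                    return;
            }
            /* 0: the request is fully completed and has been freed. */
    }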
3889 | |||
3890 | /** | ||
3891 | * __blk_end_request - Helper function for drivers to complete the request. | ||
3892 | * @rq: the request being processed | ||
3893 | * @error: 0 for success, < 0 for error | ||
3894 | * @nr_bytes: number of bytes to complete | ||
3895 | * | ||
3896 | * Description: | ||
3897 | * Must be called with queue lock held unlike blk_end_request(). | ||
3898 | * | ||
3899 | * Return: | ||
3900 | * 0 - we are done with this request | ||
3901 | * 1 - still buffers pending for this request | ||
3902 | **/ | ||
3903 | int __blk_end_request(struct request *rq, int error, int nr_bytes) | ||
3904 | { | ||
3905 | if (blk_fs_request(rq) || blk_pc_request(rq)) { | ||
3906 | if (__end_that_request_first(rq, error, nr_bytes)) | ||
3907 | return 1; | ||
3908 | } | ||
3909 | |||
3910 | add_disk_randomness(rq->rq_disk); | ||
3911 | |||
3912 | end_that_request_last(rq, error); | ||
3913 | |||
3914 | return 0; | ||
3915 | } | ||
3916 | EXPORT_SYMBOL_GPL(__blk_end_request); | ||
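__blk_end_request() is the lock-held variant: it skips the spin_lock_irqsave() done in blk_end_io(), so it may only be called with q->queue_lock already held, e.g. from a request_fn. A hedged sketch; the "device gone" flag and function names are illustrative, not from this patch:

    #include <linux/blkdev.h>

    static int my_device_gone;      /* illustrative flag */

    /* request_fn is invoked with the queue lock held. */
    static void my_request_fn(struct request_queue *q)
    {
            struct request *rq;

            while ((rq = elv_next_request(q)) != NULL) {
                    if (my_device_gone) {
                            blkdev_dequeue_request(rq);
                            /* queue lock is held, so use the __ variant */
                            __blk_end_request(rq, -EIO, blk_rq_bytes(rq));
                            continue;
                    }
                    /* ... otherwise hand the request to the hardware ... */
                    break;
            }
    }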
3917 | |||
3918 | /** | ||
3919 | * blk_end_bidi_request - Helper function for drivers to complete bidi request. | ||
3920 | * @rq: the bidi request being processed | ||
3921 | * @error: 0 for success, < 0 for error | ||
3922 | * @nr_bytes: number of bytes to complete @rq | ||
3923 | * @bidi_bytes: number of bytes to complete @rq->next_rq | ||
3924 | * | ||
3925 | * Description: | ||
3926 | * Ends I/O on a number of bytes attached to @rq and @rq->next_rq. | ||
3927 | * | ||
3928 | * Return: | ||
3929 | * 0 - we are done with this request | ||
3930 | * 1 - still buffers pending for this request | ||
3931 | **/ | ||
3932 | int blk_end_bidi_request(struct request *rq, int error, int nr_bytes, | ||
3933 | int bidi_bytes) | ||
3934 | { | ||
3935 | return blk_end_io(rq, error, nr_bytes, bidi_bytes, NULL); | ||
3936 | } | ||
3937 | EXPORT_SYMBOL_GPL(blk_end_bidi_request); | ||
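For bidirectional requests, both rq and rq->next_rq must be completed as a whole, which blk_end_bidi_request() wraps. A hedged sketch, assuming the driver tracks the bytes transferred in each direction:

    #include <linux/blkdev.h>

    /* Illustrative bidi completion; names are not from this patch. */
    static void my_bidi_done(struct request *rq, int error,
                             unsigned int out_bytes, unsigned int in_bytes)
    {
            /* Returns 1 if either direction still has pending buffers. */
            if (blk_end_bidi_request(rq, error, out_bytes, in_bytes))
                    return; /* not finished yet */
            /* both rq and rq->next_rq have been completed and freed */
    }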
3938 | |||
3939 | /** | ||
3940 | * blk_end_request_callback - Special helper function for tricky drivers | ||
3941 | * @rq: the request being processed | ||
3942 | * @error: 0 for success, < 0 for error | ||
3943 | * @nr_bytes: number of bytes to complete | ||
3944 | * @drv_callback: function called between completion of bios in the request | ||
3945 | * and completion of the request. | ||
3946 | * If the callback returns non 0, this helper returns without | ||
3947 | * completion of the request. | ||
3948 | * | ||
3949 | * Description: | ||
3950 | * Ends I/O on a number of bytes attached to @rq. | ||
3951 | * If @rq has leftover, sets it up for the next range of segments. | ||
3952 | * | ||
3953 | * This special helper function is used only for existing tricky drivers. | ||
3954 | * (e.g. cdrom_newpc_intr() of ide-cd) | ||
3955 | * This interface will be removed when such drivers are rewritten. | ||
3956 | * Don't use this interface in other places anymore. | ||
3957 | * | ||
3958 | * Return: | ||
3959 | * 0 - we are done with this request | ||
3960 | * 1 - this request is not freed yet. | ||
3961 | * this request still has pending buffers or | ||
3962 | * the driver doesn't want to finish this request yet. | ||
3963 | **/ | ||
3964 | int blk_end_request_callback(struct request *rq, int error, int nr_bytes, | ||
3965 | int (drv_callback)(struct request *)) | ||
3966 | { | ||
3967 | return blk_end_io(rq, error, nr_bytes, 0, drv_callback); | ||
3968 | } | ||
3969 | EXPORT_SYMBOL_GPL(blk_end_request_callback); | ||
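The callback variant exists only so that a driver like ide-cd can veto the final completion: the callback runs after the bios have been completed but before end_that_request_last(). A hedged sketch of the shape such a callback takes (names are illustrative; as the kernel-doc above says, new code should not use this interface):

    #include <linux/blkdev.h>

    /* Return non-zero to keep ownership of the request instead of freeing it. */
    static int my_keep_request(struct request *rq)
    {
            /* e.g. the device still owes data for this request */
            return rq->data_len != 0;
    }

    static void my_pio_intr(struct request *rq, unsigned int nbytes)
    {
            /* completes nbytes of bios; my_keep_request() may veto the final step */
            blk_end_request_callback(rq, 0, nbytes, my_keep_request);
    }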
3970 | |||
3843 | static void blk_rq_bio_prep(struct request_queue *q, struct request *rq, | 3971 | static void blk_rq_bio_prep(struct request_queue *q, struct request *rq, |
3844 | struct bio *bio) | 3972 | struct bio *bio) |
3845 | { | 3973 | { |
@@ -3902,55 +4030,100 @@ int __init blk_dev_init(void) | |||
3902 | return 0; | 4030 | return 0; |
3903 | } | 4031 | } |
3904 | 4032 | ||
4033 | static void cfq_dtor(struct io_context *ioc) | ||
4034 | { | ||
4035 | struct cfq_io_context *cic[1]; | ||
4036 | int r; | ||
4037 | |||
4038 | /* | ||
4039 | * We don't have a specific key to lookup with, so use the gang | ||
4040 | * lookup to just retrieve the first item stored. The cfq exit | ||
4041 | * function will iterate the full tree, so any member will do. | ||
4042 | */ | ||
4043 | r = radix_tree_gang_lookup(&ioc->radix_root, (void **) cic, 0, 1); | ||
4044 | if (r > 0) | ||
4045 | cic[0]->dtor(ioc); | ||
4046 | } | ||
4047 | |||
3905 | /* | 4048 | /* |
3906 | * IO Context helper functions | 4049 | * IO Context helper functions. put_io_context() returns 1 if there are no |
4050 | * more users of this io context, 0 otherwise. | ||
3907 | */ | 4051 | */ |
3908 | void put_io_context(struct io_context *ioc) | 4052 | int put_io_context(struct io_context *ioc) |
3909 | { | 4053 | { |
3910 | if (ioc == NULL) | 4054 | if (ioc == NULL) |
3911 | return; | 4055 | return 1; |
3912 | 4056 | ||
3913 | BUG_ON(atomic_read(&ioc->refcount) == 0); | 4057 | BUG_ON(atomic_read(&ioc->refcount) == 0); |
3914 | 4058 | ||
3915 | if (atomic_dec_and_test(&ioc->refcount)) { | 4059 | if (atomic_dec_and_test(&ioc->refcount)) { |
3916 | struct cfq_io_context *cic; | ||
3917 | |||
3918 | rcu_read_lock(); | 4060 | rcu_read_lock(); |
3919 | if (ioc->aic && ioc->aic->dtor) | 4061 | if (ioc->aic && ioc->aic->dtor) |
3920 | ioc->aic->dtor(ioc->aic); | 4062 | ioc->aic->dtor(ioc->aic); |
3921 | if (ioc->cic_root.rb_node != NULL) { | ||
3922 | struct rb_node *n = rb_first(&ioc->cic_root); | ||
3923 | |||
3924 | cic = rb_entry(n, struct cfq_io_context, rb_node); | ||
3925 | cic->dtor(ioc); | ||
3926 | } | ||
3927 | rcu_read_unlock(); | 4063 | rcu_read_unlock(); |
4064 | cfq_dtor(ioc); | ||
3928 | 4065 | ||
3929 | kmem_cache_free(iocontext_cachep, ioc); | 4066 | kmem_cache_free(iocontext_cachep, ioc); |
4067 | return 1; | ||
3930 | } | 4068 | } |
4069 | return 0; | ||
3931 | } | 4070 | } |
3932 | EXPORT_SYMBOL(put_io_context); | 4071 | EXPORT_SYMBOL(put_io_context); |
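put_io_context() now reports whether it dropped the last reference, so callers can tell when the io_context (and the CFQ/AS data hanging off it) is gone. A minimal sketch of the new calling convention; the wrapper is hypothetical:

    #include <linux/blkdev.h>

    static void my_release_ioc(struct io_context *ioc)
    {
            if (put_io_context(ioc)) {
                    /* last user: the aic dtor and cfq_dtor() have run,
                     * and ioc has been freed; do not touch it again */
                    return;
            }
            /* other tasks or requests still hold references */
    }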
3933 | 4072 | ||
4073 | static void cfq_exit(struct io_context *ioc) | ||
4074 | { | ||
4075 | struct cfq_io_context *cic[1]; | ||
4076 | int r; | ||
4077 | |||
4078 | rcu_read_lock(); | ||
4079 | /* | ||
4080 | * See comment for cfq_dtor() | ||
4081 | */ | ||
4082 | r = radix_tree_gang_lookup(&ioc->radix_root, (void **) cic, 0, 1); | ||
4083 | rcu_read_unlock(); | ||
4084 | |||
4085 | if (r > 0) | ||
4086 | cic[0]->exit(ioc); | ||
4087 | } | ||
4088 | |||
3934 | /* Called by the exiting task */ | 4089 | /* Called by the exiting task */ |
3935 | void exit_io_context(void) | 4090 | void exit_io_context(void) |
3936 | { | 4091 | { |
3937 | struct io_context *ioc; | 4092 | struct io_context *ioc; |
3938 | struct cfq_io_context *cic; | ||
3939 | 4093 | ||
3940 | task_lock(current); | 4094 | task_lock(current); |
3941 | ioc = current->io_context; | 4095 | ioc = current->io_context; |
3942 | current->io_context = NULL; | 4096 | current->io_context = NULL; |
3943 | task_unlock(current); | 4097 | task_unlock(current); |
3944 | 4098 | ||
3945 | ioc->task = NULL; | 4099 | if (atomic_dec_and_test(&ioc->nr_tasks)) { |
3946 | if (ioc->aic && ioc->aic->exit) | 4100 | if (ioc->aic && ioc->aic->exit) |
3947 | ioc->aic->exit(ioc->aic); | 4101 | ioc->aic->exit(ioc->aic); |
3948 | if (ioc->cic_root.rb_node != NULL) { | 4102 | cfq_exit(ioc); |
3949 | cic = rb_entry(rb_first(&ioc->cic_root), struct cfq_io_context, rb_node); | 4103 | |
3950 | cic->exit(ioc); | 4104 | put_io_context(ioc); |
4105 | } | ||
4106 | } | ||
4107 | |||
4108 | struct io_context *alloc_io_context(gfp_t gfp_flags, int node) | ||
4109 | { | ||
4110 | struct io_context *ret; | ||
4111 | |||
4112 | ret = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node); | ||
4113 | if (ret) { | ||
4114 | atomic_set(&ret->refcount, 1); | ||
4115 | atomic_set(&ret->nr_tasks, 1); | ||
4116 | spin_lock_init(&ret->lock); | ||
4117 | ret->ioprio_changed = 0; | ||
4118 | ret->ioprio = 0; | ||
4119 | ret->last_waited = jiffies; /* doesn't matter... */ | ||
4120 | ret->nr_batch_requests = 0; /* because this is 0 */ | ||
4121 | ret->aic = NULL; | ||
4122 | INIT_RADIX_TREE(&ret->radix_root, GFP_ATOMIC | __GFP_HIGH); | ||
4123 | ret->ioc_data = NULL; | ||
3951 | } | 4124 | } |
3952 | 4125 | ||
3953 | put_io_context(ioc); | 4126 | return ret; |
3954 | } | 4127 | } |
3955 | 4128 | ||
3956 | /* | 4129 | /* |
@@ -3970,16 +4143,8 @@ static struct io_context *current_io_context(gfp_t gfp_flags, int node) | |||
3970 | if (likely(ret)) | 4143 | if (likely(ret)) |
3971 | return ret; | 4144 | return ret; |
3972 | 4145 | ||
3973 | ret = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node); | 4146 | ret = alloc_io_context(gfp_flags, node); |
3974 | if (ret) { | 4147 | if (ret) { |
3975 | atomic_set(&ret->refcount, 1); | ||
3976 | ret->task = current; | ||
3977 | ret->ioprio_changed = 0; | ||
3978 | ret->last_waited = jiffies; /* doesn't matter... */ | ||
3979 | ret->nr_batch_requests = 0; /* because this is 0 */ | ||
3980 | ret->aic = NULL; | ||
3981 | ret->cic_root.rb_node = NULL; | ||
3982 | ret->ioc_data = NULL; | ||
3983 | /* make sure set_task_ioprio() sees the settings above */ | 4148 | /* make sure set_task_ioprio() sees the settings above */ |
3984 | smp_wmb(); | 4149 | smp_wmb(); |
3985 | tsk->io_context = ret; | 4150 | tsk->io_context = ret; |
@@ -3996,10 +4161,18 @@ static struct io_context *current_io_context(gfp_t gfp_flags, int node) | |||
3996 | */ | 4161 | */ |
3997 | struct io_context *get_io_context(gfp_t gfp_flags, int node) | 4162 | struct io_context *get_io_context(gfp_t gfp_flags, int node) |
3998 | { | 4163 | { |
3999 | struct io_context *ret; | 4164 | struct io_context *ret = NULL; |
4000 | ret = current_io_context(gfp_flags, node); | 4165 | |
4001 | if (likely(ret)) | 4166 | /* |
4002 | atomic_inc(&ret->refcount); | 4167 | * Check for unlikely race with exiting task. ioc ref count is |
4168 | * zero when ioc is being detached. | ||
4169 | */ | ||
4170 | do { | ||
4171 | ret = current_io_context(gfp_flags, node); | ||
4172 | if (unlikely(!ret)) | ||
4173 | break; | ||
4174 | } while (!atomic_inc_not_zero(&ret->refcount)); | ||
4175 | |||
4003 | return ret; | 4176 | return ret; |
4004 | } | 4177 | } |
4005 | EXPORT_SYMBOL(get_io_context); | 4178 | EXPORT_SYMBOL(get_io_context); |
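get_io_context() now guards against a concurrently exiting task: atomic_inc_not_zero() only takes a reference if the count has not already dropped to zero, and the loop retries (getting a freshly allocated context) if it has. The same idiom in isolation, as a hedged generic sketch; obj and lookup_obj() are illustrative placeholders, not part of this patch:

    #include <asm/atomic.h>

    struct obj {
            atomic_t refcount;
    };

    static struct obj *lookup_obj(void);    /* may create a new object */

    /* Take a reference unless the object is already being torn down. */
    static struct obj *get_obj_ref(void)
    {
            struct obj *o;

            do {
                    o = lookup_obj();
                    if (!o)
                            return NULL;
            } while (!atomic_inc_not_zero(&o->refcount));

            return o;       /* holds a guaranteed non-zero reference */
    }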