Diffstat (limited to 'block')

 -rw-r--r--  block/as-iosched.c  |  34
 -rw-r--r--  block/cfq-iosched.c | 425
 -rw-r--r--  block/ll_rw_blk.c   | 109

3 files changed, 311 insertions, 257 deletions
diff --git a/block/as-iosched.c b/block/as-iosched.c
index cb5e53b05c7c..b201d16a7102 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -170,9 +170,11 @@ static void free_as_io_context(struct as_io_context *aic) | |||
170 | 170 | ||
171 | static void as_trim(struct io_context *ioc) | 171 | static void as_trim(struct io_context *ioc) |
172 | { | 172 | { |
173 | spin_lock(&ioc->lock); | ||
173 | if (ioc->aic) | 174 | if (ioc->aic) |
174 | free_as_io_context(ioc->aic); | 175 | free_as_io_context(ioc->aic); |
175 | ioc->aic = NULL; | 176 | ioc->aic = NULL; |
177 | spin_unlock(&ioc->lock); | ||
176 | } | 178 | } |
177 | 179 | ||
178 | /* Called when the task exits */ | 180 | /* Called when the task exits */ |
@@ -462,7 +464,9 @@ static void as_antic_timeout(unsigned long data) | |||
462 | spin_lock_irqsave(q->queue_lock, flags); | 464 | spin_lock_irqsave(q->queue_lock, flags); |
463 | if (ad->antic_status == ANTIC_WAIT_REQ | 465 | if (ad->antic_status == ANTIC_WAIT_REQ |
464 | || ad->antic_status == ANTIC_WAIT_NEXT) { | 466 | || ad->antic_status == ANTIC_WAIT_NEXT) { |
465 | struct as_io_context *aic = ad->io_context->aic; | 467 | struct as_io_context *aic; |
468 | spin_lock(&ad->io_context->lock); | ||
469 | aic = ad->io_context->aic; | ||
466 | 470 | ||
467 | ad->antic_status = ANTIC_FINISHED; | 471 | ad->antic_status = ANTIC_FINISHED; |
468 | kblockd_schedule_work(&ad->antic_work); | 472 | kblockd_schedule_work(&ad->antic_work); |
@@ -475,6 +479,7 @@ static void as_antic_timeout(unsigned long data) | |||
475 | /* process not "saved" by a cooperating request */ | 479 | /* process not "saved" by a cooperating request */ |
476 | ad->exit_no_coop = (7*ad->exit_no_coop + 256)/8; | 480 | ad->exit_no_coop = (7*ad->exit_no_coop + 256)/8; |
477 | } | 481 | } |
482 | spin_unlock(&ad->io_context->lock); | ||
478 | } | 483 | } |
479 | spin_unlock_irqrestore(q->queue_lock, flags); | 484 | spin_unlock_irqrestore(q->queue_lock, flags); |
480 | } | 485 | } |
@@ -635,9 +640,11 @@ static int as_can_break_anticipation(struct as_data *ad, struct request *rq) | |||
635 | 640 | ||
636 | ioc = ad->io_context; | 641 | ioc = ad->io_context; |
637 | BUG_ON(!ioc); | 642 | BUG_ON(!ioc); |
643 | spin_lock(&ioc->lock); | ||
638 | 644 | ||
639 | if (rq && ioc == RQ_IOC(rq)) { | 645 | if (rq && ioc == RQ_IOC(rq)) { |
640 | /* request from same process */ | 646 | /* request from same process */ |
647 | spin_unlock(&ioc->lock); | ||
641 | return 1; | 648 | return 1; |
642 | } | 649 | } |
643 | 650 | ||
@@ -646,20 +653,25 @@ static int as_can_break_anticipation(struct as_data *ad, struct request *rq) | |||
646 | * In this situation status should really be FINISHED, | 653 | * In this situation status should really be FINISHED, |
647 | * however the timer hasn't had the chance to run yet. | 654 | * however the timer hasn't had the chance to run yet. |
648 | */ | 655 | */ |
656 | spin_unlock(&ioc->lock); | ||
649 | return 1; | 657 | return 1; |
650 | } | 658 | } |
651 | 659 | ||
652 | aic = ioc->aic; | 660 | aic = ioc->aic; |
653 | if (!aic) | 661 | if (!aic) { |
662 | spin_unlock(&ioc->lock); | ||
654 | return 0; | 663 | return 0; |
664 | } | ||
655 | 665 | ||
656 | if (atomic_read(&aic->nr_queued) > 0) { | 666 | if (atomic_read(&aic->nr_queued) > 0) { |
657 | /* process has more requests queued */ | 667 | /* process has more requests queued */ |
668 | spin_unlock(&ioc->lock); | ||
658 | return 1; | 669 | return 1; |
659 | } | 670 | } |
660 | 671 | ||
661 | if (atomic_read(&aic->nr_dispatched) > 0) { | 672 | if (atomic_read(&aic->nr_dispatched) > 0) { |
662 | /* process has more requests dispatched */ | 673 | /* process has more requests dispatched */ |
674 | spin_unlock(&ioc->lock); | ||
663 | return 1; | 675 | return 1; |
664 | } | 676 | } |
665 | 677 | ||
@@ -680,6 +692,7 @@ static int as_can_break_anticipation(struct as_data *ad, struct request *rq) | |||
680 | } | 692 | } |
681 | 693 | ||
682 | as_update_iohist(ad, aic, rq); | 694 | as_update_iohist(ad, aic, rq); |
695 | spin_unlock(&ioc->lock); | ||
683 | return 1; | 696 | return 1; |
684 | } | 697 | } |
685 | 698 | ||
@@ -688,20 +701,27 @@ static int as_can_break_anticipation(struct as_data *ad, struct request *rq) | |||
688 | if (aic->ttime_samples == 0) | 701 | if (aic->ttime_samples == 0) |
689 | ad->exit_prob = (7*ad->exit_prob + 256)/8; | 702 | ad->exit_prob = (7*ad->exit_prob + 256)/8; |
690 | 703 | ||
691 | if (ad->exit_no_coop > 128) | 704 | if (ad->exit_no_coop > 128) { |
705 | spin_unlock(&ioc->lock); | ||
692 | return 1; | 706 | return 1; |
707 | } | ||
693 | } | 708 | } |
694 | 709 | ||
695 | if (aic->ttime_samples == 0) { | 710 | if (aic->ttime_samples == 0) { |
696 | if (ad->new_ttime_mean > ad->antic_expire) | 711 | if (ad->new_ttime_mean > ad->antic_expire) { |
712 | spin_unlock(&ioc->lock); | ||
697 | return 1; | 713 | return 1; |
698 | if (ad->exit_prob * ad->exit_no_coop > 128*256) | 714 | } |
715 | if (ad->exit_prob * ad->exit_no_coop > 128*256) { | ||
716 | spin_unlock(&ioc->lock); | ||
699 | return 1; | 717 | return 1; |
718 | } | ||
700 | } else if (aic->ttime_mean > ad->antic_expire) { | 719 | } else if (aic->ttime_mean > ad->antic_expire) { |
701 | /* the process thinks too much between requests */ | 720 | /* the process thinks too much between requests */ |
721 | spin_unlock(&ioc->lock); | ||
702 | return 1; | 722 | return 1; |
703 | } | 723 | } |
704 | 724 | spin_unlock(&ioc->lock); | |
705 | return 0; | 725 | return 0; |
706 | } | 726 | } |
707 | 727 | ||
@@ -1255,7 +1275,9 @@ static void as_merged_requests(struct request_queue *q, struct request *req, | |||
1255 | * Don't copy here but swap, because when anext is | 1275 | * Don't copy here but swap, because when anext is |
1256 | * removed below, it must contain the unused context | 1276 | * removed below, it must contain the unused context |
1257 | */ | 1277 | */ |
1278 | double_spin_lock(&rioc->lock, &nioc->lock, rioc < nioc); | ||
1258 | swap_io_context(&rioc, &nioc); | 1279 | swap_io_context(&rioc, &nioc); |
1280 | double_spin_unlock(&rioc->lock, &nioc->lock, rioc < nioc); | ||
1259 | } | 1281 | } |
1260 | } | 1282 | } |
1261 | 1283 | ||
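The as_merged_requests() change above wraps swap_io_context() in double_spin_lock()/double_spin_unlock(), passing rioc < nioc so the two io_context locks are always taken in one consistent order and two CPUs merging against the same pair of contexts cannot deadlock. Below is a minimal userspace sketch of that ordering rule, using POSIX mutexes in place of the kernel spinlocks; lock_pair()/unlock_pair() are names invented for the example, not part of the patch.

#include <pthread.h>
#include <stdio.h>

/* Acquire two mutexes in a fixed order (lowest address first) so that two
 * threads locking the same pair can never deadlock. Same idea as
 * double_spin_lock(&a, &b, a < b) in the hunk above. */
static void lock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
{
	if (a == b) {			/* both sides point at the same context */
		pthread_mutex_lock(a);
	} else if (a < b) {
		pthread_mutex_lock(a);
		pthread_mutex_lock(b);
	} else {
		pthread_mutex_lock(b);
		pthread_mutex_lock(a);
	}
}

static void unlock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
{
	if (a == b) {
		pthread_mutex_unlock(a);
	} else {			/* release order does not matter */
		pthread_mutex_unlock(a);
		pthread_mutex_unlock(b);
	}
}

int main(void)
{
	pthread_mutex_t x = PTHREAD_MUTEX_INITIALIZER;
	pthread_mutex_t y = PTHREAD_MUTEX_INITIALIZER;

	lock_pair(&x, &y);
	printf("both locks held, taken in address order\n");
	unlock_pair(&x, &y);
	return 0;
}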
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 13553e015d72..f28d1fb30608 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -26,9 +26,9 @@ static const int cfq_slice_async_rq = 2; | |||
26 | static int cfq_slice_idle = HZ / 125; | 26 | static int cfq_slice_idle = HZ / 125; |
27 | 27 | ||
28 | /* | 28 | /* |
29 | * grace period before allowing idle class to get disk access | 29 | * offset from end of service tree |
30 | */ | 30 | */ |
31 | #define CFQ_IDLE_GRACE (HZ / 10) | 31 | #define CFQ_IDLE_DELAY (HZ / 5) |
32 | 32 | ||
33 | /* | 33 | /* |
34 | * below this threshold, we consider thinktime immediate | 34 | * below this threshold, we consider thinktime immediate |
@@ -98,8 +98,6 @@ struct cfq_data { | |||
98 | struct cfq_queue *async_cfqq[2][IOPRIO_BE_NR]; | 98 | struct cfq_queue *async_cfqq[2][IOPRIO_BE_NR]; |
99 | struct cfq_queue *async_idle_cfqq; | 99 | struct cfq_queue *async_idle_cfqq; |
100 | 100 | ||
101 | struct timer_list idle_class_timer; | ||
102 | |||
103 | sector_t last_position; | 101 | sector_t last_position; |
104 | unsigned long last_end_request; | 102 | unsigned long last_end_request; |
105 | 103 | ||
@@ -199,8 +197,8 @@ CFQ_CFQQ_FNS(sync); | |||
199 | 197 | ||
200 | static void cfq_dispatch_insert(struct request_queue *, struct request *); | 198 | static void cfq_dispatch_insert(struct request_queue *, struct request *); |
201 | static struct cfq_queue *cfq_get_queue(struct cfq_data *, int, | 199 | static struct cfq_queue *cfq_get_queue(struct cfq_data *, int, |
202 | struct task_struct *, gfp_t); | 200 | struct io_context *, gfp_t); |
203 | static struct cfq_io_context *cfq_cic_rb_lookup(struct cfq_data *, | 201 | static struct cfq_io_context *cfq_cic_lookup(struct cfq_data *, |
204 | struct io_context *); | 202 | struct io_context *); |
205 | 203 | ||
206 | static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_context *cic, | 204 | static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_context *cic, |
@@ -384,12 +382,15 @@ cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2) | |||
384 | /* | 382 | /* |
385 | * The below is leftmost cache rbtree addon | 383 | * The below is leftmost cache rbtree addon |
386 | */ | 384 | */ |
387 | static struct rb_node *cfq_rb_first(struct cfq_rb_root *root) | 385 | static struct cfq_queue *cfq_rb_first(struct cfq_rb_root *root) |
388 | { | 386 | { |
389 | if (!root->left) | 387 | if (!root->left) |
390 | root->left = rb_first(&root->rb); | 388 | root->left = rb_first(&root->rb); |
391 | 389 | ||
392 | return root->left; | 390 | if (root->left) |
391 | return rb_entry(root->left, struct cfq_queue, rb_node); | ||
392 | |||
393 | return NULL; | ||
393 | } | 394 | } |
394 | 395 | ||
395 | static void cfq_rb_erase(struct rb_node *n, struct cfq_rb_root *root) | 396 | static void cfq_rb_erase(struct rb_node *n, struct cfq_rb_root *root) |
@@ -446,12 +447,20 @@ static unsigned long cfq_slice_offset(struct cfq_data *cfqd, | |||
446 | static void cfq_service_tree_add(struct cfq_data *cfqd, | 447 | static void cfq_service_tree_add(struct cfq_data *cfqd, |
447 | struct cfq_queue *cfqq, int add_front) | 448 | struct cfq_queue *cfqq, int add_front) |
448 | { | 449 | { |
449 | struct rb_node **p = &cfqd->service_tree.rb.rb_node; | 450 | struct rb_node **p, *parent; |
450 | struct rb_node *parent = NULL; | 451 | struct cfq_queue *__cfqq; |
451 | unsigned long rb_key; | 452 | unsigned long rb_key; |
452 | int left; | 453 | int left; |
453 | 454 | ||
454 | if (!add_front) { | 455 | if (cfq_class_idle(cfqq)) { |
456 | rb_key = CFQ_IDLE_DELAY; | ||
457 | parent = rb_last(&cfqd->service_tree.rb); | ||
458 | if (parent && parent != &cfqq->rb_node) { | ||
459 | __cfqq = rb_entry(parent, struct cfq_queue, rb_node); | ||
460 | rb_key += __cfqq->rb_key; | ||
461 | } else | ||
462 | rb_key += jiffies; | ||
463 | } else if (!add_front) { | ||
455 | rb_key = cfq_slice_offset(cfqd, cfqq) + jiffies; | 464 | rb_key = cfq_slice_offset(cfqd, cfqq) + jiffies; |
456 | rb_key += cfqq->slice_resid; | 465 | rb_key += cfqq->slice_resid; |
457 | cfqq->slice_resid = 0; | 466 | cfqq->slice_resid = 0; |
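The cfq_service_tree_add() hunk above keys an idle-class queue CFQ_IDLE_DELAY past the rightmost entry already in the service tree (or past jiffies when the tree is empty), which is what lets the patch drop the idle-class grace timer: idle queues simply always sort behind RT and BE queues. A small sketch of just that key computation, with invented inputs standing in for jiffies and the tree's last key:

#include <stdio.h>

#define HZ 1000
#define CFQ_IDLE_DELAY (HZ / 5)		/* offset from end of service tree */

/* Compute the service-tree key for an idle-class queue: always land
 * CFQ_IDLE_DELAY beyond whatever currently sorts last in the tree. */
static unsigned long idle_rb_key(unsigned long jiffies_now,
				 const unsigned long *last_key_in_tree)
{
	unsigned long rb_key = CFQ_IDLE_DELAY;

	if (last_key_in_tree)		/* tree not empty: offset from its tail */
		rb_key += *last_key_in_tree;
	else				/* empty tree: offset from "now" */
		rb_key += jiffies_now;
	return rb_key;
}

int main(void)
{
	unsigned long now = 50000, last = 51234;

	printf("empty tree:     %lu\n", idle_rb_key(now, NULL));
	printf("non-empty tree: %lu\n", idle_rb_key(now, &last));
	return 0;
}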
@@ -469,8 +478,9 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, | |||
469 | } | 478 | } |
470 | 479 | ||
471 | left = 1; | 480 | left = 1; |
481 | parent = NULL; | ||
482 | p = &cfqd->service_tree.rb.rb_node; | ||
472 | while (*p) { | 483 | while (*p) { |
473 | struct cfq_queue *__cfqq; | ||
474 | struct rb_node **n; | 484 | struct rb_node **n; |
475 | 485 | ||
476 | parent = *p; | 486 | parent = *p; |
@@ -524,8 +534,7 @@ static void cfq_resort_rr_list(struct cfq_data *cfqd, struct cfq_queue *cfqq) | |||
524 | * add to busy list of queues for service, trying to be fair in ordering | 534 | * add to busy list of queues for service, trying to be fair in ordering |
525 | * the pending list according to last request service | 535 | * the pending list according to last request service |
526 | */ | 536 | */ |
527 | static inline void | 537 | static void cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) |
528 | cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) | ||
529 | { | 538 | { |
530 | BUG_ON(cfq_cfqq_on_rr(cfqq)); | 539 | BUG_ON(cfq_cfqq_on_rr(cfqq)); |
531 | cfq_mark_cfqq_on_rr(cfqq); | 540 | cfq_mark_cfqq_on_rr(cfqq); |
@@ -538,8 +547,7 @@ cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) | |||
538 | * Called when the cfqq no longer has requests pending, remove it from | 547 | * Called when the cfqq no longer has requests pending, remove it from |
539 | * the service tree. | 548 | * the service tree. |
540 | */ | 549 | */ |
541 | static inline void | 550 | static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) |
542 | cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) | ||
543 | { | 551 | { |
544 | BUG_ON(!cfq_cfqq_on_rr(cfqq)); | 552 | BUG_ON(!cfq_cfqq_on_rr(cfqq)); |
545 | cfq_clear_cfqq_on_rr(cfqq); | 553 | cfq_clear_cfqq_on_rr(cfqq); |
@@ -554,7 +562,7 @@ cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) | |||
554 | /* | 562 | /* |
555 | * rb tree support functions | 563 | * rb tree support functions |
556 | */ | 564 | */ |
557 | static inline void cfq_del_rq_rb(struct request *rq) | 565 | static void cfq_del_rq_rb(struct request *rq) |
558 | { | 566 | { |
559 | struct cfq_queue *cfqq = RQ_CFQQ(rq); | 567 | struct cfq_queue *cfqq = RQ_CFQQ(rq); |
560 | struct cfq_data *cfqd = cfqq->cfqd; | 568 | struct cfq_data *cfqd = cfqq->cfqd; |
@@ -594,8 +602,7 @@ static void cfq_add_rq_rb(struct request *rq) | |||
594 | BUG_ON(!cfqq->next_rq); | 602 | BUG_ON(!cfqq->next_rq); |
595 | } | 603 | } |
596 | 604 | ||
597 | static inline void | 605 | static void cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq) |
598 | cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq) | ||
599 | { | 606 | { |
600 | elv_rb_del(&cfqq->sort_list, rq); | 607 | elv_rb_del(&cfqq->sort_list, rq); |
601 | cfqq->queued[rq_is_sync(rq)]--; | 608 | cfqq->queued[rq_is_sync(rq)]--; |
@@ -609,7 +616,7 @@ cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio) | |||
609 | struct cfq_io_context *cic; | 616 | struct cfq_io_context *cic; |
610 | struct cfq_queue *cfqq; | 617 | struct cfq_queue *cfqq; |
611 | 618 | ||
612 | cic = cfq_cic_rb_lookup(cfqd, tsk->io_context); | 619 | cic = cfq_cic_lookup(cfqd, tsk->io_context); |
613 | if (!cic) | 620 | if (!cic) |
614 | return NULL; | 621 | return NULL; |
615 | 622 | ||
@@ -721,7 +728,7 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq, | |||
721 | * Lookup the cfqq that this bio will be queued with. Allow | 728 | * Lookup the cfqq that this bio will be queued with. Allow |
722 | * merge only if rq is queued there. | 729 | * merge only if rq is queued there. |
723 | */ | 730 | */ |
724 | cic = cfq_cic_rb_lookup(cfqd, current->io_context); | 731 | cic = cfq_cic_lookup(cfqd, current->io_context); |
725 | if (!cic) | 732 | if (!cic) |
726 | return 0; | 733 | return 0; |
727 | 734 | ||
@@ -732,15 +739,10 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq, | |||
732 | return 0; | 739 | return 0; |
733 | } | 740 | } |
734 | 741 | ||
735 | static inline void | 742 | static void __cfq_set_active_queue(struct cfq_data *cfqd, |
736 | __cfq_set_active_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) | 743 | struct cfq_queue *cfqq) |
737 | { | 744 | { |
738 | if (cfqq) { | 745 | if (cfqq) { |
739 | /* | ||
740 | * stop potential idle class queues waiting service | ||
741 | */ | ||
742 | del_timer(&cfqd->idle_class_timer); | ||
743 | |||
744 | cfqq->slice_end = 0; | 746 | cfqq->slice_end = 0; |
745 | cfq_clear_cfqq_must_alloc_slice(cfqq); | 747 | cfq_clear_cfqq_must_alloc_slice(cfqq); |
746 | cfq_clear_cfqq_fifo_expire(cfqq); | 748 | cfq_clear_cfqq_fifo_expire(cfqq); |
@@ -789,47 +791,16 @@ static inline void cfq_slice_expired(struct cfq_data *cfqd, int timed_out) | |||
789 | __cfq_slice_expired(cfqd, cfqq, timed_out); | 791 | __cfq_slice_expired(cfqd, cfqq, timed_out); |
790 | } | 792 | } |
791 | 793 | ||
792 | static int start_idle_class_timer(struct cfq_data *cfqd) | ||
793 | { | ||
794 | unsigned long end = cfqd->last_end_request + CFQ_IDLE_GRACE; | ||
795 | unsigned long now = jiffies; | ||
796 | |||
797 | if (time_before(now, end) && | ||
798 | time_after_eq(now, cfqd->last_end_request)) { | ||
799 | mod_timer(&cfqd->idle_class_timer, end); | ||
800 | return 1; | ||
801 | } | ||
802 | |||
803 | return 0; | ||
804 | } | ||
805 | |||
806 | /* | 794 | /* |
807 | * Get next queue for service. Unless we have a queue preemption, | 795 | * Get next queue for service. Unless we have a queue preemption, |
808 | * we'll simply select the first cfqq in the service tree. | 796 | * we'll simply select the first cfqq in the service tree. |
809 | */ | 797 | */ |
810 | static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd) | 798 | static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd) |
811 | { | 799 | { |
812 | struct cfq_queue *cfqq; | ||
813 | struct rb_node *n; | ||
814 | |||
815 | if (RB_EMPTY_ROOT(&cfqd->service_tree.rb)) | 800 | if (RB_EMPTY_ROOT(&cfqd->service_tree.rb)) |
816 | return NULL; | 801 | return NULL; |
817 | 802 | ||
818 | n = cfq_rb_first(&cfqd->service_tree); | 803 | return cfq_rb_first(&cfqd->service_tree); |
819 | cfqq = rb_entry(n, struct cfq_queue, rb_node); | ||
820 | |||
821 | if (cfq_class_idle(cfqq)) { | ||
822 | /* | ||
823 | * if we have idle queues and no rt or be queues had | ||
824 | * pending requests, either allow immediate service if | ||
825 | * the grace period has passed or arm the idle grace | ||
826 | * timer | ||
827 | */ | ||
828 | if (start_idle_class_timer(cfqd)) | ||
829 | cfqq = NULL; | ||
830 | } | ||
831 | |||
832 | return cfqq; | ||
833 | } | 804 | } |
834 | 805 | ||
835 | /* | 806 | /* |
@@ -895,7 +866,7 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd) | |||
895 | * task has exited, don't wait | 866 | * task has exited, don't wait |
896 | */ | 867 | */ |
897 | cic = cfqd->active_cic; | 868 | cic = cfqd->active_cic; |
898 | if (!cic || !cic->ioc->task) | 869 | if (!cic || !atomic_read(&cic->ioc->nr_tasks)) |
899 | return; | 870 | return; |
900 | 871 | ||
901 | /* | 872 | /* |
@@ -939,7 +910,7 @@ static void cfq_dispatch_insert(struct request_queue *q, struct request *rq) | |||
939 | /* | 910 | /* |
940 | * return expired entry, or NULL to just start from scratch in rbtree | 911 | * return expired entry, or NULL to just start from scratch in rbtree |
941 | */ | 912 | */ |
942 | static inline struct request *cfq_check_fifo(struct cfq_queue *cfqq) | 913 | static struct request *cfq_check_fifo(struct cfq_queue *cfqq) |
943 | { | 914 | { |
944 | struct cfq_data *cfqd = cfqq->cfqd; | 915 | struct cfq_data *cfqd = cfqq->cfqd; |
945 | struct request *rq; | 916 | struct request *rq; |
@@ -1068,7 +1039,7 @@ __cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
1068 | return dispatched; | 1039 | return dispatched; |
1069 | } | 1040 | } |
1070 | 1041 | ||
1071 | static inline int __cfq_forced_dispatch_cfqq(struct cfq_queue *cfqq) | 1042 | static int __cfq_forced_dispatch_cfqq(struct cfq_queue *cfqq) |
1072 | { | 1043 | { |
1073 | int dispatched = 0; | 1044 | int dispatched = 0; |
1074 | 1045 | ||
@@ -1087,14 +1058,11 @@ static inline int __cfq_forced_dispatch_cfqq(struct cfq_queue *cfqq) | |||
1087 | */ | 1058 | */ |
1088 | static int cfq_forced_dispatch(struct cfq_data *cfqd) | 1059 | static int cfq_forced_dispatch(struct cfq_data *cfqd) |
1089 | { | 1060 | { |
1061 | struct cfq_queue *cfqq; | ||
1090 | int dispatched = 0; | 1062 | int dispatched = 0; |
1091 | struct rb_node *n; | ||
1092 | |||
1093 | while ((n = cfq_rb_first(&cfqd->service_tree)) != NULL) { | ||
1094 | struct cfq_queue *cfqq = rb_entry(n, struct cfq_queue, rb_node); | ||
1095 | 1063 | ||
1064 | while ((cfqq = cfq_rb_first(&cfqd->service_tree)) != NULL) | ||
1096 | dispatched += __cfq_forced_dispatch_cfqq(cfqq); | 1065 | dispatched += __cfq_forced_dispatch_cfqq(cfqq); |
1097 | } | ||
1098 | 1066 | ||
1099 | cfq_slice_expired(cfqd, 0); | 1067 | cfq_slice_expired(cfqd, 0); |
1100 | 1068 | ||
@@ -1170,20 +1138,69 @@ static void cfq_put_queue(struct cfq_queue *cfqq) | |||
1170 | kmem_cache_free(cfq_pool, cfqq); | 1138 | kmem_cache_free(cfq_pool, cfqq); |
1171 | } | 1139 | } |
1172 | 1140 | ||
1173 | static void cfq_free_io_context(struct io_context *ioc) | 1141 | /* |
1142 | * Call func for each cic attached to this ioc. Returns number of cic's seen. | ||
1143 | */ | ||
1144 | #define CIC_GANG_NR 16 | ||
1145 | static unsigned int | ||
1146 | call_for_each_cic(struct io_context *ioc, | ||
1147 | void (*func)(struct io_context *, struct cfq_io_context *)) | ||
1174 | { | 1148 | { |
1175 | struct cfq_io_context *__cic; | 1149 | struct cfq_io_context *cics[CIC_GANG_NR]; |
1176 | struct rb_node *n; | 1150 | unsigned long index = 0; |
1177 | int freed = 0; | 1151 | unsigned int called = 0; |
1152 | int nr; | ||
1178 | 1153 | ||
1179 | ioc->ioc_data = NULL; | 1154 | rcu_read_lock(); |
1180 | 1155 | ||
1181 | while ((n = rb_first(&ioc->cic_root)) != NULL) { | 1156 | do { |
1182 | __cic = rb_entry(n, struct cfq_io_context, rb_node); | 1157 | int i; |
1183 | rb_erase(&__cic->rb_node, &ioc->cic_root); | 1158 | |
1184 | kmem_cache_free(cfq_ioc_pool, __cic); | 1159 | /* |
1185 | freed++; | 1160 | * Perhaps there's a better way - this just gang lookups from |
1186 | } | 1161 | * 0 to the end, restarting after each CIC_GANG_NR from the |
1162 | * last key + 1. | ||
1163 | */ | ||
1164 | nr = radix_tree_gang_lookup(&ioc->radix_root, (void **) cics, | ||
1165 | index, CIC_GANG_NR); | ||
1166 | if (!nr) | ||
1167 | break; | ||
1168 | |||
1169 | called += nr; | ||
1170 | index = 1 + (unsigned long) cics[nr - 1]->key; | ||
1171 | |||
1172 | for (i = 0; i < nr; i++) | ||
1173 | func(ioc, cics[i]); | ||
1174 | } while (nr == CIC_GANG_NR); | ||
1175 | |||
1176 | rcu_read_unlock(); | ||
1177 | |||
1178 | return called; | ||
1179 | } | ||
1180 | |||
1181 | static void cic_free_func(struct io_context *ioc, struct cfq_io_context *cic) | ||
1182 | { | ||
1183 | unsigned long flags; | ||
1184 | |||
1185 | BUG_ON(!cic->dead_key); | ||
1186 | |||
1187 | spin_lock_irqsave(&ioc->lock, flags); | ||
1188 | radix_tree_delete(&ioc->radix_root, cic->dead_key); | ||
1189 | spin_unlock_irqrestore(&ioc->lock, flags); | ||
1190 | |||
1191 | kmem_cache_free(cfq_ioc_pool, cic); | ||
1192 | } | ||
1193 | |||
1194 | static void cfq_free_io_context(struct io_context *ioc) | ||
1195 | { | ||
1196 | int freed; | ||
1197 | |||
1198 | /* | ||
1199 | * ioc->refcount is zero here, so no more cic's are allowed to be | ||
1200 | * linked into this ioc. So it should be ok to iterate over the known | ||
1201 | * list, we will see all cic's since no new ones are added. | ||
1202 | */ | ||
1203 | freed = call_for_each_cic(ioc, cic_free_func); | ||
1187 | 1204 | ||
1188 | elv_ioc_count_mod(ioc_count, -freed); | 1205 | elv_ioc_count_mod(ioc_count, -freed); |
1189 | 1206 | ||
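call_for_each_cic() above walks every cic attached to an io_context with batched radix_tree_gang_lookup() calls: fetch up to CIC_GANG_NR entries, run the callback on each, restart the lookup from the last key seen plus one, and stop once a batch comes back short. The sketch below reproduces that batching loop in plain C over a sorted array standing in for the radix tree; gang_lookup() here is written for the example and is not the kernel function.

#include <stdio.h>

struct item { unsigned long key; const char *name; };

/* Stand-in for radix_tree_gang_lookup(): copy up to max items whose key is
 * >= start into results[], return how many were found. */
static int gang_lookup(const struct item *tree, int nr_items,
		       const struct item **results, unsigned long start, int max)
{
	int found = 0;

	for (int i = 0; i < nr_items && found < max; i++)
		if (tree[i].key >= start)
			results[found++] = &tree[i];
	return found;
}

#define GANG_NR 2	/* CIC_GANG_NR is 16 in the patch; 2 keeps the demo short */

int main(void)
{
	const struct item tree[] = {
		{ 10, "cic-a" }, { 20, "cic-b" }, { 35, "cic-c" }, { 90, "cic-d" },
	};
	const struct item *batch[GANG_NR];
	unsigned long index = 0;
	int nr;

	do {
		nr = gang_lookup(tree, 4, batch, index, GANG_NR);
		if (!nr)
			break;
		index = batch[nr - 1]->key + 1;	/* restart after the last key seen */
		for (int i = 0; i < nr; i++)
			printf("visit %s\n", batch[i]->name);
	} while (nr == GANG_NR);		/* a partial batch means we are done */

	return 0;
}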
@@ -1205,7 +1222,12 @@ static void __cfq_exit_single_io_context(struct cfq_data *cfqd, | |||
1205 | struct cfq_io_context *cic) | 1222 | struct cfq_io_context *cic) |
1206 | { | 1223 | { |
1207 | list_del_init(&cic->queue_list); | 1224 | list_del_init(&cic->queue_list); |
1225 | |||
1226 | /* | ||
1227 | * Make sure key == NULL is seen for dead queues | ||
1228 | */ | ||
1208 | smp_wmb(); | 1229 | smp_wmb(); |
1230 | cic->dead_key = (unsigned long) cic->key; | ||
1209 | cic->key = NULL; | 1231 | cic->key = NULL; |
1210 | 1232 | ||
1211 | if (cic->cfqq[ASYNC]) { | 1233 | if (cic->cfqq[ASYNC]) { |
@@ -1219,16 +1241,18 @@ static void __cfq_exit_single_io_context(struct cfq_data *cfqd, | |||
1219 | } | 1241 | } |
1220 | } | 1242 | } |
1221 | 1243 | ||
1222 | static void cfq_exit_single_io_context(struct cfq_io_context *cic) | 1244 | static void cfq_exit_single_io_context(struct io_context *ioc, |
1245 | struct cfq_io_context *cic) | ||
1223 | { | 1246 | { |
1224 | struct cfq_data *cfqd = cic->key; | 1247 | struct cfq_data *cfqd = cic->key; |
1225 | 1248 | ||
1226 | if (cfqd) { | 1249 | if (cfqd) { |
1227 | struct request_queue *q = cfqd->queue; | 1250 | struct request_queue *q = cfqd->queue; |
1251 | unsigned long flags; | ||
1228 | 1252 | ||
1229 | spin_lock_irq(q->queue_lock); | 1253 | spin_lock_irqsave(q->queue_lock, flags); |
1230 | __cfq_exit_single_io_context(cfqd, cic); | 1254 | __cfq_exit_single_io_context(cfqd, cic); |
1231 | spin_unlock_irq(q->queue_lock); | 1255 | spin_unlock_irqrestore(q->queue_lock, flags); |
1232 | } | 1256 | } |
1233 | } | 1257 | } |
1234 | 1258 | ||
@@ -1238,21 +1262,8 @@ static void cfq_exit_single_io_context(struct cfq_io_context *cic) | |||
1238 | */ | 1262 | */ |
1239 | static void cfq_exit_io_context(struct io_context *ioc) | 1263 | static void cfq_exit_io_context(struct io_context *ioc) |
1240 | { | 1264 | { |
1241 | struct cfq_io_context *__cic; | 1265 | rcu_assign_pointer(ioc->ioc_data, NULL); |
1242 | struct rb_node *n; | 1266 | call_for_each_cic(ioc, cfq_exit_single_io_context); |
1243 | |||
1244 | ioc->ioc_data = NULL; | ||
1245 | |||
1246 | /* | ||
1247 | * put the reference this task is holding to the various queues | ||
1248 | */ | ||
1249 | n = rb_first(&ioc->cic_root); | ||
1250 | while (n != NULL) { | ||
1251 | __cic = rb_entry(n, struct cfq_io_context, rb_node); | ||
1252 | |||
1253 | cfq_exit_single_io_context(__cic); | ||
1254 | n = rb_next(n); | ||
1255 | } | ||
1256 | } | 1267 | } |
1257 | 1268 | ||
1258 | static struct cfq_io_context * | 1269 | static struct cfq_io_context * |
@@ -1273,7 +1284,7 @@ cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) | |||
1273 | return cic; | 1284 | return cic; |
1274 | } | 1285 | } |
1275 | 1286 | ||
1276 | static void cfq_init_prio_data(struct cfq_queue *cfqq) | 1287 | static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc) |
1277 | { | 1288 | { |
1278 | struct task_struct *tsk = current; | 1289 | struct task_struct *tsk = current; |
1279 | int ioprio_class; | 1290 | int ioprio_class; |
@@ -1281,7 +1292,7 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq) | |||
1281 | if (!cfq_cfqq_prio_changed(cfqq)) | 1292 | if (!cfq_cfqq_prio_changed(cfqq)) |
1282 | return; | 1293 | return; |
1283 | 1294 | ||
1284 | ioprio_class = IOPRIO_PRIO_CLASS(tsk->ioprio); | 1295 | ioprio_class = IOPRIO_PRIO_CLASS(ioc->ioprio); |
1285 | switch (ioprio_class) { | 1296 | switch (ioprio_class) { |
1286 | default: | 1297 | default: |
1287 | printk(KERN_ERR "cfq: bad prio %x\n", ioprio_class); | 1298 | printk(KERN_ERR "cfq: bad prio %x\n", ioprio_class); |
@@ -1293,11 +1304,11 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq) | |||
1293 | cfqq->ioprio_class = IOPRIO_CLASS_BE; | 1304 | cfqq->ioprio_class = IOPRIO_CLASS_BE; |
1294 | break; | 1305 | break; |
1295 | case IOPRIO_CLASS_RT: | 1306 | case IOPRIO_CLASS_RT: |
1296 | cfqq->ioprio = task_ioprio(tsk); | 1307 | cfqq->ioprio = task_ioprio(ioc); |
1297 | cfqq->ioprio_class = IOPRIO_CLASS_RT; | 1308 | cfqq->ioprio_class = IOPRIO_CLASS_RT; |
1298 | break; | 1309 | break; |
1299 | case IOPRIO_CLASS_BE: | 1310 | case IOPRIO_CLASS_BE: |
1300 | cfqq->ioprio = task_ioprio(tsk); | 1311 | cfqq->ioprio = task_ioprio(ioc); |
1301 | cfqq->ioprio_class = IOPRIO_CLASS_BE; | 1312 | cfqq->ioprio_class = IOPRIO_CLASS_BE; |
1302 | break; | 1313 | break; |
1303 | case IOPRIO_CLASS_IDLE: | 1314 | case IOPRIO_CLASS_IDLE: |
@@ -1316,7 +1327,7 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq) | |||
1316 | cfq_clear_cfqq_prio_changed(cfqq); | 1327 | cfq_clear_cfqq_prio_changed(cfqq); |
1317 | } | 1328 | } |
1318 | 1329 | ||
1319 | static inline void changed_ioprio(struct cfq_io_context *cic) | 1330 | static void changed_ioprio(struct io_context *ioc, struct cfq_io_context *cic) |
1320 | { | 1331 | { |
1321 | struct cfq_data *cfqd = cic->key; | 1332 | struct cfq_data *cfqd = cic->key; |
1322 | struct cfq_queue *cfqq; | 1333 | struct cfq_queue *cfqq; |
@@ -1330,8 +1341,7 @@ static inline void changed_ioprio(struct cfq_io_context *cic) | |||
1330 | cfqq = cic->cfqq[ASYNC]; | 1341 | cfqq = cic->cfqq[ASYNC]; |
1331 | if (cfqq) { | 1342 | if (cfqq) { |
1332 | struct cfq_queue *new_cfqq; | 1343 | struct cfq_queue *new_cfqq; |
1333 | new_cfqq = cfq_get_queue(cfqd, ASYNC, cic->ioc->task, | 1344 | new_cfqq = cfq_get_queue(cfqd, ASYNC, cic->ioc, GFP_ATOMIC); |
1334 | GFP_ATOMIC); | ||
1335 | if (new_cfqq) { | 1345 | if (new_cfqq) { |
1336 | cic->cfqq[ASYNC] = new_cfqq; | 1346 | cic->cfqq[ASYNC] = new_cfqq; |
1337 | cfq_put_queue(cfqq); | 1347 | cfq_put_queue(cfqq); |
@@ -1347,29 +1357,19 @@ static inline void changed_ioprio(struct cfq_io_context *cic) | |||
1347 | 1357 | ||
1348 | static void cfq_ioc_set_ioprio(struct io_context *ioc) | 1358 | static void cfq_ioc_set_ioprio(struct io_context *ioc) |
1349 | { | 1359 | { |
1350 | struct cfq_io_context *cic; | 1360 | call_for_each_cic(ioc, changed_ioprio); |
1351 | struct rb_node *n; | ||
1352 | |||
1353 | ioc->ioprio_changed = 0; | 1361 | ioc->ioprio_changed = 0; |
1354 | |||
1355 | n = rb_first(&ioc->cic_root); | ||
1356 | while (n != NULL) { | ||
1357 | cic = rb_entry(n, struct cfq_io_context, rb_node); | ||
1358 | |||
1359 | changed_ioprio(cic); | ||
1360 | n = rb_next(n); | ||
1361 | } | ||
1362 | } | 1362 | } |
1363 | 1363 | ||
1364 | static struct cfq_queue * | 1364 | static struct cfq_queue * |
1365 | cfq_find_alloc_queue(struct cfq_data *cfqd, int is_sync, | 1365 | cfq_find_alloc_queue(struct cfq_data *cfqd, int is_sync, |
1366 | struct task_struct *tsk, gfp_t gfp_mask) | 1366 | struct io_context *ioc, gfp_t gfp_mask) |
1367 | { | 1367 | { |
1368 | struct cfq_queue *cfqq, *new_cfqq = NULL; | 1368 | struct cfq_queue *cfqq, *new_cfqq = NULL; |
1369 | struct cfq_io_context *cic; | 1369 | struct cfq_io_context *cic; |
1370 | 1370 | ||
1371 | retry: | 1371 | retry: |
1372 | cic = cfq_cic_rb_lookup(cfqd, tsk->io_context); | 1372 | cic = cfq_cic_lookup(cfqd, ioc); |
1373 | /* cic always exists here */ | 1373 | /* cic always exists here */ |
1374 | cfqq = cic_to_cfqq(cic, is_sync); | 1374 | cfqq = cic_to_cfqq(cic, is_sync); |
1375 | 1375 | ||
@@ -1404,15 +1404,16 @@ retry: | |||
1404 | atomic_set(&cfqq->ref, 0); | 1404 | atomic_set(&cfqq->ref, 0); |
1405 | cfqq->cfqd = cfqd; | 1405 | cfqq->cfqd = cfqd; |
1406 | 1406 | ||
1407 | if (is_sync) { | ||
1408 | cfq_mark_cfqq_idle_window(cfqq); | ||
1409 | cfq_mark_cfqq_sync(cfqq); | ||
1410 | } | ||
1411 | |||
1412 | cfq_mark_cfqq_prio_changed(cfqq); | 1407 | cfq_mark_cfqq_prio_changed(cfqq); |
1413 | cfq_mark_cfqq_queue_new(cfqq); | 1408 | cfq_mark_cfqq_queue_new(cfqq); |
1414 | 1409 | ||
1415 | cfq_init_prio_data(cfqq); | 1410 | cfq_init_prio_data(cfqq, ioc); |
1411 | |||
1412 | if (is_sync) { | ||
1413 | if (!cfq_class_idle(cfqq)) | ||
1414 | cfq_mark_cfqq_idle_window(cfqq); | ||
1415 | cfq_mark_cfqq_sync(cfqq); | ||
1416 | } | ||
1416 | } | 1417 | } |
1417 | 1418 | ||
1418 | if (new_cfqq) | 1419 | if (new_cfqq) |
@@ -1439,11 +1440,11 @@ cfq_async_queue_prio(struct cfq_data *cfqd, int ioprio_class, int ioprio) | |||
1439 | } | 1440 | } |
1440 | 1441 | ||
1441 | static struct cfq_queue * | 1442 | static struct cfq_queue * |
1442 | cfq_get_queue(struct cfq_data *cfqd, int is_sync, struct task_struct *tsk, | 1443 | cfq_get_queue(struct cfq_data *cfqd, int is_sync, struct io_context *ioc, |
1443 | gfp_t gfp_mask) | 1444 | gfp_t gfp_mask) |
1444 | { | 1445 | { |
1445 | const int ioprio = task_ioprio(tsk); | 1446 | const int ioprio = task_ioprio(ioc); |
1446 | const int ioprio_class = task_ioprio_class(tsk); | 1447 | const int ioprio_class = task_ioprio_class(ioc); |
1447 | struct cfq_queue **async_cfqq = NULL; | 1448 | struct cfq_queue **async_cfqq = NULL; |
1448 | struct cfq_queue *cfqq = NULL; | 1449 | struct cfq_queue *cfqq = NULL; |
1449 | 1450 | ||
@@ -1453,7 +1454,7 @@ cfq_get_queue(struct cfq_data *cfqd, int is_sync, struct task_struct *tsk, | |||
1453 | } | 1454 | } |
1454 | 1455 | ||
1455 | if (!cfqq) { | 1456 | if (!cfqq) { |
1456 | cfqq = cfq_find_alloc_queue(cfqd, is_sync, tsk, gfp_mask); | 1457 | cfqq = cfq_find_alloc_queue(cfqd, is_sync, ioc, gfp_mask); |
1457 | if (!cfqq) | 1458 | if (!cfqq) |
1458 | return NULL; | 1459 | return NULL; |
1459 | } | 1460 | } |
@@ -1470,28 +1471,42 @@ cfq_get_queue(struct cfq_data *cfqd, int is_sync, struct task_struct *tsk, | |||
1470 | return cfqq; | 1471 | return cfqq; |
1471 | } | 1472 | } |
1472 | 1473 | ||
1474 | static void cfq_cic_free(struct cfq_io_context *cic) | ||
1475 | { | ||
1476 | kmem_cache_free(cfq_ioc_pool, cic); | ||
1477 | elv_ioc_count_dec(ioc_count); | ||
1478 | |||
1479 | if (ioc_gone && !elv_ioc_count_read(ioc_count)) | ||
1480 | complete(ioc_gone); | ||
1481 | } | ||
1482 | |||
1473 | /* | 1483 | /* |
1474 | * We drop cfq io contexts lazily, so we may find a dead one. | 1484 | * We drop cfq io contexts lazily, so we may find a dead one. |
1475 | */ | 1485 | */ |
1476 | static void | 1486 | static void |
1477 | cfq_drop_dead_cic(struct io_context *ioc, struct cfq_io_context *cic) | 1487 | cfq_drop_dead_cic(struct cfq_data *cfqd, struct io_context *ioc, |
1488 | struct cfq_io_context *cic) | ||
1478 | { | 1489 | { |
1490 | unsigned long flags; | ||
1491 | |||
1479 | WARN_ON(!list_empty(&cic->queue_list)); | 1492 | WARN_ON(!list_empty(&cic->queue_list)); |
1480 | 1493 | ||
1494 | spin_lock_irqsave(&ioc->lock, flags); | ||
1495 | |||
1481 | if (ioc->ioc_data == cic) | 1496 | if (ioc->ioc_data == cic) |
1482 | ioc->ioc_data = NULL; | 1497 | rcu_assign_pointer(ioc->ioc_data, NULL); |
1483 | 1498 | ||
1484 | rb_erase(&cic->rb_node, &ioc->cic_root); | 1499 | radix_tree_delete(&ioc->radix_root, (unsigned long) cfqd); |
1485 | kmem_cache_free(cfq_ioc_pool, cic); | 1500 | spin_unlock_irqrestore(&ioc->lock, flags); |
1486 | elv_ioc_count_dec(ioc_count); | 1501 | |
1502 | cfq_cic_free(cic); | ||
1487 | } | 1503 | } |
1488 | 1504 | ||
1489 | static struct cfq_io_context * | 1505 | static struct cfq_io_context * |
1490 | cfq_cic_rb_lookup(struct cfq_data *cfqd, struct io_context *ioc) | 1506 | cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc) |
1491 | { | 1507 | { |
1492 | struct rb_node *n; | ||
1493 | struct cfq_io_context *cic; | 1508 | struct cfq_io_context *cic; |
1494 | void *k, *key = cfqd; | 1509 | void *k; |
1495 | 1510 | ||
1496 | if (unlikely(!ioc)) | 1511 | if (unlikely(!ioc)) |
1497 | return NULL; | 1512 | return NULL; |
@@ -1499,74 +1514,64 @@ cfq_cic_rb_lookup(struct cfq_data *cfqd, struct io_context *ioc) | |||
1499 | /* | 1514 | /* |
1500 | * we maintain a last-hit cache, to avoid browsing over the tree | 1515 | * we maintain a last-hit cache, to avoid browsing over the tree |
1501 | */ | 1516 | */ |
1502 | cic = ioc->ioc_data; | 1517 | cic = rcu_dereference(ioc->ioc_data); |
1503 | if (cic && cic->key == cfqd) | 1518 | if (cic && cic->key == cfqd) |
1504 | return cic; | 1519 | return cic; |
1505 | 1520 | ||
1506 | restart: | 1521 | do { |
1507 | n = ioc->cic_root.rb_node; | 1522 | rcu_read_lock(); |
1508 | while (n) { | 1523 | cic = radix_tree_lookup(&ioc->radix_root, (unsigned long) cfqd); |
1509 | cic = rb_entry(n, struct cfq_io_context, rb_node); | 1524 | rcu_read_unlock(); |
1525 | if (!cic) | ||
1526 | break; | ||
1510 | /* ->key must be copied to avoid race with cfq_exit_queue() */ | 1527 | /* ->key must be copied to avoid race with cfq_exit_queue() */ |
1511 | k = cic->key; | 1528 | k = cic->key; |
1512 | if (unlikely(!k)) { | 1529 | if (unlikely(!k)) { |
1513 | cfq_drop_dead_cic(ioc, cic); | 1530 | cfq_drop_dead_cic(cfqd, ioc, cic); |
1514 | goto restart; | 1531 | continue; |
1515 | } | 1532 | } |
1516 | 1533 | ||
1517 | if (key < k) | 1534 | rcu_assign_pointer(ioc->ioc_data, cic); |
1518 | n = n->rb_left; | 1535 | break; |
1519 | else if (key > k) | 1536 | } while (1); |
1520 | n = n->rb_right; | ||
1521 | else { | ||
1522 | ioc->ioc_data = cic; | ||
1523 | return cic; | ||
1524 | } | ||
1525 | } | ||
1526 | 1537 | ||
1527 | return NULL; | 1538 | return cic; |
1528 | } | 1539 | } |
1529 | 1540 | ||
1530 | static inline void | 1541 | /* |
1531 | cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc, | 1542 | * Add cic into ioc, using cfqd as the search key. This enables us to lookup |
1532 | struct cfq_io_context *cic) | 1543 | * the process specific cfq io context when entered from the block layer. |
1544 | * Also adds the cic to a per-cfqd list, used when this queue is removed. | ||
1545 | */ | ||
1546 | static int cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc, | ||
1547 | struct cfq_io_context *cic, gfp_t gfp_mask) | ||
1533 | { | 1548 | { |
1534 | struct rb_node **p; | ||
1535 | struct rb_node *parent; | ||
1536 | struct cfq_io_context *__cic; | ||
1537 | unsigned long flags; | 1549 | unsigned long flags; |
1538 | void *k; | 1550 | int ret; |
1539 | 1551 | ||
1540 | cic->ioc = ioc; | 1552 | ret = radix_tree_preload(gfp_mask); |
1541 | cic->key = cfqd; | 1553 | if (!ret) { |
1554 | cic->ioc = ioc; | ||
1555 | cic->key = cfqd; | ||
1542 | 1556 | ||
1543 | restart: | 1557 | spin_lock_irqsave(&ioc->lock, flags); |
1544 | parent = NULL; | 1558 | ret = radix_tree_insert(&ioc->radix_root, |
1545 | p = &ioc->cic_root.rb_node; | 1559 | (unsigned long) cfqd, cic); |
1546 | while (*p) { | 1560 | spin_unlock_irqrestore(&ioc->lock, flags); |
1547 | parent = *p; | ||
1548 | __cic = rb_entry(parent, struct cfq_io_context, rb_node); | ||
1549 | /* ->key must be copied to avoid race with cfq_exit_queue() */ | ||
1550 | k = __cic->key; | ||
1551 | if (unlikely(!k)) { | ||
1552 | cfq_drop_dead_cic(ioc, __cic); | ||
1553 | goto restart; | ||
1554 | } | ||
1555 | 1561 | ||
1556 | if (cic->key < k) | 1562 | radix_tree_preload_end(); |
1557 | p = &(*p)->rb_left; | 1563 | |
1558 | else if (cic->key > k) | 1564 | if (!ret) { |
1559 | p = &(*p)->rb_right; | 1565 | spin_lock_irqsave(cfqd->queue->queue_lock, flags); |
1560 | else | 1566 | list_add(&cic->queue_list, &cfqd->cic_list); |
1561 | BUG(); | 1567 | spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); |
1568 | } | ||
1562 | } | 1569 | } |
1563 | 1570 | ||
1564 | rb_link_node(&cic->rb_node, parent, p); | 1571 | if (ret) |
1565 | rb_insert_color(&cic->rb_node, &ioc->cic_root); | 1572 | printk(KERN_ERR "cfq: cic link failed!\n"); |
1566 | 1573 | ||
1567 | spin_lock_irqsave(cfqd->queue->queue_lock, flags); | 1574 | return ret; |
1568 | list_add(&cic->queue_list, &cfqd->cic_list); | ||
1569 | spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); | ||
1570 | } | 1575 | } |
1571 | 1576 | ||
1572 | /* | 1577 | /* |
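cfq_cic_lookup() above first checks the lockless last-hit cache (ioc->ioc_data, read with rcu_dereference()) and only falls back to the radix tree on a miss, re-publishing the hit with rcu_assign_pointer() for the next caller. The sketch below shows that single-slot cache shape as a userspace analog; the kernel does this under rcu_read_lock() with RCU primitives, whereas the C11 release/acquire pair here is only a simplified stand-in for the publication ordering, and every name is invented for the example.

#include <stdatomic.h>
#include <stdio.h>

struct cic { unsigned long key; const char *name; };

/* Single-slot "last hit" cache, analogous to ioc->ioc_data in the patch. */
static _Atomic(struct cic *) ioc_data;

static struct cic *slow_lookup(unsigned long key);	/* the "radix tree" walk */

static struct cic *cic_lookup(unsigned long key)
{
	struct cic *cic = atomic_load_explicit(&ioc_data, memory_order_acquire);

	if (cic && cic->key == key)		/* fast path: cache hit */
		return cic;

	cic = slow_lookup(key);			/* miss: do the real lookup */
	if (cic)				/* publish for the next caller */
		atomic_store_explicit(&ioc_data, cic, memory_order_release);
	return cic;
}

static struct cic table[] = { { 1, "q0" }, { 2, "q1" } };

static struct cic *slow_lookup(unsigned long key)
{
	for (unsigned i = 0; i < sizeof(table) / sizeof(table[0]); i++)
		if (table[i].key == key)
			return &table[i];
	return NULL;
}

int main(void)
{
	printf("%s\n", cic_lookup(2)->name);	/* slow path, then cached */
	printf("%s\n", cic_lookup(2)->name);	/* fast path */
	return 0;
}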
@@ -1586,7 +1591,7 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) | |||
1586 | if (!ioc) | 1591 | if (!ioc) |
1587 | return NULL; | 1592 | return NULL; |
1588 | 1593 | ||
1589 | cic = cfq_cic_rb_lookup(cfqd, ioc); | 1594 | cic = cfq_cic_lookup(cfqd, ioc); |
1590 | if (cic) | 1595 | if (cic) |
1591 | goto out; | 1596 | goto out; |
1592 | 1597 | ||
@@ -1594,13 +1599,17 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) | |||
1594 | if (cic == NULL) | 1599 | if (cic == NULL) |
1595 | goto err; | 1600 | goto err; |
1596 | 1601 | ||
1597 | cfq_cic_link(cfqd, ioc, cic); | 1602 | if (cfq_cic_link(cfqd, ioc, cic, gfp_mask)) |
1603 | goto err_free; | ||
1604 | |||
1598 | out: | 1605 | out: |
1599 | smp_read_barrier_depends(); | 1606 | smp_read_barrier_depends(); |
1600 | if (unlikely(ioc->ioprio_changed)) | 1607 | if (unlikely(ioc->ioprio_changed)) |
1601 | cfq_ioc_set_ioprio(ioc); | 1608 | cfq_ioc_set_ioprio(ioc); |
1602 | 1609 | ||
1603 | return cic; | 1610 | return cic; |
1611 | err_free: | ||
1612 | cfq_cic_free(cic); | ||
1604 | err: | 1613 | err: |
1605 | put_io_context(ioc); | 1614 | put_io_context(ioc); |
1606 | return NULL; | 1615 | return NULL; |
@@ -1655,12 +1664,15 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
1655 | { | 1664 | { |
1656 | int enable_idle; | 1665 | int enable_idle; |
1657 | 1666 | ||
1658 | if (!cfq_cfqq_sync(cfqq)) | 1667 | /* |
1668 | * Don't idle for async or idle io prio class | ||
1669 | */ | ||
1670 | if (!cfq_cfqq_sync(cfqq) || cfq_class_idle(cfqq)) | ||
1659 | return; | 1671 | return; |
1660 | 1672 | ||
1661 | enable_idle = cfq_cfqq_idle_window(cfqq); | 1673 | enable_idle = cfq_cfqq_idle_window(cfqq); |
1662 | 1674 | ||
1663 | if (!cic->ioc->task || !cfqd->cfq_slice_idle || | 1675 | if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle || |
1664 | (cfqd->hw_tag && CIC_SEEKY(cic))) | 1676 | (cfqd->hw_tag && CIC_SEEKY(cic))) |
1665 | enable_idle = 0; | 1677 | enable_idle = 0; |
1666 | else if (sample_valid(cic->ttime_samples)) { | 1678 | else if (sample_valid(cic->ttime_samples)) { |
@@ -1793,7 +1805,7 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq) | |||
1793 | struct cfq_data *cfqd = q->elevator->elevator_data; | 1805 | struct cfq_data *cfqd = q->elevator->elevator_data; |
1794 | struct cfq_queue *cfqq = RQ_CFQQ(rq); | 1806 | struct cfq_queue *cfqq = RQ_CFQQ(rq); |
1795 | 1807 | ||
1796 | cfq_init_prio_data(cfqq); | 1808 | cfq_init_prio_data(cfqq, RQ_CIC(rq)->ioc); |
1797 | 1809 | ||
1798 | cfq_add_rq_rb(rq); | 1810 | cfq_add_rq_rb(rq); |
1799 | 1811 | ||
@@ -1834,7 +1846,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) | |||
1834 | cfq_set_prio_slice(cfqd, cfqq); | 1846 | cfq_set_prio_slice(cfqd, cfqq); |
1835 | cfq_clear_cfqq_slice_new(cfqq); | 1847 | cfq_clear_cfqq_slice_new(cfqq); |
1836 | } | 1848 | } |
1837 | if (cfq_slice_used(cfqq)) | 1849 | if (cfq_slice_used(cfqq) || cfq_class_idle(cfqq)) |
1838 | cfq_slice_expired(cfqd, 1); | 1850 | cfq_slice_expired(cfqd, 1); |
1839 | else if (sync && RB_EMPTY_ROOT(&cfqq->sort_list)) | 1851 | else if (sync && RB_EMPTY_ROOT(&cfqq->sort_list)) |
1840 | cfq_arm_slice_timer(cfqd); | 1852 | cfq_arm_slice_timer(cfqd); |
@@ -1894,13 +1906,13 @@ static int cfq_may_queue(struct request_queue *q, int rw) | |||
1894 | * so just lookup a possibly existing queue, or return 'may queue' | 1906 | * so just lookup a possibly existing queue, or return 'may queue' |
1895 | * if that fails | 1907 | * if that fails |
1896 | */ | 1908 | */ |
1897 | cic = cfq_cic_rb_lookup(cfqd, tsk->io_context); | 1909 | cic = cfq_cic_lookup(cfqd, tsk->io_context); |
1898 | if (!cic) | 1910 | if (!cic) |
1899 | return ELV_MQUEUE_MAY; | 1911 | return ELV_MQUEUE_MAY; |
1900 | 1912 | ||
1901 | cfqq = cic_to_cfqq(cic, rw & REQ_RW_SYNC); | 1913 | cfqq = cic_to_cfqq(cic, rw & REQ_RW_SYNC); |
1902 | if (cfqq) { | 1914 | if (cfqq) { |
1903 | cfq_init_prio_data(cfqq); | 1915 | cfq_init_prio_data(cfqq, cic->ioc); |
1904 | cfq_prio_boost(cfqq); | 1916 | cfq_prio_boost(cfqq); |
1905 | 1917 | ||
1906 | return __cfq_may_queue(cfqq); | 1918 | return __cfq_may_queue(cfqq); |
@@ -1938,7 +1950,6 @@ static int | |||
1938 | cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask) | 1950 | cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask) |
1939 | { | 1951 | { |
1940 | struct cfq_data *cfqd = q->elevator->elevator_data; | 1952 | struct cfq_data *cfqd = q->elevator->elevator_data; |
1941 | struct task_struct *tsk = current; | ||
1942 | struct cfq_io_context *cic; | 1953 | struct cfq_io_context *cic; |
1943 | const int rw = rq_data_dir(rq); | 1954 | const int rw = rq_data_dir(rq); |
1944 | const int is_sync = rq_is_sync(rq); | 1955 | const int is_sync = rq_is_sync(rq); |
@@ -1956,7 +1967,7 @@ cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask) | |||
1956 | 1967 | ||
1957 | cfqq = cic_to_cfqq(cic, is_sync); | 1968 | cfqq = cic_to_cfqq(cic, is_sync); |
1958 | if (!cfqq) { | 1969 | if (!cfqq) { |
1959 | cfqq = cfq_get_queue(cfqd, is_sync, tsk, gfp_mask); | 1970 | cfqq = cfq_get_queue(cfqd, is_sync, cic->ioc, gfp_mask); |
1960 | 1971 | ||
1961 | if (!cfqq) | 1972 | if (!cfqq) |
1962 | goto queue_fail; | 1973 | goto queue_fail; |
@@ -2039,29 +2050,9 @@ out_cont: | |||
2039 | spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); | 2050 | spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); |
2040 | } | 2051 | } |
2041 | 2052 | ||
2042 | /* | ||
2043 | * Timer running if an idle class queue is waiting for service | ||
2044 | */ | ||
2045 | static void cfq_idle_class_timer(unsigned long data) | ||
2046 | { | ||
2047 | struct cfq_data *cfqd = (struct cfq_data *) data; | ||
2048 | unsigned long flags; | ||
2049 | |||
2050 | spin_lock_irqsave(cfqd->queue->queue_lock, flags); | ||
2051 | |||
2052 | /* | ||
2053 | * race with a non-idle queue, reset timer | ||
2054 | */ | ||
2055 | if (!start_idle_class_timer(cfqd)) | ||
2056 | cfq_schedule_dispatch(cfqd); | ||
2057 | |||
2058 | spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); | ||
2059 | } | ||
2060 | |||
2061 | static void cfq_shutdown_timer_wq(struct cfq_data *cfqd) | 2053 | static void cfq_shutdown_timer_wq(struct cfq_data *cfqd) |
2062 | { | 2054 | { |
2063 | del_timer_sync(&cfqd->idle_slice_timer); | 2055 | del_timer_sync(&cfqd->idle_slice_timer); |
2064 | del_timer_sync(&cfqd->idle_class_timer); | ||
2065 | kblockd_flush_work(&cfqd->unplug_work); | 2056 | kblockd_flush_work(&cfqd->unplug_work); |
2066 | } | 2057 | } |
2067 | 2058 | ||
@@ -2126,10 +2117,6 @@ static void *cfq_init_queue(struct request_queue *q) | |||
2126 | cfqd->idle_slice_timer.function = cfq_idle_slice_timer; | 2117 | cfqd->idle_slice_timer.function = cfq_idle_slice_timer; |
2127 | cfqd->idle_slice_timer.data = (unsigned long) cfqd; | 2118 | cfqd->idle_slice_timer.data = (unsigned long) cfqd; |
2128 | 2119 | ||
2129 | init_timer(&cfqd->idle_class_timer); | ||
2130 | cfqd->idle_class_timer.function = cfq_idle_class_timer; | ||
2131 | cfqd->idle_class_timer.data = (unsigned long) cfqd; | ||
2132 | |||
2133 | INIT_WORK(&cfqd->unplug_work, cfq_kick_queue); | 2120 | INIT_WORK(&cfqd->unplug_work, cfq_kick_queue); |
2134 | 2121 | ||
2135 | cfqd->last_end_request = jiffies; | 2122 | cfqd->last_end_request = jiffies; |
@@ -2160,7 +2147,7 @@ static int __init cfq_slab_setup(void) | |||
2160 | if (!cfq_pool) | 2147 | if (!cfq_pool) |
2161 | goto fail; | 2148 | goto fail; |
2162 | 2149 | ||
2163 | cfq_ioc_pool = KMEM_CACHE(cfq_io_context, 0); | 2150 | cfq_ioc_pool = KMEM_CACHE(cfq_io_context, SLAB_DESTROY_BY_RCU); |
2164 | if (!cfq_ioc_pool) | 2151 | if (!cfq_ioc_pool) |
2165 | goto fail; | 2152 | goto fail; |
2166 | 2153 | ||
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 4bf95b602f36..c16fdfed8c62 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -3981,55 +3981,100 @@ int __init blk_dev_init(void) | |||
3981 | return 0; | 3981 | return 0; |
3982 | } | 3982 | } |
3983 | 3983 | ||
3984 | static void cfq_dtor(struct io_context *ioc) | ||
3985 | { | ||
3986 | struct cfq_io_context *cic[1]; | ||
3987 | int r; | ||
3988 | |||
3989 | /* | ||
3990 | * We don't have a specific key to lookup with, so use the gang | ||
3991 | * lookup to just retrieve the first item stored. The cfq exit | ||
3992 | * function will iterate the full tree, so any member will do. | ||
3993 | */ | ||
3994 | r = radix_tree_gang_lookup(&ioc->radix_root, (void **) cic, 0, 1); | ||
3995 | if (r > 0) | ||
3996 | cic[0]->dtor(ioc); | ||
3997 | } | ||
3998 | |||
3984 | /* | 3999 | /* |
3985 | * IO Context helper functions | 4000 | * IO Context helper functions. put_io_context() returns 1 if there are no |
4001 | * more users of this io context, 0 otherwise. | ||
3986 | */ | 4002 | */ |
3987 | void put_io_context(struct io_context *ioc) | 4003 | int put_io_context(struct io_context *ioc) |
3988 | { | 4004 | { |
3989 | if (ioc == NULL) | 4005 | if (ioc == NULL) |
3990 | return; | 4006 | return 1; |
3991 | 4007 | ||
3992 | BUG_ON(atomic_read(&ioc->refcount) == 0); | 4008 | BUG_ON(atomic_read(&ioc->refcount) == 0); |
3993 | 4009 | ||
3994 | if (atomic_dec_and_test(&ioc->refcount)) { | 4010 | if (atomic_dec_and_test(&ioc->refcount)) { |
3995 | struct cfq_io_context *cic; | ||
3996 | |||
3997 | rcu_read_lock(); | 4011 | rcu_read_lock(); |
3998 | if (ioc->aic && ioc->aic->dtor) | 4012 | if (ioc->aic && ioc->aic->dtor) |
3999 | ioc->aic->dtor(ioc->aic); | 4013 | ioc->aic->dtor(ioc->aic); |
4000 | if (ioc->cic_root.rb_node != NULL) { | ||
4001 | struct rb_node *n = rb_first(&ioc->cic_root); | ||
4002 | |||
4003 | cic = rb_entry(n, struct cfq_io_context, rb_node); | ||
4004 | cic->dtor(ioc); | ||
4005 | } | ||
4006 | rcu_read_unlock(); | 4014 | rcu_read_unlock(); |
4015 | cfq_dtor(ioc); | ||
4007 | 4016 | ||
4008 | kmem_cache_free(iocontext_cachep, ioc); | 4017 | kmem_cache_free(iocontext_cachep, ioc); |
4018 | return 1; | ||
4009 | } | 4019 | } |
4020 | return 0; | ||
4010 | } | 4021 | } |
4011 | EXPORT_SYMBOL(put_io_context); | 4022 | EXPORT_SYMBOL(put_io_context); |
4012 | 4023 | ||
4024 | static void cfq_exit(struct io_context *ioc) | ||
4025 | { | ||
4026 | struct cfq_io_context *cic[1]; | ||
4027 | int r; | ||
4028 | |||
4029 | rcu_read_lock(); | ||
4030 | /* | ||
4031 | * See comment for cfq_dtor() | ||
4032 | */ | ||
4033 | r = radix_tree_gang_lookup(&ioc->radix_root, (void **) cic, 0, 1); | ||
4034 | rcu_read_unlock(); | ||
4035 | |||
4036 | if (r > 0) | ||
4037 | cic[0]->exit(ioc); | ||
4038 | } | ||
4039 | |||
4013 | /* Called by the exitting task */ | 4040 | /* Called by the exitting task */ |
4014 | void exit_io_context(void) | 4041 | void exit_io_context(void) |
4015 | { | 4042 | { |
4016 | struct io_context *ioc; | 4043 | struct io_context *ioc; |
4017 | struct cfq_io_context *cic; | ||
4018 | 4044 | ||
4019 | task_lock(current); | 4045 | task_lock(current); |
4020 | ioc = current->io_context; | 4046 | ioc = current->io_context; |
4021 | current->io_context = NULL; | 4047 | current->io_context = NULL; |
4022 | task_unlock(current); | 4048 | task_unlock(current); |
4023 | 4049 | ||
4024 | ioc->task = NULL; | 4050 | if (atomic_dec_and_test(&ioc->nr_tasks)) { |
4025 | if (ioc->aic && ioc->aic->exit) | 4051 | if (ioc->aic && ioc->aic->exit) |
4026 | ioc->aic->exit(ioc->aic); | 4052 | ioc->aic->exit(ioc->aic); |
4027 | if (ioc->cic_root.rb_node != NULL) { | 4053 | cfq_exit(ioc); |
4028 | cic = rb_entry(rb_first(&ioc->cic_root), struct cfq_io_context, rb_node); | 4054 | |
4029 | cic->exit(ioc); | 4055 | put_io_context(ioc); |
4030 | } | 4056 | } |
4057 | } | ||
4058 | |||
4059 | struct io_context *alloc_io_context(gfp_t gfp_flags, int node) | ||
4060 | { | ||
4061 | struct io_context *ret; | ||
4031 | 4062 | ||
4032 | put_io_context(ioc); | 4063 | ret = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node); |
4064 | if (ret) { | ||
4065 | atomic_set(&ret->refcount, 1); | ||
4066 | atomic_set(&ret->nr_tasks, 1); | ||
4067 | spin_lock_init(&ret->lock); | ||
4068 | ret->ioprio_changed = 0; | ||
4069 | ret->ioprio = 0; | ||
4070 | ret->last_waited = jiffies; /* doesn't matter... */ | ||
4071 | ret->nr_batch_requests = 0; /* because this is 0 */ | ||
4072 | ret->aic = NULL; | ||
4073 | INIT_RADIX_TREE(&ret->radix_root, GFP_ATOMIC | __GFP_HIGH); | ||
4074 | ret->ioc_data = NULL; | ||
4075 | } | ||
4076 | |||
4077 | return ret; | ||
4033 | } | 4078 | } |
4034 | 4079 | ||
4035 | /* | 4080 | /* |
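The first ll_rw_blk.c hunk changes put_io_context() to report its result: it returns 1 when the last reference is dropped and the io_context is freed, 0 otherwise. Below is a hedged userspace sketch of that contract built on a C11 atomic refcount (the kernel itself uses atomic_dec_and_test()); ioc_put() is a name invented for the example.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct ioc {
	atomic_int refcount;
	/* ... payload ... */
};

/* Drop one reference; free the object and return 1 if it was the last one,
 * 0 otherwise. Mirrors the new put_io_context() return convention. */
static int ioc_put(struct ioc *ioc)
{
	if (!ioc)
		return 1;

	/* fetch_sub returns the value *before* the decrement */
	if (atomic_fetch_sub(&ioc->refcount, 1) == 1) {
		free(ioc);
		return 1;
	}
	return 0;
}

int main(void)
{
	struct ioc *ioc = calloc(1, sizeof(*ioc));

	atomic_init(&ioc->refcount, 2);
	printf("first put: %d\n", ioc_put(ioc));	/* 0: still referenced */
	printf("last put:  %d\n", ioc_put(ioc));	/* 1: freed */
	return 0;
}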
@@ -4049,16 +4094,8 @@ static struct io_context *current_io_context(gfp_t gfp_flags, int node) | |||
4049 | if (likely(ret)) | 4094 | if (likely(ret)) |
4050 | return ret; | 4095 | return ret; |
4051 | 4096 | ||
4052 | ret = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node); | 4097 | ret = alloc_io_context(gfp_flags, node); |
4053 | if (ret) { | 4098 | if (ret) { |
4054 | atomic_set(&ret->refcount, 1); | ||
4055 | ret->task = current; | ||
4056 | ret->ioprio_changed = 0; | ||
4057 | ret->last_waited = jiffies; /* doesn't matter... */ | ||
4058 | ret->nr_batch_requests = 0; /* because this is 0 */ | ||
4059 | ret->aic = NULL; | ||
4060 | ret->cic_root.rb_node = NULL; | ||
4061 | ret->ioc_data = NULL; | ||
4062 | /* make sure set_task_ioprio() sees the settings above */ | 4099 | /* make sure set_task_ioprio() sees the settings above */ |
4063 | smp_wmb(); | 4100 | smp_wmb(); |
4064 | tsk->io_context = ret; | 4101 | tsk->io_context = ret; |
@@ -4075,10 +4112,18 @@ static struct io_context *current_io_context(gfp_t gfp_flags, int node) | |||
4075 | */ | 4112 | */ |
4076 | struct io_context *get_io_context(gfp_t gfp_flags, int node) | 4113 | struct io_context *get_io_context(gfp_t gfp_flags, int node) |
4077 | { | 4114 | { |
4078 | struct io_context *ret; | 4115 | struct io_context *ret = NULL; |
4079 | ret = current_io_context(gfp_flags, node); | 4116 | |
4080 | if (likely(ret)) | 4117 | /* |
4081 | atomic_inc(&ret->refcount); | 4118 | * Check for unlikely race with exiting task. ioc ref count is |
4119 | * zero when ioc is being detached. | ||
4120 | */ | ||
4121 | do { | ||
4122 | ret = current_io_context(gfp_flags, node); | ||
4123 | if (unlikely(!ret)) | ||
4124 | break; | ||
4125 | } while (!atomic_inc_not_zero(&ret->refcount)); | ||
4126 | |||
4082 | return ret; | 4127 | return ret; |
4083 | } | 4128 | } |
4084 | EXPORT_SYMBOL(get_io_context); | 4129 | EXPORT_SYMBOL(get_io_context); |
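The final hunk has get_io_context() take its reference with atomic_inc_not_zero() in a retry loop, so it can never revive an io_context whose refcount a racing task exit has already driven to zero. Below is a sketch of that primitive as a C11 compare-and-swap loop; inc_not_zero() is named for the example and is not the kernel helper itself.

#include <stdatomic.h>
#include <stdio.h>

/* Increment *v only if it is currently non-zero; return 1 on success, 0 if
 * the count was already zero. This is the guarantee the kernel's
 * atomic_inc_not_zero() gives the get_io_context() race check. */
static int inc_not_zero(atomic_int *v)
{
	int old = atomic_load(v);

	while (old != 0) {
		if (atomic_compare_exchange_weak(v, &old, old + 1))
			return 1;
		/* old was reloaded by the failed CAS; loop and retry */
	}
	return 0;
}

int main(void)
{
	atomic_int live = 3, dying = 0;

	printf("live:  %d (count now %d)\n", inc_not_zero(&live), atomic_load(&live));
	printf("dying: %d (count now %d)\n", inc_not_zero(&dying), atomic_load(&dying));
	return 0;
}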