-rw-r--r--   block/as-iosched.c          |  34
-rw-r--r--   block/cfq-iosched.c         | 425
-rw-r--r--   block/ll_rw_blk.c           | 109
-rw-r--r--   fs/ioprio.c                 |  30
-rw-r--r--   include/linux/blkdev.h      |  83
-rw-r--r--   include/linux/init_task.h   |   1
-rw-r--r--   include/linux/iocontext.h   |  95
-rw-r--r--   include/linux/ioprio.h      |  13
-rw-r--r--   include/linux/sched.h       |   2
-rw-r--r--   kernel/fork.c               |  37
10 files changed, 474 insertions, 355 deletions
diff --git a/block/as-iosched.c b/block/as-iosched.c
index cb5e53b05c7c..b201d16a7102 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -170,9 +170,11 @@ static void free_as_io_context(struct as_io_context *aic) | |||
170 | 170 | ||
171 | static void as_trim(struct io_context *ioc) | 171 | static void as_trim(struct io_context *ioc) |
172 | { | 172 | { |
173 | spin_lock(&ioc->lock); | ||
173 | if (ioc->aic) | 174 | if (ioc->aic) |
174 | free_as_io_context(ioc->aic); | 175 | free_as_io_context(ioc->aic); |
175 | ioc->aic = NULL; | 176 | ioc->aic = NULL; |
177 | spin_unlock(&ioc->lock); | ||
176 | } | 178 | } |
177 | 179 | ||
178 | /* Called when the task exits */ | 180 | /* Called when the task exits */ |
@@ -462,7 +464,9 @@ static void as_antic_timeout(unsigned long data) | |||
462 | spin_lock_irqsave(q->queue_lock, flags); | 464 | spin_lock_irqsave(q->queue_lock, flags); |
463 | if (ad->antic_status == ANTIC_WAIT_REQ | 465 | if (ad->antic_status == ANTIC_WAIT_REQ |
464 | || ad->antic_status == ANTIC_WAIT_NEXT) { | 466 | || ad->antic_status == ANTIC_WAIT_NEXT) { |
465 | struct as_io_context *aic = ad->io_context->aic; | 467 | struct as_io_context *aic; |
468 | spin_lock(&ad->io_context->lock); | ||
469 | aic = ad->io_context->aic; | ||
466 | 470 | ||
467 | ad->antic_status = ANTIC_FINISHED; | 471 | ad->antic_status = ANTIC_FINISHED; |
468 | kblockd_schedule_work(&ad->antic_work); | 472 | kblockd_schedule_work(&ad->antic_work); |
@@ -475,6 +479,7 @@ static void as_antic_timeout(unsigned long data) | |||
475 | /* process not "saved" by a cooperating request */ | 479 | /* process not "saved" by a cooperating request */ |
476 | ad->exit_no_coop = (7*ad->exit_no_coop + 256)/8; | 480 | ad->exit_no_coop = (7*ad->exit_no_coop + 256)/8; |
477 | } | 481 | } |
482 | spin_unlock(&ad->io_context->lock); | ||
478 | } | 483 | } |
479 | spin_unlock_irqrestore(q->queue_lock, flags); | 484 | spin_unlock_irqrestore(q->queue_lock, flags); |
480 | } | 485 | } |
@@ -635,9 +640,11 @@ static int as_can_break_anticipation(struct as_data *ad, struct request *rq) | |||
635 | 640 | ||
636 | ioc = ad->io_context; | 641 | ioc = ad->io_context; |
637 | BUG_ON(!ioc); | 642 | BUG_ON(!ioc); |
643 | spin_lock(&ioc->lock); | ||
638 | 644 | ||
639 | if (rq && ioc == RQ_IOC(rq)) { | 645 | if (rq && ioc == RQ_IOC(rq)) { |
640 | /* request from same process */ | 646 | /* request from same process */ |
647 | spin_unlock(&ioc->lock); | ||
641 | return 1; | 648 | return 1; |
642 | } | 649 | } |
643 | 650 | ||
@@ -646,20 +653,25 @@ static int as_can_break_anticipation(struct as_data *ad, struct request *rq) | |||
646 | * In this situation status should really be FINISHED, | 653 | * In this situation status should really be FINISHED, |
647 | * however the timer hasn't had the chance to run yet. | 654 | * however the timer hasn't had the chance to run yet. |
648 | */ | 655 | */ |
656 | spin_unlock(&ioc->lock); | ||
649 | return 1; | 657 | return 1; |
650 | } | 658 | } |
651 | 659 | ||
652 | aic = ioc->aic; | 660 | aic = ioc->aic; |
653 | if (!aic) | 661 | if (!aic) { |
662 | spin_unlock(&ioc->lock); | ||
654 | return 0; | 663 | return 0; |
664 | } | ||
655 | 665 | ||
656 | if (atomic_read(&aic->nr_queued) > 0) { | 666 | if (atomic_read(&aic->nr_queued) > 0) { |
657 | /* process has more requests queued */ | 667 | /* process has more requests queued */ |
668 | spin_unlock(&ioc->lock); | ||
658 | return 1; | 669 | return 1; |
659 | } | 670 | } |
660 | 671 | ||
661 | if (atomic_read(&aic->nr_dispatched) > 0) { | 672 | if (atomic_read(&aic->nr_dispatched) > 0) { |
662 | /* process has more requests dispatched */ | 673 | /* process has more requests dispatched */ |
674 | spin_unlock(&ioc->lock); | ||
663 | return 1; | 675 | return 1; |
664 | } | 676 | } |
665 | 677 | ||
@@ -680,6 +692,7 @@ static int as_can_break_anticipation(struct as_data *ad, struct request *rq) | |||
680 | } | 692 | } |
681 | 693 | ||
682 | as_update_iohist(ad, aic, rq); | 694 | as_update_iohist(ad, aic, rq); |
695 | spin_unlock(&ioc->lock); | ||
683 | return 1; | 696 | return 1; |
684 | } | 697 | } |
685 | 698 | ||
@@ -688,20 +701,27 @@ static int as_can_break_anticipation(struct as_data *ad, struct request *rq) | |||
688 | if (aic->ttime_samples == 0) | 701 | if (aic->ttime_samples == 0) |
689 | ad->exit_prob = (7*ad->exit_prob + 256)/8; | 702 | ad->exit_prob = (7*ad->exit_prob + 256)/8; |
690 | 703 | ||
691 | if (ad->exit_no_coop > 128) | 704 | if (ad->exit_no_coop > 128) { |
705 | spin_unlock(&ioc->lock); | ||
692 | return 1; | 706 | return 1; |
707 | } | ||
693 | } | 708 | } |
694 | 709 | ||
695 | if (aic->ttime_samples == 0) { | 710 | if (aic->ttime_samples == 0) { |
696 | if (ad->new_ttime_mean > ad->antic_expire) | 711 | if (ad->new_ttime_mean > ad->antic_expire) { |
712 | spin_unlock(&ioc->lock); | ||
697 | return 1; | 713 | return 1; |
698 | if (ad->exit_prob * ad->exit_no_coop > 128*256) | 714 | } |
715 | if (ad->exit_prob * ad->exit_no_coop > 128*256) { | ||
716 | spin_unlock(&ioc->lock); | ||
699 | return 1; | 717 | return 1; |
718 | } | ||
700 | } else if (aic->ttime_mean > ad->antic_expire) { | 719 | } else if (aic->ttime_mean > ad->antic_expire) { |
701 | /* the process thinks too much between requests */ | 720 | /* the process thinks too much between requests */ |
721 | spin_unlock(&ioc->lock); | ||
702 | return 1; | 722 | return 1; |
703 | } | 723 | } |
704 | 724 | spin_unlock(&ioc->lock); | |
705 | return 0; | 725 | return 0; |
706 | } | 726 | } |
707 | 727 | ||
@@ -1255,7 +1275,9 @@ static void as_merged_requests(struct request_queue *q, struct request *req, | |||
1255 | * Don't copy here but swap, because when anext is | 1275 | * Don't copy here but swap, because when anext is |
1256 | * removed below, it must contain the unused context | 1276 | * removed below, it must contain the unused context |
1257 | */ | 1277 | */ |
1278 | double_spin_lock(&rioc->lock, &nioc->lock, rioc < nioc); | ||
1258 | swap_io_context(&rioc, &nioc); | 1279 | swap_io_context(&rioc, &nioc); |
1280 | double_spin_unlock(&rioc->lock, &nioc->lock, rioc < nioc); | ||
1259 | } | 1281 | } |
1260 | } | 1282 | } |
1261 | 1283 | ||
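The as-iosched.c hunks above take the new ioc->lock around the per-process io_context state they touch, and as_merged_requests() has to hold two of those locks at once while it swaps contexts. Passing rioc < nioc into double_spin_lock() makes every caller acquire the lower-addressed lock first, which is what rules out ABBA deadlock between two concurrent merges. A minimal userspace sketch of that ordering idea, with hypothetical names and pthread mutexes standing in for the spinlocks:

#include <pthread.h>
#include <stdint.h>

/* Hypothetical stand-in for an io_context with its own lock. */
struct ctx {
	pthread_mutex_t lock;
	int prio;
};

/* Always lock the lower-addressed context first so two threads swapping
 * A<->B and B<->A cannot deadlock. */
static void lock_pair(struct ctx *a, struct ctx *b)
{
	if ((uintptr_t) a < (uintptr_t) b) {
		pthread_mutex_lock(&a->lock);
		pthread_mutex_lock(&b->lock);
	} else {
		pthread_mutex_lock(&b->lock);
		pthread_mutex_lock(&a->lock);
	}
}

static void unlock_pair(struct ctx *a, struct ctx *b)
{
	pthread_mutex_unlock(&a->lock);
	pthread_mutex_unlock(&b->lock);
}

static void swap_prio(struct ctx *a, struct ctx *b)
{
	int tmp;

	lock_pair(a, b);
	tmp = a->prio;
	a->prio = b->prio;
	b->prio = tmp;
	unlock_pair(a, b);
}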
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 13553e015d72..f28d1fb30608 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -26,9 +26,9 @@ static const int cfq_slice_async_rq = 2; | |||
26 | static int cfq_slice_idle = HZ / 125; | 26 | static int cfq_slice_idle = HZ / 125; |
27 | 27 | ||
28 | /* | 28 | /* |
29 | * grace period before allowing idle class to get disk access | 29 | * offset from end of service tree |
30 | */ | 30 | */ |
31 | #define CFQ_IDLE_GRACE (HZ / 10) | 31 | #define CFQ_IDLE_DELAY (HZ / 5) |
32 | 32 | ||
33 | /* | 33 | /* |
34 | * below this threshold, we consider thinktime immediate | 34 | * below this threshold, we consider thinktime immediate |
@@ -98,8 +98,6 @@ struct cfq_data { | |||
98 | struct cfq_queue *async_cfqq[2][IOPRIO_BE_NR]; | 98 | struct cfq_queue *async_cfqq[2][IOPRIO_BE_NR]; |
99 | struct cfq_queue *async_idle_cfqq; | 99 | struct cfq_queue *async_idle_cfqq; |
100 | 100 | ||
101 | struct timer_list idle_class_timer; | ||
102 | |||
103 | sector_t last_position; | 101 | sector_t last_position; |
104 | unsigned long last_end_request; | 102 | unsigned long last_end_request; |
105 | 103 | ||
@@ -199,8 +197,8 @@ CFQ_CFQQ_FNS(sync); | |||
199 | 197 | ||
200 | static void cfq_dispatch_insert(struct request_queue *, struct request *); | 198 | static void cfq_dispatch_insert(struct request_queue *, struct request *); |
201 | static struct cfq_queue *cfq_get_queue(struct cfq_data *, int, | 199 | static struct cfq_queue *cfq_get_queue(struct cfq_data *, int, |
202 | struct task_struct *, gfp_t); | 200 | struct io_context *, gfp_t); |
203 | static struct cfq_io_context *cfq_cic_rb_lookup(struct cfq_data *, | 201 | static struct cfq_io_context *cfq_cic_lookup(struct cfq_data *, |
204 | struct io_context *); | 202 | struct io_context *); |
205 | 203 | ||
206 | static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_context *cic, | 204 | static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_context *cic, |
@@ -384,12 +382,15 @@ cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2) | |||
384 | /* | 382 | /* |
385 | * The below is leftmost cache rbtree addon | 383 | * The below is leftmost cache rbtree addon |
386 | */ | 384 | */ |
387 | static struct rb_node *cfq_rb_first(struct cfq_rb_root *root) | 385 | static struct cfq_queue *cfq_rb_first(struct cfq_rb_root *root) |
388 | { | 386 | { |
389 | if (!root->left) | 387 | if (!root->left) |
390 | root->left = rb_first(&root->rb); | 388 | root->left = rb_first(&root->rb); |
391 | 389 | ||
392 | return root->left; | 390 | if (root->left) |
391 | return rb_entry(root->left, struct cfq_queue, rb_node); | ||
392 | |||
393 | return NULL; | ||
393 | } | 394 | } |
394 | 395 | ||
395 | static void cfq_rb_erase(struct rb_node *n, struct cfq_rb_root *root) | 396 | static void cfq_rb_erase(struct rb_node *n, struct cfq_rb_root *root) |
@@ -446,12 +447,20 @@ static unsigned long cfq_slice_offset(struct cfq_data *cfqd, | |||
446 | static void cfq_service_tree_add(struct cfq_data *cfqd, | 447 | static void cfq_service_tree_add(struct cfq_data *cfqd, |
447 | struct cfq_queue *cfqq, int add_front) | 448 | struct cfq_queue *cfqq, int add_front) |
448 | { | 449 | { |
449 | struct rb_node **p = &cfqd->service_tree.rb.rb_node; | 450 | struct rb_node **p, *parent; |
450 | struct rb_node *parent = NULL; | 451 | struct cfq_queue *__cfqq; |
451 | unsigned long rb_key; | 452 | unsigned long rb_key; |
452 | int left; | 453 | int left; |
453 | 454 | ||
454 | if (!add_front) { | 455 | if (cfq_class_idle(cfqq)) { |
456 | rb_key = CFQ_IDLE_DELAY; | ||
457 | parent = rb_last(&cfqd->service_tree.rb); | ||
458 | if (parent && parent != &cfqq->rb_node) { | ||
459 | __cfqq = rb_entry(parent, struct cfq_queue, rb_node); | ||
460 | rb_key += __cfqq->rb_key; | ||
461 | } else | ||
462 | rb_key += jiffies; | ||
463 | } else if (!add_front) { | ||
455 | rb_key = cfq_slice_offset(cfqd, cfqq) + jiffies; | 464 | rb_key = cfq_slice_offset(cfqd, cfqq) + jiffies; |
456 | rb_key += cfqq->slice_resid; | 465 | rb_key += cfqq->slice_resid; |
457 | cfqq->slice_resid = 0; | 466 | cfqq->slice_resid = 0; |
@@ -469,8 +478,9 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, | |||
469 | } | 478 | } |
470 | 479 | ||
471 | left = 1; | 480 | left = 1; |
481 | parent = NULL; | ||
482 | p = &cfqd->service_tree.rb.rb_node; | ||
472 | while (*p) { | 483 | while (*p) { |
473 | struct cfq_queue *__cfqq; | ||
474 | struct rb_node **n; | 484 | struct rb_node **n; |
475 | 485 | ||
476 | parent = *p; | 486 | parent = *p; |
@@ -524,8 +534,7 @@ static void cfq_resort_rr_list(struct cfq_data *cfqd, struct cfq_queue *cfqq) | |||
524 | * add to busy list of queues for service, trying to be fair in ordering | 534 | * add to busy list of queues for service, trying to be fair in ordering |
525 | * the pending list according to last request service | 535 | * the pending list according to last request service |
526 | */ | 536 | */ |
527 | static inline void | 537 | static void cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) |
528 | cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) | ||
529 | { | 538 | { |
530 | BUG_ON(cfq_cfqq_on_rr(cfqq)); | 539 | BUG_ON(cfq_cfqq_on_rr(cfqq)); |
531 | cfq_mark_cfqq_on_rr(cfqq); | 540 | cfq_mark_cfqq_on_rr(cfqq); |
@@ -538,8 +547,7 @@ cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) | |||
538 | * Called when the cfqq no longer has requests pending, remove it from | 547 | * Called when the cfqq no longer has requests pending, remove it from |
539 | * the service tree. | 548 | * the service tree. |
540 | */ | 549 | */ |
541 | static inline void | 550 | static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) |
542 | cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) | ||
543 | { | 551 | { |
544 | BUG_ON(!cfq_cfqq_on_rr(cfqq)); | 552 | BUG_ON(!cfq_cfqq_on_rr(cfqq)); |
545 | cfq_clear_cfqq_on_rr(cfqq); | 553 | cfq_clear_cfqq_on_rr(cfqq); |
@@ -554,7 +562,7 @@ cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) | |||
554 | /* | 562 | /* |
555 | * rb tree support functions | 563 | * rb tree support functions |
556 | */ | 564 | */ |
557 | static inline void cfq_del_rq_rb(struct request *rq) | 565 | static void cfq_del_rq_rb(struct request *rq) |
558 | { | 566 | { |
559 | struct cfq_queue *cfqq = RQ_CFQQ(rq); | 567 | struct cfq_queue *cfqq = RQ_CFQQ(rq); |
560 | struct cfq_data *cfqd = cfqq->cfqd; | 568 | struct cfq_data *cfqd = cfqq->cfqd; |
@@ -594,8 +602,7 @@ static void cfq_add_rq_rb(struct request *rq) | |||
594 | BUG_ON(!cfqq->next_rq); | 602 | BUG_ON(!cfqq->next_rq); |
595 | } | 603 | } |
596 | 604 | ||
597 | static inline void | 605 | static void cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq) |
598 | cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq) | ||
599 | { | 606 | { |
600 | elv_rb_del(&cfqq->sort_list, rq); | 607 | elv_rb_del(&cfqq->sort_list, rq); |
601 | cfqq->queued[rq_is_sync(rq)]--; | 608 | cfqq->queued[rq_is_sync(rq)]--; |
@@ -609,7 +616,7 @@ cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio) | |||
609 | struct cfq_io_context *cic; | 616 | struct cfq_io_context *cic; |
610 | struct cfq_queue *cfqq; | 617 | struct cfq_queue *cfqq; |
611 | 618 | ||
612 | cic = cfq_cic_rb_lookup(cfqd, tsk->io_context); | 619 | cic = cfq_cic_lookup(cfqd, tsk->io_context); |
613 | if (!cic) | 620 | if (!cic) |
614 | return NULL; | 621 | return NULL; |
615 | 622 | ||
@@ -721,7 +728,7 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq, | |||
721 | * Lookup the cfqq that this bio will be queued with. Allow | 728 | * Lookup the cfqq that this bio will be queued with. Allow |
722 | * merge only if rq is queued there. | 729 | * merge only if rq is queued there. |
723 | */ | 730 | */ |
724 | cic = cfq_cic_rb_lookup(cfqd, current->io_context); | 731 | cic = cfq_cic_lookup(cfqd, current->io_context); |
725 | if (!cic) | 732 | if (!cic) |
726 | return 0; | 733 | return 0; |
727 | 734 | ||
@@ -732,15 +739,10 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq, | |||
732 | return 0; | 739 | return 0; |
733 | } | 740 | } |
734 | 741 | ||
735 | static inline void | 742 | static void __cfq_set_active_queue(struct cfq_data *cfqd, |
736 | __cfq_set_active_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) | 743 | struct cfq_queue *cfqq) |
737 | { | 744 | { |
738 | if (cfqq) { | 745 | if (cfqq) { |
739 | /* | ||
740 | * stop potential idle class queues waiting service | ||
741 | */ | ||
742 | del_timer(&cfqd->idle_class_timer); | ||
743 | |||
744 | cfqq->slice_end = 0; | 746 | cfqq->slice_end = 0; |
745 | cfq_clear_cfqq_must_alloc_slice(cfqq); | 747 | cfq_clear_cfqq_must_alloc_slice(cfqq); |
746 | cfq_clear_cfqq_fifo_expire(cfqq); | 748 | cfq_clear_cfqq_fifo_expire(cfqq); |
@@ -789,47 +791,16 @@ static inline void cfq_slice_expired(struct cfq_data *cfqd, int timed_out) | |||
789 | __cfq_slice_expired(cfqd, cfqq, timed_out); | 791 | __cfq_slice_expired(cfqd, cfqq, timed_out); |
790 | } | 792 | } |
791 | 793 | ||
792 | static int start_idle_class_timer(struct cfq_data *cfqd) | ||
793 | { | ||
794 | unsigned long end = cfqd->last_end_request + CFQ_IDLE_GRACE; | ||
795 | unsigned long now = jiffies; | ||
796 | |||
797 | if (time_before(now, end) && | ||
798 | time_after_eq(now, cfqd->last_end_request)) { | ||
799 | mod_timer(&cfqd->idle_class_timer, end); | ||
800 | return 1; | ||
801 | } | ||
802 | |||
803 | return 0; | ||
804 | } | ||
805 | |||
806 | /* | 794 | /* |
807 | * Get next queue for service. Unless we have a queue preemption, | 795 | * Get next queue for service. Unless we have a queue preemption, |
808 | * we'll simply select the first cfqq in the service tree. | 796 | * we'll simply select the first cfqq in the service tree. |
809 | */ | 797 | */ |
810 | static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd) | 798 | static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd) |
811 | { | 799 | { |
812 | struct cfq_queue *cfqq; | ||
813 | struct rb_node *n; | ||
814 | |||
815 | if (RB_EMPTY_ROOT(&cfqd->service_tree.rb)) | 800 | if (RB_EMPTY_ROOT(&cfqd->service_tree.rb)) |
816 | return NULL; | 801 | return NULL; |
817 | 802 | ||
818 | n = cfq_rb_first(&cfqd->service_tree); | 803 | return cfq_rb_first(&cfqd->service_tree); |
819 | cfqq = rb_entry(n, struct cfq_queue, rb_node); | ||
820 | |||
821 | if (cfq_class_idle(cfqq)) { | ||
822 | /* | ||
823 | * if we have idle queues and no rt or be queues had | ||
824 | * pending requests, either allow immediate service if | ||
825 | * the grace period has passed or arm the idle grace | ||
826 | * timer | ||
827 | */ | ||
828 | if (start_idle_class_timer(cfqd)) | ||
829 | cfqq = NULL; | ||
830 | } | ||
831 | |||
832 | return cfqq; | ||
833 | } | 804 | } |
834 | 805 | ||
835 | /* | 806 | /* |
@@ -895,7 +866,7 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd) | |||
895 | * task has exited, don't wait | 866 | * task has exited, don't wait |
896 | */ | 867 | */ |
897 | cic = cfqd->active_cic; | 868 | cic = cfqd->active_cic; |
898 | if (!cic || !cic->ioc->task) | 869 | if (!cic || !atomic_read(&cic->ioc->nr_tasks)) |
899 | return; | 870 | return; |
900 | 871 | ||
901 | /* | 872 | /* |
@@ -939,7 +910,7 @@ static void cfq_dispatch_insert(struct request_queue *q, struct request *rq) | |||
939 | /* | 910 | /* |
940 | * return expired entry, or NULL to just start from scratch in rbtree | 911 | * return expired entry, or NULL to just start from scratch in rbtree |
941 | */ | 912 | */ |
942 | static inline struct request *cfq_check_fifo(struct cfq_queue *cfqq) | 913 | static struct request *cfq_check_fifo(struct cfq_queue *cfqq) |
943 | { | 914 | { |
944 | struct cfq_data *cfqd = cfqq->cfqd; | 915 | struct cfq_data *cfqd = cfqq->cfqd; |
945 | struct request *rq; | 916 | struct request *rq; |
@@ -1068,7 +1039,7 @@ __cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
1068 | return dispatched; | 1039 | return dispatched; |
1069 | } | 1040 | } |
1070 | 1041 | ||
1071 | static inline int __cfq_forced_dispatch_cfqq(struct cfq_queue *cfqq) | 1042 | static int __cfq_forced_dispatch_cfqq(struct cfq_queue *cfqq) |
1072 | { | 1043 | { |
1073 | int dispatched = 0; | 1044 | int dispatched = 0; |
1074 | 1045 | ||
@@ -1087,14 +1058,11 @@ static inline int __cfq_forced_dispatch_cfqq(struct cfq_queue *cfqq) | |||
1087 | */ | 1058 | */ |
1088 | static int cfq_forced_dispatch(struct cfq_data *cfqd) | 1059 | static int cfq_forced_dispatch(struct cfq_data *cfqd) |
1089 | { | 1060 | { |
1061 | struct cfq_queue *cfqq; | ||
1090 | int dispatched = 0; | 1062 | int dispatched = 0; |
1091 | struct rb_node *n; | ||
1092 | |||
1093 | while ((n = cfq_rb_first(&cfqd->service_tree)) != NULL) { | ||
1094 | struct cfq_queue *cfqq = rb_entry(n, struct cfq_queue, rb_node); | ||
1095 | 1063 | ||
1064 | while ((cfqq = cfq_rb_first(&cfqd->service_tree)) != NULL) | ||
1096 | dispatched += __cfq_forced_dispatch_cfqq(cfqq); | 1065 | dispatched += __cfq_forced_dispatch_cfqq(cfqq); |
1097 | } | ||
1098 | 1066 | ||
1099 | cfq_slice_expired(cfqd, 0); | 1067 | cfq_slice_expired(cfqd, 0); |
1100 | 1068 | ||
@@ -1170,20 +1138,69 @@ static void cfq_put_queue(struct cfq_queue *cfqq) | |||
1170 | kmem_cache_free(cfq_pool, cfqq); | 1138 | kmem_cache_free(cfq_pool, cfqq); |
1171 | } | 1139 | } |
1172 | 1140 | ||
1173 | static void cfq_free_io_context(struct io_context *ioc) | 1141 | /* |
1142 | * Call func for each cic attached to this ioc. Returns number of cic's seen. | ||
1143 | */ | ||
1144 | #define CIC_GANG_NR 16 | ||
1145 | static unsigned int | ||
1146 | call_for_each_cic(struct io_context *ioc, | ||
1147 | void (*func)(struct io_context *, struct cfq_io_context *)) | ||
1174 | { | 1148 | { |
1175 | struct cfq_io_context *__cic; | 1149 | struct cfq_io_context *cics[CIC_GANG_NR]; |
1176 | struct rb_node *n; | 1150 | unsigned long index = 0; |
1177 | int freed = 0; | 1151 | unsigned int called = 0; |
1152 | int nr; | ||
1178 | 1153 | ||
1179 | ioc->ioc_data = NULL; | 1154 | rcu_read_lock(); |
1180 | 1155 | ||
1181 | while ((n = rb_first(&ioc->cic_root)) != NULL) { | 1156 | do { |
1182 | __cic = rb_entry(n, struct cfq_io_context, rb_node); | 1157 | int i; |
1183 | rb_erase(&__cic->rb_node, &ioc->cic_root); | 1158 | |
1184 | kmem_cache_free(cfq_ioc_pool, __cic); | 1159 | /* |
1185 | freed++; | 1160 | * Perhaps there's a better way - this just gang lookups from |
1186 | } | 1161 | * 0 to the end, restarting after each CIC_GANG_NR from the |
1162 | * last key + 1. | ||
1163 | */ | ||
1164 | nr = radix_tree_gang_lookup(&ioc->radix_root, (void **) cics, | ||
1165 | index, CIC_GANG_NR); | ||
1166 | if (!nr) | ||
1167 | break; | ||
1168 | |||
1169 | called += nr; | ||
1170 | index = 1 + (unsigned long) cics[nr - 1]->key; | ||
1171 | |||
1172 | for (i = 0; i < nr; i++) | ||
1173 | func(ioc, cics[i]); | ||
1174 | } while (nr == CIC_GANG_NR); | ||
1175 | |||
1176 | rcu_read_unlock(); | ||
1177 | |||
1178 | return called; | ||
1179 | } | ||
1180 | |||
1181 | static void cic_free_func(struct io_context *ioc, struct cfq_io_context *cic) | ||
1182 | { | ||
1183 | unsigned long flags; | ||
1184 | |||
1185 | BUG_ON(!cic->dead_key); | ||
1186 | |||
1187 | spin_lock_irqsave(&ioc->lock, flags); | ||
1188 | radix_tree_delete(&ioc->radix_root, cic->dead_key); | ||
1189 | spin_unlock_irqrestore(&ioc->lock, flags); | ||
1190 | |||
1191 | kmem_cache_free(cfq_ioc_pool, cic); | ||
1192 | } | ||
1193 | |||
1194 | static void cfq_free_io_context(struct io_context *ioc) | ||
1195 | { | ||
1196 | int freed; | ||
1197 | |||
1198 | /* | ||
1199 | * ioc->refcount is zero here, so no more cic's are allowed to be | ||
1200 | * linked into this ioc. So it should be ok to iterate over the known | ||
1201 | * list, we will see all cic's since no new ones are added. | ||
1202 | */ | ||
1203 | freed = call_for_each_cic(ioc, cic_free_func); | ||
1187 | 1204 | ||
1188 | elv_ioc_count_mod(ioc_count, -freed); | 1205 | elv_ioc_count_mod(ioc_count, -freed); |
1189 | 1206 | ||
@@ -1205,7 +1222,12 @@ static void __cfq_exit_single_io_context(struct cfq_data *cfqd, | |||
1205 | struct cfq_io_context *cic) | 1222 | struct cfq_io_context *cic) |
1206 | { | 1223 | { |
1207 | list_del_init(&cic->queue_list); | 1224 | list_del_init(&cic->queue_list); |
1225 | |||
1226 | /* | ||
1227 | * Make sure key == NULL is seen for dead queues | ||
1228 | */ | ||
1208 | smp_wmb(); | 1229 | smp_wmb(); |
1230 | cic->dead_key = (unsigned long) cic->key; | ||
1209 | cic->key = NULL; | 1231 | cic->key = NULL; |
1210 | 1232 | ||
1211 | if (cic->cfqq[ASYNC]) { | 1233 | if (cic->cfqq[ASYNC]) { |
@@ -1219,16 +1241,18 @@ static void __cfq_exit_single_io_context(struct cfq_data *cfqd, | |||
1219 | } | 1241 | } |
1220 | } | 1242 | } |
1221 | 1243 | ||
1222 | static void cfq_exit_single_io_context(struct cfq_io_context *cic) | 1244 | static void cfq_exit_single_io_context(struct io_context *ioc, |
1245 | struct cfq_io_context *cic) | ||
1223 | { | 1246 | { |
1224 | struct cfq_data *cfqd = cic->key; | 1247 | struct cfq_data *cfqd = cic->key; |
1225 | 1248 | ||
1226 | if (cfqd) { | 1249 | if (cfqd) { |
1227 | struct request_queue *q = cfqd->queue; | 1250 | struct request_queue *q = cfqd->queue; |
1251 | unsigned long flags; | ||
1228 | 1252 | ||
1229 | spin_lock_irq(q->queue_lock); | 1253 | spin_lock_irqsave(q->queue_lock, flags); |
1230 | __cfq_exit_single_io_context(cfqd, cic); | 1254 | __cfq_exit_single_io_context(cfqd, cic); |
1231 | spin_unlock_irq(q->queue_lock); | 1255 | spin_unlock_irqrestore(q->queue_lock, flags); |
1232 | } | 1256 | } |
1233 | } | 1257 | } |
1234 | 1258 | ||
@@ -1238,21 +1262,8 @@ static void cfq_exit_single_io_context(struct cfq_io_context *cic) | |||
1238 | */ | 1262 | */ |
1239 | static void cfq_exit_io_context(struct io_context *ioc) | 1263 | static void cfq_exit_io_context(struct io_context *ioc) |
1240 | { | 1264 | { |
1241 | struct cfq_io_context *__cic; | 1265 | rcu_assign_pointer(ioc->ioc_data, NULL); |
1242 | struct rb_node *n; | 1266 | call_for_each_cic(ioc, cfq_exit_single_io_context); |
1243 | |||
1244 | ioc->ioc_data = NULL; | ||
1245 | |||
1246 | /* | ||
1247 | * put the reference this task is holding to the various queues | ||
1248 | */ | ||
1249 | n = rb_first(&ioc->cic_root); | ||
1250 | while (n != NULL) { | ||
1251 | __cic = rb_entry(n, struct cfq_io_context, rb_node); | ||
1252 | |||
1253 | cfq_exit_single_io_context(__cic); | ||
1254 | n = rb_next(n); | ||
1255 | } | ||
1256 | } | 1267 | } |
1257 | 1268 | ||
1258 | static struct cfq_io_context * | 1269 | static struct cfq_io_context * |
@@ -1273,7 +1284,7 @@ cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) | |||
1273 | return cic; | 1284 | return cic; |
1274 | } | 1285 | } |
1275 | 1286 | ||
1276 | static void cfq_init_prio_data(struct cfq_queue *cfqq) | 1287 | static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc) |
1277 | { | 1288 | { |
1278 | struct task_struct *tsk = current; | 1289 | struct task_struct *tsk = current; |
1279 | int ioprio_class; | 1290 | int ioprio_class; |
@@ -1281,7 +1292,7 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq) | |||
1281 | if (!cfq_cfqq_prio_changed(cfqq)) | 1292 | if (!cfq_cfqq_prio_changed(cfqq)) |
1282 | return; | 1293 | return; |
1283 | 1294 | ||
1284 | ioprio_class = IOPRIO_PRIO_CLASS(tsk->ioprio); | 1295 | ioprio_class = IOPRIO_PRIO_CLASS(ioc->ioprio); |
1285 | switch (ioprio_class) { | 1296 | switch (ioprio_class) { |
1286 | default: | 1297 | default: |
1287 | printk(KERN_ERR "cfq: bad prio %x\n", ioprio_class); | 1298 | printk(KERN_ERR "cfq: bad prio %x\n", ioprio_class); |
@@ -1293,11 +1304,11 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq) | |||
1293 | cfqq->ioprio_class = IOPRIO_CLASS_BE; | 1304 | cfqq->ioprio_class = IOPRIO_CLASS_BE; |
1294 | break; | 1305 | break; |
1295 | case IOPRIO_CLASS_RT: | 1306 | case IOPRIO_CLASS_RT: |
1296 | cfqq->ioprio = task_ioprio(tsk); | 1307 | cfqq->ioprio = task_ioprio(ioc); |
1297 | cfqq->ioprio_class = IOPRIO_CLASS_RT; | 1308 | cfqq->ioprio_class = IOPRIO_CLASS_RT; |
1298 | break; | 1309 | break; |
1299 | case IOPRIO_CLASS_BE: | 1310 | case IOPRIO_CLASS_BE: |
1300 | cfqq->ioprio = task_ioprio(tsk); | 1311 | cfqq->ioprio = task_ioprio(ioc); |
1301 | cfqq->ioprio_class = IOPRIO_CLASS_BE; | 1312 | cfqq->ioprio_class = IOPRIO_CLASS_BE; |
1302 | break; | 1313 | break; |
1303 | case IOPRIO_CLASS_IDLE: | 1314 | case IOPRIO_CLASS_IDLE: |
@@ -1316,7 +1327,7 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq) | |||
1316 | cfq_clear_cfqq_prio_changed(cfqq); | 1327 | cfq_clear_cfqq_prio_changed(cfqq); |
1317 | } | 1328 | } |
1318 | 1329 | ||
1319 | static inline void changed_ioprio(struct cfq_io_context *cic) | 1330 | static void changed_ioprio(struct io_context *ioc, struct cfq_io_context *cic) |
1320 | { | 1331 | { |
1321 | struct cfq_data *cfqd = cic->key; | 1332 | struct cfq_data *cfqd = cic->key; |
1322 | struct cfq_queue *cfqq; | 1333 | struct cfq_queue *cfqq; |
@@ -1330,8 +1341,7 @@ static inline void changed_ioprio(struct cfq_io_context *cic) | |||
1330 | cfqq = cic->cfqq[ASYNC]; | 1341 | cfqq = cic->cfqq[ASYNC]; |
1331 | if (cfqq) { | 1342 | if (cfqq) { |
1332 | struct cfq_queue *new_cfqq; | 1343 | struct cfq_queue *new_cfqq; |
1333 | new_cfqq = cfq_get_queue(cfqd, ASYNC, cic->ioc->task, | 1344 | new_cfqq = cfq_get_queue(cfqd, ASYNC, cic->ioc, GFP_ATOMIC); |
1334 | GFP_ATOMIC); | ||
1335 | if (new_cfqq) { | 1345 | if (new_cfqq) { |
1336 | cic->cfqq[ASYNC] = new_cfqq; | 1346 | cic->cfqq[ASYNC] = new_cfqq; |
1337 | cfq_put_queue(cfqq); | 1347 | cfq_put_queue(cfqq); |
@@ -1347,29 +1357,19 @@ static inline void changed_ioprio(struct cfq_io_context *cic) | |||
1347 | 1357 | ||
1348 | static void cfq_ioc_set_ioprio(struct io_context *ioc) | 1358 | static void cfq_ioc_set_ioprio(struct io_context *ioc) |
1349 | { | 1359 | { |
1350 | struct cfq_io_context *cic; | 1360 | call_for_each_cic(ioc, changed_ioprio); |
1351 | struct rb_node *n; | ||
1352 | |||
1353 | ioc->ioprio_changed = 0; | 1361 | ioc->ioprio_changed = 0; |
1354 | |||
1355 | n = rb_first(&ioc->cic_root); | ||
1356 | while (n != NULL) { | ||
1357 | cic = rb_entry(n, struct cfq_io_context, rb_node); | ||
1358 | |||
1359 | changed_ioprio(cic); | ||
1360 | n = rb_next(n); | ||
1361 | } | ||
1362 | } | 1362 | } |
1363 | 1363 | ||
1364 | static struct cfq_queue * | 1364 | static struct cfq_queue * |
1365 | cfq_find_alloc_queue(struct cfq_data *cfqd, int is_sync, | 1365 | cfq_find_alloc_queue(struct cfq_data *cfqd, int is_sync, |
1366 | struct task_struct *tsk, gfp_t gfp_mask) | 1366 | struct io_context *ioc, gfp_t gfp_mask) |
1367 | { | 1367 | { |
1368 | struct cfq_queue *cfqq, *new_cfqq = NULL; | 1368 | struct cfq_queue *cfqq, *new_cfqq = NULL; |
1369 | struct cfq_io_context *cic; | 1369 | struct cfq_io_context *cic; |
1370 | 1370 | ||
1371 | retry: | 1371 | retry: |
1372 | cic = cfq_cic_rb_lookup(cfqd, tsk->io_context); | 1372 | cic = cfq_cic_lookup(cfqd, ioc); |
1373 | /* cic always exists here */ | 1373 | /* cic always exists here */ |
1374 | cfqq = cic_to_cfqq(cic, is_sync); | 1374 | cfqq = cic_to_cfqq(cic, is_sync); |
1375 | 1375 | ||
@@ -1404,15 +1404,16 @@ retry: | |||
1404 | atomic_set(&cfqq->ref, 0); | 1404 | atomic_set(&cfqq->ref, 0); |
1405 | cfqq->cfqd = cfqd; | 1405 | cfqq->cfqd = cfqd; |
1406 | 1406 | ||
1407 | if (is_sync) { | ||
1408 | cfq_mark_cfqq_idle_window(cfqq); | ||
1409 | cfq_mark_cfqq_sync(cfqq); | ||
1410 | } | ||
1411 | |||
1412 | cfq_mark_cfqq_prio_changed(cfqq); | 1407 | cfq_mark_cfqq_prio_changed(cfqq); |
1413 | cfq_mark_cfqq_queue_new(cfqq); | 1408 | cfq_mark_cfqq_queue_new(cfqq); |
1414 | 1409 | ||
1415 | cfq_init_prio_data(cfqq); | 1410 | cfq_init_prio_data(cfqq, ioc); |
1411 | |||
1412 | if (is_sync) { | ||
1413 | if (!cfq_class_idle(cfqq)) | ||
1414 | cfq_mark_cfqq_idle_window(cfqq); | ||
1415 | cfq_mark_cfqq_sync(cfqq); | ||
1416 | } | ||
1416 | } | 1417 | } |
1417 | 1418 | ||
1418 | if (new_cfqq) | 1419 | if (new_cfqq) |
@@ -1439,11 +1440,11 @@ cfq_async_queue_prio(struct cfq_data *cfqd, int ioprio_class, int ioprio) | |||
1439 | } | 1440 | } |
1440 | 1441 | ||
1441 | static struct cfq_queue * | 1442 | static struct cfq_queue * |
1442 | cfq_get_queue(struct cfq_data *cfqd, int is_sync, struct task_struct *tsk, | 1443 | cfq_get_queue(struct cfq_data *cfqd, int is_sync, struct io_context *ioc, |
1443 | gfp_t gfp_mask) | 1444 | gfp_t gfp_mask) |
1444 | { | 1445 | { |
1445 | const int ioprio = task_ioprio(tsk); | 1446 | const int ioprio = task_ioprio(ioc); |
1446 | const int ioprio_class = task_ioprio_class(tsk); | 1447 | const int ioprio_class = task_ioprio_class(ioc); |
1447 | struct cfq_queue **async_cfqq = NULL; | 1448 | struct cfq_queue **async_cfqq = NULL; |
1448 | struct cfq_queue *cfqq = NULL; | 1449 | struct cfq_queue *cfqq = NULL; |
1449 | 1450 | ||
@@ -1453,7 +1454,7 @@ cfq_get_queue(struct cfq_data *cfqd, int is_sync, struct task_struct *tsk, | |||
1453 | } | 1454 | } |
1454 | 1455 | ||
1455 | if (!cfqq) { | 1456 | if (!cfqq) { |
1456 | cfqq = cfq_find_alloc_queue(cfqd, is_sync, tsk, gfp_mask); | 1457 | cfqq = cfq_find_alloc_queue(cfqd, is_sync, ioc, gfp_mask); |
1457 | if (!cfqq) | 1458 | if (!cfqq) |
1458 | return NULL; | 1459 | return NULL; |
1459 | } | 1460 | } |
@@ -1470,28 +1471,42 @@ cfq_get_queue(struct cfq_data *cfqd, int is_sync, struct task_struct *tsk, | |||
1470 | return cfqq; | 1471 | return cfqq; |
1471 | } | 1472 | } |
1472 | 1473 | ||
1474 | static void cfq_cic_free(struct cfq_io_context *cic) | ||
1475 | { | ||
1476 | kmem_cache_free(cfq_ioc_pool, cic); | ||
1477 | elv_ioc_count_dec(ioc_count); | ||
1478 | |||
1479 | if (ioc_gone && !elv_ioc_count_read(ioc_count)) | ||
1480 | complete(ioc_gone); | ||
1481 | } | ||
1482 | |||
1473 | /* | 1483 | /* |
1474 | * We drop cfq io contexts lazily, so we may find a dead one. | 1484 | * We drop cfq io contexts lazily, so we may find a dead one. |
1475 | */ | 1485 | */ |
1476 | static void | 1486 | static void |
1477 | cfq_drop_dead_cic(struct io_context *ioc, struct cfq_io_context *cic) | 1487 | cfq_drop_dead_cic(struct cfq_data *cfqd, struct io_context *ioc, |
1488 | struct cfq_io_context *cic) | ||
1478 | { | 1489 | { |
1490 | unsigned long flags; | ||
1491 | |||
1479 | WARN_ON(!list_empty(&cic->queue_list)); | 1492 | WARN_ON(!list_empty(&cic->queue_list)); |
1480 | 1493 | ||
1494 | spin_lock_irqsave(&ioc->lock, flags); | ||
1495 | |||
1481 | if (ioc->ioc_data == cic) | 1496 | if (ioc->ioc_data == cic) |
1482 | ioc->ioc_data = NULL; | 1497 | rcu_assign_pointer(ioc->ioc_data, NULL); |
1483 | 1498 | ||
1484 | rb_erase(&cic->rb_node, &ioc->cic_root); | 1499 | radix_tree_delete(&ioc->radix_root, (unsigned long) cfqd); |
1485 | kmem_cache_free(cfq_ioc_pool, cic); | 1500 | spin_unlock_irqrestore(&ioc->lock, flags); |
1486 | elv_ioc_count_dec(ioc_count); | 1501 | |
1502 | cfq_cic_free(cic); | ||
1487 | } | 1503 | } |
1488 | 1504 | ||
1489 | static struct cfq_io_context * | 1505 | static struct cfq_io_context * |
1490 | cfq_cic_rb_lookup(struct cfq_data *cfqd, struct io_context *ioc) | 1506 | cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc) |
1491 | { | 1507 | { |
1492 | struct rb_node *n; | ||
1493 | struct cfq_io_context *cic; | 1508 | struct cfq_io_context *cic; |
1494 | void *k, *key = cfqd; | 1509 | void *k; |
1495 | 1510 | ||
1496 | if (unlikely(!ioc)) | 1511 | if (unlikely(!ioc)) |
1497 | return NULL; | 1512 | return NULL; |
@@ -1499,74 +1514,64 @@ cfq_cic_rb_lookup(struct cfq_data *cfqd, struct io_context *ioc) | |||
1499 | /* | 1514 | /* |
1500 | * we maintain a last-hit cache, to avoid browsing over the tree | 1515 | * we maintain a last-hit cache, to avoid browsing over the tree |
1501 | */ | 1516 | */ |
1502 | cic = ioc->ioc_data; | 1517 | cic = rcu_dereference(ioc->ioc_data); |
1503 | if (cic && cic->key == cfqd) | 1518 | if (cic && cic->key == cfqd) |
1504 | return cic; | 1519 | return cic; |
1505 | 1520 | ||
1506 | restart: | 1521 | do { |
1507 | n = ioc->cic_root.rb_node; | 1522 | rcu_read_lock(); |
1508 | while (n) { | 1523 | cic = radix_tree_lookup(&ioc->radix_root, (unsigned long) cfqd); |
1509 | cic = rb_entry(n, struct cfq_io_context, rb_node); | 1524 | rcu_read_unlock(); |
1525 | if (!cic) | ||
1526 | break; | ||
1510 | /* ->key must be copied to avoid race with cfq_exit_queue() */ | 1527 | /* ->key must be copied to avoid race with cfq_exit_queue() */ |
1511 | k = cic->key; | 1528 | k = cic->key; |
1512 | if (unlikely(!k)) { | 1529 | if (unlikely(!k)) { |
1513 | cfq_drop_dead_cic(ioc, cic); | 1530 | cfq_drop_dead_cic(cfqd, ioc, cic); |
1514 | goto restart; | 1531 | continue; |
1515 | } | 1532 | } |
1516 | 1533 | ||
1517 | if (key < k) | 1534 | rcu_assign_pointer(ioc->ioc_data, cic); |
1518 | n = n->rb_left; | 1535 | break; |
1519 | else if (key > k) | 1536 | } while (1); |
1520 | n = n->rb_right; | ||
1521 | else { | ||
1522 | ioc->ioc_data = cic; | ||
1523 | return cic; | ||
1524 | } | ||
1525 | } | ||
1526 | 1537 | ||
1527 | return NULL; | 1538 | return cic; |
1528 | } | 1539 | } |
1529 | 1540 | ||
1530 | static inline void | 1541 | /* |
1531 | cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc, | 1542 | * Add cic into ioc, using cfqd as the search key. This enables us to lookup |
1532 | struct cfq_io_context *cic) | 1543 | * the process specific cfq io context when entered from the block layer. |
1544 | * Also adds the cic to a per-cfqd list, used when this queue is removed. | ||
1545 | */ | ||
1546 | static int cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc, | ||
1547 | struct cfq_io_context *cic, gfp_t gfp_mask) | ||
1533 | { | 1548 | { |
1534 | struct rb_node **p; | ||
1535 | struct rb_node *parent; | ||
1536 | struct cfq_io_context *__cic; | ||
1537 | unsigned long flags; | 1549 | unsigned long flags; |
1538 | void *k; | 1550 | int ret; |
1539 | 1551 | ||
1540 | cic->ioc = ioc; | 1552 | ret = radix_tree_preload(gfp_mask); |
1541 | cic->key = cfqd; | 1553 | if (!ret) { |
1554 | cic->ioc = ioc; | ||
1555 | cic->key = cfqd; | ||
1542 | 1556 | ||
1543 | restart: | 1557 | spin_lock_irqsave(&ioc->lock, flags); |
1544 | parent = NULL; | 1558 | ret = radix_tree_insert(&ioc->radix_root, |
1545 | p = &ioc->cic_root.rb_node; | 1559 | (unsigned long) cfqd, cic); |
1546 | while (*p) { | 1560 | spin_unlock_irqrestore(&ioc->lock, flags); |
1547 | parent = *p; | ||
1548 | __cic = rb_entry(parent, struct cfq_io_context, rb_node); | ||
1549 | /* ->key must be copied to avoid race with cfq_exit_queue() */ | ||
1550 | k = __cic->key; | ||
1551 | if (unlikely(!k)) { | ||
1552 | cfq_drop_dead_cic(ioc, __cic); | ||
1553 | goto restart; | ||
1554 | } | ||
1555 | 1561 | ||
1556 | if (cic->key < k) | 1562 | radix_tree_preload_end(); |
1557 | p = &(*p)->rb_left; | 1563 | |
1558 | else if (cic->key > k) | 1564 | if (!ret) { |
1559 | p = &(*p)->rb_right; | 1565 | spin_lock_irqsave(cfqd->queue->queue_lock, flags); |
1560 | else | 1566 | list_add(&cic->queue_list, &cfqd->cic_list); |
1561 | BUG(); | 1567 | spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); |
1568 | } | ||
1562 | } | 1569 | } |
1563 | 1570 | ||
1564 | rb_link_node(&cic->rb_node, parent, p); | 1571 | if (ret) |
1565 | rb_insert_color(&cic->rb_node, &ioc->cic_root); | 1572 | printk(KERN_ERR "cfq: cic link failed!\n"); |
1566 | 1573 | ||
1567 | spin_lock_irqsave(cfqd->queue->queue_lock, flags); | 1574 | return ret; |
1568 | list_add(&cic->queue_list, &cfqd->cic_list); | ||
1569 | spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); | ||
1570 | } | 1575 | } |
1571 | 1576 | ||
1572 | /* | 1577 | /* |
@@ -1586,7 +1591,7 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) | |||
1586 | if (!ioc) | 1591 | if (!ioc) |
1587 | return NULL; | 1592 | return NULL; |
1588 | 1593 | ||
1589 | cic = cfq_cic_rb_lookup(cfqd, ioc); | 1594 | cic = cfq_cic_lookup(cfqd, ioc); |
1590 | if (cic) | 1595 | if (cic) |
1591 | goto out; | 1596 | goto out; |
1592 | 1597 | ||
@@ -1594,13 +1599,17 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) | |||
1594 | if (cic == NULL) | 1599 | if (cic == NULL) |
1595 | goto err; | 1600 | goto err; |
1596 | 1601 | ||
1597 | cfq_cic_link(cfqd, ioc, cic); | 1602 | if (cfq_cic_link(cfqd, ioc, cic, gfp_mask)) |
1603 | goto err_free; | ||
1604 | |||
1598 | out: | 1605 | out: |
1599 | smp_read_barrier_depends(); | 1606 | smp_read_barrier_depends(); |
1600 | if (unlikely(ioc->ioprio_changed)) | 1607 | if (unlikely(ioc->ioprio_changed)) |
1601 | cfq_ioc_set_ioprio(ioc); | 1608 | cfq_ioc_set_ioprio(ioc); |
1602 | 1609 | ||
1603 | return cic; | 1610 | return cic; |
1611 | err_free: | ||
1612 | cfq_cic_free(cic); | ||
1604 | err: | 1613 | err: |
1605 | put_io_context(ioc); | 1614 | put_io_context(ioc); |
1606 | return NULL; | 1615 | return NULL; |
@@ -1655,12 +1664,15 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
1655 | { | 1664 | { |
1656 | int enable_idle; | 1665 | int enable_idle; |
1657 | 1666 | ||
1658 | if (!cfq_cfqq_sync(cfqq)) | 1667 | /* |
1668 | * Don't idle for async or idle io prio class | ||
1669 | */ | ||
1670 | if (!cfq_cfqq_sync(cfqq) || cfq_class_idle(cfqq)) | ||
1659 | return; | 1671 | return; |
1660 | 1672 | ||
1661 | enable_idle = cfq_cfqq_idle_window(cfqq); | 1673 | enable_idle = cfq_cfqq_idle_window(cfqq); |
1662 | 1674 | ||
1663 | if (!cic->ioc->task || !cfqd->cfq_slice_idle || | 1675 | if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle || |
1664 | (cfqd->hw_tag && CIC_SEEKY(cic))) | 1676 | (cfqd->hw_tag && CIC_SEEKY(cic))) |
1665 | enable_idle = 0; | 1677 | enable_idle = 0; |
1666 | else if (sample_valid(cic->ttime_samples)) { | 1678 | else if (sample_valid(cic->ttime_samples)) { |
@@ -1793,7 +1805,7 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq) | |||
1793 | struct cfq_data *cfqd = q->elevator->elevator_data; | 1805 | struct cfq_data *cfqd = q->elevator->elevator_data; |
1794 | struct cfq_queue *cfqq = RQ_CFQQ(rq); | 1806 | struct cfq_queue *cfqq = RQ_CFQQ(rq); |
1795 | 1807 | ||
1796 | cfq_init_prio_data(cfqq); | 1808 | cfq_init_prio_data(cfqq, RQ_CIC(rq)->ioc); |
1797 | 1809 | ||
1798 | cfq_add_rq_rb(rq); | 1810 | cfq_add_rq_rb(rq); |
1799 | 1811 | ||
@@ -1834,7 +1846,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) | |||
1834 | cfq_set_prio_slice(cfqd, cfqq); | 1846 | cfq_set_prio_slice(cfqd, cfqq); |
1835 | cfq_clear_cfqq_slice_new(cfqq); | 1847 | cfq_clear_cfqq_slice_new(cfqq); |
1836 | } | 1848 | } |
1837 | if (cfq_slice_used(cfqq)) | 1849 | if (cfq_slice_used(cfqq) || cfq_class_idle(cfqq)) |
1838 | cfq_slice_expired(cfqd, 1); | 1850 | cfq_slice_expired(cfqd, 1); |
1839 | else if (sync && RB_EMPTY_ROOT(&cfqq->sort_list)) | 1851 | else if (sync && RB_EMPTY_ROOT(&cfqq->sort_list)) |
1840 | cfq_arm_slice_timer(cfqd); | 1852 | cfq_arm_slice_timer(cfqd); |
@@ -1894,13 +1906,13 @@ static int cfq_may_queue(struct request_queue *q, int rw) | |||
1894 | * so just lookup a possibly existing queue, or return 'may queue' | 1906 | * so just lookup a possibly existing queue, or return 'may queue' |
1895 | * if that fails | 1907 | * if that fails |
1896 | */ | 1908 | */ |
1897 | cic = cfq_cic_rb_lookup(cfqd, tsk->io_context); | 1909 | cic = cfq_cic_lookup(cfqd, tsk->io_context); |
1898 | if (!cic) | 1910 | if (!cic) |
1899 | return ELV_MQUEUE_MAY; | 1911 | return ELV_MQUEUE_MAY; |
1900 | 1912 | ||
1901 | cfqq = cic_to_cfqq(cic, rw & REQ_RW_SYNC); | 1913 | cfqq = cic_to_cfqq(cic, rw & REQ_RW_SYNC); |
1902 | if (cfqq) { | 1914 | if (cfqq) { |
1903 | cfq_init_prio_data(cfqq); | 1915 | cfq_init_prio_data(cfqq, cic->ioc); |
1904 | cfq_prio_boost(cfqq); | 1916 | cfq_prio_boost(cfqq); |
1905 | 1917 | ||
1906 | return __cfq_may_queue(cfqq); | 1918 | return __cfq_may_queue(cfqq); |
@@ -1938,7 +1950,6 @@ static int | |||
1938 | cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask) | 1950 | cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask) |
1939 | { | 1951 | { |
1940 | struct cfq_data *cfqd = q->elevator->elevator_data; | 1952 | struct cfq_data *cfqd = q->elevator->elevator_data; |
1941 | struct task_struct *tsk = current; | ||
1942 | struct cfq_io_context *cic; | 1953 | struct cfq_io_context *cic; |
1943 | const int rw = rq_data_dir(rq); | 1954 | const int rw = rq_data_dir(rq); |
1944 | const int is_sync = rq_is_sync(rq); | 1955 | const int is_sync = rq_is_sync(rq); |
@@ -1956,7 +1967,7 @@ cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask) | |||
1956 | 1967 | ||
1957 | cfqq = cic_to_cfqq(cic, is_sync); | 1968 | cfqq = cic_to_cfqq(cic, is_sync); |
1958 | if (!cfqq) { | 1969 | if (!cfqq) { |
1959 | cfqq = cfq_get_queue(cfqd, is_sync, tsk, gfp_mask); | 1970 | cfqq = cfq_get_queue(cfqd, is_sync, cic->ioc, gfp_mask); |
1960 | 1971 | ||
1961 | if (!cfqq) | 1972 | if (!cfqq) |
1962 | goto queue_fail; | 1973 | goto queue_fail; |
@@ -2039,29 +2050,9 @@ out_cont: | |||
2039 | spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); | 2050 | spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); |
2040 | } | 2051 | } |
2041 | 2052 | ||
2042 | /* | ||
2043 | * Timer running if an idle class queue is waiting for service | ||
2044 | */ | ||
2045 | static void cfq_idle_class_timer(unsigned long data) | ||
2046 | { | ||
2047 | struct cfq_data *cfqd = (struct cfq_data *) data; | ||
2048 | unsigned long flags; | ||
2049 | |||
2050 | spin_lock_irqsave(cfqd->queue->queue_lock, flags); | ||
2051 | |||
2052 | /* | ||
2053 | * race with a non-idle queue, reset timer | ||
2054 | */ | ||
2055 | if (!start_idle_class_timer(cfqd)) | ||
2056 | cfq_schedule_dispatch(cfqd); | ||
2057 | |||
2058 | spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); | ||
2059 | } | ||
2060 | |||
2061 | static void cfq_shutdown_timer_wq(struct cfq_data *cfqd) | 2053 | static void cfq_shutdown_timer_wq(struct cfq_data *cfqd) |
2062 | { | 2054 | { |
2063 | del_timer_sync(&cfqd->idle_slice_timer); | 2055 | del_timer_sync(&cfqd->idle_slice_timer); |
2064 | del_timer_sync(&cfqd->idle_class_timer); | ||
2065 | kblockd_flush_work(&cfqd->unplug_work); | 2056 | kblockd_flush_work(&cfqd->unplug_work); |
2066 | } | 2057 | } |
2067 | 2058 | ||
@@ -2126,10 +2117,6 @@ static void *cfq_init_queue(struct request_queue *q) | |||
2126 | cfqd->idle_slice_timer.function = cfq_idle_slice_timer; | 2117 | cfqd->idle_slice_timer.function = cfq_idle_slice_timer; |
2127 | cfqd->idle_slice_timer.data = (unsigned long) cfqd; | 2118 | cfqd->idle_slice_timer.data = (unsigned long) cfqd; |
2128 | 2119 | ||
2129 | init_timer(&cfqd->idle_class_timer); | ||
2130 | cfqd->idle_class_timer.function = cfq_idle_class_timer; | ||
2131 | cfqd->idle_class_timer.data = (unsigned long) cfqd; | ||
2132 | |||
2133 | INIT_WORK(&cfqd->unplug_work, cfq_kick_queue); | 2120 | INIT_WORK(&cfqd->unplug_work, cfq_kick_queue); |
2134 | 2121 | ||
2135 | cfqd->last_end_request = jiffies; | 2122 | cfqd->last_end_request = jiffies; |
@@ -2160,7 +2147,7 @@ static int __init cfq_slab_setup(void) | |||
2160 | if (!cfq_pool) | 2147 | if (!cfq_pool) |
2161 | goto fail; | 2148 | goto fail; |
2162 | 2149 | ||
2163 | cfq_ioc_pool = KMEM_CACHE(cfq_io_context, 0); | 2150 | cfq_ioc_pool = KMEM_CACHE(cfq_io_context, SLAB_DESTROY_BY_RCU); |
2164 | if (!cfq_ioc_pool) | 2151 | if (!cfq_ioc_pool) |
2165 | goto fail; | 2152 | goto fail; |
2166 | 2153 | ||
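The largest change above is that a task's cfq_io_contexts no longer hang off the io_context in an rb-tree; they live in a per-io_context radix tree keyed by the cfqd pointer, updated under ioc->lock, looked up under rcu_read_lock(), with the cic slab marked SLAB_DESTROY_BY_RCU. call_for_each_cic() then walks that tree with batched gang lookups. A condensed sketch of the same walk, assuming each item is inserted at an index equal to its own pointer value (the real code recovers the index from cic->key instead):

#include <linux/radix-tree.h>
#include <linux/rcupdate.h>

#define GANG_NR	16

/* Kernel-context sketch of a batched, RCU-protected radix tree walk. */
static unsigned int walk_all(struct radix_tree_root *root,
			     void (*fn)(void *item))
{
	void *batch[GANG_NR];
	unsigned long index = 0;
	unsigned int seen = 0, nr;

	rcu_read_lock();
	do {
		unsigned int i;

		nr = radix_tree_gang_lookup(root, batch, index, GANG_NR);
		if (!nr)
			break;

		seen += nr;
		/* assumed: an item's index equals its pointer value */
		index = 1 + (unsigned long) batch[nr - 1];

		for (i = 0; i < nr; i++)
			fn(batch[i]);
	} while (nr == GANG_NR);
	rcu_read_unlock();

	return seen;
}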
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 3d0422f48453..b901db63f6ae 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -3853,55 +3853,100 @@ int __init blk_dev_init(void) | |||
3853 | return 0; | 3853 | return 0; |
3854 | } | 3854 | } |
3855 | 3855 | ||
3856 | static void cfq_dtor(struct io_context *ioc) | ||
3857 | { | ||
3858 | struct cfq_io_context *cic[1]; | ||
3859 | int r; | ||
3860 | |||
3861 | /* | ||
3862 | * We don't have a specific key to lookup with, so use the gang | ||
3863 | * lookup to just retrieve the first item stored. The cfq exit | ||
3864 | * function will iterate the full tree, so any member will do. | ||
3865 | */ | ||
3866 | r = radix_tree_gang_lookup(&ioc->radix_root, (void **) cic, 0, 1); | ||
3867 | if (r > 0) | ||
3868 | cic[0]->dtor(ioc); | ||
3869 | } | ||
3870 | |||
3856 | /* | 3871 | /* |
3857 | * IO Context helper functions | 3872 | * IO Context helper functions. put_io_context() returns 1 if there are no |
3873 | * more users of this io context, 0 otherwise. | ||
3858 | */ | 3874 | */ |
3859 | void put_io_context(struct io_context *ioc) | 3875 | int put_io_context(struct io_context *ioc) |
3860 | { | 3876 | { |
3861 | if (ioc == NULL) | 3877 | if (ioc == NULL) |
3862 | return; | 3878 | return 1; |
3863 | 3879 | ||
3864 | BUG_ON(atomic_read(&ioc->refcount) == 0); | 3880 | BUG_ON(atomic_read(&ioc->refcount) == 0); |
3865 | 3881 | ||
3866 | if (atomic_dec_and_test(&ioc->refcount)) { | 3882 | if (atomic_dec_and_test(&ioc->refcount)) { |
3867 | struct cfq_io_context *cic; | ||
3868 | |||
3869 | rcu_read_lock(); | 3883 | rcu_read_lock(); |
3870 | if (ioc->aic && ioc->aic->dtor) | 3884 | if (ioc->aic && ioc->aic->dtor) |
3871 | ioc->aic->dtor(ioc->aic); | 3885 | ioc->aic->dtor(ioc->aic); |
3872 | if (ioc->cic_root.rb_node != NULL) { | ||
3873 | struct rb_node *n = rb_first(&ioc->cic_root); | ||
3874 | |||
3875 | cic = rb_entry(n, struct cfq_io_context, rb_node); | ||
3876 | cic->dtor(ioc); | ||
3877 | } | ||
3878 | rcu_read_unlock(); | 3886 | rcu_read_unlock(); |
3887 | cfq_dtor(ioc); | ||
3879 | 3888 | ||
3880 | kmem_cache_free(iocontext_cachep, ioc); | 3889 | kmem_cache_free(iocontext_cachep, ioc); |
3890 | return 1; | ||
3881 | } | 3891 | } |
3892 | return 0; | ||
3882 | } | 3893 | } |
3883 | EXPORT_SYMBOL(put_io_context); | 3894 | EXPORT_SYMBOL(put_io_context); |
3884 | 3895 | ||
3896 | static void cfq_exit(struct io_context *ioc) | ||
3897 | { | ||
3898 | struct cfq_io_context *cic[1]; | ||
3899 | int r; | ||
3900 | |||
3901 | rcu_read_lock(); | ||
3902 | /* | ||
3903 | * See comment for cfq_dtor() | ||
3904 | */ | ||
3905 | r = radix_tree_gang_lookup(&ioc->radix_root, (void **) cic, 0, 1); | ||
3906 | rcu_read_unlock(); | ||
3907 | |||
3908 | if (r > 0) | ||
3909 | cic[0]->exit(ioc); | ||
3910 | } | ||
3911 | |||
3885 | /* Called by the exitting task */ | 3912 | /* Called by the exitting task */ |
3886 | void exit_io_context(void) | 3913 | void exit_io_context(void) |
3887 | { | 3914 | { |
3888 | struct io_context *ioc; | 3915 | struct io_context *ioc; |
3889 | struct cfq_io_context *cic; | ||
3890 | 3916 | ||
3891 | task_lock(current); | 3917 | task_lock(current); |
3892 | ioc = current->io_context; | 3918 | ioc = current->io_context; |
3893 | current->io_context = NULL; | 3919 | current->io_context = NULL; |
3894 | task_unlock(current); | 3920 | task_unlock(current); |
3895 | 3921 | ||
3896 | ioc->task = NULL; | 3922 | if (atomic_dec_and_test(&ioc->nr_tasks)) { |
3897 | if (ioc->aic && ioc->aic->exit) | 3923 | if (ioc->aic && ioc->aic->exit) |
3898 | ioc->aic->exit(ioc->aic); | 3924 | ioc->aic->exit(ioc->aic); |
3899 | if (ioc->cic_root.rb_node != NULL) { | 3925 | cfq_exit(ioc); |
3900 | cic = rb_entry(rb_first(&ioc->cic_root), struct cfq_io_context, rb_node); | 3926 | |
3901 | cic->exit(ioc); | 3927 | put_io_context(ioc); |
3902 | } | 3928 | } |
3929 | } | ||
3930 | |||
3931 | struct io_context *alloc_io_context(gfp_t gfp_flags, int node) | ||
3932 | { | ||
3933 | struct io_context *ret; | ||
3903 | 3934 | ||
3904 | put_io_context(ioc); | 3935 | ret = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node); |
3936 | if (ret) { | ||
3937 | atomic_set(&ret->refcount, 1); | ||
3938 | atomic_set(&ret->nr_tasks, 1); | ||
3939 | spin_lock_init(&ret->lock); | ||
3940 | ret->ioprio_changed = 0; | ||
3941 | ret->ioprio = 0; | ||
3942 | ret->last_waited = jiffies; /* doesn't matter... */ | ||
3943 | ret->nr_batch_requests = 0; /* because this is 0 */ | ||
3944 | ret->aic = NULL; | ||
3945 | INIT_RADIX_TREE(&ret->radix_root, GFP_ATOMIC | __GFP_HIGH); | ||
3946 | ret->ioc_data = NULL; | ||
3947 | } | ||
3948 | |||
3949 | return ret; | ||
3905 | } | 3950 | } |
3906 | 3951 | ||
3907 | /* | 3952 | /* |
@@ -3921,16 +3966,8 @@ static struct io_context *current_io_context(gfp_t gfp_flags, int node) | |||
3921 | if (likely(ret)) | 3966 | if (likely(ret)) |
3922 | return ret; | 3967 | return ret; |
3923 | 3968 | ||
3924 | ret = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node); | 3969 | ret = alloc_io_context(gfp_flags, node); |
3925 | if (ret) { | 3970 | if (ret) { |
3926 | atomic_set(&ret->refcount, 1); | ||
3927 | ret->task = current; | ||
3928 | ret->ioprio_changed = 0; | ||
3929 | ret->last_waited = jiffies; /* doesn't matter... */ | ||
3930 | ret->nr_batch_requests = 0; /* because this is 0 */ | ||
3931 | ret->aic = NULL; | ||
3932 | ret->cic_root.rb_node = NULL; | ||
3933 | ret->ioc_data = NULL; | ||
3934 | /* make sure set_task_ioprio() sees the settings above */ | 3971 | /* make sure set_task_ioprio() sees the settings above */ |
3935 | smp_wmb(); | 3972 | smp_wmb(); |
3936 | tsk->io_context = ret; | 3973 | tsk->io_context = ret; |
@@ -3947,10 +3984,18 @@ static struct io_context *current_io_context(gfp_t gfp_flags, int node) | |||
3947 | */ | 3984 | */ |
3948 | struct io_context *get_io_context(gfp_t gfp_flags, int node) | 3985 | struct io_context *get_io_context(gfp_t gfp_flags, int node) |
3949 | { | 3986 | { |
3950 | struct io_context *ret; | 3987 | struct io_context *ret = NULL; |
3951 | ret = current_io_context(gfp_flags, node); | 3988 | |
3952 | if (likely(ret)) | 3989 | /* |
3953 | atomic_inc(&ret->refcount); | 3990 | * Check for unlikely race with exiting task. ioc ref count is |
3991 | * zero when ioc is being detached. | ||
3992 | */ | ||
3993 | do { | ||
3994 | ret = current_io_context(gfp_flags, node); | ||
3995 | if (unlikely(!ret)) | ||
3996 | break; | ||
3997 | } while (!atomic_inc_not_zero(&ret->refcount)); | ||
3998 | |||
3954 | return ret; | 3999 | return ret; |
3955 | } | 4000 | } |
3956 | EXPORT_SYMBOL(get_io_context); | 4001 | EXPORT_SYMBOL(get_io_context); |
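In the ll_rw_blk.c hunks, put_io_context() now reports whether it dropped the last reference, alloc_io_context() is split out so callers like set_task_ioprio() can create a context for another task, and get_io_context() must cope with the owning task exiting underneath it: the refcount may already have hit zero, so the reference is taken with atomic_inc_not_zero() and the lookup retried. A kernel-context sketch of that conditional-reference idiom on a hypothetical object:

#include <asm/atomic.h>

struct obj {
	atomic_t refcount;
};

/*
 * Take a reference only if the object is not already being torn down,
 * i.e. only if its refcount has not yet dropped to zero.
 */
static struct obj *obj_get(struct obj *o)
{
	if (o && atomic_inc_not_zero(&o->refcount))
		return o;

	return NULL;
}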
diff --git a/fs/ioprio.c b/fs/ioprio.c
index e4e01bc7f338..c4a1c3c65aac 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -41,18 +41,28 @@ static int set_task_ioprio(struct task_struct *task, int ioprio) | |||
41 | return err; | 41 | return err; |
42 | 42 | ||
43 | task_lock(task); | 43 | task_lock(task); |
44 | do { | ||
45 | ioc = task->io_context; | ||
46 | /* see wmb() in current_io_context() */ | ||
47 | smp_read_barrier_depends(); | ||
48 | if (ioc) | ||
49 | break; | ||
44 | 50 | ||
45 | task->ioprio = ioprio; | 51 | ioc = alloc_io_context(GFP_ATOMIC, -1); |
46 | 52 | if (!ioc) { | |
47 | ioc = task->io_context; | 53 | err = -ENOMEM; |
48 | /* see wmb() in current_io_context() */ | 54 | break; |
49 | smp_read_barrier_depends(); | 55 | } |
56 | task->io_context = ioc; | ||
57 | } while (1); | ||
50 | 58 | ||
51 | if (ioc) | 59 | if (!err) { |
60 | ioc->ioprio = ioprio; | ||
52 | ioc->ioprio_changed = 1; | 61 | ioc->ioprio_changed = 1; |
62 | } | ||
53 | 63 | ||
54 | task_unlock(task); | 64 | task_unlock(task); |
55 | return 0; | 65 | return err; |
56 | } | 66 | } |
57 | 67 | ||
58 | asmlinkage long sys_ioprio_set(int which, int who, int ioprio) | 68 | asmlinkage long sys_ioprio_set(int which, int who, int ioprio) |
@@ -75,8 +85,6 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio) | |||
75 | 85 | ||
76 | break; | 86 | break; |
77 | case IOPRIO_CLASS_IDLE: | 87 | case IOPRIO_CLASS_IDLE: |
78 | if (!capable(CAP_SYS_ADMIN)) | ||
79 | return -EPERM; | ||
80 | break; | 88 | break; |
81 | case IOPRIO_CLASS_NONE: | 89 | case IOPRIO_CLASS_NONE: |
82 | if (data) | 90 | if (data) |
@@ -148,7 +156,9 @@ static int get_task_ioprio(struct task_struct *p) | |||
148 | ret = security_task_getioprio(p); | 156 | ret = security_task_getioprio(p); |
149 | if (ret) | 157 | if (ret) |
150 | goto out; | 158 | goto out; |
151 | ret = p->ioprio; | 159 | ret = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, IOPRIO_NORM); |
160 | if (p->io_context) | ||
161 | ret = p->io_context->ioprio; | ||
152 | out: | 162 | out: |
153 | return ret; | 163 | return ret; |
154 | } | 164 | } |
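With the io priority stored in the io_context instead of task_struct, set_task_ioprio() must be able to attach a context to a task that has never issued any I/O, hence the allocation loop under task_lock() above, and get_task_ioprio() falls back to IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, IOPRIO_NORM) when no context exists. A self-contained userspace sketch of that allocate-if-missing pattern, with hypothetical stand-in types:

#include <errno.h>
#include <pthread.h>
#include <stdlib.h>

/* Hypothetical stand-ins for task_struct / io_context. */
struct ioctx { int prio; int prio_changed; };
struct task  { pthread_mutex_t lock; struct ioctx *ioctx; };

/* Mirrors the loop in set_task_ioprio() above: allocate an io context the
 * first time a priority is set for a task that never did any io. */
static int set_prio(struct task *t, int prio)
{
	struct ioctx *ioc;
	int err = 0;

	pthread_mutex_lock(&t->lock);
	do {
		ioc = t->ioctx;
		if (ioc)
			break;

		ioc = calloc(1, sizeof(*ioc));
		if (!ioc) {
			err = -ENOMEM;	/* kernel-style negative errno */
			break;
		}
		t->ioctx = ioc;
	} while (1);

	if (!err) {
		ioc->prio = prio;
		ioc->prio_changed = 1;
	}
	pthread_mutex_unlock(&t->lock);
	return err;
}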
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 49b7a4c31a6d..2483a05231c7 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -34,83 +34,10 @@ struct sg_io_hdr; | |||
34 | #define BLKDEV_MIN_RQ 4 | 34 | #define BLKDEV_MIN_RQ 4 |
35 | #define BLKDEV_MAX_RQ 128 /* Default maximum */ | 35 | #define BLKDEV_MAX_RQ 128 /* Default maximum */ |
36 | 36 | ||
37 | /* | 37 | int put_io_context(struct io_context *ioc); |
38 | * This is the per-process anticipatory I/O scheduler state. | ||
39 | */ | ||
40 | struct as_io_context { | ||
41 | spinlock_t lock; | ||
42 | |||
43 | void (*dtor)(struct as_io_context *aic); /* destructor */ | ||
44 | void (*exit)(struct as_io_context *aic); /* called on task exit */ | ||
45 | |||
46 | unsigned long state; | ||
47 | atomic_t nr_queued; /* queued reads & sync writes */ | ||
48 | atomic_t nr_dispatched; /* number of requests gone to the drivers */ | ||
49 | |||
50 | /* IO History tracking */ | ||
51 | /* Thinktime */ | ||
52 | unsigned long last_end_request; | ||
53 | unsigned long ttime_total; | ||
54 | unsigned long ttime_samples; | ||
55 | unsigned long ttime_mean; | ||
56 | /* Layout pattern */ | ||
57 | unsigned int seek_samples; | ||
58 | sector_t last_request_pos; | ||
59 | u64 seek_total; | ||
60 | sector_t seek_mean; | ||
61 | }; | ||
62 | |||
63 | struct cfq_queue; | ||
64 | struct cfq_io_context { | ||
65 | struct rb_node rb_node; | ||
66 | void *key; | ||
67 | |||
68 | struct cfq_queue *cfqq[2]; | ||
69 | |||
70 | struct io_context *ioc; | ||
71 | |||
72 | unsigned long last_end_request; | ||
73 | sector_t last_request_pos; | ||
74 | |||
75 | unsigned long ttime_total; | ||
76 | unsigned long ttime_samples; | ||
77 | unsigned long ttime_mean; | ||
78 | |||
79 | unsigned int seek_samples; | ||
80 | u64 seek_total; | ||
81 | sector_t seek_mean; | ||
82 | |||
83 | struct list_head queue_list; | ||
84 | |||
85 | void (*dtor)(struct io_context *); /* destructor */ | ||
86 | void (*exit)(struct io_context *); /* called on task exit */ | ||
87 | }; | ||
88 | |||
89 | /* | ||
90 | * This is the per-process I/O subsystem state. It is refcounted and | ||
91 | * kmalloc'ed. Currently all fields are modified in process io context | ||
92 | * (apart from the atomic refcount), so require no locking. | ||
93 | */ | ||
94 | struct io_context { | ||
95 | atomic_t refcount; | ||
96 | struct task_struct *task; | ||
97 | |||
98 | unsigned int ioprio_changed; | ||
99 | |||
100 | /* | ||
101 | * For request batching | ||
102 | */ | ||
103 | unsigned long last_waited; /* Time last woken after wait for request */ | ||
104 | int nr_batch_requests; /* Number of requests left in the batch */ | ||
105 | |||
106 | struct as_io_context *aic; | ||
107 | struct rb_root cic_root; | ||
108 | void *ioc_data; | ||
109 | }; | ||
110 | |||
111 | void put_io_context(struct io_context *ioc); | ||
112 | void exit_io_context(void); | 38 | void exit_io_context(void); |
113 | struct io_context *get_io_context(gfp_t gfp_flags, int node); | 39 | struct io_context *get_io_context(gfp_t gfp_flags, int node); |
40 | struct io_context *alloc_io_context(gfp_t gfp_flags, int node); | ||
114 | void copy_io_context(struct io_context **pdst, struct io_context **psrc); | 41 | void copy_io_context(struct io_context **pdst, struct io_context **psrc); |
115 | void swap_io_context(struct io_context **ioc1, struct io_context **ioc2); | 42 | void swap_io_context(struct io_context **ioc1, struct io_context **ioc2); |
116 | 43 | ||
@@ -894,6 +821,12 @@ static inline void exit_io_context(void) | |||
894 | { | 821 | { |
895 | } | 822 | } |
896 | 823 | ||
824 | static inline int put_io_context(struct io_context *ioc) | ||
825 | { | ||
826 | return 1; | ||
827 | } | ||
828 | |||
829 | |||
897 | #endif /* CONFIG_BLOCK */ | 830 | #endif /* CONFIG_BLOCK */ |
898 | 831 | ||
899 | #endif | 832 | #endif |
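put_io_context() now returns an int; the !CONFIG_BLOCK stub above returns 1, which suggests the convention is "non-zero means the last reference was dropped". A hypothetical caller sketch under that assumption (not from the patch):

#include <linux/kernel.h>
#include <linux/blkdev.h>

/* drop a reference and log only when the context actually went away */
static void example_release_ioc(struct io_context *ioc)
{
	if (put_io_context(ioc))
		pr_debug("io_context released\n");
}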
diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 796019b22b6f..e6b3f7080679 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h | |||
@@ -137,7 +137,6 @@ extern struct group_info init_groups; | |||
137 | .time_slice = HZ, \ | 137 | .time_slice = HZ, \ |
138 | .nr_cpus_allowed = NR_CPUS, \ | 138 | .nr_cpus_allowed = NR_CPUS, \ |
139 | }, \ | 139 | }, \ |
140 | .ioprio = 0, \ | ||
141 | .tasks = LIST_HEAD_INIT(tsk.tasks), \ | 140 | .tasks = LIST_HEAD_INIT(tsk.tasks), \ |
142 | .ptrace_children= LIST_HEAD_INIT(tsk.ptrace_children), \ | 141 | .ptrace_children= LIST_HEAD_INIT(tsk.ptrace_children), \ |
143 | .ptrace_list = LIST_HEAD_INIT(tsk.ptrace_list), \ | 142 | .ptrace_list = LIST_HEAD_INIT(tsk.ptrace_list), \ |
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h new file mode 100644 index 000000000000..593b222d9dcc --- /dev/null +++ b/include/linux/iocontext.h | |||
@@ -0,0 +1,95 @@ | |||
1 | #ifndef IOCONTEXT_H | ||
2 | #define IOCONTEXT_H | ||
3 | |||
4 | #include <linux/radix-tree.h> | ||
5 | |||
6 | /* | ||
7 | * This is the per-process anticipatory I/O scheduler state. | ||
8 | */ | ||
9 | struct as_io_context { | ||
10 | spinlock_t lock; | ||
11 | |||
12 | void (*dtor)(struct as_io_context *aic); /* destructor */ | ||
13 | void (*exit)(struct as_io_context *aic); /* called on task exit */ | ||
14 | |||
15 | unsigned long state; | ||
16 | atomic_t nr_queued; /* queued reads & sync writes */ | ||
17 | atomic_t nr_dispatched; /* number of requests gone to the drivers */ | ||
18 | |||
19 | /* IO History tracking */ | ||
20 | /* Thinktime */ | ||
21 | unsigned long last_end_request; | ||
22 | unsigned long ttime_total; | ||
23 | unsigned long ttime_samples; | ||
24 | unsigned long ttime_mean; | ||
25 | /* Layout pattern */ | ||
26 | unsigned int seek_samples; | ||
27 | sector_t last_request_pos; | ||
28 | u64 seek_total; | ||
29 | sector_t seek_mean; | ||
30 | }; | ||
31 | |||
32 | struct cfq_queue; | ||
33 | struct cfq_io_context { | ||
34 | void *key; | ||
35 | unsigned long dead_key; | ||
36 | |||
37 | struct cfq_queue *cfqq[2]; | ||
38 | |||
39 | struct io_context *ioc; | ||
40 | |||
41 | unsigned long last_end_request; | ||
42 | sector_t last_request_pos; | ||
43 | |||
44 | unsigned long ttime_total; | ||
45 | unsigned long ttime_samples; | ||
46 | unsigned long ttime_mean; | ||
47 | |||
48 | unsigned int seek_samples; | ||
49 | u64 seek_total; | ||
50 | sector_t seek_mean; | ||
51 | |||
52 | struct list_head queue_list; | ||
53 | |||
54 | void (*dtor)(struct io_context *); /* destructor */ | ||
55 | void (*exit)(struct io_context *); /* called on task exit */ | ||
56 | }; | ||
57 | |||
58 | /* | ||
59 | * I/O subsystem state of the associated processes. It is refcounted | ||
60 | * and kmalloc'ed. These could be shared between processes. | ||
61 | */ | ||
62 | struct io_context { | ||
63 | atomic_t refcount; | ||
64 | atomic_t nr_tasks; | ||
65 | |||
66 | /* all the fields below are protected by this lock */ | ||
67 | spinlock_t lock; | ||
68 | |||
69 | unsigned short ioprio; | ||
70 | unsigned short ioprio_changed; | ||
71 | |||
72 | /* | ||
73 | * For request batching | ||
74 | */ | ||
75 | unsigned long last_waited; /* Time last woken after wait for request */ | ||
76 | int nr_batch_requests; /* Number of requests left in the batch */ | ||
77 | |||
78 | struct as_io_context *aic; | ||
79 | struct radix_tree_root radix_root; | ||
80 | void *ioc_data; | ||
81 | }; | ||
82 | |||
83 | static inline struct io_context *ioc_task_link(struct io_context *ioc) | ||
84 | { | ||
85 | /* | ||
86 | * if ref count is zero, don't allow sharing (ioc is going away, it's | ||
87 | * a race). | ||
88 | */ | ||
89 | if (ioc && atomic_inc_not_zero(&ioc->refcount)) | ||
90 | return ioc; | ||
91 | |||
92 | return NULL; | ||
93 | } | ||
94 | |||
95 | #endif | ||
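ioc_task_link() takes a reference only while the refcount is still non-zero, so a task that races with the final put simply sees NULL and builds a private context. A hypothetical caller sketch of that fallback (not from the patch; copy_io() in the kernel/fork.c hunk below relies on the same primitive for CLONE_IO):

#include <linux/sched.h>
#include <linux/gfp.h>
#include <linux/errno.h>
#include <linux/blkdev.h>
#include <linux/iocontext.h>

static int example_share_or_alloc(struct task_struct *tsk,
				  struct io_context *parent_ioc)
{
	/* try to share; atomic_inc_not_zero() fails if parent_ioc is dying */
	tsk->io_context = ioc_task_link(parent_ioc);
	if (tsk->io_context)
		return 0;

	/* lost the race with the last put: fall back to a fresh context */
	tsk->io_context = alloc_io_context(GFP_KERNEL, -1);
	return tsk->io_context ? 0 : -ENOMEM;
}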
diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index baf29387cab4..2a3bb1bb7433 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h | |||
@@ -2,6 +2,7 @@ | |||
2 | #define IOPRIO_H | 2 | #define IOPRIO_H |
3 | 3 | ||
4 | #include <linux/sched.h> | 4 | #include <linux/sched.h> |
5 | #include <linux/iocontext.h> | ||
5 | 6 | ||
6 | /* | 7 | /* |
7 | * Gives us 8 prio classes with 13-bits of data for each class | 8 | * Gives us 8 prio classes with 13-bits of data for each class |
@@ -45,18 +46,18 @@ enum { | |||
45 | * the cpu scheduler nice value to an io priority | 46 | * the cpu scheduler nice value to an io priority |
46 | */ | 47 | */ |
47 | #define IOPRIO_NORM (4) | 48 | #define IOPRIO_NORM (4) |
48 | static inline int task_ioprio(struct task_struct *task) | 49 | static inline int task_ioprio(struct io_context *ioc) |
49 | { | 50 | { |
50 | if (ioprio_valid(task->ioprio)) | 51 | if (ioprio_valid(ioc->ioprio)) |
51 | return IOPRIO_PRIO_DATA(task->ioprio); | 52 | return IOPRIO_PRIO_DATA(ioc->ioprio); |
52 | 53 | ||
53 | return IOPRIO_NORM; | 54 | return IOPRIO_NORM; |
54 | } | 55 | } |
55 | 56 | ||
56 | static inline int task_ioprio_class(struct task_struct *task) | 57 | static inline int task_ioprio_class(struct io_context *ioc) |
57 | { | 58 | { |
58 | if (ioprio_valid(task->ioprio)) | 59 | if (ioprio_valid(ioc->ioprio)) |
59 | return IOPRIO_PRIO_CLASS(task->ioprio); | 60 | return IOPRIO_PRIO_CLASS(ioc->ioprio); |
60 | 61 | ||
61 | return IOPRIO_CLASS_BE; | 62 | return IOPRIO_CLASS_BE; |
62 | } | 63 | } |
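task_ioprio() and task_ioprio_class() now take the io_context itself, since the priority moved out of task_struct. A hypothetical elevator-side sketch of the new calling convention (not from the patch):

#include <linux/ioprio.h>

/* derive a queue's scheduling class and level from the issuer's context */
static void example_queue_prio(struct io_context *ioc,
			       unsigned short *ioprio_class,
			       unsigned short *ioprio)
{
	*ioprio_class = task_ioprio_class(ioc);
	*ioprio = task_ioprio(ioc);
}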
diff --git a/include/linux/sched.h b/include/linux/sched.h index df5b24ee80b3..2d0546e884ea 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -27,6 +27,7 @@ | |||
27 | #define CLONE_NEWUSER 0x10000000 /* New user namespace */ | 27 | #define CLONE_NEWUSER 0x10000000 /* New user namespace */ |
28 | #define CLONE_NEWPID 0x20000000 /* New pid namespace */ | 28 | #define CLONE_NEWPID 0x20000000 /* New pid namespace */ |
29 | #define CLONE_NEWNET 0x40000000 /* New network namespace */ | 29 | #define CLONE_NEWNET 0x40000000 /* New network namespace */ |
30 | #define CLONE_IO 0x80000000 /* Clone io context */ | ||
30 | 31 | ||
31 | /* | 32 | /* |
32 | * Scheduling policies | 33 | * Scheduling policies |
@@ -975,7 +976,6 @@ struct task_struct { | |||
975 | struct hlist_head preempt_notifiers; | 976 | struct hlist_head preempt_notifiers; |
976 | #endif | 977 | #endif |
977 | 978 | ||
978 | unsigned short ioprio; | ||
979 | /* | 979 | /* |
980 | * fpu_counter contains the number of consecutive context switches | 980 | * fpu_counter contains the number of consecutive context switches |
981 | * that the FPU is used. If this is over a threshold, the lazy fpu | 981 | * that the FPU is used. If this is over a threshold, the lazy fpu |
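CLONE_IO lets a new task share its parent's io_context; the sharing itself is wired up by copy_io() in the kernel/fork.c hunk below. A userspace sketch of requesting the flag through clone(2) (not part of the patch; CLONE_IO is defined locally in case the libc headers predate it, using the value from the sched.h hunk above):

#define _GNU_SOURCE
#include <sched.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

#ifndef CLONE_IO
#define CLONE_IO	0x80000000	/* Clone io context */
#endif

#define STACK_SIZE	(64 * 1024)

static int child_fn(void *arg)
{
	/* I/O issued here is accounted to the io_context shared with the parent */
	return 0;
}

int main(void)
{
	char *stack = malloc(STACK_SIZE);
	pid_t pid;

	if (!stack)
		return 1;

	/* the stack grows down, so pass the top of the allocation */
	pid = clone(child_fn, stack + STACK_SIZE, CLONE_IO | SIGCHLD, NULL);
	if (pid < 0) {
		perror("clone");
		free(stack);
		return 1;
	}
	waitpid(pid, NULL, 0);
	free(stack);
	return 0;
}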
diff --git a/kernel/fork.c b/kernel/fork.c index 39d22b3357de..314f5101d2b0 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -51,6 +51,7 @@ | |||
51 | #include <linux/random.h> | 51 | #include <linux/random.h> |
52 | #include <linux/tty.h> | 52 | #include <linux/tty.h> |
53 | #include <linux/proc_fs.h> | 53 | #include <linux/proc_fs.h> |
54 | #include <linux/blkdev.h> | ||
54 | 55 | ||
55 | #include <asm/pgtable.h> | 56 | #include <asm/pgtable.h> |
56 | #include <asm/pgalloc.h> | 57 | #include <asm/pgalloc.h> |
@@ -791,6 +792,31 @@ out: | |||
791 | return error; | 792 | return error; |
792 | } | 793 | } |
793 | 794 | ||
795 | static int copy_io(unsigned long clone_flags, struct task_struct *tsk) | ||
796 | { | ||
797 | #ifdef CONFIG_BLOCK | ||
798 | struct io_context *ioc = current->io_context; | ||
799 | |||
800 | if (!ioc) | ||
801 | return 0; | ||
802 | /* | ||
803 | * Share io context with parent, if CLONE_IO is set | ||
804 | */ | ||
805 | if (clone_flags & CLONE_IO) { | ||
806 | tsk->io_context = ioc_task_link(ioc); | ||
807 | if (unlikely(!tsk->io_context)) | ||
808 | return -ENOMEM; | ||
809 | } else if (ioprio_valid(ioc->ioprio)) { | ||
810 | tsk->io_context = alloc_io_context(GFP_KERNEL, -1); | ||
811 | if (unlikely(!tsk->io_context)) | ||
812 | return -ENOMEM; | ||
813 | |||
814 | tsk->io_context->ioprio = ioc->ioprio; | ||
815 | } | ||
816 | #endif | ||
817 | return 0; | ||
818 | } | ||
819 | |||
794 | /* | 820 | /* |
795 | * Helper to unshare the files of the current task. | 821 | * Helper to unshare the files of the current task. |
796 | * We don't want to expose copy_files internals to | 822 | * We don't want to expose copy_files internals to |
@@ -1156,15 +1182,17 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1156 | goto bad_fork_cleanup_mm; | 1182 | goto bad_fork_cleanup_mm; |
1157 | if ((retval = copy_namespaces(clone_flags, p))) | 1183 | if ((retval = copy_namespaces(clone_flags, p))) |
1158 | goto bad_fork_cleanup_keys; | 1184 | goto bad_fork_cleanup_keys; |
1185 | if ((retval = copy_io(clone_flags, p))) | ||
1186 | goto bad_fork_cleanup_namespaces; | ||
1159 | retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs); | 1187 | retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs); |
1160 | if (retval) | 1188 | if (retval) |
1161 | goto bad_fork_cleanup_namespaces; | 1189 | goto bad_fork_cleanup_io; |
1162 | 1190 | ||
1163 | if (pid != &init_struct_pid) { | 1191 | if (pid != &init_struct_pid) { |
1164 | retval = -ENOMEM; | 1192 | retval = -ENOMEM; |
1165 | pid = alloc_pid(task_active_pid_ns(p)); | 1193 | pid = alloc_pid(task_active_pid_ns(p)); |
1166 | if (!pid) | 1194 | if (!pid) |
1167 | goto bad_fork_cleanup_namespaces; | 1195 | goto bad_fork_cleanup_io; |
1168 | 1196 | ||
1169 | if (clone_flags & CLONE_NEWPID) { | 1197 | if (clone_flags & CLONE_NEWPID) { |
1170 | retval = pid_ns_prepare_proc(task_active_pid_ns(p)); | 1198 | retval = pid_ns_prepare_proc(task_active_pid_ns(p)); |
@@ -1234,9 +1262,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1234 | /* Need tasklist lock for parent etc handling! */ | 1262 | /* Need tasklist lock for parent etc handling! */ |
1235 | write_lock_irq(&tasklist_lock); | 1263 | write_lock_irq(&tasklist_lock); |
1236 | 1264 | ||
1237 | /* for sys_ioprio_set(IOPRIO_WHO_PGRP) */ | ||
1238 | p->ioprio = current->ioprio; | ||
1239 | |||
1240 | /* | 1265 | /* |
1241 | * The task hasn't been attached yet, so its cpus_allowed mask will | 1266 | * The task hasn't been attached yet, so its cpus_allowed mask will |
1242 | * not be changed, nor will its assigned CPU. | 1267 | * not be changed, nor will its assigned CPU. |
@@ -1328,6 +1353,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1328 | bad_fork_free_pid: | 1353 | bad_fork_free_pid: |
1329 | if (pid != &init_struct_pid) | 1354 | if (pid != &init_struct_pid) |
1330 | free_pid(pid); | 1355 | free_pid(pid); |
1356 | bad_fork_cleanup_io: | ||
1357 | put_io_context(p->io_context); | ||
1331 | bad_fork_cleanup_namespaces: | 1358 | bad_fork_cleanup_namespaces: |
1332 | exit_task_namespaces(p); | 1359 | exit_task_namespaces(p); |
1333 | bad_fork_cleanup_keys: | 1360 | bad_fork_cleanup_keys: |