diff options
| author | Jens Axboe <axboe@kernel.dk> | 2017-06-12 10:30:13 -0400 |
|---|---|---|
| committer | Jens Axboe <axboe@kernel.dk> | 2017-06-12 10:30:13 -0400 |
| commit | 8f66439eec46d652255b9351abebb540ee5b2fd9 (patch) | |
| tree | 94f4a41dc343cf769cd92f1f7711e9ce8ad43728 /block | |
| parent | 22ec656bcc3f38207ad5476ebad1e5005fb0f1ff (diff) | |
| parent | 32c1431eea4881a6b17bd7c639315010aeefa452 (diff) | |
Merge tag 'v4.12-rc5' into for-4.13/block
We've already got a few conflicts and upcoming work depends on some of the
changes that have gone into mainline as regression fixes for this series.
Pull in 4.12-rc5 to resolve these conflicts and make it easier for downstream
trees to continue working on 4.13 changes.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'block')
| -rw-r--r-- | block/bfq-cgroup.c | 116 | ||||
| -rw-r--r-- | block/bfq-iosched.c | 2 | ||||
| -rw-r--r-- | block/bfq-iosched.h | 23 | ||||
| -rw-r--r-- | block/bio-integrity.c | 3 | ||||
| -rw-r--r-- | block/blk-cgroup.c | 2 | ||||
| -rw-r--r-- | block/blk-core.c | 10 | ||||
| -rw-r--r-- | block/blk-mq.c | 37 | ||||
| -rw-r--r-- | block/blk-sysfs.c | 2 | ||||
| -rw-r--r-- | block/blk-throttle.c | 22 | ||||
| -rw-r--r-- | block/blk.h | 2 | ||||
| -rw-r--r-- | block/cfq-iosched.c | 17 |
11 files changed, 179 insertions, 57 deletions
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index c8a32fb345cf..78b2e0db4fb2 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c | |||
| @@ -52,7 +52,7 @@ BFQG_FLAG_FNS(idling) | |||
| 52 | BFQG_FLAG_FNS(empty) | 52 | BFQG_FLAG_FNS(empty) |
| 53 | #undef BFQG_FLAG_FNS | 53 | #undef BFQG_FLAG_FNS |
| 54 | 54 | ||
| 55 | /* This should be called with the queue_lock held. */ | 55 | /* This should be called with the scheduler lock held. */ |
| 56 | static void bfqg_stats_update_group_wait_time(struct bfqg_stats *stats) | 56 | static void bfqg_stats_update_group_wait_time(struct bfqg_stats *stats) |
| 57 | { | 57 | { |
| 58 | unsigned long long now; | 58 | unsigned long long now; |
| @@ -67,7 +67,7 @@ static void bfqg_stats_update_group_wait_time(struct bfqg_stats *stats) | |||
| 67 | bfqg_stats_clear_waiting(stats); | 67 | bfqg_stats_clear_waiting(stats); |
| 68 | } | 68 | } |
| 69 | 69 | ||
| 70 | /* This should be called with the queue_lock held. */ | 70 | /* This should be called with the scheduler lock held. */ |
| 71 | static void bfqg_stats_set_start_group_wait_time(struct bfq_group *bfqg, | 71 | static void bfqg_stats_set_start_group_wait_time(struct bfq_group *bfqg, |
| 72 | struct bfq_group *curr_bfqg) | 72 | struct bfq_group *curr_bfqg) |
| 73 | { | 73 | { |
| @@ -81,7 +81,7 @@ static void bfqg_stats_set_start_group_wait_time(struct bfq_group *bfqg, | |||
| 81 | bfqg_stats_mark_waiting(stats); | 81 | bfqg_stats_mark_waiting(stats); |
| 82 | } | 82 | } |
| 83 | 83 | ||
| 84 | /* This should be called with the queue_lock held. */ | 84 | /* This should be called with the scheduler lock held. */ |
| 85 | static void bfqg_stats_end_empty_time(struct bfqg_stats *stats) | 85 | static void bfqg_stats_end_empty_time(struct bfqg_stats *stats) |
| 86 | { | 86 | { |
| 87 | unsigned long long now; | 87 | unsigned long long now; |
| @@ -203,12 +203,30 @@ struct bfq_group *bfqq_group(struct bfq_queue *bfqq) | |||
| 203 | 203 | ||
| 204 | static void bfqg_get(struct bfq_group *bfqg) | 204 | static void bfqg_get(struct bfq_group *bfqg) |
| 205 | { | 205 | { |
| 206 | return blkg_get(bfqg_to_blkg(bfqg)); | 206 | bfqg->ref++; |
| 207 | } | 207 | } |
| 208 | 208 | ||
| 209 | void bfqg_put(struct bfq_group *bfqg) | 209 | void bfqg_put(struct bfq_group *bfqg) |
| 210 | { | 210 | { |
| 211 | return blkg_put(bfqg_to_blkg(bfqg)); | 211 | bfqg->ref--; |
| 212 | |||
| 213 | if (bfqg->ref == 0) | ||
| 214 | kfree(bfqg); | ||
| 215 | } | ||
| 216 | |||
| 217 | static void bfqg_and_blkg_get(struct bfq_group *bfqg) | ||
| 218 | { | ||
| 219 | /* see comments in bfq_bic_update_cgroup for why refcounting bfqg */ | ||
| 220 | bfqg_get(bfqg); | ||
| 221 | |||
| 222 | blkg_get(bfqg_to_blkg(bfqg)); | ||
| 223 | } | ||
| 224 | |||
| 225 | void bfqg_and_blkg_put(struct bfq_group *bfqg) | ||
| 226 | { | ||
| 227 | bfqg_put(bfqg); | ||
| 228 | |||
| 229 | blkg_put(bfqg_to_blkg(bfqg)); | ||
| 212 | } | 230 | } |
| 213 | 231 | ||
| 214 | void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq, | 232 | void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq, |
| @@ -312,7 +330,11 @@ void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg) | |||
| 312 | if (bfqq) { | 330 | if (bfqq) { |
| 313 | bfqq->ioprio = bfqq->new_ioprio; | 331 | bfqq->ioprio = bfqq->new_ioprio; |
| 314 | bfqq->ioprio_class = bfqq->new_ioprio_class; | 332 | bfqq->ioprio_class = bfqq->new_ioprio_class; |
| 315 | bfqg_get(bfqg); | 333 | /* |
| 334 | * Make sure that bfqg and its associated blkg do not | ||
| 335 | * disappear before entity. | ||
| 336 | */ | ||
| 337 | bfqg_and_blkg_get(bfqg); | ||
| 316 | } | 338 | } |
| 317 | entity->parent = bfqg->my_entity; /* NULL for root group */ | 339 | entity->parent = bfqg->my_entity; /* NULL for root group */ |
| 318 | entity->sched_data = &bfqg->sched_data; | 340 | entity->sched_data = &bfqg->sched_data; |
| @@ -399,6 +421,8 @@ struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, int node) | |||
| 399 | return NULL; | 421 | return NULL; |
| 400 | } | 422 | } |
| 401 | 423 | ||
| 424 | /* see comments in bfq_bic_update_cgroup for why refcounting */ | ||
| 425 | bfqg_get(bfqg); | ||
| 402 | return &bfqg->pd; | 426 | return &bfqg->pd; |
| 403 | } | 427 | } |
| 404 | 428 | ||
| @@ -426,7 +450,7 @@ void bfq_pd_free(struct blkg_policy_data *pd) | |||
| 426 | struct bfq_group *bfqg = pd_to_bfqg(pd); | 450 | struct bfq_group *bfqg = pd_to_bfqg(pd); |
| 427 | 451 | ||
| 428 | bfqg_stats_exit(&bfqg->stats); | 452 | bfqg_stats_exit(&bfqg->stats); |
| 429 | return kfree(bfqg); | 453 | bfqg_put(bfqg); |
| 430 | } | 454 | } |
| 431 | 455 | ||
| 432 | void bfq_pd_reset_stats(struct blkg_policy_data *pd) | 456 | void bfq_pd_reset_stats(struct blkg_policy_data *pd) |
| @@ -496,9 +520,10 @@ struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd, | |||
| 496 | * Move @bfqq to @bfqg, deactivating it from its old group and reactivating | 520 | * Move @bfqq to @bfqg, deactivating it from its old group and reactivating |
| 497 | * it on the new one. Avoid putting the entity on the old group idle tree. | 521 | * it on the new one. Avoid putting the entity on the old group idle tree. |
| 498 | * | 522 | * |
| 499 | * Must be called under the queue lock; the cgroup owning @bfqg must | 523 | * Must be called under the scheduler lock, to make sure that the blkg |
| 500 | * not disappear (by now this just means that we are called under | 524 | * owning @bfqg does not disappear (see comments in |
| 501 | * rcu_read_lock()). | 525 | * bfq_bic_update_cgroup on guaranteeing the consistency of blkg |
| 526 | * objects). | ||
| 502 | */ | 527 | */ |
| 503 | void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, | 528 | void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, |
| 504 | struct bfq_group *bfqg) | 529 | struct bfq_group *bfqg) |
| @@ -519,16 +544,12 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, | |||
| 519 | bfq_deactivate_bfqq(bfqd, bfqq, false, false); | 544 | bfq_deactivate_bfqq(bfqd, bfqq, false, false); |
| 520 | else if (entity->on_st) | 545 | else if (entity->on_st) |
| 521 | bfq_put_idle_entity(bfq_entity_service_tree(entity), entity); | 546 | bfq_put_idle_entity(bfq_entity_service_tree(entity), entity); |
| 522 | bfqg_put(bfqq_group(bfqq)); | 547 | bfqg_and_blkg_put(bfqq_group(bfqq)); |
| 523 | 548 | ||
| 524 | /* | ||
| 525 | * Here we use a reference to bfqg. We don't need a refcounter | ||
| 526 | * as the cgroup reference will not be dropped, so that its | ||
| 527 | * destroy() callback will not be invoked. | ||
| 528 | */ | ||
| 529 | entity->parent = bfqg->my_entity; | 549 | entity->parent = bfqg->my_entity; |
| 530 | entity->sched_data = &bfqg->sched_data; | 550 | entity->sched_data = &bfqg->sched_data; |
| 531 | bfqg_get(bfqg); | 551 | /* pin down bfqg and its associated blkg */ |
| 552 | bfqg_and_blkg_get(bfqg); | ||
| 532 | 553 | ||
| 533 | if (bfq_bfqq_busy(bfqq)) { | 554 | if (bfq_bfqq_busy(bfqq)) { |
| 534 | bfq_pos_tree_add_move(bfqd, bfqq); | 555 | bfq_pos_tree_add_move(bfqd, bfqq); |
| @@ -545,8 +566,9 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, | |||
| 545 | * @bic: the bic to move. | 566 | * @bic: the bic to move. |
| 546 | * @blkcg: the blk-cgroup to move to. | 567 | * @blkcg: the blk-cgroup to move to. |
| 547 | * | 568 | * |
| 548 | * Move bic to blkcg, assuming that bfqd->queue is locked; the caller | 569 | * Move bic to blkcg, assuming that bfqd->lock is held; which makes |
| 549 | * has to make sure that the reference to cgroup is valid across the call. | 570 | * sure that the reference to cgroup is valid across the call (see |
| 571 | * comments in bfq_bic_update_cgroup on this issue) | ||
| 550 | * | 572 | * |
| 551 | * NOTE: an alternative approach might have been to store the current | 573 | * NOTE: an alternative approach might have been to store the current |
| 552 | * cgroup in bfqq and getting a reference to it, reducing the lookup | 574 | * cgroup in bfqq and getting a reference to it, reducing the lookup |
| @@ -604,6 +626,57 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) | |||
| 604 | goto out; | 626 | goto out; |
| 605 | 627 | ||
| 606 | bfqg = __bfq_bic_change_cgroup(bfqd, bic, bio_blkcg(bio)); | 628 | bfqg = __bfq_bic_change_cgroup(bfqd, bic, bio_blkcg(bio)); |
| 629 | /* | ||
| 630 | * Update blkg_path for bfq_log_* functions. We cache this | ||
| 631 | * path, and update it here, for the following | ||
| 632 | * reasons. Operations on blkg objects in blk-cgroup are | ||
| 633 | * protected with the request_queue lock, and not with the | ||
| 634 | * lock that protects the instances of this scheduler | ||
| 635 | * (bfqd->lock). This exposes BFQ to the following sort of | ||
| 636 | * race. | ||
| 637 | * | ||
| 638 | * The blkg_lookup performed in bfq_get_queue, protected | ||
| 639 | * through rcu, may happen to return the address of a copy of | ||
| 640 | * the original blkg. If this is the case, then the | ||
| 641 | * bfqg_and_blkg_get performed in bfq_get_queue, to pin down | ||
| 642 | * the blkg, is useless: it does not prevent blk-cgroup code | ||
| 643 | * from destroying both the original blkg and all objects | ||
| 644 | * directly or indirectly referred by the copy of the | ||
| 645 | * blkg. | ||
| 646 | * | ||
| 647 | * On the bright side, destroy operations on a blkg invoke, as | ||
| 648 | * a first step, hooks of the scheduler associated with the | ||
| 649 | * blkg. And these hooks are executed with bfqd->lock held for | ||
| 650 | * BFQ. As a consequence, for any blkg associated with the | ||
| 651 | * request queue this instance of the scheduler is attached | ||
| 652 | * to, we are guaranteed that such a blkg is not destroyed, and | ||
| 653 | * that all the pointers it contains are consistent, while we | ||
| 654 | * are holding bfqd->lock. A blkg_lookup performed with | ||
| 655 | * bfqd->lock held then returns a fully consistent blkg, which | ||
| 656 | * remains consistent as long as this lock is held. | ||
| 657 | * | ||
| 658 | * Thanks to the last fact, and to the fact that: (1) bfqg has | ||
| 659 | * been obtained through a blkg_lookup in the above | ||
| 660 | * assignment, and (2) bfqd->lock is being held, here we can | ||
| 661 | * safely use the policy data for the involved blkg (i.e., the | ||
| 662 | * field bfqg->pd) to get to the blkg associated with bfqg, | ||
| 663 | * and then we can safely use any field of blkg. After we | ||
| 664 | * release bfqd->lock, even just getting blkg through this | ||
| 665 | * bfqg may cause dangling references to be traversed, as | ||
| 666 | * bfqg->pd may not exist any more. | ||
| 667 | * | ||
| 668 | * In view of the above facts, here we cache, in the bfqg, any | ||
| 669 | * blkg data we may need for this bic, and for its associated | ||
| 670 | * bfq_queue. As of now, we need to cache only the path of the | ||
| 671 | * blkg, which is used in the bfq_log_* functions. | ||
| 672 | * | ||
| 673 | * Finally, note that bfqg itself needs to be protected from | ||
| 674 | * destruction on the blkg_free of the original blkg (which | ||
| 675 | * invokes bfq_pd_free). We use an additional private | ||
| 676 | * refcounter for bfqg, to let it disappear only after no | ||
| 677 | * bfq_queue refers to it any longer. | ||
| 678 | */ | ||
| 679 | blkg_path(bfqg_to_blkg(bfqg), bfqg->blkg_path, sizeof(bfqg->blkg_path)); | ||
| 607 | bic->blkcg_serial_nr = serial_nr; | 680 | bic->blkcg_serial_nr = serial_nr; |
| 608 | out: | 681 | out: |
| 609 | rcu_read_unlock(); | 682 | rcu_read_unlock(); |
| @@ -640,8 +713,6 @@ static void bfq_reparent_leaf_entity(struct bfq_data *bfqd, | |||
| 640 | * @bfqd: the device data structure with the root group. | 713 | * @bfqd: the device data structure with the root group. |
| 641 | * @bfqg: the group to move from. | 714 | * @bfqg: the group to move from. |
| 642 | * @st: the service tree with the entities. | 715 | * @st: the service tree with the entities. |
| 643 | * | ||
| 644 | * Needs queue_lock to be taken and reference to be valid over the call. | ||
| 645 | */ | 716 | */ |
| 646 | static void bfq_reparent_active_entities(struct bfq_data *bfqd, | 717 | static void bfq_reparent_active_entities(struct bfq_data *bfqd, |
| 647 | struct bfq_group *bfqg, | 718 | struct bfq_group *bfqg, |
| @@ -692,8 +763,7 @@ void bfq_pd_offline(struct blkg_policy_data *pd) | |||
| 692 | /* | 763 | /* |
| 693 | * The idle tree may still contain bfq_queues belonging | 764 | * The idle tree may still contain bfq_queues belonging |
| 694 | * to exited task because they never migrated to a different | 765 | * to exited task because they never migrated to a different |
| 695 | * cgroup from the one being destroyed now. No one else | 766 | * cgroup from the one being destroyed now. |
| 696 | * can access them so it's safe to act without any lock. | ||
| 697 | */ | 767 | */ |
| 698 | bfq_flush_idle_tree(st); | 768 | bfq_flush_idle_tree(st); |
| 699 | 769 | ||
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 08ce45096350..ed93da2462ab 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c | |||
| @@ -3665,7 +3665,7 @@ void bfq_put_queue(struct bfq_queue *bfqq) | |||
| 3665 | 3665 | ||
| 3666 | kmem_cache_free(bfq_pool, bfqq); | 3666 | kmem_cache_free(bfq_pool, bfqq); |
| 3667 | #ifdef CONFIG_BFQ_GROUP_IOSCHED | 3667 | #ifdef CONFIG_BFQ_GROUP_IOSCHED |
| 3668 | bfqg_put(bfqg); | 3668 | bfqg_and_blkg_put(bfqg); |
| 3669 | #endif | 3669 | #endif |
| 3670 | } | 3670 | } |
| 3671 | 3671 | ||
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index ae783c06dfd9..5c3bf9861492 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h | |||
| @@ -759,6 +759,12 @@ struct bfq_group { | |||
| 759 | /* must be the first member */ | 759 | /* must be the first member */ |
| 760 | struct blkg_policy_data pd; | 760 | struct blkg_policy_data pd; |
| 761 | 761 | ||
| 762 | /* cached path for this blkg (see comments in bfq_bic_update_cgroup) */ | ||
| 763 | char blkg_path[128]; | ||
| 764 | |||
| 765 | /* reference counter (see comments in bfq_bic_update_cgroup) */ | ||
| 766 | int ref; | ||
| 767 | |||
| 762 | struct bfq_entity entity; | 768 | struct bfq_entity entity; |
| 763 | struct bfq_sched_data sched_data; | 769 | struct bfq_sched_data sched_data; |
| 764 | 770 | ||
| @@ -838,7 +844,7 @@ struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd, | |||
| 838 | struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg); | 844 | struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg); |
| 839 | struct bfq_group *bfqq_group(struct bfq_queue *bfqq); | 845 | struct bfq_group *bfqq_group(struct bfq_queue *bfqq); |
| 840 | struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node); | 846 | struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node); |
| 841 | void bfqg_put(struct bfq_group *bfqg); | 847 | void bfqg_and_blkg_put(struct bfq_group *bfqg); |
| 842 | 848 | ||
| 843 | #ifdef CONFIG_BFQ_GROUP_IOSCHED | 849 | #ifdef CONFIG_BFQ_GROUP_IOSCHED |
| 844 | extern struct cftype bfq_blkcg_legacy_files[]; | 850 | extern struct cftype bfq_blkcg_legacy_files[]; |
| @@ -910,20 +916,13 @@ void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq); | |||
| 910 | struct bfq_group *bfqq_group(struct bfq_queue *bfqq); | 916 | struct bfq_group *bfqq_group(struct bfq_queue *bfqq); |
| 911 | 917 | ||
| 912 | #define bfq_log_bfqq(bfqd, bfqq, fmt, args...) do { \ | 918 | #define bfq_log_bfqq(bfqd, bfqq, fmt, args...) do { \ |
| 913 | char __pbuf[128]; \ | 919 | blk_add_trace_msg((bfqd)->queue, "bfq%d%c %s " fmt, (bfqq)->pid,\ |
| 914 | \ | ||
| 915 | blkg_path(bfqg_to_blkg(bfqq_group(bfqq)), __pbuf, sizeof(__pbuf)); \ | ||
| 916 | blk_add_trace_msg((bfqd)->queue, "bfq%d%c %s " fmt, (bfqq)->pid, \ | ||
| 917 | bfq_bfqq_sync((bfqq)) ? 'S' : 'A', \ | 920 | bfq_bfqq_sync((bfqq)) ? 'S' : 'A', \ |
| 918 | __pbuf, ##args); \ | 921 | bfqq_group(bfqq)->blkg_path, ##args); \ |
| 919 | } while (0) | 922 | } while (0) |
| 920 | 923 | ||
| 921 | #define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do { \ | 924 | #define bfq_log_bfqg(bfqd, bfqg, fmt, args...) \ |
| 922 | char __pbuf[128]; \ | 925 | blk_add_trace_msg((bfqd)->queue, "%s " fmt, (bfqg)->blkg_path, ##args) |
| 923 | \ | ||
| 924 | blkg_path(bfqg_to_blkg(bfqg), __pbuf, sizeof(__pbuf)); \ | ||
| 925 | blk_add_trace_msg((bfqd)->queue, "%s " fmt, __pbuf, ##args); \ | ||
| 926 | } while (0) | ||
| 927 | 926 | ||
| 928 | #else /* CONFIG_BFQ_GROUP_IOSCHED */ | 927 | #else /* CONFIG_BFQ_GROUP_IOSCHED */ |
| 929 | 928 | ||
diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 17b9740e138b..b8a3a65f7364 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c | |||
| @@ -175,6 +175,9 @@ bool bio_integrity_enabled(struct bio *bio) | |||
| 175 | if (bio_op(bio) != REQ_OP_READ && bio_op(bio) != REQ_OP_WRITE) | 175 | if (bio_op(bio) != REQ_OP_READ && bio_op(bio) != REQ_OP_WRITE) |
| 176 | return false; | 176 | return false; |
| 177 | 177 | ||
| 178 | if (!bio_sectors(bio)) | ||
| 179 | return false; | ||
| 180 | |||
| 178 | /* Already protected? */ | 181 | /* Already protected? */ |
| 179 | if (bio_integrity(bio)) | 182 | if (bio_integrity(bio)) |
| 180 | return false; | 183 | return false; |
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 7c2947128f58..0480892e97e5 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c | |||
| @@ -74,7 +74,7 @@ static void blkg_free(struct blkcg_gq *blkg) | |||
| 74 | blkcg_policy[i]->pd_free_fn(blkg->pd[i]); | 74 | blkcg_policy[i]->pd_free_fn(blkg->pd[i]); |
| 75 | 75 | ||
| 76 | if (blkg->blkcg != &blkcg_root) | 76 | if (blkg->blkcg != &blkcg_root) |
| 77 | blk_exit_rl(&blkg->rl); | 77 | blk_exit_rl(blkg->q, &blkg->rl); |
| 78 | 78 | ||
| 79 | blkg_rwstat_exit(&blkg->stat_ios); | 79 | blkg_rwstat_exit(&blkg->stat_ios); |
| 80 | blkg_rwstat_exit(&blkg->stat_bytes); | 80 | blkg_rwstat_exit(&blkg->stat_bytes); |
diff --git a/block/blk-core.c b/block/blk-core.c index 3d84820ace9e..8592409db272 100644 --- a/block/blk-core.c +++ b/block/blk-core.c | |||
| @@ -706,13 +706,19 @@ int blk_init_rl(struct request_list *rl, struct request_queue *q, | |||
| 706 | if (!rl->rq_pool) | 706 | if (!rl->rq_pool) |
| 707 | return -ENOMEM; | 707 | return -ENOMEM; |
| 708 | 708 | ||
| 709 | if (rl != &q->root_rl) | ||
| 710 | WARN_ON_ONCE(!blk_get_queue(q)); | ||
| 711 | |||
| 709 | return 0; | 712 | return 0; |
| 710 | } | 713 | } |
| 711 | 714 | ||
| 712 | void blk_exit_rl(struct request_list *rl) | 715 | void blk_exit_rl(struct request_queue *q, struct request_list *rl) |
| 713 | { | 716 | { |
| 714 | if (rl->rq_pool) | 717 | if (rl->rq_pool) { |
| 715 | mempool_destroy(rl->rq_pool); | 718 | mempool_destroy(rl->rq_pool); |
| 719 | if (rl != &q->root_rl) | ||
| 720 | blk_put_queue(q); | ||
| 721 | } | ||
| 716 | } | 722 | } |
| 717 | 723 | ||
| 718 | struct request_queue *blk_alloc_queue(gfp_t gfp_mask) | 724 | struct request_queue *blk_alloc_queue(gfp_t gfp_mask) |
diff --git a/block/blk-mq.c b/block/blk-mq.c index 7af78b1e9db9..da2f21961525 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c | |||
| @@ -1396,22 +1396,28 @@ static blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, struct request *rq) | |||
| 1396 | return blk_tag_to_qc_t(rq->internal_tag, hctx->queue_num, true); | 1396 | return blk_tag_to_qc_t(rq->internal_tag, hctx->queue_num, true); |
| 1397 | } | 1397 | } |
| 1398 | 1398 | ||
| 1399 | static void __blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie, | 1399 | static void __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, |
| 1400 | bool may_sleep) | 1400 | struct request *rq, |
| 1401 | blk_qc_t *cookie, bool may_sleep) | ||
| 1401 | { | 1402 | { |
| 1402 | struct request_queue *q = rq->q; | 1403 | struct request_queue *q = rq->q; |
| 1403 | struct blk_mq_queue_data bd = { | 1404 | struct blk_mq_queue_data bd = { |
| 1404 | .rq = rq, | 1405 | .rq = rq, |
| 1405 | .last = true, | 1406 | .last = true, |
| 1406 | }; | 1407 | }; |
| 1407 | struct blk_mq_hw_ctx *hctx; | ||
| 1408 | blk_qc_t new_cookie; | 1408 | blk_qc_t new_cookie; |
| 1409 | blk_status_t ret; | 1409 | int ret; |
| 1410 | bool run_queue = true; | ||
| 1411 | |||
| 1412 | if (blk_mq_hctx_stopped(hctx)) { | ||
| 1413 | run_queue = false; | ||
| 1414 | goto insert; | ||
| 1415 | } | ||
| 1410 | 1416 | ||
| 1411 | if (q->elevator) | 1417 | if (q->elevator) |
| 1412 | goto insert; | 1418 | goto insert; |
| 1413 | 1419 | ||
| 1414 | if (!blk_mq_get_driver_tag(rq, &hctx, false)) | 1420 | if (!blk_mq_get_driver_tag(rq, NULL, false)) |
| 1415 | goto insert; | 1421 | goto insert; |
| 1416 | 1422 | ||
| 1417 | new_cookie = request_to_qc_t(hctx, rq); | 1423 | new_cookie = request_to_qc_t(hctx, rq); |
| @@ -1436,7 +1442,7 @@ static void __blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie, | |||
| 1436 | } | 1442 | } |
| 1437 | 1443 | ||
| 1438 | insert: | 1444 | insert: |
| 1439 | blk_mq_sched_insert_request(rq, false, true, false, may_sleep); | 1445 | blk_mq_sched_insert_request(rq, false, run_queue, false, may_sleep); |
| 1440 | } | 1446 | } |
| 1441 | 1447 | ||
| 1442 | static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, | 1448 | static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, |
| @@ -1444,7 +1450,7 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, | |||
| 1444 | { | 1450 | { |
| 1445 | if (!(hctx->flags & BLK_MQ_F_BLOCKING)) { | 1451 | if (!(hctx->flags & BLK_MQ_F_BLOCKING)) { |
| 1446 | rcu_read_lock(); | 1452 | rcu_read_lock(); |
| 1447 | __blk_mq_try_issue_directly(rq, cookie, false); | 1453 | __blk_mq_try_issue_directly(hctx, rq, cookie, false); |
| 1448 | rcu_read_unlock(); | 1454 | rcu_read_unlock(); |
| 1449 | } else { | 1455 | } else { |
| 1450 | unsigned int srcu_idx; | 1456 | unsigned int srcu_idx; |
| @@ -1452,7 +1458,7 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, | |||
| 1452 | might_sleep(); | 1458 | might_sleep(); |
| 1453 | 1459 | ||
| 1454 | srcu_idx = srcu_read_lock(&hctx->queue_rq_srcu); | 1460 | srcu_idx = srcu_read_lock(&hctx->queue_rq_srcu); |
| 1455 | __blk_mq_try_issue_directly(rq, cookie, true); | 1461 | __blk_mq_try_issue_directly(hctx, rq, cookie, true); |
| 1456 | srcu_read_unlock(&hctx->queue_rq_srcu, srcu_idx); | 1462 | srcu_read_unlock(&hctx->queue_rq_srcu, srcu_idx); |
| 1457 | } | 1463 | } |
| 1458 | } | 1464 | } |
| @@ -1555,9 +1561,12 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) | |||
| 1555 | 1561 | ||
| 1556 | blk_mq_put_ctx(data.ctx); | 1562 | blk_mq_put_ctx(data.ctx); |
| 1557 | 1563 | ||
| 1558 | if (same_queue_rq) | 1564 | if (same_queue_rq) { |
| 1565 | data.hctx = blk_mq_map_queue(q, | ||
| 1566 | same_queue_rq->mq_ctx->cpu); | ||
| 1559 | blk_mq_try_issue_directly(data.hctx, same_queue_rq, | 1567 | blk_mq_try_issue_directly(data.hctx, same_queue_rq, |
| 1560 | &cookie); | 1568 | &cookie); |
| 1569 | } | ||
| 1561 | } else if (q->nr_hw_queues > 1 && is_sync) { | 1570 | } else if (q->nr_hw_queues > 1 && is_sync) { |
| 1562 | blk_mq_put_ctx(data.ctx); | 1571 | blk_mq_put_ctx(data.ctx); |
| 1563 | blk_mq_bio_to_request(rq, bio); | 1572 | blk_mq_bio_to_request(rq, bio); |
| @@ -2578,7 +2587,8 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr) | |||
| 2578 | return ret; | 2587 | return ret; |
| 2579 | } | 2588 | } |
| 2580 | 2589 | ||
| 2581 | void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues) | 2590 | static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, |
| 2591 | int nr_hw_queues) | ||
| 2582 | { | 2592 | { |
| 2583 | struct request_queue *q; | 2593 | struct request_queue *q; |
| 2584 | 2594 | ||
| @@ -2602,6 +2612,13 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues) | |||
| 2602 | list_for_each_entry(q, &set->tag_list, tag_set_list) | 2612 | list_for_each_entry(q, &set->tag_list, tag_set_list) |
| 2603 | blk_mq_unfreeze_queue(q); | 2613 | blk_mq_unfreeze_queue(q); |
| 2604 | } | 2614 | } |
| 2615 | |||
| 2616 | void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues) | ||
| 2617 | { | ||
| 2618 | mutex_lock(&set->tag_list_lock); | ||
| 2619 | __blk_mq_update_nr_hw_queues(set, nr_hw_queues); | ||
| 2620 | mutex_unlock(&set->tag_list_lock); | ||
| 2621 | } | ||
| 2605 | EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues); | 2622 | EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues); |
| 2606 | 2623 | ||
| 2607 | /* Enable polling stats and return whether they were already enabled. */ | 2624 | /* Enable polling stats and return whether they were already enabled. */ |
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 712b018e9f54..283da7fbe034 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c | |||
| @@ -809,7 +809,7 @@ static void blk_release_queue(struct kobject *kobj) | |||
| 809 | 809 | ||
| 810 | blk_free_queue_stats(q->stats); | 810 | blk_free_queue_stats(q->stats); |
| 811 | 811 | ||
| 812 | blk_exit_rl(&q->root_rl); | 812 | blk_exit_rl(q, &q->root_rl); |
| 813 | 813 | ||
| 814 | if (q->queue_tags) | 814 | if (q->queue_tags) |
| 815 | __blk_queue_free_tags(q); | 815 | __blk_queue_free_tags(q); |
diff --git a/block/blk-throttle.c b/block/blk-throttle.c index fc13dd0c6e39..a7285bf2831c 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c | |||
| @@ -27,6 +27,13 @@ static int throtl_quantum = 32; | |||
| 27 | #define MIN_THROTL_IOPS (10) | 27 | #define MIN_THROTL_IOPS (10) |
| 28 | #define DFL_LATENCY_TARGET (-1L) | 28 | #define DFL_LATENCY_TARGET (-1L) |
| 29 | #define DFL_IDLE_THRESHOLD (0) | 29 | #define DFL_IDLE_THRESHOLD (0) |
| 30 | #define DFL_HD_BASELINE_LATENCY (4000L) /* 4ms */ | ||
| 31 | #define LATENCY_FILTERED_SSD (0) | ||
| 32 | /* | ||
| 33 | * For HD, very small latency comes from sequential IO. Such IO does not | ||
| 34 | * help determine whether it is impacted by others, hence we ignore it. | ||
| 35 | */ | ||
| 36 | #define LATENCY_FILTERED_HD (1000L) /* 1ms */ | ||
| 30 | 37 | ||
| 31 | #define SKIP_LATENCY (((u64)1) << BLK_STAT_RES_SHIFT) | 38 | #define SKIP_LATENCY (((u64)1) << BLK_STAT_RES_SHIFT) |
| 32 | 39 | ||
| @@ -212,6 +219,7 @@ struct throtl_data | |||
| 212 | struct avg_latency_bucket avg_buckets[LATENCY_BUCKET_SIZE]; | 219 | struct avg_latency_bucket avg_buckets[LATENCY_BUCKET_SIZE]; |
| 213 | struct latency_bucket __percpu *latency_buckets; | 220 | struct latency_bucket __percpu *latency_buckets; |
| 214 | unsigned long last_calculate_time; | 221 | unsigned long last_calculate_time; |
| 222 | unsigned long filtered_latency; | ||
| 215 | 223 | ||
| 216 | bool track_bio_latency; | 224 | bool track_bio_latency; |
| 217 | }; | 225 | }; |
| @@ -698,7 +706,7 @@ static void throtl_dequeue_tg(struct throtl_grp *tg) | |||
| 698 | static void throtl_schedule_pending_timer(struct throtl_service_queue *sq, | 706 | static void throtl_schedule_pending_timer(struct throtl_service_queue *sq, |
| 699 | unsigned long expires) | 707 | unsigned long expires) |
| 700 | { | 708 | { |
| 701 | unsigned long max_expire = jiffies + 8 * sq_to_tg(sq)->td->throtl_slice; | 709 | unsigned long max_expire = jiffies + 8 * sq_to_td(sq)->throtl_slice; |
| 702 | 710 | ||
| 703 | /* | 711 | /* |
| 704 | * Since we are adjusting the throttle limit dynamically, the sleep | 712 | * Since we are adjusting the throttle limit dynamically, the sleep |
| @@ -2281,7 +2289,7 @@ void blk_throtl_bio_endio(struct bio *bio) | |||
| 2281 | throtl_track_latency(tg->td, blk_stat_size(&bio->bi_issue_stat), | 2289 | throtl_track_latency(tg->td, blk_stat_size(&bio->bi_issue_stat), |
| 2282 | bio_op(bio), lat); | 2290 | bio_op(bio), lat); |
| 2283 | 2291 | ||
| 2284 | if (tg->latency_target) { | 2292 | if (tg->latency_target && lat >= tg->td->filtered_latency) { |
| 2285 | int bucket; | 2293 | int bucket; |
| 2286 | unsigned int threshold; | 2294 | unsigned int threshold; |
| 2287 | 2295 | ||
| @@ -2417,14 +2425,20 @@ void blk_throtl_exit(struct request_queue *q) | |||
| 2417 | void blk_throtl_register_queue(struct request_queue *q) | 2425 | void blk_throtl_register_queue(struct request_queue *q) |
| 2418 | { | 2426 | { |
| 2419 | struct throtl_data *td; | 2427 | struct throtl_data *td; |
| 2428 | int i; | ||
| 2420 | 2429 | ||
| 2421 | td = q->td; | 2430 | td = q->td; |
| 2422 | BUG_ON(!td); | 2431 | BUG_ON(!td); |
| 2423 | 2432 | ||
| 2424 | if (blk_queue_nonrot(q)) | 2433 | if (blk_queue_nonrot(q)) { |
| 2425 | td->throtl_slice = DFL_THROTL_SLICE_SSD; | 2434 | td->throtl_slice = DFL_THROTL_SLICE_SSD; |
| 2426 | else | 2435 | td->filtered_latency = LATENCY_FILTERED_SSD; |
| 2436 | } else { | ||
| 2427 | td->throtl_slice = DFL_THROTL_SLICE_HD; | 2437 | td->throtl_slice = DFL_THROTL_SLICE_HD; |
| 2438 | td->filtered_latency = LATENCY_FILTERED_HD; | ||
| 2439 | for (i = 0; i < LATENCY_BUCKET_SIZE; i++) | ||
| 2440 | td->avg_buckets[i].latency = DFL_HD_BASELINE_LATENCY; | ||
| 2441 | } | ||
| 2428 | #ifndef CONFIG_BLK_DEV_THROTTLING_LOW | 2442 | #ifndef CONFIG_BLK_DEV_THROTTLING_LOW |
| 2429 | /* if no low limit, use previous default */ | 2443 | /* if no low limit, use previous default */ |
| 2430 | td->throtl_slice = DFL_THROTL_SLICE_HD; | 2444 | td->throtl_slice = DFL_THROTL_SLICE_HD; |
diff --git a/block/blk.h b/block/blk.h index 2ed70228e44f..83c8e1100525 100644 --- a/block/blk.h +++ b/block/blk.h | |||
| @@ -59,7 +59,7 @@ void blk_free_flush_queue(struct blk_flush_queue *q); | |||
| 59 | 59 | ||
| 60 | int blk_init_rl(struct request_list *rl, struct request_queue *q, | 60 | int blk_init_rl(struct request_list *rl, struct request_queue *q, |
| 61 | gfp_t gfp_mask); | 61 | gfp_t gfp_mask); |
| 62 | void blk_exit_rl(struct request_list *rl); | 62 | void blk_exit_rl(struct request_queue *q, struct request_list *rl); |
| 63 | void blk_rq_bio_prep(struct request_queue *q, struct request *rq, | 63 | void blk_rq_bio_prep(struct request_queue *q, struct request *rq, |
| 64 | struct bio *bio); | 64 | struct bio *bio); |
| 65 | void blk_queue_bypass_start(struct request_queue *q); | 65 | void blk_queue_bypass_start(struct request_queue *q); |
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index f57bc7d5c483..3d5c28945719 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c | |||
| @@ -38,9 +38,13 @@ static const u64 cfq_target_latency = (u64)NSEC_PER_SEC * 3/10; /* 300 ms */ | |||
| 38 | static const int cfq_hist_divisor = 4; | 38 | static const int cfq_hist_divisor = 4; |
| 39 | 39 | ||
| 40 | /* | 40 | /* |
| 41 | * offset from end of service tree | 41 | * offset from end of queue service tree for idle class |
| 42 | */ | 42 | */ |
| 43 | #define CFQ_IDLE_DELAY (NSEC_PER_SEC / 5) | 43 | #define CFQ_IDLE_DELAY (NSEC_PER_SEC / 5) |
| 44 | /* offset from end of group service tree under time slice mode */ | ||
| 45 | #define CFQ_SLICE_MODE_GROUP_DELAY (NSEC_PER_SEC / 5) | ||
| 46 | /* offset from end of group service under IOPS mode */ | ||
| 47 | #define CFQ_IOPS_MODE_GROUP_DELAY (HZ / 5) | ||
| 44 | 48 | ||
| 45 | /* | 49 | /* |
| 46 | * below this threshold, we consider thinktime immediate | 50 | * below this threshold, we consider thinktime immediate |
| @@ -1353,6 +1357,14 @@ cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg) | |||
| 1353 | cfqg->vfraction = max_t(unsigned, vfr, 1); | 1357 | cfqg->vfraction = max_t(unsigned, vfr, 1); |
| 1354 | } | 1358 | } |
| 1355 | 1359 | ||
| 1360 | static inline u64 cfq_get_cfqg_vdisktime_delay(struct cfq_data *cfqd) | ||
| 1361 | { | ||
| 1362 | if (!iops_mode(cfqd)) | ||
| 1363 | return CFQ_SLICE_MODE_GROUP_DELAY; | ||
| 1364 | else | ||
| 1365 | return CFQ_IOPS_MODE_GROUP_DELAY; | ||
| 1366 | } | ||
| 1367 | |||
| 1356 | static void | 1368 | static void |
| 1357 | cfq_group_notify_queue_add(struct cfq_data *cfqd, struct cfq_group *cfqg) | 1369 | cfq_group_notify_queue_add(struct cfq_data *cfqd, struct cfq_group *cfqg) |
| 1358 | { | 1370 | { |
| @@ -1372,7 +1384,8 @@ cfq_group_notify_queue_add(struct cfq_data *cfqd, struct cfq_group *cfqg) | |||
| 1372 | n = rb_last(&st->rb); | 1384 | n = rb_last(&st->rb); |
| 1373 | if (n) { | 1385 | if (n) { |
| 1374 | __cfqg = rb_entry_cfqg(n); | 1386 | __cfqg = rb_entry_cfqg(n); |
| 1375 | cfqg->vdisktime = __cfqg->vdisktime + CFQ_IDLE_DELAY; | 1387 | cfqg->vdisktime = __cfqg->vdisktime + |
| 1388 | cfq_get_cfqg_vdisktime_delay(cfqd); | ||
| 1376 | } else | 1389 | } else |
| 1377 | cfqg->vdisktime = st->min_vdisktime; | 1390 | cfqg->vdisktime = st->min_vdisktime; |
| 1378 | cfq_group_service_tree_add(st, cfqg); | 1391 | cfq_group_service_tree_add(st, cfqg); |
