Diffstat (limited to 'block')
-rw-r--r--  block/blk-cgroup.c    |  91
-rw-r--r--  block/blk-core.c      |   7
-rw-r--r--  block/blk-mq-cpumap.c |   2
-rw-r--r--  block/blk-mq.c        |  68
-rw-r--r--  block/blk-sysfs.c     |   2
-rw-r--r--  block/bounce.c        |   2
-rw-r--r--  block/cfq-iosched.c   | 125
-rw-r--r--  block/elevator.c      |   8
-rw-r--r--  block/genhd.c         |  13
9 files changed, 243 insertions, 75 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 31610ae0ebff..9f97da52d006 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -9,6 +9,10 @@
  *
  * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
  *                    Nauman Rafique <nauman@google.com>
+ *
+ * For policy-specific per-blkcg data:
+ * Copyright (C) 2015 Paolo Valente <paolo.valente@unimore.it>
+ *                    Arianna Avanzini <avanzini.arianna@gmail.com>
  */
 #include <linux/ioprio.h>
 #include <linux/kdev_t.h>
@@ -27,8 +31,7 @@
 
 static DEFINE_MUTEX(blkcg_pol_mutex);
 
-struct blkcg blkcg_root = { .cfq_weight = 2 * CFQ_WEIGHT_DEFAULT,
-                            .cfq_leaf_weight = 2 * CFQ_WEIGHT_DEFAULT, };
+struct blkcg blkcg_root;
 EXPORT_SYMBOL_GPL(blkcg_root);
 
 struct cgroup_subsys_state * const blkcg_root_css = &blkcg_root.css;
@@ -827,6 +830,8 @@ static struct cgroup_subsys_state *
 blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
 {
         struct blkcg *blkcg;
+        struct cgroup_subsys_state *ret;
+        int i;
 
         if (!parent_css) {
                 blkcg = &blkcg_root;
@@ -834,11 +839,35 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
         }
 
         blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
-        if (!blkcg)
-                return ERR_PTR(-ENOMEM);
+        if (!blkcg) {
+                ret = ERR_PTR(-ENOMEM);
+                goto free_blkcg;
+        }
+
+        for (i = 0; i < BLKCG_MAX_POLS ; i++) {
+                struct blkcg_policy *pol = blkcg_policy[i];
+                struct blkcg_policy_data *cpd;
+
+                /*
+                 * If the policy hasn't been attached yet, wait for it
+                 * to be attached before doing anything else. Otherwise,
+                 * check if the policy requires any specific per-cgroup
+                 * data: if it does, allocate and initialize it.
+                 */
+                if (!pol || !pol->cpd_size)
+                        continue;
+
+                BUG_ON(blkcg->pd[i]);
+                cpd = kzalloc(pol->cpd_size, GFP_KERNEL);
+                if (!cpd) {
+                        ret = ERR_PTR(-ENOMEM);
+                        goto free_pd_blkcg;
+                }
+                blkcg->pd[i] = cpd;
+                cpd->plid = i;
+                pol->cpd_init_fn(blkcg);
+        }
 
-        blkcg->cfq_weight = CFQ_WEIGHT_DEFAULT;
-        blkcg->cfq_leaf_weight = CFQ_WEIGHT_DEFAULT;
 done:
         spin_lock_init(&blkcg->lock);
         INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_ATOMIC);
@@ -847,6 +876,14 @@ done:
         INIT_LIST_HEAD(&blkcg->cgwb_list);
 #endif
         return &blkcg->css;
+
+free_pd_blkcg:
+        for (i--; i >= 0; i--)
+                kfree(blkcg->pd[i]);
+
+free_blkcg:
+        kfree(blkcg);
+        return ret;
 }
 
 /**
@@ -1000,8 +1037,10 @@ int blkcg_activate_policy(struct request_queue *q,
                           const struct blkcg_policy *pol)
 {
         LIST_HEAD(pds);
+        LIST_HEAD(cpds);
         struct blkcg_gq *blkg;
-        struct blkg_policy_data *pd, *n;
+        struct blkg_policy_data *pd, *nd;
+        struct blkcg_policy_data *cpd, *cnd;
         int cnt = 0, ret;
 
         if (blkcg_policy_enabled(q, pol))
@@ -1014,6 +1053,10 @@ int blkcg_activate_policy(struct request_queue *q,
                 cnt++;
         spin_unlock_irq(q->queue_lock);
 
+        /*
+         * Allocate per-blkg and per-blkcg policy data
+         * for all existing blkgs.
+         */
         while (cnt--) {
                 pd = kzalloc_node(pol->pd_size, GFP_KERNEL, q->node);
                 if (!pd) {
@@ -1021,26 +1064,50 @@ int blkcg_activate_policy(struct request_queue *q,
                         goto out_free;
                 }
                 list_add_tail(&pd->alloc_node, &pds);
+
+                if (!pol->cpd_size)
+                        continue;
+                cpd = kzalloc_node(pol->cpd_size, GFP_KERNEL, q->node);
+                if (!cpd) {
+                        ret = -ENOMEM;
+                        goto out_free;
+                }
+                list_add_tail(&cpd->alloc_node, &cpds);
         }
 
         /*
-         * Install the allocated pds. With @q bypassing, no new blkg
+         * Install the allocated pds and cpds. With @q bypassing, no new blkg
          * should have been created while the queue lock was dropped.
          */
         spin_lock_irq(q->queue_lock);
 
         list_for_each_entry(blkg, &q->blkg_list, q_node) {
-                if (WARN_ON(list_empty(&pds))) {
+                if (WARN_ON(list_empty(&pds)) ||
+                    WARN_ON(pol->cpd_size && list_empty(&cpds))) {
                         /* umm... this shouldn't happen, just abort */
                         ret = -ENOMEM;
                         goto out_unlock;
                 }
+                cpd = list_first_entry(&cpds, struct blkcg_policy_data,
+                                       alloc_node);
+                list_del_init(&cpd->alloc_node);
                 pd = list_first_entry(&pds, struct blkg_policy_data, alloc_node);
                 list_del_init(&pd->alloc_node);
 
                 /* grab blkcg lock too while installing @pd on @blkg */
                 spin_lock(&blkg->blkcg->lock);
 
+                if (!pol->cpd_size)
+                        goto no_cpd;
+                if (!blkg->blkcg->pd[pol->plid]) {
+                        /* Per-policy per-blkcg data */
+                        blkg->blkcg->pd[pol->plid] = cpd;
+                        cpd->plid = pol->plid;
+                        pol->cpd_init_fn(blkg->blkcg);
+                } else { /* must free it as it has already been extracted */
+                        kfree(cpd);
+                }
+no_cpd:
                 blkg->pd[pol->plid] = pd;
                 pd->blkg = blkg;
                 pd->plid = pol->plid;
@@ -1055,8 +1122,10 @@ out_unlock:
         spin_unlock_irq(q->queue_lock);
 out_free:
         blk_queue_bypass_end(q);
-        list_for_each_entry_safe(pd, n, &pds, alloc_node)
+        list_for_each_entry_safe(pd, nd, &pds, alloc_node)
                 kfree(pd);
+        list_for_each_entry_safe(cpd, cnd, &cpds, alloc_node)
+                kfree(cpd);
         return ret;
 }
 EXPORT_SYMBOL_GPL(blkcg_activate_policy);
@@ -1093,6 +1162,8 @@ void blkcg_deactivate_policy(struct request_queue *q,
 
                 kfree(blkg->pd[pol->plid]);
                 blkg->pd[pol->plid] = NULL;
+                kfree(blkg->blkcg->pd[pol->plid]);
+                blkg->blkcg->pd[pol->plid] = NULL;
 
                 spin_unlock(&blkg->blkcg->lock);
         }
diff --git a/block/blk-core.c b/block/blk-core.c
index a4a2dbe46fe3..688ae9482cb8 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -579,6 +579,8 @@ void blk_cleanup_queue(struct request_queue *q)
         q->queue_lock = &q->__queue_lock;
         spin_unlock_irq(lock);
 
+        bdi_destroy(&q->backing_dev_info);
+
         /* @q is and will stay empty, shutdown and put */
         blk_put_queue(q);
 }
@@ -758,6 +760,8 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
 }
 EXPORT_SYMBOL(blk_init_queue_node);
 
+static void blk_queue_bio(struct request_queue *q, struct bio *bio);
+
 struct request_queue *
 blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
                          spinlock_t *lock)
@@ -1601,7 +1605,7 @@ void init_request_from_bio(struct request *req, struct bio *bio)
         blk_rq_bio_prep(req->q, req, bio);
 }
 
-void blk_queue_bio(struct request_queue *q, struct bio *bio)
+static void blk_queue_bio(struct request_queue *q, struct bio *bio)
 {
         const bool sync = !!(bio->bi_rw & REQ_SYNC);
         struct blk_plug *plug;
@@ -1709,7 +1713,6 @@ out_unlock:
                 spin_unlock_irq(q->queue_lock);
         }
 }
-EXPORT_SYMBOL_GPL(blk_queue_bio);       /* for device mapper only */
 
 /*
  * If bio->bi_dev is a partition, remap the location
diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
index 5f13f4d0bcce..1e28ddb656b8 100644
--- a/block/blk-mq-cpumap.c
+++ b/block/blk-mq-cpumap.c
@@ -24,7 +24,7 @@ static int get_first_sibling(unsigned int cpu)
 {
         unsigned int ret;
 
-        ret = cpumask_first(topology_thread_cpumask(cpu));
+        ret = cpumask_first(topology_sibling_cpumask(cpu));
         if (ret < nr_cpu_ids)
                 return ret;
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index ef100fd2cb86..f53779692c77 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -673,8 +673,11 @@ static void blk_mq_rq_timer(unsigned long priv)
                 data.next = blk_rq_timeout(round_jiffies_up(data.next));
                 mod_timer(&q->timeout, data.next);
         } else {
-                queue_for_each_hw_ctx(q, hctx, i)
-                        blk_mq_tag_idle(hctx);
+                queue_for_each_hw_ctx(q, hctx, i) {
+                        /* the hctx may be unmapped, so check it here */
+                        if (blk_mq_hw_queue_mapped(hctx))
+                                blk_mq_tag_idle(hctx);
+                }
         }
 }
 
@@ -851,6 +854,16 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
                 spin_lock(&hctx->lock);
                 list_splice(&rq_list, &hctx->dispatch);
                 spin_unlock(&hctx->lock);
+                /*
+                 * the queue is expected stopped with BLK_MQ_RQ_QUEUE_BUSY, but
+                 * it's possible the queue is stopped and restarted again
+                 * before this. Queue restart will dispatch requests. And since
+                 * requests in rq_list aren't added into hctx->dispatch yet,
+                 * the requests in rq_list might get lost.
+                 *
+                 * blk_mq_run_hw_queue() already checks the STOPPED bit
+                 **/
+                blk_mq_run_hw_queue(hctx, true);
         }
 }
 
@@ -1600,22 +1613,6 @@ static int blk_mq_hctx_cpu_offline(struct blk_mq_hw_ctx *hctx, int cpu)
         return NOTIFY_OK;
 }
 
-static int blk_mq_hctx_cpu_online(struct blk_mq_hw_ctx *hctx, int cpu)
-{
-        struct request_queue *q = hctx->queue;
-        struct blk_mq_tag_set *set = q->tag_set;
-
-        if (set->tags[hctx->queue_num])
-                return NOTIFY_OK;
-
-        set->tags[hctx->queue_num] = blk_mq_init_rq_map(set, hctx->queue_num);
-        if (!set->tags[hctx->queue_num])
-                return NOTIFY_STOP;
-
-        hctx->tags = set->tags[hctx->queue_num];
-        return NOTIFY_OK;
-}
-
 static int blk_mq_hctx_notify(void *data, unsigned long action,
                               unsigned int cpu)
 {
@@ -1623,12 +1620,16 @@ static int blk_mq_hctx_notify(void *data, unsigned long action,
 
         if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
                 return blk_mq_hctx_cpu_offline(hctx, cpu);
-        else if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
-                return blk_mq_hctx_cpu_online(hctx, cpu);
+
+        /*
+         * In case of CPU online, tags may be reallocated
+         * in blk_mq_map_swqueue() after mapping is updated.
+         */
 
         return NOTIFY_OK;
 }
 
+/* hctx->ctxs will be freed in queue's release handler */
 static void blk_mq_exit_hctx(struct request_queue *q,
                 struct blk_mq_tag_set *set,
                 struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
@@ -1647,7 +1648,6 @@ static void blk_mq_exit_hctx(struct request_queue *q,
 
         blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier);
         blk_free_flush_queue(hctx->fq);
-        kfree(hctx->ctxs);
         blk_mq_free_bitmap(&hctx->ctx_map);
 }
 
@@ -1804,6 +1804,7 @@ static void blk_mq_map_swqueue(struct request_queue *q)
         unsigned int i;
         struct blk_mq_hw_ctx *hctx;
         struct blk_mq_ctx *ctx;
+        struct blk_mq_tag_set *set = q->tag_set;
 
         queue_for_each_hw_ctx(q, hctx, i) {
                 cpumask_clear(hctx->cpumask);
@@ -1833,16 +1834,20 @@ static void blk_mq_map_swqueue(struct request_queue *q)
                  * disable it and free the request entries.
                  */
                 if (!hctx->nr_ctx) {
-                        struct blk_mq_tag_set *set = q->tag_set;
-
                         if (set->tags[i]) {
                                 blk_mq_free_rq_map(set, set->tags[i], i);
                                 set->tags[i] = NULL;
-                                hctx->tags = NULL;
                         }
+                        hctx->tags = NULL;
                         continue;
                 }
 
+                /* unmapped hw queue can be remapped after CPU topo changed */
+                if (!set->tags[i])
+                        set->tags[i] = blk_mq_init_rq_map(set, i);
+                hctx->tags = set->tags[i];
+                WARN_ON(!hctx->tags);
+
                 /*
                  * Set the map size to the number of mapped software queues.
                  * This is more accurate and more efficient than looping
@@ -1916,8 +1921,12 @@ void blk_mq_release(struct request_queue *q)
         unsigned int i;
 
         /* hctx kobj stays in hctx */
-        queue_for_each_hw_ctx(q, hctx, i)
+        queue_for_each_hw_ctx(q, hctx, i) {
+                if (!hctx)
+                        continue;
+                kfree(hctx->ctxs);
                 kfree(hctx);
+        }
 
         kfree(q->queue_hw_ctx);
 
@@ -2120,9 +2129,16 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
          */
         list_for_each_entry(q, &all_q_list, all_q_node)
                 blk_mq_freeze_queue_start(q);
-        list_for_each_entry(q, &all_q_list, all_q_node)
+        list_for_each_entry(q, &all_q_list, all_q_node) {
                 blk_mq_freeze_queue_wait(q);
 
+                /*
+                 * timeout handler can't touch hw queue during the
+                 * reinitialization
+                 */
+                del_timer_sync(&q->timeout);
+        }
+
         list_for_each_entry(q, &all_q_list, all_q_node)
                 blk_mq_queue_reinit(q);
 
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 1b60941dc4c6..6264b382d4d1 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -523,8 +523,6 @@ static void blk_release_queue(struct kobject *kobj)
 
         blk_trace_shutdown(q);
 
-        bdi_destroy(&q->backing_dev_info);
-
         ida_simple_remove(&blk_queue_ida, q->id);
         call_rcu(&q->rcu_head, blk_free_queue_rcu);
 }
diff --git a/block/bounce.c b/block/bounce.c
index 072280b3dd13..b17311227c12 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -219,8 +219,8 @@ bounce:
                 if (page_to_pfn(page) <= queue_bounce_pfn(q) && !force)
                         continue;
 
-                inc_zone_page_state(to->bv_page, NR_BOUNCE);
                 to->bv_page = mempool_alloc(pool, q->bounce_gfp);
+                inc_zone_page_state(to->bv_page, NR_BOUNCE);
 
                 if (rw == WRITE) {
                         char *vto, *vfrom;
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index bc8f42930773..c62bb2e650b8 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -67,6 +67,11 @@ static struct kmem_cache *cfq_pool;
 #define sample_valid(samples)   ((samples) > 80)
 #define rb_entry_cfqg(node)     rb_entry((node), struct cfq_group, rb_node)
 
+/* blkio-related constants */
+#define CFQ_WEIGHT_MIN          10
+#define CFQ_WEIGHT_MAX          1000
+#define CFQ_WEIGHT_DEFAULT      500
+
 struct cfq_ttime {
         unsigned long last_end_request;
 
@@ -212,6 +217,15 @@ struct cfqg_stats {
 #endif  /* CONFIG_CFQ_GROUP_IOSCHED */
 };
 
+/* Per-cgroup data */
+struct cfq_group_data {
+        /* must be the first member */
+        struct blkcg_policy_data pd;
+
+        unsigned int weight;
+        unsigned int leaf_weight;
+};
+
 /* This is per cgroup per device grouping structure */
 struct cfq_group {
         /* must be the first member */
@@ -446,16 +460,6 @@ CFQ_CFQQ_FNS(deep);
 CFQ_CFQQ_FNS(wait_busy);
 #undef CFQ_CFQQ_FNS
 
-static inline struct cfq_group *pd_to_cfqg(struct blkg_policy_data *pd)
-{
-        return pd ? container_of(pd, struct cfq_group, pd) : NULL;
-}
-
-static inline struct blkcg_gq *cfqg_to_blkg(struct cfq_group *cfqg)
-{
-        return pd_to_blkg(&cfqg->pd);
-}
-
 #if defined(CONFIG_CFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP)
 
 /* cfqg stats flags */
@@ -600,6 +604,22 @@ static inline void cfqg_stats_update_avg_queue_size(struct cfq_group *cfqg) { }
 
 #ifdef CONFIG_CFQ_GROUP_IOSCHED
 
+static inline struct cfq_group *pd_to_cfqg(struct blkg_policy_data *pd)
+{
+        return pd ? container_of(pd, struct cfq_group, pd) : NULL;
+}
+
+static struct cfq_group_data
+*cpd_to_cfqgd(struct blkcg_policy_data *cpd)
+{
+        return cpd ? container_of(cpd, struct cfq_group_data, pd) : NULL;
+}
+
+static inline struct blkcg_gq *cfqg_to_blkg(struct cfq_group *cfqg)
+{
+        return pd_to_blkg(&cfqg->pd);
+}
+
 static struct blkcg_policy blkcg_policy_cfq;
 
 static inline struct cfq_group *blkg_to_cfqg(struct blkcg_gq *blkg)
@@ -607,6 +627,11 @@ static inline struct cfq_group *blkg_to_cfqg(struct blkcg_gq *blkg)
         return pd_to_cfqg(blkg_to_pd(blkg, &blkcg_policy_cfq));
 }
 
+static struct cfq_group_data *blkcg_to_cfqgd(struct blkcg *blkcg)
+{
+        return cpd_to_cfqgd(blkcg_to_cpd(blkcg, &blkcg_policy_cfq));
+}
+
 static inline struct cfq_group *cfqg_parent(struct cfq_group *cfqg)
 {
         struct blkcg_gq *pblkg = cfqg_to_blkg(cfqg)->parent;
@@ -1544,13 +1569,28 @@ static void cfqg_stats_init(struct cfqg_stats *stats)
 #endif
 }
 
+static void cfq_cpd_init(const struct blkcg *blkcg)
+{
+        struct cfq_group_data *cgd =
+                cpd_to_cfqgd(blkcg->pd[blkcg_policy_cfq.plid]);
+
+        if (blkcg == &blkcg_root) {
+                cgd->weight = 2 * CFQ_WEIGHT_DEFAULT;
+                cgd->leaf_weight = 2 * CFQ_WEIGHT_DEFAULT;
+        } else {
+                cgd->weight = CFQ_WEIGHT_DEFAULT;
+                cgd->leaf_weight = CFQ_WEIGHT_DEFAULT;
+        }
+}
+
 static void cfq_pd_init(struct blkcg_gq *blkg)
 {
         struct cfq_group *cfqg = blkg_to_cfqg(blkg);
+        struct cfq_group_data *cgd = blkcg_to_cfqgd(blkg->blkcg);
 
         cfq_init_cfqg_base(cfqg);
-        cfqg->weight = blkg->blkcg->cfq_weight;
-        cfqg->leaf_weight = blkg->blkcg->cfq_leaf_weight;
+        cfqg->weight = cgd->weight;
+        cfqg->leaf_weight = cgd->leaf_weight;
         cfqg_stats_init(&cfqg->stats);
         cfqg_stats_init(&cfqg->dead_stats);
 }
@@ -1673,13 +1713,27 @@ static int cfqg_print_leaf_weight_device(struct seq_file *sf, void *v)
 
 static int cfq_print_weight(struct seq_file *sf, void *v)
 {
-        seq_printf(sf, "%u\n", css_to_blkcg(seq_css(sf))->cfq_weight);
+        struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
+        struct cfq_group_data *cgd = blkcg_to_cfqgd(blkcg);
+        unsigned int val = 0;
+
+        if (cgd)
+                val = cgd->weight;
+
+        seq_printf(sf, "%u\n", val);
         return 0;
 }
 
 static int cfq_print_leaf_weight(struct seq_file *sf, void *v)
 {
-        seq_printf(sf, "%u\n", css_to_blkcg(seq_css(sf))->cfq_leaf_weight);
+        struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
+        struct cfq_group_data *cgd = blkcg_to_cfqgd(blkcg);
+        unsigned int val = 0;
+
+        if (cgd)
+                val = cgd->leaf_weight;
+
+        seq_printf(sf, "%u\n", val);
         return 0;
 }
 
@@ -1690,6 +1744,7 @@ static ssize_t __cfqg_set_weight_device(struct kernfs_open_file *of,
         struct blkcg *blkcg = css_to_blkcg(of_css(of));
         struct blkg_conf_ctx ctx;
         struct cfq_group *cfqg;
+        struct cfq_group_data *cfqgd;
         int ret;
 
         ret = blkg_conf_prep(blkcg, &blkcg_policy_cfq, buf, &ctx);
@@ -1698,17 +1753,22 @@ static ssize_t __cfqg_set_weight_device(struct kernfs_open_file *of,
 
         ret = -EINVAL;
         cfqg = blkg_to_cfqg(ctx.blkg);
+        cfqgd = blkcg_to_cfqgd(blkcg);
+        if (!cfqg || !cfqgd)
+                goto err;
+
         if (!ctx.v || (ctx.v >= CFQ_WEIGHT_MIN && ctx.v <= CFQ_WEIGHT_MAX)) {
                 if (!is_leaf_weight) {
                         cfqg->dev_weight = ctx.v;
-                        cfqg->new_weight = ctx.v ?: blkcg->cfq_weight;
+                        cfqg->new_weight = ctx.v ?: cfqgd->weight;
                 } else {
                         cfqg->dev_leaf_weight = ctx.v;
-                        cfqg->new_leaf_weight = ctx.v ?: blkcg->cfq_leaf_weight;
+                        cfqg->new_leaf_weight = ctx.v ?: cfqgd->leaf_weight;
                 }
                 ret = 0;
         }
 
+err:
         blkg_conf_finish(&ctx);
         return ret ?: nbytes;
 }
@@ -1730,16 +1790,23 @@ static int __cfq_set_weight(struct cgroup_subsys_state *css, struct cftype *cft,
 {
         struct blkcg *blkcg = css_to_blkcg(css);
         struct blkcg_gq *blkg;
+        struct cfq_group_data *cfqgd;
+        int ret = 0;
 
         if (val < CFQ_WEIGHT_MIN || val > CFQ_WEIGHT_MAX)
                 return -EINVAL;
 
         spin_lock_irq(&blkcg->lock);
+        cfqgd = blkcg_to_cfqgd(blkcg);
+        if (!cfqgd) {
+                ret = -EINVAL;
+                goto out;
+        }
 
         if (!is_leaf_weight)
-                blkcg->cfq_weight = val;
+                cfqgd->weight = val;
         else
-                blkcg->cfq_leaf_weight = val;
+                cfqgd->leaf_weight = val;
 
         hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
                 struct cfq_group *cfqg = blkg_to_cfqg(blkg);
@@ -1749,15 +1816,16 @@ static int __cfq_set_weight(struct cgroup_subsys_state *css, struct cftype *cft,
 
                 if (!is_leaf_weight) {
                         if (!cfqg->dev_weight)
-                                cfqg->new_weight = blkcg->cfq_weight;
+                                cfqg->new_weight = cfqgd->weight;
                 } else {
                         if (!cfqg->dev_leaf_weight)
-                                cfqg->new_leaf_weight = blkcg->cfq_leaf_weight;
+                                cfqg->new_leaf_weight = cfqgd->leaf_weight;
                 }
         }
 
+out:
         spin_unlock_irq(&blkcg->lock);
-        return 0;
+        return ret;
 }
 
 static int cfq_set_weight(struct cgroup_subsys_state *css, struct cftype *cft,
@@ -4477,6 +4545,18 @@ out_free:
         return ret;
 }
 
+static void cfq_registered_queue(struct request_queue *q)
+{
+        struct elevator_queue *e = q->elevator;
+        struct cfq_data *cfqd = e->elevator_data;
+
+        /*
+         * Default to IOPS mode with no idling for SSDs
+         */
+        if (blk_queue_nonrot(q))
+                cfqd->cfq_slice_idle = 0;
+}
+
 /*
  * sysfs parts below -->
  */
@@ -4592,6 +4672,7 @@ static struct elevator_type iosched_cfq = {
                 .elevator_may_queue_fn =        cfq_may_queue,
                 .elevator_init_fn =             cfq_init_queue,
                 .elevator_exit_fn =             cfq_exit_queue,
+                .elevator_registered_fn =       cfq_registered_queue,
         },
         .icq_size       =       sizeof(struct cfq_io_cq),
         .icq_align      =       __alignof__(struct cfq_io_cq),
@@ -4603,8 +4684,10 @@ static struct elevator_type iosched_cfq = {
 #ifdef CONFIG_CFQ_GROUP_IOSCHED
 static struct blkcg_policy blkcg_policy_cfq = {
         .pd_size                = sizeof(struct cfq_group),
+        .cpd_size               = sizeof(struct cfq_group_data),
         .cftypes                = cfq_blkcg_files,
 
+        .cpd_init_fn            = cfq_cpd_init,
         .pd_init_fn             = cfq_pd_init,
         .pd_offline_fn          = cfq_pd_offline,
         .pd_reset_stats_fn      = cfq_pd_reset_stats,
diff --git a/block/elevator.c b/block/elevator.c
index 3bbb48f430e4..84d63943f2de 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -157,7 +157,7 @@ struct elevator_queue *elevator_alloc(struct request_queue *q,
 
         eq = kzalloc_node(sizeof(*eq), GFP_KERNEL, q->node);
         if (unlikely(!eq))
-                goto err;
+                return NULL;
 
         eq->type = e;
         kobject_init(&eq->kobj, &elv_ktype);
@@ -165,10 +165,6 @@ struct elevator_queue *elevator_alloc(struct request_queue *q,
         hash_init(eq->hash);
 
         return eq;
-err:
-        kfree(eq);
-        elevator_put(e);
-        return NULL;
 }
 EXPORT_SYMBOL(elevator_alloc);
 
@@ -810,6 +806,8 @@ int elv_register_queue(struct request_queue *q)
                 }
                 kobject_uevent(&e->kobj, KOBJ_ADD);
                 e->registered = 1;
+                if (e->type->ops.elevator_registered_fn)
+                        e->type->ops.elevator_registered_fn(q);
         }
         return error;
 }
diff --git a/block/genhd.c b/block/genhd.c
index d46ba566d62f..59a1395eedac 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -423,9 +423,9 @@ int blk_alloc_devt(struct hd_struct *part, dev_t *devt)
         /* allocate ext devt */
         idr_preload(GFP_KERNEL);
 
-        spin_lock(&ext_devt_lock);
+        spin_lock_bh(&ext_devt_lock);
         idx = idr_alloc(&ext_devt_idr, part, 0, NR_EXT_DEVT, GFP_NOWAIT);
-        spin_unlock(&ext_devt_lock);
+        spin_unlock_bh(&ext_devt_lock);
 
         idr_preload_end();
         if (idx < 0)
@@ -450,9 +450,9 @@ void blk_free_devt(dev_t devt)
                 return;
 
         if (MAJOR(devt) == BLOCK_EXT_MAJOR) {
-                spin_lock(&ext_devt_lock);
+                spin_lock_bh(&ext_devt_lock);
                 idr_remove(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
-                spin_unlock(&ext_devt_lock);
+                spin_unlock_bh(&ext_devt_lock);
         }
 }
 
@@ -654,7 +654,6 @@ void del_gendisk(struct gendisk *disk)
         disk->flags &= ~GENHD_FL_UP;
 
         sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
-        bdi_unregister(&disk->queue->backing_dev_info);
         blk_unregister_queue(disk);
         blk_unregister_region(disk_devt(disk), disk->minors);
 
@@ -692,13 +691,13 @@ struct gendisk *get_gendisk(dev_t devt, int *partno)
         } else {
                 struct hd_struct *part;
 
-                spin_lock(&ext_devt_lock);
+                spin_lock_bh(&ext_devt_lock);
                 part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
                 if (part && get_disk(part_to_disk(part))) {
                         *partno = part->partno;
                         disk = part_to_disk(part);
                 }
-                spin_unlock(&ext_devt_lock);
+                spin_unlock_bh(&ext_devt_lock);
         }
 
         return disk;