Diffstat (limited to 'block')

-rw-r--r--   block/bio-integrity.c  |    4
-rw-r--r--   block/bio.c            |   77
-rw-r--r--   block/blk-cgroup.c     |   92
-rw-r--r--   block/blk-cgroup.h     |   40
-rw-r--r--   block/blk-core.c       |  143
-rw-r--r--   block/blk-exec.c       |   10
-rw-r--r--   block/blk-merge.c      |    3
-rw-r--r--   block/blk-mq-cpumap.c  |    2
-rw-r--r--   block/blk-mq-tag.c     |   38
-rw-r--r--   block/blk-mq-tag.h     |    1
-rw-r--r--   block/blk-mq.c         |  228
-rw-r--r--   block/blk-sysfs.c      |    2
-rw-r--r--   block/blk.h            |    5
-rw-r--r--   block/bounce.c         |    5
-rw-r--r--   block/cfq-iosched.c    |  125
-rw-r--r--   block/elevator.c       |    8
-rw-r--r--   block/genhd.c          |   13
-rw-r--r--   block/ioctl.c          |   37

18 files changed, 534 insertions, 299 deletions
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 5cbd5d9ea61d..0436c21db7f2 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -361,7 +361,7 @@ static void bio_integrity_verify_fn(struct work_struct *work)
361 | 361 | ||
362 | /* Restore original bio completion handler */ | 362 | /* Restore original bio completion handler */ |
363 | bio->bi_end_io = bip->bip_end_io; | 363 | bio->bi_end_io = bip->bip_end_io; |
364 | bio_endio_nodec(bio, error); | 364 | bio_endio(bio, error); |
365 | } | 365 | } |
366 | 366 | ||
367 | /** | 367 | /** |
@@ -388,7 +388,7 @@ void bio_integrity_endio(struct bio *bio, int error) | |||
388 | */ | 388 | */ |
389 | if (error) { | 389 | if (error) { |
390 | bio->bi_end_io = bip->bip_end_io; | 390 | bio->bi_end_io = bip->bip_end_io; |
391 | bio_endio_nodec(bio, error); | 391 | bio_endio(bio, error); |
392 | 392 | ||
393 | return; | 393 | return; |
394 | } | 394 | } |
diff --git a/block/bio.c b/block/bio.c
index f66a4eae16ee..259197d97de1 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -270,8 +270,8 @@ void bio_init(struct bio *bio)
270 | { | 270 | { |
271 | memset(bio, 0, sizeof(*bio)); | 271 | memset(bio, 0, sizeof(*bio)); |
272 | bio->bi_flags = 1 << BIO_UPTODATE; | 272 | bio->bi_flags = 1 << BIO_UPTODATE; |
273 | atomic_set(&bio->bi_remaining, 1); | 273 | atomic_set(&bio->__bi_remaining, 1); |
274 | atomic_set(&bio->bi_cnt, 1); | 274 | atomic_set(&bio->__bi_cnt, 1); |
275 | } | 275 | } |
276 | EXPORT_SYMBOL(bio_init); | 276 | EXPORT_SYMBOL(bio_init); |
277 | 277 | ||
@@ -292,8 +292,8 @@ void bio_reset(struct bio *bio) | |||
292 | __bio_free(bio); | 292 | __bio_free(bio); |
293 | 293 | ||
294 | memset(bio, 0, BIO_RESET_BYTES); | 294 | memset(bio, 0, BIO_RESET_BYTES); |
295 | bio->bi_flags = flags|(1 << BIO_UPTODATE); | 295 | bio->bi_flags = flags | (1 << BIO_UPTODATE); |
296 | atomic_set(&bio->bi_remaining, 1); | 296 | atomic_set(&bio->__bi_remaining, 1); |
297 | } | 297 | } |
298 | EXPORT_SYMBOL(bio_reset); | 298 | EXPORT_SYMBOL(bio_reset); |
299 | 299 | ||
@@ -303,6 +303,17 @@ static void bio_chain_endio(struct bio *bio, int error) | |||
303 | bio_put(bio); | 303 | bio_put(bio); |
304 | } | 304 | } |
305 | 305 | ||
306 | /* | ||
307 | * Increment chain count for the bio. Make sure the CHAIN flag update | ||
308 | * is visible before the raised count. | ||
309 | */ | ||
310 | static inline void bio_inc_remaining(struct bio *bio) | ||
311 | { | ||
312 | bio->bi_flags |= (1 << BIO_CHAIN); | ||
313 | smp_mb__before_atomic(); | ||
314 | atomic_inc(&bio->__bi_remaining); | ||
315 | } | ||
316 | |||
306 | /** | 317 | /** |
307 | * bio_chain - chain bio completions | 318 | * bio_chain - chain bio completions |
308 | * @bio: the target bio | 319 | * @bio: the target bio |
@@ -320,7 +331,7 @@ void bio_chain(struct bio *bio, struct bio *parent) | |||
320 | 331 | ||
321 | bio->bi_private = parent; | 332 | bio->bi_private = parent; |
322 | bio->bi_end_io = bio_chain_endio; | 333 | bio->bi_end_io = bio_chain_endio; |
323 | atomic_inc(&parent->bi_remaining); | 334 | bio_inc_remaining(parent); |
324 | } | 335 | } |
325 | EXPORT_SYMBOL(bio_chain); | 336 | EXPORT_SYMBOL(bio_chain); |
326 | 337 | ||
@@ -524,13 +535,17 @@ EXPORT_SYMBOL(zero_fill_bio); | |||
524 | **/ | 535 | **/ |
525 | void bio_put(struct bio *bio) | 536 | void bio_put(struct bio *bio) |
526 | { | 537 | { |
527 | BIO_BUG_ON(!atomic_read(&bio->bi_cnt)); | 538 | if (!bio_flagged(bio, BIO_REFFED)) |
528 | |||
529 | /* | ||
530 | * last put frees it | ||
531 | */ | ||
532 | if (atomic_dec_and_test(&bio->bi_cnt)) | ||
533 | bio_free(bio); | 539 | bio_free(bio); |
540 | else { | ||
541 | BIO_BUG_ON(!atomic_read(&bio->__bi_cnt)); | ||
542 | |||
543 | /* | ||
544 | * last put frees it | ||
545 | */ | ||
546 | if (atomic_dec_and_test(&bio->__bi_cnt)) | ||
547 | bio_free(bio); | ||
548 | } | ||
534 | } | 549 | } |
535 | EXPORT_SYMBOL(bio_put); | 550 | EXPORT_SYMBOL(bio_put); |
536 | 551 | ||
@@ -1741,6 +1756,25 @@ void bio_flush_dcache_pages(struct bio *bi) | |||
1741 | EXPORT_SYMBOL(bio_flush_dcache_pages); | 1756 | EXPORT_SYMBOL(bio_flush_dcache_pages); |
1742 | #endif | 1757 | #endif |
1743 | 1758 | ||
1759 | static inline bool bio_remaining_done(struct bio *bio) | ||
1760 | { | ||
1761 | /* | ||
1762 | * If we're not chaining, then ->__bi_remaining is always 1 and | ||
1763 | * we always end io on the first invocation. | ||
1764 | */ | ||
1765 | if (!bio_flagged(bio, BIO_CHAIN)) | ||
1766 | return true; | ||
1767 | |||
1768 | BUG_ON(atomic_read(&bio->__bi_remaining) <= 0); | ||
1769 | |||
1770 | if (atomic_dec_and_test(&bio->__bi_remaining)) { | ||
1771 | clear_bit(BIO_CHAIN, &bio->bi_flags); | ||
1772 | return true; | ||
1773 | } | ||
1774 | |||
1775 | return false; | ||
1776 | } | ||
1777 | |||
1744 | /** | 1778 | /** |
1745 | * bio_endio - end I/O on a bio | 1779 | * bio_endio - end I/O on a bio |
1746 | * @bio: bio | 1780 | * @bio: bio |
@@ -1758,15 +1792,13 @@ EXPORT_SYMBOL(bio_flush_dcache_pages); | |||
1758 | void bio_endio(struct bio *bio, int error) | 1792 | void bio_endio(struct bio *bio, int error) |
1759 | { | 1793 | { |
1760 | while (bio) { | 1794 | while (bio) { |
1761 | BUG_ON(atomic_read(&bio->bi_remaining) <= 0); | ||
1762 | |||
1763 | if (error) | 1795 | if (error) |
1764 | clear_bit(BIO_UPTODATE, &bio->bi_flags); | 1796 | clear_bit(BIO_UPTODATE, &bio->bi_flags); |
1765 | else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) | 1797 | else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) |
1766 | error = -EIO; | 1798 | error = -EIO; |
1767 | 1799 | ||
1768 | if (!atomic_dec_and_test(&bio->bi_remaining)) | 1800 | if (unlikely(!bio_remaining_done(bio))) |
1769 | return; | 1801 | break; |
1770 | 1802 | ||
1771 | /* | 1803 | /* |
1772 | * Need to have a real endio function for chained bios, | 1804 | * Need to have a real endio function for chained bios, |
@@ -1790,21 +1822,6 @@ void bio_endio(struct bio *bio, int error) | |||
1790 | EXPORT_SYMBOL(bio_endio); | 1822 | EXPORT_SYMBOL(bio_endio); |
1791 | 1823 | ||
1792 | /** | 1824 | /** |
1793 | * bio_endio_nodec - end I/O on a bio, without decrementing bi_remaining | ||
1794 | * @bio: bio | ||
1795 | * @error: error, if any | ||
1796 | * | ||
1797 | * For code that has saved and restored bi_end_io; thing hard before using this | ||
1798 | * function, probably you should've cloned the entire bio. | ||
1799 | **/ | ||
1800 | void bio_endio_nodec(struct bio *bio, int error) | ||
1801 | { | ||
1802 | atomic_inc(&bio->bi_remaining); | ||
1803 | bio_endio(bio, error); | ||
1804 | } | ||
1805 | EXPORT_SYMBOL(bio_endio_nodec); | ||
1806 | |||
1807 | /** | ||
1808 | * bio_split - split a bio | 1825 | * bio_split - split a bio |
1809 | * @bio: bio to split | 1826 | * @bio: bio to split |
1810 | * @sectors: number of sectors to split from the front of @bio | 1827 | * @sectors: number of sectors to split from the front of @bio |
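The bio.c changes make both counters in struct bio opt-in: __bi_remaining is only honoured once BIO_CHAIN has been set by bio_chain()/bio_inc_remaining(), and bio_put() likewise skips the atomic drop of __bi_cnt until BIO_REFFED marks the bio as actually referenced. Below is a minimal user-space model of the chaining half, using C11 atomics rather than the kernel's atomic_t and bitops; the mini_* names are illustrative only.

    #include <stdatomic.h>
    #include <stdbool.h>

    #define MINI_BIO_CHAIN (1u << 0)

    struct mini_bio {
            unsigned int flags;        /* models bio->bi_flags */
            atomic_uint remaining;     /* models bio->__bi_remaining, init to 1 */
    };

    /* bio_inc_remaining(): flag the bio as chained, then raise the count. */
    static inline void mini_inc_remaining(struct mini_bio *b)
    {
            b->flags |= MINI_BIO_CHAIN;
            /* flag update must be visible before the raised count
             * (the kernel uses smp_mb__before_atomic() for this) */
            atomic_fetch_add_explicit(&b->remaining, 1, memory_order_release);
    }

    /* bio_remaining_done(): an unchained bio completes on the first call. */
    static inline bool mini_remaining_done(struct mini_bio *b)
    {
            if (!(b->flags & MINI_BIO_CHAIN))
                    return true;

            if (atomic_fetch_sub_explicit(&b->remaining, 1,
                                          memory_order_acq_rel) == 1) {
                    b->flags &= ~MINI_BIO_CHAIN;
                    return true;
            }
            return false;
    }

bio_endio() breaks out of its completion loop only when bio_remaining_done() returns false, i.e. while chained children are still in flight; and since plain, unchained bios no longer decrement anything, bio_endio_nodec() and its bio-integrity.c callers above could simply be dropped.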
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 0ac817b750db..6e43fa355e71 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -9,6 +9,10 @@
9 | * | 9 | * |
10 | * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com> | 10 | * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com> |
11 | * Nauman Rafique <nauman@google.com> | 11 | * Nauman Rafique <nauman@google.com> |
12 | * | ||
13 | * For policy-specific per-blkcg data: | ||
14 | * Copyright (C) 2015 Paolo Valente <paolo.valente@unimore.it> | ||
15 | * Arianna Avanzini <avanzini.arianna@gmail.com> | ||
12 | */ | 16 | */ |
13 | #include <linux/ioprio.h> | 17 | #include <linux/ioprio.h> |
14 | #include <linux/kdev_t.h> | 18 | #include <linux/kdev_t.h> |
@@ -26,8 +30,7 @@ | |||
26 | 30 | ||
27 | static DEFINE_MUTEX(blkcg_pol_mutex); | 31 | static DEFINE_MUTEX(blkcg_pol_mutex); |
28 | 32 | ||
29 | struct blkcg blkcg_root = { .cfq_weight = 2 * CFQ_WEIGHT_DEFAULT, | 33 | struct blkcg blkcg_root; |
30 | .cfq_leaf_weight = 2 * CFQ_WEIGHT_DEFAULT, }; | ||
31 | EXPORT_SYMBOL_GPL(blkcg_root); | 34 | EXPORT_SYMBOL_GPL(blkcg_root); |
32 | 35 | ||
33 | static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS]; | 36 | static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS]; |
@@ -823,6 +826,8 @@ static struct cgroup_subsys_state * | |||
823 | blkcg_css_alloc(struct cgroup_subsys_state *parent_css) | 826 | blkcg_css_alloc(struct cgroup_subsys_state *parent_css) |
824 | { | 827 | { |
825 | struct blkcg *blkcg; | 828 | struct blkcg *blkcg; |
829 | struct cgroup_subsys_state *ret; | ||
830 | int i; | ||
826 | 831 | ||
827 | if (!parent_css) { | 832 | if (!parent_css) { |
828 | blkcg = &blkcg_root; | 833 | blkcg = &blkcg_root; |
@@ -830,17 +835,49 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css) | |||
830 | } | 835 | } |
831 | 836 | ||
832 | blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL); | 837 | blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL); |
833 | if (!blkcg) | 838 | if (!blkcg) { |
834 | return ERR_PTR(-ENOMEM); | 839 | ret = ERR_PTR(-ENOMEM); |
840 | goto free_blkcg; | ||
841 | } | ||
842 | |||
843 | for (i = 0; i < BLKCG_MAX_POLS ; i++) { | ||
844 | struct blkcg_policy *pol = blkcg_policy[i]; | ||
845 | struct blkcg_policy_data *cpd; | ||
846 | |||
847 | /* | ||
848 | * If the policy hasn't been attached yet, wait for it | ||
849 | * to be attached before doing anything else. Otherwise, | ||
850 | * check if the policy requires any specific per-cgroup | ||
851 | * data: if it does, allocate and initialize it. | ||
852 | */ | ||
853 | if (!pol || !pol->cpd_size) | ||
854 | continue; | ||
855 | |||
856 | BUG_ON(blkcg->pd[i]); | ||
857 | cpd = kzalloc(pol->cpd_size, GFP_KERNEL); | ||
858 | if (!cpd) { | ||
859 | ret = ERR_PTR(-ENOMEM); | ||
860 | goto free_pd_blkcg; | ||
861 | } | ||
862 | blkcg->pd[i] = cpd; | ||
863 | cpd->plid = i; | ||
864 | pol->cpd_init_fn(blkcg); | ||
865 | } | ||
835 | 866 | ||
836 | blkcg->cfq_weight = CFQ_WEIGHT_DEFAULT; | ||
837 | blkcg->cfq_leaf_weight = CFQ_WEIGHT_DEFAULT; | ||
838 | done: | 867 | done: |
839 | spin_lock_init(&blkcg->lock); | 868 | spin_lock_init(&blkcg->lock); |
840 | INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_ATOMIC); | 869 | INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_ATOMIC); |
841 | INIT_HLIST_HEAD(&blkcg->blkg_list); | 870 | INIT_HLIST_HEAD(&blkcg->blkg_list); |
842 | 871 | ||
843 | return &blkcg->css; | 872 | return &blkcg->css; |
873 | |||
874 | free_pd_blkcg: | ||
875 | for (i--; i >= 0; i--) | ||
876 | kfree(blkcg->pd[i]); | ||
877 | |||
878 | free_blkcg: | ||
879 | kfree(blkcg); | ||
880 | return ret; | ||
844 | } | 881 | } |
845 | 882 | ||
846 | /** | 883 | /** |
@@ -958,8 +995,10 @@ int blkcg_activate_policy(struct request_queue *q, | |||
958 | const struct blkcg_policy *pol) | 995 | const struct blkcg_policy *pol) |
959 | { | 996 | { |
960 | LIST_HEAD(pds); | 997 | LIST_HEAD(pds); |
998 | LIST_HEAD(cpds); | ||
961 | struct blkcg_gq *blkg, *new_blkg; | 999 | struct blkcg_gq *blkg, *new_blkg; |
962 | struct blkg_policy_data *pd, *n; | 1000 | struct blkg_policy_data *pd, *nd; |
1001 | struct blkcg_policy_data *cpd, *cnd; | ||
963 | int cnt = 0, ret; | 1002 | int cnt = 0, ret; |
964 | bool preloaded; | 1003 | bool preloaded; |
965 | 1004 | ||
@@ -1003,7 +1042,10 @@ int blkcg_activate_policy(struct request_queue *q, | |||
1003 | 1042 | ||
1004 | spin_unlock_irq(q->queue_lock); | 1043 | spin_unlock_irq(q->queue_lock); |
1005 | 1044 | ||
1006 | /* allocate policy_data for all existing blkgs */ | 1045 | /* |
1046 | * Allocate per-blkg and per-blkcg policy data | ||
1047 | * for all existing blkgs. | ||
1048 | */ | ||
1007 | while (cnt--) { | 1049 | while (cnt--) { |
1008 | pd = kzalloc_node(pol->pd_size, GFP_KERNEL, q->node); | 1050 | pd = kzalloc_node(pol->pd_size, GFP_KERNEL, q->node); |
1009 | if (!pd) { | 1051 | if (!pd) { |
@@ -1011,26 +1053,50 @@ int blkcg_activate_policy(struct request_queue *q, | |||
1011 | goto out_free; | 1053 | goto out_free; |
1012 | } | 1054 | } |
1013 | list_add_tail(&pd->alloc_node, &pds); | 1055 | list_add_tail(&pd->alloc_node, &pds); |
1056 | |||
1057 | if (!pol->cpd_size) | ||
1058 | continue; | ||
1059 | cpd = kzalloc_node(pol->cpd_size, GFP_KERNEL, q->node); | ||
1060 | if (!cpd) { | ||
1061 | ret = -ENOMEM; | ||
1062 | goto out_free; | ||
1063 | } | ||
1064 | list_add_tail(&cpd->alloc_node, &cpds); | ||
1014 | } | 1065 | } |
1015 | 1066 | ||
1016 | /* | 1067 | /* |
1017 | * Install the allocated pds. With @q bypassing, no new blkg | 1068 | * Install the allocated pds and cpds. With @q bypassing, no new blkg |
1018 | * should have been created while the queue lock was dropped. | 1069 | * should have been created while the queue lock was dropped. |
1019 | */ | 1070 | */ |
1020 | spin_lock_irq(q->queue_lock); | 1071 | spin_lock_irq(q->queue_lock); |
1021 | 1072 | ||
1022 | list_for_each_entry(blkg, &q->blkg_list, q_node) { | 1073 | list_for_each_entry(blkg, &q->blkg_list, q_node) { |
1023 | if (WARN_ON(list_empty(&pds))) { | 1074 | if (WARN_ON(list_empty(&pds)) || |
1075 | WARN_ON(pol->cpd_size && list_empty(&cpds))) { | ||
1024 | /* umm... this shouldn't happen, just abort */ | 1076 | /* umm... this shouldn't happen, just abort */ |
1025 | ret = -ENOMEM; | 1077 | ret = -ENOMEM; |
1026 | goto out_unlock; | 1078 | goto out_unlock; |
1027 | } | 1079 | } |
1080 | cpd = list_first_entry(&cpds, struct blkcg_policy_data, | ||
1081 | alloc_node); | ||
1082 | list_del_init(&cpd->alloc_node); | ||
1028 | pd = list_first_entry(&pds, struct blkg_policy_data, alloc_node); | 1083 | pd = list_first_entry(&pds, struct blkg_policy_data, alloc_node); |
1029 | list_del_init(&pd->alloc_node); | 1084 | list_del_init(&pd->alloc_node); |
1030 | 1085 | ||
1031 | /* grab blkcg lock too while installing @pd on @blkg */ | 1086 | /* grab blkcg lock too while installing @pd on @blkg */ |
1032 | spin_lock(&blkg->blkcg->lock); | 1087 | spin_lock(&blkg->blkcg->lock); |
1033 | 1088 | ||
1089 | if (!pol->cpd_size) | ||
1090 | goto no_cpd; | ||
1091 | if (!blkg->blkcg->pd[pol->plid]) { | ||
1092 | /* Per-policy per-blkcg data */ | ||
1093 | blkg->blkcg->pd[pol->plid] = cpd; | ||
1094 | cpd->plid = pol->plid; | ||
1095 | pol->cpd_init_fn(blkg->blkcg); | ||
1096 | } else { /* must free it as it has already been extracted */ | ||
1097 | kfree(cpd); | ||
1098 | } | ||
1099 | no_cpd: | ||
1034 | blkg->pd[pol->plid] = pd; | 1100 | blkg->pd[pol->plid] = pd; |
1035 | pd->blkg = blkg; | 1101 | pd->blkg = blkg; |
1036 | pd->plid = pol->plid; | 1102 | pd->plid = pol->plid; |
@@ -1045,8 +1111,10 @@ out_unlock: | |||
1045 | spin_unlock_irq(q->queue_lock); | 1111 | spin_unlock_irq(q->queue_lock); |
1046 | out_free: | 1112 | out_free: |
1047 | blk_queue_bypass_end(q); | 1113 | blk_queue_bypass_end(q); |
1048 | list_for_each_entry_safe(pd, n, &pds, alloc_node) | 1114 | list_for_each_entry_safe(pd, nd, &pds, alloc_node) |
1049 | kfree(pd); | 1115 | kfree(pd); |
1116 | list_for_each_entry_safe(cpd, cnd, &cpds, alloc_node) | ||
1117 | kfree(cpd); | ||
1050 | return ret; | 1118 | return ret; |
1051 | } | 1119 | } |
1052 | EXPORT_SYMBOL_GPL(blkcg_activate_policy); | 1120 | EXPORT_SYMBOL_GPL(blkcg_activate_policy); |
@@ -1087,6 +1155,8 @@ void blkcg_deactivate_policy(struct request_queue *q, | |||
1087 | 1155 | ||
1088 | kfree(blkg->pd[pol->plid]); | 1156 | kfree(blkg->pd[pol->plid]); |
1089 | blkg->pd[pol->plid] = NULL; | 1157 | blkg->pd[pol->plid] = NULL; |
1158 | kfree(blkg->blkcg->pd[pol->plid]); | ||
1159 | blkg->blkcg->pd[pol->plid] = NULL; | ||
1090 | 1160 | ||
1091 | spin_unlock(&blkg->blkcg->lock); | 1161 | spin_unlock(&blkg->blkcg->lock); |
1092 | } | 1162 | } |
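blkcg_activate_policy() keeps its established shape: with the queue in bypass it allocates one blkg_policy_data per blkg, and now additionally one blkcg_policy_data when cpd_size is set, all with the queue lock dropped, then retakes the lock to install them and finally frees whatever was left unused. A stripped-down user-space illustration of that allocate-outside-the-lock, install-under-the-lock pattern follows; a pthread mutex and calloc() stand in for the queue lock and GFP_KERNEL allocations, and every name is made up.

    #include <pthread.h>
    #include <stdlib.h>

    struct pd { struct pd *next; };          /* stands in for blkg_policy_data */

    static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;

    static int activate(struct pd **slots, int nr_slots)
    {
            struct pd *prealloc = NULL, *p;
            int i;

            /* 1) allocate everything while the lock is not held */
            for (i = 0; i < nr_slots; i++) {
                    p = calloc(1, sizeof(*p));
                    if (!p)
                            goto out_free;
                    p->next = prealloc;
                    prealloc = p;
            }

            /* 2) install under the lock; readers never see a half-built slot */
            pthread_mutex_lock(&queue_lock);
            for (i = 0; i < nr_slots; i++) {
                    p = prealloc;
                    prealloc = p->next;
                    slots[i] = p;
            }
            pthread_mutex_unlock(&queue_lock);
            return 0;

    out_free:                                /* 3) free unused preallocations */
            while ((p = prealloc) != NULL) {
                    prealloc = p->next;
                    free(p);
            }
            return -1;
    }

The point of the split is that the sleeping allocations never happen under the lock, while readers of slots[] never observe a half-initialised entry.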
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index c567865b5f1d..74296a78bba1 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -23,11 +23,6 @@
23 | /* Max limits for throttle policy */ | 23 | /* Max limits for throttle policy */ |
24 | #define THROTL_IOPS_MAX UINT_MAX | 24 | #define THROTL_IOPS_MAX UINT_MAX |
25 | 25 | ||
26 | /* CFQ specific, out here for blkcg->cfq_weight */ | ||
27 | #define CFQ_WEIGHT_MIN 10 | ||
28 | #define CFQ_WEIGHT_MAX 1000 | ||
29 | #define CFQ_WEIGHT_DEFAULT 500 | ||
30 | |||
31 | #ifdef CONFIG_BLK_CGROUP | 26 | #ifdef CONFIG_BLK_CGROUP |
32 | 27 | ||
33 | enum blkg_rwstat_type { | 28 | enum blkg_rwstat_type { |
@@ -50,9 +45,7 @@ struct blkcg { | |||
50 | struct blkcg_gq *blkg_hint; | 45 | struct blkcg_gq *blkg_hint; |
51 | struct hlist_head blkg_list; | 46 | struct hlist_head blkg_list; |
52 | 47 | ||
53 | /* TODO: per-policy storage in blkcg */ | 48 | struct blkcg_policy_data *pd[BLKCG_MAX_POLS]; |
54 | unsigned int cfq_weight; /* belongs to cfq */ | ||
55 | unsigned int cfq_leaf_weight; | ||
56 | }; | 49 | }; |
57 | 50 | ||
58 | struct blkg_stat { | 51 | struct blkg_stat { |
@@ -87,6 +80,24 @@ struct blkg_policy_data { | |||
87 | struct list_head alloc_node; | 80 | struct list_head alloc_node; |
88 | }; | 81 | }; |
89 | 82 | ||
83 | /* | ||
84 | * Policies that need to keep per-blkcg data which is independent | ||
85 | * from any request_queue associated to it must specify its size | ||
86 | * with the cpd_size field of the blkcg_policy structure and | ||
87 | * embed a blkcg_policy_data in it. blkcg core allocates | ||
88 | * policy-specific per-blkcg structures lazily the first time | ||
89 | * they are actually needed, so it handles them together with | ||
90 | * blkgs. cpd_init() is invoked to let each policy handle | ||
91 | * per-blkcg data. | ||
92 | */ | ||
93 | struct blkcg_policy_data { | ||
94 | /* the policy id this per-policy data belongs to */ | ||
95 | int plid; | ||
96 | |||
97 | /* used during policy activation */ | ||
98 | struct list_head alloc_node; | ||
99 | }; | ||
100 | |||
90 | /* association between a blk cgroup and a request queue */ | 101 | /* association between a blk cgroup and a request queue */ |
91 | struct blkcg_gq { | 102 | struct blkcg_gq { |
92 | /* Pointer to the associated request_queue */ | 103 | /* Pointer to the associated request_queue */ |
@@ -112,6 +123,7 @@ struct blkcg_gq { | |||
112 | struct rcu_head rcu_head; | 123 | struct rcu_head rcu_head; |
113 | }; | 124 | }; |
114 | 125 | ||
126 | typedef void (blkcg_pol_init_cpd_fn)(const struct blkcg *blkcg); | ||
115 | typedef void (blkcg_pol_init_pd_fn)(struct blkcg_gq *blkg); | 127 | typedef void (blkcg_pol_init_pd_fn)(struct blkcg_gq *blkg); |
116 | typedef void (blkcg_pol_online_pd_fn)(struct blkcg_gq *blkg); | 128 | typedef void (blkcg_pol_online_pd_fn)(struct blkcg_gq *blkg); |
117 | typedef void (blkcg_pol_offline_pd_fn)(struct blkcg_gq *blkg); | 129 | typedef void (blkcg_pol_offline_pd_fn)(struct blkcg_gq *blkg); |
@@ -122,10 +134,13 @@ struct blkcg_policy { | |||
122 | int plid; | 134 | int plid; |
123 | /* policy specific private data size */ | 135 | /* policy specific private data size */ |
124 | size_t pd_size; | 136 | size_t pd_size; |
137 | /* policy specific per-blkcg data size */ | ||
138 | size_t cpd_size; | ||
125 | /* cgroup files for the policy */ | 139 | /* cgroup files for the policy */ |
126 | struct cftype *cftypes; | 140 | struct cftype *cftypes; |
127 | 141 | ||
128 | /* operations */ | 142 | /* operations */ |
143 | blkcg_pol_init_cpd_fn *cpd_init_fn; | ||
129 | blkcg_pol_init_pd_fn *pd_init_fn; | 144 | blkcg_pol_init_pd_fn *pd_init_fn; |
130 | blkcg_pol_online_pd_fn *pd_online_fn; | 145 | blkcg_pol_online_pd_fn *pd_online_fn; |
131 | blkcg_pol_offline_pd_fn *pd_offline_fn; | 146 | blkcg_pol_offline_pd_fn *pd_offline_fn; |
@@ -218,6 +233,12 @@ static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, | |||
218 | return blkg ? blkg->pd[pol->plid] : NULL; | 233 | return blkg ? blkg->pd[pol->plid] : NULL; |
219 | } | 234 | } |
220 | 235 | ||
236 | static inline struct blkcg_policy_data *blkcg_to_cpd(struct blkcg *blkcg, | ||
237 | struct blkcg_policy *pol) | ||
238 | { | ||
239 | return blkcg ? blkcg->pd[pol->plid] : NULL; | ||
240 | } | ||
241 | |||
221 | /** | 242 | /** |
222 | * pdata_to_blkg - get blkg associated with policy private data | 243 | * pdata_to_blkg - get blkg associated with policy private data |
223 | * @pd: policy private data of interest | 244 | * @pd: policy private data of interest |
@@ -564,6 +585,9 @@ struct blkcg; | |||
564 | struct blkg_policy_data { | 585 | struct blkg_policy_data { |
565 | }; | 586 | }; |
566 | 587 | ||
588 | struct blkcg_policy_data { | ||
589 | }; | ||
590 | |||
567 | struct blkcg_gq { | 591 | struct blkcg_gq { |
568 | }; | 592 | }; |
569 | 593 | ||
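For a policy, the new hooks are declarative: set cpd_size to the size of a structure that embeds struct blkcg_policy_data as its first member and point cpd_init_fn at an initializer; blkcg core then allocates and wires one instance per blkcg, either in blkcg_css_alloc() or when the policy is activated later. The sketch below is hypothetical; only blkcg_policy, blkcg_policy_data and blkcg_to_cpd() come from this patch, while the example_* names, the weight field and the minimal pd_size are inventions.

    /* Hypothetical per-blkcg state for a made-up policy (sketch only). */
    struct example_cgroup_data {
            struct blkcg_policy_data cpd;   /* must be the first member */
            unsigned int weight;
    };

    static struct blkcg_policy example_policy;

    static void example_cpd_init(const struct blkcg *blkcg)
    {
            struct example_cgroup_data *ecd =
                    container_of(blkcg_to_cpd((struct blkcg *)blkcg,
                                              &example_policy),
                                 struct example_cgroup_data, cpd);

            ecd->weight = 500;              /* illustrative default */
    }

    static struct blkcg_policy example_policy = {
            .pd_size        = sizeof(struct blkg_policy_data),
            .cpd_size       = sizeof(struct example_cgroup_data),
            .cpd_init_fn    = example_cpd_init,
            /* pd_init_fn, cftypes, ... omitted */
    };

Registration itself is unchanged (blkcg_policy_register()); the cfq-iosched.c part of this diffstat converts the former blkcg->cfq_weight fields into exactly this kind of per-blkcg structure.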
diff --git a/block/blk-core.c b/block/blk-core.c
index fd154b94447a..f6ab750060fe 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -117,7 +117,7 @@ EXPORT_SYMBOL(blk_rq_init);
117 | static void req_bio_endio(struct request *rq, struct bio *bio, | 117 | static void req_bio_endio(struct request *rq, struct bio *bio, |
118 | unsigned int nbytes, int error) | 118 | unsigned int nbytes, int error) |
119 | { | 119 | { |
120 | if (error) | 120 | if (error && !(rq->cmd_flags & REQ_CLONE)) |
121 | clear_bit(BIO_UPTODATE, &bio->bi_flags); | 121 | clear_bit(BIO_UPTODATE, &bio->bi_flags); |
122 | else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) | 122 | else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) |
123 | error = -EIO; | 123 | error = -EIO; |
@@ -128,7 +128,8 @@ static void req_bio_endio(struct request *rq, struct bio *bio, | |||
128 | bio_advance(bio, nbytes); | 128 | bio_advance(bio, nbytes); |
129 | 129 | ||
130 | /* don't actually finish bio if it's part of flush sequence */ | 130 | /* don't actually finish bio if it's part of flush sequence */ |
131 | if (bio->bi_iter.bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ)) | 131 | if (bio->bi_iter.bi_size == 0 && |
132 | !(rq->cmd_flags & (REQ_FLUSH_SEQ|REQ_CLONE))) | ||
132 | bio_endio(bio, error); | 133 | bio_endio(bio, error); |
133 | } | 134 | } |
134 | 135 | ||
@@ -285,6 +286,7 @@ inline void __blk_run_queue_uncond(struct request_queue *q) | |||
285 | q->request_fn(q); | 286 | q->request_fn(q); |
286 | q->request_fn_active--; | 287 | q->request_fn_active--; |
287 | } | 288 | } |
289 | EXPORT_SYMBOL_GPL(__blk_run_queue_uncond); | ||
288 | 290 | ||
289 | /** | 291 | /** |
290 | * __blk_run_queue - run a single device queue | 292 | * __blk_run_queue - run a single device queue |
@@ -552,6 +554,8 @@ void blk_cleanup_queue(struct request_queue *q) | |||
552 | q->queue_lock = &q->__queue_lock; | 554 | q->queue_lock = &q->__queue_lock; |
553 | spin_unlock_irq(lock); | 555 | spin_unlock_irq(lock); |
554 | 556 | ||
557 | bdi_destroy(&q->backing_dev_info); | ||
558 | |||
555 | /* @q is and will stay empty, shutdown and put */ | 559 | /* @q is and will stay empty, shutdown and put */ |
556 | blk_put_queue(q); | 560 | blk_put_queue(q); |
557 | } | 561 | } |
@@ -732,6 +736,8 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) | |||
732 | } | 736 | } |
733 | EXPORT_SYMBOL(blk_init_queue_node); | 737 | EXPORT_SYMBOL(blk_init_queue_node); |
734 | 738 | ||
739 | static void blk_queue_bio(struct request_queue *q, struct bio *bio); | ||
740 | |||
735 | struct request_queue * | 741 | struct request_queue * |
736 | blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, | 742 | blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, |
737 | spinlock_t *lock) | 743 | spinlock_t *lock) |
@@ -1521,7 +1527,8 @@ bool bio_attempt_front_merge(struct request_queue *q, struct request *req, | |||
1521 | * Caller must ensure !blk_queue_nomerges(q) beforehand. | 1527 | * Caller must ensure !blk_queue_nomerges(q) beforehand. |
1522 | */ | 1528 | */ |
1523 | bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, | 1529 | bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, |
1524 | unsigned int *request_count) | 1530 | unsigned int *request_count, |
1531 | struct request **same_queue_rq) | ||
1525 | { | 1532 | { |
1526 | struct blk_plug *plug; | 1533 | struct blk_plug *plug; |
1527 | struct request *rq; | 1534 | struct request *rq; |
@@ -1541,8 +1548,16 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, | |||
1541 | list_for_each_entry_reverse(rq, plug_list, queuelist) { | 1548 | list_for_each_entry_reverse(rq, plug_list, queuelist) { |
1542 | int el_ret; | 1549 | int el_ret; |
1543 | 1550 | ||
1544 | if (rq->q == q) | 1551 | if (rq->q == q) { |
1545 | (*request_count)++; | 1552 | (*request_count)++; |
1553 | /* | ||
1554 | * Only blk-mq multiple hardware queues case checks the | ||
1555 | * rq in the same queue, there should be only one such | ||
1556 | * rq in a queue | ||
1557 | **/ | ||
1558 | if (same_queue_rq) | ||
1559 | *same_queue_rq = rq; | ||
1560 | } | ||
1546 | 1561 | ||
1547 | if (rq->q != q || !blk_rq_merge_ok(rq, bio)) | 1562 | if (rq->q != q || !blk_rq_merge_ok(rq, bio)) |
1548 | continue; | 1563 | continue; |
@@ -1576,7 +1591,7 @@ void init_request_from_bio(struct request *req, struct bio *bio) | |||
1576 | blk_rq_bio_prep(req->q, req, bio); | 1591 | blk_rq_bio_prep(req->q, req, bio); |
1577 | } | 1592 | } |
1578 | 1593 | ||
1579 | void blk_queue_bio(struct request_queue *q, struct bio *bio) | 1594 | static void blk_queue_bio(struct request_queue *q, struct bio *bio) |
1580 | { | 1595 | { |
1581 | const bool sync = !!(bio->bi_rw & REQ_SYNC); | 1596 | const bool sync = !!(bio->bi_rw & REQ_SYNC); |
1582 | struct blk_plug *plug; | 1597 | struct blk_plug *plug; |
@@ -1607,7 +1622,7 @@ void blk_queue_bio(struct request_queue *q, struct bio *bio) | |||
1607 | * any locks. | 1622 | * any locks. |
1608 | */ | 1623 | */ |
1609 | if (!blk_queue_nomerges(q) && | 1624 | if (!blk_queue_nomerges(q) && |
1610 | blk_attempt_plug_merge(q, bio, &request_count)) | 1625 | blk_attempt_plug_merge(q, bio, &request_count, NULL)) |
1611 | return; | 1626 | return; |
1612 | 1627 | ||
1613 | spin_lock_irq(q->queue_lock); | 1628 | spin_lock_irq(q->queue_lock); |
@@ -1684,7 +1699,6 @@ out_unlock: | |||
1684 | spin_unlock_irq(q->queue_lock); | 1699 | spin_unlock_irq(q->queue_lock); |
1685 | } | 1700 | } |
1686 | } | 1701 | } |
1687 | EXPORT_SYMBOL_GPL(blk_queue_bio); /* for device mapper only */ | ||
1688 | 1702 | ||
1689 | /* | 1703 | /* |
1690 | * If bio->bi_dev is a partition, remap the location | 1704 | * If bio->bi_dev is a partition, remap the location |
@@ -1715,8 +1729,6 @@ static void handle_bad_sector(struct bio *bio) | |||
1715 | bio->bi_rw, | 1729 | bio->bi_rw, |
1716 | (unsigned long long)bio_end_sector(bio), | 1730 | (unsigned long long)bio_end_sector(bio), |
1717 | (long long)(i_size_read(bio->bi_bdev->bd_inode) >> 9)); | 1731 | (long long)(i_size_read(bio->bi_bdev->bd_inode) >> 9)); |
1718 | |||
1719 | set_bit(BIO_EOF, &bio->bi_flags); | ||
1720 | } | 1732 | } |
1721 | 1733 | ||
1722 | #ifdef CONFIG_FAIL_MAKE_REQUEST | 1734 | #ifdef CONFIG_FAIL_MAKE_REQUEST |
@@ -2901,95 +2913,22 @@ int blk_lld_busy(struct request_queue *q) | |||
2901 | } | 2913 | } |
2902 | EXPORT_SYMBOL_GPL(blk_lld_busy); | 2914 | EXPORT_SYMBOL_GPL(blk_lld_busy); |
2903 | 2915 | ||
2904 | /** | 2916 | void blk_rq_prep_clone(struct request *dst, struct request *src) |
2905 | * blk_rq_unprep_clone - Helper function to free all bios in a cloned request | ||
2906 | * @rq: the clone request to be cleaned up | ||
2907 | * | ||
2908 | * Description: | ||
2909 | * Free all bios in @rq for a cloned request. | ||
2910 | */ | ||
2911 | void blk_rq_unprep_clone(struct request *rq) | ||
2912 | { | ||
2913 | struct bio *bio; | ||
2914 | |||
2915 | while ((bio = rq->bio) != NULL) { | ||
2916 | rq->bio = bio->bi_next; | ||
2917 | |||
2918 | bio_put(bio); | ||
2919 | } | ||
2920 | } | ||
2921 | EXPORT_SYMBOL_GPL(blk_rq_unprep_clone); | ||
2922 | |||
2923 | /* | ||
2924 | * Copy attributes of the original request to the clone request. | ||
2925 | * The actual data parts (e.g. ->cmd, ->sense) are not copied. | ||
2926 | */ | ||
2927 | static void __blk_rq_prep_clone(struct request *dst, struct request *src) | ||
2928 | { | 2917 | { |
2929 | dst->cpu = src->cpu; | 2918 | dst->cpu = src->cpu; |
2930 | dst->cmd_flags |= (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE; | 2919 | dst->cmd_flags |= (src->cmd_flags & REQ_CLONE_MASK); |
2920 | dst->cmd_flags |= REQ_NOMERGE | REQ_CLONE; | ||
2931 | dst->cmd_type = src->cmd_type; | 2921 | dst->cmd_type = src->cmd_type; |
2932 | dst->__sector = blk_rq_pos(src); | 2922 | dst->__sector = blk_rq_pos(src); |
2933 | dst->__data_len = blk_rq_bytes(src); | 2923 | dst->__data_len = blk_rq_bytes(src); |
2934 | dst->nr_phys_segments = src->nr_phys_segments; | 2924 | dst->nr_phys_segments = src->nr_phys_segments; |
2935 | dst->ioprio = src->ioprio; | 2925 | dst->ioprio = src->ioprio; |
2936 | dst->extra_len = src->extra_len; | 2926 | dst->extra_len = src->extra_len; |
2937 | } | 2927 | dst->bio = src->bio; |
2938 | 2928 | dst->biotail = src->biotail; | |
2939 | /** | 2929 | dst->cmd = src->cmd; |
2940 | * blk_rq_prep_clone - Helper function to setup clone request | 2930 | dst->cmd_len = src->cmd_len; |
2941 | * @rq: the request to be setup | 2931 | dst->sense = src->sense; |
2942 | * @rq_src: original request to be cloned | ||
2943 | * @bs: bio_set that bios for clone are allocated from | ||
2944 | * @gfp_mask: memory allocation mask for bio | ||
2945 | * @bio_ctr: setup function to be called for each clone bio. | ||
2946 | * Returns %0 for success, non %0 for failure. | ||
2947 | * @data: private data to be passed to @bio_ctr | ||
2948 | * | ||
2949 | * Description: | ||
2950 | * Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq. | ||
2951 | * The actual data parts of @rq_src (e.g. ->cmd, ->sense) | ||
2952 | * are not copied, and copying such parts is the caller's responsibility. | ||
2953 | * Also, pages which the original bios are pointing to are not copied | ||
2954 | * and the cloned bios just point same pages. | ||
2955 | * So cloned bios must be completed before original bios, which means | ||
2956 | * the caller must complete @rq before @rq_src. | ||
2957 | */ | ||
2958 | int blk_rq_prep_clone(struct request *rq, struct request *rq_src, | ||
2959 | struct bio_set *bs, gfp_t gfp_mask, | ||
2960 | int (*bio_ctr)(struct bio *, struct bio *, void *), | ||
2961 | void *data) | ||
2962 | { | ||
2963 | struct bio *bio, *bio_src; | ||
2964 | |||
2965 | if (!bs) | ||
2966 | bs = fs_bio_set; | ||
2967 | |||
2968 | __rq_for_each_bio(bio_src, rq_src) { | ||
2969 | bio = bio_clone_fast(bio_src, gfp_mask, bs); | ||
2970 | if (!bio) | ||
2971 | goto free_and_out; | ||
2972 | |||
2973 | if (bio_ctr && bio_ctr(bio, bio_src, data)) | ||
2974 | goto free_and_out; | ||
2975 | |||
2976 | if (rq->bio) { | ||
2977 | rq->biotail->bi_next = bio; | ||
2978 | rq->biotail = bio; | ||
2979 | } else | ||
2980 | rq->bio = rq->biotail = bio; | ||
2981 | } | ||
2982 | |||
2983 | __blk_rq_prep_clone(rq, rq_src); | ||
2984 | |||
2985 | return 0; | ||
2986 | |||
2987 | free_and_out: | ||
2988 | if (bio) | ||
2989 | bio_put(bio); | ||
2990 | blk_rq_unprep_clone(rq); | ||
2991 | |||
2992 | return -ENOMEM; | ||
2993 | } | 2932 | } |
2994 | EXPORT_SYMBOL_GPL(blk_rq_prep_clone); | 2933 | EXPORT_SYMBOL_GPL(blk_rq_prep_clone); |
2995 | 2934 | ||
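blk_rq_prep_clone() no longer clones bios at all: the clone borrows ->bio/->biotail (plus ->cmd/->sense) from the original, and the new REQ_CLONE flag makes req_bio_endio() leave those bios untouched so that only the original request completes them, which is also why blk_rq_unprep_clone() disappears. A hedged sketch of how a request-based stacking driver might use the two-argument form; the example_* wrapper is invented, only blk_rq_prep_clone() and blk_insert_cloned_request() are real block-layer calls.

    /* Sketch: @clone is assumed to have been allocated from the lower
     * device's request_queue; error handling on @orig is left to the caller. */
    static int example_dispatch_clone(struct request *clone, struct request *orig)
    {
            /* copy attributes and share orig's bio list; REQ_CLONE keeps
             * req_bio_endio() from completing those bios through the clone */
            blk_rq_prep_clone(clone, orig);

            /* nonzero means the caller must requeue or fail @orig itself */
            return blk_insert_cloned_request(clone->q, clone);
    }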
@@ -3031,21 +2970,20 @@ void blk_start_plug(struct blk_plug *plug) | |||
3031 | { | 2970 | { |
3032 | struct task_struct *tsk = current; | 2971 | struct task_struct *tsk = current; |
3033 | 2972 | ||
2973 | /* | ||
2974 | * If this is a nested plug, don't actually assign it. | ||
2975 | */ | ||
2976 | if (tsk->plug) | ||
2977 | return; | ||
2978 | |||
3034 | INIT_LIST_HEAD(&plug->list); | 2979 | INIT_LIST_HEAD(&plug->list); |
3035 | INIT_LIST_HEAD(&plug->mq_list); | 2980 | INIT_LIST_HEAD(&plug->mq_list); |
3036 | INIT_LIST_HEAD(&plug->cb_list); | 2981 | INIT_LIST_HEAD(&plug->cb_list); |
3037 | |||
3038 | /* | 2982 | /* |
3039 | * If this is a nested plug, don't actually assign it. It will be | 2983 | * Store ordering should not be needed here, since a potential |
3040 | * flushed on its own. | 2984 | * preempt will imply a full memory barrier |
3041 | */ | 2985 | */ |
3042 | if (!tsk->plug) { | 2986 | tsk->plug = plug; |
3043 | /* | ||
3044 | * Store ordering should not be needed here, since a potential | ||
3045 | * preempt will imply a full memory barrier | ||
3046 | */ | ||
3047 | tsk->plug = plug; | ||
3048 | } | ||
3049 | } | 2987 | } |
3050 | EXPORT_SYMBOL(blk_start_plug); | 2988 | EXPORT_SYMBOL(blk_start_plug); |
3051 | 2989 | ||
@@ -3192,10 +3130,11 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) | |||
3192 | 3130 | ||
3193 | void blk_finish_plug(struct blk_plug *plug) | 3131 | void blk_finish_plug(struct blk_plug *plug) |
3194 | { | 3132 | { |
3133 | if (plug != current->plug) | ||
3134 | return; | ||
3195 | blk_flush_plug_list(plug, false); | 3135 | blk_flush_plug_list(plug, false); |
3196 | 3136 | ||
3197 | if (plug == current->plug) | 3137 | current->plug = NULL; |
3198 | current->plug = NULL; | ||
3199 | } | 3138 | } |
3200 | EXPORT_SYMBOL(blk_finish_plug); | 3139 | EXPORT_SYMBOL(blk_finish_plug); |
3201 | 3140 | ||
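The plug changes move the nesting check to the front: blk_start_plug() now simply declines to install an inner plug (its lists are not even initialised), and blk_finish_plug() returns early unless it is handed the installed, outermost plug. A small user-space model of that ownership rule, with a thread-local pointer standing in for current->plug and illustrative mini_* names:

    struct mini_plug { int pending; };

    static _Thread_local struct mini_plug *current_plug; /* models current->plug */

    static void mini_start_plug(struct mini_plug *plug)
    {
            if (current_plug)          /* nested: the outer plug stays in charge */
                    return;
            plug->pending = 0;
            current_plug = plug;
    }

    static void mini_finish_plug(struct mini_plug *plug)
    {
            if (plug != current_plug)  /* inner plug: nothing was installed */
                    return;
            /* blk_flush_plug_list() would run here */
            current_plug = NULL;
    }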
diff --git a/block/blk-exec.c b/block/blk-exec.c
index 9924725fa50d..3fec8a29d0fa 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -53,7 +53,6 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
53 | rq_end_io_fn *done) | 53 | rq_end_io_fn *done) |
54 | { | 54 | { |
55 | int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; | 55 | int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; |
56 | bool is_pm_resume; | ||
57 | 56 | ||
58 | WARN_ON(irqs_disabled()); | 57 | WARN_ON(irqs_disabled()); |
59 | WARN_ON(rq->cmd_type == REQ_TYPE_FS); | 58 | WARN_ON(rq->cmd_type == REQ_TYPE_FS); |
@@ -70,12 +69,6 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, | |||
70 | return; | 69 | return; |
71 | } | 70 | } |
72 | 71 | ||
73 | /* | ||
74 | * need to check this before __blk_run_queue(), because rq can | ||
75 | * be freed before that returns. | ||
76 | */ | ||
77 | is_pm_resume = rq->cmd_type == REQ_TYPE_PM_RESUME; | ||
78 | |||
79 | spin_lock_irq(q->queue_lock); | 72 | spin_lock_irq(q->queue_lock); |
80 | 73 | ||
81 | if (unlikely(blk_queue_dying(q))) { | 74 | if (unlikely(blk_queue_dying(q))) { |
@@ -88,9 +81,6 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, | |||
88 | 81 | ||
89 | __elv_add_request(q, rq, where); | 82 | __elv_add_request(q, rq, where); |
90 | __blk_run_queue(q); | 83 | __blk_run_queue(q); |
91 | /* the queue is stopped so it won't be run */ | ||
92 | if (is_pm_resume) | ||
93 | __blk_run_queue_uncond(q); | ||
94 | spin_unlock_irq(q->queue_lock); | 84 | spin_unlock_irq(q->queue_lock); |
95 | } | 85 | } |
96 | EXPORT_SYMBOL_GPL(blk_execute_rq_nowait); | 86 | EXPORT_SYMBOL_GPL(blk_execute_rq_nowait); |
diff --git a/block/blk-merge.c b/block/blk-merge.c
index fd3fee81c23c..30a0d9f89017 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -589,7 +589,8 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
589 | !blk_write_same_mergeable(rq->bio, bio)) | 589 | !blk_write_same_mergeable(rq->bio, bio)) |
590 | return false; | 590 | return false; |
591 | 591 | ||
592 | if (q->queue_flags & (1 << QUEUE_FLAG_SG_GAPS)) { | 592 | /* Only check gaps if the bio carries data */ |
593 | if (q->queue_flags & (1 << QUEUE_FLAG_SG_GAPS) && bio_has_data(bio)) { | ||
593 | struct bio_vec *bprev; | 594 | struct bio_vec *bprev; |
594 | 595 | ||
595 | bprev = &rq->biotail->bi_io_vec[rq->biotail->bi_vcnt - 1]; | 596 | bprev = &rq->biotail->bi_io_vec[rq->biotail->bi_vcnt - 1]; |
diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
index 5f13f4d0bcce..1e28ddb656b8 100644
--- a/block/blk-mq-cpumap.c
+++ b/block/blk-mq-cpumap.c
@@ -24,7 +24,7 @@ static int get_first_sibling(unsigned int cpu)
24 | { | 24 | { |
25 | unsigned int ret; | 25 | unsigned int ret; |
26 | 26 | ||
27 | ret = cpumask_first(topology_thread_cpumask(cpu)); | 27 | ret = cpumask_first(topology_sibling_cpumask(cpu)); |
28 | if (ret < nr_cpu_ids) | 28 | if (ret < nr_cpu_ids) |
29 | return ret; | 29 | return ret; |
30 | 30 | ||
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index be3290cc0644..9b6e28830b82 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -438,6 +438,39 @@ static void bt_for_each(struct blk_mq_hw_ctx *hctx,
438 | } | 438 | } |
439 | } | 439 | } |
440 | 440 | ||
441 | static void bt_tags_for_each(struct blk_mq_tags *tags, | ||
442 | struct blk_mq_bitmap_tags *bt, unsigned int off, | ||
443 | busy_tag_iter_fn *fn, void *data, bool reserved) | ||
444 | { | ||
445 | struct request *rq; | ||
446 | int bit, i; | ||
447 | |||
448 | if (!tags->rqs) | ||
449 | return; | ||
450 | for (i = 0; i < bt->map_nr; i++) { | ||
451 | struct blk_align_bitmap *bm = &bt->map[i]; | ||
452 | |||
453 | for (bit = find_first_bit(&bm->word, bm->depth); | ||
454 | bit < bm->depth; | ||
455 | bit = find_next_bit(&bm->word, bm->depth, bit + 1)) { | ||
456 | rq = blk_mq_tag_to_rq(tags, off + bit); | ||
457 | fn(rq, data, reserved); | ||
458 | } | ||
459 | |||
460 | off += (1 << bt->bits_per_word); | ||
461 | } | ||
462 | } | ||
463 | |||
464 | void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn, | ||
465 | void *priv) | ||
466 | { | ||
467 | if (tags->nr_reserved_tags) | ||
468 | bt_tags_for_each(tags, &tags->breserved_tags, 0, fn, priv, true); | ||
469 | bt_tags_for_each(tags, &tags->bitmap_tags, tags->nr_reserved_tags, fn, priv, | ||
470 | false); | ||
471 | } | ||
472 | EXPORT_SYMBOL(blk_mq_all_tag_busy_iter); | ||
473 | |||
441 | void blk_mq_tag_busy_iter(struct blk_mq_hw_ctx *hctx, busy_iter_fn *fn, | 474 | void blk_mq_tag_busy_iter(struct blk_mq_hw_ctx *hctx, busy_iter_fn *fn, |
442 | void *priv) | 475 | void *priv) |
443 | { | 476 | { |
@@ -580,6 +613,11 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags, | |||
580 | if (!tags) | 613 | if (!tags) |
581 | return NULL; | 614 | return NULL; |
582 | 615 | ||
616 | if (!zalloc_cpumask_var(&tags->cpumask, GFP_KERNEL)) { | ||
617 | kfree(tags); | ||
618 | return NULL; | ||
619 | } | ||
620 | |||
583 | tags->nr_tags = total_tags; | 621 | tags->nr_tags = total_tags; |
584 | tags->nr_reserved_tags = reserved_tags; | 622 | tags->nr_reserved_tags = reserved_tags; |
585 | 623 | ||
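blk_mq_all_tag_busy_iter() walks every set bit of a tag map directly from a struct blk_mq_tags, so a caller can visit started requests without going through a hardware context. Judging from the call site, a busy_tag_iter_fn receives the request, the caller's cookie and a reserved flag; the counting helper below is a hypothetical use.

    /* Hypothetical callback: count in-flight requests for one tag set. */
    static void example_count_busy(struct request *rq, void *data, bool reserved)
    {
            unsigned int *count = data;

            (*count)++;
    }

    static unsigned int example_busy_requests(struct blk_mq_tags *tags)
    {
            unsigned int count = 0;

            blk_mq_all_tag_busy_iter(tags, example_count_busy, &count);
            return count;
    }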
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index 90767b370308..75893a34237d 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -44,6 +44,7 @@ struct blk_mq_tags {
44 | struct list_head page_list; | 44 | struct list_head page_list; |
45 | 45 | ||
46 | int alloc_policy; | 46 | int alloc_policy; |
47 | cpumask_var_t cpumask; | ||
47 | }; | 48 | }; |
48 | 49 | ||
49 | 50 | ||
diff --git a/block/blk-mq.c b/block/blk-mq.c
index ade8a2d1b0aa..f53779692c77 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -89,7 +89,8 @@ static int blk_mq_queue_enter(struct request_queue *q, gfp_t gfp)
89 | return -EBUSY; | 89 | return -EBUSY; |
90 | 90 | ||
91 | ret = wait_event_interruptible(q->mq_freeze_wq, | 91 | ret = wait_event_interruptible(q->mq_freeze_wq, |
92 | !q->mq_freeze_depth || blk_queue_dying(q)); | 92 | !atomic_read(&q->mq_freeze_depth) || |
93 | blk_queue_dying(q)); | ||
93 | if (blk_queue_dying(q)) | 94 | if (blk_queue_dying(q)) |
94 | return -ENODEV; | 95 | return -ENODEV; |
95 | if (ret) | 96 | if (ret) |
@@ -112,13 +113,10 @@ static void blk_mq_usage_counter_release(struct percpu_ref *ref) | |||
112 | 113 | ||
113 | void blk_mq_freeze_queue_start(struct request_queue *q) | 114 | void blk_mq_freeze_queue_start(struct request_queue *q) |
114 | { | 115 | { |
115 | bool freeze; | 116 | int freeze_depth; |
116 | 117 | ||
117 | spin_lock_irq(q->queue_lock); | 118 | freeze_depth = atomic_inc_return(&q->mq_freeze_depth); |
118 | freeze = !q->mq_freeze_depth++; | 119 | if (freeze_depth == 1) { |
119 | spin_unlock_irq(q->queue_lock); | ||
120 | |||
121 | if (freeze) { | ||
122 | percpu_ref_kill(&q->mq_usage_counter); | 120 | percpu_ref_kill(&q->mq_usage_counter); |
123 | blk_mq_run_hw_queues(q, false); | 121 | blk_mq_run_hw_queues(q, false); |
124 | } | 122 | } |
@@ -143,13 +141,11 @@ EXPORT_SYMBOL_GPL(blk_mq_freeze_queue); | |||
143 | 141 | ||
144 | void blk_mq_unfreeze_queue(struct request_queue *q) | 142 | void blk_mq_unfreeze_queue(struct request_queue *q) |
145 | { | 143 | { |
146 | bool wake; | 144 | int freeze_depth; |
147 | 145 | ||
148 | spin_lock_irq(q->queue_lock); | 146 | freeze_depth = atomic_dec_return(&q->mq_freeze_depth); |
149 | wake = !--q->mq_freeze_depth; | 147 | WARN_ON_ONCE(freeze_depth < 0); |
150 | WARN_ON_ONCE(q->mq_freeze_depth < 0); | 148 | if (!freeze_depth) { |
151 | spin_unlock_irq(q->queue_lock); | ||
152 | if (wake) { | ||
153 | percpu_ref_reinit(&q->mq_usage_counter); | 149 | percpu_ref_reinit(&q->mq_usage_counter); |
154 | wake_up_all(&q->mq_freeze_wq); | 150 | wake_up_all(&q->mq_freeze_wq); |
155 | } | 151 | } |
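Turning mq_freeze_depth into an atomic_t removes the queue_lock round trips: the return values of atomic_inc_return()/atomic_dec_return() identify the first freezer and the last unfreezer, and only those callers kill or reinitialise the percpu reference and wake waiters. The same shape in portable C11 atomics, purely illustrative:

    #include <stdatomic.h>

    static atomic_int freeze_depth;     /* models q->mq_freeze_depth */

    static void freeze_start(void)
    {
            /* first freezer (depth 0 -> 1) kills the percpu ref, runs queues */
            if (atomic_fetch_add(&freeze_depth, 1) == 0) {
                    /* percpu_ref_kill(); blk_mq_run_hw_queues(); */
            }
    }

    static void unfreeze(void)
    {
            int depth = atomic_fetch_sub(&freeze_depth, 1) - 1;

            if (depth == 0) {
                    /* last unfreezer: percpu_ref_reinit(); wake_up_all(); */
            }
            /* depth < 0 is the unbalanced case caught by WARN_ON_ONCE() */
    }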
@@ -677,8 +673,11 @@ static void blk_mq_rq_timer(unsigned long priv) | |||
677 | data.next = blk_rq_timeout(round_jiffies_up(data.next)); | 673 | data.next = blk_rq_timeout(round_jiffies_up(data.next)); |
678 | mod_timer(&q->timeout, data.next); | 674 | mod_timer(&q->timeout, data.next); |
679 | } else { | 675 | } else { |
680 | queue_for_each_hw_ctx(q, hctx, i) | 676 | queue_for_each_hw_ctx(q, hctx, i) { |
681 | blk_mq_tag_idle(hctx); | 677 | /* the hctx may be unmapped, so check it here */ |
678 | if (blk_mq_hw_queue_mapped(hctx)) | ||
679 | blk_mq_tag_idle(hctx); | ||
680 | } | ||
682 | } | 681 | } |
683 | } | 682 | } |
684 | 683 | ||
@@ -855,6 +854,16 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) | |||
855 | spin_lock(&hctx->lock); | 854 | spin_lock(&hctx->lock); |
856 | list_splice(&rq_list, &hctx->dispatch); | 855 | list_splice(&rq_list, &hctx->dispatch); |
857 | spin_unlock(&hctx->lock); | 856 | spin_unlock(&hctx->lock); |
857 | /* | ||
858 | * the queue is expected stopped with BLK_MQ_RQ_QUEUE_BUSY, but | ||
859 | * it's possible the queue is stopped and restarted again | ||
860 | * before this. Queue restart will dispatch requests. And since | ||
861 | * requests in rq_list aren't added into hctx->dispatch yet, | ||
862 | * the requests in rq_list might get lost. | ||
863 | * | ||
864 | * blk_mq_run_hw_queue() already checks the STOPPED bit | ||
865 | **/ | ||
866 | blk_mq_run_hw_queue(hctx, true); | ||
858 | } | 867 | } |
859 | } | 868 | } |
860 | 869 | ||
@@ -1224,6 +1233,38 @@ static struct request *blk_mq_map_request(struct request_queue *q, | |||
1224 | return rq; | 1233 | return rq; |
1225 | } | 1234 | } |
1226 | 1235 | ||
1236 | static int blk_mq_direct_issue_request(struct request *rq) | ||
1237 | { | ||
1238 | int ret; | ||
1239 | struct request_queue *q = rq->q; | ||
1240 | struct blk_mq_hw_ctx *hctx = q->mq_ops->map_queue(q, | ||
1241 | rq->mq_ctx->cpu); | ||
1242 | struct blk_mq_queue_data bd = { | ||
1243 | .rq = rq, | ||
1244 | .list = NULL, | ||
1245 | .last = 1 | ||
1246 | }; | ||
1247 | |||
1248 | /* | ||
1249 | * For OK queue, we are done. For error, kill it. Any other | ||
1250 | * error (busy), just add it to our list as we previously | ||
1251 | * would have done | ||
1252 | */ | ||
1253 | ret = q->mq_ops->queue_rq(hctx, &bd); | ||
1254 | if (ret == BLK_MQ_RQ_QUEUE_OK) | ||
1255 | return 0; | ||
1256 | else { | ||
1257 | __blk_mq_requeue_request(rq); | ||
1258 | |||
1259 | if (ret == BLK_MQ_RQ_QUEUE_ERROR) { | ||
1260 | rq->errors = -EIO; | ||
1261 | blk_mq_end_request(rq, rq->errors); | ||
1262 | return 0; | ||
1263 | } | ||
1264 | return -1; | ||
1265 | } | ||
1266 | } | ||
1267 | |||
1227 | /* | 1268 | /* |
1228 | * Multiple hardware queue variant. This will not use per-process plugs, | 1269 | * Multiple hardware queue variant. This will not use per-process plugs, |
1229 | * but will attempt to bypass the hctx queueing if we can go straight to | 1270 | * but will attempt to bypass the hctx queueing if we can go straight to |
@@ -1235,6 +1276,9 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio) | |||
1235 | const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA); | 1276 | const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA); |
1236 | struct blk_map_ctx data; | 1277 | struct blk_map_ctx data; |
1237 | struct request *rq; | 1278 | struct request *rq; |
1279 | unsigned int request_count = 0; | ||
1280 | struct blk_plug *plug; | ||
1281 | struct request *same_queue_rq = NULL; | ||
1238 | 1282 | ||
1239 | blk_queue_bounce(q, &bio); | 1283 | blk_queue_bounce(q, &bio); |
1240 | 1284 | ||
@@ -1243,6 +1287,10 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio) | |||
1243 | return; | 1287 | return; |
1244 | } | 1288 | } |
1245 | 1289 | ||
1290 | if (!is_flush_fua && !blk_queue_nomerges(q) && | ||
1291 | blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq)) | ||
1292 | return; | ||
1293 | |||
1246 | rq = blk_mq_map_request(q, bio, &data); | 1294 | rq = blk_mq_map_request(q, bio, &data); |
1247 | if (unlikely(!rq)) | 1295 | if (unlikely(!rq)) |
1248 | return; | 1296 | return; |
@@ -1253,38 +1301,42 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio) | |||
1253 | goto run_queue; | 1301 | goto run_queue; |
1254 | } | 1302 | } |
1255 | 1303 | ||
1304 | plug = current->plug; | ||
1256 | /* | 1305 | /* |
1257 | * If the driver supports defer issued based on 'last', then | 1306 | * If the driver supports defer issued based on 'last', then |
1258 | * queue it up like normal since we can potentially save some | 1307 | * queue it up like normal since we can potentially save some |
1259 | * CPU this way. | 1308 | * CPU this way. |
1260 | */ | 1309 | */ |
1261 | if (is_sync && !(data.hctx->flags & BLK_MQ_F_DEFER_ISSUE)) { | 1310 | if (((plug && !blk_queue_nomerges(q)) || is_sync) && |
1262 | struct blk_mq_queue_data bd = { | 1311 | !(data.hctx->flags & BLK_MQ_F_DEFER_ISSUE)) { |
1263 | .rq = rq, | 1312 | struct request *old_rq = NULL; |
1264 | .list = NULL, | ||
1265 | .last = 1 | ||
1266 | }; | ||
1267 | int ret; | ||
1268 | 1313 | ||
1269 | blk_mq_bio_to_request(rq, bio); | 1314 | blk_mq_bio_to_request(rq, bio); |
1270 | 1315 | ||
1271 | /* | 1316 | /* |
1272 | * For OK queue, we are done. For error, kill it. Any other | 1317 | * we do limited pluging. If bio can be merged, do merge. |
1273 | * error (busy), just add it to our list as we previously | 1318 | * Otherwise the existing request in the plug list will be |
1274 | * would have done | 1319 | * issued. So the plug list will have one request at most |
1275 | */ | 1320 | */ |
1276 | ret = q->mq_ops->queue_rq(data.hctx, &bd); | 1321 | if (plug) { |
1277 | if (ret == BLK_MQ_RQ_QUEUE_OK) | 1322 | /* |
1278 | goto done; | 1323 | * The plug list might get flushed before this. If that |
1279 | else { | 1324 | * happens, same_queue_rq is invalid and plug list is empty |
1280 | __blk_mq_requeue_request(rq); | 1325 | **/ |
1281 | 1326 | if (same_queue_rq && !list_empty(&plug->mq_list)) { | |
1282 | if (ret == BLK_MQ_RQ_QUEUE_ERROR) { | 1327 | old_rq = same_queue_rq; |
1283 | rq->errors = -EIO; | 1328 | list_del_init(&old_rq->queuelist); |
1284 | blk_mq_end_request(rq, rq->errors); | ||
1285 | goto done; | ||
1286 | } | 1329 | } |
1287 | } | 1330 | list_add_tail(&rq->queuelist, &plug->mq_list); |
1331 | } else /* is_sync */ | ||
1332 | old_rq = rq; | ||
1333 | blk_mq_put_ctx(data.ctx); | ||
1334 | if (!old_rq) | ||
1335 | return; | ||
1336 | if (!blk_mq_direct_issue_request(old_rq)) | ||
1337 | return; | ||
1338 | blk_mq_insert_request(old_rq, false, true, true); | ||
1339 | return; | ||
1288 | } | 1340 | } |
1289 | 1341 | ||
1290 | if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) { | 1342 | if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) { |
@@ -1297,7 +1349,6 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio) | |||
1297 | run_queue: | 1349 | run_queue: |
1298 | blk_mq_run_hw_queue(data.hctx, !is_sync || is_flush_fua); | 1350 | blk_mq_run_hw_queue(data.hctx, !is_sync || is_flush_fua); |
1299 | } | 1351 | } |
1300 | done: | ||
1301 | blk_mq_put_ctx(data.ctx); | 1352 | blk_mq_put_ctx(data.ctx); |
1302 | } | 1353 | } |
1303 | 1354 | ||
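Taken together with the blk_attempt_plug_merge() change in blk-core.c, the multi-queue submission path now works like this: a merge against the single plugged request is tried first; if a new request is built while a plug is active, it is parked on plug->mq_list and the request previously parked there for the same queue (same_queue_rq) is pulled out and pushed through blk_mq_direct_issue_request(); without a plug, a sync request is still issued directly, falling back to blk_mq_insert_request() when the driver reports busy. The condensed decision function below restates that control flow, ignoring the flush/FUA and nomerges corner cases; the enum and parameter names are invented.

    #include <stdbool.h>

    enum mq_action { MQ_MERGED, MQ_PLUG_AND_ISSUE_OLD, MQ_ISSUE_DIRECT, MQ_INSERT };

    static enum mq_action mq_decide(bool is_flush_fua, bool merged_into_plug,
                                    bool have_plug, bool is_sync,
                                    bool driver_defers_issue)
    {
            if (!is_flush_fua && merged_into_plug)
                    return MQ_MERGED;             /* bio absorbed, no new request */

            if ((have_plug || is_sync) && !driver_defers_issue) {
                    if (have_plug)
                            /* park the new rq on the plug; directly issue the
                             * request parked there before, if there was one */
                            return MQ_PLUG_AND_ISSUE_OLD;
                    return MQ_ISSUE_DIRECT;       /* sync, no plug */
            }

            return MQ_INSERT;                     /* normal queueing path */
    }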
@@ -1309,16 +1360,11 @@ static void blk_sq_make_request(struct request_queue *q, struct bio *bio) | |||
1309 | { | 1360 | { |
1310 | const int is_sync = rw_is_sync(bio->bi_rw); | 1361 | const int is_sync = rw_is_sync(bio->bi_rw); |
1311 | const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA); | 1362 | const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA); |
1312 | unsigned int use_plug, request_count = 0; | 1363 | struct blk_plug *plug; |
1364 | unsigned int request_count = 0; | ||
1313 | struct blk_map_ctx data; | 1365 | struct blk_map_ctx data; |
1314 | struct request *rq; | 1366 | struct request *rq; |
1315 | 1367 | ||
1316 | /* | ||
1317 | * If we have multiple hardware queues, just go directly to | ||
1318 | * one of those for sync IO. | ||
1319 | */ | ||
1320 | use_plug = !is_flush_fua && !is_sync; | ||
1321 | |||
1322 | blk_queue_bounce(q, &bio); | 1368 | blk_queue_bounce(q, &bio); |
1323 | 1369 | ||
1324 | if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { | 1370 | if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { |
@@ -1326,8 +1372,8 @@ static void blk_sq_make_request(struct request_queue *q, struct bio *bio) | |||
1326 | return; | 1372 | return; |
1327 | } | 1373 | } |
1328 | 1374 | ||
1329 | if (use_plug && !blk_queue_nomerges(q) && | 1375 | if (!is_flush_fua && !blk_queue_nomerges(q) && |
1330 | blk_attempt_plug_merge(q, bio, &request_count)) | 1376 | blk_attempt_plug_merge(q, bio, &request_count, NULL)) |
1331 | return; | 1377 | return; |
1332 | 1378 | ||
1333 | rq = blk_mq_map_request(q, bio, &data); | 1379 | rq = blk_mq_map_request(q, bio, &data); |
@@ -1345,21 +1391,18 @@ static void blk_sq_make_request(struct request_queue *q, struct bio *bio) | |||
1345 | * utilize that to temporarily store requests until the task is | 1391 | * utilize that to temporarily store requests until the task is |
1346 | * either done or scheduled away. | 1392 | * either done or scheduled away. |
1347 | */ | 1393 | */ |
1348 | if (use_plug) { | 1394 | plug = current->plug; |
1349 | struct blk_plug *plug = current->plug; | 1395 | if (plug) { |
1350 | 1396 | blk_mq_bio_to_request(rq, bio); | |
1351 | if (plug) { | 1397 | if (list_empty(&plug->mq_list)) |
1352 | blk_mq_bio_to_request(rq, bio); | 1398 | trace_block_plug(q); |
1353 | if (list_empty(&plug->mq_list)) | 1399 | else if (request_count >= BLK_MAX_REQUEST_COUNT) { |
1354 | trace_block_plug(q); | 1400 | blk_flush_plug_list(plug, false); |
1355 | else if (request_count >= BLK_MAX_REQUEST_COUNT) { | 1401 | trace_block_plug(q); |
1356 | blk_flush_plug_list(plug, false); | ||
1357 | trace_block_plug(q); | ||
1358 | } | ||
1359 | list_add_tail(&rq->queuelist, &plug->mq_list); | ||
1360 | blk_mq_put_ctx(data.ctx); | ||
1361 | return; | ||
1362 | } | 1402 | } |
1403 | list_add_tail(&rq->queuelist, &plug->mq_list); | ||
1404 | blk_mq_put_ctx(data.ctx); | ||
1405 | return; | ||
1363 | } | 1406 | } |
1364 | 1407 | ||
1365 | if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) { | 1408 | if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) { |
@@ -1495,7 +1538,6 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set, | |||
1495 | i++; | 1538 | i++; |
1496 | } | 1539 | } |
1497 | } | 1540 | } |
1498 | |||
1499 | return tags; | 1541 | return tags; |
1500 | 1542 | ||
1501 | fail: | 1543 | fail: |
@@ -1571,22 +1613,6 @@ static int blk_mq_hctx_cpu_offline(struct blk_mq_hw_ctx *hctx, int cpu) | |||
1571 | return NOTIFY_OK; | 1613 | return NOTIFY_OK; |
1572 | } | 1614 | } |
1573 | 1615 | ||
1574 | static int blk_mq_hctx_cpu_online(struct blk_mq_hw_ctx *hctx, int cpu) | ||
1575 | { | ||
1576 | struct request_queue *q = hctx->queue; | ||
1577 | struct blk_mq_tag_set *set = q->tag_set; | ||
1578 | |||
1579 | if (set->tags[hctx->queue_num]) | ||
1580 | return NOTIFY_OK; | ||
1581 | |||
1582 | set->tags[hctx->queue_num] = blk_mq_init_rq_map(set, hctx->queue_num); | ||
1583 | if (!set->tags[hctx->queue_num]) | ||
1584 | return NOTIFY_STOP; | ||
1585 | |||
1586 | hctx->tags = set->tags[hctx->queue_num]; | ||
1587 | return NOTIFY_OK; | ||
1588 | } | ||
1589 | |||
1590 | static int blk_mq_hctx_notify(void *data, unsigned long action, | 1616 | static int blk_mq_hctx_notify(void *data, unsigned long action, |
1591 | unsigned int cpu) | 1617 | unsigned int cpu) |
1592 | { | 1618 | { |
@@ -1594,12 +1620,16 @@ static int blk_mq_hctx_notify(void *data, unsigned long action, | |||
1594 | 1620 | ||
1595 | if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) | 1621 | if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) |
1596 | return blk_mq_hctx_cpu_offline(hctx, cpu); | 1622 | return blk_mq_hctx_cpu_offline(hctx, cpu); |
1597 | else if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) | 1623 | |
1598 | return blk_mq_hctx_cpu_online(hctx, cpu); | 1624 | /* |
1625 | * In case of CPU online, tags may be reallocated | ||
1626 | * in blk_mq_map_swqueue() after mapping is updated. | ||
1627 | */ | ||
1599 | 1628 | ||
1600 | return NOTIFY_OK; | 1629 | return NOTIFY_OK; |
1601 | } | 1630 | } |
1602 | 1631 | ||
1632 | /* hctx->ctxs will be freed in queue's release handler */ | ||
1603 | static void blk_mq_exit_hctx(struct request_queue *q, | 1633 | static void blk_mq_exit_hctx(struct request_queue *q, |
1604 | struct blk_mq_tag_set *set, | 1634 | struct blk_mq_tag_set *set, |
1605 | struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) | 1635 | struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) |
@@ -1618,7 +1648,6 @@ static void blk_mq_exit_hctx(struct request_queue *q, | |||
1618 | 1648 | ||
1619 | blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier); | 1649 | blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier); |
1620 | blk_free_flush_queue(hctx->fq); | 1650 | blk_free_flush_queue(hctx->fq); |
1621 | kfree(hctx->ctxs); | ||
1622 | blk_mq_free_bitmap(&hctx->ctx_map); | 1651 | blk_mq_free_bitmap(&hctx->ctx_map); |
1623 | } | 1652 | } |
1624 | 1653 | ||
@@ -1775,6 +1804,7 @@ static void blk_mq_map_swqueue(struct request_queue *q) | |||
1775 | unsigned int i; | 1804 | unsigned int i; |
1776 | struct blk_mq_hw_ctx *hctx; | 1805 | struct blk_mq_hw_ctx *hctx; |
1777 | struct blk_mq_ctx *ctx; | 1806 | struct blk_mq_ctx *ctx; |
1807 | struct blk_mq_tag_set *set = q->tag_set; | ||
1778 | 1808 | ||
1779 | queue_for_each_hw_ctx(q, hctx, i) { | 1809 | queue_for_each_hw_ctx(q, hctx, i) { |
1780 | cpumask_clear(hctx->cpumask); | 1810 | cpumask_clear(hctx->cpumask); |
@@ -1791,6 +1821,7 @@ static void blk_mq_map_swqueue(struct request_queue *q) | |||
1791 | 1821 | ||
1792 | hctx = q->mq_ops->map_queue(q, i); | 1822 | hctx = q->mq_ops->map_queue(q, i); |
1793 | cpumask_set_cpu(i, hctx->cpumask); | 1823 | cpumask_set_cpu(i, hctx->cpumask); |
1824 | cpumask_set_cpu(i, hctx->tags->cpumask); | ||
1794 | ctx->index_hw = hctx->nr_ctx; | 1825 | ctx->index_hw = hctx->nr_ctx; |
1795 | hctx->ctxs[hctx->nr_ctx++] = ctx; | 1826 | hctx->ctxs[hctx->nr_ctx++] = ctx; |
1796 | } | 1827 | } |
@@ -1803,16 +1834,20 @@ static void blk_mq_map_swqueue(struct request_queue *q) | |||
1803 | * disable it and free the request entries. | 1834 | * disable it and free the request entries. |
1804 | */ | 1835 | */ |
1805 | if (!hctx->nr_ctx) { | 1836 | if (!hctx->nr_ctx) { |
1806 | struct blk_mq_tag_set *set = q->tag_set; | ||
1807 | |||
1808 | if (set->tags[i]) { | 1837 | if (set->tags[i]) { |
1809 | blk_mq_free_rq_map(set, set->tags[i], i); | 1838 | blk_mq_free_rq_map(set, set->tags[i], i); |
1810 | set->tags[i] = NULL; | 1839 | set->tags[i] = NULL; |
1811 | hctx->tags = NULL; | ||
1812 | } | 1840 | } |
1841 | hctx->tags = NULL; | ||
1813 | continue; | 1842 | continue; |
1814 | } | 1843 | } |
1815 | 1844 | ||
1845 | /* unmapped hw queue can be remapped after CPU topo changed */ | ||
1846 | if (!set->tags[i]) | ||
1847 | set->tags[i] = blk_mq_init_rq_map(set, i); | ||
1848 | hctx->tags = set->tags[i]; | ||
1849 | WARN_ON(!hctx->tags); | ||
1850 | |||
1816 | /* | 1851 | /* |
1817 | * Set the map size to the number of mapped software queues. | 1852 | * Set the map size to the number of mapped software queues. |
1818 | * This is more accurate and more efficient than looping | 1853 | * This is more accurate and more efficient than looping |
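
Note on the blk_mq_map_swqueue() change above: a hardware queue that had lost all of its CPUs can now get a tag map back when the topology changes, because the map is (re)allocated on demand during remapping rather than in the CPU notifier (which, per the earlier hunk, no longer acts on CPU_ONLINE). A small user-space sketch of the same idea, with made-up names (struct tag_map, remap_queues(), cpu_to_queue()), not the kernel API:

    #include <stdio.h>
    #include <stdlib.h>

    #define NR_HW_QUEUES 4

    struct tag_map { int depth; };              /* stands in for blk_mq_tags */

    static struct tag_map *tags[NR_HW_QUEUES];  /* stands in for set->tags[] */

    /* Hypothetical mapping: which hw queue serves a given CPU. */
    static int cpu_to_queue(int cpu) { return cpu % NR_HW_QUEUES; }

    /* Re-run the CPU -> hw queue mapping after a topology change. */
    static void remap_queues(int online_cpus)
    {
        int nr_ctx[NR_HW_QUEUES] = { 0 };
        int cpu, q;

        for (cpu = 0; cpu < online_cpus; cpu++)
            nr_ctx[cpu_to_queue(cpu)]++;

        for (q = 0; q < NR_HW_QUEUES; q++) {
            if (!nr_ctx[q]) {
                /* queue lost all CPUs: drop its tag map */
                free(tags[q]);
                tags[q] = NULL;
                continue;
            }
            /* unmapped queue regained CPUs: allocate the map on demand */
            if (!tags[q]) {
                tags[q] = malloc(sizeof(*tags[q]));
                if (tags[q])
                    tags[q]->depth = 64;
            }
            if (!tags[q])
                fprintf(stderr, "queue %d has no tags\n", q); /* WARN_ON analogue */
        }
    }

    int main(void)
    {
        remap_queues(2);    /* only some queues get CPUs */
        remap_queues(8);    /* previously unmapped queues are remapped and reallocated */
        for (int q = 0; q < NR_HW_QUEUES; q++)
            printf("queue %d: %s\n", q, tags[q] ? "mapped" : "unmapped");
        return 0;
    }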
@@ -1886,8 +1921,12 @@ void blk_mq_release(struct request_queue *q) | |||
1886 | unsigned int i; | 1921 | unsigned int i; |
1887 | 1922 | ||
1888 | /* hctx kobj stays in hctx */ | 1923 | /* hctx kobj stays in hctx */ |
1889 | queue_for_each_hw_ctx(q, hctx, i) | 1924 | queue_for_each_hw_ctx(q, hctx, i) { |
1925 | if (!hctx) | ||
1926 | continue; | ||
1927 | kfree(hctx->ctxs); | ||
1890 | kfree(hctx); | 1928 | kfree(hctx); |
1929 | } | ||
1891 | 1930 | ||
1892 | kfree(q->queue_hw_ctx); | 1931 | kfree(q->queue_hw_ctx); |
1893 | 1932 | ||
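
Moving kfree(hctx->ctxs) out of blk_mq_exit_hctx() and into blk_mq_release() ties the lifetime of the ctxs array to the queue's release handler, matching the "/* hctx kobj stays in hctx */" and "/* hctx->ctxs will be freed in queue's release handler */" notes: the array must stay valid for as long as the hctx object itself does. A rough user-space sketch of that exit-versus-release split, with hypothetical names (struct obj, obj_put()) and a plain reference count:

    #include <stdlib.h>
    #include <stdatomic.h>

    struct obj {
        atomic_int refs;
        int *ctxs;               /* nested allocation owned by the object */
    };

    static struct obj *obj_create(int nr)
    {
        struct obj *o = malloc(sizeof(*o));
        if (!o)
            return NULL;
        atomic_init(&o->refs, 1);
        o->ctxs = calloc(nr, sizeof(*o->ctxs));
        return o;
    }

    /* "exit": tear down runtime state, but do not free memory that other
     * reference holders may still look at. */
    static void obj_exit(struct obj *o)
    {
        (void)o;                 /* nothing freed here on purpose */
    }

    /* "release": runs only when the last reference goes away, so the
     * nested allocation can be freed safely alongside the object. */
    static void obj_release(struct obj *o)
    {
        free(o->ctxs);
        free(o);
    }

    static void obj_put(struct obj *o)
    {
        if (atomic_fetch_sub(&o->refs, 1) == 1)
            obj_release(o);
    }

    int main(void)
    {
        struct obj *o = obj_create(4);
        if (!o)
            return 1;
        obj_exit(o);   /* like blk_mq_exit_hctx(): ctxs left alone */
        obj_put(o);    /* like the final kobject put -> blk_mq_release() */
        return 0;
    }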
@@ -2047,7 +2086,7 @@ void blk_mq_free_queue(struct request_queue *q) | |||
2047 | /* Basically redo blk_mq_init_queue with queue frozen */ | 2086 | /* Basically redo blk_mq_init_queue with queue frozen */ |
2048 | static void blk_mq_queue_reinit(struct request_queue *q) | 2087 | static void blk_mq_queue_reinit(struct request_queue *q) |
2049 | { | 2088 | { |
2050 | WARN_ON_ONCE(!q->mq_freeze_depth); | 2089 | WARN_ON_ONCE(!atomic_read(&q->mq_freeze_depth)); |
2051 | 2090 | ||
2052 | blk_mq_sysfs_unregister(q); | 2091 | blk_mq_sysfs_unregister(q); |
2053 | 2092 | ||
@@ -2090,9 +2129,16 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb, | |||
2090 | */ | 2129 | */ |
2091 | list_for_each_entry(q, &all_q_list, all_q_node) | 2130 | list_for_each_entry(q, &all_q_list, all_q_node) |
2092 | blk_mq_freeze_queue_start(q); | 2131 | blk_mq_freeze_queue_start(q); |
2093 | list_for_each_entry(q, &all_q_list, all_q_node) | 2132 | list_for_each_entry(q, &all_q_list, all_q_node) { |
2094 | blk_mq_freeze_queue_wait(q); | 2133 | blk_mq_freeze_queue_wait(q); |
2095 | 2134 | ||
2135 | /* | ||
2136 | * timeout handler can't touch hw queue during the | ||
2137 | * reinitialization | ||
2138 | */ | ||
2139 | del_timer_sync(&q->timeout); | ||
2140 | } | ||
2141 | |||
2096 | list_for_each_entry(q, &all_q_list, all_q_node) | 2142 | list_for_each_entry(q, &all_q_list, all_q_node) |
2097 | blk_mq_queue_reinit(q); | 2143 | blk_mq_queue_reinit(q); |
2098 | 2144 | ||
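
The del_timer_sync(&q->timeout) added above waits until any running timeout handler has finished, so the handler cannot touch a hardware queue while blk_mq_queue_reinit() rebuilds it. The same quiesce-then-reconfigure pattern in user space, with a pthread standing in for the timer (illustrative only, not the kernel mechanism):

    #include <pthread.h>
    #include <stdio.h>
    #include <unistd.h>

    static int config;          /* state the "timeout handler" reads   */
    static int stop;            /* set to ask the worker to finish     */
    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

    static void *timeout_worker(void *arg)
    {
        (void)arg;
        for (;;) {
            pthread_mutex_lock(&lock);
            int done = stop;
            int seen = config;
            pthread_mutex_unlock(&lock);
            if (done)
                break;
            printf("timeout handler sees config %d\n", seen);
            usleep(10000);
        }
        return NULL;
    }

    int main(void)
    {
        pthread_t timer;
        pthread_create(&timer, NULL, timeout_worker, NULL);

        /* Quiesce: ask the worker to stop and wait until it has done so,
         * the moral equivalent of del_timer_sync(). */
        pthread_mutex_lock(&lock);
        stop = 1;
        pthread_mutex_unlock(&lock);
        pthread_join(timer, NULL);

        /* Only now is it safe to reinitialize the shared state. */
        config = 2;
        printf("reinit done, config %d\n", config);
        return 0;
    }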
@@ -2157,6 +2203,12 @@ static int blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set) | |||
2157 | return 0; | 2203 | return 0; |
2158 | } | 2204 | } |
2159 | 2205 | ||
2206 | struct cpumask *blk_mq_tags_cpumask(struct blk_mq_tags *tags) | ||
2207 | { | ||
2208 | return tags->cpumask; | ||
2209 | } | ||
2210 | EXPORT_SYMBOL_GPL(blk_mq_tags_cpumask); | ||
2211 | |||
2160 | /* | 2212 | /* |
2161 | * Alloc a tag set to be associated with one or more request queues. | 2213 | * Alloc a tag set to be associated with one or more request queues. |
2162 | * May fail with EINVAL for various error conditions. May adjust the | 2214 | * May fail with EINVAL for various error conditions. May adjust the |
@@ -2218,8 +2270,10 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set) | |||
2218 | int i; | 2270 | int i; |
2219 | 2271 | ||
2220 | for (i = 0; i < set->nr_hw_queues; i++) { | 2272 | for (i = 0; i < set->nr_hw_queues; i++) { |
2221 | if (set->tags[i]) | 2273 | if (set->tags[i]) { |
2222 | blk_mq_free_rq_map(set, set->tags[i], i); | 2274 | blk_mq_free_rq_map(set, set->tags[i], i); |
2275 | free_cpumask_var(set->tags[i]->cpumask); | ||
2276 | } | ||
2223 | } | 2277 | } |
2224 | 2278 | ||
2225 | kfree(set->tags); | 2279 | kfree(set->tags); |
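
With the per-tags cpumask introduced in this series (filled in by blk_mq_map_swqueue() and exposed through blk_mq_tags_cpumask()), blk_mq_free_tag_set() also has to release that mask when a tag map is torn down; the mask is presumably allocated when the tag map itself is set up. A small sketch of keeping a dynamically allocated member paired with its owning structure, user-space names only:

    #include <stdlib.h>

    struct tags {
        unsigned long *cpumask;     /* dynamically allocated member */
        int depth;
    };

    static struct tags *tags_alloc(int depth, int mask_words)
    {
        struct tags *t = malloc(sizeof(*t));
        if (!t)
            return NULL;
        t->cpumask = calloc(mask_words, sizeof(*t->cpumask));
        if (!t->cpumask) {
            free(t);
            return NULL;
        }
        t->depth = depth;
        return t;
    }

    static void tags_free(struct tags *t)
    {
        if (!t)
            return;
        /* release the member together with, and before, its owner */
        free(t->cpumask);
        free(t);
    }

    int main(void)
    {
        struct tags *t = tags_alloc(64, 4);
        tags_free(t);
        return 0;
    }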
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index faaf36ade7eb..2b8fd302f677 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c | |||
@@ -522,8 +522,6 @@ static void blk_release_queue(struct kobject *kobj) | |||
522 | 522 | ||
523 | blk_trace_shutdown(q); | 523 | blk_trace_shutdown(q); |
524 | 524 | ||
525 | bdi_destroy(&q->backing_dev_info); | ||
526 | |||
527 | ida_simple_remove(&blk_queue_ida, q->id); | 525 | ida_simple_remove(&blk_queue_ida, q->id); |
528 | call_rcu(&q->rcu_head, blk_free_queue_rcu); | 526 | call_rcu(&q->rcu_head, blk_free_queue_rcu); |
529 | } | 527 | } |
diff --git a/block/blk.h b/block/blk.h index 43b036185712..026d9594142b 100644 --- a/block/blk.h +++ b/block/blk.h | |||
@@ -78,7 +78,8 @@ bool bio_attempt_front_merge(struct request_queue *q, struct request *req, | |||
78 | bool bio_attempt_back_merge(struct request_queue *q, struct request *req, | 78 | bool bio_attempt_back_merge(struct request_queue *q, struct request *req, |
79 | struct bio *bio); | 79 | struct bio *bio); |
80 | bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, | 80 | bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, |
81 | unsigned int *request_count); | 81 | unsigned int *request_count, |
82 | struct request **same_queue_rq); | ||
82 | 83 | ||
83 | void blk_account_io_start(struct request *req, bool new_io); | 84 | void blk_account_io_start(struct request *req, bool new_io); |
84 | void blk_account_io_completion(struct request *req, unsigned int bytes); | 85 | void blk_account_io_completion(struct request *req, unsigned int bytes); |
@@ -193,8 +194,6 @@ int blk_try_merge(struct request *rq, struct bio *bio); | |||
193 | 194 | ||
194 | void blk_queue_congestion_threshold(struct request_queue *q); | 195 | void blk_queue_congestion_threshold(struct request_queue *q); |
195 | 196 | ||
196 | void __blk_run_queue_uncond(struct request_queue *q); | ||
197 | |||
198 | int blk_dev_init(void); | 197 | int blk_dev_init(void); |
199 | 198 | ||
200 | 199 | ||
diff --git a/block/bounce.c b/block/bounce.c index ab21ba203d5c..3ab0bce1c947 100644 --- a/block/bounce.c +++ b/block/bounce.c | |||
@@ -128,9 +128,6 @@ static void bounce_end_io(struct bio *bio, mempool_t *pool, int err) | |||
128 | struct bio_vec *bvec, *org_vec; | 128 | struct bio_vec *bvec, *org_vec; |
129 | int i; | 129 | int i; |
130 | 130 | ||
131 | if (test_bit(BIO_EOPNOTSUPP, &bio->bi_flags)) | ||
132 | set_bit(BIO_EOPNOTSUPP, &bio_orig->bi_flags); | ||
133 | |||
134 | /* | 131 | /* |
135 | * free up bounce indirect pages used | 132 | * free up bounce indirect pages used |
136 | */ | 133 | */ |
@@ -221,8 +218,8 @@ bounce: | |||
221 | if (page_to_pfn(page) <= queue_bounce_pfn(q) && !force) | 218 | if (page_to_pfn(page) <= queue_bounce_pfn(q) && !force) |
222 | continue; | 219 | continue; |
223 | 220 | ||
224 | inc_zone_page_state(to->bv_page, NR_BOUNCE); | ||
225 | to->bv_page = mempool_alloc(pool, q->bounce_gfp); | 221 | to->bv_page = mempool_alloc(pool, q->bounce_gfp); |
222 | inc_zone_page_state(to->bv_page, NR_BOUNCE); | ||
226 | 223 | ||
227 | if (rw == WRITE) { | 224 | if (rw == WRITE) { |
228 | char *vto, *vfrom; | 225 | char *vto, *vfrom; |
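
Reordering the two statements in bounce.c means inc_zone_page_state() is called on the freshly allocated bounce page rather than on the original page that is about to be replaced; since the NR_BOUNCE statistic is accounted per zone of the page passed in, charging the original page would bump the wrong zone's counter. A compact illustration of that accounting order, with invented types (struct page, bounce_alloc()):

    #include <stdio.h>
    #include <stdlib.h>

    enum zone { ZONE_NORMAL, ZONE_HIGH, NR_ZONES };

    static int nr_bounce[NR_ZONES];             /* per-zone NR_BOUNCE analogue */

    struct page { enum zone zone; };

    static struct page *bounce_alloc(void)
    {
        struct page *p = malloc(sizeof(*p));
        if (p)
            p->zone = ZONE_NORMAL;              /* bounce pool pages live here */
        return p;
    }

    int main(void)
    {
        struct page orig = { .zone = ZONE_HIGH };   /* page being bounced */
        struct page *to;

        /* Allocate the bounce page first, then account it: the counter is
         * per zone, so charging `orig` would credit the wrong zone. */
        to = bounce_alloc();
        if (!to)
            return 1;
        nr_bounce[to->zone]++;

        printf("bounced a zone-%d page, NR_BOUNCE: normal=%d high=%d\n",
               orig.zone, nr_bounce[ZONE_NORMAL], nr_bounce[ZONE_HIGH]);
        free(to);
        return 0;
    }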
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 5da8e6e9ab4b..d8ad45ccd8fa 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c | |||
@@ -67,6 +67,11 @@ static struct kmem_cache *cfq_pool; | |||
67 | #define sample_valid(samples) ((samples) > 80) | 67 | #define sample_valid(samples) ((samples) > 80) |
68 | #define rb_entry_cfqg(node) rb_entry((node), struct cfq_group, rb_node) | 68 | #define rb_entry_cfqg(node) rb_entry((node), struct cfq_group, rb_node) |
69 | 69 | ||
70 | /* blkio-related constants */ | ||
71 | #define CFQ_WEIGHT_MIN 10 | ||
72 | #define CFQ_WEIGHT_MAX 1000 | ||
73 | #define CFQ_WEIGHT_DEFAULT 500 | ||
74 | |||
70 | struct cfq_ttime { | 75 | struct cfq_ttime { |
71 | unsigned long last_end_request; | 76 | unsigned long last_end_request; |
72 | 77 | ||
@@ -212,6 +217,15 @@ struct cfqg_stats { | |||
212 | #endif /* CONFIG_CFQ_GROUP_IOSCHED */ | 217 | #endif /* CONFIG_CFQ_GROUP_IOSCHED */ |
213 | }; | 218 | }; |
214 | 219 | ||
220 | /* Per-cgroup data */ | ||
221 | struct cfq_group_data { | ||
222 | /* must be the first member */ | ||
223 | struct blkcg_policy_data pd; | ||
224 | |||
225 | unsigned int weight; | ||
226 | unsigned int leaf_weight; | ||
227 | }; | ||
228 | |||
215 | /* This is per cgroup per device grouping structure */ | 229 | /* This is per cgroup per device grouping structure */ |
216 | struct cfq_group { | 230 | struct cfq_group { |
217 | /* must be the first member */ | 231 | /* must be the first member */ |
@@ -446,16 +460,6 @@ CFQ_CFQQ_FNS(deep); | |||
446 | CFQ_CFQQ_FNS(wait_busy); | 460 | CFQ_CFQQ_FNS(wait_busy); |
447 | #undef CFQ_CFQQ_FNS | 461 | #undef CFQ_CFQQ_FNS |
448 | 462 | ||
449 | static inline struct cfq_group *pd_to_cfqg(struct blkg_policy_data *pd) | ||
450 | { | ||
451 | return pd ? container_of(pd, struct cfq_group, pd) : NULL; | ||
452 | } | ||
453 | |||
454 | static inline struct blkcg_gq *cfqg_to_blkg(struct cfq_group *cfqg) | ||
455 | { | ||
456 | return pd_to_blkg(&cfqg->pd); | ||
457 | } | ||
458 | |||
459 | #if defined(CONFIG_CFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP) | 463 | #if defined(CONFIG_CFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP) |
460 | 464 | ||
461 | /* cfqg stats flags */ | 465 | /* cfqg stats flags */ |
@@ -600,6 +604,22 @@ static inline void cfqg_stats_update_avg_queue_size(struct cfq_group *cfqg) { } | |||
600 | 604 | ||
601 | #ifdef CONFIG_CFQ_GROUP_IOSCHED | 605 | #ifdef CONFIG_CFQ_GROUP_IOSCHED |
602 | 606 | ||
607 | static inline struct cfq_group *pd_to_cfqg(struct blkg_policy_data *pd) | ||
608 | { | ||
609 | return pd ? container_of(pd, struct cfq_group, pd) : NULL; | ||
610 | } | ||
611 | |||
612 | static struct cfq_group_data | ||
613 | *cpd_to_cfqgd(struct blkcg_policy_data *cpd) | ||
614 | { | ||
615 | return cpd ? container_of(cpd, struct cfq_group_data, pd) : NULL; | ||
616 | } | ||
617 | |||
618 | static inline struct blkcg_gq *cfqg_to_blkg(struct cfq_group *cfqg) | ||
619 | { | ||
620 | return pd_to_blkg(&cfqg->pd); | ||
621 | } | ||
622 | |||
603 | static struct blkcg_policy blkcg_policy_cfq; | 623 | static struct blkcg_policy blkcg_policy_cfq; |
604 | 624 | ||
605 | static inline struct cfq_group *blkg_to_cfqg(struct blkcg_gq *blkg) | 625 | static inline struct cfq_group *blkg_to_cfqg(struct blkcg_gq *blkg) |
@@ -607,6 +627,11 @@ static inline struct cfq_group *blkg_to_cfqg(struct blkcg_gq *blkg) | |||
607 | return pd_to_cfqg(blkg_to_pd(blkg, &blkcg_policy_cfq)); | 627 | return pd_to_cfqg(blkg_to_pd(blkg, &blkcg_policy_cfq)); |
608 | } | 628 | } |
609 | 629 | ||
630 | static struct cfq_group_data *blkcg_to_cfqgd(struct blkcg *blkcg) | ||
631 | { | ||
632 | return cpd_to_cfqgd(blkcg_to_cpd(blkcg, &blkcg_policy_cfq)); | ||
633 | } | ||
634 | |||
610 | static inline struct cfq_group *cfqg_parent(struct cfq_group *cfqg) | 635 | static inline struct cfq_group *cfqg_parent(struct cfq_group *cfqg) |
611 | { | 636 | { |
612 | struct blkcg_gq *pblkg = cfqg_to_blkg(cfqg)->parent; | 637 | struct blkcg_gq *pblkg = cfqg_to_blkg(cfqg)->parent; |
@@ -1544,13 +1569,28 @@ static void cfqg_stats_init(struct cfqg_stats *stats) | |||
1544 | #endif | 1569 | #endif |
1545 | } | 1570 | } |
1546 | 1571 | ||
1572 | static void cfq_cpd_init(const struct blkcg *blkcg) | ||
1573 | { | ||
1574 | struct cfq_group_data *cgd = | ||
1575 | cpd_to_cfqgd(blkcg->pd[blkcg_policy_cfq.plid]); | ||
1576 | |||
1577 | if (blkcg == &blkcg_root) { | ||
1578 | cgd->weight = 2 * CFQ_WEIGHT_DEFAULT; | ||
1579 | cgd->leaf_weight = 2 * CFQ_WEIGHT_DEFAULT; | ||
1580 | } else { | ||
1581 | cgd->weight = CFQ_WEIGHT_DEFAULT; | ||
1582 | cgd->leaf_weight = CFQ_WEIGHT_DEFAULT; | ||
1583 | } | ||
1584 | } | ||
1585 | |||
1547 | static void cfq_pd_init(struct blkcg_gq *blkg) | 1586 | static void cfq_pd_init(struct blkcg_gq *blkg) |
1548 | { | 1587 | { |
1549 | struct cfq_group *cfqg = blkg_to_cfqg(blkg); | 1588 | struct cfq_group *cfqg = blkg_to_cfqg(blkg); |
1589 | struct cfq_group_data *cgd = blkcg_to_cfqgd(blkg->blkcg); | ||
1550 | 1590 | ||
1551 | cfq_init_cfqg_base(cfqg); | 1591 | cfq_init_cfqg_base(cfqg); |
1552 | cfqg->weight = blkg->blkcg->cfq_weight; | 1592 | cfqg->weight = cgd->weight; |
1553 | cfqg->leaf_weight = blkg->blkcg->cfq_leaf_weight; | 1593 | cfqg->leaf_weight = cgd->leaf_weight; |
1554 | cfqg_stats_init(&cfqg->stats); | 1594 | cfqg_stats_init(&cfqg->stats); |
1555 | cfqg_stats_init(&cfqg->dead_stats); | 1595 | cfqg_stats_init(&cfqg->dead_stats); |
1556 | } | 1596 | } |
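
struct cfq_group_data embeds a struct blkcg_policy_data as its first member, so cpd_to_cfqgd() recovers the CFQ-specific per-cgroup data from the generic policy data with container_of(); cfq_cpd_init() seeds the per-cgroup default weights that cfq_pd_init() then copies into each group instead of reading blkcg->cfq_weight. The embedding-plus-container_of pattern in isolation, as a user-space sketch with hypothetical names:

    #include <stddef.h>
    #include <stdio.h>

    #define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

    struct policy_data {                /* generic part, like blkcg_policy_data */
        int plid;
    };

    struct cfq_like_data {              /* policy-specific wrapper */
        struct policy_data pd;          /* must be the first member */
        unsigned int weight;
        unsigned int leaf_weight;
    };

    static struct cfq_like_data *pd_to_cfq(struct policy_data *pd)
    {
        return pd ? container_of(pd, struct cfq_like_data, pd) : NULL;
    }

    int main(void)
    {
        struct cfq_like_data d = { .pd = { .plid = 1 },
                                   .weight = 500, .leaf_weight = 500 };
        struct policy_data *pd = &d.pd;     /* what the generic code sees */

        printf("weight=%u\n", pd_to_cfq(pd)->weight);
        return 0;
    }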
@@ -1673,13 +1713,27 @@ static int cfqg_print_leaf_weight_device(struct seq_file *sf, void *v) | |||
1673 | 1713 | ||
1674 | static int cfq_print_weight(struct seq_file *sf, void *v) | 1714 | static int cfq_print_weight(struct seq_file *sf, void *v) |
1675 | { | 1715 | { |
1676 | seq_printf(sf, "%u\n", css_to_blkcg(seq_css(sf))->cfq_weight); | 1716 | struct blkcg *blkcg = css_to_blkcg(seq_css(sf)); |
1717 | struct cfq_group_data *cgd = blkcg_to_cfqgd(blkcg); | ||
1718 | unsigned int val = 0; | ||
1719 | |||
1720 | if (cgd) | ||
1721 | val = cgd->weight; | ||
1722 | |||
1723 | seq_printf(sf, "%u\n", val); | ||
1677 | return 0; | 1724 | return 0; |
1678 | } | 1725 | } |
1679 | 1726 | ||
1680 | static int cfq_print_leaf_weight(struct seq_file *sf, void *v) | 1727 | static int cfq_print_leaf_weight(struct seq_file *sf, void *v) |
1681 | { | 1728 | { |
1682 | seq_printf(sf, "%u\n", css_to_blkcg(seq_css(sf))->cfq_leaf_weight); | 1729 | struct blkcg *blkcg = css_to_blkcg(seq_css(sf)); |
1730 | struct cfq_group_data *cgd = blkcg_to_cfqgd(blkcg); | ||
1731 | unsigned int val = 0; | ||
1732 | |||
1733 | if (cgd) | ||
1734 | val = cgd->leaf_weight; | ||
1735 | |||
1736 | seq_printf(sf, "%u\n", val); | ||
1683 | return 0; | 1737 | return 0; |
1684 | } | 1738 | } |
1685 | 1739 | ||
@@ -1690,6 +1744,7 @@ static ssize_t __cfqg_set_weight_device(struct kernfs_open_file *of, | |||
1690 | struct blkcg *blkcg = css_to_blkcg(of_css(of)); | 1744 | struct blkcg *blkcg = css_to_blkcg(of_css(of)); |
1691 | struct blkg_conf_ctx ctx; | 1745 | struct blkg_conf_ctx ctx; |
1692 | struct cfq_group *cfqg; | 1746 | struct cfq_group *cfqg; |
1747 | struct cfq_group_data *cfqgd; | ||
1693 | int ret; | 1748 | int ret; |
1694 | 1749 | ||
1695 | ret = blkg_conf_prep(blkcg, &blkcg_policy_cfq, buf, &ctx); | 1750 | ret = blkg_conf_prep(blkcg, &blkcg_policy_cfq, buf, &ctx); |
@@ -1698,17 +1753,22 @@ static ssize_t __cfqg_set_weight_device(struct kernfs_open_file *of, | |||
1698 | 1753 | ||
1699 | ret = -EINVAL; | 1754 | ret = -EINVAL; |
1700 | cfqg = blkg_to_cfqg(ctx.blkg); | 1755 | cfqg = blkg_to_cfqg(ctx.blkg); |
1756 | cfqgd = blkcg_to_cfqgd(blkcg); | ||
1757 | if (!cfqg || !cfqgd) | ||
1758 | goto err; | ||
1759 | |||
1701 | if (!ctx.v || (ctx.v >= CFQ_WEIGHT_MIN && ctx.v <= CFQ_WEIGHT_MAX)) { | 1760 | if (!ctx.v || (ctx.v >= CFQ_WEIGHT_MIN && ctx.v <= CFQ_WEIGHT_MAX)) { |
1702 | if (!is_leaf_weight) { | 1761 | if (!is_leaf_weight) { |
1703 | cfqg->dev_weight = ctx.v; | 1762 | cfqg->dev_weight = ctx.v; |
1704 | cfqg->new_weight = ctx.v ?: blkcg->cfq_weight; | 1763 | cfqg->new_weight = ctx.v ?: cfqgd->weight; |
1705 | } else { | 1764 | } else { |
1706 | cfqg->dev_leaf_weight = ctx.v; | 1765 | cfqg->dev_leaf_weight = ctx.v; |
1707 | cfqg->new_leaf_weight = ctx.v ?: blkcg->cfq_leaf_weight; | 1766 | cfqg->new_leaf_weight = ctx.v ?: cfqgd->leaf_weight; |
1708 | } | 1767 | } |
1709 | ret = 0; | 1768 | ret = 0; |
1710 | } | 1769 | } |
1711 | 1770 | ||
1771 | err: | ||
1712 | blkg_conf_finish(&ctx); | 1772 | blkg_conf_finish(&ctx); |
1713 | return ret ?: nbytes; | 1773 | return ret ?: nbytes; |
1714 | } | 1774 | } |
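
In __cfqg_set_weight_device() the expression ctx.v ?: cfqgd->weight is the GNU C conditional with omitted middle operand: a per-device weight of 0 means "no device override", so the group falls back to the cgroup-wide default that now lives in cfq_group_data rather than in struct blkcg. A compact illustration of that fallback (relies on the gcc/clang ?: extension):

    #include <stdio.h>

    #define WEIGHT_DEFAULT 500U

    /* Returns the effective weight: the per-device value if non-zero,
     * otherwise the cgroup-wide default.  `v ?: d` is shorthand for
     * `v ? v : d`, evaluating v only once. */
    static unsigned int effective_weight(unsigned int dev_weight,
                                         unsigned int cgroup_weight)
    {
        return dev_weight ?: cgroup_weight;
    }

    int main(void)
    {
        printf("%u\n", effective_weight(0, WEIGHT_DEFAULT));    /* 500 */
        printf("%u\n", effective_weight(300, WEIGHT_DEFAULT));  /* 300 */
        return 0;
    }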
@@ -1730,16 +1790,23 @@ static int __cfq_set_weight(struct cgroup_subsys_state *css, struct cftype *cft, | |||
1730 | { | 1790 | { |
1731 | struct blkcg *blkcg = css_to_blkcg(css); | 1791 | struct blkcg *blkcg = css_to_blkcg(css); |
1732 | struct blkcg_gq *blkg; | 1792 | struct blkcg_gq *blkg; |
1793 | struct cfq_group_data *cfqgd; | ||
1794 | int ret = 0; | ||
1733 | 1795 | ||
1734 | if (val < CFQ_WEIGHT_MIN || val > CFQ_WEIGHT_MAX) | 1796 | if (val < CFQ_WEIGHT_MIN || val > CFQ_WEIGHT_MAX) |
1735 | return -EINVAL; | 1797 | return -EINVAL; |
1736 | 1798 | ||
1737 | spin_lock_irq(&blkcg->lock); | 1799 | spin_lock_irq(&blkcg->lock); |
1800 | cfqgd = blkcg_to_cfqgd(blkcg); | ||
1801 | if (!cfqgd) { | ||
1802 | ret = -EINVAL; | ||
1803 | goto out; | ||
1804 | } | ||
1738 | 1805 | ||
1739 | if (!is_leaf_weight) | 1806 | if (!is_leaf_weight) |
1740 | blkcg->cfq_weight = val; | 1807 | cfqgd->weight = val; |
1741 | else | 1808 | else |
1742 | blkcg->cfq_leaf_weight = val; | 1809 | cfqgd->leaf_weight = val; |
1743 | 1810 | ||
1744 | hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) { | 1811 | hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) { |
1745 | struct cfq_group *cfqg = blkg_to_cfqg(blkg); | 1812 | struct cfq_group *cfqg = blkg_to_cfqg(blkg); |
@@ -1749,15 +1816,16 @@ static int __cfq_set_weight(struct cgroup_subsys_state *css, struct cftype *cft, | |||
1749 | 1816 | ||
1750 | if (!is_leaf_weight) { | 1817 | if (!is_leaf_weight) { |
1751 | if (!cfqg->dev_weight) | 1818 | if (!cfqg->dev_weight) |
1752 | cfqg->new_weight = blkcg->cfq_weight; | 1819 | cfqg->new_weight = cfqgd->weight; |
1753 | } else { | 1820 | } else { |
1754 | if (!cfqg->dev_leaf_weight) | 1821 | if (!cfqg->dev_leaf_weight) |
1755 | cfqg->new_leaf_weight = blkcg->cfq_leaf_weight; | 1822 | cfqg->new_leaf_weight = cfqgd->leaf_weight; |
1756 | } | 1823 | } |
1757 | } | 1824 | } |
1758 | 1825 | ||
1826 | out: | ||
1759 | spin_unlock_irq(&blkcg->lock); | 1827 | spin_unlock_irq(&blkcg->lock); |
1760 | return 0; | 1828 | return ret; |
1761 | } | 1829 | } |
1762 | 1830 | ||
1763 | static int cfq_set_weight(struct cgroup_subsys_state *css, struct cftype *cft, | 1831 | static int cfq_set_weight(struct cgroup_subsys_state *css, struct cftype *cft, |
@@ -4477,6 +4545,18 @@ out_free: | |||
4477 | return ret; | 4545 | return ret; |
4478 | } | 4546 | } |
4479 | 4547 | ||
4548 | static void cfq_registered_queue(struct request_queue *q) | ||
4549 | { | ||
4550 | struct elevator_queue *e = q->elevator; | ||
4551 | struct cfq_data *cfqd = e->elevator_data; | ||
4552 | |||
4553 | /* | ||
4554 | * Default to IOPS mode with no idling for SSDs | ||
4555 | */ | ||
4556 | if (blk_queue_nonrot(q)) | ||
4557 | cfqd->cfq_slice_idle = 0; | ||
4558 | } | ||
4559 | |||
4480 | /* | 4560 | /* |
4481 | * sysfs parts below --> | 4561 | * sysfs parts below --> |
4482 | */ | 4562 | */ |
@@ -4592,6 +4672,7 @@ static struct elevator_type iosched_cfq = { | |||
4592 | .elevator_may_queue_fn = cfq_may_queue, | 4672 | .elevator_may_queue_fn = cfq_may_queue, |
4593 | .elevator_init_fn = cfq_init_queue, | 4673 | .elevator_init_fn = cfq_init_queue, |
4594 | .elevator_exit_fn = cfq_exit_queue, | 4674 | .elevator_exit_fn = cfq_exit_queue, |
4675 | .elevator_registered_fn = cfq_registered_queue, | ||
4595 | }, | 4676 | }, |
4596 | .icq_size = sizeof(struct cfq_io_cq), | 4677 | .icq_size = sizeof(struct cfq_io_cq), |
4597 | .icq_align = __alignof__(struct cfq_io_cq), | 4678 | .icq_align = __alignof__(struct cfq_io_cq), |
@@ -4603,8 +4684,10 @@ static struct elevator_type iosched_cfq = { | |||
4603 | #ifdef CONFIG_CFQ_GROUP_IOSCHED | 4684 | #ifdef CONFIG_CFQ_GROUP_IOSCHED |
4604 | static struct blkcg_policy blkcg_policy_cfq = { | 4685 | static struct blkcg_policy blkcg_policy_cfq = { |
4605 | .pd_size = sizeof(struct cfq_group), | 4686 | .pd_size = sizeof(struct cfq_group), |
4687 | .cpd_size = sizeof(struct cfq_group_data), | ||
4606 | .cftypes = cfq_blkcg_files, | 4688 | .cftypes = cfq_blkcg_files, |
4607 | 4689 | ||
4690 | .cpd_init_fn = cfq_cpd_init, | ||
4608 | .pd_init_fn = cfq_pd_init, | 4691 | .pd_init_fn = cfq_pd_init, |
4609 | .pd_offline_fn = cfq_pd_offline, | 4692 | .pd_offline_fn = cfq_pd_offline, |
4610 | .pd_reset_stats_fn = cfq_pd_reset_stats, | 4693 | .pd_reset_stats_fn = cfq_pd_reset_stats, |
diff --git a/block/elevator.c b/block/elevator.c index 59794d0d38e3..942579d04128 100644 --- a/block/elevator.c +++ b/block/elevator.c | |||
@@ -157,7 +157,7 @@ struct elevator_queue *elevator_alloc(struct request_queue *q, | |||
157 | 157 | ||
158 | eq = kzalloc_node(sizeof(*eq), GFP_KERNEL, q->node); | 158 | eq = kzalloc_node(sizeof(*eq), GFP_KERNEL, q->node); |
159 | if (unlikely(!eq)) | 159 | if (unlikely(!eq)) |
160 | goto err; | 160 | return NULL; |
161 | 161 | ||
162 | eq->type = e; | 162 | eq->type = e; |
163 | kobject_init(&eq->kobj, &elv_ktype); | 163 | kobject_init(&eq->kobj, &elv_ktype); |
@@ -165,10 +165,6 @@ struct elevator_queue *elevator_alloc(struct request_queue *q, | |||
165 | hash_init(eq->hash); | 165 | hash_init(eq->hash); |
166 | 166 | ||
167 | return eq; | 167 | return eq; |
168 | err: | ||
169 | kfree(eq); | ||
170 | elevator_put(e); | ||
171 | return NULL; | ||
172 | } | 168 | } |
173 | EXPORT_SYMBOL(elevator_alloc); | 169 | EXPORT_SYMBOL(elevator_alloc); |
174 | 170 | ||
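
Returning NULL directly when kzalloc() fails removes an error label that freed the (still NULL) eq and also dropped the elevator type reference via elevator_put(); if the caller drops that same reference again on failure, the old path would have released it twice, so a single narrow failure exit that only gives up what the function itself acquired is the safer shape. A user-space sketch of that ownership rule on error paths, with invented names (eq_alloc(), struct module here is just a counter):

    #include <stdlib.h>

    struct module { int refs; };
    struct eq { struct module *owner; };

    static void module_get(struct module *m) { m->refs++; }
    static void module_put(struct module *m) { m->refs--; }

    /* The callee allocates, but on failure it does NOT drop the reference
     * the caller took; the caller owns that reference and releases it. */
    static struct eq *eq_alloc(struct module *m)
    {
        struct eq *e = malloc(sizeof(*e));
        if (!e)
            return NULL;            /* no module_put() here */
        e->owner = m;
        return e;
    }

    int main(void)
    {
        struct module mod = { 0 };
        module_get(&mod);

        struct eq *e = eq_alloc(&mod);
        if (!e) {
            module_put(&mod);       /* exactly one release on failure */
            return 1;
        }

        free(e);
        module_put(&mod);           /* and exactly one on the normal path */
        return 0;
    }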
@@ -810,6 +806,8 @@ int elv_register_queue(struct request_queue *q) | |||
810 | } | 806 | } |
811 | kobject_uevent(&e->kobj, KOBJ_ADD); | 807 | kobject_uevent(&e->kobj, KOBJ_ADD); |
812 | e->registered = 1; | 808 | e->registered = 1; |
809 | if (e->type->ops.elevator_registered_fn) | ||
810 | e->type->ops.elevator_registered_fn(q); | ||
813 | } | 811 | } |
814 | return error; | 812 | return error; |
815 | } | 813 | } |
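
The new elevator_registered_fn hook gives an I/O scheduler a callback once it has been registered on a queue; CFQ uses it (cfq_registered_queue() above) to drop slice idling and run in IOPS mode on non-rotational devices. An optional-callback ops table in miniature, as a user-space sketch with made-up names:

    #include <stdbool.h>
    #include <stdio.h>

    struct queue { bool nonrot; unsigned int slice_idle; };

    struct sched_ops {
        void (*init_fn)(struct queue *q);
        void (*registered_fn)(struct queue *q);     /* optional hook */
    };

    static void my_init(struct queue *q)       { q->slice_idle = 8; }
    static void my_registered(struct queue *q) { if (q->nonrot) q->slice_idle = 0; }

    static const struct sched_ops ops = {
        .init_fn       = my_init,
        .registered_fn = my_registered,
    };

    static void register_queue(struct queue *q, const struct sched_ops *o)
    {
        o->init_fn(q);
        /* ... sysfs registration would happen here ... */
        if (o->registered_fn)                       /* hook may be absent */
            o->registered_fn(q);
    }

    int main(void)
    {
        struct queue ssd = { .nonrot = true };
        register_queue(&ssd, &ops);
        printf("slice_idle=%u\n", ssd.slice_idle);  /* 0 on an SSD */
        return 0;
    }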
diff --git a/block/genhd.c b/block/genhd.c index 0a536dc05f3b..ea982eadaf63 100644 --- a/block/genhd.c +++ b/block/genhd.c | |||
@@ -422,9 +422,9 @@ int blk_alloc_devt(struct hd_struct *part, dev_t *devt) | |||
422 | /* allocate ext devt */ | 422 | /* allocate ext devt */ |
423 | idr_preload(GFP_KERNEL); | 423 | idr_preload(GFP_KERNEL); |
424 | 424 | ||
425 | spin_lock(&ext_devt_lock); | 425 | spin_lock_bh(&ext_devt_lock); |
426 | idx = idr_alloc(&ext_devt_idr, part, 0, NR_EXT_DEVT, GFP_NOWAIT); | 426 | idx = idr_alloc(&ext_devt_idr, part, 0, NR_EXT_DEVT, GFP_NOWAIT); |
427 | spin_unlock(&ext_devt_lock); | 427 | spin_unlock_bh(&ext_devt_lock); |
428 | 428 | ||
429 | idr_preload_end(); | 429 | idr_preload_end(); |
430 | if (idx < 0) | 430 | if (idx < 0) |
@@ -449,9 +449,9 @@ void blk_free_devt(dev_t devt) | |||
449 | return; | 449 | return; |
450 | 450 | ||
451 | if (MAJOR(devt) == BLOCK_EXT_MAJOR) { | 451 | if (MAJOR(devt) == BLOCK_EXT_MAJOR) { |
452 | spin_lock(&ext_devt_lock); | 452 | spin_lock_bh(&ext_devt_lock); |
453 | idr_remove(&ext_devt_idr, blk_mangle_minor(MINOR(devt))); | 453 | idr_remove(&ext_devt_idr, blk_mangle_minor(MINOR(devt))); |
454 | spin_unlock(&ext_devt_lock); | 454 | spin_unlock_bh(&ext_devt_lock); |
455 | } | 455 | } |
456 | } | 456 | } |
457 | 457 | ||
@@ -653,7 +653,6 @@ void del_gendisk(struct gendisk *disk) | |||
653 | disk->flags &= ~GENHD_FL_UP; | 653 | disk->flags &= ~GENHD_FL_UP; |
654 | 654 | ||
655 | sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi"); | 655 | sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi"); |
656 | bdi_unregister(&disk->queue->backing_dev_info); | ||
657 | blk_unregister_queue(disk); | 656 | blk_unregister_queue(disk); |
658 | blk_unregister_region(disk_devt(disk), disk->minors); | 657 | blk_unregister_region(disk_devt(disk), disk->minors); |
659 | 658 | ||
@@ -691,13 +690,13 @@ struct gendisk *get_gendisk(dev_t devt, int *partno) | |||
691 | } else { | 690 | } else { |
692 | struct hd_struct *part; | 691 | struct hd_struct *part; |
693 | 692 | ||
694 | spin_lock(&ext_devt_lock); | 693 | spin_lock_bh(&ext_devt_lock); |
695 | part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt))); | 694 | part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt))); |
696 | if (part && get_disk(part_to_disk(part))) { | 695 | if (part && get_disk(part_to_disk(part))) { |
697 | *partno = part->partno; | 696 | *partno = part->partno; |
698 | disk = part_to_disk(part); | 697 | disk = part_to_disk(part); |
699 | } | 698 | } |
700 | spin_unlock(&ext_devt_lock); | 699 | spin_unlock_bh(&ext_devt_lock); |
701 | } | 700 | } |
702 | 701 | ||
703 | return disk; | 702 | return disk; |
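
Switching ext_devt_lock to the _bh locking variants disables local softirq processing while the lock is held, which is the standard fix when the same lock can also be taken from bottom-half context: a softirq interrupting a plain spin_lock() holder on the same CPU would deadlock trying to acquire it again. The softirq-side caller is not visible in this hunk, so take the motivation as the usual rule of thumb; the kernel-style shape of the pattern (fragment, not buildable on its own) is:

    #include <linux/spinlock.h>

    static DEFINE_SPINLOCK(my_lock);

    /* process context: block local softirqs while holding the lock */
    static void process_side(void)
    {
        spin_lock_bh(&my_lock);
        /* ... touch the shared idr/list ... */
        spin_unlock_bh(&my_lock);
    }

    /* softirq / bottom-half context: the plain lock is sufficient here */
    static void bh_side(void)
    {
        spin_lock(&my_lock);
        /* ... touch the shared idr/list ... */
        spin_unlock(&my_lock);
    }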
diff --git a/block/ioctl.c b/block/ioctl.c index 7d8befde2aca..8061eba42887 100644 --- a/block/ioctl.c +++ b/block/ioctl.c | |||
@@ -150,21 +150,48 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user | |||
150 | } | 150 | } |
151 | } | 151 | } |
152 | 152 | ||
153 | static int blkdev_reread_part(struct block_device *bdev) | 153 | /* |
154 | * This is an exported API for the block driver, and will not | ||
155 | * acquire bd_mutex. This API should be used in case that | ||
156 | * caller has held bd_mutex already. | ||
157 | */ | ||
158 | int __blkdev_reread_part(struct block_device *bdev) | ||
154 | { | 159 | { |
155 | struct gendisk *disk = bdev->bd_disk; | 160 | struct gendisk *disk = bdev->bd_disk; |
156 | int res; | ||
157 | 161 | ||
158 | if (!disk_part_scan_enabled(disk) || bdev != bdev->bd_contains) | 162 | if (!disk_part_scan_enabled(disk) || bdev != bdev->bd_contains) |
159 | return -EINVAL; | 163 | return -EINVAL; |
160 | if (!capable(CAP_SYS_ADMIN)) | 164 | if (!capable(CAP_SYS_ADMIN)) |
161 | return -EACCES; | 165 | return -EACCES; |
162 | if (!mutex_trylock(&bdev->bd_mutex)) | 166 | |
163 | return -EBUSY; | 167 | lockdep_assert_held(&bdev->bd_mutex); |
164 | res = rescan_partitions(disk, bdev); | 168 | |
169 | return rescan_partitions(disk, bdev); | ||
170 | } | ||
171 | EXPORT_SYMBOL(__blkdev_reread_part); | ||
172 | |||
173 | /* | ||
174 | * This is an exported API for the block driver, and will | ||
175 | * try to acquire bd_mutex. If bd_mutex has been held already | ||
176 | * in current context, please call __blkdev_reread_part(). | ||
177 | * | ||
178 | * Make sure the held locks in current context aren't required | ||
179 | * in open()/close() handler and I/O path for avoiding ABBA deadlock: | ||
180 | * - bd_mutex is held before calling block driver's open/close | ||
181 | * handler | ||
182 | * - reading partition table may submit I/O to the block device | ||
183 | */ | ||
184 | int blkdev_reread_part(struct block_device *bdev) | ||
185 | { | ||
186 | int res; | ||
187 | |||
188 | mutex_lock(&bdev->bd_mutex); | ||
189 | res = __blkdev_reread_part(bdev); | ||
165 | mutex_unlock(&bdev->bd_mutex); | 190 | mutex_unlock(&bdev->bd_mutex); |
191 | |||
166 | return res; | 192 | return res; |
167 | } | 193 | } |
194 | EXPORT_SYMBOL(blkdev_reread_part); | ||
168 | 195 | ||
169 | static int blk_ioctl_discard(struct block_device *bdev, uint64_t start, | 196 | static int blk_ioctl_discard(struct block_device *bdev, uint64_t start, |
170 | uint64_t len, int secure) | 197 | uint64_t len, int secure) |
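
The ioctl.c change splits the rescan into __blkdev_reread_part(), which requires the caller to already hold bd_mutex (enforced with lockdep_assert_held()), and a blkdev_reread_part() wrapper that takes the mutex itself; it also replaces the old mutex_trylock(), so a concurrent holder now makes the caller wait instead of failing with -EBUSY. The locked/unlocked function pairing is a common kernel idiom; a stripped-down user-space version with hypothetical names:

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t bd_mutex = PTHREAD_MUTEX_INITIALIZER;

    /* Caller must already hold bd_mutex (the double-underscore convention). */
    static int __reread_part(void)
    {
        /* ... rescan the partition table while the lock is held ... */
        return 0;
    }

    /* Convenience wrapper: take the lock, do the work, drop the lock. */
    static int reread_part(void)
    {
        int res;

        pthread_mutex_lock(&bd_mutex);
        res = __reread_part();
        pthread_mutex_unlock(&bd_mutex);
        return res;
    }

    int main(void)
    {
        /* A caller already holding the lock (say, inside another operation
         * on the same device) uses the __ variant directly. */
        pthread_mutex_lock(&bd_mutex);
        int r1 = __reread_part();
        pthread_mutex_unlock(&bd_mutex);

        int r2 = reread_part();     /* standalone caller */
        printf("%d %d\n", r1, r2);
        return 0;
    }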