Diffstat (limited to 'block'):

 block/bio-integrity.c      |   2
 block/blk-core.c           |   1
 block/blk-exec.c           |   1
 block/blk-merge.c          |  17
 block/blk-mq.c             | 162
 block/blk-sysfs.c          |   6
 block/cfq-iosched.c        |  19
 block/genhd.c              |  26
 block/partition-generic.c  |   2
 block/scsi_ioctl.c         |  40
 10 files changed, 194 insertions(+), 82 deletions(-)
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index bc423f7b02da..f14b4abbebd8 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -520,7 +520,7 @@ void bio_integrity_endio(struct bio *bio, int error)
 	 */
 	if (error) {
 		bio->bi_end_io = bip->bip_end_io;
-		bio_endio(bio, error);
+		bio_endio_nodec(bio, error);
 
 		return;
 	}
diff --git a/block/blk-core.c b/block/blk-core.c
index c359d72e9d76..bf930f481d43 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1252,7 +1252,6 @@ void blk_rq_set_block_pc(struct request *rq)
 	rq->__sector = (sector_t) -1;
 	rq->bio = rq->biotail = NULL;
 	memset(rq->__cmd, 0, sizeof(rq->__cmd));
-	rq->cmd = rq->__cmd;
 }
 EXPORT_SYMBOL(blk_rq_set_block_pc);
 
diff --git a/block/blk-exec.c b/block/blk-exec.c
index f4d27b12c90b..9924725fa50d 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -56,6 +56,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
 	bool is_pm_resume;
 
 	WARN_ON(irqs_disabled());
+	WARN_ON(rq->cmd_type == REQ_TYPE_FS);
 
 	rq->rq_disk = bd_disk;
 	rq->end_io = done;
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 54535831f1e1..77881798f793 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -10,10 +10,11 @@
 #include "blk.h"
 
 static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
-					     struct bio *bio)
+					     struct bio *bio,
+					     bool no_sg_merge)
 {
 	struct bio_vec bv, bvprv = { NULL };
-	int cluster, high, highprv = 1, no_sg_merge;
+	int cluster, high, highprv = 1;
 	unsigned int seg_size, nr_phys_segs;
 	struct bio *fbio, *bbio;
 	struct bvec_iter iter;
@@ -35,7 +36,6 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
 	cluster = blk_queue_cluster(q);
 	seg_size = 0;
 	nr_phys_segs = 0;
-	no_sg_merge = test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags);
 	high = 0;
 	for_each_bio(bio) {
 		bio_for_each_segment(bv, bio, iter) {
@@ -88,18 +88,23 @@ new_segment:
 
 void blk_recalc_rq_segments(struct request *rq)
 {
-	rq->nr_phys_segments = __blk_recalc_rq_segments(rq->q, rq->bio);
+	bool no_sg_merge = !!test_bit(QUEUE_FLAG_NO_SG_MERGE,
+			&rq->q->queue_flags);
+
+	rq->nr_phys_segments = __blk_recalc_rq_segments(rq->q, rq->bio,
+			no_sg_merge);
 }
 
 void blk_recount_segments(struct request_queue *q, struct bio *bio)
 {
-	if (test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags))
+	if (test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags) &&
+			bio->bi_vcnt < queue_max_segments(q))
 		bio->bi_phys_segments = bio->bi_vcnt;
 	else {
 		struct bio *nxt = bio->bi_next;
 
 		bio->bi_next = NULL;
-		bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio);
+		bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio, false);
 		bio->bi_next = nxt;
 	}
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 5189cb1e478a..df8e1e09dd17 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -112,18 +112,31 @@ static void blk_mq_usage_counter_release(struct percpu_ref *ref)
  */
 void blk_mq_freeze_queue(struct request_queue *q)
 {
+	bool freeze;
+
 	spin_lock_irq(q->queue_lock);
-	q->mq_freeze_depth++;
+	freeze = !q->mq_freeze_depth++;
 	spin_unlock_irq(q->queue_lock);
 
-	percpu_ref_kill(&q->mq_usage_counter);
-	blk_mq_run_queues(q, false);
+	if (freeze) {
+		/*
+		 * XXX: Temporary kludge to work around SCSI blk-mq stall.
+		 * SCSI synchronously creates and destroys many queues
+		 * back-to-back during probe leading to lengthy stalls.
+		 * This will be fixed by keeping ->mq_usage_counter in
+		 * atomic mode until genhd registration, but, for now,
+		 * let's work around using expedited synchronization.
+		 */
+		__percpu_ref_kill_expedited(&q->mq_usage_counter);
+
+		blk_mq_run_queues(q, false);
+	}
 	wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->mq_usage_counter));
 }
 
 static void blk_mq_unfreeze_queue(struct request_queue *q)
 {
-	bool wake = false;
+	bool wake;
 
 	spin_lock_irq(q->queue_lock);
 	wake = !--q->mq_freeze_depth;
@@ -172,6 +185,8 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
 	/* tag was already set */
 	rq->errors = 0;
 
+	rq->cmd = rq->__cmd;
+
 	rq->extra_len = 0;
 	rq->sense_len = 0;
 	rq->resid_len = 0;
@@ -197,7 +212,6 @@ __blk_mq_alloc_request(struct blk_mq_alloc_data *data, int rw)
 	if (tag != BLK_MQ_TAG_FAIL) {
 		rq = data->hctx->tags->rqs[tag];
 
-		rq->cmd_flags = 0;
 		if (blk_mq_tag_busy(data->hctx)) {
 			rq->cmd_flags = REQ_MQ_INFLIGHT;
 			atomic_inc(&data->hctx->nr_active);
@@ -252,6 +266,7 @@ static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx,
 
 	if (rq->cmd_flags & REQ_MQ_INFLIGHT)
 		atomic_dec(&hctx->nr_active);
+	rq->cmd_flags = 0;
 
 	clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
 	blk_mq_put_tag(hctx, tag, &ctx->last_tag);
@@ -387,6 +402,12 @@ static void blk_mq_start_request(struct request *rq, bool last)
 	blk_add_timer(rq);
 
 	/*
+	 * Ensure that ->deadline is visible before set the started
+	 * flag and clear the completed flag.
+	 */
+	smp_mb__before_atomic();
+
+	/*
 	 * Mark us as started and clear complete. Complete might have been
 	 * set if requeue raced with timeout, which then marked it as
 	 * complete. So be sure to clear complete again when we start
@@ -467,7 +488,11 @@ static void blk_mq_requeue_work(struct work_struct *work)
 		blk_mq_insert_request(rq, false, false, false);
 	}
 
-	blk_mq_run_queues(q, false);
+	/*
+	 * Use the start variant of queue running here, so that running
+	 * the requeue work will kick stopped queues.
+	 */
+	blk_mq_start_hw_queues(q);
 }
 
 void blk_mq_add_to_requeue_list(struct request *rq, bool at_head)
@@ -951,14 +976,9 @@ void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue,
 
 	hctx = q->mq_ops->map_queue(q, ctx->cpu);
 
-	if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA) &&
-	    !(rq->cmd_flags & (REQ_FLUSH_SEQ))) {
-		blk_insert_flush(rq);
-	} else {
-		spin_lock(&ctx->lock);
-		__blk_mq_insert_request(hctx, rq, at_head);
-		spin_unlock(&ctx->lock);
-	}
+	spin_lock(&ctx->lock);
+	__blk_mq_insert_request(hctx, rq, at_head);
+	spin_unlock(&ctx->lock);
 
 	if (run_queue)
 		blk_mq_run_hw_queue(hctx, async);
@@ -1068,13 +1088,17 @@ static void blk_mq_bio_to_request(struct request *rq, struct bio *bio)
 	blk_account_io_start(rq, 1);
 }
 
+static inline bool hctx_allow_merges(struct blk_mq_hw_ctx *hctx)
+{
+	return (hctx->flags & BLK_MQ_F_SHOULD_MERGE) &&
+		!blk_queue_nomerges(hctx->queue);
+}
+
 static inline bool blk_mq_merge_queue_io(struct blk_mq_hw_ctx *hctx,
 					 struct blk_mq_ctx *ctx,
 					 struct request *rq, struct bio *bio)
 {
-	struct request_queue *q = hctx->queue;
-
-	if (!(hctx->flags & BLK_MQ_F_SHOULD_MERGE)) {
+	if (!hctx_allow_merges(hctx)) {
 		blk_mq_bio_to_request(rq, bio);
 		spin_lock(&ctx->lock);
 insert_rq:
@@ -1082,6 +1106,8 @@ insert_rq:
 		spin_unlock(&ctx->lock);
 		return false;
 	} else {
+		struct request_queue *q = hctx->queue;
+
 		spin_lock(&ctx->lock);
 		if (!blk_mq_attempt_merge(q, ctx, bio)) {
 			blk_mq_bio_to_request(rq, bio);
@@ -1309,6 +1335,7 @@ static void blk_mq_free_rq_map(struct blk_mq_tag_set *set,
 				continue;
 			set->ops->exit_request(set->driver_data, tags->rqs[i],
 						hctx_idx, i);
+			tags->rqs[i] = NULL;
 		}
 	}
 
@@ -1342,8 +1369,9 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
 
 	INIT_LIST_HEAD(&tags->page_list);
 
-	tags->rqs = kmalloc_node(set->queue_depth * sizeof(struct request *),
-				 GFP_KERNEL, set->numa_node);
+	tags->rqs = kzalloc_node(set->queue_depth * sizeof(struct request *),
+				 GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY,
+				 set->numa_node);
 	if (!tags->rqs) {
 		blk_mq_free_tags(tags);
 		return NULL;
@@ -1367,8 +1395,9 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
 		this_order--;
 
 	do {
-		page = alloc_pages_node(set->numa_node, GFP_KERNEL,
-					this_order);
+		page = alloc_pages_node(set->numa_node,
+			GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY,
+			this_order);
 		if (page)
 			break;
 		if (!this_order--)
@@ -1389,11 +1418,15 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
 		left -= to_do * rq_size;
 		for (j = 0; j < to_do; j++) {
 			tags->rqs[i] = p;
+			tags->rqs[i]->atomic_flags = 0;
+			tags->rqs[i]->cmd_flags = 0;
 			if (set->ops->init_request) {
 				if (set->ops->init_request(set->driver_data,
 						tags->rqs[i], hctx_idx, i,
-						set->numa_node))
+						set->numa_node)) {
+					tags->rqs[i] = NULL;
 					goto fail;
+				}
 			}
 
 			p += rq_size;
@@ -1404,7 +1437,6 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
 	return tags;
 
 fail:
-	pr_warn("%s: failed to allocate requests\n", __func__);
 	blk_mq_free_rq_map(set, tags, hctx_idx);
 	return NULL;
 }
@@ -1574,7 +1606,7 @@ static int blk_mq_init_hw_queues(struct request_queue *q,
 		hctx->tags = set->tags[i];
 
 		/*
-		 * Allocate space for all possible cpus to avoid allocation in
+		 * Allocate space for all possible cpus to avoid allocation at
 		 * runtime
 		 */
 		hctx->ctxs = kmalloc_node(nr_cpu_ids * sizeof(void *),
@@ -1662,8 +1694,8 @@ static void blk_mq_map_swqueue(struct request_queue *q)
 
 	queue_for_each_hw_ctx(q, hctx, i) {
 		/*
-		 * If not software queues are mapped to this hardware queue,
-		 * disable it and free the request entries
+		 * If no software queues are mapped to this hardware queue,
+		 * disable it and free the request entries.
 		 */
 		if (!hctx->nr_ctx) {
 			struct blk_mq_tag_set *set = q->tag_set;
@@ -1713,14 +1745,10 @@ static void blk_mq_del_queue_tag_set(struct request_queue *q)
 {
 	struct blk_mq_tag_set *set = q->tag_set;
 
-	blk_mq_freeze_queue(q);
-
 	mutex_lock(&set->tag_list_lock);
 	list_del_init(&q->tag_set_list);
 	blk_mq_update_tag_set_depth(set);
 	mutex_unlock(&set->tag_list_lock);
-
-	blk_mq_unfreeze_queue(q);
 }
 
 static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set,
@@ -1928,6 +1956,60 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
 	return NOTIFY_OK;
 }
 
+static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
+{
+	int i;
+
+	for (i = 0; i < set->nr_hw_queues; i++) {
+		set->tags[i] = blk_mq_init_rq_map(set, i);
+		if (!set->tags[i])
+			goto out_unwind;
+	}
+
+	return 0;
+
+out_unwind:
+	while (--i >= 0)
+		blk_mq_free_rq_map(set, set->tags[i], i);
+
+	return -ENOMEM;
+}
+
+/*
+ * Allocate the request maps associated with this tag_set. Note that this
+ * may reduce the depth asked for, if memory is tight. set->queue_depth
+ * will be updated to reflect the allocated depth.
+ */
+static int blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
+{
+	unsigned int depth;
+	int err;
+
+	depth = set->queue_depth;
+	do {
+		err = __blk_mq_alloc_rq_maps(set);
+		if (!err)
+			break;
+
+		set->queue_depth >>= 1;
+		if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN) {
+			err = -ENOMEM;
+			break;
+		}
+	} while (set->queue_depth);
+
+	if (!set->queue_depth || err) {
+		pr_err("blk-mq: failed to allocate request map\n");
+		return -ENOMEM;
+	}
+
+	if (depth != set->queue_depth)
+		pr_info("blk-mq: reduced tag depth (%u -> %u)\n",
+			depth, set->queue_depth);
+
+	return 0;
+}
+
 /*
  * Alloc a tag set to be associated with one or more request queues.
  * May fail with EINVAL for various error conditions. May adjust the
@@ -1936,8 +2018,6 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
  */
 int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
 {
-	int i;
-
 	if (!set->nr_hw_queues)
 		return -EINVAL;
 	if (!set->queue_depth)
@@ -1958,23 +2038,18 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
 				 sizeof(struct blk_mq_tags *),
 				 GFP_KERNEL, set->numa_node);
 	if (!set->tags)
-		goto out;
+		return -ENOMEM;
 
-	for (i = 0; i < set->nr_hw_queues; i++) {
-		set->tags[i] = blk_mq_init_rq_map(set, i);
-		if (!set->tags[i])
-			goto out_unwind;
-	}
+	if (blk_mq_alloc_rq_maps(set))
+		goto enomem;
 
 	mutex_init(&set->tag_list_lock);
 	INIT_LIST_HEAD(&set->tag_list);
 
 	return 0;
-
-out_unwind:
-	while (--i >= 0)
-		blk_mq_free_rq_map(set, set->tags[i], i);
-out:
+enomem:
+	kfree(set->tags);
+	set->tags = NULL;
 	return -ENOMEM;
 }
 EXPORT_SYMBOL(blk_mq_alloc_tag_set);
@@ -1989,6 +2064,7 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
 	}
 
 	kfree(set->tags);
+	set->tags = NULL;
 }
 EXPORT_SYMBOL(blk_mq_free_tag_set);
 
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 4db5abf96b9e..17f5c84ce7bf 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -554,8 +554,10 @@ int blk_register_queue(struct gendisk *disk)
 	 * Initialization must be complete by now. Finish the initial
 	 * bypass from queue allocation.
 	 */
-	queue_flag_set_unlocked(QUEUE_FLAG_INIT_DONE, q);
-	blk_queue_bypass_end(q);
+	if (!blk_queue_init_done(q)) {
+		queue_flag_set_unlocked(QUEUE_FLAG_INIT_DONE, q);
+		blk_queue_bypass_end(q);
+	}
 
 	ret = blk_trace_init_sysfs(dev);
 	if (ret)
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index cadc37841744..3f31cf9508e6 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1272,15 +1272,22 @@ __cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg)
 	rb_insert_color(&cfqg->rb_node, &st->rb);
 }
 
+/*
+ * This has to be called only on activation of cfqg
+ */
 static void
 cfq_update_group_weight(struct cfq_group *cfqg)
 {
-	BUG_ON(!RB_EMPTY_NODE(&cfqg->rb_node));
-
 	if (cfqg->new_weight) {
 		cfqg->weight = cfqg->new_weight;
 		cfqg->new_weight = 0;
 	}
+}
+
+static void
+cfq_update_group_leaf_weight(struct cfq_group *cfqg)
+{
+	BUG_ON(!RB_EMPTY_NODE(&cfqg->rb_node));
 
 	if (cfqg->new_leaf_weight) {
 		cfqg->leaf_weight = cfqg->new_leaf_weight;
@@ -1299,7 +1306,12 @@ cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg)
 	/* add to the service tree */
 	BUG_ON(!RB_EMPTY_NODE(&cfqg->rb_node));
 
-	cfq_update_group_weight(cfqg);
+	/*
+	 * Update leaf_weight. We cannot update weight at this point
+	 * because cfqg might already have been activated and is
+	 * contributing its current weight to the parent's child_weight.
+	 */
+	cfq_update_group_leaf_weight(cfqg);
 	__cfq_group_service_tree_add(st, cfqg);
 
 	/*
@@ -1323,6 +1335,7 @@ cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg)
 	 */
 	while ((parent = cfqg_parent(pos))) {
 		if (propagate) {
+			cfq_update_group_weight(pos);
 			propagate = !parent->nr_active++;
 			parent->children_weight += pos->weight;
 		}
diff --git a/block/genhd.c b/block/genhd.c
index 791f41943132..e6723bd4d7a1 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -28,10 +28,10 @@ struct kobject *block_depr;
 /* for extended dynamic devt allocation, currently only one major is used */
 #define NR_EXT_DEVT		(1 << MINORBITS)
 
-/* For extended devt allocation. ext_devt_mutex prevents look up
+/* For extended devt allocation. ext_devt_lock prevents look up
  * results from going away underneath its user.
  */
-static DEFINE_MUTEX(ext_devt_mutex);
+static DEFINE_SPINLOCK(ext_devt_lock);
 static DEFINE_IDR(ext_devt_idr);
 
 static struct device_type disk_type;
@@ -420,9 +420,13 @@ int blk_alloc_devt(struct hd_struct *part, dev_t *devt)
 	}
 
 	/* allocate ext devt */
-	mutex_lock(&ext_devt_mutex);
-	idx = idr_alloc(&ext_devt_idr, part, 0, NR_EXT_DEVT, GFP_KERNEL);
-	mutex_unlock(&ext_devt_mutex);
+	idr_preload(GFP_KERNEL);
+
+	spin_lock(&ext_devt_lock);
+	idx = idr_alloc(&ext_devt_idr, part, 0, NR_EXT_DEVT, GFP_NOWAIT);
+	spin_unlock(&ext_devt_lock);
+
+	idr_preload_end();
 	if (idx < 0)
 		return idx == -ENOSPC ? -EBUSY : idx;
 
@@ -441,15 +445,13 @@ int blk_alloc_devt(struct hd_struct *part, dev_t *devt)
  */
 void blk_free_devt(dev_t devt)
 {
-	might_sleep();
-
 	if (devt == MKDEV(0, 0))
 		return;
 
 	if (MAJOR(devt) == BLOCK_EXT_MAJOR) {
-		mutex_lock(&ext_devt_mutex);
+		spin_lock(&ext_devt_lock);
 		idr_remove(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
-		mutex_unlock(&ext_devt_mutex);
+		spin_unlock(&ext_devt_lock);
 	}
 }
 
@@ -665,7 +667,6 @@ void del_gendisk(struct gendisk *disk)
 	sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk)));
 	pm_runtime_set_memalloc_noio(disk_to_dev(disk), false);
 	device_del(disk_to_dev(disk));
-	blk_free_devt(disk_to_dev(disk)->devt);
 }
 EXPORT_SYMBOL(del_gendisk);
 
@@ -690,13 +691,13 @@ struct gendisk *get_gendisk(dev_t devt, int *partno)
 	} else {
 		struct hd_struct *part;
 
-		mutex_lock(&ext_devt_mutex);
+		spin_lock(&ext_devt_lock);
 		part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
 		if (part && get_disk(part_to_disk(part))) {
 			*partno = part->partno;
 			disk = part_to_disk(part);
 		}
-		mutex_unlock(&ext_devt_mutex);
+		spin_unlock(&ext_devt_lock);
 	}
 
 	return disk;
@@ -1098,6 +1099,7 @@ static void disk_release(struct device *dev)
 {
 	struct gendisk *disk = dev_to_disk(dev);
 
+	blk_free_devt(dev->devt);
 	disk_release_events(disk);
 	kfree(disk->random);
 	disk_replace_part_tbl(disk, NULL);
diff --git a/block/partition-generic.c b/block/partition-generic.c
index 789cdea05893..0d9e5f97f0a8 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -211,6 +211,7 @@ static const struct attribute_group *part_attr_groups[] = {
 static void part_release(struct device *dev)
 {
 	struct hd_struct *p = dev_to_part(dev);
+	blk_free_devt(dev->devt);
 	free_part_stats(p);
 	free_part_info(p);
 	kfree(p);
@@ -253,7 +254,6 @@ void delete_partition(struct gendisk *disk, int partno)
 	rcu_assign_pointer(ptbl->last_lookup, NULL);
 	kobject_put(part->holder_dir);
 	device_del(part_to_dev(part));
-	blk_free_devt(part_devt(part));
 
 	hd_struct_put(part);
 }
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 51bf5155ee75..9b8eaeca6a79 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -279,7 +279,6 @@ static int blk_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr,
 	r = blk_rq_unmap_user(bio);
 	if (!ret)
 		ret = r;
-	blk_put_request(rq);
 
 	return ret;
 }
@@ -297,8 +296,6 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
 
 	if (hdr->interface_id != 'S')
 		return -EINVAL;
-	if (hdr->cmd_len > BLK_MAX_CDB)
-		return -EINVAL;
 
 	if (hdr->dxfer_len > (queue_max_hw_sectors(q) << 9))
 		return -EIO;
@@ -317,16 +314,23 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
 	if (hdr->flags & SG_FLAG_Q_AT_HEAD)
 		at_head = 1;
 
+	ret = -ENOMEM;
 	rq = blk_get_request(q, writing ? WRITE : READ, GFP_KERNEL);
 	if (!rq)
-		return -ENOMEM;
+		goto out;
 	blk_rq_set_block_pc(rq);
 
-	if (blk_fill_sghdr_rq(q, rq, hdr, mode)) {
-		blk_put_request(rq);
-		return -EFAULT;
+	if (hdr->cmd_len > BLK_MAX_CDB) {
+		rq->cmd = kzalloc(hdr->cmd_len, GFP_KERNEL);
+		if (!rq->cmd)
+			goto out_put_request;
 	}
 
+	ret = -EFAULT;
+	if (blk_fill_sghdr_rq(q, rq, hdr, mode))
+		goto out_free_cdb;
+
+	ret = 0;
 	if (hdr->iovec_count) {
 		size_t iov_data_len;
 		struct iovec *iov = NULL;
@@ -335,7 +339,7 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
 					   0, NULL, &iov);
 		if (ret < 0) {
 			kfree(iov);
-			goto out;
+			goto out_free_cdb;
 		}
 
 		iov_data_len = ret;
@@ -358,7 +362,7 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
 				      GFP_KERNEL);
 
 	if (ret)
-		goto out;
+		goto out_free_cdb;
 
 	bio = rq->bio;
 	memset(sense, 0, sizeof(sense));
@@ -376,9 +380,14 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
 
 	hdr->duration = jiffies_to_msecs(jiffies - start_time);
 
-	return blk_complete_sghdr_rq(rq, hdr, bio);
-out:
+	ret = blk_complete_sghdr_rq(rq, hdr, bio);
+
+out_free_cdb:
+	if (rq->cmd != rq->__cmd)
+		kfree(rq->cmd);
+out_put_request:
 	blk_put_request(rq);
+out:
 	return ret;
 }
 
@@ -448,6 +457,11 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
 	}
 
 	rq = blk_get_request(q, in_len ? WRITE : READ, __GFP_WAIT);
+	if (!rq) {
+		err = -ENOMEM;
+		goto error;
+	}
+	blk_rq_set_block_pc(rq);
 
 	cmdlen = COMMAND_SIZE(opcode);
 
@@ -501,7 +515,6 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
 	memset(sense, 0, sizeof(sense));
 	rq->sense = sense;
 	rq->sense_len = 0;
-	blk_rq_set_block_pc(rq);
 
 	blk_execute_rq(q, disk, rq, 0);
 
@@ -521,7 +534,8 @@ out:
 
 error:
 	kfree(buffer);
-	blk_put_request(rq);
+	if (rq)
+		blk_put_request(rq);
 	return err;
 }
 EXPORT_SYMBOL_GPL(sg_scsi_ioctl);