author		Linus Torvalds <torvalds@linux-foundation.org>	2018-02-04 14:16:35 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2018-02-04 14:16:35 -0500
commit		64b28683deba132f301d1cecfc25c32e295f53a1
tree		be38a4e77c530fb129339f983a9d307c60312df8 /block
parent		d3658c2266012f270da52e3e0365536e394bd3bd
parent		1d51877578799bfe0fcfe189d8233c9fccf05931
Merge tag 'for-linus-20180204' of git://git.kernel.dk/linux-block
Pull more block updates from Jens Axboe:
"Most of this is fixes and not new code/features:
 - skd fix from Arnd, fixing a build error dependent on slab allocator
   type.
 - blk-mq scheduler discard merging fixes, one from me and one from
   Keith. This fixes a segment miscalculation for blk-mq-sched, where
   we mistakenly think two segments are physically contiguous even
   though the request isn't carrying real data. Also fixes a bio-to-rq
   merge case.
 - Don't re-set a bit on the buffer_head flags if it's already set.
   The redundant atomic write can cause scalability problems on bigger
   machines and workloads; see the first sketch after this message.
   From Kemi Wang.
 - Add BLK_STS_DEV_RESOURCE return value to blk-mq, allowing us to
   distinguish between a local (device related) resource starvation
   and a global one. The latter might happen without IO being in
   flight, so it has to be handled a bit differently; see the second
   sketch after this message. From Ming"
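
The buffer_head change itself sits outside the 'block'-limited diff shown
below. A minimal sketch of the test-before-set pattern it applies; the
helper name set_flag_once() is hypothetical, for illustration only:

    /*
     * Sketch only: skip the atomic RMW when the bit is already set, so
     * a hot flag does not keep dirtying a shared cache line.
     */
    static __always_inline void set_flag_once(int bit, unsigned long *state)
    {
            if (!test_bit(bit, state))
                    set_bit(bit, state);
    }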
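
And a sketch of how a driver's ->queue_rq() is expected to choose between
the two return codes, using a hypothetical foo_* driver (the core-side
handling is in the blk-mq.c hunks below):

    static blk_status_t foo_queue_rq(struct blk_mq_hw_ctx *hctx,
                                     const struct blk_mq_queue_data *bd)
    {
            struct foo_dev *foo = hctx->queue->queuedata;

            /*
             * Device-private resource: foo's own completion path frees
             * the tag and reruns the queue, so blk-mq must not poll.
             */
            if (!foo_get_hw_tag(foo))
                    return BLK_STS_DEV_RESOURCE;

            /*
             * Global resource (e.g. memory): it may be freed with no IO
             * in flight, so blk-mq reruns the queue itself after
             * BLK_MQ_RESOURCE_DELAY.
             */
            if (!foo_map_dma(foo, bd->rq))
                    return BLK_STS_RESOURCE;

            foo_submit(foo, bd->rq);
            return BLK_STS_OK;
    }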
* tag 'for-linus-20180204' of git://git.kernel.dk/linux-block:
block: skd: fix incorrect linux/slab_def.h inclusion
buffer: Avoid setting buffer bits that are already set
blk-mq-sched: Enable merging discard bio into request
blk-mq: fix discard merge with scheduler attached
blk-mq: introduce BLK_STS_DEV_RESOURCE
Diffstat (limited to 'block')
 block/blk-core.c     |  3
 block/blk-merge.c    | 29
 block/blk-mq-sched.c |  2
 block/blk-mq.c       | 20
 4 files changed, 47 insertions(+), 7 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index a2005a485335..d0d104268f1a 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -145,6 +145,7 @@ static const struct {
 	[BLK_STS_MEDIUM]	= { -ENODATA,	"critical medium" },
 	[BLK_STS_PROTECTION]	= { -EILSEQ,	"protection" },
 	[BLK_STS_RESOURCE]	= { -ENOMEM,	"kernel resource" },
+	[BLK_STS_DEV_RESOURCE]	= { -EBUSY,	"device resource" },
 	[BLK_STS_AGAIN]		= { -EAGAIN,	"nonblocking retry" },
 
 	/* device mapper special case, should not leak out: */
@@ -3282,6 +3283,8 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
 {
 	if (bio_has_data(bio))
 		rq->nr_phys_segments = bio_phys_segments(q, bio);
+	else if (bio_op(bio) == REQ_OP_DISCARD)
+		rq->nr_phys_segments = 1;
 
 	rq->__data_len = bio->bi_iter.bi_size;
 	rq->bio = rq->biotail = bio;
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 8452fc7164cc..782940c65d8a 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -550,6 +550,24 @@ static bool req_no_special_merge(struct request *req)
 	return !q->mq_ops && req->special;
 }
 
+static bool req_attempt_discard_merge(struct request_queue *q, struct request *req,
+		struct request *next)
+{
+	unsigned short segments = blk_rq_nr_discard_segments(req);
+
+	if (segments >= queue_max_discard_segments(q))
+		goto no_merge;
+	if (blk_rq_sectors(req) + bio_sectors(next->bio) >
+	    blk_rq_get_max_sectors(req, blk_rq_pos(req)))
+		goto no_merge;
+
+	req->nr_phys_segments = segments + blk_rq_nr_discard_segments(next);
+	return true;
+no_merge:
+	req_set_nomerge(q, req);
+	return false;
+}
+
 static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
 				struct request *next)
 {
@@ -683,9 +701,13 @@ static struct request *attempt_merge(struct request_queue *q,
 	 * If we are allowed to merge, then append bio list
 	 * from next to rq and release next. merge_requests_fn
 	 * will have updated segment counts, update sector
-	 * counts here.
+	 * counts here. Handle DISCARDs separately, as they
+	 * have separate settings.
 	 */
-	if (!ll_merge_requests_fn(q, req, next))
+	if (req_op(req) == REQ_OP_DISCARD) {
+		if (!req_attempt_discard_merge(q, req, next))
+			return NULL;
+	} else if (!ll_merge_requests_fn(q, req, next))
 		return NULL;
 
 	/*
@@ -715,7 +737,8 @@ static struct request *attempt_merge(struct request_queue *q,
 
 	req->__data_len += blk_rq_bytes(next);
 
-	elv_merge_requests(q, req, next);
+	if (req_op(req) != REQ_OP_DISCARD)
+		elv_merge_requests(q, req, next);
 
 	/*
 	 * 'next' is going away, so update stats accordingly
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 55c0a745b427..25c14c58385c 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -259,6 +259,8 @@ bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
 		if (!*merged_request)
 			elv_merged_request(q, rq, ELEVATOR_FRONT_MERGE);
 		return true;
+	case ELEVATOR_DISCARD_MERGE:
+		return bio_attempt_discard_merge(q, rq, bio);
 	default:
 		return false;
 	}
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 01f271d40825..df93102e2149 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1162,6 +1162,8 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx **hctx,
 	return true;
 }
 
+#define BLK_MQ_RESOURCE_DELAY	3		/* ms units */
+
 bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
 			     bool got_budget)
 {
@@ -1169,6 +1171,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
 	struct request *rq, *nxt;
 	bool no_tag = false;
 	int errors, queued;
+	blk_status_t ret = BLK_STS_OK;
 
 	if (list_empty(list))
 		return false;
@@ -1181,7 +1184,6 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
 	errors = queued = 0;
 	do {
 		struct blk_mq_queue_data bd;
-		blk_status_t ret;
 
 		rq = list_first_entry(list, struct request, queuelist);
 		if (!blk_mq_get_driver_tag(rq, &hctx, false)) {
@@ -1226,7 +1228,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
 		}
 
 		ret = q->mq_ops->queue_rq(hctx, &bd);
-		if (ret == BLK_STS_RESOURCE) {
+		if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) {
 			/*
 			 * If an I/O scheduler has been configured and we got a
 			 * driver tag for the next request already, free it
@@ -1257,6 +1259,8 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
 	 * that is where we will continue on next queue run.
 	 */
 	if (!list_empty(list)) {
+		bool needs_restart;
+
 		spin_lock(&hctx->lock);
 		list_splice_init(list, &hctx->dispatch);
 		spin_unlock(&hctx->lock);
@@ -1280,10 +1284,17 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
 		 * - Some but not all block drivers stop a queue before
 		 *   returning BLK_STS_RESOURCE. Two exceptions are scsi-mq
 		 *   and dm-rq.
+		 *
+		 * If driver returns BLK_STS_RESOURCE and SCHED_RESTART
+		 * bit is set, run queue after a delay to avoid IO stalls
+		 * that could otherwise occur if the queue is idle.
 		 */
-		if (!blk_mq_sched_needs_restart(hctx) ||
+		needs_restart = blk_mq_sched_needs_restart(hctx);
+		if (!needs_restart ||
 		    (no_tag && list_empty_careful(&hctx->dispatch_wait.entry)))
 			blk_mq_run_hw_queue(hctx, true);
+		else if (needs_restart && (ret == BLK_STS_RESOURCE))
+			blk_mq_delay_run_hw_queue(hctx, BLK_MQ_RESOURCE_DELAY);
 	}
 
 	return (queued + errors) != 0;
@@ -1764,6 +1775,7 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
 		*cookie = new_cookie;
 		break;
 	case BLK_STS_RESOURCE:
+	case BLK_STS_DEV_RESOURCE:
 		__blk_mq_requeue_request(rq);
 		break;
 	default:
@@ -1826,7 +1838,7 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
 	hctx_lock(hctx, &srcu_idx);
 
 	ret = __blk_mq_try_issue_directly(hctx, rq, cookie, false);
-	if (ret == BLK_STS_RESOURCE)
+	if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE)
 		blk_mq_sched_insert_request(rq, false, true, false);
 	else if (ret != BLK_STS_OK)
 		blk_mq_end_request(rq, ret);