author	Linus Torvalds <torvalds@linux-foundation.org>	2019-07-26 13:32:12 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2019-07-26 13:32:12 -0400
commit	04412819652fe30f900d11e96c67b4adfdf17f6b (patch)
tree	aed86baef3fd65e6990484a00514f0594d1fdd6c
parent	750c930b085ba56cfac3649e8e0dff72a8c5f8a5 (diff)
parent	9c0b2596f2ac30967af0b8bb9f038b65926a6f00 (diff)
Merge tag 'for-linus-20190726' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe:

 - Several io_uring fixes/improvements:
     - Blocking fix for O_DIRECT (me)
     - Latter page slowness for registered buffers (me)
     - Fix poll hang under certain conditions (me)
     - Defer sequence check fix for wrapped rings (Zhengyuan)
     - Mismatch in async inc/dec accounting (Zhengyuan)
     - Memory ordering issue that could cause stall (Zhengyuan)
     - Track sequential defer in bytes, not pages (Zhengyuan)

 - NVMe pull request from Christoph

 - Set of hang fixes for wbt (Josef)

 - Redundant error message kill for libahci (Ding)

 - Remove unused blk_mq_sched_started_request() and related ops (Marcos)

 - drbd dynamic alloc shash descriptor to reduce stack use (Arnd)

 - blkcg ->pd_stat() non-debug print (Tejun)

 - bcache memory leak fix (Wei)

 - Comment fix (Akinobu)

 - BFQ perf regression fix (Paolo)

* tag 'for-linus-20190726' of git://git.kernel.dk/linux-block: (24 commits)
  io_uring: ensure ->list is initialized for poll commands
  Revert "nvme-pci: don't create a read hctx mapping without read queues"
  nvme: fix multipath crash when ANA is deactivated
  nvme: fix memory leak caused by incorrect subsystem free
  nvme: ignore subnqn for ADATA SX6000LNP
  drbd: dynamically allocate shash descriptor
  block: blk-mq: Remove blk_mq_sched_started_request and started_request
  bcache: fix possible memory leak in bch_cached_dev_run()
  io_uring: track io length in async_list based on bytes
  io_uring: don't use iov_iter_advance() for fixed buffers
  block: properly handle IOCB_NOWAIT for async O_DIRECT IO
  blk-mq: allow REQ_NOWAIT to return an error inline
  io_uring: add a memory barrier before atomic_read
  rq-qos: use a mb for got_token
  rq-qos: set ourself TASK_UNINTERRUPTIBLE after we schedule
  rq-qos: don't reset has_sleepers on spurious wakeups
  rq-qos: fix missed wake-ups in rq_qos_throttle
  wait: add wq_has_single_sleeper helper
  block, bfq: check also in-flight I/O in dispatch plugging
  block: fix sysfs module parameters directory path in comment
  ...
-rw-r--r--  block/bfq-iosched.c                 |  67
-rw-r--r--  block/blk-cgroup.c                  |   9
-rw-r--r--  block/blk-iolatency.c               |   3
-rw-r--r--  block/blk-mq-sched.h                |   9
-rw-r--r--  block/blk-mq.c                      |  10
-rw-r--r--  block/blk-rq-qos.c                  |   7
-rw-r--r--  block/genhd.c                       |   2
-rw-r--r--  drivers/ata/libahci_platform.c      |   1
-rw-r--r--  drivers/block/drbd/drbd_receiver.c  |  14
-rw-r--r--  drivers/md/bcache/super.c           |   3
-rw-r--r--  drivers/nvme/host/core.c            |  12
-rw-r--r--  drivers/nvme/host/multipath.c       |   8
-rw-r--r--  drivers/nvme/host/nvme.h            |   6
-rw-r--r--  drivers/nvme/host/pci.c             |   6
-rw-r--r--  fs/block_dev.c                      |  58
-rw-r--r--  fs/io_uring.c                       |  81
-rw-r--r--  include/linux/blk-cgroup.h          |   1
-rw-r--r--  include/linux/blk_types.h           |   5
-rw-r--r--  include/linux/elevator.h            |   1
-rw-r--r--  include/linux/wait.h                |  13
20 files changed, 224 insertions, 92 deletions
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 72860325245a..586fcfe227ea 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -3354,38 +3354,57 @@ static void bfq_dispatch_remove(struct request_queue *q, struct request *rq)
3354 * there is no active group, then the primary expectation for 3354 * there is no active group, then the primary expectation for
3355 * this device is probably a high throughput. 3355 * this device is probably a high throughput.
3356 * 3356 *
3357 * We are now left only with explaining the additional 3357 * We are now left only with explaining the two sub-conditions in the
3358 * compound condition that is checked below for deciding 3358 * additional compound condition that is checked below for deciding
3359 * whether the scenario is asymmetric. To explain this 3359 * whether the scenario is asymmetric. To explain the first
3360 * compound condition, we need to add that the function 3360 * sub-condition, we need to add that the function
3361 * bfq_asymmetric_scenario checks the weights of only 3361 * bfq_asymmetric_scenario checks the weights of only
3362 * non-weight-raised queues, for efficiency reasons (see 3362 * non-weight-raised queues, for efficiency reasons (see comments on
3363 * comments on bfq_weights_tree_add()). Then the fact that 3363 * bfq_weights_tree_add()). Then the fact that bfqq is weight-raised
3364 * bfqq is weight-raised is checked explicitly here. More 3364 * is checked explicitly here. More precisely, the compound condition
3365 * precisely, the compound condition below takes into account 3365 * below takes into account also the fact that, even if bfqq is being
3366 * also the fact that, even if bfqq is being weight-raised, 3366 * weight-raised, the scenario is still symmetric if all queues with
3367 * the scenario is still symmetric if all queues with requests 3367 * requests waiting for completion happen to be
3368 * waiting for completion happen to be 3368 * weight-raised. Actually, we should be even more precise here, and
3369 * weight-raised. Actually, we should be even more precise 3369 * differentiate between interactive weight raising and soft real-time
3370 * here, and differentiate between interactive weight raising 3370 * weight raising.
3371 * and soft real-time weight raising. 3371 *
3372 * The second sub-condition checked in the compound condition is
3373 * whether there is a fair amount of already in-flight I/O not
3374 * belonging to bfqq. If so, I/O dispatching is to be plugged, for the
3375 * following reason. The drive may decide to serve in-flight
3376 * non-bfqq's I/O requests before bfqq's ones, thereby delaying the
3377 * arrival of new I/O requests for bfqq (recall that bfqq is sync). If
3378 * I/O-dispatching is not plugged, then, while bfqq remains empty, a
3379 * basically uncontrolled amount of I/O from other queues may be
3380 * dispatched too, possibly causing the service of bfqq's I/O to be
3381 * delayed even longer in the drive. This problem gets more and more
3382 * serious as the speed and the queue depth of the drive grow,
3383 * because, as these two quantities grow, the probability to find no
3384 * queue busy but many requests in flight grows too. By contrast,
3385 * plugging I/O dispatching minimizes the delay induced by already
3386 * in-flight I/O, and enables bfqq to recover the bandwidth it may
3387 * lose because of this delay.
3372 * 3388 *
3373 * As a side note, it is worth considering that the above 3389 * As a side note, it is worth considering that the above
3374 * device-idling countermeasures may however fail in the 3390 * device-idling countermeasures may however fail in the following
3375 * following unlucky scenario: if idling is (correctly) 3391 * unlucky scenario: if I/O-dispatch plugging is (correctly) disabled
3376 * disabled in a time period during which all symmetry 3392 * in a time period during which all symmetry sub-conditions hold, and
3377 * sub-conditions hold, and hence the device is allowed to 3393 * therefore the device is allowed to enqueue many requests, but at
3378 * enqueue many requests, but at some later point in time some 3394 * some later point in time some sub-condition stops to hold, then it
3379 * sub-condition stops to hold, then it may become impossible 3395 * may become impossible to make requests be served in the desired
3380 * to let requests be served in the desired order until all 3396 * order until all the requests already queued in the device have been
3381 * the requests already queued in the device have been served. 3397 * served. The last sub-condition commented above somewhat mitigates
3398 * this problem for weight-raised queues.
3382 */ 3399 */
3383static bool idling_needed_for_service_guarantees(struct bfq_data *bfqd, 3400static bool idling_needed_for_service_guarantees(struct bfq_data *bfqd,
3384 struct bfq_queue *bfqq) 3401 struct bfq_queue *bfqq)
3385{ 3402{
3386 return (bfqq->wr_coeff > 1 && 3403 return (bfqq->wr_coeff > 1 &&
3387 bfqd->wr_busy_queues < 3404 (bfqd->wr_busy_queues <
3388 bfq_tot_busy_queues(bfqd)) || 3405 bfq_tot_busy_queues(bfqd) ||
3406 bfqd->rq_in_driver >=
3407 bfqq->dispatched + 4)) ||
3389 bfq_asymmetric_scenario(bfqd, bfqq); 3408 bfq_asymmetric_scenario(bfqd, bfqq);
3390} 3409}
3391 3410
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 24ed26957367..55a7dc227dfb 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -54,7 +54,7 @@ static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS];
54 54
55static LIST_HEAD(all_blkcgs); /* protected by blkcg_pol_mutex */ 55static LIST_HEAD(all_blkcgs); /* protected by blkcg_pol_mutex */
56 56
57static bool blkcg_debug_stats = false; 57bool blkcg_debug_stats = false;
58static struct workqueue_struct *blkcg_punt_bio_wq; 58static struct workqueue_struct *blkcg_punt_bio_wq;
59 59
60static bool blkcg_policy_enabled(struct request_queue *q, 60static bool blkcg_policy_enabled(struct request_queue *q,
@@ -944,10 +944,7 @@ static int blkcg_print_stat(struct seq_file *sf, void *v)
944 dbytes, dios); 944 dbytes, dios);
945 } 945 }
946 946
947 if (!blkcg_debug_stats) 947 if (blkcg_debug_stats && atomic_read(&blkg->use_delay)) {
948 goto next;
949
950 if (atomic_read(&blkg->use_delay)) {
951 has_stats = true; 948 has_stats = true;
952 off += scnprintf(buf+off, size-off, 949 off += scnprintf(buf+off, size-off,
953 " use_delay=%d delay_nsec=%llu", 950 " use_delay=%d delay_nsec=%llu",
@@ -967,7 +964,7 @@ static int blkcg_print_stat(struct seq_file *sf, void *v)
967 has_stats = true; 964 has_stats = true;
968 off += written; 965 off += written;
969 } 966 }
970next: 967
971 if (has_stats) { 968 if (has_stats) {
972 if (off < size - 1) { 969 if (off < size - 1) {
973 off += scnprintf(buf+off, size-off, "\n"); 970 off += scnprintf(buf+off, size-off, "\n");
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index d973c38ee4fd..0fff7b56df0e 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -917,6 +917,9 @@ static size_t iolatency_pd_stat(struct blkg_policy_data *pd, char *buf,
917 unsigned long long avg_lat; 917 unsigned long long avg_lat;
918 unsigned long long cur_win; 918 unsigned long long cur_win;
919 919
920 if (!blkcg_debug_stats)
921 return 0;
922
920 if (iolat->ssd) 923 if (iolat->ssd)
921 return iolatency_ssd_stat(iolat, buf, size); 924 return iolatency_ssd_stat(iolat, buf, size);
922 925
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
index cf22ab00fefb..126021fc3a11 100644
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -61,15 +61,6 @@ static inline void blk_mq_sched_completed_request(struct request *rq, u64 now)
61 e->type->ops.completed_request(rq, now); 61 e->type->ops.completed_request(rq, now);
62} 62}
63 63
64static inline void blk_mq_sched_started_request(struct request *rq)
65{
66 struct request_queue *q = rq->q;
67 struct elevator_queue *e = q->elevator;
68
69 if (e && e->type->ops.started_request)
70 e->type->ops.started_request(rq);
71}
72
73static inline void blk_mq_sched_requeue_request(struct request *rq) 64static inline void blk_mq_sched_requeue_request(struct request *rq)
74{ 65{
75 struct request_queue *q = rq->q; 66 struct request_queue *q = rq->q;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index b038ec680e84..f78d3287dd82 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -669,8 +669,6 @@ void blk_mq_start_request(struct request *rq)
669{ 669{
670 struct request_queue *q = rq->q; 670 struct request_queue *q = rq->q;
671 671
672 blk_mq_sched_started_request(rq);
673
674 trace_block_rq_issue(q, rq); 672 trace_block_rq_issue(q, rq);
675 673
676 if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) { 674 if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) {
@@ -1960,9 +1958,13 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
1960 rq = blk_mq_get_request(q, bio, &data); 1958 rq = blk_mq_get_request(q, bio, &data);
1961 if (unlikely(!rq)) { 1959 if (unlikely(!rq)) {
1962 rq_qos_cleanup(q, bio); 1960 rq_qos_cleanup(q, bio);
1963 if (bio->bi_opf & REQ_NOWAIT) 1961
1962 cookie = BLK_QC_T_NONE;
1963 if (bio->bi_opf & REQ_NOWAIT_INLINE)
1964 cookie = BLK_QC_T_EAGAIN;
1965 else if (bio->bi_opf & REQ_NOWAIT)
1964 bio_wouldblock_error(bio); 1966 bio_wouldblock_error(bio);
1965 return BLK_QC_T_NONE; 1967 return cookie;
1966 } 1968 }
1967 1969
1968 trace_block_getrq(q, bio, bio->bi_opf); 1970 trace_block_getrq(q, bio, bio->bi_opf);
diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
index 659ccb8b693f..3954c0dc1443 100644
--- a/block/blk-rq-qos.c
+++ b/block/blk-rq-qos.c
@@ -202,6 +202,7 @@ static int rq_qos_wake_function(struct wait_queue_entry *curr,
202 return -1; 202 return -1;
203 203
204 data->got_token = true; 204 data->got_token = true;
205 smp_wmb();
205 list_del_init(&curr->entry); 206 list_del_init(&curr->entry);
206 wake_up_process(data->task); 207 wake_up_process(data->task);
207 return 1; 208 return 1;
@@ -244,7 +245,9 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data,
244 return; 245 return;
245 246
246 prepare_to_wait_exclusive(&rqw->wait, &data.wq, TASK_UNINTERRUPTIBLE); 247 prepare_to_wait_exclusive(&rqw->wait, &data.wq, TASK_UNINTERRUPTIBLE);
248 has_sleeper = !wq_has_single_sleeper(&rqw->wait);
247 do { 249 do {
250 /* The memory barrier in set_task_state saves us here. */
248 if (data.got_token) 251 if (data.got_token)
249 break; 252 break;
250 if (!has_sleeper && acquire_inflight_cb(rqw, private_data)) { 253 if (!has_sleeper && acquire_inflight_cb(rqw, private_data)) {
@@ -255,12 +258,14 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data,
255 * which means we now have two. Put our local token 258 * which means we now have two. Put our local token
256 * and wake anyone else potentially waiting for one. 259 * and wake anyone else potentially waiting for one.
257 */ 260 */
261 smp_rmb();
258 if (data.got_token) 262 if (data.got_token)
259 cleanup_cb(rqw, private_data); 263 cleanup_cb(rqw, private_data);
260 break; 264 break;
261 } 265 }
262 io_schedule(); 266 io_schedule();
263 has_sleeper = false; 267 has_sleeper = true;
268 set_current_state(TASK_UNINTERRUPTIBLE);
264 } while (1); 269 } while (1);
265 finish_wait(&rqw->wait, &data.wq); 270 finish_wait(&rqw->wait, &data.wq);
266} 271}
diff --git a/block/genhd.c b/block/genhd.c
index 97887e59f3b2..54f1f0d381f4 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1969,7 +1969,7 @@ static const struct attribute *disk_events_attrs[] = {
1969 * The default polling interval can be specified by the kernel 1969 * The default polling interval can be specified by the kernel
1970 * parameter block.events_dfl_poll_msecs which defaults to 0 1970 * parameter block.events_dfl_poll_msecs which defaults to 0
1971 * (disable). This can also be modified runtime by writing to 1971 * (disable). This can also be modified runtime by writing to
1972 * /sys/module/block/events_dfl_poll_msecs. 1972 * /sys/module/block/parameters/events_dfl_poll_msecs.
1973 */ 1973 */
1974static int disk_events_set_dfl_poll_msecs(const char *val, 1974static int disk_events_set_dfl_poll_msecs(const char *val,
1975 const struct kernel_param *kp) 1975 const struct kernel_param *kp)
diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c
index 72312ad2e142..3a36e76eca83 100644
--- a/drivers/ata/libahci_platform.c
+++ b/drivers/ata/libahci_platform.c
@@ -408,7 +408,6 @@ struct ahci_host_priv *ahci_platform_get_resources(struct platform_device *pdev,
408 hpriv->mmio = devm_ioremap_resource(dev, 408 hpriv->mmio = devm_ioremap_resource(dev,
409 platform_get_resource(pdev, IORESOURCE_MEM, 0)); 409 platform_get_resource(pdev, IORESOURCE_MEM, 0));
410 if (IS_ERR(hpriv->mmio)) { 410 if (IS_ERR(hpriv->mmio)) {
411 dev_err(dev, "no mmio space\n");
412 rc = PTR_ERR(hpriv->mmio); 411 rc = PTR_ERR(hpriv->mmio);
413 goto err_out; 412 goto err_out;
414 } 413 }
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 90ebfcae0ce6..2b3103c30857 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -5417,7 +5417,7 @@ static int drbd_do_auth(struct drbd_connection *connection)
5417 unsigned int key_len; 5417 unsigned int key_len;
5418 char secret[SHARED_SECRET_MAX]; /* 64 byte */ 5418 char secret[SHARED_SECRET_MAX]; /* 64 byte */
5419 unsigned int resp_size; 5419 unsigned int resp_size;
5420 SHASH_DESC_ON_STACK(desc, connection->cram_hmac_tfm); 5420 struct shash_desc *desc;
5421 struct packet_info pi; 5421 struct packet_info pi;
5422 struct net_conf *nc; 5422 struct net_conf *nc;
5423 int err, rv; 5423 int err, rv;
@@ -5430,6 +5430,13 @@ static int drbd_do_auth(struct drbd_connection *connection)
5430 memcpy(secret, nc->shared_secret, key_len); 5430 memcpy(secret, nc->shared_secret, key_len);
5431 rcu_read_unlock(); 5431 rcu_read_unlock();
5432 5432
5433 desc = kmalloc(sizeof(struct shash_desc) +
5434 crypto_shash_descsize(connection->cram_hmac_tfm),
5435 GFP_KERNEL);
5436 if (!desc) {
5437 rv = -1;
5438 goto fail;
5439 }
5433 desc->tfm = connection->cram_hmac_tfm; 5440 desc->tfm = connection->cram_hmac_tfm;
5434 5441
5435 rv = crypto_shash_setkey(connection->cram_hmac_tfm, (u8 *)secret, key_len); 5442 rv = crypto_shash_setkey(connection->cram_hmac_tfm, (u8 *)secret, key_len);
@@ -5571,7 +5578,10 @@ static int drbd_do_auth(struct drbd_connection *connection)
5571 kfree(peers_ch); 5578 kfree(peers_ch);
5572 kfree(response); 5579 kfree(response);
5573 kfree(right_response); 5580 kfree(right_response);
5574 shash_desc_zero(desc); 5581 if (desc) {
5582 shash_desc_zero(desc);
5583 kfree(desc);
5584 }
5575 5585
5576 return rv; 5586 return rv;
5577} 5587}
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 26e374fbf57c..20ed838e9413 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -931,6 +931,9 @@ int bch_cached_dev_run(struct cached_dev *dc)
931 if (dc->io_disable) { 931 if (dc->io_disable) {
932 pr_err("I/O disabled on cached dev %s", 932 pr_err("I/O disabled on cached dev %s",
933 dc->backing_dev_name); 933 dc->backing_dev_name);
934 kfree(env[1]);
935 kfree(env[2]);
936 kfree(buf);
934 return -EIO; 937 return -EIO;
935 } 938 }
936 939
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index cc09b81fc7f4..8f3fbe5ca937 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2311,17 +2311,15 @@ static void nvme_init_subnqn(struct nvme_subsystem *subsys, struct nvme_ctrl *ct
2311 memset(subsys->subnqn + off, 0, sizeof(subsys->subnqn) - off); 2311 memset(subsys->subnqn + off, 0, sizeof(subsys->subnqn) - off);
2312} 2312}
2313 2313
2314static void __nvme_release_subsystem(struct nvme_subsystem *subsys) 2314static void nvme_release_subsystem(struct device *dev)
2315{ 2315{
2316 struct nvme_subsystem *subsys =
2317 container_of(dev, struct nvme_subsystem, dev);
2318
2316 ida_simple_remove(&nvme_subsystems_ida, subsys->instance); 2319 ida_simple_remove(&nvme_subsystems_ida, subsys->instance);
2317 kfree(subsys); 2320 kfree(subsys);
2318} 2321}
2319 2322
2320static void nvme_release_subsystem(struct device *dev)
2321{
2322 __nvme_release_subsystem(container_of(dev, struct nvme_subsystem, dev));
2323}
2324
2325static void nvme_destroy_subsystem(struct kref *ref) 2323static void nvme_destroy_subsystem(struct kref *ref)
2326{ 2324{
2327 struct nvme_subsystem *subsys = 2325 struct nvme_subsystem *subsys =
@@ -2477,7 +2475,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
2477 mutex_lock(&nvme_subsystems_lock); 2475 mutex_lock(&nvme_subsystems_lock);
2478 found = __nvme_find_get_subsystem(subsys->subnqn); 2476 found = __nvme_find_get_subsystem(subsys->subnqn);
2479 if (found) { 2477 if (found) {
2480 __nvme_release_subsystem(subsys); 2478 put_device(&subsys->dev);
2481 subsys = found; 2479 subsys = found;
2482 2480
2483 if (!nvme_validate_cntlid(subsys, ctrl, id)) { 2481 if (!nvme_validate_cntlid(subsys, ctrl, id)) {
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index a9a927677970..4f0d0d12744e 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -12,11 +12,6 @@ module_param(multipath, bool, 0444);
12MODULE_PARM_DESC(multipath, 12MODULE_PARM_DESC(multipath,
13 "turn on native support for multiple controllers per subsystem"); 13 "turn on native support for multiple controllers per subsystem");
14 14
15inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl)
16{
17 return multipath && ctrl->subsys && (ctrl->subsys->cmic & (1 << 3));
18}
19
20/* 15/*
21 * If multipathing is enabled we need to always use the subsystem instance 16 * If multipathing is enabled we need to always use the subsystem instance
22 * number for numbering our devices to avoid conflicts between subsystems that 17 * number for numbering our devices to avoid conflicts between subsystems that
@@ -622,7 +617,8 @@ int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
622{ 617{
623 int error; 618 int error;
624 619
625 if (!nvme_ctrl_use_ana(ctrl)) 620 /* check if multipath is enabled and we have the capability */
621 if (!multipath || !ctrl->subsys || !(ctrl->subsys->cmic & (1 << 3)))
626 return 0; 622 return 0;
627 623
628 ctrl->anacap = id->anacap; 624 ctrl->anacap = id->anacap;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 716a876119c8..26b563f9985b 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -485,7 +485,11 @@ extern const struct attribute_group *nvme_ns_id_attr_groups[];
485extern const struct block_device_operations nvme_ns_head_ops; 485extern const struct block_device_operations nvme_ns_head_ops;
486 486
487#ifdef CONFIG_NVME_MULTIPATH 487#ifdef CONFIG_NVME_MULTIPATH
488bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl); 488static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl)
489{
490 return ctrl->ana_log_buf != NULL;
491}
492
489void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns, 493void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
490 struct nvme_ctrl *ctrl, int *flags); 494 struct nvme_ctrl *ctrl, int *flags);
491void nvme_failover_req(struct request *req); 495void nvme_failover_req(struct request *req);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index bb970ca82517..db160cee42ad 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2254,9 +2254,7 @@ static int nvme_dev_add(struct nvme_dev *dev)
2254 if (!dev->ctrl.tagset) { 2254 if (!dev->ctrl.tagset) {
2255 dev->tagset.ops = &nvme_mq_ops; 2255 dev->tagset.ops = &nvme_mq_ops;
2256 dev->tagset.nr_hw_queues = dev->online_queues - 1; 2256 dev->tagset.nr_hw_queues = dev->online_queues - 1;
2257 dev->tagset.nr_maps = 1; /* default */ 2257 dev->tagset.nr_maps = 2; /* default + read */
2258 if (dev->io_queues[HCTX_TYPE_READ])
2259 dev->tagset.nr_maps++;
2260 if (dev->io_queues[HCTX_TYPE_POLL]) 2258 if (dev->io_queues[HCTX_TYPE_POLL])
2261 dev->tagset.nr_maps++; 2259 dev->tagset.nr_maps++;
2262 dev->tagset.timeout = NVME_IO_TIMEOUT; 2260 dev->tagset.timeout = NVME_IO_TIMEOUT;
@@ -3029,6 +3027,8 @@ static const struct pci_device_id nvme_id_table[] = {
3029 .driver_data = NVME_QUIRK_LIGHTNVM, }, 3027 .driver_data = NVME_QUIRK_LIGHTNVM, },
3030 { PCI_DEVICE(0x1d1d, 0x2601), /* CNEX Granby */ 3028 { PCI_DEVICE(0x1d1d, 0x2601), /* CNEX Granby */
3031 .driver_data = NVME_QUIRK_LIGHTNVM, }, 3029 .driver_data = NVME_QUIRK_LIGHTNVM, },
3030 { PCI_DEVICE(0x10ec, 0x5762), /* ADATA SX6000LNP */
3031 .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, },
3032 { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) }, 3032 { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
3033 { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) }, 3033 { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) },
3034 { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) }, 3034 { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 4707dfff991b..c2a85b587922 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -345,15 +345,24 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
345 struct bio *bio; 345 struct bio *bio;
346 bool is_poll = (iocb->ki_flags & IOCB_HIPRI) != 0; 346 bool is_poll = (iocb->ki_flags & IOCB_HIPRI) != 0;
347 bool is_read = (iov_iter_rw(iter) == READ), is_sync; 347 bool is_read = (iov_iter_rw(iter) == READ), is_sync;
348 bool nowait = (iocb->ki_flags & IOCB_NOWAIT) != 0;
348 loff_t pos = iocb->ki_pos; 349 loff_t pos = iocb->ki_pos;
349 blk_qc_t qc = BLK_QC_T_NONE; 350 blk_qc_t qc = BLK_QC_T_NONE;
350 int ret = 0; 351 gfp_t gfp;
352 ssize_t ret;
351 353
352 if ((pos | iov_iter_alignment(iter)) & 354 if ((pos | iov_iter_alignment(iter)) &
353 (bdev_logical_block_size(bdev) - 1)) 355 (bdev_logical_block_size(bdev) - 1))
354 return -EINVAL; 356 return -EINVAL;
355 357
356 bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, &blkdev_dio_pool); 358 if (nowait)
359 gfp = GFP_NOWAIT;
360 else
361 gfp = GFP_KERNEL;
362
363 bio = bio_alloc_bioset(gfp, nr_pages, &blkdev_dio_pool);
364 if (!bio)
365 return -EAGAIN;
357 366
358 dio = container_of(bio, struct blkdev_dio, bio); 367 dio = container_of(bio, struct blkdev_dio, bio);
359 dio->is_sync = is_sync = is_sync_kiocb(iocb); 368 dio->is_sync = is_sync = is_sync_kiocb(iocb);
@@ -375,7 +384,10 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
375 if (!is_poll) 384 if (!is_poll)
376 blk_start_plug(&plug); 385 blk_start_plug(&plug);
377 386
387 ret = 0;
378 for (;;) { 388 for (;;) {
389 int err;
390
379 bio_set_dev(bio, bdev); 391 bio_set_dev(bio, bdev);
380 bio->bi_iter.bi_sector = pos >> 9; 392 bio->bi_iter.bi_sector = pos >> 9;
381 bio->bi_write_hint = iocb->ki_hint; 393 bio->bi_write_hint = iocb->ki_hint;
@@ -383,8 +395,10 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
383 bio->bi_end_io = blkdev_bio_end_io; 395 bio->bi_end_io = blkdev_bio_end_io;
384 bio->bi_ioprio = iocb->ki_ioprio; 396 bio->bi_ioprio = iocb->ki_ioprio;
385 397
386 ret = bio_iov_iter_get_pages(bio, iter); 398 err = bio_iov_iter_get_pages(bio, iter);
387 if (unlikely(ret)) { 399 if (unlikely(err)) {
400 if (!ret)
401 ret = err;
388 bio->bi_status = BLK_STS_IOERR; 402 bio->bi_status = BLK_STS_IOERR;
389 bio_endio(bio); 403 bio_endio(bio);
390 break; 404 break;
@@ -399,6 +413,14 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
399 task_io_account_write(bio->bi_iter.bi_size); 413 task_io_account_write(bio->bi_iter.bi_size);
400 } 414 }
401 415
416 /*
417 * Tell underlying layer to not block for resource shortage.
418 * And if we would have blocked, return error inline instead
419 * of through the bio->bi_end_io() callback.
420 */
421 if (nowait)
422 bio->bi_opf |= (REQ_NOWAIT | REQ_NOWAIT_INLINE);
423
402 dio->size += bio->bi_iter.bi_size; 424 dio->size += bio->bi_iter.bi_size;
403 pos += bio->bi_iter.bi_size; 425 pos += bio->bi_iter.bi_size;
404 426
@@ -412,6 +434,11 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
412 } 434 }
413 435
414 qc = submit_bio(bio); 436 qc = submit_bio(bio);
437 if (qc == BLK_QC_T_EAGAIN) {
438 if (!ret)
439 ret = -EAGAIN;
440 goto error;
441 }
415 442
416 if (polled) 443 if (polled)
417 WRITE_ONCE(iocb->ki_cookie, qc); 444 WRITE_ONCE(iocb->ki_cookie, qc);
@@ -432,8 +459,20 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
432 atomic_inc(&dio->ref); 459 atomic_inc(&dio->ref);
433 } 460 }
434 461
435 submit_bio(bio); 462 qc = submit_bio(bio);
436 bio = bio_alloc(GFP_KERNEL, nr_pages); 463 if (qc == BLK_QC_T_EAGAIN) {
464 if (!ret)
465 ret = -EAGAIN;
466 goto error;
467 }
468 ret += bio->bi_iter.bi_size;
469
470 bio = bio_alloc(gfp, nr_pages);
471 if (!bio) {
472 if (!ret)
473 ret = -EAGAIN;
474 goto error;
475 }
437 } 476 }
438 477
439 if (!is_poll) 478 if (!is_poll)
@@ -453,13 +492,16 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
453 } 492 }
454 __set_current_state(TASK_RUNNING); 493 __set_current_state(TASK_RUNNING);
455 494
495out:
456 if (!ret) 496 if (!ret)
457 ret = blk_status_to_errno(dio->bio.bi_status); 497 ret = blk_status_to_errno(dio->bio.bi_status);
458 if (likely(!ret))
459 ret = dio->size;
460 498
461 bio_put(&dio->bio); 499 bio_put(&dio->bio);
462 return ret; 500 return ret;
501error:
502 if (!is_poll)
503 blk_finish_plug(&plug);
504 goto out;
463} 505}
464 506
465static ssize_t 507static ssize_t
diff --git a/fs/io_uring.c b/fs/io_uring.c
index e2a66e12fbc6..012bc0efb9d3 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -202,7 +202,7 @@ struct async_list {
202 202
203 struct file *file; 203 struct file *file;
204 off_t io_end; 204 off_t io_end;
205 size_t io_pages; 205 size_t io_len;
206}; 206};
207 207
208struct io_ring_ctx { 208struct io_ring_ctx {
@@ -333,7 +333,8 @@ struct io_kiocb {
333#define REQ_F_IO_DRAIN 16 /* drain existing IO first */ 333#define REQ_F_IO_DRAIN 16 /* drain existing IO first */
334#define REQ_F_IO_DRAINED 32 /* drain done */ 334#define REQ_F_IO_DRAINED 32 /* drain done */
335#define REQ_F_LINK 64 /* linked sqes */ 335#define REQ_F_LINK 64 /* linked sqes */
336#define REQ_F_FAIL_LINK 128 /* fail rest of links */ 336#define REQ_F_LINK_DONE 128 /* linked sqes done */
337#define REQ_F_FAIL_LINK 256 /* fail rest of links */
337 u64 user_data; 338 u64 user_data;
338 u32 result; 339 u32 result;
339 u32 sequence; 340 u32 sequence;
@@ -429,7 +430,7 @@ static inline bool io_sequence_defer(struct io_ring_ctx *ctx,
429 if ((req->flags & (REQ_F_IO_DRAIN|REQ_F_IO_DRAINED)) != REQ_F_IO_DRAIN) 430 if ((req->flags & (REQ_F_IO_DRAIN|REQ_F_IO_DRAINED)) != REQ_F_IO_DRAIN)
430 return false; 431 return false;
431 432
432 return req->sequence > ctx->cached_cq_tail + ctx->sq_ring->dropped; 433 return req->sequence != ctx->cached_cq_tail + ctx->sq_ring->dropped;
433} 434}
434 435
435static struct io_kiocb *io_get_deferred_req(struct io_ring_ctx *ctx) 436static struct io_kiocb *io_get_deferred_req(struct io_ring_ctx *ctx)
@@ -632,6 +633,7 @@ static void io_req_link_next(struct io_kiocb *req)
632 nxt->flags |= REQ_F_LINK; 633 nxt->flags |= REQ_F_LINK;
633 } 634 }
634 635
636 nxt->flags |= REQ_F_LINK_DONE;
635 INIT_WORK(&nxt->work, io_sq_wq_submit_work); 637 INIT_WORK(&nxt->work, io_sq_wq_submit_work);
636 queue_work(req->ctx->sqo_wq, &nxt->work); 638 queue_work(req->ctx->sqo_wq, &nxt->work);
637 } 639 }
@@ -1064,8 +1066,44 @@ static int io_import_fixed(struct io_ring_ctx *ctx, int rw,
1064 */ 1066 */
1065 offset = buf_addr - imu->ubuf; 1067 offset = buf_addr - imu->ubuf;
1066 iov_iter_bvec(iter, rw, imu->bvec, imu->nr_bvecs, offset + len); 1068 iov_iter_bvec(iter, rw, imu->bvec, imu->nr_bvecs, offset + len);
1067 if (offset) 1069
1068 iov_iter_advance(iter, offset); 1070 if (offset) {
1071 /*
1072 * Don't use iov_iter_advance() here, as it's really slow for
1073 * using the latter parts of a big fixed buffer - it iterates
1074 * over each segment manually. We can cheat a bit here, because
1075 * we know that:
1076 *
1077 * 1) it's a BVEC iter, we set it up
1078 * 2) all bvecs are PAGE_SIZE in size, except potentially the
1079 * first and last bvec
1080 *
1081 * So just find our index, and adjust the iterator afterwards.
1082 * If the offset is within the first bvec (or the whole first
1083 * bvec, just use iov_iter_advance(). This makes it easier
1084 * since we can just skip the first segment, which may not
1085 * be PAGE_SIZE aligned.
1086 */
1087 const struct bio_vec *bvec = imu->bvec;
1088
1089 if (offset <= bvec->bv_len) {
1090 iov_iter_advance(iter, offset);
1091 } else {
1092 unsigned long seg_skip;
1093
1094 /* skip first vec */
1095 offset -= bvec->bv_len;
1096 seg_skip = 1 + (offset >> PAGE_SHIFT);
1097
1098 iter->bvec = bvec + seg_skip;
1099 iter->nr_segs -= seg_skip;
1100 iter->count -= (seg_skip << PAGE_SHIFT);
1101 iter->iov_offset = offset & ~PAGE_MASK;
1102 if (iter->iov_offset)
1103 iter->count -= iter->iov_offset;
1104 }
1105 }
1106
1069 return 0; 1107 return 0;
1070} 1108}
1071 1109
@@ -1120,28 +1158,26 @@ static void io_async_list_note(int rw, struct io_kiocb *req, size_t len)
1120 off_t io_end = kiocb->ki_pos + len; 1158 off_t io_end = kiocb->ki_pos + len;
1121 1159
1122 if (filp == async_list->file && kiocb->ki_pos == async_list->io_end) { 1160 if (filp == async_list->file && kiocb->ki_pos == async_list->io_end) {
1123 unsigned long max_pages; 1161 unsigned long max_bytes;
1124 1162
1125 /* Use 8x RA size as a decent limiter for both reads/writes */ 1163 /* Use 8x RA size as a decent limiter for both reads/writes */
1126 max_pages = filp->f_ra.ra_pages; 1164 max_bytes = filp->f_ra.ra_pages << (PAGE_SHIFT + 3);
1127 if (!max_pages) 1165 if (!max_bytes)
1128 max_pages = VM_READAHEAD_PAGES; 1166 max_bytes = VM_READAHEAD_PAGES << (PAGE_SHIFT + 3);
1129 max_pages *= 8; 1167
1130 1168 /* If max len are exceeded, reset the state */
1131 /* If max pages are exceeded, reset the state */ 1169 if (async_list->io_len + len <= max_bytes) {
1132 len >>= PAGE_SHIFT;
1133 if (async_list->io_pages + len <= max_pages) {
1134 req->flags |= REQ_F_SEQ_PREV; 1170 req->flags |= REQ_F_SEQ_PREV;
1135 async_list->io_pages += len; 1171 async_list->io_len += len;
1136 } else { 1172 } else {
1137 io_end = 0; 1173 io_end = 0;
1138 async_list->io_pages = 0; 1174 async_list->io_len = 0;
1139 } 1175 }
1140 } 1176 }
1141 1177
1142 /* New file? Reset state. */ 1178 /* New file? Reset state. */
1143 if (async_list->file != filp) { 1179 if (async_list->file != filp) {
1144 async_list->io_pages = 0; 1180 async_list->io_len = 0;
1145 async_list->file = filp; 1181 async_list->file = filp;
1146 } 1182 }
1147 async_list->io_end = io_end; 1183 async_list->io_end = io_end;
@@ -1630,6 +1666,8 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1630 INIT_LIST_HEAD(&poll->wait.entry); 1666 INIT_LIST_HEAD(&poll->wait.entry);
1631 init_waitqueue_func_entry(&poll->wait, io_poll_wake); 1667 init_waitqueue_func_entry(&poll->wait, io_poll_wake);
1632 1668
1669 INIT_LIST_HEAD(&req->list);
1670
1633 mask = vfs_poll(poll->file, &ipt.pt) & poll->events; 1671 mask = vfs_poll(poll->file, &ipt.pt) & poll->events;
1634 1672
1635 spin_lock_irq(&ctx->completion_lock); 1673 spin_lock_irq(&ctx->completion_lock);
@@ -1844,6 +1882,10 @@ restart:
1844 /* async context always use a copy of the sqe */ 1882 /* async context always use a copy of the sqe */
1845 kfree(sqe); 1883 kfree(sqe);
1846 1884
1885 /* req from defer and link list needn't decrease async cnt */
1886 if (req->flags & (REQ_F_IO_DRAINED | REQ_F_LINK_DONE))
1887 goto out;
1888
1847 if (!async_list) 1889 if (!async_list)
1848 break; 1890 break;
1849 if (!list_empty(&req_list)) { 1891 if (!list_empty(&req_list)) {
@@ -1891,6 +1933,7 @@ restart:
1891 } 1933 }
1892 } 1934 }
1893 1935
1936out:
1894 if (cur_mm) { 1937 if (cur_mm) {
1895 set_fs(old_fs); 1938 set_fs(old_fs);
1896 unuse_mm(cur_mm); 1939 unuse_mm(cur_mm);
@@ -1917,6 +1960,10 @@ static bool io_add_to_prev_work(struct async_list *list, struct io_kiocb *req)
1917 ret = true; 1960 ret = true;
1918 spin_lock(&list->lock); 1961 spin_lock(&list->lock);
1919 list_add_tail(&req->list, &list->list); 1962 list_add_tail(&req->list, &list->list);
1963 /*
1964 * Ensure we see a simultaneous modification from io_sq_wq_submit_work()
1965 */
1966 smp_mb();
1920 if (!atomic_read(&list->cnt)) { 1967 if (!atomic_read(&list->cnt)) {
1921 list_del_init(&req->list); 1968 list_del_init(&req->list);
1922 ret = false; 1969 ret = false;
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 689a58231288..12811091fd50 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -181,6 +181,7 @@ struct blkcg_policy {
181 181
182extern struct blkcg blkcg_root; 182extern struct blkcg blkcg_root;
183extern struct cgroup_subsys_state * const blkcg_root_css; 183extern struct cgroup_subsys_state * const blkcg_root_css;
184extern bool blkcg_debug_stats;
184 185
185struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg, 186struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
186 struct request_queue *q, bool update_hint); 187 struct request_queue *q, bool update_hint);
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index feff3fe4467e..1b1fa1557e68 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -311,6 +311,7 @@ enum req_flag_bits {
311 __REQ_RAHEAD, /* read ahead, can fail anytime */ 311 __REQ_RAHEAD, /* read ahead, can fail anytime */
312 __REQ_BACKGROUND, /* background IO */ 312 __REQ_BACKGROUND, /* background IO */
313 __REQ_NOWAIT, /* Don't wait if request will block */ 313 __REQ_NOWAIT, /* Don't wait if request will block */
314 __REQ_NOWAIT_INLINE, /* Return would-block error inline */
314 /* 315 /*
315 * When a shared kthread needs to issue a bio for a cgroup, doing 316 * When a shared kthread needs to issue a bio for a cgroup, doing
316 * so synchronously can lead to priority inversions as the kthread 317 * so synchronously can lead to priority inversions as the kthread
@@ -345,6 +346,7 @@ enum req_flag_bits {
345#define REQ_RAHEAD (1ULL << __REQ_RAHEAD) 346#define REQ_RAHEAD (1ULL << __REQ_RAHEAD)
346#define REQ_BACKGROUND (1ULL << __REQ_BACKGROUND) 347#define REQ_BACKGROUND (1ULL << __REQ_BACKGROUND)
347#define REQ_NOWAIT (1ULL << __REQ_NOWAIT) 348#define REQ_NOWAIT (1ULL << __REQ_NOWAIT)
349#define REQ_NOWAIT_INLINE (1ULL << __REQ_NOWAIT_INLINE)
348#define REQ_CGROUP_PUNT (1ULL << __REQ_CGROUP_PUNT) 350#define REQ_CGROUP_PUNT (1ULL << __REQ_CGROUP_PUNT)
349 351
350#define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP) 352#define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP)
@@ -418,12 +420,13 @@ static inline int op_stat_group(unsigned int op)
418 420
419typedef unsigned int blk_qc_t; 421typedef unsigned int blk_qc_t;
420#define BLK_QC_T_NONE -1U 422#define BLK_QC_T_NONE -1U
423#define BLK_QC_T_EAGAIN -2U
421#define BLK_QC_T_SHIFT 16 424#define BLK_QC_T_SHIFT 16
422#define BLK_QC_T_INTERNAL (1U << 31) 425#define BLK_QC_T_INTERNAL (1U << 31)
423 426
424static inline bool blk_qc_t_valid(blk_qc_t cookie) 427static inline bool blk_qc_t_valid(blk_qc_t cookie)
425{ 428{
426 return cookie != BLK_QC_T_NONE; 429 return cookie != BLK_QC_T_NONE && cookie != BLK_QC_T_EAGAIN;
427} 430}
428 431
429static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie) 432static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie)
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 17cd0078377c..1dd014c9c87b 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -45,7 +45,6 @@ struct elevator_mq_ops {
45 struct request *(*dispatch_request)(struct blk_mq_hw_ctx *); 45 struct request *(*dispatch_request)(struct blk_mq_hw_ctx *);
46 bool (*has_work)(struct blk_mq_hw_ctx *); 46 bool (*has_work)(struct blk_mq_hw_ctx *);
47 void (*completed_request)(struct request *, u64); 47 void (*completed_request)(struct request *, u64);
48 void (*started_request)(struct request *);
49 void (*requeue_request)(struct request *); 48 void (*requeue_request)(struct request *);
50 struct request *(*former_request)(struct request_queue *, struct request *); 49 struct request *(*former_request)(struct request_queue *, struct request *);
51 struct request *(*next_request)(struct request_queue *, struct request *); 50 struct request *(*next_request)(struct request_queue *, struct request *);
diff --git a/include/linux/wait.h b/include/linux/wait.h
index b6f77cf60dd7..30c515520fb2 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -127,6 +127,19 @@ static inline int waitqueue_active(struct wait_queue_head *wq_head)
127} 127}
128 128
129/** 129/**
130 * wq_has_single_sleeper - check if there is only one sleeper
131 * @wq_head: wait queue head
132 *
133 * Returns true of wq_head has only one sleeper on the list.
134 *
135 * Please refer to the comment for waitqueue_active.
136 */
137static inline bool wq_has_single_sleeper(struct wait_queue_head *wq_head)
138{
139 return list_is_singular(&wq_head->head);
140}
141
142/**
130 * wq_has_sleeper - check if there are any waiting processes 143 * wq_has_sleeper - check if there are any waiting processes
131 * @wq_head: wait queue head 144 * @wq_head: wait queue head
132 * 145 *