author    Linus Torvalds <torvalds@linux-foundation.org>  2019-07-26 13:32:12 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2019-07-26 13:32:12 -0400
commit    04412819652fe30f900d11e96c67b4adfdf17f6b
tree      aed86baef3fd65e6990484a00514f0594d1fdd6c
parent    750c930b085ba56cfac3649e8e0dff72a8c5f8a5
parent    9c0b2596f2ac30967af0b8bb9f038b65926a6f00
Merge tag 'for-linus-20190726' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe:
- Several io_uring fixes/improvements:
    - Blocking fix for O_DIRECT (me)
    - Latter page slowness for registered buffers (me)
    - Fix poll hang under certain conditions (me)
    - Defer sequence check fix for wrapped rings (Zhengyuan)
    - Mismatch in async inc/dec accounting (Zhengyuan)
    - Memory ordering issue that could cause stall (Zhengyuan)
    - Track sequential defer in bytes, not pages (Zhengyuan)
- NVMe pull request from Christoph
- Set of hang fixes for wbt (Josef)
- Redundant error message kill for libahci (Ding)
- Remove unused blk_mq_sched_started_request() and related ops (Marcos)
- drbd dynamic alloc shash descriptor to reduce stack use (Arnd)
- blkcg ->pd_stat() non-debug print (Tejun)
- bcache memory leak fix (Wei)
- Comment fix (Akinobu)
- BFQ perf regression fix (Paolo)
* tag 'for-linus-20190726' of git://git.kernel.dk/linux-block: (24 commits)
io_uring: ensure ->list is initialized for poll commands
Revert "nvme-pci: don't create a read hctx mapping without read queues"
nvme: fix multipath crash when ANA is deactivated
nvme: fix memory leak caused by incorrect subsystem free
nvme: ignore subnqn for ADATA SX6000LNP
drbd: dynamically allocate shash descriptor
block: blk-mq: Remove blk_mq_sched_started_request and started_request
bcache: fix possible memory leak in bch_cached_dev_run()
io_uring: track io length in async_list based on bytes
io_uring: don't use iov_iter_advance() for fixed buffers
block: properly handle IOCB_NOWAIT for async O_DIRECT IO
blk-mq: allow REQ_NOWAIT to return an error inline
io_uring: add a memory barrier before atomic_read
rq-qos: use a mb for got_token
rq-qos: set ourself TASK_UNINTERRUPTIBLE after we schedule
rq-qos: don't reset has_sleepers on spurious wakeups
rq-qos: fix missed wake-ups in rq_qos_throttle
wait: add wq_has_single_sleeper helper
block, bfq: check also in-flight I/O in dispatch plugging
block: fix sysfs module parameters directory path in comment
...
-rw-r--r--  block/bfq-iosched.c                 | 67
-rw-r--r--  block/blk-cgroup.c                  |  9
-rw-r--r--  block/blk-iolatency.c               |  3
-rw-r--r--  block/blk-mq-sched.h                |  9
-rw-r--r--  block/blk-mq.c                      | 10
-rw-r--r--  block/blk-rq-qos.c                  |  7
-rw-r--r--  block/genhd.c                       |  2
-rw-r--r--  drivers/ata/libahci_platform.c      |  1
-rw-r--r--  drivers/block/drbd/drbd_receiver.c  | 14
-rw-r--r--  drivers/md/bcache/super.c           |  3
-rw-r--r--  drivers/nvme/host/core.c            | 12
-rw-r--r--  drivers/nvme/host/multipath.c       |  8
-rw-r--r--  drivers/nvme/host/nvme.h            |  6
-rw-r--r--  drivers/nvme/host/pci.c             |  6
-rw-r--r--  fs/block_dev.c                      | 58
-rw-r--r--  fs/io_uring.c                       | 81
-rw-r--r--  include/linux/blk-cgroup.h          |  1
-rw-r--r--  include/linux/blk_types.h           |  5
-rw-r--r--  include/linux/elevator.h            |  1
-rw-r--r--  include/linux/wait.h                | 13
20 files changed, 224 insertions, 92 deletions
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 72860325245a..586fcfe227ea 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -3354,38 +3354,57 @@ static void bfq_dispatch_remove(struct request_queue *q, struct request *rq) | |||
3354 | * there is no active group, then the primary expectation for | 3354 | * there is no active group, then the primary expectation for |
3355 | * this device is probably a high throughput. | 3355 | * this device is probably a high throughput. |
3356 | * | 3356 | * |
3357 | * We are now left only with explaining the additional | 3357 | * We are now left only with explaining the two sub-conditions in the |
3358 | * compound condition that is checked below for deciding | 3358 | * additional compound condition that is checked below for deciding |
3359 | * whether the scenario is asymmetric. To explain this | 3359 | * whether the scenario is asymmetric. To explain the first |
3360 | * compound condition, we need to add that the function | 3360 | * sub-condition, we need to add that the function |
3361 | * bfq_asymmetric_scenario checks the weights of only | 3361 | * bfq_asymmetric_scenario checks the weights of only |
3362 | * non-weight-raised queues, for efficiency reasons (see | 3362 | * non-weight-raised queues, for efficiency reasons (see comments on |
3363 | * comments on bfq_weights_tree_add()). Then the fact that | 3363 | * bfq_weights_tree_add()). Then the fact that bfqq is weight-raised |
3364 | * bfqq is weight-raised is checked explicitly here. More | 3364 | * is checked explicitly here. More precisely, the compound condition |
3365 | * precisely, the compound condition below takes into account | 3365 | * below takes into account also the fact that, even if bfqq is being |
3366 | * also the fact that, even if bfqq is being weight-raised, | 3366 | * weight-raised, the scenario is still symmetric if all queues with |
3367 | * the scenario is still symmetric if all queues with requests | 3367 | * requests waiting for completion happen to be |
3368 | * waiting for completion happen to be | 3368 | * weight-raised. Actually, we should be even more precise here, and |
3369 | * weight-raised. Actually, we should be even more precise | 3369 | * differentiate between interactive weight raising and soft real-time |
3370 | * here, and differentiate between interactive weight raising | 3370 | * weight raising. |
3371 | * and soft real-time weight raising. | 3371 | * |
3372 | * The second sub-condition checked in the compound condition is | ||
3373 | * whether there is a fair amount of already in-flight I/O not | ||
3374 | * belonging to bfqq. If so, I/O dispatching is to be plugged, for the | ||
3375 | * following reason. The drive may decide to serve in-flight | ||
3376 | * non-bfqq's I/O requests before bfqq's ones, thereby delaying the | ||
3377 | * arrival of new I/O requests for bfqq (recall that bfqq is sync). If | ||
3378 | * I/O-dispatching is not plugged, then, while bfqq remains empty, a | ||
3379 | * basically uncontrolled amount of I/O from other queues may be | ||
3380 | * dispatched too, possibly causing the service of bfqq's I/O to be | ||
3381 | * delayed even longer in the drive. This problem gets more and more | ||
3382 | * serious as the speed and the queue depth of the drive grow, | ||
3383 | * because, as these two quantities grow, the probability to find no | ||
3384 | * queue busy but many requests in flight grows too. By contrast, | ||
3385 | * plugging I/O dispatching minimizes the delay induced by already | ||
3386 | * in-flight I/O, and enables bfqq to recover the bandwidth it may | ||
3387 | * lose because of this delay. | ||
3372 | * | 3388 | * |
3373 | * As a side note, it is worth considering that the above | 3389 | * As a side note, it is worth considering that the above |
3374 | * device-idling countermeasures may however fail in the | 3390 | * device-idling countermeasures may however fail in the following |
3375 | * following unlucky scenario: if idling is (correctly) | 3391 | * unlucky scenario: if I/O-dispatch plugging is (correctly) disabled |
3376 | * disabled in a time period during which all symmetry | 3392 | * in a time period during which all symmetry sub-conditions hold, and |
3377 | * sub-conditions hold, and hence the device is allowed to | 3393 | * therefore the device is allowed to enqueue many requests, but at |
3378 | * enqueue many requests, but at some later point in time some | 3394 | * some later point in time some sub-condition stops to hold, then it |
3379 | * sub-condition stops to hold, then it may become impossible | 3395 | * may become impossible to make requests be served in the desired |
3380 | * to let requests be served in the desired order until all | 3396 | * order until all the requests already queued in the device have been |
3381 | * the requests already queued in the device have been served. | 3397 | * served. The last sub-condition commented above somewhat mitigates |
3398 | * this problem for weight-raised queues. | ||
3382 | */ | 3399 | */ |
3383 | static bool idling_needed_for_service_guarantees(struct bfq_data *bfqd, | 3400 | static bool idling_needed_for_service_guarantees(struct bfq_data *bfqd, |
3384 | struct bfq_queue *bfqq) | 3401 | struct bfq_queue *bfqq) |
3385 | { | 3402 | { |
3386 | return (bfqq->wr_coeff > 1 && | 3403 | return (bfqq->wr_coeff > 1 && |
3387 | bfqd->wr_busy_queues < | 3404 | (bfqd->wr_busy_queues < |
3388 | bfq_tot_busy_queues(bfqd)) || | 3405 | bfq_tot_busy_queues(bfqd) || |
3406 | bfqd->rq_in_driver >= | ||
3407 | bfqq->dispatched + 4)) || | ||
3389 | bfq_asymmetric_scenario(bfqd, bfqq); | 3408 | bfq_asymmetric_scenario(bfqd, bfqq); |
3390 | } | 3409 | } |
3391 | 3410 | ||
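The comment above motivates the second sub-condition; the predicate itself is the small expression changed in this hunk. A standalone sketch (simplified stand-in structs, not the bfq types) that mirrors the updated return expression and makes the in-flight threshold easy to experiment with:

```c
#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-ins for the bfq fields touched by this hunk. */
struct bfqd_view { int wr_busy_queues; int tot_busy_queues; int rq_in_driver; };
struct bfqq_view { int wr_coeff; int dispatched; bool asymmetric; };

/* Mirrors the updated return of idling_needed_for_service_guarantees(). */
static bool idling_needed(const struct bfqd_view *d, const struct bfqq_view *q)
{
	return (q->wr_coeff > 1 &&
		(d->wr_busy_queues < d->tot_busy_queues ||
		 d->rq_in_driver >= q->dispatched + 4)) ||
	       q->asymmetric;
}

int main(void)
{
	/* Weight-raised queue, all busy queues raised, but 6 requests already
	 * in the drive vs. 1 dispatched by this queue: plugging is now kept. */
	struct bfqd_view d = { .wr_busy_queues = 2, .tot_busy_queues = 2, .rq_in_driver = 6 };
	struct bfqq_view q = { .wr_coeff = 30, .dispatched = 1, .asymmetric = false };

	printf("idling needed: %d\n", idling_needed(&d, &q));
	return 0;
}
```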
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 24ed26957367..55a7dc227dfb 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -54,7 +54,7 @@ static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS]; | |||
54 | 54 | ||
55 | static LIST_HEAD(all_blkcgs); /* protected by blkcg_pol_mutex */ | 55 | static LIST_HEAD(all_blkcgs); /* protected by blkcg_pol_mutex */ |
56 | 56 | ||
57 | static bool blkcg_debug_stats = false; | 57 | bool blkcg_debug_stats = false; |
58 | static struct workqueue_struct *blkcg_punt_bio_wq; | 58 | static struct workqueue_struct *blkcg_punt_bio_wq; |
59 | 59 | ||
60 | static bool blkcg_policy_enabled(struct request_queue *q, | 60 | static bool blkcg_policy_enabled(struct request_queue *q, |
@@ -944,10 +944,7 @@ static int blkcg_print_stat(struct seq_file *sf, void *v) | |||
944 | dbytes, dios); | 944 | dbytes, dios); |
945 | } | 945 | } |
946 | 946 | ||
947 | if (!blkcg_debug_stats) | 947 | if (blkcg_debug_stats && atomic_read(&blkg->use_delay)) { |
948 | goto next; | ||
949 | |||
950 | if (atomic_read(&blkg->use_delay)) { | ||
951 | has_stats = true; | 948 | has_stats = true; |
952 | off += scnprintf(buf+off, size-off, | 949 | off += scnprintf(buf+off, size-off, |
953 | " use_delay=%d delay_nsec=%llu", | 950 | " use_delay=%d delay_nsec=%llu", |
@@ -967,7 +964,7 @@ static int blkcg_print_stat(struct seq_file *sf, void *v) | |||
967 | has_stats = true; | 964 | has_stats = true; |
968 | off += written; | 965 | off += written; |
969 | } | 966 | } |
970 | next: | 967 | |
971 | if (has_stats) { | 968 | if (has_stats) { |
972 | if (off < size - 1) { | 969 | if (off < size - 1) { |
973 | off += scnprintf(buf+off, size-off, "\n"); | 970 | off += scnprintf(buf+off, size-off, "\n"); |
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index d973c38ee4fd..0fff7b56df0e 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -917,6 +917,9 @@ static size_t iolatency_pd_stat(struct blkg_policy_data *pd, char *buf, | |||
917 | unsigned long long avg_lat; | 917 | unsigned long long avg_lat; |
918 | unsigned long long cur_win; | 918 | unsigned long long cur_win; |
919 | 919 | ||
920 | if (!blkcg_debug_stats) | ||
921 | return 0; | ||
922 | |||
920 | if (iolat->ssd) | 923 | if (iolat->ssd) |
921 | return iolatency_ssd_stat(iolat, buf, size); | 924 | return iolatency_ssd_stat(iolat, buf, size); |
922 | 925 | ||
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
index cf22ab00fefb..126021fc3a11 100644
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -61,15 +61,6 @@ static inline void blk_mq_sched_completed_request(struct request *rq, u64 now) | |||
61 | e->type->ops.completed_request(rq, now); | 61 | e->type->ops.completed_request(rq, now); |
62 | } | 62 | } |
63 | 63 | ||
64 | static inline void blk_mq_sched_started_request(struct request *rq) | ||
65 | { | ||
66 | struct request_queue *q = rq->q; | ||
67 | struct elevator_queue *e = q->elevator; | ||
68 | |||
69 | if (e && e->type->ops.started_request) | ||
70 | e->type->ops.started_request(rq); | ||
71 | } | ||
72 | |||
73 | static inline void blk_mq_sched_requeue_request(struct request *rq) | 64 | static inline void blk_mq_sched_requeue_request(struct request *rq) |
74 | { | 65 | { |
75 | struct request_queue *q = rq->q; | 66 | struct request_queue *q = rq->q; |
diff --git a/block/blk-mq.c b/block/blk-mq.c
index b038ec680e84..f78d3287dd82 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -669,8 +669,6 @@ void blk_mq_start_request(struct request *rq) | |||
669 | { | 669 | { |
670 | struct request_queue *q = rq->q; | 670 | struct request_queue *q = rq->q; |
671 | 671 | ||
672 | blk_mq_sched_started_request(rq); | ||
673 | |||
674 | trace_block_rq_issue(q, rq); | 672 | trace_block_rq_issue(q, rq); |
675 | 673 | ||
676 | if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) { | 674 | if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) { |
@@ -1960,9 +1958,13 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) | |||
1960 | rq = blk_mq_get_request(q, bio, &data); | 1958 | rq = blk_mq_get_request(q, bio, &data); |
1961 | if (unlikely(!rq)) { | 1959 | if (unlikely(!rq)) { |
1962 | rq_qos_cleanup(q, bio); | 1960 | rq_qos_cleanup(q, bio); |
1963 | if (bio->bi_opf & REQ_NOWAIT) | 1961 | |
1962 | cookie = BLK_QC_T_NONE; | ||
1963 | if (bio->bi_opf & REQ_NOWAIT_INLINE) | ||
1964 | cookie = BLK_QC_T_EAGAIN; | ||
1965 | else if (bio->bi_opf & REQ_NOWAIT) | ||
1964 | bio_wouldblock_error(bio); | 1966 | bio_wouldblock_error(bio); |
1965 | return BLK_QC_T_NONE; | 1967 | return cookie; |
1966 | } | 1968 | } |
1967 | 1969 | ||
1968 | trace_block_getrq(q, bio, bio->bi_opf); | 1970 | trace_block_getrq(q, bio, bio->bi_opf); |
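The hunk above makes a failed nowait allocation visible in two different ways. A hedged, self-contained sketch of that cookie selection (the REQ_* bit positions and helper below are stand-ins, not the real blk-mq definitions; only the BLK_QC_T_* values match the diff):

```c
#include <stdint.h>
#include <stdio.h>

#define REQ_NOWAIT        (1u << 0)   /* hypothetical bit positions */
#define REQ_NOWAIT_INLINE (1u << 1)

#define BLK_QC_T_NONE   (~0u)
#define BLK_QC_T_EAGAIN (~1u)

/* Stand-in for bio_wouldblock_error(): the error travels via ->bi_end_io. */
static void bio_wouldblock_error(const char *who)
{
	printf("%s: -EAGAIN delivered through bio completion\n", who);
}

/* Mirrors the allocation-failure branch of blk_mq_make_request() after this change. */
static uint32_t alloc_failed_cookie(unsigned int bi_opf)
{
	uint32_t cookie = BLK_QC_T_NONE;

	if (bi_opf & REQ_NOWAIT_INLINE)
		cookie = BLK_QC_T_EAGAIN;        /* caller sees the error inline */
	else if (bi_opf & REQ_NOWAIT)
		bio_wouldblock_error("async path");
	return cookie;
}

int main(void)
{
	printf("inline cookie: %#x\n", alloc_failed_cookie(REQ_NOWAIT | REQ_NOWAIT_INLINE));
	printf("plain cookie:  %#x\n", alloc_failed_cookie(REQ_NOWAIT));
	return 0;
}
```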
diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
index 659ccb8b693f..3954c0dc1443 100644
--- a/block/blk-rq-qos.c
+++ b/block/blk-rq-qos.c
@@ -202,6 +202,7 @@ static int rq_qos_wake_function(struct wait_queue_entry *curr, | |||
202 | return -1; | 202 | return -1; |
203 | 203 | ||
204 | data->got_token = true; | 204 | data->got_token = true; |
205 | smp_wmb(); | ||
205 | list_del_init(&curr->entry); | 206 | list_del_init(&curr->entry); |
206 | wake_up_process(data->task); | 207 | wake_up_process(data->task); |
207 | return 1; | 208 | return 1; |
@@ -244,7 +245,9 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data, | |||
244 | return; | 245 | return; |
245 | 246 | ||
246 | prepare_to_wait_exclusive(&rqw->wait, &data.wq, TASK_UNINTERRUPTIBLE); | 247 | prepare_to_wait_exclusive(&rqw->wait, &data.wq, TASK_UNINTERRUPTIBLE); |
248 | has_sleeper = !wq_has_single_sleeper(&rqw->wait); | ||
247 | do { | 249 | do { |
250 | /* The memory barrier in set_task_state saves us here. */ | ||
248 | if (data.got_token) | 251 | if (data.got_token) |
249 | break; | 252 | break; |
250 | if (!has_sleeper && acquire_inflight_cb(rqw, private_data)) { | 253 | if (!has_sleeper && acquire_inflight_cb(rqw, private_data)) { |
@@ -255,12 +258,14 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data, | |||
255 | * which means we now have two. Put our local token | 258 | * which means we now have two. Put our local token |
256 | * and wake anyone else potentially waiting for one. | 259 | * and wake anyone else potentially waiting for one. |
257 | */ | 260 | */ |
261 | smp_rmb(); | ||
258 | if (data.got_token) | 262 | if (data.got_token) |
259 | cleanup_cb(rqw, private_data); | 263 | cleanup_cb(rqw, private_data); |
260 | break; | 264 | break; |
261 | } | 265 | } |
262 | io_schedule(); | 266 | io_schedule(); |
263 | has_sleeper = false; | 267 | has_sleeper = true; |
268 | set_current_state(TASK_UNINTERRUPTIBLE); | ||
264 | } while (1); | 269 | } while (1); |
265 | finish_wait(&rqw->wait, &data.wq); | 270 | finish_wait(&rqw->wait, &data.wq); |
266 | } | 271 | } |
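The smp_wmb() added in rq_qos_wake_function() pairs with the smp_rmb() added in rq_qos_wait(): once the waiter observes the wakeup, it must also observe got_token. A userspace analogue of that publish/observe pattern using C11 fences (an illustration of the idea only, not the kernel primitives; build with -pthread):

```c
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static bool got_token;       /* plain flag, like data->got_token        */
static atomic_int woken;     /* stands in for the wakeup itself         */

static void *waker(void *arg)
{
	(void)arg;
	got_token = true;                                /* data->got_token = true; */
	atomic_thread_fence(memory_order_release);       /* smp_wmb() analogue      */
	atomic_store_explicit(&woken, 1, memory_order_relaxed); /* wake_up_process() */
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, waker, NULL);

	while (!atomic_load_explicit(&woken, memory_order_relaxed))
		;                                        /* io_schedule() stand-in  */
	atomic_thread_fence(memory_order_acquire);       /* smp_rmb() analogue      */

	/* The fence pair guarantees we also see the flag set by the waker. */
	printf("got_token = %d\n", got_token);
	pthread_join(t, NULL);
	return 0;
}
```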
diff --git a/block/genhd.c b/block/genhd.c
index 97887e59f3b2..54f1f0d381f4 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1969,7 +1969,7 @@ static const struct attribute *disk_events_attrs[] = { | |||
1969 | * The default polling interval can be specified by the kernel | 1969 | * The default polling interval can be specified by the kernel |
1970 | * parameter block.events_dfl_poll_msecs which defaults to 0 | 1970 | * parameter block.events_dfl_poll_msecs which defaults to 0 |
1971 | * (disable). This can also be modified runtime by writing to | 1971 | * (disable). This can also be modified runtime by writing to |
1972 | * /sys/module/block/events_dfl_poll_msecs. | 1972 | * /sys/module/block/parameters/events_dfl_poll_msecs. |
1973 | */ | 1973 | */ |
1974 | static int disk_events_set_dfl_poll_msecs(const char *val, | 1974 | static int disk_events_set_dfl_poll_msecs(const char *val, |
1975 | const struct kernel_param *kp) | 1975 | const struct kernel_param *kp) |
diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c
index 72312ad2e142..3a36e76eca83 100644
--- a/drivers/ata/libahci_platform.c
+++ b/drivers/ata/libahci_platform.c
@@ -408,7 +408,6 @@ struct ahci_host_priv *ahci_platform_get_resources(struct platform_device *pdev, | |||
408 | hpriv->mmio = devm_ioremap_resource(dev, | 408 | hpriv->mmio = devm_ioremap_resource(dev, |
409 | platform_get_resource(pdev, IORESOURCE_MEM, 0)); | 409 | platform_get_resource(pdev, IORESOURCE_MEM, 0)); |
410 | if (IS_ERR(hpriv->mmio)) { | 410 | if (IS_ERR(hpriv->mmio)) { |
411 | dev_err(dev, "no mmio space\n"); | ||
412 | rc = PTR_ERR(hpriv->mmio); | 411 | rc = PTR_ERR(hpriv->mmio); |
413 | goto err_out; | 412 | goto err_out; |
414 | } | 413 | } |
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 90ebfcae0ce6..2b3103c30857 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -5417,7 +5417,7 @@ static int drbd_do_auth(struct drbd_connection *connection) | |||
5417 | unsigned int key_len; | 5417 | unsigned int key_len; |
5418 | char secret[SHARED_SECRET_MAX]; /* 64 byte */ | 5418 | char secret[SHARED_SECRET_MAX]; /* 64 byte */ |
5419 | unsigned int resp_size; | 5419 | unsigned int resp_size; |
5420 | SHASH_DESC_ON_STACK(desc, connection->cram_hmac_tfm); | 5420 | struct shash_desc *desc; |
5421 | struct packet_info pi; | 5421 | struct packet_info pi; |
5422 | struct net_conf *nc; | 5422 | struct net_conf *nc; |
5423 | int err, rv; | 5423 | int err, rv; |
@@ -5430,6 +5430,13 @@ static int drbd_do_auth(struct drbd_connection *connection) | |||
5430 | memcpy(secret, nc->shared_secret, key_len); | 5430 | memcpy(secret, nc->shared_secret, key_len); |
5431 | rcu_read_unlock(); | 5431 | rcu_read_unlock(); |
5432 | 5432 | ||
5433 | desc = kmalloc(sizeof(struct shash_desc) + | ||
5434 | crypto_shash_descsize(connection->cram_hmac_tfm), | ||
5435 | GFP_KERNEL); | ||
5436 | if (!desc) { | ||
5437 | rv = -1; | ||
5438 | goto fail; | ||
5439 | } | ||
5433 | desc->tfm = connection->cram_hmac_tfm; | 5440 | desc->tfm = connection->cram_hmac_tfm; |
5434 | 5441 | ||
5435 | rv = crypto_shash_setkey(connection->cram_hmac_tfm, (u8 *)secret, key_len); | 5442 | rv = crypto_shash_setkey(connection->cram_hmac_tfm, (u8 *)secret, key_len); |
@@ -5571,7 +5578,10 @@ static int drbd_do_auth(struct drbd_connection *connection) | |||
5571 | kfree(peers_ch); | 5578 | kfree(peers_ch); |
5572 | kfree(response); | 5579 | kfree(response); |
5573 | kfree(right_response); | 5580 | kfree(right_response); |
5574 | shash_desc_zero(desc); | 5581 | if (desc) { |
5582 | shash_desc_zero(desc); | ||
5583 | kfree(desc); | ||
5584 | } | ||
5575 | 5585 | ||
5576 | return rv; | 5586 | return rv; |
5577 | } | 5587 | } |
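The drbd change swaps an on-stack descriptor, whose size depends on the tfm, for a heap allocation of sizeof(struct shash_desc) plus the per-algorithm context size. A minimal sketch of that trailing-storage allocation pattern with a hypothetical struct (not the crypto API):

```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Hypothetical stand-in for struct shash_desc: a fixed header followed by
 * per-algorithm context whose size is only known at run time. */
struct desc {
	const char *tfm_name;
	unsigned char ctx[];          /* flexible array member */
};

static struct desc *desc_alloc(const char *tfm_name, size_t ctx_size)
{
	/* Mirrors kmalloc(sizeof(*desc) + crypto_shash_descsize(tfm), GFP_KERNEL). */
	struct desc *d = malloc(sizeof(*d) + ctx_size);

	if (!d)
		return NULL;
	d->tfm_name = tfm_name;
	memset(d->ctx, 0, ctx_size);
	return d;
}

int main(void)
{
	struct desc *d = desc_alloc("hmac(sha256)", 208);  /* context size picked arbitrarily */

	if (!d)
		return 1;
	printf("allocated descriptor for %s\n", d->tfm_name);
	/* Like the fail path in drbd_do_auth(): wipe, then free. */
	memset(d, 0, sizeof(*d) + 208);
	free(d);
	return 0;
}
```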
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 26e374fbf57c..20ed838e9413 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -931,6 +931,9 @@ int bch_cached_dev_run(struct cached_dev *dc) | |||
931 | if (dc->io_disable) { | 931 | if (dc->io_disable) { |
932 | pr_err("I/O disabled on cached dev %s", | 932 | pr_err("I/O disabled on cached dev %s", |
933 | dc->backing_dev_name); | 933 | dc->backing_dev_name); |
934 | kfree(env[1]); | ||
935 | kfree(env[2]); | ||
936 | kfree(buf); | ||
934 | return -EIO; | 937 | return -EIO; |
935 | } | 938 | } |
936 | 939 | ||
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index cc09b81fc7f4..8f3fbe5ca937 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2311,17 +2311,15 @@ static void nvme_init_subnqn(struct nvme_subsystem *subsys, struct nvme_ctrl *ct | |||
2311 | memset(subsys->subnqn + off, 0, sizeof(subsys->subnqn) - off); | 2311 | memset(subsys->subnqn + off, 0, sizeof(subsys->subnqn) - off); |
2312 | } | 2312 | } |
2313 | 2313 | ||
2314 | static void __nvme_release_subsystem(struct nvme_subsystem *subsys) | 2314 | static void nvme_release_subsystem(struct device *dev) |
2315 | { | 2315 | { |
2316 | struct nvme_subsystem *subsys = | ||
2317 | container_of(dev, struct nvme_subsystem, dev); | ||
2318 | |||
2316 | ida_simple_remove(&nvme_subsystems_ida, subsys->instance); | 2319 | ida_simple_remove(&nvme_subsystems_ida, subsys->instance); |
2317 | kfree(subsys); | 2320 | kfree(subsys); |
2318 | } | 2321 | } |
2319 | 2322 | ||
2320 | static void nvme_release_subsystem(struct device *dev) | ||
2321 | { | ||
2322 | __nvme_release_subsystem(container_of(dev, struct nvme_subsystem, dev)); | ||
2323 | } | ||
2324 | |||
2325 | static void nvme_destroy_subsystem(struct kref *ref) | 2323 | static void nvme_destroy_subsystem(struct kref *ref) |
2326 | { | 2324 | { |
2327 | struct nvme_subsystem *subsys = | 2325 | struct nvme_subsystem *subsys = |
@@ -2477,7 +2475,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) | |||
2477 | mutex_lock(&nvme_subsystems_lock); | 2475 | mutex_lock(&nvme_subsystems_lock); |
2478 | found = __nvme_find_get_subsystem(subsys->subnqn); | 2476 | found = __nvme_find_get_subsystem(subsys->subnqn); |
2479 | if (found) { | 2477 | if (found) { |
2480 | __nvme_release_subsystem(subsys); | 2478 | put_device(&subsys->dev); |
2481 | subsys = found; | 2479 | subsys = found; |
2482 | 2480 | ||
2483 | if (!nvme_validate_cntlid(subsys, ctrl, id)) { | 2481 | if (!nvme_validate_cntlid(subsys, ctrl, id)) { |
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index a9a927677970..4f0d0d12744e 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -12,11 +12,6 @@ module_param(multipath, bool, 0444); | |||
12 | MODULE_PARM_DESC(multipath, | 12 | MODULE_PARM_DESC(multipath, |
13 | "turn on native support for multiple controllers per subsystem"); | 13 | "turn on native support for multiple controllers per subsystem"); |
14 | 14 | ||
15 | inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl) | ||
16 | { | ||
17 | return multipath && ctrl->subsys && (ctrl->subsys->cmic & (1 << 3)); | ||
18 | } | ||
19 | |||
20 | /* | 15 | /* |
21 | * If multipathing is enabled we need to always use the subsystem instance | 16 | * If multipathing is enabled we need to always use the subsystem instance |
22 | * number for numbering our devices to avoid conflicts between subsystems that | 17 | * number for numbering our devices to avoid conflicts between subsystems that |
@@ -622,7 +617,8 @@ int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) | |||
622 | { | 617 | { |
623 | int error; | 618 | int error; |
624 | 619 | ||
625 | if (!nvme_ctrl_use_ana(ctrl)) | 620 | /* check if multipath is enabled and we have the capability */ |
621 | if (!multipath || !ctrl->subsys || !(ctrl->subsys->cmic & (1 << 3))) | ||
626 | return 0; | 622 | return 0; |
627 | 623 | ||
628 | ctrl->anacap = id->anacap; | 624 | ctrl->anacap = id->anacap; |
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 716a876119c8..26b563f9985b 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -485,7 +485,11 @@ extern const struct attribute_group *nvme_ns_id_attr_groups[]; | |||
485 | extern const struct block_device_operations nvme_ns_head_ops; | 485 | extern const struct block_device_operations nvme_ns_head_ops; |
486 | 486 | ||
487 | #ifdef CONFIG_NVME_MULTIPATH | 487 | #ifdef CONFIG_NVME_MULTIPATH |
488 | bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl); | 488 | static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl) |
489 | { | ||
490 | return ctrl->ana_log_buf != NULL; | ||
491 | } | ||
492 | |||
489 | void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns, | 493 | void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns, |
490 | struct nvme_ctrl *ctrl, int *flags); | 494 | struct nvme_ctrl *ctrl, int *flags); |
491 | void nvme_failover_req(struct request *req); | 495 | void nvme_failover_req(struct request *req); |
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index bb970ca82517..db160cee42ad 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2254,9 +2254,7 @@ static int nvme_dev_add(struct nvme_dev *dev) | |||
2254 | if (!dev->ctrl.tagset) { | 2254 | if (!dev->ctrl.tagset) { |
2255 | dev->tagset.ops = &nvme_mq_ops; | 2255 | dev->tagset.ops = &nvme_mq_ops; |
2256 | dev->tagset.nr_hw_queues = dev->online_queues - 1; | 2256 | dev->tagset.nr_hw_queues = dev->online_queues - 1; |
2257 | dev->tagset.nr_maps = 1; /* default */ | 2257 | dev->tagset.nr_maps = 2; /* default + read */ |
2258 | if (dev->io_queues[HCTX_TYPE_READ]) | ||
2259 | dev->tagset.nr_maps++; | ||
2260 | if (dev->io_queues[HCTX_TYPE_POLL]) | 2258 | if (dev->io_queues[HCTX_TYPE_POLL]) |
2261 | dev->tagset.nr_maps++; | 2259 | dev->tagset.nr_maps++; |
2262 | dev->tagset.timeout = NVME_IO_TIMEOUT; | 2260 | dev->tagset.timeout = NVME_IO_TIMEOUT; |
@@ -3029,6 +3027,8 @@ static const struct pci_device_id nvme_id_table[] = { | |||
3029 | .driver_data = NVME_QUIRK_LIGHTNVM, }, | 3027 | .driver_data = NVME_QUIRK_LIGHTNVM, }, |
3030 | { PCI_DEVICE(0x1d1d, 0x2601), /* CNEX Granby */ | 3028 | { PCI_DEVICE(0x1d1d, 0x2601), /* CNEX Granby */ |
3031 | .driver_data = NVME_QUIRK_LIGHTNVM, }, | 3029 | .driver_data = NVME_QUIRK_LIGHTNVM, }, |
3030 | { PCI_DEVICE(0x10ec, 0x5762), /* ADATA SX6000LNP */ | ||
3031 | .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, }, | ||
3032 | { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) }, | 3032 | { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) }, |
3033 | { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) }, | 3033 | { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) }, |
3034 | { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) }, | 3034 | { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) }, |
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 4707dfff991b..c2a85b587922 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -345,15 +345,24 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) | |||
345 | struct bio *bio; | 345 | struct bio *bio; |
346 | bool is_poll = (iocb->ki_flags & IOCB_HIPRI) != 0; | 346 | bool is_poll = (iocb->ki_flags & IOCB_HIPRI) != 0; |
347 | bool is_read = (iov_iter_rw(iter) == READ), is_sync; | 347 | bool is_read = (iov_iter_rw(iter) == READ), is_sync; |
348 | bool nowait = (iocb->ki_flags & IOCB_NOWAIT) != 0; | ||
348 | loff_t pos = iocb->ki_pos; | 349 | loff_t pos = iocb->ki_pos; |
349 | blk_qc_t qc = BLK_QC_T_NONE; | 350 | blk_qc_t qc = BLK_QC_T_NONE; |
350 | int ret = 0; | 351 | gfp_t gfp; |
352 | ssize_t ret; | ||
351 | 353 | ||
352 | if ((pos | iov_iter_alignment(iter)) & | 354 | if ((pos | iov_iter_alignment(iter)) & |
353 | (bdev_logical_block_size(bdev) - 1)) | 355 | (bdev_logical_block_size(bdev) - 1)) |
354 | return -EINVAL; | 356 | return -EINVAL; |
355 | 357 | ||
356 | bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, &blkdev_dio_pool); | 358 | if (nowait) |
359 | gfp = GFP_NOWAIT; | ||
360 | else | ||
361 | gfp = GFP_KERNEL; | ||
362 | |||
363 | bio = bio_alloc_bioset(gfp, nr_pages, &blkdev_dio_pool); | ||
364 | if (!bio) | ||
365 | return -EAGAIN; | ||
357 | 366 | ||
358 | dio = container_of(bio, struct blkdev_dio, bio); | 367 | dio = container_of(bio, struct blkdev_dio, bio); |
359 | dio->is_sync = is_sync = is_sync_kiocb(iocb); | 368 | dio->is_sync = is_sync = is_sync_kiocb(iocb); |
@@ -375,7 +384,10 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) | |||
375 | if (!is_poll) | 384 | if (!is_poll) |
376 | blk_start_plug(&plug); | 385 | blk_start_plug(&plug); |
377 | 386 | ||
387 | ret = 0; | ||
378 | for (;;) { | 388 | for (;;) { |
389 | int err; | ||
390 | |||
379 | bio_set_dev(bio, bdev); | 391 | bio_set_dev(bio, bdev); |
380 | bio->bi_iter.bi_sector = pos >> 9; | 392 | bio->bi_iter.bi_sector = pos >> 9; |
381 | bio->bi_write_hint = iocb->ki_hint; | 393 | bio->bi_write_hint = iocb->ki_hint; |
@@ -383,8 +395,10 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) | |||
383 | bio->bi_end_io = blkdev_bio_end_io; | 395 | bio->bi_end_io = blkdev_bio_end_io; |
384 | bio->bi_ioprio = iocb->ki_ioprio; | 396 | bio->bi_ioprio = iocb->ki_ioprio; |
385 | 397 | ||
386 | ret = bio_iov_iter_get_pages(bio, iter); | 398 | err = bio_iov_iter_get_pages(bio, iter); |
387 | if (unlikely(ret)) { | 399 | if (unlikely(err)) { |
400 | if (!ret) | ||
401 | ret = err; | ||
388 | bio->bi_status = BLK_STS_IOERR; | 402 | bio->bi_status = BLK_STS_IOERR; |
389 | bio_endio(bio); | 403 | bio_endio(bio); |
390 | break; | 404 | break; |
@@ -399,6 +413,14 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) | |||
399 | task_io_account_write(bio->bi_iter.bi_size); | 413 | task_io_account_write(bio->bi_iter.bi_size); |
400 | } | 414 | } |
401 | 415 | ||
416 | /* | ||
417 | * Tell underlying layer to not block for resource shortage. | ||
418 | * And if we would have blocked, return error inline instead | ||
419 | * of through the bio->bi_end_io() callback. | ||
420 | */ | ||
421 | if (nowait) | ||
422 | bio->bi_opf |= (REQ_NOWAIT | REQ_NOWAIT_INLINE); | ||
423 | |||
402 | dio->size += bio->bi_iter.bi_size; | 424 | dio->size += bio->bi_iter.bi_size; |
403 | pos += bio->bi_iter.bi_size; | 425 | pos += bio->bi_iter.bi_size; |
404 | 426 | ||
@@ -412,6 +434,11 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) | |||
412 | } | 434 | } |
413 | 435 | ||
414 | qc = submit_bio(bio); | 436 | qc = submit_bio(bio); |
437 | if (qc == BLK_QC_T_EAGAIN) { | ||
438 | if (!ret) | ||
439 | ret = -EAGAIN; | ||
440 | goto error; | ||
441 | } | ||
415 | 442 | ||
416 | if (polled) | 443 | if (polled) |
417 | WRITE_ONCE(iocb->ki_cookie, qc); | 444 | WRITE_ONCE(iocb->ki_cookie, qc); |
@@ -432,8 +459,20 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) | |||
432 | atomic_inc(&dio->ref); | 459 | atomic_inc(&dio->ref); |
433 | } | 460 | } |
434 | 461 | ||
435 | submit_bio(bio); | 462 | qc = submit_bio(bio); |
436 | bio = bio_alloc(GFP_KERNEL, nr_pages); | 463 | if (qc == BLK_QC_T_EAGAIN) { |
464 | if (!ret) | ||
465 | ret = -EAGAIN; | ||
466 | goto error; | ||
467 | } | ||
468 | ret += bio->bi_iter.bi_size; | ||
469 | |||
470 | bio = bio_alloc(gfp, nr_pages); | ||
471 | if (!bio) { | ||
472 | if (!ret) | ||
473 | ret = -EAGAIN; | ||
474 | goto error; | ||
475 | } | ||
437 | } | 476 | } |
438 | 477 | ||
439 | if (!is_poll) | 478 | if (!is_poll) |
@@ -453,13 +492,16 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) | |||
453 | } | 492 | } |
454 | __set_current_state(TASK_RUNNING); | 493 | __set_current_state(TASK_RUNNING); |
455 | 494 | ||
495 | out: | ||
456 | if (!ret) | 496 | if (!ret) |
457 | ret = blk_status_to_errno(dio->bio.bi_status); | 497 | ret = blk_status_to_errno(dio->bio.bi_status); |
458 | if (likely(!ret)) | ||
459 | ret = dio->size; | ||
460 | 498 | ||
461 | bio_put(&dio->bio); | 499 | bio_put(&dio->bio); |
462 | return ret; | 500 | return ret; |
501 | error: | ||
502 | if (!is_poll) | ||
503 | blk_finish_plug(&plug); | ||
504 | goto out; | ||
463 | } | 505 | } |
464 | 506 | ||
465 | static ssize_t | 507 | static ssize_t |
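From userspace, the behaviour fixed above is reachable with O_DIRECT plus RWF_NOWAIT: instead of blocking on request or bio allocation, the submission fails fast with EAGAIN, which the caller can retry later or re-issue without the flag. A hedged example (the device path is a placeholder; adjust the alignment for your device):

```c
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/uio.h>
#include <unistd.h>

int main(void)
{
	/* /dev/nullb0 is only an example block device; pick one on your system. */
	int fd = open("/dev/nullb0", O_RDONLY | O_DIRECT);
	void *buf;

	if (fd < 0 || posix_memalign(&buf, 4096, 4096))
		return 1;

	struct iovec iov = { .iov_base = buf, .iov_len = 4096 };
	ssize_t ret = preadv2(fd, &iov, 1, 0, RWF_NOWAIT);

	if (ret < 0 && errno == EAGAIN)
		printf("would block: retry later or re-issue without RWF_NOWAIT\n");
	else if (ret < 0)
		printf("read failed: %s\n", strerror(errno));
	else
		printf("read %zd bytes without blocking\n", ret);

	free(buf);
	close(fd);
	return 0;
}
```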
diff --git a/fs/io_uring.c b/fs/io_uring.c
index e2a66e12fbc6..012bc0efb9d3 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -202,7 +202,7 @@ struct async_list { | |||
202 | 202 | ||
203 | struct file *file; | 203 | struct file *file; |
204 | off_t io_end; | 204 | off_t io_end; |
205 | size_t io_pages; | 205 | size_t io_len; |
206 | }; | 206 | }; |
207 | 207 | ||
208 | struct io_ring_ctx { | 208 | struct io_ring_ctx { |
@@ -333,7 +333,8 @@ struct io_kiocb { | |||
333 | #define REQ_F_IO_DRAIN 16 /* drain existing IO first */ | 333 | #define REQ_F_IO_DRAIN 16 /* drain existing IO first */ |
334 | #define REQ_F_IO_DRAINED 32 /* drain done */ | 334 | #define REQ_F_IO_DRAINED 32 /* drain done */ |
335 | #define REQ_F_LINK 64 /* linked sqes */ | 335 | #define REQ_F_LINK 64 /* linked sqes */ |
336 | #define REQ_F_FAIL_LINK 128 /* fail rest of links */ | 336 | #define REQ_F_LINK_DONE 128 /* linked sqes done */ |
337 | #define REQ_F_FAIL_LINK 256 /* fail rest of links */ | ||
337 | u64 user_data; | 338 | u64 user_data; |
338 | u32 result; | 339 | u32 result; |
339 | u32 sequence; | 340 | u32 sequence; |
@@ -429,7 +430,7 @@ static inline bool io_sequence_defer(struct io_ring_ctx *ctx, | |||
429 | if ((req->flags & (REQ_F_IO_DRAIN|REQ_F_IO_DRAINED)) != REQ_F_IO_DRAIN) | 430 | if ((req->flags & (REQ_F_IO_DRAIN|REQ_F_IO_DRAINED)) != REQ_F_IO_DRAIN) |
430 | return false; | 431 | return false; |
431 | 432 | ||
432 | return req->sequence > ctx->cached_cq_tail + ctx->sq_ring->dropped; | 433 | return req->sequence != ctx->cached_cq_tail + ctx->sq_ring->dropped; |
433 | } | 434 | } |
434 | 435 | ||
435 | static struct io_kiocb *io_get_deferred_req(struct io_ring_ctx *ctx) | 436 | static struct io_kiocb *io_get_deferred_req(struct io_ring_ctx *ctx) |
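The defer check above switches from '>' to '!=' because the 32-bit sequence space can wrap: after wraparound, a request that must still wait compares as "not greater". A small demonstration of the difference with unsigned arithmetic:

```c
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* A request tagged just after the 32-bit sequence space wrapped... */
	uint32_t req_sequence = 2;
	/* ...while completions have not caught up to the wrap point yet. */
	uint32_t cq_tail_plus_dropped = UINT32_MAX - 5;

	/* Old check: ">" claims no deferral is needed, which is wrong after wrap. */
	printf("old check defers: %d\n", req_sequence > cq_tail_plus_dropped);

	/* New check: "!=" keeps deferring until the tail reaches the sequence. */
	printf("new check defers: %d\n", req_sequence != cq_tail_plus_dropped);
	return 0;
}
```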
@@ -632,6 +633,7 @@ static void io_req_link_next(struct io_kiocb *req) | |||
632 | nxt->flags |= REQ_F_LINK; | 633 | nxt->flags |= REQ_F_LINK; |
633 | } | 634 | } |
634 | 635 | ||
636 | nxt->flags |= REQ_F_LINK_DONE; | ||
635 | INIT_WORK(&nxt->work, io_sq_wq_submit_work); | 637 | INIT_WORK(&nxt->work, io_sq_wq_submit_work); |
636 | queue_work(req->ctx->sqo_wq, &nxt->work); | 638 | queue_work(req->ctx->sqo_wq, &nxt->work); |
637 | } | 639 | } |
@@ -1064,8 +1066,44 @@ static int io_import_fixed(struct io_ring_ctx *ctx, int rw, | |||
1064 | */ | 1066 | */ |
1065 | offset = buf_addr - imu->ubuf; | 1067 | offset = buf_addr - imu->ubuf; |
1066 | iov_iter_bvec(iter, rw, imu->bvec, imu->nr_bvecs, offset + len); | 1068 | iov_iter_bvec(iter, rw, imu->bvec, imu->nr_bvecs, offset + len); |
1067 | if (offset) | 1069 | |
1068 | iov_iter_advance(iter, offset); | 1070 | if (offset) { |
1071 | /* | ||
1072 | * Don't use iov_iter_advance() here, as it's really slow for | ||
1073 | * using the latter parts of a big fixed buffer - it iterates | ||
1074 | * over each segment manually. We can cheat a bit here, because | ||
1075 | * we know that: | ||
1076 | * | ||
1077 | * 1) it's a BVEC iter, we set it up | ||
1078 | * 2) all bvecs are PAGE_SIZE in size, except potentially the | ||
1079 | * first and last bvec | ||
1080 | * | ||
1081 | * So just find our index, and adjust the iterator afterwards. | ||
1082 | * If the offset is within the first bvec (or the whole first | ||
1083 | * bvec, just use iov_iter_advance(). This makes it easier | ||
1084 | * since we can just skip the first segment, which may not | ||
1085 | * be PAGE_SIZE aligned. | ||
1086 | */ | ||
1087 | const struct bio_vec *bvec = imu->bvec; | ||
1088 | |||
1089 | if (offset <= bvec->bv_len) { | ||
1090 | iov_iter_advance(iter, offset); | ||
1091 | } else { | ||
1092 | unsigned long seg_skip; | ||
1093 | |||
1094 | /* skip first vec */ | ||
1095 | offset -= bvec->bv_len; | ||
1096 | seg_skip = 1 + (offset >> PAGE_SHIFT); | ||
1097 | |||
1098 | iter->bvec = bvec + seg_skip; | ||
1099 | iter->nr_segs -= seg_skip; | ||
1100 | iter->count -= (seg_skip << PAGE_SHIFT); | ||
1101 | iter->iov_offset = offset & ~PAGE_MASK; | ||
1102 | if (iter->iov_offset) | ||
1103 | iter->count -= iter->iov_offset; | ||
1104 | } | ||
1105 | } | ||
1106 | |||
1069 | return 0; | 1107 | return 0; |
1070 | } | 1108 | } |
1071 | 1109 | ||
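A standalone sketch of the index arithmetic this hunk uses instead of iov_iter_advance(): every registered-buffer segment except possibly the first is PAGE_SIZE, so an offset past the first segment converts directly into a segment index plus an in-page offset (simplified types, not the iov_iter API):

```c
#include <stdio.h>

#define PAGE_SIZE  4096UL
#define PAGE_SHIFT 12
#define PAGE_MASK  (~(PAGE_SIZE - 1))

struct seg_pos { unsigned long seg_skip; unsigned long iov_offset; };

/*
 * first_len is the length of the first (possibly unaligned) segment; all
 * following segments are PAGE_SIZE, as io_import_fixed() can rely on.
 */
static struct seg_pos locate(unsigned long offset, unsigned long first_len)
{
	struct seg_pos pos = { 0, offset };

	if (offset > first_len) {
		offset -= first_len;                    /* skip first vec         */
		pos.seg_skip = 1 + (offset >> PAGE_SHIFT);
		pos.iov_offset = offset & ~PAGE_MASK;   /* offset within segment  */
	}
	return pos;
}

int main(void)
{
	/* First segment is 1024 bytes; offset lands 100 bytes into segment 3. */
	struct seg_pos pos = locate(1024 + 2 * PAGE_SIZE + 100, 1024);

	printf("skip %lu segments, start %lu bytes in\n", pos.seg_skip, pos.iov_offset);
	return 0;
}
```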
@@ -1120,28 +1158,26 @@ static void io_async_list_note(int rw, struct io_kiocb *req, size_t len) | |||
1120 | off_t io_end = kiocb->ki_pos + len; | 1158 | off_t io_end = kiocb->ki_pos + len; |
1121 | 1159 | ||
1122 | if (filp == async_list->file && kiocb->ki_pos == async_list->io_end) { | 1160 | if (filp == async_list->file && kiocb->ki_pos == async_list->io_end) { |
1123 | unsigned long max_pages; | 1161 | unsigned long max_bytes; |
1124 | 1162 | ||
1125 | /* Use 8x RA size as a decent limiter for both reads/writes */ | 1163 | /* Use 8x RA size as a decent limiter for both reads/writes */ |
1126 | max_pages = filp->f_ra.ra_pages; | 1164 | max_bytes = filp->f_ra.ra_pages << (PAGE_SHIFT + 3); |
1127 | if (!max_pages) | 1165 | if (!max_bytes) |
1128 | max_pages = VM_READAHEAD_PAGES; | 1166 | max_bytes = VM_READAHEAD_PAGES << (PAGE_SHIFT + 3); |
1129 | max_pages *= 8; | 1167 | |
1130 | 1168 | /* If max len are exceeded, reset the state */ | |
1131 | /* If max pages are exceeded, reset the state */ | 1169 | if (async_list->io_len + len <= max_bytes) { |
1132 | len >>= PAGE_SHIFT; | ||
1133 | if (async_list->io_pages + len <= max_pages) { | ||
1134 | req->flags |= REQ_F_SEQ_PREV; | 1170 | req->flags |= REQ_F_SEQ_PREV; |
1135 | async_list->io_pages += len; | 1171 | async_list->io_len += len; |
1136 | } else { | 1172 | } else { |
1137 | io_end = 0; | 1173 | io_end = 0; |
1138 | async_list->io_pages = 0; | 1174 | async_list->io_len = 0; |
1139 | } | 1175 | } |
1140 | } | 1176 | } |
1141 | 1177 | ||
1142 | /* New file? Reset state. */ | 1178 | /* New file? Reset state. */ |
1143 | if (async_list->file != filp) { | 1179 | if (async_list->file != filp) { |
1144 | async_list->io_pages = 0; | 1180 | async_list->io_len = 0; |
1145 | async_list->file = filp; | 1181 | async_list->file = filp; |
1146 | } | 1182 | } |
1147 | async_list->io_end = io_end; | 1183 | async_list->io_end = io_end; |
@@ -1630,6 +1666,8 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe) | |||
1630 | INIT_LIST_HEAD(&poll->wait.entry); | 1666 | INIT_LIST_HEAD(&poll->wait.entry); |
1631 | init_waitqueue_func_entry(&poll->wait, io_poll_wake); | 1667 | init_waitqueue_func_entry(&poll->wait, io_poll_wake); |
1632 | 1668 | ||
1669 | INIT_LIST_HEAD(&req->list); | ||
1670 | |||
1633 | mask = vfs_poll(poll->file, &ipt.pt) & poll->events; | 1671 | mask = vfs_poll(poll->file, &ipt.pt) & poll->events; |
1634 | 1672 | ||
1635 | spin_lock_irq(&ctx->completion_lock); | 1673 | spin_lock_irq(&ctx->completion_lock); |
@@ -1844,6 +1882,10 @@ restart: | |||
1844 | /* async context always use a copy of the sqe */ | 1882 | /* async context always use a copy of the sqe */ |
1845 | kfree(sqe); | 1883 | kfree(sqe); |
1846 | 1884 | ||
1885 | /* req from defer and link list needn't decrease async cnt */ | ||
1886 | if (req->flags & (REQ_F_IO_DRAINED | REQ_F_LINK_DONE)) | ||
1887 | goto out; | ||
1888 | |||
1847 | if (!async_list) | 1889 | if (!async_list) |
1848 | break; | 1890 | break; |
1849 | if (!list_empty(&req_list)) { | 1891 | if (!list_empty(&req_list)) { |
@@ -1891,6 +1933,7 @@ restart: | |||
1891 | } | 1933 | } |
1892 | } | 1934 | } |
1893 | 1935 | ||
1936 | out: | ||
1894 | if (cur_mm) { | 1937 | if (cur_mm) { |
1895 | set_fs(old_fs); | 1938 | set_fs(old_fs); |
1896 | unuse_mm(cur_mm); | 1939 | unuse_mm(cur_mm); |
@@ -1917,6 +1960,10 @@ static bool io_add_to_prev_work(struct async_list *list, struct io_kiocb *req) | |||
1917 | ret = true; | 1960 | ret = true; |
1918 | spin_lock(&list->lock); | 1961 | spin_lock(&list->lock); |
1919 | list_add_tail(&req->list, &list->list); | 1962 | list_add_tail(&req->list, &list->list); |
1963 | /* | ||
1964 | * Ensure we see a simultaneous modification from io_sq_wq_submit_work() | ||
1965 | */ | ||
1966 | smp_mb(); | ||
1920 | if (!atomic_read(&list->cnt)) { | 1967 | if (!atomic_read(&list->cnt)) { |
1921 | list_del_init(&req->list); | 1968 | list_del_init(&req->list); |
1922 | ret = false; | 1969 | ret = false; |
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 689a58231288..12811091fd50 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -181,6 +181,7 @@ struct blkcg_policy { | |||
181 | 181 | ||
182 | extern struct blkcg blkcg_root; | 182 | extern struct blkcg blkcg_root; |
183 | extern struct cgroup_subsys_state * const blkcg_root_css; | 183 | extern struct cgroup_subsys_state * const blkcg_root_css; |
184 | extern bool blkcg_debug_stats; | ||
184 | 185 | ||
185 | struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg, | 186 | struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg, |
186 | struct request_queue *q, bool update_hint); | 187 | struct request_queue *q, bool update_hint); |
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index feff3fe4467e..1b1fa1557e68 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -311,6 +311,7 @@ enum req_flag_bits { | |||
311 | __REQ_RAHEAD, /* read ahead, can fail anytime */ | 311 | __REQ_RAHEAD, /* read ahead, can fail anytime */ |
312 | __REQ_BACKGROUND, /* background IO */ | 312 | __REQ_BACKGROUND, /* background IO */ |
313 | __REQ_NOWAIT, /* Don't wait if request will block */ | 313 | __REQ_NOWAIT, /* Don't wait if request will block */ |
314 | __REQ_NOWAIT_INLINE, /* Return would-block error inline */ | ||
314 | /* | 315 | /* |
315 | * When a shared kthread needs to issue a bio for a cgroup, doing | 316 | * When a shared kthread needs to issue a bio for a cgroup, doing |
316 | * so synchronously can lead to priority inversions as the kthread | 317 | * so synchronously can lead to priority inversions as the kthread |
@@ -345,6 +346,7 @@ enum req_flag_bits { | |||
345 | #define REQ_RAHEAD (1ULL << __REQ_RAHEAD) | 346 | #define REQ_RAHEAD (1ULL << __REQ_RAHEAD) |
346 | #define REQ_BACKGROUND (1ULL << __REQ_BACKGROUND) | 347 | #define REQ_BACKGROUND (1ULL << __REQ_BACKGROUND) |
347 | #define REQ_NOWAIT (1ULL << __REQ_NOWAIT) | 348 | #define REQ_NOWAIT (1ULL << __REQ_NOWAIT) |
349 | #define REQ_NOWAIT_INLINE (1ULL << __REQ_NOWAIT_INLINE) | ||
348 | #define REQ_CGROUP_PUNT (1ULL << __REQ_CGROUP_PUNT) | 350 | #define REQ_CGROUP_PUNT (1ULL << __REQ_CGROUP_PUNT) |
349 | 351 | ||
350 | #define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP) | 352 | #define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP) |
@@ -418,12 +420,13 @@ static inline int op_stat_group(unsigned int op) | |||
418 | 420 | ||
419 | typedef unsigned int blk_qc_t; | 421 | typedef unsigned int blk_qc_t; |
420 | #define BLK_QC_T_NONE -1U | 422 | #define BLK_QC_T_NONE -1U |
423 | #define BLK_QC_T_EAGAIN -2U | ||
421 | #define BLK_QC_T_SHIFT 16 | 424 | #define BLK_QC_T_SHIFT 16 |
422 | #define BLK_QC_T_INTERNAL (1U << 31) | 425 | #define BLK_QC_T_INTERNAL (1U << 31) |
423 | 426 | ||
424 | static inline bool blk_qc_t_valid(blk_qc_t cookie) | 427 | static inline bool blk_qc_t_valid(blk_qc_t cookie) |
425 | { | 428 | { |
426 | return cookie != BLK_QC_T_NONE; | 429 | return cookie != BLK_QC_T_NONE && cookie != BLK_QC_T_EAGAIN; |
427 | } | 430 | } |
428 | 431 | ||
429 | static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie) | 432 | static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie) |
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 17cd0078377c..1dd014c9c87b 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -45,7 +45,6 @@ struct elevator_mq_ops { | |||
45 | struct request *(*dispatch_request)(struct blk_mq_hw_ctx *); | 45 | struct request *(*dispatch_request)(struct blk_mq_hw_ctx *); |
46 | bool (*has_work)(struct blk_mq_hw_ctx *); | 46 | bool (*has_work)(struct blk_mq_hw_ctx *); |
47 | void (*completed_request)(struct request *, u64); | 47 | void (*completed_request)(struct request *, u64); |
48 | void (*started_request)(struct request *); | ||
49 | void (*requeue_request)(struct request *); | 48 | void (*requeue_request)(struct request *); |
50 | struct request *(*former_request)(struct request_queue *, struct request *); | 49 | struct request *(*former_request)(struct request_queue *, struct request *); |
51 | struct request *(*next_request)(struct request_queue *, struct request *); | 50 | struct request *(*next_request)(struct request_queue *, struct request *); |
diff --git a/include/linux/wait.h b/include/linux/wait.h
index b6f77cf60dd7..30c515520fb2 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -127,6 +127,19 @@ static inline int waitqueue_active(struct wait_queue_head *wq_head) | |||
127 | } | 127 | } |
128 | 128 | ||
129 | /** | 129 | /** |
130 | * wq_has_single_sleeper - check if there is only one sleeper | ||
131 | * @wq_head: wait queue head | ||
132 | * | ||
133 | * Returns true of wq_head has only one sleeper on the list. | ||
134 | * | ||
135 | * Please refer to the comment for waitqueue_active. | ||
136 | */ | ||
137 | static inline bool wq_has_single_sleeper(struct wait_queue_head *wq_head) | ||
138 | { | ||
139 | return list_is_singular(&wq_head->head); | ||
140 | } | ||
141 | |||
142 | /** | ||
130 | * wq_has_sleeper - check if there are any waiting processes | 143 | * wq_has_sleeper - check if there are any waiting processes |
131 | * @wq_head: wait queue head | 144 | * @wq_head: wait queue head |
132 | * | 145 | * |
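The wq_has_single_sleeper() helper added above reduces to list_is_singular() on the wait list: non-empty and first == last. A tiny self-contained illustration with a circular doubly linked list (not the kernel list implementation):

```c
#include <stdbool.h>
#include <stdio.h>

struct node { struct node *next, *prev; };

static void list_init(struct node *head) { head->next = head->prev = head; }

static void list_add_tail(struct node *n, struct node *head)
{
	n->prev = head->prev;
	n->next = head;
	head->prev->next = n;
	head->prev = n;
}

/* Same test as the kernel's list_is_singular(): non-empty and next == prev. */
static bool list_is_singular(const struct node *head)
{
	return head->next != head && head->next == head->prev;
}

int main(void)
{
	struct node head, a, b;

	list_init(&head);
	printf("empty list:  singular=%d\n", list_is_singular(&head));
	list_add_tail(&a, &head);
	printf("one waiter:  singular=%d\n", list_is_singular(&head));
	list_add_tail(&b, &head);
	printf("two waiters: singular=%d\n", list_is_singular(&head));
	return 0;
}
```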