-rw-r--r--  block/blk-map.c             |  91
-rw-r--r--  block/blk-merge.c           |   8
-rw-r--r--  drivers/nvme/host/core.c    | 111
-rw-r--r--  drivers/nvme/host/nvme.h    |   8
-rw-r--r--  drivers/nvme/host/pci.c     | 149
-rw-r--r--  fs/fs-writeback.c           |  54
-rw-r--r--  fs/super.c                  |   1
-rw-r--r--  include/linux/bio.h         |  37
-rw-r--r--  include/linux/blkdev.h      |  25
-rw-r--r--  include/linux/writeback.h   |   5
10 files changed, 341 insertions(+), 148 deletions(-)
diff --git a/block/blk-map.c b/block/blk-map.c
index f565e11f465a..a54f0543b956 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -57,6 +57,49 @@ static int __blk_rq_unmap_user(struct bio *bio) | |||
57 | return ret; | 57 | return ret; |
58 | } | 58 | } |
59 | 59 | ||
60 | static int __blk_rq_map_user_iov(struct request *rq, | ||
61 | struct rq_map_data *map_data, struct iov_iter *iter, | ||
62 | gfp_t gfp_mask, bool copy) | ||
63 | { | ||
64 | struct request_queue *q = rq->q; | ||
65 | struct bio *bio, *orig_bio; | ||
66 | int ret; | ||
67 | |||
68 | if (copy) | ||
69 | bio = bio_copy_user_iov(q, map_data, iter, gfp_mask); | ||
70 | else | ||
71 | bio = bio_map_user_iov(q, iter, gfp_mask); | ||
72 | |||
73 | if (IS_ERR(bio)) | ||
74 | return PTR_ERR(bio); | ||
75 | |||
76 | if (map_data && map_data->null_mapped) | ||
77 | bio_set_flag(bio, BIO_NULL_MAPPED); | ||
78 | |||
79 | iov_iter_advance(iter, bio->bi_iter.bi_size); | ||
80 | if (map_data) | ||
81 | map_data->offset += bio->bi_iter.bi_size; | ||
82 | |||
83 | orig_bio = bio; | ||
84 | blk_queue_bounce(q, &bio); | ||
85 | |||
86 | /* | ||
87 | * We link the bounce buffer in and could have to traverse it | ||
88 | * later so we have to get a ref to prevent it from being freed | ||
89 | */ | ||
90 | bio_get(bio); | ||
91 | |||
92 | ret = blk_rq_append_bio(q, rq, bio); | ||
93 | if (ret) { | ||
94 | bio_endio(bio); | ||
95 | __blk_rq_unmap_user(orig_bio); | ||
96 | bio_put(bio); | ||
97 | return ret; | ||
98 | } | ||
99 | |||
100 | return 0; | ||
101 | } | ||
102 | |||
60 | /** | 103 | /** |
61 | * blk_rq_map_user_iov - map user data to a request, for REQ_TYPE_BLOCK_PC usage | 104 | * blk_rq_map_user_iov - map user data to a request, for REQ_TYPE_BLOCK_PC usage |
62 | * @q: request queue where request should be inserted | 105 | * @q: request queue where request should be inserted |
@@ -82,10 +125,11 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, | |||
82 | struct rq_map_data *map_data, | 125 | struct rq_map_data *map_data, |
83 | const struct iov_iter *iter, gfp_t gfp_mask) | 126 | const struct iov_iter *iter, gfp_t gfp_mask) |
84 | { | 127 | { |
85 | struct bio *bio; | ||
86 | int unaligned = 0; | ||
87 | struct iov_iter i; | ||
88 | struct iovec iov, prv = {.iov_base = NULL, .iov_len = 0}; | 128 | struct iovec iov, prv = {.iov_base = NULL, .iov_len = 0}; |
129 | bool copy = (q->dma_pad_mask & iter->count) || map_data; | ||
130 | struct bio *bio = NULL; | ||
131 | struct iov_iter i; | ||
132 | int ret; | ||
89 | 133 | ||
90 | if (!iter || !iter->count) | 134 | if (!iter || !iter->count) |
91 | return -EINVAL; | 135 | return -EINVAL; |
@@ -101,42 +145,29 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, | |||
101 | */ | 145 | */ |
102 | if ((uaddr & queue_dma_alignment(q)) || | 146 | if ((uaddr & queue_dma_alignment(q)) || |
103 | iovec_gap_to_prv(q, &prv, &iov)) | 147 | iovec_gap_to_prv(q, &prv, &iov)) |
104 | unaligned = 1; | 148 | copy = true; |
105 | 149 | ||
106 | prv.iov_base = iov.iov_base; | 150 | prv.iov_base = iov.iov_base; |
107 | prv.iov_len = iov.iov_len; | 151 | prv.iov_len = iov.iov_len; |
108 | } | 152 | } |
109 | 153 | ||
110 | if (unaligned || (q->dma_pad_mask & iter->count) || map_data) | 154 | i = *iter; |
111 | bio = bio_copy_user_iov(q, map_data, iter, gfp_mask); | 155 | do { |
112 | else | 156 | ret =__blk_rq_map_user_iov(rq, map_data, &i, gfp_mask, copy); |
113 | bio = bio_map_user_iov(q, iter, gfp_mask); | 157 | if (ret) |
114 | 158 | goto unmap_rq; | |
115 | if (IS_ERR(bio)) | 159 | if (!bio) |
116 | return PTR_ERR(bio); | 160 | bio = rq->bio; |
117 | 161 | } while (iov_iter_count(&i)); | |
118 | if (map_data && map_data->null_mapped) | ||
119 | bio_set_flag(bio, BIO_NULL_MAPPED); | ||
120 | |||
121 | if (bio->bi_iter.bi_size != iter->count) { | ||
122 | /* | ||
123 | * Grab an extra reference to this bio, as bio_unmap_user() | ||
124 | * expects to be able to drop it twice as it happens on the | ||
125 | * normal IO completion path | ||
126 | */ | ||
127 | bio_get(bio); | ||
128 | bio_endio(bio); | ||
129 | __blk_rq_unmap_user(bio); | ||
130 | return -EINVAL; | ||
131 | } | ||
132 | 162 | ||
133 | if (!bio_flagged(bio, BIO_USER_MAPPED)) | 163 | if (!bio_flagged(bio, BIO_USER_MAPPED)) |
134 | rq->cmd_flags |= REQ_COPY_USER; | 164 | rq->cmd_flags |= REQ_COPY_USER; |
135 | |||
136 | blk_queue_bounce(q, &bio); | ||
137 | bio_get(bio); | ||
138 | blk_rq_bio_prep(q, rq, bio); | ||
139 | return 0; | 165 | return 0; |
166 | |||
167 | unmap_rq: | ||
168 | __blk_rq_unmap_user(bio); | ||
169 | rq->bio = NULL; | ||
170 | return -EINVAL; | ||
140 | } | 171 | } |
141 | EXPORT_SYMBOL(blk_rq_map_user_iov); | 172 | EXPORT_SYMBOL(blk_rq_map_user_iov); |
142 | 173 | ||
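
For context, a minimal caller-side sketch of how a passthrough path is expected to drive the reworked mapping above. It assumes the passthrough request has already been allocated and its command set up elsewhere; submit_user_buffer() and its parameters are illustrative and not part of this patch. The point is that the multi-bio loop and the unwind through __blk_rq_unmap_user() stay internal to the block layer, so callers keep the same map/execute/unmap contract.

#include <linux/blkdev.h>
#include <linux/uio.h>

/*
 * Illustrative only: map one user buffer into an already-prepared
 * passthrough request, issue it synchronously, and unmap. Request
 * allocation, command setup and error handling of the execute step
 * are omitted.
 */
static int submit_user_buffer(struct request_queue *q, struct request *rq,
			      void __user *ubuf, unsigned int len)
{
	struct iovec iov = { .iov_base = ubuf, .iov_len = len };
	struct iov_iter iter;
	int ret;

	/* WRITE: the user buffer is the data source for this transfer. */
	iov_iter_init(&iter, WRITE, &iov, 1, len);

	/*
	 * May now chain several bios onto rq; on failure the partial
	 * mapping is unwound and rq->bio is reset to NULL.
	 */
	ret = blk_rq_map_user_iov(q, rq, NULL, &iter, GFP_KERNEL);
	if (ret)
		return ret;

	blk_execute_rq(q, NULL, rq, 0);

	/* Drops every bio that blk_rq_map_user_iov() chained onto rq. */
	return blk_rq_unmap_user(rq->bio);
}

A caller that needs copy/bounce semantics (unaligned buffers, rq_map_data) still just passes the iterator; the copy decision is made inside blk_rq_map_user_iov() exactly as before.
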
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 888a7fec81f7..261353166dcf 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -304,7 +304,6 @@ static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio, | |||
304 | struct bio *nxt) | 304 | struct bio *nxt) |
305 | { | 305 | { |
306 | struct bio_vec end_bv = { NULL }, nxt_bv; | 306 | struct bio_vec end_bv = { NULL }, nxt_bv; |
307 | struct bvec_iter iter; | ||
308 | 307 | ||
309 | if (!blk_queue_cluster(q)) | 308 | if (!blk_queue_cluster(q)) |
310 | return 0; | 309 | return 0; |
@@ -316,11 +315,8 @@ static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio, | |||
316 | if (!bio_has_data(bio)) | 315 | if (!bio_has_data(bio)) |
317 | return 1; | 316 | return 1; |
318 | 317 | ||
319 | bio_for_each_segment(end_bv, bio, iter) | 318 | bio_get_last_bvec(bio, &end_bv); |
320 | if (end_bv.bv_len == iter.bi_size) | 319 | bio_get_first_bvec(nxt, &nxt_bv); |
321 | break; | ||
322 | |||
323 | nxt_bv = bio_iovec(nxt); | ||
324 | 320 | ||
325 | if (!BIOVEC_PHYS_MERGEABLE(&end_bv, &nxt_bv)) | 321 | if (!BIOVEC_PHYS_MERGEABLE(&end_bv, &nxt_bv)) |
326 | return 0; | 322 | return 0; |
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 3cd921e6121e..03c46412fff4 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -55,8 +55,9 @@ static void nvme_free_ns(struct kref *kref) | |||
55 | ns->disk->private_data = NULL; | 55 | ns->disk->private_data = NULL; |
56 | spin_unlock(&dev_list_lock); | 56 | spin_unlock(&dev_list_lock); |
57 | 57 | ||
58 | nvme_put_ctrl(ns->ctrl); | ||
59 | put_disk(ns->disk); | 58 | put_disk(ns->disk); |
59 | ida_simple_remove(&ns->ctrl->ns_ida, ns->instance); | ||
60 | nvme_put_ctrl(ns->ctrl); | ||
60 | kfree(ns); | 61 | kfree(ns); |
61 | } | 62 | } |
62 | 63 | ||
@@ -183,7 +184,7 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, | |||
183 | goto out_unmap; | 184 | goto out_unmap; |
184 | } | 185 | } |
185 | 186 | ||
186 | if (meta_buffer) { | 187 | if (meta_buffer && meta_len) { |
187 | struct bio_integrity_payload *bip; | 188 | struct bio_integrity_payload *bip; |
188 | 189 | ||
189 | meta = kmalloc(meta_len, GFP_KERNEL); | 190 | meta = kmalloc(meta_len, GFP_KERNEL); |
@@ -373,6 +374,8 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) | |||
373 | 374 | ||
374 | if (copy_from_user(&io, uio, sizeof(io))) | 375 | if (copy_from_user(&io, uio, sizeof(io))) |
375 | return -EFAULT; | 376 | return -EFAULT; |
377 | if (io.flags) | ||
378 | return -EINVAL; | ||
376 | 379 | ||
377 | switch (io.opcode) { | 380 | switch (io.opcode) { |
378 | case nvme_cmd_write: | 381 | case nvme_cmd_write: |
@@ -424,6 +427,8 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, | |||
424 | return -EACCES; | 427 | return -EACCES; |
425 | if (copy_from_user(&cmd, ucmd, sizeof(cmd))) | 428 | if (copy_from_user(&cmd, ucmd, sizeof(cmd))) |
426 | return -EFAULT; | 429 | return -EFAULT; |
430 | if (cmd.flags) | ||
431 | return -EINVAL; | ||
427 | 432 | ||
428 | memset(&c, 0, sizeof(c)); | 433 | memset(&c, 0, sizeof(c)); |
429 | c.common.opcode = cmd.opcode; | 434 | c.common.opcode = cmd.opcode; |
@@ -556,6 +561,10 @@ static int nvme_revalidate_disk(struct gendisk *disk) | |||
556 | u16 old_ms; | 561 | u16 old_ms; |
557 | unsigned short bs; | 562 | unsigned short bs; |
558 | 563 | ||
564 | if (test_bit(NVME_NS_DEAD, &ns->flags)) { | ||
565 | set_capacity(disk, 0); | ||
566 | return -ENODEV; | ||
567 | } | ||
559 | if (nvme_identify_ns(ns->ctrl, ns->ns_id, &id)) { | 568 | if (nvme_identify_ns(ns->ctrl, ns->ns_id, &id)) { |
560 | dev_warn(ns->ctrl->dev, "%s: Identify failure nvme%dn%d\n", | 569 | dev_warn(ns->ctrl->dev, "%s: Identify failure nvme%dn%d\n", |
561 | __func__, ns->ctrl->instance, ns->ns_id); | 570 | __func__, ns->ctrl->instance, ns->ns_id); |
@@ -831,6 +840,23 @@ int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl) | |||
831 | return ret; | 840 | return ret; |
832 | } | 841 | } |
833 | 842 | ||
843 | static void nvme_set_queue_limits(struct nvme_ctrl *ctrl, | ||
844 | struct request_queue *q) | ||
845 | { | ||
846 | if (ctrl->max_hw_sectors) { | ||
847 | u32 max_segments = | ||
848 | (ctrl->max_hw_sectors / (ctrl->page_size >> 9)) + 1; | ||
849 | |||
850 | blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors); | ||
851 | blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX)); | ||
852 | } | ||
853 | if (ctrl->stripe_size) | ||
854 | blk_queue_chunk_sectors(q, ctrl->stripe_size >> 9); | ||
855 | if (ctrl->vwc & NVME_CTRL_VWC_PRESENT) | ||
856 | blk_queue_flush(q, REQ_FLUSH | REQ_FUA); | ||
857 | blk_queue_virt_boundary(q, ctrl->page_size - 1); | ||
858 | } | ||
859 | |||
834 | /* | 860 | /* |
835 | * Initialize the cached copies of the Identify data and various controller | 861 | * Initialize the cached copies of the Identify data and various controller |
836 | * register in our nvme_ctrl structure. This should be called as soon as | 862 | * register in our nvme_ctrl structure. This should be called as soon as |
@@ -888,6 +914,8 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) | |||
888 | } | 914 | } |
889 | } | 915 | } |
890 | 916 | ||
917 | nvme_set_queue_limits(ctrl, ctrl->admin_q); | ||
918 | |||
891 | kfree(id); | 919 | kfree(id); |
892 | return 0; | 920 | return 0; |
893 | } | 921 | } |
@@ -1118,9 +1146,13 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) | |||
1118 | if (!ns) | 1146 | if (!ns) |
1119 | return; | 1147 | return; |
1120 | 1148 | ||
1149 | ns->instance = ida_simple_get(&ctrl->ns_ida, 1, 0, GFP_KERNEL); | ||
1150 | if (ns->instance < 0) | ||
1151 | goto out_free_ns; | ||
1152 | |||
1121 | ns->queue = blk_mq_init_queue(ctrl->tagset); | 1153 | ns->queue = blk_mq_init_queue(ctrl->tagset); |
1122 | if (IS_ERR(ns->queue)) | 1154 | if (IS_ERR(ns->queue)) |
1123 | goto out_free_ns; | 1155 | goto out_release_instance; |
1124 | queue_flag_set_unlocked(QUEUE_FLAG_NONROT, ns->queue); | 1156 | queue_flag_set_unlocked(QUEUE_FLAG_NONROT, ns->queue); |
1125 | ns->queue->queuedata = ns; | 1157 | ns->queue->queuedata = ns; |
1126 | ns->ctrl = ctrl; | 1158 | ns->ctrl = ctrl; |
@@ -1134,17 +1166,9 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) | |||
1134 | ns->disk = disk; | 1166 | ns->disk = disk; |
1135 | ns->lba_shift = 9; /* set to a default value for 512 until disk is validated */ | 1167 | ns->lba_shift = 9; /* set to a default value for 512 until disk is validated */ |
1136 | 1168 | ||
1169 | |||
1137 | blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift); | 1170 | blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift); |
1138 | if (ctrl->max_hw_sectors) { | 1171 | nvme_set_queue_limits(ctrl, ns->queue); |
1139 | blk_queue_max_hw_sectors(ns->queue, ctrl->max_hw_sectors); | ||
1140 | blk_queue_max_segments(ns->queue, | ||
1141 | (ctrl->max_hw_sectors / (ctrl->page_size >> 9)) + 1); | ||
1142 | } | ||
1143 | if (ctrl->stripe_size) | ||
1144 | blk_queue_chunk_sectors(ns->queue, ctrl->stripe_size >> 9); | ||
1145 | if (ctrl->vwc & NVME_CTRL_VWC_PRESENT) | ||
1146 | blk_queue_flush(ns->queue, REQ_FLUSH | REQ_FUA); | ||
1147 | blk_queue_virt_boundary(ns->queue, ctrl->page_size - 1); | ||
1148 | 1172 | ||
1149 | disk->major = nvme_major; | 1173 | disk->major = nvme_major; |
1150 | disk->first_minor = 0; | 1174 | disk->first_minor = 0; |
@@ -1153,7 +1177,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) | |||
1153 | disk->queue = ns->queue; | 1177 | disk->queue = ns->queue; |
1154 | disk->driverfs_dev = ctrl->device; | 1178 | disk->driverfs_dev = ctrl->device; |
1155 | disk->flags = GENHD_FL_EXT_DEVT; | 1179 | disk->flags = GENHD_FL_EXT_DEVT; |
1156 | sprintf(disk->disk_name, "nvme%dn%d", ctrl->instance, nsid); | 1180 | sprintf(disk->disk_name, "nvme%dn%d", ctrl->instance, ns->instance); |
1157 | 1181 | ||
1158 | if (nvme_revalidate_disk(ns->disk)) | 1182 | if (nvme_revalidate_disk(ns->disk)) |
1159 | goto out_free_disk; | 1183 | goto out_free_disk; |
@@ -1173,40 +1197,29 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) | |||
1173 | kfree(disk); | 1197 | kfree(disk); |
1174 | out_free_queue: | 1198 | out_free_queue: |
1175 | blk_cleanup_queue(ns->queue); | 1199 | blk_cleanup_queue(ns->queue); |
1200 | out_release_instance: | ||
1201 | ida_simple_remove(&ctrl->ns_ida, ns->instance); | ||
1176 | out_free_ns: | 1202 | out_free_ns: |
1177 | kfree(ns); | 1203 | kfree(ns); |
1178 | } | 1204 | } |
1179 | 1205 | ||
1180 | static void nvme_ns_remove(struct nvme_ns *ns) | 1206 | static void nvme_ns_remove(struct nvme_ns *ns) |
1181 | { | 1207 | { |
1182 | bool kill = nvme_io_incapable(ns->ctrl) && | 1208 | if (test_and_set_bit(NVME_NS_REMOVING, &ns->flags)) |
1183 | !blk_queue_dying(ns->queue); | 1209 | return; |
1184 | |||
1185 | lockdep_assert_held(&ns->ctrl->namespaces_mutex); | ||
1186 | |||
1187 | if (kill) { | ||
1188 | blk_set_queue_dying(ns->queue); | ||
1189 | 1210 | ||
1190 | /* | ||
1191 | * The controller was shutdown first if we got here through | ||
1192 | * device removal. The shutdown may requeue outstanding | ||
1193 | * requests. These need to be aborted immediately so | ||
1194 | * del_gendisk doesn't block indefinitely for their completion. | ||
1195 | */ | ||
1196 | blk_mq_abort_requeue_list(ns->queue); | ||
1197 | } | ||
1198 | if (ns->disk->flags & GENHD_FL_UP) { | 1211 | if (ns->disk->flags & GENHD_FL_UP) { |
1199 | if (blk_get_integrity(ns->disk)) | 1212 | if (blk_get_integrity(ns->disk)) |
1200 | blk_integrity_unregister(ns->disk); | 1213 | blk_integrity_unregister(ns->disk); |
1201 | sysfs_remove_group(&disk_to_dev(ns->disk)->kobj, | 1214 | sysfs_remove_group(&disk_to_dev(ns->disk)->kobj, |
1202 | &nvme_ns_attr_group); | 1215 | &nvme_ns_attr_group); |
1203 | del_gendisk(ns->disk); | 1216 | del_gendisk(ns->disk); |
1204 | } | ||
1205 | if (kill || !blk_queue_dying(ns->queue)) { | ||
1206 | blk_mq_abort_requeue_list(ns->queue); | 1217 | blk_mq_abort_requeue_list(ns->queue); |
1207 | blk_cleanup_queue(ns->queue); | 1218 | blk_cleanup_queue(ns->queue); |
1208 | } | 1219 | } |
1220 | mutex_lock(&ns->ctrl->namespaces_mutex); | ||
1209 | list_del_init(&ns->list); | 1221 | list_del_init(&ns->list); |
1222 | mutex_unlock(&ns->ctrl->namespaces_mutex); | ||
1210 | nvme_put_ns(ns); | 1223 | nvme_put_ns(ns); |
1211 | } | 1224 | } |
1212 | 1225 | ||
@@ -1300,10 +1313,8 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl) | |||
1300 | { | 1313 | { |
1301 | struct nvme_ns *ns, *next; | 1314 | struct nvme_ns *ns, *next; |
1302 | 1315 | ||
1303 | mutex_lock(&ctrl->namespaces_mutex); | ||
1304 | list_for_each_entry_safe(ns, next, &ctrl->namespaces, list) | 1316 | list_for_each_entry_safe(ns, next, &ctrl->namespaces, list) |
1305 | nvme_ns_remove(ns); | 1317 | nvme_ns_remove(ns); |
1306 | mutex_unlock(&ctrl->namespaces_mutex); | ||
1307 | } | 1318 | } |
1308 | 1319 | ||
1309 | static DEFINE_IDA(nvme_instance_ida); | 1320 | static DEFINE_IDA(nvme_instance_ida); |
@@ -1350,6 +1361,7 @@ static void nvme_free_ctrl(struct kref *kref) | |||
1350 | 1361 | ||
1351 | put_device(ctrl->device); | 1362 | put_device(ctrl->device); |
1352 | nvme_release_instance(ctrl); | 1363 | nvme_release_instance(ctrl); |
1364 | ida_destroy(&ctrl->ns_ida); | ||
1353 | 1365 | ||
1354 | ctrl->ops->free_ctrl(ctrl); | 1366 | ctrl->ops->free_ctrl(ctrl); |
1355 | } | 1367 | } |
@@ -1390,6 +1402,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, | |||
1390 | } | 1402 | } |
1391 | get_device(ctrl->device); | 1403 | get_device(ctrl->device); |
1392 | dev_set_drvdata(ctrl->device, ctrl); | 1404 | dev_set_drvdata(ctrl->device, ctrl); |
1405 | ida_init(&ctrl->ns_ida); | ||
1393 | 1406 | ||
1394 | spin_lock(&dev_list_lock); | 1407 | spin_lock(&dev_list_lock); |
1395 | list_add_tail(&ctrl->node, &nvme_ctrl_list); | 1408 | list_add_tail(&ctrl->node, &nvme_ctrl_list); |
@@ -1402,6 +1415,38 @@ out: | |||
1402 | return ret; | 1415 | return ret; |
1403 | } | 1416 | } |
1404 | 1417 | ||
1418 | /** | ||
1419 | * nvme_kill_queues(): Ends all namespace queues | ||
1420 | * @ctrl: the dead controller that needs to end | ||
1421 | * | ||
1422 | * Call this function when the driver determines it is unable to get the | ||
1423 | * controller in a state capable of servicing IO. | ||
1424 | */ | ||
1425 | void nvme_kill_queues(struct nvme_ctrl *ctrl) | ||
1426 | { | ||
1427 | struct nvme_ns *ns; | ||
1428 | |||
1429 | mutex_lock(&ctrl->namespaces_mutex); | ||
1430 | list_for_each_entry(ns, &ctrl->namespaces, list) { | ||
1431 | if (!kref_get_unless_zero(&ns->kref)) | ||
1432 | continue; | ||
1433 | |||
1434 | /* | ||
1435 | * Revalidating a dead namespace sets capacity to 0. This will | ||
1436 | * end buffered writers dirtying pages that can't be synced. | ||
1437 | */ | ||
1438 | if (!test_and_set_bit(NVME_NS_DEAD, &ns->flags)) | ||
1439 | revalidate_disk(ns->disk); | ||
1440 | |||
1441 | blk_set_queue_dying(ns->queue); | ||
1442 | blk_mq_abort_requeue_list(ns->queue); | ||
1443 | blk_mq_start_stopped_hw_queues(ns->queue, true); | ||
1444 | |||
1445 | nvme_put_ns(ns); | ||
1446 | } | ||
1447 | mutex_unlock(&ctrl->namespaces_mutex); | ||
1448 | } | ||
1449 | |||
1405 | void nvme_stop_queues(struct nvme_ctrl *ctrl) | 1450 | void nvme_stop_queues(struct nvme_ctrl *ctrl) |
1406 | { | 1451 | { |
1407 | struct nvme_ns *ns; | 1452 | struct nvme_ns *ns; |
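
Two patterns carry the teardown rework above: nvme_ns_remove() becomes idempotent via test_and_set_bit(NVME_NS_REMOVING), and nvme_kill_queues() only touches namespaces whose refcount can still be elevated, so it cannot race with the final nvme_free_ns(). A generic sketch of the reference-walk half, with illustrative demo_* names and the same kref/list/mutex layout assumed:

#include <linux/kref.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/slab.h>

struct demo_ns {
	struct list_head list;
	struct kref kref;	/* kref_init()ed when the entry is created */
};

static void demo_ns_release(struct kref *kref)
{
	kfree(container_of(kref, struct demo_ns, kref));
}

/* Apply fn() to every namespace that is not already on its way out. */
static void demo_for_each_live_ns(struct list_head *namespaces,
				  struct mutex *lock,
				  void (*fn)(struct demo_ns *))
{
	struct demo_ns *ns;

	mutex_lock(lock);
	list_for_each_entry(ns, namespaces, list) {
		/* Fails once the final put is in flight: skip, don't touch. */
		if (!kref_get_unless_zero(&ns->kref))
			continue;
		fn(ns);
		kref_put(&ns->kref, demo_ns_release);
	}
	mutex_unlock(lock);
}
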
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 9664d07d807d..fb15ba5f5d19 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -72,6 +72,7 @@ struct nvme_ctrl { | |||
72 | struct mutex namespaces_mutex; | 72 | struct mutex namespaces_mutex; |
73 | struct device *device; /* char device */ | 73 | struct device *device; /* char device */ |
74 | struct list_head node; | 74 | struct list_head node; |
75 | struct ida ns_ida; | ||
75 | 76 | ||
76 | char name[12]; | 77 | char name[12]; |
77 | char serial[20]; | 78 | char serial[20]; |
@@ -102,6 +103,7 @@ struct nvme_ns { | |||
102 | struct request_queue *queue; | 103 | struct request_queue *queue; |
103 | struct gendisk *disk; | 104 | struct gendisk *disk; |
104 | struct kref kref; | 105 | struct kref kref; |
106 | int instance; | ||
105 | 107 | ||
106 | u8 eui[8]; | 108 | u8 eui[8]; |
107 | u8 uuid[16]; | 109 | u8 uuid[16]; |
@@ -112,6 +114,11 @@ struct nvme_ns { | |||
112 | bool ext; | 114 | bool ext; |
113 | u8 pi_type; | 115 | u8 pi_type; |
114 | int type; | 116 | int type; |
117 | unsigned long flags; | ||
118 | |||
119 | #define NVME_NS_REMOVING 0 | ||
120 | #define NVME_NS_DEAD 1 | ||
121 | |||
115 | u64 mode_select_num_blocks; | 122 | u64 mode_select_num_blocks; |
116 | u32 mode_select_block_len; | 123 | u32 mode_select_block_len; |
117 | }; | 124 | }; |
@@ -240,6 +247,7 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl); | |||
240 | 247 | ||
241 | void nvme_stop_queues(struct nvme_ctrl *ctrl); | 248 | void nvme_stop_queues(struct nvme_ctrl *ctrl); |
242 | void nvme_start_queues(struct nvme_ctrl *ctrl); | 249 | void nvme_start_queues(struct nvme_ctrl *ctrl); |
250 | void nvme_kill_queues(struct nvme_ctrl *ctrl); | ||
243 | 251 | ||
244 | struct request *nvme_alloc_request(struct request_queue *q, | 252 | struct request *nvme_alloc_request(struct request_queue *q, |
245 | struct nvme_command *cmd, unsigned int flags); | 253 | struct nvme_command *cmd, unsigned int flags); |
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index a128672472ec..680f5780750c 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -86,7 +86,6 @@ struct nvme_queue; | |||
86 | 86 | ||
87 | static int nvme_reset(struct nvme_dev *dev); | 87 | static int nvme_reset(struct nvme_dev *dev); |
88 | static void nvme_process_cq(struct nvme_queue *nvmeq); | 88 | static void nvme_process_cq(struct nvme_queue *nvmeq); |
89 | static void nvme_remove_dead_ctrl(struct nvme_dev *dev); | ||
90 | static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown); | 89 | static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown); |
91 | 90 | ||
92 | /* | 91 | /* |
@@ -120,6 +119,7 @@ struct nvme_dev { | |||
120 | unsigned long flags; | 119 | unsigned long flags; |
121 | 120 | ||
122 | #define NVME_CTRL_RESETTING 0 | 121 | #define NVME_CTRL_RESETTING 0 |
122 | #define NVME_CTRL_REMOVING 1 | ||
123 | 123 | ||
124 | struct nvme_ctrl ctrl; | 124 | struct nvme_ctrl ctrl; |
125 | struct completion ioq_wait; | 125 | struct completion ioq_wait; |
@@ -286,6 +286,17 @@ static int nvme_init_request(void *data, struct request *req, | |||
286 | return 0; | 286 | return 0; |
287 | } | 287 | } |
288 | 288 | ||
289 | static void nvme_queue_scan(struct nvme_dev *dev) | ||
290 | { | ||
291 | /* | ||
292 | * Do not queue new scan work when a controller is reset during | ||
293 | * removal. | ||
294 | */ | ||
295 | if (test_bit(NVME_CTRL_REMOVING, &dev->flags)) | ||
296 | return; | ||
297 | queue_work(nvme_workq, &dev->scan_work); | ||
298 | } | ||
299 | |||
289 | static void nvme_complete_async_event(struct nvme_dev *dev, | 300 | static void nvme_complete_async_event(struct nvme_dev *dev, |
290 | struct nvme_completion *cqe) | 301 | struct nvme_completion *cqe) |
291 | { | 302 | { |
@@ -300,7 +311,7 @@ static void nvme_complete_async_event(struct nvme_dev *dev, | |||
300 | switch (result & 0xff07) { | 311 | switch (result & 0xff07) { |
301 | case NVME_AER_NOTICE_NS_CHANGED: | 312 | case NVME_AER_NOTICE_NS_CHANGED: |
302 | dev_info(dev->dev, "rescanning\n"); | 313 | dev_info(dev->dev, "rescanning\n"); |
303 | queue_work(nvme_workq, &dev->scan_work); | 314 | nvme_queue_scan(dev); |
304 | default: | 315 | default: |
305 | dev_warn(dev->dev, "async event result %08x\n", result); | 316 | dev_warn(dev->dev, "async event result %08x\n", result); |
306 | } | 317 | } |
@@ -679,7 +690,10 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, | |||
679 | 690 | ||
680 | spin_lock_irq(&nvmeq->q_lock); | 691 | spin_lock_irq(&nvmeq->q_lock); |
681 | if (unlikely(nvmeq->cq_vector < 0)) { | 692 | if (unlikely(nvmeq->cq_vector < 0)) { |
682 | ret = BLK_MQ_RQ_QUEUE_BUSY; | 693 | if (ns && !test_bit(NVME_NS_DEAD, &ns->flags)) |
694 | ret = BLK_MQ_RQ_QUEUE_BUSY; | ||
695 | else | ||
696 | ret = BLK_MQ_RQ_QUEUE_ERROR; | ||
683 | spin_unlock_irq(&nvmeq->q_lock); | 697 | spin_unlock_irq(&nvmeq->q_lock); |
684 | goto out; | 698 | goto out; |
685 | } | 699 | } |
@@ -1250,6 +1264,12 @@ static struct blk_mq_ops nvme_mq_ops = { | |||
1250 | static void nvme_dev_remove_admin(struct nvme_dev *dev) | 1264 | static void nvme_dev_remove_admin(struct nvme_dev *dev) |
1251 | { | 1265 | { |
1252 | if (dev->ctrl.admin_q && !blk_queue_dying(dev->ctrl.admin_q)) { | 1266 | if (dev->ctrl.admin_q && !blk_queue_dying(dev->ctrl.admin_q)) { |
1267 | /* | ||
1268 | * If the controller was reset during removal, it's possible | ||
1269 | * user requests may be waiting on a stopped queue. Start the | ||
1270 | * queue to flush these to completion. | ||
1271 | */ | ||
1272 | blk_mq_start_stopped_hw_queues(dev->ctrl.admin_q, true); | ||
1253 | blk_cleanup_queue(dev->ctrl.admin_q); | 1273 | blk_cleanup_queue(dev->ctrl.admin_q); |
1254 | blk_mq_free_tag_set(&dev->admin_tagset); | 1274 | blk_mq_free_tag_set(&dev->admin_tagset); |
1255 | } | 1275 | } |
@@ -1690,14 +1710,14 @@ static int nvme_dev_add(struct nvme_dev *dev) | |||
1690 | return 0; | 1710 | return 0; |
1691 | dev->ctrl.tagset = &dev->tagset; | 1711 | dev->ctrl.tagset = &dev->tagset; |
1692 | } | 1712 | } |
1693 | queue_work(nvme_workq, &dev->scan_work); | 1713 | nvme_queue_scan(dev); |
1694 | return 0; | 1714 | return 0; |
1695 | } | 1715 | } |
1696 | 1716 | ||
1697 | static int nvme_dev_map(struct nvme_dev *dev) | 1717 | static int nvme_pci_enable(struct nvme_dev *dev) |
1698 | { | 1718 | { |
1699 | u64 cap; | 1719 | u64 cap; |
1700 | int bars, result = -ENOMEM; | 1720 | int result = -ENOMEM; |
1701 | struct pci_dev *pdev = to_pci_dev(dev->dev); | 1721 | struct pci_dev *pdev = to_pci_dev(dev->dev); |
1702 | 1722 | ||
1703 | if (pci_enable_device_mem(pdev)) | 1723 | if (pci_enable_device_mem(pdev)) |
@@ -1705,24 +1725,14 @@ static int nvme_dev_map(struct nvme_dev *dev) | |||
1705 | 1725 | ||
1706 | dev->entry[0].vector = pdev->irq; | 1726 | dev->entry[0].vector = pdev->irq; |
1707 | pci_set_master(pdev); | 1727 | pci_set_master(pdev); |
1708 | bars = pci_select_bars(pdev, IORESOURCE_MEM); | ||
1709 | if (!bars) | ||
1710 | goto disable_pci; | ||
1711 | |||
1712 | if (pci_request_selected_regions(pdev, bars, "nvme")) | ||
1713 | goto disable_pci; | ||
1714 | 1728 | ||
1715 | if (dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64)) && | 1729 | if (dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64)) && |
1716 | dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(32))) | 1730 | dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(32))) |
1717 | goto disable; | 1731 | goto disable; |
1718 | 1732 | ||
1719 | dev->bar = ioremap(pci_resource_start(pdev, 0), 8192); | ||
1720 | if (!dev->bar) | ||
1721 | goto disable; | ||
1722 | |||
1723 | if (readl(dev->bar + NVME_REG_CSTS) == -1) { | 1733 | if (readl(dev->bar + NVME_REG_CSTS) == -1) { |
1724 | result = -ENODEV; | 1734 | result = -ENODEV; |
1725 | goto unmap; | 1735 | goto disable; |
1726 | } | 1736 | } |
1727 | 1737 | ||
1728 | /* | 1738 | /* |
@@ -1732,7 +1742,7 @@ static int nvme_dev_map(struct nvme_dev *dev) | |||
1732 | if (!pdev->irq) { | 1742 | if (!pdev->irq) { |
1733 | result = pci_enable_msix(pdev, dev->entry, 1); | 1743 | result = pci_enable_msix(pdev, dev->entry, 1); |
1734 | if (result < 0) | 1744 | if (result < 0) |
1735 | goto unmap; | 1745 | goto disable; |
1736 | } | 1746 | } |
1737 | 1747 | ||
1738 | cap = lo_hi_readq(dev->bar + NVME_REG_CAP); | 1748 | cap = lo_hi_readq(dev->bar + NVME_REG_CAP); |
@@ -1759,18 +1769,20 @@ static int nvme_dev_map(struct nvme_dev *dev) | |||
1759 | pci_save_state(pdev); | 1769 | pci_save_state(pdev); |
1760 | return 0; | 1770 | return 0; |
1761 | 1771 | ||
1762 | unmap: | ||
1763 | iounmap(dev->bar); | ||
1764 | dev->bar = NULL; | ||
1765 | disable: | 1772 | disable: |
1766 | pci_release_regions(pdev); | ||
1767 | disable_pci: | ||
1768 | pci_disable_device(pdev); | 1773 | pci_disable_device(pdev); |
1769 | return result; | 1774 | return result; |
1770 | } | 1775 | } |
1771 | 1776 | ||
1772 | static void nvme_dev_unmap(struct nvme_dev *dev) | 1777 | static void nvme_dev_unmap(struct nvme_dev *dev) |
1773 | { | 1778 | { |
1779 | if (dev->bar) | ||
1780 | iounmap(dev->bar); | ||
1781 | pci_release_regions(to_pci_dev(dev->dev)); | ||
1782 | } | ||
1783 | |||
1784 | static void nvme_pci_disable(struct nvme_dev *dev) | ||
1785 | { | ||
1774 | struct pci_dev *pdev = to_pci_dev(dev->dev); | 1786 | struct pci_dev *pdev = to_pci_dev(dev->dev); |
1775 | 1787 | ||
1776 | if (pdev->msi_enabled) | 1788 | if (pdev->msi_enabled) |
@@ -1778,12 +1790,6 @@ static void nvme_dev_unmap(struct nvme_dev *dev) | |||
1778 | else if (pdev->msix_enabled) | 1790 | else if (pdev->msix_enabled) |
1779 | pci_disable_msix(pdev); | 1791 | pci_disable_msix(pdev); |
1780 | 1792 | ||
1781 | if (dev->bar) { | ||
1782 | iounmap(dev->bar); | ||
1783 | dev->bar = NULL; | ||
1784 | pci_release_regions(pdev); | ||
1785 | } | ||
1786 | |||
1787 | if (pci_is_enabled(pdev)) { | 1793 | if (pci_is_enabled(pdev)) { |
1788 | pci_disable_pcie_error_reporting(pdev); | 1794 | pci_disable_pcie_error_reporting(pdev); |
1789 | pci_disable_device(pdev); | 1795 | pci_disable_device(pdev); |
@@ -1842,7 +1848,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) | |||
1842 | nvme_dev_list_remove(dev); | 1848 | nvme_dev_list_remove(dev); |
1843 | 1849 | ||
1844 | mutex_lock(&dev->shutdown_lock); | 1850 | mutex_lock(&dev->shutdown_lock); |
1845 | if (dev->bar) { | 1851 | if (pci_is_enabled(to_pci_dev(dev->dev))) { |
1846 | nvme_stop_queues(&dev->ctrl); | 1852 | nvme_stop_queues(&dev->ctrl); |
1847 | csts = readl(dev->bar + NVME_REG_CSTS); | 1853 | csts = readl(dev->bar + NVME_REG_CSTS); |
1848 | } | 1854 | } |
@@ -1855,7 +1861,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) | |||
1855 | nvme_disable_io_queues(dev); | 1861 | nvme_disable_io_queues(dev); |
1856 | nvme_disable_admin_queue(dev, shutdown); | 1862 | nvme_disable_admin_queue(dev, shutdown); |
1857 | } | 1863 | } |
1858 | nvme_dev_unmap(dev); | 1864 | nvme_pci_disable(dev); |
1859 | 1865 | ||
1860 | for (i = dev->queue_count - 1; i >= 0; i--) | 1866 | for (i = dev->queue_count - 1; i >= 0; i--) |
1861 | nvme_clear_queue(dev->queues[i]); | 1867 | nvme_clear_queue(dev->queues[i]); |
@@ -1899,10 +1905,20 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl) | |||
1899 | kfree(dev); | 1905 | kfree(dev); |
1900 | } | 1906 | } |
1901 | 1907 | ||
1908 | static void nvme_remove_dead_ctrl(struct nvme_dev *dev, int status) | ||
1909 | { | ||
1910 | dev_warn(dev->dev, "Removing after probe failure status: %d\n", status); | ||
1911 | |||
1912 | kref_get(&dev->ctrl.kref); | ||
1913 | nvme_dev_disable(dev, false); | ||
1914 | if (!schedule_work(&dev->remove_work)) | ||
1915 | nvme_put_ctrl(&dev->ctrl); | ||
1916 | } | ||
1917 | |||
1902 | static void nvme_reset_work(struct work_struct *work) | 1918 | static void nvme_reset_work(struct work_struct *work) |
1903 | { | 1919 | { |
1904 | struct nvme_dev *dev = container_of(work, struct nvme_dev, reset_work); | 1920 | struct nvme_dev *dev = container_of(work, struct nvme_dev, reset_work); |
1905 | int result; | 1921 | int result = -ENODEV; |
1906 | 1922 | ||
1907 | if (WARN_ON(test_bit(NVME_CTRL_RESETTING, &dev->flags))) | 1923 | if (WARN_ON(test_bit(NVME_CTRL_RESETTING, &dev->flags))) |
1908 | goto out; | 1924 | goto out; |
@@ -1911,37 +1927,37 @@ static void nvme_reset_work(struct work_struct *work) | |||
1911 | * If we're called to reset a live controller first shut it down before | 1927 | * If we're called to reset a live controller first shut it down before |
1912 | * moving on. | 1928 | * moving on. |
1913 | */ | 1929 | */ |
1914 | if (dev->bar) | 1930 | if (dev->ctrl.ctrl_config & NVME_CC_ENABLE) |
1915 | nvme_dev_disable(dev, false); | 1931 | nvme_dev_disable(dev, false); |
1916 | 1932 | ||
1917 | set_bit(NVME_CTRL_RESETTING, &dev->flags); | 1933 | set_bit(NVME_CTRL_RESETTING, &dev->flags); |
1918 | 1934 | ||
1919 | result = nvme_dev_map(dev); | 1935 | result = nvme_pci_enable(dev); |
1920 | if (result) | 1936 | if (result) |
1921 | goto out; | 1937 | goto out; |
1922 | 1938 | ||
1923 | result = nvme_configure_admin_queue(dev); | 1939 | result = nvme_configure_admin_queue(dev); |
1924 | if (result) | 1940 | if (result) |
1925 | goto unmap; | 1941 | goto out; |
1926 | 1942 | ||
1927 | nvme_init_queue(dev->queues[0], 0); | 1943 | nvme_init_queue(dev->queues[0], 0); |
1928 | result = nvme_alloc_admin_tags(dev); | 1944 | result = nvme_alloc_admin_tags(dev); |
1929 | if (result) | 1945 | if (result) |
1930 | goto disable; | 1946 | goto out; |
1931 | 1947 | ||
1932 | result = nvme_init_identify(&dev->ctrl); | 1948 | result = nvme_init_identify(&dev->ctrl); |
1933 | if (result) | 1949 | if (result) |
1934 | goto free_tags; | 1950 | goto out; |
1935 | 1951 | ||
1936 | result = nvme_setup_io_queues(dev); | 1952 | result = nvme_setup_io_queues(dev); |
1937 | if (result) | 1953 | if (result) |
1938 | goto free_tags; | 1954 | goto out; |
1939 | 1955 | ||
1940 | dev->ctrl.event_limit = NVME_NR_AEN_COMMANDS; | 1956 | dev->ctrl.event_limit = NVME_NR_AEN_COMMANDS; |
1941 | 1957 | ||
1942 | result = nvme_dev_list_add(dev); | 1958 | result = nvme_dev_list_add(dev); |
1943 | if (result) | 1959 | if (result) |
1944 | goto remove; | 1960 | goto out; |
1945 | 1961 | ||
1946 | /* | 1962 | /* |
1947 | * Keep the controller around but remove all namespaces if we don't have | 1963 | * Keep the controller around but remove all namespaces if we don't have |
@@ -1958,19 +1974,8 @@ static void nvme_reset_work(struct work_struct *work) | |||
1958 | clear_bit(NVME_CTRL_RESETTING, &dev->flags); | 1974 | clear_bit(NVME_CTRL_RESETTING, &dev->flags); |
1959 | return; | 1975 | return; |
1960 | 1976 | ||
1961 | remove: | ||
1962 | nvme_dev_list_remove(dev); | ||
1963 | free_tags: | ||
1964 | nvme_dev_remove_admin(dev); | ||
1965 | blk_put_queue(dev->ctrl.admin_q); | ||
1966 | dev->ctrl.admin_q = NULL; | ||
1967 | dev->queues[0]->tags = NULL; | ||
1968 | disable: | ||
1969 | nvme_disable_admin_queue(dev, false); | ||
1970 | unmap: | ||
1971 | nvme_dev_unmap(dev); | ||
1972 | out: | 1977 | out: |
1973 | nvme_remove_dead_ctrl(dev); | 1978 | nvme_remove_dead_ctrl(dev, result); |
1974 | } | 1979 | } |
1975 | 1980 | ||
1976 | static void nvme_remove_dead_ctrl_work(struct work_struct *work) | 1981 | static void nvme_remove_dead_ctrl_work(struct work_struct *work) |
@@ -1978,19 +1983,12 @@ static void nvme_remove_dead_ctrl_work(struct work_struct *work) | |||
1978 | struct nvme_dev *dev = container_of(work, struct nvme_dev, remove_work); | 1983 | struct nvme_dev *dev = container_of(work, struct nvme_dev, remove_work); |
1979 | struct pci_dev *pdev = to_pci_dev(dev->dev); | 1984 | struct pci_dev *pdev = to_pci_dev(dev->dev); |
1980 | 1985 | ||
1986 | nvme_kill_queues(&dev->ctrl); | ||
1981 | if (pci_get_drvdata(pdev)) | 1987 | if (pci_get_drvdata(pdev)) |
1982 | pci_stop_and_remove_bus_device_locked(pdev); | 1988 | pci_stop_and_remove_bus_device_locked(pdev); |
1983 | nvme_put_ctrl(&dev->ctrl); | 1989 | nvme_put_ctrl(&dev->ctrl); |
1984 | } | 1990 | } |
1985 | 1991 | ||
1986 | static void nvme_remove_dead_ctrl(struct nvme_dev *dev) | ||
1987 | { | ||
1988 | dev_warn(dev->dev, "Removing after probe failure\n"); | ||
1989 | kref_get(&dev->ctrl.kref); | ||
1990 | if (!schedule_work(&dev->remove_work)) | ||
1991 | nvme_put_ctrl(&dev->ctrl); | ||
1992 | } | ||
1993 | |||
1994 | static int nvme_reset(struct nvme_dev *dev) | 1992 | static int nvme_reset(struct nvme_dev *dev) |
1995 | { | 1993 | { |
1996 | if (!dev->ctrl.admin_q || blk_queue_dying(dev->ctrl.admin_q)) | 1994 | if (!dev->ctrl.admin_q || blk_queue_dying(dev->ctrl.admin_q)) |
@@ -2042,6 +2040,27 @@ static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = { | |||
2042 | .free_ctrl = nvme_pci_free_ctrl, | 2040 | .free_ctrl = nvme_pci_free_ctrl, |
2043 | }; | 2041 | }; |
2044 | 2042 | ||
2043 | static int nvme_dev_map(struct nvme_dev *dev) | ||
2044 | { | ||
2045 | int bars; | ||
2046 | struct pci_dev *pdev = to_pci_dev(dev->dev); | ||
2047 | |||
2048 | bars = pci_select_bars(pdev, IORESOURCE_MEM); | ||
2049 | if (!bars) | ||
2050 | return -ENODEV; | ||
2051 | if (pci_request_selected_regions(pdev, bars, "nvme")) | ||
2052 | return -ENODEV; | ||
2053 | |||
2054 | dev->bar = ioremap(pci_resource_start(pdev, 0), 8192); | ||
2055 | if (!dev->bar) | ||
2056 | goto release; | ||
2057 | |||
2058 | return 0; | ||
2059 | release: | ||
2060 | pci_release_regions(pdev); | ||
2061 | return -ENODEV; | ||
2062 | } | ||
2063 | |||
2045 | static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) | 2064 | static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) |
2046 | { | 2065 | { |
2047 | int node, result = -ENOMEM; | 2066 | int node, result = -ENOMEM; |
@@ -2066,6 +2085,10 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) | |||
2066 | dev->dev = get_device(&pdev->dev); | 2085 | dev->dev = get_device(&pdev->dev); |
2067 | pci_set_drvdata(pdev, dev); | 2086 | pci_set_drvdata(pdev, dev); |
2068 | 2087 | ||
2088 | result = nvme_dev_map(dev); | ||
2089 | if (result) | ||
2090 | goto free; | ||
2091 | |||
2069 | INIT_LIST_HEAD(&dev->node); | 2092 | INIT_LIST_HEAD(&dev->node); |
2070 | INIT_WORK(&dev->scan_work, nvme_dev_scan); | 2093 | INIT_WORK(&dev->scan_work, nvme_dev_scan); |
2071 | INIT_WORK(&dev->reset_work, nvme_reset_work); | 2094 | INIT_WORK(&dev->reset_work, nvme_reset_work); |
@@ -2089,6 +2112,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) | |||
2089 | nvme_release_prp_pools(dev); | 2112 | nvme_release_prp_pools(dev); |
2090 | put_pci: | 2113 | put_pci: |
2091 | put_device(dev->dev); | 2114 | put_device(dev->dev); |
2115 | nvme_dev_unmap(dev); | ||
2092 | free: | 2116 | free: |
2093 | kfree(dev->queues); | 2117 | kfree(dev->queues); |
2094 | kfree(dev->entry); | 2118 | kfree(dev->entry); |
@@ -2112,10 +2136,16 @@ static void nvme_shutdown(struct pci_dev *pdev) | |||
2112 | nvme_dev_disable(dev, true); | 2136 | nvme_dev_disable(dev, true); |
2113 | } | 2137 | } |
2114 | 2138 | ||
2139 | /* | ||
2140 | * The driver's remove may be called on a device in a partially initialized | ||
2141 | * state. This function must not have any dependencies on the device state in | ||
2142 | * order to proceed. | ||
2143 | */ | ||
2115 | static void nvme_remove(struct pci_dev *pdev) | 2144 | static void nvme_remove(struct pci_dev *pdev) |
2116 | { | 2145 | { |
2117 | struct nvme_dev *dev = pci_get_drvdata(pdev); | 2146 | struct nvme_dev *dev = pci_get_drvdata(pdev); |
2118 | 2147 | ||
2148 | set_bit(NVME_CTRL_REMOVING, &dev->flags); | ||
2119 | pci_set_drvdata(pdev, NULL); | 2149 | pci_set_drvdata(pdev, NULL); |
2120 | flush_work(&dev->scan_work); | 2150 | flush_work(&dev->scan_work); |
2121 | nvme_remove_namespaces(&dev->ctrl); | 2151 | nvme_remove_namespaces(&dev->ctrl); |
@@ -2126,6 +2156,7 @@ static void nvme_remove(struct pci_dev *pdev) | |||
2126 | nvme_free_queues(dev, 0); | 2156 | nvme_free_queues(dev, 0); |
2127 | nvme_release_cmb(dev); | 2157 | nvme_release_cmb(dev); |
2128 | nvme_release_prp_pools(dev); | 2158 | nvme_release_prp_pools(dev); |
2159 | nvme_dev_unmap(dev); | ||
2129 | nvme_put_ctrl(&dev->ctrl); | 2160 | nvme_put_ctrl(&dev->ctrl); |
2130 | } | 2161 | } |
2131 | 2162 | ||
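
The removal path above relies on the ordering between the new NVME_CTRL_REMOVING flag and flush_work(&dev->scan_work): the flag is set before the flush, so a reset racing with removal can no longer requeue scan work once the flush returns. A stripped-down sketch of that gate-then-flush pattern, with illustrative demo_* names (the real driver queues onto its own nvme_workq rather than using schedule_work()):

#include <linux/bitops.h>
#include <linux/workqueue.h>

#define DEMO_REMOVING	0

struct demo_dev {
	unsigned long flags;
	struct work_struct scan_work;	/* INIT_WORK()ed at probe time */
};

static void demo_queue_scan(struct demo_dev *dev)
{
	/* Refuse new scans once teardown has started. */
	if (test_bit(DEMO_REMOVING, &dev->flags))
		return;
	schedule_work(&dev->scan_work);
}

static void demo_remove(struct demo_dev *dev)
{
	/*
	 * Publish the flag first, then flush: work queued before the flag
	 * was set completes here, and demo_queue_scan() rejects anything
	 * that tries to requeue afterwards.
	 */
	set_bit(DEMO_REMOVING, &dev->flags);
	flush_work(&dev->scan_work);
}
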
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 1f76d8950a57..5c46ed9f3e14 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -223,6 +223,9 @@ static void wb_wait_for_completion(struct backing_dev_info *bdi, | |||
223 | #define WB_FRN_HIST_MAX_SLOTS (WB_FRN_HIST_THR_SLOTS / 2 + 1) | 223 | #define WB_FRN_HIST_MAX_SLOTS (WB_FRN_HIST_THR_SLOTS / 2 + 1) |
224 | /* one round can affect upto 5 slots */ | 224 | /* one round can affect upto 5 slots */ |
225 | 225 | ||
226 | static atomic_t isw_nr_in_flight = ATOMIC_INIT(0); | ||
227 | static struct workqueue_struct *isw_wq; | ||
228 | |||
226 | void __inode_attach_wb(struct inode *inode, struct page *page) | 229 | void __inode_attach_wb(struct inode *inode, struct page *page) |
227 | { | 230 | { |
228 | struct backing_dev_info *bdi = inode_to_bdi(inode); | 231 | struct backing_dev_info *bdi = inode_to_bdi(inode); |
@@ -317,7 +320,6 @@ static void inode_switch_wbs_work_fn(struct work_struct *work) | |||
317 | struct inode_switch_wbs_context *isw = | 320 | struct inode_switch_wbs_context *isw = |
318 | container_of(work, struct inode_switch_wbs_context, work); | 321 | container_of(work, struct inode_switch_wbs_context, work); |
319 | struct inode *inode = isw->inode; | 322 | struct inode *inode = isw->inode; |
320 | struct super_block *sb = inode->i_sb; | ||
321 | struct address_space *mapping = inode->i_mapping; | 323 | struct address_space *mapping = inode->i_mapping; |
322 | struct bdi_writeback *old_wb = inode->i_wb; | 324 | struct bdi_writeback *old_wb = inode->i_wb; |
323 | struct bdi_writeback *new_wb = isw->new_wb; | 325 | struct bdi_writeback *new_wb = isw->new_wb; |
@@ -424,8 +426,9 @@ skip_switch: | |||
424 | wb_put(new_wb); | 426 | wb_put(new_wb); |
425 | 427 | ||
426 | iput(inode); | 428 | iput(inode); |
427 | deactivate_super(sb); | ||
428 | kfree(isw); | 429 | kfree(isw); |
430 | |||
431 | atomic_dec(&isw_nr_in_flight); | ||
429 | } | 432 | } |
430 | 433 | ||
431 | static void inode_switch_wbs_rcu_fn(struct rcu_head *rcu_head) | 434 | static void inode_switch_wbs_rcu_fn(struct rcu_head *rcu_head) |
@@ -435,7 +438,7 @@ static void inode_switch_wbs_rcu_fn(struct rcu_head *rcu_head) | |||
435 | 438 | ||
436 | /* needs to grab bh-unsafe locks, bounce to work item */ | 439 | /* needs to grab bh-unsafe locks, bounce to work item */ |
437 | INIT_WORK(&isw->work, inode_switch_wbs_work_fn); | 440 | INIT_WORK(&isw->work, inode_switch_wbs_work_fn); |
438 | schedule_work(&isw->work); | 441 | queue_work(isw_wq, &isw->work); |
439 | } | 442 | } |
440 | 443 | ||
441 | /** | 444 | /** |
@@ -471,20 +474,20 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id) | |||
471 | 474 | ||
472 | /* while holding I_WB_SWITCH, no one else can update the association */ | 475 | /* while holding I_WB_SWITCH, no one else can update the association */ |
473 | spin_lock(&inode->i_lock); | 476 | spin_lock(&inode->i_lock); |
474 | 477 | if (!(inode->i_sb->s_flags & MS_ACTIVE) || | |
475 | if (inode->i_state & (I_WB_SWITCH | I_FREEING) || | 478 | inode->i_state & (I_WB_SWITCH | I_FREEING) || |
476 | inode_to_wb(inode) == isw->new_wb) | 479 | inode_to_wb(inode) == isw->new_wb) { |
477 | goto out_unlock; | 480 | spin_unlock(&inode->i_lock); |
478 | 481 | goto out_free; | |
479 | if (!atomic_inc_not_zero(&inode->i_sb->s_active)) | 482 | } |
480 | goto out_unlock; | ||
481 | |||
482 | inode->i_state |= I_WB_SWITCH; | 483 | inode->i_state |= I_WB_SWITCH; |
483 | spin_unlock(&inode->i_lock); | 484 | spin_unlock(&inode->i_lock); |
484 | 485 | ||
485 | ihold(inode); | 486 | ihold(inode); |
486 | isw->inode = inode; | 487 | isw->inode = inode; |
487 | 488 | ||
489 | atomic_inc(&isw_nr_in_flight); | ||
490 | |||
488 | /* | 491 | /* |
489 | * In addition to synchronizing among switchers, I_WB_SWITCH tells | 492 | * In addition to synchronizing among switchers, I_WB_SWITCH tells |
490 | * the RCU protected stat update paths to grab the mapping's | 493 | * the RCU protected stat update paths to grab the mapping's |
@@ -494,8 +497,6 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id) | |||
494 | call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn); | 497 | call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn); |
495 | return; | 498 | return; |
496 | 499 | ||
497 | out_unlock: | ||
498 | spin_unlock(&inode->i_lock); | ||
499 | out_free: | 500 | out_free: |
500 | if (isw->new_wb) | 501 | if (isw->new_wb) |
501 | wb_put(isw->new_wb); | 502 | wb_put(isw->new_wb); |
@@ -847,6 +848,33 @@ restart: | |||
847 | wb_put(last_wb); | 848 | wb_put(last_wb); |
848 | } | 849 | } |
849 | 850 | ||
851 | /** | ||
852 | * cgroup_writeback_umount - flush inode wb switches for umount | ||
853 | * | ||
854 | * This function is called when a super_block is about to be destroyed and | ||
855 | * flushes in-flight inode wb switches. An inode wb switch goes through | ||
856 | * RCU and then workqueue, so the two need to be flushed in order to ensure | ||
857 | * that all previously scheduled switches are finished. As wb switches are | ||
858 | * rare occurrences and synchronize_rcu() can take a while, perform | ||
859 | * flushing iff wb switches are in flight. | ||
860 | */ | ||
861 | void cgroup_writeback_umount(void) | ||
862 | { | ||
863 | if (atomic_read(&isw_nr_in_flight)) { | ||
864 | synchronize_rcu(); | ||
865 | flush_workqueue(isw_wq); | ||
866 | } | ||
867 | } | ||
868 | |||
869 | static int __init cgroup_writeback_init(void) | ||
870 | { | ||
871 | isw_wq = alloc_workqueue("inode_switch_wbs", 0, 0); | ||
872 | if (!isw_wq) | ||
873 | return -ENOMEM; | ||
874 | return 0; | ||
875 | } | ||
876 | fs_initcall(cgroup_writeback_init); | ||
877 | |||
850 | #else /* CONFIG_CGROUP_WRITEBACK */ | 878 | #else /* CONFIG_CGROUP_WRITEBACK */ |
851 | 879 | ||
852 | static struct bdi_writeback * | 880 | static struct bdi_writeback * |
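
The kernel-doc above spells out why the drain is two-stage: a switch is handed off with call_rcu() and only then queued on isw_wq, so both the RCU grace period and the workqueue must be flushed, and isw_nr_in_flight keeps the common no-switch-in-flight case free. A self-contained sketch of that pattern with illustrative demo_* names:

#include <linux/atomic.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

static atomic_t demo_in_flight = ATOMIC_INIT(0);
static struct workqueue_struct *demo_wq;	/* alloc_workqueue() at init */

struct demo_switch {
	struct rcu_head rcu_head;
	struct work_struct work;
};

static void demo_work_fn(struct work_struct *work)
{
	/* ... perform the actual switch here ... */
	kfree(container_of(work, struct demo_switch, work));
	atomic_dec(&demo_in_flight);
}

static void demo_rcu_fn(struct rcu_head *rcu_head)
{
	struct demo_switch *sw = container_of(rcu_head, struct demo_switch,
					      rcu_head);

	INIT_WORK(&sw->work, demo_work_fn);
	queue_work(demo_wq, &sw->work);
}

static void demo_schedule(struct demo_switch *sw)
{
	atomic_inc(&demo_in_flight);
	call_rcu(&sw->rcu_head, demo_rcu_fn);
}

static void demo_flush_all(void)
{
	/* Cheap when idle; otherwise drain both stages, in handoff order. */
	if (atomic_read(&demo_in_flight)) {
		synchronize_rcu();
		flush_workqueue(demo_wq);
	}
}
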
diff --git a/fs/super.c b/fs/super.c
index 1182af8fd5ff..74914b1bae70 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -415,6 +415,7 @@ void generic_shutdown_super(struct super_block *sb) | |||
415 | sb->s_flags &= ~MS_ACTIVE; | 415 | sb->s_flags &= ~MS_ACTIVE; |
416 | 416 | ||
417 | fsnotify_unmount_inodes(sb); | 417 | fsnotify_unmount_inodes(sb); |
418 | cgroup_writeback_umount(); | ||
418 | 419 | ||
419 | evict_inodes(sb); | 420 | evict_inodes(sb); |
420 | 421 | ||
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 5349e6816cbb..cb6888824108 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -310,6 +310,43 @@ static inline void bio_clear_flag(struct bio *bio, unsigned int bit) | |||
310 | bio->bi_flags &= ~(1U << bit); | 310 | bio->bi_flags &= ~(1U << bit); |
311 | } | 311 | } |
312 | 312 | ||
313 | static inline void bio_get_first_bvec(struct bio *bio, struct bio_vec *bv) | ||
314 | { | ||
315 | *bv = bio_iovec(bio); | ||
316 | } | ||
317 | |||
318 | static inline void bio_get_last_bvec(struct bio *bio, struct bio_vec *bv) | ||
319 | { | ||
320 | struct bvec_iter iter = bio->bi_iter; | ||
321 | int idx; | ||
322 | |||
323 | if (!bio_flagged(bio, BIO_CLONED)) { | ||
324 | *bv = bio->bi_io_vec[bio->bi_vcnt - 1]; | ||
325 | return; | ||
326 | } | ||
327 | |||
328 | if (unlikely(!bio_multiple_segments(bio))) { | ||
329 | *bv = bio_iovec(bio); | ||
330 | return; | ||
331 | } | ||
332 | |||
333 | bio_advance_iter(bio, &iter, iter.bi_size); | ||
334 | |||
335 | if (!iter.bi_bvec_done) | ||
336 | idx = iter.bi_idx - 1; | ||
337 | else /* in the middle of bvec */ | ||
338 | idx = iter.bi_idx; | ||
339 | |||
340 | *bv = bio->bi_io_vec[idx]; | ||
341 | |||
342 | /* | ||
343 | * iter.bi_bvec_done records actual length of the last bvec | ||
344 | * if this bio ends in the middle of one io vector | ||
345 | */ | ||
346 | if (iter.bi_bvec_done) | ||
347 | bv->bv_len = iter.bi_bvec_done; | ||
348 | } | ||
349 | |||
313 | enum bip_flags { | 350 | enum bip_flags { |
314 | BIP_BLOCK_INTEGRITY = 1 << 0, /* block layer owns integrity data */ | 351 | BIP_BLOCK_INTEGRITY = 1 << 0, /* block layer owns integrity data */ |
315 | BIP_MAPPED_INTEGRITY = 1 << 1, /* ref tag has been remapped */ | 352 | BIP_MAPPED_INTEGRITY = 1 << 1, /* ref tag has been remapped */ |
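
A usage sketch for the two accessors above, mirroring how blk_phys_contig_segment() in blk-merge.c consumes them; demo_phys_contig() is an illustrative name, not an existing helper. The reason bi_io_vec[bi_vcnt - 1] is not enough is that for a cloned or split bio the iterator may stop short of the vector array, and bi_bvec_done then shortens the last segment actually covered:

#include <linux/bio.h>

/* True if bio's last segment and nxt's first are physically contiguous. */
static bool demo_phys_contig(struct bio *bio, struct bio *nxt)
{
	struct bio_vec end_bv, nxt_bv;

	if (!bio_has_data(bio))
		return true;

	bio_get_last_bvec(bio, &end_bv);	/* honours bi_iter/bi_bvec_done */
	bio_get_first_bvec(nxt, &nxt_bv);

	return BIOVEC_PHYS_MERGEABLE(&end_bv, &nxt_bv);
}
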
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 4571ef1a12a9..413c84fbc4ed 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -895,7 +895,7 @@ static inline unsigned int blk_rq_get_max_sectors(struct request *rq) | |||
895 | { | 895 | { |
896 | struct request_queue *q = rq->q; | 896 | struct request_queue *q = rq->q; |
897 | 897 | ||
898 | if (unlikely(rq->cmd_type == REQ_TYPE_BLOCK_PC)) | 898 | if (unlikely(rq->cmd_type != REQ_TYPE_FS)) |
899 | return q->limits.max_hw_sectors; | 899 | return q->limits.max_hw_sectors; |
900 | 900 | ||
901 | if (!q->limits.chunk_sectors || (rq->cmd_flags & REQ_DISCARD)) | 901 | if (!q->limits.chunk_sectors || (rq->cmd_flags & REQ_DISCARD)) |
@@ -1372,6 +1372,13 @@ static inline void put_dev_sector(Sector p) | |||
1372 | page_cache_release(p.v); | 1372 | page_cache_release(p.v); |
1373 | } | 1373 | } |
1374 | 1374 | ||
1375 | static inline bool __bvec_gap_to_prev(struct request_queue *q, | ||
1376 | struct bio_vec *bprv, unsigned int offset) | ||
1377 | { | ||
1378 | return offset || | ||
1379 | ((bprv->bv_offset + bprv->bv_len) & queue_virt_boundary(q)); | ||
1380 | } | ||
1381 | |||
1375 | /* | 1382 | /* |
1376 | * Check if adding a bio_vec after bprv with offset would create a gap in | 1383 | * Check if adding a bio_vec after bprv with offset would create a gap in |
1377 | * the SG list. Most drivers don't care about this, but some do. | 1384 | * the SG list. Most drivers don't care about this, but some do. |
@@ -1381,18 +1388,22 @@ static inline bool bvec_gap_to_prev(struct request_queue *q, | |||
1381 | { | 1388 | { |
1382 | if (!queue_virt_boundary(q)) | 1389 | if (!queue_virt_boundary(q)) |
1383 | return false; | 1390 | return false; |
1384 | return offset || | 1391 | return __bvec_gap_to_prev(q, bprv, offset); |
1385 | ((bprv->bv_offset + bprv->bv_len) & queue_virt_boundary(q)); | ||
1386 | } | 1392 | } |
1387 | 1393 | ||
1388 | static inline bool bio_will_gap(struct request_queue *q, struct bio *prev, | 1394 | static inline bool bio_will_gap(struct request_queue *q, struct bio *prev, |
1389 | struct bio *next) | 1395 | struct bio *next) |
1390 | { | 1396 | { |
1391 | if (!bio_has_data(prev)) | 1397 | if (bio_has_data(prev) && queue_virt_boundary(q)) { |
1392 | return false; | 1398 | struct bio_vec pb, nb; |
1399 | |||
1400 | bio_get_last_bvec(prev, &pb); | ||
1401 | bio_get_first_bvec(next, &nb); | ||
1393 | 1402 | ||
1394 | return bvec_gap_to_prev(q, &prev->bi_io_vec[prev->bi_vcnt - 1], | 1403 | return __bvec_gap_to_prev(q, &pb, nb.bv_offset); |
1395 | next->bi_io_vec[0].bv_offset); | 1404 | } |
1405 | |||
1406 | return false; | ||
1396 | } | 1407 | } |
1397 | 1408 | ||
1398 | static inline bool req_gap_back_merge(struct request *req, struct bio *bio) | 1409 | static inline bool req_gap_back_merge(struct request *req, struct bio *bio) |
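
A small worked example of the rule factored into __bvec_gap_to_prev() above, written as plain stand-alone C so it can be compiled and run on its own; the 0xfff mask stands in for a 4 KiB queue_virt_boundary(), and the expected outputs are noted in the comments:

#include <stdbool.h>
#include <stdio.h>

static bool gap_to_prev(unsigned int boundary_mask,
			unsigned int prv_offset, unsigned int prv_len,
			unsigned int next_offset)
{
	/* Same expression as __bvec_gap_to_prev(), with the mask passed in. */
	return next_offset || ((prv_offset + prv_len) & boundary_mask);
}

int main(void)
{
	/* prev ends exactly on the 4 KiB boundary, next starts at 0: no gap */
	printf("%d\n", gap_to_prev(0xfff, 0, 4096, 0));		/* prints 0 */
	/* prev stops 512 bytes in: the device would see an SG gap */
	printf("%d\n", gap_to_prev(0xfff, 0, 512, 0));		/* prints 1 */
	/* next segment starts 256 bytes into its page: gap as well */
	printf("%d\n", gap_to_prev(0xfff, 0, 4096, 256));	/* prints 1 */
	return 0;
}
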
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index b333c945e571..d0b5ca5d4e08 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -198,6 +198,7 @@ void wbc_attach_and_unlock_inode(struct writeback_control *wbc, | |||
198 | void wbc_detach_inode(struct writeback_control *wbc); | 198 | void wbc_detach_inode(struct writeback_control *wbc); |
199 | void wbc_account_io(struct writeback_control *wbc, struct page *page, | 199 | void wbc_account_io(struct writeback_control *wbc, struct page *page, |
200 | size_t bytes); | 200 | size_t bytes); |
201 | void cgroup_writeback_umount(void); | ||
201 | 202 | ||
202 | /** | 203 | /** |
203 | * inode_attach_wb - associate an inode with its wb | 204 | * inode_attach_wb - associate an inode with its wb |
@@ -301,6 +302,10 @@ static inline void wbc_account_io(struct writeback_control *wbc, | |||
301 | { | 302 | { |
302 | } | 303 | } |
303 | 304 | ||
305 | static inline void cgroup_writeback_umount(void) | ||
306 | { | ||
307 | } | ||
308 | |||
304 | #endif /* CONFIG_CGROUP_WRITEBACK */ | 309 | #endif /* CONFIG_CGROUP_WRITEBACK */ |
305 | 310 | ||
306 | /* | 311 | /* |