-rw-r--r--  block/blk-map.c            |  91
-rw-r--r--  block/blk-merge.c          |   8
-rw-r--r--  drivers/nvme/host/core.c   | 111
-rw-r--r--  drivers/nvme/host/nvme.h   |   8
-rw-r--r--  drivers/nvme/host/pci.c    | 149
-rw-r--r--  fs/fs-writeback.c          |  54
-rw-r--r--  fs/super.c                 |   1
-rw-r--r--  include/linux/bio.h        |  37
-rw-r--r--  include/linux/blkdev.h     |  25
-rw-r--r--  include/linux/writeback.h  |   5
10 files changed, 341 insertions, 148 deletions
diff --git a/block/blk-map.c b/block/blk-map.c
index f565e11f465a..a54f0543b956 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -57,6 +57,49 @@ static int __blk_rq_unmap_user(struct bio *bio)
 	return ret;
 }
 
+static int __blk_rq_map_user_iov(struct request *rq,
+		struct rq_map_data *map_data, struct iov_iter *iter,
+		gfp_t gfp_mask, bool copy)
+{
+	struct request_queue *q = rq->q;
+	struct bio *bio, *orig_bio;
+	int ret;
+
+	if (copy)
+		bio = bio_copy_user_iov(q, map_data, iter, gfp_mask);
+	else
+		bio = bio_map_user_iov(q, iter, gfp_mask);
+
+	if (IS_ERR(bio))
+		return PTR_ERR(bio);
+
+	if (map_data && map_data->null_mapped)
+		bio_set_flag(bio, BIO_NULL_MAPPED);
+
+	iov_iter_advance(iter, bio->bi_iter.bi_size);
+	if (map_data)
+		map_data->offset += bio->bi_iter.bi_size;
+
+	orig_bio = bio;
+	blk_queue_bounce(q, &bio);
+
+	/*
+	 * We link the bounce buffer in and could have to traverse it
+	 * later so we have to get a ref to prevent it from being freed
+	 */
+	bio_get(bio);
+
+	ret = blk_rq_append_bio(q, rq, bio);
+	if (ret) {
+		bio_endio(bio);
+		__blk_rq_unmap_user(orig_bio);
+		bio_put(bio);
+		return ret;
+	}
+
+	return 0;
+}
+
 /**
  * blk_rq_map_user_iov - map user data to a request, for REQ_TYPE_BLOCK_PC usage
  * @q:		request queue where request should be inserted
@@ -82,10 +125,11 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
 			struct rq_map_data *map_data,
 			const struct iov_iter *iter, gfp_t gfp_mask)
 {
-	struct bio *bio;
-	int unaligned = 0;
-	struct iov_iter i;
 	struct iovec iov, prv = {.iov_base = NULL, .iov_len = 0};
+	bool copy = (q->dma_pad_mask & iter->count) || map_data;
+	struct bio *bio = NULL;
+	struct iov_iter i;
+	int ret;
 
 	if (!iter || !iter->count)
 		return -EINVAL;
@@ -101,42 +145,29 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
 		 */
 		if ((uaddr & queue_dma_alignment(q)) ||
 		    iovec_gap_to_prv(q, &prv, &iov))
-			unaligned = 1;
+			copy = true;
 
 		prv.iov_base = iov.iov_base;
 		prv.iov_len = iov.iov_len;
 	}
 
-	if (unaligned || (q->dma_pad_mask & iter->count) || map_data)
-		bio = bio_copy_user_iov(q, map_data, iter, gfp_mask);
-	else
-		bio = bio_map_user_iov(q, iter, gfp_mask);
-
-	if (IS_ERR(bio))
-		return PTR_ERR(bio);
-
-	if (map_data && map_data->null_mapped)
-		bio_set_flag(bio, BIO_NULL_MAPPED);
-
-	if (bio->bi_iter.bi_size != iter->count) {
-		/*
-		 * Grab an extra reference to this bio, as bio_unmap_user()
-		 * expects to be able to drop it twice as it happens on the
-		 * normal IO completion path
-		 */
-		bio_get(bio);
-		bio_endio(bio);
-		__blk_rq_unmap_user(bio);
-		return -EINVAL;
-	}
+	i = *iter;
+	do {
+		ret =__blk_rq_map_user_iov(rq, map_data, &i, gfp_mask, copy);
+		if (ret)
+			goto unmap_rq;
+		if (!bio)
+			bio = rq->bio;
+	} while (iov_iter_count(&i));
 
 	if (!bio_flagged(bio, BIO_USER_MAPPED))
 		rq->cmd_flags |= REQ_COPY_USER;
-
-	blk_queue_bounce(q, &bio);
-	bio_get(bio);
-	blk_rq_bio_prep(q, rq, bio);
 	return 0;
+
+unmap_rq:
+	__blk_rq_unmap_user(bio);
+	rq->bio = NULL;
+	return -EINVAL;
 }
 EXPORT_SYMBOL(blk_rq_map_user_iov);
 
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 888a7fec81f7..261353166dcf 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -304,7 +304,6 @@ static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
 					 struct bio *nxt)
 {
 	struct bio_vec end_bv = { NULL }, nxt_bv;
-	struct bvec_iter iter;
 
 	if (!blk_queue_cluster(q))
 		return 0;
@@ -316,11 +315,8 @@ static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
 	if (!bio_has_data(bio))
 		return 1;
 
-	bio_for_each_segment(end_bv, bio, iter)
-		if (end_bv.bv_len == iter.bi_size)
-			break;
-
-	nxt_bv = bio_iovec(nxt);
+	bio_get_last_bvec(bio, &end_bv);
+	bio_get_first_bvec(nxt, &nxt_bv);
 
 	if (!BIOVEC_PHYS_MERGEABLE(&end_bv, &nxt_bv))
 		return 0;
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 3cd921e6121e..03c46412fff4 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -55,8 +55,9 @@ static void nvme_free_ns(struct kref *kref)
 	ns->disk->private_data = NULL;
 	spin_unlock(&dev_list_lock);
 
-	nvme_put_ctrl(ns->ctrl);
 	put_disk(ns->disk);
+	ida_simple_remove(&ns->ctrl->ns_ida, ns->instance);
+	nvme_put_ctrl(ns->ctrl);
 	kfree(ns);
 }
 
@@ -183,7 +184,7 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
 			goto out_unmap;
 		}
 
-		if (meta_buffer) {
+		if (meta_buffer && meta_len) {
 			struct bio_integrity_payload *bip;
 
 			meta = kmalloc(meta_len, GFP_KERNEL);
@@ -373,6 +374,8 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 
 	if (copy_from_user(&io, uio, sizeof(io)))
 		return -EFAULT;
+	if (io.flags)
+		return -EINVAL;
 
 	switch (io.opcode) {
 	case nvme_cmd_write:
@@ -424,6 +427,8 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 		return -EACCES;
 	if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
 		return -EFAULT;
+	if (cmd.flags)
+		return -EINVAL;
 
 	memset(&c, 0, sizeof(c));
 	c.common.opcode = cmd.opcode;
@@ -556,6 +561,10 @@ static int nvme_revalidate_disk(struct gendisk *disk)
 	u16 old_ms;
 	unsigned short bs;
 
+	if (test_bit(NVME_NS_DEAD, &ns->flags)) {
+		set_capacity(disk, 0);
+		return -ENODEV;
+	}
 	if (nvme_identify_ns(ns->ctrl, ns->ns_id, &id)) {
 		dev_warn(ns->ctrl->dev, "%s: Identify failure nvme%dn%d\n",
 				__func__, ns->ctrl->instance, ns->ns_id);
@@ -831,6 +840,23 @@ int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl)
 	return ret;
 }
 
+static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
+		struct request_queue *q)
+{
+	if (ctrl->max_hw_sectors) {
+		u32 max_segments =
+			(ctrl->max_hw_sectors / (ctrl->page_size >> 9)) + 1;
+
+		blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors);
+		blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX));
+	}
+	if (ctrl->stripe_size)
+		blk_queue_chunk_sectors(q, ctrl->stripe_size >> 9);
+	if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
+		blk_queue_flush(q, REQ_FLUSH | REQ_FUA);
+	blk_queue_virt_boundary(q, ctrl->page_size - 1);
+}
+
 /*
  * Initialize the cached copies of the Identify data and various controller
  * register in our nvme_ctrl structure.  This should be called as soon as
@@ -888,6 +914,8 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
 		}
 	}
 
+	nvme_set_queue_limits(ctrl, ctrl->admin_q);
+
 	kfree(id);
 	return 0;
 }
@@ -1118,9 +1146,13 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 	if (!ns)
 		return;
 
+	ns->instance = ida_simple_get(&ctrl->ns_ida, 1, 0, GFP_KERNEL);
+	if (ns->instance < 0)
+		goto out_free_ns;
+
 	ns->queue = blk_mq_init_queue(ctrl->tagset);
 	if (IS_ERR(ns->queue))
-		goto out_free_ns;
+		goto out_release_instance;
 	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, ns->queue);
 	ns->queue->queuedata = ns;
 	ns->ctrl = ctrl;
@@ -1134,17 +1166,9 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 	ns->disk = disk;
 	ns->lba_shift = 9; /* set to a default value for 512 until disk is validated */
 
+
 	blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
-	if (ctrl->max_hw_sectors) {
-		blk_queue_max_hw_sectors(ns->queue, ctrl->max_hw_sectors);
-		blk_queue_max_segments(ns->queue,
-			(ctrl->max_hw_sectors / (ctrl->page_size >> 9)) + 1);
-	}
-	if (ctrl->stripe_size)
-		blk_queue_chunk_sectors(ns->queue, ctrl->stripe_size >> 9);
-	if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
-		blk_queue_flush(ns->queue, REQ_FLUSH | REQ_FUA);
-	blk_queue_virt_boundary(ns->queue, ctrl->page_size - 1);
+	nvme_set_queue_limits(ctrl, ns->queue);
 
 	disk->major = nvme_major;
 	disk->first_minor = 0;
@@ -1153,7 +1177,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 	disk->queue = ns->queue;
 	disk->driverfs_dev = ctrl->device;
 	disk->flags = GENHD_FL_EXT_DEVT;
-	sprintf(disk->disk_name, "nvme%dn%d", ctrl->instance, nsid);
+	sprintf(disk->disk_name, "nvme%dn%d", ctrl->instance, ns->instance);
 
 	if (nvme_revalidate_disk(ns->disk))
 		goto out_free_disk;
@@ -1173,40 +1197,29 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 	kfree(disk);
  out_free_queue:
 	blk_cleanup_queue(ns->queue);
+ out_release_instance:
+	ida_simple_remove(&ctrl->ns_ida, ns->instance);
  out_free_ns:
 	kfree(ns);
 }
 
 static void nvme_ns_remove(struct nvme_ns *ns)
 {
-	bool kill = nvme_io_incapable(ns->ctrl) &&
-			!blk_queue_dying(ns->queue);
-
-	lockdep_assert_held(&ns->ctrl->namespaces_mutex);
-
-	if (kill) {
-		blk_set_queue_dying(ns->queue);
+	if (test_and_set_bit(NVME_NS_REMOVING, &ns->flags))
+		return;
 
-		/*
-		 * The controller was shutdown first if we got here through
-		 * device removal. The shutdown may requeue outstanding
-		 * requests. These need to be aborted immediately so
-		 * del_gendisk doesn't block indefinitely for their completion.
-		 */
-		blk_mq_abort_requeue_list(ns->queue);
-	}
 	if (ns->disk->flags & GENHD_FL_UP) {
 		if (blk_get_integrity(ns->disk))
 			blk_integrity_unregister(ns->disk);
 		sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
 					&nvme_ns_attr_group);
 		del_gendisk(ns->disk);
-	}
-	if (kill || !blk_queue_dying(ns->queue)) {
 		blk_mq_abort_requeue_list(ns->queue);
 		blk_cleanup_queue(ns->queue);
 	}
+	mutex_lock(&ns->ctrl->namespaces_mutex);
 	list_del_init(&ns->list);
+	mutex_unlock(&ns->ctrl->namespaces_mutex);
 	nvme_put_ns(ns);
 }
 
@@ -1300,10 +1313,8 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
 {
 	struct nvme_ns *ns, *next;
 
-	mutex_lock(&ctrl->namespaces_mutex);
 	list_for_each_entry_safe(ns, next, &ctrl->namespaces, list)
 		nvme_ns_remove(ns);
-	mutex_unlock(&ctrl->namespaces_mutex);
 }
 
 static DEFINE_IDA(nvme_instance_ida);
@@ -1350,6 +1361,7 @@ static void nvme_free_ctrl(struct kref *kref)
 
 	put_device(ctrl->device);
 	nvme_release_instance(ctrl);
+	ida_destroy(&ctrl->ns_ida);
 
 	ctrl->ops->free_ctrl(ctrl);
 }
@@ -1390,6 +1402,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
 	}
 	get_device(ctrl->device);
 	dev_set_drvdata(ctrl->device, ctrl);
+	ida_init(&ctrl->ns_ida);
 
 	spin_lock(&dev_list_lock);
 	list_add_tail(&ctrl->node, &nvme_ctrl_list);
@@ -1402,6 +1415,38 @@ out:
 	return ret;
 }
 
+/**
+ * nvme_kill_queues(): Ends all namespace queues
+ * @ctrl: the dead controller that needs to end
+ *
+ * Call this function when the driver determines it is unable to get the
+ * controller in a state capable of servicing IO.
+ */
+void nvme_kill_queues(struct nvme_ctrl *ctrl)
+{
+	struct nvme_ns *ns;
+
+	mutex_lock(&ctrl->namespaces_mutex);
+	list_for_each_entry(ns, &ctrl->namespaces, list) {
+		if (!kref_get_unless_zero(&ns->kref))
+			continue;
+
+		/*
+		 * Revalidating a dead namespace sets capacity to 0. This will
+		 * end buffered writers dirtying pages that can't be synced.
+		 */
+		if (!test_and_set_bit(NVME_NS_DEAD, &ns->flags))
+			revalidate_disk(ns->disk);
+
+		blk_set_queue_dying(ns->queue);
+		blk_mq_abort_requeue_list(ns->queue);
+		blk_mq_start_stopped_hw_queues(ns->queue, true);
+
+		nvme_put_ns(ns);
+	}
+	mutex_unlock(&ctrl->namespaces_mutex);
+}
+
 void nvme_stop_queues(struct nvme_ctrl *ctrl)
 {
 	struct nvme_ns *ns;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 9664d07d807d..fb15ba5f5d19 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -72,6 +72,7 @@ struct nvme_ctrl {
 	struct mutex namespaces_mutex;
 	struct device *device;	/* char device */
 	struct list_head node;
+	struct ida ns_ida;
 
 	char name[12];
 	char serial[20];
@@ -102,6 +103,7 @@ struct nvme_ns {
 	struct request_queue *queue;
 	struct gendisk *disk;
 	struct kref kref;
+	int instance;
 
 	u8 eui[8];
 	u8 uuid[16];
@@ -112,6 +114,11 @@ struct nvme_ns {
 	bool ext;
 	u8 pi_type;
 	int type;
+	unsigned long flags;
+
+#define NVME_NS_REMOVING 0
+#define NVME_NS_DEAD     1
+
 	u64 mode_select_num_blocks;
 	u32 mode_select_block_len;
 };
@@ -240,6 +247,7 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl);
 
 void nvme_stop_queues(struct nvme_ctrl *ctrl);
 void nvme_start_queues(struct nvme_ctrl *ctrl);
+void nvme_kill_queues(struct nvme_ctrl *ctrl);
 
 struct request *nvme_alloc_request(struct request_queue *q,
 		struct nvme_command *cmd, unsigned int flags);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index a128672472ec..680f5780750c 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -86,7 +86,6 @@ struct nvme_queue;
 
 static int nvme_reset(struct nvme_dev *dev);
 static void nvme_process_cq(struct nvme_queue *nvmeq);
-static void nvme_remove_dead_ctrl(struct nvme_dev *dev);
 static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown);
 
 /*
@@ -120,6 +119,7 @@ struct nvme_dev {
 	unsigned long flags;
 
 #define NVME_CTRL_RESETTING    0
+#define NVME_CTRL_REMOVING     1
 
 	struct nvme_ctrl ctrl;
 	struct completion ioq_wait;
@@ -286,6 +286,17 @@ static int nvme_init_request(void *data, struct request *req,
 	return 0;
 }
 
+static void nvme_queue_scan(struct nvme_dev *dev)
+{
+	/*
+	 * Do not queue new scan work when a controller is reset during
+	 * removal.
+	 */
+	if (test_bit(NVME_CTRL_REMOVING, &dev->flags))
+		return;
+	queue_work(nvme_workq, &dev->scan_work);
+}
+
 static void nvme_complete_async_event(struct nvme_dev *dev,
 		struct nvme_completion *cqe)
 {
@@ -300,7 +311,7 @@ static void nvme_complete_async_event(struct nvme_dev *dev,
 	switch (result & 0xff07) {
 	case NVME_AER_NOTICE_NS_CHANGED:
 		dev_info(dev->dev, "rescanning\n");
-		queue_work(nvme_workq, &dev->scan_work);
+		nvme_queue_scan(dev);
 	default:
 		dev_warn(dev->dev, "async event result %08x\n", result);
 	}
@@ -679,7 +690,10 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 	spin_lock_irq(&nvmeq->q_lock);
 	if (unlikely(nvmeq->cq_vector < 0)) {
-		ret = BLK_MQ_RQ_QUEUE_BUSY;
+		if (ns && !test_bit(NVME_NS_DEAD, &ns->flags))
+			ret = BLK_MQ_RQ_QUEUE_BUSY;
+		else
+			ret = BLK_MQ_RQ_QUEUE_ERROR;
 		spin_unlock_irq(&nvmeq->q_lock);
 		goto out;
 	}
@@ -1250,6 +1264,12 @@ static struct blk_mq_ops nvme_mq_ops = {
 static void nvme_dev_remove_admin(struct nvme_dev *dev)
 {
 	if (dev->ctrl.admin_q && !blk_queue_dying(dev->ctrl.admin_q)) {
+		/*
+		 * If the controller was reset during removal, it's possible
+		 * user requests may be waiting on a stopped queue. Start the
+		 * queue to flush these to completion.
+		 */
+		blk_mq_start_stopped_hw_queues(dev->ctrl.admin_q, true);
 		blk_cleanup_queue(dev->ctrl.admin_q);
 		blk_mq_free_tag_set(&dev->admin_tagset);
 	}
@@ -1690,14 +1710,14 @@ static int nvme_dev_add(struct nvme_dev *dev)
 			return 0;
 		dev->ctrl.tagset = &dev->tagset;
 	}
-	queue_work(nvme_workq, &dev->scan_work);
+	nvme_queue_scan(dev);
 	return 0;
 }
 
-static int nvme_dev_map(struct nvme_dev *dev)
+static int nvme_pci_enable(struct nvme_dev *dev)
 {
 	u64 cap;
-	int bars, result = -ENOMEM;
+	int result = -ENOMEM;
 	struct pci_dev *pdev = to_pci_dev(dev->dev);
 
 	if (pci_enable_device_mem(pdev))
@@ -1705,24 +1725,14 @@ static int nvme_dev_map(struct nvme_dev *dev)
 
 	dev->entry[0].vector = pdev->irq;
 	pci_set_master(pdev);
-	bars = pci_select_bars(pdev, IORESOURCE_MEM);
-	if (!bars)
-		goto disable_pci;
-
-	if (pci_request_selected_regions(pdev, bars, "nvme"))
-		goto disable_pci;
 
 	if (dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64)) &&
 	    dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(32)))
 		goto disable;
 
-	dev->bar = ioremap(pci_resource_start(pdev, 0), 8192);
-	if (!dev->bar)
-		goto disable;
-
 	if (readl(dev->bar + NVME_REG_CSTS) == -1) {
 		result = -ENODEV;
-		goto unmap;
+		goto disable;
 	}
 
 	/*
@@ -1732,7 +1742,7 @@ static int nvme_dev_map(struct nvme_dev *dev)
 	if (!pdev->irq) {
 		result = pci_enable_msix(pdev, dev->entry, 1);
 		if (result < 0)
-			goto unmap;
+			goto disable;
 	}
 
 	cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
@@ -1759,18 +1769,20 @@ static int nvme_dev_map(struct nvme_dev *dev)
 	pci_save_state(pdev);
 	return 0;
 
- unmap:
-	iounmap(dev->bar);
-	dev->bar = NULL;
  disable:
-	pci_release_regions(pdev);
- disable_pci:
 	pci_disable_device(pdev);
 	return result;
 }
 
 static void nvme_dev_unmap(struct nvme_dev *dev)
 {
+	if (dev->bar)
+		iounmap(dev->bar);
+	pci_release_regions(to_pci_dev(dev->dev));
+}
+
+static void nvme_pci_disable(struct nvme_dev *dev)
+{
 	struct pci_dev *pdev = to_pci_dev(dev->dev);
 
 	if (pdev->msi_enabled)
@@ -1778,12 +1790,6 @@ static void nvme_dev_unmap(struct nvme_dev *dev)
 	else if (pdev->msix_enabled)
 		pci_disable_msix(pdev);
 
-	if (dev->bar) {
-		iounmap(dev->bar);
-		dev->bar = NULL;
-		pci_release_regions(pdev);
-	}
-
 	if (pci_is_enabled(pdev)) {
 		pci_disable_pcie_error_reporting(pdev);
 		pci_disable_device(pdev);
@@ -1842,7 +1848,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 	nvme_dev_list_remove(dev);
 
 	mutex_lock(&dev->shutdown_lock);
-	if (dev->bar) {
+	if (pci_is_enabled(to_pci_dev(dev->dev))) {
 		nvme_stop_queues(&dev->ctrl);
 		csts = readl(dev->bar + NVME_REG_CSTS);
 	}
@@ -1855,7 +1861,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 		nvme_disable_io_queues(dev);
 		nvme_disable_admin_queue(dev, shutdown);
 	}
-	nvme_dev_unmap(dev);
+	nvme_pci_disable(dev);
 
 	for (i = dev->queue_count - 1; i >= 0; i--)
 		nvme_clear_queue(dev->queues[i]);
@@ -1899,10 +1905,20 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
 	kfree(dev);
 }
 
+static void nvme_remove_dead_ctrl(struct nvme_dev *dev, int status)
+{
+	dev_warn(dev->dev, "Removing after probe failure status: %d\n", status);
+
+	kref_get(&dev->ctrl.kref);
+	nvme_dev_disable(dev, false);
+	if (!schedule_work(&dev->remove_work))
+		nvme_put_ctrl(&dev->ctrl);
+}
+
 static void nvme_reset_work(struct work_struct *work)
 {
 	struct nvme_dev *dev = container_of(work, struct nvme_dev, reset_work);
-	int result;
+	int result = -ENODEV;
 
 	if (WARN_ON(test_bit(NVME_CTRL_RESETTING, &dev->flags)))
 		goto out;
@@ -1911,37 +1927,37 @@ static void nvme_reset_work(struct work_struct *work)
 	 * If we're called to reset a live controller first shut it down before
 	 * moving on.
 	 */
-	if (dev->bar)
+	if (dev->ctrl.ctrl_config & NVME_CC_ENABLE)
 		nvme_dev_disable(dev, false);
 
 	set_bit(NVME_CTRL_RESETTING, &dev->flags);
 
-	result = nvme_dev_map(dev);
+	result = nvme_pci_enable(dev);
 	if (result)
 		goto out;
 
 	result = nvme_configure_admin_queue(dev);
 	if (result)
-		goto unmap;
+		goto out;
 
 	nvme_init_queue(dev->queues[0], 0);
 	result = nvme_alloc_admin_tags(dev);
 	if (result)
-		goto disable;
+		goto out;
 
 	result = nvme_init_identify(&dev->ctrl);
 	if (result)
-		goto free_tags;
+		goto out;
 
 	result = nvme_setup_io_queues(dev);
 	if (result)
-		goto free_tags;
+		goto out;
 
 	dev->ctrl.event_limit = NVME_NR_AEN_COMMANDS;
 
 	result = nvme_dev_list_add(dev);
 	if (result)
-		goto remove;
+		goto out;
 
 	/*
 	 * Keep the controller around but remove all namespaces if we don't have
@@ -1958,19 +1974,8 @@ static void nvme_reset_work(struct work_struct *work)
 	clear_bit(NVME_CTRL_RESETTING, &dev->flags);
 	return;
 
- remove:
-	nvme_dev_list_remove(dev);
- free_tags:
-	nvme_dev_remove_admin(dev);
-	blk_put_queue(dev->ctrl.admin_q);
-	dev->ctrl.admin_q = NULL;
-	dev->queues[0]->tags = NULL;
- disable:
-	nvme_disable_admin_queue(dev, false);
- unmap:
-	nvme_dev_unmap(dev);
  out:
-	nvme_remove_dead_ctrl(dev);
+	nvme_remove_dead_ctrl(dev, result);
 }
 
 static void nvme_remove_dead_ctrl_work(struct work_struct *work)
@@ -1978,19 +1983,12 @@ static void nvme_remove_dead_ctrl_work(struct work_struct *work)
 	struct nvme_dev *dev = container_of(work, struct nvme_dev, remove_work);
 	struct pci_dev *pdev = to_pci_dev(dev->dev);
 
+	nvme_kill_queues(&dev->ctrl);
 	if (pci_get_drvdata(pdev))
 		pci_stop_and_remove_bus_device_locked(pdev);
 	nvme_put_ctrl(&dev->ctrl);
 }
 
-static void nvme_remove_dead_ctrl(struct nvme_dev *dev)
-{
-	dev_warn(dev->dev, "Removing after probe failure\n");
-	kref_get(&dev->ctrl.kref);
-	if (!schedule_work(&dev->remove_work))
-		nvme_put_ctrl(&dev->ctrl);
-}
-
 static int nvme_reset(struct nvme_dev *dev)
 {
 	if (!dev->ctrl.admin_q || blk_queue_dying(dev->ctrl.admin_q))
@@ -2042,6 +2040,27 @@ static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
 	.free_ctrl		= nvme_pci_free_ctrl,
 };
 
+static int nvme_dev_map(struct nvme_dev *dev)
+{
+	int bars;
+	struct pci_dev *pdev = to_pci_dev(dev->dev);
+
+	bars = pci_select_bars(pdev, IORESOURCE_MEM);
+	if (!bars)
+		return -ENODEV;
+	if (pci_request_selected_regions(pdev, bars, "nvme"))
+		return -ENODEV;
+
+	dev->bar = ioremap(pci_resource_start(pdev, 0), 8192);
+	if (!dev->bar)
+		goto release;
+
+	return 0;
+ release:
+	pci_release_regions(pdev);
+	return -ENODEV;
+}
+
 static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	int node, result = -ENOMEM;
@@ -2066,6 +2085,10 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	dev->dev = get_device(&pdev->dev);
 	pci_set_drvdata(pdev, dev);
 
+	result = nvme_dev_map(dev);
+	if (result)
+		goto free;
+
 	INIT_LIST_HEAD(&dev->node);
 	INIT_WORK(&dev->scan_work, nvme_dev_scan);
 	INIT_WORK(&dev->reset_work, nvme_reset_work);
@@ -2089,6 +2112,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	nvme_release_prp_pools(dev);
  put_pci:
 	put_device(dev->dev);
+	nvme_dev_unmap(dev);
  free:
 	kfree(dev->queues);
 	kfree(dev->entry);
@@ -2112,10 +2136,16 @@ static void nvme_shutdown(struct pci_dev *pdev)
 	nvme_dev_disable(dev, true);
 }
 
+/*
+ * The driver's remove may be called on a device in a partially initialized
+ * state. This function must not have any dependencies on the device state in
+ * order to proceed.
+ */
 static void nvme_remove(struct pci_dev *pdev)
 {
 	struct nvme_dev *dev = pci_get_drvdata(pdev);
 
+	set_bit(NVME_CTRL_REMOVING, &dev->flags);
 	pci_set_drvdata(pdev, NULL);
 	flush_work(&dev->scan_work);
 	nvme_remove_namespaces(&dev->ctrl);
@@ -2126,6 +2156,7 @@ static void nvme_remove(struct pci_dev *pdev)
 	nvme_free_queues(dev, 0);
 	nvme_release_cmb(dev);
 	nvme_release_prp_pools(dev);
+	nvme_dev_unmap(dev);
 	nvme_put_ctrl(&dev->ctrl);
 }
 
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 1f76d8950a57..5c46ed9f3e14 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -223,6 +223,9 @@ static void wb_wait_for_completion(struct backing_dev_info *bdi,
 #define WB_FRN_HIST_MAX_SLOTS	(WB_FRN_HIST_THR_SLOTS / 2 + 1)
 					/* one round can affect upto 5 slots */
 
+static atomic_t isw_nr_in_flight = ATOMIC_INIT(0);
+static struct workqueue_struct *isw_wq;
+
 void __inode_attach_wb(struct inode *inode, struct page *page)
 {
 	struct backing_dev_info *bdi = inode_to_bdi(inode);
@@ -317,7 +320,6 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
 	struct inode_switch_wbs_context *isw =
 		container_of(work, struct inode_switch_wbs_context, work);
 	struct inode *inode = isw->inode;
-	struct super_block *sb = inode->i_sb;
 	struct address_space *mapping = inode->i_mapping;
 	struct bdi_writeback *old_wb = inode->i_wb;
 	struct bdi_writeback *new_wb = isw->new_wb;
@@ -424,8 +426,9 @@ skip_switch:
 	wb_put(new_wb);
 
 	iput(inode);
-	deactivate_super(sb);
 	kfree(isw);
+
+	atomic_dec(&isw_nr_in_flight);
 }
 
@@ -435,7 +438,7 @@ static void inode_switch_wbs_rcu_fn(struct rcu_head *rcu_head)
 
 	/* needs to grab bh-unsafe locks, bounce to work item */
 	INIT_WORK(&isw->work, inode_switch_wbs_work_fn);
-	schedule_work(&isw->work);
+	queue_work(isw_wq, &isw->work);
 }
 
 /**
@@ -471,20 +474,20 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
 
 	/* while holding I_WB_SWITCH, no one else can update the association */
 	spin_lock(&inode->i_lock);
-
-	if (inode->i_state & (I_WB_SWITCH | I_FREEING) ||
-	    inode_to_wb(inode) == isw->new_wb)
-		goto out_unlock;
-
-	if (!atomic_inc_not_zero(&inode->i_sb->s_active))
-		goto out_unlock;
-
+	if (!(inode->i_sb->s_flags & MS_ACTIVE) ||
+	    inode->i_state & (I_WB_SWITCH | I_FREEING) ||
+	    inode_to_wb(inode) == isw->new_wb) {
+		spin_unlock(&inode->i_lock);
+		goto out_free;
+	}
 	inode->i_state |= I_WB_SWITCH;
 	spin_unlock(&inode->i_lock);
 
 	ihold(inode);
 	isw->inode = inode;
 
+	atomic_inc(&isw_nr_in_flight);
+
 	/*
 	 * In addition to synchronizing among switchers, I_WB_SWITCH tells
 	 * the RCU protected stat update paths to grab the mapping's
@@ -494,8 +497,6 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
 	call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn);
 	return;
 
-out_unlock:
-	spin_unlock(&inode->i_lock);
 out_free:
 	if (isw->new_wb)
 		wb_put(isw->new_wb);
@@ -847,6 +848,33 @@ restart:
 		wb_put(last_wb);
 }
 
+/**
+ * cgroup_writeback_umount - flush inode wb switches for umount
+ *
+ * This function is called when a super_block is about to be destroyed and
+ * flushes in-flight inode wb switches.  An inode wb switch goes through
+ * RCU and then workqueue, so the two need to be flushed in order to ensure
+ * that all previously scheduled switches are finished.  As wb switches are
+ * rare occurrences and synchronize_rcu() can take a while, perform
+ * flushing iff wb switches are in flight.
+ */
+void cgroup_writeback_umount(void)
+{
+	if (atomic_read(&isw_nr_in_flight)) {
+		synchronize_rcu();
+		flush_workqueue(isw_wq);
+	}
+}
+
+static int __init cgroup_writeback_init(void)
+{
+	isw_wq = alloc_workqueue("inode_switch_wbs", 0, 0);
+	if (!isw_wq)
+		return -ENOMEM;
+	return 0;
+}
+fs_initcall(cgroup_writeback_init);
+
 #else	/* CONFIG_CGROUP_WRITEBACK */
 
 static struct bdi_writeback *
diff --git a/fs/super.c b/fs/super.c
index 1182af8fd5ff..74914b1bae70 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -415,6 +415,7 @@ void generic_shutdown_super(struct super_block *sb)
 		sb->s_flags &= ~MS_ACTIVE;
 
 		fsnotify_unmount_inodes(sb);
+		cgroup_writeback_umount();
 
 		evict_inodes(sb);
 
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 5349e6816cbb..cb6888824108 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -310,6 +310,43 @@ static inline void bio_clear_flag(struct bio *bio, unsigned int bit)
 	bio->bi_flags &= ~(1U << bit);
 }
 
+static inline void bio_get_first_bvec(struct bio *bio, struct bio_vec *bv)
+{
+	*bv = bio_iovec(bio);
+}
+
+static inline void bio_get_last_bvec(struct bio *bio, struct bio_vec *bv)
+{
+	struct bvec_iter iter = bio->bi_iter;
+	int idx;
+
+	if (!bio_flagged(bio, BIO_CLONED)) {
+		*bv = bio->bi_io_vec[bio->bi_vcnt - 1];
+		return;
+	}
+
+	if (unlikely(!bio_multiple_segments(bio))) {
+		*bv = bio_iovec(bio);
+		return;
+	}
+
+	bio_advance_iter(bio, &iter, iter.bi_size);
+
+	if (!iter.bi_bvec_done)
+		idx = iter.bi_idx - 1;
+	else	/* in the middle of bvec */
+		idx = iter.bi_idx;
+
+	*bv = bio->bi_io_vec[idx];
+
+	/*
+	 * iter.bi_bvec_done records actual length of the last bvec
+	 * if this bio ends in the middle of one io vector
+	 */
+	if (iter.bi_bvec_done)
+		bv->bv_len = iter.bi_bvec_done;
+}
+
 enum bip_flags {
 	BIP_BLOCK_INTEGRITY	= 1 << 0, /* block layer owns integrity data */
 	BIP_MAPPED_INTEGRITY	= 1 << 1, /* ref tag has been remapped */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 4571ef1a12a9..413c84fbc4ed 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -895,7 +895,7 @@ static inline unsigned int blk_rq_get_max_sectors(struct request *rq)
 {
 	struct request_queue *q = rq->q;
 
-	if (unlikely(rq->cmd_type == REQ_TYPE_BLOCK_PC))
+	if (unlikely(rq->cmd_type != REQ_TYPE_FS))
 		return q->limits.max_hw_sectors;
 
 	if (!q->limits.chunk_sectors || (rq->cmd_flags & REQ_DISCARD))
@@ -1372,6 +1372,13 @@ static inline void put_dev_sector(Sector p)
 	page_cache_release(p.v);
 }
 
+static inline bool __bvec_gap_to_prev(struct request_queue *q,
+				struct bio_vec *bprv, unsigned int offset)
+{
+	return offset ||
+		((bprv->bv_offset + bprv->bv_len) & queue_virt_boundary(q));
+}
+
 /*
  * Check if adding a bio_vec after bprv with offset would create a gap in
  * the SG list. Most drivers don't care about this, but some do.
@@ -1381,18 +1388,22 @@ static inline bool bvec_gap_to_prev(struct request_queue *q,
 {
 	if (!queue_virt_boundary(q))
 		return false;
-	return offset ||
-		((bprv->bv_offset + bprv->bv_len) & queue_virt_boundary(q));
+	return __bvec_gap_to_prev(q, bprv, offset);
 }
 
 static inline bool bio_will_gap(struct request_queue *q, struct bio *prev,
 			 struct bio *next)
 {
-	if (!bio_has_data(prev))
-		return false;
+	if (bio_has_data(prev) && queue_virt_boundary(q)) {
+		struct bio_vec pb, nb;
+
+		bio_get_last_bvec(prev, &pb);
+		bio_get_first_bvec(next, &nb);
 
-	return bvec_gap_to_prev(q, &prev->bi_io_vec[prev->bi_vcnt - 1],
-				next->bi_io_vec[0].bv_offset);
+		return __bvec_gap_to_prev(q, &pb, nb.bv_offset);
+	}
+
+	return false;
 }
 
 static inline bool req_gap_back_merge(struct request *req, struct bio *bio)
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index b333c945e571..d0b5ca5d4e08 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -198,6 +198,7 @@ void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
 void wbc_detach_inode(struct writeback_control *wbc);
 void wbc_account_io(struct writeback_control *wbc, struct page *page,
 		    size_t bytes);
+void cgroup_writeback_umount(void);
 
 /**
  * inode_attach_wb - associate an inode with its wb
@@ -301,6 +302,10 @@ static inline void wbc_account_io(struct writeback_control *wbc,
 {
 }
 
+static inline void cgroup_writeback_umount(void)
+{
+}
+
 #endif	/* CONFIG_CGROUP_WRITEBACK */
 
 /*