diff options
Diffstat (limited to 'drivers/nvdimm/blk.c')
-rw-r--r-- | drivers/nvdimm/blk.c | 384 |
1 files changed, 384 insertions, 0 deletions
diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c new file mode 100644 index 000000000000..4f97b248c236 --- /dev/null +++ b/drivers/nvdimm/blk.c | |||
@@ -0,0 +1,384 @@ | |||
1 | /* | ||
2 | * NVDIMM Block Window Driver | ||
3 | * Copyright (c) 2014, Intel Corporation. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms and conditions of the GNU General Public License, | ||
7 | * version 2, as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
12 | * more details. | ||
13 | */ | ||
14 | |||
15 | #include <linux/blkdev.h> | ||
16 | #include <linux/fs.h> | ||
17 | #include <linux/genhd.h> | ||
18 | #include <linux/module.h> | ||
19 | #include <linux/moduleparam.h> | ||
20 | #include <linux/nd.h> | ||
21 | #include <linux/sizes.h> | ||
22 | #include "nd.h" | ||
23 | |||
24 | struct nd_blk_device { | ||
25 | struct request_queue *queue; | ||
26 | struct gendisk *disk; | ||
27 | struct nd_namespace_blk *nsblk; | ||
28 | struct nd_blk_region *ndbr; | ||
29 | size_t disk_size; | ||
30 | u32 sector_size; | ||
31 | u32 internal_lbasize; | ||
32 | }; | ||
33 | |||
/* Block major returned by register_blkdev() in nd_blk_init(). */
static int nd_blk_major;
35 | |||
36 | static u32 nd_blk_meta_size(struct nd_blk_device *blk_dev) | ||
37 | { | ||
38 | return blk_dev->nsblk->lbasize - blk_dev->sector_size; | ||
39 | } | ||
40 | |||
41 | static resource_size_t to_dev_offset(struct nd_namespace_blk *nsblk, | ||
42 | resource_size_t ns_offset, unsigned int len) | ||
43 | { | ||
44 | int i; | ||
45 | |||
46 | for (i = 0; i < nsblk->num_resources; i++) { | ||
47 | if (ns_offset < resource_size(nsblk->res[i])) { | ||
48 | if (ns_offset + len > resource_size(nsblk->res[i])) { | ||
49 | dev_WARN_ONCE(&nsblk->common.dev, 1, | ||
50 | "illegal request\n"); | ||
51 | return SIZE_MAX; | ||
52 | } | ||
53 | return nsblk->res[i]->start + ns_offset; | ||
54 | } | ||
55 | ns_offset -= resource_size(nsblk->res[i]); | ||
56 | } | ||
57 | |||
58 | dev_WARN_ONCE(&nsblk->common.dev, 1, "request out of range\n"); | ||
59 | return SIZE_MAX; | ||
60 | } | ||
61 | |||
62 | #ifdef CONFIG_BLK_DEV_INTEGRITY | ||
63 | static int nd_blk_rw_integrity(struct nd_blk_device *blk_dev, | ||
64 | struct bio_integrity_payload *bip, u64 lba, | ||
65 | int rw) | ||
66 | { | ||
67 | unsigned int len = nd_blk_meta_size(blk_dev); | ||
68 | resource_size_t dev_offset, ns_offset; | ||
69 | struct nd_namespace_blk *nsblk; | ||
70 | struct nd_blk_region *ndbr; | ||
71 | int err = 0; | ||
72 | |||
73 | nsblk = blk_dev->nsblk; | ||
74 | ndbr = blk_dev->ndbr; | ||
75 | ns_offset = lba * blk_dev->internal_lbasize + blk_dev->sector_size; | ||
76 | dev_offset = to_dev_offset(nsblk, ns_offset, len); | ||
77 | if (dev_offset == SIZE_MAX) | ||
78 | return -EIO; | ||
79 | |||
80 | while (len) { | ||
81 | unsigned int cur_len; | ||
82 | struct bio_vec bv; | ||
83 | void *iobuf; | ||
84 | |||
85 | bv = bvec_iter_bvec(bip->bip_vec, bip->bip_iter); | ||
86 | /* | ||
87 | * The 'bv' obtained from bvec_iter_bvec has its .bv_len and | ||
88 | * .bv_offset already adjusted for iter->bi_bvec_done, and we | ||
89 | * can use those directly | ||
90 | */ | ||
91 | |||
92 | cur_len = min(len, bv.bv_len); | ||
93 | iobuf = kmap_atomic(bv.bv_page); | ||
94 | err = ndbr->do_io(ndbr, dev_offset, iobuf + bv.bv_offset, | ||
95 | cur_len, rw); | ||
96 | kunmap_atomic(iobuf); | ||
97 | if (err) | ||
98 | return err; | ||
99 | |||
100 | len -= cur_len; | ||
101 | dev_offset += cur_len; | ||
102 | bvec_iter_advance(bip->bip_vec, &bip->bip_iter, cur_len); | ||
103 | } | ||
104 | |||
105 | return err; | ||
106 | } | ||
107 | |||
108 | #else /* CONFIG_BLK_DEV_INTEGRITY */ | ||
109 | static int nd_blk_rw_integrity(struct nd_blk_device *blk_dev, | ||
110 | struct bio_integrity_payload *bip, u64 lba, | ||
111 | int rw) | ||
112 | { | ||
113 | return 0; | ||
114 | } | ||
115 | #endif | ||
116 | |||
117 | static int nd_blk_do_bvec(struct nd_blk_device *blk_dev, | ||
118 | struct bio_integrity_payload *bip, struct page *page, | ||
119 | unsigned int len, unsigned int off, int rw, | ||
120 | sector_t sector) | ||
121 | { | ||
122 | struct nd_blk_region *ndbr = blk_dev->ndbr; | ||
123 | resource_size_t dev_offset, ns_offset; | ||
124 | int err = 0; | ||
125 | void *iobuf; | ||
126 | u64 lba; | ||
127 | |||
128 | while (len) { | ||
129 | unsigned int cur_len; | ||
130 | |||
131 | /* | ||
132 | * If we don't have an integrity payload, we don't have to | ||
133 | * split the bvec into sectors, as this would cause unnecessary | ||
134 | * Block Window setup/move steps. the do_io routine is capable | ||
135 | * of handling len <= PAGE_SIZE. | ||
136 | */ | ||
137 | cur_len = bip ? min(len, blk_dev->sector_size) : len; | ||
138 | |||
139 | lba = div_u64(sector << SECTOR_SHIFT, blk_dev->sector_size); | ||
140 | ns_offset = lba * blk_dev->internal_lbasize; | ||
141 | dev_offset = to_dev_offset(blk_dev->nsblk, ns_offset, cur_len); | ||
142 | if (dev_offset == SIZE_MAX) | ||
143 | return -EIO; | ||
144 | |||
145 | iobuf = kmap_atomic(page); | ||
146 | err = ndbr->do_io(ndbr, dev_offset, iobuf + off, cur_len, rw); | ||
147 | kunmap_atomic(iobuf); | ||
148 | if (err) | ||
149 | return err; | ||
150 | |||
151 | if (bip) { | ||
152 | err = nd_blk_rw_integrity(blk_dev, bip, lba, rw); | ||
153 | if (err) | ||
154 | return err; | ||
155 | } | ||
156 | len -= cur_len; | ||
157 | off += cur_len; | ||
158 | sector += blk_dev->sector_size >> SECTOR_SHIFT; | ||
159 | } | ||
160 | |||
161 | return err; | ||
162 | } | ||
163 | |||
164 | static void nd_blk_make_request(struct request_queue *q, struct bio *bio) | ||
165 | { | ||
166 | struct block_device *bdev = bio->bi_bdev; | ||
167 | struct gendisk *disk = bdev->bd_disk; | ||
168 | struct bio_integrity_payload *bip; | ||
169 | struct nd_blk_device *blk_dev; | ||
170 | struct bvec_iter iter; | ||
171 | unsigned long start; | ||
172 | struct bio_vec bvec; | ||
173 | int err = 0, rw; | ||
174 | bool do_acct; | ||
175 | |||
176 | /* | ||
177 | * bio_integrity_enabled also checks if the bio already has an | ||
178 | * integrity payload attached. If it does, we *don't* do a | ||
179 | * bio_integrity_prep here - the payload has been generated by | ||
180 | * another kernel subsystem, and we just pass it through. | ||
181 | */ | ||
182 | if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { | ||
183 | err = -EIO; | ||
184 | goto out; | ||
185 | } | ||
186 | |||
187 | bip = bio_integrity(bio); | ||
188 | blk_dev = disk->private_data; | ||
189 | rw = bio_data_dir(bio); | ||
190 | do_acct = nd_iostat_start(bio, &start); | ||
191 | bio_for_each_segment(bvec, bio, iter) { | ||
192 | unsigned int len = bvec.bv_len; | ||
193 | |||
194 | BUG_ON(len > PAGE_SIZE); | ||
195 | err = nd_blk_do_bvec(blk_dev, bip, bvec.bv_page, len, | ||
196 | bvec.bv_offset, rw, iter.bi_sector); | ||
197 | if (err) { | ||
198 | dev_info(&blk_dev->nsblk->common.dev, | ||
199 | "io error in %s sector %lld, len %d,\n", | ||
200 | (rw == READ) ? "READ" : "WRITE", | ||
201 | (unsigned long long) iter.bi_sector, len); | ||
202 | break; | ||
203 | } | ||
204 | } | ||
205 | if (do_acct) | ||
206 | nd_iostat_end(bio, start); | ||
207 | |||
208 | out: | ||
209 | bio_endio(bio, err); | ||
210 | } | ||
211 | |||
212 | static int nd_blk_rw_bytes(struct nd_namespace_common *ndns, | ||
213 | resource_size_t offset, void *iobuf, size_t n, int rw) | ||
214 | { | ||
215 | struct nd_blk_device *blk_dev = dev_get_drvdata(ndns->claim); | ||
216 | struct nd_namespace_blk *nsblk = blk_dev->nsblk; | ||
217 | struct nd_blk_region *ndbr = blk_dev->ndbr; | ||
218 | resource_size_t dev_offset; | ||
219 | |||
220 | dev_offset = to_dev_offset(nsblk, offset, n); | ||
221 | |||
222 | if (unlikely(offset + n > blk_dev->disk_size)) { | ||
223 | dev_WARN_ONCE(&ndns->dev, 1, "request out of range\n"); | ||
224 | return -EFAULT; | ||
225 | } | ||
226 | |||
227 | if (dev_offset == SIZE_MAX) | ||
228 | return -EIO; | ||
229 | |||
230 | return ndbr->do_io(ndbr, dev_offset, iobuf, n, rw); | ||
231 | } | ||
232 | |||
233 | static const struct block_device_operations nd_blk_fops = { | ||
234 | .owner = THIS_MODULE, | ||
235 | .revalidate_disk = nvdimm_revalidate_disk, | ||
236 | }; | ||
237 | |||
238 | static int nd_blk_attach_disk(struct nd_namespace_common *ndns, | ||
239 | struct nd_blk_device *blk_dev) | ||
240 | { | ||
241 | resource_size_t available_disk_size; | ||
242 | struct gendisk *disk; | ||
243 | u64 internal_nlba; | ||
244 | |||
245 | internal_nlba = div_u64(blk_dev->disk_size, blk_dev->internal_lbasize); | ||
246 | available_disk_size = internal_nlba * blk_dev->sector_size; | ||
247 | |||
248 | blk_dev->queue = blk_alloc_queue(GFP_KERNEL); | ||
249 | if (!blk_dev->queue) | ||
250 | return -ENOMEM; | ||
251 | |||
252 | blk_queue_make_request(blk_dev->queue, nd_blk_make_request); | ||
253 | blk_queue_max_hw_sectors(blk_dev->queue, UINT_MAX); | ||
254 | blk_queue_bounce_limit(blk_dev->queue, BLK_BOUNCE_ANY); | ||
255 | blk_queue_logical_block_size(blk_dev->queue, blk_dev->sector_size); | ||
256 | queue_flag_set_unlocked(QUEUE_FLAG_NONROT, blk_dev->queue); | ||
257 | |||
258 | disk = blk_dev->disk = alloc_disk(0); | ||
259 | if (!disk) { | ||
260 | blk_cleanup_queue(blk_dev->queue); | ||
261 | return -ENOMEM; | ||
262 | } | ||
263 | |||
264 | disk->driverfs_dev = &ndns->dev; | ||
265 | disk->major = nd_blk_major; | ||
266 | disk->first_minor = 0; | ||
267 | disk->fops = &nd_blk_fops; | ||
268 | disk->private_data = blk_dev; | ||
269 | disk->queue = blk_dev->queue; | ||
270 | disk->flags = GENHD_FL_EXT_DEVT; | ||
271 | nvdimm_namespace_disk_name(ndns, disk->disk_name); | ||
272 | set_capacity(disk, 0); | ||
273 | add_disk(disk); | ||
274 | |||
275 | if (nd_blk_meta_size(blk_dev)) { | ||
276 | int rc = nd_integrity_init(disk, nd_blk_meta_size(blk_dev)); | ||
277 | |||
278 | if (rc) { | ||
279 | del_gendisk(disk); | ||
280 | put_disk(disk); | ||
281 | blk_cleanup_queue(blk_dev->queue); | ||
282 | return rc; | ||
283 | } | ||
284 | } | ||
285 | |||
286 | set_capacity(disk, available_disk_size >> SECTOR_SHIFT); | ||
287 | revalidate_disk(disk); | ||
288 | return 0; | ||
289 | } | ||
290 | |||
291 | static int nd_blk_probe(struct device *dev) | ||
292 | { | ||
293 | struct nd_namespace_common *ndns; | ||
294 | struct nd_namespace_blk *nsblk; | ||
295 | struct nd_blk_device *blk_dev; | ||
296 | int rc; | ||
297 | |||
298 | ndns = nvdimm_namespace_common_probe(dev); | ||
299 | if (IS_ERR(ndns)) | ||
300 | return PTR_ERR(ndns); | ||
301 | |||
302 | blk_dev = kzalloc(sizeof(*blk_dev), GFP_KERNEL); | ||
303 | if (!blk_dev) | ||
304 | return -ENOMEM; | ||
305 | |||
306 | nsblk = to_nd_namespace_blk(&ndns->dev); | ||
307 | blk_dev->disk_size = nvdimm_namespace_capacity(ndns); | ||
308 | blk_dev->ndbr = to_nd_blk_region(dev->parent); | ||
309 | blk_dev->nsblk = to_nd_namespace_blk(&ndns->dev); | ||
310 | blk_dev->internal_lbasize = roundup(nsblk->lbasize, | ||
311 | INT_LBASIZE_ALIGNMENT); | ||
312 | blk_dev->sector_size = ((nsblk->lbasize >= 4096) ? 4096 : 512); | ||
313 | dev_set_drvdata(dev, blk_dev); | ||
314 | |||
315 | ndns->rw_bytes = nd_blk_rw_bytes; | ||
316 | if (is_nd_btt(dev)) | ||
317 | rc = nvdimm_namespace_attach_btt(ndns); | ||
318 | else if (nd_btt_probe(ndns, blk_dev) == 0) { | ||
319 | /* we'll come back as btt-blk */ | ||
320 | rc = -ENXIO; | ||
321 | } else | ||
322 | rc = nd_blk_attach_disk(ndns, blk_dev); | ||
323 | if (rc) | ||
324 | kfree(blk_dev); | ||
325 | return rc; | ||
326 | } | ||
327 | |||
328 | static void nd_blk_detach_disk(struct nd_blk_device *blk_dev) | ||
329 | { | ||
330 | del_gendisk(blk_dev->disk); | ||
331 | put_disk(blk_dev->disk); | ||
332 | blk_cleanup_queue(blk_dev->queue); | ||
333 | } | ||
334 | |||
335 | static int nd_blk_remove(struct device *dev) | ||
336 | { | ||
337 | struct nd_blk_device *blk_dev = dev_get_drvdata(dev); | ||
338 | |||
339 | if (is_nd_btt(dev)) | ||
340 | nvdimm_namespace_detach_btt(to_nd_btt(dev)->ndns); | ||
341 | else | ||
342 | nd_blk_detach_disk(blk_dev); | ||
343 | kfree(blk_dev); | ||
344 | |||
345 | return 0; | ||
346 | } | ||
347 | |||
348 | static struct nd_device_driver nd_blk_driver = { | ||
349 | .probe = nd_blk_probe, | ||
350 | .remove = nd_blk_remove, | ||
351 | .drv = { | ||
352 | .name = "nd_blk", | ||
353 | }, | ||
354 | .type = ND_DRIVER_NAMESPACE_BLK, | ||
355 | }; | ||
356 | |||
357 | static int __init nd_blk_init(void) | ||
358 | { | ||
359 | int rc; | ||
360 | |||
361 | rc = register_blkdev(0, "nd_blk"); | ||
362 | if (rc < 0) | ||
363 | return rc; | ||
364 | |||
365 | nd_blk_major = rc; | ||
366 | rc = nd_driver_register(&nd_blk_driver); | ||
367 | |||
368 | if (rc < 0) | ||
369 | unregister_blkdev(nd_blk_major, "nd_blk"); | ||
370 | |||
371 | return rc; | ||
372 | } | ||
373 | |||
374 | static void __exit nd_blk_exit(void) | ||
375 | { | ||
376 | driver_unregister(&nd_blk_driver.drv); | ||
377 | unregister_blkdev(nd_blk_major, "nd_blk"); | ||
378 | } | ||
379 | |||
380 | MODULE_AUTHOR("Ross Zwisler <ross.zwisler@linux.intel.com>"); | ||
381 | MODULE_LICENSE("GPL v2"); | ||
382 | MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_BLK); | ||
383 | module_init(nd_blk_init); | ||
384 | module_exit(nd_blk_exit); | ||