diff options
author | Ross Zwisler <ross.zwisler@linux.intel.com> | 2015-04-01 03:12:19 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2015-04-01 11:03:56 -0400 |
commit | 9e853f2313e5eb163cb1ea461b23c2332cf6438a (patch) | |
tree | 1cc98a03ff9582111fdd8032237c406d67f976fa | |
parent | ec776ef6bbe1734c29cd6bd05219cd93b2731bd4 (diff) |
drivers/block/pmem: Add a driver for persistent memory
PMEM is a new driver that presents a reserved range of memory as
a block device. This is useful for developing with NV-DIMMs,
and can be used with volatile memory as a development platform.
This patch contains the initial driver from Ross Zwisler, with
various changes: converted it to use a platform_device for
discovery, fixed partition support and merged various patches
from Boaz Harrosh.
Tested-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Dan Williams <dan.j.williams@intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Boaz Harrosh <boaz@plexistor.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Jens Axboe <axboe@fb.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Keith Busch <keith.busch@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matthew Wilcox <willy@linux.intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-nvdimm@ml01.01.org
Link: http://lkml.kernel.org/r/1427872339-6688-3-git-send-email-hch@lst.de
[ Minor cleanups. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r-- | MAINTAINERS | 6 | ||||
-rw-r--r-- | drivers/block/Kconfig | 11 | ||||
-rw-r--r-- | drivers/block/Makefile | 1 | ||||
-rw-r--r-- | drivers/block/pmem.c | 263 |
4 files changed, 281 insertions, 0 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 1de6afa8ee51..4517613dc638 100644 --- a/MAINTAINERS +++ b/MAINTAINERS | |||
@@ -8071,6 +8071,12 @@ S: Maintained | |||
8071 | F: Documentation/blockdev/ramdisk.txt | 8071 | F: Documentation/blockdev/ramdisk.txt |
8072 | F: drivers/block/brd.c | 8072 | F: drivers/block/brd.c |
8073 | 8073 | ||
8074 | PERSISTENT MEMORY DRIVER | ||
8075 | M: Ross Zwisler <ross.zwisler@linux.intel.com> | ||
8076 | L: linux-nvdimm@lists.01.org | ||
8077 | S: Supported | ||
8078 | F: drivers/block/pmem.c | ||
8079 | |||
8074 | RANDOM NUMBER DRIVER | 8080 | RANDOM NUMBER DRIVER |
8075 | M: "Theodore Ts'o" <tytso@mit.edu> | 8081 | M: "Theodore Ts'o" <tytso@mit.edu> |
8076 | S: Maintained | 8082 | S: Maintained |
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 1b8094d4d7af..eb1fed5bd516 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig | |||
@@ -404,6 +404,17 @@ config BLK_DEV_RAM_DAX | |||
404 | and will prevent RAM block device backing store memory from being | 404 | and will prevent RAM block device backing store memory from being |
405 | allocated from highmem (only a problem for highmem systems). | 405 | allocated from highmem (only a problem for highmem systems). |
406 | 406 | ||
407 | config BLK_DEV_PMEM | ||
408 | tristate "Persistent memory block device support" | ||
409 | help | ||
410 | Saying Y here will allow you to use a contiguous range of reserved | ||
411 | memory as one or more persistent block devices. | ||
412 | |||
413 | To compile this driver as a module, choose M here: the module will be | ||
414 | called 'pmem'. | ||
415 | |||
416 | If unsure, say N. | ||
417 | |||
407 | config CDROM_PKTCDVD | 418 | config CDROM_PKTCDVD |
408 | tristate "Packet writing on CD/DVD media" | 419 | tristate "Packet writing on CD/DVD media" |
409 | depends on !UML | 420 | depends on !UML |
diff --git a/drivers/block/Makefile b/drivers/block/Makefile index 02b688d1438d..9cc6c18a1c7e 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile | |||
@@ -14,6 +14,7 @@ obj-$(CONFIG_PS3_VRAM) += ps3vram.o | |||
14 | obj-$(CONFIG_ATARI_FLOPPY) += ataflop.o | 14 | obj-$(CONFIG_ATARI_FLOPPY) += ataflop.o |
15 | obj-$(CONFIG_AMIGA_Z2RAM) += z2ram.o | 15 | obj-$(CONFIG_AMIGA_Z2RAM) += z2ram.o |
16 | obj-$(CONFIG_BLK_DEV_RAM) += brd.o | 16 | obj-$(CONFIG_BLK_DEV_RAM) += brd.o |
17 | obj-$(CONFIG_BLK_DEV_PMEM) += pmem.o | ||
17 | obj-$(CONFIG_BLK_DEV_LOOP) += loop.o | 18 | obj-$(CONFIG_BLK_DEV_LOOP) += loop.o |
18 | obj-$(CONFIG_BLK_CPQ_DA) += cpqarray.o | 19 | obj-$(CONFIG_BLK_CPQ_DA) += cpqarray.o |
19 | obj-$(CONFIG_BLK_CPQ_CISS_DA) += cciss.o | 20 | obj-$(CONFIG_BLK_CPQ_CISS_DA) += cciss.o |
diff --git a/drivers/block/pmem.c b/drivers/block/pmem.c new file mode 100644 index 000000000000..988f3846dc3e --- /dev/null +++ b/drivers/block/pmem.c | |||
@@ -0,0 +1,263 @@ | |||
1 | /* | ||
2 | * Persistent Memory Driver | ||
3 | * | ||
4 | * Copyright (c) 2014, Intel Corporation. | ||
5 | * Copyright (c) 2015, Christoph Hellwig <hch@lst.de>. | ||
6 | * Copyright (c) 2015, Boaz Harrosh <boaz@plexistor.com>. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | */ | ||
17 | |||
18 | #include <asm/cacheflush.h> | ||
19 | #include <linux/blkdev.h> | ||
20 | #include <linux/hdreg.h> | ||
21 | #include <linux/init.h> | ||
22 | #include <linux/platform_device.h> | ||
23 | #include <linux/module.h> | ||
24 | #include <linux/moduleparam.h> | ||
25 | #include <linux/slab.h> | ||
26 | |||
/* Minor numbers handed to each pmem disk (alloc_disk() count and first_minor stride). */
#define PMEM_MINORS	16
28 | |||
29 | struct pmem_device { | ||
30 | struct request_queue *pmem_queue; | ||
31 | struct gendisk *pmem_disk; | ||
32 | |||
33 | /* One contiguous memory region per device */ | ||
34 | phys_addr_t phys_addr; | ||
35 | void *virt_addr; | ||
36 | size_t size; | ||
37 | }; | ||
38 | |||
39 | static int pmem_major; | ||
40 | static atomic_t pmem_index; | ||
41 | |||
42 | static void pmem_do_bvec(struct pmem_device *pmem, struct page *page, | ||
43 | unsigned int len, unsigned int off, int rw, | ||
44 | sector_t sector) | ||
45 | { | ||
46 | void *mem = kmap_atomic(page); | ||
47 | size_t pmem_off = sector << 9; | ||
48 | |||
49 | if (rw == READ) { | ||
50 | memcpy(mem + off, pmem->virt_addr + pmem_off, len); | ||
51 | flush_dcache_page(page); | ||
52 | } else { | ||
53 | flush_dcache_page(page); | ||
54 | memcpy(pmem->virt_addr + pmem_off, mem + off, len); | ||
55 | } | ||
56 | |||
57 | kunmap_atomic(mem); | ||
58 | } | ||
59 | |||
60 | static void pmem_make_request(struct request_queue *q, struct bio *bio) | ||
61 | { | ||
62 | struct block_device *bdev = bio->bi_bdev; | ||
63 | struct pmem_device *pmem = bdev->bd_disk->private_data; | ||
64 | int rw; | ||
65 | struct bio_vec bvec; | ||
66 | sector_t sector; | ||
67 | struct bvec_iter iter; | ||
68 | int err = 0; | ||
69 | |||
70 | if (bio_end_sector(bio) > get_capacity(bdev->bd_disk)) { | ||
71 | err = -EIO; | ||
72 | goto out; | ||
73 | } | ||
74 | |||
75 | BUG_ON(bio->bi_rw & REQ_DISCARD); | ||
76 | |||
77 | rw = bio_data_dir(bio); | ||
78 | sector = bio->bi_iter.bi_sector; | ||
79 | bio_for_each_segment(bvec, bio, iter) { | ||
80 | pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len, bvec.bv_offset, | ||
81 | rw, sector); | ||
82 | sector += bvec.bv_len >> 9; | ||
83 | } | ||
84 | |||
85 | out: | ||
86 | bio_endio(bio, err); | ||
87 | } | ||
88 | |||
89 | static int pmem_rw_page(struct block_device *bdev, sector_t sector, | ||
90 | struct page *page, int rw) | ||
91 | { | ||
92 | struct pmem_device *pmem = bdev->bd_disk->private_data; | ||
93 | |||
94 | pmem_do_bvec(pmem, page, PAGE_CACHE_SIZE, 0, rw, sector); | ||
95 | page_endio(page, rw & WRITE, 0); | ||
96 | |||
97 | return 0; | ||
98 | } | ||
99 | |||
100 | static long pmem_direct_access(struct block_device *bdev, sector_t sector, | ||
101 | void **kaddr, unsigned long *pfn, long size) | ||
102 | { | ||
103 | struct pmem_device *pmem = bdev->bd_disk->private_data; | ||
104 | size_t offset = sector << 9; | ||
105 | |||
106 | if (!pmem) | ||
107 | return -ENODEV; | ||
108 | |||
109 | *kaddr = pmem->virt_addr + offset; | ||
110 | *pfn = (pmem->phys_addr + offset) >> PAGE_SHIFT; | ||
111 | |||
112 | return pmem->size - offset; | ||
113 | } | ||
114 | |||
115 | static const struct block_device_operations pmem_fops = { | ||
116 | .owner = THIS_MODULE, | ||
117 | .rw_page = pmem_rw_page, | ||
118 | .direct_access = pmem_direct_access, | ||
119 | }; | ||
120 | |||
121 | static struct pmem_device *pmem_alloc(struct device *dev, struct resource *res) | ||
122 | { | ||
123 | struct pmem_device *pmem; | ||
124 | struct gendisk *disk; | ||
125 | int idx, err; | ||
126 | |||
127 | err = -ENOMEM; | ||
128 | pmem = kzalloc(sizeof(*pmem), GFP_KERNEL); | ||
129 | if (!pmem) | ||
130 | goto out; | ||
131 | |||
132 | pmem->phys_addr = res->start; | ||
133 | pmem->size = resource_size(res); | ||
134 | |||
135 | err = -EINVAL; | ||
136 | if (!request_mem_region(pmem->phys_addr, pmem->size, "pmem")) { | ||
137 | dev_warn(dev, "could not reserve region [0x%llx:0x%zx]\n", | ||
138 | pmem->phys_addr, pmem->size); | ||
139 | goto out_free_dev; | ||
140 | } | ||
141 | |||
142 | /* | ||
143 | * Map the memory as non-cachable, as we can't write back the contents | ||
144 | * of the CPU caches in case of a crash. | ||
145 | */ | ||
146 | err = -ENOMEM; | ||
147 | pmem->virt_addr = ioremap_nocache(pmem->phys_addr, pmem->size); | ||
148 | if (!pmem->virt_addr) | ||
149 | goto out_release_region; | ||
150 | |||
151 | pmem->pmem_queue = blk_alloc_queue(GFP_KERNEL); | ||
152 | if (!pmem->pmem_queue) | ||
153 | goto out_unmap; | ||
154 | |||
155 | blk_queue_make_request(pmem->pmem_queue, pmem_make_request); | ||
156 | blk_queue_max_hw_sectors(pmem->pmem_queue, 1024); | ||
157 | blk_queue_bounce_limit(pmem->pmem_queue, BLK_BOUNCE_ANY); | ||
158 | |||
159 | disk = alloc_disk(PMEM_MINORS); | ||
160 | if (!disk) | ||
161 | goto out_free_queue; | ||
162 | |||
163 | idx = atomic_inc_return(&pmem_index) - 1; | ||
164 | |||
165 | disk->major = pmem_major; | ||
166 | disk->first_minor = PMEM_MINORS * idx; | ||
167 | disk->fops = &pmem_fops; | ||
168 | disk->private_data = pmem; | ||
169 | disk->queue = pmem->pmem_queue; | ||
170 | disk->flags = GENHD_FL_EXT_DEVT; | ||
171 | sprintf(disk->disk_name, "pmem%d", idx); | ||
172 | disk->driverfs_dev = dev; | ||
173 | set_capacity(disk, pmem->size >> 9); | ||
174 | pmem->pmem_disk = disk; | ||
175 | |||
176 | add_disk(disk); | ||
177 | |||
178 | return pmem; | ||
179 | |||
180 | out_free_queue: | ||
181 | blk_cleanup_queue(pmem->pmem_queue); | ||
182 | out_unmap: | ||
183 | iounmap(pmem->virt_addr); | ||
184 | out_release_region: | ||
185 | release_mem_region(pmem->phys_addr, pmem->size); | ||
186 | out_free_dev: | ||
187 | kfree(pmem); | ||
188 | out: | ||
189 | return ERR_PTR(err); | ||
190 | } | ||
191 | |||
192 | static void pmem_free(struct pmem_device *pmem) | ||
193 | { | ||
194 | del_gendisk(pmem->pmem_disk); | ||
195 | put_disk(pmem->pmem_disk); | ||
196 | blk_cleanup_queue(pmem->pmem_queue); | ||
197 | iounmap(pmem->virt_addr); | ||
198 | release_mem_region(pmem->phys_addr, pmem->size); | ||
199 | kfree(pmem); | ||
200 | } | ||
201 | |||
202 | static int pmem_probe(struct platform_device *pdev) | ||
203 | { | ||
204 | struct pmem_device *pmem; | ||
205 | struct resource *res; | ||
206 | |||
207 | if (WARN_ON(pdev->num_resources > 1)) | ||
208 | return -ENXIO; | ||
209 | |||
210 | res = platform_get_resource(pdev, IORESOURCE_MEM, 0); | ||
211 | if (!res) | ||
212 | return -ENXIO; | ||
213 | |||
214 | pmem = pmem_alloc(&pdev->dev, res); | ||
215 | if (IS_ERR(pmem)) | ||
216 | return PTR_ERR(pmem); | ||
217 | |||
218 | platform_set_drvdata(pdev, pmem); | ||
219 | |||
220 | return 0; | ||
221 | } | ||
222 | |||
/*
 * Platform driver remove: free the pmem device stored as driver data by
 * pmem_probe().  Always returns 0.
 */
static int pmem_remove(struct platform_device *pdev)
{
	pmem_free(platform_get_drvdata(pdev));

	return 0;
}
230 | |||
231 | static struct platform_driver pmem_driver = { | ||
232 | .probe = pmem_probe, | ||
233 | .remove = pmem_remove, | ||
234 | .driver = { | ||
235 | .owner = THIS_MODULE, | ||
236 | .name = "pmem", | ||
237 | }, | ||
238 | }; | ||
239 | |||
240 | static int __init pmem_init(void) | ||
241 | { | ||
242 | int error; | ||
243 | |||
244 | pmem_major = register_blkdev(0, "pmem"); | ||
245 | if (pmem_major < 0) | ||
246 | return pmem_major; | ||
247 | |||
248 | error = platform_driver_register(&pmem_driver); | ||
249 | if (error) | ||
250 | unregister_blkdev(pmem_major, "pmem"); | ||
251 | return error; | ||
252 | } | ||
253 | module_init(pmem_init); | ||
254 | |||
255 | static void pmem_exit(void) | ||
256 | { | ||
257 | platform_driver_unregister(&pmem_driver); | ||
258 | unregister_blkdev(pmem_major, "pmem"); | ||
259 | } | ||
260 | module_exit(pmem_exit); | ||
261 | |||
262 | MODULE_AUTHOR("Ross Zwisler <ross.zwisler@linux.intel.com>"); | ||
263 | MODULE_LICENSE("GPL v2"); | ||