aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/block
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2015-04-18 11:42:49 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2015-04-18 11:42:49 -0400
commit: 34a984f7b0cc6355a1e0c184251d0d4cc86f44d2 (patch)
tree: 5ed28ee6cf191216be394c78234b13edbe69d1f8 /drivers/block
parent: 90d1c087861dcc3d1175993fc03492c137fd21bb (diff)
parent: 4c1eaa2344fb26bb5e936fb4d8ee307343ea0089 (diff)
Merge branch 'x86-pmem-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull PMEM driver from Ingo Molnar: "This is the initial support for the pmem block device driver: persistent non-volatile memory space mapped into the system's physical memory space as large physical memory regions. The driver is based on Intel code, written by Ross Zwisler, with fixes by Boaz Harrosh, integrated with x86 e820 memory resource management and tidied up by Christoph Hellwig. Note that there were two other separate pmem driver submissions to lkml, but apparently all parties (Ross Zwisler, Boaz Harrosh) are reasonably happy with this initial version. This version provides the minimal support needed for persistent memory devices out in the wild to work as block devices, identified through a magic (non-standard) e820 flag and auto-discovered if CONFIG_X86_PMEM_LEGACY=y, or added explicitly by manipulating the memory map via the "memmap=..." boot option with the new, special '!' modifier character. Limitations: this is a regular block device, and since the pmem areas are not struct page backed, they are invisible to the rest of the system (other than the block IO device), so direct IO to/from pmem areas, direct mmap() or XIP is not possible yet. The page cache will also shadow and double buffer pmem contents, etc. Initial support is for x86." * 'x86-pmem-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: drivers/block/pmem: Fix 32-bit build warning in pmem_alloc(); drivers/block/pmem: Add a driver for persistent memory; x86/mm: Add support for the non-standard protected e820 type
Diffstat (limited to 'drivers/block')
-rw-r--r-- drivers/block/Kconfig 11
-rw-r--r-- drivers/block/Makefile 1
-rw-r--r-- drivers/block/pmem.c 262
3 files changed, 274 insertions, 0 deletions
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 1b8094d4d7af..eb1fed5bd516 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -404,6 +404,17 @@ config BLK_DEV_RAM_DAX
404 and will prevent RAM block device backing store memory from being 404 and will prevent RAM block device backing store memory from being
405 allocated from highmem (only a problem for highmem systems). 405 allocated from highmem (only a problem for highmem systems).
406 406
407config BLK_DEV_PMEM
408 tristate "Persistent memory block device support"
409 help
410 Saying Y here will allow you to use a contiguous range of reserved
411 memory as one or more persistent block devices.
412
413 To compile this driver as a module, choose M here: the module will be
414 called 'pmem'.
415
416 If unsure, say N.
417
407config CDROM_PKTCDVD 418config CDROM_PKTCDVD
408 tristate "Packet writing on CD/DVD media" 419 tristate "Packet writing on CD/DVD media"
409 depends on !UML 420 depends on !UML
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index 02b688d1438d..9cc6c18a1c7e 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -14,6 +14,7 @@ obj-$(CONFIG_PS3_VRAM) += ps3vram.o
14obj-$(CONFIG_ATARI_FLOPPY) += ataflop.o 14obj-$(CONFIG_ATARI_FLOPPY) += ataflop.o
15obj-$(CONFIG_AMIGA_Z2RAM) += z2ram.o 15obj-$(CONFIG_AMIGA_Z2RAM) += z2ram.o
16obj-$(CONFIG_BLK_DEV_RAM) += brd.o 16obj-$(CONFIG_BLK_DEV_RAM) += brd.o
17obj-$(CONFIG_BLK_DEV_PMEM) += pmem.o
17obj-$(CONFIG_BLK_DEV_LOOP) += loop.o 18obj-$(CONFIG_BLK_DEV_LOOP) += loop.o
18obj-$(CONFIG_BLK_CPQ_DA) += cpqarray.o 19obj-$(CONFIG_BLK_CPQ_DA) += cpqarray.o
19obj-$(CONFIG_BLK_CPQ_CISS_DA) += cciss.o 20obj-$(CONFIG_BLK_CPQ_CISS_DA) += cciss.o
diff --git a/drivers/block/pmem.c b/drivers/block/pmem.c
new file mode 100644
index 000000000000..eabf4a8d0085
--- /dev/null
+++ b/drivers/block/pmem.c
@@ -0,0 +1,262 @@
1/*
2 * Persistent Memory Driver
3 *
4 * Copyright (c) 2014, Intel Corporation.
5 * Copyright (c) 2015, Christoph Hellwig <hch@lst.de>.
6 * Copyright (c) 2015, Boaz Harrosh <boaz@plexistor.com>.
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 */
17
18#include <asm/cacheflush.h>
19#include <linux/blkdev.h>
20#include <linux/hdreg.h>
21#include <linux/init.h>
22#include <linux/platform_device.h>
23#include <linux/module.h>
24#include <linux/moduleparam.h>
25#include <linux/slab.h>
26
27#define PMEM_MINORS 16
28
29struct pmem_device {
30 struct request_queue *pmem_queue;
31 struct gendisk *pmem_disk;
32
33 /* One contiguous memory region per device */
34 phys_addr_t phys_addr;
35 void *virt_addr;
36 size_t size;
37};
38
39static int pmem_major;
40static atomic_t pmem_index;
41
42static void pmem_do_bvec(struct pmem_device *pmem, struct page *page,
43 unsigned int len, unsigned int off, int rw,
44 sector_t sector)
45{
46 void *mem = kmap_atomic(page);
47 size_t pmem_off = sector << 9;
48
49 if (rw == READ) {
50 memcpy(mem + off, pmem->virt_addr + pmem_off, len);
51 flush_dcache_page(page);
52 } else {
53 flush_dcache_page(page);
54 memcpy(pmem->virt_addr + pmem_off, mem + off, len);
55 }
56
57 kunmap_atomic(mem);
58}
59
60static void pmem_make_request(struct request_queue *q, struct bio *bio)
61{
62 struct block_device *bdev = bio->bi_bdev;
63 struct pmem_device *pmem = bdev->bd_disk->private_data;
64 int rw;
65 struct bio_vec bvec;
66 sector_t sector;
67 struct bvec_iter iter;
68 int err = 0;
69
70 if (bio_end_sector(bio) > get_capacity(bdev->bd_disk)) {
71 err = -EIO;
72 goto out;
73 }
74
75 BUG_ON(bio->bi_rw & REQ_DISCARD);
76
77 rw = bio_data_dir(bio);
78 sector = bio->bi_iter.bi_sector;
79 bio_for_each_segment(bvec, bio, iter) {
80 pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len, bvec.bv_offset,
81 rw, sector);
82 sector += bvec.bv_len >> 9;
83 }
84
85out:
86 bio_endio(bio, err);
87}
88
89static int pmem_rw_page(struct block_device *bdev, sector_t sector,
90 struct page *page, int rw)
91{
92 struct pmem_device *pmem = bdev->bd_disk->private_data;
93
94 pmem_do_bvec(pmem, page, PAGE_CACHE_SIZE, 0, rw, sector);
95 page_endio(page, rw & WRITE, 0);
96
97 return 0;
98}
99
100static long pmem_direct_access(struct block_device *bdev, sector_t sector,
101 void **kaddr, unsigned long *pfn, long size)
102{
103 struct pmem_device *pmem = bdev->bd_disk->private_data;
104 size_t offset = sector << 9;
105
106 if (!pmem)
107 return -ENODEV;
108
109 *kaddr = pmem->virt_addr + offset;
110 *pfn = (pmem->phys_addr + offset) >> PAGE_SHIFT;
111
112 return pmem->size - offset;
113}
114
115static const struct block_device_operations pmem_fops = {
116 .owner = THIS_MODULE,
117 .rw_page = pmem_rw_page,
118 .direct_access = pmem_direct_access,
119};
120
121static struct pmem_device *pmem_alloc(struct device *dev, struct resource *res)
122{
123 struct pmem_device *pmem;
124 struct gendisk *disk;
125 int idx, err;
126
127 err = -ENOMEM;
128 pmem = kzalloc(sizeof(*pmem), GFP_KERNEL);
129 if (!pmem)
130 goto out;
131
132 pmem->phys_addr = res->start;
133 pmem->size = resource_size(res);
134
135 err = -EINVAL;
136 if (!request_mem_region(pmem->phys_addr, pmem->size, "pmem")) {
137 dev_warn(dev, "could not reserve region [0x%pa:0x%zx]\n", &pmem->phys_addr, pmem->size);
138 goto out_free_dev;
139 }
140
141 /*
142 * Map the memory as non-cachable, as we can't write back the contents
143 * of the CPU caches in case of a crash.
144 */
145 err = -ENOMEM;
146 pmem->virt_addr = ioremap_nocache(pmem->phys_addr, pmem->size);
147 if (!pmem->virt_addr)
148 goto out_release_region;
149
150 pmem->pmem_queue = blk_alloc_queue(GFP_KERNEL);
151 if (!pmem->pmem_queue)
152 goto out_unmap;
153
154 blk_queue_make_request(pmem->pmem_queue, pmem_make_request);
155 blk_queue_max_hw_sectors(pmem->pmem_queue, 1024);
156 blk_queue_bounce_limit(pmem->pmem_queue, BLK_BOUNCE_ANY);
157
158 disk = alloc_disk(PMEM_MINORS);
159 if (!disk)
160 goto out_free_queue;
161
162 idx = atomic_inc_return(&pmem_index) - 1;
163
164 disk->major = pmem_major;
165 disk->first_minor = PMEM_MINORS * idx;
166 disk->fops = &pmem_fops;
167 disk->private_data = pmem;
168 disk->queue = pmem->pmem_queue;
169 disk->flags = GENHD_FL_EXT_DEVT;
170 sprintf(disk->disk_name, "pmem%d", idx);
171 disk->driverfs_dev = dev;
172 set_capacity(disk, pmem->size >> 9);
173 pmem->pmem_disk = disk;
174
175 add_disk(disk);
176
177 return pmem;
178
179out_free_queue:
180 blk_cleanup_queue(pmem->pmem_queue);
181out_unmap:
182 iounmap(pmem->virt_addr);
183out_release_region:
184 release_mem_region(pmem->phys_addr, pmem->size);
185out_free_dev:
186 kfree(pmem);
187out:
188 return ERR_PTR(err);
189}
190
191static void pmem_free(struct pmem_device *pmem)
192{
193 del_gendisk(pmem->pmem_disk);
194 put_disk(pmem->pmem_disk);
195 blk_cleanup_queue(pmem->pmem_queue);
196 iounmap(pmem->virt_addr);
197 release_mem_region(pmem->phys_addr, pmem->size);
198 kfree(pmem);
199}
200
201static int pmem_probe(struct platform_device *pdev)
202{
203 struct pmem_device *pmem;
204 struct resource *res;
205
206 if (WARN_ON(pdev->num_resources > 1))
207 return -ENXIO;
208
209 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
210 if (!res)
211 return -ENXIO;
212
213 pmem = pmem_alloc(&pdev->dev, res);
214 if (IS_ERR(pmem))
215 return PTR_ERR(pmem);
216
217 platform_set_drvdata(pdev, pmem);
218
219 return 0;
220}
221
/*
 * Platform driver remove: destroy the pmem device that probe stored as
 * driver data.  Always returns 0.
 */
static int pmem_remove(struct platform_device *pdev)
{
	pmem_free(platform_get_drvdata(pdev));

	return 0;
}
229
230static struct platform_driver pmem_driver = {
231 .probe = pmem_probe,
232 .remove = pmem_remove,
233 .driver = {
234 .owner = THIS_MODULE,
235 .name = "pmem",
236 },
237};
238
239static int __init pmem_init(void)
240{
241 int error;
242
243 pmem_major = register_blkdev(0, "pmem");
244 if (pmem_major < 0)
245 return pmem_major;
246
247 error = platform_driver_register(&pmem_driver);
248 if (error)
249 unregister_blkdev(pmem_major, "pmem");
250 return error;
251}
252module_init(pmem_init);
253
254static void pmem_exit(void)
255{
256 platform_driver_unregister(&pmem_driver);
257 unregister_blkdev(pmem_major, "pmem");
258}
259module_exit(pmem_exit);
260
261MODULE_AUTHOR("Ross Zwisler <ross.zwisler@linux.intel.com>");
262MODULE_LICENSE("GPL v2");