summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Dan Williams <dan.j.williams@intel.com> 2016-01-15 19:56:14 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2016-01-15 20:56:32 -0500
commit: 34c0fd540e79fb49ef9ce864dae1058cca265780 (patch)
tree: 7f404f3a644322d0eca02b54daa228261ed24f39
parent: ba049e93aef7e8c571567088b1b73f4f5b99272a (diff)
mm, dax, pmem: introduce pfn_t
For the purpose of communicating the optional presence of a 'struct page' for the pfn returned from ->direct_access(), introduce a type that encapsulates a page-frame-number plus flags. These flags contain the historical "page_link" encoding for a scatterlist entry, but can also denote "device memory", where "device memory" is a set of pfns that are not part of the kernel's linear mapping by default, but are accessed via the same memory controller as ram.

The motivation for this new type is large capacity persistent memory that needs struct page entries in the 'memmap' to support 3rd party DMA (i.e. O_DIRECT I/O with a persistent memory source/target). However, we also need it in support of maintaining a list of mapped inodes which need to be unmapped at driver teardown or freeze_bdev() time.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Dave Hansen <dave@sr71.net>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- arch/powerpc/sysdev/axonram.c 9
-rw-r--r-- drivers/block/brd.c 7
-rw-r--r-- drivers/nvdimm/pmem.c 13
-rw-r--r-- drivers/s390/block/dcssblk.c 11
-rw-r--r-- fs/dax.c 11
-rw-r--r-- include/linux/blkdev.h 5
-rw-r--r-- include/linux/pfn.h 9
-rw-r--r-- include/linux/pfn_t.h 67
-rw-r--r-- kernel/memremap.c 7
9 files changed, 116 insertions, 23 deletions
diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
index c713b349d967..0d112b94d91d 100644
--- a/arch/powerpc/sysdev/axonram.c
+++ b/arch/powerpc/sysdev/axonram.c
@@ -43,6 +43,7 @@
43#include <linux/types.h> 43#include <linux/types.h>
44#include <linux/of_device.h> 44#include <linux/of_device.h>
45#include <linux/of_platform.h> 45#include <linux/of_platform.h>
46#include <linux/pfn_t.h>
46 47
47#include <asm/page.h> 48#include <asm/page.h>
48#include <asm/prom.h> 49#include <asm/prom.h>
@@ -142,15 +143,13 @@ axon_ram_make_request(struct request_queue *queue, struct bio *bio)
142 */ 143 */
143static long 144static long
144axon_ram_direct_access(struct block_device *device, sector_t sector, 145axon_ram_direct_access(struct block_device *device, sector_t sector,
145 void __pmem **kaddr, unsigned long *pfn) 146 void __pmem **kaddr, pfn_t *pfn)
146{ 147{
147 struct axon_ram_bank *bank = device->bd_disk->private_data; 148 struct axon_ram_bank *bank = device->bd_disk->private_data;
148 loff_t offset = (loff_t)sector << AXON_RAM_SECTOR_SHIFT; 149 loff_t offset = (loff_t)sector << AXON_RAM_SECTOR_SHIFT;
149 void *addr = (void *)(bank->ph_addr + offset);
150
151 *kaddr = (void __pmem *)addr;
152 *pfn = virt_to_phys(addr) >> PAGE_SHIFT;
153 150
151 *kaddr = (void __pmem __force *) bank->io_addr + offset;
152 *pfn = phys_to_pfn_t(bank->ph_addr + offset, PFN_DEV);
154 return bank->size - offset; 153 return bank->size - offset;
155} 154}
156 155
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index a5880f4ab40e..cb27190e9f39 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -19,6 +19,9 @@
19#include <linux/radix-tree.h> 19#include <linux/radix-tree.h>
20#include <linux/fs.h> 20#include <linux/fs.h>
21#include <linux/slab.h> 21#include <linux/slab.h>
22#ifdef CONFIG_BLK_DEV_RAM_DAX
23#include <linux/pfn_t.h>
24#endif
22 25
23#include <asm/uaccess.h> 26#include <asm/uaccess.h>
24 27
@@ -378,7 +381,7 @@ static int brd_rw_page(struct block_device *bdev, sector_t sector,
378 381
379#ifdef CONFIG_BLK_DEV_RAM_DAX 382#ifdef CONFIG_BLK_DEV_RAM_DAX
380static long brd_direct_access(struct block_device *bdev, sector_t sector, 383static long brd_direct_access(struct block_device *bdev, sector_t sector,
381 void __pmem **kaddr, unsigned long *pfn) 384 void __pmem **kaddr, pfn_t *pfn)
382{ 385{
383 struct brd_device *brd = bdev->bd_disk->private_data; 386 struct brd_device *brd = bdev->bd_disk->private_data;
384 struct page *page; 387 struct page *page;
@@ -389,7 +392,7 @@ static long brd_direct_access(struct block_device *bdev, sector_t sector,
389 if (!page) 392 if (!page)
390 return -ENOSPC; 393 return -ENOSPC;
391 *kaddr = (void __pmem *)page_address(page); 394 *kaddr = (void __pmem *)page_address(page);
392 *pfn = page_to_pfn(page); 395 *pfn = page_to_pfn_t(page);
393 396
394 return PAGE_SIZE; 397 return PAGE_SIZE;
395} 398}
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index b493ff3fccb2..5def7f4ddbd2 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -25,6 +25,7 @@
25#include <linux/moduleparam.h> 25#include <linux/moduleparam.h>
26#include <linux/badblocks.h> 26#include <linux/badblocks.h>
27#include <linux/vmalloc.h> 27#include <linux/vmalloc.h>
28#include <linux/pfn_t.h>
28#include <linux/slab.h> 29#include <linux/slab.h>
29#include <linux/pmem.h> 30#include <linux/pmem.h>
30#include <linux/nd.h> 31#include <linux/nd.h>
@@ -40,6 +41,7 @@ struct pmem_device {
40 phys_addr_t phys_addr; 41 phys_addr_t phys_addr;
41 /* when non-zero this device is hosting a 'pfn' instance */ 42 /* when non-zero this device is hosting a 'pfn' instance */
42 phys_addr_t data_offset; 43 phys_addr_t data_offset;
44 unsigned long pfn_flags;
43 void __pmem *virt_addr; 45 void __pmem *virt_addr;
44 size_t size; 46 size_t size;
45 struct badblocks bb; 47 struct badblocks bb;
@@ -135,13 +137,13 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
135} 137}
136 138
137static long pmem_direct_access(struct block_device *bdev, sector_t sector, 139static long pmem_direct_access(struct block_device *bdev, sector_t sector,
138 void __pmem **kaddr, unsigned long *pfn) 140 void __pmem **kaddr, pfn_t *pfn)
139{ 141{
140 struct pmem_device *pmem = bdev->bd_disk->private_data; 142 struct pmem_device *pmem = bdev->bd_disk->private_data;
141 resource_size_t offset = sector * 512 + pmem->data_offset; 143 resource_size_t offset = sector * 512 + pmem->data_offset;
142 144
143 *kaddr = pmem->virt_addr + offset; 145 *kaddr = pmem->virt_addr + offset;
144 *pfn = (pmem->phys_addr + offset) >> PAGE_SHIFT; 146 *pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags);
145 147
146 return pmem->size - offset; 148 return pmem->size - offset;
147} 149}
@@ -174,9 +176,11 @@ static struct pmem_device *pmem_alloc(struct device *dev,
174 return ERR_PTR(-EBUSY); 176 return ERR_PTR(-EBUSY);
175 } 177 }
176 178
177 if (pmem_should_map_pages(dev)) 179 pmem->pfn_flags = PFN_DEV;
180 if (pmem_should_map_pages(dev)) {
178 pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, res); 181 pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, res);
179 else 182 pmem->pfn_flags |= PFN_MAP;
183 } else
180 pmem->virt_addr = (void __pmem *) devm_memremap(dev, 184 pmem->virt_addr = (void __pmem *) devm_memremap(dev,
181 pmem->phys_addr, pmem->size, 185 pmem->phys_addr, pmem->size,
182 ARCH_MEMREMAP_PMEM); 186 ARCH_MEMREMAP_PMEM);
@@ -384,6 +388,7 @@ static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns)
384 pmem = dev_get_drvdata(dev); 388 pmem = dev_get_drvdata(dev);
385 devm_memunmap(dev, (void __force *) pmem->virt_addr); 389 devm_memunmap(dev, (void __force *) pmem->virt_addr);
386 pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, &nsio->res); 390 pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, &nsio->res);
391 pmem->pfn_flags |= PFN_MAP;
387 if (IS_ERR(pmem->virt_addr)) { 392 if (IS_ERR(pmem->virt_addr)) {
388 rc = PTR_ERR(pmem->virt_addr); 393 rc = PTR_ERR(pmem->virt_addr);
389 goto err; 394 goto err;
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index 94a8f4ab57bc..ce7b70181740 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -17,6 +17,7 @@
17#include <linux/completion.h> 17#include <linux/completion.h>
18#include <linux/interrupt.h> 18#include <linux/interrupt.h>
19#include <linux/platform_device.h> 19#include <linux/platform_device.h>
20#include <linux/pfn_t.h>
20#include <asm/extmem.h> 21#include <asm/extmem.h>
21#include <asm/io.h> 22#include <asm/io.h>
22 23
@@ -30,7 +31,7 @@ static void dcssblk_release(struct gendisk *disk, fmode_t mode);
30static blk_qc_t dcssblk_make_request(struct request_queue *q, 31static blk_qc_t dcssblk_make_request(struct request_queue *q,
31 struct bio *bio); 32 struct bio *bio);
32static long dcssblk_direct_access(struct block_device *bdev, sector_t secnum, 33static long dcssblk_direct_access(struct block_device *bdev, sector_t secnum,
33 void __pmem **kaddr, unsigned long *pfn); 34 void __pmem **kaddr, pfn_t *pfn);
34 35
35static char dcssblk_segments[DCSSBLK_PARM_LEN] = "\0"; 36static char dcssblk_segments[DCSSBLK_PARM_LEN] = "\0";
36 37
@@ -883,20 +884,18 @@ fail:
883 884
884static long 885static long
885dcssblk_direct_access (struct block_device *bdev, sector_t secnum, 886dcssblk_direct_access (struct block_device *bdev, sector_t secnum,
886 void __pmem **kaddr, unsigned long *pfn) 887 void __pmem **kaddr, pfn_t *pfn)
887{ 888{
888 struct dcssblk_dev_info *dev_info; 889 struct dcssblk_dev_info *dev_info;
889 unsigned long offset, dev_sz; 890 unsigned long offset, dev_sz;
890 void *addr;
891 891
892 dev_info = bdev->bd_disk->private_data; 892 dev_info = bdev->bd_disk->private_data;
893 if (!dev_info) 893 if (!dev_info)
894 return -ENODEV; 894 return -ENODEV;
895 dev_sz = dev_info->end - dev_info->start; 895 dev_sz = dev_info->end - dev_info->start;
896 offset = secnum * 512; 896 offset = secnum * 512;
897 addr = (void *) (dev_info->start + offset); 897 *kaddr = (void __pmem *) (dev_info->start + offset);
898 *pfn = virt_to_phys(addr) >> PAGE_SHIFT; 898 *pfn = __pfn_to_pfn_t(PFN_DOWN(dev_info->start + offset), PFN_DEV);
899 *kaddr = (void __pmem *) addr;
900 899
901 return dev_sz - offset; 900 return dev_sz - offset;
902} 901}
diff --git a/fs/dax.c b/fs/dax.c
index 3220da70ee20..6b13d6cd9a9a 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -28,6 +28,7 @@
28#include <linux/sched.h> 28#include <linux/sched.h>
29#include <linux/uio.h> 29#include <linux/uio.h>
30#include <linux/vmstat.h> 30#include <linux/vmstat.h>
31#include <linux/pfn_t.h>
31#include <linux/sizes.h> 32#include <linux/sizes.h>
32 33
33static long dax_map_atomic(struct block_device *bdev, struct blk_dax_ctl *dax) 34static long dax_map_atomic(struct block_device *bdev, struct blk_dax_ctl *dax)
@@ -362,7 +363,7 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
362 } 363 }
363 dax_unmap_atomic(bdev, &dax); 364 dax_unmap_atomic(bdev, &dax);
364 365
365 error = vm_insert_mixed(vma, vaddr, dax.pfn); 366 error = vm_insert_mixed(vma, vaddr, pfn_t_to_pfn(dax.pfn));
366 367
367 out: 368 out:
368 i_mmap_unlock_read(mapping); 369 i_mmap_unlock_read(mapping);
@@ -667,7 +668,8 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
667 result = VM_FAULT_SIGBUS; 668 result = VM_FAULT_SIGBUS;
668 goto out; 669 goto out;
669 } 670 }
670 if ((length < PMD_SIZE) || (dax.pfn & PG_PMD_COLOUR)) { 671 if (length < PMD_SIZE
672 || (pfn_t_to_pfn(dax.pfn) & PG_PMD_COLOUR)) {
671 dax_unmap_atomic(bdev, &dax); 673 dax_unmap_atomic(bdev, &dax);
672 goto fallback; 674 goto fallback;
673 } 675 }
@@ -676,7 +678,7 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
676 * TODO: teach vmf_insert_pfn_pmd() to support 678 * TODO: teach vmf_insert_pfn_pmd() to support
677 * 'pte_special' for pmds 679 * 'pte_special' for pmds
678 */ 680 */
679 if (pfn_valid(dax.pfn)) { 681 if (pfn_t_has_page(dax.pfn)) {
680 dax_unmap_atomic(bdev, &dax); 682 dax_unmap_atomic(bdev, &dax);
681 goto fallback; 683 goto fallback;
682 } 684 }
@@ -690,7 +692,8 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
690 } 692 }
691 dax_unmap_atomic(bdev, &dax); 693 dax_unmap_atomic(bdev, &dax);
692 694
693 result |= vmf_insert_pfn_pmd(vma, address, pmd, dax.pfn, write); 695 result |= vmf_insert_pfn_pmd(vma, address, pmd,
696 pfn_t_to_pfn(dax.pfn), write);
694 } 697 }
695 698
696 out: 699 out:
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 88821fa26f19..bfb64d672e19 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -15,6 +15,7 @@
15#include <linux/backing-dev-defs.h> 15#include <linux/backing-dev-defs.h>
16#include <linux/wait.h> 16#include <linux/wait.h>
17#include <linux/mempool.h> 17#include <linux/mempool.h>
18#include <linux/pfn.h>
18#include <linux/bio.h> 19#include <linux/bio.h>
19#include <linux/stringify.h> 20#include <linux/stringify.h>
20#include <linux/gfp.h> 21#include <linux/gfp.h>
@@ -1628,7 +1629,7 @@ struct blk_dax_ctl {
1628 sector_t sector; 1629 sector_t sector;
1629 void __pmem *addr; 1630 void __pmem *addr;
1630 long size; 1631 long size;
1631 unsigned long pfn; 1632 pfn_t pfn;
1632}; 1633};
1633 1634
1634struct block_device_operations { 1635struct block_device_operations {
@@ -1638,7 +1639,7 @@ struct block_device_operations {
1638 int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); 1639 int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
1639 int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); 1640 int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
1640 long (*direct_access)(struct block_device *, sector_t, void __pmem **, 1641 long (*direct_access)(struct block_device *, sector_t, void __pmem **,
1641 unsigned long *pfn); 1642 pfn_t *);
1642 unsigned int (*check_events) (struct gendisk *disk, 1643 unsigned int (*check_events) (struct gendisk *disk,
1643 unsigned int clearing); 1644 unsigned int clearing);
1644 /* ->media_changed() is DEPRECATED, use ->check_events() instead */ 1645 /* ->media_changed() is DEPRECATED, use ->check_events() instead */
diff --git a/include/linux/pfn.h b/include/linux/pfn.h
index 97f3e88aead4..2d8e49711b63 100644
--- a/include/linux/pfn.h
+++ b/include/linux/pfn.h
@@ -3,6 +3,15 @@
3 3
4#ifndef __ASSEMBLY__ 4#ifndef __ASSEMBLY__
5#include <linux/types.h> 5#include <linux/types.h>
6
7/*
8 * pfn_t: encapsulates a page-frame number that is optionally backed
9 * by memmap (struct page). Whether a pfn_t has a 'struct page'
10 * backing is indicated by flags in the high bits of the value.
11 */
12typedef struct {
13 unsigned long val;
14} pfn_t;
6#endif 15#endif
7 16
8#define PFN_ALIGN(x) (((unsigned long)(x) + (PAGE_SIZE - 1)) & PAGE_MASK) 17#define PFN_ALIGN(x) (((unsigned long)(x) + (PAGE_SIZE - 1)) & PAGE_MASK)
diff --git a/include/linux/pfn_t.h b/include/linux/pfn_t.h
new file mode 100644
index 000000000000..c557a0e0b20c
--- /dev/null
+++ b/include/linux/pfn_t.h
@@ -0,0 +1,67 @@
1#ifndef _LINUX_PFN_T_H_
2#define _LINUX_PFN_T_H_
3#include <linux/mm.h>
4
5/*
6 * PFN_FLAGS_MASK - mask of all the possible valid pfn_t flags
7 * PFN_SG_CHAIN - pfn is a pointer to the next scatterlist entry
8 * PFN_SG_LAST - pfn references a page and is the last scatterlist entry
9 * PFN_DEV - pfn is not covered by system memmap by default
10 * PFN_MAP - pfn has a dynamic page mapping established by a device driver
11 */
12#define PFN_FLAGS_MASK (((unsigned long) ~PAGE_MASK) \
13 << (BITS_PER_LONG - PAGE_SHIFT))
14#define PFN_SG_CHAIN (1UL << (BITS_PER_LONG - 1))
15#define PFN_SG_LAST (1UL << (BITS_PER_LONG - 2))
16#define PFN_DEV (1UL << (BITS_PER_LONG - 3))
17#define PFN_MAP (1UL << (BITS_PER_LONG - 4))
18
19static inline pfn_t __pfn_to_pfn_t(unsigned long pfn, unsigned long flags)
20{
21 pfn_t pfn_t = { .val = pfn | (flags & PFN_FLAGS_MASK), };
22
23 return pfn_t;
24}
25
26/* a default pfn to pfn_t conversion assumes that @pfn is pfn_valid() */
27static inline pfn_t pfn_to_pfn_t(unsigned long pfn)
28{
29 return __pfn_to_pfn_t(pfn, 0);
30}
31
32extern pfn_t phys_to_pfn_t(dma_addr_t addr, unsigned long flags);
33
34static inline bool pfn_t_has_page(pfn_t pfn)
35{
36 return (pfn.val & PFN_MAP) == PFN_MAP || (pfn.val & PFN_DEV) == 0;
37}
38
39static inline unsigned long pfn_t_to_pfn(pfn_t pfn)
40{
41 return pfn.val & ~PFN_FLAGS_MASK;
42}
43
44static inline struct page *pfn_t_to_page(pfn_t pfn)
45{
46 if (pfn_t_has_page(pfn))
47 return pfn_to_page(pfn_t_to_pfn(pfn));
48 return NULL;
49}
50
51static inline dma_addr_t pfn_t_to_phys(pfn_t pfn)
52{
53 return PFN_PHYS(pfn_t_to_pfn(pfn));
54}
55
56static inline void *pfn_t_to_virt(pfn_t pfn)
57{
58 if (pfn_t_has_page(pfn))
59 return __va(pfn_t_to_phys(pfn));
60 return NULL;
61}
62
63static inline pfn_t page_to_pfn_t(struct page *page)
64{
65 return pfn_to_pfn_t(page_to_pfn(page));
66}
67#endif /* _LINUX_PFN_T_H_ */
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 7658d32c5c78..449cb6a5d9a1 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -12,6 +12,7 @@
12 */ 12 */
13#include <linux/device.h> 13#include <linux/device.h>
14#include <linux/types.h> 14#include <linux/types.h>
15#include <linux/pfn_t.h>
15#include <linux/io.h> 16#include <linux/io.h>
16#include <linux/mm.h> 17#include <linux/mm.h>
17#include <linux/memory_hotplug.h> 18#include <linux/memory_hotplug.h>
@@ -147,6 +148,12 @@ void devm_memunmap(struct device *dev, void *addr)
147} 148}
148EXPORT_SYMBOL(devm_memunmap); 149EXPORT_SYMBOL(devm_memunmap);
149 150
151pfn_t phys_to_pfn_t(dma_addr_t addr, unsigned long flags)
152{
153 return __pfn_to_pfn_t(addr >> PAGE_SHIFT, flags);
154}
155EXPORT_SYMBOL(phys_to_pfn_t);
156
150#ifdef CONFIG_ZONE_DEVICE 157#ifdef CONFIG_ZONE_DEVICE
151struct page_map { 158struct page_map {
152 struct resource res; 159 struct resource res;