aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
author: Christoph Hellwig <hch@lst.de> 2015-04-01 03:12:18 -0400
committer: Ingo Molnar <mingo@kernel.org> 2015-04-01 11:02:43 -0400
commit: ec776ef6bbe1734c29cd6bd05219cd93b2731bd4 (patch)
tree: 4505347045c199c3d8923e224ecbed1ffafe1949 /arch/x86
parent: e42391cd048809d903291d07f86ed3934ce138e9 (diff)
x86/mm: Add support for the non-standard protected e820 type
Various recent BIOSes support NVDIMMs or ADR using a non-standard
e820 memory type, and Intel supplied reference Linux code using this
type to various vendors.

Wire this e820 table type up to export platform devices for the pmem
driver so that we can use it in Linux.

Based on earlier work from:
  Dave Jiang <dave.jiang@intel.com>
  Dan Williams <dan.j.williams@intel.com>

Includes fixes for NUMA regions from Boaz Harrosh.

Tested-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Dan Williams <dan.j.williams@intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Boaz Harrosh <boaz@plexistor.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Jens Axboe <axboe@fb.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Keith Busch <keith.busch@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matthew Wilcox <willy@linux.intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-nvdimm@ml01.01.org
Link: http://lkml.kernel.org/r/1427872339-6688-2-git-send-email-hch@lst.de
[ Minor cleanups. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86')
-rw-r--r-- arch/x86/Kconfig 10
-rw-r--r-- arch/x86/include/uapi/asm/e820.h 10
-rw-r--r-- arch/x86/kernel/Makefile 1
-rw-r--r-- arch/x86/kernel/e820.c 26
-rw-r--r-- arch/x86/kernel/pmem.c 53
5 files changed, 94 insertions, 6 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b7d31ca55187..9e3bcd6f4a48 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1430,6 +1430,16 @@ config ILLEGAL_POINTER_VALUE
1430 1430
1431source "mm/Kconfig" 1431source "mm/Kconfig"
1432 1432
1433config X86_PMEM_LEGACY
1434 bool "Support non-standard NVDIMMs and ADR protected memory"
1435 help
1436 Treat memory marked using the non-standard e820 type of 12 as used
1437 by the Intel Sandy Bridge-EP reference BIOS as protected memory.
1438 The kernel will offer these regions to the 'pmem' driver so
1439 they can be used for persistent storage.
1440
1441 Say Y if unsure.
1442
1433config HIGHPTE 1443config HIGHPTE
1434 bool "Allocate 3rd-level pagetables from highmem" 1444 bool "Allocate 3rd-level pagetables from highmem"
1435 depends on HIGHMEM 1445 depends on HIGHMEM
diff --git a/arch/x86/include/uapi/asm/e820.h b/arch/x86/include/uapi/asm/e820.h
index d993e33f5236..960a8a9dc4ab 100644
--- a/arch/x86/include/uapi/asm/e820.h
+++ b/arch/x86/include/uapi/asm/e820.h
@@ -33,6 +33,16 @@
33#define E820_NVS 4 33#define E820_NVS 4
34#define E820_UNUSABLE 5 34#define E820_UNUSABLE 5
35 35
36/*
37 * This is a non-standardized way to represent ADR or NVDIMM regions that
38 * persist over a reboot. The kernel will ignore their special capabilities
39 * unless the CONFIG_X86_PMEM_LEGACY=y option is set.
40 *
41 * ( Note that older platforms also used 6 for the same type of memory,
42 * but newer versions switched to 12 as 6 was assigned differently. Some
43 * time they will learn... )
44 */
45#define E820_PRAM 12
36 46
37/* 47/*
38 * reserved RAM used by kernel itself 48 * reserved RAM used by kernel itself
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index cdb1b70ddad0..971f18cd9ca0 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -94,6 +94,7 @@ obj-$(CONFIG_KVM_GUEST) += kvm.o kvmclock.o
94obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o 94obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o
95obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o 95obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o
96obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o 96obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o
97obj-$(CONFIG_X86_PMEM_LEGACY) += pmem.o
97 98
98obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o 99obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o
99 100
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 46201deee923..11cc7d54ec3f 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -149,6 +149,9 @@ static void __init e820_print_type(u32 type)
149 case E820_UNUSABLE: 149 case E820_UNUSABLE:
150 printk(KERN_CONT "unusable"); 150 printk(KERN_CONT "unusable");
151 break; 151 break;
152 case E820_PRAM:
153 printk(KERN_CONT "persistent (type %u)", type);
154 break;
152 default: 155 default:
153 printk(KERN_CONT "type %u", type); 156 printk(KERN_CONT "type %u", type);
154 break; 157 break;
@@ -343,7 +346,7 @@ int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map,
343 * continue building up new bios map based on this 346 * continue building up new bios map based on this
344 * information 347 * information
345 */ 348 */
346 if (current_type != last_type) { 349 if (current_type != last_type || current_type == E820_PRAM) {
347 if (last_type != 0) { 350 if (last_type != 0) {
348 new_bios[new_bios_entry].size = 351 new_bios[new_bios_entry].size =
349 change_point[chgidx]->addr - last_addr; 352 change_point[chgidx]->addr - last_addr;
@@ -688,6 +691,7 @@ void __init e820_mark_nosave_regions(unsigned long limit_pfn)
688 register_nosave_region(pfn, PFN_UP(ei->addr)); 691 register_nosave_region(pfn, PFN_UP(ei->addr));
689 692
690 pfn = PFN_DOWN(ei->addr + ei->size); 693 pfn = PFN_DOWN(ei->addr + ei->size);
694
691 if (ei->type != E820_RAM && ei->type != E820_RESERVED_KERN) 695 if (ei->type != E820_RAM && ei->type != E820_RESERVED_KERN)
692 register_nosave_region(PFN_UP(ei->addr), pfn); 696 register_nosave_region(PFN_UP(ei->addr), pfn);
693 697
@@ -748,7 +752,7 @@ u64 __init early_reserve_e820(u64 size, u64 align)
748/* 752/*
749 * Find the highest page frame number we have available 753 * Find the highest page frame number we have available
750 */ 754 */
751static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type) 755static unsigned long __init e820_end_pfn(unsigned long limit_pfn)
752{ 756{
753 int i; 757 int i;
754 unsigned long last_pfn = 0; 758 unsigned long last_pfn = 0;
@@ -759,7 +763,11 @@ static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type)
759 unsigned long start_pfn; 763 unsigned long start_pfn;
760 unsigned long end_pfn; 764 unsigned long end_pfn;
761 765
762 if (ei->type != type) 766 /*
767 * Persistent memory is accounted as ram for purposes of
768 * establishing max_pfn and mem_map.
769 */
770 if (ei->type != E820_RAM && ei->type != E820_PRAM)
763 continue; 771 continue;
764 772
765 start_pfn = ei->addr >> PAGE_SHIFT; 773 start_pfn = ei->addr >> PAGE_SHIFT;
@@ -784,12 +792,12 @@ static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type)
784} 792}
785unsigned long __init e820_end_of_ram_pfn(void) 793unsigned long __init e820_end_of_ram_pfn(void)
786{ 794{
787 return e820_end_pfn(MAX_ARCH_PFN, E820_RAM); 795 return e820_end_pfn(MAX_ARCH_PFN);
788} 796}
789 797
790unsigned long __init e820_end_of_low_ram_pfn(void) 798unsigned long __init e820_end_of_low_ram_pfn(void)
791{ 799{
792 return e820_end_pfn(1UL<<(32 - PAGE_SHIFT), E820_RAM); 800 return e820_end_pfn(1UL << (32-PAGE_SHIFT));
793} 801}
794 802
795static void early_panic(char *msg) 803static void early_panic(char *msg)
@@ -866,6 +874,9 @@ static int __init parse_memmap_one(char *p)
866 } else if (*p == '$') { 874 } else if (*p == '$') {
867 start_at = memparse(p+1, &p); 875 start_at = memparse(p+1, &p);
868 e820_add_region(start_at, mem_size, E820_RESERVED); 876 e820_add_region(start_at, mem_size, E820_RESERVED);
877 } else if (*p == '!') {
878 start_at = memparse(p+1, &p);
879 e820_add_region(start_at, mem_size, E820_PRAM);
869 } else 880 } else
870 e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1); 881 e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
871 882
@@ -907,6 +918,7 @@ static inline const char *e820_type_to_string(int e820_type)
907 case E820_ACPI: return "ACPI Tables"; 918 case E820_ACPI: return "ACPI Tables";
908 case E820_NVS: return "ACPI Non-volatile Storage"; 919 case E820_NVS: return "ACPI Non-volatile Storage";
909 case E820_UNUSABLE: return "Unusable memory"; 920 case E820_UNUSABLE: return "Unusable memory";
921 case E820_PRAM: return "Persistent RAM";
910 default: return "reserved"; 922 default: return "reserved";
911 } 923 }
912} 924}
@@ -940,7 +952,9 @@ void __init e820_reserve_resources(void)
940 * pci device BAR resource and insert them later in 952 * pci device BAR resource and insert them later in
941 * pcibios_resource_survey() 953 * pcibios_resource_survey()
942 */ 954 */
943 if (e820.map[i].type != E820_RESERVED || res->start < (1ULL<<20)) { 955 if (((e820.map[i].type != E820_RESERVED) &&
956 (e820.map[i].type != E820_PRAM)) ||
957 res->start < (1ULL<<20)) {
944 res->flags |= IORESOURCE_BUSY; 958 res->flags |= IORESOURCE_BUSY;
945 insert_resource(&iomem_resource, res); 959 insert_resource(&iomem_resource, res);
946 } 960 }
diff --git a/arch/x86/kernel/pmem.c b/arch/x86/kernel/pmem.c
new file mode 100644
index 000000000000..3420c874ddc5
--- /dev/null
+++ b/arch/x86/kernel/pmem.c
@@ -0,0 +1,53 @@
1/*
2 * Copyright (c) 2015, Christoph Hellwig.
3 */
4#include <linux/memblock.h>
5#include <linux/platform_device.h>
6#include <linux/slab.h>
7#include <asm/e820.h>
8#include <asm/page_types.h>
9#include <asm/setup.h>
10
11static __init void register_pmem_device(struct resource *res)
12{
13 struct platform_device *pdev;
14 int error;
15
16 pdev = platform_device_alloc("pmem", PLATFORM_DEVID_AUTO);
17 if (!pdev)
18 return;
19
20 error = platform_device_add_resources(pdev, res, 1);
21 if (error)
22 goto out_put_pdev;
23
24 error = platform_device_add(pdev);
25 if (error)
26 goto out_put_pdev;
27 return;
28
29out_put_pdev:
30 dev_warn(&pdev->dev, "failed to add 'pmem' (persistent memory) device!\n");
31 platform_device_put(pdev);
32}
33
34static __init int register_pmem_devices(void)
35{
36 int i;
37
38 for (i = 0; i < e820.nr_map; i++) {
39 struct e820entry *ei = &e820.map[i];
40
41 if (ei->type == E820_PRAM) {
42 struct resource res = {
43 .flags = IORESOURCE_MEM,
44 .start = ei->addr,
45 .end = ei->addr + ei->size - 1,
46 };
47 register_pmem_device(&res);
48 }
49 }
50
51 return 0;
52}
53device_initcall(register_pmem_devices);