author	Len Brown <len.brown@intel.com>	2009-01-09 03:39:43 -0500
committer	Len Brown <len.brown@intel.com>	2009-01-09 03:39:43 -0500
commit	b2576e1d4408e134e2188c967b1f28af39cd79d4 (patch)
tree	004f3c82faab760f304ce031d6d2f572e7746a50 /arch/x86/mm
parent	3cc8a5f4ba91f67bbdb81a43a99281a26aab8d77 (diff)
parent	2150edc6c5cf00f7adb54538b9ea2a3e9cedca3f (diff)
Merge branch 'linus' into release
Diffstat (limited to 'arch/x86/mm')
-rw-r--r--	arch/x86/mm/Makefile	|   3
-rw-r--r--	arch/x86/mm/fault.c	|  39
-rw-r--r--	arch/x86/mm/init_32.c	|  45
-rw-r--r--	arch/x86/mm/init_64.c	|   6
-rw-r--r--	arch/x86/mm/ioremap.c	|   3
-rw-r--r--	arch/x86/mm/numa_32.c	|   4
-rw-r--r--	arch/x86/mm/numa_64.c	|   4
-rw-r--r--	arch/x86/mm/pat.c	| 236
-rw-r--r--	arch/x86/mm/srat_64.c	|   2
9 files changed, 294 insertions(+), 48 deletions(-)
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index fea4565ff576..d8cc96a2738f 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -8,9 +8,8 @@ obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o
 
 obj-$(CONFIG_HIGHMEM) += highmem_32.o
 
-obj-$(CONFIG_MMIOTRACE_HOOKS) += kmmio.o
 obj-$(CONFIG_MMIOTRACE) += mmiotrace.o
-mmiotrace-y := pf_in.o mmio-mod.o
+mmiotrace-y := kmmio.o pf_in.o mmio-mod.o
 obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o
 
 obj-$(CONFIG_NUMA) += numa_$(BITS).o
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 31e8730fa246..9e268b6b204e 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -53,7 +53,7 @@
 
 static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr)
 {
-#ifdef CONFIG_MMIOTRACE_HOOKS
+#ifdef CONFIG_MMIOTRACE
         if (unlikely(is_kmmio_active()))
                 if (kmmio_handler(regs, addr) == 1)
                         return -1;
@@ -393,7 +393,7 @@ static void show_fault_oops(struct pt_regs *regs, unsigned long error_code,
                 if (pte && pte_present(*pte) && !pte_exec(*pte))
                         printk(KERN_CRIT "kernel tried to execute "
                                 "NX-protected page - exploit attempt? "
-                                "(uid: %d)\n", current->uid);
+                                "(uid: %d)\n", current_uid());
         }
 #endif
 
@@ -413,6 +413,7 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
                           unsigned long error_code)
 {
         unsigned long flags = oops_begin();
+        int sig = SIGKILL;
         struct task_struct *tsk;
 
         printk(KERN_ALERT "%s: Corrupted page table at address %lx\n",
@@ -423,8 +424,8 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
         tsk->thread.trap_no = 14;
         tsk->thread.error_code = error_code;
         if (__die("Bad pagetable", regs, error_code))
-                regs = NULL;
-        oops_end(flags, regs, SIGKILL);
+                sig = 0;
+        oops_end(flags, regs, sig);
 }
 #endif
 
@@ -590,6 +591,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
         int fault;
 #ifdef CONFIG_X86_64
         unsigned long flags;
+        int sig;
 #endif
 
         tsk = current;
@@ -665,7 +667,6 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
         if (unlikely(in_atomic() || !mm))
                 goto bad_area_nosemaphore;
 
-again:
         /*
          * When running in the kernel we expect faults to occur only to
          * addresses in user space. All other faults represent errors in the
@@ -849,32 +850,22 @@ no_context:
         bust_spinlocks(0);
         do_exit(SIGKILL);
 #else
+        sig = SIGKILL;
         if (__die("Oops", regs, error_code))
-                regs = NULL;
+                sig = 0;
         /* Executive summary in case the body of the oops scrolled away */
         printk(KERN_EMERG "CR2: %016lx\n", address);
-        oops_end(flags, regs, SIGKILL);
+        oops_end(flags, regs, sig);
 #endif
 
-/*
- * We ran out of memory, or some other thing happened to us that made
- * us unable to handle the page fault gracefully.
- */
 out_of_memory:
+        /*
+         * We ran out of memory, call the OOM killer, and return to userspace
+         * (which will retry the fault, or kill us if we got oom-killed).
+         */
         up_read(&mm->mmap_sem);
-        if (is_global_init(tsk)) {
-                yield();
-                /*
-                 * Re-lookup the vma - in theory the vma tree might
-                 * have changed:
-                 */
-                goto again;
-        }
-
-        printk("VM: killing process %s\n", tsk->comm);
-        if (error_code & PF_USER)
-                do_group_exit(SIGKILL);
-        goto no_context;
 
 do_sigbus:
         up_read(&mm->mmap_sem);
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index c483f4242079..88f1b10de3be 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -21,6 +21,7 @@
 #include <linux/init.h>
 #include <linux/highmem.h>
 #include <linux/pagemap.h>
+#include <linux/pci.h>
 #include <linux/pfn.h>
 #include <linux/poison.h>
 #include <linux/bootmem.h>
@@ -67,7 +68,7 @@ static unsigned long __meminitdata table_top;
 
 static int __initdata after_init_bootmem;
 
-static __init void *alloc_low_page(unsigned long *phys)
+static __init void *alloc_low_page(void)
 {
         unsigned long pfn = table_end++;
         void *adr;
@@ -77,7 +78,6 @@ static __init void *alloc_low_page(unsigned long *phys)
 
         adr = __va(pfn * PAGE_SIZE);
         memset(adr, 0, PAGE_SIZE);
-        *phys = pfn * PAGE_SIZE;
         return adr;
 }
 
@@ -92,16 +92,17 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
         pmd_t *pmd_table;
 
 #ifdef CONFIG_X86_PAE
-        unsigned long phys;
         if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
                 if (after_init_bootmem)
                         pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
                 else
-                        pmd_table = (pmd_t *)alloc_low_page(&phys);
+                        pmd_table = (pmd_t *)alloc_low_page();
                 paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
                 set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
                 pud = pud_offset(pgd, 0);
                 BUG_ON(pmd_table != pmd_offset(pud, 0));
+
+                return pmd_table;
         }
 #endif
         pud = pud_offset(pgd, 0);
@@ -126,10 +127,8 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
                 if (!page_table)
                         page_table =
                                 (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE);
-        } else {
-                unsigned long phys;
-                page_table = (pte_t *)alloc_low_page(&phys);
-        }
+        } else
+                page_table = (pte_t *)alloc_low_page();
 
         paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT);
         set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
@@ -329,6 +328,8 @@ int devmem_is_allowed(unsigned long pagenr)
 {
         if (pagenr <= 256)
                 return 1;
+        if (iomem_is_exclusive(pagenr << PAGE_SHIFT))
+                return 0;
         if (!page_is_ram(pagenr))
                 return 1;
         return 0;
@@ -436,8 +437,12 @@ static void __init set_highmem_pages_init(void)
 #endif /* !CONFIG_NUMA */
 
 #else
-# define permanent_kmaps_init(pgd_base)	do { } while (0)
-# define set_highmem_pages_init()	do { } while (0)
+static inline void permanent_kmaps_init(pgd_t *pgd_base)
+{
+}
+static inline void set_highmem_pages_init(void)
+{
+}
 #endif /* CONFIG_HIGHMEM */
 
 void __init native_pagetable_setup_start(pgd_t *base)
@@ -969,7 +974,7 @@ void __init mem_init(void)
         int codesize, reservedpages, datasize, initsize;
         int tmp;
 
-        start_periodic_check_for_corruption();
+        pci_iommu_alloc();
 
 #ifdef CONFIG_FLATMEM
         BUG_ON(!mem_map);
@@ -1040,11 +1045,25 @@ void __init mem_init(void)
                 (unsigned long)&_text, (unsigned long)&_etext,
                 ((unsigned long)&_etext - (unsigned long)&_text) >> 10);
 
+        /*
+         * Check boundaries twice: Some fundamental inconsistencies can
+         * be detected at build time already.
+         */
+#define __FIXADDR_TOP (-PAGE_SIZE)
+#ifdef CONFIG_HIGHMEM
+        BUILD_BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START);
+        BUILD_BUG_ON(VMALLOC_END > PKMAP_BASE);
+#endif
+#define high_memory (-128UL << 20)
+        BUILD_BUG_ON(VMALLOC_START >= VMALLOC_END);
+#undef high_memory
+#undef __FIXADDR_TOP
+
 #ifdef CONFIG_HIGHMEM
         BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START);
         BUG_ON(VMALLOC_END > PKMAP_BASE);
 #endif
-        BUG_ON(VMALLOC_START > VMALLOC_END);
+        BUG_ON(VMALLOC_START >= VMALLOC_END);
         BUG_ON((unsigned long)high_memory > VMALLOC_START);
 
         if (boot_cpu_data.wp_works_ok < 0)
@@ -1062,7 +1081,7 @@ int arch_add_memory(int nid, u64 start, u64 size)
         unsigned long start_pfn = start >> PAGE_SHIFT;
         unsigned long nr_pages = size >> PAGE_SHIFT;
 
-        return __add_pages(zone, start_pfn, nr_pages);
+        return __add_pages(nid, zone, start_pfn, nr_pages);
 }
 #endif
 
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 9db01db6e3cd..23f68e77ad1f 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -857,7 +857,7 @@ int arch_add_memory(int nid, u64 start, u64 size)
         if (last_mapped_pfn > max_pfn_mapped)
                 max_pfn_mapped = last_mapped_pfn;
 
-        ret = __add_pages(zone, start_pfn, nr_pages);
+        ret = __add_pages(nid, zone, start_pfn, nr_pages);
         WARN_ON_ONCE(ret);
 
         return ret;
@@ -888,6 +888,8 @@ int devmem_is_allowed(unsigned long pagenr)
 {
         if (pagenr <= 256)
                 return 1;
+        if (iomem_is_exclusive(pagenr << PAGE_SHIFT))
+                return 0;
         if (!page_is_ram(pagenr))
                 return 1;
         return 0;
@@ -902,8 +904,6 @@ void __init mem_init(void)
         long codesize, reservedpages, datasize, initsize;
         unsigned long absent_pages;
 
-        start_periodic_check_for_corruption();
-
         pci_iommu_alloc();
 
         /* clear_bss() already clear the empty_zero_page */
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index d4c4307ff3e0..bd85d42819e1 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -223,7 +223,8 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
          * Check if the request spans more than any BAR in the iomem resource
          * tree.
          */
-        WARN_ON(iomem_map_sanity_check(phys_addr, size));
+        WARN_ONCE(iomem_map_sanity_check(phys_addr, size),
+                  KERN_INFO "Info: mapping multiple BARs. Your kernel is fine.");
 
         /*
          * Don't allow anybody to remap normal RAM that we're using..
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 8518c678d83f..d1f7439d173c 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -239,7 +239,7 @@ void resume_map_numa_kva(pgd_t *pgd_base)
                 start_pfn = node_remap_start_pfn[node];
                 size = node_remap_size[node];
 
-                printk(KERN_DEBUG "%s: node %d\n", __FUNCTION__, node);
+                printk(KERN_DEBUG "%s: node %d\n", __func__, node);
 
                 for (pfn = 0; pfn < size; pfn += PTRS_PER_PTE) {
                         unsigned long vaddr = start_va + (pfn << PAGE_SHIFT);
@@ -251,7 +251,7 @@ void resume_map_numa_kva(pgd_t *pgd_base)
                                         PAGE_KERNEL_LARGE_EXEC));
 
                         printk(KERN_DEBUG "%s: %08lx -> pfn %08lx\n",
-                                __FUNCTION__, vaddr, start_pfn + pfn);
+                                __func__, vaddr, start_pfn + pfn);
                 }
         }
 }
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index cebcbf152d46..71a14f89f89e 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -278,7 +278,7 @@ void __init numa_init_array(void)
         int rr, i;
 
         rr = first_node(node_online_map);
-        for (i = 0; i < NR_CPUS; i++) {
+        for (i = 0; i < nr_cpu_ids; i++) {
                 if (early_cpu_to_node(i) != NUMA_NO_NODE)
                         continue;
                 numa_set_node(i, rr);
@@ -549,7 +549,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn)
         memnodemap[0] = 0;
         node_set_online(0);
         node_set(0, node_possible_map);
-        for (i = 0; i < NR_CPUS; i++)
+        for (i = 0; i < nr_cpu_ids; i++)
                 numa_set_node(i, 0);
         e820_register_active_regions(0, start_pfn, last_pfn);
         setup_node_bootmem(0, start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT);
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index eb1bf000d12e..85cbd3cd3723 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -596,6 +596,242 @@ void unmap_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot)
         free_memtype(addr, addr + size);
 }
 
+/*
+ * Internal interface to reserve a range of physical memory with prot.
+ * Reserves non-RAM regions only; after a successful reserve_memtype,
+ * this func also keeps identity mapping (if any) in sync with this new prot.
+ */
+static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t vma_prot)
+{
+        int is_ram = 0;
+        int id_sz, ret;
+        unsigned long flags;
+        unsigned long want_flags = (pgprot_val(vma_prot) & _PAGE_CACHE_MASK);
+
+        is_ram = pagerange_is_ram(paddr, paddr + size);
+
+        if (is_ram != 0) {
+                /*
+                 * For mapping RAM pages, drivers need to call
+                 * set_memory_[uc|wc|wb] directly, for reserve and free, before
+                 * setting up the PTE.
+                 */
+                WARN_ON_ONCE(1);
+                return 0;
+        }
+
+        ret = reserve_memtype(paddr, paddr + size, want_flags, &flags);
+        if (ret)
+                return ret;
+
+        if (flags != want_flags) {
+                free_memtype(paddr, paddr + size);
+                printk(KERN_ERR
+                "%s:%d map pfn expected mapping type %s for %Lx-%Lx, got %s\n",
+                        current->comm, current->pid,
+                        cattr_name(want_flags),
+                        (unsigned long long)paddr,
+                        (unsigned long long)(paddr + size),
+                        cattr_name(flags));
+                return -EINVAL;
+        }
+
+        /* Need to keep identity mapping in sync */
+        if (paddr >= __pa(high_memory))
+                return 0;
+
+        id_sz = (__pa(high_memory) < paddr + size) ?
+                                __pa(high_memory) - paddr :
+                                size;
+
+        if (ioremap_change_attr((unsigned long)__va(paddr), id_sz, flags) < 0) {
+                free_memtype(paddr, paddr + size);
+                printk(KERN_ERR
+                        "%s:%d reserve_pfn_range ioremap_change_attr failed %s "
+                        "for %Lx-%Lx\n",
+                        current->comm, current->pid,
+                        cattr_name(flags),
+                        (unsigned long long)paddr,
+                        (unsigned long long)(paddr + size));
+                return -EINVAL;
+        }
+        return 0;
+}
+
+/*
+ * Internal interface to free a range of physical memory.
+ * Frees non-RAM regions only.
+ */
+static void free_pfn_range(u64 paddr, unsigned long size)
+{
+        int is_ram;
+
+        is_ram = pagerange_is_ram(paddr, paddr + size);
+        if (is_ram == 0)
+                free_memtype(paddr, paddr + size);
+}
+
+/*
+ * track_pfn_vma_copy is called when a vma covering the pfnmap gets
+ * copied through copy_page_range().
+ *
+ * If the vma has a linear pfn mapping for the entire range, we get the prot
+ * from the pte and reserve the entire vma range with a single
+ * reserve_pfn_range call. Otherwise, we reserve the vma range by walking
+ * through the PTEs page by page to get the physical address and protection.
+ */
+int track_pfn_vma_copy(struct vm_area_struct *vma)
+{
+        int retval = 0;
+        unsigned long i, j;
+        resource_size_t paddr;
+        unsigned long prot;
+        unsigned long vma_start = vma->vm_start;
+        unsigned long vma_end = vma->vm_end;
+        unsigned long vma_size = vma_end - vma_start;
+
+        if (!pat_enabled)
+                return 0;
+
+        if (is_linear_pfn_mapping(vma)) {
+                /*
+                 * reserve the whole chunk covered by vma. We need the
+                 * starting address and protection from pte.
+                 */
+                if (follow_phys(vma, vma_start, 0, &prot, &paddr)) {
+                        WARN_ON_ONCE(1);
+                        return -EINVAL;
+                }
+                return reserve_pfn_range(paddr, vma_size, __pgprot(prot));
+        }
+
+        /* reserve entire vma page by page, using pfn and prot from pte */
+        for (i = 0; i < vma_size; i += PAGE_SIZE) {
+                if (follow_phys(vma, vma_start + i, 0, &prot, &paddr))
+                        continue;
+
+                retval = reserve_pfn_range(paddr, PAGE_SIZE, __pgprot(prot));
+                if (retval)
+                        goto cleanup_ret;
+        }
+        return 0;
+
+cleanup_ret:
+        /* Reserve error: Cleanup partial reservation and return error */
+        for (j = 0; j < i; j += PAGE_SIZE) {
+                if (follow_phys(vma, vma_start + j, 0, &prot, &paddr))
+                        continue;
+
+                free_pfn_range(paddr, PAGE_SIZE);
+        }
+
+        return retval;
+}
+
+/*
+ * track_pfn_vma_new is called when a _new_ pfn mapping is being established
+ * for the physical range indicated by pfn and size.
+ *
+ * prot is passed in as a parameter for the new mapping. If the vma has a
+ * linear pfn mapping for the entire range, reserve the entire vma range with
+ * a single reserve_pfn_range call.
+ * Otherwise, we look at the pfn and size and reserve only the specified range
+ * page by page.
+ *
+ * Note that this function can be called with the caller trying to map only a
+ * subrange/page inside the vma.
+ */
+int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t prot,
+                        unsigned long pfn, unsigned long size)
+{
+        int retval = 0;
+        unsigned long i, j;
+        resource_size_t base_paddr;
+        resource_size_t paddr;
+        unsigned long vma_start = vma->vm_start;
+        unsigned long vma_end = vma->vm_end;
+        unsigned long vma_size = vma_end - vma_start;
+
+        if (!pat_enabled)
+                return 0;
+
+        if (is_linear_pfn_mapping(vma)) {
+                /* reserve the whole chunk starting from vm_pgoff */
+                paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
+                return reserve_pfn_range(paddr, vma_size, prot);
+        }
+
+        /* reserve page by page using pfn and size */
+        base_paddr = (resource_size_t)pfn << PAGE_SHIFT;
+        for (i = 0; i < size; i += PAGE_SIZE) {
+                paddr = base_paddr + i;
+                retval = reserve_pfn_range(paddr, PAGE_SIZE, prot);
+                if (retval)
+                        goto cleanup_ret;
+        }
+        return 0;
+
+cleanup_ret:
+        /* Reserve error: Cleanup partial reservation and return error */
+        for (j = 0; j < i; j += PAGE_SIZE) {
+                paddr = base_paddr + j;
+                free_pfn_range(paddr, PAGE_SIZE);
+        }
+
+        return retval;
+}
+
+/*
+ * untrack_pfn_vma is called while unmapping a pfnmap for a region.
+ * untrack can be called for a specific region indicated by pfn and size or
+ * can be for the entire vma (in which case size can be zero).
+ */
+void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn,
+                        unsigned long size)
+{
+        unsigned long i;
+        resource_size_t paddr;
+        unsigned long prot;
+        unsigned long vma_start = vma->vm_start;
+        unsigned long vma_end = vma->vm_end;
+        unsigned long vma_size = vma_end - vma_start;
+
+        if (!pat_enabled)
+                return;
+
+        if (is_linear_pfn_mapping(vma)) {
+                /* free the whole chunk starting from vm_pgoff */
+                paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
+                free_pfn_range(paddr, vma_size);
+                return;
+        }
+
+        if (size != 0 && size != vma_size) {
+                /* free page by page, using pfn and size */
+                paddr = (resource_size_t)pfn << PAGE_SHIFT;
+                for (i = 0; i < size; i += PAGE_SIZE) {
+                        paddr = paddr + i;
+                        free_pfn_range(paddr, PAGE_SIZE);
+                }
+        } else {
+                /* free entire vma, page by page, using the pfn from pte */
+                for (i = 0; i < vma_size; i += PAGE_SIZE) {
+                        if (follow_phys(vma, vma_start + i, 0, &prot, &paddr))
+                                continue;
+
+                        free_pfn_range(paddr, PAGE_SIZE);
+                }
+        }
+}
+
+pgprot_t pgprot_writecombine(pgprot_t prot)
+{
+        if (pat_enabled)
+                return __pgprot(pgprot_val(prot) | _PAGE_CACHE_WC);
+        else
+                return pgprot_noncached(prot);
+}
+
 #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT)
 
 /* get Nth element of the linked list */
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 51c0a2fc14fe..09737c8af074 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -382,7 +382,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
                 if (!node_online(i))
                         setup_node_bootmem(i, nodes[i].start, nodes[i].end);
 
-        for (i = 0; i < NR_CPUS; i++) {
+        for (i = 0; i < nr_cpu_ids; i++) {
                 int node = early_cpu_to_node(i);
 
                 if (node == NUMA_NO_NODE)