aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/mm
diff options
context:
space:
mode:
authorFrederic Weisbecker <fweisbec@gmail.com>2009-10-17 19:09:09 -0400
committerFrederic Weisbecker <fweisbec@gmail.com>2009-10-17 19:12:33 -0400
commit0f8f86c7bdd1c954fbe153af437a0d91a6c5721a (patch)
tree94a8d419a470a4f9852ca397bb9bbe48db92ff5c /arch/x86/mm
parentdca2d6ac09d9ef59ff46820d4f0c94b08a671202 (diff)
parentf39cdf25bf77219676ec5360980ac40b1a7e144a (diff)
Merge commit 'perf/core' into perf/hw-breakpoint
Conflicts: kernel/Makefile kernel/trace/Makefile kernel/trace/trace.h samples/Makefile Merge reason: We need to be uptodate with the perf events development branch because we plan to rewrite the breakpoints API on top of perf events.
Diffstat (limited to 'arch/x86/mm')
-rw-r--r--arch/x86/mm/Makefile3
-rw-r--r--arch/x86/mm/fault.c27
-rw-r--r--arch/x86/mm/init.c63
-rw-r--r--arch/x86/mm/init_32.c12
-rw-r--r--arch/x86/mm/init_64.c12
-rw-r--r--arch/x86/mm/iomap_32.c27
-rw-r--r--arch/x86/mm/ioremap.c18
-rw-r--r--arch/x86/mm/kmemcheck/kmemcheck.c3
-rw-r--r--arch/x86/mm/kmemcheck/shadow.c1
-rw-r--r--arch/x86/mm/mmap.c17
-rw-r--r--arch/x86/mm/pageattr.c30
-rw-r--r--arch/x86/mm/pat.c360
-rw-r--r--arch/x86/mm/setup_nx.c69
-rw-r--r--arch/x86/mm/testmmiotrace.c29
-rw-r--r--arch/x86/mm/tlb.c15
15 files changed, 437 insertions, 249 deletions
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 9b5a9f59a478..06630d26e56d 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -1,9 +1,10 @@
1obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ 1obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
2 pat.o pgtable.o physaddr.o gup.o 2 pat.o pgtable.o physaddr.o gup.o setup_nx.o
3 3
4# Make sure __phys_addr has no stackprotector 4# Make sure __phys_addr has no stackprotector
5nostackp := $(call cc-option, -fno-stack-protector) 5nostackp := $(call cc-option, -fno-stack-protector)
6CFLAGS_physaddr.o := $(nostackp) 6CFLAGS_physaddr.o := $(nostackp)
7CFLAGS_setup_nx.o := $(nostackp)
7 8
8obj-$(CONFIG_SMP) += tlb.o 9obj-$(CONFIG_SMP) += tlb.o
9 10
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 775a020990a5..f4cee9028cf0 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -10,7 +10,7 @@
10#include <linux/bootmem.h> /* max_low_pfn */ 10#include <linux/bootmem.h> /* max_low_pfn */
11#include <linux/kprobes.h> /* __kprobes, ... */ 11#include <linux/kprobes.h> /* __kprobes, ... */
12#include <linux/mmiotrace.h> /* kmmio_handler, ... */ 12#include <linux/mmiotrace.h> /* kmmio_handler, ... */
13#include <linux/perf_counter.h> /* perf_swcounter_event */ 13#include <linux/perf_event.h> /* perf_sw_event */
14 14
15#include <asm/traps.h> /* dotraplinkage, ... */ 15#include <asm/traps.h> /* dotraplinkage, ... */
16#include <asm/pgalloc.h> /* pgd_*(), ... */ 16#include <asm/pgalloc.h> /* pgd_*(), ... */
@@ -167,6 +167,7 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address,
167 info.si_errno = 0; 167 info.si_errno = 0;
168 info.si_code = si_code; 168 info.si_code = si_code;
169 info.si_addr = (void __user *)address; 169 info.si_addr = (void __user *)address;
170 info.si_addr_lsb = si_code == BUS_MCEERR_AR ? PAGE_SHIFT : 0;
170 171
171 force_sig_info(si_signo, &info, tsk); 172 force_sig_info(si_signo, &info, tsk);
172} 173}
@@ -790,10 +791,12 @@ out_of_memory(struct pt_regs *regs, unsigned long error_code,
790} 791}
791 792
792static void 793static void
793do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address) 794do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
795 unsigned int fault)
794{ 796{
795 struct task_struct *tsk = current; 797 struct task_struct *tsk = current;
796 struct mm_struct *mm = tsk->mm; 798 struct mm_struct *mm = tsk->mm;
799 int code = BUS_ADRERR;
797 800
798 up_read(&mm->mmap_sem); 801 up_read(&mm->mmap_sem);
799 802
@@ -809,7 +812,15 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address)
809 tsk->thread.error_code = error_code; 812 tsk->thread.error_code = error_code;
810 tsk->thread.trap_no = 14; 813 tsk->thread.trap_no = 14;
811 814
812 force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); 815#ifdef CONFIG_MEMORY_FAILURE
816 if (fault & VM_FAULT_HWPOISON) {
817 printk(KERN_ERR
818 "MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n",
819 tsk->comm, tsk->pid, address);
820 code = BUS_MCEERR_AR;
821 }
822#endif
823 force_sig_info_fault(SIGBUS, code, address, tsk);
813} 824}
814 825
815static noinline void 826static noinline void
@@ -819,8 +830,8 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
819 if (fault & VM_FAULT_OOM) { 830 if (fault & VM_FAULT_OOM) {
820 out_of_memory(regs, error_code, address); 831 out_of_memory(regs, error_code, address);
821 } else { 832 } else {
822 if (fault & VM_FAULT_SIGBUS) 833 if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON))
823 do_sigbus(regs, error_code, address); 834 do_sigbus(regs, error_code, address, fault);
824 else 835 else
825 BUG(); 836 BUG();
826 } 837 }
@@ -1017,7 +1028,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
1017 if (unlikely(error_code & PF_RSVD)) 1028 if (unlikely(error_code & PF_RSVD))
1018 pgtable_bad(regs, error_code, address); 1029 pgtable_bad(regs, error_code, address);
1019 1030
1020 perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); 1031 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
1021 1032
1022 /* 1033 /*
1023 * If we're in an interrupt, have no user context or are running 1034 * If we're in an interrupt, have no user context or are running
@@ -1114,11 +1125,11 @@ good_area:
1114 1125
1115 if (fault & VM_FAULT_MAJOR) { 1126 if (fault & VM_FAULT_MAJOR) {
1116 tsk->maj_flt++; 1127 tsk->maj_flt++;
1117 perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, 1128 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
1118 regs, address); 1129 regs, address);
1119 } else { 1130 } else {
1120 tsk->min_flt++; 1131 tsk->min_flt++;
1121 perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, 1132 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
1122 regs, address); 1133 regs, address);
1123 } 1134 }
1124 1135
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 0607119cef94..73ffd5536f62 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -28,69 +28,6 @@ int direct_gbpages
28#endif 28#endif
29; 29;
30 30
31int nx_enabled;
32
33#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
34static int disable_nx __cpuinitdata;
35
36/*
37 * noexec = on|off
38 *
39 * Control non-executable mappings for processes.
40 *
41 * on Enable
42 * off Disable
43 */
44static int __init noexec_setup(char *str)
45{
46 if (!str)
47 return -EINVAL;
48 if (!strncmp(str, "on", 2)) {
49 __supported_pte_mask |= _PAGE_NX;
50 disable_nx = 0;
51 } else if (!strncmp(str, "off", 3)) {
52 disable_nx = 1;
53 __supported_pte_mask &= ~_PAGE_NX;
54 }
55 return 0;
56}
57early_param("noexec", noexec_setup);
58#endif
59
60#ifdef CONFIG_X86_PAE
61static void __init set_nx(void)
62{
63 unsigned int v[4], l, h;
64
65 if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) {
66 cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]);
67
68 if ((v[3] & (1 << 20)) && !disable_nx) {
69 rdmsr(MSR_EFER, l, h);
70 l |= EFER_NX;
71 wrmsr(MSR_EFER, l, h);
72 nx_enabled = 1;
73 __supported_pte_mask |= _PAGE_NX;
74 }
75 }
76}
77#else
78static inline void set_nx(void)
79{
80}
81#endif
82
83#ifdef CONFIG_X86_64
84void __cpuinit check_efer(void)
85{
86 unsigned long efer;
87
88 rdmsrl(MSR_EFER, efer);
89 if (!(efer & EFER_NX) || disable_nx)
90 __supported_pte_mask &= ~_PAGE_NX;
91}
92#endif
93
94static void __init find_early_table_space(unsigned long end, int use_pse, 31static void __init find_early_table_space(unsigned long end, int use_pse,
95 int use_gbpages) 32 int use_gbpages)
96{ 33{
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 3cd7711bb949..30938c1d8d5d 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -84,7 +84,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
84#ifdef CONFIG_X86_PAE 84#ifdef CONFIG_X86_PAE
85 if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { 85 if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
86 if (after_bootmem) 86 if (after_bootmem)
87 pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); 87 pmd_table = (pmd_t *)alloc_bootmem_pages(PAGE_SIZE);
88 else 88 else
89 pmd_table = (pmd_t *)alloc_low_page(); 89 pmd_table = (pmd_t *)alloc_low_page();
90 paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT); 90 paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
@@ -116,7 +116,7 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
116#endif 116#endif
117 if (!page_table) 117 if (!page_table)
118 page_table = 118 page_table =
119 (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE); 119 (pte_t *)alloc_bootmem_pages(PAGE_SIZE);
120 } else 120 } else
121 page_table = (pte_t *)alloc_low_page(); 121 page_table = (pte_t *)alloc_low_page();
122 122
@@ -857,8 +857,6 @@ static void __init test_wp_bit(void)
857 } 857 }
858} 858}
859 859
860static struct kcore_list kcore_mem, kcore_vmalloc;
861
862void __init mem_init(void) 860void __init mem_init(void)
863{ 861{
864 int codesize, reservedpages, datasize, initsize; 862 int codesize, reservedpages, datasize, initsize;
@@ -886,13 +884,9 @@ void __init mem_init(void)
886 datasize = (unsigned long) &_edata - (unsigned long) &_etext; 884 datasize = (unsigned long) &_edata - (unsigned long) &_etext;
887 initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; 885 initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
888 886
889 kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
890 kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
891 VMALLOC_END-VMALLOC_START);
892
893 printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, " 887 printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, "
894 "%dk reserved, %dk data, %dk init, %ldk highmem)\n", 888 "%dk reserved, %dk data, %dk init, %ldk highmem)\n",
895 (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), 889 nr_free_pages() << (PAGE_SHIFT-10),
896 num_physpages << (PAGE_SHIFT-10), 890 num_physpages << (PAGE_SHIFT-10),
897 codesize >> 10, 891 codesize >> 10,
898 reservedpages << (PAGE_SHIFT-10), 892 reservedpages << (PAGE_SHIFT-10),
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index ea56b8cbb6a6..5a4398a6006b 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -647,8 +647,7 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
647 647
648#endif /* CONFIG_MEMORY_HOTPLUG */ 648#endif /* CONFIG_MEMORY_HOTPLUG */
649 649
650static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, 650static struct kcore_list kcore_vsyscall;
651 kcore_modules, kcore_vsyscall;
652 651
653void __init mem_init(void) 652void __init mem_init(void)
654{ 653{
@@ -677,17 +676,12 @@ void __init mem_init(void)
677 initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; 676 initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
678 677
679 /* Register memory areas for /proc/kcore */ 678 /* Register memory areas for /proc/kcore */
680 kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
681 kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
682 VMALLOC_END-VMALLOC_START);
683 kclist_add(&kcore_kernel, &_stext, _end - _stext);
684 kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_LEN);
685 kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START, 679 kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
686 VSYSCALL_END - VSYSCALL_START); 680 VSYSCALL_END - VSYSCALL_START, KCORE_OTHER);
687 681
688 printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, " 682 printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, "
689 "%ldk absent, %ldk reserved, %ldk data, %ldk init)\n", 683 "%ldk absent, %ldk reserved, %ldk data, %ldk init)\n",
690 (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), 684 nr_free_pages() << (PAGE_SHIFT-10),
691 max_pfn << (PAGE_SHIFT-10), 685 max_pfn << (PAGE_SHIFT-10),
692 codesize >> 10, 686 codesize >> 10,
693 absent_pages << (PAGE_SHIFT-10), 687 absent_pages << (PAGE_SHIFT-10),
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
index fe6f84ca121e..84e236ce76ba 100644
--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -21,7 +21,7 @@
21#include <linux/module.h> 21#include <linux/module.h>
22#include <linux/highmem.h> 22#include <linux/highmem.h>
23 23
24int is_io_mapping_possible(resource_size_t base, unsigned long size) 24static int is_io_mapping_possible(resource_size_t base, unsigned long size)
25{ 25{
26#if !defined(CONFIG_X86_PAE) && defined(CONFIG_PHYS_ADDR_T_64BIT) 26#if !defined(CONFIG_X86_PAE) && defined(CONFIG_PHYS_ADDR_T_64BIT)
27 /* There is no way to map greater than 1 << 32 address without PAE */ 27 /* There is no way to map greater than 1 << 32 address without PAE */
@@ -30,7 +30,30 @@ int is_io_mapping_possible(resource_size_t base, unsigned long size)
30#endif 30#endif
31 return 1; 31 return 1;
32} 32}
33EXPORT_SYMBOL_GPL(is_io_mapping_possible); 33
34int iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot)
35{
36 unsigned long flag = _PAGE_CACHE_WC;
37 int ret;
38
39 if (!is_io_mapping_possible(base, size))
40 return -EINVAL;
41
42 ret = io_reserve_memtype(base, base + size, &flag);
43 if (ret)
44 return ret;
45
46 *prot = __pgprot(__PAGE_KERNEL | flag);
47 return 0;
48}
49EXPORT_SYMBOL_GPL(iomap_create_wc);
50
51void
52iomap_free(resource_size_t base, unsigned long size)
53{
54 io_free_memtype(base, base + size);
55}
56EXPORT_SYMBOL_GPL(iomap_free);
34 57
35void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) 58void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
36{ 59{
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 04e1ad60c63a..334e63ca7b2b 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -158,24 +158,14 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
158 retval = reserve_memtype(phys_addr, (u64)phys_addr + size, 158 retval = reserve_memtype(phys_addr, (u64)phys_addr + size,
159 prot_val, &new_prot_val); 159 prot_val, &new_prot_val);
160 if (retval) { 160 if (retval) {
161 pr_debug("Warning: reserve_memtype returned %d\n", retval); 161 printk(KERN_ERR "ioremap reserve_memtype failed %d\n", retval);
162 return NULL; 162 return NULL;
163 } 163 }
164 164
165 if (prot_val != new_prot_val) { 165 if (prot_val != new_prot_val) {
166 /* 166 if (!is_new_memtype_allowed(phys_addr, size,
167 * Do not fallback to certain memory types with certain 167 prot_val, new_prot_val)) {
168 * requested type: 168 printk(KERN_ERR
169 * - request is uc-, return cannot be write-back
170 * - request is uc-, return cannot be write-combine
171 * - request is write-combine, return cannot be write-back
172 */
173 if ((prot_val == _PAGE_CACHE_UC_MINUS &&
174 (new_prot_val == _PAGE_CACHE_WB ||
175 new_prot_val == _PAGE_CACHE_WC)) ||
176 (prot_val == _PAGE_CACHE_WC &&
177 new_prot_val == _PAGE_CACHE_WB)) {
178 pr_debug(
179 "ioremap error for 0x%llx-0x%llx, requested 0x%lx, got 0x%lx\n", 169 "ioremap error for 0x%llx-0x%llx, requested 0x%lx, got 0x%lx\n",
180 (unsigned long long)phys_addr, 170 (unsigned long long)phys_addr,
181 (unsigned long long)(phys_addr + size), 171 (unsigned long long)(phys_addr + size),
diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c
index 528bf954eb74..8cc183344140 100644
--- a/arch/x86/mm/kmemcheck/kmemcheck.c
+++ b/arch/x86/mm/kmemcheck/kmemcheck.c
@@ -225,9 +225,6 @@ void kmemcheck_hide(struct pt_regs *regs)
225 225
226 BUG_ON(!irqs_disabled()); 226 BUG_ON(!irqs_disabled());
227 227
228 if (data->balance == 0)
229 return;
230
231 if (unlikely(data->balance != 1)) { 228 if (unlikely(data->balance != 1)) {
232 kmemcheck_show_all(); 229 kmemcheck_show_all();
233 kmemcheck_error_save_bug(regs); 230 kmemcheck_error_save_bug(regs);
diff --git a/arch/x86/mm/kmemcheck/shadow.c b/arch/x86/mm/kmemcheck/shadow.c
index e773b6bd0079..3f66b82076a3 100644
--- a/arch/x86/mm/kmemcheck/shadow.c
+++ b/arch/x86/mm/kmemcheck/shadow.c
@@ -1,7 +1,6 @@
1#include <linux/kmemcheck.h> 1#include <linux/kmemcheck.h>
2#include <linux/module.h> 2#include <linux/module.h>
3#include <linux/mm.h> 3#include <linux/mm.h>
4#include <linux/module.h>
5 4
6#include <asm/page.h> 5#include <asm/page.h>
7#include <asm/pgtable.h> 6#include <asm/pgtable.h>
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 165829600566..c8191defc38a 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -29,13 +29,26 @@
29#include <linux/random.h> 29#include <linux/random.h>
30#include <linux/limits.h> 30#include <linux/limits.h>
31#include <linux/sched.h> 31#include <linux/sched.h>
32#include <asm/elf.h>
33
34static unsigned int stack_maxrandom_size(void)
35{
36 unsigned int max = 0;
37 if ((current->flags & PF_RANDOMIZE) &&
38 !(current->personality & ADDR_NO_RANDOMIZE)) {
39 max = ((-1U) & STACK_RND_MASK) << PAGE_SHIFT;
40 }
41
42 return max;
43}
44
32 45
33/* 46/*
34 * Top of mmap area (just below the process stack). 47 * Top of mmap area (just below the process stack).
35 * 48 *
36 * Leave an at least ~128 MB hole. 49 * Leave an at least ~128 MB hole with possible stack randomization.
37 */ 50 */
38#define MIN_GAP (128*1024*1024) 51#define MIN_GAP (128*1024*1024UL + stack_maxrandom_size())
39#define MAX_GAP (TASK_SIZE/6*5) 52#define MAX_GAP (TASK_SIZE/6*5)
40 53
41/* 54/*
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 7e600c1962db..dd38bfbefd1f 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -12,6 +12,7 @@
12#include <linux/seq_file.h> 12#include <linux/seq_file.h>
13#include <linux/debugfs.h> 13#include <linux/debugfs.h>
14#include <linux/pfn.h> 14#include <linux/pfn.h>
15#include <linux/percpu.h>
15 16
16#include <asm/e820.h> 17#include <asm/e820.h>
17#include <asm/processor.h> 18#include <asm/processor.h>
@@ -143,6 +144,7 @@ void clflush_cache_range(void *vaddr, unsigned int size)
143 144
144 mb(); 145 mb();
145} 146}
147EXPORT_SYMBOL_GPL(clflush_cache_range);
146 148
147static void __cpa_flush_all(void *arg) 149static void __cpa_flush_all(void *arg)
148{ 150{
@@ -686,7 +688,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
686{ 688{
687 struct cpa_data alias_cpa; 689 struct cpa_data alias_cpa;
688 unsigned long laddr = (unsigned long)__va(cpa->pfn << PAGE_SHIFT); 690 unsigned long laddr = (unsigned long)__va(cpa->pfn << PAGE_SHIFT);
689 unsigned long vaddr, remapped; 691 unsigned long vaddr;
690 int ret; 692 int ret;
691 693
692 if (cpa->pfn >= max_pfn_mapped) 694 if (cpa->pfn >= max_pfn_mapped)
@@ -744,24 +746,6 @@ static int cpa_process_alias(struct cpa_data *cpa)
744 } 746 }
745#endif 747#endif
746 748
747 /*
748 * If the PMD page was partially used for per-cpu remapping,
749 * the recycled area needs to be split and modified. Because
750 * the area is always proper subset of a PMD page
751 * cpa->numpages is guaranteed to be 1 for these areas, so
752 * there's no need to loop over and check for further remaps.
753 */
754 remapped = (unsigned long)pcpu_lpage_remapped((void *)laddr);
755 if (remapped) {
756 WARN_ON(cpa->numpages > 1);
757 alias_cpa = *cpa;
758 alias_cpa.vaddr = &remapped;
759 alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
760 ret = __change_page_attr_set_clr(&alias_cpa, 0);
761 if (ret)
762 return ret;
763 }
764
765 return 0; 749 return 0;
766} 750}
767 751
@@ -822,6 +806,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
822{ 806{
823 struct cpa_data cpa; 807 struct cpa_data cpa;
824 int ret, cache, checkalias; 808 int ret, cache, checkalias;
809 unsigned long baddr = 0;
825 810
826 /* 811 /*
827 * Check, if we are requested to change a not supported 812 * Check, if we are requested to change a not supported
@@ -853,6 +838,11 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
853 */ 838 */
854 WARN_ON_ONCE(1); 839 WARN_ON_ONCE(1);
855 } 840 }
841 /*
842 * Save address for cache flush. *addr is modified in the call
843 * to __change_page_attr_set_clr() below.
844 */
845 baddr = *addr;
856 } 846 }
857 847
858 /* Must avoid aliasing mappings in the highmem code */ 848 /* Must avoid aliasing mappings in the highmem code */
@@ -900,7 +890,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
900 cpa_flush_array(addr, numpages, cache, 890 cpa_flush_array(addr, numpages, cache,
901 cpa.flags, pages); 891 cpa.flags, pages);
902 } else 892 } else
903 cpa_flush_range(*addr, numpages, cache); 893 cpa_flush_range(baddr, numpages, cache);
904 } else 894 } else
905 cpa_flush_all(cache); 895 cpa_flush_all(cache);
906 896
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index b2f7d3e59b86..e78cd0ec2bcf 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -15,6 +15,7 @@
15#include <linux/gfp.h> 15#include <linux/gfp.h>
16#include <linux/mm.h> 16#include <linux/mm.h>
17#include <linux/fs.h> 17#include <linux/fs.h>
18#include <linux/rbtree.h>
18 19
19#include <asm/cacheflush.h> 20#include <asm/cacheflush.h>
20#include <asm/processor.h> 21#include <asm/processor.h>
@@ -80,6 +81,7 @@ enum {
80void pat_init(void) 81void pat_init(void)
81{ 82{
82 u64 pat; 83 u64 pat;
84 bool boot_cpu = !boot_pat_state;
83 85
84 if (!pat_enabled) 86 if (!pat_enabled)
85 return; 87 return;
@@ -121,8 +123,10 @@ void pat_init(void)
121 rdmsrl(MSR_IA32_CR_PAT, boot_pat_state); 123 rdmsrl(MSR_IA32_CR_PAT, boot_pat_state);
122 124
123 wrmsrl(MSR_IA32_CR_PAT, pat); 125 wrmsrl(MSR_IA32_CR_PAT, pat);
124 printk(KERN_INFO "x86 PAT enabled: cpu %d, old 0x%Lx, new 0x%Lx\n", 126
125 smp_processor_id(), boot_pat_state, pat); 127 if (boot_cpu)
128 printk(KERN_INFO "x86 PAT enabled: cpu %d, old 0x%Lx, new 0x%Lx\n",
129 smp_processor_id(), boot_pat_state, pat);
126} 130}
127 131
128#undef PAT 132#undef PAT
@@ -148,11 +152,10 @@ static char *cattr_name(unsigned long flags)
148 * areas). All the aliases have the same cache attributes of course. 152 * areas). All the aliases have the same cache attributes of course.
149 * Zero attributes are represented as holes. 153 * Zero attributes are represented as holes.
150 * 154 *
151 * Currently the data structure is a list because the number of mappings 155 * The data structure is a list that is also organized as an rbtree
152 * are expected to be relatively small. If this should be a problem 156 * sorted on the start address of memtype range.
153 * it could be changed to a rbtree or similar.
154 * 157 *
155 * memtype_lock protects the whole list. 158 * memtype_lock protects both the linear list and rbtree.
156 */ 159 */
157 160
158struct memtype { 161struct memtype {
@@ -160,11 +163,53 @@ struct memtype {
160 u64 end; 163 u64 end;
161 unsigned long type; 164 unsigned long type;
162 struct list_head nd; 165 struct list_head nd;
166 struct rb_node rb;
163}; 167};
164 168
169static struct rb_root memtype_rbroot = RB_ROOT;
165static LIST_HEAD(memtype_list); 170static LIST_HEAD(memtype_list);
166static DEFINE_SPINLOCK(memtype_lock); /* protects memtype list */ 171static DEFINE_SPINLOCK(memtype_lock); /* protects memtype list */
167 172
173static struct memtype *memtype_rb_search(struct rb_root *root, u64 start)
174{
175 struct rb_node *node = root->rb_node;
176 struct memtype *last_lower = NULL;
177
178 while (node) {
179 struct memtype *data = container_of(node, struct memtype, rb);
180
181 if (data->start < start) {
182 last_lower = data;
183 node = node->rb_right;
184 } else if (data->start > start) {
185 node = node->rb_left;
186 } else
187 return data;
188 }
189
190 /* Will return NULL if there is no entry with its start <= start */
191 return last_lower;
192}
193
194static void memtype_rb_insert(struct rb_root *root, struct memtype *data)
195{
196 struct rb_node **new = &(root->rb_node);
197 struct rb_node *parent = NULL;
198
199 while (*new) {
200 struct memtype *this = container_of(*new, struct memtype, rb);
201
202 parent = *new;
203 if (data->start <= this->start)
204 new = &((*new)->rb_left);
205 else if (data->start > this->start)
206 new = &((*new)->rb_right);
207 }
208
209 rb_link_node(&data->rb, parent, new);
210 rb_insert_color(&data->rb, root);
211}
212
168/* 213/*
169 * Does intersection of PAT memory type and MTRR memory type and returns 214 * Does intersection of PAT memory type and MTRR memory type and returns
170 * the resulting memory type as PAT understands it. 215 * the resulting memory type as PAT understands it.
@@ -218,9 +263,6 @@ chk_conflict(struct memtype *new, struct memtype *entry, unsigned long *type)
218 return -EBUSY; 263 return -EBUSY;
219} 264}
220 265
221static struct memtype *cached_entry;
222static u64 cached_start;
223
224static int pat_pagerange_is_ram(unsigned long start, unsigned long end) 266static int pat_pagerange_is_ram(unsigned long start, unsigned long end)
225{ 267{
226 int ram_page = 0, not_rampage = 0; 268 int ram_page = 0, not_rampage = 0;
@@ -249,63 +291,61 @@ static int pat_pagerange_is_ram(unsigned long start, unsigned long end)
249} 291}
250 292
251/* 293/*
252 * For RAM pages, mark the pages as non WB memory type using 294 * For RAM pages, we use page flags to mark the pages with appropriate type.
253 * PageNonWB (PG_arch_1). We allow only one set_memory_uc() or 295 * Here we do two pass:
254 * set_memory_wc() on a RAM page at a time before marking it as WB again. 296 * - Find the memtype of all the pages in the range, look for any conflicts
255 * This is ok, because only one driver will be owning the page and 297 * - In case of no conflicts, set the new memtype for pages in the range
256 * doing set_memory_*() calls.
257 * 298 *
258 * For now, we use PageNonWB to track that the RAM page is being mapped 299 * Caller must hold memtype_lock for atomicity.
259 * as non WB. In future, we will have to use one more flag
260 * (or some other mechanism in page_struct) to distinguish between
261 * UC and WC mapping.
262 */ 300 */
263static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type, 301static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type,
264 unsigned long *new_type) 302 unsigned long *new_type)
265{ 303{
266 struct page *page; 304 struct page *page;
267 u64 pfn, end_pfn; 305 u64 pfn;
306
307 if (req_type == _PAGE_CACHE_UC) {
308 /* We do not support strong UC */
309 WARN_ON_ONCE(1);
310 req_type = _PAGE_CACHE_UC_MINUS;
311 }
268 312
269 for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) { 313 for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) {
270 page = pfn_to_page(pfn); 314 unsigned long type;
271 if (page_mapped(page) || PageNonWB(page))
272 goto out;
273 315
274 SetPageNonWB(page); 316 page = pfn_to_page(pfn);
317 type = get_page_memtype(page);
318 if (type != -1) {
319 printk(KERN_INFO "reserve_ram_pages_type failed "
320 "0x%Lx-0x%Lx, track 0x%lx, req 0x%lx\n",
321 start, end, type, req_type);
322 if (new_type)
323 *new_type = type;
324
325 return -EBUSY;
326 }
275 } 327 }
276 return 0;
277 328
278out: 329 if (new_type)
279 end_pfn = pfn; 330 *new_type = req_type;
280 for (pfn = (start >> PAGE_SHIFT); pfn < end_pfn; ++pfn) { 331
332 for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) {
281 page = pfn_to_page(pfn); 333 page = pfn_to_page(pfn);
282 ClearPageNonWB(page); 334 set_page_memtype(page, req_type);
283 } 335 }
284 336 return 0;
285 return -EINVAL;
286} 337}
287 338
288static int free_ram_pages_type(u64 start, u64 end) 339static int free_ram_pages_type(u64 start, u64 end)
289{ 340{
290 struct page *page; 341 struct page *page;
291 u64 pfn, end_pfn; 342 u64 pfn;
292 343
293 for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) { 344 for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) {
294 page = pfn_to_page(pfn); 345 page = pfn_to_page(pfn);
295 if (page_mapped(page) || !PageNonWB(page)) 346 set_page_memtype(page, -1);
296 goto out;
297
298 ClearPageNonWB(page);
299 } 347 }
300 return 0; 348 return 0;
301
302out:
303 end_pfn = pfn;
304 for (pfn = (start >> PAGE_SHIFT); pfn < end_pfn; ++pfn) {
305 page = pfn_to_page(pfn);
306 SetPageNonWB(page);
307 }
308 return -EINVAL;
309} 349}
310 350
311/* 351/*
@@ -339,6 +379,8 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
339 if (new_type) { 379 if (new_type) {
340 if (req_type == -1) 380 if (req_type == -1)
341 *new_type = _PAGE_CACHE_WB; 381 *new_type = _PAGE_CACHE_WB;
382 else if (req_type == _PAGE_CACHE_WC)
383 *new_type = _PAGE_CACHE_UC_MINUS;
342 else 384 else
343 *new_type = req_type & _PAGE_CACHE_MASK; 385 *new_type = req_type & _PAGE_CACHE_MASK;
344 } 386 }
@@ -364,11 +406,16 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
364 *new_type = actual_type; 406 *new_type = actual_type;
365 407
366 is_range_ram = pat_pagerange_is_ram(start, end); 408 is_range_ram = pat_pagerange_is_ram(start, end);
367 if (is_range_ram == 1) 409 if (is_range_ram == 1) {
368 return reserve_ram_pages_type(start, end, req_type, 410
369 new_type); 411 spin_lock(&memtype_lock);
370 else if (is_range_ram < 0) 412 err = reserve_ram_pages_type(start, end, req_type, new_type);
413 spin_unlock(&memtype_lock);
414
415 return err;
416 } else if (is_range_ram < 0) {
371 return -EINVAL; 417 return -EINVAL;
418 }
372 419
373 new = kmalloc(sizeof(struct memtype), GFP_KERNEL); 420 new = kmalloc(sizeof(struct memtype), GFP_KERNEL);
374 if (!new) 421 if (!new)
@@ -380,17 +427,11 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
380 427
381 spin_lock(&memtype_lock); 428 spin_lock(&memtype_lock);
382 429
383 if (cached_entry && start >= cached_start)
384 entry = cached_entry;
385 else
386 entry = list_entry(&memtype_list, struct memtype, nd);
387
388 /* Search for existing mapping that overlaps the current range */ 430 /* Search for existing mapping that overlaps the current range */
389 where = NULL; 431 where = NULL;
390 list_for_each_entry_continue(entry, &memtype_list, nd) { 432 list_for_each_entry(entry, &memtype_list, nd) {
391 if (end <= entry->start) { 433 if (end <= entry->start) {
392 where = entry->nd.prev; 434 where = entry->nd.prev;
393 cached_entry = list_entry(where, struct memtype, nd);
394 break; 435 break;
395 } else if (start <= entry->start) { /* end > entry->start */ 436 } else if (start <= entry->start) { /* end > entry->start */
396 err = chk_conflict(new, entry, new_type); 437 err = chk_conflict(new, entry, new_type);
@@ -398,8 +439,6 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
398 dprintk("Overlap at 0x%Lx-0x%Lx\n", 439 dprintk("Overlap at 0x%Lx-0x%Lx\n",
399 entry->start, entry->end); 440 entry->start, entry->end);
400 where = entry->nd.prev; 441 where = entry->nd.prev;
401 cached_entry = list_entry(where,
402 struct memtype, nd);
403 } 442 }
404 break; 443 break;
405 } else if (start < entry->end) { /* start > entry->start */ 444 } else if (start < entry->end) { /* start > entry->start */
@@ -407,8 +446,6 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
407 if (!err) { 446 if (!err) {
408 dprintk("Overlap at 0x%Lx-0x%Lx\n", 447 dprintk("Overlap at 0x%Lx-0x%Lx\n",
409 entry->start, entry->end); 448 entry->start, entry->end);
410 cached_entry = list_entry(entry->nd.prev,
411 struct memtype, nd);
412 449
413 /* 450 /*
414 * Move to right position in the linked 451 * Move to right position in the linked
@@ -436,13 +473,13 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
436 return err; 473 return err;
437 } 474 }
438 475
439 cached_start = start;
440
441 if (where) 476 if (where)
442 list_add(&new->nd, where); 477 list_add(&new->nd, where);
443 else 478 else
444 list_add_tail(&new->nd, &memtype_list); 479 list_add_tail(&new->nd, &memtype_list);
445 480
481 memtype_rb_insert(&memtype_rbroot, new);
482
446 spin_unlock(&memtype_lock); 483 spin_unlock(&memtype_lock);
447 484
448 dprintk("reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n", 485 dprintk("reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n",
@@ -454,7 +491,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
454 491
455int free_memtype(u64 start, u64 end) 492int free_memtype(u64 start, u64 end)
456{ 493{
457 struct memtype *entry; 494 struct memtype *entry, *saved_entry;
458 int err = -EINVAL; 495 int err = -EINVAL;
459 int is_range_ram; 496 int is_range_ram;
460 497
@@ -466,23 +503,58 @@ int free_memtype(u64 start, u64 end)
466 return 0; 503 return 0;
467 504
468 is_range_ram = pat_pagerange_is_ram(start, end); 505 is_range_ram = pat_pagerange_is_ram(start, end);
469 if (is_range_ram == 1) 506 if (is_range_ram == 1) {
470 return free_ram_pages_type(start, end); 507
471 else if (is_range_ram < 0) 508 spin_lock(&memtype_lock);
509 err = free_ram_pages_type(start, end);
510 spin_unlock(&memtype_lock);
511
512 return err;
513 } else if (is_range_ram < 0) {
472 return -EINVAL; 514 return -EINVAL;
515 }
473 516
474 spin_lock(&memtype_lock); 517 spin_lock(&memtype_lock);
475 list_for_each_entry(entry, &memtype_list, nd) { 518
519 entry = memtype_rb_search(&memtype_rbroot, start);
520 if (unlikely(entry == NULL))
521 goto unlock_ret;
522
523 /*
524 * Saved entry points to an entry with start same or less than what
525 * we searched for. Now go through the list in both directions to look
526 * for the entry that matches with both start and end, with list stored
527 * in sorted start address
528 */
529 saved_entry = entry;
530 list_for_each_entry_from(entry, &memtype_list, nd) {
476 if (entry->start == start && entry->end == end) { 531 if (entry->start == start && entry->end == end) {
477 if (cached_entry == entry || cached_start == start) 532 rb_erase(&entry->rb, &memtype_rbroot);
478 cached_entry = NULL; 533 list_del(&entry->nd);
534 kfree(entry);
535 err = 0;
536 break;
537 } else if (entry->start > start) {
538 break;
539 }
540 }
479 541
542 if (!err)
543 goto unlock_ret;
544
545 entry = saved_entry;
546 list_for_each_entry_reverse(entry, &memtype_list, nd) {
547 if (entry->start == start && entry->end == end) {
548 rb_erase(&entry->rb, &memtype_rbroot);
480 list_del(&entry->nd); 549 list_del(&entry->nd);
481 kfree(entry); 550 kfree(entry);
482 err = 0; 551 err = 0;
483 break; 552 break;
553 } else if (entry->start < start) {
554 break;
484 } 555 }
485 } 556 }
557unlock_ret:
486 spin_unlock(&memtype_lock); 558 spin_unlock(&memtype_lock);
487 559
488 if (err) { 560 if (err) {
@@ -496,6 +568,101 @@ int free_memtype(u64 start, u64 end)
496} 568}
497 569
498 570
571/**
572 * lookup_memtype - Looksup the memory type for a physical address
573 * @paddr: physical address of which memory type needs to be looked up
574 *
575 * Only to be called when PAT is enabled
576 *
577 * Returns _PAGE_CACHE_WB, _PAGE_CACHE_WC, _PAGE_CACHE_UC_MINUS or
578 * _PAGE_CACHE_UC
579 */
580static unsigned long lookup_memtype(u64 paddr)
581{
582 int rettype = _PAGE_CACHE_WB;
583 struct memtype *entry;
584
585 if (is_ISA_range(paddr, paddr + PAGE_SIZE - 1))
586 return rettype;
587
588 if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) {
589 struct page *page;
590 spin_lock(&memtype_lock);
591 page = pfn_to_page(paddr >> PAGE_SHIFT);
592 rettype = get_page_memtype(page);
593 spin_unlock(&memtype_lock);
594 /*
595 * -1 from get_page_memtype() implies RAM page is in its
596 * default state and not reserved, and hence of type WB
597 */
598 if (rettype == -1)
599 rettype = _PAGE_CACHE_WB;
600
601 return rettype;
602 }
603
604 spin_lock(&memtype_lock);
605
606 entry = memtype_rb_search(&memtype_rbroot, paddr);
607 if (entry != NULL)
608 rettype = entry->type;
609 else
610 rettype = _PAGE_CACHE_UC_MINUS;
611
612 spin_unlock(&memtype_lock);
613 return rettype;
614}
615
616/**
617 * io_reserve_memtype - Request a memory type mapping for a region of memory
618 * @start: start (physical address) of the region
619 * @end: end (physical address) of the region
620 * @type: A pointer to memtype, with requested type. On success, requested
621 * or any other compatible type that was available for the region is returned
622 *
623 * On success, returns 0
624 * On failure, returns non-zero
625 */
626int io_reserve_memtype(resource_size_t start, resource_size_t end,
627 unsigned long *type)
628{
629 resource_size_t size = end - start;
630 unsigned long req_type = *type;
631 unsigned long new_type;
632 int ret;
633
634 WARN_ON_ONCE(iomem_map_sanity_check(start, size));
635
636 ret = reserve_memtype(start, end, req_type, &new_type);
637 if (ret)
638 goto out_err;
639
640 if (!is_new_memtype_allowed(start, size, req_type, new_type))
641 goto out_free;
642
643 if (kernel_map_sync_memtype(start, size, new_type) < 0)
644 goto out_free;
645
646 *type = new_type;
647 return 0;
648
649out_free:
650 free_memtype(start, end);
651 ret = -EBUSY;
652out_err:
653 return ret;
654}
655
656/**
657 * io_free_memtype - Release a memory type mapping for a region of memory
658 * @start: start (physical address) of the region
659 * @end: end (physical address) of the region
660 */
661void io_free_memtype(resource_size_t start, resource_size_t end)
662{
663 free_memtype(start, end);
664}
665
499pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, 666pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
500 unsigned long size, pgprot_t vma_prot) 667 unsigned long size, pgprot_t vma_prot)
501{ 668{
@@ -577,7 +744,7 @@ int kernel_map_sync_memtype(u64 base, unsigned long size, unsigned long flags)
577{ 744{
578 unsigned long id_sz; 745 unsigned long id_sz;
579 746
580 if (!pat_enabled || base >= __pa(high_memory)) 747 if (base >= __pa(high_memory))
581 return 0; 748 return 0;
582 749
583 id_sz = (__pa(high_memory) < base + size) ? 750 id_sz = (__pa(high_memory) < base + size) ?
@@ -612,11 +779,29 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
612 is_ram = pat_pagerange_is_ram(paddr, paddr + size); 779 is_ram = pat_pagerange_is_ram(paddr, paddr + size);
613 780
614 /* 781 /*
615 * reserve_pfn_range() doesn't support RAM pages. Maintain the current 782 * reserve_pfn_range() for RAM pages. We do not refcount to keep
616 * behavior with RAM pages by returning success. 783 * track of number of mappings of RAM pages. We can assert that
784 * the type requested matches the type of first page in the range.
617 */ 785 */
618 if (is_ram != 0) 786 if (is_ram) {
787 if (!pat_enabled)
788 return 0;
789
790 flags = lookup_memtype(paddr);
791 if (want_flags != flags) {
792 printk(KERN_WARNING
793 "%s:%d map pfn RAM range req %s for %Lx-%Lx, got %s\n",
794 current->comm, current->pid,
795 cattr_name(want_flags),
796 (unsigned long long)paddr,
797 (unsigned long long)(paddr + size),
798 cattr_name(flags));
799 *vma_prot = __pgprot((pgprot_val(*vma_prot) &
800 (~_PAGE_CACHE_MASK)) |
801 flags);
802 }
619 return 0; 803 return 0;
804 }
620 805
621 ret = reserve_memtype(paddr, paddr + size, want_flags, &flags); 806 ret = reserve_memtype(paddr, paddr + size, want_flags, &flags);
622 if (ret) 807 if (ret)
@@ -678,14 +863,6 @@ int track_pfn_vma_copy(struct vm_area_struct *vma)
678 unsigned long vma_size = vma->vm_end - vma->vm_start; 863 unsigned long vma_size = vma->vm_end - vma->vm_start;
679 pgprot_t pgprot; 864 pgprot_t pgprot;
680 865
681 if (!pat_enabled)
682 return 0;
683
684 /*
685 * For now, only handle remap_pfn_range() vmas where
686 * is_linear_pfn_mapping() == TRUE. Handling of
687 * vm_insert_pfn() is TBD.
688 */
689 if (is_linear_pfn_mapping(vma)) { 866 if (is_linear_pfn_mapping(vma)) {
690 /* 867 /*
691 * reserve the whole chunk covered by vma. We need the 868 * reserve the whole chunk covered by vma. We need the
@@ -713,23 +890,24 @@ int track_pfn_vma_copy(struct vm_area_struct *vma)
713int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot, 890int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot,
714 unsigned long pfn, unsigned long size) 891 unsigned long pfn, unsigned long size)
715{ 892{
893 unsigned long flags;
716 resource_size_t paddr; 894 resource_size_t paddr;
717 unsigned long vma_size = vma->vm_end - vma->vm_start; 895 unsigned long vma_size = vma->vm_end - vma->vm_start;
718 896
719 if (!pat_enabled)
720 return 0;
721
722 /*
723 * For now, only handle remap_pfn_range() vmas where
724 * is_linear_pfn_mapping() == TRUE. Handling of
725 * vm_insert_pfn() is TBD.
726 */
727 if (is_linear_pfn_mapping(vma)) { 897 if (is_linear_pfn_mapping(vma)) {
728 /* reserve the whole chunk starting from vm_pgoff */ 898 /* reserve the whole chunk starting from vm_pgoff */
729 paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; 899 paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
730 return reserve_pfn_range(paddr, vma_size, prot, 0); 900 return reserve_pfn_range(paddr, vma_size, prot, 0);
731 } 901 }
732 902
903 if (!pat_enabled)
904 return 0;
905
906 /* for vm_insert_pfn and friends, we set prot based on lookup */
907 flags = lookup_memtype(pfn << PAGE_SHIFT);
908 *prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) |
909 flags);
910
733 return 0; 911 return 0;
734} 912}
735 913
@@ -744,14 +922,6 @@ void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn,
744 resource_size_t paddr; 922 resource_size_t paddr;
745 unsigned long vma_size = vma->vm_end - vma->vm_start; 923 unsigned long vma_size = vma->vm_end - vma->vm_start;
746 924
747 if (!pat_enabled)
748 return;
749
750 /*
751 * For now, only handle remap_pfn_range() vmas where
752 * is_linear_pfn_mapping() == TRUE. Handling of
753 * vm_insert_pfn() is TBD.
754 */
755 if (is_linear_pfn_mapping(vma)) { 925 if (is_linear_pfn_mapping(vma)) {
756 /* free the whole chunk starting from vm_pgoff */ 926 /* free the whole chunk starting from vm_pgoff */
757 paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; 927 paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c
new file mode 100644
index 000000000000..513d8ed5d2ec
--- /dev/null
+++ b/arch/x86/mm/setup_nx.c
@@ -0,0 +1,69 @@
1#include <linux/spinlock.h>
2#include <linux/errno.h>
3#include <linux/init.h>
4
5#include <asm/pgtable.h>
6
7int nx_enabled;
8
9#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
10static int disable_nx __cpuinitdata;
11
12/*
13 * noexec = on|off
14 *
15 * Control non-executable mappings for processes.
16 *
17 * on Enable
18 * off Disable
19 */
20static int __init noexec_setup(char *str)
21{
22 if (!str)
23 return -EINVAL;
24 if (!strncmp(str, "on", 2)) {
25 __supported_pte_mask |= _PAGE_NX;
26 disable_nx = 0;
27 } else if (!strncmp(str, "off", 3)) {
28 disable_nx = 1;
29 __supported_pte_mask &= ~_PAGE_NX;
30 }
31 return 0;
32}
33early_param("noexec", noexec_setup);
34#endif
35
36#ifdef CONFIG_X86_PAE
37void __init set_nx(void)
38{
39 unsigned int v[4], l, h;
40
41 if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) {
42 cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]);
43
44 if ((v[3] & (1 << 20)) && !disable_nx) {
45 rdmsr(MSR_EFER, l, h);
46 l |= EFER_NX;
47 wrmsr(MSR_EFER, l, h);
48 nx_enabled = 1;
49 __supported_pte_mask |= _PAGE_NX;
50 }
51 }
52}
53#else
54void set_nx(void)
55{
56}
57#endif
58
59#ifdef CONFIG_X86_64
60void __cpuinit check_efer(void)
61{
62 unsigned long efer;
63
64 rdmsrl(MSR_EFER, efer);
65 if (!(efer & EFER_NX) || disable_nx)
66 __supported_pte_mask &= ~_PAGE_NX;
67}
68#endif
69
diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c
index 427fd1b56df5..8565d944f7cf 100644
--- a/arch/x86/mm/testmmiotrace.c
+++ b/arch/x86/mm/testmmiotrace.c
@@ -1,12 +1,13 @@
1/* 1/*
2 * Written by Pekka Paalanen, 2008-2009 <pq@iki.fi> 2 * Written by Pekka Paalanen, 2008-2009 <pq@iki.fi>
3 */ 3 */
4
5#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
6
4#include <linux/module.h> 7#include <linux/module.h>
5#include <linux/io.h> 8#include <linux/io.h>
6#include <linux/mmiotrace.h> 9#include <linux/mmiotrace.h>
7 10
8#define MODULE_NAME "testmmiotrace"
9
10static unsigned long mmio_address; 11static unsigned long mmio_address;
11module_param(mmio_address, ulong, 0); 12module_param(mmio_address, ulong, 0);
12MODULE_PARM_DESC(mmio_address, " Start address of the mapping of 16 kB " 13MODULE_PARM_DESC(mmio_address, " Start address of the mapping of 16 kB "
@@ -30,7 +31,7 @@ static unsigned v32(unsigned i)
30static void do_write_test(void __iomem *p) 31static void do_write_test(void __iomem *p)
31{ 32{
32 unsigned int i; 33 unsigned int i;
33 pr_info(MODULE_NAME ": write test.\n"); 34 pr_info("write test.\n");
34 mmiotrace_printk("Write test.\n"); 35 mmiotrace_printk("Write test.\n");
35 36
36 for (i = 0; i < 256; i++) 37 for (i = 0; i < 256; i++)
@@ -47,7 +48,7 @@ static void do_read_test(void __iomem *p)
47{ 48{
48 unsigned int i; 49 unsigned int i;
49 unsigned errs[3] = { 0 }; 50 unsigned errs[3] = { 0 };
50 pr_info(MODULE_NAME ": read test.\n"); 51 pr_info("read test.\n");
51 mmiotrace_printk("Read test.\n"); 52 mmiotrace_printk("Read test.\n");
52 53
53 for (i = 0; i < 256; i++) 54 for (i = 0; i < 256; i++)
@@ -68,7 +69,7 @@ static void do_read_test(void __iomem *p)
68 69
69static void do_read_far_test(void __iomem *p) 70static void do_read_far_test(void __iomem *p)
70{ 71{
71 pr_info(MODULE_NAME ": read far test.\n"); 72 pr_info("read far test.\n");
72 mmiotrace_printk("Read far test.\n"); 73 mmiotrace_printk("Read far test.\n");
73 74
74 ioread32(p + read_far); 75 ioread32(p + read_far);
@@ -78,7 +79,7 @@ static void do_test(unsigned long size)
78{ 79{
79 void __iomem *p = ioremap_nocache(mmio_address, size); 80 void __iomem *p = ioremap_nocache(mmio_address, size);
80 if (!p) { 81 if (!p) {
81 pr_err(MODULE_NAME ": could not ioremap, aborting.\n"); 82 pr_err("could not ioremap, aborting.\n");
82 return; 83 return;
83 } 84 }
84 mmiotrace_printk("ioremap returned %p.\n", p); 85 mmiotrace_printk("ioremap returned %p.\n", p);
@@ -94,24 +95,22 @@ static int __init init(void)
94 unsigned long size = (read_far) ? (8 << 20) : (16 << 10); 95 unsigned long size = (read_far) ? (8 << 20) : (16 << 10);
95 96
96 if (mmio_address == 0) { 97 if (mmio_address == 0) {
97 pr_err(MODULE_NAME ": you have to use the module argument " 98 pr_err("you have to use the module argument mmio_address.\n");
98 "mmio_address.\n"); 99 pr_err("DO NOT LOAD THIS MODULE UNLESS YOU REALLY KNOW WHAT YOU ARE DOING!\n");
99 pr_err(MODULE_NAME ": DO NOT LOAD THIS MODULE UNLESS"
100 " YOU REALLY KNOW WHAT YOU ARE DOING!\n");
101 return -ENXIO; 100 return -ENXIO;
102 } 101 }
103 102
104 pr_warning(MODULE_NAME ": WARNING: mapping %lu kB @ 0x%08lx in PCI " 103 pr_warning("WARNING: mapping %lu kB @ 0x%08lx in PCI address space, "
105 "address space, and writing 16 kB of rubbish in there.\n", 104 "and writing 16 kB of rubbish in there.\n",
106 size >> 10, mmio_address); 105 size >> 10, mmio_address);
107 do_test(size); 106 do_test(size);
108 pr_info(MODULE_NAME ": All done.\n"); 107 pr_info("All done.\n");
109 return 0; 108 return 0;
110} 109}
111 110
112static void __exit cleanup(void) 111static void __exit cleanup(void)
113{ 112{
114 pr_debug(MODULE_NAME ": unloaded.\n"); 113 pr_debug("unloaded.\n");
115} 114}
116 115
117module_init(init); 116module_init(init);
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index c814e144a3f0..36fe08eeb5c3 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -59,7 +59,8 @@ void leave_mm(int cpu)
59{ 59{
60 if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) 60 if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
61 BUG(); 61 BUG();
62 cpu_clear(cpu, percpu_read(cpu_tlbstate.active_mm)->cpu_vm_mask); 62 cpumask_clear_cpu(cpu,
63 mm_cpumask(percpu_read(cpu_tlbstate.active_mm)));
63 load_cr3(swapper_pg_dir); 64 load_cr3(swapper_pg_dir);
64} 65}
65EXPORT_SYMBOL_GPL(leave_mm); 66EXPORT_SYMBOL_GPL(leave_mm);
@@ -234,8 +235,8 @@ void flush_tlb_current_task(void)
234 preempt_disable(); 235 preempt_disable();
235 236
236 local_flush_tlb(); 237 local_flush_tlb();
237 if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) 238 if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
238 flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL); 239 flush_tlb_others(mm_cpumask(mm), mm, TLB_FLUSH_ALL);
239 preempt_enable(); 240 preempt_enable();
240} 241}
241 242
@@ -249,8 +250,8 @@ void flush_tlb_mm(struct mm_struct *mm)
249 else 250 else
250 leave_mm(smp_processor_id()); 251 leave_mm(smp_processor_id());
251 } 252 }
252 if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) 253 if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
253 flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL); 254 flush_tlb_others(mm_cpumask(mm), mm, TLB_FLUSH_ALL);
254 255
255 preempt_enable(); 256 preempt_enable();
256} 257}
@@ -268,8 +269,8 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
268 leave_mm(smp_processor_id()); 269 leave_mm(smp_processor_id());
269 } 270 }
270 271
271 if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) 272 if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
272 flush_tlb_others(&mm->cpu_vm_mask, mm, va); 273 flush_tlb_others(mm_cpumask(mm), mm, va);
273 274
274 preempt_enable(); 275 preempt_enable();
275} 276}