path: root/arch/x86/mm
author    Russell King <rmk@dyn-67.arm.linux.org.uk>  2008-04-19 12:17:34 -0400
committer Russell King <rmk+kernel@arm.linux.org.uk>  2008-04-19 12:17:34 -0400
commit    cf816ecb533ab96b883dfdc0db174598b5b5c4d2 (patch)
tree      1b7705db288ae2917105e624b01fdf81e0882bf1 /arch/x86/mm
parent    adf6d34e460387ee3e8f1e1875d52bff51212c7d (diff)
parent    15f7d677ccff6f0f5de8a1ee43a792567e9f9de9 (diff)
Merge branch 'merge-fixes' into devel
Diffstat (limited to 'arch/x86/mm')
-rw-r--r-- arch/x86/mm/Makefile          |  16
-rw-r--r-- arch/x86/mm/Makefile_32       |   9
-rw-r--r-- arch/x86/mm/Makefile_64       |   9
-rw-r--r-- arch/x86/mm/discontig_32.c    |   2
-rw-r--r-- arch/x86/mm/dump_pagetables.c | 354
-rw-r--r-- arch/x86/mm/fault.c           |   6
-rw-r--r-- arch/x86/mm/init_32.c         |  35
-rw-r--r-- arch/x86/mm/init_64.c         | 222
-rw-r--r-- arch/x86/mm/ioremap.c         | 152
-rw-r--r-- arch/x86/mm/numa_64.c         |   6
-rw-r--r-- arch/x86/mm/pageattr.c        | 107
-rw-r--r-- arch/x86/mm/pat.c             | 421
-rw-r--r-- arch/x86/mm/pgtable_32.c      |   8
-rw-r--r-- arch/x86/mm/srat_64.c         |   7
14 files changed, 1221 insertions(+), 133 deletions(-)
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 983291096848..20941d2954e2 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -1,5 +1,17 @@
+obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
+	pat.o
+
+obj-$(CONFIG_X86_32) += pgtable_32.o
+
+obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
+obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o
+
+obj-$(CONFIG_HIGHMEM) += highmem_32.o
+
 ifeq ($(CONFIG_X86_32),y)
-include ${srctree}/arch/x86/mm/Makefile_32
+obj-$(CONFIG_NUMA) += discontig_32.o
 else
-include ${srctree}/arch/x86/mm/Makefile_64
+obj-$(CONFIG_NUMA) += numa_64.o
+obj-$(CONFIG_K8_NUMA) += k8topology_64.o
+obj-$(CONFIG_ACPI_NUMA) += srat_64.o
 endif
diff --git a/arch/x86/mm/Makefile_32 b/arch/x86/mm/Makefile_32
deleted file mode 100644
index c36ae88bb543..000000000000
--- a/arch/x86/mm/Makefile_32
+++ /dev/null
@@ -1,9 +0,0 @@
-#
-# Makefile for the linux i386-specific parts of the memory manager.
-#
-
-obj-y := init_32.o pgtable_32.o fault.o ioremap.o extable.o pageattr.o mmap.o
-
-obj-$(CONFIG_NUMA) += discontig_32.o
-obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
-obj-$(CONFIG_HIGHMEM) += highmem_32.o
diff --git a/arch/x86/mm/Makefile_64 b/arch/x86/mm/Makefile_64
deleted file mode 100644
index 688c8c28ac8f..000000000000
--- a/arch/x86/mm/Makefile_64
+++ /dev/null
@@ -1,9 +0,0 @@
-#
-# Makefile for the linux x86_64-specific parts of the memory manager.
-#
-
-obj-y := init_64.o fault.o ioremap.o extable.o pageattr.o mmap.o
-obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
-obj-$(CONFIG_NUMA) += numa_64.o
-obj-$(CONFIG_K8_NUMA) += k8topology_64.o
-obj-$(CONFIG_ACPI_NUMA) += srat_64.o
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c
index 8e25e06ff730..eba0bbede7a6 100644
--- a/arch/x86/mm/discontig_32.c
+++ b/arch/x86/mm/discontig_32.c
@@ -37,7 +37,7 @@
 #include <asm/e820.h>
 #include <asm/setup.h>
 #include <asm/mmzone.h>
-#include <bios_ebda.h>
+#include <asm/bios_ebda.h>
 
 struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
 EXPORT_SYMBOL(node_data);
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
new file mode 100644
index 000000000000..6791b8334bc6
--- /dev/null
+++ b/arch/x86/mm/dump_pagetables.c
@@ -0,0 +1,354 @@
1/*
2 * Debug helper to dump the current kernel pagetables of the system
3 * so that we can see what the various memory ranges are set to.
4 *
5 * (C) Copyright 2008 Intel Corporation
6 *
7 * Author: Arjan van de Ven <arjan@linux.intel.com>
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License
11 * as published by the Free Software Foundation; version 2
12 * of the License.
13 */
14
15#include <linux/debugfs.h>
16#include <linux/mm.h>
17#include <linux/module.h>
18#include <linux/seq_file.h>
19
20#include <asm/pgtable.h>
21
22/*
23 * The dumper groups pagetable entries of the same type into one, and for
24 * that it needs to keep some state when walking, and flush this state
25 * when a "break" in the continuity is found.
26 */
27struct pg_state {
28 int level;
29 pgprot_t current_prot;
30 unsigned long start_address;
31 unsigned long current_address;
32 const struct addr_marker *marker;
33};
34
35struct addr_marker {
36 unsigned long start_address;
37 const char *name;
38};
39
40/* Address space markers hints */
41static struct addr_marker address_markers[] = {
42 { 0, "User Space" },
43#ifdef CONFIG_X86_64
44 { 0x8000000000000000UL, "Kernel Space" },
45 { 0xffff810000000000UL, "Low Kernel Mapping" },
46 { VMALLOC_START, "vmalloc() Area" },
47 { VMEMMAP_START, "Vmemmap" },
48 { __START_KERNEL_map, "High Kernel Mapping" },
49 { MODULES_VADDR, "Modules" },
50 { MODULES_END, "End Modules" },
51#else
52 { PAGE_OFFSET, "Kernel Mapping" },
53 { 0/* VMALLOC_START */, "vmalloc() Area" },
54 { 0/*VMALLOC_END*/, "vmalloc() End" },
55# ifdef CONFIG_HIGHMEM
56 { 0/*PKMAP_BASE*/, "Persisent kmap() Area" },
57# endif
58 { 0/*FIXADDR_START*/, "Fixmap Area" },
59#endif
60 { -1, NULL } /* End of list */
61};
62
63/* Multipliers for offsets within the PTEs */
64#define PTE_LEVEL_MULT (PAGE_SIZE)
65#define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT)
66#define PUD_LEVEL_MULT (PTRS_PER_PMD * PMD_LEVEL_MULT)
67#define PGD_LEVEL_MULT (PTRS_PER_PUD * PUD_LEVEL_MULT)
68
69/*
70 * Print a readable form of a pgprot_t to the seq_file
71 */
72static void printk_prot(struct seq_file *m, pgprot_t prot, int level)
73{
74 pgprotval_t pr = pgprot_val(prot);
75 static const char * const level_name[] =
76 { "cr3", "pgd", "pud", "pmd", "pte" };
77
78 if (!pgprot_val(prot)) {
79 /* Not present */
80 seq_printf(m, " ");
81 } else {
82 if (pr & _PAGE_USER)
83 seq_printf(m, "USR ");
84 else
85 seq_printf(m, " ");
86 if (pr & _PAGE_RW)
87 seq_printf(m, "RW ");
88 else
89 seq_printf(m, "ro ");
90 if (pr & _PAGE_PWT)
91 seq_printf(m, "PWT ");
92 else
93 seq_printf(m, " ");
94 if (pr & _PAGE_PCD)
95 seq_printf(m, "PCD ");
96 else
97 seq_printf(m, " ");
98
99 /* Bit 9 has a different meaning on level 3 vs 4 */
100 if (level <= 3) {
101 if (pr & _PAGE_PSE)
102 seq_printf(m, "PSE ");
103 else
104 seq_printf(m, " ");
105 } else {
106 if (pr & _PAGE_PAT)
107 seq_printf(m, "pat ");
108 else
109 seq_printf(m, " ");
110 }
111 if (pr & _PAGE_GLOBAL)
112 seq_printf(m, "GLB ");
113 else
114 seq_printf(m, " ");
115 if (pr & _PAGE_NX)
116 seq_printf(m, "NX ");
117 else
118 seq_printf(m, "x ");
119 }
120 seq_printf(m, "%s\n", level_name[level]);
121}
122
123/*
124 * On 64 bits, sign-extend the 48 bit address to 64 bit
125 */
126static unsigned long normalize_addr(unsigned long u)
127{
128#ifdef CONFIG_X86_64
129 return (signed long)(u << 16) >> 16;
130#else
131 return u;
132#endif
133}
134
135/*
136 * This function gets called on a break in a continuous series
137 * of PTE entries; the next one is different so we need to
138 * print what we collected so far.
139 */
140static void note_page(struct seq_file *m, struct pg_state *st,
141 pgprot_t new_prot, int level)
142{
143 pgprotval_t prot, cur;
144 static const char units[] = "KMGTPE";
145
146 /*
147 * If we have a "break" in the series, we need to flush the state that
148 * we have now. "break" is either changing perms, levels or
149 * address space marker.
150 */
151 prot = pgprot_val(new_prot) & ~(PTE_MASK);
152 cur = pgprot_val(st->current_prot) & ~(PTE_MASK);
153
154 if (!st->level) {
155 /* First entry */
156 st->current_prot = new_prot;
157 st->level = level;
158 st->marker = address_markers;
159 seq_printf(m, "---[ %s ]---\n", st->marker->name);
160 } else if (prot != cur || level != st->level ||
161 st->current_address >= st->marker[1].start_address) {
162 const char *unit = units;
163 unsigned long delta;
164
165 /*
166 * Now print the actual finished series
167 */
168 seq_printf(m, "0x%p-0x%p ",
169 (void *)st->start_address,
170 (void *)st->current_address);
171
172 delta = (st->current_address - st->start_address) >> 10;
173 while (!(delta & 1023) && unit[1]) {
174 delta >>= 10;
175 unit++;
176 }
177 seq_printf(m, "%9lu%c ", delta, *unit);
178 printk_prot(m, st->current_prot, st->level);
179
180 /*
181 * We print markers for special areas of address space,
182 * such as the start of vmalloc space etc.
183 * This helps in the interpretation.
184 */
185 if (st->current_address >= st->marker[1].start_address) {
186 st->marker++;
187 seq_printf(m, "---[ %s ]---\n", st->marker->name);
188 }
189
190 st->start_address = st->current_address;
191 st->current_prot = new_prot;
192 st->level = level;
193 }
194}
195
196static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr,
197 unsigned long P)
198{
199 int i;
200 pte_t *start;
201
202 start = (pte_t *) pmd_page_vaddr(addr);
203 for (i = 0; i < PTRS_PER_PTE; i++) {
204 pgprot_t prot = pte_pgprot(*start);
205
206 st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT);
207 note_page(m, st, prot, 4);
208 start++;
209 }
210}
211
212#if PTRS_PER_PMD > 1
213
214static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr,
215 unsigned long P)
216{
217 int i;
218 pmd_t *start;
219
220 start = (pmd_t *) pud_page_vaddr(addr);
221 for (i = 0; i < PTRS_PER_PMD; i++) {
222 st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT);
223 if (!pmd_none(*start)) {
224 pgprotval_t prot = pmd_val(*start) & ~PTE_MASK;
225
226 if (pmd_large(*start) || !pmd_present(*start))
227 note_page(m, st, __pgprot(prot), 3);
228 else
229 walk_pte_level(m, st, *start,
230 P + i * PMD_LEVEL_MULT);
231 } else
232 note_page(m, st, __pgprot(0), 3);
233 start++;
234 }
235}
236
237#else
238#define walk_pmd_level(m,s,a,p) walk_pte_level(m,s,__pmd(pud_val(a)),p)
239#define pud_large(a) pmd_large(__pmd(pud_val(a)))
240#define pud_none(a) pmd_none(__pmd(pud_val(a)))
241#endif
242
243#if PTRS_PER_PUD > 1
244
245static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
246 unsigned long P)
247{
248 int i;
249 pud_t *start;
250
251 start = (pud_t *) pgd_page_vaddr(addr);
252
253 for (i = 0; i < PTRS_PER_PUD; i++) {
254 st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT);
255 if (!pud_none(*start)) {
256 pgprotval_t prot = pud_val(*start) & ~PTE_MASK;
257
258 if (pud_large(*start) || !pud_present(*start))
259 note_page(m, st, __pgprot(prot), 2);
260 else
261 walk_pmd_level(m, st, *start,
262 P + i * PUD_LEVEL_MULT);
263 } else
264 note_page(m, st, __pgprot(0), 2);
265
266 start++;
267 }
268}
269
270#else
271#define walk_pud_level(m,s,a,p) walk_pmd_level(m,s,__pud(pgd_val(a)),p)
272#define pgd_large(a) pud_large(__pud(pgd_val(a)))
273#define pgd_none(a) pud_none(__pud(pgd_val(a)))
274#endif
275
276static void walk_pgd_level(struct seq_file *m)
277{
278#ifdef CONFIG_X86_64
279 pgd_t *start = (pgd_t *) &init_level4_pgt;
280#else
281 pgd_t *start = swapper_pg_dir;
282#endif
283 int i;
284 struct pg_state st;
285
286 memset(&st, 0, sizeof(st));
287
288 for (i = 0; i < PTRS_PER_PGD; i++) {
289 st.current_address = normalize_addr(i * PGD_LEVEL_MULT);
290 if (!pgd_none(*start)) {
291 pgprotval_t prot = pgd_val(*start) & ~PTE_MASK;
292
293 if (pgd_large(*start) || !pgd_present(*start))
294 note_page(m, &st, __pgprot(prot), 1);
295 else
296 walk_pud_level(m, &st, *start,
297 i * PGD_LEVEL_MULT);
298 } else
299 note_page(m, &st, __pgprot(0), 1);
300
301 start++;
302 }
303
304 /* Flush out the last page */
305 st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT);
306 note_page(m, &st, __pgprot(0), 0);
307}
308
309static int ptdump_show(struct seq_file *m, void *v)
310{
311 walk_pgd_level(m);
312 return 0;
313}
314
315static int ptdump_open(struct inode *inode, struct file *filp)
316{
317 return single_open(filp, ptdump_show, NULL);
318}
319
320static const struct file_operations ptdump_fops = {
321 .open = ptdump_open,
322 .read = seq_read,
323 .llseek = seq_lseek,
324 .release = single_release,
325};
326
327int pt_dump_init(void)
328{
329 struct dentry *pe;
330
331#ifdef CONFIG_X86_32
332 /* Not a compile-time constant on x86-32 */
333 address_markers[2].start_address = VMALLOC_START;
334 address_markers[3].start_address = VMALLOC_END;
335# ifdef CONFIG_HIGHMEM
336 address_markers[4].start_address = PKMAP_BASE;
337 address_markers[5].start_address = FIXADDR_START;
338# else
339 address_markers[4].start_address = FIXADDR_START;
340# endif
341#endif
342
343 pe = debugfs_create_file("kernel_page_tables", 0600, NULL, NULL,
344 &ptdump_fops);
345 if (!pe)
346 return -ENOMEM;
347
348 return 0;
349}
350
351__initcall(pt_dump_init);
352MODULE_LICENSE("GPL");
353MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
354MODULE_DESCRIPTION("Kernel debugging helper that dumps pagetables");
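
The new dump_pagetables.c above exposes its output through a debugfs file named kernel_page_tables. As a hedged illustration only (not part of the patch), a minimal userspace reader might look like the sketch below; it assumes debugfs is mounted at the conventional /sys/kernel/debug location.

/* Minimal sketch: read the page-table dump created by dump_pagetables.c.
 * Assumes debugfs is mounted at /sys/kernel/debug (common convention). */
#include <stdio.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/sys/kernel/debug/kernel_page_tables", "r");

	if (!f) {
		perror("kernel_page_tables");
		return 1;
	}
	/* Lines include "---[ ... ]---" address-space markers and per-range
	 * size/permission columns printed by printk_prot()/note_page(). */
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}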
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index ec08d8389850..fd7e1798c75a 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -639,7 +639,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 #ifdef CONFIG_X86_32
 	/* It's safe to allow irq's after cr2 has been saved and the vmalloc
 	   fault has been handled. */
-	if (regs->flags & (X86_EFLAGS_IF|VM_MASK))
+	if (regs->flags & (X86_EFLAGS_IF | X86_VM_MASK))
 		local_irq_enable();
 
 	/*
@@ -976,9 +976,5 @@ void vmalloc_sync_all(void)
 		if (address == start)
 			start = address + PGDIR_SIZE;
 	}
-	/* Check that there is no need to do the same for the modules area. */
-	BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
-	BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
-				(__START_KERNEL & PGDIR_MASK)));
 #endif
 }
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index ee1091a46964..1500dc8d63e4 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -51,6 +51,8 @@
 
 unsigned int __VMALLOC_RESERVE = 128 << 20;
 
+unsigned long max_pfn_mapped;
+
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 unsigned long highstart_pfn, highend_pfn;
 
@@ -179,8 +181,13 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
 			/*
 			 * Map with big pages if possible, otherwise
 			 * create normal page tables:
+			 *
+			 * Don't use a large page for the first 2/4MB of memory
+			 * because there are often fixed size MTRRs in there
+			 * and overlapping MTRRs into large pages can cause
+			 * slowdowns.
 			 */
-			if (cpu_has_pse) {
+			if (cpu_has_pse && !(pgd_idx == 0 && pmd_idx == 0)) {
 				unsigned int addr2;
 				pgprot_t prot = PAGE_KERNEL_LARGE;
 
@@ -194,6 +201,7 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
 				set_pmd(pmd, pfn_pmd(pfn, prot));
 
 				pfn += PTRS_PER_PTE;
+				max_pfn_mapped = pfn;
 				continue;
 			}
 			pte = one_page_table_init(pmd);
@@ -208,6 +216,7 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
 
 				set_pte(pte, pfn_pte(pfn, prot));
 			}
+			max_pfn_mapped = pfn;
 		}
 	}
 }
@@ -723,25 +732,17 @@ void mark_rodata_ro(void)
 	unsigned long start = PFN_ALIGN(_text);
 	unsigned long size = PFN_ALIGN(_etext) - start;
 
-#ifndef CONFIG_KPROBES
-#ifdef CONFIG_HOTPLUG_CPU
-	/* It must still be possible to apply SMP alternatives. */
-	if (num_possible_cpus() <= 1)
-#endif
-	{
-		set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
-		printk(KERN_INFO "Write protecting the kernel text: %luk\n",
-			size >> 10);
+	set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
+	printk(KERN_INFO "Write protecting the kernel text: %luk\n",
+		size >> 10);
 
 #ifdef CONFIG_CPA_DEBUG
 	printk(KERN_INFO "Testing CPA: Reverting %lx-%lx\n",
 		start, start+size);
 	set_pages_rw(virt_to_page(start), size>>PAGE_SHIFT);
 
 	printk(KERN_INFO "Testing CPA: write protecting again\n");
 	set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT);
-#endif
-	}
 #endif
 	start += size;
 	size = (unsigned long)__end_rodata - start;
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index a02a14f0f324..1076097dcab2 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -54,6 +54,26 @@ static unsigned long dma_reserve __initdata;
54 54
55DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); 55DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
56 56
57int direct_gbpages __meminitdata
58#ifdef CONFIG_DIRECT_GBPAGES
59 = 1
60#endif
61;
62
63static int __init parse_direct_gbpages_off(char *arg)
64{
65 direct_gbpages = 0;
66 return 0;
67}
68early_param("nogbpages", parse_direct_gbpages_off);
69
70static int __init parse_direct_gbpages_on(char *arg)
71{
72 direct_gbpages = 1;
73 return 0;
74}
75early_param("gbpages", parse_direct_gbpages_on);
76
57/* 77/*
58 * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the 78 * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the
59 * physical space so we can cache the place of the first one and move 79 * physical space so we can cache the place of the first one and move
@@ -69,9 +89,6 @@ void show_mem(void)
69 89
70 printk(KERN_INFO "Mem-info:\n"); 90 printk(KERN_INFO "Mem-info:\n");
71 show_free_areas(); 91 show_free_areas();
72 printk(KERN_INFO "Free swap: %6ldkB\n",
73 nr_swap_pages << (PAGE_SHIFT-10));
74
75 for_each_online_pgdat(pgdat) { 92 for_each_online_pgdat(pgdat) {
76 for (i = 0; i < pgdat->node_spanned_pages; ++i) { 93 for (i = 0; i < pgdat->node_spanned_pages; ++i) {
77 /* 94 /*
@@ -296,7 +313,7 @@ __meminit void early_iounmap(void *addr, unsigned long size)
296 __flush_tlb_all(); 313 __flush_tlb_all();
297} 314}
298 315
299static void __meminit 316static unsigned long __meminit
300phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end) 317phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
301{ 318{
302 int i = pmd_index(address); 319 int i = pmd_index(address);
@@ -318,21 +335,26 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
318 set_pte((pte_t *)pmd, 335 set_pte((pte_t *)pmd,
319 pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); 336 pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
320 } 337 }
338 return address;
321} 339}
322 340
323static void __meminit 341static unsigned long __meminit
324phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end) 342phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
325{ 343{
326 pmd_t *pmd = pmd_offset(pud, 0); 344 pmd_t *pmd = pmd_offset(pud, 0);
345 unsigned long last_map_addr;
346
327 spin_lock(&init_mm.page_table_lock); 347 spin_lock(&init_mm.page_table_lock);
328 phys_pmd_init(pmd, address, end); 348 last_map_addr = phys_pmd_init(pmd, address, end);
329 spin_unlock(&init_mm.page_table_lock); 349 spin_unlock(&init_mm.page_table_lock);
330 __flush_tlb_all(); 350 __flush_tlb_all();
351 return last_map_addr;
331} 352}
332 353
333static void __meminit 354static unsigned long __meminit
334phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end) 355phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
335{ 356{
357 unsigned long last_map_addr = end;
336 int i = pud_index(addr); 358 int i = pud_index(addr);
337 359
338 for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE) { 360 for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE) {
@@ -350,7 +372,15 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
350 } 372 }
351 373
352 if (pud_val(*pud)) { 374 if (pud_val(*pud)) {
353 phys_pmd_update(pud, addr, end); 375 if (!pud_large(*pud))
376 last_map_addr = phys_pmd_update(pud, addr, end);
377 continue;
378 }
379
380 if (direct_gbpages) {
381 set_pte((pte_t *)pud,
382 pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
383 last_map_addr = (addr & PUD_MASK) + PUD_SIZE;
354 continue; 384 continue;
355 } 385 }
356 386
@@ -358,12 +388,14 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
358 388
359 spin_lock(&init_mm.page_table_lock); 389 spin_lock(&init_mm.page_table_lock);
360 set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE)); 390 set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
361 phys_pmd_init(pmd, addr, end); 391 last_map_addr = phys_pmd_init(pmd, addr, end);
362 spin_unlock(&init_mm.page_table_lock); 392 spin_unlock(&init_mm.page_table_lock);
363 393
364 unmap_low_page(pmd); 394 unmap_low_page(pmd);
365 } 395 }
366 __flush_tlb_all(); 396 __flush_tlb_all();
397
398 return last_map_addr >> PAGE_SHIFT;
367} 399}
368 400
369static void __init find_early_table_space(unsigned long end) 401static void __init find_early_table_space(unsigned long end)
@@ -371,9 +403,11 @@ static void __init find_early_table_space(unsigned long end)
371 unsigned long puds, pmds, tables, start; 403 unsigned long puds, pmds, tables, start;
372 404
373 puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; 405 puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
374 pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; 406 tables = round_up(puds * sizeof(pud_t), PAGE_SIZE);
375 tables = round_up(puds * sizeof(pud_t), PAGE_SIZE) + 407 if (!direct_gbpages) {
376 round_up(pmds * sizeof(pmd_t), PAGE_SIZE); 408 pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
409 tables += round_up(pmds * sizeof(pmd_t), PAGE_SIZE);
410 }
377 411
378 /* 412 /*
379 * RED-PEN putting page tables only on node 0 could 413 * RED-PEN putting page tables only on node 0 could
@@ -393,16 +427,135 @@ static void __init find_early_table_space(unsigned long end)
393 (table_start << PAGE_SHIFT) + tables); 427 (table_start << PAGE_SHIFT) + tables);
394} 428}
395 429
430static void __init init_gbpages(void)
431{
432 if (direct_gbpages && cpu_has_gbpages)
433 printk(KERN_INFO "Using GB pages for direct mapping\n");
434 else
435 direct_gbpages = 0;
436}
437
438#ifdef CONFIG_MEMTEST_BOOTPARAM
439
440static void __init memtest(unsigned long start_phys, unsigned long size,
441 unsigned pattern)
442{
443 unsigned long i;
444 unsigned long *start;
445 unsigned long start_bad;
446 unsigned long last_bad;
447 unsigned long val;
448 unsigned long start_phys_aligned;
449 unsigned long count;
450 unsigned long incr;
451
452 switch (pattern) {
453 case 0:
454 val = 0UL;
455 break;
456 case 1:
457 val = -1UL;
458 break;
459 case 2:
460 val = 0x5555555555555555UL;
461 break;
462 case 3:
463 val = 0xaaaaaaaaaaaaaaaaUL;
464 break;
465 default:
466 return;
467 }
468
469 incr = sizeof(unsigned long);
470 start_phys_aligned = ALIGN(start_phys, incr);
471 count = (size - (start_phys_aligned - start_phys))/incr;
472 start = __va(start_phys_aligned);
473 start_bad = 0;
474 last_bad = 0;
475
476 for (i = 0; i < count; i++)
477 start[i] = val;
478 for (i = 0; i < count; i++, start++, start_phys_aligned += incr) {
479 if (*start != val) {
480 if (start_phys_aligned == last_bad + incr) {
481 last_bad += incr;
482 } else {
483 if (start_bad) {
484 printk(KERN_CONT "\n %016lx bad mem addr %016lx - %016lx reserved",
485 val, start_bad, last_bad + incr);
486 reserve_early(start_bad, last_bad - start_bad, "BAD RAM");
487 }
488 start_bad = last_bad = start_phys_aligned;
489 }
490 }
491 }
492 if (start_bad) {
493 printk(KERN_CONT "\n %016lx bad mem addr %016lx - %016lx reserved",
494 val, start_bad, last_bad + incr);
495 reserve_early(start_bad, last_bad - start_bad, "BAD RAM");
496 }
497
498}
499
500static int memtest_pattern __initdata = CONFIG_MEMTEST_BOOTPARAM_VALUE;
501
502static int __init parse_memtest(char *arg)
503{
504 if (arg)
505 memtest_pattern = simple_strtoul(arg, NULL, 0);
506 return 0;
507}
508
509early_param("memtest", parse_memtest);
510
511static void __init early_memtest(unsigned long start, unsigned long end)
512{
513 unsigned long t_start, t_size;
514 unsigned pattern;
515
516 if (!memtest_pattern)
517 return;
518
519 printk(KERN_INFO "early_memtest: pattern num %d", memtest_pattern);
520 for (pattern = 0; pattern < memtest_pattern; pattern++) {
521 t_start = start;
522 t_size = 0;
523 while (t_start < end) {
524 t_start = find_e820_area_size(t_start, &t_size, 1);
525
526 /* done ? */
527 if (t_start >= end)
528 break;
529 if (t_start + t_size > end)
530 t_size = end - t_start;
531
532 printk(KERN_CONT "\n %016lx - %016lx pattern %d",
533 t_start, t_start + t_size, pattern);
534
535 memtest(t_start, t_size, pattern);
536
537 t_start += t_size;
538 }
539 }
540 printk(KERN_CONT "\n");
541}
542#else
543static void __init early_memtest(unsigned long start, unsigned long end)
544{
545}
546#endif
547
396/* 548/*
397 * Setup the direct mapping of the physical memory at PAGE_OFFSET. 549 * Setup the direct mapping of the physical memory at PAGE_OFFSET.
398 * This runs before bootmem is initialized and gets pages directly from 550 * This runs before bootmem is initialized and gets pages directly from
399 * the physical memory. To access them they are temporarily mapped. 551 * the physical memory. To access them they are temporarily mapped.
400 */ 552 */
401void __init_refok init_memory_mapping(unsigned long start, unsigned long end) 553unsigned long __init_refok init_memory_mapping(unsigned long start, unsigned long end)
402{ 554{
403 unsigned long next; 555 unsigned long next, last_map_addr = end;
556 unsigned long start_phys = start, end_phys = end;
404 557
405 pr_debug("init_memory_mapping\n"); 558 printk(KERN_INFO "init_memory_mapping\n");
406 559
407 /* 560 /*
408 * Find space for the kernel direct mapping tables. 561 * Find space for the kernel direct mapping tables.
@@ -411,8 +564,10 @@ void __init_refok init_memory_mapping(unsigned long start, unsigned long end)
411 * memory mapped. Unfortunately this is done currently before the 564 * memory mapped. Unfortunately this is done currently before the
412 * nodes are discovered. 565 * nodes are discovered.
413 */ 566 */
414 if (!after_bootmem) 567 if (!after_bootmem) {
568 init_gbpages();
415 find_early_table_space(end); 569 find_early_table_space(end);
570 }
416 571
417 start = (unsigned long)__va(start); 572 start = (unsigned long)__va(start);
418 end = (unsigned long)__va(end); 573 end = (unsigned long)__va(end);
@@ -430,7 +585,7 @@ void __init_refok init_memory_mapping(unsigned long start, unsigned long end)
430 next = start + PGDIR_SIZE; 585 next = start + PGDIR_SIZE;
431 if (next > end) 586 if (next > end)
432 next = end; 587 next = end;
433 phys_pud_init(pud, __pa(start), __pa(next)); 588 last_map_addr = phys_pud_init(pud, __pa(start), __pa(next));
434 if (!after_bootmem) 589 if (!after_bootmem)
435 set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys)); 590 set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
436 unmap_low_page(pud); 591 unmap_low_page(pud);
@@ -443,6 +598,11 @@ void __init_refok init_memory_mapping(unsigned long start, unsigned long end)
443 if (!after_bootmem) 598 if (!after_bootmem)
444 reserve_early(table_start << PAGE_SHIFT, 599 reserve_early(table_start << PAGE_SHIFT,
445 table_end << PAGE_SHIFT, "PGTABLE"); 600 table_end << PAGE_SHIFT, "PGTABLE");
601
602 if (!after_bootmem)
603 early_memtest(start_phys, end_phys);
604
605 return last_map_addr;
446} 606}
447 607
448#ifndef CONFIG_NUMA 608#ifndef CONFIG_NUMA
@@ -482,11 +642,13 @@ int arch_add_memory(int nid, u64 start, u64 size)
482{ 642{
483 struct pglist_data *pgdat = NODE_DATA(nid); 643 struct pglist_data *pgdat = NODE_DATA(nid);
484 struct zone *zone = pgdat->node_zones + ZONE_NORMAL; 644 struct zone *zone = pgdat->node_zones + ZONE_NORMAL;
485 unsigned long start_pfn = start >> PAGE_SHIFT; 645 unsigned long last_mapped_pfn, start_pfn = start >> PAGE_SHIFT;
486 unsigned long nr_pages = size >> PAGE_SHIFT; 646 unsigned long nr_pages = size >> PAGE_SHIFT;
487 int ret; 647 int ret;
488 648
489 init_memory_mapping(start, start + size-1); 649 last_mapped_pfn = init_memory_mapping(start, start + size-1);
650 if (last_mapped_pfn > max_pfn_mapped)
651 max_pfn_mapped = last_mapped_pfn;
490 652
491 ret = __add_pages(zone, start_pfn, nr_pages); 653 ret = __add_pages(zone, start_pfn, nr_pages);
492 WARN_ON(1); 654 WARN_ON(1);
@@ -596,24 +758,7 @@ EXPORT_SYMBOL_GPL(rodata_test_data);
596 758
597void mark_rodata_ro(void) 759void mark_rodata_ro(void)
598{ 760{
599 unsigned long start = (unsigned long)_stext, end; 761 unsigned long start = PFN_ALIGN(_stext), end = PFN_ALIGN(__end_rodata);
600
601#ifdef CONFIG_HOTPLUG_CPU
602 /* It must still be possible to apply SMP alternatives. */
603 if (num_possible_cpus() > 1)
604 start = (unsigned long)_etext;
605#endif
606
607#ifdef CONFIG_KPROBES
608 start = (unsigned long)__start_rodata;
609#endif
610
611 end = (unsigned long)__end_rodata;
612 start = (start + PAGE_SIZE - 1) & PAGE_MASK;
613 end &= PAGE_MASK;
614 if (end <= start)
615 return;
616
617 762
618 printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", 763 printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
619 (end - start) >> 10); 764 (end - start) >> 10);
@@ -636,6 +781,7 @@ void mark_rodata_ro(void)
636 set_memory_ro(start, (end-start) >> PAGE_SHIFT); 781 set_memory_ro(start, (end-start) >> PAGE_SHIFT);
637#endif 782#endif
638} 783}
784
639#endif 785#endif
640 786
641#ifdef CONFIG_BLK_DEV_INITRD 787#ifdef CONFIG_BLK_DEV_INITRD
@@ -657,7 +803,7 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
657 * This can happen with kdump kernels when accessing 803 * This can happen with kdump kernels when accessing
658 * firmware tables: 804 * firmware tables:
659 */ 805 */
660 if (pfn < end_pfn_map) 806 if (pfn < max_pfn_mapped)
661 return; 807 return;
662 808
663 printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n", 809 printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n",
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 794895c6dcc9..c590fd200e29 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -19,11 +19,7 @@
19#include <asm/pgtable.h> 19#include <asm/pgtable.h>
20#include <asm/tlbflush.h> 20#include <asm/tlbflush.h>
21#include <asm/pgalloc.h> 21#include <asm/pgalloc.h>
22 22#include <asm/pat.h>
23enum ioremap_mode {
24 IOR_MODE_UNCACHED,
25 IOR_MODE_CACHED,
26};
27 23
28#ifdef CONFIG_X86_64 24#ifdef CONFIG_X86_64
29 25
@@ -35,11 +31,23 @@ unsigned long __phys_addr(unsigned long x)
35} 31}
36EXPORT_SYMBOL(__phys_addr); 32EXPORT_SYMBOL(__phys_addr);
37 33
34static inline int phys_addr_valid(unsigned long addr)
35{
36 return addr < (1UL << boot_cpu_data.x86_phys_bits);
37}
38
39#else
40
41static inline int phys_addr_valid(unsigned long addr)
42{
43 return 1;
44}
45
38#endif 46#endif
39 47
40int page_is_ram(unsigned long pagenr) 48int page_is_ram(unsigned long pagenr)
41{ 49{
42 unsigned long addr, end; 50 resource_size_t addr, end;
43 int i; 51 int i;
44 52
45 /* 53 /*
@@ -78,19 +86,22 @@ int page_is_ram(unsigned long pagenr)
78 * Fix up the linear direct mapping of the kernel to avoid cache attribute 86 * Fix up the linear direct mapping of the kernel to avoid cache attribute
79 * conflicts. 87 * conflicts.
80 */ 88 */
81static int ioremap_change_attr(unsigned long vaddr, unsigned long size, 89int ioremap_change_attr(unsigned long vaddr, unsigned long size,
82 enum ioremap_mode mode) 90 unsigned long prot_val)
83{ 91{
84 unsigned long nrpages = size >> PAGE_SHIFT; 92 unsigned long nrpages = size >> PAGE_SHIFT;
85 int err; 93 int err;
86 94
87 switch (mode) { 95 switch (prot_val) {
88 case IOR_MODE_UNCACHED: 96 case _PAGE_CACHE_UC:
89 default: 97 default:
90 err = set_memory_uc(vaddr, nrpages); 98 err = _set_memory_uc(vaddr, nrpages);
99 break;
100 case _PAGE_CACHE_WC:
101 err = _set_memory_wc(vaddr, nrpages);
91 break; 102 break;
92 case IOR_MODE_CACHED: 103 case _PAGE_CACHE_WB:
93 err = set_memory_wb(vaddr, nrpages); 104 err = _set_memory_wb(vaddr, nrpages);
94 break; 105 break;
95 } 106 }
96 107
@@ -107,17 +118,27 @@ static int ioremap_change_attr(unsigned long vaddr, unsigned long size,
107 * caller shouldn't need to know that small detail. 118 * caller shouldn't need to know that small detail.
108 */ 119 */
109static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size, 120static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
110 enum ioremap_mode mode) 121 unsigned long prot_val)
111{ 122{
112 unsigned long pfn, offset, last_addr, vaddr; 123 unsigned long pfn, offset, vaddr;
124 resource_size_t last_addr;
113 struct vm_struct *area; 125 struct vm_struct *area;
126 unsigned long new_prot_val;
114 pgprot_t prot; 127 pgprot_t prot;
128 int retval;
115 129
116 /* Don't allow wraparound or zero size */ 130 /* Don't allow wraparound or zero size */
117 last_addr = phys_addr + size - 1; 131 last_addr = phys_addr + size - 1;
118 if (!size || last_addr < phys_addr) 132 if (!size || last_addr < phys_addr)
119 return NULL; 133 return NULL;
120 134
135 if (!phys_addr_valid(phys_addr)) {
136 printk(KERN_WARNING "ioremap: invalid physical address %llx\n",
137 phys_addr);
138 WARN_ON_ONCE(1);
139 return NULL;
140 }
141
121 /* 142 /*
122 * Don't remap the low PCI/ISA area, it's always mapped.. 143 * Don't remap the low PCI/ISA area, it's always mapped..
123 */ 144 */
@@ -127,25 +148,14 @@ static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
 	/*
 	 * Don't allow anybody to remap normal RAM that we're using..
 	 */
-	for (pfn = phys_addr >> PAGE_SHIFT; pfn < max_pfn_mapped &&
+	for (pfn = phys_addr >> PAGE_SHIFT;
 				(pfn << PAGE_SHIFT) < last_addr; pfn++) {
-		if (page_is_ram(pfn) && pfn_valid(pfn) &&
-		    !PageReserved(pfn_to_page(pfn)))
-			return NULL;
-	}
 
-	switch (mode) {
-	case IOR_MODE_UNCACHED:
-	default:
-		/*
-		 * FIXME: we will use UC MINUS for now, as video fb drivers
-		 * depend on it. Upcoming ioremap_wc() will fix this behavior.
-		 */
-		prot = PAGE_KERNEL_UC_MINUS;
-		break;
-	case IOR_MODE_CACHED:
-		prot = PAGE_KERNEL;
-		break;
+		int is_ram = page_is_ram(pfn);
+
+		if (is_ram && pfn_valid(pfn) && !PageReserved(pfn_to_page(pfn)))
+			return NULL;
+		WARN_ON_ONCE(is_ram);
 	}
 
 	/*
@@ -155,6 +165,49 @@ static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
155 phys_addr &= PAGE_MASK; 165 phys_addr &= PAGE_MASK;
156 size = PAGE_ALIGN(last_addr+1) - phys_addr; 166 size = PAGE_ALIGN(last_addr+1) - phys_addr;
157 167
168 retval = reserve_memtype(phys_addr, phys_addr + size,
169 prot_val, &new_prot_val);
170 if (retval) {
171 pr_debug("Warning: reserve_memtype returned %d\n", retval);
172 return NULL;
173 }
174
175 if (prot_val != new_prot_val) {
176 /*
177 * Do not fallback to certain memory types with certain
178 * requested type:
179 * - request is uncached, return cannot be write-back
180 * - request is uncached, return cannot be write-combine
181 * - request is write-combine, return cannot be write-back
182 */
183 if ((prot_val == _PAGE_CACHE_UC &&
184 (new_prot_val == _PAGE_CACHE_WB ||
185 new_prot_val == _PAGE_CACHE_WC)) ||
186 (prot_val == _PAGE_CACHE_WC &&
187 new_prot_val == _PAGE_CACHE_WB)) {
188 pr_debug(
189 "ioremap error for 0x%llx-0x%llx, requested 0x%lx, got 0x%lx\n",
190 phys_addr, phys_addr + size,
191 prot_val, new_prot_val);
192 free_memtype(phys_addr, phys_addr + size);
193 return NULL;
194 }
195 prot_val = new_prot_val;
196 }
197
198 switch (prot_val) {
199 case _PAGE_CACHE_UC:
200 default:
201 prot = PAGE_KERNEL_NOCACHE;
202 break;
203 case _PAGE_CACHE_WC:
204 prot = PAGE_KERNEL_WC;
205 break;
206 case _PAGE_CACHE_WB:
207 prot = PAGE_KERNEL;
208 break;
209 }
210
158 /* 211 /*
159 * Ok, go for it.. 212 * Ok, go for it..
160 */ 213 */
@@ -164,11 +217,13 @@ static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
164 area->phys_addr = phys_addr; 217 area->phys_addr = phys_addr;
165 vaddr = (unsigned long) area->addr; 218 vaddr = (unsigned long) area->addr;
166 if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot)) { 219 if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot)) {
220 free_memtype(phys_addr, phys_addr + size);
167 free_vm_area(area); 221 free_vm_area(area);
168 return NULL; 222 return NULL;
169 } 223 }
170 224
171 if (ioremap_change_attr(vaddr, size, mode) < 0) { 225 if (ioremap_change_attr(vaddr, size, prot_val) < 0) {
226 free_memtype(phys_addr, phys_addr + size);
172 vunmap(area->addr); 227 vunmap(area->addr);
173 return NULL; 228 return NULL;
174 } 229 }
@@ -199,13 +254,32 @@ static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
199 */ 254 */
200void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size) 255void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size)
201{ 256{
202 return __ioremap(phys_addr, size, IOR_MODE_UNCACHED); 257 return __ioremap(phys_addr, size, _PAGE_CACHE_UC);
203} 258}
204EXPORT_SYMBOL(ioremap_nocache); 259EXPORT_SYMBOL(ioremap_nocache);
205 260
261/**
262 * ioremap_wc - map memory into CPU space write combined
263 * @offset: bus address of the memory
264 * @size: size of the resource to map
265 *
266 * This version of ioremap ensures that the memory is marked write combining.
267 * Write combining allows faster writes to some hardware devices.
268 *
269 * Must be freed with iounmap.
270 */
271void __iomem *ioremap_wc(unsigned long phys_addr, unsigned long size)
272{
273 if (pat_wc_enabled)
274 return __ioremap(phys_addr, size, _PAGE_CACHE_WC);
275 else
276 return ioremap_nocache(phys_addr, size);
277}
278EXPORT_SYMBOL(ioremap_wc);
279
206void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size) 280void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size)
207{ 281{
208 return __ioremap(phys_addr, size, IOR_MODE_CACHED); 282 return __ioremap(phys_addr, size, _PAGE_CACHE_WB);
209} 283}
210EXPORT_SYMBOL(ioremap_cache); 284EXPORT_SYMBOL(ioremap_cache);
211 285
@@ -252,6 +326,8 @@ void iounmap(volatile void __iomem *addr)
252 return; 326 return;
253 } 327 }
254 328
329 free_memtype(p->phys_addr, p->phys_addr + get_vm_area_size(p));
330
255 /* Finally remove it */ 331 /* Finally remove it */
256 o = remove_vm_area((void *)addr); 332 o = remove_vm_area((void *)addr);
257 BUG_ON(p != o || o == NULL); 333 BUG_ON(p != o || o == NULL);
@@ -272,8 +348,8 @@ static int __init early_ioremap_debug_setup(char *str)
272early_param("early_ioremap_debug", early_ioremap_debug_setup); 348early_param("early_ioremap_debug", early_ioremap_debug_setup);
273 349
274static __initdata int after_paging_init; 350static __initdata int after_paging_init;
275static __initdata pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] 351static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)]
276 __attribute__((aligned(PAGE_SIZE))); 352 __section(.bss.page_aligned);
277 353
278static inline pmd_t * __init early_ioremap_pmd(unsigned long addr) 354static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
279{ 355{
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 16b82ad34b96..2ea56f48f29b 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -31,13 +31,15 @@ bootmem_data_t plat_node_bdata[MAX_NUMNODES];
 
 struct memnode memnode;
 
+#ifdef CONFIG_SMP
 int x86_cpu_to_node_map_init[NR_CPUS] = {
 	[0 ... NR_CPUS-1] = NUMA_NO_NODE
 };
 void *x86_cpu_to_node_map_early_ptr;
+EXPORT_SYMBOL(x86_cpu_to_node_map_early_ptr);
+#endif
 DEFINE_PER_CPU(int, x86_cpu_to_node_map) = NUMA_NO_NODE;
 EXPORT_PER_CPU_SYMBOL(x86_cpu_to_node_map);
-EXPORT_SYMBOL(x86_cpu_to_node_map_early_ptr);
 
 s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
 	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
@@ -548,8 +550,6 @@ void __cpuinit numa_set_node(int cpu, int node)
 {
 	int *cpu_to_node_map = x86_cpu_to_node_map_early_ptr;
 
-	cpu_pda(cpu)->nodenumber = node;
-
 	if(cpu_to_node_map)
 		cpu_to_node_map[cpu] = node;
 	else if(per_cpu_offset(cpu))
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 7b79f6be4e7d..f7823a172868 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -9,6 +9,8 @@
9#include <linux/slab.h> 9#include <linux/slab.h>
10#include <linux/mm.h> 10#include <linux/mm.h>
11#include <linux/interrupt.h> 11#include <linux/interrupt.h>
12#include <linux/seq_file.h>
13#include <linux/debugfs.h>
12 14
13#include <asm/e820.h> 15#include <asm/e820.h>
14#include <asm/processor.h> 16#include <asm/processor.h>
@@ -17,6 +19,7 @@
17#include <asm/uaccess.h> 19#include <asm/uaccess.h>
18#include <asm/pgalloc.h> 20#include <asm/pgalloc.h>
19#include <asm/proto.h> 21#include <asm/proto.h>
22#include <asm/pat.h>
20 23
21/* 24/*
22 * The current flushing context - we pass it instead of 5 arguments: 25 * The current flushing context - we pass it instead of 5 arguments:
@@ -28,6 +31,7 @@ struct cpa_data {
28 int numpages; 31 int numpages;
29 int flushtlb; 32 int flushtlb;
30 unsigned long pfn; 33 unsigned long pfn;
34 unsigned force_split : 1;
31}; 35};
32 36
33#ifdef CONFIG_X86_64 37#ifdef CONFIG_X86_64
@@ -259,6 +263,9 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
259 int i, do_split = 1; 263 int i, do_split = 1;
260 unsigned int level; 264 unsigned int level;
261 265
266 if (cpa->force_split)
267 return 1;
268
262 spin_lock_irqsave(&pgd_lock, flags); 269 spin_lock_irqsave(&pgd_lock, flags);
263 /* 270 /*
264 * Check for races, another CPU might have split this page 271 * Check for races, another CPU might have split this page
@@ -535,7 +542,7 @@ static int __change_page_attr(struct cpa_data *cpa, int primary)
535repeat: 542repeat:
536 kpte = lookup_address(address, &level); 543 kpte = lookup_address(address, &level);
537 if (!kpte) 544 if (!kpte)
538 return primary ? -EINVAL : 0; 545 return 0;
539 546
540 old_pte = *kpte; 547 old_pte = *kpte;
541 if (!pte_val(old_pte)) { 548 if (!pte_val(old_pte)) {
@@ -693,7 +700,8 @@ static inline int cache_attr(pgprot_t attr)
693} 700}
694 701
695static int change_page_attr_set_clr(unsigned long addr, int numpages, 702static int change_page_attr_set_clr(unsigned long addr, int numpages,
696 pgprot_t mask_set, pgprot_t mask_clr) 703 pgprot_t mask_set, pgprot_t mask_clr,
704 int force_split)
697{ 705{
698 struct cpa_data cpa; 706 struct cpa_data cpa;
699 int ret, cache, checkalias; 707 int ret, cache, checkalias;
@@ -704,7 +712,7 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
704 */ 712 */
705 mask_set = canon_pgprot(mask_set); 713 mask_set = canon_pgprot(mask_set);
706 mask_clr = canon_pgprot(mask_clr); 714 mask_clr = canon_pgprot(mask_clr);
707 if (!pgprot_val(mask_set) && !pgprot_val(mask_clr)) 715 if (!pgprot_val(mask_set) && !pgprot_val(mask_clr) && !force_split)
708 return 0; 716 return 0;
709 717
710 /* Ensure we are PAGE_SIZE aligned */ 718 /* Ensure we are PAGE_SIZE aligned */
@@ -721,6 +729,7 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
721 cpa.mask_set = mask_set; 729 cpa.mask_set = mask_set;
722 cpa.mask_clr = mask_clr; 730 cpa.mask_clr = mask_clr;
723 cpa.flushtlb = 0; 731 cpa.flushtlb = 0;
732 cpa.force_split = force_split;
724 733
725 /* No alias checking for _NX bit modifications */ 734 /* No alias checking for _NX bit modifications */
726 checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX; 735 checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX;
@@ -759,26 +768,61 @@ out:
759static inline int change_page_attr_set(unsigned long addr, int numpages, 768static inline int change_page_attr_set(unsigned long addr, int numpages,
760 pgprot_t mask) 769 pgprot_t mask)
761{ 770{
762 return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0)); 771 return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0);
763} 772}
764 773
765static inline int change_page_attr_clear(unsigned long addr, int numpages, 774static inline int change_page_attr_clear(unsigned long addr, int numpages,
766 pgprot_t mask) 775 pgprot_t mask)
767{ 776{
768 return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask); 777 return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0);
769} 778}
770 779
771int set_memory_uc(unsigned long addr, int numpages) 780int _set_memory_uc(unsigned long addr, int numpages)
772{ 781{
773 return change_page_attr_set(addr, numpages, 782 return change_page_attr_set(addr, numpages,
774 __pgprot(_PAGE_PCD)); 783 __pgprot(_PAGE_CACHE_UC));
784}
785
786int set_memory_uc(unsigned long addr, int numpages)
787{
788 if (reserve_memtype(addr, addr + numpages * PAGE_SIZE,
789 _PAGE_CACHE_UC, NULL))
790 return -EINVAL;
791
792 return _set_memory_uc(addr, numpages);
775} 793}
776EXPORT_SYMBOL(set_memory_uc); 794EXPORT_SYMBOL(set_memory_uc);
777 795
778int set_memory_wb(unsigned long addr, int numpages) 796int _set_memory_wc(unsigned long addr, int numpages)
797{
798 return change_page_attr_set(addr, numpages,
799 __pgprot(_PAGE_CACHE_WC));
800}
801
802int set_memory_wc(unsigned long addr, int numpages)
803{
804 if (!pat_wc_enabled)
805 return set_memory_uc(addr, numpages);
806
807 if (reserve_memtype(addr, addr + numpages * PAGE_SIZE,
808 _PAGE_CACHE_WC, NULL))
809 return -EINVAL;
810
811 return _set_memory_wc(addr, numpages);
812}
813EXPORT_SYMBOL(set_memory_wc);
814
815int _set_memory_wb(unsigned long addr, int numpages)
779{ 816{
780 return change_page_attr_clear(addr, numpages, 817 return change_page_attr_clear(addr, numpages,
781 __pgprot(_PAGE_PCD | _PAGE_PWT)); 818 __pgprot(_PAGE_CACHE_MASK));
819}
820
821int set_memory_wb(unsigned long addr, int numpages)
822{
823 free_memtype(addr, addr + numpages * PAGE_SIZE);
824
825 return _set_memory_wb(addr, numpages);
782} 826}
783EXPORT_SYMBOL(set_memory_wb); 827EXPORT_SYMBOL(set_memory_wb);
784 828
@@ -809,6 +853,12 @@ int set_memory_np(unsigned long addr, int numpages)
809 return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_PRESENT)); 853 return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_PRESENT));
810} 854}
811 855
856int set_memory_4k(unsigned long addr, int numpages)
857{
858 return change_page_attr_set_clr(addr, numpages, __pgprot(0),
859 __pgprot(0), 1);
860}
861
812int set_pages_uc(struct page *page, int numpages) 862int set_pages_uc(struct page *page, int numpages)
813{ 863{
814 unsigned long addr = (unsigned long)page_address(page); 864 unsigned long addr = (unsigned long)page_address(page);
@@ -918,6 +968,45 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
918 cpa_fill_pool(NULL); 968 cpa_fill_pool(NULL);
919} 969}
920 970
971#ifdef CONFIG_DEBUG_FS
972static int dpa_show(struct seq_file *m, void *v)
973{
974 seq_puts(m, "DEBUG_PAGEALLOC\n");
975 seq_printf(m, "pool_size : %lu\n", pool_size);
976 seq_printf(m, "pool_pages : %lu\n", pool_pages);
977 seq_printf(m, "pool_low : %lu\n", pool_low);
978 seq_printf(m, "pool_used : %lu\n", pool_used);
979 seq_printf(m, "pool_failed : %lu\n", pool_failed);
980
981 return 0;
982}
983
984static int dpa_open(struct inode *inode, struct file *filp)
985{
986 return single_open(filp, dpa_show, NULL);
987}
988
989static const struct file_operations dpa_fops = {
990 .open = dpa_open,
991 .read = seq_read,
992 .llseek = seq_lseek,
993 .release = single_release,
994};
995
996int __init debug_pagealloc_proc_init(void)
997{
998 struct dentry *de;
999
1000 de = debugfs_create_file("debug_pagealloc", 0600, NULL, NULL,
1001 &dpa_fops);
1002 if (!de)
1003 return -ENOMEM;
1004
1005 return 0;
1006}
1007__initcall(debug_pagealloc_proc_init);
1008#endif
1009
921#ifdef CONFIG_HIBERNATION 1010#ifdef CONFIG_HIBERNATION
922 1011
923bool kernel_page_present(struct page *page) 1012bool kernel_page_present(struct page *page)
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
new file mode 100644
index 000000000000..72c0f6097402
--- /dev/null
+++ b/arch/x86/mm/pat.c
@@ -0,0 +1,421 @@
1/*
2 * Handle caching attributes in page tables (PAT)
3 *
4 * Authors: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
5 * Suresh B Siddha <suresh.b.siddha@intel.com>
6 *
7 * Loosely based on earlier PAT patchset from Eric Biederman and Andi Kleen.
8 */
9
10#include <linux/mm.h>
11#include <linux/kernel.h>
12#include <linux/gfp.h>
13#include <linux/fs.h>
14
15#include <asm/msr.h>
16#include <asm/tlbflush.h>
17#include <asm/processor.h>
18#include <asm/pgtable.h>
19#include <asm/pat.h>
20#include <asm/e820.h>
21#include <asm/cacheflush.h>
22#include <asm/fcntl.h>
23#include <asm/mtrr.h>
24
25int pat_wc_enabled = 1;
26
27static u64 __read_mostly boot_pat_state;
28
29static int nopat(char *str)
30{
31 pat_wc_enabled = 0;
32 printk(KERN_INFO "x86: PAT support disabled.\n");
33
34 return 0;
35}
36early_param("nopat", nopat);
37
38static int pat_known_cpu(void)
39{
40 if (!pat_wc_enabled)
41 return 0;
42
43 if (cpu_has_pat)
44 return 1;
45
46 pat_wc_enabled = 0;
47 printk(KERN_INFO "CPU and/or kernel does not support PAT.\n");
48 return 0;
49}
50
51enum {
52 PAT_UC = 0, /* uncached */
53 PAT_WC = 1, /* Write combining */
54 PAT_WT = 4, /* Write Through */
55 PAT_WP = 5, /* Write Protected */
56 PAT_WB = 6, /* Write Back (default) */
57 PAT_UC_MINUS = 7, /* UC, but can be overriden by MTRR */
58};
59
60#define PAT(x,y) ((u64)PAT_ ## y << ((x)*8))
61
62void pat_init(void)
63{
64 u64 pat;
65
66#ifndef CONFIG_X86_PAT
67 nopat(NULL);
68#endif
69
70 /* Boot CPU enables PAT based on CPU feature */
71 if (!smp_processor_id() && !pat_known_cpu())
72 return;
73
74 /* APs enable PAT iff boot CPU has enabled it before */
75 if (smp_processor_id() && !pat_wc_enabled)
76 return;
77
78 /* Set PWT to Write-Combining. All other bits stay the same */
79 /*
80 * PTE encoding used in Linux:
81 * PAT
82 * |PCD
83 * ||PWT
84 * |||
85 * 000 WB _PAGE_CACHE_WB
86 * 001 WC _PAGE_CACHE_WC
87 * 010 UC- _PAGE_CACHE_UC_MINUS
88 * 011 UC _PAGE_CACHE_UC
89 * PAT bit unused
90 */
91 pat = PAT(0,WB) | PAT(1,WC) | PAT(2,UC_MINUS) | PAT(3,UC) |
92 PAT(4,WB) | PAT(5,WC) | PAT(6,UC_MINUS) | PAT(7,UC);
93
94 /* Boot CPU check */
95 if (!smp_processor_id()) {
96 rdmsrl(MSR_IA32_CR_PAT, boot_pat_state);
97 }
98
99 wrmsrl(MSR_IA32_CR_PAT, pat);
100 printk(KERN_INFO "x86 PAT enabled: cpu %d, old 0x%Lx, new 0x%Lx\n",
101 smp_processor_id(), boot_pat_state, pat);
102}
103
104#undef PAT
105
106static char *cattr_name(unsigned long flags)
107{
108 switch (flags & _PAGE_CACHE_MASK) {
109 case _PAGE_CACHE_UC: return "uncached";
110 case _PAGE_CACHE_UC_MINUS: return "uncached-minus";
111 case _PAGE_CACHE_WB: return "write-back";
112 case _PAGE_CACHE_WC: return "write-combining";
113 default: return "broken";
114 }
115}
116
117/*
118 * The global memtype list keeps track of memory type for specific
119 * physical memory areas. Conflicting memory types in different
120 * mappings can cause CPU cache corruption. To avoid this we keep track.
121 *
122 * The list is sorted based on starting address and can contain multiple
123 * entries for each address (this allows reference counting for overlapping
124 * areas). All the aliases have the same cache attributes of course.
125 * Zero attributes are represented as holes.
126 *
127 * Currently the data structure is a list because the number of mappings
128 * are expected to be relatively small. If this should be a problem
129 * it could be changed to a rbtree or similar.
130 *
131 * memtype_lock protects the whole list.
132 */
133
134struct memtype {
135 u64 start;
136 u64 end;
137 unsigned long type;
138 struct list_head nd;
139};
140
141static LIST_HEAD(memtype_list);
142static DEFINE_SPINLOCK(memtype_lock); /* protects memtype list */
143
144/*
145 * Does intersection of PAT memory type and MTRR memory type and returns
146 * the resulting memory type as PAT understands it.
147 * (Type in pat and mtrr will not have same value)
148 * The intersection is based on "Effective Memory Type" tables in IA-32
149 * SDM vol 3a
150 */
151static int pat_x_mtrr_type(u64 start, u64 end, unsigned long prot,
152 unsigned long *ret_prot)
153{
154 unsigned long pat_type;
155 u8 mtrr_type;
156
157 mtrr_type = mtrr_type_lookup(start, end);
158 if (mtrr_type == 0xFF) { /* MTRR not enabled */
159 *ret_prot = prot;
160 return 0;
161 }
162 if (mtrr_type == 0xFE) { /* MTRR match error */
163 *ret_prot = _PAGE_CACHE_UC;
164 return -1;
165 }
166 if (mtrr_type != MTRR_TYPE_UNCACHABLE &&
167 mtrr_type != MTRR_TYPE_WRBACK &&
168 mtrr_type != MTRR_TYPE_WRCOMB) { /* MTRR type unhandled */
169 *ret_prot = _PAGE_CACHE_UC;
170 return -1;
171 }
172
173 pat_type = prot & _PAGE_CACHE_MASK;
174 prot &= (~_PAGE_CACHE_MASK);
175
176 /* Currently doing intersection by hand. Optimize it later. */
177 if (pat_type == _PAGE_CACHE_WC) {
178 *ret_prot = prot | _PAGE_CACHE_WC;
179 } else if (pat_type == _PAGE_CACHE_UC_MINUS) {
180 *ret_prot = prot | _PAGE_CACHE_UC_MINUS;
181 } else if (pat_type == _PAGE_CACHE_UC ||
182 mtrr_type == MTRR_TYPE_UNCACHABLE) {
183 *ret_prot = prot | _PAGE_CACHE_UC;
184 } else if (mtrr_type == MTRR_TYPE_WRCOMB) {
185 *ret_prot = prot | _PAGE_CACHE_WC;
186 } else {
187 *ret_prot = prot | _PAGE_CACHE_WB;
188 }
189
190 return 0;
191}
192
193int reserve_memtype(u64 start, u64 end, unsigned long req_type,
194 unsigned long *ret_type)
195{
196 struct memtype *new_entry = NULL;
197 struct memtype *parse;
198 unsigned long actual_type;
199 int err = 0;
200
201 /* Only track when pat_wc_enabled */
202 if (!pat_wc_enabled) {
203 if (ret_type)
204 *ret_type = req_type;
205
206 return 0;
207 }
208
209 /* Low ISA region is always mapped WB in page table. No need to track */
210 if (start >= ISA_START_ADDRESS && (end - 1) <= ISA_END_ADDRESS) {
211 if (ret_type)
212 *ret_type = _PAGE_CACHE_WB;
213
214 return 0;
215 }
216
217 req_type &= _PAGE_CACHE_MASK;
218 err = pat_x_mtrr_type(start, end, req_type, &actual_type);
219 if (err) {
220 if (ret_type)
221 *ret_type = actual_type;
222
223 return -EINVAL;
224 }
225
226 new_entry = kmalloc(sizeof(struct memtype), GFP_KERNEL);
227 if (!new_entry)
228 return -ENOMEM;
229
230 new_entry->start = start;
231 new_entry->end = end;
232 new_entry->type = actual_type;
233
234 if (ret_type)
235 *ret_type = actual_type;
236
237 spin_lock(&memtype_lock);
238
239 /* Search for existing mapping that overlaps the current range */
240 list_for_each_entry(parse, &memtype_list, nd) {
241 struct memtype *saved_ptr;
242
243 if (parse->start >= end) {
244 printk("New Entry\n");
245 list_add(&new_entry->nd, parse->nd.prev);
246 new_entry = NULL;
247 break;
248 }
249
250 if (start <= parse->start && end >= parse->start) {
251 if (actual_type != parse->type && ret_type) {
252 actual_type = parse->type;
253 *ret_type = actual_type;
254 new_entry->type = actual_type;
255 }
256
257 if (actual_type != parse->type) {
258 printk(
259 KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
260 current->comm, current->pid,
261 start, end,
262 cattr_name(actual_type),
263 cattr_name(parse->type));
264 err = -EBUSY;
265 break;
266 }
267
268 saved_ptr = parse;
269 /*
270 * Check to see whether the request overlaps more
271 * than one entry in the list
272 */
273 list_for_each_entry_continue(parse, &memtype_list, nd) {
274 if (end <= parse->start) {
275 break;
276 }
277
278 if (actual_type != parse->type) {
279 printk(
280 KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
281 current->comm, current->pid,
282 start, end,
283 cattr_name(actual_type),
284 cattr_name(parse->type));
285 err = -EBUSY;
286 break;
287 }
288 }
289
290 if (err) {
291 break;
292 }
293
294 printk("Overlap at 0x%Lx-0x%Lx\n",
295 saved_ptr->start, saved_ptr->end);
296 /* No conflict. Go ahead and add this new entry */
297 list_add(&new_entry->nd, saved_ptr->nd.prev);
298 new_entry = NULL;
299 break;
300 }
301
302 if (start < parse->end) {
303 if (actual_type != parse->type && ret_type) {
304 actual_type = parse->type;
305 *ret_type = actual_type;
306 new_entry->type = actual_type;
307 }
308
309 if (actual_type != parse->type) {
310 printk(
311 KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
312 current->comm, current->pid,
313 start, end,
314 cattr_name(actual_type),
315 cattr_name(parse->type));
316 err = -EBUSY;
317 break;
318 }
319
320 saved_ptr = parse;
321 /*
322 * Check to see whether the request overlaps more
323 * than one entry in the list
324 */
325 list_for_each_entry_continue(parse, &memtype_list, nd) {
326 if (end <= parse->start) {
327 break;
328 }
329
330 if (actual_type != parse->type) {
331 printk(
332 KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
333 current->comm, current->pid,
334 start, end,
335 cattr_name(actual_type),
336 cattr_name(parse->type));
337 err = -EBUSY;
338 break;
339 }
340 }
341
342 if (err) {
343 break;
344 }
345
346 printk("Overlap at 0x%Lx-0x%Lx\n",
347 saved_ptr->start, saved_ptr->end);
348 /* No conflict. Go ahead and add this new entry */
349 list_add(&new_entry->nd, &saved_ptr->nd);
350 new_entry = NULL;
351 break;
352 }
353 }
354
355 if (err) {
356 printk(
357 "reserve_memtype failed 0x%Lx-0x%Lx, track %s, req %s\n",
358 start, end, cattr_name(new_entry->type),
359 cattr_name(req_type));
360 kfree(new_entry);
361 spin_unlock(&memtype_lock);
362 return err;
363 }
364
365 if (new_entry) {
366 /* No conflict. Not yet added to the list. Add to the tail */
367 list_add_tail(&new_entry->nd, &memtype_list);
368 printk("New Entry\n");
369 }
370
371 if (ret_type) {
372 printk(
373 "reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n",
374 start, end, cattr_name(actual_type),
375 cattr_name(req_type), cattr_name(*ret_type));
376 } else {
377 printk(
378 "reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s\n",
379 start, end, cattr_name(actual_type),
380 cattr_name(req_type));
381 }
382
383 spin_unlock(&memtype_lock);
384 return err;
385}
386
387int free_memtype(u64 start, u64 end)
388{
389 struct memtype *ml;
390 int err = -EINVAL;
391
392 /* Only track when pat_wc_enabled */
393 if (!pat_wc_enabled) {
394 return 0;
395 }
396
397 /* Low ISA region is always mapped WB. No need to track */
398 if (start >= ISA_START_ADDRESS && end <= ISA_END_ADDRESS) {
399 return 0;
400 }
401
402 spin_lock(&memtype_lock);
403 list_for_each_entry(ml, &memtype_list, nd) {
404 if (ml->start == start && ml->end == end) {
405 list_del(&ml->nd);
406 kfree(ml);
407 err = 0;
408 break;
409 }
410 }
411 spin_unlock(&memtype_lock);
412
413 if (err) {
414 printk(KERN_DEBUG "%s:%d freeing invalid memtype %Lx-%Lx\n",
415 current->comm, current->pid, start, end);
416 }
417
418 printk( "free_memtype request 0x%Lx-0x%Lx\n", start, end);
419 return err;
420}
421
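
Together with the ioremap.c and pageattr.c changes above, the new pat.c gives drivers a write-combining mapping primitive (ioremap_wc(), backed by reserve_memtype()/free_memtype()). The sketch below is a hedged illustration of how a driver might use it; fb_map(), fb_unmap(), FB_PHYS_ADDR and FB_SIZE are made-up placeholders, not part of this series.

/* Hypothetical example only: map a framebuffer write-combined via the
 * ioremap_wc() interface added in this series.  FB_PHYS_ADDR and FB_SIZE
 * are placeholder values for some device BAR, not taken from the patch. */
#include <linux/errno.h>
#include <linux/io.h>

#define FB_PHYS_ADDR	0xd0000000UL	/* assumed PCI BAR address */
#define FB_SIZE		(8 * 1024 * 1024)

static void __iomem *fb_base;

static int fb_map(void)
{
	/* ioremap_wc() falls back to an uncached mapping when PAT/WC
	 * is unavailable, as the definition in ioremap.c shows. */
	fb_base = ioremap_wc(FB_PHYS_ADDR, FB_SIZE);
	if (!fb_base)
		return -ENOMEM;
	return 0;
}

static void fb_unmap(void)
{
	iounmap(fb_base);	/* also drops the memtype reservation */
}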
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index 2f9e9afcb9f4..3165ec0672bd 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -36,7 +36,6 @@ void show_mem(void)
 
 	printk(KERN_INFO "Mem-info:\n");
 	show_free_areas();
-	printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
 	for_each_online_pgdat(pgdat) {
 		pgdat_resize_lock(pgdat, &flags);
 		for (i = 0; i < pgdat->node_spanned_pages; ++i) {
@@ -381,3 +380,10 @@ void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
 }
 
 #endif
+
+int pmd_bad(pmd_t pmd)
+{
+	WARN_ON_ONCE(pmd_bad_v1(pmd) != pmd_bad_v2(pmd));
+
+	return pmd_bad_v1(pmd);
+}
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 845001c617cc..1bae9c855ceb 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -20,6 +20,7 @@
 #include <asm/proto.h>
 #include <asm/numa.h>
 #include <asm/e820.h>
+#include <asm/genapic.h>
 
 int acpi_numa __initdata;
 
@@ -132,7 +133,6 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
 	int pxm, node;
 	int apic_id;
 
-	apic_id = pa->apic_id;
 	if (srat_disabled())
 		return;
 	if (pa->header.length != sizeof(struct acpi_srat_cpu_affinity)) {
@@ -148,6 +148,11 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
 		bad_srat();
 		return;
 	}
+
+	if (is_uv_system())
+		apic_id = (pa->apic_id << 8) | pa->local_sapic_eid;
+	else
+		apic_id = pa->apic_id;
 	apicid_to_node[apic_id] = node;
 	acpi_numa = 1;
 	printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n",