path: root/arch/x86/mm
author	Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>	2009-04-07 16:34:16 -0400
committer	Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>	2009-04-07 16:34:16 -0400
commit	38f4b8c0da01ae7cd9b93386842ce272d6fde9ab (patch)
tree	3c8c52201aac038094bfea7efdd0984a8f62045e /arch/x86/mm
parent	a811454027352c762e0d5bba1b1d8f7d26bf96ae (diff)
parent	8e2c4f2844c0e8dcdfe312e5f2204854ca8532c6 (diff)
Merge commit 'origin/master' into for-linus/xen/master
* commit 'origin/master': (4825 commits)
  Fix build errors due to CONFIG_BRANCH_TRACER=y
  parport: Use the PCI IRQ if offered
  tty: jsm cleanups
  Adjust path to gpio headers
  KGDB_SERIAL_CONSOLE check for module
  Change KCONFIG name
  tty: Blackin CTS/RTS
  Change hardware flow control from poll to interrupt driven
  Add support for the MAX3100 SPI UART.
  lanana: assign a device name and numbering for MAX3100
  serqt: initial clean up pass for tty side
  tty: Use the generic RS485 ioctl on CRIS
  tty: Correct inline types for tty_driver_kref_get()
  splice: fix deadlock in splicing to file
  nilfs2: support nanosecond timestamp
  nilfs2: introduce secondary super block
  nilfs2: simplify handling of active state of segments
  nilfs2: mark minor flag for checkpoint created by internal operation
  nilfs2: clean up sketch file
  nilfs2: super block operations fix endian bug
  ...

Conflicts:
	arch/x86/include/asm/thread_info.h
	arch/x86/lguest/boot.c
	drivers/xen/manage.c
Diffstat (limited to 'arch/x86/mm')
-rw-r--r--  arch/x86/mm/Makefile      |   2
-rw-r--r--  arch/x86/mm/highmem_32.c  |  69
-rw-r--r--  arch/x86/mm/init.c        | 344
-rw-r--r--  arch/x86/mm/init_32.c     | 291
-rw-r--r--  arch/x86/mm/init_64.c     | 317
-rw-r--r--  arch/x86/mm/iomap_32.c    |  30
-rw-r--r--  arch/x86/mm/ioremap.c     |  52
-rw-r--r--  arch/x86/mm/kmmio.c       |   2
-rw-r--r--  arch/x86/mm/memtest.c     |   3
-rw-r--r--  arch/x86/mm/mmio-mod.c    |  19
-rw-r--r--  arch/x86/mm/numa.c        |  67
-rw-r--r--  arch/x86/mm/numa_32.c     |   5
-rw-r--r--  arch/x86/mm/numa_64.c     | 111
-rw-r--r--  arch/x86/mm/pageattr.c    | 147
-rw-r--r--  arch/x86/mm/pat.c         |   5
-rw-r--r--  arch/x86/mm/pgtable_32.c  |   2
-rw-r--r--  arch/x86/mm/srat_64.c     |  30
-rw-r--r--  arch/x86/mm/tlb.c         |   5
18 files changed, 776 insertions, 725 deletions
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 08537747cb58..fdd30d08ab52 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -14,7 +14,7 @@ obj-$(CONFIG_MMIOTRACE) += mmiotrace.o
14mmiotrace-y := kmmio.o pf_in.o mmio-mod.o 14mmiotrace-y := kmmio.o pf_in.o mmio-mod.o
15obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o 15obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o
16 16
17obj-$(CONFIG_NUMA) += numa_$(BITS).o 17obj-$(CONFIG_NUMA) += numa.o numa_$(BITS).o
18obj-$(CONFIG_K8_NUMA) += k8topology_64.o 18obj-$(CONFIG_K8_NUMA) += k8topology_64.o
19obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o 19obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o
20 20
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index e81dfa408157..58f621e81919 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -19,49 +19,6 @@ void kunmap(struct page *page)
19 kunmap_high(page); 19 kunmap_high(page);
20} 20}
21 21
22static void debug_kmap_atomic_prot(enum km_type type)
23{
24#ifdef CONFIG_DEBUG_HIGHMEM
25 static unsigned warn_count = 10;
26
27 if (unlikely(warn_count == 0))
28 return;
29
30 if (unlikely(in_interrupt())) {
31 if (in_irq()) {
32 if (type != KM_IRQ0 && type != KM_IRQ1 &&
33 type != KM_BIO_SRC_IRQ && type != KM_BIO_DST_IRQ &&
34 type != KM_BOUNCE_READ) {
35 WARN_ON(1);
36 warn_count--;
37 }
38 } else if (!irqs_disabled()) { /* softirq */
39 if (type != KM_IRQ0 && type != KM_IRQ1 &&
40 type != KM_SOFTIRQ0 && type != KM_SOFTIRQ1 &&
41 type != KM_SKB_SUNRPC_DATA &&
42 type != KM_SKB_DATA_SOFTIRQ &&
43 type != KM_BOUNCE_READ) {
44 WARN_ON(1);
45 warn_count--;
46 }
47 }
48 }
49
50 if (type == KM_IRQ0 || type == KM_IRQ1 || type == KM_BOUNCE_READ ||
51 type == KM_BIO_SRC_IRQ || type == KM_BIO_DST_IRQ) {
52 if (!irqs_disabled()) {
53 WARN_ON(1);
54 warn_count--;
55 }
56 } else if (type == KM_SOFTIRQ0 || type == KM_SOFTIRQ1) {
57 if (irq_count() == 0 && !irqs_disabled()) {
58 WARN_ON(1);
59 warn_count--;
60 }
61 }
62#endif
63}
64
65/* 22/*
66 * kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because 23 * kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because
67 * no global lock is needed and because the kmap code must perform a global TLB 24 * no global lock is needed and because the kmap code must perform a global TLB
@@ -81,7 +38,7 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot)
81 if (!PageHighMem(page)) 38 if (!PageHighMem(page))
82 return page_address(page); 39 return page_address(page);
83 40
84 debug_kmap_atomic_prot(type); 41 debug_kmap_atomic(type);
85 42
86 idx = type + KM_TYPE_NR*smp_processor_id(); 43 idx = type + KM_TYPE_NR*smp_processor_id();
87 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); 44 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
@@ -119,22 +76,13 @@ void kunmap_atomic(void *kvaddr, enum km_type type)
119 pagefault_enable(); 76 pagefault_enable();
120} 77}
121 78
122/* This is the same as kmap_atomic() but can map memory that doesn't 79/*
80 * This is the same as kmap_atomic() but can map memory that doesn't
123 * have a struct page associated with it. 81 * have a struct page associated with it.
124 */ 82 */
125void *kmap_atomic_pfn(unsigned long pfn, enum km_type type) 83void *kmap_atomic_pfn(unsigned long pfn, enum km_type type)
126{ 84{
127 enum fixed_addresses idx; 85 return kmap_atomic_prot_pfn(pfn, type, kmap_prot);
128 unsigned long vaddr;
129
130 pagefault_disable();
131
132 idx = type + KM_TYPE_NR*smp_processor_id();
133 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
134 set_pte(kmap_pte-idx, pfn_pte(pfn, kmap_prot));
135 arch_flush_lazy_mmu_mode();
136
137 return (void*) vaddr;
138} 86}
139EXPORT_SYMBOL_GPL(kmap_atomic_pfn); /* temporarily in use by i915 GEM until vmap */ 87EXPORT_SYMBOL_GPL(kmap_atomic_pfn); /* temporarily in use by i915 GEM until vmap */
140 88
@@ -156,7 +104,6 @@ EXPORT_SYMBOL(kunmap);
156EXPORT_SYMBOL(kmap_atomic); 104EXPORT_SYMBOL(kmap_atomic);
157EXPORT_SYMBOL(kunmap_atomic); 105EXPORT_SYMBOL(kunmap_atomic);
158 106
159#ifdef CONFIG_NUMA
160void __init set_highmem_pages_init(void) 107void __init set_highmem_pages_init(void)
161{ 108{
162 struct zone *zone; 109 struct zone *zone;
@@ -180,11 +127,3 @@ void __init set_highmem_pages_init(void)
180 } 127 }
181 totalram_pages += totalhigh_pages; 128 totalram_pages += totalhigh_pages;
182} 129}
183#else
184void __init set_highmem_pages_init(void)
185{
186 add_highpages_with_active_regions(0, highstart_pfn, highend_pfn);
187
188 totalram_pages += totalhigh_pages;
189}
190#endif /* CONFIG_NUMA */
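
The kmap_atomic paths above pick a per-CPU fixmap slot with idx = type + KM_TYPE_NR * smp_processor_id(), which is why no global lock is needed. A rough standalone sketch of that slot arithmetic follows; KM_TYPE_NR, FIX_KMAP_BEGIN and the fixmap base below are made-up values, not the kernel's real constants.

#include <stdio.h>

/* Illustrative stand-ins for the kernel's fixmap constants. */
#define KM_TYPE_NR	20		/* atomic kmap slots per CPU */
#define FIX_KMAP_BEGIN	0		/* first kmap slot in the fixmap */
#define FIXADDR_TOP	0xfffff000UL	/* hypothetical fixmap top */
#define PAGE_SIZE	4096UL

/* Each fixmap index maps one page, counting down from FIXADDR_TOP. */
static unsigned long fix_to_virt(unsigned int idx)
{
	return FIXADDR_TOP - idx * PAGE_SIZE;
}

/* CPUs never share an atomic kmap slot, so no global lock is required. */
static unsigned long kmap_slot_vaddr(int km_type, int cpu)
{
	unsigned int idx = km_type + KM_TYPE_NR * cpu;

	return fix_to_virt(FIX_KMAP_BEGIN + idx);
}

int main(void)
{
	/* the same km_type lands on distinct pages for distinct CPUs */
	printf("type 0, cpu 0: %#lx\n", kmap_slot_vaddr(0, 0));
	printf("type 0, cpu 1: %#lx\n", kmap_slot_vaddr(0, 1));
	return 0;
}
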
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index ce6a722587d8..fd3da1dda1c9 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -1,8 +1,345 @@
1#include <linux/ioport.h>
1#include <linux/swap.h> 2#include <linux/swap.h>
3
2#include <asm/cacheflush.h> 4#include <asm/cacheflush.h>
5#include <asm/e820.h>
6#include <asm/init.h>
3#include <asm/page.h> 7#include <asm/page.h>
8#include <asm/page_types.h>
4#include <asm/sections.h> 9#include <asm/sections.h>
5#include <asm/system.h> 10#include <asm/system.h>
11#include <asm/tlbflush.h>
12
13unsigned long __initdata e820_table_start;
14unsigned long __meminitdata e820_table_end;
15unsigned long __meminitdata e820_table_top;
16
17int after_bootmem;
18
19int direct_gbpages
20#ifdef CONFIG_DIRECT_GBPAGES
21 = 1
22#endif
23;
24
25static void __init find_early_table_space(unsigned long end, int use_pse,
26 int use_gbpages)
27{
28 unsigned long puds, pmds, ptes, tables, start;
29
30 puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
31 tables = roundup(puds * sizeof(pud_t), PAGE_SIZE);
32
33 if (use_gbpages) {
34 unsigned long extra;
35
36 extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT);
37 pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT;
38 } else
39 pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
40
41 tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE);
42
43 if (use_pse) {
44 unsigned long extra;
45
46 extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT);
47#ifdef CONFIG_X86_32
48 extra += PMD_SIZE;
49#endif
50 ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
51 } else
52 ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
53
54 tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE);
55
56#ifdef CONFIG_X86_32
57 /* for fixmap */
58 tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE);
59#endif
60
61 /*
62 * RED-PEN putting page tables only on node 0 could
63 * cause a hotspot and fill up ZONE_DMA. The page tables
64 * need roughly 0.5KB per GB.
65 */
66#ifdef CONFIG_X86_32
67 start = 0x7000;
68 e820_table_start = find_e820_area(start, max_pfn_mapped<<PAGE_SHIFT,
69 tables, PAGE_SIZE);
70#else /* CONFIG_X86_64 */
71 start = 0x8000;
72 e820_table_start = find_e820_area(start, end, tables, PAGE_SIZE);
73#endif
74 if (e820_table_start == -1UL)
75 panic("Cannot find space for the kernel page tables");
76
77 e820_table_start >>= PAGE_SHIFT;
78 e820_table_end = e820_table_start;
79 e820_table_top = e820_table_start + (tables >> PAGE_SHIFT);
80
81 printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n",
82 end, e820_table_start << PAGE_SHIFT, e820_table_top << PAGE_SHIFT);
83}
84
85struct map_range {
86 unsigned long start;
87 unsigned long end;
88 unsigned page_size_mask;
89};
90
91#ifdef CONFIG_X86_32
92#define NR_RANGE_MR 3
93#else /* CONFIG_X86_64 */
94#define NR_RANGE_MR 5
95#endif
96
97static int __meminit save_mr(struct map_range *mr, int nr_range,
98 unsigned long start_pfn, unsigned long end_pfn,
99 unsigned long page_size_mask)
100{
101 if (start_pfn < end_pfn) {
102 if (nr_range >= NR_RANGE_MR)
103 panic("run out of range for init_memory_mapping\n");
104 mr[nr_range].start = start_pfn<<PAGE_SHIFT;
105 mr[nr_range].end = end_pfn<<PAGE_SHIFT;
106 mr[nr_range].page_size_mask = page_size_mask;
107 nr_range++;
108 }
109
110 return nr_range;
111}
112
113#ifdef CONFIG_X86_64
114static void __init init_gbpages(void)
115{
116 if (direct_gbpages && cpu_has_gbpages)
117 printk(KERN_INFO "Using GB pages for direct mapping\n");
118 else
119 direct_gbpages = 0;
120}
121#else
122static inline void init_gbpages(void)
123{
124}
125#endif
126
127/*
128 * Setup the direct mapping of the physical memory at PAGE_OFFSET.
129 * This runs before bootmem is initialized and gets pages directly from
130 * the physical memory. To access them they are temporarily mapped.
131 */
132unsigned long __init_refok init_memory_mapping(unsigned long start,
133 unsigned long end)
134{
135 unsigned long page_size_mask = 0;
136 unsigned long start_pfn, end_pfn;
137 unsigned long ret = 0;
138 unsigned long pos;
139
140 struct map_range mr[NR_RANGE_MR];
141 int nr_range, i;
142 int use_pse, use_gbpages;
143
144 printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end);
145
146 if (!after_bootmem)
147 init_gbpages();
148
149#ifdef CONFIG_DEBUG_PAGEALLOC
150 /*
151 * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
152 * This will simplify cpa(), which otherwise needs to support splitting
153 * large pages into small in interrupt context, etc.
154 */
155 use_pse = use_gbpages = 0;
156#else
157 use_pse = cpu_has_pse;
158 use_gbpages = direct_gbpages;
159#endif
160
161#ifdef CONFIG_X86_32
162#ifdef CONFIG_X86_PAE
163 set_nx();
164 if (nx_enabled)
165 printk(KERN_INFO "NX (Execute Disable) protection: active\n");
166#endif
167
168 /* Enable PSE if available */
169 if (cpu_has_pse)
170 set_in_cr4(X86_CR4_PSE);
171
172 /* Enable PGE if available */
173 if (cpu_has_pge) {
174 set_in_cr4(X86_CR4_PGE);
175 __supported_pte_mask |= _PAGE_GLOBAL;
176 }
177#endif
178
179 if (use_gbpages)
180 page_size_mask |= 1 << PG_LEVEL_1G;
181 if (use_pse)
182 page_size_mask |= 1 << PG_LEVEL_2M;
183
184 memset(mr, 0, sizeof(mr));
185 nr_range = 0;
186
187 /* head if not big page alignment ? */
188 start_pfn = start >> PAGE_SHIFT;
189 pos = start_pfn << PAGE_SHIFT;
190#ifdef CONFIG_X86_32
191 /*
192 * Don't use a large page for the first 2/4MB of memory
193 * because there are often fixed size MTRRs in there
194 * and overlapping MTRRs into large pages can cause
195 * slowdowns.
196 */
197 if (pos == 0)
198 end_pfn = 1<<(PMD_SHIFT - PAGE_SHIFT);
199 else
200 end_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
201 << (PMD_SHIFT - PAGE_SHIFT);
202#else /* CONFIG_X86_64 */
203 end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT)
204 << (PMD_SHIFT - PAGE_SHIFT);
205#endif
206 if (end_pfn > (end >> PAGE_SHIFT))
207 end_pfn = end >> PAGE_SHIFT;
208 if (start_pfn < end_pfn) {
209 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
210 pos = end_pfn << PAGE_SHIFT;
211 }
212
213 /* big page (2M) range */
214 start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
215 << (PMD_SHIFT - PAGE_SHIFT);
216#ifdef CONFIG_X86_32
217 end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
218#else /* CONFIG_X86_64 */
219 end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT)
220 << (PUD_SHIFT - PAGE_SHIFT);
221 if (end_pfn > ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT)))
222 end_pfn = ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT));
223#endif
224
225 if (start_pfn < end_pfn) {
226 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
227 page_size_mask & (1<<PG_LEVEL_2M));
228 pos = end_pfn << PAGE_SHIFT;
229 }
230
231#ifdef CONFIG_X86_64
232 /* big page (1G) range */
233 start_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT)
234 << (PUD_SHIFT - PAGE_SHIFT);
235 end_pfn = (end >> PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT);
236 if (start_pfn < end_pfn) {
237 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
238 page_size_mask &
239 ((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G)));
240 pos = end_pfn << PAGE_SHIFT;
241 }
242
243 /* tail is not big page (1G) alignment */
244 start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
245 << (PMD_SHIFT - PAGE_SHIFT);
246 end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
247 if (start_pfn < end_pfn) {
248 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
249 page_size_mask & (1<<PG_LEVEL_2M));
250 pos = end_pfn << PAGE_SHIFT;
251 }
252#endif
253
254 /* tail is not big page (2M) alignment */
255 start_pfn = pos>>PAGE_SHIFT;
256 end_pfn = end>>PAGE_SHIFT;
257 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
258
259 /* try to merge same page size and continuous */
260 for (i = 0; nr_range > 1 && i < nr_range - 1; i++) {
261 unsigned long old_start;
262 if (mr[i].end != mr[i+1].start ||
263 mr[i].page_size_mask != mr[i+1].page_size_mask)
264 continue;
265 /* move it */
266 old_start = mr[i].start;
267 memmove(&mr[i], &mr[i+1],
268 (nr_range - 1 - i) * sizeof(struct map_range));
269 mr[i--].start = old_start;
270 nr_range--;
271 }
272
273 for (i = 0; i < nr_range; i++)
274 printk(KERN_DEBUG " %010lx - %010lx page %s\n",
275 mr[i].start, mr[i].end,
276 (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":(
277 (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k"));
278
279 /*
280 * Find space for the kernel direct mapping tables.
281 *
282 * Later we should allocate these tables in the local node of the
283 * memory mapped. Unfortunately this is done currently before the
284 * nodes are discovered.
285 */
286 if (!after_bootmem)
287 find_early_table_space(end, use_pse, use_gbpages);
288
289#ifdef CONFIG_X86_32
290 for (i = 0; i < nr_range; i++)
291 kernel_physical_mapping_init(mr[i].start, mr[i].end,
292 mr[i].page_size_mask);
293 ret = end;
294#else /* CONFIG_X86_64 */
295 for (i = 0; i < nr_range; i++)
296 ret = kernel_physical_mapping_init(mr[i].start, mr[i].end,
297 mr[i].page_size_mask);
298#endif
299
300#ifdef CONFIG_X86_32
301 early_ioremap_page_table_range_init();
302
303 load_cr3(swapper_pg_dir);
304#endif
305
306#ifdef CONFIG_X86_64
307 if (!after_bootmem)
308 mmu_cr4_features = read_cr4();
309#endif
310 __flush_tlb_all();
311
312 if (!after_bootmem && e820_table_end > e820_table_start)
313 reserve_early(e820_table_start << PAGE_SHIFT,
314 e820_table_end << PAGE_SHIFT, "PGTABLE");
315
316 if (!after_bootmem)
317 early_memtest(start, end);
318
319 return ret >> PAGE_SHIFT;
320}
321
322
323/*
324 * devmem_is_allowed() checks to see if /dev/mem access to a certain address
325 * is valid. The argument is a physical page number.
326 *
327 *
328 * On x86, access has to be given to the first megabyte of ram because that area
329 * contains bios code and data regions used by X and dosemu and similar apps.
330 * Access has to be given to non-kernel-ram areas as well, these contain the PCI
331 * mmio resources as well as potential bios/acpi data regions.
332 */
333int devmem_is_allowed(unsigned long pagenr)
334{
335 if (pagenr <= 256)
336 return 1;
337 if (iomem_is_exclusive(pagenr << PAGE_SHIFT))
338 return 0;
339 if (!page_is_ram(pagenr))
340 return 1;
341 return 0;
342}
6 343
7void free_init_pages(char *what, unsigned long begin, unsigned long end) 344void free_init_pages(char *what, unsigned long begin, unsigned long end)
8{ 345{
@@ -47,3 +384,10 @@ void free_initmem(void)
47 (unsigned long)(&__init_begin), 384 (unsigned long)(&__init_begin),
48 (unsigned long)(&__init_end)); 385 (unsigned long)(&__init_end));
49} 386}
387
388#ifdef CONFIG_BLK_DEV_INITRD
389void free_initrd_mem(unsigned long start, unsigned long end)
390{
391 free_init_pages("initrd memory", start, end);
392}
393#endif
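
The new init_memory_mapping() above splits [start, end) into at most NR_RANGE_MR pieces — an unaligned head mapped with 4k pages, large-page-aligned middle ranges, and unaligned tails — then merges neighbouring ranges that ended up with the same page-size mask. A simplified user-space sketch of that split-and-merge bookkeeping (4k/2M only, illustrative example addresses; this is not the kernel code itself):

#include <stdio.h>
#include <string.h>

#define PAGE_SHIFT	12
#define PMD_SHIFT	21			/* 2M pages */
#define PMD_SIZE	(1UL << PMD_SHIFT)
#define PG_LEVEL_2M	1
#define NR_RANGE_MR	3

struct map_range {
	unsigned long start, end;		/* byte addresses */
	unsigned page_size_mask;
};

static int save_mr(struct map_range *mr, int nr, unsigned long s_pfn,
		   unsigned long e_pfn, unsigned mask)
{
	if (s_pfn < e_pfn) {
		mr[nr].start = s_pfn << PAGE_SHIFT;
		mr[nr].end   = e_pfn << PAGE_SHIFT;
		mr[nr].page_size_mask = mask;
		nr++;
	}
	return nr;
}

int main(void)
{
	unsigned long start = 0x1000, end = 0x40300000;	/* example range */
	unsigned long pos = start, end_pfn;
	struct map_range mr[NR_RANGE_MR];
	int nr = 0, i;

	memset(mr, 0, sizeof(mr));

	/* head: 4k pages up to the first 2M boundary */
	end_pfn = ((pos + PMD_SIZE - 1) >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
	if (end_pfn > (end >> PAGE_SHIFT))
		end_pfn = end >> PAGE_SHIFT;
	nr = save_mr(mr, nr, pos >> PAGE_SHIFT, end_pfn, 0);
	if (nr)
		pos = mr[nr - 1].end;

	/* body: whole 2M pages */
	end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
	nr = save_mr(mr, nr, pos >> PAGE_SHIFT, end_pfn, 1 << PG_LEVEL_2M);
	if (nr && mr[nr - 1].end > pos)
		pos = mr[nr - 1].end;

	/* tail: whatever is left, again as 4k pages */
	nr = save_mr(mr, nr, pos >> PAGE_SHIFT, end >> PAGE_SHIFT, 0);

	/* merge neighbours with identical page-size masks */
	for (i = 0; nr > 1 && i < nr - 1; i++) {
		if (mr[i].end != mr[i + 1].start ||
		    mr[i].page_size_mask != mr[i + 1].page_size_mask)
			continue;
		mr[i].end = mr[i + 1].end;
		memmove(&mr[i + 1], &mr[i + 2],
			(nr - 2 - i) * sizeof(struct map_range));
		nr--;
		i--;
	}

	for (i = 0; i < nr; i++)
		printf("%010lx - %010lx page %s\n", mr[i].start, mr[i].end,
		       mr[i].page_size_mask ? "2M" : "4k");
	return 0;
}
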
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 47df0e1bbeb9..749559ed80f5 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -49,6 +49,7 @@
49#include <asm/paravirt.h> 49#include <asm/paravirt.h>
50#include <asm/setup.h> 50#include <asm/setup.h>
51#include <asm/cacheflush.h> 51#include <asm/cacheflush.h>
52#include <asm/init.h>
52 53
53unsigned long max_low_pfn_mapped; 54unsigned long max_low_pfn_mapped;
54unsigned long max_pfn_mapped; 55unsigned long max_pfn_mapped;
@@ -58,19 +59,14 @@ unsigned long highstart_pfn, highend_pfn;
58 59
59static noinline int do_test_wp_bit(void); 60static noinline int do_test_wp_bit(void);
60 61
61 62bool __read_mostly __vmalloc_start_set = false;
62static unsigned long __initdata table_start;
63static unsigned long __meminitdata table_end;
64static unsigned long __meminitdata table_top;
65
66static int __initdata after_init_bootmem;
67 63
68static __init void *alloc_low_page(void) 64static __init void *alloc_low_page(void)
69{ 65{
70 unsigned long pfn = table_end++; 66 unsigned long pfn = e820_table_end++;
71 void *adr; 67 void *adr;
72 68
73 if (pfn >= table_top) 69 if (pfn >= e820_table_top)
74 panic("alloc_low_page: ran out of memory"); 70 panic("alloc_low_page: ran out of memory");
75 71
76 adr = __va(pfn * PAGE_SIZE); 72 adr = __va(pfn * PAGE_SIZE);
@@ -90,7 +86,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
90 86
91#ifdef CONFIG_X86_PAE 87#ifdef CONFIG_X86_PAE
92 if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { 88 if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
93 if (after_init_bootmem) 89 if (after_bootmem)
94 pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); 90 pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
95 else 91 else
96 pmd_table = (pmd_t *)alloc_low_page(); 92 pmd_table = (pmd_t *)alloc_low_page();
@@ -117,7 +113,7 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
117 if (!(pmd_val(*pmd) & _PAGE_PRESENT)) { 113 if (!(pmd_val(*pmd) & _PAGE_PRESENT)) {
118 pte_t *page_table = NULL; 114 pte_t *page_table = NULL;
119 115
120 if (after_init_bootmem) { 116 if (after_bootmem) {
121#ifdef CONFIG_DEBUG_PAGEALLOC 117#ifdef CONFIG_DEBUG_PAGEALLOC
122 page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); 118 page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE);
123#endif 119#endif
@@ -168,12 +164,12 @@ static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd,
168 if (pmd_idx_kmap_begin != pmd_idx_kmap_end 164 if (pmd_idx_kmap_begin != pmd_idx_kmap_end
169 && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin 165 && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin
170 && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end 166 && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end
171 && ((__pa(pte) >> PAGE_SHIFT) < table_start 167 && ((__pa(pte) >> PAGE_SHIFT) < e820_table_start
172 || (__pa(pte) >> PAGE_SHIFT) >= table_end)) { 168 || (__pa(pte) >> PAGE_SHIFT) >= e820_table_end)) {
173 pte_t *newpte; 169 pte_t *newpte;
174 int i; 170 int i;
175 171
176 BUG_ON(after_init_bootmem); 172 BUG_ON(after_bootmem);
177 newpte = alloc_low_page(); 173 newpte = alloc_low_page();
178 for (i = 0; i < PTRS_PER_PTE; i++) 174 for (i = 0; i < PTRS_PER_PTE; i++)
179 set_pte(newpte + i, pte[i]); 175 set_pte(newpte + i, pte[i]);
@@ -242,11 +238,14 @@ static inline int is_kernel_text(unsigned long addr)
242 * of max_low_pfn pages, by creating page tables starting from address 238 * of max_low_pfn pages, by creating page tables starting from address
243 * PAGE_OFFSET: 239 * PAGE_OFFSET:
244 */ 240 */
245static void __init kernel_physical_mapping_init(pgd_t *pgd_base, 241unsigned long __init
246 unsigned long start_pfn, 242kernel_physical_mapping_init(unsigned long start,
247 unsigned long end_pfn, 243 unsigned long end,
248 int use_pse) 244 unsigned long page_size_mask)
249{ 245{
246 int use_pse = page_size_mask == (1<<PG_LEVEL_2M);
247 unsigned long start_pfn, end_pfn;
248 pgd_t *pgd_base = swapper_pg_dir;
250 int pgd_idx, pmd_idx, pte_ofs; 249 int pgd_idx, pmd_idx, pte_ofs;
251 unsigned long pfn; 250 unsigned long pfn;
252 pgd_t *pgd; 251 pgd_t *pgd;
@@ -255,6 +254,9 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base,
255 unsigned pages_2m, pages_4k; 254 unsigned pages_2m, pages_4k;
256 int mapping_iter; 255 int mapping_iter;
257 256
257 start_pfn = start >> PAGE_SHIFT;
258 end_pfn = end >> PAGE_SHIFT;
259
258 /* 260 /*
259 * First iteration will setup identity mapping using large/small pages 261 * First iteration will setup identity mapping using large/small pages
260 * based on use_pse, with other attributes same as set by 262 * based on use_pse, with other attributes same as set by
@@ -369,26 +371,6 @@ repeat:
369 mapping_iter = 2; 371 mapping_iter = 2;
370 goto repeat; 372 goto repeat;
371 } 373 }
372}
373
374/*
375 * devmem_is_allowed() checks to see if /dev/mem access to a certain address
376 * is valid. The argument is a physical page number.
377 *
378 *
379 * On x86, access has to be given to the first megabyte of ram because that area
380 * contains bios code and data regions used by X and dosemu and similar apps.
381 * Access has to be given to non-kernel-ram areas as well, these contain the PCI
382 * mmio resources as well as potential bios/acpi data regions.
383 */
384int devmem_is_allowed(unsigned long pagenr)
385{
386 if (pagenr <= 256)
387 return 1;
388 if (iomem_is_exclusive(pagenr << PAGE_SHIFT))
389 return 0;
390 if (!page_is_ram(pagenr))
391 return 1;
392 return 0; 374 return 0;
393} 375}
394 376
@@ -545,8 +527,9 @@ void __init native_pagetable_setup_done(pgd_t *base)
545 * be partially populated, and so it avoids stomping on any existing 527 * be partially populated, and so it avoids stomping on any existing
546 * mappings. 528 * mappings.
547 */ 529 */
548static void __init early_ioremap_page_table_range_init(pgd_t *pgd_base) 530void __init early_ioremap_page_table_range_init(void)
549{ 531{
532 pgd_t *pgd_base = swapper_pg_dir;
550 unsigned long vaddr, end; 533 unsigned long vaddr, end;
551 534
552 /* 535 /*
@@ -641,7 +624,7 @@ static int __init noexec_setup(char *str)
641} 624}
642early_param("noexec", noexec_setup); 625early_param("noexec", noexec_setup);
643 626
644static void __init set_nx(void) 627void __init set_nx(void)
645{ 628{
646 unsigned int v[4], l, h; 629 unsigned int v[4], l, h;
647 630
@@ -793,6 +776,8 @@ void __init initmem_init(unsigned long start_pfn,
793#ifdef CONFIG_FLATMEM 776#ifdef CONFIG_FLATMEM
794 max_mapnr = num_physpages; 777 max_mapnr = num_physpages;
795#endif 778#endif
779 __vmalloc_start_set = true;
780
796 printk(KERN_NOTICE "%ldMB LOWMEM available.\n", 781 printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
797 pages_to_mb(max_low_pfn)); 782 pages_to_mb(max_low_pfn));
798 783
@@ -814,176 +799,66 @@ static void __init zone_sizes_init(void)
814 free_area_init_nodes(max_zone_pfns); 799 free_area_init_nodes(max_zone_pfns);
815} 800}
816 801
802static unsigned long __init setup_node_bootmem(int nodeid,
803 unsigned long start_pfn,
804 unsigned long end_pfn,
805 unsigned long bootmap)
806{
807 unsigned long bootmap_size;
808
809 /* don't touch min_low_pfn */
810 bootmap_size = init_bootmem_node(NODE_DATA(nodeid),
811 bootmap >> PAGE_SHIFT,
812 start_pfn, end_pfn);
813 printk(KERN_INFO " node %d low ram: %08lx - %08lx\n",
814 nodeid, start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
815 printk(KERN_INFO " node %d bootmap %08lx - %08lx\n",
816 nodeid, bootmap, bootmap + bootmap_size);
817 free_bootmem_with_active_regions(nodeid, end_pfn);
818 early_res_to_bootmem(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
819
820 return bootmap + bootmap_size;
821}
822
817void __init setup_bootmem_allocator(void) 823void __init setup_bootmem_allocator(void)
818{ 824{
819 int i; 825 int nodeid;
820 unsigned long bootmap_size, bootmap; 826 unsigned long bootmap_size, bootmap;
821 /* 827 /*
822 * Initialize the boot-time allocator (with low memory only): 828 * Initialize the boot-time allocator (with low memory only):
823 */ 829 */
824 bootmap_size = bootmem_bootmap_pages(max_low_pfn)<<PAGE_SHIFT; 830 bootmap_size = bootmem_bootmap_pages(max_low_pfn)<<PAGE_SHIFT;
825 bootmap = find_e820_area(min_low_pfn<<PAGE_SHIFT, 831 bootmap = find_e820_area(0, max_pfn_mapped<<PAGE_SHIFT, bootmap_size,
826 max_pfn_mapped<<PAGE_SHIFT, bootmap_size,
827 PAGE_SIZE); 832 PAGE_SIZE);
828 if (bootmap == -1L) 833 if (bootmap == -1L)
829 panic("Cannot find bootmem map of size %ld\n", bootmap_size); 834 panic("Cannot find bootmem map of size %ld\n", bootmap_size);
830 reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP"); 835 reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP");
831 836
832 /* don't touch min_low_pfn */
833 bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap >> PAGE_SHIFT,
834 min_low_pfn, max_low_pfn);
835 printk(KERN_INFO " mapped low ram: 0 - %08lx\n", 837 printk(KERN_INFO " mapped low ram: 0 - %08lx\n",
836 max_pfn_mapped<<PAGE_SHIFT); 838 max_pfn_mapped<<PAGE_SHIFT);
837 printk(KERN_INFO " low ram: %08lx - %08lx\n", 839 printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT);
838 min_low_pfn<<PAGE_SHIFT, max_low_pfn<<PAGE_SHIFT);
839 printk(KERN_INFO " bootmap %08lx - %08lx\n",
840 bootmap, bootmap + bootmap_size);
841 for_each_online_node(i)
842 free_bootmem_with_active_regions(i, max_low_pfn);
843 early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT);
844
845 after_init_bootmem = 1;
846}
847
848static void __init find_early_table_space(unsigned long end, int use_pse)
849{
850 unsigned long puds, pmds, ptes, tables, start;
851
852 puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
853 tables = roundup(puds * sizeof(pud_t), PAGE_SIZE);
854
855 pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
856 tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE);
857 840
858 if (use_pse) { 841 for_each_online_node(nodeid) {
859 unsigned long extra; 842 unsigned long start_pfn, end_pfn;
860
861 extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT);
862 extra += PMD_SIZE;
863 ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
864 } else
865 ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
866
867 tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE);
868
869 /* for fixmap */
870 tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE);
871
872 /*
873 * RED-PEN putting page tables only on node 0 could
874 * cause a hotspot and fill up ZONE_DMA. The page tables
875 * need roughly 0.5KB per GB.
876 */
877 start = 0x7000;
878 table_start = find_e820_area(start, max_pfn_mapped<<PAGE_SHIFT,
879 tables, PAGE_SIZE);
880 if (table_start == -1UL)
881 panic("Cannot find space for the kernel page tables");
882
883 table_start >>= PAGE_SHIFT;
884 table_end = table_start;
885 table_top = table_start + (tables>>PAGE_SHIFT);
886
887 printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n",
888 end, table_start << PAGE_SHIFT,
889 (table_start << PAGE_SHIFT) + tables);
890}
891 843
892unsigned long __init_refok init_memory_mapping(unsigned long start, 844#ifdef CONFIG_NEED_MULTIPLE_NODES
893 unsigned long end) 845 start_pfn = node_start_pfn[nodeid];
894{ 846 end_pfn = node_end_pfn[nodeid];
895 pgd_t *pgd_base = swapper_pg_dir; 847 if (start_pfn > max_low_pfn)
896 unsigned long start_pfn, end_pfn; 848 continue;
897 unsigned long big_page_start; 849 if (end_pfn > max_low_pfn)
898#ifdef CONFIG_DEBUG_PAGEALLOC 850 end_pfn = max_low_pfn;
899 /*
900 * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
901 * This will simplify cpa(), which otherwise needs to support splitting
902 * large pages into small in interrupt context, etc.
903 */
904 int use_pse = 0;
905#else 851#else
906 int use_pse = cpu_has_pse; 852 start_pfn = 0;
853 end_pfn = max_low_pfn;
907#endif 854#endif
908 855 bootmap = setup_node_bootmem(nodeid, start_pfn, end_pfn,
909 /* 856 bootmap);
910 * Find space for the kernel direct mapping tables.
911 */
912 if (!after_init_bootmem)
913 find_early_table_space(end, use_pse);
914
915#ifdef CONFIG_X86_PAE
916 set_nx();
917 if (nx_enabled)
918 printk(KERN_INFO "NX (Execute Disable) protection: active\n");
919#endif
920
921 /* Enable PSE if available */
922 if (cpu_has_pse)
923 set_in_cr4(X86_CR4_PSE);
924
925 /* Enable PGE if available */
926 if (cpu_has_pge) {
927 set_in_cr4(X86_CR4_PGE);
928 __supported_pte_mask |= _PAGE_GLOBAL;
929 }
930
931 /*
932 * Don't use a large page for the first 2/4MB of memory
933 * because there are often fixed size MTRRs in there
934 * and overlapping MTRRs into large pages can cause
935 * slowdowns.
936 */
937 big_page_start = PMD_SIZE;
938
939 if (start < big_page_start) {
940 start_pfn = start >> PAGE_SHIFT;
941 end_pfn = min(big_page_start>>PAGE_SHIFT, end>>PAGE_SHIFT);
942 } else {
943 /* head is not big page alignment ? */
944 start_pfn = start >> PAGE_SHIFT;
945 end_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT)
946 << (PMD_SHIFT - PAGE_SHIFT);
947 }
948 if (start_pfn < end_pfn)
949 kernel_physical_mapping_init(pgd_base, start_pfn, end_pfn, 0);
950
951 /* big page range */
952 start_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT)
953 << (PMD_SHIFT - PAGE_SHIFT);
954 if (start_pfn < (big_page_start >> PAGE_SHIFT))
955 start_pfn = big_page_start >> PAGE_SHIFT;
956 end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
957 if (start_pfn < end_pfn)
958 kernel_physical_mapping_init(pgd_base, start_pfn, end_pfn,
959 use_pse);
960
961 /* tail is not big page alignment ? */
962 start_pfn = end_pfn;
963 if (start_pfn > (big_page_start>>PAGE_SHIFT)) {
964 end_pfn = end >> PAGE_SHIFT;
965 if (start_pfn < end_pfn)
966 kernel_physical_mapping_init(pgd_base, start_pfn,
967 end_pfn, 0);
968 } 857 }
969 858
970 early_ioremap_page_table_range_init(pgd_base); 859 after_bootmem = 1;
971
972 load_cr3(swapper_pg_dir);
973
974 __flush_tlb_all();
975
976 if (!after_init_bootmem)
977 reserve_early(table_start << PAGE_SHIFT,
978 table_end << PAGE_SHIFT, "PGTABLE");
979
980 if (!after_init_bootmem)
981 early_memtest(start, end);
982
983 return end >> PAGE_SHIFT;
984} 860}
985 861
986
987/* 862/*
988 * paging_init() sets up the page tables - note that the first 8MB are 863 * paging_init() sets up the page tables - note that the first 8MB are
989 * already mapped by head.S. 864 * already mapped by head.S.
@@ -1179,17 +1054,47 @@ static noinline int do_test_wp_bit(void)
1179const int rodata_test_data = 0xC3; 1054const int rodata_test_data = 0xC3;
1180EXPORT_SYMBOL_GPL(rodata_test_data); 1055EXPORT_SYMBOL_GPL(rodata_test_data);
1181 1056
1057static int kernel_set_to_readonly;
1058
1059void set_kernel_text_rw(void)
1060{
1061 unsigned long start = PFN_ALIGN(_text);
1062 unsigned long size = PFN_ALIGN(_etext) - start;
1063
1064 if (!kernel_set_to_readonly)
1065 return;
1066
1067 pr_debug("Set kernel text: %lx - %lx for read write\n",
1068 start, start+size);
1069
1070 set_pages_rw(virt_to_page(start), size >> PAGE_SHIFT);
1071}
1072
1073void set_kernel_text_ro(void)
1074{
1075 unsigned long start = PFN_ALIGN(_text);
1076 unsigned long size = PFN_ALIGN(_etext) - start;
1077
1078 if (!kernel_set_to_readonly)
1079 return;
1080
1081 pr_debug("Set kernel text: %lx - %lx for read only\n",
1082 start, start+size);
1083
1084 set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
1085}
1086
1182void mark_rodata_ro(void) 1087void mark_rodata_ro(void)
1183{ 1088{
1184 unsigned long start = PFN_ALIGN(_text); 1089 unsigned long start = PFN_ALIGN(_text);
1185 unsigned long size = PFN_ALIGN(_etext) - start; 1090 unsigned long size = PFN_ALIGN(_etext) - start;
1186 1091
1187#ifndef CONFIG_DYNAMIC_FTRACE
1188 /* Dynamic tracing modifies the kernel text section */
1189 set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); 1092 set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
1190 printk(KERN_INFO "Write protecting the kernel text: %luk\n", 1093 printk(KERN_INFO "Write protecting the kernel text: %luk\n",
1191 size >> 10); 1094 size >> 10);
1192 1095
1096 kernel_set_to_readonly = 1;
1097
1193#ifdef CONFIG_CPA_DEBUG 1098#ifdef CONFIG_CPA_DEBUG
1194 printk(KERN_INFO "Testing CPA: Reverting %lx-%lx\n", 1099 printk(KERN_INFO "Testing CPA: Reverting %lx-%lx\n",
1195 start, start+size); 1100 start, start+size);
@@ -1198,7 +1103,6 @@ void mark_rodata_ro(void)
1198 printk(KERN_INFO "Testing CPA: write protecting again\n"); 1103 printk(KERN_INFO "Testing CPA: write protecting again\n");
1199 set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT); 1104 set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT);
1200#endif 1105#endif
1201#endif /* CONFIG_DYNAMIC_FTRACE */
1202 1106
1203 start += size; 1107 start += size;
1204 size = (unsigned long)__end_rodata - start; 1108 size = (unsigned long)__end_rodata - start;
@@ -1217,13 +1121,6 @@ void mark_rodata_ro(void)
1217} 1121}
1218#endif 1122#endif
1219 1123
1220#ifdef CONFIG_BLK_DEV_INITRD
1221void free_initrd_mem(unsigned long start, unsigned long end)
1222{
1223 free_init_pages("initrd memory", start, end);
1224}
1225#endif
1226
1227int __init reserve_bootmem_generic(unsigned long phys, unsigned long len, 1124int __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
1228 int flags) 1125 int flags)
1229{ 1126{
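
The reworked setup_bootmem_allocator() above walks every online node and clamps each node's pfn range to lowmem before calling setup_node_bootmem(). The clamping is plain interval arithmetic; a minimal sketch, with fabricated node tables and an assumed max_low_pfn:

#include <stdio.h>

#define MAX_NODES 4

/* Example per-node pfn ranges and lowmem limit -- illustrative values only. */
static unsigned long node_start_pfn[MAX_NODES] = { 0x00000, 0x20000, 0x38000, 0x50000 };
static unsigned long node_end_pfn[MAX_NODES]   = { 0x20000, 0x38000, 0x50000, 0x80000 };
static const unsigned long max_low_pfn = 0x38000;	/* ~896MB of lowmem */

int main(void)
{
	int nid;

	for (nid = 0; nid < MAX_NODES; nid++) {
		unsigned long start = node_start_pfn[nid];
		unsigned long end = node_end_pfn[nid];

		if (start > max_low_pfn)	/* node is entirely highmem */
			continue;
		if (end > max_low_pfn)		/* node straddles the limit */
			end = max_low_pfn;

		printf("node %d low ram: %08lx - %08lx\n",
		       nid, start << 12, end << 12);
	}
	return 0;
}
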
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 07f44d491df1..1753e8020df6 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -48,6 +48,7 @@
48#include <asm/kdebug.h> 48#include <asm/kdebug.h>
49#include <asm/numa.h> 49#include <asm/numa.h>
50#include <asm/cacheflush.h> 50#include <asm/cacheflush.h>
51#include <asm/init.h>
51 52
52/* 53/*
53 * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. 54 * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
@@ -61,12 +62,6 @@ static unsigned long dma_reserve __initdata;
61 62
62DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); 63DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
63 64
64int direct_gbpages
65#ifdef CONFIG_DIRECT_GBPAGES
66 = 1
67#endif
68;
69
70static int __init parse_direct_gbpages_off(char *arg) 65static int __init parse_direct_gbpages_off(char *arg)
71{ 66{
72 direct_gbpages = 0; 67 direct_gbpages = 0;
@@ -87,12 +82,10 @@ early_param("gbpages", parse_direct_gbpages_on);
87 * around without checking the pgd every time. 82 * around without checking the pgd every time.
88 */ 83 */
89 84
90int after_bootmem;
91
92pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP; 85pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP;
93EXPORT_SYMBOL_GPL(__supported_pte_mask); 86EXPORT_SYMBOL_GPL(__supported_pte_mask);
94 87
95static int do_not_nx __cpuinitdata; 88static int disable_nx __cpuinitdata;
96 89
97/* 90/*
98 * noexec=on|off 91 * noexec=on|off
@@ -107,9 +100,9 @@ static int __init nonx_setup(char *str)
107 return -EINVAL; 100 return -EINVAL;
108 if (!strncmp(str, "on", 2)) { 101 if (!strncmp(str, "on", 2)) {
109 __supported_pte_mask |= _PAGE_NX; 102 __supported_pte_mask |= _PAGE_NX;
110 do_not_nx = 0; 103 disable_nx = 0;
111 } else if (!strncmp(str, "off", 3)) { 104 } else if (!strncmp(str, "off", 3)) {
112 do_not_nx = 1; 105 disable_nx = 1;
113 __supported_pte_mask &= ~_PAGE_NX; 106 __supported_pte_mask &= ~_PAGE_NX;
114 } 107 }
115 return 0; 108 return 0;
@@ -121,7 +114,7 @@ void __cpuinit check_efer(void)
121 unsigned long efer; 114 unsigned long efer;
122 115
123 rdmsrl(MSR_EFER, efer); 116 rdmsrl(MSR_EFER, efer);
124 if (!(efer & EFER_NX) || do_not_nx) 117 if (!(efer & EFER_NX) || disable_nx)
125 __supported_pte_mask &= ~_PAGE_NX; 118 __supported_pte_mask &= ~_PAGE_NX;
126} 119}
127 120
@@ -325,13 +318,9 @@ void __init cleanup_highmap(void)
325 } 318 }
326} 319}
327 320
328static unsigned long __initdata table_start;
329static unsigned long __meminitdata table_end;
330static unsigned long __meminitdata table_top;
331
332static __ref void *alloc_low_page(unsigned long *phys) 321static __ref void *alloc_low_page(unsigned long *phys)
333{ 322{
334 unsigned long pfn = table_end++; 323 unsigned long pfn = e820_table_end++;
335 void *adr; 324 void *adr;
336 325
337 if (after_bootmem) { 326 if (after_bootmem) {
@@ -341,7 +330,7 @@ static __ref void *alloc_low_page(unsigned long *phys)
341 return adr; 330 return adr;
342 } 331 }
343 332
344 if (pfn >= table_top) 333 if (pfn >= e820_table_top)
345 panic("alloc_low_page: ran out of memory"); 334 panic("alloc_low_page: ran out of memory");
346 335
347 adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE); 336 adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE);
@@ -581,58 +570,10 @@ phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end,
581 return phys_pud_init(pud, addr, end, page_size_mask); 570 return phys_pud_init(pud, addr, end, page_size_mask);
582} 571}
583 572
584static void __init find_early_table_space(unsigned long end, int use_pse, 573unsigned long __init
585 int use_gbpages) 574kernel_physical_mapping_init(unsigned long start,
586{ 575 unsigned long end,
587 unsigned long puds, pmds, ptes, tables, start; 576 unsigned long page_size_mask)
588
589 puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
590 tables = roundup(puds * sizeof(pud_t), PAGE_SIZE);
591 if (use_gbpages) {
592 unsigned long extra;
593 extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT);
594 pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT;
595 } else
596 pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
597 tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE);
598
599 if (use_pse) {
600 unsigned long extra;
601 extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT);
602 ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
603 } else
604 ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
605 tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE);
606
607 /*
608 * RED-PEN putting page tables only on node 0 could
609 * cause a hotspot and fill up ZONE_DMA. The page tables
610 * need roughly 0.5KB per GB.
611 */
612 start = 0x8000;
613 table_start = find_e820_area(start, end, tables, PAGE_SIZE);
614 if (table_start == -1UL)
615 panic("Cannot find space for the kernel page tables");
616
617 table_start >>= PAGE_SHIFT;
618 table_end = table_start;
619 table_top = table_start + (tables >> PAGE_SHIFT);
620
621 printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n",
622 end, table_start << PAGE_SHIFT, table_top << PAGE_SHIFT);
623}
624
625static void __init init_gbpages(void)
626{
627 if (direct_gbpages && cpu_has_gbpages)
628 printk(KERN_INFO "Using GB pages for direct mapping\n");
629 else
630 direct_gbpages = 0;
631}
632
633static unsigned long __meminit kernel_physical_mapping_init(unsigned long start,
634 unsigned long end,
635 unsigned long page_size_mask)
636{ 577{
637 578
638 unsigned long next, last_map_addr = end; 579 unsigned long next, last_map_addr = end;
@@ -669,176 +610,6 @@ static unsigned long __meminit kernel_physical_mapping_init(unsigned long start,
669 return last_map_addr; 610 return last_map_addr;
670} 611}
671 612
672struct map_range {
673 unsigned long start;
674 unsigned long end;
675 unsigned page_size_mask;
676};
677
678#define NR_RANGE_MR 5
679
680static int save_mr(struct map_range *mr, int nr_range,
681 unsigned long start_pfn, unsigned long end_pfn,
682 unsigned long page_size_mask)
683{
684
685 if (start_pfn < end_pfn) {
686 if (nr_range >= NR_RANGE_MR)
687 panic("run out of range for init_memory_mapping\n");
688 mr[nr_range].start = start_pfn<<PAGE_SHIFT;
689 mr[nr_range].end = end_pfn<<PAGE_SHIFT;
690 mr[nr_range].page_size_mask = page_size_mask;
691 nr_range++;
692 }
693
694 return nr_range;
695}
696
697/*
698 * Setup the direct mapping of the physical memory at PAGE_OFFSET.
699 * This runs before bootmem is initialized and gets pages directly from
700 * the physical memory. To access them they are temporarily mapped.
701 */
702unsigned long __init_refok init_memory_mapping(unsigned long start,
703 unsigned long end)
704{
705 unsigned long last_map_addr = 0;
706 unsigned long page_size_mask = 0;
707 unsigned long start_pfn, end_pfn;
708 unsigned long pos;
709
710 struct map_range mr[NR_RANGE_MR];
711 int nr_range, i;
712 int use_pse, use_gbpages;
713
714 printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end);
715
716 /*
717 * Find space for the kernel direct mapping tables.
718 *
719 * Later we should allocate these tables in the local node of the
720 * memory mapped. Unfortunately this is done currently before the
721 * nodes are discovered.
722 */
723 if (!after_bootmem)
724 init_gbpages();
725
726#ifdef CONFIG_DEBUG_PAGEALLOC
727 /*
728 * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
729 * This will simplify cpa(), which otherwise needs to support splitting
730 * large pages into small in interrupt context, etc.
731 */
732 use_pse = use_gbpages = 0;
733#else
734 use_pse = cpu_has_pse;
735 use_gbpages = direct_gbpages;
736#endif
737
738 if (use_gbpages)
739 page_size_mask |= 1 << PG_LEVEL_1G;
740 if (use_pse)
741 page_size_mask |= 1 << PG_LEVEL_2M;
742
743 memset(mr, 0, sizeof(mr));
744 nr_range = 0;
745
746 /* head if not big page alignment ?*/
747 start_pfn = start >> PAGE_SHIFT;
748 pos = start_pfn << PAGE_SHIFT;
749 end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT)
750 << (PMD_SHIFT - PAGE_SHIFT);
751 if (end_pfn > (end >> PAGE_SHIFT))
752 end_pfn = end >> PAGE_SHIFT;
753 if (start_pfn < end_pfn) {
754 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
755 pos = end_pfn << PAGE_SHIFT;
756 }
757
758 /* big page (2M) range*/
759 start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
760 << (PMD_SHIFT - PAGE_SHIFT);
761 end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT)
762 << (PUD_SHIFT - PAGE_SHIFT);
763 if (end_pfn > ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT)))
764 end_pfn = ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT));
765 if (start_pfn < end_pfn) {
766 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
767 page_size_mask & (1<<PG_LEVEL_2M));
768 pos = end_pfn << PAGE_SHIFT;
769 }
770
771 /* big page (1G) range */
772 start_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT)
773 << (PUD_SHIFT - PAGE_SHIFT);
774 end_pfn = (end >> PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT);
775 if (start_pfn < end_pfn) {
776 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
777 page_size_mask &
778 ((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G)));
779 pos = end_pfn << PAGE_SHIFT;
780 }
781
782 /* tail is not big page (1G) alignment */
783 start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
784 << (PMD_SHIFT - PAGE_SHIFT);
785 end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
786 if (start_pfn < end_pfn) {
787 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
788 page_size_mask & (1<<PG_LEVEL_2M));
789 pos = end_pfn << PAGE_SHIFT;
790 }
791
792 /* tail is not big page (2M) alignment */
793 start_pfn = pos>>PAGE_SHIFT;
794 end_pfn = end>>PAGE_SHIFT;
795 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
796
797 /* try to merge same page size and continuous */
798 for (i = 0; nr_range > 1 && i < nr_range - 1; i++) {
799 unsigned long old_start;
800 if (mr[i].end != mr[i+1].start ||
801 mr[i].page_size_mask != mr[i+1].page_size_mask)
802 continue;
803 /* move it */
804 old_start = mr[i].start;
805 memmove(&mr[i], &mr[i+1],
806 (nr_range - 1 - i) * sizeof (struct map_range));
807 mr[i--].start = old_start;
808 nr_range--;
809 }
810
811 for (i = 0; i < nr_range; i++)
812 printk(KERN_DEBUG " %010lx - %010lx page %s\n",
813 mr[i].start, mr[i].end,
814 (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":(
815 (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k"));
816
817 if (!after_bootmem)
818 find_early_table_space(end, use_pse, use_gbpages);
819
820 for (i = 0; i < nr_range; i++)
821 last_map_addr = kernel_physical_mapping_init(
822 mr[i].start, mr[i].end,
823 mr[i].page_size_mask);
824
825 if (!after_bootmem)
826 mmu_cr4_features = read_cr4();
827 __flush_tlb_all();
828
829 if (!after_bootmem && table_end > table_start)
830 reserve_early(table_start << PAGE_SHIFT,
831 table_end << PAGE_SHIFT, "PGTABLE");
832
833 printk(KERN_INFO "last_map_addr: %lx end: %lx\n",
834 last_map_addr, end);
835
836 if (!after_bootmem)
837 early_memtest(start, end);
838
839 return last_map_addr >> PAGE_SHIFT;
840}
841
842#ifndef CONFIG_NUMA 613#ifndef CONFIG_NUMA
843void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn) 614void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn)
844{ 615{
@@ -910,28 +681,6 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
910 681
911#endif /* CONFIG_MEMORY_HOTPLUG */ 682#endif /* CONFIG_MEMORY_HOTPLUG */
912 683
913/*
914 * devmem_is_allowed() checks to see if /dev/mem access to a certain address
915 * is valid. The argument is a physical page number.
916 *
917 *
918 * On x86, access has to be given to the first megabyte of ram because that area
919 * contains bios code and data regions used by X and dosemu and similar apps.
920 * Access has to be given to non-kernel-ram areas as well, these contain the PCI
921 * mmio resources as well as potential bios/acpi data regions.
922 */
923int devmem_is_allowed(unsigned long pagenr)
924{
925 if (pagenr <= 256)
926 return 1;
927 if (iomem_is_exclusive(pagenr << PAGE_SHIFT))
928 return 0;
929 if (!page_is_ram(pagenr))
930 return 1;
931 return 0;
932}
933
934
935static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, 684static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel,
936 kcore_modules, kcore_vsyscall; 685 kcore_modules, kcore_vsyscall;
937 686
@@ -985,21 +734,48 @@ void __init mem_init(void)
985const int rodata_test_data = 0xC3; 734const int rodata_test_data = 0xC3;
986EXPORT_SYMBOL_GPL(rodata_test_data); 735EXPORT_SYMBOL_GPL(rodata_test_data);
987 736
737static int kernel_set_to_readonly;
738
739void set_kernel_text_rw(void)
740{
741 unsigned long start = PFN_ALIGN(_stext);
742 unsigned long end = PFN_ALIGN(__start_rodata);
743
744 if (!kernel_set_to_readonly)
745 return;
746
747 pr_debug("Set kernel text: %lx - %lx for read write\n",
748 start, end);
749
750 set_memory_rw(start, (end - start) >> PAGE_SHIFT);
751}
752
753void set_kernel_text_ro(void)
754{
755 unsigned long start = PFN_ALIGN(_stext);
756 unsigned long end = PFN_ALIGN(__start_rodata);
757
758 if (!kernel_set_to_readonly)
759 return;
760
761 pr_debug("Set kernel text: %lx - %lx for read only\n",
762 start, end);
763
764 set_memory_ro(start, (end - start) >> PAGE_SHIFT);
765}
766
988void mark_rodata_ro(void) 767void mark_rodata_ro(void)
989{ 768{
990 unsigned long start = PFN_ALIGN(_stext), end = PFN_ALIGN(__end_rodata); 769 unsigned long start = PFN_ALIGN(_stext), end = PFN_ALIGN(__end_rodata);
991 unsigned long rodata_start = 770 unsigned long rodata_start =
992 ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK; 771 ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK;
993 772
994#ifdef CONFIG_DYNAMIC_FTRACE
995 /* Dynamic tracing modifies the kernel text section */
996 start = rodata_start;
997#endif
998
999 printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", 773 printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
1000 (end - start) >> 10); 774 (end - start) >> 10);
1001 set_memory_ro(start, (end - start) >> PAGE_SHIFT); 775 set_memory_ro(start, (end - start) >> PAGE_SHIFT);
1002 776
777 kernel_set_to_readonly = 1;
778
1003 /* 779 /*
1004 * The rodata section (but not the kernel text!) should also be 780 * The rodata section (but not the kernel text!) should also be
1005 * not-executable. 781 * not-executable.
@@ -1019,13 +795,6 @@ void mark_rodata_ro(void)
1019 795
1020#endif 796#endif
1021 797
1022#ifdef CONFIG_BLK_DEV_INITRD
1023void free_initrd_mem(unsigned long start, unsigned long end)
1024{
1025 free_init_pages("initrd memory", start, end);
1026}
1027#endif
1028
1029int __init reserve_bootmem_generic(unsigned long phys, unsigned long len, 798int __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
1030 int flags) 799 int flags)
1031{ 800{
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
index b6a61f3d7ef8..fe6f84ca121e 100644
--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -19,10 +19,11 @@
19#include <asm/iomap.h> 19#include <asm/iomap.h>
20#include <asm/pat.h> 20#include <asm/pat.h>
21#include <linux/module.h> 21#include <linux/module.h>
22#include <linux/highmem.h>
22 23
23int is_io_mapping_possible(resource_size_t base, unsigned long size) 24int is_io_mapping_possible(resource_size_t base, unsigned long size)
24{ 25{
25#ifndef CONFIG_X86_PAE 26#if !defined(CONFIG_X86_PAE) && defined(CONFIG_PHYS_ADDR_T_64BIT)
26 /* There is no way to map greater than 1 << 32 address without PAE */ 27 /* There is no way to map greater than 1 << 32 address without PAE */
27 if (base + size > 0x100000000ULL) 28 if (base + size > 0x100000000ULL)
28 return 0; 29 return 0;
@@ -31,16 +32,28 @@ int is_io_mapping_possible(resource_size_t base, unsigned long size)
31} 32}
32EXPORT_SYMBOL_GPL(is_io_mapping_possible); 33EXPORT_SYMBOL_GPL(is_io_mapping_possible);
33 34
34/* Map 'pfn' using fixed map 'type' and protections 'prot' 35void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
35 */
36void *
37iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
38{ 36{
39 enum fixed_addresses idx; 37 enum fixed_addresses idx;
40 unsigned long vaddr; 38 unsigned long vaddr;
41 39
42 pagefault_disable(); 40 pagefault_disable();
43 41
42 debug_kmap_atomic(type);
43 idx = type + KM_TYPE_NR * smp_processor_id();
44 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
45 set_pte(kmap_pte - idx, pfn_pte(pfn, prot));
46 arch_flush_lazy_mmu_mode();
47
48 return (void *)vaddr;
49}
50
51/*
52 * Map 'pfn' using fixed map 'type' and protections 'prot'
53 */
54void *
55iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
56{
44 /* 57 /*
45 * For non-PAT systems, promote PAGE_KERNEL_WC to PAGE_KERNEL_UC_MINUS. 58 * For non-PAT systems, promote PAGE_KERNEL_WC to PAGE_KERNEL_UC_MINUS.
46 * PAGE_KERNEL_WC maps to PWT, which translates to uncached if the 59 * PAGE_KERNEL_WC maps to PWT, which translates to uncached if the
@@ -50,12 +63,7 @@ iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
50 if (!pat_enabled && pgprot_val(prot) == pgprot_val(PAGE_KERNEL_WC)) 63 if (!pat_enabled && pgprot_val(prot) == pgprot_val(PAGE_KERNEL_WC))
51 prot = PAGE_KERNEL_UC_MINUS; 64 prot = PAGE_KERNEL_UC_MINUS;
52 65
53 idx = type + KM_TYPE_NR*smp_processor_id(); 66 return kmap_atomic_prot_pfn(pfn, type, prot);
54 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
55 set_pte(kmap_pte-idx, pfn_pte(pfn, prot));
56 arch_flush_lazy_mmu_mode();
57
58 return (void*) vaddr;
59} 67}
60EXPORT_SYMBOL_GPL(iomap_atomic_prot_pfn); 68EXPORT_SYMBOL_GPL(iomap_atomic_prot_pfn);
61 69
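
In the iomap_32.c hunk above, is_io_mapping_possible() still refuses ranges that end above 4GB when PAE is unavailable; the new condition merely restricts the test to kernels with a 64-bit phys_addr_t. The underlying check is a simple bound on the end of the range, roughly as below (the helper name is illustrative, not the kernel's):

#include <stdint.h>
#include <stdio.h>

/*
 * Without PAE a 32-bit kernel cannot map physical addresses at or
 * above 4GB, so reject any resource whose end crosses that boundary.
 */
static int range_fits_below_4g(uint64_t base, uint64_t size)
{
	return base + size <= (1ULL << 32);
}

int main(void)
{
	printf("%d\n", range_fits_below_4g(0xe0000000ULL, 0x10000000ULL)); /* 1: stays below 4GB */
	printf("%d\n", range_fits_below_4g(0xf0000000ULL, 0x20000000ULL)); /* 0: crosses 4GB */
	return 0;
}
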
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 433f7bd4648a..0dfa09d69e80 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -22,13 +22,17 @@
22#include <asm/pgalloc.h> 22#include <asm/pgalloc.h>
23#include <asm/pat.h> 23#include <asm/pat.h>
24 24
25#ifdef CONFIG_X86_64 25static inline int phys_addr_valid(resource_size_t addr)
26
27static inline int phys_addr_valid(unsigned long addr)
28{ 26{
29 return addr < (1UL << boot_cpu_data.x86_phys_bits); 27#ifdef CONFIG_PHYS_ADDR_T_64BIT
28 return !(addr >> boot_cpu_data.x86_phys_bits);
29#else
30 return 1;
31#endif
30} 32}
31 33
34#ifdef CONFIG_X86_64
35
32unsigned long __phys_addr(unsigned long x) 36unsigned long __phys_addr(unsigned long x)
33{ 37{
34 if (x >= __START_KERNEL_map) { 38 if (x >= __START_KERNEL_map) {
@@ -38,8 +42,7 @@ unsigned long __phys_addr(unsigned long x)
38 } else { 42 } else {
39 VIRTUAL_BUG_ON(x < PAGE_OFFSET); 43 VIRTUAL_BUG_ON(x < PAGE_OFFSET);
40 x -= PAGE_OFFSET; 44 x -= PAGE_OFFSET;
41 VIRTUAL_BUG_ON(system_state == SYSTEM_BOOTING ? x > MAXMEM : 45 VIRTUAL_BUG_ON(!phys_addr_valid(x));
42 !phys_addr_valid(x));
43 } 46 }
44 return x; 47 return x;
45} 48}
@@ -56,10 +59,8 @@ bool __virt_addr_valid(unsigned long x)
56 if (x < PAGE_OFFSET) 59 if (x < PAGE_OFFSET)
57 return false; 60 return false;
58 x -= PAGE_OFFSET; 61 x -= PAGE_OFFSET;
59 if (system_state == SYSTEM_BOOTING ? 62 if (!phys_addr_valid(x))
60 x > MAXMEM : !phys_addr_valid(x)) {
61 return false; 63 return false;
62 }
63 } 64 }
64 65
65 return pfn_valid(x >> PAGE_SHIFT); 66 return pfn_valid(x >> PAGE_SHIFT);
@@ -68,18 +69,12 @@ EXPORT_SYMBOL(__virt_addr_valid);
68 69
69#else 70#else
70 71
71static inline int phys_addr_valid(unsigned long addr)
72{
73 return 1;
74}
75
76#ifdef CONFIG_DEBUG_VIRTUAL 72#ifdef CONFIG_DEBUG_VIRTUAL
77unsigned long __phys_addr(unsigned long x) 73unsigned long __phys_addr(unsigned long x)
78{ 74{
79 /* VMALLOC_* aren't constants; not available at the boot time */ 75 /* VMALLOC_* aren't constants */
80 VIRTUAL_BUG_ON(x < PAGE_OFFSET); 76 VIRTUAL_BUG_ON(x < PAGE_OFFSET);
81 VIRTUAL_BUG_ON(system_state != SYSTEM_BOOTING && 77 VIRTUAL_BUG_ON(__vmalloc_start_set && is_vmalloc_addr((void *) x));
82 is_vmalloc_addr((void *) x));
83 return x - PAGE_OFFSET; 78 return x - PAGE_OFFSET;
84} 79}
85EXPORT_SYMBOL(__phys_addr); 80EXPORT_SYMBOL(__phys_addr);
@@ -89,7 +84,9 @@ bool __virt_addr_valid(unsigned long x)
89{ 84{
90 if (x < PAGE_OFFSET) 85 if (x < PAGE_OFFSET)
91 return false; 86 return false;
92 if (system_state != SYSTEM_BOOTING && is_vmalloc_addr((void *) x)) 87 if (__vmalloc_start_set && is_vmalloc_addr((void *) x))
88 return false;
89 if (x >= FIXADDR_START)
93 return false; 90 return false;
94 return pfn_valid((x - PAGE_OFFSET) >> PAGE_SHIFT); 91 return pfn_valid((x - PAGE_OFFSET) >> PAGE_SHIFT);
95} 92}
@@ -508,13 +505,19 @@ static inline pte_t * __init early_ioremap_pte(unsigned long addr)
508 return &bm_pte[pte_index(addr)]; 505 return &bm_pte[pte_index(addr)];
509} 506}
510 507
508static unsigned long slot_virt[FIX_BTMAPS_SLOTS] __initdata;
509
511void __init early_ioremap_init(void) 510void __init early_ioremap_init(void)
512{ 511{
513 pmd_t *pmd; 512 pmd_t *pmd;
513 int i;
514 514
515 if (early_ioremap_debug) 515 if (early_ioremap_debug)
516 printk(KERN_INFO "early_ioremap_init()\n"); 516 printk(KERN_INFO "early_ioremap_init()\n");
517 517
518 for (i = 0; i < FIX_BTMAPS_SLOTS; i++)
519 slot_virt[i] = __fix_to_virt(FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*i);
520
518 pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); 521 pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
519 memset(bm_pte, 0, sizeof(bm_pte)); 522 memset(bm_pte, 0, sizeof(bm_pte));
520 pmd_populate_kernel(&init_mm, pmd, bm_pte); 523 pmd_populate_kernel(&init_mm, pmd, bm_pte);
@@ -581,6 +584,7 @@ static inline void __init early_clear_fixmap(enum fixed_addresses idx)
581 584
582static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata; 585static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata;
583static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata; 586static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata;
587
584static int __init check_early_ioremap_leak(void) 588static int __init check_early_ioremap_leak(void)
585{ 589{
586 int count = 0; 590 int count = 0;
@@ -602,7 +606,8 @@ static int __init check_early_ioremap_leak(void)
602} 606}
603late_initcall(check_early_ioremap_leak); 607late_initcall(check_early_ioremap_leak);
604 608
605static void __init __iomem *__early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot) 609static void __init __iomem *
610__early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot)
606{ 611{
607 unsigned long offset, last_addr; 612 unsigned long offset, last_addr;
608 unsigned int nrpages; 613 unsigned int nrpages;
@@ -668,9 +673,9 @@ static void __init __iomem *__early_ioremap(unsigned long phys_addr, unsigned lo
668 --nrpages; 673 --nrpages;
669 } 674 }
670 if (early_ioremap_debug) 675 if (early_ioremap_debug)
671 printk(KERN_CONT "%08lx + %08lx\n", offset, fix_to_virt(idx0)); 676 printk(KERN_CONT "%08lx + %08lx\n", offset, slot_virt[slot]);
672 677
673 prev_map[slot] = (void __iomem *)(offset + fix_to_virt(idx0)); 678 prev_map[slot] = (void __iomem *)(offset + slot_virt[slot]);
674 return prev_map[slot]; 679 return prev_map[slot];
675} 680}
676 681
@@ -738,8 +743,3 @@ void __init early_iounmap(void __iomem *addr, unsigned long size)
738 } 743 }
739 prev_map[slot] = NULL; 744 prev_map[slot] = NULL;
740} 745}
741
742void __this_fixmap_does_not_exist(void)
743{
744 WARN_ON(1);
745}
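The slot_virt[] table above only changes how each BTMAP slot's virtual address is computed; callers keep using the early_ioremap()/early_iounmap() pair exactly as before. A minimal sketch of such an early-boot caller, where peek_boot_dword() and the physical address it reads are purely hypothetical:

#include <linux/io.h>

static u32 __init peek_boot_dword(unsigned long phys)
{
	void __iomem *va;
	u32 val;

	/* Borrow one BTMAP slot for a 4-byte window onto 'phys' */
	va = early_ioremap(phys, sizeof(val));
	if (!va)
		return 0;
	val = readl(va);
	/* Release the slot, otherwise check_early_ioremap_leak() will complain */
	early_iounmap(va, sizeof(val));
	return val;
}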
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 6a518dd08a36..4f115e00486b 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -310,7 +310,7 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
310 struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); 310 struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx);
311 311
312 if (!ctx->active) { 312 if (!ctx->active) {
313 pr_warning("kmmio: spurious debug trap on CPU %d.\n", 313 pr_debug("kmmio: spurious debug trap on CPU %d.\n",
314 smp_processor_id()); 314 smp_processor_id());
315 goto out; 315 goto out;
316 } 316 }
diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c
index 0bcd7883d036..605c8be06217 100644
--- a/arch/x86/mm/memtest.c
+++ b/arch/x86/mm/memtest.c
@@ -100,6 +100,9 @@ static int __init parse_memtest(char *arg)
100{ 100{
101 if (arg) 101 if (arg)
102 memtest_pattern = simple_strtoul(arg, NULL, 0); 102 memtest_pattern = simple_strtoul(arg, NULL, 0);
103 else
104 memtest_pattern = ARRAY_SIZE(patterns);
105
103 return 0; 106 return 0;
104} 107}
105 108
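The new else branch changes what a bare "memtest" boot parameter means: with no value, memtest_pattern is set to ARRAY_SIZE(patterns), so every built-in pattern is run, whereas previously the bare form left the count at its default and effectively did nothing. A hedged illustration of the two command-line forms (the number is only an example):

	memtest=4    run the first 4 built-in test patterns, as before
	memtest      no value: now selects all built-in patterns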
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
index 2c4baa88f2cb..c9342ed8b402 100644
--- a/arch/x86/mm/mmio-mod.c
+++ b/arch/x86/mm/mmio-mod.c
@@ -378,27 +378,34 @@ static void clear_trace_list(void)
378} 378}
379 379
380#ifdef CONFIG_HOTPLUG_CPU 380#ifdef CONFIG_HOTPLUG_CPU
381static cpumask_t downed_cpus; 381static cpumask_var_t downed_cpus;
382 382
383static void enter_uniprocessor(void) 383static void enter_uniprocessor(void)
384{ 384{
385 int cpu; 385 int cpu;
386 int err; 386 int err;
387 387
388 if (downed_cpus == NULL &&
389 !alloc_cpumask_var(&downed_cpus, GFP_KERNEL)) {
390 pr_notice(NAME "Failed to allocate mask\n");
391 goto out;
392 }
393
388 get_online_cpus(); 394 get_online_cpus();
389 downed_cpus = cpu_online_map; 395 cpumask_copy(downed_cpus, cpu_online_mask);
390 cpu_clear(first_cpu(cpu_online_map), downed_cpus); 396 cpumask_clear_cpu(cpumask_first(cpu_online_mask), downed_cpus);
391 if (num_online_cpus() > 1) 397 if (num_online_cpus() > 1)
392 pr_notice(NAME "Disabling non-boot CPUs...\n"); 398 pr_notice(NAME "Disabling non-boot CPUs...\n");
393 put_online_cpus(); 399 put_online_cpus();
394 400
395 for_each_cpu_mask(cpu, downed_cpus) { 401 for_each_cpu(cpu, downed_cpus) {
396 err = cpu_down(cpu); 402 err = cpu_down(cpu);
397 if (!err) 403 if (!err)
398 pr_info(NAME "CPU%d is down.\n", cpu); 404 pr_info(NAME "CPU%d is down.\n", cpu);
399 else 405 else
400 pr_err(NAME "Error taking CPU%d down: %d\n", cpu, err); 406 pr_err(NAME "Error taking CPU%d down: %d\n", cpu, err);
401 } 407 }
408out:
402 if (num_online_cpus() > 1) 409 if (num_online_cpus() > 1)
403 pr_warning(NAME "multiple CPUs still online, " 410 pr_warning(NAME "multiple CPUs still online, "
404 "may miss events.\n"); 411 "may miss events.\n");
@@ -411,10 +418,10 @@ static void __ref leave_uniprocessor(void)
411 int cpu; 418 int cpu;
412 int err; 419 int err;
413 420
414 if (cpus_weight(downed_cpus) == 0) 421 if (downed_cpus == NULL || cpumask_weight(downed_cpus) == 0)
415 return; 422 return;
416 pr_notice(NAME "Re-enabling CPUs...\n"); 423 pr_notice(NAME "Re-enabling CPUs...\n");
417 for_each_cpu_mask(cpu, downed_cpus) { 424 for_each_cpu(cpu, downed_cpus) {
418 err = cpu_up(cpu); 425 err = cpu_up(cpu);
419 if (!err) 426 if (!err)
420 pr_info(NAME "enabled CPU%d.\n", cpu); 427 pr_info(NAME "enabled CPU%d.\n", cpu);
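The conversion above follows the standard cpumask_var_t recipe: allocate the mask once, use cpumask_copy() instead of struct assignment, and replace the old cpu_*/cpus_*/for_each_cpu_mask helpers with their cpumask_* counterparts. A self-contained sketch of that recipe; record_online_cpus() and saved_cpus are hypothetical names, not part of this patch:

#include <linux/cpumask.h>
#include <linux/kernel.h>
#include <linux/slab.h>

static cpumask_var_t saved_cpus;

static int record_online_cpus(void)
{
	int cpu;

	/* Real storage is only allocated with CONFIG_CPUMASK_OFFSTACK;
	 * otherwise this is a no-op that returns true. */
	if (!alloc_cpumask_var(&saved_cpus, GFP_KERNEL))
		return -ENOMEM;

	cpumask_copy(saved_cpus, cpu_online_mask);
	for_each_cpu(cpu, saved_cpus)
		pr_info("cpu %d was online\n", cpu);
	return 0;
}

A matching free_cpumask_var(saved_cpus) in the teardown path completes the pattern.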
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
new file mode 100644
index 000000000000..550df481accd
--- /dev/null
+++ b/arch/x86/mm/numa.c
@@ -0,0 +1,67 @@
1/* Common code for 32 and 64-bit NUMA */
2#include <linux/topology.h>
3#include <linux/module.h>
4#include <linux/bootmem.h>
5
6#ifdef CONFIG_DEBUG_PER_CPU_MAPS
7# define DBG(x...) printk(KERN_DEBUG x)
8#else
9# define DBG(x...)
10#endif
11
12/*
13 * Which logical CPUs are on which nodes
14 */
15cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
16EXPORT_SYMBOL(node_to_cpumask_map);
17
18/*
19 * Allocate node_to_cpumask_map based on number of available nodes
20 * Requires node_possible_map to be valid.
21 *
22 * Note: node_to_cpumask() is not valid until after this is done.
23 * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.)
24 */
25void __init setup_node_to_cpumask_map(void)
26{
27 unsigned int node, num = 0;
28
29 /* setup nr_node_ids if not done yet */
30 if (nr_node_ids == MAX_NUMNODES) {
31 for_each_node_mask(node, node_possible_map)
32 num = node;
33 nr_node_ids = num + 1;
34 }
35
36 /* allocate the map */
37 for (node = 0; node < nr_node_ids; node++)
38 alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]);
39
40 /* cpumask_of_node() will now work */
41 pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids);
42}
43
44#ifdef CONFIG_DEBUG_PER_CPU_MAPS
45/*
46 * Returns a pointer to the bitmask of CPUs on Node 'node'.
47 */
48const struct cpumask *cpumask_of_node(int node)
49{
50 if (node >= nr_node_ids) {
51 printk(KERN_WARNING
52 "cpumask_of_node(%d): node > nr_node_ids(%d)\n",
53 node, nr_node_ids);
54 dump_stack();
55 return cpu_none_mask;
56 }
57 if (node_to_cpumask_map[node] == NULL) {
58 printk(KERN_WARNING
59 "cpumask_of_node(%d): no node_to_cpumask_map!\n",
60 node);
61 dump_stack();
62 return cpu_online_mask;
63 }
64 return node_to_cpumask_map[node];
65}
66EXPORT_SYMBOL(cpumask_of_node);
67#endif
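Once setup_node_to_cpumask_map() has run, cpumask_of_node() is the way to reach a node's CPU set; the CONFIG_DEBUG_PER_CPU_MAPS variant above only adds range and NULL checking on top of the plain array lookup. A short sketch of a consumer, where bind_worker_to_node() is a hypothetical helper rather than anything in this patch:

#include <linux/sched.h>
#include <linux/topology.h>

/* Restrict a kernel thread to the CPUs of one NUMA node. */
static int bind_worker_to_node(struct task_struct *tsk, int node)
{
	/* Only valid after setup_node_to_cpumask_map() has populated the map */
	return set_cpus_allowed_ptr(tsk, cpumask_of_node(node));
}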
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 451fe95a0352..3daefa04ace5 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -416,10 +416,11 @@ void __init initmem_init(unsigned long start_pfn,
416 for_each_online_node(nid) 416 for_each_online_node(nid)
417 propagate_e820_map_node(nid); 417 propagate_e820_map_node(nid);
418 418
419 for_each_online_node(nid) 419 for_each_online_node(nid) {
420 memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); 420 memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
421 NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
422 }
421 423
422 NODE_DATA(0)->bdata = &bootmem_node_data[0];
423 setup_bootmem_allocator(); 424 setup_bootmem_allocator();
424} 425}
425 426
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 64c9cf043cdd..d73aaa892371 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -20,12 +20,6 @@
20#include <asm/acpi.h> 20#include <asm/acpi.h>
21#include <asm/k8.h> 21#include <asm/k8.h>
22 22
23#ifdef CONFIG_DEBUG_PER_CPU_MAPS
24# define DBG(x...) printk(KERN_DEBUG x)
25#else
26# define DBG(x...)
27#endif
28
29struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; 23struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
30EXPORT_SYMBOL(node_data); 24EXPORT_SYMBOL(node_data);
31 25
@@ -49,12 +43,6 @@ DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
49EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map); 43EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
50 44
51/* 45/*
52 * Which logical CPUs are on which nodes
53 */
54cpumask_t *node_to_cpumask_map;
55EXPORT_SYMBOL(node_to_cpumask_map);
56
57/*
58 * Given a shift value, try to populate memnodemap[] 46 * Given a shift value, try to populate memnodemap[]
59 * Returns : 47 * Returns :
60 * 1 if OK 48 * 1 if OK
@@ -661,36 +649,6 @@ void __init init_cpu_to_node(void)
661#endif 649#endif
662 650
663 651
664/*
665 * Allocate node_to_cpumask_map based on number of available nodes
666 * Requires node_possible_map to be valid.
667 *
668 * Note: node_to_cpumask() is not valid until after this is done.
669 * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.)
670 */
671void __init setup_node_to_cpumask_map(void)
672{
673 unsigned int node, num = 0;
674 cpumask_t *map;
675
676 /* setup nr_node_ids if not done yet */
677 if (nr_node_ids == MAX_NUMNODES) {
678 for_each_node_mask(node, node_possible_map)
679 num = node;
680 nr_node_ids = num + 1;
681 }
682
683 /* allocate the map */
684 map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t));
685 DBG("node_to_cpumask_map at %p for %d nodes\n", map, nr_node_ids);
686
687 pr_debug("Node to cpumask map at %p for %d nodes\n",
688 map, nr_node_ids);
689
690 /* node_to_cpumask() will now work */
691 node_to_cpumask_map = map;
692}
693
694void __cpuinit numa_set_node(int cpu, int node) 652void __cpuinit numa_set_node(int cpu, int node)
695{ 653{
696 int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map); 654 int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
@@ -723,12 +681,12 @@ void __cpuinit numa_clear_node(int cpu)
723 681
724void __cpuinit numa_add_cpu(int cpu) 682void __cpuinit numa_add_cpu(int cpu)
725{ 683{
726 cpu_set(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); 684 cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
727} 685}
728 686
729void __cpuinit numa_remove_cpu(int cpu) 687void __cpuinit numa_remove_cpu(int cpu)
730{ 688{
731 cpu_clear(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); 689 cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
732} 690}
733 691
734#else /* CONFIG_DEBUG_PER_CPU_MAPS */ 692#else /* CONFIG_DEBUG_PER_CPU_MAPS */
@@ -739,20 +697,20 @@ void __cpuinit numa_remove_cpu(int cpu)
739static void __cpuinit numa_set_cpumask(int cpu, int enable) 697static void __cpuinit numa_set_cpumask(int cpu, int enable)
740{ 698{
741 int node = early_cpu_to_node(cpu); 699 int node = early_cpu_to_node(cpu);
742 cpumask_t *mask; 700 struct cpumask *mask;
743 char buf[64]; 701 char buf[64];
744 702
745 if (node_to_cpumask_map == NULL) { 703 mask = node_to_cpumask_map[node];
746 printk(KERN_ERR "node_to_cpumask_map NULL\n"); 704 if (mask == NULL) {
705 printk(KERN_ERR "node_to_cpumask_map[%i] NULL\n", node);
747 dump_stack(); 706 dump_stack();
748 return; 707 return;
749 } 708 }
750 709
751 mask = &node_to_cpumask_map[node];
752 if (enable) 710 if (enable)
753 cpu_set(cpu, *mask); 711 cpumask_set_cpu(cpu, mask);
754 else 712 else
755 cpu_clear(cpu, *mask); 713 cpumask_clear_cpu(cpu, mask);
756 714
757 cpulist_scnprintf(buf, sizeof(buf), mask); 715 cpulist_scnprintf(buf, sizeof(buf), mask);
758 printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n", 716 printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
@@ -799,59 +757,6 @@ int early_cpu_to_node(int cpu)
799 return per_cpu(x86_cpu_to_node_map, cpu); 757 return per_cpu(x86_cpu_to_node_map, cpu);
800} 758}
801 759
802
803/* empty cpumask */
804static const cpumask_t cpu_mask_none;
805
806/*
807 * Returns a pointer to the bitmask of CPUs on Node 'node'.
808 */
809const cpumask_t *cpumask_of_node(int node)
810{
811 if (node_to_cpumask_map == NULL) {
812 printk(KERN_WARNING
813 "cpumask_of_node(%d): no node_to_cpumask_map!\n",
814 node);
815 dump_stack();
816 return (const cpumask_t *)&cpu_online_map;
817 }
818 if (node >= nr_node_ids) {
819 printk(KERN_WARNING
820 "cpumask_of_node(%d): node > nr_node_ids(%d)\n",
821 node, nr_node_ids);
822 dump_stack();
823 return &cpu_mask_none;
824 }
825 return &node_to_cpumask_map[node];
826}
827EXPORT_SYMBOL(cpumask_of_node);
828
829/*
830 * Returns a bitmask of CPUs on Node 'node'.
831 *
832 * Side note: this function creates the returned cpumask on the stack
833 * so with a high NR_CPUS count, excessive stack space is used. The
834 * node_to_cpumask_ptr function should be used whenever possible.
835 */
836cpumask_t node_to_cpumask(int node)
837{
838 if (node_to_cpumask_map == NULL) {
839 printk(KERN_WARNING
840 "node_to_cpumask(%d): no node_to_cpumask_map!\n", node);
841 dump_stack();
842 return cpu_online_map;
843 }
844 if (node >= nr_node_ids) {
845 printk(KERN_WARNING
846 "node_to_cpumask(%d): node > nr_node_ids(%d)\n",
847 node, nr_node_ids);
848 dump_stack();
849 return cpu_mask_none;
850 }
851 return node_to_cpumask_map[node];
852}
853EXPORT_SYMBOL(node_to_cpumask);
854
855/* 760/*
856 * --------- end of debug versions of the numa functions --------- 761 * --------- end of debug versions of the numa functions ---------
857 */ 762 */
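With node_to_cpumask_map now an array of per-node masks shared with numa.c above, membership queries also go through the cpumask_* helpers. A small sketch; cpu_is_mapped_to_its_node() is a hypothetical debug aid, not part of this patch:

#include <linux/cpumask.h>
#include <asm/topology.h>

static bool cpu_is_mapped_to_its_node(int cpu)
{
	int node = early_cpu_to_node(cpu);

	/* True once numa_add_cpu(cpu) has recorded the CPU in its node's mask */
	return cpumask_test_cpu(cpu, node_to_cpumask_map[node]);
}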
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 9015e5e412b5..660cac75ae11 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -16,6 +16,7 @@
16#include <asm/processor.h> 16#include <asm/processor.h>
17#include <asm/tlbflush.h> 17#include <asm/tlbflush.h>
18#include <asm/sections.h> 18#include <asm/sections.h>
19#include <asm/setup.h>
19#include <asm/uaccess.h> 20#include <asm/uaccess.h>
20#include <asm/pgalloc.h> 21#include <asm/pgalloc.h>
21#include <asm/proto.h> 22#include <asm/proto.h>
@@ -33,6 +34,7 @@ struct cpa_data {
33 unsigned long pfn; 34 unsigned long pfn;
34 unsigned force_split : 1; 35 unsigned force_split : 1;
35 int curpage; 36 int curpage;
37 struct page **pages;
36}; 38};
37 39
38/* 40/*
@@ -45,6 +47,7 @@ static DEFINE_SPINLOCK(cpa_lock);
45 47
46#define CPA_FLUSHTLB 1 48#define CPA_FLUSHTLB 1
47#define CPA_ARRAY 2 49#define CPA_ARRAY 2
50#define CPA_PAGES_ARRAY 4
48 51
49#ifdef CONFIG_PROC_FS 52#ifdef CONFIG_PROC_FS
50static unsigned long direct_pages_count[PG_LEVEL_NUM]; 53static unsigned long direct_pages_count[PG_LEVEL_NUM];
@@ -95,7 +98,7 @@ static inline unsigned long highmap_start_pfn(void)
95 98
96static inline unsigned long highmap_end_pfn(void) 99static inline unsigned long highmap_end_pfn(void)
97{ 100{
98 return __pa(roundup((unsigned long)_end, PMD_SIZE)) >> PAGE_SHIFT; 101 return __pa(roundup(_brk_end, PMD_SIZE)) >> PAGE_SHIFT;
99} 102}
100 103
101#endif 104#endif
@@ -201,10 +204,10 @@ static void cpa_flush_range(unsigned long start, int numpages, int cache)
201 } 204 }
202} 205}
203 206
204static void cpa_flush_array(unsigned long *start, int numpages, int cache) 207static void cpa_flush_array(unsigned long *start, int numpages, int cache,
208 int in_flags, struct page **pages)
205{ 209{
206 unsigned int i, level; 210 unsigned int i, level;
207 unsigned long *addr;
208 211
209 BUG_ON(irqs_disabled()); 212 BUG_ON(irqs_disabled());
210 213
@@ -225,14 +228,22 @@ static void cpa_flush_array(unsigned long *start, int numpages, int cache)
225 * will cause all other CPUs to flush the same 228 * will cause all other CPUs to flush the same
226 * cachelines: 229 * cachelines:
227 */ 230 */
228 for (i = 0, addr = start; i < numpages; i++, addr++) { 231 for (i = 0; i < numpages; i++) {
229 pte_t *pte = lookup_address(*addr, &level); 232 unsigned long addr;
233 pte_t *pte;
234
235 if (in_flags & CPA_PAGES_ARRAY)
236 addr = (unsigned long)page_address(pages[i]);
237 else
238 addr = start[i];
239
240 pte = lookup_address(addr, &level);
230 241
231 /* 242 /*
232 * Only flush present addresses: 243 * Only flush present addresses:
233 */ 244 */
234 if (pte && (pte_val(*pte) & _PAGE_PRESENT)) 245 if (pte && (pte_val(*pte) & _PAGE_PRESENT))
235 clflush_cache_range((void *) *addr, PAGE_SIZE); 246 clflush_cache_range((void *)addr, PAGE_SIZE);
236 } 247 }
237} 248}
238 249
@@ -584,7 +595,9 @@ static int __change_page_attr(struct cpa_data *cpa, int primary)
584 unsigned int level; 595 unsigned int level;
585 pte_t *kpte, old_pte; 596 pte_t *kpte, old_pte;
586 597
587 if (cpa->flags & CPA_ARRAY) 598 if (cpa->flags & CPA_PAGES_ARRAY)
599 address = (unsigned long)page_address(cpa->pages[cpa->curpage]);
600 else if (cpa->flags & CPA_ARRAY)
588 address = cpa->vaddr[cpa->curpage]; 601 address = cpa->vaddr[cpa->curpage];
589 else 602 else
590 address = *cpa->vaddr; 603 address = *cpa->vaddr;
@@ -687,7 +700,9 @@ static int cpa_process_alias(struct cpa_data *cpa)
687 * No need to redo, when the primary call touched the direct 700 * No need to redo, when the primary call touched the direct
688 * mapping already: 701 * mapping already:
689 */ 702 */
690 if (cpa->flags & CPA_ARRAY) 703 if (cpa->flags & CPA_PAGES_ARRAY)
704 vaddr = (unsigned long)page_address(cpa->pages[cpa->curpage]);
705 else if (cpa->flags & CPA_ARRAY)
691 vaddr = cpa->vaddr[cpa->curpage]; 706 vaddr = cpa->vaddr[cpa->curpage];
692 else 707 else
693 vaddr = *cpa->vaddr; 708 vaddr = *cpa->vaddr;
@@ -698,7 +713,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
698 alias_cpa = *cpa; 713 alias_cpa = *cpa;
699 temp_cpa_vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT); 714 temp_cpa_vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
700 alias_cpa.vaddr = &temp_cpa_vaddr; 715 alias_cpa.vaddr = &temp_cpa_vaddr;
701 alias_cpa.flags &= ~CPA_ARRAY; 716 alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
702 717
703 718
704 ret = __change_page_attr_set_clr(&alias_cpa, 0); 719 ret = __change_page_attr_set_clr(&alias_cpa, 0);
@@ -711,7 +726,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
711 * No need to redo, when the primary call touched the high 726 * No need to redo, when the primary call touched the high
712 * mapping already: 727 * mapping already:
713 */ 728 */
714 if (within(vaddr, (unsigned long) _text, (unsigned long) _end)) 729 if (within(vaddr, (unsigned long) _text, _brk_end))
715 return 0; 730 return 0;
716 731
717 /* 732 /*
@@ -724,7 +739,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
724 alias_cpa = *cpa; 739 alias_cpa = *cpa;
725 temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base; 740 temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base;
726 alias_cpa.vaddr = &temp_cpa_vaddr; 741 alias_cpa.vaddr = &temp_cpa_vaddr;
727 alias_cpa.flags &= ~CPA_ARRAY; 742 alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
728 743
729 /* 744 /*
730 * The high mapping range is imprecise, so ignore the return value. 745 * The high mapping range is imprecise, so ignore the return value.
@@ -745,7 +760,7 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
745 */ 760 */
746 cpa->numpages = numpages; 761 cpa->numpages = numpages;
747 /* for array changes, we can't use large page */ 762 /* for array changes, we can't use large page */
748 if (cpa->flags & CPA_ARRAY) 763 if (cpa->flags & (CPA_ARRAY | CPA_PAGES_ARRAY))
749 cpa->numpages = 1; 764 cpa->numpages = 1;
750 765
751 if (!debug_pagealloc) 766 if (!debug_pagealloc)
@@ -769,7 +784,7 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
769 */ 784 */
770 BUG_ON(cpa->numpages > numpages); 785 BUG_ON(cpa->numpages > numpages);
771 numpages -= cpa->numpages; 786 numpages -= cpa->numpages;
772 if (cpa->flags & CPA_ARRAY) 787 if (cpa->flags & (CPA_PAGES_ARRAY | CPA_ARRAY))
773 cpa->curpage++; 788 cpa->curpage++;
774 else 789 else
775 *cpa->vaddr += cpa->numpages * PAGE_SIZE; 790 *cpa->vaddr += cpa->numpages * PAGE_SIZE;
@@ -786,7 +801,8 @@ static inline int cache_attr(pgprot_t attr)
786 801
787static int change_page_attr_set_clr(unsigned long *addr, int numpages, 802static int change_page_attr_set_clr(unsigned long *addr, int numpages,
788 pgprot_t mask_set, pgprot_t mask_clr, 803 pgprot_t mask_set, pgprot_t mask_clr,
789 int force_split, int array) 804 int force_split, int in_flag,
805 struct page **pages)
790{ 806{
791 struct cpa_data cpa; 807 struct cpa_data cpa;
792 int ret, cache, checkalias; 808 int ret, cache, checkalias;
@@ -801,15 +817,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
801 return 0; 817 return 0;
802 818
803 /* Ensure we are PAGE_SIZE aligned */ 819 /* Ensure we are PAGE_SIZE aligned */
804 if (!array) { 820 if (in_flag & CPA_ARRAY) {
805 if (*addr & ~PAGE_MASK) {
806 *addr &= PAGE_MASK;
807 /*
808 * People should not be passing in unaligned addresses:
809 */
810 WARN_ON_ONCE(1);
811 }
812 } else {
813 int i; 821 int i;
814 for (i = 0; i < numpages; i++) { 822 for (i = 0; i < numpages; i++) {
815 if (addr[i] & ~PAGE_MASK) { 823 if (addr[i] & ~PAGE_MASK) {
@@ -817,6 +825,18 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
817 WARN_ON_ONCE(1); 825 WARN_ON_ONCE(1);
818 } 826 }
819 } 827 }
828 } else if (!(in_flag & CPA_PAGES_ARRAY)) {
829 /*
830 * in_flag of CPA_PAGES_ARRAY implies it is aligned.
831 * No need to check in that case
832 */
833 if (*addr & ~PAGE_MASK) {
834 *addr &= PAGE_MASK;
835 /*
836 * People should not be passing in unaligned addresses:
837 */
838 WARN_ON_ONCE(1);
839 }
820 } 840 }
821 841
822 /* Must avoid aliasing mappings in the highmem code */ 842 /* Must avoid aliasing mappings in the highmem code */
@@ -825,6 +845,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
825 vm_unmap_aliases(); 845 vm_unmap_aliases();
826 846
827 cpa.vaddr = addr; 847 cpa.vaddr = addr;
848 cpa.pages = pages;
828 cpa.numpages = numpages; 849 cpa.numpages = numpages;
829 cpa.mask_set = mask_set; 850 cpa.mask_set = mask_set;
830 cpa.mask_clr = mask_clr; 851 cpa.mask_clr = mask_clr;
@@ -832,8 +853,8 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
832 cpa.curpage = 0; 853 cpa.curpage = 0;
833 cpa.force_split = force_split; 854 cpa.force_split = force_split;
834 855
835 if (array) 856 if (in_flag & (CPA_ARRAY | CPA_PAGES_ARRAY))
836 cpa.flags |= CPA_ARRAY; 857 cpa.flags |= in_flag;
837 858
838 /* No alias checking for _NX bit modifications */ 859 /* No alias checking for _NX bit modifications */
839 checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX; 860 checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX;
@@ -859,9 +880,10 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
859 * wbindv): 880 * wbindv):
860 */ 881 */
861 if (!ret && cpu_has_clflush) { 882 if (!ret && cpu_has_clflush) {
862 if (cpa.flags & CPA_ARRAY) 883 if (cpa.flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) {
863 cpa_flush_array(addr, numpages, cache); 884 cpa_flush_array(addr, numpages, cache,
864 else 885 cpa.flags, pages);
886 } else
865 cpa_flush_range(*addr, numpages, cache); 887 cpa_flush_range(*addr, numpages, cache);
866 } else 888 } else
867 cpa_flush_all(cache); 889 cpa_flush_all(cache);
@@ -874,14 +896,28 @@ static inline int change_page_attr_set(unsigned long *addr, int numpages,
874 pgprot_t mask, int array) 896 pgprot_t mask, int array)
875{ 897{
876 return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0, 898 return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0,
877 array); 899 (array ? CPA_ARRAY : 0), NULL);
878} 900}
879 901
880static inline int change_page_attr_clear(unsigned long *addr, int numpages, 902static inline int change_page_attr_clear(unsigned long *addr, int numpages,
881 pgprot_t mask, int array) 903 pgprot_t mask, int array)
882{ 904{
883 return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0, 905 return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0,
884 array); 906 (array ? CPA_ARRAY : 0), NULL);
907}
908
909static inline int cpa_set_pages_array(struct page **pages, int numpages,
910 pgprot_t mask)
911{
912 return change_page_attr_set_clr(NULL, numpages, mask, __pgprot(0), 0,
913 CPA_PAGES_ARRAY, pages);
914}
915
916static inline int cpa_clear_pages_array(struct page **pages, int numpages,
917 pgprot_t mask)
918{
919 return change_page_attr_set_clr(NULL, numpages, __pgprot(0), mask, 0,
920 CPA_PAGES_ARRAY, pages);
885} 921}
886 922
887int _set_memory_uc(unsigned long addr, int numpages) 923int _set_memory_uc(unsigned long addr, int numpages)
@@ -1029,7 +1065,7 @@ int set_memory_np(unsigned long addr, int numpages)
1029int set_memory_4k(unsigned long addr, int numpages) 1065int set_memory_4k(unsigned long addr, int numpages)
1030{ 1066{
1031 return change_page_attr_set_clr(&addr, numpages, __pgprot(0), 1067 return change_page_attr_set_clr(&addr, numpages, __pgprot(0),
1032 __pgprot(0), 1, 0); 1068 __pgprot(0), 1, 0, NULL);
1033} 1069}
1034 1070
1035int set_pages_uc(struct page *page, int numpages) 1071int set_pages_uc(struct page *page, int numpages)
@@ -1040,6 +1076,35 @@ int set_pages_uc(struct page *page, int numpages)
1040} 1076}
1041EXPORT_SYMBOL(set_pages_uc); 1077EXPORT_SYMBOL(set_pages_uc);
1042 1078
1079int set_pages_array_uc(struct page **pages, int addrinarray)
1080{
1081 unsigned long start;
1082 unsigned long end;
1083 int i;
1084 int free_idx;
1085
1086 for (i = 0; i < addrinarray; i++) {
1087 start = (unsigned long)page_address(pages[i]);
1088 end = start + PAGE_SIZE;
1089 if (reserve_memtype(start, end, _PAGE_CACHE_UC_MINUS, NULL))
1090 goto err_out;
1091 }
1092
1093 if (cpa_set_pages_array(pages, addrinarray,
1094 __pgprot(_PAGE_CACHE_UC_MINUS)) == 0) {
1095 return 0; /* Success */
1096 }
1097err_out:
1098 free_idx = i;
1099 for (i = 0; i < free_idx; i++) {
1100 start = (unsigned long)page_address(pages[i]);
1101 end = start + PAGE_SIZE;
1102 free_memtype(start, end);
1103 }
1104 return -EINVAL;
1105}
1106EXPORT_SYMBOL(set_pages_array_uc);
1107
1043int set_pages_wb(struct page *page, int numpages) 1108int set_pages_wb(struct page *page, int numpages)
1044{ 1109{
1045 unsigned long addr = (unsigned long)page_address(page); 1110 unsigned long addr = (unsigned long)page_address(page);
@@ -1048,6 +1113,26 @@ int set_pages_wb(struct page *page, int numpages)
1048} 1113}
1049EXPORT_SYMBOL(set_pages_wb); 1114EXPORT_SYMBOL(set_pages_wb);
1050 1115
1116int set_pages_array_wb(struct page **pages, int addrinarray)
1117{
1118 int retval;
1119 unsigned long start;
1120 unsigned long end;
1121 int i;
1122
1123 retval = cpa_clear_pages_array(pages, addrinarray,
1124 __pgprot(_PAGE_CACHE_MASK));
1125
1126 for (i = 0; i < addrinarray; i++) {
1127 start = (unsigned long)page_address(pages[i]);
1128 end = start + PAGE_SIZE;
1129 free_memtype(start, end);
1130 }
1131
1132 return retval;
1133}
1134EXPORT_SYMBOL(set_pages_array_wb);
1135
1051int set_pages_x(struct page *page, int numpages) 1136int set_pages_x(struct page *page, int numpages)
1052{ 1137{
1053 unsigned long addr = (unsigned long)page_address(page); 1138 unsigned long addr = (unsigned long)page_address(page);
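The new set_pages_array_uc()/set_pages_array_wb() pair lets a caller change the cache attribute of a scattered set of pages in one CPA pass with a single cache flush, instead of looping over set_pages_uc()/set_pages_wb() per page. A hedged sketch of the intended calling pattern; the driver-side helpers below are illustrative only and not part of this patch:

#include <linux/kernel.h>
#include <linux/mm.h>
#include <asm/cacheflush.h>

/* Make 'count' scattered pages uncached before handing them to a device. */
static int map_buffers_uncached(struct page **pages, int count)
{
	/* Reserves a UC- memtype per page, then does one array CPA pass */
	return set_pages_array_uc(pages, count);
}

/* Restore write-back caching before the pages are freed. */
static void unmap_buffers_uncached(struct page **pages, int count)
{
	/* Clears the cache bits and releases the memtype reservations */
	if (set_pages_array_wb(pages, count))
		pr_warning("failed to restore WB attribute\n");
}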
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 2ed37158012d..640339ee4fb2 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -677,10 +677,11 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
677 is_ram = pat_pagerange_is_ram(paddr, paddr + size); 677 is_ram = pat_pagerange_is_ram(paddr, paddr + size);
678 678
679 /* 679 /*
680 * reserve_pfn_range() doesn't support RAM pages. 680 * reserve_pfn_range() doesn't support RAM pages. Maintain the current
681 * behavior with RAM pages by returning success.
681 */ 682 */
682 if (is_ram != 0) 683 if (is_ram != 0)
683 return -EINVAL; 684 return 0;
684 685
685 ret = reserve_memtype(paddr, paddr + size, want_flags, &flags); 686 ret = reserve_memtype(paddr, paddr + size, want_flags, &flags);
686 if (ret) 687 if (ret)
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index f2e477c91c1b..46c8834aedc0 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -50,7 +50,7 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pteval)
50 } 50 }
51 pte = pte_offset_kernel(pmd, vaddr); 51 pte = pte_offset_kernel(pmd, vaddr);
52 if (pte_val(pteval)) 52 if (pte_val(pteval))
53 set_pte_present(&init_mm, vaddr, pte, pteval); 53 set_pte_at(&init_mm, vaddr, pte, pteval);
54 else 54 else
55 pte_clear(&init_mm, vaddr, pte); 55 pte_clear(&init_mm, vaddr, pte);
56 56
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 574c8bc95ef0..c7d272b8574c 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -116,6 +116,36 @@ void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
116 reserve_early(phys, phys + length, "ACPI SLIT"); 116 reserve_early(phys, phys + length, "ACPI SLIT");
117} 117}
118 118
119/* Callback for Proximity Domain -> x2APIC mapping */
120void __init
121acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
122{
123 int pxm, node;
124 int apic_id;
125
126 if (srat_disabled())
127 return;
128 if (pa->header.length < sizeof(struct acpi_srat_x2apic_cpu_affinity)) {
129 bad_srat();
130 return;
131 }
132 if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
133 return;
134 pxm = pa->proximity_domain;
135 node = setup_node(pxm);
136 if (node < 0) {
137 printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
138 bad_srat();
139 return;
140 }
141
142 apic_id = pa->apic_id;
143 apicid_to_node[apic_id] = node;
144 acpi_numa = 1;
145 printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n",
146 pxm, apic_id, node);
147}
148
119/* Callback for Proximity Domain -> LAPIC mapping */ 149/* Callback for Proximity Domain -> LAPIC mapping */
120void __init 150void __init
121acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) 151acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index a654d59e4483..821e97017e95 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -187,11 +187,6 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
187 cpumask, cpumask_of(smp_processor_id())); 187 cpumask, cpumask_of(smp_processor_id()));
188 188
189 /* 189 /*
190 * Make the above memory operations globally visible before
191 * sending the IPI.
192 */
193 smp_mb();
194 /*
195 * We have to send the IPI only to 190 * We have to send the IPI only to
196 * CPUs affected. 191 * CPUs affected.
197 */ 192 */