Diffstat (limited to 'arch/x86/mm')
-rw-r--r--  arch/x86/mm/highmem_32.c |  24
-rw-r--r--  arch/x86/mm/init.c       | 344
-rw-r--r--  arch/x86/mm/init_32.c    | 256
-rw-r--r--  arch/x86/mm/init_64.c    | 280
-rw-r--r--  arch/x86/mm/iomap_32.c   |  26
-rw-r--r--  arch/x86/mm/ioremap.c    |  52
-rw-r--r--  arch/x86/mm/kmmio.c      |   2
-rw-r--r--  arch/x86/mm/memtest.c    |   3
-rw-r--r--  arch/x86/mm/numa_32.c    |   5
-rw-r--r--  arch/x86/mm/pageattr.c   | 147
-rw-r--r--  arch/x86/mm/pat.c        |   5
-rw-r--r--  arch/x86/mm/pgtable_32.c |   2
-rw-r--r--  arch/x86/mm/tlb.c        |   5
13 files changed, 589 insertions, 562 deletions
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index 00f127c80b0e..522db5e3d0bf 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -121,22 +121,13 @@ void kunmap_atomic(void *kvaddr, enum km_type type)
121 pagefault_enable(); 121 pagefault_enable();
122} 122}
123 123
124/* This is the same as kmap_atomic() but can map memory that doesn't 124/*
125 * This is the same as kmap_atomic() but can map memory that doesn't
125 * have a struct page associated with it. 126 * have a struct page associated with it.
126 */ 127 */
127void *kmap_atomic_pfn(unsigned long pfn, enum km_type type) 128void *kmap_atomic_pfn(unsigned long pfn, enum km_type type)
128{ 129{
129 enum fixed_addresses idx; 130 return kmap_atomic_prot_pfn(pfn, type, kmap_prot);
130 unsigned long vaddr;
131
132 pagefault_disable();
133
134 idx = type + KM_TYPE_NR*smp_processor_id();
135 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
136 set_pte(kmap_pte-idx, pfn_pte(pfn, kmap_prot));
137 arch_flush_lazy_mmu_mode();
138
139 return (void*) vaddr;
140} 131}
141EXPORT_SYMBOL_GPL(kmap_atomic_pfn); /* temporarily in use by i915 GEM until vmap */ 132EXPORT_SYMBOL_GPL(kmap_atomic_pfn); /* temporarily in use by i915 GEM until vmap */
142 133
@@ -158,7 +149,6 @@ EXPORT_SYMBOL(kunmap);
158EXPORT_SYMBOL(kmap_atomic); 149EXPORT_SYMBOL(kmap_atomic);
159EXPORT_SYMBOL(kunmap_atomic); 150EXPORT_SYMBOL(kunmap_atomic);
160 151
161#ifdef CONFIG_NUMA
162void __init set_highmem_pages_init(void) 152void __init set_highmem_pages_init(void)
163{ 153{
164 struct zone *zone; 154 struct zone *zone;
@@ -182,11 +172,3 @@ void __init set_highmem_pages_init(void)
182 } 172 }
183 totalram_pages += totalhigh_pages; 173 totalram_pages += totalhigh_pages;
184} 174}
185#else
186void __init set_highmem_pages_init(void)
187{
188 add_highpages_with_active_regions(0, highstart_pfn, highend_pfn);
189
190 totalram_pages += totalhigh_pages;
191}
192#endif /* CONFIG_NUMA */
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index ce6a722587d8..fd3da1dda1c9 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -1,8 +1,345 @@
1#include <linux/ioport.h>
1#include <linux/swap.h> 2#include <linux/swap.h>
3
2#include <asm/cacheflush.h> 4#include <asm/cacheflush.h>
5#include <asm/e820.h>
6#include <asm/init.h>
3#include <asm/page.h> 7#include <asm/page.h>
8#include <asm/page_types.h>
4#include <asm/sections.h> 9#include <asm/sections.h>
5#include <asm/system.h> 10#include <asm/system.h>
11#include <asm/tlbflush.h>
12
13unsigned long __initdata e820_table_start;
14unsigned long __meminitdata e820_table_end;
15unsigned long __meminitdata e820_table_top;
16
17int after_bootmem;
18
19int direct_gbpages
20#ifdef CONFIG_DIRECT_GBPAGES
21 = 1
22#endif
23;
24
25static void __init find_early_table_space(unsigned long end, int use_pse,
26 int use_gbpages)
27{
28 unsigned long puds, pmds, ptes, tables, start;
29
30 puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
31 tables = roundup(puds * sizeof(pud_t), PAGE_SIZE);
32
33 if (use_gbpages) {
34 unsigned long extra;
35
36 extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT);
37 pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT;
38 } else
39 pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
40
41 tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE);
42
43 if (use_pse) {
44 unsigned long extra;
45
46 extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT);
47#ifdef CONFIG_X86_32
48 extra += PMD_SIZE;
49#endif
50 ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
51 } else
52 ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
53
54 tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE);
55
56#ifdef CONFIG_X86_32
57 /* for fixmap */
58 tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE);
59#endif
60
61 /*
62 * RED-PEN putting page tables only on node 0 could
63 * cause a hotspot and fill up ZONE_DMA. The page tables
64 * need roughly 0.5KB per GB.
65 */
66#ifdef CONFIG_X86_32
67 start = 0x7000;
68 e820_table_start = find_e820_area(start, max_pfn_mapped<<PAGE_SHIFT,
69 tables, PAGE_SIZE);
70#else /* CONFIG_X86_64 */
71 start = 0x8000;
72 e820_table_start = find_e820_area(start, end, tables, PAGE_SIZE);
73#endif
74 if (e820_table_start == -1UL)
75 panic("Cannot find space for the kernel page tables");
76
77 e820_table_start >>= PAGE_SHIFT;
78 e820_table_end = e820_table_start;
79 e820_table_top = e820_table_start + (tables >> PAGE_SHIFT);
80
81 printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n",
82 end, e820_table_start << PAGE_SHIFT, e820_table_top << PAGE_SHIFT);
83}
84
85struct map_range {
86 unsigned long start;
87 unsigned long end;
88 unsigned page_size_mask;
89};
90
91#ifdef CONFIG_X86_32
92#define NR_RANGE_MR 3
93#else /* CONFIG_X86_64 */
94#define NR_RANGE_MR 5
95#endif
96
97static int __meminit save_mr(struct map_range *mr, int nr_range,
98 unsigned long start_pfn, unsigned long end_pfn,
99 unsigned long page_size_mask)
100{
101 if (start_pfn < end_pfn) {
102 if (nr_range >= NR_RANGE_MR)
103 panic("run out of range for init_memory_mapping\n");
104 mr[nr_range].start = start_pfn<<PAGE_SHIFT;
105 mr[nr_range].end = end_pfn<<PAGE_SHIFT;
106 mr[nr_range].page_size_mask = page_size_mask;
107 nr_range++;
108 }
109
110 return nr_range;
111}
112
113#ifdef CONFIG_X86_64
114static void __init init_gbpages(void)
115{
116 if (direct_gbpages && cpu_has_gbpages)
117 printk(KERN_INFO "Using GB pages for direct mapping\n");
118 else
119 direct_gbpages = 0;
120}
121#else
122static inline void init_gbpages(void)
123{
124}
125#endif
126
127/*
128 * Setup the direct mapping of the physical memory at PAGE_OFFSET.
129 * This runs before bootmem is initialized and gets pages directly from
130 * the physical memory. To access them they are temporarily mapped.
131 */
132unsigned long __init_refok init_memory_mapping(unsigned long start,
133 unsigned long end)
134{
135 unsigned long page_size_mask = 0;
136 unsigned long start_pfn, end_pfn;
137 unsigned long ret = 0;
138 unsigned long pos;
139
140 struct map_range mr[NR_RANGE_MR];
141 int nr_range, i;
142 int use_pse, use_gbpages;
143
144 printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end);
145
146 if (!after_bootmem)
147 init_gbpages();
148
149#ifdef CONFIG_DEBUG_PAGEALLOC
150 /*
151 * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
152 * This will simplify cpa(), which otherwise needs to support splitting
153 * large pages into small in interrupt context, etc.
154 */
155 use_pse = use_gbpages = 0;
156#else
157 use_pse = cpu_has_pse;
158 use_gbpages = direct_gbpages;
159#endif
160
161#ifdef CONFIG_X86_32
162#ifdef CONFIG_X86_PAE
163 set_nx();
164 if (nx_enabled)
165 printk(KERN_INFO "NX (Execute Disable) protection: active\n");
166#endif
167
168 /* Enable PSE if available */
169 if (cpu_has_pse)
170 set_in_cr4(X86_CR4_PSE);
171
172 /* Enable PGE if available */
173 if (cpu_has_pge) {
174 set_in_cr4(X86_CR4_PGE);
175 __supported_pte_mask |= _PAGE_GLOBAL;
176 }
177#endif
178
179 if (use_gbpages)
180 page_size_mask |= 1 << PG_LEVEL_1G;
181 if (use_pse)
182 page_size_mask |= 1 << PG_LEVEL_2M;
183
184 memset(mr, 0, sizeof(mr));
185 nr_range = 0;
186
187 /* head if not big page alignment ? */
188 start_pfn = start >> PAGE_SHIFT;
189 pos = start_pfn << PAGE_SHIFT;
190#ifdef CONFIG_X86_32
191 /*
192 * Don't use a large page for the first 2/4MB of memory
193 * because there are often fixed size MTRRs in there
194 * and overlapping MTRRs into large pages can cause
195 * slowdowns.
196 */
197 if (pos == 0)
198 end_pfn = 1<<(PMD_SHIFT - PAGE_SHIFT);
199 else
200 end_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
201 << (PMD_SHIFT - PAGE_SHIFT);
202#else /* CONFIG_X86_64 */
203 end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT)
204 << (PMD_SHIFT - PAGE_SHIFT);
205#endif
206 if (end_pfn > (end >> PAGE_SHIFT))
207 end_pfn = end >> PAGE_SHIFT;
208 if (start_pfn < end_pfn) {
209 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
210 pos = end_pfn << PAGE_SHIFT;
211 }
212
213 /* big page (2M) range */
214 start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
215 << (PMD_SHIFT - PAGE_SHIFT);
216#ifdef CONFIG_X86_32
217 end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
218#else /* CONFIG_X86_64 */
219 end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT)
220 << (PUD_SHIFT - PAGE_SHIFT);
221 if (end_pfn > ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT)))
222 end_pfn = ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT));
223#endif
224
225 if (start_pfn < end_pfn) {
226 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
227 page_size_mask & (1<<PG_LEVEL_2M));
228 pos = end_pfn << PAGE_SHIFT;
229 }
230
231#ifdef CONFIG_X86_64
232 /* big page (1G) range */
233 start_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT)
234 << (PUD_SHIFT - PAGE_SHIFT);
235 end_pfn = (end >> PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT);
236 if (start_pfn < end_pfn) {
237 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
238 page_size_mask &
239 ((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G)));
240 pos = end_pfn << PAGE_SHIFT;
241 }
242
243 /* tail is not big page (1G) alignment */
244 start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
245 << (PMD_SHIFT - PAGE_SHIFT);
246 end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
247 if (start_pfn < end_pfn) {
248 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
249 page_size_mask & (1<<PG_LEVEL_2M));
250 pos = end_pfn << PAGE_SHIFT;
251 }
252#endif
253
254 /* tail is not big page (2M) alignment */
255 start_pfn = pos>>PAGE_SHIFT;
256 end_pfn = end>>PAGE_SHIFT;
257 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
258
259 /* try to merge same page size and continuous */
260 for (i = 0; nr_range > 1 && i < nr_range - 1; i++) {
261 unsigned long old_start;
262 if (mr[i].end != mr[i+1].start ||
263 mr[i].page_size_mask != mr[i+1].page_size_mask)
264 continue;
265 /* move it */
266 old_start = mr[i].start;
267 memmove(&mr[i], &mr[i+1],
268 (nr_range - 1 - i) * sizeof(struct map_range));
269 mr[i--].start = old_start;
270 nr_range--;
271 }
272
273 for (i = 0; i < nr_range; i++)
274 printk(KERN_DEBUG " %010lx - %010lx page %s\n",
275 mr[i].start, mr[i].end,
276 (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":(
277 (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k"));
278
279 /*
280 * Find space for the kernel direct mapping tables.
281 *
282 * Later we should allocate these tables in the local node of the
283 * memory mapped. Unfortunately this is done currently before the
284 * nodes are discovered.
285 */
286 if (!after_bootmem)
287 find_early_table_space(end, use_pse, use_gbpages);
288
289#ifdef CONFIG_X86_32
290 for (i = 0; i < nr_range; i++)
291 kernel_physical_mapping_init(mr[i].start, mr[i].end,
292 mr[i].page_size_mask);
293 ret = end;
294#else /* CONFIG_X86_64 */
295 for (i = 0; i < nr_range; i++)
296 ret = kernel_physical_mapping_init(mr[i].start, mr[i].end,
297 mr[i].page_size_mask);
298#endif
299
300#ifdef CONFIG_X86_32
301 early_ioremap_page_table_range_init();
302
303 load_cr3(swapper_pg_dir);
304#endif
305
306#ifdef CONFIG_X86_64
307 if (!after_bootmem)
308 mmu_cr4_features = read_cr4();
309#endif
310 __flush_tlb_all();
311
312 if (!after_bootmem && e820_table_end > e820_table_start)
313 reserve_early(e820_table_start << PAGE_SHIFT,
314 e820_table_end << PAGE_SHIFT, "PGTABLE");
315
316 if (!after_bootmem)
317 early_memtest(start, end);
318
319 return ret >> PAGE_SHIFT;
320}
321
322
323/*
324 * devmem_is_allowed() checks to see if /dev/mem access to a certain address
325 * is valid. The argument is a physical page number.
326 *
327 *
328 * On x86, access has to be given to the first megabyte of ram because that area
329 * contains bios code and data regions used by X and dosemu and similar apps.
330 * Access has to be given to non-kernel-ram areas as well, these contain the PCI
331 * mmio resources as well as potential bios/acpi data regions.
332 */
333int devmem_is_allowed(unsigned long pagenr)
334{
335 if (pagenr <= 256)
336 return 1;
337 if (iomem_is_exclusive(pagenr << PAGE_SHIFT))
338 return 0;
339 if (!page_is_ram(pagenr))
340 return 1;
341 return 0;
342}
6 343
7void free_init_pages(char *what, unsigned long begin, unsigned long end) 344void free_init_pages(char *what, unsigned long begin, unsigned long end)
8{ 345{
@@ -47,3 +384,10 @@ void free_initmem(void)
47 (unsigned long)(&__init_begin), 384 (unsigned long)(&__init_begin),
48 (unsigned long)(&__init_end)); 385 (unsigned long)(&__init_end));
49} 386}
387
388#ifdef CONFIG_BLK_DEV_INITRD
389void free_initrd_mem(unsigned long start, unsigned long end)
390{
391 free_init_pages("initrd memory", start, end);
392}
393#endif
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 47df0e1bbeb9..db81e9a8556b 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -49,6 +49,7 @@
49#include <asm/paravirt.h> 49#include <asm/paravirt.h>
50#include <asm/setup.h> 50#include <asm/setup.h>
51#include <asm/cacheflush.h> 51#include <asm/cacheflush.h>
52#include <asm/init.h>
52 53
53unsigned long max_low_pfn_mapped; 54unsigned long max_low_pfn_mapped;
54unsigned long max_pfn_mapped; 55unsigned long max_pfn_mapped;
@@ -58,19 +59,14 @@ unsigned long highstart_pfn, highend_pfn;
58 59
59static noinline int do_test_wp_bit(void); 60static noinline int do_test_wp_bit(void);
60 61
61 62bool __read_mostly __vmalloc_start_set = false;
62static unsigned long __initdata table_start;
63static unsigned long __meminitdata table_end;
64static unsigned long __meminitdata table_top;
65
66static int __initdata after_init_bootmem;
67 63
68static __init void *alloc_low_page(void) 64static __init void *alloc_low_page(void)
69{ 65{
70 unsigned long pfn = table_end++; 66 unsigned long pfn = e820_table_end++;
71 void *adr; 67 void *adr;
72 68
73 if (pfn >= table_top) 69 if (pfn >= e820_table_top)
74 panic("alloc_low_page: ran out of memory"); 70 panic("alloc_low_page: ran out of memory");
75 71
76 adr = __va(pfn * PAGE_SIZE); 72 adr = __va(pfn * PAGE_SIZE);
@@ -90,7 +86,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
90 86
91#ifdef CONFIG_X86_PAE 87#ifdef CONFIG_X86_PAE
92 if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { 88 if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
93 if (after_init_bootmem) 89 if (after_bootmem)
94 pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); 90 pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
95 else 91 else
96 pmd_table = (pmd_t *)alloc_low_page(); 92 pmd_table = (pmd_t *)alloc_low_page();
@@ -117,7 +113,7 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
117 if (!(pmd_val(*pmd) & _PAGE_PRESENT)) { 113 if (!(pmd_val(*pmd) & _PAGE_PRESENT)) {
118 pte_t *page_table = NULL; 114 pte_t *page_table = NULL;
119 115
120 if (after_init_bootmem) { 116 if (after_bootmem) {
121#ifdef CONFIG_DEBUG_PAGEALLOC 117#ifdef CONFIG_DEBUG_PAGEALLOC
122 page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); 118 page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE);
123#endif 119#endif
@@ -168,12 +164,12 @@ static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd,
168 if (pmd_idx_kmap_begin != pmd_idx_kmap_end 164 if (pmd_idx_kmap_begin != pmd_idx_kmap_end
169 && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin 165 && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin
170 && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end 166 && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end
171 && ((__pa(pte) >> PAGE_SHIFT) < table_start 167 && ((__pa(pte) >> PAGE_SHIFT) < e820_table_start
172 || (__pa(pte) >> PAGE_SHIFT) >= table_end)) { 168 || (__pa(pte) >> PAGE_SHIFT) >= e820_table_end)) {
173 pte_t *newpte; 169 pte_t *newpte;
174 int i; 170 int i;
175 171
176 BUG_ON(after_init_bootmem); 172 BUG_ON(after_bootmem);
177 newpte = alloc_low_page(); 173 newpte = alloc_low_page();
178 for (i = 0; i < PTRS_PER_PTE; i++) 174 for (i = 0; i < PTRS_PER_PTE; i++)
179 set_pte(newpte + i, pte[i]); 175 set_pte(newpte + i, pte[i]);
@@ -242,11 +238,14 @@ static inline int is_kernel_text(unsigned long addr)
242 * of max_low_pfn pages, by creating page tables starting from address 238 * of max_low_pfn pages, by creating page tables starting from address
243 * PAGE_OFFSET: 239 * PAGE_OFFSET:
244 */ 240 */
245static void __init kernel_physical_mapping_init(pgd_t *pgd_base, 241unsigned long __init
246 unsigned long start_pfn, 242kernel_physical_mapping_init(unsigned long start,
247 unsigned long end_pfn, 243 unsigned long end,
248 int use_pse) 244 unsigned long page_size_mask)
249{ 245{
246 int use_pse = page_size_mask == (1<<PG_LEVEL_2M);
247 unsigned long start_pfn, end_pfn;
248 pgd_t *pgd_base = swapper_pg_dir;
250 int pgd_idx, pmd_idx, pte_ofs; 249 int pgd_idx, pmd_idx, pte_ofs;
251 unsigned long pfn; 250 unsigned long pfn;
252 pgd_t *pgd; 251 pgd_t *pgd;
@@ -255,6 +254,9 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base,
255 unsigned pages_2m, pages_4k; 254 unsigned pages_2m, pages_4k;
256 int mapping_iter; 255 int mapping_iter;
257 256
257 start_pfn = start >> PAGE_SHIFT;
258 end_pfn = end >> PAGE_SHIFT;
259
258 /* 260 /*
259 * First iteration will setup identity mapping using large/small pages 261 * First iteration will setup identity mapping using large/small pages
260 * based on use_pse, with other attributes same as set by 262 * based on use_pse, with other attributes same as set by
@@ -369,26 +371,6 @@ repeat:
369 mapping_iter = 2; 371 mapping_iter = 2;
370 goto repeat; 372 goto repeat;
371 } 373 }
372}
373
374/*
375 * devmem_is_allowed() checks to see if /dev/mem access to a certain address
376 * is valid. The argument is a physical page number.
377 *
378 *
379 * On x86, access has to be given to the first megabyte of ram because that area
380 * contains bios code and data regions used by X and dosemu and similar apps.
381 * Access has to be given to non-kernel-ram areas as well, these contain the PCI
382 * mmio resources as well as potential bios/acpi data regions.
383 */
384int devmem_is_allowed(unsigned long pagenr)
385{
386 if (pagenr <= 256)
387 return 1;
388 if (iomem_is_exclusive(pagenr << PAGE_SHIFT))
389 return 0;
390 if (!page_is_ram(pagenr))
391 return 1;
392 return 0; 374 return 0;
393} 375}
394 376
@@ -545,8 +527,9 @@ void __init native_pagetable_setup_done(pgd_t *base)
545 * be partially populated, and so it avoids stomping on any existing 527 * be partially populated, and so it avoids stomping on any existing
546 * mappings. 528 * mappings.
547 */ 529 */
548static void __init early_ioremap_page_table_range_init(pgd_t *pgd_base) 530void __init early_ioremap_page_table_range_init(void)
549{ 531{
532 pgd_t *pgd_base = swapper_pg_dir;
550 unsigned long vaddr, end; 533 unsigned long vaddr, end;
551 534
552 /* 535 /*
@@ -641,7 +624,7 @@ static int __init noexec_setup(char *str)
641} 624}
642early_param("noexec", noexec_setup); 625early_param("noexec", noexec_setup);
643 626
644static void __init set_nx(void) 627void __init set_nx(void)
645{ 628{
646 unsigned int v[4], l, h; 629 unsigned int v[4], l, h;
647 630
@@ -793,6 +776,8 @@ void __init initmem_init(unsigned long start_pfn,
793#ifdef CONFIG_FLATMEM 776#ifdef CONFIG_FLATMEM
794 max_mapnr = num_physpages; 777 max_mapnr = num_physpages;
795#endif 778#endif
779 __vmalloc_start_set = true;
780
796 printk(KERN_NOTICE "%ldMB LOWMEM available.\n", 781 printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
797 pages_to_mb(max_low_pfn)); 782 pages_to_mb(max_low_pfn));
798 783
@@ -814,176 +799,66 @@ static void __init zone_sizes_init(void)
814 free_area_init_nodes(max_zone_pfns); 799 free_area_init_nodes(max_zone_pfns);
815} 800}
816 801
802static unsigned long __init setup_node_bootmem(int nodeid,
803 unsigned long start_pfn,
804 unsigned long end_pfn,
805 unsigned long bootmap)
806{
807 unsigned long bootmap_size;
808
809 /* don't touch min_low_pfn */
810 bootmap_size = init_bootmem_node(NODE_DATA(nodeid),
811 bootmap >> PAGE_SHIFT,
812 start_pfn, end_pfn);
813 printk(KERN_INFO " node %d low ram: %08lx - %08lx\n",
814 nodeid, start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
815 printk(KERN_INFO " node %d bootmap %08lx - %08lx\n",
816 nodeid, bootmap, bootmap + bootmap_size);
817 free_bootmem_with_active_regions(nodeid, end_pfn);
818 early_res_to_bootmem(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
819
820 return bootmap + bootmap_size;
821}
822
817void __init setup_bootmem_allocator(void) 823void __init setup_bootmem_allocator(void)
818{ 824{
819 int i; 825 int nodeid;
820 unsigned long bootmap_size, bootmap; 826 unsigned long bootmap_size, bootmap;
821 /* 827 /*
822 * Initialize the boot-time allocator (with low memory only): 828 * Initialize the boot-time allocator (with low memory only):
823 */ 829 */
824 bootmap_size = bootmem_bootmap_pages(max_low_pfn)<<PAGE_SHIFT; 830 bootmap_size = bootmem_bootmap_pages(max_low_pfn)<<PAGE_SHIFT;
825 bootmap = find_e820_area(min_low_pfn<<PAGE_SHIFT, 831 bootmap = find_e820_area(0, max_pfn_mapped<<PAGE_SHIFT, bootmap_size,
826 max_pfn_mapped<<PAGE_SHIFT, bootmap_size,
827 PAGE_SIZE); 832 PAGE_SIZE);
828 if (bootmap == -1L) 833 if (bootmap == -1L)
829 panic("Cannot find bootmem map of size %ld\n", bootmap_size); 834 panic("Cannot find bootmem map of size %ld\n", bootmap_size);
830 reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP"); 835 reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP");
831 836
832 /* don't touch min_low_pfn */
833 bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap >> PAGE_SHIFT,
834 min_low_pfn, max_low_pfn);
835 printk(KERN_INFO " mapped low ram: 0 - %08lx\n", 837 printk(KERN_INFO " mapped low ram: 0 - %08lx\n",
836 max_pfn_mapped<<PAGE_SHIFT); 838 max_pfn_mapped<<PAGE_SHIFT);
837 printk(KERN_INFO " low ram: %08lx - %08lx\n", 839 printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT);
838 min_low_pfn<<PAGE_SHIFT, max_low_pfn<<PAGE_SHIFT);
839 printk(KERN_INFO " bootmap %08lx - %08lx\n",
840 bootmap, bootmap + bootmap_size);
841 for_each_online_node(i)
842 free_bootmem_with_active_regions(i, max_low_pfn);
843 early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT);
844
845 after_init_bootmem = 1;
846}
847
848static void __init find_early_table_space(unsigned long end, int use_pse)
849{
850 unsigned long puds, pmds, ptes, tables, start;
851 840
852 puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; 841 for_each_online_node(nodeid) {
853 tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); 842 unsigned long start_pfn, end_pfn;
854 843
855 pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; 844#ifdef CONFIG_NEED_MULTIPLE_NODES
856 tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); 845 start_pfn = node_start_pfn[nodeid];
857 846 end_pfn = node_end_pfn[nodeid];
858 if (use_pse) { 847 if (start_pfn > max_low_pfn)
859 unsigned long extra; 848 continue;
860 849 if (end_pfn > max_low_pfn)
861 extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT); 850 end_pfn = max_low_pfn;
862 extra += PMD_SIZE;
863 ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
864 } else
865 ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
866
867 tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE);
868
869 /* for fixmap */
870 tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE);
871
872 /*
873 * RED-PEN putting page tables only on node 0 could
874 * cause a hotspot and fill up ZONE_DMA. The page tables
875 * need roughly 0.5KB per GB.
876 */
877 start = 0x7000;
878 table_start = find_e820_area(start, max_pfn_mapped<<PAGE_SHIFT,
879 tables, PAGE_SIZE);
880 if (table_start == -1UL)
881 panic("Cannot find space for the kernel page tables");
882
883 table_start >>= PAGE_SHIFT;
884 table_end = table_start;
885 table_top = table_start + (tables>>PAGE_SHIFT);
886
887 printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n",
888 end, table_start << PAGE_SHIFT,
889 (table_start << PAGE_SHIFT) + tables);
890}
891
892unsigned long __init_refok init_memory_mapping(unsigned long start,
893 unsigned long end)
894{
895 pgd_t *pgd_base = swapper_pg_dir;
896 unsigned long start_pfn, end_pfn;
897 unsigned long big_page_start;
898#ifdef CONFIG_DEBUG_PAGEALLOC
899 /*
900 * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
901 * This will simplify cpa(), which otherwise needs to support splitting
902 * large pages into small in interrupt context, etc.
903 */
904 int use_pse = 0;
905#else 851#else
906 int use_pse = cpu_has_pse; 852 start_pfn = 0;
853 end_pfn = max_low_pfn;
907#endif 854#endif
908 855 bootmap = setup_node_bootmem(nodeid, start_pfn, end_pfn,
909 /* 856 bootmap);
910 * Find space for the kernel direct mapping tables.
911 */
912 if (!after_init_bootmem)
913 find_early_table_space(end, use_pse);
914
915#ifdef CONFIG_X86_PAE
916 set_nx();
917 if (nx_enabled)
918 printk(KERN_INFO "NX (Execute Disable) protection: active\n");
919#endif
920
921 /* Enable PSE if available */
922 if (cpu_has_pse)
923 set_in_cr4(X86_CR4_PSE);
924
925 /* Enable PGE if available */
926 if (cpu_has_pge) {
927 set_in_cr4(X86_CR4_PGE);
928 __supported_pte_mask |= _PAGE_GLOBAL;
929 }
930
931 /*
932 * Don't use a large page for the first 2/4MB of memory
933 * because there are often fixed size MTRRs in there
934 * and overlapping MTRRs into large pages can cause
935 * slowdowns.
936 */
937 big_page_start = PMD_SIZE;
938
939 if (start < big_page_start) {
940 start_pfn = start >> PAGE_SHIFT;
941 end_pfn = min(big_page_start>>PAGE_SHIFT, end>>PAGE_SHIFT);
942 } else {
943 /* head is not big page alignment ? */
944 start_pfn = start >> PAGE_SHIFT;
945 end_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT)
946 << (PMD_SHIFT - PAGE_SHIFT);
947 } 857 }
948 if (start_pfn < end_pfn)
949 kernel_physical_mapping_init(pgd_base, start_pfn, end_pfn, 0);
950
951 /* big page range */
952 start_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT)
953 << (PMD_SHIFT - PAGE_SHIFT);
954 if (start_pfn < (big_page_start >> PAGE_SHIFT))
955 start_pfn = big_page_start >> PAGE_SHIFT;
956 end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
957 if (start_pfn < end_pfn)
958 kernel_physical_mapping_init(pgd_base, start_pfn, end_pfn,
959 use_pse);
960
961 /* tail is not big page alignment ? */
962 start_pfn = end_pfn;
963 if (start_pfn > (big_page_start>>PAGE_SHIFT)) {
964 end_pfn = end >> PAGE_SHIFT;
965 if (start_pfn < end_pfn)
966 kernel_physical_mapping_init(pgd_base, start_pfn,
967 end_pfn, 0);
968 }
969
970 early_ioremap_page_table_range_init(pgd_base);
971 858
972 load_cr3(swapper_pg_dir); 859 after_bootmem = 1;
973
974 __flush_tlb_all();
975
976 if (!after_init_bootmem)
977 reserve_early(table_start << PAGE_SHIFT,
978 table_end << PAGE_SHIFT, "PGTABLE");
979
980 if (!after_init_bootmem)
981 early_memtest(start, end);
982
983 return end >> PAGE_SHIFT;
984} 860}
985 861
986
987/* 862/*
988 * paging_init() sets up the page tables - note that the first 8MB are 863 * paging_init() sets up the page tables - note that the first 8MB are
989 * already mapped by head.S. 864 * already mapped by head.S.
@@ -1217,13 +1092,6 @@ void mark_rodata_ro(void)
1217} 1092}
1218#endif 1093#endif
1219 1094
1220#ifdef CONFIG_BLK_DEV_INITRD
1221void free_initrd_mem(unsigned long start, unsigned long end)
1222{
1223 free_init_pages("initrd memory", start, end);
1224}
1225#endif
1226
1227int __init reserve_bootmem_generic(unsigned long phys, unsigned long len, 1095int __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
1228 int flags) 1096 int flags)
1229{ 1097{
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 07f44d491df1..54efa57d1c03 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -48,6 +48,7 @@
48#include <asm/kdebug.h> 48#include <asm/kdebug.h>
49#include <asm/numa.h> 49#include <asm/numa.h>
50#include <asm/cacheflush.h> 50#include <asm/cacheflush.h>
51#include <asm/init.h>
51 52
52/* 53/*
53 * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. 54 * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
@@ -61,12 +62,6 @@ static unsigned long dma_reserve __initdata;
61 62
62DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); 63DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
63 64
64int direct_gbpages
65#ifdef CONFIG_DIRECT_GBPAGES
66 = 1
67#endif
68;
69
70static int __init parse_direct_gbpages_off(char *arg) 65static int __init parse_direct_gbpages_off(char *arg)
71{ 66{
72 direct_gbpages = 0; 67 direct_gbpages = 0;
@@ -87,12 +82,10 @@ early_param("gbpages", parse_direct_gbpages_on);
87 * around without checking the pgd every time. 82 * around without checking the pgd every time.
88 */ 83 */
89 84
90int after_bootmem;
91
92pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP; 85pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP;
93EXPORT_SYMBOL_GPL(__supported_pte_mask); 86EXPORT_SYMBOL_GPL(__supported_pte_mask);
94 87
95static int do_not_nx __cpuinitdata; 88static int disable_nx __cpuinitdata;
96 89
97/* 90/*
98 * noexec=on|off 91 * noexec=on|off
@@ -107,9 +100,9 @@ static int __init nonx_setup(char *str)
107 return -EINVAL; 100 return -EINVAL;
108 if (!strncmp(str, "on", 2)) { 101 if (!strncmp(str, "on", 2)) {
109 __supported_pte_mask |= _PAGE_NX; 102 __supported_pte_mask |= _PAGE_NX;
110 do_not_nx = 0; 103 disable_nx = 0;
111 } else if (!strncmp(str, "off", 3)) { 104 } else if (!strncmp(str, "off", 3)) {
112 do_not_nx = 1; 105 disable_nx = 1;
113 __supported_pte_mask &= ~_PAGE_NX; 106 __supported_pte_mask &= ~_PAGE_NX;
114 } 107 }
115 return 0; 108 return 0;
@@ -121,7 +114,7 @@ void __cpuinit check_efer(void)
121 unsigned long efer; 114 unsigned long efer;
122 115
123 rdmsrl(MSR_EFER, efer); 116 rdmsrl(MSR_EFER, efer);
124 if (!(efer & EFER_NX) || do_not_nx) 117 if (!(efer & EFER_NX) || disable_nx)
125 __supported_pte_mask &= ~_PAGE_NX; 118 __supported_pte_mask &= ~_PAGE_NX;
126} 119}
127 120
@@ -325,13 +318,9 @@ void __init cleanup_highmap(void)
325 } 318 }
326} 319}
327 320
328static unsigned long __initdata table_start;
329static unsigned long __meminitdata table_end;
330static unsigned long __meminitdata table_top;
331
332static __ref void *alloc_low_page(unsigned long *phys) 321static __ref void *alloc_low_page(unsigned long *phys)
333{ 322{
334 unsigned long pfn = table_end++; 323 unsigned long pfn = e820_table_end++;
335 void *adr; 324 void *adr;
336 325
337 if (after_bootmem) { 326 if (after_bootmem) {
@@ -341,7 +330,7 @@ static __ref void *alloc_low_page(unsigned long *phys)
341 return adr; 330 return adr;
342 } 331 }
343 332
344 if (pfn >= table_top) 333 if (pfn >= e820_table_top)
345 panic("alloc_low_page: ran out of memory"); 334 panic("alloc_low_page: ran out of memory");
346 335
347 adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE); 336 adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE);
@@ -581,58 +570,10 @@ phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end,
581 return phys_pud_init(pud, addr, end, page_size_mask); 570 return phys_pud_init(pud, addr, end, page_size_mask);
582} 571}
583 572
584static void __init find_early_table_space(unsigned long end, int use_pse, 573unsigned long __init
585 int use_gbpages) 574kernel_physical_mapping_init(unsigned long start,
586{ 575 unsigned long end,
587 unsigned long puds, pmds, ptes, tables, start; 576 unsigned long page_size_mask)
588
589 puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
590 tables = roundup(puds * sizeof(pud_t), PAGE_SIZE);
591 if (use_gbpages) {
592 unsigned long extra;
593 extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT);
594 pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT;
595 } else
596 pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
597 tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE);
598
599 if (use_pse) {
600 unsigned long extra;
601 extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT);
602 ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
603 } else
604 ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
605 tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE);
606
607 /*
608 * RED-PEN putting page tables only on node 0 could
609 * cause a hotspot and fill up ZONE_DMA. The page tables
610 * need roughly 0.5KB per GB.
611 */
612 start = 0x8000;
613 table_start = find_e820_area(start, end, tables, PAGE_SIZE);
614 if (table_start == -1UL)
615 panic("Cannot find space for the kernel page tables");
616
617 table_start >>= PAGE_SHIFT;
618 table_end = table_start;
619 table_top = table_start + (tables >> PAGE_SHIFT);
620
621 printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n",
622 end, table_start << PAGE_SHIFT, table_top << PAGE_SHIFT);
623}
624
625static void __init init_gbpages(void)
626{
627 if (direct_gbpages && cpu_has_gbpages)
628 printk(KERN_INFO "Using GB pages for direct mapping\n");
629 else
630 direct_gbpages = 0;
631}
632
633static unsigned long __meminit kernel_physical_mapping_init(unsigned long start,
634 unsigned long end,
635 unsigned long page_size_mask)
636{ 577{
637 578
638 unsigned long next, last_map_addr = end; 579 unsigned long next, last_map_addr = end;
@@ -669,176 +610,6 @@ static unsigned long __meminit kernel_physical_mapping_init(unsigned long start,
669 return last_map_addr; 610 return last_map_addr;
670} 611}
671 612
672struct map_range {
673 unsigned long start;
674 unsigned long end;
675 unsigned page_size_mask;
676};
677
678#define NR_RANGE_MR 5
679
680static int save_mr(struct map_range *mr, int nr_range,
681 unsigned long start_pfn, unsigned long end_pfn,
682 unsigned long page_size_mask)
683{
684
685 if (start_pfn < end_pfn) {
686 if (nr_range >= NR_RANGE_MR)
687 panic("run out of range for init_memory_mapping\n");
688 mr[nr_range].start = start_pfn<<PAGE_SHIFT;
689 mr[nr_range].end = end_pfn<<PAGE_SHIFT;
690 mr[nr_range].page_size_mask = page_size_mask;
691 nr_range++;
692 }
693
694 return nr_range;
695}
696
697/*
698 * Setup the direct mapping of the physical memory at PAGE_OFFSET.
699 * This runs before bootmem is initialized and gets pages directly from
700 * the physical memory. To access them they are temporarily mapped.
701 */
702unsigned long __init_refok init_memory_mapping(unsigned long start,
703 unsigned long end)
704{
705 unsigned long last_map_addr = 0;
706 unsigned long page_size_mask = 0;
707 unsigned long start_pfn, end_pfn;
708 unsigned long pos;
709
710 struct map_range mr[NR_RANGE_MR];
711 int nr_range, i;
712 int use_pse, use_gbpages;
713
714 printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end);
715
716 /*
717 * Find space for the kernel direct mapping tables.
718 *
719 * Later we should allocate these tables in the local node of the
720 * memory mapped. Unfortunately this is done currently before the
721 * nodes are discovered.
722 */
723 if (!after_bootmem)
724 init_gbpages();
725
726#ifdef CONFIG_DEBUG_PAGEALLOC
727 /*
728 * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
729 * This will simplify cpa(), which otherwise needs to support splitting
730 * large pages into small in interrupt context, etc.
731 */
732 use_pse = use_gbpages = 0;
733#else
734 use_pse = cpu_has_pse;
735 use_gbpages = direct_gbpages;
736#endif
737
738 if (use_gbpages)
739 page_size_mask |= 1 << PG_LEVEL_1G;
740 if (use_pse)
741 page_size_mask |= 1 << PG_LEVEL_2M;
742
743 memset(mr, 0, sizeof(mr));
744 nr_range = 0;
745
746 /* head if not big page alignment ?*/
747 start_pfn = start >> PAGE_SHIFT;
748 pos = start_pfn << PAGE_SHIFT;
749 end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT)
750 << (PMD_SHIFT - PAGE_SHIFT);
751 if (end_pfn > (end >> PAGE_SHIFT))
752 end_pfn = end >> PAGE_SHIFT;
753 if (start_pfn < end_pfn) {
754 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
755 pos = end_pfn << PAGE_SHIFT;
756 }
757
758 /* big page (2M) range*/
759 start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
760 << (PMD_SHIFT - PAGE_SHIFT);
761 end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT)
762 << (PUD_SHIFT - PAGE_SHIFT);
763 if (end_pfn > ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT)))
764 end_pfn = ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT));
765 if (start_pfn < end_pfn) {
766 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
767 page_size_mask & (1<<PG_LEVEL_2M));
768 pos = end_pfn << PAGE_SHIFT;
769 }
770
771 /* big page (1G) range */
772 start_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT)
773 << (PUD_SHIFT - PAGE_SHIFT);
774 end_pfn = (end >> PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT);
775 if (start_pfn < end_pfn) {
776 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
777 page_size_mask &
778 ((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G)));
779 pos = end_pfn << PAGE_SHIFT;
780 }
781
782 /* tail is not big page (1G) alignment */
783 start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
784 << (PMD_SHIFT - PAGE_SHIFT);
785 end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
786 if (start_pfn < end_pfn) {
787 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
788 page_size_mask & (1<<PG_LEVEL_2M));
789 pos = end_pfn << PAGE_SHIFT;
790 }
791
792 /* tail is not big page (2M) alignment */
793 start_pfn = pos>>PAGE_SHIFT;
794 end_pfn = end>>PAGE_SHIFT;
795 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
796
797 /* try to merge same page size and continuous */
798 for (i = 0; nr_range > 1 && i < nr_range - 1; i++) {
799 unsigned long old_start;
800 if (mr[i].end != mr[i+1].start ||
801 mr[i].page_size_mask != mr[i+1].page_size_mask)
802 continue;
803 /* move it */
804 old_start = mr[i].start;
805 memmove(&mr[i], &mr[i+1],
806 (nr_range - 1 - i) * sizeof (struct map_range));
807 mr[i--].start = old_start;
808 nr_range--;
809 }
810
811 for (i = 0; i < nr_range; i++)
812 printk(KERN_DEBUG " %010lx - %010lx page %s\n",
813 mr[i].start, mr[i].end,
814 (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":(
815 (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k"));
816
817 if (!after_bootmem)
818 find_early_table_space(end, use_pse, use_gbpages);
819
820 for (i = 0; i < nr_range; i++)
821 last_map_addr = kernel_physical_mapping_init(
822 mr[i].start, mr[i].end,
823 mr[i].page_size_mask);
824
825 if (!after_bootmem)
826 mmu_cr4_features = read_cr4();
827 __flush_tlb_all();
828
829 if (!after_bootmem && table_end > table_start)
830 reserve_early(table_start << PAGE_SHIFT,
831 table_end << PAGE_SHIFT, "PGTABLE");
832
833 printk(KERN_INFO "last_map_addr: %lx end: %lx\n",
834 last_map_addr, end);
835
836 if (!after_bootmem)
837 early_memtest(start, end);
838
839 return last_map_addr >> PAGE_SHIFT;
840}
841
842#ifndef CONFIG_NUMA 613#ifndef CONFIG_NUMA
843void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn) 614void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn)
844{ 615{
@@ -910,28 +681,6 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
910 681
911#endif /* CONFIG_MEMORY_HOTPLUG */ 682#endif /* CONFIG_MEMORY_HOTPLUG */
912 683
913/*
914 * devmem_is_allowed() checks to see if /dev/mem access to a certain address
915 * is valid. The argument is a physical page number.
916 *
917 *
918 * On x86, access has to be given to the first megabyte of ram because that area
919 * contains bios code and data regions used by X and dosemu and similar apps.
920 * Access has to be given to non-kernel-ram areas as well, these contain the PCI
921 * mmio resources as well as potential bios/acpi data regions.
922 */
923int devmem_is_allowed(unsigned long pagenr)
924{
925 if (pagenr <= 256)
926 return 1;
927 if (iomem_is_exclusive(pagenr << PAGE_SHIFT))
928 return 0;
929 if (!page_is_ram(pagenr))
930 return 1;
931 return 0;
932}
933
934
935static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, 684static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel,
936 kcore_modules, kcore_vsyscall; 685 kcore_modules, kcore_vsyscall;
937 686
@@ -1019,13 +768,6 @@ void mark_rodata_ro(void)
1019 768
1020#endif 769#endif
1021 770
1022#ifdef CONFIG_BLK_DEV_INITRD
1023void free_initrd_mem(unsigned long start, unsigned long end)
1024{
1025 free_init_pages("initrd memory", start, end);
1026}
1027#endif
1028
1029int __init reserve_bootmem_generic(unsigned long phys, unsigned long len, 771int __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
1030 int flags) 772 int flags)
1031{ 773{
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
index 04102d42ff42..699c9b2895ae 100644
--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -31,16 +31,27 @@ int is_io_mapping_possible(resource_size_t base, unsigned long size)
31} 31}
32EXPORT_SYMBOL_GPL(is_io_mapping_possible); 32EXPORT_SYMBOL_GPL(is_io_mapping_possible);
33 33
34/* Map 'pfn' using fixed map 'type' and protections 'prot' 34void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
35 */
36void *
37iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
38{ 35{
39 enum fixed_addresses idx; 36 enum fixed_addresses idx;
40 unsigned long vaddr; 37 unsigned long vaddr;
41 38
42 pagefault_disable(); 39 pagefault_disable();
43 40
41 idx = type + KM_TYPE_NR * smp_processor_id();
42 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
43 set_pte(kmap_pte - idx, pfn_pte(pfn, prot));
44 arch_flush_lazy_mmu_mode();
45
46 return (void *)vaddr;
47}
48
49/*
50 * Map 'pfn' using fixed map 'type' and protections 'prot'
51 */
52void *
53iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
54{
44 /* 55 /*
45 * For non-PAT systems, promote PAGE_KERNEL_WC to PAGE_KERNEL_UC_MINUS. 56 * For non-PAT systems, promote PAGE_KERNEL_WC to PAGE_KERNEL_UC_MINUS.
46 * PAGE_KERNEL_WC maps to PWT, which translates to uncached if the 57 * PAGE_KERNEL_WC maps to PWT, which translates to uncached if the
@@ -50,12 +61,7 @@ iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
50 if (!pat_enabled && pgprot_val(prot) == pgprot_val(PAGE_KERNEL_WC)) 61 if (!pat_enabled && pgprot_val(prot) == pgprot_val(PAGE_KERNEL_WC))
51 prot = PAGE_KERNEL_UC_MINUS; 62 prot = PAGE_KERNEL_UC_MINUS;
52 63
53 idx = type + KM_TYPE_NR*smp_processor_id(); 64 return kmap_atomic_prot_pfn(pfn, type, prot);
54 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
55 set_pte(kmap_pte-idx, pfn_pte(pfn, prot));
56 arch_flush_lazy_mmu_mode();
57
58 return (void*) vaddr;
59} 65}
60EXPORT_SYMBOL_GPL(iomap_atomic_prot_pfn); 66EXPORT_SYMBOL_GPL(iomap_atomic_prot_pfn);
61 67
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 433f7bd4648a..0dfa09d69e80 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -22,13 +22,17 @@
22#include <asm/pgalloc.h> 22#include <asm/pgalloc.h>
23#include <asm/pat.h> 23#include <asm/pat.h>
24 24
25#ifdef CONFIG_X86_64 25static inline int phys_addr_valid(resource_size_t addr)
26
27static inline int phys_addr_valid(unsigned long addr)
28{ 26{
29 return addr < (1UL << boot_cpu_data.x86_phys_bits); 27#ifdef CONFIG_PHYS_ADDR_T_64BIT
28 return !(addr >> boot_cpu_data.x86_phys_bits);
29#else
30 return 1;
31#endif
30} 32}
31 33
34#ifdef CONFIG_X86_64
35
32unsigned long __phys_addr(unsigned long x) 36unsigned long __phys_addr(unsigned long x)
33{ 37{
34 if (x >= __START_KERNEL_map) { 38 if (x >= __START_KERNEL_map) {
@@ -38,8 +42,7 @@ unsigned long __phys_addr(unsigned long x)
38 } else { 42 } else {
39 VIRTUAL_BUG_ON(x < PAGE_OFFSET); 43 VIRTUAL_BUG_ON(x < PAGE_OFFSET);
40 x -= PAGE_OFFSET; 44 x -= PAGE_OFFSET;
41 VIRTUAL_BUG_ON(system_state == SYSTEM_BOOTING ? x > MAXMEM : 45 VIRTUAL_BUG_ON(!phys_addr_valid(x));
42 !phys_addr_valid(x));
43 } 46 }
44 return x; 47 return x;
45} 48}
@@ -56,10 +59,8 @@ bool __virt_addr_valid(unsigned long x)
56 if (x < PAGE_OFFSET) 59 if (x < PAGE_OFFSET)
57 return false; 60 return false;
58 x -= PAGE_OFFSET; 61 x -= PAGE_OFFSET;
59 if (system_state == SYSTEM_BOOTING ? 62 if (!phys_addr_valid(x))
60 x > MAXMEM : !phys_addr_valid(x)) {
61 return false; 63 return false;
62 }
63 } 64 }
64 65
65 return pfn_valid(x >> PAGE_SHIFT); 66 return pfn_valid(x >> PAGE_SHIFT);
@@ -68,18 +69,12 @@ EXPORT_SYMBOL(__virt_addr_valid);
68 69
69#else 70#else
70 71
71static inline int phys_addr_valid(unsigned long addr)
72{
73 return 1;
74}
75
76#ifdef CONFIG_DEBUG_VIRTUAL 72#ifdef CONFIG_DEBUG_VIRTUAL
77unsigned long __phys_addr(unsigned long x) 73unsigned long __phys_addr(unsigned long x)
78{ 74{
79 /* VMALLOC_* aren't constants; not available at the boot time */ 75 /* VMALLOC_* aren't constants */
80 VIRTUAL_BUG_ON(x < PAGE_OFFSET); 76 VIRTUAL_BUG_ON(x < PAGE_OFFSET);
81 VIRTUAL_BUG_ON(system_state != SYSTEM_BOOTING && 77 VIRTUAL_BUG_ON(__vmalloc_start_set && is_vmalloc_addr((void *) x));
82 is_vmalloc_addr((void *) x));
83 return x - PAGE_OFFSET; 78 return x - PAGE_OFFSET;
84} 79}
85EXPORT_SYMBOL(__phys_addr); 80EXPORT_SYMBOL(__phys_addr);
@@ -89,7 +84,9 @@ bool __virt_addr_valid(unsigned long x)
89{ 84{
90 if (x < PAGE_OFFSET) 85 if (x < PAGE_OFFSET)
91 return false; 86 return false;
92 if (system_state != SYSTEM_BOOTING && is_vmalloc_addr((void *) x)) 87 if (__vmalloc_start_set && is_vmalloc_addr((void *) x))
88 return false;
89 if (x >= FIXADDR_START)
93 return false; 90 return false;
94 return pfn_valid((x - PAGE_OFFSET) >> PAGE_SHIFT); 91 return pfn_valid((x - PAGE_OFFSET) >> PAGE_SHIFT);
95} 92}
@@ -508,13 +505,19 @@ static inline pte_t * __init early_ioremap_pte(unsigned long addr)
508 return &bm_pte[pte_index(addr)]; 505 return &bm_pte[pte_index(addr)];
509} 506}
510 507
508static unsigned long slot_virt[FIX_BTMAPS_SLOTS] __initdata;
509
511void __init early_ioremap_init(void) 510void __init early_ioremap_init(void)
512{ 511{
513 pmd_t *pmd; 512 pmd_t *pmd;
513 int i;
514 514
515 if (early_ioremap_debug) 515 if (early_ioremap_debug)
516 printk(KERN_INFO "early_ioremap_init()\n"); 516 printk(KERN_INFO "early_ioremap_init()\n");
517 517
518 for (i = 0; i < FIX_BTMAPS_SLOTS; i++)
519 slot_virt[i] = __fix_to_virt(FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*i);
520
518 pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); 521 pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
519 memset(bm_pte, 0, sizeof(bm_pte)); 522 memset(bm_pte, 0, sizeof(bm_pte));
520 pmd_populate_kernel(&init_mm, pmd, bm_pte); 523 pmd_populate_kernel(&init_mm, pmd, bm_pte);
@@ -581,6 +584,7 @@ static inline void __init early_clear_fixmap(enum fixed_addresses idx)
581 584
582static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata; 585static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata;
583static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata; 586static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata;
587
584static int __init check_early_ioremap_leak(void) 588static int __init check_early_ioremap_leak(void)
585{ 589{
586 int count = 0; 590 int count = 0;
@@ -602,7 +606,8 @@ static int __init check_early_ioremap_leak(void)
602} 606}
603late_initcall(check_early_ioremap_leak); 607late_initcall(check_early_ioremap_leak);
604 608
605static void __init __iomem *__early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot) 609static void __init __iomem *
610__early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot)
606{ 611{
607 unsigned long offset, last_addr; 612 unsigned long offset, last_addr;
608 unsigned int nrpages; 613 unsigned int nrpages;
@@ -668,9 +673,9 @@ static void __init __iomem *__early_ioremap(unsigned long phys_addr, unsigned lo
668 --nrpages; 673 --nrpages;
669 } 674 }
670 if (early_ioremap_debug) 675 if (early_ioremap_debug)
671 printk(KERN_CONT "%08lx + %08lx\n", offset, fix_to_virt(idx0)); 676 printk(KERN_CONT "%08lx + %08lx\n", offset, slot_virt[slot]);
672 677
673 prev_map[slot] = (void __iomem *)(offset + fix_to_virt(idx0)); 678 prev_map[slot] = (void __iomem *)(offset + slot_virt[slot]);
674 return prev_map[slot]; 679 return prev_map[slot];
675} 680}
676 681
@@ -738,8 +743,3 @@ void __init early_iounmap(void __iomem *addr, unsigned long size)
738 } 743 }
739 prev_map[slot] = NULL; 744 prev_map[slot] = NULL;
740} 745}
741
742void __this_fixmap_does_not_exist(void)
743{
744 WARN_ON(1);
745}
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 6a518dd08a36..4f115e00486b 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -310,7 +310,7 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
310 struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); 310 struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx);
311 311
312 if (!ctx->active) { 312 if (!ctx->active) {
313 pr_warning("kmmio: spurious debug trap on CPU %d.\n", 313 pr_debug("kmmio: spurious debug trap on CPU %d.\n",
314 smp_processor_id()); 314 smp_processor_id());
315 goto out; 315 goto out;
316 } 316 }
diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c
index 0bcd7883d036..605c8be06217 100644
--- a/arch/x86/mm/memtest.c
+++ b/arch/x86/mm/memtest.c
@@ -100,6 +100,9 @@ static int __init parse_memtest(char *arg)
100{ 100{
101 if (arg) 101 if (arg)
102 memtest_pattern = simple_strtoul(arg, NULL, 0); 102 memtest_pattern = simple_strtoul(arg, NULL, 0);
103 else
104 memtest_pattern = ARRAY_SIZE(patterns);
105
103 return 0; 106 return 0;
104} 107}
105 108
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 451fe95a0352..3daefa04ace5 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -416,10 +416,11 @@ void __init initmem_init(unsigned long start_pfn,
416 for_each_online_node(nid) 416 for_each_online_node(nid)
417 propagate_e820_map_node(nid); 417 propagate_e820_map_node(nid);
418 418
419 for_each_online_node(nid) 419 for_each_online_node(nid) {
420 memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); 420 memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
421 NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
422 }
421 423
422 NODE_DATA(0)->bdata = &bootmem_node_data[0];
423 setup_bootmem_allocator(); 424 setup_bootmem_allocator();
424} 425}
425 426
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 9c4294986af7..d71e1b636ce6 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -16,6 +16,7 @@
16#include <asm/processor.h> 16#include <asm/processor.h>
17#include <asm/tlbflush.h> 17#include <asm/tlbflush.h>
18#include <asm/sections.h> 18#include <asm/sections.h>
19#include <asm/setup.h>
19#include <asm/uaccess.h> 20#include <asm/uaccess.h>
20#include <asm/pgalloc.h> 21#include <asm/pgalloc.h>
21#include <asm/proto.h> 22#include <asm/proto.h>
@@ -33,6 +34,7 @@ struct cpa_data {
33 unsigned long pfn; 34 unsigned long pfn;
34 unsigned force_split : 1; 35 unsigned force_split : 1;
35 int curpage; 36 int curpage;
37 struct page **pages;
36}; 38};
37 39
38/* 40/*
@@ -45,6 +47,7 @@ static DEFINE_SPINLOCK(cpa_lock);
45 47
46#define CPA_FLUSHTLB 1 48#define CPA_FLUSHTLB 1
47#define CPA_ARRAY 2 49#define CPA_ARRAY 2
50#define CPA_PAGES_ARRAY 4
48 51
49#ifdef CONFIG_PROC_FS 52#ifdef CONFIG_PROC_FS
50static unsigned long direct_pages_count[PG_LEVEL_NUM]; 53static unsigned long direct_pages_count[PG_LEVEL_NUM];
@@ -95,7 +98,7 @@ static inline unsigned long highmap_start_pfn(void)
95 98
96static inline unsigned long highmap_end_pfn(void) 99static inline unsigned long highmap_end_pfn(void)
97{ 100{
98 return __pa(roundup((unsigned long)_end, PMD_SIZE)) >> PAGE_SHIFT; 101 return __pa(roundup(_brk_end, PMD_SIZE)) >> PAGE_SHIFT;
99} 102}
100 103
101#endif 104#endif
@@ -201,10 +204,10 @@ static void cpa_flush_range(unsigned long start, int numpages, int cache)
201 } 204 }
202} 205}
203 206
204static void cpa_flush_array(unsigned long *start, int numpages, int cache) 207static void cpa_flush_array(unsigned long *start, int numpages, int cache,
208 int in_flags, struct page **pages)
205{ 209{
206 unsigned int i, level; 210 unsigned int i, level;
207 unsigned long *addr;
208 211
209 BUG_ON(irqs_disabled()); 212 BUG_ON(irqs_disabled());
210 213
@@ -225,14 +228,22 @@ static void cpa_flush_array(unsigned long *start, int numpages, int cache)
225 * will cause all other CPUs to flush the same 228 * will cause all other CPUs to flush the same
226 * cachelines: 229 * cachelines:
227 */ 230 */
228 for (i = 0, addr = start; i < numpages; i++, addr++) { 231 for (i = 0; i < numpages; i++) {
229 pte_t *pte = lookup_address(*addr, &level); 232 unsigned long addr;
233 pte_t *pte;
234
235 if (in_flags & CPA_PAGES_ARRAY)
236 addr = (unsigned long)page_address(pages[i]);
237 else
238 addr = start[i];
239
240 pte = lookup_address(addr, &level);
230 241
231 /* 242 /*
232 * Only flush present addresses: 243 * Only flush present addresses:
233 */ 244 */
234 if (pte && (pte_val(*pte) & _PAGE_PRESENT)) 245 if (pte && (pte_val(*pte) & _PAGE_PRESENT))
235 clflush_cache_range((void *) *addr, PAGE_SIZE); 246 clflush_cache_range((void *)addr, PAGE_SIZE);
236 } 247 }
237} 248}
238 249
@@ -584,7 +595,9 @@ static int __change_page_attr(struct cpa_data *cpa, int primary)
584 unsigned int level; 595 unsigned int level;
585 pte_t *kpte, old_pte; 596 pte_t *kpte, old_pte;
586 597
587 if (cpa->flags & CPA_ARRAY) 598 if (cpa->flags & CPA_PAGES_ARRAY)
599 address = (unsigned long)page_address(cpa->pages[cpa->curpage]);
600 else if (cpa->flags & CPA_ARRAY)
588 address = cpa->vaddr[cpa->curpage]; 601 address = cpa->vaddr[cpa->curpage];
589 else 602 else
590 address = *cpa->vaddr; 603 address = *cpa->vaddr;
@@ -687,7 +700,9 @@ static int cpa_process_alias(struct cpa_data *cpa)
687 * No need to redo, when the primary call touched the direct 700 * No need to redo, when the primary call touched the direct
688 * mapping already: 701 * mapping already:
689 */ 702 */
690 if (cpa->flags & CPA_ARRAY) 703 if (cpa->flags & CPA_PAGES_ARRAY)
704 vaddr = (unsigned long)page_address(cpa->pages[cpa->curpage]);
705 else if (cpa->flags & CPA_ARRAY)
691 vaddr = cpa->vaddr[cpa->curpage]; 706 vaddr = cpa->vaddr[cpa->curpage];
692 else 707 else
693 vaddr = *cpa->vaddr; 708 vaddr = *cpa->vaddr;
@@ -698,7 +713,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
698 alias_cpa = *cpa; 713 alias_cpa = *cpa;
699 temp_cpa_vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT); 714 temp_cpa_vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
700 alias_cpa.vaddr = &temp_cpa_vaddr; 715 alias_cpa.vaddr = &temp_cpa_vaddr;
701 alias_cpa.flags &= ~CPA_ARRAY; 716 alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
702 717
703 718
704 ret = __change_page_attr_set_clr(&alias_cpa, 0); 719 ret = __change_page_attr_set_clr(&alias_cpa, 0);
@@ -711,7 +726,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
711 * No need to redo, when the primary call touched the high 726 * No need to redo, when the primary call touched the high
712 * mapping already: 727 * mapping already:
713 */ 728 */
714 if (within(vaddr, (unsigned long) _text, (unsigned long) _end)) 729 if (within(vaddr, (unsigned long) _text, _brk_end))
715 return 0; 730 return 0;
716 731
717 /* 732 /*
@@ -724,7 +739,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
724 alias_cpa = *cpa; 739 alias_cpa = *cpa;
725 temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base; 740 temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base;
726 alias_cpa.vaddr = &temp_cpa_vaddr; 741 alias_cpa.vaddr = &temp_cpa_vaddr;
727 alias_cpa.flags &= ~CPA_ARRAY; 742 alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
728 743
729 /* 744 /*
730 * The high mapping range is imprecise, so ignore the return value. 745 * The high mapping range is imprecise, so ignore the return value.
@@ -745,7 +760,7 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
745 */ 760 */
746 cpa->numpages = numpages; 761 cpa->numpages = numpages;
747 /* for array changes, we can't use large page */ 762 /* for array changes, we can't use large page */
748 if (cpa->flags & CPA_ARRAY) 763 if (cpa->flags & (CPA_ARRAY | CPA_PAGES_ARRAY))
749 cpa->numpages = 1; 764 cpa->numpages = 1;
750 765
751 if (!debug_pagealloc) 766 if (!debug_pagealloc)
@@ -769,7 +784,7 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
769 */ 784 */
770 BUG_ON(cpa->numpages > numpages); 785 BUG_ON(cpa->numpages > numpages);
771 numpages -= cpa->numpages; 786 numpages -= cpa->numpages;
772 if (cpa->flags & CPA_ARRAY) 787 if (cpa->flags & (CPA_PAGES_ARRAY | CPA_ARRAY))
773 cpa->curpage++; 788 cpa->curpage++;
774 else 789 else
775 *cpa->vaddr += cpa->numpages * PAGE_SIZE; 790 *cpa->vaddr += cpa->numpages * PAGE_SIZE;
@@ -786,7 +801,8 @@ static inline int cache_attr(pgprot_t attr)
786 801
787static int change_page_attr_set_clr(unsigned long *addr, int numpages, 802static int change_page_attr_set_clr(unsigned long *addr, int numpages,
788 pgprot_t mask_set, pgprot_t mask_clr, 803 pgprot_t mask_set, pgprot_t mask_clr,
789 int force_split, int array) 804 int force_split, int in_flag,
805 struct page **pages)
790{ 806{
791 struct cpa_data cpa; 807 struct cpa_data cpa;
792 int ret, cache, checkalias; 808 int ret, cache, checkalias;
@@ -801,15 +817,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
801 return 0; 817 return 0;
802 818
803 /* Ensure we are PAGE_SIZE aligned */ 819 /* Ensure we are PAGE_SIZE aligned */
804 if (!array) { 820 if (in_flag & CPA_ARRAY) {
805 if (*addr & ~PAGE_MASK) {
806 *addr &= PAGE_MASK;
807 /*
808 * People should not be passing in unaligned addresses:
809 */
810 WARN_ON_ONCE(1);
811 }
812 } else {
813 int i; 821 int i;
814 for (i = 0; i < numpages; i++) { 822 for (i = 0; i < numpages; i++) {
815 if (addr[i] & ~PAGE_MASK) { 823 if (addr[i] & ~PAGE_MASK) {
@@ -817,6 +825,18 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
817 WARN_ON_ONCE(1); 825 WARN_ON_ONCE(1);
818 } 826 }
819 } 827 }
828 } else if (!(in_flag & CPA_PAGES_ARRAY)) {
829 /*
830 * in_flag of CPA_PAGES_ARRAY implies it is aligned.
831 * No need to check in that case
832 */
833 if (*addr & ~PAGE_MASK) {
834 *addr &= PAGE_MASK;
835 /*
836 * People should not be passing in unaligned addresses:
837 */
838 WARN_ON_ONCE(1);
839 }
820 } 840 }
821 841
822 /* Must avoid aliasing mappings in the highmem code */ 842 /* Must avoid aliasing mappings in the highmem code */
@@ -832,6 +852,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
832 arch_flush_lazy_mmu_mode(); 852 arch_flush_lazy_mmu_mode();
833 853
834 cpa.vaddr = addr; 854 cpa.vaddr = addr;
855 cpa.pages = pages;
835 cpa.numpages = numpages; 856 cpa.numpages = numpages;
836 cpa.mask_set = mask_set; 857 cpa.mask_set = mask_set;
837 cpa.mask_clr = mask_clr; 858 cpa.mask_clr = mask_clr;
@@ -839,8 +860,8 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
839 cpa.curpage = 0; 860 cpa.curpage = 0;
840 cpa.force_split = force_split; 861 cpa.force_split = force_split;
841 862
842 if (array) 863 if (in_flag & (CPA_ARRAY | CPA_PAGES_ARRAY))
843 cpa.flags |= CPA_ARRAY; 864 cpa.flags |= in_flag;
844 865
845 /* No alias checking for _NX bit modifications */ 866 /* No alias checking for _NX bit modifications */
846 checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX; 867 checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX;
@@ -866,9 +887,10 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
866 * wbinvd): 887 * wbinvd):
867 */ 888 */
868 if (!ret && cpu_has_clflush) { 889 if (!ret && cpu_has_clflush) {
869 if (cpa.flags & CPA_ARRAY) 890 if (cpa.flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) {
870 cpa_flush_array(addr, numpages, cache); 891 cpa_flush_array(addr, numpages, cache,
871 else 892 cpa.flags, pages);
893 } else
872 cpa_flush_range(*addr, numpages, cache); 894 cpa_flush_range(*addr, numpages, cache);
873 } else 895 } else
874 cpa_flush_all(cache); 896 cpa_flush_all(cache);
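
A note on the flush path above: with two array flavours in play, the cache flush has to resolve a virtual address per entry before it can clflush anything. The sketch below is illustrative only, not the cpa_flush_array() body from this patch; it assumes pageattr.c's existing includes and the usual x86 signatures of lookup_address() and clflush_cache_range():

        /*
         * Illustrative per-entry flush: pick the address from whichever
         * array the caller supplied, then flush that single page.
         */
        static void flush_one_entry(unsigned long *vaddrs, struct page **pages,
                                    int in_flags, int i)
        {
                unsigned long addr;
                unsigned int level;
                pte_t *pte;

                if (in_flags & CPA_PAGES_ARRAY)
                        addr = (unsigned long)page_address(pages[i]);
                else
                        addr = vaddrs[i];

                /* only flush mappings that are actually present */
                pte = lookup_address(addr, &level);
                if (pte && (pte_val(*pte) & _PAGE_PRESENT))
                        clflush_cache_range((void *)addr, PAGE_SIZE);
        }
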
@@ -888,14 +910,28 @@ static inline int change_page_attr_set(unsigned long *addr, int numpages,
888 pgprot_t mask, int array) 910 pgprot_t mask, int array)
889{ 911{
890 return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0, 912 return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0,
891 array); 913 (array ? CPA_ARRAY : 0), NULL);
892} 914}
893 915
894static inline int change_page_attr_clear(unsigned long *addr, int numpages, 916static inline int change_page_attr_clear(unsigned long *addr, int numpages,
895 pgprot_t mask, int array) 917 pgprot_t mask, int array)
896{ 918{
897 return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0, 919 return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0,
898 array); 920 (array ? CPA_ARRAY : 0), NULL);
921}
922
923static inline int cpa_set_pages_array(struct page **pages, int numpages,
924 pgprot_t mask)
925{
926 return change_page_attr_set_clr(NULL, numpages, mask, __pgprot(0), 0,
927 CPA_PAGES_ARRAY, pages);
928}
929
930static inline int cpa_clear_pages_array(struct page **pages, int numpages,
931 pgprot_t mask)
932{
933 return change_page_attr_set_clr(NULL, numpages, __pgprot(0), mask, 0,
934 CPA_PAGES_ARRAY, pages);
899} 935}
900 936
901int _set_memory_uc(unsigned long addr, int numpages) 937int _set_memory_uc(unsigned long addr, int numpages)
@@ -1043,7 +1079,7 @@ int set_memory_np(unsigned long addr, int numpages)
1043int set_memory_4k(unsigned long addr, int numpages) 1079int set_memory_4k(unsigned long addr, int numpages)
1044{ 1080{
1045 return change_page_attr_set_clr(&addr, numpages, __pgprot(0), 1081 return change_page_attr_set_clr(&addr, numpages, __pgprot(0),
1046 __pgprot(0), 1, 0); 1082 __pgprot(0), 1, 0, NULL);
1047} 1083}
1048 1084
1049int set_pages_uc(struct page *page, int numpages) 1085int set_pages_uc(struct page *page, int numpages)
@@ -1054,6 +1090,35 @@ int set_pages_uc(struct page *page, int numpages)
1054} 1090}
1055EXPORT_SYMBOL(set_pages_uc); 1091EXPORT_SYMBOL(set_pages_uc);
1056 1092
1093int set_pages_array_uc(struct page **pages, int addrinarray)
1094{
1095 unsigned long start;
1096 unsigned long end;
1097 int i;
1098 int free_idx;
1099
1100 for (i = 0; i < addrinarray; i++) {
1101 start = (unsigned long)page_address(pages[i]);
1102 end = start + PAGE_SIZE;
1103 if (reserve_memtype(start, end, _PAGE_CACHE_UC_MINUS, NULL))
1104 goto err_out;
1105 }
1106
1107 if (cpa_set_pages_array(pages, addrinarray,
1108 __pgprot(_PAGE_CACHE_UC_MINUS)) == 0) {
1109 return 0; /* Success */
1110 }
1111err_out:
1112 free_idx = i;
1113 for (i = 0; i < free_idx; i++) {
1114 start = (unsigned long)page_address(pages[i]);
1115 end = start + PAGE_SIZE;
1116 free_memtype(start, end);
1117 }
1118 return -EINVAL;
1119}
1120EXPORT_SYMBOL(set_pages_array_uc);
1121
1057int set_pages_wb(struct page *page, int numpages) 1122int set_pages_wb(struct page *page, int numpages)
1058{ 1123{
1059 unsigned long addr = (unsigned long)page_address(page); 1124 unsigned long addr = (unsigned long)page_address(page);
@@ -1062,6 +1127,26 @@ int set_pages_wb(struct page *page, int numpages)
1062} 1127}
1063EXPORT_SYMBOL(set_pages_wb); 1128EXPORT_SYMBOL(set_pages_wb);
1064 1129
1130int set_pages_array_wb(struct page **pages, int addrinarray)
1131{
1132 int retval;
1133 unsigned long start;
1134 unsigned long end;
1135 int i;
1136
1137 retval = cpa_clear_pages_array(pages, addrinarray,
1138 __pgprot(_PAGE_CACHE_MASK));
1139
1140 for (i = 0; i < addrinarray; i++) {
1141 start = (unsigned long)page_address(pages[i]);
1142 end = start + PAGE_SIZE;
1143 free_memtype(start, end);
1144 }
1145
1146 return retval;
1147}
1148EXPORT_SYMBOL(set_pages_array_wb);
1149
1065int set_pages_x(struct page *page, int numpages) 1150int set_pages_x(struct page *page, int numpages)
1066{ 1151{
1067 unsigned long addr = (unsigned long)page_address(page); 1152 unsigned long addr = (unsigned long)page_address(page);
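
The two new exports above, set_pages_array_uc() and set_pages_array_wb(), are the interface a driver holding a page array (the i915 GEM case that motivated this) is expected to use. A minimal usage sketch, assuming a kernel-module context and that the declarations sit in <asm/cacheflush.h>; the allocation and error handling are illustrative, not taken from any in-tree caller:

        #include <linux/gfp.h>
        #include <linux/mm.h>
        #include <asm/cacheflush.h>     /* set_pages_array_uc()/_wb(), assumed location */

        /* Allocate a page array and switch it to uncached before handing
         * it to a device. */
        static int example_alloc_uncached(struct page **pages, int count)
        {
                int i, ret;

                for (i = 0; i < count; i++) {
                        pages[i] = alloc_page(GFP_KERNEL);
                        if (!pages[i]) {
                                ret = -ENOMEM;
                                goto free_pages;
                        }
                }

                ret = set_pages_array_uc(pages, count);
                if (ret)
                        goto free_pages;

                return 0;

        free_pages:
                while (--i >= 0)
                        __free_page(pages[i]);
                return ret;
        }

        /* Restore write-back before the pages go back to the allocator. */
        static void example_free(struct page **pages, int count)
        {
                int i;

                set_pages_array_wb(pages, count);
                for (i = 0; i < count; i++)
                        __free_page(pages[i]);
        }

Note that set_pages_array_uc() already releases its own memtype reservations on failure (the err_out path in the hunk above), so a caller only has to free the pages themselves.
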
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 2ed37158012d..640339ee4fb2 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -677,10 +677,11 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
677 is_ram = pat_pagerange_is_ram(paddr, paddr + size); 677 is_ram = pat_pagerange_is_ram(paddr, paddr + size);
678 678
679 /* 679 /*
680 * reserve_pfn_range() doesn't support RAM pages. 680 * reserve_pfn_range() doesn't support RAM pages. Maintain the current
681 * behavior with RAM pages by returning success.
681 */ 682 */
682 if (is_ram != 0) 683 if (is_ram != 0)
683 return -EINVAL; 684 return 0;
684 685
685 ret = reserve_memtype(paddr, paddr + size, want_flags, &flags); 686 ret = reserve_memtype(paddr, paddr + size, want_flags, &flags);
686 if (ret) 687 if (ret)
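
For callers, the visible change is only the return value for RAM-backed ranges: they now succeed without touching the memtype tree instead of failing with -EINVAL. A compressed model of the new flow, illustrative only (the follow-up flags comparison done by the real reserve_pfn_range() is elided; the helper signatures are those already used in pat.c here):

        /* Model only: RAM ranges short-circuit to success. */
        static int model_reserve_pfn_range(u64 paddr, unsigned long size,
                                           unsigned long want_flags)
        {
                int is_ram = pat_pagerange_is_ram(paddr, paddr + size);

                if (is_ram != 0)
                        return 0;       /* RAM: keep current behavior, report success */

                return reserve_memtype(paddr, paddr + size, want_flags, NULL);
        }
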
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index f2e477c91c1b..46c8834aedc0 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -50,7 +50,7 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pteval)
50 } 50 }
51 pte = pte_offset_kernel(pmd, vaddr); 51 pte = pte_offset_kernel(pmd, vaddr);
52 if (pte_val(pteval)) 52 if (pte_val(pteval))
53 set_pte_present(&init_mm, vaddr, pte, pteval); 53 set_pte_at(&init_mm, vaddr, pte, pteval);
54 else 54 else
55 pte_clear(&init_mm, vaddr, pte); 55 pte_clear(&init_mm, vaddr, pte);
56 56
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index a654d59e4483..821e97017e95 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -187,11 +187,6 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
187 cpumask, cpumask_of(smp_processor_id())); 187 cpumask, cpumask_of(smp_processor_id()));
188 188
189 /* 189 /*
190 * Make the above memory operations globally visible before
191 * sending the IPI.
192 */
193 smp_mb();
194 /*
195 * We have to send the IPI only to 190 * We have to send the IPI only to
196 * CPUs affected. 191 * CPUs affected.
197 */ 192 */