aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/mm/init_32.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/mm/init_32.c')
-rw-r--r--arch/x86/mm/init_32.c425
1 files changed, 198 insertions, 227 deletions
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 3c76d194fd2c..da524fb22422 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -27,7 +27,6 @@
27#include <linux/bootmem.h> 27#include <linux/bootmem.h>
28#include <linux/slab.h> 28#include <linux/slab.h>
29#include <linux/proc_fs.h> 29#include <linux/proc_fs.h>
30#include <linux/efi.h>
31#include <linux/memory_hotplug.h> 30#include <linux/memory_hotplug.h>
32#include <linux/initrd.h> 31#include <linux/initrd.h>
33#include <linux/cpumask.h> 32#include <linux/cpumask.h>
@@ -40,8 +39,10 @@
40#include <asm/fixmap.h> 39#include <asm/fixmap.h>
41#include <asm/e820.h> 40#include <asm/e820.h>
42#include <asm/apic.h> 41#include <asm/apic.h>
42#include <asm/bugs.h>
43#include <asm/tlb.h> 43#include <asm/tlb.h>
44#include <asm/tlbflush.h> 44#include <asm/tlbflush.h>
45#include <asm/pgalloc.h>
45#include <asm/sections.h> 46#include <asm/sections.h>
46#include <asm/paravirt.h> 47#include <asm/paravirt.h>
47 48
@@ -50,7 +51,7 @@ unsigned int __VMALLOC_RESERVE = 128 << 20;
50DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); 51DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
51unsigned long highstart_pfn, highend_pfn; 52unsigned long highstart_pfn, highend_pfn;
52 53
53static int noinline do_test_wp_bit(void); 54static noinline int do_test_wp_bit(void);
54 55
55/* 56/*
56 * Creates a middle page table and puts a pointer to it in the 57 * Creates a middle page table and puts a pointer to it in the
@@ -61,26 +62,26 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
61{ 62{
62 pud_t *pud; 63 pud_t *pud;
63 pmd_t *pmd_table; 64 pmd_t *pmd_table;
64 65
65#ifdef CONFIG_X86_PAE 66#ifdef CONFIG_X86_PAE
66 if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { 67 if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
67 pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); 68 pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
68 69
69 paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT); 70 paravirt_alloc_pd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
70 set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); 71 set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
71 pud = pud_offset(pgd, 0); 72 pud = pud_offset(pgd, 0);
72 if (pmd_table != pmd_offset(pud, 0)) 73 BUG_ON(pmd_table != pmd_offset(pud, 0));
73 BUG();
74 } 74 }
75#endif 75#endif
76 pud = pud_offset(pgd, 0); 76 pud = pud_offset(pgd, 0);
77 pmd_table = pmd_offset(pud, 0); 77 pmd_table = pmd_offset(pud, 0);
78
78 return pmd_table; 79 return pmd_table;
79} 80}
80 81
81/* 82/*
82 * Create a page table and place a pointer to it in a middle page 83 * Create a page table and place a pointer to it in a middle page
83 * directory entry. 84 * directory entry:
84 */ 85 */
85static pte_t * __init one_page_table_init(pmd_t *pmd) 86static pte_t * __init one_page_table_init(pmd_t *pmd)
86{ 87{
@@ -90,9 +91,10 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
90#ifdef CONFIG_DEBUG_PAGEALLOC 91#ifdef CONFIG_DEBUG_PAGEALLOC
91 page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); 92 page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE);
92#endif 93#endif
93 if (!page_table) 94 if (!page_table) {
94 page_table = 95 page_table =
95 (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE); 96 (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE);
97 }
96 98
97 paravirt_alloc_pt(&init_mm, __pa(page_table) >> PAGE_SHIFT); 99 paravirt_alloc_pt(&init_mm, __pa(page_table) >> PAGE_SHIFT);
98 set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); 100 set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
@@ -103,22 +105,21 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
103} 105}
104 106
105/* 107/*
106 * This function initializes a certain range of kernel virtual memory 108 * This function initializes a certain range of kernel virtual memory
107 * with new bootmem page tables, everywhere page tables are missing in 109 * with new bootmem page tables, everywhere page tables are missing in
108 * the given range. 110 * the given range.
109 */ 111 *
110 112 * NOTE: The pagetables are allocated contiguous on the physical space
111/* 113 * so we can cache the place of the first one and move around without
112 * NOTE: The pagetables are allocated contiguous on the physical space
113 * so we can cache the place of the first one and move around without
114 * checking the pgd every time. 114 * checking the pgd every time.
115 */ 115 */
116static void __init page_table_range_init (unsigned long start, unsigned long end, pgd_t *pgd_base) 116static void __init
117page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base)
117{ 118{
118 pgd_t *pgd;
119 pmd_t *pmd;
120 int pgd_idx, pmd_idx; 119 int pgd_idx, pmd_idx;
121 unsigned long vaddr; 120 unsigned long vaddr;
121 pgd_t *pgd;
122 pmd_t *pmd;
122 123
123 vaddr = start; 124 vaddr = start;
124 pgd_idx = pgd_index(vaddr); 125 pgd_idx = pgd_index(vaddr);
@@ -128,7 +129,8 @@ static void __init page_table_range_init (unsigned long start, unsigned long end
128 for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) { 129 for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) {
129 pmd = one_md_table_init(pgd); 130 pmd = one_md_table_init(pgd);
130 pmd = pmd + pmd_index(vaddr); 131 pmd = pmd + pmd_index(vaddr);
131 for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) { 132 for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end);
133 pmd++, pmd_idx++) {
132 one_page_table_init(pmd); 134 one_page_table_init(pmd);
133 135
134 vaddr += PMD_SIZE; 136 vaddr += PMD_SIZE;
@@ -145,17 +147,17 @@ static inline int is_kernel_text(unsigned long addr)
145} 147}
146 148
147/* 149/*
148 * This maps the physical memory to kernel virtual address space, a total 150 * This maps the physical memory to kernel virtual address space, a total
149 * of max_low_pfn pages, by creating page tables starting from address 151 * of max_low_pfn pages, by creating page tables starting from address
150 * PAGE_OFFSET. 152 * PAGE_OFFSET:
151 */ 153 */
152static void __init kernel_physical_mapping_init(pgd_t *pgd_base) 154static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
153{ 155{
156 int pgd_idx, pmd_idx, pte_ofs;
154 unsigned long pfn; 157 unsigned long pfn;
155 pgd_t *pgd; 158 pgd_t *pgd;
156 pmd_t *pmd; 159 pmd_t *pmd;
157 pte_t *pte; 160 pte_t *pte;
158 int pgd_idx, pmd_idx, pte_ofs;
159 161
160 pgd_idx = pgd_index(PAGE_OFFSET); 162 pgd_idx = pgd_index(PAGE_OFFSET);
161 pgd = pgd_base + pgd_idx; 163 pgd = pgd_base + pgd_idx;
@@ -165,29 +167,43 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
165 pmd = one_md_table_init(pgd); 167 pmd = one_md_table_init(pgd);
166 if (pfn >= max_low_pfn) 168 if (pfn >= max_low_pfn)
167 continue; 169 continue;
168 for (pmd_idx = 0; pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn; pmd++, pmd_idx++) {
169 unsigned int address = pfn * PAGE_SIZE + PAGE_OFFSET;
170 170
171 /* Map with big pages if possible, otherwise create normal page tables. */ 171 for (pmd_idx = 0;
172 pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn;
173 pmd++, pmd_idx++) {
174 unsigned int addr = pfn * PAGE_SIZE + PAGE_OFFSET;
175
176 /*
177 * Map with big pages if possible, otherwise
178 * create normal page tables:
179 */
172 if (cpu_has_pse) { 180 if (cpu_has_pse) {
173 unsigned int address2 = (pfn + PTRS_PER_PTE - 1) * PAGE_SIZE + PAGE_OFFSET + PAGE_SIZE-1; 181 unsigned int addr2;
174 if (is_kernel_text(address) || is_kernel_text(address2)) 182 pgprot_t prot = PAGE_KERNEL_LARGE;
175 set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC)); 183
176 else 184 addr2 = (pfn + PTRS_PER_PTE-1) * PAGE_SIZE +
177 set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE)); 185 PAGE_OFFSET + PAGE_SIZE-1;
186
187 if (is_kernel_text(addr) ||
188 is_kernel_text(addr2))
189 prot = PAGE_KERNEL_LARGE_EXEC;
190
191 set_pmd(pmd, pfn_pmd(pfn, prot));
178 192
179 pfn += PTRS_PER_PTE; 193 pfn += PTRS_PER_PTE;
180 } else { 194 continue;
181 pte = one_page_table_init(pmd); 195 }
182 196 pte = one_page_table_init(pmd);
183 for (pte_ofs = 0; 197
184 pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; 198 for (pte_ofs = 0;
185 pte++, pfn++, pte_ofs++, address += PAGE_SIZE) { 199 pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn;
186 if (is_kernel_text(address)) 200 pte++, pfn++, pte_ofs++, addr += PAGE_SIZE) {
187 set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC)); 201 pgprot_t prot = PAGE_KERNEL;
188 else 202
189 set_pte(pte, pfn_pte(pfn, PAGE_KERNEL)); 203 if (is_kernel_text(addr))
190 } 204 prot = PAGE_KERNEL_EXEC;
205
206 set_pte(pte, pfn_pte(pfn, prot));
191 } 207 }
192 } 208 }
193 } 209 }
@@ -200,57 +216,23 @@ static inline int page_kills_ppro(unsigned long pagenr)
200 return 0; 216 return 0;
201} 217}
202 218
203int page_is_ram(unsigned long pagenr)
204{
205 int i;
206 unsigned long addr, end;
207
208 if (efi_enabled) {
209 efi_memory_desc_t *md;
210 void *p;
211
212 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
213 md = p;
214 if (!is_available_memory(md))
215 continue;
216 addr = (md->phys_addr+PAGE_SIZE-1) >> PAGE_SHIFT;
217 end = (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >> PAGE_SHIFT;
218
219 if ((pagenr >= addr) && (pagenr < end))
220 return 1;
221 }
222 return 0;
223 }
224
225 for (i = 0; i < e820.nr_map; i++) {
226
227 if (e820.map[i].type != E820_RAM) /* not usable memory */
228 continue;
229 /*
230 * !!!FIXME!!! Some BIOSen report areas as RAM that
231 * are not. Notably the 640->1Mb area. We need a sanity
232 * check here.
233 */
234 addr = (e820.map[i].addr+PAGE_SIZE-1) >> PAGE_SHIFT;
235 end = (e820.map[i].addr+e820.map[i].size) >> PAGE_SHIFT;
236 if ((pagenr >= addr) && (pagenr < end))
237 return 1;
238 }
239 return 0;
240}
241
242#ifdef CONFIG_HIGHMEM 219#ifdef CONFIG_HIGHMEM
243pte_t *kmap_pte; 220pte_t *kmap_pte;
244pgprot_t kmap_prot; 221pgprot_t kmap_prot;
245 222
246#define kmap_get_fixmap_pte(vaddr) \ 223static inline pte_t *kmap_get_fixmap_pte(unsigned long vaddr)
247 pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), vaddr), (vaddr)), (vaddr)) 224{
225 return pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr),
226 vaddr), vaddr), vaddr);
227}
248 228
249static void __init kmap_init(void) 229static void __init kmap_init(void)
250{ 230{
251 unsigned long kmap_vstart; 231 unsigned long kmap_vstart;
252 232
253 /* cache the first kmap pte */ 233 /*
234 * Cache the first kmap pte:
235 */
254 kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); 236 kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
255 kmap_pte = kmap_get_fixmap_pte(kmap_vstart); 237 kmap_pte = kmap_get_fixmap_pte(kmap_vstart);
256 238
@@ -259,11 +241,11 @@ static void __init kmap_init(void)
259 241
260static void __init permanent_kmaps_init(pgd_t *pgd_base) 242static void __init permanent_kmaps_init(pgd_t *pgd_base)
261{ 243{
244 unsigned long vaddr;
262 pgd_t *pgd; 245 pgd_t *pgd;
263 pud_t *pud; 246 pud_t *pud;
264 pmd_t *pmd; 247 pmd_t *pmd;
265 pte_t *pte; 248 pte_t *pte;
266 unsigned long vaddr;
267 249
268 vaddr = PKMAP_BASE; 250 vaddr = PKMAP_BASE;
269 page_table_range_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base); 251 page_table_range_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);
@@ -272,7 +254,7 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base)
272 pud = pud_offset(pgd, vaddr); 254 pud = pud_offset(pgd, vaddr);
273 pmd = pmd_offset(pud, vaddr); 255 pmd = pmd_offset(pud, vaddr);
274 pte = pte_offset_kernel(pmd, vaddr); 256 pte = pte_offset_kernel(pmd, vaddr);
275 pkmap_page_table = pte; 257 pkmap_page_table = pte;
276} 258}
277 259
278static void __meminit free_new_highpage(struct page *page) 260static void __meminit free_new_highpage(struct page *page)
@@ -291,7 +273,8 @@ void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro)
291 SetPageReserved(page); 273 SetPageReserved(page);
292} 274}
293 275
294static int __meminit add_one_highpage_hotplug(struct page *page, unsigned long pfn) 276static int __meminit
277add_one_highpage_hotplug(struct page *page, unsigned long pfn)
295{ 278{
296 free_new_highpage(page); 279 free_new_highpage(page);
297 totalram_pages++; 280 totalram_pages++;
@@ -299,6 +282,7 @@ static int __meminit add_one_highpage_hotplug(struct page *page, unsigned long p
299 max_mapnr = max(pfn, max_mapnr); 282 max_mapnr = max(pfn, max_mapnr);
300#endif 283#endif
301 num_physpages++; 284 num_physpages++;
285
302 return 0; 286 return 0;
303} 287}
304 288
@@ -306,7 +290,7 @@ static int __meminit add_one_highpage_hotplug(struct page *page, unsigned long p
306 * Not currently handling the NUMA case. 290 * Not currently handling the NUMA case.
307 * Assuming single node and all memory that 291 * Assuming single node and all memory that
308 * has been added dynamically that would be 292 * has been added dynamically that would be
309 * onlined here is in HIGHMEM 293 * onlined here is in HIGHMEM.
310 */ 294 */
311void __meminit online_page(struct page *page) 295void __meminit online_page(struct page *page)
312{ 296{
@@ -314,13 +298,11 @@ void __meminit online_page(struct page *page)
314 add_one_highpage_hotplug(page, page_to_pfn(page)); 298 add_one_highpage_hotplug(page, page_to_pfn(page));
315} 299}
316 300
317 301#ifndef CONFIG_NUMA
318#ifdef CONFIG_NUMA
319extern void set_highmem_pages_init(int);
320#else
321static void __init set_highmem_pages_init(int bad_ppro) 302static void __init set_highmem_pages_init(int bad_ppro)
322{ 303{
323 int pfn; 304 int pfn;
305
324 for (pfn = highstart_pfn; pfn < highend_pfn; pfn++) { 306 for (pfn = highstart_pfn; pfn < highend_pfn; pfn++) {
325 /* 307 /*
326 * Holes under sparsemem might not have no mem_map[]: 308 * Holes under sparsemem might not have no mem_map[]:
@@ -330,23 +312,18 @@ static void __init set_highmem_pages_init(int bad_ppro)
330 } 312 }
331 totalram_pages += totalhigh_pages; 313 totalram_pages += totalhigh_pages;
332} 314}
333#endif /* CONFIG_FLATMEM */ 315#endif /* !CONFIG_NUMA */
334 316
335#else 317#else
336#define kmap_init() do { } while (0) 318# define kmap_init() do { } while (0)
337#define permanent_kmaps_init(pgd_base) do { } while (0) 319# define permanent_kmaps_init(pgd_base) do { } while (0)
338#define set_highmem_pages_init(bad_ppro) do { } while (0) 320# define set_highmem_pages_init(bad_ppro) do { } while (0)
339#endif /* CONFIG_HIGHMEM */ 321#endif /* CONFIG_HIGHMEM */
340 322
341unsigned long long __PAGE_KERNEL = _PAGE_KERNEL; 323pteval_t __PAGE_KERNEL = _PAGE_KERNEL;
342EXPORT_SYMBOL(__PAGE_KERNEL); 324EXPORT_SYMBOL(__PAGE_KERNEL);
343unsigned long long __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC;
344 325
345#ifdef CONFIG_NUMA 326pteval_t __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC;
346extern void __init remap_numa_kva(void);
347#else
348#define remap_numa_kva() do {} while (0)
349#endif
350 327
351void __init native_pagetable_setup_start(pgd_t *base) 328void __init native_pagetable_setup_start(pgd_t *base)
352{ 329{
@@ -372,7 +349,7 @@ void __init native_pagetable_setup_start(pgd_t *base)
372 memset(&base[USER_PTRS_PER_PGD], 0, 349 memset(&base[USER_PTRS_PER_PGD], 0,
373 KERNEL_PGD_PTRS * sizeof(pgd_t)); 350 KERNEL_PGD_PTRS * sizeof(pgd_t));
374#else 351#else
375 paravirt_alloc_pd(__pa(swapper_pg_dir) >> PAGE_SHIFT); 352 paravirt_alloc_pd(&init_mm, __pa(base) >> PAGE_SHIFT);
376#endif 353#endif
377} 354}
378 355
@@ -410,10 +387,10 @@ void __init native_pagetable_setup_done(pgd_t *base)
410 * be partially populated, and so it avoids stomping on any existing 387 * be partially populated, and so it avoids stomping on any existing
411 * mappings. 388 * mappings.
412 */ 389 */
413static void __init pagetable_init (void) 390static void __init pagetable_init(void)
414{ 391{
415 unsigned long vaddr, end;
416 pgd_t *pgd_base = swapper_pg_dir; 392 pgd_t *pgd_base = swapper_pg_dir;
393 unsigned long vaddr, end;
417 394
418 paravirt_pagetable_setup_start(pgd_base); 395 paravirt_pagetable_setup_start(pgd_base);
419 396
@@ -435,9 +412,11 @@ static void __init pagetable_init (void)
435 * Fixed mappings, only the page table structure has to be 412 * Fixed mappings, only the page table structure has to be
436 * created - mappings will be set by set_fixmap(): 413 * created - mappings will be set by set_fixmap():
437 */ 414 */
415 early_ioremap_clear();
438 vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; 416 vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
439 end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK; 417 end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK;
440 page_table_range_init(vaddr, end, pgd_base); 418 page_table_range_init(vaddr, end, pgd_base);
419 early_ioremap_reset();
441 420
442 permanent_kmaps_init(pgd_base); 421 permanent_kmaps_init(pgd_base);
443 422
@@ -450,7 +429,7 @@ static void __init pagetable_init (void)
450 * driver might have split up a kernel 4MB mapping. 429 * driver might have split up a kernel 4MB mapping.
451 */ 430 */
452char __nosavedata swsusp_pg_dir[PAGE_SIZE] 431char __nosavedata swsusp_pg_dir[PAGE_SIZE]
453 __attribute__ ((aligned (PAGE_SIZE))); 432 __attribute__ ((aligned(PAGE_SIZE)));
454 433
455static inline void save_pg_dir(void) 434static inline void save_pg_dir(void)
456{ 435{
@@ -462,7 +441,7 @@ static inline void save_pg_dir(void)
462} 441}
463#endif 442#endif
464 443
465void zap_low_mappings (void) 444void zap_low_mappings(void)
466{ 445{
467 int i; 446 int i;
468 447
@@ -474,22 +453,24 @@ void zap_low_mappings (void)
474 * Note that "pgd_clear()" doesn't do it for 453 * Note that "pgd_clear()" doesn't do it for
475 * us, because pgd_clear() is a no-op on i386. 454 * us, because pgd_clear() is a no-op on i386.
476 */ 455 */
477 for (i = 0; i < USER_PTRS_PER_PGD; i++) 456 for (i = 0; i < USER_PTRS_PER_PGD; i++) {
478#ifdef CONFIG_X86_PAE 457#ifdef CONFIG_X86_PAE
479 set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page))); 458 set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page)));
480#else 459#else
481 set_pgd(swapper_pg_dir+i, __pgd(0)); 460 set_pgd(swapper_pg_dir+i, __pgd(0));
482#endif 461#endif
462 }
483 flush_tlb_all(); 463 flush_tlb_all();
484} 464}
485 465
486int nx_enabled = 0; 466int nx_enabled;
467
468pteval_t __supported_pte_mask __read_mostly = ~_PAGE_NX;
469EXPORT_SYMBOL_GPL(__supported_pte_mask);
487 470
488#ifdef CONFIG_X86_PAE 471#ifdef CONFIG_X86_PAE
489 472
490static int disable_nx __initdata = 0; 473static int disable_nx __initdata;
491u64 __supported_pte_mask __read_mostly = ~_PAGE_NX;
492EXPORT_SYMBOL_GPL(__supported_pte_mask);
493 474
494/* 475/*
495 * noexec = on|off 476 * noexec = on|off
@@ -506,11 +487,14 @@ static int __init noexec_setup(char *str)
506 __supported_pte_mask |= _PAGE_NX; 487 __supported_pte_mask |= _PAGE_NX;
507 disable_nx = 0; 488 disable_nx = 0;
508 } 489 }
509 } else if (!strcmp(str,"off")) { 490 } else {
510 disable_nx = 1; 491 if (!strcmp(str, "off")) {
511 __supported_pte_mask &= ~_PAGE_NX; 492 disable_nx = 1;
512 } else 493 __supported_pte_mask &= ~_PAGE_NX;
513 return -EINVAL; 494 } else {
495 return -EINVAL;
496 }
497 }
514 498
515 return 0; 499 return 0;
516} 500}
@@ -522,6 +506,7 @@ static void __init set_nx(void)
522 506
523 if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) { 507 if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) {
524 cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]); 508 cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]);
509
525 if ((v[3] & (1 << 20)) && !disable_nx) { 510 if ((v[3] & (1 << 20)) && !disable_nx) {
526 rdmsr(MSR_EFER, l, h); 511 rdmsr(MSR_EFER, l, h);
527 l |= EFER_NX; 512 l |= EFER_NX;
@@ -531,35 +516,6 @@ static void __init set_nx(void)
531 } 516 }
532 } 517 }
533} 518}
534
535/*
536 * Enables/disables executability of a given kernel page and
537 * returns the previous setting.
538 */
539int __init set_kernel_exec(unsigned long vaddr, int enable)
540{
541 pte_t *pte;
542 int ret = 1;
543
544 if (!nx_enabled)
545 goto out;
546
547 pte = lookup_address(vaddr);
548 BUG_ON(!pte);
549
550 if (!pte_exec_kernel(*pte))
551 ret = 0;
552
553 if (enable)
554 pte->pte_high &= ~(1 << (_PAGE_BIT_NX - 32));
555 else
556 pte->pte_high |= 1 << (_PAGE_BIT_NX - 32);
557 pte_update_defer(&init_mm, vaddr, pte);
558 __flush_tlb_all();
559out:
560 return ret;
561}
562
563#endif 519#endif
564 520
565/* 521/*
@@ -574,9 +530,8 @@ void __init paging_init(void)
574#ifdef CONFIG_X86_PAE 530#ifdef CONFIG_X86_PAE
575 set_nx(); 531 set_nx();
576 if (nx_enabled) 532 if (nx_enabled)
577 printk("NX (Execute Disable) protection: active\n"); 533 printk(KERN_INFO "NX (Execute Disable) protection: active\n");
578#endif 534#endif
579
580 pagetable_init(); 535 pagetable_init();
581 536
582 load_cr3(swapper_pg_dir); 537 load_cr3(swapper_pg_dir);
@@ -600,10 +555,10 @@ void __init paging_init(void)
600 * used to involve black magic jumps to work around some nasty CPU bugs, 555 * used to involve black magic jumps to work around some nasty CPU bugs,
601 * but fortunately the switch to using exceptions got rid of all that. 556 * but fortunately the switch to using exceptions got rid of all that.
602 */ 557 */
603
604static void __init test_wp_bit(void) 558static void __init test_wp_bit(void)
605{ 559{
606 printk("Checking if this processor honours the WP bit even in supervisor mode... "); 560 printk(KERN_INFO
561 "Checking if this processor honours the WP bit even in supervisor mode...");
607 562
608 /* Any page-aligned address will do, the test is non-destructive */ 563 /* Any page-aligned address will do, the test is non-destructive */
609 __set_fixmap(FIX_WP_TEST, __pa(&swapper_pg_dir), PAGE_READONLY); 564 __set_fixmap(FIX_WP_TEST, __pa(&swapper_pg_dir), PAGE_READONLY);
@@ -611,47 +566,46 @@ static void __init test_wp_bit(void)
611 clear_fixmap(FIX_WP_TEST); 566 clear_fixmap(FIX_WP_TEST);
612 567
613 if (!boot_cpu_data.wp_works_ok) { 568 if (!boot_cpu_data.wp_works_ok) {
614 printk("No.\n"); 569 printk(KERN_CONT "No.\n");
615#ifdef CONFIG_X86_WP_WORKS_OK 570#ifdef CONFIG_X86_WP_WORKS_OK
616 panic("This kernel doesn't support CPU's with broken WP. Recompile it for a 386!"); 571 panic(
572 "This kernel doesn't support CPU's with broken WP. Recompile it for a 386!");
617#endif 573#endif
618 } else { 574 } else {
619 printk("Ok.\n"); 575 printk(KERN_CONT "Ok.\n");
620 } 576 }
621} 577}
622 578
623static struct kcore_list kcore_mem, kcore_vmalloc; 579static struct kcore_list kcore_mem, kcore_vmalloc;
624 580
625void __init mem_init(void) 581void __init mem_init(void)
626{ 582{
627 extern int ppro_with_ram_bug(void);
628 int codesize, reservedpages, datasize, initsize; 583 int codesize, reservedpages, datasize, initsize;
629 int tmp; 584 int tmp, bad_ppro;
630 int bad_ppro;
631 585
632#ifdef CONFIG_FLATMEM 586#ifdef CONFIG_FLATMEM
633 BUG_ON(!mem_map); 587 BUG_ON(!mem_map);
634#endif 588#endif
635
636 bad_ppro = ppro_with_ram_bug(); 589 bad_ppro = ppro_with_ram_bug();
637 590
638#ifdef CONFIG_HIGHMEM 591#ifdef CONFIG_HIGHMEM
639 /* check that fixmap and pkmap do not overlap */ 592 /* check that fixmap and pkmap do not overlap */
640 if (PKMAP_BASE+LAST_PKMAP*PAGE_SIZE >= FIXADDR_START) { 593 if (PKMAP_BASE + LAST_PKMAP*PAGE_SIZE >= FIXADDR_START) {
641 printk(KERN_ERR "fixmap and kmap areas overlap - this will crash\n"); 594 printk(KERN_ERR
595 "fixmap and kmap areas overlap - this will crash\n");
642 printk(KERN_ERR "pkstart: %lxh pkend: %lxh fixstart %lxh\n", 596 printk(KERN_ERR "pkstart: %lxh pkend: %lxh fixstart %lxh\n",
643 PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE, FIXADDR_START); 597 PKMAP_BASE, PKMAP_BASE + LAST_PKMAP*PAGE_SIZE,
598 FIXADDR_START);
644 BUG(); 599 BUG();
645 } 600 }
646#endif 601#endif
647
648 /* this will put all low memory onto the freelists */ 602 /* this will put all low memory onto the freelists */
649 totalram_pages += free_all_bootmem(); 603 totalram_pages += free_all_bootmem();
650 604
651 reservedpages = 0; 605 reservedpages = 0;
652 for (tmp = 0; tmp < max_low_pfn; tmp++) 606 for (tmp = 0; tmp < max_low_pfn; tmp++)
653 /* 607 /*
654 * Only count reserved RAM pages 608 * Only count reserved RAM pages:
655 */ 609 */
656 if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp))) 610 if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp)))
657 reservedpages++; 611 reservedpages++;
@@ -662,11 +616,12 @@ void __init mem_init(void)
662 datasize = (unsigned long) &_edata - (unsigned long) &_etext; 616 datasize = (unsigned long) &_edata - (unsigned long) &_etext;
663 initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; 617 initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
664 618
665 kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT); 619 kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
666 kclist_add(&kcore_vmalloc, (void *)VMALLOC_START, 620 kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
667 VMALLOC_END-VMALLOC_START); 621 VMALLOC_END-VMALLOC_START);
668 622
669 printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n", 623 printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, "
624 "%dk reserved, %dk data, %dk init, %ldk highmem)\n",
670 (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), 625 (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
671 num_physpages << (PAGE_SHIFT-10), 626 num_physpages << (PAGE_SHIFT-10),
672 codesize >> 10, 627 codesize >> 10,
@@ -677,45 +632,46 @@ void __init mem_init(void)
677 ); 632 );
678 633
679#if 1 /* double-sanity-check paranoia */ 634#if 1 /* double-sanity-check paranoia */
680 printk("virtual kernel memory layout:\n" 635 printk(KERN_INFO "virtual kernel memory layout:\n"
681 " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" 636 " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
682#ifdef CONFIG_HIGHMEM 637#ifdef CONFIG_HIGHMEM
683 " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n" 638 " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
684#endif 639#endif
685 " vmalloc : 0x%08lx - 0x%08lx (%4ld MB)\n" 640 " vmalloc : 0x%08lx - 0x%08lx (%4ld MB)\n"
686 " lowmem : 0x%08lx - 0x%08lx (%4ld MB)\n" 641 " lowmem : 0x%08lx - 0x%08lx (%4ld MB)\n"
687 " .init : 0x%08lx - 0x%08lx (%4ld kB)\n" 642 " .init : 0x%08lx - 0x%08lx (%4ld kB)\n"
688 " .data : 0x%08lx - 0x%08lx (%4ld kB)\n" 643 " .data : 0x%08lx - 0x%08lx (%4ld kB)\n"
689 " .text : 0x%08lx - 0x%08lx (%4ld kB)\n", 644 " .text : 0x%08lx - 0x%08lx (%4ld kB)\n",
690 FIXADDR_START, FIXADDR_TOP, 645 FIXADDR_START, FIXADDR_TOP,
691 (FIXADDR_TOP - FIXADDR_START) >> 10, 646 (FIXADDR_TOP - FIXADDR_START) >> 10,
692 647
693#ifdef CONFIG_HIGHMEM 648#ifdef CONFIG_HIGHMEM
694 PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE, 649 PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
695 (LAST_PKMAP*PAGE_SIZE) >> 10, 650 (LAST_PKMAP*PAGE_SIZE) >> 10,
696#endif 651#endif
697 652
698 VMALLOC_START, VMALLOC_END, 653 VMALLOC_START, VMALLOC_END,
699 (VMALLOC_END - VMALLOC_START) >> 20, 654 (VMALLOC_END - VMALLOC_START) >> 20,
700 655
701 (unsigned long)__va(0), (unsigned long)high_memory, 656 (unsigned long)__va(0), (unsigned long)high_memory,
702 ((unsigned long)high_memory - (unsigned long)__va(0)) >> 20, 657 ((unsigned long)high_memory - (unsigned long)__va(0)) >> 20,
703 658
704 (unsigned long)&__init_begin, (unsigned long)&__init_end, 659 (unsigned long)&__init_begin, (unsigned long)&__init_end,
705 ((unsigned long)&__init_end - (unsigned long)&__init_begin) >> 10, 660 ((unsigned long)&__init_end -
661 (unsigned long)&__init_begin) >> 10,
706 662
707 (unsigned long)&_etext, (unsigned long)&_edata, 663 (unsigned long)&_etext, (unsigned long)&_edata,
708 ((unsigned long)&_edata - (unsigned long)&_etext) >> 10, 664 ((unsigned long)&_edata - (unsigned long)&_etext) >> 10,
709 665
710 (unsigned long)&_text, (unsigned long)&_etext, 666 (unsigned long)&_text, (unsigned long)&_etext,
711 ((unsigned long)&_etext - (unsigned long)&_text) >> 10); 667 ((unsigned long)&_etext - (unsigned long)&_text) >> 10);
712 668
713#ifdef CONFIG_HIGHMEM 669#ifdef CONFIG_HIGHMEM
714 BUG_ON(PKMAP_BASE+LAST_PKMAP*PAGE_SIZE > FIXADDR_START); 670 BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START);
715 BUG_ON(VMALLOC_END > PKMAP_BASE); 671 BUG_ON(VMALLOC_END > PKMAP_BASE);
716#endif 672#endif
717 BUG_ON(VMALLOC_START > VMALLOC_END); 673 BUG_ON(VMALLOC_START > VMALLOC_END);
718 BUG_ON((unsigned long)high_memory > VMALLOC_START); 674 BUG_ON((unsigned long)high_memory > VMALLOC_START);
719#endif /* double-sanity-check paranoia */ 675#endif /* double-sanity-check paranoia */
720 676
721#ifdef CONFIG_X86_PAE 677#ifdef CONFIG_X86_PAE
@@ -746,49 +702,38 @@ int arch_add_memory(int nid, u64 start, u64 size)
746 702
747 return __add_pages(zone, start_pfn, nr_pages); 703 return __add_pages(zone, start_pfn, nr_pages);
748} 704}
749
750#endif 705#endif
751 706
752struct kmem_cache *pmd_cache;
753
754void __init pgtable_cache_init(void)
755{
756 if (PTRS_PER_PMD > 1)
757 pmd_cache = kmem_cache_create("pmd",
758 PTRS_PER_PMD*sizeof(pmd_t),
759 PTRS_PER_PMD*sizeof(pmd_t),
760 SLAB_PANIC,
761 pmd_ctor);
762}
763
764/* 707/*
765 * This function cannot be __init, since exceptions don't work in that 708 * This function cannot be __init, since exceptions don't work in that
766 * section. Put this after the callers, so that it cannot be inlined. 709 * section. Put this after the callers, so that it cannot be inlined.
767 */ 710 */
768static int noinline do_test_wp_bit(void) 711static noinline int do_test_wp_bit(void)
769{ 712{
770 char tmp_reg; 713 char tmp_reg;
771 int flag; 714 int flag;
772 715
773 __asm__ __volatile__( 716 __asm__ __volatile__(
774 " movb %0,%1 \n" 717 " movb %0, %1 \n"
775 "1: movb %1,%0 \n" 718 "1: movb %1, %0 \n"
776 " xorl %2,%2 \n" 719 " xorl %2, %2 \n"
777 "2: \n" 720 "2: \n"
778 ".section __ex_table,\"a\"\n" 721 ".section __ex_table, \"a\"\n"
779 " .align 4 \n" 722 " .align 4 \n"
780 " .long 1b,2b \n" 723 " .long 1b, 2b \n"
781 ".previous \n" 724 ".previous \n"
782 :"=m" (*(char *)fix_to_virt(FIX_WP_TEST)), 725 :"=m" (*(char *)fix_to_virt(FIX_WP_TEST)),
783 "=q" (tmp_reg), 726 "=q" (tmp_reg),
784 "=r" (flag) 727 "=r" (flag)
785 :"2" (1) 728 :"2" (1)
786 :"memory"); 729 :"memory");
787 730
788 return flag; 731 return flag;
789} 732}
790 733
791#ifdef CONFIG_DEBUG_RODATA 734#ifdef CONFIG_DEBUG_RODATA
735const int rodata_test_data = 0xC3;
736EXPORT_SYMBOL_GPL(rodata_test_data);
792 737
793void mark_rodata_ro(void) 738void mark_rodata_ro(void)
794{ 739{
@@ -801,32 +746,58 @@ void mark_rodata_ro(void)
801 if (num_possible_cpus() <= 1) 746 if (num_possible_cpus() <= 1)
802#endif 747#endif
803 { 748 {
804 change_page_attr(virt_to_page(start), 749 set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
805 size >> PAGE_SHIFT, PAGE_KERNEL_RX); 750 printk(KERN_INFO "Write protecting the kernel text: %luk\n",
806 printk("Write protecting the kernel text: %luk\n", size >> 10); 751 size >> 10);
752
753#ifdef CONFIG_CPA_DEBUG
754 printk(KERN_INFO "Testing CPA: Reverting %lx-%lx\n",
755 start, start+size);
756 set_pages_rw(virt_to_page(start), size>>PAGE_SHIFT);
757
758 printk(KERN_INFO "Testing CPA: write protecting again\n");
759 set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT);
760#endif
807 } 761 }
808#endif 762#endif
809 start += size; 763 start += size;
810 size = (unsigned long)__end_rodata - start; 764 size = (unsigned long)__end_rodata - start;
811 change_page_attr(virt_to_page(start), 765 set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
812 size >> PAGE_SHIFT, PAGE_KERNEL_RO); 766 printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
813 printk("Write protecting the kernel read-only data: %luk\n", 767 size >> 10);
814 size >> 10); 768 rodata_test();
815 769
816 /* 770#ifdef CONFIG_CPA_DEBUG
817 * change_page_attr() requires a global_flush_tlb() call after it. 771 printk(KERN_INFO "Testing CPA: undo %lx-%lx\n", start, start + size);
818 * We do this after the printk so that if something went wrong in the 772 set_pages_rw(virt_to_page(start), size >> PAGE_SHIFT);
819 * change, the printk gets out at least to give a better debug hint 773
820 * of who is the culprit. 774 printk(KERN_INFO "Testing CPA: write protecting again\n");
821 */ 775 set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
822 global_flush_tlb(); 776#endif
823} 777}
824#endif 778#endif
825 779
826void free_init_pages(char *what, unsigned long begin, unsigned long end) 780void free_init_pages(char *what, unsigned long begin, unsigned long end)
827{ 781{
782#ifdef CONFIG_DEBUG_PAGEALLOC
783 /*
784 * If debugging page accesses then do not free this memory but
785 * mark them not present - any buggy init-section access will
786 * create a kernel page fault:
787 */
788 printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n",
789 begin, PAGE_ALIGN(end));
790 set_memory_np(begin, (end - begin) >> PAGE_SHIFT);
791#else
828 unsigned long addr; 792 unsigned long addr;
829 793
794 /*
795 * We just marked the kernel text read only above, now that
796 * we are going to free part of that, we need to make that
797 * writeable first.
798 */
799 set_memory_rw(begin, (end - begin) >> PAGE_SHIFT);
800
830 for (addr = begin; addr < end; addr += PAGE_SIZE) { 801 for (addr = begin; addr < end; addr += PAGE_SIZE) {
831 ClearPageReserved(virt_to_page(addr)); 802 ClearPageReserved(virt_to_page(addr));
832 init_page_count(virt_to_page(addr)); 803 init_page_count(virt_to_page(addr));
@@ -835,6 +806,7 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
835 totalram_pages++; 806 totalram_pages++;
836 } 807 }
837 printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); 808 printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
809#endif
838} 810}
839 811
840void free_initmem(void) 812void free_initmem(void)
@@ -850,4 +822,3 @@ void free_initrd_mem(unsigned long start, unsigned long end)
850 free_init_pages("initrd memory", start, end); 822 free_init_pages("initrd memory", start, end);
851} 823}
852#endif 824#endif
853