author    Andrea Bastoni <bastoni@cs.unc.edu>  2010-05-30 19:16:45 -0400
committer Andrea Bastoni <bastoni@cs.unc.edu>  2010-05-30 19:16:45 -0400
commit    ada47b5fe13d89735805b566185f4885f5a3f750 (patch)
tree      644b88f8a71896307d71438e9b3af49126ffb22b /arch/x86/mm
parent    43e98717ad40a4ae64545b5ba047c7b86aa44f4f (diff)
parent    3280f21d43ee541f97f8cda5792150d2dbec20d5 (diff)
Merge branch 'wip-2.6.34' into old-private-master (archived-private-master)
Diffstat (limited to 'arch/x86/mm')
-rw-r--r--  arch/x86/mm/extable.c                 31
-rw-r--r--  arch/x86/mm/fault.c                   13
-rw-r--r--  arch/x86/mm/gup.c                      2
-rw-r--r--  arch/x86/mm/hugetlbpage.c              1
-rw-r--r--  arch/x86/mm/init.c                    44
-rw-r--r--  arch/x86/mm/init_32.c                 30
-rw-r--r--  arch/x86/mm/init_64.c                 64
-rw-r--r--  arch/x86/mm/ioremap.c                 81
-rw-r--r--  arch/x86/mm/k8topology_64.c          101
-rw-r--r--  arch/x86/mm/kmemcheck/error.c         19
-rw-r--r--  arch/x86/mm/kmemcheck/kmemcheck.c      2
-rw-r--r--  arch/x86/mm/kmemcheck/shadow.c        16
-rw-r--r--  arch/x86/mm/kmemcheck/shadow.h         2
-rw-r--r--  arch/x86/mm/kmmio.c                   58
-rw-r--r--  arch/x86/mm/mmap.c                     4
-rw-r--r--  arch/x86/mm/mmio-mod.c                72
-rw-r--r--  arch/x86/mm/numa_32.c                  7
-rw-r--r--  arch/x86/mm/numa_64.c                506
-rw-r--r--  arch/x86/mm/pageattr.c                45
-rw-r--r--  arch/x86/mm/pat.c                     25
-rw-r--r--  arch/x86/mm/pgtable.c                 32
-rw-r--r--  arch/x86/mm/pgtable_32.c               3
-rw-r--r--  arch/x86/mm/setup_nx.c                59
-rw-r--r--  arch/x86/mm/srat_32.c                  2
-rw-r--r--  arch/x86/mm/srat_64.c                 44
-rw-r--r--  arch/x86/mm/testmmiotrace.c           29
-rw-r--r--  arch/x86/mm/tlb.c                     11
27 files changed, 775 insertions, 528 deletions
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 61b41ca3b5a2..d0474ad2a6e5 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -35,34 +35,3 @@ int fixup_exception(struct pt_regs *regs)
35 35
36 return 0; 36 return 0;
37} 37}
38
39#ifdef CONFIG_X86_64
40/*
41 * Need to defined our own search_extable on X86_64 to work around
42 * a B stepping K8 bug.
43 */
44const struct exception_table_entry *
45search_extable(const struct exception_table_entry *first,
46 const struct exception_table_entry *last,
47 unsigned long value)
48{
49 /* B stepping K8 bug */
50 if ((value >> 32) == 0)
51 value |= 0xffffffffUL << 32;
52
53 while (first <= last) {
54 const struct exception_table_entry *mid;
55 long diff;
56
57 mid = (last - first) / 2 + first;
58 diff = mid->insn - value;
59 if (diff == 0)
60 return mid;
61 else if (diff < 0)
62 first = mid+1;
63 else
64 last = mid-1;
65 }
66 return NULL;
67}
68#endif
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index f4cee9028cf0..f62777940dfb 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -38,7 +38,8 @@ enum x86_pf_error_code {
38 * Returns 0 if mmiotrace is disabled, or if the fault is not 38 * Returns 0 if mmiotrace is disabled, or if the fault is not
39 * handled by mmiotrace: 39 * handled by mmiotrace:
40 */ 40 */
41static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) 41static inline int __kprobes
42kmmio_fault(struct pt_regs *regs, unsigned long addr)
42{ 43{
43 if (unlikely(is_kmmio_active())) 44 if (unlikely(is_kmmio_active()))
44 if (kmmio_handler(regs, addr) == 1) 45 if (kmmio_handler(regs, addr) == 1)
@@ -46,7 +47,7 @@ static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr)
46 return 0; 47 return 0;
47} 48}
48 49
49static inline int notify_page_fault(struct pt_regs *regs) 50static inline int __kprobes notify_page_fault(struct pt_regs *regs)
50{ 51{
51 int ret = 0; 52 int ret = 0;
52 53
@@ -240,7 +241,7 @@ void vmalloc_sync_all(void)
240 * 241 *
241 * Handle a fault on the vmalloc or module mapping area 242 * Handle a fault on the vmalloc or module mapping area
242 */ 243 */
243static noinline int vmalloc_fault(unsigned long address) 244static noinline __kprobes int vmalloc_fault(unsigned long address)
244{ 245{
245 unsigned long pgd_paddr; 246 unsigned long pgd_paddr;
246 pmd_t *pmd_k; 247 pmd_t *pmd_k;
@@ -357,7 +358,7 @@ void vmalloc_sync_all(void)
357 * 358 *
358 * This assumes no large pages in there. 359 * This assumes no large pages in there.
359 */ 360 */
360static noinline int vmalloc_fault(unsigned long address) 361static noinline __kprobes int vmalloc_fault(unsigned long address)
361{ 362{
362 pgd_t *pgd, *pgd_ref; 363 pgd_t *pgd, *pgd_ref;
363 pud_t *pud, *pud_ref; 364 pud_t *pud, *pud_ref;
@@ -658,7 +659,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
658 show_fault_oops(regs, error_code, address); 659 show_fault_oops(regs, error_code, address);
659 660
660 stackend = end_of_stack(tsk); 661 stackend = end_of_stack(tsk);
661 if (*stackend != STACK_END_MAGIC) 662 if (tsk != &init_task && *stackend != STACK_END_MAGIC)
662 printk(KERN_ALERT "Thread overran stack, or stack corrupted\n"); 663 printk(KERN_ALERT "Thread overran stack, or stack corrupted\n");
663 664
664 tsk->thread.cr2 = address; 665 tsk->thread.cr2 = address;
@@ -860,7 +861,7 @@ static int spurious_fault_check(unsigned long error_code, pte_t *pte)
860 * There are no security implications to leaving a stale TLB when 861 * There are no security implications to leaving a stale TLB when
861 * increasing the permissions on a page. 862 * increasing the permissions on a page.
862 */ 863 */
863static noinline int 864static noinline __kprobes int
864spurious_fault(unsigned long error_code, unsigned long address) 865spurious_fault(unsigned long error_code, unsigned long address)
865{ 866{
866 pgd_t *pgd; 867 pgd_t *pgd;
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 71da1bca13cb..738e6593799d 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -18,7 +18,7 @@ static inline pte_t gup_get_pte(pte_t *ptep)
18#else 18#else
19 /* 19 /*
20 * With get_user_pages_fast, we walk down the pagetables without taking 20 * With get_user_pages_fast, we walk down the pagetables without taking
21 * any locks. For this we would like to load the pointers atoimcally, 21 * any locks. For this we would like to load the pointers atomically,
22 * but that is not possible (without expensive cmpxchg8b) on PAE. What 22 * but that is not possible (without expensive cmpxchg8b) on PAE. What
23 * we do have is the guarantee that a pte will only either go from not 23 * we do have is the guarantee that a pte will only either go from not
24 * present to present, or present to not present or both -- it will not 24 * present to present, or present to not present or both -- it will not
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index f46c340727b8..069ce7c37c01 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -9,7 +9,6 @@
9#include <linux/mm.h> 9#include <linux/mm.h>
10#include <linux/hugetlb.h> 10#include <linux/hugetlb.h>
11#include <linux/pagemap.h> 11#include <linux/pagemap.h>
12#include <linux/slab.h>
13#include <linux/err.h> 12#include <linux/err.h>
14#include <linux/sysctl.h> 13#include <linux/sysctl.h>
15#include <asm/mman.h> 14#include <asm/mman.h>
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 73ffd5536f62..b278535b14aa 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -1,3 +1,4 @@
1#include <linux/gfp.h>
1#include <linux/initrd.h> 2#include <linux/initrd.h>
2#include <linux/ioport.h> 3#include <linux/ioport.h>
3#include <linux/swap.h> 4#include <linux/swap.h>
@@ -146,10 +147,6 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
146 use_gbpages = direct_gbpages; 147 use_gbpages = direct_gbpages;
147#endif 148#endif
148 149
149 set_nx();
150 if (nx_enabled)
151 printk(KERN_INFO "NX (Execute Disable) protection: active\n");
152
153 /* Enable PSE if available */ 150 /* Enable PSE if available */
154 if (cpu_has_pse) 151 if (cpu_has_pse)
155 set_in_cr4(X86_CR4_PSE); 152 set_in_cr4(X86_CR4_PSE);
@@ -270,16 +267,9 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
270 if (!after_bootmem) 267 if (!after_bootmem)
271 find_early_table_space(end, use_pse, use_gbpages); 268 find_early_table_space(end, use_pse, use_gbpages);
272 269
273#ifdef CONFIG_X86_32
274 for (i = 0; i < nr_range; i++)
275 kernel_physical_mapping_init(mr[i].start, mr[i].end,
276 mr[i].page_size_mask);
277 ret = end;
278#else /* CONFIG_X86_64 */
279 for (i = 0; i < nr_range; i++) 270 for (i = 0; i < nr_range; i++)
280 ret = kernel_physical_mapping_init(mr[i].start, mr[i].end, 271 ret = kernel_physical_mapping_init(mr[i].start, mr[i].end,
281 mr[i].page_size_mask); 272 mr[i].page_size_mask);
282#endif
283 273
284#ifdef CONFIG_X86_32 274#ifdef CONFIG_X86_32
285 early_ioremap_page_table_range_init(); 275 early_ioremap_page_table_range_init();
@@ -342,11 +332,23 @@ int devmem_is_allowed(unsigned long pagenr)
342 332
343void free_init_pages(char *what, unsigned long begin, unsigned long end) 333void free_init_pages(char *what, unsigned long begin, unsigned long end)
344{ 334{
345 unsigned long addr = begin; 335 unsigned long addr;
336 unsigned long begin_aligned, end_aligned;
337
338 /* Make sure boundaries are page aligned */
339 begin_aligned = PAGE_ALIGN(begin);
340 end_aligned = end & PAGE_MASK;
346 341
347 if (addr >= end) 342 if (WARN_ON(begin_aligned != begin || end_aligned != end)) {
343 begin = begin_aligned;
344 end = end_aligned;
345 }
346
347 if (begin >= end)
348 return; 348 return;
349 349
350 addr = begin;
351
350 /* 352 /*
351 * If debugging page accesses then do not free this memory but 353 * If debugging page accesses then do not free this memory but
352 * mark them not present - any buggy init-section access will 354 * mark them not present - any buggy init-section access will
@@ -354,7 +356,7 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
354 */ 356 */
355#ifdef CONFIG_DEBUG_PAGEALLOC 357#ifdef CONFIG_DEBUG_PAGEALLOC
356 printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n", 358 printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n",
357 begin, PAGE_ALIGN(end)); 359 begin, end);
358 set_memory_np(begin, (end - begin) >> PAGE_SHIFT); 360 set_memory_np(begin, (end - begin) >> PAGE_SHIFT);
359#else 361#else
360 /* 362 /*
@@ -369,8 +371,7 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
369 for (; addr < end; addr += PAGE_SIZE) { 371 for (; addr < end; addr += PAGE_SIZE) {
370 ClearPageReserved(virt_to_page(addr)); 372 ClearPageReserved(virt_to_page(addr));
371 init_page_count(virt_to_page(addr)); 373 init_page_count(virt_to_page(addr));
372 memset((void *)(addr & ~(PAGE_SIZE-1)), 374 memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE);
373 POISON_FREE_INITMEM, PAGE_SIZE);
374 free_page(addr); 375 free_page(addr);
375 totalram_pages++; 376 totalram_pages++;
376 } 377 }
@@ -387,6 +388,15 @@ void free_initmem(void)
387#ifdef CONFIG_BLK_DEV_INITRD 388#ifdef CONFIG_BLK_DEV_INITRD
388void free_initrd_mem(unsigned long start, unsigned long end) 389void free_initrd_mem(unsigned long start, unsigned long end)
389{ 390{
390 free_init_pages("initrd memory", start, end); 391 /*
392 * end could be not aligned, and We can not align that,
393 * decompresser could be confused by aligned initrd_end
394 * We already reserve the end partial page before in
395 * - i386_start_kernel()
396 * - x86_64_start_kernel()
397 * - relocate_initrd()
398 * So here We can do PAGE_ALIGN() safely to get partial page to be freed
399 */
400 free_init_pages("initrd memory", start, PAGE_ALIGN(end));
391} 401}
392#endif 402#endif
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 30938c1d8d5d..bca79091b9d6 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -25,11 +25,11 @@
25#include <linux/pfn.h> 25#include <linux/pfn.h>
26#include <linux/poison.h> 26#include <linux/poison.h>
27#include <linux/bootmem.h> 27#include <linux/bootmem.h>
28#include <linux/slab.h>
29#include <linux/proc_fs.h> 28#include <linux/proc_fs.h>
30#include <linux/memory_hotplug.h> 29#include <linux/memory_hotplug.h>
31#include <linux/initrd.h> 30#include <linux/initrd.h>
32#include <linux/cpumask.h> 31#include <linux/cpumask.h>
32#include <linux/gfp.h>
33 33
34#include <asm/asm.h> 34#include <asm/asm.h>
35#include <asm/bios_ebda.h> 35#include <asm/bios_ebda.h>
@@ -241,6 +241,7 @@ kernel_physical_mapping_init(unsigned long start,
241 unsigned long page_size_mask) 241 unsigned long page_size_mask)
242{ 242{
243 int use_pse = page_size_mask == (1<<PG_LEVEL_2M); 243 int use_pse = page_size_mask == (1<<PG_LEVEL_2M);
244 unsigned long last_map_addr = end;
244 unsigned long start_pfn, end_pfn; 245 unsigned long start_pfn, end_pfn;
245 pgd_t *pgd_base = swapper_pg_dir; 246 pgd_t *pgd_base = swapper_pg_dir;
246 int pgd_idx, pmd_idx, pte_ofs; 247 int pgd_idx, pmd_idx, pte_ofs;
@@ -341,9 +342,10 @@ repeat:
341 prot = PAGE_KERNEL_EXEC; 342 prot = PAGE_KERNEL_EXEC;
342 343
343 pages_4k++; 344 pages_4k++;
344 if (mapping_iter == 1) 345 if (mapping_iter == 1) {
345 set_pte(pte, pfn_pte(pfn, init_prot)); 346 set_pte(pte, pfn_pte(pfn, init_prot));
346 else 347 last_map_addr = (pfn << PAGE_SHIFT) + PAGE_SIZE;
348 } else
347 set_pte(pte, pfn_pte(pfn, prot)); 349 set_pte(pte, pfn_pte(pfn, prot));
348 } 350 }
349 } 351 }
@@ -368,7 +370,7 @@ repeat:
368 mapping_iter = 2; 370 mapping_iter = 2;
369 goto repeat; 371 goto repeat;
370 } 372 }
371 return 0; 373 return last_map_addr;
372} 374}
373 375
374pte_t *kmap_pte; 376pte_t *kmap_pte;
@@ -412,7 +414,7 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base)
412 pkmap_page_table = pte; 414 pkmap_page_table = pte;
413} 415}
414 416
415static void __init add_one_highpage_init(struct page *page, int pfn) 417static void __init add_one_highpage_init(struct page *page)
416{ 418{
417 ClearPageReserved(page); 419 ClearPageReserved(page);
418 init_page_count(page); 420 init_page_count(page);
@@ -445,7 +447,7 @@ static int __init add_highpages_work_fn(unsigned long start_pfn,
445 if (!pfn_valid(node_pfn)) 447 if (!pfn_valid(node_pfn))
446 continue; 448 continue;
447 page = pfn_to_page(node_pfn); 449 page = pfn_to_page(node_pfn);
448 add_one_highpage_init(page, node_pfn); 450 add_one_highpage_init(page);
449 } 451 }
450 452
451 return 0; 453 return 0;
@@ -703,8 +705,8 @@ void __init find_low_pfn_range(void)
703} 705}
704 706
705#ifndef CONFIG_NEED_MULTIPLE_NODES 707#ifndef CONFIG_NEED_MULTIPLE_NODES
706void __init initmem_init(unsigned long start_pfn, 708void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
707 unsigned long end_pfn) 709 int acpi, int k8)
708{ 710{
709#ifdef CONFIG_HIGHMEM 711#ifdef CONFIG_HIGHMEM
710 highstart_pfn = highend_pfn = max_pfn; 712 highstart_pfn = highend_pfn = max_pfn;
@@ -748,6 +750,7 @@ static void __init zone_sizes_init(void)
748 free_area_init_nodes(max_zone_pfns); 750 free_area_init_nodes(max_zone_pfns);
749} 751}
750 752
753#ifndef CONFIG_NO_BOOTMEM
751static unsigned long __init setup_node_bootmem(int nodeid, 754static unsigned long __init setup_node_bootmem(int nodeid,
752 unsigned long start_pfn, 755 unsigned long start_pfn,
753 unsigned long end_pfn, 756 unsigned long end_pfn,
@@ -764,13 +767,14 @@ static unsigned long __init setup_node_bootmem(int nodeid,
764 printk(KERN_INFO " node %d bootmap %08lx - %08lx\n", 767 printk(KERN_INFO " node %d bootmap %08lx - %08lx\n",
765 nodeid, bootmap, bootmap + bootmap_size); 768 nodeid, bootmap, bootmap + bootmap_size);
766 free_bootmem_with_active_regions(nodeid, end_pfn); 769 free_bootmem_with_active_regions(nodeid, end_pfn);
767 early_res_to_bootmem(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
768 770
769 return bootmap + bootmap_size; 771 return bootmap + bootmap_size;
770} 772}
773#endif
771 774
772void __init setup_bootmem_allocator(void) 775void __init setup_bootmem_allocator(void)
773{ 776{
777#ifndef CONFIG_NO_BOOTMEM
774 int nodeid; 778 int nodeid;
775 unsigned long bootmap_size, bootmap; 779 unsigned long bootmap_size, bootmap;
776 /* 780 /*
@@ -782,11 +786,13 @@ void __init setup_bootmem_allocator(void)
782 if (bootmap == -1L) 786 if (bootmap == -1L)
783 panic("Cannot find bootmem map of size %ld\n", bootmap_size); 787 panic("Cannot find bootmem map of size %ld\n", bootmap_size);
784 reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP"); 788 reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP");
789#endif
785 790
786 printk(KERN_INFO " mapped low ram: 0 - %08lx\n", 791 printk(KERN_INFO " mapped low ram: 0 - %08lx\n",
787 max_pfn_mapped<<PAGE_SHIFT); 792 max_pfn_mapped<<PAGE_SHIFT);
788 printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT); 793 printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT);
789 794
795#ifndef CONFIG_NO_BOOTMEM
790 for_each_online_node(nodeid) { 796 for_each_online_node(nodeid) {
791 unsigned long start_pfn, end_pfn; 797 unsigned long start_pfn, end_pfn;
792 798
@@ -804,6 +810,7 @@ void __init setup_bootmem_allocator(void)
804 bootmap = setup_node_bootmem(nodeid, start_pfn, end_pfn, 810 bootmap = setup_node_bootmem(nodeid, start_pfn, end_pfn,
805 bootmap); 811 bootmap);
806 } 812 }
813#endif
807 814
808 after_bootmem = 1; 815 after_bootmem = 1;
809} 816}
@@ -892,8 +899,7 @@ void __init mem_init(void)
892 reservedpages << (PAGE_SHIFT-10), 899 reservedpages << (PAGE_SHIFT-10),
893 datasize >> 10, 900 datasize >> 10,
894 initsize >> 10, 901 initsize >> 10,
895 (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)) 902 totalhigh_pages << (PAGE_SHIFT-10));
896 );
897 903
898 printk(KERN_INFO "virtual kernel memory layout:\n" 904 printk(KERN_INFO "virtual kernel memory layout:\n"
899 " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" 905 " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
@@ -997,7 +1003,7 @@ static noinline int do_test_wp_bit(void)
997const int rodata_test_data = 0xC3; 1003const int rodata_test_data = 0xC3;
998EXPORT_SYMBOL_GPL(rodata_test_data); 1004EXPORT_SYMBOL_GPL(rodata_test_data);
999 1005
1000static int kernel_set_to_readonly; 1006int kernel_set_to_readonly __read_mostly;
1001 1007
1002void set_kernel_text_rw(void) 1008void set_kernel_text_rw(void)
1003{ 1009{
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 5a4398a6006b..ee41bba315d1 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -29,6 +29,7 @@
29#include <linux/module.h> 29#include <linux/module.h>
30#include <linux/memory_hotplug.h> 30#include <linux/memory_hotplug.h>
31#include <linux/nmi.h> 31#include <linux/nmi.h>
32#include <linux/gfp.h>
32 33
33#include <asm/processor.h> 34#include <asm/processor.h>
34#include <asm/bios_ebda.h> 35#include <asm/bios_ebda.h>
@@ -49,6 +50,7 @@
49#include <asm/numa.h> 50#include <asm/numa.h>
50#include <asm/cacheflush.h> 51#include <asm/cacheflush.h>
51#include <asm/init.h> 52#include <asm/init.h>
53#include <linux/bootmem.h>
52 54
53static unsigned long dma_reserve __initdata; 55static unsigned long dma_reserve __initdata;
54 56
@@ -568,8 +570,10 @@ kernel_physical_mapping_init(unsigned long start,
568} 570}
569 571
570#ifndef CONFIG_NUMA 572#ifndef CONFIG_NUMA
571void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn) 573void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
574 int acpi, int k8)
572{ 575{
576#ifndef CONFIG_NO_BOOTMEM
573 unsigned long bootmap_size, bootmap; 577 unsigned long bootmap_size, bootmap;
574 578
575 bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT; 579 bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
@@ -577,13 +581,15 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn)
577 PAGE_SIZE); 581 PAGE_SIZE);
578 if (bootmap == -1L) 582 if (bootmap == -1L)
579 panic("Cannot find bootmem map of size %ld\n", bootmap_size); 583 panic("Cannot find bootmem map of size %ld\n", bootmap_size);
584 reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP");
580 /* don't touch min_low_pfn */ 585 /* don't touch min_low_pfn */
581 bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap >> PAGE_SHIFT, 586 bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap >> PAGE_SHIFT,
582 0, end_pfn); 587 0, end_pfn);
583 e820_register_active_regions(0, start_pfn, end_pfn); 588 e820_register_active_regions(0, start_pfn, end_pfn);
584 free_bootmem_with_active_regions(0, end_pfn); 589 free_bootmem_with_active_regions(0, end_pfn);
585 early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT); 590#else
586 reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT); 591 e820_register_active_regions(0, start_pfn, end_pfn);
592#endif
587} 593}
588#endif 594#endif
589 595
@@ -615,6 +621,21 @@ void __init paging_init(void)
615 */ 621 */
616#ifdef CONFIG_MEMORY_HOTPLUG 622#ifdef CONFIG_MEMORY_HOTPLUG
617/* 623/*
624 * After memory hotplug the variables max_pfn, max_low_pfn and high_memory need
625 * updating.
626 */
627static void update_end_of_memory_vars(u64 start, u64 size)
628{
629 unsigned long end_pfn = PFN_UP(start + size);
630
631 if (end_pfn > max_pfn) {
632 max_pfn = end_pfn;
633 max_low_pfn = end_pfn;
634 high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
635 }
636}
637
638/*
618 * Memory is added always to NORMAL zone. This means you will never get 639 * Memory is added always to NORMAL zone. This means you will never get
619 * additional DMA/DMA32 memory. 640 * additional DMA/DMA32 memory.
620 */ 641 */
@@ -633,6 +654,9 @@ int arch_add_memory(int nid, u64 start, u64 size)
633 ret = __add_pages(nid, zone, start_pfn, nr_pages); 654 ret = __add_pages(nid, zone, start_pfn, nr_pages);
634 WARN_ON_ONCE(ret); 655 WARN_ON_ONCE(ret);
635 656
657 /* update max_pfn, max_low_pfn and high_memory */
658 update_end_of_memory_vars(start, size);
659
636 return ret; 660 return ret;
637} 661}
638EXPORT_SYMBOL_GPL(arch_add_memory); 662EXPORT_SYMBOL_GPL(arch_add_memory);
@@ -694,12 +718,12 @@ void __init mem_init(void)
694const int rodata_test_data = 0xC3; 718const int rodata_test_data = 0xC3;
695EXPORT_SYMBOL_GPL(rodata_test_data); 719EXPORT_SYMBOL_GPL(rodata_test_data);
696 720
697static int kernel_set_to_readonly; 721int kernel_set_to_readonly;
698 722
699void set_kernel_text_rw(void) 723void set_kernel_text_rw(void)
700{ 724{
701 unsigned long start = PFN_ALIGN(_stext); 725 unsigned long start = PFN_ALIGN(_text);
702 unsigned long end = PFN_ALIGN(__start_rodata); 726 unsigned long end = PFN_ALIGN(__stop___ex_table);
703 727
704 if (!kernel_set_to_readonly) 728 if (!kernel_set_to_readonly)
705 return; 729 return;
@@ -707,13 +731,18 @@ void set_kernel_text_rw(void)
707 pr_debug("Set kernel text: %lx - %lx for read write\n", 731 pr_debug("Set kernel text: %lx - %lx for read write\n",
708 start, end); 732 start, end);
709 733
734 /*
735 * Make the kernel identity mapping for text RW. Kernel text
736 * mapping will always be RO. Refer to the comment in
737 * static_protections() in pageattr.c
738 */
710 set_memory_rw(start, (end - start) >> PAGE_SHIFT); 739 set_memory_rw(start, (end - start) >> PAGE_SHIFT);
711} 740}
712 741
713void set_kernel_text_ro(void) 742void set_kernel_text_ro(void)
714{ 743{
715 unsigned long start = PFN_ALIGN(_stext); 744 unsigned long start = PFN_ALIGN(_text);
716 unsigned long end = PFN_ALIGN(__start_rodata); 745 unsigned long end = PFN_ALIGN(__stop___ex_table);
717 746
718 if (!kernel_set_to_readonly) 747 if (!kernel_set_to_readonly)
719 return; 748 return;
@@ -721,14 +750,21 @@ void set_kernel_text_ro(void)
721 pr_debug("Set kernel text: %lx - %lx for read only\n", 750 pr_debug("Set kernel text: %lx - %lx for read only\n",
722 start, end); 751 start, end);
723 752
753 /*
754 * Set the kernel identity mapping for text RO.
755 */
724 set_memory_ro(start, (end - start) >> PAGE_SHIFT); 756 set_memory_ro(start, (end - start) >> PAGE_SHIFT);
725} 757}
726 758
727void mark_rodata_ro(void) 759void mark_rodata_ro(void)
728{ 760{
729 unsigned long start = PFN_ALIGN(_stext), end = PFN_ALIGN(__end_rodata); 761 unsigned long start = PFN_ALIGN(_text);
730 unsigned long rodata_start = 762 unsigned long rodata_start =
731 ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK; 763 ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK;
764 unsigned long end = (unsigned long) &__end_rodata_hpage_align;
765 unsigned long text_end = PAGE_ALIGN((unsigned long) &__stop___ex_table);
766 unsigned long rodata_end = PAGE_ALIGN((unsigned long) &__end_rodata);
767 unsigned long data_start = (unsigned long) &_sdata;
732 768
733 printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", 769 printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
734 (end - start) >> 10); 770 (end - start) >> 10);
@@ -751,6 +787,14 @@ void mark_rodata_ro(void)
751 printk(KERN_INFO "Testing CPA: again\n"); 787 printk(KERN_INFO "Testing CPA: again\n");
752 set_memory_ro(start, (end-start) >> PAGE_SHIFT); 788 set_memory_ro(start, (end-start) >> PAGE_SHIFT);
753#endif 789#endif
790
791 free_init_pages("unused kernel memory",
792 (unsigned long) page_address(virt_to_page(text_end)),
793 (unsigned long)
794 page_address(virt_to_page(rodata_start)));
795 free_init_pages("unused kernel memory",
796 (unsigned long) page_address(virt_to_page(rodata_end)),
797 (unsigned long) page_address(virt_to_page(data_start)));
754} 798}
755 799
756#endif 800#endif
@@ -934,7 +978,7 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node)
934 if (pmd_none(*pmd)) { 978 if (pmd_none(*pmd)) {
935 pte_t entry; 979 pte_t entry;
936 980
937 p = vmemmap_alloc_block(PMD_SIZE, node); 981 p = vmemmap_alloc_block_buf(PMD_SIZE, node);
938 if (!p) 982 if (!p)
939 return -ENOMEM; 983 return -ENOMEM;
940 984
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 2feb9bdedaaf..12e4d2d3c110 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -24,43 +24,6 @@
24 24
25#include "physaddr.h" 25#include "physaddr.h"
26 26
27int page_is_ram(unsigned long pagenr)
28{
29 resource_size_t addr, end;
30 int i;
31
32 /*
33 * A special case is the first 4Kb of memory;
34 * This is a BIOS owned area, not kernel ram, but generally
35 * not listed as such in the E820 table.
36 */
37 if (pagenr == 0)
38 return 0;
39
40 /*
41 * Second special case: Some BIOSen report the PC BIOS
42 * area (640->1Mb) as ram even though it is not.
43 */
44 if (pagenr >= (BIOS_BEGIN >> PAGE_SHIFT) &&
45 pagenr < (BIOS_END >> PAGE_SHIFT))
46 return 0;
47
48 for (i = 0; i < e820.nr_map; i++) {
49 /*
50 * Not usable memory:
51 */
52 if (e820.map[i].type != E820_RAM)
53 continue;
54 addr = (e820.map[i].addr + PAGE_SIZE-1) >> PAGE_SHIFT;
55 end = (e820.map[i].addr + e820.map[i].size) >> PAGE_SHIFT;
56
57
58 if ((pagenr >= addr) && (pagenr < end))
59 return 1;
60 }
61 return 0;
62}
63
64/* 27/*
65 * Fix up the linear direct mapping of the kernel to avoid cache attribute 28 * Fix up the linear direct mapping of the kernel to avoid cache attribute
66 * conflicts. 29 * conflicts.
@@ -281,30 +244,6 @@ void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size)
281} 244}
282EXPORT_SYMBOL(ioremap_cache); 245EXPORT_SYMBOL(ioremap_cache);
283 246
284static void __iomem *ioremap_default(resource_size_t phys_addr,
285 unsigned long size)
286{
287 unsigned long flags;
288 void __iomem *ret;
289 int err;
290
291 /*
292 * - WB for WB-able memory and no other conflicting mappings
293 * - UC_MINUS for non-WB-able memory with no other conflicting mappings
294 * - Inherit from confliting mappings otherwise
295 */
296 err = reserve_memtype(phys_addr, phys_addr + size,
297 _PAGE_CACHE_WB, &flags);
298 if (err < 0)
299 return NULL;
300
301 ret = __ioremap_caller(phys_addr, size, flags,
302 __builtin_return_address(0));
303
304 free_memtype(phys_addr, phys_addr + size);
305 return ret;
306}
307
308void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size, 247void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size,
309 unsigned long prot_val) 248 unsigned long prot_val)
310{ 249{
@@ -380,7 +319,7 @@ void *xlate_dev_mem_ptr(unsigned long phys)
380 if (page_is_ram(start >> PAGE_SHIFT)) 319 if (page_is_ram(start >> PAGE_SHIFT))
381 return __va(phys); 320 return __va(phys);
382 321
383 addr = (void __force *)ioremap_default(start, PAGE_SIZE); 322 addr = (void __force *)ioremap_cache(start, PAGE_SIZE);
384 if (addr) 323 if (addr)
385 addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK)); 324 addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK));
386 325
@@ -446,6 +385,10 @@ void __init early_ioremap_init(void)
446 * The boot-ioremap range spans multiple pmds, for which 385 * The boot-ioremap range spans multiple pmds, for which
447 * we are not prepared: 386 * we are not prepared:
448 */ 387 */
388#define __FIXADDR_TOP (-PAGE_SIZE)
389 BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
390 != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
391#undef __FIXADDR_TOP
449 if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) { 392 if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) {
450 WARN_ON(1); 393 WARN_ON(1);
451 printk(KERN_WARNING "pmd %p != %p\n", 394 printk(KERN_WARNING "pmd %p != %p\n",
@@ -505,6 +448,20 @@ static inline void __init early_clear_fixmap(enum fixed_addresses idx)
505static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata; 448static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata;
506static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata; 449static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata;
507 450
451void __init fixup_early_ioremap(void)
452{
453 int i;
454
455 for (i = 0; i < FIX_BTMAPS_SLOTS; i++) {
456 if (prev_map[i]) {
457 WARN_ON(1);
458 break;
459 }
460 }
461
462 early_ioremap_init();
463}
464
508static int __init check_early_ioremap_leak(void) 465static int __init check_early_ioremap_leak(void)
509{ 466{
510 int count = 0; 467 int count = 0;
diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/k8topology_64.c
index 268f8255280f..970ed579d4e4 100644
--- a/arch/x86/mm/k8topology_64.c
+++ b/arch/x86/mm/k8topology_64.c
@@ -24,6 +24,9 @@
24#include <asm/apic.h> 24#include <asm/apic.h>
25#include <asm/k8.h> 25#include <asm/k8.h>
26 26
27static struct bootnode __initdata nodes[8];
28static nodemask_t __initdata nodes_parsed = NODE_MASK_NONE;
29
27static __init int find_northbridge(void) 30static __init int find_northbridge(void)
28{ 31{
29 int num; 32 int num;
@@ -54,18 +57,6 @@ static __init void early_get_boot_cpu_id(void)
54 * need to get boot_cpu_id so can use that to create apicid_to_node 57 * need to get boot_cpu_id so can use that to create apicid_to_node
55 * in k8_scan_nodes() 58 * in k8_scan_nodes()
56 */ 59 */
57 /*
58 * Find possible boot-time SMP configuration:
59 */
60#ifdef CONFIG_X86_MPPARSE
61 early_find_smp_config();
62#endif
63#ifdef CONFIG_ACPI
64 /*
65 * Read APIC information from ACPI tables.
66 */
67 early_acpi_boot_init();
68#endif
69#ifdef CONFIG_X86_MPPARSE 60#ifdef CONFIG_X86_MPPARSE
70 /* 61 /*
71 * get boot-time SMP configuration: 62 * get boot-time SMP configuration:
@@ -76,12 +67,26 @@ static __init void early_get_boot_cpu_id(void)
76 early_init_lapic_mapping(); 67 early_init_lapic_mapping();
77} 68}
78 69
79int __init k8_scan_nodes(unsigned long start, unsigned long end) 70int __init k8_get_nodes(struct bootnode *physnodes)
80{ 71{
81 unsigned numnodes, cores, bits, apicid_base; 72 int i;
73 int ret = 0;
74
75 for_each_node_mask(i, nodes_parsed) {
76 physnodes[ret].start = nodes[i].start;
77 physnodes[ret].end = nodes[i].end;
78 ret++;
79 }
80 return ret;
81}
82
83int __init k8_numa_init(unsigned long start_pfn, unsigned long end_pfn)
84{
85 unsigned long start = PFN_PHYS(start_pfn);
86 unsigned long end = PFN_PHYS(end_pfn);
87 unsigned numnodes;
82 unsigned long prevbase; 88 unsigned long prevbase;
83 struct bootnode nodes[8]; 89 int i, nb, found = 0;
84 int i, j, nb, found = 0;
85 u32 nodeid, reg; 90 u32 nodeid, reg;
86 91
87 if (!early_pci_allowed()) 92 if (!early_pci_allowed())
@@ -91,16 +96,15 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
91 if (nb < 0) 96 if (nb < 0)
92 return nb; 97 return nb;
93 98
94 printk(KERN_INFO "Scanning NUMA topology in Northbridge %d\n", nb); 99 pr_info("Scanning NUMA topology in Northbridge %d\n", nb);
95 100
96 reg = read_pci_config(0, nb, 0, 0x60); 101 reg = read_pci_config(0, nb, 0, 0x60);
97 numnodes = ((reg >> 4) & 0xF) + 1; 102 numnodes = ((reg >> 4) & 0xF) + 1;
98 if (numnodes <= 1) 103 if (numnodes <= 1)
99 return -1; 104 return -1;
100 105
101 printk(KERN_INFO "Number of nodes %d\n", numnodes); 106 pr_info("Number of physical nodes %d\n", numnodes);
102 107
103 memset(&nodes, 0, sizeof(nodes));
104 prevbase = 0; 108 prevbase = 0;
105 for (i = 0; i < 8; i++) { 109 for (i = 0; i < 8; i++) {
106 unsigned long base, limit; 110 unsigned long base, limit;
@@ -111,28 +115,28 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
111 nodeid = limit & 7; 115 nodeid = limit & 7;
112 if ((base & 3) == 0) { 116 if ((base & 3) == 0) {
113 if (i < numnodes) 117 if (i < numnodes)
114 printk("Skipping disabled node %d\n", i); 118 pr_info("Skipping disabled node %d\n", i);
115 continue; 119 continue;
116 } 120 }
117 if (nodeid >= numnodes) { 121 if (nodeid >= numnodes) {
118 printk("Ignoring excess node %d (%lx:%lx)\n", nodeid, 122 pr_info("Ignoring excess node %d (%lx:%lx)\n", nodeid,
119 base, limit); 123 base, limit);
120 continue; 124 continue;
121 } 125 }
122 126
123 if (!limit) { 127 if (!limit) {
124 printk(KERN_INFO "Skipping node entry %d (base %lx)\n", 128 pr_info("Skipping node entry %d (base %lx)\n",
125 i, base); 129 i, base);
126 continue; 130 continue;
127 } 131 }
128 if ((base >> 8) & 3 || (limit >> 8) & 3) { 132 if ((base >> 8) & 3 || (limit >> 8) & 3) {
129 printk(KERN_ERR "Node %d using interleaving mode %lx/%lx\n", 133 pr_err("Node %d using interleaving mode %lx/%lx\n",
130 nodeid, (base>>8)&3, (limit>>8) & 3); 134 nodeid, (base >> 8) & 3, (limit >> 8) & 3);
131 return -1; 135 return -1;
132 } 136 }
133 if (node_isset(nodeid, node_possible_map)) { 137 if (node_isset(nodeid, nodes_parsed)) {
134 printk(KERN_INFO "Node %d already present. Skipping\n", 138 pr_info("Node %d already present, skipping\n",
135 nodeid); 139 nodeid);
136 continue; 140 continue;
137 } 141 }
138 142
@@ -141,8 +145,8 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
141 limit |= (1<<24)-1; 145 limit |= (1<<24)-1;
142 limit++; 146 limit++;
143 147
144 if (limit > max_pfn << PAGE_SHIFT) 148 if (limit > end)
145 limit = max_pfn << PAGE_SHIFT; 149 limit = end;
146 if (limit <= base) 150 if (limit <= base)
147 continue; 151 continue;
148 152
@@ -154,24 +158,24 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
154 if (limit > end) 158 if (limit > end)
155 limit = end; 159 limit = end;
156 if (limit == base) { 160 if (limit == base) {
157 printk(KERN_ERR "Empty node %d\n", nodeid); 161 pr_err("Empty node %d\n", nodeid);
158 continue; 162 continue;
159 } 163 }
160 if (limit < base) { 164 if (limit < base) {
161 printk(KERN_ERR "Node %d bogus settings %lx-%lx.\n", 165 pr_err("Node %d bogus settings %lx-%lx.\n",
162 nodeid, base, limit); 166 nodeid, base, limit);
163 continue; 167 continue;
164 } 168 }
165 169
166 /* Could sort here, but pun for now. Should not happen anyroads. */ 170 /* Could sort here, but pun for now. Should not happen anyroads. */
167 if (prevbase > base) { 171 if (prevbase > base) {
168 printk(KERN_ERR "Node map not sorted %lx,%lx\n", 172 pr_err("Node map not sorted %lx,%lx\n",
169 prevbase, base); 173 prevbase, base);
170 return -1; 174 return -1;
171 } 175 }
172 176
173 printk(KERN_INFO "Node %d MemBase %016lx Limit %016lx\n", 177 pr_info("Node %d MemBase %016lx Limit %016lx\n",
174 nodeid, base, limit); 178 nodeid, base, limit);
175 179
176 found++; 180 found++;
177 181
@@ -180,18 +184,29 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
180 184
181 prevbase = base; 185 prevbase = base;
182 186
183 node_set(nodeid, node_possible_map); 187 node_set(nodeid, nodes_parsed);
184 } 188 }
185 189
186 if (!found) 190 if (!found)
187 return -1; 191 return -1;
192 return 0;
193}
194
195int __init k8_scan_nodes(void)
196{
197 unsigned int bits;
198 unsigned int cores;
199 unsigned int apicid_base;
200 int i;
188 201
202 BUG_ON(nodes_empty(nodes_parsed));
203 node_possible_map = nodes_parsed;
189 memnode_shift = compute_hash_shift(nodes, 8, NULL); 204 memnode_shift = compute_hash_shift(nodes, 8, NULL);
190 if (memnode_shift < 0) { 205 if (memnode_shift < 0) {
191 printk(KERN_ERR "No NUMA node hash function found. Contact maintainer\n"); 206 pr_err("No NUMA node hash function found. Contact maintainer\n");
192 return -1; 207 return -1;
193 } 208 }
194 printk(KERN_INFO "Using node hash shift of %d\n", memnode_shift); 209 pr_info("Using node hash shift of %d\n", memnode_shift);
195 210
196 /* use the coreid bits from early_identify_cpu */ 211 /* use the coreid bits from early_identify_cpu */
197 bits = boot_cpu_data.x86_coreid_bits; 212 bits = boot_cpu_data.x86_coreid_bits;
@@ -200,14 +215,12 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
200 /* need to get boot_cpu_id early for system with apicid lifting */ 215 /* need to get boot_cpu_id early for system with apicid lifting */
201 early_get_boot_cpu_id(); 216 early_get_boot_cpu_id();
202 if (boot_cpu_physical_apicid > 0) { 217 if (boot_cpu_physical_apicid > 0) {
203 printk(KERN_INFO "BSP APIC ID: %02x\n", 218 pr_info("BSP APIC ID: %02x\n", boot_cpu_physical_apicid);
204 boot_cpu_physical_apicid);
205 apicid_base = boot_cpu_physical_apicid; 219 apicid_base = boot_cpu_physical_apicid;
206 } 220 }
207 221
208 for (i = 0; i < 8; i++) { 222 for_each_node_mask(i, node_possible_map) {
209 if (nodes[i].start == nodes[i].end) 223 int j;
210 continue;
211 224
212 e820_register_active_regions(i, 225 e820_register_active_regions(i,
213 nodes[i].start >> PAGE_SHIFT, 226 nodes[i].start >> PAGE_SHIFT,
diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c
index 4901d0dafda6..af3b6c8a436f 100644
--- a/arch/x86/mm/kmemcheck/error.c
+++ b/arch/x86/mm/kmemcheck/error.c
@@ -106,26 +106,25 @@ void kmemcheck_error_recall(void)
106 106
107 switch (e->type) { 107 switch (e->type) {
108 case KMEMCHECK_ERROR_INVALID_ACCESS: 108 case KMEMCHECK_ERROR_INVALID_ACCESS:
109 printk(KERN_ERR "WARNING: kmemcheck: Caught %d-bit read " 109 printk(KERN_WARNING "WARNING: kmemcheck: Caught %d-bit read from %s memory (%p)\n",
110 "from %s memory (%p)\n",
111 8 * e->size, e->state < ARRAY_SIZE(desc) ? 110 8 * e->size, e->state < ARRAY_SIZE(desc) ?
112 desc[e->state] : "(invalid shadow state)", 111 desc[e->state] : "(invalid shadow state)",
113 (void *) e->address); 112 (void *) e->address);
114 113
115 printk(KERN_INFO); 114 printk(KERN_WARNING);
116 for (i = 0; i < SHADOW_COPY_SIZE; ++i) 115 for (i = 0; i < SHADOW_COPY_SIZE; ++i)
117 printk("%02x", e->memory_copy[i]); 116 printk(KERN_CONT "%02x", e->memory_copy[i]);
118 printk("\n"); 117 printk(KERN_CONT "\n");
119 118
120 printk(KERN_INFO); 119 printk(KERN_WARNING);
121 for (i = 0; i < SHADOW_COPY_SIZE; ++i) { 120 for (i = 0; i < SHADOW_COPY_SIZE; ++i) {
122 if (e->shadow_copy[i] < ARRAY_SIZE(short_desc)) 121 if (e->shadow_copy[i] < ARRAY_SIZE(short_desc))
123 printk(" %c", short_desc[e->shadow_copy[i]]); 122 printk(KERN_CONT " %c", short_desc[e->shadow_copy[i]]);
124 else 123 else
125 printk(" ?"); 124 printk(KERN_CONT " ?");
126 } 125 }
127 printk("\n"); 126 printk(KERN_CONT "\n");
128 printk(KERN_INFO "%*c\n", 2 + 2 127 printk(KERN_WARNING "%*c\n", 2 + 2
129 * (int) (e->address & (SHADOW_COPY_SIZE - 1)), '^'); 128 * (int) (e->address & (SHADOW_COPY_SIZE - 1)), '^');
130 break; 129 break;
131 case KMEMCHECK_ERROR_BUG: 130 case KMEMCHECK_ERROR_BUG:
diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c
index 8cc183344140..b3b531a4f8e5 100644
--- a/arch/x86/mm/kmemcheck/kmemcheck.c
+++ b/arch/x86/mm/kmemcheck/kmemcheck.c
@@ -337,7 +337,7 @@ bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size)
337 if (!shadow) 337 if (!shadow)
338 return true; 338 return true;
339 339
340 status = kmemcheck_shadow_test(shadow, size); 340 status = kmemcheck_shadow_test_all(shadow, size);
341 341
342 return status == KMEMCHECK_SHADOW_INITIALIZED; 342 return status == KMEMCHECK_SHADOW_INITIALIZED;
343} 343}
diff --git a/arch/x86/mm/kmemcheck/shadow.c b/arch/x86/mm/kmemcheck/shadow.c
index 3f66b82076a3..aec124214d97 100644
--- a/arch/x86/mm/kmemcheck/shadow.c
+++ b/arch/x86/mm/kmemcheck/shadow.c
@@ -125,12 +125,12 @@ void kmemcheck_mark_initialized_pages(struct page *p, unsigned int n)
125 125
126enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size) 126enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size)
127{ 127{
128#ifdef CONFIG_KMEMCHECK_PARTIAL_OK
128 uint8_t *x; 129 uint8_t *x;
129 unsigned int i; 130 unsigned int i;
130 131
131 x = shadow; 132 x = shadow;
132 133
133#ifdef CONFIG_KMEMCHECK_PARTIAL_OK
134 /* 134 /*
135 * Make sure _some_ bytes are initialized. Gcc frequently generates 135 * Make sure _some_ bytes are initialized. Gcc frequently generates
136 * code to access neighboring bytes. 136 * code to access neighboring bytes.
@@ -139,13 +139,25 @@ enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size)
139 if (x[i] == KMEMCHECK_SHADOW_INITIALIZED) 139 if (x[i] == KMEMCHECK_SHADOW_INITIALIZED)
140 return x[i]; 140 return x[i];
141 } 141 }
142
143 return x[0];
142#else 144#else
145 return kmemcheck_shadow_test_all(shadow, size);
146#endif
147}
148
149enum kmemcheck_shadow kmemcheck_shadow_test_all(void *shadow, unsigned int size)
150{
151 uint8_t *x;
152 unsigned int i;
153
154 x = shadow;
155
143 /* All bytes must be initialized. */ 156 /* All bytes must be initialized. */
144 for (i = 0; i < size; ++i) { 157 for (i = 0; i < size; ++i) {
145 if (x[i] != KMEMCHECK_SHADOW_INITIALIZED) 158 if (x[i] != KMEMCHECK_SHADOW_INITIALIZED)
146 return x[i]; 159 return x[i];
147 } 160 }
148#endif
149 161
150 return x[0]; 162 return x[0];
151} 163}
diff --git a/arch/x86/mm/kmemcheck/shadow.h b/arch/x86/mm/kmemcheck/shadow.h
index af46d9ab9d86..ff0b2f70fbcb 100644
--- a/arch/x86/mm/kmemcheck/shadow.h
+++ b/arch/x86/mm/kmemcheck/shadow.h
@@ -11,6 +11,8 @@ enum kmemcheck_shadow {
11void *kmemcheck_shadow_lookup(unsigned long address); 11void *kmemcheck_shadow_lookup(unsigned long address);
12 12
13enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size); 13enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size);
14enum kmemcheck_shadow kmemcheck_shadow_test_all(void *shadow,
15 unsigned int size);
14void kmemcheck_shadow_set(void *shadow, unsigned int size); 16void kmemcheck_shadow_set(void *shadow, unsigned int size);
15 17
16#endif 18#endif
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 16ccbd77917f..5d0e67fff1a6 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -5,6 +5,8 @@
5 * 2008 Pekka Paalanen <pq@iki.fi> 5 * 2008 Pekka Paalanen <pq@iki.fi>
6 */ 6 */
7 7
8#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
9
8#include <linux/list.h> 10#include <linux/list.h>
9#include <linux/rculist.h> 11#include <linux/rculist.h>
10#include <linux/spinlock.h> 12#include <linux/spinlock.h>
@@ -19,6 +21,7 @@
19#include <linux/kdebug.h> 21#include <linux/kdebug.h>
20#include <linux/mutex.h> 22#include <linux/mutex.h>
21#include <linux/io.h> 23#include <linux/io.h>
24#include <linux/slab.h>
22#include <asm/cacheflush.h> 25#include <asm/cacheflush.h>
23#include <asm/tlbflush.h> 26#include <asm/tlbflush.h>
24#include <linux/errno.h> 27#include <linux/errno.h>
@@ -136,7 +139,7 @@ static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
136 pte_t *pte = lookup_address(f->page, &level); 139 pte_t *pte = lookup_address(f->page, &level);
137 140
138 if (!pte) { 141 if (!pte) {
139 pr_err("kmmio: no pte for page 0x%08lx\n", f->page); 142 pr_err("no pte for page 0x%08lx\n", f->page);
140 return -1; 143 return -1;
141 } 144 }
142 145
@@ -148,7 +151,7 @@ static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
148 clear_pte_presence(pte, clear, &f->old_presence); 151 clear_pte_presence(pte, clear, &f->old_presence);
149 break; 152 break;
150 default: 153 default:
151 pr_err("kmmio: unexpected page level 0x%x.\n", level); 154 pr_err("unexpected page level 0x%x.\n", level);
152 return -1; 155 return -1;
153 } 156 }
154 157
@@ -170,13 +173,14 @@ static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
170static int arm_kmmio_fault_page(struct kmmio_fault_page *f) 173static int arm_kmmio_fault_page(struct kmmio_fault_page *f)
171{ 174{
172 int ret; 175 int ret;
173 WARN_ONCE(f->armed, KERN_ERR "kmmio page already armed.\n"); 176 WARN_ONCE(f->armed, KERN_ERR pr_fmt("kmmio page already armed.\n"));
174 if (f->armed) { 177 if (f->armed) {
175 pr_warning("kmmio double-arm: page 0x%08lx, ref %d, old %d\n", 178 pr_warning("double-arm: page 0x%08lx, ref %d, old %d\n",
176 f->page, f->count, !!f->old_presence); 179 f->page, f->count, !!f->old_presence);
177 } 180 }
178 ret = clear_page_presence(f, true); 181 ret = clear_page_presence(f, true);
179 WARN_ONCE(ret < 0, KERN_ERR "kmmio arming 0x%08lx failed.\n", f->page); 182 WARN_ONCE(ret < 0, KERN_ERR pr_fmt("arming 0x%08lx failed.\n"),
183 f->page);
180 f->armed = true; 184 f->armed = true;
181 return ret; 185 return ret;
182} 186}
@@ -203,7 +207,7 @@ static void disarm_kmmio_fault_page(struct kmmio_fault_page *f)
203 */ 207 */
204/* 208/*
205 * Interrupts are disabled on entry as trap3 is an interrupt gate 209 * Interrupts are disabled on entry as trap3 is an interrupt gate
206 * and they remain disabled thorough out this function. 210 * and they remain disabled throughout this function.
207 */ 211 */
208int kmmio_handler(struct pt_regs *regs, unsigned long addr) 212int kmmio_handler(struct pt_regs *regs, unsigned long addr)
209{ 213{
@@ -240,24 +244,21 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
240 * condition needs handling by do_page_fault(), the 244 * condition needs handling by do_page_fault(), the
241 * page really not being present is the most common. 245 * page really not being present is the most common.
242 */ 246 */
243 pr_debug("kmmio: secondary hit for 0x%08lx CPU %d.\n", 247 pr_debug("secondary hit for 0x%08lx CPU %d.\n",
244 addr, smp_processor_id()); 248 addr, smp_processor_id());
245 249
246 if (!faultpage->old_presence) 250 if (!faultpage->old_presence)
247 pr_info("kmmio: unexpected secondary hit for " 251 pr_info("unexpected secondary hit for address 0x%08lx on CPU %d.\n",
248 "address 0x%08lx on CPU %d.\n", addr, 252 addr, smp_processor_id());
249 smp_processor_id());
250 } else { 253 } else {
251 /* 254 /*
252 * Prevent overwriting already in-flight context. 255 * Prevent overwriting already in-flight context.
253 * This should not happen, let's hope disarming at 256 * This should not happen, let's hope disarming at
254 * least prevents a panic. 257 * least prevents a panic.
255 */ 258 */
256 pr_emerg("kmmio: recursive probe hit on CPU %d, " 259 pr_emerg("recursive probe hit on CPU %d, for address 0x%08lx. Ignoring.\n",
257 "for address 0x%08lx. Ignoring.\n", 260 smp_processor_id(), addr);
258 smp_processor_id(), addr); 261 pr_emerg("previous hit was at 0x%08lx.\n", ctx->addr);
259 pr_emerg("kmmio: previous hit was at 0x%08lx.\n",
260 ctx->addr);
261 disarm_kmmio_fault_page(faultpage); 262 disarm_kmmio_fault_page(faultpage);
262 } 263 }
263 goto no_kmmio_ctx; 264 goto no_kmmio_ctx;
@@ -302,7 +303,7 @@ no_kmmio:
302 303
303/* 304/*
304 * Interrupts are disabled on entry as trap1 is an interrupt gate 305 * Interrupts are disabled on entry as trap1 is an interrupt gate
305 * and they remain disabled thorough out this function. 306 * and they remain disabled throughout this function.
306 * This must always get called as the pair to kmmio_handler(). 307 * This must always get called as the pair to kmmio_handler().
307 */ 308 */
308static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) 309static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
@@ -316,8 +317,8 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
316 * something external causing them (f.e. using a debugger while 317 * something external causing them (f.e. using a debugger while
317 * mmio tracing enabled), or erroneous behaviour 318 * mmio tracing enabled), or erroneous behaviour
318 */ 319 */
319 pr_warning("kmmio: unexpected debug trap on CPU %d.\n", 320 pr_warning("unexpected debug trap on CPU %d.\n",
320 smp_processor_id()); 321 smp_processor_id());
321 goto out; 322 goto out;
322 } 323 }
323 324
@@ -425,7 +426,7 @@ int register_kmmio_probe(struct kmmio_probe *p)
425 list_add_rcu(&p->list, &kmmio_probes); 426 list_add_rcu(&p->list, &kmmio_probes);
426 while (size < size_lim) { 427 while (size < size_lim) {
427 if (add_kmmio_fault_page(p->addr + size)) 428 if (add_kmmio_fault_page(p->addr + size))
428 pr_err("kmmio: Unable to set page fault.\n"); 429 pr_err("Unable to set page fault.\n");
429 size += PAGE_SIZE; 430 size += PAGE_SIZE;
430 } 431 }
431out: 432out:
@@ -490,7 +491,7 @@ static void remove_kmmio_fault_pages(struct rcu_head *head)
490 * 2. remove_kmmio_fault_pages() 491 * 2. remove_kmmio_fault_pages()
491 * Remove the pages from kmmio_page_table. 492 * Remove the pages from kmmio_page_table.
492 * 3. rcu_free_kmmio_fault_pages() 493 * 3. rcu_free_kmmio_fault_pages()
493 * Actally free the kmmio_fault_page structs as with RCU. 494 * Actually free the kmmio_fault_page structs as with RCU.
494 */ 495 */
495void unregister_kmmio_probe(struct kmmio_probe *p) 496void unregister_kmmio_probe(struct kmmio_probe *p)
496{ 497{
@@ -511,7 +512,7 @@ void unregister_kmmio_probe(struct kmmio_probe *p)
511 512
512 drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC); 513 drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC);
513 if (!drelease) { 514 if (!drelease) {
514 pr_crit("kmmio: leaking kmmio_fault_page objects.\n"); 515 pr_crit("leaking kmmio_fault_page objects.\n");
515 return; 516 return;
516 } 517 }
517 drelease->release_list = release_list; 518 drelease->release_list = release_list;
@@ -538,10 +539,17 @@ static int
538kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args) 539kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args)
539{ 540{
540 struct die_args *arg = args; 541 struct die_args *arg = args;
542 unsigned long* dr6_p = (unsigned long *)ERR_PTR(arg->err);
541 543
542 if (val == DIE_DEBUG && (arg->err & DR_STEP)) 544 if (val == DIE_DEBUG && (*dr6_p & DR_STEP))
543 if (post_kmmio_handler(arg->err, arg->regs) == 1) 545 if (post_kmmio_handler(*dr6_p, arg->regs) == 1) {
546 /*
547 * Reset the BS bit in dr6 (pointed by args->err) to
548 * denote completion of processing
549 */
550 *dr6_p &= ~DR_STEP;
544 return NOTIFY_STOP; 551 return NOTIFY_STOP;
552 }
545 553
546 return NOTIFY_DONE; 554 return NOTIFY_DONE;
547} 555}
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index c8191defc38a..1dab5194fd9d 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -71,7 +71,7 @@ static int mmap_is_legacy(void)
71 if (current->personality & ADDR_COMPAT_LAYOUT) 71 if (current->personality & ADDR_COMPAT_LAYOUT)
72 return 1; 72 return 1;
73 73
74 if (current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY) 74 if (rlimit(RLIMIT_STACK) == RLIM_INFINITY)
75 return 1; 75 return 1;
76 76
77 return sysctl_legacy_va_layout; 77 return sysctl_legacy_va_layout;
@@ -96,7 +96,7 @@ static unsigned long mmap_rnd(void)
96 96
97static unsigned long mmap_base(void) 97static unsigned long mmap_base(void)
98{ 98{
99 unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur; 99 unsigned long gap = rlimit(RLIMIT_STACK);
100 100
101 if (gap < MIN_GAP) 101 if (gap < MIN_GAP)
102 gap = MIN_GAP; 102 gap = MIN_GAP;
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
index 132772a8ec57..3adff7dcc148 100644
--- a/arch/x86/mm/mmio-mod.c
+++ b/arch/x86/mm/mmio-mod.c
@@ -19,10 +19,14 @@
19 * 19 *
20 * Derived from the read-mod example from relay-examples by Tom Zanussi. 20 * Derived from the read-mod example from relay-examples by Tom Zanussi.
21 */ 21 */
22
23#define pr_fmt(fmt) "mmiotrace: " fmt
24
22#define DEBUG 1 25#define DEBUG 1
23 26
24#include <linux/module.h> 27#include <linux/module.h>
25#include <linux/debugfs.h> 28#include <linux/debugfs.h>
29#include <linux/slab.h>
26#include <linux/uaccess.h> 30#include <linux/uaccess.h>
27#include <linux/io.h> 31#include <linux/io.h>
28#include <linux/version.h> 32#include <linux/version.h>
@@ -36,8 +40,6 @@
36 40
37#include "pf_in.h" 41#include "pf_in.h"
38 42
39#define NAME "mmiotrace: "
40
41struct trap_reason { 43struct trap_reason {
42 unsigned long addr; 44 unsigned long addr;
43 unsigned long ip; 45 unsigned long ip;
@@ -96,17 +98,18 @@ static void print_pte(unsigned long address)
96 pte_t *pte = lookup_address(address, &level); 98 pte_t *pte = lookup_address(address, &level);
97 99
98 if (!pte) { 100 if (!pte) {
99 pr_err(NAME "Error in %s: no pte for page 0x%08lx\n", 101 pr_err("Error in %s: no pte for page 0x%08lx\n",
100 __func__, address); 102 __func__, address);
101 return; 103 return;
102 } 104 }
103 105
104 if (level == PG_LEVEL_2M) { 106 if (level == PG_LEVEL_2M) {
105 pr_emerg(NAME "4MB pages are not currently supported: " 107 pr_emerg("4MB pages are not currently supported: 0x%08lx\n",
106 "0x%08lx\n", address); 108 address);
107 BUG(); 109 BUG();
108 } 110 }
109 pr_info(NAME "pte for 0x%lx: 0x%llx 0x%llx\n", address, 111 pr_info("pte for 0x%lx: 0x%llx 0x%llx\n",
112 address,
110 (unsigned long long)pte_val(*pte), 113 (unsigned long long)pte_val(*pte),
111 (unsigned long long)pte_val(*pte) & _PAGE_PRESENT); 114 (unsigned long long)pte_val(*pte) & _PAGE_PRESENT);
112} 115}
@@ -118,22 +121,21 @@ static void print_pte(unsigned long address)
118static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr) 121static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr)
119{ 122{
120 const struct trap_reason *my_reason = &get_cpu_var(pf_reason); 123 const struct trap_reason *my_reason = &get_cpu_var(pf_reason);
121 pr_emerg(NAME "unexpected fault for address: 0x%08lx, " 124 pr_emerg("unexpected fault for address: 0x%08lx, last fault for address: 0x%08lx\n",
122 "last fault for address: 0x%08lx\n", 125 addr, my_reason->addr);
123 addr, my_reason->addr);
124 print_pte(addr); 126 print_pte(addr);
125 print_symbol(KERN_EMERG "faulting IP is at %s\n", regs->ip); 127 print_symbol(KERN_EMERG "faulting IP is at %s\n", regs->ip);
126 print_symbol(KERN_EMERG "last faulting IP was at %s\n", my_reason->ip); 128 print_symbol(KERN_EMERG "last faulting IP was at %s\n", my_reason->ip);
127#ifdef __i386__ 129#ifdef __i386__
128 pr_emerg("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", 130 pr_emerg("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n",
129 regs->ax, regs->bx, regs->cx, regs->dx); 131 regs->ax, regs->bx, regs->cx, regs->dx);
130 pr_emerg("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", 132 pr_emerg("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n",
131 regs->si, regs->di, regs->bp, regs->sp); 133 regs->si, regs->di, regs->bp, regs->sp);
132#else 134#else
133 pr_emerg("rax: %016lx rcx: %016lx rdx: %016lx\n", 135 pr_emerg("rax: %016lx rcx: %016lx rdx: %016lx\n",
134 regs->ax, regs->cx, regs->dx); 136 regs->ax, regs->cx, regs->dx);
135 pr_emerg("rsi: %016lx rdi: %016lx rbp: %016lx rsp: %016lx\n", 137 pr_emerg("rsi: %016lx rdi: %016lx rbp: %016lx rsp: %016lx\n",
136 regs->si, regs->di, regs->bp, regs->sp); 138 regs->si, regs->di, regs->bp, regs->sp);
137#endif 139#endif
138 put_cpu_var(pf_reason); 140 put_cpu_var(pf_reason);
139 BUG(); 141 BUG();
@@ -213,7 +215,7 @@ static void post(struct kmmio_probe *p, unsigned long condition,
213 /* this should always return the active_trace count to 0 */ 215 /* this should always return the active_trace count to 0 */
214 my_reason->active_traces--; 216 my_reason->active_traces--;
215 if (my_reason->active_traces) { 217 if (my_reason->active_traces) {
216 pr_emerg(NAME "unexpected post handler"); 218 pr_emerg("unexpected post handler");
217 BUG(); 219 BUG();
218 } 220 }
219 221
@@ -244,7 +246,7 @@ static void ioremap_trace_core(resource_size_t offset, unsigned long size,
244 }; 246 };
245 247
246 if (!trace) { 248 if (!trace) {
247 pr_err(NAME "kmalloc failed in ioremap\n"); 249 pr_err("kmalloc failed in ioremap\n");
248 return; 250 return;
249 } 251 }
250 252
@@ -282,8 +284,8 @@ void mmiotrace_ioremap(resource_size_t offset, unsigned long size,
282 if (!is_enabled()) /* recheck and proper locking in *_core() */ 284 if (!is_enabled()) /* recheck and proper locking in *_core() */
283 return; 285 return;
284 286
285 pr_debug(NAME "ioremap_*(0x%llx, 0x%lx) = %p\n", 287 pr_debug("ioremap_*(0x%llx, 0x%lx) = %p\n",
286 (unsigned long long)offset, size, addr); 288 (unsigned long long)offset, size, addr);
287 if ((filter_offset) && (offset != filter_offset)) 289 if ((filter_offset) && (offset != filter_offset))
288 return; 290 return;
289 ioremap_trace_core(offset, size, addr); 291 ioremap_trace_core(offset, size, addr);
@@ -301,7 +303,7 @@ static void iounmap_trace_core(volatile void __iomem *addr)
301 struct remap_trace *tmp; 303 struct remap_trace *tmp;
302 struct remap_trace *found_trace = NULL; 304 struct remap_trace *found_trace = NULL;
303 305
304 pr_debug(NAME "Unmapping %p.\n", addr); 306 pr_debug("Unmapping %p.\n", addr);
305 307
306 spin_lock_irq(&trace_lock); 308 spin_lock_irq(&trace_lock);
307 if (!is_enabled()) 309 if (!is_enabled())
@@ -363,9 +365,8 @@ static void clear_trace_list(void)
363 * Caller also ensures is_enabled() cannot change. 365 * Caller also ensures is_enabled() cannot change.
364 */ 366 */
365 list_for_each_entry(trace, &trace_list, list) { 367 list_for_each_entry(trace, &trace_list, list) {
366 pr_notice(NAME "purging non-iounmapped " 368 pr_notice("purging non-iounmapped trace @0x%08lx, size 0x%lx.\n",
367 "trace @0x%08lx, size 0x%lx.\n", 369 trace->probe.addr, trace->probe.len);
368 trace->probe.addr, trace->probe.len);
369 if (!nommiotrace) 370 if (!nommiotrace)
370 unregister_kmmio_probe(&trace->probe); 371 unregister_kmmio_probe(&trace->probe);
371 } 372 }
@@ -387,7 +388,7 @@ static void enter_uniprocessor(void)
387 388
388 if (downed_cpus == NULL && 389 if (downed_cpus == NULL &&
389 !alloc_cpumask_var(&downed_cpus, GFP_KERNEL)) { 390 !alloc_cpumask_var(&downed_cpus, GFP_KERNEL)) {
390 pr_notice(NAME "Failed to allocate mask\n"); 391 pr_notice("Failed to allocate mask\n");
391 goto out; 392 goto out;
392 } 393 }
393 394
@@ -395,20 +396,19 @@ static void enter_uniprocessor(void)
395 cpumask_copy(downed_cpus, cpu_online_mask); 396 cpumask_copy(downed_cpus, cpu_online_mask);
396 cpumask_clear_cpu(cpumask_first(cpu_online_mask), downed_cpus); 397 cpumask_clear_cpu(cpumask_first(cpu_online_mask), downed_cpus);
397 if (num_online_cpus() > 1) 398 if (num_online_cpus() > 1)
398 pr_notice(NAME "Disabling non-boot CPUs...\n"); 399 pr_notice("Disabling non-boot CPUs...\n");
399 put_online_cpus(); 400 put_online_cpus();
400 401
401 for_each_cpu(cpu, downed_cpus) { 402 for_each_cpu(cpu, downed_cpus) {
402 err = cpu_down(cpu); 403 err = cpu_down(cpu);
403 if (!err) 404 if (!err)
404 pr_info(NAME "CPU%d is down.\n", cpu); 405 pr_info("CPU%d is down.\n", cpu);
405 else 406 else
406 pr_err(NAME "Error taking CPU%d down: %d\n", cpu, err); 407 pr_err("Error taking CPU%d down: %d\n", cpu, err);
407 } 408 }
408out: 409out:
409 if (num_online_cpus() > 1) 410 if (num_online_cpus() > 1)
410 pr_warning(NAME "multiple CPUs still online, " 411 pr_warning("multiple CPUs still online, may miss events.\n");
411 "may miss events.\n");
412} 412}
413 413
414/* __ref because leave_uniprocessor calls cpu_up which is __cpuinit, 414/* __ref because leave_uniprocessor calls cpu_up which is __cpuinit,
@@ -420,13 +420,13 @@ static void __ref leave_uniprocessor(void)
420 420
421 if (downed_cpus == NULL || cpumask_weight(downed_cpus) == 0) 421 if (downed_cpus == NULL || cpumask_weight(downed_cpus) == 0)
422 return; 422 return;
423 pr_notice(NAME "Re-enabling CPUs...\n"); 423 pr_notice("Re-enabling CPUs...\n");
424 for_each_cpu(cpu, downed_cpus) { 424 for_each_cpu(cpu, downed_cpus) {
425 err = cpu_up(cpu); 425 err = cpu_up(cpu);
426 if (!err) 426 if (!err)
427 pr_info(NAME "enabled CPU%d.\n", cpu); 427 pr_info("enabled CPU%d.\n", cpu);
428 else 428 else
429 pr_err(NAME "cannot re-enable CPU%d: %d\n", cpu, err); 429 pr_err("cannot re-enable CPU%d: %d\n", cpu, err);
430 } 430 }
431} 431}
432 432
@@ -434,8 +434,8 @@ static void __ref leave_uniprocessor(void)
434static void enter_uniprocessor(void) 434static void enter_uniprocessor(void)
435{ 435{
436 if (num_online_cpus() > 1) 436 if (num_online_cpus() > 1)
437 pr_warning(NAME "multiple CPUs are online, may miss events. " 437 pr_warning("multiple CPUs are online, may miss events. "
438 "Suggest booting with maxcpus=1 kernel argument.\n"); 438 "Suggest booting with maxcpus=1 kernel argument.\n");
439} 439}
440 440
441static void leave_uniprocessor(void) 441static void leave_uniprocessor(void)
@@ -450,13 +450,13 @@ void enable_mmiotrace(void)
450 goto out; 450 goto out;
451 451
452 if (nommiotrace) 452 if (nommiotrace)
453 pr_info(NAME "MMIO tracing disabled.\n"); 453 pr_info("MMIO tracing disabled.\n");
454 kmmio_init(); 454 kmmio_init();
455 enter_uniprocessor(); 455 enter_uniprocessor();
456 spin_lock_irq(&trace_lock); 456 spin_lock_irq(&trace_lock);
457 atomic_inc(&mmiotrace_enabled); 457 atomic_inc(&mmiotrace_enabled);
458 spin_unlock_irq(&trace_lock); 458 spin_unlock_irq(&trace_lock);
459 pr_info(NAME "enabled.\n"); 459 pr_info("enabled.\n");
460out: 460out:
461 mutex_unlock(&mmiotrace_mutex); 461 mutex_unlock(&mmiotrace_mutex);
462} 462}
@@ -475,7 +475,7 @@ void disable_mmiotrace(void)
475 clear_trace_list(); /* guarantees: no more kmmio callbacks */ 475 clear_trace_list(); /* guarantees: no more kmmio callbacks */
476 leave_uniprocessor(); 476 leave_uniprocessor();
477 kmmio_cleanup(); 477 kmmio_cleanup();
478 pr_info(NAME "disabled.\n"); 478 pr_info("disabled.\n");
479out: 479out:
480 mutex_unlock(&mmiotrace_mutex); 480 mutex_unlock(&mmiotrace_mutex);
481} 481}
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index d2530062fe00..809baaaf48b1 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -347,8 +347,8 @@ static void init_remap_allocator(int nid)
347 (ulong) node_remap_end_vaddr[nid]); 347 (ulong) node_remap_end_vaddr[nid]);
348} 348}
349 349
350void __init initmem_init(unsigned long start_pfn, 350void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
351 unsigned long end_pfn) 351 int acpi, int k8)
352{ 352{
353 int nid; 353 int nid;
354 long kva_target_pfn; 354 long kva_target_pfn;
@@ -418,7 +418,10 @@ void __init initmem_init(unsigned long start_pfn,
418 418
419 for_each_online_node(nid) { 419 for_each_online_node(nid) {
420 memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); 420 memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
421 NODE_DATA(nid)->node_id = nid;
422#ifndef CONFIG_NO_BOOTMEM
421 NODE_DATA(nid)->bdata = &bootmem_node_data[nid]; 423 NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
424#endif
422 } 425 }
423 426
424 setup_bootmem_allocator(); 427 setup_bootmem_allocator();
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 459913beac71..8948f47fde05 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -163,30 +163,48 @@ static void * __init early_node_mem(int nodeid, unsigned long start,
163 unsigned long end, unsigned long size, 163 unsigned long end, unsigned long size,
164 unsigned long align) 164 unsigned long align)
165{ 165{
166 unsigned long mem = find_e820_area(start, end, size, align); 166 unsigned long mem;
167 void *ptr;
168 167
168 /*
 169	 * allocate this as high as possible; other early per-node data
 170	 * (such as NODE_DATA) will be placed together with it
171 */
172 if (start < (MAX_DMA_PFN<<PAGE_SHIFT))
173 start = MAX_DMA_PFN<<PAGE_SHIFT;
174 if (start < (MAX_DMA32_PFN<<PAGE_SHIFT) &&
175 end > (MAX_DMA32_PFN<<PAGE_SHIFT))
176 start = MAX_DMA32_PFN<<PAGE_SHIFT;
177 mem = find_e820_area(start, end, size, align);
178 if (mem != -1L)
179 return __va(mem);
180
181 /* extend the search scope */
182 end = max_pfn_mapped << PAGE_SHIFT;
183 if (end > (MAX_DMA32_PFN<<PAGE_SHIFT))
184 start = MAX_DMA32_PFN<<PAGE_SHIFT;
185 else
186 start = MAX_DMA_PFN<<PAGE_SHIFT;
187 mem = find_e820_area(start, end, size, align);
169 if (mem != -1L) 188 if (mem != -1L)
170 return __va(mem); 189 return __va(mem);
171 190
172 ptr = __alloc_bootmem_nopanic(size, align, __pa(MAX_DMA_ADDRESS)); 191 printk(KERN_ERR "Cannot find %lu bytes in node %d\n",
173 if (ptr == NULL) {
174 printk(KERN_ERR "Cannot find %lu bytes in node %d\n",
175 size, nodeid); 192 size, nodeid);
176 return NULL; 193
177 } 194 return NULL;
178 return ptr;
179} 195}
180 196
181/* Initialize bootmem allocator for a node */ 197/* Initialize bootmem allocator for a node */
182void __init 198void __init
183setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) 199setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
184{ 200{
185 unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size; 201 unsigned long start_pfn, last_pfn, nodedata_phys;
186 const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE); 202 const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
187 unsigned long bootmap_start, nodedata_phys;
188 void *bootmap;
189 int nid; 203 int nid;
204#ifndef CONFIG_NO_BOOTMEM
205 unsigned long bootmap_start, bootmap_pages, bootmap_size;
206 void *bootmap;
207#endif
190 208
191 if (!end) 209 if (!end)
192 return; 210 return;
@@ -200,7 +218,7 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
200 218
201 start = roundup(start, ZONE_ALIGN); 219 start = roundup(start, ZONE_ALIGN);
202 220
203 printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid, 221 printk(KERN_INFO "Initmem setup node %d %016lx-%016lx\n", nodeid,
204 start, end); 222 start, end);
205 223
206 start_pfn = start >> PAGE_SHIFT; 224 start_pfn = start >> PAGE_SHIFT;
@@ -211,14 +229,21 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
211 if (node_data[nodeid] == NULL) 229 if (node_data[nodeid] == NULL)
212 return; 230 return;
213 nodedata_phys = __pa(node_data[nodeid]); 231 nodedata_phys = __pa(node_data[nodeid]);
232 reserve_early(nodedata_phys, nodedata_phys + pgdat_size, "NODE_DATA");
214 printk(KERN_INFO " NODE_DATA [%016lx - %016lx]\n", nodedata_phys, 233 printk(KERN_INFO " NODE_DATA [%016lx - %016lx]\n", nodedata_phys,
215 nodedata_phys + pgdat_size - 1); 234 nodedata_phys + pgdat_size - 1);
235 nid = phys_to_nid(nodedata_phys);
236 if (nid != nodeid)
237 printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid);
216 238
217 memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t)); 239 memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t));
218 NODE_DATA(nodeid)->bdata = &bootmem_node_data[nodeid]; 240 NODE_DATA(nodeid)->node_id = nodeid;
219 NODE_DATA(nodeid)->node_start_pfn = start_pfn; 241 NODE_DATA(nodeid)->node_start_pfn = start_pfn;
220 NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn; 242 NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn;
221 243
244#ifndef CONFIG_NO_BOOTMEM
245 NODE_DATA(nodeid)->bdata = &bootmem_node_data[nodeid];
246
222 /* 247 /*
223 * Find a place for the bootmem map 248 * Find a place for the bootmem map
224 * nodedata_phys could be on other nodes by alloc_bootmem, 249 * nodedata_phys could be on other nodes by alloc_bootmem,
@@ -227,11 +252,7 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
227 * of alloc_bootmem, that could clash with reserved range 252 * of alloc_bootmem, that could clash with reserved range
228 */ 253 */
229 bootmap_pages = bootmem_bootmap_pages(last_pfn - start_pfn); 254 bootmap_pages = bootmem_bootmap_pages(last_pfn - start_pfn);
230 nid = phys_to_nid(nodedata_phys); 255 bootmap_start = roundup(nodedata_phys + pgdat_size, PAGE_SIZE);
231 if (nid == nodeid)
232 bootmap_start = roundup(nodedata_phys + pgdat_size, PAGE_SIZE);
233 else
234 bootmap_start = roundup(start, PAGE_SIZE);
235 /* 256 /*
236 * SMP_CACHE_BYTES could be enough, but init_bootmem_node like 257 * SMP_CACHE_BYTES could be enough, but init_bootmem_node like
237 * to use that to align to PAGE_SIZE 258 * to use that to align to PAGE_SIZE
@@ -239,12 +260,13 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
239 bootmap = early_node_mem(nodeid, bootmap_start, end, 260 bootmap = early_node_mem(nodeid, bootmap_start, end,
240 bootmap_pages<<PAGE_SHIFT, PAGE_SIZE); 261 bootmap_pages<<PAGE_SHIFT, PAGE_SIZE);
241 if (bootmap == NULL) { 262 if (bootmap == NULL) {
242 if (nodedata_phys < start || nodedata_phys >= end) 263 free_early(nodedata_phys, nodedata_phys + pgdat_size);
243 free_bootmem(nodedata_phys, pgdat_size);
244 node_data[nodeid] = NULL; 264 node_data[nodeid] = NULL;
245 return; 265 return;
246 } 266 }
247 bootmap_start = __pa(bootmap); 267 bootmap_start = __pa(bootmap);
268 reserve_early(bootmap_start, bootmap_start+(bootmap_pages<<PAGE_SHIFT),
269 "BOOTMAP");
248 270
249 bootmap_size = init_bootmem_node(NODE_DATA(nodeid), 271 bootmap_size = init_bootmem_node(NODE_DATA(nodeid),
250 bootmap_start >> PAGE_SHIFT, 272 bootmap_start >> PAGE_SHIFT,
@@ -253,31 +275,12 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
253 printk(KERN_INFO " bootmap [%016lx - %016lx] pages %lx\n", 275 printk(KERN_INFO " bootmap [%016lx - %016lx] pages %lx\n",
254 bootmap_start, bootmap_start + bootmap_size - 1, 276 bootmap_start, bootmap_start + bootmap_size - 1,
255 bootmap_pages); 277 bootmap_pages);
256
257 free_bootmem_with_active_regions(nodeid, end);
258
259 /*
260 * convert early reserve to bootmem reserve earlier
261 * otherwise early_node_mem could use early reserved mem
262 * on previous node
263 */
264 early_res_to_bootmem(start, end);
265
266 /*
267 * in some case early_node_mem could use alloc_bootmem
268 * to get range on other node, don't reserve that again
269 */
270 if (nid != nodeid)
271 printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid);
272 else
273 reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys,
274 pgdat_size, BOOTMEM_DEFAULT);
275 nid = phys_to_nid(bootmap_start); 278 nid = phys_to_nid(bootmap_start);
276 if (nid != nodeid) 279 if (nid != nodeid)
277 printk(KERN_INFO " bootmap(%d) on node %d\n", nodeid, nid); 280 printk(KERN_INFO " bootmap(%d) on node %d\n", nodeid, nid);
278 else 281
279 reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, 282 free_bootmem_with_active_regions(nodeid, end);
280 bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT); 283#endif
281 284
282 node_set_online(nodeid); 285 node_set_online(nodeid);
283} 286}
@@ -306,8 +309,71 @@ void __init numa_init_array(void)
306 309
307#ifdef CONFIG_NUMA_EMU 310#ifdef CONFIG_NUMA_EMU
308/* Numa emulation */ 311/* Numa emulation */
312static struct bootnode nodes[MAX_NUMNODES] __initdata;
313static struct bootnode physnodes[MAX_NUMNODES] __initdata;
309static char *cmdline __initdata; 314static char *cmdline __initdata;
310 315
316static int __init setup_physnodes(unsigned long start, unsigned long end,
317 int acpi, int k8)
318{
319 int nr_nodes = 0;
320 int ret = 0;
321 int i;
322
323#ifdef CONFIG_ACPI_NUMA
324 if (acpi)
325 nr_nodes = acpi_get_nodes(physnodes);
326#endif
327#ifdef CONFIG_K8_NUMA
328 if (k8)
329 nr_nodes = k8_get_nodes(physnodes);
330#endif
331 /*
332 * Basic sanity checking on the physical node map: there may be errors
333 * if the SRAT or K8 incorrectly reported the topology or the mem=
334 * kernel parameter is used.
335 */
336 for (i = 0; i < nr_nodes; i++) {
337 if (physnodes[i].start == physnodes[i].end)
338 continue;
339 if (physnodes[i].start > end) {
340 physnodes[i].end = physnodes[i].start;
341 continue;
342 }
343 if (physnodes[i].end < start) {
344 physnodes[i].start = physnodes[i].end;
345 continue;
346 }
347 if (physnodes[i].start < start)
348 physnodes[i].start = start;
349 if (physnodes[i].end > end)
350 physnodes[i].end = end;
351 }
352
353 /*
354 * Remove all nodes that have no memory or were truncated because of the
355 * limited address range.
356 */
357 for (i = 0; i < nr_nodes; i++) {
358 if (physnodes[i].start == physnodes[i].end)
359 continue;
360 physnodes[ret].start = physnodes[i].start;
361 physnodes[ret].end = physnodes[i].end;
362 ret++;
363 }
364
365 /*
366 * If no physical topology was detected, a single node is faked to cover
367 * the entire address space.
368 */
369 if (!ret) {
370 physnodes[ret].start = start;
371 physnodes[ret].end = end;
372 ret = 1;
373 }
374 return ret;
375}
376
311/* 377/*
312 * Setups up nid to range from addr to addr + size. If the end 378 * Setups up nid to range from addr to addr + size. If the end
313 * boundary is greater than max_addr, then max_addr is used instead. 379 * boundary is greater than max_addr, then max_addr is used instead.
@@ -315,11 +381,9 @@ static char *cmdline __initdata;
315 * allocation past addr and -1 otherwise. addr is adjusted to be at 381 * allocation past addr and -1 otherwise. addr is adjusted to be at
316 * the end of the node. 382 * the end of the node.
317 */ 383 */
318static int __init setup_node_range(int nid, struct bootnode *nodes, u64 *addr, 384static int __init setup_node_range(int nid, u64 *addr, u64 size, u64 max_addr)
319 u64 size, u64 max_addr)
320{ 385{
321 int ret = 0; 386 int ret = 0;
322
323 nodes[nid].start = *addr; 387 nodes[nid].start = *addr;
324 *addr += size; 388 *addr += size;
325 if (*addr >= max_addr) { 389 if (*addr >= max_addr) {
@@ -335,167 +399,234 @@ static int __init setup_node_range(int nid, struct bootnode *nodes, u64 *addr,
335} 399}
336 400
337/* 401/*
338 * Splits num_nodes nodes up equally starting at node_start. The return value 402 * Sets up nr_nodes fake nodes interleaved over physical nodes ranging from addr
339 * is the number of nodes split up and addr is adjusted to be at the end of the 403 * to max_addr. The return value is the number of nodes allocated.
340 * last node allocated.
341 */ 404 */
342static int __init split_nodes_equally(struct bootnode *nodes, u64 *addr, 405static int __init split_nodes_interleave(u64 addr, u64 max_addr,
343 u64 max_addr, int node_start, 406 int nr_phys_nodes, int nr_nodes)
344 int num_nodes)
345{ 407{
346 unsigned int big; 408 nodemask_t physnode_mask = NODE_MASK_NONE;
347 u64 size; 409 u64 size;
410 int big;
411 int ret = 0;
348 int i; 412 int i;
349 413
350 if (num_nodes <= 0) 414 if (nr_nodes <= 0)
351 return -1; 415 return -1;
352 if (num_nodes > MAX_NUMNODES) 416 if (nr_nodes > MAX_NUMNODES) {
353 num_nodes = MAX_NUMNODES; 417 pr_info("numa=fake=%d too large, reducing to %d\n",
354 size = (max_addr - *addr - e820_hole_size(*addr, max_addr)) / 418 nr_nodes, MAX_NUMNODES);
355 num_nodes; 419 nr_nodes = MAX_NUMNODES;
420 }
421
422 size = (max_addr - addr - e820_hole_size(addr, max_addr)) / nr_nodes;
356 /* 423 /*
357 * Calculate the number of big nodes that can be allocated as a result 424 * Calculate the number of big nodes that can be allocated as a result
358 * of consolidating the leftovers. 425 * of consolidating the remainder.
359 */ 426 */
360 big = ((size & ~FAKE_NODE_MIN_HASH_MASK) * num_nodes) / 427 big = ((size & ~FAKE_NODE_MIN_HASH_MASK) * nr_nodes) /
361 FAKE_NODE_MIN_SIZE; 428 FAKE_NODE_MIN_SIZE;
362 429
363 /* Round down to nearest FAKE_NODE_MIN_SIZE. */
364 size &= FAKE_NODE_MIN_HASH_MASK; 430 size &= FAKE_NODE_MIN_HASH_MASK;
365 if (!size) { 431 if (!size) {
366 printk(KERN_ERR "Not enough memory for each node. " 432 pr_err("Not enough memory for each node. "
367 "NUMA emulation disabled.\n"); 433 "NUMA emulation disabled.\n");
368 return -1; 434 return -1;
369 } 435 }
370 436
371 for (i = node_start; i < num_nodes + node_start; i++) { 437 for (i = 0; i < nr_phys_nodes; i++)
372 u64 end = *addr + size; 438 if (physnodes[i].start != physnodes[i].end)
439 node_set(i, physnode_mask);
373 440
374 if (i < big) 441 /*
375 end += FAKE_NODE_MIN_SIZE; 442 * Continue to fill physical nodes with fake nodes until there is no
376 /* 443 * memory left on any of them.
377 * The final node can have the remaining system RAM. Other 444 */
378 * nodes receive roughly the same amount of available pages. 445 while (nodes_weight(physnode_mask)) {
379 */ 446 for_each_node_mask(i, physnode_mask) {
380 if (i == num_nodes + node_start - 1) 447 u64 end = physnodes[i].start + size;
381 end = max_addr; 448 u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN);
382 else 449
383 while (end - *addr - e820_hole_size(*addr, end) < 450 if (ret < big)
384 size) { 451 end += FAKE_NODE_MIN_SIZE;
452
453 /*
454 * Continue to add memory to this fake node if its
455 * non-reserved memory is less than the per-node size.
456 */
457 while (end - physnodes[i].start -
458 e820_hole_size(physnodes[i].start, end) < size) {
385 end += FAKE_NODE_MIN_SIZE; 459 end += FAKE_NODE_MIN_SIZE;
386 if (end > max_addr) { 460 if (end > physnodes[i].end) {
387 end = max_addr; 461 end = physnodes[i].end;
388 break; 462 break;
389 } 463 }
390 } 464 }
391 if (setup_node_range(i, nodes, addr, end - *addr, max_addr) < 0) 465
392 break; 466 /*
467 * If there won't be at least FAKE_NODE_MIN_SIZE of
468 * non-reserved memory in ZONE_DMA32 for the next node,
469 * this one must extend to the boundary.
470 */
471 if (end < dma32_end && dma32_end - end -
472 e820_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE)
473 end = dma32_end;
474
475 /*
476 * If there won't be enough non-reserved memory for the
477 * next node, this one must extend to the end of the
478 * physical node.
479 */
480 if (physnodes[i].end - end -
481 e820_hole_size(end, physnodes[i].end) < size)
482 end = physnodes[i].end;
483
484 /*
485 * Avoid allocating more nodes than requested, which can
486 * happen as a result of rounding down each node's size
487 * to FAKE_NODE_MIN_SIZE.
488 */
489 if (nodes_weight(physnode_mask) + ret >= nr_nodes)
490 end = physnodes[i].end;
491
492 if (setup_node_range(ret++, &physnodes[i].start,
493 end - physnodes[i].start,
494 physnodes[i].end) < 0)
495 node_clear(i, physnode_mask);
496 }
393 } 497 }
394 return i - node_start + 1; 498 return ret;
395} 499}
396 500
397/* 501/*
398 * Splits the remaining system RAM into chunks of size. The remaining memory is 502 * Returns the end address of a node so that there is at least `size' amount of
399 * always assigned to a final node and can be asymmetric. Returns the number of 503 * non-reserved memory or `max_addr' is reached.
400 * nodes split.
401 */ 504 */
402static int __init split_nodes_by_size(struct bootnode *nodes, u64 *addr, 505static u64 __init find_end_of_node(u64 start, u64 max_addr, u64 size)
403 u64 max_addr, int node_start, u64 size)
404{ 506{
405 int i = node_start; 507 u64 end = start + size;
406 size = (size << 20) & FAKE_NODE_MIN_HASH_MASK; 508
407 while (!setup_node_range(i++, nodes, addr, size, max_addr)) 509 while (end - start - e820_hole_size(start, end) < size) {
408 ; 510 end += FAKE_NODE_MIN_SIZE;
409 return i - node_start; 511 if (end > max_addr) {
512 end = max_addr;
513 break;
514 }
515 }
516 return end;
410} 517}
411 518
412/* 519/*
413 * Sets up the system RAM area from start_pfn to last_pfn according to the 520 * Sets up fake nodes of `size' interleaved over physical nodes ranging from
414 * numa=fake command-line option. 521 * `addr' to `max_addr'. The return value is the number of nodes allocated.
415 */ 522 */
416static struct bootnode nodes[MAX_NUMNODES] __initdata; 523static int __init split_nodes_size_interleave(u64 addr, u64 max_addr, u64 size)
417
418static int __init numa_emulation(unsigned long start_pfn, unsigned long last_pfn)
419{ 524{
420 u64 size, addr = start_pfn << PAGE_SHIFT; 525 nodemask_t physnode_mask = NODE_MASK_NONE;
421 u64 max_addr = last_pfn << PAGE_SHIFT; 526 u64 min_size;
422 int num_nodes = 0, num = 0, coeff_flag, coeff = -1, i; 527 int ret = 0;
528 int i;
423 529
424 memset(&nodes, 0, sizeof(nodes)); 530 if (!size)
531 return -1;
425 /* 532 /*
426 * If the numa=fake command-line is just a single number N, split the 533 * The limit on emulated nodes is MAX_NUMNODES, so the size per node is
427 * system RAM into N fake nodes. 534 * increased accordingly if the requested size is too small. This
535 * creates a uniform distribution of node sizes across the entire
536 * machine (but not necessarily over physical nodes).
428 */ 537 */
429 if (!strchr(cmdline, '*') && !strchr(cmdline, ',')) { 538 min_size = (max_addr - addr - e820_hole_size(addr, max_addr)) /
430 long n = simple_strtol(cmdline, NULL, 0); 539 MAX_NUMNODES;
431 540 min_size = max(min_size, FAKE_NODE_MIN_SIZE);
432 num_nodes = split_nodes_equally(nodes, &addr, max_addr, 0, n); 541 if ((min_size & FAKE_NODE_MIN_HASH_MASK) < min_size)
433 if (num_nodes < 0) 542 min_size = (min_size + FAKE_NODE_MIN_SIZE) &
434 return num_nodes; 543 FAKE_NODE_MIN_HASH_MASK;
435 goto out; 544 if (size < min_size) {
545 pr_err("Fake node size %LuMB too small, increasing to %LuMB\n",
546 size >> 20, min_size >> 20);
547 size = min_size;
436 } 548 }
549 size &= FAKE_NODE_MIN_HASH_MASK;
437 550
438 /* Parse the command line. */ 551 for (i = 0; i < MAX_NUMNODES; i++)
439 for (coeff_flag = 0; ; cmdline++) { 552 if (physnodes[i].start != physnodes[i].end)
440 if (*cmdline && isdigit(*cmdline)) { 553 node_set(i, physnode_mask);
441 num = num * 10 + *cmdline - '0'; 554 /*
442 continue; 555 * Fill physical nodes with fake nodes of size until there is no memory
443 } 556 * left on any of them.
444 if (*cmdline == '*') { 557 */
445 if (num > 0) 558 while (nodes_weight(physnode_mask)) {
446 coeff = num; 559 for_each_node_mask(i, physnode_mask) {
447 coeff_flag = 1; 560 u64 dma32_end = MAX_DMA32_PFN << PAGE_SHIFT;
448 } 561 u64 end;
449 if (!*cmdline || *cmdline == ',') { 562
450 if (!coeff_flag) 563 end = find_end_of_node(physnodes[i].start,
451 coeff = 1; 564 physnodes[i].end, size);
452 /* 565 /*
453 * Round down to the nearest FAKE_NODE_MIN_SIZE. 566 * If there won't be at least FAKE_NODE_MIN_SIZE of
454 * Command-line coefficients are in megabytes. 567 * non-reserved memory in ZONE_DMA32 for the next node,
568 * this one must extend to the boundary.
455 */ 569 */
456 size = ((u64)num << 20) & FAKE_NODE_MIN_HASH_MASK; 570 if (end < dma32_end && dma32_end - end -
457 if (size) 571 e820_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE)
458 for (i = 0; i < coeff; i++, num_nodes++) 572 end = dma32_end;
459 if (setup_node_range(num_nodes, nodes, 573
460 &addr, size, max_addr) < 0) 574 /*
461 goto done; 575 * If there won't be enough non-reserved memory for the
462 if (!*cmdline) 576 * next node, this one must extend to the end of the
463 break; 577 * physical node.
464 coeff_flag = 0; 578 */
465 coeff = -1; 579 if (physnodes[i].end - end -
580 e820_hole_size(end, physnodes[i].end) < size)
581 end = physnodes[i].end;
582
583 /*
584 * Setup the fake node that will be allocated as bootmem
585 * later. If setup_node_range() returns non-zero, there
586 * is no more memory available on this physical node.
587 */
588 if (setup_node_range(ret++, &physnodes[i].start,
589 end - physnodes[i].start,
590 physnodes[i].end) < 0)
591 node_clear(i, physnode_mask);
466 } 592 }
467 num = 0;
468 } 593 }
469done: 594 return ret;
470 if (!num_nodes) 595}
471 return -1; 596
472 /* Fill remainder of system RAM, if appropriate. */ 597/*
473 if (addr < max_addr) { 598 * Sets up the system RAM area from start_pfn to last_pfn according to the
474 if (coeff_flag && coeff < 0) { 599 * numa=fake command-line option.
475 /* Split remaining nodes into num-sized chunks */ 600 */
476 num_nodes += split_nodes_by_size(nodes, &addr, max_addr, 601static int __init numa_emulation(unsigned long start_pfn,
477 num_nodes, num); 602 unsigned long last_pfn, int acpi, int k8)
478 goto out; 603{
479 } 604 u64 addr = start_pfn << PAGE_SHIFT;
480 switch (*(cmdline - 1)) { 605 u64 max_addr = last_pfn << PAGE_SHIFT;
481 case '*': 606 int num_phys_nodes;
482 /* Split remaining nodes into coeff chunks */ 607 int num_nodes;
483 if (coeff <= 0) 608 int i;
484 break; 609
485 num_nodes += split_nodes_equally(nodes, &addr, max_addr, 610 num_phys_nodes = setup_physnodes(addr, max_addr, acpi, k8);
486 num_nodes, coeff); 611 /*
487 break; 612 * If the numa=fake command-line contains a 'M' or 'G', it represents
488 case ',': 613 * the fixed node size. Otherwise, if it is just a single number N,
489 /* Do not allocate remaining system RAM */ 614 * split the system RAM into N fake nodes.
490 break; 615 */
491 default: 616 if (strchr(cmdline, 'M') || strchr(cmdline, 'G')) {
492 /* Give one final node */ 617 u64 size;
493 setup_node_range(num_nodes, nodes, &addr, 618
494 max_addr - addr, max_addr); 619 size = memparse(cmdline, &cmdline);
495 num_nodes++; 620 num_nodes = split_nodes_size_interleave(addr, max_addr, size);
496 } 621 } else {
622 unsigned long n;
623
624 n = simple_strtoul(cmdline, NULL, 0);
625 num_nodes = split_nodes_interleave(addr, max_addr, num_phys_nodes, n);
497 } 626 }
498out: 627
628 if (num_nodes < 0)
629 return num_nodes;
499 memnode_shift = compute_hash_shift(nodes, num_nodes, NULL); 630 memnode_shift = compute_hash_shift(nodes, num_nodes, NULL);
500 if (memnode_shift < 0) { 631 if (memnode_shift < 0) {
501 memnode_shift = 0; 632 memnode_shift = 0;
@@ -505,14 +636,10 @@ out:
505 } 636 }
506 637
507 /* 638 /*
508 * We need to vacate all active ranges that may have been registered by 639 * We need to vacate all active ranges that may have been registered for
509 * SRAT and set acpi_numa to -1 so that srat_disabled() always returns 640 * the e820 memory map.
510 * true. NUMA emulation has succeeded so we will not scan ACPI nodes.
511 */ 641 */
512 remove_all_active_ranges(); 642 remove_all_active_ranges();
513#ifdef CONFIG_ACPI_NUMA
514 acpi_numa = -1;
515#endif
516 for_each_node_mask(i, node_possible_map) { 643 for_each_node_mask(i, node_possible_map) {
517 e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT, 644 e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT,
518 nodes[i].end >> PAGE_SHIFT); 645 nodes[i].end >> PAGE_SHIFT);
@@ -524,7 +651,8 @@ out:
524} 651}
525#endif /* CONFIG_NUMA_EMU */ 652#endif /* CONFIG_NUMA_EMU */
526 653
527void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn) 654void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn,
655 int acpi, int k8)
528{ 656{
529 int i; 657 int i;
530 658
@@ -532,23 +660,22 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn)
532 nodes_clear(node_online_map); 660 nodes_clear(node_online_map);
533 661
534#ifdef CONFIG_NUMA_EMU 662#ifdef CONFIG_NUMA_EMU
535 if (cmdline && !numa_emulation(start_pfn, last_pfn)) 663 if (cmdline && !numa_emulation(start_pfn, last_pfn, acpi, k8))
536 return; 664 return;
537 nodes_clear(node_possible_map); 665 nodes_clear(node_possible_map);
538 nodes_clear(node_online_map); 666 nodes_clear(node_online_map);
539#endif 667#endif
540 668
541#ifdef CONFIG_ACPI_NUMA 669#ifdef CONFIG_ACPI_NUMA
542 if (!numa_off && !acpi_scan_nodes(start_pfn << PAGE_SHIFT, 670 if (!numa_off && acpi && !acpi_scan_nodes(start_pfn << PAGE_SHIFT,
543 last_pfn << PAGE_SHIFT)) 671 last_pfn << PAGE_SHIFT))
544 return; 672 return;
545 nodes_clear(node_possible_map); 673 nodes_clear(node_possible_map);
546 nodes_clear(node_online_map); 674 nodes_clear(node_online_map);
547#endif 675#endif
548 676
549#ifdef CONFIG_K8_NUMA 677#ifdef CONFIG_K8_NUMA
550 if (!numa_off && !k8_scan_nodes(start_pfn<<PAGE_SHIFT, 678 if (!numa_off && k8 && !k8_scan_nodes())
551 last_pfn<<PAGE_SHIFT))
552 return; 679 return;
553 nodes_clear(node_possible_map); 680 nodes_clear(node_possible_map);
554 nodes_clear(node_online_map); 681 nodes_clear(node_online_map);
@@ -579,6 +706,10 @@ unsigned long __init numa_free_all_bootmem(void)
579 for_each_online_node(i) 706 for_each_online_node(i)
580 pages += free_all_bootmem_node(NODE_DATA(i)); 707 pages += free_all_bootmem_node(NODE_DATA(i));
581 708
709#ifdef CONFIG_NO_BOOTMEM
710 pages += free_all_memory_core_early(MAX_NUMNODES);
711#endif
712
582 return pages; 713 return pages;
583} 714}
584 715
@@ -601,6 +732,25 @@ static __init int numa_setup(char *opt)
601early_param("numa", numa_setup); 732early_param("numa", numa_setup);
602 733
603#ifdef CONFIG_NUMA 734#ifdef CONFIG_NUMA
735
736static __init int find_near_online_node(int node)
737{
738 int n, val;
739 int min_val = INT_MAX;
740 int best_node = -1;
741
742 for_each_online_node(n) {
743 val = node_distance(node, n);
744
745 if (val < min_val) {
746 min_val = val;
747 best_node = n;
748 }
749 }
750
751 return best_node;
752}
753
604/* 754/*
605 * Setup early cpu_to_node. 755 * Setup early cpu_to_node.
606 * 756 *
@@ -632,7 +782,7 @@ void __init init_cpu_to_node(void)
632 if (node == NUMA_NO_NODE) 782 if (node == NUMA_NO_NODE)
633 continue; 783 continue;
634 if (!node_online(node)) 784 if (!node_online(node))
635 continue; 785 node = find_near_online_node(node);
636 numa_set_node(cpu, node); 786 numa_set_node(cpu, node);
637 } 787 }
638} 788}
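
Note: the bulk of the numa_64.c change replaces the old numa=fake command-line parser with split_nodes_interleave() and split_nodes_size_interleave(), both of which grow a candidate node end until the range holds the requested amount of non-reserved memory (the find_end_of_node() helper above). A stand-alone sketch of that loop follows; the holes[] table and the hole_size() helper are invented stand-ins for the e820 hole accounting (e820_hole_size()), and the addresses used in main() are arbitrary.

#include <stdio.h>
#include <stdint.h>

#define FAKE_NODE_MIN_SIZE (64ULL << 20)	/* 64 MB granularity, as in the patch */

/* A made-up hole map standing in for the e820 reserved ranges. */
struct hole { uint64_t start, end; };
static const struct hole holes[] = {
	{ 0x000a0000ULL, 0x00100000ULL },	/* legacy VGA/BIOS hole */
	{ 0xc0000000ULL, 0x100000000ULL },	/* PCI MMIO hole below 4G */
};

/* Bytes of [start, end) that fall into a hole (mock of e820_hole_size()). */
static uint64_t hole_size(uint64_t start, uint64_t end)
{
	uint64_t bytes = 0;
	for (unsigned i = 0; i < sizeof(holes) / sizeof(holes[0]); i++) {
		uint64_t s = holes[i].start > start ? holes[i].start : start;
		uint64_t e = holes[i].end < end ? holes[i].end : end;
		if (s < e)
			bytes += e - s;
	}
	return bytes;
}

/* Grow the node end until it spans `size` bytes of usable RAM or hits max_addr. */
static uint64_t find_end_of_node(uint64_t start, uint64_t max_addr, uint64_t size)
{
	uint64_t end = start + size;

	while (end - start - hole_size(start, end) < size) {
		end += FAKE_NODE_MIN_SIZE;
		if (end > max_addr) {
			end = max_addr;
			break;
		}
	}
	return end;
}

int main(void)
{
	/* A 1 GB fake node starting inside the MMIO hole region must grow past it. */
	uint64_t end = find_end_of_node(0xb0000000ULL, 0x140000000ULL, 1ULL << 30);
	printf("node end: %#llx\n", (unsigned long long)end);
	return 0;
}
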
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index dd38bfbefd1f..28195c350b97 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -6,13 +6,13 @@
6#include <linux/bootmem.h> 6#include <linux/bootmem.h>
7#include <linux/module.h> 7#include <linux/module.h>
8#include <linux/sched.h> 8#include <linux/sched.h>
9#include <linux/slab.h>
10#include <linux/mm.h> 9#include <linux/mm.h>
11#include <linux/interrupt.h> 10#include <linux/interrupt.h>
12#include <linux/seq_file.h> 11#include <linux/seq_file.h>
13#include <linux/debugfs.h> 12#include <linux/debugfs.h>
14#include <linux/pfn.h> 13#include <linux/pfn.h>
15#include <linux/percpu.h> 14#include <linux/percpu.h>
15#include <linux/gfp.h>
16 16
17#include <asm/e820.h> 17#include <asm/e820.h>
18#include <asm/processor.h> 18#include <asm/processor.h>
@@ -279,6 +279,43 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
279 __pa((unsigned long)__end_rodata) >> PAGE_SHIFT)) 279 __pa((unsigned long)__end_rodata) >> PAGE_SHIFT))
280 pgprot_val(forbidden) |= _PAGE_RW; 280 pgprot_val(forbidden) |= _PAGE_RW;
281 281
282#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
283 /*
284 * Once the kernel maps the text as RO (kernel_set_to_readonly is set),
 285	 * the kernel text mappings for the large-page-aligned text and rodata
 286	 * sections are always read-only.  The kernel identity mappings covering
 287	 * the holes caused by this alignment can be whatever the user asks for.
288 *
289 * This will preserve the large page mappings for kernel text/data
290 * at no extra cost.
291 */
292 if (kernel_set_to_readonly &&
293 within(address, (unsigned long)_text,
294 (unsigned long)__end_rodata_hpage_align)) {
295 unsigned int level;
296
297 /*
298 * Don't enforce the !RW mapping for the kernel text mapping,
299 * if the current mapping is already using small page mapping.
300 * No need to work hard to preserve large page mappings in this
301 * case.
302 *
303 * This also fixes the Linux Xen paravirt guest boot failure
304 * (because of unexpected read-only mappings for kernel identity
305 * mappings). In this paravirt guest case, the kernel text
306 * mapping and the kernel identity mapping share the same
307 * page-table pages. Thus we can't really use different
308 * protections for the kernel text and identity mappings. Also,
309 * these shared mappings are made of small page mappings.
 310	 * Thus, not enforcing the !RW mapping for small-page kernel text
 311	 * mappings also helps Linux Xen paravirt guests boot
 312	 * correctly.
313 */
314 if (lookup_address(address, &level) && (level != PG_LEVEL_4K))
315 pgprot_val(forbidden) |= _PAGE_RW;
316 }
317#endif
318
282 prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden)); 319 prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));
283 320
284 return prot; 321 return prot;
@@ -1069,12 +1106,18 @@ EXPORT_SYMBOL(set_memory_array_wb);
1069 1106
1070int set_memory_x(unsigned long addr, int numpages) 1107int set_memory_x(unsigned long addr, int numpages)
1071{ 1108{
1109 if (!(__supported_pte_mask & _PAGE_NX))
1110 return 0;
1111
1072 return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_NX), 0); 1112 return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_NX), 0);
1073} 1113}
1074EXPORT_SYMBOL(set_memory_x); 1114EXPORT_SYMBOL(set_memory_x);
1075 1115
1076int set_memory_nx(unsigned long addr, int numpages) 1116int set_memory_nx(unsigned long addr, int numpages)
1077{ 1117{
1118 if (!(__supported_pte_mask & _PAGE_NX))
1119 return 0;
1120
1078 return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_NX), 0); 1121 return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_NX), 0);
1079} 1122}
1080EXPORT_SYMBOL(set_memory_nx); 1123EXPORT_SYMBOL(set_memory_nx);
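
Note: the set_memory_x()/set_memory_nx() hunks add an early "nothing to do" return when the CPU does not advertise NX (_PAGE_NX missing from __supported_pte_mask). A rough user-space sketch of that guard pattern is below; PAGE_NX, supported_pte_mask and set_region_nx() are illustrative names, not the kernel's, though bit 63 really is the NX bit of a 64-bit PTE.

#include <stdio.h>

/* Bit 63 is the NX bit in a 64-bit PTE; the constant here is only illustrative. */
#define PAGE_NX (1ULL << 63)

static unsigned long long supported_pte_mask;	/* filled in from CPUID at boot */

/* Mirrors the new early return in set_memory_nx(): a quiet no-op without NX. */
static int set_region_nx(unsigned long addr, int numpages)
{
	if (!(supported_pte_mask & PAGE_NX))
		return 0;		/* success: there is simply nothing to set */
	printf("setting NX on %d page(s) at %#lx\n", numpages, addr);
	return 0;
}

int main(void)
{
	set_region_nx(0x400000UL, 4);		/* NX not advertised: no-op */
	supported_pte_mask |= PAGE_NX;		/* pretend the CPU supports NX */
	set_region_nx(0x400000UL, 4);
	return 0;
}
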
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index e78cd0ec2bcf..edc8b95afc1a 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -12,7 +12,7 @@
12#include <linux/debugfs.h> 12#include <linux/debugfs.h>
13#include <linux/kernel.h> 13#include <linux/kernel.h>
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/gfp.h> 15#include <linux/slab.h>
16#include <linux/mm.h> 16#include <linux/mm.h>
17#include <linux/fs.h> 17#include <linux/fs.h>
18#include <linux/rbtree.h> 18#include <linux/rbtree.h>
@@ -20,6 +20,7 @@
20#include <asm/cacheflush.h> 20#include <asm/cacheflush.h>
21#include <asm/processor.h> 21#include <asm/processor.h>
22#include <asm/tlbflush.h> 22#include <asm/tlbflush.h>
23#include <asm/x86_init.h>
23#include <asm/pgtable.h> 24#include <asm/pgtable.h>
24#include <asm/fcntl.h> 25#include <asm/fcntl.h>
25#include <asm/e820.h> 26#include <asm/e820.h>
@@ -355,9 +356,6 @@ static int free_ram_pages_type(u64 start, u64 end)
355 * - _PAGE_CACHE_UC_MINUS 356 * - _PAGE_CACHE_UC_MINUS
356 * - _PAGE_CACHE_UC 357 * - _PAGE_CACHE_UC
357 * 358 *
358 * req_type will have a special case value '-1', when requester want to inherit
359 * the memory type from mtrr (if WB), existing PAT, defaulting to UC_MINUS.
360 *
361 * If new_type is NULL, function will return an error if it cannot reserve the 359 * If new_type is NULL, function will return an error if it cannot reserve the
362 * region with req_type. If new_type is non-NULL, function will return 360 * region with req_type. If new_type is non-NULL, function will return
363 * available type in new_type in case of no error. In case of any error 361 * available type in new_type in case of no error. In case of any error
@@ -377,9 +375,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
377 if (!pat_enabled) { 375 if (!pat_enabled) {
378 /* This is identical to page table setting without PAT */ 376 /* This is identical to page table setting without PAT */
379 if (new_type) { 377 if (new_type) {
380 if (req_type == -1) 378 if (req_type == _PAGE_CACHE_WC)
381 *new_type = _PAGE_CACHE_WB;
382 else if (req_type == _PAGE_CACHE_WC)
383 *new_type = _PAGE_CACHE_UC_MINUS; 379 *new_type = _PAGE_CACHE_UC_MINUS;
384 else 380 else
385 *new_type = req_type & _PAGE_CACHE_MASK; 381 *new_type = req_type & _PAGE_CACHE_MASK;
@@ -388,7 +384,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
388 } 384 }
389 385
390 /* Low ISA region is always mapped WB in page table. No need to track */ 386 /* Low ISA region is always mapped WB in page table. No need to track */
391 if (is_ISA_range(start, end - 1)) { 387 if (x86_platform.is_untracked_pat_range(start, end)) {
392 if (new_type) 388 if (new_type)
393 *new_type = _PAGE_CACHE_WB; 389 *new_type = _PAGE_CACHE_WB;
394 return 0; 390 return 0;
@@ -499,7 +495,7 @@ int free_memtype(u64 start, u64 end)
499 return 0; 495 return 0;
500 496
501 /* Low ISA region is always mapped WB. No need to track */ 497 /* Low ISA region is always mapped WB. No need to track */
502 if (is_ISA_range(start, end - 1)) 498 if (x86_platform.is_untracked_pat_range(start, end))
503 return 0; 499 return 0;
504 500
505 is_range_ram = pat_pagerange_is_ram(start, end); 501 is_range_ram = pat_pagerange_is_ram(start, end);
@@ -582,7 +578,7 @@ static unsigned long lookup_memtype(u64 paddr)
582 int rettype = _PAGE_CACHE_WB; 578 int rettype = _PAGE_CACHE_WB;
583 struct memtype *entry; 579 struct memtype *entry;
584 580
585 if (is_ISA_range(paddr, paddr + PAGE_SIZE - 1)) 581 if (x86_platform.is_untracked_pat_range(paddr, paddr + PAGE_SIZE))
586 return rettype; 582 return rettype;
587 583
588 if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) { 584 if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) {
@@ -708,9 +704,8 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
708 if (!range_is_allowed(pfn, size)) 704 if (!range_is_allowed(pfn, size))
709 return 0; 705 return 0;
710 706
711 if (file->f_flags & O_SYNC) { 707 if (file->f_flags & O_DSYNC)
712 flags = _PAGE_CACHE_UC_MINUS; 708 flags = _PAGE_CACHE_UC_MINUS;
713 }
714 709
715#ifdef CONFIG_X86_32 710#ifdef CONFIG_X86_32
716 /* 711 /*
@@ -1018,8 +1013,10 @@ static const struct file_operations memtype_fops = {
1018 1013
1019static int __init pat_memtype_list_init(void) 1014static int __init pat_memtype_list_init(void)
1020{ 1015{
1021 debugfs_create_file("pat_memtype_list", S_IRUSR, arch_debugfs_dir, 1016 if (pat_enabled) {
1022 NULL, &memtype_fops); 1017 debugfs_create_file("pat_memtype_list", S_IRUSR,
1018 arch_debugfs_dir, NULL, &memtype_fops);
1019 }
1023 return 0; 1020 return 0;
1024} 1021}
1025 1022
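
Note: the pat.c hunks replace the hard-coded is_ISA_range() test with a platform hook, x86_platform.is_untracked_pat_range(), so a platform can widen the set of ranges PAT never tracks. A hedged stand-alone sketch of that indirection follows; struct platform_ops, the printf bodies and the exact 0xa0000-0x100000 window used here are illustrative, not the kernel's definitions.

#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

/* Platform-ops style hook, mirroring x86_platform.is_untracked_pat_range(). */
struct platform_ops {
	bool (*is_untracked_pat_range)(uint64_t start, uint64_t end);
};

/* Default policy: the legacy ISA window is always mapped WB and never tracked. */
static bool default_untracked(uint64_t start, uint64_t end)
{
	return start >= 0xa0000 && end <= 0x100000;
}

static struct platform_ops platform = {
	.is_untracked_pat_range = default_untracked,
};

static int reserve_memtype(uint64_t start, uint64_t end)
{
	if (platform.is_untracked_pat_range(start, end)) {
		printf("[%#llx-%#llx) untracked, skipping\n",
		       (unsigned long long)start, (unsigned long long)end);
		return 0;
	}
	printf("[%#llx-%#llx) tracked\n",
	       (unsigned long long)start, (unsigned long long)end);
	return 0;
}

int main(void)
{
	reserve_memtype(0xb8000, 0xb9000);		/* VGA text buffer: untracked */
	reserve_memtype(0xfee00000, 0xfee01000);	/* tracked normally */
	return 0;
}
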
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index ed34f5e35999..5c4ee422590e 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -1,4 +1,5 @@
1#include <linux/mm.h> 1#include <linux/mm.h>
2#include <linux/gfp.h>
2#include <asm/pgalloc.h> 3#include <asm/pgalloc.h>
3#include <asm/pgtable.h> 4#include <asm/pgtable.h>
4#include <asm/tlb.h> 5#include <asm/tlb.h>
@@ -6,6 +7,14 @@
6 7
7#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO 8#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO
8 9
10#ifdef CONFIG_HIGHPTE
11#define PGALLOC_USER_GFP __GFP_HIGHMEM
12#else
13#define PGALLOC_USER_GFP 0
14#endif
15
16gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP;
17
9pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) 18pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
10{ 19{
11 return (pte_t *)__get_free_page(PGALLOC_GFP); 20 return (pte_t *)__get_free_page(PGALLOC_GFP);
@@ -15,16 +24,29 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
15{ 24{
16 struct page *pte; 25 struct page *pte;
17 26
18#ifdef CONFIG_HIGHPTE 27 pte = alloc_pages(__userpte_alloc_gfp, 0);
19 pte = alloc_pages(PGALLOC_GFP | __GFP_HIGHMEM, 0);
20#else
21 pte = alloc_pages(PGALLOC_GFP, 0);
22#endif
23 if (pte) 28 if (pte)
24 pgtable_page_ctor(pte); 29 pgtable_page_ctor(pte);
25 return pte; 30 return pte;
26} 31}
27 32
33static int __init setup_userpte(char *arg)
34{
35 if (!arg)
36 return -EINVAL;
37
38 /*
39 * "userpte=nohigh" disables allocation of user pagetables in
40 * high memory.
41 */
42 if (strcmp(arg, "nohigh") == 0)
43 __userpte_alloc_gfp &= ~__GFP_HIGHMEM;
44 else
45 return -EINVAL;
46 return 0;
47}
48early_param("userpte", setup_userpte);
49
28void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte) 50void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
29{ 51{
30 pgtable_page_dtor(pte); 52 pgtable_page_dtor(pte);
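
Note: the pgtable.c hunks move the HIGHPTE decision from compile time into a runtime mask (__userpte_alloc_gfp) that a new "userpte=nohigh" early parameter can clear. A small user-space sketch of that flag handling is below; GFP_BASE/GFP_HIGHMEM are made-up bit values standing in for the real GFP flags.

#include <stdio.h>
#include <string.h>

/* Illustrative flag bits standing in for the kernel's GFP flags. */
#define GFP_BASE	0x01u
#define GFP_HIGHMEM	0x02u

/* Mirrors __userpte_alloc_gfp: high pages allowed for user PTE pages by default. */
static unsigned int userpte_alloc_gfp = GFP_BASE | GFP_HIGHMEM;

/* Mirrors the new setup_userpte() early_param handler: only "nohigh" is valid. */
static int setup_userpte(const char *arg)
{
	if (!arg)
		return -1;
	if (strcmp(arg, "nohigh") == 0) {
		userpte_alloc_gfp &= ~GFP_HIGHMEM;
		return 0;
	}
	return -1;
}

int main(void)
{
	printf("default gfp: %#x\n", userpte_alloc_gfp);
	setup_userpte("nohigh");		/* as if booted with userpte=nohigh */
	printf("after userpte=nohigh: %#x\n", userpte_alloc_gfp);
	return 0;
}
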
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index 46c8834aedc0..792854003ed3 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -6,7 +6,6 @@
6#include <linux/swap.h> 6#include <linux/swap.h>
7#include <linux/smp.h> 7#include <linux/smp.h>
8#include <linux/highmem.h> 8#include <linux/highmem.h>
9#include <linux/slab.h>
10#include <linux/pagemap.h> 9#include <linux/pagemap.h>
11#include <linux/spinlock.h> 10#include <linux/spinlock.h>
12#include <linux/module.h> 11#include <linux/module.h>
@@ -19,6 +18,7 @@
19#include <asm/e820.h> 18#include <asm/e820.h>
20#include <asm/tlb.h> 19#include <asm/tlb.h>
21#include <asm/tlbflush.h> 20#include <asm/tlbflush.h>
21#include <asm/io.h>
22 22
23unsigned int __VMALLOC_RESERVE = 128 << 20; 23unsigned int __VMALLOC_RESERVE = 128 << 20;
24 24
@@ -129,6 +129,7 @@ static int __init parse_reservetop(char *arg)
129 129
130 address = memparse(arg, &arg); 130 address = memparse(arg, &arg);
131 reserve_top_address(address); 131 reserve_top_address(address);
132 fixup_early_ioremap();
132 return 0; 133 return 0;
133} 134}
134early_param("reservetop", parse_reservetop); 135early_param("reservetop", parse_reservetop);
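
Note: parse_reservetop() above (like the numa=fake=<size>M/G handling in numa_64.c) feeds its argument through memparse(), which accepts a number with an optional size suffix. The sketch below is a user-space approximation of that parser; memparse_sketch() is an invented name, and the real kernel helper also understands larger suffixes (T, P, E).

#include <stdio.h>
#include <stdlib.h>

/* Parse a number with an optional K/M/G suffix, roughly like memparse(). */
static unsigned long long memparse_sketch(const char *s, char **retptr)
{
	char *end;
	unsigned long long val = strtoull(s, &end, 0);

	switch (*end) {
	case 'G': case 'g': val <<= 10;	/* fall through */
	case 'M': case 'm': val <<= 10;	/* fall through */
	case 'K': case 'k': val <<= 10; end++; break;
	default: break;
	}
	if (retptr)
		*retptr = end;
	return val;
}

int main(void)
{
	printf("128M -> %llu bytes\n", memparse_sketch("128M", NULL));
	printf("2G   -> %llu bytes\n", memparse_sketch("2G", NULL));
	return 0;
}
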
diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c
index 513d8ed5d2ec..a3250aa34086 100644
--- a/arch/x86/mm/setup_nx.c
+++ b/arch/x86/mm/setup_nx.c
@@ -3,10 +3,8 @@
3#include <linux/init.h> 3#include <linux/init.h>
4 4
5#include <asm/pgtable.h> 5#include <asm/pgtable.h>
6#include <asm/proto.h>
6 7
7int nx_enabled;
8
9#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
10static int disable_nx __cpuinitdata; 8static int disable_nx __cpuinitdata;
11 9
12/* 10/*
@@ -22,48 +20,41 @@ static int __init noexec_setup(char *str)
22 if (!str) 20 if (!str)
23 return -EINVAL; 21 return -EINVAL;
24 if (!strncmp(str, "on", 2)) { 22 if (!strncmp(str, "on", 2)) {
25 __supported_pte_mask |= _PAGE_NX;
26 disable_nx = 0; 23 disable_nx = 0;
27 } else if (!strncmp(str, "off", 3)) { 24 } else if (!strncmp(str, "off", 3)) {
28 disable_nx = 1; 25 disable_nx = 1;
29 __supported_pte_mask &= ~_PAGE_NX;
30 } 26 }
27 x86_configure_nx();
31 return 0; 28 return 0;
32} 29}
33early_param("noexec", noexec_setup); 30early_param("noexec", noexec_setup);
34#endif
35 31
36#ifdef CONFIG_X86_PAE 32void __cpuinit x86_configure_nx(void)
37void __init set_nx(void)
38{ 33{
39 unsigned int v[4], l, h; 34 if (cpu_has_nx && !disable_nx)
40 35 __supported_pte_mask |= _PAGE_NX;
41 if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) { 36 else
42 cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]); 37 __supported_pte_mask &= ~_PAGE_NX;
38}
43 39
44 if ((v[3] & (1 << 20)) && !disable_nx) { 40void __init x86_report_nx(void)
45 rdmsr(MSR_EFER, l, h); 41{
46 l |= EFER_NX; 42 if (!cpu_has_nx) {
47 wrmsr(MSR_EFER, l, h); 43 printk(KERN_NOTICE "Notice: NX (Execute Disable) protection "
48 nx_enabled = 1; 44 "missing in CPU or disabled in BIOS!\n");
49 __supported_pte_mask |= _PAGE_NX; 45 } else {
46#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
47 if (disable_nx) {
48 printk(KERN_INFO "NX (Execute Disable) protection: "
49 "disabled by kernel command line option\n");
50 } else {
51 printk(KERN_INFO "NX (Execute Disable) protection: "
52 "active\n");
50 } 53 }
51 }
52}
53#else 54#else
54void set_nx(void) 55 /* 32bit non-PAE kernel, NX cannot be used */
55{ 56 printk(KERN_NOTICE "Notice: NX (Execute Disable) protection "
56} 57 "cannot be enabled: non-PAE kernel!\n");
57#endif 58#endif
58 59 }
59#ifdef CONFIG_X86_64
60void __cpuinit check_efer(void)
61{
62 unsigned long efer;
63
64 rdmsrl(MSR_EFER, efer);
65 if (!(efer & EFER_NX) || disable_nx)
66 __supported_pte_mask &= ~_PAGE_NX;
67} 60}
68#endif
69
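
Note: the setup_nx.c rewrite drops the open-coded CPUID/EFER poking in favour of x86_configure_nx()/x86_report_nx(), both keyed off cpu_has_nx. For reference, the same feature bit can be read from user space as sketched below: NX (Execute Disable) is CPUID leaf 0x80000001, EDX bit 20. This only detects the feature; enabling it via EFER stays a kernel-side job.

/* Build with: gcc -o nxcheck nxcheck.c   (x86 only) */
#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* NX is advertised in CPUID leaf 0x80000001, EDX bit 20. */
	if (!__get_cpuid(0x80000001, &eax, &ebx, &ecx, &edx)) {
		printf("extended CPUID leaf not available\n");
		return 1;
	}

	if (edx & (1u << 20))
		printf("NX (Execute Disable) protection: available\n");
	else
		printf("NX (Execute Disable) protection: missing in CPU or disabled in BIOS\n");

	return 0;
}
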
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c
index 6f8aa33031c7..9324f13492d5 100644
--- a/arch/x86/mm/srat_32.c
+++ b/arch/x86/mm/srat_32.c
@@ -267,6 +267,8 @@ int __init get_memcfg_from_srat(void)
267 e820_register_active_regions(chunk->nid, chunk->start_pfn, 267 e820_register_active_regions(chunk->nid, chunk->start_pfn,
268 min(chunk->end_pfn, max_pfn)); 268 min(chunk->end_pfn, max_pfn));
269 } 269 }
270 /* for out of order entries in SRAT */
271 sort_node_map();
270 272
271 for_each_online_node(nid) { 273 for_each_online_node(nid) {
272 unsigned long start = node_start_pfn[nid]; 274 unsigned long start = node_start_pfn[nid];
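
Note: the one-line srat_32.c addition calls sort_node_map() so that SRAT entries reported out of order end up sorted by start address before the per-node ranges are walked. A stand-alone equivalent using qsort() is sketched below; struct node_range and the sample data are invented for illustration.

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

struct node_range {
	int nid;
	uint64_t start_pfn, end_pfn;
};

static int cmp_range(const void *a, const void *b)
{
	const struct node_range *ra = a, *rb = b;

	if (ra->start_pfn < rb->start_pfn)
		return -1;
	return ra->start_pfn > rb->start_pfn;
}

int main(void)
{
	/* Out-of-order entries, as an unusual SRAT might report them. */
	struct node_range map[] = {
		{ 1, 0x100000, 0x200000 },
		{ 0, 0x000000, 0x100000 },
		{ 2, 0x200000, 0x240000 },
	};
	size_t n = sizeof(map) / sizeof(map[0]);

	qsort(map, n, sizeof(map[0]), cmp_range);

	for (size_t i = 0; i < n; i++)
		printf("node %d: %#llx-%#llx\n", map[i].nid,
		       (unsigned long long)map[i].start_pfn,
		       (unsigned long long)map[i].end_pfn);
	return 0;
}
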
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index dbb5381f7b3b..38512d0c4742 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -136,7 +136,7 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
136 apicid_to_node[apic_id] = node; 136 apicid_to_node[apic_id] = node;
137 node_set(node, cpu_nodes_parsed); 137 node_set(node, cpu_nodes_parsed);
138 acpi_numa = 1; 138 acpi_numa = 1;
139 printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n", 139 printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n",
140 pxm, apic_id, node); 140 pxm, apic_id, node);
141} 141}
142 142
@@ -170,7 +170,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
170 apicid_to_node[apic_id] = node; 170 apicid_to_node[apic_id] = node;
171 node_set(node, cpu_nodes_parsed); 171 node_set(node, cpu_nodes_parsed);
172 acpi_numa = 1; 172 acpi_numa = 1;
173 printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n", 173 printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n",
174 pxm, apic_id, node); 174 pxm, apic_id, node);
175} 175}
176 176
@@ -229,9 +229,11 @@ update_nodes_add(int node, unsigned long start, unsigned long end)
229 printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n"); 229 printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n");
230 } 230 }
231 231
232 if (changed) 232 if (changed) {
233 node_set(node, cpu_nodes_parsed);
233 printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", 234 printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n",
234 nd->start, nd->end); 235 nd->start, nd->end);
236 }
235} 237}
236 238
237/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */ 239/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
@@ -290,8 +292,6 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
290 292
291 printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm, 293 printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm,
292 start, end); 294 start, end);
293 e820_register_active_regions(node, start >> PAGE_SHIFT,
294 end >> PAGE_SHIFT);
295 295
296 if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) { 296 if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) {
297 update_nodes_add(node, start, end); 297 update_nodes_add(node, start, end);
@@ -319,7 +319,7 @@ static int __init nodes_cover_memory(const struct bootnode *nodes)
319 unsigned long s = nodes[i].start >> PAGE_SHIFT; 319 unsigned long s = nodes[i].start >> PAGE_SHIFT;
320 unsigned long e = nodes[i].end >> PAGE_SHIFT; 320 unsigned long e = nodes[i].end >> PAGE_SHIFT;
321 pxmram += e - s; 321 pxmram += e - s;
322 pxmram -= absent_pages_in_range(s, e); 322 pxmram -= __absent_pages_in_range(i, s, e);
323 if ((long)pxmram < 0) 323 if ((long)pxmram < 0)
324 pxmram = 0; 324 pxmram = 0;
325 } 325 }
@@ -338,6 +338,19 @@ static int __init nodes_cover_memory(const struct bootnode *nodes)
338 338
339void __init acpi_numa_arch_fixup(void) {} 339void __init acpi_numa_arch_fixup(void) {}
340 340
341int __init acpi_get_nodes(struct bootnode *physnodes)
342{
343 int i;
344 int ret = 0;
345
346 for_each_node_mask(i, nodes_parsed) {
347 physnodes[ret].start = nodes[i].start;
348 physnodes[ret].end = nodes[i].end;
349 ret++;
350 }
351 return ret;
352}
353
341/* Use the information discovered above to actually set up the nodes. */ 354/* Use the information discovered above to actually set up the nodes. */
342int __init acpi_scan_nodes(unsigned long start, unsigned long end) 355int __init acpi_scan_nodes(unsigned long start, unsigned long end)
343{ 356{
@@ -350,11 +363,6 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
350 for (i = 0; i < MAX_NUMNODES; i++) 363 for (i = 0; i < MAX_NUMNODES; i++)
351 cutoff_node(i, start, end); 364 cutoff_node(i, start, end);
352 365
353 if (!nodes_cover_memory(nodes)) {
354 bad_srat();
355 return -1;
356 }
357
358 memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks, 366 memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks,
359 memblk_nodeid); 367 memblk_nodeid);
360 if (memnode_shift < 0) { 368 if (memnode_shift < 0) {
@@ -364,6 +372,16 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
364 return -1; 372 return -1;
365 } 373 }
366 374
375 for_each_node_mask(i, nodes_parsed)
376 e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT,
377 nodes[i].end >> PAGE_SHIFT);
378 /* for out of order entries in SRAT */
379 sort_node_map();
380 if (!nodes_cover_memory(nodes)) {
381 bad_srat();
382 return -1;
383 }
384
367 /* Account for nodes with cpus and no memory */ 385 /* Account for nodes with cpus and no memory */
368 nodes_or(node_possible_map, nodes_parsed, cpu_nodes_parsed); 386 nodes_or(node_possible_map, nodes_parsed, cpu_nodes_parsed);
369 387
@@ -443,7 +461,8 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
443 * node, it must now point to the fake node ID. 461 * node, it must now point to the fake node ID.
444 */ 462 */
445 for (j = 0; j < MAX_LOCAL_APIC; j++) 463 for (j = 0; j < MAX_LOCAL_APIC; j++)
446 if (apicid_to_node[j] == nid) 464 if (apicid_to_node[j] == nid &&
465 fake_apicid_to_node[j] == NUMA_NO_NODE)
447 fake_apicid_to_node[j] = i; 466 fake_apicid_to_node[j] = i;
448 } 467 }
449 for (i = 0; i < num_nodes; i++) 468 for (i = 0; i < num_nodes; i++)
@@ -454,7 +473,6 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
454 for (i = 0; i < num_nodes; i++) 473 for (i = 0; i < num_nodes; i++)
455 if (fake_nodes[i].start != fake_nodes[i].end) 474 if (fake_nodes[i].start != fake_nodes[i].end)
456 node_set(i, nodes_parsed); 475 node_set(i, nodes_parsed);
457 WARN_ON(!nodes_cover_memory(fake_nodes));
458} 476}
459 477
460static int null_slit_node_compare(int a, int b) 478static int null_slit_node_compare(int a, int b)
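
Note: srat_64.c now exports the parsed SRAT nodes through acpi_get_nodes() so NUMA emulation can fill its physnodes[] array, and both cutoff_node() here and setup_physnodes() in numa_64.c then intersect each node with the usable [start, end) window and drop what ends up empty. A hedged stand-alone version of that clamping pass is below; struct bootnode's layout, clamp_nodes() and the sample ranges are illustrative only.

#include <stdio.h>
#include <stdint.h>

struct bootnode { uint64_t start, end; };

/* Clamp each node to [start, end) and compact away nodes left empty,
 * in the spirit of cutoff_node()/setup_physnodes(). Returns the new count. */
static int clamp_nodes(struct bootnode *nodes, int nr, uint64_t start, uint64_t end)
{
	int out = 0;

	for (int i = 0; i < nr; i++) {
		uint64_t s = nodes[i].start > start ? nodes[i].start : start;
		uint64_t e = nodes[i].end < end ? nodes[i].end : end;

		if (s >= e)
			continue;	/* node lies entirely outside the window */
		nodes[out].start = s;
		nodes[out].end = e;
		out++;
	}
	return out;
}

int main(void)
{
	struct bootnode nodes[] = {
		{ 0x000000000ULL, 0x080000000ULL },
		{ 0x080000000ULL, 0x100000000ULL },
		{ 0x100000000ULL, 0x180000000ULL },	/* above the window, dropped */
	};
	int nr = clamp_nodes(nodes, 3, 0x001000000ULL, 0x100000000ULL);

	for (int i = 0; i < nr; i++)
		printf("node %d: %#llx-%#llx\n", i,
		       (unsigned long long)nodes[i].start,
		       (unsigned long long)nodes[i].end);
	return 0;
}
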
diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c
index 427fd1b56df5..8565d944f7cf 100644
--- a/arch/x86/mm/testmmiotrace.c
+++ b/arch/x86/mm/testmmiotrace.c
@@ -1,12 +1,13 @@
1/* 1/*
2 * Written by Pekka Paalanen, 2008-2009 <pq@iki.fi> 2 * Written by Pekka Paalanen, 2008-2009 <pq@iki.fi>
3 */ 3 */
4
5#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
6
4#include <linux/module.h> 7#include <linux/module.h>
5#include <linux/io.h> 8#include <linux/io.h>
6#include <linux/mmiotrace.h> 9#include <linux/mmiotrace.h>
7 10
8#define MODULE_NAME "testmmiotrace"
9
10static unsigned long mmio_address; 11static unsigned long mmio_address;
11module_param(mmio_address, ulong, 0); 12module_param(mmio_address, ulong, 0);
12MODULE_PARM_DESC(mmio_address, " Start address of the mapping of 16 kB " 13MODULE_PARM_DESC(mmio_address, " Start address of the mapping of 16 kB "
@@ -30,7 +31,7 @@ static unsigned v32(unsigned i)
30static void do_write_test(void __iomem *p) 31static void do_write_test(void __iomem *p)
31{ 32{
32 unsigned int i; 33 unsigned int i;
33 pr_info(MODULE_NAME ": write test.\n"); 34 pr_info("write test.\n");
34 mmiotrace_printk("Write test.\n"); 35 mmiotrace_printk("Write test.\n");
35 36
36 for (i = 0; i < 256; i++) 37 for (i = 0; i < 256; i++)
@@ -47,7 +48,7 @@ static void do_read_test(void __iomem *p)
47{ 48{
48 unsigned int i; 49 unsigned int i;
49 unsigned errs[3] = { 0 }; 50 unsigned errs[3] = { 0 };
50 pr_info(MODULE_NAME ": read test.\n"); 51 pr_info("read test.\n");
51 mmiotrace_printk("Read test.\n"); 52 mmiotrace_printk("Read test.\n");
52 53
53 for (i = 0; i < 256; i++) 54 for (i = 0; i < 256; i++)
@@ -68,7 +69,7 @@ static void do_read_test(void __iomem *p)
68 69
69static void do_read_far_test(void __iomem *p) 70static void do_read_far_test(void __iomem *p)
70{ 71{
71 pr_info(MODULE_NAME ": read far test.\n"); 72 pr_info("read far test.\n");
72 mmiotrace_printk("Read far test.\n"); 73 mmiotrace_printk("Read far test.\n");
73 74
74 ioread32(p + read_far); 75 ioread32(p + read_far);
@@ -78,7 +79,7 @@ static void do_test(unsigned long size)
78{ 79{
79 void __iomem *p = ioremap_nocache(mmio_address, size); 80 void __iomem *p = ioremap_nocache(mmio_address, size);
80 if (!p) { 81 if (!p) {
81 pr_err(MODULE_NAME ": could not ioremap, aborting.\n"); 82 pr_err("could not ioremap, aborting.\n");
82 return; 83 return;
83 } 84 }
84 mmiotrace_printk("ioremap returned %p.\n", p); 85 mmiotrace_printk("ioremap returned %p.\n", p);
@@ -94,24 +95,22 @@ static int __init init(void)
94 unsigned long size = (read_far) ? (8 << 20) : (16 << 10); 95 unsigned long size = (read_far) ? (8 << 20) : (16 << 10);
95 96
96 if (mmio_address == 0) { 97 if (mmio_address == 0) {
97 pr_err(MODULE_NAME ": you have to use the module argument " 98 pr_err("you have to use the module argument mmio_address.\n");
98 "mmio_address.\n"); 99 pr_err("DO NOT LOAD THIS MODULE UNLESS YOU REALLY KNOW WHAT YOU ARE DOING!\n");
99 pr_err(MODULE_NAME ": DO NOT LOAD THIS MODULE UNLESS"
100 " YOU REALLY KNOW WHAT YOU ARE DOING!\n");
101 return -ENXIO; 100 return -ENXIO;
102 } 101 }
103 102
104 pr_warning(MODULE_NAME ": WARNING: mapping %lu kB @ 0x%08lx in PCI " 103 pr_warning("WARNING: mapping %lu kB @ 0x%08lx in PCI address space, "
105 "address space, and writing 16 kB of rubbish in there.\n", 104 "and writing 16 kB of rubbish in there.\n",
106 size >> 10, mmio_address); 105 size >> 10, mmio_address);
107 do_test(size); 106 do_test(size);
108 pr_info(MODULE_NAME ": All done.\n"); 107 pr_info("All done.\n");
109 return 0; 108 return 0;
110} 109}
111 110
112static void __exit cleanup(void) 111static void __exit cleanup(void)
113{ 112{
114 pr_debug(MODULE_NAME ": unloaded.\n"); 113 pr_debug("unloaded.\n");
115} 114}
116 115
117module_init(init); 116module_init(init);
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 36fe08eeb5c3..426f3a1a64d3 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -8,6 +8,7 @@
8 8
9#include <asm/tlbflush.h> 9#include <asm/tlbflush.h>
10#include <asm/mmu_context.h> 10#include <asm/mmu_context.h>
11#include <asm/cache.h>
11#include <asm/apic.h> 12#include <asm/apic.h>
12#include <asm/uv/uv.h> 13#include <asm/uv/uv.h>
13 14
@@ -40,10 +41,10 @@ union smp_flush_state {
40 struct { 41 struct {
41 struct mm_struct *flush_mm; 42 struct mm_struct *flush_mm;
42 unsigned long flush_va; 43 unsigned long flush_va;
43 spinlock_t tlbstate_lock; 44 raw_spinlock_t tlbstate_lock;
44 DECLARE_BITMAP(flush_cpumask, NR_CPUS); 45 DECLARE_BITMAP(flush_cpumask, NR_CPUS);
45 }; 46 };
46 char pad[CONFIG_X86_INTERNODE_CACHE_BYTES]; 47 char pad[INTERNODE_CACHE_BYTES];
47} ____cacheline_internodealigned_in_smp; 48} ____cacheline_internodealigned_in_smp;
48 49
49/* State is put into the per CPU data section, but padded 50/* State is put into the per CPU data section, but padded
@@ -180,7 +181,7 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
180 * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is 181 * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is
181 * probably not worth checking this for a cache-hot lock. 182 * probably not worth checking this for a cache-hot lock.
182 */ 183 */
183 spin_lock(&f->tlbstate_lock); 184 raw_spin_lock(&f->tlbstate_lock);
184 185
185 f->flush_mm = mm; 186 f->flush_mm = mm;
186 f->flush_va = va; 187 f->flush_va = va;
@@ -198,7 +199,7 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
198 199
199 f->flush_mm = NULL; 200 f->flush_mm = NULL;
200 f->flush_va = 0; 201 f->flush_va = 0;
201 spin_unlock(&f->tlbstate_lock); 202 raw_spin_unlock(&f->tlbstate_lock);
202} 203}
203 204
204void native_flush_tlb_others(const struct cpumask *cpumask, 205void native_flush_tlb_others(const struct cpumask *cpumask,
@@ -222,7 +223,7 @@ static int __cpuinit init_smp_flush(void)
222 int i; 223 int i;
223 224
224 for (i = 0; i < ARRAY_SIZE(flush_state); i++) 225 for (i = 0; i < ARRAY_SIZE(flush_state); i++)
225 spin_lock_init(&flush_state[i].tlbstate_lock); 226 raw_spin_lock_init(&flush_state[i].tlbstate_lock);
226 227
227 return 0; 228 return 0;
228} 229}
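
Note: the tlb.c hunk keeps each flush_state entry padded out to a full internode cache line (now via INTERNODE_CACHE_BYTES) so that the per-vector locks never share a line and ping-pong between nodes. The sketch below shows the same union-plus-pad idiom in plain C; the 64-byte constant and the field names are placeholders, since INTERNODE_CACHE_BYTES can be much larger on big NUMA configurations.

#include <stdio.h>

#define CACHE_LINE_BYTES 64	/* illustrative; the kernel uses INTERNODE_CACHE_BYTES */

/* Pad each per-vector flush state to a full cache line to avoid false sharing,
 * mirroring union smp_flush_state / ____cacheline_internodealigned_in_smp. */
union flush_state {
	struct {
		unsigned long flush_va;		/* stand-ins for the real fields */
		unsigned long generation;
	} s;
	char pad[CACHE_LINE_BYTES];		/* forces each entry onto its own line */
} __attribute__((aligned(CACHE_LINE_BYTES)));

static union flush_state flush_state[8];

int main(void)
{
	printf("sizeof(union flush_state) = %zu\n", sizeof(union flush_state));
	printf("entries are %zu bytes apart\n",
	       (size_t)((char *)&flush_state[1] - (char *)&flush_state[0]));
	return 0;
}
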