author | Andrea Bastoni <bastoni@cs.unc.edu> | 2010-05-30 19:16:45 -0400
committer | Andrea Bastoni <bastoni@cs.unc.edu> | 2010-05-30 19:16:45 -0400
commit | ada47b5fe13d89735805b566185f4885f5a3f750 (patch)
tree | 644b88f8a71896307d71438e9b3af49126ffb22b /arch/x86/mm
parent | 43e98717ad40a4ae64545b5ba047c7b86aa44f4f (diff)
parent | 3280f21d43ee541f97f8cda5792150d2dbec20d5 (diff)
Merge branch 'wip-2.6.34' into old-private-master (archived-private-master)
Diffstat (limited to 'arch/x86/mm')
-rw-r--r-- | arch/x86/mm/extable.c | 31
-rw-r--r-- | arch/x86/mm/fault.c | 13
-rw-r--r-- | arch/x86/mm/gup.c | 2
-rw-r--r-- | arch/x86/mm/hugetlbpage.c | 1
-rw-r--r-- | arch/x86/mm/init.c | 44
-rw-r--r-- | arch/x86/mm/init_32.c | 30
-rw-r--r-- | arch/x86/mm/init_64.c | 64
-rw-r--r-- | arch/x86/mm/ioremap.c | 81
-rw-r--r-- | arch/x86/mm/k8topology_64.c | 101
-rw-r--r-- | arch/x86/mm/kmemcheck/error.c | 19
-rw-r--r-- | arch/x86/mm/kmemcheck/kmemcheck.c | 2
-rw-r--r-- | arch/x86/mm/kmemcheck/shadow.c | 16
-rw-r--r-- | arch/x86/mm/kmemcheck/shadow.h | 2
-rw-r--r-- | arch/x86/mm/kmmio.c | 58
-rw-r--r-- | arch/x86/mm/mmap.c | 4
-rw-r--r-- | arch/x86/mm/mmio-mod.c | 72
-rw-r--r-- | arch/x86/mm/numa_32.c | 7
-rw-r--r-- | arch/x86/mm/numa_64.c | 506
-rw-r--r-- | arch/x86/mm/pageattr.c | 45
-rw-r--r-- | arch/x86/mm/pat.c | 25
-rw-r--r-- | arch/x86/mm/pgtable.c | 32
-rw-r--r-- | arch/x86/mm/pgtable_32.c | 3
-rw-r--r-- | arch/x86/mm/setup_nx.c | 59
-rw-r--r-- | arch/x86/mm/srat_32.c | 2
-rw-r--r-- | arch/x86/mm/srat_64.c | 44
-rw-r--r-- | arch/x86/mm/testmmiotrace.c | 29
-rw-r--r-- | arch/x86/mm/tlb.c | 11
27 files changed, 775 insertions, 528 deletions
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 61b41ca3b5a2..d0474ad2a6e5 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -35,34 +35,3 @@ int fixup_exception(struct pt_regs *regs)
35 | 35 | ||
36 | return 0; | 36 | return 0; |
37 | } | 37 | } |
38 | |||
39 | #ifdef CONFIG_X86_64 | ||
40 | /* | ||
41 | * Need to defined our own search_extable on X86_64 to work around | ||
42 | * a B stepping K8 bug. | ||
43 | */ | ||
44 | const struct exception_table_entry * | ||
45 | search_extable(const struct exception_table_entry *first, | ||
46 | const struct exception_table_entry *last, | ||
47 | unsigned long value) | ||
48 | { | ||
49 | /* B stepping K8 bug */ | ||
50 | if ((value >> 32) == 0) | ||
51 | value |= 0xffffffffUL << 32; | ||
52 | |||
53 | while (first <= last) { | ||
54 | const struct exception_table_entry *mid; | ||
55 | long diff; | ||
56 | |||
57 | mid = (last - first) / 2 + first; | ||
58 | diff = mid->insn - value; | ||
59 | if (diff == 0) | ||
60 | return mid; | ||
61 | else if (diff < 0) | ||
62 | first = mid+1; | ||
63 | else | ||
64 | last = mid-1; | ||
65 | } | ||
66 | return NULL; | ||
67 | } | ||
68 | #endif | ||
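
The 31 lines deleted above were x86-64's private search_extable(), which worked around an AMD K8 B-stepping erratum by forcing the upper 32 bits of the faulting address to all-ones before the lookup. With the override gone, the build falls back to the generic binary search in lib/extable.c. A sketch of that routine as it looked in this era; note it compares with </> instead of the deleted version's subtraction, which can overflow a signed long:

    const struct exception_table_entry *
    search_extable(const struct exception_table_entry *first,
                   const struct exception_table_entry *last,
                   unsigned long value)
    {
            while (first <= last) {
                    const struct exception_table_entry *mid;

                    mid = ((last - first) >> 1) + first;
                    /* the distance between value and insn can exceed
                     * LONG_MAX, so compare rather than subtract */
                    if (mid->insn < value)
                            first = mid + 1;
                    else if (mid->insn > value)
                            last = mid - 1;
                    else
                            return mid;
            }
            return NULL;
    }
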
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index f4cee9028cf0..f62777940dfb 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -38,7 +38,8 @@ enum x86_pf_error_code {
38 | * Returns 0 if mmiotrace is disabled, or if the fault is not | 38 | * Returns 0 if mmiotrace is disabled, or if the fault is not |
39 | * handled by mmiotrace: | 39 | * handled by mmiotrace: |
40 | */ | 40 | */ |
41 | static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) | 41 | static inline int __kprobes |
42 | kmmio_fault(struct pt_regs *regs, unsigned long addr) | ||
42 | { | 43 | { |
43 | if (unlikely(is_kmmio_active())) | 44 | if (unlikely(is_kmmio_active())) |
44 | if (kmmio_handler(regs, addr) == 1) | 45 | if (kmmio_handler(regs, addr) == 1) |
@@ -46,7 +47,7 @@ static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr)
46 | return 0; | 47 | return 0; |
47 | } | 48 | } |
48 | 49 | ||
49 | static inline int notify_page_fault(struct pt_regs *regs) | 50 | static inline int __kprobes notify_page_fault(struct pt_regs *regs) |
50 | { | 51 | { |
51 | int ret = 0; | 52 | int ret = 0; |
52 | 53 | ||
@@ -240,7 +241,7 @@ void vmalloc_sync_all(void)
240 | * | 241 | * |
241 | * Handle a fault on the vmalloc or module mapping area | 242 | * Handle a fault on the vmalloc or module mapping area |
242 | */ | 243 | */ |
243 | static noinline int vmalloc_fault(unsigned long address) | 244 | static noinline __kprobes int vmalloc_fault(unsigned long address) |
244 | { | 245 | { |
245 | unsigned long pgd_paddr; | 246 | unsigned long pgd_paddr; |
246 | pmd_t *pmd_k; | 247 | pmd_t *pmd_k; |
@@ -357,7 +358,7 @@ void vmalloc_sync_all(void)
357 | * | 358 | * |
358 | * This assumes no large pages in there. | 359 | * This assumes no large pages in there. |
359 | */ | 360 | */ |
360 | static noinline int vmalloc_fault(unsigned long address) | 361 | static noinline __kprobes int vmalloc_fault(unsigned long address) |
361 | { | 362 | { |
362 | pgd_t *pgd, *pgd_ref; | 363 | pgd_t *pgd, *pgd_ref; |
363 | pud_t *pud, *pud_ref; | 364 | pud_t *pud, *pud_ref; |
@@ -658,7 +659,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
658 | show_fault_oops(regs, error_code, address); | 659 | show_fault_oops(regs, error_code, address); |
659 | 660 | ||
660 | stackend = end_of_stack(tsk); | 661 | stackend = end_of_stack(tsk); |
661 | if (*stackend != STACK_END_MAGIC) | 662 | if (tsk != &init_task && *stackend != STACK_END_MAGIC) |
662 | printk(KERN_ALERT "Thread overran stack, or stack corrupted\n"); | 663 | printk(KERN_ALERT "Thread overran stack, or stack corrupted\n"); |
663 | 664 | ||
664 | tsk->thread.cr2 = address; | 665 | tsk->thread.cr2 = address; |
@@ -860,7 +861,7 @@ static int spurious_fault_check(unsigned long error_code, pte_t *pte)
860 | * There are no security implications to leaving a stale TLB when | 861 | * There are no security implications to leaving a stale TLB when |
861 | * increasing the permissions on a page. | 862 | * increasing the permissions on a page. |
862 | */ | 863 | */ |
863 | static noinline int | 864 | static noinline __kprobes int |
864 | spurious_fault(unsigned long error_code, unsigned long address) | 865 | spurious_fault(unsigned long error_code, unsigned long address) |
865 | { | 866 | { |
866 | pgd_t *pgd; | 867 | pgd_t *pgd; |
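
The recurring change in this file is the __kprobes annotation on every helper reachable from the page-fault path (kmmio_fault, notify_page_fault, vmalloc_fault, spurious_fault). A sketch of what the annotation does, assuming this era's definition in include/linux/kprobes.h: it only changes the section the function is emitted into, and the kprobes core refuses to place probes in .kprobes.text, so a probe can never fire inside the fault path and recurse. The separate init_task hunk silences a false "Thread overran stack" warning: the early boot stack of init_task never gets STACK_END_MAGIC written (that happens in the fork path), so the canary check misfired for it.

    /* Sketch, per this era's include/linux/kprobes.h: */
    #ifdef CONFIG_KPROBES
    # define __kprobes  __attribute__((__section__(".kprobes.text")))
    #else
    # define __kprobes
    #endif
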
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 71da1bca13cb..738e6593799d 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -18,7 +18,7 @@ static inline pte_t gup_get_pte(pte_t *ptep)
18 | #else | 18 | #else |
19 | /* | 19 | /* |
20 | * With get_user_pages_fast, we walk down the pagetables without taking | 20 | * With get_user_pages_fast, we walk down the pagetables without taking |
21 | * any locks. For this we would like to load the pointers atoimcally, | 21 | * any locks. For this we would like to load the pointers atomically, |
22 | * but that is not possible (without expensive cmpxchg8b) on PAE. What | 22 | * but that is not possible (without expensive cmpxchg8b) on PAE. What |
23 | * we do have is the guarantee that a pte will only either go from not | 23 | * we do have is the guarantee that a pte will only either go from not |
24 | * present to present, or present to not present or both -- it will not | 24 | * present to present, or present to not present or both -- it will not |
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index f46c340727b8..069ce7c37c01 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -9,7 +9,6 @@
9 | #include <linux/mm.h> | 9 | #include <linux/mm.h> |
10 | #include <linux/hugetlb.h> | 10 | #include <linux/hugetlb.h> |
11 | #include <linux/pagemap.h> | 11 | #include <linux/pagemap.h> |
12 | #include <linux/slab.h> | ||
13 | #include <linux/err.h> | 12 | #include <linux/err.h> |
14 | #include <linux/sysctl.h> | 13 | #include <linux/sysctl.h> |
15 | #include <asm/mman.h> | 14 | #include <asm/mman.h> |
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 73ffd5536f62..b278535b14aa 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -1,3 +1,4 @@
1 | #include <linux/gfp.h> | ||
1 | #include <linux/initrd.h> | 2 | #include <linux/initrd.h> |
2 | #include <linux/ioport.h> | 3 | #include <linux/ioport.h> |
3 | #include <linux/swap.h> | 4 | #include <linux/swap.h> |
@@ -146,10 +147,6 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
146 | use_gbpages = direct_gbpages; | 147 | use_gbpages = direct_gbpages; |
147 | #endif | 148 | #endif |
148 | 149 | ||
149 | set_nx(); | ||
150 | if (nx_enabled) | ||
151 | printk(KERN_INFO "NX (Execute Disable) protection: active\n"); | ||
152 | |||
153 | /* Enable PSE if available */ | 150 | /* Enable PSE if available */ |
154 | if (cpu_has_pse) | 151 | if (cpu_has_pse) |
155 | set_in_cr4(X86_CR4_PSE); | 152 | set_in_cr4(X86_CR4_PSE); |
@@ -270,16 +267,9 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
270 | if (!after_bootmem) | 267 | if (!after_bootmem) |
271 | find_early_table_space(end, use_pse, use_gbpages); | 268 | find_early_table_space(end, use_pse, use_gbpages); |
272 | 269 | ||
273 | #ifdef CONFIG_X86_32 | ||
274 | for (i = 0; i < nr_range; i++) | ||
275 | kernel_physical_mapping_init(mr[i].start, mr[i].end, | ||
276 | mr[i].page_size_mask); | ||
277 | ret = end; | ||
278 | #else /* CONFIG_X86_64 */ | ||
279 | for (i = 0; i < nr_range; i++) | 270 | for (i = 0; i < nr_range; i++) |
280 | ret = kernel_physical_mapping_init(mr[i].start, mr[i].end, | 271 | ret = kernel_physical_mapping_init(mr[i].start, mr[i].end, |
281 | mr[i].page_size_mask); | 272 | mr[i].page_size_mask); |
282 | #endif | ||
283 | 273 | ||
284 | #ifdef CONFIG_X86_32 | 274 | #ifdef CONFIG_X86_32 |
285 | early_ioremap_page_table_range_init(); | 275 | early_ioremap_page_table_range_init(); |
@@ -342,11 +332,23 @@ int devmem_is_allowed(unsigned long pagenr)
342 | 332 | ||
343 | void free_init_pages(char *what, unsigned long begin, unsigned long end) | 333 | void free_init_pages(char *what, unsigned long begin, unsigned long end) |
344 | { | 334 | { |
345 | unsigned long addr = begin; | 335 | unsigned long addr; |
336 | unsigned long begin_aligned, end_aligned; | ||
337 | |||
338 | /* Make sure boundaries are page aligned */ | ||
339 | begin_aligned = PAGE_ALIGN(begin); | ||
340 | end_aligned = end & PAGE_MASK; | ||
346 | 341 | ||
347 | if (addr >= end) | 342 | if (WARN_ON(begin_aligned != begin || end_aligned != end)) { |
343 | begin = begin_aligned; | ||
344 | end = end_aligned; | ||
345 | } | ||
346 | |||
347 | if (begin >= end) | ||
348 | return; | 348 | return; |
349 | 349 | ||
350 | addr = begin; | ||
351 | |||
350 | /* | 352 | /* |
351 | * If debugging page accesses then do not free this memory but | 353 | * If debugging page accesses then do not free this memory but |
352 | * mark them not present - any buggy init-section access will | 354 | * mark them not present - any buggy init-section access will |
@@ -354,7 +356,7 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
354 | */ | 356 | */ |
355 | #ifdef CONFIG_DEBUG_PAGEALLOC | 357 | #ifdef CONFIG_DEBUG_PAGEALLOC |
356 | printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n", | 358 | printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n", |
357 | begin, PAGE_ALIGN(end)); | 359 | begin, end); |
358 | set_memory_np(begin, (end - begin) >> PAGE_SHIFT); | 360 | set_memory_np(begin, (end - begin) >> PAGE_SHIFT); |
359 | #else | 361 | #else |
360 | /* | 362 | /* |
@@ -369,8 +371,7 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
369 | for (; addr < end; addr += PAGE_SIZE) { | 371 | for (; addr < end; addr += PAGE_SIZE) { |
370 | ClearPageReserved(virt_to_page(addr)); | 372 | ClearPageReserved(virt_to_page(addr)); |
371 | init_page_count(virt_to_page(addr)); | 373 | init_page_count(virt_to_page(addr)); |
372 | memset((void *)(addr & ~(PAGE_SIZE-1)), | 374 | memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE); |
373 | POISON_FREE_INITMEM, PAGE_SIZE); | ||
374 | free_page(addr); | 375 | free_page(addr); |
375 | totalram_pages++; | 376 | totalram_pages++; |
376 | } | 377 | } |
@@ -387,6 +388,15 @@ void free_initmem(void)
387 | #ifdef CONFIG_BLK_DEV_INITRD | 388 | #ifdef CONFIG_BLK_DEV_INITRD |
388 | void free_initrd_mem(unsigned long start, unsigned long end) | 389 | void free_initrd_mem(unsigned long start, unsigned long end) |
389 | { | 390 | { |
390 | free_init_pages("initrd memory", start, end); | 391 | /* |
392 | * end could be not aligned, and We can not align that, | ||
393 | * decompresser could be confused by aligned initrd_end | ||
394 | * We already reserve the end partial page before in | ||
395 | * - i386_start_kernel() | ||
396 | * - x86_64_start_kernel() | ||
397 | * - relocate_initrd() | ||
398 | * So here We can do PAGE_ALIGN() safely to get partial page to be freed | ||
399 | */ | ||
400 | free_init_pages("initrd memory", start, PAGE_ALIGN(end)); | ||
391 | } | 401 | } |
392 | #endif | 402 | #endif |
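
free_init_pages() now refuses to touch partial pages: begin is rounded up, end is rounded down, and a misaligned caller trips a WARN_ON. free_initrd_mem() compensates by passing PAGE_ALIGN(end), which the comment above argues is safe because the trailing partial page was reserved during early boot. For reference, the alignment helpers assumed here behave as follows (definitions per this era's generic headers):

    #define PAGE_MASK        (~(PAGE_SIZE - 1))      /* rounds down */
    #define PAGE_ALIGN(addr) ALIGN(addr, PAGE_SIZE)  /* rounds up   */

    /* With 4 KiB pages and [begin, end) = [0x1234, 0x5678):
     *   begin_aligned = PAGE_ALIGN(0x1234) = 0x2000
     *   end_aligned   = 0x5678 & PAGE_MASK = 0x5000
     * so only the whole pages in [0x2000, 0x5000) are ever freed. */
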
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 30938c1d8d5d..bca79091b9d6 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -25,11 +25,11 @@
25 | #include <linux/pfn.h> | 25 | #include <linux/pfn.h> |
26 | #include <linux/poison.h> | 26 | #include <linux/poison.h> |
27 | #include <linux/bootmem.h> | 27 | #include <linux/bootmem.h> |
28 | #include <linux/slab.h> | ||
29 | #include <linux/proc_fs.h> | 28 | #include <linux/proc_fs.h> |
30 | #include <linux/memory_hotplug.h> | 29 | #include <linux/memory_hotplug.h> |
31 | #include <linux/initrd.h> | 30 | #include <linux/initrd.h> |
32 | #include <linux/cpumask.h> | 31 | #include <linux/cpumask.h> |
32 | #include <linux/gfp.h> | ||
33 | 33 | ||
34 | #include <asm/asm.h> | 34 | #include <asm/asm.h> |
35 | #include <asm/bios_ebda.h> | 35 | #include <asm/bios_ebda.h> |
@@ -241,6 +241,7 @@ kernel_physical_mapping_init(unsigned long start,
241 | unsigned long page_size_mask) | 241 | unsigned long page_size_mask) |
242 | { | 242 | { |
243 | int use_pse = page_size_mask == (1<<PG_LEVEL_2M); | 243 | int use_pse = page_size_mask == (1<<PG_LEVEL_2M); |
244 | unsigned long last_map_addr = end; | ||
244 | unsigned long start_pfn, end_pfn; | 245 | unsigned long start_pfn, end_pfn; |
245 | pgd_t *pgd_base = swapper_pg_dir; | 246 | pgd_t *pgd_base = swapper_pg_dir; |
246 | int pgd_idx, pmd_idx, pte_ofs; | 247 | int pgd_idx, pmd_idx, pte_ofs; |
@@ -341,9 +342,10 @@ repeat:
341 | prot = PAGE_KERNEL_EXEC; | 342 | prot = PAGE_KERNEL_EXEC; |
342 | 343 | ||
343 | pages_4k++; | 344 | pages_4k++; |
344 | if (mapping_iter == 1) | 345 | if (mapping_iter == 1) { |
345 | set_pte(pte, pfn_pte(pfn, init_prot)); | 346 | set_pte(pte, pfn_pte(pfn, init_prot)); |
346 | else | 347 | last_map_addr = (pfn << PAGE_SHIFT) + PAGE_SIZE; |
348 | } else | ||
347 | set_pte(pte, pfn_pte(pfn, prot)); | 349 | set_pte(pte, pfn_pte(pfn, prot)); |
348 | } | 350 | } |
349 | } | 351 | } |
@@ -368,7 +370,7 @@ repeat:
368 | mapping_iter = 2; | 370 | mapping_iter = 2; |
369 | goto repeat; | 371 | goto repeat; |
370 | } | 372 | } |
371 | return 0; | 373 | return last_map_addr; |
372 | } | 374 | } |
373 | 375 | ||
374 | pte_t *kmap_pte; | 376 | pte_t *kmap_pte; |
@@ -412,7 +414,7 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base)
412 | pkmap_page_table = pte; | 414 | pkmap_page_table = pte; |
413 | } | 415 | } |
414 | 416 | ||
415 | static void __init add_one_highpage_init(struct page *page, int pfn) | 417 | static void __init add_one_highpage_init(struct page *page) |
416 | { | 418 | { |
417 | ClearPageReserved(page); | 419 | ClearPageReserved(page); |
418 | init_page_count(page); | 420 | init_page_count(page); |
@@ -445,7 +447,7 @@ static int __init add_highpages_work_fn(unsigned long start_pfn,
445 | if (!pfn_valid(node_pfn)) | 447 | if (!pfn_valid(node_pfn)) |
446 | continue; | 448 | continue; |
447 | page = pfn_to_page(node_pfn); | 449 | page = pfn_to_page(node_pfn); |
448 | add_one_highpage_init(page, node_pfn); | 450 | add_one_highpage_init(page); |
449 | } | 451 | } |
450 | 452 | ||
451 | return 0; | 453 | return 0; |
@@ -703,8 +705,8 @@ void __init find_low_pfn_range(void)
703 | } | 705 | } |
704 | 706 | ||
705 | #ifndef CONFIG_NEED_MULTIPLE_NODES | 707 | #ifndef CONFIG_NEED_MULTIPLE_NODES |
706 | void __init initmem_init(unsigned long start_pfn, | 708 | void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, |
707 | unsigned long end_pfn) | 709 | int acpi, int k8) |
708 | { | 710 | { |
709 | #ifdef CONFIG_HIGHMEM | 711 | #ifdef CONFIG_HIGHMEM |
710 | highstart_pfn = highend_pfn = max_pfn; | 712 | highstart_pfn = highend_pfn = max_pfn; |
@@ -748,6 +750,7 @@ static void __init zone_sizes_init(void)
748 | free_area_init_nodes(max_zone_pfns); | 750 | free_area_init_nodes(max_zone_pfns); |
749 | } | 751 | } |
750 | 752 | ||
753 | #ifndef CONFIG_NO_BOOTMEM | ||
751 | static unsigned long __init setup_node_bootmem(int nodeid, | 754 | static unsigned long __init setup_node_bootmem(int nodeid, |
752 | unsigned long start_pfn, | 755 | unsigned long start_pfn, |
753 | unsigned long end_pfn, | 756 | unsigned long end_pfn, |
@@ -764,13 +767,14 @@ static unsigned long __init setup_node_bootmem(int nodeid,
764 | printk(KERN_INFO " node %d bootmap %08lx - %08lx\n", | 767 | printk(KERN_INFO " node %d bootmap %08lx - %08lx\n", |
765 | nodeid, bootmap, bootmap + bootmap_size); | 768 | nodeid, bootmap, bootmap + bootmap_size); |
766 | free_bootmem_with_active_regions(nodeid, end_pfn); | 769 | free_bootmem_with_active_regions(nodeid, end_pfn); |
767 | early_res_to_bootmem(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT); | ||
768 | 770 | ||
769 | return bootmap + bootmap_size; | 771 | return bootmap + bootmap_size; |
770 | } | 772 | } |
773 | #endif | ||
771 | 774 | ||
772 | void __init setup_bootmem_allocator(void) | 775 | void __init setup_bootmem_allocator(void) |
773 | { | 776 | { |
777 | #ifndef CONFIG_NO_BOOTMEM | ||
774 | int nodeid; | 778 | int nodeid; |
775 | unsigned long bootmap_size, bootmap; | 779 | unsigned long bootmap_size, bootmap; |
776 | /* | 780 | /* |
@@ -782,11 +786,13 @@ void __init setup_bootmem_allocator(void)
782 | if (bootmap == -1L) | 786 | if (bootmap == -1L) |
783 | panic("Cannot find bootmem map of size %ld\n", bootmap_size); | 787 | panic("Cannot find bootmem map of size %ld\n", bootmap_size); |
784 | reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP"); | 788 | reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP"); |
789 | #endif | ||
785 | 790 | ||
786 | printk(KERN_INFO " mapped low ram: 0 - %08lx\n", | 791 | printk(KERN_INFO " mapped low ram: 0 - %08lx\n", |
787 | max_pfn_mapped<<PAGE_SHIFT); | 792 | max_pfn_mapped<<PAGE_SHIFT); |
788 | printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT); | 793 | printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT); |
789 | 794 | ||
795 | #ifndef CONFIG_NO_BOOTMEM | ||
790 | for_each_online_node(nodeid) { | 796 | for_each_online_node(nodeid) { |
791 | unsigned long start_pfn, end_pfn; | 797 | unsigned long start_pfn, end_pfn; |
792 | 798 | ||
@@ -804,6 +810,7 @@ void __init setup_bootmem_allocator(void)
804 | bootmap = setup_node_bootmem(nodeid, start_pfn, end_pfn, | 810 | bootmap = setup_node_bootmem(nodeid, start_pfn, end_pfn, |
805 | bootmap); | 811 | bootmap); |
806 | } | 812 | } |
813 | #endif | ||
807 | 814 | ||
808 | after_bootmem = 1; | 815 | after_bootmem = 1; |
809 | } | 816 | } |
@@ -892,8 +899,7 @@ void __init mem_init(void)
892 | reservedpages << (PAGE_SHIFT-10), | 899 | reservedpages << (PAGE_SHIFT-10), |
893 | datasize >> 10, | 900 | datasize >> 10, |
894 | initsize >> 10, | 901 | initsize >> 10, |
895 | (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)) | 902 | totalhigh_pages << (PAGE_SHIFT-10)); |
896 | ); | ||
897 | 903 | ||
898 | printk(KERN_INFO "virtual kernel memory layout:\n" | 904 | printk(KERN_INFO "virtual kernel memory layout:\n" |
899 | " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" | 905 | " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" |
@@ -997,7 +1003,7 @@ static noinline int do_test_wp_bit(void)
997 | const int rodata_test_data = 0xC3; | 1003 | const int rodata_test_data = 0xC3; |
998 | EXPORT_SYMBOL_GPL(rodata_test_data); | 1004 | EXPORT_SYMBOL_GPL(rodata_test_data); |
999 | 1005 | ||
1000 | static int kernel_set_to_readonly; | 1006 | int kernel_set_to_readonly __read_mostly; |
1001 | 1007 | ||
1002 | void set_kernel_text_rw(void) | 1008 | void set_kernel_text_rw(void) |
1003 | { | 1009 | { |
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 5a4398a6006b..ee41bba315d1 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -29,6 +29,7 @@
29 | #include <linux/module.h> | 29 | #include <linux/module.h> |
30 | #include <linux/memory_hotplug.h> | 30 | #include <linux/memory_hotplug.h> |
31 | #include <linux/nmi.h> | 31 | #include <linux/nmi.h> |
32 | #include <linux/gfp.h> | ||
32 | 33 | ||
33 | #include <asm/processor.h> | 34 | #include <asm/processor.h> |
34 | #include <asm/bios_ebda.h> | 35 | #include <asm/bios_ebda.h> |
@@ -49,6 +50,7 @@
49 | #include <asm/numa.h> | 50 | #include <asm/numa.h> |
50 | #include <asm/cacheflush.h> | 51 | #include <asm/cacheflush.h> |
51 | #include <asm/init.h> | 52 | #include <asm/init.h> |
53 | #include <linux/bootmem.h> | ||
52 | 54 | ||
53 | static unsigned long dma_reserve __initdata; | 55 | static unsigned long dma_reserve __initdata; |
54 | 56 | ||
@@ -568,8 +570,10 @@ kernel_physical_mapping_init(unsigned long start,
568 | } | 570 | } |
569 | 571 | ||
570 | #ifndef CONFIG_NUMA | 572 | #ifndef CONFIG_NUMA |
571 | void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn) | 573 | void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, |
574 | int acpi, int k8) | ||
572 | { | 575 | { |
576 | #ifndef CONFIG_NO_BOOTMEM | ||
573 | unsigned long bootmap_size, bootmap; | 577 | unsigned long bootmap_size, bootmap; |
574 | 578 | ||
575 | bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT; | 579 | bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT; |
@@ -577,13 +581,15 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn)
577 | PAGE_SIZE); | 581 | PAGE_SIZE); |
578 | if (bootmap == -1L) | 582 | if (bootmap == -1L) |
579 | panic("Cannot find bootmem map of size %ld\n", bootmap_size); | 583 | panic("Cannot find bootmem map of size %ld\n", bootmap_size); |
584 | reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP"); | ||
580 | /* don't touch min_low_pfn */ | 585 | /* don't touch min_low_pfn */ |
581 | bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap >> PAGE_SHIFT, | 586 | bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap >> PAGE_SHIFT, |
582 | 0, end_pfn); | 587 | 0, end_pfn); |
583 | e820_register_active_regions(0, start_pfn, end_pfn); | 588 | e820_register_active_regions(0, start_pfn, end_pfn); |
584 | free_bootmem_with_active_regions(0, end_pfn); | 589 | free_bootmem_with_active_regions(0, end_pfn); |
585 | early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT); | 590 | #else |
586 | reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT); | 591 | e820_register_active_regions(0, start_pfn, end_pfn); |
592 | #endif | ||
587 | } | 593 | } |
588 | #endif | 594 | #endif |
589 | 595 | ||
@@ -615,6 +621,21 @@ void __init paging_init(void)
615 | */ | 621 | */ |
616 | #ifdef CONFIG_MEMORY_HOTPLUG | 622 | #ifdef CONFIG_MEMORY_HOTPLUG |
617 | /* | 623 | /* |
624 | * After memory hotplug the variables max_pfn, max_low_pfn and high_memory need | ||
625 | * updating. | ||
626 | */ | ||
627 | static void update_end_of_memory_vars(u64 start, u64 size) | ||
628 | { | ||
629 | unsigned long end_pfn = PFN_UP(start + size); | ||
630 | |||
631 | if (end_pfn > max_pfn) { | ||
632 | max_pfn = end_pfn; | ||
633 | max_low_pfn = end_pfn; | ||
634 | high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1; | ||
635 | } | ||
636 | } | ||
637 | |||
638 | /* | ||
618 | * Memory is added always to NORMAL zone. This means you will never get | 639 | * Memory is added always to NORMAL zone. This means you will never get |
619 | * additional DMA/DMA32 memory. | 640 | * additional DMA/DMA32 memory. |
620 | */ | 641 | */ |
@@ -633,6 +654,9 @@ int arch_add_memory(int nid, u64 start, u64 size)
633 | ret = __add_pages(nid, zone, start_pfn, nr_pages); | 654 | ret = __add_pages(nid, zone, start_pfn, nr_pages); |
634 | WARN_ON_ONCE(ret); | 655 | WARN_ON_ONCE(ret); |
635 | 656 | ||
657 | /* update max_pfn, max_low_pfn and high_memory */ | ||
658 | update_end_of_memory_vars(start, size); | ||
659 | |||
636 | return ret; | 660 | return ret; |
637 | } | 661 | } |
638 | EXPORT_SYMBOL_GPL(arch_add_memory); | 662 | EXPORT_SYMBOL_GPL(arch_add_memory); |
@@ -694,12 +718,12 @@ void __init mem_init(void)
694 | const int rodata_test_data = 0xC3; | 718 | const int rodata_test_data = 0xC3; |
695 | EXPORT_SYMBOL_GPL(rodata_test_data); | 719 | EXPORT_SYMBOL_GPL(rodata_test_data); |
696 | 720 | ||
697 | static int kernel_set_to_readonly; | 721 | int kernel_set_to_readonly; |
698 | 722 | ||
699 | void set_kernel_text_rw(void) | 723 | void set_kernel_text_rw(void) |
700 | { | 724 | { |
701 | unsigned long start = PFN_ALIGN(_stext); | 725 | unsigned long start = PFN_ALIGN(_text); |
702 | unsigned long end = PFN_ALIGN(__start_rodata); | 726 | unsigned long end = PFN_ALIGN(__stop___ex_table); |
703 | 727 | ||
704 | if (!kernel_set_to_readonly) | 728 | if (!kernel_set_to_readonly) |
705 | return; | 729 | return; |
@@ -707,13 +731,18 @@ void set_kernel_text_rw(void)
707 | pr_debug("Set kernel text: %lx - %lx for read write\n", | 731 | pr_debug("Set kernel text: %lx - %lx for read write\n", |
708 | start, end); | 732 | start, end); |
709 | 733 | ||
734 | /* | ||
735 | * Make the kernel identity mapping for text RW. Kernel text | ||
736 | * mapping will always be RO. Refer to the comment in | ||
737 | * static_protections() in pageattr.c | ||
738 | */ | ||
710 | set_memory_rw(start, (end - start) >> PAGE_SHIFT); | 739 | set_memory_rw(start, (end - start) >> PAGE_SHIFT); |
711 | } | 740 | } |
712 | 741 | ||
713 | void set_kernel_text_ro(void) | 742 | void set_kernel_text_ro(void) |
714 | { | 743 | { |
715 | unsigned long start = PFN_ALIGN(_stext); | 744 | unsigned long start = PFN_ALIGN(_text); |
716 | unsigned long end = PFN_ALIGN(__start_rodata); | 745 | unsigned long end = PFN_ALIGN(__stop___ex_table); |
717 | 746 | ||
718 | if (!kernel_set_to_readonly) | 747 | if (!kernel_set_to_readonly) |
719 | return; | 748 | return; |
@@ -721,14 +750,21 @@ void set_kernel_text_ro(void)
721 | pr_debug("Set kernel text: %lx - %lx for read only\n", | 750 | pr_debug("Set kernel text: %lx - %lx for read only\n", |
722 | start, end); | 751 | start, end); |
723 | 752 | ||
753 | /* | ||
754 | * Set the kernel identity mapping for text RO. | ||
755 | */ | ||
724 | set_memory_ro(start, (end - start) >> PAGE_SHIFT); | 756 | set_memory_ro(start, (end - start) >> PAGE_SHIFT); |
725 | } | 757 | } |
726 | 758 | ||
727 | void mark_rodata_ro(void) | 759 | void mark_rodata_ro(void) |
728 | { | 760 | { |
729 | unsigned long start = PFN_ALIGN(_stext), end = PFN_ALIGN(__end_rodata); | 761 | unsigned long start = PFN_ALIGN(_text); |
730 | unsigned long rodata_start = | 762 | unsigned long rodata_start = |
731 | ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK; | 763 | ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK; |
764 | unsigned long end = (unsigned long) &__end_rodata_hpage_align; | ||
765 | unsigned long text_end = PAGE_ALIGN((unsigned long) &__stop___ex_table); | ||
766 | unsigned long rodata_end = PAGE_ALIGN((unsigned long) &__end_rodata); | ||
767 | unsigned long data_start = (unsigned long) &_sdata; | ||
732 | 768 | ||
733 | printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", | 769 | printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", |
734 | (end - start) >> 10); | 770 | (end - start) >> 10); |
@@ -751,6 +787,14 @@ void mark_rodata_ro(void)
751 | printk(KERN_INFO "Testing CPA: again\n"); | 787 | printk(KERN_INFO "Testing CPA: again\n"); |
752 | set_memory_ro(start, (end-start) >> PAGE_SHIFT); | 788 | set_memory_ro(start, (end-start) >> PAGE_SHIFT); |
753 | #endif | 789 | #endif |
790 | |||
791 | free_init_pages("unused kernel memory", | ||
792 | (unsigned long) page_address(virt_to_page(text_end)), | ||
793 | (unsigned long) | ||
794 | page_address(virt_to_page(rodata_start))); | ||
795 | free_init_pages("unused kernel memory", | ||
796 | (unsigned long) page_address(virt_to_page(rodata_end)), | ||
797 | (unsigned long) page_address(virt_to_page(data_start))); | ||
754 | } | 798 | } |
755 | 799 | ||
756 | #endif | 800 | #endif |
@@ -934,7 +978,7 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node)
934 | if (pmd_none(*pmd)) { | 978 | if (pmd_none(*pmd)) { |
935 | pte_t entry; | 979 | pte_t entry; |
936 | 980 | ||
937 | p = vmemmap_alloc_block(PMD_SIZE, node); | 981 | p = vmemmap_alloc_block_buf(PMD_SIZE, node); |
938 | if (!p) | 982 | if (!p) |
939 | return -ENOMEM; | 983 | return -ENOMEM; |
940 | 984 | ||
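
Three things happen in this file: memory hotplug now refreshes max_pfn, max_low_pfn and high_memory through update_end_of_memory_vars(); the RO/RW text range is recomputed from _text to __stop___ex_table to match the large-page-aligned link layout; and mark_rodata_ro() returns the alignment padding between text, rodata and data to the allocator with free_init_pages(). The pfn arithmetic leans on the usual helpers; a sketch of their definitions (after this era's include/linux/pfn.h, modulo exact casts):

    #define PFN_ALIGN(x)  (((unsigned long)(x) + (PAGE_SIZE - 1)) & PAGE_MASK)
    #define PFN_UP(x)     (((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)  /* round up */
    #define PFN_DOWN(x)   ((x) >> PAGE_SHIFT)                    /* round down */
    #define PFN_PHYS(x)   ((u64)(x) << PAGE_SHIFT)

    /* e.g. in update_end_of_memory_vars(), PFN_UP(start + size) is the
     * first frame past the hot-added range, the new max_pfn candidate. */
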
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 2feb9bdedaaf..12e4d2d3c110 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -24,43 +24,6 @@
24 | 24 | ||
25 | #include "physaddr.h" | 25 | #include "physaddr.h" |
26 | 26 | ||
27 | int page_is_ram(unsigned long pagenr) | ||
28 | { | ||
29 | resource_size_t addr, end; | ||
30 | int i; | ||
31 | |||
32 | /* | ||
33 | * A special case is the first 4Kb of memory; | ||
34 | * This is a BIOS owned area, not kernel ram, but generally | ||
35 | * not listed as such in the E820 table. | ||
36 | */ | ||
37 | if (pagenr == 0) | ||
38 | return 0; | ||
39 | |||
40 | /* | ||
41 | * Second special case: Some BIOSen report the PC BIOS | ||
42 | * area (640->1Mb) as ram even though it is not. | ||
43 | */ | ||
44 | if (pagenr >= (BIOS_BEGIN >> PAGE_SHIFT) && | ||
45 | pagenr < (BIOS_END >> PAGE_SHIFT)) | ||
46 | return 0; | ||
47 | |||
48 | for (i = 0; i < e820.nr_map; i++) { | ||
49 | /* | ||
50 | * Not usable memory: | ||
51 | */ | ||
52 | if (e820.map[i].type != E820_RAM) | ||
53 | continue; | ||
54 | addr = (e820.map[i].addr + PAGE_SIZE-1) >> PAGE_SHIFT; | ||
55 | end = (e820.map[i].addr + e820.map[i].size) >> PAGE_SHIFT; | ||
56 | |||
57 | |||
58 | if ((pagenr >= addr) && (pagenr < end)) | ||
59 | return 1; | ||
60 | } | ||
61 | return 0; | ||
62 | } | ||
63 | |||
64 | /* | 27 | /* |
65 | * Fix up the linear direct mapping of the kernel to avoid cache attribute | 28 | * Fix up the linear direct mapping of the kernel to avoid cache attribute |
66 | * conflicts. | 29 | * conflicts. |
@@ -281,30 +244,6 @@ void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size)
281 | } | 244 | } |
282 | EXPORT_SYMBOL(ioremap_cache); | 245 | EXPORT_SYMBOL(ioremap_cache); |
283 | 246 | ||
284 | static void __iomem *ioremap_default(resource_size_t phys_addr, | ||
285 | unsigned long size) | ||
286 | { | ||
287 | unsigned long flags; | ||
288 | void __iomem *ret; | ||
289 | int err; | ||
290 | |||
291 | /* | ||
292 | * - WB for WB-able memory and no other conflicting mappings | ||
293 | * - UC_MINUS for non-WB-able memory with no other conflicting mappings | ||
294 | * - Inherit from confliting mappings otherwise | ||
295 | */ | ||
296 | err = reserve_memtype(phys_addr, phys_addr + size, | ||
297 | _PAGE_CACHE_WB, &flags); | ||
298 | if (err < 0) | ||
299 | return NULL; | ||
300 | |||
301 | ret = __ioremap_caller(phys_addr, size, flags, | ||
302 | __builtin_return_address(0)); | ||
303 | |||
304 | free_memtype(phys_addr, phys_addr + size); | ||
305 | return ret; | ||
306 | } | ||
307 | |||
308 | void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size, | 247 | void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size, |
309 | unsigned long prot_val) | 248 | unsigned long prot_val) |
310 | { | 249 | { |
@@ -380,7 +319,7 @@ void *xlate_dev_mem_ptr(unsigned long phys)
380 | if (page_is_ram(start >> PAGE_SHIFT)) | 319 | if (page_is_ram(start >> PAGE_SHIFT)) |
381 | return __va(phys); | 320 | return __va(phys); |
382 | 321 | ||
383 | addr = (void __force *)ioremap_default(start, PAGE_SIZE); | 322 | addr = (void __force *)ioremap_cache(start, PAGE_SIZE); |
384 | if (addr) | 323 | if (addr) |
385 | addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK)); | 324 | addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK)); |
386 | 325 | ||
@@ -446,6 +385,10 @@ void __init early_ioremap_init(void)
446 | * The boot-ioremap range spans multiple pmds, for which | 385 | * The boot-ioremap range spans multiple pmds, for which |
447 | * we are not prepared: | 386 | * we are not prepared: |
448 | */ | 387 | */ |
388 | #define __FIXADDR_TOP (-PAGE_SIZE) | ||
389 | BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT) | ||
390 | != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT)); | ||
391 | #undef __FIXADDR_TOP | ||
449 | if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) { | 392 | if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) { |
450 | WARN_ON(1); | 393 | WARN_ON(1); |
451 | printk(KERN_WARNING "pmd %p != %p\n", | 394 | printk(KERN_WARNING "pmd %p != %p\n", |
@@ -505,6 +448,20 @@ static inline void __init early_clear_fixmap(enum fixed_addresses idx)
505 | static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata; | 448 | static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata; |
506 | static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata; | 449 | static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata; |
507 | 450 | ||
451 | void __init fixup_early_ioremap(void) | ||
452 | { | ||
453 | int i; | ||
454 | |||
455 | for (i = 0; i < FIX_BTMAPS_SLOTS; i++) { | ||
456 | if (prev_map[i]) { | ||
457 | WARN_ON(1); | ||
458 | break; | ||
459 | } | ||
460 | } | ||
461 | |||
462 | early_ioremap_init(); | ||
463 | } | ||
464 | |||
508 | static int __init check_early_ioremap_leak(void) | 465 | static int __init check_early_ioremap_leak(void) |
509 | { | 466 | { |
510 | int count = 0; | 467 | int count = 0; |
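
The deleted page_is_ram() is not lost: callers such as xlate_dev_mem_ptr() above now resolve to the generic implementation, which consults the resource tree instead of x86's private e820 walk. A sketch of that generic fallback, assuming the kernel/resource.c version of this era:

    /* A pfn is RAM iff it falls inside a resource registered as
     * "System RAM"; the callback just reports any hit. */
    static int __is_ram(unsigned long pfn, unsigned long nr_pages, void *arg)
    {
            return 1;
    }

    int __weak page_is_ram(unsigned long pfn)
    {
            return walk_system_ram_range(pfn, 1, NULL, __is_ram) == 1;
    }

ioremap_default() could be dropped for a similar reason: it reserved a WB memtype only to free it again right after mapping, so mapping /dev/mem pages with plain ioremap_cache() is effectively equivalent.
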
diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/k8topology_64.c
index 268f8255280f..970ed579d4e4 100644
--- a/arch/x86/mm/k8topology_64.c
+++ b/arch/x86/mm/k8topology_64.c
@@ -24,6 +24,9 @@
24 | #include <asm/apic.h> | 24 | #include <asm/apic.h> |
25 | #include <asm/k8.h> | 25 | #include <asm/k8.h> |
26 | 26 | ||
27 | static struct bootnode __initdata nodes[8]; | ||
28 | static nodemask_t __initdata nodes_parsed = NODE_MASK_NONE; | ||
29 | |||
27 | static __init int find_northbridge(void) | 30 | static __init int find_northbridge(void) |
28 | { | 31 | { |
29 | int num; | 32 | int num; |
@@ -54,18 +57,6 @@ static __init void early_get_boot_cpu_id(void)
54 | * need to get boot_cpu_id so can use that to create apicid_to_node | 57 | * need to get boot_cpu_id so can use that to create apicid_to_node |
55 | * in k8_scan_nodes() | 58 | * in k8_scan_nodes() |
56 | */ | 59 | */ |
57 | /* | ||
58 | * Find possible boot-time SMP configuration: | ||
59 | */ | ||
60 | #ifdef CONFIG_X86_MPPARSE | ||
61 | early_find_smp_config(); | ||
62 | #endif | ||
63 | #ifdef CONFIG_ACPI | ||
64 | /* | ||
65 | * Read APIC information from ACPI tables. | ||
66 | */ | ||
67 | early_acpi_boot_init(); | ||
68 | #endif | ||
69 | #ifdef CONFIG_X86_MPPARSE | 60 | #ifdef CONFIG_X86_MPPARSE |
70 | /* | 61 | /* |
71 | * get boot-time SMP configuration: | 62 | * get boot-time SMP configuration: |
@@ -76,12 +67,26 @@ static __init void early_get_boot_cpu_id(void)
76 | early_init_lapic_mapping(); | 67 | early_init_lapic_mapping(); |
77 | } | 68 | } |
78 | 69 | ||
79 | int __init k8_scan_nodes(unsigned long start, unsigned long end) | 70 | int __init k8_get_nodes(struct bootnode *physnodes) |
80 | { | 71 | { |
81 | unsigned numnodes, cores, bits, apicid_base; | 72 | int i; |
73 | int ret = 0; | ||
74 | |||
75 | for_each_node_mask(i, nodes_parsed) { | ||
76 | physnodes[ret].start = nodes[i].start; | ||
77 | physnodes[ret].end = nodes[i].end; | ||
78 | ret++; | ||
79 | } | ||
80 | return ret; | ||
81 | } | ||
82 | |||
83 | int __init k8_numa_init(unsigned long start_pfn, unsigned long end_pfn) | ||
84 | { | ||
85 | unsigned long start = PFN_PHYS(start_pfn); | ||
86 | unsigned long end = PFN_PHYS(end_pfn); | ||
87 | unsigned numnodes; | ||
82 | unsigned long prevbase; | 88 | unsigned long prevbase; |
83 | struct bootnode nodes[8]; | 89 | int i, nb, found = 0; |
84 | int i, j, nb, found = 0; | ||
85 | u32 nodeid, reg; | 90 | u32 nodeid, reg; |
86 | 91 | ||
87 | if (!early_pci_allowed()) | 92 | if (!early_pci_allowed()) |
@@ -91,16 +96,15 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
91 | if (nb < 0) | 96 | if (nb < 0) |
92 | return nb; | 97 | return nb; |
93 | 98 | ||
94 | printk(KERN_INFO "Scanning NUMA topology in Northbridge %d\n", nb); | 99 | pr_info("Scanning NUMA topology in Northbridge %d\n", nb); |
95 | 100 | ||
96 | reg = read_pci_config(0, nb, 0, 0x60); | 101 | reg = read_pci_config(0, nb, 0, 0x60); |
97 | numnodes = ((reg >> 4) & 0xF) + 1; | 102 | numnodes = ((reg >> 4) & 0xF) + 1; |
98 | if (numnodes <= 1) | 103 | if (numnodes <= 1) |
99 | return -1; | 104 | return -1; |
100 | 105 | ||
101 | printk(KERN_INFO "Number of nodes %d\n", numnodes); | 106 | pr_info("Number of physical nodes %d\n", numnodes); |
102 | 107 | ||
103 | memset(&nodes, 0, sizeof(nodes)); | ||
104 | prevbase = 0; | 108 | prevbase = 0; |
105 | for (i = 0; i < 8; i++) { | 109 | for (i = 0; i < 8; i++) { |
106 | unsigned long base, limit; | 110 | unsigned long base, limit; |
@@ -111,28 +115,28 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
111 | nodeid = limit & 7; | 115 | nodeid = limit & 7; |
112 | if ((base & 3) == 0) { | 116 | if ((base & 3) == 0) { |
113 | if (i < numnodes) | 117 | if (i < numnodes) |
114 | printk("Skipping disabled node %d\n", i); | 118 | pr_info("Skipping disabled node %d\n", i); |
115 | continue; | 119 | continue; |
116 | } | 120 | } |
117 | if (nodeid >= numnodes) { | 121 | if (nodeid >= numnodes) { |
118 | printk("Ignoring excess node %d (%lx:%lx)\n", nodeid, | 122 | pr_info("Ignoring excess node %d (%lx:%lx)\n", nodeid, |
119 | base, limit); | 123 | base, limit); |
120 | continue; | 124 | continue; |
121 | } | 125 | } |
122 | 126 | ||
123 | if (!limit) { | 127 | if (!limit) { |
124 | printk(KERN_INFO "Skipping node entry %d (base %lx)\n", | 128 | pr_info("Skipping node entry %d (base %lx)\n", |
125 | i, base); | 129 | i, base); |
126 | continue; | 130 | continue; |
127 | } | 131 | } |
128 | if ((base >> 8) & 3 || (limit >> 8) & 3) { | 132 | if ((base >> 8) & 3 || (limit >> 8) & 3) { |
129 | printk(KERN_ERR "Node %d using interleaving mode %lx/%lx\n", | 133 | pr_err("Node %d using interleaving mode %lx/%lx\n", |
130 | nodeid, (base>>8)&3, (limit>>8) & 3); | 134 | nodeid, (base >> 8) & 3, (limit >> 8) & 3); |
131 | return -1; | 135 | return -1; |
132 | } | 136 | } |
133 | if (node_isset(nodeid, node_possible_map)) { | 137 | if (node_isset(nodeid, nodes_parsed)) { |
134 | printk(KERN_INFO "Node %d already present. Skipping\n", | 138 | pr_info("Node %d already present, skipping\n", |
135 | nodeid); | 139 | nodeid); |
136 | continue; | 140 | continue; |
137 | } | 141 | } |
138 | 142 | ||
@@ -141,8 +145,8 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
141 | limit |= (1<<24)-1; | 145 | limit |= (1<<24)-1; |
142 | limit++; | 146 | limit++; |
143 | 147 | ||
144 | if (limit > max_pfn << PAGE_SHIFT) | 148 | if (limit > end) |
145 | limit = max_pfn << PAGE_SHIFT; | 149 | limit = end; |
146 | if (limit <= base) | 150 | if (limit <= base) |
147 | continue; | 151 | continue; |
148 | 152 | ||
@@ -154,24 +158,24 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
154 | if (limit > end) | 158 | if (limit > end) |
155 | limit = end; | 159 | limit = end; |
156 | if (limit == base) { | 160 | if (limit == base) { |
157 | printk(KERN_ERR "Empty node %d\n", nodeid); | 161 | pr_err("Empty node %d\n", nodeid); |
158 | continue; | 162 | continue; |
159 | } | 163 | } |
160 | if (limit < base) { | 164 | if (limit < base) { |
161 | printk(KERN_ERR "Node %d bogus settings %lx-%lx.\n", | 165 | pr_err("Node %d bogus settings %lx-%lx.\n", |
162 | nodeid, base, limit); | 166 | nodeid, base, limit); |
163 | continue; | 167 | continue; |
164 | } | 168 | } |
165 | 169 | ||
166 | /* Could sort here, but pun for now. Should not happen anyroads. */ | 170 | /* Could sort here, but pun for now. Should not happen anyroads. */ |
167 | if (prevbase > base) { | 171 | if (prevbase > base) { |
168 | printk(KERN_ERR "Node map not sorted %lx,%lx\n", | 172 | pr_err("Node map not sorted %lx,%lx\n", |
169 | prevbase, base); | 173 | prevbase, base); |
170 | return -1; | 174 | return -1; |
171 | } | 175 | } |
172 | 176 | ||
173 | printk(KERN_INFO "Node %d MemBase %016lx Limit %016lx\n", | 177 | pr_info("Node %d MemBase %016lx Limit %016lx\n", |
174 | nodeid, base, limit); | 178 | nodeid, base, limit); |
175 | 179 | ||
176 | found++; | 180 | found++; |
177 | 181 | ||
@@ -180,18 +184,29 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
180 | 184 | ||
181 | prevbase = base; | 185 | prevbase = base; |
182 | 186 | ||
183 | node_set(nodeid, node_possible_map); | 187 | node_set(nodeid, nodes_parsed); |
184 | } | 188 | } |
185 | 189 | ||
186 | if (!found) | 190 | if (!found) |
187 | return -1; | 191 | return -1; |
192 | return 0; | ||
193 | } | ||
194 | |||
195 | int __init k8_scan_nodes(void) | ||
196 | { | ||
197 | unsigned int bits; | ||
198 | unsigned int cores; | ||
199 | unsigned int apicid_base; | ||
200 | int i; | ||
188 | 201 | ||
202 | BUG_ON(nodes_empty(nodes_parsed)); | ||
203 | node_possible_map = nodes_parsed; | ||
189 | memnode_shift = compute_hash_shift(nodes, 8, NULL); | 204 | memnode_shift = compute_hash_shift(nodes, 8, NULL); |
190 | if (memnode_shift < 0) { | 205 | if (memnode_shift < 0) { |
191 | printk(KERN_ERR "No NUMA node hash function found. Contact maintainer\n"); | 206 | pr_err("No NUMA node hash function found. Contact maintainer\n"); |
192 | return -1; | 207 | return -1; |
193 | } | 208 | } |
194 | printk(KERN_INFO "Using node hash shift of %d\n", memnode_shift); | 209 | pr_info("Using node hash shift of %d\n", memnode_shift); |
195 | 210 | ||
196 | /* use the coreid bits from early_identify_cpu */ | 211 | /* use the coreid bits from early_identify_cpu */ |
197 | bits = boot_cpu_data.x86_coreid_bits; | 212 | bits = boot_cpu_data.x86_coreid_bits; |
@@ -200,14 +215,12 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
200 | /* need to get boot_cpu_id early for system with apicid lifting */ | 215 | /* need to get boot_cpu_id early for system with apicid lifting */ |
201 | early_get_boot_cpu_id(); | 216 | early_get_boot_cpu_id(); |
202 | if (boot_cpu_physical_apicid > 0) { | 217 | if (boot_cpu_physical_apicid > 0) { |
203 | printk(KERN_INFO "BSP APIC ID: %02x\n", | 218 | pr_info("BSP APIC ID: %02x\n", boot_cpu_physical_apicid); |
204 | boot_cpu_physical_apicid); | ||
205 | apicid_base = boot_cpu_physical_apicid; | 219 | apicid_base = boot_cpu_physical_apicid; |
206 | } | 220 | } |
207 | 221 | ||
208 | for (i = 0; i < 8; i++) { | 222 | for_each_node_mask(i, node_possible_map) { |
209 | if (nodes[i].start == nodes[i].end) | 223 | int j; |
210 | continue; | ||
211 | 224 | ||
212 | e820_register_active_regions(i, | 225 | e820_register_active_regions(i, |
213 | nodes[i].start >> PAGE_SHIFT, | 226 | nodes[i].start >> PAGE_SHIFT, |
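
The old single-pass k8_scan_nodes(start, end) is split in two: k8_numa_init() parses the northbridge registers into the now file-static nodes[]/nodes_parsed, k8_get_nodes() exposes the parsed extents (useful to NUMA emulation), and k8_scan_nodes() later commits the parsed map (hash shift, e820 regions, apicid mapping). A hypothetical sketch of the resulting call order in the boot path; the real caller lives in arch/x86/mm/numa_64.c, outside this hunk:

    /* Hypothetical caller sketch: parse first, commit second. */
    if (!k8_numa_init(0, max_pfn)) {        /* fill nodes[]/nodes_parsed */
            if (!k8_scan_nodes())           /* set up live NUMA state   */
                    return;                 /* K8 NUMA is up            */
    }
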
diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c
index 4901d0dafda6..af3b6c8a436f 100644
--- a/arch/x86/mm/kmemcheck/error.c
+++ b/arch/x86/mm/kmemcheck/error.c
@@ -106,26 +106,25 @@ void kmemcheck_error_recall(void)
106 | 106 | ||
107 | switch (e->type) { | 107 | switch (e->type) { |
108 | case KMEMCHECK_ERROR_INVALID_ACCESS: | 108 | case KMEMCHECK_ERROR_INVALID_ACCESS: |
109 | printk(KERN_ERR "WARNING: kmemcheck: Caught %d-bit read " | 109 | printk(KERN_WARNING "WARNING: kmemcheck: Caught %d-bit read from %s memory (%p)\n", |
110 | "from %s memory (%p)\n", | ||
111 | 8 * e->size, e->state < ARRAY_SIZE(desc) ? | 110 | 8 * e->size, e->state < ARRAY_SIZE(desc) ? |
112 | desc[e->state] : "(invalid shadow state)", | 111 | desc[e->state] : "(invalid shadow state)", |
113 | (void *) e->address); | 112 | (void *) e->address); |
114 | 113 | ||
115 | printk(KERN_INFO); | 114 | printk(KERN_WARNING); |
116 | for (i = 0; i < SHADOW_COPY_SIZE; ++i) | 115 | for (i = 0; i < SHADOW_COPY_SIZE; ++i) |
117 | printk("%02x", e->memory_copy[i]); | 116 | printk(KERN_CONT "%02x", e->memory_copy[i]); |
118 | printk("\n"); | 117 | printk(KERN_CONT "\n"); |
119 | 118 | ||
120 | printk(KERN_INFO); | 119 | printk(KERN_WARNING); |
121 | for (i = 0; i < SHADOW_COPY_SIZE; ++i) { | 120 | for (i = 0; i < SHADOW_COPY_SIZE; ++i) { |
122 | if (e->shadow_copy[i] < ARRAY_SIZE(short_desc)) | 121 | if (e->shadow_copy[i] < ARRAY_SIZE(short_desc)) |
123 | printk(" %c", short_desc[e->shadow_copy[i]]); | 122 | printk(KERN_CONT " %c", short_desc[e->shadow_copy[i]]); |
124 | else | 123 | else |
125 | printk(" ?"); | 124 | printk(KERN_CONT " ?"); |
126 | } | 125 | } |
127 | printk("\n"); | 126 | printk(KERN_CONT "\n"); |
128 | printk(KERN_INFO "%*c\n", 2 + 2 | 127 | printk(KERN_WARNING "%*c\n", 2 + 2 |
129 | * (int) (e->address & (SHADOW_COPY_SIZE - 1)), '^'); | 128 | * (int) (e->address & (SHADOW_COPY_SIZE - 1)), '^'); |
130 | break; | 129 | break; |
131 | case KMEMCHECK_ERROR_BUG: | 130 | case KMEMCHECK_ERROR_BUG: |
diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c
index 8cc183344140..b3b531a4f8e5 100644
--- a/arch/x86/mm/kmemcheck/kmemcheck.c
+++ b/arch/x86/mm/kmemcheck/kmemcheck.c
@@ -337,7 +337,7 @@ bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size)
337 | if (!shadow) | 337 | if (!shadow) |
338 | return true; | 338 | return true; |
339 | 339 | ||
340 | status = kmemcheck_shadow_test(shadow, size); | 340 | status = kmemcheck_shadow_test_all(shadow, size); |
341 | 341 | ||
342 | return status == KMEMCHECK_SHADOW_INITIALIZED; | 342 | return status == KMEMCHECK_SHADOW_INITIALIZED; |
343 | } | 343 | } |
diff --git a/arch/x86/mm/kmemcheck/shadow.c b/arch/x86/mm/kmemcheck/shadow.c
index 3f66b82076a3..aec124214d97 100644
--- a/arch/x86/mm/kmemcheck/shadow.c
+++ b/arch/x86/mm/kmemcheck/shadow.c
@@ -125,12 +125,12 @@ void kmemcheck_mark_initialized_pages(struct page *p, unsigned int n)
125 | 125 | ||
126 | enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size) | 126 | enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size) |
127 | { | 127 | { |
128 | #ifdef CONFIG_KMEMCHECK_PARTIAL_OK | ||
128 | uint8_t *x; | 129 | uint8_t *x; |
129 | unsigned int i; | 130 | unsigned int i; |
130 | 131 | ||
131 | x = shadow; | 132 | x = shadow; |
132 | 133 | ||
133 | #ifdef CONFIG_KMEMCHECK_PARTIAL_OK | ||
134 | /* | 134 | /* |
135 | * Make sure _some_ bytes are initialized. Gcc frequently generates | 135 | * Make sure _some_ bytes are initialized. Gcc frequently generates |
136 | * code to access neighboring bytes. | 136 | * code to access neighboring bytes. |
@@ -139,13 +139,25 @@ enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size)
139 | if (x[i] == KMEMCHECK_SHADOW_INITIALIZED) | 139 | if (x[i] == KMEMCHECK_SHADOW_INITIALIZED) |
140 | return x[i]; | 140 | return x[i]; |
141 | } | 141 | } |
142 | |||
143 | return x[0]; | ||
142 | #else | 144 | #else |
145 | return kmemcheck_shadow_test_all(shadow, size); | ||
146 | #endif | ||
147 | } | ||
148 | |||
149 | enum kmemcheck_shadow kmemcheck_shadow_test_all(void *shadow, unsigned int size) | ||
150 | { | ||
151 | uint8_t *x; | ||
152 | unsigned int i; | ||
153 | |||
154 | x = shadow; | ||
155 | |||
143 | /* All bytes must be initialized. */ | 156 | /* All bytes must be initialized. */ |
144 | for (i = 0; i < size; ++i) { | 157 | for (i = 0; i < size; ++i) { |
145 | if (x[i] != KMEMCHECK_SHADOW_INITIALIZED) | 158 | if (x[i] != KMEMCHECK_SHADOW_INITIALIZED) |
146 | return x[i]; | 159 | return x[i]; |
147 | } | 160 | } |
148 | #endif | ||
149 | 161 | ||
150 | return x[0]; | 162 | return x[0]; |
151 | } | 163 | } |
diff --git a/arch/x86/mm/kmemcheck/shadow.h b/arch/x86/mm/kmemcheck/shadow.h
index af46d9ab9d86..ff0b2f70fbcb 100644
--- a/arch/x86/mm/kmemcheck/shadow.h
+++ b/arch/x86/mm/kmemcheck/shadow.h
@@ -11,6 +11,8 @@ enum kmemcheck_shadow {
11 | void *kmemcheck_shadow_lookup(unsigned long address); | 11 | void *kmemcheck_shadow_lookup(unsigned long address); |
12 | 12 | ||
13 | enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size); | 13 | enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size); |
14 | enum kmemcheck_shadow kmemcheck_shadow_test_all(void *shadow, | ||
15 | unsigned int size); | ||
14 | void kmemcheck_shadow_set(void *shadow, unsigned int size); | 16 | void kmemcheck_shadow_set(void *shadow, unsigned int size); |
15 | 17 | ||
16 | #endif | 18 | #endif |
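
The split exists because kmemcheck_shadow_test(), under CONFIG_KMEMCHECK_PARTIAL_OK, deliberately passes when any byte of the range is initialized (gcc often emits loads that touch neighbouring bytes), which is too lax for whole-object queries like kmemcheck_is_obj_initialized() above. A sketch of the semantic difference, using hypothetical wrapper names:

    /* Hypothetical wrappers illustrating strict vs. relaxed checking;
     * shadow holds one status byte per data byte. */
    static bool object_fully_initialized(void *shadow, unsigned int size)
    {
            /* strict: every shadow byte must say "initialized" */
            return kmemcheck_shadow_test_all(shadow, size)
                    == KMEMCHECK_SHADOW_INITIALIZED;
    }

    static bool access_looks_initialized(void *shadow, unsigned int size)
    {
            /* relaxed with PARTIAL_OK: one initialized byte suffices */
            return kmemcheck_shadow_test(shadow, size)
                    == KMEMCHECK_SHADOW_INITIALIZED;
    }
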
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 16ccbd77917f..5d0e67fff1a6 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -5,6 +5,8 @@
5 | * 2008 Pekka Paalanen <pq@iki.fi> | 5 | * 2008 Pekka Paalanen <pq@iki.fi> |
6 | */ | 6 | */ |
7 | 7 | ||
8 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
9 | |||
8 | #include <linux/list.h> | 10 | #include <linux/list.h> |
9 | #include <linux/rculist.h> | 11 | #include <linux/rculist.h> |
10 | #include <linux/spinlock.h> | 12 | #include <linux/spinlock.h> |
@@ -19,6 +21,7 @@
19 | #include <linux/kdebug.h> | 21 | #include <linux/kdebug.h> |
20 | #include <linux/mutex.h> | 22 | #include <linux/mutex.h> |
21 | #include <linux/io.h> | 23 | #include <linux/io.h> |
24 | #include <linux/slab.h> | ||
22 | #include <asm/cacheflush.h> | 25 | #include <asm/cacheflush.h> |
23 | #include <asm/tlbflush.h> | 26 | #include <asm/tlbflush.h> |
24 | #include <linux/errno.h> | 27 | #include <linux/errno.h> |
@@ -136,7 +139,7 @@ static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
136 | pte_t *pte = lookup_address(f->page, &level); | 139 | pte_t *pte = lookup_address(f->page, &level); |
137 | 140 | ||
138 | if (!pte) { | 141 | if (!pte) { |
139 | pr_err("kmmio: no pte for page 0x%08lx\n", f->page); | 142 | pr_err("no pte for page 0x%08lx\n", f->page); |
140 | return -1; | 143 | return -1; |
141 | } | 144 | } |
142 | 145 | ||
@@ -148,7 +151,7 @@ static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
148 | clear_pte_presence(pte, clear, &f->old_presence); | 151 | clear_pte_presence(pte, clear, &f->old_presence); |
149 | break; | 152 | break; |
150 | default: | 153 | default: |
151 | pr_err("kmmio: unexpected page level 0x%x.\n", level); | 154 | pr_err("unexpected page level 0x%x.\n", level); |
152 | return -1; | 155 | return -1; |
153 | } | 156 | } |
154 | 157 | ||
@@ -170,13 +173,14 @@ static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
170 | static int arm_kmmio_fault_page(struct kmmio_fault_page *f) | 173 | static int arm_kmmio_fault_page(struct kmmio_fault_page *f) |
171 | { | 174 | { |
172 | int ret; | 175 | int ret; |
173 | WARN_ONCE(f->armed, KERN_ERR "kmmio page already armed.\n"); | 176 | WARN_ONCE(f->armed, KERN_ERR pr_fmt("kmmio page already armed.\n")); |
174 | if (f->armed) { | 177 | if (f->armed) { |
175 | pr_warning("kmmio double-arm: page 0x%08lx, ref %d, old %d\n", | 178 | pr_warning("double-arm: page 0x%08lx, ref %d, old %d\n", |
176 | f->page, f->count, !!f->old_presence); | 179 | f->page, f->count, !!f->old_presence); |
177 | } | 180 | } |
178 | ret = clear_page_presence(f, true); | 181 | ret = clear_page_presence(f, true); |
179 | WARN_ONCE(ret < 0, KERN_ERR "kmmio arming 0x%08lx failed.\n", f->page); | 182 | WARN_ONCE(ret < 0, KERN_ERR pr_fmt("arming 0x%08lx failed.\n"), |
183 | f->page); | ||
180 | f->armed = true; | 184 | f->armed = true; |
181 | return ret; | 185 | return ret; |
182 | } | 186 | } |
@@ -203,7 +207,7 @@ static void disarm_kmmio_fault_page(struct kmmio_fault_page *f)
203 | */ | 207 | */ |
204 | /* | 208 | /* |
205 | * Interrupts are disabled on entry as trap3 is an interrupt gate | 209 | * Interrupts are disabled on entry as trap3 is an interrupt gate |
206 | * and they remain disabled thorough out this function. | 210 | * and they remain disabled throughout this function. |
207 | */ | 211 | */ |
208 | int kmmio_handler(struct pt_regs *regs, unsigned long addr) | 212 | int kmmio_handler(struct pt_regs *regs, unsigned long addr) |
209 | { | 213 | { |
@@ -240,24 +244,21 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
240 | * condition needs handling by do_page_fault(), the | 244 | * condition needs handling by do_page_fault(), the |
241 | * page really not being present is the most common. | 245 | * page really not being present is the most common. |
242 | */ | 246 | */ |
243 | pr_debug("kmmio: secondary hit for 0x%08lx CPU %d.\n", | 247 | pr_debug("secondary hit for 0x%08lx CPU %d.\n", |
244 | addr, smp_processor_id()); | 248 | addr, smp_processor_id()); |
245 | 249 | ||
246 | if (!faultpage->old_presence) | 250 | if (!faultpage->old_presence) |
247 | pr_info("kmmio: unexpected secondary hit for " | 251 | pr_info("unexpected secondary hit for address 0x%08lx on CPU %d.\n", |
248 | "address 0x%08lx on CPU %d.\n", addr, | 252 | addr, smp_processor_id()); |
249 | smp_processor_id()); | ||
250 | } else { | 253 | } else { |
251 | /* | 254 | /* |
252 | * Prevent overwriting already in-flight context. | 255 | * Prevent overwriting already in-flight context. |
253 | * This should not happen, let's hope disarming at | 256 | * This should not happen, let's hope disarming at |
254 | * least prevents a panic. | 257 | * least prevents a panic. |
255 | */ | 258 | */ |
256 | pr_emerg("kmmio: recursive probe hit on CPU %d, " | 259 | pr_emerg("recursive probe hit on CPU %d, for address 0x%08lx. Ignoring.\n", |
257 | "for address 0x%08lx. Ignoring.\n", | 260 | smp_processor_id(), addr); |
258 | smp_processor_id(), addr); | 261 | pr_emerg("previous hit was at 0x%08lx.\n", ctx->addr); |
259 | pr_emerg("kmmio: previous hit was at 0x%08lx.\n", | ||
260 | ctx->addr); | ||
261 | disarm_kmmio_fault_page(faultpage); | 262 | disarm_kmmio_fault_page(faultpage); |
262 | } | 263 | } |
263 | goto no_kmmio_ctx; | 264 | goto no_kmmio_ctx; |
@@ -302,7 +303,7 @@ no_kmmio:
302 | 303 | ||
303 | /* | 304 | /* |
304 | * Interrupts are disabled on entry as trap1 is an interrupt gate | 305 | * Interrupts are disabled on entry as trap1 is an interrupt gate |
305 | * and they remain disabled thorough out this function. | 306 | * and they remain disabled throughout this function. |
306 | * This must always get called as the pair to kmmio_handler(). | 307 | * This must always get called as the pair to kmmio_handler(). |
307 | */ | 308 | */ |
308 | static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) | 309 | static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) |
@@ -316,8 +317,8 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
316 | * something external causing them (f.e. using a debugger while | 317 | * something external causing them (f.e. using a debugger while |
317 | * mmio tracing enabled), or erroneous behaviour | 318 | * mmio tracing enabled), or erroneous behaviour |
318 | */ | 319 | */ |
319 | pr_warning("kmmio: unexpected debug trap on CPU %d.\n", | 320 | pr_warning("unexpected debug trap on CPU %d.\n", |
320 | smp_processor_id()); | 321 | smp_processor_id()); |
321 | goto out; | 322 | goto out; |
322 | } | 323 | } |
323 | 324 | ||
@@ -425,7 +426,7 @@ int register_kmmio_probe(struct kmmio_probe *p)
425 | list_add_rcu(&p->list, &kmmio_probes); | 426 | list_add_rcu(&p->list, &kmmio_probes); |
426 | while (size < size_lim) { | 427 | while (size < size_lim) { |
427 | if (add_kmmio_fault_page(p->addr + size)) | 428 | if (add_kmmio_fault_page(p->addr + size)) |
428 | pr_err("kmmio: Unable to set page fault.\n"); | 429 | pr_err("Unable to set page fault.\n"); |
429 | size += PAGE_SIZE; | 430 | size += PAGE_SIZE; |
430 | } | 431 | } |
431 | out: | 432 | out: |
@@ -490,7 +491,7 @@ static void remove_kmmio_fault_pages(struct rcu_head *head)
490 | * 2. remove_kmmio_fault_pages() | 491 | * 2. remove_kmmio_fault_pages() |
491 | * Remove the pages from kmmio_page_table. | 492 | * Remove the pages from kmmio_page_table. |
492 | * 3. rcu_free_kmmio_fault_pages() | 493 | * 3. rcu_free_kmmio_fault_pages() |
493 | * Actally free the kmmio_fault_page structs as with RCU. | 494 | * Actually free the kmmio_fault_page structs as with RCU. |
494 | */ | 495 | */ |
495 | void unregister_kmmio_probe(struct kmmio_probe *p) | 496 | void unregister_kmmio_probe(struct kmmio_probe *p) |
496 | { | 497 | { |
@@ -511,7 +512,7 @@ void unregister_kmmio_probe(struct kmmio_probe *p) | |||
511 | 512 | ||
512 | drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC); | 513 | drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC); |
513 | if (!drelease) { | 514 | if (!drelease) { |
514 | pr_crit("kmmio: leaking kmmio_fault_page objects.\n"); | 515 | pr_crit("leaking kmmio_fault_page objects.\n"); |
515 | return; | 516 | return; |
516 | } | 517 | } |
517 | drelease->release_list = release_list; | 518 | drelease->release_list = release_list; |
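Editor's note: the three-step scheme in the comment above is the classic RCU teardown — disarm, unlink from all reader-visible structures, then free only after a grace period. A simplified, self-contained sketch of the deferred-free half (struct and function names here are illustrative, not from this file; kmmio itself chains two RCU callbacks):

    #include <linux/kernel.h>
    #include <linux/rcupdate.h>
    #include <linux/slab.h>

    struct delayed_release {                /* mirrors kmmio_delayed_release */
            struct rcu_head rcu;
            void *payload;
    };

    static void rcu_free_payload(struct rcu_head *head)
    {
            struct delayed_release *dr =
                    container_of(head, struct delayed_release, rcu);

            kfree(dr->payload);     /* grace period over: no reader sees it */
            kfree(dr);
    }

    /* call after unlinking payload from every RCU-visible structure */
    static void release_later(struct delayed_release *dr)
    {
            call_rcu(&dr->rcu, rcu_free_payload);
    }
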
@@ -538,10 +539,17 @@ static int | |||
538 | kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args) | 539 | kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args) |
539 | { | 540 | { |
540 | struct die_args *arg = args; | 541 | struct die_args *arg = args; |
542 | unsigned long* dr6_p = (unsigned long *)ERR_PTR(arg->err); | ||
541 | 543 | ||
542 | if (val == DIE_DEBUG && (arg->err & DR_STEP)) | 544 | if (val == DIE_DEBUG && (*dr6_p & DR_STEP)) |
543 | if (post_kmmio_handler(arg->err, arg->regs) == 1) | 545 | if (post_kmmio_handler(*dr6_p, arg->regs) == 1) { |
546 | /* | ||
547 | * Reset the BS bit in dr6 (pointed by args->err) to | ||
548 | * denote completion of processing | ||
549 | */ | ||
550 | *dr6_p &= ~DR_STEP; | ||
544 | return NOTIFY_STOP; | 551 | return NOTIFY_STOP; |
552 | } | ||
545 | 553 | ||
546 | return NOTIFY_DONE; | 554 | return NOTIFY_DONE; |
547 | } | 555 | } |
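Editor's note: one subtlety in the hunk above — after this change a die notifier must not read arg->err as the raw DR6 value. do_debug() now smuggles a *pointer* to dr6 through the err field, encoded with ERR_PTR(), so handlers can clear DR_STEP in place to signal that the single-step trap was consumed. A minimal sketch of a notifier following the same convention (the function name is illustrative):

    #include <linux/err.h>
    #include <linux/kdebug.h>
    #include <linux/notifier.h>
    #include <asm/debugreg.h>

    static int sample_debug_notify(struct notifier_block *nb,
                                   unsigned long val, void *data)
    {
            struct die_args *arg = data;
            /* dr6 is passed by reference, ERR_PTR()-encoded in arg->err */
            unsigned long *dr6_p = (unsigned long *)ERR_PTR(arg->err);

            if (val != DIE_DEBUG || !(*dr6_p & DR_STEP))
                    return NOTIFY_DONE;

            /* ... handle the single-step trap ... */

            *dr6_p &= ~DR_STEP;     /* tell do_debug() the BS bit is handled */
            return NOTIFY_STOP;
    }
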
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index c8191defc38a..1dab5194fd9d 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c | |||
@@ -71,7 +71,7 @@ static int mmap_is_legacy(void) | |||
71 | if (current->personality & ADDR_COMPAT_LAYOUT) | 71 | if (current->personality & ADDR_COMPAT_LAYOUT) |
72 | return 1; | 72 | return 1; |
73 | 73 | ||
74 | if (current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY) | 74 | if (rlimit(RLIMIT_STACK) == RLIM_INFINITY) |
75 | return 1; | 75 | return 1; |
76 | 76 | ||
77 | return sysctl_legacy_va_layout; | 77 | return sysctl_legacy_va_layout; |
@@ -96,7 +96,7 @@ static unsigned long mmap_rnd(void) | |||
96 | 96 | ||
97 | static unsigned long mmap_base(void) | 97 | static unsigned long mmap_base(void) |
98 | { | 98 | { |
99 | unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur; | 99 | unsigned long gap = rlimit(RLIMIT_STACK); |
100 | 100 | ||
101 | if (gap < MIN_GAP) | 101 | if (gap < MIN_GAP) |
102 | gap = MIN_GAP; | 102 | gap = MIN_GAP; |
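Editor's note: both mmap.c hunks are mechanical conversions to the rlimit() accessor that entered the tree this cycle. Paraphrasing its definition from <linux/sched.h> (the ACCESS_ONCE() keeps the read from being torn or repeated if rlim_cur changes concurrently):

    static inline unsigned long task_rlimit(const struct task_struct *tsk,
                                            unsigned int limit)
    {
            return ACCESS_ONCE(tsk->signal->rlim[limit].rlim_cur);
    }

    static inline unsigned long rlimit(unsigned int limit)
    {
            return task_rlimit(current, limit);
    }
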
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index 132772a8ec57..3adff7dcc148 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c | |||
@@ -19,10 +19,14 @@ | |||
19 | * | 19 | * |
20 | * Derived from the read-mod example from relay-examples by Tom Zanussi. | 20 | * Derived from the read-mod example from relay-examples by Tom Zanussi. |
21 | */ | 21 | */ |
22 | |||
23 | #define pr_fmt(fmt) "mmiotrace: " fmt | ||
24 | |||
22 | #define DEBUG 1 | 25 | #define DEBUG 1 |
23 | 26 | ||
24 | #include <linux/module.h> | 27 | #include <linux/module.h> |
25 | #include <linux/debugfs.h> | 28 | #include <linux/debugfs.h> |
29 | #include <linux/slab.h> | ||
26 | #include <linux/uaccess.h> | 30 | #include <linux/uaccess.h> |
27 | #include <linux/io.h> | 31 | #include <linux/io.h> |
28 | #include <linux/version.h> | 32 | #include <linux/version.h> |
@@ -36,8 +40,6 @@ | |||
36 | 40 | ||
37 | #include "pf_in.h" | 41 | #include "pf_in.h" |
38 | 42 | ||
39 | #define NAME "mmiotrace: " | ||
40 | |||
41 | struct trap_reason { | 43 | struct trap_reason { |
42 | unsigned long addr; | 44 | unsigned long addr; |
43 | unsigned long ip; | 45 | unsigned long ip; |
@@ -96,17 +98,18 @@ static void print_pte(unsigned long address) | |||
96 | pte_t *pte = lookup_address(address, &level); | 98 | pte_t *pte = lookup_address(address, &level); |
97 | 99 | ||
98 | if (!pte) { | 100 | if (!pte) { |
99 | pr_err(NAME "Error in %s: no pte for page 0x%08lx\n", | 101 | pr_err("Error in %s: no pte for page 0x%08lx\n", |
100 | __func__, address); | 102 | __func__, address); |
101 | return; | 103 | return; |
102 | } | 104 | } |
103 | 105 | ||
104 | if (level == PG_LEVEL_2M) { | 106 | if (level == PG_LEVEL_2M) { |
105 | pr_emerg(NAME "4MB pages are not currently supported: " | 107 | pr_emerg("4MB pages are not currently supported: 0x%08lx\n", |
106 | "0x%08lx\n", address); | 108 | address); |
107 | BUG(); | 109 | BUG(); |
108 | } | 110 | } |
109 | pr_info(NAME "pte for 0x%lx: 0x%llx 0x%llx\n", address, | 111 | pr_info("pte for 0x%lx: 0x%llx 0x%llx\n", |
112 | address, | ||
110 | (unsigned long long)pte_val(*pte), | 113 | (unsigned long long)pte_val(*pte), |
111 | (unsigned long long)pte_val(*pte) & _PAGE_PRESENT); | 114 | (unsigned long long)pte_val(*pte) & _PAGE_PRESENT); |
112 | } | 115 | } |
@@ -118,22 +121,21 @@ static void print_pte(unsigned long address) | |||
118 | static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr) | 121 | static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr) |
119 | { | 122 | { |
120 | const struct trap_reason *my_reason = &get_cpu_var(pf_reason); | 123 | const struct trap_reason *my_reason = &get_cpu_var(pf_reason); |
121 | pr_emerg(NAME "unexpected fault for address: 0x%08lx, " | 124 | pr_emerg("unexpected fault for address: 0x%08lx, last fault for address: 0x%08lx\n", |
122 | "last fault for address: 0x%08lx\n", | 125 | addr, my_reason->addr); |
123 | addr, my_reason->addr); | ||
124 | print_pte(addr); | 126 | print_pte(addr); |
125 | print_symbol(KERN_EMERG "faulting IP is at %s\n", regs->ip); | 127 | print_symbol(KERN_EMERG "faulting IP is at %s\n", regs->ip); |
126 | print_symbol(KERN_EMERG "last faulting IP was at %s\n", my_reason->ip); | 128 | print_symbol(KERN_EMERG "last faulting IP was at %s\n", my_reason->ip); |
127 | #ifdef __i386__ | 129 | #ifdef __i386__ |
128 | pr_emerg("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", | 130 | pr_emerg("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", |
129 | regs->ax, regs->bx, regs->cx, regs->dx); | 131 | regs->ax, regs->bx, regs->cx, regs->dx); |
130 | pr_emerg("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", | 132 | pr_emerg("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", |
131 | regs->si, regs->di, regs->bp, regs->sp); | 133 | regs->si, regs->di, regs->bp, regs->sp); |
132 | #else | 134 | #else |
133 | pr_emerg("rax: %016lx rcx: %016lx rdx: %016lx\n", | 135 | pr_emerg("rax: %016lx rcx: %016lx rdx: %016lx\n", |
134 | regs->ax, regs->cx, regs->dx); | 136 | regs->ax, regs->cx, regs->dx); |
135 | pr_emerg("rsi: %016lx rdi: %016lx rbp: %016lx rsp: %016lx\n", | 137 | pr_emerg("rsi: %016lx rdi: %016lx rbp: %016lx rsp: %016lx\n", |
136 | regs->si, regs->di, regs->bp, regs->sp); | 138 | regs->si, regs->di, regs->bp, regs->sp); |
137 | #endif | 139 | #endif |
138 | put_cpu_var(pf_reason); | 140 | put_cpu_var(pf_reason); |
139 | BUG(); | 141 | BUG(); |
@@ -213,7 +215,7 @@ static void post(struct kmmio_probe *p, unsigned long condition, | |||
213 | /* this should always return the active_trace count to 0 */ | 215 | /* this should always return the active_trace count to 0 */ |
214 | my_reason->active_traces--; | 216 | my_reason->active_traces--; |
215 | if (my_reason->active_traces) { | 217 | if (my_reason->active_traces) { |
216 | pr_emerg(NAME "unexpected post handler"); | 218 | pr_emerg("unexpected post handler"); |
217 | BUG(); | 219 | BUG(); |
218 | } | 220 | } |
219 | 221 | ||
@@ -244,7 +246,7 @@ static void ioremap_trace_core(resource_size_t offset, unsigned long size, | |||
244 | }; | 246 | }; |
245 | 247 | ||
246 | if (!trace) { | 248 | if (!trace) { |
247 | pr_err(NAME "kmalloc failed in ioremap\n"); | 249 | pr_err("kmalloc failed in ioremap\n"); |
248 | return; | 250 | return; |
249 | } | 251 | } |
250 | 252 | ||
@@ -282,8 +284,8 @@ void mmiotrace_ioremap(resource_size_t offset, unsigned long size, | |||
282 | if (!is_enabled()) /* recheck and proper locking in *_core() */ | 284 | if (!is_enabled()) /* recheck and proper locking in *_core() */ |
283 | return; | 285 | return; |
284 | 286 | ||
285 | pr_debug(NAME "ioremap_*(0x%llx, 0x%lx) = %p\n", | 287 | pr_debug("ioremap_*(0x%llx, 0x%lx) = %p\n", |
286 | (unsigned long long)offset, size, addr); | 288 | (unsigned long long)offset, size, addr); |
287 | if ((filter_offset) && (offset != filter_offset)) | 289 | if ((filter_offset) && (offset != filter_offset)) |
288 | return; | 290 | return; |
289 | ioremap_trace_core(offset, size, addr); | 291 | ioremap_trace_core(offset, size, addr); |
@@ -301,7 +303,7 @@ static void iounmap_trace_core(volatile void __iomem *addr) | |||
301 | struct remap_trace *tmp; | 303 | struct remap_trace *tmp; |
302 | struct remap_trace *found_trace = NULL; | 304 | struct remap_trace *found_trace = NULL; |
303 | 305 | ||
304 | pr_debug(NAME "Unmapping %p.\n", addr); | 306 | pr_debug("Unmapping %p.\n", addr); |
305 | 307 | ||
306 | spin_lock_irq(&trace_lock); | 308 | spin_lock_irq(&trace_lock); |
307 | if (!is_enabled()) | 309 | if (!is_enabled()) |
@@ -363,9 +365,8 @@ static void clear_trace_list(void) | |||
363 | * Caller also ensures is_enabled() cannot change. | 365 | * Caller also ensures is_enabled() cannot change. |
364 | */ | 366 | */ |
365 | list_for_each_entry(trace, &trace_list, list) { | 367 | list_for_each_entry(trace, &trace_list, list) { |
366 | pr_notice(NAME "purging non-iounmapped " | 368 | pr_notice("purging non-iounmapped trace @0x%08lx, size 0x%lx.\n", |
367 | "trace @0x%08lx, size 0x%lx.\n", | 369 | trace->probe.addr, trace->probe.len); |
368 | trace->probe.addr, trace->probe.len); | ||
369 | if (!nommiotrace) | 370 | if (!nommiotrace) |
370 | unregister_kmmio_probe(&trace->probe); | 371 | unregister_kmmio_probe(&trace->probe); |
371 | } | 372 | } |
@@ -387,7 +388,7 @@ static void enter_uniprocessor(void) | |||
387 | 388 | ||
388 | if (downed_cpus == NULL && | 389 | if (downed_cpus == NULL && |
389 | !alloc_cpumask_var(&downed_cpus, GFP_KERNEL)) { | 390 | !alloc_cpumask_var(&downed_cpus, GFP_KERNEL)) { |
390 | pr_notice(NAME "Failed to allocate mask\n"); | 391 | pr_notice("Failed to allocate mask\n"); |
391 | goto out; | 392 | goto out; |
392 | } | 393 | } |
393 | 394 | ||
@@ -395,20 +396,19 @@ static void enter_uniprocessor(void) | |||
395 | cpumask_copy(downed_cpus, cpu_online_mask); | 396 | cpumask_copy(downed_cpus, cpu_online_mask); |
396 | cpumask_clear_cpu(cpumask_first(cpu_online_mask), downed_cpus); | 397 | cpumask_clear_cpu(cpumask_first(cpu_online_mask), downed_cpus); |
397 | if (num_online_cpus() > 1) | 398 | if (num_online_cpus() > 1) |
398 | pr_notice(NAME "Disabling non-boot CPUs...\n"); | 399 | pr_notice("Disabling non-boot CPUs...\n"); |
399 | put_online_cpus(); | 400 | put_online_cpus(); |
400 | 401 | ||
401 | for_each_cpu(cpu, downed_cpus) { | 402 | for_each_cpu(cpu, downed_cpus) { |
402 | err = cpu_down(cpu); | 403 | err = cpu_down(cpu); |
403 | if (!err) | 404 | if (!err) |
404 | pr_info(NAME "CPU%d is down.\n", cpu); | 405 | pr_info("CPU%d is down.\n", cpu); |
405 | else | 406 | else |
406 | pr_err(NAME "Error taking CPU%d down: %d\n", cpu, err); | 407 | pr_err("Error taking CPU%d down: %d\n", cpu, err); |
407 | } | 408 | } |
408 | out: | 409 | out: |
409 | if (num_online_cpus() > 1) | 410 | if (num_online_cpus() > 1) |
410 | pr_warning(NAME "multiple CPUs still online, " | 411 | pr_warning("multiple CPUs still online, may miss events.\n"); |
411 | "may miss events.\n"); | ||
412 | } | 412 | } |
413 | 413 | ||
414 | /* __ref because leave_uniprocessor calls cpu_up which is __cpuinit, | 414 | /* __ref because leave_uniprocessor calls cpu_up which is __cpuinit, |
@@ -420,13 +420,13 @@ static void __ref leave_uniprocessor(void) | |||
420 | 420 | ||
421 | if (downed_cpus == NULL || cpumask_weight(downed_cpus) == 0) | 421 | if (downed_cpus == NULL || cpumask_weight(downed_cpus) == 0) |
422 | return; | 422 | return; |
423 | pr_notice(NAME "Re-enabling CPUs...\n"); | 423 | pr_notice("Re-enabling CPUs...\n"); |
424 | for_each_cpu(cpu, downed_cpus) { | 424 | for_each_cpu(cpu, downed_cpus) { |
425 | err = cpu_up(cpu); | 425 | err = cpu_up(cpu); |
426 | if (!err) | 426 | if (!err) |
427 | pr_info(NAME "enabled CPU%d.\n", cpu); | 427 | pr_info("enabled CPU%d.\n", cpu); |
428 | else | 428 | else |
429 | pr_err(NAME "cannot re-enable CPU%d: %d\n", cpu, err); | 429 | pr_err("cannot re-enable CPU%d: %d\n", cpu, err); |
430 | } | 430 | } |
431 | } | 431 | } |
432 | 432 | ||
@@ -434,8 +434,8 @@ static void __ref leave_uniprocessor(void) | |||
434 | static void enter_uniprocessor(void) | 434 | static void enter_uniprocessor(void) |
435 | { | 435 | { |
436 | if (num_online_cpus() > 1) | 436 | if (num_online_cpus() > 1) |
437 | pr_warning(NAME "multiple CPUs are online, may miss events. " | 437 | pr_warning("multiple CPUs are online, may miss events. " |
438 | "Suggest booting with maxcpus=1 kernel argument.\n"); | 438 | "Suggest booting with maxcpus=1 kernel argument.\n"); |
439 | } | 439 | } |
440 | 440 | ||
441 | static void leave_uniprocessor(void) | 441 | static void leave_uniprocessor(void) |
@@ -450,13 +450,13 @@ void enable_mmiotrace(void) | |||
450 | goto out; | 450 | goto out; |
451 | 451 | ||
452 | if (nommiotrace) | 452 | if (nommiotrace) |
453 | pr_info(NAME "MMIO tracing disabled.\n"); | 453 | pr_info("MMIO tracing disabled.\n"); |
454 | kmmio_init(); | 454 | kmmio_init(); |
455 | enter_uniprocessor(); | 455 | enter_uniprocessor(); |
456 | spin_lock_irq(&trace_lock); | 456 | spin_lock_irq(&trace_lock); |
457 | atomic_inc(&mmiotrace_enabled); | 457 | atomic_inc(&mmiotrace_enabled); |
458 | spin_unlock_irq(&trace_lock); | 458 | spin_unlock_irq(&trace_lock); |
459 | pr_info(NAME "enabled.\n"); | 459 | pr_info("enabled.\n"); |
460 | out: | 460 | out: |
461 | mutex_unlock(&mmiotrace_mutex); | 461 | mutex_unlock(&mmiotrace_mutex); |
462 | } | 462 | } |
@@ -475,7 +475,7 @@ void disable_mmiotrace(void) | |||
475 | clear_trace_list(); /* guarantees: no more kmmio callbacks */ | 475 | clear_trace_list(); /* guarantees: no more kmmio callbacks */ |
476 | leave_uniprocessor(); | 476 | leave_uniprocessor(); |
477 | kmmio_cleanup(); | 477 | kmmio_cleanup(); |
478 | pr_info(NAME "disabled.\n"); | 478 | pr_info("disabled.\n"); |
479 | out: | 479 | out: |
480 | mutex_unlock(&mmiotrace_mutex); | 480 | mutex_unlock(&mmiotrace_mutex); |
481 | } | 481 | } |
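Editor's note: all of the mmio-mod.c churn above is enabled by the pr_fmt() define added at the top of the file — printk.h pastes the prefix into every pr_*() format string at preprocessing time, so the hand-rolled NAME prefix can go. The mechanism, simplified from <linux/kernel.h>:

    /* must be defined before the first include that pulls in kernel.h */
    #define pr_fmt(fmt) "mmiotrace: " fmt

    #include <linux/kernel.h>

    /*
     * kernel.h only supplies a default when the file did not define one:
     *
     *      #ifndef pr_fmt
     *      #define pr_fmt(fmt) fmt
     *      #endif
     *      #define pr_info(fmt, ...) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
     *
     * so pr_info("enabled.\n") compiles to
     * printk(KERN_INFO "mmiotrace: enabled.\n").
     */
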
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index d2530062fe00..809baaaf48b1 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c | |||
@@ -347,8 +347,8 @@ static void init_remap_allocator(int nid) | |||
347 | (ulong) node_remap_end_vaddr[nid]); | 347 | (ulong) node_remap_end_vaddr[nid]); |
348 | } | 348 | } |
349 | 349 | ||
350 | void __init initmem_init(unsigned long start_pfn, | 350 | void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, |
351 | unsigned long end_pfn) | 351 | int acpi, int k8) |
352 | { | 352 | { |
353 | int nid; | 353 | int nid; |
354 | long kva_target_pfn; | 354 | long kva_target_pfn; |
@@ -418,7 +418,10 @@ void __init initmem_init(unsigned long start_pfn, | |||
418 | 418 | ||
419 | for_each_online_node(nid) { | 419 | for_each_online_node(nid) { |
420 | memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); | 420 | memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); |
421 | NODE_DATA(nid)->node_id = nid; | ||
422 | #ifndef CONFIG_NO_BOOTMEM | ||
421 | NODE_DATA(nid)->bdata = &bootmem_node_data[nid]; | 423 | NODE_DATA(nid)->bdata = &bootmem_node_data[nid]; |
424 | #endif | ||
422 | } | 425 | } |
423 | 426 | ||
424 | setup_bootmem_allocator(); | 427 | setup_bootmem_allocator(); |
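Editor's note: the numa_32.c hunk shows the pattern CONFIG_NO_BOOTMEM imposes on every node-setup path — NODE_DATA is still zeroed and given its node id, but the bootmem descriptor only exists when the classic bootmem allocator is built in. As a sketch (the helper name is made up):

    #include <linux/bootmem.h>
    #include <linux/mmzone.h>
    #include <linux/string.h>

    static void __init init_one_node(int nid)
    {
            memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
            NODE_DATA(nid)->node_id = nid;  /* memset() wiped it; restore */
    #ifndef CONFIG_NO_BOOTMEM
            /* only classic bootmem needs a per-node bootmem_data */
            NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
    #endif
    }
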
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 459913beac71..8948f47fde05 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
@@ -163,30 +163,48 @@ static void * __init early_node_mem(int nodeid, unsigned long start, | |||
163 | unsigned long end, unsigned long size, | 163 | unsigned long end, unsigned long size, |
164 | unsigned long align) | 164 | unsigned long align) |
165 | { | 165 | { |
166 | unsigned long mem = find_e820_area(start, end, size, align); | 166 | unsigned long mem; |
167 | void *ptr; | ||
168 | 167 | ||
168 | /* | ||
169 | * place it as high as possible; | ||
170 | * NODE_DATA and other early allocations will go here | ||
171 | */ | ||
172 | if (start < (MAX_DMA_PFN<<PAGE_SHIFT)) | ||
173 | start = MAX_DMA_PFN<<PAGE_SHIFT; | ||
174 | if (start < (MAX_DMA32_PFN<<PAGE_SHIFT) && | ||
175 | end > (MAX_DMA32_PFN<<PAGE_SHIFT)) | ||
176 | start = MAX_DMA32_PFN<<PAGE_SHIFT; | ||
177 | mem = find_e820_area(start, end, size, align); | ||
178 | if (mem != -1L) | ||
179 | return __va(mem); | ||
180 | |||
181 | /* extend the search scope */ | ||
182 | end = max_pfn_mapped << PAGE_SHIFT; | ||
183 | if (end > (MAX_DMA32_PFN<<PAGE_SHIFT)) | ||
184 | start = MAX_DMA32_PFN<<PAGE_SHIFT; | ||
185 | else | ||
186 | start = MAX_DMA_PFN<<PAGE_SHIFT; | ||
187 | mem = find_e820_area(start, end, size, align); | ||
169 | if (mem != -1L) | 188 | if (mem != -1L) |
170 | return __va(mem); | 189 | return __va(mem); |
171 | 190 | ||
172 | ptr = __alloc_bootmem_nopanic(size, align, __pa(MAX_DMA_ADDRESS)); | 191 | printk(KERN_ERR "Cannot find %lu bytes in node %d\n", |
173 | if (ptr == NULL) { | ||
174 | printk(KERN_ERR "Cannot find %lu bytes in node %d\n", | ||
175 | size, nodeid); | 192 | size, nodeid); |
176 | return NULL; | 193 | |
177 | } | 194 | return NULL; |
178 | return ptr; | ||
179 | } | 195 | } |
180 | 196 | ||
181 | /* Initialize bootmem allocator for a node */ | 197 | /* Initialize bootmem allocator for a node */ |
182 | void __init | 198 | void __init |
183 | setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) | 199 | setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) |
184 | { | 200 | { |
185 | unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size; | 201 | unsigned long start_pfn, last_pfn, nodedata_phys; |
186 | const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE); | 202 | const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE); |
187 | unsigned long bootmap_start, nodedata_phys; | ||
188 | void *bootmap; | ||
189 | int nid; | 203 | int nid; |
204 | #ifndef CONFIG_NO_BOOTMEM | ||
205 | unsigned long bootmap_start, bootmap_pages, bootmap_size; | ||
206 | void *bootmap; | ||
207 | #endif | ||
190 | 208 | ||
191 | if (!end) | 209 | if (!end) |
192 | return; | 210 | return; |
@@ -200,7 +218,7 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) | |||
200 | 218 | ||
201 | start = roundup(start, ZONE_ALIGN); | 219 | start = roundup(start, ZONE_ALIGN); |
202 | 220 | ||
203 | printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid, | 221 | printk(KERN_INFO "Initmem setup node %d %016lx-%016lx\n", nodeid, |
204 | start, end); | 222 | start, end); |
205 | 223 | ||
206 | start_pfn = start >> PAGE_SHIFT; | 224 | start_pfn = start >> PAGE_SHIFT; |
@@ -211,14 +229,21 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) | |||
211 | if (node_data[nodeid] == NULL) | 229 | if (node_data[nodeid] == NULL) |
212 | return; | 230 | return; |
213 | nodedata_phys = __pa(node_data[nodeid]); | 231 | nodedata_phys = __pa(node_data[nodeid]); |
232 | reserve_early(nodedata_phys, nodedata_phys + pgdat_size, "NODE_DATA"); | ||
214 | printk(KERN_INFO " NODE_DATA [%016lx - %016lx]\n", nodedata_phys, | 233 | printk(KERN_INFO " NODE_DATA [%016lx - %016lx]\n", nodedata_phys, |
215 | nodedata_phys + pgdat_size - 1); | 234 | nodedata_phys + pgdat_size - 1); |
235 | nid = phys_to_nid(nodedata_phys); | ||
236 | if (nid != nodeid) | ||
237 | printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid); | ||
216 | 238 | ||
217 | memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t)); | 239 | memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t)); |
218 | NODE_DATA(nodeid)->bdata = &bootmem_node_data[nodeid]; | 240 | NODE_DATA(nodeid)->node_id = nodeid; |
219 | NODE_DATA(nodeid)->node_start_pfn = start_pfn; | 241 | NODE_DATA(nodeid)->node_start_pfn = start_pfn; |
220 | NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn; | 242 | NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn; |
221 | 243 | ||
244 | #ifndef CONFIG_NO_BOOTMEM | ||
245 | NODE_DATA(nodeid)->bdata = &bootmem_node_data[nodeid]; | ||
246 | |||
222 | /* | 247 | /* |
223 | * Find a place for the bootmem map | 248 | * Find a place for the bootmem map |
224 | * nodedata_phys could be on other nodes by alloc_bootmem, | 249 | * nodedata_phys could be on other nodes by alloc_bootmem, |
@@ -227,11 +252,7 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) | |||
227 | * of alloc_bootmem, that could clash with reserved range | 252 | * of alloc_bootmem, that could clash with reserved range |
228 | */ | 253 | */ |
229 | bootmap_pages = bootmem_bootmap_pages(last_pfn - start_pfn); | 254 | bootmap_pages = bootmem_bootmap_pages(last_pfn - start_pfn); |
230 | nid = phys_to_nid(nodedata_phys); | 255 | bootmap_start = roundup(nodedata_phys + pgdat_size, PAGE_SIZE); |
231 | if (nid == nodeid) | ||
232 | bootmap_start = roundup(nodedata_phys + pgdat_size, PAGE_SIZE); | ||
233 | else | ||
234 | bootmap_start = roundup(start, PAGE_SIZE); | ||
235 | /* | 256 | /* |
236 | * SMP_CACHE_BYTES could be enough, but init_bootmem_node like | 257 | * SMP_CACHE_BYTES could be enough, but init_bootmem_node like |
237 | * to use that to align to PAGE_SIZE | 258 | * to use that to align to PAGE_SIZE |
@@ -239,12 +260,13 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) | |||
239 | bootmap = early_node_mem(nodeid, bootmap_start, end, | 260 | bootmap = early_node_mem(nodeid, bootmap_start, end, |
240 | bootmap_pages<<PAGE_SHIFT, PAGE_SIZE); | 261 | bootmap_pages<<PAGE_SHIFT, PAGE_SIZE); |
241 | if (bootmap == NULL) { | 262 | if (bootmap == NULL) { |
242 | if (nodedata_phys < start || nodedata_phys >= end) | 263 | free_early(nodedata_phys, nodedata_phys + pgdat_size); |
243 | free_bootmem(nodedata_phys, pgdat_size); | ||
244 | node_data[nodeid] = NULL; | 264 | node_data[nodeid] = NULL; |
245 | return; | 265 | return; |
246 | } | 266 | } |
247 | bootmap_start = __pa(bootmap); | 267 | bootmap_start = __pa(bootmap); |
268 | reserve_early(bootmap_start, bootmap_start+(bootmap_pages<<PAGE_SHIFT), | ||
269 | "BOOTMAP"); | ||
248 | 270 | ||
249 | bootmap_size = init_bootmem_node(NODE_DATA(nodeid), | 271 | bootmap_size = init_bootmem_node(NODE_DATA(nodeid), |
250 | bootmap_start >> PAGE_SHIFT, | 272 | bootmap_start >> PAGE_SHIFT, |
@@ -253,31 +275,12 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) | |||
253 | printk(KERN_INFO " bootmap [%016lx - %016lx] pages %lx\n", | 275 | printk(KERN_INFO " bootmap [%016lx - %016lx] pages %lx\n", |
254 | bootmap_start, bootmap_start + bootmap_size - 1, | 276 | bootmap_start, bootmap_start + bootmap_size - 1, |
255 | bootmap_pages); | 277 | bootmap_pages); |
256 | |||
257 | free_bootmem_with_active_regions(nodeid, end); | ||
258 | |||
259 | /* | ||
260 | * convert early reserve to bootmem reserve earlier | ||
261 | * otherwise early_node_mem could use early reserved mem | ||
262 | * on previous node | ||
263 | */ | ||
264 | early_res_to_bootmem(start, end); | ||
265 | |||
266 | /* | ||
267 | * in some case early_node_mem could use alloc_bootmem | ||
268 | * to get range on other node, don't reserve that again | ||
269 | */ | ||
270 | if (nid != nodeid) | ||
271 | printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid); | ||
272 | else | ||
273 | reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, | ||
274 | pgdat_size, BOOTMEM_DEFAULT); | ||
275 | nid = phys_to_nid(bootmap_start); | 278 | nid = phys_to_nid(bootmap_start); |
276 | if (nid != nodeid) | 279 | if (nid != nodeid) |
277 | printk(KERN_INFO " bootmap(%d) on node %d\n", nodeid, nid); | 280 | printk(KERN_INFO " bootmap(%d) on node %d\n", nodeid, nid); |
278 | else | 281 | |
279 | reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, | 282 | free_bootmem_with_active_regions(nodeid, end); |
280 | bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT); | 283 | #endif |
281 | 284 | ||
282 | node_set_online(nodeid); | 285 | node_set_online(nodeid); |
283 | } | 286 | } |
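Editor's note: the early_node_mem() rework above biases allocations upward — the first find_e820_area() pass refuses to touch ZONE_DMA, and skips ZONE_DMA32 too whenever the node extends past 4 GB, falling back to a wider search only if that fails. The clamping step in isolation (a restatement of the diff logic, not new code; the helper name is made up):

    #include <linux/init.h>
    #include <asm/dma.h>        /* MAX_DMA_PFN, MAX_DMA32_PFN */
    #include <asm/page.h>

    static unsigned long __init clamp_above_dma(unsigned long start,
                                                unsigned long end)
    {
            if (start < (MAX_DMA_PFN << PAGE_SHIFT))
                    start = MAX_DMA_PFN << PAGE_SHIFT;      /* spare ZONE_DMA */
            if (start < (MAX_DMA32_PFN << PAGE_SHIFT) &&
                end > (MAX_DMA32_PFN << PAGE_SHIFT))
                    start = MAX_DMA32_PFN << PAGE_SHIFT;    /* prefer >4GB */
            return start;
    }
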
@@ -306,8 +309,71 @@ void __init numa_init_array(void) | |||
306 | 309 | ||
307 | #ifdef CONFIG_NUMA_EMU | 310 | #ifdef CONFIG_NUMA_EMU |
308 | /* Numa emulation */ | 311 | /* Numa emulation */ |
312 | static struct bootnode nodes[MAX_NUMNODES] __initdata; | ||
313 | static struct bootnode physnodes[MAX_NUMNODES] __initdata; | ||
309 | static char *cmdline __initdata; | 314 | static char *cmdline __initdata; |
310 | 315 | ||
316 | static int __init setup_physnodes(unsigned long start, unsigned long end, | ||
317 | int acpi, int k8) | ||
318 | { | ||
319 | int nr_nodes = 0; | ||
320 | int ret = 0; | ||
321 | int i; | ||
322 | |||
323 | #ifdef CONFIG_ACPI_NUMA | ||
324 | if (acpi) | ||
325 | nr_nodes = acpi_get_nodes(physnodes); | ||
326 | #endif | ||
327 | #ifdef CONFIG_K8_NUMA | ||
328 | if (k8) | ||
329 | nr_nodes = k8_get_nodes(physnodes); | ||
330 | #endif | ||
331 | /* | ||
332 | * Basic sanity checking on the physical node map: there may be errors | ||
333 | * if the SRAT or K8 incorrectly reported the topology or the mem= | ||
334 | * kernel parameter is used. | ||
335 | */ | ||
336 | for (i = 0; i < nr_nodes; i++) { | ||
337 | if (physnodes[i].start == physnodes[i].end) | ||
338 | continue; | ||
339 | if (physnodes[i].start > end) { | ||
340 | physnodes[i].end = physnodes[i].start; | ||
341 | continue; | ||
342 | } | ||
343 | if (physnodes[i].end < start) { | ||
344 | physnodes[i].start = physnodes[i].end; | ||
345 | continue; | ||
346 | } | ||
347 | if (physnodes[i].start < start) | ||
348 | physnodes[i].start = start; | ||
349 | if (physnodes[i].end > end) | ||
350 | physnodes[i].end = end; | ||
351 | } | ||
352 | |||
353 | /* | ||
354 | * Remove all nodes that have no memory or were truncated because of the | ||
355 | * limited address range. | ||
356 | */ | ||
357 | for (i = 0; i < nr_nodes; i++) { | ||
358 | if (physnodes[i].start == physnodes[i].end) | ||
359 | continue; | ||
360 | physnodes[ret].start = physnodes[i].start; | ||
361 | physnodes[ret].end = physnodes[i].end; | ||
362 | ret++; | ||
363 | } | ||
364 | |||
365 | /* | ||
366 | * If no physical topology was detected, a single node is faked to cover | ||
367 | * the entire address space. | ||
368 | */ | ||
369 | if (!ret) { | ||
370 | physnodes[ret].start = start; | ||
371 | physnodes[ret].end = end; | ||
372 | ret = 1; | ||
373 | } | ||
374 | return ret; | ||
375 | } | ||
376 | |||
311 | /* | 377 | /* |
312 | * Sets up nid to range from addr to addr + size. If the end | 378 | * Sets up nid to range from addr to addr + size. If the end |
313 | * boundary is greater than max_addr, then max_addr is used instead. | 379 | * boundary is greater than max_addr, then max_addr is used instead. |
@@ -315,11 +381,9 @@ static char *cmdline __initdata; | |||
315 | * allocation past addr and -1 otherwise. addr is adjusted to be at | 381 | * allocation past addr and -1 otherwise. addr is adjusted to be at |
316 | * the end of the node. | 382 | * the end of the node. |
317 | */ | 383 | */ |
318 | static int __init setup_node_range(int nid, struct bootnode *nodes, u64 *addr, | 384 | static int __init setup_node_range(int nid, u64 *addr, u64 size, u64 max_addr) |
319 | u64 size, u64 max_addr) | ||
320 | { | 385 | { |
321 | int ret = 0; | 386 | int ret = 0; |
322 | |||
323 | nodes[nid].start = *addr; | 387 | nodes[nid].start = *addr; |
324 | *addr += size; | 388 | *addr += size; |
325 | if (*addr >= max_addr) { | 389 | if (*addr >= max_addr) { |
@@ -335,167 +399,234 @@ static int __init setup_node_range(int nid, struct bootnode *nodes, u64 *addr, | |||
335 | } | 399 | } |
336 | 400 | ||
337 | /* | 401 | /* |
338 | * Splits num_nodes nodes up equally starting at node_start. The return value | 402 | * Sets up nr_nodes fake nodes interleaved over physical nodes ranging from addr |
339 | * is the number of nodes split up and addr is adjusted to be at the end of the | 403 | * to max_addr. The return value is the number of nodes allocated. |
340 | * last node allocated. | ||
341 | */ | 404 | */ |
342 | static int __init split_nodes_equally(struct bootnode *nodes, u64 *addr, | 405 | static int __init split_nodes_interleave(u64 addr, u64 max_addr, |
343 | u64 max_addr, int node_start, | 406 | int nr_phys_nodes, int nr_nodes) |
344 | int num_nodes) | ||
345 | { | 407 | { |
346 | unsigned int big; | 408 | nodemask_t physnode_mask = NODE_MASK_NONE; |
347 | u64 size; | 409 | u64 size; |
410 | int big; | ||
411 | int ret = 0; | ||
348 | int i; | 412 | int i; |
349 | 413 | ||
350 | if (num_nodes <= 0) | 414 | if (nr_nodes <= 0) |
351 | return -1; | 415 | return -1; |
352 | if (num_nodes > MAX_NUMNODES) | 416 | if (nr_nodes > MAX_NUMNODES) { |
353 | num_nodes = MAX_NUMNODES; | 417 | pr_info("numa=fake=%d too large, reducing to %d\n", |
354 | size = (max_addr - *addr - e820_hole_size(*addr, max_addr)) / | 418 | nr_nodes, MAX_NUMNODES); |
355 | num_nodes; | 419 | nr_nodes = MAX_NUMNODES; |
420 | } | ||
421 | |||
422 | size = (max_addr - addr - e820_hole_size(addr, max_addr)) / nr_nodes; | ||
356 | /* | 423 | /* |
357 | * Calculate the number of big nodes that can be allocated as a result | 424 | * Calculate the number of big nodes that can be allocated as a result |
358 | * of consolidating the leftovers. | 425 | * of consolidating the remainder. |
359 | */ | 426 | */ |
360 | big = ((size & ~FAKE_NODE_MIN_HASH_MASK) * num_nodes) / | 427 | big = ((size & ~FAKE_NODE_MIN_HASH_MASK) * nr_nodes) / |
361 | FAKE_NODE_MIN_SIZE; | 428 | FAKE_NODE_MIN_SIZE; |
362 | 429 | ||
363 | /* Round down to nearest FAKE_NODE_MIN_SIZE. */ | ||
364 | size &= FAKE_NODE_MIN_HASH_MASK; | 430 | size &= FAKE_NODE_MIN_HASH_MASK; |
365 | if (!size) { | 431 | if (!size) { |
366 | printk(KERN_ERR "Not enough memory for each node. " | 432 | pr_err("Not enough memory for each node. " |
367 | "NUMA emulation disabled.\n"); | 433 | "NUMA emulation disabled.\n"); |
368 | return -1; | 434 | return -1; |
369 | } | 435 | } |
370 | 436 | ||
371 | for (i = node_start; i < num_nodes + node_start; i++) { | 437 | for (i = 0; i < nr_phys_nodes; i++) |
372 | u64 end = *addr + size; | 438 | if (physnodes[i].start != physnodes[i].end) |
439 | node_set(i, physnode_mask); | ||
373 | 440 | ||
374 | if (i < big) | 441 | /* |
375 | end += FAKE_NODE_MIN_SIZE; | 442 | * Continue to fill physical nodes with fake nodes until there is no |
376 | /* | 443 | * memory left on any of them. |
377 | * The final node can have the remaining system RAM. Other | 444 | */ |
378 | * nodes receive roughly the same amount of available pages. | 445 | while (nodes_weight(physnode_mask)) { |
379 | */ | 446 | for_each_node_mask(i, physnode_mask) { |
380 | if (i == num_nodes + node_start - 1) | 447 | u64 end = physnodes[i].start + size; |
381 | end = max_addr; | 448 | u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN); |
382 | else | 449 | |
383 | while (end - *addr - e820_hole_size(*addr, end) < | 450 | if (ret < big) |
384 | size) { | 451 | end += FAKE_NODE_MIN_SIZE; |
452 | |||
453 | /* | ||
454 | * Continue to add memory to this fake node if its | ||
455 | * non-reserved memory is less than the per-node size. | ||
456 | */ | ||
457 | while (end - physnodes[i].start - | ||
458 | e820_hole_size(physnodes[i].start, end) < size) { | ||
385 | end += FAKE_NODE_MIN_SIZE; | 459 | end += FAKE_NODE_MIN_SIZE; |
386 | if (end > max_addr) { | 460 | if (end > physnodes[i].end) { |
387 | end = max_addr; | 461 | end = physnodes[i].end; |
388 | break; | 462 | break; |
389 | } | 463 | } |
390 | } | 464 | } |
391 | if (setup_node_range(i, nodes, addr, end - *addr, max_addr) < 0) | 465 | |
392 | break; | 466 | /* |
467 | * If there won't be at least FAKE_NODE_MIN_SIZE of | ||
468 | * non-reserved memory in ZONE_DMA32 for the next node, | ||
469 | * this one must extend to the boundary. | ||
470 | */ | ||
471 | if (end < dma32_end && dma32_end - end - | ||
472 | e820_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE) | ||
473 | end = dma32_end; | ||
474 | |||
475 | /* | ||
476 | * If there won't be enough non-reserved memory for the | ||
477 | * next node, this one must extend to the end of the | ||
478 | * physical node. | ||
479 | */ | ||
480 | if (physnodes[i].end - end - | ||
481 | e820_hole_size(end, physnodes[i].end) < size) | ||
482 | end = physnodes[i].end; | ||
483 | |||
484 | /* | ||
485 | * Avoid allocating more nodes than requested, which can | ||
486 | * happen as a result of rounding down each node's size | ||
487 | * to FAKE_NODE_MIN_SIZE. | ||
488 | */ | ||
489 | if (nodes_weight(physnode_mask) + ret >= nr_nodes) | ||
490 | end = physnodes[i].end; | ||
491 | |||
492 | if (setup_node_range(ret++, &physnodes[i].start, | ||
493 | end - physnodes[i].start, | ||
494 | physnodes[i].end) < 0) | ||
495 | node_clear(i, physnode_mask); | ||
496 | } | ||
393 | } | 497 | } |
394 | return i - node_start + 1; | 498 | return ret; |
395 | } | 499 | } |
396 | 500 | ||
397 | /* | 501 | /* |
398 | * Splits the remaining system RAM into chunks of size. The remaining memory is | 502 | * Returns the end address of a node so that there is at least `size' amount of |
399 | * always assigned to a final node and can be asymmetric. Returns the number of | 503 | * non-reserved memory or `max_addr' is reached. |
400 | * nodes split. | ||
401 | */ | 504 | */ |
402 | static int __init split_nodes_by_size(struct bootnode *nodes, u64 *addr, | 505 | static u64 __init find_end_of_node(u64 start, u64 max_addr, u64 size) |
403 | u64 max_addr, int node_start, u64 size) | ||
404 | { | 506 | { |
405 | int i = node_start; | 507 | u64 end = start + size; |
406 | size = (size << 20) & FAKE_NODE_MIN_HASH_MASK; | 508 | |
407 | while (!setup_node_range(i++, nodes, addr, size, max_addr)) | 509 | while (end - start - e820_hole_size(start, end) < size) { |
408 | ; | 510 | end += FAKE_NODE_MIN_SIZE; |
409 | return i - node_start; | 511 | if (end > max_addr) { |
512 | end = max_addr; | ||
513 | break; | ||
514 | } | ||
515 | } | ||
516 | return end; | ||
410 | } | 517 | } |
411 | 518 | ||
412 | /* | 519 | /* |
413 | * Sets up the system RAM area from start_pfn to last_pfn according to the | 520 | * Sets up fake nodes of `size' interleaved over physical nodes ranging from |
414 | * numa=fake command-line option. | 521 | * `addr' to `max_addr'. The return value is the number of nodes allocated. |
415 | */ | 522 | */ |
416 | static struct bootnode nodes[MAX_NUMNODES] __initdata; | 523 | static int __init split_nodes_size_interleave(u64 addr, u64 max_addr, u64 size) |
417 | |||
418 | static int __init numa_emulation(unsigned long start_pfn, unsigned long last_pfn) | ||
419 | { | 524 | { |
420 | u64 size, addr = start_pfn << PAGE_SHIFT; | 525 | nodemask_t physnode_mask = NODE_MASK_NONE; |
421 | u64 max_addr = last_pfn << PAGE_SHIFT; | 526 | u64 min_size; |
422 | int num_nodes = 0, num = 0, coeff_flag, coeff = -1, i; | 527 | int ret = 0; |
528 | int i; | ||
423 | 529 | ||
424 | memset(&nodes, 0, sizeof(nodes)); | 530 | if (!size) |
531 | return -1; | ||
425 | /* | 532 | /* |
426 | * If the numa=fake command-line is just a single number N, split the | 533 | * The limit on emulated nodes is MAX_NUMNODES, so the size per node is |
427 | * system RAM into N fake nodes. | 534 | * increased accordingly if the requested size is too small. This |
535 | * creates a uniform distribution of node sizes across the entire | ||
536 | * machine (but not necessarily over physical nodes). | ||
428 | */ | 537 | */ |
429 | if (!strchr(cmdline, '*') && !strchr(cmdline, ',')) { | 538 | min_size = (max_addr - addr - e820_hole_size(addr, max_addr)) / |
430 | long n = simple_strtol(cmdline, NULL, 0); | 539 | MAX_NUMNODES; |
431 | 540 | min_size = max(min_size, FAKE_NODE_MIN_SIZE); | |
432 | num_nodes = split_nodes_equally(nodes, &addr, max_addr, 0, n); | 541 | if ((min_size & FAKE_NODE_MIN_HASH_MASK) < min_size) |
433 | if (num_nodes < 0) | 542 | min_size = (min_size + FAKE_NODE_MIN_SIZE) & |
434 | return num_nodes; | 543 | FAKE_NODE_MIN_HASH_MASK; |
435 | goto out; | 544 | if (size < min_size) { |
545 | pr_err("Fake node size %LuMB too small, increasing to %LuMB\n", | ||
546 | size >> 20, min_size >> 20); | ||
547 | size = min_size; | ||
436 | } | 548 | } |
549 | size &= FAKE_NODE_MIN_HASH_MASK; | ||
437 | 550 | ||
438 | /* Parse the command line. */ | 551 | for (i = 0; i < MAX_NUMNODES; i++) |
439 | for (coeff_flag = 0; ; cmdline++) { | 552 | if (physnodes[i].start != physnodes[i].end) |
440 | if (*cmdline && isdigit(*cmdline)) { | 553 | node_set(i, physnode_mask); |
441 | num = num * 10 + *cmdline - '0'; | 554 | /* |
442 | continue; | 555 | * Fill physical nodes with fake nodes of size until there is no memory |
443 | } | 556 | * left on any of them. |
444 | if (*cmdline == '*') { | 557 | */ |
445 | if (num > 0) | 558 | while (nodes_weight(physnode_mask)) { |
446 | coeff = num; | 559 | for_each_node_mask(i, physnode_mask) { |
447 | coeff_flag = 1; | 560 | u64 dma32_end = MAX_DMA32_PFN << PAGE_SHIFT; |
448 | } | 561 | u64 end; |
449 | if (!*cmdline || *cmdline == ',') { | 562 | |
450 | if (!coeff_flag) | 563 | end = find_end_of_node(physnodes[i].start, |
451 | coeff = 1; | 564 | physnodes[i].end, size); |
452 | /* | 565 | /* |
453 | * Round down to the nearest FAKE_NODE_MIN_SIZE. | 566 | * If there won't be at least FAKE_NODE_MIN_SIZE of |
454 | * Command-line coefficients are in megabytes. | 567 | * non-reserved memory in ZONE_DMA32 for the next node, |
568 | * this one must extend to the boundary. | ||
455 | */ | 569 | */ |
456 | size = ((u64)num << 20) & FAKE_NODE_MIN_HASH_MASK; | 570 | if (end < dma32_end && dma32_end - end - |
457 | if (size) | 571 | e820_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE) |
458 | for (i = 0; i < coeff; i++, num_nodes++) | 572 | end = dma32_end; |
459 | if (setup_node_range(num_nodes, nodes, | 573 | |
460 | &addr, size, max_addr) < 0) | 574 | /* |
461 | goto done; | 575 | * If there won't be enough non-reserved memory for the |
462 | if (!*cmdline) | 576 | * next node, this one must extend to the end of the |
463 | break; | 577 | * physical node. |
464 | coeff_flag = 0; | 578 | */ |
465 | coeff = -1; | 579 | if (physnodes[i].end - end - |
580 | e820_hole_size(end, physnodes[i].end) < size) | ||
581 | end = physnodes[i].end; | ||
582 | |||
583 | /* | ||
584 | * Setup the fake node that will be allocated as bootmem | ||
585 | * later. If setup_node_range() returns non-zero, there | ||
586 | * is no more memory available on this physical node. | ||
587 | */ | ||
588 | if (setup_node_range(ret++, &physnodes[i].start, | ||
589 | end - physnodes[i].start, | ||
590 | physnodes[i].end) < 0) | ||
591 | node_clear(i, physnode_mask); | ||
466 | } | 592 | } |
467 | num = 0; | ||
468 | } | 593 | } |
469 | done: | 594 | return ret; |
470 | if (!num_nodes) | 595 | } |
471 | return -1; | 596 | |
472 | /* Fill remainder of system RAM, if appropriate. */ | 597 | /* |
473 | if (addr < max_addr) { | 598 | * Sets up the system RAM area from start_pfn to last_pfn according to the |
474 | if (coeff_flag && coeff < 0) { | 599 | * numa=fake command-line option. |
475 | /* Split remaining nodes into num-sized chunks */ | 600 | */ |
476 | num_nodes += split_nodes_by_size(nodes, &addr, max_addr, | 601 | static int __init numa_emulation(unsigned long start_pfn, |
477 | num_nodes, num); | 602 | unsigned long last_pfn, int acpi, int k8) |
478 | goto out; | 603 | { |
479 | } | 604 | u64 addr = start_pfn << PAGE_SHIFT; |
480 | switch (*(cmdline - 1)) { | 605 | u64 max_addr = last_pfn << PAGE_SHIFT; |
481 | case '*': | 606 | int num_phys_nodes; |
482 | /* Split remaining nodes into coeff chunks */ | 607 | int num_nodes; |
483 | if (coeff <= 0) | 608 | int i; |
484 | break; | 609 | |
485 | num_nodes += split_nodes_equally(nodes, &addr, max_addr, | 610 | num_phys_nodes = setup_physnodes(addr, max_addr, acpi, k8); |
486 | num_nodes, coeff); | 611 | /* |
487 | break; | 612 | * If the numa=fake command-line contains a 'M' or 'G', it represents |
488 | case ',': | 613 | * the fixed node size. Otherwise, if it is just a single number N, |
489 | /* Do not allocate remaining system RAM */ | 614 | * split the system RAM into N fake nodes. |
490 | break; | 615 | */ |
491 | default: | 616 | if (strchr(cmdline, 'M') || strchr(cmdline, 'G')) { |
492 | /* Give one final node */ | 617 | u64 size; |
493 | setup_node_range(num_nodes, nodes, &addr, | 618 | |
494 | max_addr - addr, max_addr); | 619 | size = memparse(cmdline, &cmdline); |
495 | num_nodes++; | 620 | num_nodes = split_nodes_size_interleave(addr, max_addr, size); |
496 | } | 621 | } else { |
622 | unsigned long n; | ||
623 | |||
624 | n = simple_strtoul(cmdline, NULL, 0); | ||
625 | num_nodes = split_nodes_interleave(addr, max_addr, num_phys_nodes, n); | ||
497 | } | 626 | } |
498 | out: | 627 | |
628 | if (num_nodes < 0) | ||
629 | return num_nodes; | ||
499 | memnode_shift = compute_hash_shift(nodes, num_nodes, NULL); | 630 | memnode_shift = compute_hash_shift(nodes, num_nodes, NULL); |
500 | if (memnode_shift < 0) { | 631 | if (memnode_shift < 0) { |
501 | memnode_shift = 0; | 632 | memnode_shift = 0; |
@@ -505,14 +636,10 @@ out: | |||
505 | } | 636 | } |
506 | 637 | ||
507 | /* | 638 | /* |
508 | * We need to vacate all active ranges that may have been registered by | 639 | * We need to vacate all active ranges that may have been registered for |
509 | * SRAT and set acpi_numa to -1 so that srat_disabled() always returns | 640 | * the e820 memory map. |
510 | * true. NUMA emulation has succeeded so we will not scan ACPI nodes. | ||
511 | */ | 641 | */ |
512 | remove_all_active_ranges(); | 642 | remove_all_active_ranges(); |
513 | #ifdef CONFIG_ACPI_NUMA | ||
514 | acpi_numa = -1; | ||
515 | #endif | ||
516 | for_each_node_mask(i, node_possible_map) { | 643 | for_each_node_mask(i, node_possible_map) { |
517 | e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT, | 644 | e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT, |
518 | nodes[i].end >> PAGE_SHIFT); | 645 | nodes[i].end >> PAGE_SHIFT); |
@@ -524,7 +651,8 @@ out: | |||
524 | } | 651 | } |
525 | #endif /* CONFIG_NUMA_EMU */ | 652 | #endif /* CONFIG_NUMA_EMU */ |
526 | 653 | ||
527 | void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn) | 654 | void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn, |
655 | int acpi, int k8) | ||
528 | { | 656 | { |
529 | int i; | 657 | int i; |
530 | 658 | ||
@@ -532,23 +660,22 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn) | |||
532 | nodes_clear(node_online_map); | 660 | nodes_clear(node_online_map); |
533 | 661 | ||
534 | #ifdef CONFIG_NUMA_EMU | 662 | #ifdef CONFIG_NUMA_EMU |
535 | if (cmdline && !numa_emulation(start_pfn, last_pfn)) | 663 | if (cmdline && !numa_emulation(start_pfn, last_pfn, acpi, k8)) |
536 | return; | 664 | return; |
537 | nodes_clear(node_possible_map); | 665 | nodes_clear(node_possible_map); |
538 | nodes_clear(node_online_map); | 666 | nodes_clear(node_online_map); |
539 | #endif | 667 | #endif |
540 | 668 | ||
541 | #ifdef CONFIG_ACPI_NUMA | 669 | #ifdef CONFIG_ACPI_NUMA |
542 | if (!numa_off && !acpi_scan_nodes(start_pfn << PAGE_SHIFT, | 670 | if (!numa_off && acpi && !acpi_scan_nodes(start_pfn << PAGE_SHIFT, |
543 | last_pfn << PAGE_SHIFT)) | 671 | last_pfn << PAGE_SHIFT)) |
544 | return; | 672 | return; |
545 | nodes_clear(node_possible_map); | 673 | nodes_clear(node_possible_map); |
546 | nodes_clear(node_online_map); | 674 | nodes_clear(node_online_map); |
547 | #endif | 675 | #endif |
548 | 676 | ||
549 | #ifdef CONFIG_K8_NUMA | 677 | #ifdef CONFIG_K8_NUMA |
550 | if (!numa_off && !k8_scan_nodes(start_pfn<<PAGE_SHIFT, | 678 | if (!numa_off && k8 && !k8_scan_nodes()) |
551 | last_pfn<<PAGE_SHIFT)) | ||
552 | return; | 679 | return; |
553 | nodes_clear(node_possible_map); | 680 | nodes_clear(node_possible_map); |
554 | nodes_clear(node_online_map); | 681 | nodes_clear(node_online_map); |
@@ -579,6 +706,10 @@ unsigned long __init numa_free_all_bootmem(void) | |||
579 | for_each_online_node(i) | 706 | for_each_online_node(i) |
580 | pages += free_all_bootmem_node(NODE_DATA(i)); | 707 | pages += free_all_bootmem_node(NODE_DATA(i)); |
581 | 708 | ||
709 | #ifdef CONFIG_NO_BOOTMEM | ||
710 | pages += free_all_memory_core_early(MAX_NUMNODES); | ||
711 | #endif | ||
712 | |||
582 | return pages; | 713 | return pages; |
583 | } | 714 | } |
584 | 715 | ||
@@ -601,6 +732,25 @@ static __init int numa_setup(char *opt) | |||
601 | early_param("numa", numa_setup); | 732 | early_param("numa", numa_setup); |
602 | 733 | ||
603 | #ifdef CONFIG_NUMA | 734 | #ifdef CONFIG_NUMA |
735 | |||
736 | static __init int find_near_online_node(int node) | ||
737 | { | ||
738 | int n, val; | ||
739 | int min_val = INT_MAX; | ||
740 | int best_node = -1; | ||
741 | |||
742 | for_each_online_node(n) { | ||
743 | val = node_distance(node, n); | ||
744 | |||
745 | if (val < min_val) { | ||
746 | min_val = val; | ||
747 | best_node = n; | ||
748 | } | ||
749 | } | ||
750 | |||
751 | return best_node; | ||
752 | } | ||
753 | |||
604 | /* | 754 | /* |
605 | * Setup early cpu_to_node. | 755 | * Setup early cpu_to_node. |
606 | * | 756 | * |
@@ -632,7 +782,7 @@ void __init init_cpu_to_node(void) | |||
632 | if (node == NUMA_NO_NODE) | 782 | if (node == NUMA_NO_NODE) |
633 | continue; | 783 | continue; |
634 | if (!node_online(node)) | 784 | if (!node_online(node)) |
635 | continue; | 785 | node = find_near_online_node(node); |
636 | numa_set_node(cpu, node); | 786 | numa_set_node(cpu, node); |
637 | } | 787 | } |
638 | } | 788 | } |
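Editor's note, summing up the numa_64.c rework: numa=fake= now takes either a node count or a fixed per-node size (memparse() handles the M/G suffix), and in both cases the fake nodes are interleaved over the detected physical nodes rather than carved out linearly; the old multi-term N*S,... parser removed on the left-hand side is gone. Illustrative boot parameters (values made up):

    numa=fake=32      # 32 fake nodes interleaved over the physical nodes
    numa=fake=128M    # as many 128 MB fake nodes as the machine holds
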
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index dd38bfbefd1f..28195c350b97 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
@@ -6,13 +6,13 @@ | |||
6 | #include <linux/bootmem.h> | 6 | #include <linux/bootmem.h> |
7 | #include <linux/module.h> | 7 | #include <linux/module.h> |
8 | #include <linux/sched.h> | 8 | #include <linux/sched.h> |
9 | #include <linux/slab.h> | ||
10 | #include <linux/mm.h> | 9 | #include <linux/mm.h> |
11 | #include <linux/interrupt.h> | 10 | #include <linux/interrupt.h> |
12 | #include <linux/seq_file.h> | 11 | #include <linux/seq_file.h> |
13 | #include <linux/debugfs.h> | 12 | #include <linux/debugfs.h> |
14 | #include <linux/pfn.h> | 13 | #include <linux/pfn.h> |
15 | #include <linux/percpu.h> | 14 | #include <linux/percpu.h> |
15 | #include <linux/gfp.h> | ||
16 | 16 | ||
17 | #include <asm/e820.h> | 17 | #include <asm/e820.h> |
18 | #include <asm/processor.h> | 18 | #include <asm/processor.h> |
@@ -279,6 +279,43 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, | |||
279 | __pa((unsigned long)__end_rodata) >> PAGE_SHIFT)) | 279 | __pa((unsigned long)__end_rodata) >> PAGE_SHIFT)) |
280 | pgprot_val(forbidden) |= _PAGE_RW; | 280 | pgprot_val(forbidden) |= _PAGE_RW; |
281 | 281 | ||
282 | #if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) | ||
283 | /* | ||
284 | * Once the kernel maps the text as RO (kernel_set_to_readonly is set), | ||
285 | * kernel text mappings for the large-page-aligned text and rodata | ||
286 | * sections will always be read-only. The kernel identity mappings | ||
287 | * covering the holes caused by this alignment can be whatever the user asks for. | ||
288 | * | ||
289 | * This will preserve the large page mappings for kernel text/data | ||
290 | * at no extra cost. | ||
291 | */ | ||
292 | if (kernel_set_to_readonly && | ||
293 | within(address, (unsigned long)_text, | ||
294 | (unsigned long)__end_rodata_hpage_align)) { | ||
295 | unsigned int level; | ||
296 | |||
297 | /* | ||
298 | * Don't enforce the !RW mapping for the kernel text mapping, | ||
299 | * if the current mapping is already using small page mapping. | ||
300 | * No need to work hard to preserve large page mappings in this | ||
301 | * case. | ||
302 | * | ||
303 | * This also fixes the Linux Xen paravirt guest boot failure | ||
304 | * (because of unexpected read-only mappings for kernel identity | ||
305 | * mappings). In this paravirt guest case, the kernel text | ||
306 | * mapping and the kernel identity mapping share the same | ||
307 | * page-table pages. Thus we can't really use different | ||
308 | * protections for the kernel text and identity mappings. Also, | ||
309 | * these shared mappings are made of small page mappings. | ||
310 | * Thus, not enforcing the !RW mapping for small-page kernel | ||
311 | * text mappings helps the Linux Xen paravirt guest boot | ||
312 | * as well. | ||
313 | */ | ||
314 | if (lookup_address(address, &level) && (level != PG_LEVEL_4K)) | ||
315 | pgprot_val(forbidden) |= _PAGE_RW; | ||
316 | } | ||
317 | #endif | ||
318 | |||
282 | prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden)); | 319 | prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden)); |
283 | 320 | ||
284 | return prot; | 321 | return prot; |
@@ -1069,12 +1106,18 @@ EXPORT_SYMBOL(set_memory_array_wb); | |||
1069 | 1106 | ||
1070 | int set_memory_x(unsigned long addr, int numpages) | 1107 | int set_memory_x(unsigned long addr, int numpages) |
1071 | { | 1108 | { |
1109 | if (!(__supported_pte_mask & _PAGE_NX)) | ||
1110 | return 0; | ||
1111 | |||
1072 | return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_NX), 0); | 1112 | return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_NX), 0); |
1073 | } | 1113 | } |
1074 | EXPORT_SYMBOL(set_memory_x); | 1114 | EXPORT_SYMBOL(set_memory_x); |
1075 | 1115 | ||
1076 | int set_memory_nx(unsigned long addr, int numpages) | 1116 | int set_memory_nx(unsigned long addr, int numpages) |
1077 | { | 1117 | { |
1118 | if (!(__supported_pte_mask & _PAGE_NX)) | ||
1119 | return 0; | ||
1120 | |||
1078 | return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_NX), 0); | 1121 | return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_NX), 0); |
1079 | } | 1122 | } |
1080 | EXPORT_SYMBOL(set_memory_nx); | 1123 | EXPORT_SYMBOL(set_memory_nx); |
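Editor's note: for reference while reading the static_protections() hunk above, the within() helper it leans on is just a half-open range test, defined near the top of pageattr.c:

    static inline int
    within(unsigned long addr, unsigned long start, unsigned long end)
    {
            return addr >= start && addr < end;     /* end is exclusive */
    }

The new set_memory_x()/set_memory_nx() guards are in the same spirit: when the CPU (or a noexec= override) leaves _PAGE_NX unsupported, returning 0 early is cheaper and safer than asking change_page_attr to toggle a bit the page tables cannot represent.
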
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index e78cd0ec2bcf..edc8b95afc1a 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c | |||
@@ -12,7 +12,7 @@ | |||
12 | #include <linux/debugfs.h> | 12 | #include <linux/debugfs.h> |
13 | #include <linux/kernel.h> | 13 | #include <linux/kernel.h> |
14 | #include <linux/module.h> | 14 | #include <linux/module.h> |
15 | #include <linux/gfp.h> | 15 | #include <linux/slab.h> |
16 | #include <linux/mm.h> | 16 | #include <linux/mm.h> |
17 | #include <linux/fs.h> | 17 | #include <linux/fs.h> |
18 | #include <linux/rbtree.h> | 18 | #include <linux/rbtree.h> |
@@ -20,6 +20,7 @@ | |||
20 | #include <asm/cacheflush.h> | 20 | #include <asm/cacheflush.h> |
21 | #include <asm/processor.h> | 21 | #include <asm/processor.h> |
22 | #include <asm/tlbflush.h> | 22 | #include <asm/tlbflush.h> |
23 | #include <asm/x86_init.h> | ||
23 | #include <asm/pgtable.h> | 24 | #include <asm/pgtable.h> |
24 | #include <asm/fcntl.h> | 25 | #include <asm/fcntl.h> |
25 | #include <asm/e820.h> | 26 | #include <asm/e820.h> |
@@ -355,9 +356,6 @@ static int free_ram_pages_type(u64 start, u64 end) | |||
355 | * - _PAGE_CACHE_UC_MINUS | 356 | * - _PAGE_CACHE_UC_MINUS |
356 | * - _PAGE_CACHE_UC | 357 | * - _PAGE_CACHE_UC |
357 | * | 358 | * |
358 | * req_type will have a special case value '-1', when requester want to inherit | ||
359 | * the memory type from mtrr (if WB), existing PAT, defaulting to UC_MINUS. | ||
360 | * | ||
361 | * If new_type is NULL, function will return an error if it cannot reserve the | 359 | * If new_type is NULL, function will return an error if it cannot reserve the |
362 | * region with req_type. If new_type is non-NULL, function will return | 360 | * region with req_type. If new_type is non-NULL, function will return |
363 | * available type in new_type in case of no error. In case of any error | 361 | * available type in new_type in case of no error. In case of any error |
@@ -377,9 +375,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
377 | if (!pat_enabled) { | 375 | if (!pat_enabled) { |
378 | /* This is identical to page table setting without PAT */ | 376 | /* This is identical to page table setting without PAT */ |
379 | if (new_type) { | 377 | if (new_type) { |
380 | if (req_type == -1) | 378 | if (req_type == _PAGE_CACHE_WC) |
381 | *new_type = _PAGE_CACHE_WB; | ||
382 | else if (req_type == _PAGE_CACHE_WC) | ||
383 | *new_type = _PAGE_CACHE_UC_MINUS; | 379 | *new_type = _PAGE_CACHE_UC_MINUS; |
384 | else | 380 | else |
385 | *new_type = req_type & _PAGE_CACHE_MASK; | 381 | *new_type = req_type & _PAGE_CACHE_MASK; |
@@ -388,7 +384,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
388 | } | 384 | } |
389 | 385 | ||
390 | /* Low ISA region is always mapped WB in page table. No need to track */ | 386 | /* Low ISA region is always mapped WB in page table. No need to track */ |
391 | if (is_ISA_range(start, end - 1)) { | 387 | if (x86_platform.is_untracked_pat_range(start, end)) { |
392 | if (new_type) | 388 | if (new_type) |
393 | *new_type = _PAGE_CACHE_WB; | 389 | *new_type = _PAGE_CACHE_WB; |
394 | return 0; | 390 | return 0; |
@@ -499,7 +495,7 @@ int free_memtype(u64 start, u64 end) | |||
499 | return 0; | 495 | return 0; |
500 | 496 | ||
501 | /* Low ISA region is always mapped WB. No need to track */ | 497 | /* Low ISA region is always mapped WB. No need to track */ |
502 | if (is_ISA_range(start, end - 1)) | 498 | if (x86_platform.is_untracked_pat_range(start, end)) |
503 | return 0; | 499 | return 0; |
504 | 500 | ||
505 | is_range_ram = pat_pagerange_is_ram(start, end); | 501 | is_range_ram = pat_pagerange_is_ram(start, end); |
@@ -582,7 +578,7 @@ static unsigned long lookup_memtype(u64 paddr) | |||
582 | int rettype = _PAGE_CACHE_WB; | 578 | int rettype = _PAGE_CACHE_WB; |
583 | struct memtype *entry; | 579 | struct memtype *entry; |
584 | 580 | ||
585 | if (is_ISA_range(paddr, paddr + PAGE_SIZE - 1)) | 581 | if (x86_platform.is_untracked_pat_range(paddr, paddr + PAGE_SIZE)) |
586 | return rettype; | 582 | return rettype; |
587 | 583 | ||
588 | if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) { | 584 | if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) { |
@@ -708,9 +704,8 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, | |||
708 | if (!range_is_allowed(pfn, size)) | 704 | if (!range_is_allowed(pfn, size)) |
709 | return 0; | 705 | return 0; |
710 | 706 | ||
711 | if (file->f_flags & O_SYNC) { | 707 | if (file->f_flags & O_DSYNC) |
712 | flags = _PAGE_CACHE_UC_MINUS; | 708 | flags = _PAGE_CACHE_UC_MINUS; |
713 | } | ||
714 | 709 | ||
715 | #ifdef CONFIG_X86_32 | 710 | #ifdef CONFIG_X86_32 |
716 | /* | 711 | /* |
@@ -1018,8 +1013,10 @@ static const struct file_operations memtype_fops = { | |||
1018 | 1013 | ||
1019 | static int __init pat_memtype_list_init(void) | 1014 | static int __init pat_memtype_list_init(void) |
1020 | { | 1015 | { |
1021 | debugfs_create_file("pat_memtype_list", S_IRUSR, arch_debugfs_dir, | 1016 | if (pat_enabled) { |
1022 | NULL, &memtype_fops); | 1017 | debugfs_create_file("pat_memtype_list", S_IRUSR, |
1018 | arch_debugfs_dir, NULL, &memtype_fops); | ||
1019 | } | ||
1023 | return 0; | 1020 | return 0; |
1024 | } | 1021 | } |
1025 | 1022 | ||
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index ed34f5e35999..5c4ee422590e 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c | |||
@@ -1,4 +1,5 @@ | |||
1 | #include <linux/mm.h> | 1 | #include <linux/mm.h> |
2 | #include <linux/gfp.h> | ||
2 | #include <asm/pgalloc.h> | 3 | #include <asm/pgalloc.h> |
3 | #include <asm/pgtable.h> | 4 | #include <asm/pgtable.h> |
4 | #include <asm/tlb.h> | 5 | #include <asm/tlb.h> |
@@ -6,6 +7,14 @@ | |||
6 | 7 | ||
7 | #define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO | 8 | #define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO |
8 | 9 | ||
10 | #ifdef CONFIG_HIGHPTE | ||
11 | #define PGALLOC_USER_GFP __GFP_HIGHMEM | ||
12 | #else | ||
13 | #define PGALLOC_USER_GFP 0 | ||
14 | #endif | ||
15 | |||
16 | gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP; | ||
17 | |||
9 | pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) | 18 | pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) |
10 | { | 19 | { |
11 | return (pte_t *)__get_free_page(PGALLOC_GFP); | 20 | return (pte_t *)__get_free_page(PGALLOC_GFP); |
@@ -15,16 +24,29 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) | |||
15 | { | 24 | { |
16 | struct page *pte; | 25 | struct page *pte; |
17 | 26 | ||
18 | #ifdef CONFIG_HIGHPTE | 27 | pte = alloc_pages(__userpte_alloc_gfp, 0); |
19 | pte = alloc_pages(PGALLOC_GFP | __GFP_HIGHMEM, 0); | ||
20 | #else | ||
21 | pte = alloc_pages(PGALLOC_GFP, 0); | ||
22 | #endif | ||
23 | if (pte) | 28 | if (pte) |
24 | pgtable_page_ctor(pte); | 29 | pgtable_page_ctor(pte); |
25 | return pte; | 30 | return pte; |
26 | } | 31 | } |
27 | 32 | ||
33 | static int __init setup_userpte(char *arg) | ||
34 | { | ||
35 | if (!arg) | ||
36 | return -EINVAL; | ||
37 | |||
38 | /* | ||
39 | * "userpte=nohigh" disables allocation of user pagetables in | ||
40 | * high memory. | ||
41 | */ | ||
42 | if (strcmp(arg, "nohigh") == 0) | ||
43 | __userpte_alloc_gfp &= ~__GFP_HIGHMEM; | ||
44 | else | ||
45 | return -EINVAL; | ||
46 | return 0; | ||
47 | } | ||
48 | early_param("userpte", setup_userpte); | ||
49 | |||
28 | void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte) | 50 | void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte) |
29 | { | 51 | { |
30 | pgtable_page_dtor(pte); | 52 | pgtable_page_dtor(pte); |
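The pgtable.c change turns the compile-time CONFIG_HIGHPTE choice into a runtime GFP mask, __userpte_alloc_gfp, so highmem user page tables can be disabled at boot. A small sketch of the resulting behaviour:

        /* CONFIG_HIGHPTE=y default: user PTE pages may come from highmem */
        gfp_t gfp = PGALLOC_GFP | __GFP_HIGHMEM;

        /* booting with "userpte=nohigh" clears the highmem bit, giving
         * the same allocations as a !CONFIG_HIGHPTE kernel: */
        gfp &= ~__GFP_HIGHMEM;

Keeping the decision in a single gfp_t also removes the #ifdef from pte_alloc_one(), which now unconditionally calls alloc_pages(__userpte_alloc_gfp, 0).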
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index 46c8834aedc0..792854003ed3 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c | |||
@@ -6,7 +6,6 @@ | |||
6 | #include <linux/swap.h> | 6 | #include <linux/swap.h> |
7 | #include <linux/smp.h> | 7 | #include <linux/smp.h> |
8 | #include <linux/highmem.h> | 8 | #include <linux/highmem.h> |
9 | #include <linux/slab.h> | ||
10 | #include <linux/pagemap.h> | 9 | #include <linux/pagemap.h> |
11 | #include <linux/spinlock.h> | 10 | #include <linux/spinlock.h> |
12 | #include <linux/module.h> | 11 | #include <linux/module.h> |
@@ -19,6 +18,7 @@ | |||
19 | #include <asm/e820.h> | 18 | #include <asm/e820.h> |
20 | #include <asm/tlb.h> | 19 | #include <asm/tlb.h> |
21 | #include <asm/tlbflush.h> | 20 | #include <asm/tlbflush.h> |
21 | #include <asm/io.h> | ||
22 | 22 | ||
23 | unsigned int __VMALLOC_RESERVE = 128 << 20; | 23 | unsigned int __VMALLOC_RESERVE = 128 << 20; |
24 | 24 | ||
@@ -129,6 +129,7 @@ static int __init parse_reservetop(char *arg) | |||
129 | 129 | ||
130 | address = memparse(arg, &arg); | 130 | address = memparse(arg, &arg); |
131 | reserve_top_address(address); | 131 | reserve_top_address(address); |
132 | fixup_early_ioremap(); | ||
132 | return 0; | 133 | return 0; |
133 | } | 134 | } |
134 | early_param("reservetop", parse_reservetop); | 135 | early_param("reservetop", parse_reservetop); |
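The new fixup_early_ioremap() call matters because "reservetop=" moves the fixmap area downward after the early ioremap slots have already been set up; without re-deriving them, the early mappings would point at stale virtual addresses. Since memparse() accepts the usual size suffixes, a boot line using this parameter might look like the following (value illustrative only):

        reservetop=16M

which reserves that much space at the top of the virtual address space and then relocates the early ioremap fixmap slots below the new top.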
diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c index 513d8ed5d2ec..a3250aa34086 100644 --- a/arch/x86/mm/setup_nx.c +++ b/arch/x86/mm/setup_nx.c | |||
@@ -3,10 +3,8 @@ | |||
3 | #include <linux/init.h> | 3 | #include <linux/init.h> |
4 | 4 | ||
5 | #include <asm/pgtable.h> | 5 | #include <asm/pgtable.h> |
6 | #include <asm/proto.h> | ||
6 | 7 | ||
7 | int nx_enabled; | ||
8 | |||
9 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) | ||
10 | static int disable_nx __cpuinitdata; | 8 | static int disable_nx __cpuinitdata; |
11 | 9 | ||
12 | /* | 10 | /* |
@@ -22,48 +20,41 @@ static int __init noexec_setup(char *str) | |||
22 | if (!str) | 20 | if (!str) |
23 | return -EINVAL; | 21 | return -EINVAL; |
24 | if (!strncmp(str, "on", 2)) { | 22 | if (!strncmp(str, "on", 2)) { |
25 | __supported_pte_mask |= _PAGE_NX; | ||
26 | disable_nx = 0; | 23 | disable_nx = 0; |
27 | } else if (!strncmp(str, "off", 3)) { | 24 | } else if (!strncmp(str, "off", 3)) { |
28 | disable_nx = 1; | 25 | disable_nx = 1; |
29 | __supported_pte_mask &= ~_PAGE_NX; | ||
30 | } | 26 | } |
27 | x86_configure_nx(); | ||
31 | return 0; | 28 | return 0; |
32 | } | 29 | } |
33 | early_param("noexec", noexec_setup); | 30 | early_param("noexec", noexec_setup); |
34 | #endif | ||
35 | 31 | ||
36 | #ifdef CONFIG_X86_PAE | 32 | void __cpuinit x86_configure_nx(void) |
37 | void __init set_nx(void) | ||
38 | { | 33 | { |
39 | unsigned int v[4], l, h; | 34 | if (cpu_has_nx && !disable_nx) |
40 | 35 | __supported_pte_mask |= _PAGE_NX; | |
41 | if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) { | 36 | else |
42 | cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]); | 37 | __supported_pte_mask &= ~_PAGE_NX; |
38 | } | ||
43 | 39 | ||
44 | if ((v[3] & (1 << 20)) && !disable_nx) { | 40 | void __init x86_report_nx(void) |
45 | rdmsr(MSR_EFER, l, h); | 41 | { |
46 | l |= EFER_NX; | 42 | if (!cpu_has_nx) { |
47 | wrmsr(MSR_EFER, l, h); | 43 | printk(KERN_NOTICE "Notice: NX (Execute Disable) protection " |
48 | nx_enabled = 1; | 44 | "missing in CPU or disabled in BIOS!\n"); |
49 | __supported_pte_mask |= _PAGE_NX; | 45 | } else { |
46 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) | ||
47 | if (disable_nx) { | ||
48 | printk(KERN_INFO "NX (Execute Disable) protection: " | ||
49 | "disabled by kernel command line option\n"); | ||
50 | } else { | ||
51 | printk(KERN_INFO "NX (Execute Disable) protection: " | ||
52 | "active\n"); | ||
50 | } | 53 | } |
51 | } | ||
52 | } | ||
53 | #else | 54 | #else |
54 | void set_nx(void) | 55 | /* 32bit non-PAE kernel, NX cannot be used */ |
55 | { | 56 | printk(KERN_NOTICE "Notice: NX (Execute Disable) protection " |
56 | } | 57 | "cannot be enabled: non-PAE kernel!\n"); |
57 | #endif | 58 | #endif |
58 | 59 | } | |
59 | #ifdef CONFIG_X86_64 | ||
60 | void __cpuinit check_efer(void) | ||
61 | { | ||
62 | unsigned long efer; | ||
63 | |||
64 | rdmsrl(MSR_EFER, efer); | ||
65 | if (!(efer & EFER_NX) || disable_nx) | ||
66 | __supported_pte_mask &= ~_PAGE_NX; | ||
67 | } | 60 | } |
68 | #endif | ||
69 | |||
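setup_nx.c is now split into a decision half and a reporting half: x86_configure_nx() sets or clears _PAGE_NX in __supported_pte_mask based on cpu_has_nx and the "noexec=" override, and x86_report_nx() prints the outcome once. This replaces the old 32/64-bit-specific set_nx()/check_efer() pair and drops the direct EFER read-modify-write from this file. A sketch of the intended call order in the boot path (placement assumed, it is not shown in this diff):

        /* early in setup_arch(), before kernel page tables are built: */
        x86_configure_nx();     /* decide whether _PAGE_NX may be used */
        x86_report_nx();        /* log active / disabled / unsupported */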
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c index 6f8aa33031c7..9324f13492d5 100644 --- a/arch/x86/mm/srat_32.c +++ b/arch/x86/mm/srat_32.c | |||
@@ -267,6 +267,8 @@ int __init get_memcfg_from_srat(void) | |||
267 | e820_register_active_regions(chunk->nid, chunk->start_pfn, | 267 | e820_register_active_regions(chunk->nid, chunk->start_pfn, |
268 | min(chunk->end_pfn, max_pfn)); | 268 | min(chunk->end_pfn, max_pfn)); |
269 | } | 269 | } |
270 | /* for out of order entries in SRAT */ | ||
271 | sort_node_map(); | ||
270 | 272 | ||
271 | for_each_online_node(nid) { | 273 | for_each_online_node(nid) { |
272 | unsigned long start = node_start_pfn[nid]; | 274 | unsigned long start = node_start_pfn[nid]; |
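sort_node_map() is invoked here because SRAT entries may arrive in any order, while later walks of the early node map assume ranges sorted by start PFN. A sketch of what the helper amounts to, assuming the generic early_node_map[] table and a start-PFN comparator:

        /* conceptually: */
        sort(early_node_map, nr_nodemap_entries, sizeof(early_node_map[0]),
             cmp_node_active_region, NULL);     /* order by start_pfn */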
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index dbb5381f7b3b..38512d0c4742 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c | |||
@@ -136,7 +136,7 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa) | |||
136 | apicid_to_node[apic_id] = node; | 136 | apicid_to_node[apic_id] = node; |
137 | node_set(node, cpu_nodes_parsed); | 137 | node_set(node, cpu_nodes_parsed); |
138 | acpi_numa = 1; | 138 | acpi_numa = 1; |
139 | printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n", | 139 | printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n", |
140 | pxm, apic_id, node); | 140 | pxm, apic_id, node); |
141 | } | 141 | } |
142 | 142 | ||
@@ -170,7 +170,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) | |||
170 | apicid_to_node[apic_id] = node; | 170 | apicid_to_node[apic_id] = node; |
171 | node_set(node, cpu_nodes_parsed); | 171 | node_set(node, cpu_nodes_parsed); |
172 | acpi_numa = 1; | 172 | acpi_numa = 1; |
173 | printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n", | 173 | printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n", |
174 | pxm, apic_id, node); | 174 | pxm, apic_id, node); |
175 | } | 175 | } |
176 | 176 | ||
@@ -229,9 +229,11 @@ update_nodes_add(int node, unsigned long start, unsigned long end) | |||
229 | printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n"); | 229 | printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n"); |
230 | } | 230 | } |
231 | 231 | ||
232 | if (changed) | 232 | if (changed) { |
233 | node_set(node, cpu_nodes_parsed); | ||
233 | printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", | 234 | printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", |
234 | nd->start, nd->end); | 235 | nd->start, nd->end); |
236 | } | ||
235 | } | 237 | } |
236 | 238 | ||
237 | /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */ | 239 | /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */ |
@@ -290,8 +292,6 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) | |||
290 | 292 | ||
291 | printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm, | 293 | printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm, |
292 | start, end); | 294 | start, end); |
293 | e820_register_active_regions(node, start >> PAGE_SHIFT, | ||
294 | end >> PAGE_SHIFT); | ||
295 | 295 | ||
296 | if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) { | 296 | if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) { |
297 | update_nodes_add(node, start, end); | 297 | update_nodes_add(node, start, end); |
@@ -319,7 +319,7 @@ static int __init nodes_cover_memory(const struct bootnode *nodes) | |||
319 | unsigned long s = nodes[i].start >> PAGE_SHIFT; | 319 | unsigned long s = nodes[i].start >> PAGE_SHIFT; |
320 | unsigned long e = nodes[i].end >> PAGE_SHIFT; | 320 | unsigned long e = nodes[i].end >> PAGE_SHIFT; |
321 | pxmram += e - s; | 321 | pxmram += e - s; |
322 | pxmram -= absent_pages_in_range(s, e); | 322 | pxmram -= __absent_pages_in_range(i, s, e); |
323 | if ((long)pxmram < 0) | 323 | if ((long)pxmram < 0) |
324 | pxmram = 0; | 324 | pxmram = 0; |
325 | } | 325 | } |
@@ -338,6 +338,19 @@ static int __init nodes_cover_memory(const struct bootnode *nodes) | |||
338 | 338 | ||
339 | void __init acpi_numa_arch_fixup(void) {} | 339 | void __init acpi_numa_arch_fixup(void) {} |
340 | 340 | ||
341 | int __init acpi_get_nodes(struct bootnode *physnodes) | ||
342 | { | ||
343 | int i; | ||
344 | int ret = 0; | ||
345 | |||
346 | for_each_node_mask(i, nodes_parsed) { | ||
347 | physnodes[ret].start = nodes[i].start; | ||
348 | physnodes[ret].end = nodes[i].end; | ||
349 | ret++; | ||
350 | } | ||
351 | return ret; | ||
352 | } | ||
353 | |||
341 | /* Use the information discovered above to actually set up the nodes. */ | 354 | /* Use the information discovered above to actually set up the nodes. */ |
342 | int __init acpi_scan_nodes(unsigned long start, unsigned long end) | 355 | int __init acpi_scan_nodes(unsigned long start, unsigned long end) |
343 | { | 356 | { |
@@ -350,11 +363,6 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) | |||
350 | for (i = 0; i < MAX_NUMNODES; i++) | 363 | for (i = 0; i < MAX_NUMNODES; i++) |
351 | cutoff_node(i, start, end); | 364 | cutoff_node(i, start, end); |
352 | 365 | ||
353 | if (!nodes_cover_memory(nodes)) { | ||
354 | bad_srat(); | ||
355 | return -1; | ||
356 | } | ||
357 | |||
358 | memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks, | 366 | memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks, |
359 | memblk_nodeid); | 367 | memblk_nodeid); |
360 | if (memnode_shift < 0) { | 368 | if (memnode_shift < 0) { |
@@ -364,6 +372,16 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) | |||
364 | return -1; | 372 | return -1; |
365 | } | 373 | } |
366 | 374 | ||
375 | for_each_node_mask(i, nodes_parsed) | ||
376 | e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT, | ||
377 | nodes[i].end >> PAGE_SHIFT); | ||
378 | /* for out of order entries in SRAT */ | ||
379 | sort_node_map(); | ||
380 | if (!nodes_cover_memory(nodes)) { | ||
381 | bad_srat(); | ||
382 | return -1; | ||
383 | } | ||
384 | |||
367 | /* Account for nodes with cpus and no memory */ | 385 | /* Account for nodes with cpus and no memory */ |
368 | nodes_or(node_possible_map, nodes_parsed, cpu_nodes_parsed); | 386 | nodes_or(node_possible_map, nodes_parsed, cpu_nodes_parsed); |
369 | 387 | ||
@@ -443,7 +461,8 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes) | |||
443 | * node, it must now point to the fake node ID. | 461 | * node, it must now point to the fake node ID. |
444 | */ | 462 | */ |
445 | for (j = 0; j < MAX_LOCAL_APIC; j++) | 463 | for (j = 0; j < MAX_LOCAL_APIC; j++) |
446 | if (apicid_to_node[j] == nid) | 464 | if (apicid_to_node[j] == nid && |
465 | fake_apicid_to_node[j] == NUMA_NO_NODE) | ||
447 | fake_apicid_to_node[j] = i; | 466 | fake_apicid_to_node[j] = i; |
448 | } | 467 | } |
449 | for (i = 0; i < num_nodes; i++) | 468 | for (i = 0; i < num_nodes; i++) |
@@ -454,7 +473,6 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes) | |||
454 | for (i = 0; i < num_nodes; i++) | 473 | for (i = 0; i < num_nodes; i++) |
455 | if (fake_nodes[i].start != fake_nodes[i].end) | 474 | if (fake_nodes[i].start != fake_nodes[i].end) |
456 | node_set(i, nodes_parsed); | 475 | node_set(i, nodes_parsed); |
457 | WARN_ON(!nodes_cover_memory(fake_nodes)); | ||
458 | } | 476 | } |
459 | 477 | ||
460 | static int null_slit_node_compare(int a, int b) | 478 | static int null_slit_node_compare(int a, int b) |
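Two things change in srat_64.c: the e820 active-region registration moves out of the per-affinity callback into acpi_scan_nodes(), where it runs once per parsed node and is followed by the same sort_node_map() call as on 32-bit, and the new acpi_get_nodes() exposes the parsed physical node extents, presumably so the NUMA emulation code can build fake nodes on top of the real SRAT topology. A hypothetical caller sketch:

        struct bootnode physnodes[MAX_NUMNODES];
        int nr_phys = acpi_get_nodes(physnodes);   /* copy SRAT extents */
        /* ... carve nr_phys real nodes into the requested fake nodes ... */

The nodes_cover_memory() sanity check correspondingly moves to after registration, and is dropped entirely for fake nodes.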
diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c index 427fd1b56df5..8565d944f7cf 100644 --- a/arch/x86/mm/testmmiotrace.c +++ b/arch/x86/mm/testmmiotrace.c | |||
@@ -1,12 +1,13 @@ | |||
1 | /* | 1 | /* |
2 | * Written by Pekka Paalanen, 2008-2009 <pq@iki.fi> | 2 | * Written by Pekka Paalanen, 2008-2009 <pq@iki.fi> |
3 | */ | 3 | */ |
4 | |||
5 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
6 | |||
4 | #include <linux/module.h> | 7 | #include <linux/module.h> |
5 | #include <linux/io.h> | 8 | #include <linux/io.h> |
6 | #include <linux/mmiotrace.h> | 9 | #include <linux/mmiotrace.h> |
7 | 10 | ||
8 | #define MODULE_NAME "testmmiotrace" | ||
9 | |||
10 | static unsigned long mmio_address; | 11 | static unsigned long mmio_address; |
11 | module_param(mmio_address, ulong, 0); | 12 | module_param(mmio_address, ulong, 0); |
12 | MODULE_PARM_DESC(mmio_address, " Start address of the mapping of 16 kB " | 13 | MODULE_PARM_DESC(mmio_address, " Start address of the mapping of 16 kB " |
@@ -30,7 +31,7 @@ static unsigned v32(unsigned i) | |||
30 | static void do_write_test(void __iomem *p) | 31 | static void do_write_test(void __iomem *p) |
31 | { | 32 | { |
32 | unsigned int i; | 33 | unsigned int i; |
33 | pr_info(MODULE_NAME ": write test.\n"); | 34 | pr_info("write test.\n"); |
34 | mmiotrace_printk("Write test.\n"); | 35 | mmiotrace_printk("Write test.\n"); |
35 | 36 | ||
36 | for (i = 0; i < 256; i++) | 37 | for (i = 0; i < 256; i++) |
@@ -47,7 +48,7 @@ static void do_read_test(void __iomem *p) | |||
47 | { | 48 | { |
48 | unsigned int i; | 49 | unsigned int i; |
49 | unsigned errs[3] = { 0 }; | 50 | unsigned errs[3] = { 0 }; |
50 | pr_info(MODULE_NAME ": read test.\n"); | 51 | pr_info("read test.\n"); |
51 | mmiotrace_printk("Read test.\n"); | 52 | mmiotrace_printk("Read test.\n"); |
52 | 53 | ||
53 | for (i = 0; i < 256; i++) | 54 | for (i = 0; i < 256; i++) |
@@ -68,7 +69,7 @@ static void do_read_test(void __iomem *p) | |||
68 | 69 | ||
69 | static void do_read_far_test(void __iomem *p) | 70 | static void do_read_far_test(void __iomem *p) |
70 | { | 71 | { |
71 | pr_info(MODULE_NAME ": read far test.\n"); | 72 | pr_info("read far test.\n"); |
72 | mmiotrace_printk("Read far test.\n"); | 73 | mmiotrace_printk("Read far test.\n"); |
73 | 74 | ||
74 | ioread32(p + read_far); | 75 | ioread32(p + read_far); |
@@ -78,7 +79,7 @@ static void do_test(unsigned long size) | |||
78 | { | 79 | { |
79 | void __iomem *p = ioremap_nocache(mmio_address, size); | 80 | void __iomem *p = ioremap_nocache(mmio_address, size); |
80 | if (!p) { | 81 | if (!p) { |
81 | pr_err(MODULE_NAME ": could not ioremap, aborting.\n"); | 82 | pr_err("could not ioremap, aborting.\n"); |
82 | return; | 83 | return; |
83 | } | 84 | } |
84 | mmiotrace_printk("ioremap returned %p.\n", p); | 85 | mmiotrace_printk("ioremap returned %p.\n", p); |
@@ -94,24 +95,22 @@ static int __init init(void) | |||
94 | unsigned long size = (read_far) ? (8 << 20) : (16 << 10); | 95 | unsigned long size = (read_far) ? (8 << 20) : (16 << 10); |
95 | 96 | ||
96 | if (mmio_address == 0) { | 97 | if (mmio_address == 0) { |
97 | pr_err(MODULE_NAME ": you have to use the module argument " | 98 | pr_err("you have to use the module argument mmio_address.\n"); |
98 | "mmio_address.\n"); | 99 | pr_err("DO NOT LOAD THIS MODULE UNLESS YOU REALLY KNOW WHAT YOU ARE DOING!\n"); |
99 | pr_err(MODULE_NAME ": DO NOT LOAD THIS MODULE UNLESS" | ||
100 | " YOU REALLY KNOW WHAT YOU ARE DOING!\n"); | ||
101 | return -ENXIO; | 100 | return -ENXIO; |
102 | } | 101 | } |
103 | 102 | ||
104 | pr_warning(MODULE_NAME ": WARNING: mapping %lu kB @ 0x%08lx in PCI " | 103 | pr_warning("WARNING: mapping %lu kB @ 0x%08lx in PCI address space, " |
105 | "address space, and writing 16 kB of rubbish in there.\n", | 104 | "and writing 16 kB of rubbish in there.\n", |
106 | size >> 10, mmio_address); | 105 | size >> 10, mmio_address); |
107 | do_test(size); | 106 | do_test(size); |
108 | pr_info(MODULE_NAME ": All done.\n"); | 107 | pr_info("All done.\n"); |
109 | return 0; | 108 | return 0; |
110 | } | 109 | } |
111 | 110 | ||
112 | static void __exit cleanup(void) | 111 | static void __exit cleanup(void) |
113 | { | 112 | { |
114 | pr_debug(MODULE_NAME ": unloaded.\n"); | 113 | pr_debug("unloaded.\n"); |
115 | } | 114 | } |
116 | 115 | ||
117 | module_init(init); | 116 | module_init(init); |
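The testmmiotrace.c hunks are a pure logging cleanup: defining pr_fmt before the includes lets the pr_*() helpers prepend the module name automatically, so the open-coded MODULE_NAME prefix can go. The mechanism, roughly:

        #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
        /* pr_info(fmt, ...) expands to printk(KERN_INFO pr_fmt(fmt), ...),
         * so: */
        pr_info("write test.\n");   /* -> "testmmiotrace: write test.\n" */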
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 36fe08eeb5c3..426f3a1a64d3 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c | |||
@@ -8,6 +8,7 @@ | |||
8 | 8 | ||
9 | #include <asm/tlbflush.h> | 9 | #include <asm/tlbflush.h> |
10 | #include <asm/mmu_context.h> | 10 | #include <asm/mmu_context.h> |
11 | #include <asm/cache.h> | ||
11 | #include <asm/apic.h> | 12 | #include <asm/apic.h> |
12 | #include <asm/uv/uv.h> | 13 | #include <asm/uv/uv.h> |
13 | 14 | ||
@@ -40,10 +41,10 @@ union smp_flush_state { | |||
40 | struct { | 41 | struct { |
41 | struct mm_struct *flush_mm; | 42 | struct mm_struct *flush_mm; |
42 | unsigned long flush_va; | 43 | unsigned long flush_va; |
43 | spinlock_t tlbstate_lock; | 44 | raw_spinlock_t tlbstate_lock; |
44 | DECLARE_BITMAP(flush_cpumask, NR_CPUS); | 45 | DECLARE_BITMAP(flush_cpumask, NR_CPUS); |
45 | }; | 46 | }; |
46 | char pad[CONFIG_X86_INTERNODE_CACHE_BYTES]; | 47 | char pad[INTERNODE_CACHE_BYTES]; |
47 | } ____cacheline_internodealigned_in_smp; | 48 | } ____cacheline_internodealigned_in_smp; |
48 | 49 | ||
49 | /* State is put into the per CPU data section, but padded | 50 | /* State is put into the per CPU data section, but padded |
@@ -180,7 +181,7 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask, | |||
180 | * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is | 181 | * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is |
181 | * probably not worth checking this for a cache-hot lock. | 182 | * probably not worth checking this for a cache-hot lock. |
182 | */ | 183 | */ |
183 | spin_lock(&f->tlbstate_lock); | 184 | raw_spin_lock(&f->tlbstate_lock); |
184 | 185 | ||
185 | f->flush_mm = mm; | 186 | f->flush_mm = mm; |
186 | f->flush_va = va; | 187 | f->flush_va = va; |
@@ -198,7 +199,7 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask, | |||
198 | 199 | ||
199 | f->flush_mm = NULL; | 200 | f->flush_mm = NULL; |
200 | f->flush_va = 0; | 201 | f->flush_va = 0; |
201 | spin_unlock(&f->tlbstate_lock); | 202 | raw_spin_unlock(&f->tlbstate_lock); |
202 | } | 203 | } |
203 | 204 | ||
204 | void native_flush_tlb_others(const struct cpumask *cpumask, | 205 | void native_flush_tlb_others(const struct cpumask *cpumask, |
@@ -222,7 +223,7 @@ static int __cpuinit init_smp_flush(void) | |||
222 | int i; | 223 | int i; |
223 | 224 | ||
224 | for (i = 0; i < ARRAY_SIZE(flush_state); i++) | 225 | for (i = 0; i < ARRAY_SIZE(flush_state); i++) |
225 | spin_lock_init(&flush_state[i].tlbstate_lock); | 226 | raw_spin_lock_init(&flush_state[i].tlbstate_lock); |
226 | 227 | ||
227 | return 0; | 228 | return 0; |
228 | } | 229 | } |
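Finally, tlb.c converts the per-sender flush-state lock to a raw_spinlock_t. The IPI-driven TLB shootdown path runs with interrupts disabled and must never sleep, and raw spinlocks keep plain spinning semantics even in configurations (notably PREEMPT_RT) where spinlock_t can become a sleeping lock. A minimal sketch of the pattern:

        static DEFINE_RAW_SPINLOCK(tlbstate_lock);

        raw_spin_lock(&tlbstate_lock);
        /* short, non-sleeping critical section: publish mm/va, send IPI */
        raw_spin_unlock(&tlbstate_lock);

The CONFIG_X86_INTERNODE_CACHE_BYTES to INTERNODE_CACHE_BYTES switch in the same hunk simply follows the padding constant moving from a config symbol into <asm/cache.h>.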