diff options
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/include/asm/numa_64.h | 10 | ||||
-rw-r--r-- | arch/x86/include/asm/page_32_types.h | 4 | ||||
-rw-r--r-- | arch/x86/include/asm/page_64_types.h | 8 | ||||
-rw-r--r-- | arch/x86/include/asm/page_types.h | 6 | ||||
-rw-r--r-- | arch/x86/include/asm/pgtable_64_types.h | 8 | ||||
-rw-r--r-- | arch/x86/include/asm/pgtable_types.h | 1 | ||||
-rw-r--r-- | arch/x86/include/asm/sparsemem.h | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/traps.h | 1 | ||||
-rw-r--r-- | arch/x86/kernel/e820.c | 46 | ||||
-rw-r--r-- | arch/x86/kernel/process.c | 14 | ||||
-rw-r--r-- | arch/x86/kernel/process_32.c | 15 | ||||
-rw-r--r-- | arch/x86/kernel/process_64.c | 15 | ||||
-rw-r--r-- | arch/x86/kernel/setup.c | 12 | ||||
-rw-r--r-- | arch/x86/kernel/setup_percpu.c | 8 | ||||
-rw-r--r-- | arch/x86/mm/dump_pagetables.c | 7 | ||||
-rw-r--r-- | arch/x86/mm/fault.c | 51 | ||||
-rw-r--r-- | arch/x86/mm/init.c | 77 | ||||
-rw-r--r-- | arch/x86/mm/init_32.c | 61 | ||||
-rw-r--r-- | arch/x86/mm/init_64.c | 47 | ||||
-rw-r--r-- | arch/x86/mm/numa_64.c | 33 | ||||
-rw-r--r-- | arch/x86/mm/srat_64.c | 98 |
21 files changed, 208 insertions, 316 deletions
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h index 064ed6df4cbe..c4ae822e415f 100644 --- a/arch/x86/include/asm/numa_64.h +++ b/arch/x86/include/asm/numa_64.h | |||
@@ -17,9 +17,6 @@ extern int compute_hash_shift(struct bootnode *nodes, int numblks, | |||
17 | extern void numa_init_array(void); | 17 | extern void numa_init_array(void); |
18 | extern int numa_off; | 18 | extern int numa_off; |
19 | 19 | ||
20 | extern void srat_reserve_add_area(int nodeid); | ||
21 | extern int hotadd_percent; | ||
22 | |||
23 | extern s16 apicid_to_node[MAX_LOCAL_APIC]; | 20 | extern s16 apicid_to_node[MAX_LOCAL_APIC]; |
24 | 21 | ||
25 | extern unsigned long numa_free_all_bootmem(void); | 22 | extern unsigned long numa_free_all_bootmem(void); |
@@ -27,6 +24,13 @@ extern void setup_node_bootmem(int nodeid, unsigned long start, | |||
27 | unsigned long end); | 24 | unsigned long end); |
28 | 25 | ||
29 | #ifdef CONFIG_NUMA | 26 | #ifdef CONFIG_NUMA |
27 | /* | ||
28 | * Too small node sizes may confuse the VM badly. Usually they | ||
29 | * result from BIOS bugs. So dont recognize nodes as standalone | ||
30 | * NUMA entities that have less than this amount of RAM listed: | ||
31 | */ | ||
32 | #define NODE_MIN_SIZE (4*1024*1024) | ||
33 | |||
30 | extern void __init init_cpu_to_node(void); | 34 | extern void __init init_cpu_to_node(void); |
31 | extern void __cpuinit numa_set_node(int cpu, int node); | 35 | extern void __cpuinit numa_set_node(int cpu, int node); |
32 | extern void __cpuinit numa_clear_node(int cpu); | 36 | extern void __cpuinit numa_clear_node(int cpu); |
diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h index 0f915ae649a7..6f1b7331313f 100644 --- a/arch/x86/include/asm/page_32_types.h +++ b/arch/x86/include/asm/page_32_types.h | |||
@@ -54,10 +54,6 @@ extern unsigned int __VMALLOC_RESERVE; | |||
54 | extern int sysctl_legacy_va_layout; | 54 | extern int sysctl_legacy_va_layout; |
55 | 55 | ||
56 | extern void find_low_pfn_range(void); | 56 | extern void find_low_pfn_range(void); |
57 | extern unsigned long init_memory_mapping(unsigned long start, | ||
58 | unsigned long end); | ||
59 | extern void initmem_init(unsigned long, unsigned long); | ||
60 | extern void free_initmem(void); | ||
61 | extern void setup_bootmem_allocator(void); | 57 | extern void setup_bootmem_allocator(void); |
62 | 58 | ||
63 | #endif /* !__ASSEMBLY__ */ | 59 | #endif /* !__ASSEMBLY__ */ |
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index e11900f2500e..8d382d3abf38 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h | |||
@@ -39,7 +39,7 @@ | |||
39 | #define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START) | 39 | #define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START) |
40 | #define __START_KERNEL_map _AC(0xffffffff80000000, UL) | 40 | #define __START_KERNEL_map _AC(0xffffffff80000000, UL) |
41 | 41 | ||
42 | /* See Documentation/x86_64/mm.txt for a description of the memory map. */ | 42 | /* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */ |
43 | #define __PHYSICAL_MASK_SHIFT 46 | 43 | #define __PHYSICAL_MASK_SHIFT 46 |
44 | #define __VIRTUAL_MASK_SHIFT 48 | 44 | #define __VIRTUAL_MASK_SHIFT 48 |
45 | 45 | ||
@@ -63,12 +63,6 @@ extern unsigned long __phys_addr(unsigned long); | |||
63 | 63 | ||
64 | #define vmemmap ((struct page *)VMEMMAP_START) | 64 | #define vmemmap ((struct page *)VMEMMAP_START) |
65 | 65 | ||
66 | extern unsigned long init_memory_mapping(unsigned long start, | ||
67 | unsigned long end); | ||
68 | |||
69 | extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn); | ||
70 | extern void free_initmem(void); | ||
71 | |||
72 | extern void init_extra_mapping_uc(unsigned long phys, unsigned long size); | 66 | extern void init_extra_mapping_uc(unsigned long phys, unsigned long size); |
73 | extern void init_extra_mapping_wb(unsigned long phys, unsigned long size); | 67 | extern void init_extra_mapping_wb(unsigned long phys, unsigned long size); |
74 | 68 | ||
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index 826ad37006ab..6473f5ccff85 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h | |||
@@ -46,6 +46,12 @@ extern int devmem_is_allowed(unsigned long pagenr); | |||
46 | extern unsigned long max_low_pfn_mapped; | 46 | extern unsigned long max_low_pfn_mapped; |
47 | extern unsigned long max_pfn_mapped; | 47 | extern unsigned long max_pfn_mapped; |
48 | 48 | ||
49 | extern unsigned long init_memory_mapping(unsigned long start, | ||
50 | unsigned long end); | ||
51 | |||
52 | extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn); | ||
53 | extern void free_initmem(void); | ||
54 | |||
49 | #endif /* !__ASSEMBLY__ */ | 55 | #endif /* !__ASSEMBLY__ */ |
50 | 56 | ||
51 | #endif /* _ASM_X86_PAGE_DEFS_H */ | 57 | #endif /* _ASM_X86_PAGE_DEFS_H */ |
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index fbf42b8e0383..766ea16fbbbd 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h | |||
@@ -51,11 +51,11 @@ typedef struct { pteval_t pte; } pte_t; | |||
51 | #define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) | 51 | #define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) |
52 | #define PGDIR_MASK (~(PGDIR_SIZE - 1)) | 52 | #define PGDIR_MASK (~(PGDIR_SIZE - 1)) |
53 | 53 | ||
54 | 54 | /* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */ | |
55 | #define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) | 55 | #define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) |
56 | #define VMALLOC_START _AC(0xffffc20000000000, UL) | 56 | #define VMALLOC_START _AC(0xffffc90000000000, UL) |
57 | #define VMALLOC_END _AC(0xffffe1ffffffffff, UL) | 57 | #define VMALLOC_END _AC(0xffffe8ffffffffff, UL) |
58 | #define VMEMMAP_START _AC(0xffffe20000000000, UL) | 58 | #define VMEMMAP_START _AC(0xffffea0000000000, UL) |
59 | #define MODULES_VADDR _AC(0xffffffffa0000000, UL) | 59 | #define MODULES_VADDR _AC(0xffffffffa0000000, UL) |
60 | #define MODULES_END _AC(0xffffffffff000000, UL) | 60 | #define MODULES_END _AC(0xffffffffff000000, UL) |
61 | #define MODULES_LEN (MODULES_END - MODULES_VADDR) | 61 | #define MODULES_LEN (MODULES_END - MODULES_VADDR) |
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index b8238dc8786d..4d258ad76a0f 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h | |||
@@ -273,7 +273,6 @@ typedef struct page *pgtable_t; | |||
273 | 273 | ||
274 | extern pteval_t __supported_pte_mask; | 274 | extern pteval_t __supported_pte_mask; |
275 | extern int nx_enabled; | 275 | extern int nx_enabled; |
276 | extern void set_nx(void); | ||
277 | 276 | ||
278 | #define pgprot_writecombine pgprot_writecombine | 277 | #define pgprot_writecombine pgprot_writecombine |
279 | extern pgprot_t pgprot_writecombine(pgprot_t prot); | 278 | extern pgprot_t pgprot_writecombine(pgprot_t prot); |
diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h index e3cc3c063ec5..4517d6b93188 100644 --- a/arch/x86/include/asm/sparsemem.h +++ b/arch/x86/include/asm/sparsemem.h | |||
@@ -27,7 +27,7 @@ | |||
27 | #else /* CONFIG_X86_32 */ | 27 | #else /* CONFIG_X86_32 */ |
28 | # define SECTION_SIZE_BITS 27 /* matt - 128 is convenient right now */ | 28 | # define SECTION_SIZE_BITS 27 /* matt - 128 is convenient right now */ |
29 | # define MAX_PHYSADDR_BITS 44 | 29 | # define MAX_PHYSADDR_BITS 44 |
30 | # define MAX_PHYSMEM_BITS 44 /* Can be max 45 bits */ | 30 | # define MAX_PHYSMEM_BITS 46 |
31 | #endif | 31 | #endif |
32 | 32 | ||
33 | #endif /* CONFIG_SPARSEMEM */ | 33 | #endif /* CONFIG_SPARSEMEM */ |
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 9aa3ab262055..cbfdc26b1460 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h | |||
@@ -2,6 +2,7 @@ | |||
2 | #define _ASM_X86_TRAPS_H | 2 | #define _ASM_X86_TRAPS_H |
3 | 3 | ||
4 | #include <asm/debugreg.h> | 4 | #include <asm/debugreg.h> |
5 | #include <asm/siginfo.h> /* TRAP_TRACE, ... */ | ||
5 | 6 | ||
6 | #ifdef CONFIG_X86_32 | 7 | #ifdef CONFIG_X86_32 |
7 | #define dotraplinkage | 8 | #define dotraplinkage |
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 006281302925..7271fa33d791 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c | |||
@@ -617,7 +617,7 @@ __init int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize, | |||
617 | */ | 617 | */ |
618 | __init void e820_setup_gap(void) | 618 | __init void e820_setup_gap(void) |
619 | { | 619 | { |
620 | unsigned long gapstart, gapsize, round; | 620 | unsigned long gapstart, gapsize; |
621 | int found; | 621 | int found; |
622 | 622 | ||
623 | gapstart = 0x10000000; | 623 | gapstart = 0x10000000; |
@@ -635,14 +635,9 @@ __init void e820_setup_gap(void) | |||
635 | #endif | 635 | #endif |
636 | 636 | ||
637 | /* | 637 | /* |
638 | * See how much we want to round up: start off with | 638 | * e820_reserve_resources_late protect stolen RAM already |
639 | * rounding to the next 1MB area. | ||
640 | */ | 639 | */ |
641 | round = 0x100000; | 640 | pci_mem_start = gapstart; |
642 | while ((gapsize >> 4) > round) | ||
643 | round += round; | ||
644 | /* Fun with two's complement */ | ||
645 | pci_mem_start = (gapstart + round) & -round; | ||
646 | 641 | ||
647 | printk(KERN_INFO | 642 | printk(KERN_INFO |
648 | "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n", | 643 | "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n", |
@@ -1371,6 +1366,23 @@ void __init e820_reserve_resources(void) | |||
1371 | } | 1366 | } |
1372 | } | 1367 | } |
1373 | 1368 | ||
1369 | /* How much should we pad RAM ending depending on where it is? */ | ||
1370 | static unsigned long ram_alignment(resource_size_t pos) | ||
1371 | { | ||
1372 | unsigned long mb = pos >> 20; | ||
1373 | |||
1374 | /* To 64kB in the first megabyte */ | ||
1375 | if (!mb) | ||
1376 | return 64*1024; | ||
1377 | |||
1378 | /* To 1MB in the first 16MB */ | ||
1379 | if (mb < 16) | ||
1380 | return 1024*1024; | ||
1381 | |||
1382 | /* To 32MB for anything above that */ | ||
1383 | return 32*1024*1024; | ||
1384 | } | ||
1385 | |||
1374 | void __init e820_reserve_resources_late(void) | 1386 | void __init e820_reserve_resources_late(void) |
1375 | { | 1387 | { |
1376 | int i; | 1388 | int i; |
@@ -1382,6 +1394,24 @@ void __init e820_reserve_resources_late(void) | |||
1382 | insert_resource_expand_to_fit(&iomem_resource, res); | 1394 | insert_resource_expand_to_fit(&iomem_resource, res); |
1383 | res++; | 1395 | res++; |
1384 | } | 1396 | } |
1397 | |||
1398 | /* | ||
1399 | * Try to bump up RAM regions to reasonable boundaries to | ||
1400 | * avoid stolen RAM: | ||
1401 | */ | ||
1402 | for (i = 0; i < e820.nr_map; i++) { | ||
1403 | struct e820entry *entry = &e820_saved.map[i]; | ||
1404 | resource_size_t start, end; | ||
1405 | |||
1406 | if (entry->type != E820_RAM) | ||
1407 | continue; | ||
1408 | start = entry->addr + entry->size; | ||
1409 | end = round_up(start, ram_alignment(start)); | ||
1410 | if (start == end) | ||
1411 | continue; | ||
1412 | reserve_region_with_split(&iomem_resource, start, | ||
1413 | end - 1, "RAM buffer"); | ||
1414 | } | ||
1385 | } | 1415 | } |
1386 | 1416 | ||
1387 | char *__init default_machine_specific_memory_setup(void) | 1417 | char *__init default_machine_specific_memory_setup(void) |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 3e21e38d7e37..e22d63bdc8ff 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -8,6 +8,7 @@ | |||
8 | #include <linux/module.h> | 8 | #include <linux/module.h> |
9 | #include <linux/pm.h> | 9 | #include <linux/pm.h> |
10 | #include <linux/clockchips.h> | 10 | #include <linux/clockchips.h> |
11 | #include <linux/random.h> | ||
11 | #include <trace/power.h> | 12 | #include <trace/power.h> |
12 | #include <asm/system.h> | 13 | #include <asm/system.h> |
13 | #include <asm/apic.h> | 14 | #include <asm/apic.h> |
@@ -614,3 +615,16 @@ static int __init idle_setup(char *str) | |||
614 | } | 615 | } |
615 | early_param("idle", idle_setup); | 616 | early_param("idle", idle_setup); |
616 | 617 | ||
618 | unsigned long arch_align_stack(unsigned long sp) | ||
619 | { | ||
620 | if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) | ||
621 | sp -= get_random_int() % 8192; | ||
622 | return sp & ~0xf; | ||
623 | } | ||
624 | |||
625 | unsigned long arch_randomize_brk(struct mm_struct *mm) | ||
626 | { | ||
627 | unsigned long range_end = mm->brk + 0x02000000; | ||
628 | return randomize_range(mm->brk, range_end, 0) ? : mm->brk; | ||
629 | } | ||
630 | |||
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 76f8f84043a2..56d50b7d71df 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -9,8 +9,6 @@ | |||
9 | * This file handles the architecture-dependent parts of process handling.. | 9 | * This file handles the architecture-dependent parts of process handling.. |
10 | */ | 10 | */ |
11 | 11 | ||
12 | #include <stdarg.h> | ||
13 | |||
14 | #include <linux/stackprotector.h> | 12 | #include <linux/stackprotector.h> |
15 | #include <linux/cpu.h> | 13 | #include <linux/cpu.h> |
16 | #include <linux/errno.h> | 14 | #include <linux/errno.h> |
@@ -33,7 +31,6 @@ | |||
33 | #include <linux/module.h> | 31 | #include <linux/module.h> |
34 | #include <linux/kallsyms.h> | 32 | #include <linux/kallsyms.h> |
35 | #include <linux/ptrace.h> | 33 | #include <linux/ptrace.h> |
36 | #include <linux/random.h> | ||
37 | #include <linux/personality.h> | 34 | #include <linux/personality.h> |
38 | #include <linux/tick.h> | 35 | #include <linux/tick.h> |
39 | #include <linux/percpu.h> | 36 | #include <linux/percpu.h> |
@@ -497,15 +494,3 @@ unsigned long get_wchan(struct task_struct *p) | |||
497 | return 0; | 494 | return 0; |
498 | } | 495 | } |
499 | 496 | ||
500 | unsigned long arch_align_stack(unsigned long sp) | ||
501 | { | ||
502 | if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) | ||
503 | sp -= get_random_int() % 8192; | ||
504 | return sp & ~0xf; | ||
505 | } | ||
506 | |||
507 | unsigned long arch_randomize_brk(struct mm_struct *mm) | ||
508 | { | ||
509 | unsigned long range_end = mm->brk + 0x02000000; | ||
510 | return randomize_range(mm->brk, range_end, 0) ? : mm->brk; | ||
511 | } | ||
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index b751a41392b1..9d6b20e6cd80 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -14,8 +14,6 @@ | |||
14 | * This file handles the architecture-dependent parts of process handling.. | 14 | * This file handles the architecture-dependent parts of process handling.. |
15 | */ | 15 | */ |
16 | 16 | ||
17 | #include <stdarg.h> | ||
18 | |||
19 | #include <linux/stackprotector.h> | 17 | #include <linux/stackprotector.h> |
20 | #include <linux/cpu.h> | 18 | #include <linux/cpu.h> |
21 | #include <linux/errno.h> | 19 | #include <linux/errno.h> |
@@ -32,7 +30,6 @@ | |||
32 | #include <linux/delay.h> | 30 | #include <linux/delay.h> |
33 | #include <linux/module.h> | 31 | #include <linux/module.h> |
34 | #include <linux/ptrace.h> | 32 | #include <linux/ptrace.h> |
35 | #include <linux/random.h> | ||
36 | #include <linux/notifier.h> | 33 | #include <linux/notifier.h> |
37 | #include <linux/kprobes.h> | 34 | #include <linux/kprobes.h> |
38 | #include <linux/kdebug.h> | 35 | #include <linux/kdebug.h> |
@@ -660,15 +657,3 @@ long sys_arch_prctl(int code, unsigned long addr) | |||
660 | return do_arch_prctl(current, code, addr); | 657 | return do_arch_prctl(current, code, addr); |
661 | } | 658 | } |
662 | 659 | ||
663 | unsigned long arch_align_stack(unsigned long sp) | ||
664 | { | ||
665 | if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) | ||
666 | sp -= get_random_int() % 8192; | ||
667 | return sp & ~0xf; | ||
668 | } | ||
669 | |||
670 | unsigned long arch_randomize_brk(struct mm_struct *mm) | ||
671 | { | ||
672 | unsigned long range_end = mm->brk + 0x02000000; | ||
673 | return randomize_range(mm->brk, range_end, 0) ? : mm->brk; | ||
674 | } | ||
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 7791eef95b91..d1c636bf31a7 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -112,6 +112,14 @@ | |||
112 | #define ARCH_SETUP | 112 | #define ARCH_SETUP |
113 | #endif | 113 | #endif |
114 | 114 | ||
115 | /* | ||
116 | * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. | ||
117 | * The direct mapping extends to max_pfn_mapped, so that we can directly access | ||
118 | * apertures, ACPI and other tables without having to play with fixmaps. | ||
119 | */ | ||
120 | unsigned long max_low_pfn_mapped; | ||
121 | unsigned long max_pfn_mapped; | ||
122 | |||
115 | RESERVE_BRK(dmi_alloc, 65536); | 123 | RESERVE_BRK(dmi_alloc, 65536); |
116 | 124 | ||
117 | unsigned int boot_cpu_id __read_mostly; | 125 | unsigned int boot_cpu_id __read_mostly; |
@@ -860,12 +868,16 @@ void __init setup_arch(char **cmdline_p) | |||
860 | max_low_pfn = max_pfn; | 868 | max_low_pfn = max_pfn; |
861 | 869 | ||
862 | high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1; | 870 | high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1; |
871 | max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT; | ||
863 | #endif | 872 | #endif |
864 | 873 | ||
865 | #ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION | 874 | #ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION |
866 | setup_bios_corruption_check(); | 875 | setup_bios_corruption_check(); |
867 | #endif | 876 | #endif |
868 | 877 | ||
878 | printk(KERN_DEBUG "initial memory mapped : 0 - %08lx\n", | ||
879 | max_pfn_mapped<<PAGE_SHIFT); | ||
880 | |||
869 | reserve_brk(); | 881 | reserve_brk(); |
870 | 882 | ||
871 | /* max_pfn_mapped is updated here */ | 883 | /* max_pfn_mapped is updated here */ |
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 8f0e13be36b3..9c3f0823e6aa 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c | |||
@@ -425,6 +425,14 @@ void __init setup_per_cpu_areas(void) | |||
425 | early_per_cpu_ptr(x86_cpu_to_node_map) = NULL; | 425 | early_per_cpu_ptr(x86_cpu_to_node_map) = NULL; |
426 | #endif | 426 | #endif |
427 | 427 | ||
428 | #if defined(CONFIG_X86_64) && defined(CONFIG_NUMA) | ||
429 | /* | ||
430 | * make sure boot cpu node_number is right, when boot cpu is on the | ||
431 | * node that doesn't have mem installed | ||
432 | */ | ||
433 | per_cpu(node_number, boot_cpu_id) = cpu_to_node(boot_cpu_id); | ||
434 | #endif | ||
435 | |||
428 | /* Setup node to cpumask map */ | 436 | /* Setup node to cpumask map */ |
429 | setup_node_to_cpumask_map(); | 437 | setup_node_to_cpumask_map(); |
430 | 438 | ||
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index e7277cbcfb40..a725b7f760ae 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c | |||
@@ -161,13 +161,14 @@ static void note_page(struct seq_file *m, struct pg_state *st, | |||
161 | st->current_address >= st->marker[1].start_address) { | 161 | st->current_address >= st->marker[1].start_address) { |
162 | const char *unit = units; | 162 | const char *unit = units; |
163 | unsigned long delta; | 163 | unsigned long delta; |
164 | int width = sizeof(unsigned long) * 2; | ||
164 | 165 | ||
165 | /* | 166 | /* |
166 | * Now print the actual finished series | 167 | * Now print the actual finished series |
167 | */ | 168 | */ |
168 | seq_printf(m, "0x%p-0x%p ", | 169 | seq_printf(m, "0x%0*lx-0x%0*lx ", |
169 | (void *)st->start_address, | 170 | width, st->start_address, |
170 | (void *)st->current_address); | 171 | width, st->current_address); |
171 | 172 | ||
172 | delta = (st->current_address - st->start_address) >> 10; | 173 | delta = (st->current_address - st->start_address) >> 10; |
173 | while (!(delta & 1023) && unit[1]) { | 174 | while (!(delta & 1023) && unit[1]) { |
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index a03b7279efa0..b9ca6d767dbb 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
@@ -3,40 +3,16 @@ | |||
3 | * Copyright (C) 2001, 2002 Andi Kleen, SuSE Labs. | 3 | * Copyright (C) 2001, 2002 Andi Kleen, SuSE Labs. |
4 | * Copyright (C) 2008-2009, Red Hat Inc., Ingo Molnar | 4 | * Copyright (C) 2008-2009, Red Hat Inc., Ingo Molnar |
5 | */ | 5 | */ |
6 | #include <linux/interrupt.h> | 6 | #include <linux/magic.h> /* STACK_END_MAGIC */ |
7 | #include <linux/mmiotrace.h> | 7 | #include <linux/sched.h> /* test_thread_flag(), ... */ |
8 | #include <linux/bootmem.h> | 8 | #include <linux/kdebug.h> /* oops_begin/end, ... */ |
9 | #include <linux/compiler.h> | 9 | #include <linux/module.h> /* search_exception_table */ |
10 | #include <linux/highmem.h> | 10 | #include <linux/bootmem.h> /* max_low_pfn */ |
11 | #include <linux/kprobes.h> | 11 | #include <linux/kprobes.h> /* __kprobes, ... */ |
12 | #include <linux/uaccess.h> | 12 | #include <linux/mmiotrace.h> /* kmmio_handler, ... */ |
13 | #include <linux/vmalloc.h> | 13 | |
14 | #include <linux/vt_kern.h> | 14 | #include <asm/traps.h> /* dotraplinkage, ... */ |
15 | #include <linux/signal.h> | 15 | #include <asm/pgalloc.h> /* pgd_*(), ... */ |
16 | #include <linux/kernel.h> | ||
17 | #include <linux/ptrace.h> | ||
18 | #include <linux/string.h> | ||
19 | #include <linux/module.h> | ||
20 | #include <linux/kdebug.h> | ||
21 | #include <linux/errno.h> | ||
22 | #include <linux/magic.h> | ||
23 | #include <linux/sched.h> | ||
24 | #include <linux/types.h> | ||
25 | #include <linux/init.h> | ||
26 | #include <linux/mman.h> | ||
27 | #include <linux/tty.h> | ||
28 | #include <linux/smp.h> | ||
29 | #include <linux/mm.h> | ||
30 | |||
31 | #include <asm-generic/sections.h> | ||
32 | |||
33 | #include <asm/tlbflush.h> | ||
34 | #include <asm/pgalloc.h> | ||
35 | #include <asm/segment.h> | ||
36 | #include <asm/system.h> | ||
37 | #include <asm/proto.h> | ||
38 | #include <asm/traps.h> | ||
39 | #include <asm/desc.h> | ||
40 | 16 | ||
41 | /* | 17 | /* |
42 | * Page fault error code bits: | 18 | * Page fault error code bits: |
@@ -538,8 +514,6 @@ bad: | |||
538 | static int is_errata93(struct pt_regs *regs, unsigned long address) | 514 | static int is_errata93(struct pt_regs *regs, unsigned long address) |
539 | { | 515 | { |
540 | #ifdef CONFIG_X86_64 | 516 | #ifdef CONFIG_X86_64 |
541 | static int once; | ||
542 | |||
543 | if (address != regs->ip) | 517 | if (address != regs->ip) |
544 | return 0; | 518 | return 0; |
545 | 519 | ||
@@ -549,10 +523,7 @@ static int is_errata93(struct pt_regs *regs, unsigned long address) | |||
549 | address |= 0xffffffffUL << 32; | 523 | address |= 0xffffffffUL << 32; |
550 | if ((address >= (u64)_stext && address <= (u64)_etext) || | 524 | if ((address >= (u64)_stext && address <= (u64)_etext) || |
551 | (address >= MODULES_VADDR && address <= MODULES_END)) { | 525 | (address >= MODULES_VADDR && address <= MODULES_END)) { |
552 | if (!once) { | 526 | printk_once(errata93_warning); |
553 | printk(errata93_warning); | ||
554 | once = 1; | ||
555 | } | ||
556 | regs->ip = address; | 527 | regs->ip = address; |
557 | return 1; | 528 | return 1; |
558 | } | 529 | } |
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 4b98df0973b9..34c1bfb64f1c 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c | |||
@@ -11,6 +11,9 @@ | |||
11 | #include <asm/setup.h> | 11 | #include <asm/setup.h> |
12 | #include <asm/system.h> | 12 | #include <asm/system.h> |
13 | #include <asm/tlbflush.h> | 13 | #include <asm/tlbflush.h> |
14 | #include <asm/tlb.h> | ||
15 | |||
16 | DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); | ||
14 | 17 | ||
15 | unsigned long __initdata e820_table_start; | 18 | unsigned long __initdata e820_table_start; |
16 | unsigned long __meminitdata e820_table_end; | 19 | unsigned long __meminitdata e820_table_end; |
@@ -24,6 +27,69 @@ int direct_gbpages | |||
24 | #endif | 27 | #endif |
25 | ; | 28 | ; |
26 | 29 | ||
30 | int nx_enabled; | ||
31 | |||
32 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) | ||
33 | static int disable_nx __cpuinitdata; | ||
34 | |||
35 | /* | ||
36 | * noexec = on|off | ||
37 | * | ||
38 | * Control non-executable mappings for processes. | ||
39 | * | ||
40 | * on Enable | ||
41 | * off Disable | ||
42 | */ | ||
43 | static int __init noexec_setup(char *str) | ||
44 | { | ||
45 | if (!str) | ||
46 | return -EINVAL; | ||
47 | if (!strncmp(str, "on", 2)) { | ||
48 | __supported_pte_mask |= _PAGE_NX; | ||
49 | disable_nx = 0; | ||
50 | } else if (!strncmp(str, "off", 3)) { | ||
51 | disable_nx = 1; | ||
52 | __supported_pte_mask &= ~_PAGE_NX; | ||
53 | } | ||
54 | return 0; | ||
55 | } | ||
56 | early_param("noexec", noexec_setup); | ||
57 | #endif | ||
58 | |||
59 | #ifdef CONFIG_X86_PAE | ||
60 | static void __init set_nx(void) | ||
61 | { | ||
62 | unsigned int v[4], l, h; | ||
63 | |||
64 | if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) { | ||
65 | cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]); | ||
66 | |||
67 | if ((v[3] & (1 << 20)) && !disable_nx) { | ||
68 | rdmsr(MSR_EFER, l, h); | ||
69 | l |= EFER_NX; | ||
70 | wrmsr(MSR_EFER, l, h); | ||
71 | nx_enabled = 1; | ||
72 | __supported_pte_mask |= _PAGE_NX; | ||
73 | } | ||
74 | } | ||
75 | } | ||
76 | #else | ||
77 | static inline void set_nx(void) | ||
78 | { | ||
79 | } | ||
80 | #endif | ||
81 | |||
82 | #ifdef CONFIG_X86_64 | ||
83 | void __cpuinit check_efer(void) | ||
84 | { | ||
85 | unsigned long efer; | ||
86 | |||
87 | rdmsrl(MSR_EFER, efer); | ||
88 | if (!(efer & EFER_NX) || disable_nx) | ||
89 | __supported_pte_mask &= ~_PAGE_NX; | ||
90 | } | ||
91 | #endif | ||
92 | |||
27 | static void __init find_early_table_space(unsigned long end, int use_pse, | 93 | static void __init find_early_table_space(unsigned long end, int use_pse, |
28 | int use_gbpages) | 94 | int use_gbpages) |
29 | { | 95 | { |
@@ -67,12 +133,11 @@ static void __init find_early_table_space(unsigned long end, int use_pse, | |||
67 | */ | 133 | */ |
68 | #ifdef CONFIG_X86_32 | 134 | #ifdef CONFIG_X86_32 |
69 | start = 0x7000; | 135 | start = 0x7000; |
70 | e820_table_start = find_e820_area(start, max_pfn_mapped<<PAGE_SHIFT, | 136 | #else |
71 | tables, PAGE_SIZE); | ||
72 | #else /* CONFIG_X86_64 */ | ||
73 | start = 0x8000; | 137 | start = 0x8000; |
74 | e820_table_start = find_e820_area(start, end, tables, PAGE_SIZE); | ||
75 | #endif | 138 | #endif |
139 | e820_table_start = find_e820_area(start, max_pfn_mapped<<PAGE_SHIFT, | ||
140 | tables, PAGE_SIZE); | ||
76 | if (e820_table_start == -1UL) | 141 | if (e820_table_start == -1UL) |
77 | panic("Cannot find space for the kernel page tables"); | 142 | panic("Cannot find space for the kernel page tables"); |
78 | 143 | ||
@@ -160,12 +225,9 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, | |||
160 | use_gbpages = direct_gbpages; | 225 | use_gbpages = direct_gbpages; |
161 | #endif | 226 | #endif |
162 | 227 | ||
163 | #ifdef CONFIG_X86_32 | ||
164 | #ifdef CONFIG_X86_PAE | ||
165 | set_nx(); | 228 | set_nx(); |
166 | if (nx_enabled) | 229 | if (nx_enabled) |
167 | printk(KERN_INFO "NX (Execute Disable) protection: active\n"); | 230 | printk(KERN_INFO "NX (Execute Disable) protection: active\n"); |
168 | #endif | ||
169 | 231 | ||
170 | /* Enable PSE if available */ | 232 | /* Enable PSE if available */ |
171 | if (cpu_has_pse) | 233 | if (cpu_has_pse) |
@@ -176,7 +238,6 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, | |||
176 | set_in_cr4(X86_CR4_PGE); | 238 | set_in_cr4(X86_CR4_PGE); |
177 | __supported_pte_mask |= _PAGE_GLOBAL; | 239 | __supported_pte_mask |= _PAGE_GLOBAL; |
178 | } | 240 | } |
179 | #endif | ||
180 | 241 | ||
181 | if (use_gbpages) | 242 | if (use_gbpages) |
182 | page_size_mask |= 1 << PG_LEVEL_1G; | 243 | page_size_mask |= 1 << PG_LEVEL_1G; |
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 749559ed80f5..949708d7a481 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c | |||
@@ -49,12 +49,9 @@ | |||
49 | #include <asm/paravirt.h> | 49 | #include <asm/paravirt.h> |
50 | #include <asm/setup.h> | 50 | #include <asm/setup.h> |
51 | #include <asm/cacheflush.h> | 51 | #include <asm/cacheflush.h> |
52 | #include <asm/page_types.h> | ||
52 | #include <asm/init.h> | 53 | #include <asm/init.h> |
53 | 54 | ||
54 | unsigned long max_low_pfn_mapped; | ||
55 | unsigned long max_pfn_mapped; | ||
56 | |||
57 | DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); | ||
58 | unsigned long highstart_pfn, highend_pfn; | 55 | unsigned long highstart_pfn, highend_pfn; |
59 | 56 | ||
60 | static noinline int do_test_wp_bit(void); | 57 | static noinline int do_test_wp_bit(void); |
@@ -587,61 +584,9 @@ void zap_low_mappings(void) | |||
587 | flush_tlb_all(); | 584 | flush_tlb_all(); |
588 | } | 585 | } |
589 | 586 | ||
590 | int nx_enabled; | ||
591 | |||
592 | pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL | _PAGE_IOMAP); | 587 | pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL | _PAGE_IOMAP); |
593 | EXPORT_SYMBOL_GPL(__supported_pte_mask); | 588 | EXPORT_SYMBOL_GPL(__supported_pte_mask); |
594 | 589 | ||
595 | #ifdef CONFIG_X86_PAE | ||
596 | |||
597 | static int disable_nx __initdata; | ||
598 | |||
599 | /* | ||
600 | * noexec = on|off | ||
601 | * | ||
602 | * Control non executable mappings. | ||
603 | * | ||
604 | * on Enable | ||
605 | * off Disable | ||
606 | */ | ||
607 | static int __init noexec_setup(char *str) | ||
608 | { | ||
609 | if (!str || !strcmp(str, "on")) { | ||
610 | if (cpu_has_nx) { | ||
611 | __supported_pte_mask |= _PAGE_NX; | ||
612 | disable_nx = 0; | ||
613 | } | ||
614 | } else { | ||
615 | if (!strcmp(str, "off")) { | ||
616 | disable_nx = 1; | ||
617 | __supported_pte_mask &= ~_PAGE_NX; | ||
618 | } else { | ||
619 | return -EINVAL; | ||
620 | } | ||
621 | } | ||
622 | |||
623 | return 0; | ||
624 | } | ||
625 | early_param("noexec", noexec_setup); | ||
626 | |||
627 | void __init set_nx(void) | ||
628 | { | ||
629 | unsigned int v[4], l, h; | ||
630 | |||
631 | if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) { | ||
632 | cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]); | ||
633 | |||
634 | if ((v[3] & (1 << 20)) && !disable_nx) { | ||
635 | rdmsr(MSR_EFER, l, h); | ||
636 | l |= EFER_NX; | ||
637 | wrmsr(MSR_EFER, l, h); | ||
638 | nx_enabled = 1; | ||
639 | __supported_pte_mask |= _PAGE_NX; | ||
640 | } | ||
641 | } | ||
642 | } | ||
643 | #endif | ||
644 | |||
645 | /* user-defined highmem size */ | 590 | /* user-defined highmem size */ |
646 | static unsigned int highmem_pages = -1; | 591 | static unsigned int highmem_pages = -1; |
647 | 592 | ||
@@ -761,15 +706,15 @@ void __init initmem_init(unsigned long start_pfn, | |||
761 | highstart_pfn = highend_pfn = max_pfn; | 706 | highstart_pfn = highend_pfn = max_pfn; |
762 | if (max_pfn > max_low_pfn) | 707 | if (max_pfn > max_low_pfn) |
763 | highstart_pfn = max_low_pfn; | 708 | highstart_pfn = max_low_pfn; |
764 | memory_present(0, 0, highend_pfn); | ||
765 | e820_register_active_regions(0, 0, highend_pfn); | 709 | e820_register_active_regions(0, 0, highend_pfn); |
710 | sparse_memory_present_with_active_regions(0); | ||
766 | printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", | 711 | printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", |
767 | pages_to_mb(highend_pfn - highstart_pfn)); | 712 | pages_to_mb(highend_pfn - highstart_pfn)); |
768 | num_physpages = highend_pfn; | 713 | num_physpages = highend_pfn; |
769 | high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; | 714 | high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; |
770 | #else | 715 | #else |
771 | memory_present(0, 0, max_low_pfn); | ||
772 | e820_register_active_regions(0, 0, max_low_pfn); | 716 | e820_register_active_regions(0, 0, max_low_pfn); |
717 | sparse_memory_present_with_active_regions(0); | ||
773 | num_physpages = max_low_pfn; | 718 | num_physpages = max_low_pfn; |
774 | high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; | 719 | high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; |
775 | #endif | 720 | #endif |
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 1753e8020df6..52bb9519bb86 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -50,18 +50,8 @@ | |||
50 | #include <asm/cacheflush.h> | 50 | #include <asm/cacheflush.h> |
51 | #include <asm/init.h> | 51 | #include <asm/init.h> |
52 | 52 | ||
53 | /* | ||
54 | * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. | ||
55 | * The direct mapping extends to max_pfn_mapped, so that we can directly access | ||
56 | * apertures, ACPI and other tables without having to play with fixmaps. | ||
57 | */ | ||
58 | unsigned long max_low_pfn_mapped; | ||
59 | unsigned long max_pfn_mapped; | ||
60 | |||
61 | static unsigned long dma_reserve __initdata; | 53 | static unsigned long dma_reserve __initdata; |
62 | 54 | ||
63 | DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); | ||
64 | |||
65 | static int __init parse_direct_gbpages_off(char *arg) | 55 | static int __init parse_direct_gbpages_off(char *arg) |
66 | { | 56 | { |
67 | direct_gbpages = 0; | 57 | direct_gbpages = 0; |
@@ -85,39 +75,6 @@ early_param("gbpages", parse_direct_gbpages_on); | |||
85 | pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP; | 75 | pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP; |
86 | EXPORT_SYMBOL_GPL(__supported_pte_mask); | 76 | EXPORT_SYMBOL_GPL(__supported_pte_mask); |
87 | 77 | ||
88 | static int disable_nx __cpuinitdata; | ||
89 | |||
90 | /* | ||
91 | * noexec=on|off | ||
92 | * Control non-executable mappings for 64-bit processes. | ||
93 | * | ||
94 | * on Enable (default) | ||
95 | * off Disable | ||
96 | */ | ||
97 | static int __init nonx_setup(char *str) | ||
98 | { | ||
99 | if (!str) | ||
100 | return -EINVAL; | ||
101 | if (!strncmp(str, "on", 2)) { | ||
102 | __supported_pte_mask |= _PAGE_NX; | ||
103 | disable_nx = 0; | ||
104 | } else if (!strncmp(str, "off", 3)) { | ||
105 | disable_nx = 1; | ||
106 | __supported_pte_mask &= ~_PAGE_NX; | ||
107 | } | ||
108 | return 0; | ||
109 | } | ||
110 | early_param("noexec", nonx_setup); | ||
111 | |||
112 | void __cpuinit check_efer(void) | ||
113 | { | ||
114 | unsigned long efer; | ||
115 | |||
116 | rdmsrl(MSR_EFER, efer); | ||
117 | if (!(efer & EFER_NX) || disable_nx) | ||
118 | __supported_pte_mask &= ~_PAGE_NX; | ||
119 | } | ||
120 | |||
121 | int force_personality32; | 78 | int force_personality32; |
122 | 79 | ||
123 | /* | 80 | /* |
@@ -628,6 +585,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn) | |||
628 | early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT); | 585 | early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT); |
629 | reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT); | 586 | reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT); |
630 | } | 587 | } |
588 | #endif | ||
631 | 589 | ||
632 | void __init paging_init(void) | 590 | void __init paging_init(void) |
633 | { | 591 | { |
@@ -638,11 +596,10 @@ void __init paging_init(void) | |||
638 | max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; | 596 | max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; |
639 | max_zone_pfns[ZONE_NORMAL] = max_pfn; | 597 | max_zone_pfns[ZONE_NORMAL] = max_pfn; |
640 | 598 | ||
641 | memory_present(0, 0, max_pfn); | 599 | sparse_memory_present_with_active_regions(MAX_NUMNODES); |
642 | sparse_init(); | 600 | sparse_init(); |
643 | free_area_init_nodes(max_zone_pfns); | 601 | free_area_init_nodes(max_zone_pfns); |
644 | } | 602 | } |
645 | #endif | ||
646 | 603 | ||
647 | /* | 604 | /* |
648 | * Memory hotplug specific functions | 605 | * Memory hotplug specific functions |
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 2d05a12029dc..459913beac71 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
@@ -179,18 +179,25 @@ static void * __init early_node_mem(int nodeid, unsigned long start, | |||
179 | } | 179 | } |
180 | 180 | ||
181 | /* Initialize bootmem allocator for a node */ | 181 | /* Initialize bootmem allocator for a node */ |
182 | void __init setup_node_bootmem(int nodeid, unsigned long start, | 182 | void __init |
183 | unsigned long end) | 183 | setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) |
184 | { | 184 | { |
185 | unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size; | 185 | unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size; |
186 | const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE); | ||
186 | unsigned long bootmap_start, nodedata_phys; | 187 | unsigned long bootmap_start, nodedata_phys; |
187 | void *bootmap; | 188 | void *bootmap; |
188 | const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE); | ||
189 | int nid; | 189 | int nid; |
190 | 190 | ||
191 | if (!end) | 191 | if (!end) |
192 | return; | 192 | return; |
193 | 193 | ||
194 | /* | ||
195 | * Don't confuse VM with a node that doesn't have the | ||
196 | * minimum amount of memory: | ||
197 | */ | ||
198 | if (end && (end - start) < NODE_MIN_SIZE) | ||
199 | return; | ||
200 | |||
194 | start = roundup(start, ZONE_ALIGN); | 201 | start = roundup(start, ZONE_ALIGN); |
195 | 202 | ||
196 | printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid, | 203 | printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid, |
@@ -272,9 +279,6 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, | |||
272 | reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, | 279 | reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, |
273 | bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT); | 280 | bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT); |
274 | 281 | ||
275 | #ifdef CONFIG_ACPI_NUMA | ||
276 | srat_reserve_add_area(nodeid); | ||
277 | #endif | ||
278 | node_set_online(nodeid); | 282 | node_set_online(nodeid); |
279 | } | 283 | } |
280 | 284 | ||
@@ -578,21 +582,6 @@ unsigned long __init numa_free_all_bootmem(void) | |||
578 | return pages; | 582 | return pages; |
579 | } | 583 | } |
580 | 584 | ||
581 | void __init paging_init(void) | ||
582 | { | ||
583 | unsigned long max_zone_pfns[MAX_NR_ZONES]; | ||
584 | |||
585 | memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); | ||
586 | max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN; | ||
587 | max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; | ||
588 | max_zone_pfns[ZONE_NORMAL] = max_pfn; | ||
589 | |||
590 | sparse_memory_present_with_active_regions(MAX_NUMNODES); | ||
591 | sparse_init(); | ||
592 | |||
593 | free_area_init_nodes(max_zone_pfns); | ||
594 | } | ||
595 | |||
596 | static __init int numa_setup(char *opt) | 585 | static __init int numa_setup(char *opt) |
597 | { | 586 | { |
598 | if (!opt) | 587 | if (!opt) |
@@ -606,8 +595,6 @@ static __init int numa_setup(char *opt) | |||
606 | #ifdef CONFIG_ACPI_NUMA | 595 | #ifdef CONFIG_ACPI_NUMA |
607 | if (!strncmp(opt, "noacpi", 6)) | 596 | if (!strncmp(opt, "noacpi", 6)) |
608 | acpi_numa = -1; | 597 | acpi_numa = -1; |
609 | if (!strncmp(opt, "hotadd=", 7)) | ||
610 | hotadd_percent = simple_strtoul(opt+7, NULL, 10); | ||
611 | #endif | 598 | #endif |
612 | return 0; | 599 | return 0; |
613 | } | 600 | } |
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 01765955baaf..2dfcbf9df2ae 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c | |||
@@ -31,17 +31,11 @@ static nodemask_t nodes_parsed __initdata; | |||
31 | static nodemask_t cpu_nodes_parsed __initdata; | 31 | static nodemask_t cpu_nodes_parsed __initdata; |
32 | static struct bootnode nodes[MAX_NUMNODES] __initdata; | 32 | static struct bootnode nodes[MAX_NUMNODES] __initdata; |
33 | static struct bootnode nodes_add[MAX_NUMNODES]; | 33 | static struct bootnode nodes_add[MAX_NUMNODES]; |
34 | static int found_add_area __initdata; | ||
35 | int hotadd_percent __initdata = 0; | ||
36 | 34 | ||
37 | static int num_node_memblks __initdata; | 35 | static int num_node_memblks __initdata; |
38 | static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata; | 36 | static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata; |
39 | static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata; | 37 | static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata; |
40 | 38 | ||
41 | /* Too small nodes confuse the VM badly. Usually they result | ||
42 | from BIOS bugs. */ | ||
43 | #define NODE_MIN_SIZE (4*1024*1024) | ||
44 | |||
45 | static __init int setup_node(int pxm) | 39 | static __init int setup_node(int pxm) |
46 | { | 40 | { |
47 | return acpi_map_pxm_to_node(pxm); | 41 | return acpi_map_pxm_to_node(pxm); |
@@ -66,9 +60,6 @@ static __init void cutoff_node(int i, unsigned long start, unsigned long end) | |||
66 | { | 60 | { |
67 | struct bootnode *nd = &nodes[i]; | 61 | struct bootnode *nd = &nodes[i]; |
68 | 62 | ||
69 | if (found_add_area) | ||
70 | return; | ||
71 | |||
72 | if (nd->start < start) { | 63 | if (nd->start < start) { |
73 | nd->start = start; | 64 | nd->start = start; |
74 | if (nd->end < nd->start) | 65 | if (nd->end < nd->start) |
@@ -86,7 +77,6 @@ static __init void bad_srat(void) | |||
86 | int i; | 77 | int i; |
87 | printk(KERN_ERR "SRAT: SRAT not used.\n"); | 78 | printk(KERN_ERR "SRAT: SRAT not used.\n"); |
88 | acpi_numa = -1; | 79 | acpi_numa = -1; |
89 | found_add_area = 0; | ||
90 | for (i = 0; i < MAX_LOCAL_APIC; i++) | 80 | for (i = 0; i < MAX_LOCAL_APIC; i++) |
91 | apicid_to_node[i] = NUMA_NO_NODE; | 81 | apicid_to_node[i] = NUMA_NO_NODE; |
92 | for (i = 0; i < MAX_NUMNODES; i++) | 82 | for (i = 0; i < MAX_NUMNODES; i++) |
@@ -182,24 +172,21 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) | |||
182 | pxm, apic_id, node); | 172 | pxm, apic_id, node); |
183 | } | 173 | } |
184 | 174 | ||
185 | static int update_end_of_memory(unsigned long end) {return -1;} | ||
186 | static int hotadd_enough_memory(struct bootnode *nd) {return 1;} | ||
187 | #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE | 175 | #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE |
188 | static inline int save_add_info(void) {return 1;} | 176 | static inline int save_add_info(void) {return 1;} |
189 | #else | 177 | #else |
190 | static inline int save_add_info(void) {return 0;} | 178 | static inline int save_add_info(void) {return 0;} |
191 | #endif | 179 | #endif |
192 | /* | 180 | /* |
193 | * Update nodes_add and decide if to include add are in the zone. | 181 | * Update nodes_add[] |
194 | * Both SPARSE and RESERVE need nodes_add information. | 182 | * This code supports one contiguous hot add area per node |
195 | * This code supports one contiguous hot add area per node. | ||
196 | */ | 183 | */ |
197 | static int __init | 184 | static void __init |
198 | reserve_hotadd(int node, unsigned long start, unsigned long end) | 185 | update_nodes_add(int node, unsigned long start, unsigned long end) |
199 | { | 186 | { |
200 | unsigned long s_pfn = start >> PAGE_SHIFT; | 187 | unsigned long s_pfn = start >> PAGE_SHIFT; |
201 | unsigned long e_pfn = end >> PAGE_SHIFT; | 188 | unsigned long e_pfn = end >> PAGE_SHIFT; |
202 | int ret = 0, changed = 0; | 189 | int changed = 0; |
203 | struct bootnode *nd = &nodes_add[node]; | 190 | struct bootnode *nd = &nodes_add[node]; |
204 | 191 | ||
205 | /* I had some trouble with strange memory hotadd regions breaking | 192 | /* I had some trouble with strange memory hotadd regions breaking |
@@ -210,7 +197,7 @@ reserve_hotadd(int node, unsigned long start, unsigned long end) | |||
210 | mistakes */ | 197 | mistakes */ |
211 | if ((signed long)(end - start) < NODE_MIN_SIZE) { | 198 | if ((signed long)(end - start) < NODE_MIN_SIZE) { |
212 | printk(KERN_ERR "SRAT: Hotplug area too small\n"); | 199 | printk(KERN_ERR "SRAT: Hotplug area too small\n"); |
213 | return -1; | 200 | return; |
214 | } | 201 | } |
215 | 202 | ||
216 | /* This check might be a bit too strict, but I'm keeping it for now. */ | 203 | /* This check might be a bit too strict, but I'm keeping it for now. */ |
@@ -218,12 +205,7 @@ reserve_hotadd(int node, unsigned long start, unsigned long end) | |||
218 | printk(KERN_ERR | 205 | printk(KERN_ERR |
219 | "SRAT: Hotplug area %lu -> %lu has existing memory\n", | 206 | "SRAT: Hotplug area %lu -> %lu has existing memory\n", |
220 | s_pfn, e_pfn); | 207 | s_pfn, e_pfn); |
221 | return -1; | 208 | return; |
222 | } | ||
223 | |||
224 | if (!hotadd_enough_memory(&nodes_add[node])) { | ||
225 | printk(KERN_ERR "SRAT: Hotplug area too large\n"); | ||
226 | return -1; | ||
227 | } | 209 | } |
228 | 210 | ||
229 | /* Looks good */ | 211 | /* Looks good */ |
@@ -245,11 +227,9 @@ reserve_hotadd(int node, unsigned long start, unsigned long end) | |||
245 | printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n"); | 227 | printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n"); |
246 | } | 228 | } |
247 | 229 | ||
248 | ret = update_end_of_memory(nd->end); | ||
249 | |||
250 | if (changed) | 230 | if (changed) |
251 | printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", nd->start, nd->end); | 231 | printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", |
252 | return ret; | 232 | nd->start, nd->end); |
253 | } | 233 | } |
254 | 234 | ||
255 | /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */ | 235 | /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */ |
@@ -310,13 +290,10 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) | |||
310 | start, end); | 290 | start, end); |
311 | e820_register_active_regions(node, start >> PAGE_SHIFT, | 291 | e820_register_active_regions(node, start >> PAGE_SHIFT, |
312 | end >> PAGE_SHIFT); | 292 | end >> PAGE_SHIFT); |
313 | push_node_boundaries(node, nd->start >> PAGE_SHIFT, | ||
314 | nd->end >> PAGE_SHIFT); | ||
315 | 293 | ||
316 | if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && | 294 | if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) { |
317 | (reserve_hotadd(node, start, end) < 0)) { | 295 | update_nodes_add(node, start, end); |
318 | /* Ignore hotadd region. Undo damage */ | 296 | /* restore nodes[node] */ |
319 | printk(KERN_NOTICE "SRAT: Hotplug region ignored\n"); | ||
320 | *nd = oldnode; | 297 | *nd = oldnode; |
321 | if ((nd->start | nd->end) == 0) | 298 | if ((nd->start | nd->end) == 0) |
322 | node_clear(node, nodes_parsed); | 299 | node_clear(node, nodes_parsed); |
@@ -345,9 +322,9 @@ static int __init nodes_cover_memory(const struct bootnode *nodes) | |||
345 | pxmram = 0; | 322 | pxmram = 0; |
346 | } | 323 | } |
347 | 324 | ||
348 | e820ram = max_pfn - absent_pages_in_range(0, max_pfn); | 325 | e820ram = max_pfn - (e820_hole_size(0, max_pfn<<PAGE_SHIFT)>>PAGE_SHIFT); |
349 | /* We seem to lose 3 pages somewhere. Allow a bit of slack. */ | 326 | /* We seem to lose 3 pages somewhere. Allow 1M of slack. */ |
350 | if ((long)(e820ram - pxmram) >= 1*1024*1024) { | 327 | if ((long)(e820ram - pxmram) >= (1<<(20 - PAGE_SHIFT))) { |
351 | printk(KERN_ERR | 328 | printk(KERN_ERR |
352 | "SRAT: PXMs only cover %luMB of your %luMB e820 RAM. Not used.\n", | 329 | "SRAT: PXMs only cover %luMB of your %luMB e820 RAM. Not used.\n", |
353 | (pxmram << PAGE_SHIFT) >> 20, | 330 | (pxmram << PAGE_SHIFT) >> 20, |
@@ -357,17 +334,6 @@ static int __init nodes_cover_memory(const struct bootnode *nodes) | |||
357 | return 1; | 334 | return 1; |
358 | } | 335 | } |
359 | 336 | ||
360 | static void __init unparse_node(int node) | ||
361 | { | ||
362 | int i; | ||
363 | node_clear(node, nodes_parsed); | ||
364 | node_clear(node, cpu_nodes_parsed); | ||
365 | for (i = 0; i < MAX_LOCAL_APIC; i++) { | ||
366 | if (apicid_to_node[i] == node) | ||
367 | apicid_to_node[i] = NUMA_NO_NODE; | ||
368 | } | ||
369 | } | ||
370 | |||
371 | void __init acpi_numa_arch_fixup(void) {} | 337 | void __init acpi_numa_arch_fixup(void) {} |
372 | 338 | ||
373 | /* Use the information discovered above to actually set up the nodes. */ | 339 | /* Use the information discovered above to actually set up the nodes. */ |
@@ -379,18 +345,8 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) | |||
379 | return -1; | 345 | return -1; |
380 | 346 | ||
381 | /* First clean up the node list */ | 347 | /* First clean up the node list */ |
382 | for (i = 0; i < MAX_NUMNODES; i++) { | 348 | for (i = 0; i < MAX_NUMNODES; i++) |
383 | cutoff_node(i, start, end); | 349 | cutoff_node(i, start, end); |
384 | /* | ||
385 | * don't confuse VM with a node that doesn't have the | ||
386 | * minimum memory. | ||
387 | */ | ||
388 | if (nodes[i].end && | ||
389 | (nodes[i].end - nodes[i].start) < NODE_MIN_SIZE) { | ||
390 | unparse_node(i); | ||
391 | node_set_offline(i); | ||
392 | } | ||
393 | } | ||
394 | 350 | ||
395 | if (!nodes_cover_memory(nodes)) { | 351 | if (!nodes_cover_memory(nodes)) { |
396 | bad_srat(); | 352 | bad_srat(); |
@@ -423,7 +379,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) | |||
423 | 379 | ||
424 | if (node == NUMA_NO_NODE) | 380 | if (node == NUMA_NO_NODE) |
425 | continue; | 381 | continue; |
426 | if (!node_isset(node, node_possible_map)) | 382 | if (!node_online(node)) |
427 | numa_clear_node(i); | 383 | numa_clear_node(i); |
428 | } | 384 | } |
429 | numa_init_array(); | 385 | numa_init_array(); |
@@ -510,26 +466,6 @@ static int null_slit_node_compare(int a, int b) | |||
510 | } | 466 | } |
511 | #endif /* CONFIG_NUMA_EMU */ | 467 | #endif /* CONFIG_NUMA_EMU */ |
512 | 468 | ||
513 | void __init srat_reserve_add_area(int nodeid) | ||
514 | { | ||
515 | if (found_add_area && nodes_add[nodeid].end) { | ||
516 | u64 total_mb; | ||
517 | |||
518 | printk(KERN_INFO "SRAT: Reserving hot-add memory space " | ||
519 | "for node %d at %Lx-%Lx\n", | ||
520 | nodeid, nodes_add[nodeid].start, nodes_add[nodeid].end); | ||
521 | total_mb = (nodes_add[nodeid].end - nodes_add[nodeid].start) | ||
522 | >> PAGE_SHIFT; | ||
523 | total_mb *= sizeof(struct page); | ||
524 | total_mb >>= 20; | ||
525 | printk(KERN_INFO "SRAT: This will cost you %Lu MB of " | ||
526 | "pre-allocated memory.\n", (unsigned long long)total_mb); | ||
527 | reserve_bootmem_node(NODE_DATA(nodeid), nodes_add[nodeid].start, | ||
528 | nodes_add[nodeid].end - nodes_add[nodeid].start, | ||
529 | BOOTMEM_DEFAULT); | ||
530 | } | ||
531 | } | ||
532 | |||
533 | int __node_distance(int a, int b) | 469 | int __node_distance(int a, int b) |
534 | { | 470 | { |
535 | int index; | 471 | int index; |