diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-12-08 16:27:33 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-12-08 16:27:33 -0500 |
commit | e33c01972239fee4696679ae5f7d1f340f424999 (patch) | |
tree | bd4bc3223ba572719b3a53142fdb98910450fe64 /arch/x86/mm | |
parent | 343036cea2854acf8d4b4c930c0063223bc6b8a2 (diff) | |
parent | ccef086454d4c97e7b722e9303390207d681cb4c (diff) |
Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (36 commits)
x86, mm: Correct the implementation of is_untracked_pat_range()
x86/pat: Trivial: don't create debugfs for memtype if pat is disabled
x86, mtrr: Fix sorting of mtrr after subtracting
x86: Move find_smp_config() earlier and avoid bootmem usage
x86, platform: Change is_untracked_pat_range() to bool; cleanup init
x86: Change is_ISA_range() into an inline function
x86, mm: is_untracked_pat_range() takes a normal semiclosed range
x86, mm: Call is_untracked_pat_range() rather than is_ISA_range()
x86: UV SGI: Don't track GRU space in PAT
x86: SGI UV: Fix BAU initialization
x86, numa: Use near(er) online node instead of roundrobin for NUMA
x86, numa, bootmem: Only free bootmem on NUMA failure path
x86: Change crash kernel to reserve via reserve_early()
x86: Eliminate redundant/contradicting cache line size config options
x86: When cleaning MTRRs, do not fold WP into UC
x86: remove "extern" from function prototypes in <asm/proto.h>
x86, mm: Report state of NX protections during boot
x86, mm: Clean up and simplify NX enablement
x86, pageattr: Make set_memory_(x|nx) aware of NX support
x86, sleep: Always save the value of EFER
...
Fix up conflicts (added both iommu_shutdown and is_untracked_pat_range)
to 'struct x86_platform_ops') in
arch/x86/include/asm/x86_init.h
arch/x86/kernel/x86_init.c
Diffstat (limited to 'arch/x86/mm')
-rw-r--r-- | arch/x86/mm/init.c | 4 | ||||
-rw-r--r-- | arch/x86/mm/init_32.c | 10 | ||||
-rw-r--r-- | arch/x86/mm/init_64.c | 35 | ||||
-rw-r--r-- | arch/x86/mm/k8topology_64.c | 101 | ||||
-rw-r--r-- | arch/x86/mm/numa_32.c | 4 | ||||
-rw-r--r-- | arch/x86/mm/numa_64.c | 252 | ||||
-rw-r--r-- | arch/x86/mm/pageattr.c | 22 | ||||
-rw-r--r-- | arch/x86/mm/pat.c | 13 | ||||
-rw-r--r-- | arch/x86/mm/setup_nx.c | 59 | ||||
-rw-r--r-- | arch/x86/mm/srat_64.c | 29 | ||||
-rw-r--r-- | arch/x86/mm/tlb.c | 3 |
11 files changed, 387 insertions, 145 deletions
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 73ffd5536f62..d406c5239019 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c | |||
@@ -146,10 +146,6 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, | |||
146 | use_gbpages = direct_gbpages; | 146 | use_gbpages = direct_gbpages; |
147 | #endif | 147 | #endif |
148 | 148 | ||
149 | set_nx(); | ||
150 | if (nx_enabled) | ||
151 | printk(KERN_INFO "NX (Execute Disable) protection: active\n"); | ||
152 | |||
153 | /* Enable PSE if available */ | 149 | /* Enable PSE if available */ |
154 | if (cpu_has_pse) | 150 | if (cpu_has_pse) |
155 | set_in_cr4(X86_CR4_PSE); | 151 | set_in_cr4(X86_CR4_PSE); |
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 30938c1d8d5d..c973f8e2a6cf 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c | |||
@@ -412,7 +412,7 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base) | |||
412 | pkmap_page_table = pte; | 412 | pkmap_page_table = pte; |
413 | } | 413 | } |
414 | 414 | ||
415 | static void __init add_one_highpage_init(struct page *page, int pfn) | 415 | static void __init add_one_highpage_init(struct page *page) |
416 | { | 416 | { |
417 | ClearPageReserved(page); | 417 | ClearPageReserved(page); |
418 | init_page_count(page); | 418 | init_page_count(page); |
@@ -445,7 +445,7 @@ static int __init add_highpages_work_fn(unsigned long start_pfn, | |||
445 | if (!pfn_valid(node_pfn)) | 445 | if (!pfn_valid(node_pfn)) |
446 | continue; | 446 | continue; |
447 | page = pfn_to_page(node_pfn); | 447 | page = pfn_to_page(node_pfn); |
448 | add_one_highpage_init(page, node_pfn); | 448 | add_one_highpage_init(page); |
449 | } | 449 | } |
450 | 450 | ||
451 | return 0; | 451 | return 0; |
@@ -703,8 +703,8 @@ void __init find_low_pfn_range(void) | |||
703 | } | 703 | } |
704 | 704 | ||
705 | #ifndef CONFIG_NEED_MULTIPLE_NODES | 705 | #ifndef CONFIG_NEED_MULTIPLE_NODES |
706 | void __init initmem_init(unsigned long start_pfn, | 706 | void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, |
707 | unsigned long end_pfn) | 707 | int acpi, int k8) |
708 | { | 708 | { |
709 | #ifdef CONFIG_HIGHMEM | 709 | #ifdef CONFIG_HIGHMEM |
710 | highstart_pfn = highend_pfn = max_pfn; | 710 | highstart_pfn = highend_pfn = max_pfn; |
@@ -997,7 +997,7 @@ static noinline int do_test_wp_bit(void) | |||
997 | const int rodata_test_data = 0xC3; | 997 | const int rodata_test_data = 0xC3; |
998 | EXPORT_SYMBOL_GPL(rodata_test_data); | 998 | EXPORT_SYMBOL_GPL(rodata_test_data); |
999 | 999 | ||
1000 | static int kernel_set_to_readonly; | 1000 | int kernel_set_to_readonly __read_mostly; |
1001 | 1001 | ||
1002 | void set_kernel_text_rw(void) | 1002 | void set_kernel_text_rw(void) |
1003 | { | 1003 | { |
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 5a4398a6006b..5198b9bb34ef 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -568,7 +568,8 @@ kernel_physical_mapping_init(unsigned long start, | |||
568 | } | 568 | } |
569 | 569 | ||
570 | #ifndef CONFIG_NUMA | 570 | #ifndef CONFIG_NUMA |
571 | void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn) | 571 | void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, |
572 | int acpi, int k8) | ||
572 | { | 573 | { |
573 | unsigned long bootmap_size, bootmap; | 574 | unsigned long bootmap_size, bootmap; |
574 | 575 | ||
@@ -694,12 +695,12 @@ void __init mem_init(void) | |||
694 | const int rodata_test_data = 0xC3; | 695 | const int rodata_test_data = 0xC3; |
695 | EXPORT_SYMBOL_GPL(rodata_test_data); | 696 | EXPORT_SYMBOL_GPL(rodata_test_data); |
696 | 697 | ||
697 | static int kernel_set_to_readonly; | 698 | int kernel_set_to_readonly; |
698 | 699 | ||
699 | void set_kernel_text_rw(void) | 700 | void set_kernel_text_rw(void) |
700 | { | 701 | { |
701 | unsigned long start = PFN_ALIGN(_stext); | 702 | unsigned long start = PFN_ALIGN(_text); |
702 | unsigned long end = PFN_ALIGN(__start_rodata); | 703 | unsigned long end = PFN_ALIGN(__stop___ex_table); |
703 | 704 | ||
704 | if (!kernel_set_to_readonly) | 705 | if (!kernel_set_to_readonly) |
705 | return; | 706 | return; |
@@ -707,13 +708,18 @@ void set_kernel_text_rw(void) | |||
707 | pr_debug("Set kernel text: %lx - %lx for read write\n", | 708 | pr_debug("Set kernel text: %lx - %lx for read write\n", |
708 | start, end); | 709 | start, end); |
709 | 710 | ||
711 | /* | ||
712 | * Make the kernel identity mapping for text RW. Kernel text | ||
713 | * mapping will always be RO. Refer to the comment in | ||
714 | * static_protections() in pageattr.c | ||
715 | */ | ||
710 | set_memory_rw(start, (end - start) >> PAGE_SHIFT); | 716 | set_memory_rw(start, (end - start) >> PAGE_SHIFT); |
711 | } | 717 | } |
712 | 718 | ||
713 | void set_kernel_text_ro(void) | 719 | void set_kernel_text_ro(void) |
714 | { | 720 | { |
715 | unsigned long start = PFN_ALIGN(_stext); | 721 | unsigned long start = PFN_ALIGN(_text); |
716 | unsigned long end = PFN_ALIGN(__start_rodata); | 722 | unsigned long end = PFN_ALIGN(__stop___ex_table); |
717 | 723 | ||
718 | if (!kernel_set_to_readonly) | 724 | if (!kernel_set_to_readonly) |
719 | return; | 725 | return; |
@@ -721,14 +727,21 @@ void set_kernel_text_ro(void) | |||
721 | pr_debug("Set kernel text: %lx - %lx for read only\n", | 727 | pr_debug("Set kernel text: %lx - %lx for read only\n", |
722 | start, end); | 728 | start, end); |
723 | 729 | ||
730 | /* | ||
731 | * Set the kernel identity mapping for text RO. | ||
732 | */ | ||
724 | set_memory_ro(start, (end - start) >> PAGE_SHIFT); | 733 | set_memory_ro(start, (end - start) >> PAGE_SHIFT); |
725 | } | 734 | } |
726 | 735 | ||
727 | void mark_rodata_ro(void) | 736 | void mark_rodata_ro(void) |
728 | { | 737 | { |
729 | unsigned long start = PFN_ALIGN(_stext), end = PFN_ALIGN(__end_rodata); | 738 | unsigned long start = PFN_ALIGN(_text); |
730 | unsigned long rodata_start = | 739 | unsigned long rodata_start = |
731 | ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK; | 740 | ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK; |
741 | unsigned long end = (unsigned long) &__end_rodata_hpage_align; | ||
742 | unsigned long text_end = PAGE_ALIGN((unsigned long) &__stop___ex_table); | ||
743 | unsigned long rodata_end = PAGE_ALIGN((unsigned long) &__end_rodata); | ||
744 | unsigned long data_start = (unsigned long) &_sdata; | ||
732 | 745 | ||
733 | printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", | 746 | printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", |
734 | (end - start) >> 10); | 747 | (end - start) >> 10); |
@@ -751,6 +764,14 @@ void mark_rodata_ro(void) | |||
751 | printk(KERN_INFO "Testing CPA: again\n"); | 764 | printk(KERN_INFO "Testing CPA: again\n"); |
752 | set_memory_ro(start, (end-start) >> PAGE_SHIFT); | 765 | set_memory_ro(start, (end-start) >> PAGE_SHIFT); |
753 | #endif | 766 | #endif |
767 | |||
768 | free_init_pages("unused kernel memory", | ||
769 | (unsigned long) page_address(virt_to_page(text_end)), | ||
770 | (unsigned long) | ||
771 | page_address(virt_to_page(rodata_start))); | ||
772 | free_init_pages("unused kernel memory", | ||
773 | (unsigned long) page_address(virt_to_page(rodata_end)), | ||
774 | (unsigned long) page_address(virt_to_page(data_start))); | ||
754 | } | 775 | } |
755 | 776 | ||
756 | #endif | 777 | #endif |
diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/k8topology_64.c index 268f8255280f..970ed579d4e4 100644 --- a/arch/x86/mm/k8topology_64.c +++ b/arch/x86/mm/k8topology_64.c | |||
@@ -24,6 +24,9 @@ | |||
24 | #include <asm/apic.h> | 24 | #include <asm/apic.h> |
25 | #include <asm/k8.h> | 25 | #include <asm/k8.h> |
26 | 26 | ||
27 | static struct bootnode __initdata nodes[8]; | ||
28 | static nodemask_t __initdata nodes_parsed = NODE_MASK_NONE; | ||
29 | |||
27 | static __init int find_northbridge(void) | 30 | static __init int find_northbridge(void) |
28 | { | 31 | { |
29 | int num; | 32 | int num; |
@@ -54,18 +57,6 @@ static __init void early_get_boot_cpu_id(void) | |||
54 | * need to get boot_cpu_id so can use that to create apicid_to_node | 57 | * need to get boot_cpu_id so can use that to create apicid_to_node |
55 | * in k8_scan_nodes() | 58 | * in k8_scan_nodes() |
56 | */ | 59 | */ |
57 | /* | ||
58 | * Find possible boot-time SMP configuration: | ||
59 | */ | ||
60 | #ifdef CONFIG_X86_MPPARSE | ||
61 | early_find_smp_config(); | ||
62 | #endif | ||
63 | #ifdef CONFIG_ACPI | ||
64 | /* | ||
65 | * Read APIC information from ACPI tables. | ||
66 | */ | ||
67 | early_acpi_boot_init(); | ||
68 | #endif | ||
69 | #ifdef CONFIG_X86_MPPARSE | 60 | #ifdef CONFIG_X86_MPPARSE |
70 | /* | 61 | /* |
71 | * get boot-time SMP configuration: | 62 | * get boot-time SMP configuration: |
@@ -76,12 +67,26 @@ static __init void early_get_boot_cpu_id(void) | |||
76 | early_init_lapic_mapping(); | 67 | early_init_lapic_mapping(); |
77 | } | 68 | } |
78 | 69 | ||
79 | int __init k8_scan_nodes(unsigned long start, unsigned long end) | 70 | int __init k8_get_nodes(struct bootnode *physnodes) |
80 | { | 71 | { |
81 | unsigned numnodes, cores, bits, apicid_base; | 72 | int i; |
73 | int ret = 0; | ||
74 | |||
75 | for_each_node_mask(i, nodes_parsed) { | ||
76 | physnodes[ret].start = nodes[i].start; | ||
77 | physnodes[ret].end = nodes[i].end; | ||
78 | ret++; | ||
79 | } | ||
80 | return ret; | ||
81 | } | ||
82 | |||
83 | int __init k8_numa_init(unsigned long start_pfn, unsigned long end_pfn) | ||
84 | { | ||
85 | unsigned long start = PFN_PHYS(start_pfn); | ||
86 | unsigned long end = PFN_PHYS(end_pfn); | ||
87 | unsigned numnodes; | ||
82 | unsigned long prevbase; | 88 | unsigned long prevbase; |
83 | struct bootnode nodes[8]; | 89 | int i, nb, found = 0; |
84 | int i, j, nb, found = 0; | ||
85 | u32 nodeid, reg; | 90 | u32 nodeid, reg; |
86 | 91 | ||
87 | if (!early_pci_allowed()) | 92 | if (!early_pci_allowed()) |
@@ -91,16 +96,15 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) | |||
91 | if (nb < 0) | 96 | if (nb < 0) |
92 | return nb; | 97 | return nb; |
93 | 98 | ||
94 | printk(KERN_INFO "Scanning NUMA topology in Northbridge %d\n", nb); | 99 | pr_info("Scanning NUMA topology in Northbridge %d\n", nb); |
95 | 100 | ||
96 | reg = read_pci_config(0, nb, 0, 0x60); | 101 | reg = read_pci_config(0, nb, 0, 0x60); |
97 | numnodes = ((reg >> 4) & 0xF) + 1; | 102 | numnodes = ((reg >> 4) & 0xF) + 1; |
98 | if (numnodes <= 1) | 103 | if (numnodes <= 1) |
99 | return -1; | 104 | return -1; |
100 | 105 | ||
101 | printk(KERN_INFO "Number of nodes %d\n", numnodes); | 106 | pr_info("Number of physical nodes %d\n", numnodes); |
102 | 107 | ||
103 | memset(&nodes, 0, sizeof(nodes)); | ||
104 | prevbase = 0; | 108 | prevbase = 0; |
105 | for (i = 0; i < 8; i++) { | 109 | for (i = 0; i < 8; i++) { |
106 | unsigned long base, limit; | 110 | unsigned long base, limit; |
@@ -111,28 +115,28 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) | |||
111 | nodeid = limit & 7; | 115 | nodeid = limit & 7; |
112 | if ((base & 3) == 0) { | 116 | if ((base & 3) == 0) { |
113 | if (i < numnodes) | 117 | if (i < numnodes) |
114 | printk("Skipping disabled node %d\n", i); | 118 | pr_info("Skipping disabled node %d\n", i); |
115 | continue; | 119 | continue; |
116 | } | 120 | } |
117 | if (nodeid >= numnodes) { | 121 | if (nodeid >= numnodes) { |
118 | printk("Ignoring excess node %d (%lx:%lx)\n", nodeid, | 122 | pr_info("Ignoring excess node %d (%lx:%lx)\n", nodeid, |
119 | base, limit); | 123 | base, limit); |
120 | continue; | 124 | continue; |
121 | } | 125 | } |
122 | 126 | ||
123 | if (!limit) { | 127 | if (!limit) { |
124 | printk(KERN_INFO "Skipping node entry %d (base %lx)\n", | 128 | pr_info("Skipping node entry %d (base %lx)\n", |
125 | i, base); | 129 | i, base); |
126 | continue; | 130 | continue; |
127 | } | 131 | } |
128 | if ((base >> 8) & 3 || (limit >> 8) & 3) { | 132 | if ((base >> 8) & 3 || (limit >> 8) & 3) { |
129 | printk(KERN_ERR "Node %d using interleaving mode %lx/%lx\n", | 133 | pr_err("Node %d using interleaving mode %lx/%lx\n", |
130 | nodeid, (base>>8)&3, (limit>>8) & 3); | 134 | nodeid, (base >> 8) & 3, (limit >> 8) & 3); |
131 | return -1; | 135 | return -1; |
132 | } | 136 | } |
133 | if (node_isset(nodeid, node_possible_map)) { | 137 | if (node_isset(nodeid, nodes_parsed)) { |
134 | printk(KERN_INFO "Node %d already present. Skipping\n", | 138 | pr_info("Node %d already present, skipping\n", |
135 | nodeid); | 139 | nodeid); |
136 | continue; | 140 | continue; |
137 | } | 141 | } |
138 | 142 | ||
@@ -141,8 +145,8 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) | |||
141 | limit |= (1<<24)-1; | 145 | limit |= (1<<24)-1; |
142 | limit++; | 146 | limit++; |
143 | 147 | ||
144 | if (limit > max_pfn << PAGE_SHIFT) | 148 | if (limit > end) |
145 | limit = max_pfn << PAGE_SHIFT; | 149 | limit = end; |
146 | if (limit <= base) | 150 | if (limit <= base) |
147 | continue; | 151 | continue; |
148 | 152 | ||
@@ -154,24 +158,24 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) | |||
154 | if (limit > end) | 158 | if (limit > end) |
155 | limit = end; | 159 | limit = end; |
156 | if (limit == base) { | 160 | if (limit == base) { |
157 | printk(KERN_ERR "Empty node %d\n", nodeid); | 161 | pr_err("Empty node %d\n", nodeid); |
158 | continue; | 162 | continue; |
159 | } | 163 | } |
160 | if (limit < base) { | 164 | if (limit < base) { |
161 | printk(KERN_ERR "Node %d bogus settings %lx-%lx.\n", | 165 | pr_err("Node %d bogus settings %lx-%lx.\n", |
162 | nodeid, base, limit); | 166 | nodeid, base, limit); |
163 | continue; | 167 | continue; |
164 | } | 168 | } |
165 | 169 | ||
166 | /* Could sort here, but pun for now. Should not happen anyroads. */ | 170 | /* Could sort here, but pun for now. Should not happen anyroads. */ |
167 | if (prevbase > base) { | 171 | if (prevbase > base) { |
168 | printk(KERN_ERR "Node map not sorted %lx,%lx\n", | 172 | pr_err("Node map not sorted %lx,%lx\n", |
169 | prevbase, base); | 173 | prevbase, base); |
170 | return -1; | 174 | return -1; |
171 | } | 175 | } |
172 | 176 | ||
173 | printk(KERN_INFO "Node %d MemBase %016lx Limit %016lx\n", | 177 | pr_info("Node %d MemBase %016lx Limit %016lx\n", |
174 | nodeid, base, limit); | 178 | nodeid, base, limit); |
175 | 179 | ||
176 | found++; | 180 | found++; |
177 | 181 | ||
@@ -180,18 +184,29 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) | |||
180 | 184 | ||
181 | prevbase = base; | 185 | prevbase = base; |
182 | 186 | ||
183 | node_set(nodeid, node_possible_map); | 187 | node_set(nodeid, nodes_parsed); |
184 | } | 188 | } |
185 | 189 | ||
186 | if (!found) | 190 | if (!found) |
187 | return -1; | 191 | return -1; |
192 | return 0; | ||
193 | } | ||
194 | |||
195 | int __init k8_scan_nodes(void) | ||
196 | { | ||
197 | unsigned int bits; | ||
198 | unsigned int cores; | ||
199 | unsigned int apicid_base; | ||
200 | int i; | ||
188 | 201 | ||
202 | BUG_ON(nodes_empty(nodes_parsed)); | ||
203 | node_possible_map = nodes_parsed; | ||
189 | memnode_shift = compute_hash_shift(nodes, 8, NULL); | 204 | memnode_shift = compute_hash_shift(nodes, 8, NULL); |
190 | if (memnode_shift < 0) { | 205 | if (memnode_shift < 0) { |
191 | printk(KERN_ERR "No NUMA node hash function found. Contact maintainer\n"); | 206 | pr_err("No NUMA node hash function found. Contact maintainer\n"); |
192 | return -1; | 207 | return -1; |
193 | } | 208 | } |
194 | printk(KERN_INFO "Using node hash shift of %d\n", memnode_shift); | 209 | pr_info("Using node hash shift of %d\n", memnode_shift); |
195 | 210 | ||
196 | /* use the coreid bits from early_identify_cpu */ | 211 | /* use the coreid bits from early_identify_cpu */ |
197 | bits = boot_cpu_data.x86_coreid_bits; | 212 | bits = boot_cpu_data.x86_coreid_bits; |
@@ -200,14 +215,12 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) | |||
200 | /* need to get boot_cpu_id early for system with apicid lifting */ | 215 | /* need to get boot_cpu_id early for system with apicid lifting */ |
201 | early_get_boot_cpu_id(); | 216 | early_get_boot_cpu_id(); |
202 | if (boot_cpu_physical_apicid > 0) { | 217 | if (boot_cpu_physical_apicid > 0) { |
203 | printk(KERN_INFO "BSP APIC ID: %02x\n", | 218 | pr_info("BSP APIC ID: %02x\n", boot_cpu_physical_apicid); |
204 | boot_cpu_physical_apicid); | ||
205 | apicid_base = boot_cpu_physical_apicid; | 219 | apicid_base = boot_cpu_physical_apicid; |
206 | } | 220 | } |
207 | 221 | ||
208 | for (i = 0; i < 8; i++) { | 222 | for_each_node_mask(i, node_possible_map) { |
209 | if (nodes[i].start == nodes[i].end) | 223 | int j; |
210 | continue; | ||
211 | 224 | ||
212 | e820_register_active_regions(i, | 225 | e820_register_active_regions(i, |
213 | nodes[i].start >> PAGE_SHIFT, | 226 | nodes[i].start >> PAGE_SHIFT, |
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index d2530062fe00..b20760ca7244 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c | |||
@@ -347,8 +347,8 @@ static void init_remap_allocator(int nid) | |||
347 | (ulong) node_remap_end_vaddr[nid]); | 347 | (ulong) node_remap_end_vaddr[nid]); |
348 | } | 348 | } |
349 | 349 | ||
350 | void __init initmem_init(unsigned long start_pfn, | 350 | void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, |
351 | unsigned long end_pfn) | 351 | int acpi, int k8) |
352 | { | 352 | { |
353 | int nid; | 353 | int nid; |
354 | long kva_target_pfn; | 354 | long kva_target_pfn; |
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 459913beac71..83bbc70d11bb 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
@@ -239,8 +239,14 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) | |||
239 | bootmap = early_node_mem(nodeid, bootmap_start, end, | 239 | bootmap = early_node_mem(nodeid, bootmap_start, end, |
240 | bootmap_pages<<PAGE_SHIFT, PAGE_SIZE); | 240 | bootmap_pages<<PAGE_SHIFT, PAGE_SIZE); |
241 | if (bootmap == NULL) { | 241 | if (bootmap == NULL) { |
242 | if (nodedata_phys < start || nodedata_phys >= end) | 242 | if (nodedata_phys < start || nodedata_phys >= end) { |
243 | free_bootmem(nodedata_phys, pgdat_size); | 243 | /* |
244 | * only need to free it if it is from other node | ||
245 | * bootmem | ||
246 | */ | ||
247 | if (nid != nodeid) | ||
248 | free_bootmem(nodedata_phys, pgdat_size); | ||
249 | } | ||
244 | node_data[nodeid] = NULL; | 250 | node_data[nodeid] = NULL; |
245 | return; | 251 | return; |
246 | } | 252 | } |
@@ -306,8 +312,71 @@ void __init numa_init_array(void) | |||
306 | 312 | ||
307 | #ifdef CONFIG_NUMA_EMU | 313 | #ifdef CONFIG_NUMA_EMU |
308 | /* Numa emulation */ | 314 | /* Numa emulation */ |
315 | static struct bootnode nodes[MAX_NUMNODES] __initdata; | ||
316 | static struct bootnode physnodes[MAX_NUMNODES] __initdata; | ||
309 | static char *cmdline __initdata; | 317 | static char *cmdline __initdata; |
310 | 318 | ||
319 | static int __init setup_physnodes(unsigned long start, unsigned long end, | ||
320 | int acpi, int k8) | ||
321 | { | ||
322 | int nr_nodes = 0; | ||
323 | int ret = 0; | ||
324 | int i; | ||
325 | |||
326 | #ifdef CONFIG_ACPI_NUMA | ||
327 | if (acpi) | ||
328 | nr_nodes = acpi_get_nodes(physnodes); | ||
329 | #endif | ||
330 | #ifdef CONFIG_K8_NUMA | ||
331 | if (k8) | ||
332 | nr_nodes = k8_get_nodes(physnodes); | ||
333 | #endif | ||
334 | /* | ||
335 | * Basic sanity checking on the physical node map: there may be errors | ||
336 | * if the SRAT or K8 incorrectly reported the topology or the mem= | ||
337 | * kernel parameter is used. | ||
338 | */ | ||
339 | for (i = 0; i < nr_nodes; i++) { | ||
340 | if (physnodes[i].start == physnodes[i].end) | ||
341 | continue; | ||
342 | if (physnodes[i].start > end) { | ||
343 | physnodes[i].end = physnodes[i].start; | ||
344 | continue; | ||
345 | } | ||
346 | if (physnodes[i].end < start) { | ||
347 | physnodes[i].start = physnodes[i].end; | ||
348 | continue; | ||
349 | } | ||
350 | if (physnodes[i].start < start) | ||
351 | physnodes[i].start = start; | ||
352 | if (physnodes[i].end > end) | ||
353 | physnodes[i].end = end; | ||
354 | } | ||
355 | |||
356 | /* | ||
357 | * Remove all nodes that have no memory or were truncated because of the | ||
358 | * limited address range. | ||
359 | */ | ||
360 | for (i = 0; i < nr_nodes; i++) { | ||
361 | if (physnodes[i].start == physnodes[i].end) | ||
362 | continue; | ||
363 | physnodes[ret].start = physnodes[i].start; | ||
364 | physnodes[ret].end = physnodes[i].end; | ||
365 | ret++; | ||
366 | } | ||
367 | |||
368 | /* | ||
369 | * If no physical topology was detected, a single node is faked to cover | ||
370 | * the entire address space. | ||
371 | */ | ||
372 | if (!ret) { | ||
373 | physnodes[ret].start = start; | ||
374 | physnodes[ret].end = end; | ||
375 | ret = 1; | ||
376 | } | ||
377 | return ret; | ||
378 | } | ||
379 | |||
311 | /* | 380 | /* |
312 | * Setups up nid to range from addr to addr + size. If the end | 381 | * Setups up nid to range from addr to addr + size. If the end |
313 | * boundary is greater than max_addr, then max_addr is used instead. | 382 | * boundary is greater than max_addr, then max_addr is used instead. |
@@ -315,11 +384,9 @@ static char *cmdline __initdata; | |||
315 | * allocation past addr and -1 otherwise. addr is adjusted to be at | 384 | * allocation past addr and -1 otherwise. addr is adjusted to be at |
316 | * the end of the node. | 385 | * the end of the node. |
317 | */ | 386 | */ |
318 | static int __init setup_node_range(int nid, struct bootnode *nodes, u64 *addr, | 387 | static int __init setup_node_range(int nid, u64 *addr, u64 size, u64 max_addr) |
319 | u64 size, u64 max_addr) | ||
320 | { | 388 | { |
321 | int ret = 0; | 389 | int ret = 0; |
322 | |||
323 | nodes[nid].start = *addr; | 390 | nodes[nid].start = *addr; |
324 | *addr += size; | 391 | *addr += size; |
325 | if (*addr >= max_addr) { | 392 | if (*addr >= max_addr) { |
@@ -335,12 +402,111 @@ static int __init setup_node_range(int nid, struct bootnode *nodes, u64 *addr, | |||
335 | } | 402 | } |
336 | 403 | ||
337 | /* | 404 | /* |
405 | * Sets up nr_nodes fake nodes interleaved over physical nodes ranging from addr | ||
406 | * to max_addr. The return value is the number of nodes allocated. | ||
407 | */ | ||
408 | static int __init split_nodes_interleave(u64 addr, u64 max_addr, | ||
409 | int nr_phys_nodes, int nr_nodes) | ||
410 | { | ||
411 | nodemask_t physnode_mask = NODE_MASK_NONE; | ||
412 | u64 size; | ||
413 | int big; | ||
414 | int ret = 0; | ||
415 | int i; | ||
416 | |||
417 | if (nr_nodes <= 0) | ||
418 | return -1; | ||
419 | if (nr_nodes > MAX_NUMNODES) { | ||
420 | pr_info("numa=fake=%d too large, reducing to %d\n", | ||
421 | nr_nodes, MAX_NUMNODES); | ||
422 | nr_nodes = MAX_NUMNODES; | ||
423 | } | ||
424 | |||
425 | size = (max_addr - addr - e820_hole_size(addr, max_addr)) / nr_nodes; | ||
426 | /* | ||
427 | * Calculate the number of big nodes that can be allocated as a result | ||
428 | * of consolidating the remainder. | ||
429 | */ | ||
430 | big = ((size & ~FAKE_NODE_MIN_HASH_MASK) & nr_nodes) / | ||
431 | FAKE_NODE_MIN_SIZE; | ||
432 | |||
433 | size &= FAKE_NODE_MIN_HASH_MASK; | ||
434 | if (!size) { | ||
435 | pr_err("Not enough memory for each node. " | ||
436 | "NUMA emulation disabled.\n"); | ||
437 | return -1; | ||
438 | } | ||
439 | |||
440 | for (i = 0; i < nr_phys_nodes; i++) | ||
441 | if (physnodes[i].start != physnodes[i].end) | ||
442 | node_set(i, physnode_mask); | ||
443 | |||
444 | /* | ||
445 | * Continue to fill physical nodes with fake nodes until there is no | ||
446 | * memory left on any of them. | ||
447 | */ | ||
448 | while (nodes_weight(physnode_mask)) { | ||
449 | for_each_node_mask(i, physnode_mask) { | ||
450 | u64 end = physnodes[i].start + size; | ||
451 | u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN); | ||
452 | |||
453 | if (ret < big) | ||
454 | end += FAKE_NODE_MIN_SIZE; | ||
455 | |||
456 | /* | ||
457 | * Continue to add memory to this fake node if its | ||
458 | * non-reserved memory is less than the per-node size. | ||
459 | */ | ||
460 | while (end - physnodes[i].start - | ||
461 | e820_hole_size(physnodes[i].start, end) < size) { | ||
462 | end += FAKE_NODE_MIN_SIZE; | ||
463 | if (end > physnodes[i].end) { | ||
464 | end = physnodes[i].end; | ||
465 | break; | ||
466 | } | ||
467 | } | ||
468 | |||
469 | /* | ||
470 | * If there won't be at least FAKE_NODE_MIN_SIZE of | ||
471 | * non-reserved memory in ZONE_DMA32 for the next node, | ||
472 | * this one must extend to the boundary. | ||
473 | */ | ||
474 | if (end < dma32_end && dma32_end - end - | ||
475 | e820_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE) | ||
476 | end = dma32_end; | ||
477 | |||
478 | /* | ||
479 | * If there won't be enough non-reserved memory for the | ||
480 | * next node, this one must extend to the end of the | ||
481 | * physical node. | ||
482 | */ | ||
483 | if (physnodes[i].end - end - | ||
484 | e820_hole_size(end, physnodes[i].end) < size) | ||
485 | end = physnodes[i].end; | ||
486 | |||
487 | /* | ||
488 | * Avoid allocating more nodes than requested, which can | ||
489 | * happen as a result of rounding down each node's size | ||
490 | * to FAKE_NODE_MIN_SIZE. | ||
491 | */ | ||
492 | if (nodes_weight(physnode_mask) + ret >= nr_nodes) | ||
493 | end = physnodes[i].end; | ||
494 | |||
495 | if (setup_node_range(ret++, &physnodes[i].start, | ||
496 | end - physnodes[i].start, | ||
497 | physnodes[i].end) < 0) | ||
498 | node_clear(i, physnode_mask); | ||
499 | } | ||
500 | } | ||
501 | return ret; | ||
502 | } | ||
503 | |||
504 | /* | ||
338 | * Splits num_nodes nodes up equally starting at node_start. The return value | 505 | * Splits num_nodes nodes up equally starting at node_start. The return value |
339 | * is the number of nodes split up and addr is adjusted to be at the end of the | 506 | * is the number of nodes split up and addr is adjusted to be at the end of the |
340 | * last node allocated. | 507 | * last node allocated. |
341 | */ | 508 | */ |
342 | static int __init split_nodes_equally(struct bootnode *nodes, u64 *addr, | 509 | static int __init split_nodes_equally(u64 *addr, u64 max_addr, int node_start, |
343 | u64 max_addr, int node_start, | ||
344 | int num_nodes) | 510 | int num_nodes) |
345 | { | 511 | { |
346 | unsigned int big; | 512 | unsigned int big; |
@@ -388,7 +554,7 @@ static int __init split_nodes_equally(struct bootnode *nodes, u64 *addr, | |||
388 | break; | 554 | break; |
389 | } | 555 | } |
390 | } | 556 | } |
391 | if (setup_node_range(i, nodes, addr, end - *addr, max_addr) < 0) | 557 | if (setup_node_range(i, addr, end - *addr, max_addr) < 0) |
392 | break; | 558 | break; |
393 | } | 559 | } |
394 | return i - node_start + 1; | 560 | return i - node_start + 1; |
@@ -399,12 +565,12 @@ static int __init split_nodes_equally(struct bootnode *nodes, u64 *addr, | |||
399 | * always assigned to a final node and can be asymmetric. Returns the number of | 565 | * always assigned to a final node and can be asymmetric. Returns the number of |
400 | * nodes split. | 566 | * nodes split. |
401 | */ | 567 | */ |
402 | static int __init split_nodes_by_size(struct bootnode *nodes, u64 *addr, | 568 | static int __init split_nodes_by_size(u64 *addr, u64 max_addr, int node_start, |
403 | u64 max_addr, int node_start, u64 size) | 569 | u64 size) |
404 | { | 570 | { |
405 | int i = node_start; | 571 | int i = node_start; |
406 | size = (size << 20) & FAKE_NODE_MIN_HASH_MASK; | 572 | size = (size << 20) & FAKE_NODE_MIN_HASH_MASK; |
407 | while (!setup_node_range(i++, nodes, addr, size, max_addr)) | 573 | while (!setup_node_range(i++, addr, size, max_addr)) |
408 | ; | 574 | ; |
409 | return i - node_start; | 575 | return i - node_start; |
410 | } | 576 | } |
@@ -413,15 +579,15 @@ static int __init split_nodes_by_size(struct bootnode *nodes, u64 *addr, | |||
413 | * Sets up the system RAM area from start_pfn to last_pfn according to the | 579 | * Sets up the system RAM area from start_pfn to last_pfn according to the |
414 | * numa=fake command-line option. | 580 | * numa=fake command-line option. |
415 | */ | 581 | */ |
416 | static struct bootnode nodes[MAX_NUMNODES] __initdata; | 582 | static int __init numa_emulation(unsigned long start_pfn, |
417 | 583 | unsigned long last_pfn, int acpi, int k8) | |
418 | static int __init numa_emulation(unsigned long start_pfn, unsigned long last_pfn) | ||
419 | { | 584 | { |
420 | u64 size, addr = start_pfn << PAGE_SHIFT; | 585 | u64 size, addr = start_pfn << PAGE_SHIFT; |
421 | u64 max_addr = last_pfn << PAGE_SHIFT; | 586 | u64 max_addr = last_pfn << PAGE_SHIFT; |
422 | int num_nodes = 0, num = 0, coeff_flag, coeff = -1, i; | 587 | int num_nodes = 0, num = 0, coeff_flag, coeff = -1, i; |
588 | int num_phys_nodes; | ||
423 | 589 | ||
424 | memset(&nodes, 0, sizeof(nodes)); | 590 | num_phys_nodes = setup_physnodes(addr, max_addr, acpi, k8); |
425 | /* | 591 | /* |
426 | * If the numa=fake command-line is just a single number N, split the | 592 | * If the numa=fake command-line is just a single number N, split the |
427 | * system RAM into N fake nodes. | 593 | * system RAM into N fake nodes. |
@@ -429,7 +595,8 @@ static int __init numa_emulation(unsigned long start_pfn, unsigned long last_pfn | |||
429 | if (!strchr(cmdline, '*') && !strchr(cmdline, ',')) { | 595 | if (!strchr(cmdline, '*') && !strchr(cmdline, ',')) { |
430 | long n = simple_strtol(cmdline, NULL, 0); | 596 | long n = simple_strtol(cmdline, NULL, 0); |
431 | 597 | ||
432 | num_nodes = split_nodes_equally(nodes, &addr, max_addr, 0, n); | 598 | num_nodes = split_nodes_interleave(addr, max_addr, |
599 | num_phys_nodes, n); | ||
433 | if (num_nodes < 0) | 600 | if (num_nodes < 0) |
434 | return num_nodes; | 601 | return num_nodes; |
435 | goto out; | 602 | goto out; |
@@ -456,8 +623,8 @@ static int __init numa_emulation(unsigned long start_pfn, unsigned long last_pfn | |||
456 | size = ((u64)num << 20) & FAKE_NODE_MIN_HASH_MASK; | 623 | size = ((u64)num << 20) & FAKE_NODE_MIN_HASH_MASK; |
457 | if (size) | 624 | if (size) |
458 | for (i = 0; i < coeff; i++, num_nodes++) | 625 | for (i = 0; i < coeff; i++, num_nodes++) |
459 | if (setup_node_range(num_nodes, nodes, | 626 | if (setup_node_range(num_nodes, &addr, |
460 | &addr, size, max_addr) < 0) | 627 | size, max_addr) < 0) |
461 | goto done; | 628 | goto done; |
462 | if (!*cmdline) | 629 | if (!*cmdline) |
463 | break; | 630 | break; |
@@ -473,7 +640,7 @@ done: | |||
473 | if (addr < max_addr) { | 640 | if (addr < max_addr) { |
474 | if (coeff_flag && coeff < 0) { | 641 | if (coeff_flag && coeff < 0) { |
475 | /* Split remaining nodes into num-sized chunks */ | 642 | /* Split remaining nodes into num-sized chunks */ |
476 | num_nodes += split_nodes_by_size(nodes, &addr, max_addr, | 643 | num_nodes += split_nodes_by_size(&addr, max_addr, |
477 | num_nodes, num); | 644 | num_nodes, num); |
478 | goto out; | 645 | goto out; |
479 | } | 646 | } |
@@ -482,7 +649,7 @@ done: | |||
482 | /* Split remaining nodes into coeff chunks */ | 649 | /* Split remaining nodes into coeff chunks */ |
483 | if (coeff <= 0) | 650 | if (coeff <= 0) |
484 | break; | 651 | break; |
485 | num_nodes += split_nodes_equally(nodes, &addr, max_addr, | 652 | num_nodes += split_nodes_equally(&addr, max_addr, |
486 | num_nodes, coeff); | 653 | num_nodes, coeff); |
487 | break; | 654 | break; |
488 | case ',': | 655 | case ',': |
@@ -490,8 +657,8 @@ done: | |||
490 | break; | 657 | break; |
491 | default: | 658 | default: |
492 | /* Give one final node */ | 659 | /* Give one final node */ |
493 | setup_node_range(num_nodes, nodes, &addr, | 660 | setup_node_range(num_nodes, &addr, max_addr - addr, |
494 | max_addr - addr, max_addr); | 661 | max_addr); |
495 | num_nodes++; | 662 | num_nodes++; |
496 | } | 663 | } |
497 | } | 664 | } |
@@ -505,14 +672,10 @@ out: | |||
505 | } | 672 | } |
506 | 673 | ||
507 | /* | 674 | /* |
508 | * We need to vacate all active ranges that may have been registered by | 675 | * We need to vacate all active ranges that may have been registered for |
509 | * SRAT and set acpi_numa to -1 so that srat_disabled() always returns | 676 | * the e820 memory map. |
510 | * true. NUMA emulation has succeeded so we will not scan ACPI nodes. | ||
511 | */ | 677 | */ |
512 | remove_all_active_ranges(); | 678 | remove_all_active_ranges(); |
513 | #ifdef CONFIG_ACPI_NUMA | ||
514 | acpi_numa = -1; | ||
515 | #endif | ||
516 | for_each_node_mask(i, node_possible_map) { | 679 | for_each_node_mask(i, node_possible_map) { |
517 | e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT, | 680 | e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT, |
518 | nodes[i].end >> PAGE_SHIFT); | 681 | nodes[i].end >> PAGE_SHIFT); |
@@ -524,7 +687,8 @@ out: | |||
524 | } | 687 | } |
525 | #endif /* CONFIG_NUMA_EMU */ | 688 | #endif /* CONFIG_NUMA_EMU */ |
526 | 689 | ||
527 | void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn) | 690 | void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn, |
691 | int acpi, int k8) | ||
528 | { | 692 | { |
529 | int i; | 693 | int i; |
530 | 694 | ||
@@ -532,23 +696,22 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn) | |||
532 | nodes_clear(node_online_map); | 696 | nodes_clear(node_online_map); |
533 | 697 | ||
534 | #ifdef CONFIG_NUMA_EMU | 698 | #ifdef CONFIG_NUMA_EMU |
535 | if (cmdline && !numa_emulation(start_pfn, last_pfn)) | 699 | if (cmdline && !numa_emulation(start_pfn, last_pfn, acpi, k8)) |
536 | return; | 700 | return; |
537 | nodes_clear(node_possible_map); | 701 | nodes_clear(node_possible_map); |
538 | nodes_clear(node_online_map); | 702 | nodes_clear(node_online_map); |
539 | #endif | 703 | #endif |
540 | 704 | ||
541 | #ifdef CONFIG_ACPI_NUMA | 705 | #ifdef CONFIG_ACPI_NUMA |
542 | if (!numa_off && !acpi_scan_nodes(start_pfn << PAGE_SHIFT, | 706 | if (!numa_off && acpi && !acpi_scan_nodes(start_pfn << PAGE_SHIFT, |
543 | last_pfn << PAGE_SHIFT)) | 707 | last_pfn << PAGE_SHIFT)) |
544 | return; | 708 | return; |
545 | nodes_clear(node_possible_map); | 709 | nodes_clear(node_possible_map); |
546 | nodes_clear(node_online_map); | 710 | nodes_clear(node_online_map); |
547 | #endif | 711 | #endif |
548 | 712 | ||
549 | #ifdef CONFIG_K8_NUMA | 713 | #ifdef CONFIG_K8_NUMA |
550 | if (!numa_off && !k8_scan_nodes(start_pfn<<PAGE_SHIFT, | 714 | if (!numa_off && k8 && !k8_scan_nodes()) |
551 | last_pfn<<PAGE_SHIFT)) | ||
552 | return; | 715 | return; |
553 | nodes_clear(node_possible_map); | 716 | nodes_clear(node_possible_map); |
554 | nodes_clear(node_online_map); | 717 | nodes_clear(node_online_map); |
@@ -601,6 +764,25 @@ static __init int numa_setup(char *opt) | |||
601 | early_param("numa", numa_setup); | 764 | early_param("numa", numa_setup); |
602 | 765 | ||
603 | #ifdef CONFIG_NUMA | 766 | #ifdef CONFIG_NUMA |
767 | |||
768 | static __init int find_near_online_node(int node) | ||
769 | { | ||
770 | int n, val; | ||
771 | int min_val = INT_MAX; | ||
772 | int best_node = -1; | ||
773 | |||
774 | for_each_online_node(n) { | ||
775 | val = node_distance(node, n); | ||
776 | |||
777 | if (val < min_val) { | ||
778 | min_val = val; | ||
779 | best_node = n; | ||
780 | } | ||
781 | } | ||
782 | |||
783 | return best_node; | ||
784 | } | ||
785 | |||
604 | /* | 786 | /* |
605 | * Setup early cpu_to_node. | 787 | * Setup early cpu_to_node. |
606 | * | 788 | * |
@@ -632,7 +814,7 @@ void __init init_cpu_to_node(void) | |||
632 | if (node == NUMA_NO_NODE) | 814 | if (node == NUMA_NO_NODE) |
633 | continue; | 815 | continue; |
634 | if (!node_online(node)) | 816 | if (!node_online(node)) |
635 | continue; | 817 | node = find_near_online_node(node); |
636 | numa_set_node(cpu, node); | 818 | numa_set_node(cpu, node); |
637 | } | 819 | } |
638 | } | 820 | } |
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index dd38bfbefd1f..1d4eb93d333c 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
@@ -279,6 +279,22 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, | |||
279 | __pa((unsigned long)__end_rodata) >> PAGE_SHIFT)) | 279 | __pa((unsigned long)__end_rodata) >> PAGE_SHIFT)) |
280 | pgprot_val(forbidden) |= _PAGE_RW; | 280 | pgprot_val(forbidden) |= _PAGE_RW; |
281 | 281 | ||
282 | #if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) | ||
283 | /* | ||
284 | * Once the kernel maps the text as RO (kernel_set_to_readonly is set), | ||
285 | * kernel text mappings for the large page aligned text, rodata sections | ||
286 | * will be always read-only. For the kernel identity mappings covering | ||
287 | * the holes caused by this alignment can be anything that user asks. | ||
288 | * | ||
289 | * This will preserve the large page mappings for kernel text/data | ||
290 | * at no extra cost. | ||
291 | */ | ||
292 | if (kernel_set_to_readonly && | ||
293 | within(address, (unsigned long)_text, | ||
294 | (unsigned long)__end_rodata_hpage_align)) | ||
295 | pgprot_val(forbidden) |= _PAGE_RW; | ||
296 | #endif | ||
297 | |||
282 | prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden)); | 298 | prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden)); |
283 | 299 | ||
284 | return prot; | 300 | return prot; |
@@ -1069,12 +1085,18 @@ EXPORT_SYMBOL(set_memory_array_wb); | |||
1069 | 1085 | ||
1070 | int set_memory_x(unsigned long addr, int numpages) | 1086 | int set_memory_x(unsigned long addr, int numpages) |
1071 | { | 1087 | { |
1088 | if (!(__supported_pte_mask & _PAGE_NX)) | ||
1089 | return 0; | ||
1090 | |||
1072 | return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_NX), 0); | 1091 | return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_NX), 0); |
1073 | } | 1092 | } |
1074 | EXPORT_SYMBOL(set_memory_x); | 1093 | EXPORT_SYMBOL(set_memory_x); |
1075 | 1094 | ||
1076 | int set_memory_nx(unsigned long addr, int numpages) | 1095 | int set_memory_nx(unsigned long addr, int numpages) |
1077 | { | 1096 | { |
1097 | if (!(__supported_pte_mask & _PAGE_NX)) | ||
1098 | return 0; | ||
1099 | |||
1078 | return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_NX), 0); | 1100 | return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_NX), 0); |
1079 | } | 1101 | } |
1080 | EXPORT_SYMBOL(set_memory_nx); | 1102 | EXPORT_SYMBOL(set_memory_nx); |
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index e78cd0ec2bcf..a81b7e73275d 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <asm/cacheflush.h> | 20 | #include <asm/cacheflush.h> |
21 | #include <asm/processor.h> | 21 | #include <asm/processor.h> |
22 | #include <asm/tlbflush.h> | 22 | #include <asm/tlbflush.h> |
23 | #include <asm/x86_init.h> | ||
23 | #include <asm/pgtable.h> | 24 | #include <asm/pgtable.h> |
24 | #include <asm/fcntl.h> | 25 | #include <asm/fcntl.h> |
25 | #include <asm/e820.h> | 26 | #include <asm/e820.h> |
@@ -388,7 +389,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
388 | } | 389 | } |
389 | 390 | ||
390 | /* Low ISA region is always mapped WB in page table. No need to track */ | 391 | /* Low ISA region is always mapped WB in page table. No need to track */ |
391 | if (is_ISA_range(start, end - 1)) { | 392 | if (x86_platform.is_untracked_pat_range(start, end)) { |
392 | if (new_type) | 393 | if (new_type) |
393 | *new_type = _PAGE_CACHE_WB; | 394 | *new_type = _PAGE_CACHE_WB; |
394 | return 0; | 395 | return 0; |
@@ -499,7 +500,7 @@ int free_memtype(u64 start, u64 end) | |||
499 | return 0; | 500 | return 0; |
500 | 501 | ||
501 | /* Low ISA region is always mapped WB. No need to track */ | 502 | /* Low ISA region is always mapped WB. No need to track */ |
502 | if (is_ISA_range(start, end - 1)) | 503 | if (x86_platform.is_untracked_pat_range(start, end)) |
503 | return 0; | 504 | return 0; |
504 | 505 | ||
505 | is_range_ram = pat_pagerange_is_ram(start, end); | 506 | is_range_ram = pat_pagerange_is_ram(start, end); |
@@ -582,7 +583,7 @@ static unsigned long lookup_memtype(u64 paddr) | |||
582 | int rettype = _PAGE_CACHE_WB; | 583 | int rettype = _PAGE_CACHE_WB; |
583 | struct memtype *entry; | 584 | struct memtype *entry; |
584 | 585 | ||
585 | if (is_ISA_range(paddr, paddr + PAGE_SIZE - 1)) | 586 | if (x86_platform.is_untracked_pat_range(paddr, paddr + PAGE_SIZE)) |
586 | return rettype; | 587 | return rettype; |
587 | 588 | ||
588 | if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) { | 589 | if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) { |
@@ -1018,8 +1019,10 @@ static const struct file_operations memtype_fops = { | |||
1018 | 1019 | ||
1019 | static int __init pat_memtype_list_init(void) | 1020 | static int __init pat_memtype_list_init(void) |
1020 | { | 1021 | { |
1021 | debugfs_create_file("pat_memtype_list", S_IRUSR, arch_debugfs_dir, | 1022 | if (pat_enabled) { |
1022 | NULL, &memtype_fops); | 1023 | debugfs_create_file("pat_memtype_list", S_IRUSR, |
1024 | arch_debugfs_dir, NULL, &memtype_fops); | ||
1025 | } | ||
1023 | return 0; | 1026 | return 0; |
1024 | } | 1027 | } |
1025 | 1028 | ||
diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c index 513d8ed5d2ec..a3250aa34086 100644 --- a/arch/x86/mm/setup_nx.c +++ b/arch/x86/mm/setup_nx.c | |||
@@ -3,10 +3,8 @@ | |||
3 | #include <linux/init.h> | 3 | #include <linux/init.h> |
4 | 4 | ||
5 | #include <asm/pgtable.h> | 5 | #include <asm/pgtable.h> |
6 | #include <asm/proto.h> | ||
6 | 7 | ||
7 | int nx_enabled; | ||
8 | |||
9 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) | ||
10 | static int disable_nx __cpuinitdata; | 8 | static int disable_nx __cpuinitdata; |
11 | 9 | ||
12 | /* | 10 | /* |
@@ -22,48 +20,41 @@ static int __init noexec_setup(char *str) | |||
22 | if (!str) | 20 | if (!str) |
23 | return -EINVAL; | 21 | return -EINVAL; |
24 | if (!strncmp(str, "on", 2)) { | 22 | if (!strncmp(str, "on", 2)) { |
25 | __supported_pte_mask |= _PAGE_NX; | ||
26 | disable_nx = 0; | 23 | disable_nx = 0; |
27 | } else if (!strncmp(str, "off", 3)) { | 24 | } else if (!strncmp(str, "off", 3)) { |
28 | disable_nx = 1; | 25 | disable_nx = 1; |
29 | __supported_pte_mask &= ~_PAGE_NX; | ||
30 | } | 26 | } |
27 | x86_configure_nx(); | ||
31 | return 0; | 28 | return 0; |
32 | } | 29 | } |
33 | early_param("noexec", noexec_setup); | 30 | early_param("noexec", noexec_setup); |
34 | #endif | ||
35 | 31 | ||
36 | #ifdef CONFIG_X86_PAE | 32 | void __cpuinit x86_configure_nx(void) |
37 | void __init set_nx(void) | ||
38 | { | 33 | { |
39 | unsigned int v[4], l, h; | 34 | if (cpu_has_nx && !disable_nx) |
40 | 35 | __supported_pte_mask |= _PAGE_NX; | |
41 | if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) { | 36 | else |
42 | cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]); | 37 | __supported_pte_mask &= ~_PAGE_NX; |
38 | } | ||
43 | 39 | ||
44 | if ((v[3] & (1 << 20)) && !disable_nx) { | 40 | void __init x86_report_nx(void) |
45 | rdmsr(MSR_EFER, l, h); | 41 | { |
46 | l |= EFER_NX; | 42 | if (!cpu_has_nx) { |
47 | wrmsr(MSR_EFER, l, h); | 43 | printk(KERN_NOTICE "Notice: NX (Execute Disable) protection " |
48 | nx_enabled = 1; | 44 | "missing in CPU or disabled in BIOS!\n"); |
49 | __supported_pte_mask |= _PAGE_NX; | 45 | } else { |
46 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) | ||
47 | if (disable_nx) { | ||
48 | printk(KERN_INFO "NX (Execute Disable) protection: " | ||
49 | "disabled by kernel command line option\n"); | ||
50 | } else { | ||
51 | printk(KERN_INFO "NX (Execute Disable) protection: " | ||
52 | "active\n"); | ||
50 | } | 53 | } |
51 | } | ||
52 | } | ||
53 | #else | 54 | #else |
54 | void set_nx(void) | 55 | /* 32bit non-PAE kernel, NX cannot be used */ |
55 | { | 56 | printk(KERN_NOTICE "Notice: NX (Execute Disable) protection " |
56 | } | 57 | "cannot be enabled: non-PAE kernel!\n"); |
57 | #endif | 58 | #endif |
58 | 59 | } | |
59 | #ifdef CONFIG_X86_64 | ||
60 | void __cpuinit check_efer(void) | ||
61 | { | ||
62 | unsigned long efer; | ||
63 | |||
64 | rdmsrl(MSR_EFER, efer); | ||
65 | if (!(efer & EFER_NX) || disable_nx) | ||
66 | __supported_pte_mask &= ~_PAGE_NX; | ||
67 | } | 60 | } |
68 | #endif | ||
69 | |||
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 9d7ce96e5a5c..d89075489664 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c | |||
@@ -290,8 +290,6 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) | |||
290 | 290 | ||
291 | printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm, | 291 | printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm, |
292 | start, end); | 292 | start, end); |
293 | e820_register_active_regions(node, start >> PAGE_SHIFT, | ||
294 | end >> PAGE_SHIFT); | ||
295 | 293 | ||
296 | if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) { | 294 | if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) { |
297 | update_nodes_add(node, start, end); | 295 | update_nodes_add(node, start, end); |
@@ -338,6 +336,19 @@ static int __init nodes_cover_memory(const struct bootnode *nodes) | |||
338 | 336 | ||
339 | void __init acpi_numa_arch_fixup(void) {} | 337 | void __init acpi_numa_arch_fixup(void) {} |
340 | 338 | ||
339 | int __init acpi_get_nodes(struct bootnode *physnodes) | ||
340 | { | ||
341 | int i; | ||
342 | int ret = 0; | ||
343 | |||
344 | for_each_node_mask(i, nodes_parsed) { | ||
345 | physnodes[ret].start = nodes[i].start; | ||
346 | physnodes[ret].end = nodes[i].end; | ||
347 | ret++; | ||
348 | } | ||
349 | return ret; | ||
350 | } | ||
351 | |||
341 | /* Use the information discovered above to actually set up the nodes. */ | 352 | /* Use the information discovered above to actually set up the nodes. */ |
342 | int __init acpi_scan_nodes(unsigned long start, unsigned long end) | 353 | int __init acpi_scan_nodes(unsigned long start, unsigned long end) |
343 | { | 354 | { |
@@ -350,11 +361,6 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) | |||
350 | for (i = 0; i < MAX_NUMNODES; i++) | 361 | for (i = 0; i < MAX_NUMNODES; i++) |
351 | cutoff_node(i, start, end); | 362 | cutoff_node(i, start, end); |
352 | 363 | ||
353 | if (!nodes_cover_memory(nodes)) { | ||
354 | bad_srat(); | ||
355 | return -1; | ||
356 | } | ||
357 | |||
358 | memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks, | 364 | memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks, |
359 | memblk_nodeid); | 365 | memblk_nodeid); |
360 | if (memnode_shift < 0) { | 366 | if (memnode_shift < 0) { |
@@ -364,6 +370,14 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) | |||
364 | return -1; | 370 | return -1; |
365 | } | 371 | } |
366 | 372 | ||
373 | for_each_node_mask(i, nodes_parsed) | ||
374 | e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT, | ||
375 | nodes[i].end >> PAGE_SHIFT); | ||
376 | if (!nodes_cover_memory(nodes)) { | ||
377 | bad_srat(); | ||
378 | return -1; | ||
379 | } | ||
380 | |||
367 | /* Account for nodes with cpus and no memory */ | 381 | /* Account for nodes with cpus and no memory */ |
368 | nodes_or(node_possible_map, nodes_parsed, cpu_nodes_parsed); | 382 | nodes_or(node_possible_map, nodes_parsed, cpu_nodes_parsed); |
369 | 383 | ||
@@ -454,7 +468,6 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes) | |||
454 | for (i = 0; i < num_nodes; i++) | 468 | for (i = 0; i < num_nodes; i++) |
455 | if (fake_nodes[i].start != fake_nodes[i].end) | 469 | if (fake_nodes[i].start != fake_nodes[i].end) |
456 | node_set(i, nodes_parsed); | 470 | node_set(i, nodes_parsed); |
457 | WARN_ON(!nodes_cover_memory(fake_nodes)); | ||
458 | } | 471 | } |
459 | 472 | ||
460 | static int null_slit_node_compare(int a, int b) | 473 | static int null_slit_node_compare(int a, int b) |
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 36fe08eeb5c3..65b58e4b0b8b 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c | |||
@@ -8,6 +8,7 @@ | |||
8 | 8 | ||
9 | #include <asm/tlbflush.h> | 9 | #include <asm/tlbflush.h> |
10 | #include <asm/mmu_context.h> | 10 | #include <asm/mmu_context.h> |
11 | #include <asm/cache.h> | ||
11 | #include <asm/apic.h> | 12 | #include <asm/apic.h> |
12 | #include <asm/uv/uv.h> | 13 | #include <asm/uv/uv.h> |
13 | 14 | ||
@@ -43,7 +44,7 @@ union smp_flush_state { | |||
43 | spinlock_t tlbstate_lock; | 44 | spinlock_t tlbstate_lock; |
44 | DECLARE_BITMAP(flush_cpumask, NR_CPUS); | 45 | DECLARE_BITMAP(flush_cpumask, NR_CPUS); |
45 | }; | 46 | }; |
46 | char pad[CONFIG_X86_INTERNODE_CACHE_BYTES]; | 47 | char pad[INTERNODE_CACHE_BYTES]; |
47 | } ____cacheline_internodealigned_in_smp; | 48 | } ____cacheline_internodealigned_in_smp; |
48 | 49 | ||
49 | /* State is put into the per CPU data section, but padded | 50 | /* State is put into the per CPU data section, but padded |