author     Tejun Heo <tj@kernel.org>   2011-05-02 08:08:43 -0400
committer  Tejun Heo <tj@kernel.org>   2011-05-02 08:08:47 -0400
commit     aff364860aa105b2deacc6f21ec8ef524460e3fc (patch)
tree       18409ebe16b25b141598da9b6386d69416c06afa /arch/x86
parent     c7a7b814c9dca9ee01b38e63b4a46de87156d3b6 (diff)
parent     993ba1585cbb03fab012e41d1a5d24330a283b31 (diff)
Merge branch 'x86/numa' into x86-mm
Merge reason: Pick up x86-32 remap allocator cleanup changes - 14
commits, 3fe14ab541^..993ba1585c.
3fe14ab541: x86-32, numa: Fix failure condition check in alloc_remap()
993ba1585c: x86-32, numa: Update remap allocator comments
Scheduled NUMA init 32/64bit unification changes depend on them.
Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/include/asm/apic.h        |   1
-rw-r--r--  arch/x86/include/asm/dma.h         |   1
-rw-r--r--  arch/x86/include/asm/topology.h    |   1
-rw-r--r--  arch/x86/kernel/amd_nb.c           |   2
-rw-r--r--  arch/x86/kernel/apic/hw_nmi.c      |   1
-rw-r--r--  arch/x86/kernel/apic/numaq_32.c    |   4
-rw-r--r--  arch/x86/kernel/apic/x2apic_uv_x.c |  10
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c   |   2
-rw-r--r--  arch/x86/kernel/cpu/mtrr/main.c    |  20
-rw-r--r--  arch/x86/kernel/irq.c              |   1
-rw-r--r--  arch/x86/kernel/microcode_core.c   |   1
-rw-r--r--  arch/x86/kernel/reboot.c           |   1
-rw-r--r--  arch/x86/mm/numa_32.c              | 268
-rw-r--r--  arch/x86/mm/srat_32.c              |   1
-rw-r--r--  arch/x86/oprofile/nmi_int.c        |   5
-rw-r--r--  arch/x86/oprofile/op_counter.h     |   1
-rw-r--r--  arch/x86/platform/uv/tlb_uv.c      |   1
17 files changed, 149 insertions(+), 172 deletions(-)
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index a279d98ea95..2b7d573be54 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -2,7 +2,6 @@
 #define _ASM_X86_APIC_H
 
 #include <linux/cpumask.h>
-#include <linux/delay.h>
 #include <linux/pm.h>
 
 #include <asm/alternative.h>
diff --git a/arch/x86/include/asm/dma.h b/arch/x86/include/asm/dma.h
index 97b6d8114a4..057099e5fab 100644
--- a/arch/x86/include/asm/dma.h
+++ b/arch/x86/include/asm/dma.h
@@ -10,7 +10,6 @@
 
 #include <linux/spinlock.h>	/* And spinlocks */
 #include <asm/io.h>		/* need byte IO */
-#include <linux/delay.h>
 
 #ifdef HAVE_REALLY_SLOW_DMA_CONTROLLER
 #define dma_outb	outb_p
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 910a7084f7f..8dba76972fd 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -95,7 +95,6 @@ extern void setup_node_to_cpumask_map(void);
 #ifdef CONFIG_X86_32
 extern unsigned long node_start_pfn[];
 extern unsigned long node_end_pfn[];
-extern unsigned long node_remap_size[];
 #define node_has_online_mem(nid) (node_start_pfn[nid] != node_end_pfn[nid])
 
 # define SD_CACHE_NICE_TRIES	1
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 6801959a8b2..4c39baa8fac 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -21,7 +21,7 @@ const struct pci_device_id amd_nb_misc_ids[] = {
 EXPORT_SYMBOL(amd_nb_misc_ids);
 
 static struct pci_device_id amd_nb_link_ids[] = {
-	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_LINK) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F4) },
 	{}
 };
 
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index c4e557a1ebb..5260fe91bcb 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -16,6 +16,7 @@
 #include <linux/kprobes.h>
 #include <linux/nmi.h>
 #include <linux/module.h>
+#include <linux/delay.h>
 
 #ifdef CONFIG_HARDLOCKUP_DETECTOR
 u64 hw_nmi_get_sample_period(void)
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index 6273eee5134..0aced70815f 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -93,10 +93,6 @@ static inline void numaq_register_node(int node, struct sys_cfg_data *scd)
 			node_end_pfn[node]);
 
 	memory_present(node, node_start_pfn[node], node_end_pfn[node]);
-
-	node_remap_size[node] = node_memmap_size_bytes(node,
-						       node_start_pfn[node],
-						       node_end_pfn[node]);
 }
 
 /*
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 3c289281394..33b10a0fc09 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -23,6 +23,8 @@
 #include <linux/io.h>
 #include <linux/pci.h>
 #include <linux/kdebug.h>
+#include <linux/delay.h>
+#include <linux/crash_dump.h>
 
 #include <asm/uv/uv_mmrs.h>
 #include <asm/uv/uv_hub.h>
@@ -34,6 +36,7 @@
 #include <asm/ipi.h>
 #include <asm/smp.h>
 #include <asm/x86_init.h>
+#include <asm/emergency-restart.h>
 
 DEFINE_PER_CPU(int, x2apic_extra_bits);
 
@@ -810,4 +813,11 @@ void __init uv_system_init(void)
 
 	/* register Legacy VGA I/O redirection handler */
 	pci_register_set_vga_state(uv_set_vga_state);
+
+	/*
+	 * For a kdump kernel the reset must be BOOT_ACPI, not BOOT_EFI, as
+	 * EFI is not enabled in the kdump kernel.
+	 */
+	if (is_kdump_kernel())
+		reboot_type = BOOT_ACPI;
 }
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 5a05ef63eb4..3385ea26f68 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1626,7 +1626,7 @@ out:
 static unsigned int mce_poll(struct file *file, poll_table *wait)
 {
 	poll_wait(file, &mce_wait, wait);
-	if (rcu_dereference_check_mce(mcelog.next))
+	if (rcu_access_index(mcelog.next))
 		return POLLIN | POLLRDNORM;
 	if (!mce_apei_read_done && apei_check_mce())
 		return POLLIN | POLLRDNORM;
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 307dfbbf4a8..929739a653d 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -293,14 +293,24 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ
 
 	/*
 	 * HACK!
-	 * We use this same function to initialize the mtrrs on boot.
-	 * The state of the boot cpu's mtrrs has been saved, and we want
-	 * to replicate across all the APs.
-	 * If we're doing that @reg is set to something special...
+	 *
+	 * We use this same function to initialize the mtrrs during boot,
+	 * resume, runtime cpu online and on an explicit request to set a
+	 * specific MTRR.
+	 *
+	 * During boot or suspend, the state of the boot cpu's mtrrs has been
+	 * saved, and we want to replicate that across all the cpus that come
+	 * online (either at the end of boot or resume or during a runtime cpu
+	 * online). If we're doing that, @reg is set to something special and on
+	 * this cpu we still do mtrr_if->set_all(). During boot/resume, this
+	 * is unnecessary if at this point we are still on the cpu that started
+	 * the boot/resume sequence. But there is no guarantee that we are still
+	 * on the same cpu. So we do mtrr_if->set_all() on this cpu as well to be
+	 * sure that we are in sync with everyone else.
 	 */
 	if (reg != ~0U)
 		mtrr_if->set(reg, base, size, type);
-	else if (!mtrr_aps_delayed_init)
+	else
 		mtrr_if->set_all();
 
 	/* Wait for the others */
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 948a31eae75..1cb0b9fc78d 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -8,6 +8,7 @@
 #include <linux/seq_file.h>
 #include <linux/smp.h>
 #include <linux/ftrace.h>
+#include <linux/delay.h>
 
 #include <asm/apic.h>
 #include <asm/io_apic.h>
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 5ed0ab549eb..f9242800bc8 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -550,6 +550,7 @@ static void __exit microcode_exit(void)
 	microcode_dev_exit();
 
 	unregister_hotcpu_notifier(&mc_cpu_notifier);
+	unregister_syscore_ops(&mc_syscore_ops);
 
 	get_online_cpus();
 	mutex_lock(&microcode_mutex);
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index d3ce37edb54..08c44b08bf5 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -6,6 +6,7 @@
 #include <linux/dmi.h>
 #include <linux/sched.h>
 #include <linux/tboot.h>
+#include <linux/delay.h>
 #include <acpi/reboot.h>
 #include <asm/io.h>
 #include <asm/apic.h>
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index bde3906420d..c757c0a3b52 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -104,13 +104,9 @@ extern unsigned long highend_pfn, highstart_pfn;
 
 #define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE)
 
-unsigned long node_remap_size[MAX_NUMNODES];
 static void *node_remap_start_vaddr[MAX_NUMNODES];
 void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
 
-static unsigned long kva_start_pfn;
-static unsigned long kva_pages;
-
 int __cpuinit numa_cpu_node(int cpu)
 {
 	return apic->x86_32_numa_cpu_node(cpu);
@@ -129,7 +125,6 @@ int __init get_memcfg_numa_flat(void)
 	node_end_pfn[0] = max_pfn;
 	memblock_x86_register_active_regions(0, 0, max_pfn);
 	memory_present(0, 0, max_pfn);
-	node_remap_size[0] = node_memmap_size_bytes(0, 0, max_pfn);
 
 	/* Indicate there is one node available. */
 	nodes_clear(node_online_map);
@@ -164,9 +159,8 @@ static void __init allocate_pgdat(int nid)
 {
 	char buf[16];
 
-	if (node_has_online_mem(nid) && node_remap_start_vaddr[nid])
-		NODE_DATA(nid) = (pg_data_t *)node_remap_start_vaddr[nid];
-	else {
+	NODE_DATA(nid) = alloc_remap(nid, ALIGN(sizeof(pg_data_t), PAGE_SIZE));
+	if (!NODE_DATA(nid)) {
 		unsigned long pgdat_phys;
 		pgdat_phys = memblock_find_in_range(min_low_pfn<<PAGE_SHIFT,
 				max_pfn_mapped<<PAGE_SHIFT,
@@ -182,25 +176,38 @@ static void __init allocate_pgdat(int nid)
 }
 
 /*
- * In the DISCONTIGMEM and SPARSEMEM memory model, a portion of the kernel
- * virtual address space (KVA) is reserved and portions of nodes are mapped
- * using it. This is to allow node-local memory to be allocated for
- * structures that would normally require ZONE_NORMAL. The memory is
- * allocated with alloc_remap() and callers should be prepared to allocate
- * from the bootmem allocator instead.
+ * Remap memory allocator
 */
 static unsigned long node_remap_start_pfn[MAX_NUMNODES];
 static void *node_remap_end_vaddr[MAX_NUMNODES];
 static void *node_remap_alloc_vaddr[MAX_NUMNODES];
-static unsigned long node_remap_offset[MAX_NUMNODES];
 
+/**
+ * alloc_remap - Allocate remapped memory
+ * @nid: NUMA node to allocate memory from
+ * @size: The size of allocation
+ *
+ * Allocate @size bytes from the remap area of NUMA node @nid. The
+ * size of the remap area is predetermined by init_alloc_remap() and
+ * only the callers considered there should call this function. For
+ * more info, please read the comment on top of init_alloc_remap().
+ *
+ * The caller must be ready to handle allocation failure from this
+ * function and fall back to regular memory allocator in such cases.
+ *
+ * CONTEXT:
+ * Single CPU early boot context.
+ *
+ * RETURNS:
+ * Pointer to the allocated memory on success, %NULL on failure.
+ */
 void *alloc_remap(int nid, unsigned long size)
 {
 	void *allocation = node_remap_alloc_vaddr[nid];
 
 	size = ALIGN(size, L1_CACHE_BYTES);
 
-	if (!allocation || (allocation + size) >= node_remap_end_vaddr[nid])
+	if (!allocation || (allocation + size) > node_remap_end_vaddr[nid])
 		return NULL;
 
 	node_remap_alloc_vaddr[nid] += size;
@@ -209,26 +216,6 @@ void *alloc_remap(int nid, unsigned long size)
 	return allocation;
 }
 
-static void __init remap_numa_kva(void)
-{
-	void *vaddr;
-	unsigned long pfn;
-	int node;
-
-	for_each_online_node(node) {
-		printk(KERN_DEBUG "remap_numa_kva: node %d\n", node);
-		for (pfn=0; pfn < node_remap_size[node]; pfn += PTRS_PER_PTE) {
-			vaddr = node_remap_start_vaddr[node]+(pfn<<PAGE_SHIFT);
-			printk(KERN_DEBUG "remap_numa_kva: %08lx to pfn %08lx\n",
-				(unsigned long)vaddr,
-				node_remap_start_pfn[node] + pfn);
-			set_pmd_pfn((ulong) vaddr,
-				node_remap_start_pfn[node] + pfn,
-				PAGE_KERNEL_LARGE);
-		}
-	}
-}
-
 #ifdef CONFIG_HIBERNATION
 /**
  * resume_map_numa_kva - add KVA mapping to the temporary page tables created
@@ -240,15 +227,16 @@ void resume_map_numa_kva(pgd_t *pgd_base)
 	int node;
 
 	for_each_online_node(node) {
-		unsigned long start_va, start_pfn, size, pfn;
+		unsigned long start_va, start_pfn, nr_pages, pfn;
 
 		start_va = (unsigned long)node_remap_start_vaddr[node];
 		start_pfn = node_remap_start_pfn[node];
-		size = node_remap_size[node];
+		nr_pages = (node_remap_end_vaddr[node] -
+			    node_remap_start_vaddr[node]) >> PAGE_SHIFT;
 
 		printk(KERN_DEBUG "%s: node %d\n", __func__, node);
 
-		for (pfn = 0; pfn < size; pfn += PTRS_PER_PTE) {
+		for (pfn = 0; pfn < nr_pages; pfn += PTRS_PER_PTE) {
 			unsigned long vaddr = start_va + (pfn << PAGE_SHIFT);
 			pgd_t *pgd = pgd_base + pgd_index(vaddr);
 			pud_t *pud = pud_offset(pgd, vaddr);
@@ -264,132 +252,102 @@ void resume_map_numa_kva(pgd_t *pgd_base)
 }
 #endif
 
-static __init unsigned long calculate_numa_remap_pages(void)
+/**
+ * init_alloc_remap - Initialize remap allocator for a NUMA node
+ * @nid: NUMA node to initialize remap allocator for
+ *
+ * NUMA nodes may end up without any lowmem. As allocating pgdat and
+ * memmap on a different node with lowmem is inefficient, a special
+ * remap allocator is implemented which can be used by alloc_remap().
+ *
+ * For each node, the amount of memory which will be necessary for
+ * pgdat and memmap is calculated and two memory areas of the size are
+ * allocated - one in the node and the other in lowmem; then, the area
+ * in the node is remapped to the lowmem area.
+ *
+ * As pgdat and memmap must be allocated in lowmem anyway, this
+ * doesn't waste lowmem address space; however, the actual lowmem
+ * which gets remapped over is wasted. The amount shouldn't be
+ * problematic on machines where this feature will be used.
+ *
+ * Initialization failure isn't fatal. alloc_remap() is used
+ * opportunistically and the callers will fall back to other memory
+ * allocation mechanisms on failure.
+ */
+static __init void init_alloc_remap(int nid)
 {
-	int nid;
-	unsigned long size, reserve_pages = 0;
+	unsigned long size, pfn;
+	u64 node_pa, remap_pa;
+	void *remap_va;
 
-	for_each_online_node(nid) {
-		u64 node_kva_target;
-		u64 node_kva_final;
-
-		/*
-		 * The acpi/srat node info can show hot-add memroy zones
-		 * where memory could be added but not currently present.
-		 */
-		printk(KERN_DEBUG "node %d pfn: [%lx - %lx]\n",
-			nid, node_start_pfn[nid], node_end_pfn[nid]);
-		if (node_start_pfn[nid] > max_pfn)
-			continue;
-		if (!node_end_pfn[nid])
-			continue;
-		if (node_end_pfn[nid] > max_pfn)
-			node_end_pfn[nid] = max_pfn;
-
-		/* ensure the remap includes space for the pgdat. */
-		size = node_remap_size[nid] + sizeof(pg_data_t);
-
-		/* convert size to large (pmd size) pages, rounding up */
-		size = (size + LARGE_PAGE_BYTES - 1) / LARGE_PAGE_BYTES;
-		/* now the roundup is correct, convert to PAGE_SIZE pages */
-		size = size * PTRS_PER_PTE;
-
-		node_kva_target = round_down(node_end_pfn[nid] - size,
-					     PTRS_PER_PTE);
-		node_kva_target <<= PAGE_SHIFT;
-		do {
-			node_kva_final = memblock_find_in_range(node_kva_target,
-					((u64)node_end_pfn[nid])<<PAGE_SHIFT,
-					((u64)size)<<PAGE_SHIFT,
-					LARGE_PAGE_BYTES);
-			node_kva_target -= LARGE_PAGE_BYTES;
-		} while (node_kva_final == MEMBLOCK_ERROR &&
-			 (node_kva_target>>PAGE_SHIFT) > (node_start_pfn[nid]));
-
-		if (node_kva_final == MEMBLOCK_ERROR)
-			panic("Can not get kva ram\n");
-
-		node_remap_size[nid] = size;
-		node_remap_offset[nid] = reserve_pages;
-		reserve_pages += size;
-		printk(KERN_DEBUG "Reserving %ld pages of KVA for lmem_map of"
-				  " node %d at %llx\n",
-				size, nid, node_kva_final>>PAGE_SHIFT);
-
-		/*
-		 * prevent kva address below max_low_pfn want it on system
-		 * with less memory later.
-		 * layout will be: KVA address , KVA RAM
-		 *
-		 * we are supposed to only record the one less then max_low_pfn
-		 * but we could have some hole in high memory, and it will only
-		 * check page_is_ram(pfn) && !page_is_reserved_early(pfn) to decide
-		 * to use it as free.
-		 * So memblock_x86_reserve_range here, hope we don't run out of that array
-		 */
-		memblock_x86_reserve_range(node_kva_final,
-			      node_kva_final+(((u64)size)<<PAGE_SHIFT),
-			      "KVA RAM");
-
-		node_remap_start_pfn[nid] = node_kva_final>>PAGE_SHIFT;
-	}
-	printk(KERN_INFO "Reserving total of %lx pages for numa KVA remap\n",
-			reserve_pages);
-	return reserve_pages;
-}
+	/*
+	 * The acpi/srat node info can show hot-add memory zones where
+	 * memory could be added but not currently present.
+	 */
+	printk(KERN_DEBUG "node %d pfn: [%lx - %lx]\n",
+	       nid, node_start_pfn[nid], node_end_pfn[nid]);
+	if (node_start_pfn[nid] > max_pfn)
+		return;
+	if (!node_end_pfn[nid])
+		return;
+	if (node_end_pfn[nid] > max_pfn)
+		node_end_pfn[nid] = max_pfn;
 
-static void init_remap_allocator(int nid)
-{
-	node_remap_start_vaddr[nid] = pfn_to_kaddr(
-			kva_start_pfn + node_remap_offset[nid]);
-	node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] +
-			(node_remap_size[nid] * PAGE_SIZE);
-	node_remap_alloc_vaddr[nid] = node_remap_start_vaddr[nid] +
-			ALIGN(sizeof(pg_data_t), PAGE_SIZE);
-
-	printk(KERN_DEBUG "node %d will remap to vaddr %08lx - %08lx\n", nid,
-		(ulong) node_remap_start_vaddr[nid],
-		(ulong) node_remap_end_vaddr[nid]);
+	/* calculate the necessary space aligned to large page size */
+	size = node_memmap_size_bytes(nid, node_start_pfn[nid],
+				      min(node_end_pfn[nid], max_pfn));
+	size += ALIGN(sizeof(pg_data_t), PAGE_SIZE);
+	size = ALIGN(size, LARGE_PAGE_BYTES);
+
+	/* allocate node memory and the lowmem remap area */
+	node_pa = memblock_find_in_range(node_start_pfn[nid] << PAGE_SHIFT,
+					 (u64)node_end_pfn[nid] << PAGE_SHIFT,
+					 size, LARGE_PAGE_BYTES);
+	if (node_pa == MEMBLOCK_ERROR) {
+		pr_warning("remap_alloc: failed to allocate %lu bytes for node %d\n",
+			   size, nid);
+		return;
+	}
+	memblock_x86_reserve_range(node_pa, node_pa + size, "KVA RAM");
+
+	remap_pa = memblock_find_in_range(min_low_pfn << PAGE_SHIFT,
+					  max_low_pfn << PAGE_SHIFT,
+					  size, LARGE_PAGE_BYTES);
+	if (remap_pa == MEMBLOCK_ERROR) {
+		pr_warning("remap_alloc: failed to allocate %lu bytes remap area for node %d\n",
+			   size, nid);
+		memblock_x86_free_range(node_pa, node_pa + size);
+		return;
+	}
+	memblock_x86_reserve_range(remap_pa, remap_pa + size, "KVA PG");
+	remap_va = phys_to_virt(remap_pa);
+
+	/* perform actual remap */
+	for (pfn = 0; pfn < size >> PAGE_SHIFT; pfn += PTRS_PER_PTE)
+		set_pmd_pfn((unsigned long)remap_va + (pfn << PAGE_SHIFT),
+			    (node_pa >> PAGE_SHIFT) + pfn,
+			    PAGE_KERNEL_LARGE);
+
+	/* initialize remap allocator parameters */
+	node_remap_start_pfn[nid] = node_pa >> PAGE_SHIFT;
+	node_remap_start_vaddr[nid] = remap_va;
+	node_remap_end_vaddr[nid] = remap_va + size;
+	node_remap_alloc_vaddr[nid] = remap_va;
+
+	printk(KERN_DEBUG "remap_alloc: node %d [%08llx-%08llx) -> [%p-%p)\n",
+	       nid, node_pa, node_pa + size, remap_va, remap_va + size);
 }
 
 void __init initmem_init(void)
 {
 	int nid;
-	long kva_target_pfn;
-
-	/*
-	 * When mapping a NUMA machine we allocate the node_mem_map arrays
-	 * from node local memory. They are then mapped directly into KVA
-	 * between zone normal and vmalloc space. Calculate the size of
-	 * this space and use it to adjust the boundary between ZONE_NORMAL
-	 * and ZONE_HIGHMEM.
-	 */
 
 	get_memcfg_numa();
 	numa_init_array();
 
-	kva_pages = roundup(calculate_numa_remap_pages(), PTRS_PER_PTE);
-
-	kva_target_pfn = round_down(max_low_pfn - kva_pages, PTRS_PER_PTE);
-	do {
-		kva_start_pfn = memblock_find_in_range(kva_target_pfn<<PAGE_SHIFT,
-					max_low_pfn<<PAGE_SHIFT,
-					kva_pages<<PAGE_SHIFT,
-					PTRS_PER_PTE<<PAGE_SHIFT) >> PAGE_SHIFT;
-		kva_target_pfn -= PTRS_PER_PTE;
-	} while (kva_start_pfn == MEMBLOCK_ERROR && kva_target_pfn > min_low_pfn);
-
-	if (kva_start_pfn == MEMBLOCK_ERROR)
-		panic("Can not get kva space\n");
-
-	printk(KERN_INFO "kva_start_pfn ~ %lx max_low_pfn ~ %lx\n",
-		kva_start_pfn, max_low_pfn);
-	printk(KERN_INFO "max_pfn = %lx\n", max_pfn);
+	for_each_online_node(nid)
+		init_alloc_remap(nid);
 
-	/* avoid clash with initrd */
-	memblock_x86_reserve_range(kva_start_pfn<<PAGE_SHIFT,
-				(kva_start_pfn + kva_pages)<<PAGE_SHIFT,
-				"KVA PG");
 #ifdef CONFIG_HIGHMEM
 	highstart_pfn = highend_pfn = max_pfn;
 	if (max_pfn > max_low_pfn)
@@ -409,12 +367,8 @@ void __init initmem_init(void)
 
 	printk(KERN_DEBUG "Low memory ends at vaddr %08lx\n",
 			(ulong) pfn_to_kaddr(max_low_pfn));
-	for_each_online_node(nid) {
-		init_remap_allocator(nid);
-
+	for_each_online_node(nid)
 		allocate_pgdat(nid);
-	}
-	remap_numa_kva();
 
 	printk(KERN_DEBUG "High memory starts at vaddr %08lx\n",
 			(ulong) pfn_to_kaddr(highstart_pfn));
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c
index 48651c6f657..1b9e82c96dc 100644
--- a/arch/x86/mm/srat_32.c
+++ b/arch/x86/mm/srat_32.c
@@ -276,7 +276,6 @@ int __init get_memcfg_from_srat(void)
 		unsigned long end = min(node_end_pfn[nid], max_pfn);
 
 		memory_present(nid, start, end);
-		node_remap_size[nid] = node_memmap_size_bytes(nid, start, end);
 	}
 	return 1;
 out_fail:
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 8dace181c88..cf9750004a0 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -49,6 +49,10 @@ u64 op_x86_get_ctrl(struct op_x86_model_spec const *model,
 	val |= counter_config->user ? ARCH_PERFMON_EVENTSEL_USR : 0;
 	val |= counter_config->kernel ? ARCH_PERFMON_EVENTSEL_OS : 0;
 	val |= (counter_config->unit_mask & 0xFF) << 8;
+	counter_config->extra &= (ARCH_PERFMON_EVENTSEL_INV |
+				  ARCH_PERFMON_EVENTSEL_EDGE |
+				  ARCH_PERFMON_EVENTSEL_CMASK);
+	val |= counter_config->extra;
 	event &= model->event_mask ? model->event_mask : 0xFF;
 	val |= event & 0xFF;
 	val |= (event & 0x0F00) << 24;
@@ -440,6 +444,7 @@ static int nmi_create_files(struct super_block *sb, struct dentry *root)
 		oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask);
 		oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel);
 		oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user);
+		oprofilefs_create_ulong(sb, dir, "extra", &counter_config[i].extra);
 	}
 
 	return 0;
diff --git a/arch/x86/oprofile/op_counter.h b/arch/x86/oprofile/op_counter.h
index e28398df0df..0b7b7b179cb 100644
--- a/arch/x86/oprofile/op_counter.h
+++ b/arch/x86/oprofile/op_counter.h
@@ -22,6 +22,7 @@ struct op_counter_config {
 	unsigned long kernel;
 	unsigned long user;
 	unsigned long unit_mask;
+	unsigned long extra;
 };
 
 extern struct op_counter_config counter_config[];
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index a7b38d35c29..7cb6424317f 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -11,6 +11,7 @@
 #include <linux/debugfs.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
+#include <linux/delay.h>
 
 #include <asm/mmu_context.h>
 #include <asm/uv/uv.h>