Diffstat (limited to 'arch/x86')
60 files changed, 1188 insertions, 1154 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0896008f7509..f15f37bfbd62 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -31,6 +31,7 @@ config X86 | |||
31 | select ARCH_WANT_FRAME_POINTERS | 31 | select ARCH_WANT_FRAME_POINTERS |
32 | select HAVE_DMA_ATTRS | 32 | select HAVE_DMA_ATTRS |
33 | select HAVE_KRETPROBES | 33 | select HAVE_KRETPROBES |
34 | select HAVE_OPTPROBES | ||
34 | select HAVE_FTRACE_MCOUNT_RECORD | 35 | select HAVE_FTRACE_MCOUNT_RECORD |
35 | select HAVE_DYNAMIC_FTRACE | 36 | select HAVE_DYNAMIC_FTRACE |
36 | select HAVE_FUNCTION_TRACER | 37 | select HAVE_FUNCTION_TRACER |
@@ -184,6 +185,9 @@ config ARCH_SUPPORTS_OPTIMIZED_INLINING | |||
184 | config ARCH_SUPPORTS_DEBUG_PAGEALLOC | 185 | config ARCH_SUPPORTS_DEBUG_PAGEALLOC |
185 | def_bool y | 186 | def_bool y |
186 | 187 | ||
188 | config HAVE_EARLY_RES | ||
189 | def_bool y | ||
190 | |||
187 | config HAVE_INTEL_TXT | 191 | config HAVE_INTEL_TXT |
188 | def_bool y | 192 | def_bool y |
189 | depends on EXPERIMENTAL && DMAR && ACPI | 193 | depends on EXPERIMENTAL && DMAR && ACPI |
@@ -569,6 +573,18 @@ config PARAVIRT_DEBUG | |||
569 | Enable to debug paravirt_ops internals. Specifically, BUG if | 573 | Enable to debug paravirt_ops internals. Specifically, BUG if |
570 | a paravirt_op is missing when it is called. | 574 | a paravirt_op is missing when it is called. |
571 | 575 | ||
576 | config NO_BOOTMEM | ||
577 | default y | ||
578 | bool "Disable Bootmem code" | ||
579 | ---help--- | ||
580 | Use early_res directly instead of bootmem before slab is ready. | ||
581 | - allocator (buddy) [generic] | ||
582 | - early allocator (bootmem) [generic] | ||
583 | - very early allocator (reserve_early*()) [x86] | ||
584 | - very very early allocator (early brk model) [x86] | ||
585 | This removes one layer (bootmem) between the early allocators and the final allocator. | ||
586 | |||
587 | |||
572 | config MEMTEST | 588 | config MEMTEST |
573 | bool "Memtest" | 589 | bool "Memtest" |
574 | ---help--- | 590 | ---help--- |
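For context on the NO_BOOTMEM help above: with bootmem taken out of the stack, very early reservations go through the early_res API whose declarations move into <linux/early_res.h> in the e820.h hunk below. A hedged usage sketch follows; the address range is invented for illustration.

    /* Claim a boot-time range so nothing else allocates over it,
     * then hand it back once its contents have been consumed. */
    static void __init demo_early_res(void)
    {
            reserve_early(0x100000, 0x200000, "EXAMPLE DATA");
            /* ... consume the range during early boot ... */
            free_early(0x100000, 0x200000);
    }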
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index 9046e4af66ce..280c019cfad8 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -327,7 +327,6 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs) | |||
327 | current->mm->free_area_cache = TASK_UNMAPPED_BASE; | 327 | current->mm->free_area_cache = TASK_UNMAPPED_BASE; |
328 | current->mm->cached_hole_size = 0; | 328 | current->mm->cached_hole_size = 0; |
329 | 329 | ||
330 | current->mm->mmap = NULL; | ||
331 | install_exec_creds(bprm); | 330 | install_exec_creds(bprm); |
332 | current->flags &= ~PF_FORKNOEXEC; | 331 | current->flags &= ~PF_FORKNOEXEC; |
333 | 332 | ||
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index f1e253ceba4b..b09ec55650b3 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -165,10 +165,12 @@ static inline void apply_paravirt(struct paravirt_patch_site *start, | |||
165 | * invalid instruction possible) or if the instructions are changed from a | 165 | * invalid instruction possible) or if the instructions are changed from a |
166 | * consistent state to another consistent state atomically. | 166 | * consistent state to another consistent state atomically. |
167 | * More care must be taken when modifying code in the SMP case because of | 167 | * More care must be taken when modifying code in the SMP case because of |
168 | * Intel's errata. | 168 | * Intel's errata. text_poke_smp() takes care of that errata, but still |
169 | * does not support modifying code in NMI/MCE handlers. | ||
169 | * On the local CPU you need to be protected against NMI or MCE handlers seeing an | 170 | * On the local CPU you need to be protected against NMI or MCE handlers seeing an |
170 | * inconsistent instruction while you patch. | 171 | * inconsistent instruction while you patch. |
171 | */ | 172 | */ |
172 | extern void *text_poke(void *addr, const void *opcode, size_t len); | 173 | extern void *text_poke(void *addr, const void *opcode, size_t len); |
174 | extern void *text_poke_smp(void *addr, const void *opcode, size_t len); | ||
173 | 175 | ||
174 | #endif /* _ASM_X86_ALTERNATIVE_H */ | 176 | #endif /* _ASM_X86_ALTERNATIVE_H */ |
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index 761249e396fe..0e22296790d3 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -111,11 +111,8 @@ extern unsigned long end_user_pfn; | |||
111 | 111 | ||
112 | extern u64 find_e820_area(u64 start, u64 end, u64 size, u64 align); | 112 | extern u64 find_e820_area(u64 start, u64 end, u64 size, u64 align); |
113 | extern u64 find_e820_area_size(u64 start, u64 *sizep, u64 align); | 113 | extern u64 find_e820_area_size(u64 start, u64 *sizep, u64 align); |
114 | extern void reserve_early(u64 start, u64 end, char *name); | ||
115 | extern void reserve_early_overlap_ok(u64 start, u64 end, char *name); | ||
116 | extern void free_early(u64 start, u64 end); | ||
117 | extern void early_res_to_bootmem(u64 start, u64 end); | ||
118 | extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align); | 114 | extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align); |
115 | #include <linux/early_res.h> | ||
119 | 116 | ||
120 | extern unsigned long e820_end_of_ram_pfn(void); | 117 | extern unsigned long e820_end_of_ram_pfn(void); |
121 | extern unsigned long e820_end_of_low_ram_pfn(void); | 118 | extern unsigned long e820_end_of_low_ram_pfn(void); |
diff --git a/arch/x86/include/asm/highmem.h b/arch/x86/include/asm/highmem.h
index 014c2b85ae45..a726650fc80f 100644
--- a/arch/x86/include/asm/highmem.h
+++ b/arch/x86/include/asm/highmem.h
@@ -66,10 +66,6 @@ void *kmap_atomic_pfn(unsigned long pfn, enum km_type type); | |||
66 | void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot); | 66 | void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot); |
67 | struct page *kmap_atomic_to_page(void *ptr); | 67 | struct page *kmap_atomic_to_page(void *ptr); |
68 | 68 | ||
69 | #ifndef CONFIG_PARAVIRT | ||
70 | #define kmap_atomic_pte(page, type) kmap_atomic(page, type) | ||
71 | #endif | ||
72 | |||
73 | #define flush_cache_kmaps() do { } while (0) | 69 | #define flush_cache_kmaps() do { } while (0) |
74 | 70 | ||
75 | extern void add_highpages_with_active_regions(int nid, unsigned long start_pfn, | 71 | extern void add_highpages_with_active_regions(int nid, unsigned long start_pfn, |
diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h
index 58d7091eeb1f..7ec65b18085d 100644
--- a/arch/x86/include/asm/i8259.h
+++ b/arch/x86/include/asm/i8259.h
@@ -24,7 +24,7 @@ extern unsigned int cached_irq_mask; | |||
24 | #define SLAVE_ICW4_DEFAULT 0x01 | 24 | #define SLAVE_ICW4_DEFAULT 0x01 |
25 | #define PIC_ICW4_AEOI 2 | 25 | #define PIC_ICW4_AEOI 2 |
26 | 26 | ||
27 | extern spinlock_t i8259A_lock; | 27 | extern raw_spinlock_t i8259A_lock; |
28 | 28 | ||
29 | extern void init_8259A(int auto_eoi); | 29 | extern void init_8259A(int auto_eoi); |
30 | extern void enable_8259A_irq(unsigned int irq); | 30 | extern void enable_8259A_irq(unsigned int irq); |
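The i8259A_lock change above (and the matching DEFINE_RAW_SPINLOCK conversions in io_apic.c below) keeps these low-level interrupt paths on true spinning locks even on trees where plain spinlock_t can become sleepable, such as preempt-rt. The calling convention only gains the raw_ prefix; a hedged sketch with a made-up lock:

    #include <linux/spinlock.h>

    static DEFINE_RAW_SPINLOCK(demo_lock);  /* hypothetical lock */

    static void demo_hw_access(void)
    {
            unsigned long flags;

            raw_spin_lock_irqsave(&demo_lock, flags);
            /* ... PIC register access that must never sleep ... */
            raw_spin_unlock_irqrestore(&demo_lock, flags);
    }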
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 7c7c16cde1f8..5f61f6e0ffdd 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -160,6 +160,7 @@ extern int io_apic_get_redir_entries(int ioapic); | |||
160 | struct io_apic_irq_attr; | 160 | struct io_apic_irq_attr; |
161 | extern int io_apic_set_pci_routing(struct device *dev, int irq, | 161 | extern int io_apic_set_pci_routing(struct device *dev, int irq, |
162 | struct io_apic_irq_attr *irq_attr); | 162 | struct io_apic_irq_attr *irq_attr); |
163 | void setup_IO_APIC_irq_extra(u32 gsi); | ||
163 | extern int (*ioapic_renumber_irq)(int ioapic, int irq); | 164 | extern int (*ioapic_renumber_irq)(int ioapic, int irq); |
164 | extern void ioapic_init_mappings(void); | 165 | extern void ioapic_init_mappings(void); |
165 | extern void ioapic_insert_resources(void); | 166 | extern void ioapic_insert_resources(void); |
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index 5458380b6ef8..262292729fc4 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -48,5 +48,6 @@ extern DECLARE_BITMAP(used_vectors, NR_VECTORS); | |||
48 | extern int vector_used_by_percpu_irq(unsigned int vector); | 48 | extern int vector_used_by_percpu_irq(unsigned int vector); |
49 | 49 | ||
50 | extern void init_ISA_irqs(void); | 50 | extern void init_ISA_irqs(void); |
51 | extern int nr_legacy_irqs; | ||
51 | 52 | ||
52 | #endif /* _ASM_X86_IRQ_H */ | 53 | #endif /* _ASM_X86_IRQ_H */ |
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 4611f085cd43..8767d99c4f64 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -28,28 +28,33 @@ | |||
28 | #define MCE_VECTOR 0x12 | 28 | #define MCE_VECTOR 0x12 |
29 | 29 | ||
30 | /* | 30 | /* |
31 | * IDT vectors usable for external interrupt sources start | 31 | * IDT vectors usable for external interrupt sources start at 0x20. |
32 | * at 0x20: | 32 | * (0x80 is the syscall vector, 0x30-0x3f are for ISA) |
33 | */ | 33 | */ |
34 | #define FIRST_EXTERNAL_VECTOR 0x20 | 34 | #define FIRST_EXTERNAL_VECTOR 0x20 |
35 | 35 | /* | |
36 | #ifdef CONFIG_X86_32 | 36 | * We start allocating at 0x21 to spread out vectors evenly between |
37 | # define SYSCALL_VECTOR 0x80 | 37 | * priority levels. (0x80 is the syscall vector) |
38 | # define IA32_SYSCALL_VECTOR 0x80 | 38 | */ |
39 | #else | 39 | #define VECTOR_OFFSET_START 1 |
40 | # define IA32_SYSCALL_VECTOR 0x80 | ||
41 | #endif | ||
42 | 40 | ||
43 | /* | 41 | /* |
44 | * Reserve the lowest usable priority level 0x20 - 0x2f for triggering | 42 | * Reserve the lowest usable vector (and hence lowest priority) 0x20 for |
45 | * cleanup after irq migration. | 43 | * triggering cleanup after irq migration. 0x21-0x2f will still be used |
44 | * for device interrupts. | ||
46 | */ | 45 | */ |
47 | #define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR | 46 | #define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR |
48 | 47 | ||
48 | #define IA32_SYSCALL_VECTOR 0x80 | ||
49 | #ifdef CONFIG_X86_32 | ||
50 | # define SYSCALL_VECTOR 0x80 | ||
51 | #endif | ||
52 | |||
49 | /* | 53 | /* |
50 | * Vectors 0x30-0x3f are used for ISA interrupts. | 54 | * Vectors 0x30-0x3f are used for ISA interrupts. |
55 | * round up to the next 16-vector boundary | ||
51 | */ | 56 | */ |
52 | #define IRQ0_VECTOR (FIRST_EXTERNAL_VECTOR + 0x10) | 57 | #define IRQ0_VECTOR ((FIRST_EXTERNAL_VECTOR + 16) & ~15) |
53 | 58 | ||
54 | #define IRQ1_VECTOR (IRQ0_VECTOR + 1) | 59 | #define IRQ1_VECTOR (IRQ0_VECTOR + 1) |
55 | #define IRQ2_VECTOR (IRQ0_VECTOR + 2) | 60 | #define IRQ2_VECTOR (IRQ0_VECTOR + 2) |
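A stand-alone worked example of the new IRQ0_VECTOR expression (plain user-space C, just to show the arithmetic): rounding up keeps vector 0x20 free for IRQ-move cleanup while ISA IRQ0 starts on the next 16-vector boundary.

    #include <stdio.h>

    #define FIRST_EXTERNAL_VECTOR  0x20
    #define IRQ0_VECTOR  ((FIRST_EXTERNAL_VECTOR + 16) & ~15)

    int main(void)
    {
            /* (0x20 + 16) & ~15 == 0x30 */
            printf("IRQ0_VECTOR = %#x\n", IRQ0_VECTOR);
            return 0;
    }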
@@ -120,13 +125,6 @@ | |||
120 | */ | 125 | */ |
121 | #define MCE_SELF_VECTOR 0xeb | 126 | #define MCE_SELF_VECTOR 0xeb |
122 | 127 | ||
123 | /* | ||
124 | * First APIC vector available to drivers: (vectors 0x30-0xee) we | ||
125 | * start at 0x31(0x41) to spread out vectors evenly between priority | ||
126 | * levels. (0x80 is the syscall vector) | ||
127 | */ | ||
128 | #define FIRST_DEVICE_VECTOR (IRQ15_VECTOR + 2) | ||
129 | |||
130 | #define NR_VECTORS 256 | 128 | #define NR_VECTORS 256 |
131 | 129 | ||
132 | #define FPU_IRQ 13 | 130 | #define FPU_IRQ 13 |
@@ -154,21 +152,21 @@ static inline int invalid_vm86_irq(int irq) | |||
154 | 152 | ||
155 | #define NR_IRQS_LEGACY 16 | 153 | #define NR_IRQS_LEGACY 16 |
156 | 154 | ||
157 | #define CPU_VECTOR_LIMIT ( 8 * NR_CPUS ) | ||
158 | #define IO_APIC_VECTOR_LIMIT ( 32 * MAX_IO_APICS ) | 155 | #define IO_APIC_VECTOR_LIMIT ( 32 * MAX_IO_APICS ) |
159 | 156 | ||
160 | #ifdef CONFIG_X86_IO_APIC | 157 | #ifdef CONFIG_X86_IO_APIC |
161 | # ifdef CONFIG_SPARSE_IRQ | 158 | # ifdef CONFIG_SPARSE_IRQ |
159 | # define CPU_VECTOR_LIMIT (64 * NR_CPUS) | ||
162 | # define NR_IRQS \ | 160 | # define NR_IRQS \ |
163 | (CPU_VECTOR_LIMIT > IO_APIC_VECTOR_LIMIT ? \ | 161 | (CPU_VECTOR_LIMIT > IO_APIC_VECTOR_LIMIT ? \ |
164 | (NR_VECTORS + CPU_VECTOR_LIMIT) : \ | 162 | (NR_VECTORS + CPU_VECTOR_LIMIT) : \ |
165 | (NR_VECTORS + IO_APIC_VECTOR_LIMIT)) | 163 | (NR_VECTORS + IO_APIC_VECTOR_LIMIT)) |
166 | # else | 164 | # else |
167 | # if NR_CPUS < MAX_IO_APICS | 165 | # define CPU_VECTOR_LIMIT (32 * NR_CPUS) |
168 | # define NR_IRQS (NR_VECTORS + 4*CPU_VECTOR_LIMIT) | 166 | # define NR_IRQS \ |
169 | # else | 167 | (CPU_VECTOR_LIMIT < IO_APIC_VECTOR_LIMIT ? \ |
170 | # define NR_IRQS (NR_VECTORS + IO_APIC_VECTOR_LIMIT) | 168 | (NR_VECTORS + CPU_VECTOR_LIMIT) : \ |
171 | # endif | 169 | (NR_VECTORS + IO_APIC_VECTOR_LIMIT)) |
172 | # endif | 170 | # endif |
173 | #else /* !CONFIG_X86_IO_APIC: */ | 171 | #else /* !CONFIG_X86_IO_APIC: */ |
174 | # define NR_IRQS NR_IRQS_LEGACY | 172 | # define NR_IRQS NR_IRQS_LEGACY |
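To make the NR_IRQS sizing above concrete, here is a stand-alone sketch of the sparse-IRQ case; the NR_CPUS and MAX_IO_APICS values are illustrative assumptions, not the kernel's configured defaults.

    #include <stdio.h>

    #define NR_VECTORS            256
    #define NR_CPUS                 8   /* assumption */
    #define MAX_IO_APICS           64   /* assumption */
    #define CPU_VECTOR_LIMIT      (64 * NR_CPUS)
    #define IO_APIC_VECTOR_LIMIT  (32 * MAX_IO_APICS)
    #define NR_IRQS                                         \
            (CPU_VECTOR_LIMIT > IO_APIC_VECTOR_LIMIT ?      \
             (NR_VECTORS + CPU_VECTOR_LIMIT) :              \
             (NR_VECTORS + IO_APIC_VECTOR_LIMIT))

    int main(void)
    {
            /* 512 cpu vectors vs 2048 ioapic pins -> 256 + 2048 = 2304 */
            printf("NR_IRQS = %d\n", NR_IRQS);
            return 0;
    }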
diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index 4fe681de1e76..4ffa345a8ccb 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -32,7 +32,10 @@ struct kprobe; | |||
32 | 32 | ||
33 | typedef u8 kprobe_opcode_t; | 33 | typedef u8 kprobe_opcode_t; |
34 | #define BREAKPOINT_INSTRUCTION 0xcc | 34 | #define BREAKPOINT_INSTRUCTION 0xcc |
35 | #define RELATIVEJUMP_INSTRUCTION 0xe9 | 35 | #define RELATIVEJUMP_OPCODE 0xe9 |
36 | #define RELATIVEJUMP_SIZE 5 | ||
37 | #define RELATIVECALL_OPCODE 0xe8 | ||
38 | #define RELATIVE_ADDR_SIZE 4 | ||
36 | #define MAX_INSN_SIZE 16 | 39 | #define MAX_INSN_SIZE 16 |
37 | #define MAX_STACK_SIZE 64 | 40 | #define MAX_STACK_SIZE 64 |
38 | #define MIN_STACK_SIZE(ADDR) \ | 41 | #define MIN_STACK_SIZE(ADDR) \ |
@@ -44,6 +47,17 @@ typedef u8 kprobe_opcode_t; | |||
44 | 47 | ||
45 | #define flush_insn_slot(p) do { } while (0) | 48 | #define flush_insn_slot(p) do { } while (0) |
46 | 49 | ||
50 | /* optinsn template addresses */ | ||
51 | extern kprobe_opcode_t optprobe_template_entry; | ||
52 | extern kprobe_opcode_t optprobe_template_val; | ||
53 | extern kprobe_opcode_t optprobe_template_call; | ||
54 | extern kprobe_opcode_t optprobe_template_end; | ||
55 | #define MAX_OPTIMIZED_LENGTH (MAX_INSN_SIZE + RELATIVE_ADDR_SIZE) | ||
56 | #define MAX_OPTINSN_SIZE \ | ||
57 | (((unsigned long)&optprobe_template_end - \ | ||
58 | (unsigned long)&optprobe_template_entry) + \ | ||
59 | MAX_OPTIMIZED_LENGTH + RELATIVEJUMP_SIZE) | ||
60 | |||
47 | extern const int kretprobe_blacklist_size; | 61 | extern const int kretprobe_blacklist_size; |
48 | 62 | ||
49 | void arch_remove_kprobe(struct kprobe *p); | 63 | void arch_remove_kprobe(struct kprobe *p); |
@@ -64,6 +78,21 @@ struct arch_specific_insn { | |||
64 | int boostable; | 78 | int boostable; |
65 | }; | 79 | }; |
66 | 80 | ||
81 | struct arch_optimized_insn { | ||
82 | /* copy of the original instructions */ | ||
83 | kprobe_opcode_t copied_insn[RELATIVE_ADDR_SIZE]; | ||
84 | /* detour code buffer */ | ||
85 | kprobe_opcode_t *insn; | ||
86 | /* the size of instructions copied to detour code buffer */ | ||
87 | size_t size; | ||
88 | }; | ||
89 | |||
90 | /* Return true (!0) if optinsn is prepared for optimization. */ | ||
91 | static inline int arch_prepared_optinsn(struct arch_optimized_insn *optinsn) | ||
92 | { | ||
93 | return optinsn->size; | ||
94 | } | ||
95 | |||
67 | struct prev_kprobe { | 96 | struct prev_kprobe { |
68 | struct kprobe *kp; | 97 | struct kprobe *kp; |
69 | unsigned long status; | 98 | unsigned long status; |
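The MAX_OPTINSN_SIZE expression above budgets the per-probe detour buffer: the template body, up to MAX_OPTIMIZED_LENGTH bytes of relocated original instructions, plus the closing 5-byte jump back. A user-space sketch of the arithmetic; the template length is a stand-in value, since the real one is the span between the optprobe_template_entry/_end symbols.

    #include <stdio.h>

    #define MAX_INSN_SIZE         16
    #define RELATIVE_ADDR_SIZE     4
    #define RELATIVEJUMP_SIZE      5
    #define MAX_OPTIMIZED_LENGTH  (MAX_INSN_SIZE + RELATIVE_ADDR_SIZE)

    int main(void)
    {
            unsigned long template_len = 32;  /* stand-in value */

            printf("detour buffer budget = %lu bytes\n",
                   template_len + MAX_OPTIMIZED_LENGTH + RELATIVEJUMP_SIZE);
            return 0;
    }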
diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h
index 47b9b6f19057..2e9972468a5d 100644
--- a/arch/x86/include/asm/local.h
+++ b/arch/x86/include/asm/local.h
@@ -195,41 +195,4 @@ static inline long local_sub_return(long i, local_t *l) | |||
195 | #define __local_add(i, l) local_add((i), (l)) | 195 | #define __local_add(i, l) local_add((i), (l)) |
196 | #define __local_sub(i, l) local_sub((i), (l)) | 196 | #define __local_sub(i, l) local_sub((i), (l)) |
197 | 197 | ||
198 | /* Use these for per-cpu local_t variables: on some archs they are | ||
199 | * much more efficient than these naive implementations. Note they take | ||
200 | * a variable, not an address. | ||
201 | * | ||
202 | * X86_64: This could be done better if we moved the per cpu data directly | ||
203 | * after GS. | ||
204 | */ | ||
205 | |||
206 | /* Need to disable preemption for the cpu local counters otherwise we could | ||
207 | still access a variable of a previous CPU in a non atomic way. */ | ||
208 | #define cpu_local_wrap_v(l) \ | ||
209 | ({ \ | ||
210 | local_t res__; \ | ||
211 | preempt_disable(); \ | ||
212 | res__ = (l); \ | ||
213 | preempt_enable(); \ | ||
214 | res__; \ | ||
215 | }) | ||
216 | #define cpu_local_wrap(l) \ | ||
217 | ({ \ | ||
218 | preempt_disable(); \ | ||
219 | (l); \ | ||
220 | preempt_enable(); \ | ||
221 | }) \ | ||
222 | |||
223 | #define cpu_local_read(l) cpu_local_wrap_v(local_read(&__get_cpu_var((l)))) | ||
224 | #define cpu_local_set(l, i) cpu_local_wrap(local_set(&__get_cpu_var((l)), (i))) | ||
225 | #define cpu_local_inc(l) cpu_local_wrap(local_inc(&__get_cpu_var((l)))) | ||
226 | #define cpu_local_dec(l) cpu_local_wrap(local_dec(&__get_cpu_var((l)))) | ||
227 | #define cpu_local_add(i, l) cpu_local_wrap(local_add((i), &__get_cpu_var((l)))) | ||
228 | #define cpu_local_sub(i, l) cpu_local_wrap(local_sub((i), &__get_cpu_var((l)))) | ||
229 | |||
230 | #define __cpu_local_inc(l) cpu_local_inc((l)) | ||
231 | #define __cpu_local_dec(l) cpu_local_dec((l)) | ||
232 | #define __cpu_local_add(i, l) cpu_local_add((i), (l)) | ||
233 | #define __cpu_local_sub(i, l) cpu_local_sub((i), (l)) | ||
234 | |||
235 | #endif /* _ASM_X86_LOCAL_H */ | 198 | #endif /* _ASM_X86_LOCAL_H */ |
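The cpu_local_* wrappers deleted above had to bracket every local_t operation with preempt_disable()/preempt_enable(); the this_cpu_* operations used elsewhere in this series achieve the same effect in a single segment-prefixed instruction on x86. A hedged replacement sketch (demo_stat is a made-up variable):

    #include <linux/percpu.h>

    static DEFINE_PER_CPU(long, demo_stat);  /* hypothetical counter */

    static void demo(void)
    {
            /* One %gs-prefixed instruction; no preemption dance needed. */
            this_cpu_inc(demo_stat);
            this_cpu_add(demo_stat, 16);
    }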
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index dd59a85a918f..5653f43d90e5 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -435,15 +435,6 @@ static inline void paravirt_release_pud(unsigned long pfn) | |||
435 | PVOP_VCALL1(pv_mmu_ops.release_pud, pfn); | 435 | PVOP_VCALL1(pv_mmu_ops.release_pud, pfn); |
436 | } | 436 | } |
437 | 437 | ||
438 | #ifdef CONFIG_HIGHPTE | ||
439 | static inline void *kmap_atomic_pte(struct page *page, enum km_type type) | ||
440 | { | ||
441 | unsigned long ret; | ||
442 | ret = PVOP_CALL2(unsigned long, pv_mmu_ops.kmap_atomic_pte, page, type); | ||
443 | return (void *)ret; | ||
444 | } | ||
445 | #endif | ||
446 | |||
447 | static inline void pte_update(struct mm_struct *mm, unsigned long addr, | 438 | static inline void pte_update(struct mm_struct *mm, unsigned long addr, |
448 | pte_t *ptep) | 439 | pte_t *ptep) |
449 | { | 440 | { |
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index b1e70d51e40c..db9ef5532341 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -304,10 +304,6 @@ struct pv_mmu_ops { | |||
304 | #endif /* PAGETABLE_LEVELS == 4 */ | 304 | #endif /* PAGETABLE_LEVELS == 4 */ |
305 | #endif /* PAGETABLE_LEVELS >= 3 */ | 305 | #endif /* PAGETABLE_LEVELS >= 3 */ |
306 | 306 | ||
307 | #ifdef CONFIG_HIGHPTE | ||
308 | void *(*kmap_atomic_pte)(struct page *page, enum km_type type); | ||
309 | #endif | ||
310 | |||
311 | struct pv_lazy_ops lazy_mode; | 307 | struct pv_lazy_ops lazy_mode; |
312 | 308 | ||
313 | /* dom0 ops */ | 309 | /* dom0 ops */ |
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index ada8c201d513..b4a00dd4eed5 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -124,6 +124,8 @@ extern void pci_iommu_alloc(void); | |||
124 | #include "pci_64.h" | 124 | #include "pci_64.h" |
125 | #endif | 125 | #endif |
126 | 126 | ||
127 | void dma32_reserve_bootmem(void); | ||
128 | |||
127 | /* implement the pci_ DMA API in terms of the generic device dma_ one */ | 129 | /* implement the pci_ DMA API in terms of the generic device dma_ one */ |
128 | #include <asm-generic/pci-dma-compat.h> | 130 | #include <asm-generic/pci-dma-compat.h> |
129 | 131 | ||
diff --git a/arch/x86/include/asm/pci_64.h b/arch/x86/include/asm/pci_64.h
index ae5e40f67daf..fe15cfb21b9b 100644
--- a/arch/x86/include/asm/pci_64.h
+++ b/arch/x86/include/asm/pci_64.h
@@ -22,8 +22,6 @@ extern int (*pci_config_read)(int seg, int bus, int dev, int fn, | |||
22 | extern int (*pci_config_write)(int seg, int bus, int dev, int fn, | 22 | extern int (*pci_config_write)(int seg, int bus, int dev, int fn, |
23 | int reg, int len, u32 value); | 23 | int reg, int len, u32 value); |
24 | 24 | ||
25 | extern void dma32_reserve_bootmem(void); | ||
26 | |||
27 | #endif /* __KERNEL__ */ | 25 | #endif /* __KERNEL__ */ |
28 | 26 | ||
29 | #endif /* _ASM_X86_PCI_64_H */ | 27 | #endif /* _ASM_X86_PCI_64_H */ |
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 0c44196b78ac..66a272dfd8b8 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -25,19 +25,18 @@ | |||
25 | */ | 25 | */ |
26 | #ifdef CONFIG_SMP | 26 | #ifdef CONFIG_SMP |
27 | #define PER_CPU(var, reg) \ | 27 | #define PER_CPU(var, reg) \ |
28 | __percpu_mov_op %__percpu_seg:per_cpu__this_cpu_off, reg; \ | 28 | __percpu_mov_op %__percpu_seg:this_cpu_off, reg; \ |
29 | lea per_cpu__##var(reg), reg | 29 | lea var(reg), reg |
30 | #define PER_CPU_VAR(var) %__percpu_seg:per_cpu__##var | 30 | #define PER_CPU_VAR(var) %__percpu_seg:var |
31 | #else /* ! SMP */ | 31 | #else /* ! SMP */ |
32 | #define PER_CPU(var, reg) \ | 32 | #define PER_CPU(var, reg) __percpu_mov_op $var, reg |
33 | __percpu_mov_op $per_cpu__##var, reg | 33 | #define PER_CPU_VAR(var) var |
34 | #define PER_CPU_VAR(var) per_cpu__##var | ||
35 | #endif /* SMP */ | 34 | #endif /* SMP */ |
36 | 35 | ||
37 | #ifdef CONFIG_X86_64_SMP | 36 | #ifdef CONFIG_X86_64_SMP |
38 | #define INIT_PER_CPU_VAR(var) init_per_cpu__##var | 37 | #define INIT_PER_CPU_VAR(var) init_per_cpu__##var |
39 | #else | 38 | #else |
40 | #define INIT_PER_CPU_VAR(var) per_cpu__##var | 39 | #define INIT_PER_CPU_VAR(var) var |
41 | #endif | 40 | #endif |
42 | 41 | ||
43 | #else /* ...!ASSEMBLY */ | 42 | #else /* ...!ASSEMBLY */ |
@@ -60,12 +59,12 @@ | |||
60 | * There also must be an entry in vmlinux_64.lds.S | 59 | * There also must be an entry in vmlinux_64.lds.S |
61 | */ | 60 | */ |
62 | #define DECLARE_INIT_PER_CPU(var) \ | 61 | #define DECLARE_INIT_PER_CPU(var) \ |
63 | extern typeof(per_cpu_var(var)) init_per_cpu_var(var) | 62 | extern typeof(var) init_per_cpu_var(var) |
64 | 63 | ||
65 | #ifdef CONFIG_X86_64_SMP | 64 | #ifdef CONFIG_X86_64_SMP |
66 | #define init_per_cpu_var(var) init_per_cpu__##var | 65 | #define init_per_cpu_var(var) init_per_cpu__##var |
67 | #else | 66 | #else |
68 | #define init_per_cpu_var(var) per_cpu_var(var) | 67 | #define init_per_cpu_var(var) var |
69 | #endif | 68 | #endif |
70 | 69 | ||
71 | /* For arch-specific code, we can use direct single-insn ops (they | 70 | /* For arch-specific code, we can use direct single-insn ops (they |
@@ -104,6 +103,64 @@ do { \ | |||
104 | } \ | 103 | } \ |
105 | } while (0) | 104 | } while (0) |
106 | 105 | ||
106 | /* | ||
107 | * Generate a percpu add-to-memory instruction, optimized to inc/dec | ||
108 | * when the value added or subtracted is a constant 1 or -1. | ||
109 | */ | ||
110 | #define percpu_add_op(var, val) \ | ||
111 | do { \ | ||
112 | typedef typeof(var) pao_T__; \ | ||
113 | const int pao_ID__ = (__builtin_constant_p(val) && \ | ||
114 | ((val) == 1 || (val) == -1)) ? (val) : 0; \ | ||
115 | if (0) { \ | ||
116 | pao_T__ pao_tmp__; \ | ||
117 | pao_tmp__ = (val); \ | ||
118 | } \ | ||
119 | switch (sizeof(var)) { \ | ||
120 | case 1: \ | ||
121 | if (pao_ID__ == 1) \ | ||
122 | asm("incb "__percpu_arg(0) : "+m" (var)); \ | ||
123 | else if (pao_ID__ == -1) \ | ||
124 | asm("decb "__percpu_arg(0) : "+m" (var)); \ | ||
125 | else \ | ||
126 | asm("addb %1, "__percpu_arg(0) \ | ||
127 | : "+m" (var) \ | ||
128 | : "qi" ((pao_T__)(val))); \ | ||
129 | break; \ | ||
130 | case 2: \ | ||
131 | if (pao_ID__ == 1) \ | ||
132 | asm("incw "__percpu_arg(0) : "+m" (var)); \ | ||
133 | else if (pao_ID__ == -1) \ | ||
134 | asm("decw "__percpu_arg(0) : "+m" (var)); \ | ||
135 | else \ | ||
136 | asm("addw %1, "__percpu_arg(0) \ | ||
137 | : "+m" (var) \ | ||
138 | : "ri" ((pao_T__)(val))); \ | ||
139 | break; \ | ||
140 | case 4: \ | ||
141 | if (pao_ID__ == 1) \ | ||
142 | asm("incl "__percpu_arg(0) : "+m" (var)); \ | ||
143 | else if (pao_ID__ == -1) \ | ||
144 | asm("decl "__percpu_arg(0) : "+m" (var)); \ | ||
145 | else \ | ||
146 | asm("addl %1, "__percpu_arg(0) \ | ||
147 | : "+m" (var) \ | ||
148 | : "ri" ((pao_T__)(val))); \ | ||
149 | break; \ | ||
150 | case 8: \ | ||
151 | if (pao_ID__ == 1) \ | ||
152 | asm("incq "__percpu_arg(0) : "+m" (var)); \ | ||
153 | else if (pao_ID__ == -1) \ | ||
154 | asm("decq "__percpu_arg(0) : "+m" (var)); \ | ||
155 | else \ | ||
156 | asm("addq %1, "__percpu_arg(0) \ | ||
157 | : "+m" (var) \ | ||
158 | : "re" ((pao_T__)(val))); \ | ||
159 | break; \ | ||
160 | default: __bad_percpu_size(); \ | ||
161 | } \ | ||
162 | } while (0) | ||
163 | |||
107 | #define percpu_from_op(op, var, constraint) \ | 164 | #define percpu_from_op(op, var, constraint) \ |
108 | ({ \ | 165 | ({ \ |
109 | typeof(var) pfo_ret__; \ | 166 | typeof(var) pfo_ret__; \ |
@@ -142,16 +199,14 @@ do { \ | |||
142 | * per-thread variables implemented as per-cpu variables and thus | 199 | * per-thread variables implemented as per-cpu variables and thus |
143 | * stable for the duration of the respective task. | 200 | * stable for the duration of the respective task. |
144 | */ | 201 | */ |
145 | #define percpu_read(var) percpu_from_op("mov", per_cpu__##var, \ | 202 | #define percpu_read(var) percpu_from_op("mov", var, "m" (var)) |
146 | "m" (per_cpu__##var)) | 203 | #define percpu_read_stable(var) percpu_from_op("mov", var, "p" (&(var))) |
147 | #define percpu_read_stable(var) percpu_from_op("mov", per_cpu__##var, \ | 204 | #define percpu_write(var, val) percpu_to_op("mov", var, val) |
148 | "p" (&per_cpu__##var)) | 205 | #define percpu_add(var, val) percpu_add_op(var, val) |
149 | #define percpu_write(var, val) percpu_to_op("mov", per_cpu__##var, val) | 206 | #define percpu_sub(var, val) percpu_add_op(var, -(val)) |
150 | #define percpu_add(var, val) percpu_to_op("add", per_cpu__##var, val) | 207 | #define percpu_and(var, val) percpu_to_op("and", var, val) |
151 | #define percpu_sub(var, val) percpu_to_op("sub", per_cpu__##var, val) | 208 | #define percpu_or(var, val) percpu_to_op("or", var, val) |
152 | #define percpu_and(var, val) percpu_to_op("and", per_cpu__##var, val) | 209 | #define percpu_xor(var, val) percpu_to_op("xor", var, val) |
153 | #define percpu_or(var, val) percpu_to_op("or", per_cpu__##var, val) | ||
154 | #define percpu_xor(var, val) percpu_to_op("xor", per_cpu__##var, val) | ||
155 | 210 | ||
156 | #define __this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) | 211 | #define __this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) |
157 | #define __this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) | 212 | #define __this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) |
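A short usage sketch of what percpu_add_op buys (kernel context assumed; demo_count is a made-up per-cpu variable): constant ±1 operands collapse into inc/dec, anything else stays an add.

    #include <linux/percpu.h>

    static DEFINE_PER_CPU(unsigned long, demo_count);  /* hypothetical */

    static void demo(void)
    {
            percpu_add(demo_count, 1);   /* emitted as inc */
            percpu_add(demo_count, -1);  /* emitted as dec */
            percpu_add(demo_count, 8);   /* emitted as add $8 */
    }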
@@ -160,9 +215,9 @@ do { \ | |||
160 | #define __this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val) | 215 | #define __this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val) |
161 | #define __this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val) | 216 | #define __this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val) |
162 | #define __this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val) | 217 | #define __this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val) |
163 | #define __this_cpu_add_1(pcp, val) percpu_to_op("add", (pcp), val) | 218 | #define __this_cpu_add_1(pcp, val) percpu_add_op((pcp), val) |
164 | #define __this_cpu_add_2(pcp, val) percpu_to_op("add", (pcp), val) | 219 | #define __this_cpu_add_2(pcp, val) percpu_add_op((pcp), val) |
165 | #define __this_cpu_add_4(pcp, val) percpu_to_op("add", (pcp), val) | 220 | #define __this_cpu_add_4(pcp, val) percpu_add_op((pcp), val) |
166 | #define __this_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) | 221 | #define __this_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) |
167 | #define __this_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) | 222 | #define __this_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) |
168 | #define __this_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) | 223 | #define __this_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) |
@@ -179,9 +234,9 @@ do { \ | |||
179 | #define this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val) | 234 | #define this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val) |
180 | #define this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val) | 235 | #define this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val) |
181 | #define this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val) | 236 | #define this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val) |
182 | #define this_cpu_add_1(pcp, val) percpu_to_op("add", (pcp), val) | 237 | #define this_cpu_add_1(pcp, val) percpu_add_op((pcp), val) |
183 | #define this_cpu_add_2(pcp, val) percpu_to_op("add", (pcp), val) | 238 | #define this_cpu_add_2(pcp, val) percpu_add_op((pcp), val) |
184 | #define this_cpu_add_4(pcp, val) percpu_to_op("add", (pcp), val) | 239 | #define this_cpu_add_4(pcp, val) percpu_add_op((pcp), val) |
185 | #define this_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) | 240 | #define this_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) |
186 | #define this_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) | 241 | #define this_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) |
187 | #define this_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) | 242 | #define this_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) |
@@ -192,9 +247,9 @@ do { \ | |||
192 | #define this_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val) | 247 | #define this_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val) |
193 | #define this_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val) | 248 | #define this_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val) |
194 | 249 | ||
195 | #define irqsafe_cpu_add_1(pcp, val) percpu_to_op("add", (pcp), val) | 250 | #define irqsafe_cpu_add_1(pcp, val) percpu_add_op((pcp), val) |
196 | #define irqsafe_cpu_add_2(pcp, val) percpu_to_op("add", (pcp), val) | 251 | #define irqsafe_cpu_add_2(pcp, val) percpu_add_op((pcp), val) |
197 | #define irqsafe_cpu_add_4(pcp, val) percpu_to_op("add", (pcp), val) | 252 | #define irqsafe_cpu_add_4(pcp, val) percpu_add_op((pcp), val) |
198 | #define irqsafe_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) | 253 | #define irqsafe_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) |
199 | #define irqsafe_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) | 254 | #define irqsafe_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) |
200 | #define irqsafe_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) | 255 | #define irqsafe_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) |
@@ -212,19 +267,19 @@ do { \ | |||
212 | #ifdef CONFIG_X86_64 | 267 | #ifdef CONFIG_X86_64 |
213 | #define __this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) | 268 | #define __this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) |
214 | #define __this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) | 269 | #define __this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) |
215 | #define __this_cpu_add_8(pcp, val) percpu_to_op("add", (pcp), val) | 270 | #define __this_cpu_add_8(pcp, val) percpu_add_op((pcp), val) |
216 | #define __this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) | 271 | #define __this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) |
217 | #define __this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) | 272 | #define __this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) |
218 | #define __this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) | 273 | #define __this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) |
219 | 274 | ||
220 | #define this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) | 275 | #define this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) |
221 | #define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) | 276 | #define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) |
222 | #define this_cpu_add_8(pcp, val) percpu_to_op("add", (pcp), val) | 277 | #define this_cpu_add_8(pcp, val) percpu_add_op((pcp), val) |
223 | #define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) | 278 | #define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) |
224 | #define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) | 279 | #define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) |
225 | #define this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) | 280 | #define this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) |
226 | 281 | ||
227 | #define irqsafe_cpu_add_8(pcp, val) percpu_to_op("add", (pcp), val) | 282 | #define irqsafe_cpu_add_8(pcp, val) percpu_add_op((pcp), val) |
228 | #define irqsafe_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) | 283 | #define irqsafe_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) |
229 | #define irqsafe_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) | 284 | #define irqsafe_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) |
230 | #define irqsafe_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) | 285 | #define irqsafe_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) |
@@ -236,7 +291,7 @@ do { \ | |||
236 | ({ \ | 291 | ({ \ |
237 | int old__; \ | 292 | int old__; \ |
238 | asm volatile("btr %2,"__percpu_arg(1)"\n\tsbbl %0,%0" \ | 293 | asm volatile("btr %2,"__percpu_arg(1)"\n\tsbbl %0,%0" \ |
239 | : "=r" (old__), "+m" (per_cpu__##var) \ | 294 | : "=r" (old__), "+m" (var) \ |
240 | : "dIr" (bit)); \ | 295 | : "dIr" (bit)); \ |
241 | old__; \ | 296 | old__; \ |
242 | }) | 297 | }) |
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index 01fd9461d323..47339a1ac7b6 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -54,10 +54,10 @@ extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t); | |||
54 | in_irq() ? KM_IRQ_PTE : \ | 54 | in_irq() ? KM_IRQ_PTE : \ |
55 | KM_PTE0) | 55 | KM_PTE0) |
56 | #define pte_offset_map(dir, address) \ | 56 | #define pte_offset_map(dir, address) \ |
57 | ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), __KM_PTE) + \ | 57 | ((pte_t *)kmap_atomic(pmd_page(*(dir)), __KM_PTE) + \ |
58 | pte_index((address))) | 58 | pte_index((address))) |
59 | #define pte_offset_map_nested(dir, address) \ | 59 | #define pte_offset_map_nested(dir, address) \ |
60 | ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE1) + \ | 60 | ((pte_t *)kmap_atomic(pmd_page(*(dir)), KM_PTE1) + \ |
61 | pte_index((address))) | 61 | pte_index((address))) |
62 | #define pte_unmap(pte) kunmap_atomic((pte), __KM_PTE) | 62 | #define pte_unmap(pte) kunmap_atomic((pte), __KM_PTE) |
63 | #define pte_unmap_nested(pte) kunmap_atomic((pte), KM_PTE1) | 63 | #define pte_unmap_nested(pte) kunmap_atomic((pte), KM_PTE1) |
@@ -80,7 +80,7 @@ do { \ | |||
80 | * The i386 doesn't have any external MMU info: the kernel page | 80 | * The i386 doesn't have any external MMU info: the kernel page |
81 | * tables contain all the necessary information. | 81 | * tables contain all the necessary information. |
82 | */ | 82 | */ |
83 | #define update_mmu_cache(vma, address, pte) do { } while (0) | 83 | #define update_mmu_cache(vma, address, ptep) do { } while (0) |
84 | 84 | ||
85 | #endif /* !__ASSEMBLY__ */ | 85 | #endif /* !__ASSEMBLY__ */ |
86 | 86 | ||
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index c57a30117149..181be528c612 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -129,7 +129,7 @@ static inline int pgd_large(pgd_t pgd) { return 0; } | |||
129 | #define pte_unmap(pte) /* NOP */ | 129 | #define pte_unmap(pte) /* NOP */ |
130 | #define pte_unmap_nested(pte) /* NOP */ | 130 | #define pte_unmap_nested(pte) /* NOP */ |
131 | 131 | ||
132 | #define update_mmu_cache(vma, address, pte) do { } while (0) | 132 | #define update_mmu_cache(vma, address, ptep) do { } while (0) |
133 | 133 | ||
134 | /* Encode and de-code a swap entry */ | 134 | /* Encode and de-code a swap entry */ |
135 | #if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE | 135 | #if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE |
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index 4009f6534f52..6f414ed88620 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -23,14 +23,4 @@ extern int reboot_force; | |||
23 | 23 | ||
24 | long do_arch_prctl(struct task_struct *task, int code, unsigned long addr); | 24 | long do_arch_prctl(struct task_struct *task, int code, unsigned long addr); |
25 | 25 | ||
26 | /* | ||
27 | * This looks more complex than it should be. But we need to | ||
28 | * get the type for the ~ right in round_down (it needs to be | ||
29 | * as wide as the result!), and we want to evaluate the macro | ||
30 | * arguments just once each. | ||
31 | */ | ||
32 | #define __round_mask(x,y) ((__typeof__(x))((y)-1)) | ||
33 | #define round_up(x,y) ((((x)-1) | __round_mask(x,y))+1) | ||
34 | #define round_down(x,y) ((x) & ~__round_mask(x,y)) | ||
35 | |||
36 | #endif /* _ASM_X86_PROTO_H */ | 26 | #endif /* _ASM_X86_PROTO_H */ |
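The rounding macros leaving proto.h are fully generic; their one subtlety, as the removed comment says, is the __typeof__ cast that makes the ~ in round_down as wide as the operand. A stand-alone illustration:

    #include <stdio.h>

    #define __round_mask(x, y) ((__typeof__(x))((y)-1))
    #define round_up(x, y)     ((((x)-1) | __round_mask(x, y))+1)
    #define round_down(x, y)   ((x) & ~__round_mask(x, y))

    int main(void)
    {
            unsigned long long addr = 0x12345ULL;

            /* Without the cast, ~mask would be truncated to int width. */
            printf("%#llx -> up %#llx, down %#llx\n", addr,
                   round_up(addr, 0x1000ULL), round_down(addr, 0x1000ULL));
            return 0;
    }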
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index e04740f7a0bb..b8fe48ee2ed9 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -32,7 +32,7 @@ extern void show_regs_common(void); | |||
32 | "movl %P[task_canary](%[next]), %%ebx\n\t" \ | 32 | "movl %P[task_canary](%[next]), %%ebx\n\t" \ |
33 | "movl %%ebx, "__percpu_arg([stack_canary])"\n\t" | 33 | "movl %%ebx, "__percpu_arg([stack_canary])"\n\t" |
34 | #define __switch_canary_oparam \ | 34 | #define __switch_canary_oparam \ |
35 | , [stack_canary] "=m" (per_cpu_var(stack_canary.canary)) | 35 | , [stack_canary] "=m" (stack_canary.canary) |
36 | #define __switch_canary_iparam \ | 36 | #define __switch_canary_iparam \ |
37 | , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) | 37 | , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) |
38 | #else /* CC_STACKPROTECTOR */ | 38 | #else /* CC_STACKPROTECTOR */ |
@@ -114,7 +114,7 @@ do { \ | |||
114 | "movq %P[task_canary](%%rsi),%%r8\n\t" \ | 114 | "movq %P[task_canary](%%rsi),%%r8\n\t" \ |
115 | "movq %%r8,"__percpu_arg([gs_canary])"\n\t" | 115 | "movq %%r8,"__percpu_arg([gs_canary])"\n\t" |
116 | #define __switch_canary_oparam \ | 116 | #define __switch_canary_oparam \ |
117 | , [gs_canary] "=m" (per_cpu_var(irq_stack_union.stack_canary)) | 117 | , [gs_canary] "=m" (irq_stack_union.stack_canary) |
118 | #define __switch_canary_iparam \ | 118 | #define __switch_canary_iparam \ |
119 | , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) | 119 | , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) |
120 | #else /* CC_STACKPROTECTOR */ | 120 | #else /* CC_STACKPROTECTOR */ |
@@ -133,7 +133,7 @@ do { \ | |||
133 | __switch_canary \ | 133 | __switch_canary \ |
134 | "movq %P[thread_info](%%rsi),%%r8\n\t" \ | 134 | "movq %P[thread_info](%%rsi),%%r8\n\t" \ |
135 | "movq %%rax,%%rdi\n\t" \ | 135 | "movq %%rax,%%rdi\n\t" \ |
136 | "testl %[_tif_fork],%P[ti_flags](%%r8)\n\t" \ | 136 | "testl %[_tif_fork],%P[ti_flags](%%r8)\n\t" \ |
137 | "jnz ret_from_fork\n\t" \ | 137 | "jnz ret_from_fork\n\t" \ |
138 | RESTORE_CONTEXT \ | 138 | RESTORE_CONTEXT \ |
139 | : "=a" (last) \ | 139 | : "=a" (last) \ |
@@ -143,7 +143,7 @@ do { \ | |||
143 | [ti_flags] "i" (offsetof(struct thread_info, flags)), \ | 143 | [ti_flags] "i" (offsetof(struct thread_info, flags)), \ |
144 | [_tif_fork] "i" (_TIF_FORK), \ | 144 | [_tif_fork] "i" (_TIF_FORK), \ |
145 | [thread_info] "i" (offsetof(struct task_struct, stack)), \ | 145 | [thread_info] "i" (offsetof(struct task_struct, stack)), \ |
146 | [current_task] "m" (per_cpu_var(current_task)) \ | 146 | [current_task] "m" (current_task) \ |
147 | __switch_canary_iparam \ | 147 | __switch_canary_iparam \ |
148 | : "memory", "cc" __EXTRA_CLOBBER) | 148 | : "memory", "cc" __EXTRA_CLOBBER) |
149 | #endif | 149 | #endif |
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index f95703098f8d..738fcb60e708 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -447,6 +447,12 @@ void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger) | |||
447 | int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) | 447 | int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) |
448 | { | 448 | { |
449 | *irq = gsi; | 449 | *irq = gsi; |
450 | |||
451 | #ifdef CONFIG_X86_IO_APIC | ||
452 | if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) | ||
453 | setup_IO_APIC_irq_extra(gsi); | ||
454 | #endif | ||
455 | |||
450 | return 0; | 456 | return 0; |
451 | } | 457 | } |
452 | 458 | ||
@@ -474,7 +480,8 @@ int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity) | |||
474 | plat_gsi = mp_register_gsi(dev, gsi, trigger, polarity); | 480 | plat_gsi = mp_register_gsi(dev, gsi, trigger, polarity); |
475 | } | 481 | } |
476 | #endif | 482 | #endif |
477 | acpi_gsi_to_irq(plat_gsi, &irq); | 483 | irq = plat_gsi; |
484 | |||
478 | return irq; | 485 | return irq; |
479 | } | 486 | } |
480 | 487 | ||
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index e6ea0342c8f8..3a4bf35c179b 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -7,6 +7,7 @@ | |||
7 | #include <linux/mm.h> | 7 | #include <linux/mm.h> |
8 | #include <linux/vmalloc.h> | 8 | #include <linux/vmalloc.h> |
9 | #include <linux/memory.h> | 9 | #include <linux/memory.h> |
10 | #include <linux/stop_machine.h> | ||
10 | #include <asm/alternative.h> | 11 | #include <asm/alternative.h> |
11 | #include <asm/sections.h> | 12 | #include <asm/sections.h> |
12 | #include <asm/pgtable.h> | 13 | #include <asm/pgtable.h> |
@@ -572,3 +573,62 @@ void *__kprobes text_poke(void *addr, const void *opcode, size_t len) | |||
572 | local_irq_restore(flags); | 573 | local_irq_restore(flags); |
573 | return addr; | 574 | return addr; |
574 | } | 575 | } |
576 | |||
577 | /* | ||
578 | * Cross-modifying kernel text with stop_machine(). | ||
579 | * This code originally comes from the immediate values infrastructure. | ||
580 | */ | ||
581 | static atomic_t stop_machine_first; | ||
582 | static int wrote_text; | ||
583 | |||
584 | struct text_poke_params { | ||
585 | void *addr; | ||
586 | const void *opcode; | ||
587 | size_t len; | ||
588 | }; | ||
589 | |||
590 | static int __kprobes stop_machine_text_poke(void *data) | ||
591 | { | ||
592 | struct text_poke_params *tpp = data; | ||
593 | |||
594 | if (atomic_dec_and_test(&stop_machine_first)) { | ||
595 | text_poke(tpp->addr, tpp->opcode, tpp->len); | ||
596 | smp_wmb(); /* Make sure other cpus see that this has run */ | ||
597 | wrote_text = 1; | ||
598 | } else { | ||
599 | while (!wrote_text) | ||
600 | cpu_relax(); | ||
601 | smp_mb(); /* Load wrote_text before following execution */ | ||
602 | } | ||
603 | |||
604 | flush_icache_range((unsigned long)tpp->addr, | ||
605 | (unsigned long)tpp->addr + tpp->len); | ||
606 | return 0; | ||
607 | } | ||
608 | |||
609 | /** | ||
610 | * text_poke_smp - Update instructions on a live kernel on SMP | ||
611 | * @addr: address to modify | ||
612 | * @opcode: source of the copy | ||
613 | * @len: length to copy | ||
614 | * | ||
615 | * Modify multi-byte instructions by using stop_machine() on SMP. This allows | ||
616 | * the user to poke/set multi-byte text on SMP. Only code that is not | ||
617 | * reachable from NMI/MCE handlers may be modified, since stop_machine() | ||
618 | * does _not_ protect code against NMI and MCE. | ||
619 | * | ||
620 | * Note: Must be called under get_online_cpus() and text_mutex. | ||
621 | */ | ||
622 | void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len) | ||
623 | { | ||
624 | struct text_poke_params tpp; | ||
625 | |||
626 | tpp.addr = addr; | ||
627 | tpp.opcode = opcode; | ||
628 | tpp.len = len; | ||
629 | atomic_set(&stop_machine_first, 1); | ||
630 | wrote_text = 0; | ||
631 | stop_machine(stop_machine_text_poke, (void *)&tpp, NULL); | ||
632 | return addr; | ||
633 | } | ||
634 | |||
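Per the kernel-doc above, a caller of text_poke_smp() holds both get_online_cpus() and text_mutex, and never targets code reachable from NMI/MCE handlers. A hedged caller sketch (patch_live_insn and its arguments are hypothetical):

    #include <linux/cpu.h>
    #include <linux/memory.h>   /* text_mutex */
    #include <linux/mutex.h>
    #include <asm/alternative.h>

    static void patch_live_insn(void *addr, const void *new_insn, size_t len)
    {
            get_online_cpus();              /* keep the cpu set stable */
            mutex_lock(&text_mutex);        /* serialize all text pokes */
            text_poke_smp(addr, new_insn, len);
            mutex_unlock(&text_mutex);
            put_online_cpus();
    }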
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 6bdd2c7ead75..14862f11cc4a 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -73,8 +73,8 @@ | |||
73 | */ | 73 | */ |
74 | int sis_apic_bug = -1; | 74 | int sis_apic_bug = -1; |
75 | 75 | ||
76 | static DEFINE_SPINLOCK(ioapic_lock); | 76 | static DEFINE_RAW_SPINLOCK(ioapic_lock); |
77 | static DEFINE_SPINLOCK(vector_lock); | 77 | static DEFINE_RAW_SPINLOCK(vector_lock); |
78 | 78 | ||
79 | /* | 79 | /* |
80 | * # of IRQ routing registers | 80 | * # of IRQ routing registers |
@@ -94,8 +94,6 @@ struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; | |||
94 | /* # of MP IRQ source entries */ | 94 | /* # of MP IRQ source entries */ |
95 | int mp_irq_entries; | 95 | int mp_irq_entries; |
96 | 96 | ||
97 | /* Number of legacy interrupts */ | ||
98 | static int nr_legacy_irqs __read_mostly = NR_IRQS_LEGACY; | ||
99 | /* GSI interrupts */ | 97 | /* GSI interrupts */ |
100 | static int nr_irqs_gsi = NR_IRQS_LEGACY; | 98 | static int nr_irqs_gsi = NR_IRQS_LEGACY; |
101 | 99 | ||
@@ -140,27 +138,10 @@ static struct irq_pin_list *get_one_free_irq_2_pin(int node) | |||
140 | 138 | ||
141 | /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ | 139 | /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ |
142 | #ifdef CONFIG_SPARSE_IRQ | 140 | #ifdef CONFIG_SPARSE_IRQ |
143 | static struct irq_cfg irq_cfgx[] = { | 141 | static struct irq_cfg irq_cfgx[NR_IRQS_LEGACY]; |
144 | #else | 142 | #else |
145 | static struct irq_cfg irq_cfgx[NR_IRQS] = { | 143 | static struct irq_cfg irq_cfgx[NR_IRQS]; |
146 | #endif | 144 | #endif |
147 | [0] = { .vector = IRQ0_VECTOR, }, | ||
148 | [1] = { .vector = IRQ1_VECTOR, }, | ||
149 | [2] = { .vector = IRQ2_VECTOR, }, | ||
150 | [3] = { .vector = IRQ3_VECTOR, }, | ||
151 | [4] = { .vector = IRQ4_VECTOR, }, | ||
152 | [5] = { .vector = IRQ5_VECTOR, }, | ||
153 | [6] = { .vector = IRQ6_VECTOR, }, | ||
154 | [7] = { .vector = IRQ7_VECTOR, }, | ||
155 | [8] = { .vector = IRQ8_VECTOR, }, | ||
156 | [9] = { .vector = IRQ9_VECTOR, }, | ||
157 | [10] = { .vector = IRQ10_VECTOR, }, | ||
158 | [11] = { .vector = IRQ11_VECTOR, }, | ||
159 | [12] = { .vector = IRQ12_VECTOR, }, | ||
160 | [13] = { .vector = IRQ13_VECTOR, }, | ||
161 | [14] = { .vector = IRQ14_VECTOR, }, | ||
162 | [15] = { .vector = IRQ15_VECTOR, }, | ||
163 | }; | ||
164 | 145 | ||
165 | void __init io_apic_disable_legacy(void) | 146 | void __init io_apic_disable_legacy(void) |
166 | { | 147 | { |
@@ -185,8 +166,14 @@ int __init arch_early_irq_init(void) | |||
185 | desc->chip_data = &cfg[i]; | 166 | desc->chip_data = &cfg[i]; |
186 | zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node); | 167 | zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node); |
187 | zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node); | 168 | zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node); |
188 | if (i < nr_legacy_irqs) | 169 | /* |
189 | cpumask_setall(cfg[i].domain); | 170 | * For legacy IRQs, start with assigning irq0 to irq15 to |
171 | * IRQ0_VECTOR to IRQ15_VECTOR on cpu 0. | ||
172 | */ | ||
173 | if (i < nr_legacy_irqs) { | ||
174 | cfg[i].vector = IRQ0_VECTOR + i; | ||
175 | cpumask_set_cpu(0, cfg[i].domain); | ||
176 | } | ||
190 | } | 177 | } |
191 | 178 | ||
192 | return 0; | 179 | return 0; |
@@ -406,7 +393,7 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg) | |||
406 | struct irq_pin_list *entry; | 393 | struct irq_pin_list *entry; |
407 | unsigned long flags; | 394 | unsigned long flags; |
408 | 395 | ||
409 | spin_lock_irqsave(&ioapic_lock, flags); | 396 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
410 | for_each_irq_pin(entry, cfg->irq_2_pin) { | 397 | for_each_irq_pin(entry, cfg->irq_2_pin) { |
411 | unsigned int reg; | 398 | unsigned int reg; |
412 | int pin; | 399 | int pin; |
@@ -415,11 +402,11 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg) | |||
415 | reg = io_apic_read(entry->apic, 0x10 + pin*2); | 402 | reg = io_apic_read(entry->apic, 0x10 + pin*2); |
416 | /* Is the remote IRR bit set? */ | 403 | /* Is the remote IRR bit set? */ |
417 | if (reg & IO_APIC_REDIR_REMOTE_IRR) { | 404 | if (reg & IO_APIC_REDIR_REMOTE_IRR) { |
418 | spin_unlock_irqrestore(&ioapic_lock, flags); | 405 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
419 | return true; | 406 | return true; |
420 | } | 407 | } |
421 | } | 408 | } |
422 | spin_unlock_irqrestore(&ioapic_lock, flags); | 409 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
423 | 410 | ||
424 | return false; | 411 | return false; |
425 | } | 412 | } |
@@ -433,10 +420,10 @@ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) | |||
433 | { | 420 | { |
434 | union entry_union eu; | 421 | union entry_union eu; |
435 | unsigned long flags; | 422 | unsigned long flags; |
436 | spin_lock_irqsave(&ioapic_lock, flags); | 423 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
437 | eu.w1 = io_apic_read(apic, 0x10 + 2 * pin); | 424 | eu.w1 = io_apic_read(apic, 0x10 + 2 * pin); |
438 | eu.w2 = io_apic_read(apic, 0x11 + 2 * pin); | 425 | eu.w2 = io_apic_read(apic, 0x11 + 2 * pin); |
439 | spin_unlock_irqrestore(&ioapic_lock, flags); | 426 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
440 | return eu.entry; | 427 | return eu.entry; |
441 | } | 428 | } |
442 | 429 | ||
@@ -459,9 +446,9 @@ __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) | |||
459 | void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) | 446 | void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) |
460 | { | 447 | { |
461 | unsigned long flags; | 448 | unsigned long flags; |
462 | spin_lock_irqsave(&ioapic_lock, flags); | 449 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
463 | __ioapic_write_entry(apic, pin, e); | 450 | __ioapic_write_entry(apic, pin, e); |
464 | spin_unlock_irqrestore(&ioapic_lock, flags); | 451 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
465 | } | 452 | } |
466 | 453 | ||
467 | /* | 454 | /* |
@@ -474,10 +461,10 @@ static void ioapic_mask_entry(int apic, int pin) | |||
474 | unsigned long flags; | 461 | unsigned long flags; |
475 | union entry_union eu = { .entry.mask = 1 }; | 462 | union entry_union eu = { .entry.mask = 1 }; |
476 | 463 | ||
477 | spin_lock_irqsave(&ioapic_lock, flags); | 464 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
478 | io_apic_write(apic, 0x10 + 2*pin, eu.w1); | 465 | io_apic_write(apic, 0x10 + 2*pin, eu.w1); |
479 | io_apic_write(apic, 0x11 + 2*pin, eu.w2); | 466 | io_apic_write(apic, 0x11 + 2*pin, eu.w2); |
480 | spin_unlock_irqrestore(&ioapic_lock, flags); | 467 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
481 | } | 468 | } |
482 | 469 | ||
483 | /* | 470 | /* |
@@ -604,9 +591,9 @@ static void mask_IO_APIC_irq_desc(struct irq_desc *desc) | |||
604 | 591 | ||
605 | BUG_ON(!cfg); | 592 | BUG_ON(!cfg); |
606 | 593 | ||
607 | spin_lock_irqsave(&ioapic_lock, flags); | 594 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
608 | __mask_IO_APIC_irq(cfg); | 595 | __mask_IO_APIC_irq(cfg); |
609 | spin_unlock_irqrestore(&ioapic_lock, flags); | 596 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
610 | } | 597 | } |
611 | 598 | ||
612 | static void unmask_IO_APIC_irq_desc(struct irq_desc *desc) | 599 | static void unmask_IO_APIC_irq_desc(struct irq_desc *desc) |
@@ -614,9 +601,9 @@ static void unmask_IO_APIC_irq_desc(struct irq_desc *desc) | |||
614 | struct irq_cfg *cfg = desc->chip_data; | 601 | struct irq_cfg *cfg = desc->chip_data; |
615 | unsigned long flags; | 602 | unsigned long flags; |
616 | 603 | ||
617 | spin_lock_irqsave(&ioapic_lock, flags); | 604 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
618 | __unmask_IO_APIC_irq(cfg); | 605 | __unmask_IO_APIC_irq(cfg); |
619 | spin_unlock_irqrestore(&ioapic_lock, flags); | 606 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
620 | } | 607 | } |
621 | 608 | ||
622 | static void mask_IO_APIC_irq(unsigned int irq) | 609 | static void mask_IO_APIC_irq(unsigned int irq) |
@@ -1140,12 +1127,12 @@ void lock_vector_lock(void) | |||
1140 | /* Used so that the online set of cpus does not change | 1127 | /* Used so that the online set of cpus does not change |
1141 | * during assign_irq_vector. | 1128 | * during assign_irq_vector. |
1142 | */ | 1129 | */ |
1143 | spin_lock(&vector_lock); | 1130 | raw_spin_lock(&vector_lock); |
1144 | } | 1131 | } |
1145 | 1132 | ||
1146 | void unlock_vector_lock(void) | 1133 | void unlock_vector_lock(void) |
1147 | { | 1134 | { |
1148 | spin_unlock(&vector_lock); | 1135 | raw_spin_unlock(&vector_lock); |
1149 | } | 1136 | } |
1150 | 1137 | ||
1151 | static int | 1138 | static int |
@@ -1162,7 +1149,8 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) | |||
1162 | * Also, we've got to be careful not to trash gate | 1149 | * Also, we've got to be careful not to trash gate |
1163 | * 0x80, because int 0x80 is hm, kind of importantish. ;) | 1150 | * 0x80, because int 0x80 is hm, kind of importantish. ;) |
1164 | */ | 1151 | */ |
1165 | static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; | 1152 | static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START; |
1153 | static int current_offset = VECTOR_OFFSET_START % 8; | ||
1166 | unsigned int old_vector; | 1154 | unsigned int old_vector; |
1167 | int cpu, err; | 1155 | int cpu, err; |
1168 | cpumask_var_t tmp_mask; | 1156 | cpumask_var_t tmp_mask; |
@@ -1198,7 +1186,7 @@ next: | |||
1198 | if (vector >= first_system_vector) { | 1186 | if (vector >= first_system_vector) { |
1199 | /* If out of vectors on large boxen, must share them. */ | 1187 | /* If out of vectors on large boxen, must share them. */ |
1200 | offset = (offset + 1) % 8; | 1188 | offset = (offset + 1) % 8; |
1201 | vector = FIRST_DEVICE_VECTOR + offset; | 1189 | vector = FIRST_EXTERNAL_VECTOR + offset; |
1202 | } | 1190 | } |
1203 | if (unlikely(current_vector == vector)) | 1191 | if (unlikely(current_vector == vector)) |
1204 | continue; | 1192 | continue; |
@@ -1232,9 +1220,9 @@ int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) | |||
1232 | int err; | 1220 | int err; |
1233 | unsigned long flags; | 1221 | unsigned long flags; |
1234 | 1222 | ||
1235 | spin_lock_irqsave(&vector_lock, flags); | 1223 | raw_spin_lock_irqsave(&vector_lock, flags); |
1236 | err = __assign_irq_vector(irq, cfg, mask); | 1224 | err = __assign_irq_vector(irq, cfg, mask); |
1237 | spin_unlock_irqrestore(&vector_lock, flags); | 1225 | raw_spin_unlock_irqrestore(&vector_lock, flags); |
1238 | return err; | 1226 | return err; |
1239 | } | 1227 | } |
1240 | 1228 | ||
@@ -1268,11 +1256,16 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg) | |||
1268 | void __setup_vector_irq(int cpu) | 1256 | void __setup_vector_irq(int cpu) |
1269 | { | 1257 | { |
1270 | /* Initialize vector_irq on a new cpu */ | 1258 | /* Initialize vector_irq on a new cpu */ |
1271 | /* This function must be called with vector_lock held */ | ||
1272 | int irq, vector; | 1259 | int irq, vector; |
1273 | struct irq_cfg *cfg; | 1260 | struct irq_cfg *cfg; |
1274 | struct irq_desc *desc; | 1261 | struct irq_desc *desc; |
1275 | 1262 | ||
1263 | /* | ||
1264 | * vector_lock will make sure that we don't run into irq vector | ||
1265 | * assignments that might be happening on another cpu in parallel, | ||
1266 | * while we setup our initial vector to irq mappings. | ||
1267 | */ | ||
1268 | raw_spin_lock(&vector_lock); | ||
1276 | /* Mark the inuse vectors */ | 1269 | /* Mark the inuse vectors */ |
1277 | for_each_irq_desc(irq, desc) { | 1270 | for_each_irq_desc(irq, desc) { |
1278 | cfg = desc->chip_data; | 1271 | cfg = desc->chip_data; |
@@ -1291,6 +1284,7 @@ void __setup_vector_irq(int cpu) | |||
1291 | if (!cpumask_test_cpu(cpu, cfg->domain)) | 1284 | if (!cpumask_test_cpu(cpu, cfg->domain)) |
1292 | per_cpu(vector_irq, cpu)[vector] = -1; | 1285 | per_cpu(vector_irq, cpu)[vector] = -1; |
1293 | } | 1286 | } |
1287 | raw_spin_unlock(&vector_lock); | ||
1294 | } | 1288 | } |
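Note the calling-convention change here: the deleted comment required callers to hold vector_lock, whereas the function now serialises itself. Roughly, for the CPU bring-up path (caller shown as a sketch, not part of this diff):

    /* before: */
    lock_vector_lock();
    __setup_vector_irq(cpu);
    unlock_vector_lock();

    /* after: the function takes vector_lock internally */
    __setup_vector_irq(cpu);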
1295 | 1289 | ||
1296 | static struct irq_chip ioapic_chip; | 1290 | static struct irq_chip ioapic_chip; |
@@ -1440,6 +1434,14 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq | |||
1440 | 1434 | ||
1441 | cfg = desc->chip_data; | 1435 | cfg = desc->chip_data; |
1442 | 1436 | ||
1437 | /* | ||
1438 | * For legacy irqs, cfg->domain starts with cpu 0 for legacy | ||
1439 | * controllers like 8259. Now that IO-APIC can handle this irq, update | ||
1440 | * the cfg->domain. | ||
1441 | */ | ||
1442 | if (irq < nr_legacy_irqs && cpumask_test_cpu(0, cfg->domain)) | ||
1443 | apic->vector_allocation_domain(0, cfg->domain); | ||
1444 | |||
1443 | if (assign_irq_vector(irq, cfg, apic->target_cpus())) | 1445 | if (assign_irq_vector(irq, cfg, apic->target_cpus())) |
1444 | return; | 1446 | return; |
1445 | 1447 | ||
@@ -1473,7 +1475,7 @@ static struct { | |||
1473 | 1475 | ||
1474 | static void __init setup_IO_APIC_irqs(void) | 1476 | static void __init setup_IO_APIC_irqs(void) |
1475 | { | 1477 | { |
1476 | int apic_id = 0, pin, idx, irq; | 1478 | int apic_id, pin, idx, irq; |
1477 | int notcon = 0; | 1479 | int notcon = 0; |
1478 | struct irq_desc *desc; | 1480 | struct irq_desc *desc; |
1479 | struct irq_cfg *cfg; | 1481 | struct irq_cfg *cfg; |
@@ -1481,14 +1483,7 @@ static void __init setup_IO_APIC_irqs(void) | |||
1481 | 1483 | ||
1482 | apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); | 1484 | apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); |
1483 | 1485 | ||
1484 | #ifdef CONFIG_ACPI | 1486 | for (apic_id = 0; apic_id < nr_ioapics; apic_id++) |
1485 | if (!acpi_disabled && acpi_ioapic) { | ||
1486 | apic_id = mp_find_ioapic(0); | ||
1487 | if (apic_id < 0) | ||
1488 | apic_id = 0; | ||
1489 | } | ||
1490 | #endif | ||
1491 | |||
1492 | for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) { | 1487 | for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) { |
1493 | idx = find_irq_entry(apic_id, pin, mp_INT); | 1488 | idx = find_irq_entry(apic_id, pin, mp_INT); |
1494 | if (idx == -1) { | 1489 | if (idx == -1) { |
@@ -1510,6 +1505,9 @@ static void __init setup_IO_APIC_irqs(void) | |||
1510 | 1505 | ||
1511 | irq = pin_2_irq(idx, apic_id, pin); | 1506 | irq = pin_2_irq(idx, apic_id, pin); |
1512 | 1507 | ||
1508 | if ((apic_id > 0) && (irq > 16)) | ||
1509 | continue; | ||
1510 | |||
1513 | /* | 1511 | /* |
1514 | * Skip the timer IRQ if there's a quirk handler | 1512 | * Skip the timer IRQ if there's a quirk handler |
1515 | * installed and if it returns 1: | 1513 | * installed and if it returns 1: |
@@ -1539,6 +1537,56 @@ static void __init setup_IO_APIC_irqs(void) | |||
1539 | } | 1537 | } |
1540 | 1538 | ||
1541 | /* | 1539 | /* |
1540 | * For a GSI that is not on the first IO-APIC | ||
1541 | * and could not use acpi_register_gsi(), | ||
1542 | * like the special SCI on the IBM x3330. | ||
1543 | */ | ||
1544 | void setup_IO_APIC_irq_extra(u32 gsi) | ||
1545 | { | ||
1546 | int apic_id = 0, pin, idx, irq; | ||
1547 | int node = cpu_to_node(boot_cpu_id); | ||
1548 | struct irq_desc *desc; | ||
1549 | struct irq_cfg *cfg; | ||
1550 | |||
1551 | /* | ||
1552 | * Convert 'gsi' to 'ioapic.pin'. | ||
1553 | */ | ||
1554 | apic_id = mp_find_ioapic(gsi); | ||
1555 | if (apic_id < 0) | ||
1556 | return; | ||
1557 | |||
1558 | pin = mp_find_ioapic_pin(apic_id, gsi); | ||
1559 | idx = find_irq_entry(apic_id, pin, mp_INT); | ||
1560 | if (idx == -1) | ||
1561 | return; | ||
1562 | |||
1563 | irq = pin_2_irq(idx, apic_id, pin); | ||
1564 | #ifdef CONFIG_SPARSE_IRQ | ||
1565 | desc = irq_to_desc(irq); | ||
1566 | if (desc) | ||
1567 | return; | ||
1568 | #endif | ||
1569 | desc = irq_to_desc_alloc_node(irq, node); | ||
1570 | if (!desc) { | ||
1571 | printk(KERN_INFO "cannot get irq_desc for %d\n", irq); | ||
1572 | return; | ||
1573 | } | ||
1574 | |||
1575 | cfg = desc->chip_data; | ||
1576 | add_pin_to_irq_node(cfg, node, apic_id, pin); | ||
1577 | |||
1578 | if (test_bit(pin, mp_ioapic_routing[apic_id].pin_programmed)) { | ||
1579 | pr_debug("Pin %d-%d already programmed\n", | ||
1580 | mp_ioapics[apic_id].apicid, pin); | ||
1581 | return; | ||
1582 | } | ||
1583 | set_bit(pin, mp_ioapic_routing[apic_id].pin_programmed); | ||
1584 | |||
1585 | setup_IO_APIC_irq(apic_id, pin, irq, desc, | ||
1586 | irq_trigger(idx), irq_polarity(idx)); | ||
1587 | } | ||
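A sketch of how the new hook is meant to be driven: the caller hands over a raw GSI and the function resolves IO-APIC, pin and irq itself, skipping pins that were already programmed. The GSI value below is illustrative only:

    u32 gsi = 34;   /* e.g. an SCI that lives on a secondary IO-APIC */
    setup_IO_APIC_irq_extra(gsi);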
1588 | |||
1589 | /* | ||
1542 | * Set up the timer pin, possibly with the 8259A-master behind. | 1590 | * Set up the timer pin, possibly with the 8259A-master behind. |
1543 | */ | 1591 | */ |
1544 | static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin, | 1592 | static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin, |
@@ -1601,14 +1649,14 @@ __apicdebuginit(void) print_IO_APIC(void) | |||
1601 | 1649 | ||
1602 | for (apic = 0; apic < nr_ioapics; apic++) { | 1650 | for (apic = 0; apic < nr_ioapics; apic++) { |
1603 | 1651 | ||
1604 | spin_lock_irqsave(&ioapic_lock, flags); | 1652 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
1605 | reg_00.raw = io_apic_read(apic, 0); | 1653 | reg_00.raw = io_apic_read(apic, 0); |
1606 | reg_01.raw = io_apic_read(apic, 1); | 1654 | reg_01.raw = io_apic_read(apic, 1); |
1607 | if (reg_01.bits.version >= 0x10) | 1655 | if (reg_01.bits.version >= 0x10) |
1608 | reg_02.raw = io_apic_read(apic, 2); | 1656 | reg_02.raw = io_apic_read(apic, 2); |
1609 | if (reg_01.bits.version >= 0x20) | 1657 | if (reg_01.bits.version >= 0x20) |
1610 | reg_03.raw = io_apic_read(apic, 3); | 1658 | reg_03.raw = io_apic_read(apic, 3); |
1611 | spin_unlock_irqrestore(&ioapic_lock, flags); | 1659 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
1612 | 1660 | ||
1613 | printk("\n"); | 1661 | printk("\n"); |
1614 | printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].apicid); | 1662 | printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].apicid); |
@@ -1830,7 +1878,7 @@ __apicdebuginit(void) print_PIC(void) | |||
1830 | 1878 | ||
1831 | printk(KERN_DEBUG "\nprinting PIC contents\n"); | 1879 | printk(KERN_DEBUG "\nprinting PIC contents\n"); |
1832 | 1880 | ||
1833 | spin_lock_irqsave(&i8259A_lock, flags); | 1881 | raw_spin_lock_irqsave(&i8259A_lock, flags); |
1834 | 1882 | ||
1835 | v = inb(0xa1) << 8 | inb(0x21); | 1883 | v = inb(0xa1) << 8 | inb(0x21); |
1836 | printk(KERN_DEBUG "... PIC IMR: %04x\n", v); | 1884 | printk(KERN_DEBUG "... PIC IMR: %04x\n", v); |
@@ -1844,7 +1892,7 @@ __apicdebuginit(void) print_PIC(void) | |||
1844 | outb(0x0a,0xa0); | 1892 | outb(0x0a,0xa0); |
1845 | outb(0x0a,0x20); | 1893 | outb(0x0a,0x20); |
1846 | 1894 | ||
1847 | spin_unlock_irqrestore(&i8259A_lock, flags); | 1895 | raw_spin_unlock_irqrestore(&i8259A_lock, flags); |
1848 | 1896 | ||
1849 | printk(KERN_DEBUG "... PIC ISR: %04x\n", v); | 1897 | printk(KERN_DEBUG "... PIC ISR: %04x\n", v); |
1850 | 1898 | ||
@@ -1903,9 +1951,9 @@ void __init enable_IO_APIC(void) | |||
1903 | * The number of IO-APIC IRQ registers (== #pins): | 1951 | * The number of IO-APIC IRQ registers (== #pins): |
1904 | */ | 1952 | */ |
1905 | for (apic = 0; apic < nr_ioapics; apic++) { | 1953 | for (apic = 0; apic < nr_ioapics; apic++) { |
1906 | spin_lock_irqsave(&ioapic_lock, flags); | 1954 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
1907 | reg_01.raw = io_apic_read(apic, 1); | 1955 | reg_01.raw = io_apic_read(apic, 1); |
1908 | spin_unlock_irqrestore(&ioapic_lock, flags); | 1956 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
1909 | nr_ioapic_registers[apic] = reg_01.bits.entries+1; | 1957 | nr_ioapic_registers[apic] = reg_01.bits.entries+1; |
1910 | } | 1958 | } |
1911 | 1959 | ||
@@ -2045,9 +2093,9 @@ void __init setup_ioapic_ids_from_mpc(void) | |||
2045 | for (apic_id = 0; apic_id < nr_ioapics; apic_id++) { | 2093 | for (apic_id = 0; apic_id < nr_ioapics; apic_id++) { |
2046 | 2094 | ||
2047 | /* Read the register 0 value */ | 2095 | /* Read the register 0 value */ |
2048 | spin_lock_irqsave(&ioapic_lock, flags); | 2096 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
2049 | reg_00.raw = io_apic_read(apic_id, 0); | 2097 | reg_00.raw = io_apic_read(apic_id, 0); |
2050 | spin_unlock_irqrestore(&ioapic_lock, flags); | 2098 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
2051 | 2099 | ||
2052 | old_id = mp_ioapics[apic_id].apicid; | 2100 | old_id = mp_ioapics[apic_id].apicid; |
2053 | 2101 | ||
@@ -2106,16 +2154,16 @@ void __init setup_ioapic_ids_from_mpc(void) | |||
2106 | mp_ioapics[apic_id].apicid); | 2154 | mp_ioapics[apic_id].apicid); |
2107 | 2155 | ||
2108 | reg_00.bits.ID = mp_ioapics[apic_id].apicid; | 2156 | reg_00.bits.ID = mp_ioapics[apic_id].apicid; |
2109 | spin_lock_irqsave(&ioapic_lock, flags); | 2157 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
2110 | io_apic_write(apic_id, 0, reg_00.raw); | 2158 | io_apic_write(apic_id, 0, reg_00.raw); |
2111 | spin_unlock_irqrestore(&ioapic_lock, flags); | 2159 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
2112 | 2160 | ||
2113 | /* | 2161 | /* |
2114 | * Sanity check | 2162 | * Sanity check |
2115 | */ | 2163 | */ |
2116 | spin_lock_irqsave(&ioapic_lock, flags); | 2164 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
2117 | reg_00.raw = io_apic_read(apic_id, 0); | 2165 | reg_00.raw = io_apic_read(apic_id, 0); |
2118 | spin_unlock_irqrestore(&ioapic_lock, flags); | 2166 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
2119 | if (reg_00.bits.ID != mp_ioapics[apic_id].apicid) | 2167 | if (reg_00.bits.ID != mp_ioapics[apic_id].apicid) |
2120 | printk("could not set ID!\n"); | 2168 | printk("could not set ID!\n"); |
2121 | else | 2169 | else |
@@ -2198,7 +2246,7 @@ static unsigned int startup_ioapic_irq(unsigned int irq) | |||
2198 | unsigned long flags; | 2246 | unsigned long flags; |
2199 | struct irq_cfg *cfg; | 2247 | struct irq_cfg *cfg; |
2200 | 2248 | ||
2201 | spin_lock_irqsave(&ioapic_lock, flags); | 2249 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
2202 | if (irq < nr_legacy_irqs) { | 2250 | if (irq < nr_legacy_irqs) { |
2203 | disable_8259A_irq(irq); | 2251 | disable_8259A_irq(irq); |
2204 | if (i8259A_irq_pending(irq)) | 2252 | if (i8259A_irq_pending(irq)) |
@@ -2206,7 +2254,7 @@ static unsigned int startup_ioapic_irq(unsigned int irq) | |||
2206 | } | 2254 | } |
2207 | cfg = irq_cfg(irq); | 2255 | cfg = irq_cfg(irq); |
2208 | __unmask_IO_APIC_irq(cfg); | 2256 | __unmask_IO_APIC_irq(cfg); |
2209 | spin_unlock_irqrestore(&ioapic_lock, flags); | 2257 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
2210 | 2258 | ||
2211 | return was_pending; | 2259 | return was_pending; |
2212 | } | 2260 | } |
@@ -2217,9 +2265,9 @@ static int ioapic_retrigger_irq(unsigned int irq) | |||
2217 | struct irq_cfg *cfg = irq_cfg(irq); | 2265 | struct irq_cfg *cfg = irq_cfg(irq); |
2218 | unsigned long flags; | 2266 | unsigned long flags; |
2219 | 2267 | ||
2220 | spin_lock_irqsave(&vector_lock, flags); | 2268 | raw_spin_lock_irqsave(&vector_lock, flags); |
2221 | apic->send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector); | 2269 | apic->send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector); |
2222 | spin_unlock_irqrestore(&vector_lock, flags); | 2270 | raw_spin_unlock_irqrestore(&vector_lock, flags); |
2223 | 2271 | ||
2224 | return 1; | 2272 | return 1; |
2225 | } | 2273 | } |
@@ -2312,14 +2360,14 @@ set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) | |||
2312 | irq = desc->irq; | 2360 | irq = desc->irq; |
2313 | cfg = desc->chip_data; | 2361 | cfg = desc->chip_data; |
2314 | 2362 | ||
2315 | spin_lock_irqsave(&ioapic_lock, flags); | 2363 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
2316 | ret = set_desc_affinity(desc, mask, &dest); | 2364 | ret = set_desc_affinity(desc, mask, &dest); |
2317 | if (!ret) { | 2365 | if (!ret) { |
2318 | /* Only the high 8 bits are valid. */ | 2366 | /* Only the high 8 bits are valid. */ |
2319 | dest = SET_APIC_LOGICAL_ID(dest); | 2367 | dest = SET_APIC_LOGICAL_ID(dest); |
2320 | __target_IO_APIC_irq(irq, dest, cfg); | 2368 | __target_IO_APIC_irq(irq, dest, cfg); |
2321 | } | 2369 | } |
2322 | spin_unlock_irqrestore(&ioapic_lock, flags); | 2370 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
2323 | 2371 | ||
2324 | return ret; | 2372 | return ret; |
2325 | } | 2373 | } |
@@ -2554,9 +2602,9 @@ static void eoi_ioapic_irq(struct irq_desc *desc) | |||
2554 | irq = desc->irq; | 2602 | irq = desc->irq; |
2555 | cfg = desc->chip_data; | 2603 | cfg = desc->chip_data; |
2556 | 2604 | ||
2557 | spin_lock_irqsave(&ioapic_lock, flags); | 2605 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
2558 | __eoi_ioapic_irq(irq, cfg); | 2606 | __eoi_ioapic_irq(irq, cfg); |
2559 | spin_unlock_irqrestore(&ioapic_lock, flags); | 2607 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
2560 | } | 2608 | } |
2561 | 2609 | ||
2562 | static void ack_apic_level(unsigned int irq) | 2610 | static void ack_apic_level(unsigned int irq) |
@@ -3138,13 +3186,13 @@ static int ioapic_resume(struct sys_device *dev) | |||
3138 | data = container_of(dev, struct sysfs_ioapic_data, dev); | 3186 | data = container_of(dev, struct sysfs_ioapic_data, dev); |
3139 | entry = data->entry; | 3187 | entry = data->entry; |
3140 | 3188 | ||
3141 | spin_lock_irqsave(&ioapic_lock, flags); | 3189 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
3142 | reg_00.raw = io_apic_read(dev->id, 0); | 3190 | reg_00.raw = io_apic_read(dev->id, 0); |
3143 | if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) { | 3191 | if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) { |
3144 | reg_00.bits.ID = mp_ioapics[dev->id].apicid; | 3192 | reg_00.bits.ID = mp_ioapics[dev->id].apicid; |
3145 | io_apic_write(dev->id, 0, reg_00.raw); | 3193 | io_apic_write(dev->id, 0, reg_00.raw); |
3146 | } | 3194 | } |
3147 | spin_unlock_irqrestore(&ioapic_lock, flags); | 3195 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
3148 | for (i = 0; i < nr_ioapic_registers[dev->id]; i++) | 3196 | for (i = 0; i < nr_ioapic_registers[dev->id]; i++) |
3149 | ioapic_write_entry(dev->id, i, entry[i]); | 3197 | ioapic_write_entry(dev->id, i, entry[i]); |
3150 | 3198 | ||
@@ -3207,7 +3255,7 @@ unsigned int create_irq_nr(unsigned int irq_want, int node) | |||
3207 | if (irq_want < nr_irqs_gsi) | 3255 | if (irq_want < nr_irqs_gsi) |
3208 | irq_want = nr_irqs_gsi; | 3256 | irq_want = nr_irqs_gsi; |
3209 | 3257 | ||
3210 | spin_lock_irqsave(&vector_lock, flags); | 3258 | raw_spin_lock_irqsave(&vector_lock, flags); |
3211 | for (new = irq_want; new < nr_irqs; new++) { | 3259 | for (new = irq_want; new < nr_irqs; new++) { |
3212 | desc_new = irq_to_desc_alloc_node(new, node); | 3260 | desc_new = irq_to_desc_alloc_node(new, node); |
3213 | if (!desc_new) { | 3261 | if (!desc_new) { |
@@ -3226,14 +3274,11 @@ unsigned int create_irq_nr(unsigned int irq_want, int node) | |||
3226 | irq = new; | 3274 | irq = new; |
3227 | break; | 3275 | break; |
3228 | } | 3276 | } |
3229 | spin_unlock_irqrestore(&vector_lock, flags); | 3277 | raw_spin_unlock_irqrestore(&vector_lock, flags); |
3278 | |||
3279 | if (irq > 0) | ||
3280 | dynamic_irq_init_keep_chip_data(irq); | ||
3230 | 3281 | ||
3231 | if (irq > 0) { | ||
3232 | dynamic_irq_init(irq); | ||
3233 | /* restore it, in case dynamic_irq_init clears it */ | ||
3234 | if (desc_new) | ||
3235 | desc_new->chip_data = cfg_new; | ||
3236 | } | ||
3237 | return irq; | 3282 | return irq; |
3238 | } | 3283 | } |
3239 | 3284 | ||
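The deleted open-coded save/restore shows what the new helper is assumed to fold away:

    /* old pattern, per the lines removed above: */
    dynamic_irq_init(irq);           /* clears desc, chip_data included */
    desc_new->chip_data = cfg_new;   /* put the irq_cfg back            */

    /* new pattern: one call that leaves chip_data alone */
    dynamic_irq_init_keep_chip_data(irq);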
@@ -3255,20 +3300,13 @@ int create_irq(void) | |||
3255 | void destroy_irq(unsigned int irq) | 3300 | void destroy_irq(unsigned int irq) |
3256 | { | 3301 | { |
3257 | unsigned long flags; | 3302 | unsigned long flags; |
3258 | struct irq_cfg *cfg; | ||
3259 | struct irq_desc *desc; | ||
3260 | 3303 | ||
3261 | /* store it, in case dynamic_irq_cleanup clears it */ | 3304 | dynamic_irq_cleanup_keep_chip_data(irq);
3262 | desc = irq_to_desc(irq); | ||
3263 | cfg = desc->chip_data; | ||
3264 | dynamic_irq_cleanup(irq); | ||
3265 | /* connect back irq_cfg */ | ||
3266 | desc->chip_data = cfg; | ||
3267 | 3305 | ||
3268 | free_irte(irq); | 3306 | free_irte(irq); |
3269 | spin_lock_irqsave(&vector_lock, flags); | 3307 | raw_spin_lock_irqsave(&vector_lock, flags); |
3270 | __clear_irq_vector(irq, cfg); | 3308 | __clear_irq_vector(irq, get_irq_chip_data(irq)); |
3271 | spin_unlock_irqrestore(&vector_lock, flags); | 3309 | raw_spin_unlock_irqrestore(&vector_lock, flags); |
3272 | } | 3310 | } |
3273 | 3311 | ||
3274 | /* | 3312 | /* |
@@ -3805,9 +3843,9 @@ int __init io_apic_get_redir_entries (int ioapic) | |||
3805 | union IO_APIC_reg_01 reg_01; | 3843 | union IO_APIC_reg_01 reg_01; |
3806 | unsigned long flags; | 3844 | unsigned long flags; |
3807 | 3845 | ||
3808 | spin_lock_irqsave(&ioapic_lock, flags); | 3846 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
3809 | reg_01.raw = io_apic_read(ioapic, 1); | 3847 | reg_01.raw = io_apic_read(ioapic, 1); |
3810 | spin_unlock_irqrestore(&ioapic_lock, flags); | 3848 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
3811 | 3849 | ||
3812 | return reg_01.bits.entries; | 3850 | return reg_01.bits.entries; |
3813 | } | 3851 | } |
@@ -3969,9 +4007,9 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) | |||
3969 | if (physids_empty(apic_id_map)) | 4007 | if (physids_empty(apic_id_map)) |
3970 | apic->ioapic_phys_id_map(&phys_cpu_present_map, &apic_id_map); | 4008 | apic->ioapic_phys_id_map(&phys_cpu_present_map, &apic_id_map); |
3971 | 4009 | ||
3972 | spin_lock_irqsave(&ioapic_lock, flags); | 4010 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
3973 | reg_00.raw = io_apic_read(ioapic, 0); | 4011 | reg_00.raw = io_apic_read(ioapic, 0); |
3974 | spin_unlock_irqrestore(&ioapic_lock, flags); | 4012 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
3975 | 4013 | ||
3976 | if (apic_id >= get_physical_broadcast()) { | 4014 | if (apic_id >= get_physical_broadcast()) { |
3977 | printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " | 4015 | printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " |
@@ -4005,10 +4043,10 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) | |||
4005 | if (reg_00.bits.ID != apic_id) { | 4043 | if (reg_00.bits.ID != apic_id) { |
4006 | reg_00.bits.ID = apic_id; | 4044 | reg_00.bits.ID = apic_id; |
4007 | 4045 | ||
4008 | spin_lock_irqsave(&ioapic_lock, flags); | 4046 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
4009 | io_apic_write(ioapic, 0, reg_00.raw); | 4047 | io_apic_write(ioapic, 0, reg_00.raw); |
4010 | reg_00.raw = io_apic_read(ioapic, 0); | 4048 | reg_00.raw = io_apic_read(ioapic, 0); |
4011 | spin_unlock_irqrestore(&ioapic_lock, flags); | 4049 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
4012 | 4050 | ||
4013 | /* Sanity check */ | 4051 | /* Sanity check */ |
4014 | if (reg_00.bits.ID != apic_id) { | 4052 | if (reg_00.bits.ID != apic_id) { |
@@ -4029,9 +4067,9 @@ int __init io_apic_get_version(int ioapic) | |||
4029 | union IO_APIC_reg_01 reg_01; | 4067 | union IO_APIC_reg_01 reg_01; |
4030 | unsigned long flags; | 4068 | unsigned long flags; |
4031 | 4069 | ||
4032 | spin_lock_irqsave(&ioapic_lock, flags); | 4070 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
4033 | reg_01.raw = io_apic_read(ioapic, 1); | 4071 | reg_01.raw = io_apic_read(ioapic, 1); |
4034 | spin_unlock_irqrestore(&ioapic_lock, flags); | 4072 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
4035 | 4073 | ||
4036 | return reg_01.bits.version; | 4074 | return reg_01.bits.version; |
4037 | } | 4075 | } |
@@ -4063,27 +4101,23 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) | |||
4063 | #ifdef CONFIG_SMP | 4101 | #ifdef CONFIG_SMP |
4064 | void __init setup_ioapic_dest(void) | 4102 | void __init setup_ioapic_dest(void) |
4065 | { | 4103 | { |
4066 | int pin, ioapic = 0, irq, irq_entry; | 4104 | int pin, ioapic, irq, irq_entry; |
4067 | struct irq_desc *desc; | 4105 | struct irq_desc *desc; |
4068 | const struct cpumask *mask; | 4106 | const struct cpumask *mask; |
4069 | 4107 | ||
4070 | if (skip_ioapic_setup == 1) | 4108 | if (skip_ioapic_setup == 1) |
4071 | return; | 4109 | return; |
4072 | 4110 | ||
4073 | #ifdef CONFIG_ACPI | 4111 | for (ioapic = 0; ioapic < nr_ioapics; ioapic++) |
4074 | if (!acpi_disabled && acpi_ioapic) { | ||
4075 | ioapic = mp_find_ioapic(0); | ||
4076 | if (ioapic < 0) | ||
4077 | ioapic = 0; | ||
4078 | } | ||
4079 | #endif | ||
4080 | |||
4081 | for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { | 4112 | for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { |
4082 | irq_entry = find_irq_entry(ioapic, pin, mp_INT); | 4113 | irq_entry = find_irq_entry(ioapic, pin, mp_INT); |
4083 | if (irq_entry == -1) | 4114 | if (irq_entry == -1) |
4084 | continue; | 4115 | continue; |
4085 | irq = pin_2_irq(irq_entry, ioapic, pin); | 4116 | irq = pin_2_irq(irq_entry, ioapic, pin); |
4086 | 4117 | ||
4118 | if ((ioapic > 0) && (irq > 16)) | ||
4119 | continue; | ||
4120 | |||
4087 | desc = irq_to_desc(irq); | 4121 | desc = irq_to_desc(irq); |
4088 | 4122 | ||
4089 | /* | 4123 | /* |
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index 0159a69396cb..bd7c96b5e8d8 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c | |||
@@ -416,13 +416,13 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) | |||
416 | 416 | ||
417 | /* We can be called before check_nmi_watchdog, hence NULL check. */ | 417 | /* We can be called before check_nmi_watchdog, hence NULL check. */ |
418 | if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { | 418 | if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { |
419 | static DEFINE_SPINLOCK(lock); /* Serialise the printks */ | 419 | static DEFINE_RAW_SPINLOCK(lock); /* Serialise the printks */ |
420 | 420 | ||
421 | spin_lock(&lock); | 421 | raw_spin_lock(&lock); |
422 | printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); | 422 | printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); |
423 | show_regs(regs); | 423 | show_regs(regs); |
424 | dump_stack(); | 424 | dump_stack(); |
425 | spin_unlock(&lock); | 425 | raw_spin_unlock(&lock); |
426 | cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); | 426 | cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); |
427 | 427 | ||
428 | rc = 1; | 428 | rc = 1; |
@@ -438,8 +438,8 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) | |||
438 | * Ayiee, looks like this CPU is stuck ... | 438 | * Ayiee, looks like this CPU is stuck ... |
439 | * wait a few IRQs (5 seconds) before doing the oops ... | 439 | * wait a few IRQs (5 seconds) before doing the oops ... |
440 | */ | 440 | */ |
441 | __this_cpu_inc(per_cpu_var(alert_counter)); | 441 | __this_cpu_inc(alert_counter); |
442 | if (__this_cpu_read(per_cpu_var(alert_counter)) == 5 * nmi_hz) | 442 | if (__this_cpu_read(alert_counter) == 5 * nmi_hz) |
443 | /* | 443 | /* |
444 | * die_nmi will return ONLY if NOTIFY_STOP happens.. | 444 | * die_nmi will return ONLY if NOTIFY_STOP happens.. |
445 | */ | 445 | */ |
@@ -447,7 +447,7 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) | |||
447 | regs, panic_on_timeout); | 447 | regs, panic_on_timeout); |
448 | } else { | 448 | } else { |
449 | __get_cpu_var(last_irq_sum) = sum; | 449 | __get_cpu_var(last_irq_sum) = sum; |
450 | __this_cpu_write(per_cpu_var(alert_counter), 0); | 450 | __this_cpu_write(alert_counter, 0); |
451 | } | 451 | } |
452 | 452 | ||
453 | /* see if the nmi watchdog went off */ | 453 | /* see if the nmi watchdog went off */ |
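The per_cpu_var() wrapper drops out because, after the per-cpu symbol rework this series sits on, this_cpu operations take the plain variable name directly. Sketch, with the declaration assumed from elsewhere in the file:

    static DEFINE_PER_CPU(unsigned int, alert_counter);   /* assumed */

    __this_cpu_inc(alert_counter);
    if (__this_cpu_read(alert_counter) == 5 * nmi_hz)
            __this_cpu_write(alert_counter, 0);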
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 6e44519960c8..d360b56e9825 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c | |||
@@ -806,7 +806,7 @@ static int find_psb_table(struct powernow_k8_data *data) | |||
806 | static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, | 806 | static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, |
807 | unsigned int index) | 807 | unsigned int index) |
808 | { | 808 | { |
809 | acpi_integer control; | 809 | u64 control; |
810 | 810 | ||
811 | if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE)) | 811 | if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE)) |
812 | return; | 812 | return; |
@@ -824,7 +824,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) | |||
824 | { | 824 | { |
825 | struct cpufreq_frequency_table *powernow_table; | 825 | struct cpufreq_frequency_table *powernow_table; |
826 | int ret_val = -ENODEV; | 826 | int ret_val = -ENODEV; |
827 | acpi_integer control, status; | 827 | u64 control, status; |
828 | 828 | ||
829 | if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) { | 829 | if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) { |
830 | dprintk("register performance failed: bad ACPI data\n"); | 830 | dprintk("register performance failed: bad ACPI data\n"); |
@@ -948,7 +948,7 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, | |||
948 | u32 fid; | 948 | u32 fid; |
949 | u32 vid; | 949 | u32 vid; |
950 | u32 freq, index; | 950 | u32 freq, index; |
951 | acpi_integer status, control; | 951 | u64 status, control; |
952 | 952 | ||
953 | if (data->exttype) { | 953 | if (data->exttype) { |
954 | status = data->acpi_data.states[i].status; | 954 | status = data->acpi_data.states[i].status; |
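Context for the type swap: acpi_integer has been a fixed 64-bit quantity since ACPI 2.0, so spelling it u64 loses nothing; the series appears to retire the typedef tree-wide. In effect (equivalence assumed from the ACPICA headers):

    typedef u64 acpi_integer;   /* what the old name resolved to */
    u64 control, status;        /* the direct replacement used here */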
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index 09b1698e0466..06130b52f012 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c | |||
@@ -22,10 +22,10 @@ | |||
22 | #include <linux/pci.h> | 22 | #include <linux/pci.h> |
23 | #include <linux/smp.h> | 23 | #include <linux/smp.h> |
24 | #include <linux/cpu.h> | 24 | #include <linux/cpu.h> |
25 | #include <linux/sort.h> | ||
26 | #include <linux/mutex.h> | 25 | #include <linux/mutex.h> |
27 | #include <linux/uaccess.h> | 26 | #include <linux/uaccess.h> |
28 | #include <linux/kvm_para.h> | 27 | #include <linux/kvm_para.h> |
28 | #include <linux/range.h> | ||
29 | 29 | ||
30 | #include <asm/processor.h> | 30 | #include <asm/processor.h> |
31 | #include <asm/e820.h> | 31 | #include <asm/e820.h> |
@@ -34,11 +34,6 @@ | |||
34 | 34 | ||
35 | #include "mtrr.h" | 35 | #include "mtrr.h" |
36 | 36 | ||
37 | struct res_range { | ||
38 | unsigned long start; | ||
39 | unsigned long end; | ||
40 | }; | ||
41 | |||
42 | struct var_mtrr_range_state { | 37 | struct var_mtrr_range_state { |
43 | unsigned long base_pfn; | 38 | unsigned long base_pfn; |
44 | unsigned long size_pfn; | 39 | unsigned long size_pfn; |
@@ -56,7 +51,7 @@ struct var_mtrr_state { | |||
56 | /* Should be related to MTRR_VAR_RANGES nums */ | 51 | /* Should be related to MTRR_VAR_RANGES nums */ |
57 | #define RANGE_NUM 256 | 52 | #define RANGE_NUM 256 |
58 | 53 | ||
59 | static struct res_range __initdata range[RANGE_NUM]; | 54 | static struct range __initdata range[RANGE_NUM]; |
60 | static int __initdata nr_range; | 55 | static int __initdata nr_range; |
61 | 56 | ||
62 | static struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; | 57 | static struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; |
@@ -64,152 +59,11 @@ static struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; | |||
64 | static int __initdata debug_print; | 59 | static int __initdata debug_print; |
65 | #define Dprintk(x...) do { if (debug_print) printk(KERN_DEBUG x); } while (0) | 60 | #define Dprintk(x...) do { if (debug_print) printk(KERN_DEBUG x); } while (0) |
66 | 61 | ||
67 | |||
68 | static int __init | ||
69 | add_range(struct res_range *range, int nr_range, | ||
70 | unsigned long start, unsigned long end) | ||
71 | { | ||
72 | /* Out of slots: */ | ||
73 | if (nr_range >= RANGE_NUM) | ||
74 | return nr_range; | ||
75 | |||
76 | range[nr_range].start = start; | ||
77 | range[nr_range].end = end; | ||
78 | |||
79 | nr_range++; | ||
80 | |||
81 | return nr_range; | ||
82 | } | ||
83 | |||
84 | static int __init | ||
85 | add_range_with_merge(struct res_range *range, int nr_range, | ||
86 | unsigned long start, unsigned long end) | ||
87 | { | ||
88 | int i; | ||
89 | |||
90 | /* Try to merge it with old one: */ | ||
91 | for (i = 0; i < nr_range; i++) { | ||
92 | unsigned long final_start, final_end; | ||
93 | unsigned long common_start, common_end; | ||
94 | |||
95 | if (!range[i].end) | ||
96 | continue; | ||
97 | |||
98 | common_start = max(range[i].start, start); | ||
99 | common_end = min(range[i].end, end); | ||
100 | if (common_start > common_end + 1) | ||
101 | continue; | ||
102 | |||
103 | final_start = min(range[i].start, start); | ||
104 | final_end = max(range[i].end, end); | ||
105 | |||
106 | range[i].start = final_start; | ||
107 | range[i].end = final_end; | ||
108 | return nr_range; | ||
109 | } | ||
110 | |||
111 | /* Need to add it: */ | ||
112 | return add_range(range, nr_range, start, end); | ||
113 | } | ||
114 | |||
115 | static void __init | ||
116 | subtract_range(struct res_range *range, unsigned long start, unsigned long end) | ||
117 | { | ||
118 | int i, j; | ||
119 | |||
120 | for (j = 0; j < RANGE_NUM; j++) { | ||
121 | if (!range[j].end) | ||
122 | continue; | ||
123 | |||
124 | if (start <= range[j].start && end >= range[j].end) { | ||
125 | range[j].start = 0; | ||
126 | range[j].end = 0; | ||
127 | continue; | ||
128 | } | ||
129 | |||
130 | if (start <= range[j].start && end < range[j].end && | ||
131 | range[j].start < end + 1) { | ||
132 | range[j].start = end + 1; | ||
133 | continue; | ||
134 | } | ||
135 | |||
136 | |||
137 | if (start > range[j].start && end >= range[j].end && | ||
138 | range[j].end > start - 1) { | ||
139 | range[j].end = start - 1; | ||
140 | continue; | ||
141 | } | ||
142 | |||
143 | if (start > range[j].start && end < range[j].end) { | ||
144 | /* Find the new spare: */ | ||
145 | for (i = 0; i < RANGE_NUM; i++) { | ||
146 | if (range[i].end == 0) | ||
147 | break; | ||
148 | } | ||
149 | if (i < RANGE_NUM) { | ||
150 | range[i].end = range[j].end; | ||
151 | range[i].start = end + 1; | ||
152 | } else { | ||
153 | printk(KERN_ERR "out of slots in ranges\n"); | ||
154 | } | ||
155 | range[j].end = start - 1; | ||
156 | continue; | ||
157 | } | ||
158 | } | ||
159 | } | ||
160 | |||
161 | static int __init cmp_range(const void *x1, const void *x2) | ||
162 | { | ||
163 | const struct res_range *r1 = x1; | ||
164 | const struct res_range *r2 = x2; | ||
165 | long start1, start2; | ||
166 | |||
167 | start1 = r1->start; | ||
168 | start2 = r2->start; | ||
169 | |||
170 | return start1 - start2; | ||
171 | } | ||
172 | |||
173 | static int __init clean_sort_range(struct res_range *range, int az) | ||
174 | { | ||
175 | int i, j, k = az - 1, nr_range = 0; | ||
176 | |||
177 | for (i = 0; i < k; i++) { | ||
178 | if (range[i].end) | ||
179 | continue; | ||
180 | for (j = k; j > i; j--) { | ||
181 | if (range[j].end) { | ||
182 | k = j; | ||
183 | break; | ||
184 | } | ||
185 | } | ||
186 | if (j == i) | ||
187 | break; | ||
188 | range[i].start = range[k].start; | ||
189 | range[i].end = range[k].end; | ||
190 | range[k].start = 0; | ||
191 | range[k].end = 0; | ||
192 | k--; | ||
193 | } | ||
194 | /* count it */ | ||
195 | for (i = 0; i < az; i++) { | ||
196 | if (!range[i].end) { | ||
197 | nr_range = i; | ||
198 | break; | ||
199 | } | ||
200 | } | ||
201 | |||
202 | /* sort them */ | ||
203 | sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); | ||
204 | |||
205 | return nr_range; | ||
206 | } | ||
207 | |||
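The helpers deleted above move behind the new <linux/range.h> include. Their generic signatures, as inferred from the call sites updated below, gain an array-size argument and switch to an exclusive end:

    struct range {
            u64 start;
            u64 end;    /* exclusive */
    };

    int add_range_with_merge(struct range *range, int az, int nr_range,
                             u64 start, u64 end);
    void subtract_range(struct range *range, int az, u64 start, u64 end);
    void sort_range(struct range *range, int nr_range);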
208 | #define BIOS_BUG_MSG KERN_WARNING \ | 62 | #define BIOS_BUG_MSG KERN_WARNING \ |
209 | "WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n" | 63 | "WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n" |
210 | 64 | ||
211 | static int __init | 65 | static int __init |
212 | x86_get_mtrr_mem_range(struct res_range *range, int nr_range, | 66 | x86_get_mtrr_mem_range(struct range *range, int nr_range, |
213 | unsigned long extra_remove_base, | 67 | unsigned long extra_remove_base, |
214 | unsigned long extra_remove_size) | 68 | unsigned long extra_remove_size) |
215 | { | 69 | { |
@@ -223,14 +77,14 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range, | |||
223 | continue; | 77 | continue; |
224 | base = range_state[i].base_pfn; | 78 | base = range_state[i].base_pfn; |
225 | size = range_state[i].size_pfn; | 79 | size = range_state[i].size_pfn; |
226 | nr_range = add_range_with_merge(range, nr_range, base, | 80 | nr_range = add_range_with_merge(range, RANGE_NUM, nr_range, |
227 | base + size - 1); | 81 | base, base + size); |
228 | } | 82 | } |
229 | if (debug_print) { | 83 | if (debug_print) { |
230 | printk(KERN_DEBUG "After WB checking\n"); | 84 | printk(KERN_DEBUG "After WB checking\n"); |
231 | for (i = 0; i < nr_range; i++) | 85 | for (i = 0; i < nr_range; i++) |
232 | printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", | 86 | printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n", |
233 | range[i].start, range[i].end + 1); | 87 | range[i].start, range[i].end); |
234 | } | 88 | } |
235 | 89 | ||
236 | /* Take out UC ranges: */ | 90 | /* Take out UC ranges: */ |
@@ -252,19 +106,19 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range, | |||
252 | size -= (1<<(20-PAGE_SHIFT)) - base; | 106 | size -= (1<<(20-PAGE_SHIFT)) - base; |
253 | base = 1<<(20-PAGE_SHIFT); | 107 | base = 1<<(20-PAGE_SHIFT); |
254 | } | 108 | } |
255 | subtract_range(range, base, base + size - 1); | 109 | subtract_range(range, RANGE_NUM, base, base + size); |
256 | } | 110 | } |
257 | if (extra_remove_size) | 111 | if (extra_remove_size) |
258 | subtract_range(range, extra_remove_base, | 112 | subtract_range(range, RANGE_NUM, extra_remove_base, |
259 | extra_remove_base + extra_remove_size - 1); | 113 | extra_remove_base + extra_remove_size); |
260 | 114 | ||
261 | if (debug_print) { | 115 | if (debug_print) { |
262 | printk(KERN_DEBUG "After UC checking\n"); | 116 | printk(KERN_DEBUG "After UC checking\n"); |
263 | for (i = 0; i < RANGE_NUM; i++) { | 117 | for (i = 0; i < RANGE_NUM; i++) { |
264 | if (!range[i].end) | 118 | if (!range[i].end) |
265 | continue; | 119 | continue; |
266 | printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", | 120 | printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n", |
267 | range[i].start, range[i].end + 1); | 121 | range[i].start, range[i].end); |
268 | } | 122 | } |
269 | } | 123 | } |
270 | 124 | ||
@@ -273,26 +127,22 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range, | |||
273 | if (debug_print) { | 127 | if (debug_print) { |
274 | printk(KERN_DEBUG "After sorting\n"); | 128 | printk(KERN_DEBUG "After sorting\n"); |
275 | for (i = 0; i < nr_range; i++) | 129 | for (i = 0; i < nr_range; i++) |
276 | printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", | 130 | printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n", |
277 | range[i].start, range[i].end + 1); | 131 | range[i].start, range[i].end); |
278 | } | 132 | } |
279 | 133 | ||
280 | /* clear those that are not used */ | ||
281 | for (i = nr_range; i < RANGE_NUM; i++) | ||
282 | memset(&range[i], 0, sizeof(range[i])); | ||
283 | |||
284 | return nr_range; | 134 | return nr_range; |
285 | } | 135 | } |
286 | 136 | ||
287 | #ifdef CONFIG_MTRR_SANITIZER | 137 | #ifdef CONFIG_MTRR_SANITIZER |
288 | 138 | ||
289 | static unsigned long __init sum_ranges(struct res_range *range, int nr_range) | 139 | static unsigned long __init sum_ranges(struct range *range, int nr_range) |
290 | { | 140 | { |
291 | unsigned long sum = 0; | 141 | unsigned long sum = 0; |
292 | int i; | 142 | int i; |
293 | 143 | ||
294 | for (i = 0; i < nr_range; i++) | 144 | for (i = 0; i < nr_range; i++) |
295 | sum += range[i].end + 1 - range[i].start; | 145 | sum += range[i].end - range[i].start; |
296 | 146 | ||
297 | return sum; | 147 | return sum; |
298 | } | 148 | } |
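The arithmetic above is the visible effect of the convention change: res_range carried an inclusive end, struct range an exclusive one, so the scattered +1/-1 corrections disappear. A toy comparison:

    /* old: [start, end] inclusive  -> pages = end + 1 - start */
    /* new: [start, end) exclusive  -> pages = end - start     */
    struct range r = { .start = 0x100, .end = 0x200 };
    unsigned long pages = r.end - r.start;   /* 0x100 pages */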
@@ -621,7 +471,7 @@ static int __init parse_mtrr_spare_reg(char *arg) | |||
621 | early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg); | 471 | early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg); |
622 | 472 | ||
623 | static int __init | 473 | static int __init |
624 | x86_setup_var_mtrrs(struct res_range *range, int nr_range, | 474 | x86_setup_var_mtrrs(struct range *range, int nr_range, |
625 | u64 chunk_size, u64 gran_size) | 475 | u64 chunk_size, u64 gran_size) |
626 | { | 476 | { |
627 | struct var_mtrr_state var_state; | 477 | struct var_mtrr_state var_state; |
@@ -639,7 +489,7 @@ x86_setup_var_mtrrs(struct res_range *range, int nr_range, | |||
639 | /* Write the range: */ | 489 | /* Write the range: */ |
640 | for (i = 0; i < nr_range; i++) { | 490 | for (i = 0; i < nr_range; i++) { |
641 | set_var_mtrr_range(&var_state, range[i].start, | 491 | set_var_mtrr_range(&var_state, range[i].start, |
642 | range[i].end - range[i].start + 1); | 492 | range[i].end - range[i].start); |
643 | } | 493 | } |
644 | 494 | ||
645 | /* Write the last range: */ | 495 | /* Write the last range: */ |
@@ -742,7 +592,7 @@ mtrr_calc_range_state(u64 chunk_size, u64 gran_size, | |||
742 | unsigned long x_remove_base, | 592 | unsigned long x_remove_base, |
743 | unsigned long x_remove_size, int i) | 593 | unsigned long x_remove_size, int i) |
744 | { | 594 | { |
745 | static struct res_range range_new[RANGE_NUM]; | 595 | static struct range range_new[RANGE_NUM]; |
746 | unsigned long range_sums_new; | 596 | unsigned long range_sums_new; |
747 | static int nr_range_new; | 597 | static int nr_range_new; |
748 | int num_reg; | 598 | int num_reg; |
@@ -869,10 +719,10 @@ int __init mtrr_cleanup(unsigned address_bits) | |||
869 | * [0, 1M) should always be covered by var mtrr with WB | 719 | * [0, 1M) should always be covered by var mtrr with WB |
870 | * and fixed mtrrs should take effect before var mtrr for it: | 720 | * and fixed mtrrs should take effect before var mtrr for it: |
871 | */ | 721 | */ |
872 | nr_range = add_range_with_merge(range, nr_range, 0, | 722 | nr_range = add_range_with_merge(range, RANGE_NUM, nr_range, 0, |
873 | (1ULL<<(20 - PAGE_SHIFT)) - 1); | 723 | 1ULL<<(20 - PAGE_SHIFT)); |
874 | /* Sort the ranges: */ | 724 | /* Sort the ranges: */ |
875 | sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); | 725 | sort_range(range, nr_range); |
876 | 726 | ||
877 | range_sums = sum_ranges(range, nr_range); | 727 | range_sums = sum_ranges(range, nr_range); |
878 | printk(KERN_INFO "total RAM covered: %ldM\n", | 728 | printk(KERN_INFO "total RAM covered: %ldM\n", |
@@ -1089,9 +939,9 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) | |||
1089 | nr_range = 0; | 939 | nr_range = 0; |
1090 | if (mtrr_tom2) { | 940 | if (mtrr_tom2) { |
1091 | range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT)); | 941 | range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT)); |
1092 | range[nr_range].end = (mtrr_tom2 >> PAGE_SHIFT) - 1; | 942 | range[nr_range].end = mtrr_tom2 >> PAGE_SHIFT; |
1093 | if (highest_pfn < range[nr_range].end + 1) | 943 | if (highest_pfn < range[nr_range].end) |
1094 | highest_pfn = range[nr_range].end + 1; | 944 | highest_pfn = range[nr_range].end; |
1095 | nr_range++; | 945 | nr_range++; |
1096 | } | 946 | } |
1097 | nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0); | 947 | nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0); |
@@ -1103,15 +953,15 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) | |||
1103 | 953 | ||
1104 | /* Check the holes: */ | 954 | /* Check the holes: */ |
1105 | for (i = 0; i < nr_range - 1; i++) { | 955 | for (i = 0; i < nr_range - 1; i++) { |
1106 | if (range[i].end + 1 < range[i+1].start) | 956 | if (range[i].end < range[i+1].start) |
1107 | total_trim_size += real_trim_memory(range[i].end + 1, | 957 | total_trim_size += real_trim_memory(range[i].end, |
1108 | range[i+1].start); | 958 | range[i+1].start); |
1109 | } | 959 | } |
1110 | 960 | ||
1111 | /* Check the top: */ | 961 | /* Check the top: */ |
1112 | i = nr_range - 1; | 962 | i = nr_range - 1; |
1113 | if (range[i].end + 1 < end_pfn) | 963 | if (range[i].end < end_pfn) |
1114 | total_trim_size += real_trim_memory(range[i].end + 1, | 964 | total_trim_size += real_trim_memory(range[i].end, |
1115 | end_pfn); | 965 | end_pfn); |
1116 | 966 | ||
1117 | if (total_trim_size) { | 967 | if (total_trim_size) { |
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index fe4622e8c837..79556bd9b602 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c | |||
@@ -145,6 +145,7 @@ struct set_mtrr_data { | |||
145 | 145 | ||
146 | /** | 146 | /** |
147 | * ipi_handler - Synchronisation handler. Executed by "other" CPUs. | 147 | * ipi_handler - Synchronisation handler. Executed by "other" CPUs. |
148 | * @info: pointer to mtrr configuration data | ||
148 | * | 149 | * |
149 | * Returns nothing. | 150 | * Returns nothing. |
150 | */ | 151 | */ |
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index a966b753e496..740b440fbd73 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c | |||
@@ -12,21 +12,13 @@ | |||
12 | #include <linux/types.h> | 12 | #include <linux/types.h> |
13 | #include <linux/init.h> | 13 | #include <linux/init.h> |
14 | #include <linux/bootmem.h> | 14 | #include <linux/bootmem.h> |
15 | #include <linux/ioport.h> | ||
16 | #include <linux/string.h> | ||
17 | #include <linux/kexec.h> | ||
18 | #include <linux/module.h> | ||
19 | #include <linux/mm.h> | ||
20 | #include <linux/pfn.h> | 15 | #include <linux/pfn.h> |
21 | #include <linux/suspend.h> | 16 | #include <linux/suspend.h> |
22 | #include <linux/firmware-map.h> | 17 | #include <linux/firmware-map.h> |
23 | 18 | ||
24 | #include <asm/pgtable.h> | ||
25 | #include <asm/page.h> | ||
26 | #include <asm/e820.h> | 19 | #include <asm/e820.h> |
27 | #include <asm/proto.h> | 20 | #include <asm/proto.h> |
28 | #include <asm/setup.h> | 21 | #include <asm/setup.h> |
29 | #include <asm/trampoline.h> | ||
30 | 22 | ||
31 | /* | 23 | /* |
32 | * The e820 map is the map that gets modified e.g. with command line parameters | 24 | * The e820 map is the map that gets modified e.g. with command line parameters |
@@ -730,319 +722,44 @@ core_initcall(e820_mark_nvs_memory); | |||
730 | #endif | 722 | #endif |
731 | 723 | ||
732 | /* | 724 | /* |
733 | * Early reserved memory areas. | 725 | * Find a free area with specified alignment in a specific range. |
734 | */ | ||
735 | #define MAX_EARLY_RES 32 | ||
736 | |||
737 | struct early_res { | ||
738 | u64 start, end; | ||
739 | char name[16]; | ||
740 | char overlap_ok; | ||
741 | }; | ||
742 | static struct early_res early_res[MAX_EARLY_RES] __initdata = { | ||
743 | { 0, PAGE_SIZE, "BIOS data page", 1 }, /* BIOS data page */ | ||
744 | #if defined(CONFIG_X86_32) && defined(CONFIG_X86_TRAMPOLINE) | ||
745 | /* | ||
746 | * But first pinch a few for the stack/trampoline stuff | ||
747 | * FIXME: Don't need the extra page at 4K, but need to fix | ||
748 | * trampoline before removing it. (see the GDT stuff) | ||
749 | */ | ||
750 | { PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE", 1 }, | ||
751 | #endif | ||
752 | |||
753 | {} | ||
754 | }; | ||
755 | |||
756 | static int __init find_overlapped_early(u64 start, u64 end) | ||
757 | { | ||
758 | int i; | ||
759 | struct early_res *r; | ||
760 | |||
761 | for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { | ||
762 | r = &early_res[i]; | ||
763 | if (end > r->start && start < r->end) | ||
764 | break; | ||
765 | } | ||
766 | |||
767 | return i; | ||
768 | } | ||
769 | |||
770 | /* | ||
771 | * Drop the i-th range from the early reservation map, | ||
772 | * by copying any higher ranges down one over it, and | ||
773 | * clearing what had been the last slot. | ||
774 | */ | ||
775 | static void __init drop_range(int i) | ||
776 | { | ||
777 | int j; | ||
778 | |||
779 | for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++) | ||
780 | ; | ||
781 | |||
782 | memmove(&early_res[i], &early_res[i + 1], | ||
783 | (j - 1 - i) * sizeof(struct early_res)); | ||
784 | |||
785 | early_res[j - 1].end = 0; | ||
786 | } | ||
787 | |||
788 | /* | ||
789 | * Split any existing ranges that: | ||
790 | * 1) are marked 'overlap_ok', and | ||
791 | * 2) overlap with the stated range [start, end) | ||
792 | * into whatever portion (if any) of the existing range is entirely | ||
793 | * below or entirely above the stated range. Drop the portion | ||
794 | * of the existing range that overlaps with the stated range, | ||
795 | * which will allow the caller of this routine to then add that | ||
796 | * stated range without conflicting with any existing range. | ||
797 | */ | 726 | */ |
798 | static void __init drop_overlaps_that_are_ok(u64 start, u64 end) | 727 | u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align) |
799 | { | 728 | { |
800 | int i; | 729 | int i; |
801 | struct early_res *r; | ||
802 | u64 lower_start, lower_end; | ||
803 | u64 upper_start, upper_end; | ||
804 | char name[16]; | ||
805 | 730 | ||
806 | for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { | 731 | for (i = 0; i < e820.nr_map; i++) { |
807 | r = &early_res[i]; | 732 | struct e820entry *ei = &e820.map[i]; |
733 | u64 addr; | ||
734 | u64 ei_start, ei_last; | ||
808 | 735 | ||
809 | /* Continue past non-overlapping ranges */ | 736 | if (ei->type != E820_RAM) |
810 | if (end <= r->start || start >= r->end) | ||
811 | continue; | 737 | continue; |
812 | 738 | ||
813 | /* | 739 | ei_last = ei->addr + ei->size; |
814 | * Leave non-ok overlaps as is; let caller | 740 | ei_start = ei->addr; |
815 | * panic "Overlapping early reservations" | 741 | addr = find_early_area(ei_start, ei_last, start, end, |
816 | * when it hits this overlap. | 742 | size, align); |
817 | */ | ||
818 | if (!r->overlap_ok) | ||
819 | return; | ||
820 | |||
821 | /* | ||
822 | * We have an ok overlap. We will drop it from the early | ||
823 | * reservation map, and add back in any non-overlapping | ||
824 | * portions (lower or upper) as separate, overlap_ok, | ||
825 | * non-overlapping ranges. | ||
826 | */ | ||
827 | |||
828 | /* 1. Note any non-overlapping (lower or upper) ranges. */ | ||
829 | strncpy(name, r->name, sizeof(name) - 1); | ||
830 | |||
831 | lower_start = lower_end = 0; | ||
832 | upper_start = upper_end = 0; | ||
833 | if (r->start < start) { | ||
834 | lower_start = r->start; | ||
835 | lower_end = start; | ||
836 | } | ||
837 | if (r->end > end) { | ||
838 | upper_start = end; | ||
839 | upper_end = r->end; | ||
840 | } | ||
841 | |||
842 | /* 2. Drop the original ok overlapping range */ | ||
843 | drop_range(i); | ||
844 | |||
845 | i--; /* resume for-loop on copied down entry */ | ||
846 | |||
847 | /* 3. Add back in any non-overlapping ranges. */ | ||
848 | if (lower_end) | ||
849 | reserve_early_overlap_ok(lower_start, lower_end, name); | ||
850 | if (upper_end) | ||
851 | reserve_early_overlap_ok(upper_start, upper_end, name); | ||
852 | } | ||
853 | } | ||
854 | |||
855 | static void __init __reserve_early(u64 start, u64 end, char *name, | ||
856 | int overlap_ok) | ||
857 | { | ||
858 | int i; | ||
859 | struct early_res *r; | ||
860 | |||
861 | i = find_overlapped_early(start, end); | ||
862 | if (i >= MAX_EARLY_RES) | ||
863 | panic("Too many early reservations"); | ||
864 | r = &early_res[i]; | ||
865 | if (r->end) | ||
866 | panic("Overlapping early reservations " | ||
867 | "%llx-%llx %s to %llx-%llx %s\n", | ||
868 | start, end - 1, name?name:"", r->start, | ||
869 | r->end - 1, r->name); | ||
870 | r->start = start; | ||
871 | r->end = end; | ||
872 | r->overlap_ok = overlap_ok; | ||
873 | if (name) | ||
874 | strncpy(r->name, name, sizeof(r->name) - 1); | ||
875 | } | ||
876 | |||
877 | /* | ||
878 | * A few early reservations come here. | ||
879 | * | ||
880 | * The 'overlap_ok' in the name of this routine does -not- mean it | ||
881 | * is ok for these reservations to overlap an earlier reservation. | ||
882 | * Rather it means that it is ok for subsequent reservations to | ||
883 | * overlap this one. | ||
884 | * | ||
885 | * Use this entry point to reserve early ranges when you are doing | ||
886 | * so out of "Paranoia", reserving perhaps more memory than you need, | ||
887 | * just in case, and don't mind a subsequent overlapping reservation | ||
888 | * that is known to be needed. | ||
889 | * | ||
890 | * The drop_overlaps_that_are_ok() call here isn't really needed. | ||
891 | * It would be needed if we had two colliding 'overlap_ok' | ||
892 | * reservations, so that the second such would not panic on the | ||
893 | * overlap with the first. We don't have any such as of this | ||
894 | * writing, but might as well tolerate such if it happens in | ||
895 | * the future. | ||
896 | */ | ||
897 | void __init reserve_early_overlap_ok(u64 start, u64 end, char *name) | ||
898 | { | ||
899 | drop_overlaps_that_are_ok(start, end); | ||
900 | __reserve_early(start, end, name, 1); | ||
901 | } | ||
902 | |||
903 | /* | ||
904 | * Most early reservations come here. | ||
905 | * | ||
906 | * We first have drop_overlaps_that_are_ok() drop any pre-existing | ||
907 | * 'overlap_ok' ranges, so that we can then reserve this memory | ||
908 | * range without risk of panic'ing on an overlapping overlap_ok | ||
909 | * early reservation. | ||
910 | */ | ||
911 | void __init reserve_early(u64 start, u64 end, char *name) | ||
912 | { | ||
913 | if (start >= end) | ||
914 | return; | ||
915 | |||
916 | drop_overlaps_that_are_ok(start, end); | ||
917 | __reserve_early(start, end, name, 0); | ||
918 | } | ||
919 | |||
920 | void __init free_early(u64 start, u64 end) | ||
921 | { | ||
922 | struct early_res *r; | ||
923 | int i; | ||
924 | |||
925 | i = find_overlapped_early(start, end); | ||
926 | r = &early_res[i]; | ||
927 | if (i >= MAX_EARLY_RES || r->end != end || r->start != start) | ||
928 | panic("free_early on not reserved area: %llx-%llx!", | ||
929 | start, end - 1); | ||
930 | |||
931 | drop_range(i); | ||
932 | } | ||
933 | |||
934 | void __init early_res_to_bootmem(u64 start, u64 end) | ||
935 | { | ||
936 | int i, count; | ||
937 | u64 final_start, final_end; | ||
938 | |||
939 | count = 0; | ||
940 | for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) | ||
941 | count++; | ||
942 | |||
943 | printk(KERN_INFO "(%d early reservations) ==> bootmem [%010llx - %010llx]\n", | ||
944 | count, start, end); | ||
945 | for (i = 0; i < count; i++) { | ||
946 | struct early_res *r = &early_res[i]; | ||
947 | printk(KERN_INFO " #%d [%010llx - %010llx] %16s", i, | ||
948 | r->start, r->end, r->name); | ||
949 | final_start = max(start, r->start); | ||
950 | final_end = min(end, r->end); | ||
951 | if (final_start >= final_end) { | ||
952 | printk(KERN_CONT "\n"); | ||
953 | continue; | ||
954 | } | ||
955 | printk(KERN_CONT " ==> [%010llx - %010llx]\n", | ||
956 | final_start, final_end); | ||
957 | reserve_bootmem_generic(final_start, final_end - final_start, | ||
958 | BOOTMEM_DEFAULT); | ||
959 | } | ||
960 | } | ||
961 | 743 | ||
962 | /* Check for already reserved areas */ | 744 | if (addr != -1ULL) |
963 | static inline int __init bad_addr(u64 *addrp, u64 size, u64 align) | 745 | return addr; |
964 | { | ||
965 | int i; | ||
966 | u64 addr = *addrp; | ||
967 | int changed = 0; | ||
968 | struct early_res *r; | ||
969 | again: | ||
970 | i = find_overlapped_early(addr, addr + size); | ||
971 | r = &early_res[i]; | ||
972 | if (i < MAX_EARLY_RES && r->end) { | ||
973 | *addrp = addr = round_up(r->end, align); | ||
974 | changed = 1; | ||
975 | goto again; | ||
976 | } | 746 | } |
977 | return changed; | 747 | return -1ULL; |
978 | } | 748 | } |
979 | 749 | ||
980 | /* Check for already reserved areas */ | 750 | u64 __init find_fw_memmap_area(u64 start, u64 end, u64 size, u64 align) |
981 | static inline int __init bad_addr_size(u64 *addrp, u64 *sizep, u64 align) | ||
982 | { | 751 | { |
983 | int i; | 752 | return find_e820_area(start, end, size, align); |
984 | u64 addr = *addrp, last; | ||
985 | u64 size = *sizep; | ||
986 | int changed = 0; | ||
987 | again: | ||
988 | last = addr + size; | ||
989 | for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { | ||
990 | struct early_res *r = &early_res[i]; | ||
991 | if (last > r->start && addr < r->start) { | ||
992 | size = r->start - addr; | ||
993 | changed = 1; | ||
994 | goto again; | ||
995 | } | ||
996 | if (last > r->end && addr < r->end) { | ||
997 | addr = round_up(r->end, align); | ||
998 | size = last - addr; | ||
999 | changed = 1; | ||
1000 | goto again; | ||
1001 | } | ||
1002 | if (last <= r->end && addr >= r->start) { | ||
1003 | (*sizep)++; | ||
1004 | return 0; | ||
1005 | } | ||
1006 | } | ||
1007 | if (changed) { | ||
1008 | *addrp = addr; | ||
1009 | *sizep = size; | ||
1010 | } | ||
1011 | return changed; | ||
1012 | } | 753 | } |
1013 | 754 | ||
1014 | /* | 755 | u64 __init get_max_mapped(void) |
1015 | * Find a free area with specified alignment in a specific range. | ||
1016 | */ | ||
1017 | u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align) | ||
1018 | { | 756 | { |
1019 | int i; | 757 | u64 end = max_pfn_mapped; |
1020 | 758 | ||
1021 | for (i = 0; i < e820.nr_map; i++) { | 759 | end <<= PAGE_SHIFT; |
1022 | struct e820entry *ei = &e820.map[i]; | ||
1023 | u64 addr, last; | ||
1024 | u64 ei_last; | ||
1025 | 760 | ||
1026 | if (ei->type != E820_RAM) | 761 | return end; |
1027 | continue; | ||
1028 | addr = round_up(ei->addr, align); | ||
1029 | ei_last = ei->addr + ei->size; | ||
1030 | if (addr < start) | ||
1031 | addr = round_up(start, align); | ||
1032 | if (addr >= ei_last) | ||
1033 | continue; | ||
1034 | while (bad_addr(&addr, size, align) && addr+size <= ei_last) | ||
1035 | ; | ||
1036 | last = addr + size; | ||
1037 | if (last > ei_last) | ||
1038 | continue; | ||
1039 | if (last > end) | ||
1040 | continue; | ||
1041 | return addr; | ||
1042 | } | ||
1043 | return -1ULL; | ||
1044 | } | 762 | } |
1045 | |||
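find_e820_area() keeps only the e820 walk; the per-entry search moves into find_early_area(), whose body lives with the relocated early_res code outside this diff. Judging from the bad_addr() loop deleted above, it is assumed to do roughly the following (helper names are placeholders, not real kernel symbols):

    u64 addr = round_up(max(ei_start, start), align);
    while (overlaps_early_reservation(addr, size))          /* hypothetical */
            addr = round_up(reservation_end_after(addr), align);
    if (addr + size <= min(ei_last, end))
            return addr;
    return -1ULL;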
1046 | /* | 763 | /* |
1047 | * Find next free range after *start | 764 | * Find next free range after *start |
1048 | */ | 765 | */ |
@@ -1052,25 +769,19 @@ u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align) | |||
1052 | 769 | ||
1053 | for (i = 0; i < e820.nr_map; i++) { | 770 | for (i = 0; i < e820.nr_map; i++) { |
1054 | struct e820entry *ei = &e820.map[i]; | 771 | struct e820entry *ei = &e820.map[i]; |
1055 | u64 addr, last; | 772 | u64 addr; |
1056 | u64 ei_last; | 773 | u64 ei_start, ei_last; |
1057 | 774 | ||
1058 | if (ei->type != E820_RAM) | 775 | if (ei->type != E820_RAM) |
1059 | continue; | 776 | continue; |
1060 | addr = round_up(ei->addr, align); | 777 | |
1061 | ei_last = ei->addr + ei->size; | 778 | ei_last = ei->addr + ei->size; |
1062 | if (addr < start) | 779 | ei_start = ei->addr; |
1063 | addr = round_up(start, align); | 780 | addr = find_early_area_size(ei_start, ei_last, start, |
1064 | if (addr >= ei_last) | 781 | sizep, align); |
1065 | continue; | 782 | |
1066 | *sizep = ei_last - addr; | 783 | if (addr != -1ULL) |
1067 | while (bad_addr_size(&addr, sizep, align) && | 784 | return addr; |
1068 | addr + *sizep <= ei_last) | ||
1069 | ; | ||
1070 | last = addr + *sizep; | ||
1071 | if (last > ei_last) | ||
1072 | continue; | ||
1073 | return addr; | ||
1074 | } | 785 | } |
1075 | 786 | ||
1076 | return -1ULL; | 787 | return -1ULL; |
@@ -1429,6 +1140,8 @@ void __init e820_reserve_resources_late(void) | |||
1429 | end = MAX_RESOURCE_SIZE; | 1140 | end = MAX_RESOURCE_SIZE; |
1430 | if (start >= end) | 1141 | if (start >= end) |
1431 | continue; | 1142 | continue; |
1143 | printk(KERN_DEBUG "reserve RAM buffer: %016llx - %016llx ", | ||
1144 | start, end); | ||
1432 | reserve_region_with_split(&iomem_resource, start, end, | 1145 | reserve_region_with_split(&iomem_resource, start, end, |
1433 | "RAM buffer"); | 1146 | "RAM buffer"); |
1434 | } | 1147 | } |
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index 5051b94c9069..adedeef1dedc 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c | |||
@@ -29,6 +29,16 @@ static void __init i386_default_early_setup(void) | |||
29 | 29 | ||
30 | void __init i386_start_kernel(void) | 30 | void __init i386_start_kernel(void) |
31 | { | 31 | { |
32 | #ifdef CONFIG_X86_TRAMPOLINE | ||
33 | /* | ||
34 | * But first pinch a few for the stack/trampoline stuff | ||
35 | * FIXME: Don't need the extra page at 4K, but need to fix | ||
36 | * trampoline before removing it. (see the GDT stuff) | ||
37 | */ | ||
38 | reserve_early_overlap_ok(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, | ||
39 | "EX TRAMPOLINE"); | ||
40 | #endif | ||
41 | |||
32 | reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); | 42 | reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); |
33 | 43 | ||
34 | #ifdef CONFIG_BLK_DEV_INITRD | 44 | #ifdef CONFIG_BLK_DEV_INITRD |
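
For context, the two reservation calls above differ in overlap policy. A hedged sketch of the intended usage (signatures as this series defines them in arch/x86/kernel/e820.c; the overlap semantics in the comments are my reading of that code, not something this hunk states):

	/* reserve_early(): a later overlapping reservation is treated as a
	 * bug.  reserve_early_overlap_ok(): a later overlap is tolerated,
	 * which the trampoline page needs because setup code reserves it
	 * again later in boot. */
	static void __init example_early_reservations(void)
	{
		reserve_early_overlap_ok(PAGE_SIZE, 2 * PAGE_SIZE,
					 "EX TRAMPOLINE");
		reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop),
			      "TEXT DATA BSS");
	}
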
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 7fd318bac59c..37c3d4b17d85 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S | |||
@@ -442,8 +442,8 @@ is386: movl $2,%ecx # set MP | |||
442 | */ | 442 | */ |
443 | cmpb $0,ready | 443 | cmpb $0,ready |
444 | jne 1f | 444 | jne 1f |
445 | movl $per_cpu__gdt_page,%eax | 445 | movl $gdt_page,%eax |
446 | movl $per_cpu__stack_canary,%ecx | 446 | movl $stack_canary,%ecx |
447 | movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax) | 447 | movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax) |
448 | shrl $16, %ecx | 448 | shrl $16, %ecx |
449 | movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax) | 449 | movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax) |
@@ -706,7 +706,7 @@ idt_descr: | |||
706 | .word 0 # 32 bit align gdt_desc.address | 706 | .word 0 # 32 bit align gdt_desc.address |
707 | ENTRY(early_gdt_descr) | 707 | ENTRY(early_gdt_descr) |
708 | .word GDT_ENTRIES*8-1 | 708 | .word GDT_ENTRIES*8-1 |
709 | .long per_cpu__gdt_page /* Overwritten for secondary CPUs */ | 709 | .long gdt_page /* Overwritten for secondary CPUs */ |
710 | 710 | ||
711 | /* | 711 | /* |
712 | * The boot_gdt must mirror the equivalent in setup.S and is | 712 | * The boot_gdt must mirror the equivalent in setup.S and is |
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index ad80a1c718c6..ee4fa1bfcb33 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c | |||
@@ -266,7 +266,7 @@ static void hpet_resume_device(void) | |||
266 | force_hpet_resume(); | 266 | force_hpet_resume(); |
267 | } | 267 | } |
268 | 268 | ||
269 | static void hpet_resume_counter(void) | 269 | static void hpet_resume_counter(struct clocksource *cs) |
270 | { | 270 | { |
271 | hpet_resume_device(); | 271 | hpet_resume_device(); |
272 | hpet_restart_counter(); | 272 | hpet_restart_counter(); |
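
This signature change follows a clocksource API update in the same merge: the resume hook now receives the clocksource being resumed, so shared callbacks can tell instances apart. A minimal sketch of a driver on the new hook (the names here are illustrative, not from the patch):

	#include <linux/clocksource.h>

	static void example_cs_resume(struct clocksource *cs)
	{
		pr_debug("clocksource %s resuming\n", cs->name);
	}

	static struct clocksource example_cs = {
		.name   = "example",
		.rating = 100,
		.resume = example_cs_resume,	/* was: void (*resume)(void) */
	};
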
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index df89102bef80..8c93a84bb627 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c | |||
@@ -32,7 +32,7 @@ | |||
32 | */ | 32 | */ |
33 | 33 | ||
34 | static int i8259A_auto_eoi; | 34 | static int i8259A_auto_eoi; |
35 | DEFINE_SPINLOCK(i8259A_lock); | 35 | DEFINE_RAW_SPINLOCK(i8259A_lock); |
36 | static void mask_and_ack_8259A(unsigned int); | 36 | static void mask_and_ack_8259A(unsigned int); |
37 | 37 | ||
38 | struct irq_chip i8259A_chip = { | 38 | struct irq_chip i8259A_chip = { |
@@ -68,13 +68,13 @@ void disable_8259A_irq(unsigned int irq) | |||
68 | unsigned int mask = 1 << irq; | 68 | unsigned int mask = 1 << irq; |
69 | unsigned long flags; | 69 | unsigned long flags; |
70 | 70 | ||
71 | spin_lock_irqsave(&i8259A_lock, flags); | 71 | raw_spin_lock_irqsave(&i8259A_lock, flags); |
72 | cached_irq_mask |= mask; | 72 | cached_irq_mask |= mask; |
73 | if (irq & 8) | 73 | if (irq & 8) |
74 | outb(cached_slave_mask, PIC_SLAVE_IMR); | 74 | outb(cached_slave_mask, PIC_SLAVE_IMR); |
75 | else | 75 | else |
76 | outb(cached_master_mask, PIC_MASTER_IMR); | 76 | outb(cached_master_mask, PIC_MASTER_IMR); |
77 | spin_unlock_irqrestore(&i8259A_lock, flags); | 77 | raw_spin_unlock_irqrestore(&i8259A_lock, flags); |
78 | } | 78 | } |
79 | 79 | ||
80 | void enable_8259A_irq(unsigned int irq) | 80 | void enable_8259A_irq(unsigned int irq) |
@@ -82,13 +82,13 @@ void enable_8259A_irq(unsigned int irq) | |||
82 | unsigned int mask = ~(1 << irq); | 82 | unsigned int mask = ~(1 << irq); |
83 | unsigned long flags; | 83 | unsigned long flags; |
84 | 84 | ||
85 | spin_lock_irqsave(&i8259A_lock, flags); | 85 | raw_spin_lock_irqsave(&i8259A_lock, flags); |
86 | cached_irq_mask &= mask; | 86 | cached_irq_mask &= mask; |
87 | if (irq & 8) | 87 | if (irq & 8) |
88 | outb(cached_slave_mask, PIC_SLAVE_IMR); | 88 | outb(cached_slave_mask, PIC_SLAVE_IMR); |
89 | else | 89 | else |
90 | outb(cached_master_mask, PIC_MASTER_IMR); | 90 | outb(cached_master_mask, PIC_MASTER_IMR); |
91 | spin_unlock_irqrestore(&i8259A_lock, flags); | 91 | raw_spin_unlock_irqrestore(&i8259A_lock, flags); |
92 | } | 92 | } |
93 | 93 | ||
94 | int i8259A_irq_pending(unsigned int irq) | 94 | int i8259A_irq_pending(unsigned int irq) |
@@ -97,12 +97,12 @@ int i8259A_irq_pending(unsigned int irq) | |||
97 | unsigned long flags; | 97 | unsigned long flags; |
98 | int ret; | 98 | int ret; |
99 | 99 | ||
100 | spin_lock_irqsave(&i8259A_lock, flags); | 100 | raw_spin_lock_irqsave(&i8259A_lock, flags); |
101 | if (irq < 8) | 101 | if (irq < 8) |
102 | ret = inb(PIC_MASTER_CMD) & mask; | 102 | ret = inb(PIC_MASTER_CMD) & mask; |
103 | else | 103 | else |
104 | ret = inb(PIC_SLAVE_CMD) & (mask >> 8); | 104 | ret = inb(PIC_SLAVE_CMD) & (mask >> 8); |
105 | spin_unlock_irqrestore(&i8259A_lock, flags); | 105 | raw_spin_unlock_irqrestore(&i8259A_lock, flags); |
106 | 106 | ||
107 | return ret; | 107 | return ret; |
108 | } | 108 | } |
@@ -150,7 +150,7 @@ static void mask_and_ack_8259A(unsigned int irq) | |||
150 | unsigned int irqmask = 1 << irq; | 150 | unsigned int irqmask = 1 << irq; |
151 | unsigned long flags; | 151 | unsigned long flags; |
152 | 152 | ||
153 | spin_lock_irqsave(&i8259A_lock, flags); | 153 | raw_spin_lock_irqsave(&i8259A_lock, flags); |
154 | /* | 154 | /* |
155 | * Lightweight spurious IRQ detection. We do not want | 155 | * Lightweight spurious IRQ detection. We do not want |
156 | * to overdo spurious IRQ handling - it's usually a sign | 156 | * to overdo spurious IRQ handling - it's usually a sign |
@@ -183,7 +183,7 @@ handle_real_irq: | |||
183 | outb(cached_master_mask, PIC_MASTER_IMR); | 183 | outb(cached_master_mask, PIC_MASTER_IMR); |
184 | outb(0x60+irq, PIC_MASTER_CMD); /* 'Specific EOI to master */ | 184 | outb(0x60+irq, PIC_MASTER_CMD); /* 'Specific EOI to master */ |
185 | } | 185 | } |
186 | spin_unlock_irqrestore(&i8259A_lock, flags); | 186 | raw_spin_unlock_irqrestore(&i8259A_lock, flags); |
187 | return; | 187 | return; |
188 | 188 | ||
189 | spurious_8259A_irq: | 189 | spurious_8259A_irq: |
@@ -285,24 +285,24 @@ void mask_8259A(void) | |||
285 | { | 285 | { |
286 | unsigned long flags; | 286 | unsigned long flags; |
287 | 287 | ||
288 | spin_lock_irqsave(&i8259A_lock, flags); | 288 | raw_spin_lock_irqsave(&i8259A_lock, flags); |
289 | 289 | ||
290 | outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ | 290 | outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ |
291 | outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ | 291 | outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ |
292 | 292 | ||
293 | spin_unlock_irqrestore(&i8259A_lock, flags); | 293 | raw_spin_unlock_irqrestore(&i8259A_lock, flags); |
294 | } | 294 | } |
295 | 295 | ||
296 | void unmask_8259A(void) | 296 | void unmask_8259A(void) |
297 | { | 297 | { |
298 | unsigned long flags; | 298 | unsigned long flags; |
299 | 299 | ||
300 | spin_lock_irqsave(&i8259A_lock, flags); | 300 | raw_spin_lock_irqsave(&i8259A_lock, flags); |
301 | 301 | ||
302 | outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ | 302 | outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ |
303 | outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ | 303 | outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ |
304 | 304 | ||
305 | spin_unlock_irqrestore(&i8259A_lock, flags); | 305 | raw_spin_unlock_irqrestore(&i8259A_lock, flags); |
306 | } | 306 | } |
307 | 307 | ||
308 | void init_8259A(int auto_eoi) | 308 | void init_8259A(int auto_eoi) |
@@ -311,7 +311,7 @@ void init_8259A(int auto_eoi) | |||
311 | 311 | ||
312 | i8259A_auto_eoi = auto_eoi; | 312 | i8259A_auto_eoi = auto_eoi; |
313 | 313 | ||
314 | spin_lock_irqsave(&i8259A_lock, flags); | 314 | raw_spin_lock_irqsave(&i8259A_lock, flags); |
315 | 315 | ||
316 | outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ | 316 | outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ |
317 | outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ | 317 | outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ |
@@ -356,5 +356,5 @@ void init_8259A(int auto_eoi) | |||
356 | outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ | 356 | outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ |
357 | outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ | 357 | outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ |
358 | 358 | ||
359 | spin_unlock_irqrestore(&i8259A_lock, flags); | 359 | raw_spin_unlock_irqrestore(&i8259A_lock, flags); |
360 | } | 360 | } |
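
Every i8259A_lock site above follows the same mechanical conversion: the lock definition and each acquire/release switch to the raw_ variants while the critical sections are untouched. The pattern in isolation (the example lock is hypothetical):

	#include <linux/spinlock.h>

	static DEFINE_RAW_SPINLOCK(example_lock);	/* was DEFINE_SPINLOCK */

	static void example_pic_io(void)
	{
		unsigned long flags;

		raw_spin_lock_irqsave(&example_lock, flags);
		/* program the PIC with interrupts off and the lock held */
		raw_spin_unlock_irqrestore(&example_lock, flags);
	}
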
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index d5932226614f..fce55d532631 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c | |||
@@ -84,24 +84,7 @@ static struct irqaction irq2 = { | |||
84 | }; | 84 | }; |
85 | 85 | ||
86 | DEFINE_PER_CPU(vector_irq_t, vector_irq) = { | 86 | DEFINE_PER_CPU(vector_irq_t, vector_irq) = { |
87 | [0 ... IRQ0_VECTOR - 1] = -1, | 87 | [0 ... NR_VECTORS - 1] = -1, |
88 | [IRQ0_VECTOR] = 0, | ||
89 | [IRQ1_VECTOR] = 1, | ||
90 | [IRQ2_VECTOR] = 2, | ||
91 | [IRQ3_VECTOR] = 3, | ||
92 | [IRQ4_VECTOR] = 4, | ||
93 | [IRQ5_VECTOR] = 5, | ||
94 | [IRQ6_VECTOR] = 6, | ||
95 | [IRQ7_VECTOR] = 7, | ||
96 | [IRQ8_VECTOR] = 8, | ||
97 | [IRQ9_VECTOR] = 9, | ||
98 | [IRQ10_VECTOR] = 10, | ||
99 | [IRQ11_VECTOR] = 11, | ||
100 | [IRQ12_VECTOR] = 12, | ||
101 | [IRQ13_VECTOR] = 13, | ||
102 | [IRQ14_VECTOR] = 14, | ||
103 | [IRQ15_VECTOR] = 15, | ||
104 | [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 | ||
105 | }; | 88 | }; |
106 | 89 | ||
107 | int vector_used_by_percpu_irq(unsigned int vector) | 90 | int vector_used_by_percpu_irq(unsigned int vector) |
@@ -116,6 +99,9 @@ int vector_used_by_percpu_irq(unsigned int vector) | |||
116 | return 0; | 99 | return 0; |
117 | } | 100 | } |
118 | 101 | ||
102 | /* Number of legacy interrupts */ | ||
103 | int nr_legacy_irqs __read_mostly = NR_IRQS_LEGACY; | ||
104 | |||
119 | void __init init_ISA_irqs(void) | 105 | void __init init_ISA_irqs(void) |
120 | { | 106 | { |
121 | int i; | 107 | int i; |
@@ -142,6 +128,19 @@ void __init init_ISA_irqs(void) | |||
142 | 128 | ||
143 | void __init init_IRQ(void) | 129 | void __init init_IRQ(void) |
144 | { | 130 | { |
131 | int i; | ||
132 | |||
133 | /* | ||
134 | * On CPU 0, assign IRQ0_VECTOR..IRQ15_VECTOR to IRQs 0..15. | ||
135 | * If these IRQs are handled by legacy interrupt controllers like the | ||
136 | * PIC, this configuration will likely remain static after boot. If | ||
137 | * these IRQs are handled by more modern controllers like the IO-APIC, | ||
138 | * this vector space can be freed and reused dynamically as the | ||
139 | * IRQs migrate. | ||
140 | */ | ||
141 | for (i = 0; i < nr_legacy_irqs; i++) | ||
142 | per_cpu(vector_irq, 0)[IRQ0_VECTOR + i] = i; | ||
143 | |||
145 | x86_init.irqs.intr_init(); | 144 | x86_init.irqs.intr_init(); |
146 | } | 145 | } |
147 | 146 | ||
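
With the static initializer gone, the IRQ0..15 bindings exist only on the boot CPU and only until a modern controller reclaims the vectors. A sketch of the invariant init_IRQ() now establishes (an illustrative check, not part of the patch):

	static void __init check_legacy_vector_map(void)
	{
		int i;

		/* vector_irq maps a CPU vector back to its IRQ number;
		 * only the first nr_legacy_irqs vectors starting at
		 * IRQ0_VECTOR are pre-bound, everything else stays -1. */
		for (i = 0; i < nr_legacy_irqs; i++)
			BUG_ON(per_cpu(vector_irq, 0)[IRQ0_VECTOR + i] != i);
	}
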
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 5de9f4a9c3fd..b43bbaebe2c0 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c | |||
@@ -49,6 +49,7 @@ | |||
49 | #include <linux/module.h> | 49 | #include <linux/module.h> |
50 | #include <linux/kdebug.h> | 50 | #include <linux/kdebug.h> |
51 | #include <linux/kallsyms.h> | 51 | #include <linux/kallsyms.h> |
52 | #include <linux/ftrace.h> | ||
52 | 53 | ||
53 | #include <asm/cacheflush.h> | 54 | #include <asm/cacheflush.h> |
54 | #include <asm/desc.h> | 55 | #include <asm/desc.h> |
@@ -106,16 +107,22 @@ struct kretprobe_blackpoint kretprobe_blacklist[] = { | |||
106 | }; | 107 | }; |
107 | const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist); | 108 | const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist); |
108 | 109 | ||
109 | /* Insert a jump instruction at address 'from', which jumps to address 'to'.*/ | 110 | static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op) |
110 | static void __kprobes set_jmp_op(void *from, void *to) | ||
111 | { | 111 | { |
112 | struct __arch_jmp_op { | 112 | struct __arch_relative_insn { |
113 | char op; | 113 | u8 op; |
114 | s32 raddr; | 114 | s32 raddr; |
115 | } __attribute__((packed)) * jop; | 115 | } __attribute__((packed)) *insn; |
116 | jop = (struct __arch_jmp_op *)from; | 116 | |
117 | jop->raddr = (s32)((long)(to) - ((long)(from) + 5)); | 117 | insn = (struct __arch_relative_insn *)from; |
118 | jop->op = RELATIVEJUMP_INSTRUCTION; | 118 | insn->raddr = (s32)((long)(to) - ((long)(from) + 5)); |
119 | insn->op = op; | ||
120 | } | ||
121 | |||
122 | /* Insert a jump instruction at address 'from', which jumps to address 'to'.*/ | ||
123 | static void __kprobes synthesize_reljump(void *from, void *to) | ||
124 | { | ||
125 | __synthesize_relative_insn(from, to, RELATIVEJUMP_OPCODE); | ||
119 | } | 126 | } |
120 | 127 | ||
121 | /* | 128 | /* |
@@ -202,7 +209,7 @@ static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr) | |||
202 | /* | 209 | /* |
203 | * Basically, kp->ainsn.insn has an original instruction. | 210 | * Basically, kp->ainsn.insn has an original instruction. |
204 | * However, RIP-relative instruction can not do single-stepping | 211 | * However, RIP-relative instruction can not do single-stepping |
205 | * at different place, fix_riprel() tweaks the displacement of | 212 | * at different place, __copy_instruction() tweaks the displacement of |
206 | * that instruction. In that case, we can't recover the instruction | 213 | * that instruction. In that case, we can't recover the instruction |
207 | * from the kp->ainsn.insn. | 214 | * from the kp->ainsn.insn. |
208 | * | 215 | * |
@@ -284,21 +291,37 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) | |||
284 | } | 291 | } |
285 | 292 | ||
286 | /* | 293 | /* |
287 | * Adjust the displacement if the instruction uses the %rip-relative | 294 | * Copy an instruction and adjust the displacement if the instruction |
288 | * addressing mode. | 295 | * uses the %rip-relative addressing mode. |
289 | * If it does, return the address of the 32-bit displacement word. | 296 | * If it does, return the address of the 32-bit displacement word. |
290 | * If not, return null. | 297 | * If not, return null. |
291 | * Only applicable to 64-bit x86. | 298 | * Only applicable to 64-bit x86. |
292 | */ | 299 | */ |
293 | static void __kprobes fix_riprel(struct kprobe *p) | 300 | static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover) |
294 | { | 301 | { |
295 | #ifdef CONFIG_X86_64 | ||
296 | struct insn insn; | 302 | struct insn insn; |
297 | kernel_insn_init(&insn, p->ainsn.insn); | 303 | int ret; |
304 | kprobe_opcode_t buf[MAX_INSN_SIZE]; | ||
298 | 305 | ||
306 | kernel_insn_init(&insn, src); | ||
307 | if (recover) { | ||
308 | insn_get_opcode(&insn); | ||
309 | if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) { | ||
310 | ret = recover_probed_instruction(buf, | ||
311 | (unsigned long)src); | ||
312 | if (ret) | ||
313 | return 0; | ||
314 | kernel_insn_init(&insn, buf); | ||
315 | } | ||
316 | } | ||
317 | insn_get_length(&insn); | ||
318 | memcpy(dest, insn.kaddr, insn.length); | ||
319 | |||
320 | #ifdef CONFIG_X86_64 | ||
299 | if (insn_rip_relative(&insn)) { | 321 | if (insn_rip_relative(&insn)) { |
300 | s64 newdisp; | 322 | s64 newdisp; |
301 | u8 *disp; | 323 | u8 *disp; |
324 | kernel_insn_init(&insn, dest); | ||
302 | insn_get_displacement(&insn); | 325 | insn_get_displacement(&insn); |
303 | /* | 326 | /* |
304 | * The copied instruction uses the %rip-relative addressing | 327 | * The copied instruction uses the %rip-relative addressing |
@@ -312,20 +335,23 @@ static void __kprobes fix_riprel(struct kprobe *p) | |||
312 | * extension of the original signed 32-bit displacement would | 335 | * extension of the original signed 32-bit displacement would |
313 | * have given. | 336 | * have given. |
314 | */ | 337 | */ |
315 | newdisp = (u8 *) p->addr + (s64) insn.displacement.value - | 338 | newdisp = (u8 *) src + (s64) insn.displacement.value - |
316 | (u8 *) p->ainsn.insn; | 339 | (u8 *) dest; |
317 | BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */ | 340 | BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */ |
318 | disp = (u8 *) p->ainsn.insn + insn_offset_displacement(&insn); | 341 | disp = (u8 *) dest + insn_offset_displacement(&insn); |
319 | *(s32 *) disp = (s32) newdisp; | 342 | *(s32 *) disp = (s32) newdisp; |
320 | } | 343 | } |
321 | #endif | 344 | #endif |
345 | return insn.length; | ||
322 | } | 346 | } |
323 | 347 | ||
324 | static void __kprobes arch_copy_kprobe(struct kprobe *p) | 348 | static void __kprobes arch_copy_kprobe(struct kprobe *p) |
325 | { | 349 | { |
326 | memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); | 350 | /* |
327 | 351 | * Copy the instruction without recovering an int3, because the int3 | |
328 | fix_riprel(p); | 352 | * may have been placed there by another subsystem. |
353 | */ | ||
354 | __copy_instruction(p->ainsn.insn, p->addr, 0); | ||
329 | 355 | ||
330 | if (can_boost(p->addr)) | 356 | if (can_boost(p->addr)) |
331 | p->ainsn.boostable = 0; | 357 | p->ainsn.boostable = 0; |
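
__synthesize_relative_insn() above emits the common 5-byte rel32 form shared by jmp (RELATIVEJUMP_OPCODE, 0xe9) and call (0xe8); the displacement is measured from the end of the 5-byte instruction. A standalone check of the arithmetic:

	#include <assert.h>
	#include <stdint.h>

	int main(void)
	{
		/* a jmp at 0x1000 targeting 0x2000: rel = to - (from + 5) */
		uint64_t from = 0x1000, to = 0x2000;
		int32_t rel = (int32_t)(to - (from + 5));

		assert(rel == 0x0ffb);	/* bytes emitted: e9 fb 0f 00 00 */
		return 0;
	}
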
@@ -406,18 +432,6 @@ static void __kprobes restore_btf(void) | |||
406 | update_debugctlmsr(current->thread.debugctlmsr); | 432 | update_debugctlmsr(current->thread.debugctlmsr); |
407 | } | 433 | } |
408 | 434 | ||
409 | static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) | ||
410 | { | ||
411 | clear_btf(); | ||
412 | regs->flags |= X86_EFLAGS_TF; | ||
413 | regs->flags &= ~X86_EFLAGS_IF; | ||
414 | /* single step inline if the instruction is an int3 */ | ||
415 | if (p->opcode == BREAKPOINT_INSTRUCTION) | ||
416 | regs->ip = (unsigned long)p->addr; | ||
417 | else | ||
418 | regs->ip = (unsigned long)p->ainsn.insn; | ||
419 | } | ||
420 | |||
421 | void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, | 435 | void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, |
422 | struct pt_regs *regs) | 436 | struct pt_regs *regs) |
423 | { | 437 | { |
@@ -429,20 +443,50 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, | |||
429 | *sara = (unsigned long) &kretprobe_trampoline; | 443 | *sara = (unsigned long) &kretprobe_trampoline; |
430 | } | 444 | } |
431 | 445 | ||
446 | #ifdef CONFIG_OPTPROBES | ||
447 | static int __kprobes setup_detour_execution(struct kprobe *p, | ||
448 | struct pt_regs *regs, | ||
449 | int reenter); | ||
450 | #else | ||
451 | #define setup_detour_execution(p, regs, reenter) (0) | ||
452 | #endif | ||
453 | |||
432 | static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs, | 454 | static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs, |
433 | struct kprobe_ctlblk *kcb) | 455 | struct kprobe_ctlblk *kcb, int reenter) |
434 | { | 456 | { |
457 | if (setup_detour_execution(p, regs, reenter)) | ||
458 | return; | ||
459 | |||
435 | #if !defined(CONFIG_PREEMPT) | 460 | #if !defined(CONFIG_PREEMPT) |
436 | if (p->ainsn.boostable == 1 && !p->post_handler) { | 461 | if (p->ainsn.boostable == 1 && !p->post_handler) { |
437 | /* Boost up -- we can execute copied instructions directly */ | 462 | /* Boost up -- we can execute copied instructions directly */ |
438 | reset_current_kprobe(); | 463 | if (!reenter) |
464 | reset_current_kprobe(); | ||
465 | /* | ||
466 | * Reentering boosted probe doesn't reset current_kprobe, | ||
467 | * nor set current_kprobe, because it doesn't use single | ||
468 | * stepping. | ||
469 | */ | ||
439 | regs->ip = (unsigned long)p->ainsn.insn; | 470 | regs->ip = (unsigned long)p->ainsn.insn; |
440 | preempt_enable_no_resched(); | 471 | preempt_enable_no_resched(); |
441 | return; | 472 | return; |
442 | } | 473 | } |
443 | #endif | 474 | #endif |
444 | prepare_singlestep(p, regs); | 475 | if (reenter) { |
445 | kcb->kprobe_status = KPROBE_HIT_SS; | 476 | save_previous_kprobe(kcb); |
477 | set_current_kprobe(p, regs, kcb); | ||
478 | kcb->kprobe_status = KPROBE_REENTER; | ||
479 | } else | ||
480 | kcb->kprobe_status = KPROBE_HIT_SS; | ||
481 | /* Prepare real single stepping */ | ||
482 | clear_btf(); | ||
483 | regs->flags |= X86_EFLAGS_TF; | ||
484 | regs->flags &= ~X86_EFLAGS_IF; | ||
485 | /* single step inline if the instruction is an int3 */ | ||
486 | if (p->opcode == BREAKPOINT_INSTRUCTION) | ||
487 | regs->ip = (unsigned long)p->addr; | ||
488 | else | ||
489 | regs->ip = (unsigned long)p->ainsn.insn; | ||
446 | } | 490 | } |
447 | 491 | ||
448 | /* | 492 | /* |
@@ -456,11 +500,8 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs, | |||
456 | switch (kcb->kprobe_status) { | 500 | switch (kcb->kprobe_status) { |
457 | case KPROBE_HIT_SSDONE: | 501 | case KPROBE_HIT_SSDONE: |
458 | case KPROBE_HIT_ACTIVE: | 502 | case KPROBE_HIT_ACTIVE: |
459 | save_previous_kprobe(kcb); | ||
460 | set_current_kprobe(p, regs, kcb); | ||
461 | kprobes_inc_nmissed_count(p); | 503 | kprobes_inc_nmissed_count(p); |
462 | prepare_singlestep(p, regs); | 504 | setup_singlestep(p, regs, kcb, 1); |
463 | kcb->kprobe_status = KPROBE_REENTER; | ||
464 | break; | 505 | break; |
465 | case KPROBE_HIT_SS: | 506 | case KPROBE_HIT_SS: |
466 | /* A probe has been hit in the codepath leading up to, or just | 507 | /* A probe has been hit in the codepath leading up to, or just |
@@ -535,13 +576,13 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) | |||
535 | * more here. | 576 | * more here. |
536 | */ | 577 | */ |
537 | if (!p->pre_handler || !p->pre_handler(p, regs)) | 578 | if (!p->pre_handler || !p->pre_handler(p, regs)) |
538 | setup_singlestep(p, regs, kcb); | 579 | setup_singlestep(p, regs, kcb, 0); |
539 | return 1; | 580 | return 1; |
540 | } | 581 | } |
541 | } else if (kprobe_running()) { | 582 | } else if (kprobe_running()) { |
542 | p = __get_cpu_var(current_kprobe); | 583 | p = __get_cpu_var(current_kprobe); |
543 | if (p->break_handler && p->break_handler(p, regs)) { | 584 | if (p->break_handler && p->break_handler(p, regs)) { |
544 | setup_singlestep(p, regs, kcb); | 585 | setup_singlestep(p, regs, kcb, 0); |
545 | return 1; | 586 | return 1; |
546 | } | 587 | } |
547 | } /* else: not a kprobe fault; let the kernel handle it */ | 588 | } /* else: not a kprobe fault; let the kernel handle it */ |
@@ -550,6 +591,69 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) | |||
550 | return 0; | 591 | return 0; |
551 | } | 592 | } |
552 | 593 | ||
594 | #ifdef CONFIG_X86_64 | ||
595 | #define SAVE_REGS_STRING \ | ||
596 | /* Skip cs, ip, orig_ax. */ \ | ||
597 | " subq $24, %rsp\n" \ | ||
598 | " pushq %rdi\n" \ | ||
599 | " pushq %rsi\n" \ | ||
600 | " pushq %rdx\n" \ | ||
601 | " pushq %rcx\n" \ | ||
602 | " pushq %rax\n" \ | ||
603 | " pushq %r8\n" \ | ||
604 | " pushq %r9\n" \ | ||
605 | " pushq %r10\n" \ | ||
606 | " pushq %r11\n" \ | ||
607 | " pushq %rbx\n" \ | ||
608 | " pushq %rbp\n" \ | ||
609 | " pushq %r12\n" \ | ||
610 | " pushq %r13\n" \ | ||
611 | " pushq %r14\n" \ | ||
612 | " pushq %r15\n" | ||
613 | #define RESTORE_REGS_STRING \ | ||
614 | " popq %r15\n" \ | ||
615 | " popq %r14\n" \ | ||
616 | " popq %r13\n" \ | ||
617 | " popq %r12\n" \ | ||
618 | " popq %rbp\n" \ | ||
619 | " popq %rbx\n" \ | ||
620 | " popq %r11\n" \ | ||
621 | " popq %r10\n" \ | ||
622 | " popq %r9\n" \ | ||
623 | " popq %r8\n" \ | ||
624 | " popq %rax\n" \ | ||
625 | " popq %rcx\n" \ | ||
626 | " popq %rdx\n" \ | ||
627 | " popq %rsi\n" \ | ||
628 | " popq %rdi\n" \ | ||
629 | /* Skip orig_ax, ip, cs */ \ | ||
630 | " addq $24, %rsp\n" | ||
631 | #else | ||
632 | #define SAVE_REGS_STRING \ | ||
633 | /* Skip cs, ip, orig_ax and gs. */ \ | ||
634 | " subl $16, %esp\n" \ | ||
635 | " pushl %fs\n" \ | ||
636 | " pushl %ds\n" \ | ||
637 | " pushl %es\n" \ | ||
638 | " pushl %eax\n" \ | ||
639 | " pushl %ebp\n" \ | ||
640 | " pushl %edi\n" \ | ||
641 | " pushl %esi\n" \ | ||
642 | " pushl %edx\n" \ | ||
643 | " pushl %ecx\n" \ | ||
644 | " pushl %ebx\n" | ||
645 | #define RESTORE_REGS_STRING \ | ||
646 | " popl %ebx\n" \ | ||
647 | " popl %ecx\n" \ | ||
648 | " popl %edx\n" \ | ||
649 | " popl %esi\n" \ | ||
650 | " popl %edi\n" \ | ||
651 | " popl %ebp\n" \ | ||
652 | " popl %eax\n" \ | ||
653 | /* Skip ds, es, fs, gs, orig_ax, and ip. Note: don't pop cs here*/\ | ||
654 | " addl $24, %esp\n" | ||
655 | #endif | ||
656 | |||
553 | /* | 657 | /* |
554 | * When a retprobed function returns, this code saves registers and | 658 | * When a retprobed function returns, this code saves registers and |
555 | * calls trampoline_handler(), which calls the kretprobe's handler. | 659 | * calls trampoline_handler(), which calls the kretprobe's handler. |
@@ -563,65 +667,16 @@ static void __used __kprobes kretprobe_trampoline_holder(void) | |||
563 | /* We don't bother saving the ss register */ | 667 | /* We don't bother saving the ss register */ |
564 | " pushq %rsp\n" | 668 | " pushq %rsp\n" |
565 | " pushfq\n" | 669 | " pushfq\n" |
566 | /* | 670 | SAVE_REGS_STRING |
567 | * Skip cs, ip, orig_ax. | ||
568 | * trampoline_handler() will plug in these values | ||
569 | */ | ||
570 | " subq $24, %rsp\n" | ||
571 | " pushq %rdi\n" | ||
572 | " pushq %rsi\n" | ||
573 | " pushq %rdx\n" | ||
574 | " pushq %rcx\n" | ||
575 | " pushq %rax\n" | ||
576 | " pushq %r8\n" | ||
577 | " pushq %r9\n" | ||
578 | " pushq %r10\n" | ||
579 | " pushq %r11\n" | ||
580 | " pushq %rbx\n" | ||
581 | " pushq %rbp\n" | ||
582 | " pushq %r12\n" | ||
583 | " pushq %r13\n" | ||
584 | " pushq %r14\n" | ||
585 | " pushq %r15\n" | ||
586 | " movq %rsp, %rdi\n" | 671 | " movq %rsp, %rdi\n" |
587 | " call trampoline_handler\n" | 672 | " call trampoline_handler\n" |
588 | /* Replace saved sp with true return address. */ | 673 | /* Replace saved sp with true return address. */ |
589 | " movq %rax, 152(%rsp)\n" | 674 | " movq %rax, 152(%rsp)\n" |
590 | " popq %r15\n" | 675 | RESTORE_REGS_STRING |
591 | " popq %r14\n" | ||
592 | " popq %r13\n" | ||
593 | " popq %r12\n" | ||
594 | " popq %rbp\n" | ||
595 | " popq %rbx\n" | ||
596 | " popq %r11\n" | ||
597 | " popq %r10\n" | ||
598 | " popq %r9\n" | ||
599 | " popq %r8\n" | ||
600 | " popq %rax\n" | ||
601 | " popq %rcx\n" | ||
602 | " popq %rdx\n" | ||
603 | " popq %rsi\n" | ||
604 | " popq %rdi\n" | ||
605 | /* Skip orig_ax, ip, cs */ | ||
606 | " addq $24, %rsp\n" | ||
607 | " popfq\n" | 676 | " popfq\n" |
608 | #else | 677 | #else |
609 | " pushf\n" | 678 | " pushf\n" |
610 | /* | 679 | SAVE_REGS_STRING |
611 | * Skip cs, ip, orig_ax and gs. | ||
612 | * trampoline_handler() will plug in these values | ||
613 | */ | ||
614 | " subl $16, %esp\n" | ||
615 | " pushl %fs\n" | ||
616 | " pushl %es\n" | ||
617 | " pushl %ds\n" | ||
618 | " pushl %eax\n" | ||
619 | " pushl %ebp\n" | ||
620 | " pushl %edi\n" | ||
621 | " pushl %esi\n" | ||
622 | " pushl %edx\n" | ||
623 | " pushl %ecx\n" | ||
624 | " pushl %ebx\n" | ||
625 | " movl %esp, %eax\n" | 680 | " movl %esp, %eax\n" |
626 | " call trampoline_handler\n" | 681 | " call trampoline_handler\n" |
627 | /* Move flags to cs */ | 682 | /* Move flags to cs */ |
@@ -629,15 +684,7 @@ static void __used __kprobes kretprobe_trampoline_holder(void) | |||
629 | " movl %edx, 52(%esp)\n" | 684 | " movl %edx, 52(%esp)\n" |
630 | /* Replace saved flags with true return address. */ | 685 | /* Replace saved flags with true return address. */ |
631 | " movl %eax, 56(%esp)\n" | 686 | " movl %eax, 56(%esp)\n" |
632 | " popl %ebx\n" | 687 | RESTORE_REGS_STRING |
633 | " popl %ecx\n" | ||
634 | " popl %edx\n" | ||
635 | " popl %esi\n" | ||
636 | " popl %edi\n" | ||
637 | " popl %ebp\n" | ||
638 | " popl %eax\n" | ||
639 | /* Skip ds, es, fs, gs, orig_ax and ip */ | ||
640 | " addl $24, %esp\n" | ||
641 | " popf\n" | 688 | " popf\n" |
642 | #endif | 689 | #endif |
643 | " ret\n"); | 690 | " ret\n"); |
@@ -805,8 +852,8 @@ static void __kprobes resume_execution(struct kprobe *p, | |||
805 | * This instruction can be executed directly if it | 852 | * This instruction can be executed directly if it |
806 | * jumps back to the correct address. | 853 | * jumps back to the correct address. |
807 | */ | 854 | */ |
808 | set_jmp_op((void *)regs->ip, | 855 | synthesize_reljump((void *)regs->ip, |
809 | (void *)orig_ip + (regs->ip - copy_ip)); | 856 | (void *)orig_ip + (regs->ip - copy_ip)); |
810 | p->ainsn.boostable = 1; | 857 | p->ainsn.boostable = 1; |
811 | } else { | 858 | } else { |
812 | p->ainsn.boostable = -1; | 859 | p->ainsn.boostable = -1; |
@@ -1033,6 +1080,358 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) | |||
1033 | return 0; | 1080 | return 0; |
1034 | } | 1081 | } |
1035 | 1082 | ||
1083 | |||
1084 | #ifdef CONFIG_OPTPROBES | ||
1085 | |||
1086 | /* Insert a call instruction at address 'from', which calls address 'to'.*/ | ||
1087 | static void __kprobes synthesize_relcall(void *from, void *to) | ||
1088 | { | ||
1089 | __synthesize_relative_insn(from, to, RELATIVECALL_OPCODE); | ||
1090 | } | ||
1091 | |||
1092 | /* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */ | ||
1093 | static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr, | ||
1094 | unsigned long val) | ||
1095 | { | ||
1096 | #ifdef CONFIG_X86_64 | ||
1097 | *addr++ = 0x48; | ||
1098 | *addr++ = 0xbf; | ||
1099 | #else | ||
1100 | *addr++ = 0xb8; | ||
1101 | #endif | ||
1102 | *(unsigned long *)addr = val; | ||
1103 | } | ||
1104 | |||
1105 | void __kprobes kprobes_optinsn_template_holder(void) | ||
1106 | { | ||
1107 | asm volatile ( | ||
1108 | ".global optprobe_template_entry\n" | ||
1109 | "optprobe_template_entry: \n" | ||
1110 | #ifdef CONFIG_X86_64 | ||
1111 | /* We don't bother saving the ss register */ | ||
1112 | " pushq %rsp\n" | ||
1113 | " pushfq\n" | ||
1114 | SAVE_REGS_STRING | ||
1115 | " movq %rsp, %rsi\n" | ||
1116 | ".global optprobe_template_val\n" | ||
1117 | "optprobe_template_val: \n" | ||
1118 | ASM_NOP5 | ||
1119 | ASM_NOP5 | ||
1120 | ".global optprobe_template_call\n" | ||
1121 | "optprobe_template_call: \n" | ||
1122 | ASM_NOP5 | ||
1123 | /* Move flags to rsp */ | ||
1124 | " movq 144(%rsp), %rdx\n" | ||
1125 | " movq %rdx, 152(%rsp)\n" | ||
1126 | RESTORE_REGS_STRING | ||
1127 | /* Skip flags entry */ | ||
1128 | " addq $8, %rsp\n" | ||
1129 | " popfq\n" | ||
1130 | #else /* CONFIG_X86_32 */ | ||
1131 | " pushf\n" | ||
1132 | SAVE_REGS_STRING | ||
1133 | " movl %esp, %edx\n" | ||
1134 | ".global optprobe_template_val\n" | ||
1135 | "optprobe_template_val: \n" | ||
1136 | ASM_NOP5 | ||
1137 | ".global optprobe_template_call\n" | ||
1138 | "optprobe_template_call: \n" | ||
1139 | ASM_NOP5 | ||
1140 | RESTORE_REGS_STRING | ||
1141 | " addl $4, %esp\n" /* skip cs */ | ||
1142 | " popf\n" | ||
1143 | #endif | ||
1144 | ".global optprobe_template_end\n" | ||
1145 | "optprobe_template_end: \n"); | ||
1146 | } | ||
1147 | |||
1148 | #define TMPL_MOVE_IDX \ | ||
1149 | ((long)&optprobe_template_val - (long)&optprobe_template_entry) | ||
1150 | #define TMPL_CALL_IDX \ | ||
1151 | ((long)&optprobe_template_call - (long)&optprobe_template_entry) | ||
1152 | #define TMPL_END_IDX \ | ||
1153 | ((long)&optprobe_template_end - (long)&optprobe_template_entry) | ||
1154 | |||
1155 | #define INT3_SIZE sizeof(kprobe_opcode_t) | ||
1156 | |||
1157 | /* Optimized kprobe callback function: called from optinsn */ | ||
1158 | static void __kprobes optimized_callback(struct optimized_kprobe *op, | ||
1159 | struct pt_regs *regs) | ||
1160 | { | ||
1161 | struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); | ||
1162 | |||
1163 | preempt_disable(); | ||
1164 | if (kprobe_running()) { | ||
1165 | kprobes_inc_nmissed_count(&op->kp); | ||
1166 | } else { | ||
1167 | /* Save skipped registers */ | ||
1168 | #ifdef CONFIG_X86_64 | ||
1169 | regs->cs = __KERNEL_CS; | ||
1170 | #else | ||
1171 | regs->cs = __KERNEL_CS | get_kernel_rpl(); | ||
1172 | regs->gs = 0; | ||
1173 | #endif | ||
1174 | regs->ip = (unsigned long)op->kp.addr + INT3_SIZE; | ||
1175 | regs->orig_ax = ~0UL; | ||
1176 | |||
1177 | __get_cpu_var(current_kprobe) = &op->kp; | ||
1178 | kcb->kprobe_status = KPROBE_HIT_ACTIVE; | ||
1179 | opt_pre_handler(&op->kp, regs); | ||
1180 | __get_cpu_var(current_kprobe) = NULL; | ||
1181 | } | ||
1182 | preempt_enable_no_resched(); | ||
1183 | } | ||
1184 | |||
1185 | static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src) | ||
1186 | { | ||
1187 | int len = 0, ret; | ||
1188 | |||
1189 | while (len < RELATIVEJUMP_SIZE) { | ||
1190 | ret = __copy_instruction(dest + len, src + len, 1); | ||
1191 | if (!ret || !can_boost(dest + len)) | ||
1192 | return -EINVAL; | ||
1193 | len += ret; | ||
1194 | } | ||
1195 | /* Check whether the address range is reserved */ | ||
1196 | if (ftrace_text_reserved(src, src + len - 1) || | ||
1197 | alternatives_text_reserved(src, src + len - 1)) | ||
1198 | return -EBUSY; | ||
1199 | |||
1200 | return len; | ||
1201 | } | ||
1202 | |||
1203 | /* Check whether insn is indirect jump */ | ||
1204 | static int __kprobes insn_is_indirect_jump(struct insn *insn) | ||
1205 | { | ||
1206 | return ((insn->opcode.bytes[0] == 0xff && | ||
1207 | (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */ | ||
1208 | insn->opcode.bytes[0] == 0xea); /* Segment based jump */ | ||
1209 | } | ||
1210 | |||
1211 | /* Check whether insn jumps into specified address range */ | ||
1212 | static int insn_jump_into_range(struct insn *insn, unsigned long start, int len) | ||
1213 | { | ||
1214 | unsigned long target = 0; | ||
1215 | |||
1216 | switch (insn->opcode.bytes[0]) { | ||
1217 | case 0xe0: /* loopne */ | ||
1218 | case 0xe1: /* loope */ | ||
1219 | case 0xe2: /* loop */ | ||
1220 | case 0xe3: /* jcxz */ | ||
1221 | case 0xe9: /* near relative jump */ | ||
1222 | case 0xeb: /* short relative jump */ | ||
1223 | break; | ||
1224 | case 0x0f: | ||
1225 | if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */ | ||
1226 | break; | ||
1227 | return 0; | ||
1228 | default: | ||
1229 | if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */ | ||
1230 | break; | ||
1231 | return 0; | ||
1232 | } | ||
1233 | target = (unsigned long)insn->next_byte + insn->immediate.value; | ||
1234 | |||
1235 | return (start <= target && target <= start + len); | ||
1236 | } | ||
1237 | |||
1238 | /* Decode the whole function to ensure no instruction jumps into the target */ | ||
1239 | static int __kprobes can_optimize(unsigned long paddr) | ||
1240 | { | ||
1241 | int ret; | ||
1242 | unsigned long addr, size = 0, offset = 0; | ||
1243 | struct insn insn; | ||
1244 | kprobe_opcode_t buf[MAX_INSN_SIZE]; | ||
1245 | /* Dummy buffers for lookup_symbol_attrs */ | ||
1246 | static char __dummy_buf[KSYM_NAME_LEN]; | ||
1247 | |||
1248 | /* Lookup symbol including addr */ | ||
1249 | if (!kallsyms_lookup(paddr, &size, &offset, NULL, __dummy_buf)) | ||
1250 | return 0; | ||
1251 | |||
1252 | /* Check there is enough space for a relative jump. */ | ||
1253 | if (size - offset < RELATIVEJUMP_SIZE) | ||
1254 | return 0; | ||
1255 | |||
1256 | /* Decode instructions */ | ||
1257 | addr = paddr - offset; | ||
1258 | while (addr < paddr - offset + size) { /* Decode until function end */ | ||
1259 | if (search_exception_tables(addr)) | ||
1260 | /* | ||
1261 | * Since some fixup code will jump into this function, | ||
1262 | * we can't optimize a kprobe in this function. | ||
1263 | */ | ||
1264 | return 0; | ||
1265 | kernel_insn_init(&insn, (void *)addr); | ||
1266 | insn_get_opcode(&insn); | ||
1267 | if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) { | ||
1268 | ret = recover_probed_instruction(buf, addr); | ||
1269 | if (ret) | ||
1270 | return 0; | ||
1271 | kernel_insn_init(&insn, buf); | ||
1272 | } | ||
1273 | insn_get_length(&insn); | ||
1274 | /* Recover address */ | ||
1275 | insn.kaddr = (void *)addr; | ||
1276 | insn.next_byte = (void *)(addr + insn.length); | ||
1277 | /* Check that no instruction jumps into the target */ | ||
1278 | if (insn_is_indirect_jump(&insn) || | ||
1279 | insn_jump_into_range(&insn, paddr + INT3_SIZE, | ||
1280 | RELATIVE_ADDR_SIZE)) | ||
1281 | return 0; | ||
1282 | addr += insn.length; | ||
1283 | } | ||
1284 | |||
1285 | return 1; | ||
1286 | } | ||
1287 | |||
1288 | /* Check optimized_kprobe can actually be optimized. */ | ||
1289 | int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op) | ||
1290 | { | ||
1291 | int i; | ||
1292 | struct kprobe *p; | ||
1293 | |||
1294 | for (i = 1; i < op->optinsn.size; i++) { | ||
1295 | p = get_kprobe(op->kp.addr + i); | ||
1296 | if (p && !kprobe_disabled(p)) | ||
1297 | return -EEXIST; | ||
1298 | } | ||
1299 | |||
1300 | return 0; | ||
1301 | } | ||
1302 | |||
1303 | /* Check the addr is within the optimized instructions. */ | ||
1304 | int __kprobes arch_within_optimized_kprobe(struct optimized_kprobe *op, | ||
1305 | unsigned long addr) | ||
1306 | { | ||
1307 | return ((unsigned long)op->kp.addr <= addr && | ||
1308 | (unsigned long)op->kp.addr + op->optinsn.size > addr); | ||
1309 | } | ||
1310 | |||
1311 | /* Free optimized instruction slot */ | ||
1312 | static __kprobes | ||
1313 | void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty) | ||
1314 | { | ||
1315 | if (op->optinsn.insn) { | ||
1316 | free_optinsn_slot(op->optinsn.insn, dirty); | ||
1317 | op->optinsn.insn = NULL; | ||
1318 | op->optinsn.size = 0; | ||
1319 | } | ||
1320 | } | ||
1321 | |||
1322 | void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op) | ||
1323 | { | ||
1324 | __arch_remove_optimized_kprobe(op, 1); | ||
1325 | } | ||
1326 | |||
1327 | /* | ||
1328 | * Copy the instructions that will be replaced by the jump. | ||
1329 | * Target instructions MUST be relocatable (checked inside) | ||
1330 | */ | ||
1331 | int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op) | ||
1332 | { | ||
1333 | u8 *buf; | ||
1334 | int ret; | ||
1335 | long rel; | ||
1336 | |||
1337 | if (!can_optimize((unsigned long)op->kp.addr)) | ||
1338 | return -EILSEQ; | ||
1339 | |||
1340 | op->optinsn.insn = get_optinsn_slot(); | ||
1341 | if (!op->optinsn.insn) | ||
1342 | return -ENOMEM; | ||
1343 | |||
1344 | /* | ||
1345 | * Verify that the address gap is in the 2GB range, because this uses | ||
1346 | * a relative jump. | ||
1347 | */ | ||
1348 | rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE; | ||
1349 | if (abs(rel) > 0x7fffffff) | ||
1350 | return -ERANGE; | ||
1351 | |||
1352 | buf = (u8 *)op->optinsn.insn; | ||
1353 | |||
1354 | /* Copy instructions into the out-of-line buffer */ | ||
1355 | ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr); | ||
1356 | if (ret < 0) { | ||
1357 | __arch_remove_optimized_kprobe(op, 0); | ||
1358 | return ret; | ||
1359 | } | ||
1360 | op->optinsn.size = ret; | ||
1361 | |||
1362 | /* Copy arch-dep-instance from template */ | ||
1363 | memcpy(buf, &optprobe_template_entry, TMPL_END_IDX); | ||
1364 | |||
1365 | /* Set probe information */ | ||
1366 | synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op); | ||
1367 | |||
1368 | /* Set probe function call */ | ||
1369 | synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback); | ||
1370 | |||
1371 | /* Set returning jmp instruction at the tail of out-of-line buffer */ | ||
1372 | synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size, | ||
1373 | (u8 *)op->kp.addr + op->optinsn.size); | ||
1374 | |||
1375 | flush_icache_range((unsigned long) buf, | ||
1376 | (unsigned long) buf + TMPL_END_IDX + | ||
1377 | op->optinsn.size + RELATIVEJUMP_SIZE); | ||
1378 | return 0; | ||
1379 | } | ||
1380 | |||
1381 | /* Replace a breakpoint (int3) with a relative jump. */ | ||
1382 | int __kprobes arch_optimize_kprobe(struct optimized_kprobe *op) | ||
1383 | { | ||
1384 | unsigned char jmp_code[RELATIVEJUMP_SIZE]; | ||
1385 | s32 rel = (s32)((long)op->optinsn.insn - | ||
1386 | ((long)op->kp.addr + RELATIVEJUMP_SIZE)); | ||
1387 | |||
1388 | /* Backup instructions which will be replaced by jump address */ | ||
1389 | memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE, | ||
1390 | RELATIVE_ADDR_SIZE); | ||
1391 | |||
1392 | jmp_code[0] = RELATIVEJUMP_OPCODE; | ||
1393 | *(s32 *)(&jmp_code[1]) = rel; | ||
1394 | |||
1395 | /* | ||
1396 | * text_poke_smp doesn't support modifying code from NMI/MCE | ||
1397 | * context. However, since kprobes itself also doesn't support | ||
1398 | * probing NMI/MCE code, this is not a problem. | ||
1399 | */ | ||
1400 | text_poke_smp(op->kp.addr, jmp_code, RELATIVEJUMP_SIZE); | ||
1401 | return 0; | ||
1402 | } | ||
1403 | |||
1404 | /* Replace a relative jump with a breakpoint (int3). */ | ||
1405 | void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op) | ||
1406 | { | ||
1407 | u8 buf[RELATIVEJUMP_SIZE]; | ||
1408 | |||
1409 | /* Set int3 to first byte for kprobes */ | ||
1410 | buf[0] = BREAKPOINT_INSTRUCTION; | ||
1411 | memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); | ||
1412 | text_poke_smp(op->kp.addr, buf, RELATIVEJUMP_SIZE); | ||
1413 | } | ||
1414 | |||
1415 | static int __kprobes setup_detour_execution(struct kprobe *p, | ||
1416 | struct pt_regs *regs, | ||
1417 | int reenter) | ||
1418 | { | ||
1419 | struct optimized_kprobe *op; | ||
1420 | |||
1421 | if (p->flags & KPROBE_FLAG_OPTIMIZED) { | ||
1422 | /* This kprobe is really able to run optimized path. */ | ||
1423 | op = container_of(p, struct optimized_kprobe, kp); | ||
1424 | /* Detour through copied instructions */ | ||
1425 | regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX; | ||
1426 | if (!reenter) | ||
1427 | reset_current_kprobe(); | ||
1428 | preempt_enable_no_resched(); | ||
1429 | return 1; | ||
1430 | } | ||
1431 | return 0; | ||
1432 | } | ||
1433 | #endif | ||
1434 | |||
1036 | int __init arch_init_kprobes(void) | 1435 | int __init arch_init_kprobes(void) |
1037 | { | 1436 | { |
1038 | return 0; | 1437 | return 0; |
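
From a user's perspective nothing changes with CONFIG_OPTPROBES: probes are registered as before, and the kprobes core decides internally whether the int3 can be replaced by the detour jump prepared above. A minimal module sketch using the stock kprobes API (the probed symbol is just an example):

	#include <linux/module.h>
	#include <linux/kprobes.h>

	static int example_pre(struct kprobe *p, struct pt_regs *regs)
	{
		return 0;	/* continue via detour or single-step */
	}

	static struct kprobe example_kp = {
		.symbol_name = "do_fork",	/* example target */
		.pre_handler = example_pre,
	};

	static int __init example_init(void)
	{
		return register_kprobe(&example_kp);
	}

	static void __exit example_exit(void)
	{
		unregister_kprobe(&example_kp);
	}

	module_init(example_init);
	module_exit(example_exit);
	MODULE_LICENSE("GPL");
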
diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c index 712d15fdc416..71825806cd44 100644 --- a/arch/x86/kernel/mmconf-fam10h_64.c +++ b/arch/x86/kernel/mmconf-fam10h_64.c | |||
@@ -7,6 +7,8 @@ | |||
7 | #include <linux/string.h> | 7 | #include <linux/string.h> |
8 | #include <linux/pci.h> | 8 | #include <linux/pci.h> |
9 | #include <linux/dmi.h> | 9 | #include <linux/dmi.h> |
10 | #include <linux/range.h> | ||
11 | |||
10 | #include <asm/pci-direct.h> | 12 | #include <asm/pci-direct.h> |
11 | #include <linux/sort.h> | 13 | #include <linux/sort.h> |
12 | #include <asm/io.h> | 14 | #include <asm/io.h> |
@@ -30,11 +32,6 @@ static struct pci_hostbridge_probe pci_probes[] __cpuinitdata = { | |||
30 | { 0xff, 0, PCI_VENDOR_ID_AMD, 0x1200 }, | 32 | { 0xff, 0, PCI_VENDOR_ID_AMD, 0x1200 }, |
31 | }; | 33 | }; |
32 | 34 | ||
33 | struct range { | ||
34 | u64 start; | ||
35 | u64 end; | ||
36 | }; | ||
37 | |||
38 | static int __cpuinit cmp_range(const void *x1, const void *x2) | 35 | static int __cpuinit cmp_range(const void *x1, const void *x2) |
39 | { | 36 | { |
40 | const struct range *r1 = x1; | 37 | const struct range *r1 = x1; |
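
With struct range now shared via <linux/range.h>, a sort() comparison callback needs no local type definition. A sketch of the resulting shape (this particular comparator body is illustrative, not the file's exact code):

	#include <linux/range.h>
	#include <linux/sort.h>

	static int cmp_range(const void *x1, const void *x2)
	{
		const struct range *r1 = x1, *r2 = x2;

		if (r1->start < r2->start)
			return -1;
		return r1->start > r2->start ? 1 : 0;
	}
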
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 1b1739d16310..1db183ed7c01 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c | |||
@@ -428,10 +428,6 @@ struct pv_mmu_ops pv_mmu_ops = { | |||
428 | .ptep_modify_prot_start = __ptep_modify_prot_start, | 428 | .ptep_modify_prot_start = __ptep_modify_prot_start, |
429 | .ptep_modify_prot_commit = __ptep_modify_prot_commit, | 429 | .ptep_modify_prot_commit = __ptep_modify_prot_commit, |
430 | 430 | ||
431 | #ifdef CONFIG_HIGHPTE | ||
432 | .kmap_atomic_pte = kmap_atomic, | ||
433 | #endif | ||
434 | |||
435 | #if PAGETABLE_LEVELS >= 3 | 431 | #if PAGETABLE_LEVELS >= 3 |
436 | #ifdef CONFIG_X86_PAE | 432 | #ifdef CONFIG_X86_PAE |
437 | .set_pte_atomic = native_set_pte_atomic, | 433 | .set_pte_atomic = native_set_pte_atomic, |
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 75e14e21f61a..1aa966c565f9 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c | |||
@@ -65,7 +65,7 @@ int dma_set_mask(struct device *dev, u64 mask) | |||
65 | } | 65 | } |
66 | EXPORT_SYMBOL(dma_set_mask); | 66 | EXPORT_SYMBOL(dma_set_mask); |
67 | 67 | ||
68 | #ifdef CONFIG_X86_64 | 68 | #if defined(CONFIG_X86_64) && !defined(CONFIG_NUMA) |
69 | static __initdata void *dma32_bootmem_ptr; | 69 | static __initdata void *dma32_bootmem_ptr; |
70 | static unsigned long dma32_bootmem_size __initdata = (128ULL<<20); | 70 | static unsigned long dma32_bootmem_size __initdata = (128ULL<<20); |
71 | 71 | ||
@@ -116,14 +116,21 @@ static void __init dma32_free_bootmem(void) | |||
116 | dma32_bootmem_ptr = NULL; | 116 | dma32_bootmem_ptr = NULL; |
117 | dma32_bootmem_size = 0; | 117 | dma32_bootmem_size = 0; |
118 | } | 118 | } |
119 | #else | ||
120 | void __init dma32_reserve_bootmem(void) | ||
121 | { | ||
122 | } | ||
123 | static void __init dma32_free_bootmem(void) | ||
124 | { | ||
125 | } | ||
126 | |||
119 | #endif | 127 | #endif |
120 | 128 | ||
121 | void __init pci_iommu_alloc(void) | 129 | void __init pci_iommu_alloc(void) |
122 | { | 130 | { |
123 | #ifdef CONFIG_X86_64 | ||
124 | /* free the range so the IOMMU can get some memory below 4G */ | 131 | /* free the range so the IOMMU can get some memory below 4G */ |
125 | dma32_free_bootmem(); | 132 | dma32_free_bootmem(); |
126 | #endif | 133 | |
127 | if (pci_swiotlb_detect()) | 134 | if (pci_swiotlb_detect()) |
128 | goto out; | 135 | goto out; |
129 | 136 | ||
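
The hunk above moves the config knowledge out of the caller: pci_iommu_alloc() now calls dma32_free_bootmem() unconditionally, and the #else branch supplies empty stubs. The idiom in isolation (sketch):

	/* Define the real functions under the config that needs them and
	 * empty stubs otherwise, so every call site stays #ifdef-free. */
	#if defined(CONFIG_X86_64) && !defined(CONFIG_NUMA)
	void __init dma32_reserve_bootmem(void)
	{
		/* real bootmem reservation work */
	}
	#else
	void __init dma32_reserve_bootmem(void)
	{
	}
	#endif
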
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 704bddcdf64d..8e1aac86b50c 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c | |||
@@ -461,6 +461,14 @@ static struct dmi_system_id __initdata pci_reboot_dmi_table[] = { | |||
461 | DMI_MATCH(DMI_PRODUCT_NAME, "Macmini3,1"), | 461 | DMI_MATCH(DMI_PRODUCT_NAME, "Macmini3,1"), |
462 | }, | 462 | }, |
463 | }, | 463 | }, |
464 | { /* Handle problems with rebooting on the iMac9,1. */ | ||
465 | .callback = set_pci_reboot, | ||
466 | .ident = "Apple iMac9,1", | ||
467 | .matches = { | ||
468 | DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), | ||
469 | DMI_MATCH(DMI_PRODUCT_NAME, "iMac9,1"), | ||
470 | }, | ||
471 | }, | ||
464 | { } | 472 | { } |
465 | }; | 473 | }; |
466 | 474 | ||
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index cb42109a55b4..5d7ba1a449bd 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -969,15 +969,11 @@ void __init setup_arch(char **cmdline_p) | |||
969 | #endif | 969 | #endif |
970 | 970 | ||
971 | initmem_init(0, max_pfn, acpi, k8); | 971 | initmem_init(0, max_pfn, acpi, k8); |
972 | #ifndef CONFIG_NO_BOOTMEM | ||
973 | early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT); | ||
974 | #endif | ||
972 | 975 | ||
973 | #ifdef CONFIG_X86_64 | ||
974 | /* | ||
975 | * dma32_reserve_bootmem() allocates bootmem which may conflict | ||
976 | * with the crashkernel command line, so do that after | ||
977 | * reserve_crashkernel() | ||
978 | */ | ||
979 | dma32_reserve_bootmem(); | 976 | dma32_reserve_bootmem(); |
980 | #endif | ||
981 | 977 | ||
982 | reserve_ibft_region(); | 978 | reserve_ibft_region(); |
983 | 979 | ||
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 35abcb8b00e9..ef6370b00e70 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c | |||
@@ -137,7 +137,13 @@ static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align) | |||
137 | 137 | ||
138 | static void __init pcpu_fc_free(void *ptr, size_t size) | 138 | static void __init pcpu_fc_free(void *ptr, size_t size) |
139 | { | 139 | { |
140 | #ifdef CONFIG_NO_BOOTMEM | ||
141 | u64 start = __pa(ptr); | ||
142 | u64 end = start + size; | ||
143 | free_early_partial(start, end); | ||
144 | #else | ||
140 | free_bootmem(__pa(ptr), size); | 145 | free_bootmem(__pa(ptr), size); |
146 | #endif | ||
141 | } | 147 | } |
142 | 148 | ||
143 | static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) | 149 | static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 9b4401115ea1..a435c76d714e 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -241,6 +241,11 @@ static void __cpuinit smp_callin(void) | |||
241 | map_cpu_to_logical_apicid(); | 241 | map_cpu_to_logical_apicid(); |
242 | 242 | ||
243 | notify_cpu_starting(cpuid); | 243 | notify_cpu_starting(cpuid); |
244 | |||
245 | /* | ||
246 | * Need to setup vector mappings before we enable interrupts. | ||
247 | */ | ||
248 | __setup_vector_irq(smp_processor_id()); | ||
244 | /* | 249 | /* |
245 | * Get our bogomips. | 250 | * Get our bogomips. |
246 | * | 251 | * |
@@ -315,7 +320,6 @@ notrace static void __cpuinit start_secondary(void *unused) | |||
315 | */ | 320 | */ |
316 | ipi_call_lock(); | 321 | ipi_call_lock(); |
317 | lock_vector_lock(); | 322 | lock_vector_lock(); |
318 | __setup_vector_irq(smp_processor_id()); | ||
319 | set_cpu_online(smp_processor_id(), true); | 323 | set_cpu_online(smp_processor_id(), true); |
320 | unlock_vector_lock(); | 324 | unlock_vector_lock(); |
321 | ipi_call_unlock(); | 325 | ipi_call_unlock(); |
@@ -1212,11 +1216,12 @@ __init void prefill_possible_map(void) | |||
1212 | 1216 | ||
1213 | total_cpus = max_t(int, possible, num_processors + disabled_cpus); | 1217 | total_cpus = max_t(int, possible, num_processors + disabled_cpus); |
1214 | 1218 | ||
1215 | if (possible > CONFIG_NR_CPUS) { | 1219 | /* nr_cpu_ids could be reduced via nr_cpus= */ |
1220 | if (possible > nr_cpu_ids) { | ||
1216 | printk(KERN_WARNING | 1221 | printk(KERN_WARNING |
1217 | "%d Processors exceeds NR_CPUS limit of %d\n", | 1222 | "%d Processors exceeds NR_CPUS limit of %d\n", |
1218 | possible, CONFIG_NR_CPUS); | 1223 | possible, nr_cpu_ids); |
1219 | possible = CONFIG_NR_CPUS; | 1224 | possible = nr_cpu_ids; |
1220 | } | 1225 | } |
1221 | 1226 | ||
1222 | printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", | 1227 | printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", |
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index be2573448ed9..fb5cc5e14cfa 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c | |||
@@ -70,11 +70,11 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id) | |||
70 | * manually to deassert NMI lines for the watchdog if run | 70 | * manually to deassert NMI lines for the watchdog if run |
71 | * on an 82489DX-based system. | 71 | * on an 82489DX-based system. |
72 | */ | 72 | */ |
73 | spin_lock(&i8259A_lock); | 73 | raw_spin_lock(&i8259A_lock); |
74 | outb(0x0c, PIC_MASTER_OCW3); | 74 | outb(0x0c, PIC_MASTER_OCW3); |
75 | /* Ack the IRQ; AEOI will end it automatically. */ | 75 | /* Ack the IRQ; AEOI will end it automatically. */ |
76 | inb(PIC_MASTER_POLL); | 76 | inb(PIC_MASTER_POLL); |
77 | spin_unlock(&i8259A_lock); | 77 | raw_spin_unlock(&i8259A_lock); |
78 | } | 78 | } |
79 | 79 | ||
80 | global_clock_event->event_handler(global_clock_event); | 80 | global_clock_event->event_handler(global_clock_event); |
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 23066ecf12fa..208a857c679f 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c | |||
@@ -740,7 +740,7 @@ static cycle_t __vsyscall_fn vread_tsc(void) | |||
740 | } | 740 | } |
741 | #endif | 741 | #endif |
742 | 742 | ||
743 | static void resume_tsc(void) | 743 | static void resume_tsc(struct clocksource *cs) |
744 | { | 744 | { |
745 | clocksource_tsc.cycle_last = 0; | 745 | clocksource_tsc.cycle_last = 0; |
746 | } | 746 | } |
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index 34a279a7471d..ab38ce0984fa 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c | |||
@@ -559,7 +559,7 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id) | |||
559 | struct irq_desc *desc; | 559 | struct irq_desc *desc; |
560 | unsigned long flags; | 560 | unsigned long flags; |
561 | 561 | ||
562 | spin_lock_irqsave(&i8259A_lock, flags); | 562 | raw_spin_lock_irqsave(&i8259A_lock, flags); |
563 | 563 | ||
564 | /* Find out what's interrupting in the PIIX4 master 8259 */ | 564 | /* Find out what's interrupting in the PIIX4 master 8259 */ |
565 | outb(0x0c, 0x20); /* OCW3 Poll command */ | 565 | outb(0x0c, 0x20); /* OCW3 Poll command */ |
@@ -596,7 +596,7 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id) | |||
596 | outb(0x60 + realirq, 0x20); | 596 | outb(0x60 + realirq, 0x20); |
597 | } | 597 | } |
598 | 598 | ||
599 | spin_unlock_irqrestore(&i8259A_lock, flags); | 599 | raw_spin_unlock_irqrestore(&i8259A_lock, flags); |
600 | 600 | ||
601 | desc = irq_to_desc(realirq); | 601 | desc = irq_to_desc(realirq); |
602 | 602 | ||
@@ -614,7 +614,7 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id) | |||
614 | return IRQ_HANDLED; | 614 | return IRQ_HANDLED; |
615 | 615 | ||
616 | out_unlock: | 616 | out_unlock: |
617 | spin_unlock_irqrestore(&i8259A_lock, flags); | 617 | raw_spin_unlock_irqrestore(&i8259A_lock, flags); |
618 | return IRQ_NONE; | 618 | return IRQ_NONE; |
619 | } | 619 | } |
620 | 620 | ||
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index d430e4c30193..7dd599deca4a 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c | |||
@@ -33,6 +33,7 @@ | |||
33 | #include <asm/fixmap.h> | 33 | #include <asm/fixmap.h> |
34 | #include <asm/apicdef.h> | 34 | #include <asm/apicdef.h> |
35 | #include <asm/apic.h> | 35 | #include <asm/apic.h> |
36 | #include <asm/pgalloc.h> | ||
36 | #include <asm/processor.h> | 37 | #include <asm/processor.h> |
37 | #include <asm/timer.h> | 38 | #include <asm/timer.h> |
38 | #include <asm/vmi_time.h> | 39 | #include <asm/vmi_time.h> |
@@ -266,30 +267,6 @@ static void vmi_nop(void) | |||
266 | { | 267 | { |
267 | } | 268 | } |
268 | 269 | ||
269 | #ifdef CONFIG_HIGHPTE | ||
270 | static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type) | ||
271 | { | ||
272 | void *va = kmap_atomic(page, type); | ||
273 | |||
274 | /* | ||
275 | * Internally, the VMI ROM must map virtual addresses to physical | ||
276 | * addresses for processing MMU updates. By the time MMU updates | ||
277 | * are issued, this information is typically already lost. | ||
278 | * Fortunately, the VMI provides a cache of mapping slots for active | ||
279 | * page tables. | ||
280 | * | ||
281 | * We use slot zero for the linear mapping of physical memory, and | ||
282 | * in HIGHPTE kernels, slot 1 and 2 for KM_PTE0 and KM_PTE1. | ||
283 | * | ||
284 | * args: SLOT VA COUNT PFN | ||
285 | */ | ||
286 | BUG_ON(type != KM_PTE0 && type != KM_PTE1); | ||
287 | vmi_ops.set_linear_mapping((type - KM_PTE0)+1, va, 1, page_to_pfn(page)); | ||
288 | |||
289 | return va; | ||
290 | } | ||
291 | #endif | ||
292 | |||
293 | static void vmi_allocate_pte(struct mm_struct *mm, unsigned long pfn) | 270 | static void vmi_allocate_pte(struct mm_struct *mm, unsigned long pfn) |
294 | { | 271 | { |
295 | vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0); | 272 | vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0); |
@@ -640,6 +617,12 @@ static inline int __init activate_vmi(void) | |||
640 | u64 reloc; | 617 | u64 reloc; |
641 | const struct vmi_relocation_info *rel = (struct vmi_relocation_info *)&reloc; | 618 | const struct vmi_relocation_info *rel = (struct vmi_relocation_info *)&reloc; |
642 | 619 | ||
620 | /* | ||
621 | * Prevent page tables from being allocated in highmem, even if | ||
622 | * CONFIG_HIGHPTE is enabled. | ||
623 | */ | ||
624 | __userpte_alloc_gfp &= ~__GFP_HIGHMEM; | ||
625 | |||
643 | if (call_vrom_func(vmi_rom, vmi_init) != 0) { | 626 | if (call_vrom_func(vmi_rom, vmi_init) != 0) { |
644 | printk(KERN_ERR "VMI ROM failed to initialize!"); | 627 | printk(KERN_ERR "VMI ROM failed to initialize!"); |
645 | return 0; | 628 | return 0; |
@@ -778,10 +761,6 @@ static inline int __init activate_vmi(void) | |||
778 | 761 | ||
779 | /* Set linear is needed in all cases */ | 762 | /* Set linear is needed in all cases */ |
780 | vmi_ops.set_linear_mapping = vmi_get_function(VMI_CALL_SetLinearMapping); | 763 | vmi_ops.set_linear_mapping = vmi_get_function(VMI_CALL_SetLinearMapping); |
781 | #ifdef CONFIG_HIGHPTE | ||
782 | if (vmi_ops.set_linear_mapping) | ||
783 | pv_mmu_ops.kmap_atomic_pte = vmi_kmap_atomic_pte; | ||
784 | #endif | ||
785 | 764 | ||
786 | /* | 765 | /* |
787 | * These MUST always be patched. Don't support indirect jumps | 766 | * These MUST always be patched. Don't support indirect jumps |
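
Instead of supplying a kmap_atomic_pte hook, VMI now sidesteps highmem page tables entirely by masking __GFP_HIGHMEM out of the user-PTE allocation flags. A sketch of the mechanism (__userpte_alloc_gfp is the knob this series adds in arch/x86/mm/pgtable.c; the description of how pte_alloc_one() uses it is my inference from that code):

	/* With __GFP_HIGHMEM cleared, pte_alloc_one() can only hand back
	 * lowmem pages, so PTE pages always have a permanent kernel
	 * mapping and no kmap_atomic_pte paravirt hook is required. */
	static void __init example_force_lowmem_ptes(void)
	{
		__userpte_alloc_gfp &= ~__GFP_HIGHMEM;
	}
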
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c index 74c92bb194df..2f1ca5614292 100644 --- a/arch/x86/kernel/vmiclock_32.c +++ b/arch/x86/kernel/vmiclock_32.c | |||
@@ -79,11 +79,7 @@ unsigned long vmi_tsc_khz(void) | |||
79 | 79 | ||
80 | static inline unsigned int vmi_get_timer_vector(void) | 80 | static inline unsigned int vmi_get_timer_vector(void) |
81 | { | 81 | { |
82 | #ifdef CONFIG_X86_IO_APIC | 82 | return IRQ0_VECTOR; |
83 | return FIRST_DEVICE_VECTOR; | ||
84 | #else | ||
85 | return FIRST_EXTERNAL_VECTOR; | ||
86 | #endif | ||
87 | } | 83 | } |
88 | 84 | ||
89 | /** vmi clockchip */ | 85 | /** vmi clockchip */ |
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index f92a0da608cb..44879df55696 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S | |||
@@ -341,7 +341,7 @@ SECTIONS | |||
341 | * Per-cpu symbols which need to be offset from __per_cpu_load | 341 | * Per-cpu symbols which need to be offset from __per_cpu_load |
342 | * for the boot processor. | 342 | * for the boot processor. |
343 | */ | 343 | */ |
344 | #define INIT_PER_CPU(x) init_per_cpu__##x = per_cpu__##x + __per_cpu_load | 344 | #define INIT_PER_CPU(x) init_per_cpu__##x = x + __per_cpu_load |
345 | INIT_PER_CPU(gdt_page); | 345 | INIT_PER_CPU(gdt_page); |
346 | INIT_PER_CPU(irq_stack_union); | 346 | INIT_PER_CPU(irq_stack_union); |
347 | 347 | ||
@@ -352,7 +352,7 @@ INIT_PER_CPU(irq_stack_union); | |||
352 | "kernel image bigger than KERNEL_IMAGE_SIZE"); | 352 | "kernel image bigger than KERNEL_IMAGE_SIZE"); |
353 | 353 | ||
354 | #ifdef CONFIG_SMP | 354 | #ifdef CONFIG_SMP |
355 | . = ASSERT((per_cpu__irq_stack_union == 0), | 355 | . = ASSERT((irq_stack_union == 0), |
356 | "irq_stack_union is not at start of per-cpu area"); | 356 | "irq_stack_union is not at start of per-cpu area"); |
357 | #endif | 357 | #endif |
358 | 358 | ||
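The INIT_PER_CPU and ASSERT changes track the removal of the per_cpu__ symbol prefix: a percpu variable is now linked under its plain C name, so the linker script references it directly. A sketch of what that means at the source level (my_counter is an illustrative name):

	#include <linux/percpu.h>

	/* Before this series the linker saw per_cpu__my_counter; now the
	 * symbol is just my_counter, offset per-CPU at runtime. */
	DEFINE_PER_CPU(int, my_counter);

	static void bump(void)
	{
		this_cpu_inc(my_counter);	/* resolves against the plain symbol */
	}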
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 06871111bf54..970bbd479516 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig | |||
@@ -66,6 +66,7 @@ config KVM_AMD | |||
66 | 66 | ||
67 | # OK, it's a little counter-intuitive to do this, but it puts it neatly under | 67 | # OK, it's a little counter-intuitive to do this, but it puts it neatly under |
68 | # the virtualization menu. | 68 | # the virtualization menu. |
69 | source drivers/vhost/Kconfig | ||
69 | source drivers/lguest/Kconfig | 70 | source drivers/lguest/Kconfig |
70 | source drivers/virtio/Kconfig | 71 | source drivers/virtio/Kconfig |
71 | 72 | ||
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 2226f2c70ea3..5cb3f0f54f47 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c | |||
@@ -750,6 +750,7 @@ static void __init zone_sizes_init(void) | |||
750 | free_area_init_nodes(max_zone_pfns); | 750 | free_area_init_nodes(max_zone_pfns); |
751 | } | 751 | } |
752 | 752 | ||
753 | #ifndef CONFIG_NO_BOOTMEM | ||
753 | static unsigned long __init setup_node_bootmem(int nodeid, | 754 | static unsigned long __init setup_node_bootmem(int nodeid, |
754 | unsigned long start_pfn, | 755 | unsigned long start_pfn, |
755 | unsigned long end_pfn, | 756 | unsigned long end_pfn, |
@@ -766,13 +767,14 @@ static unsigned long __init setup_node_bootmem(int nodeid, | |||
766 | printk(KERN_INFO " node %d bootmap %08lx - %08lx\n", | 767 | printk(KERN_INFO " node %d bootmap %08lx - %08lx\n", |
767 | nodeid, bootmap, bootmap + bootmap_size); | 768 | nodeid, bootmap, bootmap + bootmap_size); |
768 | free_bootmem_with_active_regions(nodeid, end_pfn); | 769 | free_bootmem_with_active_regions(nodeid, end_pfn); |
769 | early_res_to_bootmem(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT); | ||
770 | 770 | ||
771 | return bootmap + bootmap_size; | 771 | return bootmap + bootmap_size; |
772 | } | 772 | } |
773 | #endif | ||
773 | 774 | ||
774 | void __init setup_bootmem_allocator(void) | 775 | void __init setup_bootmem_allocator(void) |
775 | { | 776 | { |
777 | #ifndef CONFIG_NO_BOOTMEM | ||
776 | int nodeid; | 778 | int nodeid; |
777 | unsigned long bootmap_size, bootmap; | 779 | unsigned long bootmap_size, bootmap; |
778 | /* | 780 | /* |
@@ -784,11 +786,13 @@ void __init setup_bootmem_allocator(void) | |||
784 | if (bootmap == -1L) | 786 | if (bootmap == -1L) |
785 | panic("Cannot find bootmem map of size %ld\n", bootmap_size); | 787 | panic("Cannot find bootmem map of size %ld\n", bootmap_size); |
786 | reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP"); | 788 | reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP"); |
789 | #endif | ||
787 | 790 | ||
788 | printk(KERN_INFO " mapped low ram: 0 - %08lx\n", | 791 | printk(KERN_INFO " mapped low ram: 0 - %08lx\n", |
789 | max_pfn_mapped<<PAGE_SHIFT); | 792 | max_pfn_mapped<<PAGE_SHIFT); |
790 | printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT); | 793 | printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT); |
791 | 794 | ||
795 | #ifndef CONFIG_NO_BOOTMEM | ||
792 | for_each_online_node(nodeid) { | 796 | for_each_online_node(nodeid) { |
793 | unsigned long start_pfn, end_pfn; | 797 | unsigned long start_pfn, end_pfn; |
794 | 798 | ||
@@ -806,6 +810,7 @@ void __init setup_bootmem_allocator(void) | |||
806 | bootmap = setup_node_bootmem(nodeid, start_pfn, end_pfn, | 810 | bootmap = setup_node_bootmem(nodeid, start_pfn, end_pfn, |
807 | bootmap); | 811 | bootmap); |
808 | } | 812 | } |
813 | #endif | ||
809 | 814 | ||
810 | after_bootmem = 1; | 815 | after_bootmem = 1; |
811 | } | 816 | } |
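When CONFIG_NO_BOOTMEM is set, the bootmap setup above compiles away and early reservations live purely in the early_res table until the buddy allocator takes over. A minimal sketch of that interface as it is used throughout this diff (the "MYDATA" label and the local variables are illustrative):

	unsigned long addr;

	/* carve a block out of usable e820 RAM before slab exists */
	addr = find_e820_area(start, end, size, align);
	if (addr == -1L)
		panic("cannot find %lu bytes of early memory\n", size);
	reserve_early(addr, addr + size, "MYDATA");

	/* drop the reservation if it turns out to be unneeded; whatever
	 * is still reserved is handed to the buddy allocator directly,
	 * with no early_res_to_bootmem() replay step */
	free_early(addr, addr + size);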
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 69ddfbd91135..e9b040e1cde5 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -572,6 +572,7 @@ kernel_physical_mapping_init(unsigned long start, | |||
572 | void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, | 572 | void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, |
573 | int acpi, int k8) | 573 | int acpi, int k8) |
574 | { | 574 | { |
575 | #ifndef CONFIG_NO_BOOTMEM | ||
575 | unsigned long bootmap_size, bootmap; | 576 | unsigned long bootmap_size, bootmap; |
576 | 577 | ||
577 | bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT; | 578 | bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT; |
@@ -579,13 +580,15 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, | |||
579 | PAGE_SIZE); | 580 | PAGE_SIZE); |
580 | if (bootmap == -1L) | 581 | if (bootmap == -1L) |
581 | panic("Cannot find bootmem map of size %ld\n", bootmap_size); | 582 | panic("Cannot find bootmem map of size %ld\n", bootmap_size); |
583 | reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP"); | ||
582 | /* don't touch min_low_pfn */ | 584 | /* don't touch min_low_pfn */ |
583 | bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap >> PAGE_SHIFT, | 585 | bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap >> PAGE_SHIFT, |
584 | 0, end_pfn); | 586 | 0, end_pfn); |
585 | e820_register_active_regions(0, start_pfn, end_pfn); | 587 | e820_register_active_regions(0, start_pfn, end_pfn); |
586 | free_bootmem_with_active_regions(0, end_pfn); | 588 | free_bootmem_with_active_regions(0, end_pfn); |
587 | early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT); | 589 | #else |
588 | reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT); | 590 | e820_register_active_regions(0, start_pfn, end_pfn); |
591 | #endif | ||
589 | } | 592 | } |
590 | #endif | 593 | #endif |
591 | 594 | ||
@@ -974,7 +977,7 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node) | |||
974 | if (pmd_none(*pmd)) { | 977 | if (pmd_none(*pmd)) { |
975 | pte_t entry; | 978 | pte_t entry; |
976 | 979 | ||
977 | p = vmemmap_alloc_block(PMD_SIZE, node); | 980 | p = vmemmap_alloc_block_buf(PMD_SIZE, node); |
978 | if (!p) | 981 | if (!p) |
979 | return -ENOMEM; | 982 | return -ENOMEM; |
980 | 983 | ||
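The 64-bit flat initmem_init now matches the 32-bit path: with bootmem, the bootmap is reserved via reserve_early and populated from the active regions; without it, registering the e820 active regions is the whole job, because the pages are released wholesale later. Condensed from this hunk and the numa_free_all_bootmem() hunk below:

	#ifdef CONFIG_NO_BOOTMEM
		/* boot: just record which pfn ranges are usable RAM */
		e820_register_active_regions(0, start_pfn, end_pfn);

		/* later, at mem_init() time: hand them to the buddy
		 * allocator, skipping anything still in the early_res table */
		pages += free_all_memory_core_early(MAX_NUMNODES);
	#endif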
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index b20760ca7244..809baaaf48b1 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c | |||
@@ -418,7 +418,10 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, | |||
418 | 418 | ||
419 | for_each_online_node(nid) { | 419 | for_each_online_node(nid) { |
420 | memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); | 420 | memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); |
421 | NODE_DATA(nid)->node_id = nid; | ||
422 | #ifndef CONFIG_NO_BOOTMEM | ||
421 | NODE_DATA(nid)->bdata = &bootmem_node_data[nid]; | 423 | NODE_DATA(nid)->bdata = &bootmem_node_data[nid]; |
424 | #endif | ||
422 | } | 425 | } |
423 | 426 | ||
424 | setup_bootmem_allocator(); | 427 | setup_bootmem_allocator(); |
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 3307ea8bd43a..8948f47fde05 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
@@ -163,30 +163,48 @@ static void * __init early_node_mem(int nodeid, unsigned long start, | |||
163 | unsigned long end, unsigned long size, | 163 | unsigned long end, unsigned long size, |
164 | unsigned long align) | 164 | unsigned long align) |
165 | { | 165 | { |
166 | unsigned long mem = find_e820_area(start, end, size, align); | 166 | unsigned long mem; |
167 | void *ptr; | ||
168 | 167 | ||
168 | /* | ||
169 | * Put it as high as possible; NODE_DATA and other | ||
170 | * early allocations will end up together there. | ||
171 | */ | ||
172 | if (start < (MAX_DMA_PFN<<PAGE_SHIFT)) | ||
173 | start = MAX_DMA_PFN<<PAGE_SHIFT; | ||
174 | if (start < (MAX_DMA32_PFN<<PAGE_SHIFT) && | ||
175 | end > (MAX_DMA32_PFN<<PAGE_SHIFT)) | ||
176 | start = MAX_DMA32_PFN<<PAGE_SHIFT; | ||
177 | mem = find_e820_area(start, end, size, align); | ||
178 | if (mem != -1L) | ||
179 | return __va(mem); | ||
180 | |||
181 | /* extend the search scope */ | ||
182 | end = max_pfn_mapped << PAGE_SHIFT; | ||
183 | if (end > (MAX_DMA32_PFN<<PAGE_SHIFT)) | ||
184 | start = MAX_DMA32_PFN<<PAGE_SHIFT; | ||
185 | else | ||
186 | start = MAX_DMA_PFN<<PAGE_SHIFT; | ||
187 | mem = find_e820_area(start, end, size, align); | ||
169 | if (mem != -1L) | 188 | if (mem != -1L) |
170 | return __va(mem); | 189 | return __va(mem); |
171 | 190 | ||
172 | ptr = __alloc_bootmem_nopanic(size, align, __pa(MAX_DMA_ADDRESS)); | 191 | printk(KERN_ERR "Cannot find %lu bytes in node %d\n", |
173 | if (ptr == NULL) { | ||
174 | printk(KERN_ERR "Cannot find %lu bytes in node %d\n", | ||
175 | size, nodeid); | 192 | size, nodeid); |
176 | return NULL; | 193 | |
177 | } | 194 | return NULL; |
178 | return ptr; | ||
179 | } | 195 | } |
180 | 196 | ||
181 | /* Initialize bootmem allocator for a node */ | 197 | /* Initialize bootmem allocator for a node */ |
182 | void __init | 198 | void __init |
183 | setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) | 199 | setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) |
184 | { | 200 | { |
185 | unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size; | 201 | unsigned long start_pfn, last_pfn, nodedata_phys; |
186 | const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE); | 202 | const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE); |
187 | unsigned long bootmap_start, nodedata_phys; | ||
188 | void *bootmap; | ||
189 | int nid; | 203 | int nid; |
204 | #ifndef CONFIG_NO_BOOTMEM | ||
205 | unsigned long bootmap_start, bootmap_pages, bootmap_size; | ||
206 | void *bootmap; | ||
207 | #endif | ||
190 | 208 | ||
191 | if (!end) | 209 | if (!end) |
192 | return; | 210 | return; |
@@ -200,7 +218,7 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) | |||
200 | 218 | ||
201 | start = roundup(start, ZONE_ALIGN); | 219 | start = roundup(start, ZONE_ALIGN); |
202 | 220 | ||
203 | printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid, | 221 | printk(KERN_INFO "Initmem setup node %d %016lx-%016lx\n", nodeid, |
204 | start, end); | 222 | start, end); |
205 | 223 | ||
206 | start_pfn = start >> PAGE_SHIFT; | 224 | start_pfn = start >> PAGE_SHIFT; |
@@ -211,14 +229,21 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) | |||
211 | if (node_data[nodeid] == NULL) | 229 | if (node_data[nodeid] == NULL) |
212 | return; | 230 | return; |
213 | nodedata_phys = __pa(node_data[nodeid]); | 231 | nodedata_phys = __pa(node_data[nodeid]); |
232 | reserve_early(nodedata_phys, nodedata_phys + pgdat_size, "NODE_DATA"); | ||
214 | printk(KERN_INFO " NODE_DATA [%016lx - %016lx]\n", nodedata_phys, | 233 | printk(KERN_INFO " NODE_DATA [%016lx - %016lx]\n", nodedata_phys, |
215 | nodedata_phys + pgdat_size - 1); | 234 | nodedata_phys + pgdat_size - 1); |
235 | nid = phys_to_nid(nodedata_phys); | ||
236 | if (nid != nodeid) | ||
237 | printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid); | ||
216 | 238 | ||
217 | memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t)); | 239 | memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t)); |
218 | NODE_DATA(nodeid)->bdata = &bootmem_node_data[nodeid]; | 240 | NODE_DATA(nodeid)->node_id = nodeid; |
219 | NODE_DATA(nodeid)->node_start_pfn = start_pfn; | 241 | NODE_DATA(nodeid)->node_start_pfn = start_pfn; |
220 | NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn; | 242 | NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn; |
221 | 243 | ||
244 | #ifndef CONFIG_NO_BOOTMEM | ||
245 | NODE_DATA(nodeid)->bdata = &bootmem_node_data[nodeid]; | ||
246 | |||
222 | /* | 247 | /* |
223 | * Find a place for the bootmem map | 248 | * Find a place for the bootmem map |
224 | * nodedata_phys could be on other nodes by alloc_bootmem, | 249 | * nodedata_phys could be on other nodes by alloc_bootmem, |
@@ -227,11 +252,7 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) | |||
227 | * of alloc_bootmem, that could clash with reserved range | 252 | * of alloc_bootmem, that could clash with reserved range |
228 | */ | 253 | */ |
229 | bootmap_pages = bootmem_bootmap_pages(last_pfn - start_pfn); | 254 | bootmap_pages = bootmem_bootmap_pages(last_pfn - start_pfn); |
230 | nid = phys_to_nid(nodedata_phys); | 255 | bootmap_start = roundup(nodedata_phys + pgdat_size, PAGE_SIZE); |
231 | if (nid == nodeid) | ||
232 | bootmap_start = roundup(nodedata_phys + pgdat_size, PAGE_SIZE); | ||
233 | else | ||
234 | bootmap_start = roundup(start, PAGE_SIZE); | ||
235 | /* | 256 | /* |
236 | * SMP_CACHE_BYTES could be enough, but init_bootmem_node likes | 257 | * SMP_CACHE_BYTES could be enough, but init_bootmem_node likes |
237 | * to align it to PAGE_SIZE | 258 | * to align it to PAGE_SIZE |
@@ -239,18 +260,13 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) | |||
239 | bootmap = early_node_mem(nodeid, bootmap_start, end, | 260 | bootmap = early_node_mem(nodeid, bootmap_start, end, |
240 | bootmap_pages<<PAGE_SHIFT, PAGE_SIZE); | 261 | bootmap_pages<<PAGE_SHIFT, PAGE_SIZE); |
241 | if (bootmap == NULL) { | 262 | if (bootmap == NULL) { |
242 | if (nodedata_phys < start || nodedata_phys >= end) { | 263 | free_early(nodedata_phys, nodedata_phys + pgdat_size); |
243 | /* | ||
244 | * only need to free it if it is from other node | ||
245 | * bootmem | ||
246 | */ | ||
247 | if (nid != nodeid) | ||
248 | free_bootmem(nodedata_phys, pgdat_size); | ||
249 | } | ||
250 | node_data[nodeid] = NULL; | 264 | node_data[nodeid] = NULL; |
251 | return; | 265 | return; |
252 | } | 266 | } |
253 | bootmap_start = __pa(bootmap); | 267 | bootmap_start = __pa(bootmap); |
268 | reserve_early(bootmap_start, bootmap_start+(bootmap_pages<<PAGE_SHIFT), | ||
269 | "BOOTMAP"); | ||
254 | 270 | ||
255 | bootmap_size = init_bootmem_node(NODE_DATA(nodeid), | 271 | bootmap_size = init_bootmem_node(NODE_DATA(nodeid), |
256 | bootmap_start >> PAGE_SHIFT, | 272 | bootmap_start >> PAGE_SHIFT, |
@@ -259,31 +275,12 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) | |||
259 | printk(KERN_INFO " bootmap [%016lx - %016lx] pages %lx\n", | 275 | printk(KERN_INFO " bootmap [%016lx - %016lx] pages %lx\n", |
260 | bootmap_start, bootmap_start + bootmap_size - 1, | 276 | bootmap_start, bootmap_start + bootmap_size - 1, |
261 | bootmap_pages); | 277 | bootmap_pages); |
262 | |||
263 | free_bootmem_with_active_regions(nodeid, end); | ||
264 | |||
265 | /* | ||
266 | * convert early reserve to bootmem reserve earlier | ||
267 | * otherwise early_node_mem could use early reserved mem | ||
268 | * on previous node | ||
269 | */ | ||
270 | early_res_to_bootmem(start, end); | ||
271 | |||
272 | /* | ||
273 | * in some case early_node_mem could use alloc_bootmem | ||
274 | * to get range on other node, don't reserve that again | ||
275 | */ | ||
276 | if (nid != nodeid) | ||
277 | printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid); | ||
278 | else | ||
279 | reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, | ||
280 | pgdat_size, BOOTMEM_DEFAULT); | ||
281 | nid = phys_to_nid(bootmap_start); | 278 | nid = phys_to_nid(bootmap_start); |
282 | if (nid != nodeid) | 279 | if (nid != nodeid) |
283 | printk(KERN_INFO " bootmap(%d) on node %d\n", nodeid, nid); | 280 | printk(KERN_INFO " bootmap(%d) on node %d\n", nodeid, nid); |
284 | else | 281 | |
285 | reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, | 282 | free_bootmem_with_active_regions(nodeid, end); |
286 | bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT); | 283 | #endif |
287 | 284 | ||
288 | node_set_online(nodeid); | 285 | node_set_online(nodeid); |
289 | } | 286 | } |
@@ -709,6 +706,10 @@ unsigned long __init numa_free_all_bootmem(void) | |||
709 | for_each_online_node(i) | 706 | for_each_online_node(i) |
710 | pages += free_all_bootmem_node(NODE_DATA(i)); | 707 | pages += free_all_bootmem_node(NODE_DATA(i)); |
711 | 708 | ||
709 | #ifdef CONFIG_NO_BOOTMEM | ||
710 | pages += free_all_memory_core_early(MAX_NUMNODES); | ||
711 | #endif | ||
712 | |||
712 | return pages; | 713 | return pages; |
713 | } | 714 | } |
714 | 715 | ||
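early_node_mem's new search order is worth spelling out: it clamps the window above the DMA and DMA32 boundaries first, so NODE_DATA and friends land as high as possible, and only widens the window on failure. A worked example with illustrative inputs (MAX_DMA_PFN and MAX_DMA32_PFN are the usual 16MB/4GB x86 boundaries):

	unsigned long start = 1UL << 20;		/* node RAM at 1MB */
	unsigned long end   = 8UL << 30;		/* ... up to 8GB   */

	if (start < (MAX_DMA_PFN << PAGE_SHIFT))
		start = MAX_DMA_PFN << PAGE_SHIFT;	/* skip ISA DMA zone */
	if (start < (MAX_DMA32_PFN << PAGE_SHIFT) &&
	    end > (MAX_DMA32_PFN << PAGE_SHIFT))
		start = MAX_DMA32_PFN << PAGE_SHIFT;	/* prefer above 4GB  */

	/* the first find_e820_area() call now searches [4GB, 8GB); only
	 * if that fails does the code retry with end = max_pfn_mapped */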
diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile index 39fba37f702f..0b7d3e9593e1 100644 --- a/arch/x86/pci/Makefile +++ b/arch/x86/pci/Makefile | |||
@@ -14,8 +14,7 @@ obj-$(CONFIG_X86_VISWS) += visws.o | |||
14 | obj-$(CONFIG_X86_NUMAQ) += numaq_32.o | 14 | obj-$(CONFIG_X86_NUMAQ) += numaq_32.o |
15 | 15 | ||
16 | obj-y += common.o early.o | 16 | obj-y += common.o early.o |
17 | obj-y += amd_bus.o | 17 | obj-y += amd_bus.o bus_numa.o |
18 | obj-$(CONFIG_X86_64) += bus_numa.o | ||
19 | 18 | ||
20 | ifeq ($(CONFIG_PCI_DEBUG),y) | 19 | ifeq ($(CONFIG_PCI_DEBUG),y) |
21 | EXTRA_CFLAGS += -DDEBUG | 20 | EXTRA_CFLAGS += -DDEBUG |
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index 95ecbd495955..fc1e8fe07e5c 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c | |||
@@ -2,11 +2,11 @@ | |||
2 | #include <linux/pci.h> | 2 | #include <linux/pci.h> |
3 | #include <linux/topology.h> | 3 | #include <linux/topology.h> |
4 | #include <linux/cpu.h> | 4 | #include <linux/cpu.h> |
5 | #include <linux/range.h> | ||
6 | |||
5 | #include <asm/pci_x86.h> | 7 | #include <asm/pci_x86.h> |
6 | 8 | ||
7 | #ifdef CONFIG_X86_64 | ||
8 | #include <asm/pci-direct.h> | 9 | #include <asm/pci-direct.h> |
9 | #endif | ||
10 | 10 | ||
11 | #include "bus_numa.h" | 11 | #include "bus_numa.h" |
12 | 12 | ||
@@ -15,60 +15,6 @@ | |||
15 | * also get peer root bus resources for io/mmio | 15 | * also get peer root bus resources for io/mmio |
16 | */ | 16 | */ |
17 | 17 | ||
18 | #ifdef CONFIG_X86_64 | ||
19 | |||
20 | #define RANGE_NUM 16 | ||
21 | |||
22 | struct res_range { | ||
23 | size_t start; | ||
24 | size_t end; | ||
25 | }; | ||
26 | |||
27 | static void __init update_range(struct res_range *range, size_t start, | ||
28 | size_t end) | ||
29 | { | ||
30 | int i; | ||
31 | int j; | ||
32 | |||
33 | for (j = 0; j < RANGE_NUM; j++) { | ||
34 | if (!range[j].end) | ||
35 | continue; | ||
36 | |||
37 | if (start <= range[j].start && end >= range[j].end) { | ||
38 | range[j].start = 0; | ||
39 | range[j].end = 0; | ||
40 | continue; | ||
41 | } | ||
42 | |||
43 | if (start <= range[j].start && end < range[j].end && range[j].start < end + 1) { | ||
44 | range[j].start = end + 1; | ||
45 | continue; | ||
46 | } | ||
47 | |||
48 | |||
49 | if (start > range[j].start && end >= range[j].end && range[j].end > start - 1) { | ||
50 | range[j].end = start - 1; | ||
51 | continue; | ||
52 | } | ||
53 | |||
54 | if (start > range[j].start && end < range[j].end) { | ||
55 | /* find the new spare */ | ||
56 | for (i = 0; i < RANGE_NUM; i++) { | ||
57 | if (range[i].end == 0) | ||
58 | break; | ||
59 | } | ||
60 | if (i < RANGE_NUM) { | ||
61 | range[i].end = range[j].end; | ||
62 | range[i].start = end + 1; | ||
63 | } else { | ||
64 | printk(KERN_ERR "ran out of slots in ranges\n"); | ||
65 | } | ||
66 | range[j].end = start - 1; | ||
67 | continue; | ||
68 | } | ||
69 | } | ||
70 | } | ||
71 | |||
72 | struct pci_hostbridge_probe { | 18 | struct pci_hostbridge_probe { |
73 | u32 bus; | 19 | u32 bus; |
74 | u32 slot; | 20 | u32 slot; |
@@ -111,6 +57,8 @@ static void __init get_pci_mmcfg_amd_fam10h_range(void) | |||
111 | fam10h_mmconf_end = base + (1ULL<<(segn_busn_bits + 20)) - 1; | 57 | fam10h_mmconf_end = base + (1ULL<<(segn_busn_bits + 20)) - 1; |
112 | } | 58 | } |
113 | 59 | ||
60 | #define RANGE_NUM 16 | ||
61 | |||
114 | /** | 62 | /** |
115 | * early_fill_mp_bus_to_node() | 63 | * early_fill_mp_bus_to_node() |
116 | * called before pcibios_scan_root and pci_scan_bus | 64 | * called before pcibios_scan_root and pci_scan_bus |
@@ -130,16 +78,17 @@ static int __init early_fill_mp_bus_info(void) | |||
130 | struct pci_root_info *info; | 78 | struct pci_root_info *info; |
131 | u32 reg; | 79 | u32 reg; |
132 | struct resource *res; | 80 | struct resource *res; |
133 | size_t start; | 81 | u64 start; |
134 | size_t end; | 82 | u64 end; |
135 | struct res_range range[RANGE_NUM]; | 83 | struct range range[RANGE_NUM]; |
136 | u64 val; | 84 | u64 val; |
137 | u32 address; | 85 | u32 address; |
86 | bool found; | ||
138 | 87 | ||
139 | if (!early_pci_allowed()) | 88 | if (!early_pci_allowed()) |
140 | return -1; | 89 | return -1; |
141 | 90 | ||
142 | found_all_numa_early = 0; | 91 | found = false; |
143 | for (i = 0; i < ARRAY_SIZE(pci_probes); i++) { | 92 | for (i = 0; i < ARRAY_SIZE(pci_probes); i++) { |
144 | u32 id; | 93 | u32 id; |
145 | u16 device; | 94 | u16 device; |
@@ -153,12 +102,12 @@ static int __init early_fill_mp_bus_info(void) | |||
153 | device = (id>>16) & 0xffff; | 102 | device = (id>>16) & 0xffff; |
154 | if (pci_probes[i].vendor == vendor && | 103 | if (pci_probes[i].vendor == vendor && |
155 | pci_probes[i].device == device) { | 104 | pci_probes[i].device == device) { |
156 | found_all_numa_early = 1; | 105 | found = true; |
157 | break; | 106 | break; |
158 | } | 107 | } |
159 | } | 108 | } |
160 | 109 | ||
161 | if (!found_all_numa_early) | 110 | if (!found) |
162 | return 0; | 111 | return 0; |
163 | 112 | ||
164 | pci_root_num = 0; | 113 | pci_root_num = 0; |
@@ -196,7 +145,7 @@ static int __init early_fill_mp_bus_info(void) | |||
196 | def_link = (reg >> 8) & 0x03; | 145 | def_link = (reg >> 8) & 0x03; |
197 | 146 | ||
198 | memset(range, 0, sizeof(range)); | 147 | memset(range, 0, sizeof(range)); |
199 | range[0].end = 0xffff; | 148 | add_range(range, RANGE_NUM, 0, 0, 0xffff + 1); |
200 | /* io port resource */ | 149 | /* io port resource */ |
201 | for (i = 0; i < 4; i++) { | 150 | for (i = 0; i < 4; i++) { |
202 | reg = read_pci_config(bus, slot, 1, 0xc0 + (i << 3)); | 151 | reg = read_pci_config(bus, slot, 1, 0xc0 + (i << 3)); |
@@ -220,13 +169,13 @@ static int __init early_fill_mp_bus_info(void) | |||
220 | 169 | ||
221 | info = &pci_root_info[j]; | 170 | info = &pci_root_info[j]; |
222 | printk(KERN_DEBUG "node %d link %d: io port [%llx, %llx]\n", | 171 | printk(KERN_DEBUG "node %d link %d: io port [%llx, %llx]\n", |
223 | node, link, (u64)start, (u64)end); | 172 | node, link, start, end); |
224 | 173 | ||
225 | /* the kernel handles 16-bit io ports only */ | 174 | /* the kernel handles 16-bit io ports only */ |
226 | if (end > 0xffff) | 175 | if (end > 0xffff) |
227 | end = 0xffff; | 176 | end = 0xffff; |
228 | update_res(info, start, end, IORESOURCE_IO, 1); | 177 | update_res(info, start, end, IORESOURCE_IO, 1); |
229 | update_range(range, start, end); | 178 | subtract_range(range, RANGE_NUM, start, end + 1); |
230 | } | 179 | } |
231 | /* add the left-over io port range to the default node/link, [0, 0xffff] */ | 180 | /* add the left-over io port range to the default node/link, [0, 0xffff] */ |
232 | /* find the position */ | 181 | /* find the position */ |
@@ -241,29 +190,32 @@ static int __init early_fill_mp_bus_info(void) | |||
241 | if (!range[i].end) | 190 | if (!range[i].end) |
242 | continue; | 191 | continue; |
243 | 192 | ||
244 | update_res(info, range[i].start, range[i].end, | 193 | update_res(info, range[i].start, range[i].end - 1, |
245 | IORESOURCE_IO, 1); | 194 | IORESOURCE_IO, 1); |
246 | } | 195 | } |
247 | } | 196 | } |
248 | 197 | ||
249 | memset(range, 0, sizeof(range)); | 198 | memset(range, 0, sizeof(range)); |
250 | /* 0xfd00000000-0xffffffffff for HT */ | 199 | /* 0xfd00000000-0xffffffffff for HT */ |
251 | range[0].end = (0xfdULL<<32) - 1; | 200 | end = cap_resource((0xfdULL<<32) - 1); |
201 | end++; | ||
202 | add_range(range, RANGE_NUM, 0, 0, end); | ||
252 | 203 | ||
253 | /* need to take out [0, TOM) for RAM */ | 204 | /* need to take out [0, TOM) for RAM */ |
254 | address = MSR_K8_TOP_MEM1; | 205 | address = MSR_K8_TOP_MEM1; |
255 | rdmsrl(address, val); | 206 | rdmsrl(address, val); |
256 | end = (val & 0xffffff800000ULL); | 207 | end = (val & 0xffffff800000ULL); |
257 | printk(KERN_INFO "TOM: %016lx aka %ldM\n", end, end>>20); | 208 | printk(KERN_INFO "TOM: %016llx aka %lldM\n", end, end>>20); |
258 | if (end < (1ULL<<32)) | 209 | if (end < (1ULL<<32)) |
259 | update_range(range, 0, end - 1); | 210 | subtract_range(range, RANGE_NUM, 0, end); |
260 | 211 | ||
261 | /* get mmconfig */ | 212 | /* get mmconfig */ |
262 | get_pci_mmcfg_amd_fam10h_range(); | 213 | get_pci_mmcfg_amd_fam10h_range(); |
263 | /* need to take out mmconf range */ | 214 | /* need to take out mmconf range */ |
264 | if (fam10h_mmconf_end) { | 215 | if (fam10h_mmconf_end) { |
265 | printk(KERN_DEBUG "Fam 10h mmconf [%llx, %llx]\n", fam10h_mmconf_start, fam10h_mmconf_end); | 216 | printk(KERN_DEBUG "Fam 10h mmconf [%llx, %llx]\n", fam10h_mmconf_start, fam10h_mmconf_end); |
266 | update_range(range, fam10h_mmconf_start, fam10h_mmconf_end); | 217 | subtract_range(range, RANGE_NUM, fam10h_mmconf_start, |
218 | fam10h_mmconf_end + 1); | ||
267 | } | 219 | } |
268 | 220 | ||
269 | /* mmio resource */ | 221 | /* mmio resource */ |
@@ -293,7 +245,7 @@ static int __init early_fill_mp_bus_info(void) | |||
293 | info = &pci_root_info[j]; | 245 | info = &pci_root_info[j]; |
294 | 246 | ||
295 | printk(KERN_DEBUG "node %d link %d: mmio [%llx, %llx]", | 247 | printk(KERN_DEBUG "node %d link %d: mmio [%llx, %llx]", |
296 | node, link, (u64)start, (u64)end); | 248 | node, link, start, end); |
297 | /* | 249 | /* |
298 | * some broken allocations may overlap the fam10h | 250 | * some broken allocations may overlap the fam10h |
299 | * mmconf range, so start and end need updating. | 251 | * mmconf range, so start and end need updating. |
@@ -318,14 +270,15 @@ static int __init early_fill_mp_bus_info(void) | |||
318 | /* we got a hole */ | 270 | /* we got a hole */ |
319 | endx = fam10h_mmconf_start - 1; | 271 | endx = fam10h_mmconf_start - 1; |
320 | update_res(info, start, endx, IORESOURCE_MEM, 0); | 272 | update_res(info, start, endx, IORESOURCE_MEM, 0); |
321 | update_range(range, start, endx); | 273 | subtract_range(range, RANGE_NUM, start, |
322 | printk(KERN_CONT " ==> [%llx, %llx]", (u64)start, endx); | 274 | endx + 1); |
275 | printk(KERN_CONT " ==> [%llx, %llx]", start, endx); | ||
323 | start = fam10h_mmconf_end + 1; | 276 | start = fam10h_mmconf_end + 1; |
324 | changed = 1; | 277 | changed = 1; |
325 | } | 278 | } |
326 | if (changed) { | 279 | if (changed) { |
327 | if (start <= end) { | 280 | if (start <= end) { |
328 | printk(KERN_CONT " %s [%llx, %llx]", endx?"and":"==>", (u64)start, (u64)end); | 281 | printk(KERN_CONT " %s [%llx, %llx]", endx ? "and" : "==>", start, end); |
329 | } else { | 282 | } else { |
330 | printk(KERN_CONT "%s\n", endx?"":" ==> none"); | 283 | printk(KERN_CONT "%s\n", endx?"":" ==> none"); |
331 | continue; | 284 | continue; |
@@ -333,8 +286,9 @@ static int __init early_fill_mp_bus_info(void) | |||
333 | } | 286 | } |
334 | } | 287 | } |
335 | 288 | ||
336 | update_res(info, start, end, IORESOURCE_MEM, 1); | 289 | update_res(info, cap_resource(start), cap_resource(end), |
337 | update_range(range, start, end); | 290 | IORESOURCE_MEM, 1); |
291 | subtract_range(range, RANGE_NUM, start, end + 1); | ||
338 | printk(KERN_CONT "\n"); | 292 | printk(KERN_CONT "\n"); |
339 | } | 293 | } |
340 | 294 | ||
@@ -348,8 +302,8 @@ static int __init early_fill_mp_bus_info(void) | |||
348 | address = MSR_K8_TOP_MEM2; | 302 | address = MSR_K8_TOP_MEM2; |
349 | rdmsrl(address, val); | 303 | rdmsrl(address, val); |
350 | end = (val & 0xffffff800000ULL); | 304 | end = (val & 0xffffff800000ULL); |
351 | printk(KERN_INFO "TOM2: %016lx aka %ldM\n", end, end>>20); | 305 | printk(KERN_INFO "TOM2: %016llx aka %lldM\n", end, end>>20); |
352 | update_range(range, 1ULL<<32, end - 1); | 306 | subtract_range(range, RANGE_NUM, 1ULL<<32, end); |
353 | } | 307 | } |
354 | 308 | ||
355 | /* | 309 | /* |
@@ -368,7 +322,8 @@ static int __init early_fill_mp_bus_info(void) | |||
368 | if (!range[i].end) | 322 | if (!range[i].end) |
369 | continue; | 323 | continue; |
370 | 324 | ||
371 | update_res(info, range[i].start, range[i].end, | 325 | update_res(info, cap_resource(range[i].start), |
326 | cap_resource(range[i].end - 1), | ||
372 | IORESOURCE_MEM, 1); | 327 | IORESOURCE_MEM, 1); |
373 | } | 328 | } |
374 | } | 329 | } |
@@ -384,24 +339,14 @@ static int __init early_fill_mp_bus_info(void) | |||
384 | info->bus_min, info->bus_max, info->node, info->link); | 339 | info->bus_min, info->bus_max, info->node, info->link); |
385 | for (j = 0; j < res_num; j++) { | 340 | for (j = 0; j < res_num; j++) { |
386 | res = &info->res[j]; | 341 | res = &info->res[j]; |
387 | printk(KERN_DEBUG "bus: %02x index %x %s: [%llx, %llx]\n", | 342 | printk(KERN_DEBUG "bus: %02x index %x %pR\n", |
388 | busnum, j, | 343 | busnum, j, res); |
389 | (res->flags & IORESOURCE_IO)?"io port":"mmio", | ||
390 | res->start, res->end); | ||
391 | } | 344 | } |
392 | } | 345 | } |
393 | 346 | ||
394 | return 0; | 347 | return 0; |
395 | } | 348 | } |
396 | 349 | ||
397 | #else /* !CONFIG_X86_64 */ | ||
398 | |||
399 | static int __init early_fill_mp_bus_info(void) { return 0; } | ||
400 | |||
401 | #endif /* !CONFIG_X86_64 */ | ||
402 | |||
403 | /* common 32/64 bit code */ | ||
404 | |||
405 | #define ENABLE_CF8_EXT_CFG (1ULL << 46) | 350 | #define ENABLE_CF8_EXT_CFG (1ULL << 46) |
406 | 351 | ||
407 | static void enable_pci_io_ecs(void *unused) | 352 | static void enable_pci_io_ecs(void *unused) |
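The local update_range() removed above is replaced by the generic helpers from <linux/range.h>, which use half-open intervals: every inclusive resource end becomes end + 1 on the way into the range list, and range[i].end - 1 on the way back out. A toy example of the convention:

	struct range range[RANGE_NUM];

	memset(range, 0, sizeof(range));
	/* whole io port space: inclusive 0xffff -> exclusive 0x10000 */
	add_range(range, RANGE_NUM, 0, 0, 0xffff + 1);
	/* carve out a window claimed by some node/link, [0x1000, 0x1fff] */
	subtract_range(range, RANGE_NUM, 0x1000, 0x1fff + 1);
	/* left over: [0, 0x1000) and [0x2000, 0x10000); feeding these to
	 * update_res() is why the code above passes range[i].end - 1 */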
diff --git a/arch/x86/pci/bus_numa.c b/arch/x86/pci/bus_numa.c index 12d54ff3654d..64a122883896 100644 --- a/arch/x86/pci/bus_numa.c +++ b/arch/x86/pci/bus_numa.c | |||
@@ -1,11 +1,11 @@ | |||
1 | #include <linux/init.h> | 1 | #include <linux/init.h> |
2 | #include <linux/pci.h> | 2 | #include <linux/pci.h> |
3 | #include <linux/range.h> | ||
3 | 4 | ||
4 | #include "bus_numa.h" | 5 | #include "bus_numa.h" |
5 | 6 | ||
6 | int pci_root_num; | 7 | int pci_root_num; |
7 | struct pci_root_info pci_root_info[PCI_ROOT_NR]; | 8 | struct pci_root_info pci_root_info[PCI_ROOT_NR]; |
8 | int found_all_numa_early; | ||
9 | 9 | ||
10 | void x86_pci_root_bus_res_quirks(struct pci_bus *b) | 10 | void x86_pci_root_bus_res_quirks(struct pci_bus *b) |
11 | { | 11 | { |
@@ -21,10 +21,6 @@ void x86_pci_root_bus_res_quirks(struct pci_bus *b) | |||
21 | if (!pci_root_num) | 21 | if (!pci_root_num) |
22 | return; | 22 | return; |
23 | 23 | ||
24 | /* for amd, if only one root bus, don't need to do anything */ | ||
25 | if (pci_root_num < 2 && found_all_numa_early) | ||
26 | return; | ||
27 | |||
28 | for (i = 0; i < pci_root_num; i++) { | 24 | for (i = 0; i < pci_root_num; i++) { |
29 | if (pci_root_info[i].bus_min == b->number) | 25 | if (pci_root_info[i].bus_min == b->number) |
30 | break; | 26 | break; |
@@ -52,8 +48,8 @@ void x86_pci_root_bus_res_quirks(struct pci_bus *b) | |||
52 | } | 48 | } |
53 | } | 49 | } |
54 | 50 | ||
55 | void __devinit update_res(struct pci_root_info *info, size_t start, | 51 | void __devinit update_res(struct pci_root_info *info, resource_size_t start, |
56 | size_t end, unsigned long flags, int merge) | 52 | resource_size_t end, unsigned long flags, int merge) |
57 | { | 53 | { |
58 | int i; | 54 | int i; |
59 | struct resource *res; | 55 | struct resource *res; |
@@ -61,25 +57,28 @@ void __devinit update_res(struct pci_root_info *info, size_t start, | |||
61 | if (start > end) | 57 | if (start > end) |
62 | return; | 58 | return; |
63 | 59 | ||
60 | if (start == MAX_RESOURCE) | ||
61 | return; | ||
62 | |||
64 | if (!merge) | 63 | if (!merge) |
65 | goto addit; | 64 | goto addit; |
66 | 65 | ||
67 | /* try to merge it with old one */ | 66 | /* try to merge it with old one */ |
68 | for (i = 0; i < info->res_num; i++) { | 67 | for (i = 0; i < info->res_num; i++) { |
69 | size_t final_start, final_end; | 68 | resource_size_t final_start, final_end; |
70 | size_t common_start, common_end; | 69 | resource_size_t common_start, common_end; |
71 | 70 | ||
72 | res = &info->res[i]; | 71 | res = &info->res[i]; |
73 | if (res->flags != flags) | 72 | if (res->flags != flags) |
74 | continue; | 73 | continue; |
75 | 74 | ||
76 | common_start = max((size_t)res->start, start); | 75 | common_start = max(res->start, start); |
77 | common_end = min((size_t)res->end, end); | 76 | common_end = min(res->end, end); |
78 | if (common_start > common_end + 1) | 77 | if (common_start > common_end + 1) |
79 | continue; | 78 | continue; |
80 | 79 | ||
81 | final_start = min((size_t)res->start, start); | 80 | final_start = min(res->start, start); |
82 | final_end = max((size_t)res->end, end); | 81 | final_end = max(res->end, end); |
83 | 82 | ||
84 | res->start = final_start; | 83 | res->start = final_start; |
85 | res->end = final_end; | 84 | res->end = final_end; |
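update_res() now takes resource_size_t and discards a range whose start equals MAX_RESOURCE. That guard pairs with cap_resource(), used by the amd_bus.c hunks above to clamp 64-bit addresses that a 32-bit resource_size_t cannot hold; roughly as defined in <linux/range.h> by this series:

	static inline resource_size_t cap_resource(u64 val)
	{
		/* with a 32-bit resource_size_t, a range living entirely
		 * above 4GB clamps to MAX_RESOURCE and is then rejected by
		 * the start == MAX_RESOURCE check in update_res() */
		if (val > MAX_RESOURCE)
			return MAX_RESOURCE;
		return val;
	}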
diff --git a/arch/x86/pci/bus_numa.h b/arch/x86/pci/bus_numa.h index 731b64ee8d84..804a4b40c31a 100644 --- a/arch/x86/pci/bus_numa.h +++ b/arch/x86/pci/bus_numa.h | |||
@@ -1,5 +1,5 @@ | |||
1 | #ifdef CONFIG_X86_64 | 1 | #ifndef __BUS_NUMA_H |
2 | 2 | #define __BUS_NUMA_H | |
3 | /* | 3 | /* |
4 | * sub bus (transparent) will use entries from 3 to store extra from | 4 | * sub bus (transparent) will use entries from 3 to store extra from |
5 | * root, so need to make sure we have enough slots there. | 5 | * root, so need to make sure we have enough slots there. |
@@ -19,8 +19,7 @@ struct pci_root_info { | |||
19 | #define PCI_ROOT_NR 4 | 19 | #define PCI_ROOT_NR 4 |
20 | extern int pci_root_num; | 20 | extern int pci_root_num; |
21 | extern struct pci_root_info pci_root_info[PCI_ROOT_NR]; | 21 | extern struct pci_root_info pci_root_info[PCI_ROOT_NR]; |
22 | extern int found_all_numa_early; | ||
23 | 22 | ||
24 | extern void update_res(struct pci_root_info *info, size_t start, | 23 | extern void update_res(struct pci_root_info *info, resource_size_t start, |
25 | size_t end, unsigned long flags, int merge); | 24 | resource_size_t end, unsigned long flags, int merge); |
26 | #endif | 25 | #endif |
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 5a8fbf8d4cac..dece3eb9c906 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c | |||
@@ -255,10 +255,6 @@ void __init pcibios_resource_survey(void) | |||
255 | */ | 255 | */ |
256 | fs_initcall(pcibios_assign_resources); | 256 | fs_initcall(pcibios_assign_resources); |
257 | 257 | ||
258 | void __weak x86_pci_root_bus_res_quirks(struct pci_bus *b) | ||
259 | { | ||
260 | } | ||
261 | |||
262 | /* | 258 | /* |
263 | * If we set up a device for bus mastering, we need to check the latency | 259 | * If we set up a device for bus mastering, we need to check the latency |
264 | * timer as certain crappy BIOSes forget to set it properly. | 260 | * timer as certain crappy BIOSes forget to set it properly. |
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 36daccb68642..b607239c1ba8 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -50,6 +50,7 @@ | |||
50 | #include <asm/traps.h> | 50 | #include <asm/traps.h> |
51 | #include <asm/setup.h> | 51 | #include <asm/setup.h> |
52 | #include <asm/desc.h> | 52 | #include <asm/desc.h> |
53 | #include <asm/pgalloc.h> | ||
53 | #include <asm/pgtable.h> | 54 | #include <asm/pgtable.h> |
54 | #include <asm/tlbflush.h> | 55 | #include <asm/tlbflush.h> |
55 | #include <asm/reboot.h> | 56 | #include <asm/reboot.h> |
@@ -1094,6 +1095,12 @@ asmlinkage void __init xen_start_kernel(void) | |||
1094 | 1095 | ||
1095 | __supported_pte_mask |= _PAGE_IOMAP; | 1096 | __supported_pte_mask |= _PAGE_IOMAP; |
1096 | 1097 | ||
1098 | /* | ||
1099 | * Prevent page tables from being allocated in highmem, even | ||
1100 | * if CONFIG_HIGHPTE is enabled. | ||
1101 | */ | ||
1102 | __userpte_alloc_gfp &= ~__GFP_HIGHMEM; | ||
1103 | |||
1097 | /* Work out if we support NX */ | 1104 | /* Work out if we support NX */ |
1098 | x86_configure_nx(); | 1105 | x86_configure_nx(); |
1099 | 1106 | ||
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index bf4cd6bfe959..f9eb7de74f42 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -1427,23 +1427,6 @@ static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd) | |||
1427 | #endif | 1427 | #endif |
1428 | } | 1428 | } |
1429 | 1429 | ||
1430 | #ifdef CONFIG_HIGHPTE | ||
1431 | static void *xen_kmap_atomic_pte(struct page *page, enum km_type type) | ||
1432 | { | ||
1433 | pgprot_t prot = PAGE_KERNEL; | ||
1434 | |||
1435 | if (PagePinned(page)) | ||
1436 | prot = PAGE_KERNEL_RO; | ||
1437 | |||
1438 | if (0 && PageHighMem(page)) | ||
1439 | printk("mapping highpte %lx type %d prot %s\n", | ||
1440 | page_to_pfn(page), type, | ||
1441 | (unsigned long)pgprot_val(prot) & _PAGE_RW ? "WRITE" : "READ"); | ||
1442 | |||
1443 | return kmap_atomic_prot(page, type, prot); | ||
1444 | } | ||
1445 | #endif | ||
1446 | |||
1447 | #ifdef CONFIG_X86_32 | 1430 | #ifdef CONFIG_X86_32 |
1448 | static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) | 1431 | static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) |
1449 | { | 1432 | { |
@@ -1902,10 +1885,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { | |||
1902 | .alloc_pmd_clone = paravirt_nop, | 1885 | .alloc_pmd_clone = paravirt_nop, |
1903 | .release_pmd = xen_release_pmd_init, | 1886 | .release_pmd = xen_release_pmd_init, |
1904 | 1887 | ||
1905 | #ifdef CONFIG_HIGHPTE | ||
1906 | .kmap_atomic_pte = xen_kmap_atomic_pte, | ||
1907 | #endif | ||
1908 | |||
1909 | #ifdef CONFIG_X86_64 | 1888 | #ifdef CONFIG_X86_64 |
1910 | .set_pte = xen_set_pte, | 1889 | .set_pte = xen_set_pte, |
1911 | #else | 1890 | #else |
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S index 88e15deb8b82..22a2093b5862 100644 --- a/arch/x86/xen/xen-asm_32.S +++ b/arch/x86/xen/xen-asm_32.S | |||
@@ -90,9 +90,9 @@ ENTRY(xen_iret) | |||
90 | GET_THREAD_INFO(%eax) | 90 | GET_THREAD_INFO(%eax) |
91 | movl TI_cpu(%eax), %eax | 91 | movl TI_cpu(%eax), %eax |
92 | movl __per_cpu_offset(,%eax,4), %eax | 92 | movl __per_cpu_offset(,%eax,4), %eax |
93 | mov per_cpu__xen_vcpu(%eax), %eax | 93 | mov xen_vcpu(%eax), %eax |
94 | #else | 94 | #else |
95 | movl per_cpu__xen_vcpu, %eax | 95 | movl xen_vcpu, %eax |
96 | #endif | 96 | #endif |
97 | 97 | ||
98 | /* check IF state we're restoring */ | 98 | /* check IF state we're restoring */ |