author     Marcelo Tosatti <mtosatti@redhat.com>    2013-03-04 18:10:32 -0500
committer  Marcelo Tosatti <mtosatti@redhat.com>    2013-03-04 18:10:32 -0500
commit     ee2c25efdd46d7ed5605d6fe877bdf4b47a4ab2e (patch)
tree       35890281e93e667a8e262d76ef250025eb30a8c1 /arch/x86/kernel
parent     3ab66e8a455a4877889c65a848f2fb32be502f2c (diff)
parent     6dbe51c251a327e012439c4772097a13df43c5b8 (diff)
Merge branch 'master' into queue
* master: (15791 commits)
Linux 3.9-rc1
btrfs/raid56: Add missing #include <linux/vmalloc.h>
fix compat_sys_rt_sigprocmask()
SUNRPC: One line comment fix
ext4: enable quotas before orphan cleanup
ext4: don't allow quota mount options when quota feature enabled
ext4: fix a warning from sparse check for ext4_dir_llseek
ext4: convert number of blocks to clusters properly
ext4: fix possible memory leak in ext4_remount()
jbd2: fix ERR_PTR dereference in jbd2__journal_start
metag: Provide dma_get_sgtable()
metag: prom.h: remove declaration of metag_dt_memblock_reserve()
metag: copy devicetree to non-init memory
metag: cleanup metag_ksyms.c includes
metag: move mm/init.c exports out of metag_ksyms.c
metag: move usercopy.c exports out of metag_ksyms.c
metag: move setup.c exports out of metag_ksyms.c
metag: move kick.c exports out of metag_ksyms.c
metag: move traps.c exports out of metag_ksyms.c
metag: move irq enable out of irqflags.h on SMP
...
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Conflicts:
arch/x86/kernel/kvmclock.c
Diffstat (limited to 'arch/x86/kernel')
85 files changed, 3100 insertions, 1869 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 34e923a53762..7bd3bd310106 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -65,8 +65,7 @@ obj-$(CONFIG_X86_TSC) += trace_clock.o | |||
65 | obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o | 65 | obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o |
66 | obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o | 66 | obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o |
67 | obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o | 67 | obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o |
68 | obj-$(CONFIG_KPROBES) += kprobes.o | 68 | obj-y += kprobes/ |
69 | obj-$(CONFIG_OPTPROBES) += kprobes-opt.o | ||
70 | obj-$(CONFIG_MODULES) += module.o | 69 | obj-$(CONFIG_MODULES) += module.o |
71 | obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o | 70 | obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o |
72 | obj-$(CONFIG_KGDB) += kgdb.o | 71 | obj-$(CONFIG_KGDB) += kgdb.o |
@@ -88,6 +87,9 @@ obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o | |||
88 | 87 | ||
89 | obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o | 88 | obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o |
90 | 89 | ||
90 | obj-$(CONFIG_MICROCODE_EARLY) += microcode_core_early.o | ||
91 | obj-$(CONFIG_MICROCODE_INTEL_EARLY) += microcode_intel_early.o | ||
92 | obj-$(CONFIG_MICROCODE_INTEL_LIB) += microcode_intel_lib.o | ||
91 | microcode-y := microcode_core.o | 93 | microcode-y := microcode_core.o |
92 | microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o | 94 | microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o |
93 | microcode-$(CONFIG_MICROCODE_AMD) += microcode_amd.o | 95 | microcode-$(CONFIG_MICROCODE_AMD) += microcode_amd.o |
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index e48cafcf92ae..230c8ea878e5 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -51,7 +51,6 @@ EXPORT_SYMBOL(acpi_disabled); | |||
51 | 51 | ||
52 | #ifdef CONFIG_X86_64 | 52 | #ifdef CONFIG_X86_64 |
53 | # include <asm/proto.h> | 53 | # include <asm/proto.h> |
54 | # include <asm/numa_64.h> | ||
55 | #endif /* X86 */ | 54 | #endif /* X86 */ |
56 | 55 | ||
57 | #define BAD_MADT_ENTRY(entry, end) ( \ | 56 | #define BAD_MADT_ENTRY(entry, end) ( \ |
@@ -697,6 +696,10 @@ EXPORT_SYMBOL(acpi_map_lsapic); | |||
697 | 696 | ||
698 | int acpi_unmap_lsapic(int cpu) | 697 | int acpi_unmap_lsapic(int cpu) |
699 | { | 698 | { |
699 | #ifdef CONFIG_ACPI_NUMA | ||
700 | set_apicid_to_node(per_cpu(x86_cpu_to_apicid, cpu), NUMA_NO_NODE); | ||
701 | #endif | ||
702 | |||
700 | per_cpu(x86_cpu_to_apicid, cpu) = -1; | 703 | per_cpu(x86_cpu_to_apicid, cpu) = -1; |
701 | set_cpu_present(cpu, false); | 704 | set_cpu_present(cpu, false); |
702 | num_processors--; | 705 | num_processors--; |
@@ -1706,3 +1709,9 @@ int __acpi_release_global_lock(unsigned int *lock) | |||
1706 | } while (unlikely (val != old)); | 1709 | } while (unlikely (val != old)); |
1707 | return old & 0x1; | 1710 | return old & 0x1; |
1708 | } | 1711 | } |
1712 | |||
1713 | void __init arch_reserve_mem_area(acpi_physical_address addr, size_t size) | ||
1714 | { | ||
1715 | e820_add_region(addr, size, E820_ACPI); | ||
1716 | update_e820(); | ||
1717 | } | ||
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index d5e0d717005a..0532f5d6e4ef 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -69,7 +69,7 @@ int acpi_suspend_lowlevel(void) | |||
69 | 69 | ||
70 | #ifndef CONFIG_64BIT | 70 | #ifndef CONFIG_64BIT |
71 | header->pmode_entry = (u32)&wakeup_pmode_return; | 71 | header->pmode_entry = (u32)&wakeup_pmode_return; |
72 | header->pmode_cr3 = (u32)__pa(&initial_page_table); | 72 | header->pmode_cr3 = (u32)__pa_symbol(initial_page_table); |
73 | saved_magic = 0x12345678; | 73 | saved_magic = 0x12345678; |
74 | #else /* CONFIG_64BIT */ | 74 | #else /* CONFIG_64BIT */ |
75 | #ifdef CONFIG_SMP | 75 | #ifdef CONFIG_SMP |
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index e66311200cbd..b574b295a2f9 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -768,10 +768,9 @@ int __init gart_iommu_init(void) | |||
768 | aper_base = info.aper_base; | 768 | aper_base = info.aper_base; |
769 | end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT); | 769 | end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT); |
770 | 770 | ||
771 | if (end_pfn > max_low_pfn_mapped) { | 771 | start_pfn = PFN_DOWN(aper_base); |
772 | start_pfn = (aper_base>>PAGE_SHIFT); | 772 | if (!pfn_range_is_mapped(start_pfn, end_pfn)) |
773 | init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT); | 773 | init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT); |
774 | } | ||
775 | 774 | ||
776 | pr_info("PCI-DMA: using GART IOMMU.\n"); | 775 | pr_info("PCI-DMA: using GART IOMMU.\n"); |
777 | iommu_size = check_iommu_size(info.aper_base, aper_size); | 776 | iommu_size = check_iommu_size(info.aper_base, aper_size); |
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index afdc3f756dea..c9876efecafb 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c
@@ -240,7 +240,7 @@ static int apbt_cpuhp_notify(struct notifier_block *n, | |||
240 | dw_apb_clockevent_pause(adev->timer); | 240 | dw_apb_clockevent_pause(adev->timer); |
241 | if (system_state == SYSTEM_RUNNING) { | 241 | if (system_state == SYSTEM_RUNNING) { |
242 | pr_debug("skipping APBT CPU %lu offline\n", cpu); | 242 | pr_debug("skipping APBT CPU %lu offline\n", cpu); |
243 | } else if (adev) { | 243 | } else { |
244 | pr_debug("APBT clockevent for cpu %lu offline\n", cpu); | 244 | pr_debug("APBT clockevent for cpu %lu offline\n", cpu); |
245 | dw_apb_clockevent_stop(adev->timer); | 245 | dw_apb_clockevent_stop(adev->timer); |
246 | } | 246 | } |
@@ -311,7 +311,6 @@ void __init apbt_time_init(void) | |||
311 | #ifdef CONFIG_SMP | 311 | #ifdef CONFIG_SMP |
312 | int i; | 312 | int i; |
313 | struct sfi_timer_table_entry *p_mtmr; | 313 | struct sfi_timer_table_entry *p_mtmr; |
314 | unsigned int percpu_timer; | ||
315 | struct apbt_dev *adev; | 314 | struct apbt_dev *adev; |
316 | #endif | 315 | #endif |
317 | 316 | ||
@@ -346,13 +345,10 @@ void __init apbt_time_init(void) | |||
346 | return; | 345 | return; |
347 | } | 346 | } |
348 | pr_debug("%s: %d CPUs online\n", __func__, num_online_cpus()); | 347 | pr_debug("%s: %d CPUs online\n", __func__, num_online_cpus()); |
349 | if (num_possible_cpus() <= sfi_mtimer_num) { | 348 | if (num_possible_cpus() <= sfi_mtimer_num) |
350 | percpu_timer = 1; | ||
351 | apbt_num_timers_used = num_possible_cpus(); | 349 | apbt_num_timers_used = num_possible_cpus(); |
352 | } else { | 350 | else |
353 | percpu_timer = 0; | ||
354 | apbt_num_timers_used = 1; | 351 | apbt_num_timers_used = 1; |
355 | } | ||
356 | pr_debug("%s: %d APB timers used\n", __func__, apbt_num_timers_used); | 352 | pr_debug("%s: %d APB timers used\n", __func__, apbt_num_timers_used); |
357 | 353 | ||
358 | /* here we set up per CPU timer data structure */ | 354 | /* here we set up per CPU timer data structure */ |
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index b994cc84aa7e..904611bf0e5a 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -131,7 +131,7 @@ static int __init parse_lapic(char *arg) | |||
131 | { | 131 | { |
132 | if (config_enabled(CONFIG_X86_32) && !arg) | 132 | if (config_enabled(CONFIG_X86_32) && !arg) |
133 | force_enable_local_apic = 1; | 133 | force_enable_local_apic = 1; |
134 | else if (!strncmp(arg, "notscdeadline", 13)) | 134 | else if (arg && !strncmp(arg, "notscdeadline", 13)) |
135 | setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER); | 135 | setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER); |
136 | return 0; | 136 | return 0; |
137 | } | 137 | } |
@@ -1477,8 +1477,7 @@ void __init bsp_end_local_APIC_setup(void) | |||
1477 | * Now that local APIC setup is completed for BP, configure the fault | 1477 | * Now that local APIC setup is completed for BP, configure the fault |
1478 | * handling for interrupt remapping. | 1478 | * handling for interrupt remapping. |
1479 | */ | 1479 | */ |
1480 | if (irq_remapping_enabled) | 1480 | irq_remap_enable_fault_handling(); |
1481 | irq_remap_enable_fault_handling(); | ||
1482 | 1481 | ||
1483 | } | 1482 | } |
1484 | 1483 | ||
@@ -2251,8 +2250,7 @@ static int lapic_suspend(void) | |||
2251 | local_irq_save(flags); | 2250 | local_irq_save(flags); |
2252 | disable_local_APIC(); | 2251 | disable_local_APIC(); |
2253 | 2252 | ||
2254 | if (irq_remapping_enabled) | 2253 | irq_remapping_disable(); |
2255 | irq_remapping_disable(); | ||
2256 | 2254 | ||
2257 | local_irq_restore(flags); | 2255 | local_irq_restore(flags); |
2258 | return 0; | 2256 | return 0; |
@@ -2268,16 +2266,15 @@ static void lapic_resume(void) | |||
2268 | return; | 2266 | return; |
2269 | 2267 | ||
2270 | local_irq_save(flags); | 2268 | local_irq_save(flags); |
2271 | if (irq_remapping_enabled) { | 2269 | |
2272 | /* | 2270 | /* |
2273 | * IO-APIC and PIC have their own resume routines. | 2271 | * IO-APIC and PIC have their own resume routines. |
2274 | * We just mask them here to make sure the interrupt | 2272 | * We just mask them here to make sure the interrupt |
2275 | * subsystem is completely quiet while we enable x2apic | 2273 | * subsystem is completely quiet while we enable x2apic |
2276 | * and interrupt-remapping. | 2274 | * and interrupt-remapping. |
2277 | */ | 2275 | */ |
2278 | mask_ioapic_entries(); | 2276 | mask_ioapic_entries(); |
2279 | legacy_pic->mask_all(); | 2277 | legacy_pic->mask_all(); |
2280 | } | ||
2281 | 2278 | ||
2282 | if (x2apic_mode) | 2279 | if (x2apic_mode) |
2283 | enable_x2apic(); | 2280 | enable_x2apic(); |
@@ -2320,8 +2317,7 @@ static void lapic_resume(void) | |||
2320 | apic_write(APIC_ESR, 0); | 2317 | apic_write(APIC_ESR, 0); |
2321 | apic_read(APIC_ESR); | 2318 | apic_read(APIC_ESR); |
2322 | 2319 | ||
2323 | if (irq_remapping_enabled) | 2320 | irq_remapping_reenable(x2apic_mode); |
2324 | irq_remapping_reenable(x2apic_mode); | ||
2325 | 2321 | ||
2326 | local_irq_restore(flags); | 2322 | local_irq_restore(flags); |
2327 | } | 2323 | } |
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index 9c2aa89a11cb..9a9110918ca7 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -28,6 +28,7 @@ | |||
28 | #include <asm/apic.h> | 28 | #include <asm/apic.h> |
29 | #include <asm/ipi.h> | 29 | #include <asm/ipi.h> |
30 | #include <asm/apic_flat_64.h> | 30 | #include <asm/apic_flat_64.h> |
31 | #include <asm/pgtable.h> | ||
31 | 32 | ||
32 | static int numachip_system __read_mostly; | 33 | static int numachip_system __read_mostly; |
33 | 34 | ||
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index b739d398bb29..9ed796ccc32c 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -68,22 +68,6 @@ | |||
68 | #define for_each_irq_pin(entry, head) \ | 68 | #define for_each_irq_pin(entry, head) \ |
69 | for (entry = head; entry; entry = entry->next) | 69 | for (entry = head; entry; entry = entry->next) |
70 | 70 | ||
71 | #ifdef CONFIG_IRQ_REMAP | ||
72 | static void irq_remap_modify_chip_defaults(struct irq_chip *chip); | ||
73 | static inline bool irq_remapped(struct irq_cfg *cfg) | ||
74 | { | ||
75 | return cfg->irq_2_iommu.iommu != NULL; | ||
76 | } | ||
77 | #else | ||
78 | static inline bool irq_remapped(struct irq_cfg *cfg) | ||
79 | { | ||
80 | return false; | ||
81 | } | ||
82 | static inline void irq_remap_modify_chip_defaults(struct irq_chip *chip) | ||
83 | { | ||
84 | } | ||
85 | #endif | ||
86 | |||
87 | /* | 71 | /* |
88 | * Is the SiS APIC rmw bug present ? | 72 | * Is the SiS APIC rmw bug present ? |
89 | * -1 = don't know, 0 = no, 1 = yes | 73 | * -1 = don't know, 0 = no, 1 = yes |
@@ -300,9 +284,9 @@ static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node) | |||
300 | return cfg; | 284 | return cfg; |
301 | } | 285 | } |
302 | 286 | ||
303 | static int alloc_irq_from(unsigned int from, int node) | 287 | static int alloc_irqs_from(unsigned int from, unsigned int count, int node) |
304 | { | 288 | { |
305 | return irq_alloc_desc_from(from, node); | 289 | return irq_alloc_descs_from(from, count, node); |
306 | } | 290 | } |
307 | 291 | ||
308 | static void free_irq_at(unsigned int at, struct irq_cfg *cfg) | 292 | static void free_irq_at(unsigned int at, struct irq_cfg *cfg) |
@@ -326,7 +310,7 @@ static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) | |||
326 | + (mpc_ioapic_addr(idx) & ~PAGE_MASK); | 310 | + (mpc_ioapic_addr(idx) & ~PAGE_MASK); |
327 | } | 311 | } |
328 | 312 | ||
329 | static inline void io_apic_eoi(unsigned int apic, unsigned int vector) | 313 | void io_apic_eoi(unsigned int apic, unsigned int vector) |
330 | { | 314 | { |
331 | struct io_apic __iomem *io_apic = io_apic_base(apic); | 315 | struct io_apic __iomem *io_apic = io_apic_base(apic); |
332 | writel(vector, &io_apic->eoi); | 316 | writel(vector, &io_apic->eoi); |
@@ -573,19 +557,10 @@ static void unmask_ioapic_irq(struct irq_data *data) | |||
573 | * Otherwise, we simulate the EOI message manually by changing the trigger | 557 | * Otherwise, we simulate the EOI message manually by changing the trigger |
574 | * mode to edge and then back to level, with RTE being masked during this. | 558 | * mode to edge and then back to level, with RTE being masked during this. |
575 | */ | 559 | */ |
576 | static void __eoi_ioapic_pin(int apic, int pin, int vector, struct irq_cfg *cfg) | 560 | void native_eoi_ioapic_pin(int apic, int pin, int vector) |
577 | { | 561 | { |
578 | if (mpc_ioapic_ver(apic) >= 0x20) { | 562 | if (mpc_ioapic_ver(apic) >= 0x20) { |
579 | /* | 563 | io_apic_eoi(apic, vector); |
580 | * Intr-remapping uses pin number as the virtual vector | ||
581 | * in the RTE. Actual vector is programmed in | ||
582 | * intr-remapping table entry. Hence for the io-apic | ||
583 | * EOI we use the pin number. | ||
584 | */ | ||
585 | if (cfg && irq_remapped(cfg)) | ||
586 | io_apic_eoi(apic, pin); | ||
587 | else | ||
588 | io_apic_eoi(apic, vector); | ||
589 | } else { | 564 | } else { |
590 | struct IO_APIC_route_entry entry, entry1; | 565 | struct IO_APIC_route_entry entry, entry1; |
591 | 566 | ||
@@ -606,14 +581,15 @@ static void __eoi_ioapic_pin(int apic, int pin, int vector, struct irq_cfg *cfg) | |||
606 | } | 581 | } |
607 | } | 582 | } |
608 | 583 | ||
609 | static void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) | 584 | void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) |
610 | { | 585 | { |
611 | struct irq_pin_list *entry; | 586 | struct irq_pin_list *entry; |
612 | unsigned long flags; | 587 | unsigned long flags; |
613 | 588 | ||
614 | raw_spin_lock_irqsave(&ioapic_lock, flags); | 589 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
615 | for_each_irq_pin(entry, cfg->irq_2_pin) | 590 | for_each_irq_pin(entry, cfg->irq_2_pin) |
616 | __eoi_ioapic_pin(entry->apic, entry->pin, cfg->vector, cfg); | 591 | x86_io_apic_ops.eoi_ioapic_pin(entry->apic, entry->pin, |
592 | cfg->vector); | ||
617 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); | 593 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
618 | } | 594 | } |
619 | 595 | ||
@@ -650,7 +626,7 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) | |||
650 | } | 626 | } |
651 | 627 | ||
652 | raw_spin_lock_irqsave(&ioapic_lock, flags); | 628 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
653 | __eoi_ioapic_pin(apic, pin, entry.vector, NULL); | 629 | x86_io_apic_ops.eoi_ioapic_pin(apic, pin, entry.vector); |
654 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); | 630 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
655 | } | 631 | } |
656 | 632 | ||
@@ -1304,25 +1280,18 @@ static void ioapic_register_intr(unsigned int irq, struct irq_cfg *cfg, | |||
1304 | fasteoi = false; | 1280 | fasteoi = false; |
1305 | } | 1281 | } |
1306 | 1282 | ||
1307 | if (irq_remapped(cfg)) { | 1283 | if (setup_remapped_irq(irq, cfg, chip)) |
1308 | irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); | ||
1309 | irq_remap_modify_chip_defaults(chip); | ||
1310 | fasteoi = trigger != 0; | 1284 | fasteoi = trigger != 0; |
1311 | } | ||
1312 | 1285 | ||
1313 | hdl = fasteoi ? handle_fasteoi_irq : handle_edge_irq; | 1286 | hdl = fasteoi ? handle_fasteoi_irq : handle_edge_irq; |
1314 | irq_set_chip_and_handler_name(irq, chip, hdl, | 1287 | irq_set_chip_and_handler_name(irq, chip, hdl, |
1315 | fasteoi ? "fasteoi" : "edge"); | 1288 | fasteoi ? "fasteoi" : "edge"); |
1316 | } | 1289 | } |
1317 | 1290 | ||
1318 | static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry, | 1291 | int native_setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry, |
1319 | unsigned int destination, int vector, | 1292 | unsigned int destination, int vector, |
1320 | struct io_apic_irq_attr *attr) | 1293 | struct io_apic_irq_attr *attr) |
1321 | { | 1294 | { |
1322 | if (irq_remapping_enabled) | ||
1323 | return setup_ioapic_remapped_entry(irq, entry, destination, | ||
1324 | vector, attr); | ||
1325 | |||
1326 | memset(entry, 0, sizeof(*entry)); | 1295 | memset(entry, 0, sizeof(*entry)); |
1327 | 1296 | ||
1328 | entry->delivery_mode = apic->irq_delivery_mode; | 1297 | entry->delivery_mode = apic->irq_delivery_mode; |
@@ -1370,8 +1339,8 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg, | |||
1370 | attr->ioapic, mpc_ioapic_id(attr->ioapic), attr->ioapic_pin, | 1339 | attr->ioapic, mpc_ioapic_id(attr->ioapic), attr->ioapic_pin, |
1371 | cfg->vector, irq, attr->trigger, attr->polarity, dest); | 1340 | cfg->vector, irq, attr->trigger, attr->polarity, dest); |
1372 | 1341 | ||
1373 | if (setup_ioapic_entry(irq, &entry, dest, cfg->vector, attr)) { | 1342 | if (x86_io_apic_ops.setup_entry(irq, &entry, dest, cfg->vector, attr)) { |
1374 | pr_warn("Failed to setup ioapic entry for ioapic %d, pin %d\n", | 1343 | pr_warn("Failed to setup ioapic entry for ioapic %d, pin %d\n", |
1375 | mpc_ioapic_id(attr->ioapic), attr->ioapic_pin); | 1344 | mpc_ioapic_id(attr->ioapic), attr->ioapic_pin); |
1376 | __clear_irq_vector(irq, cfg); | 1345 | __clear_irq_vector(irq, cfg); |
1377 | 1346 | ||
@@ -1479,9 +1448,6 @@ static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx, | |||
1479 | struct IO_APIC_route_entry entry; | 1448 | struct IO_APIC_route_entry entry; |
1480 | unsigned int dest; | 1449 | unsigned int dest; |
1481 | 1450 | ||
1482 | if (irq_remapping_enabled) | ||
1483 | return; | ||
1484 | |||
1485 | memset(&entry, 0, sizeof(entry)); | 1451 | memset(&entry, 0, sizeof(entry)); |
1486 | 1452 | ||
1487 | /* | 1453 | /* |
@@ -1513,9 +1479,63 @@ static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx, | |||
1513 | ioapic_write_entry(ioapic_idx, pin, entry); | 1479 | ioapic_write_entry(ioapic_idx, pin, entry); |
1514 | } | 1480 | } |
1515 | 1481 | ||
1516 | __apicdebuginit(void) print_IO_APIC(int ioapic_idx) | 1482 | void native_io_apic_print_entries(unsigned int apic, unsigned int nr_entries) |
1517 | { | 1483 | { |
1518 | int i; | 1484 | int i; |
1485 | |||
1486 | pr_debug(" NR Dst Mask Trig IRR Pol Stat Dmod Deli Vect:\n"); | ||
1487 | |||
1488 | for (i = 0; i <= nr_entries; i++) { | ||
1489 | struct IO_APIC_route_entry entry; | ||
1490 | |||
1491 | entry = ioapic_read_entry(apic, i); | ||
1492 | |||
1493 | pr_debug(" %02x %02X ", i, entry.dest); | ||
1494 | pr_cont("%1d %1d %1d %1d %1d " | ||
1495 | "%1d %1d %02X\n", | ||
1496 | entry.mask, | ||
1497 | entry.trigger, | ||
1498 | entry.irr, | ||
1499 | entry.polarity, | ||
1500 | entry.delivery_status, | ||
1501 | entry.dest_mode, | ||
1502 | entry.delivery_mode, | ||
1503 | entry.vector); | ||
1504 | } | ||
1505 | } | ||
1506 | |||
1507 | void intel_ir_io_apic_print_entries(unsigned int apic, | ||
1508 | unsigned int nr_entries) | ||
1509 | { | ||
1510 | int i; | ||
1511 | |||
1512 | pr_debug(" NR Indx Fmt Mask Trig IRR Pol Stat Indx2 Zero Vect:\n"); | ||
1513 | |||
1514 | for (i = 0; i <= nr_entries; i++) { | ||
1515 | struct IR_IO_APIC_route_entry *ir_entry; | ||
1516 | struct IO_APIC_route_entry entry; | ||
1517 | |||
1518 | entry = ioapic_read_entry(apic, i); | ||
1519 | |||
1520 | ir_entry = (struct IR_IO_APIC_route_entry *)&entry; | ||
1521 | |||
1522 | pr_debug(" %02x %04X ", i, ir_entry->index); | ||
1523 | pr_cont("%1d %1d %1d %1d %1d " | ||
1524 | "%1d %1d %X %02X\n", | ||
1525 | ir_entry->format, | ||
1526 | ir_entry->mask, | ||
1527 | ir_entry->trigger, | ||
1528 | ir_entry->irr, | ||
1529 | ir_entry->polarity, | ||
1530 | ir_entry->delivery_status, | ||
1531 | ir_entry->index2, | ||
1532 | ir_entry->zero, | ||
1533 | ir_entry->vector); | ||
1534 | } | ||
1535 | } | ||
1536 | |||
1537 | __apicdebuginit(void) print_IO_APIC(int ioapic_idx) | ||
1538 | { | ||
1519 | union IO_APIC_reg_00 reg_00; | 1539 | union IO_APIC_reg_00 reg_00; |
1520 | union IO_APIC_reg_01 reg_01; | 1540 | union IO_APIC_reg_01 reg_01; |
1521 | union IO_APIC_reg_02 reg_02; | 1541 | union IO_APIC_reg_02 reg_02; |
@@ -1568,58 +1588,7 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx) | |||
1568 | 1588 | ||
1569 | printk(KERN_DEBUG ".... IRQ redirection table:\n"); | 1589 | printk(KERN_DEBUG ".... IRQ redirection table:\n"); |
1570 | 1590 | ||
1571 | if (irq_remapping_enabled) { | 1591 | x86_io_apic_ops.print_entries(ioapic_idx, reg_01.bits.entries); |
1572 | printk(KERN_DEBUG " NR Indx Fmt Mask Trig IRR" | ||
1573 | " Pol Stat Indx2 Zero Vect:\n"); | ||
1574 | } else { | ||
1575 | printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol" | ||
1576 | " Stat Dmod Deli Vect:\n"); | ||
1577 | } | ||
1578 | |||
1579 | for (i = 0; i <= reg_01.bits.entries; i++) { | ||
1580 | if (irq_remapping_enabled) { | ||
1581 | struct IO_APIC_route_entry entry; | ||
1582 | struct IR_IO_APIC_route_entry *ir_entry; | ||
1583 | |||
1584 | entry = ioapic_read_entry(ioapic_idx, i); | ||
1585 | ir_entry = (struct IR_IO_APIC_route_entry *) &entry; | ||
1586 | printk(KERN_DEBUG " %02x %04X ", | ||
1587 | i, | ||
1588 | ir_entry->index | ||
1589 | ); | ||
1590 | pr_cont("%1d %1d %1d %1d %1d " | ||
1591 | "%1d %1d %X %02X\n", | ||
1592 | ir_entry->format, | ||
1593 | ir_entry->mask, | ||
1594 | ir_entry->trigger, | ||
1595 | ir_entry->irr, | ||
1596 | ir_entry->polarity, | ||
1597 | ir_entry->delivery_status, | ||
1598 | ir_entry->index2, | ||
1599 | ir_entry->zero, | ||
1600 | ir_entry->vector | ||
1601 | ); | ||
1602 | } else { | ||
1603 | struct IO_APIC_route_entry entry; | ||
1604 | |||
1605 | entry = ioapic_read_entry(ioapic_idx, i); | ||
1606 | printk(KERN_DEBUG " %02x %02X ", | ||
1607 | i, | ||
1608 | entry.dest | ||
1609 | ); | ||
1610 | pr_cont("%1d %1d %1d %1d %1d " | ||
1611 | "%1d %1d %02X\n", | ||
1612 | entry.mask, | ||
1613 | entry.trigger, | ||
1614 | entry.irr, | ||
1615 | entry.polarity, | ||
1616 | entry.delivery_status, | ||
1617 | entry.dest_mode, | ||
1618 | entry.delivery_mode, | ||
1619 | entry.vector | ||
1620 | ); | ||
1621 | } | ||
1622 | } | ||
1623 | } | 1592 | } |
1624 | 1593 | ||
1625 | __apicdebuginit(void) print_IO_APICs(void) | 1594 | __apicdebuginit(void) print_IO_APICs(void) |
@@ -1921,30 +1890,14 @@ void __init enable_IO_APIC(void) | |||
1921 | clear_IO_APIC(); | 1890 | clear_IO_APIC(); |
1922 | } | 1891 | } |
1923 | 1892 | ||
1924 | /* | 1893 | void native_disable_io_apic(void) |
1925 | * Not an __init, needed by the reboot code | ||
1926 | */ | ||
1927 | void disable_IO_APIC(void) | ||
1928 | { | 1894 | { |
1929 | /* | 1895 | /* |
1930 | * Clear the IO-APIC before rebooting: | ||
1931 | */ | ||
1932 | clear_IO_APIC(); | ||
1933 | |||
1934 | if (!legacy_pic->nr_legacy_irqs) | ||
1935 | return; | ||
1936 | |||
1937 | /* | ||
1938 | * If the i8259 is routed through an IOAPIC | 1896 | * If the i8259 is routed through an IOAPIC |
1939 | * Put that IOAPIC in virtual wire mode | 1897 | * Put that IOAPIC in virtual wire mode |
1940 | * so legacy interrupts can be delivered. | 1898 | * so legacy interrupts can be delivered. |
1941 | * | ||
1942 | * With interrupt-remapping, for now we will use virtual wire A mode, | ||
1943 | * as virtual wire B is little complex (need to configure both | ||
1944 | * IOAPIC RTE as well as interrupt-remapping table entry). | ||
1945 | * As this gets called during crash dump, keep this simple for now. | ||
1946 | */ | 1899 | */ |
1947 | if (ioapic_i8259.pin != -1 && !irq_remapping_enabled) { | 1900 | if (ioapic_i8259.pin != -1) { |
1948 | struct IO_APIC_route_entry entry; | 1901 | struct IO_APIC_route_entry entry; |
1949 | 1902 | ||
1950 | memset(&entry, 0, sizeof(entry)); | 1903 | memset(&entry, 0, sizeof(entry)); |
@@ -1964,12 +1917,25 @@ void disable_IO_APIC(void) | |||
1964 | ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry); | 1917 | ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry); |
1965 | } | 1918 | } |
1966 | 1919 | ||
1920 | if (cpu_has_apic || apic_from_smp_config()) | ||
1921 | disconnect_bsp_APIC(ioapic_i8259.pin != -1); | ||
1922 | |||
1923 | } | ||
1924 | |||
1925 | /* | ||
1926 | * Not an __init, needed by the reboot code | ||
1927 | */ | ||
1928 | void disable_IO_APIC(void) | ||
1929 | { | ||
1967 | /* | 1930 | /* |
1968 | * Use virtual wire A mode when interrupt remapping is enabled. | 1931 | * Clear the IO-APIC before rebooting: |
1969 | */ | 1932 | */ |
1970 | if (cpu_has_apic || apic_from_smp_config()) | 1933 | clear_IO_APIC(); |
1971 | disconnect_bsp_APIC(!irq_remapping_enabled && | 1934 | |
1972 | ioapic_i8259.pin != -1); | 1935 | if (!legacy_pic->nr_legacy_irqs) |
1936 | return; | ||
1937 | |||
1938 | x86_io_apic_ops.disable(); | ||
1973 | } | 1939 | } |
1974 | 1940 | ||
1975 | #ifdef CONFIG_X86_32 | 1941 | #ifdef CONFIG_X86_32 |
@@ -2322,12 +2288,8 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq | |||
2322 | 2288 | ||
2323 | apic = entry->apic; | 2289 | apic = entry->apic; |
2324 | pin = entry->pin; | 2290 | pin = entry->pin; |
2325 | /* | 2291 | |
2326 | * With interrupt-remapping, destination information comes | 2292 | io_apic_write(apic, 0x11 + pin*2, dest); |
2327 | * from interrupt-remapping table entry. | ||
2328 | */ | ||
2329 | if (!irq_remapped(cfg)) | ||
2330 | io_apic_write(apic, 0x11 + pin*2, dest); | ||
2331 | reg = io_apic_read(apic, 0x10 + pin*2); | 2293 | reg = io_apic_read(apic, 0x10 + pin*2); |
2332 | reg &= ~IO_APIC_REDIR_VECTOR_MASK; | 2294 | reg &= ~IO_APIC_REDIR_VECTOR_MASK; |
2333 | reg |= vector; | 2295 | reg |= vector; |
@@ -2369,9 +2331,10 @@ int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, | |||
2369 | return 0; | 2331 | return 0; |
2370 | } | 2332 | } |
2371 | 2333 | ||
2372 | static int | 2334 | |
2373 | ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, | 2335 | int native_ioapic_set_affinity(struct irq_data *data, |
2374 | bool force) | 2336 | const struct cpumask *mask, |
2337 | bool force) | ||
2375 | { | 2338 | { |
2376 | unsigned int dest, irq = data->irq; | 2339 | unsigned int dest, irq = data->irq; |
2377 | unsigned long flags; | 2340 | unsigned long flags; |
@@ -2548,33 +2511,6 @@ static void ack_apic_level(struct irq_data *data) | |||
2548 | ioapic_irqd_unmask(data, cfg, masked); | 2511 | ioapic_irqd_unmask(data, cfg, masked); |
2549 | } | 2512 | } |
2550 | 2513 | ||
2551 | #ifdef CONFIG_IRQ_REMAP | ||
2552 | static void ir_ack_apic_edge(struct irq_data *data) | ||
2553 | { | ||
2554 | ack_APIC_irq(); | ||
2555 | } | ||
2556 | |||
2557 | static void ir_ack_apic_level(struct irq_data *data) | ||
2558 | { | ||
2559 | ack_APIC_irq(); | ||
2560 | eoi_ioapic_irq(data->irq, data->chip_data); | ||
2561 | } | ||
2562 | |||
2563 | static void ir_print_prefix(struct irq_data *data, struct seq_file *p) | ||
2564 | { | ||
2565 | seq_printf(p, " IR-%s", data->chip->name); | ||
2566 | } | ||
2567 | |||
2568 | static void irq_remap_modify_chip_defaults(struct irq_chip *chip) | ||
2569 | { | ||
2570 | chip->irq_print_chip = ir_print_prefix; | ||
2571 | chip->irq_ack = ir_ack_apic_edge; | ||
2572 | chip->irq_eoi = ir_ack_apic_level; | ||
2573 | |||
2574 | chip->irq_set_affinity = set_remapped_irq_affinity; | ||
2575 | } | ||
2576 | #endif /* CONFIG_IRQ_REMAP */ | ||
2577 | |||
2578 | static struct irq_chip ioapic_chip __read_mostly = { | 2514 | static struct irq_chip ioapic_chip __read_mostly = { |
2579 | .name = "IO-APIC", | 2515 | .name = "IO-APIC", |
2580 | .irq_startup = startup_ioapic_irq, | 2516 | .irq_startup = startup_ioapic_irq, |
@@ -2582,7 +2518,7 @@ static struct irq_chip ioapic_chip __read_mostly = { | |||
2582 | .irq_unmask = unmask_ioapic_irq, | 2518 | .irq_unmask = unmask_ioapic_irq, |
2583 | .irq_ack = ack_apic_edge, | 2519 | .irq_ack = ack_apic_edge, |
2584 | .irq_eoi = ack_apic_level, | 2520 | .irq_eoi = ack_apic_level, |
2585 | .irq_set_affinity = ioapic_set_affinity, | 2521 | .irq_set_affinity = native_ioapic_set_affinity, |
2586 | .irq_retrigger = ioapic_retrigger_irq, | 2522 | .irq_retrigger = ioapic_retrigger_irq, |
2587 | }; | 2523 | }; |
2588 | 2524 | ||
@@ -2781,8 +2717,7 @@ static inline void __init check_timer(void) | |||
2781 | * 8259A. | 2717 | * 8259A. |
2782 | */ | 2718 | */ |
2783 | if (pin1 == -1) { | 2719 | if (pin1 == -1) { |
2784 | if (irq_remapping_enabled) | 2720 | panic_if_irq_remap("BIOS bug: timer not connected to IO-APIC"); |
2785 | panic("BIOS bug: timer not connected to IO-APIC"); | ||
2786 | pin1 = pin2; | 2721 | pin1 = pin2; |
2787 | apic1 = apic2; | 2722 | apic1 = apic2; |
2788 | no_pin1 = 1; | 2723 | no_pin1 = 1; |
@@ -2814,8 +2749,7 @@ static inline void __init check_timer(void) | |||
2814 | clear_IO_APIC_pin(0, pin1); | 2749 | clear_IO_APIC_pin(0, pin1); |
2815 | goto out; | 2750 | goto out; |
2816 | } | 2751 | } |
2817 | if (irq_remapping_enabled) | 2752 | panic_if_irq_remap("timer doesn't work through Interrupt-remapped IO-APIC"); |
2818 | panic("timer doesn't work through Interrupt-remapped IO-APIC"); | ||
2819 | local_irq_disable(); | 2753 | local_irq_disable(); |
2820 | clear_IO_APIC_pin(apic1, pin1); | 2754 | clear_IO_APIC_pin(apic1, pin1); |
2821 | if (!no_pin1) | 2755 | if (!no_pin1) |
@@ -2982,37 +2916,58 @@ device_initcall(ioapic_init_ops); | |||
2982 | /* | 2916 | /* |
2983 | * Dynamic irq allocate and deallocation | 2917 | * Dynamic irq allocate and deallocation |
2984 | */ | 2918 | */ |
2985 | unsigned int create_irq_nr(unsigned int from, int node) | 2919 | unsigned int __create_irqs(unsigned int from, unsigned int count, int node) |
2986 | { | 2920 | { |
2987 | struct irq_cfg *cfg; | 2921 | struct irq_cfg **cfg; |
2988 | unsigned long flags; | 2922 | unsigned long flags; |
2989 | unsigned int ret = 0; | 2923 | int irq, i; |
2990 | int irq; | ||
2991 | 2924 | ||
2992 | if (from < nr_irqs_gsi) | 2925 | if (from < nr_irqs_gsi) |
2993 | from = nr_irqs_gsi; | 2926 | from = nr_irqs_gsi; |
2994 | 2927 | ||
2995 | irq = alloc_irq_from(from, node); | 2928 | cfg = kzalloc_node(count * sizeof(cfg[0]), GFP_KERNEL, node); |
2996 | if (irq < 0) | 2929 | if (!cfg) |
2997 | return 0; | ||
2998 | cfg = alloc_irq_cfg(irq, node); | ||
2999 | if (!cfg) { | ||
3000 | free_irq_at(irq, NULL); | ||
3001 | return 0; | 2930 | return 0; |
2931 | |||
2932 | irq = alloc_irqs_from(from, count, node); | ||
2933 | if (irq < 0) | ||
2934 | goto out_cfgs; | ||
2935 | |||
2936 | for (i = 0; i < count; i++) { | ||
2937 | cfg[i] = alloc_irq_cfg(irq + i, node); | ||
2938 | if (!cfg[i]) | ||
2939 | goto out_irqs; | ||
3002 | } | 2940 | } |
3003 | 2941 | ||
3004 | raw_spin_lock_irqsave(&vector_lock, flags); | 2942 | raw_spin_lock_irqsave(&vector_lock, flags); |
3005 | if (!__assign_irq_vector(irq, cfg, apic->target_cpus())) | 2943 | for (i = 0; i < count; i++) |
3006 | ret = irq; | 2944 | if (__assign_irq_vector(irq + i, cfg[i], apic->target_cpus())) |
2945 | goto out_vecs; | ||
3007 | raw_spin_unlock_irqrestore(&vector_lock, flags); | 2946 | raw_spin_unlock_irqrestore(&vector_lock, flags); |
3008 | 2947 | ||
3009 | if (ret) { | 2948 | for (i = 0; i < count; i++) { |
3010 | irq_set_chip_data(irq, cfg); | 2949 | irq_set_chip_data(irq + i, cfg[i]); |
3011 | irq_clear_status_flags(irq, IRQ_NOREQUEST); | 2950 | irq_clear_status_flags(irq + i, IRQ_NOREQUEST); |
3012 | } else { | ||
3013 | free_irq_at(irq, cfg); | ||
3014 | } | 2951 | } |
3015 | return ret; | 2952 | |
2953 | kfree(cfg); | ||
2954 | return irq; | ||
2955 | |||
2956 | out_vecs: | ||
2957 | for (i--; i >= 0; i--) | ||
2958 | __clear_irq_vector(irq + i, cfg[i]); | ||
2959 | raw_spin_unlock_irqrestore(&vector_lock, flags); | ||
2960 | out_irqs: | ||
2961 | for (i = 0; i < count; i++) | ||
2962 | free_irq_at(irq + i, cfg[i]); | ||
2963 | out_cfgs: | ||
2964 | kfree(cfg); | ||
2965 | return 0; | ||
2966 | } | ||
2967 | |||
2968 | unsigned int create_irq_nr(unsigned int from, int node) | ||
2969 | { | ||
2970 | return __create_irqs(from, 1, node); | ||
3016 | } | 2971 | } |
3017 | 2972 | ||
3018 | int create_irq(void) | 2973 | int create_irq(void) |
@@ -3037,48 +2992,35 @@ void destroy_irq(unsigned int irq) | |||
3037 | 2992 | ||
3038 | irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE); | 2993 | irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE); |
3039 | 2994 | ||
3040 | if (irq_remapped(cfg)) | 2995 | free_remapped_irq(irq); |
3041 | free_remapped_irq(irq); | 2996 | |
3042 | raw_spin_lock_irqsave(&vector_lock, flags); | 2997 | raw_spin_lock_irqsave(&vector_lock, flags); |
3043 | __clear_irq_vector(irq, cfg); | 2998 | __clear_irq_vector(irq, cfg); |
3044 | raw_spin_unlock_irqrestore(&vector_lock, flags); | 2999 | raw_spin_unlock_irqrestore(&vector_lock, flags); |
3045 | free_irq_at(irq, cfg); | 3000 | free_irq_at(irq, cfg); |
3046 | } | 3001 | } |
3047 | 3002 | ||
3003 | void destroy_irqs(unsigned int irq, unsigned int count) | ||
3004 | { | ||
3005 | unsigned int i; | ||
3006 | |||
3007 | for (i = 0; i < count; i++) | ||
3008 | destroy_irq(irq + i); | ||
3009 | } | ||
3010 | |||
3048 | /* | 3011 | /* |
3049 | * MSI message composition | 3012 | * MSI message composition |
3050 | */ | 3013 | */ |
3051 | #ifdef CONFIG_PCI_MSI | 3014 | void native_compose_msi_msg(struct pci_dev *pdev, |
3052 | static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, | 3015 | unsigned int irq, unsigned int dest, |
3053 | struct msi_msg *msg, u8 hpet_id) | 3016 | struct msi_msg *msg, u8 hpet_id) |
3054 | { | 3017 | { |
3055 | struct irq_cfg *cfg; | 3018 | struct irq_cfg *cfg = irq_cfg(irq); |
3056 | int err; | ||
3057 | unsigned dest; | ||
3058 | |||
3059 | if (disable_apic) | ||
3060 | return -ENXIO; | ||
3061 | |||
3062 | cfg = irq_cfg(irq); | ||
3063 | err = assign_irq_vector(irq, cfg, apic->target_cpus()); | ||
3064 | if (err) | ||
3065 | return err; | ||
3066 | 3019 | ||
3067 | err = apic->cpu_mask_to_apicid_and(cfg->domain, | 3020 | msg->address_hi = MSI_ADDR_BASE_HI; |
3068 | apic->target_cpus(), &dest); | ||
3069 | if (err) | ||
3070 | return err; | ||
3071 | |||
3072 | if (irq_remapped(cfg)) { | ||
3073 | compose_remapped_msi_msg(pdev, irq, dest, msg, hpet_id); | ||
3074 | return err; | ||
3075 | } | ||
3076 | 3021 | ||
3077 | if (x2apic_enabled()) | 3022 | if (x2apic_enabled()) |
3078 | msg->address_hi = MSI_ADDR_BASE_HI | | 3023 | msg->address_hi |= MSI_ADDR_EXT_DEST_ID(dest); |
3079 | MSI_ADDR_EXT_DEST_ID(dest); | ||
3080 | else | ||
3081 | msg->address_hi = MSI_ADDR_BASE_HI; | ||
3082 | 3024 | ||
3083 | msg->address_lo = | 3025 | msg->address_lo = |
3084 | MSI_ADDR_BASE_LO | | 3026 | MSI_ADDR_BASE_LO | |
@@ -3097,8 +3039,32 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, | |||
3097 | MSI_DATA_DELIVERY_FIXED: | 3039 | MSI_DATA_DELIVERY_FIXED: |
3098 | MSI_DATA_DELIVERY_LOWPRI) | | 3040 | MSI_DATA_DELIVERY_LOWPRI) | |
3099 | MSI_DATA_VECTOR(cfg->vector); | 3041 | MSI_DATA_VECTOR(cfg->vector); |
3042 | } | ||
3100 | 3043 | ||
3101 | return err; | 3044 | #ifdef CONFIG_PCI_MSI |
3045 | static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, | ||
3046 | struct msi_msg *msg, u8 hpet_id) | ||
3047 | { | ||
3048 | struct irq_cfg *cfg; | ||
3049 | int err; | ||
3050 | unsigned dest; | ||
3051 | |||
3052 | if (disable_apic) | ||
3053 | return -ENXIO; | ||
3054 | |||
3055 | cfg = irq_cfg(irq); | ||
3056 | err = assign_irq_vector(irq, cfg, apic->target_cpus()); | ||
3057 | if (err) | ||
3058 | return err; | ||
3059 | |||
3060 | err = apic->cpu_mask_to_apicid_and(cfg->domain, | ||
3061 | apic->target_cpus(), &dest); | ||
3062 | if (err) | ||
3063 | return err; | ||
3064 | |||
3065 | x86_msi.compose_msi_msg(pdev, irq, dest, msg, hpet_id); | ||
3066 | |||
3067 | return 0; | ||
3102 | } | 3068 | } |
3103 | 3069 | ||
3104 | static int | 3070 | static int |
@@ -3136,23 +3102,28 @@ static struct irq_chip msi_chip = { | |||
3136 | .irq_retrigger = ioapic_retrigger_irq, | 3102 | .irq_retrigger = ioapic_retrigger_irq, |
3137 | }; | 3103 | }; |
3138 | 3104 | ||
3139 | static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) | 3105 | int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, |
3106 | unsigned int irq_base, unsigned int irq_offset) | ||
3140 | { | 3107 | { |
3141 | struct irq_chip *chip = &msi_chip; | 3108 | struct irq_chip *chip = &msi_chip; |
3142 | struct msi_msg msg; | 3109 | struct msi_msg msg; |
3110 | unsigned int irq = irq_base + irq_offset; | ||
3143 | int ret; | 3111 | int ret; |
3144 | 3112 | ||
3145 | ret = msi_compose_msg(dev, irq, &msg, -1); | 3113 | ret = msi_compose_msg(dev, irq, &msg, -1); |
3146 | if (ret < 0) | 3114 | if (ret < 0) |
3147 | return ret; | 3115 | return ret; |
3148 | 3116 | ||
3149 | irq_set_msi_desc(irq, msidesc); | 3117 | irq_set_msi_desc_off(irq_base, irq_offset, msidesc); |
3150 | write_msi_msg(irq, &msg); | ||
3151 | 3118 | ||
3152 | if (irq_remapped(irq_get_chip_data(irq))) { | 3119 | /* |
3153 | irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); | 3120 | * MSI-X message is written per-IRQ, the offset is always 0. |
3154 | irq_remap_modify_chip_defaults(chip); | 3121 | * MSI message denotes a contiguous group of IRQs, written for 0th IRQ. |
3155 | } | 3122 | */ |
3123 | if (!irq_offset) | ||
3124 | write_msi_msg(irq, &msg); | ||
3125 | |||
3126 | setup_remapped_irq(irq, irq_get_chip_data(irq), chip); | ||
3156 | 3127 | ||
3157 | irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); | 3128 | irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); |
3158 | 3129 | ||
@@ -3163,46 +3134,26 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) | |||
3163 | 3134 | ||
3164 | int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) | 3135 | int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) |
3165 | { | 3136 | { |
3166 | int node, ret, sub_handle, index = 0; | ||
3167 | unsigned int irq, irq_want; | 3137 | unsigned int irq, irq_want; |
3168 | struct msi_desc *msidesc; | 3138 | struct msi_desc *msidesc; |
3139 | int node, ret; | ||
3169 | 3140 | ||
3170 | /* x86 doesn't support multiple MSI yet */ | 3141 | /* Multiple MSI vectors only supported with interrupt remapping */ |
3171 | if (type == PCI_CAP_ID_MSI && nvec > 1) | 3142 | if (type == PCI_CAP_ID_MSI && nvec > 1) |
3172 | return 1; | 3143 | return 1; |
3173 | 3144 | ||
3174 | node = dev_to_node(&dev->dev); | 3145 | node = dev_to_node(&dev->dev); |
3175 | irq_want = nr_irqs_gsi; | 3146 | irq_want = nr_irqs_gsi; |
3176 | sub_handle = 0; | ||
3177 | list_for_each_entry(msidesc, &dev->msi_list, list) { | 3147 | list_for_each_entry(msidesc, &dev->msi_list, list) { |
3178 | irq = create_irq_nr(irq_want, node); | 3148 | irq = create_irq_nr(irq_want, node); |
3179 | if (irq == 0) | 3149 | if (irq == 0) |
3180 | return -1; | 3150 | return -ENOSPC; |
3151 | |||
3181 | irq_want = irq + 1; | 3152 | irq_want = irq + 1; |
3182 | if (!irq_remapping_enabled) | ||
3183 | goto no_ir; | ||
3184 | 3153 | ||
3185 | if (!sub_handle) { | 3154 | ret = setup_msi_irq(dev, msidesc, irq, 0); |
3186 | /* | ||
3187 | * allocate the consecutive block of IRTE's | ||
3188 | * for 'nvec' | ||
3189 | */ | ||
3190 | index = msi_alloc_remapped_irq(dev, irq, nvec); | ||
3191 | if (index < 0) { | ||
3192 | ret = index; | ||
3193 | goto error; | ||
3194 | } | ||
3195 | } else { | ||
3196 | ret = msi_setup_remapped_irq(dev, irq, index, | ||
3197 | sub_handle); | ||
3198 | if (ret < 0) | ||
3199 | goto error; | ||
3200 | } | ||
3201 | no_ir: | ||
3202 | ret = setup_msi_irq(dev, msidesc, irq); | ||
3203 | if (ret < 0) | 3155 | if (ret < 0) |
3204 | goto error; | 3156 | goto error; |
3205 | sub_handle++; | ||
3206 | } | 3157 | } |
3207 | return 0; | 3158 | return 0; |
3208 | 3159 | ||
@@ -3298,26 +3249,19 @@ static struct irq_chip hpet_msi_type = { | |||
3298 | .irq_retrigger = ioapic_retrigger_irq, | 3249 | .irq_retrigger = ioapic_retrigger_irq, |
3299 | }; | 3250 | }; |
3300 | 3251 | ||
3301 | int arch_setup_hpet_msi(unsigned int irq, unsigned int id) | 3252 | int default_setup_hpet_msi(unsigned int irq, unsigned int id) |
3302 | { | 3253 | { |
3303 | struct irq_chip *chip = &hpet_msi_type; | 3254 | struct irq_chip *chip = &hpet_msi_type; |
3304 | struct msi_msg msg; | 3255 | struct msi_msg msg; |
3305 | int ret; | 3256 | int ret; |
3306 | 3257 | ||
3307 | if (irq_remapping_enabled) { | ||
3308 | ret = setup_hpet_msi_remapped(irq, id); | ||
3309 | if (ret) | ||
3310 | return ret; | ||
3311 | } | ||
3312 | |||
3313 | ret = msi_compose_msg(NULL, irq, &msg, id); | 3258 | ret = msi_compose_msg(NULL, irq, &msg, id); |
3314 | if (ret < 0) | 3259 | if (ret < 0) |
3315 | return ret; | 3260 | return ret; |
3316 | 3261 | ||
3317 | hpet_msi_write(irq_get_handler_data(irq), &msg); | 3262 | hpet_msi_write(irq_get_handler_data(irq), &msg); |
3318 | irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); | 3263 | irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); |
3319 | if (irq_remapped(irq_get_chip_data(irq))) | 3264 | setup_remapped_irq(irq, irq_get_chip_data(irq), chip); |
3320 | irq_remap_modify_chip_defaults(chip); | ||
3321 | 3265 | ||
3322 | irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); | 3266 | irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); |
3323 | return 0; | 3267 | return 0; |
@@ -3683,10 +3627,7 @@ void __init setup_ioapic_dest(void) | |||
3683 | else | 3627 | else |
3684 | mask = apic->target_cpus(); | 3628 | mask = apic->target_cpus(); |
3685 | 3629 | ||
3686 | if (irq_remapping_enabled) | 3630 | x86_io_apic_ops.set_affinity(idata, mask, false); |
3687 | set_remapped_irq_affinity(idata, mask, false); | ||
3688 | else | ||
3689 | ioapic_set_affinity(idata, mask, false); | ||
3690 | } | 3631 | } |
3691 | 3632 | ||
3692 | } | 3633 | } |
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index cce91bf26676..7434d8556d09 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -106,7 +106,7 @@ void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector) | |||
106 | unsigned long mask = cpumask_bits(cpumask)[0]; | 106 | unsigned long mask = cpumask_bits(cpumask)[0]; |
107 | unsigned long flags; | 107 | unsigned long flags; |
108 | 108 | ||
109 | if (WARN_ONCE(!mask, "empty IPI mask")) | 109 | if (!mask) |
110 | return; | 110 | return; |
111 | 111 | ||
112 | local_irq_save(flags); | 112 | local_irq_save(flags); |
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index e03a1e180e81..562a76d433c8 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -20,18 +20,19 @@ static int set_x2apic_phys_mode(char *arg) | |||
20 | } | 20 | } |
21 | early_param("x2apic_phys", set_x2apic_phys_mode); | 21 | early_param("x2apic_phys", set_x2apic_phys_mode); |
22 | 22 | ||
23 | static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | 23 | static bool x2apic_fadt_phys(void) |
24 | { | 24 | { |
25 | if (x2apic_phys) | 25 | if ((acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) && |
26 | return x2apic_enabled(); | 26 | (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) { |
27 | else if ((acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) && | ||
28 | (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL) && | ||
29 | x2apic_enabled()) { | ||
30 | printk(KERN_DEBUG "System requires x2apic physical mode\n"); | 27 | printk(KERN_DEBUG "System requires x2apic physical mode\n"); |
31 | return 1; | 28 | return true; |
32 | } | 29 | } |
33 | else | 30 | return false; |
34 | return 0; | 31 | } |
32 | |||
33 | static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | ||
34 | { | ||
35 | return x2apic_enabled() && (x2apic_phys || x2apic_fadt_phys()); | ||
35 | } | 36 | } |
36 | 37 | ||
37 | static void | 38 | static void |
@@ -82,7 +83,7 @@ static void init_x2apic_ldr(void) | |||
82 | 83 | ||
83 | static int x2apic_phys_probe(void) | 84 | static int x2apic_phys_probe(void) |
84 | { | 85 | { |
85 | if (x2apic_mode && x2apic_phys) | 86 | if (x2apic_mode && (x2apic_phys || x2apic_fadt_phys())) |
86 | return 1; | 87 | return 1; |
87 | 88 | ||
88 | return apic == &apic_x2apic_phys; | 89 | return apic == &apic_x2apic_phys; |
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 8cfade9510a4..794f6eb54cd3 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -5,7 +5,7 @@ | |||
5 | * | 5 | * |
6 | * SGI UV APIC functions (note: not an Intel compatible APIC) | 6 | * SGI UV APIC functions (note: not an Intel compatible APIC) |
7 | * | 7 | * |
8 | * Copyright (C) 2007-2010 Silicon Graphics, Inc. All rights reserved. | 8 | * Copyright (C) 2007-2013 Silicon Graphics, Inc. All rights reserved. |
9 | */ | 9 | */ |
10 | #include <linux/cpumask.h> | 10 | #include <linux/cpumask.h> |
11 | #include <linux/hardirq.h> | 11 | #include <linux/hardirq.h> |
@@ -91,10 +91,16 @@ static int __init early_get_pnodeid(void) | |||
91 | m_n_config.v = uv_early_read_mmr(UVH_RH_GAM_CONFIG_MMR); | 91 | m_n_config.v = uv_early_read_mmr(UVH_RH_GAM_CONFIG_MMR); |
92 | uv_min_hub_revision_id = node_id.s.revision; | 92 | uv_min_hub_revision_id = node_id.s.revision; |
93 | 93 | ||
94 | if (node_id.s.part_number == UV2_HUB_PART_NUMBER) | 94 | switch (node_id.s.part_number) { |
95 | uv_min_hub_revision_id += UV2_HUB_REVISION_BASE - 1; | 95 | case UV2_HUB_PART_NUMBER: |
96 | if (node_id.s.part_number == UV2_HUB_PART_NUMBER_X) | 96 | case UV2_HUB_PART_NUMBER_X: |
97 | uv_min_hub_revision_id += UV2_HUB_REVISION_BASE - 1; | 97 | uv_min_hub_revision_id += UV2_HUB_REVISION_BASE - 1; |
98 | break; | ||
99 | case UV3_HUB_PART_NUMBER: | ||
100 | case UV3_HUB_PART_NUMBER_X: | ||
101 | uv_min_hub_revision_id += UV3_HUB_REVISION_BASE - 1; | ||
102 | break; | ||
103 | } | ||
98 | 104 | ||
99 | uv_hub_info->hub_revision = uv_min_hub_revision_id; | 105 | uv_hub_info->hub_revision = uv_min_hub_revision_id; |
100 | pnode = (node_id.s.node_id >> 1) & ((1 << m_n_config.s.n_skt) - 1); | 106 | pnode = (node_id.s.node_id >> 1) & ((1 << m_n_config.s.n_skt) - 1); |
@@ -130,13 +136,16 @@ static void __init uv_set_apicid_hibit(void) | |||
130 | 136 | ||
131 | static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | 137 | static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) |
132 | { | 138 | { |
133 | int pnodeid, is_uv1, is_uv2; | 139 | int pnodeid, is_uv1, is_uv2, is_uv3; |
134 | 140 | ||
135 | is_uv1 = !strcmp(oem_id, "SGI"); | 141 | is_uv1 = !strcmp(oem_id, "SGI"); |
136 | is_uv2 = !strcmp(oem_id, "SGI2"); | 142 | is_uv2 = !strcmp(oem_id, "SGI2"); |
137 | if (is_uv1 || is_uv2) { | 143 | is_uv3 = !strncmp(oem_id, "SGI3", 4); /* there are varieties of UV3 */ |
144 | if (is_uv1 || is_uv2 || is_uv3) { | ||
138 | uv_hub_info->hub_revision = | 145 | uv_hub_info->hub_revision = |
139 | is_uv1 ? UV1_HUB_REVISION_BASE : UV2_HUB_REVISION_BASE; | 146 | (is_uv1 ? UV1_HUB_REVISION_BASE : |
147 | (is_uv2 ? UV2_HUB_REVISION_BASE : | ||
148 | UV3_HUB_REVISION_BASE)); | ||
140 | pnodeid = early_get_pnodeid(); | 149 | pnodeid = early_get_pnodeid(); |
141 | early_get_apic_pnode_shift(); | 150 | early_get_apic_pnode_shift(); |
142 | x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range; | 151 | x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range; |
@@ -450,14 +459,17 @@ static __init void map_high(char *id, unsigned long base, int pshift, | |||
450 | 459 | ||
451 | paddr = base << pshift; | 460 | paddr = base << pshift; |
452 | bytes = (1UL << bshift) * (max_pnode + 1); | 461 | bytes = (1UL << bshift) * (max_pnode + 1); |
453 | printk(KERN_INFO "UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr, | 462 | if (!paddr) { |
454 | paddr + bytes); | 463 | pr_info("UV: Map %s_HI base address NULL\n", id); |
464 | return; | ||
465 | } | ||
466 | pr_info("UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr, paddr + bytes); | ||
455 | if (map_type == map_uc) | 467 | if (map_type == map_uc) |
456 | init_extra_mapping_uc(paddr, bytes); | 468 | init_extra_mapping_uc(paddr, bytes); |
457 | else | 469 | else |
458 | init_extra_mapping_wb(paddr, bytes); | 470 | init_extra_mapping_wb(paddr, bytes); |
459 | |||
460 | } | 471 | } |
472 | |||
461 | static __init void map_gru_high(int max_pnode) | 473 | static __init void map_gru_high(int max_pnode) |
462 | { | 474 | { |
463 | union uvh_rh_gam_gru_overlay_config_mmr_u gru; | 475 | union uvh_rh_gam_gru_overlay_config_mmr_u gru; |
@@ -468,7 +480,8 @@ static __init void map_gru_high(int max_pnode) | |||
468 | map_high("GRU", gru.s.base, shift, shift, max_pnode, map_wb); | 480 | map_high("GRU", gru.s.base, shift, shift, max_pnode, map_wb); |
469 | gru_start_paddr = ((u64)gru.s.base << shift); | 481 | gru_start_paddr = ((u64)gru.s.base << shift); |
470 | gru_end_paddr = gru_start_paddr + (1UL << shift) * (max_pnode + 1); | 482 | gru_end_paddr = gru_start_paddr + (1UL << shift) * (max_pnode + 1); |
471 | 483 | } else { | |
484 | pr_info("UV: GRU disabled\n"); | ||
472 | } | 485 | } |
473 | } | 486 | } |
474 | 487 | ||
@@ -480,23 +493,146 @@ static __init void map_mmr_high(int max_pnode) | |||
480 | mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR); | 493 | mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR); |
481 | if (mmr.s.enable) | 494 | if (mmr.s.enable) |
482 | map_high("MMR", mmr.s.base, shift, shift, max_pnode, map_uc); | 495 | map_high("MMR", mmr.s.base, shift, shift, max_pnode, map_uc); |
496 | else | ||
497 | pr_info("UV: MMR disabled\n"); | ||
498 | } | ||
499 | |||
500 | /* | ||
501 | * This commonality works because both 0 & 1 versions of the MMIOH OVERLAY | ||
502 | * and REDIRECT MMR regs are exactly the same on UV3. | ||
503 | */ | ||
504 | struct mmioh_config { | ||
505 | unsigned long overlay; | ||
506 | unsigned long redirect; | ||
507 | char *id; | ||
508 | }; | ||
509 | |||
510 | static __initdata struct mmioh_config mmiohs[] = { | ||
511 | { | ||
512 | UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR, | ||
513 | UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR, | ||
514 | "MMIOH0" | ||
515 | }, | ||
516 | { | ||
517 | UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR, | ||
518 | UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR, | ||
519 | "MMIOH1" | ||
520 | }, | ||
521 | }; | ||
522 | |||
523 | static __init void map_mmioh_high_uv3(int index, int min_pnode, int max_pnode) | ||
524 | { | ||
525 | union uv3h_rh_gam_mmioh_overlay_config0_mmr_u overlay; | ||
526 | unsigned long mmr; | ||
527 | unsigned long base; | ||
528 | int i, n, shift, m_io, max_io; | ||
529 | int nasid, lnasid, fi, li; | ||
530 | char *id; | ||
531 | |||
532 | id = mmiohs[index].id; | ||
533 | overlay.v = uv_read_local_mmr(mmiohs[index].overlay); | ||
534 | pr_info("UV: %s overlay 0x%lx base:0x%x m_io:%d\n", | ||
535 | id, overlay.v, overlay.s3.base, overlay.s3.m_io); | ||
536 | if (!overlay.s3.enable) { | ||
537 | pr_info("UV: %s disabled\n", id); | ||
538 | return; | ||
539 | } | ||
540 | |||
541 | shift = UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_SHFT; | ||
542 | base = (unsigned long)overlay.s3.base; | ||
543 | m_io = overlay.s3.m_io; | ||
544 | mmr = mmiohs[index].redirect; | ||
545 | n = UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH; | ||
546 | min_pnode *= 2; /* convert to NASID */ | ||
547 | max_pnode *= 2; | ||
548 | max_io = lnasid = fi = li = -1; | ||
549 | |||
550 | for (i = 0; i < n; i++) { | ||
551 | union uv3h_rh_gam_mmioh_redirect_config0_mmr_u redirect; | ||
552 | |||
553 | redirect.v = uv_read_local_mmr(mmr + i * 8); | ||
554 | nasid = redirect.s3.nasid; | ||
555 | if (nasid < min_pnode || max_pnode < nasid) | ||
556 | nasid = -1; /* invalid NASID */ | ||
557 | |||
558 | if (nasid == lnasid) { | ||
559 | li = i; | ||
560 | if (i != n-1) /* last entry check */ | ||
561 | continue; | ||
562 | } | ||
563 | |||
564 | /* check if we have a cached (or last) redirect to print */ | ||
565 | if (lnasid != -1 || (i == n-1 && nasid != -1)) { | ||
566 | unsigned long addr1, addr2; | ||
567 | int f, l; | ||
568 | |||
569 | if (lnasid == -1) { | ||
570 | f = l = i; | ||
571 | lnasid = nasid; | ||
572 | } else { | ||
573 | f = fi; | ||
574 | l = li; | ||
575 | } | ||
576 | addr1 = (base << shift) + | ||
577 | f * (unsigned long)(1 << m_io); | ||
578 | addr2 = (base << shift) + | ||
579 | (l + 1) * (unsigned long)(1 << m_io); | ||
580 | pr_info("UV: %s[%03d..%03d] NASID 0x%04x ADDR 0x%016lx - 0x%016lx\n", | ||
581 | id, fi, li, lnasid, addr1, addr2); | ||
582 | if (max_io < l) | ||
583 | max_io = l; | ||
584 | } | ||
585 | fi = li = i; | ||
586 | lnasid = nasid; | ||
587 | } | ||
588 | |||
589 | pr_info("UV: %s base:0x%lx shift:%d M_IO:%d MAX_IO:%d\n", | ||
590 | id, base, shift, m_io, max_io); | ||
591 | |||
592 | if (max_io >= 0) | ||
593 | map_high(id, base, shift, m_io, max_io, map_uc); | ||
483 | } | 594 | } |
484 | 595 | ||
485 | static __init void map_mmioh_high(int max_pnode) | 596 | static __init void map_mmioh_high(int min_pnode, int max_pnode) |
486 | { | 597 | { |
487 | union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh; | 598 | union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh; |
488 | int shift; | 599 | unsigned long mmr, base; |
600 | int shift, enable, m_io, n_io; | ||
489 | 601 | ||
490 | mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR); | 602 | if (is_uv3_hub()) { |
491 | if (is_uv1_hub() && mmioh.s1.enable) { | 603 | /* Map both MMIOH Regions */ |
492 | shift = UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT; | 604 | map_mmioh_high_uv3(0, min_pnode, max_pnode); |
493 | map_high("MMIOH", mmioh.s1.base, shift, mmioh.s1.m_io, | 605 | map_mmioh_high_uv3(1, min_pnode, max_pnode); |
494 | max_pnode, map_uc); | 606 | return; |
495 | } | 607 | } |
496 | if (is_uv2_hub() && mmioh.s2.enable) { | 608 | |
609 | if (is_uv1_hub()) { | ||
610 | mmr = UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR; | ||
611 | shift = UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT; | ||
612 | mmioh.v = uv_read_local_mmr(mmr); | ||
613 | enable = !!mmioh.s1.enable; | ||
614 | base = mmioh.s1.base; | ||
615 | m_io = mmioh.s1.m_io; | ||
616 | n_io = mmioh.s1.n_io; | ||
617 | } else if (is_uv2_hub()) { | ||
618 | mmr = UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR; | ||
497 | shift = UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT; | 619 | shift = UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT; |
498 | map_high("MMIOH", mmioh.s2.base, shift, mmioh.s2.m_io, | 620 | mmioh.v = uv_read_local_mmr(mmr); |
499 | max_pnode, map_uc); | 621 | enable = !!mmioh.s2.enable; |
622 | base = mmioh.s2.base; | ||
623 | m_io = mmioh.s2.m_io; | ||
624 | n_io = mmioh.s2.n_io; | ||
625 | } else | ||
626 | return; | ||
627 | |||
628 | if (enable) { | ||
629 | max_pnode &= (1 << n_io) - 1; | ||
630 | pr_info( | ||
631 | "UV: base:0x%lx shift:%d N_IO:%d M_IO:%d max_pnode:0x%x\n", | ||
632 | base, shift, n_io, m_io, max_pnode); | ||
633 | map_high("MMIOH", base, shift, m_io, max_pnode, map_uc); | ||
634 | } else { | ||
635 | pr_info("UV: MMIOH disabled\n"); | ||
500 | } | 636 | } |
501 | } | 637 | } |
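
For UV1/UV2 the function above reads a single overlay MMR and, when the enable bit is set, clamps max_pnode to the low n_io bits before calling map_high(). A tiny illustration of that masking, with hypothetical values:

#include <stdio.h>

int main(void)
{
	int n_io = 5;            /* hypothetical: overlay addresses 2^5 = 32 pnodes */
	int max_pnode = 0x47;    /* hypothetical largest pnode found at boot */

	max_pnode &= (1 << n_io) - 1;   /* keep only the low n_io bits: 0x47 -> 0x07 */
	printf("max_pnode clamped to 0x%x\n", max_pnode);
	return 0;
}
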
502 | 638 | ||
@@ -724,42 +860,41 @@ void uv_nmi_init(void) | |||
724 | void __init uv_system_init(void) | 860 | void __init uv_system_init(void) |
725 | { | 861 | { |
726 | union uvh_rh_gam_config_mmr_u m_n_config; | 862 | union uvh_rh_gam_config_mmr_u m_n_config; |
727 | union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh; | ||
728 | union uvh_node_id_u node_id; | 863 | union uvh_node_id_u node_id; |
729 | unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size; | 864 | unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size; |
730 | int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val, n_io; | 865 | int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val; |
731 | int gnode_extra, max_pnode = 0; | 866 | int gnode_extra, min_pnode = 999999, max_pnode = -1; |
732 | unsigned long mmr_base, present, paddr; | 867 | unsigned long mmr_base, present, paddr; |
733 | unsigned short pnode_mask, pnode_io_mask; | 868 | unsigned short pnode_mask; |
869 | char *hub = (is_uv1_hub() ? "UV1" : | ||
870 | (is_uv2_hub() ? "UV2" : | ||
871 | "UV3")); | ||
734 | 872 | ||
735 | printk(KERN_INFO "UV: Found %s hub\n", is_uv1_hub() ? "UV1" : "UV2"); | 873 | pr_info("UV: Found %s hub\n", hub); |
736 | map_low_mmrs(); | 874 | map_low_mmrs(); |
737 | 875 | ||
738 | m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR ); | 876 | m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR ); |
739 | m_val = m_n_config.s.m_skt; | 877 | m_val = m_n_config.s.m_skt; |
740 | n_val = m_n_config.s.n_skt; | 878 | n_val = m_n_config.s.n_skt; |
741 | mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR); | 879 | pnode_mask = (1 << n_val) - 1; |
742 | n_io = is_uv1_hub() ? mmioh.s1.n_io : mmioh.s2.n_io; | ||
743 | mmr_base = | 880 | mmr_base = |
744 | uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) & | 881 | uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) & |
745 | ~UV_MMR_ENABLE; | 882 | ~UV_MMR_ENABLE; |
746 | pnode_mask = (1 << n_val) - 1; | ||
747 | pnode_io_mask = (1 << n_io) - 1; | ||
748 | 883 | ||
749 | node_id.v = uv_read_local_mmr(UVH_NODE_ID); | 884 | node_id.v = uv_read_local_mmr(UVH_NODE_ID); |
750 | gnode_extra = (node_id.s.node_id & ~((1 << n_val) - 1)) >> 1; | 885 | gnode_extra = (node_id.s.node_id & ~((1 << n_val) - 1)) >> 1; |
751 | gnode_upper = ((unsigned long)gnode_extra << m_val); | 886 | gnode_upper = ((unsigned long)gnode_extra << m_val); |
752 | printk(KERN_INFO "UV: N %d, M %d, N_IO: %d, gnode_upper 0x%lx, gnode_extra 0x%x, pnode_mask 0x%x, pnode_io_mask 0x%x\n", | 887 | pr_info("UV: N:%d M:%d pnode_mask:0x%x gnode_upper/extra:0x%lx/0x%x\n", |
753 | n_val, m_val, n_io, gnode_upper, gnode_extra, pnode_mask, pnode_io_mask); | 888 | n_val, m_val, pnode_mask, gnode_upper, gnode_extra); |
754 | 889 | ||
755 | printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base); | 890 | pr_info("UV: global MMR base 0x%lx\n", mmr_base); |
756 | 891 | ||
757 | for(i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) | 892 | for(i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) |
758 | uv_possible_blades += | 893 | uv_possible_blades += |
759 | hweight64(uv_read_local_mmr( UVH_NODE_PRESENT_TABLE + i * 8)); | 894 | hweight64(uv_read_local_mmr( UVH_NODE_PRESENT_TABLE + i * 8)); |
760 | 895 | ||
761 | /* uv_num_possible_blades() is really the hub count */ | 896 | /* uv_num_possible_blades() is really the hub count */ |
762 | printk(KERN_INFO "UV: Found %d blades, %d hubs\n", | 897 | pr_info("UV: Found %d blades, %d hubs\n", |
763 | is_uv1_hub() ? uv_num_possible_blades() : | 898 | is_uv1_hub() ? uv_num_possible_blades() : |
764 | (uv_num_possible_blades() + 1) / 2, | 899 | (uv_num_possible_blades() + 1) / 2, |
765 | uv_num_possible_blades()); | 900 | uv_num_possible_blades()); |
@@ -794,6 +929,7 @@ void __init uv_system_init(void) | |||
794 | uv_blade_info[blade].nr_possible_cpus = 0; | 929 | uv_blade_info[blade].nr_possible_cpus = 0; |
795 | uv_blade_info[blade].nr_online_cpus = 0; | 930 | uv_blade_info[blade].nr_online_cpus = 0; |
796 | spin_lock_init(&uv_blade_info[blade].nmi_lock); | 931 | spin_lock_init(&uv_blade_info[blade].nmi_lock); |
932 | min_pnode = min(pnode, min_pnode); | ||
797 | max_pnode = max(pnode, max_pnode); | 933 | max_pnode = max(pnode, max_pnode); |
798 | blade++; | 934 | blade++; |
799 | } | 935 | } |
@@ -856,7 +992,7 @@ void __init uv_system_init(void) | |||
856 | 992 | ||
857 | map_gru_high(max_pnode); | 993 | map_gru_high(max_pnode); |
858 | map_mmr_high(max_pnode); | 994 | map_mmr_high(max_pnode); |
859 | map_mmioh_high(max_pnode & pnode_io_mask); | 995 | map_mmioh_high(min_pnode, max_pnode); |
860 | 996 | ||
861 | uv_cpu_init(); | 997 | uv_cpu_init(); |
862 | uv_scir_register_cpu_notifier(); | 998 | uv_scir_register_cpu_notifier(); |
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index d65464e43503..66b5faffe14a 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c | |||
@@ -232,6 +232,7 @@ | |||
232 | #include <linux/acpi.h> | 232 | #include <linux/acpi.h> |
233 | #include <linux/syscore_ops.h> | 233 | #include <linux/syscore_ops.h> |
234 | #include <linux/i8253.h> | 234 | #include <linux/i8253.h> |
235 | #include <linux/cpuidle.h> | ||
235 | 236 | ||
236 | #include <asm/uaccess.h> | 237 | #include <asm/uaccess.h> |
237 | #include <asm/desc.h> | 238 | #include <asm/desc.h> |
@@ -360,13 +361,35 @@ struct apm_user { | |||
360 | * idle percentage above which bios idle calls are done | 361 | * idle percentage above which bios idle calls are done |
361 | */ | 362 | */ |
362 | #ifdef CONFIG_APM_CPU_IDLE | 363 | #ifdef CONFIG_APM_CPU_IDLE |
363 | #warning deprecated CONFIG_APM_CPU_IDLE will be deleted in 2012 | ||
364 | #define DEFAULT_IDLE_THRESHOLD 95 | 364 | #define DEFAULT_IDLE_THRESHOLD 95 |
365 | #else | 365 | #else |
366 | #define DEFAULT_IDLE_THRESHOLD 100 | 366 | #define DEFAULT_IDLE_THRESHOLD 100 |
367 | #endif | 367 | #endif |
368 | #define DEFAULT_IDLE_PERIOD (100 / 3) | 368 | #define DEFAULT_IDLE_PERIOD (100 / 3) |
369 | 369 | ||
370 | static int apm_cpu_idle(struct cpuidle_device *dev, | ||
371 | struct cpuidle_driver *drv, int index); | ||
372 | |||
373 | static struct cpuidle_driver apm_idle_driver = { | ||
374 | .name = "apm_idle", | ||
375 | .owner = THIS_MODULE, | ||
376 | .en_core_tk_irqen = 1, | ||
377 | .states = { | ||
378 | { /* entry 0 is for polling */ }, | ||
379 | { /* entry 1 is for APM idle */ | ||
380 | .name = "APM", | ||
381 | .desc = "APM idle", | ||
382 | .flags = CPUIDLE_FLAG_TIME_VALID, | ||
383 | .exit_latency = 250, /* WAG */ | ||
384 | .target_residency = 500, /* WAG */ | ||
385 | .enter = &apm_cpu_idle | ||
386 | }, | ||
387 | }, | ||
388 | .state_count = 2, | ||
389 | }; | ||
390 | |||
391 | static struct cpuidle_device apm_cpuidle_device; | ||
392 | |||
370 | /* | 393 | /* |
371 | * Local variables | 394 | * Local variables |
372 | */ | 395 | */ |
@@ -377,7 +400,6 @@ static struct { | |||
377 | static int clock_slowed; | 400 | static int clock_slowed; |
378 | static int idle_threshold __read_mostly = DEFAULT_IDLE_THRESHOLD; | 401 | static int idle_threshold __read_mostly = DEFAULT_IDLE_THRESHOLD; |
379 | static int idle_period __read_mostly = DEFAULT_IDLE_PERIOD; | 402 | static int idle_period __read_mostly = DEFAULT_IDLE_PERIOD; |
380 | static int set_pm_idle; | ||
381 | static int suspends_pending; | 403 | static int suspends_pending; |
382 | static int standbys_pending; | 404 | static int standbys_pending; |
383 | static int ignore_sys_suspend; | 405 | static int ignore_sys_suspend; |
@@ -884,8 +906,6 @@ static void apm_do_busy(void) | |||
884 | #define IDLE_CALC_LIMIT (HZ * 100) | 906 | #define IDLE_CALC_LIMIT (HZ * 100) |
885 | #define IDLE_LEAKY_MAX 16 | 907 | #define IDLE_LEAKY_MAX 16 |
886 | 908 | ||
887 | static void (*original_pm_idle)(void) __read_mostly; | ||
888 | |||
889 | /** | 909 | /** |
890 | * apm_cpu_idle - cpu idling for APM capable Linux | 910 | * apm_cpu_idle - cpu idling for APM capable Linux |
891 | * | 911 | * |
@@ -894,35 +914,36 @@ static void (*original_pm_idle)(void) __read_mostly; | |||
894 | * Furthermore it calls the system default idle routine. | 914 | * Furthermore it calls the system default idle routine. |
895 | */ | 915 | */ |
896 | 916 | ||
897 | static void apm_cpu_idle(void) | 917 | static int apm_cpu_idle(struct cpuidle_device *dev, |
918 | struct cpuidle_driver *drv, int index) | ||
898 | { | 919 | { |
899 | static int use_apm_idle; /* = 0 */ | 920 | static int use_apm_idle; /* = 0 */ |
900 | static unsigned int last_jiffies; /* = 0 */ | 921 | static unsigned int last_jiffies; /* = 0 */ |
901 | static unsigned int last_stime; /* = 0 */ | 922 | static unsigned int last_stime; /* = 0 */ |
923 | cputime_t stime; | ||
902 | 924 | ||
903 | int apm_idle_done = 0; | 925 | int apm_idle_done = 0; |
904 | unsigned int jiffies_since_last_check = jiffies - last_jiffies; | 926 | unsigned int jiffies_since_last_check = jiffies - last_jiffies; |
905 | unsigned int bucket; | 927 | unsigned int bucket; |
906 | 928 | ||
907 | WARN_ONCE(1, "deprecated apm_cpu_idle will be deleted in 2012"); | ||
908 | recalc: | 929 | recalc: |
930 | task_cputime(current, NULL, &stime); | ||
909 | if (jiffies_since_last_check > IDLE_CALC_LIMIT) { | 931 | if (jiffies_since_last_check > IDLE_CALC_LIMIT) { |
910 | use_apm_idle = 0; | 932 | use_apm_idle = 0; |
911 | last_jiffies = jiffies; | ||
912 | last_stime = current->stime; | ||
913 | } else if (jiffies_since_last_check > idle_period) { | 933 | } else if (jiffies_since_last_check > idle_period) { |
914 | unsigned int idle_percentage; | 934 | unsigned int idle_percentage; |
915 | 935 | ||
916 | idle_percentage = current->stime - last_stime; | 936 | idle_percentage = stime - last_stime; |
917 | idle_percentage *= 100; | 937 | idle_percentage *= 100; |
918 | idle_percentage /= jiffies_since_last_check; | 938 | idle_percentage /= jiffies_since_last_check; |
919 | use_apm_idle = (idle_percentage > idle_threshold); | 939 | use_apm_idle = (idle_percentage > idle_threshold); |
920 | if (apm_info.forbid_idle) | 940 | if (apm_info.forbid_idle) |
921 | use_apm_idle = 0; | 941 | use_apm_idle = 0; |
922 | last_jiffies = jiffies; | ||
923 | last_stime = current->stime; | ||
924 | } | 942 | } |
925 | 943 | ||
944 | last_jiffies = jiffies; | ||
945 | last_stime = stime; | ||
946 | |||
926 | bucket = IDLE_LEAKY_MAX; | 947 | bucket = IDLE_LEAKY_MAX; |
927 | 948 | ||
928 | while (!need_resched()) { | 949 | while (!need_resched()) { |
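
The recalc block above decides whether to keep using the APM BIOS idle call: the idle task's system-time delta since the last check is turned into a percentage of the elapsed jiffies and compared against idle_threshold. A userspace sketch of that arithmetic with made-up numbers (not kernel code):

#include <stdio.h>

int main(void)
{
	unsigned int idle_threshold = 95;             /* DEFAULT_IDLE_THRESHOLD */
	unsigned int jiffies_since_last_check = 40;   /* elapsed ticks, made up */
	unsigned int stime = 1039, last_stime = 1000; /* idle task stime, made up */
	unsigned int idle_percentage;
	int use_apm_idle;

	idle_percentage = stime - last_stime;         /* 39 ticks spent idling */
	idle_percentage *= 100;
	idle_percentage /= jiffies_since_last_check;  /* 3900 / 40 = 97 */
	use_apm_idle = (idle_percentage > idle_threshold);

	printf("idle %u%% -> %s\n", idle_percentage,
	       use_apm_idle ? "call APM BIOS idle" : "use default_idle only");
	return 0;
}

With 39 idle ticks over 40 elapsed jiffies the result is 97%, above the default threshold of 95, so the BIOS idle call stays in use.
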
@@ -950,10 +971,7 @@ recalc: | |||
950 | break; | 971 | break; |
951 | } | 972 | } |
952 | } | 973 | } |
953 | if (original_pm_idle) | 974 | default_idle(); |
954 | original_pm_idle(); | ||
955 | else | ||
956 | default_idle(); | ||
957 | local_irq_disable(); | 975 | local_irq_disable(); |
958 | jiffies_since_last_check = jiffies - last_jiffies; | 976 | jiffies_since_last_check = jiffies - last_jiffies; |
959 | if (jiffies_since_last_check > idle_period) | 977 | if (jiffies_since_last_check > idle_period) |
@@ -963,7 +981,7 @@ recalc: | |||
963 | if (apm_idle_done) | 981 | if (apm_idle_done) |
964 | apm_do_busy(); | 982 | apm_do_busy(); |
965 | 983 | ||
966 | local_irq_enable(); | 984 | return index; |
967 | } | 985 | } |
968 | 986 | ||
969 | /** | 987 | /** |
@@ -2381,9 +2399,9 @@ static int __init apm_init(void) | |||
2381 | if (HZ != 100) | 2399 | if (HZ != 100) |
2382 | idle_period = (idle_period * HZ) / 100; | 2400 | idle_period = (idle_period * HZ) / 100; |
2383 | if (idle_threshold < 100) { | 2401 | if (idle_threshold < 100) { |
2384 | original_pm_idle = pm_idle; | 2402 | if (!cpuidle_register_driver(&apm_idle_driver)) |
2385 | pm_idle = apm_cpu_idle; | 2403 | if (cpuidle_register_device(&apm_cpuidle_device)) |
2386 | set_pm_idle = 1; | 2404 | cpuidle_unregister_driver(&apm_idle_driver); |
2387 | } | 2405 | } |
2388 | 2406 | ||
2389 | return 0; | 2407 | return 0; |
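
The registration above nests the two calls so a partial failure unwinds cleanly: the driver is registered first, and only if the device registration then fails is the driver unregistered again. A stubbed standalone sketch of the same pattern (stub functions only, not the real cpuidle API):

#include <stdio.h>

/* Stubs standing in for cpuidle_register_driver()/cpuidle_register_device(). */
static int register_driver(void)    { puts("driver registered");        return 0;  }
static int register_device(void)    { puts("device registration fails"); return -1; }
static void unregister_driver(void) { puts("driver unregistered again"); }

int main(void)
{
	if (!register_driver())              /* 0 means success, as in the kernel */
		if (register_device())       /* non-zero means failure ... */
			unregister_driver(); /* ... so undo the driver step */
	return 0;
}
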
@@ -2393,15 +2411,9 @@ static void __exit apm_exit(void) | |||
2393 | { | 2411 | { |
2394 | int error; | 2412 | int error; |
2395 | 2413 | ||
2396 | if (set_pm_idle) { | 2414 | cpuidle_unregister_device(&apm_cpuidle_device); |
2397 | pm_idle = original_pm_idle; | 2415 | cpuidle_unregister_driver(&apm_idle_driver); |
2398 | /* | 2416 | |
2399 | * We are about to unload the current idle thread pm callback | ||
2400 | * (pm_idle), Wait for all processors to update cached/local | ||
2401 | * copies of pm_idle before proceeding. | ||
2402 | */ | ||
2403 | kick_all_cpus_sync(); | ||
2404 | } | ||
2405 | if (((apm_info.bios.flags & APM_BIOS_DISENGAGED) == 0) | 2417 | if (((apm_info.bios.flags & APM_BIOS_DISENGAGED) == 0) |
2406 | && (apm_info.connection_version > 0x0100)) { | 2418 | && (apm_info.connection_version > 0x0100)) { |
2407 | error = apm_engage_power_management(APM_DEVICE_ALL, 0); | 2419 | error = apm_engage_power_management(APM_DEVICE_ALL, 0); |
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 15239fffd6fe..fa96eb0d02fb 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
@@ -12,7 +12,6 @@ | |||
12 | #include <asm/pci-direct.h> | 12 | #include <asm/pci-direct.h> |
13 | 13 | ||
14 | #ifdef CONFIG_X86_64 | 14 | #ifdef CONFIG_X86_64 |
15 | # include <asm/numa_64.h> | ||
16 | # include <asm/mmconfig.h> | 15 | # include <asm/mmconfig.h> |
17 | # include <asm/cacheflush.h> | 16 | # include <asm/cacheflush.h> |
18 | #endif | 17 | #endif |
@@ -220,8 +219,7 @@ static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c) | |||
220 | */ | 219 | */ |
221 | WARN_ONCE(1, "WARNING: This combination of AMD" | 220 | WARN_ONCE(1, "WARNING: This combination of AMD" |
222 | " processors is not suitable for SMP.\n"); | 221 | " processors is not suitable for SMP.\n"); |
223 | if (!test_taint(TAINT_UNSAFE_SMP)) | 222 | add_taint(TAINT_UNSAFE_SMP, LOCKDEP_NOW_UNRELIABLE); |
224 | add_taint(TAINT_UNSAFE_SMP); | ||
225 | 223 | ||
226 | valid_k7: | 224 | valid_k7: |
227 | ; | 225 | ; |
@@ -364,9 +362,9 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) | |||
364 | #endif | 362 | #endif |
365 | } | 363 | } |
366 | 364 | ||
367 | int amd_get_nb_id(int cpu) | 365 | u16 amd_get_nb_id(int cpu) |
368 | { | 366 | { |
369 | int id = 0; | 367 | u16 id = 0; |
370 | #ifdef CONFIG_SMP | 368 | #ifdef CONFIG_SMP |
371 | id = per_cpu(cpu_llc_id, cpu); | 369 | id = per_cpu(cpu_llc_id, cpu); |
372 | #endif | 370 | #endif |
@@ -518,10 +516,9 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) | |||
518 | static void __cpuinit init_amd(struct cpuinfo_x86 *c) | 516 | static void __cpuinit init_amd(struct cpuinfo_x86 *c) |
519 | { | 517 | { |
520 | u32 dummy; | 518 | u32 dummy; |
521 | |||
522 | #ifdef CONFIG_SMP | ||
523 | unsigned long long value; | 519 | unsigned long long value; |
524 | 520 | ||
521 | #ifdef CONFIG_SMP | ||
525 | /* | 522 | /* |
526 | * Disable TLB flush filter by setting HWCR.FFDIS on K8 | 523 | * Disable TLB flush filter by setting HWCR.FFDIS on K8 |
527 | * bit 6 of msr C001_0015 | 524 | * bit 6 of msr C001_0015 |
@@ -559,12 +556,10 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
559 | * (AMD Erratum #110, docId: 25759). | 556 | * (AMD Erratum #110, docId: 25759). |
560 | */ | 557 | */ |
561 | if (c->x86_model < 0x14 && cpu_has(c, X86_FEATURE_LAHF_LM)) { | 558 | if (c->x86_model < 0x14 && cpu_has(c, X86_FEATURE_LAHF_LM)) { |
562 | u64 val; | ||
563 | |||
564 | clear_cpu_cap(c, X86_FEATURE_LAHF_LM); | 559 | clear_cpu_cap(c, X86_FEATURE_LAHF_LM); |
565 | if (!rdmsrl_amd_safe(0xc001100d, &val)) { | 560 | if (!rdmsrl_amd_safe(0xc001100d, &value)) { |
566 | val &= ~(1ULL << 32); | 561 | value &= ~(1ULL << 32); |
567 | wrmsrl_amd_safe(0xc001100d, val); | 562 | wrmsrl_amd_safe(0xc001100d, value); |
568 | } | 563 | } |
569 | } | 564 | } |
570 | 565 | ||
@@ -617,13 +612,12 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
617 | if ((c->x86 == 0x15) && | 612 | if ((c->x86 == 0x15) && |
618 | (c->x86_model >= 0x10) && (c->x86_model <= 0x1f) && | 613 | (c->x86_model >= 0x10) && (c->x86_model <= 0x1f) && |
619 | !cpu_has(c, X86_FEATURE_TOPOEXT)) { | 614 | !cpu_has(c, X86_FEATURE_TOPOEXT)) { |
620 | u64 val; | ||
621 | 615 | ||
622 | if (!rdmsrl_safe(0xc0011005, &val)) { | 616 | if (!rdmsrl_safe(0xc0011005, &value)) { |
623 | val |= 1ULL << 54; | 617 | value |= 1ULL << 54; |
624 | wrmsrl_safe(0xc0011005, val); | 618 | wrmsrl_safe(0xc0011005, value); |
625 | rdmsrl(0xc0011005, val); | 619 | rdmsrl(0xc0011005, value); |
626 | if (val & (1ULL << 54)) { | 620 | if (value & (1ULL << 54)) { |
627 | set_cpu_cap(c, X86_FEATURE_TOPOEXT); | 621 | set_cpu_cap(c, X86_FEATURE_TOPOEXT); |
628 | printk(KERN_INFO FW_INFO "CPU: Re-enabling " | 622 | printk(KERN_INFO FW_INFO "CPU: Re-enabling " |
629 | "disabled Topology Extensions Support\n"); | 623 | "disabled Topology Extensions Support\n"); |
@@ -637,11 +631,10 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
637 | */ | 631 | */ |
638 | if ((c->x86 == 0x15) && | 632 | if ((c->x86 == 0x15) && |
639 | (c->x86_model >= 0x02) && (c->x86_model < 0x20)) { | 633 | (c->x86_model >= 0x02) && (c->x86_model < 0x20)) { |
640 | u64 val; | ||
641 | 634 | ||
642 | if (!rdmsrl_safe(0xc0011021, &val) && !(val & 0x1E)) { | 635 | if (!rdmsrl_safe(0xc0011021, &value) && !(value & 0x1E)) { |
643 | val |= 0x1E; | 636 | value |= 0x1E; |
644 | wrmsrl_safe(0xc0011021, val); | 637 | wrmsrl_safe(0xc0011021, value); |
645 | } | 638 | } |
646 | } | 639 | } |
647 | 640 | ||
@@ -685,12 +678,10 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
685 | * benefit in doing so. | 678 | * benefit in doing so. |
686 | */ | 679 | */ |
687 | if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) { | 680 | if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) { |
681 | unsigned long pfn = tseg >> PAGE_SHIFT; | ||
682 | |||
688 | printk(KERN_DEBUG "tseg: %010llx\n", tseg); | 683 | printk(KERN_DEBUG "tseg: %010llx\n", tseg); |
689 | if ((tseg>>PMD_SHIFT) < | 684 | if (pfn_range_is_mapped(pfn, pfn + 1)) |
690 | (max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) || | ||
691 | ((tseg>>PMD_SHIFT) < | ||
692 | (max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) && | ||
693 | (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT)))) | ||
694 | set_memory_4k((unsigned long)__va(tseg), 1); | 685 | set_memory_4k((unsigned long)__va(tseg), 1); |
695 | } | 686 | } |
696 | } | 687 | } |
@@ -703,13 +694,11 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
703 | if (c->x86 > 0x11) | 694 | if (c->x86 > 0x11) |
704 | set_cpu_cap(c, X86_FEATURE_ARAT); | 695 | set_cpu_cap(c, X86_FEATURE_ARAT); |
705 | 696 | ||
706 | /* | ||
707 | * Disable GART TLB Walk Errors on Fam10h. We do this here | ||
708 | * because this is always needed when GART is enabled, even in a | ||
709 | * kernel which has no MCE support built in. | ||
710 | */ | ||
711 | if (c->x86 == 0x10) { | 697 | if (c->x86 == 0x10) { |
712 | /* | 698 | /* |
699 | * Disable GART TLB Walk Errors on Fam10h. We do this here | ||
700 | * because this is always needed when GART is enabled, even in a | ||
701 | * kernel which has no MCE support built in. | ||
713 | * BIOS should disable GartTlbWlk Errors itself. If | 702 | * BIOS should disable GartTlbWlk Errors itself. If |
714 | * it doesn't, do it here as suggested by the BKDG. | 703 | * it doesn't, do it here as suggested by the BKDG. |
715 | * | 704 | * |
@@ -723,6 +712,21 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
723 | mask |= (1 << 10); | 712 | mask |= (1 << 10); |
724 | wrmsrl_safe(MSR_AMD64_MCx_MASK(4), mask); | 713 | wrmsrl_safe(MSR_AMD64_MCx_MASK(4), mask); |
725 | } | 714 | } |
715 | |||
716 | /* | ||
717 | * On family 10h BIOS may not have properly enabled WC+ support, | ||
718 | * causing it to be converted to CD memtype. This may result in | ||
719 | * performance degradation for certain nested-paging guests. | ||
720 | * Prevent this conversion by clearing bit 24 in | ||
721 | * MSR_AMD64_BU_CFG2. | ||
722 | * | ||
723 | * NOTE: we want to use the _safe accessors so as not to #GP kvm | ||
724 | * guests on older kvm hosts. | ||
725 | */ | ||
726 | |||
727 | rdmsrl_safe(MSR_AMD64_BU_CFG2, &value); | ||
728 | value &= ~(1ULL << 24); | ||
729 | wrmsrl_safe(MSR_AMD64_BU_CFG2, value); | ||
726 | } | 730 | } |
727 | 731 | ||
728 | rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy); | 732 | rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy); |
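
The new BU_CFG2 quirk above is a plain read-modify-write: read the MSR with the _safe accessor, clear bit 24, write it back unchanged otherwise. Illustrated with an arbitrary starting value in plain userspace C:

#include <stdio.h>

int main(void)
{
	unsigned long long value = 0x000000000f008040ULL;  /* made-up MSR contents */

	value &= ~(1ULL << 24);   /* clear only bit 24, the WC+ -> CD conversion bit */
	printf("new value: 0x%016llx\n", value);           /* 0x000000000e008040 */
	return 0;
}
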
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 92dfec986a48..af6455e3fcc9 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c | |||
@@ -17,15 +17,6 @@ | |||
17 | #include <asm/paravirt.h> | 17 | #include <asm/paravirt.h> |
18 | #include <asm/alternative.h> | 18 | #include <asm/alternative.h> |
19 | 19 | ||
20 | static int __init no_halt(char *s) | ||
21 | { | ||
22 | WARN_ONCE(1, "\"no-hlt\" is deprecated, please use \"idle=poll\"\n"); | ||
23 | boot_cpu_data.hlt_works_ok = 0; | ||
24 | return 1; | ||
25 | } | ||
26 | |||
27 | __setup("no-hlt", no_halt); | ||
28 | |||
29 | static int __init no_387(char *s) | 20 | static int __init no_387(char *s) |
30 | { | 21 | { |
31 | boot_cpu_data.hard_math = 0; | 22 | boot_cpu_data.hard_math = 0; |
@@ -89,23 +80,6 @@ static void __init check_fpu(void) | |||
89 | pr_warn("Hmm, FPU with FDIV bug\n"); | 80 | pr_warn("Hmm, FPU with FDIV bug\n"); |
90 | } | 81 | } |
91 | 82 | ||
92 | static void __init check_hlt(void) | ||
93 | { | ||
94 | if (boot_cpu_data.x86 >= 5 || paravirt_enabled()) | ||
95 | return; | ||
96 | |||
97 | pr_info("Checking 'hlt' instruction... "); | ||
98 | if (!boot_cpu_data.hlt_works_ok) { | ||
99 | pr_cont("disabled\n"); | ||
100 | return; | ||
101 | } | ||
102 | halt(); | ||
103 | halt(); | ||
104 | halt(); | ||
105 | halt(); | ||
106 | pr_cont("OK\n"); | ||
107 | } | ||
108 | |||
109 | /* | 83 | /* |
110 | * Check whether we are able to run this kernel safely on SMP. | 84 | * Check whether we are able to run this kernel safely on SMP. |
111 | * | 85 | * |
@@ -129,7 +103,6 @@ void __init check_bugs(void) | |||
129 | print_cpu_info(&boot_cpu_data); | 103 | print_cpu_info(&boot_cpu_data); |
130 | #endif | 104 | #endif |
131 | check_config(); | 105 | check_config(); |
132 | check_hlt(); | ||
133 | init_utsname()->machine[1] = | 106 | init_utsname()->machine[1] = |
134 | '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); | 107 | '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); |
135 | alternative_instructions(); | 108 | alternative_instructions(); |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 9c3ab43a6954..d814772c5bed 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -37,6 +37,8 @@ | |||
37 | #include <asm/mce.h> | 37 | #include <asm/mce.h> |
38 | #include <asm/msr.h> | 38 | #include <asm/msr.h> |
39 | #include <asm/pat.h> | 39 | #include <asm/pat.h> |
40 | #include <asm/microcode.h> | ||
41 | #include <asm/microcode_intel.h> | ||
40 | 42 | ||
41 | #ifdef CONFIG_X86_LOCAL_APIC | 43 | #ifdef CONFIG_X86_LOCAL_APIC |
42 | #include <asm/uv/uv.h> | 44 | #include <asm/uv/uv.h> |
@@ -213,7 +215,7 @@ static inline int flag_is_changeable_p(u32 flag) | |||
213 | } | 215 | } |
214 | 216 | ||
215 | /* Probe for the CPUID instruction */ | 217 | /* Probe for the CPUID instruction */ |
216 | static int __cpuinit have_cpuid_p(void) | 218 | int __cpuinit have_cpuid_p(void) |
217 | { | 219 | { |
218 | return flag_is_changeable_p(X86_EFLAGS_ID); | 220 | return flag_is_changeable_p(X86_EFLAGS_ID); |
219 | } | 221 | } |
@@ -249,11 +251,6 @@ static inline int flag_is_changeable_p(u32 flag) | |||
249 | { | 251 | { |
250 | return 1; | 252 | return 1; |
251 | } | 253 | } |
252 | /* Probe for the CPUID instruction */ | ||
253 | static inline int have_cpuid_p(void) | ||
254 | { | ||
255 | return 1; | ||
256 | } | ||
257 | static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) | 254 | static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) |
258 | { | 255 | { |
259 | } | 256 | } |
@@ -1223,6 +1220,12 @@ void __cpuinit cpu_init(void) | |||
1223 | int cpu; | 1220 | int cpu; |
1224 | int i; | 1221 | int i; |
1225 | 1222 | ||
1223 | /* | ||
1224 | * Load microcode on this cpu if a valid microcode is available. | ||
1225 | * This is the early microcode loading procedure. | ||
1226 | */ | ||
1227 | load_ucode_ap(); | ||
1228 | |||
1226 | cpu = stack_smp_processor_id(); | 1229 | cpu = stack_smp_processor_id(); |
1227 | t = &per_cpu(init_tss, cpu); | 1230 | t = &per_cpu(init_tss, cpu); |
1228 | oist = &per_cpu(orig_ist, cpu); | 1231 | oist = &per_cpu(orig_ist, cpu); |
@@ -1314,6 +1317,8 @@ void __cpuinit cpu_init(void) | |||
1314 | struct tss_struct *t = &per_cpu(init_tss, cpu); | 1317 | struct tss_struct *t = &per_cpu(init_tss, cpu); |
1315 | struct thread_struct *thread = &curr->thread; | 1318 | struct thread_struct *thread = &curr->thread; |
1316 | 1319 | ||
1320 | show_ucode_info_early(); | ||
1321 | |||
1317 | if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) { | 1322 | if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) { |
1318 | printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); | 1323 | printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); |
1319 | for (;;) | 1324 | for (;;) |
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c index a8f8fa9769d6..1e7e84a02eba 100644 --- a/arch/x86/kernel/cpu/hypervisor.c +++ b/arch/x86/kernel/cpu/hypervisor.c | |||
@@ -79,3 +79,10 @@ void __init init_hypervisor_platform(void) | |||
79 | if (x86_hyper->init_platform) | 79 | if (x86_hyper->init_platform) |
80 | x86_hyper->init_platform(); | 80 | x86_hyper->init_platform(); |
81 | } | 81 | } |
82 | |||
83 | bool __init hypervisor_x2apic_available(void) | ||
84 | { | ||
85 | return x86_hyper && | ||
86 | x86_hyper->x2apic_available && | ||
87 | x86_hyper->x2apic_available(); | ||
88 | } | ||
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index fcaabd0432c5..1905ce98bee0 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
@@ -17,7 +17,6 @@ | |||
17 | 17 | ||
18 | #ifdef CONFIG_X86_64 | 18 | #ifdef CONFIG_X86_64 |
19 | #include <linux/topology.h> | 19 | #include <linux/topology.h> |
20 | #include <asm/numa_64.h> | ||
21 | #endif | 20 | #endif |
22 | 21 | ||
23 | #include "cpu.h" | 22 | #include "cpu.h" |
@@ -168,7 +167,7 @@ int __cpuinit ppro_with_ram_bug(void) | |||
168 | #ifdef CONFIG_X86_F00F_BUG | 167 | #ifdef CONFIG_X86_F00F_BUG |
169 | static void __cpuinit trap_init_f00f_bug(void) | 168 | static void __cpuinit trap_init_f00f_bug(void) |
170 | { | 169 | { |
171 | __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); | 170 | __set_fixmap(FIX_F00F_IDT, __pa_symbol(idt_table), PAGE_KERNEL_RO); |
172 | 171 | ||
173 | /* | 172 | /* |
174 | * Update the IDT descriptor and reload the IDT so that | 173 | * Update the IDT descriptor and reload the IDT so that |
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index fe9edec6698a..7c6f7d548c0f 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c | |||
@@ -298,8 +298,7 @@ struct _cache_attr { | |||
298 | unsigned int); | 298 | unsigned int); |
299 | }; | 299 | }; |
300 | 300 | ||
301 | #ifdef CONFIG_AMD_NB | 301 | #if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS) |
302 | |||
303 | /* | 302 | /* |
304 | * L3 cache descriptors | 303 | * L3 cache descriptors |
305 | */ | 304 | */ |
@@ -524,9 +523,9 @@ store_subcaches(struct _cpuid4_info *this_leaf, const char *buf, size_t count, | |||
524 | static struct _cache_attr subcaches = | 523 | static struct _cache_attr subcaches = |
525 | __ATTR(subcaches, 0644, show_subcaches, store_subcaches); | 524 | __ATTR(subcaches, 0644, show_subcaches, store_subcaches); |
526 | 525 | ||
527 | #else /* CONFIG_AMD_NB */ | 526 | #else |
528 | #define amd_init_l3_cache(x, y) | 527 | #define amd_init_l3_cache(x, y) |
529 | #endif /* CONFIG_AMD_NB */ | 528 | #endif /* CONFIG_AMD_NB && CONFIG_SYSFS */ |
530 | 529 | ||
531 | static int | 530 | static int |
532 | __cpuinit cpuid4_cache_lookup_regs(int index, | 531 | __cpuinit cpuid4_cache_lookup_regs(int index, |
@@ -1227,7 +1226,7 @@ static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = { | |||
1227 | .notifier_call = cacheinfo_cpu_callback, | 1226 | .notifier_call = cacheinfo_cpu_callback, |
1228 | }; | 1227 | }; |
1229 | 1228 | ||
1230 | static int __cpuinit cache_sysfs_init(void) | 1229 | static int __init cache_sysfs_init(void) |
1231 | { | 1230 | { |
1232 | int i; | 1231 | int i; |
1233 | 1232 | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index 6a05c1d327a9..5b7d4fa5d3b7 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h | |||
@@ -24,8 +24,6 @@ struct mce_bank { | |||
24 | int mce_severity(struct mce *a, int tolerant, char **msg); | 24 | int mce_severity(struct mce *a, int tolerant, char **msg); |
25 | struct dentry *mce_get_debugfs_dir(void); | 25 | struct dentry *mce_get_debugfs_dir(void); |
26 | 26 | ||
27 | extern int mce_ser; | ||
28 | |||
29 | extern struct mce_bank *mce_banks; | 27 | extern struct mce_bank *mce_banks; |
30 | 28 | ||
31 | #ifdef CONFIG_X86_MCE_INTEL | 29 | #ifdef CONFIG_X86_MCE_INTEL |
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c index 13017626f9a8..beb1f1689e52 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c | |||
@@ -193,9 +193,9 @@ int mce_severity(struct mce *m, int tolerant, char **msg) | |||
193 | continue; | 193 | continue; |
194 | if ((m->mcgstatus & s->mcgmask) != s->mcgres) | 194 | if ((m->mcgstatus & s->mcgmask) != s->mcgres) |
195 | continue; | 195 | continue; |
196 | if (s->ser == SER_REQUIRED && !mce_ser) | 196 | if (s->ser == SER_REQUIRED && !mca_cfg.ser) |
197 | continue; | 197 | continue; |
198 | if (s->ser == NO_SER && mce_ser) | 198 | if (s->ser == NO_SER && mca_cfg.ser) |
199 | continue; | 199 | continue; |
200 | if (s->context && ctx != s->context) | 200 | if (s->context && ctx != s->context) |
201 | continue; | 201 | continue; |
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 46cbf8689692..7bc126346ace 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -58,34 +58,26 @@ static DEFINE_MUTEX(mce_chrdev_read_mutex); | |||
58 | #define CREATE_TRACE_POINTS | 58 | #define CREATE_TRACE_POINTS |
59 | #include <trace/events/mce.h> | 59 | #include <trace/events/mce.h> |
60 | 60 | ||
61 | int mce_disabled __read_mostly; | ||
62 | |||
63 | #define SPINUNIT 100 /* 100ns */ | 61 | #define SPINUNIT 100 /* 100ns */ |
64 | 62 | ||
65 | atomic_t mce_entry; | 63 | atomic_t mce_entry; |
66 | 64 | ||
67 | DEFINE_PER_CPU(unsigned, mce_exception_count); | 65 | DEFINE_PER_CPU(unsigned, mce_exception_count); |
68 | 66 | ||
69 | /* | 67 | struct mce_bank *mce_banks __read_mostly; |
70 | * Tolerant levels: | 68 | |
71 | * 0: always panic on uncorrected errors, log corrected errors | 69 | struct mca_config mca_cfg __read_mostly = { |
72 | * 1: panic or SIGBUS on uncorrected errors, log corrected errors | 70 | .bootlog = -1, |
73 | * 2: SIGBUS or log uncorrected errors (if possible), log corrected errors | 71 | /* |
74 | * 3: never panic or SIGBUS, log all errors (for testing only) | 72 | * Tolerant levels: |
75 | */ | 73 | * 0: always panic on uncorrected errors, log corrected errors |
76 | static int tolerant __read_mostly = 1; | 74 | * 1: panic or SIGBUS on uncorrected errors, log corrected errors |
77 | static int banks __read_mostly; | 75 | * 2: SIGBUS or log uncorrected errors (if possible), log corr. errors |
78 | static int rip_msr __read_mostly; | 76 | * 3: never panic or SIGBUS, log all errors (for testing only) |
79 | static int mce_bootlog __read_mostly = -1; | 77 | */ |
80 | static int monarch_timeout __read_mostly = -1; | 78 | .tolerant = 1, |
81 | static int mce_panic_timeout __read_mostly; | 79 | .monarch_timeout = -1 |
82 | static int mce_dont_log_ce __read_mostly; | 80 | }; |
83 | int mce_cmci_disabled __read_mostly; | ||
84 | int mce_ignore_ce __read_mostly; | ||
85 | int mce_ser __read_mostly; | ||
86 | int mce_bios_cmci_threshold __read_mostly; | ||
87 | |||
88 | struct mce_bank *mce_banks __read_mostly; | ||
89 | 81 | ||
90 | /* User mode helper program triggered by machine check event */ | 82 | /* User mode helper program triggered by machine check event */ |
91 | static unsigned long mce_need_notify; | 83 | static unsigned long mce_need_notify; |
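
The new mca_config above replaces the scattered file-scope knobs with one struct; the only non-zero defaults (bootlog, tolerant, monarch_timeout) are set with designated initializers and every unnamed field starts at zero. A minimal standalone sketch of that pattern (illustrative subset of fields, not the real struct layout):

#include <stdio.h>
#include <stdbool.h>

struct mca_config_sketch {               /* illustrative subset, not the real layout */
	bool disabled, ser, dont_log_ce, ignore_ce;
	int tolerant, monarch_timeout, bootlog, banks;
};

static struct mca_config_sketch cfg = {
	.bootlog = -1,                   /* same non-zero defaults as above */
	.tolerant = 1,
	.monarch_timeout = -1,
};

int main(void)
{
	printf("tolerant=%d bootlog=%d monarch_timeout=%d disabled=%d banks=%d\n",
	       cfg.tolerant, cfg.bootlog, cfg.monarch_timeout, cfg.disabled, cfg.banks);
	return 0;
}
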
@@ -302,7 +294,7 @@ static void wait_for_panic(void) | |||
302 | while (timeout-- > 0) | 294 | while (timeout-- > 0) |
303 | udelay(1); | 295 | udelay(1); |
304 | if (panic_timeout == 0) | 296 | if (panic_timeout == 0) |
305 | panic_timeout = mce_panic_timeout; | 297 | panic_timeout = mca_cfg.panic_timeout; |
306 | panic("Panicing machine check CPU died"); | 298 | panic("Panicing machine check CPU died"); |
307 | } | 299 | } |
308 | 300 | ||
@@ -360,7 +352,7 @@ static void mce_panic(char *msg, struct mce *final, char *exp) | |||
360 | pr_emerg(HW_ERR "Machine check: %s\n", exp); | 352 | pr_emerg(HW_ERR "Machine check: %s\n", exp); |
361 | if (!fake_panic) { | 353 | if (!fake_panic) { |
362 | if (panic_timeout == 0) | 354 | if (panic_timeout == 0) |
363 | panic_timeout = mce_panic_timeout; | 355 | panic_timeout = mca_cfg.panic_timeout; |
364 | panic(msg); | 356 | panic(msg); |
365 | } else | 357 | } else |
366 | pr_emerg(HW_ERR "Fake kernel panic: %s\n", msg); | 358 | pr_emerg(HW_ERR "Fake kernel panic: %s\n", msg); |
@@ -372,7 +364,7 @@ static int msr_to_offset(u32 msr) | |||
372 | { | 364 | { |
373 | unsigned bank = __this_cpu_read(injectm.bank); | 365 | unsigned bank = __this_cpu_read(injectm.bank); |
374 | 366 | ||
375 | if (msr == rip_msr) | 367 | if (msr == mca_cfg.rip_msr) |
376 | return offsetof(struct mce, ip); | 368 | return offsetof(struct mce, ip); |
377 | if (msr == MSR_IA32_MCx_STATUS(bank)) | 369 | if (msr == MSR_IA32_MCx_STATUS(bank)) |
378 | return offsetof(struct mce, status); | 370 | return offsetof(struct mce, status); |
@@ -451,8 +443,8 @@ static inline void mce_gather_info(struct mce *m, struct pt_regs *regs) | |||
451 | m->cs |= 3; | 443 | m->cs |= 3; |
452 | } | 444 | } |
453 | /* Use accurate RIP reporting if available. */ | 445 | /* Use accurate RIP reporting if available. */ |
454 | if (rip_msr) | 446 | if (mca_cfg.rip_msr) |
455 | m->ip = mce_rdmsrl(rip_msr); | 447 | m->ip = mce_rdmsrl(mca_cfg.rip_msr); |
456 | } | 448 | } |
457 | } | 449 | } |
458 | 450 | ||
@@ -513,18 +505,15 @@ static int mce_ring_add(unsigned long pfn) | |||
513 | 505 | ||
514 | int mce_available(struct cpuinfo_x86 *c) | 506 | int mce_available(struct cpuinfo_x86 *c) |
515 | { | 507 | { |
516 | if (mce_disabled) | 508 | if (mca_cfg.disabled) |
517 | return 0; | 509 | return 0; |
518 | return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA); | 510 | return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA); |
519 | } | 511 | } |
520 | 512 | ||
521 | static void mce_schedule_work(void) | 513 | static void mce_schedule_work(void) |
522 | { | 514 | { |
523 | if (!mce_ring_empty()) { | 515 | if (!mce_ring_empty()) |
524 | struct work_struct *work = &__get_cpu_var(mce_work); | 516 | schedule_work(&__get_cpu_var(mce_work)); |
525 | if (!work_pending(work)) | ||
526 | schedule_work(work); | ||
527 | } | ||
528 | } | 517 | } |
529 | 518 | ||
530 | DEFINE_PER_CPU(struct irq_work, mce_irq_work); | 519 | DEFINE_PER_CPU(struct irq_work, mce_irq_work); |
@@ -565,7 +554,7 @@ static void mce_read_aux(struct mce *m, int i) | |||
565 | /* | 554 | /* |
566 | * Mask the reported address by the reported granularity. | 555 | * Mask the reported address by the reported granularity. |
567 | */ | 556 | */ |
568 | if (mce_ser && (m->status & MCI_STATUS_MISCV)) { | 557 | if (mca_cfg.ser && (m->status & MCI_STATUS_MISCV)) { |
569 | u8 shift = MCI_MISC_ADDR_LSB(m->misc); | 558 | u8 shift = MCI_MISC_ADDR_LSB(m->misc); |
570 | m->addr >>= shift; | 559 | m->addr >>= shift; |
571 | m->addr <<= shift; | 560 | m->addr <<= shift; |
@@ -599,7 +588,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | |||
599 | 588 | ||
600 | mce_gather_info(&m, NULL); | 589 | mce_gather_info(&m, NULL); |
601 | 590 | ||
602 | for (i = 0; i < banks; i++) { | 591 | for (i = 0; i < mca_cfg.banks; i++) { |
603 | if (!mce_banks[i].ctl || !test_bit(i, *b)) | 592 | if (!mce_banks[i].ctl || !test_bit(i, *b)) |
604 | continue; | 593 | continue; |
605 | 594 | ||
@@ -620,7 +609,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | |||
620 | * TBD do the same check for MCI_STATUS_EN here? | 609 | * TBD do the same check for MCI_STATUS_EN here? |
621 | */ | 610 | */ |
622 | if (!(flags & MCP_UC) && | 611 | if (!(flags & MCP_UC) && |
623 | (m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC))) | 612 | (m.status & (mca_cfg.ser ? MCI_STATUS_S : MCI_STATUS_UC))) |
624 | continue; | 613 | continue; |
625 | 614 | ||
626 | mce_read_aux(&m, i); | 615 | mce_read_aux(&m, i); |
@@ -631,7 +620,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | |||
631 | * Don't get the IP here because it's unlikely to | 620 | * Don't get the IP here because it's unlikely to |
632 | * have anything to do with the actual error location. | 621 | * have anything to do with the actual error location. |
633 | */ | 622 | */ |
634 | if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) | 623 | if (!(flags & MCP_DONTLOG) && !mca_cfg.dont_log_ce) |
635 | mce_log(&m); | 624 | mce_log(&m); |
636 | 625 | ||
637 | /* | 626 | /* |
@@ -658,14 +647,14 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp, | |||
658 | { | 647 | { |
659 | int i, ret = 0; | 648 | int i, ret = 0; |
660 | 649 | ||
661 | for (i = 0; i < banks; i++) { | 650 | for (i = 0; i < mca_cfg.banks; i++) { |
662 | m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); | 651 | m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); |
663 | if (m->status & MCI_STATUS_VAL) { | 652 | if (m->status & MCI_STATUS_VAL) { |
664 | __set_bit(i, validp); | 653 | __set_bit(i, validp); |
665 | if (quirk_no_way_out) | 654 | if (quirk_no_way_out) |
666 | quirk_no_way_out(i, m, regs); | 655 | quirk_no_way_out(i, m, regs); |
667 | } | 656 | } |
668 | if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY) | 657 | if (mce_severity(m, mca_cfg.tolerant, msg) >= MCE_PANIC_SEVERITY) |
669 | ret = 1; | 658 | ret = 1; |
670 | } | 659 | } |
671 | return ret; | 660 | return ret; |
@@ -696,11 +685,11 @@ static int mce_timed_out(u64 *t) | |||
696 | rmb(); | 685 | rmb(); |
697 | if (atomic_read(&mce_paniced)) | 686 | if (atomic_read(&mce_paniced)) |
698 | wait_for_panic(); | 687 | wait_for_panic(); |
699 | if (!monarch_timeout) | 688 | if (!mca_cfg.monarch_timeout) |
700 | goto out; | 689 | goto out; |
701 | if ((s64)*t < SPINUNIT) { | 690 | if ((s64)*t < SPINUNIT) { |
702 | /* CHECKME: Make panic default for 1 too? */ | 691 | /* CHECKME: Make panic default for 1 too? */ |
703 | if (tolerant < 1) | 692 | if (mca_cfg.tolerant < 1) |
704 | mce_panic("Timeout synchronizing machine check over CPUs", | 693 | mce_panic("Timeout synchronizing machine check over CPUs", |
705 | NULL, NULL); | 694 | NULL, NULL); |
706 | cpu_missing = 1; | 695 | cpu_missing = 1; |
@@ -750,7 +739,8 @@ static void mce_reign(void) | |||
750 | * Grade the severity of the errors of all the CPUs. | 739 | * Grade the severity of the errors of all the CPUs. |
751 | */ | 740 | */ |
752 | for_each_possible_cpu(cpu) { | 741 | for_each_possible_cpu(cpu) { |
753 | int severity = mce_severity(&per_cpu(mces_seen, cpu), tolerant, | 742 | int severity = mce_severity(&per_cpu(mces_seen, cpu), |
743 | mca_cfg.tolerant, | ||
754 | &nmsg); | 744 | &nmsg); |
755 | if (severity > global_worst) { | 745 | if (severity > global_worst) { |
756 | msg = nmsg; | 746 | msg = nmsg; |
@@ -764,7 +754,7 @@ static void mce_reign(void) | |||
764 | * This dumps all the mces in the log buffer and stops the | 754 | * This dumps all the mces in the log buffer and stops the |
765 | * other CPUs. | 755 | * other CPUs. |
766 | */ | 756 | */ |
767 | if (m && global_worst >= MCE_PANIC_SEVERITY && tolerant < 3) | 757 | if (m && global_worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) |
768 | mce_panic("Fatal Machine check", m, msg); | 758 | mce_panic("Fatal Machine check", m, msg); |
769 | 759 | ||
770 | /* | 760 | /* |
@@ -777,7 +767,7 @@ static void mce_reign(void) | |||
777 | * No machine check event found. Must be some external | 767 | * No machine check event found. Must be some external |
778 | * source or one CPU is hung. Panic. | 768 | * source or one CPU is hung. Panic. |
779 | */ | 769 | */ |
780 | if (global_worst <= MCE_KEEP_SEVERITY && tolerant < 3) | 770 | if (global_worst <= MCE_KEEP_SEVERITY && mca_cfg.tolerant < 3) |
781 | mce_panic("Machine check from unknown source", NULL, NULL); | 771 | mce_panic("Machine check from unknown source", NULL, NULL); |
782 | 772 | ||
783 | /* | 773 | /* |
@@ -801,7 +791,7 @@ static int mce_start(int *no_way_out) | |||
801 | { | 791 | { |
802 | int order; | 792 | int order; |
803 | int cpus = num_online_cpus(); | 793 | int cpus = num_online_cpus(); |
804 | u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC; | 794 | u64 timeout = (u64)mca_cfg.monarch_timeout * NSEC_PER_USEC; |
805 | 795 | ||
806 | if (!timeout) | 796 | if (!timeout) |
807 | return -1; | 797 | return -1; |
@@ -865,7 +855,7 @@ static int mce_start(int *no_way_out) | |||
865 | static int mce_end(int order) | 855 | static int mce_end(int order) |
866 | { | 856 | { |
867 | int ret = -1; | 857 | int ret = -1; |
868 | u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC; | 858 | u64 timeout = (u64)mca_cfg.monarch_timeout * NSEC_PER_USEC; |
869 | 859 | ||
870 | if (!timeout) | 860 | if (!timeout) |
871 | goto reset; | 861 | goto reset; |
@@ -946,7 +936,7 @@ static void mce_clear_state(unsigned long *toclear) | |||
946 | { | 936 | { |
947 | int i; | 937 | int i; |
948 | 938 | ||
949 | for (i = 0; i < banks; i++) { | 939 | for (i = 0; i < mca_cfg.banks; i++) { |
950 | if (test_bit(i, toclear)) | 940 | if (test_bit(i, toclear)) |
951 | mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0); | 941 | mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0); |
952 | } | 942 | } |
@@ -1011,6 +1001,7 @@ static void mce_clear_info(struct mce_info *mi) | |||
1011 | */ | 1001 | */ |
1012 | void do_machine_check(struct pt_regs *regs, long error_code) | 1002 | void do_machine_check(struct pt_regs *regs, long error_code) |
1013 | { | 1003 | { |
1004 | struct mca_config *cfg = &mca_cfg; | ||
1014 | struct mce m, *final; | 1005 | struct mce m, *final; |
1015 | int i; | 1006 | int i; |
1016 | int worst = 0; | 1007 | int worst = 0; |
@@ -1022,7 +1013,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
1022 | int order; | 1013 | int order; |
1023 | /* | 1014 | /* |
1024 | * If no_way_out gets set, there is no safe way to recover from this | 1015 | * If no_way_out gets set, there is no safe way to recover from this |
1025 | * MCE. If tolerant is cranked up, we'll try anyway. | 1016 | * MCE. If mca_cfg.tolerant is cranked up, we'll try anyway. |
1026 | */ | 1017 | */ |
1027 | int no_way_out = 0; | 1018 | int no_way_out = 0; |
1028 | /* | 1019 | /* |
@@ -1038,7 +1029,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
1038 | 1029 | ||
1039 | this_cpu_inc(mce_exception_count); | 1030 | this_cpu_inc(mce_exception_count); |
1040 | 1031 | ||
1041 | if (!banks) | 1032 | if (!cfg->banks) |
1042 | goto out; | 1033 | goto out; |
1043 | 1034 | ||
1044 | mce_gather_info(&m, regs); | 1035 | mce_gather_info(&m, regs); |
@@ -1065,7 +1056,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
1065 | * because the first one to see it will clear it. | 1056 | * because the first one to see it will clear it. |
1066 | */ | 1057 | */ |
1067 | order = mce_start(&no_way_out); | 1058 | order = mce_start(&no_way_out); |
1068 | for (i = 0; i < banks; i++) { | 1059 | for (i = 0; i < cfg->banks; i++) { |
1069 | __clear_bit(i, toclear); | 1060 | __clear_bit(i, toclear); |
1070 | if (!test_bit(i, valid_banks)) | 1061 | if (!test_bit(i, valid_banks)) |
1071 | continue; | 1062 | continue; |
@@ -1084,16 +1075,16 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
1084 | * Non uncorrected or non signaled errors are handled by | 1075 | * Non uncorrected or non signaled errors are handled by |
1085 | * machine_check_poll. Leave them alone, unless this panics. | 1076 | * machine_check_poll. Leave them alone, unless this panics. |
1086 | */ | 1077 | */ |
1087 | if (!(m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC)) && | 1078 | if (!(m.status & (cfg->ser ? MCI_STATUS_S : MCI_STATUS_UC)) && |
1088 | !no_way_out) | 1079 | !no_way_out) |
1089 | continue; | 1080 | continue; |
1090 | 1081 | ||
1091 | /* | 1082 | /* |
1092 | * Set taint even when machine check was not enabled. | 1083 | * Set taint even when machine check was not enabled. |
1093 | */ | 1084 | */ |
1094 | add_taint(TAINT_MACHINE_CHECK); | 1085 | add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); |
1095 | 1086 | ||
1096 | severity = mce_severity(&m, tolerant, NULL); | 1087 | severity = mce_severity(&m, cfg->tolerant, NULL); |
1097 | 1088 | ||
1098 | /* | 1089 | /* |
1099 | * When machine check was for corrected handler don't touch, | 1090 | * When machine check was for corrected handler don't touch, |
@@ -1117,7 +1108,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
1117 | * When the ring overflows we just ignore the AO error. | 1108 | * When the ring overflows we just ignore the AO error. |
1118 | * RED-PEN add some logging mechanism when | 1109 | * RED-PEN add some logging mechanism when |
1119 | * usable_address or mce_add_ring fails. | 1110 | * usable_address or mce_add_ring fails. |
1120 | * RED-PEN don't ignore overflow for tolerant == 0 | 1111 | * RED-PEN don't ignore overflow for mca_cfg.tolerant == 0 |
1121 | */ | 1112 | */ |
1122 | if (severity == MCE_AO_SEVERITY && mce_usable_address(&m)) | 1113 | if (severity == MCE_AO_SEVERITY && mce_usable_address(&m)) |
1123 | mce_ring_add(m.addr >> PAGE_SHIFT); | 1114 | mce_ring_add(m.addr >> PAGE_SHIFT); |
@@ -1149,7 +1140,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
1149 | * issues we try to recover, or limit damage to the current | 1140 | * issues we try to recover, or limit damage to the current |
1150 | * process. | 1141 | * process. |
1151 | */ | 1142 | */ |
1152 | if (tolerant < 3) { | 1143 | if (cfg->tolerant < 3) { |
1153 | if (no_way_out) | 1144 | if (no_way_out) |
1154 | mce_panic("Fatal machine check on current CPU", &m, msg); | 1145 | mce_panic("Fatal machine check on current CPU", &m, msg); |
1155 | if (worst == MCE_AR_SEVERITY) { | 1146 | if (worst == MCE_AR_SEVERITY) { |
@@ -1357,12 +1348,7 @@ int mce_notify_irq(void) | |||
1357 | /* wake processes polling /dev/mcelog */ | 1348 | /* wake processes polling /dev/mcelog */ |
1358 | wake_up_interruptible(&mce_chrdev_wait); | 1349 | wake_up_interruptible(&mce_chrdev_wait); |
1359 | 1350 | ||
1360 | /* | 1351 | if (mce_helper[0]) |
1361 | * There is no risk of missing notifications because | ||
1362 | * work_pending is always cleared before the function is | ||
1363 | * executed. | ||
1364 | */ | ||
1365 | if (mce_helper[0] && !work_pending(&mce_trigger_work)) | ||
1366 | schedule_work(&mce_trigger_work); | 1352 | schedule_work(&mce_trigger_work); |
1367 | 1353 | ||
1368 | if (__ratelimit(&ratelimit)) | 1354 | if (__ratelimit(&ratelimit)) |
@@ -1377,11 +1363,13 @@ EXPORT_SYMBOL_GPL(mce_notify_irq); | |||
1377 | static int __cpuinit __mcheck_cpu_mce_banks_init(void) | 1363 | static int __cpuinit __mcheck_cpu_mce_banks_init(void) |
1378 | { | 1364 | { |
1379 | int i; | 1365 | int i; |
1366 | u8 num_banks = mca_cfg.banks; | ||
1380 | 1367 | ||
1381 | mce_banks = kzalloc(banks * sizeof(struct mce_bank), GFP_KERNEL); | 1368 | mce_banks = kzalloc(num_banks * sizeof(struct mce_bank), GFP_KERNEL); |
1382 | if (!mce_banks) | 1369 | if (!mce_banks) |
1383 | return -ENOMEM; | 1370 | return -ENOMEM; |
1384 | for (i = 0; i < banks; i++) { | 1371 | |
1372 | for (i = 0; i < num_banks; i++) { | ||
1385 | struct mce_bank *b = &mce_banks[i]; | 1373 | struct mce_bank *b = &mce_banks[i]; |
1386 | 1374 | ||
1387 | b->ctl = -1ULL; | 1375 | b->ctl = -1ULL; |
@@ -1401,7 +1389,7 @@ static int __cpuinit __mcheck_cpu_cap_init(void) | |||
1401 | rdmsrl(MSR_IA32_MCG_CAP, cap); | 1389 | rdmsrl(MSR_IA32_MCG_CAP, cap); |
1402 | 1390 | ||
1403 | b = cap & MCG_BANKCNT_MASK; | 1391 | b = cap & MCG_BANKCNT_MASK; |
1404 | if (!banks) | 1392 | if (!mca_cfg.banks) |
1405 | pr_info("CPU supports %d MCE banks\n", b); | 1393 | pr_info("CPU supports %d MCE banks\n", b); |
1406 | 1394 | ||
1407 | if (b > MAX_NR_BANKS) { | 1395 | if (b > MAX_NR_BANKS) { |
@@ -1411,8 +1399,9 @@ static int __cpuinit __mcheck_cpu_cap_init(void) | |||
1411 | } | 1399 | } |
1412 | 1400 | ||
1413 | /* Don't support asymmetric configurations today */ | 1401 | /* Don't support asymmetric configurations today */ |
1414 | WARN_ON(banks != 0 && b != banks); | 1402 | WARN_ON(mca_cfg.banks != 0 && b != mca_cfg.banks); |
1415 | banks = b; | 1403 | mca_cfg.banks = b; |
1404 | |||
1416 | if (!mce_banks) { | 1405 | if (!mce_banks) { |
1417 | int err = __mcheck_cpu_mce_banks_init(); | 1406 | int err = __mcheck_cpu_mce_banks_init(); |
1418 | 1407 | ||
@@ -1422,25 +1411,29 @@ static int __cpuinit __mcheck_cpu_cap_init(void) | |||
1422 | 1411 | ||
1423 | /* Use accurate RIP reporting if available. */ | 1412 | /* Use accurate RIP reporting if available. */ |
1424 | if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9) | 1413 | if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9) |
1425 | rip_msr = MSR_IA32_MCG_EIP; | 1414 | mca_cfg.rip_msr = MSR_IA32_MCG_EIP; |
1426 | 1415 | ||
1427 | if (cap & MCG_SER_P) | 1416 | if (cap & MCG_SER_P) |
1428 | mce_ser = 1; | 1417 | mca_cfg.ser = true; |
1429 | 1418 | ||
1430 | return 0; | 1419 | return 0; |
1431 | } | 1420 | } |
1432 | 1421 | ||
1433 | static void __mcheck_cpu_init_generic(void) | 1422 | static void __mcheck_cpu_init_generic(void) |
1434 | { | 1423 | { |
1424 | enum mcp_flags m_fl = 0; | ||
1435 | mce_banks_t all_banks; | 1425 | mce_banks_t all_banks; |
1436 | u64 cap; | 1426 | u64 cap; |
1437 | int i; | 1427 | int i; |
1438 | 1428 | ||
1429 | if (!mca_cfg.bootlog) | ||
1430 | m_fl = MCP_DONTLOG; | ||
1431 | |||
1439 | /* | 1432 | /* |
1440 | * Log the machine checks left over from the previous reset. | 1433 | * Log the machine checks left over from the previous reset. |
1441 | */ | 1434 | */ |
1442 | bitmap_fill(all_banks, MAX_NR_BANKS); | 1435 | bitmap_fill(all_banks, MAX_NR_BANKS); |
1443 | machine_check_poll(MCP_UC|(!mce_bootlog ? MCP_DONTLOG : 0), &all_banks); | 1436 | machine_check_poll(MCP_UC | m_fl, &all_banks); |
1444 | 1437 | ||
1445 | set_in_cr4(X86_CR4_MCE); | 1438 | set_in_cr4(X86_CR4_MCE); |
1446 | 1439 | ||
@@ -1448,7 +1441,7 @@ static void __mcheck_cpu_init_generic(void) | |||
1448 | if (cap & MCG_CTL_P) | 1441 | if (cap & MCG_CTL_P) |
1449 | wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); | 1442 | wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); |
1450 | 1443 | ||
1451 | for (i = 0; i < banks; i++) { | 1444 | for (i = 0; i < mca_cfg.banks; i++) { |
1452 | struct mce_bank *b = &mce_banks[i]; | 1445 | struct mce_bank *b = &mce_banks[i]; |
1453 | 1446 | ||
1454 | if (!b->init) | 1447 | if (!b->init) |
@@ -1489,6 +1482,8 @@ static void quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs) | |||
1489 | /* Add per CPU specific workarounds here */ | 1482 | /* Add per CPU specific workarounds here */ |
1490 | static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) | 1483 | static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) |
1491 | { | 1484 | { |
1485 | struct mca_config *cfg = &mca_cfg; | ||
1486 | |||
1492 | if (c->x86_vendor == X86_VENDOR_UNKNOWN) { | 1487 | if (c->x86_vendor == X86_VENDOR_UNKNOWN) { |
1493 | pr_info("unknown CPU type - not enabling MCE support\n"); | 1488 | pr_info("unknown CPU type - not enabling MCE support\n"); |
1494 | return -EOPNOTSUPP; | 1489 | return -EOPNOTSUPP; |
@@ -1496,7 +1491,7 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) | |||
1496 | 1491 | ||
1497 | /* This should be disabled by the BIOS, but isn't always */ | 1492 | /* This should be disabled by the BIOS, but isn't always */ |
1498 | if (c->x86_vendor == X86_VENDOR_AMD) { | 1493 | if (c->x86_vendor == X86_VENDOR_AMD) { |
1499 | if (c->x86 == 15 && banks > 4) { | 1494 | if (c->x86 == 15 && cfg->banks > 4) { |
1500 | /* | 1495 | /* |
1501 | * disable GART TBL walk error reporting, which | 1496 | * disable GART TBL walk error reporting, which |
1502 | * trips off incorrectly with the IOMMU & 3ware | 1497 | * trips off incorrectly with the IOMMU & 3ware |
@@ -1504,18 +1499,18 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) | |||
1504 | */ | 1499 | */ |
1505 | clear_bit(10, (unsigned long *)&mce_banks[4].ctl); | 1500 | clear_bit(10, (unsigned long *)&mce_banks[4].ctl); |
1506 | } | 1501 | } |
1507 | if (c->x86 <= 17 && mce_bootlog < 0) { | 1502 | if (c->x86 <= 17 && cfg->bootlog < 0) { |
1508 | /* | 1503 | /* |
1509 | * Lots of broken BIOS around that don't clear them | 1504 | * Lots of broken BIOS around that don't clear them |
1510 | * by default and leave crap in there. Don't log: | 1505 | * by default and leave crap in there. Don't log: |
1511 | */ | 1506 | */ |
1512 | mce_bootlog = 0; | 1507 | cfg->bootlog = 0; |
1513 | } | 1508 | } |
1514 | /* | 1509 | /* |
1515 | * Various K7s with broken bank 0 around. Always disable | 1510 | * Various K7s with broken bank 0 around. Always disable |
1516 | * by default. | 1511 | * by default. |
1517 | */ | 1512 | */ |
1518 | if (c->x86 == 6 && banks > 0) | 1513 | if (c->x86 == 6 && cfg->banks > 0) |
1519 | mce_banks[0].ctl = 0; | 1514 | mce_banks[0].ctl = 0; |
1520 | 1515 | ||
1521 | /* | 1516 | /* |
@@ -1566,7 +1561,7 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) | |||
1566 | * valid event later, merely don't write CTL0. | 1561 | * valid event later, merely don't write CTL0. |
1567 | */ | 1562 | */ |
1568 | 1563 | ||
1569 | if (c->x86 == 6 && c->x86_model < 0x1A && banks > 0) | 1564 | if (c->x86 == 6 && c->x86_model < 0x1A && cfg->banks > 0) |
1570 | mce_banks[0].init = 0; | 1565 | mce_banks[0].init = 0; |
1571 | 1566 | ||
1572 | /* | 1567 | /* |
@@ -1574,23 +1569,23 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) | |||
1574 | * synchronization with a one second timeout. | 1569 | * synchronization with a one second timeout. |
1575 | */ | 1570 | */ |
1576 | if ((c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xe)) && | 1571 | if ((c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xe)) && |
1577 | monarch_timeout < 0) | 1572 | cfg->monarch_timeout < 0) |
1578 | monarch_timeout = USEC_PER_SEC; | 1573 | cfg->monarch_timeout = USEC_PER_SEC; |
1579 | 1574 | ||
1580 | /* | 1575 | /* |
1581 | * There are also broken BIOSes on some Pentium M and | 1576 | * There are also broken BIOSes on some Pentium M and |
1582 | * earlier systems: | 1577 | * earlier systems: |
1583 | */ | 1578 | */ |
1584 | if (c->x86 == 6 && c->x86_model <= 13 && mce_bootlog < 0) | 1579 | if (c->x86 == 6 && c->x86_model <= 13 && cfg->bootlog < 0) |
1585 | mce_bootlog = 0; | 1580 | cfg->bootlog = 0; |
1586 | 1581 | ||
1587 | if (c->x86 == 6 && c->x86_model == 45) | 1582 | if (c->x86 == 6 && c->x86_model == 45) |
1588 | quirk_no_way_out = quirk_sandybridge_ifu; | 1583 | quirk_no_way_out = quirk_sandybridge_ifu; |
1589 | } | 1584 | } |
1590 | if (monarch_timeout < 0) | 1585 | if (cfg->monarch_timeout < 0) |
1591 | monarch_timeout = 0; | 1586 | cfg->monarch_timeout = 0; |
1592 | if (mce_bootlog != 0) | 1587 | if (cfg->bootlog != 0) |
1593 | mce_panic_timeout = 30; | 1588 | cfg->panic_timeout = 30; |
1594 | 1589 | ||
1595 | return 0; | 1590 | return 0; |
1596 | } | 1591 | } |
@@ -1635,7 +1630,7 @@ static void mce_start_timer(unsigned int cpu, struct timer_list *t) | |||
1635 | 1630 | ||
1636 | __this_cpu_write(mce_next_interval, iv); | 1631 | __this_cpu_write(mce_next_interval, iv); |
1637 | 1632 | ||
1638 | if (mce_ignore_ce || !iv) | 1633 | if (mca_cfg.ignore_ce || !iv) |
1639 | return; | 1634 | return; |
1640 | 1635 | ||
1641 | t->expires = round_jiffies(jiffies + iv); | 1636 | t->expires = round_jiffies(jiffies + iv); |
@@ -1668,7 +1663,7 @@ void (*machine_check_vector)(struct pt_regs *, long error_code) = | |||
1668 | */ | 1663 | */ |
1669 | void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c) | 1664 | void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c) |
1670 | { | 1665 | { |
1671 | if (mce_disabled) | 1666 | if (mca_cfg.disabled) |
1672 | return; | 1667 | return; |
1673 | 1668 | ||
1674 | if (__mcheck_cpu_ancient_init(c)) | 1669 | if (__mcheck_cpu_ancient_init(c)) |
@@ -1678,7 +1673,7 @@ void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c) | |||
1678 | return; | 1673 | return; |
1679 | 1674 | ||
1680 | if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) { | 1675 | if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) { |
1681 | mce_disabled = 1; | 1676 | mca_cfg.disabled = true; |
1682 | return; | 1677 | return; |
1683 | } | 1678 | } |
1684 | 1679 | ||
@@ -1951,6 +1946,8 @@ static struct miscdevice mce_chrdev_device = { | |||
1951 | */ | 1946 | */ |
1952 | static int __init mcheck_enable(char *str) | 1947 | static int __init mcheck_enable(char *str) |
1953 | { | 1948 | { |
1949 | struct mca_config *cfg = &mca_cfg; | ||
1950 | |||
1954 | if (*str == 0) { | 1951 | if (*str == 0) { |
1955 | enable_p5_mce(); | 1952 | enable_p5_mce(); |
1956 | return 1; | 1953 | return 1; |
@@ -1958,22 +1955,22 @@ static int __init mcheck_enable(char *str) | |||
1958 | if (*str == '=') | 1955 | if (*str == '=') |
1959 | str++; | 1956 | str++; |
1960 | if (!strcmp(str, "off")) | 1957 | if (!strcmp(str, "off")) |
1961 | mce_disabled = 1; | 1958 | cfg->disabled = true; |
1962 | else if (!strcmp(str, "no_cmci")) | 1959 | else if (!strcmp(str, "no_cmci")) |
1963 | mce_cmci_disabled = 1; | 1960 | cfg->cmci_disabled = true; |
1964 | else if (!strcmp(str, "dont_log_ce")) | 1961 | else if (!strcmp(str, "dont_log_ce")) |
1965 | mce_dont_log_ce = 1; | 1962 | cfg->dont_log_ce = true; |
1966 | else if (!strcmp(str, "ignore_ce")) | 1963 | else if (!strcmp(str, "ignore_ce")) |
1967 | mce_ignore_ce = 1; | 1964 | cfg->ignore_ce = true; |
1968 | else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog")) | 1965 | else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog")) |
1969 | mce_bootlog = (str[0] == 'b'); | 1966 | cfg->bootlog = (str[0] == 'b'); |
1970 | else if (!strcmp(str, "bios_cmci_threshold")) | 1967 | else if (!strcmp(str, "bios_cmci_threshold")) |
1971 | mce_bios_cmci_threshold = 1; | 1968 | cfg->bios_cmci_threshold = true; |
1972 | else if (isdigit(str[0])) { | 1969 | else if (isdigit(str[0])) { |
1973 | get_option(&str, &tolerant); | 1970 | get_option(&str, &(cfg->tolerant)); |
1974 | if (*str == ',') { | 1971 | if (*str == ',') { |
1975 | ++str; | 1972 | ++str; |
1976 | get_option(&str, &monarch_timeout); | 1973 | get_option(&str, &(cfg->monarch_timeout)); |
1977 | } | 1974 | } |
1978 | } else { | 1975 | } else { |
1979 | pr_info("mce argument %s ignored. Please use /sys\n", str); | 1976 | pr_info("mce argument %s ignored. Please use /sys\n", str); |
@@ -2002,7 +1999,7 @@ static int mce_disable_error_reporting(void) | |||
2002 | { | 1999 | { |
2003 | int i; | 2000 | int i; |
2004 | 2001 | ||
2005 | for (i = 0; i < banks; i++) { | 2002 | for (i = 0; i < mca_cfg.banks; i++) { |
2006 | struct mce_bank *b = &mce_banks[i]; | 2003 | struct mce_bank *b = &mce_banks[i]; |
2007 | 2004 | ||
2008 | if (b->init) | 2005 | if (b->init) |
@@ -2142,15 +2139,15 @@ static ssize_t set_ignore_ce(struct device *s, | |||
2142 | if (strict_strtoull(buf, 0, &new) < 0) | 2139 | if (strict_strtoull(buf, 0, &new) < 0) |
2143 | return -EINVAL; | 2140 | return -EINVAL; |
2144 | 2141 | ||
2145 | if (mce_ignore_ce ^ !!new) { | 2142 | if (mca_cfg.ignore_ce ^ !!new) { |
2146 | if (new) { | 2143 | if (new) { |
2147 | /* disable ce features */ | 2144 | /* disable ce features */ |
2148 | mce_timer_delete_all(); | 2145 | mce_timer_delete_all(); |
2149 | on_each_cpu(mce_disable_cmci, NULL, 1); | 2146 | on_each_cpu(mce_disable_cmci, NULL, 1); |
2150 | mce_ignore_ce = 1; | 2147 | mca_cfg.ignore_ce = true; |
2151 | } else { | 2148 | } else { |
2152 | /* enable ce features */ | 2149 | /* enable ce features */ |
2153 | mce_ignore_ce = 0; | 2150 | mca_cfg.ignore_ce = false; |
2154 | on_each_cpu(mce_enable_ce, (void *)1, 1); | 2151 | on_each_cpu(mce_enable_ce, (void *)1, 1); |
2155 | } | 2152 | } |
2156 | } | 2153 | } |
@@ -2166,14 +2163,14 @@ static ssize_t set_cmci_disabled(struct device *s, | |||
2166 | if (strict_strtoull(buf, 0, &new) < 0) | 2163 | if (strict_strtoull(buf, 0, &new) < 0) |
2167 | return -EINVAL; | 2164 | return -EINVAL; |
2168 | 2165 | ||
2169 | if (mce_cmci_disabled ^ !!new) { | 2166 | if (mca_cfg.cmci_disabled ^ !!new) { |
2170 | if (new) { | 2167 | if (new) { |
2171 | /* disable cmci */ | 2168 | /* disable cmci */ |
2172 | on_each_cpu(mce_disable_cmci, NULL, 1); | 2169 | on_each_cpu(mce_disable_cmci, NULL, 1); |
2173 | mce_cmci_disabled = 1; | 2170 | mca_cfg.cmci_disabled = true; |
2174 | } else { | 2171 | } else { |
2175 | /* enable cmci */ | 2172 | /* enable cmci */ |
2176 | mce_cmci_disabled = 0; | 2173 | mca_cfg.cmci_disabled = false; |
2177 | on_each_cpu(mce_enable_ce, NULL, 1); | 2174 | on_each_cpu(mce_enable_ce, NULL, 1); |
2178 | } | 2175 | } |
2179 | } | 2176 | } |
@@ -2190,9 +2187,9 @@ static ssize_t store_int_with_restart(struct device *s, | |||
2190 | } | 2187 | } |
2191 | 2188 | ||
2192 | static DEVICE_ATTR(trigger, 0644, show_trigger, set_trigger); | 2189 | static DEVICE_ATTR(trigger, 0644, show_trigger, set_trigger); |
2193 | static DEVICE_INT_ATTR(tolerant, 0644, tolerant); | 2190 | static DEVICE_INT_ATTR(tolerant, 0644, mca_cfg.tolerant); |
2194 | static DEVICE_INT_ATTR(monarch_timeout, 0644, monarch_timeout); | 2191 | static DEVICE_INT_ATTR(monarch_timeout, 0644, mca_cfg.monarch_timeout); |
2195 | static DEVICE_INT_ATTR(dont_log_ce, 0644, mce_dont_log_ce); | 2192 | static DEVICE_BOOL_ATTR(dont_log_ce, 0644, mca_cfg.dont_log_ce); |
2196 | 2193 | ||
2197 | static struct dev_ext_attribute dev_attr_check_interval = { | 2194 | static struct dev_ext_attribute dev_attr_check_interval = { |
2198 | __ATTR(check_interval, 0644, device_show_int, store_int_with_restart), | 2195 | __ATTR(check_interval, 0644, device_show_int, store_int_with_restart), |
@@ -2200,13 +2197,13 @@ static struct dev_ext_attribute dev_attr_check_interval = { | |||
2200 | }; | 2197 | }; |
2201 | 2198 | ||
2202 | static struct dev_ext_attribute dev_attr_ignore_ce = { | 2199 | static struct dev_ext_attribute dev_attr_ignore_ce = { |
2203 | __ATTR(ignore_ce, 0644, device_show_int, set_ignore_ce), | 2200 | __ATTR(ignore_ce, 0644, device_show_bool, set_ignore_ce), |
2204 | &mce_ignore_ce | 2201 | &mca_cfg.ignore_ce |
2205 | }; | 2202 | }; |
2206 | 2203 | ||
2207 | static struct dev_ext_attribute dev_attr_cmci_disabled = { | 2204 | static struct dev_ext_attribute dev_attr_cmci_disabled = { |
2208 | __ATTR(cmci_disabled, 0644, device_show_int, set_cmci_disabled), | 2205 | __ATTR(cmci_disabled, 0644, device_show_bool, set_cmci_disabled), |
2209 | &mce_cmci_disabled | 2206 | &mca_cfg.cmci_disabled |
2210 | }; | 2207 | }; |
2211 | 2208 | ||
2212 | static struct device_attribute *mce_device_attrs[] = { | 2209 | static struct device_attribute *mce_device_attrs[] = { |
@@ -2253,7 +2250,7 @@ static __cpuinit int mce_device_create(unsigned int cpu) | |||
2253 | if (err) | 2250 | if (err) |
2254 | goto error; | 2251 | goto error; |
2255 | } | 2252 | } |
2256 | for (j = 0; j < banks; j++) { | 2253 | for (j = 0; j < mca_cfg.banks; j++) { |
2257 | err = device_create_file(dev, &mce_banks[j].attr); | 2254 | err = device_create_file(dev, &mce_banks[j].attr); |
2258 | if (err) | 2255 | if (err) |
2259 | goto error2; | 2256 | goto error2; |
@@ -2285,7 +2282,7 @@ static __cpuinit void mce_device_remove(unsigned int cpu) | |||
2285 | for (i = 0; mce_device_attrs[i]; i++) | 2282 | for (i = 0; mce_device_attrs[i]; i++) |
2286 | device_remove_file(dev, mce_device_attrs[i]); | 2283 | device_remove_file(dev, mce_device_attrs[i]); |
2287 | 2284 | ||
2288 | for (i = 0; i < banks; i++) | 2285 | for (i = 0; i < mca_cfg.banks; i++) |
2289 | device_remove_file(dev, &mce_banks[i].attr); | 2286 | device_remove_file(dev, &mce_banks[i].attr); |
2290 | 2287 | ||
2291 | device_unregister(dev); | 2288 | device_unregister(dev); |
@@ -2304,7 +2301,7 @@ static void __cpuinit mce_disable_cpu(void *h) | |||
2304 | 2301 | ||
2305 | if (!(action & CPU_TASKS_FROZEN)) | 2302 | if (!(action & CPU_TASKS_FROZEN)) |
2306 | cmci_clear(); | 2303 | cmci_clear(); |
2307 | for (i = 0; i < banks; i++) { | 2304 | for (i = 0; i < mca_cfg.banks; i++) { |
2308 | struct mce_bank *b = &mce_banks[i]; | 2305 | struct mce_bank *b = &mce_banks[i]; |
2309 | 2306 | ||
2310 | if (b->init) | 2307 | if (b->init) |
@@ -2322,7 +2319,7 @@ static void __cpuinit mce_reenable_cpu(void *h) | |||
2322 | 2319 | ||
2323 | if (!(action & CPU_TASKS_FROZEN)) | 2320 | if (!(action & CPU_TASKS_FROZEN)) |
2324 | cmci_reenable(); | 2321 | cmci_reenable(); |
2325 | for (i = 0; i < banks; i++) { | 2322 | for (i = 0; i < mca_cfg.banks; i++) { |
2326 | struct mce_bank *b = &mce_banks[i]; | 2323 | struct mce_bank *b = &mce_banks[i]; |
2327 | 2324 | ||
2328 | if (b->init) | 2325 | if (b->init) |
@@ -2375,7 +2372,7 @@ static __init void mce_init_banks(void) | |||
2375 | { | 2372 | { |
2376 | int i; | 2373 | int i; |
2377 | 2374 | ||
2378 | for (i = 0; i < banks; i++) { | 2375 | for (i = 0; i < mca_cfg.banks; i++) { |
2379 | struct mce_bank *b = &mce_banks[i]; | 2376 | struct mce_bank *b = &mce_banks[i]; |
2380 | struct device_attribute *a = &b->attr; | 2377 | struct device_attribute *a = &b->attr; |
2381 | 2378 | ||
@@ -2426,7 +2423,7 @@ device_initcall_sync(mcheck_init_device); | |||
2426 | */ | 2423 | */ |
2427 | static int __init mcheck_disable(char *str) | 2424 | static int __init mcheck_disable(char *str) |
2428 | { | 2425 | { |
2429 | mce_disabled = 1; | 2426 | mca_cfg.disabled = true; |
2430 | return 1; | 2427 | return 1; |
2431 | } | 2428 | } |
2432 | __setup("nomce", mcheck_disable); | 2429 | __setup("nomce", mcheck_disable); |
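With the mce.c changes above, every mce= boot option now lands in struct mca_config instead of a scattered set of globals. Below is a minimal standalone sketch of that option parsing, assuming only the field names visible in the diff; get_int() is a stand-in for the kernel's get_option() and none of this is the actual kernel code.

#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Mirrors the fields touched by mcheck_enable() in the hunk above. */
struct mca_config_sketch {
	bool disabled, cmci_disabled, dont_log_ce, ignore_ce, bios_cmci_threshold;
	int bootlog, tolerant, monarch_timeout;
};

/* Stand-in for the kernel's get_option(): parse an int, advance the cursor. */
static void get_int(char **str, int *val)
{
	*val = (int)strtol(*str, str, 0);
}

static void mcheck_enable_sketch(char *str, struct mca_config_sketch *cfg)
{
	if (*str == '=')
		str++;
	if (!strcmp(str, "off"))
		cfg->disabled = true;
	else if (!strcmp(str, "no_cmci"))
		cfg->cmci_disabled = true;
	else if (!strcmp(str, "dont_log_ce"))
		cfg->dont_log_ce = true;
	else if (!strcmp(str, "ignore_ce"))
		cfg->ignore_ce = true;
	else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
		cfg->bootlog = (str[0] == 'b');
	else if (!strcmp(str, "bios_cmci_threshold"))
		cfg->bios_cmci_threshold = true;
	else if (isdigit((unsigned char)str[0])) {
		get_int(&str, &cfg->tolerant);
		if (*str == ',') {
			++str;
			get_int(&str, &cfg->monarch_timeout);
		}
	} else {
		printf("mce argument %s ignored\n", str);
	}
}

int main(void)
{
	struct mca_config_sketch cfg = { .bootlog = -1 };
	char arg[] = "=3,1000000";	/* mce=3,1000000: tolerant=3, monarch_timeout=1s */

	mcheck_enable_sketch(arg, &cfg);
	printf("tolerant=%d monarch_timeout=%d\n", cfg.tolerant, cfg.monarch_timeout);
	return 0;
}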
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 4f9a3cbfc4a3..402c454fbff0 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c | |||
@@ -53,7 +53,7 @@ static int cmci_supported(int *banks) | |||
53 | { | 53 | { |
54 | u64 cap; | 54 | u64 cap; |
55 | 55 | ||
56 | if (mce_cmci_disabled || mce_ignore_ce) | 56 | if (mca_cfg.cmci_disabled || mca_cfg.ignore_ce) |
57 | return 0; | 57 | return 0; |
58 | 58 | ||
59 | /* | 59 | /* |
@@ -200,7 +200,7 @@ static void cmci_discover(int banks) | |||
200 | continue; | 200 | continue; |
201 | } | 201 | } |
202 | 202 | ||
203 | if (!mce_bios_cmci_threshold) { | 203 | if (!mca_cfg.bios_cmci_threshold) { |
204 | val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK; | 204 | val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK; |
205 | val |= CMCI_THRESHOLD; | 205 | val |= CMCI_THRESHOLD; |
206 | } else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) { | 206 | } else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) { |
@@ -227,7 +227,7 @@ static void cmci_discover(int banks) | |||
227 | * set the thresholds properly or does not work with | 227 | * set the thresholds properly or does not work with |
228 | * this boot option. Note down now and report later. | 228 | * this boot option. Note down now and report later. |
229 | */ | 229 | */ |
230 | if (mce_bios_cmci_threshold && bios_zero_thresh && | 230 | if (mca_cfg.bios_cmci_threshold && bios_zero_thresh && |
231 | (val & MCI_CTL2_CMCI_THRESHOLD_MASK)) | 231 | (val & MCI_CTL2_CMCI_THRESHOLD_MASK)) |
232 | bios_wrong_thresh = 1; | 232 | bios_wrong_thresh = 1; |
233 | } else { | 233 | } else { |
@@ -235,7 +235,7 @@ static void cmci_discover(int banks) | |||
235 | } | 235 | } |
236 | } | 236 | } |
237 | raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); | 237 | raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); |
238 | if (mce_bios_cmci_threshold && bios_wrong_thresh) { | 238 | if (mca_cfg.bios_cmci_threshold && bios_wrong_thresh) { |
239 | pr_info_once( | 239 | pr_info_once( |
240 | "bios_cmci_threshold: Some banks do not have valid thresholds set\n"); | 240 | "bios_cmci_threshold: Some banks do not have valid thresholds set\n"); |
241 | pr_info_once( | 241 | pr_info_once( |
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c index 2d5454cd2c4f..1c044b1ccc59 100644 --- a/arch/x86/kernel/cpu/mcheck/p5.c +++ b/arch/x86/kernel/cpu/mcheck/p5.c | |||
@@ -33,7 +33,7 @@ static void pentium_machine_check(struct pt_regs *regs, long error_code) | |||
33 | smp_processor_id()); | 33 | smp_processor_id()); |
34 | } | 34 | } |
35 | 35 | ||
36 | add_taint(TAINT_MACHINE_CHECK); | 36 | add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); |
37 | } | 37 | } |
38 | 38 | ||
39 | /* Set up machine check reporting for processors with Intel style MCE: */ | 39 | /* Set up machine check reporting for processors with Intel style MCE: */ |
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c index 2d7998fb628c..e9a701aecaa1 100644 --- a/arch/x86/kernel/cpu/mcheck/winchip.c +++ b/arch/x86/kernel/cpu/mcheck/winchip.c | |||
@@ -15,7 +15,7 @@ | |||
15 | static void winchip_machine_check(struct pt_regs *regs, long error_code) | 15 | static void winchip_machine_check(struct pt_regs *regs, long error_code) |
16 | { | 16 | { |
17 | printk(KERN_EMERG "CPU0: Machine Check Exception.\n"); | 17 | printk(KERN_EMERG "CPU0: Machine Check Exception.\n"); |
18 | add_taint(TAINT_MACHINE_CHECK); | 18 | add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); |
19 | } | 19 | } |
20 | 20 | ||
21 | /* Set up machine check reporting on the Winchip C6 series */ | 21 | /* Set up machine check reporting on the Winchip C6 series */ |
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 0a630dd4b620..a7d26d83fb70 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c | |||
@@ -14,10 +14,15 @@ | |||
14 | #include <linux/time.h> | 14 | #include <linux/time.h> |
15 | #include <linux/clocksource.h> | 15 | #include <linux/clocksource.h> |
16 | #include <linux/module.h> | 16 | #include <linux/module.h> |
17 | #include <linux/hardirq.h> | ||
18 | #include <linux/interrupt.h> | ||
17 | #include <asm/processor.h> | 19 | #include <asm/processor.h> |
18 | #include <asm/hypervisor.h> | 20 | #include <asm/hypervisor.h> |
19 | #include <asm/hyperv.h> | 21 | #include <asm/hyperv.h> |
20 | #include <asm/mshyperv.h> | 22 | #include <asm/mshyperv.h> |
23 | #include <asm/desc.h> | ||
24 | #include <asm/idle.h> | ||
25 | #include <asm/irq_regs.h> | ||
21 | 26 | ||
22 | struct ms_hyperv_info ms_hyperv; | 27 | struct ms_hyperv_info ms_hyperv; |
23 | EXPORT_SYMBOL_GPL(ms_hyperv); | 28 | EXPORT_SYMBOL_GPL(ms_hyperv); |
@@ -30,6 +35,13 @@ static bool __init ms_hyperv_platform(void) | |||
30 | if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) | 35 | if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) |
31 | return false; | 36 | return false; |
32 | 37 | ||
38 | /* | ||
39 | * Xen emulates Hyper-V to support enlightened Windows. | ||
40 | * Check to see first if we are on a Xen Hypervisor. | ||
41 | */ | ||
42 | if (xen_cpuid_base()) | ||
43 | return false; | ||
44 | |||
33 | cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS, | 45 | cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS, |
34 | &eax, &hyp_signature[0], &hyp_signature[1], &hyp_signature[2]); | 46 | &eax, &hyp_signature[0], &hyp_signature[1], &hyp_signature[2]); |
35 | 47 | ||
@@ -68,7 +80,14 @@ static void __init ms_hyperv_init_platform(void) | |||
68 | printk(KERN_INFO "HyperV: features 0x%x, hints 0x%x\n", | 80 | printk(KERN_INFO "HyperV: features 0x%x, hints 0x%x\n", |
69 | ms_hyperv.features, ms_hyperv.hints); | 81 | ms_hyperv.features, ms_hyperv.hints); |
70 | 82 | ||
71 | clocksource_register_hz(&hyperv_cs, NSEC_PER_SEC/100); | 83 | if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE) |
84 | clocksource_register_hz(&hyperv_cs, NSEC_PER_SEC/100); | ||
85 | #if IS_ENABLED(CONFIG_HYPERV) | ||
86 | /* | ||
87 | * Setup the IDT for hypervisor callback. | ||
88 | */ | ||
89 | alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector); | ||
90 | #endif | ||
72 | } | 91 | } |
73 | 92 | ||
74 | const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = { | 93 | const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = { |
@@ -77,3 +96,36 @@ const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = { | |||
77 | .init_platform = ms_hyperv_init_platform, | 96 | .init_platform = ms_hyperv_init_platform, |
78 | }; | 97 | }; |
79 | EXPORT_SYMBOL(x86_hyper_ms_hyperv); | 98 | EXPORT_SYMBOL(x86_hyper_ms_hyperv); |
99 | |||
100 | #if IS_ENABLED(CONFIG_HYPERV) | ||
101 | static int vmbus_irq = -1; | ||
102 | static irq_handler_t vmbus_isr; | ||
103 | |||
104 | void hv_register_vmbus_handler(int irq, irq_handler_t handler) | ||
105 | { | ||
106 | vmbus_irq = irq; | ||
107 | vmbus_isr = handler; | ||
108 | } | ||
109 | |||
110 | void hyperv_vector_handler(struct pt_regs *regs) | ||
111 | { | ||
112 | struct pt_regs *old_regs = set_irq_regs(regs); | ||
113 | struct irq_desc *desc; | ||
114 | |||
115 | irq_enter(); | ||
116 | exit_idle(); | ||
117 | |||
118 | desc = irq_to_desc(vmbus_irq); | ||
119 | |||
120 | if (desc) | ||
121 | generic_handle_irq_desc(vmbus_irq, desc); | ||
122 | |||
123 | irq_exit(); | ||
124 | set_irq_regs(old_regs); | ||
125 | } | ||
126 | #else | ||
127 | void hv_register_vmbus_handler(int irq, irq_handler_t handler) | ||
128 | { | ||
129 | } | ||
130 | #endif | ||
131 | EXPORT_SYMBOL_GPL(hv_register_vmbus_handler); | ||
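The mshyperv.c hunk above makes platform detection bail out when a Xen CPUID signature is found, because Xen emulates the Hyper-V leaves for enlightened Windows guests. The userspace sketch below illustrates that probing order under the conventional 0x40000000 hypervisor leaf range; the "XenVMMXenVMM" and "Microsoft Hv" signature strings are assumptions from general x86 convention, not taken from this diff, and this is not the kernel implementation.

#include <cpuid.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Scan the hypervisor CPUID leaf range for a given 12-byte signature. */
static uint32_t hypervisor_cpuid_base(const char *sig)
{
	uint32_t base, eax, ebx, ecx, edx;
	char vendor[13];

	for (base = 0x40000000; base < 0x40010000; base += 0x100) {
		__cpuid(base, eax, ebx, ecx, edx);
		(void)eax;
		memcpy(vendor + 0, &ebx, 4);
		memcpy(vendor + 4, &ecx, 4);
		memcpy(vendor + 8, &edx, 4);
		vendor[12] = '\0';
		if (!strcmp(vendor, sig))
			return base;
	}
	return 0;
}

int main(void)
{
	/* Check Xen first: its emulated Hyper-V leaves must not win. */
	if (hypervisor_cpuid_base("XenVMMXenVMM")) {
		puts("Xen detected: ignore emulated Hyper-V leaves");
		return 0;
	}
	if (hypervisor_cpuid_base("Microsoft Hv"))
		puts("Hyper-V detected");
	return 0;
}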
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index e9fe907cd249..fa72a39e5d46 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c | |||
@@ -542,7 +542,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, | |||
542 | 542 | ||
543 | if (tmp != mask_lo) { | 543 | if (tmp != mask_lo) { |
544 | printk(KERN_WARNING "mtrr: your BIOS has configured an incorrect mask, fixing it.\n"); | 544 | printk(KERN_WARNING "mtrr: your BIOS has configured an incorrect mask, fixing it.\n"); |
545 | add_taint(TAINT_FIRMWARE_WORKAROUND); | 545 | add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); |
546 | mask_lo = tmp; | 546 | mask_lo = tmp; |
547 | } | 547 | } |
548 | } | 548 | } |
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 4428fd178bce..bf0f01aea994 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
@@ -340,9 +340,6 @@ int x86_setup_perfctr(struct perf_event *event) | |||
340 | /* BTS is currently only allowed for user-mode. */ | 340 | /* BTS is currently only allowed for user-mode. */ |
341 | if (!attr->exclude_kernel) | 341 | if (!attr->exclude_kernel) |
342 | return -EOPNOTSUPP; | 342 | return -EOPNOTSUPP; |
343 | |||
344 | if (!attr->exclude_guest) | ||
345 | return -EOPNOTSUPP; | ||
346 | } | 343 | } |
347 | 344 | ||
348 | hwc->config |= config; | 345 | hwc->config |= config; |
@@ -385,9 +382,6 @@ int x86_pmu_hw_config(struct perf_event *event) | |||
385 | if (event->attr.precise_ip) { | 382 | if (event->attr.precise_ip) { |
386 | int precise = 0; | 383 | int precise = 0; |
387 | 384 | ||
388 | if (!event->attr.exclude_guest) | ||
389 | return -EOPNOTSUPP; | ||
390 | |||
391 | /* Support for constant skid */ | 385 | /* Support for constant skid */ |
392 | if (x86_pmu.pebs_active && !x86_pmu.pebs_broken) { | 386 | if (x86_pmu.pebs_active && !x86_pmu.pebs_broken) { |
393 | precise++; | 387 | precise++; |
@@ -835,7 +829,7 @@ static inline void x86_assign_hw_event(struct perf_event *event, | |||
835 | } else { | 829 | } else { |
836 | hwc->config_base = x86_pmu_config_addr(hwc->idx); | 830 | hwc->config_base = x86_pmu_config_addr(hwc->idx); |
837 | hwc->event_base = x86_pmu_event_addr(hwc->idx); | 831 | hwc->event_base = x86_pmu_event_addr(hwc->idx); |
838 | hwc->event_base_rdpmc = hwc->idx; | 832 | hwc->event_base_rdpmc = x86_pmu_rdpmc_index(hwc->idx); |
839 | } | 833 | } |
840 | } | 834 | } |
841 | 835 | ||
@@ -1316,11 +1310,6 @@ static struct attribute_group x86_pmu_format_group = { | |||
1316 | .attrs = NULL, | 1310 | .attrs = NULL, |
1317 | }; | 1311 | }; |
1318 | 1312 | ||
1319 | struct perf_pmu_events_attr { | ||
1320 | struct device_attribute attr; | ||
1321 | u64 id; | ||
1322 | }; | ||
1323 | |||
1324 | /* | 1313 | /* |
1325 | * Remove all undefined events (x86_pmu.event_map(id) == 0) | 1314 | * Remove all undefined events (x86_pmu.event_map(id) == 0) |
1326 | * out of events_attr attributes. | 1315 | * out of events_attr attributes. |
@@ -1354,11 +1343,9 @@ static ssize_t events_sysfs_show(struct device *dev, struct device_attribute *at | |||
1354 | #define EVENT_VAR(_id) event_attr_##_id | 1343 | #define EVENT_VAR(_id) event_attr_##_id |
1355 | #define EVENT_PTR(_id) &event_attr_##_id.attr.attr | 1344 | #define EVENT_PTR(_id) &event_attr_##_id.attr.attr |
1356 | 1345 | ||
1357 | #define EVENT_ATTR(_name, _id) \ | 1346 | #define EVENT_ATTR(_name, _id) \ |
1358 | static struct perf_pmu_events_attr EVENT_VAR(_id) = { \ | 1347 | PMU_EVENT_ATTR(_name, EVENT_VAR(_id), PERF_COUNT_HW_##_id, \ |
1359 | .attr = __ATTR(_name, 0444, events_sysfs_show, NULL), \ | 1348 | events_sysfs_show) |
1360 | .id = PERF_COUNT_HW_##_id, \ | ||
1361 | }; | ||
1362 | 1349 | ||
1363 | EVENT_ATTR(cpu-cycles, CPU_CYCLES ); | 1350 | EVENT_ATTR(cpu-cycles, CPU_CYCLES ); |
1364 | EVENT_ATTR(instructions, INSTRUCTIONS ); | 1351 | EVENT_ATTR(instructions, INSTRUCTIONS ); |
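The x86_assign_hw_event() change above stores a remapped index, x86_pmu_rdpmc_index(hwc->idx), in event_base_rdpmc rather than the raw counter number. A bare sketch of why that matters: the RDPMC instruction takes the (possibly remapped) counter index in ECX. This only works where CR4.PCE permits user-mode RDPMC and the counter is actually programmed; the index value used here is hypothetical.

#include <stdint.h>
#include <stdio.h>

/* Read a hardware performance counter by its RDPMC index. */
static inline uint64_t rdpmc(uint32_t index)
{
	uint32_t lo, hi;

	__asm__ volatile("rdpmc" : "=a" (lo), "=d" (hi) : "c" (index));
	return ((uint64_t)hi << 32) | lo;
}

int main(void)
{
	uint32_t index = 0;	/* hypothetical remapped index from the new hook */

	printf("counter %u = %llu\n", index,
	       (unsigned long long)rdpmc(index));
	return 0;
}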
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 115c1ea97746..7f5c75c2afdd 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h | |||
@@ -325,6 +325,8 @@ struct x86_pmu { | |||
325 | int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign); | 325 | int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign); |
326 | unsigned eventsel; | 326 | unsigned eventsel; |
327 | unsigned perfctr; | 327 | unsigned perfctr; |
328 | int (*addr_offset)(int index, bool eventsel); | ||
329 | int (*rdpmc_index)(int index); | ||
328 | u64 (*event_map)(int); | 330 | u64 (*event_map)(int); |
329 | int max_events; | 331 | int max_events; |
330 | int num_counters; | 332 | int num_counters; |
@@ -446,28 +448,21 @@ extern u64 __read_mostly hw_cache_extra_regs | |||
446 | 448 | ||
447 | u64 x86_perf_event_update(struct perf_event *event); | 449 | u64 x86_perf_event_update(struct perf_event *event); |
448 | 450 | ||
449 | static inline int x86_pmu_addr_offset(int index) | 451 | static inline unsigned int x86_pmu_config_addr(int index) |
450 | { | 452 | { |
451 | int offset; | 453 | return x86_pmu.eventsel + (x86_pmu.addr_offset ? |
452 | 454 | x86_pmu.addr_offset(index, true) : index); | |
453 | /* offset = X86_FEATURE_PERFCTR_CORE ? index << 1 : index */ | ||
454 | alternative_io(ASM_NOP2, | ||
455 | "shll $1, %%eax", | ||
456 | X86_FEATURE_PERFCTR_CORE, | ||
457 | "=a" (offset), | ||
458 | "a" (index)); | ||
459 | |||
460 | return offset; | ||
461 | } | 455 | } |
462 | 456 | ||
463 | static inline unsigned int x86_pmu_config_addr(int index) | 457 | static inline unsigned int x86_pmu_event_addr(int index) |
464 | { | 458 | { |
465 | return x86_pmu.eventsel + x86_pmu_addr_offset(index); | 459 | return x86_pmu.perfctr + (x86_pmu.addr_offset ? |
460 | x86_pmu.addr_offset(index, false) : index); | ||
466 | } | 461 | } |
467 | 462 | ||
468 | static inline unsigned int x86_pmu_event_addr(int index) | 463 | static inline int x86_pmu_rdpmc_index(int index) |
469 | { | 464 | { |
470 | return x86_pmu.perfctr + x86_pmu_addr_offset(index); | 465 | return x86_pmu.rdpmc_index ? x86_pmu.rdpmc_index(index) : index; |
471 | } | 466 | } |
472 | 467 | ||
473 | int x86_setup_perfctr(struct perf_event *event); | 468 | int x86_setup_perfctr(struct perf_event *event); |
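The header change above replaces the alternative_io-patched shift with optional addr_offset()/rdpmc_index() hooks: a NULL hook means the counter MSRs are contiguous, a non-NULL hook computes the spacing. A small sketch of that dispatch follows; the base addresses and the doubling hook mirror the AMD layouts described later in this diff, but the code is illustrative only, not the kernel implementation.

#include <stdio.h>

struct pmu_sketch {
	unsigned int eventsel;				/* first EVNTSEL MSR */
	unsigned int perfctr;				/* first PERFCTR MSR */
	int (*addr_offset)(int index, int eventsel);
	int (*rdpmc_index)(int index);
};

static unsigned int config_addr(const struct pmu_sketch *p, int index)
{
	return p->eventsel + (p->addr_offset ? p->addr_offset(index, 1) : index);
}

static unsigned int event_addr(const struct pmu_sketch *p, int index)
{
	return p->perfctr + (p->addr_offset ? p->addr_offset(index, 0) : index);
}

static int rdpmc_index(const struct pmu_sketch *p, int index)
{
	return p->rdpmc_index ? p->rdpmc_index(index) : index;
}

static int double_spaced(int index, int eventsel)
{
	(void)eventsel;
	return index << 1;	/* counters two MSRs apart */
}

int main(void)
{
	struct pmu_sketch legacy = { .eventsel = 0xc0010000, .perfctr = 0xc0010004 };
	struct pmu_sketch core_ext = { .eventsel = 0xc0010200, .perfctr = 0xc0010201,
				       .addr_offset = double_spaced };

	printf("legacy   ctr2: CTL %#x, CTR %#x, rdpmc %d\n",
	       config_addr(&legacy, 2), event_addr(&legacy, 2),
	       rdpmc_index(&legacy, 2));
	printf("core-ext ctr2: CTL %#x, CTR %#x, rdpmc %d\n",
	       config_addr(&core_ext, 2), event_addr(&core_ext, 2),
	       rdpmc_index(&core_ext, 2));
	return 0;
}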
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index c93bc4e813a0..dfdab42aed27 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c | |||
@@ -132,21 +132,102 @@ static u64 amd_pmu_event_map(int hw_event) | |||
132 | return amd_perfmon_event_map[hw_event]; | 132 | return amd_perfmon_event_map[hw_event]; |
133 | } | 133 | } |
134 | 134 | ||
135 | static int amd_pmu_hw_config(struct perf_event *event) | 135 | static struct event_constraint *amd_nb_event_constraint; |
136 | |||
137 | /* | ||
138 | * Previously calculated offsets | ||
139 | */ | ||
140 | static unsigned int event_offsets[X86_PMC_IDX_MAX] __read_mostly; | ||
141 | static unsigned int count_offsets[X86_PMC_IDX_MAX] __read_mostly; | ||
142 | static unsigned int rdpmc_indexes[X86_PMC_IDX_MAX] __read_mostly; | ||
143 | |||
144 | /* | ||
145 | * Legacy CPUs: | ||
146 | * 4 counters starting at 0xc0010000 each offset by 1 | ||
147 | * | ||
148 | * CPUs with core performance counter extensions: | ||
149 | * 6 counters starting at 0xc0010200 each offset by 2 | ||
150 | * | ||
151 | * CPUs with north bridge performance counter extensions: | ||
152 | * 4 additional counters starting at 0xc0010240 each offset by 2 | ||
153 | * (indexed right above either one of the above core counters) | ||
154 | */ | ||
155 | static inline int amd_pmu_addr_offset(int index, bool eventsel) | ||
136 | { | 156 | { |
137 | int ret; | 157 | int offset, first, base; |
138 | 158 | ||
139 | /* pass precise event sampling to ibs: */ | 159 | if (!index) |
140 | if (event->attr.precise_ip && get_ibs_caps()) | 160 | return index; |
141 | return -ENOENT; | 161 | |
162 | if (eventsel) | ||
163 | offset = event_offsets[index]; | ||
164 | else | ||
165 | offset = count_offsets[index]; | ||
166 | |||
167 | if (offset) | ||
168 | return offset; | ||
169 | |||
170 | if (amd_nb_event_constraint && | ||
171 | test_bit(index, amd_nb_event_constraint->idxmsk)) { | ||
172 | /* | ||
173 | * calculate the offset of NB counters with respect to | ||
174 | * base eventsel or perfctr | ||
175 | */ | ||
176 | |||
177 | first = find_first_bit(amd_nb_event_constraint->idxmsk, | ||
178 | X86_PMC_IDX_MAX); | ||
179 | |||
180 | if (eventsel) | ||
181 | base = MSR_F15H_NB_PERF_CTL - x86_pmu.eventsel; | ||
182 | else | ||
183 | base = MSR_F15H_NB_PERF_CTR - x86_pmu.perfctr; | ||
184 | |||
185 | offset = base + ((index - first) << 1); | ||
186 | } else if (!cpu_has_perfctr_core) | ||
187 | offset = index; | ||
188 | else | ||
189 | offset = index << 1; | ||
190 | |||
191 | if (eventsel) | ||
192 | event_offsets[index] = offset; | ||
193 | else | ||
194 | count_offsets[index] = offset; | ||
195 | |||
196 | return offset; | ||
197 | } | ||
198 | |||
199 | static inline int amd_pmu_rdpmc_index(int index) | ||
200 | { | ||
201 | int ret, first; | ||
202 | |||
203 | if (!index) | ||
204 | return index; | ||
205 | |||
206 | ret = rdpmc_indexes[index]; | ||
142 | 207 | ||
143 | ret = x86_pmu_hw_config(event); | ||
144 | if (ret) | 208 | if (ret) |
145 | return ret; | 209 | return ret; |
146 | 210 | ||
147 | if (has_branch_stack(event)) | 211 | if (amd_nb_event_constraint && |
148 | return -EOPNOTSUPP; | 212 | test_bit(index, amd_nb_event_constraint->idxmsk)) { |
213 | /* | ||
214 | * according to the manual, ECX value of the NB counters is | ||
215 | * the index of the NB counter (0, 1, 2 or 3) plus 6 | ||
216 | */ | ||
217 | |||
218 | first = find_first_bit(amd_nb_event_constraint->idxmsk, | ||
219 | X86_PMC_IDX_MAX); | ||
220 | ret = index - first + 6; | ||
221 | } else | ||
222 | ret = index; | ||
223 | |||
224 | rdpmc_indexes[index] = ret; | ||
225 | |||
226 | return ret; | ||
227 | } | ||
149 | 228 | ||
229 | static int amd_core_hw_config(struct perf_event *event) | ||
230 | { | ||
150 | if (event->attr.exclude_host && event->attr.exclude_guest) | 231 | if (event->attr.exclude_host && event->attr.exclude_guest) |
151 | /* | 232 | /* |
152 | * When HO == GO == 1 the hardware treats that as GO == HO == 0 | 233 | * When HO == GO == 1 the hardware treats that as GO == HO == 0 |
@@ -156,14 +237,37 @@ static int amd_pmu_hw_config(struct perf_event *event) | |||
156 | event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR | | 237 | event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR | |
157 | ARCH_PERFMON_EVENTSEL_OS); | 238 | ARCH_PERFMON_EVENTSEL_OS); |
158 | else if (event->attr.exclude_host) | 239 | else if (event->attr.exclude_host) |
159 | event->hw.config |= AMD_PERFMON_EVENTSEL_GUESTONLY; | 240 | event->hw.config |= AMD64_EVENTSEL_GUESTONLY; |
160 | else if (event->attr.exclude_guest) | 241 | else if (event->attr.exclude_guest) |
161 | event->hw.config |= AMD_PERFMON_EVENTSEL_HOSTONLY; | 242 | event->hw.config |= AMD64_EVENTSEL_HOSTONLY; |
243 | |||
244 | return 0; | ||
245 | } | ||
246 | |||
247 | /* | ||
248 | * NB counters do not support the following event select bits: | ||
249 | * Host/Guest only | ||
250 | * Counter mask | ||
251 | * Invert counter mask | ||
252 | * Edge detect | ||
253 | * OS/User mode | ||
254 | */ | ||
255 | static int amd_nb_hw_config(struct perf_event *event) | ||
256 | { | ||
257 | /* for NB, we only allow system wide counting mode */ | ||
258 | if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) | ||
259 | return -EINVAL; | ||
260 | |||
261 | if (event->attr.exclude_user || event->attr.exclude_kernel || | ||
262 | event->attr.exclude_host || event->attr.exclude_guest) | ||
263 | return -EINVAL; | ||
162 | 264 | ||
163 | if (event->attr.type != PERF_TYPE_RAW) | 265 | event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR | |
164 | return 0; | 266 | ARCH_PERFMON_EVENTSEL_OS); |
165 | 267 | ||
166 | event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK; | 268 | if (event->hw.config & ~(AMD64_RAW_EVENT_MASK_NB | |
269 | ARCH_PERFMON_EVENTSEL_INT)) | ||
270 | return -EINVAL; | ||
167 | 271 | ||
168 | return 0; | 272 | return 0; |
169 | } | 273 | } |
@@ -181,6 +285,11 @@ static inline int amd_is_nb_event(struct hw_perf_event *hwc) | |||
181 | return (hwc->config & 0xe0) == 0xe0; | 285 | return (hwc->config & 0xe0) == 0xe0; |
182 | } | 286 | } |
183 | 287 | ||
288 | static inline int amd_is_perfctr_nb_event(struct hw_perf_event *hwc) | ||
289 | { | ||
290 | return amd_nb_event_constraint && amd_is_nb_event(hwc); | ||
291 | } | ||
292 | |||
184 | static inline int amd_has_nb(struct cpu_hw_events *cpuc) | 293 | static inline int amd_has_nb(struct cpu_hw_events *cpuc) |
185 | { | 294 | { |
186 | struct amd_nb *nb = cpuc->amd_nb; | 295 | struct amd_nb *nb = cpuc->amd_nb; |
@@ -188,20 +297,37 @@ static inline int amd_has_nb(struct cpu_hw_events *cpuc) | |||
188 | return nb && nb->nb_id != -1; | 297 | return nb && nb->nb_id != -1; |
189 | } | 298 | } |
190 | 299 | ||
191 | static void amd_put_event_constraints(struct cpu_hw_events *cpuc, | 300 | static int amd_pmu_hw_config(struct perf_event *event) |
192 | struct perf_event *event) | 301 | { |
302 | int ret; | ||
303 | |||
304 | /* pass precise event sampling to ibs: */ | ||
305 | if (event->attr.precise_ip && get_ibs_caps()) | ||
306 | return -ENOENT; | ||
307 | |||
308 | if (has_branch_stack(event)) | ||
309 | return -EOPNOTSUPP; | ||
310 | |||
311 | ret = x86_pmu_hw_config(event); | ||
312 | if (ret) | ||
313 | return ret; | ||
314 | |||
315 | if (event->attr.type == PERF_TYPE_RAW) | ||
316 | event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK; | ||
317 | |||
318 | if (amd_is_perfctr_nb_event(&event->hw)) | ||
319 | return amd_nb_hw_config(event); | ||
320 | |||
321 | return amd_core_hw_config(event); | ||
322 | } | ||
323 | |||
324 | static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc, | ||
325 | struct perf_event *event) | ||
193 | { | 326 | { |
194 | struct hw_perf_event *hwc = &event->hw; | ||
195 | struct amd_nb *nb = cpuc->amd_nb; | 327 | struct amd_nb *nb = cpuc->amd_nb; |
196 | int i; | 328 | int i; |
197 | 329 | ||
198 | /* | 330 | /* |
199 | * only care about NB events | ||
200 | */ | ||
201 | if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc))) | ||
202 | return; | ||
203 | |||
204 | /* | ||
205 | * need to scan whole list because event may not have | 331 | * need to scan whole list because event may not have |
206 | * been assigned during scheduling | 332 | * been assigned during scheduling |
207 | * | 333 | * |
@@ -215,6 +341,19 @@ static void amd_put_event_constraints(struct cpu_hw_events *cpuc, | |||
215 | } | 341 | } |
216 | } | 342 | } |
217 | 343 | ||
344 | static void amd_nb_interrupt_hw_config(struct hw_perf_event *hwc) | ||
345 | { | ||
346 | int core_id = cpu_data(smp_processor_id()).cpu_core_id; | ||
347 | |||
348 | /* deliver interrupts only to this core */ | ||
349 | if (hwc->config & ARCH_PERFMON_EVENTSEL_INT) { | ||
350 | hwc->config |= AMD64_EVENTSEL_INT_CORE_ENABLE; | ||
351 | hwc->config &= ~AMD64_EVENTSEL_INT_CORE_SEL_MASK; | ||
352 | hwc->config |= (u64)(core_id) << | ||
353 | AMD64_EVENTSEL_INT_CORE_SEL_SHIFT; | ||
354 | } | ||
355 | } | ||
356 | |||
218 | /* | 357 | /* |
219 | * AMD64 NorthBridge events need special treatment because | 358 | * AMD64 NorthBridge events need special treatment because |
220 | * counter access needs to be synchronized across all cores | 359 | * counter access needs to be synchronized across all cores |
@@ -247,24 +386,24 @@ static void amd_put_event_constraints(struct cpu_hw_events *cpuc, | |||
247 | * | 386 | * |
248 | * Given that resources are allocated (cmpxchg), they must be | 387 | * Given that resources are allocated (cmpxchg), they must be |
249 | * eventually freed for others to use. This is accomplished by | 388 | * eventually freed for others to use. This is accomplished by |
250 | * calling amd_put_event_constraints(). | 389 | * calling __amd_put_nb_event_constraints() |
251 | * | 390 | * |
252 | * Non NB events are not impacted by this restriction. | 391 | * Non NB events are not impacted by this restriction. |
253 | */ | 392 | */ |
254 | static struct event_constraint * | 393 | static struct event_constraint * |
255 | amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) | 394 | __amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event, |
395 | struct event_constraint *c) | ||
256 | { | 396 | { |
257 | struct hw_perf_event *hwc = &event->hw; | 397 | struct hw_perf_event *hwc = &event->hw; |
258 | struct amd_nb *nb = cpuc->amd_nb; | 398 | struct amd_nb *nb = cpuc->amd_nb; |
259 | struct perf_event *old = NULL; | 399 | struct perf_event *old; |
260 | int max = x86_pmu.num_counters; | 400 | int idx, new = -1; |
261 | int i, j, k = -1; | ||
262 | 401 | ||
263 | /* | 402 | if (!c) |
264 | * if not NB event or no NB, then no constraints | 403 | c = &unconstrained; |
265 | */ | 404 | |
266 | if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc))) | 405 | if (cpuc->is_fake) |
267 | return &unconstrained; | 406 | return c; |
268 | 407 | ||
269 | /* | 408 | /* |
270 | * detect if already present, if so reuse | 409 | * detect if already present, if so reuse |
@@ -276,48 +415,36 @@ amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) | |||
276 | * because of successive calls to x86_schedule_events() from | 415 | * because of successive calls to x86_schedule_events() from |
277 | * hw_perf_group_sched_in() without hw_perf_enable() | 416 | * hw_perf_group_sched_in() without hw_perf_enable() |
278 | */ | 417 | */ |
279 | for (i = 0; i < max; i++) { | 418 | for_each_set_bit(idx, c->idxmsk, x86_pmu.num_counters) { |
280 | /* | 419 | if (new == -1 || hwc->idx == idx) |
281 | * keep track of first free slot | 420 | /* assign free slot, prefer hwc->idx */ |
282 | */ | 421 | old = cmpxchg(nb->owners + idx, NULL, event); |
283 | if (k == -1 && !nb->owners[i]) | 422 | else if (nb->owners[idx] == event) |
284 | k = i; | 423 | /* event already present */ |
424 | old = event; | ||
425 | else | ||
426 | continue; | ||
427 | |||
428 | if (old && old != event) | ||
429 | continue; | ||
430 | |||
431 | /* reassign to this slot */ | ||
432 | if (new != -1) | ||
433 | cmpxchg(nb->owners + new, event, NULL); | ||
434 | new = idx; | ||
285 | 435 | ||
286 | /* already present, reuse */ | 436 | /* already present, reuse */ |
287 | if (nb->owners[i] == event) | 437 | if (old == event) |
288 | goto done; | ||
289 | } | ||
290 | /* | ||
291 | * not present, so grab a new slot | ||
292 | * starting either at: | ||
293 | */ | ||
294 | if (hwc->idx != -1) { | ||
295 | /* previous assignment */ | ||
296 | i = hwc->idx; | ||
297 | } else if (k != -1) { | ||
298 | /* start from free slot found */ | ||
299 | i = k; | ||
300 | } else { | ||
301 | /* | ||
302 | * event not found, no slot found in | ||
303 | * first pass, try again from the | ||
304 | * beginning | ||
305 | */ | ||
306 | i = 0; | ||
307 | } | ||
308 | j = i; | ||
309 | do { | ||
310 | old = cmpxchg(nb->owners+i, NULL, event); | ||
311 | if (!old) | ||
312 | break; | 438 | break; |
313 | if (++i == max) | 439 | } |
314 | i = 0; | 440 | |
315 | } while (i != j); | 441 | if (new == -1) |
316 | done: | 442 | return &emptyconstraint; |
317 | if (!old) | 443 | |
318 | return &nb->event_constraints[i]; | 444 | if (amd_is_perfctr_nb_event(hwc)) |
319 | 445 | amd_nb_interrupt_hw_config(hwc); | |
320 | return &emptyconstraint; | 446 | |
447 | return &nb->event_constraints[new]; | ||
321 | } | 448 | } |
322 | 449 | ||
323 | static struct amd_nb *amd_alloc_nb(int cpu) | 450 | static struct amd_nb *amd_alloc_nb(int cpu) |
@@ -364,7 +491,7 @@ static void amd_pmu_cpu_starting(int cpu) | |||
364 | struct amd_nb *nb; | 491 | struct amd_nb *nb; |
365 | int i, nb_id; | 492 | int i, nb_id; |
366 | 493 | ||
367 | cpuc->perf_ctr_virt_mask = AMD_PERFMON_EVENTSEL_HOSTONLY; | 494 | cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY; |
368 | 495 | ||
369 | if (boot_cpu_data.x86_max_cores < 2) | 496 | if (boot_cpu_data.x86_max_cores < 2) |
370 | return; | 497 | return; |
@@ -407,6 +534,26 @@ static void amd_pmu_cpu_dead(int cpu) | |||
407 | } | 534 | } |
408 | } | 535 | } |
409 | 536 | ||
537 | static struct event_constraint * | ||
538 | amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) | ||
539 | { | ||
540 | /* | ||
541 | * if not NB event or no NB, then no constraints | ||
542 | */ | ||
543 | if (!(amd_has_nb(cpuc) && amd_is_nb_event(&event->hw))) | ||
544 | return &unconstrained; | ||
545 | |||
546 | return __amd_get_nb_event_constraints(cpuc, event, | ||
547 | amd_nb_event_constraint); | ||
548 | } | ||
549 | |||
550 | static void amd_put_event_constraints(struct cpu_hw_events *cpuc, | ||
551 | struct perf_event *event) | ||
552 | { | ||
553 | if (amd_has_nb(cpuc) && amd_is_nb_event(&event->hw)) | ||
554 | __amd_put_nb_event_constraints(cpuc, event); | ||
555 | } | ||
556 | |||
410 | PMU_FORMAT_ATTR(event, "config:0-7,32-35"); | 557 | PMU_FORMAT_ATTR(event, "config:0-7,32-35"); |
411 | PMU_FORMAT_ATTR(umask, "config:8-15" ); | 558 | PMU_FORMAT_ATTR(umask, "config:8-15" ); |
412 | PMU_FORMAT_ATTR(edge, "config:18" ); | 559 | PMU_FORMAT_ATTR(edge, "config:18" ); |
@@ -496,6 +643,9 @@ static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09, | |||
496 | static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0); | 643 | static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0); |
497 | static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0); | 644 | static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0); |
498 | 645 | ||
646 | static struct event_constraint amd_NBPMC96 = EVENT_CONSTRAINT(0, 0x3C0, 0); | ||
647 | static struct event_constraint amd_NBPMC74 = EVENT_CONSTRAINT(0, 0xF0, 0); | ||
648 | |||
499 | static struct event_constraint * | 649 | static struct event_constraint * |
500 | amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event) | 650 | amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event) |
501 | { | 651 | { |
@@ -561,8 +711,8 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *ev | |||
561 | return &amd_f15_PMC20; | 711 | return &amd_f15_PMC20; |
562 | } | 712 | } |
563 | case AMD_EVENT_NB: | 713 | case AMD_EVENT_NB: |
564 | /* not yet implemented */ | 714 | return __amd_get_nb_event_constraints(cpuc, event, |
565 | return &emptyconstraint; | 715 | amd_nb_event_constraint); |
566 | default: | 716 | default: |
567 | return &emptyconstraint; | 717 | return &emptyconstraint; |
568 | } | 718 | } |
@@ -587,6 +737,8 @@ static __initconst const struct x86_pmu amd_pmu = { | |||
587 | .schedule_events = x86_schedule_events, | 737 | .schedule_events = x86_schedule_events, |
588 | .eventsel = MSR_K7_EVNTSEL0, | 738 | .eventsel = MSR_K7_EVNTSEL0, |
589 | .perfctr = MSR_K7_PERFCTR0, | 739 | .perfctr = MSR_K7_PERFCTR0, |
740 | .addr_offset = amd_pmu_addr_offset, | ||
741 | .rdpmc_index = amd_pmu_rdpmc_index, | ||
590 | .event_map = amd_pmu_event_map, | 742 | .event_map = amd_pmu_event_map, |
591 | .max_events = ARRAY_SIZE(amd_perfmon_event_map), | 743 | .max_events = ARRAY_SIZE(amd_perfmon_event_map), |
592 | .num_counters = AMD64_NUM_COUNTERS, | 744 | .num_counters = AMD64_NUM_COUNTERS, |
@@ -608,7 +760,7 @@ static __initconst const struct x86_pmu amd_pmu = { | |||
608 | 760 | ||
609 | static int setup_event_constraints(void) | 761 | static int setup_event_constraints(void) |
610 | { | 762 | { |
611 | if (boot_cpu_data.x86 >= 0x15) | 763 | if (boot_cpu_data.x86 == 0x15) |
612 | x86_pmu.get_event_constraints = amd_get_event_constraints_f15h; | 764 | x86_pmu.get_event_constraints = amd_get_event_constraints_f15h; |
613 | return 0; | 765 | return 0; |
614 | } | 766 | } |
@@ -638,6 +790,23 @@ static int setup_perfctr_core(void) | |||
638 | return 0; | 790 | return 0; |
639 | } | 791 | } |
640 | 792 | ||
793 | static int setup_perfctr_nb(void) | ||
794 | { | ||
795 | if (!cpu_has_perfctr_nb) | ||
796 | return -ENODEV; | ||
797 | |||
798 | x86_pmu.num_counters += AMD64_NUM_COUNTERS_NB; | ||
799 | |||
800 | if (cpu_has_perfctr_core) | ||
801 | amd_nb_event_constraint = &amd_NBPMC96; | ||
802 | else | ||
803 | amd_nb_event_constraint = &amd_NBPMC74; | ||
804 | |||
805 | printk(KERN_INFO "perf: AMD northbridge performance counters detected\n"); | ||
806 | |||
807 | return 0; | ||
808 | } | ||
809 | |||
641 | __init int amd_pmu_init(void) | 810 | __init int amd_pmu_init(void) |
642 | { | 811 | { |
643 | /* Performance-monitoring supported from K7 and later: */ | 812 | /* Performance-monitoring supported from K7 and later: */ |
@@ -648,6 +817,7 @@ __init int amd_pmu_init(void) | |||
648 | 817 | ||
649 | setup_event_constraints(); | 818 | setup_event_constraints(); |
650 | setup_perfctr_core(); | 819 | setup_perfctr_core(); |
820 | setup_perfctr_nb(); | ||
651 | 821 | ||
652 | /* Events are common for all AMDs */ | 822 | /* Events are common for all AMDs */ |
653 | memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, | 823 | memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, |
@@ -678,7 +848,7 @@ void amd_pmu_disable_virt(void) | |||
678 | * SVM is disabled the Guest-only bits still gets set and the counter | 848 | * SVM is disabled the Guest-only bits still gets set and the counter |
679 | * will not count anything. | 849 | * will not count anything. |
680 | */ | 850 | */ |
681 | cpuc->perf_ctr_virt_mask = AMD_PERFMON_EVENTSEL_HOSTONLY; | 851 | cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY; |
682 | 852 | ||
683 | /* Reload all events */ | 853 | /* Reload all events */ |
684 | x86_pmu_disable_all(); | 854 | x86_pmu_disable_all(); |
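Putting the new amd_pmu_addr_offset()/amd_pmu_rdpmc_index() logic together for a family 15h part that has both the core and northbridge counter extensions gives the MSR layout sketched below. The bases come from the comment in the hunk above; FIRST_NB = 6 assumes the amd_NBPMC96 (0x3C0) constraint mask, i.e. NB counters at indices 6..9. This is a standalone illustration, not the kernel code.

#include <stdio.h>

#define EVENTSEL_BASE	0xc0010200u	/* MSR_F15H_PERF_CTL */
#define PERFCTR_BASE	0xc0010201u	/* MSR_F15H_PERF_CTR */
#define NB_CTL_BASE	0xc0010240u	/* MSR_F15H_NB_PERF_CTL */
#define NB_CTR_BASE	0xc0010241u	/* MSR_F15H_NB_PERF_CTR */
#define FIRST_NB	6		/* first set bit of the 0x3C0 mask */

static int addr_offset(int index, int eventsel)
{
	if (!index)
		return 0;
	if (index >= FIRST_NB) {	/* NB counter: rebase onto the NB MSRs */
		unsigned int base = eventsel ? NB_CTL_BASE - EVENTSEL_BASE
					     : NB_CTR_BASE - PERFCTR_BASE;
		return base + ((index - FIRST_NB) << 1);
	}
	return index << 1;		/* core counter, spacing 2 */
}

static int rdpmc_idx(int index)
{
	/* NB counters read back through RDPMC slots 6..9 (NB index plus 6) */
	return index >= FIRST_NB ? index - FIRST_NB + 6 : index;
}

int main(void)
{
	int idx;

	for (idx = 0; idx < 10; idx++)
		printf("ctr %d: CTL %#x, CTR %#x, rdpmc %d\n", idx,
		       EVENTSEL_BASE + addr_offset(idx, 1),
		       PERFCTR_BASE + addr_offset(idx, 0),
		       rdpmc_idx(idx));
	return 0;
}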
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c index 6336bcbd0618..5f0581e713c2 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c +++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c | |||
@@ -528,7 +528,7 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs) | |||
528 | if (!test_bit(IBS_STARTED, pcpu->state)) { | 528 | if (!test_bit(IBS_STARTED, pcpu->state)) { |
529 | /* | 529 | /* |
530 | * Catch spurious interrupts after stopping IBS: After | 530 | * Catch spurious interrupts after stopping IBS: After |
531 | * disabling IBS there could be still incomming NMIs | 531 | * disabling IBS there could be still incoming NMIs |
532 | * with samples that even have the valid bit cleared. | 532 | * with samples that even have the valid bit cleared. |
533 | * Mark all these NMIs as handled. | 533 | * Mark all these NMIs as handled. |
534 | */ | 534 | */ |
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 93b9e1181f83..529c8931fc02 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
@@ -107,6 +107,27 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly = | |||
107 | EVENT_CONSTRAINT_END | 107 | EVENT_CONSTRAINT_END |
108 | }; | 108 | }; |
109 | 109 | ||
110 | static struct event_constraint intel_ivb_event_constraints[] __read_mostly = | ||
111 | { | ||
112 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ | ||
113 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ | ||
114 | FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ | ||
115 | INTEL_UEVENT_CONSTRAINT(0x0148, 0x4), /* L1D_PEND_MISS.PENDING */ | ||
116 | INTEL_UEVENT_CONSTRAINT(0x0279, 0xf), /* IDQ.EMTPY */ | ||
117 | INTEL_UEVENT_CONSTRAINT(0x019c, 0xf), /* IDQ_UOPS_NOT_DELIVERED.CORE */ | ||
118 | INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */ | ||
119 | INTEL_UEVENT_CONSTRAINT(0x05a3, 0xf), /* CYCLE_ACTIVITY.STALLS_L2_PENDING */ | ||
120 | INTEL_UEVENT_CONSTRAINT(0x06a3, 0xf), /* CYCLE_ACTIVITY.STALLS_LDM_PENDING */ | ||
121 | INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */ | ||
122 | INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */ | ||
123 | INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ | ||
124 | INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */ | ||
125 | INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ | ||
126 | INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ | ||
127 | INTEL_EVENT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */ | ||
128 | EVENT_CONSTRAINT_END | ||
129 | }; | ||
130 | |||
110 | static struct extra_reg intel_westmere_extra_regs[] __read_mostly = | 131 | static struct extra_reg intel_westmere_extra_regs[] __read_mostly = |
111 | { | 132 | { |
112 | INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0), | 133 | INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0), |
@@ -2019,7 +2040,10 @@ __init int intel_pmu_init(void) | |||
2019 | break; | 2040 | break; |
2020 | 2041 | ||
2021 | case 28: /* Atom */ | 2042 | case 28: /* Atom */ |
2022 | case 54: /* Cedariew */ | 2043 | case 38: /* Lincroft */ |
2044 | case 39: /* Penwell */ | ||
2045 | case 53: /* Cloverview */ | ||
2046 | case 54: /* Cedarview */ | ||
2023 | memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, | 2047 | memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, |
2024 | sizeof(hw_cache_event_ids)); | 2048 | sizeof(hw_cache_event_ids)); |
2025 | 2049 | ||
@@ -2084,6 +2108,7 @@ __init int intel_pmu_init(void) | |||
2084 | pr_cont("SandyBridge events, "); | 2108 | pr_cont("SandyBridge events, "); |
2085 | break; | 2109 | break; |
2086 | case 58: /* IvyBridge */ | 2110 | case 58: /* IvyBridge */ |
2111 | case 62: /* IvyBridge EP */ | ||
2087 | memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, | 2112 | memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, |
2088 | sizeof(hw_cache_event_ids)); | 2113 | sizeof(hw_cache_event_ids)); |
2089 | memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, | 2114 | memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, |
@@ -2091,7 +2116,7 @@ __init int intel_pmu_init(void) | |||
2091 | 2116 | ||
2092 | intel_pmu_lbr_init_snb(); | 2117 | intel_pmu_lbr_init_snb(); |
2093 | 2118 | ||
2094 | x86_pmu.event_constraints = intel_snb_event_constraints; | 2119 | x86_pmu.event_constraints = intel_ivb_event_constraints; |
2095 | x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints; | 2120 | x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints; |
2096 | x86_pmu.pebs_aliases = intel_pebs_aliases_snb; | 2121 | x86_pmu.pebs_aliases = intel_pebs_aliases_snb; |
2097 | x86_pmu.extra_regs = intel_snb_extra_regs; | 2122 | x86_pmu.extra_regs = intel_snb_extra_regs; |
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 3cf3d97cce3a..b43200dbfe7e 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c | |||
@@ -2500,7 +2500,7 @@ static bool pcidrv_registered; | |||
2500 | /* | 2500 | /* |
2501 | * add a pci uncore device | 2501 | * add a pci uncore device |
2502 | */ | 2502 | */ |
2503 | static int __devinit uncore_pci_add(struct intel_uncore_type *type, struct pci_dev *pdev) | 2503 | static int uncore_pci_add(struct intel_uncore_type *type, struct pci_dev *pdev) |
2504 | { | 2504 | { |
2505 | struct intel_uncore_pmu *pmu; | 2505 | struct intel_uncore_pmu *pmu; |
2506 | struct intel_uncore_box *box; | 2506 | struct intel_uncore_box *box; |
@@ -2571,8 +2571,8 @@ static void uncore_pci_remove(struct pci_dev *pdev) | |||
2571 | kfree(box); | 2571 | kfree(box); |
2572 | } | 2572 | } |
2573 | 2573 | ||
2574 | static int __devinit uncore_pci_probe(struct pci_dev *pdev, | 2574 | static int uncore_pci_probe(struct pci_dev *pdev, |
2575 | const struct pci_device_id *id) | 2575 | const struct pci_device_id *id) |
2576 | { | 2576 | { |
2577 | struct intel_uncore_type *type; | 2577 | struct intel_uncore_type *type; |
2578 | 2578 | ||
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c index f2af39f5dc3d..4820c232a0b9 100644 --- a/arch/x86/kernel/cpu/perf_event_p6.c +++ b/arch/x86/kernel/cpu/perf_event_p6.c | |||
@@ -19,7 +19,7 @@ static const u64 p6_perfmon_event_map[] = | |||
19 | 19 | ||
20 | }; | 20 | }; |
21 | 21 | ||
22 | static __initconst u64 p6_hw_cache_event_ids | 22 | static u64 p6_hw_cache_event_ids |
23 | [PERF_COUNT_HW_CACHE_MAX] | 23 | [PERF_COUNT_HW_CACHE_MAX] |
24 | [PERF_COUNT_HW_CACHE_OP_MAX] | 24 | [PERF_COUNT_HW_CACHE_OP_MAX] |
25 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | 25 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index fbd895562292..e280253f6f94 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c | |||
@@ -26,14 +26,8 @@ static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c, | |||
26 | #ifdef CONFIG_X86_32 | 26 | #ifdef CONFIG_X86_32 |
27 | static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c) | 27 | static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c) |
28 | { | 28 | { |
29 | /* | ||
30 | * We use exception 16 if we have hardware math and we've either seen | ||
31 | * it or the CPU claims it is internal | ||
32 | */ | ||
33 | int fpu_exception = c->hard_math && (ignore_fpu_irq || cpu_has_fpu); | ||
34 | seq_printf(m, | 29 | seq_printf(m, |
35 | "fdiv_bug\t: %s\n" | 30 | "fdiv_bug\t: %s\n" |
36 | "hlt_bug\t\t: %s\n" | ||
37 | "f00f_bug\t: %s\n" | 31 | "f00f_bug\t: %s\n" |
38 | "coma_bug\t: %s\n" | 32 | "coma_bug\t: %s\n" |
39 | "fpu\t\t: %s\n" | 33 | "fpu\t\t: %s\n" |
@@ -41,11 +35,10 @@ static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c) | |||
41 | "cpuid level\t: %d\n" | 35 | "cpuid level\t: %d\n" |
42 | "wp\t\t: %s\n", | 36 | "wp\t\t: %s\n", |
43 | c->fdiv_bug ? "yes" : "no", | 37 | c->fdiv_bug ? "yes" : "no", |
44 | c->hlt_works_ok ? "no" : "yes", | ||
45 | c->f00f_bug ? "yes" : "no", | 38 | c->f00f_bug ? "yes" : "no", |
46 | c->coma_bug ? "yes" : "no", | 39 | c->coma_bug ? "yes" : "no", |
47 | c->hard_math ? "yes" : "no", | 40 | c->hard_math ? "yes" : "no", |
48 | fpu_exception ? "yes" : "no", | 41 | c->hard_math ? "yes" : "no", |
49 | c->cpuid_level, | 42 | c->cpuid_level, |
50 | c->wp_works_ok ? "yes" : "no"); | 43 | c->wp_works_ok ? "yes" : "no"); |
51 | } | 44 | } |
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index d22d0c4edcfd..03a36321ec54 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c | |||
@@ -33,6 +33,9 @@ | |||
33 | 33 | ||
34 | #define VMWARE_PORT_CMD_GETVERSION 10 | 34 | #define VMWARE_PORT_CMD_GETVERSION 10 |
35 | #define VMWARE_PORT_CMD_GETHZ 45 | 35 | #define VMWARE_PORT_CMD_GETHZ 45 |
36 | #define VMWARE_PORT_CMD_GETVCPU_INFO 68 | ||
37 | #define VMWARE_PORT_CMD_LEGACY_X2APIC 3 | ||
38 | #define VMWARE_PORT_CMD_VCPU_RESERVED 31 | ||
36 | 39 | ||
37 | #define VMWARE_PORT(cmd, eax, ebx, ecx, edx) \ | 40 | #define VMWARE_PORT(cmd, eax, ebx, ecx, edx) \ |
38 | __asm__("inl (%%dx)" : \ | 41 | __asm__("inl (%%dx)" : \ |
@@ -125,10 +128,20 @@ static void __cpuinit vmware_set_cpu_features(struct cpuinfo_x86 *c) | |||
125 | set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE); | 128 | set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE); |
126 | } | 129 | } |
127 | 130 | ||
131 | /* Checks if hypervisor supports x2apic without VT-D interrupt remapping. */ | ||
132 | static bool __init vmware_legacy_x2apic_available(void) | ||
133 | { | ||
134 | uint32_t eax, ebx, ecx, edx; | ||
135 | VMWARE_PORT(GETVCPU_INFO, eax, ebx, ecx, edx); | ||
136 | return (eax & (1 << VMWARE_PORT_CMD_VCPU_RESERVED)) == 0 && | ||
137 | (eax & (1 << VMWARE_PORT_CMD_LEGACY_X2APIC)) != 0; | ||
138 | } | ||
139 | |||
128 | const __refconst struct hypervisor_x86 x86_hyper_vmware = { | 140 | const __refconst struct hypervisor_x86 x86_hyper_vmware = { |
129 | .name = "VMware", | 141 | .name = "VMware", |
130 | .detect = vmware_platform, | 142 | .detect = vmware_platform, |
131 | .set_cpu_features = vmware_set_cpu_features, | 143 | .set_cpu_features = vmware_set_cpu_features, |
132 | .init_platform = vmware_platform_setup, | 144 | .init_platform = vmware_platform_setup, |
145 | .x2apic_available = vmware_legacy_x2apic_available, | ||
133 | }; | 146 | }; |
134 | EXPORT_SYMBOL(x86_hyper_vmware); | 147 | EXPORT_SYMBOL(x86_hyper_vmware); |
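The new vmware_legacy_x2apic_available() boils down to two bit tests on the EAX word returned by the GETVCPU_INFO backdoor call: bit 31 (VCPU_RESERVED) must be clear and bit 3 (LEGACY_X2APIC) must be set. A trivial sketch of that test, using a made-up reply value in place of the VMWARE_PORT() I/O shown in the diff:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define VCPU_RESERVED_BIT	31
#define LEGACY_X2APIC_BIT	3

static bool legacy_x2apic_available(uint32_t eax)
{
	return (eax & (1u << VCPU_RESERVED_BIT)) == 0 &&
	       (eax & (1u << LEGACY_X2APIC_BIT)) != 0;
}

int main(void)
{
	uint32_t eax = 1u << LEGACY_X2APIC_BIT;	/* hypothetical GETVCPU_INFO reply */

	printf("x2apic without interrupt remapping: %s\n",
	       legacy_x2apic_available(eax) ? "yes" : "no");
	return 0;
}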
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 60c78917190c..1e4dbcfe6d31 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c | |||
@@ -85,7 +85,7 @@ static ssize_t cpuid_read(struct file *file, char __user *buf, | |||
85 | { | 85 | { |
86 | char __user *tmp = buf; | 86 | char __user *tmp = buf; |
87 | struct cpuid_regs cmd; | 87 | struct cpuid_regs cmd; |
88 | int cpu = iminor(file->f_path.dentry->d_inode); | 88 | int cpu = iminor(file_inode(file)); |
89 | u64 pos = *ppos; | 89 | u64 pos = *ppos; |
90 | ssize_t bytes = 0; | 90 | ssize_t bytes = 0; |
91 | int err = 0; | 91 | int err = 0; |
@@ -116,7 +116,7 @@ static int cpuid_open(struct inode *inode, struct file *file) | |||
116 | unsigned int cpu; | 116 | unsigned int cpu; |
117 | struct cpuinfo_x86 *c; | 117 | struct cpuinfo_x86 *c; |
118 | 118 | ||
119 | cpu = iminor(file->f_path.dentry->d_inode); | 119 | cpu = iminor(file_inode(file)); |
120 | if (cpu >= nr_cpu_ids || !cpu_online(cpu)) | 120 | if (cpu >= nr_cpu_ids || !cpu_online(cpu)) |
121 | return -ENXIO; /* No such CPU */ | 121 | return -ENXIO; /* No such CPU */ |
122 | 122 | ||
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index ae42418bc50f..c8797d55b245 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c | |||
@@ -232,7 +232,7 @@ void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) | |||
232 | 232 | ||
233 | bust_spinlocks(0); | 233 | bust_spinlocks(0); |
234 | die_owner = -1; | 234 | die_owner = -1; |
235 | add_taint(TAINT_DIE); | 235 | add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); |
236 | die_nest_count--; | 236 | die_nest_count--; |
237 | if (!die_nest_count) | 237 | if (!die_nest_count) |
238 | /* Nest count reaches zero, release the lock. */ | 238 | /* Nest count reaches zero, release the lock. */ |
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index df06ade26bef..d32abeabbda5 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c | |||
@@ -835,7 +835,7 @@ static int __init parse_memopt(char *p) | |||
835 | } | 835 | } |
836 | early_param("mem", parse_memopt); | 836 | early_param("mem", parse_memopt); |
837 | 837 | ||
838 | static int __init parse_memmap_opt(char *p) | 838 | static int __init parse_memmap_one(char *p) |
839 | { | 839 | { |
840 | char *oldp; | 840 | char *oldp; |
841 | u64 start_at, mem_size; | 841 | u64 start_at, mem_size; |
@@ -877,6 +877,20 @@ static int __init parse_memmap_opt(char *p) | |||
877 | 877 | ||
878 | return *p == '\0' ? 0 : -EINVAL; | 878 | return *p == '\0' ? 0 : -EINVAL; |
879 | } | 879 | } |
880 | static int __init parse_memmap_opt(char *str) | ||
881 | { | ||
882 | while (str) { | ||
883 | char *k = strchr(str, ','); | ||
884 | |||
885 | if (k) | ||
886 | *k++ = 0; | ||
887 | |||
888 | parse_memmap_one(str); | ||
889 | str = k; | ||
890 | } | ||
891 | |||
892 | return 0; | ||
893 | } | ||
880 | early_param("memmap", parse_memmap_opt); | 894 | early_param("memmap", parse_memmap_opt); |
881 | 895 | ||
882 | void __init finish_e820_parsing(void) | 896 | void __init finish_e820_parsing(void) |
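Splitting parse_memmap_opt() into a wrapper plus parse_memmap_one() lets a single "memmap=" boot parameter carry several comma-separated region specs. As an illustration only (userspace C, not kernel code, with arbitrary example specs), here is the same destructive comma-splitting walk applied to a writable copy of the option string.

    #include <stdio.h>
    #include <string.h>

    /* Stand-in for parse_memmap_one(); here it just prints each region spec. */
    static void handle_one(const char *spec)
    {
        printf("region spec: %s\n", spec);
    }

    /* Same walk as the new parse_memmap_opt(): cut the option string at every
     * comma and hand each piece to the single-region parser. */
    static void parse_memmap(char *str)
    {
        while (str) {
            char *k = strchr(str, ',');

            if (k)
                *k++ = '\0';

            handle_one(str);
            str = k;
        }
    }

    int main(void)
    {
        char opt[] = "64K@0x100000,0x80000$0x60000";  /* example specs only */
        parse_memmap(opt);
        return 0;
    }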
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index c763116c5359..8f3e2dec1df3 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
@@ -699,52 +699,6 @@ END(syscall_badsys) | |||
699 | */ | 699 | */ |
700 | .popsection | 700 | .popsection |
701 | 701 | ||
702 | /* | ||
703 | * System calls that need a pt_regs pointer. | ||
704 | */ | ||
705 | #define PTREGSCALL0(name) \ | ||
706 | ENTRY(ptregs_##name) ; \ | ||
707 | leal 4(%esp),%eax; \ | ||
708 | jmp sys_##name; \ | ||
709 | ENDPROC(ptregs_##name) | ||
710 | |||
711 | #define PTREGSCALL1(name) \ | ||
712 | ENTRY(ptregs_##name) ; \ | ||
713 | leal 4(%esp),%edx; \ | ||
714 | movl (PT_EBX+4)(%esp),%eax; \ | ||
715 | jmp sys_##name; \ | ||
716 | ENDPROC(ptregs_##name) | ||
717 | |||
718 | #define PTREGSCALL2(name) \ | ||
719 | ENTRY(ptregs_##name) ; \ | ||
720 | leal 4(%esp),%ecx; \ | ||
721 | movl (PT_ECX+4)(%esp),%edx; \ | ||
722 | movl (PT_EBX+4)(%esp),%eax; \ | ||
723 | jmp sys_##name; \ | ||
724 | ENDPROC(ptregs_##name) | ||
725 | |||
726 | #define PTREGSCALL3(name) \ | ||
727 | ENTRY(ptregs_##name) ; \ | ||
728 | CFI_STARTPROC; \ | ||
729 | leal 4(%esp),%eax; \ | ||
730 | pushl_cfi %eax; \ | ||
731 | movl PT_EDX(%eax),%ecx; \ | ||
732 | movl PT_ECX(%eax),%edx; \ | ||
733 | movl PT_EBX(%eax),%eax; \ | ||
734 | call sys_##name; \ | ||
735 | addl $4,%esp; \ | ||
736 | CFI_ADJUST_CFA_OFFSET -4; \ | ||
737 | ret; \ | ||
738 | CFI_ENDPROC; \ | ||
739 | ENDPROC(ptregs_##name) | ||
740 | |||
741 | PTREGSCALL1(iopl) | ||
742 | PTREGSCALL2(sigaltstack) | ||
743 | PTREGSCALL0(sigreturn) | ||
744 | PTREGSCALL0(rt_sigreturn) | ||
745 | PTREGSCALL2(vm86) | ||
746 | PTREGSCALL1(vm86old) | ||
747 | |||
748 | .macro FIXUP_ESPFIX_STACK | 702 | .macro FIXUP_ESPFIX_STACK |
749 | /* | 703 | /* |
750 | * Switch back for ESPFIX stack to the normal zerobased stack | 704 | * Switch back for ESPFIX stack to the normal zerobased stack |
@@ -1066,7 +1020,6 @@ ENTRY(xen_failsafe_callback) | |||
1066 | lea 16(%esp),%esp | 1020 | lea 16(%esp),%esp |
1067 | CFI_ADJUST_CFA_OFFSET -16 | 1021 | CFI_ADJUST_CFA_OFFSET -16 |
1068 | jz 5f | 1022 | jz 5f |
1069 | addl $16,%esp | ||
1070 | jmp iret_exc | 1023 | jmp iret_exc |
1071 | 5: pushl_cfi $-1 /* orig_ax = -1 => not a system call */ | 1024 | 5: pushl_cfi $-1 /* orig_ax = -1 => not a system call */ |
1072 | SAVE_ALL | 1025 | SAVE_ALL |
@@ -1093,11 +1046,18 @@ ENTRY(xen_failsafe_callback) | |||
1093 | _ASM_EXTABLE(4b,9b) | 1046 | _ASM_EXTABLE(4b,9b) |
1094 | ENDPROC(xen_failsafe_callback) | 1047 | ENDPROC(xen_failsafe_callback) |
1095 | 1048 | ||
1096 | BUILD_INTERRUPT3(xen_hvm_callback_vector, XEN_HVM_EVTCHN_CALLBACK, | 1049 | BUILD_INTERRUPT3(xen_hvm_callback_vector, HYPERVISOR_CALLBACK_VECTOR, |
1097 | xen_evtchn_do_upcall) | 1050 | xen_evtchn_do_upcall) |
1098 | 1051 | ||
1099 | #endif /* CONFIG_XEN */ | 1052 | #endif /* CONFIG_XEN */ |
1100 | 1053 | ||
1054 | #if IS_ENABLED(CONFIG_HYPERV) | ||
1055 | |||
1056 | BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR, | ||
1057 | hyperv_vector_handler) | ||
1058 | |||
1059 | #endif /* CONFIG_HYPERV */ | ||
1060 | |||
1101 | #ifdef CONFIG_FUNCTION_TRACER | 1061 | #ifdef CONFIG_FUNCTION_TRACER |
1102 | #ifdef CONFIG_DYNAMIC_FTRACE | 1062 | #ifdef CONFIG_DYNAMIC_FTRACE |
1103 | 1063 | ||
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 70641aff0c25..c1d01e6ca790 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -828,23 +828,6 @@ int_restore_rest: | |||
828 | CFI_ENDPROC | 828 | CFI_ENDPROC |
829 | END(system_call) | 829 | END(system_call) |
830 | 830 | ||
831 | /* | ||
832 | * Certain special system calls that need to save a complete full stack frame. | ||
833 | */ | ||
834 | .macro PTREGSCALL label,func,arg | ||
835 | ENTRY(\label) | ||
836 | PARTIAL_FRAME 1 8 /* offset 8: return address */ | ||
837 | subq $REST_SKIP, %rsp | ||
838 | CFI_ADJUST_CFA_OFFSET REST_SKIP | ||
839 | call save_rest | ||
840 | DEFAULT_FRAME 0 8 /* offset 8: return address */ | ||
841 | leaq 8(%rsp), \arg /* pt_regs pointer */ | ||
842 | call \func | ||
843 | jmp ptregscall_common | ||
844 | CFI_ENDPROC | ||
845 | END(\label) | ||
846 | .endm | ||
847 | |||
848 | .macro FORK_LIKE func | 831 | .macro FORK_LIKE func |
849 | ENTRY(stub_\func) | 832 | ENTRY(stub_\func) |
850 | CFI_STARTPROC | 833 | CFI_STARTPROC |
@@ -861,11 +844,22 @@ ENTRY(stub_\func) | |||
861 | END(stub_\func) | 844 | END(stub_\func) |
862 | .endm | 845 | .endm |
863 | 846 | ||
847 | .macro FIXED_FRAME label,func | ||
848 | ENTRY(\label) | ||
849 | CFI_STARTPROC | ||
850 | PARTIAL_FRAME 0 8 /* offset 8: return address */ | ||
851 | FIXUP_TOP_OF_STACK %r11, 8-ARGOFFSET | ||
852 | call \func | ||
853 | RESTORE_TOP_OF_STACK %r11, 8-ARGOFFSET | ||
854 | ret | ||
855 | CFI_ENDPROC | ||
856 | END(\label) | ||
857 | .endm | ||
858 | |||
864 | FORK_LIKE clone | 859 | FORK_LIKE clone |
865 | FORK_LIKE fork | 860 | FORK_LIKE fork |
866 | FORK_LIKE vfork | 861 | FORK_LIKE vfork |
867 | PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx | 862 | FIXED_FRAME stub_iopl, sys_iopl |
868 | PTREGSCALL stub_iopl, sys_iopl, %rsi | ||
869 | 863 | ||
870 | ENTRY(ptregscall_common) | 864 | ENTRY(ptregscall_common) |
871 | DEFAULT_FRAME 1 8 /* offset 8: return address */ | 865 | DEFAULT_FRAME 1 8 /* offset 8: return address */ |
@@ -887,7 +881,6 @@ ENTRY(stub_execve) | |||
887 | SAVE_REST | 881 | SAVE_REST |
888 | FIXUP_TOP_OF_STACK %r11 | 882 | FIXUP_TOP_OF_STACK %r11 |
889 | call sys_execve | 883 | call sys_execve |
890 | RESTORE_TOP_OF_STACK %r11 | ||
891 | movq %rax,RAX(%rsp) | 884 | movq %rax,RAX(%rsp) |
892 | RESTORE_REST | 885 | RESTORE_REST |
893 | jmp int_ret_from_sys_call | 886 | jmp int_ret_from_sys_call |
@@ -903,7 +896,6 @@ ENTRY(stub_rt_sigreturn) | |||
903 | addq $8, %rsp | 896 | addq $8, %rsp |
904 | PARTIAL_FRAME 0 | 897 | PARTIAL_FRAME 0 |
905 | SAVE_REST | 898 | SAVE_REST |
906 | movq %rsp,%rdi | ||
907 | FIXUP_TOP_OF_STACK %r11 | 899 | FIXUP_TOP_OF_STACK %r11 |
908 | call sys_rt_sigreturn | 900 | call sys_rt_sigreturn |
909 | movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer | 901 | movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer |
@@ -913,14 +905,11 @@ ENTRY(stub_rt_sigreturn) | |||
913 | END(stub_rt_sigreturn) | 905 | END(stub_rt_sigreturn) |
914 | 906 | ||
915 | #ifdef CONFIG_X86_X32_ABI | 907 | #ifdef CONFIG_X86_X32_ABI |
916 | PTREGSCALL stub_x32_sigaltstack, sys32_sigaltstack, %rdx | ||
917 | |||
918 | ENTRY(stub_x32_rt_sigreturn) | 908 | ENTRY(stub_x32_rt_sigreturn) |
919 | CFI_STARTPROC | 909 | CFI_STARTPROC |
920 | addq $8, %rsp | 910 | addq $8, %rsp |
921 | PARTIAL_FRAME 0 | 911 | PARTIAL_FRAME 0 |
922 | SAVE_REST | 912 | SAVE_REST |
923 | movq %rsp,%rdi | ||
924 | FIXUP_TOP_OF_STACK %r11 | 913 | FIXUP_TOP_OF_STACK %r11 |
925 | call sys32_x32_rt_sigreturn | 914 | call sys32_x32_rt_sigreturn |
926 | movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer | 915 | movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer |
@@ -1457,11 +1446,16 @@ ENTRY(xen_failsafe_callback) | |||
1457 | CFI_ENDPROC | 1446 | CFI_ENDPROC |
1458 | END(xen_failsafe_callback) | 1447 | END(xen_failsafe_callback) |
1459 | 1448 | ||
1460 | apicinterrupt XEN_HVM_EVTCHN_CALLBACK \ | 1449 | apicinterrupt HYPERVISOR_CALLBACK_VECTOR \ |
1461 | xen_hvm_callback_vector xen_evtchn_do_upcall | 1450 | xen_hvm_callback_vector xen_evtchn_do_upcall |
1462 | 1451 | ||
1463 | #endif /* CONFIG_XEN */ | 1452 | #endif /* CONFIG_XEN */ |
1464 | 1453 | ||
1454 | #if IS_ENABLED(CONFIG_HYPERV) | ||
1455 | apicinterrupt HYPERVISOR_CALLBACK_VECTOR \ | ||
1456 | hyperv_callback_vector hyperv_vector_handler | ||
1457 | #endif /* CONFIG_HYPERV */ | ||
1458 | |||
1465 | /* | 1459 | /* |
1466 | * Some functions should be protected against kprobes | 1460 | * Some functions should be protected against kprobes |
1467 | */ | 1461 | */ |
@@ -1784,6 +1778,7 @@ first_nmi: | |||
1784 | * Leave room for the "copied" frame | 1778 | * Leave room for the "copied" frame |
1785 | */ | 1779 | */ |
1786 | subq $(5*8), %rsp | 1780 | subq $(5*8), %rsp |
1781 | CFI_ADJUST_CFA_OFFSET 5*8 | ||
1787 | 1782 | ||
1788 | /* Copy the stack frame to the Saved frame */ | 1783 | /* Copy the stack frame to the Saved frame */ |
1789 | .rept 5 | 1784 | .rept 5 |
@@ -1866,10 +1861,8 @@ end_repeat_nmi: | |||
1866 | nmi_swapgs: | 1861 | nmi_swapgs: |
1867 | SWAPGS_UNSAFE_STACK | 1862 | SWAPGS_UNSAFE_STACK |
1868 | nmi_restore: | 1863 | nmi_restore: |
1869 | RESTORE_ALL 8 | 1864 | /* Pop the extra iret frame at once */ |
1870 | 1865 | RESTORE_ALL 6*8 | |
1871 | /* Pop the extra iret frame */ | ||
1872 | addq $(5*8), %rsp | ||
1873 | 1866 | ||
1874 | /* Clear the NMI executing stack variable */ | 1867 | /* Clear the NMI executing stack variable */ |
1875 | movq $0, 5*8(%rsp) | 1868 | movq $0, 5*8(%rsp) |
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 1d414029f1d8..42a392a9fd02 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c | |||
@@ -89,7 +89,7 @@ do_ftrace_mod_code(unsigned long ip, const void *new_code) | |||
89 | * kernel identity mapping to modify code. | 89 | * kernel identity mapping to modify code. |
90 | */ | 90 | */ |
91 | if (within(ip, (unsigned long)_text, (unsigned long)_etext)) | 91 | if (within(ip, (unsigned long)_text, (unsigned long)_etext)) |
92 | ip = (unsigned long)__va(__pa(ip)); | 92 | ip = (unsigned long)__va(__pa_symbol(ip)); |
93 | 93 | ||
94 | return probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE); | 94 | return probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE); |
95 | } | 95 | } |
@@ -279,7 +279,7 @@ static int ftrace_write(unsigned long ip, const char *val, int size) | |||
279 | * kernel identity mapping to modify code. | 279 | * kernel identity mapping to modify code. |
280 | */ | 280 | */ |
281 | if (within(ip, (unsigned long)_text, (unsigned long)_etext)) | 281 | if (within(ip, (unsigned long)_text, (unsigned long)_etext)) |
282 | ip = (unsigned long)__va(__pa(ip)); | 282 | ip = (unsigned long)__va(__pa_symbol(ip)); |
283 | 283 | ||
284 | return probe_kernel_write((void *)ip, val, size); | 284 | return probe_kernel_write((void *)ip, val, size); |
285 | } | 285 | } |
diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/head.c index 48d9d4ea1020..992f442ca155 100644 --- a/arch/x86/kernel/head.c +++ b/arch/x86/kernel/head.c | |||
@@ -5,8 +5,6 @@ | |||
5 | #include <asm/setup.h> | 5 | #include <asm/setup.h> |
6 | #include <asm/bios_ebda.h> | 6 | #include <asm/bios_ebda.h> |
7 | 7 | ||
8 | #define BIOS_LOWMEM_KILOBYTES 0x413 | ||
9 | |||
10 | /* | 8 | /* |
11 | * The BIOS places the EBDA/XBDA at the top of conventional | 9 | * The BIOS places the EBDA/XBDA at the top of conventional |
12 | * memory, and usually decreases the reported amount of | 10 | * memory, and usually decreases the reported amount of |
@@ -16,17 +14,30 @@ | |||
16 | * chipset: reserve a page before VGA to prevent PCI prefetch | 14 | * chipset: reserve a page before VGA to prevent PCI prefetch |
17 | * into it (errata #56). Usually the page is reserved anyways, | 15 | * into it (errata #56). Usually the page is reserved anyways, |
18 | * unless you have no PS/2 mouse plugged in. | 16 | * unless you have no PS/2 mouse plugged in. |
17 | * | ||
18 | * This function is deliberately very conservative. Losing | ||

19 | * memory in the bottom megabyte is rarely a problem, as long | ||
20 | * as we have enough memory to install the trampoline. Using | ||
21 | * memory that is in use by the BIOS or by some DMA device | ||
22 | * the BIOS didn't shut down *is* a big problem. | ||
19 | */ | 23 | */ |
24 | |||
25 | #define BIOS_LOWMEM_KILOBYTES 0x413 | ||
26 | #define LOWMEM_CAP 0x9f000U /* Absolute maximum */ | ||
27 | #define INSANE_CUTOFF 0x20000U /* Less than this = insane */ | ||
28 | |||
20 | void __init reserve_ebda_region(void) | 29 | void __init reserve_ebda_region(void) |
21 | { | 30 | { |
22 | unsigned int lowmem, ebda_addr; | 31 | unsigned int lowmem, ebda_addr; |
23 | 32 | ||
24 | /* To determine the position of the EBDA and the */ | 33 | /* |
25 | /* end of conventional memory, we need to look at */ | 34 | * To determine the position of the EBDA and the |
26 | /* the BIOS data area. In a paravirtual environment */ | 35 | * end of conventional memory, we need to look at |
27 | /* that area is absent. We'll just have to assume */ | 36 | * the BIOS data area. In a paravirtual environment |
28 | /* that the paravirt case can handle memory setup */ | 37 | * that area is absent. We'll just have to assume |
29 | /* correctly, without our help. */ | 38 | * that the paravirt case can handle memory setup |
39 | * correctly, without our help. | ||
40 | */ | ||
30 | if (paravirt_enabled()) | 41 | if (paravirt_enabled()) |
31 | return; | 42 | return; |
32 | 43 | ||
@@ -37,19 +48,23 @@ void __init reserve_ebda_region(void) | |||
37 | /* start of EBDA area */ | 48 | /* start of EBDA area */ |
38 | ebda_addr = get_bios_ebda(); | 49 | ebda_addr = get_bios_ebda(); |
39 | 50 | ||
40 | /* Fixup: bios puts an EBDA in the top 64K segment */ | 51 | /* |
41 | /* of conventional memory, but does not adjust lowmem. */ | 52 | * Note: some old Dells seem to need 4k EBDA without |
42 | if ((lowmem - ebda_addr) <= 0x10000) | 53 | * reporting so, so just consider the memory above 0x9f000 |
43 | lowmem = ebda_addr; | 54 | * to be off limits (bugzilla 2990). |
55 | */ | ||
56 | |||
57 | /* If the EBDA address is below 128K, assume it is bogus */ | ||
58 | if (ebda_addr < INSANE_CUTOFF) | ||
59 | ebda_addr = LOWMEM_CAP; | ||
44 | 60 | ||
45 | /* Fixup: bios does not report an EBDA at all. */ | 61 | /* If lowmem is less than 128K, assume it is bogus */ |
46 | /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */ | 62 | if (lowmem < INSANE_CUTOFF) |
47 | if ((ebda_addr == 0) && (lowmem >= 0x9f000)) | 63 | lowmem = LOWMEM_CAP; |
48 | lowmem = 0x9f000; | ||
49 | 64 | ||
50 | /* Paranoia: should never happen, but... */ | 65 | /* Use the lower of the lowmem and EBDA markers as the cutoff */ |
51 | if ((lowmem == 0) || (lowmem >= 0x100000)) | 66 | lowmem = min(lowmem, ebda_addr); |
52 | lowmem = 0x9f000; | 67 | lowmem = min(lowmem, LOWMEM_CAP); /* Absolute cap */ |
53 | 68 | ||
54 | /* reserve all memory between lowmem and the 1MB mark */ | 69 | /* reserve all memory between lowmem and the 1MB mark */ |
55 | memblock_reserve(lowmem, 0x100000 - lowmem); | 70 | memblock_reserve(lowmem, 0x100000 - lowmem); |
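The rewritten reserve_ebda_region() replaces the old special cases with two "insane value" checks and an unconditional clamp. A compact way to read the new policy, sketched in plain C; the constants are copied from the hunk above, while the helper names are invented for the example.

    #define LOWMEM_CAP     0x9f000U   /* absolute maximum usable low-memory address */
    #define INSANE_CUTOFF  0x20000U   /* below 128K the BIOS-reported value is not believable */

    static unsigned int min_u(unsigned int a, unsigned int b)
    {
        return a < b ? a : b;
    }

    /* Returns the address above which low memory is reserved, following the
     * new reserve_ebda_region() policy: distrust tiny values, then take the
     * lower of the two markers and never exceed LOWMEM_CAP. */
    static unsigned int lowmem_cutoff(unsigned int lowmem, unsigned int ebda_addr)
    {
        if (ebda_addr < INSANE_CUTOFF)
            ebda_addr = LOWMEM_CAP;
        if (lowmem < INSANE_CUTOFF)
            lowmem = LOWMEM_CAP;

        lowmem = min_u(lowmem, ebda_addr);
        return min_u(lowmem, LOWMEM_CAP);
    }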
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index c18f59d10101..138463a24877 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <asm/io_apic.h> | 18 | #include <asm/io_apic.h> |
19 | #include <asm/bios_ebda.h> | 19 | #include <asm/bios_ebda.h> |
20 | #include <asm/tlbflush.h> | 20 | #include <asm/tlbflush.h> |
21 | #include <asm/bootparam_utils.h> | ||
21 | 22 | ||
22 | static void __init i386_default_early_setup(void) | 23 | static void __init i386_default_early_setup(void) |
23 | { | 24 | { |
@@ -30,19 +31,7 @@ static void __init i386_default_early_setup(void) | |||
30 | 31 | ||
31 | void __init i386_start_kernel(void) | 32 | void __init i386_start_kernel(void) |
32 | { | 33 | { |
33 | memblock_reserve(__pa_symbol(&_text), | 34 | sanitize_boot_params(&boot_params); |
34 | __pa_symbol(&__bss_stop) - __pa_symbol(&_text)); | ||
35 | |||
36 | #ifdef CONFIG_BLK_DEV_INITRD | ||
37 | /* Reserve INITRD */ | ||
38 | if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { | ||
39 | /* Assume only end is not page aligned */ | ||
40 | u64 ramdisk_image = boot_params.hdr.ramdisk_image; | ||
41 | u64 ramdisk_size = boot_params.hdr.ramdisk_size; | ||
42 | u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); | ||
43 | memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image); | ||
44 | } | ||
45 | #endif | ||
46 | 35 | ||
47 | /* Call the subarch specific early setup function */ | 36 | /* Call the subarch specific early setup function */ |
48 | switch (boot_params.hdr.hardware_subarch) { | 37 | switch (boot_params.hdr.hardware_subarch) { |
@@ -57,11 +46,5 @@ void __init i386_start_kernel(void) | |||
57 | break; | 46 | break; |
58 | } | 47 | } |
59 | 48 | ||
60 | /* | ||
61 | * At this point everything still needed from the boot loader | ||
62 | * or BIOS or kernel text should be early reserved or marked not | ||
63 | * RAM in e820. All other memory is free game. | ||
64 | */ | ||
65 | |||
66 | start_kernel(); | 49 | start_kernel(); |
67 | } | 50 | } |
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 037df57a99ac..c5e403f6d869 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c | |||
@@ -25,12 +25,84 @@ | |||
25 | #include <asm/kdebug.h> | 25 | #include <asm/kdebug.h> |
26 | #include <asm/e820.h> | 26 | #include <asm/e820.h> |
27 | #include <asm/bios_ebda.h> | 27 | #include <asm/bios_ebda.h> |
28 | #include <asm/bootparam_utils.h> | ||
29 | #include <asm/microcode.h> | ||
28 | 30 | ||
29 | static void __init zap_identity_mappings(void) | 31 | /* |
32 | * Manage page tables very early on. | ||
33 | */ | ||
34 | extern pgd_t early_level4_pgt[PTRS_PER_PGD]; | ||
35 | extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD]; | ||
36 | static unsigned int __initdata next_early_pgt = 2; | ||
37 | |||
38 | /* Wipe all early page tables except for the kernel symbol map */ | ||
39 | static void __init reset_early_page_tables(void) | ||
30 | { | 40 | { |
31 | pgd_t *pgd = pgd_offset_k(0UL); | 41 | unsigned long i; |
32 | pgd_clear(pgd); | 42 | |
33 | __flush_tlb_all(); | 43 | for (i = 0; i < PTRS_PER_PGD-1; i++) |
44 | early_level4_pgt[i].pgd = 0; | ||
45 | |||
46 | next_early_pgt = 0; | ||
47 | |||
48 | write_cr3(__pa(early_level4_pgt)); | ||
49 | } | ||
50 | |||
51 | /* Create a new PMD entry */ | ||
52 | int __init early_make_pgtable(unsigned long address) | ||
53 | { | ||
54 | unsigned long physaddr = address - __PAGE_OFFSET; | ||
55 | unsigned long i; | ||
56 | pgdval_t pgd, *pgd_p; | ||
57 | pudval_t pud, *pud_p; | ||
58 | pmdval_t pmd, *pmd_p; | ||
59 | |||
60 | /* Invalid address or early pgt is done ? */ | ||
61 | if (physaddr >= MAXMEM || read_cr3() != __pa(early_level4_pgt)) | ||
62 | return -1; | ||
63 | |||
64 | again: | ||
65 | pgd_p = &early_level4_pgt[pgd_index(address)].pgd; | ||
66 | pgd = *pgd_p; | ||
67 | |||
68 | /* | ||
69 | * The use of __START_KERNEL_map rather than __PAGE_OFFSET here is | ||
70 | * critical -- __PAGE_OFFSET would point us back into the dynamic | ||
71 | * range and we might end up looping forever... | ||
72 | */ | ||
73 | if (pgd) | ||
74 | pud_p = (pudval_t *)((pgd & PTE_PFN_MASK) + __START_KERNEL_map - phys_base); | ||
75 | else { | ||
76 | if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES) { | ||
77 | reset_early_page_tables(); | ||
78 | goto again; | ||
79 | } | ||
80 | |||
81 | pud_p = (pudval_t *)early_dynamic_pgts[next_early_pgt++]; | ||
82 | for (i = 0; i < PTRS_PER_PUD; i++) | ||
83 | pud_p[i] = 0; | ||
84 | *pgd_p = (pgdval_t)pud_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE; | ||
85 | } | ||
86 | pud_p += pud_index(address); | ||
87 | pud = *pud_p; | ||
88 | |||
89 | if (pud) | ||
90 | pmd_p = (pmdval_t *)((pud & PTE_PFN_MASK) + __START_KERNEL_map - phys_base); | ||
91 | else { | ||
92 | if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES) { | ||
93 | reset_early_page_tables(); | ||
94 | goto again; | ||
95 | } | ||
96 | |||
97 | pmd_p = (pmdval_t *)early_dynamic_pgts[next_early_pgt++]; | ||
98 | for (i = 0; i < PTRS_PER_PMD; i++) | ||
99 | pmd_p[i] = 0; | ||
100 | *pud_p = (pudval_t)pmd_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE; | ||
101 | } | ||
102 | pmd = (physaddr & PMD_MASK) + (__PAGE_KERNEL_LARGE & ~_PAGE_GLOBAL); | ||
103 | pmd_p[pmd_index(address)] = pmd; | ||
104 | |||
105 | return 0; | ||
34 | } | 106 | } |
35 | 107 | ||
36 | /* Don't add a printk in there. printk relies on the PDA which is not initialized | 108 | /* Don't add a printk in there. printk relies on the PDA which is not initialized |
@@ -41,13 +113,25 @@ static void __init clear_bss(void) | |||
41 | (unsigned long) __bss_stop - (unsigned long) __bss_start); | 113 | (unsigned long) __bss_stop - (unsigned long) __bss_start); |
42 | } | 114 | } |
43 | 115 | ||
116 | static unsigned long get_cmd_line_ptr(void) | ||
117 | { | ||
118 | unsigned long cmd_line_ptr = boot_params.hdr.cmd_line_ptr; | ||
119 | |||
120 | cmd_line_ptr |= (u64)boot_params.ext_cmd_line_ptr << 32; | ||
121 | |||
122 | return cmd_line_ptr; | ||
123 | } | ||
124 | |||
44 | static void __init copy_bootdata(char *real_mode_data) | 125 | static void __init copy_bootdata(char *real_mode_data) |
45 | { | 126 | { |
46 | char * command_line; | 127 | char * command_line; |
128 | unsigned long cmd_line_ptr; | ||
47 | 129 | ||
48 | memcpy(&boot_params, real_mode_data, sizeof boot_params); | 130 | memcpy(&boot_params, real_mode_data, sizeof boot_params); |
49 | if (boot_params.hdr.cmd_line_ptr) { | 131 | sanitize_boot_params(&boot_params); |
50 | command_line = __va(boot_params.hdr.cmd_line_ptr); | 132 | cmd_line_ptr = get_cmd_line_ptr(); |
133 | if (cmd_line_ptr) { | ||
134 | command_line = __va(cmd_line_ptr); | ||
51 | memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE); | 135 | memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE); |
52 | } | 136 | } |
53 | } | 137 | } |
@@ -70,54 +154,40 @@ void __init x86_64_start_kernel(char * real_mode_data) | |||
70 | (__START_KERNEL & PGDIR_MASK))); | 154 | (__START_KERNEL & PGDIR_MASK))); |
71 | BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END); | 155 | BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END); |
72 | 156 | ||
157 | /* Kill off the identity-map trampoline */ | ||
158 | reset_early_page_tables(); | ||
159 | |||
73 | /* clear bss before set_intr_gate with early_idt_handler */ | 160 | /* clear bss before set_intr_gate with early_idt_handler */ |
74 | clear_bss(); | 161 | clear_bss(); |
75 | 162 | ||
76 | /* Make NULL pointers segfault */ | 163 | for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) |
77 | zap_identity_mappings(); | ||
78 | |||
79 | max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT; | ||
80 | |||
81 | for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) { | ||
82 | #ifdef CONFIG_EARLY_PRINTK | ||
83 | set_intr_gate(i, &early_idt_handlers[i]); | 164 | set_intr_gate(i, &early_idt_handlers[i]); |
84 | #else | ||
85 | set_intr_gate(i, early_idt_handler); | ||
86 | #endif | ||
87 | } | ||
88 | load_idt((const struct desc_ptr *)&idt_descr); | 165 | load_idt((const struct desc_ptr *)&idt_descr); |
89 | 166 | ||
167 | copy_bootdata(__va(real_mode_data)); | ||
168 | |||
169 | /* | ||
170 | * Load microcode early on BSP. | ||
171 | */ | ||
172 | load_ucode_bsp(); | ||
173 | |||
90 | if (console_loglevel == 10) | 174 | if (console_loglevel == 10) |
91 | early_printk("Kernel alive\n"); | 175 | early_printk("Kernel alive\n"); |
92 | 176 | ||
177 | clear_page(init_level4_pgt); | ||
178 | /* set init_level4_pgt kernel high mapping*/ | ||
179 | init_level4_pgt[511] = early_level4_pgt[511]; | ||
180 | |||
93 | x86_64_start_reservations(real_mode_data); | 181 | x86_64_start_reservations(real_mode_data); |
94 | } | 182 | } |
95 | 183 | ||
96 | void __init x86_64_start_reservations(char *real_mode_data) | 184 | void __init x86_64_start_reservations(char *real_mode_data) |
97 | { | 185 | { |
98 | copy_bootdata(__va(real_mode_data)); | 186 | /* version is always not zero if it is copied */ |
99 | 187 | if (!boot_params.hdr.version) | |
100 | memblock_reserve(__pa_symbol(&_text), | 188 | copy_bootdata(__va(real_mode_data)); |
101 | __pa_symbol(&__bss_stop) - __pa_symbol(&_text)); | ||
102 | |||
103 | #ifdef CONFIG_BLK_DEV_INITRD | ||
104 | /* Reserve INITRD */ | ||
105 | if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { | ||
106 | /* Assume only end is not page aligned */ | ||
107 | unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; | ||
108 | unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; | ||
109 | unsigned long ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); | ||
110 | memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image); | ||
111 | } | ||
112 | #endif | ||
113 | 189 | ||
114 | reserve_ebda_region(); | 190 | reserve_ebda_region(); |
115 | 191 | ||
116 | /* | ||
117 | * At this point everything still needed from the boot loader | ||
118 | * or BIOS or kernel text should be early reserved or marked not | ||
119 | * RAM in e820. All other memory is free game. | ||
120 | */ | ||
121 | |||
122 | start_kernel(); | 192 | start_kernel(); |
123 | } | 193 | } |
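The new early_make_pgtable() builds page-fault-driven mappings from a small pool of statically reserved tables (early_dynamic_pgts) and, if the pool runs out, simply resets it and retries; the #PF path that reaches it appears in the head_64.S hunk further down. Below is a heavily simplified, hypothetical sketch of that "allocate from a fixed pool, wipe and start over on exhaustion" pattern, with plain arrays standing in for page tables; it illustrates only the control flow, not the real paging code.

    #include <stddef.h>
    #include <string.h>

    #define POOL_SIZE 64    /* stand-in for EARLY_DYNAMIC_PAGE_TABLES */
    #define ENTRIES   512   /* entries per page-table level */

    static unsigned long pool[POOL_SIZE][ENTRIES];
    static unsigned int next_free;

    static void reset_pool(void)
    {
        next_free = 0;      /* the real code also wipes the top-level table and reloads CR3 */
    }

    /* Hand out the next table from the fixed pool, or NULL when it is exhausted. */
    static unsigned long *alloc_table(void)
    {
        if (next_free >= POOL_SIZE)
            return NULL;
        memset(pool[next_free], 0, sizeof(pool[next_free]));
        return pool[next_free++];
    }

    /* Mirrors the "goto again" shape of early_make_pgtable(): when the pool runs
     * dry, throw everything away and rebuild only the mapping needed right now. */
    static unsigned long *get_table_or_retry(void)
    {
        unsigned long *t = alloc_table();

        if (!t) {
            reset_pool();
            t = alloc_table();
        }
        return t;
    }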
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 8e7f6556028f..73afd11799ca 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S | |||
@@ -144,6 +144,11 @@ ENTRY(startup_32) | |||
144 | movl %eax, pa(olpc_ofw_pgd) | 144 | movl %eax, pa(olpc_ofw_pgd) |
145 | #endif | 145 | #endif |
146 | 146 | ||
147 | #ifdef CONFIG_MICROCODE_EARLY | ||
148 | /* Early load ucode on BSP. */ | ||
149 | call load_ucode_bsp | ||
150 | #endif | ||
151 | |||
147 | /* | 152 | /* |
148 | * Initialize page tables. This creates a PDE and a set of page | 153 | * Initialize page tables. This creates a PDE and a set of page |
149 | * tables, which are located immediately beyond __brk_base. The variable | 154 | * tables, which are located immediately beyond __brk_base. The variable |
@@ -299,38 +304,59 @@ ENTRY(startup_32_smp) | |||
299 | movl %eax,%ss | 304 | movl %eax,%ss |
300 | leal -__PAGE_OFFSET(%ecx),%esp | 305 | leal -__PAGE_OFFSET(%ecx),%esp |
301 | 306 | ||
307 | #ifdef CONFIG_MICROCODE_EARLY | ||
308 | /* Early load ucode on AP. */ | ||
309 | call load_ucode_ap | ||
310 | #endif | ||
311 | |||
312 | |||
302 | default_entry: | 313 | default_entry: |
314 | #define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \ | ||
315 | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \ | ||
316 | X86_CR0_PG) | ||
317 | movl $(CR0_STATE & ~X86_CR0_PG),%eax | ||
318 | movl %eax,%cr0 | ||
319 | |||
303 | /* | 320 | /* |
304 | * New page tables may be in 4Mbyte page mode and may | 321 | * We want to start out with EFLAGS unambiguously cleared. Some BIOSes leave |
305 | * be using the global pages. | 322 | * bits like NT set. This would confuse the debugger if this code is traced. So |
323 | * initialize them properly now before switching to protected mode. That means | ||
324 | * DF in particular (even though we have cleared it earlier after copying the | ||
325 | * command line) because GCC expects it. | ||
326 | */ | ||
327 | pushl $0 | ||
328 | popfl | ||
329 | |||
330 | /* | ||
331 | * New page tables may be in 4Mbyte page mode and may be using the global pages. | ||
306 | * | 332 | * |
307 | * NOTE! If we are on a 486 we may have no cr4 at all! | 333 | * NOTE! If we are on a 486 we may have no cr4 at all! Specifically, cr4 exists |
308 | * Specifically, cr4 exists if and only if CPUID exists | 334 | * if and only if CPUID exists and has flags other than the FPU flag set. |
309 | * and has flags other than the FPU flag set. | ||
310 | */ | 335 | */ |
336 | movl $-1,pa(X86_CPUID) # preset CPUID level | ||
311 | movl $X86_EFLAGS_ID,%ecx | 337 | movl $X86_EFLAGS_ID,%ecx |
312 | pushl %ecx | 338 | pushl %ecx |
313 | popfl | 339 | popfl # set EFLAGS=ID |
314 | pushfl | ||
315 | popl %eax | ||
316 | pushl $0 | ||
317 | popfl | ||
318 | pushfl | 340 | pushfl |
319 | popl %edx | 341 | popl %eax # get EFLAGS |
320 | xorl %edx,%eax | 342 | testl $X86_EFLAGS_ID,%eax # did EFLAGS.ID remain set? |
321 | testl %ecx,%eax | 343 | jz enable_paging # hw disallowed setting of ID bit |
322 | jz 6f # No ID flag = no CPUID = no CR4 | 344 | # which means no CPUID and no CR4 |
345 | |||
346 | xorl %eax,%eax | ||
347 | cpuid | ||
348 | movl %eax,pa(X86_CPUID) # save largest std CPUID function | ||
323 | 349 | ||
324 | movl $1,%eax | 350 | movl $1,%eax |
325 | cpuid | 351 | cpuid |
326 | andl $~1,%edx # Ignore CPUID.FPU | 352 | andl $~1,%edx # Ignore CPUID.FPU |
327 | jz 6f # No flags or only CPUID.FPU = no CR4 | 353 | jz enable_paging # No flags or only CPUID.FPU = no CR4 |
328 | 354 | ||
329 | movl pa(mmu_cr4_features),%eax | 355 | movl pa(mmu_cr4_features),%eax |
330 | movl %eax,%cr4 | 356 | movl %eax,%cr4 |
331 | 357 | ||
332 | testb $X86_CR4_PAE, %al # check if PAE is enabled | 358 | testb $X86_CR4_PAE, %al # check if PAE is enabled |
333 | jz 6f | 359 | jz enable_paging |
334 | 360 | ||
335 | /* Check if extended functions are implemented */ | 361 | /* Check if extended functions are implemented */ |
336 | movl $0x80000000, %eax | 362 | movl $0x80000000, %eax |
@@ -338,7 +364,7 @@ default_entry: | |||
338 | /* Value must be in the range 0x80000001 to 0x8000ffff */ | 364 | /* Value must be in the range 0x80000001 to 0x8000ffff */ |
339 | subl $0x80000001, %eax | 365 | subl $0x80000001, %eax |
340 | cmpl $(0x8000ffff-0x80000001), %eax | 366 | cmpl $(0x8000ffff-0x80000001), %eax |
341 | ja 6f | 367 | ja enable_paging |
342 | 368 | ||
343 | /* Clear bogus XD_DISABLE bits */ | 369 | /* Clear bogus XD_DISABLE bits */ |
344 | call verify_cpu | 370 | call verify_cpu |
@@ -347,7 +373,7 @@ default_entry: | |||
347 | cpuid | 373 | cpuid |
348 | /* Execute Disable bit supported? */ | 374 | /* Execute Disable bit supported? */ |
349 | btl $(X86_FEATURE_NX & 31), %edx | 375 | btl $(X86_FEATURE_NX & 31), %edx |
350 | jnc 6f | 376 | jnc enable_paging |
351 | 377 | ||
352 | /* Setup EFER (Extended Feature Enable Register) */ | 378 | /* Setup EFER (Extended Feature Enable Register) */ |
353 | movl $MSR_EFER, %ecx | 379 | movl $MSR_EFER, %ecx |
@@ -357,15 +383,14 @@ default_entry: | |||
357 | /* Make changes effective */ | 383 | /* Make changes effective */ |
358 | wrmsr | 384 | wrmsr |
359 | 385 | ||
360 | 6: | 386 | enable_paging: |
361 | 387 | ||
362 | /* | 388 | /* |
363 | * Enable paging | 389 | * Enable paging |
364 | */ | 390 | */ |
365 | movl $pa(initial_page_table), %eax | 391 | movl $pa(initial_page_table), %eax |
366 | movl %eax,%cr3 /* set the page table pointer.. */ | 392 | movl %eax,%cr3 /* set the page table pointer.. */ |
367 | movl %cr0,%eax | 393 | movl $CR0_STATE,%eax |
368 | orl $X86_CR0_PG,%eax | ||
369 | movl %eax,%cr0 /* ..and set paging (PG) bit */ | 394 | movl %eax,%cr0 /* ..and set paging (PG) bit */ |
370 | ljmp $__BOOT_CS,$1f /* Clear prefetch and normalize %eip */ | 395 | ljmp $__BOOT_CS,$1f /* Clear prefetch and normalize %eip */ |
371 | 1: | 396 | 1: |
@@ -373,14 +398,6 @@ default_entry: | |||
373 | addl $__PAGE_OFFSET, %esp | 398 | addl $__PAGE_OFFSET, %esp |
374 | 399 | ||
375 | /* | 400 | /* |
376 | * Initialize eflags. Some BIOS's leave bits like NT set. This would | ||
377 | * confuse the debugger if this code is traced. | ||
378 | * XXX - best to initialize before switching to protected mode. | ||
379 | */ | ||
380 | pushl $0 | ||
381 | popfl | ||
382 | |||
383 | /* | ||
384 | * start system 32-bit setup. We need to re-do some of the things done | 401 | * start system 32-bit setup. We need to re-do some of the things done |
385 | * in 16-bit mode for the "real" operations. | 402 | * in 16-bit mode for the "real" operations. |
386 | */ | 403 | */ |
@@ -389,31 +406,11 @@ default_entry: | |||
389 | jz 1f # Did we do this already? | 406 | jz 1f # Did we do this already? |
390 | call *%eax | 407 | call *%eax |
391 | 1: | 408 | 1: |
392 | 409 | ||
393 | /* check if it is 486 or 386. */ | ||
394 | /* | 410 | /* |
395 | * XXX - this does a lot of unnecessary setup. Alignment checks don't | 411 | * Check if it is 486 |
396 | * apply at our cpl of 0 and the stack ought to be aligned already, and | ||
397 | * we don't need to preserve eflags. | ||
398 | */ | 412 | */ |
399 | movl $-1,X86_CPUID # -1 for no CPUID initially | 413 | cmpl $-1,X86_CPUID |
400 | movb $3,X86 # at least 386 | ||
401 | pushfl # push EFLAGS | ||
402 | popl %eax # get EFLAGS | ||
403 | movl %eax,%ecx # save original EFLAGS | ||
404 | xorl $0x240000,%eax # flip AC and ID bits in EFLAGS | ||
405 | pushl %eax # copy to EFLAGS | ||
406 | popfl # set EFLAGS | ||
407 | pushfl # get new EFLAGS | ||
408 | popl %eax # put it in eax | ||
409 | xorl %ecx,%eax # change in flags | ||
410 | pushl %ecx # restore original EFLAGS | ||
411 | popfl | ||
412 | testl $0x40000,%eax # check if AC bit changed | ||
413 | je is386 | ||
414 | |||
415 | movb $4,X86 # at least 486 | ||
416 | testl $0x200000,%eax # check if ID bit changed | ||
417 | je is486 | 414 | je is486 |
418 | 415 | ||
419 | /* get vendor info */ | 416 | /* get vendor info */ |
@@ -439,11 +436,10 @@ default_entry: | |||
439 | movb %cl,X86_MASK | 436 | movb %cl,X86_MASK |
440 | movl %edx,X86_CAPABILITY | 437 | movl %edx,X86_CAPABILITY |
441 | 438 | ||
442 | is486: movl $0x50022,%ecx # set AM, WP, NE and MP | 439 | is486: |
443 | jmp 2f | 440 | movb $4,X86 |
444 | 441 | movl $0x50022,%ecx # set AM, WP, NE and MP | |
445 | is386: movl $2,%ecx # set MP | 442 | movl %cr0,%eax |
446 | 2: movl %cr0,%eax | ||
447 | andl $0x80000011,%eax # Save PG,PE,ET | 443 | andl $0x80000011,%eax # Save PG,PE,ET |
448 | orl %ecx,%eax | 444 | orl %ecx,%eax |
449 | movl %eax,%cr0 | 445 | movl %eax,%cr0 |
@@ -468,7 +464,6 @@ is386: movl $2,%ecx # set MP | |||
468 | xorl %eax,%eax # Clear LDT | 464 | xorl %eax,%eax # Clear LDT |
469 | lldt %ax | 465 | lldt %ax |
470 | 466 | ||
471 | cld # gcc2 wants the direction flag cleared at all times | ||
472 | pushl $0 # fake return address for unwinder | 467 | pushl $0 # fake return address for unwinder |
473 | jmp *(initial_code) | 468 | jmp *(initial_code) |
474 | 469 | ||
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 980053c4b9cc..6859e9626442 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S | |||
@@ -47,14 +47,13 @@ L3_START_KERNEL = pud_index(__START_KERNEL_map) | |||
47 | .code64 | 47 | .code64 |
48 | .globl startup_64 | 48 | .globl startup_64 |
49 | startup_64: | 49 | startup_64: |
50 | |||
51 | /* | 50 | /* |
52 | * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1, | 51 | * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0, |
53 | * and someone has loaded an identity mapped page table | 52 | * and someone has loaded an identity mapped page table |
54 | * for us. These identity mapped page tables map all of the | 53 | * for us. These identity mapped page tables map all of the |
55 | * kernel pages and possibly all of memory. | 54 | * kernel pages and possibly all of memory. |
56 | * | 55 | * |
57 | * %esi holds a physical pointer to real_mode_data. | 56 | * %rsi holds a physical pointer to real_mode_data. |
58 | * | 57 | * |
59 | * We come here either directly from a 64bit bootloader, or from | 58 | * We come here either directly from a 64bit bootloader, or from |
60 | * arch/x86_64/boot/compressed/head.S. | 59 | * arch/x86_64/boot/compressed/head.S. |
@@ -66,7 +65,8 @@ startup_64: | |||
66 | * tables and then reload them. | 65 | * tables and then reload them. |
67 | */ | 66 | */ |
68 | 67 | ||
69 | /* Compute the delta between the address I am compiled to run at and the | 68 | /* |
69 | * Compute the delta between the address I am compiled to run at and the | ||
70 | * address I am actually running at. | 70 | * address I am actually running at. |
71 | */ | 71 | */ |
72 | leaq _text(%rip), %rbp | 72 | leaq _text(%rip), %rbp |
@@ -78,45 +78,62 @@ startup_64: | |||
78 | testl %eax, %eax | 78 | testl %eax, %eax |
79 | jnz bad_address | 79 | jnz bad_address |
80 | 80 | ||
81 | /* Is the address too large? */ | 81 | /* |
82 | leaq _text(%rip), %rdx | 82 | * Is the address too large? |
83 | movq $PGDIR_SIZE, %rax | ||
84 | cmpq %rax, %rdx | ||
85 | jae bad_address | ||
86 | |||
87 | /* Fixup the physical addresses in the page table | ||
88 | */ | 83 | */ |
89 | addq %rbp, init_level4_pgt + 0(%rip) | 84 | leaq _text(%rip), %rax |
90 | addq %rbp, init_level4_pgt + (L4_PAGE_OFFSET*8)(%rip) | 85 | shrq $MAX_PHYSMEM_BITS, %rax |
91 | addq %rbp, init_level4_pgt + (L4_START_KERNEL*8)(%rip) | 86 | jnz bad_address |
92 | 87 | ||
93 | addq %rbp, level3_ident_pgt + 0(%rip) | 88 | /* |
89 | * Fixup the physical addresses in the page table | ||
90 | */ | ||
91 | addq %rbp, early_level4_pgt + (L4_START_KERNEL*8)(%rip) | ||
94 | 92 | ||
95 | addq %rbp, level3_kernel_pgt + (510*8)(%rip) | 93 | addq %rbp, level3_kernel_pgt + (510*8)(%rip) |
96 | addq %rbp, level3_kernel_pgt + (511*8)(%rip) | 94 | addq %rbp, level3_kernel_pgt + (511*8)(%rip) |
97 | 95 | ||
98 | addq %rbp, level2_fixmap_pgt + (506*8)(%rip) | 96 | addq %rbp, level2_fixmap_pgt + (506*8)(%rip) |
99 | 97 | ||
100 | /* Add an Identity mapping if I am above 1G */ | 98 | /* |
99 | * Set up the identity mapping for the switchover. These | ||
100 | * entries should *NOT* have the global bit set! This also | ||
101 | * creates a bunch of nonsense entries but that is fine -- | ||
102 | * it avoids problems around wraparound. | ||
103 | */ | ||
101 | leaq _text(%rip), %rdi | 104 | leaq _text(%rip), %rdi |
102 | andq $PMD_PAGE_MASK, %rdi | 105 | leaq early_level4_pgt(%rip), %rbx |
103 | 106 | ||
104 | movq %rdi, %rax | 107 | movq %rdi, %rax |
105 | shrq $PUD_SHIFT, %rax | 108 | shrq $PGDIR_SHIFT, %rax |
106 | andq $(PTRS_PER_PUD - 1), %rax | ||
107 | jz ident_complete | ||
108 | 109 | ||
109 | leaq (level2_spare_pgt - __START_KERNEL_map + _KERNPG_TABLE)(%rbp), %rdx | 110 | leaq (4096 + _KERNPG_TABLE)(%rbx), %rdx |
110 | leaq level3_ident_pgt(%rip), %rbx | 111 | movq %rdx, 0(%rbx,%rax,8) |
111 | movq %rdx, 0(%rbx, %rax, 8) | 112 | movq %rdx, 8(%rbx,%rax,8) |
112 | 113 | ||
114 | addq $4096, %rdx | ||
113 | movq %rdi, %rax | 115 | movq %rdi, %rax |
114 | shrq $PMD_SHIFT, %rax | 116 | shrq $PUD_SHIFT, %rax |
115 | andq $(PTRS_PER_PMD - 1), %rax | 117 | andl $(PTRS_PER_PUD-1), %eax |
116 | leaq __PAGE_KERNEL_IDENT_LARGE_EXEC(%rdi), %rdx | 118 | movq %rdx, (4096+0)(%rbx,%rax,8) |
117 | leaq level2_spare_pgt(%rip), %rbx | 119 | movq %rdx, (4096+8)(%rbx,%rax,8) |
118 | movq %rdx, 0(%rbx, %rax, 8) | 120 | |
119 | ident_complete: | 121 | addq $8192, %rbx |
122 | movq %rdi, %rax | ||
123 | shrq $PMD_SHIFT, %rdi | ||
124 | addq $(__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL), %rax | ||
125 | leaq (_end - 1)(%rip), %rcx | ||
126 | shrq $PMD_SHIFT, %rcx | ||
127 | subq %rdi, %rcx | ||
128 | incl %ecx | ||
129 | |||
130 | 1: | ||
131 | andq $(PTRS_PER_PMD - 1), %rdi | ||
132 | movq %rax, (%rbx,%rdi,8) | ||
133 | incq %rdi | ||
134 | addq $PMD_SIZE, %rax | ||
135 | decl %ecx | ||
136 | jnz 1b | ||
120 | 137 | ||
121 | /* | 138 | /* |
122 | * Fixup the kernel text+data virtual addresses. Note that | 139 | * Fixup the kernel text+data virtual addresses. Note that |
@@ -124,7 +141,6 @@ ident_complete: | |||
124 | * cleanup_highmap() fixes this up along with the mappings | 141 | * cleanup_highmap() fixes this up along with the mappings |
125 | * beyond _end. | 142 | * beyond _end. |
126 | */ | 143 | */ |
127 | |||
128 | leaq level2_kernel_pgt(%rip), %rdi | 144 | leaq level2_kernel_pgt(%rip), %rdi |
129 | leaq 4096(%rdi), %r8 | 145 | leaq 4096(%rdi), %r8 |
130 | /* See if it is a valid page table entry */ | 146 | /* See if it is a valid page table entry */ |
@@ -139,17 +155,14 @@ ident_complete: | |||
139 | /* Fixup phys_base */ | 155 | /* Fixup phys_base */ |
140 | addq %rbp, phys_base(%rip) | 156 | addq %rbp, phys_base(%rip) |
141 | 157 | ||
142 | /* Due to ENTRY(), sometimes the empty space gets filled with | 158 | movq $(early_level4_pgt - __START_KERNEL_map), %rax |
143 | * zeros. Better take a jmp than relying on empty space being | 159 | jmp 1f |
144 | * filled with 0x90 (nop) | ||
145 | */ | ||
146 | jmp secondary_startup_64 | ||
147 | ENTRY(secondary_startup_64) | 160 | ENTRY(secondary_startup_64) |
148 | /* | 161 | /* |
149 | * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1, | 162 | * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0, |
150 | * and someone has loaded a mapped page table. | 163 | * and someone has loaded a mapped page table. |
151 | * | 164 | * |
152 | * %esi holds a physical pointer to real_mode_data. | 165 | * %rsi holds a physical pointer to real_mode_data. |
153 | * | 166 | * |
154 | * We come here either from startup_64 (using physical addresses) | 167 | * We come here either from startup_64 (using physical addresses) |
155 | * or from trampoline.S (using virtual addresses). | 168 | * or from trampoline.S (using virtual addresses). |
@@ -159,12 +172,14 @@ ENTRY(secondary_startup_64) | |||
159 | * after the boot processor executes this code. | 172 | * after the boot processor executes this code. |
160 | */ | 173 | */ |
161 | 174 | ||
175 | movq $(init_level4_pgt - __START_KERNEL_map), %rax | ||
176 | 1: | ||
177 | |||
162 | /* Enable PAE mode and PGE */ | 178 | /* Enable PAE mode and PGE */ |
163 | movl $(X86_CR4_PAE | X86_CR4_PGE), %eax | 179 | movl $(X86_CR4_PAE | X86_CR4_PGE), %ecx |
164 | movq %rax, %cr4 | 180 | movq %rcx, %cr4 |
165 | 181 | ||
166 | /* Setup early boot stage 4 level pagetables. */ | 182 | /* Setup early boot stage 4 level pagetables. */ |
167 | movq $(init_level4_pgt - __START_KERNEL_map), %rax | ||
168 | addq phys_base(%rip), %rax | 183 | addq phys_base(%rip), %rax |
169 | movq %rax, %cr3 | 184 | movq %rax, %cr3 |
170 | 185 | ||
@@ -196,7 +211,7 @@ ENTRY(secondary_startup_64) | |||
196 | movq %rax, %cr0 | 211 | movq %rax, %cr0 |
197 | 212 | ||
198 | /* Setup a boot time stack */ | 213 | /* Setup a boot time stack */ |
199 | movq stack_start(%rip),%rsp | 214 | movq stack_start(%rip), %rsp |
200 | 215 | ||
201 | /* zero EFLAGS after setting rsp */ | 216 | /* zero EFLAGS after setting rsp */ |
202 | pushq $0 | 217 | pushq $0 |
@@ -236,15 +251,33 @@ ENTRY(secondary_startup_64) | |||
236 | movl initial_gs+4(%rip),%edx | 251 | movl initial_gs+4(%rip),%edx |
237 | wrmsr | 252 | wrmsr |
238 | 253 | ||
239 | /* esi is pointer to real mode structure with interesting info. | 254 | /* rsi is pointer to real mode structure with interesting info. |
240 | pass it to C */ | 255 | pass it to C */ |
241 | movl %esi, %edi | 256 | movq %rsi, %rdi |
242 | 257 | ||
243 | /* Finally jump to run C code and to be on real kernel address | 258 | /* Finally jump to run C code and to be on real kernel address |
244 | * Since we are running on identity-mapped space we have to jump | 259 | * Since we are running on identity-mapped space we have to jump |
245 | * to the full 64bit address, this is only possible as indirect | 260 | * to the full 64bit address, this is only possible as indirect |
246 | * jump. In addition we need to ensure %cs is set so we make this | 261 | * jump. In addition we need to ensure %cs is set so we make this |
247 | * a far return. | 262 | * a far return. |
263 | * | ||
264 | * Note: do not change to far jump indirect with 64bit offset. | ||
265 | * | ||
266 | * AMD does not support far jump indirect with 64bit offset. | ||
267 | * AMD64 Architecture Programmer's Manual, Volume 3: states only | ||
268 | * JMP FAR mem16:16 FF /5 Far jump indirect, | ||
269 | * with the target specified by a far pointer in memory. | ||
270 | * JMP FAR mem16:32 FF /5 Far jump indirect, | ||
271 | * with the target specified by a far pointer in memory. | ||
272 | * | ||
273 | * Intel64 does support 64bit offset. | ||
274 | * Software Developer Manual Vol 2: states: | ||
275 | * FF /5 JMP m16:16 Jump far, absolute indirect, | ||
276 | * address given in m16:16 | ||
277 | * FF /5 JMP m16:32 Jump far, absolute indirect, | ||
278 | * address given in m16:32. | ||
279 | * REX.W + FF /5 JMP m16:64 Jump far, absolute indirect, | ||
280 | * address given in m16:64. | ||
248 | */ | 281 | */ |
249 | movq initial_code(%rip),%rax | 282 | movq initial_code(%rip),%rax |
250 | pushq $0 # fake return address to stop unwinder | 283 | pushq $0 # fake return address to stop unwinder |
@@ -270,13 +303,13 @@ ENDPROC(start_cpu0) | |||
270 | 303 | ||
271 | /* SMP bootup changes these two */ | 304 | /* SMP bootup changes these two */ |
272 | __REFDATA | 305 | __REFDATA |
273 | .align 8 | 306 | .balign 8 |
274 | ENTRY(initial_code) | 307 | GLOBAL(initial_code) |
275 | .quad x86_64_start_kernel | 308 | .quad x86_64_start_kernel |
276 | ENTRY(initial_gs) | 309 | GLOBAL(initial_gs) |
277 | .quad INIT_PER_CPU_VAR(irq_stack_union) | 310 | .quad INIT_PER_CPU_VAR(irq_stack_union) |
278 | 311 | ||
279 | ENTRY(stack_start) | 312 | GLOBAL(stack_start) |
280 | .quad init_thread_union+THREAD_SIZE-8 | 313 | .quad init_thread_union+THREAD_SIZE-8 |
281 | .word 0 | 314 | .word 0 |
282 | __FINITDATA | 315 | __FINITDATA |
@@ -284,7 +317,7 @@ ENDPROC(start_cpu0) | |||
284 | bad_address: | 317 | bad_address: |
285 | jmp bad_address | 318 | jmp bad_address |
286 | 319 | ||
287 | .section ".init.text","ax" | 320 | __INIT |
288 | .globl early_idt_handlers | 321 | .globl early_idt_handlers |
289 | early_idt_handlers: | 322 | early_idt_handlers: |
290 | # 104(%rsp) %rflags | 323 | # 104(%rsp) %rflags |
@@ -303,6 +336,7 @@ early_idt_handlers: | |||
303 | i = i + 1 | 336 | i = i + 1 |
304 | .endr | 337 | .endr |
305 | 338 | ||
339 | /* This is global to keep gas from relaxing the jumps */ | ||
306 | ENTRY(early_idt_handler) | 340 | ENTRY(early_idt_handler) |
307 | cld | 341 | cld |
308 | 342 | ||
@@ -321,14 +355,22 @@ ENTRY(early_idt_handler) | |||
321 | pushq %r11 # 0(%rsp) | 355 | pushq %r11 # 0(%rsp) |
322 | 356 | ||
323 | cmpl $__KERNEL_CS,96(%rsp) | 357 | cmpl $__KERNEL_CS,96(%rsp) |
324 | jne 10f | 358 | jne 11f |
359 | |||
360 | cmpl $14,72(%rsp) # Page fault? | ||
361 | jnz 10f | ||
362 | GET_CR2_INTO(%rdi) # can clobber any volatile register if pv | ||
363 | call early_make_pgtable | ||
364 | andl %eax,%eax | ||
365 | jz 20f # All good | ||
325 | 366 | ||
367 | 10: | ||
326 | leaq 88(%rsp),%rdi # Pointer to %rip | 368 | leaq 88(%rsp),%rdi # Pointer to %rip |
327 | call early_fixup_exception | 369 | call early_fixup_exception |
328 | andl %eax,%eax | 370 | andl %eax,%eax |
329 | jnz 20f # Found an exception entry | 371 | jnz 20f # Found an exception entry |
330 | 372 | ||
331 | 10: | 373 | 11: |
332 | #ifdef CONFIG_EARLY_PRINTK | 374 | #ifdef CONFIG_EARLY_PRINTK |
333 | GET_CR2_INTO(%r9) # can clobber any volatile register if pv | 375 | GET_CR2_INTO(%r9) # can clobber any volatile register if pv |
334 | movl 80(%rsp),%r8d # error code | 376 | movl 80(%rsp),%r8d # error code |
@@ -350,7 +392,7 @@ ENTRY(early_idt_handler) | |||
350 | 1: hlt | 392 | 1: hlt |
351 | jmp 1b | 393 | jmp 1b |
352 | 394 | ||
353 | 20: # Exception table entry found | 395 | 20: # Exception table entry found or page table generated |
354 | popq %r11 | 396 | popq %r11 |
355 | popq %r10 | 397 | popq %r10 |
356 | popq %r9 | 398 | popq %r9 |
@@ -363,6 +405,9 @@ ENTRY(early_idt_handler) | |||
363 | addq $16,%rsp # drop vector number and error code | 405 | addq $16,%rsp # drop vector number and error code |
364 | decl early_recursion_flag(%rip) | 406 | decl early_recursion_flag(%rip) |
365 | INTERRUPT_RETURN | 407 | INTERRUPT_RETURN |
408 | ENDPROC(early_idt_handler) | ||
409 | |||
410 | __INITDATA | ||
366 | 411 | ||
367 | .balign 4 | 412 | .balign 4 |
368 | early_recursion_flag: | 413 | early_recursion_flag: |
@@ -374,11 +419,10 @@ early_idt_msg: | |||
374 | early_idt_ripmsg: | 419 | early_idt_ripmsg: |
375 | .asciz "RIP %s\n" | 420 | .asciz "RIP %s\n" |
376 | #endif /* CONFIG_EARLY_PRINTK */ | 421 | #endif /* CONFIG_EARLY_PRINTK */ |
377 | .previous | ||
378 | 422 | ||
379 | #define NEXT_PAGE(name) \ | 423 | #define NEXT_PAGE(name) \ |
380 | .balign PAGE_SIZE; \ | 424 | .balign PAGE_SIZE; \ |
381 | ENTRY(name) | 425 | GLOBAL(name) |
382 | 426 | ||
383 | /* Automate the creation of 1 to 1 mapping pmd entries */ | 427 | /* Automate the creation of 1 to 1 mapping pmd entries */ |
384 | #define PMDS(START, PERM, COUNT) \ | 428 | #define PMDS(START, PERM, COUNT) \ |
@@ -388,24 +432,37 @@ ENTRY(name) | |||
388 | i = i + 1 ; \ | 432 | i = i + 1 ; \ |
389 | .endr | 433 | .endr |
390 | 434 | ||
435 | __INITDATA | ||
436 | NEXT_PAGE(early_level4_pgt) | ||
437 | .fill 511,8,0 | ||
438 | .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE | ||
439 | |||
440 | NEXT_PAGE(early_dynamic_pgts) | ||
441 | .fill 512*EARLY_DYNAMIC_PAGE_TABLES,8,0 | ||
442 | |||
391 | .data | 443 | .data |
392 | /* | 444 | |
393 | * This default setting generates an ident mapping at address 0x100000 | 445 | #ifndef CONFIG_XEN |
394 | * and a mapping for the kernel that precisely maps virtual address | ||
395 | * 0xffffffff80000000 to physical address 0x000000. (always using | ||
396 | * 2Mbyte large pages provided by PAE mode) | ||
397 | */ | ||
398 | NEXT_PAGE(init_level4_pgt) | 446 | NEXT_PAGE(init_level4_pgt) |
399 | .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE | 447 | .fill 512,8,0 |
400 | .org init_level4_pgt + L4_PAGE_OFFSET*8, 0 | 448 | #else |
401 | .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE | 449 | NEXT_PAGE(init_level4_pgt) |
402 | .org init_level4_pgt + L4_START_KERNEL*8, 0 | 450 | .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE |
451 | .org init_level4_pgt + L4_PAGE_OFFSET*8, 0 | ||
452 | .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE | ||
453 | .org init_level4_pgt + L4_START_KERNEL*8, 0 | ||
403 | /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ | 454 | /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ |
404 | .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE | 455 | .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE |
405 | 456 | ||
406 | NEXT_PAGE(level3_ident_pgt) | 457 | NEXT_PAGE(level3_ident_pgt) |
407 | .quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE | 458 | .quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE |
408 | .fill 511,8,0 | 459 | .fill 511, 8, 0 |
460 | NEXT_PAGE(level2_ident_pgt) | ||
461 | /* Since I easily can, map the first 1G. | ||
462 | * Don't set NX because code runs from these pages. | ||
463 | */ | ||
464 | PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD) | ||
465 | #endif | ||
409 | 466 | ||
410 | NEXT_PAGE(level3_kernel_pgt) | 467 | NEXT_PAGE(level3_kernel_pgt) |
411 | .fill L3_START_KERNEL,8,0 | 468 | .fill L3_START_KERNEL,8,0 |
@@ -413,21 +470,6 @@ NEXT_PAGE(level3_kernel_pgt) | |||
413 | .quad level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE | 470 | .quad level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE |
414 | .quad level2_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE | 471 | .quad level2_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE |
415 | 472 | ||
416 | NEXT_PAGE(level2_fixmap_pgt) | ||
417 | .fill 506,8,0 | ||
418 | .quad level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE | ||
419 | /* 8MB reserved for vsyscalls + a 2MB hole = 4 + 1 entries */ | ||
420 | .fill 5,8,0 | ||
421 | |||
422 | NEXT_PAGE(level1_fixmap_pgt) | ||
423 | .fill 512,8,0 | ||
424 | |||
425 | NEXT_PAGE(level2_ident_pgt) | ||
426 | /* Since I easily can, map the first 1G. | ||
427 | * Don't set NX because code runs from these pages. | ||
428 | */ | ||
429 | PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD) | ||
430 | |||
431 | NEXT_PAGE(level2_kernel_pgt) | 473 | NEXT_PAGE(level2_kernel_pgt) |
432 | /* | 474 | /* |
433 | * 512 MB kernel mapping. We spend a full page on this pagetable | 475 | * 512 MB kernel mapping. We spend a full page on this pagetable |
@@ -442,11 +484,16 @@ NEXT_PAGE(level2_kernel_pgt) | |||
442 | PMDS(0, __PAGE_KERNEL_LARGE_EXEC, | 484 | PMDS(0, __PAGE_KERNEL_LARGE_EXEC, |
443 | KERNEL_IMAGE_SIZE/PMD_SIZE) | 485 | KERNEL_IMAGE_SIZE/PMD_SIZE) |
444 | 486 | ||
445 | NEXT_PAGE(level2_spare_pgt) | 487 | NEXT_PAGE(level2_fixmap_pgt) |
446 | .fill 512, 8, 0 | 488 | .fill 506,8,0 |
489 | .quad level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE | ||
490 | /* 8MB reserved for vsyscalls + a 2MB hole = 4 + 1 entries */ | ||
491 | .fill 5,8,0 | ||
492 | |||
493 | NEXT_PAGE(level1_fixmap_pgt) | ||
494 | .fill 512,8,0 | ||
447 | 495 | ||
448 | #undef PMDS | 496 | #undef PMDS |
449 | #undef NEXT_PAGE | ||
450 | 497 | ||
451 | .data | 498 | .data |
452 | .align 16 | 499 | .align 16 |
@@ -472,6 +519,5 @@ ENTRY(nmi_idt_table) | |||
472 | .skip IDT_ENTRIES * 16 | 519 | .skip IDT_ENTRIES * 16 |
473 | 520 | ||
474 | __PAGE_ALIGNED_BSS | 521 | __PAGE_ALIGNED_BSS |
475 | .align PAGE_SIZE | 522 | NEXT_PAGE(empty_zero_page) |
476 | ENTRY(empty_zero_page) | ||
477 | .skip PAGE_SIZE | 523 | .skip PAGE_SIZE |
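The rewritten startup_64 identity-map setup replaces the old single "spare" PMD fixup with a loop that covers the whole kernel image, _text through _end, using 2 MB large-page entries in the freshly built early page tables. The arithmetic of that loop, restated as a small C sketch: the flag handling and alignment are simplified, and only the index/count computation follows the assembly.

    #include <stdint.h>

    #define PMD_SHIFT    21
    #define PMD_SIZE     (1ULL << PMD_SHIFT)   /* 2 MB per large-page entry */
    #define PTRS_PER_PMD 512

    /* Fill identity-map PMD entries covering [start, last], one 2 MB entry per
     * chunk, the way the "1:" loop in startup_64 walks _text .. _end - 1. */
    static void fill_ident_pmds(uint64_t *pmd, uint64_t start, uint64_t last,
                                uint64_t flags)
    {
        uint64_t idx   = start >> PMD_SHIFT;
        uint64_t count = (last >> PMD_SHIFT) - idx + 1;
        uint64_t entry = (start & ~(PMD_SIZE - 1)) | flags;

        while (count--) {
            pmd[idx & (PTRS_PER_PMD - 1)] = entry;
            idx++;
            entry += PMD_SIZE;
        }
    }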
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index e28670f9a589..da85a8e830a1 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c | |||
@@ -478,7 +478,7 @@ static int hpet_msi_next_event(unsigned long delta, | |||
478 | 478 | ||
479 | static int hpet_setup_msi_irq(unsigned int irq) | 479 | static int hpet_setup_msi_irq(unsigned int irq) |
480 | { | 480 | { |
481 | if (arch_setup_hpet_msi(irq, hpet_blockid)) { | 481 | if (x86_msi.setup_hpet_msi(irq, hpet_blockid)) { |
482 | destroy_irq(irq); | 482 | destroy_irq(irq); |
483 | return -EINVAL; | 483 | return -EINVAL; |
484 | } | 484 | } |
diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c index 9c3bd4a2050e..0fa69127209a 100644 --- a/arch/x86/kernel/i386_ksyms_32.c +++ b/arch/x86/kernel/i386_ksyms_32.c | |||
@@ -26,6 +26,7 @@ EXPORT_SYMBOL(csum_partial_copy_generic); | |||
26 | EXPORT_SYMBOL(__get_user_1); | 26 | EXPORT_SYMBOL(__get_user_1); |
27 | EXPORT_SYMBOL(__get_user_2); | 27 | EXPORT_SYMBOL(__get_user_2); |
28 | EXPORT_SYMBOL(__get_user_4); | 28 | EXPORT_SYMBOL(__get_user_4); |
29 | EXPORT_SYMBOL(__get_user_8); | ||
29 | 30 | ||
30 | EXPORT_SYMBOL(__put_user_1); | 31 | EXPORT_SYMBOL(__put_user_1); |
31 | EXPORT_SYMBOL(__put_user_2); | 32 | EXPORT_SYMBOL(__put_user_2); |
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 8c968974253d..4ddaf66ea35f 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c | |||
@@ -93,8 +93,9 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) | |||
93 | * on system-call entry - see also fork() and the signal handling | 93 | * on system-call entry - see also fork() and the signal handling |
94 | * code. | 94 | * code. |
95 | */ | 95 | */ |
96 | long sys_iopl(unsigned int level, struct pt_regs *regs) | 96 | SYSCALL_DEFINE1(iopl, unsigned int, level) |
97 | { | 97 | { |
98 | struct pt_regs *regs = current_pt_regs(); | ||
98 | unsigned int old = (regs->flags >> 12) & 3; | 99 | unsigned int old = (regs->flags >> 12) & 3; |
99 | struct thread_struct *t = ¤t->thread; | 100 | struct thread_struct *t = ¤t->thread; |
100 | 101 | ||
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 6e03b0d69138..7dc4e459c2b3 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c | |||
@@ -42,39 +42,6 @@ | |||
42 | * (these are usually mapped into the 0x30-0xff vector range) | 42 | * (these are usually mapped into the 0x30-0xff vector range) |
43 | */ | 43 | */ |
44 | 44 | ||
45 | #ifdef CONFIG_X86_32 | ||
46 | /* | ||
47 | * Note that on a 486, we don't want to do a SIGFPE on an irq13 | ||
48 | * as the irq is unreliable, and exception 16 works correctly | ||
49 | * (ie as explained in the intel literature). On a 386, you | ||
50 | * can't use exception 16 due to bad IBM design, so we have to | ||
51 | * rely on the less exact irq13. | ||
52 | * | ||
53 | * Careful.. Not only is IRQ13 unreliable, but it is also | ||
54 | * leads to races. IBM designers who came up with it should | ||
55 | * be shot. | ||
56 | */ | ||
57 | |||
58 | static irqreturn_t math_error_irq(int cpl, void *dev_id) | ||
59 | { | ||
60 | outb(0, 0xF0); | ||
61 | if (ignore_fpu_irq || !boot_cpu_data.hard_math) | ||
62 | return IRQ_NONE; | ||
63 | math_error(get_irq_regs(), 0, X86_TRAP_MF); | ||
64 | return IRQ_HANDLED; | ||
65 | } | ||
66 | |||
67 | /* | ||
68 | * New motherboards sometimes make IRQ 13 be a PCI interrupt, | ||
69 | * so allow interrupt sharing. | ||
70 | */ | ||
71 | static struct irqaction fpu_irq = { | ||
72 | .handler = math_error_irq, | ||
73 | .name = "fpu", | ||
74 | .flags = IRQF_NO_THREAD, | ||
75 | }; | ||
76 | #endif | ||
77 | |||
78 | /* | 45 | /* |
79 | * IRQ2 is cascade interrupt to second interrupt controller | 46 | * IRQ2 is cascade interrupt to second interrupt controller |
80 | */ | 47 | */ |
@@ -242,13 +209,6 @@ void __init native_init_IRQ(void) | |||
242 | setup_irq(2, &irq2); | 209 | setup_irq(2, &irq2); |
243 | 210 | ||
244 | #ifdef CONFIG_X86_32 | 211 | #ifdef CONFIG_X86_32 |
245 | /* | ||
246 | * External FPU? Set up irq13 if so, for | ||
247 | * original braindamaged IBM FERR coupling. | ||
248 | */ | ||
249 | if (boot_cpu_data.hard_math && !cpu_has_fpu) | ||
250 | setup_irq(FPU_IRQ, &fpu_irq); | ||
251 | |||
252 | irq_ctx_init(smp_processor_id()); | 212 | irq_ctx_init(smp_processor_id()); |
253 | #endif | 213 | #endif |
254 | } | 214 | } |
diff --git a/arch/x86/kernel/kprobes/Makefile b/arch/x86/kernel/kprobes/Makefile new file mode 100644 index 000000000000..0d33169cc1a2 --- /dev/null +++ b/arch/x86/kernel/kprobes/Makefile | |||
@@ -0,0 +1,7 @@ | |||
1 | # | ||
2 | # Makefile for kernel probes | ||
3 | # | ||
4 | |||
5 | obj-$(CONFIG_KPROBES) += core.o | ||
6 | obj-$(CONFIG_OPTPROBES) += opt.o | ||
7 | obj-$(CONFIG_KPROBES_ON_FTRACE) += ftrace.o | ||
diff --git a/arch/x86/kernel/kprobes-common.h b/arch/x86/kernel/kprobes/common.h index 3230b68ef29a..2e9d4b5af036 100644 --- a/arch/x86/kernel/kprobes-common.h +++ b/arch/x86/kernel/kprobes/common.h | |||
@@ -99,4 +99,15 @@ static inline unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsig | |||
99 | return addr; | 99 | return addr; |
100 | } | 100 | } |
101 | #endif | 101 | #endif |
102 | |||
103 | #ifdef CONFIG_KPROBES_ON_FTRACE | ||
104 | extern int skip_singlestep(struct kprobe *p, struct pt_regs *regs, | ||
105 | struct kprobe_ctlblk *kcb); | ||
106 | #else | ||
107 | static inline int skip_singlestep(struct kprobe *p, struct pt_regs *regs, | ||
108 | struct kprobe_ctlblk *kcb) | ||
109 | { | ||
110 | return 0; | ||
111 | } | ||
112 | #endif | ||
102 | #endif | 113 | #endif |
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes/core.c index 57916c0d3cf6..3f06e6149981 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes/core.c | |||
@@ -58,7 +58,7 @@ | |||
58 | #include <asm/insn.h> | 58 | #include <asm/insn.h> |
59 | #include <asm/debugreg.h> | 59 | #include <asm/debugreg.h> |
60 | 60 | ||
61 | #include "kprobes-common.h" | 61 | #include "common.h" |
62 | 62 | ||
63 | void jprobe_return_end(void); | 63 | void jprobe_return_end(void); |
64 | 64 | ||
@@ -78,7 +78,7 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); | |||
78 | * Groups, and some special opcodes can not boost. | 78 | * Groups, and some special opcodes can not boost. |
79 | * This is non-const and volatile to keep gcc from statically | 79 | * This is non-const and volatile to keep gcc from statically |
80 | * optimizing it out, as variable_test_bit makes gcc think only | 80 | * optimizing it out, as variable_test_bit makes gcc think only |
81 | * *(unsigned long*) is used. | 81 | * *(unsigned long*) is used. |
82 | */ | 82 | */ |
83 | static volatile u32 twobyte_is_boostable[256 / 32] = { | 83 | static volatile u32 twobyte_is_boostable[256 / 32] = { |
84 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | 84 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ |
@@ -117,7 +117,7 @@ static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op) | |||
117 | struct __arch_relative_insn { | 117 | struct __arch_relative_insn { |
118 | u8 op; | 118 | u8 op; |
119 | s32 raddr; | 119 | s32 raddr; |
120 | } __attribute__((packed)) *insn; | 120 | } __packed *insn; |
121 | 121 | ||
122 | insn = (struct __arch_relative_insn *)from; | 122 | insn = (struct __arch_relative_insn *)from; |
123 | insn->raddr = (s32)((long)(to) - ((long)(from) + 5)); | 123 | insn->raddr = (s32)((long)(to) - ((long)(from) + 5)); |
@@ -541,23 +541,6 @@ reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb | |||
541 | return 1; | 541 | return 1; |
542 | } | 542 | } |
543 | 543 | ||
544 | #ifdef KPROBES_CAN_USE_FTRACE | ||
545 | static void __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs, | ||
546 | struct kprobe_ctlblk *kcb) | ||
547 | { | ||
548 | /* | ||
549 | * Emulate singlestep (and also recover regs->ip) | ||
550 | * as if there is a 5byte nop | ||
551 | */ | ||
552 | regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE; | ||
553 | if (unlikely(p->post_handler)) { | ||
554 | kcb->kprobe_status = KPROBE_HIT_SSDONE; | ||
555 | p->post_handler(p, regs, 0); | ||
556 | } | ||
557 | __this_cpu_write(current_kprobe, NULL); | ||
558 | } | ||
559 | #endif | ||
560 | |||
561 | /* | 544 | /* |
562 | * Interrupts are disabled on entry as trap3 is an interrupt gate and they | 545 | * Interrupts are disabled on entry as trap3 is an interrupt gate and they |
563 | * remain disabled throughout this function. | 546 | * remain disabled throughout this function. |
@@ -616,13 +599,8 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) | |||
616 | } else if (kprobe_running()) { | 599 | } else if (kprobe_running()) { |
617 | p = __this_cpu_read(current_kprobe); | 600 | p = __this_cpu_read(current_kprobe); |
618 | if (p->break_handler && p->break_handler(p, regs)) { | 601 | if (p->break_handler && p->break_handler(p, regs)) { |
619 | #ifdef KPROBES_CAN_USE_FTRACE | 602 | if (!skip_singlestep(p, regs, kcb)) |
620 | if (kprobe_ftrace(p)) { | 603 | setup_singlestep(p, regs, kcb, 0); |
621 | skip_singlestep(p, regs, kcb); | ||
622 | return 1; | ||
623 | } | ||
624 | #endif | ||
625 | setup_singlestep(p, regs, kcb, 0); | ||
626 | return 1; | 604 | return 1; |
627 | } | 605 | } |
628 | } /* else: not a kprobe fault; let the kernel handle it */ | 606 | } /* else: not a kprobe fault; let the kernel handle it */ |
@@ -674,7 +652,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) | |||
674 | { | 652 | { |
675 | struct kretprobe_instance *ri = NULL; | 653 | struct kretprobe_instance *ri = NULL; |
676 | struct hlist_head *head, empty_rp; | 654 | struct hlist_head *head, empty_rp; |
677 | struct hlist_node *node, *tmp; | 655 | struct hlist_node *tmp; |
678 | unsigned long flags, orig_ret_address = 0; | 656 | unsigned long flags, orig_ret_address = 0; |
679 | unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; | 657 | unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; |
680 | kprobe_opcode_t *correct_ret_addr = NULL; | 658 | kprobe_opcode_t *correct_ret_addr = NULL; |
@@ -704,7 +682,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) | |||
704 | * will be the real return address, and all the rest will | 682 | * will be the real return address, and all the rest will |
705 | * point to kretprobe_trampoline. | 683 | * point to kretprobe_trampoline. |
706 | */ | 684 | */ |
707 | hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { | 685 | hlist_for_each_entry_safe(ri, tmp, head, hlist) { |
708 | if (ri->task != current) | 686 | if (ri->task != current) |
709 | /* another task is sharing our hash bucket */ | 687 | /* another task is sharing our hash bucket */ |
710 | continue; | 688 | continue; |
@@ -723,7 +701,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) | |||
723 | kretprobe_assert(ri, orig_ret_address, trampoline_address); | 701 | kretprobe_assert(ri, orig_ret_address, trampoline_address); |
724 | 702 | ||
725 | correct_ret_addr = ri->ret_addr; | 703 | correct_ret_addr = ri->ret_addr; |
726 | hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { | 704 | hlist_for_each_entry_safe(ri, tmp, head, hlist) { |
727 | if (ri->task != current) | 705 | if (ri->task != current) |
728 | /* another task is sharing our hash bucket */ | 706 | /* another task is sharing our hash bucket */ |
729 | continue; | 707 | continue; |
@@ -750,7 +728,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) | |||
750 | 728 | ||
751 | kretprobe_hash_unlock(current, &flags); | 729 | kretprobe_hash_unlock(current, &flags); |
752 | 730 | ||
753 | hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { | 731 | hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { |
754 | hlist_del(&ri->hlist); | 732 | hlist_del(&ri->hlist); |
755 | kfree(ri); | 733 | kfree(ri); |
756 | } | 734 | } |
@@ -1075,50 +1053,6 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) | |||
1075 | return 0; | 1053 | return 0; |
1076 | } | 1054 | } |
1077 | 1055 | ||
1078 | #ifdef KPROBES_CAN_USE_FTRACE | ||
1079 | /* Ftrace callback handler for kprobes */ | ||
1080 | void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, | ||
1081 | struct ftrace_ops *ops, struct pt_regs *regs) | ||
1082 | { | ||
1083 | struct kprobe *p; | ||
1084 | struct kprobe_ctlblk *kcb; | ||
1085 | unsigned long flags; | ||
1086 | |||
1087 | /* Disable irq for emulating a breakpoint and avoiding preempt */ | ||
1088 | local_irq_save(flags); | ||
1089 | |||
1090 | p = get_kprobe((kprobe_opcode_t *)ip); | ||
1091 | if (unlikely(!p) || kprobe_disabled(p)) | ||
1092 | goto end; | ||
1093 | |||
1094 | kcb = get_kprobe_ctlblk(); | ||
1095 | if (kprobe_running()) { | ||
1096 | kprobes_inc_nmissed_count(p); | ||
1097 | } else { | ||
1098 | /* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */ | ||
1099 | regs->ip = ip + sizeof(kprobe_opcode_t); | ||
1100 | |||
1101 | __this_cpu_write(current_kprobe, p); | ||
1102 | kcb->kprobe_status = KPROBE_HIT_ACTIVE; | ||
1103 | if (!p->pre_handler || !p->pre_handler(p, regs)) | ||
1104 | skip_singlestep(p, regs, kcb); | ||
1105 | /* | ||
1106 | * If pre_handler returns !0, it sets regs->ip and | ||
1107 | * resets current kprobe. | ||
1108 | */ | ||
1109 | } | ||
1110 | end: | ||
1111 | local_irq_restore(flags); | ||
1112 | } | ||
1113 | |||
1114 | int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p) | ||
1115 | { | ||
1116 | p->ainsn.insn = NULL; | ||
1117 | p->ainsn.boostable = -1; | ||
1118 | return 0; | ||
1119 | } | ||
1120 | #endif | ||
1121 | |||
1122 | int __init arch_init_kprobes(void) | 1056 | int __init arch_init_kprobes(void) |
1123 | { | 1057 | { |
1124 | return arch_init_optprobes(); | 1058 | return arch_init_optprobes(); |
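Besides the move from kprobes.c to kprobes/core.c, the hunks above pick up the 3.9 tree-wide hlist API change: hlist_for_each_entry_safe() no longer takes a separate struct hlist_node cursor, so the now-unused "node" variable is dropped from trampoline_handler(). A minimal sketch of the new iterator shape (the struct and function below are made up purely to show the signature):

    #include <linux/list.h>
    #include <linux/slab.h>

    struct item {
            int val;
            struct hlist_node hlist;
    };

    static void drain(struct hlist_head *head)
    {
            struct item *pos;
            struct hlist_node *tmp;

            /* pre-3.9 form was: hlist_for_each_entry_safe(pos, node, tmp, head, hlist) */
            hlist_for_each_entry_safe(pos, tmp, head, hlist) {
                    hlist_del(&pos->hlist);
                    kfree(pos);
            }
    }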
diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c new file mode 100644 index 000000000000..23ef5c556f06 --- /dev/null +++ b/arch/x86/kernel/kprobes/ftrace.c | |||
@@ -0,0 +1,93 @@ | |||
1 | /* | ||
2 | * Dynamic Ftrace based Kprobes Optimization | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
17 | * | ||
18 | * Copyright (C) Hitachi Ltd., 2012 | ||
19 | */ | ||
20 | #include <linux/kprobes.h> | ||
21 | #include <linux/ptrace.h> | ||
22 | #include <linux/hardirq.h> | ||
23 | #include <linux/preempt.h> | ||
24 | #include <linux/ftrace.h> | ||
25 | |||
26 | #include "common.h" | ||
27 | |||
28 | static int __skip_singlestep(struct kprobe *p, struct pt_regs *regs, | ||
29 | struct kprobe_ctlblk *kcb) | ||
30 | { | ||
31 | /* | ||
32 | * Emulate singlestep (and also recover regs->ip) | ||
33 | * as if there is a 5byte nop | ||
34 | */ | ||
35 | regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE; | ||
36 | if (unlikely(p->post_handler)) { | ||
37 | kcb->kprobe_status = KPROBE_HIT_SSDONE; | ||
38 | p->post_handler(p, regs, 0); | ||
39 | } | ||
40 | __this_cpu_write(current_kprobe, NULL); | ||
41 | return 1; | ||
42 | } | ||
43 | |||
44 | int __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs, | ||
45 | struct kprobe_ctlblk *kcb) | ||
46 | { | ||
47 | if (kprobe_ftrace(p)) | ||
48 | return __skip_singlestep(p, regs, kcb); | ||
49 | else | ||
50 | return 0; | ||
51 | } | ||
52 | |||
53 | /* Ftrace callback handler for kprobes */ | ||
54 | void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, | ||
55 | struct ftrace_ops *ops, struct pt_regs *regs) | ||
56 | { | ||
57 | struct kprobe *p; | ||
58 | struct kprobe_ctlblk *kcb; | ||
59 | unsigned long flags; | ||
60 | |||
61 | /* Disable irq for emulating a breakpoint and avoiding preempt */ | ||
62 | local_irq_save(flags); | ||
63 | |||
64 | p = get_kprobe((kprobe_opcode_t *)ip); | ||
65 | if (unlikely(!p) || kprobe_disabled(p)) | ||
66 | goto end; | ||
67 | |||
68 | kcb = get_kprobe_ctlblk(); | ||
69 | if (kprobe_running()) { | ||
70 | kprobes_inc_nmissed_count(p); | ||
71 | } else { | ||
72 | /* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */ | ||
73 | regs->ip = ip + sizeof(kprobe_opcode_t); | ||
74 | |||
75 | __this_cpu_write(current_kprobe, p); | ||
76 | kcb->kprobe_status = KPROBE_HIT_ACTIVE; | ||
77 | if (!p->pre_handler || !p->pre_handler(p, regs)) | ||
78 | __skip_singlestep(p, regs, kcb); | ||
79 | /* | ||
80 | * If pre_handler returns !0, it sets regs->ip and | ||
81 | * resets current kprobe. | ||
82 | */ | ||
83 | } | ||
84 | end: | ||
85 | local_irq_restore(flags); | ||
86 | } | ||
87 | |||
88 | int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p) | ||
89 | { | ||
90 | p->ainsn.insn = NULL; | ||
91 | p->ainsn.boostable = -1; | ||
92 | return 0; | ||
93 | } | ||
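kprobe_ftrace_handler() above is the path taken when a kprobe sits on an ftrace-patched function entry (CONFIG_KPROBES_ON_FTRACE): the ftrace callback runs the pre_handler and __skip_singlestep() then fakes the single-step as if a 5-byte nop had executed. Nothing changes for users of the kprobes API; a probe registered the usual way takes this path automatically when its address is an ftrace location, and the classic int3 path otherwise. A minimal registration sketch (the probed symbol and message are illustrative only):

    #include <linux/module.h>
    #include <linux/kprobes.h>

    /* pre-handler: runs before the probed instruction, or, on the ftrace
     * path above, before the emulated 5-byte nop */
    static int pre(struct kprobe *p, struct pt_regs *regs)
    {
            pr_info("hit %s, ip=%lx\n", p->symbol_name, regs->ip);
            return 0;
    }

    static struct kprobe kp = {
            .symbol_name = "do_fork",       /* illustrative target */
            .pre_handler = pre,
    };

    static int __init kp_init(void)
    {
            return register_kprobe(&kp);
    }

    static void __exit kp_exit(void)
    {
            unregister_kprobe(&kp);
    }

    module_init(kp_init);
    module_exit(kp_exit);
    MODULE_LICENSE("GPL");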
diff --git a/arch/x86/kernel/kprobes-opt.c b/arch/x86/kernel/kprobes/opt.c index c5e410eed403..76dc6f095724 100644 --- a/arch/x86/kernel/kprobes-opt.c +++ b/arch/x86/kernel/kprobes/opt.c | |||
@@ -37,7 +37,7 @@ | |||
37 | #include <asm/insn.h> | 37 | #include <asm/insn.h> |
38 | #include <asm/debugreg.h> | 38 | #include <asm/debugreg.h> |
39 | 39 | ||
40 | #include "kprobes-common.h" | 40 | #include "common.h" |
41 | 41 | ||
42 | unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr) | 42 | unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr) |
43 | { | 43 | { |
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 08b973f64032..b686a904d7c3 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c | |||
@@ -43,6 +43,7 @@ | |||
43 | #include <asm/apicdef.h> | 43 | #include <asm/apicdef.h> |
44 | #include <asm/hypervisor.h> | 44 | #include <asm/hypervisor.h> |
45 | #include <asm/kvm_guest.h> | 45 | #include <asm/kvm_guest.h> |
46 | #include <asm/context_tracking.h> | ||
46 | 47 | ||
47 | static int kvmapf = 1; | 48 | static int kvmapf = 1; |
48 | 49 | ||
@@ -121,6 +122,8 @@ void kvm_async_pf_task_wait(u32 token) | |||
121 | struct kvm_task_sleep_node n, *e; | 122 | struct kvm_task_sleep_node n, *e; |
122 | DEFINE_WAIT(wait); | 123 | DEFINE_WAIT(wait); |
123 | 124 | ||
125 | rcu_irq_enter(); | ||
126 | |||
124 | spin_lock(&b->lock); | 127 | spin_lock(&b->lock); |
125 | e = _find_apf_task(b, token); | 128 | e = _find_apf_task(b, token); |
126 | if (e) { | 129 | if (e) { |
@@ -128,6 +131,8 @@ void kvm_async_pf_task_wait(u32 token) | |||
128 | hlist_del(&e->link); | 131 | hlist_del(&e->link); |
129 | kfree(e); | 132 | kfree(e); |
130 | spin_unlock(&b->lock); | 133 | spin_unlock(&b->lock); |
134 | |||
135 | rcu_irq_exit(); | ||
131 | return; | 136 | return; |
132 | } | 137 | } |
133 | 138 | ||
@@ -152,13 +157,16 @@ void kvm_async_pf_task_wait(u32 token) | |||
152 | /* | 157 | /* |
153 | * We cannot reschedule. So halt. | 158 | * We cannot reschedule. So halt. |
154 | */ | 159 | */ |
160 | rcu_irq_exit(); | ||
155 | native_safe_halt(); | 161 | native_safe_halt(); |
162 | rcu_irq_enter(); | ||
156 | local_irq_disable(); | 163 | local_irq_disable(); |
157 | } | 164 | } |
158 | } | 165 | } |
159 | if (!n.halted) | 166 | if (!n.halted) |
160 | finish_wait(&n.wq, &wait); | 167 | finish_wait(&n.wq, &wait); |
161 | 168 | ||
169 | rcu_irq_exit(); | ||
162 | return; | 170 | return; |
163 | } | 171 | } |
164 | EXPORT_SYMBOL_GPL(kvm_async_pf_task_wait); | 172 | EXPORT_SYMBOL_GPL(kvm_async_pf_task_wait); |
@@ -252,10 +260,10 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code) | |||
252 | break; | 260 | break; |
253 | case KVM_PV_REASON_PAGE_NOT_PRESENT: | 261 | case KVM_PV_REASON_PAGE_NOT_PRESENT: |
254 | /* page is swapped out by the host. */ | 262 | /* page is swapped out by the host. */ |
255 | rcu_irq_enter(); | 263 | exception_enter(regs); |
256 | exit_idle(); | 264 | exit_idle(); |
257 | kvm_async_pf_task_wait((u32)read_cr2()); | 265 | kvm_async_pf_task_wait((u32)read_cr2()); |
258 | rcu_irq_exit(); | 266 | exception_exit(regs); |
259 | break; | 267 | break; |
260 | case KVM_PV_REASON_PAGE_READY: | 268 | case KVM_PV_REASON_PAGE_READY: |
261 | rcu_irq_enter(); | 269 | rcu_irq_enter(); |
@@ -289,9 +297,9 @@ static void kvm_register_steal_time(void) | |||
289 | 297 | ||
290 | memset(st, 0, sizeof(*st)); | 298 | memset(st, 0, sizeof(*st)); |
291 | 299 | ||
292 | wrmsrl(MSR_KVM_STEAL_TIME, (__pa(st) | KVM_MSR_ENABLED)); | 300 | wrmsrl(MSR_KVM_STEAL_TIME, (slow_virt_to_phys(st) | KVM_MSR_ENABLED)); |
293 | printk(KERN_INFO "kvm-stealtime: cpu %d, msr %lx\n", | 301 | pr_info("kvm-stealtime: cpu %d, msr %llx\n", |
294 | cpu, __pa(st)); | 302 | cpu, (unsigned long long) slow_virt_to_phys(st)); |
295 | } | 303 | } |
296 | 304 | ||
297 | static DEFINE_PER_CPU(unsigned long, kvm_apic_eoi) = KVM_PV_EOI_DISABLED; | 305 | static DEFINE_PER_CPU(unsigned long, kvm_apic_eoi) = KVM_PV_EOI_DISABLED; |
@@ -316,7 +324,7 @@ void __cpuinit kvm_guest_cpu_init(void) | |||
316 | return; | 324 | return; |
317 | 325 | ||
318 | if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) { | 326 | if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) { |
319 | u64 pa = __pa(&__get_cpu_var(apf_reason)); | 327 | u64 pa = slow_virt_to_phys(&__get_cpu_var(apf_reason)); |
320 | 328 | ||
321 | #ifdef CONFIG_PREEMPT | 329 | #ifdef CONFIG_PREEMPT |
322 | pa |= KVM_ASYNC_PF_SEND_ALWAYS; | 330 | pa |= KVM_ASYNC_PF_SEND_ALWAYS; |
@@ -332,7 +340,8 @@ void __cpuinit kvm_guest_cpu_init(void) | |||
332 | /* Size alignment is implied but just to make it explicit. */ | 340 | /* Size alignment is implied but just to make it explicit. */ |
333 | BUILD_BUG_ON(__alignof__(kvm_apic_eoi) < 4); | 341 | BUILD_BUG_ON(__alignof__(kvm_apic_eoi) < 4); |
334 | __get_cpu_var(kvm_apic_eoi) = 0; | 342 | __get_cpu_var(kvm_apic_eoi) = 0; |
335 | pa = __pa(&__get_cpu_var(kvm_apic_eoi)) | KVM_MSR_ENABLED; | 343 | pa = slow_virt_to_phys(&__get_cpu_var(kvm_apic_eoi)) |
344 | | KVM_MSR_ENABLED; | ||
336 | wrmsrl(MSR_KVM_PV_EOI_EN, pa); | 345 | wrmsrl(MSR_KVM_PV_EOI_EN, pa); |
337 | } | 346 | } |
338 | 347 | ||
@@ -497,6 +506,7 @@ static bool __init kvm_detect(void) | |||
497 | const struct hypervisor_x86 x86_hyper_kvm __refconst = { | 506 | const struct hypervisor_x86 x86_hyper_kvm __refconst = { |
498 | .name = "KVM", | 507 | .name = "KVM", |
499 | .detect = kvm_detect, | 508 | .detect = kvm_detect, |
509 | .x2apic_available = kvm_para_available, | ||
500 | }; | 510 | }; |
501 | EXPORT_SYMBOL_GPL(x86_hyper_kvm); | 511 | EXPORT_SYMBOL_GPL(x86_hyper_kvm); |
502 | 512 | ||
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index b730efad6fe9..d2c381280e3c 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c | |||
@@ -166,8 +166,8 @@ int kvm_register_clock(char *txt) | |||
166 | return 0; | 166 | return 0; |
167 | 167 | ||
168 | src = &hv_clock[cpu].pvti; | 168 | src = &hv_clock[cpu].pvti; |
169 | low = (int)__pa(src) | 1; | 169 | low = (int)slow_virt_to_phys(src) | 1; |
170 | high = ((u64)__pa(src) >> 32); | 170 | high = ((u64)slow_virt_to_phys(src) >> 32); |
171 | ret = native_write_msr_safe(msr_kvm_system_time, low, high); | 171 | ret = native_write_msr_safe(msr_kvm_system_time, low, high); |
172 | printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n", | 172 | printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n", |
173 | cpu, high, low, txt); | 173 | cpu, high, low, txt); |
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index b3ea9db39db6..4eabc160696f 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c | |||
@@ -16,125 +16,12 @@ | |||
16 | #include <linux/io.h> | 16 | #include <linux/io.h> |
17 | #include <linux/suspend.h> | 17 | #include <linux/suspend.h> |
18 | 18 | ||
19 | #include <asm/init.h> | ||
19 | #include <asm/pgtable.h> | 20 | #include <asm/pgtable.h> |
20 | #include <asm/tlbflush.h> | 21 | #include <asm/tlbflush.h> |
21 | #include <asm/mmu_context.h> | 22 | #include <asm/mmu_context.h> |
22 | #include <asm/debugreg.h> | 23 | #include <asm/debugreg.h> |
23 | 24 | ||
24 | static int init_one_level2_page(struct kimage *image, pgd_t *pgd, | ||
25 | unsigned long addr) | ||
26 | { | ||
27 | pud_t *pud; | ||
28 | pmd_t *pmd; | ||
29 | struct page *page; | ||
30 | int result = -ENOMEM; | ||
31 | |||
32 | addr &= PMD_MASK; | ||
33 | pgd += pgd_index(addr); | ||
34 | if (!pgd_present(*pgd)) { | ||
35 | page = kimage_alloc_control_pages(image, 0); | ||
36 | if (!page) | ||
37 | goto out; | ||
38 | pud = (pud_t *)page_address(page); | ||
39 | clear_page(pud); | ||
40 | set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE)); | ||
41 | } | ||
42 | pud = pud_offset(pgd, addr); | ||
43 | if (!pud_present(*pud)) { | ||
44 | page = kimage_alloc_control_pages(image, 0); | ||
45 | if (!page) | ||
46 | goto out; | ||
47 | pmd = (pmd_t *)page_address(page); | ||
48 | clear_page(pmd); | ||
49 | set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); | ||
50 | } | ||
51 | pmd = pmd_offset(pud, addr); | ||
52 | if (!pmd_present(*pmd)) | ||
53 | set_pmd(pmd, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC)); | ||
54 | result = 0; | ||
55 | out: | ||
56 | return result; | ||
57 | } | ||
58 | |||
59 | static void init_level2_page(pmd_t *level2p, unsigned long addr) | ||
60 | { | ||
61 | unsigned long end_addr; | ||
62 | |||
63 | addr &= PAGE_MASK; | ||
64 | end_addr = addr + PUD_SIZE; | ||
65 | while (addr < end_addr) { | ||
66 | set_pmd(level2p++, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC)); | ||
67 | addr += PMD_SIZE; | ||
68 | } | ||
69 | } | ||
70 | |||
71 | static int init_level3_page(struct kimage *image, pud_t *level3p, | ||
72 | unsigned long addr, unsigned long last_addr) | ||
73 | { | ||
74 | unsigned long end_addr; | ||
75 | int result; | ||
76 | |||
77 | result = 0; | ||
78 | addr &= PAGE_MASK; | ||
79 | end_addr = addr + PGDIR_SIZE; | ||
80 | while ((addr < last_addr) && (addr < end_addr)) { | ||
81 | struct page *page; | ||
82 | pmd_t *level2p; | ||
83 | |||
84 | page = kimage_alloc_control_pages(image, 0); | ||
85 | if (!page) { | ||
86 | result = -ENOMEM; | ||
87 | goto out; | ||
88 | } | ||
89 | level2p = (pmd_t *)page_address(page); | ||
90 | init_level2_page(level2p, addr); | ||
91 | set_pud(level3p++, __pud(__pa(level2p) | _KERNPG_TABLE)); | ||
92 | addr += PUD_SIZE; | ||
93 | } | ||
94 | /* clear the unused entries */ | ||
95 | while (addr < end_addr) { | ||
96 | pud_clear(level3p++); | ||
97 | addr += PUD_SIZE; | ||
98 | } | ||
99 | out: | ||
100 | return result; | ||
101 | } | ||
102 | |||
103 | |||
104 | static int init_level4_page(struct kimage *image, pgd_t *level4p, | ||
105 | unsigned long addr, unsigned long last_addr) | ||
106 | { | ||
107 | unsigned long end_addr; | ||
108 | int result; | ||
109 | |||
110 | result = 0; | ||
111 | addr &= PAGE_MASK; | ||
112 | end_addr = addr + (PTRS_PER_PGD * PGDIR_SIZE); | ||
113 | while ((addr < last_addr) && (addr < end_addr)) { | ||
114 | struct page *page; | ||
115 | pud_t *level3p; | ||
116 | |||
117 | page = kimage_alloc_control_pages(image, 0); | ||
118 | if (!page) { | ||
119 | result = -ENOMEM; | ||
120 | goto out; | ||
121 | } | ||
122 | level3p = (pud_t *)page_address(page); | ||
123 | result = init_level3_page(image, level3p, addr, last_addr); | ||
124 | if (result) | ||
125 | goto out; | ||
126 | set_pgd(level4p++, __pgd(__pa(level3p) | _KERNPG_TABLE)); | ||
127 | addr += PGDIR_SIZE; | ||
128 | } | ||
129 | /* clear the unused entries */ | ||
130 | while (addr < end_addr) { | ||
131 | pgd_clear(level4p++); | ||
132 | addr += PGDIR_SIZE; | ||
133 | } | ||
134 | out: | ||
135 | return result; | ||
136 | } | ||
137 | |||
138 | static void free_transition_pgtable(struct kimage *image) | 25 | static void free_transition_pgtable(struct kimage *image) |
139 | { | 26 | { |
140 | free_page((unsigned long)image->arch.pud); | 27 | free_page((unsigned long)image->arch.pud); |
@@ -184,22 +71,62 @@ err: | |||
184 | return result; | 71 | return result; |
185 | } | 72 | } |
186 | 73 | ||
74 | static void *alloc_pgt_page(void *data) | ||
75 | { | ||
76 | struct kimage *image = (struct kimage *)data; | ||
77 | struct page *page; | ||
78 | void *p = NULL; | ||
79 | |||
80 | page = kimage_alloc_control_pages(image, 0); | ||
81 | if (page) { | ||
82 | p = page_address(page); | ||
83 | clear_page(p); | ||
84 | } | ||
85 | |||
86 | return p; | ||
87 | } | ||
187 | 88 | ||
188 | static int init_pgtable(struct kimage *image, unsigned long start_pgtable) | 89 | static int init_pgtable(struct kimage *image, unsigned long start_pgtable) |
189 | { | 90 | { |
91 | struct x86_mapping_info info = { | ||
92 | .alloc_pgt_page = alloc_pgt_page, | ||
93 | .context = image, | ||
94 | .pmd_flag = __PAGE_KERNEL_LARGE_EXEC, | ||
95 | }; | ||
96 | unsigned long mstart, mend; | ||
190 | pgd_t *level4p; | 97 | pgd_t *level4p; |
191 | int result; | 98 | int result; |
99 | int i; | ||
100 | |||
192 | level4p = (pgd_t *)__va(start_pgtable); | 101 | level4p = (pgd_t *)__va(start_pgtable); |
193 | result = init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT); | 102 | clear_page(level4p); |
194 | if (result) | 103 | for (i = 0; i < nr_pfn_mapped; i++) { |
195 | return result; | 104 | mstart = pfn_mapped[i].start << PAGE_SHIFT; |
105 | mend = pfn_mapped[i].end << PAGE_SHIFT; | ||
106 | |||
107 | result = kernel_ident_mapping_init(&info, | ||
108 | level4p, mstart, mend); | ||
109 | if (result) | ||
110 | return result; | ||
111 | } | ||
112 | |||
196 | /* | 113 | /* |
197 | * image->start may be outside 0 ~ max_pfn, for example when | 114 | * segments's mem ranges could be outside 0 ~ max_pfn, |
198 | * jump back to original kernel from kexeced kernel | 115 | * for example when jump back to original kernel from kexeced kernel. |
116 | * or first kernel is booted with user mem map, and second kernel | ||
117 | * could be loaded out of that range. | ||
199 | */ | 118 | */ |
200 | result = init_one_level2_page(image, level4p, image->start); | 119 | for (i = 0; i < image->nr_segments; i++) { |
201 | if (result) | 120 | mstart = image->segment[i].mem; |
202 | return result; | 121 | mend = mstart + image->segment[i].memsz; |
122 | |||
123 | result = kernel_ident_mapping_init(&info, | ||
124 | level4p, mstart, mend); | ||
125 | |||
126 | if (result) | ||
127 | return result; | ||
128 | } | ||
129 | |||
203 | return init_transition_pgtable(image, level4p); | 130 | return init_transition_pgtable(image, level4p); |
204 | } | 131 | } |
205 | 132 | ||
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 3a04b224d0c0..22db92bbdf1a 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c | |||
@@ -364,10 +364,7 @@ static struct attribute_group mc_attr_group = { | |||
364 | 364 | ||
365 | static void microcode_fini_cpu(int cpu) | 365 | static void microcode_fini_cpu(int cpu) |
366 | { | 366 | { |
367 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | ||
368 | |||
369 | microcode_ops->microcode_fini_cpu(cpu); | 367 | microcode_ops->microcode_fini_cpu(cpu); |
370 | uci->valid = 0; | ||
371 | } | 368 | } |
372 | 369 | ||
373 | static enum ucode_state microcode_resume_cpu(int cpu) | 370 | static enum ucode_state microcode_resume_cpu(int cpu) |
@@ -383,6 +380,10 @@ static enum ucode_state microcode_resume_cpu(int cpu) | |||
383 | static enum ucode_state microcode_init_cpu(int cpu, bool refresh_fw) | 380 | static enum ucode_state microcode_init_cpu(int cpu, bool refresh_fw) |
384 | { | 381 | { |
385 | enum ucode_state ustate; | 382 | enum ucode_state ustate; |
383 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | ||
384 | |||
385 | if (uci && uci->valid) | ||
386 | return UCODE_OK; | ||
386 | 387 | ||
387 | if (collect_cpu_info(cpu)) | 388 | if (collect_cpu_info(cpu)) |
388 | return UCODE_ERROR; | 389 | return UCODE_ERROR; |
diff --git a/arch/x86/kernel/microcode_core_early.c b/arch/x86/kernel/microcode_core_early.c new file mode 100644 index 000000000000..577db8417d15 --- /dev/null +++ b/arch/x86/kernel/microcode_core_early.c | |||
@@ -0,0 +1,76 @@ | |||
1 | /* | ||
2 | * X86 CPU microcode early update for Linux | ||
3 | * | ||
4 | * Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com> | ||
5 | * H Peter Anvin" <hpa@zytor.com> | ||
6 | * | ||
7 | * This driver allows to early upgrade microcode on Intel processors | ||
8 | * belonging to IA-32 family - PentiumPro, Pentium II, | ||
9 | * Pentium III, Xeon, Pentium 4, etc. | ||
10 | * | ||
11 | * Reference: Section 9.11 of Volume 3, IA-32 Intel Architecture | ||
12 | * Software Developer's Manual. | ||
13 | * | ||
14 | * This program is free software; you can redistribute it and/or | ||
15 | * modify it under the terms of the GNU General Public License | ||
16 | * as published by the Free Software Foundation; either version | ||
17 | * 2 of the License, or (at your option) any later version. | ||
18 | */ | ||
19 | #include <linux/module.h> | ||
20 | #include <asm/microcode_intel.h> | ||
21 | #include <asm/processor.h> | ||
22 | |||
23 | #define QCHAR(a, b, c, d) ((a) + ((b) << 8) + ((c) << 16) + ((d) << 24)) | ||
24 | #define CPUID_INTEL1 QCHAR('G', 'e', 'n', 'u') | ||
25 | #define CPUID_INTEL2 QCHAR('i', 'n', 'e', 'I') | ||
26 | #define CPUID_INTEL3 QCHAR('n', 't', 'e', 'l') | ||
27 | #define CPUID_AMD1 QCHAR('A', 'u', 't', 'h') | ||
28 | #define CPUID_AMD2 QCHAR('e', 'n', 't', 'i') | ||
29 | #define CPUID_AMD3 QCHAR('c', 'A', 'M', 'D') | ||
30 | |||
31 | #define CPUID_IS(a, b, c, ebx, ecx, edx) \ | ||
32 | (!((ebx ^ (a))|(edx ^ (b))|(ecx ^ (c)))) | ||
33 | |||
34 | /* | ||
35 | * In early loading microcode phase on BSP, boot_cpu_data is not set up yet. | ||
36 | * x86_vendor() gets vendor id for BSP. | ||
37 | * | ||
38 | * In 32 bit AP case, accessing boot_cpu_data needs linear address. To simplify | ||
39 | * coding, we still use x86_vendor() to get vendor id for AP. | ||
40 | * | ||
41 | * x86_vendor() gets vendor information directly through cpuid. | ||
42 | */ | ||
43 | static int __cpuinit x86_vendor(void) | ||
44 | { | ||
45 | u32 eax = 0x00000000; | ||
46 | u32 ebx, ecx = 0, edx; | ||
47 | |||
48 | if (!have_cpuid_p()) | ||
49 | return X86_VENDOR_UNKNOWN; | ||
50 | |||
51 | native_cpuid(&eax, &ebx, &ecx, &edx); | ||
52 | |||
53 | if (CPUID_IS(CPUID_INTEL1, CPUID_INTEL2, CPUID_INTEL3, ebx, ecx, edx)) | ||
54 | return X86_VENDOR_INTEL; | ||
55 | |||
56 | if (CPUID_IS(CPUID_AMD1, CPUID_AMD2, CPUID_AMD3, ebx, ecx, edx)) | ||
57 | return X86_VENDOR_AMD; | ||
58 | |||
59 | return X86_VENDOR_UNKNOWN; | ||
60 | } | ||
61 | |||
62 | void __init load_ucode_bsp(void) | ||
63 | { | ||
64 | int vendor = x86_vendor(); | ||
65 | |||
66 | if (vendor == X86_VENDOR_INTEL) | ||
67 | load_ucode_intel_bsp(); | ||
68 | } | ||
69 | |||
70 | void __cpuinit load_ucode_ap(void) | ||
71 | { | ||
72 | int vendor = x86_vendor(); | ||
73 | |||
74 | if (vendor == X86_VENDOR_INTEL) | ||
75 | load_ucode_intel_ap(); | ||
76 | } | ||
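The QCHAR()/CPUID_IS() helpers above compare the raw vendor string that CPUID leaf 0 returns in EBX:EDX:ECX ("GenuineIntel" / "AuthenticAMD"), since boot_cpu_data is not usable this early. The same check can be sketched from userspace with GCC's <cpuid.h> (shown here only for illustration):

    #include <stdio.h>
    #include <cpuid.h>

    #define QCHAR(a, b, c, d) ((a) + ((b) << 8) + ((c) << 16) + ((d) << 24))

    int main(void)
    {
            unsigned int eax, ebx, ecx, edx;

            if (!__get_cpuid(0, &eax, &ebx, &ecx, &edx))
                    return 1;

            /* "Genu", "ineI", "ntel" land in EBX, EDX, ECX respectively */
            if (ebx == QCHAR('G', 'e', 'n', 'u') &&
                edx == QCHAR('i', 'n', 'e', 'I') &&
                ecx == QCHAR('n', 't', 'e', 'l'))
                    puts("GenuineIntel");
            else
                    puts("not Intel");
            return 0;
    }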
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index 3544aed39338..5fb2cebf556b 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c | |||
@@ -79,7 +79,7 @@ | |||
79 | #include <linux/module.h> | 79 | #include <linux/module.h> |
80 | #include <linux/vmalloc.h> | 80 | #include <linux/vmalloc.h> |
81 | 81 | ||
82 | #include <asm/microcode.h> | 82 | #include <asm/microcode_intel.h> |
83 | #include <asm/processor.h> | 83 | #include <asm/processor.h> |
84 | #include <asm/msr.h> | 84 | #include <asm/msr.h> |
85 | 85 | ||
@@ -87,59 +87,6 @@ MODULE_DESCRIPTION("Microcode Update Driver"); | |||
87 | MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>"); | 87 | MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>"); |
88 | MODULE_LICENSE("GPL"); | 88 | MODULE_LICENSE("GPL"); |
89 | 89 | ||
90 | struct microcode_header_intel { | ||
91 | unsigned int hdrver; | ||
92 | unsigned int rev; | ||
93 | unsigned int date; | ||
94 | unsigned int sig; | ||
95 | unsigned int cksum; | ||
96 | unsigned int ldrver; | ||
97 | unsigned int pf; | ||
98 | unsigned int datasize; | ||
99 | unsigned int totalsize; | ||
100 | unsigned int reserved[3]; | ||
101 | }; | ||
102 | |||
103 | struct microcode_intel { | ||
104 | struct microcode_header_intel hdr; | ||
105 | unsigned int bits[0]; | ||
106 | }; | ||
107 | |||
108 | /* microcode format is extended from prescott processors */ | ||
109 | struct extended_signature { | ||
110 | unsigned int sig; | ||
111 | unsigned int pf; | ||
112 | unsigned int cksum; | ||
113 | }; | ||
114 | |||
115 | struct extended_sigtable { | ||
116 | unsigned int count; | ||
117 | unsigned int cksum; | ||
118 | unsigned int reserved[3]; | ||
119 | struct extended_signature sigs[0]; | ||
120 | }; | ||
121 | |||
122 | #define DEFAULT_UCODE_DATASIZE (2000) | ||
123 | #define MC_HEADER_SIZE (sizeof(struct microcode_header_intel)) | ||
124 | #define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) | ||
125 | #define EXT_HEADER_SIZE (sizeof(struct extended_sigtable)) | ||
126 | #define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature)) | ||
127 | #define DWSIZE (sizeof(u32)) | ||
128 | |||
129 | #define get_totalsize(mc) \ | ||
130 | (((struct microcode_intel *)mc)->hdr.totalsize ? \ | ||
131 | ((struct microcode_intel *)mc)->hdr.totalsize : \ | ||
132 | DEFAULT_UCODE_TOTALSIZE) | ||
133 | |||
134 | #define get_datasize(mc) \ | ||
135 | (((struct microcode_intel *)mc)->hdr.datasize ? \ | ||
136 | ((struct microcode_intel *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE) | ||
137 | |||
138 | #define sigmatch(s1, s2, p1, p2) \ | ||
139 | (((s1) == (s2)) && (((p1) & (p2)) || (((p1) == 0) && ((p2) == 0)))) | ||
140 | |||
141 | #define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE) | ||
142 | |||
143 | static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) | 90 | static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) |
144 | { | 91 | { |
145 | struct cpuinfo_x86 *c = &cpu_data(cpu_num); | 92 | struct cpuinfo_x86 *c = &cpu_data(cpu_num); |
@@ -162,128 +109,25 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) | |||
162 | return 0; | 109 | return 0; |
163 | } | 110 | } |
164 | 111 | ||
165 | static inline int update_match_cpu(struct cpu_signature *csig, int sig, int pf) | ||
166 | { | ||
167 | return (!sigmatch(sig, csig->sig, pf, csig->pf)) ? 0 : 1; | ||
168 | } | ||
169 | |||
170 | static inline int | ||
171 | update_match_revision(struct microcode_header_intel *mc_header, int rev) | ||
172 | { | ||
173 | return (mc_header->rev <= rev) ? 0 : 1; | ||
174 | } | ||
175 | |||
176 | static int microcode_sanity_check(void *mc) | ||
177 | { | ||
178 | unsigned long total_size, data_size, ext_table_size; | ||
179 | struct microcode_header_intel *mc_header = mc; | ||
180 | struct extended_sigtable *ext_header = NULL; | ||
181 | int sum, orig_sum, ext_sigcount = 0, i; | ||
182 | struct extended_signature *ext_sig; | ||
183 | |||
184 | total_size = get_totalsize(mc_header); | ||
185 | data_size = get_datasize(mc_header); | ||
186 | |||
187 | if (data_size + MC_HEADER_SIZE > total_size) { | ||
188 | pr_err("error! Bad data size in microcode data file\n"); | ||
189 | return -EINVAL; | ||
190 | } | ||
191 | |||
192 | if (mc_header->ldrver != 1 || mc_header->hdrver != 1) { | ||
193 | pr_err("error! Unknown microcode update format\n"); | ||
194 | return -EINVAL; | ||
195 | } | ||
196 | ext_table_size = total_size - (MC_HEADER_SIZE + data_size); | ||
197 | if (ext_table_size) { | ||
198 | if ((ext_table_size < EXT_HEADER_SIZE) | ||
199 | || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) { | ||
200 | pr_err("error! Small exttable size in microcode data file\n"); | ||
201 | return -EINVAL; | ||
202 | } | ||
203 | ext_header = mc + MC_HEADER_SIZE + data_size; | ||
204 | if (ext_table_size != exttable_size(ext_header)) { | ||
205 | pr_err("error! Bad exttable size in microcode data file\n"); | ||
206 | return -EFAULT; | ||
207 | } | ||
208 | ext_sigcount = ext_header->count; | ||
209 | } | ||
210 | |||
211 | /* check extended table checksum */ | ||
212 | if (ext_table_size) { | ||
213 | int ext_table_sum = 0; | ||
214 | int *ext_tablep = (int *)ext_header; | ||
215 | |||
216 | i = ext_table_size / DWSIZE; | ||
217 | while (i--) | ||
218 | ext_table_sum += ext_tablep[i]; | ||
219 | if (ext_table_sum) { | ||
220 | pr_warning("aborting, bad extended signature table checksum\n"); | ||
221 | return -EINVAL; | ||
222 | } | ||
223 | } | ||
224 | |||
225 | /* calculate the checksum */ | ||
226 | orig_sum = 0; | ||
227 | i = (MC_HEADER_SIZE + data_size) / DWSIZE; | ||
228 | while (i--) | ||
229 | orig_sum += ((int *)mc)[i]; | ||
230 | if (orig_sum) { | ||
231 | pr_err("aborting, bad checksum\n"); | ||
232 | return -EINVAL; | ||
233 | } | ||
234 | if (!ext_table_size) | ||
235 | return 0; | ||
236 | /* check extended signature checksum */ | ||
237 | for (i = 0; i < ext_sigcount; i++) { | ||
238 | ext_sig = (void *)ext_header + EXT_HEADER_SIZE + | ||
239 | EXT_SIGNATURE_SIZE * i; | ||
240 | sum = orig_sum | ||
241 | - (mc_header->sig + mc_header->pf + mc_header->cksum) | ||
242 | + (ext_sig->sig + ext_sig->pf + ext_sig->cksum); | ||
243 | if (sum) { | ||
244 | pr_err("aborting, bad checksum\n"); | ||
245 | return -EINVAL; | ||
246 | } | ||
247 | } | ||
248 | return 0; | ||
249 | } | ||
250 | |||
251 | /* | 112 | /* |
252 | * return 0 - no update found | 113 | * return 0 - no update found |
253 | * return 1 - found update | 114 | * return 1 - found update |
254 | */ | 115 | */ |
255 | static int | 116 | static int get_matching_mc(struct microcode_intel *mc_intel, int cpu) |
256 | get_matching_microcode(struct cpu_signature *cpu_sig, void *mc, int rev) | ||
257 | { | 117 | { |
258 | struct microcode_header_intel *mc_header = mc; | 118 | struct cpu_signature cpu_sig; |
259 | struct extended_sigtable *ext_header; | 119 | unsigned int csig, cpf, crev; |
260 | unsigned long total_size = get_totalsize(mc_header); | ||
261 | int ext_sigcount, i; | ||
262 | struct extended_signature *ext_sig; | ||
263 | |||
264 | if (!update_match_revision(mc_header, rev)) | ||
265 | return 0; | ||
266 | |||
267 | if (update_match_cpu(cpu_sig, mc_header->sig, mc_header->pf)) | ||
268 | return 1; | ||
269 | 120 | ||
270 | /* Look for ext. headers: */ | 121 | collect_cpu_info(cpu, &cpu_sig); |
271 | if (total_size <= get_datasize(mc_header) + MC_HEADER_SIZE) | ||
272 | return 0; | ||
273 | 122 | ||
274 | ext_header = mc + get_datasize(mc_header) + MC_HEADER_SIZE; | 123 | csig = cpu_sig.sig; |
275 | ext_sigcount = ext_header->count; | 124 | cpf = cpu_sig.pf; |
276 | ext_sig = (void *)ext_header + EXT_HEADER_SIZE; | 125 | crev = cpu_sig.rev; |
277 | 126 | ||
278 | for (i = 0; i < ext_sigcount; i++) { | 127 | return get_matching_microcode(csig, cpf, mc_intel, crev); |
279 | if (update_match_cpu(cpu_sig, ext_sig->sig, ext_sig->pf)) | ||
280 | return 1; | ||
281 | ext_sig++; | ||
282 | } | ||
283 | return 0; | ||
284 | } | 128 | } |
285 | 129 | ||
286 | static int apply_microcode(int cpu) | 130 | int apply_microcode(int cpu) |
287 | { | 131 | { |
288 | struct microcode_intel *mc_intel; | 132 | struct microcode_intel *mc_intel; |
289 | struct ucode_cpu_info *uci; | 133 | struct ucode_cpu_info *uci; |
@@ -300,6 +144,14 @@ static int apply_microcode(int cpu) | |||
300 | if (mc_intel == NULL) | 144 | if (mc_intel == NULL) |
301 | return 0; | 145 | return 0; |
302 | 146 | ||
147 | /* | ||
148 | * Microcode on this CPU could be updated earlier. Only apply the | ||
149 | * microcode patch in mc_intel when it is newer than the one on this | ||
150 | * CPU. | ||
151 | */ | ||
152 | if (get_matching_mc(mc_intel, cpu) == 0) | ||
153 | return 0; | ||
154 | |||
303 | /* write microcode via MSR 0x79 */ | 155 | /* write microcode via MSR 0x79 */ |
304 | wrmsr(MSR_IA32_UCODE_WRITE, | 156 | wrmsr(MSR_IA32_UCODE_WRITE, |
305 | (unsigned long) mc_intel->bits, | 157 | (unsigned long) mc_intel->bits, |
@@ -338,6 +190,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, | |||
338 | unsigned int leftover = size; | 190 | unsigned int leftover = size; |
339 | enum ucode_state state = UCODE_OK; | 191 | enum ucode_state state = UCODE_OK; |
340 | unsigned int curr_mc_size = 0; | 192 | unsigned int curr_mc_size = 0; |
193 | unsigned int csig, cpf; | ||
341 | 194 | ||
342 | while (leftover) { | 195 | while (leftover) { |
343 | struct microcode_header_intel mc_header; | 196 | struct microcode_header_intel mc_header; |
@@ -362,11 +215,13 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, | |||
362 | } | 215 | } |
363 | 216 | ||
364 | if (get_ucode_data(mc, ucode_ptr, mc_size) || | 217 | if (get_ucode_data(mc, ucode_ptr, mc_size) || |
365 | microcode_sanity_check(mc) < 0) { | 218 | microcode_sanity_check(mc, 1) < 0) { |
366 | break; | 219 | break; |
367 | } | 220 | } |
368 | 221 | ||
369 | if (get_matching_microcode(&uci->cpu_sig, mc, new_rev)) { | 222 | csig = uci->cpu_sig.sig; |
223 | cpf = uci->cpu_sig.pf; | ||
224 | if (get_matching_microcode(csig, cpf, mc, new_rev)) { | ||
370 | vfree(new_mc); | 225 | vfree(new_mc); |
371 | new_rev = mc_header.rev; | 226 | new_rev = mc_header.rev; |
372 | new_mc = mc; | 227 | new_mc = mc; |
@@ -393,6 +248,13 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, | |||
393 | vfree(uci->mc); | 248 | vfree(uci->mc); |
394 | uci->mc = (struct microcode_intel *)new_mc; | 249 | uci->mc = (struct microcode_intel *)new_mc; |
395 | 250 | ||
251 | /* | ||
252 | * If early loading microcode is supported, save this mc into | ||
253 | * permanent memory. So it will be loaded early when a CPU is hot added | ||
254 | * or resumes. | ||
255 | */ | ||
256 | save_mc_for_early(new_mc); | ||
257 | |||
396 | pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n", | 258 | pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n", |
397 | cpu, new_rev, uci->cpu_sig.rev); | 259 | cpu, new_rev, uci->cpu_sig.rev); |
398 | out: | 260 | out: |
diff --git a/arch/x86/kernel/microcode_intel_early.c b/arch/x86/kernel/microcode_intel_early.c new file mode 100644 index 000000000000..7890bc838952 --- /dev/null +++ b/arch/x86/kernel/microcode_intel_early.c | |||
@@ -0,0 +1,796 @@ | |||
1 | /* | ||
2 | * Intel CPU microcode early update for Linux | ||
3 | * | ||
4 | * Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com> | ||
5 | * H Peter Anvin" <hpa@zytor.com> | ||
6 | * | ||
7 | * This allows to early upgrade microcode on Intel processors | ||
8 | * belonging to IA-32 family - PentiumPro, Pentium II, | ||
9 | * Pentium III, Xeon, Pentium 4, etc. | ||
10 | * | ||
11 | * Reference: Section 9.11 of Volume 3, IA-32 Intel Architecture | ||
12 | * Software Developer's Manual. | ||
13 | * | ||
14 | * This program is free software; you can redistribute it and/or | ||
15 | * modify it under the terms of the GNU General Public License | ||
16 | * as published by the Free Software Foundation; either version | ||
17 | * 2 of the License, or (at your option) any later version. | ||
18 | */ | ||
19 | #include <linux/module.h> | ||
20 | #include <linux/mm.h> | ||
21 | #include <linux/slab.h> | ||
22 | #include <linux/earlycpio.h> | ||
23 | #include <linux/initrd.h> | ||
24 | #include <linux/cpu.h> | ||
25 | #include <asm/msr.h> | ||
26 | #include <asm/microcode_intel.h> | ||
27 | #include <asm/processor.h> | ||
28 | #include <asm/tlbflush.h> | ||
29 | #include <asm/setup.h> | ||
30 | |||
31 | unsigned long mc_saved_in_initrd[MAX_UCODE_COUNT]; | ||
32 | struct mc_saved_data { | ||
33 | unsigned int mc_saved_count; | ||
34 | struct microcode_intel **mc_saved; | ||
35 | } mc_saved_data; | ||
36 | |||
37 | static enum ucode_state __cpuinit | ||
38 | generic_load_microcode_early(struct microcode_intel **mc_saved_p, | ||
39 | unsigned int mc_saved_count, | ||
40 | struct ucode_cpu_info *uci) | ||
41 | { | ||
42 | struct microcode_intel *ucode_ptr, *new_mc = NULL; | ||
43 | int new_rev = uci->cpu_sig.rev; | ||
44 | enum ucode_state state = UCODE_OK; | ||
45 | unsigned int mc_size; | ||
46 | struct microcode_header_intel *mc_header; | ||
47 | unsigned int csig = uci->cpu_sig.sig; | ||
48 | unsigned int cpf = uci->cpu_sig.pf; | ||
49 | int i; | ||
50 | |||
51 | for (i = 0; i < mc_saved_count; i++) { | ||
52 | ucode_ptr = mc_saved_p[i]; | ||
53 | |||
54 | mc_header = (struct microcode_header_intel *)ucode_ptr; | ||
55 | mc_size = get_totalsize(mc_header); | ||
56 | if (get_matching_microcode(csig, cpf, ucode_ptr, new_rev)) { | ||
57 | new_rev = mc_header->rev; | ||
58 | new_mc = ucode_ptr; | ||
59 | } | ||
60 | } | ||
61 | |||
62 | if (!new_mc) { | ||
63 | state = UCODE_NFOUND; | ||
64 | goto out; | ||
65 | } | ||
66 | |||
67 | uci->mc = (struct microcode_intel *)new_mc; | ||
68 | out: | ||
69 | return state; | ||
70 | } | ||
71 | |||
72 | static void __cpuinit | ||
73 | microcode_pointer(struct microcode_intel **mc_saved, | ||
74 | unsigned long *mc_saved_in_initrd, | ||
75 | unsigned long initrd_start, int mc_saved_count) | ||
76 | { | ||
77 | int i; | ||
78 | |||
79 | for (i = 0; i < mc_saved_count; i++) | ||
80 | mc_saved[i] = (struct microcode_intel *) | ||
81 | (mc_saved_in_initrd[i] + initrd_start); | ||
82 | } | ||
83 | |||
84 | #ifdef CONFIG_X86_32 | ||
85 | static void __cpuinit | ||
86 | microcode_phys(struct microcode_intel **mc_saved_tmp, | ||
87 | struct mc_saved_data *mc_saved_data) | ||
88 | { | ||
89 | int i; | ||
90 | struct microcode_intel ***mc_saved; | ||
91 | |||
92 | mc_saved = (struct microcode_intel ***) | ||
93 | __pa_symbol(&mc_saved_data->mc_saved); | ||
94 | for (i = 0; i < mc_saved_data->mc_saved_count; i++) { | ||
95 | struct microcode_intel *p; | ||
96 | |||
97 | p = *(struct microcode_intel **) | ||
98 | __pa(mc_saved_data->mc_saved + i); | ||
99 | mc_saved_tmp[i] = (struct microcode_intel *)__pa(p); | ||
100 | } | ||
101 | } | ||
102 | #endif | ||
103 | |||
104 | static enum ucode_state __cpuinit | ||
105 | load_microcode(struct mc_saved_data *mc_saved_data, | ||
106 | unsigned long *mc_saved_in_initrd, | ||
107 | unsigned long initrd_start, | ||
108 | struct ucode_cpu_info *uci) | ||
109 | { | ||
110 | struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT]; | ||
111 | unsigned int count = mc_saved_data->mc_saved_count; | ||
112 | |||
113 | if (!mc_saved_data->mc_saved) { | ||
114 | microcode_pointer(mc_saved_tmp, mc_saved_in_initrd, | ||
115 | initrd_start, count); | ||
116 | |||
117 | return generic_load_microcode_early(mc_saved_tmp, count, uci); | ||
118 | } else { | ||
119 | #ifdef CONFIG_X86_32 | ||
120 | microcode_phys(mc_saved_tmp, mc_saved_data); | ||
121 | return generic_load_microcode_early(mc_saved_tmp, count, uci); | ||
122 | #else | ||
123 | return generic_load_microcode_early(mc_saved_data->mc_saved, | ||
124 | count, uci); | ||
125 | #endif | ||
126 | } | ||
127 | } | ||
128 | |||
129 | static u8 get_x86_family(unsigned long sig) | ||
130 | { | ||
131 | u8 x86; | ||
132 | |||
133 | x86 = (sig >> 8) & 0xf; | ||
134 | |||
135 | if (x86 == 0xf) | ||
136 | x86 += (sig >> 20) & 0xff; | ||
137 | |||
138 | return x86; | ||
139 | } | ||
140 | |||
141 | static u8 get_x86_model(unsigned long sig) | ||
142 | { | ||
143 | u8 x86, x86_model; | ||
144 | |||
145 | x86 = get_x86_family(sig); | ||
146 | x86_model = (sig >> 4) & 0xf; | ||
147 | |||
148 | if (x86 == 0x6 || x86 == 0xf) | ||
149 | x86_model += ((sig >> 16) & 0xf) << 4; | ||
150 | |||
151 | return x86_model; | ||
152 | } | ||
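get_x86_family() and get_x86_model() above decode the CPUID signature the usual way: the extended family is added only when the base family is 0xf, and the extended model is prepended when the family is 6 or 0xf. A worked example, assuming a signature of 0x000306a9 (an Ivy Bridge part):

    /* sig = 0x000306a9 (assumed example)
     *   family = (sig >> 8) & 0xf          = 0x6   (not 0xf, nothing added)
     *   model  = (sig >> 4) & 0xf          = 0xa
     *   family is 6, so prepend the extended model:
     *   model += ((sig >> 16) & 0xf) << 4  -> 0x3a
     * i.e. get_x86_family() returns 6 and get_x86_model() returns 0x3a (58).
     */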
153 | |||
154 | /* | ||
155 | * Given CPU signature and a microcode patch, this function finds if the | ||
156 | * microcode patch has matching family and model with the CPU. | ||
157 | */ | ||
158 | static enum ucode_state | ||
159 | matching_model_microcode(struct microcode_header_intel *mc_header, | ||
160 | unsigned long sig) | ||
161 | { | ||
162 | u8 x86, x86_model; | ||
163 | u8 x86_ucode, x86_model_ucode; | ||
164 | struct extended_sigtable *ext_header; | ||
165 | unsigned long total_size = get_totalsize(mc_header); | ||
166 | unsigned long data_size = get_datasize(mc_header); | ||
167 | int ext_sigcount, i; | ||
168 | struct extended_signature *ext_sig; | ||
169 | |||
170 | x86 = get_x86_family(sig); | ||
171 | x86_model = get_x86_model(sig); | ||
172 | |||
173 | x86_ucode = get_x86_family(mc_header->sig); | ||
174 | x86_model_ucode = get_x86_model(mc_header->sig); | ||
175 | |||
176 | if (x86 == x86_ucode && x86_model == x86_model_ucode) | ||
177 | return UCODE_OK; | ||
178 | |||
179 | /* Look for ext. headers: */ | ||
180 | if (total_size <= data_size + MC_HEADER_SIZE) | ||
181 | return UCODE_NFOUND; | ||
182 | |||
183 | ext_header = (struct extended_sigtable *) | ||
184 | mc_header + data_size + MC_HEADER_SIZE; | ||
185 | ext_sigcount = ext_header->count; | ||
186 | ext_sig = (void *)ext_header + EXT_HEADER_SIZE; | ||
187 | |||
188 | for (i = 0; i < ext_sigcount; i++) { | ||
189 | x86_ucode = get_x86_family(ext_sig->sig); | ||
190 | x86_model_ucode = get_x86_model(ext_sig->sig); | ||
191 | |||
192 | if (x86 == x86_ucode && x86_model == x86_model_ucode) | ||
193 | return UCODE_OK; | ||
194 | |||
195 | ext_sig++; | ||
196 | } | ||
197 | |||
198 | return UCODE_NFOUND; | ||
199 | } | ||
200 | |||
201 | static int | ||
202 | save_microcode(struct mc_saved_data *mc_saved_data, | ||
203 | struct microcode_intel **mc_saved_src, | ||
204 | unsigned int mc_saved_count) | ||
205 | { | ||
206 | int i, j; | ||
207 | struct microcode_intel **mc_saved_p; | ||
208 | int ret; | ||
209 | |||
210 | if (!mc_saved_count) | ||
211 | return -EINVAL; | ||
212 | |||
213 | /* | ||
214 | * Copy new microcode data. | ||
215 | */ | ||
216 | mc_saved_p = kmalloc(mc_saved_count*sizeof(struct microcode_intel *), | ||
217 | GFP_KERNEL); | ||
218 | if (!mc_saved_p) | ||
219 | return -ENOMEM; | ||
220 | |||
221 | for (i = 0; i < mc_saved_count; i++) { | ||
222 | struct microcode_intel *mc = mc_saved_src[i]; | ||
223 | struct microcode_header_intel *mc_header = &mc->hdr; | ||
224 | unsigned long mc_size = get_totalsize(mc_header); | ||
225 | mc_saved_p[i] = kmalloc(mc_size, GFP_KERNEL); | ||
226 | if (!mc_saved_p[i]) { | ||
227 | ret = -ENOMEM; | ||
228 | goto err; | ||
229 | } | ||
230 | if (!mc_saved_src[i]) { | ||
231 | ret = -EINVAL; | ||
232 | goto err; | ||
233 | } | ||
234 | memcpy(mc_saved_p[i], mc, mc_size); | ||
235 | } | ||
236 | |||
237 | /* | ||
238 | * Point to newly saved microcode. | ||
239 | */ | ||
240 | mc_saved_data->mc_saved = mc_saved_p; | ||
241 | mc_saved_data->mc_saved_count = mc_saved_count; | ||
242 | |||
243 | return 0; | ||
244 | |||
245 | err: | ||
246 | for (j = 0; j <= i; j++) | ||
247 | kfree(mc_saved_p[j]); | ||
248 | kfree(mc_saved_p); | ||
249 | |||
250 | return ret; | ||
251 | } | ||
252 | |||
253 | /* | ||
254 | * A microcode patch in ucode_ptr is saved into mc_saved | ||
255 | * - if it has matching signature and newer revision compared to an existing | ||
256 | * patch mc_saved. | ||
257 | * - or if it is a newly discovered microcode patch. | ||
258 | * | ||
259 | * The microcode patch should have matching model with CPU. | ||
260 | */ | ||
261 | static void _save_mc(struct microcode_intel **mc_saved, u8 *ucode_ptr, | ||
262 | unsigned int *mc_saved_count_p) | ||
263 | { | ||
264 | int i; | ||
265 | int found = 0; | ||
266 | unsigned int mc_saved_count = *mc_saved_count_p; | ||
267 | struct microcode_header_intel *mc_header; | ||
268 | |||
269 | mc_header = (struct microcode_header_intel *)ucode_ptr; | ||
270 | for (i = 0; i < mc_saved_count; i++) { | ||
271 | unsigned int sig, pf; | ||
272 | unsigned int new_rev; | ||
273 | struct microcode_header_intel *mc_saved_header = | ||
274 | (struct microcode_header_intel *)mc_saved[i]; | ||
275 | sig = mc_saved_header->sig; | ||
276 | pf = mc_saved_header->pf; | ||
277 | new_rev = mc_header->rev; | ||
278 | |||
279 | if (get_matching_sig(sig, pf, ucode_ptr, new_rev)) { | ||
280 | found = 1; | ||
281 | if (update_match_revision(mc_header, new_rev)) { | ||
282 | /* | ||
283 | * Found an older ucode saved before. | ||
284 | * Replace the older one with this newer | ||
285 | * one. | ||
286 | */ | ||
287 | mc_saved[i] = | ||
288 | (struct microcode_intel *)ucode_ptr; | ||
289 | break; | ||
290 | } | ||
291 | } | ||
292 | } | ||
293 | if (i >= mc_saved_count && !found) | ||
294 | /* | ||
295 | * This ucode is first time discovered in ucode file. | ||
296 | * Save it to memory. | ||
297 | */ | ||
298 | mc_saved[mc_saved_count++] = | ||
299 | (struct microcode_intel *)ucode_ptr; | ||
300 | |||
301 | *mc_saved_count_p = mc_saved_count; | ||
302 | } | ||
303 | |||
304 | /* | ||
305 | * Get microcode matching with BSP's model. Only CPUs with the same model as | ||
306 | * BSP can stay in the platform. | ||
307 | */ | ||
308 | static enum ucode_state __init | ||
309 | get_matching_model_microcode(int cpu, unsigned long start, | ||
310 | void *data, size_t size, | ||
311 | struct mc_saved_data *mc_saved_data, | ||
312 | unsigned long *mc_saved_in_initrd, | ||
313 | struct ucode_cpu_info *uci) | ||
314 | { | ||
315 | u8 *ucode_ptr = data; | ||
316 | unsigned int leftover = size; | ||
317 | enum ucode_state state = UCODE_OK; | ||
318 | unsigned int mc_size; | ||
319 | struct microcode_header_intel *mc_header; | ||
320 | struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT]; | ||
321 | unsigned int mc_saved_count = mc_saved_data->mc_saved_count; | ||
322 | int i; | ||
323 | |||
324 | while (leftover) { | ||
325 | mc_header = (struct microcode_header_intel *)ucode_ptr; | ||
326 | |||
327 | mc_size = get_totalsize(mc_header); | ||
328 | if (!mc_size || mc_size > leftover || | ||
329 | microcode_sanity_check(ucode_ptr, 0) < 0) | ||
330 | break; | ||
331 | |||
332 | leftover -= mc_size; | ||
333 | |||
334 | /* | ||
335 | * Since APs with same family and model as the BSP may boot in | ||
336 | * the platform, we need to find and save microcode patches | ||
337 | * with the same family and model as the BSP. | ||
338 | */ | ||
339 | if (matching_model_microcode(mc_header, uci->cpu_sig.sig) != | ||
340 | UCODE_OK) { | ||
341 | ucode_ptr += mc_size; | ||
342 | continue; | ||
343 | } | ||
344 | |||
345 | _save_mc(mc_saved_tmp, ucode_ptr, &mc_saved_count); | ||
346 | |||
347 | ucode_ptr += mc_size; | ||
348 | } | ||
349 | |||
350 | if (leftover) { | ||
351 | state = UCODE_ERROR; | ||
352 | goto out; | ||
353 | } | ||
354 | |||
355 | if (mc_saved_count == 0) { | ||
356 | state = UCODE_NFOUND; | ||
357 | goto out; | ||
358 | } | ||
359 | |||
360 | for (i = 0; i < mc_saved_count; i++) | ||
361 | mc_saved_in_initrd[i] = (unsigned long)mc_saved_tmp[i] - start; | ||
362 | |||
363 | mc_saved_data->mc_saved_count = mc_saved_count; | ||
364 | out: | ||
365 | return state; | ||
366 | } | ||
367 | |||
368 | #define native_rdmsr(msr, val1, val2) \ | ||
369 | do { \ | ||
370 | u64 __val = native_read_msr((msr)); \ | ||
371 | (void)((val1) = (u32)__val); \ | ||
372 | (void)((val2) = (u32)(__val >> 32)); \ | ||
373 | } while (0) | ||
374 | |||
375 | #define native_wrmsr(msr, low, high) \ | ||
376 | native_write_msr(msr, low, high); | ||
377 | |||
378 | static int __cpuinit collect_cpu_info_early(struct ucode_cpu_info *uci) | ||
379 | { | ||
380 | unsigned int val[2]; | ||
381 | u8 x86, x86_model; | ||
382 | struct cpu_signature csig; | ||
383 | unsigned int eax, ebx, ecx, edx; | ||
384 | |||
385 | csig.sig = 0; | ||
386 | csig.pf = 0; | ||
387 | csig.rev = 0; | ||
388 | |||
389 | memset(uci, 0, sizeof(*uci)); | ||
390 | |||
391 | eax = 0x00000001; | ||
392 | ecx = 0; | ||
393 | native_cpuid(&eax, &ebx, &ecx, &edx); | ||
394 | csig.sig = eax; | ||
395 | |||
396 | x86 = get_x86_family(csig.sig); | ||
397 | x86_model = get_x86_model(csig.sig); | ||
398 | |||
399 | if ((x86_model >= 5) || (x86 > 6)) { | ||
400 | /* get processor flags from MSR 0x17 */ | ||
401 | native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); | ||
402 | csig.pf = 1 << ((val[1] >> 18) & 7); | ||
403 | } | ||
404 | native_wrmsr(MSR_IA32_UCODE_REV, 0, 0); | ||
405 | |||
406 | /* As documented in the SDM: Do a CPUID 1 here */ | ||
407 | sync_core(); | ||
408 | |||
409 | /* get the current revision from MSR 0x8B */ | ||
410 | native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); | ||
411 | |||
412 | csig.rev = val[1]; | ||
413 | |||
414 | uci->cpu_sig = csig; | ||
415 | uci->valid = 1; | ||
416 | |||
417 | return 0; | ||
418 | } | ||
419 | |||
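collect_cpu_info_early() follows the documented sequence for reading the running microcode revision: clear IA32_BIOS_SIGN_ID (MSR 0x8B), execute CPUID leaf 1, then read the MSR back and take the high 32 bits. A hedged, illustrative helper in the same style (kernel context assumed; this function is not part of the patch):

static u32 read_ucode_rev(void)
{
	u32 lo, hi;
	unsigned int eax = 0x00000001, ebx, ecx = 0, edx;

	native_wrmsr(MSR_IA32_UCODE_REV, 0, 0);		/* clear IA32_BIOS_SIGN_ID */
	native_cpuid(&eax, &ebx, &ecx, &edx);		/* CPUID(1) latches the revision */
	native_rdmsr(MSR_IA32_UCODE_REV, lo, hi);

	return hi;					/* revision lives in the high half */
}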
420 | #ifdef DEBUG | ||
421 | static void __ref show_saved_mc(void) | ||
422 | { | ||
423 | int i, j; | ||
424 | unsigned int sig, pf, rev, total_size, data_size, date; | ||
425 | struct ucode_cpu_info uci; | ||
426 | |||
427 | if (mc_saved_data.mc_saved_count == 0) { | ||
428 | pr_debug("no microcode data saved.\n"); | ||
429 | return; | ||
430 | } | ||
431 | pr_debug("Total microcode saved: %d\n", mc_saved_data.mc_saved_count); | ||
432 | |||
433 | collect_cpu_info_early(&uci); | ||
434 | |||
435 | sig = uci.cpu_sig.sig; | ||
436 | pf = uci.cpu_sig.pf; | ||
437 | rev = uci.cpu_sig.rev; | ||
438 | pr_debug("CPU%d: sig=0x%x, pf=0x%x, rev=0x%x\n", | ||
439 | smp_processor_id(), sig, pf, rev); | ||
440 | |||
441 | for (i = 0; i < mc_saved_data.mc_saved_count; i++) { | ||
442 | struct microcode_header_intel *mc_saved_header; | ||
443 | struct extended_sigtable *ext_header; | ||
444 | int ext_sigcount; | ||
445 | struct extended_signature *ext_sig; | ||
446 | |||
447 | mc_saved_header = (struct microcode_header_intel *) | ||
448 | mc_saved_data.mc_saved[i]; | ||
449 | sig = mc_saved_header->sig; | ||
450 | pf = mc_saved_header->pf; | ||
451 | rev = mc_saved_header->rev; | ||
452 | total_size = get_totalsize(mc_saved_header); | ||
453 | data_size = get_datasize(mc_saved_header); | ||
454 | date = mc_saved_header->date; | ||
455 | |||
456 | pr_debug("mc_saved[%d]: sig=0x%x, pf=0x%x, rev=0x%x, total size=0x%x, date = %04x-%02x-%02x\n", | ||
457 | i, sig, pf, rev, total_size, | ||
458 | date & 0xffff, | ||
459 | date >> 24, | ||
460 | (date >> 16) & 0xff); | ||
461 | |||
462 | /* Look for ext. headers: */ | ||
463 | if (total_size <= data_size + MC_HEADER_SIZE) | ||
464 | continue; | ||
465 | |||
466 | ext_header = (void *)mc_saved_header + | ||
467 | data_size + MC_HEADER_SIZE; | ||
468 | ext_sigcount = ext_header->count; | ||
469 | ext_sig = (void *)ext_header + EXT_HEADER_SIZE; | ||
470 | |||
471 | for (j = 0; j < ext_sigcount; j++) { | ||
472 | sig = ext_sig->sig; | ||
473 | pf = ext_sig->pf; | ||
474 | |||
475 | pr_debug("\tExtended[%d]: sig=0x%x, pf=0x%x\n", | ||
476 | j, sig, pf); | ||
477 | |||
478 | ext_sig++; | ||
479 | } | ||
480 | |||
481 | } | ||
482 | } | ||
483 | #else | ||
484 | static inline void show_saved_mc(void) | ||
485 | { | ||
486 | } | ||
487 | #endif | ||
488 | |||
489 | #if defined(CONFIG_MICROCODE_INTEL_EARLY) && defined(CONFIG_HOTPLUG_CPU) | ||
490 | /* | ||
491 | * Save this mc into mc_saved_data so that it will be loaded early when a CPU | ||
492 | * is hot added or resumes. | ||
493 | * | ||
494 | * Please make sure this mc is a valid microcode patch before calling | ||
495 | * this function. | ||
496 | */ | ||
497 | int save_mc_for_early(u8 *mc) | ||
498 | { | ||
499 | struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT]; | ||
500 | unsigned int mc_saved_count_init; | ||
501 | unsigned int mc_saved_count; | ||
502 | struct microcode_intel **mc_saved; | ||
503 | int ret = 0; | ||
504 | int i; | ||
505 | |||
506 | /* | ||
507 | * Hold hotplug lock so mc_saved_data is not accessed by a CPU in | ||
508 | * hotplug. | ||
509 | */ | ||
510 | cpu_hotplug_driver_lock(); | ||
511 | |||
512 | mc_saved_count_init = mc_saved_data.mc_saved_count; | ||
513 | mc_saved_count = mc_saved_data.mc_saved_count; | ||
514 | mc_saved = mc_saved_data.mc_saved; | ||
515 | |||
516 | if (mc_saved && mc_saved_count) | ||
517 | memcpy(mc_saved_tmp, mc_saved, | ||
518 | mc_saved_count * sizeof(struct microcode_intel *)); | ||
519 | /* | ||
520 | * Save the microcode patch mc in mc_save_tmp structure if it's a newer | ||
521 | * version. | ||
522 | */ | ||
523 | |||
524 | _save_mc(mc_saved_tmp, mc, &mc_saved_count); | ||
525 | |||
526 | /* | ||
527 | * Save the mc_save_tmp in global mc_saved_data. | ||
528 | */ | ||
529 | ret = save_microcode(&mc_saved_data, mc_saved_tmp, mc_saved_count); | ||
530 | if (ret) { | ||
531 | pr_err("Cannot save microcode patch.\n"); | ||
532 | goto out; | ||
533 | } | ||
534 | |||
535 | show_saved_mc(); | ||
536 | |||
537 | /* | ||
538 | * Free old saved microcode data. | ||
539 | */ | ||
540 | if (mc_saved) { | ||
541 | for (i = 0; i < mc_saved_count_init; i++) | ||
542 | kfree(mc_saved[i]); | ||
543 | kfree(mc_saved); | ||
544 | } | ||
545 | |||
546 | out: | ||
547 | cpu_hotplug_driver_unlock(); | ||
548 | |||
549 | return ret; | ||
550 | } | ||
551 | EXPORT_SYMBOL_GPL(save_mc_for_early); | ||
552 | #endif | ||
553 | |||
554 | static __initdata char ucode_name[] = "kernel/x86/microcode/GenuineIntel.bin"; | ||
555 | static __init enum ucode_state | ||
556 | scan_microcode(unsigned long start, unsigned long end, | ||
557 | struct mc_saved_data *mc_saved_data, | ||
558 | unsigned long *mc_saved_in_initrd, | ||
559 | struct ucode_cpu_info *uci) | ||
560 | { | ||
561 | unsigned int size = end - start + 1; | ||
562 | struct cpio_data cd; | ||
563 | long offset = 0; | ||
564 | #ifdef CONFIG_X86_32 | ||
565 | char *p = (char *)__pa_symbol(ucode_name); | ||
566 | #else | ||
567 | char *p = ucode_name; | ||
568 | #endif | ||
569 | |||
570 | cd.data = NULL; | ||
571 | cd.size = 0; | ||
572 | |||
573 | cd = find_cpio_data(p, (void *)start, size, &offset); | ||
574 | if (!cd.data) | ||
575 | return UCODE_ERROR; | ||
576 | |||
577 | |||
578 | return get_matching_model_microcode(0, start, cd.data, cd.size, | ||
579 | mc_saved_data, mc_saved_in_initrd, | ||
580 | uci); | ||
581 | } | ||
582 | |||
583 | /* | ||
584 | * Print ucode update info. | ||
585 | */ | ||
586 | static void __cpuinit | ||
587 | print_ucode_info(struct ucode_cpu_info *uci, unsigned int date) | ||
588 | { | ||
589 | int cpu = smp_processor_id(); | ||
590 | |||
591 | pr_info("CPU%d microcode updated early to revision 0x%x, date = %04x-%02x-%02x\n", | ||
592 | cpu, | ||
593 | uci->cpu_sig.rev, | ||
594 | date & 0xffff, | ||
595 | date >> 24, | ||
596 | (date >> 16) & 0xff); | ||
597 | } | ||
598 | |||
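The header's date field is packed BCD: month in bits 31:24, day in bits 23:16 and the four-digit year in bits 15:0, which is why the code above prints it with %x conversions. A tiny standalone check of that unpacking (the value is a made-up example):

#include <stdio.h>

int main(void)
{
	unsigned int date = 0x06122012;	/* packed BCD for 2012-06-12 */

	printf("%04x-%02x-%02x\n",
	       date & 0xffff,		/* year  -> 2012 */
	       date >> 24,		/* month -> 06 */
	       (date >> 16) & 0xff);	/* day   -> 12 */
	return 0;
}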
599 | #ifdef CONFIG_X86_32 | ||
600 | |||
601 | static int delay_ucode_info; | ||
602 | static int current_mc_date; | ||
603 | |||
604 | /* | ||
605 | * Print early updated ucode info after printk works. This is a delayed info dump. | ||
606 | */ | ||
607 | void __cpuinit show_ucode_info_early(void) | ||
608 | { | ||
609 | struct ucode_cpu_info uci; | ||
610 | |||
611 | if (delay_ucode_info) { | ||
612 | collect_cpu_info_early(&uci); | ||
613 | print_ucode_info(&uci, current_mc_date); | ||
614 | delay_ucode_info = 0; | ||
615 | } | ||
616 | } | ||
617 | |||
618 | /* | ||
619 | * At this point, we cannot call printk() yet. Keep microcode patch number in | ||
620 | * mc_saved_data.mc_saved and delay printing microcode info in | ||
621 | * show_ucode_info_early() until printk() works. | ||
622 | */ | ||
623 | static void __cpuinit print_ucode(struct ucode_cpu_info *uci) | ||
624 | { | ||
625 | struct microcode_intel *mc_intel; | ||
626 | int *delay_ucode_info_p; | ||
627 | int *current_mc_date_p; | ||
628 | |||
629 | mc_intel = uci->mc; | ||
630 | if (mc_intel == NULL) | ||
631 | return; | ||
632 | |||
633 | delay_ucode_info_p = (int *)__pa_symbol(&delay_ucode_info); | ||
634 | current_mc_date_p = (int *)__pa_symbol(¤t_mc_date); | ||
635 | |||
636 | *delay_ucode_info_p = 1; | ||
637 | *current_mc_date_p = mc_intel->hdr.date; | ||
638 | } | ||
639 | #else | ||
640 | |||
641 | /* | ||
642 | * Flush global tlb. We only do this in x86_64 where paging has been enabled | ||
643 | * already and PGE should be enabled as well. | ||
644 | */ | ||
645 | static inline void __cpuinit flush_tlb_early(void) | ||
646 | { | ||
647 | __native_flush_tlb_global_irq_disabled(); | ||
648 | } | ||
649 | |||
650 | static inline void __cpuinit print_ucode(struct ucode_cpu_info *uci) | ||
651 | { | ||
652 | struct microcode_intel *mc_intel; | ||
653 | |||
654 | mc_intel = uci->mc; | ||
655 | if (mc_intel == NULL) | ||
656 | return; | ||
657 | |||
658 | print_ucode_info(uci, mc_intel->hdr.date); | ||
659 | } | ||
660 | #endif | ||
661 | |||
662 | static int apply_microcode_early(struct mc_saved_data *mc_saved_data, | ||
663 | struct ucode_cpu_info *uci) | ||
664 | { | ||
665 | struct microcode_intel *mc_intel; | ||
666 | unsigned int val[2]; | ||
667 | |||
668 | mc_intel = uci->mc; | ||
669 | if (mc_intel == NULL) | ||
670 | return 0; | ||
671 | |||
672 | /* write microcode via MSR 0x79 */ | ||
673 | native_wrmsr(MSR_IA32_UCODE_WRITE, | ||
674 | (unsigned long) mc_intel->bits, | ||
675 | (unsigned long) mc_intel->bits >> 16 >> 16); | ||
676 | native_wrmsr(MSR_IA32_UCODE_REV, 0, 0); | ||
677 | |||
678 | /* As documented in the SDM: Do a CPUID 1 here */ | ||
679 | sync_core(); | ||
680 | |||
681 | /* get the current revision from MSR 0x8B */ | ||
682 | native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); | ||
683 | if (val[1] != mc_intel->hdr.rev) | ||
684 | return -1; | ||
685 | |||
686 | #ifdef CONFIG_X86_64 | ||
687 | /* Flush global tlb. This is a precaution. */ | ||
688 | flush_tlb_early(); | ||
689 | #endif | ||
690 | uci->cpu_sig.rev = val[1]; | ||
691 | |||
692 | print_ucode(uci); | ||
693 | |||
694 | return 0; | ||
695 | } | ||
696 | |||
697 | /* | ||
698 | * This function converts microcode patch offsets previously stored in | ||
699 | * mc_saved_in_initrd to pointers and stores the pointers in mc_saved_data. | ||
700 | */ | ||
701 | int __init save_microcode_in_initrd(void) | ||
702 | { | ||
703 | unsigned int count = mc_saved_data.mc_saved_count; | ||
704 | struct microcode_intel *mc_saved[MAX_UCODE_COUNT]; | ||
705 | int ret = 0; | ||
706 | |||
707 | if (count == 0) | ||
708 | return ret; | ||
709 | |||
710 | microcode_pointer(mc_saved, mc_saved_in_initrd, initrd_start, count); | ||
711 | ret = save_microcode(&mc_saved_data, mc_saved, count); | ||
712 | if (ret) | ||
713 | pr_err("Cannot save microcode patches from initrd\n"); | ||
714 | |||
715 | show_saved_mc(); | ||
716 | |||
717 | return ret; | ||
718 | } | ||
719 | |||
720 | static void __init | ||
721 | _load_ucode_intel_bsp(struct mc_saved_data *mc_saved_data, | ||
722 | unsigned long *mc_saved_in_initrd, | ||
723 | unsigned long initrd_start_early, | ||
724 | unsigned long initrd_end_early, | ||
725 | struct ucode_cpu_info *uci) | ||
726 | { | ||
727 | collect_cpu_info_early(uci); | ||
728 | scan_microcode(initrd_start_early, initrd_end_early, mc_saved_data, | ||
729 | mc_saved_in_initrd, uci); | ||
730 | load_microcode(mc_saved_data, mc_saved_in_initrd, | ||
731 | initrd_start_early, uci); | ||
732 | apply_microcode_early(mc_saved_data, uci); | ||
733 | } | ||
734 | |||
735 | void __init | ||
736 | load_ucode_intel_bsp(void) | ||
737 | { | ||
738 | u64 ramdisk_image, ramdisk_size; | ||
739 | unsigned long initrd_start_early, initrd_end_early; | ||
740 | struct ucode_cpu_info uci; | ||
741 | #ifdef CONFIG_X86_32 | ||
742 | struct boot_params *boot_params_p; | ||
743 | |||
744 | boot_params_p = (struct boot_params *)__pa_symbol(&boot_params); | ||
745 | ramdisk_image = boot_params_p->hdr.ramdisk_image; | ||
746 | ramdisk_size = boot_params_p->hdr.ramdisk_size; | ||
747 | initrd_start_early = ramdisk_image; | ||
748 | initrd_end_early = initrd_start_early + ramdisk_size; | ||
749 | |||
750 | _load_ucode_intel_bsp( | ||
751 | (struct mc_saved_data *)__pa_symbol(&mc_saved_data), | ||
752 | (unsigned long *)__pa_symbol(&mc_saved_in_initrd), | ||
753 | initrd_start_early, initrd_end_early, &uci); | ||
754 | #else | ||
755 | ramdisk_image = boot_params.hdr.ramdisk_image; | ||
756 | ramdisk_size = boot_params.hdr.ramdisk_size; | ||
757 | initrd_start_early = ramdisk_image + PAGE_OFFSET; | ||
758 | initrd_end_early = initrd_start_early + ramdisk_size; | ||
759 | |||
760 | _load_ucode_intel_bsp(&mc_saved_data, mc_saved_in_initrd, | ||
761 | initrd_start_early, initrd_end_early, &uci); | ||
762 | #endif | ||
763 | } | ||
764 | |||
765 | void __cpuinit load_ucode_intel_ap(void) | ||
766 | { | ||
767 | struct mc_saved_data *mc_saved_data_p; | ||
768 | struct ucode_cpu_info uci; | ||
769 | unsigned long *mc_saved_in_initrd_p; | ||
770 | unsigned long initrd_start_addr; | ||
771 | #ifdef CONFIG_X86_32 | ||
772 | unsigned long *initrd_start_p; | ||
773 | |||
774 | mc_saved_in_initrd_p = | ||
775 | (unsigned long *)__pa_symbol(mc_saved_in_initrd); | ||
776 | mc_saved_data_p = (struct mc_saved_data *)__pa_symbol(&mc_saved_data); | ||
777 | initrd_start_p = (unsigned long *)__pa_symbol(&initrd_start); | ||
778 | initrd_start_addr = (unsigned long)__pa_symbol(*initrd_start_p); | ||
779 | #else | ||
780 | mc_saved_data_p = &mc_saved_data; | ||
781 | mc_saved_in_initrd_p = mc_saved_in_initrd; | ||
782 | initrd_start_addr = initrd_start; | ||
783 | #endif | ||
784 | |||
785 | /* | ||
786 | * If there is no valid ucode previously saved in memory, no need to | ||
787 | * update ucode on this AP. | ||
788 | */ | ||
789 | if (mc_saved_data_p->mc_saved_count == 0) | ||
790 | return; | ||
791 | |||
792 | collect_cpu_info_early(&uci); | ||
793 | load_microcode(mc_saved_data_p, mc_saved_in_initrd_p, | ||
794 | initrd_start_addr, &uci); | ||
795 | apply_microcode_early(mc_saved_data_p, &uci); | ||
796 | } | ||
diff --git a/arch/x86/kernel/microcode_intel_lib.c b/arch/x86/kernel/microcode_intel_lib.c new file mode 100644 index 000000000000..ce69320d0179 --- /dev/null +++ b/arch/x86/kernel/microcode_intel_lib.c | |||
@@ -0,0 +1,174 @@ | |||
1 | /* | ||
2 | * Intel CPU Microcode Update Driver for Linux | ||
3 | * | ||
4 | * Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com> | ||
5 | * H Peter Anvin <hpa@zytor.com> | ||
6 | * | ||
7 | * This driver allows upgrading microcode on Intel processors | ||
8 | * belonging to IA-32 family - PentiumPro, Pentium II, | ||
9 | * Pentium III, Xeon, Pentium 4, etc. | ||
10 | * | ||
11 | * Reference: Section 8.11 of Volume 3a, IA-32 Intel® Architecture | ||
12 | * Software Developer's Manual | ||
13 | * Order Number 253668 or free download from: | ||
14 | * | ||
15 | * http://developer.intel.com/Assets/PDF/manual/253668.pdf | ||
16 | * | ||
17 | * For more information, go to http://www.urbanmyth.org/microcode | ||
18 | * | ||
19 | * This program is free software; you can redistribute it and/or | ||
20 | * modify it under the terms of the GNU General Public License | ||
21 | * as published by the Free Software Foundation; either version | ||
22 | * 2 of the License, or (at your option) any later version. | ||
23 | * | ||
24 | */ | ||
25 | #include <linux/firmware.h> | ||
26 | #include <linux/uaccess.h> | ||
27 | #include <linux/kernel.h> | ||
28 | #include <linux/module.h> | ||
29 | |||
30 | #include <asm/microcode_intel.h> | ||
31 | #include <asm/processor.h> | ||
32 | #include <asm/msr.h> | ||
33 | |||
34 | static inline int | ||
35 | update_match_cpu(unsigned int csig, unsigned int cpf, | ||
36 | unsigned int sig, unsigned int pf) | ||
37 | { | ||
38 | return (!sigmatch(sig, csig, pf, cpf)) ? 0 : 1; | ||
39 | } | ||
40 | |||
41 | int | ||
42 | update_match_revision(struct microcode_header_intel *mc_header, int rev) | ||
43 | { | ||
44 | return (mc_header->rev <= rev) ? 0 : 1; | ||
45 | } | ||
46 | |||
47 | int microcode_sanity_check(void *mc, int print_err) | ||
48 | { | ||
49 | unsigned long total_size, data_size, ext_table_size; | ||
50 | struct microcode_header_intel *mc_header = mc; | ||
51 | struct extended_sigtable *ext_header = NULL; | ||
52 | int sum, orig_sum, ext_sigcount = 0, i; | ||
53 | struct extended_signature *ext_sig; | ||
54 | |||
55 | total_size = get_totalsize(mc_header); | ||
56 | data_size = get_datasize(mc_header); | ||
57 | |||
58 | if (data_size + MC_HEADER_SIZE > total_size) { | ||
59 | if (print_err) | ||
60 | pr_err("error! Bad data size in microcode data file\n"); | ||
61 | return -EINVAL; | ||
62 | } | ||
63 | |||
64 | if (mc_header->ldrver != 1 || mc_header->hdrver != 1) { | ||
65 | if (print_err) | ||
66 | pr_err("error! Unknown microcode update format\n"); | ||
67 | return -EINVAL; | ||
68 | } | ||
69 | ext_table_size = total_size - (MC_HEADER_SIZE + data_size); | ||
70 | if (ext_table_size) { | ||
71 | if ((ext_table_size < EXT_HEADER_SIZE) | ||
72 | || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) { | ||
73 | if (print_err) | ||
74 | pr_err("error! Small exttable size in microcode data file\n"); | ||
75 | return -EINVAL; | ||
76 | } | ||
77 | ext_header = mc + MC_HEADER_SIZE + data_size; | ||
78 | if (ext_table_size != exttable_size(ext_header)) { | ||
79 | if (print_err) | ||
80 | pr_err("error! Bad exttable size in microcode data file\n"); | ||
81 | return -EFAULT; | ||
82 | } | ||
83 | ext_sigcount = ext_header->count; | ||
84 | } | ||
85 | |||
86 | /* check extended table checksum */ | ||
87 | if (ext_table_size) { | ||
88 | int ext_table_sum = 0; | ||
89 | int *ext_tablep = (int *)ext_header; | ||
90 | |||
91 | i = ext_table_size / DWSIZE; | ||
92 | while (i--) | ||
93 | ext_table_sum += ext_tablep[i]; | ||
94 | if (ext_table_sum) { | ||
95 | if (print_err) | ||
96 | pr_warn("aborting, bad extended signature table checksum\n"); | ||
97 | return -EINVAL; | ||
98 | } | ||
99 | } | ||
100 | |||
101 | /* calculate the checksum */ | ||
102 | orig_sum = 0; | ||
103 | i = (MC_HEADER_SIZE + data_size) / DWSIZE; | ||
104 | while (i--) | ||
105 | orig_sum += ((int *)mc)[i]; | ||
106 | if (orig_sum) { | ||
107 | if (print_err) | ||
108 | pr_err("aborting, bad checksum\n"); | ||
109 | return -EINVAL; | ||
110 | } | ||
111 | if (!ext_table_size) | ||
112 | return 0; | ||
113 | /* check extended signature checksum */ | ||
114 | for (i = 0; i < ext_sigcount; i++) { | ||
115 | ext_sig = (void *)ext_header + EXT_HEADER_SIZE + | ||
116 | EXT_SIGNATURE_SIZE * i; | ||
117 | sum = orig_sum | ||
118 | - (mc_header->sig + mc_header->pf + mc_header->cksum) | ||
119 | + (ext_sig->sig + ext_sig->pf + ext_sig->cksum); | ||
120 | if (sum) { | ||
121 | if (print_err) | ||
122 | pr_err("aborting, bad checksum\n"); | ||
123 | return -EINVAL; | ||
124 | } | ||
125 | } | ||
126 | return 0; | ||
127 | } | ||
128 | EXPORT_SYMBOL_GPL(microcode_sanity_check); | ||
129 | |||
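microcode_sanity_check() relies on the usual Intel convention: the header's cksum field is chosen so that the 32-bit sum of every dword in the header-plus-data region wraps to zero. A small standalone illustration with a synthetic buffer (not a real update image):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t buf[8] = { 0x12345678, 0x9abcdef0, 7, 42, 0, 0, 0, 0 };
	uint32_t sum = 0;
	size_t i;

	/* Sum the dwords while the checksum slot (buf[7]) is still zero... */
	for (i = 0; i < 8; i++)
		sum += buf[i];
	/* ...then store the two's complement so the full sum wraps to zero. */
	buf[7] = (uint32_t)(0u - sum);

	sum = 0;
	for (i = 0; i < 8; i++)
		sum += buf[i];
	printf("total dword sum = 0x%08x (must be 0 to pass the sanity check)\n", sum);
	return 0;
}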
130 | /* | ||
131 | * return 0 - no update found | ||
132 | * return 1 - found update | ||
133 | */ | ||
134 | int get_matching_sig(unsigned int csig, int cpf, void *mc, int rev) | ||
135 | { | ||
136 | struct microcode_header_intel *mc_header = mc; | ||
137 | struct extended_sigtable *ext_header; | ||
138 | unsigned long total_size = get_totalsize(mc_header); | ||
139 | int ext_sigcount, i; | ||
140 | struct extended_signature *ext_sig; | ||
141 | |||
142 | if (update_match_cpu(csig, cpf, mc_header->sig, mc_header->pf)) | ||
143 | return 1; | ||
144 | |||
145 | /* Look for ext. headers: */ | ||
146 | if (total_size <= get_datasize(mc_header) + MC_HEADER_SIZE) | ||
147 | return 0; | ||
148 | |||
149 | ext_header = mc + get_datasize(mc_header) + MC_HEADER_SIZE; | ||
150 | ext_sigcount = ext_header->count; | ||
151 | ext_sig = (void *)ext_header + EXT_HEADER_SIZE; | ||
152 | |||
153 | for (i = 0; i < ext_sigcount; i++) { | ||
154 | if (update_match_cpu(csig, cpf, ext_sig->sig, ext_sig->pf)) | ||
155 | return 1; | ||
156 | ext_sig++; | ||
157 | } | ||
158 | return 0; | ||
159 | } | ||
160 | |||
161 | /* | ||
162 | * return 0 - no update found | ||
163 | * return 1 - found update | ||
164 | */ | ||
165 | int get_matching_microcode(unsigned int csig, int cpf, void *mc, int rev) | ||
166 | { | ||
167 | struct microcode_header_intel *mc_header = mc; | ||
168 | |||
169 | if (!update_match_revision(mc_header, rev)) | ||
170 | return 0; | ||
171 | |||
172 | return get_matching_sig(csig, cpf, mc, rev); | ||
173 | } | ||
174 | EXPORT_SYMBOL_GPL(get_matching_microcode); | ||
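get_matching_sig() accepts an update when either the main header or one of the extended signatures matches the CPU. The sigmatch() helper itself is not shown in this diff; conventionally a match means the signatures are equal and the CPU's platform-flag bit overlaps the update's pf mask. A hedged sketch of that convention (an assumption for illustration, not the kernel's definition):

#include <stdio.h>

/* Assumed semantics of sigmatch(): equal signature, overlapping platform flags. */
static int sigmatch_sketch(unsigned int sig, unsigned int csig,
			   unsigned int pf, unsigned int cpf)
{
	if (sig != csig)
		return 0;
	/* pf == 0 historically meant "all platforms" on very old parts. */
	return !pf || (pf & cpf);
}

int main(void)
{
	/* CPU signature 0x306a9, platform bit 1 (cpf mask 0x02). */
	printf("%d\n", sigmatch_sketch(0x306a9, 0x306a9, 0x12, 0x02)); /* 1: bit overlaps */
	printf("%d\n", sigmatch_sketch(0x306a9, 0x306a9, 0x10, 0x02)); /* 0: no overlap   */
	return 0;
}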
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index a7c5661f8496..ce130493b802 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c | |||
@@ -71,7 +71,7 @@ static ssize_t msr_read(struct file *file, char __user *buf, | |||
71 | u32 __user *tmp = (u32 __user *) buf; | 71 | u32 __user *tmp = (u32 __user *) buf; |
72 | u32 data[2]; | 72 | u32 data[2]; |
73 | u32 reg = *ppos; | 73 | u32 reg = *ppos; |
74 | int cpu = iminor(file->f_path.dentry->d_inode); | 74 | int cpu = iminor(file_inode(file)); |
75 | int err = 0; | 75 | int err = 0; |
76 | ssize_t bytes = 0; | 76 | ssize_t bytes = 0; |
77 | 77 | ||
@@ -99,7 +99,7 @@ static ssize_t msr_write(struct file *file, const char __user *buf, | |||
99 | const u32 __user *tmp = (const u32 __user *)buf; | 99 | const u32 __user *tmp = (const u32 __user *)buf; |
100 | u32 data[2]; | 100 | u32 data[2]; |
101 | u32 reg = *ppos; | 101 | u32 reg = *ppos; |
102 | int cpu = iminor(file->f_path.dentry->d_inode); | 102 | int cpu = iminor(file_inode(file)); |
103 | int err = 0; | 103 | int err = 0; |
104 | ssize_t bytes = 0; | 104 | ssize_t bytes = 0; |
105 | 105 | ||
@@ -125,7 +125,7 @@ static long msr_ioctl(struct file *file, unsigned int ioc, unsigned long arg) | |||
125 | { | 125 | { |
126 | u32 __user *uregs = (u32 __user *)arg; | 126 | u32 __user *uregs = (u32 __user *)arg; |
127 | u32 regs[8]; | 127 | u32 regs[8]; |
128 | int cpu = iminor(file->f_path.dentry->d_inode); | 128 | int cpu = iminor(file_inode(file)); |
129 | int err; | 129 | int err; |
130 | 130 | ||
131 | switch (ioc) { | 131 | switch (ioc) { |
@@ -171,10 +171,12 @@ static long msr_ioctl(struct file *file, unsigned int ioc, unsigned long arg) | |||
171 | 171 | ||
172 | static int msr_open(struct inode *inode, struct file *file) | 172 | static int msr_open(struct inode *inode, struct file *file) |
173 | { | 173 | { |
174 | unsigned int cpu; | 174 | unsigned int cpu = iminor(file_inode(file)); |
175 | struct cpuinfo_x86 *c; | 175 | struct cpuinfo_x86 *c; |
176 | 176 | ||
177 | cpu = iminor(file->f_path.dentry->d_inode); | 177 | if (!capable(CAP_SYS_RAWIO)) |
178 | return -EPERM; | ||
179 | |||
178 | if (cpu >= nr_cpu_ids || !cpu_online(cpu)) | 180 | if (cpu >= nr_cpu_ids || !cpu_online(cpu)) |
179 | return -ENXIO; /* No such CPU */ | 181 | return -ENXIO; /* No such CPU */ |
180 | 182 | ||
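The msr_open() change above now requires CAP_SYS_RAWIO before the per-CPU MSR device can be opened. For reference, a small user-space sketch of how that device is typically used: the file offset selects the MSR number and reads are 8 bytes (the path and the example register 0x8B, IA32_BIOS_SIGN_ID, are illustrative):

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* Needs the msr module loaded and, after this change, CAP_SYS_RAWIO. */
	int fd = open("/dev/cpu/0/msr", O_RDONLY);
	uint64_t val;

	if (fd < 0) {
		perror("open /dev/cpu/0/msr");
		return 1;
	}
	if (pread(fd, &val, sizeof(val), 0x8B) != sizeof(val)) {
		perror("pread");
		close(fd);
		return 1;
	}
	printf("IA32_BIOS_SIGN_ID = 0x%016llx (microcode rev in the high half)\n",
	       (unsigned long long)val);
	close(fd);
	return 0;
}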
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index f84f5c57de35..60308053fdb2 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c | |||
@@ -509,3 +509,4 @@ void local_touch_nmi(void) | |||
509 | { | 509 | { |
510 | __this_cpu_write(last_nmi_rip, 0); | 510 | __this_cpu_write(last_nmi_rip, 0); |
511 | } | 511 | } |
512 | EXPORT_SYMBOL_GPL(local_touch_nmi); | ||
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index de2b7ad70273..872079a67e4d 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c | |||
@@ -56,7 +56,7 @@ struct device x86_dma_fallback_dev = { | |||
56 | EXPORT_SYMBOL(x86_dma_fallback_dev); | 56 | EXPORT_SYMBOL(x86_dma_fallback_dev); |
57 | 57 | ||
58 | /* Number of entries preallocated for DMA-API debugging */ | 58 | /* Number of entries preallocated for DMA-API debugging */ |
59 | #define PREALLOC_DMA_DEBUG_ENTRIES 32768 | 59 | #define PREALLOC_DMA_DEBUG_ENTRIES 65536 |
60 | 60 | ||
61 | int dma_set_mask(struct device *dev, u64 mask) | 61 | int dma_set_mask(struct device *dev, u64 mask) |
62 | { | 62 | { |
@@ -265,7 +265,7 @@ rootfs_initcall(pci_iommu_init); | |||
265 | #ifdef CONFIG_PCI | 265 | #ifdef CONFIG_PCI |
266 | /* Many VIA bridges seem to corrupt data for DAC. Disable it here */ | 266 | /* Many VIA bridges seem to corrupt data for DAC. Disable it here */ |
267 | 267 | ||
268 | static __devinit void via_no_dac(struct pci_dev *dev) | 268 | static void via_no_dac(struct pci_dev *dev) |
269 | { | 269 | { |
270 | if (forbid_dac == 0) { | 270 | if (forbid_dac == 0) { |
271 | dev_info(&dev->dev, "disabling DAC on VIA PCI bridge\n"); | 271 | dev_info(&dev->dev, "disabling DAC on VIA PCI bridge\n"); |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 2ed787f15bf0..14ae10031ff0 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -268,13 +268,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | |||
268 | unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE; | 268 | unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE; |
269 | EXPORT_SYMBOL(boot_option_idle_override); | 269 | EXPORT_SYMBOL(boot_option_idle_override); |
270 | 270 | ||
271 | /* | 271 | static void (*x86_idle)(void); |
272 | * Powermanagement idle function, if any.. | ||
273 | */ | ||
274 | void (*pm_idle)(void); | ||
275 | #ifdef CONFIG_APM_MODULE | ||
276 | EXPORT_SYMBOL(pm_idle); | ||
277 | #endif | ||
278 | 272 | ||
279 | #ifndef CONFIG_SMP | 273 | #ifndef CONFIG_SMP |
280 | static inline void play_dead(void) | 274 | static inline void play_dead(void) |
@@ -351,7 +345,7 @@ void cpu_idle(void) | |||
351 | rcu_idle_enter(); | 345 | rcu_idle_enter(); |
352 | 346 | ||
353 | if (cpuidle_idle_call()) | 347 | if (cpuidle_idle_call()) |
354 | pm_idle(); | 348 | x86_idle(); |
355 | 349 | ||
356 | rcu_idle_exit(); | 350 | rcu_idle_exit(); |
357 | start_critical_timings(); | 351 | start_critical_timings(); |
@@ -375,7 +369,6 @@ void cpu_idle(void) | |||
375 | */ | 369 | */ |
376 | void default_idle(void) | 370 | void default_idle(void) |
377 | { | 371 | { |
378 | trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id()); | ||
379 | trace_cpu_idle_rcuidle(1, smp_processor_id()); | 372 | trace_cpu_idle_rcuidle(1, smp_processor_id()); |
380 | current_thread_info()->status &= ~TS_POLLING; | 373 | current_thread_info()->status &= ~TS_POLLING; |
381 | /* | 374 | /* |
@@ -389,21 +382,22 @@ void default_idle(void) | |||
389 | else | 382 | else |
390 | local_irq_enable(); | 383 | local_irq_enable(); |
391 | current_thread_info()->status |= TS_POLLING; | 384 | current_thread_info()->status |= TS_POLLING; |
392 | trace_power_end_rcuidle(smp_processor_id()); | ||
393 | trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); | 385 | trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); |
394 | } | 386 | } |
395 | #ifdef CONFIG_APM_MODULE | 387 | #ifdef CONFIG_APM_MODULE |
396 | EXPORT_SYMBOL(default_idle); | 388 | EXPORT_SYMBOL(default_idle); |
397 | #endif | 389 | #endif |
398 | 390 | ||
399 | bool set_pm_idle_to_default(void) | 391 | #ifdef CONFIG_XEN |
392 | bool xen_set_default_idle(void) | ||
400 | { | 393 | { |
401 | bool ret = !!pm_idle; | 394 | bool ret = !!x86_idle; |
402 | 395 | ||
403 | pm_idle = default_idle; | 396 | x86_idle = default_idle; |
404 | 397 | ||
405 | return ret; | 398 | return ret; |
406 | } | 399 | } |
400 | #endif | ||
407 | void stop_this_cpu(void *dummy) | 401 | void stop_this_cpu(void *dummy) |
408 | { | 402 | { |
409 | local_irq_disable(); | 403 | local_irq_disable(); |
@@ -413,31 +407,8 @@ void stop_this_cpu(void *dummy) | |||
413 | set_cpu_online(smp_processor_id(), false); | 407 | set_cpu_online(smp_processor_id(), false); |
414 | disable_local_APIC(); | 408 | disable_local_APIC(); |
415 | 409 | ||
416 | for (;;) { | 410 | for (;;) |
417 | if (hlt_works(smp_processor_id())) | 411 | halt(); |
418 | halt(); | ||
419 | } | ||
420 | } | ||
421 | |||
422 | /* Default MONITOR/MWAIT with no hints, used for default C1 state */ | ||
423 | static void mwait_idle(void) | ||
424 | { | ||
425 | if (!need_resched()) { | ||
426 | trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id()); | ||
427 | trace_cpu_idle_rcuidle(1, smp_processor_id()); | ||
428 | if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR)) | ||
429 | clflush((void *)¤t_thread_info()->flags); | ||
430 | |||
431 | __monitor((void *)¤t_thread_info()->flags, 0, 0); | ||
432 | smp_mb(); | ||
433 | if (!need_resched()) | ||
434 | __sti_mwait(0, 0); | ||
435 | else | ||
436 | local_irq_enable(); | ||
437 | trace_power_end_rcuidle(smp_processor_id()); | ||
438 | trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); | ||
439 | } else | ||
440 | local_irq_enable(); | ||
441 | } | 412 | } |
442 | 413 | ||
443 | /* | 414 | /* |
@@ -447,62 +418,13 @@ static void mwait_idle(void) | |||
447 | */ | 418 | */ |
448 | static void poll_idle(void) | 419 | static void poll_idle(void) |
449 | { | 420 | { |
450 | trace_power_start_rcuidle(POWER_CSTATE, 0, smp_processor_id()); | ||
451 | trace_cpu_idle_rcuidle(0, smp_processor_id()); | 421 | trace_cpu_idle_rcuidle(0, smp_processor_id()); |
452 | local_irq_enable(); | 422 | local_irq_enable(); |
453 | while (!need_resched()) | 423 | while (!need_resched()) |
454 | cpu_relax(); | 424 | cpu_relax(); |
455 | trace_power_end_rcuidle(smp_processor_id()); | ||
456 | trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); | 425 | trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); |
457 | } | 426 | } |
458 | 427 | ||
459 | /* | ||
460 | * mwait selection logic: | ||
461 | * | ||
462 | * It depends on the CPU. For AMD CPUs that support MWAIT this is | ||
463 | * wrong. Family 0x10 and 0x11 CPUs will enter C1 on HLT. Powersavings | ||
464 | * then depend on a clock divisor and current Pstate of the core. If | ||
465 | * all cores of a processor are in halt state (C1) the processor can | ||
466 | * enter the C1E (C1 enhanced) state. If mwait is used this will never | ||
467 | * happen. | ||
468 | * | ||
469 | * idle=mwait overrides this decision and forces the usage of mwait. | ||
470 | */ | ||
471 | |||
472 | #define MWAIT_INFO 0x05 | ||
473 | #define MWAIT_ECX_EXTENDED_INFO 0x01 | ||
474 | #define MWAIT_EDX_C1 0xf0 | ||
475 | |||
476 | int mwait_usable(const struct cpuinfo_x86 *c) | ||
477 | { | ||
478 | u32 eax, ebx, ecx, edx; | ||
479 | |||
480 | /* Use mwait if idle=mwait boot option is given */ | ||
481 | if (boot_option_idle_override == IDLE_FORCE_MWAIT) | ||
482 | return 1; | ||
483 | |||
484 | /* | ||
485 | * Any idle= boot option other than idle=mwait means that we must not | ||
486 | * use mwait. Eg: idle=halt or idle=poll or idle=nomwait | ||
487 | */ | ||
488 | if (boot_option_idle_override != IDLE_NO_OVERRIDE) | ||
489 | return 0; | ||
490 | |||
491 | if (c->cpuid_level < MWAIT_INFO) | ||
492 | return 0; | ||
493 | |||
494 | cpuid(MWAIT_INFO, &eax, &ebx, &ecx, &edx); | ||
495 | /* Check, whether EDX has extended info about MWAIT */ | ||
496 | if (!(ecx & MWAIT_ECX_EXTENDED_INFO)) | ||
497 | return 1; | ||
498 | |||
499 | /* | ||
500 | * edx enumeratios MONITOR/MWAIT extensions. Check, whether | ||
501 | * C1 supports MWAIT | ||
502 | */ | ||
503 | return (edx & MWAIT_EDX_C1); | ||
504 | } | ||
505 | |||
506 | bool amd_e400_c1e_detected; | 428 | bool amd_e400_c1e_detected; |
507 | EXPORT_SYMBOL(amd_e400_c1e_detected); | 429 | EXPORT_SYMBOL(amd_e400_c1e_detected); |
508 | 430 | ||
@@ -567,31 +489,24 @@ static void amd_e400_idle(void) | |||
567 | void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) | 489 | void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) |
568 | { | 490 | { |
569 | #ifdef CONFIG_SMP | 491 | #ifdef CONFIG_SMP |
570 | if (pm_idle == poll_idle && smp_num_siblings > 1) { | 492 | if (x86_idle == poll_idle && smp_num_siblings > 1) |
571 | pr_warn_once("WARNING: polling idle and HT enabled, performance may degrade\n"); | 493 | pr_warn_once("WARNING: polling idle and HT enabled, performance may degrade\n"); |
572 | } | ||
573 | #endif | 494 | #endif |
574 | if (pm_idle) | 495 | if (x86_idle) |
575 | return; | 496 | return; |
576 | 497 | ||
577 | if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) { | 498 | if (cpu_has_amd_erratum(amd_erratum_400)) { |
578 | /* | ||
579 | * One CPU supports mwait => All CPUs supports mwait | ||
580 | */ | ||
581 | pr_info("using mwait in idle threads\n"); | ||
582 | pm_idle = mwait_idle; | ||
583 | } else if (cpu_has_amd_erratum(amd_erratum_400)) { | ||
584 | /* E400: APIC timer interrupt does not wake up CPU from C1e */ | 499 | /* E400: APIC timer interrupt does not wake up CPU from C1e */ |
585 | pr_info("using AMD E400 aware idle routine\n"); | 500 | pr_info("using AMD E400 aware idle routine\n"); |
586 | pm_idle = amd_e400_idle; | 501 | x86_idle = amd_e400_idle; |
587 | } else | 502 | } else |
588 | pm_idle = default_idle; | 503 | x86_idle = default_idle; |
589 | } | 504 | } |
590 | 505 | ||
591 | void __init init_amd_e400_c1e_mask(void) | 506 | void __init init_amd_e400_c1e_mask(void) |
592 | { | 507 | { |
593 | /* If we're using amd_e400_idle, we need to allocate amd_e400_c1e_mask. */ | 508 | /* If we're using amd_e400_idle, we need to allocate amd_e400_c1e_mask. */ |
594 | if (pm_idle == amd_e400_idle) | 509 | if (x86_idle == amd_e400_idle) |
595 | zalloc_cpumask_var(&amd_e400_c1e_mask, GFP_KERNEL); | 510 | zalloc_cpumask_var(&amd_e400_c1e_mask, GFP_KERNEL); |
596 | } | 511 | } |
597 | 512 | ||
@@ -602,11 +517,8 @@ static int __init idle_setup(char *str) | |||
602 | 517 | ||
603 | if (!strcmp(str, "poll")) { | 518 | if (!strcmp(str, "poll")) { |
604 | pr_info("using polling idle threads\n"); | 519 | pr_info("using polling idle threads\n"); |
605 | pm_idle = poll_idle; | 520 | x86_idle = poll_idle; |
606 | boot_option_idle_override = IDLE_POLL; | 521 | boot_option_idle_override = IDLE_POLL; |
607 | } else if (!strcmp(str, "mwait")) { | ||
608 | boot_option_idle_override = IDLE_FORCE_MWAIT; | ||
609 | WARN_ONCE(1, "\"idle=mwait\" will be removed in 2012\n"); | ||
610 | } else if (!strcmp(str, "halt")) { | 522 | } else if (!strcmp(str, "halt")) { |
611 | /* | 523 | /* |
612 | * When the boot option of idle=halt is added, halt is | 524 | * When the boot option of idle=halt is added, halt is |
@@ -615,7 +527,7 @@ static int __init idle_setup(char *str) | |||
615 | * To continue to load the CPU idle driver, don't touch | 527 | * To continue to load the CPU idle driver, don't touch |
616 | * the boot_option_idle_override. | 528 | * the boot_option_idle_override. |
617 | */ | 529 | */ |
618 | pm_idle = default_idle; | 530 | x86_idle = default_idle; |
619 | boot_option_idle_override = IDLE_HALT; | 531 | boot_option_idle_override = IDLE_HALT; |
620 | } else if (!strcmp(str, "nomwait")) { | 532 | } else if (!strcmp(str, "nomwait")) { |
621 | /* | 533 | /* |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 6e68a6194965..0f49677da51e 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -117,7 +117,7 @@ void release_thread(struct task_struct *dead_task) | |||
117 | { | 117 | { |
118 | if (dead_task->mm) { | 118 | if (dead_task->mm) { |
119 | if (dead_task->mm->context.size) { | 119 | if (dead_task->mm->context.size) { |
120 | pr_warn("WARNING: dead process %8s still has LDT? <%p/%d>\n", | 120 | pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n", |
121 | dead_task->comm, | 121 | dead_task->comm, |
122 | dead_task->mm->context.ldt, | 122 | dead_task->mm->context.ldt, |
123 | dead_task->mm->context.size); | 123 | dead_task->mm->context.size); |
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index b629bbe0d9bd..29a8120e6fe8 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -22,7 +22,7 @@ | |||
22 | #include <linux/perf_event.h> | 22 | #include <linux/perf_event.h> |
23 | #include <linux/hw_breakpoint.h> | 23 | #include <linux/hw_breakpoint.h> |
24 | #include <linux/rcupdate.h> | 24 | #include <linux/rcupdate.h> |
25 | #include <linux/module.h> | 25 | #include <linux/export.h> |
26 | #include <linux/context_tracking.h> | 26 | #include <linux/context_tracking.h> |
27 | 27 | ||
28 | #include <asm/uaccess.h> | 28 | #include <asm/uaccess.h> |
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 85c39590c1a4..2cb9470ea85b 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c | |||
@@ -185,7 +185,7 @@ int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i, | |||
185 | 185 | ||
186 | for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) { | 186 | for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) { |
187 | __set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx, | 187 | __set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx, |
188 | __pa_symbol(i) + (idx*PAGE_SIZE), | 188 | __pa(i) + (idx*PAGE_SIZE), |
189 | PAGE_KERNEL_VVAR); | 189 | PAGE_KERNEL_VVAR); |
190 | } | 190 | } |
191 | 191 | ||
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index 1b27de563561..26ee48a33dc4 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c | |||
@@ -8,7 +8,7 @@ | |||
8 | 8 | ||
9 | #if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) && defined(CONFIG_PCI) | 9 | #if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) && defined(CONFIG_PCI) |
10 | 10 | ||
11 | static void __devinit quirk_intel_irqbalance(struct pci_dev *dev) | 11 | static void quirk_intel_irqbalance(struct pci_dev *dev) |
12 | { | 12 | { |
13 | u8 config; | 13 | u8 config; |
14 | u16 word; | 14 | u16 word; |
@@ -512,7 +512,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS, | |||
512 | 512 | ||
513 | #if defined(CONFIG_PCI) && defined(CONFIG_NUMA) | 513 | #if defined(CONFIG_PCI) && defined(CONFIG_NUMA) |
514 | /* Set correct numa_node information for AMD NB functions */ | 514 | /* Set correct numa_node information for AMD NB functions */ |
515 | static void __devinit quirk_amd_nb_node(struct pci_dev *dev) | 515 | static void quirk_amd_nb_node(struct pci_dev *dev) |
516 | { | 516 | { |
517 | struct pci_dev *nb_ht; | 517 | struct pci_dev *nb_ht; |
518 | unsigned int devfn; | 518 | unsigned int devfn; |
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 4e8ba39eaf0f..76fa1e9a2b39 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c | |||
@@ -584,7 +584,7 @@ static void native_machine_emergency_restart(void) | |||
584 | break; | 584 | break; |
585 | 585 | ||
586 | case BOOT_EFI: | 586 | case BOOT_EFI: |
587 | if (efi_enabled) | 587 | if (efi_enabled(EFI_RUNTIME_SERVICES)) |
588 | efi.reset_system(reboot_mode ? | 588 | efi.reset_system(reboot_mode ? |
589 | EFI_RESET_WARM : | 589 | EFI_RESET_WARM : |
590 | EFI_RESET_COLD, | 590 | EFI_RESET_COLD, |
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index 801602b5d745..2e8f3d3b5641 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c | |||
@@ -149,7 +149,6 @@ unsigned long mach_get_cmos_time(void) | |||
149 | if (century) { | 149 | if (century) { |
150 | century = bcd2bin(century); | 150 | century = bcd2bin(century); |
151 | year += century * 100; | 151 | year += century * 100; |
152 | printk(KERN_INFO "Extended CMOS year: %d\n", century * 100); | ||
153 | } else | 152 | } else |
154 | year += CMOS_YEARS_OFFS; | 153 | year += CMOS_YEARS_OFFS; |
155 | 154 | ||
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index c228322ca180..84d32855f65c 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -108,17 +108,16 @@ | |||
108 | #include <asm/topology.h> | 108 | #include <asm/topology.h> |
109 | #include <asm/apicdef.h> | 109 | #include <asm/apicdef.h> |
110 | #include <asm/amd_nb.h> | 110 | #include <asm/amd_nb.h> |
111 | #ifdef CONFIG_X86_64 | ||
112 | #include <asm/numa_64.h> | ||
113 | #endif | ||
114 | #include <asm/mce.h> | 111 | #include <asm/mce.h> |
115 | #include <asm/alternative.h> | 112 | #include <asm/alternative.h> |
116 | #include <asm/prom.h> | 113 | #include <asm/prom.h> |
117 | 114 | ||
118 | /* | 115 | /* |
119 | * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. | 116 | * max_low_pfn_mapped: highest direct mapped pfn under 4GB |
120 | * The direct mapping extends to max_pfn_mapped, so that we can directly access | 117 | * max_pfn_mapped: highest direct mapped pfn over 4GB |
121 | * apertures, ACPI and other tables without having to play with fixmaps. | 118 | * |
119 | * The direct mapping only covers E820_RAM regions, so the ranges and gaps are | ||
120 | * represented by pfn_mapped | ||
122 | */ | 121 | */ |
123 | unsigned long max_low_pfn_mapped; | 122 | unsigned long max_low_pfn_mapped; |
124 | unsigned long max_pfn_mapped; | 123 | unsigned long max_pfn_mapped; |
@@ -276,18 +275,7 @@ void * __init extend_brk(size_t size, size_t align) | |||
276 | return ret; | 275 | return ret; |
277 | } | 276 | } |
278 | 277 | ||
279 | #ifdef CONFIG_X86_64 | 278 | #ifdef CONFIG_X86_32 |
280 | static void __init init_gbpages(void) | ||
281 | { | ||
282 | if (direct_gbpages && cpu_has_gbpages) | ||
283 | printk(KERN_INFO "Using GB pages for direct mapping\n"); | ||
284 | else | ||
285 | direct_gbpages = 0; | ||
286 | } | ||
287 | #else | ||
288 | static inline void init_gbpages(void) | ||
289 | { | ||
290 | } | ||
291 | static void __init cleanup_highmap(void) | 279 | static void __init cleanup_highmap(void) |
292 | { | 280 | { |
293 | } | 281 | } |
@@ -296,8 +284,8 @@ static void __init cleanup_highmap(void) | |||
296 | static void __init reserve_brk(void) | 284 | static void __init reserve_brk(void) |
297 | { | 285 | { |
298 | if (_brk_end > _brk_start) | 286 | if (_brk_end > _brk_start) |
299 | memblock_reserve(__pa(_brk_start), | 287 | memblock_reserve(__pa_symbol(_brk_start), |
300 | __pa(_brk_end) - __pa(_brk_start)); | 288 | _brk_end - _brk_start); |
301 | 289 | ||
302 | /* Mark brk area as locked down and no longer taking any | 290 | /* Mark brk area as locked down and no longer taking any |
303 | new allocations */ | 291 | new allocations */ |
@@ -306,27 +294,43 @@ static void __init reserve_brk(void) | |||
306 | 294 | ||
307 | #ifdef CONFIG_BLK_DEV_INITRD | 295 | #ifdef CONFIG_BLK_DEV_INITRD |
308 | 296 | ||
297 | static u64 __init get_ramdisk_image(void) | ||
298 | { | ||
299 | u64 ramdisk_image = boot_params.hdr.ramdisk_image; | ||
300 | |||
301 | ramdisk_image |= (u64)boot_params.ext_ramdisk_image << 32; | ||
302 | |||
303 | return ramdisk_image; | ||
304 | } | ||
305 | static u64 __init get_ramdisk_size(void) | ||
306 | { | ||
307 | u64 ramdisk_size = boot_params.hdr.ramdisk_size; | ||
308 | |||
309 | ramdisk_size |= (u64)boot_params.ext_ramdisk_size << 32; | ||
310 | |||
311 | return ramdisk_size; | ||
312 | } | ||
313 | |||
309 | #define MAX_MAP_CHUNK (NR_FIX_BTMAPS << PAGE_SHIFT) | 314 | #define MAX_MAP_CHUNK (NR_FIX_BTMAPS << PAGE_SHIFT) |
310 | static void __init relocate_initrd(void) | 315 | static void __init relocate_initrd(void) |
311 | { | 316 | { |
312 | /* Assume only end is not page aligned */ | 317 | /* Assume only end is not page aligned */ |
313 | u64 ramdisk_image = boot_params.hdr.ramdisk_image; | 318 | u64 ramdisk_image = get_ramdisk_image(); |
314 | u64 ramdisk_size = boot_params.hdr.ramdisk_size; | 319 | u64 ramdisk_size = get_ramdisk_size(); |
315 | u64 area_size = PAGE_ALIGN(ramdisk_size); | 320 | u64 area_size = PAGE_ALIGN(ramdisk_size); |
316 | u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT; | ||
317 | u64 ramdisk_here; | 321 | u64 ramdisk_here; |
318 | unsigned long slop, clen, mapaddr; | 322 | unsigned long slop, clen, mapaddr; |
319 | char *p, *q; | 323 | char *p, *q; |
320 | 324 | ||
321 | /* We need to move the initrd down into lowmem */ | 325 | /* We need to move the initrd down into directly mapped mem */ |
322 | ramdisk_here = memblock_find_in_range(0, end_of_lowmem, area_size, | 326 | ramdisk_here = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), |
323 | PAGE_SIZE); | 327 | area_size, PAGE_SIZE); |
324 | 328 | ||
325 | if (!ramdisk_here) | 329 | if (!ramdisk_here) |
326 | panic("Cannot find place for new RAMDISK of size %lld\n", | 330 | panic("Cannot find place for new RAMDISK of size %lld\n", |
327 | ramdisk_size); | 331 | ramdisk_size); |
328 | 332 | ||
329 | /* Note: this includes all the lowmem currently occupied by | 333 | /* Note: this includes all the mem currently occupied by |
330 | the initrd, we rely on that fact to keep the data intact. */ | 334 | the initrd, we rely on that fact to keep the data intact. */ |
331 | memblock_reserve(ramdisk_here, area_size); | 335 | memblock_reserve(ramdisk_here, area_size); |
332 | initrd_start = ramdisk_here + PAGE_OFFSET; | 336 | initrd_start = ramdisk_here + PAGE_OFFSET; |
@@ -336,17 +340,7 @@ static void __init relocate_initrd(void) | |||
336 | 340 | ||
337 | q = (char *)initrd_start; | 341 | q = (char *)initrd_start; |
338 | 342 | ||
339 | /* Copy any lowmem portion of the initrd */ | 343 | /* Copy the initrd */ |
340 | if (ramdisk_image < end_of_lowmem) { | ||
341 | clen = end_of_lowmem - ramdisk_image; | ||
342 | p = (char *)__va(ramdisk_image); | ||
343 | memcpy(q, p, clen); | ||
344 | q += clen; | ||
345 | ramdisk_image += clen; | ||
346 | ramdisk_size -= clen; | ||
347 | } | ||
348 | |||
349 | /* Copy the highmem portion of the initrd */ | ||
350 | while (ramdisk_size) { | 344 | while (ramdisk_size) { |
351 | slop = ramdisk_image & ~PAGE_MASK; | 345 | slop = ramdisk_image & ~PAGE_MASK; |
352 | clen = ramdisk_size; | 346 | clen = ramdisk_size; |
@@ -360,22 +354,35 @@ static void __init relocate_initrd(void) | |||
360 | ramdisk_image += clen; | 354 | ramdisk_image += clen; |
361 | ramdisk_size -= clen; | 355 | ramdisk_size -= clen; |
362 | } | 356 | } |
363 | /* high pages is not converted by early_res_to_bootmem */ | 357 | |
364 | ramdisk_image = boot_params.hdr.ramdisk_image; | 358 | ramdisk_image = get_ramdisk_image(); |
365 | ramdisk_size = boot_params.hdr.ramdisk_size; | 359 | ramdisk_size = get_ramdisk_size(); |
366 | printk(KERN_INFO "Move RAMDISK from [mem %#010llx-%#010llx] to" | 360 | printk(KERN_INFO "Move RAMDISK from [mem %#010llx-%#010llx] to" |
367 | " [mem %#010llx-%#010llx]\n", | 361 | " [mem %#010llx-%#010llx]\n", |
368 | ramdisk_image, ramdisk_image + ramdisk_size - 1, | 362 | ramdisk_image, ramdisk_image + ramdisk_size - 1, |
369 | ramdisk_here, ramdisk_here + ramdisk_size - 1); | 363 | ramdisk_here, ramdisk_here + ramdisk_size - 1); |
370 | } | 364 | } |
371 | 365 | ||
366 | static void __init early_reserve_initrd(void) | ||
367 | { | ||
368 | /* Assume only end is not page aligned */ | ||
369 | u64 ramdisk_image = get_ramdisk_image(); | ||
370 | u64 ramdisk_size = get_ramdisk_size(); | ||
371 | u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); | ||
372 | |||
373 | if (!boot_params.hdr.type_of_loader || | ||
374 | !ramdisk_image || !ramdisk_size) | ||
375 | return; /* No initrd provided by bootloader */ | ||
376 | |||
377 | memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image); | ||
378 | } | ||
372 | static void __init reserve_initrd(void) | 379 | static void __init reserve_initrd(void) |
373 | { | 380 | { |
374 | /* Assume only end is not page aligned */ | 381 | /* Assume only end is not page aligned */ |
375 | u64 ramdisk_image = boot_params.hdr.ramdisk_image; | 382 | u64 ramdisk_image = get_ramdisk_image(); |
376 | u64 ramdisk_size = boot_params.hdr.ramdisk_size; | 383 | u64 ramdisk_size = get_ramdisk_size(); |
377 | u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); | 384 | u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); |
378 | u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT; | 385 | u64 mapped_size; |
379 | 386 | ||
380 | if (!boot_params.hdr.type_of_loader || | 387 | if (!boot_params.hdr.type_of_loader || |
381 | !ramdisk_image || !ramdisk_size) | 388 | !ramdisk_image || !ramdisk_size) |
@@ -383,22 +390,18 @@ static void __init reserve_initrd(void) | |||
383 | 390 | ||
384 | initrd_start = 0; | 391 | initrd_start = 0; |
385 | 392 | ||
386 | if (ramdisk_size >= (end_of_lowmem>>1)) { | 393 | mapped_size = memblock_mem_size(max_pfn_mapped); |
394 | if (ramdisk_size >= (mapped_size>>1)) | ||
387 | panic("initrd too large to handle, " | 395 | panic("initrd too large to handle, " |
388 | "disabling initrd (%lld needed, %lld available)\n", | 396 | "disabling initrd (%lld needed, %lld available)\n", |
389 | ramdisk_size, end_of_lowmem>>1); | 397 | ramdisk_size, mapped_size>>1); |
390 | } | ||
391 | 398 | ||
392 | printk(KERN_INFO "RAMDISK: [mem %#010llx-%#010llx]\n", ramdisk_image, | 399 | printk(KERN_INFO "RAMDISK: [mem %#010llx-%#010llx]\n", ramdisk_image, |
393 | ramdisk_end - 1); | 400 | ramdisk_end - 1); |
394 | 401 | ||
395 | 402 | if (pfn_range_is_mapped(PFN_DOWN(ramdisk_image), | |
396 | if (ramdisk_end <= end_of_lowmem) { | 403 | PFN_DOWN(ramdisk_end))) { |
397 | /* All in lowmem, easy case */ | 404 | /* All are mapped, easy case */ |
398 | /* | ||
399 | * don't need to reserve again, already reserved early | ||
400 | * in i386_start_kernel | ||
401 | */ | ||
402 | initrd_start = ramdisk_image + PAGE_OFFSET; | 405 | initrd_start = ramdisk_image + PAGE_OFFSET; |
403 | initrd_end = initrd_start + ramdisk_size; | 406 | initrd_end = initrd_start + ramdisk_size; |
404 | return; | 407 | return; |
@@ -409,6 +412,9 @@ static void __init reserve_initrd(void) | |||
409 | memblock_free(ramdisk_image, ramdisk_end - ramdisk_image); | 412 | memblock_free(ramdisk_image, ramdisk_end - ramdisk_image); |
410 | } | 413 | } |
411 | #else | 414 | #else |
415 | static void __init early_reserve_initrd(void) | ||
416 | { | ||
417 | } | ||
412 | static void __init reserve_initrd(void) | 418 | static void __init reserve_initrd(void) |
413 | { | 419 | { |
414 | } | 420 | } |
@@ -419,8 +425,6 @@ static void __init parse_setup_data(void) | |||
419 | struct setup_data *data; | 425 | struct setup_data *data; |
420 | u64 pa_data; | 426 | u64 pa_data; |
421 | 427 | ||
422 | if (boot_params.hdr.version < 0x0209) | ||
423 | return; | ||
424 | pa_data = boot_params.hdr.setup_data; | 428 | pa_data = boot_params.hdr.setup_data; |
425 | while (pa_data) { | 429 | while (pa_data) { |
426 | u32 data_len, map_len; | 430 | u32 data_len, map_len; |
@@ -456,8 +460,6 @@ static void __init e820_reserve_setup_data(void) | |||
456 | u64 pa_data; | 460 | u64 pa_data; |
457 | int found = 0; | 461 | int found = 0; |
458 | 462 | ||
459 | if (boot_params.hdr.version < 0x0209) | ||
460 | return; | ||
461 | pa_data = boot_params.hdr.setup_data; | 463 | pa_data = boot_params.hdr.setup_data; |
462 | while (pa_data) { | 464 | while (pa_data) { |
463 | data = early_memremap(pa_data, sizeof(*data)); | 465 | data = early_memremap(pa_data, sizeof(*data)); |
@@ -481,8 +483,6 @@ static void __init memblock_x86_reserve_range_setup_data(void) | |||
481 | struct setup_data *data; | 483 | struct setup_data *data; |
482 | u64 pa_data; | 484 | u64 pa_data; |
483 | 485 | ||
484 | if (boot_params.hdr.version < 0x0209) | ||
485 | return; | ||
486 | pa_data = boot_params.hdr.setup_data; | 486 | pa_data = boot_params.hdr.setup_data; |
487 | while (pa_data) { | 487 | while (pa_data) { |
488 | data = early_memremap(pa_data, sizeof(*data)); | 488 | data = early_memremap(pa_data, sizeof(*data)); |
@@ -501,17 +501,51 @@ static void __init memblock_x86_reserve_range_setup_data(void) | |||
501 | /* | 501 | /* |
502 | * Keep the crash kernel below this limit. On 32 bits earlier kernels | 502 | * Keep the crash kernel below this limit. On 32 bits earlier kernels |
503 | * would limit the kernel to the low 512 MiB due to mapping restrictions. | 503 | * would limit the kernel to the low 512 MiB due to mapping restrictions. |
504 | * On 64 bits, kexec-tools currently limits us to 896 MiB; increase this | ||
505 | * limit once kexec-tools are fixed. | ||
506 | */ | 504 | */ |
507 | #ifdef CONFIG_X86_32 | 505 | #ifdef CONFIG_X86_32 |
508 | # define CRASH_KERNEL_ADDR_MAX (512 << 20) | 506 | # define CRASH_KERNEL_ADDR_MAX (512 << 20) |
509 | #else | 507 | #else |
510 | # define CRASH_KERNEL_ADDR_MAX (896 << 20) | 508 | # define CRASH_KERNEL_ADDR_MAX MAXMEM |
511 | #endif | 509 | #endif |
512 | 510 | ||
511 | static void __init reserve_crashkernel_low(void) | ||
512 | { | ||
513 | #ifdef CONFIG_X86_64 | ||
514 | const unsigned long long alignment = 16<<20; /* 16M */ | ||
515 | unsigned long long low_base = 0, low_size = 0; | ||
516 | unsigned long total_low_mem; | ||
517 | unsigned long long base; | ||
518 | int ret; | ||
519 | |||
520 | total_low_mem = memblock_mem_size(1UL<<(32-PAGE_SHIFT)); | ||
521 | ret = parse_crashkernel_low(boot_command_line, total_low_mem, | ||
522 | &low_size, &base); | ||
523 | if (ret != 0 || low_size <= 0) | ||
524 | return; | ||
525 | |||
526 | low_base = memblock_find_in_range(low_size, (1ULL<<32), | ||
527 | low_size, alignment); | ||
528 | |||
529 | if (!low_base) { | ||
530 | pr_info("crashkernel low reservation failed - No suitable area found.\n"); | ||
531 | |||
532 | return; | ||
533 | } | ||
534 | |||
535 | memblock_reserve(low_base, low_size); | ||
536 | pr_info("Reserving %ldMB of low memory at %ldMB for crashkernel (System low RAM: %ldMB)\n", | ||
537 | (unsigned long)(low_size >> 20), | ||
538 | (unsigned long)(low_base >> 20), | ||
539 | (unsigned long)(total_low_mem >> 20)); | ||
540 | crashk_low_res.start = low_base; | ||
541 | crashk_low_res.end = low_base + low_size - 1; | ||
542 | insert_resource(&iomem_resource, &crashk_low_res); | ||
543 | #endif | ||
544 | } | ||
545 | |||
513 | static void __init reserve_crashkernel(void) | 546 | static void __init reserve_crashkernel(void) |
514 | { | 547 | { |
548 | const unsigned long long alignment = 16<<20; /* 16M */ | ||
515 | unsigned long long total_mem; | 549 | unsigned long long total_mem; |
516 | unsigned long long crash_size, crash_base; | 550 | unsigned long long crash_size, crash_base; |
517 | int ret; | 551 | int ret; |
@@ -525,8 +559,6 @@ static void __init reserve_crashkernel(void) | |||
525 | 559 | ||
526 | /* 0 means: find the address automatically */ | 560 | /* 0 means: find the address automatically */ |
527 | if (crash_base <= 0) { | 561 | if (crash_base <= 0) { |
528 | const unsigned long long alignment = 16<<20; /* 16M */ | ||
529 | |||
530 | /* | 562 | /* |
531 | * kexec want bzImage is below CRASH_KERNEL_ADDR_MAX | 563 | * kexec want bzImage is below CRASH_KERNEL_ADDR_MAX |
532 | */ | 564 | */ |
@@ -537,6 +569,7 @@ static void __init reserve_crashkernel(void) | |||
537 | pr_info("crashkernel reservation failed - No suitable area found.\n"); | 569 | pr_info("crashkernel reservation failed - No suitable area found.\n"); |
538 | return; | 570 | return; |
539 | } | 571 | } |
572 | |||
540 | } else { | 573 | } else { |
541 | unsigned long long start; | 574 | unsigned long long start; |
542 | 575 | ||
@@ -558,6 +591,9 @@ static void __init reserve_crashkernel(void) | |||
558 | crashk_res.start = crash_base; | 591 | crashk_res.start = crash_base; |
559 | crashk_res.end = crash_base + crash_size - 1; | 592 | crashk_res.end = crash_base + crash_size - 1; |
560 | insert_resource(&iomem_resource, &crashk_res); | 593 | insert_resource(&iomem_resource, &crashk_res); |
594 | |||
595 | if (crash_base >= (1ULL<<32)) | ||
596 | reserve_crashkernel_low(); | ||
561 | } | 597 | } |
562 | #else | 598 | #else |
563 | static void __init reserve_crashkernel(void) | 599 | static void __init reserve_crashkernel(void) |
@@ -608,7 +644,82 @@ static __init void reserve_ibft_region(void) | |||
608 | memblock_reserve(addr, size); | 644 | memblock_reserve(addr, size); |
609 | } | 645 | } |
610 | 646 | ||
611 | static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10; | 647 | static bool __init snb_gfx_workaround_needed(void) |
648 | { | ||
649 | #ifdef CONFIG_PCI | ||
650 | int i; | ||
651 | u16 vendor, devid; | ||
652 | static const __initconst u16 snb_ids[] = { | ||
653 | 0x0102, | ||
654 | 0x0112, | ||
655 | 0x0122, | ||
656 | 0x0106, | ||
657 | 0x0116, | ||
658 | 0x0126, | ||
659 | 0x010a, | ||
660 | }; | ||
661 | |||
662 | /* Assume no if something weird is going on with PCI */ | ||
663 | if (!early_pci_allowed()) | ||
664 | return false; | ||
665 | |||
666 | vendor = read_pci_config_16(0, 2, 0, PCI_VENDOR_ID); | ||
667 | if (vendor != 0x8086) | ||
668 | return false; | ||
669 | |||
670 | devid = read_pci_config_16(0, 2, 0, PCI_DEVICE_ID); | ||
671 | for (i = 0; i < ARRAY_SIZE(snb_ids); i++) | ||
672 | if (devid == snb_ids[i]) | ||
673 | return true; | ||
674 | #endif | ||
675 | |||
676 | return false; | ||
677 | } | ||
678 | |||
679 | /* | ||
680 | * Sandy Bridge graphics has trouble with certain ranges, exclude | ||
681 | * them from allocation. | ||
682 | */ | ||
683 | static void __init trim_snb_memory(void) | ||
684 | { | ||
685 | static const __initconst unsigned long bad_pages[] = { | ||
686 | 0x20050000, | ||
687 | 0x20110000, | ||
688 | 0x20130000, | ||
689 | 0x20138000, | ||
690 | 0x40004000, | ||
691 | }; | ||
692 | int i; | ||
693 | |||
694 | if (!snb_gfx_workaround_needed()) | ||
695 | return; | ||
696 | |||
697 | printk(KERN_DEBUG "reserving inaccessible SNB gfx pages\n"); | ||
698 | |||
699 | /* | ||
700 | * Reserve all memory below the 1 MB mark that has not | ||
701 | * already been reserved. | ||
702 | */ | ||
703 | memblock_reserve(0, 1<<20); | ||
704 | |||
705 | for (i = 0; i < ARRAY_SIZE(bad_pages); i++) { | ||
706 | if (memblock_reserve(bad_pages[i], PAGE_SIZE)) | ||
707 | printk(KERN_WARNING "failed to reserve 0x%08lx\n", | ||
708 | bad_pages[i]); | ||
709 | } | ||
710 | } | ||
711 | |||
712 | /* | ||
713 | * Here we put platform-specific memory range workarounds, i.e. | ||
714 | * memory known to be corrupt or that otherwise needs to be reserved | ||
715 | * on specific platforms. | ||
716 | * | ||
717 | * If this gets used more widely it could use a real dispatch mechanism. | ||
718 | */ | ||
719 | static void __init trim_platform_memory_ranges(void) | ||
720 | { | ||
721 | trim_snb_memory(); | ||
722 | } | ||
612 | 723 | ||
613 | static void __init trim_bios_range(void) | 724 | static void __init trim_bios_range(void) |
614 | { | 725 | { |
@@ -621,8 +732,7 @@ static void __init trim_bios_range(void) | |||
621 | * since some BIOSes are known to corrupt low memory. See the | 732 | * since some BIOSes are known to corrupt low memory. See the |
622 | * Kconfig help text for X86_RESERVE_LOW. | 733 | * Kconfig help text for X86_RESERVE_LOW. |
623 | */ | 734 | */ |
624 | e820_update_range(0, ALIGN(reserve_low, PAGE_SIZE), | 735 | e820_update_range(0, PAGE_SIZE, E820_RAM, E820_RESERVED); |
625 | E820_RAM, E820_RESERVED); | ||
626 | 736 | ||
627 | /* | 737 | /* |
628 | * special case: Some BIOSen report the PC BIOS | 738 | * special case: Some BIOSen report the PC BIOS |
@@ -630,9 +740,33 @@ static void __init trim_bios_range(void) | |||
630 | * take them out. | 740 | * take them out. |
631 | */ | 741 | */ |
632 | e820_remove_range(BIOS_BEGIN, BIOS_END - BIOS_BEGIN, E820_RAM, 1); | 742 | e820_remove_range(BIOS_BEGIN, BIOS_END - BIOS_BEGIN, E820_RAM, 1); |
743 | |||
633 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); | 744 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); |
634 | } | 745 | } |
635 | 746 | ||
747 | /* called before trim_bios_range() to spare an extra sanitize pass */ | ||
748 | static void __init e820_add_kernel_range(void) | ||
749 | { | ||
750 | u64 start = __pa_symbol(_text); | ||
751 | u64 size = __pa_symbol(_end) - start; | ||
752 | |||
753 | /* | ||
754 | * Complain if .text .data and .bss are not marked as E820_RAM and | ||
755 | * attempt to fix it by adding the range. We may have a confused BIOS, | ||
756 | * or the user may have used memmap=exactmap or memmap=xxM$yyM to | ||
757 | * exclude the kernel range. If we really are running on top of non-RAM, | ||
758 | * we will crash later anyway. | ||
759 | */ | ||
760 | if (e820_all_mapped(start, start + size, E820_RAM)) | ||
761 | return; | ||
762 | |||
763 | pr_warn(".text .data .bss are not marked as E820_RAM!\n"); | ||
764 | e820_remove_range(start, size, E820_RAM, 0); | ||
765 | e820_add_region(start, size, E820_RAM); | ||
766 | } | ||
767 | |||
768 | static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10; | ||
769 | |||
636 | static int __init parse_reservelow(char *p) | 770 | static int __init parse_reservelow(char *p) |
637 | { | 771 | { |
638 | unsigned long long size; | 772 | unsigned long long size; |
@@ -655,6 +789,11 @@ static int __init parse_reservelow(char *p) | |||
655 | 789 | ||
656 | early_param("reservelow", parse_reservelow); | 790 | early_param("reservelow", parse_reservelow); |
657 | 791 | ||
792 | static void __init trim_low_memory_range(void) | ||
793 | { | ||
794 | memblock_reserve(0, ALIGN(reserve_low, PAGE_SIZE)); | ||
795 | } | ||
796 | |||
658 | /* | 797 | /* |
659 | * Determine if we were loaded by an EFI loader. If so, then we have also been | 798 | * Determine if we were loaded by an EFI loader. If so, then we have also been |
660 | * passed the efi memmap, systab, etc., so we should use these data structures | 799 | * passed the efi memmap, systab, etc., so we should use these data structures |
@@ -670,6 +809,17 @@ early_param("reservelow", parse_reservelow); | |||
670 | 809 | ||
671 | void __init setup_arch(char **cmdline_p) | 810 | void __init setup_arch(char **cmdline_p) |
672 | { | 811 | { |
812 | memblock_reserve(__pa_symbol(_text), | ||
813 | (unsigned long)__bss_stop - (unsigned long)_text); | ||
814 | |||
815 | early_reserve_initrd(); | ||
816 | |||
817 | /* | ||
818 | * At this point everything still needed from the boot loader | ||
819 | * or BIOS or kernel text should be early reserved or marked not | ||
820 | * RAM in e820. All other memory is free game. | ||
821 | */ | ||
822 | |||
673 | #ifdef CONFIG_X86_32 | 823 | #ifdef CONFIG_X86_32 |
674 | memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); | 824 | memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); |
675 | visws_early_detect(); | 825 | visws_early_detect(); |
@@ -729,15 +879,15 @@ void __init setup_arch(char **cmdline_p) | |||
729 | #ifdef CONFIG_EFI | 879 | #ifdef CONFIG_EFI |
730 | if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, | 880 | if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, |
731 | "EL32", 4)) { | 881 | "EL32", 4)) { |
732 | efi_enabled = 1; | 882 | set_bit(EFI_BOOT, &x86_efi_facility); |
733 | efi_64bit = false; | ||
734 | } else if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, | 883 | } else if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, |
735 | "EL64", 4)) { | 884 | "EL64", 4)) { |
736 | efi_enabled = 1; | 885 | set_bit(EFI_BOOT, &x86_efi_facility); |
737 | efi_64bit = true; | 886 | set_bit(EFI_64BIT, &x86_efi_facility); |
738 | } | 887 | } |
739 | if (efi_enabled && efi_memblock_x86_reserve_range()) | 888 | |
740 | efi_enabled = 0; | 889 | if (efi_enabled(EFI_BOOT)) |
890 | efi_memblock_x86_reserve_range(); | ||
741 | #endif | 891 | #endif |
742 | 892 | ||
743 | x86_init.oem.arch_setup(); | 893 | x86_init.oem.arch_setup(); |
@@ -757,12 +907,12 @@ void __init setup_arch(char **cmdline_p) | |||
757 | init_mm.end_data = (unsigned long) _edata; | 907 | init_mm.end_data = (unsigned long) _edata; |
758 | init_mm.brk = _brk_end; | 908 | init_mm.brk = _brk_end; |
759 | 909 | ||
760 | code_resource.start = virt_to_phys(_text); | 910 | code_resource.start = __pa_symbol(_text); |
761 | code_resource.end = virt_to_phys(_etext)-1; | 911 | code_resource.end = __pa_symbol(_etext)-1; |
762 | data_resource.start = virt_to_phys(_etext); | 912 | data_resource.start = __pa_symbol(_etext); |
763 | data_resource.end = virt_to_phys(_edata)-1; | 913 | data_resource.end = __pa_symbol(_edata)-1; |
764 | bss_resource.start = virt_to_phys(&__bss_start); | 914 | bss_resource.start = __pa_symbol(__bss_start); |
765 | bss_resource.end = virt_to_phys(&__bss_stop)-1; | 915 | bss_resource.end = __pa_symbol(__bss_stop)-1; |
766 | 916 | ||
767 | #ifdef CONFIG_CMDLINE_BOOL | 917 | #ifdef CONFIG_CMDLINE_BOOL |
768 | #ifdef CONFIG_CMDLINE_OVERRIDE | 918 | #ifdef CONFIG_CMDLINE_OVERRIDE |
@@ -810,7 +960,7 @@ void __init setup_arch(char **cmdline_p) | |||
810 | 960 | ||
811 | finish_e820_parsing(); | 961 | finish_e820_parsing(); |
812 | 962 | ||
813 | if (efi_enabled) | 963 | if (efi_enabled(EFI_BOOT)) |
814 | efi_init(); | 964 | efi_init(); |
815 | 965 | ||
816 | dmi_scan_machine(); | 966 | dmi_scan_machine(); |
@@ -828,6 +978,7 @@ void __init setup_arch(char **cmdline_p) | |||
828 | insert_resource(&iomem_resource, &data_resource); | 978 | insert_resource(&iomem_resource, &data_resource); |
829 | insert_resource(&iomem_resource, &bss_resource); | 979 | insert_resource(&iomem_resource, &bss_resource); |
830 | 980 | ||
981 | e820_add_kernel_range(); | ||
831 | trim_bios_range(); | 982 | trim_bios_range(); |
832 | #ifdef CONFIG_X86_32 | 983 | #ifdef CONFIG_X86_32 |
833 | if (ppro_with_ram_bug()) { | 984 | if (ppro_with_ram_bug()) { |
@@ -877,6 +1028,8 @@ void __init setup_arch(char **cmdline_p) | |||
877 | 1028 | ||
878 | reserve_ibft_region(); | 1029 | reserve_ibft_region(); |
879 | 1030 | ||
1031 | early_alloc_pgt_buf(); | ||
1032 | |||
880 | /* | 1033 | /* |
881 | * Need to conclude brk, before memblock_x86_fill() | 1034 | * Need to conclude brk, before memblock_x86_fill() |
882 | * it could use memblock_find_in_range, could overlap with | 1035 | * it could use memblock_find_in_range, could overlap with |
@@ -886,14 +1039,14 @@ void __init setup_arch(char **cmdline_p) | |||
886 | 1039 | ||
887 | cleanup_highmap(); | 1040 | cleanup_highmap(); |
888 | 1041 | ||
889 | memblock.current_limit = get_max_mapped(); | 1042 | memblock.current_limit = ISA_END_ADDRESS; |
890 | memblock_x86_fill(); | 1043 | memblock_x86_fill(); |
891 | 1044 | ||
892 | /* | 1045 | /* |
893 | * The EFI specification says that boot service code won't be called | 1046 | * The EFI specification says that boot service code won't be called |
894 | * after ExitBootServices(). This is, in fact, a lie. | 1047 | * after ExitBootServices(). This is, in fact, a lie. |
895 | */ | 1048 | */ |
896 | if (efi_enabled) | 1049 | if (efi_enabled(EFI_MEMMAP)) |
897 | efi_reserve_boot_services(); | 1050 | efi_reserve_boot_services(); |
898 | 1051 | ||
899 | /* preallocate 4k for mptable mpc */ | 1052 | /* preallocate 4k for mptable mpc */ |
@@ -903,39 +1056,22 @@ void __init setup_arch(char **cmdline_p) | |||
903 | setup_bios_corruption_check(); | 1056 | setup_bios_corruption_check(); |
904 | #endif | 1057 | #endif |
905 | 1058 | ||
1059 | #ifdef CONFIG_X86_32 | ||
906 | printk(KERN_DEBUG "initial memory mapped: [mem 0x00000000-%#010lx]\n", | 1060 | printk(KERN_DEBUG "initial memory mapped: [mem 0x00000000-%#010lx]\n", |
907 | (max_pfn_mapped<<PAGE_SHIFT) - 1); | 1061 | (max_pfn_mapped<<PAGE_SHIFT) - 1); |
1062 | #endif | ||
908 | 1063 | ||
909 | setup_real_mode(); | 1064 | reserve_real_mode(); |
910 | |||
911 | init_gbpages(); | ||
912 | |||
913 | /* max_pfn_mapped is updated here */ | ||
914 | max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT); | ||
915 | max_pfn_mapped = max_low_pfn_mapped; | ||
916 | 1065 | ||
917 | #ifdef CONFIG_X86_64 | 1066 | trim_platform_memory_ranges(); |
918 | if (max_pfn > max_low_pfn) { | 1067 | trim_low_memory_range(); |
919 | int i; | ||
920 | unsigned long start, end; | ||
921 | unsigned long start_pfn, end_pfn; | ||
922 | 1068 | ||
923 | for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, | 1069 | init_mem_mapping(); |
924 | NULL) { | ||
925 | 1070 | ||
926 | end = PFN_PHYS(end_pfn); | 1071 | early_trap_pf_init(); |
927 | if (end <= (1UL<<32)) | ||
928 | continue; | ||
929 | 1072 | ||
930 | start = PFN_PHYS(start_pfn); | 1073 | setup_real_mode(); |
931 | max_pfn_mapped = init_memory_mapping( | ||
932 | max((1UL<<32), start), end); | ||
933 | } | ||
934 | 1074 | ||
935 | /* can we preseve max_low_pfn ?*/ | ||
936 | max_low_pfn = max_pfn; | ||
937 | } | ||
938 | #endif | ||
939 | memblock.current_limit = get_max_mapped(); | 1075 | memblock.current_limit = get_max_mapped(); |
940 | dma_contiguous_reserve(0); | 1076 | dma_contiguous_reserve(0); |
941 | 1077 | ||
@@ -952,6 +1088,10 @@ void __init setup_arch(char **cmdline_p) | |||
952 | 1088 | ||
953 | reserve_initrd(); | 1089 | reserve_initrd(); |
954 | 1090 | ||
1091 | #if defined(CONFIG_ACPI) && defined(CONFIG_BLK_DEV_INITRD) | ||
1092 | acpi_initrd_override((void *)initrd_start, initrd_end - initrd_start); | ||
1093 | #endif | ||
1094 | |||
955 | reserve_crashkernel(); | 1095 | reserve_crashkernel(); |
956 | 1096 | ||
957 | vsmp_init(); | 1097 | vsmp_init(); |
@@ -1030,7 +1170,7 @@ void __init setup_arch(char **cmdline_p) | |||
1030 | 1170 | ||
1031 | #ifdef CONFIG_VT | 1171 | #ifdef CONFIG_VT |
1032 | #if defined(CONFIG_VGA_CONSOLE) | 1172 | #if defined(CONFIG_VGA_CONSOLE) |
1033 | if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY)) | 1173 | if (!efi_enabled(EFI_BOOT) || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY)) |
1034 | conswitchp = &vga_con; | 1174 | conswitchp = &vga_con; |
1035 | #elif defined(CONFIG_DUMMY_CONSOLE) | 1175 | #elif defined(CONFIG_DUMMY_CONSOLE) |
1036 | conswitchp = &dummy_con; | 1176 | conswitchp = &dummy_con; |
@@ -1047,14 +1187,13 @@ void __init setup_arch(char **cmdline_p) | |||
1047 | register_refined_jiffies(CLOCK_TICK_RATE); | 1187 | register_refined_jiffies(CLOCK_TICK_RATE); |
1048 | 1188 | ||
1049 | #ifdef CONFIG_EFI | 1189 | #ifdef CONFIG_EFI |
1050 | /* Once setup is done above, disable efi_enabled on mismatched | 1190 | /* Once setup is done above, unmap the EFI memory map on |
1051 | * firmware/kernel architectures since there is no support for | 1191 | * mismatched firmware/kernel architectures since there is no |
1052 | * runtime services. | 1192 | * support for runtime services. |
1053 | */ | 1193 | */ |
1054 | if (efi_enabled && IS_ENABLED(CONFIG_X86_64) != efi_64bit) { | 1194 | if (efi_enabled(EFI_BOOT) && !efi_is_native()) { |
1055 | pr_info("efi: Setup done, disabling due to 32/64-bit mismatch\n"); | 1195 | pr_info("efi: Setup done, disabling due to 32/64-bit mismatch\n"); |
1056 | efi_unmap_memmap(); | 1196 | efi_unmap_memmap(); |
1057 | efi_enabled = 0; | ||
1058 | } | 1197 | } |
1059 | #endif | 1198 | #endif |
1060 | } | 1199 | } |
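
The e820_add_kernel_range() hunk above hinges on a coverage test: the kernel text/data/bss range must lie entirely inside E820_RAM before it can be left alone. Below is a minimal userspace sketch of that test, with a made-up region map and hypothetical helper names; it is not the kernel's e820 code.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct region { uint64_t start, end; int type; };   /* end is exclusive */
#define RAM 1
#define RESERVED 2

/* Hypothetical stand-in for e820_all_mapped(): true if [start, end)
 * is fully covered by RAM entries of a sorted, non-overlapping map. */
static bool all_mapped_ram(const struct region *map, int n,
                           uint64_t start, uint64_t end)
{
    for (int i = 0; i < n; i++) {
        if (map[i].type != RAM)
            continue;
        if (map[i].start <= start && start < map[i].end) {
            start = map[i].end;            /* covered up to here */
            if (start >= end)
                return true;
        }
    }
    return false;
}

int main(void)
{
    struct region map[] = {
        { 0x00100000, 0x20000000, RAM },
        { 0x20000000, 0x20100000, RESERVED },      /* hole from memmap= */
        { 0x20100000, 0x40000000, RAM },
    };
    uint64_t kstart = 0x1f000000, kend = 0x21000000;  /* straddles the hole */

    if (!all_mapped_ram(map, 3, kstart, kend))
        printf("kernel range not fully E820_RAM, would re-add it\n");
    else
        printf("kernel range fully covered\n");
    return 0;
}
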
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index fbbb604313a2..69562992e457 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c | |||
@@ -278,7 +278,7 @@ static const struct { | |||
278 | }; | 278 | }; |
279 | 279 | ||
280 | static int | 280 | static int |
281 | __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, | 281 | __setup_frame(int sig, struct ksignal *ksig, sigset_t *set, |
282 | struct pt_regs *regs) | 282 | struct pt_regs *regs) |
283 | { | 283 | { |
284 | struct sigframe __user *frame; | 284 | struct sigframe __user *frame; |
@@ -286,7 +286,7 @@ __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, | |||
286 | int err = 0; | 286 | int err = 0; |
287 | void __user *fpstate = NULL; | 287 | void __user *fpstate = NULL; |
288 | 288 | ||
289 | frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); | 289 | frame = get_sigframe(&ksig->ka, regs, sizeof(*frame), &fpstate); |
290 | 290 | ||
291 | if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) | 291 | if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) |
292 | return -EFAULT; | 292 | return -EFAULT; |
@@ -307,8 +307,8 @@ __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, | |||
307 | restorer = VDSO32_SYMBOL(current->mm->context.vdso, sigreturn); | 307 | restorer = VDSO32_SYMBOL(current->mm->context.vdso, sigreturn); |
308 | else | 308 | else |
309 | restorer = &frame->retcode; | 309 | restorer = &frame->retcode; |
310 | if (ka->sa.sa_flags & SA_RESTORER) | 310 | if (ksig->ka.sa.sa_flags & SA_RESTORER) |
311 | restorer = ka->sa.sa_restorer; | 311 | restorer = ksig->ka.sa.sa_restorer; |
312 | 312 | ||
313 | /* Set up to return from userspace. */ | 313 | /* Set up to return from userspace. */ |
314 | err |= __put_user(restorer, &frame->pretcode); | 314 | err |= __put_user(restorer, &frame->pretcode); |
@@ -327,7 +327,7 @@ __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, | |||
327 | 327 | ||
328 | /* Set up registers for signal handler */ | 328 | /* Set up registers for signal handler */ |
329 | regs->sp = (unsigned long)frame; | 329 | regs->sp = (unsigned long)frame; |
330 | regs->ip = (unsigned long)ka->sa.sa_handler; | 330 | regs->ip = (unsigned long)ksig->ka.sa.sa_handler; |
331 | regs->ax = (unsigned long)sig; | 331 | regs->ax = (unsigned long)sig; |
332 | regs->dx = 0; | 332 | regs->dx = 0; |
333 | regs->cx = 0; | 333 | regs->cx = 0; |
@@ -340,7 +340,7 @@ __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, | |||
340 | return 0; | 340 | return 0; |
341 | } | 341 | } |
342 | 342 | ||
343 | static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | 343 | static int __setup_rt_frame(int sig, struct ksignal *ksig, |
344 | sigset_t *set, struct pt_regs *regs) | 344 | sigset_t *set, struct pt_regs *regs) |
345 | { | 345 | { |
346 | struct rt_sigframe __user *frame; | 346 | struct rt_sigframe __user *frame; |
@@ -348,7 +348,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
348 | int err = 0; | 348 | int err = 0; |
349 | void __user *fpstate = NULL; | 349 | void __user *fpstate = NULL; |
350 | 350 | ||
351 | frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); | 351 | frame = get_sigframe(&ksig->ka, regs, sizeof(*frame), &fpstate); |
352 | 352 | ||
353 | if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) | 353 | if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) |
354 | return -EFAULT; | 354 | return -EFAULT; |
@@ -364,15 +364,12 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
364 | else | 364 | else |
365 | put_user_ex(0, &frame->uc.uc_flags); | 365 | put_user_ex(0, &frame->uc.uc_flags); |
366 | put_user_ex(0, &frame->uc.uc_link); | 366 | put_user_ex(0, &frame->uc.uc_link); |
367 | put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); | 367 | err |= __save_altstack(&frame->uc.uc_stack, regs->sp); |
368 | put_user_ex(sas_ss_flags(regs->sp), | ||
369 | &frame->uc.uc_stack.ss_flags); | ||
370 | put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size); | ||
371 | 368 | ||
372 | /* Set up to return from userspace. */ | 369 | /* Set up to return from userspace. */ |
373 | restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); | 370 | restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); |
374 | if (ka->sa.sa_flags & SA_RESTORER) | 371 | if (ksig->ka.sa.sa_flags & SA_RESTORER) |
375 | restorer = ka->sa.sa_restorer; | 372 | restorer = ksig->ka.sa.sa_restorer; |
376 | put_user_ex(restorer, &frame->pretcode); | 373 | put_user_ex(restorer, &frame->pretcode); |
377 | 374 | ||
378 | /* | 375 | /* |
@@ -385,7 +382,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
385 | put_user_ex(*((u64 *)&rt_retcode), (u64 *)frame->retcode); | 382 | put_user_ex(*((u64 *)&rt_retcode), (u64 *)frame->retcode); |
386 | } put_user_catch(err); | 383 | } put_user_catch(err); |
387 | 384 | ||
388 | err |= copy_siginfo_to_user(&frame->info, info); | 385 | err |= copy_siginfo_to_user(&frame->info, &ksig->info); |
389 | err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, | 386 | err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, |
390 | regs, set->sig[0]); | 387 | regs, set->sig[0]); |
391 | err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); | 388 | err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); |
@@ -395,7 +392,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
395 | 392 | ||
396 | /* Set up registers for signal handler */ | 393 | /* Set up registers for signal handler */ |
397 | regs->sp = (unsigned long)frame; | 394 | regs->sp = (unsigned long)frame; |
398 | regs->ip = (unsigned long)ka->sa.sa_handler; | 395 | regs->ip = (unsigned long)ksig->ka.sa.sa_handler; |
399 | regs->ax = (unsigned long)sig; | 396 | regs->ax = (unsigned long)sig; |
400 | regs->dx = (unsigned long)&frame->info; | 397 | regs->dx = (unsigned long)&frame->info; |
401 | regs->cx = (unsigned long)&frame->uc; | 398 | regs->cx = (unsigned long)&frame->uc; |
@@ -408,21 +405,20 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
408 | return 0; | 405 | return 0; |
409 | } | 406 | } |
410 | #else /* !CONFIG_X86_32 */ | 407 | #else /* !CONFIG_X86_32 */ |
411 | static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | 408 | static int __setup_rt_frame(int sig, struct ksignal *ksig, |
412 | sigset_t *set, struct pt_regs *regs) | 409 | sigset_t *set, struct pt_regs *regs) |
413 | { | 410 | { |
414 | struct rt_sigframe __user *frame; | 411 | struct rt_sigframe __user *frame; |
415 | void __user *fp = NULL; | 412 | void __user *fp = NULL; |
416 | int err = 0; | 413 | int err = 0; |
417 | struct task_struct *me = current; | ||
418 | 414 | ||
419 | frame = get_sigframe(ka, regs, sizeof(struct rt_sigframe), &fp); | 415 | frame = get_sigframe(&ksig->ka, regs, sizeof(struct rt_sigframe), &fp); |
420 | 416 | ||
421 | if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) | 417 | if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) |
422 | return -EFAULT; | 418 | return -EFAULT; |
423 | 419 | ||
424 | if (ka->sa.sa_flags & SA_SIGINFO) { | 420 | if (ksig->ka.sa.sa_flags & SA_SIGINFO) { |
425 | if (copy_siginfo_to_user(&frame->info, info)) | 421 | if (copy_siginfo_to_user(&frame->info, &ksig->info)) |
426 | return -EFAULT; | 422 | return -EFAULT; |
427 | } | 423 | } |
428 | 424 | ||
@@ -433,16 +429,13 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
433 | else | 429 | else |
434 | put_user_ex(0, &frame->uc.uc_flags); | 430 | put_user_ex(0, &frame->uc.uc_flags); |
435 | put_user_ex(0, &frame->uc.uc_link); | 431 | put_user_ex(0, &frame->uc.uc_link); |
436 | put_user_ex(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp); | 432 | err |= __save_altstack(&frame->uc.uc_stack, regs->sp); |
437 | put_user_ex(sas_ss_flags(regs->sp), | ||
438 | &frame->uc.uc_stack.ss_flags); | ||
439 | put_user_ex(me->sas_ss_size, &frame->uc.uc_stack.ss_size); | ||
440 | 433 | ||
441 | /* Set up to return from userspace. If provided, use a stub | 434 | /* Set up to return from userspace. If provided, use a stub |
442 | already in userspace. */ | 435 | already in userspace. */ |
443 | /* x86-64 should always use SA_RESTORER. */ | 436 | /* x86-64 should always use SA_RESTORER. */ |
444 | if (ka->sa.sa_flags & SA_RESTORER) { | 437 | if (ksig->ka.sa.sa_flags & SA_RESTORER) { |
445 | put_user_ex(ka->sa.sa_restorer, &frame->pretcode); | 438 | put_user_ex(ksig->ka.sa.sa_restorer, &frame->pretcode); |
446 | } else { | 439 | } else { |
447 | /* could use a vstub here */ | 440 | /* could use a vstub here */ |
448 | err |= -EFAULT; | 441 | err |= -EFAULT; |
@@ -464,7 +457,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
464 | next argument after the signal number on the stack. */ | 457 | next argument after the signal number on the stack. */ |
465 | regs->si = (unsigned long)&frame->info; | 458 | regs->si = (unsigned long)&frame->info; |
466 | regs->dx = (unsigned long)&frame->uc; | 459 | regs->dx = (unsigned long)&frame->uc; |
467 | regs->ip = (unsigned long) ka->sa.sa_handler; | 460 | regs->ip = (unsigned long) ksig->ka.sa.sa_handler; |
468 | 461 | ||
469 | regs->sp = (unsigned long)frame; | 462 | regs->sp = (unsigned long)frame; |
470 | 463 | ||
@@ -476,8 +469,8 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
476 | } | 469 | } |
477 | #endif /* CONFIG_X86_32 */ | 470 | #endif /* CONFIG_X86_32 */ |
478 | 471 | ||
479 | static int x32_setup_rt_frame(int sig, struct k_sigaction *ka, | 472 | static int x32_setup_rt_frame(struct ksignal *ksig, |
480 | siginfo_t *info, compat_sigset_t *set, | 473 | compat_sigset_t *set, |
481 | struct pt_regs *regs) | 474 | struct pt_regs *regs) |
482 | { | 475 | { |
483 | #ifdef CONFIG_X86_X32_ABI | 476 | #ifdef CONFIG_X86_X32_ABI |
@@ -486,13 +479,13 @@ static int x32_setup_rt_frame(int sig, struct k_sigaction *ka, | |||
486 | int err = 0; | 479 | int err = 0; |
487 | void __user *fpstate = NULL; | 480 | void __user *fpstate = NULL; |
488 | 481 | ||
489 | frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); | 482 | frame = get_sigframe(&ksig->ka, regs, sizeof(*frame), &fpstate); |
490 | 483 | ||
491 | if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) | 484 | if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) |
492 | return -EFAULT; | 485 | return -EFAULT; |
493 | 486 | ||
494 | if (ka->sa.sa_flags & SA_SIGINFO) { | 487 | if (ksig->ka.sa.sa_flags & SA_SIGINFO) { |
495 | if (copy_siginfo_to_user32(&frame->info, info)) | 488 | if (copy_siginfo_to_user32(&frame->info, &ksig->info)) |
496 | return -EFAULT; | 489 | return -EFAULT; |
497 | } | 490 | } |
498 | 491 | ||
@@ -503,14 +496,11 @@ static int x32_setup_rt_frame(int sig, struct k_sigaction *ka, | |||
503 | else | 496 | else |
504 | put_user_ex(0, &frame->uc.uc_flags); | 497 | put_user_ex(0, &frame->uc.uc_flags); |
505 | put_user_ex(0, &frame->uc.uc_link); | 498 | put_user_ex(0, &frame->uc.uc_link); |
506 | put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); | 499 | err |= __compat_save_altstack(&frame->uc.uc_stack, regs->sp); |
507 | put_user_ex(sas_ss_flags(regs->sp), | ||
508 | &frame->uc.uc_stack.ss_flags); | ||
509 | put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size); | ||
510 | put_user_ex(0, &frame->uc.uc__pad0); | 500 | put_user_ex(0, &frame->uc.uc__pad0); |
511 | 501 | ||
512 | if (ka->sa.sa_flags & SA_RESTORER) { | 502 | if (ksig->ka.sa.sa_flags & SA_RESTORER) { |
513 | restorer = ka->sa.sa_restorer; | 503 | restorer = ksig->ka.sa.sa_restorer; |
514 | } else { | 504 | } else { |
515 | /* could use a vstub here */ | 505 | /* could use a vstub here */ |
516 | restorer = NULL; | 506 | restorer = NULL; |
@@ -528,10 +518,10 @@ static int x32_setup_rt_frame(int sig, struct k_sigaction *ka, | |||
528 | 518 | ||
529 | /* Set up registers for signal handler */ | 519 | /* Set up registers for signal handler */ |
530 | regs->sp = (unsigned long) frame; | 520 | regs->sp = (unsigned long) frame; |
531 | regs->ip = (unsigned long) ka->sa.sa_handler; | 521 | regs->ip = (unsigned long) ksig->ka.sa.sa_handler; |
532 | 522 | ||
533 | /* We use the x32 calling convention here... */ | 523 | /* We use the x32 calling convention here... */ |
534 | regs->di = sig; | 524 | regs->di = ksig->sig; |
535 | regs->si = (unsigned long) &frame->info; | 525 | regs->si = (unsigned long) &frame->info; |
536 | regs->dx = (unsigned long) &frame->uc; | 526 | regs->dx = (unsigned long) &frame->uc; |
537 | 527 | ||
@@ -545,77 +535,13 @@ static int x32_setup_rt_frame(int sig, struct k_sigaction *ka, | |||
545 | return 0; | 535 | return 0; |
546 | } | 536 | } |
547 | 537 | ||
548 | #ifdef CONFIG_X86_32 | ||
549 | /* | ||
550 | * Atomically swap in the new signal mask, and wait for a signal. | ||
551 | */ | ||
552 | asmlinkage int | ||
553 | sys_sigsuspend(int history0, int history1, old_sigset_t mask) | ||
554 | { | ||
555 | sigset_t blocked; | ||
556 | siginitset(&blocked, mask); | ||
557 | return sigsuspend(&blocked); | ||
558 | } | ||
559 | |||
560 | asmlinkage int | ||
561 | sys_sigaction(int sig, const struct old_sigaction __user *act, | ||
562 | struct old_sigaction __user *oact) | ||
563 | { | ||
564 | struct k_sigaction new_ka, old_ka; | ||
565 | int ret = 0; | ||
566 | |||
567 | if (act) { | ||
568 | old_sigset_t mask; | ||
569 | |||
570 | if (!access_ok(VERIFY_READ, act, sizeof(*act))) | ||
571 | return -EFAULT; | ||
572 | |||
573 | get_user_try { | ||
574 | get_user_ex(new_ka.sa.sa_handler, &act->sa_handler); | ||
575 | get_user_ex(new_ka.sa.sa_flags, &act->sa_flags); | ||
576 | get_user_ex(mask, &act->sa_mask); | ||
577 | get_user_ex(new_ka.sa.sa_restorer, &act->sa_restorer); | ||
578 | } get_user_catch(ret); | ||
579 | |||
580 | if (ret) | ||
581 | return -EFAULT; | ||
582 | siginitset(&new_ka.sa.sa_mask, mask); | ||
583 | } | ||
584 | |||
585 | ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); | ||
586 | |||
587 | if (!ret && oact) { | ||
588 | if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact))) | ||
589 | return -EFAULT; | ||
590 | |||
591 | put_user_try { | ||
592 | put_user_ex(old_ka.sa.sa_handler, &oact->sa_handler); | ||
593 | put_user_ex(old_ka.sa.sa_flags, &oact->sa_flags); | ||
594 | put_user_ex(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); | ||
595 | put_user_ex(old_ka.sa.sa_restorer, &oact->sa_restorer); | ||
596 | } put_user_catch(ret); | ||
597 | |||
598 | if (ret) | ||
599 | return -EFAULT; | ||
600 | } | ||
601 | |||
602 | return ret; | ||
603 | } | ||
604 | #endif /* CONFIG_X86_32 */ | ||
605 | |||
606 | long | ||
607 | sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, | ||
608 | struct pt_regs *regs) | ||
609 | { | ||
610 | return do_sigaltstack(uss, uoss, regs->sp); | ||
611 | } | ||
612 | |||
613 | /* | 538 | /* |
614 | * Do a signal return; undo the signal stack. | 539 | * Do a signal return; undo the signal stack. |
615 | */ | 540 | */ |
616 | #ifdef CONFIG_X86_32 | 541 | #ifdef CONFIG_X86_32 |
617 | unsigned long sys_sigreturn(struct pt_regs *regs) | 542 | unsigned long sys_sigreturn(void) |
618 | { | 543 | { |
544 | struct pt_regs *regs = current_pt_regs(); | ||
619 | struct sigframe __user *frame; | 545 | struct sigframe __user *frame; |
620 | unsigned long ax; | 546 | unsigned long ax; |
621 | sigset_t set; | 547 | sigset_t set; |
@@ -642,8 +568,9 @@ badframe: | |||
642 | } | 568 | } |
643 | #endif /* CONFIG_X86_32 */ | 569 | #endif /* CONFIG_X86_32 */ |
644 | 570 | ||
645 | long sys_rt_sigreturn(struct pt_regs *regs) | 571 | long sys_rt_sigreturn(void) |
646 | { | 572 | { |
573 | struct pt_regs *regs = current_pt_regs(); | ||
647 | struct rt_sigframe __user *frame; | 574 | struct rt_sigframe __user *frame; |
648 | unsigned long ax; | 575 | unsigned long ax; |
649 | sigset_t set; | 576 | sigset_t set; |
@@ -659,7 +586,7 @@ long sys_rt_sigreturn(struct pt_regs *regs) | |||
659 | if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) | 586 | if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) |
660 | goto badframe; | 587 | goto badframe; |
661 | 588 | ||
662 | if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT) | 589 | if (restore_altstack(&frame->uc.uc_stack)) |
663 | goto badframe; | 590 | goto badframe; |
664 | 591 | ||
665 | return ax; | 592 | return ax; |
@@ -684,30 +611,29 @@ static int signr_convert(int sig) | |||
684 | } | 611 | } |
685 | 612 | ||
686 | static int | 613 | static int |
687 | setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | 614 | setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs) |
688 | struct pt_regs *regs) | ||
689 | { | 615 | { |
690 | int usig = signr_convert(sig); | 616 | int usig = signr_convert(ksig->sig); |
691 | sigset_t *set = sigmask_to_save(); | 617 | sigset_t *set = sigmask_to_save(); |
692 | compat_sigset_t *cset = (compat_sigset_t *) set; | 618 | compat_sigset_t *cset = (compat_sigset_t *) set; |
693 | 619 | ||
694 | /* Set up the stack frame */ | 620 | /* Set up the stack frame */ |
695 | if (is_ia32_frame()) { | 621 | if (is_ia32_frame()) { |
696 | if (ka->sa.sa_flags & SA_SIGINFO) | 622 | if (ksig->ka.sa.sa_flags & SA_SIGINFO) |
697 | return ia32_setup_rt_frame(usig, ka, info, cset, regs); | 623 | return ia32_setup_rt_frame(usig, ksig, cset, regs); |
698 | else | 624 | else |
699 | return ia32_setup_frame(usig, ka, cset, regs); | 625 | return ia32_setup_frame(usig, ksig, cset, regs); |
700 | } else if (is_x32_frame()) { | 626 | } else if (is_x32_frame()) { |
701 | return x32_setup_rt_frame(usig, ka, info, cset, regs); | 627 | return x32_setup_rt_frame(ksig, cset, regs); |
702 | } else { | 628 | } else { |
703 | return __setup_rt_frame(sig, ka, info, set, regs); | 629 | return __setup_rt_frame(ksig->sig, ksig, set, regs); |
704 | } | 630 | } |
705 | } | 631 | } |
706 | 632 | ||
707 | static void | 633 | static void |
708 | handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | 634 | handle_signal(struct ksignal *ksig, struct pt_regs *regs) |
709 | struct pt_regs *regs) | ||
710 | { | 635 | { |
636 | bool failed; | ||
711 | /* Are we from a system call? */ | 637 | /* Are we from a system call? */ |
712 | if (syscall_get_nr(current, regs) >= 0) { | 638 | if (syscall_get_nr(current, regs) >= 0) { |
713 | /* If so, check system call restarting.. */ | 639 | /* If so, check system call restarting.. */ |
@@ -718,7 +644,7 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | |||
718 | break; | 644 | break; |
719 | 645 | ||
720 | case -ERESTARTSYS: | 646 | case -ERESTARTSYS: |
721 | if (!(ka->sa.sa_flags & SA_RESTART)) { | 647 | if (!(ksig->ka.sa.sa_flags & SA_RESTART)) { |
722 | regs->ax = -EINTR; | 648 | regs->ax = -EINTR; |
723 | break; | 649 | break; |
724 | } | 650 | } |
@@ -738,26 +664,21 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | |||
738 | likely(test_and_clear_thread_flag(TIF_FORCED_TF))) | 664 | likely(test_and_clear_thread_flag(TIF_FORCED_TF))) |
739 | regs->flags &= ~X86_EFLAGS_TF; | 665 | regs->flags &= ~X86_EFLAGS_TF; |
740 | 666 | ||
741 | if (setup_rt_frame(sig, ka, info, regs) < 0) { | 667 | failed = (setup_rt_frame(ksig, regs) < 0); |
742 | force_sigsegv(sig, current); | 668 | if (!failed) { |
743 | return; | 669 | /* |
670 | * Clear the direction flag as per the ABI for function entry. | ||
671 | */ | ||
672 | regs->flags &= ~X86_EFLAGS_DF; | ||
673 | /* | ||
674 | * Clear TF when entering the signal handler, but | ||
675 | * notify any tracer that was single-stepping it. | ||
676 | * The tracer may want to single-step inside the | ||
677 | * handler too. | ||
678 | */ | ||
679 | regs->flags &= ~X86_EFLAGS_TF; | ||
744 | } | 680 | } |
745 | 681 | signal_setup_done(failed, ksig, test_thread_flag(TIF_SINGLESTEP)); | |
746 | /* | ||
747 | * Clear the direction flag as per the ABI for function entry. | ||
748 | */ | ||
749 | regs->flags &= ~X86_EFLAGS_DF; | ||
750 | |||
751 | /* | ||
752 | * Clear TF when entering the signal handler, but | ||
753 | * notify any tracer that was single-stepping it. | ||
754 | * The tracer may want to single-step inside the | ||
755 | * handler too. | ||
756 | */ | ||
757 | regs->flags &= ~X86_EFLAGS_TF; | ||
758 | |||
759 | signal_delivered(sig, info, ka, regs, | ||
760 | test_thread_flag(TIF_SINGLESTEP)); | ||
761 | } | 682 | } |
762 | 683 | ||
763 | #ifdef CONFIG_X86_32 | 684 | #ifdef CONFIG_X86_32 |
@@ -774,14 +695,11 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | |||
774 | */ | 695 | */ |
775 | static void do_signal(struct pt_regs *regs) | 696 | static void do_signal(struct pt_regs *regs) |
776 | { | 697 | { |
777 | struct k_sigaction ka; | 698 | struct ksignal ksig; |
778 | siginfo_t info; | ||
779 | int signr; | ||
780 | 699 | ||
781 | signr = get_signal_to_deliver(&info, &ka, regs, NULL); | 700 | if (get_signal(&ksig)) { |
782 | if (signr > 0) { | ||
783 | /* Whee! Actually deliver the signal. */ | 701 | /* Whee! Actually deliver the signal. */ |
784 | handle_signal(signr, &info, &ka, regs); | 702 | handle_signal(&ksig, regs); |
785 | return; | 703 | return; |
786 | } | 704 | } |
787 | 705 | ||
@@ -860,12 +778,12 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where) | |||
860 | } | 778 | } |
861 | 779 | ||
862 | #ifdef CONFIG_X86_X32_ABI | 780 | #ifdef CONFIG_X86_X32_ABI |
863 | asmlinkage long sys32_x32_rt_sigreturn(struct pt_regs *regs) | 781 | asmlinkage long sys32_x32_rt_sigreturn(void) |
864 | { | 782 | { |
783 | struct pt_regs *regs = current_pt_regs(); | ||
865 | struct rt_sigframe_x32 __user *frame; | 784 | struct rt_sigframe_x32 __user *frame; |
866 | sigset_t set; | 785 | sigset_t set; |
867 | unsigned long ax; | 786 | unsigned long ax; |
868 | struct pt_regs tregs; | ||
869 | 787 | ||
870 | frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8); | 788 | frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8); |
871 | 789 | ||
@@ -879,8 +797,7 @@ asmlinkage long sys32_x32_rt_sigreturn(struct pt_regs *regs) | |||
879 | if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) | 797 | if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) |
880 | goto badframe; | 798 | goto badframe; |
881 | 799 | ||
882 | tregs = *regs; | 800 | if (compat_restore_altstack(&frame->uc.uc_stack)) |
883 | if (sys32_sigaltstack(&frame->uc.uc_stack, NULL, &tregs) == -EFAULT) | ||
884 | goto badframe; | 801 | goto badframe; |
885 | 802 | ||
886 | return ax; | 803 | return ax; |
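
The signal.c rework above switches the frame-setup helpers to struct ksignal and to __save_altstack()/restore_altstack() for the uc_stack field. A small userspace counterpart (ordinary libc calls, nothing kernel-side) shows the ABI those frames serve: an SA_SIGINFO handler delivered on an alternate stack, whose saved settings appear in uc_stack.

#define _XOPEN_SOURCE 700
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <ucontext.h>

/* printf is not async-signal-safe; acceptable for a demo only. */
static void handler(int sig, siginfo_t *info, void *ctx)
{
    ucontext_t *uc = ctx;
    printf("sig=%d from pid %d, uc_stack.ss_size=%zu\n",
           sig, (int)info->si_pid, uc->uc_stack.ss_size);
}

int main(void)
{
    /* Alternate stack; the kernel records it into uc_stack on delivery. */
    stack_t ss = { .ss_sp = malloc(SIGSTKSZ), .ss_size = SIGSTKSZ };
    sigaltstack(&ss, NULL);

    struct sigaction sa = { 0 };
    sa.sa_sigaction = handler;
    sa.sa_flags = SA_SIGINFO | SA_ONSTACK;   /* rt frame with siginfo */
    sigemptyset(&sa.sa_mask);
    sigaction(SIGUSR1, &sa, NULL);

    raise(SIGUSR1);
    free(ss.ss_sp);
    return 0;
}
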
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index ed0fe385289d..a6ceaedc396a 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -1369,7 +1369,7 @@ static inline void mwait_play_dead(void) | |||
1369 | void *mwait_ptr; | 1369 | void *mwait_ptr; |
1370 | struct cpuinfo_x86 *c = __this_cpu_ptr(&cpu_info); | 1370 | struct cpuinfo_x86 *c = __this_cpu_ptr(&cpu_info); |
1371 | 1371 | ||
1372 | if (!(this_cpu_has(X86_FEATURE_MWAIT) && mwait_usable(c))) | 1372 | if (!this_cpu_has(X86_FEATURE_MWAIT)) |
1373 | return; | 1373 | return; |
1374 | if (!this_cpu_has(X86_FEATURE_CLFLSH)) | 1374 | if (!this_cpu_has(X86_FEATURE_CLFLSH)) |
1375 | return; | 1375 | return; |
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index cd3b2438a980..9b4d51d0c0d0 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c | |||
@@ -165,10 +165,11 @@ void set_task_blockstep(struct task_struct *task, bool on) | |||
165 | * Ensure irq/preemption can't change debugctl in between. | 165 | * Ensure irq/preemption can't change debugctl in between. |
166 | * Note also that both TIF_BLOCKSTEP and debugctl should | 166 | * Note also that both TIF_BLOCKSTEP and debugctl should |
167 | * be changed atomically wrt preemption. | 167 | * be changed atomically wrt preemption. |
168 | * FIXME: this means that set/clear TIF_BLOCKSTEP is simply | 168 | * |
169 | * wrong if task != current, SIGKILL can wakeup the stopped | 169 | * NOTE: this means that set/clear TIF_BLOCKSTEP is only safe if |
170 | * tracee and set/clear can play with the running task, this | 170 | * task is current or it can't be running, otherwise we can race |
171 | * can confuse the next __switch_to_xtra(). | 171 | * with __switch_to_xtra(). We rely on ptrace_freeze_traced() but |
172 | * PTRACE_KILL is not safe. | ||
172 | */ | 173 | */ |
173 | local_irq_disable(); | 174 | local_irq_disable(); |
174 | debugctl = get_debugctlmsr(); | 175 | debugctl = get_debugctlmsr(); |
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index 97ef74b88e0f..dbded5aedb81 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c | |||
@@ -157,7 +157,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, | |||
157 | if (flags & MAP_FIXED) | 157 | if (flags & MAP_FIXED) |
158 | return addr; | 158 | return addr; |
159 | 159 | ||
160 | /* for MAP_32BIT mappings we force the legact mmap base */ | 160 | /* for MAP_32BIT mappings we force the legacy mmap base */ |
161 | if (!test_thread_flag(TIF_ADDR32) && (flags & MAP_32BIT)) | 161 | if (!test_thread_flag(TIF_ADDR32) && (flags & MAP_32BIT)) |
162 | goto bottomup; | 162 | goto bottomup; |
163 | 163 | ||
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index eb8586693e0b..68bda7a84159 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -69,9 +69,6 @@ | |||
69 | 69 | ||
70 | asmlinkage int system_call(void); | 70 | asmlinkage int system_call(void); |
71 | 71 | ||
72 | /* Do we ignore FPU interrupts ? */ | ||
73 | char ignore_fpu_irq; | ||
74 | |||
75 | /* | 72 | /* |
76 | * The IDT has to be page-aligned to simplify the Pentium | 73 | * The IDT has to be page-aligned to simplify the Pentium |
77 | * F0 0F bug workaround. | 74 | * F0 0F bug workaround. |
@@ -564,9 +561,6 @@ void math_error(struct pt_regs *regs, int error_code, int trapnr) | |||
564 | 561 | ||
565 | dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) | 562 | dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) |
566 | { | 563 | { |
567 | #ifdef CONFIG_X86_32 | ||
568 | ignore_fpu_irq = 1; | ||
569 | #endif | ||
570 | exception_enter(regs); | 564 | exception_enter(regs); |
571 | math_error(regs, error_code, X86_TRAP_MF); | 565 | math_error(regs, error_code, X86_TRAP_MF); |
572 | exception_exit(regs); | 566 | exception_exit(regs); |
@@ -694,10 +688,19 @@ void __init early_trap_init(void) | |||
694 | set_intr_gate_ist(X86_TRAP_DB, &debug, DEBUG_STACK); | 688 | set_intr_gate_ist(X86_TRAP_DB, &debug, DEBUG_STACK); |
695 | /* int3 can be called from all */ | 689 | /* int3 can be called from all */ |
696 | set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK); | 690 | set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK); |
691 | #ifdef CONFIG_X86_32 | ||
697 | set_intr_gate(X86_TRAP_PF, &page_fault); | 692 | set_intr_gate(X86_TRAP_PF, &page_fault); |
693 | #endif | ||
698 | load_idt(&idt_descr); | 694 | load_idt(&idt_descr); |
699 | } | 695 | } |
700 | 696 | ||
697 | void __init early_trap_pf_init(void) | ||
698 | { | ||
699 | #ifdef CONFIG_X86_64 | ||
700 | set_intr_gate(X86_TRAP_PF, &page_fault); | ||
701 | #endif | ||
702 | } | ||
703 | |||
701 | void __init trap_init(void) | 704 | void __init trap_init(void) |
702 | { | 705 | { |
703 | int i; | 706 | int i; |
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 06ccb5073a3f..4b9ea101fe3b 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c | |||
@@ -623,7 +623,8 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) | |||
623 | ns_now = __cycles_2_ns(tsc_now); | 623 | ns_now = __cycles_2_ns(tsc_now); |
624 | 624 | ||
625 | if (cpu_khz) { | 625 | if (cpu_khz) { |
626 | *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz; | 626 | *scale = ((NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR) + |
627 | cpu_khz / 2) / cpu_khz; | ||
627 | *offset = ns_now - mult_frac(tsc_now, *scale, | 628 | *offset = ns_now - mult_frac(tsc_now, *scale, |
628 | (1UL << CYC2NS_SCALE_FACTOR)); | 629 | (1UL << CYC2NS_SCALE_FACTOR)); |
629 | } | 630 | } |
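
The tsc.c hunk changes the cyc2ns scale from truncating to rounded integer division by adding half the divisor before dividing. A standalone illustration follows; the shift mirrors the kernel's CYC2NS_SCALE_FACTOR, while the frequency value is made up.

#include <stdio.h>

#define NSEC_PER_MSEC        1000000UL
#define CYC2NS_SCALE_FACTOR  10        /* fixed-point shift, as in timer.h */

int main(void)
{
    unsigned long cpu_khz = 2100000;   /* hypothetical 2.1 GHz part */

    unsigned long truncated = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR) / cpu_khz;
    unsigned long rounded   = ((NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR) +
                               cpu_khz / 2) / cpu_khz;

    /* 1024000000 / 2100000 = 487.6..., so rounding picks 488, not 487. */
    printf("truncated = %lu, rounded = %lu\n", truncated, rounded);
    return 0;
}
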
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index c71025b67462..0ba4cfb4f412 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c | |||
@@ -680,8 +680,10 @@ static bool __skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) | |||
680 | if (auprobe->insn[i] == 0x66) | 680 | if (auprobe->insn[i] == 0x66) |
681 | continue; | 681 | continue; |
682 | 682 | ||
683 | if (auprobe->insn[i] == 0x90) | 683 | if (auprobe->insn[i] == 0x90) { |
684 | regs->ip += i + 1; | ||
684 | return true; | 685 | return true; |
686 | } | ||
685 | 687 | ||
686 | break; | 688 | break; |
687 | } | 689 | } |
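
The uprobes.c fix makes the NOP fast path advance the saved instruction pointer instead of leaving it on the probed instruction. A userspace model of that check is sketched below, using the same prefix/NOP byte values but a plain struct instead of pt_regs.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Model of the __skip_sstep() NOP case: any number of 0x66 operand-size
 * prefixes followed by 0x90 is a NOP, and ip must advance past it. */
static bool skip_nop(const uint8_t *insn, int max_len, uint64_t *ip)
{
    for (int i = 0; i < max_len; i++) {
        if (insn[i] == 0x66)
            continue;
        if (insn[i] == 0x90) {
            *ip += i + 1;              /* length of the prefixed NOP */
            return true;
        }
        break;
    }
    return false;
}

int main(void)
{
    const uint8_t osp_nop[] = { 0x66, 0x66, 0x90 };   /* 3-byte NOP */
    uint64_t ip = 0x400000;

    if (skip_nop(osp_nop, (int)sizeof(osp_nop), &ip))
        printf("NOP skipped, ip now 0x%llx\n", (unsigned long long)ip);
    return 0;
}
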
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 1dfe69cc78a8..1cf5766dde16 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c | |||
@@ -202,7 +202,7 @@ out: | |||
202 | static int do_vm86_irq_handling(int subfunction, int irqnumber); | 202 | static int do_vm86_irq_handling(int subfunction, int irqnumber); |
203 | static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk); | 203 | static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk); |
204 | 204 | ||
205 | int sys_vm86old(struct vm86_struct __user *v86, struct pt_regs *regs) | 205 | int sys_vm86old(struct vm86_struct __user *v86) |
206 | { | 206 | { |
207 | struct kernel_vm86_struct info; /* declare this _on top_, | 207 | struct kernel_vm86_struct info; /* declare this _on top_, |
208 | * this avoids wasting of stack space. | 208 | * this avoids wasting of stack space. |
@@ -222,7 +222,7 @@ int sys_vm86old(struct vm86_struct __user *v86, struct pt_regs *regs) | |||
222 | if (tmp) | 222 | if (tmp) |
223 | goto out; | 223 | goto out; |
224 | memset(&info.vm86plus, 0, (int)&info.regs32 - (int)&info.vm86plus); | 224 | memset(&info.vm86plus, 0, (int)&info.regs32 - (int)&info.vm86plus); |
225 | info.regs32 = regs; | 225 | info.regs32 = current_pt_regs(); |
226 | tsk->thread.vm86_info = v86; | 226 | tsk->thread.vm86_info = v86; |
227 | do_sys_vm86(&info, tsk); | 227 | do_sys_vm86(&info, tsk); |
228 | ret = 0; /* we never return here */ | 228 | ret = 0; /* we never return here */ |
@@ -231,7 +231,7 @@ out: | |||
231 | } | 231 | } |
232 | 232 | ||
233 | 233 | ||
234 | int sys_vm86(unsigned long cmd, unsigned long arg, struct pt_regs *regs) | 234 | int sys_vm86(unsigned long cmd, unsigned long arg) |
235 | { | 235 | { |
236 | struct kernel_vm86_struct info; /* declare this _on top_, | 236 | struct kernel_vm86_struct info; /* declare this _on top_, |
237 | * this avoids wasting of stack space. | 237 | * this avoids wasting of stack space. |
@@ -272,7 +272,7 @@ int sys_vm86(unsigned long cmd, unsigned long arg, struct pt_regs *regs) | |||
272 | ret = -EFAULT; | 272 | ret = -EFAULT; |
273 | if (tmp) | 273 | if (tmp) |
274 | goto out; | 274 | goto out; |
275 | info.regs32 = regs; | 275 | info.regs32 = current_pt_regs(); |
276 | info.vm86plus.is_vm86pus = 1; | 276 | info.vm86plus.is_vm86pus = 1; |
277 | tsk->thread.vm86_info = (struct vm86_struct __user *)v86; | 277 | tsk->thread.vm86_info = (struct vm86_struct __user *)v86; |
278 | do_sys_vm86(&info, tsk); | 278 | do_sys_vm86(&info, tsk); |
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 3a3e8c9e280d..9a907a67be8f 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c | |||
@@ -145,19 +145,6 @@ static int addr_to_vsyscall_nr(unsigned long addr) | |||
145 | return nr; | 145 | return nr; |
146 | } | 146 | } |
147 | 147 | ||
148 | #ifdef CONFIG_SECCOMP | ||
149 | static int vsyscall_seccomp(struct task_struct *tsk, int syscall_nr) | ||
150 | { | ||
151 | if (!seccomp_mode(&tsk->seccomp)) | ||
152 | return 0; | ||
153 | task_pt_regs(tsk)->orig_ax = syscall_nr; | ||
154 | task_pt_regs(tsk)->ax = syscall_nr; | ||
155 | return __secure_computing(syscall_nr); | ||
156 | } | ||
157 | #else | ||
158 | #define vsyscall_seccomp(_tsk, _nr) 0 | ||
159 | #endif | ||
160 | |||
161 | static bool write_ok_or_segv(unsigned long ptr, size_t size) | 148 | static bool write_ok_or_segv(unsigned long ptr, size_t size) |
162 | { | 149 | { |
163 | /* | 150 | /* |
@@ -190,10 +177,9 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) | |||
190 | { | 177 | { |
191 | struct task_struct *tsk; | 178 | struct task_struct *tsk; |
192 | unsigned long caller; | 179 | unsigned long caller; |
193 | int vsyscall_nr; | 180 | int vsyscall_nr, syscall_nr, tmp; |
194 | int prev_sig_on_uaccess_error; | 181 | int prev_sig_on_uaccess_error; |
195 | long ret; | 182 | long ret; |
196 | int skip; | ||
197 | 183 | ||
198 | /* | 184 | /* |
199 | * No point in checking CS -- the only way to get here is a user mode | 185 | * No point in checking CS -- the only way to get here is a user mode |
@@ -225,56 +211,84 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) | |||
225 | } | 211 | } |
226 | 212 | ||
227 | tsk = current; | 213 | tsk = current; |
228 | /* | ||
229 | * With a real vsyscall, page faults cause SIGSEGV. We want to | ||
230 | * preserve that behavior to make writing exploits harder. | ||
231 | */ | ||
232 | prev_sig_on_uaccess_error = current_thread_info()->sig_on_uaccess_error; | ||
233 | current_thread_info()->sig_on_uaccess_error = 1; | ||
234 | 214 | ||
235 | /* | 215 | /* |
216 | * Check for access_ok violations and find the syscall nr. | ||
217 | * | ||
236 | * NULL is a valid user pointer (in the access_ok sense) on 32-bit and | 218 | * NULL is a valid user pointer (in the access_ok sense) on 32-bit and |
237 | * 64-bit, so we don't need to special-case it here. For all the | 219 | * 64-bit, so we don't need to special-case it here. For all the |
238 | * vsyscalls, NULL means "don't write anything" not "write it at | 220 | * vsyscalls, NULL means "don't write anything" not "write it at |
239 | * address 0". | 221 | * address 0". |
240 | */ | 222 | */ |
241 | ret = -EFAULT; | ||
242 | skip = 0; | ||
243 | switch (vsyscall_nr) { | 223 | switch (vsyscall_nr) { |
244 | case 0: | 224 | case 0: |
245 | skip = vsyscall_seccomp(tsk, __NR_gettimeofday); | ||
246 | if (skip) | ||
247 | break; | ||
248 | |||
249 | if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) || | 225 | if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) || |
250 | !write_ok_or_segv(regs->si, sizeof(struct timezone))) | 226 | !write_ok_or_segv(regs->si, sizeof(struct timezone))) { |
251 | break; | 227 | ret = -EFAULT; |
228 | goto check_fault; | ||
229 | } | ||
230 | |||
231 | syscall_nr = __NR_gettimeofday; | ||
232 | break; | ||
233 | |||
234 | case 1: | ||
235 | if (!write_ok_or_segv(regs->di, sizeof(time_t))) { | ||
236 | ret = -EFAULT; | ||
237 | goto check_fault; | ||
238 | } | ||
239 | |||
240 | syscall_nr = __NR_time; | ||
241 | break; | ||
242 | |||
243 | case 2: | ||
244 | if (!write_ok_or_segv(regs->di, sizeof(unsigned)) || | ||
245 | !write_ok_or_segv(regs->si, sizeof(unsigned))) { | ||
246 | ret = -EFAULT; | ||
247 | goto check_fault; | ||
248 | } | ||
249 | |||
250 | syscall_nr = __NR_getcpu; | ||
251 | break; | ||
252 | } | ||
253 | |||
254 | /* | ||
255 | * Handle seccomp. regs->ip must be the original value. | ||
256 | * See seccomp_send_sigsys and Documentation/prctl/seccomp_filter.txt. | ||
257 | * | ||
258 | * We could optimize the seccomp disabled case, but performance | ||
259 | * here doesn't matter. | ||
260 | */ | ||
261 | regs->orig_ax = syscall_nr; | ||
262 | regs->ax = -ENOSYS; | ||
263 | tmp = secure_computing(syscall_nr); | ||
264 | if ((!tmp && regs->orig_ax != syscall_nr) || regs->ip != address) { | ||
265 | warn_bad_vsyscall(KERN_DEBUG, regs, | ||
266 | "seccomp tried to change syscall nr or ip"); | ||
267 | do_exit(SIGSYS); | ||
268 | } | ||
269 | if (tmp) | ||
270 | goto do_ret; /* skip requested */ | ||
252 | 271 | ||
272 | /* | ||
273 | * With a real vsyscall, page faults cause SIGSEGV. We want to | ||
274 | * preserve that behavior to make writing exploits harder. | ||
275 | */ | ||
276 | prev_sig_on_uaccess_error = current_thread_info()->sig_on_uaccess_error; | ||
277 | current_thread_info()->sig_on_uaccess_error = 1; | ||
278 | |||
279 | ret = -EFAULT; | ||
280 | switch (vsyscall_nr) { | ||
281 | case 0: | ||
253 | ret = sys_gettimeofday( | 282 | ret = sys_gettimeofday( |
254 | (struct timeval __user *)regs->di, | 283 | (struct timeval __user *)regs->di, |
255 | (struct timezone __user *)regs->si); | 284 | (struct timezone __user *)regs->si); |
256 | break; | 285 | break; |
257 | 286 | ||
258 | case 1: | 287 | case 1: |
259 | skip = vsyscall_seccomp(tsk, __NR_time); | ||
260 | if (skip) | ||
261 | break; | ||
262 | |||
263 | if (!write_ok_or_segv(regs->di, sizeof(time_t))) | ||
264 | break; | ||
265 | |||
266 | ret = sys_time((time_t __user *)regs->di); | 288 | ret = sys_time((time_t __user *)regs->di); |
267 | break; | 289 | break; |
268 | 290 | ||
269 | case 2: | 291 | case 2: |
270 | skip = vsyscall_seccomp(tsk, __NR_getcpu); | ||
271 | if (skip) | ||
272 | break; | ||
273 | |||
274 | if (!write_ok_or_segv(regs->di, sizeof(unsigned)) || | ||
275 | !write_ok_or_segv(regs->si, sizeof(unsigned))) | ||
276 | break; | ||
277 | |||
278 | ret = sys_getcpu((unsigned __user *)regs->di, | 292 | ret = sys_getcpu((unsigned __user *)regs->di, |
279 | (unsigned __user *)regs->si, | 293 | (unsigned __user *)regs->si, |
280 | NULL); | 294 | NULL); |
@@ -283,12 +297,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) | |||
283 | 297 | ||
284 | current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error; | 298 | current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error; |
285 | 299 | ||
286 | if (skip) { | 300 | check_fault: |
287 | if ((long)regs->ax <= 0L) /* seccomp errno emulation */ | ||
288 | goto do_ret; | ||
289 | goto done; /* seccomp trace/trap */ | ||
290 | } | ||
291 | |||
292 | if (ret == -EFAULT) { | 301 | if (ret == -EFAULT) { |
293 | /* Bad news -- userspace fed a bad pointer to a vsyscall. */ | 302 | /* Bad news -- userspace fed a bad pointer to a vsyscall. */ |
294 | warn_bad_vsyscall(KERN_INFO, regs, | 303 | warn_bad_vsyscall(KERN_INFO, regs, |
@@ -311,7 +320,6 @@ do_ret: | |||
311 | /* Emulate a ret instruction. */ | 320 | /* Emulate a ret instruction. */ |
312 | regs->ip = caller; | 321 | regs->ip = caller; |
313 | regs->sp += 8; | 322 | regs->sp += 8; |
314 | done: | ||
315 | return true; | 323 | return true; |
316 | 324 | ||
317 | sigsegv: | 325 | sigsegv: |
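
The vsyscall_64.c rework keeps two small mechanics that are easy to model in userspace: mapping the faulting address to a vsyscall number (the three entries sit 1024 bytes apart in one fixed page) and emulating the final ret. The sketch below uses a plain struct in place of pt_regs; the constants follow the legacy vsyscall layout as assumed here, not code taken from the kernel.

#include <stdint.h>
#include <stdio.h>

#define VSYSCALL_BASE 0xffffffffff600000ULL   /* legacy fixed mapping */

struct regs { uint64_t ip, sp; };

/* 0 = gettimeofday, 1 = time, 2 = getcpu; -1 if not a valid entry. */
static int addr_to_nr(uint64_t addr)
{
    int nr;

    if ((addr & ~0xC00ULL) != VSYSCALL_BASE)
        return -1;
    nr = (int)((addr & 0xC00ULL) >> 10);
    return nr < 3 ? nr : -1;
}

/* Emulate the "ret": pop the return address the caller pushed. */
static void emulate_ret(struct regs *r, const uint64_t *user_stack)
{
    r->ip = user_stack[0];
    r->sp += 8;
}

int main(void)
{
    uint64_t user_stack[1] = { 0x401234 };            /* fake return addr */
    struct regs r = { .ip = VSYSCALL_BASE + 0x400, .sp = 0 };

    printf("vsyscall nr = %d\n", addr_to_nr(r.ip));   /* 1, i.e. time() */
    emulate_ret(&r, user_stack);
    printf("resume at ip=0x%llx, sp advanced by %llu\n",
           (unsigned long long)r.ip, (unsigned long long)r.sp);
    return 0;
}
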
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 1330dd102950..b014d9414d08 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c | |||
@@ -59,6 +59,9 @@ EXPORT_SYMBOL(memcpy); | |||
59 | EXPORT_SYMBOL(__memcpy); | 59 | EXPORT_SYMBOL(__memcpy); |
60 | EXPORT_SYMBOL(memmove); | 60 | EXPORT_SYMBOL(memmove); |
61 | 61 | ||
62 | #ifndef CONFIG_DEBUG_VIRTUAL | ||
63 | EXPORT_SYMBOL(phys_base); | ||
64 | #endif | ||
62 | EXPORT_SYMBOL(empty_zero_page); | 65 | EXPORT_SYMBOL(empty_zero_page); |
63 | #ifndef CONFIG_PARAVIRT | 66 | #ifndef CONFIG_PARAVIRT |
64 | EXPORT_SYMBOL(native_load_gs_index); | 67 | EXPORT_SYMBOL(native_load_gs_index); |
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 7a3d075a814a..45a14dbbddaf 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <asm/time.h> | 19 | #include <asm/time.h> |
20 | #include <asm/irq.h> | 20 | #include <asm/irq.h> |
21 | #include <asm/io_apic.h> | 21 | #include <asm/io_apic.h> |
22 | #include <asm/hpet.h> | ||
22 | #include <asm/pat.h> | 23 | #include <asm/pat.h> |
23 | #include <asm/tsc.h> | 24 | #include <asm/tsc.h> |
24 | #include <asm/iommu.h> | 25 | #include <asm/iommu.h> |
@@ -62,10 +63,6 @@ struct x86_init_ops x86_init __initdata = { | |||
62 | .banner = default_banner, | 63 | .banner = default_banner, |
63 | }, | 64 | }, |
64 | 65 | ||
65 | .mapping = { | ||
66 | .pagetable_reserve = native_pagetable_reserve, | ||
67 | }, | ||
68 | |||
69 | .paging = { | 66 | .paging = { |
70 | .pagetable_init = native_pagetable_init, | 67 | .pagetable_init = native_pagetable_init, |
71 | }, | 68 | }, |
@@ -111,15 +108,22 @@ struct x86_platform_ops x86_platform = { | |||
111 | 108 | ||
112 | EXPORT_SYMBOL_GPL(x86_platform); | 109 | EXPORT_SYMBOL_GPL(x86_platform); |
113 | struct x86_msi_ops x86_msi = { | 110 | struct x86_msi_ops x86_msi = { |
114 | .setup_msi_irqs = native_setup_msi_irqs, | 111 | .setup_msi_irqs = native_setup_msi_irqs, |
115 | .teardown_msi_irq = native_teardown_msi_irq, | 112 | .compose_msi_msg = native_compose_msi_msg, |
116 | .teardown_msi_irqs = default_teardown_msi_irqs, | 113 | .teardown_msi_irq = native_teardown_msi_irq, |
117 | .restore_msi_irqs = default_restore_msi_irqs, | 114 | .teardown_msi_irqs = default_teardown_msi_irqs, |
115 | .restore_msi_irqs = default_restore_msi_irqs, | ||
116 | .setup_hpet_msi = default_setup_hpet_msi, | ||
118 | }; | 117 | }; |
119 | 118 | ||
120 | struct x86_io_apic_ops x86_io_apic_ops = { | 119 | struct x86_io_apic_ops x86_io_apic_ops = { |
121 | .init = native_io_apic_init_mappings, | 120 | .init = native_io_apic_init_mappings, |
122 | .read = native_io_apic_read, | 121 | .read = native_io_apic_read, |
123 | .write = native_io_apic_write, | 122 | .write = native_io_apic_write, |
124 | .modify = native_io_apic_modify, | 123 | .modify = native_io_apic_modify, |
124 | .disable = native_disable_io_apic, | ||
125 | .print_entries = native_io_apic_print_entries, | ||
126 | .set_affinity = native_ioapic_set_affinity, | ||
127 | .setup_entry = native_setup_ioapic_entry, | ||
128 | .eoi_ioapic_pin = native_eoi_ioapic_pin, | ||
125 | }; | 129 | }; |
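
The x86_init.c hunks extend the x86_msi and x86_io_apic_ops tables with more native defaults that paravirt platforms can override. A freestanding sketch of that pattern follows; the names are invented for the example and are not the kernel's.

#include <stdio.h>

/* Ops table: function pointers filled with native defaults, which a
 * platform (Xen, a simulator, ...) can override at init time without
 * touching any caller. */
struct io_ops {
    unsigned int (*read)(unsigned int reg);
    void (*write)(unsigned int reg, unsigned int val);
};

static unsigned int native_read(unsigned int reg)
{
    printf("native read  reg %u\n", reg);
    return 0;
}

static void native_write(unsigned int reg, unsigned int val)
{
    printf("native write reg %u <- %u\n", reg, val);
}

static void paravirt_write(unsigned int reg, unsigned int val)
{
    printf("hypercall write reg %u <- %u\n", reg, val);
}

/* Default ops, analogous to the initializers in x86_init.c. */
static struct io_ops io_ops = {
    .read  = native_read,
    .write = native_write,
};

int main(void)
{
    io_ops.write(3, 7);               /* native path */

    io_ops.write = paravirt_write;    /* platform override, as Xen does */
    io_ops.write(3, 7);

    (void)io_ops.read(3);
    return 0;
}
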