Diffstat (limited to 'arch/x86/kernel')
93 files changed, 2966 insertions, 3495 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 0925676266bd..2c833d8c4141 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -11,6 +11,8 @@ ifdef CONFIG_FUNCTION_TRACER
 CFLAGS_REMOVE_tsc.o = -pg
 CFLAGS_REMOVE_rtc.o = -pg
 CFLAGS_REMOVE_paravirt-spinlocks.o = -pg
+CFLAGS_REMOVE_pvclock.o = -pg
+CFLAGS_REMOVE_kvmclock.o = -pg
 CFLAGS_REMOVE_ftrace.o = -pg
 CFLAGS_REMOVE_early_printk.o = -pg
 endif
@@ -32,7 +34,8 @@ GCOV_PROFILE_paravirt.o := n
 obj-y := process_$(BITS).o signal.o entry_$(BITS).o
 obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
 obj-y += time.o ioport.o ldt.o dumpstack.o
-obj-y += setup.o x86_init.o i8259.o irqinit.o
+obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o
+obj-$(CONFIG_IRQ_WORK) += irq_work.o
 obj-$(CONFIG_X86_VISWS) += visws_quirks.o
 obj-$(CONFIG_X86_32) += probe_roms_32.o
 obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
@@ -42,6 +45,7 @@ obj-y += bootflag.o e820.o
 obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
 obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o
 obj-y += tsc.o io_delay.o rtc.o
+obj-y += pci-iommu_table.o
 
 obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
 obj-y += process.o
@@ -83,15 +87,15 @@ obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o
 obj-$(CONFIG_KGDB) += kgdb.o
 obj-$(CONFIG_VM86) += vm86_32.o
 obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
+obj-$(CONFIG_EARLY_PRINTK_MRST) += early_printk_mrst.o
 
 obj-$(CONFIG_HPET_TIMER) += hpet.o
 obj-$(CONFIG_APB_TIMER) += apb_timer.o
 
-obj-$(CONFIG_K8_NB) += k8.o
+obj-$(CONFIG_AMD_NB) += amd_nb.o
 obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o
 obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o
 
-obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o
 obj-$(CONFIG_KVM_GUEST) += kvm.o
 obj-$(CONFIG_KVM_CLOCK) += kvmclock.o
 obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o
@@ -104,6 +108,7 @@ obj-$(CONFIG_SCx200) += scx200.o
 scx200-y += scx200_32.o
 
 obj-$(CONFIG_OLPC) += olpc.o
+obj-$(CONFIG_OLPC_XO1) += olpc-xo1.o
 obj-$(CONFIG_OLPC_OPENFIRMWARE) += olpc_ofw.o
 obj-$(CONFIG_X86_MRST) += mrst.o
 
@@ -120,7 +125,6 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
 obj-$(CONFIG_X86_UV) += tlb_uv.o bios_uv.o uv_irq.o uv_sysfs.o uv_time.o
-obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
 obj-$(CONFIG_AUDIT) += audit_64.o
 
 obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index fb7a5f052e2b..5812404a0d4c 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -13,6 +13,7 @@
 
 #include <acpi/processor.h>
 #include <asm/acpi.h>
+#include <asm/mwait.h>
 
 /*
 * Initialize bm_flags based on the CPU cache properties
@@ -61,20 +62,10 @@ struct cstate_entry {
 unsigned int ecx;
 } states[ACPI_PROCESSOR_MAX_POWER];
 };
-static struct cstate_entry *cpu_cstate_entry; /* per CPU ptr */
+static struct cstate_entry __percpu *cpu_cstate_entry; /* per CPU ptr */
 
 static short mwait_supported[ACPI_PROCESSOR_MAX_POWER];
 
-#define MWAIT_SUBSTATE_MASK (0xf)
-#define MWAIT_CSTATE_MASK (0xf)
-#define MWAIT_SUBSTATE_SIZE (4)
-
-#define CPUID_MWAIT_LEAF (5)
-#define CPUID5_ECX_EXTENSIONS_SUPPORTED (0x1)
-#define CPUID5_ECX_INTERRUPT_BREAK (0x2)
-
-#define MWAIT_ECX_INTERRUPT_BREAK (0x1)
-
 #define NATIVE_CSTATE_BEYOND_HALT (2)
 
 static long acpi_processor_ffh_cstate_probe_cpu(void *_cx)
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 33cec152070d..74a847835bab 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -7,11 +7,17 @@
 
 #include <linux/acpi.h>
 #include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/dmi.h>
 #include <linux/cpumask.h>
 #include <asm/segment.h>
 #include <asm/desc.h>
 
+#ifdef CONFIG_X86_32
+#include <asm/pgtable.h>
+#include <asm/pgtable_32.h>
+#endif
+
 #include "realmode/wakeup.h"
 #include "sleep.h"
 
@@ -90,7 +96,7 @@ int acpi_save_state_mem(void)
 
 #ifndef CONFIG_64BIT
 header->pmode_entry = (u32)&wakeup_pmode_return;
-header->pmode_cr3 = (u32)(swsusp_pg_dir - __PAGE_OFFSET);
+header->pmode_cr3 = (u32)__pa(&initial_page_table);
 saved_magic = 0x12345678;
 #else /* CONFIG_64BIT */
 header->trampoline_segment = setup_trampoline() >> 4;
@@ -125,7 +131,7 @@ void acpi_restore_state_mem(void)
 */
 void __init acpi_reserve_wakeup_memory(void)
 {
-unsigned long mem;
+phys_addr_t mem;
 
 if ((&wakeup_code_end - &wakeup_code_start) > WAKEUP_SIZE) {
 printk(KERN_ERR
@@ -133,15 +139,15 @@ void __init acpi_reserve_wakeup_memory(void)
 return;
 }
 
-mem = find_e820_area(0, 1<<20, WAKEUP_SIZE, PAGE_SIZE);
+mem = memblock_find_in_range(0, 1<<20, WAKEUP_SIZE, PAGE_SIZE);
 
-if (mem == -1L) {
+if (mem == MEMBLOCK_ERROR) {
 printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n");
 return;
 }
 acpi_realmode = (unsigned long) phys_to_virt(mem);
 acpi_wakeup_address = mem;
-reserve_early(mem, mem + WAKEUP_SIZE, "ACPI WAKEUP");
+memblock_x86_reserve_range(mem, mem + WAKEUP_SIZE, "ACPI WAKEUP");
 }
 
 
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index f65ab8b014c4..a36bb90aef53 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -195,7 +195,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len)
 
 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
 extern s32 __smp_locks[], __smp_locks_end[];
-static void *text_poke_early(void *addr, const void *opcode, size_t len);
+void *text_poke_early(void *addr, const void *opcode, size_t len);
 
 /* Replace instructions with better alternatives for this CPU type.
 This runs before SMP is initialized to avoid SMP problems with
@@ -522,7 +522,7 @@ void __init alternative_instructions(void)
 * instructions. And on the local CPU you need to be protected again NMI or MCE
 * handlers seeing an inconsistent instruction while you patch.
 */
-static void *__init_or_module text_poke_early(void *addr, const void *opcode,
+void *__init_or_module text_poke_early(void *addr, const void *opcode,
 size_t len)
 {
 unsigned long flags;
@@ -637,7 +637,72 @@ void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
 tpp.len = len;
 atomic_set(&stop_machine_first, 1);
 wrote_text = 0;
-stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
+/* Use __stop_machine() because the caller already got online_cpus. */
+__stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
 return addr;
 }
 
+#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
+
+unsigned char ideal_nop5[IDEAL_NOP_SIZE_5];
+
+void __init arch_init_ideal_nop5(void)
+{
+extern const unsigned char ftrace_test_p6nop[];
+extern const unsigned char ftrace_test_nop5[];
+extern const unsigned char ftrace_test_jmp[];
+int faulted = 0;
+
+/*
+ * There is no good nop for all x86 archs.
+ * We will default to using the P6_NOP5, but first we
+ * will test to make sure that the nop will actually
+ * work on this CPU. If it faults, we will then
+ * go to a lesser efficient 5 byte nop. If that fails
+ * we then just use a jmp as our nop. This isn't the most
+ * efficient nop, but we can not use a multi part nop
+ * since we would then risk being preempted in the middle
+ * of that nop, and if we enabled tracing then, it might
+ * cause a system crash.
+ *
+ * TODO: check the cpuid to determine the best nop.
+ */
+asm volatile (
+"ftrace_test_jmp:"
+"jmp ftrace_test_p6nop\n"
+"nop\n"
+"nop\n"
+"nop\n" /* 2 byte jmp + 3 bytes */
+"ftrace_test_p6nop:"
+P6_NOP5
+"jmp 1f\n"
+"ftrace_test_nop5:"
+".byte 0x66,0x66,0x66,0x66,0x90\n"
+"1:"
+".section .fixup, \"ax\"\n"
+"2: movl $1, %0\n"
+" jmp ftrace_test_nop5\n"
+"3: movl $2, %0\n"
+" jmp 1b\n"
+".previous\n"
+_ASM_EXTABLE(ftrace_test_p6nop, 2b)
+_ASM_EXTABLE(ftrace_test_nop5, 3b)
+: "=r"(faulted) : "0" (faulted));
+
+switch (faulted) {
+case 0:
+pr_info("converting mcount calls to 0f 1f 44 00 00\n");
+memcpy(ideal_nop5, ftrace_test_p6nop, IDEAL_NOP_SIZE_5);
+break;
+case 1:
+pr_info("converting mcount calls to 66 66 66 66 90\n");
+memcpy(ideal_nop5, ftrace_test_nop5, IDEAL_NOP_SIZE_5);
+break;
+case 2:
+pr_info("converting mcount calls to jmp . + 5\n");
+memcpy(ideal_nop5, ftrace_test_jmp, IDEAL_NOP_SIZE_5);
+break;
+}
+
+}
+#endif
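Note on the arch_init_ideal_nop5() hunk above: the kernel picks a 5-byte nop by executing each candidate encoding and letting the exception fixup table catch a fault on CPUs that reject it. A rough userspace analogue of the same probe, using SIGILL and sigsetjmp() in place of a fixup table, might look like the following sketch (illustrative only, not kernel code):

    #include <setjmp.h>
    #include <signal.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/mman.h>

    static sigjmp_buf probe_env;

    static void probe_sigill(int sig)
    {
            (void)sig;
            siglongjmp(probe_env, 1);       /* candidate encoding faulted */
    }

    int main(void)
    {
            /* each candidate is the 5-byte nop followed by a ret */
            static const unsigned char p6_nop5[]  = { 0x0f, 0x1f, 0x44, 0x00, 0x00, 0xc3 };
            static const unsigned char osp_nop5[] = { 0x66, 0x66, 0x66, 0x66, 0x90, 0xc3 };
            const unsigned char *candidates[] = { p6_nop5, osp_nop5 };
            const char *names[] = { "0f 1f 44 00 00", "66 66 66 66 90" };
            unsigned char *buf;
            unsigned int i;

            buf = mmap(NULL, 4096, PROT_READ | PROT_WRITE | PROT_EXEC,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
            if (buf == MAP_FAILED)
                    return 1;
            signal(SIGILL, probe_sigill);

            for (i = 0; i < 2; i++) {
                    memcpy(buf, candidates[i], 6);
                    if (sigsetjmp(probe_env, 1) == 0) {
                            ((void (*)(void))buf)();        /* execute the candidate nop */
                            printf("usable 5-byte nop: %s\n", names[i]);
                            return 0;
                    }
            }
            printf("no multi-byte nop usable, would fall back to a jmp .+5\n");
            return 0;
    }

Build with gcc -o nop_probe nop_probe.c; on any recent x86 CPU the first candidate already succeeds, which is why the kernel comment treats the other encodings as fallbacks.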
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index fa044e1e30a2..d2fdb0826df2 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2007-2009 Advanced Micro Devices, Inc.
+ * Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
 * Author: Joerg Roedel <joerg.roedel@amd.com>
 * Leo Duran <leo.duran@amd.com>
 *
@@ -1953,6 +1953,7 @@ static void __unmap_single(struct dma_ops_domain *dma_dom,
 size_t size,
 int dir)
 {
+dma_addr_t flush_addr;
 dma_addr_t i, start;
 unsigned int pages;
 
@@ -1960,6 +1961,7 @@ static void __unmap_single(struct dma_ops_domain *dma_dom,
 (dma_addr + size > dma_dom->aperture_size))
 return;
 
+flush_addr = dma_addr;
 pages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
 dma_addr &= PAGE_MASK;
 start = dma_addr;
@@ -1974,7 +1976,7 @@ static void __unmap_single(struct dma_ops_domain *dma_dom,
 dma_ops_free_addresses(dma_dom, dma_addr, pages);
 
 if (amd_iommu_unmap_flush || dma_dom->need_flush) {
-iommu_flush_pages(&dma_dom->domain, dma_addr, size);
+iommu_flush_pages(&dma_dom->domain, flush_addr, size);
 dma_dom->need_flush = false;
 }
 }
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 3cc63e2b8dd4..6e11c8134158 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2007-2009 Advanced Micro Devices, Inc.
+ * Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
 * Author: Joerg Roedel <joerg.roedel@amd.com>
 * Leo Duran <leo.duran@amd.com>
 *
@@ -31,7 +31,7 @@
 #include <asm/iommu.h>
 #include <asm/gart.h>
 #include <asm/x86_init.h>
-
+#include <asm/iommu_table.h>
 /*
 * definitions for the ACPI scanning code
 */
@@ -194,6 +194,39 @@ static inline unsigned long tbl_size(int entry_size)
 return 1UL << shift;
 }
 
+/* Access to l1 and l2 indexed register spaces */
+
+static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address)
+{
+u32 val;
+
+pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
+pci_read_config_dword(iommu->dev, 0xfc, &val);
+return val;
+}
+
+static void iommu_write_l1(struct amd_iommu *iommu, u16 l1, u8 address, u32 val)
+{
+pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16 | 1 << 31));
+pci_write_config_dword(iommu->dev, 0xfc, val);
+pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
+}
+
+static u32 iommu_read_l2(struct amd_iommu *iommu, u8 address)
+{
+u32 val;
+
+pci_write_config_dword(iommu->dev, 0xf0, address);
+pci_read_config_dword(iommu->dev, 0xf4, &val);
+return val;
+}
+
+static void iommu_write_l2(struct amd_iommu *iommu, u8 address, u32 val)
+{
+pci_write_config_dword(iommu->dev, 0xf0, (address | 1 << 8));
+pci_write_config_dword(iommu->dev, 0xf4, val);
+}
+
 /****************************************************************************
 *
 * AMD IOMMU MMIO register space handling functions
@@ -619,6 +652,7 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu)
 {
 int cap_ptr = iommu->cap_ptr;
 u32 range, misc;
+int i, j;
 
 pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET,
 &iommu->cap);
@@ -632,6 +666,30 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu)
 iommu->last_device = calc_devid(MMIO_GET_BUS(range),
 MMIO_GET_LD(range));
 iommu->evt_msi_num = MMIO_MSI_NUM(misc);
+
+if (!is_rd890_iommu(iommu->dev))
+return;
+
+/*
+ * Some rd890 systems may not be fully reconfigured by the BIOS, so
+ * it's necessary for us to store this information so it can be
+ * reprogrammed on resume
+ */
+
+pci_read_config_dword(iommu->dev, iommu->cap_ptr + 4,
+&iommu->stored_addr_lo);
+pci_read_config_dword(iommu->dev, iommu->cap_ptr + 8,
+&iommu->stored_addr_hi);
+
+/* Low bit locks writes to configuration space */
+iommu->stored_addr_lo &= ~1;
+
+for (i = 0; i < 6; i++)
+for (j = 0; j < 0x12; j++)
+iommu->stored_l1[i][j] = iommu_read_l1(iommu, i, j);
+
+for (i = 0; i < 0x83; i++)
+iommu->stored_l2[i] = iommu_read_l2(iommu, i);
 }
 
 /*
@@ -649,29 +707,9 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
 struct ivhd_entry *e;
 
 /*
- * First set the recommended feature enable bits from ACPI
- * into the IOMMU control registers
- */
-h->flags & IVHD_FLAG_HT_TUN_EN_MASK ?
-iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) :
-iommu_feature_disable(iommu, CONTROL_HT_TUN_EN);
-
-h->flags & IVHD_FLAG_PASSPW_EN_MASK ?
-iommu_feature_enable(iommu, CONTROL_PASSPW_EN) :
-iommu_feature_disable(iommu, CONTROL_PASSPW_EN);
-
-h->flags & IVHD_FLAG_RESPASSPW_EN_MASK ?
-iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) :
-iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN);
-
-h->flags & IVHD_FLAG_ISOC_EN_MASK ?
-iommu_feature_enable(iommu, CONTROL_ISOC_EN) :
-iommu_feature_disable(iommu, CONTROL_ISOC_EN);
-
-/*
- * make IOMMU memory accesses cache coherent
+ * First save the recommended feature enable bits from ACPI
 */
-iommu_feature_enable(iommu, CONTROL_COHERENT_EN);
+iommu->acpi_flags = h->flags;
 
 /*
 * Done. Now parse the device entries
@@ -1116,6 +1154,79 @@ static void init_device_table(void)
 }
 }
 
+static void iommu_init_flags(struct amd_iommu *iommu)
+{
+iommu->acpi_flags & IVHD_FLAG_HT_TUN_EN_MASK ?
+iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) :
+iommu_feature_disable(iommu, CONTROL_HT_TUN_EN);
+
+iommu->acpi_flags & IVHD_FLAG_PASSPW_EN_MASK ?
+iommu_feature_enable(iommu, CONTROL_PASSPW_EN) :
+iommu_feature_disable(iommu, CONTROL_PASSPW_EN);
+
+iommu->acpi_flags & IVHD_FLAG_RESPASSPW_EN_MASK ?
+iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) :
+iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN);
+
+iommu->acpi_flags & IVHD_FLAG_ISOC_EN_MASK ?
+iommu_feature_enable(iommu, CONTROL_ISOC_EN) :
+iommu_feature_disable(iommu, CONTROL_ISOC_EN);
+
+/*
+ * make IOMMU memory accesses cache coherent
+ */
+iommu_feature_enable(iommu, CONTROL_COHERENT_EN);
+}
+
+static void iommu_apply_resume_quirks(struct amd_iommu *iommu)
+{
+int i, j;
+u32 ioc_feature_control;
+struct pci_dev *pdev = NULL;
+
+/* RD890 BIOSes may not have completely reconfigured the iommu */
+if (!is_rd890_iommu(iommu->dev))
+return;
+
+/*
+ * First, we need to ensure that the iommu is enabled. This is
+ * controlled by a register in the northbridge
+ */
+pdev = pci_get_bus_and_slot(iommu->dev->bus->number, PCI_DEVFN(0, 0));
+
+if (!pdev)
+return;
+
+/* Select Northbridge indirect register 0x75 and enable writing */
+pci_write_config_dword(pdev, 0x60, 0x75 | (1 << 7));
+pci_read_config_dword(pdev, 0x64, &ioc_feature_control);
+
+/* Enable the iommu */
+if (!(ioc_feature_control & 0x1))
+pci_write_config_dword(pdev, 0x64, ioc_feature_control | 1);
+
+pci_dev_put(pdev);
+
+/* Restore the iommu BAR */
+pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
+iommu->stored_addr_lo);
+pci_write_config_dword(iommu->dev, iommu->cap_ptr + 8,
+iommu->stored_addr_hi);
+
+/* Restore the l1 indirect regs for each of the 6 l1s */
+for (i = 0; i < 6; i++)
+for (j = 0; j < 0x12; j++)
+iommu_write_l1(iommu, i, j, iommu->stored_l1[i][j]);
+
+/* Restore the l2 indirect regs */
+for (i = 0; i < 0x83; i++)
+iommu_write_l2(iommu, i, iommu->stored_l2[i]);
+
+/* Lock PCI setup registers */
+pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
+iommu->stored_addr_lo | 1);
+}
+
 /*
 * This function finally enables all IOMMUs found in the system after
 * they have been initialized
@@ -1126,6 +1237,7 @@ static void enable_iommus(void)
 
 for_each_iommu(iommu) {
 iommu_disable(iommu);
+iommu_init_flags(iommu);
 iommu_set_device_table(iommu);
 iommu_enable_command_buffer(iommu);
 iommu_enable_event_buffer(iommu);
@@ -1150,6 +1262,11 @@ static void disable_iommus(void)
 
 static int amd_iommu_resume(struct sys_device *dev)
 {
+struct amd_iommu *iommu;
+
+for_each_iommu(iommu)
+iommu_apply_resume_quirks(iommu);
+
 /* re-load the hardware */
 enable_iommus();
 
@@ -1382,13 +1499,13 @@ static int __init early_amd_iommu_detect(struct acpi_table_header *table)
 return 0;
 }
 
-void __init amd_iommu_detect(void)
+int __init amd_iommu_detect(void)
 {
 if (no_iommu || (iommu_detected && !gart_iommu_aperture))
-return;
+return -ENODEV;
 
 if (amd_iommu_disabled)
-return;
+return -ENODEV;
 
 if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) {
 iommu_detected = 1;
@@ -1397,7 +1514,9 @@
 
 /* Make sure ACS will be enabled */
 pci_request_acs();
+return 1;
 }
+return -ENODEV;
 }
 
 /****************************************************************************
@@ -1428,3 +1547,8 @@ static int __init parse_amd_iommu_options(char *str)
 
 __setup("amd_iommu_dump", parse_amd_iommu_dump);
 __setup("amd_iommu=", parse_amd_iommu_options);
+
+IOMMU_INIT_FINISH(amd_iommu_detect,
+gart_iommu_hole_init,
+0,
+0);
diff --git a/arch/x86/kernel/k8.c b/arch/x86/kernel/amd_nb.c
index 0f7bc20cfcde..8f6463d8ed0d 100644
--- a/arch/x86/kernel/k8.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -8,21 +8,19 @@
 #include <linux/errno.h>
 #include <linux/module.h>
 #include <linux/spinlock.h>
-#include <asm/k8.h>
-
-int num_k8_northbridges;
-EXPORT_SYMBOL(num_k8_northbridges);
+#include <asm/amd_nb.h>
 
 static u32 *flush_words;
 
 struct pci_device_id k8_nb_ids[] = {
 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },
 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
+{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_MISC) },
 {}
 };
 EXPORT_SYMBOL(k8_nb_ids);
 
-struct pci_dev **k8_northbridges;
+struct k8_northbridge_info k8_northbridges;
 EXPORT_SYMBOL(k8_northbridges);
 
 static struct pci_dev *next_k8_northbridge(struct pci_dev *dev)
@@ -40,36 +38,45 @@ int cache_k8_northbridges(void)
 int i;
 struct pci_dev *dev;
 
-if (num_k8_northbridges)
+if (k8_northbridges.num)
 return 0;
 
 dev = NULL;
 while ((dev = next_k8_northbridge(dev)) != NULL)
-num_k8_northbridges++;
+k8_northbridges.num++;
+
+/* some CPU families (e.g. family 0x11) do not support GART */
+if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 ||
+boot_cpu_data.x86 == 0x15)
+k8_northbridges.gart_supported = 1;
 
-k8_northbridges = kmalloc((num_k8_northbridges + 1) * sizeof(void *),
-GFP_KERNEL);
-if (!k8_northbridges)
+k8_northbridges.nb_misc = kmalloc((k8_northbridges.num + 1) *
+sizeof(void *), GFP_KERNEL);
+if (!k8_northbridges.nb_misc)
 return -ENOMEM;
 
-if (!num_k8_northbridges) {
-k8_northbridges[0] = NULL;
+if (!k8_northbridges.num) {
+k8_northbridges.nb_misc[0] = NULL;
 return 0;
 }
 
-flush_words = kmalloc(num_k8_northbridges * sizeof(u32), GFP_KERNEL);
-if (!flush_words) {
-kfree(k8_northbridges);
-return -ENOMEM;
+if (k8_northbridges.gart_supported) {
+flush_words = kmalloc(k8_northbridges.num * sizeof(u32),
+GFP_KERNEL);
+if (!flush_words) {
+kfree(k8_northbridges.nb_misc);
+return -ENOMEM;
+}
 }
 
 dev = NULL;
 i = 0;
 while ((dev = next_k8_northbridge(dev)) != NULL) {
-k8_northbridges[i] = dev;
-pci_read_config_dword(dev, 0x9c, &flush_words[i++]);
+k8_northbridges.nb_misc[i] = dev;
+if (k8_northbridges.gart_supported)
+pci_read_config_dword(dev, 0x9c, &flush_words[i++]);
 }
-k8_northbridges[i] = NULL;
+k8_northbridges.nb_misc[i] = NULL;
 return 0;
 }
 EXPORT_SYMBOL_GPL(cache_k8_northbridges);
@@ -93,22 +100,25 @@ void k8_flush_garts(void)
 unsigned long flags;
 static DEFINE_SPINLOCK(gart_lock);
 
+if (!k8_northbridges.gart_supported)
+return;
+
 /* Avoid races between AGP and IOMMU. In theory it's not needed
 but I'm not sure if the hardware won't lose flush requests
 when another is pending. This whole thing is so expensive anyways
 that it doesn't matter to serialize more. -AK */
 spin_lock_irqsave(&gart_lock, flags);
 flushed = 0;
-for (i = 0; i < num_k8_northbridges; i++) {
-pci_write_config_dword(k8_northbridges[i], 0x9c,
+for (i = 0; i < k8_northbridges.num; i++) {
+pci_write_config_dword(k8_northbridges.nb_misc[i], 0x9c,
 flush_words[i]|1);
 flushed++;
 }
-for (i = 0; i < num_k8_northbridges; i++) {
+for (i = 0; i < k8_northbridges.num; i++) {
 u32 w;
 /* Make sure the hardware actually executed the flush*/
 for (;;) {
-pci_read_config_dword(k8_northbridges[i],
+pci_read_config_dword(k8_northbridges.nb_misc[i],
 0x9c, &w);
 if (!(w & 1))
 break;
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index 8dd77800ff5d..92543c73cf8e 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c
@@ -231,34 +231,6 @@ static void apbt_restart_clocksource(struct clocksource *cs)
 apbt_start_counter(phy_cs_timer_id);
 }
 
-/* Setup IRQ routing via IOAPIC */
-#ifdef CONFIG_SMP
-static void apbt_setup_irq(struct apbt_dev *adev)
-{
-struct irq_chip *chip;
-struct irq_desc *desc;
-
-/* timer0 irq has been setup early */
-if (adev->irq == 0)
-return;
-desc = irq_to_desc(adev->irq);
-chip = get_irq_chip(adev->irq);
-disable_irq(adev->irq);
-desc->status |= IRQ_MOVE_PCNTXT;
-irq_set_affinity(adev->irq, cpumask_of(adev->cpu));
-/* APB timer irqs are set up as mp_irqs, timer is edge triggerred */
-set_irq_chip_and_handler_name(adev->irq, chip, handle_edge_irq, "edge");
-enable_irq(adev->irq);
-if (system_state == SYSTEM_BOOTING)
-if (request_irq(adev->irq, apbt_interrupt_handler,
-IRQF_TIMER | IRQF_DISABLED | IRQF_NOBALANCING,
-adev->name, adev)) {
-printk(KERN_ERR "Failed request IRQ for APBT%d\n",
-adev->num);
-}
-}
-#endif
-
 static void apbt_enable_int(int n)
 {
 unsigned long ctrl = apbt_readl(n, APBTMR_N_CONTROL);
@@ -334,6 +306,27 @@ static int __init apbt_clockevent_register(void)
 }
 
 #ifdef CONFIG_SMP
+
+static void apbt_setup_irq(struct apbt_dev *adev)
+{
+/* timer0 irq has been setup early */
+if (adev->irq == 0)
+return;
+
+if (system_state == SYSTEM_BOOTING) {
+irq_modify_status(adev->irq, 0, IRQ_MOVE_PCNTXT);
+/* APB timer irqs are set up as mp_irqs, timer is edge type */
+__set_irq_handler(adev->irq, handle_edge_irq, 0, "edge");
+if (request_irq(adev->irq, apbt_interrupt_handler,
+IRQF_TIMER | IRQF_DISABLED | IRQF_NOBALANCING,
+adev->name, adev)) {
+printk(KERN_ERR "Failed request IRQ for APBT%d\n",
+adev->num);
+}
+} else
+enable_irq(adev->irq);
+}
+
 /* Should be called with per cpu */
 void apbt_setup_secondary_clock(void)
 {
@@ -343,7 +336,7 @@ void apbt_setup_secondary_clock(void)
 
 /* Don't register boot CPU clockevent */
 cpu = smp_processor_id();
-if (cpu == boot_cpu_id)
+if (!cpu)
 return;
 /*
 * We need to calculate the scaled math multiplication factor for
@@ -389,16 +382,17 @@ static int apbt_cpuhp_notify(struct notifier_block *n,
 
 switch (action & 0xf) {
 case CPU_DEAD:
+disable_irq(adev->irq);
 apbt_disable_int(cpu);
-if (system_state == SYSTEM_RUNNING)
+if (system_state == SYSTEM_RUNNING) {
 pr_debug("skipping APBT CPU %lu offline\n", cpu);
-else if (adev) {
+} else if (adev) {
 pr_debug("APBT clockevent for cpu %lu offline\n", cpu);
 free_irq(adev->irq, adev);
 }
 break;
 default:
-pr_debug(KERN_INFO "APBT notified %lu, no action\n", action);
+pr_debug("APBT notified %lu, no action\n", action);
 }
 return NOTIFY_OK;
 }
@@ -552,7 +546,7 @@ bad_count:
 pr_debug("APB CS going back %lx:%lx:%lx ",
 t2, last_read, t2 - last_read);
 bad_count_x3:
-pr_debug(KERN_INFO "tripple check enforced\n");
+pr_debug("triple check enforced\n");
 t0 = apbt_readl(phy_cs_timer_id,
 APBTMR_N_CURRENT_VALUE);
 udelay(1);
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index a2e0caf26e17..b3a16e8f0703 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -27,7 +27,7 @@
 #include <asm/gart.h>
 #include <asm/pci-direct.h>
 #include <asm/dma.h>
-#include <asm/k8.h>
+#include <asm/amd_nb.h>
 #include <asm/x86_init.h>
 
 int gart_iommu_aperture;
@@ -307,7 +307,7 @@ void __init early_gart_iommu_check(void)
 continue;
 
 ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL);
-aper_enabled = ctl & AMD64_GARTEN;
+aper_enabled = ctl & GARTEN;
 aper_order = (ctl >> 1) & 7;
 aper_size = (32 * 1024 * 1024) << aper_order;
 aper_base = read_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE) & 0x7fff;
@@ -362,7 +362,7 @@ void __init early_gart_iommu_check(void)
 continue;
 
 ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL);
-ctl &= ~AMD64_GARTEN;
+ctl &= ~GARTEN;
 write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl);
 }
 }
@@ -371,7 +371,7 @@ void __init early_gart_iommu_check(void)
 
 static int __initdata printed_gart_size_msg;
 
-void __init gart_iommu_hole_init(void)
+int __init gart_iommu_hole_init(void)
 {
 u32 agp_aper_base = 0, agp_aper_order = 0;
 u32 aper_size, aper_alloc = 0, aper_order = 0, last_aper_order = 0;
@@ -381,7 +381,7 @@ void __init gart_iommu_hole_init(void)
 
 if (gart_iommu_aperture_disabled || !fix_aperture ||
 !early_pci_allowed())
-return;
+return -ENODEV;
 
 printk(KERN_INFO "Checking aperture...\n");
 
@@ -463,8 +463,9 @@ out:
 unsigned long n = (32 * 1024 * 1024) << last_aper_order;
 
 insert_aperture_resource((u32)last_aper_base, n);
+return 1;
 }
-return;
+return 0;
 }
 
 if (!fallback_aper_force) {
@@ -500,13 +501,18 @@ out:
 panic("Not enough memory for aperture");
 }
 } else {
-return;
+return 0;
 }
 
 /* Fix up the north bridges */
 for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) {
-int bus;
-int dev_base, dev_limit;
+int bus, dev_base, dev_limit;
+
+/*
+ * Don't enable translation yet but enable GART IO and CPU
+ * accesses and set DISTLBWALKPRB since GART table memory is UC.
+ */
+u32 ctl = DISTLBWALKPRB | aper_order << 1;
 
 bus = bus_dev_ranges[i].bus;
 dev_base = bus_dev_ranges[i].dev_base;
@@ -515,13 +521,12 @@ out:
 if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
 continue;
 
-/* Don't enable translation yet. That is done later.
-Assume this BIOS didn't initialise the GART so
-just overwrite all previous bits */
-write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, aper_order << 1);
+write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl);
 write_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE, aper_alloc >> 25);
 }
 }
 
 set_up_gart_resume(aper_order, aper_alloc);
+
+return 1;
 }
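Note on the gart_iommu_hole_init() hunk above: the control word now written to AMD64_GARTAPERTURECTL packs the aperture order into bits 3:1 (the same field early_gart_iommu_check() reads back with (ctl >> 1) & 7), keeps GARTEN clear so translation stays off for now, and sets DISTLBWALKPRB because the GART table memory is uncached. A small standalone sketch of that encoding (GARTEN's bit position follows the diff; DISTLBWALKPRB's exact bit is an assumption here, see asm/gart.h):

    #include <stdio.h>

    #define GARTEN          (1u << 0)   /* tested/cleared by early_gart_iommu_check() */
    #define DISTLBWALKPRB   (1u << 6)   /* assumed bit position, per asm/gart.h */

    int main(void)
    {
            unsigned int order;

            for (order = 0; order <= 5; order++) {
                    unsigned long size = (32ul * 1024 * 1024) << order; /* from the diff */
                    unsigned int ctl = DISTLBWALKPRB | (order << 1);    /* GARTEN left clear */

                    printf("order %u: aperture %4lu MB, GARTAPERTURECTL=0x%02x, readback order=%u\n",
                           order, size >> 20, ctl, (ctl >> 1) & 7);
            }
            return 0;
    }

The aperture size doubles with each order step, which is why order 0 gives 32 MB and order 5 gives 1 GB.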
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index e3b534cda49a..850657d1b0ed 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -52,6 +52,7 @@
 #include <asm/mce.h>
 #include <asm/kvm_para.h>
 #include <asm/tsc.h>
+#include <asm/atomic.h>
 
 unsigned int num_processors;
 
@@ -370,38 +371,87 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
 }
 
 /*
- * Setup extended LVT, AMD specific (K8, family 10h)
+ * Setup extended LVT, AMD specific
 *
- * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and
- * MCE interrupts are supported. Thus MCE offset must be set to 0.
+ * Software should use the LVT offsets the BIOS provides. The offsets
+ * are determined by the subsystems using it like those for MCE
+ * threshold or IBS. On K8 only offset 0 (APIC500) and MCE interrupts
+ * are supported. Beginning with family 10h at least 4 offsets are
+ * available.
 *
- * If mask=1, the LVT entry does not generate interrupts while mask=0
- * enables the vector. See also the BKDGs.
+ * Since the offsets must be consistent for all cores, we keep track
+ * of the LVT offsets in software and reserve the offset for the same
+ * vector also to be used on other cores. An offset is freed by
+ * setting the entry to APIC_EILVT_MASKED.
+ *
+ * If the BIOS is right, there should be no conflicts. Otherwise a
+ * "[Firmware Bug]: ..." error message is generated. However, if
+ * software does not properly determines the offsets, it is not
+ * necessarily a BIOS bug.
 */
 
-#define APIC_EILVT_LVTOFF_MCE 0
-#define APIC_EILVT_LVTOFF_IBS 1
+static atomic_t eilvt_offsets[APIC_EILVT_NR_MAX];
 
-static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask)
+static inline int eilvt_entry_is_changeable(unsigned int old, unsigned int new)
 {
-unsigned long reg = (lvt_off << 4) + APIC_EILVTn(0);
-unsigned int v = (mask << 16) | (msg_type << 8) | vector;
-
-apic_write(reg, v);
+return (old & APIC_EILVT_MASKED)
+|| (new == APIC_EILVT_MASKED)
+|| ((new & ~APIC_EILVT_MASKED) == old);
 }
 
-u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask)
+static unsigned int reserve_eilvt_offset(int offset, unsigned int new)
 {
-setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask);
-return APIC_EILVT_LVTOFF_MCE;
+unsigned int rsvd; /* 0: uninitialized */
+
+if (offset >= APIC_EILVT_NR_MAX)
+return ~0;
+
+rsvd = atomic_read(&eilvt_offsets[offset]) & ~APIC_EILVT_MASKED;
+do {
+if (rsvd &&
+!eilvt_entry_is_changeable(rsvd, new))
+/* may not change if vectors are different */
+return rsvd;
+rsvd = atomic_cmpxchg(&eilvt_offsets[offset], rsvd, new);
+} while (rsvd != new);
+
+return new;
 }
 
-u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask)
+/*
+ * If mask=1, the LVT entry does not generate interrupts while mask=0
+ * enables the vector. See also the BKDGs.
+ */
+
+int setup_APIC_eilvt(u8 offset, u8 vector, u8 msg_type, u8 mask)
 {
-setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask);
-return APIC_EILVT_LVTOFF_IBS;
+unsigned long reg = APIC_EILVTn(offset);
+unsigned int new, old, reserved;
+
+new = (mask << 16) | (msg_type << 8) | vector;
+old = apic_read(reg);
+reserved = reserve_eilvt_offset(offset, new);
+
+if (reserved != new) {
+pr_err(FW_BUG "cpu %d, try to setup vector 0x%x, but "
+"vector 0x%x was already reserved by another core, "
+"APIC%lX=0x%x\n",
+smp_processor_id(), new, reserved, reg, old);
+return -EINVAL;
+}
+
+if (!eilvt_entry_is_changeable(old, new)) {
+pr_err(FW_BUG "cpu %d, try to setup vector 0x%x but "
+"register already in use, APIC%lX=0x%x\n",
+smp_processor_id(), new, reg, old);
+return -EBUSY;
+}
+
+apic_write(reg, new);
+
+return 0;
 }
-EXPORT_SYMBOL_GPL(setup_APIC_eilvt_ibs);
+EXPORT_SYMBOL_GPL(setup_APIC_eilvt);
 
 /*
 * Program the next event, relative to now
@@ -1665,10 +1715,7 @@ int __init APIC_init_uniprocessor(void)
 }
 #endif
 
-#ifndef CONFIG_SMP
-enable_IR_x2apic();
 default_setup_apic_routing();
-#endif
 
 verify_local_APIC();
 connect_bsp_APIC();
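Note on the setup_APIC_eilvt() rework above: extended-LVT offsets are now tracked in eilvt_offsets[] and claimed with atomic_cmpxchg(), so every core either installs the same vector for a given offset or triggers the FW_BUG report. A standalone sketch of that reservation loop using C11 atomics (constant values and names here are illustrative, not the kernel's):

    #include <stdatomic.h>
    #include <stdio.h>

    #define EILVT_MASKED    0x10000u    /* stand-in for APIC_EILVT_MASKED */
    #define NR_OFFSETS      4

    static _Atomic unsigned int offsets[NR_OFFSETS];    /* 0 means "never reserved" */

    static int entry_is_changeable(unsigned int old, unsigned int new)
    {
            return (old & EILVT_MASKED) || new == EILVT_MASKED ||
                   (new & ~EILVT_MASKED) == old;
    }

    /* returns the value that ends up owning the slot; equal to new on success */
    static unsigned int reserve_offset(int off, unsigned int new)
    {
            unsigned int rsvd = atomic_load(&offsets[off]) & ~EILVT_MASKED;

            for (;;) {
                    if (rsvd && !entry_is_changeable(rsvd, new))
                            return rsvd;    /* a different vector already owns it */
                    if (atomic_compare_exchange_strong(&offsets[off], &rsvd, new))
                            return new;     /* we claimed the slot */
                    rsvd &= ~EILVT_MASKED;  /* lost a race: re-check the new owner */
            }
    }

    int main(void)
    {
            unsigned int vec_mce = 0xf9, vec_ibs = 0xfa;

            printf("offset 0 <- 0x%x: owner 0x%x\n", vec_mce, reserve_offset(0, vec_mce));
            printf("offset 0 <- 0x%x: owner 0x%x\n", vec_mce, reserve_offset(0, vec_mce));
            printf("offset 0 <- 0x%x: owner 0x%x (conflict)\n", vec_ibs, reserve_offset(0, vec_ibs));
            return 0;
    }

Running it, a repeated reservation of the same vector is accepted as idempotent, while the conflicting vector is rejected and the first owner is reported back, mirroring the reserved != new check in the diff.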
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index f1efebaf5510..8ae808d110f4 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -131,13 +131,9 @@ struct irq_pin_list {
 struct irq_pin_list *next;
 };
 
-static struct irq_pin_list *get_one_free_irq_2_pin(int node)
+static struct irq_pin_list *alloc_irq_pin_list(int node)
 {
-struct irq_pin_list *pin;
-
-pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node);
-
-return pin;
+return kzalloc_node(sizeof(struct irq_pin_list), GFP_KERNEL, node);
 }
 
 /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
@@ -150,10 +146,7 @@ static struct irq_cfg irq_cfgx[NR_IRQS];
 int __init arch_early_irq_init(void)
 {
 struct irq_cfg *cfg;
-struct irq_desc *desc;
-int count;
-int node;
-int i;
+int count, node, i;
 
 if (!legacy_pic->nr_legacy_irqs) {
 nr_irqs_gsi = 0;
@@ -162,13 +155,15 @@ int __init arch_early_irq_init(void)
 
 cfg = irq_cfgx;
 count = ARRAY_SIZE(irq_cfgx);
-node= cpu_to_node(boot_cpu_id);
+node = cpu_to_node(0);
+
+/* Make sure the legacy interrupts are marked in the bitmap */
+irq_reserve_irqs(0, legacy_pic->nr_legacy_irqs);
 
 for (i = 0; i < count; i++) {
-desc = irq_to_desc(i);
-desc->chip_data = &cfg[i];
-zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node);
-zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node);
+set_irq_chip_data(i, &cfg[i]);
+zalloc_cpumask_var_node(&cfg[i].domain, GFP_KERNEL, node);
+zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_KERNEL, node);
 /*
 * For legacy IRQ's, start with assigning irq0 to irq15 to
 * IRQ0_VECTOR to IRQ15_VECTOR on cpu 0.
| @@ -183,165 +178,88 @@ int __init arch_early_irq_init(void) | |||
| 183 | } | 178 | } |
| 184 | 179 | ||
| 185 | #ifdef CONFIG_SPARSE_IRQ | 180 | #ifdef CONFIG_SPARSE_IRQ |
| 186 | struct irq_cfg *irq_cfg(unsigned int irq) | 181 | static struct irq_cfg *irq_cfg(unsigned int irq) |
| 187 | { | 182 | { |
| 188 | struct irq_cfg *cfg = NULL; | 183 | return get_irq_chip_data(irq); |
| 189 | struct irq_desc *desc; | ||
| 190 | |||
| 191 | desc = irq_to_desc(irq); | ||
| 192 | if (desc) | ||
| 193 | cfg = desc->chip_data; | ||
| 194 | |||
| 195 | return cfg; | ||
| 196 | } | 184 | } |
| 197 | 185 | ||
| 198 | static struct irq_cfg *get_one_free_irq_cfg(int node) | 186 | static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node) |
| 199 | { | 187 | { |
| 200 | struct irq_cfg *cfg; | 188 | struct irq_cfg *cfg; |
| 201 | 189 | ||
| 202 | cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node); | 190 | cfg = kzalloc_node(sizeof(*cfg), GFP_KERNEL, node); |
| 203 | if (cfg) { | 191 | if (!cfg) |
| 204 | if (!zalloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) { | 192 | return NULL; |
| 205 | kfree(cfg); | 193 | if (!zalloc_cpumask_var_node(&cfg->domain, GFP_KERNEL, node)) |
| 206 | cfg = NULL; | 194 | goto out_cfg; |
| 207 | } else if (!zalloc_cpumask_var_node(&cfg->old_domain, | 195 | if (!zalloc_cpumask_var_node(&cfg->old_domain, GFP_KERNEL, node)) |
| 208 | GFP_ATOMIC, node)) { | 196 | goto out_domain; |
| 209 | free_cpumask_var(cfg->domain); | ||
| 210 | kfree(cfg); | ||
| 211 | cfg = NULL; | ||
| 212 | } | ||
| 213 | } | ||
| 214 | |||
| 215 | return cfg; | 197 | return cfg; |
| 198 | out_domain: | ||
| 199 | free_cpumask_var(cfg->domain); | ||
| 200 | out_cfg: | ||
| 201 | kfree(cfg); | ||
| 202 | return NULL; | ||
| 216 | } | 203 | } |
| 217 | 204 | ||
| 218 | int arch_init_chip_data(struct irq_desc *desc, int node) | 205 | static void free_irq_cfg(unsigned int at, struct irq_cfg *cfg) |
| 219 | { | ||
| 220 | struct irq_cfg *cfg; | ||
| 221 | |||
| 222 | cfg = desc->chip_data; | ||
| 223 | if (!cfg) { | ||
| 224 | desc->chip_data = get_one_free_irq_cfg(node); | ||
| 225 | if (!desc->chip_data) { | ||
| 226 | printk(KERN_ERR "can not alloc irq_cfg\n"); | ||
| 227 | BUG_ON(1); | ||
| 228 | } | ||
| 229 | } | ||
| 230 | |||
| 231 | return 0; | ||
| 232 | } | ||
| 233 | |||
| 234 | /* for move_irq_desc */ | ||
| 235 | static void | ||
| 236 | init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int node) | ||
| 237 | { | 206 | { |
| 238 | struct irq_pin_list *old_entry, *head, *tail, *entry; | 207 | if (!cfg) |
| 239 | |||
| 240 | cfg->irq_2_pin = NULL; | ||
| 241 | old_entry = old_cfg->irq_2_pin; | ||
| 242 | if (!old_entry) | ||
| 243 | return; | ||
| 244 | |||
| 245 | entry = get_one_free_irq_2_pin(node); | ||
| 246 | if (!entry) | ||
| 247 | return; | 208 | return; |
| 209 | set_irq_chip_data(at, NULL); | ||
| 210 | free_cpumask_var(cfg->domain); | ||
| 211 | free_cpumask_var(cfg->old_domain); | ||
| 212 | kfree(cfg); | ||
| 213 | } | ||
| 248 | 214 | ||
| 249 | entry->apic = old_entry->apic; | 215 | #else |
| 250 | entry->pin = old_entry->pin; | ||
| 251 | head = entry; | ||
| 252 | tail = entry; | ||
| 253 | old_entry = old_entry->next; | ||
| 254 | while (old_entry) { | ||
| 255 | entry = get_one_free_irq_2_pin(node); | ||
| 256 | if (!entry) { | ||
| 257 | entry = head; | ||
| 258 | while (entry) { | ||
| 259 | head = entry->next; | ||
| 260 | kfree(entry); | ||
| 261 | entry = head; | ||
| 262 | } | ||
| 263 | /* still use the old one */ | ||
| 264 | return; | ||
| 265 | } | ||
| 266 | entry->apic = old_entry->apic; | ||
| 267 | entry->pin = old_entry->pin; | ||
| 268 | tail->next = entry; | ||
| 269 | tail = entry; | ||
| 270 | old_entry = old_entry->next; | ||
| 271 | } | ||
| 272 | 216 | ||
| 273 | tail->next = NULL; | 217 | struct irq_cfg *irq_cfg(unsigned int irq) |
| 274 | cfg->irq_2_pin = head; | 218 | { |
| 219 | return irq < nr_irqs ? irq_cfgx + irq : NULL; | ||
| 275 | } | 220 | } |
| 276 | 221 | ||
| 277 | static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg) | 222 | static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node) |
| 278 | { | 223 | { |
| 279 | struct irq_pin_list *entry, *next; | 224 | return irq_cfgx + irq; |
| 280 | 225 | } | |
| 281 | if (old_cfg->irq_2_pin == cfg->irq_2_pin) | ||
| 282 | return; | ||
| 283 | 226 | ||
| 284 | entry = old_cfg->irq_2_pin; | 227 | static inline void free_irq_cfg(unsigned int at, struct irq_cfg *cfg) { } |
| 285 | 228 | ||
| 286 | while (entry) { | 229 | #endif |
| 287 | next = entry->next; | ||
| 288 | kfree(entry); | ||
| 289 | entry = next; | ||
| 290 | } | ||
| 291 | old_cfg->irq_2_pin = NULL; | ||
| 292 | } | ||
| 293 | 230 | ||
| 294 | void arch_init_copy_chip_data(struct irq_desc *old_desc, | 231 | static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node) |
| 295 | struct irq_desc *desc, int node) | ||
| 296 | { | 232 | { |
| 233 | int res = irq_alloc_desc_at(at, node); | ||
| 297 | struct irq_cfg *cfg; | 234 | struct irq_cfg *cfg; |
| 298 | struct irq_cfg *old_cfg; | ||
| 299 | |||
| 300 | cfg = get_one_free_irq_cfg(node); | ||
| 301 | |||
| 302 | if (!cfg) | ||
| 303 | return; | ||
| 304 | 235 | ||
| 305 | desc->chip_data = cfg; | 236 | if (res < 0) { |
| 306 | 237 | if (res != -EEXIST) | |
| 307 | old_cfg = old_desc->chip_data; | 238 | return NULL; |
| 308 | 239 | cfg = get_irq_chip_data(at); | |
| 309 | memcpy(cfg, old_cfg, sizeof(struct irq_cfg)); | 240 | if (cfg) |
| 310 | 241 | return cfg; | |
| 311 | init_copy_irq_2_pin(old_cfg, cfg, node); | 242 | } |
| 312 | } | ||
| 313 | 243 | ||
| 314 | static void free_irq_cfg(struct irq_cfg *old_cfg) | 244 | cfg = alloc_irq_cfg(at, node); |
| 315 | { | 245 | if (cfg) |
| 316 | kfree(old_cfg); | 246 | set_irq_chip_data(at, cfg); |
| 247 | else | ||
| 248 | irq_free_desc(at); | ||
| 249 | return cfg; | ||
| 317 | } | 250 | } |
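
alloc_irq_and_cfg_at() above folds three outcomes into one helper: the descriptor at `at` is newly allocated, it already exists (-EEXIST) and its chip data can simply be reused, or attaching a fresh cfg fails and the descriptor is released again. A rough userspace approximation of that allocate-or-reuse-or-roll-back flow, with a plain array standing in for the descriptor table (all names here are illustrative only):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

#define NSLOTS 16

static void *slot_data[NSLOTS];		/* stand-in for per-irq chip_data */
static int   slot_used[NSLOTS];		/* stand-in for the descriptor table */

static int reserve_slot(unsigned int at)
{
	if (at >= NSLOTS)
		return -ENOSPC;
	if (slot_used[at])
		return -EEXIST;		/* slot already allocated earlier */
	slot_used[at] = 1;
	return (int)at;
}

static void *get_or_alloc_at(unsigned int at)
{
	int res = reserve_slot(at);

	if (res < 0) {
		if (res != -EEXIST)
			return NULL;		/* hard failure */
		if (slot_data[at])
			return slot_data[at];	/* slot exists: reuse its data */
	}

	slot_data[at] = calloc(1, 64);		/* attach fresh per-slot data */
	if (!slot_data[at] && res >= 0)
		slot_used[at] = 0;		/* roll back our own reservation */
	return slot_data[at];
}

int main(void)
{
	printf("first:  %p\n", get_or_alloc_at(3));
	printf("second: %p\n", get_or_alloc_at(3));	/* same pointer: reused */
	return 0;
}
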
| 318 | 251 | ||
| 319 | void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc) | 252 | static int alloc_irq_from(unsigned int from, int node) |
| 320 | { | 253 | { |
| 321 | struct irq_cfg *old_cfg, *cfg; | 254 | return irq_alloc_desc_from(from, node); |
| 322 | |||
| 323 | old_cfg = old_desc->chip_data; | ||
| 324 | cfg = desc->chip_data; | ||
| 325 | |||
| 326 | if (old_cfg == cfg) | ||
| 327 | return; | ||
| 328 | |||
| 329 | if (old_cfg) { | ||
| 330 | free_irq_2_pin(old_cfg, cfg); | ||
| 331 | free_irq_cfg(old_cfg); | ||
| 332 | old_desc->chip_data = NULL; | ||
| 333 | } | ||
| 334 | } | 255 | } |
| 335 | /* end for move_irq_desc */ | ||
| 336 | 256 | ||
| 337 | #else | 257 | static void free_irq_at(unsigned int at, struct irq_cfg *cfg) |
| 338 | struct irq_cfg *irq_cfg(unsigned int irq) | ||
| 339 | { | 258 | { |
| 340 | return irq < nr_irqs ? irq_cfgx + irq : NULL; | 259 | free_irq_cfg(at, cfg); |
| 260 | irq_free_desc(at); | ||
| 341 | } | 261 | } |
| 342 | 262 | ||
| 343 | #endif | ||
| 344 | |||
| 345 | struct io_apic { | 263 | struct io_apic { |
| 346 | unsigned int index; | 264 | unsigned int index; |
| 347 | unsigned int unused[3]; | 265 | unsigned int unused[3]; |
| @@ -446,7 +364,7 @@ __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) | |||
| 446 | io_apic_write(apic, 0x10 + 2*pin, eu.w1); | 364 | io_apic_write(apic, 0x10 + 2*pin, eu.w1); |
| 447 | } | 365 | } |
| 448 | 366 | ||
| 449 | void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) | 367 | static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) |
| 450 | { | 368 | { |
| 451 | unsigned long flags; | 369 | unsigned long flags; |
| 452 | raw_spin_lock_irqsave(&ioapic_lock, flags); | 370 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
| @@ -476,7 +394,7 @@ static void ioapic_mask_entry(int apic, int pin) | |||
| 476 | * fast in the common case, and fast for shared ISA-space IRQs. | 394 | * fast in the common case, and fast for shared ISA-space IRQs. |
| 477 | */ | 395 | */ |
| 478 | static int | 396 | static int |
| 479 | add_pin_to_irq_node_nopanic(struct irq_cfg *cfg, int node, int apic, int pin) | 397 | __add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin) |
| 480 | { | 398 | { |
| 481 | struct irq_pin_list **last, *entry; | 399 | struct irq_pin_list **last, *entry; |
| 482 | 400 | ||
| @@ -488,7 +406,7 @@ add_pin_to_irq_node_nopanic(struct irq_cfg *cfg, int node, int apic, int pin) | |||
| 488 | last = &entry->next; | 406 | last = &entry->next; |
| 489 | } | 407 | } |
| 490 | 408 | ||
| 491 | entry = get_one_free_irq_2_pin(node); | 409 | entry = alloc_irq_pin_list(node); |
| 492 | if (!entry) { | 410 | if (!entry) { |
| 493 | printk(KERN_ERR "can not alloc irq_pin_list (%d,%d,%d)\n", | 411 | printk(KERN_ERR "can not alloc irq_pin_list (%d,%d,%d)\n", |
| 494 | node, apic, pin); | 412 | node, apic, pin); |
| @@ -503,7 +421,7 @@ add_pin_to_irq_node_nopanic(struct irq_cfg *cfg, int node, int apic, int pin) | |||
| 503 | 421 | ||
| 504 | static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin) | 422 | static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin) |
| 505 | { | 423 | { |
| 506 | if (add_pin_to_irq_node_nopanic(cfg, node, apic, pin)) | 424 | if (__add_pin_to_irq_node(cfg, node, apic, pin)) |
| 507 | panic("IO-APIC: failed to add irq-pin. Can not proceed\n"); | 425 | panic("IO-APIC: failed to add irq-pin. Can not proceed\n"); |
| 508 | } | 426 | } |
| 509 | 427 | ||
| @@ -566,11 +484,6 @@ static void __unmask_and_level_IO_APIC_irq(struct irq_pin_list *entry) | |||
| 566 | IO_APIC_REDIR_LEVEL_TRIGGER, NULL); | 484 | IO_APIC_REDIR_LEVEL_TRIGGER, NULL); |
| 567 | } | 485 | } |
| 568 | 486 | ||
| 569 | static void __unmask_IO_APIC_irq(struct irq_cfg *cfg) | ||
| 570 | { | ||
| 571 | io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL); | ||
| 572 | } | ||
| 573 | |||
| 574 | static void io_apic_sync(struct irq_pin_list *entry) | 487 | static void io_apic_sync(struct irq_pin_list *entry) |
| 575 | { | 488 | { |
| 576 | /* | 489 | /* |
| @@ -582,44 +495,37 @@ static void io_apic_sync(struct irq_pin_list *entry) | |||
| 582 | readl(&io_apic->data); | 495 | readl(&io_apic->data); |
| 583 | } | 496 | } |
| 584 | 497 | ||
| 585 | static void __mask_IO_APIC_irq(struct irq_cfg *cfg) | 498 | static void mask_ioapic(struct irq_cfg *cfg) |
| 586 | { | 499 | { |
| 500 | unsigned long flags; | ||
| 501 | |||
| 502 | raw_spin_lock_irqsave(&ioapic_lock, flags); | ||
| 587 | io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); | 503 | io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); |
| 504 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); | ||
| 588 | } | 505 | } |
| 589 | 506 | ||
| 590 | static void mask_IO_APIC_irq_desc(struct irq_desc *desc) | 507 | static void mask_ioapic_irq(struct irq_data *data) |
| 591 | { | 508 | { |
| 592 | struct irq_cfg *cfg = desc->chip_data; | 509 | mask_ioapic(data->chip_data); |
| 593 | unsigned long flags; | 510 | } |
| 594 | |||
| 595 | BUG_ON(!cfg); | ||
| 596 | 511 | ||
| 597 | raw_spin_lock_irqsave(&ioapic_lock, flags); | 512 | static void __unmask_ioapic(struct irq_cfg *cfg) |
| 598 | __mask_IO_APIC_irq(cfg); | 513 | { |
| 599 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); | 514 | io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL); |
| 600 | } | 515 | } |
| 601 | 516 | ||
| 602 | static void unmask_IO_APIC_irq_desc(struct irq_desc *desc) | 517 | static void unmask_ioapic(struct irq_cfg *cfg) |
| 603 | { | 518 | { |
| 604 | struct irq_cfg *cfg = desc->chip_data; | ||
| 605 | unsigned long flags; | 519 | unsigned long flags; |
| 606 | 520 | ||
| 607 | raw_spin_lock_irqsave(&ioapic_lock, flags); | 521 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
| 608 | __unmask_IO_APIC_irq(cfg); | 522 | __unmask_ioapic(cfg); |
| 609 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); | 523 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
| 610 | } | 524 | } |
| 611 | 525 | ||
| 612 | static void mask_IO_APIC_irq(unsigned int irq) | 526 | static void unmask_ioapic_irq(struct irq_data *data) |
| 613 | { | ||
| 614 | struct irq_desc *desc = irq_to_desc(irq); | ||
| 615 | |||
| 616 | mask_IO_APIC_irq_desc(desc); | ||
| 617 | } | ||
| 618 | static void unmask_IO_APIC_irq(unsigned int irq) | ||
| 619 | { | 527 | { |
| 620 | struct irq_desc *desc = irq_to_desc(irq); | 528 | unmask_ioapic(data->chip_data); |
| 621 | |||
| 622 | unmask_IO_APIC_irq_desc(desc); | ||
| 623 | } | 529 | } |
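
The mask/unmask rework separates the locked operation on an irq_cfg (mask_ioapic() and unmask_ioapic(), with __unmask_ioapic() as the lock-free inner helper) from the irq_chip callbacks, which now receive a struct irq_data and merely unwrap its chip_data. The same layering can be sketched in userspace with a pthread mutex standing in for ioapic_lock; the types below are invented for illustration.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t chip_lock = PTHREAD_MUTEX_INITIALIZER;

struct cfg { int masked; };			/* stand-in for irq_cfg */
struct irq_data { int irq; void *chip_data; };	/* stand-in for struct irq_data */

/* Low-level helpers take the cfg directly and do their own locking. */
static void mask_cfg(struct cfg *cfg)
{
	pthread_mutex_lock(&chip_lock);
	cfg->masked = 1;
	pthread_mutex_unlock(&chip_lock);
}

static void unmask_cfg(struct cfg *cfg)
{
	pthread_mutex_lock(&chip_lock);
	cfg->masked = 0;
	pthread_mutex_unlock(&chip_lock);
}

/* irq_chip-style callbacks just unwrap the per-interrupt data. */
static void chip_mask(struct irq_data *data)   { mask_cfg(data->chip_data); }
static void chip_unmask(struct irq_data *data) { unmask_cfg(data->chip_data); }

int main(void)
{
	struct cfg cfg = { 0 };
	struct irq_data d = { .irq = 9, .chip_data = &cfg };

	chip_mask(&d);
	printf("irq %d masked=%d\n", d.irq, cfg.masked);
	chip_unmask(&d);
	printf("irq %d masked=%d\n", d.irq, cfg.masked);
	return 0;
}
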
| 624 | 530 | ||
| 625 | static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) | 531 | static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) |
| @@ -689,14 +595,14 @@ struct IO_APIC_route_entry **alloc_ioapic_entries(void) | |||
| 689 | struct IO_APIC_route_entry **ioapic_entries; | 595 | struct IO_APIC_route_entry **ioapic_entries; |
| 690 | 596 | ||
| 691 | ioapic_entries = kzalloc(sizeof(*ioapic_entries) * nr_ioapics, | 597 | ioapic_entries = kzalloc(sizeof(*ioapic_entries) * nr_ioapics, |
| 692 | GFP_ATOMIC); | 598 | GFP_KERNEL); |
| 693 | if (!ioapic_entries) | 599 | if (!ioapic_entries) |
| 694 | return 0; | 600 | return 0; |
| 695 | 601 | ||
| 696 | for (apic = 0; apic < nr_ioapics; apic++) { | 602 | for (apic = 0; apic < nr_ioapics; apic++) { |
| 697 | ioapic_entries[apic] = | 603 | ioapic_entries[apic] = |
| 698 | kzalloc(sizeof(struct IO_APIC_route_entry) * | 604 | kzalloc(sizeof(struct IO_APIC_route_entry) * |
| 699 | nr_ioapic_registers[apic], GFP_ATOMIC); | 605 | nr_ioapic_registers[apic], GFP_KERNEL); |
| 700 | if (!ioapic_entries[apic]) | 606 | if (!ioapic_entries[apic]) |
| 701 | goto nomem; | 607 | goto nomem; |
| 702 | } | 608 | } |
| @@ -1254,7 +1160,6 @@ void __setup_vector_irq(int cpu) | |||
| 1254 | /* Initialize vector_irq on a new cpu */ | 1160 | /* Initialize vector_irq on a new cpu */ |
| 1255 | int irq, vector; | 1161 | int irq, vector; |
| 1256 | struct irq_cfg *cfg; | 1162 | struct irq_cfg *cfg; |
| 1257 | struct irq_desc *desc; | ||
| 1258 | 1163 | ||
| 1259 | /* | 1164 | /* |
| 1260 | * vector_lock will make sure that we don't run into irq vector | 1165 | * vector_lock will make sure that we don't run into irq vector |
| @@ -1263,9 +1168,10 @@ void __setup_vector_irq(int cpu) | |||
| 1263 | */ | 1168 | */ |
| 1264 | raw_spin_lock(&vector_lock); | 1169 | raw_spin_lock(&vector_lock); |
| 1265 | /* Mark the inuse vectors */ | 1170 | /* Mark the inuse vectors */ |
| 1266 | for_each_irq_desc(irq, desc) { | 1171 | for_each_active_irq(irq) { |
| 1267 | cfg = desc->chip_data; | 1172 | cfg = get_irq_chip_data(irq); |
| 1268 | 1173 | if (!cfg) | |
| 1174 | continue; | ||
| 1269 | /* | 1175 | /* |
| 1270 | * If it is a legacy IRQ handled by the legacy PIC, this cpu | 1176 | * If it is a legacy IRQ handled by the legacy PIC, this cpu |
| 1271 | * will be part of the irq_cfg's domain. | 1177 | * will be part of the irq_cfg's domain. |
| @@ -1322,17 +1228,17 @@ static inline int IO_APIC_irq_trigger(int irq) | |||
| 1322 | } | 1228 | } |
| 1323 | #endif | 1229 | #endif |
| 1324 | 1230 | ||
| 1325 | static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long trigger) | 1231 | static void ioapic_register_intr(unsigned int irq, unsigned long trigger) |
| 1326 | { | 1232 | { |
| 1327 | 1233 | ||
| 1328 | if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || | 1234 | if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || |
| 1329 | trigger == IOAPIC_LEVEL) | 1235 | trigger == IOAPIC_LEVEL) |
| 1330 | desc->status |= IRQ_LEVEL; | 1236 | irq_set_status_flags(irq, IRQ_LEVEL); |
| 1331 | else | 1237 | else |
| 1332 | desc->status &= ~IRQ_LEVEL; | 1238 | irq_clear_status_flags(irq, IRQ_LEVEL); |
| 1333 | 1239 | ||
| 1334 | if (irq_remapped(irq)) { | 1240 | if (irq_remapped(get_irq_chip_data(irq))) { |
| 1335 | desc->status |= IRQ_MOVE_PCNTXT; | 1241 | irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); |
| 1336 | if (trigger) | 1242 | if (trigger) |
| 1337 | set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, | 1243 | set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, |
| 1338 | handle_fasteoi_irq, | 1244 | handle_fasteoi_irq, |
| @@ -1353,10 +1259,10 @@ static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long t | |||
| 1353 | handle_edge_irq, "edge"); | 1259 | handle_edge_irq, "edge"); |
| 1354 | } | 1260 | } |
| 1355 | 1261 | ||
| 1356 | int setup_ioapic_entry(int apic_id, int irq, | 1262 | static int setup_ioapic_entry(int apic_id, int irq, |
| 1357 | struct IO_APIC_route_entry *entry, | 1263 | struct IO_APIC_route_entry *entry, |
| 1358 | unsigned int destination, int trigger, | 1264 | unsigned int destination, int trigger, |
| 1359 | int polarity, int vector, int pin) | 1265 | int polarity, int vector, int pin) |
| 1360 | { | 1266 | { |
| 1361 | /* | 1267 | /* |
| 1362 | * add it to the IO-APIC irq-routing table: | 1268 | * add it to the IO-APIC irq-routing table: |
| @@ -1377,21 +1283,7 @@ int setup_ioapic_entry(int apic_id, int irq, | |||
| 1377 | if (index < 0) | 1283 | if (index < 0) |
| 1378 | panic("Failed to allocate IRTE for ioapic %d\n", apic_id); | 1284 | panic("Failed to allocate IRTE for ioapic %d\n", apic_id); |
| 1379 | 1285 | ||
| 1380 | memset(&irte, 0, sizeof(irte)); | 1286 | prepare_irte(&irte, vector, destination); |
| 1381 | |||
| 1382 | irte.present = 1; | ||
| 1383 | irte.dst_mode = apic->irq_dest_mode; | ||
| 1384 | /* | ||
| 1385 | * Trigger mode in the IRTE will always be edge, and the | ||
| 1386 | * actual level or edge trigger will be setup in the IO-APIC | ||
| 1387 | * RTE. This will help simplify level triggered irq migration. | ||
| 1388 | * For more details, see the comments above explaining IO-APIC | ||
| 1389 | * irq migration in the presence of interrupt-remapping. | ||
| 1390 | */ | ||
| 1391 | irte.trigger_mode = 0; | ||
| 1392 | irte.dlvry_mode = apic->irq_delivery_mode; | ||
| 1393 | irte.vector = vector; | ||
| 1394 | irte.dest_id = IRTE_DEST(destination); | ||
| 1395 | 1287 | ||
| 1396 | /* Set source-id of interrupt request */ | 1288 | /* Set source-id of interrupt request */ |
| 1397 | set_ioapic_sid(&irte, apic_id); | 1289 | set_ioapic_sid(&irte, apic_id); |
| @@ -1426,18 +1318,14 @@ int setup_ioapic_entry(int apic_id, int irq, | |||
| 1426 | return 0; | 1318 | return 0; |
| 1427 | } | 1319 | } |
| 1428 | 1320 | ||
| 1429 | static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq_desc *desc, | 1321 | static void setup_ioapic_irq(int apic_id, int pin, unsigned int irq, |
| 1430 | int trigger, int polarity) | 1322 | struct irq_cfg *cfg, int trigger, int polarity) |
| 1431 | { | 1323 | { |
| 1432 | struct irq_cfg *cfg; | ||
| 1433 | struct IO_APIC_route_entry entry; | 1324 | struct IO_APIC_route_entry entry; |
| 1434 | unsigned int dest; | 1325 | unsigned int dest; |
| 1435 | 1326 | ||
| 1436 | if (!IO_APIC_IRQ(irq)) | 1327 | if (!IO_APIC_IRQ(irq)) |
| 1437 | return; | 1328 | return; |
| 1438 | |||
| 1439 | cfg = desc->chip_data; | ||
| 1440 | |||
| 1441 | /* | 1329 | /* |
| 1442 | * For legacy irqs, cfg->domain starts with cpu 0 for legacy | 1330 | * For legacy irqs, cfg->domain starts with cpu 0 for legacy |
| 1443 | * controllers like 8259. Now that IO-APIC can handle this irq, update | 1331 | * controllers like 8259. Now that IO-APIC can handle this irq, update |
| @@ -1466,9 +1354,9 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq | |||
| 1466 | return; | 1354 | return; |
| 1467 | } | 1355 | } |
| 1468 | 1356 | ||
| 1469 | ioapic_register_intr(irq, desc, trigger); | 1357 | ioapic_register_intr(irq, trigger); |
| 1470 | if (irq < legacy_pic->nr_legacy_irqs) | 1358 | if (irq < legacy_pic->nr_legacy_irqs) |
| 1471 | legacy_pic->chip->mask(irq); | 1359 | legacy_pic->mask(irq); |
| 1472 | 1360 | ||
| 1473 | ioapic_write_entry(apic_id, pin, entry); | 1361 | ioapic_write_entry(apic_id, pin, entry); |
| 1474 | } | 1362 | } |
| @@ -1479,11 +1367,9 @@ static struct { | |||
| 1479 | 1367 | ||
| 1480 | static void __init setup_IO_APIC_irqs(void) | 1368 | static void __init setup_IO_APIC_irqs(void) |
| 1481 | { | 1369 | { |
| 1482 | int apic_id, pin, idx, irq; | 1370 | int apic_id, pin, idx, irq, notcon = 0; |
| 1483 | int notcon = 0; | 1371 | int node = cpu_to_node(0); |
| 1484 | struct irq_desc *desc; | ||
| 1485 | struct irq_cfg *cfg; | 1372 | struct irq_cfg *cfg; |
| 1486 | int node = cpu_to_node(boot_cpu_id); | ||
| 1487 | 1373 | ||
| 1488 | apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); | 1374 | apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); |
| 1489 | 1375 | ||
| @@ -1520,19 +1406,17 @@ static void __init setup_IO_APIC_irqs(void) | |||
| 1520 | apic->multi_timer_check(apic_id, irq)) | 1406 | apic->multi_timer_check(apic_id, irq)) |
| 1521 | continue; | 1407 | continue; |
| 1522 | 1408 | ||
| 1523 | desc = irq_to_desc_alloc_node(irq, node); | 1409 | cfg = alloc_irq_and_cfg_at(irq, node); |
| 1524 | if (!desc) { | 1410 | if (!cfg) |
| 1525 | printk(KERN_INFO "can not get irq_desc for %d\n", irq); | ||
| 1526 | continue; | 1411 | continue; |
| 1527 | } | 1412 | |
| 1528 | cfg = desc->chip_data; | ||
| 1529 | add_pin_to_irq_node(cfg, node, apic_id, pin); | 1413 | add_pin_to_irq_node(cfg, node, apic_id, pin); |
| 1530 | /* | 1414 | /* |
| 1531 | * don't mark it in pin_programmed, so later acpi could | 1415 | * don't mark it in pin_programmed, so later acpi could |
| 1532 | * set it correctly when irq < 16 | 1416 | * set it correctly when irq < 16 |
| 1533 | */ | 1417 | */ |
| 1534 | setup_IO_APIC_irq(apic_id, pin, irq, desc, | 1418 | setup_ioapic_irq(apic_id, pin, irq, cfg, irq_trigger(idx), |
| 1535 | irq_trigger(idx), irq_polarity(idx)); | 1419 | irq_polarity(idx)); |
| 1536 | } | 1420 | } |
| 1537 | 1421 | ||
| 1538 | if (notcon) | 1422 | if (notcon) |
| @@ -1547,9 +1431,7 @@ static void __init setup_IO_APIC_irqs(void) | |||
| 1547 | */ | 1431 | */ |
| 1548 | void setup_IO_APIC_irq_extra(u32 gsi) | 1432 | void setup_IO_APIC_irq_extra(u32 gsi) |
| 1549 | { | 1433 | { |
| 1550 | int apic_id = 0, pin, idx, irq; | 1434 | int apic_id = 0, pin, idx, irq, node = cpu_to_node(0); |
| 1551 | int node = cpu_to_node(boot_cpu_id); | ||
| 1552 | struct irq_desc *desc; | ||
| 1553 | struct irq_cfg *cfg; | 1435 | struct irq_cfg *cfg; |
| 1554 | 1436 | ||
| 1555 | /* | 1437 | /* |
| @@ -1565,18 +1447,15 @@ void setup_IO_APIC_irq_extra(u32 gsi) | |||
| 1565 | return; | 1447 | return; |
| 1566 | 1448 | ||
| 1567 | irq = pin_2_irq(idx, apic_id, pin); | 1449 | irq = pin_2_irq(idx, apic_id, pin); |
| 1568 | #ifdef CONFIG_SPARSE_IRQ | 1450 | |
| 1569 | desc = irq_to_desc(irq); | 1451 | /* Only handle the non legacy irqs on secondary ioapics */ |
| 1570 | if (desc) | 1452 | if (apic_id == 0 || irq < NR_IRQS_LEGACY) |
| 1571 | return; | 1453 | return; |
| 1572 | #endif | 1454 | |
| 1573 | desc = irq_to_desc_alloc_node(irq, node); | 1455 | cfg = alloc_irq_and_cfg_at(irq, node); |
| 1574 | if (!desc) { | 1456 | if (!cfg) |
| 1575 | printk(KERN_INFO "can not get irq_desc for %d\n", irq); | ||
| 1576 | return; | 1457 | return; |
| 1577 | } | ||
| 1578 | 1458 | ||
| 1579 | cfg = desc->chip_data; | ||
| 1580 | add_pin_to_irq_node(cfg, node, apic_id, pin); | 1459 | add_pin_to_irq_node(cfg, node, apic_id, pin); |
| 1581 | 1460 | ||
| 1582 | if (test_bit(pin, mp_ioapic_routing[apic_id].pin_programmed)) { | 1461 | if (test_bit(pin, mp_ioapic_routing[apic_id].pin_programmed)) { |
| @@ -1586,7 +1465,7 @@ void setup_IO_APIC_irq_extra(u32 gsi) | |||
| 1586 | } | 1465 | } |
| 1587 | set_bit(pin, mp_ioapic_routing[apic_id].pin_programmed); | 1466 | set_bit(pin, mp_ioapic_routing[apic_id].pin_programmed); |
| 1588 | 1467 | ||
| 1589 | setup_IO_APIC_irq(apic_id, pin, irq, desc, | 1468 | setup_ioapic_irq(apic_id, pin, irq, cfg, |
| 1590 | irq_trigger(idx), irq_polarity(idx)); | 1469 | irq_trigger(idx), irq_polarity(idx)); |
| 1591 | } | 1470 | } |
| 1592 | 1471 | ||
| @@ -1637,7 +1516,6 @@ __apicdebuginit(void) print_IO_APIC(void) | |||
| 1637 | union IO_APIC_reg_03 reg_03; | 1516 | union IO_APIC_reg_03 reg_03; |
| 1638 | unsigned long flags; | 1517 | unsigned long flags; |
| 1639 | struct irq_cfg *cfg; | 1518 | struct irq_cfg *cfg; |
| 1640 | struct irq_desc *desc; | ||
| 1641 | unsigned int irq; | 1519 | unsigned int irq; |
| 1642 | 1520 | ||
| 1643 | printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); | 1521 | printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); |
| @@ -1724,10 +1602,10 @@ __apicdebuginit(void) print_IO_APIC(void) | |||
| 1724 | } | 1602 | } |
| 1725 | } | 1603 | } |
| 1726 | printk(KERN_DEBUG "IRQ to pin mappings:\n"); | 1604 | printk(KERN_DEBUG "IRQ to pin mappings:\n"); |
| 1727 | for_each_irq_desc(irq, desc) { | 1605 | for_each_active_irq(irq) { |
| 1728 | struct irq_pin_list *entry; | 1606 | struct irq_pin_list *entry; |
| 1729 | 1607 | ||
| 1730 | cfg = desc->chip_data; | 1608 | cfg = get_irq_chip_data(irq); |
| 1731 | if (!cfg) | 1609 | if (!cfg) |
| 1732 | continue; | 1610 | continue; |
| 1733 | entry = cfg->irq_2_pin; | 1611 | entry = cfg->irq_2_pin; |
| @@ -2234,29 +2112,26 @@ static int __init timer_irq_works(void) | |||
| 2234 | * an edge even if it isn't on the 8259A... | 2112 | * an edge even if it isn't on the 8259A... |
| 2235 | */ | 2113 | */ |
| 2236 | 2114 | ||
| 2237 | static unsigned int startup_ioapic_irq(unsigned int irq) | 2115 | static unsigned int startup_ioapic_irq(struct irq_data *data) |
| 2238 | { | 2116 | { |
| 2239 | int was_pending = 0; | 2117 | int was_pending = 0, irq = data->irq; |
| 2240 | unsigned long flags; | 2118 | unsigned long flags; |
| 2241 | struct irq_cfg *cfg; | ||
| 2242 | 2119 | ||
| 2243 | raw_spin_lock_irqsave(&ioapic_lock, flags); | 2120 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
| 2244 | if (irq < legacy_pic->nr_legacy_irqs) { | 2121 | if (irq < legacy_pic->nr_legacy_irqs) { |
| 2245 | legacy_pic->chip->mask(irq); | 2122 | legacy_pic->mask(irq); |
| 2246 | if (legacy_pic->irq_pending(irq)) | 2123 | if (legacy_pic->irq_pending(irq)) |
| 2247 | was_pending = 1; | 2124 | was_pending = 1; |
| 2248 | } | 2125 | } |
| 2249 | cfg = irq_cfg(irq); | 2126 | __unmask_ioapic(data->chip_data); |
| 2250 | __unmask_IO_APIC_irq(cfg); | ||
| 2251 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); | 2127 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
| 2252 | 2128 | ||
| 2253 | return was_pending; | 2129 | return was_pending; |
| 2254 | } | 2130 | } |
| 2255 | 2131 | ||
| 2256 | static int ioapic_retrigger_irq(unsigned int irq) | 2132 | static int ioapic_retrigger_irq(struct irq_data *data) |
| 2257 | { | 2133 | { |
| 2258 | 2134 | struct irq_cfg *cfg = data->chip_data; | |
| 2259 | struct irq_cfg *cfg = irq_cfg(irq); | ||
| 2260 | unsigned long flags; | 2135 | unsigned long flags; |
| 2261 | 2136 | ||
| 2262 | raw_spin_lock_irqsave(&vector_lock, flags); | 2137 | raw_spin_lock_irqsave(&vector_lock, flags); |
| @@ -2307,7 +2182,7 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq | |||
| 2307 | * With interrupt-remapping, destination information comes | 2182 | * With interrupt-remapping, destination information comes |
| 2308 | * from interrupt-remapping table entry. | 2183 | * from interrupt-remapping table entry. |
| 2309 | */ | 2184 | */ |
| 2310 | if (!irq_remapped(irq)) | 2185 | if (!irq_remapped(cfg)) |
| 2311 | io_apic_write(apic, 0x11 + pin*2, dest); | 2186 | io_apic_write(apic, 0x11 + pin*2, dest); |
| 2312 | reg = io_apic_read(apic, 0x10 + pin*2); | 2187 | reg = io_apic_read(apic, 0x10 + pin*2); |
| 2313 | reg &= ~IO_APIC_REDIR_VECTOR_MASK; | 2188 | reg &= ~IO_APIC_REDIR_VECTOR_MASK; |
| @@ -2317,65 +2192,46 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq | |||
| 2317 | } | 2192 | } |
| 2318 | 2193 | ||
| 2319 | /* | 2194 | /* |
| 2320 | * Either sets desc->affinity to a valid value, and returns | 2195 | * Either sets data->affinity to a valid value, and returns |
| 2321 | * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and | 2196 | * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and |
| 2322 | * leaves desc->affinity untouched. | 2197 | * leaves data->affinity untouched. |
| 2323 | */ | 2198 | */ |
| 2324 | unsigned int | 2199 | int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, |
| 2325 | set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask, | 2200 | unsigned int *dest_id) |
| 2326 | unsigned int *dest_id) | ||
| 2327 | { | 2201 | { |
| 2328 | struct irq_cfg *cfg; | 2202 | struct irq_cfg *cfg = data->chip_data; |
| 2329 | unsigned int irq; | ||
| 2330 | 2203 | ||
| 2331 | if (!cpumask_intersects(mask, cpu_online_mask)) | 2204 | if (!cpumask_intersects(mask, cpu_online_mask)) |
| 2332 | return -1; | 2205 | return -1; |
| 2333 | 2206 | ||
| 2334 | irq = desc->irq; | 2207 | if (assign_irq_vector(data->irq, data->chip_data, mask)) |
| 2335 | cfg = desc->chip_data; | ||
| 2336 | if (assign_irq_vector(irq, cfg, mask)) | ||
| 2337 | return -1; | 2208 | return -1; |
| 2338 | 2209 | ||
| 2339 | cpumask_copy(desc->affinity, mask); | 2210 | cpumask_copy(data->affinity, mask); |
| 2340 | 2211 | ||
| 2341 | *dest_id = apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain); | 2212 | *dest_id = apic->cpu_mask_to_apicid_and(mask, cfg->domain); |
| 2342 | return 0; | 2213 | return 0; |
| 2343 | } | 2214 | } |
| 2344 | 2215 | ||
| 2345 | static int | 2216 | static int |
| 2346 | set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) | 2217 | ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, |
| 2218 | bool force) | ||
| 2347 | { | 2219 | { |
| 2348 | struct irq_cfg *cfg; | 2220 | unsigned int dest, irq = data->irq; |
| 2349 | unsigned long flags; | 2221 | unsigned long flags; |
| 2350 | unsigned int dest; | 2222 | int ret; |
| 2351 | unsigned int irq; | ||
| 2352 | int ret = -1; | ||
| 2353 | |||
| 2354 | irq = desc->irq; | ||
| 2355 | cfg = desc->chip_data; | ||
| 2356 | 2223 | ||
| 2357 | raw_spin_lock_irqsave(&ioapic_lock, flags); | 2224 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
| 2358 | ret = set_desc_affinity(desc, mask, &dest); | 2225 | ret = __ioapic_set_affinity(data, mask, &dest); |
| 2359 | if (!ret) { | 2226 | if (!ret) { |
| 2360 | /* Only the high 8 bits are valid. */ | 2227 | /* Only the high 8 bits are valid. */ |
| 2361 | dest = SET_APIC_LOGICAL_ID(dest); | 2228 | dest = SET_APIC_LOGICAL_ID(dest); |
| 2362 | __target_IO_APIC_irq(irq, dest, cfg); | 2229 | __target_IO_APIC_irq(irq, dest, data->chip_data); |
| 2363 | } | 2230 | } |
| 2364 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); | 2231 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
| 2365 | |||
| 2366 | return ret; | 2232 | return ret; |
| 2367 | } | 2233 | } |
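
The affinity path follows the same split: __ioapic_set_affinity() validates the requested mask against the online CPUs, assigns a vector, and reports the destination id through an out-parameter, while ioapic_set_affinity() wraps it in ioapic_lock and reprograms the routing entry only on success. A compact userspace sketch of such a validate-then-commit setter (the names and the destination encoding are made up):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t route_lock = PTHREAD_MUTEX_INITIALIZER;

struct route { unsigned int cpu_mask; unsigned int dest; };

/* Step 1: validate and compute, reporting the result via *dest_id. */
static int pick_destination(struct route *rt, unsigned int requested_mask,
			    unsigned int online_mask, unsigned int *dest_id)
{
	unsigned int usable = requested_mask & online_mask;

	if (!usable)
		return -1;		/* no online CPU in the requested mask */
	rt->cpu_mask = usable;
	*dest_id = usable & -usable;	/* lowest set bit as the target */
	return 0;
}

/* Step 2: take the lock, then validate and commit in one critical section. */
static int set_affinity(struct route *rt, unsigned int requested_mask,
			unsigned int online_mask)
{
	unsigned int dest;
	int ret;

	pthread_mutex_lock(&route_lock);
	ret = pick_destination(rt, requested_mask, online_mask, &dest);
	if (!ret)
		rt->dest = dest;	/* "write the RTE" only on success */
	pthread_mutex_unlock(&route_lock);
	return ret;
}

int main(void)
{
	struct route rt = { 0, 0 };

	printf("ret=%d dest=0x%x\n", set_affinity(&rt, 0xc, 0xf), rt.dest);
	printf("ret=%d (offline mask)\n", set_affinity(&rt, 0x30, 0xf));
	return 0;
}
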
| 2368 | 2234 | ||
| 2369 | static int | ||
| 2370 | set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask) | ||
| 2371 | { | ||
| 2372 | struct irq_desc *desc; | ||
| 2373 | |||
| 2374 | desc = irq_to_desc(irq); | ||
| 2375 | |||
| 2376 | return set_ioapic_affinity_irq_desc(desc, mask); | ||
| 2377 | } | ||
| 2378 | |||
| 2379 | #ifdef CONFIG_INTR_REMAP | 2235 | #ifdef CONFIG_INTR_REMAP |
| 2380 | 2236 | ||
| 2381 | /* | 2237 | /* |
| @@ -2390,24 +2246,21 @@ set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask) | |||
| 2390 | * the interrupt-remapping table entry. | 2246 | * the interrupt-remapping table entry. |
| 2391 | */ | 2247 | */ |
| 2392 | static int | 2248 | static int |
| 2393 | migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) | 2249 | ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, |
| 2250 | bool force) | ||
| 2394 | { | 2251 | { |
| 2395 | struct irq_cfg *cfg; | 2252 | struct irq_cfg *cfg = data->chip_data; |
| 2253 | unsigned int dest, irq = data->irq; | ||
| 2396 | struct irte irte; | 2254 | struct irte irte; |
| 2397 | unsigned int dest; | ||
| 2398 | unsigned int irq; | ||
| 2399 | int ret = -1; | ||
| 2400 | 2255 | ||
| 2401 | if (!cpumask_intersects(mask, cpu_online_mask)) | 2256 | if (!cpumask_intersects(mask, cpu_online_mask)) |
| 2402 | return ret; | 2257 | return -EINVAL; |
| 2403 | 2258 | ||
| 2404 | irq = desc->irq; | ||
| 2405 | if (get_irte(irq, &irte)) | 2259 | if (get_irte(irq, &irte)) |
| 2406 | return ret; | 2260 | return -EBUSY; |
| 2407 | 2261 | ||
| 2408 | cfg = desc->chip_data; | ||
| 2409 | if (assign_irq_vector(irq, cfg, mask)) | 2262 | if (assign_irq_vector(irq, cfg, mask)) |
| 2410 | return ret; | 2263 | return -EBUSY; |
| 2411 | 2264 | ||
| 2412 | dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask); | 2265 | dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask); |
| 2413 | 2266 | ||
| @@ -2422,29 +2275,14 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) | |||
| 2422 | if (cfg->move_in_progress) | 2275 | if (cfg->move_in_progress) |
| 2423 | send_cleanup_vector(cfg); | 2276 | send_cleanup_vector(cfg); |
| 2424 | 2277 | ||
| 2425 | cpumask_copy(desc->affinity, mask); | 2278 | cpumask_copy(data->affinity, mask); |
| 2426 | |||
| 2427 | return 0; | 2279 | return 0; |
| 2428 | } | 2280 | } |
| 2429 | 2281 | ||
| 2430 | /* | ||
| 2431 | * Migrates the IRQ destination in the process context. | ||
| 2432 | */ | ||
| 2433 | static int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, | ||
| 2434 | const struct cpumask *mask) | ||
| 2435 | { | ||
| 2436 | return migrate_ioapic_irq_desc(desc, mask); | ||
| 2437 | } | ||
| 2438 | static int set_ir_ioapic_affinity_irq(unsigned int irq, | ||
| 2439 | const struct cpumask *mask) | ||
| 2440 | { | ||
| 2441 | struct irq_desc *desc = irq_to_desc(irq); | ||
| 2442 | |||
| 2443 | return set_ir_ioapic_affinity_irq_desc(desc, mask); | ||
| 2444 | } | ||
| 2445 | #else | 2282 | #else |
| 2446 | static inline int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, | 2283 | static inline int |
| 2447 | const struct cpumask *mask) | 2284 | ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, |
| 2285 | bool force) | ||
| 2448 | { | 2286 | { |
| 2449 | return 0; | 2287 | return 0; |
| 2450 | } | 2288 | } |
| @@ -2506,10 +2344,8 @@ unlock: | |||
| 2506 | irq_exit(); | 2344 | irq_exit(); |
| 2507 | } | 2345 | } |
| 2508 | 2346 | ||
| 2509 | static void __irq_complete_move(struct irq_desc **descp, unsigned vector) | 2347 | static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector) |
| 2510 | { | 2348 | { |
| 2511 | struct irq_desc *desc = *descp; | ||
| 2512 | struct irq_cfg *cfg = desc->chip_data; | ||
| 2513 | unsigned me; | 2349 | unsigned me; |
| 2514 | 2350 | ||
| 2515 | if (likely(!cfg->move_in_progress)) | 2351 | if (likely(!cfg->move_in_progress)) |
| @@ -2521,31 +2357,28 @@ static void __irq_complete_move(struct irq_desc **descp, unsigned vector) | |||
| 2521 | send_cleanup_vector(cfg); | 2357 | send_cleanup_vector(cfg); |
| 2522 | } | 2358 | } |
| 2523 | 2359 | ||
| 2524 | static void irq_complete_move(struct irq_desc **descp) | 2360 | static void irq_complete_move(struct irq_cfg *cfg) |
| 2525 | { | 2361 | { |
| 2526 | __irq_complete_move(descp, ~get_irq_regs()->orig_ax); | 2362 | __irq_complete_move(cfg, ~get_irq_regs()->orig_ax); |
| 2527 | } | 2363 | } |
| 2528 | 2364 | ||
| 2529 | void irq_force_complete_move(int irq) | 2365 | void irq_force_complete_move(int irq) |
| 2530 | { | 2366 | { |
| 2531 | struct irq_desc *desc = irq_to_desc(irq); | 2367 | struct irq_cfg *cfg = get_irq_chip_data(irq); |
| 2532 | struct irq_cfg *cfg = desc->chip_data; | ||
| 2533 | 2368 | ||
| 2534 | if (!cfg) | 2369 | if (!cfg) |
| 2535 | return; | 2370 | return; |
| 2536 | 2371 | ||
| 2537 | __irq_complete_move(&desc, cfg->vector); | 2372 | __irq_complete_move(cfg, cfg->vector); |
| 2538 | } | 2373 | } |
| 2539 | #else | 2374 | #else |
| 2540 | static inline void irq_complete_move(struct irq_desc **descp) {} | 2375 | static inline void irq_complete_move(struct irq_cfg *cfg) { } |
| 2541 | #endif | 2376 | #endif |
| 2542 | 2377 | ||
| 2543 | static void ack_apic_edge(unsigned int irq) | 2378 | static void ack_apic_edge(struct irq_data *data) |
| 2544 | { | 2379 | { |
| 2545 | struct irq_desc *desc = irq_to_desc(irq); | 2380 | irq_complete_move(data->chip_data); |
| 2546 | 2381 | move_native_irq(data->irq); | |
| 2547 | irq_complete_move(&desc); | ||
| 2548 | move_native_irq(irq); | ||
| 2549 | ack_APIC_irq(); | 2382 | ack_APIC_irq(); |
| 2550 | } | 2383 | } |
| 2551 | 2384 | ||
| @@ -2567,10 +2400,12 @@ atomic_t irq_mis_count; | |||
| 2567 | * Otherwise, we simulate the EOI message manually by changing the trigger | 2400 | * Otherwise, we simulate the EOI message manually by changing the trigger |
| 2568 | * mode to edge and then back to level, with RTE being masked during this. | 2401 | * mode to edge and then back to level, with RTE being masked during this. |
| 2569 | */ | 2402 | */ |
| 2570 | static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) | 2403 | static void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) |
| 2571 | { | 2404 | { |
| 2572 | struct irq_pin_list *entry; | 2405 | struct irq_pin_list *entry; |
| 2406 | unsigned long flags; | ||
| 2573 | 2407 | ||
| 2408 | raw_spin_lock_irqsave(&ioapic_lock, flags); | ||
| 2574 | for_each_irq_pin(entry, cfg->irq_2_pin) { | 2409 | for_each_irq_pin(entry, cfg->irq_2_pin) { |
| 2575 | if (mp_ioapics[entry->apic].apicver >= 0x20) { | 2410 | if (mp_ioapics[entry->apic].apicver >= 0x20) { |
| 2576 | /* | 2411 | /* |
| @@ -2579,7 +2414,7 @@ static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) | |||
| 2579 | * intr-remapping table entry. Hence for the io-apic | 2414 | * intr-remapping table entry. Hence for the io-apic |
| 2580 | * EOI we use the pin number. | 2415 | * EOI we use the pin number. |
| 2581 | */ | 2416 | */ |
| 2582 | if (irq_remapped(irq)) | 2417 | if (irq_remapped(cfg)) |
| 2583 | io_apic_eoi(entry->apic, entry->pin); | 2418 | io_apic_eoi(entry->apic, entry->pin); |
| 2584 | else | 2419 | else |
| 2585 | io_apic_eoi(entry->apic, cfg->vector); | 2420 | io_apic_eoi(entry->apic, cfg->vector); |
| @@ -2588,36 +2423,22 @@ static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) | |||
| 2588 | __unmask_and_level_IO_APIC_irq(entry); | 2423 | __unmask_and_level_IO_APIC_irq(entry); |
| 2589 | } | 2424 | } |
| 2590 | } | 2425 | } |
| 2591 | } | ||
| 2592 | |||
| 2593 | static void eoi_ioapic_irq(struct irq_desc *desc) | ||
| 2594 | { | ||
| 2595 | struct irq_cfg *cfg; | ||
| 2596 | unsigned long flags; | ||
| 2597 | unsigned int irq; | ||
| 2598 | |||
| 2599 | irq = desc->irq; | ||
| 2600 | cfg = desc->chip_data; | ||
| 2601 | |||
| 2602 | raw_spin_lock_irqsave(&ioapic_lock, flags); | ||
| 2603 | __eoi_ioapic_irq(irq, cfg); | ||
| 2604 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); | 2426 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
| 2605 | } | 2427 | } |
| 2606 | 2428 | ||
| 2607 | static void ack_apic_level(unsigned int irq) | 2429 | static void ack_apic_level(struct irq_data *data) |
| 2608 | { | 2430 | { |
| 2431 | struct irq_cfg *cfg = data->chip_data; | ||
| 2432 | int i, do_unmask_irq = 0, irq = data->irq; | ||
| 2609 | struct irq_desc *desc = irq_to_desc(irq); | 2433 | struct irq_desc *desc = irq_to_desc(irq); |
| 2610 | unsigned long v; | 2434 | unsigned long v; |
| 2611 | int i; | ||
| 2612 | struct irq_cfg *cfg; | ||
| 2613 | int do_unmask_irq = 0; | ||
| 2614 | 2435 | ||
| 2615 | irq_complete_move(&desc); | 2436 | irq_complete_move(cfg); |
| 2616 | #ifdef CONFIG_GENERIC_PENDING_IRQ | 2437 | #ifdef CONFIG_GENERIC_PENDING_IRQ |
| 2617 | /* If we are moving the irq we need to mask it */ | 2438 | /* If we are moving the irq we need to mask it */ |
| 2618 | if (unlikely(desc->status & IRQ_MOVE_PENDING)) { | 2439 | if (unlikely(desc->status & IRQ_MOVE_PENDING)) { |
| 2619 | do_unmask_irq = 1; | 2440 | do_unmask_irq = 1; |
| 2620 | mask_IO_APIC_irq_desc(desc); | 2441 | mask_ioapic(cfg); |
| 2621 | } | 2442 | } |
| 2622 | #endif | 2443 | #endif |
| 2623 | 2444 | ||
| @@ -2653,7 +2474,6 @@ static void ack_apic_level(unsigned int irq) | |||
| 2653 | * we use the above logic (mask+edge followed by unmask+level) from | 2474 | * we use the above logic (mask+edge followed by unmask+level) from |
| 2654 | * Manfred Spraul to clear the remote IRR. | 2475 | * Manfred Spraul to clear the remote IRR. |
| 2655 | */ | 2476 | */ |
| 2656 | cfg = desc->chip_data; | ||
| 2657 | i = cfg->vector; | 2477 | i = cfg->vector; |
| 2658 | v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); | 2478 | v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); |
| 2659 | 2479 | ||
| @@ -2673,7 +2493,7 @@ static void ack_apic_level(unsigned int irq) | |||
| 2673 | if (!(v & (1 << (i & 0x1f)))) { | 2493 | if (!(v & (1 << (i & 0x1f)))) { |
| 2674 | atomic_inc(&irq_mis_count); | 2494 | atomic_inc(&irq_mis_count); |
| 2675 | 2495 | ||
| 2676 | eoi_ioapic_irq(desc); | 2496 | eoi_ioapic_irq(irq, cfg); |
| 2677 | } | 2497 | } |
| 2678 | 2498 | ||
| 2679 | /* Now we can move and re-enable the irq */ | 2499 |
| @@ -2704,61 +2524,57 @@ static void ack_apic_level(unsigned int irq) | |||
| 2704 | * accurate and is causing problems then it is a hardware bug | 2524 | * accurate and is causing problems then it is a hardware bug |
| 2705 | * and you can go talk to the chipset vendor about it. | 2525 | * and you can go talk to the chipset vendor about it. |
| 2706 | */ | 2526 | */ |
| 2707 | cfg = desc->chip_data; | ||
| 2708 | if (!io_apic_level_ack_pending(cfg)) | 2527 | if (!io_apic_level_ack_pending(cfg)) |
| 2709 | move_masked_irq(irq); | 2528 | move_masked_irq(irq); |
| 2710 | unmask_IO_APIC_irq_desc(desc); | 2529 | unmask_ioapic(cfg); |
| 2711 | } | 2530 | } |
| 2712 | } | 2531 | } |
| 2713 | 2532 | ||
| 2714 | #ifdef CONFIG_INTR_REMAP | 2533 | #ifdef CONFIG_INTR_REMAP |
| 2715 | static void ir_ack_apic_edge(unsigned int irq) | 2534 | static void ir_ack_apic_edge(struct irq_data *data) |
| 2716 | { | 2535 | { |
| 2717 | ack_APIC_irq(); | 2536 | ack_APIC_irq(); |
| 2718 | } | 2537 | } |
| 2719 | 2538 | ||
| 2720 | static void ir_ack_apic_level(unsigned int irq) | 2539 | static void ir_ack_apic_level(struct irq_data *data) |
| 2721 | { | 2540 | { |
| 2722 | struct irq_desc *desc = irq_to_desc(irq); | ||
| 2723 | |||
| 2724 | ack_APIC_irq(); | 2541 | ack_APIC_irq(); |
| 2725 | eoi_ioapic_irq(desc); | 2542 | eoi_ioapic_irq(data->irq, data->chip_data); |
| 2726 | } | 2543 | } |
| 2727 | #endif /* CONFIG_INTR_REMAP */ | 2544 | #endif /* CONFIG_INTR_REMAP */ |
| 2728 | 2545 | ||
| 2729 | static struct irq_chip ioapic_chip __read_mostly = { | 2546 | static struct irq_chip ioapic_chip __read_mostly = { |
| 2730 | .name = "IO-APIC", | 2547 | .name = "IO-APIC", |
| 2731 | .startup = startup_ioapic_irq, | 2548 | .irq_startup = startup_ioapic_irq, |
| 2732 | .mask = mask_IO_APIC_irq, | 2549 | .irq_mask = mask_ioapic_irq, |
| 2733 | .unmask = unmask_IO_APIC_irq, | 2550 | .irq_unmask = unmask_ioapic_irq, |
| 2734 | .ack = ack_apic_edge, | 2551 | .irq_ack = ack_apic_edge, |
| 2735 | .eoi = ack_apic_level, | 2552 | .irq_eoi = ack_apic_level, |
| 2736 | #ifdef CONFIG_SMP | 2553 | #ifdef CONFIG_SMP |
| 2737 | .set_affinity = set_ioapic_affinity_irq, | 2554 | .irq_set_affinity = ioapic_set_affinity, |
| 2738 | #endif | 2555 | #endif |
| 2739 | .retrigger = ioapic_retrigger_irq, | 2556 | .irq_retrigger = ioapic_retrigger_irq, |
| 2740 | }; | 2557 | }; |
| 2741 | 2558 | ||
| 2742 | static struct irq_chip ir_ioapic_chip __read_mostly = { | 2559 | static struct irq_chip ir_ioapic_chip __read_mostly = { |
| 2743 | .name = "IR-IO-APIC", | 2560 | .name = "IR-IO-APIC", |
| 2744 | .startup = startup_ioapic_irq, | 2561 | .irq_startup = startup_ioapic_irq, |
| 2745 | .mask = mask_IO_APIC_irq, | 2562 | .irq_mask = mask_ioapic_irq, |
| 2746 | .unmask = unmask_IO_APIC_irq, | 2563 | .irq_unmask = unmask_ioapic_irq, |
| 2747 | #ifdef CONFIG_INTR_REMAP | 2564 | #ifdef CONFIG_INTR_REMAP |
| 2748 | .ack = ir_ack_apic_edge, | 2565 | .irq_ack = ir_ack_apic_edge, |
| 2749 | .eoi = ir_ack_apic_level, | 2566 | .irq_eoi = ir_ack_apic_level, |
| 2750 | #ifdef CONFIG_SMP | 2567 | #ifdef CONFIG_SMP |
| 2751 | .set_affinity = set_ir_ioapic_affinity_irq, | 2568 | .irq_set_affinity = ir_ioapic_set_affinity, |
| 2752 | #endif | 2569 | #endif |
| 2753 | #endif | 2570 | #endif |
| 2754 | .retrigger = ioapic_retrigger_irq, | 2571 | .irq_retrigger = ioapic_retrigger_irq, |
| 2755 | }; | 2572 | }; |
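
Both irq_chip tables now install the irq_data-based methods (.irq_mask, .irq_unmask, .irq_ack, ...) in place of the old irq-number callbacks, so each handler receives its per-interrupt state directly instead of looking it up again. A small standalone sketch of such a method table keyed on a data handle; the framework around it is invented, only the naming pattern mirrors the kernel's.

#include <stdio.h>

struct irq_data { unsigned int irq; void *chip_data; };

/* Method table: every callback receives the irq_data handle. */
struct irq_chip {
	const char *name;
	void (*irq_mask)(struct irq_data *d);
	void (*irq_unmask)(struct irq_data *d);
};

struct fake_pin { int masked; };

static void fake_mask(struct irq_data *d)
{
	((struct fake_pin *)d->chip_data)->masked = 1;
	printf("irq %u: masked\n", d->irq);
}

static void fake_unmask(struct irq_data *d)
{
	((struct fake_pin *)d->chip_data)->masked = 0;
	printf("irq %u: unmasked\n", d->irq);
}

static const struct irq_chip fake_chip = {
	.name       = "FAKE-IO-APIC",
	.irq_mask   = fake_mask,
	.irq_unmask = fake_unmask,
};

int main(void)
{
	struct fake_pin pin = { 0 };
	struct irq_data d = { .irq = 17, .chip_data = &pin };

	fake_chip.irq_mask(&d);
	fake_chip.irq_unmask(&d);
	return 0;
}
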
| 2756 | 2573 | ||
| 2757 | static inline void init_IO_APIC_traps(void) | 2574 | static inline void init_IO_APIC_traps(void) |
| 2758 | { | 2575 | { |
| 2759 | int irq; | ||
| 2760 | struct irq_desc *desc; | ||
| 2761 | struct irq_cfg *cfg; | 2576 | struct irq_cfg *cfg; |
| 2577 | unsigned int irq; | ||
| 2762 | 2578 | ||
| 2763 | /* | 2579 | /* |
| 2764 | * NOTE! The local APIC isn't very good at handling | 2580 | * NOTE! The local APIC isn't very good at handling |
| @@ -2771,8 +2587,8 @@ static inline void init_IO_APIC_traps(void) | |||
| 2771 | * Also, we've got to be careful not to trash gate | 2587 | * Also, we've got to be careful not to trash gate |
| 2772 | * 0x80, because int 0x80 is hm, kind of importantish. ;) | 2588 | * 0x80, because int 0x80 is hm, kind of importantish. ;) |
| 2773 | */ | 2589 | */ |
| 2774 | for_each_irq_desc(irq, desc) { | 2590 | for_each_active_irq(irq) { |
| 2775 | cfg = desc->chip_data; | 2591 | cfg = get_irq_chip_data(irq); |
| 2776 | if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) { | 2592 | if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) { |
| 2777 | /* | 2593 | /* |
| 2778 | * Hmm.. We don't have an entry for this, | 2594 | * Hmm.. We don't have an entry for this, |
| @@ -2783,7 +2599,7 @@ static inline void init_IO_APIC_traps(void) | |||
| 2783 | legacy_pic->make_irq(irq); | 2599 | legacy_pic->make_irq(irq); |
| 2784 | else | 2600 | else |
| 2785 | /* Strange. Oh, well.. */ | 2601 | /* Strange. Oh, well.. */ |
| 2786 | desc->chip = &no_irq_chip; | 2602 | set_irq_chip(irq, &no_irq_chip); |
| 2787 | } | 2603 | } |
| 2788 | } | 2604 | } |
| 2789 | } | 2605 | } |
| @@ -2792,7 +2608,7 @@ static inline void init_IO_APIC_traps(void) | |||
| 2792 | * The local APIC irq-chip implementation: | 2608 | * The local APIC irq-chip implementation: |
| 2793 | */ | 2609 | */ |
| 2794 | 2610 | ||
| 2795 | static void mask_lapic_irq(unsigned int irq) | 2611 | static void mask_lapic_irq(struct irq_data *data) |
| 2796 | { | 2612 | { |
| 2797 | unsigned long v; | 2613 | unsigned long v; |
| 2798 | 2614 | ||
| @@ -2800,7 +2616,7 @@ static void mask_lapic_irq(unsigned int irq) | |||
| 2800 | apic_write(APIC_LVT0, v | APIC_LVT_MASKED); | 2616 | apic_write(APIC_LVT0, v | APIC_LVT_MASKED); |
| 2801 | } | 2617 | } |
| 2802 | 2618 | ||
| 2803 | static void unmask_lapic_irq(unsigned int irq) | 2619 | static void unmask_lapic_irq(struct irq_data *data) |
| 2804 | { | 2620 | { |
| 2805 | unsigned long v; | 2621 | unsigned long v; |
| 2806 | 2622 | ||
| @@ -2808,21 +2624,21 @@ static void unmask_lapic_irq(unsigned int irq) | |||
| 2808 | apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); | 2624 | apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); |
| 2809 | } | 2625 | } |
| 2810 | 2626 | ||
| 2811 | static void ack_lapic_irq(unsigned int irq) | 2627 | static void ack_lapic_irq(struct irq_data *data) |
| 2812 | { | 2628 | { |
| 2813 | ack_APIC_irq(); | 2629 | ack_APIC_irq(); |
| 2814 | } | 2630 | } |
| 2815 | 2631 | ||
| 2816 | static struct irq_chip lapic_chip __read_mostly = { | 2632 | static struct irq_chip lapic_chip __read_mostly = { |
| 2817 | .name = "local-APIC", | 2633 | .name = "local-APIC", |
| 2818 | .mask = mask_lapic_irq, | 2634 | .irq_mask = mask_lapic_irq, |
| 2819 | .unmask = unmask_lapic_irq, | 2635 | .irq_unmask = unmask_lapic_irq, |
| 2820 | .ack = ack_lapic_irq, | 2636 | .irq_ack = ack_lapic_irq, |
| 2821 | }; | 2637 | }; |
| 2822 | 2638 | ||
| 2823 | static void lapic_register_intr(int irq, struct irq_desc *desc) | 2639 | static void lapic_register_intr(int irq) |
| 2824 | { | 2640 | { |
| 2825 | desc->status &= ~IRQ_LEVEL; | 2641 | irq_clear_status_flags(irq, IRQ_LEVEL); |
| 2826 | set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, | 2642 | set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, |
| 2827 | "edge"); | 2643 | "edge"); |
| 2828 | } | 2644 | } |
| @@ -2925,9 +2741,8 @@ int timer_through_8259 __initdata; | |||
| 2925 | */ | 2741 | */ |
| 2926 | static inline void __init check_timer(void) | 2742 | static inline void __init check_timer(void) |
| 2927 | { | 2743 | { |
| 2928 | struct irq_desc *desc = irq_to_desc(0); | 2744 | struct irq_cfg *cfg = get_irq_chip_data(0); |
| 2929 | struct irq_cfg *cfg = desc->chip_data; | 2745 | int node = cpu_to_node(0); |
| 2930 | int node = cpu_to_node(boot_cpu_id); | ||
| 2931 | int apic1, pin1, apic2, pin2; | 2746 | int apic1, pin1, apic2, pin2; |
| 2932 | unsigned long flags; | 2747 | unsigned long flags; |
| 2933 | int no_pin1 = 0; | 2748 | int no_pin1 = 0; |
| @@ -2937,7 +2752,7 @@ static inline void __init check_timer(void) | |||
| 2937 | /* | 2752 | /* |
| 2938 | * get/set the timer IRQ vector: | 2753 | * get/set the timer IRQ vector: |
| 2939 | */ | 2754 | */ |
| 2940 | legacy_pic->chip->mask(0); | 2755 | legacy_pic->mask(0); |
| 2941 | assign_irq_vector(0, cfg, apic->target_cpus()); | 2756 | assign_irq_vector(0, cfg, apic->target_cpus()); |
| 2942 | 2757 | ||
| 2943 | /* | 2758 | /* |
| @@ -2996,7 +2811,7 @@ static inline void __init check_timer(void) | |||
| 2996 | add_pin_to_irq_node(cfg, node, apic1, pin1); | 2811 | add_pin_to_irq_node(cfg, node, apic1, pin1); |
| 2997 | setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); | 2812 | setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); |
| 2998 | } else { | 2813 | } else { |
| 2999 | /* for edge trigger, setup_IO_APIC_irq already | 2814 | /* for edge trigger, setup_ioapic_irq already |
| 3000 | * leave it unmasked. | 2815 | * leave it unmasked. |
| 3001 | * so only need to unmask if it is level-trigger | 2816 | * so only need to unmask if it is level-trigger |
| 3002 | * do we really have level trigger timer? | 2817 | * do we really have level trigger timer? |
| @@ -3004,12 +2819,12 @@ static inline void __init check_timer(void) | |||
| 3004 | int idx; | 2819 | int idx; |
| 3005 | idx = find_irq_entry(apic1, pin1, mp_INT); | 2820 | idx = find_irq_entry(apic1, pin1, mp_INT); |
| 3006 | if (idx != -1 && irq_trigger(idx)) | 2821 | if (idx != -1 && irq_trigger(idx)) |
| 3007 | unmask_IO_APIC_irq_desc(desc); | 2822 | unmask_ioapic(cfg); |
| 3008 | } | 2823 | } |
| 3009 | if (timer_irq_works()) { | 2824 | if (timer_irq_works()) { |
| 3010 | if (nmi_watchdog == NMI_IO_APIC) { | 2825 | if (nmi_watchdog == NMI_IO_APIC) { |
| 3011 | setup_nmi(); | 2826 | setup_nmi(); |
| 3012 | legacy_pic->chip->unmask(0); | 2827 | legacy_pic->unmask(0); |
| 3013 | } | 2828 | } |
| 3014 | if (disable_timer_pin_1 > 0) | 2829 | if (disable_timer_pin_1 > 0) |
| 3015 | clear_IO_APIC_pin(0, pin1); | 2830 | clear_IO_APIC_pin(0, pin1); |
| @@ -3032,14 +2847,14 @@ static inline void __init check_timer(void) | |||
| 3032 | */ | 2847 | */ |
| 3033 | replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2); | 2848 | replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2); |
| 3034 | setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); | 2849 | setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); |
| 3035 | legacy_pic->chip->unmask(0); | 2850 | legacy_pic->unmask(0); |
| 3036 | if (timer_irq_works()) { | 2851 | if (timer_irq_works()) { |
| 3037 | apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); | 2852 | apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); |
| 3038 | timer_through_8259 = 1; | 2853 | timer_through_8259 = 1; |
| 3039 | if (nmi_watchdog == NMI_IO_APIC) { | 2854 | if (nmi_watchdog == NMI_IO_APIC) { |
| 3040 | legacy_pic->chip->mask(0); | 2855 | legacy_pic->mask(0); |
| 3041 | setup_nmi(); | 2856 | setup_nmi(); |
| 3042 | legacy_pic->chip->unmask(0); | 2857 | legacy_pic->unmask(0); |
| 3043 | } | 2858 | } |
| 3044 | goto out; | 2859 | goto out; |
| 3045 | } | 2860 | } |
| @@ -3047,7 +2862,7 @@ static inline void __init check_timer(void) | |||
| 3047 | * Cleanup, just in case ... | 2862 | * Cleanup, just in case ... |
| 3048 | */ | 2863 | */ |
| 3049 | local_irq_disable(); | 2864 | local_irq_disable(); |
| 3050 | legacy_pic->chip->mask(0); | 2865 | legacy_pic->mask(0); |
| 3051 | clear_IO_APIC_pin(apic2, pin2); | 2866 | clear_IO_APIC_pin(apic2, pin2); |
| 3052 | apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); | 2867 | apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); |
| 3053 | } | 2868 | } |
| @@ -3064,16 +2879,16 @@ static inline void __init check_timer(void) | |||
| 3064 | apic_printk(APIC_QUIET, KERN_INFO | 2879 | apic_printk(APIC_QUIET, KERN_INFO |
| 3065 | "...trying to set up timer as Virtual Wire IRQ...\n"); | 2880 | "...trying to set up timer as Virtual Wire IRQ...\n"); |
| 3066 | 2881 | ||
| 3067 | lapic_register_intr(0, desc); | 2882 | lapic_register_intr(0); |
| 3068 | apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ | 2883 | apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ |
| 3069 | legacy_pic->chip->unmask(0); | 2884 | legacy_pic->unmask(0); |
| 3070 | 2885 | ||
| 3071 | if (timer_irq_works()) { | 2886 | if (timer_irq_works()) { |
| 3072 | apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); | 2887 | apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); |
| 3073 | goto out; | 2888 | goto out; |
| 3074 | } | 2889 | } |
| 3075 | local_irq_disable(); | 2890 | local_irq_disable(); |
| 3076 | legacy_pic->chip->mask(0); | 2891 | legacy_pic->mask(0); |
| 3077 | apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); | 2892 | apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); |
| 3078 | apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); | 2893 | apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); |
| 3079 | 2894 | ||
| @@ -3239,49 +3054,42 @@ device_initcall(ioapic_init_sysfs); | |||
| 3239 | /* | 3054 | /* |
| 3240 | * Dynamic irq allocate and deallocation | 3055 | * Dynamic irq allocate and deallocation |
| 3241 | */ | 3056 | */ |
| 3242 | unsigned int create_irq_nr(unsigned int irq_want, int node) | 3057 | unsigned int create_irq_nr(unsigned int from, int node) |
| 3243 | { | 3058 | { |
| 3244 | /* Allocate an unused irq */ | 3059 | struct irq_cfg *cfg; |
| 3245 | unsigned int irq; | ||
| 3246 | unsigned int new; | ||
| 3247 | unsigned long flags; | 3060 | unsigned long flags; |
| 3248 | struct irq_cfg *cfg_new = NULL; | 3061 | unsigned int ret = 0; |
| 3249 | struct irq_desc *desc_new = NULL; | 3062 | int irq; |
| 3250 | |||
| 3251 | irq = 0; | ||
| 3252 | if (irq_want < nr_irqs_gsi) | ||
| 3253 | irq_want = nr_irqs_gsi; | ||
| 3254 | |||
| 3255 | raw_spin_lock_irqsave(&vector_lock, flags); | ||
| 3256 | for (new = irq_want; new < nr_irqs; new++) { | ||
| 3257 | desc_new = irq_to_desc_alloc_node(new, node); | ||
| 3258 | if (!desc_new) { | ||
| 3259 | printk(KERN_INFO "can not get irq_desc for %d\n", new); | ||
| 3260 | continue; | ||
| 3261 | } | ||
| 3262 | cfg_new = desc_new->chip_data; | ||
| 3263 | |||
| 3264 | if (cfg_new->vector != 0) | ||
| 3265 | continue; | ||
| 3266 | 3063 | ||
| 3267 | desc_new = move_irq_desc(desc_new, node); | 3064 | if (from < nr_irqs_gsi) |
| 3268 | cfg_new = desc_new->chip_data; | 3065 | from = nr_irqs_gsi; |
| 3269 | 3066 | ||
| 3270 | if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0) | 3067 | irq = alloc_irq_from(from, node); |
| 3271 | irq = new; | 3068 | if (irq < 0) |
| 3272 | break; | 3069 | return 0; |
| 3070 | cfg = alloc_irq_cfg(irq, node); | ||
| 3071 | if (!cfg) { | ||
| 3072 | free_irq_at(irq, NULL); | ||
| 3073 | return 0; | ||
| 3273 | } | 3074 | } |
| 3274 | raw_spin_unlock_irqrestore(&vector_lock, flags); | ||
| 3275 | 3075 | ||
| 3276 | if (irq > 0) | 3076 | raw_spin_lock_irqsave(&vector_lock, flags); |
| 3277 | dynamic_irq_init_keep_chip_data(irq); | 3077 | if (!__assign_irq_vector(irq, cfg, apic->target_cpus())) |
| 3078 | ret = irq; | ||
| 3079 | raw_spin_unlock_irqrestore(&vector_lock, flags); | ||
| 3278 | 3080 | ||
| 3279 | return irq; | 3081 | if (ret) { |
| 3082 | set_irq_chip_data(irq, cfg); | ||
| 3083 | irq_clear_status_flags(irq, IRQ_NOREQUEST); | ||
| 3084 | } else { | ||
| 3085 | free_irq_at(irq, cfg); | ||
| 3086 | } | ||
| 3087 | return ret; | ||
| 3280 | } | 3088 | } |
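
The rewritten create_irq_nr() is effectively a three-step transaction: reserve an unused descriptor at or above `from`, attach a freshly allocated cfg, then try to assign a vector under vector_lock, undoing the earlier steps whenever a later one fails. A userspace approximation of that reserve/attach/commit-or-undo sequence follows; the tables and the artificial vector-exhaustion check are invented for illustration.

#include <stdio.h>
#include <stdlib.h>

#define NIRQS 32

static int   used[NIRQS];		/* stand-in for the descriptor table */
static void *cfg_of[NIRQS];		/* stand-in for per-irq chip data */

static int reserve_from(unsigned int from)
{
	for (unsigned int i = from; i < NIRQS; i++)
		if (!used[i]) {
			used[i] = 1;
			return (int)i;
		}
	return -1;
}

static int assign_vector(int irq)	/* pretend vectors run out above 28 */
{
	return irq <= 28 ? 0 : -1;
}

static void release(int irq)
{
	free(cfg_of[irq]);
	cfg_of[irq] = NULL;
	used[irq] = 0;
}

static unsigned int create_irq_from(unsigned int from)
{
	int irq = reserve_from(from);		/* step 1: reserve an id */

	if (irq < 0)
		return 0;			/* 0 doubles as "failed" here */
	cfg_of[irq] = calloc(1, 32);		/* step 2: attach a cfg */
	if (!cfg_of[irq]) {
		used[irq] = 0;
		return 0;
	}
	if (assign_vector(irq)) {		/* step 3 failed: undo 1 and 2 */
		release(irq);
		return 0;
	}
	return (unsigned int)irq;
}

int main(void)
{
	printf("got irq %u\n", create_irq_from(16));
	printf("got irq %u\n", create_irq_from(16));
	return 0;
}
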
| 3281 | 3089 | ||
| 3282 | int create_irq(void) | 3090 | int create_irq(void) |
| 3283 | { | 3091 | { |
| 3284 | int node = cpu_to_node(boot_cpu_id); | 3092 | int node = cpu_to_node(0); |
| 3285 | unsigned int irq_want; | 3093 | unsigned int irq_want; |
| 3286 | int irq; | 3094 | int irq; |
| 3287 | 3095 | ||
| @@ -3296,14 +3104,17 @@ int create_irq(void) | |||
| 3296 | 3104 | ||
| 3297 | void destroy_irq(unsigned int irq) | 3105 | void destroy_irq(unsigned int irq) |
| 3298 | { | 3106 | { |
| 3107 | struct irq_cfg *cfg = get_irq_chip_data(irq); | ||
| 3299 | unsigned long flags; | 3108 | unsigned long flags; |
| 3300 | 3109 | ||
| 3301 | dynamic_irq_cleanup_keep_chip_data(irq); | 3110 | irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE); |
| 3302 | 3111 | ||
| 3303 | free_irte(irq); | 3112 | if (intr_remapping_enabled) |
| 3113 | free_irte(irq); | ||
| 3304 | raw_spin_lock_irqsave(&vector_lock, flags); | 3114 | raw_spin_lock_irqsave(&vector_lock, flags); |
| 3305 | __clear_irq_vector(irq, get_irq_chip_data(irq)); | 3115 | __clear_irq_vector(irq, cfg); |
| 3306 | raw_spin_unlock_irqrestore(&vector_lock, flags); | 3116 | raw_spin_unlock_irqrestore(&vector_lock, flags); |
| 3117 | free_irq_at(irq, cfg); | ||
| 3307 | } | 3118 | } |
| 3308 | 3119 | ||
| 3309 | /* | 3120 | /* |
| @@ -3327,7 +3138,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, | |||
| 3327 | 3138 | ||
| 3328 | dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); | 3139 | dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); |
| 3329 | 3140 | ||
| 3330 | if (irq_remapped(irq)) { | 3141 | if (irq_remapped(get_irq_chip_data(irq))) { |
| 3331 | struct irte irte; | 3142 | struct irte irte; |
| 3332 | int ir_index; | 3143 | int ir_index; |
| 3333 | u16 sub_handle; | 3144 | u16 sub_handle; |
| @@ -3335,14 +3146,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, | |||
| 3335 | ir_index = map_irq_to_irte_handle(irq, &sub_handle); | 3146 | ir_index = map_irq_to_irte_handle(irq, &sub_handle); |
| 3336 | BUG_ON(ir_index == -1); | 3147 | BUG_ON(ir_index == -1); |
| 3337 | 3148 | ||
| 3338 | memset (&irte, 0, sizeof(irte)); | 3149 | prepare_irte(&irte, cfg->vector, dest); |
| 3339 | |||
| 3340 | irte.present = 1; | ||
| 3341 | irte.dst_mode = apic->irq_dest_mode; | ||
| 3342 | irte.trigger_mode = 0; /* edge */ | ||
| 3343 | irte.dlvry_mode = apic->irq_delivery_mode; | ||
| 3344 | irte.vector = cfg->vector; | ||
| 3345 | irte.dest_id = IRTE_DEST(dest); | ||
| 3346 | 3150 | ||
| 3347 | /* Set source-id of interrupt request */ | 3151 | /* Set source-id of interrupt request */ |
| 3348 | if (pdev) | 3152 | if (pdev) |
| @@ -3387,26 +3191,24 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, | |||
| 3387 | } | 3191 | } |
| 3388 | 3192 | ||
| 3389 | #ifdef CONFIG_SMP | 3193 | #ifdef CONFIG_SMP |
| 3390 | static int set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) | 3194 | static int |
| 3195 | msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) | ||
| 3391 | { | 3196 | { |
| 3392 | struct irq_desc *desc = irq_to_desc(irq); | 3197 | struct irq_cfg *cfg = data->chip_data; |
| 3393 | struct irq_cfg *cfg; | ||
| 3394 | struct msi_msg msg; | 3198 | struct msi_msg msg; |
| 3395 | unsigned int dest; | 3199 | unsigned int dest; |
| 3396 | 3200 | ||
| 3397 | if (set_desc_affinity(desc, mask, &dest)) | 3201 | if (__ioapic_set_affinity(data, mask, &dest)) |
| 3398 | return -1; | 3202 | return -1; |
| 3399 | 3203 | ||
| 3400 | cfg = desc->chip_data; | 3204 | __get_cached_msi_msg(data->msi_desc, &msg); |
| 3401 | |||
| 3402 | get_cached_msi_msg_desc(desc, &msg); | ||
| 3403 | 3205 | ||
| 3404 | msg.data &= ~MSI_DATA_VECTOR_MASK; | 3206 | msg.data &= ~MSI_DATA_VECTOR_MASK; |
| 3405 | msg.data |= MSI_DATA_VECTOR(cfg->vector); | 3207 | msg.data |= MSI_DATA_VECTOR(cfg->vector); |
| 3406 | msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; | 3208 | msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; |
| 3407 | msg.address_lo |= MSI_ADDR_DEST_ID(dest); | 3209 | msg.address_lo |= MSI_ADDR_DEST_ID(dest); |
| 3408 | 3210 | ||
| 3409 | write_msi_msg_desc(desc, &msg); | 3211 | __write_msi_msg(data->msi_desc, &msg); |
| 3410 | 3212 | ||
| 3411 | return 0; | 3213 | return 0; |
| 3412 | } | 3214 | } |
| @@ -3416,17 +3218,17 @@ static int set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) | |||
| 3416 | * done in the process context using interrupt-remapping hardware. | 3218 | * done in the process context using interrupt-remapping hardware. |
| 3417 | */ | 3219 | */ |
| 3418 | static int | 3220 | static int |
| 3419 | ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) | 3221 | ir_msi_set_affinity(struct irq_data *data, const struct cpumask *mask, |
| 3222 | bool force) | ||
| 3420 | { | 3223 | { |
| 3421 | struct irq_desc *desc = irq_to_desc(irq); | 3224 | struct irq_cfg *cfg = data->chip_data; |
| 3422 | struct irq_cfg *cfg = desc->chip_data; | 3225 | unsigned int dest, irq = data->irq; |
| 3423 | unsigned int dest; | ||
| 3424 | struct irte irte; | 3226 | struct irte irte; |
| 3425 | 3227 | ||
| 3426 | if (get_irte(irq, &irte)) | 3228 | if (get_irte(irq, &irte)) |
| 3427 | return -1; | 3229 | return -1; |
| 3428 | 3230 | ||
| 3429 | if (set_desc_affinity(desc, mask, &dest)) | 3231 | if (__ioapic_set_affinity(data, mask, &dest)) |
| 3430 | return -1; | 3232 | return -1; |
| 3431 | 3233 | ||
| 3432 | irte.vector = cfg->vector; | 3234 | irte.vector = cfg->vector; |
| @@ -3456,27 +3258,27 @@ ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) | |||
| 3456 | * which implement the MSI or MSI-X Capability Structure. | 3258 | * which implement the MSI or MSI-X Capability Structure. |
| 3457 | */ | 3259 | */ |
| 3458 | static struct irq_chip msi_chip = { | 3260 | static struct irq_chip msi_chip = { |
| 3459 | .name = "PCI-MSI", | 3261 | .name = "PCI-MSI", |
| 3460 | .unmask = unmask_msi_irq, | 3262 | .irq_unmask = unmask_msi_irq, |
| 3461 | .mask = mask_msi_irq, | 3263 | .irq_mask = mask_msi_irq, |
| 3462 | .ack = ack_apic_edge, | 3264 | .irq_ack = ack_apic_edge, |
| 3463 | #ifdef CONFIG_SMP | 3265 | #ifdef CONFIG_SMP |
| 3464 | .set_affinity = set_msi_irq_affinity, | 3266 | .irq_set_affinity = msi_set_affinity, |
| 3465 | #endif | 3267 | #endif |
| 3466 | .retrigger = ioapic_retrigger_irq, | 3268 | .irq_retrigger = ioapic_retrigger_irq, |
| 3467 | }; | 3269 | }; |
| 3468 | 3270 | ||
| 3469 | static struct irq_chip msi_ir_chip = { | 3271 | static struct irq_chip msi_ir_chip = { |
| 3470 | .name = "IR-PCI-MSI", | 3272 | .name = "IR-PCI-MSI", |
| 3471 | .unmask = unmask_msi_irq, | 3273 | .irq_unmask = unmask_msi_irq, |
| 3472 | .mask = mask_msi_irq, | 3274 | .irq_mask = mask_msi_irq, |
| 3473 | #ifdef CONFIG_INTR_REMAP | 3275 | #ifdef CONFIG_INTR_REMAP |
| 3474 | .ack = ir_ack_apic_edge, | 3276 | .irq_ack = ir_ack_apic_edge, |
| 3475 | #ifdef CONFIG_SMP | 3277 | #ifdef CONFIG_SMP |
| 3476 | .set_affinity = ir_set_msi_irq_affinity, | 3278 | .irq_set_affinity = ir_msi_set_affinity, |
| 3477 | #endif | 3279 | #endif |
| 3478 | #endif | 3280 | #endif |
| 3479 | .retrigger = ioapic_retrigger_irq, | 3281 | .irq_retrigger = ioapic_retrigger_irq, |
| 3480 | }; | 3282 | }; |
| 3481 | 3283 | ||
| 3482 | /* | 3284 | /* |
| @@ -3508,8 +3310,8 @@ static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec) | |||
| 3508 | 3310 | ||
| 3509 | static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) | 3311 | static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) |
| 3510 | { | 3312 | { |
| 3511 | int ret; | ||
| 3512 | struct msi_msg msg; | 3313 | struct msi_msg msg; |
| 3314 | int ret; | ||
| 3513 | 3315 | ||
| 3514 | ret = msi_compose_msg(dev, irq, &msg, -1); | 3316 | ret = msi_compose_msg(dev, irq, &msg, -1); |
| 3515 | if (ret < 0) | 3317 | if (ret < 0) |
| @@ -3518,12 +3320,8 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) | |||
| 3518 | set_irq_msi(irq, msidesc); | 3320 | set_irq_msi(irq, msidesc); |
| 3519 | write_msi_msg(irq, &msg); | 3321 | write_msi_msg(irq, &msg); |
| 3520 | 3322 | ||
| 3521 | if (irq_remapped(irq)) { | 3323 | if (irq_remapped(get_irq_chip_data(irq))) { |
| 3522 | struct irq_desc *desc = irq_to_desc(irq); | 3324 | irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); |
| 3523 | /* | ||
| 3524 | * irq migration in process context | ||
| 3525 | */ | ||
| 3526 | desc->status |= IRQ_MOVE_PCNTXT; | ||
| 3527 | set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge"); | 3325 | set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge"); |
| 3528 | } else | 3326 | } else |
| 3529 | set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); | 3327 | set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); |
| @@ -3535,13 +3333,10 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) | |||
| 3535 | 3333 | ||
| 3536 | int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) | 3334 | int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) |
| 3537 | { | 3335 | { |
| 3538 | unsigned int irq; | 3336 | int node, ret, sub_handle, index = 0; |
| 3539 | int ret, sub_handle; | 3337 | unsigned int irq, irq_want; |
| 3540 | struct msi_desc *msidesc; | 3338 | struct msi_desc *msidesc; |
| 3541 | unsigned int irq_want; | ||
| 3542 | struct intel_iommu *iommu = NULL; | 3339 | struct intel_iommu *iommu = NULL; |
| 3543 | int index = 0; | ||
| 3544 | int node; | ||
| 3545 | 3340 | ||
| 3546 | /* x86 doesn't support multiple MSI yet */ | 3341 | /* x86 doesn't support multiple MSI yet */ |
| 3547 | if (type == PCI_CAP_ID_MSI && nvec > 1) | 3342 | if (type == PCI_CAP_ID_MSI && nvec > 1) |
| @@ -3601,18 +3396,17 @@ void arch_teardown_msi_irq(unsigned int irq) | |||
| 3601 | 3396 | ||
| 3602 | #if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP) | 3397 | #if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP) |
| 3603 | #ifdef CONFIG_SMP | 3398 | #ifdef CONFIG_SMP |
| 3604 | static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) | 3399 | static int |
| 3400 | dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask, | ||
| 3401 | bool force) | ||
| 3605 | { | 3402 | { |
| 3606 | struct irq_desc *desc = irq_to_desc(irq); | 3403 | struct irq_cfg *cfg = data->chip_data; |
| 3607 | struct irq_cfg *cfg; | 3404 | unsigned int dest, irq = data->irq; |
| 3608 | struct msi_msg msg; | 3405 | struct msi_msg msg; |
| 3609 | unsigned int dest; | ||
| 3610 | 3406 | ||
| 3611 | if (set_desc_affinity(desc, mask, &dest)) | 3407 | if (__ioapic_set_affinity(data, mask, &dest)) |
| 3612 | return -1; | 3408 | return -1; |
| 3613 | 3409 | ||
| 3614 | cfg = desc->chip_data; | ||
| 3615 | |||
| 3616 | dmar_msi_read(irq, &msg); | 3410 | dmar_msi_read(irq, &msg); |
| 3617 | 3411 | ||
| 3618 | msg.data &= ~MSI_DATA_VECTOR_MASK; | 3412 | msg.data &= ~MSI_DATA_VECTOR_MASK; |
| @@ -3628,14 +3422,14 @@ static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) | |||
| 3628 | #endif /* CONFIG_SMP */ | 3422 | #endif /* CONFIG_SMP */ |
| 3629 | 3423 | ||
| 3630 | static struct irq_chip dmar_msi_type = { | 3424 | static struct irq_chip dmar_msi_type = { |
| 3631 | .name = "DMAR_MSI", | 3425 | .name = "DMAR_MSI", |
| 3632 | .unmask = dmar_msi_unmask, | 3426 | .irq_unmask = dmar_msi_unmask, |
| 3633 | .mask = dmar_msi_mask, | 3427 | .irq_mask = dmar_msi_mask, |
| 3634 | .ack = ack_apic_edge, | 3428 | .irq_ack = ack_apic_edge, |
| 3635 | #ifdef CONFIG_SMP | 3429 | #ifdef CONFIG_SMP |
| 3636 | .set_affinity = dmar_msi_set_affinity, | 3430 | .irq_set_affinity = dmar_msi_set_affinity, |
| 3637 | #endif | 3431 | #endif |
| 3638 | .retrigger = ioapic_retrigger_irq, | 3432 | .irq_retrigger = ioapic_retrigger_irq, |
| 3639 | }; | 3433 | }; |
| 3640 | 3434 | ||
| 3641 | int arch_setup_dmar_msi(unsigned int irq) | 3435 | int arch_setup_dmar_msi(unsigned int irq) |
| @@ -3656,26 +3450,24 @@ int arch_setup_dmar_msi(unsigned int irq) | |||
| 3656 | #ifdef CONFIG_HPET_TIMER | 3450 | #ifdef CONFIG_HPET_TIMER |
| 3657 | 3451 | ||
| 3658 | #ifdef CONFIG_SMP | 3452 | #ifdef CONFIG_SMP |
| 3659 | static int hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) | 3453 | static int hpet_msi_set_affinity(struct irq_data *data, |
| 3454 | const struct cpumask *mask, bool force) | ||
| 3660 | { | 3455 | { |
| 3661 | struct irq_desc *desc = irq_to_desc(irq); | 3456 | struct irq_cfg *cfg = data->chip_data; |
| 3662 | struct irq_cfg *cfg; | ||
| 3663 | struct msi_msg msg; | 3457 | struct msi_msg msg; |
| 3664 | unsigned int dest; | 3458 | unsigned int dest; |
| 3665 | 3459 | ||
| 3666 | if (set_desc_affinity(desc, mask, &dest)) | 3460 | if (__ioapic_set_affinity(data, mask, &dest)) |
| 3667 | return -1; | 3461 | return -1; |
| 3668 | 3462 | ||
| 3669 | cfg = desc->chip_data; | 3463 | hpet_msi_read(data->handler_data, &msg); |
| 3670 | |||
| 3671 | hpet_msi_read(irq, &msg); | ||
| 3672 | 3464 | ||
| 3673 | msg.data &= ~MSI_DATA_VECTOR_MASK; | 3465 | msg.data &= ~MSI_DATA_VECTOR_MASK; |
| 3674 | msg.data |= MSI_DATA_VECTOR(cfg->vector); | 3466 | msg.data |= MSI_DATA_VECTOR(cfg->vector); |
| 3675 | msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; | 3467 | msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; |
| 3676 | msg.address_lo |= MSI_ADDR_DEST_ID(dest); | 3468 | msg.address_lo |= MSI_ADDR_DEST_ID(dest); |
| 3677 | 3469 | ||
| 3678 | hpet_msi_write(irq, &msg); | 3470 | hpet_msi_write(data->handler_data, &msg); |
| 3679 | 3471 | ||
| 3680 | return 0; | 3472 | return 0; |
| 3681 | } | 3473 | } |
| @@ -3683,34 +3475,33 @@ static int hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) | |||
| 3683 | #endif /* CONFIG_SMP */ | 3475 | #endif /* CONFIG_SMP */ |
| 3684 | 3476 | ||
| 3685 | static struct irq_chip ir_hpet_msi_type = { | 3477 | static struct irq_chip ir_hpet_msi_type = { |
| 3686 | .name = "IR-HPET_MSI", | 3478 | .name = "IR-HPET_MSI", |
| 3687 | .unmask = hpet_msi_unmask, | 3479 | .irq_unmask = hpet_msi_unmask, |
| 3688 | .mask = hpet_msi_mask, | 3480 | .irq_mask = hpet_msi_mask, |
| 3689 | #ifdef CONFIG_INTR_REMAP | 3481 | #ifdef CONFIG_INTR_REMAP |
| 3690 | .ack = ir_ack_apic_edge, | 3482 | .irq_ack = ir_ack_apic_edge, |
| 3691 | #ifdef CONFIG_SMP | 3483 | #ifdef CONFIG_SMP |
| 3692 | .set_affinity = ir_set_msi_irq_affinity, | 3484 | .irq_set_affinity = ir_msi_set_affinity, |
| 3693 | #endif | 3485 | #endif |
| 3694 | #endif | 3486 | #endif |
| 3695 | .retrigger = ioapic_retrigger_irq, | 3487 | .irq_retrigger = ioapic_retrigger_irq, |
| 3696 | }; | 3488 | }; |
| 3697 | 3489 | ||
| 3698 | static struct irq_chip hpet_msi_type = { | 3490 | static struct irq_chip hpet_msi_type = { |
| 3699 | .name = "HPET_MSI", | 3491 | .name = "HPET_MSI", |
| 3700 | .unmask = hpet_msi_unmask, | 3492 | .irq_unmask = hpet_msi_unmask, |
| 3701 | .mask = hpet_msi_mask, | 3493 | .irq_mask = hpet_msi_mask, |
| 3702 | .ack = ack_apic_edge, | 3494 | .irq_ack = ack_apic_edge, |
| 3703 | #ifdef CONFIG_SMP | 3495 | #ifdef CONFIG_SMP |
| 3704 | .set_affinity = hpet_msi_set_affinity, | 3496 | .irq_set_affinity = hpet_msi_set_affinity, |
| 3705 | #endif | 3497 | #endif |
| 3706 | .retrigger = ioapic_retrigger_irq, | 3498 | .irq_retrigger = ioapic_retrigger_irq, |
| 3707 | }; | 3499 | }; |
| 3708 | 3500 | ||
| 3709 | int arch_setup_hpet_msi(unsigned int irq, unsigned int id) | 3501 | int arch_setup_hpet_msi(unsigned int irq, unsigned int id) |
| 3710 | { | 3502 | { |
| 3711 | int ret; | ||
| 3712 | struct msi_msg msg; | 3503 | struct msi_msg msg; |
| 3713 | struct irq_desc *desc = irq_to_desc(irq); | 3504 | int ret; |
| 3714 | 3505 | ||
| 3715 | if (intr_remapping_enabled) { | 3506 | if (intr_remapping_enabled) { |
| 3716 | struct intel_iommu *iommu = map_hpet_to_ir(id); | 3507 | struct intel_iommu *iommu = map_hpet_to_ir(id); |
| @@ -3728,9 +3519,9 @@ int arch_setup_hpet_msi(unsigned int irq, unsigned int id) | |||
| 3728 | if (ret < 0) | 3519 | if (ret < 0) |
| 3729 | return ret; | 3520 | return ret; |
| 3730 | 3521 | ||
| 3731 | hpet_msi_write(irq, &msg); | 3522 | hpet_msi_write(get_irq_data(irq), &msg); |
| 3732 | desc->status |= IRQ_MOVE_PCNTXT; | 3523 | irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); |
| 3733 | if (irq_remapped(irq)) | 3524 | if (irq_remapped(get_irq_chip_data(irq))) |
| 3734 | set_irq_chip_and_handler_name(irq, &ir_hpet_msi_type, | 3525 | set_irq_chip_and_handler_name(irq, &ir_hpet_msi_type, |
| 3735 | handle_edge_irq, "edge"); | 3526 | handle_edge_irq, "edge"); |
| 3736 | else | 3527 | else |
| @@ -3763,33 +3554,30 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) | |||
| 3763 | write_ht_irq_msg(irq, &msg); | 3554 | write_ht_irq_msg(irq, &msg); |
| 3764 | } | 3555 | } |
| 3765 | 3556 | ||
| 3766 | static int set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask) | 3557 | static int |
| 3558 | ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) | ||
| 3767 | { | 3559 | { |
| 3768 | struct irq_desc *desc = irq_to_desc(irq); | 3560 | struct irq_cfg *cfg = data->chip_data; |
| 3769 | struct irq_cfg *cfg; | ||
| 3770 | unsigned int dest; | 3561 | unsigned int dest; |
| 3771 | 3562 | ||
| 3772 | if (set_desc_affinity(desc, mask, &dest)) | 3563 | if (__ioapic_set_affinity(data, mask, &dest)) |
| 3773 | return -1; | 3564 | return -1; |
| 3774 | 3565 | ||
| 3775 | cfg = desc->chip_data; | 3566 | target_ht_irq(data->irq, dest, cfg->vector); |
| 3776 | |||
| 3777 | target_ht_irq(irq, dest, cfg->vector); | ||
| 3778 | |||
| 3779 | return 0; | 3567 | return 0; |
| 3780 | } | 3568 | } |
| 3781 | 3569 | ||
| 3782 | #endif | 3570 | #endif |
| 3783 | 3571 | ||
| 3784 | static struct irq_chip ht_irq_chip = { | 3572 | static struct irq_chip ht_irq_chip = { |
| 3785 | .name = "PCI-HT", | 3573 | .name = "PCI-HT", |
| 3786 | .mask = mask_ht_irq, | 3574 | .irq_mask = mask_ht_irq, |
| 3787 | .unmask = unmask_ht_irq, | 3575 | .irq_unmask = unmask_ht_irq, |
| 3788 | .ack = ack_apic_edge, | 3576 | .irq_ack = ack_apic_edge, |
| 3789 | #ifdef CONFIG_SMP | 3577 | #ifdef CONFIG_SMP |
| 3790 | .set_affinity = set_ht_irq_affinity, | 3578 | .irq_set_affinity = ht_set_affinity, |
| 3791 | #endif | 3579 | #endif |
| 3792 | .retrigger = ioapic_retrigger_irq, | 3580 | .irq_retrigger = ioapic_retrigger_irq, |
| 3793 | }; | 3581 | }; |
| 3794 | 3582 | ||
| 3795 | int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) | 3583 | int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) |
| @@ -3880,14 +3668,13 @@ int __init arch_probe_nr_irqs(void) | |||
| 3880 | if (nr < nr_irqs) | 3668 | if (nr < nr_irqs) |
| 3881 | nr_irqs = nr; | 3669 | nr_irqs = nr; |
| 3882 | 3670 | ||
| 3883 | return 0; | 3671 | return NR_IRQS_LEGACY; |
| 3884 | } | 3672 | } |
| 3885 | #endif | 3673 | #endif |
| 3886 | 3674 | ||
| 3887 | static int __io_apic_set_pci_routing(struct device *dev, int irq, | 3675 | static int __io_apic_set_pci_routing(struct device *dev, int irq, |
| 3888 | struct io_apic_irq_attr *irq_attr) | 3676 | struct io_apic_irq_attr *irq_attr) |
| 3889 | { | 3677 | { |
| 3890 | struct irq_desc *desc; | ||
| 3891 | struct irq_cfg *cfg; | 3678 | struct irq_cfg *cfg; |
| 3892 | int node; | 3679 | int node; |
| 3893 | int ioapic, pin; | 3680 | int ioapic, pin; |
| @@ -3903,13 +3690,11 @@ static int __io_apic_set_pci_routing(struct device *dev, int irq, | |||
| 3903 | if (dev) | 3690 | if (dev) |
| 3904 | node = dev_to_node(dev); | 3691 | node = dev_to_node(dev); |
| 3905 | else | 3692 | else |
| 3906 | node = cpu_to_node(boot_cpu_id); | 3693 | node = cpu_to_node(0); |
| 3907 | 3694 | ||
| 3908 | desc = irq_to_desc_alloc_node(irq, node); | 3695 | cfg = alloc_irq_and_cfg_at(irq, node); |
| 3909 | if (!desc) { | 3696 | if (!cfg) |
| 3910 | printk(KERN_INFO "can not get irq_desc %d\n", irq); | ||
| 3911 | return 0; | 3697 | return 0; |
| 3912 | } | ||
| 3913 | 3698 | ||
| 3914 | pin = irq_attr->ioapic_pin; | 3699 | pin = irq_attr->ioapic_pin; |
| 3915 | trigger = irq_attr->trigger; | 3700 | trigger = irq_attr->trigger; |
| @@ -3919,15 +3704,14 @@ static int __io_apic_set_pci_routing(struct device *dev, int irq, | |||
| 3919 | * IRQs < 16 are already in the irq_2_pin[] map | 3704 | * IRQs < 16 are already in the irq_2_pin[] map |
| 3920 | */ | 3705 | */ |
| 3921 | if (irq >= legacy_pic->nr_legacy_irqs) { | 3706 | if (irq >= legacy_pic->nr_legacy_irqs) { |
| 3922 | cfg = desc->chip_data; | 3707 | if (__add_pin_to_irq_node(cfg, node, ioapic, pin)) { |
| 3923 | if (add_pin_to_irq_node_nopanic(cfg, node, ioapic, pin)) { | ||
| 3924 | printk(KERN_INFO "can not add pin %d for irq %d\n", | 3708 | printk(KERN_INFO "can not add pin %d for irq %d\n", |
| 3925 | pin, irq); | 3709 | pin, irq); |
| 3926 | return 0; | 3710 | return 0; |
| 3927 | } | 3711 | } |
| 3928 | } | 3712 | } |
| 3929 | 3713 | ||
| 3930 | setup_IO_APIC_irq(ioapic, pin, irq, desc, trigger, polarity); | 3714 | setup_ioapic_irq(ioapic, pin, irq, cfg, trigger, polarity); |
| 3931 | 3715 | ||
| 3932 | return 0; | 3716 | return 0; |
| 3933 | } | 3717 | } |
| @@ -4120,14 +3904,14 @@ void __init setup_ioapic_dest(void) | |||
| 4120 | */ | 3904 | */ |
| 4121 | if (desc->status & | 3905 | if (desc->status & |
| 4122 | (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) | 3906 | (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) |
| 4123 | mask = desc->affinity; | 3907 | mask = desc->irq_data.affinity; |
| 4124 | else | 3908 | else |
| 4125 | mask = apic->target_cpus(); | 3909 | mask = apic->target_cpus(); |
| 4126 | 3910 | ||
| 4127 | if (intr_remapping_enabled) | 3911 | if (intr_remapping_enabled) |
| 4128 | set_ir_ioapic_affinity_irq_desc(desc, mask); | 3912 | ir_ioapic_set_affinity(&desc->irq_data, mask, false); |
| 4129 | else | 3913 | else |
| 4130 | set_ioapic_affinity_irq_desc(desc, mask); | 3914 | ioapic_set_affinity(&desc->irq_data, mask, false); |
| 4131 | } | 3915 | } |
| 4132 | 3916 | ||
| 4133 | } | 3917 | } |
| @@ -4311,19 +4095,18 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) | |||
| 4311 | void __init pre_init_apic_IRQ0(void) | 4095 | void __init pre_init_apic_IRQ0(void) |
| 4312 | { | 4096 | { |
| 4313 | struct irq_cfg *cfg; | 4097 | struct irq_cfg *cfg; |
| 4314 | struct irq_desc *desc; | ||
| 4315 | 4098 | ||
| 4316 | printk(KERN_INFO "Early APIC setup for system timer0\n"); | 4099 | printk(KERN_INFO "Early APIC setup for system timer0\n"); |
| 4317 | #ifndef CONFIG_SMP | 4100 | #ifndef CONFIG_SMP |
| 4318 | phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid); | 4101 | phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid); |
| 4319 | #endif | 4102 | #endif |
| 4320 | desc = irq_to_desc_alloc_node(0, 0); | 4103 | /* Make sure the irq descriptor is set up */ |
| 4104 | cfg = alloc_irq_and_cfg_at(0, 0); | ||
| 4321 | 4105 | ||
| 4322 | setup_local_APIC(); | 4106 | setup_local_APIC(); |
| 4323 | 4107 | ||
| 4324 | cfg = irq_cfg(0); | ||
| 4325 | add_pin_to_irq_node(cfg, 0, 0, 0); | 4108 | add_pin_to_irq_node(cfg, 0, 0, 0); |
| 4326 | set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge"); | 4109 | set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge"); |
| 4327 | 4110 | ||
| 4328 | setup_IO_APIC_irq(0, 0, 0, desc, 0, 0); | 4111 | setup_ioapic_irq(0, 0, 0, cfg, 0, 0); |
| 4329 | } | 4112 | } |
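The io_apic.c hunks above all follow one conversion: the irq_chip callbacks move from .mask/.unmask/.ack/.set_affinity, which took a bare IRQ number and looked up irq_desc themselves, to .irq_mask/.irq_unmask/.irq_ack/.irq_set_affinity, which receive a struct irq_data * and find their per-vector state (struct irq_cfg) in data->chip_data. A minimal sketch of the new shape, with hypothetical foo_* names and the usual kernel headers assumed (this is not the actual io_apic.c code):

        #include <linux/irq.h>
        #include <linux/cpumask.h>

        struct foo_state { unsigned int vector; };      /* stand-in for struct irq_cfg */

        static void foo_mask(struct irq_data *data)   { /* mask the line in hardware */ }
        static void foo_unmask(struct irq_data *data) { /* unmask the line */ }
        static void foo_ack(struct irq_data *data)    { /* ack the interrupt */ }

        static int
        foo_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
        {
                struct foo_state *st = data->chip_data; /* was: irq_to_desc(irq)->chip_data */

                /* reprogram the destination for 'mask' using st->vector ... */
                return 0;
        }

        static struct irq_chip foo_chip = {
                .name             = "FOO",
                .irq_mask         = foo_mask,
                .irq_unmask       = foo_unmask,
                .irq_ack          = foo_ack,
                .irq_set_affinity = foo_set_affinity,
        };

The same shape explains the msi_chip, msi_ir_chip, dmar_msi_type, hpet_msi_type and ht_irq_chip hunks above; destroy_irq() and __io_apic_set_pci_routing() likewise fetch the irq_cfg once instead of going through irq_desc.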
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index a43f71cb30f8..c90041ccb742 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c | |||
| @@ -178,7 +178,7 @@ int __init check_nmi_watchdog(void) | |||
| 178 | error: | 178 | error: |
| 179 | if (nmi_watchdog == NMI_IO_APIC) { | 179 | if (nmi_watchdog == NMI_IO_APIC) { |
| 180 | if (!timer_through_8259) | 180 | if (!timer_through_8259) |
| 181 | legacy_pic->chip->mask(0); | 181 | legacy_pic->mask(0); |
| 182 | on_each_cpu(__acpi_nmi_disable, NULL, 1); | 182 | on_each_cpu(__acpi_nmi_disable, NULL, 1); |
| 183 | } | 183 | } |
| 184 | 184 | ||
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index 3e28401f161c..960f26ab5c9f 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c | |||
| @@ -26,6 +26,7 @@ | |||
| 26 | #include <linux/nodemask.h> | 26 | #include <linux/nodemask.h> |
| 27 | #include <linux/topology.h> | 27 | #include <linux/topology.h> |
| 28 | #include <linux/bootmem.h> | 28 | #include <linux/bootmem.h> |
| 29 | #include <linux/memblock.h> | ||
| 29 | #include <linux/threads.h> | 30 | #include <linux/threads.h> |
| 30 | #include <linux/cpumask.h> | 31 | #include <linux/cpumask.h> |
| 31 | #include <linux/kernel.h> | 32 | #include <linux/kernel.h> |
| @@ -88,7 +89,7 @@ static inline void numaq_register_node(int node, struct sys_cfg_data *scd) | |||
| 88 | node_end_pfn[node] = | 89 | node_end_pfn[node] = |
| 89 | MB_TO_PAGES(eq->hi_shrd_mem_start + eq->hi_shrd_mem_size); | 90 | MB_TO_PAGES(eq->hi_shrd_mem_start + eq->hi_shrd_mem_size); |
| 90 | 91 | ||
| 91 | e820_register_active_regions(node, node_start_pfn[node], | 92 | memblock_x86_register_active_regions(node, node_start_pfn[node], |
| 92 | node_end_pfn[node]); | 93 | node_end_pfn[node]); |
| 93 | 94 | ||
| 94 | memory_present(node, node_start_pfn[node], node_end_pfn[node]); | 95 | memory_present(node, node_start_pfn[node], node_end_pfn[node]); |
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index 83e9be4778e2..f9e4e6a54073 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c | |||
| @@ -54,6 +54,9 @@ static int apicid_phys_pkg_id(int initial_apic_id, int index_msb) | |||
| 54 | */ | 54 | */ |
| 55 | void __init default_setup_apic_routing(void) | 55 | void __init default_setup_apic_routing(void) |
| 56 | { | 56 | { |
| 57 | |||
| 58 | enable_IR_x2apic(); | ||
| 59 | |||
| 57 | #ifdef CONFIG_X86_X2APIC | 60 | #ifdef CONFIG_X86_X2APIC |
| 58 | if (x2apic_mode | 61 | if (x2apic_mode |
| 59 | #ifdef CONFIG_X86_UV | 62 | #ifdef CONFIG_X86_UV |
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 7b598b84c902..f744f54cb248 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c | |||
| @@ -698,9 +698,11 @@ void __init uv_system_init(void) | |||
| 698 | for (j = 0; j < 64; j++) { | 698 | for (j = 0; j < 64; j++) { |
| 699 | if (!test_bit(j, &present)) | 699 | if (!test_bit(j, &present)) |
| 700 | continue; | 700 | continue; |
| 701 | uv_blade_info[blade].pnode = (i * 64 + j); | 701 | pnode = (i * 64 + j); |
| 702 | uv_blade_info[blade].pnode = pnode; | ||
| 702 | uv_blade_info[blade].nr_possible_cpus = 0; | 703 | uv_blade_info[blade].nr_possible_cpus = 0; |
| 703 | uv_blade_info[blade].nr_online_cpus = 0; | 704 | uv_blade_info[blade].nr_online_cpus = 0; |
| 705 | max_pnode = max(pnode, max_pnode); | ||
| 704 | blade++; | 706 | blade++; |
| 705 | } | 707 | } |
| 706 | } | 708 | } |
| @@ -738,7 +740,6 @@ void __init uv_system_init(void) | |||
| 738 | uv_cpu_hub_info(cpu)->scir.offset = uv_scir_offset(apicid); | 740 | uv_cpu_hub_info(cpu)->scir.offset = uv_scir_offset(apicid); |
| 739 | uv_node_to_blade[nid] = blade; | 741 | uv_node_to_blade[nid] = blade; |
| 740 | uv_cpu_to_blade[cpu] = blade; | 742 | uv_cpu_to_blade[cpu] = blade; |
| 741 | max_pnode = max(pnode, max_pnode); | ||
| 742 | } | 743 | } |
| 743 | 744 | ||
| 744 | /* Add blade/pnode info for nodes without cpus */ | 745 | /* Add blade/pnode info for nodes without cpus */ |
| @@ -750,7 +751,6 @@ void __init uv_system_init(void) | |||
| 750 | pnode = (paddr >> m_val) & pnode_mask; | 751 | pnode = (paddr >> m_val) & pnode_mask; |
| 751 | blade = boot_pnode_to_blade(pnode); | 752 | blade = boot_pnode_to_blade(pnode); |
| 752 | uv_node_to_blade[nid] = blade; | 753 | uv_node_to_blade[nid] = blade; |
| 753 | max_pnode = max(pnode, max_pnode); | ||
| 754 | } | 754 | } |
| 755 | 755 | ||
| 756 | map_gru_high(max_pnode); | 756 | map_gru_high(max_pnode); |
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 4c9c67bf09b7..fbbc4dadecc4 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c | |||
| @@ -1926,6 +1926,7 @@ static const struct file_operations apm_bios_fops = { | |||
| 1926 | .unlocked_ioctl = do_ioctl, | 1926 | .unlocked_ioctl = do_ioctl, |
| 1927 | .open = do_open, | 1927 | .open = do_open, |
| 1928 | .release = do_release, | 1928 | .release = do_release, |
| 1929 | .llseek = noop_llseek, | ||
| 1929 | }; | 1930 | }; |
| 1930 | 1931 | ||
| 1931 | static struct miscdevice apm_device = { | 1932 | static struct miscdevice apm_device = { |
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index dfdbf6403895..1a4088dda37a 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c | |||
| @@ -99,9 +99,7 @@ void foo(void) | |||
| 99 | 99 | ||
| 100 | DEFINE(PAGE_SIZE_asm, PAGE_SIZE); | 100 | DEFINE(PAGE_SIZE_asm, PAGE_SIZE); |
| 101 | DEFINE(PAGE_SHIFT_asm, PAGE_SHIFT); | 101 | DEFINE(PAGE_SHIFT_asm, PAGE_SHIFT); |
| 102 | DEFINE(PTRS_PER_PTE, PTRS_PER_PTE); | 102 | DEFINE(THREAD_SIZE_asm, THREAD_SIZE); |
| 103 | DEFINE(PTRS_PER_PMD, PTRS_PER_PMD); | ||
| 104 | DEFINE(PTRS_PER_PGD, PTRS_PER_PGD); | ||
| 105 | 103 | ||
| 106 | OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); | 104 | OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); |
| 107 | 105 | ||
diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c index fc999e6fc46a..13a389179514 100644 --- a/arch/x86/kernel/check.c +++ b/arch/x86/kernel/check.c | |||
| @@ -2,7 +2,8 @@ | |||
| 2 | #include <linux/sched.h> | 2 | #include <linux/sched.h> |
| 3 | #include <linux/kthread.h> | 3 | #include <linux/kthread.h> |
| 4 | #include <linux/workqueue.h> | 4 | #include <linux/workqueue.h> |
| 5 | #include <asm/e820.h> | 5 | #include <linux/memblock.h> |
| 6 | |||
| 6 | #include <asm/proto.h> | 7 | #include <asm/proto.h> |
| 7 | 8 | ||
| 8 | /* | 9 | /* |
| @@ -18,10 +19,12 @@ static int __read_mostly memory_corruption_check = -1; | |||
| 18 | static unsigned __read_mostly corruption_check_size = 64*1024; | 19 | static unsigned __read_mostly corruption_check_size = 64*1024; |
| 19 | static unsigned __read_mostly corruption_check_period = 60; /* seconds */ | 20 | static unsigned __read_mostly corruption_check_period = 60; /* seconds */ |
| 20 | 21 | ||
| 21 | static struct e820entry scan_areas[MAX_SCAN_AREAS]; | 22 | static struct scan_area { |
| 23 | u64 addr; | ||
| 24 | u64 size; | ||
| 25 | } scan_areas[MAX_SCAN_AREAS]; | ||
| 22 | static int num_scan_areas; | 26 | static int num_scan_areas; |
| 23 | 27 | ||
| 24 | |||
| 25 | static __init int set_corruption_check(char *arg) | 28 | static __init int set_corruption_check(char *arg) |
| 26 | { | 29 | { |
| 27 | char *end; | 30 | char *end; |
| @@ -81,9 +84,9 @@ void __init setup_bios_corruption_check(void) | |||
| 81 | 84 | ||
| 82 | while (addr < corruption_check_size && num_scan_areas < MAX_SCAN_AREAS) { | 85 | while (addr < corruption_check_size && num_scan_areas < MAX_SCAN_AREAS) { |
| 83 | u64 size; | 86 | u64 size; |
| 84 | addr = find_e820_area_size(addr, &size, PAGE_SIZE); | 87 | addr = memblock_x86_find_in_range_size(addr, &size, PAGE_SIZE); |
| 85 | 88 | ||
| 86 | if (!(addr + 1)) | 89 | if (addr == MEMBLOCK_ERROR) |
| 87 | break; | 90 | break; |
| 88 | 91 | ||
| 89 | if (addr >= corruption_check_size) | 92 | if (addr >= corruption_check_size) |
| @@ -92,7 +95,7 @@ void __init setup_bios_corruption_check(void) | |||
| 92 | if ((addr + size) > corruption_check_size) | 95 | if ((addr + size) > corruption_check_size) |
| 93 | size = corruption_check_size - addr; | 96 | size = corruption_check_size - addr; |
| 94 | 97 | ||
| 95 | e820_update_range(addr, size, E820_RAM, E820_RESERVED); | 98 | memblock_x86_reserve_range(addr, addr + size, "SCAN RAM"); |
| 96 | scan_areas[num_scan_areas].addr = addr; | 99 | scan_areas[num_scan_areas].addr = addr; |
| 97 | scan_areas[num_scan_areas].size = size; | 100 | scan_areas[num_scan_areas].size = size; |
| 98 | num_scan_areas++; | 101 | num_scan_areas++; |
| @@ -105,7 +108,6 @@ void __init setup_bios_corruption_check(void) | |||
| 105 | 108 | ||
| 106 | printk(KERN_INFO "Scanning %d areas for low memory corruption\n", | 109 | printk(KERN_INFO "Scanning %d areas for low memory corruption\n", |
| 107 | num_scan_areas); | 110 | num_scan_areas); |
| 108 | update_e820(); | ||
| 109 | } | 111 | } |
| 110 | 112 | ||
| 111 | 113 | ||
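In check.c the scan areas stop being struct e820entry and the search for free low memory moves from the e820 helpers to the x86 memblock wrappers; the reservation likewise goes through memblock instead of rewriting the e820 map (hence the dropped update_e820()). One detail worth noting is the failure sentinel: the old code tested !(addr + 1), i.e. find_e820_area_size() returning ~0ULL, while the new wrapper returns an explicit MEMBLOCK_ERROR value. Side by side, as a condensed sketch rather than the exact source:

        /* old: failure was signalled by an all-ones address */
        addr = find_e820_area_size(addr, &size, PAGE_SIZE);
        if (!(addr + 1))                        /* addr == ~0ULL */
                break;

        /* new: the memblock wrapper uses an explicit sentinel */
        addr = memblock_x86_find_in_range_size(addr, &size, PAGE_SIZE);
        if (addr == MEMBLOCK_ERROR)
                break;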
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index ba5f62f45f01..9e093f8fe78c 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
| @@ -148,7 +148,7 @@ static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c) | |||
| 148 | { | 148 | { |
| 149 | #ifdef CONFIG_SMP | 149 | #ifdef CONFIG_SMP |
| 150 | /* calling is from identify_secondary_cpu() ? */ | 150 | /* calling is from identify_secondary_cpu() ? */ |
| 151 | if (c->cpu_index == boot_cpu_id) | 151 | if (!c->cpu_index) |
| 152 | return; | 152 | return; |
| 153 | 153 | ||
| 154 | /* | 154 | /* |
| @@ -253,37 +253,51 @@ static int __cpuinit nearby_node(int apicid) | |||
| 253 | #endif | 253 | #endif |
| 254 | 254 | ||
| 255 | /* | 255 | /* |
| 256 | * Fixup core topology information for AMD multi-node processors. | 256 | * Fixup core topology information for |
| 257 | * Assumption: Number of cores in each internal node is the same. | 257 | * (1) AMD multi-node processors |
| 258 | * Assumption: Number of cores in each internal node is the same. | ||
| 259 | * (2) AMD processors supporting compute units | ||
| 258 | */ | 260 | */ |
| 259 | #ifdef CONFIG_X86_HT | 261 | #ifdef CONFIG_X86_HT |
| 260 | static void __cpuinit amd_fixup_dcm(struct cpuinfo_x86 *c) | 262 | static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c) |
| 261 | { | 263 | { |
| 262 | unsigned long long value; | 264 | u32 nodes; |
| 263 | u32 nodes, cores_per_node; | 265 | u8 node_id; |
| 264 | int cpu = smp_processor_id(); | 266 | int cpu = smp_processor_id(); |
| 265 | 267 | ||
| 266 | if (!cpu_has(c, X86_FEATURE_NODEID_MSR)) | 268 | /* get information required for multi-node processors */ |
| 267 | return; | 269 | if (cpu_has(c, X86_FEATURE_TOPOEXT)) { |
| 270 | u32 eax, ebx, ecx, edx; | ||
| 268 | 271 | ||
| 269 | /* fixup topology information only once for a core */ | 272 | cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); |
| 270 | if (cpu_has(c, X86_FEATURE_AMD_DCM)) | 273 | nodes = ((ecx >> 8) & 7) + 1; |
| 271 | return; | 274 | node_id = ecx & 7; |
| 272 | 275 | ||
| 273 | rdmsrl(MSR_FAM10H_NODE_ID, value); | 276 | /* get compute unit information */ |
| 277 | smp_num_siblings = ((ebx >> 8) & 3) + 1; | ||
| 278 | c->compute_unit_id = ebx & 0xff; | ||
| 279 | } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) { | ||
| 280 | u64 value; | ||
| 274 | 281 | ||
| 275 | nodes = ((value >> 3) & 7) + 1; | 282 | rdmsrl(MSR_FAM10H_NODE_ID, value); |
| 276 | if (nodes == 1) | 283 | nodes = ((value >> 3) & 7) + 1; |
| 284 | node_id = value & 7; | ||
| 285 | } else | ||
| 277 | return; | 286 | return; |
| 278 | 287 | ||
| 279 | set_cpu_cap(c, X86_FEATURE_AMD_DCM); | 288 | /* fixup multi-node processor information */ |
| 280 | cores_per_node = c->x86_max_cores / nodes; | 289 | if (nodes > 1) { |
| 290 | u32 cores_per_node; | ||
| 291 | |||
| 292 | set_cpu_cap(c, X86_FEATURE_AMD_DCM); | ||
| 293 | cores_per_node = c->x86_max_cores / nodes; | ||
| 281 | 294 | ||
| 282 | /* store NodeID, use llc_shared_map to store sibling info */ | 295 | /* store NodeID, use llc_shared_map to store sibling info */ |
| 283 | per_cpu(cpu_llc_id, cpu) = value & 7; | 296 | per_cpu(cpu_llc_id, cpu) = node_id; |
| 284 | 297 | ||
| 285 | /* fixup core id to be in range from 0 to (cores_per_node - 1) */ | 298 | /* core id to be in range from 0 to (cores_per_node - 1) */ |
| 286 | c->cpu_core_id = c->cpu_core_id % cores_per_node; | 299 | c->cpu_core_id = c->cpu_core_id % cores_per_node; |
| 300 | } | ||
| 287 | } | 301 | } |
| 288 | #endif | 302 | #endif |
| 289 | 303 | ||
| @@ -304,9 +318,7 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) | |||
| 304 | c->phys_proc_id = c->initial_apicid >> bits; | 318 | c->phys_proc_id = c->initial_apicid >> bits; |
| 305 | /* use socket ID also for last level cache */ | 319 | /* use socket ID also for last level cache */ |
| 306 | per_cpu(cpu_llc_id, cpu) = c->phys_proc_id; | 320 | per_cpu(cpu_llc_id, cpu) = c->phys_proc_id; |
| 307 | /* fixup topology information on multi-node processors */ | 321 | amd_get_topology(c); |
| 308 | if ((c->x86 == 0x10) && (c->x86_model == 9)) | ||
| 309 | amd_fixup_dcm(c); | ||
| 310 | #endif | 322 | #endif |
| 311 | } | 323 | } |
| 312 | 324 | ||
| @@ -412,6 +424,23 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) | |||
| 412 | set_cpu_cap(c, X86_FEATURE_EXTD_APICID); | 424 | set_cpu_cap(c, X86_FEATURE_EXTD_APICID); |
| 413 | } | 425 | } |
| 414 | #endif | 426 | #endif |
| 427 | |||
| 428 | /* We need to do the following only once */ | ||
| 429 | if (c != &boot_cpu_data) | ||
| 430 | return; | ||
| 431 | |||
| 432 | if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) { | ||
| 433 | |||
| 434 | if (c->x86 > 0x10 || | ||
| 435 | (c->x86 == 0x10 && c->x86_model >= 0x2)) { | ||
| 436 | u64 val; | ||
| 437 | |||
| 438 | rdmsrl(MSR_K7_HWCR, val); | ||
| 439 | if (!(val & BIT(24))) | ||
| 440 | printk(KERN_WARNING FW_BUG "TSC doesn't count " | ||
| 441 | "with P0 frequency!\n"); | ||
| 442 | } | ||
| 443 | } | ||
| 415 | } | 444 | } |
| 416 | 445 | ||
| 417 | static void __cpuinit init_amd(struct cpuinfo_x86 *c) | 446 | static void __cpuinit init_amd(struct cpuinfo_x86 *c) |
| @@ -523,7 +552,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
| 523 | #endif | 552 | #endif |
| 524 | 553 | ||
| 525 | if (c->extended_cpuid_level >= 0x80000006) { | 554 | if (c->extended_cpuid_level >= 0x80000006) { |
| 526 | if ((c->x86 >= 0x0f) && (cpuid_edx(0x80000006) & 0xf000)) | 555 | if (cpuid_edx(0x80000006) & 0xf000) |
| 527 | num_cache_leaves = 4; | 556 | num_cache_leaves = 4; |
| 528 | else | 557 | else |
| 529 | num_cache_leaves = 3; | 558 | num_cache_leaves = 3; |
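amd_get_topology() above adds a second source of node and compute-unit information: when X86_FEATURE_TOPOEXT is set, CPUID leaf 0x8000001e is used, and the code reads the node id from ECX[2:0], the node count minus one from ECX[10:8], the compute-unit id from EBX[7:0] and the number of cores per compute unit minus one from EBX[9:8]; otherwise it falls back to MSR_FAM10H_NODE_ID as before. A tiny user-space worked example of the same bit arithmetic, with invented register values (not taken from real hardware):

        #include <stdio.h>
        #include <stdint.h>

        int main(void)
        {
                /* pretend CPUID 0x8000001e returned these */
                uint32_t ebx = 0x00000103;
                uint32_t ecx = 0x00000101;

                unsigned nodes    = ((ecx >> 8) & 7) + 1;  /* 2 nodes in the package   */
                unsigned node_id  = ecx & 7;               /* this core sits on node 1 */
                unsigned siblings = ((ebx >> 8) & 3) + 1;  /* 2 cores per compute unit */
                unsigned cu_id    = ebx & 0xff;            /* compute unit id 3        */

                printf("nodes=%u node_id=%u siblings=%u cu_id=%u\n",
                       nodes, node_id, siblings, cu_id);
                return 0;
        }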
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 490dac63c2d2..4b68bda30938 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
| @@ -545,7 +545,7 @@ void __cpuinit cpu_detect(struct cpuinfo_x86 *c) | |||
| 545 | } | 545 | } |
| 546 | } | 546 | } |
| 547 | 547 | ||
| 548 | static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) | 548 | void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) |
| 549 | { | 549 | { |
| 550 | u32 tfms, xlvl; | 550 | u32 tfms, xlvl; |
| 551 | u32 ebx; | 551 | u32 ebx; |
| @@ -665,7 +665,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) | |||
| 665 | this_cpu->c_early_init(c); | 665 | this_cpu->c_early_init(c); |
| 666 | 666 | ||
| 667 | #ifdef CONFIG_SMP | 667 | #ifdef CONFIG_SMP |
| 668 | c->cpu_index = boot_cpu_id; | 668 | c->cpu_index = 0; |
| 669 | #endif | 669 | #endif |
| 670 | filter_cpuid_features(c, false); | 670 | filter_cpuid_features(c, false); |
| 671 | } | 671 | } |
| @@ -704,16 +704,21 @@ void __init early_cpu_init(void) | |||
| 704 | } | 704 | } |
| 705 | 705 | ||
| 706 | /* | 706 | /* |
| 707 | * The NOPL instruction is supposed to exist on all CPUs with | 707 | * The NOPL instruction is supposed to exist on all CPUs of family >= 6; |
| 708 | * family >= 6; unfortunately, that's not true in practice because | 708 | * unfortunately, that's not true in practice because of early VIA |
| 709 | * of early VIA chips and (more importantly) broken virtualizers that | 709 | * chips and (more importantly) broken virtualizers that are not easy |
| 710 | * are not easy to detect. In the latter case it doesn't even *fail* | 710 | * to detect. In the latter case it doesn't even *fail* reliably, so |
| 711 | * reliably, so probing for it doesn't even work. Disable it completely | 711 | * probing for it doesn't even work. Disable it completely on 32-bit |
| 712 | * unless we can find a reliable way to detect all the broken cases. | 712 | * unless we can find a reliable way to detect all the broken cases. |
| 713 | * Enable it explicitly on 64-bit for non-constant inputs of cpu_has(). | ||
| 713 | */ | 714 | */ |
| 714 | static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) | 715 | static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) |
| 715 | { | 716 | { |
| 717 | #ifdef CONFIG_X86_32 | ||
| 716 | clear_cpu_cap(c, X86_FEATURE_NOPL); | 718 | clear_cpu_cap(c, X86_FEATURE_NOPL); |
| 719 | #else | ||
| 720 | set_cpu_cap(c, X86_FEATURE_NOPL); | ||
| 721 | #endif | ||
| 717 | } | 722 | } |
| 718 | 723 | ||
| 719 | static void __cpuinit generic_identify(struct cpuinfo_x86 *c) | 724 | static void __cpuinit generic_identify(struct cpuinfo_x86 *c) |
| @@ -1264,13 +1269,6 @@ void __cpuinit cpu_init(void) | |||
| 1264 | clear_all_debug_regs(); | 1269 | clear_all_debug_regs(); |
| 1265 | dbg_restore_debug_regs(); | 1270 | dbg_restore_debug_regs(); |
| 1266 | 1271 | ||
| 1267 | /* | ||
| 1268 | * Force FPU initialization: | ||
| 1269 | */ | ||
| 1270 | current_thread_info()->status = 0; | ||
| 1271 | clear_used_math(); | ||
| 1272 | mxcsr_feature_mask_init(); | ||
| 1273 | |||
| 1274 | fpu_init(); | 1272 | fpu_init(); |
| 1275 | xsave_init(); | 1273 | xsave_init(); |
| 1276 | } | 1274 | } |
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 3624e8a0f71b..e765633f210e 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h | |||
| @@ -32,6 +32,8 @@ struct cpu_dev { | |||
| 32 | extern const struct cpu_dev *const __x86_cpu_dev_start[], | 32 | extern const struct cpu_dev *const __x86_cpu_dev_start[], |
| 33 | *const __x86_cpu_dev_end[]; | 33 | *const __x86_cpu_dev_end[]; |
| 34 | 34 | ||
| 35 | extern void get_cpu_cap(struct cpuinfo_x86 *c); | ||
| 35 | extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); | 36 | extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); |
| 37 | extern void get_cpu_cap(struct cpuinfo_x86 *c); | ||
| 36 | 38 | ||
| 37 | #endif | 39 | #endif |
diff --git a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c index 994230d4dc4e..4f6f679f2799 100644 --- a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c | |||
| @@ -368,16 +368,22 @@ static int __init pcc_cpufreq_do_osc(acpi_handle *handle) | |||
| 368 | return -ENODEV; | 368 | return -ENODEV; |
| 369 | 369 | ||
| 370 | out_obj = output.pointer; | 370 | out_obj = output.pointer; |
| 371 | if (out_obj->type != ACPI_TYPE_BUFFER) | 371 | if (out_obj->type != ACPI_TYPE_BUFFER) { |
| 372 | return -ENODEV; | 372 | ret = -ENODEV; |
| 373 | goto out_free; | ||
| 374 | } | ||
| 373 | 375 | ||
| 374 | errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0); | 376 | errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0); |
| 375 | if (errors) | 377 | if (errors) { |
| 376 | return -ENODEV; | 378 | ret = -ENODEV; |
| 379 | goto out_free; | ||
| 380 | } | ||
| 377 | 381 | ||
| 378 | supported = *((u32 *)(out_obj->buffer.pointer + 4)); | 382 | supported = *((u32 *)(out_obj->buffer.pointer + 4)); |
| 379 | if (!(supported & 0x1)) | 383 | if (!(supported & 0x1)) { |
| 380 | return -ENODEV; | 384 | ret = -ENODEV; |
| 385 | goto out_free; | ||
| 386 | } | ||
| 381 | 387 | ||
| 382 | out_free: | 388 | out_free: |
| 383 | kfree(output.pointer); | 389 | kfree(output.pointer); |
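The pcc-cpufreq hunk is a resource-leak fix: once _OSC has been evaluated and output.pointer allocated, the early "return -ENODEV" statements bypassed the kfree() at out_free, so every failed validation leaked the buffer. The rewrite sets ret and jumps to the existing label instead. A compact sketch of that single-exit idiom, with invented names and assuming <linux/slab.h> for kfree() (it is not the driver's actual helper):

        static int parse_reply(void *buffer, size_t len)
        {
                int ret = 0;

                if (len < 8) {                  /* stand-in for the type/size checks */
                        ret = -ENODEV;
                        goto out_free;          /* fail, but still free the buffer   */
                }
                /* ...further checks, each one: ret = -ENODEV; goto out_free; ... */

        out_free:
                kfree(buffer);                  /* one exit shared by success and error */
                return ret;
        }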
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 85f69cdeae10..d16c2c53d6bf 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
| @@ -39,6 +39,7 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) | |||
| 39 | misc_enable &= ~MSR_IA32_MISC_ENABLE_LIMIT_CPUID; | 39 | misc_enable &= ~MSR_IA32_MISC_ENABLE_LIMIT_CPUID; |
| 40 | wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable); | 40 | wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable); |
| 41 | c->cpuid_level = cpuid_eax(0); | 41 | c->cpuid_level = cpuid_eax(0); |
| 42 | get_cpu_cap(c); | ||
| 42 | } | 43 | } |
| 43 | } | 44 | } |
| 44 | 45 | ||
| @@ -169,7 +170,7 @@ static void __cpuinit intel_smp_check(struct cpuinfo_x86 *c) | |||
| 169 | { | 170 | { |
| 170 | #ifdef CONFIG_SMP | 171 | #ifdef CONFIG_SMP |
| 171 | /* calling is from identify_secondary_cpu() ? */ | 172 | /* calling is from identify_secondary_cpu() ? */ |
| 172 | if (c->cpu_index == boot_cpu_id) | 173 | if (!c->cpu_index) |
| 173 | return; | 174 | return; |
| 174 | 175 | ||
| 175 | /* | 176 | /* |
| @@ -283,9 +284,7 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) | |||
| 283 | /* Don't do the funky fallback heuristics the AMD version employs | 284 | /* Don't do the funky fallback heuristics the AMD version employs |
| 284 | for now. */ | 285 | for now. */ |
| 285 | node = apicid_to_node[apicid]; | 286 | node = apicid_to_node[apicid]; |
| 286 | if (node == NUMA_NO_NODE) | 287 | if (node == NUMA_NO_NODE || !node_online(node)) { |
| 287 | node = first_node(node_online_map); | ||
| 288 | else if (!node_online(node)) { | ||
| 289 | /* reuse the value from init_cpu_to_node() */ | 288 | /* reuse the value from init_cpu_to_node() */ |
| 290 | node = cpu_to_node(cpu); | 289 | node = cpu_to_node(cpu); |
| 291 | } | 290 | } |
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 898c2f4eab88..12cd823c8d03 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c | |||
| @@ -17,7 +17,7 @@ | |||
| 17 | 17 | ||
| 18 | #include <asm/processor.h> | 18 | #include <asm/processor.h> |
| 19 | #include <linux/smp.h> | 19 | #include <linux/smp.h> |
| 20 | #include <asm/k8.h> | 20 | #include <asm/amd_nb.h> |
| 21 | #include <asm/smp.h> | 21 | #include <asm/smp.h> |
| 22 | 22 | ||
| 23 | #define LVL_1_INST 1 | 23 | #define LVL_1_INST 1 |
| @@ -306,7 +306,7 @@ struct _cache_attr { | |||
| 306 | ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count); | 306 | ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count); |
| 307 | }; | 307 | }; |
| 308 | 308 | ||
| 309 | #ifdef CONFIG_CPU_SUP_AMD | 309 | #ifdef CONFIG_AMD_NB |
| 310 | 310 | ||
| 311 | /* | 311 | /* |
| 312 | * L3 cache descriptors | 312 | * L3 cache descriptors |
| @@ -369,7 +369,7 @@ static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, | |||
| 369 | return; | 369 | return; |
| 370 | 370 | ||
| 371 | /* not in virtualized environments */ | 371 | /* not in virtualized environments */ |
| 372 | if (num_k8_northbridges == 0) | 372 | if (k8_northbridges.num == 0) |
| 373 | return; | 373 | return; |
| 374 | 374 | ||
| 375 | /* | 375 | /* |
| @@ -377,7 +377,7 @@ static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, | |||
| 377 | * never freed but this is done only on shutdown so it doesn't matter. | 377 | * never freed but this is done only on shutdown so it doesn't matter. |
| 378 | */ | 378 | */ |
| 379 | if (!l3_caches) { | 379 | if (!l3_caches) { |
| 380 | int size = num_k8_northbridges * sizeof(struct amd_l3_cache *); | 380 | int size = k8_northbridges.num * sizeof(struct amd_l3_cache *); |
| 381 | 381 | ||
| 382 | l3_caches = kzalloc(size, GFP_ATOMIC); | 382 | l3_caches = kzalloc(size, GFP_ATOMIC); |
| 383 | if (!l3_caches) | 383 | if (!l3_caches) |
| @@ -556,12 +556,12 @@ static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644, | |||
| 556 | static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644, | 556 | static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644, |
| 557 | show_cache_disable_1, store_cache_disable_1); | 557 | show_cache_disable_1, store_cache_disable_1); |
| 558 | 558 | ||
| 559 | #else /* CONFIG_CPU_SUP_AMD */ | 559 | #else /* CONFIG_AMD_NB */ |
| 560 | static void __cpuinit | 560 | static void __cpuinit |
| 561 | amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, int index) | 561 | amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, int index) |
| 562 | { | 562 | { |
| 563 | }; | 563 | }; |
| 564 | #endif /* CONFIG_CPU_SUP_AMD */ | 564 | #endif /* CONFIG_AMD_NB */ |
| 565 | 565 | ||
| 566 | static int | 566 | static int |
| 567 | __cpuinit cpuid4_cache_lookup_regs(int index, | 567 | __cpuinit cpuid4_cache_lookup_regs(int index, |
| @@ -1000,7 +1000,7 @@ static struct attribute *default_attrs[] = { | |||
| 1000 | 1000 | ||
| 1001 | static struct attribute *default_l3_attrs[] = { | 1001 | static struct attribute *default_l3_attrs[] = { |
| 1002 | DEFAULT_SYSFS_CACHE_ATTRS, | 1002 | DEFAULT_SYSFS_CACHE_ATTRS, |
| 1003 | #ifdef CONFIG_CPU_SUP_AMD | 1003 | #ifdef CONFIG_AMD_NB |
| 1004 | &cache_disable_0.attr, | 1004 | &cache_disable_0.attr, |
| 1005 | &cache_disable_1.attr, | 1005 | &cache_disable_1.attr, |
| 1006 | #endif | 1006 | #endif |
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c index 8a85dd1b1aa1..1e8d66c1336a 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c | |||
| @@ -192,6 +192,7 @@ static const struct file_operations severities_coverage_fops = { | |||
| 192 | .release = seq_release, | 192 | .release = seq_release, |
| 193 | .read = seq_read, | 193 | .read = seq_read, |
| 194 | .write = severities_coverage_write, | 194 | .write = severities_coverage_write, |
| 195 | .llseek = seq_lseek, | ||
| 195 | }; | 196 | }; |
| 196 | 197 | ||
| 197 | static int __init severities_debugfs_init(void) | 198 | static int __init severities_debugfs_init(void) |
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index ed41562909fe..7a35b72d7c03 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
| @@ -1665,6 +1665,7 @@ struct file_operations mce_chrdev_ops = { | |||
| 1665 | .read = mce_read, | 1665 | .read = mce_read, |
| 1666 | .poll = mce_poll, | 1666 | .poll = mce_poll, |
| 1667 | .unlocked_ioctl = mce_ioctl, | 1667 | .unlocked_ioctl = mce_ioctl, |
| 1668 | .llseek = no_llseek, | ||
| 1668 | }; | 1669 | }; |
| 1669 | EXPORT_SYMBOL_GPL(mce_chrdev_ops); | 1670 | EXPORT_SYMBOL_GPL(mce_chrdev_ops); |
| 1670 | 1671 | ||
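Three hunks in this section touch nothing but a file_operations initializer: apm_32.c gains .llseek = noop_llseek, mce-severity.c gains .llseek = seq_lseek for its seq_file interface, and mce.c gains .llseek = no_llseek for the MCE character device. These look like part of the tree-wide push in this cycle to make every fops spell out its llseek behaviour explicitly rather than rely on the implicit default. A minimal sketch of the pattern (a hypothetical device, not one of the drivers above):

        #include <linux/fs.h>
        #include <linux/module.h>

        static const struct file_operations example_fops = {
                .owner  = THIS_MODULE,
                .llseek = no_llseek,    /* seeking is meaningless on this device */
        };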
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 5e975298fa81..80c482382d5c 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c | |||
| @@ -131,7 +131,8 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) | |||
| 131 | u32 low = 0, high = 0, address = 0; | 131 | u32 low = 0, high = 0, address = 0; |
| 132 | unsigned int bank, block; | 132 | unsigned int bank, block; |
| 133 | struct thresh_restart tr; | 133 | struct thresh_restart tr; |
| 134 | u8 lvt_off; | 134 | int lvt_off = -1; |
| 135 | u8 offset; | ||
| 135 | 136 | ||
| 136 | for (bank = 0; bank < NR_BANKS; ++bank) { | 137 | for (bank = 0; bank < NR_BANKS; ++bank) { |
| 137 | for (block = 0; block < NR_BLOCKS; ++block) { | 138 | for (block = 0; block < NR_BLOCKS; ++block) { |
| @@ -141,6 +142,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) | |||
| 141 | address = (low & MASK_BLKPTR_LO) >> 21; | 142 | address = (low & MASK_BLKPTR_LO) >> 21; |
| 142 | if (!address) | 143 | if (!address) |
| 143 | break; | 144 | break; |
| 145 | |||
| 144 | address += MCG_XBLK_ADDR; | 146 | address += MCG_XBLK_ADDR; |
| 145 | } else | 147 | } else |
| 146 | ++address; | 148 | ++address; |
| @@ -148,12 +150,8 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) | |||
| 148 | if (rdmsr_safe(address, &low, &high)) | 150 | if (rdmsr_safe(address, &low, &high)) |
| 149 | break; | 151 | break; |
| 150 | 152 | ||
| 151 | if (!(high & MASK_VALID_HI)) { | 153 | if (!(high & MASK_VALID_HI)) |
| 152 | if (block) | 154 | continue; |
| 153 | continue; | ||
| 154 | else | ||
| 155 | break; | ||
| 156 | } | ||
| 157 | 155 | ||
| 158 | if (!(high & MASK_CNTP_HI) || | 156 | if (!(high & MASK_CNTP_HI) || |
| 159 | (high & MASK_LOCKED_HI)) | 157 | (high & MASK_LOCKED_HI)) |
| @@ -165,8 +163,28 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) | |||
| 165 | if (shared_bank[bank] && c->cpu_core_id) | 163 | if (shared_bank[bank] && c->cpu_core_id) |
| 166 | break; | 164 | break; |
| 167 | #endif | 165 | #endif |
| 168 | lvt_off = setup_APIC_eilvt_mce(THRESHOLD_APIC_VECTOR, | 166 | offset = (high & MASK_LVTOFF_HI) >> 20; |
| 169 | APIC_EILVT_MSG_FIX, 0); | 167 | if (lvt_off < 0) { |
| 168 | if (setup_APIC_eilvt(offset, | ||
| 169 | THRESHOLD_APIC_VECTOR, | ||
| 170 | APIC_EILVT_MSG_FIX, 0)) { | ||
| 171 | pr_err(FW_BUG "cpu %d, failed to " | ||
| 172 | "setup threshold interrupt " | ||
| 173 | "for bank %d, block %d " | ||
| 174 | "(MSR%08X=0x%x%08x)", | ||
| 175 | smp_processor_id(), bank, block, | ||
| 176 | address, high, low); | ||
| 177 | continue; | ||
| 178 | } | ||
| 179 | lvt_off = offset; | ||
| 180 | } else if (lvt_off != offset) { | ||
| 181 | pr_err(FW_BUG "cpu %d, invalid threshold " | ||
| 182 | "interrupt offset %d for bank %d," | ||
| 183 | "block %d (MSR%08X=0x%x%08x)", | ||
| 184 | smp_processor_id(), lvt_off, bank, | ||
| 185 | block, address, high, low); | ||
| 186 | continue; | ||
| 187 | } | ||
| 170 | 188 | ||
| 171 | high &= ~MASK_LVTOFF_HI; | 189 | high &= ~MASK_LVTOFF_HI; |
| 172 | high |= lvt_off << 20; | 190 | high |= lvt_off << 20; |
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index d9368eeda309..4b683267eca5 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c | |||
| @@ -216,7 +216,7 @@ static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev, | |||
| 216 | err = sysfs_add_file_to_group(&sys_dev->kobj, | 216 | err = sysfs_add_file_to_group(&sys_dev->kobj, |
| 217 | &attr_core_power_limit_count.attr, | 217 | &attr_core_power_limit_count.attr, |
| 218 | thermal_attr_group.name); | 218 | thermal_attr_group.name); |
| 219 | if (cpu_has(c, X86_FEATURE_PTS)) | 219 | if (cpu_has(c, X86_FEATURE_PTS)) { |
| 220 | err = sysfs_add_file_to_group(&sys_dev->kobj, | 220 | err = sysfs_add_file_to_group(&sys_dev->kobj, |
| 221 | &attr_package_throttle_count.attr, | 221 | &attr_package_throttle_count.attr, |
| 222 | thermal_attr_group.name); | 222 | thermal_attr_group.name); |
| @@ -224,6 +224,7 @@ static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev, | |||
| 224 | err = sysfs_add_file_to_group(&sys_dev->kobj, | 224 | err = sysfs_add_file_to_group(&sys_dev->kobj, |
| 225 | &attr_package_power_limit_count.attr, | 225 | &attr_package_power_limit_count.attr, |
| 226 | thermal_attr_group.name); | 226 | thermal_attr_group.name); |
| 227 | } | ||
| 227 | 228 | ||
| 228 | return err; | 229 | return err; |
| 229 | } | 230 | } |
| @@ -349,7 +350,7 @@ static void intel_thermal_interrupt(void) | |||
| 349 | 350 | ||
| 350 | static void unexpected_thermal_interrupt(void) | 351 | static void unexpected_thermal_interrupt(void) |
| 351 | { | 352 | { |
| 352 | printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n", | 353 | printk(KERN_ERR "CPU%d: Unexpected LVT thermal interrupt!\n", |
| 353 | smp_processor_id()); | 354 | smp_processor_id()); |
| 354 | add_taint(TAINT_MACHINE_CHECK); | 355 | add_taint(TAINT_MACHINE_CHECK); |
| 355 | } | 356 | } |
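The first therm_throt.c hunk appears to be a missing-braces fix: before it, the if (cpu_has(c, X86_FEATURE_PTS)) guarded only the first sysfs_add_file_to_group() call, and the package power-limit attribute further down was registered regardless of whether package thermal status exists; the added braces pull the whole package-level block under the check. The underlying pitfall, as a generic illustration rather than the driver code:

        if (feature_present(c))         /* hypothetical helper */
                register_attr_a();
                register_attr_b();      /* indentation lies: this always runs */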
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index c5f59d071425..ac140c7be396 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c | |||
| @@ -827,7 +827,7 @@ int __init amd_special_default_mtrr(void) | |||
| 827 | 827 | ||
| 828 | if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) | 828 | if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) |
| 829 | return 0; | 829 | return 0; |
| 830 | if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11) | 830 | if (boot_cpu_data.x86 < 0xf) |
| 831 | return 0; | 831 | return 0; |
| 832 | /* In case some hypervisor doesn't pass SYSCFG through: */ | 832 | /* In case some hypervisor doesn't pass SYSCFG through: */ |
| 833 | if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0) | 833 | if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0) |
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 7d28d7d03885..9f27228ceffd 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c | |||
| @@ -64,18 +64,59 @@ static inline void k8_check_syscfg_dram_mod_en(void) | |||
| 64 | } | 64 | } |
| 65 | } | 65 | } |
| 66 | 66 | ||
| 67 | /* Get the size of contiguous MTRR range */ | ||
| 68 | static u64 get_mtrr_size(u64 mask) | ||
| 69 | { | ||
| 70 | u64 size; | ||
| 71 | |||
| 72 | mask >>= PAGE_SHIFT; | ||
| 73 | mask |= size_or_mask; | ||
| 74 | size = -mask; | ||
| 75 | size <<= PAGE_SHIFT; | ||
| 76 | return size; | ||
| 77 | } | ||
| 78 | |||
| 67 | /* | 79 | /* |
| 68 | * Returns the effective MTRR type for the region | 80 | * Check and return the effective type for MTRR-MTRR type overlap. |
| 69 | * Error returns: | 81 | * Returns 1 if the effective type is UNCACHEABLE, else returns 0 |
| 70 | * - 0xFE - when the range is "not entirely covered" by _any_ var range MTRR | ||
| 71 | * - 0xFF - when MTRR is not enabled | ||
| 72 | */ | 82 | */ |
| 73 | u8 mtrr_type_lookup(u64 start, u64 end) | 83 | static int check_type_overlap(u8 *prev, u8 *curr) |
| 84 | { | ||
| 85 | if (*prev == MTRR_TYPE_UNCACHABLE || *curr == MTRR_TYPE_UNCACHABLE) { | ||
| 86 | *prev = MTRR_TYPE_UNCACHABLE; | ||
| 87 | *curr = MTRR_TYPE_UNCACHABLE; | ||
| 88 | return 1; | ||
| 89 | } | ||
| 90 | |||
| 91 | if ((*prev == MTRR_TYPE_WRBACK && *curr == MTRR_TYPE_WRTHROUGH) || | ||
| 92 | (*prev == MTRR_TYPE_WRTHROUGH && *curr == MTRR_TYPE_WRBACK)) { | ||
| 93 | *prev = MTRR_TYPE_WRTHROUGH; | ||
| 94 | *curr = MTRR_TYPE_WRTHROUGH; | ||
| 95 | } | ||
| 96 | |||
| 97 | if (*prev != *curr) { | ||
| 98 | *prev = MTRR_TYPE_UNCACHABLE; | ||
| 99 | *curr = MTRR_TYPE_UNCACHABLE; | ||
| 100 | return 1; | ||
| 101 | } | ||
| 102 | |||
| 103 | return 0; | ||
| 104 | } | ||
| 105 | |||
| 106 | /* | ||
| 107 | * Error/Semi-error returns: | ||
| 108 | * 0xFF - when MTRR is not enabled | ||
| 109 | * *repeat == 1 implies [start:end] spanned across MTRR range and type returned | ||
| 110 | * corresponds only to [start:*partial_end]. | ||
| 111 | * Caller has to lookup again for [*partial_end:end]. | ||
| 112 | */ | ||
| 113 | static u8 __mtrr_type_lookup(u64 start, u64 end, u64 *partial_end, int *repeat) | ||
| 74 | { | 114 | { |
| 75 | int i; | 115 | int i; |
| 76 | u64 base, mask; | 116 | u64 base, mask; |
| 77 | u8 prev_match, curr_match; | 117 | u8 prev_match, curr_match; |
| 78 | 118 | ||
| 119 | *repeat = 0; | ||
| 79 | if (!mtrr_state_set) | 120 | if (!mtrr_state_set) |
| 80 | return 0xFF; | 121 | return 0xFF; |
| 81 | 122 | ||
| @@ -126,8 +167,34 @@ u8 mtrr_type_lookup(u64 start, u64 end) | |||
| 126 | 167 | ||
| 127 | start_state = ((start & mask) == (base & mask)); | 168 | start_state = ((start & mask) == (base & mask)); |
| 128 | end_state = ((end & mask) == (base & mask)); | 169 | end_state = ((end & mask) == (base & mask)); |
| 129 | if (start_state != end_state) | 170 | |
| 130 | return 0xFE; | 171 | if (start_state != end_state) { |
| 172 | /* | ||
| 173 | * We have start:end spanning across an MTRR. | ||
| 174 | * We split the region into | ||
| 175 | * either | ||
| 176 | * (start:mtrr_end) (mtrr_end:end) | ||
| 177 | * or | ||
| 178 | * (start:mtrr_start) (mtrr_start:end) | ||
| 179 | * depending on kind of overlap. | ||
| 180 | * Return the type for first region and a pointer to | ||
| 181 | * the start of second region so that caller will | ||
| 182 | * lookup again on the second region. | ||
| 183 | * Note: This way we handle multiple overlaps as well. | ||
| 184 | */ | ||
| 185 | if (start_state) | ||
| 186 | *partial_end = base + get_mtrr_size(mask); | ||
| 187 | else | ||
| 188 | *partial_end = base; | ||
| 189 | |||
| 190 | if (unlikely(*partial_end <= start)) { | ||
| 191 | WARN_ON(1); | ||
| 192 | *partial_end = start + PAGE_SIZE; | ||
| 193 | } | ||
| 194 | |||
| 195 | end = *partial_end - 1; /* end is inclusive */ | ||
| 196 | *repeat = 1; | ||
| 197 | } | ||
| 131 | 198 | ||
| 132 | if ((start & mask) != (base & mask)) | 199 | if ((start & mask) != (base & mask)) |
| 133 | continue; | 200 | continue; |
| @@ -138,21 +205,8 @@ u8 mtrr_type_lookup(u64 start, u64 end) | |||
| 138 | continue; | 205 | continue; |
| 139 | } | 206 | } |
| 140 | 207 | ||
| 141 | if (prev_match == MTRR_TYPE_UNCACHABLE || | 208 | if (check_type_overlap(&prev_match, &curr_match)) |
| 142 | curr_match == MTRR_TYPE_UNCACHABLE) { | 209 | return curr_match; |
| 143 | return MTRR_TYPE_UNCACHABLE; | ||
| 144 | } | ||
| 145 | |||
| 146 | if ((prev_match == MTRR_TYPE_WRBACK && | ||
| 147 | curr_match == MTRR_TYPE_WRTHROUGH) || | ||
| 148 | (prev_match == MTRR_TYPE_WRTHROUGH && | ||
| 149 | curr_match == MTRR_TYPE_WRBACK)) { | ||
| 150 | prev_match = MTRR_TYPE_WRTHROUGH; | ||
| 151 | curr_match = MTRR_TYPE_WRTHROUGH; | ||
| 152 | } | ||
| 153 | |||
| 154 | if (prev_match != curr_match) | ||
| 155 | return MTRR_TYPE_UNCACHABLE; | ||
| 156 | } | 210 | } |
| 157 | 211 | ||
| 158 | if (mtrr_tom2) { | 212 | if (mtrr_tom2) { |
| @@ -166,6 +220,36 @@ u8 mtrr_type_lookup(u64 start, u64 end) | |||
| 166 | return mtrr_state.def_type; | 220 | return mtrr_state.def_type; |
| 167 | } | 221 | } |
| 168 | 222 | ||
| 223 | /* | ||
| 224 | * Returns the effective MTRR type for the region | ||
| 225 | * Error return: | ||
| 226 | * 0xFF - when MTRR is not enabled | ||
| 227 | */ | ||
| 228 | u8 mtrr_type_lookup(u64 start, u64 end) | ||
| 229 | { | ||
| 230 | u8 type, prev_type; | ||
| 231 | int repeat; | ||
| 232 | u64 partial_end; | ||
| 233 | |||
| 234 | type = __mtrr_type_lookup(start, end, &partial_end, &repeat); | ||
| 235 | |||
| 236 | /* | ||
| 237 | * Common path is with repeat = 0. | ||
| 238 | * However, we can have cases where [start:end] spans across some | ||
| 239 | * MTRR range. Do repeated lookups for that case here. | ||
| 240 | */ | ||
| 241 | while (repeat) { | ||
| 242 | prev_type = type; | ||
| 243 | start = partial_end; | ||
| 244 | type = __mtrr_type_lookup(start, end, &partial_end, &repeat); | ||
| 245 | |||
| 246 | if (check_type_overlap(&prev_type, &type)) | ||
| 247 | return type; | ||
| 248 | } | ||
| 249 | |||
| 250 | return type; | ||
| 251 | } | ||
| 252 | |||
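The repeat loop in the new mtrr_type_lookup() is easier to follow with the lookup stubbed out. The sketch below is a simplified model, not the kernel function: it assumes a single hypothetical MTRR boundary at MTRR_SPLIT and shows how the caller keeps re-querying the remainder of [start:end] until one call covers it; the real code additionally merges the per-segment types through check_type_overlap().

#include <stdio.h>

#define MTRR_SPLIT 0x100000ULL	/* hypothetical MTRR boundary for the demo */

/* fake lookup: write-back below the split, uncacheable above it */
static unsigned char lookup_one(unsigned long long start,
				unsigned long long end,
				unsigned long long *partial_end, int *repeat)
{
	*repeat = 0;
	if (start < MTRR_SPLIT && end >= MTRR_SPLIT) {
		*partial_end = MTRR_SPLIT;	/* caller continues from here */
		*repeat = 1;
	}
	return start < MTRR_SPLIT ? 6 /* WB */ : 0 /* UC */;
}

int main(void)
{
	unsigned long long start = 0xff000, end = 0x101000, partial_end;
	int repeat;
	unsigned char type = lookup_one(start, end, &partial_end, &repeat);

	while (repeat) {	/* same shape as the wrapper above */
		printf("[%#llx:%#llx) -> type %u\n", start, partial_end, type);
		start = partial_end;
		type = lookup_one(start, end, &partial_end, &repeat);
	}
	printf("[%#llx:%#llx] -> type %u\n", start, end, type);
	return 0;
}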
| 169 | /* Get the MSR pair relating to a var range */ | 253 | /* Get the MSR pair relating to a var range */ |
| 170 | static void | 254 | static void |
| 171 | get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr) | 255 | get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr) |
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 3efdf2870a35..fe73c1844a9a 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
| @@ -102,6 +102,7 @@ struct cpu_hw_events { | |||
| 102 | */ | 102 | */ |
| 103 | struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */ | 103 | struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */ |
| 104 | unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | 104 | unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; |
| 105 | unsigned long running[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | ||
| 105 | int enabled; | 106 | int enabled; |
| 106 | 107 | ||
| 107 | int n_events; | 108 | int n_events; |
| @@ -530,7 +531,7 @@ static int x86_pmu_hw_config(struct perf_event *event) | |||
| 530 | /* | 531 | /* |
| 531 | * Setup the hardware configuration for a given attr_type | 532 | * Setup the hardware configuration for a given attr_type |
| 532 | */ | 533 | */ |
| 533 | static int __hw_perf_event_init(struct perf_event *event) | 534 | static int __x86_pmu_event_init(struct perf_event *event) |
| 534 | { | 535 | { |
| 535 | int err; | 536 | int err; |
| 536 | 537 | ||
| @@ -583,7 +584,7 @@ static void x86_pmu_disable_all(void) | |||
| 583 | } | 584 | } |
| 584 | } | 585 | } |
| 585 | 586 | ||
| 586 | void hw_perf_disable(void) | 587 | static void x86_pmu_disable(struct pmu *pmu) |
| 587 | { | 588 | { |
| 588 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 589 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
| 589 | 590 | ||
| @@ -618,7 +619,7 @@ static void x86_pmu_enable_all(int added) | |||
| 618 | } | 619 | } |
| 619 | } | 620 | } |
| 620 | 621 | ||
| 621 | static const struct pmu pmu; | 622 | static struct pmu pmu; |
| 622 | 623 | ||
| 623 | static inline int is_x86_event(struct perf_event *event) | 624 | static inline int is_x86_event(struct perf_event *event) |
| 624 | { | 625 | { |
| @@ -800,10 +801,10 @@ static inline int match_prev_assignment(struct hw_perf_event *hwc, | |||
| 800 | hwc->last_tag == cpuc->tags[i]; | 801 | hwc->last_tag == cpuc->tags[i]; |
| 801 | } | 802 | } |
| 802 | 803 | ||
| 803 | static int x86_pmu_start(struct perf_event *event); | 804 | static void x86_pmu_start(struct perf_event *event, int flags); |
| 804 | static void x86_pmu_stop(struct perf_event *event); | 805 | static void x86_pmu_stop(struct perf_event *event, int flags); |
| 805 | 806 | ||
| 806 | void hw_perf_enable(void) | 807 | static void x86_pmu_enable(struct pmu *pmu) |
| 807 | { | 808 | { |
| 808 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 809 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
| 809 | struct perf_event *event; | 810 | struct perf_event *event; |
| @@ -839,7 +840,14 @@ void hw_perf_enable(void) | |||
| 839 | match_prev_assignment(hwc, cpuc, i)) | 840 | match_prev_assignment(hwc, cpuc, i)) |
| 840 | continue; | 841 | continue; |
| 841 | 842 | ||
| 842 | x86_pmu_stop(event); | 843 | /* |
| 844 | * Ensure we don't accidentally enable a stopped | ||
| 845 | * counter simply because we rescheduled. | ||
| 846 | */ | ||
| 847 | if (hwc->state & PERF_HES_STOPPED) | ||
| 848 | hwc->state |= PERF_HES_ARCH; | ||
| 849 | |||
| 850 | x86_pmu_stop(event, PERF_EF_UPDATE); | ||
| 843 | } | 851 | } |
| 844 | 852 | ||
| 845 | for (i = 0; i < cpuc->n_events; i++) { | 853 | for (i = 0; i < cpuc->n_events; i++) { |
| @@ -851,7 +859,10 @@ void hw_perf_enable(void) | |||
| 851 | else if (i < n_running) | 859 | else if (i < n_running) |
| 852 | continue; | 860 | continue; |
| 853 | 861 | ||
| 854 | x86_pmu_start(event); | 862 | if (hwc->state & PERF_HES_ARCH) |
| 863 | continue; | ||
| 864 | |||
| 865 | x86_pmu_start(event, PERF_EF_RELOAD); | ||
| 855 | } | 866 | } |
| 856 | cpuc->n_added = 0; | 867 | cpuc->n_added = 0; |
| 857 | perf_events_lapic_init(); | 868 | perf_events_lapic_init(); |
| @@ -952,15 +963,12 @@ static void x86_pmu_enable_event(struct perf_event *event) | |||
| 952 | } | 963 | } |
| 953 | 964 | ||
| 954 | /* | 965 | /* |
| 955 | * activate a single event | 966 | * Add a single event to the PMU. |
| 956 | * | 967 | * |
| 957 | * The event is added to the group of enabled events | 968 | * The event is added to the group of enabled events |
| 958 | * but only if it can be scheduled with existing events. | 969 | * but only if it can be scheduled with existing events. |
| 959 | * | ||
| 960 | * Called with PMU disabled. If successful and return value 1, | ||
| 961 | * then guaranteed to call perf_enable() and hw_perf_enable() | ||
| 962 | */ | 970 | */ |
| 963 | static int x86_pmu_enable(struct perf_event *event) | 971 | static int x86_pmu_add(struct perf_event *event, int flags) |
| 964 | { | 972 | { |
| 965 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 973 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
| 966 | struct hw_perf_event *hwc; | 974 | struct hw_perf_event *hwc; |
| @@ -969,57 +977,67 @@ static int x86_pmu_enable(struct perf_event *event) | |||
| 969 | 977 | ||
| 970 | hwc = &event->hw; | 978 | hwc = &event->hw; |
| 971 | 979 | ||
| 980 | perf_pmu_disable(event->pmu); | ||
| 972 | n0 = cpuc->n_events; | 981 | n0 = cpuc->n_events; |
| 973 | n = collect_events(cpuc, event, false); | 982 | ret = n = collect_events(cpuc, event, false); |
| 974 | if (n < 0) | 983 | if (ret < 0) |
| 975 | return n; | 984 | goto out; |
| 985 | |||
| 986 | hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; | ||
| 987 | if (!(flags & PERF_EF_START)) | ||
| 988 | hwc->state |= PERF_HES_ARCH; | ||
| 976 | 989 | ||
| 977 | /* | 990 | /* |
| 978 | * If group events scheduling transaction was started, | 991 | * If group events scheduling transaction was started, |
| 979 | * skip the schedulability test here, it will be performed | 992 | * skip the schedulability test here, it will be performed |
| 980 | * at commit time(->commit_txn) as a whole | 993 | * at commit time (->commit_txn) as a whole |
| 981 | */ | 994 | */ |
| 982 | if (cpuc->group_flag & PERF_EVENT_TXN) | 995 | if (cpuc->group_flag & PERF_EVENT_TXN) |
| 983 | goto out; | 996 | goto done_collect; |
| 984 | 997 | ||
| 985 | ret = x86_pmu.schedule_events(cpuc, n, assign); | 998 | ret = x86_pmu.schedule_events(cpuc, n, assign); |
| 986 | if (ret) | 999 | if (ret) |
| 987 | return ret; | 1000 | goto out; |
| 988 | /* | 1001 | /* |
| 989 | * copy new assignment, now we know it is possible | 1002 | * copy new assignment, now we know it is possible |
| 990 | * will be used by hw_perf_enable() | 1003 | * will be used by hw_perf_enable() |
| 991 | */ | 1004 | */ |
| 992 | memcpy(cpuc->assign, assign, n*sizeof(int)); | 1005 | memcpy(cpuc->assign, assign, n*sizeof(int)); |
| 993 | 1006 | ||
| 994 | out: | 1007 | done_collect: |
| 995 | cpuc->n_events = n; | 1008 | cpuc->n_events = n; |
| 996 | cpuc->n_added += n - n0; | 1009 | cpuc->n_added += n - n0; |
| 997 | cpuc->n_txn += n - n0; | 1010 | cpuc->n_txn += n - n0; |
| 998 | 1011 | ||
| 999 | return 0; | 1012 | ret = 0; |
| 1013 | out: | ||
| 1014 | perf_pmu_enable(event->pmu); | ||
| 1015 | return ret; | ||
| 1000 | } | 1016 | } |
| 1001 | 1017 | ||
| 1002 | static int x86_pmu_start(struct perf_event *event) | 1018 | static void x86_pmu_start(struct perf_event *event, int flags) |
| 1003 | { | 1019 | { |
| 1004 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 1020 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
| 1005 | int idx = event->hw.idx; | 1021 | int idx = event->hw.idx; |
| 1006 | 1022 | ||
| 1007 | if (idx == -1) | 1023 | if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) |
| 1008 | return -EAGAIN; | 1024 | return; |
| 1025 | |||
| 1026 | if (WARN_ON_ONCE(idx == -1)) | ||
| 1027 | return; | ||
| 1028 | |||
| 1029 | if (flags & PERF_EF_RELOAD) { | ||
| 1030 | WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); | ||
| 1031 | x86_perf_event_set_period(event); | ||
| 1032 | } | ||
| 1033 | |||
| 1034 | event->hw.state = 0; | ||
| 1009 | 1035 | ||
| 1010 | x86_perf_event_set_period(event); | ||
| 1011 | cpuc->events[idx] = event; | 1036 | cpuc->events[idx] = event; |
| 1012 | __set_bit(idx, cpuc->active_mask); | 1037 | __set_bit(idx, cpuc->active_mask); |
| 1038 | __set_bit(idx, cpuc->running); | ||
| 1013 | x86_pmu.enable(event); | 1039 | x86_pmu.enable(event); |
| 1014 | perf_event_update_userpage(event); | 1040 | perf_event_update_userpage(event); |
| 1015 | |||
| 1016 | return 0; | ||
| 1017 | } | ||
| 1018 | |||
| 1019 | static void x86_pmu_unthrottle(struct perf_event *event) | ||
| 1020 | { | ||
| 1021 | int ret = x86_pmu_start(event); | ||
| 1022 | WARN_ON_ONCE(ret); | ||
| 1023 | } | 1041 | } |
| 1024 | 1042 | ||
| 1025 | void perf_event_print_debug(void) | 1043 | void perf_event_print_debug(void) |
| @@ -1076,27 +1094,29 @@ void perf_event_print_debug(void) | |||
| 1076 | local_irq_restore(flags); | 1094 | local_irq_restore(flags); |
| 1077 | } | 1095 | } |
| 1078 | 1096 | ||
| 1079 | static void x86_pmu_stop(struct perf_event *event) | 1097 | static void x86_pmu_stop(struct perf_event *event, int flags) |
| 1080 | { | 1098 | { |
| 1081 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 1099 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
| 1082 | struct hw_perf_event *hwc = &event->hw; | 1100 | struct hw_perf_event *hwc = &event->hw; |
| 1083 | int idx = hwc->idx; | ||
| 1084 | 1101 | ||
| 1085 | if (!__test_and_clear_bit(idx, cpuc->active_mask)) | 1102 | if (__test_and_clear_bit(hwc->idx, cpuc->active_mask)) { |
| 1086 | return; | 1103 | x86_pmu.disable(event); |
| 1087 | 1104 | cpuc->events[hwc->idx] = NULL; | |
| 1088 | x86_pmu.disable(event); | 1105 | WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); |
| 1089 | 1106 | hwc->state |= PERF_HES_STOPPED; | |
| 1090 | /* | 1107 | } |
| 1091 | * Drain the remaining delta count out of an event | ||
| 1092 | * that we are disabling: | ||
| 1093 | */ | ||
| 1094 | x86_perf_event_update(event); | ||
| 1095 | 1108 | ||
| 1096 | cpuc->events[idx] = NULL; | 1109 | if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { |
| 1110 | /* | ||
| 1111 | * Drain the remaining delta count out of a event | ||
| 1112 | * that we are disabling: | ||
| 1113 | */ | ||
| 1114 | x86_perf_event_update(event); | ||
| 1115 | hwc->state |= PERF_HES_UPTODATE; | ||
| 1116 | } | ||
| 1097 | } | 1117 | } |
| 1098 | 1118 | ||
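The start/stop pair above is driven by two state bits: one saying the counter is stopped, one saying its count has already been drained, so a second stop with PERF_EF_UPDATE must not fold the delta in twice and a start with PERF_EF_RELOAD may only run on a drained event. A minimal model of that handshake, with local flag values standing in for PERF_HES_STOPPED, PERF_HES_UPTODATE and the PERF_EF_* flags:

#include <assert.h>
#include <stdio.h>

#define HES_STOPPED  0x01	/* stand-in for PERF_HES_STOPPED  */
#define HES_UPTODATE 0x02	/* stand-in for PERF_HES_UPTODATE */
#define EF_UPDATE    0x01	/* stand-in for PERF_EF_UPDATE    */
#define EF_RELOAD    0x02	/* stand-in for PERF_EF_RELOAD    */

struct demo_event { int state; long long count, hw; };

static void demo_stop(struct demo_event *e, int flags)
{
	if (!(e->state & HES_STOPPED))
		e->state |= HES_STOPPED;		/* "disable the counter" */

	if ((flags & EF_UPDATE) && !(e->state & HES_UPTODATE)) {
		e->count += e->hw;			/* drain the delta exactly once */
		e->hw = 0;
		e->state |= HES_UPTODATE;
	}
}

static void demo_start(struct demo_event *e, int flags)
{
	assert(e->state & HES_STOPPED);			/* must be stopped */
	if (flags & EF_RELOAD)
		assert(e->state & HES_UPTODATE);	/* reload needs a drained count */
	e->state = 0;					/* counting again */
}

int main(void)
{
	struct demo_event e = { .state = HES_STOPPED | HES_UPTODATE };

	demo_start(&e, EF_RELOAD);
	e.hw = 1234;			/* pretend the counter ticked */
	demo_stop(&e, EF_UPDATE);
	demo_stop(&e, EF_UPDATE);	/* idempotent: delta not drained twice */
	printf("count = %lld\n", e.count);
	return 0;
}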
| 1099 | static void x86_pmu_disable(struct perf_event *event) | 1119 | static void x86_pmu_del(struct perf_event *event, int flags) |
| 1100 | { | 1120 | { |
| 1101 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 1121 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
| 1102 | int i; | 1122 | int i; |
| @@ -1109,7 +1129,7 @@ static void x86_pmu_disable(struct perf_event *event) | |||
| 1109 | if (cpuc->group_flag & PERF_EVENT_TXN) | 1129 | if (cpuc->group_flag & PERF_EVENT_TXN) |
| 1110 | return; | 1130 | return; |
| 1111 | 1131 | ||
| 1112 | x86_pmu_stop(event); | 1132 | x86_pmu_stop(event, PERF_EF_UPDATE); |
| 1113 | 1133 | ||
| 1114 | for (i = 0; i < cpuc->n_events; i++) { | 1134 | for (i = 0; i < cpuc->n_events; i++) { |
| 1115 | if (event == cpuc->event_list[i]) { | 1135 | if (event == cpuc->event_list[i]) { |
| @@ -1132,7 +1152,6 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) | |||
| 1132 | struct perf_sample_data data; | 1152 | struct perf_sample_data data; |
| 1133 | struct cpu_hw_events *cpuc; | 1153 | struct cpu_hw_events *cpuc; |
| 1134 | struct perf_event *event; | 1154 | struct perf_event *event; |
| 1135 | struct hw_perf_event *hwc; | ||
| 1136 | int idx, handled = 0; | 1155 | int idx, handled = 0; |
| 1137 | u64 val; | 1156 | u64 val; |
| 1138 | 1157 | ||
| @@ -1141,11 +1160,18 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) | |||
| 1141 | cpuc = &__get_cpu_var(cpu_hw_events); | 1160 | cpuc = &__get_cpu_var(cpu_hw_events); |
| 1142 | 1161 | ||
| 1143 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | 1162 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
| 1144 | if (!test_bit(idx, cpuc->active_mask)) | 1163 | if (!test_bit(idx, cpuc->active_mask)) { |
| 1164 | /* | ||
| 1165 | * Though we deactivated the counter, some CPUs | ||
| 1166 | * might still deliver spurious interrupts that were | ||
| 1167 | * already in flight. Catch them: | ||
| 1168 | */ | ||
| 1169 | if (__test_and_clear_bit(idx, cpuc->running)) | ||
| 1170 | handled++; | ||
| 1145 | continue; | 1171 | continue; |
| 1172 | } | ||
| 1146 | 1173 | ||
| 1147 | event = cpuc->events[idx]; | 1174 | event = cpuc->events[idx]; |
| 1148 | hwc = &event->hw; | ||
| 1149 | 1175 | ||
| 1150 | val = x86_perf_event_update(event); | 1176 | val = x86_perf_event_update(event); |
| 1151 | if (val & (1ULL << (x86_pmu.cntval_bits - 1))) | 1177 | if (val & (1ULL << (x86_pmu.cntval_bits - 1))) |
| @@ -1161,7 +1187,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) | |||
| 1161 | continue; | 1187 | continue; |
| 1162 | 1188 | ||
| 1163 | if (perf_event_overflow(event, 1, &data, regs)) | 1189 | if (perf_event_overflow(event, 1, &data, regs)) |
| 1164 | x86_pmu_stop(event); | 1190 | x86_pmu_stop(event, 0); |
| 1165 | } | 1191 | } |
| 1166 | 1192 | ||
| 1167 | if (handled) | 1193 | if (handled) |
| @@ -1170,25 +1196,6 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) | |||
| 1170 | return handled; | 1196 | return handled; |
| 1171 | } | 1197 | } |
| 1172 | 1198 | ||
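The new cpuc->running bitmap exists so the NMI handler can claim an overflow from a counter that was active a moment ago but has just been stopped; otherwise the interrupt would look unknown. A simplified model with plain bitmask operations rather than the kernel's bitmap helpers:

#include <stdio.h>

static unsigned long active, running;

/* start: the counter becomes both active and "has run recently" */
static void counter_start(int idx)
{
	active  |= 1UL << idx;
	running |= 1UL << idx;
}

/* stop: only the active bit is cleared; running stays set */
static void counter_stop(int idx)
{
	active &= ~(1UL << idx);
}

/* interrupt handler: an overflow from a stopped-but-recently-running
 * counter is still counted as handled, so it is not reported as an
 * unknown/spurious interrupt. */
static int handle_overflow(int idx)
{
	if (!(active & (1UL << idx))) {
		if (running & (1UL << idx)) {
			running &= ~(1UL << idx);
			return 1;	/* late interrupt, claim it once */
		}
		return 0;		/* genuinely unknown */
	}
	return 1;			/* normal case */
}

int main(void)
{
	counter_start(2);
	counter_stop(2);		/* an overflow may already be in flight */
	printf("first  late irq handled: %d\n", handle_overflow(2));
	printf("second late irq handled: %d\n", handle_overflow(2));
	return 0;
}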
| 1173 | void smp_perf_pending_interrupt(struct pt_regs *regs) | ||
| 1174 | { | ||
| 1175 | irq_enter(); | ||
| 1176 | ack_APIC_irq(); | ||
| 1177 | inc_irq_stat(apic_pending_irqs); | ||
| 1178 | perf_event_do_pending(); | ||
| 1179 | irq_exit(); | ||
| 1180 | } | ||
| 1181 | |||
| 1182 | void set_perf_event_pending(void) | ||
| 1183 | { | ||
| 1184 | #ifdef CONFIG_X86_LOCAL_APIC | ||
| 1185 | if (!x86_pmu.apic || !x86_pmu_initialized()) | ||
| 1186 | return; | ||
| 1187 | |||
| 1188 | apic->send_IPI_self(LOCAL_PENDING_VECTOR); | ||
| 1189 | #endif | ||
| 1190 | } | ||
| 1191 | |||
| 1192 | void perf_events_lapic_init(void) | 1199 | void perf_events_lapic_init(void) |
| 1193 | { | 1200 | { |
| 1194 | if (!x86_pmu.apic || !x86_pmu_initialized()) | 1201 | if (!x86_pmu.apic || !x86_pmu_initialized()) |
| @@ -1378,7 +1385,6 @@ void __init init_hw_perf_events(void) | |||
| 1378 | x86_pmu.num_counters = X86_PMC_MAX_GENERIC; | 1385 | x86_pmu.num_counters = X86_PMC_MAX_GENERIC; |
| 1379 | } | 1386 | } |
| 1380 | x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1; | 1387 | x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1; |
| 1381 | perf_max_events = x86_pmu.num_counters; | ||
| 1382 | 1388 | ||
| 1383 | if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { | 1389 | if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { |
| 1384 | WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", | 1390 | WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", |
| @@ -1414,6 +1420,7 @@ void __init init_hw_perf_events(void) | |||
| 1414 | pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); | 1420 | pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); |
| 1415 | pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); | 1421 | pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); |
| 1416 | 1422 | ||
| 1423 | perf_pmu_register(&pmu); | ||
| 1417 | perf_cpu_notifier(x86_pmu_notifier); | 1424 | perf_cpu_notifier(x86_pmu_notifier); |
| 1418 | } | 1425 | } |
| 1419 | 1426 | ||
| @@ -1427,10 +1434,11 @@ static inline void x86_pmu_read(struct perf_event *event) | |||
| 1427 | * Set the flag to make pmu::enable() not perform the | 1434 | * Set the flag to make pmu::enable() not perform the |
| 1428 | * schedulability test, it will be performed at commit time | 1435 | * schedulability test, it will be performed at commit time |
| 1429 | */ | 1436 | */ |
| 1430 | static void x86_pmu_start_txn(const struct pmu *pmu) | 1437 | static void x86_pmu_start_txn(struct pmu *pmu) |
| 1431 | { | 1438 | { |
| 1432 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 1439 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
| 1433 | 1440 | ||
| 1441 | perf_pmu_disable(pmu); | ||
| 1434 | cpuc->group_flag |= PERF_EVENT_TXN; | 1442 | cpuc->group_flag |= PERF_EVENT_TXN; |
| 1435 | cpuc->n_txn = 0; | 1443 | cpuc->n_txn = 0; |
| 1436 | } | 1444 | } |
| @@ -1440,7 +1448,7 @@ static void x86_pmu_start_txn(const struct pmu *pmu) | |||
| 1440 | * Clear the flag and pmu::enable() will perform the | 1448 | * Clear the flag and pmu::enable() will perform the |
| 1441 | * schedulability test. | 1449 | * schedulability test. |
| 1442 | */ | 1450 | */ |
| 1443 | static void x86_pmu_cancel_txn(const struct pmu *pmu) | 1451 | static void x86_pmu_cancel_txn(struct pmu *pmu) |
| 1444 | { | 1452 | { |
| 1445 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 1453 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
| 1446 | 1454 | ||
| @@ -1450,6 +1458,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu) | |||
| 1450 | */ | 1458 | */ |
| 1451 | cpuc->n_added -= cpuc->n_txn; | 1459 | cpuc->n_added -= cpuc->n_txn; |
| 1452 | cpuc->n_events -= cpuc->n_txn; | 1460 | cpuc->n_events -= cpuc->n_txn; |
| 1461 | perf_pmu_enable(pmu); | ||
| 1453 | } | 1462 | } |
| 1454 | 1463 | ||
| 1455 | /* | 1464 | /* |
| @@ -1457,7 +1466,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu) | |||
| 1457 | * Perform the group schedulability test as a whole | 1466 | * Perform the group schedulability test as a whole |
| 1458 | * Return 0 on success | 1467 | * Return 0 on success |
| 1459 | */ | 1468 | */ |
| 1460 | static int x86_pmu_commit_txn(const struct pmu *pmu) | 1469 | static int x86_pmu_commit_txn(struct pmu *pmu) |
| 1461 | { | 1470 | { |
| 1462 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 1471 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
| 1463 | int assign[X86_PMC_IDX_MAX]; | 1472 | int assign[X86_PMC_IDX_MAX]; |
| @@ -1479,22 +1488,10 @@ static int x86_pmu_commit_txn(const struct pmu *pmu) | |||
| 1479 | memcpy(cpuc->assign, assign, n*sizeof(int)); | 1488 | memcpy(cpuc->assign, assign, n*sizeof(int)); |
| 1480 | 1489 | ||
| 1481 | cpuc->group_flag &= ~PERF_EVENT_TXN; | 1490 | cpuc->group_flag &= ~PERF_EVENT_TXN; |
| 1482 | 1491 | perf_pmu_enable(pmu); | |
| 1483 | return 0; | 1492 | return 0; |
| 1484 | } | 1493 | } |
| 1485 | 1494 | ||
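start_txn/commit_txn/cancel_txn now also bracket the group with perf_pmu_disable()/perf_pmu_enable(). The protocol itself is unchanged: open a transaction, add each group member with the per-event schedulability test skipped, then run the test once for the whole group at commit, rolling everything back on failure. The sketch below models only that control flow with hypothetical helpers; it is not the perf core.

#include <stdio.h>

#define MAX_HW 2			/* pretend the PMU has two counters */

static int n_events, txn_open;

/* group-wide schedulability test: trivially "fits in MAX_HW counters" */
static int schedule_events(int n)
{
	return n <= MAX_HW ? 0 : -1;
}

static void start_txn(void) { txn_open = 1; }

static int add_event(void)
{
	n_events++;
	if (txn_open)			/* defer the test to commit time */
		return 0;
	return schedule_events(n_events);
}

static int commit_txn(void)
{
	int ret = schedule_events(n_events);
	if (!ret)
		txn_open = 0;		/* accepted as a whole */
	return ret;
}

static void cancel_txn(int n_added) { n_events -= n_added; txn_open = 0; }

int main(void)
{
	start_txn();
	add_event();
	add_event();
	add_event();			/* three events, only two counters */
	if (commit_txn()) {
		cancel_txn(3);		/* roll the whole group back */
		printf("group rejected, n_events=%d\n", n_events);
	}
	return 0;
}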
| 1486 | static const struct pmu pmu = { | ||
| 1487 | .enable = x86_pmu_enable, | ||
| 1488 | .disable = x86_pmu_disable, | ||
| 1489 | .start = x86_pmu_start, | ||
| 1490 | .stop = x86_pmu_stop, | ||
| 1491 | .read = x86_pmu_read, | ||
| 1492 | .unthrottle = x86_pmu_unthrottle, | ||
| 1493 | .start_txn = x86_pmu_start_txn, | ||
| 1494 | .cancel_txn = x86_pmu_cancel_txn, | ||
| 1495 | .commit_txn = x86_pmu_commit_txn, | ||
| 1496 | }; | ||
| 1497 | |||
| 1498 | /* | 1495 | /* |
| 1499 | * validate that we can schedule this event | 1496 | * validate that we can schedule this event |
| 1500 | */ | 1497 | */ |
| @@ -1569,12 +1566,22 @@ out: | |||
| 1569 | return ret; | 1566 | return ret; |
| 1570 | } | 1567 | } |
| 1571 | 1568 | ||
| 1572 | const struct pmu *hw_perf_event_init(struct perf_event *event) | 1569 | int x86_pmu_event_init(struct perf_event *event) |
| 1573 | { | 1570 | { |
| 1574 | const struct pmu *tmp; | 1571 | struct pmu *tmp; |
| 1575 | int err; | 1572 | int err; |
| 1576 | 1573 | ||
| 1577 | err = __hw_perf_event_init(event); | 1574 | switch (event->attr.type) { |
| 1575 | case PERF_TYPE_RAW: | ||
| 1576 | case PERF_TYPE_HARDWARE: | ||
| 1577 | case PERF_TYPE_HW_CACHE: | ||
| 1578 | break; | ||
| 1579 | |||
| 1580 | default: | ||
| 1581 | return -ENOENT; | ||
| 1582 | } | ||
| 1583 | |||
| 1584 | err = __x86_pmu_event_init(event); | ||
| 1578 | if (!err) { | 1585 | if (!err) { |
| 1579 | /* | 1586 | /* |
| 1580 | * we temporarily connect event to its pmu | 1587 | * we temporarily connect event to its pmu |
| @@ -1594,26 +1601,31 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) | |||
| 1594 | if (err) { | 1601 | if (err) { |
| 1595 | if (event->destroy) | 1602 | if (event->destroy) |
| 1596 | event->destroy(event); | 1603 | event->destroy(event); |
| 1597 | return ERR_PTR(err); | ||
| 1598 | } | 1604 | } |
| 1599 | 1605 | ||
| 1600 | return &pmu; | 1606 | return err; |
| 1601 | } | 1607 | } |
| 1602 | 1608 | ||
| 1603 | /* | 1609 | static struct pmu pmu = { |
| 1604 | * callchain support | 1610 | .pmu_enable = x86_pmu_enable, |
| 1605 | */ | 1611 | .pmu_disable = x86_pmu_disable, |
| 1606 | 1612 | ||
| 1607 | static inline | 1613 | .event_init = x86_pmu_event_init, |
| 1608 | void callchain_store(struct perf_callchain_entry *entry, u64 ip) | ||
| 1609 | { | ||
| 1610 | if (entry->nr < PERF_MAX_STACK_DEPTH) | ||
| 1611 | entry->ip[entry->nr++] = ip; | ||
| 1612 | } | ||
| 1613 | 1614 | ||
| 1614 | static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); | 1615 | .add = x86_pmu_add, |
| 1615 | static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); | 1616 | .del = x86_pmu_del, |
| 1617 | .start = x86_pmu_start, | ||
| 1618 | .stop = x86_pmu_stop, | ||
| 1619 | .read = x86_pmu_read, | ||
| 1616 | 1620 | ||
| 1621 | .start_txn = x86_pmu_start_txn, | ||
| 1622 | .cancel_txn = x86_pmu_cancel_txn, | ||
| 1623 | .commit_txn = x86_pmu_commit_txn, | ||
| 1624 | }; | ||
| 1625 | |||
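Everything the core used to reach through hw_perf_enable(), hw_perf_event_init() and the old const pmu now travels through this single callback table, handed over once with perf_pmu_register(&pmu). The sketch below models that shape with a local struct of function pointers; event_init returning -ENOENT is the signal that this PMU does not handle the event, so the core can offer it to the next registered PMU. Names and the numeric event types are illustrative only.

#include <errno.h>
#include <stdio.h>

struct demo_pmu {
	const char *name;
	int  (*event_init)(int type);	/* -ENOENT means "not mine" */
	void (*pmu_enable)(void);
	void (*pmu_disable)(void);
};

static int cpu_event_init(int type)
{
	/* 0 and 4 stand in for "hardware" and "raw" event types */
	return (type == 0 || type == 4) ? 0 : -ENOENT;
}
static void cpu_enable(void)  { puts("cpu pmu enabled"); }
static void cpu_disable(void) { puts("cpu pmu disabled"); }

static struct demo_pmu cpu_pmu = {
	.name        = "cpu",
	.event_init  = cpu_event_init,
	.pmu_enable  = cpu_enable,
	.pmu_disable = cpu_disable,
};

/* stand-in for perf_pmu_register(): the core just remembers the table */
static struct demo_pmu *registered;
static void register_pmu(struct demo_pmu *pmu) { registered = pmu; }

int main(void)
{
	register_pmu(&cpu_pmu);
	if (registered->event_init(0) == 0) {	/* a hardware event */
		registered->pmu_disable();
		/* ... program counters ... */
		registered->pmu_enable();
	}
	return 0;
}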
| 1626 | /* | ||
| 1627 | * callchain support | ||
| 1628 | */ | ||
| 1617 | 1629 | ||
| 1618 | static void | 1630 | static void |
| 1619 | backtrace_warning_symbol(void *data, char *msg, unsigned long symbol) | 1631 | backtrace_warning_symbol(void *data, char *msg, unsigned long symbol) |
| @@ -1635,7 +1647,7 @@ static void backtrace_address(void *data, unsigned long addr, int reliable) | |||
| 1635 | { | 1647 | { |
| 1636 | struct perf_callchain_entry *entry = data; | 1648 | struct perf_callchain_entry *entry = data; |
| 1637 | 1649 | ||
| 1638 | callchain_store(entry, addr); | 1650 | perf_callchain_store(entry, addr); |
| 1639 | } | 1651 | } |
| 1640 | 1652 | ||
| 1641 | static const struct stacktrace_ops backtrace_ops = { | 1653 | static const struct stacktrace_ops backtrace_ops = { |
| @@ -1646,11 +1658,15 @@ static const struct stacktrace_ops backtrace_ops = { | |||
| 1646 | .walk_stack = print_context_stack_bp, | 1658 | .walk_stack = print_context_stack_bp, |
| 1647 | }; | 1659 | }; |
| 1648 | 1660 | ||
| 1649 | static void | 1661 | void |
| 1650 | perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) | 1662 | perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) |
| 1651 | { | 1663 | { |
| 1652 | callchain_store(entry, PERF_CONTEXT_KERNEL); | 1664 | if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { |
| 1653 | callchain_store(entry, regs->ip); | 1665 | /* TODO: We don't support guest os callchain now */ |
| 1666 | return; | ||
| 1667 | } | ||
| 1668 | |||
| 1669 | perf_callchain_store(entry, regs->ip); | ||
| 1654 | 1670 | ||
| 1655 | dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); | 1671 | dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); |
| 1656 | } | 1672 | } |
| @@ -1679,7 +1695,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) | |||
| 1679 | if (fp < compat_ptr(regs->sp)) | 1695 | if (fp < compat_ptr(regs->sp)) |
| 1680 | break; | 1696 | break; |
| 1681 | 1697 | ||
| 1682 | callchain_store(entry, frame.return_address); | 1698 | perf_callchain_store(entry, frame.return_address); |
| 1683 | fp = compat_ptr(frame.next_frame); | 1699 | fp = compat_ptr(frame.next_frame); |
| 1684 | } | 1700 | } |
| 1685 | return 1; | 1701 | return 1; |
| @@ -1692,19 +1708,20 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) | |||
| 1692 | } | 1708 | } |
| 1693 | #endif | 1709 | #endif |
| 1694 | 1710 | ||
| 1695 | static void | 1711 | void |
| 1696 | perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) | 1712 | perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) |
| 1697 | { | 1713 | { |
| 1698 | struct stack_frame frame; | 1714 | struct stack_frame frame; |
| 1699 | const void __user *fp; | 1715 | const void __user *fp; |
| 1700 | 1716 | ||
| 1701 | if (!user_mode(regs)) | 1717 | if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { |
| 1702 | regs = task_pt_regs(current); | 1718 | /* TODO: We don't support guest os callchain now */ |
| 1719 | return; | ||
| 1720 | } | ||
| 1703 | 1721 | ||
| 1704 | fp = (void __user *)regs->bp; | 1722 | fp = (void __user *)regs->bp; |
| 1705 | 1723 | ||
| 1706 | callchain_store(entry, PERF_CONTEXT_USER); | 1724 | perf_callchain_store(entry, regs->ip); |
| 1707 | callchain_store(entry, regs->ip); | ||
| 1708 | 1725 | ||
| 1709 | if (perf_callchain_user32(regs, entry)) | 1726 | if (perf_callchain_user32(regs, entry)) |
| 1710 | return; | 1727 | return; |
| @@ -1721,52 +1738,11 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) | |||
| 1721 | if ((unsigned long)fp < regs->sp) | 1738 | if ((unsigned long)fp < regs->sp) |
| 1722 | break; | 1739 | break; |
| 1723 | 1740 | ||
| 1724 | callchain_store(entry, frame.return_address); | 1741 | perf_callchain_store(entry, frame.return_address); |
| 1725 | fp = frame.next_frame; | 1742 | fp = frame.next_frame; |
| 1726 | } | 1743 | } |
| 1727 | } | 1744 | } |
| 1728 | 1745 | ||
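Both walkers above now feed addresses straight into the callchain entry through perf_callchain_store(), with the core owning the buffers that used to live here as per-cpu pmc_*_entry. The following is a user-space model of the same frame-pointer walk; it skips the copy_from_user and guest checks and uses a tiny fixed depth.

#include <stdio.h>

#define MAX_STACK_DEPTH 8

struct callchain_entry {
	unsigned int nr;
	unsigned long ip[MAX_STACK_DEPTH];
};

/* layout the walker assumes: saved frame pointer, then return address */
struct stack_frame {
	struct stack_frame *next_frame;
	unsigned long return_address;
};

static void chain_store(struct callchain_entry *entry, unsigned long ip)
{
	if (entry->nr < MAX_STACK_DEPTH)
		entry->ip[entry->nr++] = ip;
}

static void walk_frames(struct callchain_entry *entry, unsigned long ip,
			const struct stack_frame *fp)
{
	chain_store(entry, ip);			/* the sampled instruction */
	while (fp && entry->nr < MAX_STACK_DEPTH) {
		chain_store(entry, fp->return_address);
		fp = fp->next_frame;
	}
}

int main(void)
{
	/* fake two-frame stack with made-up return addresses */
	struct stack_frame outer = { NULL,   0x4000 };
	struct stack_frame inner = { &outer, 0x4010 };
	struct callchain_entry entry = { 0 };
	unsigned int i;

	walk_frames(&entry, 0x4242, &inner);
	for (i = 0; i < entry.nr; i++)
		printf("frame %u: %#lx\n", i, entry.ip[i]);
	return 0;
}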
| 1729 | static void | ||
| 1730 | perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry) | ||
| 1731 | { | ||
| 1732 | int is_user; | ||
| 1733 | |||
| 1734 | if (!regs) | ||
| 1735 | return; | ||
| 1736 | |||
| 1737 | is_user = user_mode(regs); | ||
| 1738 | |||
| 1739 | if (is_user && current->state != TASK_RUNNING) | ||
| 1740 | return; | ||
| 1741 | |||
| 1742 | if (!is_user) | ||
| 1743 | perf_callchain_kernel(regs, entry); | ||
| 1744 | |||
| 1745 | if (current->mm) | ||
| 1746 | perf_callchain_user(regs, entry); | ||
| 1747 | } | ||
| 1748 | |||
| 1749 | struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) | ||
| 1750 | { | ||
| 1751 | struct perf_callchain_entry *entry; | ||
| 1752 | |||
| 1753 | if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { | ||
| 1754 | /* TODO: We don't support guest os callchain now */ | ||
| 1755 | return NULL; | ||
| 1756 | } | ||
| 1757 | |||
| 1758 | if (in_nmi()) | ||
| 1759 | entry = &__get_cpu_var(pmc_nmi_entry); | ||
| 1760 | else | ||
| 1761 | entry = &__get_cpu_var(pmc_irq_entry); | ||
| 1762 | |||
| 1763 | entry->nr = 0; | ||
| 1764 | |||
| 1765 | perf_do_callchain(regs, entry); | ||
| 1766 | |||
| 1767 | return entry; | ||
| 1768 | } | ||
| 1769 | |||
| 1770 | unsigned long perf_instruction_pointer(struct pt_regs *regs) | 1746 | unsigned long perf_instruction_pointer(struct pt_regs *regs) |
| 1771 | { | 1747 | { |
| 1772 | unsigned long ip; | 1748 | unsigned long ip; |
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index c2897b7b4a3b..46d58448c3af 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c | |||
| @@ -52,7 +52,7 @@ static __initconst const u64 amd_hw_cache_event_ids | |||
| 52 | [ C(DTLB) ] = { | 52 | [ C(DTLB) ] = { |
| 53 | [ C(OP_READ) ] = { | 53 | [ C(OP_READ) ] = { |
| 54 | [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ | 54 | [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ |
| 55 | [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DLTB Miss */ | 55 | [ C(RESULT_MISS) ] = 0x0746, /* L1_DTLB_AND_L2_DLTB_MISS.ALL */ |
| 56 | }, | 56 | }, |
| 57 | [ C(OP_WRITE) ] = { | 57 | [ C(OP_WRITE) ] = { |
| 58 | [ C(RESULT_ACCESS) ] = 0, | 58 | [ C(RESULT_ACCESS) ] = 0, |
| @@ -66,7 +66,7 @@ static __initconst const u64 amd_hw_cache_event_ids | |||
| 66 | [ C(ITLB) ] = { | 66 | [ C(ITLB) ] = { |
| 67 | [ C(OP_READ) ] = { | 67 | [ C(OP_READ) ] = { |
| 68 | [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches */ | 68 | [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches */ |
| 69 | [ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */ | 69 | [ C(RESULT_MISS) ] = 0x0385, /* L1_ITLB_AND_L2_ITLB_MISS.ALL */ |
| 70 | }, | 70 | }, |
| 71 | [ C(OP_WRITE) ] = { | 71 | [ C(OP_WRITE) ] = { |
| 72 | [ C(RESULT_ACCESS) ] = -1, | 72 | [ C(RESULT_ACCESS) ] = -1, |
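For the two corrected event codes above, the table values pack a unit mask into bits 15:8 on top of the event select in bits 7:0 (the usual AMD PERFCTL layout, assumed here): 0x0746 is event 0x46 with umask 0x07, i.e. all DTLB-miss sub-events, and 0x0385 is event 0x85 with umask 0x03, all ITLB-miss sub-events. A quick check of that packing:

#include <stdio.h>

/* bits 7:0 = event select, bits 15:8 = unit mask (assumed layout) */
static unsigned int pack(unsigned int event, unsigned int umask)
{
	return (umask << 8) | (event & 0xff);
}

int main(void)
{
	printf("%#06x\n", pack(0x46, 0x07));	/* 0x0746: DTLB misses, all sub-events */
	printf("%#06x\n", pack(0x85, 0x03));	/* 0x0385: ITLB misses, all sub-events */
	return 0;
}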
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index ee05c90012d2..c8f5c088cad1 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
| @@ -713,18 +713,18 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) | |||
| 713 | struct cpu_hw_events *cpuc; | 713 | struct cpu_hw_events *cpuc; |
| 714 | int bit, loops; | 714 | int bit, loops; |
| 715 | u64 status; | 715 | u64 status; |
| 716 | int handled = 0; | 716 | int handled; |
| 717 | 717 | ||
| 718 | perf_sample_data_init(&data, 0); | 718 | perf_sample_data_init(&data, 0); |
| 719 | 719 | ||
| 720 | cpuc = &__get_cpu_var(cpu_hw_events); | 720 | cpuc = &__get_cpu_var(cpu_hw_events); |
| 721 | 721 | ||
| 722 | intel_pmu_disable_all(); | 722 | intel_pmu_disable_all(); |
| 723 | intel_pmu_drain_bts_buffer(); | 723 | handled = intel_pmu_drain_bts_buffer(); |
| 724 | status = intel_pmu_get_status(); | 724 | status = intel_pmu_get_status(); |
| 725 | if (!status) { | 725 | if (!status) { |
| 726 | intel_pmu_enable_all(0); | 726 | intel_pmu_enable_all(0); |
| 727 | return 0; | 727 | return handled; |
| 728 | } | 728 | } |
| 729 | 729 | ||
| 730 | loops = 0; | 730 | loops = 0; |
| @@ -763,7 +763,7 @@ again: | |||
| 763 | data.period = event->hw.last_period; | 763 | data.period = event->hw.last_period; |
| 764 | 764 | ||
| 765 | if (perf_event_overflow(event, 1, &data, regs)) | 765 | if (perf_event_overflow(event, 1, &data, regs)) |
| 766 | x86_pmu_stop(event); | 766 | x86_pmu_stop(event, 0); |
| 767 | } | 767 | } |
| 768 | 768 | ||
| 769 | /* | 769 | /* |
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 18018d1311cd..4977f9c400e5 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c | |||
| @@ -214,7 +214,7 @@ static void intel_pmu_disable_bts(void) | |||
| 214 | update_debugctlmsr(debugctlmsr); | 214 | update_debugctlmsr(debugctlmsr); |
| 215 | } | 215 | } |
| 216 | 216 | ||
| 217 | static void intel_pmu_drain_bts_buffer(void) | 217 | static int intel_pmu_drain_bts_buffer(void) |
| 218 | { | 218 | { |
| 219 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 219 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
| 220 | struct debug_store *ds = cpuc->ds; | 220 | struct debug_store *ds = cpuc->ds; |
| @@ -231,16 +231,16 @@ static void intel_pmu_drain_bts_buffer(void) | |||
| 231 | struct pt_regs regs; | 231 | struct pt_regs regs; |
| 232 | 232 | ||
| 233 | if (!event) | 233 | if (!event) |
| 234 | return; | 234 | return 0; |
| 235 | 235 | ||
| 236 | if (!ds) | 236 | if (!ds) |
| 237 | return; | 237 | return 0; |
| 238 | 238 | ||
| 239 | at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; | 239 | at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; |
| 240 | top = (struct bts_record *)(unsigned long)ds->bts_index; | 240 | top = (struct bts_record *)(unsigned long)ds->bts_index; |
| 241 | 241 | ||
| 242 | if (top <= at) | 242 | if (top <= at) |
| 243 | return; | 243 | return 0; |
| 244 | 244 | ||
| 245 | ds->bts_index = ds->bts_buffer_base; | 245 | ds->bts_index = ds->bts_buffer_base; |
| 246 | 246 | ||
| @@ -256,7 +256,7 @@ static void intel_pmu_drain_bts_buffer(void) | |||
| 256 | perf_prepare_sample(&header, &data, event, ®s); | 256 | perf_prepare_sample(&header, &data, event, ®s); |
| 257 | 257 | ||
| 258 | if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1)) | 258 | if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1)) |
| 259 | return; | 259 | return 1; |
| 260 | 260 | ||
| 261 | for (; at < top; at++) { | 261 | for (; at < top; at++) { |
| 262 | data.ip = at->from; | 262 | data.ip = at->from; |
| @@ -270,6 +270,7 @@ static void intel_pmu_drain_bts_buffer(void) | |||
| 270 | /* There's new data available. */ | 270 | /* There's new data available. */ |
| 271 | event->hw.interrupts++; | 271 | event->hw.interrupts++; |
| 272 | event->pending_kill = POLL_IN; | 272 | event->pending_kill = POLL_IN; |
| 273 | return 1; | ||
| 273 | } | 274 | } |
| 274 | 275 | ||
| 275 | /* | 276 | /* |
| @@ -491,7 +492,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event, | |||
| 491 | regs.flags &= ~PERF_EFLAGS_EXACT; | 492 | regs.flags &= ~PERF_EFLAGS_EXACT; |
| 492 | 493 | ||
| 493 | if (perf_event_overflow(event, 1, &data, ®s)) | 494 | if (perf_event_overflow(event, 1, &data, ®s)) |
| 494 | x86_pmu_stop(event); | 495 | x86_pmu_stop(event, 0); |
| 495 | } | 496 | } |
| 496 | 497 | ||
| 497 | static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) | 498 | static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) |
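The change above to intel_pmu_drain_bts_buffer() pairs with the intel_pmu_handle_irq() hunk earlier: the drain now reports whether it actually produced data, and the caller folds that into its handled count so a PMI that only carried BTS data is no longer treated as spurious. A minimal sketch of that control flow, with hypothetical helpers:

#include <stdio.h>

/* pretend the BTS buffer held records this time */
static int drain_bts_buffer(void)
{
	int records = 3;		/* hypothetical number of records */
	return records > 0 ? 1 : 0;	/* non-zero: we did real work */
}

static unsigned long long read_overflow_status(void)
{
	return 0;			/* no generic counter overflowed */
}

static int handle_pmi(void)
{
	int handled = drain_bts_buffer();	/* counts toward "handled" */
	unsigned long long status = read_overflow_status();

	if (!status)
		return handled;	/* before the change this path returned 0 and
				 * the NMI looked spurious even with BTS data */

	/* ... process overflowed counters, bump handled ... */
	return handled;
}

int main(void)
{
	printf("PMI handled: %d\n", handle_pmi());
	return 0;
}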
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index b560db3305be..81400b93e694 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c | |||
| @@ -18,6 +18,8 @@ | |||
| 18 | struct p4_event_bind { | 18 | struct p4_event_bind { |
| 19 | unsigned int opcode; /* Event code and ESCR selector */ | 19 | unsigned int opcode; /* Event code and ESCR selector */ |
| 20 | unsigned int escr_msr[2]; /* ESCR MSR for this event */ | 20 | unsigned int escr_msr[2]; /* ESCR MSR for this event */ |
| 21 | unsigned int escr_emask; /* valid ESCR EventMask bits */ | ||
| 22 | unsigned int shared; /* event is shared across threads */ | ||
| 21 | char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on absence */ | 23 | char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on absence */ |
| 22 | }; | 24 | }; |
| 23 | 25 | ||
| @@ -66,231 +68,435 @@ static struct p4_event_bind p4_event_bind_map[] = { | |||
| 66 | [P4_EVENT_TC_DELIVER_MODE] = { | 68 | [P4_EVENT_TC_DELIVER_MODE] = { |
| 67 | .opcode = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE), | 69 | .opcode = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE), |
| 68 | .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, | 70 | .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, |
| 71 | .escr_emask = | ||
| 72 | P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DD) | | ||
| 73 | P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DB) | | ||
| 74 | P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DI) | | ||
| 75 | P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BD) | | ||
| 76 | P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BB) | | ||
| 77 | P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BI) | | ||
| 78 | P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, ID), | ||
| 79 | .shared = 1, | ||
| 69 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | 80 | .cntr = { {4, 5, -1}, {6, 7, -1} }, |
| 70 | }, | 81 | }, |
| 71 | [P4_EVENT_BPU_FETCH_REQUEST] = { | 82 | [P4_EVENT_BPU_FETCH_REQUEST] = { |
| 72 | .opcode = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST), | 83 | .opcode = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST), |
| 73 | .escr_msr = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 }, | 84 | .escr_msr = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 }, |
| 85 | .escr_emask = | ||
| 86 | P4_ESCR_EMASK_BIT(P4_EVENT_BPU_FETCH_REQUEST, TCMISS), | ||
| 74 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | 87 | .cntr = { {0, -1, -1}, {2, -1, -1} }, |
| 75 | }, | 88 | }, |
| 76 | [P4_EVENT_ITLB_REFERENCE] = { | 89 | [P4_EVENT_ITLB_REFERENCE] = { |
| 77 | .opcode = P4_OPCODE(P4_EVENT_ITLB_REFERENCE), | 90 | .opcode = P4_OPCODE(P4_EVENT_ITLB_REFERENCE), |
| 78 | .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 }, | 91 | .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 }, |
| 92 | .escr_emask = | ||
| 93 | P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT) | | ||
| 94 | P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, MISS) | | ||
| 95 | P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT_UK), | ||
| 79 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | 96 | .cntr = { {0, -1, -1}, {2, -1, -1} }, |
| 80 | }, | 97 | }, |
| 81 | [P4_EVENT_MEMORY_CANCEL] = { | 98 | [P4_EVENT_MEMORY_CANCEL] = { |
| 82 | .opcode = P4_OPCODE(P4_EVENT_MEMORY_CANCEL), | 99 | .opcode = P4_OPCODE(P4_EVENT_MEMORY_CANCEL), |
| 83 | .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, | 100 | .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, |
| 101 | .escr_emask = | ||
| 102 | P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, ST_RB_FULL) | | ||
| 103 | P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, 64K_CONF), | ||
| 84 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | 104 | .cntr = { {8, 9, -1}, {10, 11, -1} }, |
| 85 | }, | 105 | }, |
| 86 | [P4_EVENT_MEMORY_COMPLETE] = { | 106 | [P4_EVENT_MEMORY_COMPLETE] = { |
| 87 | .opcode = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE), | 107 | .opcode = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE), |
| 88 | .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 }, | 108 | .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 }, |
| 109 | .escr_emask = | ||
| 110 | P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, LSC) | | ||
| 111 | P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, SSC), | ||
| 89 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | 112 | .cntr = { {8, 9, -1}, {10, 11, -1} }, |
| 90 | }, | 113 | }, |
| 91 | [P4_EVENT_LOAD_PORT_REPLAY] = { | 114 | [P4_EVENT_LOAD_PORT_REPLAY] = { |
| 92 | .opcode = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY), | 115 | .opcode = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY), |
| 93 | .escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 }, | 116 | .escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 }, |
| 117 | .escr_emask = | ||
| 118 | P4_ESCR_EMASK_BIT(P4_EVENT_LOAD_PORT_REPLAY, SPLIT_LD), | ||
| 94 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | 119 | .cntr = { {8, 9, -1}, {10, 11, -1} }, |
| 95 | }, | 120 | }, |
| 96 | [P4_EVENT_STORE_PORT_REPLAY] = { | 121 | [P4_EVENT_STORE_PORT_REPLAY] = { |
| 97 | .opcode = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY), | 122 | .opcode = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY), |
| 98 | .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 }, | 123 | .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 }, |
| 124 | .escr_emask = | ||
| 125 | P4_ESCR_EMASK_BIT(P4_EVENT_STORE_PORT_REPLAY, SPLIT_ST), | ||
| 99 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | 126 | .cntr = { {8, 9, -1}, {10, 11, -1} }, |
| 100 | }, | 127 | }, |
| 101 | [P4_EVENT_MOB_LOAD_REPLAY] = { | 128 | [P4_EVENT_MOB_LOAD_REPLAY] = { |
| 102 | .opcode = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY), | 129 | .opcode = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY), |
| 103 | .escr_msr = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 }, | 130 | .escr_msr = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 }, |
| 131 | .escr_emask = | ||
| 132 | P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STA) | | ||
| 133 | P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STD) | | ||
| 134 | P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, PARTIAL_DATA) | | ||
| 135 | P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, UNALGN_ADDR), | ||
| 104 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | 136 | .cntr = { {0, -1, -1}, {2, -1, -1} }, |
| 105 | }, | 137 | }, |
| 106 | [P4_EVENT_PAGE_WALK_TYPE] = { | 138 | [P4_EVENT_PAGE_WALK_TYPE] = { |
| 107 | .opcode = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE), | 139 | .opcode = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE), |
| 108 | .escr_msr = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 }, | 140 | .escr_msr = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 }, |
| 141 | .escr_emask = | ||
| 142 | P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, DTMISS) | | ||
| 143 | P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, ITMISS), | ||
| 144 | .shared = 1, | ||
| 109 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | 145 | .cntr = { {0, -1, -1}, {2, -1, -1} }, |
| 110 | }, | 146 | }, |
| 111 | [P4_EVENT_BSQ_CACHE_REFERENCE] = { | 147 | [P4_EVENT_BSQ_CACHE_REFERENCE] = { |
| 112 | .opcode = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE), | 148 | .opcode = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE), |
| 113 | .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 }, | 149 | .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 }, |
| 150 | .escr_emask = | ||
| 151 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) | | ||
| 152 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) | | ||
| 153 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM) | | ||
| 154 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS) | | ||
| 155 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE) | | ||
| 156 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM) | | ||
| 157 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) | | ||
| 158 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) | | ||
| 159 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS), | ||
| 114 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | 160 | .cntr = { {0, -1, -1}, {2, -1, -1} }, |
| 115 | }, | 161 | }, |
| 116 | [P4_EVENT_IOQ_ALLOCATION] = { | 162 | [P4_EVENT_IOQ_ALLOCATION] = { |
| 117 | .opcode = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION), | 163 | .opcode = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION), |
| 118 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | 164 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, |
| 165 | .escr_emask = | ||
| 166 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, DEFAULT) | | ||
| 167 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_READ) | | ||
| 168 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_WRITE) | | ||
| 169 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_UC) | | ||
| 170 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WC) | | ||
| 171 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WT) | | ||
| 172 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WP) | | ||
| 173 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WB) | | ||
| 174 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OWN) | | ||
| 175 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OTHER) | | ||
| 176 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, PREFETCH), | ||
| 119 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | 177 | .cntr = { {0, -1, -1}, {2, -1, -1} }, |
| 120 | }, | 178 | }, |
| 121 | [P4_EVENT_IOQ_ACTIVE_ENTRIES] = { /* shared ESCR */ | 179 | [P4_EVENT_IOQ_ACTIVE_ENTRIES] = { /* shared ESCR */ |
| 122 | .opcode = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES), | 180 | .opcode = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES), |
| 123 | .escr_msr = { MSR_P4_FSB_ESCR1, MSR_P4_FSB_ESCR1 }, | 181 | .escr_msr = { MSR_P4_FSB_ESCR1, MSR_P4_FSB_ESCR1 }, |
| 182 | .escr_emask = | ||
| 183 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, DEFAULT) | | ||
| 184 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_READ) | | ||
| 185 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_WRITE) | | ||
| 186 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_UC) | | ||
| 187 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WC) | | ||
| 188 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WT) | | ||
| 189 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WP) | | ||
| 190 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WB) | | ||
| 191 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OWN) | | ||
| 192 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OTHER) | | ||
| 193 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, PREFETCH), | ||
| 124 | .cntr = { {2, -1, -1}, {3, -1, -1} }, | 194 | .cntr = { {2, -1, -1}, {3, -1, -1} }, |
| 125 | }, | 195 | }, |
| 126 | [P4_EVENT_FSB_DATA_ACTIVITY] = { | 196 | [P4_EVENT_FSB_DATA_ACTIVITY] = { |
| 127 | .opcode = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY), | 197 | .opcode = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY), |
| 128 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | 198 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, |
| 199 | .escr_emask = | ||
| 200 | P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV) | | ||
| 201 | P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN) | | ||
| 202 | P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OTHER) | | ||
| 203 | P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_DRV) | | ||
| 204 | P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OWN) | | ||
| 205 | P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OTHER), | ||
| 206 | .shared = 1, | ||
| 129 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | 207 | .cntr = { {0, -1, -1}, {2, -1, -1} }, |
| 130 | }, | 208 | }, |
| 131 | [P4_EVENT_BSQ_ALLOCATION] = { /* shared ESCR, broken CCCR1 */ | 209 | [P4_EVENT_BSQ_ALLOCATION] = { /* shared ESCR, broken CCCR1 */ |
| 132 | .opcode = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION), | 210 | .opcode = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION), |
| 133 | .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 }, | 211 | .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 }, |
| 212 | .escr_emask = | ||
| 213 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE0) | | ||
| 214 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE1) | | ||
| 215 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN0) | | ||
| 216 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN1) | | ||
| 217 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_IO_TYPE) | | ||
| 218 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LOCK_TYPE) | | ||
| 219 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_CACHE_TYPE) | | ||
| 220 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_SPLIT_TYPE) | | ||
| 221 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_DEM_TYPE) | | ||
| 222 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_ORD_TYPE) | | ||
| 223 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE0) | | ||
| 224 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE1) | | ||
| 225 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE2), | ||
| 134 | .cntr = { {0, -1, -1}, {1, -1, -1} }, | 226 | .cntr = { {0, -1, -1}, {1, -1, -1} }, |
| 135 | }, | 227 | }, |
| 136 | [P4_EVENT_BSQ_ACTIVE_ENTRIES] = { /* shared ESCR */ | 228 | [P4_EVENT_BSQ_ACTIVE_ENTRIES] = { /* shared ESCR */ |
| 137 | .opcode = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES), | 229 | .opcode = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES), |
| 138 | .escr_msr = { MSR_P4_BSU_ESCR1 , MSR_P4_BSU_ESCR1 }, | 230 | .escr_msr = { MSR_P4_BSU_ESCR1 , MSR_P4_BSU_ESCR1 }, |
| 231 | .escr_emask = | ||
| 232 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE0) | | ||
| 233 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE1) | | ||
| 234 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN0) | | ||
| 235 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN1) | | ||
| 236 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_IO_TYPE) | | ||
| 237 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LOCK_TYPE) | | ||
| 238 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_CACHE_TYPE) | | ||
| 239 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_SPLIT_TYPE) | | ||
| 240 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_DEM_TYPE) | | ||
| 241 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_ORD_TYPE) | | ||
| 242 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE0) | | ||
| 243 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE1) | | ||
| 244 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE2), | ||
| 139 | .cntr = { {2, -1, -1}, {3, -1, -1} }, | 245 | .cntr = { {2, -1, -1}, {3, -1, -1} }, |
| 140 | }, | 246 | }, |
| 141 | [P4_EVENT_SSE_INPUT_ASSIST] = { | 247 | [P4_EVENT_SSE_INPUT_ASSIST] = { |
| 142 | .opcode = P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST), | 248 | .opcode = P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST), |
| 143 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | 249 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, |
| 250 | .escr_emask = | ||
| 251 | P4_ESCR_EMASK_BIT(P4_EVENT_SSE_INPUT_ASSIST, ALL), | ||
| 252 | .shared = 1, | ||
| 144 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | 253 | .cntr = { {8, 9, -1}, {10, 11, -1} }, |
| 145 | }, | 254 | }, |
| 146 | [P4_EVENT_PACKED_SP_UOP] = { | 255 | [P4_EVENT_PACKED_SP_UOP] = { |
| 147 | .opcode = P4_OPCODE(P4_EVENT_PACKED_SP_UOP), | 256 | .opcode = P4_OPCODE(P4_EVENT_PACKED_SP_UOP), |
| 148 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | 257 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, |
| 258 | .escr_emask = | ||
| 259 | P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_SP_UOP, ALL), | ||
| 260 | .shared = 1, | ||
| 149 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | 261 | .cntr = { {8, 9, -1}, {10, 11, -1} }, |
| 150 | }, | 262 | }, |
| 151 | [P4_EVENT_PACKED_DP_UOP] = { | 263 | [P4_EVENT_PACKED_DP_UOP] = { |
| 152 | .opcode = P4_OPCODE(P4_EVENT_PACKED_DP_UOP), | 264 | .opcode = P4_OPCODE(P4_EVENT_PACKED_DP_UOP), |
| 153 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | 265 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, |
| 266 | .escr_emask = | ||
| 267 | P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_DP_UOP, ALL), | ||
| 268 | .shared = 1, | ||
| 154 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | 269 | .cntr = { {8, 9, -1}, {10, 11, -1} }, |
| 155 | }, | 270 | }, |
| 156 | [P4_EVENT_SCALAR_SP_UOP] = { | 271 | [P4_EVENT_SCALAR_SP_UOP] = { |
| 157 | .opcode = P4_OPCODE(P4_EVENT_SCALAR_SP_UOP), | 272 | .opcode = P4_OPCODE(P4_EVENT_SCALAR_SP_UOP), |
| 158 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | 273 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, |
| 274 | .escr_emask = | ||
| 275 | P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_SP_UOP, ALL), | ||
| 276 | .shared = 1, | ||
| 159 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | 277 | .cntr = { {8, 9, -1}, {10, 11, -1} }, |
| 160 | }, | 278 | }, |
| 161 | [P4_EVENT_SCALAR_DP_UOP] = { | 279 | [P4_EVENT_SCALAR_DP_UOP] = { |
| 162 | .opcode = P4_OPCODE(P4_EVENT_SCALAR_DP_UOP), | 280 | .opcode = P4_OPCODE(P4_EVENT_SCALAR_DP_UOP), |
| 163 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | 281 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, |
| 282 | .escr_emask = | ||
| 283 | P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_DP_UOP, ALL), | ||
| 284 | .shared = 1, | ||
| 164 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | 285 | .cntr = { {8, 9, -1}, {10, 11, -1} }, |
| 165 | }, | 286 | }, |
| 166 | [P4_EVENT_64BIT_MMX_UOP] = { | 287 | [P4_EVENT_64BIT_MMX_UOP] = { |
| 167 | .opcode = P4_OPCODE(P4_EVENT_64BIT_MMX_UOP), | 288 | .opcode = P4_OPCODE(P4_EVENT_64BIT_MMX_UOP), |
| 168 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | 289 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, |
| 290 | .escr_emask = | ||
| 291 | P4_ESCR_EMASK_BIT(P4_EVENT_64BIT_MMX_UOP, ALL), | ||
| 292 | .shared = 1, | ||
| 169 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | 293 | .cntr = { {8, 9, -1}, {10, 11, -1} }, |
| 170 | }, | 294 | }, |
| 171 | [P4_EVENT_128BIT_MMX_UOP] = { | 295 | [P4_EVENT_128BIT_MMX_UOP] = { |
| 172 | .opcode = P4_OPCODE(P4_EVENT_128BIT_MMX_UOP), | 296 | .opcode = P4_OPCODE(P4_EVENT_128BIT_MMX_UOP), |
| 173 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | 297 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, |
| 298 | .escr_emask = | ||
| 299 | P4_ESCR_EMASK_BIT(P4_EVENT_128BIT_MMX_UOP, ALL), | ||
| 300 | .shared = 1, | ||
| 174 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | 301 | .cntr = { {8, 9, -1}, {10, 11, -1} }, |
| 175 | }, | 302 | }, |
| 176 | [P4_EVENT_X87_FP_UOP] = { | 303 | [P4_EVENT_X87_FP_UOP] = { |
| 177 | .opcode = P4_OPCODE(P4_EVENT_X87_FP_UOP), | 304 | .opcode = P4_OPCODE(P4_EVENT_X87_FP_UOP), |
| 178 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | 305 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, |
| 306 | .escr_emask = | ||
| 307 | P4_ESCR_EMASK_BIT(P4_EVENT_X87_FP_UOP, ALL), | ||
| 308 | .shared = 1, | ||
| 179 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | 309 | .cntr = { {8, 9, -1}, {10, 11, -1} }, |
| 180 | }, | 310 | }, |
| 181 | [P4_EVENT_TC_MISC] = { | 311 | [P4_EVENT_TC_MISC] = { |
| 182 | .opcode = P4_OPCODE(P4_EVENT_TC_MISC), | 312 | .opcode = P4_OPCODE(P4_EVENT_TC_MISC), |
| 183 | .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, | 313 | .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, |
| 314 | .escr_emask = | ||
| 315 | P4_ESCR_EMASK_BIT(P4_EVENT_TC_MISC, FLUSH), | ||
| 184 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | 316 | .cntr = { {4, 5, -1}, {6, 7, -1} }, |
| 185 | }, | 317 | }, |
| 186 | [P4_EVENT_GLOBAL_POWER_EVENTS] = { | 318 | [P4_EVENT_GLOBAL_POWER_EVENTS] = { |
| 187 | .opcode = P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS), | 319 | .opcode = P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS), |
| 188 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | 320 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, |
| 321 | .escr_emask = | ||
| 322 | P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING), | ||
| 189 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | 323 | .cntr = { {0, -1, -1}, {2, -1, -1} }, |
| 190 | }, | 324 | }, |
| 191 | [P4_EVENT_TC_MS_XFER] = { | 325 | [P4_EVENT_TC_MS_XFER] = { |
| 192 | .opcode = P4_OPCODE(P4_EVENT_TC_MS_XFER), | 326 | .opcode = P4_OPCODE(P4_EVENT_TC_MS_XFER), |
| 193 | .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, | 327 | .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, |
| 328 | .escr_emask = | ||
| 329 | P4_ESCR_EMASK_BIT(P4_EVENT_TC_MS_XFER, CISC), | ||
| 194 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | 330 | .cntr = { {4, 5, -1}, {6, 7, -1} }, |
| 195 | }, | 331 | }, |
| 196 | [P4_EVENT_UOP_QUEUE_WRITES] = { | 332 | [P4_EVENT_UOP_QUEUE_WRITES] = { |
| 197 | .opcode = P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES), | 333 | .opcode = P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES), |
| 198 | .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, | 334 | .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, |
| 335 | .escr_emask = | ||
| 336 | P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_BUILD) | | ||
| 337 | P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_DELIVER) | | ||
| 338 | P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_ROM), | ||
| 199 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | 339 | .cntr = { {4, 5, -1}, {6, 7, -1} }, |
| 200 | }, | 340 | }, |
| 201 | [P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = { | 341 | [P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = { |
| 202 | .opcode = P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE), | 342 | .opcode = P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE), |
| 203 | .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR0 }, | 343 | .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR0 }, |
| 344 | .escr_emask = | ||
| 345 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CONDITIONAL) | | ||
| 346 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CALL) | | ||
| 347 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, RETURN) | | ||
| 348 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, INDIRECT), | ||
| 204 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | 349 | .cntr = { {4, 5, -1}, {6, 7, -1} }, |
| 205 | }, | 350 | }, |
| 206 | [P4_EVENT_RETIRED_BRANCH_TYPE] = { | 351 | [P4_EVENT_RETIRED_BRANCH_TYPE] = { |
| 207 | .opcode = P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE), | 352 | .opcode = P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE), |
| 208 | .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR1 }, | 353 | .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR1 }, |
| 354 | .escr_emask = | ||
| 355 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL) | | ||
| 356 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL) | | ||
| 357 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN) | | ||
| 358 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT), | ||
| 209 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | 359 | .cntr = { {4, 5, -1}, {6, 7, -1} }, |
| 210 | }, | 360 | }, |
| 211 | [P4_EVENT_RESOURCE_STALL] = { | 361 | [P4_EVENT_RESOURCE_STALL] = { |
| 212 | .opcode = P4_OPCODE(P4_EVENT_RESOURCE_STALL), | 362 | .opcode = P4_OPCODE(P4_EVENT_RESOURCE_STALL), |
| 213 | .escr_msr = { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 }, | 363 | .escr_msr = { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 }, |
| 364 | .escr_emask = | ||
| 365 | P4_ESCR_EMASK_BIT(P4_EVENT_RESOURCE_STALL, SBFULL), | ||
| 214 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | 366 | .cntr = { {12, 13, 16}, {14, 15, 17} }, |
| 215 | }, | 367 | }, |
| 216 | [P4_EVENT_WC_BUFFER] = { | 368 | [P4_EVENT_WC_BUFFER] = { |
| 217 | .opcode = P4_OPCODE(P4_EVENT_WC_BUFFER), | 369 | .opcode = P4_OPCODE(P4_EVENT_WC_BUFFER), |
| 218 | .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, | 370 | .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, |
| 371 | .escr_emask = | ||
| 372 | P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_EVICTS) | | ||
| 373 | P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_FULL_EVICTS), | ||
| 374 | .shared = 1, | ||
| 219 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | 375 | .cntr = { {8, 9, -1}, {10, 11, -1} }, |
| 220 | }, | 376 | }, |
| 221 | [P4_EVENT_B2B_CYCLES] = { | 377 | [P4_EVENT_B2B_CYCLES] = { |
| 222 | .opcode = P4_OPCODE(P4_EVENT_B2B_CYCLES), | 378 | .opcode = P4_OPCODE(P4_EVENT_B2B_CYCLES), |
| 223 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | 379 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, |
| 380 | .escr_emask = 0, | ||
| 224 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | 381 | .cntr = { {0, -1, -1}, {2, -1, -1} }, |
| 225 | }, | 382 | }, |
| 226 | [P4_EVENT_BNR] = { | 383 | [P4_EVENT_BNR] = { |
| 227 | .opcode = P4_OPCODE(P4_EVENT_BNR), | 384 | .opcode = P4_OPCODE(P4_EVENT_BNR), |
| 228 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | 385 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, |
| 386 | .escr_emask = 0, | ||
| 229 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | 387 | .cntr = { {0, -1, -1}, {2, -1, -1} }, |
| 230 | }, | 388 | }, |
| 231 | [P4_EVENT_SNOOP] = { | 389 | [P4_EVENT_SNOOP] = { |
| 232 | .opcode = P4_OPCODE(P4_EVENT_SNOOP), | 390 | .opcode = P4_OPCODE(P4_EVENT_SNOOP), |
| 233 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | 391 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, |
| 392 | .escr_emask = 0, | ||
| 234 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | 393 | .cntr = { {0, -1, -1}, {2, -1, -1} }, |
| 235 | }, | 394 | }, |
| 236 | [P4_EVENT_RESPONSE] = { | 395 | [P4_EVENT_RESPONSE] = { |
| 237 | .opcode = P4_OPCODE(P4_EVENT_RESPONSE), | 396 | .opcode = P4_OPCODE(P4_EVENT_RESPONSE), |
| 238 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | 397 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, |
| 398 | .escr_emask = 0, | ||
| 239 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | 399 | .cntr = { {0, -1, -1}, {2, -1, -1} }, |
| 240 | }, | 400 | }, |
| 241 | [P4_EVENT_FRONT_END_EVENT] = { | 401 | [P4_EVENT_FRONT_END_EVENT] = { |
| 242 | .opcode = P4_OPCODE(P4_EVENT_FRONT_END_EVENT), | 402 | .opcode = P4_OPCODE(P4_EVENT_FRONT_END_EVENT), |
| 243 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | 403 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, |
| 404 | .escr_emask = | ||
| 405 | P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, NBOGUS) | | ||
| 406 | P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, BOGUS), | ||
| 244 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | 407 | .cntr = { {12, 13, 16}, {14, 15, 17} }, |
| 245 | }, | 408 | }, |
| 246 | [P4_EVENT_EXECUTION_EVENT] = { | 409 | [P4_EVENT_EXECUTION_EVENT] = { |
| 247 | .opcode = P4_OPCODE(P4_EVENT_EXECUTION_EVENT), | 410 | .opcode = P4_OPCODE(P4_EVENT_EXECUTION_EVENT), |
| 248 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | 411 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, |
| 412 | .escr_emask = | ||
| 413 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0) | | ||
| 414 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1) | | ||
| 415 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2) | | ||
| 416 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3) | | ||
| 417 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0) | | ||
| 418 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1) | | ||
| 419 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2) | | ||
| 420 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3), | ||
| 249 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | 421 | .cntr = { {12, 13, 16}, {14, 15, 17} }, |
| 250 | }, | 422 | }, |
| 251 | [P4_EVENT_REPLAY_EVENT] = { | 423 | [P4_EVENT_REPLAY_EVENT] = { |
| 252 | .opcode = P4_OPCODE(P4_EVENT_REPLAY_EVENT), | 424 | .opcode = P4_OPCODE(P4_EVENT_REPLAY_EVENT), |
| 253 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | 425 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, |
| 426 | .escr_emask = | ||
| 427 | P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, NBOGUS) | | ||
| 428 | P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, BOGUS), | ||
| 254 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | 429 | .cntr = { {12, 13, 16}, {14, 15, 17} }, |
| 255 | }, | 430 | }, |
| 256 | [P4_EVENT_INSTR_RETIRED] = { | 431 | [P4_EVENT_INSTR_RETIRED] = { |
| 257 | .opcode = P4_OPCODE(P4_EVENT_INSTR_RETIRED), | 432 | .opcode = P4_OPCODE(P4_EVENT_INSTR_RETIRED), |
| 258 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, | 433 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, |
| 434 | .escr_emask = | ||
| 435 | P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG) | | ||
| 436 | P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSTAG) | | ||
| 437 | P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG) | | ||
| 438 | P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSTAG), | ||
| 259 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | 439 | .cntr = { {12, 13, 16}, {14, 15, 17} }, |
| 260 | }, | 440 | }, |
| 261 | [P4_EVENT_UOPS_RETIRED] = { | 441 | [P4_EVENT_UOPS_RETIRED] = { |
| 262 | .opcode = P4_OPCODE(P4_EVENT_UOPS_RETIRED), | 442 | .opcode = P4_OPCODE(P4_EVENT_UOPS_RETIRED), |
| 263 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, | 443 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, |
| 444 | .escr_emask = | ||
| 445 | P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, NBOGUS) | | ||
| 446 | P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, BOGUS), | ||
| 264 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | 447 | .cntr = { {12, 13, 16}, {14, 15, 17} }, |
| 265 | }, | 448 | }, |
| 266 | [P4_EVENT_UOP_TYPE] = { | 449 | [P4_EVENT_UOP_TYPE] = { |
| 267 | .opcode = P4_OPCODE(P4_EVENT_UOP_TYPE), | 450 | .opcode = P4_OPCODE(P4_EVENT_UOP_TYPE), |
| 268 | .escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 }, | 451 | .escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 }, |
| 452 | .escr_emask = | ||
| 453 | P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGLOADS) | | ||
| 454 | P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGSTORES), | ||
| 269 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | 455 | .cntr = { {12, 13, 16}, {14, 15, 17} }, |
| 270 | }, | 456 | }, |
| 271 | [P4_EVENT_BRANCH_RETIRED] = { | 457 | [P4_EVENT_BRANCH_RETIRED] = { |
| 272 | .opcode = P4_OPCODE(P4_EVENT_BRANCH_RETIRED), | 458 | .opcode = P4_OPCODE(P4_EVENT_BRANCH_RETIRED), |
| 273 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | 459 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, |
| 460 | .escr_emask = | ||
| 461 | P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNP) | | ||
| 462 | P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNM) | | ||
| 463 | P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTP) | | ||
| 464 | P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTM), | ||
| 274 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | 465 | .cntr = { {12, 13, 16}, {14, 15, 17} }, |
| 275 | }, | 466 | }, |
| 276 | [P4_EVENT_MISPRED_BRANCH_RETIRED] = { | 467 | [P4_EVENT_MISPRED_BRANCH_RETIRED] = { |
| 277 | .opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED), | 468 | .opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED), |
| 278 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, | 469 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, |
| 470 | .escr_emask = | ||
| 471 | P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS), | ||
| 279 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | 472 | .cntr = { {12, 13, 16}, {14, 15, 17} }, |
| 280 | }, | 473 | }, |
| 281 | [P4_EVENT_X87_ASSIST] = { | 474 | [P4_EVENT_X87_ASSIST] = { |
| 282 | .opcode = P4_OPCODE(P4_EVENT_X87_ASSIST), | 475 | .opcode = P4_OPCODE(P4_EVENT_X87_ASSIST), |
| 283 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | 476 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, |
| 477 | .escr_emask = | ||
| 478 | P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSU) | | ||
| 479 | P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSO) | | ||
| 480 | P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAO) | | ||
| 481 | P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAU) | | ||
| 482 | P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, PREA), | ||
| 284 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | 483 | .cntr = { {12, 13, 16}, {14, 15, 17} }, |
| 285 | }, | 484 | }, |
| 286 | [P4_EVENT_MACHINE_CLEAR] = { | 485 | [P4_EVENT_MACHINE_CLEAR] = { |
| 287 | .opcode = P4_OPCODE(P4_EVENT_MACHINE_CLEAR), | 486 | .opcode = P4_OPCODE(P4_EVENT_MACHINE_CLEAR), |
| 288 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | 487 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, |
| 488 | .escr_emask = | ||
| 489 | P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, CLEAR) | | ||
| 490 | P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, MOCLEAR) | | ||
| 491 | P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, SMCLEAR), | ||
| 289 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | 492 | .cntr = { {12, 13, 16}, {14, 15, 17} }, |
| 290 | }, | 493 | }, |
| 291 | [P4_EVENT_INSTR_COMPLETED] = { | 494 | [P4_EVENT_INSTR_COMPLETED] = { |
| 292 | .opcode = P4_OPCODE(P4_EVENT_INSTR_COMPLETED), | 495 | .opcode = P4_OPCODE(P4_EVENT_INSTR_COMPLETED), |
| 293 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, | 496 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, |
| 497 | .escr_emask = | ||
| 498 | P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, NBOGUS) | | ||
| 499 | P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, BOGUS), | ||
| 294 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | 500 | .cntr = { {12, 13, 16}, {14, 15, 17} }, |
| 295 | }, | 501 | }, |
| 296 | }; | 502 | }; |
| @@ -428,29 +634,73 @@ static u64 p4_pmu_event_map(int hw_event) | |||
| 428 | return config; | 634 | return config; |
| 429 | } | 635 | } |
| 430 | 636 | ||
| 637 | /* check cpu model specifics */ | ||
| 638 | static bool p4_event_match_cpu_model(unsigned int event_idx) | ||
| 639 | { | ||
| 640 | /* INSTR_COMPLETED event only exists for models 3, 4 and 6 (Prescott) */ | ||
| 641 | if (event_idx == P4_EVENT_INSTR_COMPLETED) { | ||
| 642 | if (boot_cpu_data.x86_model != 3 && | ||
| 643 | boot_cpu_data.x86_model != 4 && | ||
| 644 | boot_cpu_data.x86_model != 6) | ||
| 645 | return false; | ||
| 646 | } | ||
| 647 | |||
| 648 | /* | ||
| 649 | * For reference: | ||
| 650 | * - IQ_ESCR0, IQ_ESCR1 are only available on models 1 and 2 | ||
| 651 | */ | ||
| 652 | |||
| 653 | return true; | ||
| 654 | } | ||
| 655 | |||
| 431 | static int p4_validate_raw_event(struct perf_event *event) | 656 | static int p4_validate_raw_event(struct perf_event *event) |
| 432 | { | 657 | { |
| 433 | unsigned int v; | 658 | unsigned int v, emask; |
| 434 | 659 | ||
| 435 | /* user data may have out-of-bound event index */ | 660 | /* User data may have out-of-bound event index */ |
| 436 | v = p4_config_unpack_event(event->attr.config); | 661 | v = p4_config_unpack_event(event->attr.config); |
| 437 | if (v >= ARRAY_SIZE(p4_event_bind_map)) { | 662 | if (v >= ARRAY_SIZE(p4_event_bind_map)) |
| 438 | pr_warning("P4 PMU: Unknown event code: %d\n", v); | 663 | return -EINVAL; |
| 664 | |||
| 665 | /* It may be unsupported: */ | ||
| 666 | if (!p4_event_match_cpu_model(v)) | ||
| 439 | return -EINVAL; | 667 | return -EINVAL; |
| 668 | |||
| 669 | /* | ||
| 670 | * NOTE: P4_CCCR_THREAD_ANY does not have the same meaning as | ||
| 671 | * in Architectural Performance Monitoring; it selects not | ||
| 672 | * _which_ logical cpu to count on but rather _when_, ie it | ||
| 673 | * depends on the logical cpu state -- count the event if one cpu is active, | ||
| 674 | * none, both or any, so we just allow the user to pass any value | ||
| 675 | * desired. | ||
| 676 | * | ||
| 677 | * In turn we always set the Tx_OS/Tx_USR bits bound to the local logical | ||
| 678 | * cpu, without propagating them to the other cpu. | ||
| 679 | */ | ||
| 680 | |||
| 681 | /* | ||
| 682 | * if an event is shared across the logical threads, | ||
| 683 | * the user needs special permissions to be able to use it | ||
| 684 | */ | ||
| 685 | if (p4_event_bind_map[v].shared) { | ||
| 686 | if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) | ||
| 687 | return -EACCES; | ||
| 440 | } | 688 | } |
| 441 | 689 | ||
| 690 | /* ESCR EventMask bits may be invalid */ | ||
| 691 | emask = p4_config_unpack_escr(event->attr.config) & P4_ESCR_EVENTMASK_MASK; | ||
| 692 | if (emask & ~p4_event_bind_map[v].escr_emask) | ||
| 693 | return -EINVAL; | ||
| 694 | |||
| 442 | /* | 695 | /* |
| 443 | * it may have some screwed PEBS bits | 696 | * it may have some invalid PEBS bits |
| 444 | */ | 697 | */ |
| 445 | if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE)) { | 698 | if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE)) |
| 446 | pr_warning("P4 PMU: PEBS are not supported yet\n"); | ||
| 447 | return -EINVAL; | 699 | return -EINVAL; |
| 448 | } | 700 | |
| 449 | v = p4_config_unpack_metric(event->attr.config); | 701 | v = p4_config_unpack_metric(event->attr.config); |
| 450 | if (v >= ARRAY_SIZE(p4_pebs_bind_map)) { | 702 | if (v >= ARRAY_SIZE(p4_pebs_bind_map)) |
| 451 | pr_warning("P4 PMU: Unknown metric code: %d\n", v); | ||
| 452 | return -EINVAL; | 703 | return -EINVAL; |
| 453 | } | ||
| 454 | 704 | ||
| 455 | return 0; | 705 | return 0; |
| 456 | } | 706 | } |
| @@ -478,27 +728,21 @@ static int p4_hw_config(struct perf_event *event) | |||
| 478 | 728 | ||
| 479 | if (event->attr.type == PERF_TYPE_RAW) { | 729 | if (event->attr.type == PERF_TYPE_RAW) { |
| 480 | 730 | ||
| 731 | /* | ||
| 732 | * Clear bits we reserve to be managed by kernel itself | ||
| 733 | * and never allowed from a user space | ||
| 734 | */ | ||
| 735 | event->attr.config &= P4_CONFIG_MASK; | ||
| 736 | |||
| 481 | rc = p4_validate_raw_event(event); | 737 | rc = p4_validate_raw_event(event); |
| 482 | if (rc) | 738 | if (rc) |
| 483 | goto out; | 739 | goto out; |
| 484 | 740 | ||
| 485 | /* | 741 | /* |
| 486 | * We don't control raw events so it's up to the caller | ||
| 487 | * to pass sane values (and we don't count the thread number | ||
| 488 | * on HT machine but allow HT-compatible specifics to be | ||
| 489 | * passed on) | ||
| 490 | * | ||
| 491 | * Note that for RAW events we allow user to use P4_CCCR_RESERVED | 742 | * Note that for RAW events we allow user to use P4_CCCR_RESERVED |
| 492 | * bits since we keep additional info here (for cache events, etc.) | 743 | * bits since we keep additional info here (for cache events, etc.) |
| 493 | * | ||
| 494 | * XXX: HT wide things should check perf_paranoid_cpu() && | ||
| 495 | * CAP_SYS_ADMIN | ||
| 496 | */ | 744 | */ |
| 497 | event->hw.config |= event->attr.config & | 745 | event->hw.config |= event->attr.config; |
| 498 | (p4_config_pack_escr(P4_ESCR_MASK_HT) | | ||
| 499 | p4_config_pack_cccr(P4_CCCR_MASK_HT | P4_CCCR_RESERVED)); | ||
| 500 | |||
| 501 | event->hw.config &= ~P4_CCCR_FORCE_OVF; | ||
| 502 | } | 746 | } |
| 503 | 747 | ||
| 504 | rc = x86_setup_perfctr(event); | 748 | rc = x86_setup_perfctr(event); |
| @@ -660,8 +904,12 @@ static int p4_pmu_handle_irq(struct pt_regs *regs) | |||
| 660 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | 904 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
| 661 | int overflow; | 905 | int overflow; |
| 662 | 906 | ||
| 663 | if (!test_bit(idx, cpuc->active_mask)) | 907 | if (!test_bit(idx, cpuc->active_mask)) { |
| 908 | /* catch in-flight IRQs */ | ||
| 909 | if (__test_and_clear_bit(idx, cpuc->running)) | ||
| 910 | handled++; | ||
| 664 | continue; | 911 | continue; |
| 912 | } | ||
| 665 | 913 | ||
| 666 | event = cpuc->events[idx]; | 914 | event = cpuc->events[idx]; |
| 667 | hwc = &event->hw; | 915 | hwc = &event->hw; |
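The escr_emask validation added to p4_validate_raw_event() above is a plain whitelist test: any ESCR event-mask bit in a raw config that is not listed for the event makes the config invalid. A minimal stand-alone sketch of that test, using a made-up mask instead of the kernel's P4_ESCR_EMASK_BIT() values:

#include <stdio.h>

/* illustrative whitelist; the kernel builds this from P4_ESCR_EMASK_BIT() */
#define ALLOWED_EMASK ((1u << 0) | (1u << 1))

static int validate_emask(unsigned int emask)
{
        /* any bit outside the whitelist rejects the config */
        return (emask & ~ALLOWED_EMASK) ? -1 : 0;
}

int main(void)
{
        printf("%d\n", validate_emask(0x3));    /* 0: both bits allowed */
        printf("%d\n", validate_emask(0x4));    /* -1: bit 2 is not whitelisted */
        return 0;
}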
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index fb329e9f8494..d9f4ff8fcd69 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c | |||
| @@ -700,11 +700,10 @@ static void probe_nmi_watchdog(void) | |||
| 700 | { | 700 | { |
| 701 | switch (boot_cpu_data.x86_vendor) { | 701 | switch (boot_cpu_data.x86_vendor) { |
| 702 | case X86_VENDOR_AMD: | 702 | case X86_VENDOR_AMD: |
| 703 | if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 && | 703 | if (boot_cpu_data.x86 == 6 || |
| 704 | boot_cpu_data.x86 != 16 && boot_cpu_data.x86 != 17) | 704 | (boot_cpu_data.x86 >= 0xf && boot_cpu_data.x86 <= 0x15)) |
| 705 | return; | 705 | wd_ops = &k7_wd_ops; |
| 706 | wd_ops = &k7_wd_ops; | 706 | return; |
| 707 | break; | ||
| 708 | case X86_VENDOR_INTEL: | 707 | case X86_VENDOR_INTEL: |
| 709 | /* Work around where perfctr1 doesn't have a working enable | 708 | /* Work around where perfctr1 doesn't have a working enable |
| 710 | * bit as described in the following errata: | 709 | * bit as described in the following errata: |
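For reference, the rewritten AMD branch above replaces the explicit list of families 6, 15, 16 and 17 with family 6 plus the whole 0xf-0x15 range. A stand-alone sketch of the new predicate (the helper name is made up for illustration):

#include <stdbool.h>
#include <stdio.h>

/* mirrors the family test now used in probe_nmi_watchdog() */
static bool k7_wd_ops_usable(unsigned int family)
{
        return family == 6 || (family >= 0xf && family <= 0x15);
}

int main(void)
{
        printf("%d %d %d\n",
               k7_wd_ops_usable(0x10),   /* 1: family 16, as before */
               k7_wd_ops_usable(0x14),   /* 1: newly covered by the range */
               k7_wd_ops_usable(0x7));   /* 0: still rejected */
        return 0;
}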
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 34b4dad6f0b8..c7f64e6f537a 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c | |||
| @@ -31,6 +31,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) | |||
| 31 | const struct cpuid_bit *cb; | 31 | const struct cpuid_bit *cb; |
| 32 | 32 | ||
| 33 | static const struct cpuid_bit __cpuinitconst cpuid_bits[] = { | 33 | static const struct cpuid_bit __cpuinitconst cpuid_bits[] = { |
| 34 | { X86_FEATURE_DTS, CR_EAX, 0, 0x00000006, 0 }, | ||
| 34 | { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006, 0 }, | 35 | { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006, 0 }, |
| 35 | { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006, 0 }, | 36 | { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006, 0 }, |
| 36 | { X86_FEATURE_PLN, CR_EAX, 4, 0x00000006, 0 }, | 37 | { X86_FEATURE_PLN, CR_EAX, 4, 0x00000006, 0 }, |
| @@ -43,6 +44,12 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) | |||
| 43 | { X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a, 0 }, | 44 | { X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a, 0 }, |
| 44 | { X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a, 0 }, | 45 | { X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a, 0 }, |
| 45 | { X86_FEATURE_NRIPS, CR_EDX, 3, 0x8000000a, 0 }, | 46 | { X86_FEATURE_NRIPS, CR_EDX, 3, 0x8000000a, 0 }, |
| 47 | { X86_FEATURE_TSCRATEMSR, CR_EDX, 4, 0x8000000a, 0 }, | ||
| 48 | { X86_FEATURE_VMCBCLEAN, CR_EDX, 5, 0x8000000a, 0 }, | ||
| 49 | { X86_FEATURE_FLUSHBYASID, CR_EDX, 6, 0x8000000a, 0 }, | ||
| 50 | { X86_FEATURE_DECODEASSISTS, CR_EDX, 7, 0x8000000a, 0 }, | ||
| 51 | { X86_FEATURE_PAUSEFILTER, CR_EDX,10, 0x8000000a, 0 }, | ||
| 52 | { X86_FEATURE_PFTHRESHOLD, CR_EDX,12, 0x8000000a, 0 }, | ||
| 46 | { 0, 0, 0, 0, 0 } | 53 | { 0, 0, 0, 0, 0 } |
| 47 | }; | 54 | }; |
| 48 | 55 | ||
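All of the new entries above are scattered bits of CPUID leaf 0x8000000a, register EDX. Purely as an illustration (user-space code, not something the kernel does), the same bits can be read with GCC's __get_cpuid(); the bit positions match the table, e.g. bit 7 for decode assists and bit 10 for the pause filter:

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
        unsigned int eax, ebx, ecx, edx;

        /* leaf 0x8000000a reports SVM feature bits in EDX */
        if (!__get_cpuid(0x8000000a, &eax, &ebx, &ecx, &edx))
                return 1;

        printf("decode assists: %u\n", (edx >> 7) & 1);
        printf("pause filter:   %u\n", (edx >> 10) & 1);
        return 0;
}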
diff --git a/arch/x86/kernel/crash_dump_64.c b/arch/x86/kernel/crash_dump_64.c index 045b36cada65..994828899e09 100644 --- a/arch/x86/kernel/crash_dump_64.c +++ b/arch/x86/kernel/crash_dump_64.c | |||
| @@ -34,7 +34,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, | |||
| 34 | if (!csize) | 34 | if (!csize) |
| 35 | return 0; | 35 | return 0; |
| 36 | 36 | ||
| 37 | vaddr = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE); | 37 | vaddr = ioremap_cache(pfn << PAGE_SHIFT, PAGE_SIZE); |
| 38 | if (!vaddr) | 38 | if (!vaddr) |
| 39 | return -ENOMEM; | 39 | return -ENOMEM; |
| 40 | 40 | ||
| @@ -46,6 +46,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, | |||
| 46 | } else | 46 | } else |
| 47 | memcpy(buf, vaddr + offset, csize); | 47 | memcpy(buf, vaddr + offset, csize); |
| 48 | 48 | ||
| 49 | set_iounmap_nonlazy(); | ||
| 49 | iounmap(vaddr); | 50 | iounmap(vaddr); |
| 50 | return csize; | 51 | return csize; |
| 51 | } | 52 | } |
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 0d6fc71bedb1..0c2b7ef7a34d 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c | |||
| @@ -15,6 +15,7 @@ | |||
| 15 | #include <linux/pfn.h> | 15 | #include <linux/pfn.h> |
| 16 | #include <linux/suspend.h> | 16 | #include <linux/suspend.h> |
| 17 | #include <linux/firmware-map.h> | 17 | #include <linux/firmware-map.h> |
| 18 | #include <linux/memblock.h> | ||
| 18 | 19 | ||
| 19 | #include <asm/e820.h> | 20 | #include <asm/e820.h> |
| 20 | #include <asm/proto.h> | 21 | #include <asm/proto.h> |
| @@ -738,73 +739,7 @@ core_initcall(e820_mark_nvs_memory); | |||
| 738 | #endif | 739 | #endif |
| 739 | 740 | ||
| 740 | /* | 741 | /* |
| 741 | * Find a free area with specified alignment in a specific range. | 742 | * pre-allocate 4k and reserve it in memblock and e820_saved |
| 742 | */ | ||
| 743 | u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align) | ||
| 744 | { | ||
| 745 | int i; | ||
| 746 | |||
| 747 | for (i = 0; i < e820.nr_map; i++) { | ||
| 748 | struct e820entry *ei = &e820.map[i]; | ||
| 749 | u64 addr; | ||
| 750 | u64 ei_start, ei_last; | ||
| 751 | |||
| 752 | if (ei->type != E820_RAM) | ||
| 753 | continue; | ||
| 754 | |||
| 755 | ei_last = ei->addr + ei->size; | ||
| 756 | ei_start = ei->addr; | ||
| 757 | addr = find_early_area(ei_start, ei_last, start, end, | ||
| 758 | size, align); | ||
| 759 | |||
| 760 | if (addr != -1ULL) | ||
| 761 | return addr; | ||
| 762 | } | ||
| 763 | return -1ULL; | ||
| 764 | } | ||
| 765 | |||
| 766 | u64 __init find_fw_memmap_area(u64 start, u64 end, u64 size, u64 align) | ||
| 767 | { | ||
| 768 | return find_e820_area(start, end, size, align); | ||
| 769 | } | ||
| 770 | |||
| 771 | u64 __init get_max_mapped(void) | ||
| 772 | { | ||
| 773 | u64 end = max_pfn_mapped; | ||
| 774 | |||
| 775 | end <<= PAGE_SHIFT; | ||
| 776 | |||
| 777 | return end; | ||
| 778 | } | ||
| 779 | /* | ||
| 780 | * Find next free range after *start | ||
| 781 | */ | ||
| 782 | u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align) | ||
| 783 | { | ||
| 784 | int i; | ||
| 785 | |||
| 786 | for (i = 0; i < e820.nr_map; i++) { | ||
| 787 | struct e820entry *ei = &e820.map[i]; | ||
| 788 | u64 addr; | ||
| 789 | u64 ei_start, ei_last; | ||
| 790 | |||
| 791 | if (ei->type != E820_RAM) | ||
| 792 | continue; | ||
| 793 | |||
| 794 | ei_last = ei->addr + ei->size; | ||
| 795 | ei_start = ei->addr; | ||
| 796 | addr = find_early_area_size(ei_start, ei_last, start, | ||
| 797 | sizep, align); | ||
| 798 | |||
| 799 | if (addr != -1ULL) | ||
| 800 | return addr; | ||
| 801 | } | ||
| 802 | |||
| 803 | return -1ULL; | ||
| 804 | } | ||
| 805 | |||
| 806 | /* | ||
| 807 | * pre allocated 4k and reserved it in e820 | ||
| 808 | */ | 743 | */ |
| 809 | u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align) | 744 | u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align) |
| 810 | { | 745 | { |
| @@ -813,8 +748,8 @@ u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align) | |||
| 813 | u64 start; | 748 | u64 start; |
| 814 | 749 | ||
| 815 | for (start = startt; ; start += size) { | 750 | for (start = startt; ; start += size) { |
| 816 | start = find_e820_area_size(start, &size, align); | 751 | start = memblock_x86_find_in_range_size(start, &size, align); |
| 817 | if (!(start + 1)) | 752 | if (start == MEMBLOCK_ERROR) |
| 818 | return 0; | 753 | return 0; |
| 819 | if (size >= sizet) | 754 | if (size >= sizet) |
| 820 | break; | 755 | break; |
| @@ -830,10 +765,9 @@ u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align) | |||
| 830 | addr = round_down(start + size - sizet, align); | 765 | addr = round_down(start + size - sizet, align); |
| 831 | if (addr < start) | 766 | if (addr < start) |
| 832 | return 0; | 767 | return 0; |
| 833 | e820_update_range(addr, sizet, E820_RAM, E820_RESERVED); | 768 | memblock_x86_reserve_range(addr, addr + sizet, "new next"); |
| 834 | e820_update_range_saved(addr, sizet, E820_RAM, E820_RESERVED); | 769 | e820_update_range_saved(addr, sizet, E820_RAM, E820_RESERVED); |
| 835 | printk(KERN_INFO "update e820 for early_reserve_e820\n"); | 770 | printk(KERN_INFO "update e820_saved for early_reserve_e820\n"); |
| 836 | update_e820(); | ||
| 837 | update_e820_saved(); | 771 | update_e820_saved(); |
| 838 | 772 | ||
| 839 | return addr; | 773 | return addr; |
| @@ -895,74 +829,6 @@ unsigned long __init e820_end_of_low_ram_pfn(void) | |||
| 895 | { | 829 | { |
| 896 | return e820_end_pfn(1UL<<(32 - PAGE_SHIFT), E820_RAM); | 830 | return e820_end_pfn(1UL<<(32 - PAGE_SHIFT), E820_RAM); |
| 897 | } | 831 | } |
| 898 | /* | ||
| 899 | * Finds an active region in the address range from start_pfn to last_pfn and | ||
| 900 | * returns its range in ei_startpfn and ei_endpfn for the e820 entry. | ||
| 901 | */ | ||
| 902 | int __init e820_find_active_region(const struct e820entry *ei, | ||
| 903 | unsigned long start_pfn, | ||
| 904 | unsigned long last_pfn, | ||
| 905 | unsigned long *ei_startpfn, | ||
| 906 | unsigned long *ei_endpfn) | ||
| 907 | { | ||
| 908 | u64 align = PAGE_SIZE; | ||
| 909 | |||
| 910 | *ei_startpfn = round_up(ei->addr, align) >> PAGE_SHIFT; | ||
| 911 | *ei_endpfn = round_down(ei->addr + ei->size, align) >> PAGE_SHIFT; | ||
| 912 | |||
| 913 | /* Skip map entries smaller than a page */ | ||
| 914 | if (*ei_startpfn >= *ei_endpfn) | ||
| 915 | return 0; | ||
| 916 | |||
| 917 | /* Skip if map is outside the node */ | ||
| 918 | if (ei->type != E820_RAM || *ei_endpfn <= start_pfn || | ||
| 919 | *ei_startpfn >= last_pfn) | ||
| 920 | return 0; | ||
| 921 | |||
| 922 | /* Check for overlaps */ | ||
| 923 | if (*ei_startpfn < start_pfn) | ||
| 924 | *ei_startpfn = start_pfn; | ||
| 925 | if (*ei_endpfn > last_pfn) | ||
| 926 | *ei_endpfn = last_pfn; | ||
| 927 | |||
| 928 | return 1; | ||
| 929 | } | ||
| 930 | |||
| 931 | /* Walk the e820 map and register active regions within a node */ | ||
| 932 | void __init e820_register_active_regions(int nid, unsigned long start_pfn, | ||
| 933 | unsigned long last_pfn) | ||
| 934 | { | ||
| 935 | unsigned long ei_startpfn; | ||
| 936 | unsigned long ei_endpfn; | ||
| 937 | int i; | ||
| 938 | |||
| 939 | for (i = 0; i < e820.nr_map; i++) | ||
| 940 | if (e820_find_active_region(&e820.map[i], | ||
| 941 | start_pfn, last_pfn, | ||
| 942 | &ei_startpfn, &ei_endpfn)) | ||
| 943 | add_active_range(nid, ei_startpfn, ei_endpfn); | ||
| 944 | } | ||
| 945 | |||
| 946 | /* | ||
| 947 | * Find the hole size (in bytes) in the memory range. | ||
| 948 | * @start: starting address of the memory range to scan | ||
| 949 | * @end: ending address of the memory range to scan | ||
| 950 | */ | ||
| 951 | u64 __init e820_hole_size(u64 start, u64 end) | ||
| 952 | { | ||
| 953 | unsigned long start_pfn = start >> PAGE_SHIFT; | ||
| 954 | unsigned long last_pfn = end >> PAGE_SHIFT; | ||
| 955 | unsigned long ei_startpfn, ei_endpfn, ram = 0; | ||
| 956 | int i; | ||
| 957 | |||
| 958 | for (i = 0; i < e820.nr_map; i++) { | ||
| 959 | if (e820_find_active_region(&e820.map[i], | ||
| 960 | start_pfn, last_pfn, | ||
| 961 | &ei_startpfn, &ei_endpfn)) | ||
| 962 | ram += ei_endpfn - ei_startpfn; | ||
| 963 | } | ||
| 964 | return end - start - ((u64)ram << PAGE_SHIFT); | ||
| 965 | } | ||
| 966 | 832 | ||
| 967 | static void early_panic(char *msg) | 833 | static void early_panic(char *msg) |
| 968 | { | 834 | { |
| @@ -1210,3 +1076,48 @@ void __init setup_memory_map(void) | |||
| 1210 | printk(KERN_INFO "BIOS-provided physical RAM map:\n"); | 1076 | printk(KERN_INFO "BIOS-provided physical RAM map:\n"); |
| 1211 | e820_print_map(who); | 1077 | e820_print_map(who); |
| 1212 | } | 1078 | } |
| 1079 | |||
| 1080 | void __init memblock_x86_fill(void) | ||
| 1081 | { | ||
| 1082 | int i; | ||
| 1083 | u64 end; | ||
| 1084 | |||
| 1085 | /* | ||
| 1086 | * EFI may have more than 128 entries | ||
| 1087 | * We are safe to enable resizing, because memblock_x86_fill() | ||
| 1088 | * runs rather late on x86 | ||
| 1089 | */ | ||
| 1090 | memblock_can_resize = 1; | ||
| 1091 | |||
| 1092 | for (i = 0; i < e820.nr_map; i++) { | ||
| 1093 | struct e820entry *ei = &e820.map[i]; | ||
| 1094 | |||
| 1095 | end = ei->addr + ei->size; | ||
| 1096 | if (end != (resource_size_t)end) | ||
| 1097 | continue; | ||
| 1098 | |||
| 1099 | if (ei->type != E820_RAM && ei->type != E820_RESERVED_KERN) | ||
| 1100 | continue; | ||
| 1101 | |||
| 1102 | memblock_add(ei->addr, ei->size); | ||
| 1103 | } | ||
| 1104 | |||
| 1105 | memblock_analyze(); | ||
| 1106 | memblock_dump_all(); | ||
| 1107 | } | ||
| 1108 | |||
| 1109 | void __init memblock_find_dma_reserve(void) | ||
| 1110 | { | ||
| 1111 | #ifdef CONFIG_X86_64 | ||
| 1112 | u64 free_size_pfn; | ||
| 1113 | u64 mem_size_pfn; | ||
| 1114 | /* | ||
| 1115 | * Need to find out the used area below MAX_DMA_PFN: | ||
| 1116 | * use memblock to get the free size in [0, MAX_DMA_PFN] | ||
| 1117 | * first, and assume boot_mem will not take memory below MAX_DMA_PFN | ||
| 1118 | */ | ||
| 1119 | mem_size_pfn = memblock_x86_memory_in_range(0, MAX_DMA_PFN << PAGE_SHIFT) >> PAGE_SHIFT; | ||
| 1120 | free_size_pfn = memblock_x86_free_memory_in_range(0, MAX_DMA_PFN << PAGE_SHIFT) >> PAGE_SHIFT; | ||
| 1121 | set_dma_reserve(mem_size_pfn - free_size_pfn); | ||
| 1122 | #endif | ||
| 1123 | } | ||
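The DMA-reserve calculation added at the end of the file is simply "pages present below MAX_DMA_PFN minus pages still free there". A small worked example of that arithmetic, with made-up byte counts rather than values taken from a real boot:

#include <stdio.h>

#define PAGE_SHIFT 12

int main(void)
{
        /* example: 15 MB of RAM mapped below 16 MB, 11 MB of it still free */
        unsigned long mem_bytes  = 15UL << 20;
        unsigned long free_bytes = 11UL << 20;

        unsigned long mem_pfn  = mem_bytes  >> PAGE_SHIFT;
        unsigned long free_pfn = free_bytes >> PAGE_SHIFT;

        /* what memblock_find_dma_reserve() would hand to set_dma_reserve() */
        printf("dma reserve: %lu pages\n", mem_pfn - free_pfn);  /* 1024 */
        return 0;
}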
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index e5cc7e82e60d..76b8cd953dee 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c | |||
| @@ -18,7 +18,6 @@ | |||
| 18 | #include <asm/apic.h> | 18 | #include <asm/apic.h> |
| 19 | #include <asm/iommu.h> | 19 | #include <asm/iommu.h> |
| 20 | #include <asm/gart.h> | 20 | #include <asm/gart.h> |
| 21 | #include <asm/hpet.h> | ||
| 22 | 21 | ||
| 23 | static void __init fix_hypertransport_config(int num, int slot, int func) | 22 | static void __init fix_hypertransport_config(int num, int slot, int func) |
| 24 | { | 23 | { |
| @@ -98,7 +97,6 @@ static void __init nvidia_bugs(int num, int slot, int func) | |||
| 98 | } | 97 | } |
| 99 | 98 | ||
| 100 | #if defined(CONFIG_ACPI) && defined(CONFIG_X86_IO_APIC) | 99 | #if defined(CONFIG_ACPI) && defined(CONFIG_X86_IO_APIC) |
| 101 | #if defined(CONFIG_ACPI) && defined(CONFIG_X86_IO_APIC) | ||
| 102 | static u32 __init ati_ixp4x0_rev(int num, int slot, int func) | 100 | static u32 __init ati_ixp4x0_rev(int num, int slot, int func) |
| 103 | { | 101 | { |
| 104 | u32 d; | 102 | u32 d; |
| @@ -116,7 +114,6 @@ static u32 __init ati_ixp4x0_rev(int num, int slot, int func) | |||
| 116 | d &= 0xff; | 114 | d &= 0xff; |
| 117 | return d; | 115 | return d; |
| 118 | } | 116 | } |
| 119 | #endif | ||
| 120 | 117 | ||
| 121 | static void __init ati_bugs(int num, int slot, int func) | 118 | static void __init ati_bugs(int num, int slot, int func) |
| 122 | { | 119 | { |
| @@ -192,21 +189,6 @@ static void __init ati_bugs_contd(int num, int slot, int func) | |||
| 192 | } | 189 | } |
| 193 | #endif | 190 | #endif |
| 194 | 191 | ||
| 195 | /* | ||
| 196 | * Force the read back of the CMP register in hpet_next_event() | ||
| 197 | * to work around the problem that the CMP register write seems to be | ||
| 198 | * delayed. See hpet_next_event() for details. | ||
| 199 | * | ||
| 200 | * We do this on all SMBUS incarnations for now until we have more | ||
| 201 | * information about the affected chipsets. | ||
| 202 | */ | ||
| 203 | static void __init ati_hpet_bugs(int num, int slot, int func) | ||
| 204 | { | ||
| 205 | #ifdef CONFIG_HPET_TIMER | ||
| 206 | hpet_readback_cmp = 1; | ||
| 207 | #endif | ||
| 208 | } | ||
| 209 | |||
| 210 | #define QFLAG_APPLY_ONCE 0x1 | 192 | #define QFLAG_APPLY_ONCE 0x1 |
| 211 | #define QFLAG_APPLIED 0x2 | 193 | #define QFLAG_APPLIED 0x2 |
| 212 | #define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED) | 194 | #define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED) |
| @@ -236,8 +218,6 @@ static struct chipset early_qrk[] __initdata = { | |||
| 236 | PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs }, | 218 | PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs }, |
| 237 | { PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS, | 219 | { PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS, |
| 238 | PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs_contd }, | 220 | PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs_contd }, |
| 239 | { PCI_VENDOR_ID_ATI, PCI_ANY_ID, | ||
| 240 | PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_hpet_bugs }, | ||
| 241 | {} | 221 | {} |
| 242 | }; | 222 | }; |
| 243 | 223 | ||
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index fa99bae75ace..4572f25f9325 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c | |||
| @@ -14,6 +14,7 @@ | |||
| 14 | #include <xen/hvc-console.h> | 14 | #include <xen/hvc-console.h> |
| 15 | #include <asm/pci-direct.h> | 15 | #include <asm/pci-direct.h> |
| 16 | #include <asm/fixmap.h> | 16 | #include <asm/fixmap.h> |
| 17 | #include <asm/mrst.h> | ||
| 17 | #include <asm/pgtable.h> | 18 | #include <asm/pgtable.h> |
| 18 | #include <linux/usb/ehci_def.h> | 19 | #include <linux/usb/ehci_def.h> |
| 19 | 20 | ||
| @@ -239,6 +240,18 @@ static int __init setup_early_printk(char *buf) | |||
| 239 | if (!strncmp(buf, "xen", 3)) | 240 | if (!strncmp(buf, "xen", 3)) |
| 240 | early_console_register(&xenboot_console, keep); | 241 | early_console_register(&xenboot_console, keep); |
| 241 | #endif | 242 | #endif |
| 243 | #ifdef CONFIG_X86_MRST_EARLY_PRINTK | ||
| 244 | if (!strncmp(buf, "mrst", 4)) { | ||
| 245 | mrst_early_console_init(); | ||
| 246 | early_console_register(&early_mrst_console, keep); | ||
| 247 | } | ||
| 248 | |||
| 249 | if (!strncmp(buf, "hsu", 3)) { | ||
| 250 | hsu_early_console_init(); | ||
| 251 | early_console_register(&early_hsu_console, keep); | ||
| 252 | } | ||
| 253 | |||
| 254 | #endif | ||
| 242 | buf++; | 255 | buf++; |
| 243 | } | 256 | } |
| 244 | return 0; | 257 | return 0; |
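With the hunk above, the two new backends are picked from the existing earlyprintk= boot parameter just like the other consoles handled in setup_early_printk(). Assuming the usual syntax, the kernel command line would look something like:

        earlyprintk=mrst,keep
        earlyprintk=hsu,keep

where the optional ",keep" retains the console after the real console takes over, as with the other early consoles.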
diff --git a/arch/x86/kernel/early_printk_mrst.c b/arch/x86/kernel/early_printk_mrst.c new file mode 100644 index 000000000000..65df603622b2 --- /dev/null +++ b/arch/x86/kernel/early_printk_mrst.c | |||
| @@ -0,0 +1,319 @@ | |||
| 1 | /* | ||
| 2 | * early_printk_mrst.c - early consoles for Intel MID platforms | ||
| 3 | * | ||
| 4 | * Copyright (c) 2008-2010, Intel Corporation | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or | ||
| 7 | * modify it under the terms of the GNU General Public License | ||
| 8 | * as published by the Free Software Foundation; version 2 | ||
| 9 | * of the License. | ||
| 10 | */ | ||
| 11 | |||
| 12 | /* | ||
| 13 | * This file implements two early consoles named mrst and hsu. | ||
| 14 | * mrst is based on the Maxim MAX3110 SPI-UART device, which exists in both | ||
| 15 | * the Moorestown and Medfield platforms, while hsu is based on a High | ||
| 16 | * Speed UART device which only exists in the Medfield platform | ||
| 17 | */ | ||
| 18 | |||
| 19 | #include <linux/serial_reg.h> | ||
| 20 | #include <linux/serial_mfd.h> | ||
| 21 | #include <linux/kmsg_dump.h> | ||
| 22 | #include <linux/console.h> | ||
| 23 | #include <linux/kernel.h> | ||
| 24 | #include <linux/delay.h> | ||
| 25 | #include <linux/init.h> | ||
| 26 | #include <linux/io.h> | ||
| 27 | |||
| 28 | #include <asm/fixmap.h> | ||
| 29 | #include <asm/pgtable.h> | ||
| 30 | #include <asm/mrst.h> | ||
| 31 | |||
| 32 | #define MRST_SPI_TIMEOUT 0x200000 | ||
| 33 | #define MRST_REGBASE_SPI0 0xff128000 | ||
| 34 | #define MRST_REGBASE_SPI1 0xff128400 | ||
| 35 | #define MRST_CLK_SPI0_REG 0xff11d86c | ||
| 36 | |||
| 37 | /* Bit fields in CTRLR0 */ | ||
| 38 | #define SPI_DFS_OFFSET 0 | ||
| 39 | |||
| 40 | #define SPI_FRF_OFFSET 4 | ||
| 41 | #define SPI_FRF_SPI 0x0 | ||
| 42 | #define SPI_FRF_SSP 0x1 | ||
| 43 | #define SPI_FRF_MICROWIRE 0x2 | ||
| 44 | #define SPI_FRF_RESV 0x3 | ||
| 45 | |||
| 46 | #define SPI_MODE_OFFSET 6 | ||
| 47 | #define SPI_SCPH_OFFSET 6 | ||
| 48 | #define SPI_SCOL_OFFSET 7 | ||
| 49 | #define SPI_TMOD_OFFSET 8 | ||
| 50 | #define SPI_TMOD_TR 0x0 /* xmit & recv */ | ||
| 51 | #define SPI_TMOD_TO 0x1 /* xmit only */ | ||
| 52 | #define SPI_TMOD_RO 0x2 /* recv only */ | ||
| 53 | #define SPI_TMOD_EPROMREAD 0x3 /* eeprom read mode */ | ||
| 54 | |||
| 55 | #define SPI_SLVOE_OFFSET 10 | ||
| 56 | #define SPI_SRL_OFFSET 11 | ||
| 57 | #define SPI_CFS_OFFSET 12 | ||
| 58 | |||
| 59 | /* Bit fields in SR, 7 bits */ | ||
| 60 | #define SR_MASK 0x7f /* cover 7 bits */ | ||
| 61 | #define SR_BUSY (1 << 0) | ||
| 62 | #define SR_TF_NOT_FULL (1 << 1) | ||
| 63 | #define SR_TF_EMPT (1 << 2) | ||
| 64 | #define SR_RF_NOT_EMPT (1 << 3) | ||
| 65 | #define SR_RF_FULL (1 << 4) | ||
| 66 | #define SR_TX_ERR (1 << 5) | ||
| 67 | #define SR_DCOL (1 << 6) | ||
| 68 | |||
| 69 | struct dw_spi_reg { | ||
| 70 | u32 ctrl0; | ||
| 71 | u32 ctrl1; | ||
| 72 | u32 ssienr; | ||
| 73 | u32 mwcr; | ||
| 74 | u32 ser; | ||
| 75 | u32 baudr; | ||
| 76 | u32 txfltr; | ||
| 77 | u32 rxfltr; | ||
| 78 | u32 txflr; | ||
| 79 | u32 rxflr; | ||
| 80 | u32 sr; | ||
| 81 | u32 imr; | ||
| 82 | u32 isr; | ||
| 83 | u32 risr; | ||
| 84 | u32 txoicr; | ||
| 85 | u32 rxoicr; | ||
| 86 | u32 rxuicr; | ||
| 87 | u32 msticr; | ||
| 88 | u32 icr; | ||
| 89 | u32 dmacr; | ||
| 90 | u32 dmatdlr; | ||
| 91 | u32 dmardlr; | ||
| 92 | u32 idr; | ||
| 93 | u32 version; | ||
| 94 | |||
| 95 | /* Currently operates as 32 bits, though only the low 16 bits matter */ | ||
| 96 | u32 dr; | ||
| 97 | } __packed; | ||
| 98 | |||
| 99 | #define dw_readl(dw, name) __raw_readl(&(dw)->name) | ||
| 100 | #define dw_writel(dw, name, val) __raw_writel((val), &(dw)->name) | ||
| 101 | |||
| 102 | /* Use the SPI0 registers by default for mrst; we will detect Penwell and use SPI1 */ | ||
| 103 | static unsigned long mrst_spi_paddr = MRST_REGBASE_SPI0; | ||
| 104 | |||
| 105 | static u32 *pclk_spi0; | ||
| 106 | /* Always contains an accessible address, starts as 0 */ | ||
| 107 | static struct dw_spi_reg *pspi; | ||
| 108 | |||
| 109 | static struct kmsg_dumper dw_dumper; | ||
| 110 | static int dumper_registered; | ||
| 111 | |||
| 112 | static void dw_kmsg_dump(struct kmsg_dumper *dumper, | ||
| 113 | enum kmsg_dump_reason reason, | ||
| 114 | const char *s1, unsigned long l1, | ||
| 115 | const char *s2, unsigned long l2) | ||
| 116 | { | ||
| 117 | int i; | ||
| 118 | |||
| 119 | /* By the time we get here, we'd better re-init the HW */ | ||
| 120 | mrst_early_console_init(); | ||
| 121 | |||
| 122 | for (i = 0; i < l1; i++) | ||
| 123 | early_mrst_console.write(&early_mrst_console, s1 + i, 1); | ||
| 124 | for (i = 0; i < l2; i++) | ||
| 125 | early_mrst_console.write(&early_mrst_console, s2 + i, 1); | ||
| 126 | } | ||
| 127 | |||
| 128 | /* Set the baud rate to 115200, 8n1, IRQs disabled */ | ||
| 129 | static void max3110_write_config(void) | ||
| 130 | { | ||
| 131 | u16 config; | ||
| 132 | |||
| 133 | config = 0xc001; | ||
| 134 | dw_writel(pspi, dr, config); | ||
| 135 | } | ||
| 136 | |||
| 137 | /* Translate char to an eligible word and send it to the max3110 */ | ||
| 138 | static void max3110_write_data(char c) | ||
| 139 | { | ||
| 140 | u16 data; | ||
| 141 | |||
| 142 | data = 0x8000 | c; | ||
| 143 | dw_writel(pspi, dr, data); | ||
| 144 | } | ||
| 145 | |||
| 146 | void mrst_early_console_init(void) | ||
| 147 | { | ||
| 148 | u32 ctrlr0 = 0; | ||
| 149 | u32 spi0_cdiv; | ||
| 150 | u32 freq; /* Frequency info only needs to be read once */ | ||
| 151 | |||
| 152 | /* Base clk is 100 MHz, the actual clk = 100M / (clk_divider + 1) */ | ||
| 153 | pclk_spi0 = (void *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE, | ||
| 154 | MRST_CLK_SPI0_REG); | ||
| 155 | spi0_cdiv = ((*pclk_spi0) & 0xe00) >> 9; | ||
| 156 | freq = 100000000 / (spi0_cdiv + 1); | ||
| 157 | |||
| 158 | if (mrst_identify_cpu() == MRST_CPU_CHIP_PENWELL) | ||
| 159 | mrst_spi_paddr = MRST_REGBASE_SPI1; | ||
| 160 | |||
| 161 | pspi = (void *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE, | ||
| 162 | mrst_spi_paddr); | ||
| 163 | |||
| 164 | /* Disable SPI controller */ | ||
| 165 | dw_writel(pspi, ssienr, 0); | ||
| 166 | |||
| 167 | /* Set control param, 8 bits, transmit only mode */ | ||
| 168 | ctrlr0 = dw_readl(pspi, ctrl0); | ||
| 169 | |||
| 170 | ctrlr0 &= 0xfcc0; | ||
| 171 | ctrlr0 |= 0xf | (SPI_FRF_SPI << SPI_FRF_OFFSET) | ||
| 172 | | (SPI_TMOD_TO << SPI_TMOD_OFFSET); | ||
| 173 | dw_writel(pspi, ctrl0, ctrlr0); | ||
| 174 | |||
| 175 | /* | ||
| 176 | * Change the spi0 clk to comply with 115200 bps; use 100000 to | ||
| 177 | * calculate the clk divisor, making the clock a little slower | ||
| 178 | * than the real baud rate. | ||
| 179 | */ | ||
| 180 | dw_writel(pspi, baudr, freq/100000); | ||
| 181 | |||
| 182 | /* Disable all INT for early phase */ | ||
| 183 | dw_writel(pspi, imr, 0x0); | ||
| 184 | |||
| 185 | /* Set the cs to spi-uart */ | ||
| 186 | dw_writel(pspi, ser, 0x2); | ||
| 187 | |||
| 188 | /* Enable the HW, the last step for HW init */ | ||
| 189 | dw_writel(pspi, ssienr, 0x1); | ||
| 190 | |||
| 191 | /* Set the default configuration */ | ||
| 192 | max3110_write_config(); | ||
| 193 | |||
| 194 | /* Register the kmsg dumper */ | ||
| 195 | if (!dumper_registered) { | ||
| 196 | dw_dumper.dump = dw_kmsg_dump; | ||
| 197 | kmsg_dump_register(&dw_dumper); | ||
| 198 | dumper_registered = 1; | ||
| 199 | } | ||
| 200 | } | ||
| 201 | |||
| 202 | /* Slave select should be called in the read/write function */ | ||
| 203 | static void early_mrst_spi_putc(char c) | ||
| 204 | { | ||
| 205 | unsigned int timeout; | ||
| 206 | u32 sr; | ||
| 207 | |||
| 208 | timeout = MRST_SPI_TIMEOUT; | ||
| 209 | /* Early putc needs to make sure the TX FIFO is not full */ | ||
| 210 | while (--timeout) { | ||
| 211 | sr = dw_readl(pspi, sr); | ||
| 212 | if (!(sr & SR_TF_NOT_FULL)) | ||
| 213 | cpu_relax(); | ||
| 214 | else | ||
| 215 | break; | ||
| 216 | } | ||
| 217 | |||
| 218 | if (!timeout) | ||
| 219 | pr_warning("MRST earlycon: timed out\n"); | ||
| 220 | else | ||
| 221 | max3110_write_data(c); | ||
| 222 | } | ||
| 223 | |||
| 224 | /* Early SPI only uses polling mode */ | ||
| 225 | static void early_mrst_spi_write(struct console *con, const char *str, unsigned n) | ||
| 226 | { | ||
| 227 | int i; | ||
| 228 | |||
| 229 | for (i = 0; i < n && *str; i++) { | ||
| 230 | if (*str == '\n') | ||
| 231 | early_mrst_spi_putc('\r'); | ||
| 232 | early_mrst_spi_putc(*str); | ||
| 233 | str++; | ||
| 234 | } | ||
| 235 | } | ||
| 236 | |||
| 237 | struct console early_mrst_console = { | ||
| 238 | .name = "earlymrst", | ||
| 239 | .write = early_mrst_spi_write, | ||
| 240 | .flags = CON_PRINTBUFFER, | ||
| 241 | .index = -1, | ||
| 242 | }; | ||
| 243 | |||
| 244 | /* | ||
| 245 | * The following is the early console based on the Medfield HSU (High | ||
| 246 | * Speed UART) device. | ||
| 247 | */ | ||
| 248 | #define HSU_PORT2_PADDR 0xffa28180 | ||
| 249 | |||
| 250 | static void __iomem *phsu; | ||
| 251 | |||
| 252 | void hsu_early_console_init(void) | ||
| 253 | { | ||
| 254 | u8 lcr; | ||
| 255 | |||
| 256 | phsu = (void *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE, | ||
| 257 | HSU_PORT2_PADDR); | ||
| 258 | |||
| 259 | /* Disable FIFO */ | ||
| 260 | writeb(0x0, phsu + UART_FCR); | ||
| 261 | |||
| 262 | /* Set to default 115200 bps, 8n1 */ | ||
| 263 | lcr = readb(phsu + UART_LCR); | ||
| 264 | writeb((0x80 | lcr), phsu + UART_LCR); | ||
| 265 | writeb(0x18, phsu + UART_DLL); | ||
| 266 | writeb(lcr, phsu + UART_LCR); | ||
| 267 | writel(0x3600, phsu + UART_MUL*4); | ||
| 268 | |||
| 269 | writeb(0x8, phsu + UART_MCR); | ||
| 270 | writeb(0x7, phsu + UART_FCR); | ||
| 271 | writeb(0x3, phsu + UART_LCR); | ||
| 272 | |||
| 273 | /* Clear IRQ status */ | ||
| 274 | readb(phsu + UART_LSR); | ||
| 275 | readb(phsu + UART_RX); | ||
| 276 | readb(phsu + UART_IIR); | ||
| 277 | readb(phsu + UART_MSR); | ||
| 278 | |||
| 279 | /* Enable FIFO */ | ||
| 280 | writeb(0x7, phsu + UART_FCR); | ||
| 281 | } | ||
| 282 | |||
| 283 | #define BOTH_EMPTY (UART_LSR_TEMT | UART_LSR_THRE) | ||
| 284 | |||
| 285 | static void early_hsu_putc(char ch) | ||
| 286 | { | ||
| 287 | unsigned int timeout = 10000; /* 10ms */ | ||
| 288 | u8 status; | ||
| 289 | |||
| 290 | while (--timeout) { | ||
| 291 | status = readb(phsu + UART_LSR); | ||
| 292 | if (status & BOTH_EMPTY) | ||
| 293 | break; | ||
| 294 | udelay(1); | ||
| 295 | } | ||
| 296 | |||
| 297 | /* Only write the char when there was no timeout */ | ||
| 298 | if (timeout) | ||
| 299 | writeb(ch, phsu + UART_TX); | ||
| 300 | } | ||
| 301 | |||
| 302 | static void early_hsu_write(struct console *con, const char *str, unsigned n) | ||
| 303 | { | ||
| 304 | int i; | ||
| 305 | |||
| 306 | for (i = 0; i < n && *str; i++) { | ||
| 307 | if (*str == '\n') | ||
| 308 | early_hsu_putc('\r'); | ||
| 309 | early_hsu_putc(*str); | ||
| 310 | str++; | ||
| 311 | } | ||
| 312 | } | ||
| 313 | |||
| 314 | struct console early_hsu_console = { | ||
| 315 | .name = "earlyhsu", | ||
| 316 | .write = early_hsu_write, | ||
| 317 | .flags = CON_PRINTBUFFER, | ||
| 318 | .index = -1, | ||
| 319 | }; | ||
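A worked example of the clock arithmetic in mrst_early_console_init() above, assuming the SPI0 clock-divider field reads back as 1 (the actual value is board-dependent): the SPI input clock is 100 MHz / (1 + 1) = 50 MHz, and the divisor written to baudr is 50 MHz / 100000 = 500, giving an SPI clock of 100 kHz -- a little below the 115200 baud target, as the in-file comment intends. The same calculation in stand-alone C:

#include <stdio.h>

int main(void)
{
        unsigned int spi0_cdiv = 1;                       /* assumed, board-dependent */
        unsigned int freq  = 100000000 / (spi0_cdiv + 1); /* 50 MHz input clock */
        unsigned int baudr = freq / 100000;               /* divisor written to baudr */

        printf("baudr = %u, spi clk = %u Hz\n", baudr, freq / baudr);
        return 0;
}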
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index c2fa9b8b497e..0fe27d7c6258 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c | |||
| @@ -30,6 +30,7 @@ | |||
| 30 | #include <linux/init.h> | 30 | #include <linux/init.h> |
| 31 | #include <linux/efi.h> | 31 | #include <linux/efi.h> |
| 32 | #include <linux/bootmem.h> | 32 | #include <linux/bootmem.h> |
| 33 | #include <linux/memblock.h> | ||
| 33 | #include <linux/spinlock.h> | 34 | #include <linux/spinlock.h> |
| 34 | #include <linux/uaccess.h> | 35 | #include <linux/uaccess.h> |
| 35 | #include <linux/time.h> | 36 | #include <linux/time.h> |
| @@ -275,7 +276,7 @@ static void __init do_add_efi_memmap(void) | |||
| 275 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); | 276 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); |
| 276 | } | 277 | } |
| 277 | 278 | ||
| 278 | void __init efi_reserve_early(void) | 279 | void __init efi_memblock_x86_reserve_range(void) |
| 279 | { | 280 | { |
| 280 | unsigned long pmap; | 281 | unsigned long pmap; |
| 281 | 282 | ||
| @@ -290,7 +291,7 @@ void __init efi_reserve_early(void) | |||
| 290 | boot_params.efi_info.efi_memdesc_size; | 291 | boot_params.efi_info.efi_memdesc_size; |
| 291 | memmap.desc_version = boot_params.efi_info.efi_memdesc_version; | 292 | memmap.desc_version = boot_params.efi_info.efi_memdesc_version; |
| 292 | memmap.desc_size = boot_params.efi_info.efi_memdesc_size; | 293 | memmap.desc_size = boot_params.efi_info.efi_memdesc_size; |
| 293 | reserve_early(pmap, pmap + memmap.nr_map * memmap.desc_size, | 294 | memblock_x86_reserve_range(pmap, pmap + memmap.nr_map * memmap.desc_size, |
| 294 | "EFI memmap"); | 295 | "EFI memmap"); |
| 295 | } | 296 | } |
| 296 | 297 | ||
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 227d00920d2f..59e175e89599 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
| @@ -115,8 +115,7 @@ | |||
| 115 | 115 | ||
| 116 | /* unfortunately push/pop can't be no-op */ | 116 | /* unfortunately push/pop can't be no-op */ |
| 117 | .macro PUSH_GS | 117 | .macro PUSH_GS |
| 118 | pushl $0 | 118 | pushl_cfi $0 |
| 119 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 120 | .endm | 119 | .endm |
| 121 | .macro POP_GS pop=0 | 120 | .macro POP_GS pop=0 |
| 122 | addl $(4 + \pop), %esp | 121 | addl $(4 + \pop), %esp |
| @@ -140,14 +139,12 @@ | |||
| 140 | #else /* CONFIG_X86_32_LAZY_GS */ | 139 | #else /* CONFIG_X86_32_LAZY_GS */ |
| 141 | 140 | ||
| 142 | .macro PUSH_GS | 141 | .macro PUSH_GS |
| 143 | pushl %gs | 142 | pushl_cfi %gs |
| 144 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 145 | /*CFI_REL_OFFSET gs, 0*/ | 143 | /*CFI_REL_OFFSET gs, 0*/ |
| 146 | .endm | 144 | .endm |
| 147 | 145 | ||
| 148 | .macro POP_GS pop=0 | 146 | .macro POP_GS pop=0 |
| 149 | 98: popl %gs | 147 | 98: popl_cfi %gs |
| 150 | CFI_ADJUST_CFA_OFFSET -4 | ||
| 151 | /*CFI_RESTORE gs*/ | 148 | /*CFI_RESTORE gs*/ |
| 152 | .if \pop <> 0 | 149 | .if \pop <> 0 |
| 153 | add $\pop, %esp | 150 | add $\pop, %esp |
| @@ -195,35 +192,25 @@ | |||
| 195 | .macro SAVE_ALL | 192 | .macro SAVE_ALL |
| 196 | cld | 193 | cld |
| 197 | PUSH_GS | 194 | PUSH_GS |
| 198 | pushl %fs | 195 | pushl_cfi %fs |
| 199 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 200 | /*CFI_REL_OFFSET fs, 0;*/ | 196 | /*CFI_REL_OFFSET fs, 0;*/ |
| 201 | pushl %es | 197 | pushl_cfi %es |
| 202 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 203 | /*CFI_REL_OFFSET es, 0;*/ | 198 | /*CFI_REL_OFFSET es, 0;*/ |
| 204 | pushl %ds | 199 | pushl_cfi %ds |
| 205 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 206 | /*CFI_REL_OFFSET ds, 0;*/ | 200 | /*CFI_REL_OFFSET ds, 0;*/ |
| 207 | pushl %eax | 201 | pushl_cfi %eax |
| 208 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 209 | CFI_REL_OFFSET eax, 0 | 202 | CFI_REL_OFFSET eax, 0 |
| 210 | pushl %ebp | 203 | pushl_cfi %ebp |
| 211 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 212 | CFI_REL_OFFSET ebp, 0 | 204 | CFI_REL_OFFSET ebp, 0 |
| 213 | pushl %edi | 205 | pushl_cfi %edi |
| 214 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 215 | CFI_REL_OFFSET edi, 0 | 206 | CFI_REL_OFFSET edi, 0 |
| 216 | pushl %esi | 207 | pushl_cfi %esi |
| 217 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 218 | CFI_REL_OFFSET esi, 0 | 208 | CFI_REL_OFFSET esi, 0 |
| 219 | pushl %edx | 209 | pushl_cfi %edx |
| 220 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 221 | CFI_REL_OFFSET edx, 0 | 210 | CFI_REL_OFFSET edx, 0 |
| 222 | pushl %ecx | 211 | pushl_cfi %ecx |
| 223 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 224 | CFI_REL_OFFSET ecx, 0 | 212 | CFI_REL_OFFSET ecx, 0 |
| 225 | pushl %ebx | 213 | pushl_cfi %ebx |
| 226 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 227 | CFI_REL_OFFSET ebx, 0 | 214 | CFI_REL_OFFSET ebx, 0 |
| 228 | movl $(__USER_DS), %edx | 215 | movl $(__USER_DS), %edx |
| 229 | movl %edx, %ds | 216 | movl %edx, %ds |
| @@ -234,39 +221,29 @@ | |||
| 234 | .endm | 221 | .endm |
| 235 | 222 | ||
| 236 | .macro RESTORE_INT_REGS | 223 | .macro RESTORE_INT_REGS |
| 237 | popl %ebx | 224 | popl_cfi %ebx |
| 238 | CFI_ADJUST_CFA_OFFSET -4 | ||
| 239 | CFI_RESTORE ebx | 225 | CFI_RESTORE ebx |
| 240 | popl %ecx | 226 | popl_cfi %ecx |
| 241 | CFI_ADJUST_CFA_OFFSET -4 | ||
| 242 | CFI_RESTORE ecx | 227 | CFI_RESTORE ecx |
| 243 | popl %edx | 228 | popl_cfi %edx |
| 244 | CFI_ADJUST_CFA_OFFSET -4 | ||
| 245 | CFI_RESTORE edx | 229 | CFI_RESTORE edx |
| 246 | popl %esi | 230 | popl_cfi %esi |
| 247 | CFI_ADJUST_CFA_OFFSET -4 | ||
| 248 | CFI_RESTORE esi | 231 | CFI_RESTORE esi |
| 249 | popl %edi | 232 | popl_cfi %edi |
| 250 | CFI_ADJUST_CFA_OFFSET -4 | ||
| 251 | CFI_RESTORE edi | 233 | CFI_RESTORE edi |
| 252 | popl %ebp | 234 | popl_cfi %ebp |
| 253 | CFI_ADJUST_CFA_OFFSET -4 | ||
| 254 | CFI_RESTORE ebp | 235 | CFI_RESTORE ebp |
| 255 | popl %eax | 236 | popl_cfi %eax |
| 256 | CFI_ADJUST_CFA_OFFSET -4 | ||
| 257 | CFI_RESTORE eax | 237 | CFI_RESTORE eax |
| 258 | .endm | 238 | .endm |
| 259 | 239 | ||
| 260 | .macro RESTORE_REGS pop=0 | 240 | .macro RESTORE_REGS pop=0 |
| 261 | RESTORE_INT_REGS | 241 | RESTORE_INT_REGS |
| 262 | 1: popl %ds | 242 | 1: popl_cfi %ds |
| 263 | CFI_ADJUST_CFA_OFFSET -4 | ||
| 264 | /*CFI_RESTORE ds;*/ | 243 | /*CFI_RESTORE ds;*/ |
| 265 | 2: popl %es | 244 | 2: popl_cfi %es |
| 266 | CFI_ADJUST_CFA_OFFSET -4 | ||
| 267 | /*CFI_RESTORE es;*/ | 245 | /*CFI_RESTORE es;*/ |
| 268 | 3: popl %fs | 246 | 3: popl_cfi %fs |
| 269 | CFI_ADJUST_CFA_OFFSET -4 | ||
| 270 | /*CFI_RESTORE fs;*/ | 247 | /*CFI_RESTORE fs;*/ |
| 271 | POP_GS \pop | 248 | POP_GS \pop |
| 272 | .pushsection .fixup, "ax" | 249 | .pushsection .fixup, "ax" |
| @@ -320,16 +297,12 @@ | |||
| 320 | 297 | ||
| 321 | ENTRY(ret_from_fork) | 298 | ENTRY(ret_from_fork) |
| 322 | CFI_STARTPROC | 299 | CFI_STARTPROC |
| 323 | pushl %eax | 300 | pushl_cfi %eax |
| 324 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 325 | call schedule_tail | 301 | call schedule_tail |
| 326 | GET_THREAD_INFO(%ebp) | 302 | GET_THREAD_INFO(%ebp) |
| 327 | popl %eax | 303 | popl_cfi %eax |
| 328 | CFI_ADJUST_CFA_OFFSET -4 | 304 | pushl_cfi $0x0202 # Reset kernel eflags |
| 329 | pushl $0x0202 # Reset kernel eflags | 305 | popfl_cfi |
| 330 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 331 | popfl | ||
| 332 | CFI_ADJUST_CFA_OFFSET -4 | ||
| 333 | jmp syscall_exit | 306 | jmp syscall_exit |
| 334 | CFI_ENDPROC | 307 | CFI_ENDPROC |
| 335 | END(ret_from_fork) | 308 | END(ret_from_fork) |
| @@ -409,29 +382,23 @@ sysenter_past_esp: | |||
| 409 | * enough kernel state to call TRACE_IRQS_OFF can be called - but | 382 | * enough kernel state to call TRACE_IRQS_OFF can be called - but |
| 410 | * we immediately enable interrupts at that point anyway. | 383 | * we immediately enable interrupts at that point anyway. |
| 411 | */ | 384 | */ |
| 412 | pushl $(__USER_DS) | 385 | pushl_cfi $__USER_DS |
| 413 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 414 | /*CFI_REL_OFFSET ss, 0*/ | 386 | /*CFI_REL_OFFSET ss, 0*/ |
| 415 | pushl %ebp | 387 | pushl_cfi %ebp |
| 416 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 417 | CFI_REL_OFFSET esp, 0 | 388 | CFI_REL_OFFSET esp, 0 |
| 418 | pushfl | 389 | pushfl_cfi |
| 419 | orl $X86_EFLAGS_IF, (%esp) | 390 | orl $X86_EFLAGS_IF, (%esp) |
| 420 | CFI_ADJUST_CFA_OFFSET 4 | 391 | pushl_cfi $__USER_CS |
| 421 | pushl $(__USER_CS) | ||
| 422 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 423 | /*CFI_REL_OFFSET cs, 0*/ | 392 | /*CFI_REL_OFFSET cs, 0*/ |
| 424 | /* | 393 | /* |
| 425 | * Push current_thread_info()->sysenter_return to the stack. | 394 | * Push current_thread_info()->sysenter_return to the stack. |
| 426 | * A tiny bit of offset fixup is necessary - 4*4 means the 4 words | 395 | * A tiny bit of offset fixup is necessary - 4*4 means the 4 words |
| 427 | * pushed above; +8 corresponds to copy_thread's esp0 setting. | 396 | * pushed above; +8 corresponds to copy_thread's esp0 setting. |
| 428 | */ | 397 | */ |
| 429 | pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp) | 398 | pushl_cfi (TI_sysenter_return-THREAD_SIZE_asm+8+4*4)(%esp) |
| 430 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 431 | CFI_REL_OFFSET eip, 0 | 399 | CFI_REL_OFFSET eip, 0 |
| 432 | 400 | ||
| 433 | pushl %eax | 401 | pushl_cfi %eax |
| 434 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 435 | SAVE_ALL | 402 | SAVE_ALL |
| 436 | ENABLE_INTERRUPTS(CLBR_NONE) | 403 | ENABLE_INTERRUPTS(CLBR_NONE) |
| 437 | 404 | ||
| @@ -486,8 +453,7 @@ sysenter_audit: | |||
| 486 | movl %eax,%edx /* 2nd arg: syscall number */ | 453 | movl %eax,%edx /* 2nd arg: syscall number */ |
| 487 | movl $AUDIT_ARCH_I386,%eax /* 1st arg: audit arch */ | 454 | movl $AUDIT_ARCH_I386,%eax /* 1st arg: audit arch */ |
| 488 | call audit_syscall_entry | 455 | call audit_syscall_entry |
| 489 | pushl %ebx | 456 | pushl_cfi %ebx |
| 490 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 491 | movl PT_EAX(%esp),%eax /* reload syscall number */ | 457 | movl PT_EAX(%esp),%eax /* reload syscall number */ |
| 492 | jmp sysenter_do_call | 458 | jmp sysenter_do_call |
| 493 | 459 | ||
| @@ -529,8 +495,7 @@ ENDPROC(ia32_sysenter_target) | |||
| 529 | # system call handler stub | 495 | # system call handler stub |
| 530 | ENTRY(system_call) | 496 | ENTRY(system_call) |
| 531 | RING0_INT_FRAME # can't unwind into user space anyway | 497 | RING0_INT_FRAME # can't unwind into user space anyway |
| 532 | pushl %eax # save orig_eax | 498 | pushl_cfi %eax # save orig_eax |
| 533 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 534 | SAVE_ALL | 499 | SAVE_ALL |
| 535 | GET_THREAD_INFO(%ebp) | 500 | GET_THREAD_INFO(%ebp) |
| 536 | # system call tracing in operation / emulation | 501 | # system call tracing in operation / emulation |
| @@ -566,7 +531,6 @@ restore_all_notrace: | |||
| 566 | je ldt_ss # returning to user-space with LDT SS | 531 | je ldt_ss # returning to user-space with LDT SS |
| 567 | restore_nocheck: | 532 | restore_nocheck: |
| 568 | RESTORE_REGS 4 # skip orig_eax/error_code | 533 | RESTORE_REGS 4 # skip orig_eax/error_code |
| 569 | CFI_ADJUST_CFA_OFFSET -4 | ||
| 570 | irq_return: | 534 | irq_return: |
| 571 | INTERRUPT_RETURN | 535 | INTERRUPT_RETURN |
| 572 | .section .fixup,"ax" | 536 | .section .fixup,"ax" |
| @@ -619,10 +583,8 @@ ldt_ss: | |||
| 619 | shr $16, %edx | 583 | shr $16, %edx |
| 620 | mov %dl, GDT_ESPFIX_SS + 4 /* bits 16..23 */ | 584 | mov %dl, GDT_ESPFIX_SS + 4 /* bits 16..23 */ |
| 621 | mov %dh, GDT_ESPFIX_SS + 7 /* bits 24..31 */ | 585 | mov %dh, GDT_ESPFIX_SS + 7 /* bits 24..31 */ |
| 622 | pushl $__ESPFIX_SS | 586 | pushl_cfi $__ESPFIX_SS |
| 623 | CFI_ADJUST_CFA_OFFSET 4 | 587 | pushl_cfi %eax /* new kernel esp */ |
| 624 | push %eax /* new kernel esp */ | ||
| 625 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 626 | /* Disable interrupts, but do not irqtrace this section: we | 588 | /* Disable interrupts, but do not irqtrace this section: we |
| 627 | * will soon execute iret and the tracer was already set to | 589 | * will soon execute iret and the tracer was already set to |
| 628 | * the irqstate after the iret */ | 590 | * the irqstate after the iret */ |
| @@ -666,11 +628,9 @@ work_notifysig: # deal with pending signals and | |||
| 666 | 628 | ||
| 667 | ALIGN | 629 | ALIGN |
| 668 | work_notifysig_v86: | 630 | work_notifysig_v86: |
| 669 | pushl %ecx # save ti_flags for do_notify_resume | 631 | pushl_cfi %ecx # save ti_flags for do_notify_resume |
| 670 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 671 | call save_v86_state # %eax contains pt_regs pointer | 632 | call save_v86_state # %eax contains pt_regs pointer |
| 672 | popl %ecx | 633 | popl_cfi %ecx |
| 673 | CFI_ADJUST_CFA_OFFSET -4 | ||
| 674 | movl %eax, %esp | 634 | movl %eax, %esp |
| 675 | #else | 635 | #else |
| 676 | movl %esp, %eax | 636 | movl %esp, %eax |
| @@ -750,14 +710,18 @@ ptregs_##name: \ | |||
| 750 | #define PTREGSCALL3(name) \ | 710 | #define PTREGSCALL3(name) \ |
| 751 | ALIGN; \ | 711 | ALIGN; \ |
| 752 | ptregs_##name: \ | 712 | ptregs_##name: \ |
| 713 | CFI_STARTPROC; \ | ||
| 753 | leal 4(%esp),%eax; \ | 714 | leal 4(%esp),%eax; \ |
| 754 | pushl %eax; \ | 715 | pushl_cfi %eax; \ |
| 755 | movl PT_EDX(%eax),%ecx; \ | 716 | movl PT_EDX(%eax),%ecx; \ |
| 756 | movl PT_ECX(%eax),%edx; \ | 717 | movl PT_ECX(%eax),%edx; \ |
| 757 | movl PT_EBX(%eax),%eax; \ | 718 | movl PT_EBX(%eax),%eax; \ |
| 758 | call sys_##name; \ | 719 | call sys_##name; \ |
| 759 | addl $4,%esp; \ | 720 | addl $4,%esp; \ |
| 760 | ret | 721 | CFI_ADJUST_CFA_OFFSET -4; \ |
| 722 | ret; \ | ||
| 723 | CFI_ENDPROC; \ | ||
| 724 | ENDPROC(ptregs_##name) | ||
| 761 | 725 | ||
| 762 | PTREGSCALL1(iopl) | 726 | PTREGSCALL1(iopl) |
| 763 | PTREGSCALL0(fork) | 727 | PTREGSCALL0(fork) |
| @@ -772,15 +736,19 @@ PTREGSCALL1(vm86old) | |||
| 772 | /* Clone is an oddball. The 4th arg is in %edi */ | 736 | /* Clone is an oddball. The 4th arg is in %edi */ |
| 773 | ALIGN; | 737 | ALIGN; |
| 774 | ptregs_clone: | 738 | ptregs_clone: |
| 739 | CFI_STARTPROC | ||
| 775 | leal 4(%esp),%eax | 740 | leal 4(%esp),%eax |
| 776 | pushl %eax | 741 | pushl_cfi %eax |
| 777 | pushl PT_EDI(%eax) | 742 | pushl_cfi PT_EDI(%eax) |
| 778 | movl PT_EDX(%eax),%ecx | 743 | movl PT_EDX(%eax),%ecx |
| 779 | movl PT_ECX(%eax),%edx | 744 | movl PT_ECX(%eax),%edx |
| 780 | movl PT_EBX(%eax),%eax | 745 | movl PT_EBX(%eax),%eax |
| 781 | call sys_clone | 746 | call sys_clone |
| 782 | addl $8,%esp | 747 | addl $8,%esp |
| 748 | CFI_ADJUST_CFA_OFFSET -8 | ||
| 783 | ret | 749 | ret |
| 750 | CFI_ENDPROC | ||
| 751 | ENDPROC(ptregs_clone) | ||
| 784 | 752 | ||
| 785 | .macro FIXUP_ESPFIX_STACK | 753 | .macro FIXUP_ESPFIX_STACK |
| 786 | /* | 754 | /* |
| @@ -795,10 +763,8 @@ ptregs_clone: | |||
| 795 | mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */ | 763 | mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */ |
| 796 | shl $16, %eax | 764 | shl $16, %eax |
| 797 | addl %esp, %eax /* the adjusted stack pointer */ | 765 | addl %esp, %eax /* the adjusted stack pointer */ |
| 798 | pushl $__KERNEL_DS | 766 | pushl_cfi $__KERNEL_DS |
| 799 | CFI_ADJUST_CFA_OFFSET 4 | 767 | pushl_cfi %eax |
| 800 | pushl %eax | ||
| 801 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 802 | lss (%esp), %esp /* switch to the normal stack segment */ | 768 | lss (%esp), %esp /* switch to the normal stack segment */ |
| 803 | CFI_ADJUST_CFA_OFFSET -8 | 769 | CFI_ADJUST_CFA_OFFSET -8 |
| 804 | .endm | 770 | .endm |
| @@ -835,8 +801,7 @@ vector=FIRST_EXTERNAL_VECTOR | |||
| 835 | .if vector <> FIRST_EXTERNAL_VECTOR | 801 | .if vector <> FIRST_EXTERNAL_VECTOR |
| 836 | CFI_ADJUST_CFA_OFFSET -4 | 802 | CFI_ADJUST_CFA_OFFSET -4 |
| 837 | .endif | 803 | .endif |
| 838 | 1: pushl $(~vector+0x80) /* Note: always in signed byte range */ | 804 | 1: pushl_cfi $(~vector+0x80) /* Note: always in signed byte range */ |
| 839 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 840 | .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6 | 805 | .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6 |
| 841 | jmp 2f | 806 | jmp 2f |
| 842 | .endif | 807 | .endif |
| @@ -876,8 +841,7 @@ ENDPROC(common_interrupt) | |||
| 876 | #define BUILD_INTERRUPT3(name, nr, fn) \ | 841 | #define BUILD_INTERRUPT3(name, nr, fn) \ |
| 877 | ENTRY(name) \ | 842 | ENTRY(name) \ |
| 878 | RING0_INT_FRAME; \ | 843 | RING0_INT_FRAME; \ |
| 879 | pushl $~(nr); \ | 844 | pushl_cfi $~(nr); \ |
| 880 | CFI_ADJUST_CFA_OFFSET 4; \ | ||
| 881 | SAVE_ALL; \ | 845 | SAVE_ALL; \ |
| 882 | TRACE_IRQS_OFF \ | 846 | TRACE_IRQS_OFF \ |
| 883 | movl %esp,%eax; \ | 847 | movl %esp,%eax; \ |
| @@ -893,21 +857,18 @@ ENDPROC(name) | |||
| 893 | 857 | ||
| 894 | ENTRY(coprocessor_error) | 858 | ENTRY(coprocessor_error) |
| 895 | RING0_INT_FRAME | 859 | RING0_INT_FRAME |
| 896 | pushl $0 | 860 | pushl_cfi $0 |
| 897 | CFI_ADJUST_CFA_OFFSET 4 | 861 | pushl_cfi $do_coprocessor_error |
| 898 | pushl $do_coprocessor_error | ||
| 899 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 900 | jmp error_code | 862 | jmp error_code |
| 901 | CFI_ENDPROC | 863 | CFI_ENDPROC |
| 902 | END(coprocessor_error) | 864 | END(coprocessor_error) |
| 903 | 865 | ||
| 904 | ENTRY(simd_coprocessor_error) | 866 | ENTRY(simd_coprocessor_error) |
| 905 | RING0_INT_FRAME | 867 | RING0_INT_FRAME |
| 906 | pushl $0 | 868 | pushl_cfi $0 |
| 907 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 908 | #ifdef CONFIG_X86_INVD_BUG | 869 | #ifdef CONFIG_X86_INVD_BUG |
| 909 | /* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */ | 870 | /* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */ |
| 910 | 661: pushl $do_general_protection | 871 | 661: pushl_cfi $do_general_protection |
| 911 | 662: | 872 | 662: |
| 912 | .section .altinstructions,"a" | 873 | .section .altinstructions,"a" |
| 913 | .balign 4 | 874 | .balign 4 |
| @@ -922,19 +883,16 @@ ENTRY(simd_coprocessor_error) | |||
| 922 | 664: | 883 | 664: |
| 923 | .previous | 884 | .previous |
| 924 | #else | 885 | #else |
| 925 | pushl $do_simd_coprocessor_error | 886 | pushl_cfi $do_simd_coprocessor_error |
| 926 | #endif | 887 | #endif |
| 927 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 928 | jmp error_code | 888 | jmp error_code |
| 929 | CFI_ENDPROC | 889 | CFI_ENDPROC |
| 930 | END(simd_coprocessor_error) | 890 | END(simd_coprocessor_error) |
| 931 | 891 | ||
| 932 | ENTRY(device_not_available) | 892 | ENTRY(device_not_available) |
| 933 | RING0_INT_FRAME | 893 | RING0_INT_FRAME |
| 934 | pushl $-1 # mark this as an int | 894 | pushl_cfi $-1 # mark this as an int |
| 935 | CFI_ADJUST_CFA_OFFSET 4 | 895 | pushl_cfi $do_device_not_available |
| 936 | pushl $do_device_not_available | ||
| 937 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 938 | jmp error_code | 896 | jmp error_code |
| 939 | CFI_ENDPROC | 897 | CFI_ENDPROC |
| 940 | END(device_not_available) | 898 | END(device_not_available) |
| @@ -956,82 +914,68 @@ END(native_irq_enable_sysexit) | |||
| 956 | 914 | ||
| 957 | ENTRY(overflow) | 915 | ENTRY(overflow) |
| 958 | RING0_INT_FRAME | 916 | RING0_INT_FRAME |
| 959 | pushl $0 | 917 | pushl_cfi $0 |
| 960 | CFI_ADJUST_CFA_OFFSET 4 | 918 | pushl_cfi $do_overflow |
| 961 | pushl $do_overflow | ||
| 962 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 963 | jmp error_code | 919 | jmp error_code |
| 964 | CFI_ENDPROC | 920 | CFI_ENDPROC |
| 965 | END(overflow) | 921 | END(overflow) |
| 966 | 922 | ||
| 967 | ENTRY(bounds) | 923 | ENTRY(bounds) |
| 968 | RING0_INT_FRAME | 924 | RING0_INT_FRAME |
| 969 | pushl $0 | 925 | pushl_cfi $0 |
| 970 | CFI_ADJUST_CFA_OFFSET 4 | 926 | pushl_cfi $do_bounds |
| 971 | pushl $do_bounds | ||
| 972 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 973 | jmp error_code | 927 | jmp error_code |
| 974 | CFI_ENDPROC | 928 | CFI_ENDPROC |
| 975 | END(bounds) | 929 | END(bounds) |
| 976 | 930 | ||
| 977 | ENTRY(invalid_op) | 931 | ENTRY(invalid_op) |
| 978 | RING0_INT_FRAME | 932 | RING0_INT_FRAME |
| 979 | pushl $0 | 933 | pushl_cfi $0 |
| 980 | CFI_ADJUST_CFA_OFFSET 4 | 934 | pushl_cfi $do_invalid_op |
| 981 | pushl $do_invalid_op | ||
| 982 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 983 | jmp error_code | 935 | jmp error_code |
| 984 | CFI_ENDPROC | 936 | CFI_ENDPROC |
| 985 | END(invalid_op) | 937 | END(invalid_op) |
| 986 | 938 | ||
| 987 | ENTRY(coprocessor_segment_overrun) | 939 | ENTRY(coprocessor_segment_overrun) |
| 988 | RING0_INT_FRAME | 940 | RING0_INT_FRAME |
| 989 | pushl $0 | 941 | pushl_cfi $0 |
| 990 | CFI_ADJUST_CFA_OFFSET 4 | 942 | pushl_cfi $do_coprocessor_segment_overrun |
| 991 | pushl $do_coprocessor_segment_overrun | ||
| 992 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 993 | jmp error_code | 943 | jmp error_code |
| 994 | CFI_ENDPROC | 944 | CFI_ENDPROC |
| 995 | END(coprocessor_segment_overrun) | 945 | END(coprocessor_segment_overrun) |
| 996 | 946 | ||
| 997 | ENTRY(invalid_TSS) | 947 | ENTRY(invalid_TSS) |
| 998 | RING0_EC_FRAME | 948 | RING0_EC_FRAME |
| 999 | pushl $do_invalid_TSS | 949 | pushl_cfi $do_invalid_TSS |
| 1000 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1001 | jmp error_code | 950 | jmp error_code |
| 1002 | CFI_ENDPROC | 951 | CFI_ENDPROC |
| 1003 | END(invalid_TSS) | 952 | END(invalid_TSS) |
| 1004 | 953 | ||
| 1005 | ENTRY(segment_not_present) | 954 | ENTRY(segment_not_present) |
| 1006 | RING0_EC_FRAME | 955 | RING0_EC_FRAME |
| 1007 | pushl $do_segment_not_present | 956 | pushl_cfi $do_segment_not_present |
| 1008 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1009 | jmp error_code | 957 | jmp error_code |
| 1010 | CFI_ENDPROC | 958 | CFI_ENDPROC |
| 1011 | END(segment_not_present) | 959 | END(segment_not_present) |
| 1012 | 960 | ||
| 1013 | ENTRY(stack_segment) | 961 | ENTRY(stack_segment) |
| 1014 | RING0_EC_FRAME | 962 | RING0_EC_FRAME |
| 1015 | pushl $do_stack_segment | 963 | pushl_cfi $do_stack_segment |
| 1016 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1017 | jmp error_code | 964 | jmp error_code |
| 1018 | CFI_ENDPROC | 965 | CFI_ENDPROC |
| 1019 | END(stack_segment) | 966 | END(stack_segment) |
| 1020 | 967 | ||
| 1021 | ENTRY(alignment_check) | 968 | ENTRY(alignment_check) |
| 1022 | RING0_EC_FRAME | 969 | RING0_EC_FRAME |
| 1023 | pushl $do_alignment_check | 970 | pushl_cfi $do_alignment_check |
| 1024 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1025 | jmp error_code | 971 | jmp error_code |
| 1026 | CFI_ENDPROC | 972 | CFI_ENDPROC |
| 1027 | END(alignment_check) | 973 | END(alignment_check) |
| 1028 | 974 | ||
| 1029 | ENTRY(divide_error) | 975 | ENTRY(divide_error) |
| 1030 | RING0_INT_FRAME | 976 | RING0_INT_FRAME |
| 1031 | pushl $0 # no error code | 977 | pushl_cfi $0 # no error code |
| 1032 | CFI_ADJUST_CFA_OFFSET 4 | 978 | pushl_cfi $do_divide_error |
| 1033 | pushl $do_divide_error | ||
| 1034 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1035 | jmp error_code | 979 | jmp error_code |
| 1036 | CFI_ENDPROC | 980 | CFI_ENDPROC |
| 1037 | END(divide_error) | 981 | END(divide_error) |
| @@ -1039,10 +983,8 @@ END(divide_error) | |||
| 1039 | #ifdef CONFIG_X86_MCE | 983 | #ifdef CONFIG_X86_MCE |
| 1040 | ENTRY(machine_check) | 984 | ENTRY(machine_check) |
| 1041 | RING0_INT_FRAME | 985 | RING0_INT_FRAME |
| 1042 | pushl $0 | 986 | pushl_cfi $0 |
| 1043 | CFI_ADJUST_CFA_OFFSET 4 | 987 | pushl_cfi machine_check_vector |
| 1044 | pushl machine_check_vector | ||
| 1045 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1046 | jmp error_code | 988 | jmp error_code |
| 1047 | CFI_ENDPROC | 989 | CFI_ENDPROC |
| 1048 | END(machine_check) | 990 | END(machine_check) |
| @@ -1050,10 +992,8 @@ END(machine_check) | |||
| 1050 | 992 | ||
| 1051 | ENTRY(spurious_interrupt_bug) | 993 | ENTRY(spurious_interrupt_bug) |
| 1052 | RING0_INT_FRAME | 994 | RING0_INT_FRAME |
| 1053 | pushl $0 | 995 | pushl_cfi $0 |
| 1054 | CFI_ADJUST_CFA_OFFSET 4 | 996 | pushl_cfi $do_spurious_interrupt_bug |
| 1055 | pushl $do_spurious_interrupt_bug | ||
| 1056 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1057 | jmp error_code | 997 | jmp error_code |
| 1058 | CFI_ENDPROC | 998 | CFI_ENDPROC |
| 1059 | END(spurious_interrupt_bug) | 999 | END(spurious_interrupt_bug) |
| @@ -1084,8 +1024,7 @@ ENTRY(xen_sysenter_target) | |||
| 1084 | 1024 | ||
| 1085 | ENTRY(xen_hypervisor_callback) | 1025 | ENTRY(xen_hypervisor_callback) |
| 1086 | CFI_STARTPROC | 1026 | CFI_STARTPROC |
| 1087 | pushl $0 | 1027 | pushl_cfi $0 |
| 1088 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1089 | SAVE_ALL | 1028 | SAVE_ALL |
| 1090 | TRACE_IRQS_OFF | 1029 | TRACE_IRQS_OFF |
| 1091 | 1030 | ||
| @@ -1121,23 +1060,20 @@ ENDPROC(xen_hypervisor_callback) | |||
| 1121 | # We distinguish between categories by maintaining a status value in EAX. | 1060 | # We distinguish between categories by maintaining a status value in EAX. |
| 1122 | ENTRY(xen_failsafe_callback) | 1061 | ENTRY(xen_failsafe_callback) |
| 1123 | CFI_STARTPROC | 1062 | CFI_STARTPROC |
| 1124 | pushl %eax | 1063 | pushl_cfi %eax |
| 1125 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1126 | movl $1,%eax | 1064 | movl $1,%eax |
| 1127 | 1: mov 4(%esp),%ds | 1065 | 1: mov 4(%esp),%ds |
| 1128 | 2: mov 8(%esp),%es | 1066 | 2: mov 8(%esp),%es |
| 1129 | 3: mov 12(%esp),%fs | 1067 | 3: mov 12(%esp),%fs |
| 1130 | 4: mov 16(%esp),%gs | 1068 | 4: mov 16(%esp),%gs |
| 1131 | testl %eax,%eax | 1069 | testl %eax,%eax |
| 1132 | popl %eax | 1070 | popl_cfi %eax |
| 1133 | CFI_ADJUST_CFA_OFFSET -4 | ||
| 1134 | lea 16(%esp),%esp | 1071 | lea 16(%esp),%esp |
| 1135 | CFI_ADJUST_CFA_OFFSET -16 | 1072 | CFI_ADJUST_CFA_OFFSET -16 |
| 1136 | jz 5f | 1073 | jz 5f |
| 1137 | addl $16,%esp | 1074 | addl $16,%esp |
| 1138 | jmp iret_exc # EAX != 0 => Category 2 (Bad IRET) | 1075 | jmp iret_exc # EAX != 0 => Category 2 (Bad IRET) |
| 1139 | 5: pushl $0 # EAX == 0 => Category 1 (Bad segment) | 1076 | 5: pushl_cfi $0 # EAX == 0 => Category 1 (Bad segment) |
| 1140 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1141 | SAVE_ALL | 1077 | SAVE_ALL |
| 1142 | jmp ret_from_exception | 1078 | jmp ret_from_exception |
| 1143 | CFI_ENDPROC | 1079 | CFI_ENDPROC |
| @@ -1287,40 +1223,29 @@ syscall_table_size=(.-sys_call_table) | |||
| 1287 | 1223 | ||
| 1288 | ENTRY(page_fault) | 1224 | ENTRY(page_fault) |
| 1289 | RING0_EC_FRAME | 1225 | RING0_EC_FRAME |
| 1290 | pushl $do_page_fault | 1226 | pushl_cfi $do_page_fault |
| 1291 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1292 | ALIGN | 1227 | ALIGN |
| 1293 | error_code: | 1228 | error_code: |
| 1294 | /* the function address is in %gs's slot on the stack */ | 1229 | /* the function address is in %gs's slot on the stack */ |
| 1295 | pushl %fs | 1230 | pushl_cfi %fs |
| 1296 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1297 | /*CFI_REL_OFFSET fs, 0*/ | 1231 | /*CFI_REL_OFFSET fs, 0*/ |
| 1298 | pushl %es | 1232 | pushl_cfi %es |
| 1299 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1300 | /*CFI_REL_OFFSET es, 0*/ | 1233 | /*CFI_REL_OFFSET es, 0*/ |
| 1301 | pushl %ds | 1234 | pushl_cfi %ds |
| 1302 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1303 | /*CFI_REL_OFFSET ds, 0*/ | 1235 | /*CFI_REL_OFFSET ds, 0*/ |
| 1304 | pushl %eax | 1236 | pushl_cfi %eax |
| 1305 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1306 | CFI_REL_OFFSET eax, 0 | 1237 | CFI_REL_OFFSET eax, 0 |
| 1307 | pushl %ebp | 1238 | pushl_cfi %ebp |
| 1308 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1309 | CFI_REL_OFFSET ebp, 0 | 1239 | CFI_REL_OFFSET ebp, 0 |
| 1310 | pushl %edi | 1240 | pushl_cfi %edi |
| 1311 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1312 | CFI_REL_OFFSET edi, 0 | 1241 | CFI_REL_OFFSET edi, 0 |
| 1313 | pushl %esi | 1242 | pushl_cfi %esi |
| 1314 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1315 | CFI_REL_OFFSET esi, 0 | 1243 | CFI_REL_OFFSET esi, 0 |
| 1316 | pushl %edx | 1244 | pushl_cfi %edx |
| 1317 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1318 | CFI_REL_OFFSET edx, 0 | 1245 | CFI_REL_OFFSET edx, 0 |
| 1319 | pushl %ecx | 1246 | pushl_cfi %ecx |
| 1320 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1321 | CFI_REL_OFFSET ecx, 0 | 1247 | CFI_REL_OFFSET ecx, 0 |
| 1322 | pushl %ebx | 1248 | pushl_cfi %ebx |
| 1323 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1324 | CFI_REL_OFFSET ebx, 0 | 1249 | CFI_REL_OFFSET ebx, 0 |
| 1325 | cld | 1250 | cld |
| 1326 | movl $(__KERNEL_PERCPU), %ecx | 1251 | movl $(__KERNEL_PERCPU), %ecx |
| @@ -1362,12 +1287,9 @@ END(page_fault) | |||
| 1362 | movl TSS_sysenter_sp0 + \offset(%esp), %esp | 1287 | movl TSS_sysenter_sp0 + \offset(%esp), %esp |
| 1363 | CFI_DEF_CFA esp, 0 | 1288 | CFI_DEF_CFA esp, 0 |
| 1364 | CFI_UNDEFINED eip | 1289 | CFI_UNDEFINED eip |
| 1365 | pushfl | 1290 | pushfl_cfi |
| 1366 | CFI_ADJUST_CFA_OFFSET 4 | 1291 | pushl_cfi $__KERNEL_CS |
| 1367 | pushl $__KERNEL_CS | 1292 | pushl_cfi $sysenter_past_esp |
| 1368 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1369 | pushl $sysenter_past_esp | ||
| 1370 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1371 | CFI_REL_OFFSET eip, 0 | 1293 | CFI_REL_OFFSET eip, 0 |
| 1372 | .endm | 1294 | .endm |
| 1373 | 1295 | ||
| @@ -1377,8 +1299,7 @@ ENTRY(debug) | |||
| 1377 | jne debug_stack_correct | 1299 | jne debug_stack_correct |
| 1378 | FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn | 1300 | FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn |
| 1379 | debug_stack_correct: | 1301 | debug_stack_correct: |
| 1380 | pushl $-1 # mark this as an int | 1302 | pushl_cfi $-1 # mark this as an int |
| 1381 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1382 | SAVE_ALL | 1303 | SAVE_ALL |
| 1383 | TRACE_IRQS_OFF | 1304 | TRACE_IRQS_OFF |
| 1384 | xorl %edx,%edx # error code 0 | 1305 | xorl %edx,%edx # error code 0 |
| @@ -1398,32 +1319,27 @@ END(debug) | |||
| 1398 | */ | 1319 | */ |
| 1399 | ENTRY(nmi) | 1320 | ENTRY(nmi) |
| 1400 | RING0_INT_FRAME | 1321 | RING0_INT_FRAME |
| 1401 | pushl %eax | 1322 | pushl_cfi %eax |
| 1402 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1403 | movl %ss, %eax | 1323 | movl %ss, %eax |
| 1404 | cmpw $__ESPFIX_SS, %ax | 1324 | cmpw $__ESPFIX_SS, %ax |
| 1405 | popl %eax | 1325 | popl_cfi %eax |
| 1406 | CFI_ADJUST_CFA_OFFSET -4 | ||
| 1407 | je nmi_espfix_stack | 1326 | je nmi_espfix_stack |
| 1408 | cmpl $ia32_sysenter_target,(%esp) | 1327 | cmpl $ia32_sysenter_target,(%esp) |
| 1409 | je nmi_stack_fixup | 1328 | je nmi_stack_fixup |
| 1410 | pushl %eax | 1329 | pushl_cfi %eax |
| 1411 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1412 | movl %esp,%eax | 1330 | movl %esp,%eax |
| 1413 | /* Do not access memory above the end of our stack page, | 1331 | /* Do not access memory above the end of our stack page, |
| 1414 | * it might not exist. | 1332 | * it might not exist. |
| 1415 | */ | 1333 | */ |
| 1416 | andl $(THREAD_SIZE-1),%eax | 1334 | andl $(THREAD_SIZE-1),%eax |
| 1417 | cmpl $(THREAD_SIZE-20),%eax | 1335 | cmpl $(THREAD_SIZE-20),%eax |
| 1418 | popl %eax | 1336 | popl_cfi %eax |
| 1419 | CFI_ADJUST_CFA_OFFSET -4 | ||
| 1420 | jae nmi_stack_correct | 1337 | jae nmi_stack_correct |
| 1421 | cmpl $ia32_sysenter_target,12(%esp) | 1338 | cmpl $ia32_sysenter_target,12(%esp) |
| 1422 | je nmi_debug_stack_check | 1339 | je nmi_debug_stack_check |
| 1423 | nmi_stack_correct: | 1340 | nmi_stack_correct: |
| 1424 | /* We have a RING0_INT_FRAME here */ | 1341 | /* We have a RING0_INT_FRAME here */ |
| 1425 | pushl %eax | 1342 | pushl_cfi %eax |
| 1426 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1427 | SAVE_ALL | 1343 | SAVE_ALL |
| 1428 | xorl %edx,%edx # zero error code | 1344 | xorl %edx,%edx # zero error code |
| 1429 | movl %esp,%eax # pt_regs pointer | 1345 | movl %esp,%eax # pt_regs pointer |
| @@ -1452,18 +1368,14 @@ nmi_espfix_stack: | |||
| 1452 | * | 1368 | * |
| 1453 | * create the pointer to lss back | 1369 | * create the pointer to lss back |
| 1454 | */ | 1370 | */ |
| 1455 | pushl %ss | 1371 | pushl_cfi %ss |
| 1456 | CFI_ADJUST_CFA_OFFSET 4 | 1372 | pushl_cfi %esp |
| 1457 | pushl %esp | ||
| 1458 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1459 | addl $4, (%esp) | 1373 | addl $4, (%esp) |
| 1460 | /* copy the iret frame of 12 bytes */ | 1374 | /* copy the iret frame of 12 bytes */ |
| 1461 | .rept 3 | 1375 | .rept 3 |
| 1462 | pushl 16(%esp) | 1376 | pushl_cfi 16(%esp) |
| 1463 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1464 | .endr | 1377 | .endr |
| 1465 | pushl %eax | 1378 | pushl_cfi %eax |
| 1466 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1467 | SAVE_ALL | 1379 | SAVE_ALL |
| 1468 | FIXUP_ESPFIX_STACK # %eax == %esp | 1380 | FIXUP_ESPFIX_STACK # %eax == %esp |
| 1469 | xorl %edx,%edx # zero error code | 1381 | xorl %edx,%edx # zero error code |
| @@ -1477,8 +1389,7 @@ END(nmi) | |||
| 1477 | 1389 | ||
| 1478 | ENTRY(int3) | 1390 | ENTRY(int3) |
| 1479 | RING0_INT_FRAME | 1391 | RING0_INT_FRAME |
| 1480 | pushl $-1 # mark this as an int | 1392 | pushl_cfi $-1 # mark this as an int |
| 1481 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1482 | SAVE_ALL | 1393 | SAVE_ALL |
| 1483 | TRACE_IRQS_OFF | 1394 | TRACE_IRQS_OFF |
| 1484 | xorl %edx,%edx # zero error code | 1395 | xorl %edx,%edx # zero error code |
| @@ -1490,8 +1401,7 @@ END(int3) | |||
| 1490 | 1401 | ||
| 1491 | ENTRY(general_protection) | 1402 | ENTRY(general_protection) |
| 1492 | RING0_EC_FRAME | 1403 | RING0_EC_FRAME |
| 1493 | pushl $do_general_protection | 1404 | pushl_cfi $do_general_protection |
| 1494 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1495 | jmp error_code | 1405 | jmp error_code |
| 1496 | CFI_ENDPROC | 1406 | CFI_ENDPROC |
| 1497 | END(general_protection) | 1407 | END(general_protection) |
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 17be5ec7cbba..fe2690d71c0c 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
| @@ -213,23 +213,17 @@ ENDPROC(native_usergs_sysret64) | |||
| 213 | .macro FAKE_STACK_FRAME child_rip | 213 | .macro FAKE_STACK_FRAME child_rip |
| 214 | /* push in order ss, rsp, eflags, cs, rip */ | 214 | /* push in order ss, rsp, eflags, cs, rip */ |
| 215 | xorl %eax, %eax | 215 | xorl %eax, %eax |
| 216 | pushq $__KERNEL_DS /* ss */ | 216 | pushq_cfi $__KERNEL_DS /* ss */ |
| 217 | CFI_ADJUST_CFA_OFFSET 8 | ||
| 218 | /*CFI_REL_OFFSET ss,0*/ | 217 | /*CFI_REL_OFFSET ss,0*/ |
| 219 | pushq %rax /* rsp */ | 218 | pushq_cfi %rax /* rsp */ |
| 220 | CFI_ADJUST_CFA_OFFSET 8 | ||
| 221 | CFI_REL_OFFSET rsp,0 | 219 | CFI_REL_OFFSET rsp,0 |
| 222 | pushq $X86_EFLAGS_IF /* eflags - interrupts on */ | 220 | pushq_cfi $X86_EFLAGS_IF /* eflags - interrupts on */ |
| 223 | CFI_ADJUST_CFA_OFFSET 8 | ||
| 224 | /*CFI_REL_OFFSET rflags,0*/ | 221 | /*CFI_REL_OFFSET rflags,0*/ |
| 225 | pushq $__KERNEL_CS /* cs */ | 222 | pushq_cfi $__KERNEL_CS /* cs */ |
| 226 | CFI_ADJUST_CFA_OFFSET 8 | ||
| 227 | /*CFI_REL_OFFSET cs,0*/ | 223 | /*CFI_REL_OFFSET cs,0*/ |
| 228 | pushq \child_rip /* rip */ | 224 | pushq_cfi \child_rip /* rip */ |
| 229 | CFI_ADJUST_CFA_OFFSET 8 | ||
| 230 | CFI_REL_OFFSET rip,0 | 225 | CFI_REL_OFFSET rip,0 |
| 231 | pushq %rax /* orig rax */ | 226 | pushq_cfi %rax /* orig rax */ |
| 232 | CFI_ADJUST_CFA_OFFSET 8 | ||
| 233 | .endm | 227 | .endm |
| 234 | 228 | ||
| 235 | .macro UNFAKE_STACK_FRAME | 229 | .macro UNFAKE_STACK_FRAME |
| @@ -398,10 +392,8 @@ ENTRY(ret_from_fork) | |||
| 398 | 392 | ||
| 399 | LOCK ; btr $TIF_FORK,TI_flags(%r8) | 393 | LOCK ; btr $TIF_FORK,TI_flags(%r8) |
| 400 | 394 | ||
| 401 | push kernel_eflags(%rip) | 395 | pushq_cfi kernel_eflags(%rip) |
| 402 | CFI_ADJUST_CFA_OFFSET 8 | 396 | popfq_cfi # reset kernel eflags |
| 403 | popf # reset kernel eflags | ||
| 404 | CFI_ADJUST_CFA_OFFSET -8 | ||
| 405 | 397 | ||
| 406 | call schedule_tail # rdi: 'prev' task parameter | 398 | call schedule_tail # rdi: 'prev' task parameter |
| 407 | 399 | ||
| @@ -521,11 +513,9 @@ sysret_careful: | |||
| 521 | jnc sysret_signal | 513 | jnc sysret_signal |
| 522 | TRACE_IRQS_ON | 514 | TRACE_IRQS_ON |
| 523 | ENABLE_INTERRUPTS(CLBR_NONE) | 515 | ENABLE_INTERRUPTS(CLBR_NONE) |
| 524 | pushq %rdi | 516 | pushq_cfi %rdi |
| 525 | CFI_ADJUST_CFA_OFFSET 8 | ||
| 526 | call schedule | 517 | call schedule |
| 527 | popq %rdi | 518 | popq_cfi %rdi |
| 528 | CFI_ADJUST_CFA_OFFSET -8 | ||
| 529 | jmp sysret_check | 519 | jmp sysret_check |
| 530 | 520 | ||
| 531 | /* Handle a signal */ | 521 | /* Handle a signal */ |
| @@ -634,11 +624,9 @@ int_careful: | |||
| 634 | jnc int_very_careful | 624 | jnc int_very_careful |
| 635 | TRACE_IRQS_ON | 625 | TRACE_IRQS_ON |
| 636 | ENABLE_INTERRUPTS(CLBR_NONE) | 626 | ENABLE_INTERRUPTS(CLBR_NONE) |
| 637 | pushq %rdi | 627 | pushq_cfi %rdi |
| 638 | CFI_ADJUST_CFA_OFFSET 8 | ||
| 639 | call schedule | 628 | call schedule |
| 640 | popq %rdi | 629 | popq_cfi %rdi |
| 641 | CFI_ADJUST_CFA_OFFSET -8 | ||
| 642 | DISABLE_INTERRUPTS(CLBR_NONE) | 630 | DISABLE_INTERRUPTS(CLBR_NONE) |
| 643 | TRACE_IRQS_OFF | 631 | TRACE_IRQS_OFF |
| 644 | jmp int_with_check | 632 | jmp int_with_check |
| @@ -652,12 +640,10 @@ int_check_syscall_exit_work: | |||
| 652 | /* Check for syscall exit trace */ | 640 | /* Check for syscall exit trace */ |
| 653 | testl $_TIF_WORK_SYSCALL_EXIT,%edx | 641 | testl $_TIF_WORK_SYSCALL_EXIT,%edx |
| 654 | jz int_signal | 642 | jz int_signal |
| 655 | pushq %rdi | 643 | pushq_cfi %rdi |
| 656 | CFI_ADJUST_CFA_OFFSET 8 | ||
| 657 | leaq 8(%rsp),%rdi # &ptregs -> arg1 | 644 | leaq 8(%rsp),%rdi # &ptregs -> arg1 |
| 658 | call syscall_trace_leave | 645 | call syscall_trace_leave |
| 659 | popq %rdi | 646 | popq_cfi %rdi |
| 660 | CFI_ADJUST_CFA_OFFSET -8 | ||
| 661 | andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi | 647 | andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi |
| 662 | jmp int_restore_rest | 648 | jmp int_restore_rest |
| 663 | 649 | ||
| @@ -714,9 +700,8 @@ END(ptregscall_common) | |||
| 714 | 700 | ||
| 715 | ENTRY(stub_execve) | 701 | ENTRY(stub_execve) |
| 716 | CFI_STARTPROC | 702 | CFI_STARTPROC |
| 717 | popq %r11 | 703 | addq $8, %rsp |
| 718 | CFI_ADJUST_CFA_OFFSET -8 | 704 | PARTIAL_FRAME 0 |
| 719 | CFI_REGISTER rip, r11 | ||
| 720 | SAVE_REST | 705 | SAVE_REST |
| 721 | FIXUP_TOP_OF_STACK %r11 | 706 | FIXUP_TOP_OF_STACK %r11 |
| 722 | movq %rsp, %rcx | 707 | movq %rsp, %rcx |
| @@ -735,7 +720,7 @@ END(stub_execve) | |||
| 735 | ENTRY(stub_rt_sigreturn) | 720 | ENTRY(stub_rt_sigreturn) |
| 736 | CFI_STARTPROC | 721 | CFI_STARTPROC |
| 737 | addq $8, %rsp | 722 | addq $8, %rsp |
| 738 | CFI_ADJUST_CFA_OFFSET -8 | 723 | PARTIAL_FRAME 0 |
| 739 | SAVE_REST | 724 | SAVE_REST |
| 740 | movq %rsp,%rdi | 725 | movq %rsp,%rdi |
| 741 | FIXUP_TOP_OF_STACK %r11 | 726 | FIXUP_TOP_OF_STACK %r11 |
| @@ -766,8 +751,7 @@ vector=FIRST_EXTERNAL_VECTOR | |||
| 766 | .if vector <> FIRST_EXTERNAL_VECTOR | 751 | .if vector <> FIRST_EXTERNAL_VECTOR |
| 767 | CFI_ADJUST_CFA_OFFSET -8 | 752 | CFI_ADJUST_CFA_OFFSET -8 |
| 768 | .endif | 753 | .endif |
| 769 | 1: pushq $(~vector+0x80) /* Note: always in signed byte range */ | 754 | 1: pushq_cfi $(~vector+0x80) /* Note: always in signed byte range */ |
| 770 | CFI_ADJUST_CFA_OFFSET 8 | ||
| 771 | .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6 | 755 | .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6 |
| 772 | jmp 2f | 756 | jmp 2f |
| 773 | .endif | 757 | .endif |
| @@ -796,8 +780,8 @@ END(interrupt) | |||
| 796 | 780 | ||
| 797 | /* 0(%rsp): ~(interrupt number) */ | 781 | /* 0(%rsp): ~(interrupt number) */ |
| 798 | .macro interrupt func | 782 | .macro interrupt func |
| 799 | subq $10*8, %rsp | 783 | subq $ORIG_RAX-ARGOFFSET+8, %rsp |
| 800 | CFI_ADJUST_CFA_OFFSET 10*8 | 784 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-ARGOFFSET+8 |
| 801 | call save_args | 785 | call save_args |
| 802 | PARTIAL_FRAME 0 | 786 | PARTIAL_FRAME 0 |
| 803 | call \func | 787 | call \func |
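The new subq operand above, like the ORIG_RAX-R15 operands in the exception stubs further down, replaces a hard-coded byte count with an expression over pt_regs slot offsets. A small standalone check (offsets below are assumed from the asm-offsets of this kernel generation: R15 = 0, ARGOFFSET = R11 = 48, ORIG_RAX = 120) confirms the new expressions equal the old constants:

    #include <assert.h>

    /* Assumed pt_regs byte offsets; not taken from this tree. */
    #define R15        0
    #define ARGOFFSET  48    /* = the R11 slot */
    #define ORIG_RAX   120

    int main(void)
    {
            /* interrupt macro: matches the old "subq $10*8, %rsp" */
            assert(ORIG_RAX - ARGOFFSET + 8 == 10 * 8);
            /* zeroentry/paranoid stubs: matches the old "subq $15*8, %rsp" */
            assert(ORIG_RAX - R15 == 15 * 8);
            return 0;
    }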
| @@ -822,6 +806,7 @@ ret_from_intr: | |||
| 822 | TRACE_IRQS_OFF | 806 | TRACE_IRQS_OFF |
| 823 | decl PER_CPU_VAR(irq_count) | 807 | decl PER_CPU_VAR(irq_count) |
| 824 | leaveq | 808 | leaveq |
| 809 | CFI_RESTORE rbp | ||
| 825 | CFI_DEF_CFA_REGISTER rsp | 810 | CFI_DEF_CFA_REGISTER rsp |
| 826 | CFI_ADJUST_CFA_OFFSET -8 | 811 | CFI_ADJUST_CFA_OFFSET -8 |
| 827 | exit_intr: | 812 | exit_intr: |
| @@ -903,11 +888,9 @@ retint_careful: | |||
| 903 | jnc retint_signal | 888 | jnc retint_signal |
| 904 | TRACE_IRQS_ON | 889 | TRACE_IRQS_ON |
| 905 | ENABLE_INTERRUPTS(CLBR_NONE) | 890 | ENABLE_INTERRUPTS(CLBR_NONE) |
| 906 | pushq %rdi | 891 | pushq_cfi %rdi |
| 907 | CFI_ADJUST_CFA_OFFSET 8 | ||
| 908 | call schedule | 892 | call schedule |
| 909 | popq %rdi | 893 | popq_cfi %rdi |
| 910 | CFI_ADJUST_CFA_OFFSET -8 | ||
| 911 | GET_THREAD_INFO(%rcx) | 894 | GET_THREAD_INFO(%rcx) |
| 912 | DISABLE_INTERRUPTS(CLBR_NONE) | 895 | DISABLE_INTERRUPTS(CLBR_NONE) |
| 913 | TRACE_IRQS_OFF | 896 | TRACE_IRQS_OFF |
| @@ -956,8 +939,7 @@ END(common_interrupt) | |||
| 956 | .macro apicinterrupt num sym do_sym | 939 | .macro apicinterrupt num sym do_sym |
| 957 | ENTRY(\sym) | 940 | ENTRY(\sym) |
| 958 | INTR_FRAME | 941 | INTR_FRAME |
| 959 | pushq $~(\num) | 942 | pushq_cfi $~(\num) |
| 960 | CFI_ADJUST_CFA_OFFSET 8 | ||
| 961 | interrupt \do_sym | 943 | interrupt \do_sym |
| 962 | jmp ret_from_intr | 944 | jmp ret_from_intr |
| 963 | CFI_ENDPROC | 945 | CFI_ENDPROC |
| @@ -981,22 +963,10 @@ apicinterrupt X86_PLATFORM_IPI_VECTOR \ | |||
| 981 | x86_platform_ipi smp_x86_platform_ipi | 963 | x86_platform_ipi smp_x86_platform_ipi |
| 982 | 964 | ||
| 983 | #ifdef CONFIG_SMP | 965 | #ifdef CONFIG_SMP |
| 984 | apicinterrupt INVALIDATE_TLB_VECTOR_START+0 \ | 966 | .irpc idx, "01234567" |
| 985 | invalidate_interrupt0 smp_invalidate_interrupt | 967 | apicinterrupt (INVALIDATE_TLB_VECTOR_START)+\idx \ |
| 986 | apicinterrupt INVALIDATE_TLB_VECTOR_START+1 \ | 968 | invalidate_interrupt\idx smp_invalidate_interrupt |
| 987 | invalidate_interrupt1 smp_invalidate_interrupt | 969 | .endr |
| 988 | apicinterrupt INVALIDATE_TLB_VECTOR_START+2 \ | ||
| 989 | invalidate_interrupt2 smp_invalidate_interrupt | ||
| 990 | apicinterrupt INVALIDATE_TLB_VECTOR_START+3 \ | ||
| 991 | invalidate_interrupt3 smp_invalidate_interrupt | ||
| 992 | apicinterrupt INVALIDATE_TLB_VECTOR_START+4 \ | ||
| 993 | invalidate_interrupt4 smp_invalidate_interrupt | ||
| 994 | apicinterrupt INVALIDATE_TLB_VECTOR_START+5 \ | ||
| 995 | invalidate_interrupt5 smp_invalidate_interrupt | ||
| 996 | apicinterrupt INVALIDATE_TLB_VECTOR_START+6 \ | ||
| 997 | invalidate_interrupt6 smp_invalidate_interrupt | ||
| 998 | apicinterrupt INVALIDATE_TLB_VECTOR_START+7 \ | ||
| 999 | invalidate_interrupt7 smp_invalidate_interrupt | ||
| 1000 | #endif | 970 | #endif |
| 1001 | 971 | ||
| 1002 | apicinterrupt THRESHOLD_APIC_VECTOR \ | 972 | apicinterrupt THRESHOLD_APIC_VECTOR \ |
| @@ -1023,9 +993,9 @@ apicinterrupt ERROR_APIC_VECTOR \ | |||
| 1023 | apicinterrupt SPURIOUS_APIC_VECTOR \ | 993 | apicinterrupt SPURIOUS_APIC_VECTOR \ |
| 1024 | spurious_interrupt smp_spurious_interrupt | 994 | spurious_interrupt smp_spurious_interrupt |
| 1025 | 995 | ||
| 1026 | #ifdef CONFIG_PERF_EVENTS | 996 | #ifdef CONFIG_IRQ_WORK |
| 1027 | apicinterrupt LOCAL_PENDING_VECTOR \ | 997 | apicinterrupt IRQ_WORK_VECTOR \ |
| 1028 | perf_pending_interrupt smp_perf_pending_interrupt | 998 | irq_work_interrupt smp_irq_work_interrupt |
| 1029 | #endif | 999 | #endif |
| 1030 | 1000 | ||
| 1031 | /* | 1001 | /* |
| @@ -1036,8 +1006,8 @@ ENTRY(\sym) | |||
| 1036 | INTR_FRAME | 1006 | INTR_FRAME |
| 1037 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 1007 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
| 1038 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ | 1008 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ |
| 1039 | subq $15*8,%rsp | 1009 | subq $ORIG_RAX-R15, %rsp |
| 1040 | CFI_ADJUST_CFA_OFFSET 15*8 | 1010 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 |
| 1041 | call error_entry | 1011 | call error_entry |
| 1042 | DEFAULT_FRAME 0 | 1012 | DEFAULT_FRAME 0 |
| 1043 | movq %rsp,%rdi /* pt_regs pointer */ | 1013 | movq %rsp,%rdi /* pt_regs pointer */ |
| @@ -1052,9 +1022,9 @@ END(\sym) | |||
| 1052 | ENTRY(\sym) | 1022 | ENTRY(\sym) |
| 1053 | INTR_FRAME | 1023 | INTR_FRAME |
| 1054 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 1024 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
| 1055 | pushq $-1 /* ORIG_RAX: no syscall to restart */ | 1025 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ |
| 1056 | CFI_ADJUST_CFA_OFFSET 8 | 1026 | subq $ORIG_RAX-R15, %rsp |
| 1057 | subq $15*8, %rsp | 1027 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 |
| 1058 | call save_paranoid | 1028 | call save_paranoid |
| 1059 | TRACE_IRQS_OFF | 1029 | TRACE_IRQS_OFF |
| 1060 | movq %rsp,%rdi /* pt_regs pointer */ | 1030 | movq %rsp,%rdi /* pt_regs pointer */ |
| @@ -1070,9 +1040,9 @@ END(\sym) | |||
| 1070 | ENTRY(\sym) | 1040 | ENTRY(\sym) |
| 1071 | INTR_FRAME | 1041 | INTR_FRAME |
| 1072 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 1042 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
| 1073 | pushq $-1 /* ORIG_RAX: no syscall to restart */ | 1043 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ |
| 1074 | CFI_ADJUST_CFA_OFFSET 8 | 1044 | subq $ORIG_RAX-R15, %rsp |
| 1075 | subq $15*8, %rsp | 1045 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 |
| 1076 | call save_paranoid | 1046 | call save_paranoid |
| 1077 | TRACE_IRQS_OFF | 1047 | TRACE_IRQS_OFF |
| 1078 | movq %rsp,%rdi /* pt_regs pointer */ | 1048 | movq %rsp,%rdi /* pt_regs pointer */ |
| @@ -1089,8 +1059,8 @@ END(\sym) | |||
| 1089 | ENTRY(\sym) | 1059 | ENTRY(\sym) |
| 1090 | XCPT_FRAME | 1060 | XCPT_FRAME |
| 1091 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 1061 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
| 1092 | subq $15*8,%rsp | 1062 | subq $ORIG_RAX-R15, %rsp |
| 1093 | CFI_ADJUST_CFA_OFFSET 15*8 | 1063 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 |
| 1094 | call error_entry | 1064 | call error_entry |
| 1095 | DEFAULT_FRAME 0 | 1065 | DEFAULT_FRAME 0 |
| 1096 | movq %rsp,%rdi /* pt_regs pointer */ | 1066 | movq %rsp,%rdi /* pt_regs pointer */ |
| @@ -1107,8 +1077,8 @@ END(\sym) | |||
| 1107 | ENTRY(\sym) | 1077 | ENTRY(\sym) |
| 1108 | XCPT_FRAME | 1078 | XCPT_FRAME |
| 1109 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 1079 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
| 1110 | subq $15*8,%rsp | 1080 | subq $ORIG_RAX-R15, %rsp |
| 1111 | CFI_ADJUST_CFA_OFFSET 15*8 | 1081 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 |
| 1112 | call save_paranoid | 1082 | call save_paranoid |
| 1113 | DEFAULT_FRAME 0 | 1083 | DEFAULT_FRAME 0 |
| 1114 | TRACE_IRQS_OFF | 1084 | TRACE_IRQS_OFF |
| @@ -1139,16 +1109,14 @@ zeroentry simd_coprocessor_error do_simd_coprocessor_error | |||
| 1139 | /* edi: new selector */ | 1109 | /* edi: new selector */ |
| 1140 | ENTRY(native_load_gs_index) | 1110 | ENTRY(native_load_gs_index) |
| 1141 | CFI_STARTPROC | 1111 | CFI_STARTPROC |
| 1142 | pushf | 1112 | pushfq_cfi |
| 1143 | CFI_ADJUST_CFA_OFFSET 8 | ||
| 1144 | DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI) | 1113 | DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI) |
| 1145 | SWAPGS | 1114 | SWAPGS |
| 1146 | gs_change: | 1115 | gs_change: |
| 1147 | movl %edi,%gs | 1116 | movl %edi,%gs |
| 1148 | 2: mfence /* workaround */ | 1117 | 2: mfence /* workaround */ |
| 1149 | SWAPGS | 1118 | SWAPGS |
| 1150 | popf | 1119 | popfq_cfi |
| 1151 | CFI_ADJUST_CFA_OFFSET -8 | ||
| 1152 | ret | 1120 | ret |
| 1153 | CFI_ENDPROC | 1121 | CFI_ENDPROC |
| 1154 | END(native_load_gs_index) | 1122 | END(native_load_gs_index) |
| @@ -1215,8 +1183,7 @@ END(kernel_execve) | |||
| 1215 | /* Call softirq on interrupt stack. Interrupts are off. */ | 1183 | /* Call softirq on interrupt stack. Interrupts are off. */ |
| 1216 | ENTRY(call_softirq) | 1184 | ENTRY(call_softirq) |
| 1217 | CFI_STARTPROC | 1185 | CFI_STARTPROC |
| 1218 | push %rbp | 1186 | pushq_cfi %rbp |
| 1219 | CFI_ADJUST_CFA_OFFSET 8 | ||
| 1220 | CFI_REL_OFFSET rbp,0 | 1187 | CFI_REL_OFFSET rbp,0 |
| 1221 | mov %rsp,%rbp | 1188 | mov %rsp,%rbp |
| 1222 | CFI_DEF_CFA_REGISTER rbp | 1189 | CFI_DEF_CFA_REGISTER rbp |
| @@ -1225,6 +1192,7 @@ ENTRY(call_softirq) | |||
| 1225 | push %rbp # backlink for old unwinder | 1192 | push %rbp # backlink for old unwinder |
| 1226 | call __do_softirq | 1193 | call __do_softirq |
| 1227 | leaveq | 1194 | leaveq |
| 1195 | CFI_RESTORE rbp | ||
| 1228 | CFI_DEF_CFA_REGISTER rsp | 1196 | CFI_DEF_CFA_REGISTER rsp |
| 1229 | CFI_ADJUST_CFA_OFFSET -8 | 1197 | CFI_ADJUST_CFA_OFFSET -8 |
| 1230 | decl PER_CPU_VAR(irq_count) | 1198 | decl PER_CPU_VAR(irq_count) |
| @@ -1368,7 +1336,7 @@ paranoidzeroentry machine_check *machine_check_vector(%rip) | |||
| 1368 | 1336 | ||
| 1369 | /* ebx: no swapgs flag */ | 1337 | /* ebx: no swapgs flag */ |
| 1370 | ENTRY(paranoid_exit) | 1338 | ENTRY(paranoid_exit) |
| 1371 | INTR_FRAME | 1339 | DEFAULT_FRAME |
| 1372 | DISABLE_INTERRUPTS(CLBR_NONE) | 1340 | DISABLE_INTERRUPTS(CLBR_NONE) |
| 1373 | TRACE_IRQS_OFF | 1341 | TRACE_IRQS_OFF |
| 1374 | testl %ebx,%ebx /* swapgs needed? */ | 1342 | testl %ebx,%ebx /* swapgs needed? */ |
| @@ -1445,7 +1413,6 @@ error_swapgs: | |||
| 1445 | error_sti: | 1413 | error_sti: |
| 1446 | TRACE_IRQS_OFF | 1414 | TRACE_IRQS_OFF |
| 1447 | ret | 1415 | ret |
| 1448 | CFI_ENDPROC | ||
| 1449 | 1416 | ||
| 1450 | /* | 1417 | /* |
| 1451 | * There are two places in the kernel that can potentially fault with | 1418 | * There are two places in the kernel that can potentially fault with |
| @@ -1470,6 +1437,7 @@ bstep_iret: | |||
| 1470 | /* Fix truncated RIP */ | 1437 | /* Fix truncated RIP */ |
| 1471 | movq %rcx,RIP+8(%rsp) | 1438 | movq %rcx,RIP+8(%rsp) |
| 1472 | jmp error_swapgs | 1439 | jmp error_swapgs |
| 1440 | CFI_ENDPROC | ||
| 1473 | END(error_entry) | 1441 | END(error_entry) |
| 1474 | 1442 | ||
| 1475 | 1443 | ||
| @@ -1498,8 +1466,8 @@ ENTRY(nmi) | |||
| 1498 | INTR_FRAME | 1466 | INTR_FRAME |
| 1499 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 1467 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
| 1500 | pushq_cfi $-1 | 1468 | pushq_cfi $-1 |
| 1501 | subq $15*8, %rsp | 1469 | subq $ORIG_RAX-R15, %rsp |
| 1502 | CFI_ADJUST_CFA_OFFSET 15*8 | 1470 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 |
| 1503 | call save_paranoid | 1471 | call save_paranoid |
| 1504 | DEFAULT_FRAME 0 | 1472 | DEFAULT_FRAME 0 |
| 1505 | /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ | 1473 | /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ |
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index cd37469b54ee..3afb33f14d2d 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c | |||
| @@ -257,14 +257,9 @@ do_ftrace_mod_code(unsigned long ip, void *new_code) | |||
| 257 | return mod_code_status; | 257 | return mod_code_status; |
| 258 | } | 258 | } |
| 259 | 259 | ||
| 260 | |||
| 261 | |||
| 262 | |||
| 263 | static unsigned char ftrace_nop[MCOUNT_INSN_SIZE]; | ||
| 264 | |||
| 265 | static unsigned char *ftrace_nop_replace(void) | 260 | static unsigned char *ftrace_nop_replace(void) |
| 266 | { | 261 | { |
| 267 | return ftrace_nop; | 262 | return ideal_nop5; |
| 268 | } | 263 | } |
| 269 | 264 | ||
| 270 | static int | 265 | static int |
| @@ -338,62 +333,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func) | |||
| 338 | 333 | ||
| 339 | int __init ftrace_dyn_arch_init(void *data) | 334 | int __init ftrace_dyn_arch_init(void *data) |
| 340 | { | 335 | { |
| 341 | extern const unsigned char ftrace_test_p6nop[]; | ||
| 342 | extern const unsigned char ftrace_test_nop5[]; | ||
| 343 | extern const unsigned char ftrace_test_jmp[]; | ||
| 344 | int faulted = 0; | ||
| 345 | |||
| 346 | /* | ||
| 347 | * There is no good nop for all x86 archs. | ||
| 348 | * We will default to using the P6_NOP5, but first we | ||
| 349 | * will test to make sure that the nop will actually | ||
| 350 | * work on this CPU. If it faults, we will then | ||
| 351 | * go to a lesser efficient 5 byte nop. If that fails | ||
| 352 | * we then just use a jmp as our nop. This isn't the most | ||
| 353 | * efficient nop, but we can not use a multi part nop | ||
| 354 | * since we would then risk being preempted in the middle | ||
| 355 | * of that nop, and if we enabled tracing then, it might | ||
| 356 | * cause a system crash. | ||
| 357 | * | ||
| 358 | * TODO: check the cpuid to determine the best nop. | ||
| 359 | */ | ||
| 360 | asm volatile ( | ||
| 361 | "ftrace_test_jmp:" | ||
| 362 | "jmp ftrace_test_p6nop\n" | ||
| 363 | "nop\n" | ||
| 364 | "nop\n" | ||
| 365 | "nop\n" /* 2 byte jmp + 3 bytes */ | ||
| 366 | "ftrace_test_p6nop:" | ||
| 367 | P6_NOP5 | ||
| 368 | "jmp 1f\n" | ||
| 369 | "ftrace_test_nop5:" | ||
| 370 | ".byte 0x66,0x66,0x66,0x66,0x90\n" | ||
| 371 | "1:" | ||
| 372 | ".section .fixup, \"ax\"\n" | ||
| 373 | "2: movl $1, %0\n" | ||
| 374 | " jmp ftrace_test_nop5\n" | ||
| 375 | "3: movl $2, %0\n" | ||
| 376 | " jmp 1b\n" | ||
| 377 | ".previous\n" | ||
| 378 | _ASM_EXTABLE(ftrace_test_p6nop, 2b) | ||
| 379 | _ASM_EXTABLE(ftrace_test_nop5, 3b) | ||
| 380 | : "=r"(faulted) : "0" (faulted)); | ||
| 381 | |||
| 382 | switch (faulted) { | ||
| 383 | case 0: | ||
| 384 | pr_info("converting mcount calls to 0f 1f 44 00 00\n"); | ||
| 385 | memcpy(ftrace_nop, ftrace_test_p6nop, MCOUNT_INSN_SIZE); | ||
| 386 | break; | ||
| 387 | case 1: | ||
| 388 | pr_info("converting mcount calls to 66 66 66 66 90\n"); | ||
| 389 | memcpy(ftrace_nop, ftrace_test_nop5, MCOUNT_INSN_SIZE); | ||
| 390 | break; | ||
| 391 | case 2: | ||
| 392 | pr_info("converting mcount calls to jmp . + 5\n"); | ||
| 393 | memcpy(ftrace_nop, ftrace_test_jmp, MCOUNT_INSN_SIZE); | ||
| 394 | break; | ||
| 395 | } | ||
| 396 | |||
| 397 | 	/* The return code is returned via data */ | 336 | 	/* The return code is returned via data */ |
| 398 | *(unsigned long *)data = 0; | 337 | *(unsigned long *)data = 0; |
| 399 | 338 | ||
diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/head.c index 3e66bd364a9d..af0699ba48cf 100644 --- a/arch/x86/kernel/head.c +++ b/arch/x86/kernel/head.c | |||
| @@ -1,5 +1,6 @@ | |||
| 1 | #include <linux/kernel.h> | 1 | #include <linux/kernel.h> |
| 2 | #include <linux/init.h> | 2 | #include <linux/init.h> |
| 3 | #include <linux/memblock.h> | ||
| 3 | 4 | ||
| 4 | #include <asm/setup.h> | 5 | #include <asm/setup.h> |
| 5 | #include <asm/bios_ebda.h> | 6 | #include <asm/bios_ebda.h> |
| @@ -51,5 +52,5 @@ void __init reserve_ebda_region(void) | |||
| 51 | lowmem = 0x9f000; | 52 | lowmem = 0x9f000; |
| 52 | 53 | ||
| 53 | /* reserve all memory between lowmem and the 1MB mark */ | 54 | /* reserve all memory between lowmem and the 1MB mark */ |
| 54 | reserve_early_overlap_ok(lowmem, 0x100000, "BIOS reserved"); | 55 | memblock_x86_reserve_range(lowmem, 0x100000, "* BIOS reserved"); |
| 55 | } | 56 | } |
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index 784360c0625c..763310165fa0 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | #include <linux/init.h> | 8 | #include <linux/init.h> |
| 9 | #include <linux/start_kernel.h> | 9 | #include <linux/start_kernel.h> |
| 10 | #include <linux/mm.h> | 10 | #include <linux/mm.h> |
| 11 | #include <linux/memblock.h> | ||
| 11 | 12 | ||
| 12 | #include <asm/setup.h> | 13 | #include <asm/setup.h> |
| 13 | #include <asm/sections.h> | 14 | #include <asm/sections.h> |
| @@ -17,6 +18,7 @@ | |||
| 17 | #include <asm/apic.h> | 18 | #include <asm/apic.h> |
| 18 | #include <asm/io_apic.h> | 19 | #include <asm/io_apic.h> |
| 19 | #include <asm/bios_ebda.h> | 20 | #include <asm/bios_ebda.h> |
| 21 | #include <asm/tlbflush.h> | ||
| 20 | 22 | ||
| 21 | static void __init i386_default_early_setup(void) | 23 | static void __init i386_default_early_setup(void) |
| 22 | { | 24 | { |
| @@ -30,17 +32,18 @@ static void __init i386_default_early_setup(void) | |||
| 30 | 32 | ||
| 31 | void __init i386_start_kernel(void) | 33 | void __init i386_start_kernel(void) |
| 32 | { | 34 | { |
| 35 | memblock_init(); | ||
| 36 | |||
| 33 | #ifdef CONFIG_X86_TRAMPOLINE | 37 | #ifdef CONFIG_X86_TRAMPOLINE |
| 34 | /* | 38 | /* |
| 35 | * But first pinch a few for the stack/trampoline stuff | 39 | * But first pinch a few for the stack/trampoline stuff |
| 36 | * FIXME: Don't need the extra page at 4K, but need to fix | 40 | * FIXME: Don't need the extra page at 4K, but need to fix |
| 37 | * trampoline before removing it. (see the GDT stuff) | 41 | * trampoline before removing it. (see the GDT stuff) |
| 38 | */ | 42 | */ |
| 39 | reserve_early_overlap_ok(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, | 43 | memblock_x86_reserve_range(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE"); |
| 40 | "EX TRAMPOLINE"); | ||
| 41 | #endif | 44 | #endif |
| 42 | 45 | ||
| 43 | reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); | 46 | memblock_x86_reserve_range(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); |
| 44 | 47 | ||
| 45 | #ifdef CONFIG_BLK_DEV_INITRD | 48 | #ifdef CONFIG_BLK_DEV_INITRD |
| 46 | /* Reserve INITRD */ | 49 | /* Reserve INITRD */ |
| @@ -49,7 +52,7 @@ void __init i386_start_kernel(void) | |||
| 49 | u64 ramdisk_image = boot_params.hdr.ramdisk_image; | 52 | u64 ramdisk_image = boot_params.hdr.ramdisk_image; |
| 50 | u64 ramdisk_size = boot_params.hdr.ramdisk_size; | 53 | u64 ramdisk_size = boot_params.hdr.ramdisk_size; |
| 51 | u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); | 54 | u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); |
| 52 | reserve_early(ramdisk_image, ramdisk_end, "RAMDISK"); | 55 | memblock_x86_reserve_range(ramdisk_image, ramdisk_end, "RAMDISK"); |
| 53 | } | 56 | } |
| 54 | #endif | 57 | #endif |
| 55 | 58 | ||
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 7147143fd614..2d2673c28aff 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | #include <linux/percpu.h> | 12 | #include <linux/percpu.h> |
| 13 | #include <linux/start_kernel.h> | 13 | #include <linux/start_kernel.h> |
| 14 | #include <linux/io.h> | 14 | #include <linux/io.h> |
| 15 | #include <linux/memblock.h> | ||
| 15 | 16 | ||
| 16 | #include <asm/processor.h> | 17 | #include <asm/processor.h> |
| 17 | #include <asm/proto.h> | 18 | #include <asm/proto.h> |
| @@ -79,6 +80,8 @@ void __init x86_64_start_kernel(char * real_mode_data) | |||
| 79 | /* Cleanup the over mapped high alias */ | 80 | /* Cleanup the over mapped high alias */ |
| 80 | cleanup_highmap(); | 81 | cleanup_highmap(); |
| 81 | 82 | ||
| 83 | max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT; | ||
| 84 | |||
| 82 | for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) { | 85 | for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) { |
| 83 | #ifdef CONFIG_EARLY_PRINTK | 86 | #ifdef CONFIG_EARLY_PRINTK |
| 84 | set_intr_gate(i, &early_idt_handlers[i]); | 87 | set_intr_gate(i, &early_idt_handlers[i]); |
| @@ -98,7 +101,9 @@ void __init x86_64_start_reservations(char *real_mode_data) | |||
| 98 | { | 101 | { |
| 99 | copy_bootdata(__va(real_mode_data)); | 102 | copy_bootdata(__va(real_mode_data)); |
| 100 | 103 | ||
| 101 | reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); | 104 | memblock_init(); |
| 105 | |||
| 106 | memblock_x86_reserve_range(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); | ||
| 102 | 107 | ||
| 103 | #ifdef CONFIG_BLK_DEV_INITRD | 108 | #ifdef CONFIG_BLK_DEV_INITRD |
| 104 | /* Reserve INITRD */ | 109 | /* Reserve INITRD */ |
| @@ -107,7 +112,7 @@ void __init x86_64_start_reservations(char *real_mode_data) | |||
| 107 | unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; | 112 | unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; |
| 108 | unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; | 113 | unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; |
| 109 | unsigned long ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); | 114 | unsigned long ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); |
| 110 | reserve_early(ramdisk_image, ramdisk_end, "RAMDISK"); | 115 | memblock_x86_reserve_range(ramdisk_image, ramdisk_end, "RAMDISK"); |
| 111 | } | 116 | } |
| 112 | #endif | 117 | #endif |
| 113 | 118 | ||
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index fa8c1b8e09fb..bcece91dd311 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S | |||
| @@ -183,13 +183,12 @@ default_entry: | |||
| 183 | #ifdef CONFIG_X86_PAE | 183 | #ifdef CONFIG_X86_PAE |
| 184 | 184 | ||
| 185 | /* | 185 | /* |
| 186 | * In PAE mode swapper_pg_dir is statically defined to contain enough | 186 | * In PAE mode initial_page_table is statically defined to contain |
| 187 | * entries to cover the VMSPLIT option (that is the top 1, 2 or 3 | 187 | * enough entries to cover the VMSPLIT option (that is the top 1, 2 or 3 |
| 188 | * entries). The identity mapping is handled by pointing two PGD | 188 | * entries). The identity mapping is handled by pointing two PGD entries |
| 189 | * entries to the first kernel PMD. | 189 | * to the first kernel PMD. |
| 190 | * | 190 | * |
| 191 | * Note the upper half of each PMD or PTE are always zero at | 191 | * Note the upper half of each PMD or PTE is always zero at this stage. |
| 192 | * this stage. | ||
| 193 | */ | 192 | */ |
| 194 | 193 | ||
| 195 | #define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */ | 194 | #define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */ |
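As a quick sanity check of that expression, a standalone sketch evaluating KPMDS for the usual VMSPLIT layouts (the three __PAGE_OFFSET values below are the common 32-bit splits, assumed here purely for illustration):

    #include <stdio.h>

    /* KPMDS = (((-__PAGE_OFFSET) >> 30) & 3), evaluated in 32-bit arithmetic. */
    static unsigned int kpmds(unsigned int page_offset)
    {
            return ((0u - page_offset) >> 30) & 3;
    }

    int main(void)
    {
            printf("3G/1G split: %u kernel PMD(s)\n", kpmds(0xC0000000u)); /* 1 */
            printf("2G/2G split: %u kernel PMD(s)\n", kpmds(0x80000000u)); /* 2 */
            printf("1G/3G split: %u kernel PMD(s)\n", kpmds(0x40000000u)); /* 3 */
            return 0;
    }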
| @@ -197,7 +196,7 @@ default_entry: | |||
| 197 | xorl %ebx,%ebx /* %ebx is kept at zero */ | 196 | xorl %ebx,%ebx /* %ebx is kept at zero */ |
| 198 | 197 | ||
| 199 | movl $pa(__brk_base), %edi | 198 | movl $pa(__brk_base), %edi |
| 200 | movl $pa(swapper_pg_pmd), %edx | 199 | movl $pa(initial_pg_pmd), %edx |
| 201 | movl $PTE_IDENT_ATTR, %eax | 200 | movl $PTE_IDENT_ATTR, %eax |
| 202 | 10: | 201 | 10: |
| 203 | leal PDE_IDENT_ATTR(%edi),%ecx /* Create PMD entry */ | 202 | leal PDE_IDENT_ATTR(%edi),%ecx /* Create PMD entry */ |
| @@ -226,14 +225,14 @@ default_entry: | |||
| 226 | movl %eax, pa(max_pfn_mapped) | 225 | movl %eax, pa(max_pfn_mapped) |
| 227 | 226 | ||
| 228 | /* Do early initialization of the fixmap area */ | 227 | /* Do early initialization of the fixmap area */ |
| 229 | movl $pa(swapper_pg_fixmap)+PDE_IDENT_ATTR,%eax | 228 | movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax |
| 230 | movl %eax,pa(swapper_pg_pmd+0x1000*KPMDS-8) | 229 | movl %eax,pa(initial_pg_pmd+0x1000*KPMDS-8) |
| 231 | #else /* Not PAE */ | 230 | #else /* Not PAE */ |
| 232 | 231 | ||
| 233 | page_pde_offset = (__PAGE_OFFSET >> 20); | 232 | page_pde_offset = (__PAGE_OFFSET >> 20); |
| 234 | 233 | ||
| 235 | movl $pa(__brk_base), %edi | 234 | movl $pa(__brk_base), %edi |
| 236 | movl $pa(swapper_pg_dir), %edx | 235 | movl $pa(initial_page_table), %edx |
| 237 | movl $PTE_IDENT_ATTR, %eax | 236 | movl $PTE_IDENT_ATTR, %eax |
| 238 | 10: | 237 | 10: |
| 239 | leal PDE_IDENT_ATTR(%edi),%ecx /* Create PDE entry */ | 238 | leal PDE_IDENT_ATTR(%edi),%ecx /* Create PDE entry */ |
| @@ -257,8 +256,8 @@ page_pde_offset = (__PAGE_OFFSET >> 20); | |||
| 257 | movl %eax, pa(max_pfn_mapped) | 256 | movl %eax, pa(max_pfn_mapped) |
| 258 | 257 | ||
| 259 | /* Do early initialization of the fixmap area */ | 258 | /* Do early initialization of the fixmap area */ |
| 260 | movl $pa(swapper_pg_fixmap)+PDE_IDENT_ATTR,%eax | 259 | movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax |
| 261 | movl %eax,pa(swapper_pg_dir+0xffc) | 260 | movl %eax,pa(initial_page_table+0xffc) |
| 262 | #endif | 261 | #endif |
| 263 | jmp 3f | 262 | jmp 3f |
| 264 | /* | 263 | /* |
| @@ -334,7 +333,7 @@ ENTRY(startup_32_smp) | |||
| 334 | /* | 333 | /* |
| 335 | * Enable paging | 334 | * Enable paging |
| 336 | */ | 335 | */ |
| 337 | movl pa(initial_page_table), %eax | 336 | movl $pa(initial_page_table), %eax |
| 338 | movl %eax,%cr3 /* set the page table pointer.. */ | 337 | movl %eax,%cr3 /* set the page table pointer.. */ |
| 339 | movl %cr0,%eax | 338 | movl %cr0,%eax |
| 340 | orl $X86_CR0_PG,%eax | 339 | orl $X86_CR0_PG,%eax |
| @@ -614,8 +613,6 @@ ignore_int: | |||
| 614 | .align 4 | 613 | .align 4 |
| 615 | ENTRY(initial_code) | 614 | ENTRY(initial_code) |
| 616 | .long i386_start_kernel | 615 | .long i386_start_kernel |
| 617 | ENTRY(initial_page_table) | ||
| 618 | .long pa(swapper_pg_dir) | ||
| 619 | 616 | ||
| 620 | /* | 617 | /* |
| 621 | * BSS section | 618 | * BSS section |
| @@ -623,20 +620,18 @@ ENTRY(initial_page_table) | |||
| 623 | __PAGE_ALIGNED_BSS | 620 | __PAGE_ALIGNED_BSS |
| 624 | .align PAGE_SIZE_asm | 621 | .align PAGE_SIZE_asm |
| 625 | #ifdef CONFIG_X86_PAE | 622 | #ifdef CONFIG_X86_PAE |
| 626 | swapper_pg_pmd: | 623 | initial_pg_pmd: |
| 627 | .fill 1024*KPMDS,4,0 | 624 | .fill 1024*KPMDS,4,0 |
| 628 | #else | 625 | #else |
| 629 | ENTRY(swapper_pg_dir) | 626 | ENTRY(initial_page_table) |
| 630 | .fill 1024,4,0 | 627 | .fill 1024,4,0 |
| 631 | #endif | 628 | #endif |
| 632 | swapper_pg_fixmap: | 629 | initial_pg_fixmap: |
| 633 | .fill 1024,4,0 | 630 | .fill 1024,4,0 |
| 634 | #ifdef CONFIG_X86_TRAMPOLINE | ||
| 635 | ENTRY(trampoline_pg_dir) | ||
| 636 | .fill 1024,4,0 | ||
| 637 | #endif | ||
| 638 | ENTRY(empty_zero_page) | 631 | ENTRY(empty_zero_page) |
| 639 | .fill 4096,1,0 | 632 | .fill 4096,1,0 |
| 633 | ENTRY(swapper_pg_dir) | ||
| 634 | .fill 1024,4,0 | ||
| 640 | 635 | ||
| 641 | /* | 636 | /* |
| 642 | * This starts the data section. | 637 | * This starts the data section. |
| @@ -645,20 +640,20 @@ ENTRY(empty_zero_page) | |||
| 645 | __PAGE_ALIGNED_DATA | 640 | __PAGE_ALIGNED_DATA |
| 646 | /* Page-aligned for the benefit of paravirt? */ | 641 | /* Page-aligned for the benefit of paravirt? */ |
| 647 | .align PAGE_SIZE_asm | 642 | .align PAGE_SIZE_asm |
| 648 | ENTRY(swapper_pg_dir) | 643 | ENTRY(initial_page_table) |
| 649 | .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 /* low identity map */ | 644 | .long pa(initial_pg_pmd+PGD_IDENT_ATTR),0 /* low identity map */ |
| 650 | # if KPMDS == 3 | 645 | # if KPMDS == 3 |
| 651 | .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 | 646 | .long pa(initial_pg_pmd+PGD_IDENT_ATTR),0 |
| 652 | .long pa(swapper_pg_pmd+PGD_IDENT_ATTR+0x1000),0 | 647 | .long pa(initial_pg_pmd+PGD_IDENT_ATTR+0x1000),0 |
| 653 | .long pa(swapper_pg_pmd+PGD_IDENT_ATTR+0x2000),0 | 648 | .long pa(initial_pg_pmd+PGD_IDENT_ATTR+0x2000),0 |
| 654 | # elif KPMDS == 2 | 649 | # elif KPMDS == 2 |
| 655 | .long 0,0 | 650 | .long 0,0 |
| 656 | .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 | 651 | .long pa(initial_pg_pmd+PGD_IDENT_ATTR),0 |
| 657 | .long pa(swapper_pg_pmd+PGD_IDENT_ATTR+0x1000),0 | 652 | .long pa(initial_pg_pmd+PGD_IDENT_ATTR+0x1000),0 |
| 658 | # elif KPMDS == 1 | 653 | # elif KPMDS == 1 |
| 659 | .long 0,0 | 654 | .long 0,0 |
| 660 | .long 0,0 | 655 | .long 0,0 |
| 661 | .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 | 656 | .long pa(initial_pg_pmd+PGD_IDENT_ATTR),0 |
| 662 | # else | 657 | # else |
| 663 | # error "Kernel PMDs should be 1, 2 or 3" | 658 | # error "Kernel PMDs should be 1, 2 or 3" |
| 664 | # endif | 659 | # endif |
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 351f9c0fea1f..aff0b3c27509 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c | |||
| @@ -35,7 +35,6 @@ | |||
| 35 | unsigned long hpet_address; | 35 | unsigned long hpet_address; |
| 36 | u8 hpet_blockid; /* OS timer block num */ | 36 | u8 hpet_blockid; /* OS timer block num */ |
| 37 | u8 hpet_msi_disable; | 37 | u8 hpet_msi_disable; |
| 38 | u8 hpet_readback_cmp; | ||
| 39 | 38 | ||
| 40 | #ifdef CONFIG_PCI_MSI | 39 | #ifdef CONFIG_PCI_MSI |
| 41 | static unsigned long hpet_num_timers; | 40 | static unsigned long hpet_num_timers; |
| @@ -381,40 +380,35 @@ static int hpet_next_event(unsigned long delta, | |||
| 381 | struct clock_event_device *evt, int timer) | 380 | struct clock_event_device *evt, int timer) |
| 382 | { | 381 | { |
| 383 | u32 cnt; | 382 | u32 cnt; |
| 383 | s32 res; | ||
| 384 | 384 | ||
| 385 | cnt = hpet_readl(HPET_COUNTER); | 385 | cnt = hpet_readl(HPET_COUNTER); |
| 386 | cnt += (u32) delta; | 386 | cnt += (u32) delta; |
| 387 | hpet_writel(cnt, HPET_Tn_CMP(timer)); | 387 | hpet_writel(cnt, HPET_Tn_CMP(timer)); |
| 388 | 388 | ||
| 389 | /* | 389 | /* |
| 390 | * We need to read back the CMP register on certain HPET | 390 | * HPETs are a complete disaster. The compare register is |
| 391 | * implementations (ATI chipsets) which seem to delay the | 391 | * based on an equal comparison and neither provides a less |
| 392 | * transfer of the compare register into the internal compare | 392 | * than or equal functionality (which would require taking |
| 393 | * logic. With small deltas this might actually be too late as | 393 | * the wraparound into account) nor a simple count-down event |
| 394 | * the counter could already be higher than the compare value | 394 | * mode. Further, the write to the comparator register is |
| 395 | * at that point and we would wait for the next hpet interrupt | 395 | * delayed internally up to two HPET clock cycles in certain |
| 396 | * forever. We found out that reading the CMP register back | 396 | * chipsets (ATI, ICH9,10). We worked around that by reading |
| 397 | * forces the transfer so we can rely on the comparison with | 397 | * back the compare register, but that required another |
| 398 | * the counter register below. | 398 | * workaround for ICH9,10 chips where the first readout after |
| 399 | * | 399 | * write can return the old stale value. We already have a |
| 400 | * That works fine on those ATI chipsets, but on newer Intel | 400 | * minimum delta of 5us enforced, but an NMI or SMI hitting |
| 401 | * chipsets (ICH9...) this triggers due to an erratum: Reading | 401 | * between the counter readout and the comparator write can |
| 402 | * the comparator immediately following a write is returning | 402 | * move us behind that point easily. Now instead of reading |
| 403 | * the old value. | 403 | * the compare register back several times, we make the ETIME |
| 404 | * | 404 | * decision based on the following: Return ETIME if the |
| 405 | * We restrict the read back to the affected ATI chipsets (set | 405 | * counter value after the write is less than 8 HPET cycles |
| 406 | * by quirks) and also run it with hpet=verbose for debugging | 406 | * away from the event or if the counter is already ahead of |
| 407 | * purposes. | 407 | * the event. |
| 408 | */ | 408 | */ |
| 409 | if (hpet_readback_cmp || hpet_verbose) { | 409 | res = (s32)(cnt - hpet_readl(HPET_COUNTER)); |
| 410 | u32 cmp = hpet_readl(HPET_Tn_CMP(timer)); | ||
| 411 | |||
| 412 | if (cmp != cnt) | ||
| 413 | printk_once(KERN_WARNING | ||
| 414 | "hpet: compare register read back failed.\n"); | ||
| 415 | } | ||
| 416 | 410 | ||
| 417 | return (s32)(hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0; | 411 | return res < 8 ? -ETIME : 0; |
| 418 | } | 412 | } |
| 419 | 413 | ||
| 420 | static void hpet_legacy_set_mode(enum clock_event_mode mode, | 414 | static void hpet_legacy_set_mode(enum clock_event_mode mode, |
| @@ -437,9 +431,9 @@ static int hpet_legacy_next_event(unsigned long delta, | |||
| 437 | static DEFINE_PER_CPU(struct hpet_dev *, cpu_hpet_dev); | 431 | static DEFINE_PER_CPU(struct hpet_dev *, cpu_hpet_dev); |
| 438 | static struct hpet_dev *hpet_devs; | 432 | static struct hpet_dev *hpet_devs; |
| 439 | 433 | ||
| 440 | void hpet_msi_unmask(unsigned int irq) | 434 | void hpet_msi_unmask(struct irq_data *data) |
| 441 | { | 435 | { |
| 442 | struct hpet_dev *hdev = get_irq_data(irq); | 436 | struct hpet_dev *hdev = data->handler_data; |
| 443 | unsigned int cfg; | 437 | unsigned int cfg; |
| 444 | 438 | ||
| 445 | /* unmask it */ | 439 | /* unmask it */ |
| @@ -448,10 +442,10 @@ void hpet_msi_unmask(unsigned int irq) | |||
| 448 | hpet_writel(cfg, HPET_Tn_CFG(hdev->num)); | 442 | hpet_writel(cfg, HPET_Tn_CFG(hdev->num)); |
| 449 | } | 443 | } |
| 450 | 444 | ||
| 451 | void hpet_msi_mask(unsigned int irq) | 445 | void hpet_msi_mask(struct irq_data *data) |
| 452 | { | 446 | { |
| 447 | struct hpet_dev *hdev = data->handler_data; | ||
| 453 | unsigned int cfg; | 448 | unsigned int cfg; |
| 454 | struct hpet_dev *hdev = get_irq_data(irq); | ||
| 455 | 449 | ||
| 456 | /* mask it */ | 450 | /* mask it */ |
| 457 | cfg = hpet_readl(HPET_Tn_CFG(hdev->num)); | 451 | cfg = hpet_readl(HPET_Tn_CFG(hdev->num)); |
| @@ -459,18 +453,14 @@ void hpet_msi_mask(unsigned int irq) | |||
| 459 | hpet_writel(cfg, HPET_Tn_CFG(hdev->num)); | 453 | hpet_writel(cfg, HPET_Tn_CFG(hdev->num)); |
| 460 | } | 454 | } |
| 461 | 455 | ||
| 462 | void hpet_msi_write(unsigned int irq, struct msi_msg *msg) | 456 | void hpet_msi_write(struct hpet_dev *hdev, struct msi_msg *msg) |
| 463 | { | 457 | { |
| 464 | struct hpet_dev *hdev = get_irq_data(irq); | ||
| 465 | |||
| 466 | hpet_writel(msg->data, HPET_Tn_ROUTE(hdev->num)); | 458 | hpet_writel(msg->data, HPET_Tn_ROUTE(hdev->num)); |
| 467 | hpet_writel(msg->address_lo, HPET_Tn_ROUTE(hdev->num) + 4); | 459 | hpet_writel(msg->address_lo, HPET_Tn_ROUTE(hdev->num) + 4); |
| 468 | } | 460 | } |
| 469 | 461 | ||
| 470 | void hpet_msi_read(unsigned int irq, struct msi_msg *msg) | 462 | void hpet_msi_read(struct hpet_dev *hdev, struct msi_msg *msg) |
| 471 | { | 463 | { |
| 472 | struct hpet_dev *hdev = get_irq_data(irq); | ||
| 473 | |||
| 474 | msg->data = hpet_readl(HPET_Tn_ROUTE(hdev->num)); | 464 | msg->data = hpet_readl(HPET_Tn_ROUTE(hdev->num)); |
| 475 | msg->address_lo = hpet_readl(HPET_Tn_ROUTE(hdev->num) + 4); | 465 | msg->address_lo = hpet_readl(HPET_Tn_ROUTE(hdev->num) + 4); |
| 476 | msg->address_hi = 0; | 466 | msg->address_hi = 0; |
| @@ -503,7 +493,7 @@ static int hpet_assign_irq(struct hpet_dev *dev) | |||
| 503 | { | 493 | { |
| 504 | unsigned int irq; | 494 | unsigned int irq; |
| 505 | 495 | ||
| 506 | irq = create_irq(); | 496 | irq = create_irq_nr(0, -1); |
| 507 | if (!irq) | 497 | if (!irq) |
| 508 | return -EINVAL; | 498 | return -EINVAL; |
| 509 | 499 | ||
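Note on the hpet_next_event() rework above: instead of reading the comparator back, the new code re-reads the main counter once and returns -ETIME whenever the just-written comparator is less than 8 HPET cycles ahead of (or already behind) the counter. The signed 32-bit subtraction is what keeps that test correct across counter wraparound. A minimal, self-contained sketch of the check (userspace C with hypothetical values, not kernel code):

    #include <stdint.h>
    #include <stdio.h>

    /* Mirrors "res = (s32)(cnt - hpet_readl(HPET_COUNTER)); return res < 8 ? -ETIME : 0". */
    static int hpet_event_missed(uint32_t cmp, uint32_t counter)
    {
            int32_t res = (int32_t)(cmp - counter);
            return res < 8;
    }

    int main(void)
    {
            printf("%d\n", hpet_event_missed(0x00000064u, 0x00000000u)); /* 0: 100 cycles ahead */
            printf("%d\n", hpet_event_missed(0x00000002u, 0x00000000u)); /* 1: too close, -ETIME */
            printf("%d\n", hpet_event_missed(0x00000010u, 0xfffffff0u)); /* 0: 32 cycles ahead across wrap */
            printf("%d\n", hpet_event_missed(0xfffffff0u, 0x00000010u)); /* 1: counter already passed it */
            return 0;
    }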
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index a474ec37c32f..ff15c9dcc25d 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c | |||
| @@ -206,11 +206,27 @@ int arch_check_bp_in_kernelspace(struct perf_event *bp) | |||
| 206 | int arch_bp_generic_fields(int x86_len, int x86_type, | 206 | int arch_bp_generic_fields(int x86_len, int x86_type, |
| 207 | int *gen_len, int *gen_type) | 207 | int *gen_len, int *gen_type) |
| 208 | { | 208 | { |
| 209 | /* Len */ | 209 | /* Type */ |
| 210 | switch (x86_len) { | 210 | switch (x86_type) { |
| 211 | case X86_BREAKPOINT_LEN_X: | 211 | case X86_BREAKPOINT_EXECUTE: |
| 212 | if (x86_len != X86_BREAKPOINT_LEN_X) | ||
| 213 | return -EINVAL; | ||
| 214 | |||
| 215 | *gen_type = HW_BREAKPOINT_X; | ||
| 212 | *gen_len = sizeof(long); | 216 | *gen_len = sizeof(long); |
| 217 | return 0; | ||
| 218 | case X86_BREAKPOINT_WRITE: | ||
| 219 | *gen_type = HW_BREAKPOINT_W; | ||
| 213 | break; | 220 | break; |
| 221 | case X86_BREAKPOINT_RW: | ||
| 222 | *gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R; | ||
| 223 | break; | ||
| 224 | default: | ||
| 225 | return -EINVAL; | ||
| 226 | } | ||
| 227 | |||
| 228 | /* Len */ | ||
| 229 | switch (x86_len) { | ||
| 214 | case X86_BREAKPOINT_LEN_1: | 230 | case X86_BREAKPOINT_LEN_1: |
| 215 | *gen_len = HW_BREAKPOINT_LEN_1; | 231 | *gen_len = HW_BREAKPOINT_LEN_1; |
| 216 | break; | 232 | break; |
| @@ -229,21 +245,6 @@ int arch_bp_generic_fields(int x86_len, int x86_type, | |||
| 229 | return -EINVAL; | 245 | return -EINVAL; |
| 230 | } | 246 | } |
| 231 | 247 | ||
| 232 | /* Type */ | ||
| 233 | switch (x86_type) { | ||
| 234 | case X86_BREAKPOINT_EXECUTE: | ||
| 235 | *gen_type = HW_BREAKPOINT_X; | ||
| 236 | break; | ||
| 237 | case X86_BREAKPOINT_WRITE: | ||
| 238 | *gen_type = HW_BREAKPOINT_W; | ||
| 239 | break; | ||
| 240 | case X86_BREAKPOINT_RW: | ||
| 241 | *gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R; | ||
| 242 | break; | ||
| 243 | default: | ||
| 244 | return -EINVAL; | ||
| 245 | } | ||
| 246 | |||
| 247 | return 0; | 248 | return 0; |
| 248 | } | 249 | } |
| 249 | 250 | ||
| @@ -316,9 +317,6 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) | |||
| 316 | ret = -EINVAL; | 317 | ret = -EINVAL; |
| 317 | 318 | ||
| 318 | switch (info->len) { | 319 | switch (info->len) { |
| 319 | case X86_BREAKPOINT_LEN_X: | ||
| 320 | align = sizeof(long) -1; | ||
| 321 | break; | ||
| 322 | case X86_BREAKPOINT_LEN_1: | 320 | case X86_BREAKPOINT_LEN_1: |
| 323 | align = 0; | 321 | align = 0; |
| 324 | break; | 322 | break; |
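Note on the arch_bp_generic_fields() reordering above: evaluating the type first means X86_BREAKPOINT_LEN_X is accepted only together with X86_BREAKPOINT_EXECUTE, where the generic length is forced to sizeof(long) and the length switch is skipped by the early return; data breakpoints then map their 1/2/4-byte lengths as before. A rough stand-alone sketch of that mapping, using simplified placeholder constants rather than the kernel's definitions:

    #include <stddef.h>

    enum { BP_EXECUTE, BP_WRITE, BP_RW };            /* placeholder types   */
    enum { BP_LEN_X, BP_LEN_1, BP_LEN_2, BP_LEN_4 }; /* placeholder lengths */
    enum { GEN_X = 1, GEN_W = 2, GEN_R = 4 };

    static int map_bp(int type, int len, int *gen_type, size_t *gen_len)
    {
            switch (type) {                 /* type checked first, as in the patch */
            case BP_EXECUTE:
                    if (len != BP_LEN_X)
                            return -1;      /* LEN_X only valid for execute */
                    *gen_type = GEN_X;
                    *gen_len = sizeof(long);
                    return 0;
            case BP_WRITE:
                    *gen_type = GEN_W;
                    break;
            case BP_RW:
                    *gen_type = GEN_W | GEN_R;
                    break;
            default:
                    return -1;
            }
            switch (len) {                  /* data breakpoints: 1, 2 or 4 bytes */
            case BP_LEN_1: *gen_len = 1; return 0;
            case BP_LEN_2: *gen_len = 2; return 0;
            case BP_LEN_4: *gen_len = 4; return 0;
            default:       return -1;
            }
    }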
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index a46cb3522c0c..58bb239a2fd7 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c | |||
| @@ -68,19 +68,22 @@ static void __cpuinit init_thread_xstate(void) | |||
| 68 | */ | 68 | */ |
| 69 | 69 | ||
| 70 | if (!HAVE_HWFP) { | 70 | if (!HAVE_HWFP) { |
| 71 | /* | ||
| 72 | * Disable xsave as we do not support it if i387 | ||
| 73 | * emulation is enabled. | ||
| 74 | */ | ||
| 75 | setup_clear_cpu_cap(X86_FEATURE_XSAVE); | ||
| 76 | setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); | ||
| 71 | xstate_size = sizeof(struct i387_soft_struct); | 77 | xstate_size = sizeof(struct i387_soft_struct); |
| 72 | return; | 78 | return; |
| 73 | } | 79 | } |
| 74 | 80 | ||
| 75 | if (cpu_has_fxsr) | 81 | if (cpu_has_fxsr) |
| 76 | xstate_size = sizeof(struct i387_fxsave_struct); | 82 | xstate_size = sizeof(struct i387_fxsave_struct); |
| 77 | #ifdef CONFIG_X86_32 | ||
| 78 | else | 83 | else |
| 79 | xstate_size = sizeof(struct i387_fsave_struct); | 84 | xstate_size = sizeof(struct i387_fsave_struct); |
| 80 | #endif | ||
| 81 | } | 85 | } |
| 82 | 86 | ||
| 83 | #ifdef CONFIG_X86_64 | ||
| 84 | /* | 87 | /* |
| 85 | * Called at bootup to set up the initial FPU state that is later cloned | 88 | * Called at bootup to set up the initial FPU state that is later cloned |
| 86 | * into all processes. | 89 | * into all processes. |
| @@ -88,12 +91,21 @@ static void __cpuinit init_thread_xstate(void) | |||
| 88 | 91 | ||
| 89 | void __cpuinit fpu_init(void) | 92 | void __cpuinit fpu_init(void) |
| 90 | { | 93 | { |
| 91 | unsigned long oldcr0 = read_cr0(); | 94 | unsigned long cr0; |
| 92 | 95 | unsigned long cr4_mask = 0; | |
| 93 | set_in_cr4(X86_CR4_OSFXSR); | ||
| 94 | set_in_cr4(X86_CR4_OSXMMEXCPT); | ||
| 95 | 96 | ||
| 96 | write_cr0(oldcr0 & ~(X86_CR0_TS|X86_CR0_EM)); /* clear TS and EM */ | 97 | if (cpu_has_fxsr) |
| 98 | cr4_mask |= X86_CR4_OSFXSR; | ||
| 99 | if (cpu_has_xmm) | ||
| 100 | cr4_mask |= X86_CR4_OSXMMEXCPT; | ||
| 101 | if (cr4_mask) | ||
| 102 | set_in_cr4(cr4_mask); | ||
| 103 | |||
| 104 | cr0 = read_cr0(); | ||
| 105 | cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */ | ||
| 106 | if (!HAVE_HWFP) | ||
| 107 | cr0 |= X86_CR0_EM; | ||
| 108 | write_cr0(cr0); | ||
| 97 | 109 | ||
| 98 | if (!smp_processor_id()) | 110 | if (!smp_processor_id()) |
| 99 | init_thread_xstate(); | 111 | init_thread_xstate(); |
| @@ -104,24 +116,12 @@ void __cpuinit fpu_init(void) | |||
| 104 | clear_used_math(); | 116 | clear_used_math(); |
| 105 | } | 117 | } |
| 106 | 118 | ||
| 107 | #else /* CONFIG_X86_64 */ | ||
| 108 | |||
| 109 | void __cpuinit fpu_init(void) | ||
| 110 | { | ||
| 111 | if (!smp_processor_id()) | ||
| 112 | init_thread_xstate(); | ||
| 113 | } | ||
| 114 | |||
| 115 | #endif /* CONFIG_X86_32 */ | ||
| 116 | |||
| 117 | void fpu_finit(struct fpu *fpu) | 119 | void fpu_finit(struct fpu *fpu) |
| 118 | { | 120 | { |
| 119 | #ifdef CONFIG_X86_32 | ||
| 120 | if (!HAVE_HWFP) { | 121 | if (!HAVE_HWFP) { |
| 121 | finit_soft_fpu(&fpu->state->soft); | 122 | finit_soft_fpu(&fpu->state->soft); |
| 122 | return; | 123 | return; |
| 123 | } | 124 | } |
| 124 | #endif | ||
| 125 | 125 | ||
| 126 | if (cpu_has_fxsr) { | 126 | if (cpu_has_fxsr) { |
| 127 | struct i387_fxsave_struct *fx = &fpu->state->fxsave; | 127 | struct i387_fxsave_struct *fx = &fpu->state->fxsave; |
| @@ -386,19 +386,17 @@ convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) | |||
| 386 | #ifdef CONFIG_X86_64 | 386 | #ifdef CONFIG_X86_64 |
| 387 | env->fip = fxsave->rip; | 387 | env->fip = fxsave->rip; |
| 388 | env->foo = fxsave->rdp; | 388 | env->foo = fxsave->rdp; |
| 389 | /* | ||
| 390 | * should be actually ds/cs at fpu exception time, but | ||
| 391 | * that information is not available in 64bit mode. | ||
| 392 | */ | ||
| 393 | env->fcs = task_pt_regs(tsk)->cs; | ||
| 389 | if (tsk == current) { | 394 | if (tsk == current) { |
| 390 | /* | 395 | savesegment(ds, env->fos); |
| 391 | * should be actually ds/cs at fpu exception time, but | ||
| 392 | * that information is not available in 64bit mode. | ||
| 393 | */ | ||
| 394 | asm("mov %%ds, %[fos]" : [fos] "=r" (env->fos)); | ||
| 395 | asm("mov %%cs, %[fcs]" : [fcs] "=r" (env->fcs)); | ||
| 396 | } else { | 396 | } else { |
| 397 | struct pt_regs *regs = task_pt_regs(tsk); | 397 | env->fos = tsk->thread.ds; |
| 398 | |||
| 399 | env->fos = 0xffff0000 | tsk->thread.ds; | ||
| 400 | env->fcs = regs->cs; | ||
| 401 | } | 398 | } |
| 399 | env->fos |= 0xffff0000; | ||
| 402 | #else | 400 | #else |
| 403 | env->fip = fxsave->fip; | 401 | env->fip = fxsave->fip; |
| 404 | env->fcs = (u16) fxsave->fcs | ((u32) fxsave->fop << 16); | 402 | env->fcs = (u16) fxsave->fcs | ((u32) fxsave->fop << 16); |
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index cafa7c80ac95..20757cb2efa3 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c | |||
| @@ -29,24 +29,10 @@ | |||
| 29 | * plus some generic x86 specific things if generic specifics makes | 29 | * plus some generic x86 specific things if generic specifics makes |
| 30 | * any sense at all. | 30 | * any sense at all. |
| 31 | */ | 31 | */ |
| 32 | static void init_8259A(int auto_eoi); | ||
| 32 | 33 | ||
| 33 | static int i8259A_auto_eoi; | 34 | static int i8259A_auto_eoi; |
| 34 | DEFINE_RAW_SPINLOCK(i8259A_lock); | 35 | DEFINE_RAW_SPINLOCK(i8259A_lock); |
| 35 | static void mask_and_ack_8259A(unsigned int); | ||
| 36 | static void mask_8259A(void); | ||
| 37 | static void unmask_8259A(void); | ||
| 38 | static void disable_8259A_irq(unsigned int irq); | ||
| 39 | static void enable_8259A_irq(unsigned int irq); | ||
| 40 | static void init_8259A(int auto_eoi); | ||
| 41 | static int i8259A_irq_pending(unsigned int irq); | ||
| 42 | |||
| 43 | struct irq_chip i8259A_chip = { | ||
| 44 | .name = "XT-PIC", | ||
| 45 | .mask = disable_8259A_irq, | ||
| 46 | .disable = disable_8259A_irq, | ||
| 47 | .unmask = enable_8259A_irq, | ||
| 48 | .mask_ack = mask_and_ack_8259A, | ||
| 49 | }; | ||
| 50 | 36 | ||
| 51 | /* | 37 | /* |
| 52 | * 8259A PIC functions to handle ISA devices: | 38 | * 8259A PIC functions to handle ISA devices: |
| @@ -68,7 +54,7 @@ unsigned int cached_irq_mask = 0xffff; | |||
| 68 | */ | 54 | */ |
| 69 | unsigned long io_apic_irqs; | 55 | unsigned long io_apic_irqs; |
| 70 | 56 | ||
| 71 | static void disable_8259A_irq(unsigned int irq) | 57 | static void mask_8259A_irq(unsigned int irq) |
| 72 | { | 58 | { |
| 73 | unsigned int mask = 1 << irq; | 59 | unsigned int mask = 1 << irq; |
| 74 | unsigned long flags; | 60 | unsigned long flags; |
| @@ -82,7 +68,12 @@ static void disable_8259A_irq(unsigned int irq) | |||
| 82 | raw_spin_unlock_irqrestore(&i8259A_lock, flags); | 68 | raw_spin_unlock_irqrestore(&i8259A_lock, flags); |
| 83 | } | 69 | } |
| 84 | 70 | ||
| 85 | static void enable_8259A_irq(unsigned int irq) | 71 | static void disable_8259A_irq(struct irq_data *data) |
| 72 | { | ||
| 73 | mask_8259A_irq(data->irq); | ||
| 74 | } | ||
| 75 | |||
| 76 | static void unmask_8259A_irq(unsigned int irq) | ||
| 86 | { | 77 | { |
| 87 | unsigned int mask = ~(1 << irq); | 78 | unsigned int mask = ~(1 << irq); |
| 88 | unsigned long flags; | 79 | unsigned long flags; |
| @@ -96,6 +87,11 @@ static void enable_8259A_irq(unsigned int irq) | |||
| 96 | raw_spin_unlock_irqrestore(&i8259A_lock, flags); | 87 | raw_spin_unlock_irqrestore(&i8259A_lock, flags); |
| 97 | } | 88 | } |
| 98 | 89 | ||
| 90 | static void enable_8259A_irq(struct irq_data *data) | ||
| 91 | { | ||
| 92 | unmask_8259A_irq(data->irq); | ||
| 93 | } | ||
| 94 | |||
| 99 | static int i8259A_irq_pending(unsigned int irq) | 95 | static int i8259A_irq_pending(unsigned int irq) |
| 100 | { | 96 | { |
| 101 | unsigned int mask = 1<<irq; | 97 | unsigned int mask = 1<<irq; |
| @@ -117,7 +113,7 @@ static void make_8259A_irq(unsigned int irq) | |||
| 117 | disable_irq_nosync(irq); | 113 | disable_irq_nosync(irq); |
| 118 | io_apic_irqs &= ~(1<<irq); | 114 | io_apic_irqs &= ~(1<<irq); |
| 119 | set_irq_chip_and_handler_name(irq, &i8259A_chip, handle_level_irq, | 115 | set_irq_chip_and_handler_name(irq, &i8259A_chip, handle_level_irq, |
| 120 | "XT"); | 116 | i8259A_chip.name); |
| 121 | enable_irq(irq); | 117 | enable_irq(irq); |
| 122 | } | 118 | } |
| 123 | 119 | ||
| @@ -150,8 +146,9 @@ static inline int i8259A_irq_real(unsigned int irq) | |||
| 150 | * first, _then_ send the EOI, and the order of EOI | 146 | * first, _then_ send the EOI, and the order of EOI |
| 151 | * to the two 8259s is important! | 147 | * to the two 8259s is important! |
| 152 | */ | 148 | */ |
| 153 | static void mask_and_ack_8259A(unsigned int irq) | 149 | static void mask_and_ack_8259A(struct irq_data *data) |
| 154 | { | 150 | { |
| 151 | unsigned int irq = data->irq; | ||
| 155 | unsigned int irqmask = 1 << irq; | 152 | unsigned int irqmask = 1 << irq; |
| 156 | unsigned long flags; | 153 | unsigned long flags; |
| 157 | 154 | ||
| @@ -223,6 +220,14 @@ spurious_8259A_irq: | |||
| 223 | } | 220 | } |
| 224 | } | 221 | } |
| 225 | 222 | ||
| 223 | struct irq_chip i8259A_chip = { | ||
| 224 | .name = "XT-PIC", | ||
| 225 | .irq_mask = disable_8259A_irq, | ||
| 226 | .irq_disable = disable_8259A_irq, | ||
| 227 | .irq_unmask = enable_8259A_irq, | ||
| 228 | .irq_mask_ack = mask_and_ack_8259A, | ||
| 229 | }; | ||
| 230 | |||
| 226 | static char irq_trigger[2]; | 231 | static char irq_trigger[2]; |
| 227 | /** | 232 | /** |
| 228 | * ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ | 233 | * ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ |
| @@ -342,9 +347,9 @@ static void init_8259A(int auto_eoi) | |||
| 342 | * In AEOI mode we just have to mask the interrupt | 347 | * In AEOI mode we just have to mask the interrupt |
| 343 | * when acking. | 348 | * when acking. |
| 344 | */ | 349 | */ |
| 345 | i8259A_chip.mask_ack = disable_8259A_irq; | 350 | i8259A_chip.irq_mask_ack = disable_8259A_irq; |
| 346 | else | 351 | else |
| 347 | i8259A_chip.mask_ack = mask_and_ack_8259A; | 352 | i8259A_chip.irq_mask_ack = mask_and_ack_8259A; |
| 348 | 353 | ||
| 349 | udelay(100); /* wait for 8259A to initialize */ | 354 | udelay(100); /* wait for 8259A to initialize */ |
| 350 | 355 | ||
| @@ -363,14 +368,6 @@ static void init_8259A(int auto_eoi) | |||
| 363 | static void legacy_pic_noop(void) { }; | 368 | static void legacy_pic_noop(void) { }; |
| 364 | static void legacy_pic_uint_noop(unsigned int unused) { }; | 369 | static void legacy_pic_uint_noop(unsigned int unused) { }; |
| 365 | static void legacy_pic_int_noop(int unused) { }; | 370 | static void legacy_pic_int_noop(int unused) { }; |
| 366 | |||
| 367 | static struct irq_chip dummy_pic_chip = { | ||
| 368 | .name = "dummy pic", | ||
| 369 | .mask = legacy_pic_uint_noop, | ||
| 370 | .unmask = legacy_pic_uint_noop, | ||
| 371 | .disable = legacy_pic_uint_noop, | ||
| 372 | .mask_ack = legacy_pic_uint_noop, | ||
| 373 | }; | ||
| 374 | static int legacy_pic_irq_pending_noop(unsigned int irq) | 371 | static int legacy_pic_irq_pending_noop(unsigned int irq) |
| 375 | { | 372 | { |
| 376 | return 0; | 373 | return 0; |
| @@ -378,7 +375,9 @@ static int legacy_pic_irq_pending_noop(unsigned int irq) | |||
| 378 | 375 | ||
| 379 | struct legacy_pic null_legacy_pic = { | 376 | struct legacy_pic null_legacy_pic = { |
| 380 | .nr_legacy_irqs = 0, | 377 | .nr_legacy_irqs = 0, |
| 381 | .chip = &dummy_pic_chip, | 378 | .chip = &dummy_irq_chip, |
| 379 | .mask = legacy_pic_uint_noop, | ||
| 380 | .unmask = legacy_pic_uint_noop, | ||
| 382 | .mask_all = legacy_pic_noop, | 381 | .mask_all = legacy_pic_noop, |
| 383 | .restore_mask = legacy_pic_noop, | 382 | .restore_mask = legacy_pic_noop, |
| 384 | .init = legacy_pic_int_noop, | 383 | .init = legacy_pic_int_noop, |
| @@ -389,7 +388,9 @@ struct legacy_pic null_legacy_pic = { | |||
| 389 | struct legacy_pic default_legacy_pic = { | 388 | struct legacy_pic default_legacy_pic = { |
| 390 | .nr_legacy_irqs = NR_IRQS_LEGACY, | 389 | .nr_legacy_irqs = NR_IRQS_LEGACY, |
| 391 | .chip = &i8259A_chip, | 390 | .chip = &i8259A_chip, |
| 392 | .mask_all = mask_8259A, | 391 | .mask = mask_8259A_irq, |
| 392 | .unmask = unmask_8259A_irq, | ||
| 393 | .mask_all = mask_8259A, | ||
| 393 | .restore_mask = unmask_8259A, | 394 | .restore_mask = unmask_8259A, |
| 394 | .init = init_8259A, | 395 | .init = init_8259A, |
| 395 | .irq_pending = i8259A_irq_pending, | 396 | .irq_pending = i8259A_irq_pending, |
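Note on the i8259 hunks above: they are part of the genirq move from irq-number callbacks (.mask(unsigned int irq)) to irq_data-based callbacks (.irq_mask(struct irq_data *data)); the hpet MSI and fixup_irqs() changes in this series follow the same pattern. A hedged sketch of a chip written against the new signatures, with made-up helper names:

    #include <linux/irq.h>

    static void demo_hw_mask(unsigned int hwirq)   { /* poke the hardware */ }
    static void demo_hw_unmask(unsigned int hwirq) { /* poke the hardware */ }

    /* New-style callbacks receive struct irq_data; the Linux irq number is
     * data->irq and per-irq cookies hang off the same structure, so the old
     * get_irq_data(irq) lookup goes away. */
    static void demo_irq_mask(struct irq_data *data)
    {
            demo_hw_mask(data->irq);
    }

    static void demo_irq_unmask(struct irq_data *data)
    {
            demo_hw_unmask(data->irq);
    }

    static struct irq_chip demo_chip = {
            .name       = "demo",
            .irq_mask   = demo_irq_mask,
            .irq_unmask = demo_irq_unmask,
    };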
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 91fd0c70a18a..83ec0175f986 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c | |||
| @@ -67,10 +67,10 @@ static int show_other_interrupts(struct seq_file *p, int prec) | |||
| 67 | for_each_online_cpu(j) | 67 | for_each_online_cpu(j) |
| 68 | seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs); | 68 | seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs); |
| 69 | seq_printf(p, " Performance monitoring interrupts\n"); | 69 | seq_printf(p, " Performance monitoring interrupts\n"); |
| 70 | seq_printf(p, "%*s: ", prec, "PND"); | 70 | seq_printf(p, "%*s: ", prec, "IWI"); |
| 71 | for_each_online_cpu(j) | 71 | for_each_online_cpu(j) |
| 72 | seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs); | 72 | seq_printf(p, "%10u ", irq_stats(j)->apic_irq_work_irqs); |
| 73 | seq_printf(p, " Performance pending work\n"); | 73 | seq_printf(p, " IRQ work interrupts\n"); |
| 74 | #endif | 74 | #endif |
| 75 | if (x86_platform_ipi_callback) { | 75 | if (x86_platform_ipi_callback) { |
| 76 | seq_printf(p, "%*s: ", prec, "PLT"); | 76 | seq_printf(p, "%*s: ", prec, "PLT"); |
| @@ -159,7 +159,7 @@ int show_interrupts(struct seq_file *p, void *v) | |||
| 159 | seq_printf(p, "%*d: ", prec, i); | 159 | seq_printf(p, "%*d: ", prec, i); |
| 160 | for_each_online_cpu(j) | 160 | for_each_online_cpu(j) |
| 161 | seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); | 161 | seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); |
| 162 | seq_printf(p, " %8s", desc->chip->name); | 162 | seq_printf(p, " %8s", desc->irq_data.chip->name); |
| 163 | seq_printf(p, "-%-8s", desc->name); | 163 | seq_printf(p, "-%-8s", desc->name); |
| 164 | 164 | ||
| 165 | if (action) { | 165 | if (action) { |
| @@ -185,7 +185,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu) | |||
| 185 | sum += irq_stats(cpu)->apic_timer_irqs; | 185 | sum += irq_stats(cpu)->apic_timer_irqs; |
| 186 | sum += irq_stats(cpu)->irq_spurious_count; | 186 | sum += irq_stats(cpu)->irq_spurious_count; |
| 187 | sum += irq_stats(cpu)->apic_perf_irqs; | 187 | sum += irq_stats(cpu)->apic_perf_irqs; |
| 188 | sum += irq_stats(cpu)->apic_pending_irqs; | 188 | sum += irq_stats(cpu)->apic_irq_work_irqs; |
| 189 | #endif | 189 | #endif |
| 190 | if (x86_platform_ipi_callback) | 190 | if (x86_platform_ipi_callback) |
| 191 | sum += irq_stats(cpu)->x86_platform_ipis; | 191 | sum += irq_stats(cpu)->x86_platform_ipis; |
| @@ -282,6 +282,7 @@ void fixup_irqs(void) | |||
| 282 | unsigned int irq, vector; | 282 | unsigned int irq, vector; |
| 283 | static int warned; | 283 | static int warned; |
| 284 | struct irq_desc *desc; | 284 | struct irq_desc *desc; |
| 285 | struct irq_data *data; | ||
| 285 | 286 | ||
| 286 | for_each_irq_desc(irq, desc) { | 287 | for_each_irq_desc(irq, desc) { |
| 287 | int break_affinity = 0; | 288 | int break_affinity = 0; |
| @@ -296,7 +297,8 @@ void fixup_irqs(void) | |||
| 296 | /* interrupts are disabled at this point */ | 297 | /* interrupts are disabled at this point */ |
| 297 | raw_spin_lock(&desc->lock); | 298 | raw_spin_lock(&desc->lock); |
| 298 | 299 | ||
| 299 | affinity = desc->affinity; | 300 | data = &desc->irq_data; |
| 301 | affinity = data->affinity; | ||
| 300 | if (!irq_has_action(irq) || | 302 | if (!irq_has_action(irq) || |
| 301 | cpumask_equal(affinity, cpu_online_mask)) { | 303 | cpumask_equal(affinity, cpu_online_mask)) { |
| 302 | raw_spin_unlock(&desc->lock); | 304 | raw_spin_unlock(&desc->lock); |
| @@ -315,16 +317,16 @@ void fixup_irqs(void) | |||
| 315 | affinity = cpu_all_mask; | 317 | affinity = cpu_all_mask; |
| 316 | } | 318 | } |
| 317 | 319 | ||
| 318 | if (!(desc->status & IRQ_MOVE_PCNTXT) && desc->chip->mask) | 320 | if (!(desc->status & IRQ_MOVE_PCNTXT) && data->chip->irq_mask) |
| 319 | desc->chip->mask(irq); | 321 | data->chip->irq_mask(data); |
| 320 | 322 | ||
| 321 | if (desc->chip->set_affinity) | 323 | if (data->chip->irq_set_affinity) |
| 322 | desc->chip->set_affinity(irq, affinity); | 324 | data->chip->irq_set_affinity(data, affinity, true); |
| 323 | else if (!(warned++)) | 325 | else if (!(warned++)) |
| 324 | set_affinity = 0; | 326 | set_affinity = 0; |
| 325 | 327 | ||
| 326 | if (!(desc->status & IRQ_MOVE_PCNTXT) && desc->chip->unmask) | 328 | if (!(desc->status & IRQ_MOVE_PCNTXT) && data->chip->irq_unmask) |
| 327 | desc->chip->unmask(irq); | 329 | data->chip->irq_unmask(data); |
| 328 | 330 | ||
| 329 | raw_spin_unlock(&desc->lock); | 331 | raw_spin_unlock(&desc->lock); |
| 330 | 332 | ||
| @@ -355,10 +357,10 @@ void fixup_irqs(void) | |||
| 355 | if (irr & (1 << (vector % 32))) { | 357 | if (irr & (1 << (vector % 32))) { |
| 356 | irq = __get_cpu_var(vector_irq)[vector]; | 358 | irq = __get_cpu_var(vector_irq)[vector]; |
| 357 | 359 | ||
| 358 | desc = irq_to_desc(irq); | 360 | data = irq_get_irq_data(irq); |
| 359 | raw_spin_lock(&desc->lock); | 361 | raw_spin_lock(&desc->lock); |
| 360 | if (desc->chip->retrigger) | 362 | if (data->chip->irq_retrigger) |
| 361 | desc->chip->retrigger(irq); | 363 | data->chip->irq_retrigger(data); |
| 362 | raw_spin_unlock(&desc->lock); | 364 | raw_spin_unlock(&desc->lock); |
| 363 | } | 365 | } |
| 364 | } | 366 | } |
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 10709f29d166..50fbbe60e507 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c | |||
| @@ -49,20 +49,19 @@ static inline int check_stack_overflow(void) { return 0; } | |||
| 49 | static inline void print_stack_overflow(void) { } | 49 | static inline void print_stack_overflow(void) { } |
| 50 | #endif | 50 | #endif |
| 51 | 51 | ||
| 52 | #ifdef CONFIG_4KSTACKS | ||
| 53 | /* | 52 | /* |
| 54 | * per-CPU IRQ handling contexts (thread information and stack) | 53 | * per-CPU IRQ handling contexts (thread information and stack) |
| 55 | */ | 54 | */ |
| 56 | union irq_ctx { | 55 | union irq_ctx { |
| 57 | struct thread_info tinfo; | 56 | struct thread_info tinfo; |
| 58 | u32 stack[THREAD_SIZE/sizeof(u32)]; | 57 | u32 stack[THREAD_SIZE/sizeof(u32)]; |
| 59 | } __attribute__((aligned(PAGE_SIZE))); | 58 | } __attribute__((aligned(THREAD_SIZE))); |
| 60 | 59 | ||
| 61 | static DEFINE_PER_CPU(union irq_ctx *, hardirq_ctx); | 60 | static DEFINE_PER_CPU(union irq_ctx *, hardirq_ctx); |
| 62 | static DEFINE_PER_CPU(union irq_ctx *, softirq_ctx); | 61 | static DEFINE_PER_CPU(union irq_ctx *, softirq_ctx); |
| 63 | 62 | ||
| 64 | static DEFINE_PER_CPU_PAGE_ALIGNED(union irq_ctx, hardirq_stack); | 63 | static DEFINE_PER_CPU_MULTIPAGE_ALIGNED(union irq_ctx, hardirq_stack, THREAD_SIZE); |
| 65 | static DEFINE_PER_CPU_PAGE_ALIGNED(union irq_ctx, softirq_stack); | 64 | static DEFINE_PER_CPU_MULTIPAGE_ALIGNED(union irq_ctx, softirq_stack, THREAD_SIZE); |
| 66 | 65 | ||
| 67 | static void call_on_stack(void *func, void *stack) | 66 | static void call_on_stack(void *func, void *stack) |
| 68 | { | 67 | { |
| @@ -187,11 +186,6 @@ asmlinkage void do_softirq(void) | |||
| 187 | local_irq_restore(flags); | 186 | local_irq_restore(flags); |
| 188 | } | 187 | } |
| 189 | 188 | ||
| 190 | #else | ||
| 191 | static inline int | ||
| 192 | execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) { return 0; } | ||
| 193 | #endif | ||
| 194 | |||
| 195 | bool handle_irq(unsigned irq, struct pt_regs *regs) | 189 | bool handle_irq(unsigned irq, struct pt_regs *regs) |
| 196 | { | 190 | { |
| 197 | struct irq_desc *desc; | 191 | struct irq_desc *desc; |
diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c new file mode 100644 index 000000000000..ca8f703a1e70 --- /dev/null +++ b/arch/x86/kernel/irq_work.c | |||
| @@ -0,0 +1,30 @@ | |||
| 1 | /* | ||
| 2 | * x86 specific code for irq_work | ||
| 3 | * | ||
| 4 | * Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> | ||
| 5 | */ | ||
| 6 | |||
| 7 | #include <linux/kernel.h> | ||
| 8 | #include <linux/irq_work.h> | ||
| 9 | #include <linux/hardirq.h> | ||
| 10 | #include <asm/apic.h> | ||
| 11 | |||
| 12 | void smp_irq_work_interrupt(struct pt_regs *regs) | ||
| 13 | { | ||
| 14 | irq_enter(); | ||
| 15 | ack_APIC_irq(); | ||
| 16 | inc_irq_stat(apic_irq_work_irqs); | ||
| 17 | irq_work_run(); | ||
| 18 | irq_exit(); | ||
| 19 | } | ||
| 20 | |||
| 21 | void arch_irq_work_raise(void) | ||
| 22 | { | ||
| 23 | #ifdef CONFIG_X86_LOCAL_APIC | ||
| 24 | if (!cpu_has_apic) | ||
| 25 | return; | ||
| 26 | |||
| 27 | apic->send_IPI_self(IRQ_WORK_VECTOR); | ||
| 28 | apic_wait_icr_idle(); | ||
| 29 | #endif | ||
| 30 | } | ||
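Note on the new irq_work.c above: arch_irq_work_raise() sends IRQ_WORK_VECTOR to the local CPU and smp_irq_work_interrupt() then runs the queued callbacks from hard-irq context, which is what lets code in NMI-like context defer work safely. A minimal sketch of a caller of the generic facility (names hypothetical, assuming the irq_work API this series introduces):

    #include <linux/kernel.h>
    #include <linux/irq_work.h>

    static void demo_irq_work_func(struct irq_work *work)
    {
            /* Runs in hard interrupt context once the self-IPI arrives. */
            pr_info("irq_work callback ran\n");
    }

    static struct irq_work demo_work;

    /* Called from a context that must not take locks or sleep (e.g. NMI):
     * queue the work and let arch_irq_work_raise() kick the self-IPI. */
    static void demo_defer(void)
    {
            demo_work.func = demo_irq_work_func;
            irq_work_queue(&demo_work);
    }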
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 990ae7cfc578..c752e973958d 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c | |||
| @@ -100,6 +100,8 @@ int vector_used_by_percpu_irq(unsigned int vector) | |||
| 100 | 100 | ||
| 101 | void __init init_ISA_irqs(void) | 101 | void __init init_ISA_irqs(void) |
| 102 | { | 102 | { |
| 103 | struct irq_chip *chip = legacy_pic->chip; | ||
| 104 | const char *name = chip->name; | ||
| 103 | int i; | 105 | int i; |
| 104 | 106 | ||
| 105 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) | 107 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) |
| @@ -107,19 +109,8 @@ void __init init_ISA_irqs(void) | |||
| 107 | #endif | 109 | #endif |
| 108 | legacy_pic->init(0); | 110 | legacy_pic->init(0); |
| 109 | 111 | ||
| 110 | /* | 112 | for (i = 0; i < legacy_pic->nr_legacy_irqs; i++) |
| 111 | * 16 old-style INTA-cycle interrupts: | 113 | set_irq_chip_and_handler_name(i, chip, handle_level_irq, name); |
| 112 | */ | ||
| 113 | for (i = 0; i < legacy_pic->nr_legacy_irqs; i++) { | ||
| 114 | struct irq_desc *desc = irq_to_desc(i); | ||
| 115 | |||
| 116 | desc->status = IRQ_DISABLED; | ||
| 117 | desc->action = NULL; | ||
| 118 | desc->depth = 1; | ||
| 119 | |||
| 120 | set_irq_chip_and_handler_name(i, &i8259A_chip, | ||
| 121 | handle_level_irq, "XT"); | ||
| 122 | } | ||
| 123 | } | 114 | } |
| 124 | 115 | ||
| 125 | void __init init_IRQ(void) | 116 | void __init init_IRQ(void) |
| @@ -224,9 +215,9 @@ static void __init apic_intr_init(void) | |||
| 224 | alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); | 215 | alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); |
| 225 | alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); | 216 | alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); |
| 226 | 217 | ||
| 227 | /* Performance monitoring interrupts: */ | 218 | /* IRQ work interrupts: */ |
| 228 | # ifdef CONFIG_PERF_EVENTS | 219 | # ifdef CONFIG_IRQ_WORK |
| 229 | alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt); | 220 | alloc_intr_gate(IRQ_WORK_VECTOR, irq_work_interrupt); |
| 230 | # endif | 221 | # endif |
| 231 | 222 | ||
| 232 | #endif | 223 | #endif |
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c new file mode 100644 index 000000000000..961b6b30ba90 --- /dev/null +++ b/arch/x86/kernel/jump_label.c | |||
| @@ -0,0 +1,50 @@ | |||
| 1 | /* | ||
| 2 | * jump label x86 support | ||
| 3 | * | ||
| 4 | * Copyright (C) 2009 Jason Baron <jbaron@redhat.com> | ||
| 5 | * | ||
| 6 | */ | ||
| 7 | #include <linux/jump_label.h> | ||
| 8 | #include <linux/memory.h> | ||
| 9 | #include <linux/uaccess.h> | ||
| 10 | #include <linux/module.h> | ||
| 11 | #include <linux/list.h> | ||
| 12 | #include <linux/jhash.h> | ||
| 13 | #include <linux/cpu.h> | ||
| 14 | #include <asm/kprobes.h> | ||
| 15 | #include <asm/alternative.h> | ||
| 16 | |||
| 17 | #ifdef HAVE_JUMP_LABEL | ||
| 18 | |||
| 19 | union jump_code_union { | ||
| 20 | char code[JUMP_LABEL_NOP_SIZE]; | ||
| 21 | struct { | ||
| 22 | char jump; | ||
| 23 | int offset; | ||
| 24 | } __attribute__((packed)); | ||
| 25 | }; | ||
| 26 | |||
| 27 | void arch_jump_label_transform(struct jump_entry *entry, | ||
| 28 | enum jump_label_type type) | ||
| 29 | { | ||
| 30 | union jump_code_union code; | ||
| 31 | |||
| 32 | if (type == JUMP_LABEL_ENABLE) { | ||
| 33 | code.jump = 0xe9; | ||
| 34 | code.offset = entry->target - | ||
| 35 | (entry->code + JUMP_LABEL_NOP_SIZE); | ||
| 36 | } else | ||
| 37 | memcpy(&code, ideal_nop5, JUMP_LABEL_NOP_SIZE); | ||
| 38 | get_online_cpus(); | ||
| 39 | mutex_lock(&text_mutex); | ||
| 40 | text_poke_smp((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE); | ||
| 41 | mutex_unlock(&text_mutex); | ||
| 42 | put_online_cpus(); | ||
| 43 | } | ||
| 44 | |||
| 45 | void arch_jump_label_text_poke_early(jump_label_t addr) | ||
| 46 | { | ||
| 47 | text_poke_early((void *)addr, ideal_nop5, JUMP_LABEL_NOP_SIZE); | ||
| 48 | } | ||
| 49 | |||
| 50 | #endif | ||
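Note on jump_label.c above: enabling a jump label rewrites the 5-byte ideal_nop5 site into a near jump, opcode 0xe9 followed by a 32-bit displacement measured from the end of the instruction, hence offset = target - (code + JUMP_LABEL_NOP_SIZE). A stand-alone sketch of that encoding with hypothetical addresses:

    #include <stdint.h>
    #include <stdio.h>

    #define NOP5 5

    union jump_code {
            unsigned char code[NOP5];
            struct {
                    unsigned char jump;     /* 0xe9 = jmp rel32 */
                    int32_t offset;         /* relative to the next instruction */
            } __attribute__((packed));
    };

    int main(void)
    {
            uint32_t site   = 0x1000;       /* address being patched */
            uint32_t target = 0x1234;       /* enabled-branch target */
            union jump_code c;

            c.jump   = 0xe9;
            c.offset = (int32_t)(target - (site + NOP5));

            printf("bytes: %02x %02x %02x %02x %02x\n",
                   c.code[0], c.code[1], c.code[2], c.code[3], c.code[4]);
            /* prints: e9 2f 02 00 00 */
            return 0;
    }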
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c index 8afd9f321f10..90fcf62854bb 100644 --- a/arch/x86/kernel/kdebugfs.c +++ b/arch/x86/kernel/kdebugfs.c | |||
| @@ -78,6 +78,7 @@ static int setup_data_open(struct inode *inode, struct file *file) | |||
| 78 | static const struct file_operations fops_setup_data = { | 78 | static const struct file_operations fops_setup_data = { |
| 79 | .read = setup_data_read, | 79 | .read = setup_data_read, |
| 80 | .open = setup_data_open, | 80 | .open = setup_data_open, |
| 81 | .llseek = default_llseek, | ||
| 81 | }; | 82 | }; |
| 82 | 83 | ||
| 83 | static int __init | 84 | static int __init |
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 852b81967a37..d81cfebb848f 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c | |||
| @@ -477,8 +477,6 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, | |||
| 477 | raw_smp_processor_id()); | 477 | raw_smp_processor_id()); |
| 478 | } | 478 | } |
| 479 | 479 | ||
| 480 | kgdb_correct_hw_break(); | ||
| 481 | |||
| 482 | return 0; | 480 | return 0; |
| 483 | } | 481 | } |
| 484 | 482 | ||
| @@ -621,7 +619,12 @@ int kgdb_arch_init(void) | |||
| 621 | static void kgdb_hw_overflow_handler(struct perf_event *event, int nmi, | 619 | static void kgdb_hw_overflow_handler(struct perf_event *event, int nmi, |
| 622 | struct perf_sample_data *data, struct pt_regs *regs) | 620 | struct perf_sample_data *data, struct pt_regs *regs) |
| 623 | { | 621 | { |
| 624 | kgdb_ll_trap(DIE_DEBUG, "debug", regs, 0, 0, SIGTRAP); | 622 | struct task_struct *tsk = current; |
| 623 | int i; | ||
| 624 | |||
| 625 | for (i = 0; i < 4; i++) | ||
| 626 | if (breakinfo[i].enabled) | ||
| 627 | tsk->thread.debugreg6 |= (DR_TRAP0 << i); | ||
| 625 | } | 628 | } |
| 626 | 629 | ||
| 627 | void kgdb_arch_late(void) | 630 | void kgdb_arch_late(void) |
| @@ -644,7 +647,7 @@ void kgdb_arch_late(void) | |||
| 644 | if (breakinfo[i].pev) | 647 | if (breakinfo[i].pev) |
| 645 | continue; | 648 | continue; |
| 646 | breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL); | 649 | breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL); |
| 647 | if (IS_ERR(breakinfo[i].pev)) { | 650 | if (IS_ERR((void * __force)breakinfo[i].pev)) { |
| 648 | printk(KERN_ERR "kgdb: Could not allocate hw " | 651 | printk(KERN_ERR "kgdb: Could not allocate hw " |
| 649 | "breakpoints\nDisabling the kernel debugger\n"); | 652 | "breakpoints\nDisabling the kernel debugger\n"); |
| 650 | breakinfo[i].pev = NULL; | 653 | breakinfo[i].pev = NULL; |
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 770ebfb349e9..1cbd54c0df99 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c | |||
| @@ -230,9 +230,6 @@ static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr) | |||
| 230 | return 0; | 230 | return 0; |
| 231 | } | 231 | } |
| 232 | 232 | ||
| 233 | /* Dummy buffers for kallsyms_lookup */ | ||
| 234 | static char __dummy_buf[KSYM_NAME_LEN]; | ||
| 235 | |||
| 236 | /* Check if paddr is at an instruction boundary */ | 233 | /* Check if paddr is at an instruction boundary */ |
| 237 | static int __kprobes can_probe(unsigned long paddr) | 234 | static int __kprobes can_probe(unsigned long paddr) |
| 238 | { | 235 | { |
| @@ -241,7 +238,7 @@ static int __kprobes can_probe(unsigned long paddr) | |||
| 241 | struct insn insn; | 238 | struct insn insn; |
| 242 | kprobe_opcode_t buf[MAX_INSN_SIZE]; | 239 | kprobe_opcode_t buf[MAX_INSN_SIZE]; |
| 243 | 240 | ||
| 244 | if (!kallsyms_lookup(paddr, NULL, &offset, NULL, __dummy_buf)) | 241 | if (!kallsyms_lookup_size_offset(paddr, NULL, &offset)) |
| 245 | return 0; | 242 | return 0; |
| 246 | 243 | ||
| 247 | /* Decode instructions */ | 244 | /* Decode instructions */ |
| @@ -1129,7 +1126,7 @@ static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr, | |||
| 1129 | *(unsigned long *)addr = val; | 1126 | *(unsigned long *)addr = val; |
| 1130 | } | 1127 | } |
| 1131 | 1128 | ||
| 1132 | void __kprobes kprobes_optinsn_template_holder(void) | 1129 | static void __used __kprobes kprobes_optinsn_template_holder(void) |
| 1133 | { | 1130 | { |
| 1134 | asm volatile ( | 1131 | asm volatile ( |
| 1135 | ".global optprobe_template_entry\n" | 1132 | ".global optprobe_template_entry\n" |
| @@ -1221,7 +1218,8 @@ static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src) | |||
| 1221 | } | 1218 | } |
| 1222 | /* Check whether the address range is reserved */ | 1219 | /* Check whether the address range is reserved */ |
| 1223 | if (ftrace_text_reserved(src, src + len - 1) || | 1220 | if (ftrace_text_reserved(src, src + len - 1) || |
| 1224 | alternatives_text_reserved(src, src + len - 1)) | 1221 | alternatives_text_reserved(src, src + len - 1) || |
| 1222 | jump_label_text_reserved(src, src + len - 1)) | ||
| 1225 | return -EBUSY; | 1223 | return -EBUSY; |
| 1226 | 1224 | ||
| 1227 | return len; | 1225 | return len; |
| @@ -1269,11 +1267,9 @@ static int __kprobes can_optimize(unsigned long paddr) | |||
| 1269 | unsigned long addr, size = 0, offset = 0; | 1267 | unsigned long addr, size = 0, offset = 0; |
| 1270 | struct insn insn; | 1268 | struct insn insn; |
| 1271 | kprobe_opcode_t buf[MAX_INSN_SIZE]; | 1269 | kprobe_opcode_t buf[MAX_INSN_SIZE]; |
| 1272 | /* Dummy buffers for lookup_symbol_attrs */ | ||
| 1273 | static char __dummy_buf[KSYM_NAME_LEN]; | ||
| 1274 | 1270 | ||
| 1275 | /* Lookup symbol including addr */ | 1271 | /* Lookup symbol including addr */ |
| 1276 | if (!kallsyms_lookup(paddr, &size, &offset, NULL, __dummy_buf)) | 1272 | if (!kallsyms_lookup_size_offset(paddr, &size, &offset)) |
| 1277 | return 0; | 1273 | return 0; |
| 1278 | 1274 | ||
| 1279 | /* Check there is enough space for a relative jump. */ | 1275 | /* Check there is enough space for a relative jump. */ |
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 035c8c529181..b3ea9db39db6 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c | |||
| @@ -36,7 +36,7 @@ static int init_one_level2_page(struct kimage *image, pgd_t *pgd, | |||
| 36 | if (!page) | 36 | if (!page) |
| 37 | goto out; | 37 | goto out; |
| 38 | pud = (pud_t *)page_address(page); | 38 | pud = (pud_t *)page_address(page); |
| 39 | memset(pud, 0, PAGE_SIZE); | 39 | clear_page(pud); |
| 40 | set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE)); | 40 | set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE)); |
| 41 | } | 41 | } |
| 42 | pud = pud_offset(pgd, addr); | 42 | pud = pud_offset(pgd, addr); |
| @@ -45,7 +45,7 @@ static int init_one_level2_page(struct kimage *image, pgd_t *pgd, | |||
| 45 | if (!page) | 45 | if (!page) |
| 46 | goto out; | 46 | goto out; |
| 47 | pmd = (pmd_t *)page_address(page); | 47 | pmd = (pmd_t *)page_address(page); |
| 48 | memset(pmd, 0, PAGE_SIZE); | 48 | clear_page(pmd); |
| 49 | set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); | 49 | set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); |
| 50 | } | 50 | } |
| 51 | pmd = pmd_offset(pud, addr); | 51 | pmd = pmd_offset(pud, addr); |
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index fa6551d36c10..0b3d37e83606 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c | |||
| @@ -232,6 +232,7 @@ static const struct file_operations microcode_fops = { | |||
| 232 | .owner = THIS_MODULE, | 232 | .owner = THIS_MODULE, |
| 233 | .write = microcode_write, | 233 | .write = microcode_write, |
| 234 | .open = microcode_open, | 234 | .open = microcode_open, |
| 235 | .llseek = no_llseek, | ||
| 235 | }; | 236 | }; |
| 236 | 237 | ||
| 237 | static struct miscdevice microcode_dev = { | 238 | static struct miscdevice microcode_dev = { |
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index e0bc186d7501..8f2956091735 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c | |||
| @@ -239,11 +239,13 @@ int module_finalize(const Elf_Ehdr *hdr, | |||
| 239 | apply_paravirt(pseg, pseg + para->sh_size); | 239 | apply_paravirt(pseg, pseg + para->sh_size); |
| 240 | } | 240 | } |
| 241 | 241 | ||
| 242 | return module_bug_finalize(hdr, sechdrs, me); | 242 | /* make jump label nops */ |
| 243 | jump_label_apply_nops(me); | ||
| 244 | |||
| 245 | return 0; | ||
| 243 | } | 246 | } |
| 244 | 247 | ||
| 245 | void module_arch_cleanup(struct module *mod) | 248 | void module_arch_cleanup(struct module *mod) |
| 246 | { | 249 | { |
| 247 | alternatives_smp_module_del(mod); | 250 | alternatives_smp_module_del(mod); |
| 248 | module_bug_cleanup(mod); | ||
| 249 | } | 251 | } |
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index d7b6f7fb4fec..9af64d9c4b67 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c | |||
| @@ -11,6 +11,7 @@ | |||
| 11 | #include <linux/init.h> | 11 | #include <linux/init.h> |
| 12 | #include <linux/delay.h> | 12 | #include <linux/delay.h> |
| 13 | #include <linux/bootmem.h> | 13 | #include <linux/bootmem.h> |
| 14 | #include <linux/memblock.h> | ||
| 14 | #include <linux/kernel_stat.h> | 15 | #include <linux/kernel_stat.h> |
| 15 | #include <linux/mc146818rtc.h> | 16 | #include <linux/mc146818rtc.h> |
| 16 | #include <linux/bitops.h> | 17 | #include <linux/bitops.h> |
| @@ -657,7 +658,7 @@ static void __init smp_reserve_memory(struct mpf_intel *mpf) | |||
| 657 | { | 658 | { |
| 658 | unsigned long size = get_mpc_size(mpf->physptr); | 659 | unsigned long size = get_mpc_size(mpf->physptr); |
| 659 | 660 | ||
| 660 | reserve_early_overlap_ok(mpf->physptr, mpf->physptr+size, "MP-table mpc"); | 661 | memblock_x86_reserve_range(mpf->physptr, mpf->physptr+size, "* MP-table mpc"); |
| 661 | } | 662 | } |
| 662 | 663 | ||
| 663 | static int __init smp_scan_config(unsigned long base, unsigned long length) | 664 | static int __init smp_scan_config(unsigned long base, unsigned long length) |
| @@ -686,7 +687,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length) | |||
| 686 | mpf, (u64)virt_to_phys(mpf)); | 687 | mpf, (u64)virt_to_phys(mpf)); |
| 687 | 688 | ||
| 688 | mem = virt_to_phys(mpf); | 689 | mem = virt_to_phys(mpf); |
| 689 | reserve_early_overlap_ok(mem, mem + sizeof(*mpf), "MP-table mpf"); | 690 | memblock_x86_reserve_range(mem, mem + sizeof(*mpf), "* MP-table mpf"); |
| 690 | if (mpf->physptr) | 691 | if (mpf->physptr) |
| 691 | smp_reserve_memory(mpf); | 692 | smp_reserve_memory(mpf); |
| 692 | 693 | ||
diff --git a/arch/x86/kernel/olpc-xo1.c b/arch/x86/kernel/olpc-xo1.c new file mode 100644 index 000000000000..f5442c03abc3 --- /dev/null +++ b/arch/x86/kernel/olpc-xo1.c | |||
| @@ -0,0 +1,140 @@ | |||
| 1 | /* | ||
| 2 | * Support for features of the OLPC XO-1 laptop | ||
| 3 | * | ||
| 4 | * Copyright (C) 2010 One Laptop per Child | ||
| 5 | * Copyright (C) 2006 Red Hat, Inc. | ||
| 6 | * Copyright (C) 2006 Advanced Micro Devices, Inc. | ||
| 7 | * | ||
| 8 | * This program is free software; you can redistribute it and/or modify | ||
| 9 | * it under the terms of the GNU General Public License as published by | ||
| 10 | * the Free Software Foundation; either version 2 of the License, or | ||
| 11 | * (at your option) any later version. | ||
| 12 | */ | ||
| 13 | |||
| 14 | #include <linux/module.h> | ||
| 15 | #include <linux/pci.h> | ||
| 16 | #include <linux/pci_ids.h> | ||
| 17 | #include <linux/platform_device.h> | ||
| 18 | #include <linux/pm.h> | ||
| 19 | |||
| 20 | #include <asm/io.h> | ||
| 21 | #include <asm/olpc.h> | ||
| 22 | |||
| 23 | #define DRV_NAME "olpc-xo1" | ||
| 24 | |||
| 25 | #define PMS_BAR 4 | ||
| 26 | #define ACPI_BAR 5 | ||
| 27 | |||
| 28 | /* PMC registers (PMS block) */ | ||
| 29 | #define PM_SCLK 0x10 | ||
| 30 | #define PM_IN_SLPCTL 0x20 | ||
| 31 | #define PM_WKXD 0x34 | ||
| 32 | #define PM_WKD 0x30 | ||
| 33 | #define PM_SSC 0x54 | ||
| 34 | |||
| 35 | /* PM registers (ACPI block) */ | ||
| 36 | #define PM1_CNT 0x08 | ||
| 37 | #define PM_GPE0_STS 0x18 | ||
| 38 | |||
| 39 | static unsigned long acpi_base; | ||
| 40 | static unsigned long pms_base; | ||
| 41 | |||
| 42 | static void xo1_power_off(void) | ||
| 43 | { | ||
| 44 | printk(KERN_INFO "OLPC XO-1 power off sequence...\n"); | ||
| 45 | |||
| 46 | /* Enable all of these controls with 0 delay */ | ||
| 47 | outl(0x40000000, pms_base + PM_SCLK); | ||
| 48 | outl(0x40000000, pms_base + PM_IN_SLPCTL); | ||
| 49 | outl(0x40000000, pms_base + PM_WKXD); | ||
| 50 | outl(0x40000000, pms_base + PM_WKD); | ||
| 51 | |||
| 52 | /* Clear status bits (possibly unnecessary) */ | ||
| 53 | outl(0x0002ffff, pms_base + PM_SSC); | ||
| 54 | outl(0xffffffff, acpi_base + PM_GPE0_STS); | ||
| 55 | |||
| 56 | /* Write SLP_EN bit to start the machinery */ | ||
| 57 | outl(0x00002000, acpi_base + PM1_CNT); | ||
| 58 | } | ||
| 59 | |||
| 60 | /* Read the base addresses from the PCI BAR info */ | ||
| 61 | static int __devinit setup_bases(struct pci_dev *pdev) | ||
| 62 | { | ||
| 63 | int r; | ||
| 64 | |||
| 65 | r = pci_enable_device_io(pdev); | ||
| 66 | if (r) { | ||
| 67 | dev_err(&pdev->dev, "can't enable device IO\n"); | ||
| 68 | return r; | ||
| 69 | } | ||
| 70 | |||
| 71 | r = pci_request_region(pdev, ACPI_BAR, DRV_NAME); | ||
| 72 | if (r) { | ||
| 73 | dev_err(&pdev->dev, "can't alloc PCI BAR #%d\n", ACPI_BAR); | ||
| 74 | return r; | ||
| 75 | } | ||
| 76 | |||
| 77 | r = pci_request_region(pdev, PMS_BAR, DRV_NAME); | ||
| 78 | if (r) { | ||
| 79 | dev_err(&pdev->dev, "can't alloc PCI BAR #%d\n", PMS_BAR); | ||
| 80 | pci_release_region(pdev, ACPI_BAR); | ||
| 81 | return r; | ||
| 82 | } | ||
| 83 | |||
| 84 | acpi_base = pci_resource_start(pdev, ACPI_BAR); | ||
| 85 | pms_base = pci_resource_start(pdev, PMS_BAR); | ||
| 86 | |||
| 87 | return 0; | ||
| 88 | } | ||
| 89 | |||
| 90 | static int __devinit olpc_xo1_probe(struct platform_device *pdev) | ||
| 91 | { | ||
| 92 | struct pci_dev *pcidev; | ||
| 93 | int r; | ||
| 94 | |||
| 95 | pcidev = pci_get_device(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA, | ||
| 96 | NULL); | ||
| 97 | if (!pcidev) | ||
| 98 | return -ENODEV; | ||
| 99 | |||
| 100 | r = setup_bases(pcidev); | ||
| 101 | if (r) | ||
| 102 | return r; | ||
| 103 | |||
| 104 | pm_power_off = xo1_power_off; | ||
| 105 | |||
| 106 | printk(KERN_INFO "OLPC XO-1 support registered\n"); | ||
| 107 | return 0; | ||
| 108 | } | ||
| 109 | |||
| 110 | static int __devexit olpc_xo1_remove(struct platform_device *pdev) | ||
| 111 | { | ||
| 112 | pm_power_off = NULL; | ||
| 113 | return 0; | ||
| 114 | } | ||
| 115 | |||
| 116 | static struct platform_driver olpc_xo1_driver = { | ||
| 117 | .driver = { | ||
| 118 | .name = DRV_NAME, | ||
| 119 | .owner = THIS_MODULE, | ||
| 120 | }, | ||
| 121 | .probe = olpc_xo1_probe, | ||
| 122 | .remove = __devexit_p(olpc_xo1_remove), | ||
| 123 | }; | ||
| 124 | |||
| 125 | static int __init olpc_xo1_init(void) | ||
| 126 | { | ||
| 127 | return platform_driver_register(&olpc_xo1_driver); | ||
| 128 | } | ||
| 129 | |||
| 130 | static void __exit olpc_xo1_exit(void) | ||
| 131 | { | ||
| 132 | platform_driver_unregister(&olpc_xo1_driver); | ||
| 133 | } | ||
| 134 | |||
| 135 | MODULE_AUTHOR("Daniel Drake <dsd@laptop.org>"); | ||
| 136 | MODULE_LICENSE("GPL"); | ||
| 137 | MODULE_ALIAS("platform:olpc-xo1"); | ||
| 138 | |||
| 139 | module_init(olpc_xo1_init); | ||
| 140 | module_exit(olpc_xo1_exit); | ||
diff --git a/arch/x86/kernel/olpc.c b/arch/x86/kernel/olpc.c index 0e0cdde519be..edaf3fe8dc5e 100644 --- a/arch/x86/kernel/olpc.c +++ b/arch/x86/kernel/olpc.c | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | #include <linux/spinlock.h> | 17 | #include <linux/spinlock.h> |
| 18 | #include <linux/io.h> | 18 | #include <linux/io.h> |
| 19 | #include <linux/string.h> | 19 | #include <linux/string.h> |
| 20 | #include <linux/platform_device.h> | ||
| 20 | 21 | ||
| 21 | #include <asm/geode.h> | 22 | #include <asm/geode.h> |
| 22 | #include <asm/setup.h> | 23 | #include <asm/setup.h> |
| @@ -114,6 +115,7 @@ int olpc_ec_cmd(unsigned char cmd, unsigned char *inbuf, size_t inlen, | |||
| 114 | unsigned long flags; | 115 | unsigned long flags; |
| 115 | int ret = -EIO; | 116 | int ret = -EIO; |
| 116 | int i; | 117 | int i; |
| 118 | int restarts = 0; | ||
| 117 | 119 | ||
| 118 | spin_lock_irqsave(&ec_lock, flags); | 120 | spin_lock_irqsave(&ec_lock, flags); |
| 119 | 121 | ||
| @@ -169,7 +171,9 @@ restart: | |||
| 169 | if (wait_on_obf(0x6c, 1)) { | 171 | if (wait_on_obf(0x6c, 1)) { |
| 170 | printk(KERN_ERR "olpc-ec: timeout waiting for" | 172 | printk(KERN_ERR "olpc-ec: timeout waiting for" |
| 171 | " EC to provide data!\n"); | 173 | " EC to provide data!\n"); |
| 172 | goto restart; | 174 | if (restarts++ < 10) |
| 175 | goto restart; | ||
| 176 | goto err; | ||
| 173 | } | 177 | } |
| 174 | outbuf[i] = inb(0x68); | 178 | outbuf[i] = inb(0x68); |
| 175 | pr_devel("olpc-ec: received 0x%x\n", outbuf[i]); | 179 | pr_devel("olpc-ec: received 0x%x\n", outbuf[i]); |
| @@ -183,8 +187,21 @@ err: | |||
| 183 | } | 187 | } |
| 184 | EXPORT_SYMBOL_GPL(olpc_ec_cmd); | 188 | EXPORT_SYMBOL_GPL(olpc_ec_cmd); |
| 185 | 189 | ||
| 186 | #ifdef CONFIG_OLPC_OPENFIRMWARE | 190 | static bool __init check_ofw_architecture(void) |
| 187 | static void __init platform_detect(void) | 191 | { |
| 192 | size_t propsize; | ||
| 193 | char olpc_arch[5]; | ||
| 194 | const void *args[] = { NULL, "architecture", olpc_arch, (void *)5 }; | ||
| 195 | void *res[] = { &propsize }; | ||
| 196 | |||
| 197 | if (olpc_ofw("getprop", args, res)) { | ||
| 198 | printk(KERN_ERR "ofw: getprop call failed!\n"); | ||
| 199 | return false; | ||
| 200 | } | ||
| 201 | return propsize == 5 && strncmp("OLPC", olpc_arch, 5) == 0; | ||
| 202 | } | ||
| 203 | |||
| 204 | static u32 __init get_board_revision(void) | ||
| 188 | { | 205 | { |
| 189 | size_t propsize; | 206 | size_t propsize; |
| 190 | __be32 rev; | 207 | __be32 rev; |
| @@ -193,45 +210,43 @@ static void __init platform_detect(void) | |||
| 193 | 210 | ||
| 194 | if (olpc_ofw("getprop", args, res) || propsize != 4) { | 211 | if (olpc_ofw("getprop", args, res) || propsize != 4) { |
| 195 | printk(KERN_ERR "ofw: getprop call failed!\n"); | 212 | printk(KERN_ERR "ofw: getprop call failed!\n"); |
| 196 | rev = cpu_to_be32(0); | 213 | return cpu_to_be32(0); |
| 197 | } | 214 | } |
| 198 | olpc_platform_info.boardrev = be32_to_cpu(rev); | 215 | return be32_to_cpu(rev); |
| 199 | } | 216 | } |
| 200 | #else | 217 | |
| 201 | static void __init platform_detect(void) | 218 | static bool __init platform_detect(void) |
| 202 | { | 219 | { |
| 203 | /* stopgap until OFW support is added to the kernel */ | 220 | if (!check_ofw_architecture()) |
| 204 | olpc_platform_info.boardrev = olpc_board(0xc2); | 221 | return false; |
| 222 | olpc_platform_info.flags |= OLPC_F_PRESENT; | ||
| 223 | olpc_platform_info.boardrev = get_board_revision(); | ||
| 224 | return true; | ||
| 205 | } | 225 | } |
| 206 | #endif | ||
| 207 | 226 | ||
| 208 | static int __init olpc_init(void) | 227 | static int __init add_xo1_platform_devices(void) |
| 209 | { | 228 | { |
| 210 | unsigned char *romsig; | 229 | struct platform_device *pdev; |
| 211 | 230 | ||
| 212 | /* The ioremap check is dangerous; limit what we run it on */ | 231 | pdev = platform_device_register_simple("xo1-rfkill", -1, NULL, 0); |
| 213 | if (!is_geode() || cs5535_has_vsa2()) | 232 | if (IS_ERR(pdev)) |
| 214 | return 0; | 233 | return PTR_ERR(pdev); |
| 215 | 234 | ||
| 216 | spin_lock_init(&ec_lock); | 235 | pdev = platform_device_register_simple("olpc-xo1", -1, NULL, 0); |
| 236 | if (IS_ERR(pdev)) | ||
| 237 | return PTR_ERR(pdev); | ||
| 217 | 238 | ||
| 218 | romsig = ioremap(0xffffffc0, 16); | 239 | return 0; |
| 219 | if (!romsig) | 240 | } |
| 220 | return 0; | ||
| 221 | 241 | ||
| 222 | if (strncmp(romsig, "CL1 Q", 7)) | 242 | static int __init olpc_init(void) |
| 223 | goto unmap; | 243 | { |
| 224 | if (strncmp(romsig+6, romsig+13, 3)) { | 244 | int r = 0; |
| 225 | printk(KERN_INFO "OLPC BIOS signature looks invalid. " | ||
| 226 | "Assuming not OLPC\n"); | ||
| 227 | goto unmap; | ||
| 228 | } | ||
| 229 | 245 | ||
| 230 | printk(KERN_INFO "OLPC board with OpenFirmware %.16s\n", romsig); | 246 | if (!olpc_ofw_present() || !platform_detect()) |
| 231 | olpc_platform_info.flags |= OLPC_F_PRESENT; | 247 | return 0; |
| 232 | 248 | ||
| 233 | /* get the platform revision */ | 249 | spin_lock_init(&ec_lock); |
| 234 | platform_detect(); | ||
| 235 | 250 | ||
| 236 | /* assume B1 and above models always have a DCON */ | 251 | /* assume B1 and above models always have a DCON */ |
| 237 | if (olpc_board_at_least(olpc_board(0xb1))) | 252 | if (olpc_board_at_least(olpc_board(0xb1))) |
| @@ -242,8 +257,10 @@ static int __init olpc_init(void) | |||
| 242 | (unsigned char *) &olpc_platform_info.ecver, 1); | 257 | (unsigned char *) &olpc_platform_info.ecver, 1); |
| 243 | 258 | ||
| 244 | #ifdef CONFIG_PCI_OLPC | 259 | #ifdef CONFIG_PCI_OLPC |
| 245 | /* If the VSA exists let it emulate PCI, if not emulate in kernel */ | 260 | /* If the VSA exists let it emulate PCI, if not emulate in kernel. |
| 246 | if (!cs5535_has_vsa2()) | 261 | * XO-1 only. */ |
| 262 | if (olpc_platform_info.boardrev < olpc_board_pre(0xd0) && | ||
| 263 | !cs5535_has_vsa2()) | ||
| 247 | x86_init.pci.arch_init = pci_olpc_init; | 264 | x86_init.pci.arch_init = pci_olpc_init; |
| 248 | #endif | 265 | #endif |
| 249 | 266 | ||
| @@ -252,8 +269,12 @@ static int __init olpc_init(void) | |||
| 252 | olpc_platform_info.boardrev >> 4, | 269 | olpc_platform_info.boardrev >> 4, |
| 253 | olpc_platform_info.ecver); | 270 | olpc_platform_info.ecver); |
| 254 | 271 | ||
| 255 | unmap: | 272 | if (olpc_platform_info.boardrev < olpc_board_pre(0xd0)) { /* XO-1 */ |
| 256 | iounmap(romsig); | 273 | r = add_xo1_platform_devices(); |
| 274 | if (r) | ||
| 275 | return r; | ||
| 276 | } | ||
| 277 | |||
| 257 | return 0; | 278 | return 0; |
| 258 | } | 279 | } |
| 259 | 280 | ||
diff --git a/arch/x86/kernel/olpc_ofw.c b/arch/x86/kernel/olpc_ofw.c index 3218aa71ab5e..787320464379 100644 --- a/arch/x86/kernel/olpc_ofw.c +++ b/arch/x86/kernel/olpc_ofw.c | |||
| @@ -74,6 +74,12 @@ int __olpc_ofw(const char *name, int nr_args, const void **args, int nr_res, | |||
| 74 | } | 74 | } |
| 75 | EXPORT_SYMBOL_GPL(__olpc_ofw); | 75 | EXPORT_SYMBOL_GPL(__olpc_ofw); |
| 76 | 76 | ||
| 77 | bool olpc_ofw_present(void) | ||
| 78 | { | ||
| 79 | return olpc_ofw_cif != NULL; | ||
| 80 | } | ||
| 81 | EXPORT_SYMBOL_GPL(olpc_ofw_present); | ||
| 82 | |||
| 77 | /* OFW cif _should_ be above this address */ | 83 | /* OFW cif _should_ be above this address */ |
| 78 | #define OFW_MIN 0xff000000 | 84 | #define OFW_MIN 0xff000000 |
| 79 | 85 | ||
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 1db183ed7c01..c5b250011fd4 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c | |||
| @@ -413,7 +413,6 @@ struct pv_mmu_ops pv_mmu_ops = { | |||
| 413 | 413 | ||
| 414 | .alloc_pte = paravirt_nop, | 414 | .alloc_pte = paravirt_nop, |
| 415 | .alloc_pmd = paravirt_nop, | 415 | .alloc_pmd = paravirt_nop, |
| 416 | .alloc_pmd_clone = paravirt_nop, | ||
| 417 | .alloc_pud = paravirt_nop, | 416 | .alloc_pud = paravirt_nop, |
| 418 | .release_pte = paravirt_nop, | 417 | .release_pte = paravirt_nop, |
| 419 | .release_pmd = paravirt_nop, | 418 | .release_pmd = paravirt_nop, |
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 078d4ec1a9d9..f56a117cef68 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c | |||
| @@ -47,6 +47,7 @@ | |||
| 47 | #include <asm/rio.h> | 47 | #include <asm/rio.h> |
| 48 | #include <asm/bios_ebda.h> | 48 | #include <asm/bios_ebda.h> |
| 49 | #include <asm/x86_init.h> | 49 | #include <asm/x86_init.h> |
| 50 | #include <asm/iommu_table.h> | ||
| 50 | 51 | ||
| 51 | #ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT | 52 | #ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT |
| 52 | int use_calgary __read_mostly = 1; | 53 | int use_calgary __read_mostly = 1; |
| @@ -1364,7 +1365,7 @@ static int __init calgary_iommu_init(void) | |||
| 1364 | return 0; | 1365 | return 0; |
| 1365 | } | 1366 | } |
| 1366 | 1367 | ||
| 1367 | void __init detect_calgary(void) | 1368 | int __init detect_calgary(void) |
| 1368 | { | 1369 | { |
| 1369 | int bus; | 1370 | int bus; |
| 1370 | void *tbl; | 1371 | void *tbl; |
| @@ -1378,13 +1379,13 @@ void __init detect_calgary(void) | |||
| 1378 | * another HW IOMMU already, bail out. | 1379 | * another HW IOMMU already, bail out. |
| 1379 | */ | 1380 | */ |
| 1380 | if (no_iommu || iommu_detected) | 1381 | if (no_iommu || iommu_detected) |
| 1381 | return; | 1382 | return -ENODEV; |
| 1382 | 1383 | ||
| 1383 | if (!use_calgary) | 1384 | if (!use_calgary) |
| 1384 | return; | 1385 | return -ENODEV; |
| 1385 | 1386 | ||
| 1386 | if (!early_pci_allowed()) | 1387 | if (!early_pci_allowed()) |
| 1387 | return; | 1388 | return -ENODEV; |
| 1388 | 1389 | ||
| 1389 | printk(KERN_DEBUG "Calgary: detecting Calgary via BIOS EBDA area\n"); | 1390 | printk(KERN_DEBUG "Calgary: detecting Calgary via BIOS EBDA area\n"); |
| 1390 | 1391 | ||
| @@ -1410,13 +1411,13 @@ void __init detect_calgary(void) | |||
| 1410 | if (!rio_table_hdr) { | 1411 | if (!rio_table_hdr) { |
| 1411 | printk(KERN_DEBUG "Calgary: Unable to locate Rio Grande table " | 1412 | printk(KERN_DEBUG "Calgary: Unable to locate Rio Grande table " |
| 1412 | "in EBDA - bailing!\n"); | 1413 | "in EBDA - bailing!\n"); |
| 1413 | return; | 1414 | return -ENODEV; |
| 1414 | } | 1415 | } |
| 1415 | 1416 | ||
| 1416 | ret = build_detail_arrays(); | 1417 | ret = build_detail_arrays(); |
| 1417 | if (ret) { | 1418 | if (ret) { |
| 1418 | printk(KERN_DEBUG "Calgary: build_detail_arrays ret %d\n", ret); | 1419 | printk(KERN_DEBUG "Calgary: build_detail_arrays ret %d\n", ret); |
| 1419 | return; | 1420 | return -ENOMEM; |
| 1420 | } | 1421 | } |
| 1421 | 1422 | ||
| 1422 | specified_table_size = determine_tce_table_size((is_kdump_kernel() ? | 1423 | specified_table_size = determine_tce_table_size((is_kdump_kernel() ? |
| @@ -1464,7 +1465,7 @@ void __init detect_calgary(void) | |||
| 1464 | 1465 | ||
| 1465 | x86_init.iommu.iommu_init = calgary_iommu_init; | 1466 | x86_init.iommu.iommu_init = calgary_iommu_init; |
| 1466 | } | 1467 | } |
| 1467 | return; | 1468 | return calgary_found; |
| 1468 | 1469 | ||
| 1469 | cleanup: | 1470 | cleanup: |
| 1470 | for (--bus; bus >= 0; --bus) { | 1471 | for (--bus; bus >= 0; --bus) { |
| @@ -1473,6 +1474,7 @@ cleanup: | |||
| 1473 | if (info->tce_space) | 1474 | if (info->tce_space) |
| 1474 | free_tce_table(info->tce_space); | 1475 | free_tce_table(info->tce_space); |
| 1475 | } | 1476 | } |
| 1477 | return -ENOMEM; | ||
| 1476 | } | 1478 | } |
| 1477 | 1479 | ||
| 1478 | static int __init calgary_parse_options(char *p) | 1480 | static int __init calgary_parse_options(char *p) |
| @@ -1594,3 +1596,5 @@ static int __init calgary_fixup_tce_spaces(void) | |||
| 1594 | * and before device_initcall. | 1596 | * and before device_initcall. |
| 1595 | */ | 1597 | */ |
| 1596 | rootfs_initcall(calgary_fixup_tce_spaces); | 1598 | rootfs_initcall(calgary_fixup_tce_spaces); |
| 1599 | |||
| 1600 | IOMMU_INIT_POST(detect_calgary); | ||
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 9f07cfcbd3a5..9ea999a4dcc1 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c | |||
| @@ -11,9 +11,8 @@ | |||
| 11 | #include <asm/iommu.h> | 11 | #include <asm/iommu.h> |
| 12 | #include <asm/gart.h> | 12 | #include <asm/gart.h> |
| 13 | #include <asm/calgary.h> | 13 | #include <asm/calgary.h> |
| 14 | #include <asm/amd_iommu.h> | ||
| 15 | #include <asm/x86_init.h> | 14 | #include <asm/x86_init.h> |
| 16 | #include <asm/xen/swiotlb-xen.h> | 15 | #include <asm/iommu_table.h> |
| 17 | 16 | ||
| 18 | static int forbid_dac __read_mostly; | 17 | static int forbid_dac __read_mostly; |
| 19 | 18 | ||
| @@ -45,6 +44,8 @@ int iommu_detected __read_mostly = 0; | |||
| 45 | */ | 44 | */ |
| 46 | int iommu_pass_through __read_mostly; | 45 | int iommu_pass_through __read_mostly; |
| 47 | 46 | ||
| 47 | extern struct iommu_table_entry __iommu_table[], __iommu_table_end[]; | ||
| 48 | |||
| 48 | /* Dummy device used for NULL arguments (normally ISA). */ | 49 | /* Dummy device used for NULL arguments (normally ISA). */ |
| 49 | struct device x86_dma_fallback_dev = { | 50 | struct device x86_dma_fallback_dev = { |
| 50 | .init_name = "fallback device", | 51 | .init_name = "fallback device", |
| @@ -130,26 +131,24 @@ static void __init dma32_free_bootmem(void) | |||
| 130 | 131 | ||
| 131 | void __init pci_iommu_alloc(void) | 132 | void __init pci_iommu_alloc(void) |
| 132 | { | 133 | { |
| 134 | struct iommu_table_entry *p; | ||
| 135 | |||
| 133 | /* free the range so iommu could get some range less than 4G */ | 136 | /* free the range so iommu could get some range less than 4G */ |
| 134 | dma32_free_bootmem(); | 137 | dma32_free_bootmem(); |
| 135 | 138 | ||
| 136 | if (pci_xen_swiotlb_detect() || pci_swiotlb_detect()) | 139 | sort_iommu_table(__iommu_table, __iommu_table_end); |
| 137 | goto out; | 140 | check_iommu_entries(__iommu_table, __iommu_table_end); |
| 138 | |||
| 139 | gart_iommu_hole_init(); | ||
| 140 | |||
| 141 | detect_calgary(); | ||
| 142 | |||
| 143 | detect_intel_iommu(); | ||
| 144 | 141 | ||
| 145 | /* needs to be called after gart_iommu_hole_init */ | 142 | for (p = __iommu_table; p < __iommu_table_end; p++) { |
| 146 | amd_iommu_detect(); | 143 | if (p && p->detect && p->detect() > 0) { |
| 147 | out: | 144 | p->flags |= IOMMU_DETECTED; |
| 148 | pci_xen_swiotlb_init(); | 145 | if (p->early_init) |
| 149 | 146 | p->early_init(); | |
| 150 | pci_swiotlb_init(); | 147 | if (p->flags & IOMMU_FINISH_IF_DETECTED) |
| 148 | break; | ||
| 149 | } | ||
| 150 | } | ||
| 151 | } | 151 | } |
| 152 | |||
| 153 | void *dma_generic_alloc_coherent(struct device *dev, size_t size, | 152 | void *dma_generic_alloc_coherent(struct device *dev, size_t size, |
| 154 | dma_addr_t *dma_addr, gfp_t flag) | 153 | dma_addr_t *dma_addr, gfp_t flag) |
| 155 | { | 154 | { |
| @@ -292,6 +291,7 @@ EXPORT_SYMBOL(dma_supported); | |||
| 292 | 291 | ||
| 293 | static int __init pci_iommu_init(void) | 292 | static int __init pci_iommu_init(void) |
| 294 | { | 293 | { |
| 294 | struct iommu_table_entry *p; | ||
| 295 | dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); | 295 | dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); |
| 296 | 296 | ||
| 297 | #ifdef CONFIG_PCI | 297 | #ifdef CONFIG_PCI |
| @@ -299,12 +299,10 @@ static int __init pci_iommu_init(void) | |||
| 299 | #endif | 299 | #endif |
| 300 | x86_init.iommu.iommu_init(); | 300 | x86_init.iommu.iommu_init(); |
| 301 | 301 | ||
| 302 | if (swiotlb || xen_swiotlb) { | 302 | for (p = __iommu_table; p < __iommu_table_end; p++) { |
| 303 | printk(KERN_INFO "PCI-DMA: " | 303 | if (p && (p->flags & IOMMU_DETECTED) && p->late_init) |
| 304 | "Using software bounce buffering for IO (SWIOTLB)\n"); | 304 | p->late_init(); |
| 305 | swiotlb_print_info(); | 305 | } |
| 306 | } else | ||
| 307 | swiotlb_free(); | ||
| 308 | 306 | ||
| 309 | return 0; | 307 | return 0; |
| 310 | } | 308 | } |
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 0f7f130caa67..ba0f0ca9f280 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c | |||
| @@ -39,8 +39,9 @@ | |||
| 39 | #include <asm/cacheflush.h> | 39 | #include <asm/cacheflush.h> |
| 40 | #include <asm/swiotlb.h> | 40 | #include <asm/swiotlb.h> |
| 41 | #include <asm/dma.h> | 41 | #include <asm/dma.h> |
| 42 | #include <asm/k8.h> | 42 | #include <asm/amd_nb.h> |
| 43 | #include <asm/x86_init.h> | 43 | #include <asm/x86_init.h> |
| 44 | #include <asm/iommu_table.h> | ||
| 44 | 45 | ||
| 45 | static unsigned long iommu_bus_base; /* GART remapping area (physical) */ | 46 | static unsigned long iommu_bus_base; /* GART remapping area (physical) */ |
| 46 | static unsigned long iommu_size; /* size of remapping area bytes */ | 47 | static unsigned long iommu_size; /* size of remapping area bytes */ |
| @@ -560,8 +561,11 @@ static void enable_gart_translations(void) | |||
| 560 | { | 561 | { |
| 561 | int i; | 562 | int i; |
| 562 | 563 | ||
| 563 | for (i = 0; i < num_k8_northbridges; i++) { | 564 | if (!k8_northbridges.gart_supported) |
| 564 | struct pci_dev *dev = k8_northbridges[i]; | 565 | return; |
| 566 | |||
| 567 | for (i = 0; i < k8_northbridges.num; i++) { | ||
| 568 | struct pci_dev *dev = k8_northbridges.nb_misc[i]; | ||
| 565 | 569 | ||
| 566 | enable_gart_translation(dev, __pa(agp_gatt_table)); | 570 | enable_gart_translation(dev, __pa(agp_gatt_table)); |
| 567 | } | 571 | } |
| @@ -592,16 +596,19 @@ static void gart_fixup_northbridges(struct sys_device *dev) | |||
| 592 | if (!fix_up_north_bridges) | 596 | if (!fix_up_north_bridges) |
| 593 | return; | 597 | return; |
| 594 | 598 | ||
| 599 | if (!k8_northbridges.gart_supported) | ||
| 600 | return; | ||
| 601 | |||
| 595 | pr_info("PCI-DMA: Restoring GART aperture settings\n"); | 602 | pr_info("PCI-DMA: Restoring GART aperture settings\n"); |
| 596 | 603 | ||
| 597 | for (i = 0; i < num_k8_northbridges; i++) { | 604 | for (i = 0; i < k8_northbridges.num; i++) { |
| 598 | struct pci_dev *dev = k8_northbridges[i]; | 605 | struct pci_dev *dev = k8_northbridges.nb_misc[i]; |
| 599 | 606 | ||
| 600 | /* | 607 | /* |
| 601 | * Don't enable translations just yet. That is the next | 608 | * Don't enable translations just yet. That is the next |
| 602 | * step. Restore the pre-suspend aperture settings. | 609 | * step. Restore the pre-suspend aperture settings. |
| 603 | */ | 610 | */ |
| 604 | pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, aperture_order << 1); | 611 | gart_set_size_and_enable(dev, aperture_order); |
| 605 | pci_write_config_dword(dev, AMD64_GARTAPERTUREBASE, aperture_alloc >> 25); | 612 | pci_write_config_dword(dev, AMD64_GARTAPERTUREBASE, aperture_alloc >> 25); |
| 606 | } | 613 | } |
| 607 | } | 614 | } |
| @@ -649,8 +656,8 @@ static __init int init_k8_gatt(struct agp_kern_info *info) | |||
| 649 | 656 | ||
| 650 | aper_size = aper_base = info->aper_size = 0; | 657 | aper_size = aper_base = info->aper_size = 0; |
| 651 | dev = NULL; | 658 | dev = NULL; |
| 652 | for (i = 0; i < num_k8_northbridges; i++) { | 659 | for (i = 0; i < k8_northbridges.num; i++) { |
| 653 | dev = k8_northbridges[i]; | 660 | dev = k8_northbridges.nb_misc[i]; |
| 654 | new_aper_base = read_aperture(dev, &new_aper_size); | 661 | new_aper_base = read_aperture(dev, &new_aper_size); |
| 655 | if (!new_aper_base) | 662 | if (!new_aper_base) |
| 656 | goto nommu; | 663 | goto nommu; |
| @@ -718,10 +725,13 @@ static void gart_iommu_shutdown(void) | |||
| 718 | if (!no_agp) | 725 | if (!no_agp) |
| 719 | return; | 726 | return; |
| 720 | 727 | ||
| 721 | for (i = 0; i < num_k8_northbridges; i++) { | 728 | if (!k8_northbridges.gart_supported) |
| 729 | return; | ||
| 730 | |||
| 731 | for (i = 0; i < k8_northbridges.num; i++) { | ||
| 722 | u32 ctl; | 732 | u32 ctl; |
| 723 | 733 | ||
| 724 | dev = k8_northbridges[i]; | 734 | dev = k8_northbridges.nb_misc[i]; |
| 725 | pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl); | 735 | pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl); |
| 726 | 736 | ||
| 727 | ctl &= ~GARTEN; | 737 | ctl &= ~GARTEN; |
| @@ -739,7 +749,7 @@ int __init gart_iommu_init(void) | |||
| 739 | unsigned long scratch; | 749 | unsigned long scratch; |
| 740 | long i; | 750 | long i; |
| 741 | 751 | ||
| 742 | if (num_k8_northbridges == 0) | 752 | if (!k8_northbridges.gart_supported) |
| 743 | return 0; | 753 | return 0; |
| 744 | 754 | ||
| 745 | #ifndef CONFIG_AGP_AMD64 | 755 | #ifndef CONFIG_AGP_AMD64 |
| @@ -896,3 +906,4 @@ void __init gart_parse_options(char *p) | |||
| 896 | } | 906 | } |
| 897 | } | 907 | } |
| 898 | } | 908 | } |
| 909 | IOMMU_INIT_POST(gart_iommu_hole_init); | ||
diff --git a/arch/x86/kernel/pci-iommu_table.c b/arch/x86/kernel/pci-iommu_table.c new file mode 100644 index 000000000000..55d745ec1181 --- /dev/null +++ b/arch/x86/kernel/pci-iommu_table.c | |||
| @@ -0,0 +1,89 @@ | |||
| 1 | #include <linux/dma-mapping.h> | ||
| 2 | #include <asm/iommu_table.h> | ||
| 3 | #include <linux/string.h> | ||
| 4 | #include <linux/kallsyms.h> | ||
| 5 | |||
| 6 | |||
| 7 | #define DEBUG 1 | ||
| 8 | |||
| 9 | static struct iommu_table_entry * __init | ||
| 10 | find_dependents_of(struct iommu_table_entry *start, | ||
| 11 | struct iommu_table_entry *finish, | ||
| 12 | struct iommu_table_entry *q) | ||
| 13 | { | ||
| 14 | struct iommu_table_entry *p; | ||
| 15 | |||
| 16 | if (!q) | ||
| 17 | return NULL; | ||
| 18 | |||
| 19 | for (p = start; p < finish; p++) | ||
| 20 | if (p->detect == q->depend) | ||
| 21 | return p; | ||
| 22 | |||
| 23 | return NULL; | ||
| 24 | } | ||
| 25 | |||
| 26 | |||
| 27 | void __init sort_iommu_table(struct iommu_table_entry *start, | ||
| 28 | struct iommu_table_entry *finish) { | ||
| 29 | |||
| 30 | struct iommu_table_entry *p, *q, tmp; | ||
| 31 | |||
| 32 | for (p = start; p < finish; p++) { | ||
| 33 | again: | ||
| 34 | q = find_dependents_of(start, finish, p); | ||
| 35 | /* We are bit sneaky here. We use the memory address to figure | ||
| 36 | * out if the node we depend on is past our point, if so, swap. | ||
| 37 | */ | ||
| 38 | if (q > p) { | ||
| 39 | tmp = *p; | ||
| 40 | memmove(p, q, sizeof(*p)); | ||
| 41 | *q = tmp; | ||
| 42 | goto again; | ||
| 43 | } | ||
| 44 | } | ||
| 45 | |||
| 46 | } | ||
| 47 | |||
| 48 | #ifdef DEBUG | ||
| 49 | void __init check_iommu_entries(struct iommu_table_entry *start, | ||
| 50 | struct iommu_table_entry *finish) | ||
| 51 | { | ||
| 52 | struct iommu_table_entry *p, *q, *x; | ||
| 53 | char sym_p[KSYM_SYMBOL_LEN]; | ||
| 54 | char sym_q[KSYM_SYMBOL_LEN]; | ||
| 55 | |||
| 56 | /* Simple cyclic dependency checker. */ | ||
| 57 | for (p = start; p < finish; p++) { | ||
| 58 | q = find_dependents_of(start, finish, p); | ||
| 59 | x = find_dependents_of(start, finish, q); | ||
| 60 | if (p == x) { | ||
| 61 | sprint_symbol(sym_p, (unsigned long)p->detect); | ||
| 62 | sprint_symbol(sym_q, (unsigned long)q->detect); | ||
| 63 | |||
| 64 | printk(KERN_ERR "CYCLIC DEPENDENCY FOUND! %s depends" \ | ||
| 65 | " on %s and vice-versa. BREAKING IT.\n", | ||
| 66 | sym_p, sym_q); | ||
| 67 | /* Heavy handed way..*/ | ||
| 68 | x->depend = 0; | ||
| 69 | } | ||
| 70 | } | ||
| 71 | |||
| 72 | for (p = start; p < finish; p++) { | ||
| 73 | q = find_dependents_of(p, finish, p); | ||
| 74 | if (q && q > p) { | ||
| 75 | sprint_symbol(sym_p, (unsigned long)p->detect); | ||
| 76 | sprint_symbol(sym_q, (unsigned long)q->detect); | ||
| 77 | |||
| 78 | printk(KERN_ERR "EXECUTION ORDER INVALID! %s "\ | ||
| 79 | "should be called before %s!\n", | ||
| 80 | sym_p, sym_q); | ||
| 81 | } | ||
| 82 | } | ||
| 83 | } | ||
| 84 | #else | ||
| 85 | inline void check_iommu_entries(struct iommu_table_entry *start, | ||
| 86 | struct iommu_table_entry *finish) | ||
| 87 | { | ||
| 88 | } | ||
| 89 | #endif | ||
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index a5bc528d4328..8f972cbddef0 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c | |||
| @@ -10,7 +10,8 @@ | |||
| 10 | #include <asm/iommu.h> | 10 | #include <asm/iommu.h> |
| 11 | #include <asm/swiotlb.h> | 11 | #include <asm/swiotlb.h> |
| 12 | #include <asm/dma.h> | 12 | #include <asm/dma.h> |
| 13 | 13 | #include <asm/xen/swiotlb-xen.h> | |
| 14 | #include <asm/iommu_table.h> | ||
| 14 | int swiotlb __read_mostly; | 15 | int swiotlb __read_mostly; |
| 15 | 16 | ||
| 16 | static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, | 17 | static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, |
| @@ -41,25 +42,42 @@ static struct dma_map_ops swiotlb_dma_ops = { | |||
| 41 | }; | 42 | }; |
| 42 | 43 | ||
| 43 | /* | 44 | /* |
| 44 | * pci_swiotlb_detect - set swiotlb to 1 if necessary | 45 | * pci_swiotlb_detect_override - set swiotlb to 1 if necessary |
| 45 | * | 46 | * |
| 46 | * This returns non-zero if we are forced to use swiotlb (by the boot | 47 | * This returns non-zero if we are forced to use swiotlb (by the boot |
| 47 | * option). | 48 | * option). |
| 48 | */ | 49 | */ |
| 49 | int __init pci_swiotlb_detect(void) | 50 | int __init pci_swiotlb_detect_override(void) |
| 50 | { | 51 | { |
| 51 | int use_swiotlb = swiotlb | swiotlb_force; | 52 | int use_swiotlb = swiotlb | swiotlb_force; |
| 52 | 53 | ||
| 54 | if (swiotlb_force) | ||
| 55 | swiotlb = 1; | ||
| 56 | |||
| 57 | return use_swiotlb; | ||
| 58 | } | ||
| 59 | IOMMU_INIT_FINISH(pci_swiotlb_detect_override, | ||
| 60 | pci_xen_swiotlb_detect, | ||
| 61 | pci_swiotlb_init, | ||
| 62 | pci_swiotlb_late_init); | ||
| 63 | |||
| 64 | /* | ||
| 65 | * if 4GB or more detected (and iommu=off not set) return 1 | ||
| 66 | * and set swiotlb to 1. | ||
| 67 | */ | ||
| 68 | int __init pci_swiotlb_detect_4gb(void) | ||
| 69 | { | ||
| 53 | /* don't initialize swiotlb if iommu=off (no_iommu=1) */ | 70 | /* don't initialize swiotlb if iommu=off (no_iommu=1) */ |
| 54 | #ifdef CONFIG_X86_64 | 71 | #ifdef CONFIG_X86_64 |
| 55 | if (!no_iommu && max_pfn > MAX_DMA32_PFN) | 72 | if (!no_iommu && max_pfn > MAX_DMA32_PFN) |
| 56 | swiotlb = 1; | 73 | swiotlb = 1; |
| 57 | #endif | 74 | #endif |
| 58 | if (swiotlb_force) | 75 | return swiotlb; |
| 59 | swiotlb = 1; | ||
| 60 | |||
| 61 | return use_swiotlb; | ||
| 62 | } | 76 | } |
| 77 | IOMMU_INIT(pci_swiotlb_detect_4gb, | ||
| 78 | pci_swiotlb_detect_override, | ||
| 79 | pci_swiotlb_init, | ||
| 80 | pci_swiotlb_late_init); | ||
| 63 | 81 | ||
| 64 | void __init pci_swiotlb_init(void) | 82 | void __init pci_swiotlb_init(void) |
| 65 | { | 83 | { |
| @@ -68,3 +86,15 @@ void __init pci_swiotlb_init(void) | |||
| 68 | dma_ops = &swiotlb_dma_ops; | 86 | dma_ops = &swiotlb_dma_ops; |
| 69 | } | 87 | } |
| 70 | } | 88 | } |
| 89 | |||
| 90 | void __init pci_swiotlb_late_init(void) | ||
| 91 | { | ||
| 92 | /* An IOMMU turned us off. */ | ||
| 93 | if (!swiotlb) | ||
| 94 | swiotlb_free(); | ||
| 95 | else { | ||
| 96 | printk(KERN_INFO "PCI-DMA: " | ||
| 97 | "Using software bounce buffering for IO (SWIOTLB)\n"); | ||
| 98 | swiotlb_print_info(); | ||
| 99 | } | ||
| 100 | } | ||
diff --git a/arch/x86/kernel/pmtimer_64.c b/arch/x86/kernel/pmtimer_64.c deleted file mode 100644 index b112406f1996..000000000000 --- a/arch/x86/kernel/pmtimer_64.c +++ /dev/null | |||
| @@ -1,69 +0,0 @@ | |||
| 1 | /* Ported over from i386 by AK, original copyright was: | ||
| 2 | * | ||
| 3 | * (C) Dominik Brodowski <linux@brodo.de> 2003 | ||
| 4 | * | ||
| 5 | * Driver to use the Power Management Timer (PMTMR) available in some | ||
| 6 | * southbridges as primary timing source for the Linux kernel. | ||
| 7 | * | ||
| 8 | * Based on parts of linux/drivers/acpi/hardware/hwtimer.c, timer_pit.c, | ||
| 9 | * timer_hpet.c, and on Arjan van de Ven's implementation for 2.4. | ||
| 10 | * | ||
| 11 | * This file is licensed under the GPL v2. | ||
| 12 | * | ||
| 13 | * Dropped all the hardware bug workarounds for now. Hopefully they | ||
| 14 | * are not needed on 64bit chipsets. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include <linux/jiffies.h> | ||
| 18 | #include <linux/kernel.h> | ||
| 19 | #include <linux/time.h> | ||
| 20 | #include <linux/init.h> | ||
| 21 | #include <linux/cpumask.h> | ||
| 22 | #include <linux/acpi_pmtmr.h> | ||
| 23 | |||
| 24 | #include <asm/io.h> | ||
| 25 | #include <asm/proto.h> | ||
| 26 | #include <asm/msr.h> | ||
| 27 | #include <asm/vsyscall.h> | ||
| 28 | |||
| 29 | static inline u32 cyc2us(u32 cycles) | ||
| 30 | { | ||
| 31 | /* The Power Management Timer ticks at 3.579545 ticks per microsecond. | ||
| 32 | * 1 / PM_TIMER_FREQUENCY == 0.27936511 =~ 286/1024 [error: 0.024%] | ||
| 33 | * | ||
| 34 | * Even with HZ = 100, delta is at maximum 35796 ticks, so it can | ||
| 35 | * easily be multiplied with 286 (=0x11E) without having to fear | ||
| 36 | * u32 overflows. | ||
| 37 | */ | ||
| 38 | cycles *= 286; | ||
| 39 | return (cycles >> 10); | ||
| 40 | } | ||
| 41 | |||
| 42 | static unsigned pmtimer_wait_tick(void) | ||
| 43 | { | ||
| 44 | u32 a, b; | ||
| 45 | for (a = b = inl(pmtmr_ioport) & ACPI_PM_MASK; | ||
| 46 | a == b; | ||
| 47 | b = inl(pmtmr_ioport) & ACPI_PM_MASK) | ||
| 48 | cpu_relax(); | ||
| 49 | return b; | ||
| 50 | } | ||
| 51 | |||
| 52 | /* note: wait time is rounded up to one tick */ | ||
| 53 | void pmtimer_wait(unsigned us) | ||
| 54 | { | ||
| 55 | u32 a, b; | ||
| 56 | a = pmtimer_wait_tick(); | ||
| 57 | do { | ||
| 58 | b = inl(pmtmr_ioport); | ||
| 59 | cpu_relax(); | ||
| 60 | } while (cyc2us(b - a) < us); | ||
| 61 | } | ||
| 62 | |||
| 63 | static int __init nopmtimer_setup(char *s) | ||
| 64 | { | ||
| 65 | pmtmr_ioport = 0; | ||
| 66 | return 1; | ||
| 67 | } | ||
| 68 | |||
| 69 | __setup("nopmtimer", nopmtimer_setup); | ||
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 3d9ea531ddd1..b3d7a3a04f38 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
| @@ -424,7 +424,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
| 424 | load_TLS(next, cpu); | 424 | load_TLS(next, cpu); |
| 425 | 425 | ||
| 426 | /* Must be after DS reload */ | 426 | /* Must be after DS reload */ |
| 427 | unlazy_fpu(prev_p); | 427 | __unlazy_fpu(prev_p); |
| 428 | 428 | ||
| 429 | /* Make sure cpu is ready for new context */ | 429 | /* Make sure cpu is ready for new context */ |
| 430 | if (preload_fpu) | 430 | if (preload_fpu) |
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index e3af342fe83a..f7f53dcd3e0a 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c | |||
| @@ -84,7 +84,7 @@ static int __init reboot_setup(char *str) | |||
| 84 | } | 84 | } |
| 85 | /* we will leave sorting out the final value | 85 | /* we will leave sorting out the final value |
| 86 | when we are ready to reboot, since we might not | 86 | when we are ready to reboot, since we might not |
| 87 | have set up boot_cpu_id or smp_num_cpu */ | 87 | have detected BSP APIC ID or smp_num_cpu */ |
| 88 | break; | 88 | break; |
| 89 | #endif /* CONFIG_SMP */ | 89 | #endif /* CONFIG_SMP */ |
| 90 | 90 | ||
| @@ -371,16 +371,10 @@ void machine_real_restart(const unsigned char *code, int length) | |||
| 371 | CMOS_WRITE(0x00, 0x8f); | 371 | CMOS_WRITE(0x00, 0x8f); |
| 372 | spin_unlock(&rtc_lock); | 372 | spin_unlock(&rtc_lock); |
| 373 | 373 | ||
| 374 | /* Remap the kernel at virtual address zero, as well as offset zero | ||
| 375 | from the kernel segment. This assumes the kernel segment starts at | ||
| 376 | virtual address PAGE_OFFSET. */ | ||
| 377 | memcpy(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY, | ||
| 378 | sizeof(swapper_pg_dir [0]) * KERNEL_PGD_PTRS); | ||
| 379 | |||
| 380 | /* | 374 | /* |
| 381 | * Use `swapper_pg_dir' as our page directory. | 375 | * Switch back to the initial page table. |
| 382 | */ | 376 | */ |
| 383 | load_cr3(swapper_pg_dir); | 377 | load_cr3(initial_page_table); |
| 384 | 378 | ||
| 385 | /* Write 0x1234 to absolute memory location 0x472. The BIOS reads | 379 | /* Write 0x1234 to absolute memory location 0x472. The BIOS reads |
| 386 | this on booting to tell it to "Bypass memory test (also warm | 380 | this on booting to tell it to "Bypass memory test (also warm |
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index c3a4fbb2b996..95a32746fbf9 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
| @@ -31,6 +31,7 @@ | |||
| 31 | #include <linux/apm_bios.h> | 31 | #include <linux/apm_bios.h> |
| 32 | #include <linux/initrd.h> | 32 | #include <linux/initrd.h> |
| 33 | #include <linux/bootmem.h> | 33 | #include <linux/bootmem.h> |
| 34 | #include <linux/memblock.h> | ||
| 34 | #include <linux/seq_file.h> | 35 | #include <linux/seq_file.h> |
| 35 | #include <linux/console.h> | 36 | #include <linux/console.h> |
| 36 | #include <linux/mca.h> | 37 | #include <linux/mca.h> |
| @@ -83,7 +84,6 @@ | |||
| 83 | #include <asm/dmi.h> | 84 | #include <asm/dmi.h> |
| 84 | #include <asm/io_apic.h> | 85 | #include <asm/io_apic.h> |
| 85 | #include <asm/ist.h> | 86 | #include <asm/ist.h> |
| 86 | #include <asm/vmi.h> | ||
| 87 | #include <asm/setup_arch.h> | 87 | #include <asm/setup_arch.h> |
| 88 | #include <asm/bios_ebda.h> | 88 | #include <asm/bios_ebda.h> |
| 89 | #include <asm/cacheflush.h> | 89 | #include <asm/cacheflush.h> |
| @@ -107,11 +107,12 @@ | |||
| 107 | #include <asm/percpu.h> | 107 | #include <asm/percpu.h> |
| 108 | #include <asm/topology.h> | 108 | #include <asm/topology.h> |
| 109 | #include <asm/apicdef.h> | 109 | #include <asm/apicdef.h> |
| 110 | #include <asm/k8.h> | 110 | #include <asm/amd_nb.h> |
| 111 | #ifdef CONFIG_X86_64 | 111 | #ifdef CONFIG_X86_64 |
| 112 | #include <asm/numa_64.h> | 112 | #include <asm/numa_64.h> |
| 113 | #endif | 113 | #endif |
| 114 | #include <asm/mce.h> | 114 | #include <asm/mce.h> |
| 115 | #include <asm/alternative.h> | ||
| 115 | 116 | ||
| 116 | /* | 117 | /* |
| 117 | * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. | 118 | * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. |
| @@ -125,7 +126,6 @@ unsigned long max_pfn_mapped; | |||
| 125 | RESERVE_BRK(dmi_alloc, 65536); | 126 | RESERVE_BRK(dmi_alloc, 65536); |
| 126 | #endif | 127 | #endif |
| 127 | 128 | ||
| 128 | unsigned int boot_cpu_id __read_mostly; | ||
| 129 | 129 | ||
| 130 | static __initdata unsigned long _brk_start = (unsigned long)__brk_base; | 130 | static __initdata unsigned long _brk_start = (unsigned long)__brk_base; |
| 131 | unsigned long _brk_end = (unsigned long)__brk_base; | 131 | unsigned long _brk_end = (unsigned long)__brk_base; |
| @@ -302,7 +302,7 @@ static inline void init_gbpages(void) | |||
| 302 | static void __init reserve_brk(void) | 302 | static void __init reserve_brk(void) |
| 303 | { | 303 | { |
| 304 | if (_brk_end > _brk_start) | 304 | if (_brk_end > _brk_start) |
| 305 | reserve_early(__pa(_brk_start), __pa(_brk_end), "BRK"); | 305 | memblock_x86_reserve_range(__pa(_brk_start), __pa(_brk_end), "BRK"); |
| 306 | 306 | ||
| 307 | /* Mark brk area as locked down and no longer taking any | 307 | /* Mark brk area as locked down and no longer taking any |
| 308 | new allocations */ | 308 | new allocations */ |
| @@ -324,17 +324,16 @@ static void __init relocate_initrd(void) | |||
| 324 | char *p, *q; | 324 | char *p, *q; |
| 325 | 325 | ||
| 326 | /* We need to move the initrd down into lowmem */ | 326 | /* We need to move the initrd down into lowmem */ |
| 327 | ramdisk_here = find_e820_area(0, end_of_lowmem, area_size, | 327 | ramdisk_here = memblock_find_in_range(0, end_of_lowmem, area_size, |
| 328 | PAGE_SIZE); | 328 | PAGE_SIZE); |
| 329 | 329 | ||
| 330 | if (ramdisk_here == -1ULL) | 330 | if (ramdisk_here == MEMBLOCK_ERROR) |
| 331 | panic("Cannot find place for new RAMDISK of size %lld\n", | 331 | panic("Cannot find place for new RAMDISK of size %lld\n", |
| 332 | ramdisk_size); | 332 | ramdisk_size); |
| 333 | 333 | ||
| 334 | /* Note: this includes all the lowmem currently occupied by | 334 | /* Note: this includes all the lowmem currently occupied by |
| 335 | the initrd, we rely on that fact to keep the data intact. */ | 335 | the initrd, we rely on that fact to keep the data intact. */ |
| 336 | reserve_early(ramdisk_here, ramdisk_here + area_size, | 336 | memblock_x86_reserve_range(ramdisk_here, ramdisk_here + area_size, "NEW RAMDISK"); |
| 337 | "NEW RAMDISK"); | ||
| 338 | initrd_start = ramdisk_here + PAGE_OFFSET; | 337 | initrd_start = ramdisk_here + PAGE_OFFSET; |
| 339 | initrd_end = initrd_start + ramdisk_size; | 338 | initrd_end = initrd_start + ramdisk_size; |
| 340 | printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n", | 339 | printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n", |
| @@ -390,7 +389,7 @@ static void __init reserve_initrd(void) | |||
| 390 | initrd_start = 0; | 389 | initrd_start = 0; |
| 391 | 390 | ||
| 392 | if (ramdisk_size >= (end_of_lowmem>>1)) { | 391 | if (ramdisk_size >= (end_of_lowmem>>1)) { |
| 393 | free_early(ramdisk_image, ramdisk_end); | 392 | memblock_x86_free_range(ramdisk_image, ramdisk_end); |
| 394 | printk(KERN_ERR "initrd too large to handle, " | 393 | printk(KERN_ERR "initrd too large to handle, " |
| 395 | "disabling initrd\n"); | 394 | "disabling initrd\n"); |
| 396 | return; | 395 | return; |
| @@ -413,7 +412,7 @@ static void __init reserve_initrd(void) | |||
| 413 | 412 | ||
| 414 | relocate_initrd(); | 413 | relocate_initrd(); |
| 415 | 414 | ||
| 416 | free_early(ramdisk_image, ramdisk_end); | 415 | memblock_x86_free_range(ramdisk_image, ramdisk_end); |
| 417 | } | 416 | } |
| 418 | #else | 417 | #else |
| 419 | static void __init reserve_initrd(void) | 418 | static void __init reserve_initrd(void) |
| @@ -469,7 +468,7 @@ static void __init e820_reserve_setup_data(void) | |||
| 469 | e820_print_map("reserve setup_data"); | 468 | e820_print_map("reserve setup_data"); |
| 470 | } | 469 | } |
| 471 | 470 | ||
| 472 | static void __init reserve_early_setup_data(void) | 471 | static void __init memblock_x86_reserve_range_setup_data(void) |
| 473 | { | 472 | { |
| 474 | struct setup_data *data; | 473 | struct setup_data *data; |
| 475 | u64 pa_data; | 474 | u64 pa_data; |
| @@ -481,7 +480,7 @@ static void __init reserve_early_setup_data(void) | |||
| 481 | while (pa_data) { | 480 | while (pa_data) { |
| 482 | data = early_memremap(pa_data, sizeof(*data)); | 481 | data = early_memremap(pa_data, sizeof(*data)); |
| 483 | sprintf(buf, "setup data %x", data->type); | 482 | sprintf(buf, "setup data %x", data->type); |
| 484 | reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf); | 483 | memblock_x86_reserve_range(pa_data, pa_data+sizeof(*data)+data->len, buf); |
| 485 | pa_data = data->next; | 484 | pa_data = data->next; |
| 486 | early_iounmap(data, sizeof(*data)); | 485 | early_iounmap(data, sizeof(*data)); |
| 487 | } | 486 | } |
| @@ -502,6 +501,7 @@ static inline unsigned long long get_total_mem(void) | |||
| 502 | return total << PAGE_SHIFT; | 501 | return total << PAGE_SHIFT; |
| 503 | } | 502 | } |
| 504 | 503 | ||
| 504 | #define DEFAULT_BZIMAGE_ADDR_MAX 0x37FFFFFF | ||
| 505 | static void __init reserve_crashkernel(void) | 505 | static void __init reserve_crashkernel(void) |
| 506 | { | 506 | { |
| 507 | unsigned long long total_mem; | 507 | unsigned long long total_mem; |
| @@ -519,23 +519,27 @@ static void __init reserve_crashkernel(void) | |||
| 519 | if (crash_base <= 0) { | 519 | if (crash_base <= 0) { |
| 520 | const unsigned long long alignment = 16<<20; /* 16M */ | 520 | const unsigned long long alignment = 16<<20; /* 16M */ |
| 521 | 521 | ||
| 522 | crash_base = find_e820_area(alignment, ULONG_MAX, crash_size, | 522 | /* |
| 523 | alignment); | 523 | * kexec want bzImage is below DEFAULT_BZIMAGE_ADDR_MAX |
| 524 | if (crash_base == -1ULL) { | 524 | */ |
| 525 | crash_base = memblock_find_in_range(alignment, | ||
| 526 | DEFAULT_BZIMAGE_ADDR_MAX, crash_size, alignment); | ||
| 527 | |||
| 528 | if (crash_base == MEMBLOCK_ERROR) { | ||
| 525 | pr_info("crashkernel reservation failed - No suitable area found.\n"); | 529 | pr_info("crashkernel reservation failed - No suitable area found.\n"); |
| 526 | return; | 530 | return; |
| 527 | } | 531 | } |
| 528 | } else { | 532 | } else { |
| 529 | unsigned long long start; | 533 | unsigned long long start; |
| 530 | 534 | ||
| 531 | start = find_e820_area(crash_base, ULONG_MAX, crash_size, | 535 | start = memblock_find_in_range(crash_base, |
| 532 | 1<<20); | 536 | crash_base + crash_size, crash_size, 1<<20); |
| 533 | if (start != crash_base) { | 537 | if (start != crash_base) { |
| 534 | pr_info("crashkernel reservation failed - memory is in use.\n"); | 538 | pr_info("crashkernel reservation failed - memory is in use.\n"); |
| 535 | return; | 539 | return; |
| 536 | } | 540 | } |
| 537 | } | 541 | } |
| 538 | reserve_early(crash_base, crash_base + crash_size, "CRASH KERNEL"); | 542 | memblock_x86_reserve_range(crash_base, crash_base + crash_size, "CRASH KERNEL"); |
| 539 | 543 | ||
| 540 | printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " | 544 | printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " |
| 541 | "for crashkernel (System RAM: %ldMB)\n", | 545 | "for crashkernel (System RAM: %ldMB)\n", |
| @@ -615,82 +619,10 @@ static __init void reserve_ibft_region(void) | |||
| 615 | addr = find_ibft_region(&size); | 619 | addr = find_ibft_region(&size); |
| 616 | 620 | ||
| 617 | if (size) | 621 | if (size) |
| 618 | reserve_early_overlap_ok(addr, addr + size, "ibft"); | 622 | memblock_x86_reserve_range(addr, addr + size, "* ibft"); |
| 619 | } | 623 | } |
| 620 | 624 | ||
| 621 | #ifdef CONFIG_X86_RESERVE_LOW_64K | 625 | static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10; |
| 622 | static int __init dmi_low_memory_corruption(const struct dmi_system_id *d) | ||
| 623 | { | ||
| 624 | printk(KERN_NOTICE | ||
| 625 | "%s detected: BIOS may corrupt low RAM, working around it.\n", | ||
| 626 | d->ident); | ||
| 627 | |||
| 628 | e820_update_range(0, 0x10000, E820_RAM, E820_RESERVED); | ||
| 629 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); | ||
| 630 | |||
| 631 | return 0; | ||
| 632 | } | ||
| 633 | #endif | ||
| 634 | |||
| 635 | /* List of systems that have known low memory corruption BIOS problems */ | ||
| 636 | static struct dmi_system_id __initdata bad_bios_dmi_table[] = { | ||
| 637 | #ifdef CONFIG_X86_RESERVE_LOW_64K | ||
| 638 | { | ||
| 639 | .callback = dmi_low_memory_corruption, | ||
| 640 | .ident = "AMI BIOS", | ||
| 641 | .matches = { | ||
| 642 | DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."), | ||
| 643 | }, | ||
| 644 | }, | ||
| 645 | { | ||
| 646 | .callback = dmi_low_memory_corruption, | ||
| 647 | .ident = "Phoenix BIOS", | ||
| 648 | .matches = { | ||
| 649 | DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies"), | ||
| 650 | }, | ||
| 651 | }, | ||
| 652 | { | ||
| 653 | .callback = dmi_low_memory_corruption, | ||
| 654 | .ident = "Phoenix/MSC BIOS", | ||
| 655 | .matches = { | ||
| 656 | DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix/MSC"), | ||
| 657 | }, | ||
| 658 | }, | ||
| 659 | /* | ||
| 660 | * AMI BIOS with low memory corruption was found on Intel DG45ID and | ||
| 661 | * DG45FC boards. | ||
| 662 | * It has a different DMI_BIOS_VENDOR = "Intel Corp.", for now we will | ||
| 663 | * match only DMI_BOARD_NAME and see if there is more bad products | ||
| 664 | * with this vendor. | ||
| 665 | */ | ||
| 666 | { | ||
| 667 | .callback = dmi_low_memory_corruption, | ||
| 668 | .ident = "AMI BIOS", | ||
| 669 | .matches = { | ||
| 670 | DMI_MATCH(DMI_BOARD_NAME, "DG45ID"), | ||
| 671 | }, | ||
| 672 | }, | ||
| 673 | { | ||
| 674 | .callback = dmi_low_memory_corruption, | ||
| 675 | .ident = "AMI BIOS", | ||
| 676 | .matches = { | ||
| 677 | DMI_MATCH(DMI_BOARD_NAME, "DG45FC"), | ||
| 678 | }, | ||
| 679 | }, | ||
| 680 | /* | ||
| 681 | * The Dell Inspiron Mini 1012 has DMI_BIOS_VENDOR = "Dell Inc.", so | ||
| 682 | * match on the product name. | ||
| 683 | */ | ||
| 684 | { | ||
| 685 | .callback = dmi_low_memory_corruption, | ||
| 686 | .ident = "Phoenix BIOS", | ||
| 687 | .matches = { | ||
| 688 | DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 1012"), | ||
| 689 | }, | ||
| 690 | }, | ||
| 691 | #endif | ||
| 692 | {} | ||
| 693 | }; | ||
| 694 | 626 | ||
| 695 | static void __init trim_bios_range(void) | 627 | static void __init trim_bios_range(void) |
| 696 | { | 628 | { |
| @@ -698,8 +630,14 @@ static void __init trim_bios_range(void) | |||
| 698 | * A special case is the first 4Kb of memory; | 630 | * A special case is the first 4Kb of memory; |
| 699 | * This is a BIOS owned area, not kernel ram, but generally | 631 | * This is a BIOS owned area, not kernel ram, but generally |
| 700 | * not listed as such in the E820 table. | 632 | * not listed as such in the E820 table. |
| 633 | * | ||
| 634 | * This typically reserves additional memory (64KiB by default) | ||
| 635 | * since some BIOSes are known to corrupt low memory. See the | ||
| 636 | * Kconfig help text for X86_RESERVE_LOW. | ||
| 701 | */ | 637 | */ |
| 702 | e820_update_range(0, PAGE_SIZE, E820_RAM, E820_RESERVED); | 638 | e820_update_range(0, ALIGN(reserve_low, PAGE_SIZE), |
| 639 | E820_RAM, E820_RESERVED); | ||
| 640 | |||
| 703 | /* | 641 | /* |
| 704 | * special case: Some BIOSen report the PC BIOS | 642 | * special case: Some BIOSen report the PC BIOS |
| 705 | * area (640->1Mb) as ram even though it is not. | 643 | * area (640->1Mb) as ram even though it is not. |
| @@ -709,6 +647,37 @@ static void __init trim_bios_range(void) | |||
| 709 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); | 647 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); |
| 710 | } | 648 | } |
| 711 | 649 | ||
| 650 | static int __init parse_reservelow(char *p) | ||
| 651 | { | ||
| 652 | unsigned long long size; | ||
| 653 | |||
| 654 | if (!p) | ||
| 655 | return -EINVAL; | ||
| 656 | |||
| 657 | size = memparse(p, &p); | ||
| 658 | |||
| 659 | if (size < 4096) | ||
| 660 | size = 4096; | ||
| 661 | |||
| 662 | if (size > 640*1024) | ||
| 663 | size = 640*1024; | ||
| 664 | |||
| 665 | reserve_low = size; | ||
| 666 | |||
| 667 | return 0; | ||
| 668 | } | ||
| 669 | |||
| 670 | early_param("reservelow", parse_reservelow); | ||
| 671 | |||
| 672 | static u64 __init get_max_mapped(void) | ||
| 673 | { | ||
| 674 | u64 end = max_pfn_mapped; | ||
| 675 | |||
| 676 | end <<= PAGE_SHIFT; | ||
| 677 | |||
| 678 | return end; | ||
| 679 | } | ||
| 680 | |||
| 712 | /* | 681 | /* |
| 713 | * Determine if we were loaded by an EFI loader. If so, then we have also been | 682 | * Determine if we were loaded by an EFI loader. If so, then we have also been |
| 714 | * passed the efi memmap, systab, etc., so we should use these data structures | 683 | * passed the efi memmap, systab, etc., so we should use these data structures |
| @@ -726,18 +695,30 @@ void __init setup_arch(char **cmdline_p) | |||
| 726 | { | 695 | { |
| 727 | int acpi = 0; | 696 | int acpi = 0; |
| 728 | int k8 = 0; | 697 | int k8 = 0; |
| 698 | unsigned long flags; | ||
| 729 | 699 | ||
| 730 | #ifdef CONFIG_X86_32 | 700 | #ifdef CONFIG_X86_32 |
| 731 | memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); | 701 | memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); |
| 732 | visws_early_detect(); | 702 | visws_early_detect(); |
| 703 | |||
| 704 | /* | ||
| 705 | * copy kernel address range established so far and switch | ||
| 706 | * to the proper swapper page table | ||
| 707 | */ | ||
| 708 | clone_pgd_range(swapper_pg_dir + KERNEL_PGD_BOUNDARY, | ||
| 709 | initial_page_table + KERNEL_PGD_BOUNDARY, | ||
| 710 | KERNEL_PGD_PTRS); | ||
| 711 | |||
| 712 | load_cr3(swapper_pg_dir); | ||
| 713 | __flush_tlb_all(); | ||
| 733 | #else | 714 | #else |
| 734 | printk(KERN_INFO "Command line: %s\n", boot_command_line); | 715 | printk(KERN_INFO "Command line: %s\n", boot_command_line); |
| 735 | #endif | 716 | #endif |
| 736 | 717 | ||
| 737 | /* VMI may relocate the fixmap; do this before touching ioremap area */ | 718 | /* |
| 738 | vmi_init(); | 719 | * If we have OLPC OFW, we might end up relocating the fixmap due to |
| 739 | 720 | * reserve_top(), so do this before touching the ioremap area. | |
| 740 | /* OFW also may relocate the fixmap */ | 721 | */ |
| 741 | olpc_ofw_detect(); | 722 | olpc_ofw_detect(); |
| 742 | 723 | ||
| 743 | early_trap_init(); | 724 | early_trap_init(); |
| @@ -782,7 +763,7 @@ void __init setup_arch(char **cmdline_p) | |||
| 782 | #endif | 763 | #endif |
| 783 | 4)) { | 764 | 4)) { |
| 784 | efi_enabled = 1; | 765 | efi_enabled = 1; |
| 785 | efi_reserve_early(); | 766 | efi_memblock_x86_reserve_range(); |
| 786 | } | 767 | } |
| 787 | #endif | 768 | #endif |
| 788 | 769 | ||
| @@ -838,11 +819,8 @@ void __init setup_arch(char **cmdline_p) | |||
| 838 | 819 | ||
| 839 | x86_report_nx(); | 820 | x86_report_nx(); |
| 840 | 821 | ||
| 841 | /* Must be before kernel pagetables are setup */ | ||
| 842 | vmi_activate(); | ||
| 843 | |||
| 844 | /* after early param, so could get panic from serial */ | 822 | /* after early param, so could get panic from serial */ |
| 845 | reserve_early_setup_data(); | 823 | memblock_x86_reserve_range_setup_data(); |
| 846 | 824 | ||
| 847 | if (acpi_mps_check()) { | 825 | if (acpi_mps_check()) { |
| 848 | #ifdef CONFIG_X86_LOCAL_APIC | 826 | #ifdef CONFIG_X86_LOCAL_APIC |
| @@ -863,8 +841,6 @@ void __init setup_arch(char **cmdline_p) | |||
| 863 | 841 | ||
| 864 | dmi_scan_machine(); | 842 | dmi_scan_machine(); |
| 865 | 843 | ||
| 866 | dmi_check_system(bad_bios_dmi_table); | ||
| 867 | |||
| 868 | /* | 844 | /* |
| 869 | * VMware detection requires dmi to be available, so this | 845 | * VMware detection requires dmi to be available, so this |
| 870 | * needs to be done after dmi_scan_machine, for the BP. | 846 | * needs to be done after dmi_scan_machine, for the BP. |
| @@ -897,8 +873,6 @@ void __init setup_arch(char **cmdline_p) | |||
| 897 | */ | 873 | */ |
| 898 | max_pfn = e820_end_of_ram_pfn(); | 874 | max_pfn = e820_end_of_ram_pfn(); |
| 899 | 875 | ||
| 900 | /* preallocate 4k for mptable mpc */ | ||
| 901 | early_reserve_e820_mpc_new(); | ||
| 902 | /* update e820 for memory not covered by WB MTRRs */ | 876 | /* update e820 for memory not covered by WB MTRRs */ |
| 903 | mtrr_bp_init(); | 877 | mtrr_bp_init(); |
| 904 | if (mtrr_trim_uncached_memory(max_pfn)) | 878 | if (mtrr_trim_uncached_memory(max_pfn)) |
| @@ -920,18 +894,8 @@ void __init setup_arch(char **cmdline_p) | |||
| 920 | max_low_pfn = max_pfn; | 894 | max_low_pfn = max_pfn; |
| 921 | 895 | ||
| 922 | high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1; | 896 | high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1; |
| 923 | max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT; | ||
| 924 | #endif | 897 | #endif |
| 925 | 898 | ||
| 926 | #ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION | ||
| 927 | setup_bios_corruption_check(); | ||
| 928 | #endif | ||
| 929 | |||
| 930 | printk(KERN_DEBUG "initial memory mapped : 0 - %08lx\n", | ||
| 931 | max_pfn_mapped<<PAGE_SHIFT); | ||
| 932 | |||
| 933 | reserve_brk(); | ||
| 934 | |||
| 935 | /* | 899 | /* |
| 936 | * Find and reserve possible boot-time SMP configuration: | 900 | * Find and reserve possible boot-time SMP configuration: |
| 937 | */ | 901 | */ |
| @@ -939,6 +903,26 @@ void __init setup_arch(char **cmdline_p) | |||
| 939 | 903 | ||
| 940 | reserve_ibft_region(); | 904 | reserve_ibft_region(); |
| 941 | 905 | ||
| 906 | /* | ||
| 907 | * Need to conclude brk, before memblock_x86_fill() | ||
| 908 | * it could use memblock_find_in_range, could overlap with | ||
| 909 | * brk area. | ||
| 910 | */ | ||
| 911 | reserve_brk(); | ||
| 912 | |||
| 913 | memblock.current_limit = get_max_mapped(); | ||
| 914 | memblock_x86_fill(); | ||
| 915 | |||
| 916 | /* preallocate 4k for mptable mpc */ | ||
| 917 | early_reserve_e820_mpc_new(); | ||
| 918 | |||
| 919 | #ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION | ||
| 920 | setup_bios_corruption_check(); | ||
| 921 | #endif | ||
| 922 | |||
| 923 | printk(KERN_DEBUG "initial memory mapped : 0 - %08lx\n", | ||
| 924 | max_pfn_mapped<<PAGE_SHIFT); | ||
| 925 | |||
| 942 | reserve_trampoline_memory(); | 926 | reserve_trampoline_memory(); |
| 943 | 927 | ||
| 944 | #ifdef CONFIG_ACPI_SLEEP | 928 | #ifdef CONFIG_ACPI_SLEEP |
| @@ -962,6 +946,7 @@ void __init setup_arch(char **cmdline_p) | |||
| 962 | max_low_pfn = max_pfn; | 946 | max_low_pfn = max_pfn; |
| 963 | } | 947 | } |
| 964 | #endif | 948 | #endif |
| 949 | memblock.current_limit = get_max_mapped(); | ||
| 965 | 950 | ||
| 966 | /* | 951 | /* |
| 967 | * NOTE: On x86-32, only from this point on, fixmaps are ready for use. | 952 | * NOTE: On x86-32, only from this point on, fixmaps are ready for use. |
| @@ -1000,10 +985,7 @@ void __init setup_arch(char **cmdline_p) | |||
| 1000 | #endif | 985 | #endif |
| 1001 | 986 | ||
| 1002 | initmem_init(0, max_pfn, acpi, k8); | 987 | initmem_init(0, max_pfn, acpi, k8); |
| 1003 | #ifndef CONFIG_NO_BOOTMEM | 988 | memblock_find_dma_reserve(); |
| 1004 | early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT); | ||
| 1005 | #endif | ||
| 1006 | |||
| 1007 | dma32_reserve_bootmem(); | 989 | dma32_reserve_bootmem(); |
| 1008 | 990 | ||
| 1009 | #ifdef CONFIG_KVM_CLOCK | 991 | #ifdef CONFIG_KVM_CLOCK |
| @@ -1014,7 +996,12 @@ void __init setup_arch(char **cmdline_p) | |||
| 1014 | paging_init(); | 996 | paging_init(); |
| 1015 | x86_init.paging.pagetable_setup_done(swapper_pg_dir); | 997 | x86_init.paging.pagetable_setup_done(swapper_pg_dir); |
| 1016 | 998 | ||
| 1017 | setup_trampoline_page_table(); | 999 | #ifdef CONFIG_X86_32 |
| 1000 | /* sync back kernel address range */ | ||
| 1001 | clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY, | ||
| 1002 | swapper_pg_dir + KERNEL_PGD_BOUNDARY, | ||
| 1003 | KERNEL_PGD_PTRS); | ||
| 1004 | #endif | ||
| 1018 | 1005 | ||
| 1019 | tboot_probe(); | 1006 | tboot_probe(); |
| 1020 | 1007 | ||
| @@ -1071,6 +1058,10 @@ void __init setup_arch(char **cmdline_p) | |||
| 1071 | x86_init.oem.banner(); | 1058 | x86_init.oem.banner(); |
| 1072 | 1059 | ||
| 1073 | mcheck_init(); | 1060 | mcheck_init(); |
| 1061 | |||
| 1062 | local_irq_save(flags); | ||
| 1063 | arch_init_ideal_nop5(); | ||
| 1064 | local_irq_restore(flags); | ||
| 1074 | } | 1065 | } |
| 1075 | 1066 | ||
| 1076 | #ifdef CONFIG_X86_32 | 1067 | #ifdef CONFIG_X86_32 |
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index a60df9ae6454..002b79685f73 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c | |||
| @@ -131,13 +131,7 @@ static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align) | |||
| 131 | 131 | ||
| 132 | static void __init pcpu_fc_free(void *ptr, size_t size) | 132 | static void __init pcpu_fc_free(void *ptr, size_t size) |
| 133 | { | 133 | { |
| 134 | #ifdef CONFIG_NO_BOOTMEM | ||
| 135 | u64 start = __pa(ptr); | ||
| 136 | u64 end = start + size; | ||
| 137 | free_early_partial(start, end); | ||
| 138 | #else | ||
| 139 | free_bootmem(__pa(ptr), size); | 134 | free_bootmem(__pa(ptr), size); |
| 140 | #endif | ||
| 141 | } | 135 | } |
| 142 | 136 | ||
| 143 | static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) | 137 | static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) |
| @@ -253,7 +247,7 @@ void __init setup_per_cpu_areas(void) | |||
| 253 | * Up to this point, the boot CPU has been using .init.data | 247 | * Up to this point, the boot CPU has been using .init.data |
| 254 | * area. Reload any changed state for the boot CPU. | 248 | * area. Reload any changed state for the boot CPU. |
| 255 | */ | 249 | */ |
| 256 | if (cpu == boot_cpu_id) | 250 | if (!cpu) |
| 257 | switch_to_new_gdt(cpu); | 251 | switch_to_new_gdt(cpu); |
| 258 | } | 252 | } |
| 259 | 253 | ||
diff --git a/arch/x86/kernel/sfi.c b/arch/x86/kernel/sfi.c index cb22acf3ed09..dd4c281ffe57 100644 --- a/arch/x86/kernel/sfi.c +++ b/arch/x86/kernel/sfi.c | |||
| @@ -34,7 +34,7 @@ | |||
| 34 | #ifdef CONFIG_X86_LOCAL_APIC | 34 | #ifdef CONFIG_X86_LOCAL_APIC |
| 35 | static unsigned long sfi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; | 35 | static unsigned long sfi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; |
| 36 | 36 | ||
| 37 | void __init mp_sfi_register_lapic_address(unsigned long address) | 37 | static void __init mp_sfi_register_lapic_address(unsigned long address) |
| 38 | { | 38 | { |
| 39 | mp_lapic_addr = address; | 39 | mp_lapic_addr = address; |
| 40 | 40 | ||
| @@ -46,7 +46,7 @@ void __init mp_sfi_register_lapic_address(unsigned long address) | |||
| 46 | } | 46 | } |
| 47 | 47 | ||
| 48 | /* All CPUs enumerated by SFI must be present and enabled */ | 48 | /* All CPUs enumerated by SFI must be present and enabled */ |
| 49 | void __cpuinit mp_sfi_register_lapic(u8 id) | 49 | static void __cpuinit mp_sfi_register_lapic(u8 id) |
| 50 | { | 50 | { |
| 51 | if (MAX_APICS - id <= 0) { | 51 | if (MAX_APICS - id <= 0) { |
| 52 | pr_warning("Processor #%d invalid (max %d)\n", | 52 | pr_warning("Processor #%d invalid (max %d)\n", |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 8b3bfc4dd708..6af118511b4a 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
| @@ -62,7 +62,7 @@ | |||
| 62 | #include <asm/pgtable.h> | 62 | #include <asm/pgtable.h> |
| 63 | #include <asm/tlbflush.h> | 63 | #include <asm/tlbflush.h> |
| 64 | #include <asm/mtrr.h> | 64 | #include <asm/mtrr.h> |
| 65 | #include <asm/vmi.h> | 65 | #include <asm/mwait.h> |
| 66 | #include <asm/apic.h> | 66 | #include <asm/apic.h> |
| 67 | #include <asm/setup.h> | 67 | #include <asm/setup.h> |
| 68 | #include <asm/uv/uv.h> | 68 | #include <asm/uv/uv.h> |
| @@ -299,23 +299,16 @@ notrace static void __cpuinit start_secondary(void *unused) | |||
| 299 | * fragile that we want to limit the things done here to the | 299 | * fragile that we want to limit the things done here to the |
| 300 | * most necessary things. | 300 | * most necessary things. |
| 301 | */ | 301 | */ |
| 302 | cpu_init(); | ||
| 303 | preempt_disable(); | ||
| 304 | smp_callin(); | ||
| 302 | 305 | ||
| 303 | #ifdef CONFIG_X86_32 | 306 | #ifdef CONFIG_X86_32 |
| 304 | /* | 307 | /* switch away from the initial page table */ |
| 305 | * Switch away from the trampoline page-table | ||
| 306 | * | ||
| 307 | * Do this before cpu_init() because it needs to access per-cpu | ||
| 308 | * data which may not be mapped in the trampoline page-table. | ||
| 309 | */ | ||
| 310 | load_cr3(swapper_pg_dir); | 308 | load_cr3(swapper_pg_dir); |
| 311 | __flush_tlb_all(); | 309 | __flush_tlb_all(); |
| 312 | #endif | 310 | #endif |
| 313 | 311 | ||
| 314 | vmi_bringup(); | ||
| 315 | cpu_init(); | ||
| 316 | preempt_disable(); | ||
| 317 | smp_callin(); | ||
| 318 | |||
| 319 | /* otherwise gcc will move up smp_processor_id before the cpu_init */ | 312 | /* otherwise gcc will move up smp_processor_id before the cpu_init */ |
| 320 | barrier(); | 313 | barrier(); |
| 321 | /* | 314 | /* |
| @@ -324,9 +317,9 @@ notrace static void __cpuinit start_secondary(void *unused) | |||
| 324 | check_tsc_sync_target(); | 317 | check_tsc_sync_target(); |
| 325 | 318 | ||
| 326 | if (nmi_watchdog == NMI_IO_APIC) { | 319 | if (nmi_watchdog == NMI_IO_APIC) { |
| 327 | legacy_pic->chip->mask(0); | 320 | legacy_pic->mask(0); |
| 328 | enable_NMI_through_LVT0(); | 321 | enable_NMI_through_LVT0(); |
| 329 | legacy_pic->chip->unmask(0); | 322 | legacy_pic->unmask(0); |
| 330 | } | 323 | } |
| 331 | 324 | ||
| 332 | /* This must be done before setting cpu_online_mask */ | 325 | /* This must be done before setting cpu_online_mask */ |
| @@ -397,6 +390,19 @@ void __cpuinit smp_store_cpu_info(int id) | |||
| 397 | identify_secondary_cpu(c); | 390 | identify_secondary_cpu(c); |
| 398 | } | 391 | } |
| 399 | 392 | ||
| 393 | static void __cpuinit link_thread_siblings(int cpu1, int cpu2) | ||
| 394 | { | ||
| 395 | struct cpuinfo_x86 *c1 = &cpu_data(cpu1); | ||
| 396 | struct cpuinfo_x86 *c2 = &cpu_data(cpu2); | ||
| 397 | |||
| 398 | cpumask_set_cpu(cpu1, cpu_sibling_mask(cpu2)); | ||
| 399 | cpumask_set_cpu(cpu2, cpu_sibling_mask(cpu1)); | ||
| 400 | cpumask_set_cpu(cpu1, cpu_core_mask(cpu2)); | ||
| 401 | cpumask_set_cpu(cpu2, cpu_core_mask(cpu1)); | ||
| 402 | cpumask_set_cpu(cpu1, c2->llc_shared_map); | ||
| 403 | cpumask_set_cpu(cpu2, c1->llc_shared_map); | ||
| 404 | } | ||
| 405 | |||
| 400 | 406 | ||
| 401 | void __cpuinit set_cpu_sibling_map(int cpu) | 407 | void __cpuinit set_cpu_sibling_map(int cpu) |
| 402 | { | 408 | { |
| @@ -409,14 +415,13 @@ void __cpuinit set_cpu_sibling_map(int cpu) | |||
| 409 | for_each_cpu(i, cpu_sibling_setup_mask) { | 415 | for_each_cpu(i, cpu_sibling_setup_mask) { |
| 410 | struct cpuinfo_x86 *o = &cpu_data(i); | 416 | struct cpuinfo_x86 *o = &cpu_data(i); |
| 411 | 417 | ||
| 412 | if (c->phys_proc_id == o->phys_proc_id && | 418 | if (cpu_has(c, X86_FEATURE_TOPOEXT)) { |
| 413 | c->cpu_core_id == o->cpu_core_id) { | 419 | if (c->phys_proc_id == o->phys_proc_id && |
| 414 | cpumask_set_cpu(i, cpu_sibling_mask(cpu)); | 420 | c->compute_unit_id == o->compute_unit_id) |
| 415 | cpumask_set_cpu(cpu, cpu_sibling_mask(i)); | 421 | link_thread_siblings(cpu, i); |
| 416 | cpumask_set_cpu(i, cpu_core_mask(cpu)); | 422 | } else if (c->phys_proc_id == o->phys_proc_id && |
| 417 | cpumask_set_cpu(cpu, cpu_core_mask(i)); | 423 | c->cpu_core_id == o->cpu_core_id) { |
| 418 | cpumask_set_cpu(i, c->llc_shared_map); | 424 | link_thread_siblings(cpu, i); |
| 419 | cpumask_set_cpu(cpu, o->llc_shared_map); | ||
| 420 | } | 425 | } |
| 421 | } | 426 | } |
| 422 | } else { | 427 | } else { |
| @@ -774,7 +779,6 @@ do_rest: | |||
| 774 | #ifdef CONFIG_X86_32 | 779 | #ifdef CONFIG_X86_32 |
| 775 | /* Stack for startup_32 can be just as for start_secondary onwards */ | 780 | /* Stack for startup_32 can be just as for start_secondary onwards */ |
| 776 | irq_ctx_init(cpu); | 781 | irq_ctx_init(cpu); |
| 777 | initial_page_table = __pa(&trampoline_pg_dir); | ||
| 778 | #else | 782 | #else |
| 779 | clear_tsk_thread_flag(c_idle.idle, TIF_FORK); | 783 | clear_tsk_thread_flag(c_idle.idle, TIF_FORK); |
| 780 | initial_gs = per_cpu_offset(cpu); | 784 | initial_gs = per_cpu_offset(cpu); |
| @@ -923,7 +927,6 @@ int __cpuinit native_cpu_up(unsigned int cpu) | |||
| 923 | per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; | 927 | per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; |
| 924 | 928 | ||
| 925 | err = do_boot_cpu(apicid, cpu); | 929 | err = do_boot_cpu(apicid, cpu); |
| 926 | |||
| 927 | if (err) { | 930 | if (err) { |
| 928 | pr_debug("do_boot_cpu failed %d\n", err); | 931 | pr_debug("do_boot_cpu failed %d\n", err); |
| 929 | return -EIO; | 932 | return -EIO; |
| @@ -1109,8 +1112,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) | |||
| 1109 | } | 1112 | } |
| 1110 | set_cpu_sibling_map(0); | 1113 | set_cpu_sibling_map(0); |
| 1111 | 1114 | ||
| 1112 | enable_IR_x2apic(); | ||
| 1113 | default_setup_apic_routing(); | ||
| 1114 | 1115 | ||
| 1115 | if (smp_sanity_check(max_cpus) < 0) { | 1116 | if (smp_sanity_check(max_cpus) < 0) { |
| 1116 | printk(KERN_INFO "SMP disabled\n"); | 1117 | printk(KERN_INFO "SMP disabled\n"); |
| @@ -1118,6 +1119,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) | |||
| 1118 | goto out; | 1119 | goto out; |
| 1119 | } | 1120 | } |
| 1120 | 1121 | ||
| 1122 | default_setup_apic_routing(); | ||
| 1123 | |||
| 1121 | preempt_disable(); | 1124 | preempt_disable(); |
| 1122 | if (read_apic_id() != boot_cpu_physical_apicid) { | 1125 | if (read_apic_id() != boot_cpu_physical_apicid) { |
| 1123 | panic("Boot APIC ID in local APIC unexpected (%d vs %d)", | 1126 | panic("Boot APIC ID in local APIC unexpected (%d vs %d)", |
| @@ -1383,11 +1386,88 @@ void play_dead_common(void) | |||
| 1383 | local_irq_disable(); | 1386 | local_irq_disable(); |
| 1384 | } | 1387 | } |
| 1385 | 1388 | ||
| 1389 | /* | ||
| 1390 | * We need to flush the caches before going to sleep, lest we have | ||
| 1391 | * dirty data in our caches when we come back up. | ||
| 1392 | */ | ||
| 1393 | static inline void mwait_play_dead(void) | ||
| 1394 | { | ||
| 1395 | unsigned int eax, ebx, ecx, edx; | ||
| 1396 | unsigned int highest_cstate = 0; | ||
| 1397 | unsigned int highest_subcstate = 0; | ||
| 1398 | int i; | ||
| 1399 | void *mwait_ptr; | ||
| 1400 | |||
| 1401 | if (!cpu_has(¤t_cpu_data, X86_FEATURE_MWAIT)) | ||
| 1402 | return; | ||
| 1403 | if (!cpu_has(¤t_cpu_data, X86_FEATURE_CLFLSH)) | ||
| 1404 | return; | ||
| 1405 | if (current_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) | ||
| 1406 | return; | ||
| 1407 | |||
| 1408 | eax = CPUID_MWAIT_LEAF; | ||
| 1409 | ecx = 0; | ||
| 1410 | native_cpuid(&eax, &ebx, &ecx, &edx); | ||
| 1411 | |||
| 1412 | /* | ||
| 1413 | * eax will be 0 if EDX enumeration is not valid. | ||
| 1414 | * Initialized below to cstate, sub_cstate value when EDX is valid. | ||
| 1415 | */ | ||
| 1416 | if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED)) { | ||
| 1417 | eax = 0; | ||
| 1418 | } else { | ||
| 1419 | edx >>= MWAIT_SUBSTATE_SIZE; | ||
| 1420 | for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) { | ||
| 1421 | if (edx & MWAIT_SUBSTATE_MASK) { | ||
| 1422 | highest_cstate = i; | ||
| 1423 | highest_subcstate = edx & MWAIT_SUBSTATE_MASK; | ||
| 1424 | } | ||
| 1425 | } | ||
| 1426 | eax = (highest_cstate << MWAIT_SUBSTATE_SIZE) | | ||
| 1427 | (highest_subcstate - 1); | ||
| 1428 | } | ||
| 1429 | |||
| 1430 | /* | ||
| 1431 | * This should be a memory location in a cache line which is | ||
| 1432 | * unlikely to be touched by other processors. The actual | ||
| 1433 | * content is immaterial as it is not actually modified in any way. | ||
| 1434 | */ | ||
| 1435 | mwait_ptr = &current_thread_info()->flags; | ||
| 1436 | |||
| 1437 | wbinvd(); | ||
| 1438 | |||
| 1439 | while (1) { | ||
| 1440 | /* | ||
| 1441 | * The CLFLUSH is a workaround for erratum AAI65 for | ||
| 1442 | * the Xeon 7400 series. It's not clear it is actually | ||
| 1443 | * needed, but it should be harmless in either case. | ||
| 1444 | * The WBINVD is insufficient due to the spurious-wakeup | ||
| 1445 | * case where we return around the loop. | ||
| 1446 | */ | ||
| 1447 | clflush(mwait_ptr); | ||
| 1448 | __monitor(mwait_ptr, 0, 0); | ||
| 1449 | mb(); | ||
| 1450 | __mwait(eax, 0); | ||
| 1451 | } | ||
| 1452 | } | ||
| 1453 | |||
| 1454 | static inline void hlt_play_dead(void) | ||
| 1455 | { | ||
| 1456 | if (current_cpu_data.x86 >= 4) | ||
| 1457 | wbinvd(); | ||
| 1458 | |||
| 1459 | while (1) { | ||
| 1460 | native_halt(); | ||
| 1461 | } | ||
| 1462 | } | ||
| 1463 | |||
| 1386 | void native_play_dead(void) | 1464 | void native_play_dead(void) |
| 1387 | { | 1465 | { |
| 1388 | play_dead_common(); | 1466 | play_dead_common(); |
| 1389 | tboot_shutdown(TB_SHUTDOWN_WFS); | 1467 | tboot_shutdown(TB_SHUTDOWN_WFS); |
| 1390 | wbinvd_halt(); | 1468 | |
| 1469 | mwait_play_dead(); /* Only returns on failure */ | ||
| 1470 | hlt_play_dead(); | ||
| 1391 | } | 1471 | } |
| 1392 | 1472 | ||
| 1393 | #else /* ... !CONFIG_HOTPLUG_CPU */ | 1473 | #else /* ... !CONFIG_HOTPLUG_CPU */ |
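The new mwait_play_dead() above derives its MWAIT hint from CPUID leaf 5: EDX carries a 4-bit sub-state count per C-state, the loop remembers the deepest populated state, and the value packed into EAX is (highest_cstate << 4) | (highest_subcstate - 1). The following stand-alone user-space sketch performs the same hint computation; it is an illustration only (it assumes a GCC/Clang toolchain providing <cpuid.h>, and the MWAIT_* constants are copied here to mirror the kernel's values):

	#include <stdio.h>
	#include <cpuid.h>

	#define CPUID_MWAIT_LEAF			5
	#define CPUID5_ECX_EXTENSIONS_SUPPORTED		0x1
	#define MWAIT_SUBSTATE_SIZE			4
	#define MWAIT_SUBSTATE_MASK			0xf

	int main(void)
	{
		unsigned int eax, ebx, ecx, edx;
		unsigned int highest_cstate = 0, highest_subcstate = 0;
		unsigned int hint = 0;
		int i;

		/* Leaf 5 describes MONITOR/MWAIT; bail out if it is absent. */
		if (!__get_cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx))
			return 1;

		if (ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) {
			/* Shift past the C0 field, then scan the sub-state counts. */
			edx >>= MWAIT_SUBSTATE_SIZE;
			for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) {
				if (edx & MWAIT_SUBSTATE_MASK) {
					highest_cstate = i;
					highest_subcstate = edx & MWAIT_SUBSTATE_MASK;
				}
			}
			hint = (highest_cstate << MWAIT_SUBSTATE_SIZE) |
			       (highest_subcstate - 1);
		}

		printf("MWAIT hint for deepest enumerated C-state: 0x%x\n", hint);
		return 0;
	}
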
diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c index d5e06624e34a..0b0cb5fede19 100644 --- a/arch/x86/kernel/sys_i386_32.c +++ b/arch/x86/kernel/sys_i386_32.c | |||
| @@ -33,8 +33,8 @@ int kernel_execve(const char *filename, | |||
| 33 | const char *const envp[]) | 33 | const char *const envp[]) |
| 34 | { | 34 | { |
| 35 | long __res; | 35 | long __res; |
| 36 | asm volatile ("push %%ebx ; movl %2,%%ebx ; int $0x80 ; pop %%ebx" | 36 | asm volatile ("int $0x80" |
| 37 | : "=a" (__res) | 37 | : "=a" (__res) |
| 38 | : "0" (__NR_execve), "ri" (filename), "c" (argv), "d" (envp) : "memory"); | 38 | : "0" (__NR_execve), "b" (filename), "c" (argv), "d" (envp) : "memory"); |
| 39 | return __res; | 39 | return __res; |
| 40 | } | 40 | } |
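On i386 the int $0x80 convention passes the system-call number in EAX and the first three arguments in EBX, ECX and EDX. The old kernel_execve() loaded EBX by hand around the trap, presumably a holdover from the days when the "b" constraint was awkward to use under PIC (EBX doubles as the GOT pointer there); the new version simply lets the compiler allocate EBX directly. Below is a minimal user-space illustration of the same calling convention, using write(2) instead of execve() so it is harmless to run (assumption: a 32-bit x86 Linux target, e.g. built with gcc -m32):

	/* Illustration only, not kernel code. */
	#include <sys/syscall.h>

	int main(void)
	{
		static const char msg[] = "hello via int $0x80\n";
		long ret;

		asm volatile ("int $0x80"
			      : "=a" (ret)		/* return value in EAX   */
			      : "0" (SYS_write),	/* syscall number in EAX */
				"b" (1L),		/* fd    -> EBX */
				"c" (msg),		/* buf   -> ECX */
				"d" (sizeof(msg) - 1)	/* count -> EDX */
			      : "memory");

		return ret == (long)(sizeof(msg) - 1) ? 0 : 1;
	}
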
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index 312ef0292815..20ea20a39e2a 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c | |||
| @@ -1001,10 +1001,10 @@ static int uv_ptc_seq_show(struct seq_file *file, void *data) | |||
| 1001 | static ssize_t tunables_read(struct file *file, char __user *userbuf, | 1001 | static ssize_t tunables_read(struct file *file, char __user *userbuf, |
| 1002 | size_t count, loff_t *ppos) | 1002 | size_t count, loff_t *ppos) |
| 1003 | { | 1003 | { |
| 1004 | char buf[300]; | 1004 | char *buf; |
| 1005 | int ret; | 1005 | int ret; |
| 1006 | 1006 | ||
| 1007 | ret = snprintf(buf, 300, "%s %s %s\n%d %d %d %d %d %d %d %d %d\n", | 1007 | buf = kasprintf(GFP_KERNEL, "%s %s %s\n%d %d %d %d %d %d %d %d %d\n", |
| 1008 | "max_bau_concurrent plugged_delay plugsb4reset", | 1008 | "max_bau_concurrent plugged_delay plugsb4reset", |
| 1009 | "timeoutsb4reset ipi_reset_limit complete_threshold", | 1009 | "timeoutsb4reset ipi_reset_limit complete_threshold", |
| 1010 | "congested_response_us congested_reps congested_period", | 1010 | "congested_response_us congested_reps congested_period", |
| @@ -1012,7 +1012,12 @@ static ssize_t tunables_read(struct file *file, char __user *userbuf, | |||
| 1012 | timeoutsb4reset, ipi_reset_limit, complete_threshold, | 1012 | timeoutsb4reset, ipi_reset_limit, complete_threshold, |
| 1013 | congested_response_us, congested_reps, congested_period); | 1013 | congested_response_us, congested_reps, congested_period); |
| 1014 | 1014 | ||
| 1015 | return simple_read_from_buffer(userbuf, count, ppos, buf, ret); | 1015 | if (!buf) |
| 1016 | return -ENOMEM; | ||
| 1017 | |||
| 1018 | ret = simple_read_from_buffer(userbuf, count, ppos, buf, strlen(buf)); | ||
| 1019 | kfree(buf); | ||
| 1020 | return ret; | ||
| 1016 | } | 1021 | } |
| 1017 | 1022 | ||
| 1018 | /* | 1023 | /* |
| @@ -1285,6 +1290,7 @@ static const struct file_operations tunables_fops = { | |||
| 1285 | .open = tunables_open, | 1290 | .open = tunables_open, |
| 1286 | .read = tunables_read, | 1291 | .read = tunables_read, |
| 1287 | .write = tunables_write, | 1292 | .write = tunables_write, |
| 1293 | .llseek = default_llseek, | ||
| 1288 | }; | 1294 | }; |
| 1289 | 1295 | ||
| 1290 | static int __init uv_ptc_init(void) | 1296 | static int __init uv_ptc_init(void) |
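The tunables_read() change above replaces a fixed 300-byte stack buffer with kasprintf(), which sizes the allocation to the formatted string, so the output can never be silently truncated and the length handed to simple_read_from_buffer() is always the real one; the explicit .llseek = default_llseek follows the tree-wide effort at the time to spell out llseek behaviour in file_operations rather than rely on the implicit default. A minimal sketch of the same read-handler pattern (my_value and my_read are made-up names, not part of the UV driver):

	#include <linux/fs.h>
	#include <linux/kernel.h>
	#include <linux/slab.h>
	#include <linux/string.h>
	#include <linux/uaccess.h>

	static int my_value = 42;	/* stand-in for a real tunable */

	static ssize_t my_read(struct file *file, char __user *userbuf,
			       size_t count, loff_t *ppos)
	{
		char *buf;
		ssize_t ret;

		/* Allocate exactly as much as the formatted text needs. */
		buf = kasprintf(GFP_KERNEL, "my_value %d\n", my_value);
		if (!buf)
			return -ENOMEM;

		/* Handles *ppos, short reads and copy_to_user() for us. */
		ret = simple_read_from_buffer(userbuf, count, ppos, buf, strlen(buf));
		kfree(buf);
		return ret;
	}
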
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c index e2a595257390..a375616d77f7 100644 --- a/arch/x86/kernel/trampoline.c +++ b/arch/x86/kernel/trampoline.c | |||
| @@ -1,8 +1,8 @@ | |||
| 1 | #include <linux/io.h> | 1 | #include <linux/io.h> |
| 2 | #include <linux/memblock.h> | ||
| 2 | 3 | ||
| 3 | #include <asm/trampoline.h> | 4 | #include <asm/trampoline.h> |
| 4 | #include <asm/pgtable.h> | 5 | #include <asm/pgtable.h> |
| 5 | #include <asm/e820.h> | ||
| 6 | 6 | ||
| 7 | #if defined(CONFIG_X86_64) && defined(CONFIG_ACPI_SLEEP) | 7 | #if defined(CONFIG_X86_64) && defined(CONFIG_ACPI_SLEEP) |
| 8 | #define __trampinit | 8 | #define __trampinit |
| @@ -17,15 +17,15 @@ unsigned char *__trampinitdata trampoline_base; | |||
| 17 | 17 | ||
| 18 | void __init reserve_trampoline_memory(void) | 18 | void __init reserve_trampoline_memory(void) |
| 19 | { | 19 | { |
| 20 | unsigned long mem; | 20 | phys_addr_t mem; |
| 21 | 21 | ||
| 22 | /* Has to be in very low memory so we can execute real-mode AP code. */ | 22 | /* Has to be in very low memory so we can execute real-mode AP code. */ |
| 23 | mem = find_e820_area(0, 1<<20, TRAMPOLINE_SIZE, PAGE_SIZE); | 23 | mem = memblock_find_in_range(0, 1<<20, TRAMPOLINE_SIZE, PAGE_SIZE); |
| 24 | if (mem == -1L) | 24 | if (mem == MEMBLOCK_ERROR) |
| 25 | panic("Cannot allocate trampoline\n"); | 25 | panic("Cannot allocate trampoline\n"); |
| 26 | 26 | ||
| 27 | trampoline_base = __va(mem); | 27 | trampoline_base = __va(mem); |
| 28 | reserve_early(mem, mem + TRAMPOLINE_SIZE, "TRAMPOLINE"); | 28 | memblock_x86_reserve_range(mem, mem + TRAMPOLINE_SIZE, "TRAMPOLINE"); |
| 29 | } | 29 | } |
| 30 | 30 | ||
| 31 | /* | 31 | /* |
| @@ -38,19 +38,3 @@ unsigned long __trampinit setup_trampoline(void) | |||
| 38 | memcpy(trampoline_base, trampoline_data, TRAMPOLINE_SIZE); | 38 | memcpy(trampoline_base, trampoline_data, TRAMPOLINE_SIZE); |
| 39 | return virt_to_phys(trampoline_base); | 39 | return virt_to_phys(trampoline_base); |
| 40 | } | 40 | } |
| 41 | |||
| 42 | void __init setup_trampoline_page_table(void) | ||
| 43 | { | ||
| 44 | #ifdef CONFIG_X86_32 | ||
| 45 | /* Copy kernel address range */ | ||
| 46 | clone_pgd_range(trampoline_pg_dir + KERNEL_PGD_BOUNDARY, | ||
| 47 | swapper_pg_dir + KERNEL_PGD_BOUNDARY, | ||
| 48 | KERNEL_PGD_PTRS); | ||
| 49 | |||
| 50 | /* Initialize low mappings */ | ||
| 51 | clone_pgd_range(trampoline_pg_dir, | ||
| 52 | swapper_pg_dir + KERNEL_PGD_BOUNDARY, | ||
| 53 | min_t(unsigned long, KERNEL_PGD_PTRS, | ||
| 54 | KERNEL_PGD_BOUNDARY)); | ||
| 55 | #endif | ||
| 56 | } | ||
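The trampoline change above is part of the switch from the early e820 allocator to memblock: the search below 1MB (the trampoline must be reachable by APs that start in real mode) now goes through memblock_find_in_range(), failure is signalled by MEMBLOCK_ERROR rather than -1L (hence the phys_addr_t type), and the range still has to be reserved explicitly afterwards because find_in_range() only locates free memory. A sketch of that two-step pattern with the interfaces as they exist in this series (memblock_x86_reserve_range() was later superseded by plain memblock_reserve(); MY_BUF_SIZE and the function name are made up):

	#include <linux/init.h>
	#include <linux/kernel.h>
	#include <linux/memblock.h>
	#include <linux/mm.h>

	#define MY_BUF_SIZE	PAGE_SIZE

	static void __init reserve_low_buffer(void)
	{
		phys_addr_t mem;

		/* Step 1: find a page-aligned free range in the first megabyte. */
		mem = memblock_find_in_range(0, 1 << 20, MY_BUF_SIZE, PAGE_SIZE);
		if (mem == MEMBLOCK_ERROR)
			panic("Cannot allocate low buffer\n");

		/* Step 2: actually reserve it so nobody else hands it out. */
		memblock_x86_reserve_range(mem, mem + MY_BUF_SIZE, "MYBUF");
	}
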
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 60788dee0f8a..cb838ca42c96 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
| @@ -575,6 +575,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) | |||
| 575 | if (regs->flags & X86_VM_MASK) { | 575 | if (regs->flags & X86_VM_MASK) { |
| 576 | handle_vm86_trap((struct kernel_vm86_regs *) regs, | 576 | handle_vm86_trap((struct kernel_vm86_regs *) regs, |
| 577 | error_code, 1); | 577 | error_code, 1); |
| 578 | preempt_conditional_cli(regs); | ||
| 578 | return; | 579 | return; |
| 579 | } | 580 | } |
| 580 | 581 | ||
| @@ -776,21 +777,10 @@ asmlinkage void math_state_restore(void) | |||
| 776 | } | 777 | } |
| 777 | EXPORT_SYMBOL_GPL(math_state_restore); | 778 | EXPORT_SYMBOL_GPL(math_state_restore); |
| 778 | 779 | ||
| 779 | #ifndef CONFIG_MATH_EMULATION | ||
| 780 | void math_emulate(struct math_emu_info *info) | ||
| 781 | { | ||
| 782 | printk(KERN_EMERG | ||
| 783 | "math-emulation not enabled and no coprocessor found.\n"); | ||
| 784 | printk(KERN_EMERG "killing %s.\n", current->comm); | ||
| 785 | force_sig(SIGFPE, current); | ||
| 786 | schedule(); | ||
| 787 | } | ||
| 788 | #endif /* CONFIG_MATH_EMULATION */ | ||
| 789 | |||
| 790 | dotraplinkage void __kprobes | 780 | dotraplinkage void __kprobes |
| 791 | do_device_not_available(struct pt_regs *regs, long error_code) | 781 | do_device_not_available(struct pt_regs *regs, long error_code) |
| 792 | { | 782 | { |
| 793 | #ifdef CONFIG_X86_32 | 783 | #ifdef CONFIG_MATH_EMULATION |
| 794 | if (read_cr0() & X86_CR0_EM) { | 784 | if (read_cr0() & X86_CR0_EM) { |
| 795 | struct math_emu_info info = { }; | 785 | struct math_emu_info info = { }; |
| 796 | 786 | ||
| @@ -798,12 +788,12 @@ do_device_not_available(struct pt_regs *regs, long error_code) | |||
| 798 | 788 | ||
| 799 | info.regs = regs; | 789 | info.regs = regs; |
| 800 | math_emulate(&info); | 790 | math_emulate(&info); |
| 801 | } else { | 791 | return; |
| 802 | math_state_restore(); /* interrupts still off */ | ||
| 803 | conditional_sti(regs); | ||
| 804 | } | 792 | } |
| 805 | #else | 793 | #endif |
| 806 | math_state_restore(); | 794 | math_state_restore(); /* interrupts still off */ |
| 795 | #ifdef CONFIG_X86_32 | ||
| 796 | conditional_sti(regs); | ||
| 807 | #endif | 797 | #endif |
| 808 | } | 798 | } |
| 809 | 799 | ||
| @@ -881,18 +871,6 @@ void __init trap_init(void) | |||
| 881 | #endif | 871 | #endif |
| 882 | 872 | ||
| 883 | #ifdef CONFIG_X86_32 | 873 | #ifdef CONFIG_X86_32 |
| 884 | if (cpu_has_fxsr) { | ||
| 885 | printk(KERN_INFO "Enabling fast FPU save and restore... "); | ||
| 886 | set_in_cr4(X86_CR4_OSFXSR); | ||
| 887 | printk("done.\n"); | ||
| 888 | } | ||
| 889 | if (cpu_has_xmm) { | ||
| 890 | printk(KERN_INFO | ||
| 891 | "Enabling unmasked SIMD FPU exception support... "); | ||
| 892 | set_in_cr4(X86_CR4_OSXMMEXCPT); | ||
| 893 | printk("done.\n"); | ||
| 894 | } | ||
| 895 | |||
| 896 | set_system_trap_gate(SYSCALL_VECTOR, &system_call); | 874 | set_system_trap_gate(SYSCALL_VECTOR, &system_call); |
| 897 | set_bit(SYSCALL_VECTOR, used_vectors); | 875 | set_bit(SYSCALL_VECTOR, used_vectors); |
| 898 | #endif | 876 | #endif |
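For readability, here is how do_device_not_available() reads once the two hunks above are applied (assembled from the new column, including the one unchanged conditional_sti() line the hunks skip over): the CR0.EM/math_emulate() branch is now compiled only when CONFIG_MATH_EMULATION is set, and every other configuration falls through to math_state_restore().

	dotraplinkage void __kprobes
	do_device_not_available(struct pt_regs *regs, long error_code)
	{
	#ifdef CONFIG_MATH_EMULATION
		if (read_cr0() & X86_CR0_EM) {
			struct math_emu_info info = { };

			conditional_sti(regs);

			info.regs = regs;
			math_emulate(&info);
			return;
		}
	#endif
		math_state_restore(); /* interrupts still off */
	#ifdef CONFIG_X86_32
		conditional_sti(regs);
	#endif
	}
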
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 26a863a9c2a8..0c40d8b72416 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c | |||
| @@ -104,10 +104,14 @@ int __init notsc_setup(char *str) | |||
| 104 | 104 | ||
| 105 | __setup("notsc", notsc_setup); | 105 | __setup("notsc", notsc_setup); |
| 106 | 106 | ||
| 107 | static int no_sched_irq_time; | ||
| 108 | |||
| 107 | static int __init tsc_setup(char *str) | 109 | static int __init tsc_setup(char *str) |
| 108 | { | 110 | { |
| 109 | if (!strcmp(str, "reliable")) | 111 | if (!strcmp(str, "reliable")) |
| 110 | tsc_clocksource_reliable = 1; | 112 | tsc_clocksource_reliable = 1; |
| 113 | if (!strncmp(str, "noirqtime", 9)) | ||
| 114 | no_sched_irq_time = 1; | ||
| 111 | return 1; | 115 | return 1; |
| 112 | } | 116 | } |
| 113 | 117 | ||
| @@ -801,6 +805,7 @@ void mark_tsc_unstable(char *reason) | |||
| 801 | if (!tsc_unstable) { | 805 | if (!tsc_unstable) { |
| 802 | tsc_unstable = 1; | 806 | tsc_unstable = 1; |
| 803 | sched_clock_stable = 0; | 807 | sched_clock_stable = 0; |
| 808 | disable_sched_clock_irqtime(); | ||
| 804 | printk(KERN_INFO "Marking TSC unstable due to %s\n", reason); | 809 | printk(KERN_INFO "Marking TSC unstable due to %s\n", reason); |
| 805 | /* Change only the rating, when not registered */ | 810 | /* Change only the rating, when not registered */ |
| 806 | if (clocksource_tsc.mult) | 811 | if (clocksource_tsc.mult) |
| @@ -892,60 +897,6 @@ static void __init init_tsc_clocksource(void) | |||
| 892 | clocksource_register_khz(&clocksource_tsc, tsc_khz); | 897 | clocksource_register_khz(&clocksource_tsc, tsc_khz); |
| 893 | } | 898 | } |
| 894 | 899 | ||
| 895 | #ifdef CONFIG_X86_64 | ||
| 896 | /* | ||
| 897 | * calibrate_cpu is used on systems with fixed rate TSCs to determine | ||
| 898 | * processor frequency | ||
| 899 | */ | ||
| 900 | #define TICK_COUNT 100000000 | ||
| 901 | static unsigned long __init calibrate_cpu(void) | ||
| 902 | { | ||
| 903 | int tsc_start, tsc_now; | ||
| 904 | int i, no_ctr_free; | ||
| 905 | unsigned long evntsel3 = 0, pmc3 = 0, pmc_now = 0; | ||
| 906 | unsigned long flags; | ||
| 907 | |||
| 908 | for (i = 0; i < 4; i++) | ||
| 909 | if (avail_to_resrv_perfctr_nmi_bit(i)) | ||
| 910 | break; | ||
| 911 | no_ctr_free = (i == 4); | ||
| 912 | if (no_ctr_free) { | ||
| 913 | WARN(1, KERN_WARNING "Warning: AMD perfctrs busy ... " | ||
| 914 | "cpu_khz value may be incorrect.\n"); | ||
| 915 | i = 3; | ||
| 916 | rdmsrl(MSR_K7_EVNTSEL3, evntsel3); | ||
| 917 | wrmsrl(MSR_K7_EVNTSEL3, 0); | ||
| 918 | rdmsrl(MSR_K7_PERFCTR3, pmc3); | ||
| 919 | } else { | ||
| 920 | reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i); | ||
| 921 | reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i); | ||
| 922 | } | ||
| 923 | local_irq_save(flags); | ||
| 924 | /* start measuring cycles, incrementing from 0 */ | ||
| 925 | wrmsrl(MSR_K7_PERFCTR0 + i, 0); | ||
| 926 | wrmsrl(MSR_K7_EVNTSEL0 + i, 1 << 22 | 3 << 16 | 0x76); | ||
| 927 | rdtscl(tsc_start); | ||
| 928 | do { | ||
| 929 | rdmsrl(MSR_K7_PERFCTR0 + i, pmc_now); | ||
| 930 | tsc_now = get_cycles(); | ||
| 931 | } while ((tsc_now - tsc_start) < TICK_COUNT); | ||
| 932 | |||
| 933 | local_irq_restore(flags); | ||
| 934 | if (no_ctr_free) { | ||
| 935 | wrmsrl(MSR_K7_EVNTSEL3, 0); | ||
| 936 | wrmsrl(MSR_K7_PERFCTR3, pmc3); | ||
| 937 | wrmsrl(MSR_K7_EVNTSEL3, evntsel3); | ||
| 938 | } else { | ||
| 939 | release_perfctr_nmi(MSR_K7_PERFCTR0 + i); | ||
| 940 | release_evntsel_nmi(MSR_K7_EVNTSEL0 + i); | ||
| 941 | } | ||
| 942 | |||
| 943 | return pmc_now * tsc_khz / (tsc_now - tsc_start); | ||
| 944 | } | ||
| 945 | #else | ||
| 946 | static inline unsigned long calibrate_cpu(void) { return cpu_khz; } | ||
| 947 | #endif | ||
| 948 | |||
| 949 | void __init tsc_init(void) | 900 | void __init tsc_init(void) |
| 950 | { | 901 | { |
| 951 | u64 lpj; | 902 | u64 lpj; |
| @@ -964,10 +915,6 @@ void __init tsc_init(void) | |||
| 964 | return; | 915 | return; |
| 965 | } | 916 | } |
| 966 | 917 | ||
| 967 | if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) && | ||
| 968 | (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)) | ||
| 969 | cpu_khz = calibrate_cpu(); | ||
| 970 | |||
| 971 | printk("Detected %lu.%03lu MHz processor.\n", | 918 | printk("Detected %lu.%03lu MHz processor.\n", |
| 972 | (unsigned long)cpu_khz / 1000, | 919 | (unsigned long)cpu_khz / 1000, |
| 973 | (unsigned long)cpu_khz % 1000); | 920 | (unsigned long)cpu_khz % 1000); |
| @@ -987,6 +934,9 @@ void __init tsc_init(void) | |||
| 987 | /* now allow native_sched_clock() to use rdtsc */ | 934 | /* now allow native_sched_clock() to use rdtsc */ |
| 988 | tsc_disabled = 0; | 935 | tsc_disabled = 0; |
| 989 | 936 | ||
| 937 | if (!no_sched_irq_time) | ||
| 938 | enable_sched_clock_irqtime(); | ||
| 939 | |||
| 990 | lpj = ((u64)tsc_khz * 1000); | 940 | lpj = ((u64)tsc_khz * 1000); |
| 991 | do_div(lpj, HZ); | 941 | do_div(lpj, HZ); |
| 992 | lpj_fine = lpj; | 942 | lpj_fine = lpj; |
diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c index 1132129db792..7b24460917d5 100644 --- a/arch/x86/kernel/uv_irq.c +++ b/arch/x86/kernel/uv_irq.c | |||
| @@ -28,34 +28,21 @@ struct uv_irq_2_mmr_pnode{ | |||
| 28 | static spinlock_t uv_irq_lock; | 28 | static spinlock_t uv_irq_lock; |
| 29 | static struct rb_root uv_irq_root; | 29 | static struct rb_root uv_irq_root; |
| 30 | 30 | ||
| 31 | static int uv_set_irq_affinity(unsigned int, const struct cpumask *); | 31 | static int uv_set_irq_affinity(struct irq_data *, const struct cpumask *, bool); |
| 32 | 32 | ||
| 33 | static void uv_noop(unsigned int irq) | 33 | static void uv_noop(struct irq_data *data) { } |
| 34 | { | ||
| 35 | } | ||
| 36 | |||
| 37 | static unsigned int uv_noop_ret(unsigned int irq) | ||
| 38 | { | ||
| 39 | return 0; | ||
| 40 | } | ||
| 41 | 34 | ||
| 42 | static void uv_ack_apic(unsigned int irq) | 35 | static void uv_ack_apic(struct irq_data *data) |
| 43 | { | 36 | { |
| 44 | ack_APIC_irq(); | 37 | ack_APIC_irq(); |
| 45 | } | 38 | } |
| 46 | 39 | ||
| 47 | static struct irq_chip uv_irq_chip = { | 40 | static struct irq_chip uv_irq_chip = { |
| 48 | .name = "UV-CORE", | 41 | .name = "UV-CORE", |
| 49 | .startup = uv_noop_ret, | 42 | .irq_mask = uv_noop, |
| 50 | .shutdown = uv_noop, | 43 | .irq_unmask = uv_noop, |
| 51 | .enable = uv_noop, | 44 | .irq_eoi = uv_ack_apic, |
| 52 | .disable = uv_noop, | 45 | .irq_set_affinity = uv_set_irq_affinity, |
| 53 | .ack = uv_noop, | ||
| 54 | .mask = uv_noop, | ||
| 55 | .unmask = uv_noop, | ||
| 56 | .eoi = uv_ack_apic, | ||
| 57 | .end = uv_noop, | ||
| 58 | .set_affinity = uv_set_irq_affinity, | ||
| 59 | }; | 46 | }; |
| 60 | 47 | ||
| 61 | /* | 48 | /* |
| @@ -144,26 +131,22 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, | |||
| 144 | unsigned long mmr_offset, int limit) | 131 | unsigned long mmr_offset, int limit) |
| 145 | { | 132 | { |
| 146 | const struct cpumask *eligible_cpu = cpumask_of(cpu); | 133 | const struct cpumask *eligible_cpu = cpumask_of(cpu); |
| 147 | struct irq_desc *desc = irq_to_desc(irq); | 134 | struct irq_cfg *cfg = get_irq_chip_data(irq); |
| 148 | struct irq_cfg *cfg; | ||
| 149 | int mmr_pnode; | ||
| 150 | unsigned long mmr_value; | 135 | unsigned long mmr_value; |
| 151 | struct uv_IO_APIC_route_entry *entry; | 136 | struct uv_IO_APIC_route_entry *entry; |
| 152 | int err; | 137 | int mmr_pnode, err; |
| 153 | 138 | ||
| 154 | BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != | 139 | BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != |
| 155 | sizeof(unsigned long)); | 140 | sizeof(unsigned long)); |
| 156 | 141 | ||
| 157 | cfg = irq_cfg(irq); | ||
| 158 | |||
| 159 | err = assign_irq_vector(irq, cfg, eligible_cpu); | 142 | err = assign_irq_vector(irq, cfg, eligible_cpu); |
| 160 | if (err != 0) | 143 | if (err != 0) |
| 161 | return err; | 144 | return err; |
| 162 | 145 | ||
| 163 | if (limit == UV_AFFINITY_CPU) | 146 | if (limit == UV_AFFINITY_CPU) |
| 164 | desc->status |= IRQ_NO_BALANCING; | 147 | irq_set_status_flags(irq, IRQ_NO_BALANCING); |
| 165 | else | 148 | else |
| 166 | desc->status |= IRQ_MOVE_PCNTXT; | 149 | irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); |
| 167 | 150 | ||
| 168 | set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq, | 151 | set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq, |
| 169 | irq_name); | 152 | irq_name); |
| @@ -206,17 +189,17 @@ static void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset) | |||
| 206 | uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); | 189 | uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); |
| 207 | } | 190 | } |
| 208 | 191 | ||
| 209 | static int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask) | 192 | static int |
| 193 | uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask, | ||
| 194 | bool force) | ||
| 210 | { | 195 | { |
| 211 | struct irq_desc *desc = irq_to_desc(irq); | 196 | struct irq_cfg *cfg = data->chip_data; |
| 212 | struct irq_cfg *cfg = desc->chip_data; | ||
| 213 | unsigned int dest; | 197 | unsigned int dest; |
| 214 | unsigned long mmr_value; | 198 | unsigned long mmr_value, mmr_offset; |
| 215 | struct uv_IO_APIC_route_entry *entry; | 199 | struct uv_IO_APIC_route_entry *entry; |
| 216 | unsigned long mmr_offset; | ||
| 217 | int mmr_pnode; | 200 | int mmr_pnode; |
| 218 | 201 | ||
| 219 | if (set_desc_affinity(desc, mask, &dest)) | 202 | if (__ioapic_set_affinity(data, mask, &dest)) |
| 220 | return -1; | 203 | return -1; |
| 221 | 204 | ||
| 222 | mmr_value = 0; | 205 | mmr_value = 0; |
| @@ -231,7 +214,7 @@ static int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask) | |||
| 231 | entry->dest = dest; | 214 | entry->dest = dest; |
| 232 | 215 | ||
| 233 | /* Get previously stored MMR and pnode of hub sourcing interrupts */ | 216 | /* Get previously stored MMR and pnode of hub sourcing interrupts */ |
| 234 | if (uv_irq_2_mmr_info(irq, &mmr_offset, &mmr_pnode)) | 217 | if (uv_irq_2_mmr_info(data->irq, &mmr_offset, &mmr_pnode)) |
| 235 | return -1; | 218 | return -1; |
| 236 | 219 | ||
| 237 | uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); | 220 | uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); |
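The uv_irq conversion above illustrates the new genirq interface: chip callbacks now take a struct irq_data rather than a bare irq number, and irq_data already carries both the irq number (data->irq) and the per-irq chip_data, so the irq_to_desc()/irq_cfg() lookups disappear and status bits are set through irq_set_status_flags() instead of poking desc->status directly. A bare-bones sketch of a chip written against this interface (all my_* names are invented, not part of the UV code):

	#include <linux/irq.h>
	#include <asm/apic.h>

	static void my_mask(struct irq_data *data)
	{
		/* silence the interrupt source; data->irq names the Linux irq */
	}

	static void my_unmask(struct irq_data *data)
	{
		/* re-enable the interrupt source */
	}

	static void my_eoi(struct irq_data *data)
	{
		ack_APIC_irq();
	}

	static int my_set_affinity(struct irq_data *data,
				   const struct cpumask *mask, bool force)
	{
		/* data->chip_data points at the per-irq cfg set up at allocation */
		return 0;
	}

	static struct irq_chip my_irq_chip = {
		.name			= "MY-CHIP",
		.irq_mask		= my_mask,
		.irq_unmask		= my_unmask,
		.irq_eoi		= my_eoi,
		.irq_set_affinity	= my_set_affinity,
	};
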
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index e680ea52db9b..3371bd053b89 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c | |||
| @@ -66,10 +66,7 @@ static void __init visws_time_init(void) | |||
| 66 | } | 66 | } |
| 67 | 67 | ||
| 68 | /* Replaces the default init_ISA_irqs in the generic setup */ | 68 | /* Replaces the default init_ISA_irqs in the generic setup */ |
| 69 | static void __init visws_pre_intr_init(void) | 69 | static void __init visws_pre_intr_init(void); |
| 70 | { | ||
| 71 | init_VISWS_APIC_irqs(); | ||
| 72 | } | ||
| 73 | 70 | ||
| 74 | /* Quirk for machine specific memory setup. */ | 71 | /* Quirk for machine specific memory setup. */ |
| 75 | 72 | ||
| @@ -429,67 +426,34 @@ static int is_co_apic(unsigned int irq) | |||
| 429 | /* | 426 | /* |
| 430 | * This is the SGI Cobalt (IO-)APIC: | 427 | * This is the SGI Cobalt (IO-)APIC: |
| 431 | */ | 428 | */ |
| 432 | 429 | static void enable_cobalt_irq(struct irq_data *data) | |
| 433 | static void enable_cobalt_irq(unsigned int irq) | ||
| 434 | { | 430 | { |
| 435 | co_apic_set(is_co_apic(irq), irq); | 431 | co_apic_set(is_co_apic(data->irq), data->irq); |
| 436 | } | 432 | } |
| 437 | 433 | ||
| 438 | static void disable_cobalt_irq(unsigned int irq) | 434 | static void disable_cobalt_irq(struct irq_data *data) |
| 439 | { | 435 | { |
| 440 | int entry = is_co_apic(irq); | 436 | int entry = is_co_apic(data->irq); |
| 441 | 437 | ||
| 442 | co_apic_write(CO_APIC_LO(entry), CO_APIC_MASK); | 438 | co_apic_write(CO_APIC_LO(entry), CO_APIC_MASK); |
| 443 | co_apic_read(CO_APIC_LO(entry)); | 439 | co_apic_read(CO_APIC_LO(entry)); |
| 444 | } | 440 | } |
| 445 | 441 | ||
| 446 | /* | 442 | static void ack_cobalt_irq(struct irq_data *data) |
| 447 | * "irq" really just serves to identify the device. Here is where we | ||
| 448 | * map this to the Cobalt APIC entry where it's physically wired. | ||
| 449 | * This is called via request_irq -> setup_irq -> irq_desc->startup() | ||
| 450 | */ | ||
| 451 | static unsigned int startup_cobalt_irq(unsigned int irq) | ||
| 452 | { | 443 | { |
| 453 | unsigned long flags; | 444 | unsigned long flags; |
| 454 | struct irq_desc *desc = irq_to_desc(irq); | ||
| 455 | 445 | ||
| 456 | spin_lock_irqsave(&cobalt_lock, flags); | 446 | spin_lock_irqsave(&cobalt_lock, flags); |
| 457 | if ((desc->status & (IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING))) | 447 | disable_cobalt_irq(data); |
| 458 | desc->status &= ~(IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING); | ||
| 459 | enable_cobalt_irq(irq); | ||
| 460 | spin_unlock_irqrestore(&cobalt_lock, flags); | ||
| 461 | return 0; | ||
| 462 | } | ||
| 463 | |||
| 464 | static void ack_cobalt_irq(unsigned int irq) | ||
| 465 | { | ||
| 466 | unsigned long flags; | ||
| 467 | |||
| 468 | spin_lock_irqsave(&cobalt_lock, flags); | ||
| 469 | disable_cobalt_irq(irq); | ||
| 470 | apic_write(APIC_EOI, APIC_EIO_ACK); | 448 | apic_write(APIC_EOI, APIC_EIO_ACK); |
| 471 | spin_unlock_irqrestore(&cobalt_lock, flags); | 449 | spin_unlock_irqrestore(&cobalt_lock, flags); |
| 472 | } | 450 | } |
| 473 | 451 | ||
| 474 | static void end_cobalt_irq(unsigned int irq) | ||
| 475 | { | ||
| 476 | unsigned long flags; | ||
| 477 | struct irq_desc *desc = irq_to_desc(irq); | ||
| 478 | |||
| 479 | spin_lock_irqsave(&cobalt_lock, flags); | ||
| 480 | if (!(desc->status & (IRQ_DISABLED | IRQ_INPROGRESS))) | ||
| 481 | enable_cobalt_irq(irq); | ||
| 482 | spin_unlock_irqrestore(&cobalt_lock, flags); | ||
| 483 | } | ||
| 484 | |||
| 485 | static struct irq_chip cobalt_irq_type = { | 452 | static struct irq_chip cobalt_irq_type = { |
| 486 | .name = "Cobalt-APIC", | 453 | .name = "Cobalt-APIC", |
| 487 | .startup = startup_cobalt_irq, | 454 | .irq_enable = enable_cobalt_irq, |
| 488 | .shutdown = disable_cobalt_irq, | 455 | .irq_disable = disable_cobalt_irq, |
| 489 | .enable = enable_cobalt_irq, | 456 | .irq_ack = ack_cobalt_irq, |
| 490 | .disable = disable_cobalt_irq, | ||
| 491 | .ack = ack_cobalt_irq, | ||
| 492 | .end = end_cobalt_irq, | ||
| 493 | }; | 457 | }; |
| 494 | 458 | ||
| 495 | 459 | ||
| @@ -503,35 +467,34 @@ static struct irq_chip cobalt_irq_type = { | |||
| 503 | * interrupt controller type, and through a special virtual interrupt- | 467 | * interrupt controller type, and through a special virtual interrupt- |
| 504 | * controller. Device drivers only see the virtual interrupt sources. | 468 | * controller. Device drivers only see the virtual interrupt sources. |
| 505 | */ | 469 | */ |
| 506 | static unsigned int startup_piix4_master_irq(unsigned int irq) | 470 | static unsigned int startup_piix4_master_irq(struct irq_data *data) |
| 507 | { | 471 | { |
| 508 | legacy_pic->init(0); | 472 | legacy_pic->init(0); |
| 509 | 473 | enable_cobalt_irq(data); | |
| 510 | return startup_cobalt_irq(irq); | ||
| 511 | } | 474 | } |
| 512 | 475 | ||
| 513 | static void end_piix4_master_irq(unsigned int irq) | 476 | static void end_piix4_master_irq(struct irq_data *data) |
| 514 | { | 477 | { |
| 515 | unsigned long flags; | 478 | unsigned long flags; |
| 516 | 479 | ||
| 517 | spin_lock_irqsave(&cobalt_lock, flags); | 480 | spin_lock_irqsave(&cobalt_lock, flags); |
| 518 | enable_cobalt_irq(irq); | 481 | enable_cobalt_irq(data); |
| 519 | spin_unlock_irqrestore(&cobalt_lock, flags); | 482 | spin_unlock_irqrestore(&cobalt_lock, flags); |
| 520 | } | 483 | } |
| 521 | 484 | ||
| 522 | static struct irq_chip piix4_master_irq_type = { | 485 | static struct irq_chip piix4_master_irq_type = { |
| 523 | .name = "PIIX4-master", | 486 | .name = "PIIX4-master", |
| 524 | .startup = startup_piix4_master_irq, | 487 | .irq_startup = startup_piix4_master_irq, |
| 525 | .ack = ack_cobalt_irq, | 488 | .irq_ack = ack_cobalt_irq, |
| 526 | .end = end_piix4_master_irq, | ||
| 527 | }; | 489 | }; |
| 528 | 490 | ||
| 491 | static void pii4_mask(struct irq_data *data) { } | ||
| 529 | 492 | ||
| 530 | static struct irq_chip piix4_virtual_irq_type = { | 493 | static struct irq_chip piix4_virtual_irq_type = { |
| 531 | .name = "PIIX4-virtual", | 494 | .name = "PIIX4-virtual", |
| 495 | .mask = pii4_mask, | ||
| 532 | }; | 496 | }; |
| 533 | 497 | ||
| 534 | |||
| 535 | /* | 498 | /* |
| 536 | * PIIX4-8259 master/virtual functions to handle interrupt requests | 499 | * PIIX4-8259 master/virtual functions to handle interrupt requests |
| 537 | * from legacy devices: floppy, parallel, serial, rtc. | 500 | * from legacy devices: floppy, parallel, serial, rtc. |
| @@ -549,9 +512,8 @@ static struct irq_chip piix4_virtual_irq_type = { | |||
| 549 | */ | 512 | */ |
| 550 | static irqreturn_t piix4_master_intr(int irq, void *dev_id) | 513 | static irqreturn_t piix4_master_intr(int irq, void *dev_id) |
| 551 | { | 514 | { |
| 552 | int realirq; | ||
| 553 | struct irq_desc *desc; | ||
| 554 | unsigned long flags; | 515 | unsigned long flags; |
| 516 | int realirq; | ||
| 555 | 517 | ||
| 556 | raw_spin_lock_irqsave(&i8259A_lock, flags); | 518 | raw_spin_lock_irqsave(&i8259A_lock, flags); |
| 557 | 519 | ||
| @@ -592,18 +554,10 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id) | |||
| 592 | 554 | ||
| 593 | raw_spin_unlock_irqrestore(&i8259A_lock, flags); | 555 | raw_spin_unlock_irqrestore(&i8259A_lock, flags); |
| 594 | 556 | ||
| 595 | desc = irq_to_desc(realirq); | ||
| 596 | |||
| 597 | /* | 557 | /* |
| 598 | * handle this 'virtual interrupt' as a Cobalt one now. | 558 | * handle this 'virtual interrupt' as a Cobalt one now. |
| 599 | */ | 559 | */ |
| 600 | kstat_incr_irqs_this_cpu(realirq, desc); | 560 | generic_handle_irq(realirq); |
| 601 | |||
| 602 | if (likely(desc->action != NULL)) | ||
| 603 | handle_IRQ_event(realirq, desc->action); | ||
| 604 | |||
| 605 | if (!(desc->status & IRQ_DISABLED)) | ||
| 606 | legacy_pic->chip->unmask(realirq); | ||
| 607 | 561 | ||
| 608 | return IRQ_HANDLED; | 562 | return IRQ_HANDLED; |
| 609 | 563 | ||
| @@ -624,41 +578,35 @@ static struct irqaction cascade_action = { | |||
| 624 | 578 | ||
| 625 | static inline void set_piix4_virtual_irq_type(void) | 579 | static inline void set_piix4_virtual_irq_type(void) |
| 626 | { | 580 | { |
| 627 | piix4_virtual_irq_type.shutdown = i8259A_chip.mask; | ||
| 628 | piix4_virtual_irq_type.enable = i8259A_chip.unmask; | 581 | piix4_virtual_irq_type.enable = i8259A_chip.unmask; |
| 629 | piix4_virtual_irq_type.disable = i8259A_chip.mask; | 582 | piix4_virtual_irq_type.disable = i8259A_chip.mask; |
| 583 | piix4_virtual_irq_type.unmask = i8259A_chip.unmask; | ||
| 630 | } | 584 | } |
| 631 | 585 | ||
| 632 | void init_VISWS_APIC_irqs(void) | 586 | static void __init visws_pre_intr_init(void) |
| 633 | { | 587 | { |
| 634 | int i; | 588 | int i; |
| 635 | 589 | ||
| 636 | for (i = 0; i < CO_IRQ_APIC0 + CO_APIC_LAST + 1; i++) { | 590 | set_piix4_virtual_irq_type(); |
| 637 | struct irq_desc *desc = irq_to_desc(i); | ||
| 638 | |||
| 639 | desc->status = IRQ_DISABLED; | ||
| 640 | desc->action = 0; | ||
| 641 | desc->depth = 1; | ||
| 642 | 591 | ||
| 643 | if (i == 0) { | 592 | for (i = 0; i < CO_IRQ_APIC0 + CO_APIC_LAST + 1; i++) { |
| 644 | desc->chip = &cobalt_irq_type; | 593 | struct irq_chip *chip = NULL; |
| 645 | } | 594 | |
| 646 | else if (i == CO_IRQ_IDE0) { | 595 | if (i == 0) |
| 647 | desc->chip = &cobalt_irq_type; | 596 | chip = &cobalt_irq_type; |
| 648 | } | 597 | else if (i == CO_IRQ_IDE0) |
| 649 | else if (i == CO_IRQ_IDE1) { | 598 | chip = &cobalt_irq_type; |
| 650 | desc->chip = &cobalt_irq_type; | 599 | else if (i == CO_IRQ_IDE1) |
| 651 | } | 600 | chip = &cobalt_irq_type; |
| 652 | else if (i == CO_IRQ_8259) { | 601 | else if (i == CO_IRQ_8259) |
| 653 | desc->chip = &piix4_master_irq_type; | 602 | chip = &piix4_master_irq_type; |
| 654 | } | 603 | else if (i < CO_IRQ_APIC0) |
| 655 | else if (i < CO_IRQ_APIC0) { | 604 | chip = &piix4_virtual_irq_type; |
| 656 | set_piix4_virtual_irq_type(); | 605 | else if (IS_CO_APIC(i)) |
| 657 | desc->chip = &piix4_virtual_irq_type; | 606 | chip = &cobalt_irq_type; |
| 658 | } | 607 | |
| 659 | else if (IS_CO_APIC(i)) { | 608 | if (chip) |
| 660 | desc->chip = &cobalt_irq_type; | 609 | set_irq_chip(i, chip); |
| 661 | } | ||
| 662 | } | 610 | } |
| 663 | 611 | ||
| 664 | setup_irq(CO_IRQ_8259, &master_action); | 612 | setup_irq(CO_IRQ_8259, &master_action); |
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 5ffb5622f793..61fb98519622 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c | |||
| @@ -551,8 +551,14 @@ cannot_handle: | |||
| 551 | int handle_vm86_trap(struct kernel_vm86_regs *regs, long error_code, int trapno) | 551 | int handle_vm86_trap(struct kernel_vm86_regs *regs, long error_code, int trapno) |
| 552 | { | 552 | { |
| 553 | if (VMPI.is_vm86pus) { | 553 | if (VMPI.is_vm86pus) { |
| 554 | if ((trapno == 3) || (trapno == 1)) | 554 | if ((trapno == 3) || (trapno == 1)) { |
| 555 | return_to_32bit(regs, VM86_TRAP + (trapno << 8)); | 555 | KVM86->regs32->ax = VM86_TRAP + (trapno << 8); |
| 556 | /* setting this flag forces the code in entry_32.S to | ||
| 557 | call save_v86_state() and change the stack pointer | ||
| 558 | to KVM86->regs32 */ | ||
| 559 | set_thread_flag(TIF_IRET); | ||
| 560 | return 0; | ||
| 561 | } | ||
| 556 | do_int(regs, trapno, (unsigned char __user *) (regs->pt.ss << 4), SP(regs)); | 562 | do_int(regs, trapno, (unsigned char __user *) (regs->pt.ss << 4), SP(regs)); |
| 557 | return 0; | 563 | return 0; |
| 558 | } | 564 | } |
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c deleted file mode 100644 index ce9fbacb7526..000000000000 --- a/arch/x86/kernel/vmi_32.c +++ /dev/null | |||
| @@ -1,893 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * VMI specific paravirt-ops implementation | ||
| 3 | * | ||
| 4 | * Copyright (C) 2005, VMware, Inc. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify | ||
| 7 | * it under the terms of the GNU General Public License as published by | ||
| 8 | * the Free Software Foundation; either version 2 of the License, or | ||
| 9 | * (at your option) any later version. | ||
| 10 | * | ||
| 11 | * This program is distributed in the hope that it will be useful, but | ||
| 12 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 13 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
| 14 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
| 15 | * details. | ||
| 16 | * | ||
| 17 | * You should have received a copy of the GNU General Public License | ||
| 18 | * along with this program; if not, write to the Free Software | ||
| 19 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
| 20 | * | ||
| 21 | * Send feedback to zach@vmware.com | ||
| 22 | * | ||
| 23 | */ | ||
| 24 | |||
| 25 | #include <linux/module.h> | ||
| 26 | #include <linux/cpu.h> | ||
| 27 | #include <linux/bootmem.h> | ||
| 28 | #include <linux/mm.h> | ||
| 29 | #include <linux/highmem.h> | ||
| 30 | #include <linux/sched.h> | ||
| 31 | #include <linux/gfp.h> | ||
| 32 | #include <asm/vmi.h> | ||
| 33 | #include <asm/io.h> | ||
| 34 | #include <asm/fixmap.h> | ||
| 35 | #include <asm/apicdef.h> | ||
| 36 | #include <asm/apic.h> | ||
| 37 | #include <asm/pgalloc.h> | ||
| 38 | #include <asm/processor.h> | ||
| 39 | #include <asm/timer.h> | ||
| 40 | #include <asm/vmi_time.h> | ||
| 41 | #include <asm/kmap_types.h> | ||
| 42 | #include <asm/setup.h> | ||
| 43 | |||
| 44 | /* Convenient for calling VMI functions indirectly in the ROM */ | ||
| 45 | typedef u32 __attribute__((regparm(1))) (VROMFUNC)(void); | ||
| 46 | typedef u64 __attribute__((regparm(2))) (VROMLONGFUNC)(int); | ||
| 47 | |||
| 48 | #define call_vrom_func(rom,func) \ | ||
| 49 | (((VROMFUNC *)(rom->func))()) | ||
| 50 | |||
| 51 | #define call_vrom_long_func(rom,func,arg) \ | ||
| 52 | (((VROMLONGFUNC *)(rom->func)) (arg)) | ||
| 53 | |||
| 54 | static struct vrom_header *vmi_rom; | ||
| 55 | static int disable_pge; | ||
| 56 | static int disable_pse; | ||
| 57 | static int disable_sep; | ||
| 58 | static int disable_tsc; | ||
| 59 | static int disable_mtrr; | ||
| 60 | static int disable_noidle; | ||
| 61 | static int disable_vmi_timer; | ||
| 62 | |||
| 63 | /* Cached VMI operations */ | ||
| 64 | static struct { | ||
| 65 | void (*cpuid)(void /* non-c */); | ||
| 66 | void (*_set_ldt)(u32 selector); | ||
| 67 | void (*set_tr)(u32 selector); | ||
| 68 | void (*write_idt_entry)(struct desc_struct *, int, u32, u32); | ||
| 69 | void (*write_gdt_entry)(struct desc_struct *, int, u32, u32); | ||
| 70 | void (*write_ldt_entry)(struct desc_struct *, int, u32, u32); | ||
| 71 | void (*set_kernel_stack)(u32 selector, u32 sp0); | ||
| 72 | void (*allocate_page)(u32, u32, u32, u32, u32); | ||
| 73 | void (*release_page)(u32, u32); | ||
| 74 | void (*set_pte)(pte_t, pte_t *, unsigned); | ||
| 75 | void (*update_pte)(pte_t *, unsigned); | ||
| 76 | void (*set_linear_mapping)(int, void *, u32, u32); | ||
| 77 | void (*_flush_tlb)(int); | ||
| 78 | void (*set_initial_ap_state)(int, int); | ||
| 79 | void (*halt)(void); | ||
| 80 | void (*set_lazy_mode)(int mode); | ||
| 81 | } vmi_ops; | ||
| 82 | |||
| 83 | /* Cached VMI operations */ | ||
| 84 | struct vmi_timer_ops vmi_timer_ops; | ||
| 85 | |||
| 86 | /* | ||
| 87 | * VMI patching routines. | ||
| 88 | */ | ||
| 89 | #define MNEM_CALL 0xe8 | ||
| 90 | #define MNEM_JMP 0xe9 | ||
| 91 | #define MNEM_RET 0xc3 | ||
| 92 | |||
| 93 | #define IRQ_PATCH_INT_MASK 0 | ||
| 94 | #define IRQ_PATCH_DISABLE 5 | ||
| 95 | |||
| 96 | static inline void patch_offset(void *insnbuf, | ||
| 97 | unsigned long ip, unsigned long dest) | ||
| 98 | { | ||
| 99 | *(unsigned long *)(insnbuf+1) = dest-ip-5; | ||
| 100 | } | ||
| 101 | |||
| 102 | static unsigned patch_internal(int call, unsigned len, void *insnbuf, | ||
| 103 | unsigned long ip) | ||
| 104 | { | ||
| 105 | u64 reloc; | ||
| 106 | struct vmi_relocation_info *const rel = (struct vmi_relocation_info *)&reloc; | ||
| 107 | reloc = call_vrom_long_func(vmi_rom, get_reloc, call); | ||
| 108 | switch(rel->type) { | ||
| 109 | case VMI_RELOCATION_CALL_REL: | ||
| 110 | BUG_ON(len < 5); | ||
| 111 | *(char *)insnbuf = MNEM_CALL; | ||
| 112 | patch_offset(insnbuf, ip, (unsigned long)rel->eip); | ||
| 113 | return 5; | ||
| 114 | |||
| 115 | case VMI_RELOCATION_JUMP_REL: | ||
| 116 | BUG_ON(len < 5); | ||
| 117 | *(char *)insnbuf = MNEM_JMP; | ||
| 118 | patch_offset(insnbuf, ip, (unsigned long)rel->eip); | ||
| 119 | return 5; | ||
| 120 | |||
| 121 | case VMI_RELOCATION_NOP: | ||
| 122 | /* obliterate the whole thing */ | ||
| 123 | return 0; | ||
| 124 | |||
| 125 | case VMI_RELOCATION_NONE: | ||
| 126 | /* leave native code in place */ | ||
| 127 | break; | ||
| 128 | |||
| 129 | default: | ||
| 130 | BUG(); | ||
| 131 | } | ||
| 132 | return len; | ||
| 133 | } | ||
| 134 | |||
| 135 | /* | ||
| 136 | * Apply patch if appropriate, return length of new instruction | ||
| 137 | * sequence. The callee does nop padding for us. | ||
| 138 | */ | ||
| 139 | static unsigned vmi_patch(u8 type, u16 clobbers, void *insns, | ||
| 140 | unsigned long ip, unsigned len) | ||
| 141 | { | ||
| 142 | switch (type) { | ||
| 143 | case PARAVIRT_PATCH(pv_irq_ops.irq_disable): | ||
| 144 | return patch_internal(VMI_CALL_DisableInterrupts, len, | ||
| 145 | insns, ip); | ||
| 146 | case PARAVIRT_PATCH(pv_irq_ops.irq_enable): | ||
| 147 | return patch_internal(VMI_CALL_EnableInterrupts, len, | ||
| 148 | insns, ip); | ||
| 149 | case PARAVIRT_PATCH(pv_irq_ops.restore_fl): | ||
| 150 | return patch_internal(VMI_CALL_SetInterruptMask, len, | ||
| 151 | insns, ip); | ||
| 152 | case PARAVIRT_PATCH(pv_irq_ops.save_fl): | ||
| 153 | return patch_internal(VMI_CALL_GetInterruptMask, len, | ||
| 154 | insns, ip); | ||
| 155 | case PARAVIRT_PATCH(pv_cpu_ops.iret): | ||
| 156 | return patch_internal(VMI_CALL_IRET, len, insns, ip); | ||
| 157 | case PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit): | ||
| 158 | return patch_internal(VMI_CALL_SYSEXIT, len, insns, ip); | ||
| 159 | default: | ||
| 160 | break; | ||
| 161 | } | ||
| 162 | return len; | ||
| 163 | } | ||
| 164 | |||
| 165 | /* CPUID has non-C semantics, and paravirt-ops API doesn't match hardware ISA */ | ||
| 166 | static void vmi_cpuid(unsigned int *ax, unsigned int *bx, | ||
| 167 | unsigned int *cx, unsigned int *dx) | ||
| 168 | { | ||
| 169 | int override = 0; | ||
| 170 | if (*ax == 1) | ||
| 171 | override = 1; | ||
| 172 | asm volatile ("call *%6" | ||
| 173 | : "=a" (*ax), | ||
| 174 | "=b" (*bx), | ||
| 175 | "=c" (*cx), | ||
| 176 | "=d" (*dx) | ||
| 177 | : "0" (*ax), "2" (*cx), "r" (vmi_ops.cpuid)); | ||
| 178 | if (override) { | ||
| 179 | if (disable_pse) | ||
| 180 | *dx &= ~X86_FEATURE_PSE; | ||
| 181 | if (disable_pge) | ||
| 182 | *dx &= ~X86_FEATURE_PGE; | ||
| 183 | if (disable_sep) | ||
| 184 | *dx &= ~X86_FEATURE_SEP; | ||
| 185 | if (disable_tsc) | ||
| 186 | *dx &= ~X86_FEATURE_TSC; | ||
| 187 | if (disable_mtrr) | ||
| 188 | *dx &= ~X86_FEATURE_MTRR; | ||
| 189 | } | ||
| 190 | } | ||
| 191 | |||
| 192 | static inline void vmi_maybe_load_tls(struct desc_struct *gdt, int nr, struct desc_struct *new) | ||
| 193 | { | ||
| 194 | if (gdt[nr].a != new->a || gdt[nr].b != new->b) | ||
| 195 | write_gdt_entry(gdt, nr, new, 0); | ||
| 196 | } | ||
| 197 | |||
| 198 | static void vmi_load_tls(struct thread_struct *t, unsigned int cpu) | ||
| 199 | { | ||
| 200 | struct desc_struct *gdt = get_cpu_gdt_table(cpu); | ||
| 201 | vmi_maybe_load_tls(gdt, GDT_ENTRY_TLS_MIN + 0, &t->tls_array[0]); | ||
| 202 | vmi_maybe_load_tls(gdt, GDT_ENTRY_TLS_MIN + 1, &t->tls_array[1]); | ||
| 203 | vmi_maybe_load_tls(gdt, GDT_ENTRY_TLS_MIN + 2, &t->tls_array[2]); | ||
| 204 | } | ||
| 205 | |||
| 206 | static void vmi_set_ldt(const void *addr, unsigned entries) | ||
| 207 | { | ||
| 208 | unsigned cpu = smp_processor_id(); | ||
| 209 | struct desc_struct desc; | ||
| 210 | |||
| 211 | pack_descriptor(&desc, (unsigned long)addr, | ||
| 212 | entries * sizeof(struct desc_struct) - 1, | ||
| 213 | DESC_LDT, 0); | ||
| 214 | write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, &desc, DESC_LDT); | ||
| 215 | vmi_ops._set_ldt(entries ? GDT_ENTRY_LDT*sizeof(struct desc_struct) : 0); | ||
| 216 | } | ||
| 217 | |||
| 218 | static void vmi_set_tr(void) | ||
| 219 | { | ||
| 220 | vmi_ops.set_tr(GDT_ENTRY_TSS*sizeof(struct desc_struct)); | ||
| 221 | } | ||
| 222 | |||
| 223 | static void vmi_write_idt_entry(gate_desc *dt, int entry, const gate_desc *g) | ||
| 224 | { | ||
| 225 | u32 *idt_entry = (u32 *)g; | ||
| 226 | vmi_ops.write_idt_entry(dt, entry, idt_entry[0], idt_entry[1]); | ||
| 227 | } | ||
| 228 | |||
| 229 | static void vmi_write_gdt_entry(struct desc_struct *dt, int entry, | ||
| 230 | const void *desc, int type) | ||
| 231 | { | ||
| 232 | u32 *gdt_entry = (u32 *)desc; | ||
| 233 | vmi_ops.write_gdt_entry(dt, entry, gdt_entry[0], gdt_entry[1]); | ||
| 234 | } | ||
| 235 | |||
| 236 | static void vmi_write_ldt_entry(struct desc_struct *dt, int entry, | ||
| 237 | const void *desc) | ||
| 238 | { | ||
| 239 | u32 *ldt_entry = (u32 *)desc; | ||
| 240 | vmi_ops.write_ldt_entry(dt, entry, ldt_entry[0], ldt_entry[1]); | ||
| 241 | } | ||
| 242 | |||
| 243 | static void vmi_load_sp0(struct tss_struct *tss, | ||
| 244 | struct thread_struct *thread) | ||
| 245 | { | ||
| 246 | tss->x86_tss.sp0 = thread->sp0; | ||
| 247 | |||
| 248 | /* This can only happen when SEP is enabled, no need to test "SEP"arately */ | ||
| 249 | if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) { | ||
| 250 | tss->x86_tss.ss1 = thread->sysenter_cs; | ||
| 251 | wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); | ||
| 252 | } | ||
| 253 | vmi_ops.set_kernel_stack(__KERNEL_DS, tss->x86_tss.sp0); | ||
| 254 | } | ||
| 255 | |||
| 256 | static void vmi_flush_tlb_user(void) | ||
| 257 | { | ||
| 258 | vmi_ops._flush_tlb(VMI_FLUSH_TLB); | ||
| 259 | } | ||
| 260 | |||
| 261 | static void vmi_flush_tlb_kernel(void) | ||
| 262 | { | ||
| 263 | vmi_ops._flush_tlb(VMI_FLUSH_TLB | VMI_FLUSH_GLOBAL); | ||
| 264 | } | ||
| 265 | |||
| 266 | /* Stub to do nothing at all; used for delays and unimplemented calls */ | ||
| 267 | static void vmi_nop(void) | ||
| 268 | { | ||
| 269 | } | ||
| 270 | |||
| 271 | static void vmi_allocate_pte(struct mm_struct *mm, unsigned long pfn) | ||
| 272 | { | ||
| 273 | vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0); | ||
| 274 | } | ||
| 275 | |||
| 276 | static void vmi_allocate_pmd(struct mm_struct *mm, unsigned long pfn) | ||
| 277 | { | ||
| 278 | /* | ||
| 279 | * This call comes in very early, before mem_map is setup. | ||
| 280 | * It is called only for swapper_pg_dir, which already has | ||
| 281 | * data on it. | ||
| 282 | */ | ||
| 283 | vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0); | ||
| 284 | } | ||
| 285 | |||
| 286 | static void vmi_allocate_pmd_clone(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count) | ||
| 287 | { | ||
| 288 | vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count); | ||
| 289 | } | ||
| 290 | |||
| 291 | static void vmi_release_pte(unsigned long pfn) | ||
| 292 | { | ||
| 293 | vmi_ops.release_page(pfn, VMI_PAGE_L1); | ||
| 294 | } | ||
| 295 | |||
| 296 | static void vmi_release_pmd(unsigned long pfn) | ||
| 297 | { | ||
| 298 | vmi_ops.release_page(pfn, VMI_PAGE_L2); | ||
| 299 | } | ||
| 300 | |||
| 301 | /* | ||
| 302 | * We use the pgd_free hook for releasing the pgd page: | ||
| 303 | */ | ||
| 304 | static void vmi_pgd_free(struct mm_struct *mm, pgd_t *pgd) | ||
| 305 | { | ||
| 306 | unsigned long pfn = __pa(pgd) >> PAGE_SHIFT; | ||
| 307 | |||
| 308 | vmi_ops.release_page(pfn, VMI_PAGE_L2); | ||
| 309 | } | ||
| 310 | |||
| 311 | /* | ||
| 312 | * Helper macros for MMU update flags. We can defer updates until a flush | ||
| 313 | * or page invalidation only if the update is to the current address space | ||
| 314 | * (otherwise, there is no flush). We must check against init_mm, since | ||
| 315 | * this could be a kernel update, which usually passes init_mm, although | ||
| 316 | * sometimes this check can be skipped if we know the particular function | ||
| 317 | * is only called on user mode PTEs. We could change the kernel to pass | ||
| 318 | * current->active_mm here, but in particular, I was unsure if changing | ||
| 319 | * mm/highmem.c to do this would still be correct on other architectures. | ||
| 320 | */ | ||
| 321 | #define is_current_as(mm, mustbeuser) ((mm) == current->active_mm || \ | ||
| 322 | (!mustbeuser && (mm) == &init_mm)) | ||
| 323 | #define vmi_flags_addr(mm, addr, level, user) \ | ||
| 324 | ((level) | (is_current_as(mm, user) ? \ | ||
| 325 | (VMI_PAGE_CURRENT_AS | ((addr) & VMI_PAGE_VA_MASK)) : 0)) | ||
| 326 | #define vmi_flags_addr_defer(mm, addr, level, user) \ | ||
| 327 | ((level) | (is_current_as(mm, user) ? \ | ||
| 328 | (VMI_PAGE_DEFER | VMI_PAGE_CURRENT_AS | ((addr) & VMI_PAGE_VA_MASK)) : 0)) | ||
| 329 | |||
| 330 | static void vmi_update_pte(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | ||
| 331 | { | ||
| 332 | vmi_ops.update_pte(ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); | ||
| 333 | } | ||
| 334 | |||
| 335 | static void vmi_update_pte_defer(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | ||
| 336 | { | ||
| 337 | vmi_ops.update_pte(ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 0)); | ||
| 338 | } | ||
| 339 | |||
| 340 | static void vmi_set_pte(pte_t *ptep, pte_t pte) | ||
| 341 | { | ||
| 342 | /* XXX because of set_pmd_pte, this can be called on PT or PD layers */ | ||
| 343 | vmi_ops.set_pte(pte, ptep, VMI_PAGE_PT); | ||
| 344 | } | ||
| 345 | |||
| 346 | static void vmi_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) | ||
| 347 | { | ||
| 348 | vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); | ||
| 349 | } | ||
| 350 | |||
| 351 | static void vmi_set_pmd(pmd_t *pmdp, pmd_t pmdval) | ||
| 352 | { | ||
| 353 | #ifdef CONFIG_X86_PAE | ||
| 354 | const pte_t pte = { .pte = pmdval.pmd }; | ||
| 355 | #else | ||
| 356 | const pte_t pte = { pmdval.pud.pgd.pgd }; | ||
| 357 | #endif | ||
| 358 | vmi_ops.set_pte(pte, (pte_t *)pmdp, VMI_PAGE_PD); | ||
| 359 | } | ||
| 360 | |||
| 361 | #ifdef CONFIG_X86_PAE | ||
| 362 | |||
| 363 | static void vmi_set_pte_atomic(pte_t *ptep, pte_t pteval) | ||
| 364 | { | ||
| 365 | /* | ||
| 366 | * XXX This is called from set_pmd_pte, but at both PT | ||
| 367 | * and PD layers so the VMI_PAGE_PT flag is wrong. But | ||
| 368 | * it is only called for large page mapping changes, | ||
| 369 | * the Xen backend, doesn't support large pages, and the | ||
| 370 | * ESX backend doesn't depend on the flag. | ||
| 371 | */ | ||
| 372 | set_64bit((unsigned long long *)ptep,pte_val(pteval)); | ||
| 373 | vmi_ops.update_pte(ptep, VMI_PAGE_PT); | ||
| 374 | } | ||
| 375 | |||
| 376 | static void vmi_set_pud(pud_t *pudp, pud_t pudval) | ||
| 377 | { | ||
| 378 | /* Um, eww */ | ||
| 379 | const pte_t pte = { .pte = pudval.pgd.pgd }; | ||
| 380 | vmi_ops.set_pte(pte, (pte_t *)pudp, VMI_PAGE_PDP); | ||
| 381 | } | ||
| 382 | |||
| 383 | static void vmi_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | ||
| 384 | { | ||
| 385 | const pte_t pte = { .pte = 0 }; | ||
| 386 | vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); | ||
| 387 | } | ||
| 388 | |||
| 389 | static void vmi_pmd_clear(pmd_t *pmd) | ||
| 390 | { | ||
| 391 | const pte_t pte = { .pte = 0 }; | ||
| 392 | vmi_ops.set_pte(pte, (pte_t *)pmd, VMI_PAGE_PD); | ||
| 393 | } | ||
| 394 | #endif | ||
| 395 | |||
| 396 | #ifdef CONFIG_SMP | ||
| 397 | static void __devinit | ||
| 398 | vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip, | ||
| 399 | unsigned long start_esp) | ||
| 400 | { | ||
| 401 | struct vmi_ap_state ap; | ||
| 402 | |||
| 403 | /* Default everything to zero. This is fine for most GPRs. */ | ||
| 404 | memset(&ap, 0, sizeof(struct vmi_ap_state)); | ||
| 405 | |||
| 406 | ap.gdtr_limit = GDT_SIZE - 1; | ||
| 407 | ap.gdtr_base = (unsigned long) get_cpu_gdt_table(phys_apicid); | ||
| 408 | |||
| 409 | ap.idtr_limit = IDT_ENTRIES * 8 - 1; | ||
| 410 | ap.idtr_base = (unsigned long) idt_table; | ||
| 411 | |||
| 412 | ap.ldtr = 0; | ||
| 413 | |||
| 414 | ap.cs = __KERNEL_CS; | ||
| 415 | ap.eip = (unsigned long) start_eip; | ||
| 416 | ap.ss = __KERNEL_DS; | ||
| 417 | ap.esp = (unsigned long) start_esp; | ||
| 418 | |||
| 419 | ap.ds = __USER_DS; | ||
| 420 | ap.es = __USER_DS; | ||
| 421 | ap.fs = __KERNEL_PERCPU; | ||
| 422 | ap.gs = __KERNEL_STACK_CANARY; | ||
| 423 | |||
| 424 | ap.eflags = 0; | ||
| 425 | |||
| 426 | #ifdef CONFIG_X86_PAE | ||
| 427 | /* efer should match BSP efer. */ | ||
| 428 | if (cpu_has_nx) { | ||
| 429 | unsigned l, h; | ||
| 430 | rdmsr(MSR_EFER, l, h); | ||
| 431 | ap.efer = (unsigned long long) h << 32 | l; | ||
| 432 | } | ||
| 433 | #endif | ||
| 434 | |||
| 435 | ap.cr3 = __pa(swapper_pg_dir); | ||
| 436 | /* Protected mode, paging, AM, WP, NE, MP. */ | ||
| 437 | ap.cr0 = 0x80050023; | ||
| 438 | ap.cr4 = mmu_cr4_features; | ||
| 439 | vmi_ops.set_initial_ap_state((u32)&ap, phys_apicid); | ||
| 440 | } | ||
| 441 | #endif | ||
| 442 | |||
| 443 | static void vmi_start_context_switch(struct task_struct *prev) | ||
| 444 | { | ||
| 445 | paravirt_start_context_switch(prev); | ||
| 446 | vmi_ops.set_lazy_mode(2); | ||
| 447 | } | ||
| 448 | |||
| 449 | static void vmi_end_context_switch(struct task_struct *next) | ||
| 450 | { | ||
| 451 | vmi_ops.set_lazy_mode(0); | ||
| 452 | paravirt_end_context_switch(next); | ||
| 453 | } | ||
| 454 | |||
| 455 | static void vmi_enter_lazy_mmu(void) | ||
| 456 | { | ||
| 457 | paravirt_enter_lazy_mmu(); | ||
| 458 | vmi_ops.set_lazy_mode(1); | ||
| 459 | } | ||
| 460 | |||
| 461 | static void vmi_leave_lazy_mmu(void) | ||
| 462 | { | ||
| 463 | vmi_ops.set_lazy_mode(0); | ||
| 464 | paravirt_leave_lazy_mmu(); | ||
| 465 | } | ||
| 466 | |||
| 467 | static inline int __init check_vmi_rom(struct vrom_header *rom) | ||
| 468 | { | ||
| 469 | struct pci_header *pci; | ||
| 470 | struct pnp_header *pnp; | ||
| 471 | const char *manufacturer = "UNKNOWN"; | ||
| 472 | const char *product = "UNKNOWN"; | ||
| 473 | const char *license = "unspecified"; | ||
| 474 | |||
| 475 | if (rom->rom_signature != 0xaa55) | ||
| 476 | return 0; | ||
| 477 | if (rom->vrom_signature != VMI_SIGNATURE) | ||
| 478 | return 0; | ||
| 479 | if (rom->api_version_maj != VMI_API_REV_MAJOR || | ||
| 480 | rom->api_version_min+1 < VMI_API_REV_MINOR+1) { | ||
| 481 | printk(KERN_WARNING "VMI: Found mismatched rom version %d.%d\n", | ||
| 482 | rom->api_version_maj, | ||
| 483 | rom->api_version_min); | ||
| 484 | return 0; | ||
| 485 | } | ||
| 486 | |||
| 487 | /* | ||
| 488 | * Relying on the VMI_SIGNATURE field is not 100% safe, so check | ||
| 489 | * the PCI header and device type to make sure this is really a | ||
| 490 | * VMI device. | ||
| 491 | */ | ||
| 492 | if (!rom->pci_header_offs) { | ||
| 493 | printk(KERN_WARNING "VMI: ROM does not contain PCI header.\n"); | ||
| 494 | return 0; | ||
| 495 | } | ||
| 496 | |||
| 497 | pci = (struct pci_header *)((char *)rom+rom->pci_header_offs); | ||
| 498 | if (pci->vendorID != PCI_VENDOR_ID_VMWARE || | ||
| 499 | pci->deviceID != PCI_DEVICE_ID_VMWARE_VMI) { | ||
| 500 | /* Allow it to run... anyways, but warn */ | ||
| 501 | printk(KERN_WARNING "VMI: ROM from unknown manufacturer\n"); | ||
| 502 | } | ||
| 503 | |||
| 504 | if (rom->pnp_header_offs) { | ||
| 505 | pnp = (struct pnp_header *)((char *)rom+rom->pnp_header_offs); | ||
| 506 | if (pnp->manufacturer_offset) | ||
| 507 | manufacturer = (const char *)rom+pnp->manufacturer_offset; | ||
| 508 | if (pnp->product_offset) | ||
| 509 | product = (const char *)rom+pnp->product_offset; | ||
| 510 | } | ||
| 511 | |||
| 512 | if (rom->license_offs) | ||
| 513 | license = (char *)rom+rom->license_offs; | ||
| 514 | |||
| 515 | printk(KERN_INFO "VMI: Found %s %s, API version %d.%d, ROM version %d.%d\n", | ||
| 516 | manufacturer, product, | ||
| 517 | rom->api_version_maj, rom->api_version_min, | ||
| 518 | pci->rom_version_maj, pci->rom_version_min); | ||
| 519 | |||
| 520 | /* Don't allow BSD/MIT here for now because we don't want to end up | ||
| 521 | with any binary only shim layers */ | ||
| 522 | if (strcmp(license, "GPL") && strcmp(license, "GPL v2")) { | ||
| 523 | printk(KERN_WARNING "VMI: Non GPL license `%s' found for ROM. Not used.\n", | ||
| 524 | license); | ||
| 525 | return 0; | ||
| 526 | } | ||
| 527 | |||
| 528 | return 1; | ||
| 529 | } | ||
| 530 | |||
| 531 | /* | ||
| 532 | * Probe for the VMI option ROM | ||
| 533 | */ | ||
| 534 | static inline int __init probe_vmi_rom(void) | ||
| 535 | { | ||
| 536 | unsigned long base; | ||
| 537 | |||
| 538 | /* VMI ROM is in option ROM area, check signature */ | ||
| 539 | for (base = 0xC0000; base < 0xE0000; base += 2048) { | ||
| 540 | struct vrom_header *romstart; | ||
| 541 | romstart = (struct vrom_header *)isa_bus_to_virt(base); | ||
| 542 | if (check_vmi_rom(romstart)) { | ||
| 543 | vmi_rom = romstart; | ||
| 544 | return 1; | ||
| 545 | } | ||
| 546 | } | ||
| 547 | return 0; | ||
| 548 | } | ||
| 549 | |||
| 550 | /* | ||
| 551 | * VMI setup common to all processors | ||
| 552 | */ | ||
| 553 | void vmi_bringup(void) | ||
| 554 | { | ||
| 555 | /* We must establish the lowmem mapping for MMU ops to work */ | ||
| 556 | if (vmi_ops.set_linear_mapping) | ||
| 557 | vmi_ops.set_linear_mapping(0, (void *)__PAGE_OFFSET, MAXMEM_PFN, 0); | ||
| 558 | } | ||
| 559 | |||
| 560 | /* | ||
| 561 | * Return a pointer to a VMI function or NULL if unimplemented | ||
| 562 | */ | ||
| 563 | static void *vmi_get_function(int vmicall) | ||
| 564 | { | ||
| 565 | u64 reloc; | ||
| 566 | const struct vmi_relocation_info *rel = (struct vmi_relocation_info *)&reloc; | ||
| 567 | reloc = call_vrom_long_func(vmi_rom, get_reloc, vmicall); | ||
| 568 | BUG_ON(rel->type == VMI_RELOCATION_JUMP_REL); | ||
| 569 | if (rel->type == VMI_RELOCATION_CALL_REL) | ||
| 570 | return (void *)rel->eip; | ||
| 571 | else | ||
| 572 | return NULL; | ||
| 573 | } | ||
| 574 | |||
| 575 | /* | ||
| 576 | * Helper macro for making the VMI paravirt-ops fill code readable. | ||
| 577 | * For unimplemented operations, fall back to default, unless nop | ||
| 578 | * is returned by the ROM. | ||
| 579 | */ | ||
| 580 | #define para_fill(opname, vmicall) \ | ||
| 581 | do { \ | ||
| 582 | reloc = call_vrom_long_func(vmi_rom, get_reloc, \ | ||
| 583 | VMI_CALL_##vmicall); \ | ||
| 584 | if (rel->type == VMI_RELOCATION_CALL_REL) \ | ||
| 585 | opname = (void *)rel->eip; \ | ||
| 586 | else if (rel->type == VMI_RELOCATION_NOP) \ | ||
| 587 | opname = (void *)vmi_nop; \ | ||
| 588 | else if (rel->type != VMI_RELOCATION_NONE) \ | ||
| 589 | printk(KERN_WARNING "VMI: Unknown relocation " \ | ||
| 590 | "type %d for " #vmicall"\n",\ | ||
| 591 | rel->type); \ | ||
| 592 | } while (0) | ||
| 593 | |||
| 594 | /* | ||
| 595 | * Helper macro for making the VMI paravirt-ops fill code readable. | ||
| 596 | * For cached operations which do not match the VMI ROM ABI and must | ||
| 597 | * go through a tranlation stub. Ignore NOPs, since it is not clear | ||
| 598 | * a NOP * VMI function corresponds to a NOP paravirt-op when the | ||
| 599 | * functions are not in 1-1 correspondence. | ||
| 600 | */ | ||
| 601 | #define para_wrap(opname, wrapper, cache, vmicall) \ | ||
| 602 | do { \ | ||
| 603 | reloc = call_vrom_long_func(vmi_rom, get_reloc, \ | ||
| 604 | VMI_CALL_##vmicall); \ | ||
| 605 | BUG_ON(rel->type == VMI_RELOCATION_JUMP_REL); \ | ||
| 606 | if (rel->type == VMI_RELOCATION_CALL_REL) { \ | ||
| 607 | opname = wrapper; \ | ||
| 608 | vmi_ops.cache = (void *)rel->eip; \ | ||
| 609 | } \ | ||
| 610 | } while (0) | ||
| 611 | |||
| 612 | /* | ||
| 613 | * Activate the VMI interface and switch into paravirtualized mode | ||
| 614 | */ | ||
| 615 | static inline int __init activate_vmi(void) | ||
| 616 | { | ||
| 617 | short kernel_cs; | ||
| 618 | u64 reloc; | ||
| 619 | const struct vmi_relocation_info *rel = (struct vmi_relocation_info *)&reloc; | ||
| 620 | |||
| 621 | /* | ||
| 622 | * Prevent page tables from being allocated in highmem, even if | ||
| 623 | * CONFIG_HIGHPTE is enabled. | ||
| 624 | */ | ||
| 625 | __userpte_alloc_gfp &= ~__GFP_HIGHMEM; | ||
| 626 | |||
| 627 | if (call_vrom_func(vmi_rom, vmi_init) != 0) { | ||
| 628 | printk(KERN_ERR "VMI ROM failed to initialize!\n"); | ||
| 629 | return 0; | ||
| 630 | } | ||
| 631 | savesegment(cs, kernel_cs); | ||
| 632 | |||
| 633 | pv_info.paravirt_enabled = 1; | ||
| 634 | pv_info.kernel_rpl = kernel_cs & SEGMENT_RPL_MASK; | ||
| 635 | pv_info.name = "vmi [deprecated]"; | ||
| 636 | |||
| 637 | pv_init_ops.patch = vmi_patch; | ||
| 638 | |||
| 639 | /* | ||
| 640 | * Many of these operations are ABI compatible with VMI. | ||
| 641 | * This means we can fill in the paravirt-ops with direct | ||
| 642 | * pointers into the VMI ROM. If the calling convention for | ||
| 643 | * these operations changes, this code needs to be updated. | ||
| 644 | * | ||
| 645 | * Exceptions | ||
| 646 | * CPUID paravirt-op uses pointers, not the native ISA | ||
| 647 | * halt has no VMI equivalent; all VMI halts are "safe" | ||
| 648 | * no MSR support yet - just trap and emulate. VMI uses the | ||
| 649 | * same ABI as the native ISA, but Linux wants exceptions | ||
| 650 | * from bogus MSR read / write handled | ||
| 651 | * rdpmc is not yet used in Linux | ||
| 652 | */ | ||
| 653 | |||
| 654 | /* CPUID is special, so very special it gets wrapped like a present */ | ||
| 655 | para_wrap(pv_cpu_ops.cpuid, vmi_cpuid, cpuid, CPUID); | ||
| 656 | |||
| 657 | para_fill(pv_cpu_ops.clts, CLTS); | ||
| 658 | para_fill(pv_cpu_ops.get_debugreg, GetDR); | ||
| 659 | para_fill(pv_cpu_ops.set_debugreg, SetDR); | ||
| 660 | para_fill(pv_cpu_ops.read_cr0, GetCR0); | ||
| 661 | para_fill(pv_mmu_ops.read_cr2, GetCR2); | ||
| 662 | para_fill(pv_mmu_ops.read_cr3, GetCR3); | ||
| 663 | para_fill(pv_cpu_ops.read_cr4, GetCR4); | ||
| 664 | para_fill(pv_cpu_ops.write_cr0, SetCR0); | ||
| 665 | para_fill(pv_mmu_ops.write_cr2, SetCR2); | ||
| 666 | para_fill(pv_mmu_ops.write_cr3, SetCR3); | ||
| 667 | para_fill(pv_cpu_ops.write_cr4, SetCR4); | ||
| 668 | |||
| 669 | para_fill(pv_irq_ops.save_fl.func, GetInterruptMask); | ||
| 670 | para_fill(pv_irq_ops.restore_fl.func, SetInterruptMask); | ||
| 671 | para_fill(pv_irq_ops.irq_disable.func, DisableInterrupts); | ||
| 672 | para_fill(pv_irq_ops.irq_enable.func, EnableInterrupts); | ||
| 673 | |||
| 674 | para_fill(pv_cpu_ops.wbinvd, WBINVD); | ||
| 675 | para_fill(pv_cpu_ops.read_tsc, RDTSC); | ||
| 676 | |||
| 677 | /* The following we emulate with trap and emulate for now */ | ||
| 678 | /* paravirt_ops.read_msr = vmi_rdmsr */ | ||
| 679 | /* paravirt_ops.write_msr = vmi_wrmsr */ | ||
| 680 | /* paravirt_ops.rdpmc = vmi_rdpmc */ | ||
| 681 | |||
| 682 | /* TR interface doesn't pass TR value, wrap */ | ||
| 683 | para_wrap(pv_cpu_ops.load_tr_desc, vmi_set_tr, set_tr, SetTR); | ||
| 684 | |||
| 685 | /* LDT is special, too */ | ||
| 686 | para_wrap(pv_cpu_ops.set_ldt, vmi_set_ldt, _set_ldt, SetLDT); | ||
| 687 | |||
| 688 | para_fill(pv_cpu_ops.load_gdt, SetGDT); | ||
| 689 | para_fill(pv_cpu_ops.load_idt, SetIDT); | ||
| 690 | para_fill(pv_cpu_ops.store_gdt, GetGDT); | ||
| 691 | para_fill(pv_cpu_ops.store_idt, GetIDT); | ||
| 692 | para_fill(pv_cpu_ops.store_tr, GetTR); | ||
| 693 | pv_cpu_ops.load_tls = vmi_load_tls; | ||
| 694 | para_wrap(pv_cpu_ops.write_ldt_entry, vmi_write_ldt_entry, | ||
| 695 | write_ldt_entry, WriteLDTEntry); | ||
| 696 | para_wrap(pv_cpu_ops.write_gdt_entry, vmi_write_gdt_entry, | ||
| 697 | write_gdt_entry, WriteGDTEntry); | ||
| 698 | para_wrap(pv_cpu_ops.write_idt_entry, vmi_write_idt_entry, | ||
| 699 | write_idt_entry, WriteIDTEntry); | ||
| 700 | para_wrap(pv_cpu_ops.load_sp0, vmi_load_sp0, set_kernel_stack, UpdateKernelStack); | ||
| 701 | para_fill(pv_cpu_ops.set_iopl_mask, SetIOPLMask); | ||
| 702 | para_fill(pv_cpu_ops.io_delay, IODelay); | ||
| 703 | |||
| 704 | para_wrap(pv_cpu_ops.start_context_switch, vmi_start_context_switch, | ||
| 705 | set_lazy_mode, SetLazyMode); | ||
| 706 | para_wrap(pv_cpu_ops.end_context_switch, vmi_end_context_switch, | ||
| 707 | set_lazy_mode, SetLazyMode); | ||
| 708 | |||
| 709 | para_wrap(pv_mmu_ops.lazy_mode.enter, vmi_enter_lazy_mmu, | ||
| 710 | set_lazy_mode, SetLazyMode); | ||
| 711 | para_wrap(pv_mmu_ops.lazy_mode.leave, vmi_leave_lazy_mmu, | ||
| 712 | set_lazy_mode, SetLazyMode); | ||
| 713 | |||
| 714 | /* user and kernel flush are just handled with different flags to FlushTLB */ | ||
| 715 | para_wrap(pv_mmu_ops.flush_tlb_user, vmi_flush_tlb_user, _flush_tlb, FlushTLB); | ||
| 716 | para_wrap(pv_mmu_ops.flush_tlb_kernel, vmi_flush_tlb_kernel, _flush_tlb, FlushTLB); | ||
| 717 | para_fill(pv_mmu_ops.flush_tlb_single, InvalPage); | ||
| 718 | |||
| 719 | /* | ||
| 720 | * Until a standard flag format can be agreed on, we need to | ||
| 721 | * implement these as wrappers in Linux. Get the VMI ROM | ||
| 722 | * function pointers for the two backend calls. | ||
| 723 | */ | ||
| 724 | #ifdef CONFIG_X86_PAE | ||
| 725 | vmi_ops.set_pte = vmi_get_function(VMI_CALL_SetPxELong); | ||
| 726 | vmi_ops.update_pte = vmi_get_function(VMI_CALL_UpdatePxELong); | ||
| 727 | #else | ||
| 728 | vmi_ops.set_pte = vmi_get_function(VMI_CALL_SetPxE); | ||
| 729 | vmi_ops.update_pte = vmi_get_function(VMI_CALL_UpdatePxE); | ||
| 730 | #endif | ||
| 731 | |||
| 732 | if (vmi_ops.set_pte) { | ||
| 733 | pv_mmu_ops.set_pte = vmi_set_pte; | ||
| 734 | pv_mmu_ops.set_pte_at = vmi_set_pte_at; | ||
| 735 | pv_mmu_ops.set_pmd = vmi_set_pmd; | ||
| 736 | #ifdef CONFIG_X86_PAE | ||
| 737 | pv_mmu_ops.set_pte_atomic = vmi_set_pte_atomic; | ||
| 738 | pv_mmu_ops.set_pud = vmi_set_pud; | ||
| 739 | pv_mmu_ops.pte_clear = vmi_pte_clear; | ||
| 740 | pv_mmu_ops.pmd_clear = vmi_pmd_clear; | ||
| 741 | #endif | ||
| 742 | } | ||
| 743 | |||
| 744 | if (vmi_ops.update_pte) { | ||
| 745 | pv_mmu_ops.pte_update = vmi_update_pte; | ||
| 746 | pv_mmu_ops.pte_update_defer = vmi_update_pte_defer; | ||
| 747 | } | ||
| 748 | |||
| 749 | vmi_ops.allocate_page = vmi_get_function(VMI_CALL_AllocatePage); | ||
| 750 | if (vmi_ops.allocate_page) { | ||
| 751 | pv_mmu_ops.alloc_pte = vmi_allocate_pte; | ||
| 752 | pv_mmu_ops.alloc_pmd = vmi_allocate_pmd; | ||
| 753 | pv_mmu_ops.alloc_pmd_clone = vmi_allocate_pmd_clone; | ||
| 754 | } | ||
| 755 | |||
| 756 | vmi_ops.release_page = vmi_get_function(VMI_CALL_ReleasePage); | ||
| 757 | if (vmi_ops.release_page) { | ||
| 758 | pv_mmu_ops.release_pte = vmi_release_pte; | ||
| 759 | pv_mmu_ops.release_pmd = vmi_release_pmd; | ||
| 760 | pv_mmu_ops.pgd_free = vmi_pgd_free; | ||
| 761 | } | ||
| 762 | |||
| 763 | /* Set linear is needed in all cases */ | ||
| 764 | vmi_ops.set_linear_mapping = vmi_get_function(VMI_CALL_SetLinearMapping); | ||
| 765 | |||
| 766 | /* | ||
| 767 | * These MUST always be patched. Don't support indirect jumps | ||
| 768 | * through these operations, as the VMI interface may use either | ||
| 769 | * a jump or a call to get to these operations, depending on | ||
| 770 | * the backend. They are performance critical anyway, so requiring | ||
| 771 | * a patch is not a big problem. | ||
| 772 | */ | ||
| 773 | pv_cpu_ops.irq_enable_sysexit = (void *)0xfeedbab0; | ||
| 774 | pv_cpu_ops.iret = (void *)0xbadbab0; | ||
| 775 | |||
| 776 | #ifdef CONFIG_SMP | ||
| 777 | para_wrap(pv_apic_ops.startup_ipi_hook, vmi_startup_ipi_hook, set_initial_ap_state, SetInitialAPState); | ||
| 778 | #endif | ||
| 779 | |||
| 780 | #ifdef CONFIG_X86_LOCAL_APIC | ||
| 781 | para_fill(apic->read, APICRead); | ||
| 782 | para_fill(apic->write, APICWrite); | ||
| 783 | #endif | ||
| 784 | |||
| 785 | /* | ||
| 786 | * Check for VMI timer functionality by probing for a cycle frequency method | ||
| 787 | */ | ||
| 788 | reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_GetCycleFrequency); | ||
| 789 | if (!disable_vmi_timer && rel->type != VMI_RELOCATION_NONE) { | ||
| 790 | vmi_timer_ops.get_cycle_frequency = (void *)rel->eip; | ||
| 791 | vmi_timer_ops.get_cycle_counter = | ||
| 792 | vmi_get_function(VMI_CALL_GetCycleCounter); | ||
| 793 | vmi_timer_ops.get_wallclock = | ||
| 794 | vmi_get_function(VMI_CALL_GetWallclockTime); | ||
| 795 | vmi_timer_ops.wallclock_updated = | ||
| 796 | vmi_get_function(VMI_CALL_WallclockUpdated); | ||
| 797 | vmi_timer_ops.set_alarm = vmi_get_function(VMI_CALL_SetAlarm); | ||
| 798 | vmi_timer_ops.cancel_alarm = | ||
| 799 | vmi_get_function(VMI_CALL_CancelAlarm); | ||
| 800 | x86_init.timers.timer_init = vmi_time_init; | ||
| 801 | #ifdef CONFIG_X86_LOCAL_APIC | ||
| 802 | x86_init.timers.setup_percpu_clockev = vmi_time_bsp_init; | ||
| 803 | x86_cpuinit.setup_percpu_clockev = vmi_time_ap_init; | ||
| 804 | #endif | ||
| 805 | pv_time_ops.sched_clock = vmi_sched_clock; | ||
| 806 | x86_platform.calibrate_tsc = vmi_tsc_khz; | ||
| 807 | x86_platform.get_wallclock = vmi_get_wallclock; | ||
| 808 | x86_platform.set_wallclock = vmi_set_wallclock; | ||
| 809 | |||
| 810 | /* We have true wallclock functions; disable CMOS clock sync */ | ||
| 811 | no_sync_cmos_clock = 1; | ||
| 812 | } else { | ||
| 813 | disable_noidle = 1; | ||
| 814 | disable_vmi_timer = 1; | ||
| 815 | } | ||
| 816 | |||
| 817 | para_fill(pv_irq_ops.safe_halt, Halt); | ||
| 818 | |||
| 819 | /* | ||
| 820 | * Alternative instruction rewriting doesn't happen soon enough | ||
| 821 | * to convert VMI_IRET to a call instead of a jump; so we have | ||
| 822 | * to do this before IRQs get reenabled. Fortunately, it is | ||
| 823 | * idempotent. | ||
| 824 | */ | ||
| 825 | apply_paravirt(__parainstructions, __parainstructions_end); | ||
| 826 | |||
| 827 | vmi_bringup(); | ||
| 828 | |||
| 829 | return 1; | ||
| 830 | } | ||
| 831 | |||
| 832 | #undef para_fill | ||
| 833 | |||
| 834 | void __init vmi_init(void) | ||
| 835 | { | ||
| 836 | if (!vmi_rom) | ||
| 837 | probe_vmi_rom(); | ||
| 838 | else | ||
| 839 | check_vmi_rom(vmi_rom); | ||
| 840 | |||
| 841 | /* In case probing for or validating the ROM failed, bail */ | ||
| 842 | if (!vmi_rom) | ||
| 843 | return; | ||
| 844 | |||
| 845 | reserve_top_address(-vmi_rom->virtual_top); | ||
| 846 | |||
| 847 | #ifdef CONFIG_X86_IO_APIC | ||
| 848 | /* This is virtual hardware; timer routing is wired correctly */ | ||
| 849 | no_timer_check = 1; | ||
| 850 | #endif | ||
| 851 | } | ||
| 852 | |||
| 853 | void __init vmi_activate(void) | ||
| 854 | { | ||
| 855 | unsigned long flags; | ||
| 856 | |||
| 857 | if (!vmi_rom) | ||
| 858 | return; | ||
| 859 | |||
| 860 | local_irq_save(flags); | ||
| 861 | activate_vmi(); | ||
| 862 | local_irq_restore(flags & X86_EFLAGS_IF); | ||
| 863 | } | ||
| 864 | |||
| 865 | static int __init parse_vmi(char *arg) | ||
| 866 | { | ||
| 867 | if (!arg) | ||
| 868 | return -EINVAL; | ||
| 869 | |||
| 870 | if (!strcmp(arg, "disable_pge")) { | ||
| 871 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_PGE); | ||
| 872 | disable_pge = 1; | ||
| 873 | } else if (!strcmp(arg, "disable_pse")) { | ||
| 874 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_PSE); | ||
| 875 | disable_pse = 1; | ||
| 876 | } else if (!strcmp(arg, "disable_sep")) { | ||
| 877 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_SEP); | ||
| 878 | disable_sep = 1; | ||
| 879 | } else if (!strcmp(arg, "disable_tsc")) { | ||
| 880 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC); | ||
| 881 | disable_tsc = 1; | ||
| 882 | } else if (!strcmp(arg, "disable_mtrr")) { | ||
| 883 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_MTRR); | ||
| 884 | disable_mtrr = 1; | ||
| 885 | } else if (!strcmp(arg, "disable_timer")) { | ||
| 886 | disable_vmi_timer = 1; | ||
| 887 | disable_noidle = 1; | ||
| 888 | } else if (!strcmp(arg, "disable_noidle")) | ||
| 889 | disable_noidle = 1; | ||
| 890 | return 0; | ||
| 891 | } | ||
| 892 | |||
| 893 | early_param("vmi", parse_vmi); | ||
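parse_vmi() is wired up through early_param(), so these switches come from the kernel command line and are handled very early in boot. The whole argument string is compared, so each option needs its own vmi= entry, e.g.:

    vmi=disable_tsc vmi=disable_timer

(The first clears X86_FEATURE_TSC before the guest relies on it; the second sets disable_vmi_timer and disable_noidle, exactly as the handler above does.)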
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c deleted file mode 100644 index 5e1ff66ecd73..000000000000 --- a/arch/x86/kernel/vmiclock_32.c +++ /dev/null | |||
| @@ -1,317 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * VMI paravirtual timer support routines. | ||
| 3 | * | ||
| 4 | * Copyright (C) 2007, VMware, Inc. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify | ||
| 7 | * it under the terms of the GNU General Public License as published by | ||
| 8 | * the Free Software Foundation; either version 2 of the License, or | ||
| 9 | * (at your option) any later version. | ||
| 10 | * | ||
| 11 | * This program is distributed in the hope that it will be useful, but | ||
| 12 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 13 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
| 14 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
| 15 | * details. | ||
| 16 | * | ||
| 17 | * You should have received a copy of the GNU General Public License | ||
| 18 | * along with this program; if not, write to the Free Software | ||
| 19 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
| 20 | * | ||
| 21 | */ | ||
| 22 | |||
| 23 | #include <linux/smp.h> | ||
| 24 | #include <linux/interrupt.h> | ||
| 25 | #include <linux/cpumask.h> | ||
| 26 | #include <linux/clocksource.h> | ||
| 27 | #include <linux/clockchips.h> | ||
| 28 | |||
| 29 | #include <asm/vmi.h> | ||
| 30 | #include <asm/vmi_time.h> | ||
| 31 | #include <asm/apicdef.h> | ||
| 32 | #include <asm/apic.h> | ||
| 33 | #include <asm/timer.h> | ||
| 34 | #include <asm/i8253.h> | ||
| 35 | #include <asm/irq_vectors.h> | ||
| 36 | |||
| 37 | #define VMI_ONESHOT (VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL | vmi_get_alarm_wiring()) | ||
| 38 | #define VMI_PERIODIC (VMI_ALARM_IS_PERIODIC | VMI_CYCLES_REAL | vmi_get_alarm_wiring()) | ||
| 39 | |||
| 40 | static DEFINE_PER_CPU(struct clock_event_device, local_events); | ||
| 41 | |||
| 42 | static inline u32 vmi_counter(u32 flags) | ||
| 43 | { | ||
| 44 | /* Given VMI_ONESHOT or VMI_PERIODIC, return the corresponding | ||
| 45 | * cycle counter. */ | ||
| 46 | return flags & VMI_ALARM_COUNTER_MASK; | ||
| 47 | } | ||
| 48 | |||
| 49 | /* x86_platform.get_wallclock = vmi_get_wallclock */ | ||
| 50 | unsigned long vmi_get_wallclock(void) | ||
| 51 | { | ||
| 52 | unsigned long long wallclock; | ||
| 53 | wallclock = vmi_timer_ops.get_wallclock(); // nsec | ||
| 54 | (void)do_div(wallclock, 1000000000); // sec | ||
| 55 | |||
| 56 | return wallclock; | ||
| 57 | } | ||
| 58 | |||
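get_wallclock() hands back nanoseconds of wall time, so the only work here is a 64-bit divide by 10^9; do_div() is the kernel idiom because a plain 64-bit '/' on 32-bit x86 would pull in libgcc helpers. A stand-alone sketch of the same conversion, with a made-up timestamp:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint64_t wallclock_ns = 1287000123456789012ULL; /* hypothetical ns since the epoch */
            uint64_t secs = wallclock_ns / 1000000000ULL;   /* what do_div(wallclock, 1e9) leaves behind */

            printf("%llu s\n", (unsigned long long)secs);
            return 0;
    }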
| 59 | /* x86_platform.set_wallclock = vmi_set_wallclock */ | ||
| 60 | int vmi_set_wallclock(unsigned long now) | ||
| 61 | { | ||
| 62 | return 0; | ||
| 63 | } | ||
| 64 | |||
| 65 | /* pv_time_ops.sched_clock = vmi_sched_clock */ | ||
| 66 | unsigned long long vmi_sched_clock(void) | ||
| 67 | { | ||
| 68 | return cycles_2_ns(vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE)); | ||
| 69 | } | ||
| 70 | |||
| 71 | /* x86_platform.calibrate_tsc = vmi_tsc_khz */ | ||
| 72 | unsigned long vmi_tsc_khz(void) | ||
| 73 | { | ||
| 74 | unsigned long long khz; | ||
| 75 | khz = vmi_timer_ops.get_cycle_frequency(); | ||
| 76 | (void)do_div(khz, 1000); | ||
| 77 | return khz; | ||
| 78 | } | ||
| 79 | |||
| 80 | static inline unsigned int vmi_get_timer_vector(void) | ||
| 81 | { | ||
| 82 | return IRQ0_VECTOR; | ||
| 83 | } | ||
| 84 | |||
| 85 | /** vmi clockchip */ | ||
| 86 | #ifdef CONFIG_X86_LOCAL_APIC | ||
| 87 | static unsigned int startup_timer_irq(unsigned int irq) | ||
| 88 | { | ||
| 89 | unsigned long val = apic_read(APIC_LVTT); | ||
| 90 | apic_write(APIC_LVTT, vmi_get_timer_vector()); | ||
| 91 | |||
| 92 | return (val & APIC_SEND_PENDING); | ||
| 93 | } | ||
| 94 | |||
| 95 | static void mask_timer_irq(unsigned int irq) | ||
| 96 | { | ||
| 97 | unsigned long val = apic_read(APIC_LVTT); | ||
| 98 | apic_write(APIC_LVTT, val | APIC_LVT_MASKED); | ||
| 99 | } | ||
| 100 | |||
| 101 | static void unmask_timer_irq(unsigned int irq) | ||
| 102 | { | ||
| 103 | unsigned long val = apic_read(APIC_LVTT); | ||
| 104 | apic_write(APIC_LVTT, val & ~APIC_LVT_MASKED); | ||
| 105 | } | ||
| 106 | |||
| 107 | static void ack_timer_irq(unsigned int irq) | ||
| 108 | { | ||
| 109 | ack_APIC_irq(); | ||
| 110 | } | ||
| 111 | |||
| 112 | static struct irq_chip vmi_chip __read_mostly = { | ||
| 113 | .name = "VMI-LOCAL", | ||
| 114 | .startup = startup_timer_irq, | ||
| 115 | .mask = mask_timer_irq, | ||
| 116 | .unmask = unmask_timer_irq, | ||
| 117 | .ack = ack_timer_irq | ||
| 118 | }; | ||
| 119 | #endif | ||
| 120 | |||
| 121 | /** vmi clockevent */ | ||
| 122 | #define VMI_ALARM_WIRED_IRQ0 0x00000000 | ||
| 123 | #define VMI_ALARM_WIRED_LVTT 0x00010000 | ||
| 124 | static int vmi_wiring = VMI_ALARM_WIRED_IRQ0; | ||
| 125 | |||
| 126 | static inline int vmi_get_alarm_wiring(void) | ||
| 127 | { | ||
| 128 | return vmi_wiring; | ||
| 129 | } | ||
| 130 | |||
| 131 | static void vmi_timer_set_mode(enum clock_event_mode mode, | ||
| 132 | struct clock_event_device *evt) | ||
| 133 | { | ||
| 134 | cycle_t now, cycles_per_hz; | ||
| 135 | BUG_ON(!irqs_disabled()); | ||
| 136 | |||
| 137 | switch (mode) { | ||
| 138 | case CLOCK_EVT_MODE_ONESHOT: | ||
| 139 | case CLOCK_EVT_MODE_RESUME: | ||
| 140 | break; | ||
| 141 | case CLOCK_EVT_MODE_PERIODIC: | ||
| 142 | cycles_per_hz = vmi_timer_ops.get_cycle_frequency(); | ||
| 143 | (void)do_div(cycles_per_hz, HZ); | ||
| 144 | now = vmi_timer_ops.get_cycle_counter(vmi_counter(VMI_PERIODIC)); | ||
| 145 | vmi_timer_ops.set_alarm(VMI_PERIODIC, now, cycles_per_hz); | ||
| 146 | break; | ||
| 147 | case CLOCK_EVT_MODE_UNUSED: | ||
| 148 | case CLOCK_EVT_MODE_SHUTDOWN: | ||
| 149 | switch (evt->mode) { | ||
| 150 | case CLOCK_EVT_MODE_ONESHOT: | ||
| 151 | vmi_timer_ops.cancel_alarm(VMI_ONESHOT); | ||
| 152 | break; | ||
| 153 | case CLOCK_EVT_MODE_PERIODIC: | ||
| 154 | vmi_timer_ops.cancel_alarm(VMI_PERIODIC); | ||
| 155 | break; | ||
| 156 | default: | ||
| 157 | break; | ||
| 158 | } | ||
| 159 | break; | ||
| 160 | default: | ||
| 161 | break; | ||
| 162 | } | ||
| 163 | } | ||
| 164 | |||
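For periodic mode the alarm period is just the cycle frequency divided by HZ. A tiny sketch of that arithmetic with hypothetical numbers (a 1 GHz counter and HZ=250):

    #include <stdint.h>

    /* Sketch of the CLOCK_EVT_MODE_PERIODIC computation above. */
    static uint64_t periodic_alarm_period(uint64_t cycle_freq, unsigned int hz)
    {
            return cycle_freq / hz;   /* 1,000,000,000 / 250 = 4,000,000 cycles, i.e. 4 ms */
    }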
| 165 | static int vmi_timer_next_event(unsigned long delta, | ||
| 166 | struct clock_event_device *evt) | ||
| 167 | { | ||
| 168 | /* Unfortunately, set_next_event interface only passes relative | ||
| 169 | * expiry, but we want absolute expiry. It'd be better if we | ||
| 170 | * were passed an absolute expiry, since a bunch of time may | ||
| 171 | * have been stolen between the time the delta is computed and | ||
| 172 | * when we set the alarm below. */ | ||
| 173 | cycle_t now = vmi_timer_ops.get_cycle_counter(vmi_counter(VMI_ONESHOT)); | ||
| 174 | |||
| 175 | BUG_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT); | ||
| 176 | vmi_timer_ops.set_alarm(VMI_ONESHOT, now + delta, 0); | ||
| 177 | return 0; | ||
| 178 | } | ||
| 179 | |||
| 180 | static struct clock_event_device vmi_clockevent = { | ||
| 181 | .name = "vmi-timer", | ||
| 182 | .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, | ||
| 183 | .shift = 22, | ||
| 184 | .set_mode = vmi_timer_set_mode, | ||
| 185 | .set_next_event = vmi_timer_next_event, | ||
| 186 | .rating = 1000, | ||
| 187 | .irq = 0, | ||
| 188 | }; | ||
| 189 | |||
| 190 | static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id) | ||
| 191 | { | ||
| 192 | struct clock_event_device *evt = &__get_cpu_var(local_events); | ||
| 193 | evt->event_handler(evt); | ||
| 194 | return IRQ_HANDLED; | ||
| 195 | } | ||
| 196 | |||
| 197 | static struct irqaction vmi_clock_action = { | ||
| 198 | .name = "vmi-timer", | ||
| 199 | .handler = vmi_timer_interrupt, | ||
| 200 | .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_TIMER, | ||
| 201 | }; | ||
| 202 | |||
| 203 | static void __devinit vmi_time_init_clockevent(void) | ||
| 204 | { | ||
| 205 | cycle_t cycles_per_msec; | ||
| 206 | struct clock_event_device *evt; | ||
| 207 | |||
| 208 | int cpu = smp_processor_id(); | ||
| 209 | evt = &__get_cpu_var(local_events); | ||
| 210 | |||
| 211 | /* Use cycles_per_msec since div_sc params are 32-bits. */ | ||
| 212 | cycles_per_msec = vmi_timer_ops.get_cycle_frequency(); | ||
| 213 | (void)do_div(cycles_per_msec, 1000); | ||
| 214 | |||
| 215 | memcpy(evt, &vmi_clockevent, sizeof(*evt)); | ||
| 216 | /* Must pick .shift such that .mult fits in 32-bits. Choosing | ||
| 217 | * .shift to be 22 allows up to 2^(32-22) cycles per nanosecond | ||
| 218 | * before .mult overflows. */ | ||
| 219 | evt->mult = div_sc(cycles_per_msec, NSEC_PER_MSEC, evt->shift); | ||
| 220 | /* Upper bound is clockevent's use of ulong for cycle deltas. */ | ||
| 221 | evt->max_delta_ns = clockevent_delta2ns(ULONG_MAX, evt); | ||
| 222 | evt->min_delta_ns = clockevent_delta2ns(1, evt); | ||
| 223 | evt->cpumask = cpumask_of(cpu); | ||
| 224 | |||
| 225 | printk(KERN_WARNING "vmi: registering clock event %s. mult=%u shift=%u\n", | ||
| 226 | evt->name, evt->mult, evt->shift); | ||
| 227 | clockevents_register_device(evt); | ||
| 228 | } | ||
| 229 | |||
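The mult/shift pair computed here is the usual clockevents fixed-point scale: mult/2^shift approximates cycles per nanosecond, so a relative expiry in nanoseconds becomes cycles as (ns * mult) >> shift. A stand-alone sketch with a hypothetical 500 MHz cycle counter:

    #include <stdio.h>
    #include <stdint.h>

    #define SHIFT 22  /* same .shift as vmi_clockevent above */

    int main(void)
    {
            uint64_t cycles_per_msec = 500000;  /* hypothetical 500 MHz counter */
            /* div_sc()-style scale: cycles per nanosecond with 22 fractional bits */
            uint32_t mult = (uint32_t)((cycles_per_msec << SHIFT) / 1000000ULL);

            uint64_t delta_ns  = 2000000;                       /* program an event 2 ms out */
            uint64_t delta_cyc = (delta_ns * mult) >> SHIFT;    /* 1,000,000 cycles */

            printf("mult=%u  2 ms -> %llu cycles\n", mult, (unsigned long long)delta_cyc);
            return 0;
    }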
| 230 | void __init vmi_time_init(void) | ||
| 231 | { | ||
| 232 | unsigned int cpu; | ||
| 233 | /* Disable PIT: BIOSes start PIT CH0 in 18.2 Hz periodic mode. */ | ||
| 234 | outb_pit(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */ | ||
| 235 | |||
| 236 | vmi_time_init_clockevent(); | ||
| 237 | setup_irq(0, &vmi_clock_action); | ||
| 238 | for_each_possible_cpu(cpu) | ||
| 239 | per_cpu(vector_irq, cpu)[vmi_get_timer_vector()] = 0; | ||
| 240 | } | ||
| 241 | |||
| 242 | #ifdef CONFIG_X86_LOCAL_APIC | ||
| 243 | void __devinit vmi_time_bsp_init(void) | ||
| 244 | { | ||
| 245 | /* | ||
| 246 | * On APIC systems, we want local timers to fire on each cpu. We do | ||
| 247 | * this by programming LVTT to deliver timer events to the IRQ handler | ||
| 248 | * for IRQ-0, since we can't re-use the APIC local timer handler | ||
| 249 | * without interfering with that code. | ||
| 250 | */ | ||
| 251 | clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL); | ||
| 252 | local_irq_disable(); | ||
| 253 | #ifdef CONFIG_SMP | ||
| 254 | /* | ||
| 255 | * XXX handle_percpu_irq only defined for SMP; we need to switch over | ||
| 256 | * to using it, since this is a local interrupt, which each CPU must | ||
| 257 | * handle individually without locking out or dropping simultaneous | ||
| 258 | * local timers on other CPUs. We also don't want to trigger the | ||
| 259 | * quirk workaround code for interrupts which gets invoked from | ||
| 260 | * handle_percpu_irq via eoi, so we use our own IRQ chip. | ||
| 261 | */ | ||
| 262 | set_irq_chip_and_handler_name(0, &vmi_chip, handle_percpu_irq, "lvtt"); | ||
| 263 | #else | ||
| 264 | set_irq_chip_and_handler_name(0, &vmi_chip, handle_edge_irq, "lvtt"); | ||
| 265 | #endif | ||
| 266 | vmi_wiring = VMI_ALARM_WIRED_LVTT; | ||
| 267 | apic_write(APIC_LVTT, vmi_get_timer_vector()); | ||
| 268 | local_irq_enable(); | ||
| 269 | clockevents_notify(CLOCK_EVT_NOTIFY_RESUME, NULL); | ||
| 270 | } | ||
| 271 | |||
| 272 | void __devinit vmi_time_ap_init(void) | ||
| 273 | { | ||
| 274 | vmi_time_init_clockevent(); | ||
| 275 | apic_write(APIC_LVTT, vmi_get_timer_vector()); | ||
| 276 | } | ||
| 277 | #endif | ||
| 278 | |||
| 279 | /** vmi clocksource */ | ||
| 280 | static struct clocksource clocksource_vmi; | ||
| 281 | |||
| 282 | static cycle_t read_real_cycles(struct clocksource *cs) | ||
| 283 | { | ||
| 284 | cycle_t ret = (cycle_t)vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL); | ||
| 285 | return max(ret, clocksource_vmi.cycle_last); | ||
| 286 | } | ||
| 287 | |||
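The max() against cycle_last keeps the clocksource from ever reporting a value behind the one the timekeeping core last recorded, papering over small amounts of apparent backwards movement in the underlying counter (e.g. across CPUs). The same clamp as a stand-alone helper:

    #include <stdint.h>

    /* cycle_last is maintained by the timekeeping core, as with clocksource_vmi above. */
    static uint64_t read_clamped(uint64_t hw_cycles, uint64_t cycle_last)
    {
            return hw_cycles > cycle_last ? hw_cycles : cycle_last;
    }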
| 288 | static struct clocksource clocksource_vmi = { | ||
| 289 | .name = "vmi-timer", | ||
| 290 | .rating = 450, | ||
| 291 | .read = read_real_cycles, | ||
| 292 | .mask = CLOCKSOURCE_MASK(64), | ||
| 293 | .mult = 0, /* to be set */ | ||
| 294 | .shift = 22, | ||
| 295 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, | ||
| 296 | }; | ||
| 297 | |||
| 298 | static int __init init_vmi_clocksource(void) | ||
| 299 | { | ||
| 300 | cycle_t cycles_per_msec; | ||
| 301 | |||
| 302 | if (!vmi_timer_ops.get_cycle_frequency) | ||
| 303 | return 0; | ||
| 304 | /* Use khz2mult rather than hz2mult since hz arg is only 32-bits. */ | ||
| 305 | cycles_per_msec = vmi_timer_ops.get_cycle_frequency(); | ||
| 306 | (void)do_div(cycles_per_msec, 1000); | ||
| 307 | |||
| 308 | /* Note that clocksource.{mult, shift} converts in the opposite direction | ||
| 309 | * from clockevents. */ | ||
| 310 | clocksource_vmi.mult = clocksource_khz2mult(cycles_per_msec, | ||
| 311 | clocksource_vmi.shift); | ||
| 312 | |||
| 313 | printk(KERN_WARNING "vmi: registering clock source khz=%lld\n", cycles_per_msec); | ||
| 314 | return clocksource_register(&clocksource_vmi); | ||
| 315 | |||
| 316 | } | ||
| 317 | module_init(init_vmi_clocksource); | ||
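clocksource_khz2mult() goes the opposite way, as the comment above notes: for a clocksource, mult/2^shift is nanoseconds per cycle, roughly ((10^6) << shift) / khz (the real helper also rounds), and the core converts readings as ns = (cycles * mult) >> shift. Continuing the hypothetical 500 MHz example:

    /* Sketch only; assumes mult = (NSEC_PER_MSEC << shift) / khz, which is
     * approximately what clocksource_khz2mult() computes. */
    uint64_t khz  = 500000;                                /* hypothetical 500 MHz counter */
    uint32_t mult = (uint32_t)((1000000ULL << 22) / khz);  /* 8,388,608 -> 2 ns per cycle */
    uint64_t cyc  = 1000000;
    uint64_t ns   = (cyc * mult) >> 22;                    /* back to 2,000,000 ns */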
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index d0bb52296fa3..e03530aebfd0 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S | |||
| @@ -242,6 +242,12 @@ SECTIONS | |||
| 242 | __x86_cpu_dev_end = .; | 242 | __x86_cpu_dev_end = .; |
| 243 | } | 243 | } |
| 244 | 244 | ||
| 245 | /* | ||
| 246 | * start address and size of operations which during runtime | ||
| 247 | * can be patched with virtualization friendly instructions or | ||
| 248 | * baremetal native ones. Think page table operations. | ||
| 249 | * Details in paravirt_types.h | ||
| 250 | */ | ||
| 245 | . = ALIGN(8); | 251 | . = ALIGN(8); |
| 246 | .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { | 252 | .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { |
| 247 | __parainstructions = .; | 253 | __parainstructions = .; |
| @@ -249,6 +255,11 @@ SECTIONS | |||
| 249 | __parainstructions_end = .; | 255 | __parainstructions_end = .; |
| 250 | } | 256 | } |
| 251 | 257 | ||
| 258 | /* | ||
| 259 | * struct alt_inst entries. From the header (alternative.h): | ||
| 260 | * "Alternative instructions for different CPU types or capabilities" | ||
| 261 | * Think locking instructions on spinlocks. | ||
| 262 | */ | ||
| 252 | . = ALIGN(8); | 263 | . = ALIGN(8); |
| 253 | .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) { | 264 | .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) { |
| 254 | __alt_instructions = .; | 265 | __alt_instructions = .; |
| @@ -256,11 +267,28 @@ SECTIONS | |||
| 256 | __alt_instructions_end = .; | 267 | __alt_instructions_end = .; |
| 257 | } | 268 | } |
| 258 | 269 | ||
| 270 | /* | ||
| 271 | * And here are the replacement instructions. The linker sticks | ||
| 272 | * them as binary blobs. The .altinstructions has enough data to | ||
| 273 | * get the address and the length of them to patch the kernel safely. | ||
| 274 | */ | ||
| 259 | .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) { | 275 | .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) { |
| 260 | *(.altinstr_replacement) | 276 | *(.altinstr_replacement) |
| 261 | } | 277 | } |
| 262 | 278 | ||
| 263 | /* | 279 | /* |
| 280 | * struct iommu_table_entry entries are injected in this section. | ||
| 281 | * It is an array of IOMMUs which during run time gets sorted depending | ||
| 282 | * on its dependency order. After rootfs_initcall is complete | ||
| 283 | * this section can be safely removed. | ||
| 284 | */ | ||
| 285 | .iommu_table : AT(ADDR(.iommu_table) - LOAD_OFFSET) { | ||
| 286 | __iommu_table = .; | ||
| 287 | *(.iommu_table) | ||
| 288 | __iommu_table_end = .; | ||
| 289 | } | ||
| 290 | . = ALIGN(8); | ||
| 291 | /* | ||
| 264 | * .exit.text is discard at runtime, not link time, to deal with | 292 | * .exit.text is discard at runtime, not link time, to deal with |
| 265 | * references from .altinstructions and .eh_frame | 293 | * references from .altinstructions and .eh_frame |
| 266 | */ | 294 | */ |
| @@ -273,7 +301,7 @@ SECTIONS | |||
| 273 | } | 301 | } |
| 274 | 302 | ||
| 275 | #if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP) | 303 | #if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP) |
| 276 | PERCPU(PAGE_SIZE) | 304 | PERCPU(THREAD_SIZE) |
| 277 | #endif | 305 | #endif |
| 278 | 306 | ||
| 279 | . = ALIGN(PAGE_SIZE); | 307 | . = ALIGN(PAGE_SIZE); |
