diff options
Diffstat (limited to 'arch/x86/kernel')
119 files changed, 7593 insertions, 5212 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index d364df03c1d6..339ce35648e6 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
| @@ -23,11 +23,12 @@ nostackp := $(call cc-option, -fno-stack-protector) | |||
| 23 | CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp) | 23 | CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp) |
| 24 | CFLAGS_hpet.o := $(nostackp) | 24 | CFLAGS_hpet.o := $(nostackp) |
| 25 | CFLAGS_tsc.o := $(nostackp) | 25 | CFLAGS_tsc.o := $(nostackp) |
| 26 | CFLAGS_paravirt.o := $(nostackp) | ||
| 26 | 27 | ||
| 27 | obj-y := process_$(BITS).o signal.o entry_$(BITS).o | 28 | obj-y := process_$(BITS).o signal.o entry_$(BITS).o |
| 28 | obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o | 29 | obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o |
| 29 | obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o | 30 | obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o |
| 30 | obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o | 31 | obj-y += setup.o i8259.o irqinit_$(BITS).o |
| 31 | obj-$(CONFIG_X86_VISWS) += visws_quirks.o | 32 | obj-$(CONFIG_X86_VISWS) += visws_quirks.o |
| 32 | obj-$(CONFIG_X86_32) += probe_roms_32.o | 33 | obj-$(CONFIG_X86_32) += probe_roms_32.o |
| 33 | obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o | 34 | obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o |
| @@ -49,31 +50,27 @@ obj-y += step.o | |||
| 49 | obj-$(CONFIG_STACKTRACE) += stacktrace.o | 50 | obj-$(CONFIG_STACKTRACE) += stacktrace.o |
| 50 | obj-y += cpu/ | 51 | obj-y += cpu/ |
| 51 | obj-y += acpi/ | 52 | obj-y += acpi/ |
| 52 | obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o | 53 | obj-y += reboot.o |
| 53 | obj-$(CONFIG_MCA) += mca_32.o | 54 | obj-$(CONFIG_MCA) += mca_32.o |
| 54 | obj-$(CONFIG_X86_MSR) += msr.o | 55 | obj-$(CONFIG_X86_MSR) += msr.o |
| 55 | obj-$(CONFIG_X86_CPUID) += cpuid.o | 56 | obj-$(CONFIG_X86_CPUID) += cpuid.o |
| 56 | obj-$(CONFIG_PCI) += early-quirks.o | 57 | obj-$(CONFIG_PCI) += early-quirks.o |
| 57 | apm-y := apm_32.o | 58 | apm-y := apm_32.o |
| 58 | obj-$(CONFIG_APM) += apm.o | 59 | obj-$(CONFIG_APM) += apm.o |
| 59 | obj-$(CONFIG_X86_SMP) += smp.o | 60 | obj-$(CONFIG_SMP) += smp.o |
| 60 | obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o tlb_$(BITS).o | 61 | obj-$(CONFIG_SMP) += smpboot.o tsc_sync.o |
| 61 | obj-$(CONFIG_X86_32_SMP) += smpcommon.o | 62 | obj-$(CONFIG_SMP) += setup_percpu.o |
| 62 | obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o | 63 | obj-$(CONFIG_X86_64_SMP) += tsc_sync.o |
| 63 | obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o | 64 | obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o |
| 64 | obj-$(CONFIG_X86_MPPARSE) += mpparse.o | 65 | obj-$(CONFIG_X86_MPPARSE) += mpparse.o |
| 65 | obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o | 66 | obj-y += apic/ |
| 66 | obj-$(CONFIG_X86_IO_APIC) += io_apic.o | ||
| 67 | obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o | 67 | obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o |
| 68 | obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o | 68 | obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o |
| 69 | obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o | 69 | obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o |
| 70 | obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o | 70 | obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o |
| 71 | obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o | 71 | obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o |
| 72 | obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o | 72 | obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o |
| 73 | obj-$(CONFIG_X86_NUMAQ) += numaq_32.o | 73 | obj-$(CONFIG_X86_VSMP) += vsmp_64.o |
| 74 | obj-$(CONFIG_X86_ES7000) += es7000_32.o | ||
| 75 | obj-$(CONFIG_X86_SUMMIT_NUMA) += summit_32.o | ||
| 76 | obj-y += vsmp_64.o | ||
| 77 | obj-$(CONFIG_KPROBES) += kprobes.o | 74 | obj-$(CONFIG_KPROBES) += kprobes.o |
| 78 | obj-$(CONFIG_MODULES) += module_$(BITS).o | 75 | obj-$(CONFIG_MODULES) += module_$(BITS).o |
| 79 | obj-$(CONFIG_EFI) += efi.o efi_$(BITS).o efi_stub_$(BITS).o | 76 | obj-$(CONFIG_EFI) += efi.o efi_$(BITS).o efi_stub_$(BITS).o |
| @@ -114,16 +111,13 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o # NB rename without _64 | |||
| 114 | ### | 111 | ### |
| 115 | # 64 bit specific files | 112 | # 64 bit specific files |
| 116 | ifeq ($(CONFIG_X86_64),y) | 113 | ifeq ($(CONFIG_X86_64),y) |
| 117 | obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o | 114 | obj-$(CONFIG_X86_UV) += tlb_uv.o bios_uv.o uv_irq.o uv_sysfs.o uv_time.o |
| 118 | obj-y += bios_uv.o uv_irq.o uv_sysfs.o | 115 | obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o |
| 119 | obj-y += genx2apic_cluster.o | 116 | obj-$(CONFIG_AUDIT) += audit_64.o |
| 120 | obj-y += genx2apic_phys.o | 117 | |
| 121 | obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o | 118 | obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o |
| 122 | obj-$(CONFIG_AUDIT) += audit_64.o | 119 | obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o |
| 123 | 120 | obj-$(CONFIG_AMD_IOMMU) += amd_iommu_init.o amd_iommu.o | |
| 124 | obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o | 121 | |
| 125 | obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o | 122 | obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o |
| 126 | obj-$(CONFIG_AMD_IOMMU) += amd_iommu_init.o amd_iommu.o | ||
| 127 | |||
| 128 | obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o | ||
| 129 | endif | 123 | endif |
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 7678f10c4568..a18eb7ce2236 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c | |||
| @@ -37,15 +37,10 @@ | |||
| 37 | #include <asm/pgtable.h> | 37 | #include <asm/pgtable.h> |
| 38 | #include <asm/io_apic.h> | 38 | #include <asm/io_apic.h> |
| 39 | #include <asm/apic.h> | 39 | #include <asm/apic.h> |
| 40 | #include <asm/genapic.h> | ||
| 41 | #include <asm/io.h> | 40 | #include <asm/io.h> |
| 42 | #include <asm/mpspec.h> | 41 | #include <asm/mpspec.h> |
| 43 | #include <asm/smp.h> | 42 | #include <asm/smp.h> |
| 44 | 43 | ||
| 45 | #ifdef CONFIG_X86_LOCAL_APIC | ||
| 46 | # include <mach_apic.h> | ||
| 47 | #endif | ||
| 48 | |||
| 49 | static int __initdata acpi_force = 0; | 44 | static int __initdata acpi_force = 0; |
| 50 | u32 acpi_rsdt_forced; | 45 | u32 acpi_rsdt_forced; |
| 51 | #ifdef CONFIG_ACPI | 46 | #ifdef CONFIG_ACPI |
| @@ -56,16 +51,7 @@ int acpi_disabled = 1; | |||
| 56 | EXPORT_SYMBOL(acpi_disabled); | 51 | EXPORT_SYMBOL(acpi_disabled); |
| 57 | 52 | ||
| 58 | #ifdef CONFIG_X86_64 | 53 | #ifdef CONFIG_X86_64 |
| 59 | 54 | # include <asm/proto.h> | |
| 60 | #include <asm/proto.h> | ||
| 61 | |||
| 62 | #else /* X86 */ | ||
| 63 | |||
| 64 | #ifdef CONFIG_X86_LOCAL_APIC | ||
| 65 | #include <mach_apic.h> | ||
| 66 | #include <mach_mpparse.h> | ||
| 67 | #endif /* CONFIG_X86_LOCAL_APIC */ | ||
| 68 | |||
| 69 | #endif /* X86 */ | 55 | #endif /* X86 */ |
| 70 | 56 | ||
| 71 | #define BAD_MADT_ENTRY(entry, end) ( \ | 57 | #define BAD_MADT_ENTRY(entry, end) ( \ |
| @@ -121,35 +107,18 @@ enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PIC; | |||
| 121 | */ | 107 | */ |
| 122 | char *__init __acpi_map_table(unsigned long phys, unsigned long size) | 108 | char *__init __acpi_map_table(unsigned long phys, unsigned long size) |
| 123 | { | 109 | { |
| 124 | unsigned long base, offset, mapped_size; | ||
| 125 | int idx; | ||
| 126 | 110 | ||
| 127 | if (!phys || !size) | 111 | if (!phys || !size) |
| 128 | return NULL; | 112 | return NULL; |
| 129 | 113 | ||
| 130 | if (phys+size <= (max_low_pfn_mapped << PAGE_SHIFT)) | 114 | return early_ioremap(phys, size); |
| 131 | return __va(phys); | 115 | } |
| 132 | 116 | void __init __acpi_unmap_table(char *map, unsigned long size) | |
| 133 | offset = phys & (PAGE_SIZE - 1); | 117 | { |
| 134 | mapped_size = PAGE_SIZE - offset; | 118 | if (!map || !size) |
| 135 | clear_fixmap(FIX_ACPI_END); | 119 | return; |
| 136 | set_fixmap(FIX_ACPI_END, phys); | ||
| 137 | base = fix_to_virt(FIX_ACPI_END); | ||
| 138 | |||
| 139 | /* | ||
| 140 | * Most cases can be covered by the below. | ||
| 141 | */ | ||
| 142 | idx = FIX_ACPI_END; | ||
| 143 | while (mapped_size < size) { | ||
| 144 | if (--idx < FIX_ACPI_BEGIN) | ||
| 145 | return NULL; /* cannot handle this */ | ||
| 146 | phys += PAGE_SIZE; | ||
| 147 | clear_fixmap(idx); | ||
| 148 | set_fixmap(idx, phys); | ||
| 149 | mapped_size += PAGE_SIZE; | ||
| 150 | } | ||
| 151 | 120 | ||
| 152 | return ((unsigned char *)base + offset); | 121 | early_iounmap(map, size); |
| 153 | } | 122 | } |
| 154 | 123 | ||
| 155 | #ifdef CONFIG_PCI_MMCONFIG | 124 | #ifdef CONFIG_PCI_MMCONFIG |
| @@ -239,7 +208,8 @@ static int __init acpi_parse_madt(struct acpi_table_header *table) | |||
| 239 | madt->address); | 208 | madt->address); |
| 240 | } | 209 | } |
| 241 | 210 | ||
| 242 | acpi_madt_oem_check(madt->header.oem_id, madt->header.oem_table_id); | 211 | default_acpi_madt_oem_check(madt->header.oem_id, |
| 212 | madt->header.oem_table_id); | ||
| 243 | 213 | ||
| 244 | return 0; | 214 | return 0; |
| 245 | } | 215 | } |
| @@ -884,7 +854,7 @@ static struct { | |||
| 884 | DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1); | 854 | DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1); |
| 885 | } mp_ioapic_routing[MAX_IO_APICS]; | 855 | } mp_ioapic_routing[MAX_IO_APICS]; |
| 886 | 856 | ||
| 887 | static int mp_find_ioapic(int gsi) | 857 | int mp_find_ioapic(int gsi) |
| 888 | { | 858 | { |
| 889 | int i = 0; | 859 | int i = 0; |
| 890 | 860 | ||
| @@ -899,6 +869,16 @@ static int mp_find_ioapic(int gsi) | |||
| 899 | return -1; | 869 | return -1; |
| 900 | } | 870 | } |
| 901 | 871 | ||
| 872 | int mp_find_ioapic_pin(int ioapic, int gsi) | ||
| 873 | { | ||
| 874 | if (WARN_ON(ioapic == -1)) | ||
| 875 | return -1; | ||
| 876 | if (WARN_ON(gsi > mp_ioapic_routing[ioapic].gsi_end)) | ||
| 877 | return -1; | ||
| 878 | |||
| 879 | return gsi - mp_ioapic_routing[ioapic].gsi_base; | ||
| 880 | } | ||
| 881 | |||
| 902 | static u8 __init uniq_ioapic_id(u8 id) | 882 | static u8 __init uniq_ioapic_id(u8 id) |
| 903 | { | 883 | { |
| 904 | #ifdef CONFIG_X86_32 | 884 | #ifdef CONFIG_X86_32 |
| @@ -912,8 +892,8 @@ static u8 __init uniq_ioapic_id(u8 id) | |||
| 912 | DECLARE_BITMAP(used, 256); | 892 | DECLARE_BITMAP(used, 256); |
| 913 | bitmap_zero(used, 256); | 893 | bitmap_zero(used, 256); |
| 914 | for (i = 0; i < nr_ioapics; i++) { | 894 | for (i = 0; i < nr_ioapics; i++) { |
| 915 | struct mp_config_ioapic *ia = &mp_ioapics[i]; | 895 | struct mpc_ioapic *ia = &mp_ioapics[i]; |
| 916 | __set_bit(ia->mp_apicid, used); | 896 | __set_bit(ia->apicid, used); |
| 917 | } | 897 | } |
| 918 | if (!test_bit(id, used)) | 898 | if (!test_bit(id, used)) |
| 919 | return id; | 899 | return id; |
| @@ -945,29 +925,29 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) | |||
| 945 | 925 | ||
| 946 | idx = nr_ioapics; | 926 | idx = nr_ioapics; |
| 947 | 927 | ||
| 948 | mp_ioapics[idx].mp_type = MP_IOAPIC; | 928 | mp_ioapics[idx].type = MP_IOAPIC; |
| 949 | mp_ioapics[idx].mp_flags = MPC_APIC_USABLE; | 929 | mp_ioapics[idx].flags = MPC_APIC_USABLE; |
| 950 | mp_ioapics[idx].mp_apicaddr = address; | 930 | mp_ioapics[idx].apicaddr = address; |
| 951 | 931 | ||
| 952 | set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); | 932 | set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); |
| 953 | mp_ioapics[idx].mp_apicid = uniq_ioapic_id(id); | 933 | mp_ioapics[idx].apicid = uniq_ioapic_id(id); |
| 954 | #ifdef CONFIG_X86_32 | 934 | #ifdef CONFIG_X86_32 |
| 955 | mp_ioapics[idx].mp_apicver = io_apic_get_version(idx); | 935 | mp_ioapics[idx].apicver = io_apic_get_version(idx); |
| 956 | #else | 936 | #else |
| 957 | mp_ioapics[idx].mp_apicver = 0; | 937 | mp_ioapics[idx].apicver = 0; |
| 958 | #endif | 938 | #endif |
| 959 | /* | 939 | /* |
| 960 | * Build basic GSI lookup table to facilitate gsi->io_apic lookups | 940 | * Build basic GSI lookup table to facilitate gsi->io_apic lookups |
| 961 | * and to prevent reprogramming of IOAPIC pins (PCI GSIs). | 941 | * and to prevent reprogramming of IOAPIC pins (PCI GSIs). |
| 962 | */ | 942 | */ |
| 963 | mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mp_apicid; | 943 | mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].apicid; |
| 964 | mp_ioapic_routing[idx].gsi_base = gsi_base; | 944 | mp_ioapic_routing[idx].gsi_base = gsi_base; |
| 965 | mp_ioapic_routing[idx].gsi_end = gsi_base + | 945 | mp_ioapic_routing[idx].gsi_end = gsi_base + |
| 966 | io_apic_get_redir_entries(idx); | 946 | io_apic_get_redir_entries(idx); |
| 967 | 947 | ||
| 968 | printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, " | 948 | printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " |
| 969 | "GSI %d-%d\n", idx, mp_ioapics[idx].mp_apicid, | 949 | "GSI %d-%d\n", idx, mp_ioapics[idx].apicid, |
| 970 | mp_ioapics[idx].mp_apicver, mp_ioapics[idx].mp_apicaddr, | 950 | mp_ioapics[idx].apicver, mp_ioapics[idx].apicaddr, |
| 971 | mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end); | 951 | mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end); |
| 972 | 952 | ||
| 973 | nr_ioapics++; | 953 | nr_ioapics++; |
| @@ -996,19 +976,19 @@ int __init acpi_probe_gsi(void) | |||
| 996 | return max_gsi + 1; | 976 | return max_gsi + 1; |
| 997 | } | 977 | } |
| 998 | 978 | ||
| 999 | static void assign_to_mp_irq(struct mp_config_intsrc *m, | 979 | static void assign_to_mp_irq(struct mpc_intsrc *m, |
| 1000 | struct mp_config_intsrc *mp_irq) | 980 | struct mpc_intsrc *mp_irq) |
| 1001 | { | 981 | { |
| 1002 | memcpy(mp_irq, m, sizeof(struct mp_config_intsrc)); | 982 | memcpy(mp_irq, m, sizeof(struct mpc_intsrc)); |
| 1003 | } | 983 | } |
| 1004 | 984 | ||
| 1005 | static int mp_irq_cmp(struct mp_config_intsrc *mp_irq, | 985 | static int mp_irq_cmp(struct mpc_intsrc *mp_irq, |
| 1006 | struct mp_config_intsrc *m) | 986 | struct mpc_intsrc *m) |
| 1007 | { | 987 | { |
| 1008 | return memcmp(mp_irq, m, sizeof(struct mp_config_intsrc)); | 988 | return memcmp(mp_irq, m, sizeof(struct mpc_intsrc)); |
| 1009 | } | 989 | } |
| 1010 | 990 | ||
| 1011 | static void save_mp_irq(struct mp_config_intsrc *m) | 991 | static void save_mp_irq(struct mpc_intsrc *m) |
| 1012 | { | 992 | { |
| 1013 | int i; | 993 | int i; |
| 1014 | 994 | ||
| @@ -1026,7 +1006,7 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) | |||
| 1026 | { | 1006 | { |
| 1027 | int ioapic; | 1007 | int ioapic; |
| 1028 | int pin; | 1008 | int pin; |
| 1029 | struct mp_config_intsrc mp_irq; | 1009 | struct mpc_intsrc mp_irq; |
| 1030 | 1010 | ||
| 1031 | /* | 1011 | /* |
| 1032 | * Convert 'gsi' to 'ioapic.pin'. | 1012 | * Convert 'gsi' to 'ioapic.pin'. |
| @@ -1034,7 +1014,7 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) | |||
| 1034 | ioapic = mp_find_ioapic(gsi); | 1014 | ioapic = mp_find_ioapic(gsi); |
| 1035 | if (ioapic < 0) | 1015 | if (ioapic < 0) |
| 1036 | return; | 1016 | return; |
| 1037 | pin = gsi - mp_ioapic_routing[ioapic].gsi_base; | 1017 | pin = mp_find_ioapic_pin(ioapic, gsi); |
| 1038 | 1018 | ||
| 1039 | /* | 1019 | /* |
| 1040 | * TBD: This check is for faulty timer entries, where the override | 1020 | * TBD: This check is for faulty timer entries, where the override |
| @@ -1044,13 +1024,13 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) | |||
| 1044 | if ((bus_irq == 0) && (trigger == 3)) | 1024 | if ((bus_irq == 0) && (trigger == 3)) |
| 1045 | trigger = 1; | 1025 | trigger = 1; |
| 1046 | 1026 | ||
| 1047 | mp_irq.mp_type = MP_INTSRC; | 1027 | mp_irq.type = MP_INTSRC; |
| 1048 | mp_irq.mp_irqtype = mp_INT; | 1028 | mp_irq.irqtype = mp_INT; |
| 1049 | mp_irq.mp_irqflag = (trigger << 2) | polarity; | 1029 | mp_irq.irqflag = (trigger << 2) | polarity; |
| 1050 | mp_irq.mp_srcbus = MP_ISA_BUS; | 1030 | mp_irq.srcbus = MP_ISA_BUS; |
| 1051 | mp_irq.mp_srcbusirq = bus_irq; /* IRQ */ | 1031 | mp_irq.srcbusirq = bus_irq; /* IRQ */ |
| 1052 | mp_irq.mp_dstapic = mp_ioapics[ioapic].mp_apicid; /* APIC ID */ | 1032 | mp_irq.dstapic = mp_ioapics[ioapic].apicid; /* APIC ID */ |
| 1053 | mp_irq.mp_dstirq = pin; /* INTIN# */ | 1033 | mp_irq.dstirq = pin; /* INTIN# */ |
| 1054 | 1034 | ||
| 1055 | save_mp_irq(&mp_irq); | 1035 | save_mp_irq(&mp_irq); |
| 1056 | } | 1036 | } |
| @@ -1060,7 +1040,7 @@ void __init mp_config_acpi_legacy_irqs(void) | |||
| 1060 | int i; | 1040 | int i; |
| 1061 | int ioapic; | 1041 | int ioapic; |
| 1062 | unsigned int dstapic; | 1042 | unsigned int dstapic; |
| 1063 | struct mp_config_intsrc mp_irq; | 1043 | struct mpc_intsrc mp_irq; |
| 1064 | 1044 | ||
| 1065 | #if defined (CONFIG_MCA) || defined (CONFIG_EISA) | 1045 | #if defined (CONFIG_MCA) || defined (CONFIG_EISA) |
| 1066 | /* | 1046 | /* |
| @@ -1085,7 +1065,7 @@ void __init mp_config_acpi_legacy_irqs(void) | |||
| 1085 | ioapic = mp_find_ioapic(0); | 1065 | ioapic = mp_find_ioapic(0); |
| 1086 | if (ioapic < 0) | 1066 | if (ioapic < 0) |
| 1087 | return; | 1067 | return; |
| 1088 | dstapic = mp_ioapics[ioapic].mp_apicid; | 1068 | dstapic = mp_ioapics[ioapic].apicid; |
| 1089 | 1069 | ||
| 1090 | /* | 1070 | /* |
| 1091 | * Use the default configuration for the IRQs 0-15. Unless | 1071 | * Use the default configuration for the IRQs 0-15. Unless |
| @@ -1095,16 +1075,14 @@ void __init mp_config_acpi_legacy_irqs(void) | |||
| 1095 | int idx; | 1075 | int idx; |
| 1096 | 1076 | ||
| 1097 | for (idx = 0; idx < mp_irq_entries; idx++) { | 1077 | for (idx = 0; idx < mp_irq_entries; idx++) { |
| 1098 | struct mp_config_intsrc *irq = mp_irqs + idx; | 1078 | struct mpc_intsrc *irq = mp_irqs + idx; |
| 1099 | 1079 | ||
| 1100 | /* Do we already have a mapping for this ISA IRQ? */ | 1080 | /* Do we already have a mapping for this ISA IRQ? */ |
| 1101 | if (irq->mp_srcbus == MP_ISA_BUS | 1081 | if (irq->srcbus == MP_ISA_BUS && irq->srcbusirq == i) |
| 1102 | && irq->mp_srcbusirq == i) | ||
| 1103 | break; | 1082 | break; |
| 1104 | 1083 | ||
| 1105 | /* Do we already have a mapping for this IOAPIC pin */ | 1084 | /* Do we already have a mapping for this IOAPIC pin */ |
| 1106 | if (irq->mp_dstapic == dstapic && | 1085 | if (irq->dstapic == dstapic && irq->dstirq == i) |
| 1107 | irq->mp_dstirq == i) | ||
| 1108 | break; | 1086 | break; |
| 1109 | } | 1087 | } |
| 1110 | 1088 | ||
| @@ -1113,13 +1091,13 @@ void __init mp_config_acpi_legacy_irqs(void) | |||
| 1113 | continue; /* IRQ already used */ | 1091 | continue; /* IRQ already used */ |
| 1114 | } | 1092 | } |
| 1115 | 1093 | ||
| 1116 | mp_irq.mp_type = MP_INTSRC; | 1094 | mp_irq.type = MP_INTSRC; |
| 1117 | mp_irq.mp_irqflag = 0; /* Conforming */ | 1095 | mp_irq.irqflag = 0; /* Conforming */ |
| 1118 | mp_irq.mp_srcbus = MP_ISA_BUS; | 1096 | mp_irq.srcbus = MP_ISA_BUS; |
| 1119 | mp_irq.mp_dstapic = dstapic; | 1097 | mp_irq.dstapic = dstapic; |
| 1120 | mp_irq.mp_irqtype = mp_INT; | 1098 | mp_irq.irqtype = mp_INT; |
| 1121 | mp_irq.mp_srcbusirq = i; /* Identity mapped */ | 1099 | mp_irq.srcbusirq = i; /* Identity mapped */ |
| 1122 | mp_irq.mp_dstirq = i; | 1100 | mp_irq.dstirq = i; |
| 1123 | 1101 | ||
| 1124 | save_mp_irq(&mp_irq); | 1102 | save_mp_irq(&mp_irq); |
| 1125 | } | 1103 | } |
| @@ -1156,7 +1134,7 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity) | |||
| 1156 | return gsi; | 1134 | return gsi; |
| 1157 | } | 1135 | } |
| 1158 | 1136 | ||
| 1159 | ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base; | 1137 | ioapic_pin = mp_find_ioapic_pin(ioapic, gsi); |
| 1160 | 1138 | ||
| 1161 | #ifdef CONFIG_X86_32 | 1139 | #ifdef CONFIG_X86_32 |
| 1162 | if (ioapic_renumber_irq) | 1140 | if (ioapic_renumber_irq) |
| @@ -1230,22 +1208,22 @@ int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, | |||
| 1230 | u32 gsi, int triggering, int polarity) | 1208 | u32 gsi, int triggering, int polarity) |
| 1231 | { | 1209 | { |
| 1232 | #ifdef CONFIG_X86_MPPARSE | 1210 | #ifdef CONFIG_X86_MPPARSE |
| 1233 | struct mp_config_intsrc mp_irq; | 1211 | struct mpc_intsrc mp_irq; |
| 1234 | int ioapic; | 1212 | int ioapic; |
| 1235 | 1213 | ||
| 1236 | if (!acpi_ioapic) | 1214 | if (!acpi_ioapic) |
| 1237 | return 0; | 1215 | return 0; |
| 1238 | 1216 | ||
| 1239 | /* print the entry should happen on mptable identically */ | 1217 | /* print the entry should happen on mptable identically */ |
| 1240 | mp_irq.mp_type = MP_INTSRC; | 1218 | mp_irq.type = MP_INTSRC; |
| 1241 | mp_irq.mp_irqtype = mp_INT; | 1219 | mp_irq.irqtype = mp_INT; |
| 1242 | mp_irq.mp_irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) | | 1220 | mp_irq.irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) | |
| 1243 | (polarity == ACPI_ACTIVE_HIGH ? 1 : 3); | 1221 | (polarity == ACPI_ACTIVE_HIGH ? 1 : 3); |
| 1244 | mp_irq.mp_srcbus = number; | 1222 | mp_irq.srcbus = number; |
| 1245 | mp_irq.mp_srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3); | 1223 | mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3); |
| 1246 | ioapic = mp_find_ioapic(gsi); | 1224 | ioapic = mp_find_ioapic(gsi); |
| 1247 | mp_irq.mp_dstapic = mp_ioapic_routing[ioapic].apic_id; | 1225 | mp_irq.dstapic = mp_ioapic_routing[ioapic].apic_id; |
| 1248 | mp_irq.mp_dstirq = gsi - mp_ioapic_routing[ioapic].gsi_base; | 1226 | mp_irq.dstirq = mp_find_ioapic_pin(ioapic, gsi); |
| 1249 | 1227 | ||
| 1250 | save_mp_irq(&mp_irq); | 1228 | save_mp_irq(&mp_irq); |
| 1251 | #endif | 1229 | #endif |
| @@ -1372,7 +1350,7 @@ static void __init acpi_process_madt(void) | |||
| 1372 | if (!error) { | 1350 | if (!error) { |
| 1373 | acpi_lapic = 1; | 1351 | acpi_lapic = 1; |
| 1374 | 1352 | ||
| 1375 | #ifdef CONFIG_X86_GENERICARCH | 1353 | #ifdef CONFIG_X86_BIGSMP |
| 1376 | generic_bigsmp_probe(); | 1354 | generic_bigsmp_probe(); |
| 1377 | #endif | 1355 | #endif |
| 1378 | /* | 1356 | /* |
| @@ -1384,9 +1362,8 @@ static void __init acpi_process_madt(void) | |||
| 1384 | acpi_ioapic = 1; | 1362 | acpi_ioapic = 1; |
| 1385 | 1363 | ||
| 1386 | smp_found_config = 1; | 1364 | smp_found_config = 1; |
| 1387 | #ifdef CONFIG_X86_32 | 1365 | if (apic->setup_apic_routing) |
| 1388 | setup_apic_routing(); | 1366 | apic->setup_apic_routing(); |
| 1389 | #endif | ||
| 1390 | } | 1367 | } |
| 1391 | } | 1368 | } |
| 1392 | if (error == -EINVAL) { | 1369 | if (error == -EINVAL) { |
diff --git a/arch/x86/kernel/acpi/realmode/wakeup.S b/arch/x86/kernel/acpi/realmode/wakeup.S index 3355973b12ac..580b4e296010 100644 --- a/arch/x86/kernel/acpi/realmode/wakeup.S +++ b/arch/x86/kernel/acpi/realmode/wakeup.S | |||
| @@ -3,8 +3,8 @@ | |||
| 3 | */ | 3 | */ |
| 4 | #include <asm/segment.h> | 4 | #include <asm/segment.h> |
| 5 | #include <asm/msr-index.h> | 5 | #include <asm/msr-index.h> |
| 6 | #include <asm/page.h> | 6 | #include <asm/page_types.h> |
| 7 | #include <asm/pgtable.h> | 7 | #include <asm/pgtable_types.h> |
| 8 | #include <asm/processor-flags.h> | 8 | #include <asm/processor-flags.h> |
| 9 | 9 | ||
| 10 | .code16 | 10 | .code16 |
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index a60c1f3bcb87..7c243a2c5115 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c | |||
| @@ -101,6 +101,7 @@ int acpi_save_state_mem(void) | |||
| 101 | stack_start.sp = temp_stack + sizeof(temp_stack); | 101 | stack_start.sp = temp_stack + sizeof(temp_stack); |
| 102 | early_gdt_descr.address = | 102 | early_gdt_descr.address = |
| 103 | (unsigned long)get_cpu_gdt_table(smp_processor_id()); | 103 | (unsigned long)get_cpu_gdt_table(smp_processor_id()); |
| 104 | initial_gs = per_cpu_offset(smp_processor_id()); | ||
| 104 | #endif | 105 | #endif |
| 105 | initial_code = (unsigned long)wakeup_long64; | 106 | initial_code = (unsigned long)wakeup_long64; |
| 106 | saved_magic = 0x123456789abcdef0; | 107 | saved_magic = 0x123456789abcdef0; |
diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S index a12e6a9fb659..8ded418b0593 100644 --- a/arch/x86/kernel/acpi/wakeup_32.S +++ b/arch/x86/kernel/acpi/wakeup_32.S | |||
| @@ -1,7 +1,7 @@ | |||
| 1 | .section .text.page_aligned | 1 | .section .text.page_aligned |
| 2 | #include <linux/linkage.h> | 2 | #include <linux/linkage.h> |
| 3 | #include <asm/segment.h> | 3 | #include <asm/segment.h> |
| 4 | #include <asm/page.h> | 4 | #include <asm/page_types.h> |
| 5 | 5 | ||
| 6 | # Copyright 2003, 2008 Pavel Machek <pavel@suse.cz>, distribute under GPLv2 | 6 | # Copyright 2003, 2008 Pavel Machek <pavel@suse.cz>, distribute under GPLv2 |
| 7 | 7 | ||
diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S index 96258d9dc974..8ea5164cbd04 100644 --- a/arch/x86/kernel/acpi/wakeup_64.S +++ b/arch/x86/kernel/acpi/wakeup_64.S | |||
| @@ -1,8 +1,8 @@ | |||
| 1 | .text | 1 | .text |
| 2 | #include <linux/linkage.h> | 2 | #include <linux/linkage.h> |
| 3 | #include <asm/segment.h> | 3 | #include <asm/segment.h> |
| 4 | #include <asm/pgtable.h> | 4 | #include <asm/pgtable_types.h> |
| 5 | #include <asm/page.h> | 5 | #include <asm/page_types.h> |
| 6 | #include <asm/msr.h> | 6 | #include <asm/msr.h> |
| 7 | #include <asm/asm-offsets.h> | 7 | #include <asm/asm-offsets.h> |
| 8 | 8 | ||
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index a84ac7b570e6..4c80f1557433 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c | |||
| @@ -414,9 +414,17 @@ void __init alternative_instructions(void) | |||
| 414 | that might execute the to be patched code. | 414 | that might execute the to be patched code. |
| 415 | Other CPUs are not running. */ | 415 | Other CPUs are not running. */ |
| 416 | stop_nmi(); | 416 | stop_nmi(); |
| 417 | #ifdef CONFIG_X86_MCE | 417 | |
| 418 | stop_mce(); | 418 | /* |
| 419 | #endif | 419 | * Don't stop machine check exceptions while patching. |
| 420 | * MCEs only happen when something got corrupted and in this | ||
| 421 | * case we must do something about the corruption. | ||
| 422 | Ignoring it is worse than an unlikely patching race. | ||
| 423 | * Also machine checks tend to be broadcast and if one CPU | ||
| 424 | * goes into machine check the others follow quickly, so we don't | ||
| 425 | * expect a machine check to cause undue problems during code | ||
| 426 | * patching. | ||
| 427 | */ | ||
| 420 | 428 | ||
| 421 | apply_alternatives(__alt_instructions, __alt_instructions_end); | 429 | apply_alternatives(__alt_instructions, __alt_instructions_end); |
| 422 | 430 | ||
| @@ -456,9 +464,6 @@ void __init alternative_instructions(void) | |||
| 456 | (unsigned long)__smp_locks_end); | 464 | (unsigned long)__smp_locks_end); |
| 457 | 465 | ||
| 458 | restart_nmi(); | 466 | restart_nmi(); |
| 459 | #ifdef CONFIG_X86_MCE | ||
| 460 | restart_mce(); | ||
| 461 | #endif | ||
| 462 | } | 467 | } |
| 463 | 468 | ||
| 464 | /** | 469 | /** |
| @@ -498,12 +503,12 @@ void *text_poke_early(void *addr, const void *opcode, size_t len) | |||
| 498 | */ | 503 | */ |
| 499 | void *__kprobes text_poke(void *addr, const void *opcode, size_t len) | 504 | void *__kprobes text_poke(void *addr, const void *opcode, size_t len) |
| 500 | { | 505 | { |
| 501 | unsigned long flags; | ||
| 502 | char *vaddr; | 506 | char *vaddr; |
| 503 | int nr_pages = 2; | 507 | int nr_pages = 2; |
| 504 | struct page *pages[2]; | 508 | struct page *pages[2]; |
| 505 | int i; | 509 | int i; |
| 506 | 510 | ||
| 511 | might_sleep(); | ||
| 507 | if (!core_kernel_text((unsigned long)addr)) { | 512 | if (!core_kernel_text((unsigned long)addr)) { |
| 508 | pages[0] = vmalloc_to_page(addr); | 513 | pages[0] = vmalloc_to_page(addr); |
| 509 | pages[1] = vmalloc_to_page(addr + PAGE_SIZE); | 514 | pages[1] = vmalloc_to_page(addr + PAGE_SIZE); |
| @@ -517,9 +522,9 @@ void *__kprobes text_poke(void *addr, const void *opcode, size_t len) | |||
| 517 | nr_pages = 1; | 522 | nr_pages = 1; |
| 518 | vaddr = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL); | 523 | vaddr = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL); |
| 519 | BUG_ON(!vaddr); | 524 | BUG_ON(!vaddr); |
| 520 | local_irq_save(flags); | 525 | local_irq_disable(); |
| 521 | memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len); | 526 | memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len); |
| 522 | local_irq_restore(flags); | 527 | local_irq_enable(); |
| 523 | vunmap(vaddr); | 528 | vunmap(vaddr); |
| 524 | sync_core(); | 529 | sync_core(); |
| 525 | /* Could also do a CLFLUSH here to speed up CPU recovery; but | 530 | /* Could also do a CLFLUSH here to speed up CPU recovery; but |
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile new file mode 100644 index 000000000000..da7b7b9f8bd8 --- /dev/null +++ b/arch/x86/kernel/apic/Makefile | |||
| @@ -0,0 +1,19 @@ | |||
| 1 | # | ||
| 2 | # Makefile for local APIC drivers and for the IO-APIC code | ||
| 3 | # | ||
| 4 | |||
| 5 | obj-$(CONFIG_X86_LOCAL_APIC) += apic.o probe_$(BITS).o ipi.o nmi.o | ||
| 6 | obj-$(CONFIG_X86_IO_APIC) += io_apic.o | ||
| 7 | obj-$(CONFIG_SMP) += ipi.o | ||
| 8 | |||
| 9 | ifeq ($(CONFIG_X86_64),y) | ||
| 10 | obj-y += apic_flat_64.o | ||
| 11 | obj-$(CONFIG_X86_X2APIC) += x2apic_cluster.o | ||
| 12 | obj-$(CONFIG_X86_X2APIC) += x2apic_phys.o | ||
| 13 | obj-$(CONFIG_X86_UV) += x2apic_uv_x.o | ||
| 14 | endif | ||
| 15 | |||
| 16 | obj-$(CONFIG_X86_BIGSMP) += bigsmp_32.o | ||
| 17 | obj-$(CONFIG_X86_NUMAQ) += numaq_32.o | ||
| 18 | obj-$(CONFIG_X86_ES7000) += es7000_32.o | ||
| 19 | obj-$(CONFIG_X86_SUMMIT) += summit_32.o | ||
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic/apic.c index 570f36e44e59..30909a258d0f 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic/apic.c | |||
| @@ -1,7 +1,7 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Local APIC handling, local APIC timers | 2 | * Local APIC handling, local APIC timers |
| 3 | * | 3 | * |
| 4 | * (c) 1999, 2000 Ingo Molnar <mingo@redhat.com> | 4 | * (c) 1999, 2000, 2009 Ingo Molnar <mingo@redhat.com> |
| 5 | * | 5 | * |
| 6 | * Fixes | 6 | * Fixes |
| 7 | * Maciej W. Rozycki : Bits for genuine 82489DX APICs; | 7 | * Maciej W. Rozycki : Bits for genuine 82489DX APICs; |
| @@ -14,51 +14,70 @@ | |||
| 14 | * Mikael Pettersson : PM converted to driver model. | 14 | * Mikael Pettersson : PM converted to driver model. |
| 15 | */ | 15 | */ |
| 16 | 16 | ||
| 17 | #include <linux/init.h> | ||
| 18 | |||
| 19 | #include <linux/mm.h> | ||
| 20 | #include <linux/delay.h> | ||
| 21 | #include <linux/bootmem.h> | ||
| 22 | #include <linux/interrupt.h> | ||
| 23 | #include <linux/mc146818rtc.h> | ||
| 24 | #include <linux/kernel_stat.h> | 17 | #include <linux/kernel_stat.h> |
| 25 | #include <linux/sysdev.h> | 18 | #include <linux/mc146818rtc.h> |
| 26 | #include <linux/ioport.h> | ||
| 27 | #include <linux/cpu.h> | ||
| 28 | #include <linux/clockchips.h> | ||
| 29 | #include <linux/acpi_pmtmr.h> | 19 | #include <linux/acpi_pmtmr.h> |
| 20 | #include <linux/clockchips.h> | ||
| 21 | #include <linux/interrupt.h> | ||
| 22 | #include <linux/bootmem.h> | ||
| 23 | #include <linux/ftrace.h> | ||
| 24 | #include <linux/ioport.h> | ||
| 30 | #include <linux/module.h> | 25 | #include <linux/module.h> |
| 31 | #include <linux/dmi.h> | 26 | #include <linux/sysdev.h> |
| 27 | #include <linux/delay.h> | ||
| 28 | #include <linux/timex.h> | ||
| 32 | #include <linux/dmar.h> | 29 | #include <linux/dmar.h> |
| 33 | #include <linux/ftrace.h> | 30 | #include <linux/init.h> |
| 34 | #include <linux/smp.h> | 31 | #include <linux/cpu.h> |
| 32 | #include <linux/dmi.h> | ||
| 35 | #include <linux/nmi.h> | 33 | #include <linux/nmi.h> |
| 36 | #include <linux/timex.h> | 34 | #include <linux/smp.h> |
| 35 | #include <linux/mm.h> | ||
| 37 | 36 | ||
| 37 | #include <asm/pgalloc.h> | ||
| 38 | #include <asm/atomic.h> | 38 | #include <asm/atomic.h> |
| 39 | #include <asm/mtrr.h> | ||
| 40 | #include <asm/mpspec.h> | 39 | #include <asm/mpspec.h> |
| 41 | #include <asm/desc.h> | ||
| 42 | #include <asm/arch_hooks.h> | ||
| 43 | #include <asm/hpet.h> | ||
| 44 | #include <asm/pgalloc.h> | ||
| 45 | #include <asm/i8253.h> | 40 | #include <asm/i8253.h> |
| 46 | #include <asm/idle.h> | 41 | #include <asm/i8259.h> |
| 47 | #include <asm/proto.h> | 42 | #include <asm/proto.h> |
| 48 | #include <asm/apic.h> | 43 | #include <asm/apic.h> |
| 49 | #include <asm/i8259.h> | 44 | #include <asm/desc.h> |
| 45 | #include <asm/hpet.h> | ||
| 46 | #include <asm/idle.h> | ||
| 47 | #include <asm/mtrr.h> | ||
| 50 | #include <asm/smp.h> | 48 | #include <asm/smp.h> |
| 49 | #include <asm/mce.h> | ||
| 50 | |||
| 51 | unsigned int num_processors; | ||
| 52 | |||
| 53 | unsigned disabled_cpus __cpuinitdata; | ||
| 51 | 54 | ||
| 52 | #include <mach_apic.h> | 55 | /* Processor that is doing the boot up */ |
| 53 | #include <mach_apicdef.h> | 56 | unsigned int boot_cpu_physical_apicid = -1U; |
| 54 | #include <mach_ipi.h> | ||
| 55 | 57 | ||
| 56 | /* | 58 | /* |
| 57 | * Sanity check | 59 | * The highest APIC ID seen during enumeration. |
| 60 | * | ||
| 61 | * This determines the messaging protocol we can use: if all APIC IDs | ||
| 62 | * are in the 0 ... 7 range, then we can use logical addressing which | ||
| 63 | * has some performance advantages (better broadcasting). | ||
| 64 | * | ||
| 65 | * If there's an APIC ID above 8, we use physical addressing. | ||
| 58 | */ | 66 | */ |
| 59 | #if ((SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F) | 67 | unsigned int max_physical_apicid; |
| 60 | # error SPURIOUS_APIC_VECTOR definition error | 68 | |
| 61 | #endif | 69 | /* |
| 70 | * Bitmask of physically existing CPUs: | ||
| 71 | */ | ||
| 72 | physid_mask_t phys_cpu_present_map; | ||
| 73 | |||
| 74 | /* | ||
| 75 | * Map cpu index to physical APIC ID | ||
| 76 | */ | ||
| 77 | DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID); | ||
| 78 | DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID); | ||
| 79 | EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); | ||
| 80 | EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); | ||
| 62 | 81 | ||
| 63 | #ifdef CONFIG_X86_32 | 82 | #ifdef CONFIG_X86_32 |
| 64 | /* | 83 | /* |
| @@ -92,11 +111,7 @@ static __init int setup_apicpmtimer(char *s) | |||
| 92 | __setup("apicpmtimer", setup_apicpmtimer); | 111 | __setup("apicpmtimer", setup_apicpmtimer); |
| 93 | #endif | 112 | #endif |
| 94 | 113 | ||
| 95 | #ifdef CONFIG_X86_64 | 114 | #ifdef CONFIG_X86_X2APIC |
| 96 | #define HAVE_X2APIC | ||
| 97 | #endif | ||
| 98 | |||
| 99 | #ifdef HAVE_X2APIC | ||
| 100 | int x2apic; | 115 | int x2apic; |
| 101 | /* x2apic enabled before OS handover */ | 116 | /* x2apic enabled before OS handover */ |
| 102 | static int x2apic_preenabled; | 117 | static int x2apic_preenabled; |
| @@ -194,18 +209,13 @@ static int modern_apic(void) | |||
| 194 | return lapic_get_version() >= 0x14; | 209 | return lapic_get_version() >= 0x14; |
| 195 | } | 210 | } |
| 196 | 211 | ||
| 197 | /* | 212 | void native_apic_wait_icr_idle(void) |
| 198 | * Paravirt kernels also might be using these below ops. So we still | ||
| 199 | * use generic apic_read()/apic_write(), which might be pointing to different | ||
| 200 | * ops in PARAVIRT case. | ||
| 201 | */ | ||
| 202 | void xapic_wait_icr_idle(void) | ||
| 203 | { | 213 | { |
| 204 | while (apic_read(APIC_ICR) & APIC_ICR_BUSY) | 214 | while (apic_read(APIC_ICR) & APIC_ICR_BUSY) |
| 205 | cpu_relax(); | 215 | cpu_relax(); |
| 206 | } | 216 | } |
| 207 | 217 | ||
| 208 | u32 safe_xapic_wait_icr_idle(void) | 218 | u32 native_safe_apic_wait_icr_idle(void) |
| 209 | { | 219 | { |
| 210 | u32 send_status; | 220 | u32 send_status; |
| 211 | int timeout; | 221 | int timeout; |
| @@ -221,13 +231,13 @@ u32 safe_xapic_wait_icr_idle(void) | |||
| 221 | return send_status; | 231 | return send_status; |
| 222 | } | 232 | } |
| 223 | 233 | ||
| 224 | void xapic_icr_write(u32 low, u32 id) | 234 | void native_apic_icr_write(u32 low, u32 id) |
| 225 | { | 235 | { |
| 226 | apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id)); | 236 | apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id)); |
| 227 | apic_write(APIC_ICR, low); | 237 | apic_write(APIC_ICR, low); |
| 228 | } | 238 | } |
| 229 | 239 | ||
| 230 | static u64 xapic_icr_read(void) | 240 | u64 native_apic_icr_read(void) |
| 231 | { | 241 | { |
| 232 | u32 icr1, icr2; | 242 | u32 icr1, icr2; |
| 233 | 243 | ||
| @@ -237,54 +247,6 @@ static u64 xapic_icr_read(void) | |||
| 237 | return icr1 | ((u64)icr2 << 32); | 247 | return icr1 | ((u64)icr2 << 32); |
| 238 | } | 248 | } |
| 239 | 249 | ||
| 240 | static struct apic_ops xapic_ops = { | ||
| 241 | .read = native_apic_mem_read, | ||
| 242 | .write = native_apic_mem_write, | ||
| 243 | .icr_read = xapic_icr_read, | ||
| 244 | .icr_write = xapic_icr_write, | ||
| 245 | .wait_icr_idle = xapic_wait_icr_idle, | ||
| 246 | .safe_wait_icr_idle = safe_xapic_wait_icr_idle, | ||
| 247 | }; | ||
| 248 | |||
| 249 | struct apic_ops __read_mostly *apic_ops = &xapic_ops; | ||
| 250 | EXPORT_SYMBOL_GPL(apic_ops); | ||
| 251 | |||
| 252 | #ifdef HAVE_X2APIC | ||
| 253 | static void x2apic_wait_icr_idle(void) | ||
| 254 | { | ||
| 255 | /* no need to wait for icr idle in x2apic */ | ||
| 256 | return; | ||
| 257 | } | ||
| 258 | |||
| 259 | static u32 safe_x2apic_wait_icr_idle(void) | ||
| 260 | { | ||
| 261 | /* no need to wait for icr idle in x2apic */ | ||
| 262 | return 0; | ||
| 263 | } | ||
| 264 | |||
| 265 | void x2apic_icr_write(u32 low, u32 id) | ||
| 266 | { | ||
| 267 | wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low); | ||
| 268 | } | ||
| 269 | |||
| 270 | static u64 x2apic_icr_read(void) | ||
| 271 | { | ||
| 272 | unsigned long val; | ||
| 273 | |||
| 274 | rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val); | ||
| 275 | return val; | ||
| 276 | } | ||
| 277 | |||
| 278 | static struct apic_ops x2apic_ops = { | ||
| 279 | .read = native_apic_msr_read, | ||
| 280 | .write = native_apic_msr_write, | ||
| 281 | .icr_read = x2apic_icr_read, | ||
| 282 | .icr_write = x2apic_icr_write, | ||
| 283 | .wait_icr_idle = x2apic_wait_icr_idle, | ||
| 284 | .safe_wait_icr_idle = safe_x2apic_wait_icr_idle, | ||
| 285 | }; | ||
| 286 | #endif | ||
| 287 | |||
| 288 | /** | 250 | /** |
| 289 | * enable_NMI_through_LVT0 - enable NMI through local vector table 0 | 251 | * enable_NMI_through_LVT0 - enable NMI through local vector table 0 |
| 290 | */ | 252 | */ |
| @@ -457,7 +419,7 @@ static void lapic_timer_setup(enum clock_event_mode mode, | |||
| 457 | static void lapic_timer_broadcast(const struct cpumask *mask) | 419 | static void lapic_timer_broadcast(const struct cpumask *mask) |
| 458 | { | 420 | { |
| 459 | #ifdef CONFIG_SMP | 421 | #ifdef CONFIG_SMP |
| 460 | send_IPI_mask(mask, LOCAL_TIMER_VECTOR); | 422 | apic->send_IPI_mask(mask, LOCAL_TIMER_VECTOR); |
| 461 | #endif | 423 | #endif |
| 462 | } | 424 | } |
| 463 | 425 | ||
| @@ -535,7 +497,8 @@ static void __init lapic_cal_handler(struct clock_event_device *dev) | |||
| 535 | } | 497 | } |
| 536 | } | 498 | } |
| 537 | 499 | ||
| 538 | static int __init calibrate_by_pmtimer(long deltapm, long *delta) | 500 | static int __init |
| 501 | calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc) | ||
| 539 | { | 502 | { |
| 540 | const long pm_100ms = PMTMR_TICKS_PER_SEC / 10; | 503 | const long pm_100ms = PMTMR_TICKS_PER_SEC / 10; |
| 541 | const long pm_thresh = pm_100ms / 100; | 504 | const long pm_thresh = pm_100ms / 100; |
| @@ -546,7 +509,7 @@ static int __init calibrate_by_pmtimer(long deltapm, long *delta) | |||
| 546 | return -1; | 509 | return -1; |
| 547 | #endif | 510 | #endif |
| 548 | 511 | ||
| 549 | apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm); | 512 | apic_printk(APIC_VERBOSE, "... PM-Timer delta = %ld\n", deltapm); |
| 550 | 513 | ||
| 551 | /* Check, if the PM timer is available */ | 514 | /* Check, if the PM timer is available */ |
| 552 | if (!deltapm) | 515 | if (!deltapm) |
| @@ -556,19 +519,30 @@ static int __init calibrate_by_pmtimer(long deltapm, long *delta) | |||
| 556 | 519 | ||
| 557 | if (deltapm > (pm_100ms - pm_thresh) && | 520 | if (deltapm > (pm_100ms - pm_thresh) && |
| 558 | deltapm < (pm_100ms + pm_thresh)) { | 521 | deltapm < (pm_100ms + pm_thresh)) { |
| 559 | apic_printk(APIC_VERBOSE, "... PM timer result ok\n"); | 522 | apic_printk(APIC_VERBOSE, "... PM-Timer result ok\n"); |
| 560 | } else { | 523 | return 0; |
| 561 | res = (((u64)deltapm) * mult) >> 22; | 524 | } |
| 562 | do_div(res, 1000000); | 525 | |
| 563 | pr_warning("APIC calibration not consistent " | 526 | res = (((u64)deltapm) * mult) >> 22; |
| 564 | "with PM Timer: %ldms instead of 100ms\n", | 527 | do_div(res, 1000000); |
| 565 | (long)res); | 528 | pr_warning("APIC calibration not consistent " |
| 566 | /* Correct the lapic counter value */ | 529 | "with PM-Timer: %ldms instead of 100ms\n",(long)res); |
| 567 | res = (((u64)(*delta)) * pm_100ms); | 530 | |
| 531 | /* Correct the lapic counter value */ | ||
| 532 | res = (((u64)(*delta)) * pm_100ms); | ||
| 533 | do_div(res, deltapm); | ||
| 534 | pr_info("APIC delta adjusted to PM-Timer: " | ||
| 535 | "%lu (%ld)\n", (unsigned long)res, *delta); | ||
| 536 | *delta = (long)res; | ||
| 537 | |||
| 538 | /* Correct the tsc counter value */ | ||
| 539 | if (cpu_has_tsc) { | ||
| 540 | res = (((u64)(*deltatsc)) * pm_100ms); | ||
| 568 | do_div(res, deltapm); | 541 | do_div(res, deltapm); |
| 569 | pr_info("APIC delta adjusted to PM-Timer: " | 542 | apic_printk(APIC_VERBOSE, "TSC delta adjusted to " |
| 570 | "%lu (%ld)\n", (unsigned long)res, *delta); | 543 | "PM-Timer: %lu (%ld) \n", |
| 571 | *delta = (long)res; | 544 | (unsigned long)res, *deltatsc); |
| 545 | *deltatsc = (long)res; | ||
| 572 | } | 546 | } |
| 573 | 547 | ||
| 574 | return 0; | 548 | return 0; |
| @@ -579,7 +553,7 @@ static int __init calibrate_APIC_clock(void) | |||
| 579 | struct clock_event_device *levt = &__get_cpu_var(lapic_events); | 553 | struct clock_event_device *levt = &__get_cpu_var(lapic_events); |
| 580 | void (*real_handler)(struct clock_event_device *dev); | 554 | void (*real_handler)(struct clock_event_device *dev); |
| 581 | unsigned long deltaj; | 555 | unsigned long deltaj; |
| 582 | long delta; | 556 | long delta, deltatsc; |
| 583 | int pm_referenced = 0; | 557 | int pm_referenced = 0; |
| 584 | 558 | ||
| 585 | local_irq_disable(); | 559 | local_irq_disable(); |
| @@ -609,9 +583,11 @@ static int __init calibrate_APIC_clock(void) | |||
| 609 | delta = lapic_cal_t1 - lapic_cal_t2; | 583 | delta = lapic_cal_t1 - lapic_cal_t2; |
| 610 | apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta); | 584 | apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta); |
| 611 | 585 | ||
| 586 | deltatsc = (long)(lapic_cal_tsc2 - lapic_cal_tsc1); | ||
| 587 | |||
| 612 | /* we trust the PM based calibration if possible */ | 588 | /* we trust the PM based calibration if possible */ |
| 613 | pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1, | 589 | pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1, |
| 614 | &delta); | 590 | &delta, &deltatsc); |
| 615 | 591 | ||
| 616 | /* Calculate the scaled math multiplication factor */ | 592 | /* Calculate the scaled math multiplication factor */ |
| 617 | lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS, | 593 | lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS, |
| @@ -629,11 +605,10 @@ static int __init calibrate_APIC_clock(void) | |||
| 629 | calibration_result); | 605 | calibration_result); |
| 630 | 606 | ||
| 631 | if (cpu_has_tsc) { | 607 | if (cpu_has_tsc) { |
| 632 | delta = (long)(lapic_cal_tsc2 - lapic_cal_tsc1); | ||
| 633 | apic_printk(APIC_VERBOSE, "..... CPU clock speed is " | 608 | apic_printk(APIC_VERBOSE, "..... CPU clock speed is " |
| 634 | "%ld.%04ld MHz.\n", | 609 | "%ld.%04ld MHz.\n", |
| 635 | (delta / LAPIC_CAL_LOOPS) / (1000000 / HZ), | 610 | (deltatsc / LAPIC_CAL_LOOPS) / (1000000 / HZ), |
| 636 | (delta / LAPIC_CAL_LOOPS) % (1000000 / HZ)); | 611 | (deltatsc / LAPIC_CAL_LOOPS) % (1000000 / HZ)); |
| 637 | } | 612 | } |
| 638 | 613 | ||
| 639 | apic_printk(APIC_VERBOSE, "..... host bus clock speed is " | 614 | apic_printk(APIC_VERBOSE, "..... host bus clock speed is " |
| @@ -868,6 +843,14 @@ void clear_local_APIC(void) | |||
| 868 | apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED); | 843 | apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED); |
| 869 | } | 844 | } |
| 870 | #endif | 845 | #endif |
| 846 | #ifdef CONFIG_X86_MCE_INTEL | ||
| 847 | if (maxlvt >= 6) { | ||
| 848 | v = apic_read(APIC_LVTCMCI); | ||
| 849 | if (!(v & APIC_LVT_MASKED)) | ||
| 850 | apic_write(APIC_LVTCMCI, v | APIC_LVT_MASKED); | ||
| 851 | } | ||
| 852 | #endif | ||
| 853 | |||
| 871 | /* | 854 | /* |
| 872 | * Clean APIC state for other OSs: | 855 | * Clean APIC state for other OSs: |
| 873 | */ | 856 | */ |
| @@ -991,11 +974,11 @@ int __init verify_local_APIC(void) | |||
| 991 | */ | 974 | */ |
| 992 | reg0 = apic_read(APIC_ID); | 975 | reg0 = apic_read(APIC_ID); |
| 993 | apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); | 976 | apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); |
| 994 | apic_write(APIC_ID, reg0 ^ APIC_ID_MASK); | 977 | apic_write(APIC_ID, reg0 ^ apic->apic_id_mask); |
| 995 | reg1 = apic_read(APIC_ID); | 978 | reg1 = apic_read(APIC_ID); |
| 996 | apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); | 979 | apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); |
| 997 | apic_write(APIC_ID, reg0); | 980 | apic_write(APIC_ID, reg0); |
| 998 | if (reg1 != (reg0 ^ APIC_ID_MASK)) | 981 | if (reg1 != (reg0 ^ apic->apic_id_mask)) |
| 999 | return 0; | 982 | return 0; |
| 1000 | 983 | ||
| 1001 | /* | 984 | /* |
| @@ -1089,7 +1072,7 @@ static void __cpuinit lapic_setup_esr(void) | |||
| 1089 | return; | 1072 | return; |
| 1090 | } | 1073 | } |
| 1091 | 1074 | ||
| 1092 | if (esr_disable) { | 1075 | if (apic->disable_esr) { |
| 1093 | /* | 1076 | /* |
| 1094 | * Something untraceable is creating bad interrupts on | 1077 | * Something untraceable is creating bad interrupts on |
| 1095 | * secondary quads ... for the moment, just leave the | 1078 | * secondary quads ... for the moment, just leave the |
| @@ -1130,9 +1113,14 @@ void __cpuinit setup_local_APIC(void) | |||
| 1130 | unsigned int value; | 1113 | unsigned int value; |
| 1131 | int i, j; | 1114 | int i, j; |
| 1132 | 1115 | ||
| 1116 | if (disable_apic) { | ||
| 1117 | arch_disable_smp_support(); | ||
| 1118 | return; | ||
| 1119 | } | ||
| 1120 | |||
| 1133 | #ifdef CONFIG_X86_32 | 1121 | #ifdef CONFIG_X86_32 |
| 1134 | /* Pound the ESR really hard over the head with a big hammer - mbligh */ | 1122 | /* Pound the ESR really hard over the head with a big hammer - mbligh */ |
| 1135 | if (lapic_is_integrated() && esr_disable) { | 1123 | if (lapic_is_integrated() && apic->disable_esr) { |
| 1136 | apic_write(APIC_ESR, 0); | 1124 | apic_write(APIC_ESR, 0); |
| 1137 | apic_write(APIC_ESR, 0); | 1125 | apic_write(APIC_ESR, 0); |
| 1138 | apic_write(APIC_ESR, 0); | 1126 | apic_write(APIC_ESR, 0); |
| @@ -1146,7 +1134,7 @@ void __cpuinit setup_local_APIC(void) | |||
| 1146 | * Double-check whether this APIC is really registered. | 1134 | * Double-check whether this APIC is really registered. |
| 1147 | * This is meaningless in clustered apic mode, so we skip it. | 1135 | * This is meaningless in clustered apic mode, so we skip it. |
| 1148 | */ | 1136 | */ |
| 1149 | if (!apic_id_registered()) | 1137 | if (!apic->apic_id_registered()) |
| 1150 | BUG(); | 1138 | BUG(); |
| 1151 | 1139 | ||
| 1152 | /* | 1140 | /* |
| @@ -1154,7 +1142,7 @@ void __cpuinit setup_local_APIC(void) | |||
| 1154 | * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel | 1142 | * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel |
| 1155 | * document number 292116). So here it goes... | 1143 | * document number 292116). So here it goes... |
| 1156 | */ | 1144 | */ |
| 1157 | init_apic_ldr(); | 1145 | apic->init_apic_ldr(); |
| 1158 | 1146 | ||
| 1159 | /* | 1147 | /* |
| 1160 | * Set Task Priority to 'accept all'. We never change this | 1148 | * Set Task Priority to 'accept all'. We never change this |
| @@ -1262,6 +1250,12 @@ void __cpuinit setup_local_APIC(void) | |||
| 1262 | apic_write(APIC_LVT1, value); | 1250 | apic_write(APIC_LVT1, value); |
| 1263 | 1251 | ||
| 1264 | preempt_enable(); | 1252 | preempt_enable(); |
| 1253 | |||
| 1254 | #ifdef CONFIG_X86_MCE_INTEL | ||
| 1255 | /* Recheck CMCI information after local APIC is up on CPU #0 */ | ||
| 1256 | if (smp_processor_id() == 0) | ||
| 1257 | cmci_recheck(); | ||
| 1258 | #endif | ||
| 1265 | } | 1259 | } |
| 1266 | 1260 | ||
| 1267 | void __cpuinit end_local_APIC_setup(void) | 1261 | void __cpuinit end_local_APIC_setup(void) |
| @@ -1282,17 +1276,12 @@ void __cpuinit end_local_APIC_setup(void) | |||
| 1282 | apic_pm_activate(); | 1276 | apic_pm_activate(); |
| 1283 | } | 1277 | } |
| 1284 | 1278 | ||
| 1285 | #ifdef HAVE_X2APIC | 1279 | #ifdef CONFIG_X86_X2APIC |
| 1286 | void check_x2apic(void) | 1280 | void check_x2apic(void) |
| 1287 | { | 1281 | { |
| 1288 | int msr, msr2; | 1282 | if (x2apic_enabled()) { |
| 1289 | |||
| 1290 | rdmsr(MSR_IA32_APICBASE, msr, msr2); | ||
| 1291 | |||
| 1292 | if (msr & X2APIC_ENABLE) { | ||
| 1293 | pr_info("x2apic enabled by BIOS, switching to x2apic ops\n"); | 1283 | pr_info("x2apic enabled by BIOS, switching to x2apic ops\n"); |
| 1294 | x2apic_preenabled = x2apic = 1; | 1284 | x2apic_preenabled = x2apic = 1; |
| 1295 | apic_ops = &x2apic_ops; | ||
| 1296 | } | 1285 | } |
| 1297 | } | 1286 | } |
| 1298 | 1287 | ||
| @@ -1300,6 +1289,9 @@ void enable_x2apic(void) | |||
| 1300 | { | 1289 | { |
| 1301 | int msr, msr2; | 1290 | int msr, msr2; |
| 1302 | 1291 | ||
| 1292 | if (!x2apic) | ||
| 1293 | return; | ||
| 1294 | |||
| 1303 | rdmsr(MSR_IA32_APICBASE, msr, msr2); | 1295 | rdmsr(MSR_IA32_APICBASE, msr, msr2); |
| 1304 | if (!(msr & X2APIC_ENABLE)) { | 1296 | if (!(msr & X2APIC_ENABLE)) { |
| 1305 | pr_info("Enabling x2apic\n"); | 1297 | pr_info("Enabling x2apic\n"); |
| @@ -1363,7 +1355,6 @@ void __init enable_IR_x2apic(void) | |||
| 1363 | 1355 | ||
| 1364 | if (!x2apic) { | 1356 | if (!x2apic) { |
| 1365 | x2apic = 1; | 1357 | x2apic = 1; |
| 1366 | apic_ops = &x2apic_ops; | ||
| 1367 | enable_x2apic(); | 1358 | enable_x2apic(); |
| 1368 | } | 1359 | } |
| 1369 | 1360 | ||
| @@ -1401,7 +1392,7 @@ end: | |||
| 1401 | 1392 | ||
| 1402 | return; | 1393 | return; |
| 1403 | } | 1394 | } |
| 1404 | #endif /* HAVE_X2APIC */ | 1395 | #endif /* CONFIG_X86_X2APIC */ |
| 1405 | 1396 | ||
| 1406 | #ifdef CONFIG_X86_64 | 1397 | #ifdef CONFIG_X86_64 |
| 1407 | /* | 1398 | /* |
| @@ -1532,7 +1523,7 @@ void __init early_init_lapic_mapping(void) | |||
| 1532 | */ | 1523 | */ |
| 1533 | void __init init_apic_mappings(void) | 1524 | void __init init_apic_mappings(void) |
| 1534 | { | 1525 | { |
| 1535 | #ifdef HAVE_X2APIC | 1526 | #ifdef CONFIG_X86_X2APIC |
| 1536 | if (x2apic) { | 1527 | if (x2apic) { |
| 1537 | boot_cpu_physical_apicid = read_apic_id(); | 1528 | boot_cpu_physical_apicid = read_apic_id(); |
| 1538 | return; | 1529 | return; |
| @@ -1570,11 +1561,11 @@ int apic_version[MAX_APICS]; | |||
| 1570 | 1561 | ||
| 1571 | int __init APIC_init_uniprocessor(void) | 1562 | int __init APIC_init_uniprocessor(void) |
| 1572 | { | 1563 | { |
| 1573 | #ifdef CONFIG_X86_64 | ||
| 1574 | if (disable_apic) { | 1564 | if (disable_apic) { |
| 1575 | pr_info("Apic disabled\n"); | 1565 | pr_info("Apic disabled\n"); |
| 1576 | return -1; | 1566 | return -1; |
| 1577 | } | 1567 | } |
| 1568 | #ifdef CONFIG_X86_64 | ||
| 1578 | if (!cpu_has_apic) { | 1569 | if (!cpu_has_apic) { |
| 1579 | disable_apic = 1; | 1570 | disable_apic = 1; |
| 1580 | pr_info("Apic disabled by BIOS\n"); | 1571 | pr_info("Apic disabled by BIOS\n"); |
| @@ -1596,11 +1587,9 @@ int __init APIC_init_uniprocessor(void) | |||
| 1596 | } | 1587 | } |
| 1597 | #endif | 1588 | #endif |
| 1598 | 1589 | ||
| 1599 | #ifdef HAVE_X2APIC | ||
| 1600 | enable_IR_x2apic(); | 1590 | enable_IR_x2apic(); |
| 1601 | #endif | ||
| 1602 | #ifdef CONFIG_X86_64 | 1591 | #ifdef CONFIG_X86_64 |
| 1603 | setup_apic_routing(); | 1592 | default_setup_apic_routing(); |
| 1604 | #endif | 1593 | #endif |
| 1605 | 1594 | ||
| 1606 | verify_local_APIC(); | 1595 | verify_local_APIC(); |
| @@ -1621,35 +1610,31 @@ int __init APIC_init_uniprocessor(void) | |||
| 1621 | physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); | 1610 | physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); |
| 1622 | setup_local_APIC(); | 1611 | setup_local_APIC(); |
| 1623 | 1612 | ||
| 1624 | #ifdef CONFIG_X86_64 | 1613 | #ifdef CONFIG_X86_IO_APIC |
| 1625 | /* | 1614 | /* |
| 1626 | * Now enable IO-APICs, actually call clear_IO_APIC | 1615 | * Now enable IO-APICs, actually call clear_IO_APIC |
| 1627 | * We need clear_IO_APIC before enabling vector on BP | 1616 | * We need clear_IO_APIC before enabling error vector |
| 1628 | */ | 1617 | */ |
| 1629 | if (!skip_ioapic_setup && nr_ioapics) | 1618 | if (!skip_ioapic_setup && nr_ioapics) |
| 1630 | enable_IO_APIC(); | 1619 | enable_IO_APIC(); |
| 1631 | #endif | 1620 | #endif |
| 1632 | 1621 | ||
| 1633 | #ifdef CONFIG_X86_IO_APIC | ||
| 1634 | if (!smp_found_config || skip_ioapic_setup || !nr_ioapics) | ||
| 1635 | #endif | ||
| 1636 | localise_nmi_watchdog(); | ||
| 1637 | end_local_APIC_setup(); | 1622 | end_local_APIC_setup(); |
| 1638 | 1623 | ||
| 1639 | #ifdef CONFIG_X86_IO_APIC | 1624 | #ifdef CONFIG_X86_IO_APIC |
| 1640 | if (smp_found_config && !skip_ioapic_setup && nr_ioapics) | 1625 | if (smp_found_config && !skip_ioapic_setup && nr_ioapics) |
| 1641 | setup_IO_APIC(); | 1626 | setup_IO_APIC(); |
| 1642 | # ifdef CONFIG_X86_64 | 1627 | else { |
| 1643 | else | ||
| 1644 | nr_ioapics = 0; | 1628 | nr_ioapics = 0; |
| 1645 | # endif | 1629 | localise_nmi_watchdog(); |
| 1630 | } | ||
| 1631 | #else | ||
| 1632 | localise_nmi_watchdog(); | ||
| 1646 | #endif | 1633 | #endif |
| 1647 | 1634 | ||
| 1635 | setup_boot_clock(); | ||
| 1648 | #ifdef CONFIG_X86_64 | 1636 | #ifdef CONFIG_X86_64 |
| 1649 | setup_boot_APIC_clock(); | ||
| 1650 | check_nmi_watchdog(); | 1637 | check_nmi_watchdog(); |
| 1651 | #else | ||
| 1652 | setup_boot_clock(); | ||
| 1653 | #endif | 1638 | #endif |
| 1654 | 1639 | ||
| 1655 | return 0; | 1640 | return 0; |
| @@ -1738,7 +1723,8 @@ void __init connect_bsp_APIC(void) | |||
| 1738 | outb(0x01, 0x23); | 1723 | outb(0x01, 0x23); |
| 1739 | } | 1724 | } |
| 1740 | #endif | 1725 | #endif |
| 1741 | enable_apic_mode(); | 1726 | if (apic->enable_apic_mode) |
| 1727 | apic->enable_apic_mode(); | ||
| 1742 | } | 1728 | } |
| 1743 | 1729 | ||
| 1744 | /** | 1730 | /** |
| @@ -1876,29 +1862,39 @@ void __cpuinit generic_processor_info(int apicid, int version) | |||
| 1876 | } | 1862 | } |
| 1877 | #endif | 1863 | #endif |
| 1878 | 1864 | ||
| 1879 | #if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64) | 1865 | #if defined(CONFIG_SMP) || defined(CONFIG_X86_64) |
| 1880 | /* are we being called early in kernel startup? */ | 1866 | early_per_cpu(x86_cpu_to_apicid, cpu) = apicid; |
| 1881 | if (early_per_cpu_ptr(x86_cpu_to_apicid)) { | 1867 | early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid; |
| 1882 | u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); | ||
| 1883 | u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); | ||
| 1884 | |||
| 1885 | cpu_to_apicid[cpu] = apicid; | ||
| 1886 | bios_cpu_apicid[cpu] = apicid; | ||
| 1887 | } else { | ||
| 1888 | per_cpu(x86_cpu_to_apicid, cpu) = apicid; | ||
| 1889 | per_cpu(x86_bios_cpu_apicid, cpu) = apicid; | ||
| 1890 | } | ||
| 1891 | #endif | 1868 | #endif |
| 1892 | 1869 | ||
| 1893 | set_cpu_possible(cpu, true); | 1870 | set_cpu_possible(cpu, true); |
| 1894 | set_cpu_present(cpu, true); | 1871 | set_cpu_present(cpu, true); |
| 1895 | } | 1872 | } |
| 1896 | 1873 | ||
| 1897 | #ifdef CONFIG_X86_64 | ||
| 1898 | int hard_smp_processor_id(void) | 1874 | int hard_smp_processor_id(void) |
| 1899 | { | 1875 | { |
| 1900 | return read_apic_id(); | 1876 | return read_apic_id(); |
| 1901 | } | 1877 | } |
| 1878 | |||
| 1879 | void default_init_apic_ldr(void) | ||
| 1880 | { | ||
| 1881 | unsigned long val; | ||
| 1882 | |||
| 1883 | apic_write(APIC_DFR, APIC_DFR_VALUE); | ||
| 1884 | val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; | ||
| 1885 | val |= SET_APIC_LOGICAL_ID(1UL << smp_processor_id()); | ||
| 1886 | apic_write(APIC_LDR, val); | ||
| 1887 | } | ||
| 1888 | |||
| 1889 | #ifdef CONFIG_X86_32 | ||
| 1890 | int default_apicid_to_node(int logical_apicid) | ||
| 1891 | { | ||
| 1892 | #ifdef CONFIG_SMP | ||
| 1893 | return apicid_2_node[hard_smp_processor_id()]; | ||
| 1894 | #else | ||
| 1895 | return 0; | ||
| 1896 | #endif | ||
| 1897 | } | ||
| 1902 | #endif | 1898 | #endif |
| 1903 | 1899 | ||
| 1904 | /* | 1900 | /* |
| @@ -1976,7 +1972,7 @@ static int lapic_resume(struct sys_device *dev) | |||
| 1976 | 1972 | ||
| 1977 | local_irq_save(flags); | 1973 | local_irq_save(flags); |
| 1978 | 1974 | ||
| 1979 | #ifdef HAVE_X2APIC | 1975 | #ifdef CONFIG_X86_X2APIC |
| 1980 | if (x2apic) | 1976 | if (x2apic) |
| 1981 | enable_x2apic(); | 1977 | enable_x2apic(); |
| 1982 | else | 1978 | else |
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 34185488e4fb..f933822dba18 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c | |||
| @@ -17,9 +17,8 @@ | |||
| 17 | #include <linux/init.h> | 17 | #include <linux/init.h> |
| 18 | #include <linux/hardirq.h> | 18 | #include <linux/hardirq.h> |
| 19 | #include <asm/smp.h> | 19 | #include <asm/smp.h> |
| 20 | #include <asm/apic.h> | ||
| 20 | #include <asm/ipi.h> | 21 | #include <asm/ipi.h> |
| 21 | #include <asm/genapic.h> | ||
| 22 | #include <mach_apicdef.h> | ||
| 23 | 22 | ||
| 24 | #ifdef CONFIG_ACPI | 23 | #ifdef CONFIG_ACPI |
| 25 | #include <acpi/acpi_bus.h> | 24 | #include <acpi/acpi_bus.h> |
| @@ -74,7 +73,7 @@ static inline void _flat_send_IPI_mask(unsigned long mask, int vector) | |||
| 74 | unsigned long flags; | 73 | unsigned long flags; |
| 75 | 74 | ||
| 76 | local_irq_save(flags); | 75 | local_irq_save(flags); |
| 77 | __send_IPI_dest_field(mask, vector, APIC_DEST_LOGICAL); | 76 | __default_send_IPI_dest_field(mask, vector, apic->dest_logical); |
| 78 | local_irq_restore(flags); | 77 | local_irq_restore(flags); |
| 79 | } | 78 | } |
| 80 | 79 | ||
| @@ -85,14 +84,15 @@ static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector) | |||
| 85 | _flat_send_IPI_mask(mask, vector); | 84 | _flat_send_IPI_mask(mask, vector); |
| 86 | } | 85 | } |
| 87 | 86 | ||
| 88 | static void flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, | 87 | static void |
| 89 | int vector) | 88 | flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) |
| 90 | { | 89 | { |
| 91 | unsigned long mask = cpumask_bits(cpumask)[0]; | 90 | unsigned long mask = cpumask_bits(cpumask)[0]; |
| 92 | int cpu = smp_processor_id(); | 91 | int cpu = smp_processor_id(); |
| 93 | 92 | ||
| 94 | if (cpu < BITS_PER_LONG) | 93 | if (cpu < BITS_PER_LONG) |
| 95 | clear_bit(cpu, &mask); | 94 | clear_bit(cpu, &mask); |
| 95 | |||
| 96 | _flat_send_IPI_mask(mask, vector); | 96 | _flat_send_IPI_mask(mask, vector); |
| 97 | } | 97 | } |
| 98 | 98 | ||
| @@ -114,23 +114,27 @@ static void flat_send_IPI_allbutself(int vector) | |||
| 114 | _flat_send_IPI_mask(mask, vector); | 114 | _flat_send_IPI_mask(mask, vector); |
| 115 | } | 115 | } |
| 116 | } else if (num_online_cpus() > 1) { | 116 | } else if (num_online_cpus() > 1) { |
| 117 | __send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL); | 117 | __default_send_IPI_shortcut(APIC_DEST_ALLBUT, |
| 118 | vector, apic->dest_logical); | ||
| 118 | } | 119 | } |
| 119 | } | 120 | } |
| 120 | 121 | ||
| 121 | static void flat_send_IPI_all(int vector) | 122 | static void flat_send_IPI_all(int vector) |
| 122 | { | 123 | { |
| 123 | if (vector == NMI_VECTOR) | 124 | if (vector == NMI_VECTOR) { |
| 124 | flat_send_IPI_mask(cpu_online_mask, vector); | 125 | flat_send_IPI_mask(cpu_online_mask, vector); |
| 125 | else | 126 | } else { |
| 126 | __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); | 127 | __default_send_IPI_shortcut(APIC_DEST_ALLINC, |
| 128 | vector, apic->dest_logical); | ||
| 129 | } | ||
| 127 | } | 130 | } |
| 128 | 131 | ||
| 129 | static unsigned int get_apic_id(unsigned long x) | 132 | static unsigned int flat_get_apic_id(unsigned long x) |
| 130 | { | 133 | { |
| 131 | unsigned int id; | 134 | unsigned int id; |
| 132 | 135 | ||
| 133 | id = (((x)>>24) & 0xFFu); | 136 | id = (((x)>>24) & 0xFFu); |
| 137 | |||
| 134 | return id; | 138 | return id; |
| 135 | } | 139 | } |
| 136 | 140 | ||
| @@ -146,7 +150,7 @@ static unsigned int read_xapic_id(void) | |||
| 146 | { | 150 | { |
| 147 | unsigned int id; | 151 | unsigned int id; |
| 148 | 152 | ||
| 149 | id = get_apic_id(apic_read(APIC_ID)); | 153 | id = flat_get_apic_id(apic_read(APIC_ID)); |
| 150 | return id; | 154 | return id; |
| 151 | } | 155 | } |
| 152 | 156 | ||
| @@ -169,31 +173,67 @@ static unsigned int flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | |||
| 169 | return mask1 & mask2; | 173 | return mask1 & mask2; |
| 170 | } | 174 | } |
| 171 | 175 | ||
| 172 | static unsigned int phys_pkg_id(int index_msb) | 176 | static int flat_phys_pkg_id(int initial_apic_id, int index_msb) |
| 173 | { | 177 | { |
| 174 | return hard_smp_processor_id() >> index_msb; | 178 | return hard_smp_processor_id() >> index_msb; |
| 175 | } | 179 | } |
| 176 | 180 | ||
| 177 | struct genapic apic_flat = { | 181 | struct apic apic_flat = { |
| 178 | .name = "flat", | 182 | .name = "flat", |
| 179 | .acpi_madt_oem_check = flat_acpi_madt_oem_check, | 183 | .probe = NULL, |
| 180 | .int_delivery_mode = dest_LowestPrio, | 184 | .acpi_madt_oem_check = flat_acpi_madt_oem_check, |
| 181 | .int_dest_mode = (APIC_DEST_LOGICAL != 0), | 185 | .apic_id_registered = flat_apic_id_registered, |
| 182 | .target_cpus = flat_target_cpus, | 186 | |
| 183 | .vector_allocation_domain = flat_vector_allocation_domain, | 187 | .irq_delivery_mode = dest_LowestPrio, |
| 184 | .apic_id_registered = flat_apic_id_registered, | 188 | .irq_dest_mode = 1, /* logical */ |
| 185 | .init_apic_ldr = flat_init_apic_ldr, | 189 | |
| 186 | .send_IPI_all = flat_send_IPI_all, | 190 | .target_cpus = flat_target_cpus, |
| 187 | .send_IPI_allbutself = flat_send_IPI_allbutself, | 191 | .disable_esr = 0, |
| 188 | .send_IPI_mask = flat_send_IPI_mask, | 192 | .dest_logical = APIC_DEST_LOGICAL, |
| 189 | .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself, | 193 | .check_apicid_used = NULL, |
| 190 | .send_IPI_self = apic_send_IPI_self, | 194 | .check_apicid_present = NULL, |
| 191 | .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, | 195 | |
| 192 | .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, | 196 | .vector_allocation_domain = flat_vector_allocation_domain, |
| 193 | .phys_pkg_id = phys_pkg_id, | 197 | .init_apic_ldr = flat_init_apic_ldr, |
| 194 | .get_apic_id = get_apic_id, | 198 | |
| 195 | .set_apic_id = set_apic_id, | 199 | .ioapic_phys_id_map = NULL, |
| 196 | .apic_id_mask = (0xFFu<<24), | 200 | .setup_apic_routing = NULL, |
| 201 | .multi_timer_check = NULL, | ||
| 202 | .apicid_to_node = NULL, | ||
| 203 | .cpu_to_logical_apicid = NULL, | ||
| 204 | .cpu_present_to_apicid = default_cpu_present_to_apicid, | ||
| 205 | .apicid_to_cpu_present = NULL, | ||
| 206 | .setup_portio_remap = NULL, | ||
| 207 | .check_phys_apicid_present = default_check_phys_apicid_present, | ||
| 208 | .enable_apic_mode = NULL, | ||
| 209 | .phys_pkg_id = flat_phys_pkg_id, | ||
| 210 | .mps_oem_check = NULL, | ||
| 211 | |||
| 212 | .get_apic_id = flat_get_apic_id, | ||
| 213 | .set_apic_id = set_apic_id, | ||
| 214 | .apic_id_mask = 0xFFu << 24, | ||
| 215 | |||
| 216 | .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, | ||
| 217 | .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, | ||
| 218 | |||
| 219 | .send_IPI_mask = flat_send_IPI_mask, | ||
| 220 | .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself, | ||
| 221 | .send_IPI_allbutself = flat_send_IPI_allbutself, | ||
| 222 | .send_IPI_all = flat_send_IPI_all, | ||
| 223 | .send_IPI_self = apic_send_IPI_self, | ||
| 224 | |||
| 225 | .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, | ||
| 226 | .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, | ||
| 227 | .wait_for_init_deassert = NULL, | ||
| 228 | .smp_callin_clear_local_apic = NULL, | ||
| 229 | .inquire_remote_apic = NULL, | ||
| 230 | |||
| 231 | .read = native_apic_mem_read, | ||
| 232 | .write = native_apic_mem_write, | ||
| 233 | .icr_read = native_apic_icr_read, | ||
| 234 | .icr_write = native_apic_icr_write, | ||
| 235 | .wait_icr_idle = native_apic_wait_icr_idle, | ||
| 236 | .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, | ||
| 197 | }; | 237 | }; |
| 198 | 238 | ||
| 199 | /* | 239 | /* |
| @@ -232,18 +272,18 @@ static void physflat_vector_allocation_domain(int cpu, struct cpumask *retmask) | |||
| 232 | 272 | ||
| 233 | static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector) | 273 | static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector) |
| 234 | { | 274 | { |
| 235 | send_IPI_mask_sequence(cpumask, vector); | 275 | default_send_IPI_mask_sequence_phys(cpumask, vector); |
| 236 | } | 276 | } |
| 237 | 277 | ||
| 238 | static void physflat_send_IPI_mask_allbutself(const struct cpumask *cpumask, | 278 | static void physflat_send_IPI_mask_allbutself(const struct cpumask *cpumask, |
| 239 | int vector) | 279 | int vector) |
| 240 | { | 280 | { |
| 241 | send_IPI_mask_allbutself(cpumask, vector); | 281 | default_send_IPI_mask_allbutself_phys(cpumask, vector); |
| 242 | } | 282 | } |
| 243 | 283 | ||
| 244 | static void physflat_send_IPI_allbutself(int vector) | 284 | static void physflat_send_IPI_allbutself(int vector) |
| 245 | { | 285 | { |
| 246 | send_IPI_mask_allbutself(cpu_online_mask, vector); | 286 | default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector); |
| 247 | } | 287 | } |
| 248 | 288 | ||
| 249 | static void physflat_send_IPI_all(int vector) | 289 | static void physflat_send_IPI_all(int vector) |
| @@ -276,32 +316,72 @@ physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | |||
| 276 | * We're using fixed IRQ delivery, can only return one phys APIC ID. | 316 | * We're using fixed IRQ delivery, can only return one phys APIC ID. |
| 277 | * May as well be the first. | 317 | * May as well be the first. |
| 278 | */ | 318 | */ |
| 279 | for_each_cpu_and(cpu, cpumask, andmask) | 319 | for_each_cpu_and(cpu, cpumask, andmask) { |
| 280 | if (cpumask_test_cpu(cpu, cpu_online_mask)) | 320 | if (cpumask_test_cpu(cpu, cpu_online_mask)) |
| 281 | break; | 321 | break; |
| 322 | } | ||
| 282 | if (cpu < nr_cpu_ids) | 323 | if (cpu < nr_cpu_ids) |
| 283 | return per_cpu(x86_cpu_to_apicid, cpu); | 324 | return per_cpu(x86_cpu_to_apicid, cpu); |
| 325 | |||
| 284 | return BAD_APICID; | 326 | return BAD_APICID; |
| 285 | } | 327 | } |
| 286 | 328 | ||
| 287 | struct genapic apic_physflat = { | 329 | struct apic apic_physflat = { |
| 288 | .name = "physical flat", | 330 | |
| 289 | .acpi_madt_oem_check = physflat_acpi_madt_oem_check, | 331 | .name = "physical flat", |
| 290 | .int_delivery_mode = dest_Fixed, | 332 | .probe = NULL, |
| 291 | .int_dest_mode = (APIC_DEST_PHYSICAL != 0), | 333 | .acpi_madt_oem_check = physflat_acpi_madt_oem_check, |
| 292 | .target_cpus = physflat_target_cpus, | 334 | .apic_id_registered = flat_apic_id_registered, |
| 293 | .vector_allocation_domain = physflat_vector_allocation_domain, | 335 | |
| 294 | .apic_id_registered = flat_apic_id_registered, | 336 | .irq_delivery_mode = dest_Fixed, |
| 295 | .init_apic_ldr = flat_init_apic_ldr,/*not needed, but shouldn't hurt*/ | 337 | .irq_dest_mode = 0, /* physical */ |
| 296 | .send_IPI_all = physflat_send_IPI_all, | 338 | |
| 297 | .send_IPI_allbutself = physflat_send_IPI_allbutself, | 339 | .target_cpus = physflat_target_cpus, |
| 298 | .send_IPI_mask = physflat_send_IPI_mask, | 340 | .disable_esr = 0, |
| 299 | .send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself, | 341 | .dest_logical = 0, |
| 300 | .send_IPI_self = apic_send_IPI_self, | 342 | .check_apicid_used = NULL, |
| 301 | .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, | 343 | .check_apicid_present = NULL, |
| 302 | .cpu_mask_to_apicid_and = physflat_cpu_mask_to_apicid_and, | 344 | |
| 303 | .phys_pkg_id = phys_pkg_id, | 345 | .vector_allocation_domain = physflat_vector_allocation_domain, |
| 304 | .get_apic_id = get_apic_id, | 346 | /* not needed, but shouldn't hurt: */ |
| 305 | .set_apic_id = set_apic_id, | 347 | .init_apic_ldr = flat_init_apic_ldr, |
| 306 | .apic_id_mask = (0xFFu<<24), | 348 | |
| 349 | .ioapic_phys_id_map = NULL, | ||
| 350 | .setup_apic_routing = NULL, | ||
| 351 | .multi_timer_check = NULL, | ||
| 352 | .apicid_to_node = NULL, | ||
| 353 | .cpu_to_logical_apicid = NULL, | ||
| 354 | .cpu_present_to_apicid = default_cpu_present_to_apicid, | ||
| 355 | .apicid_to_cpu_present = NULL, | ||
| 356 | .setup_portio_remap = NULL, | ||
| 357 | .check_phys_apicid_present = default_check_phys_apicid_present, | ||
| 358 | .enable_apic_mode = NULL, | ||
| 359 | .phys_pkg_id = flat_phys_pkg_id, | ||
| 360 | .mps_oem_check = NULL, | ||
| 361 | |||
| 362 | .get_apic_id = flat_get_apic_id, | ||
| 363 | .set_apic_id = set_apic_id, | ||
| 364 | .apic_id_mask = 0xFFu << 24, | ||
| 365 | |||
| 366 | .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, | ||
| 367 | .cpu_mask_to_apicid_and = physflat_cpu_mask_to_apicid_and, | ||
| 368 | |||
| 369 | .send_IPI_mask = physflat_send_IPI_mask, | ||
| 370 | .send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself, | ||
| 371 | .send_IPI_allbutself = physflat_send_IPI_allbutself, | ||
| 372 | .send_IPI_all = physflat_send_IPI_all, | ||
| 373 | .send_IPI_self = apic_send_IPI_self, | ||
| 374 | |||
| 375 | .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, | ||
| 376 | .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, | ||
| 377 | .wait_for_init_deassert = NULL, | ||
| 378 | .smp_callin_clear_local_apic = NULL, | ||
| 379 | .inquire_remote_apic = NULL, | ||
| 380 | |||
| 381 | .read = native_apic_mem_read, | ||
| 382 | .write = native_apic_mem_write, | ||
| 383 | .icr_read = native_apic_icr_read, | ||
| 384 | .icr_write = native_apic_icr_write, | ||
| 385 | .wait_icr_idle = native_apic_wait_icr_idle, | ||
| 386 | .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, | ||
| 307 | }; | 387 | }; |
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c new file mode 100644 index 000000000000..676cdac385c0 --- /dev/null +++ b/arch/x86/kernel/apic/bigsmp_32.c | |||
| @@ -0,0 +1,267 @@ | |||
| 1 | /* | ||
| 2 | * APIC driver for "bigsmp" xAPIC machines with more than 8 virtual CPUs. | ||
| 3 | * | ||
| 4 | * Drives the local APIC in "clustered mode". | ||
| 5 | */ | ||
| 6 | #include <linux/threads.h> | ||
| 7 | #include <linux/cpumask.h> | ||
| 8 | #include <linux/kernel.h> | ||
| 9 | #include <linux/init.h> | ||
| 10 | #include <linux/dmi.h> | ||
| 11 | #include <linux/smp.h> | ||
| 12 | |||
| 13 | #include <asm/apicdef.h> | ||
| 14 | #include <asm/fixmap.h> | ||
| 15 | #include <asm/mpspec.h> | ||
| 16 | #include <asm/apic.h> | ||
| 17 | #include <asm/ipi.h> | ||
| 18 | |||
| 19 | static unsigned bigsmp_get_apic_id(unsigned long x) | ||
| 20 | { | ||
| 21 | return (x >> 24) & 0xFF; | ||
| 22 | } | ||
| 23 | |||
| 24 | static int bigsmp_apic_id_registered(void) | ||
| 25 | { | ||
| 26 | return 1; | ||
| 27 | } | ||
| 28 | |||
| 29 | static const struct cpumask *bigsmp_target_cpus(void) | ||
| 30 | { | ||
| 31 | #ifdef CONFIG_SMP | ||
| 32 | return cpu_online_mask; | ||
| 33 | #else | ||
| 34 | return cpumask_of(0); | ||
| 35 | #endif | ||
| 36 | } | ||
| 37 | |||
| 38 | static unsigned long bigsmp_check_apicid_used(physid_mask_t bitmap, int apicid) | ||
| 39 | { | ||
| 40 | return 0; | ||
| 41 | } | ||
| 42 | |||
| 43 | static unsigned long bigsmp_check_apicid_present(int bit) | ||
| 44 | { | ||
| 45 | return 1; | ||
| 46 | } | ||
| 47 | |||
| 48 | static inline unsigned long calculate_ldr(int cpu) | ||
| 49 | { | ||
| 50 | unsigned long val, id; | ||
| 51 | |||
| 52 | val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; | ||
| 53 | id = per_cpu(x86_bios_cpu_apicid, cpu); | ||
| 54 | val |= SET_APIC_LOGICAL_ID(id); | ||
| 55 | |||
| 56 | return val; | ||
| 57 | } | ||
| 58 | |||
| 59 | /* | ||
| 60 | * Set up the logical destination ID. | ||
| 61 | * | ||
| 62 | * Intel recommends to set DFR, LDR and TPR before enabling | ||
| 63 | * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel | ||
| 64 | * document number 292116). So here it goes... | ||
| 65 | */ | ||
| 66 | static void bigsmp_init_apic_ldr(void) | ||
| 67 | { | ||
| 68 | unsigned long val; | ||
| 69 | int cpu = smp_processor_id(); | ||
| 70 | |||
| 71 | apic_write(APIC_DFR, APIC_DFR_FLAT); | ||
| 72 | val = calculate_ldr(cpu); | ||
| 73 | apic_write(APIC_LDR, val); | ||
| 74 | } | ||
| 75 | |||
| 76 | static void bigsmp_setup_apic_routing(void) | ||
| 77 | { | ||
| 78 | printk(KERN_INFO | ||
| 79 | "Enabling APIC mode: Physflat. Using %d I/O APICs\n", | ||
| 80 | nr_ioapics); | ||
| 81 | } | ||
| 82 | |||
| 83 | static int bigsmp_apicid_to_node(int logical_apicid) | ||
| 84 | { | ||
| 85 | return apicid_2_node[hard_smp_processor_id()]; | ||
| 86 | } | ||
| 87 | |||
| 88 | static int bigsmp_cpu_present_to_apicid(int mps_cpu) | ||
| 89 | { | ||
| 90 | if (mps_cpu < nr_cpu_ids) | ||
| 91 | return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); | ||
| 92 | |||
| 93 | return BAD_APICID; | ||
| 94 | } | ||
| 95 | |||
| 96 | static physid_mask_t bigsmp_apicid_to_cpu_present(int phys_apicid) | ||
| 97 | { | ||
| 98 | return physid_mask_of_physid(phys_apicid); | ||
| 99 | } | ||
| 100 | |||
| 101 | /* Mapping from cpu number to logical apicid */ | ||
| 102 | static inline int bigsmp_cpu_to_logical_apicid(int cpu) | ||
| 103 | { | ||
| 104 | if (cpu >= nr_cpu_ids) | ||
| 105 | return BAD_APICID; | ||
| 106 | return cpu_physical_id(cpu); | ||
| 107 | } | ||
| 108 | |||
| 109 | static physid_mask_t bigsmp_ioapic_phys_id_map(physid_mask_t phys_map) | ||
| 110 | { | ||
| 111 | /* For clustered we don't have a good way to do this yet - hack */ | ||
| 112 | return physids_promote(0xFFL); | ||
| 113 | } | ||
| 114 | |||
| 115 | static int bigsmp_check_phys_apicid_present(int boot_cpu_physical_apicid) | ||
| 116 | { | ||
| 117 | return 1; | ||
| 118 | } | ||
| 119 | |||
| 120 | /* As we are using single CPU as destination, pick only one CPU here */ | ||
| 121 | static unsigned int bigsmp_cpu_mask_to_apicid(const struct cpumask *cpumask) | ||
| 122 | { | ||
| 123 | return bigsmp_cpu_to_logical_apicid(cpumask_first(cpumask)); | ||
| 124 | } | ||
| 125 | |||
| 126 | static unsigned int bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | ||
| 127 | const struct cpumask *andmask) | ||
| 128 | { | ||
| 129 | int cpu; | ||
| 130 | |||
| 131 | /* | ||
| 132 | * We're using fixed IRQ delivery, can only return one phys APIC ID. | ||
| 133 | * May as well be the first. | ||
| 134 | */ | ||
| 135 | for_each_cpu_and(cpu, cpumask, andmask) { | ||
| 136 | if (cpumask_test_cpu(cpu, cpu_online_mask)) | ||
| 137 | break; | ||
| 138 | } | ||
| 139 | if (cpu < nr_cpu_ids) | ||
| 140 | return bigsmp_cpu_to_logical_apicid(cpu); | ||
| 141 | |||
| 142 | return BAD_APICID; | ||
| 143 | } | ||
| 144 | |||
| 145 | static int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb) | ||
| 146 | { | ||
| 147 | return cpuid_apic >> index_msb; | ||
| 148 | } | ||
| 149 | |||
| 150 | static inline void bigsmp_send_IPI_mask(const struct cpumask *mask, int vector) | ||
| 151 | { | ||
| 152 | default_send_IPI_mask_sequence_phys(mask, vector); | ||
| 153 | } | ||
| 154 | |||
| 155 | static void bigsmp_send_IPI_allbutself(int vector) | ||
| 156 | { | ||
| 157 | default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector); | ||
| 158 | } | ||
| 159 | |||
| 160 | static void bigsmp_send_IPI_all(int vector) | ||
| 161 | { | ||
| 162 | bigsmp_send_IPI_mask(cpu_online_mask, vector); | ||
| 163 | } | ||
| 164 | |||
| 165 | static int dmi_bigsmp; /* can be set by dmi scanners */ | ||
| 166 | |||
| 167 | static int hp_ht_bigsmp(const struct dmi_system_id *d) | ||
| 168 | { | ||
| 169 | printk(KERN_NOTICE "%s detected: force use of apic=bigsmp\n", d->ident); | ||
| 170 | dmi_bigsmp = 1; | ||
| 171 | |||
| 172 | return 0; | ||
| 173 | } | ||
| 174 | |||
| 175 | |||
| 176 | static const struct dmi_system_id bigsmp_dmi_table[] = { | ||
| 177 | { hp_ht_bigsmp, "HP ProLiant DL760 G2", | ||
| 178 | { DMI_MATCH(DMI_BIOS_VENDOR, "HP"), | ||
| 179 | DMI_MATCH(DMI_BIOS_VERSION, "P44-"), | ||
| 180 | } | ||
| 181 | }, | ||
| 182 | |||
| 183 | { hp_ht_bigsmp, "HP ProLiant DL740", | ||
| 184 | { DMI_MATCH(DMI_BIOS_VENDOR, "HP"), | ||
| 185 | DMI_MATCH(DMI_BIOS_VERSION, "P47-"), | ||
| 186 | } | ||
| 187 | }, | ||
| 188 | { } /* NULL entry stops DMI scanning */ | ||
| 189 | }; | ||
| 190 | |||
| 191 | static void bigsmp_vector_allocation_domain(int cpu, struct cpumask *retmask) | ||
| 192 | { | ||
| 193 | cpumask_clear(retmask); | ||
| 194 | cpumask_set_cpu(cpu, retmask); | ||
| 195 | } | ||
| 196 | |||
| 197 | static int probe_bigsmp(void) | ||
| 198 | { | ||
| 199 | if (def_to_bigsmp) | ||
| 200 | dmi_bigsmp = 1; | ||
| 201 | else | ||
| 202 | dmi_check_system(bigsmp_dmi_table); | ||
| 203 | |||
| 204 | return dmi_bigsmp; | ||
| 205 | } | ||
| 206 | |||
| 207 | struct apic apic_bigsmp = { | ||
| 208 | |||
| 209 | .name = "bigsmp", | ||
| 210 | .probe = probe_bigsmp, | ||
| 211 | .acpi_madt_oem_check = NULL, | ||
| 212 | .apic_id_registered = bigsmp_apic_id_registered, | ||
| 213 | |||
| 214 | .irq_delivery_mode = dest_Fixed, | ||
| 215 | /* phys delivery to target CPU: */ | ||
| 216 | .irq_dest_mode = 0, | ||
| 217 | |||
| 218 | .target_cpus = bigsmp_target_cpus, | ||
| 219 | .disable_esr = 1, | ||
| 220 | .dest_logical = 0, | ||
| 221 | .check_apicid_used = bigsmp_check_apicid_used, | ||
| 222 | .check_apicid_present = bigsmp_check_apicid_present, | ||
| 223 | |||
| 224 | .vector_allocation_domain = bigsmp_vector_allocation_domain, | ||
| 225 | .init_apic_ldr = bigsmp_init_apic_ldr, | ||
| 226 | |||
| 227 | .ioapic_phys_id_map = bigsmp_ioapic_phys_id_map, | ||
| 228 | .setup_apic_routing = bigsmp_setup_apic_routing, | ||
| 229 | .multi_timer_check = NULL, | ||
| 230 | .apicid_to_node = bigsmp_apicid_to_node, | ||
| 231 | .cpu_to_logical_apicid = bigsmp_cpu_to_logical_apicid, | ||
| 232 | .cpu_present_to_apicid = bigsmp_cpu_present_to_apicid, | ||
| 233 | .apicid_to_cpu_present = bigsmp_apicid_to_cpu_present, | ||
| 234 | .setup_portio_remap = NULL, | ||
| 235 | .check_phys_apicid_present = bigsmp_check_phys_apicid_present, | ||
| 236 | .enable_apic_mode = NULL, | ||
| 237 | .phys_pkg_id = bigsmp_phys_pkg_id, | ||
| 238 | .mps_oem_check = NULL, | ||
| 239 | |||
| 240 | .get_apic_id = bigsmp_get_apic_id, | ||
| 241 | .set_apic_id = NULL, | ||
| 242 | .apic_id_mask = 0xFF << 24, | ||
| 243 | |||
| 244 | .cpu_mask_to_apicid = bigsmp_cpu_mask_to_apicid, | ||
| 245 | .cpu_mask_to_apicid_and = bigsmp_cpu_mask_to_apicid_and, | ||
| 246 | |||
| 247 | .send_IPI_mask = bigsmp_send_IPI_mask, | ||
| 248 | .send_IPI_mask_allbutself = NULL, | ||
| 249 | .send_IPI_allbutself = bigsmp_send_IPI_allbutself, | ||
| 250 | .send_IPI_all = bigsmp_send_IPI_all, | ||
| 251 | .send_IPI_self = default_send_IPI_self, | ||
| 252 | |||
| 253 | .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, | ||
| 254 | .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, | ||
| 255 | |||
| 256 | .wait_for_init_deassert = default_wait_for_init_deassert, | ||
| 257 | |||
| 258 | .smp_callin_clear_local_apic = NULL, | ||
| 259 | .inquire_remote_apic = default_inquire_remote_apic, | ||
| 260 | |||
| 261 | .read = native_apic_mem_read, | ||
| 262 | .write = native_apic_mem_write, | ||
| 263 | .icr_read = native_apic_icr_read, | ||
| 264 | .icr_write = native_apic_icr_write, | ||
| 265 | .wait_icr_idle = native_apic_wait_icr_idle, | ||
| 266 | .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, | ||
| 267 | }; | ||
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c new file mode 100644 index 000000000000..1c11b819f245 --- /dev/null +++ b/arch/x86/kernel/apic/es7000_32.c | |||
| @@ -0,0 +1,781 @@ | |||
| 1 | /* | ||
| 2 | * Written by: Garry Forsgren, Unisys Corporation | ||
| 3 | * Natalie Protasevich, Unisys Corporation | ||
| 4 | * | ||
| 5 | * This file contains the code to configure and interface | ||
| 6 | * with Unisys ES7000 series hardware system manager. | ||
| 7 | * | ||
| 8 | * Copyright (c) 2003 Unisys Corporation. | ||
| 9 | * Copyright (C) 2009, Red Hat, Inc., Ingo Molnar | ||
| 10 | * | ||
| 11 | * All Rights Reserved. | ||
| 12 | * | ||
| 13 | * This program is free software; you can redistribute it and/or modify it | ||
| 14 | * under the terms of version 2 of the GNU General Public License as | ||
| 15 | * published by the Free Software Foundation. | ||
| 16 | * | ||
| 17 | * This program is distributed in the hope that it would be useful, but | ||
| 18 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
| 20 | * | ||
| 21 | * You should have received a copy of the GNU General Public License along | ||
| 22 | * with this program; if not, write the Free Software Foundation, Inc., 59 | ||
| 23 | * Temple Place - Suite 330, Boston MA 02111-1307, USA. | ||
| 24 | * | ||
| 25 | * Contact information: Unisys Corporation, Township Line & Union Meeting | ||
| 26 | * Roads-A, Unisys Way, Blue Bell, Pennsylvania, 19424, or: | ||
| 27 | * | ||
| 28 | * http://www.unisys.com | ||
| 29 | */ | ||
| 30 | #include <linux/notifier.h> | ||
| 31 | #include <linux/spinlock.h> | ||
| 32 | #include <linux/cpumask.h> | ||
| 33 | #include <linux/threads.h> | ||
| 34 | #include <linux/kernel.h> | ||
| 35 | #include <linux/module.h> | ||
| 36 | #include <linux/reboot.h> | ||
| 37 | #include <linux/string.h> | ||
| 38 | #include <linux/types.h> | ||
| 39 | #include <linux/errno.h> | ||
| 40 | #include <linux/acpi.h> | ||
| 41 | #include <linux/init.h> | ||
| 42 | #include <linux/nmi.h> | ||
| 43 | #include <linux/smp.h> | ||
| 44 | #include <linux/io.h> | ||
| 45 | |||
| 46 | #include <asm/apicdef.h> | ||
| 47 | #include <asm/atomic.h> | ||
| 48 | #include <asm/fixmap.h> | ||
| 49 | #include <asm/mpspec.h> | ||
| 50 | #include <asm/setup.h> | ||
| 51 | #include <asm/apic.h> | ||
| 52 | #include <asm/ipi.h> | ||
| 53 | |||
| 54 | /* | ||
| 55 | * ES7000 chipsets | ||
| 56 | */ | ||
| 57 | |||
| 58 | #define NON_UNISYS 0 | ||
| 59 | #define ES7000_CLASSIC 1 | ||
| 60 | #define ES7000_ZORRO 2 | ||
| 61 | |||
| 62 | #define MIP_REG 1 | ||
| 63 | #define MIP_PSAI_REG 4 | ||
| 64 | |||
| 65 | #define MIP_BUSY 1 | ||
| 66 | #define MIP_SPIN 0xf0000 | ||
| 67 | #define MIP_VALID 0x0100000000000000ULL | ||
| 68 | #define MIP_SW_APIC 0x1020b | ||
| 69 | |||
| 70 | #define MIP_PORT(val) ((val >> 32) & 0xffff) | ||
| 71 | |||
| 72 | #define MIP_RD_LO(val) (val & 0xffffffff) | ||
| 73 | |||
| 74 | struct mip_reg { | ||
| 75 | unsigned long long off_0x00; | ||
| 76 | unsigned long long off_0x08; | ||
| 77 | unsigned long long off_0x10; | ||
| 78 | unsigned long long off_0x18; | ||
| 79 | unsigned long long off_0x20; | ||
| 80 | unsigned long long off_0x28; | ||
| 81 | unsigned long long off_0x30; | ||
| 82 | unsigned long long off_0x38; | ||
| 83 | }; | ||
| 84 | |||
| 85 | struct mip_reg_info { | ||
| 86 | unsigned long long mip_info; | ||
| 87 | unsigned long long delivery_info; | ||
| 88 | unsigned long long host_reg; | ||
| 89 | unsigned long long mip_reg; | ||
| 90 | }; | ||
| 91 | |||
| 92 | struct psai { | ||
| 93 | unsigned long long entry_type; | ||
| 94 | unsigned long long addr; | ||
| 95 | unsigned long long bep_addr; | ||
| 96 | }; | ||
| 97 | |||
| 98 | #ifdef CONFIG_ACPI | ||
| 99 | |||
| 100 | struct es7000_oem_table { | ||
| 101 | struct acpi_table_header Header; | ||
| 102 | u32 OEMTableAddr; | ||
| 103 | u32 OEMTableSize; | ||
| 104 | }; | ||
| 105 | |||
| 106 | static unsigned long oem_addrX; | ||
| 107 | static unsigned long oem_size; | ||
| 108 | |||
| 109 | #endif | ||
| 110 | |||
| 111 | /* | ||
| 112 | * ES7000 Globals | ||
| 113 | */ | ||
| 114 | |||
| 115 | static volatile unsigned long *psai; | ||
| 116 | static struct mip_reg *mip_reg; | ||
| 117 | static struct mip_reg *host_reg; | ||
| 118 | static int mip_port; | ||
| 119 | static unsigned long mip_addr; | ||
| 120 | static unsigned long host_addr; | ||
| 121 | |||
| 122 | int es7000_plat; | ||
| 123 | |||
| 124 | /* | ||
| 125 | * GSI override for ES7000 platforms. | ||
| 126 | */ | ||
| 127 | |||
| 128 | static unsigned int base; | ||
| 129 | |||
| 130 | static int | ||
| 131 | es7000_rename_gsi(int ioapic, int gsi) | ||
| 132 | { | ||
| 133 | if (es7000_plat == ES7000_ZORRO) | ||
| 134 | return gsi; | ||
| 135 | |||
| 136 | if (!base) { | ||
| 137 | int i; | ||
| 138 | for (i = 0; i < nr_ioapics; i++) | ||
| 139 | base += nr_ioapic_registers[i]; | ||
| 140 | } | ||
| 141 | |||
| 142 | if (!ioapic && (gsi < 16)) | ||
| 143 | gsi += base; | ||
| 144 | |||
| 145 | return gsi; | ||
| 146 | } | ||
| 147 | |||
| 148 | static int wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip) | ||
| 149 | { | ||
| 150 | unsigned long vect = 0, psaival = 0; | ||
| 151 | |||
| 152 | if (psai == NULL) | ||
| 153 | return -1; | ||
| 154 | |||
| 155 | vect = ((unsigned long)__pa(eip)/0x1000) << 16; | ||
| 156 | psaival = (0x1000000 | vect | cpu); | ||
| 157 | |||
| 158 | while (*psai & 0x1000000) | ||
| 159 | ; | ||
| 160 | |||
| 161 | *psai = psaival; | ||
| 162 | |||
| 163 | return 0; | ||
| 164 | } | ||
| 165 | |||
| 166 | static int es7000_apic_is_cluster(void) | ||
| 167 | { | ||
| 168 | /* MPENTIUMIII: family 6, models 7 through 11 only (the range test needs &&; with || it was always true) */ | ||
| 169 | if (boot_cpu_data.x86 == 6 && | ||
| 170 | (boot_cpu_data.x86_model >= 7 && boot_cpu_data.x86_model <= 11)) | ||
| 171 | return 1; | ||
| 172 | |||
| 173 | return 0; | ||
| 174 | } | ||
| 175 | |||
| 176 | static void setup_unisys(void) | ||
| 177 | { | ||
| 178 | /* | ||
| 179 | * Determine the generation of the ES7000 currently running. | ||
| 180 | * | ||
| 181 | * es7000_plat = 1 if the machine is a 5xx ES7000 box | ||
| 182 | * es7000_plat = 2 if the machine is an x86_64 ES7000 box | ||
| 183 | * | ||
| 184 | */ | ||
| 185 | if (!(boot_cpu_data.x86 <= 15 && boot_cpu_data.x86_model <= 2)) | ||
| 186 | es7000_plat = ES7000_ZORRO; | ||
| 187 | else | ||
| 188 | es7000_plat = ES7000_CLASSIC; | ||
| 189 | ioapic_renumber_irq = es7000_rename_gsi; | ||
| 190 | } | ||
| 191 | |||
| 192 | /* | ||
| 193 | * Parse the OEM Table: | ||
| 194 | */ | ||
| 195 | static int parse_unisys_oem(char *oemptr) | ||
| 196 | { | ||
| 197 | int i; | ||
| 198 | int success = 0; | ||
| 199 | unsigned char type, size; | ||
| 200 | unsigned long val; | ||
| 201 | char *tp = NULL; | ||
| 202 | struct psai *psaip = NULL; | ||
| 203 | struct mip_reg_info *mi; | ||
| 204 | struct mip_reg *host, *mip; | ||
| 205 | |||
| 206 | tp = oemptr; | ||
| 207 | |||
| 208 | tp += 8; | ||
| 209 | |||
| 210 | for (i = 0; i <= 6; i++) { | ||
| 211 | type = *tp++; | ||
| 212 | size = *tp++; | ||
| 213 | tp -= 2; | ||
| 214 | switch (type) { | ||
| 215 | case MIP_REG: | ||
| 216 | mi = (struct mip_reg_info *)tp; | ||
| 217 | val = MIP_RD_LO(mi->host_reg); | ||
| 218 | host_addr = val; | ||
| 219 | host = (struct mip_reg *)val; | ||
| 220 | host_reg = __va(host); | ||
| 221 | val = MIP_RD_LO(mi->mip_reg); | ||
| 222 | mip_port = MIP_PORT(mi->mip_info); | ||
| 223 | mip_addr = val; | ||
| 224 | mip = (struct mip_reg *)val; | ||
| 225 | mip_reg = __va(mip); | ||
| 226 | pr_debug("es7000_mipcfg: host_reg = 0x%lx \n", | ||
| 227 | (unsigned long)host_reg); | ||
| 228 | pr_debug("es7000_mipcfg: mip_reg = 0x%lx \n", | ||
| 229 | (unsigned long)mip_reg); | ||
| 230 | success++; | ||
| 231 | break; | ||
| 232 | case MIP_PSAI_REG: | ||
| 233 | psaip = (struct psai *)tp; | ||
| 234 | if (tp != NULL) { | ||
| 235 | if (psaip->addr) | ||
| 236 | psai = __va(psaip->addr); | ||
| 237 | else | ||
| 238 | psai = NULL; | ||
| 239 | success++; | ||
| 240 | } | ||
| 241 | break; | ||
| 242 | default: | ||
| 243 | break; | ||
| 244 | } | ||
| 245 | tp += size; | ||
| 246 | } | ||
| 247 | |||
| 248 | if (success < 2) | ||
| 249 | es7000_plat = NON_UNISYS; | ||
| 250 | else | ||
| 251 | setup_unisys(); | ||
| 252 | |||
| 253 | return es7000_plat; | ||
| 254 | } | ||
| 255 | |||
| 256 | #ifdef CONFIG_ACPI | ||
| 257 | static int find_unisys_acpi_oem_table(unsigned long *oem_addr) | ||
| 258 | { | ||
| 259 | struct acpi_table_header *header = NULL; | ||
| 260 | struct es7000_oem_table *table; | ||
| 261 | acpi_size tbl_size; | ||
| 262 | acpi_status ret; | ||
| 263 | int i = 0; | ||
| 264 | |||
| 265 | for (;;) { | ||
| 266 | ret = acpi_get_table_with_size("OEM1", i++, &header, &tbl_size); | ||
| 267 | if (!ACPI_SUCCESS(ret)) | ||
| 268 | return -1; | ||
| 269 | |||
| 270 | if (!memcmp((char *) &header->oem_id, "UNISYS", 6)) | ||
| 271 | break; | ||
| 272 | |||
| 273 | early_acpi_os_unmap_memory(header, tbl_size); | ||
| 274 | } | ||
| 275 | |||
| 276 | table = (void *)header; | ||
| 277 | |||
| 278 | oem_addrX = table->OEMTableAddr; | ||
| 279 | oem_size = table->OEMTableSize; | ||
| 280 | |||
| 281 | early_acpi_os_unmap_memory(header, tbl_size); | ||
| 282 | |||
| 283 | *oem_addr = (unsigned long)__acpi_map_table(oem_addrX, oem_size); | ||
| 284 | |||
| 285 | return 0; | ||
| 286 | } | ||
| 287 | |||
| 288 | static void unmap_unisys_acpi_oem_table(unsigned long oem_addr) | ||
| 289 | { | ||
| 290 | if (!oem_addr) | ||
| 291 | return; | ||
| 292 | |||
| 293 | __acpi_unmap_table((char *)oem_addr, oem_size); | ||
| 294 | } | ||
| 295 | |||
| 296 | static int es7000_check_dsdt(void) | ||
| 297 | { | ||
| 298 | struct acpi_table_header header; | ||
| 299 | |||
| 300 | if (ACPI_SUCCESS(acpi_get_table_header(ACPI_SIG_DSDT, 0, &header)) && | ||
| 301 | !strncmp(header.oem_id, "UNISYS", 6)) | ||
| 302 | return 1; | ||
| 303 | return 0; | ||
| 304 | } | ||
| 305 | |||
| 306 | static int es7000_acpi_ret; | ||
| 307 | |||
| 308 | /* Hook from generic ACPI tables.c */ | ||
| 309 | static int es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | ||
| 310 | { | ||
| 311 | unsigned long oem_addr = 0; | ||
| 312 | int check_dsdt; | ||
| 313 | int ret = 0; | ||
| 314 | |||
| 315 | /* check the DSDT first, to avoid clearing the fix_map for oem_addr */ | ||
| 316 | check_dsdt = es7000_check_dsdt(); | ||
| 317 | |||
| 318 | if (!find_unisys_acpi_oem_table(&oem_addr)) { | ||
| 319 | if (check_dsdt) { | ||
| 320 | ret = parse_unisys_oem((char *)oem_addr); | ||
| 321 | } else { | ||
| 322 | setup_unisys(); | ||
| 323 | ret = 1; | ||
| 324 | } | ||
| 325 | /* | ||
| 326 | * we need to unmap it | ||
| 327 | */ | ||
| 328 | unmap_unisys_acpi_oem_table(oem_addr); | ||
| 329 | } | ||
| 330 | |||
| 331 | es7000_acpi_ret = ret; | ||
| 332 | |||
| 333 | return ret && !es7000_apic_is_cluster(); | ||
| 334 | } | ||
| 335 | |||
| 336 | static int es7000_acpi_madt_oem_check_cluster(char *oem_id, char *oem_table_id) | ||
| 337 | { | ||
| 338 | int ret = es7000_acpi_ret; | ||
| 339 | |||
| 340 | return ret && es7000_apic_is_cluster(); | ||
| 341 | } | ||
| 342 | |||
| 343 | #else /* !CONFIG_ACPI: */ | ||
| 344 | static int es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | ||
| 345 | { | ||
| 346 | return 0; | ||
| 347 | } | ||
| 348 | |||
| 349 | static int es7000_acpi_madt_oem_check_cluster(char *oem_id, char *oem_table_id) | ||
| 350 | { | ||
| 351 | return 0; | ||
| 352 | } | ||
| 353 | #endif /* !CONFIG_ACPI */ | ||
| 354 | |||
| 355 | static void es7000_spin(int n) | ||
| 356 | { | ||
| 357 | int i = 0; | ||
| 358 | |||
| 359 | while (i++ < n) | ||
| 360 | rep_nop(); | ||
| 361 | } | ||
| 362 | |||
| 363 | static int es7000_mip_write(struct mip_reg *mip_reg) | ||
| 364 | { | ||
| 365 | int status = 0; | ||
| 366 | int spin; | ||
| 367 | |||
| 368 | spin = MIP_SPIN; | ||
| 369 | while ((host_reg->off_0x38 & MIP_VALID) != 0) { | ||
| 370 | if (--spin <= 0) { | ||
| 371 | WARN(1, "Timeout waiting for Host Valid Flag\n"); | ||
| 372 | return -1; | ||
| 373 | } | ||
| 374 | es7000_spin(MIP_SPIN); | ||
| 375 | } | ||
| 376 | |||
| 377 | memcpy(host_reg, mip_reg, sizeof(struct mip_reg)); | ||
| 378 | outb(1, mip_port); | ||
| 379 | |||
| 380 | spin = MIP_SPIN; | ||
| 381 | |||
| 382 | while ((mip_reg->off_0x38 & MIP_VALID) == 0) { | ||
| 383 | if (--spin <= 0) { | ||
| 384 | WARN(1, "Timeout waiting for MIP Valid Flag\n"); | ||
| 385 | return -1; | ||
| 386 | } | ||
| 387 | es7000_spin(MIP_SPIN); | ||
| 388 | } | ||
| 389 | |||
| 390 | status = (mip_reg->off_0x00 & 0xffff0000000000ULL) >> 48; | ||
| 391 | mip_reg->off_0x38 &= ~MIP_VALID; | ||
| 392 | |||
| 393 | return status; | ||
| 394 | } | ||
| 395 | |||
| 396 | static void es7000_enable_apic_mode(void) | ||
| 397 | { | ||
| 398 | struct mip_reg es7000_mip_reg; | ||
| 399 | int mip_status; | ||
| 400 | |||
| 401 | if (!es7000_plat) | ||
| 402 | return; | ||
| 403 | |||
| 404 | printk(KERN_INFO "ES7000: Enabling APIC mode.\n"); | ||
| 405 | memset(&es7000_mip_reg, 0, sizeof(struct mip_reg)); | ||
| 406 | es7000_mip_reg.off_0x00 = MIP_SW_APIC; | ||
| 407 | es7000_mip_reg.off_0x38 = MIP_VALID; | ||
| 408 | |||
| 409 | while ((mip_status = es7000_mip_write(&es7000_mip_reg)) != 0) | ||
| 410 | WARN(1, "Command failed, status = %x\n", mip_status); | ||
| 411 | } | ||
| 412 | |||
| 413 | static void es7000_vector_allocation_domain(int cpu, struct cpumask *retmask) | ||
| 414 | { | ||
| 415 | /* Careful. Some cpus do not strictly honor the set of cpus | ||
| 416 | * specified in the interrupt destination when using lowest | ||
| 417 | * priority interrupt delivery mode. | ||
| 418 | * | ||
| 419 | * In particular there was a hyperthreading cpu observed to | ||
| 420 | * deliver interrupts to the wrong hyperthread when only one | ||
| 421 | * hyperthread was specified in the interrupt destination. | ||
| 422 | */ | ||
| 423 | cpumask_clear(retmask); | ||
| 424 | cpumask_bits(retmask)[0] = APIC_ALL_CPUS; | ||
| 425 | } | ||
| 426 | |||
| 427 | |||
| 428 | static void es7000_wait_for_init_deassert(atomic_t *deassert) | ||
| 429 | { | ||
| 430 | while (!atomic_read(deassert)) | ||
| 431 | cpu_relax(); | ||
| 432 | } | ||
| 433 | |||
| 434 | static unsigned int es7000_get_apic_id(unsigned long x) | ||
| 435 | { | ||
| 436 | return (x >> 24) & 0xFF; | ||
| 437 | } | ||
| 438 | |||
| 439 | static void es7000_send_IPI_mask(const struct cpumask *mask, int vector) | ||
| 440 | { | ||
| 441 | default_send_IPI_mask_sequence_phys(mask, vector); | ||
| 442 | } | ||
| 443 | |||
| 444 | static void es7000_send_IPI_allbutself(int vector) | ||
| 445 | { | ||
| 446 | default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector); | ||
| 447 | } | ||
| 448 | |||
| 449 | static void es7000_send_IPI_all(int vector) | ||
| 450 | { | ||
| 451 | es7000_send_IPI_mask(cpu_online_mask, vector); | ||
| 452 | } | ||
| 453 | |||
| 454 | static int es7000_apic_id_registered(void) | ||
| 455 | { | ||
| 456 | return 1; | ||
| 457 | } | ||
| 458 | |||
| 459 | static const struct cpumask *target_cpus_cluster(void) | ||
| 460 | { | ||
| 461 | return cpu_all_mask; | ||
| 462 | } | ||
| 463 | |||
| 464 | static const struct cpumask *es7000_target_cpus(void) | ||
| 465 | { | ||
| 466 | return cpumask_of(smp_processor_id()); | ||
| 467 | } | ||
| 468 | |||
| 469 | static unsigned long | ||
| 470 | es7000_check_apicid_used(physid_mask_t bitmap, int apicid) | ||
| 471 | { | ||
| 472 | return 0; | ||
| 473 | } | ||
| 474 | static unsigned long es7000_check_apicid_present(int bit) | ||
| 475 | { | ||
| 476 | return physid_isset(bit, phys_cpu_present_map); | ||
| 477 | } | ||
| 478 | |||
| 479 | static unsigned long calculate_ldr(int cpu) | ||
| 480 | { | ||
| 481 | unsigned long id = per_cpu(x86_bios_cpu_apicid, cpu); | ||
| 482 | |||
| 483 | return SET_APIC_LOGICAL_ID(id); | ||
| 484 | } | ||
| 485 | |||
| 486 | /* | ||
| 487 | * Set up the logical destination ID. | ||
| 488 | * | ||
| 489 | * Intel recommends setting DFR, LDR and TPR before enabling | ||
| 490 | * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel | ||
| 491 | * document number 292116). So here it goes... | ||
| 492 | */ | ||
| 493 | static void es7000_init_apic_ldr_cluster(void) | ||
| 494 | { | ||
| 495 | unsigned long val; | ||
| 496 | int cpu = smp_processor_id(); | ||
| 497 | |||
| 498 | apic_write(APIC_DFR, APIC_DFR_CLUSTER); | ||
| 499 | val = calculate_ldr(cpu); | ||
| 500 | apic_write(APIC_LDR, val); | ||
| 501 | } | ||
| 502 | |||
| 503 | static void es7000_init_apic_ldr(void) | ||
| 504 | { | ||
| 505 | unsigned long val; | ||
| 506 | int cpu = smp_processor_id(); | ||
| 507 | |||
| 508 | apic_write(APIC_DFR, APIC_DFR_FLAT); | ||
| 509 | val = calculate_ldr(cpu); | ||
| 510 | apic_write(APIC_LDR, val); | ||
| 511 | } | ||
| 512 | |||
| 513 | static void es7000_setup_apic_routing(void) | ||
| 514 | { | ||
| 515 | int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id()); | ||
| 516 | |||
| 517 | printk(KERN_INFO | ||
| 518 | "Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", | ||
| 519 | (apic_version[apic] == 0x14) ? | ||
| 520 | "Physical Cluster" : "Logical Cluster", | ||
| 521 | nr_ioapics, cpumask_bits(es7000_target_cpus())[0]); | ||
| 522 | } | ||
| 523 | |||
| 524 | static int es7000_apicid_to_node(int logical_apicid) | ||
| 525 | { | ||
| 526 | return 0; | ||
| 527 | } | ||
| 528 | |||
| 529 | |||
| 530 | static int es7000_cpu_present_to_apicid(int mps_cpu) | ||
| 531 | { | ||
| 532 | if (!mps_cpu) | ||
| 533 | return boot_cpu_physical_apicid; | ||
| 534 | else if (mps_cpu < nr_cpu_ids) | ||
| 535 | return per_cpu(x86_bios_cpu_apicid, mps_cpu); | ||
| 536 | else | ||
| 537 | return BAD_APICID; | ||
| 538 | } | ||
| 539 | |||
| 540 | static int cpu_id; | ||
| 541 | |||
| 542 | static physid_mask_t es7000_apicid_to_cpu_present(int phys_apicid) | ||
| 543 | { | ||
| 544 | physid_mask_t mask; | ||
| 545 | |||
| 546 | mask = physid_mask_of_physid(cpu_id); | ||
| 547 | ++cpu_id; | ||
| 548 | |||
| 549 | return mask; | ||
| 550 | } | ||
| 551 | |||
| 552 | /* Mapping from cpu number to logical apicid */ | ||
| 553 | static int es7000_cpu_to_logical_apicid(int cpu) | ||
| 554 | { | ||
| 555 | #ifdef CONFIG_SMP | ||
| 556 | if (cpu >= nr_cpu_ids) | ||
| 557 | return BAD_APICID; | ||
| 558 | return cpu_2_logical_apicid[cpu]; | ||
| 559 | #else | ||
| 560 | return logical_smp_processor_id(); | ||
| 561 | #endif | ||
| 562 | } | ||
| 563 | |||
| 564 | static physid_mask_t es7000_ioapic_phys_id_map(physid_mask_t phys_map) | ||
| 565 | { | ||
| 566 | /* For clustered we don't have a good way to do this yet - hack */ | ||
| 567 | return physids_promote(0xff); | ||
| 568 | } | ||
| 569 | |||
| 570 | static int es7000_check_phys_apicid_present(int cpu_physical_apicid) | ||
| 571 | { | ||
| 572 | boot_cpu_physical_apicid = read_apic_id(); | ||
| 573 | return 1; | ||
| 574 | } | ||
| 575 | |||
| 576 | static unsigned int es7000_cpu_mask_to_apicid(const struct cpumask *cpumask) | ||
| 577 | { | ||
| 578 | unsigned int round = 0; | ||
| 579 | int cpu, uninitialized_var(apicid); | ||
| 580 | |||
| 581 | /* | ||
| 582 | * The cpus in the mask must all be on the apic cluster. | ||
| 583 | */ | ||
| 584 | for_each_cpu(cpu, cpumask) { | ||
| 585 | int new_apicid = es7000_cpu_to_logical_apicid(cpu); | ||
| 586 | |||
| 587 | if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) { | ||
| 588 | WARN(1, "Not a valid mask!"); | ||
| 589 | |||
| 590 | return BAD_APICID; | ||
| 591 | } | ||
| 592 | apicid = new_apicid; | ||
| 593 | round++; | ||
| 594 | } | ||
| 595 | return apicid; | ||
| 596 | } | ||
| 597 | |||
| 598 | static unsigned int | ||
| 599 | es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask, | ||
| 600 | const struct cpumask *andmask) | ||
| 601 | { | ||
| 602 | int apicid = es7000_cpu_to_logical_apicid(0); | ||
| 603 | cpumask_var_t cpumask; | ||
| 604 | |||
| 605 | if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) | ||
| 606 | return apicid; | ||
| 607 | |||
| 608 | cpumask_and(cpumask, inmask, andmask); | ||
| 609 | cpumask_and(cpumask, cpumask, cpu_online_mask); | ||
| 610 | apicid = es7000_cpu_mask_to_apicid(cpumask); | ||
| 611 | |||
| 612 | free_cpumask_var(cpumask); | ||
| 613 | |||
| 614 | return apicid; | ||
| 615 | } | ||
| 616 | |||
| 617 | static int es7000_phys_pkg_id(int cpuid_apic, int index_msb) | ||
| 618 | { | ||
| 619 | return cpuid_apic >> index_msb; | ||
| 620 | } | ||
| 621 | |||
| 622 | static int probe_es7000(void) | ||
| 623 | { | ||
| 624 | /* probed later in mptable/ACPI hooks */ | ||
| 625 | return 0; | ||
| 626 | } | ||
| 627 | |||
| 628 | static int es7000_mps_ret; | ||
| 629 | static int es7000_mps_oem_check(struct mpc_table *mpc, char *oem, | ||
| 630 | char *productid) | ||
| 631 | { | ||
| 632 | int ret = 0; | ||
| 633 | |||
| 634 | if (mpc->oemptr) { | ||
| 635 | struct mpc_oemtable *oem_table = | ||
| 636 | (struct mpc_oemtable *)mpc->oemptr; | ||
| 637 | |||
| 638 | if (!strncmp(oem, "UNISYS", 6)) | ||
| 639 | ret = parse_unisys_oem((char *)oem_table); | ||
| 640 | } | ||
| 641 | |||
| 642 | es7000_mps_ret = ret; | ||
| 643 | |||
| 644 | return ret && !es7000_apic_is_cluster(); | ||
| 645 | } | ||
| 646 | |||
| 647 | static int es7000_mps_oem_check_cluster(struct mpc_table *mpc, char *oem, | ||
| 648 | char *productid) | ||
| 649 | { | ||
| 650 | int ret = es7000_mps_ret; | ||
| 651 | |||
| 652 | return ret && es7000_apic_is_cluster(); | ||
| 653 | } | ||
| 654 | |||
| 655 | struct apic apic_es7000_cluster = { | ||
| 656 | |||
| 657 | .name = "es7000", | ||
| 658 | .probe = probe_es7000, | ||
| 659 | .acpi_madt_oem_check = es7000_acpi_madt_oem_check_cluster, | ||
| 660 | .apic_id_registered = es7000_apic_id_registered, | ||
| 661 | |||
| 662 | .irq_delivery_mode = dest_LowestPrio, | ||
| 663 | /* logical delivery broadcast to all procs: */ | ||
| 664 | .irq_dest_mode = 1, | ||
| 665 | |||
| 666 | .target_cpus = target_cpus_cluster, | ||
| 667 | .disable_esr = 1, | ||
| 668 | .dest_logical = 0, | ||
| 669 | .check_apicid_used = es7000_check_apicid_used, | ||
| 670 | .check_apicid_present = es7000_check_apicid_present, | ||
| 671 | |||
| 672 | .vector_allocation_domain = es7000_vector_allocation_domain, | ||
| 673 | .init_apic_ldr = es7000_init_apic_ldr_cluster, | ||
| 674 | |||
| 675 | .ioapic_phys_id_map = es7000_ioapic_phys_id_map, | ||
| 676 | .setup_apic_routing = es7000_setup_apic_routing, | ||
| 677 | .multi_timer_check = NULL, | ||
| 678 | .apicid_to_node = es7000_apicid_to_node, | ||
| 679 | .cpu_to_logical_apicid = es7000_cpu_to_logical_apicid, | ||
| 680 | .cpu_present_to_apicid = es7000_cpu_present_to_apicid, | ||
| 681 | .apicid_to_cpu_present = es7000_apicid_to_cpu_present, | ||
| 682 | .setup_portio_remap = NULL, | ||
| 683 | .check_phys_apicid_present = es7000_check_phys_apicid_present, | ||
| 684 | .enable_apic_mode = es7000_enable_apic_mode, | ||
| 685 | .phys_pkg_id = es7000_phys_pkg_id, | ||
| 686 | .mps_oem_check = es7000_mps_oem_check_cluster, | ||
| 687 | |||
| 688 | .get_apic_id = es7000_get_apic_id, | ||
| 689 | .set_apic_id = NULL, | ||
| 690 | .apic_id_mask = 0xFF << 24, | ||
| 691 | |||
| 692 | .cpu_mask_to_apicid = es7000_cpu_mask_to_apicid, | ||
| 693 | .cpu_mask_to_apicid_and = es7000_cpu_mask_to_apicid_and, | ||
| 694 | |||
| 695 | .send_IPI_mask = es7000_send_IPI_mask, | ||
| 696 | .send_IPI_mask_allbutself = NULL, | ||
| 697 | .send_IPI_allbutself = es7000_send_IPI_allbutself, | ||
| 698 | .send_IPI_all = es7000_send_IPI_all, | ||
| 699 | .send_IPI_self = default_send_IPI_self, | ||
| 700 | |||
| 701 | .wakeup_secondary_cpu = wakeup_secondary_cpu_via_mip, | ||
| 702 | |||
| 703 | .trampoline_phys_low = 0x467, | ||
| 704 | .trampoline_phys_high = 0x469, | ||
| 705 | |||
| 706 | .wait_for_init_deassert = NULL, | ||
| 707 | |||
| 708 | /* Nothing to do for most platforms, since cleared by the INIT cycle: */ | ||
| 709 | .smp_callin_clear_local_apic = NULL, | ||
| 710 | .inquire_remote_apic = default_inquire_remote_apic, | ||
| 711 | |||
| 712 | .read = native_apic_mem_read, | ||
| 713 | .write = native_apic_mem_write, | ||
| 714 | .icr_read = native_apic_icr_read, | ||
| 715 | .icr_write = native_apic_icr_write, | ||
| 716 | .wait_icr_idle = native_apic_wait_icr_idle, | ||
| 717 | .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, | ||
| 718 | }; | ||
| 719 | |||
| 720 | struct apic apic_es7000 = { | ||
| 721 | |||
| 722 | .name = "es7000", | ||
| 723 | .probe = probe_es7000, | ||
| 724 | .acpi_madt_oem_check = es7000_acpi_madt_oem_check, | ||
| 725 | .apic_id_registered = es7000_apic_id_registered, | ||
| 726 | |||
| 727 | .irq_delivery_mode = dest_Fixed, | ||
| 728 | /* phys delivery to target CPUs: */ | ||
| 729 | .irq_dest_mode = 0, | ||
| 730 | |||
| 731 | .target_cpus = es7000_target_cpus, | ||
| 732 | .disable_esr = 1, | ||
| 733 | .dest_logical = 0, | ||
| 734 | .check_apicid_used = es7000_check_apicid_used, | ||
| 735 | .check_apicid_present = es7000_check_apicid_present, | ||
| 736 | |||
| 737 | .vector_allocation_domain = es7000_vector_allocation_domain, | ||
| 738 | .init_apic_ldr = es7000_init_apic_ldr, | ||
| 739 | |||
| 740 | .ioapic_phys_id_map = es7000_ioapic_phys_id_map, | ||
| 741 | .setup_apic_routing = es7000_setup_apic_routing, | ||
| 742 | .multi_timer_check = NULL, | ||
| 743 | .apicid_to_node = es7000_apicid_to_node, | ||
| 744 | .cpu_to_logical_apicid = es7000_cpu_to_logical_apicid, | ||
| 745 | .cpu_present_to_apicid = es7000_cpu_present_to_apicid, | ||
| 746 | .apicid_to_cpu_present = es7000_apicid_to_cpu_present, | ||
| 747 | .setup_portio_remap = NULL, | ||
| 748 | .check_phys_apicid_present = es7000_check_phys_apicid_present, | ||
| 749 | .enable_apic_mode = es7000_enable_apic_mode, | ||
| 750 | .phys_pkg_id = es7000_phys_pkg_id, | ||
| 751 | .mps_oem_check = es7000_mps_oem_check, | ||
| 752 | |||
| 753 | .get_apic_id = es7000_get_apic_id, | ||
| 754 | .set_apic_id = NULL, | ||
| 755 | .apic_id_mask = 0xFF << 24, | ||
| 756 | |||
| 757 | .cpu_mask_to_apicid = es7000_cpu_mask_to_apicid, | ||
| 758 | .cpu_mask_to_apicid_and = es7000_cpu_mask_to_apicid_and, | ||
| 759 | |||
| 760 | .send_IPI_mask = es7000_send_IPI_mask, | ||
| 761 | .send_IPI_mask_allbutself = NULL, | ||
| 762 | .send_IPI_allbutself = es7000_send_IPI_allbutself, | ||
| 763 | .send_IPI_all = es7000_send_IPI_all, | ||
| 764 | .send_IPI_self = default_send_IPI_self, | ||
| 765 | |||
| 766 | .trampoline_phys_low = 0x467, | ||
| 767 | .trampoline_phys_high = 0x469, | ||
| 768 | |||
| 769 | .wait_for_init_deassert = es7000_wait_for_init_deassert, | ||
| 770 | |||
| 771 | /* Nothing to do for most platforms, since cleared by the INIT cycle: */ | ||
| 772 | .smp_callin_clear_local_apic = NULL, | ||
| 773 | .inquire_remote_apic = default_inquire_remote_apic, | ||
| 774 | |||
| 775 | .read = native_apic_mem_read, | ||
| 776 | .write = native_apic_mem_write, | ||
| 777 | .icr_read = native_apic_icr_read, | ||
| 778 | .icr_write = native_apic_icr_write, | ||
| 779 | .wait_icr_idle = native_apic_wait_icr_idle, | ||
| 780 | .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, | ||
| 781 | }; | ||
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/apic/io_apic.c index bc7ac4da90d7..00e6071cefc4 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c | |||
| @@ -1,7 +1,7 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Intel IO-APIC support for multi-Pentium hosts. | 2 | * Intel IO-APIC support for multi-Pentium hosts. |
| 3 | * | 3 | * |
| 4 | * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo | 4 | * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo |
| 5 | * | 5 | * |
| 6 | * Many thanks to Stig Venaas for trying out countless experimental | 6 | * Many thanks to Stig Venaas for trying out countless experimental |
| 7 | * patches and reporting/debugging problems patiently! | 7 | * patches and reporting/debugging problems patiently! |
| @@ -46,6 +46,7 @@ | |||
| 46 | #include <asm/idle.h> | 46 | #include <asm/idle.h> |
| 47 | #include <asm/io.h> | 47 | #include <asm/io.h> |
| 48 | #include <asm/smp.h> | 48 | #include <asm/smp.h> |
| 49 | #include <asm/cpu.h> | ||
| 49 | #include <asm/desc.h> | 50 | #include <asm/desc.h> |
| 50 | #include <asm/proto.h> | 51 | #include <asm/proto.h> |
| 51 | #include <asm/acpi.h> | 52 | #include <asm/acpi.h> |
| @@ -61,9 +62,7 @@ | |||
| 61 | #include <asm/uv/uv_hub.h> | 62 | #include <asm/uv/uv_hub.h> |
| 62 | #include <asm/uv/uv_irq.h> | 63 | #include <asm/uv/uv_irq.h> |
| 63 | 64 | ||
| 64 | #include <mach_ipi.h> | 65 | #include <asm/apic.h> |
| 65 | #include <mach_apic.h> | ||
| 66 | #include <mach_apicdef.h> | ||
| 67 | 66 | ||
| 68 | #define __apicdebuginit(type) static type __init | 67 | #define __apicdebuginit(type) static type __init |
| 69 | 68 | ||
| @@ -82,11 +81,11 @@ static DEFINE_SPINLOCK(vector_lock); | |||
| 82 | int nr_ioapic_registers[MAX_IO_APICS]; | 81 | int nr_ioapic_registers[MAX_IO_APICS]; |
| 83 | 82 | ||
| 84 | /* I/O APIC entries */ | 83 | /* I/O APIC entries */ |
| 85 | struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; | 84 | struct mpc_ioapic mp_ioapics[MAX_IO_APICS]; |
| 86 | int nr_ioapics; | 85 | int nr_ioapics; |
| 87 | 86 | ||
| 88 | /* MP IRQ source entries */ | 87 | /* MP IRQ source entries */ |
| 89 | struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; | 88 | struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; |
| 90 | 89 | ||
| 91 | /* # of MP IRQ source entries */ | 90 | /* # of MP IRQ source entries */ |
| 92 | int mp_irq_entries; | 91 | int mp_irq_entries; |
| @@ -99,10 +98,19 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); | |||
| 99 | 98 | ||
| 100 | int skip_ioapic_setup; | 99 | int skip_ioapic_setup; |
| 101 | 100 | ||
| 101 | void arch_disable_smp_support(void) | ||
| 102 | { | ||
| 103 | #ifdef CONFIG_PCI | ||
| 104 | noioapicquirk = 1; | ||
| 105 | noioapicreroute = -1; | ||
| 106 | #endif | ||
| 107 | skip_ioapic_setup = 1; | ||
| 108 | } | ||
| 109 | |||
| 102 | static int __init parse_noapic(char *str) | 110 | static int __init parse_noapic(char *str) |
| 103 | { | 111 | { |
| 104 | /* disable IO-APIC */ | 112 | /* disable IO-APIC */ |
| 105 | disable_ioapic_setup(); | 113 | arch_disable_smp_support(); |
| 106 | return 0; | 114 | return 0; |
| 107 | } | 115 | } |
| 108 | early_param("noapic", parse_noapic); | 116 | early_param("noapic", parse_noapic); |
| @@ -356,7 +364,7 @@ set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask) | |||
| 356 | 364 | ||
| 357 | if (!cfg->move_in_progress) { | 365 | if (!cfg->move_in_progress) { |
| 358 | /* it means that domain is not changed */ | 366 | /* it means that domain is not changed */ |
| 359 | if (!cpumask_intersects(&desc->affinity, mask)) | 367 | if (!cpumask_intersects(desc->affinity, mask)) |
| 360 | cfg->move_desc_pending = 1; | 368 | cfg->move_desc_pending = 1; |
| 361 | } | 369 | } |
| 362 | } | 370 | } |
| @@ -386,7 +394,7 @@ struct io_apic { | |||
| 386 | static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) | 394 | static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) |
| 387 | { | 395 | { |
| 388 | return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) | 396 | return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) |
| 389 | + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK); | 397 | + (mp_ioapics[idx].apicaddr & ~PAGE_MASK); |
| 390 | } | 398 | } |
| 391 | 399 | ||
| 392 | static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) | 400 | static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) |
| @@ -478,7 +486,7 @@ __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) | |||
| 478 | io_apic_write(apic, 0x10 + 2*pin, eu.w1); | 486 | io_apic_write(apic, 0x10 + 2*pin, eu.w1); |
| 479 | } | 487 | } |
| 480 | 488 | ||
| 481 | static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) | 489 | void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) |
| 482 | { | 490 | { |
| 483 | unsigned long flags; | 491 | unsigned long flags; |
| 484 | spin_lock_irqsave(&ioapic_lock, flags); | 492 | spin_lock_irqsave(&ioapic_lock, flags); |
| @@ -513,11 +521,11 @@ static void send_cleanup_vector(struct irq_cfg *cfg) | |||
| 513 | for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) | 521 | for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) |
| 514 | cfg->move_cleanup_count++; | 522 | cfg->move_cleanup_count++; |
| 515 | for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) | 523 | for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) |
| 516 | send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); | 524 | apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); |
| 517 | } else { | 525 | } else { |
| 518 | cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask); | 526 | cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask); |
| 519 | cfg->move_cleanup_count = cpumask_weight(cleanup_mask); | 527 | cfg->move_cleanup_count = cpumask_weight(cleanup_mask); |
| 520 | send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); | 528 | apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); |
| 521 | free_cpumask_var(cleanup_mask); | 529 | free_cpumask_var(cleanup_mask); |
| 522 | } | 530 | } |
| 523 | cfg->move_in_progress = 0; | 531 | cfg->move_in_progress = 0; |
| @@ -562,8 +570,9 @@ static int | |||
| 562 | assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask); | 570 | assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask); |
| 563 | 571 | ||
| 564 | /* | 572 | /* |
| 565 | * Either sets desc->affinity to a valid value, and returns cpu_mask_to_apicid | 573 | * Either sets desc->affinity to a valid value, and returns |
| 566 | * of that, or returns BAD_APICID and leaves desc->affinity untouched. | 574 | * ->cpu_mask_to_apicid of that, or returns BAD_APICID and |
| 575 | * leaves desc->affinity untouched. | ||
| 567 | */ | 576 | */ |
| 568 | static unsigned int | 577 | static unsigned int |
| 569 | set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) | 578 | set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) |
| @@ -579,9 +588,10 @@ set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) | |||
| 579 | if (assign_irq_vector(irq, cfg, mask)) | 588 | if (assign_irq_vector(irq, cfg, mask)) |
| 580 | return BAD_APICID; | 589 | return BAD_APICID; |
| 581 | 590 | ||
| 582 | cpumask_and(&desc->affinity, cfg->domain, mask); | 591 | cpumask_and(desc->affinity, cfg->domain, mask); |
| 583 | set_extra_move_desc(desc, mask); | 592 | set_extra_move_desc(desc, mask); |
| 584 | return cpu_mask_to_apicid_and(&desc->affinity, cpu_online_mask); | 593 | |
| 594 | return apic->cpu_mask_to_apicid_and(desc->affinity, cpu_online_mask); | ||
| 585 | } | 595 | } |
| 586 | 596 | ||
| 587 | static void | 597 | static void |
| @@ -796,23 +806,6 @@ static void clear_IO_APIC (void) | |||
| 796 | clear_IO_APIC_pin(apic, pin); | 806 | clear_IO_APIC_pin(apic, pin); |
| 797 | } | 807 | } |
| 798 | 808 | ||
| 799 | #if !defined(CONFIG_SMP) && defined(CONFIG_X86_32) | ||
| 800 | void send_IPI_self(int vector) | ||
| 801 | { | ||
| 802 | unsigned int cfg; | ||
| 803 | |||
| 804 | /* | ||
| 805 | * Wait for idle. | ||
| 806 | */ | ||
| 807 | apic_wait_icr_idle(); | ||
| 808 | cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL; | ||
| 809 | /* | ||
| 810 | * Send the IPI. The write to APIC_ICR fires this off. | ||
| 811 | */ | ||
| 812 | apic_write(APIC_ICR, cfg); | ||
| 813 | } | ||
| 814 | #endif /* !CONFIG_SMP && CONFIG_X86_32*/ | ||
| 815 | |||
| 816 | #ifdef CONFIG_X86_32 | 809 | #ifdef CONFIG_X86_32 |
| 817 | /* | 810 | /* |
| 818 | * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to | 811 | * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to |
| @@ -820,8 +813,9 @@ void send_IPI_self(int vector) | |||
| 820 | */ | 813 | */ |
| 821 | 814 | ||
| 822 | #define MAX_PIRQS 8 | 815 | #define MAX_PIRQS 8 |
| 823 | static int pirq_entries [MAX_PIRQS]; | 816 | static int pirq_entries[MAX_PIRQS] = { |
| 824 | static int pirqs_enabled; | 817 | [0 ... MAX_PIRQS - 1] = -1 |
| 818 | }; | ||
| 825 | 819 | ||
| 826 | static int __init ioapic_pirq_setup(char *str) | 820 | static int __init ioapic_pirq_setup(char *str) |
| 827 | { | 821 | { |
| @@ -830,10 +824,6 @@ static int __init ioapic_pirq_setup(char *str) | |||
| 830 | 824 | ||
| 831 | get_options(str, ARRAY_SIZE(ints), ints); | 825 | get_options(str, ARRAY_SIZE(ints), ints); |
| 832 | 826 | ||
| 833 | for (i = 0; i < MAX_PIRQS; i++) | ||
| 834 | pirq_entries[i] = -1; | ||
| 835 | |||
| 836 | pirqs_enabled = 1; | ||
| 837 | apic_printk(APIC_VERBOSE, KERN_INFO | 827 | apic_printk(APIC_VERBOSE, KERN_INFO |
| 838 | "PIRQ redirection, working around broken MP-BIOS.\n"); | 828 | "PIRQ redirection, working around broken MP-BIOS.\n"); |
| 839 | max = MAX_PIRQS; | 829 | max = MAX_PIRQS; |
| @@ -944,10 +934,10 @@ static int find_irq_entry(int apic, int pin, int type) | |||
| 944 | int i; | 934 | int i; |
| 945 | 935 | ||
| 946 | for (i = 0; i < mp_irq_entries; i++) | 936 | for (i = 0; i < mp_irq_entries; i++) |
| 947 | if (mp_irqs[i].mp_irqtype == type && | 937 | if (mp_irqs[i].irqtype == type && |
| 948 | (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid || | 938 | (mp_irqs[i].dstapic == mp_ioapics[apic].apicid || |
| 949 | mp_irqs[i].mp_dstapic == MP_APIC_ALL) && | 939 | mp_irqs[i].dstapic == MP_APIC_ALL) && |
| 950 | mp_irqs[i].mp_dstirq == pin) | 940 | mp_irqs[i].dstirq == pin) |
| 951 | return i; | 941 | return i; |
| 952 | 942 | ||
| 953 | return -1; | 943 | return -1; |
| @@ -961,13 +951,13 @@ static int __init find_isa_irq_pin(int irq, int type) | |||
| 961 | int i; | 951 | int i; |
| 962 | 952 | ||
| 963 | for (i = 0; i < mp_irq_entries; i++) { | 953 | for (i = 0; i < mp_irq_entries; i++) { |
| 964 | int lbus = mp_irqs[i].mp_srcbus; | 954 | int lbus = mp_irqs[i].srcbus; |
| 965 | 955 | ||
| 966 | if (test_bit(lbus, mp_bus_not_pci) && | 956 | if (test_bit(lbus, mp_bus_not_pci) && |
| 967 | (mp_irqs[i].mp_irqtype == type) && | 957 | (mp_irqs[i].irqtype == type) && |
| 968 | (mp_irqs[i].mp_srcbusirq == irq)) | 958 | (mp_irqs[i].srcbusirq == irq)) |
| 969 | 959 | ||
| 970 | return mp_irqs[i].mp_dstirq; | 960 | return mp_irqs[i].dstirq; |
| 971 | } | 961 | } |
| 972 | return -1; | 962 | return -1; |
| 973 | } | 963 | } |
| @@ -977,17 +967,17 @@ static int __init find_isa_irq_apic(int irq, int type) | |||
| 977 | int i; | 967 | int i; |
| 978 | 968 | ||
| 979 | for (i = 0; i < mp_irq_entries; i++) { | 969 | for (i = 0; i < mp_irq_entries; i++) { |
| 980 | int lbus = mp_irqs[i].mp_srcbus; | 970 | int lbus = mp_irqs[i].srcbus; |
| 981 | 971 | ||
| 982 | if (test_bit(lbus, mp_bus_not_pci) && | 972 | if (test_bit(lbus, mp_bus_not_pci) && |
| 983 | (mp_irqs[i].mp_irqtype == type) && | 973 | (mp_irqs[i].irqtype == type) && |
| 984 | (mp_irqs[i].mp_srcbusirq == irq)) | 974 | (mp_irqs[i].srcbusirq == irq)) |
| 985 | break; | 975 | break; |
| 986 | } | 976 | } |
| 987 | if (i < mp_irq_entries) { | 977 | if (i < mp_irq_entries) { |
| 988 | int apic; | 978 | int apic; |
| 989 | for(apic = 0; apic < nr_ioapics; apic++) { | 979 | for(apic = 0; apic < nr_ioapics; apic++) { |
| 990 | if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic) | 980 | if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic) |
| 991 | return apic; | 981 | return apic; |
| 992 | } | 982 | } |
| 993 | } | 983 | } |
| @@ -1012,23 +1002,23 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) | |||
| 1012 | return -1; | 1002 | return -1; |
| 1013 | } | 1003 | } |
| 1014 | for (i = 0; i < mp_irq_entries; i++) { | 1004 | for (i = 0; i < mp_irq_entries; i++) { |
| 1015 | int lbus = mp_irqs[i].mp_srcbus; | 1005 | int lbus = mp_irqs[i].srcbus; |
| 1016 | 1006 | ||
| 1017 | for (apic = 0; apic < nr_ioapics; apic++) | 1007 | for (apic = 0; apic < nr_ioapics; apic++) |
| 1018 | if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic || | 1008 | if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic || |
| 1019 | mp_irqs[i].mp_dstapic == MP_APIC_ALL) | 1009 | mp_irqs[i].dstapic == MP_APIC_ALL) |
| 1020 | break; | 1010 | break; |
| 1021 | 1011 | ||
| 1022 | if (!test_bit(lbus, mp_bus_not_pci) && | 1012 | if (!test_bit(lbus, mp_bus_not_pci) && |
| 1023 | !mp_irqs[i].mp_irqtype && | 1013 | !mp_irqs[i].irqtype && |
| 1024 | (bus == lbus) && | 1014 | (bus == lbus) && |
| 1025 | (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) { | 1015 | (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) { |
| 1026 | int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq); | 1016 | int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq); |
| 1027 | 1017 | ||
| 1028 | if (!(apic || IO_APIC_IRQ(irq))) | 1018 | if (!(apic || IO_APIC_IRQ(irq))) |
| 1029 | continue; | 1019 | continue; |
| 1030 | 1020 | ||
| 1031 | if (pin == (mp_irqs[i].mp_srcbusirq & 3)) | 1021 | if (pin == (mp_irqs[i].srcbusirq & 3)) |
| 1032 | return irq; | 1022 | return irq; |
| 1033 | /* | 1023 | /* |
| 1034 | * Use the first all-but-pin matching entry as a | 1024 | * Use the first all-but-pin matching entry as a |
| @@ -1071,7 +1061,7 @@ static int EISA_ELCR(unsigned int irq) | |||
| 1071 | * EISA conforming in the MP table, that means its trigger type must | 1061 | * EISA conforming in the MP table, that means its trigger type must |
| 1072 | * be read in from the ELCR */ | 1062 | * be read in from the ELCR */ |
| 1073 | 1063 | ||
| 1074 | #define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq)) | 1064 | #define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].srcbusirq)) |
| 1075 | #define default_EISA_polarity(idx) default_ISA_polarity(idx) | 1065 | #define default_EISA_polarity(idx) default_ISA_polarity(idx) |
| 1076 | 1066 | ||
| 1077 | /* PCI interrupts are always polarity one level triggered, | 1067 | /* PCI interrupts are always polarity one level triggered, |
| @@ -1088,13 +1078,13 @@ static int EISA_ELCR(unsigned int irq) | |||
| 1088 | 1078 | ||
| 1089 | static int MPBIOS_polarity(int idx) | 1079 | static int MPBIOS_polarity(int idx) |
| 1090 | { | 1080 | { |
| 1091 | int bus = mp_irqs[idx].mp_srcbus; | 1081 | int bus = mp_irqs[idx].srcbus; |
| 1092 | int polarity; | 1082 | int polarity; |
| 1093 | 1083 | ||
| 1094 | /* | 1084 | /* |
| 1095 | * Determine IRQ line polarity (high active or low active): | 1085 | * Determine IRQ line polarity (high active or low active): |
| 1096 | */ | 1086 | */ |
| 1097 | switch (mp_irqs[idx].mp_irqflag & 3) | 1087 | switch (mp_irqs[idx].irqflag & 3) |
| 1098 | { | 1088 | { |
| 1099 | case 0: /* conforms, ie. bus-type dependent polarity */ | 1089 | case 0: /* conforms, ie. bus-type dependent polarity */ |
| 1100 | if (test_bit(bus, mp_bus_not_pci)) | 1090 | if (test_bit(bus, mp_bus_not_pci)) |
| @@ -1130,13 +1120,13 @@ static int MPBIOS_polarity(int idx) | |||
| 1130 | 1120 | ||
| 1131 | static int MPBIOS_trigger(int idx) | 1121 | static int MPBIOS_trigger(int idx) |
| 1132 | { | 1122 | { |
| 1133 | int bus = mp_irqs[idx].mp_srcbus; | 1123 | int bus = mp_irqs[idx].srcbus; |
| 1134 | int trigger; | 1124 | int trigger; |
| 1135 | 1125 | ||
| 1136 | /* | 1126 | /* |
| 1137 | * Determine IRQ trigger mode (edge or level sensitive): | 1127 | * Determine IRQ trigger mode (edge or level sensitive): |
| 1138 | */ | 1128 | */ |
| 1139 | switch ((mp_irqs[idx].mp_irqflag>>2) & 3) | 1129 | switch ((mp_irqs[idx].irqflag>>2) & 3) |
| 1140 | { | 1130 | { |
| 1141 | case 0: /* conforms, ie. bus-type dependent */ | 1131 | case 0: /* conforms, ie. bus-type dependent */ |
| 1142 | if (test_bit(bus, mp_bus_not_pci)) | 1132 | if (test_bit(bus, mp_bus_not_pci)) |
| @@ -1214,16 +1204,16 @@ int (*ioapic_renumber_irq)(int ioapic, int irq); | |||
| 1214 | static int pin_2_irq(int idx, int apic, int pin) | 1204 | static int pin_2_irq(int idx, int apic, int pin) |
| 1215 | { | 1205 | { |
| 1216 | int irq, i; | 1206 | int irq, i; |
| 1217 | int bus = mp_irqs[idx].mp_srcbus; | 1207 | int bus = mp_irqs[idx].srcbus; |
| 1218 | 1208 | ||
| 1219 | /* | 1209 | /* |
| 1220 | * Debugging check, we are in big trouble if this message pops up! | 1210 | * Debugging check, we are in big trouble if this message pops up! |
| 1221 | */ | 1211 | */ |
| 1222 | if (mp_irqs[idx].mp_dstirq != pin) | 1212 | if (mp_irqs[idx].dstirq != pin) |
| 1223 | printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); | 1213 | printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); |
| 1224 | 1214 | ||
| 1225 | if (test_bit(bus, mp_bus_not_pci)) { | 1215 | if (test_bit(bus, mp_bus_not_pci)) { |
| 1226 | irq = mp_irqs[idx].mp_srcbusirq; | 1216 | irq = mp_irqs[idx].srcbusirq; |
| 1227 | } else { | 1217 | } else { |
| 1228 | /* | 1218 | /* |
| 1229 | * PCI IRQs are mapped in order | 1219 | * PCI IRQs are mapped in order |
| @@ -1315,7 +1305,7 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) | |||
| 1315 | int new_cpu; | 1305 | int new_cpu; |
| 1316 | int vector, offset; | 1306 | int vector, offset; |
| 1317 | 1307 | ||
| 1318 | vector_allocation_domain(cpu, tmp_mask); | 1308 | apic->vector_allocation_domain(cpu, tmp_mask); |
| 1319 | 1309 | ||
| 1320 | vector = current_vector; | 1310 | vector = current_vector; |
| 1321 | offset = current_offset; | 1311 | offset = current_offset; |
| @@ -1485,10 +1475,10 @@ static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long t | |||
| 1485 | handle_edge_irq, "edge"); | 1475 | handle_edge_irq, "edge"); |
| 1486 | } | 1476 | } |
| 1487 | 1477 | ||
| 1488 | static int setup_ioapic_entry(int apic, int irq, | 1478 | int setup_ioapic_entry(int apic_id, int irq, |
| 1489 | struct IO_APIC_route_entry *entry, | 1479 | struct IO_APIC_route_entry *entry, |
| 1490 | unsigned int destination, int trigger, | 1480 | unsigned int destination, int trigger, |
| 1491 | int polarity, int vector) | 1481 | int polarity, int vector) |
| 1492 | { | 1482 | { |
| 1493 | /* | 1483 | /* |
| 1494 | * add it to the IO-APIC irq-routing table: | 1484 | * add it to the IO-APIC irq-routing table: |
| @@ -1497,25 +1487,25 @@ static int setup_ioapic_entry(int apic, int irq, | |||
| 1497 | 1487 | ||
| 1498 | #ifdef CONFIG_INTR_REMAP | 1488 | #ifdef CONFIG_INTR_REMAP |
| 1499 | if (intr_remapping_enabled) { | 1489 | if (intr_remapping_enabled) { |
| 1500 | struct intel_iommu *iommu = map_ioapic_to_ir(apic); | 1490 | struct intel_iommu *iommu = map_ioapic_to_ir(apic_id); |
| 1501 | struct irte irte; | 1491 | struct irte irte; |
| 1502 | struct IR_IO_APIC_route_entry *ir_entry = | 1492 | struct IR_IO_APIC_route_entry *ir_entry = |
| 1503 | (struct IR_IO_APIC_route_entry *) entry; | 1493 | (struct IR_IO_APIC_route_entry *) entry; |
| 1504 | int index; | 1494 | int index; |
| 1505 | 1495 | ||
| 1506 | if (!iommu) | 1496 | if (!iommu) |
| 1507 | panic("No mapping iommu for ioapic %d\n", apic); | 1497 | panic("No mapping iommu for ioapic %d\n", apic_id); |
| 1508 | 1498 | ||
| 1509 | index = alloc_irte(iommu, irq, 1); | 1499 | index = alloc_irte(iommu, irq, 1); |
| 1510 | if (index < 0) | 1500 | if (index < 0) |
| 1511 | panic("Failed to allocate IRTE for ioapic %d\n", apic); | 1501 | panic("Failed to allocate IRTE for ioapic %d\n", apic_id); |
| 1512 | 1502 | ||
| 1513 | memset(&irte, 0, sizeof(irte)); | 1503 | memset(&irte, 0, sizeof(irte)); |
| 1514 | 1504 | ||
| 1515 | irte.present = 1; | 1505 | irte.present = 1; |
| 1516 | irte.dst_mode = INT_DEST_MODE; | 1506 | irte.dst_mode = apic->irq_dest_mode; |
| 1517 | irte.trigger_mode = trigger; | 1507 | irte.trigger_mode = trigger; |
| 1518 | irte.dlvry_mode = INT_DELIVERY_MODE; | 1508 | irte.dlvry_mode = apic->irq_delivery_mode; |
| 1519 | irte.vector = vector; | 1509 | irte.vector = vector; |
| 1520 | irte.dest_id = IRTE_DEST(destination); | 1510 | irte.dest_id = IRTE_DEST(destination); |
| 1521 | 1511 | ||
| @@ -1528,8 +1518,8 @@ static int setup_ioapic_entry(int apic, int irq, | |||
| 1528 | } else | 1518 | } else |
| 1529 | #endif | 1519 | #endif |
| 1530 | { | 1520 | { |
| 1531 | entry->delivery_mode = INT_DELIVERY_MODE; | 1521 | entry->delivery_mode = apic->irq_delivery_mode; |
| 1532 | entry->dest_mode = INT_DEST_MODE; | 1522 | entry->dest_mode = apic->irq_dest_mode; |
| 1533 | entry->dest = destination; | 1523 | entry->dest = destination; |
| 1534 | } | 1524 | } |
| 1535 | 1525 | ||
| @@ -1546,7 +1536,7 @@ static int setup_ioapic_entry(int apic, int irq, | |||
| 1546 | return 0; | 1536 | return 0; |
| 1547 | } | 1537 | } |
| 1548 | 1538 | ||
| 1549 | static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_desc *desc, | 1539 | static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq_desc *desc, |
| 1550 | int trigger, int polarity) | 1540 | int trigger, int polarity) |
| 1551 | { | 1541 | { |
| 1552 | struct irq_cfg *cfg; | 1542 | struct irq_cfg *cfg; |
| @@ -1558,22 +1548,22 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_de | |||
| 1558 | 1548 | ||
| 1559 | cfg = desc->chip_data; | 1549 | cfg = desc->chip_data; |
| 1560 | 1550 | ||
| 1561 | if (assign_irq_vector(irq, cfg, TARGET_CPUS)) | 1551 | if (assign_irq_vector(irq, cfg, apic->target_cpus())) |
| 1562 | return; | 1552 | return; |
| 1563 | 1553 | ||
| 1564 | dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); | 1554 | dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); |
| 1565 | 1555 | ||
| 1566 | apic_printk(APIC_VERBOSE,KERN_DEBUG | 1556 | apic_printk(APIC_VERBOSE,KERN_DEBUG |
| 1567 | "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " | 1557 | "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " |
| 1568 | "IRQ %d Mode:%i Active:%i)\n", | 1558 | "IRQ %d Mode:%i Active:%i)\n", |
| 1569 | apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, | 1559 | apic_id, mp_ioapics[apic_id].apicid, pin, cfg->vector, |
| 1570 | irq, trigger, polarity); | 1560 | irq, trigger, polarity); |
| 1571 | 1561 | ||
| 1572 | 1562 | ||
| 1573 | if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry, | 1563 | if (setup_ioapic_entry(mp_ioapics[apic_id].apicid, irq, &entry, |
| 1574 | dest, trigger, polarity, cfg->vector)) { | 1564 | dest, trigger, polarity, cfg->vector)) { |
| 1575 | printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", | 1565 | printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", |
| 1576 | mp_ioapics[apic].mp_apicid, pin); | 1566 | mp_ioapics[apic_id].apicid, pin); |
| 1577 | __clear_irq_vector(irq, cfg); | 1567 | __clear_irq_vector(irq, cfg); |
| 1578 | return; | 1568 | return; |
| 1579 | } | 1569 | } |
| @@ -1582,12 +1572,12 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_de | |||
| 1582 | if (irq < NR_IRQS_LEGACY) | 1572 | if (irq < NR_IRQS_LEGACY) |
| 1583 | disable_8259A_irq(irq); | 1573 | disable_8259A_irq(irq); |
| 1584 | 1574 | ||
| 1585 | ioapic_write_entry(apic, pin, entry); | 1575 | ioapic_write_entry(apic_id, pin, entry); |
| 1586 | } | 1576 | } |
| 1587 | 1577 | ||
| 1588 | static void __init setup_IO_APIC_irqs(void) | 1578 | static void __init setup_IO_APIC_irqs(void) |
| 1589 | { | 1579 | { |
| 1590 | int apic, pin, idx, irq; | 1580 | int apic_id, pin, idx, irq; |
| 1591 | int notcon = 0; | 1581 | int notcon = 0; |
| 1592 | struct irq_desc *desc; | 1582 | struct irq_desc *desc; |
| 1593 | struct irq_cfg *cfg; | 1583 | struct irq_cfg *cfg; |
| @@ -1595,21 +1585,19 @@ static void __init setup_IO_APIC_irqs(void) | |||
| 1595 | 1585 | ||
| 1596 | apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); | 1586 | apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); |
| 1597 | 1587 | ||
| 1598 | for (apic = 0; apic < nr_ioapics; apic++) { | 1588 | for (apic_id = 0; apic_id < nr_ioapics; apic_id++) { |
| 1599 | for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { | 1589 | for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) { |
| 1600 | 1590 | ||
| 1601 | idx = find_irq_entry(apic, pin, mp_INT); | 1591 | idx = find_irq_entry(apic_id, pin, mp_INT); |
| 1602 | if (idx == -1) { | 1592 | if (idx == -1) { |
| 1603 | if (!notcon) { | 1593 | if (!notcon) { |
| 1604 | notcon = 1; | 1594 | notcon = 1; |
| 1605 | apic_printk(APIC_VERBOSE, | 1595 | apic_printk(APIC_VERBOSE, |
| 1606 | KERN_DEBUG " %d-%d", | 1596 | KERN_DEBUG " %d-%d", |
| 1607 | mp_ioapics[apic].mp_apicid, | 1597 | mp_ioapics[apic_id].apicid, pin); |
| 1608 | pin); | ||
| 1609 | } else | 1598 | } else |
| 1610 | apic_printk(APIC_VERBOSE, " %d-%d", | 1599 | apic_printk(APIC_VERBOSE, " %d-%d", |
| 1611 | mp_ioapics[apic].mp_apicid, | 1600 | mp_ioapics[apic_id].apicid, pin); |
| 1612 | pin); | ||
| 1613 | continue; | 1601 | continue; |
| 1614 | } | 1602 | } |
| 1615 | if (notcon) { | 1603 | if (notcon) { |
| @@ -1618,20 +1606,25 @@ static void __init setup_IO_APIC_irqs(void) | |||
| 1618 | notcon = 0; | 1606 | notcon = 0; |
| 1619 | } | 1607 | } |
| 1620 | 1608 | ||
| 1621 | irq = pin_2_irq(idx, apic, pin); | 1609 | irq = pin_2_irq(idx, apic_id, pin); |
| 1622 | #ifdef CONFIG_X86_32 | 1610 | |
| 1623 | if (multi_timer_check(apic, irq)) | 1611 | /* |
| 1612 | * Skip the timer IRQ if there's a quirk handler | ||
| 1613 | * installed and if it returns 1: | ||
| 1614 | */ | ||
| 1615 | if (apic->multi_timer_check && | ||
| 1616 | apic->multi_timer_check(apic_id, irq)) | ||
| 1624 | continue; | 1617 | continue; |
| 1625 | #endif | 1618 | |
| 1626 | desc = irq_to_desc_alloc_cpu(irq, cpu); | 1619 | desc = irq_to_desc_alloc_cpu(irq, cpu); |
| 1627 | if (!desc) { | 1620 | if (!desc) { |
| 1628 | printk(KERN_INFO "can not get irq_desc for %d\n", irq); | 1621 | printk(KERN_INFO "can not get irq_desc for %d\n", irq); |
| 1629 | continue; | 1622 | continue; |
| 1630 | } | 1623 | } |
| 1631 | cfg = desc->chip_data; | 1624 | cfg = desc->chip_data; |
| 1632 | add_pin_to_irq_cpu(cfg, cpu, apic, pin); | 1625 | add_pin_to_irq_cpu(cfg, cpu, apic_id, pin); |
| 1633 | 1626 | ||
| 1634 | setup_IO_APIC_irq(apic, pin, irq, desc, | 1627 | setup_IO_APIC_irq(apic_id, pin, irq, desc, |
| 1635 | irq_trigger(idx), irq_polarity(idx)); | 1628 | irq_trigger(idx), irq_polarity(idx)); |
| 1636 | } | 1629 | } |
| 1637 | } | 1630 | } |
| @@ -1644,7 +1637,7 @@ static void __init setup_IO_APIC_irqs(void) | |||
| 1644 | /* | 1637 | /* |
| 1645 | * Set up the timer pin, possibly with the 8259A-master behind. | 1638 | * Set up the timer pin, possibly with the 8259A-master behind. |
| 1646 | */ | 1639 | */ |
| 1647 | static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, | 1640 | static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin, |
| 1648 | int vector) | 1641 | int vector) |
| 1649 | { | 1642 | { |
| 1650 | struct IO_APIC_route_entry entry; | 1643 | struct IO_APIC_route_entry entry; |
| @@ -1660,10 +1653,10 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, | |||
| 1660 | * We use logical delivery to get the timer IRQ | 1653 | * We use logical delivery to get the timer IRQ |
| 1661 | * to the first CPU. | 1654 | * to the first CPU. |
| 1662 | */ | 1655 | */ |
| 1663 | entry.dest_mode = INT_DEST_MODE; | 1656 | entry.dest_mode = apic->irq_dest_mode; |
| 1664 | entry.mask = 1; /* mask IRQ now */ | 1657 | entry.mask = 0; /* don't mask IRQ for edge */ |
| 1665 | entry.dest = cpu_mask_to_apicid(TARGET_CPUS); | 1658 | entry.dest = apic->cpu_mask_to_apicid(apic->target_cpus()); |
| 1666 | entry.delivery_mode = INT_DELIVERY_MODE; | 1659 | entry.delivery_mode = apic->irq_delivery_mode; |
| 1667 | entry.polarity = 0; | 1660 | entry.polarity = 0; |
| 1668 | entry.trigger = 0; | 1661 | entry.trigger = 0; |
| 1669 | entry.vector = vector; | 1662 | entry.vector = vector; |
| @@ -1677,7 +1670,7 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, | |||
| 1677 | /* | 1670 | /* |
| 1678 | * Add it to the IO-APIC irq-routing table: | 1671 | * Add it to the IO-APIC irq-routing table: |
| 1679 | */ | 1672 | */ |
| 1680 | ioapic_write_entry(apic, pin, entry); | 1673 | ioapic_write_entry(apic_id, pin, entry); |
| 1681 | } | 1674 | } |
| 1682 | 1675 | ||
| 1683 | 1676 | ||
| @@ -1699,7 +1692,7 @@ __apicdebuginit(void) print_IO_APIC(void) | |||
| 1699 | printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); | 1692 | printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); |
| 1700 | for (i = 0; i < nr_ioapics; i++) | 1693 | for (i = 0; i < nr_ioapics; i++) |
| 1701 | printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", | 1694 | printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", |
| 1702 | mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]); | 1695 | mp_ioapics[i].apicid, nr_ioapic_registers[i]); |
| 1703 | 1696 | ||
| 1704 | /* | 1697 | /* |
| 1705 | * We are a bit conservative about what we expect. We have to | 1698 | * We are a bit conservative about what we expect. We have to |
| @@ -1719,7 +1712,7 @@ __apicdebuginit(void) print_IO_APIC(void) | |||
| 1719 | spin_unlock_irqrestore(&ioapic_lock, flags); | 1712 | spin_unlock_irqrestore(&ioapic_lock, flags); |
| 1720 | 1713 | ||
| 1721 | printk("\n"); | 1714 | printk("\n"); |
| 1722 | printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid); | 1715 | printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].apicid); |
| 1723 | printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); | 1716 | printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); |
| 1724 | printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); | 1717 | printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); |
| 1725 | printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); | 1718 | printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); |
| @@ -1980,13 +1973,6 @@ void __init enable_IO_APIC(void) | |||
| 1980 | int apic; | 1973 | int apic; |
| 1981 | unsigned long flags; | 1974 | unsigned long flags; |
| 1982 | 1975 | ||
| 1983 | #ifdef CONFIG_X86_32 | ||
| 1984 | int i; | ||
| 1985 | if (!pirqs_enabled) | ||
| 1986 | for (i = 0; i < MAX_PIRQS; i++) | ||
| 1987 | pirq_entries[i] = -1; | ||
| 1988 | #endif | ||
| 1989 | |||
| 1990 | /* | 1976 | /* |
| 1991 | * The number of IO-APIC IRQ registers (== #pins): | 1977 | * The number of IO-APIC IRQ registers (== #pins): |
| 1992 | */ | 1978 | */ |
| @@ -2090,7 +2076,7 @@ static void __init setup_ioapic_ids_from_mpc(void) | |||
| 2090 | { | 2076 | { |
| 2091 | union IO_APIC_reg_00 reg_00; | 2077 | union IO_APIC_reg_00 reg_00; |
| 2092 | physid_mask_t phys_id_present_map; | 2078 | physid_mask_t phys_id_present_map; |
| 2093 | int apic; | 2079 | int apic_id; |
| 2094 | int i; | 2080 | int i; |
| 2095 | unsigned char old_id; | 2081 | unsigned char old_id; |
| 2096 | unsigned long flags; | 2082 | unsigned long flags; |
| @@ -2109,26 +2095,26 @@ static void __init setup_ioapic_ids_from_mpc(void) | |||
| 2109 | * This is broken; anything with a real cpu count has to | 2095 | * This is broken; anything with a real cpu count has to |
| 2110 | * circumvent this idiocy regardless. | 2096 | * circumvent this idiocy regardless. |
| 2111 | */ | 2097 | */ |
| 2112 | phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map); | 2098 | phys_id_present_map = apic->ioapic_phys_id_map(phys_cpu_present_map); |
| 2113 | 2099 | ||
| 2114 | /* | 2100 | /* |
| 2115 | * Set the IOAPIC ID to the value stored in the MPC table. | 2101 | * Set the IOAPIC ID to the value stored in the MPC table. |
| 2116 | */ | 2102 | */ |
| 2117 | for (apic = 0; apic < nr_ioapics; apic++) { | 2103 | for (apic_id = 0; apic_id < nr_ioapics; apic_id++) { |
| 2118 | 2104 | ||
| 2119 | /* Read the register 0 value */ | 2105 | /* Read the register 0 value */ |
| 2120 | spin_lock_irqsave(&ioapic_lock, flags); | 2106 | spin_lock_irqsave(&ioapic_lock, flags); |
| 2121 | reg_00.raw = io_apic_read(apic, 0); | 2107 | reg_00.raw = io_apic_read(apic_id, 0); |
| 2122 | spin_unlock_irqrestore(&ioapic_lock, flags); | 2108 | spin_unlock_irqrestore(&ioapic_lock, flags); |
| 2123 | 2109 | ||
| 2124 | old_id = mp_ioapics[apic].mp_apicid; | 2110 | old_id = mp_ioapics[apic_id].apicid; |
| 2125 | 2111 | ||
| 2126 | if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) { | 2112 | if (mp_ioapics[apic_id].apicid >= get_physical_broadcast()) { |
| 2127 | printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", | 2113 | printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", |
| 2128 | apic, mp_ioapics[apic].mp_apicid); | 2114 | apic_id, mp_ioapics[apic_id].apicid); |
| 2129 | printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", | 2115 | printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", |
| 2130 | reg_00.bits.ID); | 2116 | reg_00.bits.ID); |
| 2131 | mp_ioapics[apic].mp_apicid = reg_00.bits.ID; | 2117 | mp_ioapics[apic_id].apicid = reg_00.bits.ID; |
| 2132 | } | 2118 | } |
| 2133 | 2119 | ||
| 2134 | /* | 2120 | /* |
| @@ -2136,10 +2122,10 @@ static void __init setup_ioapic_ids_from_mpc(void) | |||
| 2136 | * system must have a unique ID or we get lots of nice | 2122 | * system must have a unique ID or we get lots of nice |
| 2137 | * 'stuck on smp_invalidate_needed IPI wait' messages. | 2123 | * 'stuck on smp_invalidate_needed IPI wait' messages. |
| 2138 | */ | 2124 | */ |
| 2139 | if (check_apicid_used(phys_id_present_map, | 2125 | if (apic->check_apicid_used(phys_id_present_map, |
| 2140 | mp_ioapics[apic].mp_apicid)) { | 2126 | mp_ioapics[apic_id].apicid)) { |
| 2141 | printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", | 2127 | printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", |
| 2142 | apic, mp_ioapics[apic].mp_apicid); | 2128 | apic_id, mp_ioapics[apic_id].apicid); |
| 2143 | for (i = 0; i < get_physical_broadcast(); i++) | 2129 | for (i = 0; i < get_physical_broadcast(); i++) |
| 2144 | if (!physid_isset(i, phys_id_present_map)) | 2130 | if (!physid_isset(i, phys_id_present_map)) |
| 2145 | break; | 2131 | break; |
| @@ -2148,13 +2134,13 @@ static void __init setup_ioapic_ids_from_mpc(void) | |||
| 2148 | printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", | 2134 | printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", |
| 2149 | i); | 2135 | i); |
| 2150 | physid_set(i, phys_id_present_map); | 2136 | physid_set(i, phys_id_present_map); |
| 2151 | mp_ioapics[apic].mp_apicid = i; | 2137 | mp_ioapics[apic_id].apicid = i; |
| 2152 | } else { | 2138 | } else { |
| 2153 | physid_mask_t tmp; | 2139 | physid_mask_t tmp; |
| 2154 | tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid); | 2140 | tmp = apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid); |
| 2155 | apic_printk(APIC_VERBOSE, "Setting %d in the " | 2141 | apic_printk(APIC_VERBOSE, "Setting %d in the " |
| 2156 | "phys_id_present_map\n", | 2142 | "phys_id_present_map\n", |
| 2157 | mp_ioapics[apic].mp_apicid); | 2143 | mp_ioapics[apic_id].apicid); |
| 2158 | physids_or(phys_id_present_map, phys_id_present_map, tmp); | 2144 | physids_or(phys_id_present_map, phys_id_present_map, tmp); |
| 2159 | } | 2145 | } |
| 2160 | 2146 | ||
| @@ -2163,11 +2149,11 @@ static void __init setup_ioapic_ids_from_mpc(void) | |||
| 2163 | * We need to adjust the IRQ routing table | 2149 | * We need to adjust the IRQ routing table |
| 2164 | * if the ID changed. | 2150 | * if the ID changed. |
| 2165 | */ | 2151 | */ |
| 2166 | if (old_id != mp_ioapics[apic].mp_apicid) | 2152 | if (old_id != mp_ioapics[apic_id].apicid) |
| 2167 | for (i = 0; i < mp_irq_entries; i++) | 2153 | for (i = 0; i < mp_irq_entries; i++) |
| 2168 | if (mp_irqs[i].mp_dstapic == old_id) | 2154 | if (mp_irqs[i].dstapic == old_id) |
| 2169 | mp_irqs[i].mp_dstapic | 2155 | mp_irqs[i].dstapic |
| 2170 | = mp_ioapics[apic].mp_apicid; | 2156 | = mp_ioapics[apic_id].apicid; |
| 2171 | 2157 | ||
| 2172 | /* | 2158 | /* |
| 2173 | * Read the right value from the MPC table and | 2159 | * Read the right value from the MPC table and |
| @@ -2175,20 +2161,20 @@ static void __init setup_ioapic_ids_from_mpc(void) | |||
| 2175 | */ | 2161 | */ |
| 2176 | apic_printk(APIC_VERBOSE, KERN_INFO | 2162 | apic_printk(APIC_VERBOSE, KERN_INFO |
| 2177 | "...changing IO-APIC physical APIC ID to %d ...", | 2163 | "...changing IO-APIC physical APIC ID to %d ...", |
| 2178 | mp_ioapics[apic].mp_apicid); | 2164 | mp_ioapics[apic_id].apicid); |
| 2179 | 2165 | ||
| 2180 | reg_00.bits.ID = mp_ioapics[apic].mp_apicid; | 2166 | reg_00.bits.ID = mp_ioapics[apic_id].apicid; |
| 2181 | spin_lock_irqsave(&ioapic_lock, flags); | 2167 | spin_lock_irqsave(&ioapic_lock, flags); |
| 2182 | io_apic_write(apic, 0, reg_00.raw); | 2168 | io_apic_write(apic_id, 0, reg_00.raw); |
| 2183 | spin_unlock_irqrestore(&ioapic_lock, flags); | 2169 | spin_unlock_irqrestore(&ioapic_lock, flags); |
| 2184 | 2170 | ||
| 2185 | /* | 2171 | /* |
| 2186 | * Sanity check | 2172 | * Sanity check |
| 2187 | */ | 2173 | */ |
| 2188 | spin_lock_irqsave(&ioapic_lock, flags); | 2174 | spin_lock_irqsave(&ioapic_lock, flags); |
| 2189 | reg_00.raw = io_apic_read(apic, 0); | 2175 | reg_00.raw = io_apic_read(apic_id, 0); |
| 2190 | spin_unlock_irqrestore(&ioapic_lock, flags); | 2176 | spin_unlock_irqrestore(&ioapic_lock, flags); |
| 2191 | if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid) | 2177 | if (reg_00.bits.ID != mp_ioapics[apic_id].apicid) |
| 2192 | printk("could not set ID!\n"); | 2178 | printk("could not set ID!\n"); |
| 2193 | else | 2179 | else |
| 2194 | apic_printk(APIC_VERBOSE, " ok.\n"); | 2180 | apic_printk(APIC_VERBOSE, " ok.\n"); |
| @@ -2291,7 +2277,7 @@ static int ioapic_retrigger_irq(unsigned int irq) | |||
| 2291 | unsigned long flags; | 2277 | unsigned long flags; |
| 2292 | 2278 | ||
| 2293 | spin_lock_irqsave(&vector_lock, flags); | 2279 | spin_lock_irqsave(&vector_lock, flags); |
| 2294 | send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector); | 2280 | apic->send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector); |
| 2295 | spin_unlock_irqrestore(&vector_lock, flags); | 2281 | spin_unlock_irqrestore(&vector_lock, flags); |
| 2296 | 2282 | ||
| 2297 | return 1; | 2283 | return 1; |
| @@ -2299,7 +2285,7 @@ static int ioapic_retrigger_irq(unsigned int irq) | |||
| 2299 | #else | 2285 | #else |
| 2300 | static int ioapic_retrigger_irq(unsigned int irq) | 2286 | static int ioapic_retrigger_irq(unsigned int irq) |
| 2301 | { | 2287 | { |
| 2302 | send_IPI_self(irq_cfg(irq)->vector); | 2288 | apic->send_IPI_self(irq_cfg(irq)->vector); |
| 2303 | 2289 | ||
| 2304 | return 1; | 2290 | return 1; |
| 2305 | } | 2291 | } |
| @@ -2363,7 +2349,7 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) | |||
| 2363 | 2349 | ||
| 2364 | set_extra_move_desc(desc, mask); | 2350 | set_extra_move_desc(desc, mask); |
| 2365 | 2351 | ||
| 2366 | dest = cpu_mask_to_apicid_and(cfg->domain, mask); | 2352 | dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask); |
| 2367 | 2353 | ||
| 2368 | modify_ioapic_rte = desc->status & IRQ_LEVEL; | 2354 | modify_ioapic_rte = desc->status & IRQ_LEVEL; |
| 2369 | if (modify_ioapic_rte) { | 2355 | if (modify_ioapic_rte) { |
| @@ -2383,7 +2369,7 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) | |||
| 2383 | if (cfg->move_in_progress) | 2369 | if (cfg->move_in_progress) |
| 2384 | send_cleanup_vector(cfg); | 2370 | send_cleanup_vector(cfg); |
| 2385 | 2371 | ||
| 2386 | cpumask_copy(&desc->affinity, mask); | 2372 | cpumask_copy(desc->affinity, mask); |
| 2387 | } | 2373 | } |
| 2388 | 2374 | ||
| 2389 | static int migrate_irq_remapped_level_desc(struct irq_desc *desc) | 2375 | static int migrate_irq_remapped_level_desc(struct irq_desc *desc) |
| @@ -2405,11 +2391,11 @@ static int migrate_irq_remapped_level_desc(struct irq_desc *desc) | |||
| 2405 | } | 2391 | } |
| 2406 | 2392 | ||
| 2407 | /* everthing is clear. we have right of way */ | 2393 | /* everthing is clear. we have right of way */ |
| 2408 | migrate_ioapic_irq_desc(desc, &desc->pending_mask); | 2394 | migrate_ioapic_irq_desc(desc, desc->pending_mask); |
| 2409 | 2395 | ||
| 2410 | ret = 0; | 2396 | ret = 0; |
| 2411 | desc->status &= ~IRQ_MOVE_PENDING; | 2397 | desc->status &= ~IRQ_MOVE_PENDING; |
| 2412 | cpumask_clear(&desc->pending_mask); | 2398 | cpumask_clear(desc->pending_mask); |
| 2413 | 2399 | ||
| 2414 | unmask: | 2400 | unmask: |
| 2415 | unmask_IO_APIC_irq_desc(desc); | 2401 | unmask_IO_APIC_irq_desc(desc); |
| @@ -2434,7 +2420,7 @@ static void ir_irq_migration(struct work_struct *work) | |||
| 2434 | continue; | 2420 | continue; |
| 2435 | } | 2421 | } |
| 2436 | 2422 | ||
| 2437 | desc->chip->set_affinity(irq, &desc->pending_mask); | 2423 | desc->chip->set_affinity(irq, desc->pending_mask); |
| 2438 | spin_unlock_irqrestore(&desc->lock, flags); | 2424 | spin_unlock_irqrestore(&desc->lock, flags); |
| 2439 | } | 2425 | } |
| 2440 | } | 2426 | } |
| @@ -2448,7 +2434,7 @@ static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, | |||
| 2448 | { | 2434 | { |
| 2449 | if (desc->status & IRQ_LEVEL) { | 2435 | if (desc->status & IRQ_LEVEL) { |
| 2450 | desc->status |= IRQ_MOVE_PENDING; | 2436 | desc->status |= IRQ_MOVE_PENDING; |
| 2451 | cpumask_copy(&desc->pending_mask, mask); | 2437 | cpumask_copy(desc->pending_mask, mask); |
| 2452 | migrate_irq_remapped_level_desc(desc); | 2438 | migrate_irq_remapped_level_desc(desc); |
| 2453 | return; | 2439 | return; |
| 2454 | } | 2440 | } |
| @@ -2516,7 +2502,7 @@ static void irq_complete_move(struct irq_desc **descp) | |||
| 2516 | 2502 | ||
| 2517 | /* domain has not changed, but affinity did */ | 2503 | /* domain has not changed, but affinity did */ |
| 2518 | me = smp_processor_id(); | 2504 | me = smp_processor_id(); |
| 2519 | if (cpu_isset(me, desc->affinity)) { | 2505 | if (cpumask_test_cpu(me, desc->affinity)) { |
| 2520 | *descp = desc = move_irq_desc(desc, me); | 2506 | *descp = desc = move_irq_desc(desc, me); |
| 2521 | /* get the new one */ | 2507 | /* get the new one */ |
| 2522 | cfg = desc->chip_data; | 2508 | cfg = desc->chip_data; |
| @@ -2867,19 +2853,15 @@ static inline void __init check_timer(void) | |||
| 2867 | int cpu = boot_cpu_id; | 2853 | int cpu = boot_cpu_id; |
| 2868 | int apic1, pin1, apic2, pin2; | 2854 | int apic1, pin1, apic2, pin2; |
| 2869 | unsigned long flags; | 2855 | unsigned long flags; |
| 2870 | unsigned int ver; | ||
| 2871 | int no_pin1 = 0; | 2856 | int no_pin1 = 0; |
| 2872 | 2857 | ||
| 2873 | local_irq_save(flags); | 2858 | local_irq_save(flags); |
| 2874 | 2859 | ||
| 2875 | ver = apic_read(APIC_LVR); | ||
| 2876 | ver = GET_APIC_VERSION(ver); | ||
| 2877 | |||
| 2878 | /* | 2860 | /* |
| 2879 | * get/set the timer IRQ vector: | 2861 | * get/set the timer IRQ vector: |
| 2880 | */ | 2862 | */ |
| 2881 | disable_8259A_irq(0); | 2863 | disable_8259A_irq(0); |
| 2882 | assign_irq_vector(0, cfg, TARGET_CPUS); | 2864 | assign_irq_vector(0, cfg, apic->target_cpus()); |
| 2883 | 2865 | ||
| 2884 | /* | 2866 | /* |
| 2885 | * As IRQ0 is to be enabled in the 8259A, the virtual | 2867 | * As IRQ0 is to be enabled in the 8259A, the virtual |
| @@ -2893,7 +2875,13 @@ static inline void __init check_timer(void) | |||
| 2893 | apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); | 2875 | apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); |
| 2894 | init_8259A(1); | 2876 | init_8259A(1); |
| 2895 | #ifdef CONFIG_X86_32 | 2877 | #ifdef CONFIG_X86_32 |
| 2896 | timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); | 2878 | { |
| 2879 | unsigned int ver; | ||
| 2880 | |||
| 2881 | ver = apic_read(APIC_LVR); | ||
| 2882 | ver = GET_APIC_VERSION(ver); | ||
| 2883 | timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); | ||
| 2884 | } | ||
| 2897 | #endif | 2885 | #endif |
| 2898 | 2886 | ||
| 2899 | pin1 = find_isa_irq_pin(0, mp_INT); | 2887 | pin1 = find_isa_irq_pin(0, mp_INT); |
| @@ -2932,8 +2920,17 @@ static inline void __init check_timer(void) | |||
| 2932 | if (no_pin1) { | 2920 | if (no_pin1) { |
| 2933 | add_pin_to_irq_cpu(cfg, cpu, apic1, pin1); | 2921 | add_pin_to_irq_cpu(cfg, cpu, apic1, pin1); |
| 2934 | setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); | 2922 | setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); |
| 2923 | } else { | ||
| 2924 | /* for edge trigger, setup_IO_APIC_irq already | ||
| 2925 | * leave it unmasked. | ||
| 2926 | * so only need to unmask if it is level-trigger | ||
| 2927 | * do we really have level trigger timer? | ||
| 2928 | */ | ||
| 2929 | int idx; | ||
| 2930 | idx = find_irq_entry(apic1, pin1, mp_INT); | ||
| 2931 | if (idx != -1 && irq_trigger(idx)) | ||
| 2932 | unmask_IO_APIC_irq_desc(desc); | ||
| 2935 | } | 2933 | } |
| 2936 | unmask_IO_APIC_irq_desc(desc); | ||
| 2937 | if (timer_irq_works()) { | 2934 | if (timer_irq_works()) { |
| 2938 | if (nmi_watchdog == NMI_IO_APIC) { | 2935 | if (nmi_watchdog == NMI_IO_APIC) { |
| 2939 | setup_nmi(); | 2936 | setup_nmi(); |
| @@ -2947,6 +2944,7 @@ static inline void __init check_timer(void) | |||
| 2947 | if (intr_remapping_enabled) | 2944 | if (intr_remapping_enabled) |
| 2948 | panic("timer doesn't work through Interrupt-remapped IO-APIC"); | 2945 | panic("timer doesn't work through Interrupt-remapped IO-APIC"); |
| 2949 | #endif | 2946 | #endif |
| 2947 | local_irq_disable(); | ||
| 2950 | clear_IO_APIC_pin(apic1, pin1); | 2948 | clear_IO_APIC_pin(apic1, pin1); |
| 2951 | if (!no_pin1) | 2949 | if (!no_pin1) |
| 2952 | apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " | 2950 | apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " |
| @@ -2961,7 +2959,6 @@ static inline void __init check_timer(void) | |||
| 2961 | */ | 2959 | */ |
| 2962 | replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2); | 2960 | replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2); |
| 2963 | setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); | 2961 | setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); |
| 2964 | unmask_IO_APIC_irq_desc(desc); | ||
| 2965 | enable_8259A_irq(0); | 2962 | enable_8259A_irq(0); |
| 2966 | if (timer_irq_works()) { | 2963 | if (timer_irq_works()) { |
| 2967 | apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); | 2964 | apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); |
| @@ -2976,6 +2973,7 @@ static inline void __init check_timer(void) | |||
| 2976 | /* | 2973 | /* |
| 2977 | * Cleanup, just in case ... | 2974 | * Cleanup, just in case ... |
| 2978 | */ | 2975 | */ |
| 2976 | local_irq_disable(); | ||
| 2979 | disable_8259A_irq(0); | 2977 | disable_8259A_irq(0); |
| 2980 | clear_IO_APIC_pin(apic2, pin2); | 2978 | clear_IO_APIC_pin(apic2, pin2); |
| 2981 | apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); | 2979 | apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); |
| @@ -3001,6 +2999,7 @@ static inline void __init check_timer(void) | |||
| 3001 | apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); | 2999 | apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); |
| 3002 | goto out; | 3000 | goto out; |
| 3003 | } | 3001 | } |
| 3002 | local_irq_disable(); | ||
| 3004 | disable_8259A_irq(0); | 3003 | disable_8259A_irq(0); |
| 3005 | apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); | 3004 | apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); |
| 3006 | apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); | 3005 | apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); |
| @@ -3018,6 +3017,7 @@ static inline void __init check_timer(void) | |||
| 3018 | apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); | 3017 | apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); |
| 3019 | goto out; | 3018 | goto out; |
| 3020 | } | 3019 | } |
| 3020 | local_irq_disable(); | ||
| 3021 | apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n"); | 3021 | apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n"); |
| 3022 | panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " | 3022 | panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " |
| 3023 | "report. Then try booting with the 'noapic' option.\n"); | 3023 | "report. Then try booting with the 'noapic' option.\n"); |
| @@ -3047,13 +3047,9 @@ out: | |||
| 3047 | void __init setup_IO_APIC(void) | 3047 | void __init setup_IO_APIC(void) |
| 3048 | { | 3048 | { |
| 3049 | 3049 | ||
| 3050 | #ifdef CONFIG_X86_32 | ||
| 3051 | enable_IO_APIC(); | ||
| 3052 | #else | ||
| 3053 | /* | 3050 | /* |
| 3054 | * calling enable_IO_APIC() is moved to setup_local_APIC for BP | 3051 | * calling enable_IO_APIC() is moved to setup_local_APIC for BP |
| 3055 | */ | 3052 | */ |
| 3056 | #endif | ||
| 3057 | 3053 | ||
| 3058 | io_apic_irqs = ~PIC_IRQS; | 3054 | io_apic_irqs = ~PIC_IRQS; |
| 3059 | 3055 | ||
| @@ -3118,8 +3114,8 @@ static int ioapic_resume(struct sys_device *dev) | |||
| 3118 | 3114 | ||
| 3119 | spin_lock_irqsave(&ioapic_lock, flags); | 3115 | spin_lock_irqsave(&ioapic_lock, flags); |
| 3120 | reg_00.raw = io_apic_read(dev->id, 0); | 3116 | reg_00.raw = io_apic_read(dev->id, 0); |
| 3121 | if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) { | 3117 | if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) { |
| 3122 | reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid; | 3118 | reg_00.bits.ID = mp_ioapics[dev->id].apicid; |
| 3123 | io_apic_write(dev->id, 0, reg_00.raw); | 3119 | io_apic_write(dev->id, 0, reg_00.raw); |
| 3124 | } | 3120 | } |
| 3125 | spin_unlock_irqrestore(&ioapic_lock, flags); | 3121 | spin_unlock_irqrestore(&ioapic_lock, flags); |
| @@ -3169,6 +3165,7 @@ static int __init ioapic_init_sysfs(void) | |||
| 3169 | 3165 | ||
| 3170 | device_initcall(ioapic_init_sysfs); | 3166 | device_initcall(ioapic_init_sysfs); |
| 3171 | 3167 | ||
| 3168 | static int nr_irqs_gsi = NR_IRQS_LEGACY; | ||
| 3172 | /* | 3169 | /* |
| 3173 | * Dynamic irq allocate and deallocation | 3170 | * Dynamic irq allocate and deallocation |
| 3174 | */ | 3171 | */ |
| @@ -3183,11 +3180,11 @@ unsigned int create_irq_nr(unsigned int irq_want) | |||
| 3183 | struct irq_desc *desc_new = NULL; | 3180 | struct irq_desc *desc_new = NULL; |
| 3184 | 3181 | ||
| 3185 | irq = 0; | 3182 | irq = 0; |
| 3186 | spin_lock_irqsave(&vector_lock, flags); | 3183 | if (irq_want < nr_irqs_gsi) |
| 3187 | for (new = irq_want; new < NR_IRQS; new++) { | 3184 | irq_want = nr_irqs_gsi; |
| 3188 | if (platform_legacy_irq(new)) | ||
| 3189 | continue; | ||
| 3190 | 3185 | ||
| 3186 | spin_lock_irqsave(&vector_lock, flags); | ||
| 3187 | for (new = irq_want; new < nr_irqs; new++) { | ||
| 3191 | desc_new = irq_to_desc_alloc_cpu(new, cpu); | 3188 | desc_new = irq_to_desc_alloc_cpu(new, cpu); |
| 3192 | if (!desc_new) { | 3189 | if (!desc_new) { |
| 3193 | printk(KERN_INFO "can not get irq_desc for %d\n", new); | 3190 | printk(KERN_INFO "can not get irq_desc for %d\n", new); |
| @@ -3197,7 +3194,7 @@ unsigned int create_irq_nr(unsigned int irq_want) | |||
| 3197 | 3194 | ||
| 3198 | if (cfg_new->vector != 0) | 3195 | if (cfg_new->vector != 0) |
| 3199 | continue; | 3196 | continue; |
| 3200 | if (__assign_irq_vector(new, cfg_new, TARGET_CPUS) == 0) | 3197 | if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0) |
| 3201 | irq = new; | 3198 | irq = new; |
| 3202 | break; | 3199 | break; |
| 3203 | } | 3200 | } |
| @@ -3212,7 +3209,6 @@ unsigned int create_irq_nr(unsigned int irq_want) | |||
| 3212 | return irq; | 3209 | return irq; |
| 3213 | } | 3210 | } |
| 3214 | 3211 | ||
| 3215 | static int nr_irqs_gsi = NR_IRQS_LEGACY; | ||
| 3216 | int create_irq(void) | 3212 | int create_irq(void) |
| 3217 | { | 3213 | { |
| 3218 | unsigned int irq_want; | 3214 | unsigned int irq_want; |
| @@ -3259,12 +3255,15 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms | |||
| 3259 | int err; | 3255 | int err; |
| 3260 | unsigned dest; | 3256 | unsigned dest; |
| 3261 | 3257 | ||
| 3258 | if (disable_apic) | ||
| 3259 | return -ENXIO; | ||
| 3260 | |||
| 3262 | cfg = irq_cfg(irq); | 3261 | cfg = irq_cfg(irq); |
| 3263 | err = assign_irq_vector(irq, cfg, TARGET_CPUS); | 3262 | err = assign_irq_vector(irq, cfg, apic->target_cpus()); |
| 3264 | if (err) | 3263 | if (err) |
| 3265 | return err; | 3264 | return err; |
| 3266 | 3265 | ||
| 3267 | dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); | 3266 | dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); |
| 3268 | 3267 | ||
| 3269 | #ifdef CONFIG_INTR_REMAP | 3268 | #ifdef CONFIG_INTR_REMAP |
| 3270 | if (irq_remapped(irq)) { | 3269 | if (irq_remapped(irq)) { |
| @@ -3278,9 +3277,9 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms | |||
| 3278 | memset (&irte, 0, sizeof(irte)); | 3277 | memset (&irte, 0, sizeof(irte)); |
| 3279 | 3278 | ||
| 3280 | irte.present = 1; | 3279 | irte.present = 1; |
| 3281 | irte.dst_mode = INT_DEST_MODE; | 3280 | irte.dst_mode = apic->irq_dest_mode; |
| 3282 | irte.trigger_mode = 0; /* edge */ | 3281 | irte.trigger_mode = 0; /* edge */ |
| 3283 | irte.dlvry_mode = INT_DELIVERY_MODE; | 3282 | irte.dlvry_mode = apic->irq_delivery_mode; |
| 3284 | irte.vector = cfg->vector; | 3283 | irte.vector = cfg->vector; |
| 3285 | irte.dest_id = IRTE_DEST(dest); | 3284 | irte.dest_id = IRTE_DEST(dest); |
| 3286 | 3285 | ||
| @@ -3298,10 +3297,10 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms | |||
| 3298 | msg->address_hi = MSI_ADDR_BASE_HI; | 3297 | msg->address_hi = MSI_ADDR_BASE_HI; |
| 3299 | msg->address_lo = | 3298 | msg->address_lo = |
| 3300 | MSI_ADDR_BASE_LO | | 3299 | MSI_ADDR_BASE_LO | |
| 3301 | ((INT_DEST_MODE == 0) ? | 3300 | ((apic->irq_dest_mode == 0) ? |
| 3302 | MSI_ADDR_DEST_MODE_PHYSICAL: | 3301 | MSI_ADDR_DEST_MODE_PHYSICAL: |
| 3303 | MSI_ADDR_DEST_MODE_LOGICAL) | | 3302 | MSI_ADDR_DEST_MODE_LOGICAL) | |
| 3304 | ((INT_DELIVERY_MODE != dest_LowestPrio) ? | 3303 | ((apic->irq_delivery_mode != dest_LowestPrio) ? |
| 3305 | MSI_ADDR_REDIRECTION_CPU: | 3304 | MSI_ADDR_REDIRECTION_CPU: |
| 3306 | MSI_ADDR_REDIRECTION_LOWPRI) | | 3305 | MSI_ADDR_REDIRECTION_LOWPRI) | |
| 3307 | MSI_ADDR_DEST_ID(dest); | 3306 | MSI_ADDR_DEST_ID(dest); |
| @@ -3309,7 +3308,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms | |||
| 3309 | msg->data = | 3308 | msg->data = |
| 3310 | MSI_DATA_TRIGGER_EDGE | | 3309 | MSI_DATA_TRIGGER_EDGE | |
| 3311 | MSI_DATA_LEVEL_ASSERT | | 3310 | MSI_DATA_LEVEL_ASSERT | |
| 3312 | ((INT_DELIVERY_MODE != dest_LowestPrio) ? | 3311 | ((apic->irq_delivery_mode != dest_LowestPrio) ? |
| 3313 | MSI_DATA_DELIVERY_FIXED: | 3312 | MSI_DATA_DELIVERY_FIXED: |
| 3314 | MSI_DATA_DELIVERY_LOWPRI) | | 3313 | MSI_DATA_DELIVERY_LOWPRI) | |
| 3315 | MSI_DATA_VECTOR(cfg->vector); | 3314 | MSI_DATA_VECTOR(cfg->vector); |
| @@ -3464,40 +3463,6 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) | |||
| 3464 | return 0; | 3463 | return 0; |
| 3465 | } | 3464 | } |
| 3466 | 3465 | ||
| 3467 | int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc) | ||
| 3468 | { | ||
| 3469 | unsigned int irq; | ||
| 3470 | int ret; | ||
| 3471 | unsigned int irq_want; | ||
| 3472 | |||
| 3473 | irq_want = nr_irqs_gsi; | ||
| 3474 | irq = create_irq_nr(irq_want); | ||
| 3475 | if (irq == 0) | ||
| 3476 | return -1; | ||
| 3477 | |||
| 3478 | #ifdef CONFIG_INTR_REMAP | ||
| 3479 | if (!intr_remapping_enabled) | ||
| 3480 | goto no_ir; | ||
| 3481 | |||
| 3482 | ret = msi_alloc_irte(dev, irq, 1); | ||
| 3483 | if (ret < 0) | ||
| 3484 | goto error; | ||
| 3485 | no_ir: | ||
| 3486 | #endif | ||
| 3487 | ret = setup_msi_irq(dev, msidesc, irq); | ||
| 3488 | if (ret < 0) { | ||
| 3489 | destroy_irq(irq); | ||
| 3490 | return ret; | ||
| 3491 | } | ||
| 3492 | return 0; | ||
| 3493 | |||
| 3494 | #ifdef CONFIG_INTR_REMAP | ||
| 3495 | error: | ||
| 3496 | destroy_irq(irq); | ||
| 3497 | return ret; | ||
| 3498 | #endif | ||
| 3499 | } | ||
| 3500 | |||
| 3501 | int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) | 3466 | int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) |
| 3502 | { | 3467 | { |
| 3503 | unsigned int irq; | 3468 | unsigned int irq; |
| @@ -3514,9 +3479,9 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) | |||
| 3514 | sub_handle = 0; | 3479 | sub_handle = 0; |
| 3515 | list_for_each_entry(msidesc, &dev->msi_list, list) { | 3480 | list_for_each_entry(msidesc, &dev->msi_list, list) { |
| 3516 | irq = create_irq_nr(irq_want); | 3481 | irq = create_irq_nr(irq_want); |
| 3517 | irq_want++; | ||
| 3518 | if (irq == 0) | 3482 | if (irq == 0) |
| 3519 | return -1; | 3483 | return -1; |
| 3484 | irq_want = irq + 1; | ||
| 3520 | #ifdef CONFIG_INTR_REMAP | 3485 | #ifdef CONFIG_INTR_REMAP |
| 3521 | if (!intr_remapping_enabled) | 3486 | if (!intr_remapping_enabled) |
| 3522 | goto no_ir; | 3487 | goto no_ir; |
| @@ -3727,13 +3692,17 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) | |||
| 3727 | struct irq_cfg *cfg; | 3692 | struct irq_cfg *cfg; |
| 3728 | int err; | 3693 | int err; |
| 3729 | 3694 | ||
| 3695 | if (disable_apic) | ||
| 3696 | return -ENXIO; | ||
| 3697 | |||
| 3730 | cfg = irq_cfg(irq); | 3698 | cfg = irq_cfg(irq); |
| 3731 | err = assign_irq_vector(irq, cfg, TARGET_CPUS); | 3699 | err = assign_irq_vector(irq, cfg, apic->target_cpus()); |
| 3732 | if (!err) { | 3700 | if (!err) { |
| 3733 | struct ht_irq_msg msg; | 3701 | struct ht_irq_msg msg; |
| 3734 | unsigned dest; | 3702 | unsigned dest; |
| 3735 | 3703 | ||
| 3736 | dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); | 3704 | dest = apic->cpu_mask_to_apicid_and(cfg->domain, |
| 3705 | apic->target_cpus()); | ||
| 3737 | 3706 | ||
| 3738 | msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); | 3707 | msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); |
| 3739 | 3708 | ||
| @@ -3741,11 +3710,11 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) | |||
| 3741 | HT_IRQ_LOW_BASE | | 3710 | HT_IRQ_LOW_BASE | |
| 3742 | HT_IRQ_LOW_DEST_ID(dest) | | 3711 | HT_IRQ_LOW_DEST_ID(dest) | |
| 3743 | HT_IRQ_LOW_VECTOR(cfg->vector) | | 3712 | HT_IRQ_LOW_VECTOR(cfg->vector) | |
| 3744 | ((INT_DEST_MODE == 0) ? | 3713 | ((apic->irq_dest_mode == 0) ? |
| 3745 | HT_IRQ_LOW_DM_PHYSICAL : | 3714 | HT_IRQ_LOW_DM_PHYSICAL : |
| 3746 | HT_IRQ_LOW_DM_LOGICAL) | | 3715 | HT_IRQ_LOW_DM_LOGICAL) | |
| 3747 | HT_IRQ_LOW_RQEOI_EDGE | | 3716 | HT_IRQ_LOW_RQEOI_EDGE | |
| 3748 | ((INT_DELIVERY_MODE != dest_LowestPrio) ? | 3717 | ((apic->irq_delivery_mode != dest_LowestPrio) ? |
| 3749 | HT_IRQ_LOW_MT_FIXED : | 3718 | HT_IRQ_LOW_MT_FIXED : |
| 3750 | HT_IRQ_LOW_MT_ARBITRATED) | | 3719 | HT_IRQ_LOW_MT_ARBITRATED) | |
| 3751 | HT_IRQ_LOW_IRQ_MASKED; | 3720 | HT_IRQ_LOW_IRQ_MASKED; |
| @@ -3761,7 +3730,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) | |||
| 3761 | } | 3730 | } |
| 3762 | #endif /* CONFIG_HT_IRQ */ | 3731 | #endif /* CONFIG_HT_IRQ */ |
| 3763 | 3732 | ||
| 3764 | #ifdef CONFIG_X86_64 | 3733 | #ifdef CONFIG_X86_UV |
| 3765 | /* | 3734 | /* |
| 3766 | * Re-target the irq to the specified CPU and enable the specified MMR located | 3735 | * Re-target the irq to the specified CPU and enable the specified MMR located |
| 3767 | * on the specified blade to allow the sending of MSIs to the specified CPU. | 3736 | * on the specified blade to allow the sending of MSIs to the specified CPU. |
| @@ -3793,12 +3762,12 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, | |||
| 3793 | BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); | 3762 | BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); |
| 3794 | 3763 | ||
| 3795 | entry->vector = cfg->vector; | 3764 | entry->vector = cfg->vector; |
| 3796 | entry->delivery_mode = INT_DELIVERY_MODE; | 3765 | entry->delivery_mode = apic->irq_delivery_mode; |
| 3797 | entry->dest_mode = INT_DEST_MODE; | 3766 | entry->dest_mode = apic->irq_dest_mode; |
| 3798 | entry->polarity = 0; | 3767 | entry->polarity = 0; |
| 3799 | entry->trigger = 0; | 3768 | entry->trigger = 0; |
| 3800 | entry->mask = 0; | 3769 | entry->mask = 0; |
| 3801 | entry->dest = cpu_mask_to_apicid(eligible_cpu); | 3770 | entry->dest = apic->cpu_mask_to_apicid(eligible_cpu); |
| 3802 | 3771 | ||
| 3803 | mmr_pnode = uv_blade_to_pnode(mmr_blade); | 3772 | mmr_pnode = uv_blade_to_pnode(mmr_blade); |
| 3804 | uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); | 3773 | uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); |
| @@ -3861,6 +3830,28 @@ void __init probe_nr_irqs_gsi(void) | |||
| 3861 | printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi); | 3830 | printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi); |
| 3862 | } | 3831 | } |
| 3863 | 3832 | ||
| 3833 | #ifdef CONFIG_SPARSE_IRQ | ||
| 3834 | int __init arch_probe_nr_irqs(void) | ||
| 3835 | { | ||
| 3836 | int nr; | ||
| 3837 | |||
| 3838 | if (nr_irqs > (NR_VECTORS * nr_cpu_ids)) | ||
| 3839 | nr_irqs = NR_VECTORS * nr_cpu_ids; | ||
| 3840 | |||
| 3841 | nr = nr_irqs_gsi + 8 * nr_cpu_ids; | ||
| 3842 | #if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ) | ||
| 3843 | /* | ||
| 3844 | * for MSI and HT dyn irq | ||
| 3845 | */ | ||
| 3846 | nr += nr_irqs_gsi * 16; | ||
| 3847 | #endif | ||
| 3848 | if (nr < nr_irqs) | ||
| 3849 | nr_irqs = nr; | ||
| 3850 | |||
| 3851 | return 0; | ||
| 3852 | } | ||
| 3853 | #endif | ||
| 3854 | |||
| 3864 | /* -------------------------------------------------------------------------- | 3855 | /* -------------------------------------------------------------------------- |
| 3865 | ACPI-based IOAPIC Configuration | 3856 | ACPI-based IOAPIC Configuration |
| 3866 | -------------------------------------------------------------------------- */ | 3857 | -------------------------------------------------------------------------- */ |
| @@ -3886,7 +3877,7 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) | |||
| 3886 | */ | 3877 | */ |
| 3887 | 3878 | ||
| 3888 | if (physids_empty(apic_id_map)) | 3879 | if (physids_empty(apic_id_map)) |
| 3889 | apic_id_map = ioapic_phys_id_map(phys_cpu_present_map); | 3880 | apic_id_map = apic->ioapic_phys_id_map(phys_cpu_present_map); |
| 3890 | 3881 | ||
| 3891 | spin_lock_irqsave(&ioapic_lock, flags); | 3882 | spin_lock_irqsave(&ioapic_lock, flags); |
| 3892 | reg_00.raw = io_apic_read(ioapic, 0); | 3883 | reg_00.raw = io_apic_read(ioapic, 0); |
| @@ -3902,10 +3893,10 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) | |||
| 3902 | * Every APIC in a system must have a unique ID or we get lots of nice | 3893 | * Every APIC in a system must have a unique ID or we get lots of nice |
| 3903 | * 'stuck on smp_invalidate_needed IPI wait' messages. | 3894 | * 'stuck on smp_invalidate_needed IPI wait' messages. |
| 3904 | */ | 3895 | */ |
| 3905 | if (check_apicid_used(apic_id_map, apic_id)) { | 3896 | if (apic->check_apicid_used(apic_id_map, apic_id)) { |
| 3906 | 3897 | ||
| 3907 | for (i = 0; i < get_physical_broadcast(); i++) { | 3898 | for (i = 0; i < get_physical_broadcast(); i++) { |
| 3908 | if (!check_apicid_used(apic_id_map, i)) | 3899 | if (!apic->check_apicid_used(apic_id_map, i)) |
| 3909 | break; | 3900 | break; |
| 3910 | } | 3901 | } |
| 3911 | 3902 | ||
| @@ -3918,7 +3909,7 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) | |||
| 3918 | apic_id = i; | 3909 | apic_id = i; |
| 3919 | } | 3910 | } |
| 3920 | 3911 | ||
| 3921 | tmp = apicid_to_cpu_present(apic_id); | 3912 | tmp = apic->apicid_to_cpu_present(apic_id); |
| 3922 | physids_or(apic_id_map, apic_id_map, tmp); | 3913 | physids_or(apic_id_map, apic_id_map, tmp); |
| 3923 | 3914 | ||
| 3924 | if (reg_00.bits.ID != apic_id) { | 3915 | if (reg_00.bits.ID != apic_id) { |
| @@ -3995,8 +3986,8 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) | |||
| 3995 | return -1; | 3986 | return -1; |
| 3996 | 3987 | ||
| 3997 | for (i = 0; i < mp_irq_entries; i++) | 3988 | for (i = 0; i < mp_irq_entries; i++) |
| 3998 | if (mp_irqs[i].mp_irqtype == mp_INT && | 3989 | if (mp_irqs[i].irqtype == mp_INT && |
| 3999 | mp_irqs[i].mp_srcbusirq == bus_irq) | 3990 | mp_irqs[i].srcbusirq == bus_irq) |
| 4000 | break; | 3991 | break; |
| 4001 | if (i >= mp_irq_entries) | 3992 | if (i >= mp_irq_entries) |
| 4002 | return -1; | 3993 | return -1; |
| @@ -4011,7 +4002,7 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) | |||
| 4011 | /* | 4002 | /* |
| 4012 | * This function currently is only a helper for the i386 smp boot process where | 4003 | * This function currently is only a helper for the i386 smp boot process where |
| 4013 | * we need to reprogram the ioredtbls to cater for the cpus which have come online | 4004 | * we need to reprogram the ioredtbls to cater for the cpus which have come online |
| 4014 | * so mask in all cases should simply be TARGET_CPUS | 4005 | * so mask in all cases should simply be apic->target_cpus() |
| 4015 | */ | 4006 | */ |
| 4016 | #ifdef CONFIG_SMP | 4007 | #ifdef CONFIG_SMP |
| 4017 | void __init setup_ioapic_dest(void) | 4008 | void __init setup_ioapic_dest(void) |
| @@ -4050,9 +4041,9 @@ void __init setup_ioapic_dest(void) | |||
| 4050 | */ | 4041 | */ |
| 4051 | if (desc->status & | 4042 | if (desc->status & |
| 4052 | (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) | 4043 | (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) |
| 4053 | mask = &desc->affinity; | 4044 | mask = desc->affinity; |
| 4054 | else | 4045 | else |
| 4055 | mask = TARGET_CPUS; | 4046 | mask = apic->target_cpus(); |
| 4056 | 4047 | ||
| 4057 | #ifdef CONFIG_INTR_REMAP | 4048 | #ifdef CONFIG_INTR_REMAP |
| 4058 | if (intr_remapping_enabled) | 4049 | if (intr_remapping_enabled) |
| @@ -4111,7 +4102,7 @@ void __init ioapic_init_mappings(void) | |||
| 4111 | ioapic_res = ioapic_setup_resources(); | 4102 | ioapic_res = ioapic_setup_resources(); |
| 4112 | for (i = 0; i < nr_ioapics; i++) { | 4103 | for (i = 0; i < nr_ioapics; i++) { |
| 4113 | if (smp_found_config) { | 4104 | if (smp_found_config) { |
| 4114 | ioapic_phys = mp_ioapics[i].mp_apicaddr; | 4105 | ioapic_phys = mp_ioapics[i].apicaddr; |
| 4115 | #ifdef CONFIG_X86_32 | 4106 | #ifdef CONFIG_X86_32 |
| 4116 | if (!ioapic_phys) { | 4107 | if (!ioapic_phys) { |
| 4117 | printk(KERN_ERR | 4108 | printk(KERN_ERR |
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c new file mode 100644 index 000000000000..dbf5445727a9 --- /dev/null +++ b/arch/x86/kernel/apic/ipi.c | |||
| @@ -0,0 +1,164 @@ | |||
| 1 | #include <linux/cpumask.h> | ||
| 2 | #include <linux/interrupt.h> | ||
| 3 | #include <linux/init.h> | ||
| 4 | |||
| 5 | #include <linux/mm.h> | ||
| 6 | #include <linux/delay.h> | ||
| 7 | #include <linux/spinlock.h> | ||
| 8 | #include <linux/kernel_stat.h> | ||
| 9 | #include <linux/mc146818rtc.h> | ||
| 10 | #include <linux/cache.h> | ||
| 11 | #include <linux/cpu.h> | ||
| 12 | #include <linux/module.h> | ||
| 13 | |||
| 14 | #include <asm/smp.h> | ||
| 15 | #include <asm/mtrr.h> | ||
| 16 | #include <asm/tlbflush.h> | ||
| 17 | #include <asm/mmu_context.h> | ||
| 18 | #include <asm/apic.h> | ||
| 19 | #include <asm/proto.h> | ||
| 20 | #include <asm/ipi.h> | ||
| 21 | |||
| 22 | void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, int vector) | ||
| 23 | { | ||
| 24 | unsigned long query_cpu; | ||
| 25 | unsigned long flags; | ||
| 26 | |||
| 27 | /* | ||
| 28 | * Hack. The clustered APIC addressing mode doesn't allow us to send | ||
| 29 | * to an arbitrary mask, so I do a unicast to each CPU instead. | ||
| 30 | * - mbligh | ||
| 31 | */ | ||
| 32 | local_irq_save(flags); | ||
| 33 | for_each_cpu(query_cpu, mask) { | ||
| 34 | __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, | ||
| 35 | query_cpu), vector, APIC_DEST_PHYSICAL); | ||
| 36 | } | ||
| 37 | local_irq_restore(flags); | ||
| 38 | } | ||
| 39 | |||
| 40 | void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask, | ||
| 41 | int vector) | ||
| 42 | { | ||
| 43 | unsigned int this_cpu = smp_processor_id(); | ||
| 44 | unsigned int query_cpu; | ||
| 45 | unsigned long flags; | ||
| 46 | |||
| 47 | /* See Hack comment above */ | ||
| 48 | |||
| 49 | local_irq_save(flags); | ||
| 50 | for_each_cpu(query_cpu, mask) { | ||
| 51 | if (query_cpu == this_cpu) | ||
| 52 | continue; | ||
| 53 | __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, | ||
| 54 | query_cpu), vector, APIC_DEST_PHYSICAL); | ||
| 55 | } | ||
| 56 | local_irq_restore(flags); | ||
| 57 | } | ||
| 58 | |||
| 59 | void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, | ||
| 60 | int vector) | ||
| 61 | { | ||
| 62 | unsigned long flags; | ||
| 63 | unsigned int query_cpu; | ||
| 64 | |||
| 65 | /* | ||
| 66 | * Hack. The clustered APIC addressing mode doesn't allow us to send | ||
| 67 | * to an arbitrary mask, so I do a unicasts to each CPU instead. This | ||
| 68 | * should be modified to do 1 message per cluster ID - mbligh | ||
| 69 | */ | ||
| 70 | |||
| 71 | local_irq_save(flags); | ||
| 72 | for_each_cpu(query_cpu, mask) | ||
| 73 | __default_send_IPI_dest_field( | ||
| 74 | apic->cpu_to_logical_apicid(query_cpu), vector, | ||
| 75 | apic->dest_logical); | ||
| 76 | local_irq_restore(flags); | ||
| 77 | } | ||
| 78 | |||
| 79 | void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask, | ||
| 80 | int vector) | ||
| 81 | { | ||
| 82 | unsigned long flags; | ||
| 83 | unsigned int query_cpu; | ||
| 84 | unsigned int this_cpu = smp_processor_id(); | ||
| 85 | |||
| 86 | /* See Hack comment above */ | ||
| 87 | |||
| 88 | local_irq_save(flags); | ||
| 89 | for_each_cpu(query_cpu, mask) { | ||
| 90 | if (query_cpu == this_cpu) | ||
| 91 | continue; | ||
| 92 | __default_send_IPI_dest_field( | ||
| 93 | apic->cpu_to_logical_apicid(query_cpu), vector, | ||
| 94 | apic->dest_logical); | ||
| 95 | } | ||
| 96 | local_irq_restore(flags); | ||
| 97 | } | ||
| 98 | |||
| 99 | #ifdef CONFIG_X86_32 | ||
| 100 | |||
| 101 | /* | ||
| 102 | * This is only used on smaller machines. | ||
| 103 | */ | ||
| 104 | void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector) | ||
| 105 | { | ||
| 106 | unsigned long mask = cpumask_bits(cpumask)[0]; | ||
| 107 | unsigned long flags; | ||
| 108 | |||
| 109 | local_irq_save(flags); | ||
| 110 | WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]); | ||
| 111 | __default_send_IPI_dest_field(mask, vector, apic->dest_logical); | ||
| 112 | local_irq_restore(flags); | ||
| 113 | } | ||
| 114 | |||
| 115 | void default_send_IPI_allbutself(int vector) | ||
| 116 | { | ||
| 117 | /* | ||
| 118 | * if there are no other CPUs in the system then we get an APIC send | ||
| 119 | * error if we try to broadcast, thus avoid sending IPIs in this case. | ||
| 120 | */ | ||
| 121 | if (!(num_online_cpus() > 1)) | ||
| 122 | return; | ||
| 123 | |||
| 124 | __default_local_send_IPI_allbutself(vector); | ||
| 125 | } | ||
| 126 | |||
| 127 | void default_send_IPI_all(int vector) | ||
| 128 | { | ||
| 129 | __default_local_send_IPI_all(vector); | ||
| 130 | } | ||
| 131 | |||
| 132 | void default_send_IPI_self(int vector) | ||
| 133 | { | ||
| 134 | __default_send_IPI_shortcut(APIC_DEST_SELF, vector, apic->dest_logical); | ||
| 135 | } | ||
| 136 | |||
| 137 | /* must come after the send_IPI functions above for inlining */ | ||
| 138 | static int convert_apicid_to_cpu(int apic_id) | ||
| 139 | { | ||
| 140 | int i; | ||
| 141 | |||
| 142 | for_each_possible_cpu(i) { | ||
| 143 | if (per_cpu(x86_cpu_to_apicid, i) == apic_id) | ||
| 144 | return i; | ||
| 145 | } | ||
| 146 | return -1; | ||
| 147 | } | ||
| 148 | |||
| 149 | int safe_smp_processor_id(void) | ||
| 150 | { | ||
| 151 | int apicid, cpuid; | ||
| 152 | |||
| 153 | if (!boot_cpu_has(X86_FEATURE_APIC)) | ||
| 154 | return 0; | ||
| 155 | |||
| 156 | apicid = hard_smp_processor_id(); | ||
| 157 | if (apicid == BAD_APICID) | ||
| 158 | return 0; | ||
| 159 | |||
| 160 | cpuid = convert_apicid_to_cpu(apicid); | ||
| 161 | |||
| 162 | return cpuid >= 0 ? cpuid : 0; | ||
| 163 | } | ||
| 164 | #endif | ||
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/apic/nmi.c index 7228979f1e7f..d6bd62407152 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/apic/nmi.c | |||
| @@ -34,12 +34,12 @@ | |||
| 34 | 34 | ||
| 35 | #include <asm/mce.h> | 35 | #include <asm/mce.h> |
| 36 | 36 | ||
| 37 | #include <mach_traps.h> | 37 | #include <asm/mach_traps.h> |
| 38 | 38 | ||
| 39 | int unknown_nmi_panic; | 39 | int unknown_nmi_panic; |
| 40 | int nmi_watchdog_enabled; | 40 | int nmi_watchdog_enabled; |
| 41 | 41 | ||
| 42 | static cpumask_t backtrace_mask = CPU_MASK_NONE; | 42 | static cpumask_var_t backtrace_mask; |
| 43 | 43 | ||
| 44 | /* nmi_active: | 44 | /* nmi_active: |
| 45 | * >0: the lapic NMI watchdog is active, but can be disabled | 45 | * >0: the lapic NMI watchdog is active, but can be disabled |
| @@ -61,11 +61,7 @@ static int endflag __initdata; | |||
| 61 | 61 | ||
| 62 | static inline unsigned int get_nmi_count(int cpu) | 62 | static inline unsigned int get_nmi_count(int cpu) |
| 63 | { | 63 | { |
| 64 | #ifdef CONFIG_X86_64 | 64 | return per_cpu(irq_stat, cpu).__nmi_count; |
| 65 | return cpu_pda(cpu)->__nmi_count; | ||
| 66 | #else | ||
| 67 | return nmi_count(cpu); | ||
| 68 | #endif | ||
| 69 | } | 65 | } |
| 70 | 66 | ||
| 71 | static inline int mce_in_progress(void) | 67 | static inline int mce_in_progress(void) |
| @@ -82,12 +78,8 @@ static inline int mce_in_progress(void) | |||
| 82 | */ | 78 | */ |
| 83 | static inline unsigned int get_timer_irqs(int cpu) | 79 | static inline unsigned int get_timer_irqs(int cpu) |
| 84 | { | 80 | { |
| 85 | #ifdef CONFIG_X86_64 | ||
| 86 | return read_pda(apic_timer_irqs) + read_pda(irq0_irqs); | ||
| 87 | #else | ||
| 88 | return per_cpu(irq_stat, cpu).apic_timer_irqs + | 81 | return per_cpu(irq_stat, cpu).apic_timer_irqs + |
| 89 | per_cpu(irq_stat, cpu).irq0_irqs; | 82 | per_cpu(irq_stat, cpu).irq0_irqs; |
| 90 | #endif | ||
| 91 | } | 83 | } |
| 92 | 84 | ||
| 93 | #ifdef CONFIG_SMP | 85 | #ifdef CONFIG_SMP |
| @@ -146,6 +138,7 @@ int __init check_nmi_watchdog(void) | |||
| 146 | if (!prev_nmi_count) | 138 | if (!prev_nmi_count) |
| 147 | goto error; | 139 | goto error; |
| 148 | 140 | ||
| 141 | alloc_cpumask_var(&backtrace_mask, GFP_KERNEL); | ||
| 149 | printk(KERN_INFO "Testing NMI watchdog ... "); | 142 | printk(KERN_INFO "Testing NMI watchdog ... "); |
| 150 | 143 | ||
| 151 | #ifdef CONFIG_SMP | 144 | #ifdef CONFIG_SMP |
| @@ -421,14 +414,14 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) | |||
| 421 | touched = 1; | 414 | touched = 1; |
| 422 | } | 415 | } |
| 423 | 416 | ||
| 424 | if (cpu_isset(cpu, backtrace_mask)) { | 417 | if (cpumask_test_cpu(cpu, backtrace_mask)) { |
| 425 | static DEFINE_SPINLOCK(lock); /* Serialise the printks */ | 418 | static DEFINE_SPINLOCK(lock); /* Serialise the printks */ |
| 426 | 419 | ||
| 427 | spin_lock(&lock); | 420 | spin_lock(&lock); |
| 428 | printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); | 421 | printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); |
| 429 | dump_stack(); | 422 | dump_stack(); |
| 430 | spin_unlock(&lock); | 423 | spin_unlock(&lock); |
| 431 | cpu_clear(cpu, backtrace_mask); | 424 | cpumask_clear_cpu(cpu, backtrace_mask); |
| 432 | } | 425 | } |
| 433 | 426 | ||
| 434 | /* Could check oops_in_progress here too, but it's safer not to */ | 427 | /* Could check oops_in_progress here too, but it's safer not to */ |
| @@ -562,10 +555,10 @@ void __trigger_all_cpu_backtrace(void) | |||
| 562 | { | 555 | { |
| 563 | int i; | 556 | int i; |
| 564 | 557 | ||
| 565 | backtrace_mask = cpu_online_map; | 558 | cpumask_copy(backtrace_mask, cpu_online_mask); |
| 566 | /* Wait for up to 10 seconds for all CPUs to do the backtrace */ | 559 | /* Wait for up to 10 seconds for all CPUs to do the backtrace */ |
| 567 | for (i = 0; i < 10 * 1000; i++) { | 560 | for (i = 0; i < 10 * 1000; i++) { |
| 568 | if (cpus_empty(backtrace_mask)) | 561 | if (cpumask_empty(backtrace_mask)) |
| 569 | break; | 562 | break; |
| 570 | mdelay(1); | 563 | mdelay(1); |
| 571 | } | 564 | } |
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c new file mode 100644 index 000000000000..533e59c6fc82 --- /dev/null +++ b/arch/x86/kernel/apic/numaq_32.c | |||
| @@ -0,0 +1,558 @@ | |||
| 1 | /* | ||
| 2 | * Written by: Patricia Gaughen, IBM Corporation | ||
| 3 | * | ||
| 4 | * Copyright (C) 2002, IBM Corp. | ||
| 5 | * Copyright (C) 2009, Red Hat, Inc., Ingo Molnar | ||
| 6 | * | ||
| 7 | * All rights reserved. | ||
| 8 | * | ||
| 9 | * This program is free software; you can redistribute it and/or modify | ||
| 10 | * it under the terms of the GNU General Public License as published by | ||
| 11 | * the Free Software Foundation; either version 2 of the License, or | ||
| 12 | * (at your option) any later version. | ||
| 13 | * | ||
| 14 | * This program is distributed in the hope that it will be useful, but | ||
| 15 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 16 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
| 17 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
| 18 | * details. | ||
| 19 | * | ||
| 20 | * You should have received a copy of the GNU General Public License | ||
| 21 | * along with this program; if not, write to the Free Software | ||
| 22 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
| 23 | * | ||
| 24 | * Send feedback to <gone@us.ibm.com> | ||
| 25 | */ | ||
| 26 | #include <linux/nodemask.h> | ||
| 27 | #include <linux/topology.h> | ||
| 28 | #include <linux/bootmem.h> | ||
| 29 | #include <linux/threads.h> | ||
| 30 | #include <linux/cpumask.h> | ||
| 31 | #include <linux/kernel.h> | ||
| 32 | #include <linux/mmzone.h> | ||
| 33 | #include <linux/module.h> | ||
| 34 | #include <linux/string.h> | ||
| 35 | #include <linux/init.h> | ||
| 36 | #include <linux/numa.h> | ||
| 37 | #include <linux/smp.h> | ||
| 38 | #include <linux/io.h> | ||
| 39 | #include <linux/mm.h> | ||
| 40 | |||
| 41 | #include <asm/processor.h> | ||
| 42 | #include <asm/fixmap.h> | ||
| 43 | #include <asm/mpspec.h> | ||
| 44 | #include <asm/numaq.h> | ||
| 45 | #include <asm/setup.h> | ||
| 46 | #include <asm/apic.h> | ||
| 47 | #include <asm/e820.h> | ||
| 48 | #include <asm/ipi.h> | ||
| 49 | |||
| 50 | #define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT)) | ||
| 51 | |||
| 52 | int found_numaq; | ||
| 53 | |||
| 54 | /* | ||
| 55 | * Have to match translation table entries to main table entries by counter | ||
| 56 | * hence the mpc_record variable .... can't see a less disgusting way of | ||
| 57 | * doing this .... | ||
| 58 | */ | ||
| 59 | struct mpc_trans { | ||
| 60 | unsigned char mpc_type; | ||
| 61 | unsigned char trans_len; | ||
| 62 | unsigned char trans_type; | ||
| 63 | unsigned char trans_quad; | ||
| 64 | unsigned char trans_global; | ||
| 65 | unsigned char trans_local; | ||
| 66 | unsigned short trans_reserved; | ||
| 67 | }; | ||
| 68 | |||
| 69 | /* x86_quirks member */ | ||
| 70 | static int mpc_record; | ||
| 71 | |||
| 72 | static struct mpc_trans *translation_table[MAX_MPC_ENTRY]; | ||
| 73 | |||
| 74 | int mp_bus_id_to_node[MAX_MP_BUSSES]; | ||
| 75 | int mp_bus_id_to_local[MAX_MP_BUSSES]; | ||
| 76 | int quad_local_to_mp_bus_id[NR_CPUS/4][4]; | ||
| 77 | |||
| 78 | |||
| 79 | static inline void numaq_register_node(int node, struct sys_cfg_data *scd) | ||
| 80 | { | ||
| 81 | struct eachquadmem *eq = scd->eq + node; | ||
| 82 | |||
| 83 | node_set_online(node); | ||
| 84 | |||
| 85 | /* Convert to pages */ | ||
| 86 | node_start_pfn[node] = | ||
| 87 | MB_TO_PAGES(eq->hi_shrd_mem_start - eq->priv_mem_size); | ||
| 88 | |||
| 89 | node_end_pfn[node] = | ||
| 90 | MB_TO_PAGES(eq->hi_shrd_mem_start + eq->hi_shrd_mem_size); | ||
| 91 | |||
| 92 | e820_register_active_regions(node, node_start_pfn[node], | ||
| 93 | node_end_pfn[node]); | ||
| 94 | |||
| 95 | memory_present(node, node_start_pfn[node], node_end_pfn[node]); | ||
| 96 | |||
| 97 | node_remap_size[node] = node_memmap_size_bytes(node, | ||
| 98 | node_start_pfn[node], | ||
| 99 | node_end_pfn[node]); | ||
| 100 | } | ||
| 101 | |||
| 102 | /* | ||
| 103 | * Function: smp_dump_qct() | ||
| 104 | * | ||
| 105 | * Description: gets memory layout from the quad config table. This | ||
| 106 | * function also updates node_online_map with the nodes (quads) present. | ||
| 107 | */ | ||
| 108 | static void __init smp_dump_qct(void) | ||
| 109 | { | ||
| 110 | struct sys_cfg_data *scd; | ||
| 111 | int node; | ||
| 112 | |||
| 113 | scd = (void *)__va(SYS_CFG_DATA_PRIV_ADDR); | ||
| 114 | |||
| 115 | nodes_clear(node_online_map); | ||
| 116 | for_each_node(node) { | ||
| 117 | if (scd->quads_present31_0 & (1 << node)) | ||
| 118 | numaq_register_node(node, scd); | ||
| 119 | } | ||
| 120 | } | ||
| 121 | |||
| 122 | void __cpuinit numaq_tsc_disable(void) | ||
| 123 | { | ||
| 124 | if (!found_numaq) | ||
| 125 | return; | ||
| 126 | |||
| 127 | if (num_online_nodes() > 1) { | ||
| 128 | printk(KERN_DEBUG "NUMAQ: disabling TSC\n"); | ||
| 129 | setup_clear_cpu_cap(X86_FEATURE_TSC); | ||
| 130 | } | ||
| 131 | } | ||
| 132 | |||
| 133 | static int __init numaq_pre_time_init(void) | ||
| 134 | { | ||
| 135 | numaq_tsc_disable(); | ||
| 136 | return 0; | ||
| 137 | } | ||
| 138 | |||
| 139 | static inline int generate_logical_apicid(int quad, int phys_apicid) | ||
| 140 | { | ||
| 141 | return (quad << 4) + (phys_apicid ? phys_apicid << 1 : 1); | ||
| 142 | } | ||
| 143 | |||
| 144 | /* x86_quirks member */ | ||
| 145 | static int mpc_apic_id(struct mpc_cpu *m) | ||
| 146 | { | ||
| 147 | int quad = translation_table[mpc_record]->trans_quad; | ||
| 148 | int logical_apicid = generate_logical_apicid(quad, m->apicid); | ||
| 149 | |||
| 150 | printk(KERN_DEBUG | ||
| 151 | "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n", | ||
| 152 | m->apicid, (m->cpufeature & CPU_FAMILY_MASK) >> 8, | ||
| 153 | (m->cpufeature & CPU_MODEL_MASK) >> 4, | ||
| 154 | m->apicver, quad, logical_apicid); | ||
| 155 | |||
| 156 | return logical_apicid; | ||
| 157 | } | ||
| 158 | |||
| 159 | /* x86_quirks member */ | ||
| 160 | static void mpc_oem_bus_info(struct mpc_bus *m, char *name) | ||
| 161 | { | ||
| 162 | int quad = translation_table[mpc_record]->trans_quad; | ||
| 163 | int local = translation_table[mpc_record]->trans_local; | ||
| 164 | |||
| 165 | mp_bus_id_to_node[m->busid] = quad; | ||
| 166 | mp_bus_id_to_local[m->busid] = local; | ||
| 167 | |||
| 168 | printk(KERN_INFO "Bus #%d is %s (node %d)\n", m->busid, name, quad); | ||
| 169 | } | ||
| 170 | |||
| 171 | /* x86_quirks member */ | ||
| 172 | static void mpc_oem_pci_bus(struct mpc_bus *m) | ||
| 173 | { | ||
| 174 | int quad = translation_table[mpc_record]->trans_quad; | ||
| 175 | int local = translation_table[mpc_record]->trans_local; | ||
| 176 | |||
| 177 | quad_local_to_mp_bus_id[quad][local] = m->busid; | ||
| 178 | } | ||
| 179 | |||
| 180 | static void __init MP_translation_info(struct mpc_trans *m) | ||
| 181 | { | ||
| 182 | printk(KERN_INFO | ||
| 183 | "Translation: record %d, type %d, quad %d, global %d, local %d\n", | ||
| 184 | mpc_record, m->trans_type, m->trans_quad, m->trans_global, | ||
| 185 | m->trans_local); | ||
| 186 | |||
| 187 | if (mpc_record >= MAX_MPC_ENTRY) | ||
| 188 | printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n"); | ||
| 189 | else | ||
| 190 | translation_table[mpc_record] = m; /* stash this for later */ | ||
| 191 | |||
| 192 | if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad)) | ||
| 193 | node_set_online(m->trans_quad); | ||
| 194 | } | ||
| 195 | |||
| 196 | static int __init mpf_checksum(unsigned char *mp, int len) | ||
| 197 | { | ||
| 198 | int sum = 0; | ||
| 199 | |||
| 200 | while (len--) | ||
| 201 | sum += *mp++; | ||
| 202 | |||
| 203 | return sum & 0xFF; | ||
| 204 | } | ||
| 205 | |||
| 206 | /* | ||
| 207 | * Read/parse the MPC oem tables | ||
| 208 | */ | ||
| 209 | static void __init | ||
| 210 | smp_read_mpc_oem(struct mpc_oemtable *oemtable, unsigned short oemsize) | ||
| 211 | { | ||
| 212 | int count = sizeof(*oemtable); /* the header size */ | ||
| 213 | unsigned char *oemptr = ((unsigned char *)oemtable) + count; | ||
| 214 | |||
| 215 | mpc_record = 0; | ||
| 216 | printk(KERN_INFO | ||
| 217 | "Found an OEM MPC table at %8p - parsing it ... \n", oemtable); | ||
| 218 | |||
| 219 | if (memcmp(oemtable->signature, MPC_OEM_SIGNATURE, 4)) { | ||
| 220 | printk(KERN_WARNING | ||
| 221 | "SMP mpc oemtable: bad signature [%c%c%c%c]!\n", | ||
| 222 | oemtable->signature[0], oemtable->signature[1], | ||
| 223 | oemtable->signature[2], oemtable->signature[3]); | ||
| 224 | return; | ||
| 225 | } | ||
| 226 | |||
| 227 | if (mpf_checksum((unsigned char *)oemtable, oemtable->length)) { | ||
| 228 | printk(KERN_WARNING "SMP oem mptable: checksum error!\n"); | ||
| 229 | return; | ||
| 230 | } | ||
| 231 | |||
| 232 | while (count < oemtable->length) { | ||
| 233 | switch (*oemptr) { | ||
| 234 | case MP_TRANSLATION: | ||
| 235 | { | ||
| 236 | struct mpc_trans *m = (void *)oemptr; | ||
| 237 | |||
| 238 | MP_translation_info(m); | ||
| 239 | oemptr += sizeof(*m); | ||
| 240 | count += sizeof(*m); | ||
| 241 | ++mpc_record; | ||
| 242 | break; | ||
| 243 | } | ||
| 244 | default: | ||
| 245 | printk(KERN_WARNING | ||
| 246 | "Unrecognised OEM table entry type! - %d\n", | ||
| 247 | (int)*oemptr); | ||
| 248 | return; | ||
| 249 | } | ||
| 250 | } | ||
| 251 | } | ||
| 252 | |||
| 253 | static int __init numaq_setup_ioapic_ids(void) | ||
| 254 | { | ||
| 255 | /* so can skip it */ | ||
| 256 | return 1; | ||
| 257 | } | ||
| 258 | |||
| 259 | static struct x86_quirks numaq_x86_quirks __initdata = { | ||
| 260 | .arch_pre_time_init = numaq_pre_time_init, | ||
| 261 | .arch_time_init = NULL, | ||
| 262 | .arch_pre_intr_init = NULL, | ||
| 263 | .arch_memory_setup = NULL, | ||
| 264 | .arch_intr_init = NULL, | ||
| 265 | .arch_trap_init = NULL, | ||
| 266 | .mach_get_smp_config = NULL, | ||
| 267 | .mach_find_smp_config = NULL, | ||
| 268 | .mpc_record = &mpc_record, | ||
| 269 | .mpc_apic_id = mpc_apic_id, | ||
| 270 | .mpc_oem_bus_info = mpc_oem_bus_info, | ||
| 271 | .mpc_oem_pci_bus = mpc_oem_pci_bus, | ||
| 272 | .smp_read_mpc_oem = smp_read_mpc_oem, | ||
| 273 | .setup_ioapic_ids = numaq_setup_ioapic_ids, | ||
| 274 | }; | ||
| 275 | |||
| 276 | static __init void early_check_numaq(void) | ||
| 277 | { | ||
| 278 | /* | ||
| 279 | * Find possible boot-time SMP configuration: | ||
| 280 | */ | ||
| 281 | early_find_smp_config(); | ||
| 282 | |||
| 283 | /* | ||
| 284 | * get boot-time SMP configuration: | ||
| 285 | */ | ||
| 286 | if (smp_found_config) | ||
| 287 | early_get_smp_config(); | ||
| 288 | |||
| 289 | if (found_numaq) | ||
| 290 | x86_quirks = &numaq_x86_quirks; | ||
| 291 | } | ||
| 292 | |||
| 293 | int __init get_memcfg_numaq(void) | ||
| 294 | { | ||
| 295 | early_check_numaq(); | ||
| 296 | if (!found_numaq) | ||
| 297 | return 0; | ||
| 298 | smp_dump_qct(); | ||
| 299 | |||
| 300 | return 1; | ||
| 301 | } | ||
| 302 | |||
| 303 | #define NUMAQ_APIC_DFR_VALUE (APIC_DFR_CLUSTER) | ||
| 304 | |||
/* Extract bits 24-27 of @x, the 4-bit APIC ID field. */
static inline unsigned int numaq_get_apic_id(unsigned long x)
{
	unsigned long id_field = x >> 24;

	return id_field & 0x0F;
}
| 309 | |||
/*
 * Send @vector to the CPUs in @mask by delegating to
 * default_send_IPI_mask_sequence_logical().
 */
static inline void numaq_send_IPI_mask(const struct cpumask *mask, int vector)
{
	default_send_IPI_mask_sequence_logical(mask, vector);
}
| 314 | |||
| 315 | static inline void numaq_send_IPI_allbutself(int vector) | ||
| 316 | { | ||
| 317 | default_send_IPI_mask_allbutself_logical(cpu_online_mask, vector); | ||
| 318 | } | ||
| 319 | |||
| 320 | static inline void numaq_send_IPI_all(int vector) | ||
| 321 | { | ||
| 322 | numaq_send_IPI_mask(cpu_online_mask, vector); | ||
| 323 | } | ||
| 324 | |||
| 325 | #define NUMAQ_TRAMPOLINE_PHYS_LOW (0x8) | ||
| 326 | #define NUMAQ_TRAMPOLINE_PHYS_HIGH (0xa) | ||
| 327 | |||
/*
 * CPUs are bootstrapped with NMIs rather than the INIT-STARTUP sequence,
 * so the APIC may be in a weird state when a CPU calls in. Kick it by
 * clearing the local APIC.
 */
static inline void numaq_smp_callin_clear_local_apic(void)
{
	clear_local_APIC();
}
| 336 | |||
| 337 | static inline const struct cpumask *numaq_target_cpus(void) | ||
| 338 | { | ||
| 339 | return cpu_all_mask; | ||
| 340 | } | ||
| 341 | |||
| 342 | static inline unsigned long | ||
| 343 | numaq_check_apicid_used(physid_mask_t bitmap, int apicid) | ||
| 344 | { | ||
| 345 | return physid_isset(apicid, bitmap); | ||
| 346 | } | ||
| 347 | |||
| 348 | static inline unsigned long numaq_check_apicid_present(int bit) | ||
| 349 | { | ||
| 350 | return physid_isset(bit, phys_cpu_present_map); | ||
| 351 | } | ||
| 352 | |||
/* The APIC ID is always reported as registered on NUMA-Q. */
static inline int numaq_apic_id_registered(void)
{
	const int registered = 1;

	return registered;
}
| 357 | |||
/*
 * Intentionally empty: the logical destination setup is already done in
 * NUMA-Q firmware.
 */
static inline void numaq_init_apic_ldr(void)
{
}
| 362 | |||
| 363 | static inline void numaq_setup_apic_routing(void) | ||
| 364 | { | ||
| 365 | printk(KERN_INFO | ||
| 366 | "Enabling APIC mode: NUMA-Q. Using %d I/O APICs\n", | ||
| 367 | nr_ioapics); | ||
| 368 | } | ||
| 369 | |||
/*
 * Skip adding the timer int on secondary nodes, which causes
 * a small but painful rift in the time-space continuum.
 *
 * Returns 1 only for IRQ 0 on an I/O APIC other than APIC 0.
 */
static inline int numaq_multi_timer_check(int apic, int irq)
{
	if (irq != 0)
		return 0;

	return apic != 0;
}
| 378 | |||
| 379 | static inline physid_mask_t numaq_ioapic_phys_id_map(physid_mask_t phys_map) | ||
| 380 | { | ||
| 381 | /* We don't have a good way to do this yet - hack */ | ||
| 382 | return physids_promote(0xFUL); | ||
| 383 | } | ||
| 384 | |||
| 385 | static inline int numaq_cpu_to_logical_apicid(int cpu) | ||
| 386 | { | ||
| 387 | if (cpu >= nr_cpu_ids) | ||
| 388 | return BAD_APICID; | ||
| 389 | return cpu_2_logical_apicid[cpu]; | ||
| 390 | } | ||
| 391 | |||
| 392 | /* | ||
| 393 | * Supporting over 60 cpus on NUMA-Q requires a locality-dependent | ||
| 394 | * cpu to APIC ID relation to properly interact with the intelligent | ||
| 395 | * mode of the cluster controller. | ||
| 396 | */ | ||
| 397 | static inline int numaq_cpu_present_to_apicid(int mps_cpu) | ||
| 398 | { | ||
| 399 | if (mps_cpu < 60) | ||
| 400 | return ((mps_cpu >> 2) << 4) | (1 << (mps_cpu & 0x3)); | ||
| 401 | else | ||
| 402 | return BAD_APICID; | ||
| 403 | } | ||
| 404 | |||
/* The node number is the logical APIC ID shifted right by four bits. */
static inline int numaq_apicid_to_node(int logical_apicid)
{
	int node = logical_apicid >> 4;

	return node;
}
| 409 | |||
| 410 | static inline physid_mask_t numaq_apicid_to_cpu_present(int logical_apicid) | ||
| 411 | { | ||
| 412 | int node = numaq_apicid_to_node(logical_apicid); | ||
| 413 | int cpu = __ffs(logical_apicid & 0xf); | ||
| 414 | |||
| 415 | return physid_mask_of_physid(cpu + 4*node); | ||
| 416 | } | ||
| 417 | |||
| 418 | /* Where the IO area was mapped on multiquad, always 0 otherwise */ | ||
| 419 | void *xquad_portio; | ||
| 420 | |||
/* Every physical APIC ID is reported as present; the argument is unused. */
static inline int numaq_check_phys_apicid_present(int boot_cpu_physical_apicid)
{
	const int present = 1;

	return present;
}
| 425 | |||
/*
 * We use physical apicids here, not logical, so just return the default
 * physical broadcast to stop people from breaking us.
 *
 * @cpumask is ignored; the result is always 0x0F.
 */
static unsigned int numaq_cpu_mask_to_apicid(const struct cpumask *cpumask)
{
	const unsigned int phys_broadcast = 0x0F;

	return phys_broadcast;
}
| 434 | |||
/*
 * Two-mask variant of numaq_cpu_mask_to_apicid(): both masks are ignored
 * and the result is always 0x0F.
 */
static inline unsigned int
numaq_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
			     const struct cpumask *andmask)
{
	const unsigned int phys_broadcast = 0x0F;

	return phys_broadcast;
}
| 441 | |||
/*
 * No NUMA-Q box has a HT CPU, but it can't hurt to use the default code:
 * the package ID is @cpuid_apic shifted right by @index_msb bits.
 */
static inline int numaq_phys_pkg_id(int cpuid_apic, int index_msb)
{
	int pkg_id = cpuid_apic >> index_msb;

	return pkg_id;
}
| 447 | |||
| 448 | static int | ||
| 449 | numaq_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) | ||
| 450 | { | ||
| 451 | if (strncmp(oem, "IBM NUMA", 8)) | ||
| 452 | printk(KERN_ERR "Warning! Not a NUMA-Q system!\n"); | ||
| 453 | else | ||
| 454 | found_numaq = 1; | ||
| 455 | |||
| 456 | return found_numaq; | ||
| 457 | } | ||
| 458 | |||
| 459 | static int probe_numaq(void) | ||
| 460 | { | ||
| 461 | /* already know from get_memcfg_numaq() */ | ||
| 462 | return found_numaq; | ||
| 463 | } | ||
| 464 | |||
| 465 | static void numaq_vector_allocation_domain(int cpu, struct cpumask *retmask) | ||
| 466 | { | ||
| 467 | /* Careful. Some cpus do not strictly honor the set of cpus | ||
| 468 | * specified in the interrupt destination when using lowest | ||
| 469 | * priority interrupt delivery mode. | ||
| 470 | * | ||
| 471 | * In particular there was a hyperthreading cpu observed to | ||
| 472 | * deliver interrupts to the wrong hyperthread when only one | ||
| 473 | * hyperthread was specified in the interrupt desitination. | ||
| 474 | */ | ||
| 475 | cpumask_clear(retmask); | ||
| 476 | cpumask_bits(retmask)[0] = APIC_ALL_CPUS; | ||
| 477 | } | ||
| 478 | |||
| 479 | static void numaq_setup_portio_remap(void) | ||
| 480 | { | ||
| 481 | int num_quads = num_online_nodes(); | ||
| 482 | |||
| 483 | if (num_quads <= 1) | ||
| 484 | return; | ||
| 485 | |||
| 486 | printk(KERN_INFO | ||
| 487 | "Remapping cross-quad port I/O for %d quads\n", num_quads); | ||
| 488 | |||
| 489 | xquad_portio = ioremap(XQUAD_PORTIO_BASE, num_quads*XQUAD_PORTIO_QUAD); | ||
| 490 | |||
| 491 | printk(KERN_INFO | ||
| 492 | "xquad_portio vaddr 0x%08lx, len %08lx\n", | ||
| 493 | (u_long) xquad_portio, (u_long) num_quads*XQUAD_PORTIO_QUAD); | ||
| 494 | } | ||
| 495 | |||
| 496 | struct apic apic_numaq = { | ||
| 497 | |||
| 498 | .name = "NUMAQ", | ||
| 499 | .probe = probe_numaq, | ||
| 500 | .acpi_madt_oem_check = NULL, | ||
| 501 | .apic_id_registered = numaq_apic_id_registered, | ||
| 502 | |||
| 503 | .irq_delivery_mode = dest_LowestPrio, | ||
| 504 | /* physical delivery on LOCAL quad: */ | ||
| 505 | .irq_dest_mode = 0, | ||
| 506 | |||
| 507 | .target_cpus = numaq_target_cpus, | ||
| 508 | .disable_esr = 1, | ||
| 509 | .dest_logical = APIC_DEST_LOGICAL, | ||
| 510 | .check_apicid_used = numaq_check_apicid_used, | ||
| 511 | .check_apicid_present = numaq_check_apicid_present, | ||
| 512 | |||
| 513 | .vector_allocation_domain = numaq_vector_allocation_domain, | ||
| 514 | .init_apic_ldr = numaq_init_apic_ldr, | ||
| 515 | |||
| 516 | .ioapic_phys_id_map = numaq_ioapic_phys_id_map, | ||
| 517 | .setup_apic_routing = numaq_setup_apic_routing, | ||
| 518 | .multi_timer_check = numaq_multi_timer_check, | ||
| 519 | .apicid_to_node = numaq_apicid_to_node, | ||
| 520 | .cpu_to_logical_apicid = numaq_cpu_to_logical_apicid, | ||
| 521 | .cpu_present_to_apicid = numaq_cpu_present_to_apicid, | ||
| 522 | .apicid_to_cpu_present = numaq_apicid_to_cpu_present, | ||
| 523 | .setup_portio_remap = numaq_setup_portio_remap, | ||
| 524 | .check_phys_apicid_present = numaq_check_phys_apicid_present, | ||
| 525 | .enable_apic_mode = NULL, | ||
| 526 | .phys_pkg_id = numaq_phys_pkg_id, | ||
| 527 | .mps_oem_check = numaq_mps_oem_check, | ||
| 528 | |||
| 529 | .get_apic_id = numaq_get_apic_id, | ||
| 530 | .set_apic_id = NULL, | ||
| 531 | .apic_id_mask = 0x0F << 24, | ||
| 532 | |||
| 533 | .cpu_mask_to_apicid = numaq_cpu_mask_to_apicid, | ||
| 534 | .cpu_mask_to_apicid_and = numaq_cpu_mask_to_apicid_and, | ||
| 535 | |||
| 536 | .send_IPI_mask = numaq_send_IPI_mask, | ||
| 537 | .send_IPI_mask_allbutself = NULL, | ||
| 538 | .send_IPI_allbutself = numaq_send_IPI_allbutself, | ||
| 539 | .send_IPI_all = numaq_send_IPI_all, | ||
| 540 | .send_IPI_self = default_send_IPI_self, | ||
| 541 | |||
| 542 | .wakeup_secondary_cpu = wakeup_secondary_cpu_via_nmi, | ||
| 543 | .trampoline_phys_low = NUMAQ_TRAMPOLINE_PHYS_LOW, | ||
| 544 | .trampoline_phys_high = NUMAQ_TRAMPOLINE_PHYS_HIGH, | ||
| 545 | |||
| 546 | /* We don't do anything here because we use NMI's to boot instead */ | ||
| 547 | .wait_for_init_deassert = NULL, | ||
| 548 | |||
| 549 | .smp_callin_clear_local_apic = numaq_smp_callin_clear_local_apic, | ||
| 550 | .inquire_remote_apic = NULL, | ||
| 551 | |||
| 552 | .read = native_apic_mem_read, | ||
| 553 | .write = native_apic_mem_write, | ||
| 554 | .icr_read = native_apic_icr_read, | ||
| 555 | .icr_write = native_apic_icr_write, | ||
| 556 | .wait_icr_idle = native_apic_wait_icr_idle, | ||
| 557 | .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, | ||
| 558 | }; | ||
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c new file mode 100644 index 000000000000..01eda2ac65e4 --- /dev/null +++ b/arch/x86/kernel/apic/probe_32.c | |||
| @@ -0,0 +1,285 @@ | |||
| 1 | /* | ||
| 2 | * Default generic APIC driver. This handles up to 8 CPUs. | ||
| 3 | * | ||
| 4 | * Copyright 2003 Andi Kleen, SuSE Labs. | ||
| 5 | * Subject to the GNU Public License, v.2 | ||
| 6 | * | ||
| 7 | * Generic x86 APIC driver probe layer. | ||
| 8 | */ | ||
| 9 | #include <linux/threads.h> | ||
| 10 | #include <linux/cpumask.h> | ||
| 11 | #include <linux/module.h> | ||
| 12 | #include <linux/string.h> | ||
| 13 | #include <linux/kernel.h> | ||
| 14 | #include <linux/ctype.h> | ||
| 15 | #include <linux/init.h> | ||
| 16 | #include <linux/errno.h> | ||
| 17 | #include <asm/fixmap.h> | ||
| 18 | #include <asm/mpspec.h> | ||
| 19 | #include <asm/apicdef.h> | ||
| 20 | #include <asm/apic.h> | ||
| 21 | #include <asm/setup.h> | ||
| 22 | |||
| 23 | #include <linux/threads.h> | ||
| 24 | #include <linux/cpumask.h> | ||
| 25 | #include <asm/mpspec.h> | ||
| 26 | #include <asm/fixmap.h> | ||
| 27 | #include <asm/apicdef.h> | ||
| 28 | #include <linux/kernel.h> | ||
| 29 | #include <linux/string.h> | ||
| 30 | #include <linux/smp.h> | ||
| 31 | #include <linux/init.h> | ||
| 32 | #include <asm/ipi.h> | ||
| 33 | |||
| 34 | #include <linux/smp.h> | ||
| 35 | #include <linux/init.h> | ||
| 36 | #include <linux/interrupt.h> | ||
| 37 | #include <asm/acpi.h> | ||
| 38 | #include <asm/e820.h> | ||
| 39 | #include <asm/setup.h> | ||
| 40 | |||
/*
 * Default for no_broadcast: 1 when CPU hotplug is configured, 0
 * otherwise. The "no_ipi_broadcast=" boot option can override it.
 */
#ifdef CONFIG_HOTPLUG_CPU
# define DEFAULT_SEND_IPI	(1)
#else
# define DEFAULT_SEND_IPI	(0)
#endif

int no_broadcast = DEFAULT_SEND_IPI;
| 48 | |||
| 49 | static __init int no_ipi_broadcast(char *str) | ||
| 50 | { | ||
| 51 | get_option(&str, &no_broadcast); | ||
| 52 | pr_info("Using %s mode\n", | ||
| 53 | no_broadcast ? "No IPI Broadcast" : "IPI Broadcast"); | ||
| 54 | return 1; | ||
| 55 | } | ||
| 56 | __setup("no_ipi_broadcast=", no_ipi_broadcast); | ||
| 57 | |||
| 58 | static int __init print_ipi_mode(void) | ||
| 59 | { | ||
| 60 | pr_info("Using IPI %s mode\n", | ||
| 61 | no_broadcast ? "No-Shortcut" : "Shortcut"); | ||
| 62 | return 0; | ||
| 63 | } | ||
| 64 | late_initcall(print_ipi_mode); | ||
| 65 | |||
/*
 * Default routing setup: with I/O APIC support configured, log the flat
 * APIC mode and the number of I/O APICs; otherwise do nothing.
 */
void default_setup_apic_routing(void)
{
#ifdef CONFIG_X86_IO_APIC
	printk(KERN_INFO "Enabling APIC mode: Flat. Using %d I/O APICs\n",
		nr_ioapics);
#endif
}
| 74 | |||
| 75 | static void default_vector_allocation_domain(int cpu, struct cpumask *retmask) | ||
| 76 | { | ||
| 77 | /* | ||
| 78 | * Careful. Some cpus do not strictly honor the set of cpus | ||
| 79 | * specified in the interrupt destination when using lowest | ||
| 80 | * priority interrupt delivery mode. | ||
| 81 | * | ||
| 82 | * In particular there was a hyperthreading cpu observed to | ||
| 83 | * deliver interrupts to the wrong hyperthread when only one | ||
| 84 | * hyperthread was specified in the interrupt desitination. | ||
| 85 | */ | ||
| 86 | cpumask_clear(retmask); | ||
| 87 | cpumask_bits(retmask)[0] = APIC_ALL_CPUS; | ||
| 88 | } | ||
| 89 | |||
/*
 * Probe hook of the default driver: always succeeds, so it should be
 * called last.
 */
static int probe_default(void)
{
	const int matched = 1;

	return matched;
}
| 95 | |||
| 96 | struct apic apic_default = { | ||
| 97 | |||
| 98 | .name = "default", | ||
| 99 | .probe = probe_default, | ||
| 100 | .acpi_madt_oem_check = NULL, | ||
| 101 | .apic_id_registered = default_apic_id_registered, | ||
| 102 | |||
| 103 | .irq_delivery_mode = dest_LowestPrio, | ||
| 104 | /* logical delivery broadcast to all CPUs: */ | ||
| 105 | .irq_dest_mode = 1, | ||
| 106 | |||
| 107 | .target_cpus = default_target_cpus, | ||
| 108 | .disable_esr = 0, | ||
| 109 | .dest_logical = APIC_DEST_LOGICAL, | ||
| 110 | .check_apicid_used = default_check_apicid_used, | ||
| 111 | .check_apicid_present = default_check_apicid_present, | ||
| 112 | |||
| 113 | .vector_allocation_domain = default_vector_allocation_domain, | ||
| 114 | .init_apic_ldr = default_init_apic_ldr, | ||
| 115 | |||
| 116 | .ioapic_phys_id_map = default_ioapic_phys_id_map, | ||
| 117 | .setup_apic_routing = default_setup_apic_routing, | ||
| 118 | .multi_timer_check = NULL, | ||
| 119 | .apicid_to_node = default_apicid_to_node, | ||
| 120 | .cpu_to_logical_apicid = default_cpu_to_logical_apicid, | ||
| 121 | .cpu_present_to_apicid = default_cpu_present_to_apicid, | ||
| 122 | .apicid_to_cpu_present = default_apicid_to_cpu_present, | ||
| 123 | .setup_portio_remap = NULL, | ||
| 124 | .check_phys_apicid_present = default_check_phys_apicid_present, | ||
| 125 | .enable_apic_mode = NULL, | ||
| 126 | .phys_pkg_id = default_phys_pkg_id, | ||
| 127 | .mps_oem_check = NULL, | ||
| 128 | |||
| 129 | .get_apic_id = default_get_apic_id, | ||
| 130 | .set_apic_id = NULL, | ||
| 131 | .apic_id_mask = 0x0F << 24, | ||
| 132 | |||
| 133 | .cpu_mask_to_apicid = default_cpu_mask_to_apicid, | ||
| 134 | .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, | ||
| 135 | |||
| 136 | .send_IPI_mask = default_send_IPI_mask_logical, | ||
| 137 | .send_IPI_mask_allbutself = default_send_IPI_mask_allbutself_logical, | ||
| 138 | .send_IPI_allbutself = default_send_IPI_allbutself, | ||
| 139 | .send_IPI_all = default_send_IPI_all, | ||
| 140 | .send_IPI_self = default_send_IPI_self, | ||
| 141 | |||
| 142 | .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, | ||
| 143 | .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, | ||
| 144 | |||
| 145 | .wait_for_init_deassert = default_wait_for_init_deassert, | ||
| 146 | |||
| 147 | .smp_callin_clear_local_apic = NULL, | ||
| 148 | .inquire_remote_apic = default_inquire_remote_apic, | ||
| 149 | |||
| 150 | .read = native_apic_mem_read, | ||
| 151 | .write = native_apic_mem_write, | ||
| 152 | .icr_read = native_apic_icr_read, | ||
| 153 | .icr_write = native_apic_icr_write, | ||
| 154 | .wait_icr_idle = native_apic_wait_icr_idle, | ||
| 155 | .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, | ||
| 156 | }; | ||
| 157 | |||
| 158 | extern struct apic apic_numaq; | ||
| 159 | extern struct apic apic_summit; | ||
| 160 | extern struct apic apic_bigsmp; | ||
| 161 | extern struct apic apic_es7000; | ||
| 162 | extern struct apic apic_es7000_cluster; | ||
| 163 | extern struct apic apic_default; | ||
| 164 | |||
| 165 | struct apic *apic = &apic_default; | ||
| 166 | EXPORT_SYMBOL_GPL(apic); | ||
| 167 | |||
| 168 | static struct apic *apic_probe[] __initdata = { | ||
| 169 | #ifdef CONFIG_X86_NUMAQ | ||
| 170 | &apic_numaq, | ||
| 171 | #endif | ||
| 172 | #ifdef CONFIG_X86_SUMMIT | ||
| 173 | &apic_summit, | ||
| 174 | #endif | ||
| 175 | #ifdef CONFIG_X86_BIGSMP | ||
| 176 | &apic_bigsmp, | ||
| 177 | #endif | ||
| 178 | #ifdef CONFIG_X86_ES7000 | ||
| 179 | &apic_es7000, | ||
| 180 | &apic_es7000_cluster, | ||
| 181 | #endif | ||
| 182 | &apic_default, /* must be last */ | ||
| 183 | NULL, | ||
| 184 | }; | ||
| 185 | |||
| 186 | static int cmdline_apic __initdata; | ||
| 187 | static int __init parse_apic(char *arg) | ||
| 188 | { | ||
| 189 | int i; | ||
| 190 | |||
| 191 | if (!arg) | ||
| 192 | return -EINVAL; | ||
| 193 | |||
| 194 | for (i = 0; apic_probe[i]; i++) { | ||
| 195 | if (!strcmp(apic_probe[i]->name, arg)) { | ||
| 196 | apic = apic_probe[i]; | ||
| 197 | cmdline_apic = 1; | ||
| 198 | return 0; | ||
| 199 | } | ||
| 200 | } | ||
| 201 | |||
| 202 | /* Parsed again by __setup for debug/verbose */ | ||
| 203 | return 0; | ||
| 204 | } | ||
| 205 | early_param("apic", parse_apic); | ||
| 206 | |||
| 207 | void __init generic_bigsmp_probe(void) | ||
| 208 | { | ||
| 209 | #ifdef CONFIG_X86_BIGSMP | ||
| 210 | /* | ||
| 211 | * This routine is used to switch to bigsmp mode when | ||
| 212 | * - There is no apic= option specified by the user | ||
| 213 | * - generic_apic_probe() has chosen apic_default as the sub_arch | ||
| 214 | * - we find more than 8 CPUs in acpi LAPIC listing with xAPIC support | ||
| 215 | */ | ||
| 216 | |||
| 217 | if (!cmdline_apic && apic == &apic_default) { | ||
| 218 | if (apic_bigsmp.probe()) { | ||
| 219 | apic = &apic_bigsmp; | ||
| 220 | printk(KERN_INFO "Overriding APIC driver with %s\n", | ||
| 221 | apic->name); | ||
| 222 | } | ||
| 223 | } | ||
| 224 | #endif | ||
| 225 | } | ||
| 226 | |||
| 227 | void __init generic_apic_probe(void) | ||
| 228 | { | ||
| 229 | if (!cmdline_apic) { | ||
| 230 | int i; | ||
| 231 | for (i = 0; apic_probe[i]; i++) { | ||
| 232 | if (apic_probe[i]->probe()) { | ||
| 233 | apic = apic_probe[i]; | ||
| 234 | break; | ||
| 235 | } | ||
| 236 | } | ||
| 237 | /* Not visible without early console */ | ||
| 238 | if (!apic_probe[i]) | ||
| 239 | panic("Didn't find an APIC driver"); | ||
| 240 | } | ||
| 241 | printk(KERN_INFO "Using APIC driver %s\n", apic->name); | ||
| 242 | } | ||
| 243 | |||
| 244 | /* These functions can switch the APIC even after the initial ->probe() */ | ||
| 245 | |||
| 246 | int __init | ||
| 247 | generic_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) | ||
| 248 | { | ||
| 249 | int i; | ||
| 250 | |||
| 251 | for (i = 0; apic_probe[i]; ++i) { | ||
| 252 | if (!apic_probe[i]->mps_oem_check) | ||
| 253 | continue; | ||
| 254 | if (!apic_probe[i]->mps_oem_check(mpc, oem, productid)) | ||
| 255 | continue; | ||
| 256 | |||
| 257 | if (!cmdline_apic) { | ||
| 258 | apic = apic_probe[i]; | ||
| 259 | printk(KERN_INFO "Switched to APIC driver `%s'.\n", | ||
| 260 | apic->name); | ||
| 261 | } | ||
| 262 | return 1; | ||
| 263 | } | ||
| 264 | return 0; | ||
| 265 | } | ||
| 266 | |||
| 267 | int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | ||
| 268 | { | ||
| 269 | int i; | ||
| 270 | |||
| 271 | for (i = 0; apic_probe[i]; ++i) { | ||
| 272 | if (!apic_probe[i]->acpi_madt_oem_check) | ||
| 273 | continue; | ||
| 274 | if (!apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) | ||
| 275 | continue; | ||
| 276 | |||
| 277 | if (!cmdline_apic) { | ||
| 278 | apic = apic_probe[i]; | ||
| 279 | printk(KERN_INFO "Switched to APIC driver `%s'.\n", | ||
| 280 | apic->name); | ||
| 281 | } | ||
| 282 | return 1; | ||
| 283 | } | ||
| 284 | return 0; | ||
| 285 | } | ||
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/apic/probe_64.c index 2bced78b0b8e..8d7748efe6a8 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/apic/probe_64.c | |||
| @@ -19,22 +19,27 @@ | |||
| 19 | #include <linux/dmar.h> | 19 | #include <linux/dmar.h> |
| 20 | 20 | ||
| 21 | #include <asm/smp.h> | 21 | #include <asm/smp.h> |
| 22 | #include <asm/apic.h> | ||
| 22 | #include <asm/ipi.h> | 23 | #include <asm/ipi.h> |
| 23 | #include <asm/genapic.h> | ||
| 24 | #include <asm/setup.h> | 24 | #include <asm/setup.h> |
| 25 | 25 | ||
| 26 | extern struct genapic apic_flat; | 26 | extern struct apic apic_flat; |
| 27 | extern struct genapic apic_physflat; | 27 | extern struct apic apic_physflat; |
| 28 | extern struct genapic apic_x2xpic_uv_x; | 28 | extern struct apic apic_x2xpic_uv_x; |
| 29 | extern struct genapic apic_x2apic_phys; | 29 | extern struct apic apic_x2apic_phys; |
| 30 | extern struct genapic apic_x2apic_cluster; | 30 | extern struct apic apic_x2apic_cluster; |
| 31 | 31 | ||
| 32 | struct genapic __read_mostly *genapic = &apic_flat; | 32 | struct apic __read_mostly *apic = &apic_flat; |
| 33 | EXPORT_SYMBOL_GPL(apic); | ||
| 33 | 34 | ||
| 34 | static struct genapic *apic_probe[] __initdata = { | 35 | static struct apic *apic_probe[] __initdata = { |
| 36 | #ifdef CONFIG_X86_UV | ||
| 35 | &apic_x2apic_uv_x, | 37 | &apic_x2apic_uv_x, |
| 38 | #endif | ||
| 39 | #ifdef CONFIG_X86_X2APIC | ||
| 36 | &apic_x2apic_phys, | 40 | &apic_x2apic_phys, |
| 37 | &apic_x2apic_cluster, | 41 | &apic_x2apic_cluster, |
| 42 | #endif | ||
| 38 | &apic_physflat, | 43 | &apic_physflat, |
| 39 | NULL, | 44 | NULL, |
| 40 | }; | 45 | }; |
| @@ -42,39 +47,45 @@ static struct genapic *apic_probe[] __initdata = { | |||
| 42 | /* | 47 | /* |
| 43 | * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. | 48 | * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. |
| 44 | */ | 49 | */ |
| 45 | void __init setup_apic_routing(void) | 50 | void __init default_setup_apic_routing(void) |
| 46 | { | 51 | { |
| 47 | if (genapic == &apic_x2apic_phys || genapic == &apic_x2apic_cluster) { | 52 | #ifdef CONFIG_X86_X2APIC |
| 48 | if (!intr_remapping_enabled) | 53 | if (x2apic && (apic != &apic_x2apic_phys && |
| 49 | genapic = &apic_flat; | 54 | #ifdef CONFIG_X86_UV |
| 55 | apic != &apic_x2apic_uv_x && | ||
| 56 | #endif | ||
| 57 | apic != &apic_x2apic_cluster)) { | ||
| 58 | if (x2apic_phys) | ||
| 59 | apic = &apic_x2apic_phys; | ||
| 60 | else | ||
| 61 | apic = &apic_x2apic_cluster; | ||
| 62 | printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); | ||
| 50 | } | 63 | } |
| 64 | #endif | ||
| 51 | 65 | ||
| 52 | if (genapic == &apic_flat) { | 66 | if (apic == &apic_flat) { |
| 53 | if (max_physical_apicid >= 8) | 67 | if (max_physical_apicid >= 8) |
| 54 | genapic = &apic_physflat; | 68 | apic = &apic_physflat; |
| 55 | printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name); | 69 | printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); |
| 56 | } | 70 | } |
| 57 | |||
| 58 | if (x86_quirks->update_genapic) | ||
| 59 | x86_quirks->update_genapic(); | ||
| 60 | } | 71 | } |
| 61 | 72 | ||
| 62 | /* Same for both flat and physical. */ | 73 | /* Same for both flat and physical. */ |
| 63 | 74 | ||
| 64 | void apic_send_IPI_self(int vector) | 75 | void apic_send_IPI_self(int vector) |
| 65 | { | 76 | { |
| 66 | __send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL); | 77 | __default_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL); |
| 67 | } | 78 | } |
| 68 | 79 | ||
| 69 | int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) | 80 | int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id) |
| 70 | { | 81 | { |
| 71 | int i; | 82 | int i; |
| 72 | 83 | ||
| 73 | for (i = 0; apic_probe[i]; ++i) { | 84 | for (i = 0; apic_probe[i]; ++i) { |
| 74 | if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) { | 85 | if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) { |
| 75 | genapic = apic_probe[i]; | 86 | apic = apic_probe[i]; |
| 76 | printk(KERN_INFO "Setting APIC routing to %s.\n", | 87 | printk(KERN_INFO "Setting APIC routing to %s.\n", |
| 77 | genapic->name); | 88 | apic->name); |
| 78 | return 1; | 89 | return 1; |
| 79 | } | 90 | } |
| 80 | } | 91 | } |
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c new file mode 100644 index 000000000000..9cfe1f415d81 --- /dev/null +++ b/arch/x86/kernel/apic/summit_32.c | |||
| @@ -0,0 +1,576 @@ | |||
| 1 | /* | ||
| 2 | * IBM Summit-Specific Code | ||
| 3 | * | ||
| 4 | * Written By: Matthew Dobson, IBM Corporation | ||
| 5 | * | ||
| 6 | * Copyright (c) 2003 IBM Corp. | ||
| 7 | * | ||
| 8 | * All rights reserved. | ||
| 9 | * | ||
| 10 | * This program is free software; you can redistribute it and/or modify | ||
| 11 | * it under the terms of the GNU General Public License as published by | ||
| 12 | * the Free Software Foundation; either version 2 of the License, or (at | ||
| 13 | * your option) any later version. | ||
| 14 | * | ||
| 15 | * This program is distributed in the hope that it will be useful, but | ||
| 16 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 17 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
| 18 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
| 19 | * details. | ||
| 20 | * | ||
| 21 | * You should have received a copy of the GNU General Public License | ||
| 22 | * along with this program; if not, write to the Free Software | ||
| 23 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
| 24 | * | ||
| 25 | * Send feedback to <colpatch@us.ibm.com> | ||
| 26 | * | ||
| 27 | */ | ||
| 28 | |||
| 29 | #include <linux/mm.h> | ||
| 30 | #include <linux/init.h> | ||
| 31 | #include <asm/io.h> | ||
| 32 | #include <asm/bios_ebda.h> | ||
| 33 | |||
| 34 | /* | ||
| 35 | * APIC driver for the IBM "Summit" chipset. | ||
| 36 | */ | ||
| 37 | #include <linux/threads.h> | ||
| 38 | #include <linux/cpumask.h> | ||
| 39 | #include <asm/mpspec.h> | ||
| 40 | #include <asm/apic.h> | ||
| 41 | #include <asm/smp.h> | ||
| 42 | #include <asm/fixmap.h> | ||
| 43 | #include <asm/apicdef.h> | ||
| 44 | #include <asm/ipi.h> | ||
| 45 | #include <linux/kernel.h> | ||
| 46 | #include <linux/string.h> | ||
| 47 | #include <linux/init.h> | ||
| 48 | #include <linux/gfp.h> | ||
| 49 | #include <linux/smp.h> | ||
| 50 | |||
/* Extract bits 24-31 of @x, the 8-bit APIC ID field. */
static unsigned summit_get_apic_id(unsigned long x)
{
	unsigned long id_field = x >> 24;

	return id_field & 0xFF;
}
| 55 | |||
/*
 * Send @vector to the CPUs in @mask by delegating to
 * default_send_IPI_mask_sequence_logical().
 */
static inline void summit_send_IPI_mask(const struct cpumask *mask, int vector)
{
	default_send_IPI_mask_sequence_logical(mask, vector);
}
| 60 | |||
| 61 | static void summit_send_IPI_allbutself(int vector) | ||
| 62 | { | ||
| 63 | default_send_IPI_mask_allbutself_logical(cpu_online_mask, vector); | ||
| 64 | } | ||
| 65 | |||
| 66 | static void summit_send_IPI_all(int vector) | ||
| 67 | { | ||
| 68 | summit_send_IPI_mask(cpu_online_mask, vector); | ||
| 69 | } | ||
| 70 | |||
| 71 | #include <asm/tsc.h> | ||
| 72 | |||
| 73 | extern int use_cyclone; | ||
| 74 | |||
| 75 | #ifdef CONFIG_X86_SUMMIT_NUMA | ||
| 76 | static void setup_summit(void); | ||
| 77 | #else | ||
| 78 | static inline void setup_summit(void) {} | ||
| 79 | #endif | ||
| 80 | |||
| 81 | static int summit_mps_oem_check(struct mpc_table *mpc, char *oem, | ||
| 82 | char *productid) | ||
| 83 | { | ||
| 84 | if (!strncmp(oem, "IBM ENSW", 8) && | ||
| 85 | (!strncmp(productid, "VIGIL SMP", 9) | ||
| 86 | || !strncmp(productid, "EXA", 3) | ||
| 87 | || !strncmp(productid, "RUTHLESS SMP", 12))){ | ||
| 88 | mark_tsc_unstable("Summit based system"); | ||
| 89 | use_cyclone = 1; /*enable cyclone-timer*/ | ||
| 90 | setup_summit(); | ||
| 91 | return 1; | ||
| 92 | } | ||
| 93 | return 0; | ||
| 94 | } | ||
| 95 | |||
| 96 | /* Hook from generic ACPI tables.c */ | ||
| 97 | static int summit_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | ||
| 98 | { | ||
| 99 | if (!strncmp(oem_id, "IBM", 3) && | ||
| 100 | (!strncmp(oem_table_id, "SERVIGIL", 8) | ||
| 101 | || !strncmp(oem_table_id, "EXA", 3))){ | ||
| 102 | mark_tsc_unstable("Summit based system"); | ||
| 103 | use_cyclone = 1; /*enable cyclone-timer*/ | ||
| 104 | setup_summit(); | ||
| 105 | return 1; | ||
| 106 | } | ||
| 107 | return 0; | ||
| 108 | } | ||
| 109 | |||
/*
 * Header of the Rio Grande table.  Packed: three consecutive bytes.
 */
struct rio_table_hdr {
	/*
	 * Version number of this data structure.
	 * Version 3 adds chassis_num & WP_index.
	 */
	unsigned char version;
	/* # of Scalability devices (Twisters for Vigil) */
	unsigned char num_scal_dev;
	/* # of RIO I/O devices (Cyclones and Winnipegs) */
	unsigned char num_rio_dev;
} __attribute__((packed));
| 116 | |||
/*
 * One scalability device record of the Rio Grande table.  Packed, so the
 * fields follow each other without padding.
 */
struct scal_detail {
	/* Scalability Node ID */
	unsigned char node_id;
	/* Address of 1MB register space */
	unsigned long CBAR;
	/* Node ID port connected to: 0xFF=None */
	unsigned char port0node;
	/* Port num port connected to: 0,1,2, or 0xFF=None */
	unsigned char port0port;
	/* Node ID port connected to: 0xFF = None */
	unsigned char port1node;
	/* Port num port connected to: 0,1,2, or 0xFF=None */
	unsigned char port1port;
	/* Node ID port connected to: 0xFF = None */
	unsigned char port2node;
	/* Port num port connected to: 0,1,2, or 0xFF=None */
	unsigned char port2port;
	/* 1 based Chassis number (1 = boot node) */
	unsigned char chassis_num;
} __attribute__((packed));
| 128 | |||
/*
 * One RIO I/O device record (Cyclone or Winnipeg) of the Rio Grande table.
 * Packed, so the fields follow each other without padding.
 */
struct rio_detail {
	/* RIO Node ID */
	unsigned char node_id;
	/* Address of 1MB register space */
	unsigned long BBAR;
	/* Type of device */
	unsigned char type;
	/*
	 * For WPEG: Node ID of Cyclone that owns this WPEG
	 * For CYC:  Node ID of Twister that owns this CYC
	 */
	unsigned char owner_id;
	/* Node ID port connected to: 0xFF=None */
	unsigned char port0node;
	/* Port num port connected to: 0,1,2, or 0xFF=None */
	unsigned char port0port;
	/* Node ID port connected to: 0xFF=None */
	unsigned char port1node;
	/* Port num port connected to: 0,1,2, or 0xFF=None */
	unsigned char port1port;
	/*
	 * For WPEG: Lowest slot number below this WPEG
	 * For CYC:  0
	 */
	unsigned char first_slot;
	/*
	 * For WPEG: Bit 0 = 1 : the XAPIC is used
	 *                 = 0 : the XAPIC is not used, ie:
	 *                       ints fwded to another XAPIC
	 *           Bits1:7 Reserved
	 * For CYC:  Bits0:7 Reserved
	 */
	unsigned char status;
	/*
	 * For WPEG: WPEG instance index - lower ones have
	 *           lower slot numbers/PCI bus numbers
	 * For CYC:  No meaning
	 */
	unsigned char WP_index;
	/*
	 * 1 based Chassis number
	 * For LookOut WPEGs this field indicates the
	 * Expansion Chassis #, enumerated from Boot
	 * Node WPEG external port, then Boot Node CYC
	 * external port, then Next Vigil chassis WPEG
	 * external port, etc.
	 * Shared Lookouts have only 1 chassis number (the
	 * first one assigned)
	 */
	unsigned char chassis_num;
} __attribute__((packed));
| 158 | |||
| 159 | |||
/* Values found in the "type" byte of a struct rio_detail record. */
typedef enum {
	/* Compatibility Twister */
	CompatTwister	= 0,
	/* Alternate Twister of internal 8-way */
	AltTwister	= 1,
	/* Compatibility Cyclone */
	CompatCyclone	= 2,
	/* Alternate Cyclone of internal 8-way */
	AltCyclone	= 3,
	/* Compatibility WPEG */
	CompatWPEG	= 4,
	/* Second Planar WPEG */
	AltWPEG		= 5,
	/* LookOut WPEG */
	LookOutAWPEG	= 6,
	/* LookOut WPEG */
	LookOutBWPEG	= 7,
} node_type;
| 170 | |||
| 171 | static inline int is_WPEG(struct rio_detail *rio){ | ||
| 172 | return (rio->type == CompatWPEG || rio->type == AltWPEG || | ||
| 173 | rio->type == LookOutAWPEG || rio->type == LookOutBWPEG); | ||
| 174 | } | ||
| 175 | |||
| 176 | |||
/*
 * Clustered mode splits the APIC ID in two: the high nibble is the cluster
 * number and the low nibble is a 4-bit CPU bitmap.
 */
#define XAPIC_DEST_CPUS_SHIFT	4
#define XAPIC_DEST_CPUS_MASK	((1u << XAPIC_DEST_CPUS_SHIFT) - 1)
#define XAPIC_DEST_CLUSTER_MASK	(XAPIC_DEST_CPUS_MASK << XAPIC_DEST_CPUS_SHIFT)

/* Value written to APIC_DFR by summit_init_apic_ldr() */
#define SUMMIT_APIC_DFR_VALUE	(APIC_DFR_CLUSTER)
| 184 | |||
/*
 * Default interrupt target: CPU 0 only.
 *
 * CPU_MASK_ALL (0xff) has undefined behaviour with dest_LowestPrio mode
 * logical clustered apic interrupt routing, so just start on cpu 0 and
 * let IRQ balancing spread the load.
 */
static const struct cpumask *summit_target_cpus(void)
{
	const struct cpumask *boot_only = cpumask_of(0);

	return boot_only;
}
| 193 | |||
| 194 | static unsigned long summit_check_apicid_used(physid_mask_t bitmap, int apicid) | ||
| 195 | { | ||
| 196 | return 0; | ||
| 197 | } | ||
| 198 | |||
/*
 * Always reports "present" (1): we don't use the phys_cpu_present_map to
 * indicate apicid presence, so @bit is not examined.
 */
static unsigned long summit_check_apicid_present(int bit)
{
	return 1UL;
}
| 204 | |||
| 205 | static void summit_init_apic_ldr(void) | ||
| 206 | { | ||
| 207 | unsigned long val, id; | ||
| 208 | int count = 0; | ||
| 209 | u8 my_id = (u8)hard_smp_processor_id(); | ||
| 210 | u8 my_cluster = APIC_CLUSTER(my_id); | ||
| 211 | #ifdef CONFIG_SMP | ||
| 212 | u8 lid; | ||
| 213 | int i; | ||
| 214 | |||
| 215 | /* Create logical APIC IDs by counting CPUs already in cluster. */ | ||
| 216 | for (count = 0, i = nr_cpu_ids; --i >= 0; ) { | ||
| 217 | lid = cpu_2_logical_apicid[i]; | ||
| 218 | if (lid != BAD_APICID && APIC_CLUSTER(lid) == my_cluster) | ||
| 219 | ++count; | ||
| 220 | } | ||
| 221 | #endif | ||
| 222 | /* We only have a 4 wide bitmap in cluster mode. If a deranged | ||
| 223 | * BIOS puts 5 CPUs in one APIC cluster, we're hosed. */ | ||
| 224 | BUG_ON(count >= XAPIC_DEST_CPUS_SHIFT); | ||
| 225 | id = my_cluster | (1UL << count); | ||
| 226 | apic_write(APIC_DFR, SUMMIT_APIC_DFR_VALUE); | ||
| 227 | val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; | ||
| 228 | val |= SET_APIC_LOGICAL_ID(id); | ||
| 229 | apic_write(APIC_LDR, val); | ||
| 230 | } | ||
| 231 | |||
/* Unconditionally reports the APIC ID as registered. */
static int summit_apic_id_registered(void)
{
	const int registered = 1;

	return registered;
}
| 236 | |||
| 237 | static void summit_setup_apic_routing(void) | ||
| 238 | { | ||
| 239 | printk("Enabling APIC mode: Summit. Using %d I/O APICs\n", | ||
| 240 | nr_ioapics); | ||
| 241 | } | ||
| 242 | |||
/*
 * Return the node of the calling CPU.  On SMP this is looked up in
 * apicid_2_node[] by the current hard APIC ID; @logical_apicid itself is
 * not consulted.  On UP the answer is always node 0.
 */
static int summit_apicid_to_node(int logical_apicid)
{
#ifndef CONFIG_SMP
	return 0;
#else
	return apicid_2_node[hard_smp_processor_id()];
#endif
}
| 251 | |||
/*
 * Mapping from cpu number to logical apicid.
 *
 * SMP: table lookup, with BAD_APICID for any @cpu at or beyond nr_cpu_ids.
 * UP:  the logical id of the running processor, whatever @cpu is.
 */
static inline int summit_cpu_to_logical_apicid(int cpu)
{
#ifndef CONFIG_SMP
	return logical_smp_processor_id();
#else
	if (cpu < nr_cpu_ids)
		return cpu_2_logical_apicid[cpu];

	return BAD_APICID;
#endif
}
| 263 | |||
| 264 | static int summit_cpu_present_to_apicid(int mps_cpu) | ||
| 265 | { | ||
| 266 | if (mps_cpu < nr_cpu_ids) | ||
| 267 | return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); | ||
| 268 | else | ||
| 269 | return BAD_APICID; | ||
| 270 | } | ||
| 271 | |||
| 272 | static physid_mask_t summit_ioapic_phys_id_map(physid_mask_t phys_id_map) | ||
| 273 | { | ||
| 274 | /* For clustered we don't have a good way to do this yet - hack */ | ||
| 275 | return physids_promote(0x0F); | ||
| 276 | } | ||
| 277 | |||
| 278 | static physid_mask_t summit_apicid_to_cpu_present(int apicid) | ||
| 279 | { | ||
| 280 | return physid_mask_of_physid(0); | ||
| 281 | } | ||
| 282 | |||
/* Always reports the physical APIC ID as present (1). */
static int summit_check_phys_apicid_present(int boot_cpu_physical_apicid)
{
	const int present = 1;

	return present;
}
| 287 | |||
| 288 | static unsigned int summit_cpu_mask_to_apicid(const struct cpumask *cpumask) | ||
| 289 | { | ||
| 290 | unsigned int round = 0; | ||
| 291 | int cpu, apicid = 0; | ||
| 292 | |||
| 293 | /* | ||
| 294 | * The cpus in the mask must all be on the apic cluster. | ||
| 295 | */ | ||
| 296 | for_each_cpu(cpu, cpumask) { | ||
| 297 | int new_apicid = summit_cpu_to_logical_apicid(cpu); | ||
| 298 | |||
| 299 | if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) { | ||
| 300 | printk("%s: Not a valid mask!\n", __func__); | ||
| 301 | return BAD_APICID; | ||
| 302 | } | ||
| 303 | apicid |= new_apicid; | ||
| 304 | round++; | ||
| 305 | } | ||
| 306 | return apicid; | ||
| 307 | } | ||
| 308 | |||
| 309 | static unsigned int summit_cpu_mask_to_apicid_and(const struct cpumask *inmask, | ||
| 310 | const struct cpumask *andmask) | ||
| 311 | { | ||
| 312 | int apicid = summit_cpu_to_logical_apicid(0); | ||
| 313 | cpumask_var_t cpumask; | ||
| 314 | |||
| 315 | if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) | ||
| 316 | return apicid; | ||
| 317 | |||
| 318 | cpumask_and(cpumask, inmask, andmask); | ||
| 319 | cpumask_and(cpumask, cpumask, cpu_online_mask); | ||
| 320 | apicid = summit_cpu_mask_to_apicid(cpumask); | ||
| 321 | |||
| 322 | free_cpumask_var(cpumask); | ||
| 323 | |||
| 324 | return apicid; | ||
| 325 | } | ||
| 326 | |||
/*
 * Package id: the hard APIC ID of the running CPU shifted right by
 * @index_msb.  @cpuid_apic is deliberately not used.
 *
 * cpuid returns the value latched in the HW at reset, not the APIC ID
 * register's value.  For any box whose BIOS changes APIC IDs, like
 * clustered APIC systems, we must use hard_smp_processor_id.
 *
 * See Intel's IA-32 SW Dev's Manual Vol2 under CPUID.
 */
static int summit_phys_pkg_id(int cpuid_apic, int index_msb)
{
	return hard_smp_processor_id() >> index_msb;
}
| 338 | |||
/*
 * Never claims the system at probe time (always 0); Summit is probed
 * later in the mptable/ACPI hooks.
 */
static int probe_summit(void)
{
	const int claimed = 0;

	return claimed;
}
| 344 | |||
| 345 | static void summit_vector_allocation_domain(int cpu, struct cpumask *retmask) | ||
| 346 | { | ||
| 347 | /* Careful. Some cpus do not strictly honor the set of cpus | ||
| 348 | * specified in the interrupt destination when using lowest | ||
| 349 | * priority interrupt delivery mode. | ||
| 350 | * | ||
| 351 | * In particular there was a hyperthreading cpu observed to | ||
| 352 | * deliver interrupts to the wrong hyperthread when only one | ||
| 353 | * hyperthread was specified in the interrupt desitination. | ||
| 354 | */ | ||
| 355 | cpumask_clear(retmask); | ||
| 356 | cpumask_bits(retmask)[0] = APIC_ALL_CPUS; | ||
| 357 | } | ||
| 358 | |||
| 359 | #ifdef CONFIG_X86_SUMMIT_NUMA | ||
| 360 | static struct rio_table_hdr *rio_table_hdr; | ||
| 361 | static struct scal_detail *scal_devs[MAX_NUMNODES]; | ||
| 362 | static struct rio_detail *rio_devs[MAX_NUMNODES*4]; | ||
| 363 | |||
| 364 | #ifndef CONFIG_X86_NUMAQ | ||
| 365 | static int mp_bus_id_to_node[MAX_MP_BUSSES]; | ||
| 366 | #endif | ||
| 367 | |||
| 368 | static int setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus) | ||
| 369 | { | ||
| 370 | int twister = 0, node = 0; | ||
| 371 | int i, bus, num_buses; | ||
| 372 | |||
| 373 | for (i = 0; i < rio_table_hdr->num_rio_dev; i++) { | ||
| 374 | if (rio_devs[i]->node_id == rio_devs[wpeg_num]->owner_id) { | ||
| 375 | twister = rio_devs[i]->owner_id; | ||
| 376 | break; | ||
| 377 | } | ||
| 378 | } | ||
| 379 | if (i == rio_table_hdr->num_rio_dev) { | ||
| 380 | printk(KERN_ERR "%s: Couldn't find owner Cyclone for Winnipeg!\n", __func__); | ||
| 381 | return last_bus; | ||
| 382 | } | ||
| 383 | |||
| 384 | for (i = 0; i < rio_table_hdr->num_scal_dev; i++) { | ||
| 385 | if (scal_devs[i]->node_id == twister) { | ||
| 386 | node = scal_devs[i]->node_id; | ||
| 387 | break; | ||
| 388 | } | ||
| 389 | } | ||
| 390 | if (i == rio_table_hdr->num_scal_dev) { | ||
| 391 | printk(KERN_ERR "%s: Couldn't find owner Twister for Cyclone!\n", __func__); | ||
| 392 | return last_bus; | ||
| 393 | } | ||
| 394 | |||
| 395 | switch (rio_devs[wpeg_num]->type) { | ||
| 396 | case CompatWPEG: | ||
| 397 | /* | ||
| 398 | * The Compatibility Winnipeg controls the 2 legacy buses, | ||
| 399 | * the 66MHz PCI bus [2 slots] and the 2 "extra" buses in case | ||
| 400 | * a PCI-PCI bridge card is used in either slot: total 5 buses. | ||
| 401 | */ | ||
| 402 | num_buses = 5; | ||
| 403 | break; | ||
| 404 | case AltWPEG: | ||
| 405 | /* | ||
| 406 | * The Alternate Winnipeg controls the 2 133MHz buses [1 slot | ||
| 407 | * each], their 2 "extra" buses, the 100MHz bus [2 slots] and | ||
| 408 | * the "extra" buses for each of those slots: total 7 buses. | ||
| 409 | */ | ||
| 410 | num_buses = 7; | ||
| 411 | break; | ||
| 412 | case LookOutAWPEG: | ||
| 413 | case LookOutBWPEG: | ||
| 414 | /* | ||
| 415 | * A Lookout Winnipeg controls 3 100MHz buses [2 slots each] | ||
| 416 | * & the "extra" buses for each of those slots: total 9 buses. | ||
| 417 | */ | ||
| 418 | num_buses = 9; | ||
| 419 | break; | ||
| 420 | default: | ||
| 421 | printk(KERN_INFO "%s: Unsupported Winnipeg type!\n", __func__); | ||
| 422 | return last_bus; | ||
| 423 | } | ||
| 424 | |||
| 425 | for (bus = last_bus; bus < last_bus + num_buses; bus++) | ||
| 426 | mp_bus_id_to_node[bus] = node; | ||
| 427 | return bus; | ||
| 428 | } | ||
| 429 | |||
| 430 | static int build_detail_arrays(void) | ||
| 431 | { | ||
| 432 | unsigned long ptr; | ||
| 433 | int i, scal_detail_size, rio_detail_size; | ||
| 434 | |||
| 435 | if (rio_table_hdr->num_scal_dev > MAX_NUMNODES) { | ||
| 436 | printk(KERN_WARNING "%s: MAX_NUMNODES too low! Defined as %d, but system has %d nodes.\n", __func__, MAX_NUMNODES, rio_table_hdr->num_scal_dev); | ||
| 437 | return 0; | ||
| 438 | } | ||
| 439 | |||
| 440 | switch (rio_table_hdr->version) { | ||
| 441 | default: | ||
| 442 | printk(KERN_WARNING "%s: Invalid Rio Grande Table Version: %d\n", __func__, rio_table_hdr->version); | ||
| 443 | return 0; | ||
| 444 | case 2: | ||
| 445 | scal_detail_size = 11; | ||
| 446 | rio_detail_size = 13; | ||
| 447 | break; | ||
| 448 | case 3: | ||
| 449 | scal_detail_size = 12; | ||
| 450 | rio_detail_size = 15; | ||
| 451 | break; | ||
| 452 | } | ||
| 453 | |||
| 454 | ptr = (unsigned long)rio_table_hdr + 3; | ||
| 455 | for (i = 0; i < rio_table_hdr->num_scal_dev; i++, ptr += scal_detail_size) | ||
| 456 | scal_devs[i] = (struct scal_detail *)ptr; | ||
| 457 | |||
| 458 | for (i = 0; i < rio_table_hdr->num_rio_dev; i++, ptr += rio_detail_size) | ||
| 459 | rio_devs[i] = (struct rio_detail *)ptr; | ||
| 460 | |||
| 461 | return 1; | ||
| 462 | } | ||
| 463 | |||
| 464 | void setup_summit(void) | ||
| 465 | { | ||
| 466 | unsigned long ptr; | ||
| 467 | unsigned short offset; | ||
| 468 | int i, next_wpeg, next_bus = 0; | ||
| 469 | |||
| 470 | /* The pointer to the EBDA is stored in the word @ phys 0x40E(40:0E) */ | ||
| 471 | ptr = get_bios_ebda(); | ||
| 472 | ptr = (unsigned long)phys_to_virt(ptr); | ||
| 473 | |||
| 474 | rio_table_hdr = NULL; | ||
| 475 | offset = 0x180; | ||
| 476 | while (offset) { | ||
| 477 | /* The block id is stored in the 2nd word */ | ||
| 478 | if (*((unsigned short *)(ptr + offset + 2)) == 0x4752) { | ||
| 479 | /* set the pointer past the offset & block id */ | ||
| 480 | rio_table_hdr = (struct rio_table_hdr *)(ptr + offset + 4); | ||
| 481 | break; | ||
| 482 | } | ||
| 483 | /* The next offset is stored in the 1st word. 0 means no more */ | ||
| 484 | offset = *((unsigned short *)(ptr + offset)); | ||
| 485 | } | ||
| 486 | if (!rio_table_hdr) { | ||
| 487 | printk(KERN_ERR "%s: Unable to locate Rio Grande Table in EBDA - bailing!\n", __func__); | ||
| 488 | return; | ||
| 489 | } | ||
| 490 | |||
| 491 | if (!build_detail_arrays()) | ||
| 492 | return; | ||
| 493 | |||
| 494 | /* The first Winnipeg we're looking for has an index of 0 */ | ||
| 495 | next_wpeg = 0; | ||
| 496 | do { | ||
| 497 | for (i = 0; i < rio_table_hdr->num_rio_dev; i++) { | ||
| 498 | if (is_WPEG(rio_devs[i]) && rio_devs[i]->WP_index == next_wpeg) { | ||
| 499 | /* It's the Winnipeg we're looking for! */ | ||
| 500 | next_bus = setup_pci_node_map_for_wpeg(i, next_bus); | ||
| 501 | next_wpeg++; | ||
| 502 | break; | ||
| 503 | } | ||
| 504 | } | ||
| 505 | /* | ||
| 506 | * If we go through all Rio devices and don't find one with | ||
| 507 | * the next index, it means we've found all the Winnipegs, | ||
| 508 | * and thus all the PCI buses. | ||
| 509 | */ | ||
| 510 | if (i == rio_table_hdr->num_rio_dev) | ||
| 511 | next_wpeg = 0; | ||
| 512 | } while (next_wpeg != 0); | ||
| 513 | } | ||
| 514 | #endif | ||
| 515 | |||
| 516 | struct apic apic_summit = { | ||
| 517 | |||
| 518 | .name = "summit", | ||
| 519 | .probe = probe_summit, | ||
| 520 | .acpi_madt_oem_check = summit_acpi_madt_oem_check, | ||
| 521 | .apic_id_registered = summit_apic_id_registered, | ||
| 522 | |||
| 523 | .irq_delivery_mode = dest_LowestPrio, | ||
| 524 | /* logical delivery broadcast to all CPUs: */ | ||
| 525 | .irq_dest_mode = 1, | ||
| 526 | |||
| 527 | .target_cpus = summit_target_cpus, | ||
| 528 | .disable_esr = 1, | ||
| 529 | .dest_logical = APIC_DEST_LOGICAL, | ||
| 530 | .check_apicid_used = summit_check_apicid_used, | ||
| 531 | .check_apicid_present = summit_check_apicid_present, | ||
| 532 | |||
| 533 | .vector_allocation_domain = summit_vector_allocation_domain, | ||
| 534 | .init_apic_ldr = summit_init_apic_ldr, | ||
| 535 | |||
| 536 | .ioapic_phys_id_map = summit_ioapic_phys_id_map, | ||
| 537 | .setup_apic_routing = summit_setup_apic_routing, | ||
| 538 | .multi_timer_check = NULL, | ||
| 539 | .apicid_to_node = summit_apicid_to_node, | ||
| 540 | .cpu_to_logical_apicid = summit_cpu_to_logical_apicid, | ||
| 541 | .cpu_present_to_apicid = summit_cpu_present_to_apicid, | ||
| 542 | .apicid_to_cpu_present = summit_apicid_to_cpu_present, | ||
| 543 | .setup_portio_remap = NULL, | ||
| 544 | .check_phys_apicid_present = summit_check_phys_apicid_present, | ||
| 545 | .enable_apic_mode = NULL, | ||
| 546 | .phys_pkg_id = summit_phys_pkg_id, | ||
| 547 | .mps_oem_check = summit_mps_oem_check, | ||
| 548 | |||
| 549 | .get_apic_id = summit_get_apic_id, | ||
| 550 | .set_apic_id = NULL, | ||
| 551 | .apic_id_mask = 0xFF << 24, | ||
| 552 | |||
| 553 | .cpu_mask_to_apicid = summit_cpu_mask_to_apicid, | ||
| 554 | .cpu_mask_to_apicid_and = summit_cpu_mask_to_apicid_and, | ||
| 555 | |||
| 556 | .send_IPI_mask = summit_send_IPI_mask, | ||
| 557 | .send_IPI_mask_allbutself = NULL, | ||
| 558 | .send_IPI_allbutself = summit_send_IPI_allbutself, | ||
| 559 | .send_IPI_all = summit_send_IPI_all, | ||
| 560 | .send_IPI_self = default_send_IPI_self, | ||
| 561 | |||
| 562 | .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, | ||
| 563 | .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, | ||
| 564 | |||
| 565 | .wait_for_init_deassert = default_wait_for_init_deassert, | ||
| 566 | |||
| 567 | .smp_callin_clear_local_apic = NULL, | ||
| 568 | .inquire_remote_apic = default_inquire_remote_apic, | ||
| 569 | |||
| 570 | .read = native_apic_mem_read, | ||
| 571 | .write = native_apic_mem_write, | ||
| 572 | .icr_read = native_apic_icr_read, | ||
| 573 | .icr_write = native_apic_icr_write, | ||
| 574 | .wait_icr_idle = native_apic_wait_icr_idle, | ||
| 575 | .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, | ||
| 576 | }; | ||
diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 6ce497cc372d..8fb87b6dd633 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c | |||
| @@ -7,17 +7,14 @@ | |||
| 7 | #include <linux/dmar.h> | 7 | #include <linux/dmar.h> |
| 8 | 8 | ||
| 9 | #include <asm/smp.h> | 9 | #include <asm/smp.h> |
| 10 | #include <asm/apic.h> | ||
| 10 | #include <asm/ipi.h> | 11 | #include <asm/ipi.h> |
| 11 | #include <asm/genapic.h> | ||
| 12 | 12 | ||
| 13 | DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid); | 13 | DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid); |
| 14 | 14 | ||
| 15 | static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | 15 | static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) |
| 16 | { | 16 | { |
| 17 | if (cpu_has_x2apic) | 17 | return x2apic_enabled(); |
| 18 | return 1; | ||
| 19 | |||
| 20 | return 0; | ||
| 21 | } | 18 | } |
| 22 | 19 | ||
| 23 | /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ | 20 | /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ |
| @@ -36,8 +33,8 @@ static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) | |||
| 36 | cpumask_set_cpu(cpu, retmask); | 33 | cpumask_set_cpu(cpu, retmask); |
| 37 | } | 34 | } |
| 38 | 35 | ||
| 39 | static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, | 36 | static void |
| 40 | unsigned int dest) | 37 | __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest) |
| 41 | { | 38 | { |
| 42 | unsigned long cfg; | 39 | unsigned long cfg; |
| 43 | 40 | ||
| @@ -46,7 +43,7 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, | |||
| 46 | /* | 43 | /* |
| 47 | * send the IPI. | 44 | * send the IPI. |
| 48 | */ | 45 | */ |
| 49 | x2apic_icr_write(cfg, apicid); | 46 | native_x2apic_icr_write(cfg, apicid); |
| 50 | } | 47 | } |
| 51 | 48 | ||
| 52 | /* | 49 | /* |
| @@ -57,45 +54,50 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, | |||
| 57 | */ | 54 | */ |
| 58 | static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) | 55 | static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) |
| 59 | { | 56 | { |
| 60 | unsigned long flags; | ||
| 61 | unsigned long query_cpu; | 57 | unsigned long query_cpu; |
| 58 | unsigned long flags; | ||
| 62 | 59 | ||
| 63 | local_irq_save(flags); | 60 | local_irq_save(flags); |
| 64 | for_each_cpu(query_cpu, mask) | 61 | for_each_cpu(query_cpu, mask) { |
| 65 | __x2apic_send_IPI_dest( | 62 | __x2apic_send_IPI_dest( |
| 66 | per_cpu(x86_cpu_to_logical_apicid, query_cpu), | 63 | per_cpu(x86_cpu_to_logical_apicid, query_cpu), |
| 67 | vector, APIC_DEST_LOGICAL); | 64 | vector, apic->dest_logical); |
| 65 | } | ||
| 68 | local_irq_restore(flags); | 66 | local_irq_restore(flags); |
| 69 | } | 67 | } |
| 70 | 68 | ||
| 71 | static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, | 69 | static void |
| 72 | int vector) | 70 | x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) |
| 73 | { | 71 | { |
| 74 | unsigned long flags; | ||
| 75 | unsigned long query_cpu; | ||
| 76 | unsigned long this_cpu = smp_processor_id(); | 72 | unsigned long this_cpu = smp_processor_id(); |
| 73 | unsigned long query_cpu; | ||
| 74 | unsigned long flags; | ||
| 77 | 75 | ||
| 78 | local_irq_save(flags); | 76 | local_irq_save(flags); |
| 79 | for_each_cpu(query_cpu, mask) | 77 | for_each_cpu(query_cpu, mask) { |
| 80 | if (query_cpu != this_cpu) | 78 | if (query_cpu == this_cpu) |
| 81 | __x2apic_send_IPI_dest( | 79 | continue; |
| 80 | __x2apic_send_IPI_dest( | ||
| 82 | per_cpu(x86_cpu_to_logical_apicid, query_cpu), | 81 | per_cpu(x86_cpu_to_logical_apicid, query_cpu), |
| 83 | vector, APIC_DEST_LOGICAL); | 82 | vector, apic->dest_logical); |
| 83 | } | ||
| 84 | local_irq_restore(flags); | 84 | local_irq_restore(flags); |
| 85 | } | 85 | } |
| 86 | 86 | ||
| 87 | static void x2apic_send_IPI_allbutself(int vector) | 87 | static void x2apic_send_IPI_allbutself(int vector) |
| 88 | { | 88 | { |
| 89 | unsigned long flags; | ||
| 90 | unsigned long query_cpu; | ||
| 91 | unsigned long this_cpu = smp_processor_id(); | 89 | unsigned long this_cpu = smp_processor_id(); |
| 90 | unsigned long query_cpu; | ||
| 91 | unsigned long flags; | ||
| 92 | 92 | ||
| 93 | local_irq_save(flags); | 93 | local_irq_save(flags); |
| 94 | for_each_online_cpu(query_cpu) | 94 | for_each_online_cpu(query_cpu) { |
| 95 | if (query_cpu != this_cpu) | 95 | if (query_cpu == this_cpu) |
| 96 | __x2apic_send_IPI_dest( | 96 | continue; |
| 97 | __x2apic_send_IPI_dest( | ||
| 97 | per_cpu(x86_cpu_to_logical_apicid, query_cpu), | 98 | per_cpu(x86_cpu_to_logical_apicid, query_cpu), |
| 98 | vector, APIC_DEST_LOGICAL); | 99 | vector, apic->dest_logical); |
| 100 | } | ||
| 99 | local_irq_restore(flags); | 101 | local_irq_restore(flags); |
| 100 | } | 102 | } |
| 101 | 103 | ||
| @@ -111,21 +113,21 @@ static int x2apic_apic_id_registered(void) | |||
| 111 | 113 | ||
| 112 | static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) | 114 | static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) |
| 113 | { | 115 | { |
| 114 | int cpu; | ||
| 115 | |||
| 116 | /* | 116 | /* |
| 117 | * We're using fixed IRQ delivery, can only return one logical APIC ID. | 117 | * We're using fixed IRQ delivery, can only return one logical APIC ID. |
| 118 | * May as well be the first. | 118 | * May as well be the first. |
| 119 | */ | 119 | */ |
| 120 | cpu = cpumask_first(cpumask); | 120 | int cpu = cpumask_first(cpumask); |
| 121 | |||
| 121 | if ((unsigned)cpu < nr_cpu_ids) | 122 | if ((unsigned)cpu < nr_cpu_ids) |
| 122 | return per_cpu(x86_cpu_to_logical_apicid, cpu); | 123 | return per_cpu(x86_cpu_to_logical_apicid, cpu); |
| 123 | else | 124 | else |
| 124 | return BAD_APICID; | 125 | return BAD_APICID; |
| 125 | } | 126 | } |
| 126 | 127 | ||
| 127 | static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | 128 | static unsigned int |
| 128 | const struct cpumask *andmask) | 129 | x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, |
| 130 | const struct cpumask *andmask) | ||
| 129 | { | 131 | { |
| 130 | int cpu; | 132 | int cpu; |
| 131 | 133 | ||
| @@ -133,15 +135,18 @@ static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | |||
| 133 | * We're using fixed IRQ delivery, can only return one logical APIC ID. | 135 | * We're using fixed IRQ delivery, can only return one logical APIC ID. |
| 134 | * May as well be the first. | 136 | * May as well be the first. |
| 135 | */ | 137 | */ |
| 136 | for_each_cpu_and(cpu, cpumask, andmask) | 138 | for_each_cpu_and(cpu, cpumask, andmask) { |
| 137 | if (cpumask_test_cpu(cpu, cpu_online_mask)) | 139 | if (cpumask_test_cpu(cpu, cpu_online_mask)) |
| 138 | break; | 140 | break; |
| 141 | } | ||
| 142 | |||
| 139 | if (cpu < nr_cpu_ids) | 143 | if (cpu < nr_cpu_ids) |
| 140 | return per_cpu(x86_cpu_to_logical_apicid, cpu); | 144 | return per_cpu(x86_cpu_to_logical_apicid, cpu); |
| 145 | |||
| 141 | return BAD_APICID; | 146 | return BAD_APICID; |
| 142 | } | 147 | } |
| 143 | 148 | ||
| 144 | static unsigned int get_apic_id(unsigned long x) | 149 | static unsigned int x2apic_cluster_phys_get_apic_id(unsigned long x) |
| 145 | { | 150 | { |
| 146 | unsigned int id; | 151 | unsigned int id; |
| 147 | 152 | ||
| @@ -157,7 +162,7 @@ static unsigned long set_apic_id(unsigned int id) | |||
| 157 | return x; | 162 | return x; |
| 158 | } | 163 | } |
| 159 | 164 | ||
| 160 | static unsigned int phys_pkg_id(int index_msb) | 165 | static int x2apic_cluster_phys_pkg_id(int initial_apicid, int index_msb) |
| 161 | { | 166 | { |
| 162 | return current_cpu_data.initial_apicid >> index_msb; | 167 | return current_cpu_data.initial_apicid >> index_msb; |
| 163 | } | 168 | } |
| @@ -172,27 +177,63 @@ static void init_x2apic_ldr(void) | |||
| 172 | int cpu = smp_processor_id(); | 177 | int cpu = smp_processor_id(); |
| 173 | 178 | ||
| 174 | per_cpu(x86_cpu_to_logical_apicid, cpu) = apic_read(APIC_LDR); | 179 | per_cpu(x86_cpu_to_logical_apicid, cpu) = apic_read(APIC_LDR); |
| 175 | return; | 180 | } |
| 176 | } | 181 | |
| 177 | 182 | struct apic apic_x2apic_cluster = { | |
| 178 | struct genapic apic_x2apic_cluster = { | 183 | |
| 179 | .name = "cluster x2apic", | 184 | .name = "cluster x2apic", |
| 180 | .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, | 185 | .probe = NULL, |
| 181 | .int_delivery_mode = dest_LowestPrio, | 186 | .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, |
| 182 | .int_dest_mode = (APIC_DEST_LOGICAL != 0), | 187 | .apic_id_registered = x2apic_apic_id_registered, |
| 183 | .target_cpus = x2apic_target_cpus, | 188 | |
| 184 | .vector_allocation_domain = x2apic_vector_allocation_domain, | 189 | .irq_delivery_mode = dest_LowestPrio, |
| 185 | .apic_id_registered = x2apic_apic_id_registered, | 190 | .irq_dest_mode = 1, /* logical */ |
| 186 | .init_apic_ldr = init_x2apic_ldr, | 191 | |
| 187 | .send_IPI_all = x2apic_send_IPI_all, | 192 | .target_cpus = x2apic_target_cpus, |
| 188 | .send_IPI_allbutself = x2apic_send_IPI_allbutself, | 193 | .disable_esr = 0, |
| 189 | .send_IPI_mask = x2apic_send_IPI_mask, | 194 | .dest_logical = APIC_DEST_LOGICAL, |
| 190 | .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, | 195 | .check_apicid_used = NULL, |
| 191 | .send_IPI_self = x2apic_send_IPI_self, | 196 | .check_apicid_present = NULL, |
| 192 | .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, | 197 | |
| 193 | .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, | 198 | .vector_allocation_domain = x2apic_vector_allocation_domain, |
| 194 | .phys_pkg_id = phys_pkg_id, | 199 | .init_apic_ldr = init_x2apic_ldr, |
| 195 | .get_apic_id = get_apic_id, | 200 | |
| 196 | .set_apic_id = set_apic_id, | 201 | .ioapic_phys_id_map = NULL, |
| 197 | .apic_id_mask = (0xFFFFFFFFu), | 202 | .setup_apic_routing = NULL, |
| 203 | .multi_timer_check = NULL, | ||
| 204 | .apicid_to_node = NULL, | ||
| 205 | .cpu_to_logical_apicid = NULL, | ||
| 206 | .cpu_present_to_apicid = default_cpu_present_to_apicid, | ||
| 207 | .apicid_to_cpu_present = NULL, | ||
| 208 | .setup_portio_remap = NULL, | ||
| 209 | .check_phys_apicid_present = default_check_phys_apicid_present, | ||
| 210 | .enable_apic_mode = NULL, | ||
| 211 | .phys_pkg_id = x2apic_cluster_phys_pkg_id, | ||
| 212 | .mps_oem_check = NULL, | ||
| 213 | |||
| 214 | .get_apic_id = x2apic_cluster_phys_get_apic_id, | ||
| 215 | .set_apic_id = set_apic_id, | ||
| 216 | .apic_id_mask = 0xFFFFFFFFu, | ||
| 217 | |||
| 218 | .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, | ||
| 219 | .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, | ||
| 220 | |||
| 221 | .send_IPI_mask = x2apic_send_IPI_mask, | ||
| 222 | .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, | ||
| 223 | .send_IPI_allbutself = x2apic_send_IPI_allbutself, | ||
| 224 | .send_IPI_all = x2apic_send_IPI_all, | ||
| 225 | .send_IPI_self = x2apic_send_IPI_self, | ||
| 226 | |||
| 227 | .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, | ||
| 228 | .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, | ||
| 229 | .wait_for_init_deassert = NULL, | ||
| 230 | .smp_callin_clear_local_apic = NULL, | ||
| 231 | .inquire_remote_apic = NULL, | ||
| 232 | |||
| 233 | .read = native_apic_msr_read, | ||
| 234 | .write = native_apic_msr_write, | ||
| 235 | .icr_read = native_x2apic_icr_read, | ||
| 236 | .icr_write = native_x2apic_icr_write, | ||
| 237 | .wait_icr_idle = native_x2apic_wait_icr_idle, | ||
| 238 | .safe_wait_icr_idle = native_safe_x2apic_wait_icr_idle, | ||
| 198 | }; | 239 | }; |
diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index 21bcc0e098ba..23625b9f98b2 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c | |||
| @@ -7,10 +7,10 @@ | |||
| 7 | #include <linux/dmar.h> | 7 | #include <linux/dmar.h> |
| 8 | 8 | ||
| 9 | #include <asm/smp.h> | 9 | #include <asm/smp.h> |
| 10 | #include <asm/apic.h> | ||
| 10 | #include <asm/ipi.h> | 11 | #include <asm/ipi.h> |
| 11 | #include <asm/genapic.h> | ||
| 12 | 12 | ||
| 13 | static int x2apic_phys; | 13 | int x2apic_phys; |
| 14 | 14 | ||
| 15 | static int set_x2apic_phys_mode(char *arg) | 15 | static int set_x2apic_phys_mode(char *arg) |
| 16 | { | 16 | { |
| @@ -21,10 +21,10 @@ early_param("x2apic_phys", set_x2apic_phys_mode); | |||
| 21 | 21 | ||
| 22 | static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | 22 | static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) |
| 23 | { | 23 | { |
| 24 | if (cpu_has_x2apic && x2apic_phys) | 24 | if (x2apic_phys) |
| 25 | return 1; | 25 | return x2apic_enabled(); |
| 26 | 26 | else | |
| 27 | return 0; | 27 | return 0; |
| 28 | } | 28 | } |
| 29 | 29 | ||
| 30 | /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ | 30 | /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ |
| @@ -50,13 +50,13 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, | |||
| 50 | /* | 50 | /* |
| 51 | * send the IPI. | 51 | * send the IPI. |
| 52 | */ | 52 | */ |
| 53 | x2apic_icr_write(cfg, apicid); | 53 | native_x2apic_icr_write(cfg, apicid); |
| 54 | } | 54 | } |
| 55 | 55 | ||
| 56 | static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) | 56 | static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) |
| 57 | { | 57 | { |
| 58 | unsigned long flags; | ||
| 59 | unsigned long query_cpu; | 58 | unsigned long query_cpu; |
| 59 | unsigned long flags; | ||
| 60 | 60 | ||
| 61 | local_irq_save(flags); | 61 | local_irq_save(flags); |
| 62 | for_each_cpu(query_cpu, mask) { | 62 | for_each_cpu(query_cpu, mask) { |
| @@ -66,12 +66,12 @@ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) | |||
| 66 | local_irq_restore(flags); | 66 | local_irq_restore(flags); |
| 67 | } | 67 | } |
| 68 | 68 | ||
| 69 | static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, | 69 | static void |
| 70 | int vector) | 70 | x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) |
| 71 | { | 71 | { |
| 72 | unsigned long flags; | ||
| 73 | unsigned long query_cpu; | ||
| 74 | unsigned long this_cpu = smp_processor_id(); | 72 | unsigned long this_cpu = smp_processor_id(); |
| 73 | unsigned long query_cpu; | ||
| 74 | unsigned long flags; | ||
| 75 | 75 | ||
| 76 | local_irq_save(flags); | 76 | local_irq_save(flags); |
| 77 | for_each_cpu(query_cpu, mask) { | 77 | for_each_cpu(query_cpu, mask) { |
| @@ -85,16 +85,17 @@ static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, | |||
| 85 | 85 | ||
| 86 | static void x2apic_send_IPI_allbutself(int vector) | 86 | static void x2apic_send_IPI_allbutself(int vector) |
| 87 | { | 87 | { |
| 88 | unsigned long flags; | ||
| 89 | unsigned long query_cpu; | ||
| 90 | unsigned long this_cpu = smp_processor_id(); | 88 | unsigned long this_cpu = smp_processor_id(); |
| 89 | unsigned long query_cpu; | ||
| 90 | unsigned long flags; | ||
| 91 | 91 | ||
| 92 | local_irq_save(flags); | 92 | local_irq_save(flags); |
| 93 | for_each_online_cpu(query_cpu) | 93 | for_each_online_cpu(query_cpu) { |
| 94 | if (query_cpu != this_cpu) | 94 | if (query_cpu == this_cpu) |
| 95 | __x2apic_send_IPI_dest( | 95 | continue; |
| 96 | per_cpu(x86_cpu_to_apicid, query_cpu), | 96 | __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), |
| 97 | vector, APIC_DEST_PHYSICAL); | 97 | vector, APIC_DEST_PHYSICAL); |
| 98 | } | ||
| 98 | local_irq_restore(flags); | 99 | local_irq_restore(flags); |
| 99 | } | 100 | } |
| 100 | 101 | ||
| @@ -110,21 +111,21 @@ static int x2apic_apic_id_registered(void) | |||
| 110 | 111 | ||
| 111 | static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) | 112 | static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) |
| 112 | { | 113 | { |
| 113 | int cpu; | ||
| 114 | |||
| 115 | /* | 114 | /* |
| 116 | * We're using fixed IRQ delivery, can only return one phys APIC ID. | 115 | * We're using fixed IRQ delivery, can only return one phys APIC ID. |
| 117 | * May as well be the first. | 116 | * May as well be the first. |
| 118 | */ | 117 | */ |
| 119 | cpu = cpumask_first(cpumask); | 118 | int cpu = cpumask_first(cpumask); |
| 119 | |||
| 120 | if ((unsigned)cpu < nr_cpu_ids) | 120 | if ((unsigned)cpu < nr_cpu_ids) |
| 121 | return per_cpu(x86_cpu_to_apicid, cpu); | 121 | return per_cpu(x86_cpu_to_apicid, cpu); |
| 122 | else | 122 | else |
| 123 | return BAD_APICID; | 123 | return BAD_APICID; |
| 124 | } | 124 | } |
| 125 | 125 | ||
| 126 | static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | 126 | static unsigned int |
| 127 | const struct cpumask *andmask) | 127 | x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, |
| 128 | const struct cpumask *andmask) | ||
| 128 | { | 129 | { |
| 129 | int cpu; | 130 | int cpu; |
| 130 | 131 | ||
| @@ -132,31 +133,28 @@ static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | |||
| 132 | * We're using fixed IRQ delivery, can only return one phys APIC ID. | 133 | * We're using fixed IRQ delivery, can only return one phys APIC ID. |
| 133 | * May as well be the first. | 134 | * May as well be the first. |
| 134 | */ | 135 | */ |
| 135 | for_each_cpu_and(cpu, cpumask, andmask) | 136 | for_each_cpu_and(cpu, cpumask, andmask) { |
| 136 | if (cpumask_test_cpu(cpu, cpu_online_mask)) | 137 | if (cpumask_test_cpu(cpu, cpu_online_mask)) |
| 137 | break; | 138 | break; |
| 139 | } | ||
| 140 | |||
| 138 | if (cpu < nr_cpu_ids) | 141 | if (cpu < nr_cpu_ids) |
| 139 | return per_cpu(x86_cpu_to_apicid, cpu); | 142 | return per_cpu(x86_cpu_to_apicid, cpu); |
| 143 | |||
| 140 | return BAD_APICID; | 144 | return BAD_APICID; |
| 141 | } | 145 | } |
| 142 | 146 | ||
| 143 | static unsigned int get_apic_id(unsigned long x) | 147 | static unsigned int x2apic_phys_get_apic_id(unsigned long x) |
| 144 | { | 148 | { |
| 145 | unsigned int id; | 149 | return x; |
| 146 | |||
| 147 | id = x; | ||
| 148 | return id; | ||
| 149 | } | 150 | } |
| 150 | 151 | ||
| 151 | static unsigned long set_apic_id(unsigned int id) | 152 | static unsigned long set_apic_id(unsigned int id) |
| 152 | { | 153 | { |
| 153 | unsigned long x; | 154 | return id; |
| 154 | |||
| 155 | x = id; | ||
| 156 | return x; | ||
| 157 | } | 155 | } |
| 158 | 156 | ||
| 159 | static unsigned int phys_pkg_id(int index_msb) | 157 | static int x2apic_phys_pkg_id(int initial_apicid, int index_msb) |
| 160 | { | 158 | { |
| 161 | return current_cpu_data.initial_apicid >> index_msb; | 159 | return current_cpu_data.initial_apicid >> index_msb; |
| 162 | } | 160 | } |
| @@ -168,27 +166,63 @@ static void x2apic_send_IPI_self(int vector) | |||
| 168 | 166 | ||
| 169 | static void init_x2apic_ldr(void) | 167 | static void init_x2apic_ldr(void) |
| 170 | { | 168 | { |
| 171 | return; | 169 | } |
| 172 | } | 170 | |
| 173 | 171 | struct apic apic_x2apic_phys = { | |
| 174 | struct genapic apic_x2apic_phys = { | 172 | |
| 175 | .name = "physical x2apic", | 173 | .name = "physical x2apic", |
| 176 | .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, | 174 | .probe = NULL, |
| 177 | .int_delivery_mode = dest_Fixed, | 175 | .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, |
| 178 | .int_dest_mode = (APIC_DEST_PHYSICAL != 0), | 176 | .apic_id_registered = x2apic_apic_id_registered, |
| 179 | .target_cpus = x2apic_target_cpus, | 177 | |
| 180 | .vector_allocation_domain = x2apic_vector_allocation_domain, | 178 | .irq_delivery_mode = dest_Fixed, |
| 181 | .apic_id_registered = x2apic_apic_id_registered, | 179 | .irq_dest_mode = 0, /* physical */ |
| 182 | .init_apic_ldr = init_x2apic_ldr, | 180 | |
| 183 | .send_IPI_all = x2apic_send_IPI_all, | 181 | .target_cpus = x2apic_target_cpus, |
| 184 | .send_IPI_allbutself = x2apic_send_IPI_allbutself, | 182 | .disable_esr = 0, |
| 185 | .send_IPI_mask = x2apic_send_IPI_mask, | 183 | .dest_logical = 0, |
| 186 | .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, | 184 | .check_apicid_used = NULL, |
| 187 | .send_IPI_self = x2apic_send_IPI_self, | 185 | .check_apicid_present = NULL, |
| 188 | .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, | 186 | |
| 189 | .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, | 187 | .vector_allocation_domain = x2apic_vector_allocation_domain, |
| 190 | .phys_pkg_id = phys_pkg_id, | 188 | .init_apic_ldr = init_x2apic_ldr, |
| 191 | .get_apic_id = get_apic_id, | 189 | |
| 192 | .set_apic_id = set_apic_id, | 190 | .ioapic_phys_id_map = NULL, |
| 193 | .apic_id_mask = (0xFFFFFFFFu), | 191 | .setup_apic_routing = NULL, |
| 192 | .multi_timer_check = NULL, | ||
| 193 | .apicid_to_node = NULL, | ||
| 194 | .cpu_to_logical_apicid = NULL, | ||
| 195 | .cpu_present_to_apicid = default_cpu_present_to_apicid, | ||
| 196 | .apicid_to_cpu_present = NULL, | ||
| 197 | .setup_portio_remap = NULL, | ||
| 198 | .check_phys_apicid_present = default_check_phys_apicid_present, | ||
| 199 | .enable_apic_mode = NULL, | ||
| 200 | .phys_pkg_id = x2apic_phys_pkg_id, | ||
| 201 | .mps_oem_check = NULL, | ||
| 202 | |||
| 203 | .get_apic_id = x2apic_phys_get_apic_id, | ||
| 204 | .set_apic_id = set_apic_id, | ||
| 205 | .apic_id_mask = 0xFFFFFFFFu, | ||
| 206 | |||
| 207 | .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, | ||
| 208 | .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, | ||
| 209 | |||
| 210 | .send_IPI_mask = x2apic_send_IPI_mask, | ||
| 211 | .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, | ||
| 212 | .send_IPI_allbutself = x2apic_send_IPI_allbutself, | ||
| 213 | .send_IPI_all = x2apic_send_IPI_all, | ||
| 214 | .send_IPI_self = x2apic_send_IPI_self, | ||
| 215 | |||
| 216 | .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, | ||
| 217 | .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, | ||
| 218 | .wait_for_init_deassert = NULL, | ||
| 219 | .smp_callin_clear_local_apic = NULL, | ||
| 220 | .inquire_remote_apic = NULL, | ||
| 221 | |||
| 222 | .read = native_apic_msr_read, | ||
| 223 | .write = native_apic_msr_write, | ||
| 224 | .icr_read = native_x2apic_icr_read, | ||
| 225 | .icr_write = native_x2apic_icr_write, | ||
| 226 | .wait_icr_idle = native_x2apic_wait_icr_idle, | ||
| 227 | .safe_wait_icr_idle = native_safe_x2apic_wait_icr_idle, | ||
| 194 | }; | 228 | }; |
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index b193e082f6ce..1bd6da1f8fad 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c | |||
| @@ -7,27 +7,28 @@ | |||
| 7 | * | 7 | * |
| 8 | * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved. | 8 | * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved. |
| 9 | */ | 9 | */ |
| 10 | |||
| 11 | #include <linux/kernel.h> | ||
| 12 | #include <linux/threads.h> | ||
| 13 | #include <linux/cpu.h> | ||
| 14 | #include <linux/cpumask.h> | 10 | #include <linux/cpumask.h> |
| 11 | #include <linux/hardirq.h> | ||
| 12 | #include <linux/proc_fs.h> | ||
| 13 | #include <linux/threads.h> | ||
| 14 | #include <linux/kernel.h> | ||
| 15 | #include <linux/module.h> | ||
| 15 | #include <linux/string.h> | 16 | #include <linux/string.h> |
| 16 | #include <linux/ctype.h> | 17 | #include <linux/ctype.h> |
| 17 | #include <linux/init.h> | ||
| 18 | #include <linux/sched.h> | 18 | #include <linux/sched.h> |
| 19 | #include <linux/module.h> | ||
| 20 | #include <linux/hardirq.h> | ||
| 21 | #include <linux/timer.h> | 19 | #include <linux/timer.h> |
| 22 | #include <linux/proc_fs.h> | 20 | #include <linux/cpu.h> |
| 23 | #include <asm/current.h> | 21 | #include <linux/init.h> |
| 24 | #include <asm/smp.h> | 22 | |
| 25 | #include <asm/ipi.h> | ||
| 26 | #include <asm/genapic.h> | ||
| 27 | #include <asm/pgtable.h> | ||
| 28 | #include <asm/uv/uv_mmrs.h> | 23 | #include <asm/uv/uv_mmrs.h> |
| 29 | #include <asm/uv/uv_hub.h> | 24 | #include <asm/uv/uv_hub.h> |
| 25 | #include <asm/current.h> | ||
| 26 | #include <asm/pgtable.h> | ||
| 30 | #include <asm/uv/bios.h> | 27 | #include <asm/uv/bios.h> |
| 28 | #include <asm/uv/uv.h> | ||
| 29 | #include <asm/apic.h> | ||
| 30 | #include <asm/ipi.h> | ||
| 31 | #include <asm/smp.h> | ||
| 31 | 32 | ||
| 32 | DEFINE_PER_CPU(int, x2apic_extra_bits); | 33 | DEFINE_PER_CPU(int, x2apic_extra_bits); |
| 33 | 34 | ||
| @@ -90,39 +91,43 @@ static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask) | |||
| 90 | cpumask_set_cpu(cpu, retmask); | 91 | cpumask_set_cpu(cpu, retmask); |
| 91 | } | 92 | } |
| 92 | 93 | ||
| 93 | int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip) | 94 | static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip) |
| 94 | { | 95 | { |
| 96 | #ifdef CONFIG_SMP | ||
| 95 | unsigned long val; | 97 | unsigned long val; |
| 96 | int pnode; | 98 | int pnode; |
| 97 | 99 | ||
| 98 | pnode = uv_apicid_to_pnode(phys_apicid); | 100 | pnode = uv_apicid_to_pnode(phys_apicid); |
| 99 | val = (1UL << UVH_IPI_INT_SEND_SHFT) | | 101 | val = (1UL << UVH_IPI_INT_SEND_SHFT) | |
| 100 | (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) | | 102 | (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) | |
| 101 | (((long)start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) | | 103 | ((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) | |
| 102 | APIC_DM_INIT; | 104 | APIC_DM_INIT; |
| 103 | uv_write_global_mmr64(pnode, UVH_IPI_INT, val); | 105 | uv_write_global_mmr64(pnode, UVH_IPI_INT, val); |
| 104 | mdelay(10); | 106 | mdelay(10); |
| 105 | 107 | ||
| 106 | val = (1UL << UVH_IPI_INT_SEND_SHFT) | | 108 | val = (1UL << UVH_IPI_INT_SEND_SHFT) | |
| 107 | (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) | | 109 | (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) | |
| 108 | (((long)start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) | | 110 | ((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) | |
| 109 | APIC_DM_STARTUP; | 111 | APIC_DM_STARTUP; |
| 110 | uv_write_global_mmr64(pnode, UVH_IPI_INT, val); | 112 | uv_write_global_mmr64(pnode, UVH_IPI_INT, val); |
| 113 | |||
| 114 | atomic_set(&init_deasserted, 1); | ||
| 115 | #endif | ||
| 111 | return 0; | 116 | return 0; |
| 112 | } | 117 | } |
| 113 | 118 | ||
| 114 | static void uv_send_IPI_one(int cpu, int vector) | 119 | static void uv_send_IPI_one(int cpu, int vector) |
| 115 | { | 120 | { |
| 116 | unsigned long val, apicid, lapicid; | 121 | unsigned long val, apicid; |
| 117 | int pnode; | 122 | int pnode; |
| 118 | 123 | ||
| 119 | apicid = per_cpu(x86_cpu_to_apicid, cpu); | 124 | apicid = per_cpu(x86_cpu_to_apicid, cpu); |
| 120 | lapicid = apicid & 0x3f; /* ZZZ macro needed */ | ||
| 121 | pnode = uv_apicid_to_pnode(apicid); | 125 | pnode = uv_apicid_to_pnode(apicid); |
| 122 | val = | 126 | |
| 123 | (1UL << UVH_IPI_INT_SEND_SHFT) | (lapicid << | 127 | val = (1UL << UVH_IPI_INT_SEND_SHFT) | |
| 124 | UVH_IPI_INT_APIC_ID_SHFT) | | 128 | (apicid << UVH_IPI_INT_APIC_ID_SHFT) | |
| 125 | (vector << UVH_IPI_INT_VECTOR_SHFT); | 129 | (vector << UVH_IPI_INT_VECTOR_SHFT); |
| 130 | |||
| 126 | uv_write_global_mmr64(pnode, UVH_IPI_INT, val); | 131 | uv_write_global_mmr64(pnode, UVH_IPI_INT, val); |
| 127 | } | 132 | } |
| 128 | 133 | ||
| @@ -136,22 +141,24 @@ static void uv_send_IPI_mask(const struct cpumask *mask, int vector) | |||
| 136 | 141 | ||
| 137 | static void uv_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) | 142 | static void uv_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) |
| 138 | { | 143 | { |
| 139 | unsigned int cpu; | ||
| 140 | unsigned int this_cpu = smp_processor_id(); | 144 | unsigned int this_cpu = smp_processor_id(); |
| 145 | unsigned int cpu; | ||
| 141 | 146 | ||
| 142 | for_each_cpu(cpu, mask) | 147 | for_each_cpu(cpu, mask) { |
| 143 | if (cpu != this_cpu) | 148 | if (cpu != this_cpu) |
| 144 | uv_send_IPI_one(cpu, vector); | 149 | uv_send_IPI_one(cpu, vector); |
| 150 | } | ||
| 145 | } | 151 | } |
| 146 | 152 | ||
| 147 | static void uv_send_IPI_allbutself(int vector) | 153 | static void uv_send_IPI_allbutself(int vector) |
| 148 | { | 154 | { |
| 149 | unsigned int cpu; | ||
| 150 | unsigned int this_cpu = smp_processor_id(); | 155 | unsigned int this_cpu = smp_processor_id(); |
| 156 | unsigned int cpu; | ||
| 151 | 157 | ||
| 152 | for_each_online_cpu(cpu) | 158 | for_each_online_cpu(cpu) { |
| 153 | if (cpu != this_cpu) | 159 | if (cpu != this_cpu) |
| 154 | uv_send_IPI_one(cpu, vector); | 160 | uv_send_IPI_one(cpu, vector); |
| 161 | } | ||
| 155 | } | 162 | } |
| 156 | 163 | ||
| 157 | static void uv_send_IPI_all(int vector) | 164 | static void uv_send_IPI_all(int vector) |
| @@ -170,21 +177,21 @@ static void uv_init_apic_ldr(void) | |||
| 170 | 177 | ||
| 171 | static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask) | 178 | static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask) |
| 172 | { | 179 | { |
| 173 | int cpu; | ||
| 174 | |||
| 175 | /* | 180 | /* |
| 176 | * We're using fixed IRQ delivery, can only return one phys APIC ID. | 181 | * We're using fixed IRQ delivery, can only return one phys APIC ID. |
| 177 | * May as well be the first. | 182 | * May as well be the first. |
| 178 | */ | 183 | */ |
| 179 | cpu = cpumask_first(cpumask); | 184 | int cpu = cpumask_first(cpumask); |
| 185 | |||
| 180 | if ((unsigned)cpu < nr_cpu_ids) | 186 | if ((unsigned)cpu < nr_cpu_ids) |
| 181 | return per_cpu(x86_cpu_to_apicid, cpu); | 187 | return per_cpu(x86_cpu_to_apicid, cpu); |
| 182 | else | 188 | else |
| 183 | return BAD_APICID; | 189 | return BAD_APICID; |
| 184 | } | 190 | } |
| 185 | 191 | ||
| 186 | static unsigned int uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | 192 | static unsigned int |
| 187 | const struct cpumask *andmask) | 193 | uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, |
| 194 | const struct cpumask *andmask) | ||
| 188 | { | 195 | { |
| 189 | int cpu; | 196 | int cpu; |
| 190 | 197 | ||
| @@ -192,15 +199,17 @@ static unsigned int uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | |||
| 192 | * We're using fixed IRQ delivery, can only return one phys APIC ID. | 199 | * We're using fixed IRQ delivery, can only return one phys APIC ID. |
| 193 | * May as well be the first. | 200 | * May as well be the first. |
| 194 | */ | 201 | */ |
| 195 | for_each_cpu_and(cpu, cpumask, andmask) | 202 | for_each_cpu_and(cpu, cpumask, andmask) { |
| 196 | if (cpumask_test_cpu(cpu, cpu_online_mask)) | 203 | if (cpumask_test_cpu(cpu, cpu_online_mask)) |
| 197 | break; | 204 | break; |
| 205 | } | ||
| 198 | if (cpu < nr_cpu_ids) | 206 | if (cpu < nr_cpu_ids) |
| 199 | return per_cpu(x86_cpu_to_apicid, cpu); | 207 | return per_cpu(x86_cpu_to_apicid, cpu); |
| 208 | |||
| 200 | return BAD_APICID; | 209 | return BAD_APICID; |
| 201 | } | 210 | } |
| 202 | 211 | ||
| 203 | static unsigned int get_apic_id(unsigned long x) | 212 | static unsigned int x2apic_get_apic_id(unsigned long x) |
| 204 | { | 213 | { |
| 205 | unsigned int id; | 214 | unsigned int id; |
| 206 | 215 | ||
| @@ -222,10 +231,10 @@ static unsigned long set_apic_id(unsigned int id) | |||
| 222 | static unsigned int uv_read_apic_id(void) | 231 | static unsigned int uv_read_apic_id(void) |
| 223 | { | 232 | { |
| 224 | 233 | ||
| 225 | return get_apic_id(apic_read(APIC_ID)); | 234 | return x2apic_get_apic_id(apic_read(APIC_ID)); |
| 226 | } | 235 | } |
| 227 | 236 | ||
| 228 | static unsigned int phys_pkg_id(int index_msb) | 237 | static int uv_phys_pkg_id(int initial_apicid, int index_msb) |
| 229 | { | 238 | { |
| 230 | return uv_read_apic_id() >> index_msb; | 239 | return uv_read_apic_id() >> index_msb; |
| 231 | } | 240 | } |
| @@ -235,26 +244,64 @@ static void uv_send_IPI_self(int vector) | |||
| 235 | apic_write(APIC_SELF_IPI, vector); | 244 | apic_write(APIC_SELF_IPI, vector); |
| 236 | } | 245 | } |
| 237 | 246 | ||
| 238 | struct genapic apic_x2apic_uv_x = { | 247 | struct apic apic_x2apic_uv_x = { |
| 239 | .name = "UV large system", | 248 | |
| 240 | .acpi_madt_oem_check = uv_acpi_madt_oem_check, | 249 | .name = "UV large system", |
| 241 | .int_delivery_mode = dest_Fixed, | 250 | .probe = NULL, |
| 242 | .int_dest_mode = (APIC_DEST_PHYSICAL != 0), | 251 | .acpi_madt_oem_check = uv_acpi_madt_oem_check, |
| 243 | .target_cpus = uv_target_cpus, | 252 | .apic_id_registered = uv_apic_id_registered, |
| 244 | .vector_allocation_domain = uv_vector_allocation_domain, | 253 | |
| 245 | .apic_id_registered = uv_apic_id_registered, | 254 | .irq_delivery_mode = dest_Fixed, |
| 246 | .init_apic_ldr = uv_init_apic_ldr, | 255 | .irq_dest_mode = 1, /* logical */ |
| 247 | .send_IPI_all = uv_send_IPI_all, | 256 | |
| 248 | .send_IPI_allbutself = uv_send_IPI_allbutself, | 257 | .target_cpus = uv_target_cpus, |
| 249 | .send_IPI_mask = uv_send_IPI_mask, | 258 | .disable_esr = 0, |
| 250 | .send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself, | 259 | .dest_logical = APIC_DEST_LOGICAL, |
| 251 | .send_IPI_self = uv_send_IPI_self, | 260 | .check_apicid_used = NULL, |
| 252 | .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, | 261 | .check_apicid_present = NULL, |
| 253 | .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and, | 262 | |
| 254 | .phys_pkg_id = phys_pkg_id, | 263 | .vector_allocation_domain = uv_vector_allocation_domain, |
| 255 | .get_apic_id = get_apic_id, | 264 | .init_apic_ldr = uv_init_apic_ldr, |
| 256 | .set_apic_id = set_apic_id, | 265 | |
| 257 | .apic_id_mask = (0xFFFFFFFFu), | 266 | .ioapic_phys_id_map = NULL, |
| 267 | .setup_apic_routing = NULL, | ||
| 268 | .multi_timer_check = NULL, | ||
| 269 | .apicid_to_node = NULL, | ||
| 270 | .cpu_to_logical_apicid = NULL, | ||
| 271 | .cpu_present_to_apicid = default_cpu_present_to_apicid, | ||
| 272 | .apicid_to_cpu_present = NULL, | ||
| 273 | .setup_portio_remap = NULL, | ||
| 274 | .check_phys_apicid_present = default_check_phys_apicid_present, | ||
| 275 | .enable_apic_mode = NULL, | ||
| 276 | .phys_pkg_id = uv_phys_pkg_id, | ||
| 277 | .mps_oem_check = NULL, | ||
| 278 | |||
| 279 | .get_apic_id = x2apic_get_apic_id, | ||
| 280 | .set_apic_id = set_apic_id, | ||
| 281 | .apic_id_mask = 0xFFFFFFFFu, | ||
| 282 | |||
| 283 | .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, | ||
| 284 | .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and, | ||
| 285 | |||
| 286 | .send_IPI_mask = uv_send_IPI_mask, | ||
| 287 | .send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself, | ||
| 288 | .send_IPI_allbutself = uv_send_IPI_allbutself, | ||
| 289 | .send_IPI_all = uv_send_IPI_all, | ||
| 290 | .send_IPI_self = uv_send_IPI_self, | ||
| 291 | |||
| 292 | .wakeup_secondary_cpu = uv_wakeup_secondary, | ||
| 293 | .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, | ||
| 294 | .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, | ||
| 295 | .wait_for_init_deassert = NULL, | ||
| 296 | .smp_callin_clear_local_apic = NULL, | ||
| 297 | .inquire_remote_apic = NULL, | ||
| 298 | |||
| 299 | .read = native_apic_msr_read, | ||
| 300 | .write = native_apic_msr_write, | ||
| 301 | .icr_read = native_x2apic_icr_read, | ||
| 302 | .icr_write = native_x2apic_icr_write, | ||
| 303 | .wait_icr_idle = native_x2apic_wait_icr_idle, | ||
| 304 | .safe_wait_icr_idle = native_safe_x2apic_wait_icr_idle, | ||
| 258 | }; | 305 | }; |
| 259 | 306 | ||
| 260 | static __cpuinit void set_x2apic_extra_bits(int pnode) | 307 | static __cpuinit void set_x2apic_extra_bits(int pnode) |
| @@ -322,7 +369,7 @@ static __init void map_high(char *id, unsigned long base, int shift, | |||
| 322 | paddr = base << shift; | 369 | paddr = base << shift; |
| 323 | bytes = (1UL << shift) * (max_pnode + 1); | 370 | bytes = (1UL << shift) * (max_pnode + 1); |
| 324 | printk(KERN_INFO "UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr, | 371 | printk(KERN_INFO "UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr, |
| 325 | paddr + bytes); | 372 | paddr + bytes); |
| 326 | if (map_type == map_uc) | 373 | if (map_type == map_uc) |
| 327 | init_extra_mapping_uc(paddr, bytes); | 374 | init_extra_mapping_uc(paddr, bytes); |
| 328 | else | 375 | else |
| @@ -485,7 +532,7 @@ late_initcall(uv_init_heartbeat); | |||
| 485 | 532 | ||
| 486 | /* | 533 | /* |
| 487 | * Called on each cpu to initialize the per_cpu UV data area. | 534 | * Called on each cpu to initialize the per_cpu UV data area. |
| 488 | * ZZZ hotplug not supported yet | 535 | * FIXME: hotplug not supported yet |
| 489 | */ | 536 | */ |
| 490 | void __cpuinit uv_cpu_init(void) | 537 | void __cpuinit uv_cpu_init(void) |
| 491 | { | 538 | { |
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 266ec6c18b6c..c1941be9fb17 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c | |||
| @@ -301,7 +301,7 @@ extern int (*console_blank_hook)(int); | |||
| 301 | */ | 301 | */ |
| 302 | #define APM_ZERO_SEGS | 302 | #define APM_ZERO_SEGS |
| 303 | 303 | ||
| 304 | #include "apm.h" | 304 | #include <asm/apm.h> |
| 305 | 305 | ||
| 306 | /* | 306 | /* |
| 307 | * Define to re-initialize the interrupt 0 timer to 100 Hz after a suspend. | 307 | * Define to re-initialize the interrupt 0 timer to 100 Hz after a suspend. |
| @@ -466,7 +466,7 @@ static const lookup_t error_table[] = { | |||
| 466 | * @err: APM BIOS return code | 466 | * @err: APM BIOS return code |
| 467 | * | 467 | * |
| 468 | * Write a meaningful log entry to the kernel log in the event of | 468 | * Write a meaningful log entry to the kernel log in the event of |
| 469 | * an APM error. | 469 | * an APM error. Note that this also handles (negative) kernel errors. |
| 470 | */ | 470 | */ |
| 471 | 471 | ||
| 472 | static void apm_error(char *str, int err) | 472 | static void apm_error(char *str, int err) |
| @@ -478,43 +478,14 @@ static void apm_error(char *str, int err) | |||
| 478 | break; | 478 | break; |
| 479 | if (i < ERROR_COUNT) | 479 | if (i < ERROR_COUNT) |
| 480 | printk(KERN_NOTICE "apm: %s: %s\n", str, error_table[i].msg); | 480 | printk(KERN_NOTICE "apm: %s: %s\n", str, error_table[i].msg); |
| 481 | else if (err < 0) | ||
| 482 | printk(KERN_NOTICE "apm: %s: linux error code %i\n", str, err); | ||
| 481 | else | 483 | else |
| 482 | printk(KERN_NOTICE "apm: %s: unknown error code %#2.2x\n", | 484 | printk(KERN_NOTICE "apm: %s: unknown error code %#2.2x\n", |
| 483 | str, err); | 485 | str, err); |
| 484 | } | 486 | } |
| 485 | 487 | ||
| 486 | /* | 488 | /* |
| 487 | * Lock APM functionality to physical CPU 0 | ||
| 488 | */ | ||
| 489 | |||
| 490 | #ifdef CONFIG_SMP | ||
| 491 | |||
| 492 | static cpumask_t apm_save_cpus(void) | ||
| 493 | { | ||
| 494 | cpumask_t x = current->cpus_allowed; | ||
| 495 | /* Some bioses don't like being called from CPU != 0 */ | ||
| 496 | set_cpus_allowed(current, cpumask_of_cpu(0)); | ||
| 497 | BUG_ON(smp_processor_id() != 0); | ||
| 498 | return x; | ||
| 499 | } | ||
| 500 | |||
| 501 | static inline void apm_restore_cpus(cpumask_t mask) | ||
| 502 | { | ||
| 503 | set_cpus_allowed(current, mask); | ||
| 504 | } | ||
| 505 | |||
| 506 | #else | ||
| 507 | |||
| 508 | /* | ||
| 509 | * No CPU lockdown needed on a uniprocessor | ||
| 510 | */ | ||
| 511 | |||
| 512 | #define apm_save_cpus() (current->cpus_allowed) | ||
| 513 | #define apm_restore_cpus(x) (void)(x) | ||
| 514 | |||
| 515 | #endif | ||
| 516 | |||
| 517 | /* | ||
| 518 | * These are the actual BIOS calls. Depending on APM_ZERO_SEGS and | 489 | * These are the actual BIOS calls. Depending on APM_ZERO_SEGS and |
| 519 | * apm_info.allow_ints, we are being really paranoid here! Not only | 490 | * apm_info.allow_ints, we are being really paranoid here! Not only |
| 520 | * are interrupts disabled, but all the segment registers (except SS) | 491 | * are interrupts disabled, but all the segment registers (except SS) |
| @@ -568,16 +539,23 @@ static inline void apm_irq_restore(unsigned long flags) | |||
| 568 | # define APM_DO_RESTORE_SEGS | 539 | # define APM_DO_RESTORE_SEGS |
| 569 | #endif | 540 | #endif |
| 570 | 541 | ||
| 542 | struct apm_bios_call { | ||
| 543 | u32 func; | ||
| 544 | /* In and out */ | ||
| 545 | u32 ebx; | ||
| 546 | u32 ecx; | ||
| 547 | /* Out only */ | ||
| 548 | u32 eax; | ||
| 549 | u32 edx; | ||
| 550 | u32 esi; | ||
| 551 | |||
| 552 | /* Error: -ENOMEM, or bits 8-15 of eax */ | ||
| 553 | int err; | ||
| 554 | }; | ||
| 555 | |||
| 571 | /** | 556 | /** |
| 572 | * apm_bios_call - Make an APM BIOS 32bit call | 557 | * __apm_bios_call - Make an APM BIOS 32bit call |
| 573 | * @func: APM function to execute | 558 | * @_call: pointer to struct apm_bios_call. |
| 574 | * @ebx_in: EBX register for call entry | ||
| 575 | * @ecx_in: ECX register for call entry | ||
| 576 | * @eax: EAX register return | ||
| 577 | * @ebx: EBX register return | ||
| 578 | * @ecx: ECX register return | ||
| 579 | * @edx: EDX register return | ||
| 580 | * @esi: ESI register return | ||
| 581 | * | 559 | * |
| 582 | * Make an APM call using the 32bit protected mode interface. The | 560 | * Make an APM call using the 32bit protected mode interface. The |
| 583 | * caller is responsible for knowing if APM BIOS is configured and | 561 | * caller is responsible for knowing if APM BIOS is configured and |
| @@ -586,80 +564,142 @@ static inline void apm_irq_restore(unsigned long flags) | |||
| 586 | * flag is loaded into AL. If there is an error, then the error | 564 | * flag is loaded into AL. If there is an error, then the error |
| 587 | * code is returned in AH (bits 8-15 of eax) and this function | 565 | * code is returned in AH (bits 8-15 of eax) and this function |
| 588 | * returns non-zero. | 566 | * returns non-zero. |
| 567 | * | ||
| 568 | * Note: this makes the call on the current CPU. | ||
| 589 | */ | 569 | */ |
| 590 | 570 | static long __apm_bios_call(void *_call) | |
| 591 | static u8 apm_bios_call(u32 func, u32 ebx_in, u32 ecx_in, | ||
| 592 | u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, u32 *esi) | ||
| 593 | { | 571 | { |
| 594 | APM_DECL_SEGS | 572 | APM_DECL_SEGS |
| 595 | unsigned long flags; | 573 | unsigned long flags; |
| 596 | cpumask_t cpus; | ||
| 597 | int cpu; | 574 | int cpu; |
| 598 | struct desc_struct save_desc_40; | 575 | struct desc_struct save_desc_40; |
| 599 | struct desc_struct *gdt; | 576 | struct desc_struct *gdt; |
| 600 | 577 | struct apm_bios_call *call = _call; | |
| 601 | cpus = apm_save_cpus(); | ||
| 602 | 578 | ||
| 603 | cpu = get_cpu(); | 579 | cpu = get_cpu(); |
| 580 | BUG_ON(cpu != 0); | ||
| 604 | gdt = get_cpu_gdt_table(cpu); | 581 | gdt = get_cpu_gdt_table(cpu); |
| 605 | save_desc_40 = gdt[0x40 / 8]; | 582 | save_desc_40 = gdt[0x40 / 8]; |
| 606 | gdt[0x40 / 8] = bad_bios_desc; | 583 | gdt[0x40 / 8] = bad_bios_desc; |
| 607 | 584 | ||
| 608 | apm_irq_save(flags); | 585 | apm_irq_save(flags); |
| 609 | APM_DO_SAVE_SEGS; | 586 | APM_DO_SAVE_SEGS; |
| 610 | apm_bios_call_asm(func, ebx_in, ecx_in, eax, ebx, ecx, edx, esi); | 587 | apm_bios_call_asm(call->func, call->ebx, call->ecx, |
| 588 | &call->eax, &call->ebx, &call->ecx, &call->edx, | ||
| 589 | &call->esi); | ||
| 611 | APM_DO_RESTORE_SEGS; | 590 | APM_DO_RESTORE_SEGS; |
| 612 | apm_irq_restore(flags); | 591 | apm_irq_restore(flags); |
| 613 | gdt[0x40 / 8] = save_desc_40; | 592 | gdt[0x40 / 8] = save_desc_40; |
| 614 | put_cpu(); | 593 | put_cpu(); |
| 615 | apm_restore_cpus(cpus); | ||
| 616 | 594 | ||
| 617 | return *eax & 0xff; | 595 | return call->eax & 0xff; |
| 596 | } | ||
| 597 | |||
| 598 | /* Run __apm_bios_call or __apm_bios_call_simple on CPU 0 */ | ||
| 599 | static int on_cpu0(long (*fn)(void *), struct apm_bios_call *call) | ||
| 600 | { | ||
| 601 | int ret; | ||
| 602 | |||
| 603 | /* Don't bother with work_on_cpu in the common case, so we don't | ||
| 604 | * have to worry about OOM or overhead. */ | ||
| 605 | if (get_cpu() == 0) { | ||
| 606 | ret = fn(call); | ||
| 607 | put_cpu(); | ||
| 608 | } else { | ||
| 609 | put_cpu(); | ||
| 610 | ret = work_on_cpu(0, fn, call); | ||
| 611 | } | ||
| 612 | |||
| 613 | /* work_on_cpu can fail with -ENOMEM */ | ||
| 614 | if (ret < 0) | ||
| 615 | call->err = ret; | ||
| 616 | else | ||
| 617 | call->err = (call->eax >> 8) & 0xff; | ||
| 618 | |||
| 619 | return ret; | ||
| 618 | } | 620 | } |
| 619 | 621 | ||
| 620 | /** | 622 | /** |
| 621 | * apm_bios_call_simple - make a simple APM BIOS 32bit call | 623 | * apm_bios_call - Make an APM BIOS 32bit call (on CPU 0) |
| 622 | * @func: APM function to invoke | 624 | * @call: the apm_bios_call registers. |
| 623 | * @ebx_in: EBX register value for BIOS call | 625 | * |
| 624 | * @ecx_in: ECX register value for BIOS call | 626 | * If there is an error, it is returned in @call.err. |
| 625 | * @eax: EAX register on return from the BIOS call | 627 | */ |
| 628 | static int apm_bios_call(struct apm_bios_call *call) | ||
| 629 | { | ||
| 630 | return on_cpu0(__apm_bios_call, call); | ||
| 631 | } | ||
| 632 | |||
| 633 | /** | ||
| 634 | * __apm_bios_call_simple - Make an APM BIOS 32bit call (on CPU 0) | ||
| 635 | * @_call: pointer to struct apm_bios_call. | ||
| 626 | * | 636 | * |
| 627 | * Make a BIOS call that returns one value only, or just status. | 637 | * Make a BIOS call that returns one value only, or just status. |
| 628 | * If there is an error, then the error code is returned in AH | 638 | * If there is an error, then the error code is returned in AH |
| 629 | * (bits 8-15 of eax) and this function returns non-zero. This is | 639 | * (bits 8-15 of eax) and this function returns non-zero (it can |
| 630 | * used for simpler BIOS operations. This call may hold interrupts | 640 | * also return -ENOMEM). This is used for simpler BIOS operations. |
| 631 | * off for a long time on some laptops. | 641 | * This call may hold interrupts off for a long time on some laptops. |
| 642 | * | ||
| 643 | * Note: this makes the call on the current CPU. | ||
| 632 | */ | 644 | */ |
| 633 | 645 | static long __apm_bios_call_simple(void *_call) | |
| 634 | static u8 apm_bios_call_simple(u32 func, u32 ebx_in, u32 ecx_in, u32 *eax) | ||
| 635 | { | 646 | { |
| 636 | u8 error; | 647 | u8 error; |
| 637 | APM_DECL_SEGS | 648 | APM_DECL_SEGS |
| 638 | unsigned long flags; | 649 | unsigned long flags; |
| 639 | cpumask_t cpus; | ||
| 640 | int cpu; | 650 | int cpu; |
| 641 | struct desc_struct save_desc_40; | 651 | struct desc_struct save_desc_40; |
| 642 | struct desc_struct *gdt; | 652 | struct desc_struct *gdt; |
| 643 | 653 | struct apm_bios_call *call = _call; | |
| 644 | cpus = apm_save_cpus(); | ||
| 645 | 654 | ||
| 646 | cpu = get_cpu(); | 655 | cpu = get_cpu(); |
| 656 | BUG_ON(cpu != 0); | ||
| 647 | gdt = get_cpu_gdt_table(cpu); | 657 | gdt = get_cpu_gdt_table(cpu); |
| 648 | save_desc_40 = gdt[0x40 / 8]; | 658 | save_desc_40 = gdt[0x40 / 8]; |
| 649 | gdt[0x40 / 8] = bad_bios_desc; | 659 | gdt[0x40 / 8] = bad_bios_desc; |
| 650 | 660 | ||
| 651 | apm_irq_save(flags); | 661 | apm_irq_save(flags); |
| 652 | APM_DO_SAVE_SEGS; | 662 | APM_DO_SAVE_SEGS; |
| 653 | error = apm_bios_call_simple_asm(func, ebx_in, ecx_in, eax); | 663 | error = apm_bios_call_simple_asm(call->func, call->ebx, call->ecx, |
| 664 | &call->eax); | ||
| 654 | APM_DO_RESTORE_SEGS; | 665 | APM_DO_RESTORE_SEGS; |
| 655 | apm_irq_restore(flags); | 666 | apm_irq_restore(flags); |
| 656 | gdt[0x40 / 8] = save_desc_40; | 667 | gdt[0x40 / 8] = save_desc_40; |
| 657 | put_cpu(); | 668 | put_cpu(); |
| 658 | apm_restore_cpus(cpus); | ||
| 659 | return error; | 669 | return error; |
| 660 | } | 670 | } |
| 661 | 671 | ||
| 662 | /** | 672 | /** |
| 673 | * apm_bios_call_simple - make a simple APM BIOS 32bit call | ||
| 674 | * @func: APM function to invoke | ||
| 675 | * @ebx_in: EBX register value for BIOS call | ||
| 676 | * @ecx_in: ECX register value for BIOS call | ||
| 677 | * @eax: EAX register on return from the BIOS call | ||
| 678 | * @err: negative errno from work_on_cpu, else bits 8-15 (AH) of EAX | ||
| 679 | * | ||
| 680 | * Make a BIOS call that returns one value only, or just status. | ||
| 681 | * If there is an error, then the error code is returned in @err | ||
| 682 | * and this function returns non-zero. This is used for simpler | ||
| 683 | * BIOS operations. This call may hold interrupts off for a long | ||
| 684 | * time on some laptops. | ||
| 685 | */ | ||
| 686 | static int apm_bios_call_simple(u32 func, u32 ebx_in, u32 ecx_in, u32 *eax, | ||
| 687 | int *err) | ||
| 688 | { | ||
| 689 | struct apm_bios_call call; | ||
| 690 | int ret; | ||
| 691 | |||
| 692 | call.func = func; | ||
| 693 | call.ebx = ebx_in; | ||
| 694 | call.ecx = ecx_in; | ||
| 695 | |||
| 696 | ret = on_cpu0(__apm_bios_call_simple, &call); | ||
| 697 | *eax = call.eax; | ||
| 698 | *err = call.err; | ||
| 699 | return ret; | ||
| 700 | } | ||
| 701 | |||
| 702 | /** | ||
| 663 | * apm_driver_version - APM driver version | 703 | * apm_driver_version - APM driver version |
| 664 | * @val: loaded with the APM version on return | 704 | * @val: loaded with the APM version on return |
| 665 | * | 705 | * |
| @@ -678,9 +718,10 @@ static u8 apm_bios_call_simple(u32 func, u32 ebx_in, u32 ecx_in, u32 *eax) | |||
| 678 | static int apm_driver_version(u_short *val) | 718 | static int apm_driver_version(u_short *val) |
| 679 | { | 719 | { |
| 680 | u32 eax; | 720 | u32 eax; |
| 721 | int err; | ||
| 681 | 722 | ||
| 682 | if (apm_bios_call_simple(APM_FUNC_VERSION, 0, *val, &eax)) | 723 | if (apm_bios_call_simple(APM_FUNC_VERSION, 0, *val, &eax, &err)) |
| 683 | return (eax >> 8) & 0xff; | 724 | return err; |
| 684 | *val = eax; | 725 | *val = eax; |
| 685 | return APM_SUCCESS; | 726 | return APM_SUCCESS; |
| 686 | } | 727 | } |
| @@ -701,22 +742,21 @@ static int apm_driver_version(u_short *val) | |||
| 701 | * that APM 1.2 is in use. If no messages are pending the value 0x80 | 742 | * that APM 1.2 is in use. If no messages are pending the value 0x80 |
| 702 | * is returned (No power management events pending). | 743 | * is returned (No power management events pending). |
| 703 | */ | 744 | */ |
| 704 | |||
| 705 | static int apm_get_event(apm_event_t *event, apm_eventinfo_t *info) | 745 | static int apm_get_event(apm_event_t *event, apm_eventinfo_t *info) |
| 706 | { | 746 | { |
| 707 | u32 eax; | 747 | struct apm_bios_call call; |
| 708 | u32 ebx; | ||
| 709 | u32 ecx; | ||
| 710 | u32 dummy; | ||
| 711 | 748 | ||
| 712 | if (apm_bios_call(APM_FUNC_GET_EVENT, 0, 0, &eax, &ebx, &ecx, | 749 | call.func = APM_FUNC_GET_EVENT; |
| 713 | &dummy, &dummy)) | 750 | call.ebx = call.ecx = 0; |
| 714 | return (eax >> 8) & 0xff; | 751 | |
| 715 | *event = ebx; | 752 | if (apm_bios_call(&call)) |
| 753 | return call.err; | ||
| 754 | |||
| 755 | *event = call.ebx; | ||
| 716 | if (apm_info.connection_version < 0x0102) | 756 | if (apm_info.connection_version < 0x0102) |
| 717 | *info = ~0; /* indicate info not valid */ | 757 | *info = ~0; /* indicate info not valid */ |
| 718 | else | 758 | else |
| 719 | *info = ecx; | 759 | *info = call.ecx; |
| 720 | return APM_SUCCESS; | 760 | return APM_SUCCESS; |
| 721 | } | 761 | } |
| 722 | 762 | ||
| @@ -737,9 +777,10 @@ static int apm_get_event(apm_event_t *event, apm_eventinfo_t *info) | |||
| 737 | static int set_power_state(u_short what, u_short state) | 777 | static int set_power_state(u_short what, u_short state) |
| 738 | { | 778 | { |
| 739 | u32 eax; | 779 | u32 eax; |
| 780 | int err; | ||
| 740 | 781 | ||
| 741 | if (apm_bios_call_simple(APM_FUNC_SET_STATE, what, state, &eax)) | 782 | if (apm_bios_call_simple(APM_FUNC_SET_STATE, what, state, &eax, &err)) |
| 742 | return (eax >> 8) & 0xff; | 783 | return err; |
| 743 | return APM_SUCCESS; | 784 | return APM_SUCCESS; |
| 744 | } | 785 | } |
| 745 | 786 | ||
| @@ -770,6 +811,7 @@ static int apm_do_idle(void) | |||
| 770 | u8 ret = 0; | 811 | u8 ret = 0; |
| 771 | int idled = 0; | 812 | int idled = 0; |
| 772 | int polling; | 813 | int polling; |
| 814 | int err; | ||
| 773 | 815 | ||
| 774 | polling = !!(current_thread_info()->status & TS_POLLING); | 816 | polling = !!(current_thread_info()->status & TS_POLLING); |
| 775 | if (polling) { | 817 | if (polling) { |
| @@ -782,7 +824,7 @@ static int apm_do_idle(void) | |||
| 782 | } | 824 | } |
| 783 | if (!need_resched()) { | 825 | if (!need_resched()) { |
| 784 | idled = 1; | 826 | idled = 1; |
| 785 | ret = apm_bios_call_simple(APM_FUNC_IDLE, 0, 0, &eax); | 827 | ret = apm_bios_call_simple(APM_FUNC_IDLE, 0, 0, &eax, &err); |
| 786 | } | 828 | } |
| 787 | if (polling) | 829 | if (polling) |
| 788 | current_thread_info()->status |= TS_POLLING; | 830 | current_thread_info()->status |= TS_POLLING; |
| @@ -797,8 +839,7 @@ static int apm_do_idle(void) | |||
| 797 | * Only report the failure the first 5 times. | 839 | * Only report the failure the first 5 times. |
| 798 | */ | 840 | */ |
| 799 | if (++t < 5) { | 841 | if (++t < 5) { |
| 800 | printk(KERN_DEBUG "apm_do_idle failed (%d)\n", | 842 | printk(KERN_DEBUG "apm_do_idle failed (%d)\n", err); |
| 801 | (eax >> 8) & 0xff); | ||
| 802 | t = jiffies; | 843 | t = jiffies; |
| 803 | } | 844 | } |
| 804 | return -1; | 845 | return -1; |
| @@ -816,9 +857,10 @@ static int apm_do_idle(void) | |||
| 816 | static void apm_do_busy(void) | 857 | static void apm_do_busy(void) |
| 817 | { | 858 | { |
| 818 | u32 dummy; | 859 | u32 dummy; |
| 860 | int err; | ||
| 819 | 861 | ||
| 820 | if (clock_slowed || ALWAYS_CALL_BUSY) { | 862 | if (clock_slowed || ALWAYS_CALL_BUSY) { |
| 821 | (void)apm_bios_call_simple(APM_FUNC_BUSY, 0, 0, &dummy); | 863 | (void)apm_bios_call_simple(APM_FUNC_BUSY, 0, 0, &dummy, &err); |
| 822 | clock_slowed = 0; | 864 | clock_slowed = 0; |
| 823 | } | 865 | } |
| 824 | } | 866 | } |
| @@ -937,7 +979,7 @@ static void apm_power_off(void) | |||
| 937 | 979 | ||
| 938 | /* Some bioses don't like being called from CPU != 0 */ | 980 | /* Some bioses don't like being called from CPU != 0 */ |
| 939 | if (apm_info.realmode_power_off) { | 981 | if (apm_info.realmode_power_off) { |
| 940 | (void)apm_save_cpus(); | 982 | set_cpus_allowed_ptr(current, cpumask_of(0)); |
| 941 | machine_real_restart(po_bios_call, sizeof(po_bios_call)); | 983 | machine_real_restart(po_bios_call, sizeof(po_bios_call)); |
| 942 | } else { | 984 | } else { |
| 943 | (void)set_system_power_state(APM_STATE_OFF); | 985 | (void)set_system_power_state(APM_STATE_OFF); |
| @@ -956,12 +998,13 @@ static void apm_power_off(void) | |||
| 956 | static int apm_enable_power_management(int enable) | 998 | static int apm_enable_power_management(int enable) |
| 957 | { | 999 | { |
| 958 | u32 eax; | 1000 | u32 eax; |
| 1001 | int err; | ||
| 959 | 1002 | ||
| 960 | if ((enable == 0) && (apm_info.bios.flags & APM_BIOS_DISENGAGED)) | 1003 | if ((enable == 0) && (apm_info.bios.flags & APM_BIOS_DISENGAGED)) |
| 961 | return APM_NOT_ENGAGED; | 1004 | return APM_NOT_ENGAGED; |
| 962 | if (apm_bios_call_simple(APM_FUNC_ENABLE_PM, APM_DEVICE_BALL, | 1005 | if (apm_bios_call_simple(APM_FUNC_ENABLE_PM, APM_DEVICE_BALL, |
| 963 | enable, &eax)) | 1006 | enable, &eax, &err)) |
| 964 | return (eax >> 8) & 0xff; | 1007 | return err; |
| 965 | if (enable) | 1008 | if (enable) |
| 966 | apm_info.bios.flags &= ~APM_BIOS_DISABLED; | 1009 | apm_info.bios.flags &= ~APM_BIOS_DISABLED; |
| 967 | else | 1010 | else |
| @@ -986,24 +1029,23 @@ static int apm_enable_power_management(int enable) | |||
| 986 | 1029 | ||
| 987 | static int apm_get_power_status(u_short *status, u_short *bat, u_short *life) | 1030 | static int apm_get_power_status(u_short *status, u_short *bat, u_short *life) |
| 988 | { | 1031 | { |
| 989 | u32 eax; | 1032 | struct apm_bios_call call; |
| 990 | u32 ebx; | 1033 | |
| 991 | u32 ecx; | 1034 | call.func = APM_FUNC_GET_STATUS; |
| 992 | u32 edx; | 1035 | call.ebx = APM_DEVICE_ALL; |
| 993 | u32 dummy; | 1036 | call.ecx = 0; |
| 994 | 1037 | ||
| 995 | if (apm_info.get_power_status_broken) | 1038 | if (apm_info.get_power_status_broken) |
| 996 | return APM_32_UNSUPPORTED; | 1039 | return APM_32_UNSUPPORTED; |
| 997 | if (apm_bios_call(APM_FUNC_GET_STATUS, APM_DEVICE_ALL, 0, | 1040 | if (apm_bios_call(&call)) |
| 998 | &eax, &ebx, &ecx, &edx, &dummy)) | 1041 | return call.err; |
| 999 | return (eax >> 8) & 0xff; | 1042 | *status = call.ebx; |
| 1000 | *status = ebx; | 1043 | *bat = call.ecx; |
| 1001 | *bat = ecx; | ||
| 1002 | if (apm_info.get_power_status_swabinminutes) { | 1044 | if (apm_info.get_power_status_swabinminutes) { |
| 1003 | *life = swab16((u16)edx); | 1045 | *life = swab16((u16)call.edx); |
| 1004 | *life |= 0x8000; | 1046 | *life |= 0x8000; |
| 1005 | } else | 1047 | } else |
| 1006 | *life = edx; | 1048 | *life = call.edx; |
| 1007 | return APM_SUCCESS; | 1049 | return APM_SUCCESS; |
| 1008 | } | 1050 | } |
| 1009 | 1051 | ||
| @@ -1048,12 +1090,14 @@ static int apm_get_battery_status(u_short which, u_short *status, | |||
| 1048 | static int apm_engage_power_management(u_short device, int enable) | 1090 | static int apm_engage_power_management(u_short device, int enable) |
| 1049 | { | 1091 | { |
| 1050 | u32 eax; | 1092 | u32 eax; |
| 1093 | int err; | ||
| 1051 | 1094 | ||
| 1052 | if ((enable == 0) && (device == APM_DEVICE_ALL) | 1095 | if ((enable == 0) && (device == APM_DEVICE_ALL) |
| 1053 | && (apm_info.bios.flags & APM_BIOS_DISABLED)) | 1096 | && (apm_info.bios.flags & APM_BIOS_DISABLED)) |
| 1054 | return APM_DISABLED; | 1097 | return APM_DISABLED; |
| 1055 | if (apm_bios_call_simple(APM_FUNC_ENGAGE_PM, device, enable, &eax)) | 1098 | if (apm_bios_call_simple(APM_FUNC_ENGAGE_PM, device, enable, |
| 1056 | return (eax >> 8) & 0xff; | 1099 | &eax, &err)) |
| 1100 | return err; | ||
| 1057 | if (device == APM_DEVICE_ALL) { | 1101 | if (device == APM_DEVICE_ALL) { |
| 1058 | if (enable) | 1102 | if (enable) |
| 1059 | apm_info.bios.flags &= ~APM_BIOS_DISENGAGED; | 1103 | apm_info.bios.flags &= ~APM_BIOS_DISENGAGED; |
| @@ -1682,16 +1726,14 @@ static int apm(void *unused) | |||
| 1682 | char *power_stat; | 1726 | char *power_stat; |
| 1683 | char *bat_stat; | 1727 | char *bat_stat; |
| 1684 | 1728 | ||
| 1685 | #ifdef CONFIG_SMP | ||
| 1686 | /* 2002/08/01 - WT | 1729 | /* 2002/08/01 - WT |
| 1687 | * This is to avoid random crashes at boot time during initialization | 1730 | * This is to avoid random crashes at boot time during initialization |
| 1688 | * on SMP systems in case of "apm=power-off" mode. Seen on ASUS A7M266D. | 1731 | * on SMP systems in case of "apm=power-off" mode. Seen on ASUS A7M266D. |
| 1689 | * Some bioses don't like being called from CPU != 0. | 1732 | * Some bioses don't like being called from CPU != 0. |
| 1690 | * Method suggested by Ingo Molnar. | 1733 | * Method suggested by Ingo Molnar. |
| 1691 | */ | 1734 | */ |
| 1692 | set_cpus_allowed(current, cpumask_of_cpu(0)); | 1735 | set_cpus_allowed_ptr(current, cpumask_of(0)); |
| 1693 | BUG_ON(smp_processor_id() != 0); | 1736 | BUG_ON(smp_processor_id() != 0); |
| 1694 | #endif | ||
| 1695 | 1737 | ||
| 1696 | if (apm_info.connection_version == 0) { | 1738 | if (apm_info.connection_version == 0) { |
| 1697 | apm_info.connection_version = apm_info.bios.version; | 1739 | apm_info.connection_version = apm_info.bios.version; |
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index ee4df08feee6..fbf2f33e3080 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c | |||
| @@ -75,6 +75,7 @@ void foo(void) | |||
| 75 | OFFSET(PT_DS, pt_regs, ds); | 75 | OFFSET(PT_DS, pt_regs, ds); |
| 76 | OFFSET(PT_ES, pt_regs, es); | 76 | OFFSET(PT_ES, pt_regs, es); |
| 77 | OFFSET(PT_FS, pt_regs, fs); | 77 | OFFSET(PT_FS, pt_regs, fs); |
| 78 | OFFSET(PT_GS, pt_regs, gs); | ||
| 78 | OFFSET(PT_ORIG_EAX, pt_regs, orig_ax); | 79 | OFFSET(PT_ORIG_EAX, pt_regs, orig_ax); |
| 79 | OFFSET(PT_EIP, pt_regs, ip); | 80 | OFFSET(PT_EIP, pt_regs, ip); |
| 80 | OFFSET(PT_CS, pt_regs, cs); | 81 | OFFSET(PT_CS, pt_regs, cs); |
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 1d41d3f1edbc..8793ab33e2c1 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c | |||
| @@ -11,7 +11,6 @@ | |||
| 11 | #include <linux/hardirq.h> | 11 | #include <linux/hardirq.h> |
| 12 | #include <linux/suspend.h> | 12 | #include <linux/suspend.h> |
| 13 | #include <linux/kbuild.h> | 13 | #include <linux/kbuild.h> |
| 14 | #include <asm/pda.h> | ||
| 15 | #include <asm/processor.h> | 14 | #include <asm/processor.h> |
| 16 | #include <asm/segment.h> | 15 | #include <asm/segment.h> |
| 17 | #include <asm/thread_info.h> | 16 | #include <asm/thread_info.h> |
| @@ -48,16 +47,6 @@ int main(void) | |||
| 48 | #endif | 47 | #endif |
| 49 | BLANK(); | 48 | BLANK(); |
| 50 | #undef ENTRY | 49 | #undef ENTRY |
| 51 | #define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry)) | ||
| 52 | ENTRY(kernelstack); | ||
| 53 | ENTRY(oldrsp); | ||
| 54 | ENTRY(pcurrent); | ||
| 55 | ENTRY(irqcount); | ||
| 56 | ENTRY(cpunumber); | ||
| 57 | ENTRY(irqstackptr); | ||
| 58 | ENTRY(data_offset); | ||
| 59 | BLANK(); | ||
| 60 | #undef ENTRY | ||
| 61 | #ifdef CONFIG_PARAVIRT | 50 | #ifdef CONFIG_PARAVIRT |
| 62 | BLANK(); | 51 | BLANK(); |
| 63 | OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled); | 52 | OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled); |
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index 2cf23634b6d9..6882a735d9c0 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c | |||
| @@ -7,7 +7,7 @@ | |||
| 7 | #include <asm/pat.h> | 7 | #include <asm/pat.h> |
| 8 | #include <asm/processor.h> | 8 | #include <asm/processor.h> |
| 9 | 9 | ||
| 10 | #include <mach_apic.h> | 10 | #include <asm/apic.h> |
| 11 | 11 | ||
| 12 | struct cpuid_bit { | 12 | struct cpuid_bit { |
| 13 | u16 feature; | 13 | u16 feature; |
| @@ -69,7 +69,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) | |||
| 69 | */ | 69 | */ |
| 70 | void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) | 70 | void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) |
| 71 | { | 71 | { |
| 72 | #ifdef CONFIG_X86_SMP | 72 | #ifdef CONFIG_SMP |
| 73 | unsigned int eax, ebx, ecx, edx, sub_index; | 73 | unsigned int eax, ebx, ecx, edx, sub_index; |
| 74 | unsigned int ht_mask_width, core_plus_mask_width; | 74 | unsigned int ht_mask_width, core_plus_mask_width; |
| 75 | unsigned int core_select_mask, core_level_siblings; | 75 | unsigned int core_select_mask, core_level_siblings; |
| @@ -116,22 +116,14 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) | |||
| 116 | 116 | ||
| 117 | core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width; | 117 | core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width; |
| 118 | 118 | ||
| 119 | #ifdef CONFIG_X86_32 | 119 | c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, ht_mask_width) |
| 120 | c->cpu_core_id = phys_pkg_id(c->initial_apicid, ht_mask_width) | ||
| 121 | & core_select_mask; | 120 | & core_select_mask; |
| 122 | c->phys_proc_id = phys_pkg_id(c->initial_apicid, core_plus_mask_width); | 121 | c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, core_plus_mask_width); |
| 123 | /* | 122 | /* |
| 124 | * Reinit the apicid, now that we have extended initial_apicid. | 123 | * Reinit the apicid, now that we have extended initial_apicid. |
| 125 | */ | 124 | */ |
| 126 | c->apicid = phys_pkg_id(c->initial_apicid, 0); | 125 | c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); |
| 127 | #else | 126 | |
| 128 | c->cpu_core_id = phys_pkg_id(ht_mask_width) & core_select_mask; | ||
| 129 | c->phys_proc_id = phys_pkg_id(core_plus_mask_width); | ||
| 130 | /* | ||
| 131 | * Reinit the apicid, now that we have extended initial_apicid. | ||
| 132 | */ | ||
| 133 | c->apicid = phys_pkg_id(0); | ||
| 134 | #endif | ||
| 135 | c->x86_max_cores = (core_level_siblings / smp_num_siblings); | 127 | c->x86_max_cores = (core_level_siblings / smp_num_siblings); |
| 136 | 128 | ||
| 137 | 129 | ||
| @@ -143,37 +135,3 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) | |||
| 143 | return; | 135 | return; |
| 144 | #endif | 136 | #endif |
| 145 | } | 137 | } |
| 146 | |||
| 147 | #ifdef CONFIG_X86_PAT | ||
| 148 | void __cpuinit validate_pat_support(struct cpuinfo_x86 *c) | ||
| 149 | { | ||
| 150 | if (!cpu_has_pat) | ||
| 151 | pat_disable("PAT not supported by CPU."); | ||
| 152 | |||
| 153 | switch (c->x86_vendor) { | ||
| 154 | case X86_VENDOR_INTEL: | ||
| 155 | /* | ||
| 156 | * There is a known erratum on Pentium III and Core Solo | ||
| 157 | * and Core Duo CPUs. | ||
| 158 | * " Page with PAT set to WC while associated MTRR is UC | ||
| 159 | * may consolidate to UC " | ||
| 160 | * Because of this erratum, it is better to stick with | ||
| 161 | * setting WC in MTRR rather than using PAT on these CPUs. | ||
| 162 | * | ||
| 163 | * Enable PAT WC only on P4, Core 2 or later CPUs. | ||
| 164 | */ | ||
| 165 | if (c->x86 > 0x6 || (c->x86 == 6 && c->x86_model >= 15)) | ||
| 166 | return; | ||
| 167 | |||
| 168 | pat_disable("PAT WC disabled due to known CPU erratum."); | ||
| 169 | return; | ||
| 170 | |||
| 171 | case X86_VENDOR_AMD: | ||
| 172 | case X86_VENDOR_CENTAUR: | ||
| 173 | case X86_VENDOR_TRANSMETA: | ||
| 174 | return; | ||
| 175 | } | ||
| 176 | |||
| 177 | pat_disable("PAT disabled. Not yet verified on this CPU type."); | ||
| 178 | } | ||
| 179 | #endif | ||
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 7c878f6aa919..f47df59016c5 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | #include <asm/io.h> | 5 | #include <asm/io.h> |
| 6 | #include <asm/processor.h> | 6 | #include <asm/processor.h> |
| 7 | #include <asm/apic.h> | 7 | #include <asm/apic.h> |
| 8 | #include <asm/cpu.h> | ||
| 8 | 9 | ||
| 9 | #ifdef CONFIG_X86_64 | 10 | #ifdef CONFIG_X86_64 |
| 10 | # include <asm/numa_64.h> | 11 | # include <asm/numa_64.h> |
| @@ -12,8 +13,6 @@ | |||
| 12 | # include <asm/cacheflush.h> | 13 | # include <asm/cacheflush.h> |
| 13 | #endif | 14 | #endif |
| 14 | 15 | ||
| 15 | #include <mach_apic.h> | ||
| 16 | |||
| 17 | #include "cpu.h" | 16 | #include "cpu.h" |
| 18 | 17 | ||
| 19 | #ifdef CONFIG_X86_32 | 18 | #ifdef CONFIG_X86_32 |
| @@ -143,6 +142,55 @@ static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c) | |||
| 143 | } | 142 | } |
| 144 | } | 143 | } |
| 145 | 144 | ||
| 145 | static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c) | ||
| 146 | { | ||
| 147 | #ifdef CONFIG_SMP | ||
| 148 | /* are we being called from identify_secondary_cpu()? */ | ||
| 149 | if (c->cpu_index == boot_cpu_id) | ||
| 150 | return; | ||
| 151 | |||
| 152 | /* | ||
| 153 | * Certain Athlons might work (for various values of 'work') in SMP | ||
| 154 | * but they are not certified as MP capable. | ||
| 155 | */ | ||
| 156 | /* Athlon 660/661 is valid. */ | ||
| 157 | if ((c->x86_model == 6) && ((c->x86_mask == 0) || | ||
| 158 | (c->x86_mask == 1))) | ||
| 159 | goto valid_k7; | ||
| 160 | |||
| 161 | /* Duron 670 is valid */ | ||
| 162 | if ((c->x86_model == 7) && (c->x86_mask == 0)) | ||
| 163 | goto valid_k7; | ||
| 164 | |||
| 165 | /* | ||
| 166 | * Athlon 662, Duron 671, and Athlon >model 7 have capability | ||
| 167 | * bit. It's worth noting that the A5 stepping (662) of some | ||
| 168 | * Athlon XP's have the MP bit set. | ||
| 169 | * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for | ||
| 170 | * more. | ||
| 171 | */ | ||
| 172 | if (((c->x86_model == 6) && (c->x86_mask >= 2)) || | ||
| 173 | ((c->x86_model == 7) && (c->x86_mask >= 1)) || | ||
| 174 | (c->x86_model > 7)) | ||
| 175 | if (cpu_has_mp) | ||
| 176 | goto valid_k7; | ||
| 177 | |||
| 178 | /* If we get here, not a certified SMP capable AMD system. */ | ||
| 179 | |||
| 180 | /* | ||
| 181 | * Don't taint if we are running SMP kernel on a single non-MP | ||
| 182 | * approved Athlon | ||
| 183 | */ | ||
| 184 | WARN_ONCE(1, "WARNING: This combination of AMD" | ||
| 185 | "processors is not suitable for SMP.\n"); | ||
| 186 | if (!test_taint(TAINT_UNSAFE_SMP)) | ||
| 187 | add_taint(TAINT_UNSAFE_SMP); | ||
| 188 | |||
| 189 | valid_k7: | ||
| 190 | ; | ||
| 191 | #endif | ||
| 192 | } | ||
| 193 | |||
| 146 | static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c) | 194 | static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c) |
| 147 | { | 195 | { |
| 148 | u32 l, h; | 196 | u32 l, h; |
| @@ -177,6 +225,8 @@ static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c) | |||
| 177 | } | 225 | } |
| 178 | 226 | ||
| 179 | set_cpu_cap(c, X86_FEATURE_K7); | 227 | set_cpu_cap(c, X86_FEATURE_K7); |
| 228 | |||
| 229 | amd_k7_smp_check(c); | ||
| 180 | } | 230 | } |
| 181 | #endif | 231 | #endif |
| 182 | 232 | ||
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 83492b1f93b1..d7dd3c294e2a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
| @@ -21,14 +21,14 @@ | |||
| 21 | #include <asm/asm.h> | 21 | #include <asm/asm.h> |
| 22 | #include <asm/numa.h> | 22 | #include <asm/numa.h> |
| 23 | #include <asm/smp.h> | 23 | #include <asm/smp.h> |
| 24 | #ifdef CONFIG_X86_LOCAL_APIC | 24 | #include <asm/cpu.h> |
| 25 | #include <asm/mpspec.h> | 25 | #include <asm/cpumask.h> |
| 26 | #include <asm/apic.h> | 26 | #include <asm/apic.h> |
| 27 | #include <mach_apic.h> | 27 | |
| 28 | #include <asm/genapic.h> | 28 | #ifdef CONFIG_X86_LOCAL_APIC |
| 29 | #include <asm/uv/uv.h> | ||
| 29 | #endif | 30 | #endif |
| 30 | 31 | ||
| 31 | #include <asm/pda.h> | ||
| 32 | #include <asm/pgtable.h> | 32 | #include <asm/pgtable.h> |
| 33 | #include <asm/processor.h> | 33 | #include <asm/processor.h> |
| 34 | #include <asm/desc.h> | 34 | #include <asm/desc.h> |
| @@ -37,11 +37,10 @@ | |||
| 37 | #include <asm/sections.h> | 37 | #include <asm/sections.h> |
| 38 | #include <asm/setup.h> | 38 | #include <asm/setup.h> |
| 39 | #include <asm/hypervisor.h> | 39 | #include <asm/hypervisor.h> |
| 40 | #include <asm/stackprotector.h> | ||
| 40 | 41 | ||
| 41 | #include "cpu.h" | 42 | #include "cpu.h" |
| 42 | 43 | ||
| 43 | #ifdef CONFIG_X86_64 | ||
| 44 | |||
| 45 | /* all of these masks are initialized in setup_cpu_local_masks() */ | 44 | /* all of these masks are initialized in setup_cpu_local_masks() */ |
| 46 | cpumask_var_t cpu_callin_mask; | 45 | cpumask_var_t cpu_callin_mask; |
| 47 | cpumask_var_t cpu_callout_mask; | 46 | cpumask_var_t cpu_callout_mask; |
| @@ -50,35 +49,34 @@ cpumask_var_t cpu_initialized_mask; | |||
| 50 | /* representing cpus for which sibling maps can be computed */ | 49 | /* representing cpus for which sibling maps can be computed */ |
| 51 | cpumask_var_t cpu_sibling_setup_mask; | 50 | cpumask_var_t cpu_sibling_setup_mask; |
| 52 | 51 | ||
| 53 | #else /* CONFIG_X86_32 */ | 52 | /* correctly size the local cpu masks */ |
| 54 | 53 | void __init setup_cpu_local_masks(void) | |
| 55 | cpumask_t cpu_callin_map; | 54 | { |
| 56 | cpumask_t cpu_callout_map; | 55 | alloc_bootmem_cpumask_var(&cpu_initialized_mask); |
| 57 | cpumask_t cpu_initialized; | 56 | alloc_bootmem_cpumask_var(&cpu_callin_mask); |
| 58 | cpumask_t cpu_sibling_setup_map; | 57 | alloc_bootmem_cpumask_var(&cpu_callout_mask); |
| 59 | 58 | alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); | |
| 60 | #endif /* CONFIG_X86_32 */ | 59 | } |
| 61 | |||
| 62 | 60 | ||
| 63 | static struct cpu_dev *this_cpu __cpuinitdata; | 61 | static struct cpu_dev *this_cpu __cpuinitdata; |
| 64 | 62 | ||
| 63 | DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { | ||
| 65 | #ifdef CONFIG_X86_64 | 64 | #ifdef CONFIG_X86_64 |
| 66 | /* We need valid kernel segments for data and code in long mode too | 65 | /* |
| 67 | * IRET will check the segment types kkeil 2000/10/28 | 66 | * We need valid kernel segments for data and code in long mode too |
| 68 | * Also sysret mandates a special GDT layout | 67 | * IRET will check the segment types kkeil 2000/10/28 |
| 69 | */ | 68 | * Also sysret mandates a special GDT layout |
| 70 | /* The TLS descriptors are currently at a different place compared to i386. | 69 | * |
| 71 | Hopefully nobody expects them at a fixed place (Wine?) */ | 70 | * The TLS descriptors are currently at a different place compared to i386. |
| 72 | DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { | 71 | * Hopefully nobody expects them at a fixed place (Wine?) |
| 72 | */ | ||
| 73 | [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, | 73 | [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, |
| 74 | [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, | 74 | [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, |
| 75 | [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, | 75 | [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, |
| 76 | [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, | 76 | [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, |
| 77 | [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, | 77 | [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, |
| 78 | [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, | 78 | [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, |
| 79 | } }; | ||
| 80 | #else | 79 | #else |
| 81 | DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { | ||
| 82 | [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, | 80 | [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, |
| 83 | [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, | 81 | [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, |
| 84 | [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, | 82 | [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, |
| @@ -110,9 +108,10 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { | |||
| 110 | [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, | 108 | [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, |
| 111 | 109 | ||
| 112 | [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, | 110 | [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, |
| 113 | [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } }, | 111 | [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } }, |
| 114 | } }; | 112 | GDT_STACK_CANARY_INIT |
| 115 | #endif | 113 | #endif |
| 114 | } }; | ||
| 116 | EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); | 115 | EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); |
| 117 | 116 | ||
| 118 | #ifdef CONFIG_X86_32 | 117 | #ifdef CONFIG_X86_32 |
| @@ -213,6 +212,49 @@ static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) | |||
| 213 | #endif | 212 | #endif |
| 214 | 213 | ||
| 215 | /* | 214 | /* |
| 215 | * Some CPU features depend on higher CPUID levels, which may not always | ||
| 216 | * be available due to CPUID level capping or broken virtualization | ||
| 217 | * software. Add those features to this table to auto-disable them. | ||
| 218 | */ | ||
| 219 | struct cpuid_dependent_feature { | ||
| 220 | u32 feature; | ||
| 221 | u32 level; | ||
| 222 | }; | ||
| 223 | static const struct cpuid_dependent_feature __cpuinitconst | ||
| 224 | cpuid_dependent_features[] = { | ||
| 225 | { X86_FEATURE_MWAIT, 0x00000005 }, | ||
| 226 | { X86_FEATURE_DCA, 0x00000009 }, | ||
| 227 | { X86_FEATURE_XSAVE, 0x0000000d }, | ||
| 228 | { 0, 0 } | ||
| 229 | }; | ||
| 230 | |||
| 231 | static void __cpuinit filter_cpuid_features(struct cpuinfo_x86 *c, bool warn) | ||
| 232 | { | ||
| 233 | const struct cpuid_dependent_feature *df; | ||
| 234 | for (df = cpuid_dependent_features; df->feature; df++) { | ||
| 235 | /* | ||
| 236 | * Note: cpuid_level is set to -1 if unavailable, but | ||
| 237 | * extended_cpuid_level is set to 0 if unavailable | ||
| 238 | * and the legitimate extended levels are all negative | ||
| 239 | * when signed; hence the weird messing around with | ||
| 240 | * signs here... | ||
| 241 | */ | ||
| 242 | if (cpu_has(c, df->feature) && | ||
| 243 | ((s32)df->level < 0 ? | ||
| 244 | (u32)df->level > (u32)c->extended_cpuid_level : | ||
| 245 | (s32)df->level > (s32)c->cpuid_level)) { | ||
| 246 | clear_cpu_cap(c, df->feature); | ||
| 247 | if (warn) | ||
| 248 | printk(KERN_WARNING | ||
| 249 | "CPU: CPU feature %s disabled " | ||
| 250 | "due to lack of CPUID level 0x%x\n", | ||
| 251 | x86_cap_flags[df->feature], | ||
| 252 | df->level); | ||
| 253 | } | ||
| 254 | } | ||
| 255 | } | ||
| 256 | |||
| 257 | /* | ||
| 216 | * Naming convention should be: <Name> [(<Codename>)] | 258 | * Naming convention should be: <Name> [(<Codename>)] |
| 217 | * This table is only used if init_<vendor>() below doesn't set it; | 259 | * This table is only used if init_<vendor>() below doesn't set it; |
| 218 | * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used | 260 | * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used |
| @@ -242,18 +284,29 @@ static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) | |||
| 242 | 284 | ||
| 243 | __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; | 285 | __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; |
| 244 | 286 | ||
| 287 | void load_percpu_segment(int cpu) | ||
| 288 | { | ||
| 289 | #ifdef CONFIG_X86_32 | ||
| 290 | loadsegment(fs, __KERNEL_PERCPU); | ||
| 291 | #else | ||
| 292 | loadsegment(gs, 0); | ||
| 293 | wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); | ||
| 294 | #endif | ||
| 295 | load_stack_canary_segment(); | ||
| 296 | } | ||
| 297 | |||
| 245 | /* Current gdt points %fs at the "master" per-cpu area: after this, | 298 | /* Current gdt points %fs at the "master" per-cpu area: after this, |
| 246 | * it's on the real one. */ | 299 | * it's on the real one. */ |
| 247 | void switch_to_new_gdt(void) | 300 | void switch_to_new_gdt(int cpu) |
| 248 | { | 301 | { |
| 249 | struct desc_ptr gdt_descr; | 302 | struct desc_ptr gdt_descr; |
| 250 | 303 | ||
| 251 | gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); | 304 | gdt_descr.address = (long)get_cpu_gdt_table(cpu); |
| 252 | gdt_descr.size = GDT_SIZE - 1; | 305 | gdt_descr.size = GDT_SIZE - 1; |
| 253 | load_gdt(&gdt_descr); | 306 | load_gdt(&gdt_descr); |
| 254 | #ifdef CONFIG_X86_32 | 307 | /* Reload the per-cpu base */ |
| 255 | asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); | 308 | |
| 256 | #endif | 309 | load_percpu_segment(cpu); |
| 257 | } | 310 | } |
| 258 | 311 | ||
| 259 | static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; | 312 | static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; |
| @@ -383,11 +436,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) | |||
| 383 | } | 436 | } |
| 384 | 437 | ||
| 385 | index_msb = get_count_order(smp_num_siblings); | 438 | index_msb = get_count_order(smp_num_siblings); |
| 386 | #ifdef CONFIG_X86_64 | 439 | c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, index_msb); |
| 387 | c->phys_proc_id = phys_pkg_id(index_msb); | ||
| 388 | #else | ||
| 389 | c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb); | ||
| 390 | #endif | ||
| 391 | 440 | ||
| 392 | smp_num_siblings = smp_num_siblings / c->x86_max_cores; | 441 | smp_num_siblings = smp_num_siblings / c->x86_max_cores; |
| 393 | 442 | ||
| @@ -395,13 +444,8 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) | |||
| 395 | 444 | ||
| 396 | core_bits = get_count_order(c->x86_max_cores); | 445 | core_bits = get_count_order(c->x86_max_cores); |
| 397 | 446 | ||
| 398 | #ifdef CONFIG_X86_64 | 447 | c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, index_msb) & |
| 399 | c->cpu_core_id = phys_pkg_id(index_msb) & | ||
| 400 | ((1 << core_bits) - 1); | ||
| 401 | #else | ||
| 402 | c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) & | ||
| 403 | ((1 << core_bits) - 1); | 448 | ((1 << core_bits) - 1); |
| 404 | #endif | ||
| 405 | } | 449 | } |
| 406 | 450 | ||
| 407 | out: | 451 | out: |
| @@ -570,11 +614,10 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) | |||
| 570 | if (this_cpu->c_early_init) | 614 | if (this_cpu->c_early_init) |
| 571 | this_cpu->c_early_init(c); | 615 | this_cpu->c_early_init(c); |
| 572 | 616 | ||
| 573 | validate_pat_support(c); | ||
| 574 | |||
| 575 | #ifdef CONFIG_SMP | 617 | #ifdef CONFIG_SMP |
| 576 | c->cpu_index = boot_cpu_id; | 618 | c->cpu_index = boot_cpu_id; |
| 577 | #endif | 619 | #endif |
| 620 | filter_cpuid_features(c, false); | ||
| 578 | } | 621 | } |
| 579 | 622 | ||
| 580 | void __init early_cpu_init(void) | 623 | void __init early_cpu_init(void) |
| @@ -637,7 +680,7 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) | |||
| 637 | c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF; | 680 | c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF; |
| 638 | #ifdef CONFIG_X86_32 | 681 | #ifdef CONFIG_X86_32 |
| 639 | # ifdef CONFIG_X86_HT | 682 | # ifdef CONFIG_X86_HT |
| 640 | c->apicid = phys_pkg_id(c->initial_apicid, 0); | 683 | c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); |
| 641 | # else | 684 | # else |
| 642 | c->apicid = c->initial_apicid; | 685 | c->apicid = c->initial_apicid; |
| 643 | # endif | 686 | # endif |
| @@ -684,7 +727,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | |||
| 684 | this_cpu->c_identify(c); | 727 | this_cpu->c_identify(c); |
| 685 | 728 | ||
| 686 | #ifdef CONFIG_X86_64 | 729 | #ifdef CONFIG_X86_64 |
| 687 | c->apicid = phys_pkg_id(0); | 730 | c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); |
| 688 | #endif | 731 | #endif |
| 689 | 732 | ||
| 690 | /* | 733 | /* |
| @@ -708,6 +751,9 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | |||
| 708 | * we do "generic changes." | 751 | * we do "generic changes." |
| 709 | */ | 752 | */ |
| 710 | 753 | ||
| 754 | /* Filter out anything that depends on CPUID levels we don't have */ | ||
| 755 | filter_cpuid_features(c, true); | ||
| 756 | |||
| 711 | /* If the model name is still unset, do table lookup. */ | 757 | /* If the model name is still unset, do table lookup. */ |
| 712 | if (!c->x86_model_id[0]) { | 758 | if (!c->x86_model_id[0]) { |
| 713 | char *p; | 759 | char *p; |
| @@ -766,6 +812,7 @@ static void vgetcpu_set_mode(void) | |||
| 766 | void __init identify_boot_cpu(void) | 812 | void __init identify_boot_cpu(void) |
| 767 | { | 813 | { |
| 768 | identify_cpu(&boot_cpu_data); | 814 | identify_cpu(&boot_cpu_data); |
| 815 | init_c1e_mask(); | ||
| 769 | #ifdef CONFIG_X86_32 | 816 | #ifdef CONFIG_X86_32 |
| 770 | sysenter_setup(); | 817 | sysenter_setup(); |
| 771 | enable_sep_cpu(); | 818 | enable_sep_cpu(); |
| @@ -877,54 +924,22 @@ static __init int setup_disablecpuid(char *arg) | |||
| 877 | __setup("clearcpuid=", setup_disablecpuid); | 924 | __setup("clearcpuid=", setup_disablecpuid); |
| 878 | 925 | ||
| 879 | #ifdef CONFIG_X86_64 | 926 | #ifdef CONFIG_X86_64 |
| 880 | struct x8664_pda **_cpu_pda __read_mostly; | ||
| 881 | EXPORT_SYMBOL(_cpu_pda); | ||
| 882 | |||
| 883 | struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; | 927 | struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; |
| 884 | 928 | ||
| 885 | static char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; | 929 | DEFINE_PER_CPU_FIRST(union irq_stack_union, |
| 930 | irq_stack_union) __aligned(PAGE_SIZE); | ||
| 931 | DEFINE_PER_CPU(char *, irq_stack_ptr) = | ||
| 932 | init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; | ||
| 886 | 933 | ||
| 887 | void __cpuinit pda_init(int cpu) | 934 | DEFINE_PER_CPU(unsigned long, kernel_stack) = |
| 888 | { | 935 | (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE; |
| 889 | struct x8664_pda *pda = cpu_pda(cpu); | 936 | EXPORT_PER_CPU_SYMBOL(kernel_stack); |
| 890 | 937 | ||
| 891 | /* Setup up data that may be needed in __get_free_pages early */ | 938 | DEFINE_PER_CPU(unsigned int, irq_count) = -1; |
| 892 | loadsegment(fs, 0); | ||
| 893 | loadsegment(gs, 0); | ||
| 894 | /* Memory clobbers used to order PDA accessed */ | ||
| 895 | mb(); | ||
| 896 | wrmsrl(MSR_GS_BASE, pda); | ||
| 897 | mb(); | ||
| 898 | |||
| 899 | pda->cpunumber = cpu; | ||
| 900 | pda->irqcount = -1; | ||
| 901 | pda->kernelstack = (unsigned long)stack_thread_info() - | ||
| 902 | PDA_STACKOFFSET + THREAD_SIZE; | ||
| 903 | pda->active_mm = &init_mm; | ||
| 904 | pda->mmu_state = 0; | ||
| 905 | |||
| 906 | if (cpu == 0) { | ||
| 907 | /* others are initialized in smpboot.c */ | ||
| 908 | pda->pcurrent = &init_task; | ||
| 909 | pda->irqstackptr = boot_cpu_stack; | ||
| 910 | pda->irqstackptr += IRQSTACKSIZE - 64; | ||
| 911 | } else { | ||
| 912 | if (!pda->irqstackptr) { | ||
| 913 | pda->irqstackptr = (char *) | ||
| 914 | __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); | ||
| 915 | if (!pda->irqstackptr) | ||
| 916 | panic("cannot allocate irqstack for cpu %d", | ||
| 917 | cpu); | ||
| 918 | pda->irqstackptr += IRQSTACKSIZE - 64; | ||
| 919 | } | ||
| 920 | |||
| 921 | if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) | ||
| 922 | pda->nodenumber = cpu_to_node(cpu); | ||
| 923 | } | ||
| 924 | } | ||
| 925 | 939 | ||
| 926 | static char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + | 940 | static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks |
| 927 | DEBUG_STKSZ] __page_aligned_bss; | 941 | [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]) |
| 942 | __aligned(PAGE_SIZE); | ||
| 928 | 943 | ||
| 929 | extern asmlinkage void ignore_sysret(void); | 944 | extern asmlinkage void ignore_sysret(void); |
| 930 | 945 | ||
| @@ -957,16 +972,21 @@ unsigned long kernel_eflags; | |||
| 957 | */ | 972 | */ |
| 958 | DEFINE_PER_CPU(struct orig_ist, orig_ist); | 973 | DEFINE_PER_CPU(struct orig_ist, orig_ist); |
| 959 | 974 | ||
| 960 | #else | 975 | #else /* x86_64 */ |
| 976 | |||
| 977 | #ifdef CONFIG_CC_STACKPROTECTOR | ||
| 978 | DEFINE_PER_CPU(unsigned long, stack_canary); | ||
| 979 | #endif | ||
| 961 | 980 | ||
| 962 | /* Make sure %fs is initialized properly in idle threads */ | 981 | /* Make sure %fs and %gs are initialized properly in idle threads */ |
| 963 | struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) | 982 | struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) |
| 964 | { | 983 | { |
| 965 | memset(regs, 0, sizeof(struct pt_regs)); | 984 | memset(regs, 0, sizeof(struct pt_regs)); |
| 966 | regs->fs = __KERNEL_PERCPU; | 985 | regs->fs = __KERNEL_PERCPU; |
| 986 | regs->gs = __KERNEL_STACK_CANARY; | ||
| 967 | return regs; | 987 | return regs; |
| 968 | } | 988 | } |
| 969 | #endif | 989 | #endif /* x86_64 */ |
| 970 | 990 | ||
| 971 | /* | 991 | /* |
| 972 | * cpu_init() initializes state that is per-CPU. Some data is already | 992 | * cpu_init() initializes state that is per-CPU. Some data is already |
| @@ -982,15 +1002,14 @@ void __cpuinit cpu_init(void) | |||
| 982 | struct tss_struct *t = &per_cpu(init_tss, cpu); | 1002 | struct tss_struct *t = &per_cpu(init_tss, cpu); |
| 983 | struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu); | 1003 | struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu); |
| 984 | unsigned long v; | 1004 | unsigned long v; |
| 985 | char *estacks = NULL; | ||
| 986 | struct task_struct *me; | 1005 | struct task_struct *me; |
| 987 | int i; | 1006 | int i; |
| 988 | 1007 | ||
| 989 | /* CPU 0 is initialised in head64.c */ | 1008 | #ifdef CONFIG_NUMA |
| 990 | if (cpu != 0) | 1009 | if (cpu != 0 && percpu_read(node_number) == 0 && |
| 991 | pda_init(cpu); | 1010 | cpu_to_node(cpu) != NUMA_NO_NODE) |
| 992 | else | 1011 | percpu_write(node_number, cpu_to_node(cpu)); |
| 993 | estacks = boot_exception_stacks; | 1012 | #endif |
| 994 | 1013 | ||
| 995 | me = current; | 1014 | me = current; |
| 996 | 1015 | ||
| @@ -1006,7 +1025,9 @@ void __cpuinit cpu_init(void) | |||
| 1006 | * and set up the GDT descriptor: | 1025 | * and set up the GDT descriptor: |
| 1007 | */ | 1026 | */ |
| 1008 | 1027 | ||
| 1009 | switch_to_new_gdt(); | 1028 | switch_to_new_gdt(cpu); |
| 1029 | loadsegment(fs, 0); | ||
| 1030 | |||
| 1010 | load_idt((const struct desc_ptr *)&idt_descr); | 1031 | load_idt((const struct desc_ptr *)&idt_descr); |
| 1011 | 1032 | ||
| 1012 | memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); | 1033 | memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); |
| @@ -1017,25 +1038,20 @@ void __cpuinit cpu_init(void) | |||
| 1017 | barrier(); | 1038 | barrier(); |
| 1018 | 1039 | ||
| 1019 | check_efer(); | 1040 | check_efer(); |
| 1020 | if (cpu != 0 && x2apic) | 1041 | if (cpu != 0) |
| 1021 | enable_x2apic(); | 1042 | enable_x2apic(); |
| 1022 | 1043 | ||
| 1023 | /* | 1044 | /* |
| 1024 | * set up and load the per-CPU TSS | 1045 | * set up and load the per-CPU TSS |
| 1025 | */ | 1046 | */ |
| 1026 | if (!orig_ist->ist[0]) { | 1047 | if (!orig_ist->ist[0]) { |
| 1027 | static const unsigned int order[N_EXCEPTION_STACKS] = { | 1048 | static const unsigned int sizes[N_EXCEPTION_STACKS] = { |
| 1028 | [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, | 1049 | [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, |
| 1029 | [DEBUG_STACK - 1] = DEBUG_STACK_ORDER | 1050 | [DEBUG_STACK - 1] = DEBUG_STKSZ |
| 1030 | }; | 1051 | }; |
| 1052 | char *estacks = per_cpu(exception_stacks, cpu); | ||
| 1031 | for (v = 0; v < N_EXCEPTION_STACKS; v++) { | 1053 | for (v = 0; v < N_EXCEPTION_STACKS; v++) { |
| 1032 | if (cpu) { | 1054 | estacks += sizes[v]; |
| 1033 | estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); | ||
| 1034 | if (!estacks) | ||
| 1035 | panic("Cannot allocate exception " | ||
| 1036 | "stack %ld %d\n", v, cpu); | ||
| 1037 | } | ||
| 1038 | estacks += PAGE_SIZE << order[v]; | ||
| 1039 | orig_ist->ist[v] = t->x86_tss.ist[v] = | 1055 | orig_ist->ist[v] = t->x86_tss.ist[v] = |
| 1040 | (unsigned long)estacks; | 1056 | (unsigned long)estacks; |
| 1041 | } | 1057 | } |
| @@ -1069,22 +1085,19 @@ void __cpuinit cpu_init(void) | |||
| 1069 | */ | 1085 | */ |
| 1070 | if (kgdb_connected && arch_kgdb_ops.correct_hw_break) | 1086 | if (kgdb_connected && arch_kgdb_ops.correct_hw_break) |
| 1071 | arch_kgdb_ops.correct_hw_break(); | 1087 | arch_kgdb_ops.correct_hw_break(); |
| 1072 | else { | 1088 | else |
| 1073 | #endif | 1089 | #endif |
| 1074 | /* | 1090 | { |
| 1075 | * Clear all 6 debug registers: | 1091 | /* |
| 1076 | */ | 1092 | * Clear all 6 debug registers: |
| 1077 | 1093 | */ | |
| 1078 | set_debugreg(0UL, 0); | 1094 | set_debugreg(0UL, 0); |
| 1079 | set_debugreg(0UL, 1); | 1095 | set_debugreg(0UL, 1); |
| 1080 | set_debugreg(0UL, 2); | 1096 | set_debugreg(0UL, 2); |
| 1081 | set_debugreg(0UL, 3); | 1097 | set_debugreg(0UL, 3); |
| 1082 | set_debugreg(0UL, 6); | 1098 | set_debugreg(0UL, 6); |
| 1083 | set_debugreg(0UL, 7); | 1099 | set_debugreg(0UL, 7); |
| 1084 | #ifdef CONFIG_KGDB | ||
| 1085 | /* If the kgdb is connected no debug regs should be altered. */ | ||
| 1086 | } | 1100 | } |
| 1087 | #endif | ||
| 1088 | 1101 | ||
| 1089 | fpu_init(); | 1102 | fpu_init(); |
| 1090 | 1103 | ||
| @@ -1114,7 +1127,7 @@ void __cpuinit cpu_init(void) | |||
| 1114 | clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); | 1127 | clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); |
| 1115 | 1128 | ||
| 1116 | load_idt(&idt_descr); | 1129 | load_idt(&idt_descr); |
| 1117 | switch_to_new_gdt(); | 1130 | switch_to_new_gdt(cpu); |
| 1118 | 1131 | ||
| 1119 | /* | 1132 | /* |
| 1120 | * Set up and load the per-CPU TSS and LDT | 1133 | * Set up and load the per-CPU TSS and LDT |
| @@ -1135,9 +1148,6 @@ void __cpuinit cpu_init(void) | |||
| 1135 | __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); | 1148 | __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); |
| 1136 | #endif | 1149 | #endif |
| 1137 | 1150 | ||
| 1138 | /* Clear %gs. */ | ||
| 1139 | asm volatile ("mov %0, %%gs" : : "r" (0)); | ||
| 1140 | |||
| 1141 | /* Clear all 6 debug registers: */ | 1151 | /* Clear all 6 debug registers: */ |
| 1142 | set_debugreg(0, 0); | 1152 | set_debugreg(0, 0); |
| 1143 | set_debugreg(0, 1); | 1153 | set_debugreg(0, 1); |
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 4b1c319d30c3..22590cf688ae 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | |||
| @@ -601,7 +601,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) | |||
| 601 | if (!data) | 601 | if (!data) |
| 602 | return -ENOMEM; | 602 | return -ENOMEM; |
| 603 | 603 | ||
| 604 | data->acpi_data = percpu_ptr(acpi_perf_data, cpu); | 604 | data->acpi_data = per_cpu_ptr(acpi_perf_data, cpu); |
| 605 | per_cpu(drv_data, cpu) = data; | 605 | per_cpu(drv_data, cpu) = data; |
| 606 | 606 | ||
| 607 | if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) | 607 | if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) |
diff --git a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c index c2f930d86640..41ab3f064cb1 100644 --- a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c +++ b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c | |||
| @@ -204,12 +204,12 @@ static int eps_cpu_init(struct cpufreq_policy *policy) | |||
| 204 | } | 204 | } |
| 205 | /* Enable Enhanced PowerSaver */ | 205 | /* Enable Enhanced PowerSaver */ |
| 206 | rdmsrl(MSR_IA32_MISC_ENABLE, val); | 206 | rdmsrl(MSR_IA32_MISC_ENABLE, val); |
| 207 | if (!(val & 1 << 16)) { | 207 | if (!(val & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) { |
| 208 | val |= 1 << 16; | 208 | val |= MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP; |
| 209 | wrmsrl(MSR_IA32_MISC_ENABLE, val); | 209 | wrmsrl(MSR_IA32_MISC_ENABLE, val); |
| 210 | /* Can be locked at 0 */ | 210 | /* Can be locked at 0 */ |
| 211 | rdmsrl(MSR_IA32_MISC_ENABLE, val); | 211 | rdmsrl(MSR_IA32_MISC_ENABLE, val); |
| 212 | if (!(val & 1 << 16)) { | 212 | if (!(val & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) { |
| 213 | printk(KERN_INFO "eps: Can't enable Enhanced PowerSaver\n"); | 213 | printk(KERN_INFO "eps: Can't enable Enhanced PowerSaver\n"); |
| 214 | return -ENODEV; | 214 | return -ENODEV; |
| 215 | } | 215 | } |
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c index 3178c3acd97e..d8341d17c189 100644 --- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c | |||
| @@ -203,7 +203,7 @@ static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy) | |||
| 203 | unsigned int i; | 203 | unsigned int i; |
| 204 | 204 | ||
| 205 | #ifdef CONFIG_SMP | 205 | #ifdef CONFIG_SMP |
| 206 | cpumask_copy(policy->cpus, &per_cpu(cpu_sibling_map, policy->cpu)); | 206 | cpumask_copy(policy->cpus, cpu_sibling_mask(policy->cpu)); |
| 207 | #endif | 207 | #endif |
| 208 | 208 | ||
| 209 | /* Errata workaround */ | 209 | /* Errata workaround */ |
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 6428aa17b40e..e8fd76f98883 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c | |||
| @@ -56,7 +56,10 @@ static DEFINE_PER_CPU(struct powernow_k8_data *, powernow_data); | |||
| 56 | static int cpu_family = CPU_OPTERON; | 56 | static int cpu_family = CPU_OPTERON; |
| 57 | 57 | ||
| 58 | #ifndef CONFIG_SMP | 58 | #ifndef CONFIG_SMP |
| 59 | DEFINE_PER_CPU(cpumask_t, cpu_core_map); | 59 | static inline const struct cpumask *cpu_core_mask(int cpu) |
| 60 | { | ||
| 61 | return cpumask_of(0); | ||
| 62 | } | ||
| 60 | #endif | 63 | #endif |
| 61 | 64 | ||
| 62 | /* Return a frequency in MHz, given an input fid */ | 65 | /* Return a frequency in MHz, given an input fid */ |
| @@ -654,7 +657,7 @@ static int fill_powernow_table(struct powernow_k8_data *data, struct pst_s *pst, | |||
| 654 | 657 | ||
| 655 | dprintk("cfid 0x%x, cvid 0x%x\n", data->currfid, data->currvid); | 658 | dprintk("cfid 0x%x, cvid 0x%x\n", data->currfid, data->currvid); |
| 656 | data->powernow_table = powernow_table; | 659 | data->powernow_table = powernow_table; |
| 657 | if (first_cpu(per_cpu(cpu_core_map, data->cpu)) == data->cpu) | 660 | if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu) |
| 658 | print_basics(data); | 661 | print_basics(data); |
| 659 | 662 | ||
| 660 | for (j = 0; j < data->numps; j++) | 663 | for (j = 0; j < data->numps; j++) |
| @@ -808,7 +811,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) | |||
| 808 | 811 | ||
| 809 | /* fill in data */ | 812 | /* fill in data */ |
| 810 | data->numps = data->acpi_data.state_count; | 813 | data->numps = data->acpi_data.state_count; |
| 811 | if (first_cpu(per_cpu(cpu_core_map, data->cpu)) == data->cpu) | 814 | if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu) |
| 812 | print_basics(data); | 815 | print_basics(data); |
| 813 | powernow_k8_acpi_pst_values(data, 0); | 816 | powernow_k8_acpi_pst_values(data, 0); |
| 814 | 817 | ||
| @@ -1224,7 +1227,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) | |||
| 1224 | if (cpu_family == CPU_HW_PSTATE) | 1227 | if (cpu_family == CPU_HW_PSTATE) |
| 1225 | cpumask_copy(pol->cpus, cpumask_of(pol->cpu)); | 1228 | cpumask_copy(pol->cpus, cpumask_of(pol->cpu)); |
| 1226 | else | 1229 | else |
| 1227 | cpumask_copy(pol->cpus, &per_cpu(cpu_core_map, pol->cpu)); | 1230 | cpumask_copy(pol->cpus, cpu_core_mask(pol->cpu)); |
| 1228 | data->available_cores = pol->cpus; | 1231 | data->available_cores = pol->cpus; |
| 1229 | 1232 | ||
| 1230 | if (cpu_family == CPU_HW_PSTATE) | 1233 | if (cpu_family == CPU_HW_PSTATE) |
| @@ -1286,7 +1289,7 @@ static unsigned int powernowk8_get (unsigned int cpu) | |||
| 1286 | unsigned int khz = 0; | 1289 | unsigned int khz = 0; |
| 1287 | unsigned int first; | 1290 | unsigned int first; |
| 1288 | 1291 | ||
| 1289 | first = first_cpu(per_cpu(cpu_core_map, cpu)); | 1292 | first = cpumask_first(cpu_core_mask(cpu)); |
| 1290 | data = per_cpu(powernow_data, first); | 1293 | data = per_cpu(powernow_data, first); |
| 1291 | 1294 | ||
| 1292 | if (!data) | 1295 | if (!data) |
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c index f08998278a3a..c9f1fdc02830 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c | |||
| @@ -390,14 +390,14 @@ static int centrino_cpu_init(struct cpufreq_policy *policy) | |||
| 390 | enable it if not. */ | 390 | enable it if not. */ |
| 391 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | 391 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); |
| 392 | 392 | ||
| 393 | if (!(l & (1<<16))) { | 393 | if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) { |
| 394 | l |= (1<<16); | 394 | l |= MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP; |
| 395 | dprintk("trying to enable Enhanced SpeedStep (%x)\n", l); | 395 | dprintk("trying to enable Enhanced SpeedStep (%x)\n", l); |
| 396 | wrmsr(MSR_IA32_MISC_ENABLE, l, h); | 396 | wrmsr(MSR_IA32_MISC_ENABLE, l, h); |
| 397 | 397 | ||
| 398 | /* check to see if it stuck */ | 398 | /* check to see if it stuck */ |
| 399 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | 399 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); |
| 400 | if (!(l & (1<<16))) { | 400 | if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) { |
| 401 | printk(KERN_INFO PFX | 401 | printk(KERN_INFO PFX |
| 402 | "couldn't enable Enhanced SpeedStep\n"); | 402 | "couldn't enable Enhanced SpeedStep\n"); |
| 403 | return -ENODEV; | 403 | return -ENODEV; |
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c index dedc1e98f168..1f0ec83d343b 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c | |||
| @@ -322,7 +322,7 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy) | |||
| 322 | 322 | ||
| 323 | /* only run on CPU to be set, or on its sibling */ | 323 | /* only run on CPU to be set, or on its sibling */ |
| 324 | #ifdef CONFIG_SMP | 324 | #ifdef CONFIG_SMP |
| 325 | cpumask_copy(policy->cpus, &per_cpu(cpu_sibling_map, policy->cpu)); | 325 | cpumask_copy(policy->cpus, cpu_sibling_mask(policy->cpu)); |
| 326 | #endif | 326 | #endif |
| 327 | 327 | ||
| 328 | cpus_allowed = current->cpus_allowed; | 328 | cpus_allowed = current->cpus_allowed; |
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 24ff26a38ade..191117f1ad51 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
| @@ -13,6 +13,7 @@ | |||
| 13 | #include <asm/uaccess.h> | 13 | #include <asm/uaccess.h> |
| 14 | #include <asm/ds.h> | 14 | #include <asm/ds.h> |
| 15 | #include <asm/bugs.h> | 15 | #include <asm/bugs.h> |
| 16 | #include <asm/cpu.h> | ||
| 16 | 17 | ||
| 17 | #ifdef CONFIG_X86_64 | 18 | #ifdef CONFIG_X86_64 |
| 18 | #include <asm/topology.h> | 19 | #include <asm/topology.h> |
| @@ -24,7 +25,6 @@ | |||
| 24 | #ifdef CONFIG_X86_LOCAL_APIC | 25 | #ifdef CONFIG_X86_LOCAL_APIC |
| 25 | #include <asm/mpspec.h> | 26 | #include <asm/mpspec.h> |
| 26 | #include <asm/apic.h> | 27 | #include <asm/apic.h> |
| 27 | #include <mach_apic.h> | ||
| 28 | #endif | 28 | #endif |
| 29 | 29 | ||
| 30 | static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) | 30 | static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) |
| @@ -63,6 +63,18 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) | |||
| 63 | set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); | 63 | set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); |
| 64 | } | 64 | } |
| 65 | 65 | ||
| 66 | /* | ||
| 67 | * There is a known erratum on Pentium III and Core Solo | ||
| 68 | * and Core Duo CPUs. | ||
| 69 | * " Page with PAT set to WC while associated MTRR is UC | ||
| 70 | * may consolidate to UC " | ||
| 71 | * Because of this erratum, it is better to stick with | ||
| 72 | * setting WC in MTRR rather than using PAT on these CPUs. | ||
| 73 | * | ||
| 74 | * Enable PAT WC only on P4, Core 2 or later CPUs. | ||
| 75 | */ | ||
| 76 | if (c->x86 == 6 && c->x86_model < 15) | ||
| 77 | clear_cpu_cap(c, X86_FEATURE_PAT); | ||
| 66 | } | 78 | } |
| 67 | 79 | ||
| 68 | #ifdef CONFIG_X86_32 | 80 | #ifdef CONFIG_X86_32 |
| @@ -99,6 +111,28 @@ static void __cpuinit trap_init_f00f_bug(void) | |||
| 99 | } | 111 | } |
| 100 | #endif | 112 | #endif |
| 101 | 113 | ||
| 114 | static void __cpuinit intel_smp_check(struct cpuinfo_x86 *c) | ||
| 115 | { | ||
| 116 | #ifdef CONFIG_SMP | ||
| 117 | /* are we being called from identify_secondary_cpu()? */ | ||
| 118 | if (c->cpu_index == boot_cpu_id) | ||
| 119 | return; | ||
| 120 | |||
| 121 | /* | ||
| 122 | * Mask B, Pentium, but not Pentium MMX | ||
| 123 | */ | ||
| 124 | if (c->x86 == 5 && | ||
| 125 | c->x86_mask >= 1 && c->x86_mask <= 4 && | ||
| 126 | c->x86_model <= 3) { | ||
| 127 | /* | ||
| 128 | * Remember we have B step Pentia with bugs | ||
| 129 | */ | ||
| 130 | WARN_ONCE(1, "WARNING: SMP operation may be unreliable" | ||
| 131 | "with B stepping processors.\n"); | ||
| 132 | } | ||
| 133 | #endif | ||
| 134 | } | ||
| 135 | |||
| 102 | static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) | 136 | static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) |
| 103 | { | 137 | { |
| 104 | unsigned long lo, hi; | 138 | unsigned long lo, hi; |
| @@ -135,10 +169,10 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) | |||
| 135 | */ | 169 | */ |
| 136 | if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { | 170 | if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { |
| 137 | rdmsr(MSR_IA32_MISC_ENABLE, lo, hi); | 171 | rdmsr(MSR_IA32_MISC_ENABLE, lo, hi); |
| 138 | if ((lo & (1<<9)) == 0) { | 172 | if ((lo & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE) == 0) { |
| 139 | printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n"); | 173 | printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n"); |
| 140 | printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n"); | 174 | printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n"); |
| 141 | lo |= (1<<9); /* Disable hw prefetching */ | 175 | lo |= MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE; |
| 142 | wrmsr (MSR_IA32_MISC_ENABLE, lo, hi); | 176 | wrmsr (MSR_IA32_MISC_ENABLE, lo, hi); |
| 143 | } | 177 | } |
| 144 | } | 178 | } |
| @@ -175,6 +209,8 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) | |||
| 175 | #ifdef CONFIG_X86_NUMAQ | 209 | #ifdef CONFIG_X86_NUMAQ |
| 176 | numaq_tsc_disable(); | 210 | numaq_tsc_disable(); |
| 177 | #endif | 211 | #endif |
| 212 | |||
| 213 | intel_smp_check(c); | ||
| 178 | } | 214 | } |
| 179 | #else | 215 | #else |
| 180 | static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) | 216 | static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) |
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index da299eb85fc0..8e6ce2c146d6 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c | |||
| @@ -147,10 +147,19 @@ struct _cpuid4_info { | |||
| 147 | union _cpuid4_leaf_ecx ecx; | 147 | union _cpuid4_leaf_ecx ecx; |
| 148 | unsigned long size; | 148 | unsigned long size; |
| 149 | unsigned long can_disable; | 149 | unsigned long can_disable; |
| 150 | cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */ | 150 | DECLARE_BITMAP(shared_cpu_map, NR_CPUS); |
| 151 | }; | 151 | }; |
| 152 | 152 | ||
| 153 | #ifdef CONFIG_PCI | 153 | /* subset of above _cpuid4_info w/o shared_cpu_map */ |
| 154 | struct _cpuid4_info_regs { | ||
| 155 | union _cpuid4_leaf_eax eax; | ||
| 156 | union _cpuid4_leaf_ebx ebx; | ||
| 157 | union _cpuid4_leaf_ecx ecx; | ||
| 158 | unsigned long size; | ||
| 159 | unsigned long can_disable; | ||
| 160 | }; | ||
| 161 | |||
| 162 | #if defined(CONFIG_PCI) && defined(CONFIG_SYSFS) | ||
| 154 | static struct pci_device_id k8_nb_id[] = { | 163 | static struct pci_device_id k8_nb_id[] = { |
| 155 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) }, | 164 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) }, |
| 156 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) }, | 165 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) }, |
| @@ -278,7 +287,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, | |||
| 278 | } | 287 | } |
| 279 | 288 | ||
| 280 | static void __cpuinit | 289 | static void __cpuinit |
| 281 | amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf) | 290 | amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf) |
| 282 | { | 291 | { |
| 283 | if (index < 3) | 292 | if (index < 3) |
| 284 | return; | 293 | return; |
| @@ -286,7 +295,8 @@ amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf) | |||
| 286 | } | 295 | } |
| 287 | 296 | ||
| 288 | static int | 297 | static int |
| 289 | __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) | 298 | __cpuinit cpuid4_cache_lookup_regs(int index, |
| 299 | struct _cpuid4_info_regs *this_leaf) | ||
| 290 | { | 300 | { |
| 291 | union _cpuid4_leaf_eax eax; | 301 | union _cpuid4_leaf_eax eax; |
| 292 | union _cpuid4_leaf_ebx ebx; | 302 | union _cpuid4_leaf_ebx ebx; |
| @@ -353,11 +363,10 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) | |||
| 353 | * parameters cpuid leaf to find the cache details | 363 | * parameters cpuid leaf to find the cache details |
| 354 | */ | 364 | */ |
| 355 | for (i = 0; i < num_cache_leaves; i++) { | 365 | for (i = 0; i < num_cache_leaves; i++) { |
| 356 | struct _cpuid4_info this_leaf; | 366 | struct _cpuid4_info_regs this_leaf; |
| 357 | |||
| 358 | int retval; | 367 | int retval; |
| 359 | 368 | ||
| 360 | retval = cpuid4_cache_lookup(i, &this_leaf); | 369 | retval = cpuid4_cache_lookup_regs(i, &this_leaf); |
| 361 | if (retval >= 0) { | 370 | if (retval >= 0) { |
| 362 | switch(this_leaf.eax.split.level) { | 371 | switch(this_leaf.eax.split.level) { |
| 363 | case 1: | 372 | case 1: |
| @@ -490,6 +499,8 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) | |||
| 490 | return l2; | 499 | return l2; |
| 491 | } | 500 | } |
| 492 | 501 | ||
| 502 | #ifdef CONFIG_SYSFS | ||
| 503 | |||
| 493 | /* pointer to _cpuid4_info array (for each cache leaf) */ | 504 | /* pointer to _cpuid4_info array (for each cache leaf) */ |
| 494 | static DEFINE_PER_CPU(struct _cpuid4_info *, cpuid4_info); | 505 | static DEFINE_PER_CPU(struct _cpuid4_info *, cpuid4_info); |
| 495 | #define CPUID4_INFO_IDX(x, y) (&((per_cpu(cpuid4_info, x))[y])) | 506 | #define CPUID4_INFO_IDX(x, y) (&((per_cpu(cpuid4_info, x))[y])) |
| @@ -506,17 +517,20 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) | |||
| 506 | num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing; | 517 | num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing; |
| 507 | 518 | ||
| 508 | if (num_threads_sharing == 1) | 519 | if (num_threads_sharing == 1) |
| 509 | cpu_set(cpu, this_leaf->shared_cpu_map); | 520 | cpumask_set_cpu(cpu, to_cpumask(this_leaf->shared_cpu_map)); |
| 510 | else { | 521 | else { |
| 511 | index_msb = get_count_order(num_threads_sharing); | 522 | index_msb = get_count_order(num_threads_sharing); |
| 512 | 523 | ||
| 513 | for_each_online_cpu(i) { | 524 | for_each_online_cpu(i) { |
| 514 | if (cpu_data(i).apicid >> index_msb == | 525 | if (cpu_data(i).apicid >> index_msb == |
| 515 | c->apicid >> index_msb) { | 526 | c->apicid >> index_msb) { |
| 516 | cpu_set(i, this_leaf->shared_cpu_map); | 527 | cpumask_set_cpu(i, |
| 528 | to_cpumask(this_leaf->shared_cpu_map)); | ||
| 517 | if (i != cpu && per_cpu(cpuid4_info, i)) { | 529 | if (i != cpu && per_cpu(cpuid4_info, i)) { |
| 518 | sibling_leaf = CPUID4_INFO_IDX(i, index); | 530 | sibling_leaf = |
| 519 | cpu_set(cpu, sibling_leaf->shared_cpu_map); | 531 | CPUID4_INFO_IDX(i, index); |
| 532 | cpumask_set_cpu(cpu, to_cpumask( | ||
| 533 | sibling_leaf->shared_cpu_map)); | ||
| 520 | } | 534 | } |
| 521 | } | 535 | } |
| 522 | } | 536 | } |
| @@ -528,9 +542,10 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) | |||
| 528 | int sibling; | 542 | int sibling; |
| 529 | 543 | ||
| 530 | this_leaf = CPUID4_INFO_IDX(cpu, index); | 544 | this_leaf = CPUID4_INFO_IDX(cpu, index); |
| 531 | for_each_cpu_mask_nr(sibling, this_leaf->shared_cpu_map) { | 545 | for_each_cpu(sibling, to_cpumask(this_leaf->shared_cpu_map)) { |
| 532 | sibling_leaf = CPUID4_INFO_IDX(sibling, index); | 546 | sibling_leaf = CPUID4_INFO_IDX(sibling, index); |
| 533 | cpu_clear(cpu, sibling_leaf->shared_cpu_map); | 547 | cpumask_clear_cpu(cpu, |
| 548 | to_cpumask(sibling_leaf->shared_cpu_map)); | ||
| 534 | } | 549 | } |
| 535 | } | 550 | } |
| 536 | #else | 551 | #else |
| @@ -549,6 +564,15 @@ static void __cpuinit free_cache_attributes(unsigned int cpu) | |||
| 549 | per_cpu(cpuid4_info, cpu) = NULL; | 564 | per_cpu(cpuid4_info, cpu) = NULL; |
| 550 | } | 565 | } |
| 551 | 566 | ||
| 567 | static int | ||
| 568 | __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) | ||
| 569 | { | ||
| 570 | struct _cpuid4_info_regs *leaf_regs = | ||
| 571 | (struct _cpuid4_info_regs *)this_leaf; | ||
| 572 | |||
| 573 | return cpuid4_cache_lookup_regs(index, leaf_regs); | ||
| 574 | } | ||
| 575 | |||
| 552 | static void __cpuinit get_cpu_leaves(void *_retval) | 576 | static void __cpuinit get_cpu_leaves(void *_retval) |
| 553 | { | 577 | { |
| 554 | int j, *retval = _retval, cpu = smp_processor_id(); | 578 | int j, *retval = _retval, cpu = smp_processor_id(); |
| @@ -590,8 +614,6 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu) | |||
| 590 | return retval; | 614 | return retval; |
| 591 | } | 615 | } |
| 592 | 616 | ||
| 593 | #ifdef CONFIG_SYSFS | ||
| 594 | |||
| 595 | #include <linux/kobject.h> | 617 | #include <linux/kobject.h> |
| 596 | #include <linux/sysfs.h> | 618 | #include <linux/sysfs.h> |
| 597 | 619 | ||
| @@ -635,8 +657,9 @@ static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf, | |||
| 635 | int n = 0; | 657 | int n = 0; |
| 636 | 658 | ||
| 637 | if (len > 1) { | 659 | if (len > 1) { |
| 638 | cpumask_t *mask = &this_leaf->shared_cpu_map; | 660 | const struct cpumask *mask; |
| 639 | 661 | ||
| 662 | mask = to_cpumask(this_leaf->shared_cpu_map); | ||
| 640 | n = type? | 663 | n = type? |
| 641 | cpulist_scnprintf(buf, len-2, mask) : | 664 | cpulist_scnprintf(buf, len-2, mask) : |
| 642 | cpumask_scnprintf(buf, len-2, mask); | 665 | cpumask_scnprintf(buf, len-2, mask); |
| @@ -699,7 +722,8 @@ static struct pci_dev *get_k8_northbridge(int node) | |||
| 699 | 722 | ||
| 700 | static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf) | 723 | static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf) |
| 701 | { | 724 | { |
| 702 | int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); | 725 | const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map); |
| 726 | int node = cpu_to_node(cpumask_first(mask)); | ||
| 703 | struct pci_dev *dev = NULL; | 727 | struct pci_dev *dev = NULL; |
| 704 | ssize_t ret = 0; | 728 | ssize_t ret = 0; |
| 705 | int i; | 729 | int i; |
| @@ -733,7 +757,8 @@ static ssize_t | |||
| 733 | store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf, | 757 | store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf, |
| 734 | size_t count) | 758 | size_t count) |
| 735 | { | 759 | { |
| 736 | int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); | 760 | const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map); |
| 761 | int node = cpu_to_node(cpumask_first(mask)); | ||
| 737 | struct pci_dev *dev = NULL; | 762 | struct pci_dev *dev = NULL; |
| 738 | unsigned int ret, index, val; | 763 | unsigned int ret, index, val; |
| 739 | 764 | ||
| @@ -878,7 +903,7 @@ err_out: | |||
| 878 | return -ENOMEM; | 903 | return -ENOMEM; |
| 879 | } | 904 | } |
| 880 | 905 | ||
| 881 | static cpumask_t cache_dev_map = CPU_MASK_NONE; | 906 | static DECLARE_BITMAP(cache_dev_map, NR_CPUS); |
| 882 | 907 | ||
| 883 | /* Add/Remove cache interface for CPU device */ | 908 | /* Add/Remove cache interface for CPU device */ |
| 884 | static int __cpuinit cache_add_dev(struct sys_device * sys_dev) | 909 | static int __cpuinit cache_add_dev(struct sys_device * sys_dev) |
| @@ -918,7 +943,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) | |||
| 918 | } | 943 | } |
| 919 | kobject_uevent(&(this_object->kobj), KOBJ_ADD); | 944 | kobject_uevent(&(this_object->kobj), KOBJ_ADD); |
| 920 | } | 945 | } |
| 921 | cpu_set(cpu, cache_dev_map); | 946 | cpumask_set_cpu(cpu, to_cpumask(cache_dev_map)); |
| 922 | 947 | ||
| 923 | kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD); | 948 | kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD); |
| 924 | return 0; | 949 | return 0; |
| @@ -931,9 +956,9 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev) | |||
| 931 | 956 | ||
| 932 | if (per_cpu(cpuid4_info, cpu) == NULL) | 957 | if (per_cpu(cpuid4_info, cpu) == NULL) |
| 933 | return; | 958 | return; |
| 934 | if (!cpu_isset(cpu, cache_dev_map)) | 959 | if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map))) |
| 935 | return; | 960 | return; |
| 936 | cpu_clear(cpu, cache_dev_map); | 961 | cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map)); |
| 937 | 962 | ||
| 938 | for (i = 0; i < num_cache_leaves; i++) | 963 | for (i = 0; i < num_cache_leaves; i++) |
| 939 | kobject_put(&(INDEX_KOBJECT_PTR(cpu,i)->kobj)); | 964 | kobject_put(&(INDEX_KOBJECT_PTR(cpu,i)->kobj)); |
diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile index d7d2323bbb69..b2f89829bbe8 100644 --- a/arch/x86/kernel/cpu/mcheck/Makefile +++ b/arch/x86/kernel/cpu/mcheck/Makefile | |||
| @@ -4,3 +4,4 @@ obj-$(CONFIG_X86_32) += k7.o p4.o p5.o p6.o winchip.o | |||
| 4 | obj-$(CONFIG_X86_MCE_INTEL) += mce_intel_64.o | 4 | obj-$(CONFIG_X86_MCE_INTEL) += mce_intel_64.o |
| 5 | obj-$(CONFIG_X86_MCE_AMD) += mce_amd_64.o | 5 | obj-$(CONFIG_X86_MCE_AMD) += mce_amd_64.o |
| 6 | obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o | 6 | obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o |
| 7 | obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce_32.c b/arch/x86/kernel/cpu/mcheck/mce_32.c index dfaebce3633e..3552119b091d 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_32.c +++ b/arch/x86/kernel/cpu/mcheck/mce_32.c | |||
| @@ -60,20 +60,6 @@ void mcheck_init(struct cpuinfo_x86 *c) | |||
| 60 | } | 60 | } |
| 61 | } | 61 | } |
| 62 | 62 | ||
| 63 | static unsigned long old_cr4 __initdata; | ||
| 64 | |||
| 65 | void __init stop_mce(void) | ||
| 66 | { | ||
| 67 | old_cr4 = read_cr4(); | ||
| 68 | clear_in_cr4(X86_CR4_MCE); | ||
| 69 | } | ||
| 70 | |||
| 71 | void __init restart_mce(void) | ||
| 72 | { | ||
| 73 | if (old_cr4 & X86_CR4_MCE) | ||
| 74 | set_in_cr4(X86_CR4_MCE); | ||
| 75 | } | ||
| 76 | |||
| 77 | static int __init mcheck_disable(char *str) | 63 | static int __init mcheck_disable(char *str) |
| 78 | { | 64 | { |
| 79 | mce_disabled = 1; | 65 | mce_disabled = 1; |
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index fe79985ce0f2..863f89568b1a 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c | |||
| @@ -3,6 +3,8 @@ | |||
| 3 | * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs. | 3 | * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs. |
| 4 | * Rest from unknown author(s). | 4 | * Rest from unknown author(s). |
| 5 | * 2004 Andi Kleen. Rewrote most of it. | 5 | * 2004 Andi Kleen. Rewrote most of it. |
| 6 | * Copyright 2008 Intel Corporation | ||
| 7 | * Author: Andi Kleen | ||
| 6 | */ | 8 | */ |
| 7 | 9 | ||
| 8 | #include <linux/init.h> | 10 | #include <linux/init.h> |
| @@ -24,6 +26,9 @@ | |||
| 24 | #include <linux/ctype.h> | 26 | #include <linux/ctype.h> |
| 25 | #include <linux/kmod.h> | 27 | #include <linux/kmod.h> |
| 26 | #include <linux/kdebug.h> | 28 | #include <linux/kdebug.h> |
| 29 | #include <linux/kobject.h> | ||
| 30 | #include <linux/sysfs.h> | ||
| 31 | #include <linux/ratelimit.h> | ||
| 27 | #include <asm/processor.h> | 32 | #include <asm/processor.h> |
| 28 | #include <asm/msr.h> | 33 | #include <asm/msr.h> |
| 29 | #include <asm/mce.h> | 34 | #include <asm/mce.h> |
| @@ -32,7 +37,6 @@ | |||
| 32 | #include <asm/idle.h> | 37 | #include <asm/idle.h> |
| 33 | 38 | ||
| 34 | #define MISC_MCELOG_MINOR 227 | 39 | #define MISC_MCELOG_MINOR 227 |
| 35 | #define NR_SYSFS_BANKS 6 | ||
| 36 | 40 | ||
| 37 | atomic_t mce_entry; | 41 | atomic_t mce_entry; |
| 38 | 42 | ||
| @@ -47,7 +51,7 @@ static int mce_dont_init; | |||
| 47 | */ | 51 | */ |
| 48 | static int tolerant = 1; | 52 | static int tolerant = 1; |
| 49 | static int banks; | 53 | static int banks; |
| 50 | static unsigned long bank[NR_SYSFS_BANKS] = { [0 ... NR_SYSFS_BANKS-1] = ~0UL }; | 54 | static u64 *bank; |
| 51 | static unsigned long notify_user; | 55 | static unsigned long notify_user; |
| 52 | static int rip_msr; | 56 | static int rip_msr; |
| 53 | static int mce_bootlog = -1; | 57 | static int mce_bootlog = -1; |
| @@ -58,6 +62,19 @@ static char *trigger_argv[2] = { trigger, NULL }; | |||
| 58 | 62 | ||
| 59 | static DECLARE_WAIT_QUEUE_HEAD(mce_wait); | 63 | static DECLARE_WAIT_QUEUE_HEAD(mce_wait); |
| 60 | 64 | ||
| 65 | /* MCA banks polled by the period polling timer for corrected events */ | ||
| 66 | DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { | ||
| 67 | [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL | ||
| 68 | }; | ||
| 69 | |||
| 70 | /* Do initial initialization of a struct mce */ | ||
| 71 | void mce_setup(struct mce *m) | ||
| 72 | { | ||
| 73 | memset(m, 0, sizeof(struct mce)); | ||
| 74 | m->cpu = smp_processor_id(); | ||
| 75 | rdtscll(m->tsc); | ||
| 76 | } | ||
| 77 | |||
| 61 | /* | 78 | /* |
| 62 | * Lockless MCE logging infrastructure. | 79 | * Lockless MCE logging infrastructure. |
| 63 | * This avoids deadlocks on printk locks without having to break locks. Also | 80 | * This avoids deadlocks on printk locks without having to break locks. Also |
| @@ -119,11 +136,11 @@ static void print_mce(struct mce *m) | |||
| 119 | print_symbol("{%s}", m->ip); | 136 | print_symbol("{%s}", m->ip); |
| 120 | printk("\n"); | 137 | printk("\n"); |
| 121 | } | 138 | } |
| 122 | printk(KERN_EMERG "TSC %Lx ", m->tsc); | 139 | printk(KERN_EMERG "TSC %llx ", m->tsc); |
| 123 | if (m->addr) | 140 | if (m->addr) |
| 124 | printk("ADDR %Lx ", m->addr); | 141 | printk("ADDR %llx ", m->addr); |
| 125 | if (m->misc) | 142 | if (m->misc) |
| 126 | printk("MISC %Lx ", m->misc); | 143 | printk("MISC %llx ", m->misc); |
| 127 | printk("\n"); | 144 | printk("\n"); |
| 128 | printk(KERN_EMERG "This is not a software problem!\n"); | 145 | printk(KERN_EMERG "This is not a software problem!\n"); |
| 129 | printk(KERN_EMERG "Run through mcelog --ascii to decode " | 146 | printk(KERN_EMERG "Run through mcelog --ascii to decode " |
| @@ -149,8 +166,10 @@ static void mce_panic(char *msg, struct mce *backup, unsigned long start) | |||
| 149 | panic(msg); | 166 | panic(msg); |
| 150 | } | 167 | } |
| 151 | 168 | ||
| 152 | static int mce_available(struct cpuinfo_x86 *c) | 169 | int mce_available(struct cpuinfo_x86 *c) |
| 153 | { | 170 | { |
| 171 | if (mce_dont_init) | ||
| 172 | return 0; | ||
| 154 | return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA); | 173 | return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA); |
| 155 | } | 174 | } |
| 156 | 175 | ||
| @@ -172,7 +191,77 @@ static inline void mce_get_rip(struct mce *m, struct pt_regs *regs) | |||
| 172 | } | 191 | } |
| 173 | 192 | ||
| 174 | /* | 193 | /* |
| 175 | * The actual machine check handler | 194 | * Poll for corrected events or events that happened before reset. |
| 195 | * Those are just logged through /dev/mcelog. | ||
| 196 | * | ||
| 197 | * This is executed in standard interrupt context. | ||
| 198 | */ | ||
| 199 | void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | ||
| 200 | { | ||
| 201 | struct mce m; | ||
| 202 | int i; | ||
| 203 | |||
| 204 | mce_setup(&m); | ||
| 205 | |||
| 206 | rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus); | ||
| 207 | for (i = 0; i < banks; i++) { | ||
| 208 | if (!bank[i] || !test_bit(i, *b)) | ||
| 209 | continue; | ||
| 210 | |||
| 211 | m.misc = 0; | ||
| 212 | m.addr = 0; | ||
| 213 | m.bank = i; | ||
| 214 | m.tsc = 0; | ||
| 215 | |||
| 216 | barrier(); | ||
| 217 | rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status); | ||
| 218 | if (!(m.status & MCI_STATUS_VAL)) | ||
| 219 | continue; | ||
| 220 | |||
| 221 | /* | ||
| 222 | * Uncorrected events are handled by the exception handler | ||
| 223 | * when it is enabled. But when the exception is disabled log | ||
| 224 | * everything. | ||
| 225 | * | ||
| 226 | * TBD do the same check for MCI_STATUS_EN here? | ||
| 227 | */ | ||
| 228 | if ((m.status & MCI_STATUS_UC) && !(flags & MCP_UC)) | ||
| 229 | continue; | ||
| 230 | |||
| 231 | if (m.status & MCI_STATUS_MISCV) | ||
| 232 | rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc); | ||
| 233 | if (m.status & MCI_STATUS_ADDRV) | ||
| 234 | rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr); | ||
| 235 | |||
| 236 | if (!(flags & MCP_TIMESTAMP)) | ||
| 237 | m.tsc = 0; | ||
| 238 | /* | ||
| 239 | * Don't get the IP here because it's unlikely to | ||
| 240 | * have anything to do with the actual error location. | ||
| 241 | */ | ||
| 242 | |||
| 243 | mce_log(&m); | ||
| 244 | add_taint(TAINT_MACHINE_CHECK); | ||
| 245 | |||
| 246 | /* | ||
| 247 | * Clear state for this bank. | ||
| 248 | */ | ||
| 249 | wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); | ||
| 250 | } | ||
| 251 | |||
| 252 | /* | ||
| 253 | * Don't clear MCG_STATUS here because it's only defined for | ||
| 254 | * exceptions. | ||
| 255 | */ | ||
| 256 | } | ||
| 257 | |||
| 258 | /* | ||
| 259 | * The actual machine check handler. This only handles real | ||
| 260 | * exceptions when something got corrupted coming in through int 18. | ||
| 261 | * | ||
| 262 | * This is executed in NMI context not subject to normal locking rules. This | ||
| 263 | * implies that most kernel services cannot be safely used. Don't even | ||
| 264 | * think about putting a printk in there! | ||
| 176 | */ | 265 | */ |
| 177 | void do_machine_check(struct pt_regs * regs, long error_code) | 266 | void do_machine_check(struct pt_regs * regs, long error_code) |
| 178 | { | 267 | { |
| @@ -190,17 +279,18 @@ void do_machine_check(struct pt_regs * regs, long error_code) | |||
| 190 | * error. | 279 | * error. |
| 191 | */ | 280 | */ |
| 192 | int kill_it = 0; | 281 | int kill_it = 0; |
| 282 | DECLARE_BITMAP(toclear, MAX_NR_BANKS); | ||
| 193 | 283 | ||
| 194 | atomic_inc(&mce_entry); | 284 | atomic_inc(&mce_entry); |
| 195 | 285 | ||
| 196 | if ((regs | 286 | if (notify_die(DIE_NMI, "machine check", regs, error_code, |
| 197 | && notify_die(DIE_NMI, "machine check", regs, error_code, | ||
| 198 | 18, SIGKILL) == NOTIFY_STOP) | 287 | 18, SIGKILL) == NOTIFY_STOP) |
| 199 | || !banks) | 288 | goto out2; |
| 289 | if (!banks) | ||
| 200 | goto out2; | 290 | goto out2; |
| 201 | 291 | ||
| 202 | memset(&m, 0, sizeof(struct mce)); | 292 | mce_setup(&m); |
| 203 | m.cpu = smp_processor_id(); | 293 | |
| 204 | rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus); | 294 | rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus); |
| 205 | /* if the restart IP is not valid, we're done for */ | 295 | /* if the restart IP is not valid, we're done for */ |
| 206 | if (!(m.mcgstatus & MCG_STATUS_RIPV)) | 296 | if (!(m.mcgstatus & MCG_STATUS_RIPV)) |
| @@ -210,18 +300,32 @@ void do_machine_check(struct pt_regs * regs, long error_code) | |||
| 210 | barrier(); | 300 | barrier(); |
| 211 | 301 | ||
| 212 | for (i = 0; i < banks; i++) { | 302 | for (i = 0; i < banks; i++) { |
| 213 | if (i < NR_SYSFS_BANKS && !bank[i]) | 303 | __clear_bit(i, toclear); |
| 304 | if (!bank[i]) | ||
| 214 | continue; | 305 | continue; |
| 215 | 306 | ||
| 216 | m.misc = 0; | 307 | m.misc = 0; |
| 217 | m.addr = 0; | 308 | m.addr = 0; |
| 218 | m.bank = i; | 309 | m.bank = i; |
| 219 | m.tsc = 0; | ||
| 220 | 310 | ||
| 221 | rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status); | 311 | rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status); |
| 222 | if ((m.status & MCI_STATUS_VAL) == 0) | 312 | if ((m.status & MCI_STATUS_VAL) == 0) |
| 223 | continue; | 313 | continue; |
| 224 | 314 | ||
| 315 | /* | ||
| 316 | * Non uncorrected errors are handled by machine_check_poll | ||
| 317 | * Leave them alone. | ||
| 318 | */ | ||
| 319 | if ((m.status & MCI_STATUS_UC) == 0) | ||
| 320 | continue; | ||
| 321 | |||
| 322 | /* | ||
| 323 | * Set taint even when machine check was not enabled. | ||
| 324 | */ | ||
| 325 | add_taint(TAINT_MACHINE_CHECK); | ||
| 326 | |||
| 327 | __set_bit(i, toclear); | ||
| 328 | |||
| 225 | if (m.status & MCI_STATUS_EN) { | 329 | if (m.status & MCI_STATUS_EN) { |
| 226 | /* if PCC was set, there's no way out */ | 330 | /* if PCC was set, there's no way out */ |
| 227 | no_way_out |= !!(m.status & MCI_STATUS_PCC); | 331 | no_way_out |= !!(m.status & MCI_STATUS_PCC); |
| @@ -235,6 +339,12 @@ void do_machine_check(struct pt_regs * regs, long error_code) | |||
| 235 | no_way_out = 1; | 339 | no_way_out = 1; |
| 236 | kill_it = 1; | 340 | kill_it = 1; |
| 237 | } | 341 | } |
| 342 | } else { | ||
| 343 | /* | ||
| 344 | * Machine check event was not enabled. Clear, but | ||
| 345 | * ignore. | ||
| 346 | */ | ||
| 347 | continue; | ||
| 238 | } | 348 | } |
| 239 | 349 | ||
| 240 | if (m.status & MCI_STATUS_MISCV) | 350 | if (m.status & MCI_STATUS_MISCV) |
| @@ -243,10 +353,7 @@ void do_machine_check(struct pt_regs * regs, long error_code) | |||
| 243 | rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr); | 353 | rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr); |
| 244 | 354 | ||
| 245 | mce_get_rip(&m, regs); | 355 | mce_get_rip(&m, regs); |
| 246 | if (error_code >= 0) | 356 | mce_log(&m); |
| 247 | rdtscll(m.tsc); | ||
| 248 | if (error_code != -2) | ||
| 249 | mce_log(&m); | ||
| 250 | 357 | ||
| 251 | /* Did this bank cause the exception? */ | 358 | /* Did this bank cause the exception? */ |
| 252 | /* Assume that the bank with uncorrectable errors did it, | 359 | /* Assume that the bank with uncorrectable errors did it, |
| @@ -255,14 +362,8 @@ void do_machine_check(struct pt_regs * regs, long error_code) | |||
| 255 | panicm = m; | 362 | panicm = m; |
| 256 | panicm_found = 1; | 363 | panicm_found = 1; |
| 257 | } | 364 | } |
| 258 | |||
| 259 | add_taint(TAINT_MACHINE_CHECK); | ||
| 260 | } | 365 | } |
| 261 | 366 | ||
| 262 | /* Never do anything final in the polling timer */ | ||
| 263 | if (!regs) | ||
| 264 | goto out; | ||
| 265 | |||
| 266 | /* If we didn't find an uncorrectable error, pick | 367 | /* If we didn't find an uncorrectable error, pick |
| 267 | the last one (shouldn't happen, just being safe). */ | 368 | the last one (shouldn't happen, just being safe). */ |
| 268 | if (!panicm_found) | 369 | if (!panicm_found) |
| @@ -309,10 +410,11 @@ void do_machine_check(struct pt_regs * regs, long error_code) | |||
| 309 | /* notify userspace ASAP */ | 410 | /* notify userspace ASAP */ |
| 310 | set_thread_flag(TIF_MCE_NOTIFY); | 411 | set_thread_flag(TIF_MCE_NOTIFY); |
| 311 | 412 | ||
| 312 | out: | ||
| 313 | /* the last thing we do is clear state */ | 413 | /* the last thing we do is clear state */ |
| 314 | for (i = 0; i < banks; i++) | 414 | for (i = 0; i < banks; i++) { |
| 315 | wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); | 415 | if (test_bit(i, toclear)) |
| 416 | wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); | ||
| 417 | } | ||
| 316 | wrmsrl(MSR_IA32_MCG_STATUS, 0); | 418 | wrmsrl(MSR_IA32_MCG_STATUS, 0); |
| 317 | out2: | 419 | out2: |
| 318 | atomic_dec(&mce_entry); | 420 | atomic_dec(&mce_entry); |
| @@ -332,15 +434,13 @@ void do_machine_check(struct pt_regs * regs, long error_code) | |||
| 332 | * and historically has been the register value of the | 434 | * and historically has been the register value of the |
| 333 | * MSR_IA32_THERMAL_STATUS (Intel) msr. | 435 | * MSR_IA32_THERMAL_STATUS (Intel) msr. |
| 334 | */ | 436 | */ |
| 335 | void mce_log_therm_throt_event(unsigned int cpu, __u64 status) | 437 | void mce_log_therm_throt_event(__u64 status) |
| 336 | { | 438 | { |
| 337 | struct mce m; | 439 | struct mce m; |
| 338 | 440 | ||
| 339 | memset(&m, 0, sizeof(m)); | 441 | mce_setup(&m); |
| 340 | m.cpu = cpu; | ||
| 341 | m.bank = MCE_THERMAL_BANK; | 442 | m.bank = MCE_THERMAL_BANK; |
| 342 | m.status = status; | 443 | m.status = status; |
| 343 | rdtscll(m.tsc); | ||
| 344 | mce_log(&m); | 444 | mce_log(&m); |
| 345 | } | 445 | } |
| 346 | #endif /* CONFIG_X86_MCE_INTEL */ | 446 | #endif /* CONFIG_X86_MCE_INTEL */ |
| @@ -353,18 +453,18 @@ void mce_log_therm_throt_event(unsigned int cpu, __u64 status) | |||
| 353 | 453 | ||
| 354 | static int check_interval = 5 * 60; /* 5 minutes */ | 454 | static int check_interval = 5 * 60; /* 5 minutes */ |
| 355 | static int next_interval; /* in jiffies */ | 455 | static int next_interval; /* in jiffies */ |
| 356 | static void mcheck_timer(struct work_struct *work); | 456 | static void mcheck_timer(unsigned long); |
| 357 | static DECLARE_DELAYED_WORK(mcheck_work, mcheck_timer); | 457 | static DEFINE_PER_CPU(struct timer_list, mce_timer); |
| 358 | 458 | ||
| 359 | static void mcheck_check_cpu(void *info) | 459 | static void mcheck_timer(unsigned long data) |
| 360 | { | 460 | { |
| 361 | if (mce_available(&current_cpu_data)) | 461 | struct timer_list *t = &per_cpu(mce_timer, data); |
| 362 | do_machine_check(NULL, 0); | ||
| 363 | } | ||
| 364 | 462 | ||
| 365 | static void mcheck_timer(struct work_struct *work) | 463 | WARN_ON(smp_processor_id() != data); |
| 366 | { | 464 | |
| 367 | on_each_cpu(mcheck_check_cpu, NULL, 1); | 465 | if (mce_available(&current_cpu_data)) |
| 466 | machine_check_poll(MCP_TIMESTAMP, | ||
| 467 | &__get_cpu_var(mce_poll_banks)); | ||
| 368 | 468 | ||
| 369 | /* | 469 | /* |
| 370 | * Alert userspace if needed. If we logged an MCE, reduce the | 470 | * Alert userspace if needed. If we logged an MCE, reduce the |
| @@ -377,31 +477,41 @@ static void mcheck_timer(struct work_struct *work) | |||
| 377 | (int)round_jiffies_relative(check_interval*HZ)); | 477 | (int)round_jiffies_relative(check_interval*HZ)); |
| 378 | } | 478 | } |
| 379 | 479 | ||
| 380 | schedule_delayed_work(&mcheck_work, next_interval); | 480 | t->expires = jiffies + next_interval; |
| 481 | add_timer(t); | ||
| 482 | } | ||
| 483 | |||
| 484 | static void mce_do_trigger(struct work_struct *work) | ||
| 485 | { | ||
| 486 | call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT); | ||
| 381 | } | 487 | } |
| 382 | 488 | ||
| 489 | static DECLARE_WORK(mce_trigger_work, mce_do_trigger); | ||
| 490 | |||
| 383 | /* | 491 | /* |
| 384 | * This is only called from process context. This is where we do | 492 | * Notify the user(s) about new machine check events. |
| 385 | * anything we need to alert userspace about new MCEs. This is called | 493 | * Can be called from interrupt context, but not from machine check/NMI |
| 386 | * directly from the poller and also from entry.S and idle, thanks to | 494 | * context. |
| 387 | * TIF_MCE_NOTIFY. | ||
| 388 | */ | 495 | */ |
| 389 | int mce_notify_user(void) | 496 | int mce_notify_user(void) |
| 390 | { | 497 | { |
| 498 | /* Not more than two messages every minute */ | ||
| 499 | static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2); | ||
| 500 | |||
| 391 | clear_thread_flag(TIF_MCE_NOTIFY); | 501 | clear_thread_flag(TIF_MCE_NOTIFY); |
| 392 | if (test_and_clear_bit(0, &notify_user)) { | 502 | if (test_and_clear_bit(0, &notify_user)) { |
| 393 | static unsigned long last_print; | ||
| 394 | unsigned long now = jiffies; | ||
| 395 | |||
| 396 | wake_up_interruptible(&mce_wait); | 503 | wake_up_interruptible(&mce_wait); |
| 397 | if (trigger[0]) | ||
| 398 | call_usermodehelper(trigger, trigger_argv, NULL, | ||
| 399 | UMH_NO_WAIT); | ||
| 400 | 504 | ||
| 401 | if (time_after_eq(now, last_print + (check_interval*HZ))) { | 505 | /* |
| 402 | last_print = now; | 506 | * There is no risk of missing notifications because |
| 507 | * work_pending is always cleared before the function is | ||
| 508 | * executed. | ||
| 509 | */ | ||
| 510 | if (trigger[0] && !work_pending(&mce_trigger_work)) | ||
| 511 | schedule_work(&mce_trigger_work); | ||
| 512 | |||
| 513 | if (__ratelimit(&ratelimit)) | ||
| 403 | printk(KERN_INFO "Machine check events logged\n"); | 514 | printk(KERN_INFO "Machine check events logged\n"); |
| 404 | } | ||
| 405 | 515 | ||
| 406 | return 1; | 516 | return 1; |
| 407 | } | 517 | } |
| @@ -425,63 +535,78 @@ static struct notifier_block mce_idle_notifier = { | |||
| 425 | 535 | ||
| 426 | static __init int periodic_mcheck_init(void) | 536 | static __init int periodic_mcheck_init(void) |
| 427 | { | 537 | { |
| 428 | next_interval = check_interval * HZ; | 538 | idle_notifier_register(&mce_idle_notifier); |
| 429 | if (next_interval) | 539 | return 0; |
| 430 | schedule_delayed_work(&mcheck_work, | ||
| 431 | round_jiffies_relative(next_interval)); | ||
| 432 | idle_notifier_register(&mce_idle_notifier); | ||
| 433 | return 0; | ||
| 434 | } | 540 | } |
| 435 | __initcall(periodic_mcheck_init); | 541 | __initcall(periodic_mcheck_init); |
| 436 | 542 | ||
| 437 | |||
| 438 | /* | 543 | /* |
| 439 | * Initialize Machine Checks for a CPU. | 544 | * Initialize Machine Checks for a CPU. |
| 440 | */ | 545 | */ |
| 441 | static void mce_init(void *dummy) | 546 | static int mce_cap_init(void) |
| 442 | { | 547 | { |
| 443 | u64 cap; | 548 | u64 cap; |
| 444 | int i; | 549 | unsigned b; |
| 445 | 550 | ||
| 446 | rdmsrl(MSR_IA32_MCG_CAP, cap); | 551 | rdmsrl(MSR_IA32_MCG_CAP, cap); |
| 447 | banks = cap & 0xff; | 552 | b = cap & 0xff; |
| 448 | if (banks > MCE_EXTENDED_BANK) { | 553 | if (b > MAX_NR_BANKS) { |
| 449 | banks = MCE_EXTENDED_BANK; | 554 | printk(KERN_WARNING |
| 450 | printk(KERN_INFO "MCE: warning: using only %d banks\n", | 555 | "MCE: Using only %u machine check banks out of %u\n", |
| 451 | MCE_EXTENDED_BANK); | 556 | MAX_NR_BANKS, b); |
| 557 | b = MAX_NR_BANKS; | ||
| 452 | } | 558 | } |
| 559 | |||
| 560 | /* Don't support asymmetric configurations today */ | ||
| 561 | WARN_ON(banks != 0 && b != banks); | ||
| 562 | banks = b; | ||
| 563 | if (!bank) { | ||
| 564 | bank = kmalloc(banks * sizeof(u64), GFP_KERNEL); | ||
| 565 | if (!bank) | ||
| 566 | return -ENOMEM; | ||
| 567 | memset(bank, 0xff, banks * sizeof(u64)); | ||
| 568 | } | ||
| 569 | |||
| 453 | /* Use accurate RIP reporting if available. */ | 570 | /* Use accurate RIP reporting if available. */ |
| 454 | if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9) | 571 | if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9) |
| 455 | rip_msr = MSR_IA32_MCG_EIP; | 572 | rip_msr = MSR_IA32_MCG_EIP; |
| 456 | 573 | ||
| 457 | /* Log the machine checks left over from the previous reset. | 574 | return 0; |
| 458 | This also clears all registers */ | 575 | } |
| 459 | do_machine_check(NULL, mce_bootlog ? -1 : -2); | 576 | |
| 577 | static void mce_init(void *dummy) | ||
| 578 | { | ||
| 579 | u64 cap; | ||
| 580 | int i; | ||
| 581 | mce_banks_t all_banks; | ||
| 582 | |||
| 583 | /* | ||
| 584 | * Log the machine checks left over from the previous reset. | ||
| 585 | */ | ||
| 586 | bitmap_fill(all_banks, MAX_NR_BANKS); | ||
| 587 | machine_check_poll(MCP_UC, &all_banks); | ||
| 460 | 588 | ||
| 461 | set_in_cr4(X86_CR4_MCE); | 589 | set_in_cr4(X86_CR4_MCE); |
| 462 | 590 | ||
| 591 | rdmsrl(MSR_IA32_MCG_CAP, cap); | ||
| 463 | if (cap & MCG_CTL_P) | 592 | if (cap & MCG_CTL_P) |
| 464 | wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); | 593 | wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); |
| 465 | 594 | ||
| 466 | for (i = 0; i < banks; i++) { | 595 | for (i = 0; i < banks; i++) { |
| 467 | if (i < NR_SYSFS_BANKS) | 596 | wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]); |
| 468 | wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]); | ||
| 469 | else | ||
| 470 | wrmsrl(MSR_IA32_MC0_CTL+4*i, ~0UL); | ||
| 471 | |||
| 472 | wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); | 597 | wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); |
| 473 | } | 598 | } |
| 474 | } | 599 | } |
| 475 | 600 | ||
| 476 | /* Add per CPU specific workarounds here */ | 601 | /* Add per CPU specific workarounds here */ |
| 477 | static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) | 602 | static void mce_cpu_quirks(struct cpuinfo_x86 *c) |
| 478 | { | 603 | { |
| 479 | /* This should be disabled by the BIOS, but isn't always */ | 604 | /* This should be disabled by the BIOS, but isn't always */ |
| 480 | if (c->x86_vendor == X86_VENDOR_AMD) { | 605 | if (c->x86_vendor == X86_VENDOR_AMD) { |
| 481 | if(c->x86 == 15) | 606 | if (c->x86 == 15 && banks > 4) |
| 482 | /* disable GART TBL walk error reporting, which trips off | 607 | /* disable GART TBL walk error reporting, which trips off |
| 483 | incorrectly with the IOMMU & 3ware & Cerberus. */ | 608 | incorrectly with the IOMMU & 3ware & Cerberus. */ |
| 484 | clear_bit(10, &bank[4]); | 609 | clear_bit(10, (unsigned long *)&bank[4]); |
| 485 | if(c->x86 <= 17 && mce_bootlog < 0) | 610 | if(c->x86 <= 17 && mce_bootlog < 0) |
| 486 | /* Lots of broken BIOS around that don't clear them | 611 | /* Lots of broken BIOS around that don't clear them |
| 487 | by default and leave crap in there. Don't log. */ | 612 | by default and leave crap in there. Don't log. */ |
| @@ -504,20 +629,38 @@ static void mce_cpu_features(struct cpuinfo_x86 *c) | |||
| 504 | } | 629 | } |
| 505 | } | 630 | } |
| 506 | 631 | ||
| 632 | static void mce_init_timer(void) | ||
| 633 | { | ||
| 634 | struct timer_list *t = &__get_cpu_var(mce_timer); | ||
| 635 | |||
| 636 | /* data race harmless because everyone sets to the same value */ | ||
| 637 | if (!next_interval) | ||
| 638 | next_interval = check_interval * HZ; | ||
| 639 | if (!next_interval) | ||
| 640 | return; | ||
| 641 | setup_timer(t, mcheck_timer, smp_processor_id()); | ||
| 642 | t->expires = round_jiffies(jiffies + next_interval); | ||
| 643 | add_timer(t); | ||
| 644 | } | ||
| 645 | |||
| 507 | /* | 646 | /* |
| 508 | * Called for each booted CPU to set up machine checks. | 647 | * Called for each booted CPU to set up machine checks. |
| 509 | * Must be called with preempt off. | 648 | * Must be called with preempt off. |
| 510 | */ | 649 | */ |
| 511 | void __cpuinit mcheck_init(struct cpuinfo_x86 *c) | 650 | void __cpuinit mcheck_init(struct cpuinfo_x86 *c) |
| 512 | { | 651 | { |
| 513 | mce_cpu_quirks(c); | 652 | if (!mce_available(c)) |
| 653 | return; | ||
| 514 | 654 | ||
| 515 | if (mce_dont_init || | 655 | if (mce_cap_init() < 0) { |
| 516 | !mce_available(c)) | 656 | mce_dont_init = 1; |
| 517 | return; | 657 | return; |
| 658 | } | ||
| 659 | mce_cpu_quirks(c); | ||
| 518 | 660 | ||
| 519 | mce_init(NULL); | 661 | mce_init(NULL); |
| 520 | mce_cpu_features(c); | 662 | mce_cpu_features(c); |
| 663 | mce_init_timer(); | ||
| 521 | } | 664 | } |
| 522 | 665 | ||
| 523 | /* | 666 | /* |
| @@ -573,7 +716,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, | |||
| 573 | { | 716 | { |
| 574 | unsigned long *cpu_tsc; | 717 | unsigned long *cpu_tsc; |
| 575 | static DEFINE_MUTEX(mce_read_mutex); | 718 | static DEFINE_MUTEX(mce_read_mutex); |
| 576 | unsigned next; | 719 | unsigned prev, next; |
| 577 | char __user *buf = ubuf; | 720 | char __user *buf = ubuf; |
| 578 | int i, err; | 721 | int i, err; |
| 579 | 722 | ||
| @@ -592,25 +735,32 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, | |||
| 592 | } | 735 | } |
| 593 | 736 | ||
| 594 | err = 0; | 737 | err = 0; |
| 595 | for (i = 0; i < next; i++) { | 738 | prev = 0; |
| 596 | unsigned long start = jiffies; | 739 | do { |
| 597 | 740 | for (i = prev; i < next; i++) { | |
| 598 | while (!mcelog.entry[i].finished) { | 741 | unsigned long start = jiffies; |
| 599 | if (time_after_eq(jiffies, start + 2)) { | 742 | |
| 600 | memset(mcelog.entry + i,0, sizeof(struct mce)); | 743 | while (!mcelog.entry[i].finished) { |
| 601 | goto timeout; | 744 | if (time_after_eq(jiffies, start + 2)) { |
| 745 | memset(mcelog.entry + i, 0, | ||
| 746 | sizeof(struct mce)); | ||
| 747 | goto timeout; | ||
| 748 | } | ||
| 749 | cpu_relax(); | ||
| 602 | } | 750 | } |
| 603 | cpu_relax(); | 751 | smp_rmb(); |
| 752 | err |= copy_to_user(buf, mcelog.entry + i, | ||
| 753 | sizeof(struct mce)); | ||
| 754 | buf += sizeof(struct mce); | ||
| 755 | timeout: | ||
| 756 | ; | ||
| 604 | } | 757 | } |
| 605 | smp_rmb(); | ||
| 606 | err |= copy_to_user(buf, mcelog.entry + i, sizeof(struct mce)); | ||
| 607 | buf += sizeof(struct mce); | ||
| 608 | timeout: | ||
| 609 | ; | ||
| 610 | } | ||
| 611 | 758 | ||
| 612 | memset(mcelog.entry, 0, next * sizeof(struct mce)); | 759 | memset(mcelog.entry + prev, 0, |
| 613 | mcelog.next = 0; | 760 | (next - prev) * sizeof(struct mce)); |
| 761 | prev = next; | ||
| 762 | next = cmpxchg(&mcelog.next, prev, 0); | ||
| 763 | } while (next != prev); | ||
| 614 | 764 | ||
| 615 | synchronize_sched(); | 765 | synchronize_sched(); |
| 616 | 766 | ||
| @@ -680,20 +830,6 @@ static struct miscdevice mce_log_device = { | |||
| 680 | &mce_chrdev_ops, | 830 | &mce_chrdev_ops, |
| 681 | }; | 831 | }; |
| 682 | 832 | ||
| 683 | static unsigned long old_cr4 __initdata; | ||
| 684 | |||
| 685 | void __init stop_mce(void) | ||
| 686 | { | ||
| 687 | old_cr4 = read_cr4(); | ||
| 688 | clear_in_cr4(X86_CR4_MCE); | ||
| 689 | } | ||
| 690 | |||
| 691 | void __init restart_mce(void) | ||
| 692 | { | ||
| 693 | if (old_cr4 & X86_CR4_MCE) | ||
| 694 | set_in_cr4(X86_CR4_MCE); | ||
| 695 | } | ||
| 696 | |||
| 697 | /* | 833 | /* |
| 698 | * Old style boot options parsing. Only for compatibility. | 834 | * Old style boot options parsing. Only for compatibility. |
| 699 | */ | 835 | */ |
| @@ -703,8 +839,7 @@ static int __init mcheck_disable(char *str) | |||
| 703 | return 1; | 839 | return 1; |
| 704 | } | 840 | } |
| 705 | 841 | ||
| 706 | /* mce=off disables machine check. Note you can re-enable it later | 842 | /* mce=off disables machine check. |
| 707 | using sysfs. | ||
| 708 | mce=TOLERANCELEVEL (number, see above) | 843 | mce=TOLERANCELEVEL (number, see above) |
| 709 | mce=bootlog Log MCEs from before booting. Disabled by default on AMD. | 844 | mce=bootlog Log MCEs from before booting. Disabled by default on AMD. |
| 710 | mce=nobootlog Don't log MCEs from before booting. */ | 845 | mce=nobootlog Don't log MCEs from before booting. */ |
| @@ -728,6 +863,29 @@ __setup("mce=", mcheck_enable); | |||
| 728 | * Sysfs support | 863 | * Sysfs support |
| 729 | */ | 864 | */ |
| 730 | 865 | ||
| 866 | /* | ||
| 867 | * Disable machine checks on suspend and shutdown. We can't really handle | ||
| 868 | * them later. | ||
| 869 | */ | ||
| 870 | static int mce_disable(void) | ||
| 871 | { | ||
| 872 | int i; | ||
| 873 | |||
| 874 | for (i = 0; i < banks; i++) | ||
| 875 | wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); | ||
| 876 | return 0; | ||
| 877 | } | ||
| 878 | |||
| 879 | static int mce_suspend(struct sys_device *dev, pm_message_t state) | ||
| 880 | { | ||
| 881 | return mce_disable(); | ||
| 882 | } | ||
| 883 | |||
| 884 | static int mce_shutdown(struct sys_device *dev) | ||
| 885 | { | ||
| 886 | return mce_disable(); | ||
| 887 | } | ||
| 888 | |||
| 731 | /* On resume clear all MCE state. Don't want to see leftovers from the BIOS. | 889 | /* On resume clear all MCE state. Don't want to see leftovers from the BIOS. |
| 732 | Only one CPU is active at this time, the others get readded later using | 890 | Only one CPU is active at this time, the others get readded later using |
| 733 | CPU hotplug. */ | 891 | CPU hotplug. */ |
| @@ -738,20 +896,24 @@ static int mce_resume(struct sys_device *dev) | |||
| 738 | return 0; | 896 | return 0; |
| 739 | } | 897 | } |
| 740 | 898 | ||
| 899 | static void mce_cpu_restart(void *data) | ||
| 900 | { | ||
| 901 | del_timer_sync(&__get_cpu_var(mce_timer)); | ||
| 902 | if (mce_available(&current_cpu_data)) | ||
| 903 | mce_init(NULL); | ||
| 904 | mce_init_timer(); | ||
| 905 | } | ||
| 906 | |||
| 741 | /* Reinit MCEs after user configuration changes */ | 907 | /* Reinit MCEs after user configuration changes */ |
| 742 | static void mce_restart(void) | 908 | static void mce_restart(void) |
| 743 | { | 909 | { |
| 744 | if (next_interval) | ||
| 745 | cancel_delayed_work(&mcheck_work); | ||
| 746 | /* Timer race is harmless here */ | ||
| 747 | on_each_cpu(mce_init, NULL, 1); | ||
| 748 | next_interval = check_interval * HZ; | 910 | next_interval = check_interval * HZ; |
| 749 | if (next_interval) | 911 | on_each_cpu(mce_cpu_restart, NULL, 1); |
| 750 | schedule_delayed_work(&mcheck_work, | ||
| 751 | round_jiffies_relative(next_interval)); | ||
| 752 | } | 912 | } |
| 753 | 913 | ||
| 754 | static struct sysdev_class mce_sysclass = { | 914 | static struct sysdev_class mce_sysclass = { |
| 915 | .suspend = mce_suspend, | ||
| 916 | .shutdown = mce_shutdown, | ||
| 755 | .resume = mce_resume, | 917 | .resume = mce_resume, |
| 756 | .name = "machinecheck", | 918 | .name = "machinecheck", |
| 757 | }; | 919 | }; |
| @@ -778,16 +940,26 @@ void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu) __cpuinit | |||
| 778 | } \ | 940 | } \ |
| 779 | static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name); | 941 | static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name); |
| 780 | 942 | ||
| 781 | /* | 943 | static struct sysdev_attribute *bank_attrs; |
| 782 | * TBD should generate these dynamically based on number of available banks. | 944 | |
| 783 | * Have only 6 contol banks in /sysfs until then. | 945 | static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr, |
| 784 | */ | 946 | char *buf) |
| 785 | ACCESSOR(bank0ctl,bank[0],mce_restart()) | 947 | { |
| 786 | ACCESSOR(bank1ctl,bank[1],mce_restart()) | 948 | u64 b = bank[attr - bank_attrs]; |
| 787 | ACCESSOR(bank2ctl,bank[2],mce_restart()) | 949 | return sprintf(buf, "%llx\n", b); |
| 788 | ACCESSOR(bank3ctl,bank[3],mce_restart()) | 950 | } |
| 789 | ACCESSOR(bank4ctl,bank[4],mce_restart()) | 951 | |
| 790 | ACCESSOR(bank5ctl,bank[5],mce_restart()) | 952 | static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr, |
| 953 | const char *buf, size_t siz) | ||
| 954 | { | ||
| 955 | char *end; | ||
| 956 | u64 new = simple_strtoull(buf, &end, 0); | ||
| 957 | if (end == buf) | ||
| 958 | return -EINVAL; | ||
| 959 | bank[attr - bank_attrs] = new; | ||
| 960 | mce_restart(); | ||
| 961 | return end-buf; | ||
| 962 | } | ||
| 791 | 963 | ||
| 792 | static ssize_t show_trigger(struct sys_device *s, struct sysdev_attribute *attr, | 964 | static ssize_t show_trigger(struct sys_device *s, struct sysdev_attribute *attr, |
| 793 | char *buf) | 965 | char *buf) |
| @@ -814,13 +986,11 @@ static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger); | |||
| 814 | static SYSDEV_INT_ATTR(tolerant, 0644, tolerant); | 986 | static SYSDEV_INT_ATTR(tolerant, 0644, tolerant); |
| 815 | ACCESSOR(check_interval,check_interval,mce_restart()) | 987 | ACCESSOR(check_interval,check_interval,mce_restart()) |
| 816 | static struct sysdev_attribute *mce_attributes[] = { | 988 | static struct sysdev_attribute *mce_attributes[] = { |
| 817 | &attr_bank0ctl, &attr_bank1ctl, &attr_bank2ctl, | ||
| 818 | &attr_bank3ctl, &attr_bank4ctl, &attr_bank5ctl, | ||
| 819 | &attr_tolerant.attr, &attr_check_interval, &attr_trigger, | 989 | &attr_tolerant.attr, &attr_check_interval, &attr_trigger, |
| 820 | NULL | 990 | NULL |
| 821 | }; | 991 | }; |
| 822 | 992 | ||
| 823 | static cpumask_t mce_device_initialized = CPU_MASK_NONE; | 993 | static cpumask_var_t mce_device_initialized; |
| 824 | 994 | ||
| 825 | /* Per cpu sysdev init. All of the cpus still share the same ctl bank */ | 995 | /* Per cpu sysdev init. All of the cpus still share the same ctl bank */ |
| 826 | static __cpuinit int mce_create_device(unsigned int cpu) | 996 | static __cpuinit int mce_create_device(unsigned int cpu) |
| @@ -845,11 +1015,22 @@ static __cpuinit int mce_create_device(unsigned int cpu) | |||
| 845 | if (err) | 1015 | if (err) |
| 846 | goto error; | 1016 | goto error; |
| 847 | } | 1017 | } |
| 848 | cpu_set(cpu, mce_device_initialized); | 1018 | for (i = 0; i < banks; i++) { |
| 1019 | err = sysdev_create_file(&per_cpu(device_mce, cpu), | ||
| 1020 | &bank_attrs[i]); | ||
| 1021 | if (err) | ||
| 1022 | goto error2; | ||
| 1023 | } | ||
| 1024 | cpumask_set_cpu(cpu, mce_device_initialized); | ||
| 849 | 1025 | ||
| 850 | return 0; | 1026 | return 0; |
| 1027 | error2: | ||
| 1028 | while (--i >= 0) { | ||
| 1029 | sysdev_remove_file(&per_cpu(device_mce, cpu), | ||
| 1030 | &bank_attrs[i]); | ||
| 1031 | } | ||
| 851 | error: | 1032 | error: |
| 852 | while (i--) { | 1033 | while (--i >= 0) { |
| 853 | sysdev_remove_file(&per_cpu(device_mce,cpu), | 1034 | sysdev_remove_file(&per_cpu(device_mce,cpu), |
| 854 | mce_attributes[i]); | 1035 | mce_attributes[i]); |
| 855 | } | 1036 | } |
| @@ -862,14 +1043,44 @@ static __cpuinit void mce_remove_device(unsigned int cpu) | |||
| 862 | { | 1043 | { |
| 863 | int i; | 1044 | int i; |
| 864 | 1045 | ||
| 865 | if (!cpu_isset(cpu, mce_device_initialized)) | 1046 | if (!cpumask_test_cpu(cpu, mce_device_initialized)) |
| 866 | return; | 1047 | return; |
| 867 | 1048 | ||
| 868 | for (i = 0; mce_attributes[i]; i++) | 1049 | for (i = 0; mce_attributes[i]; i++) |
| 869 | sysdev_remove_file(&per_cpu(device_mce,cpu), | 1050 | sysdev_remove_file(&per_cpu(device_mce,cpu), |
| 870 | mce_attributes[i]); | 1051 | mce_attributes[i]); |
| 1052 | for (i = 0; i < banks; i++) | ||
| 1053 | sysdev_remove_file(&per_cpu(device_mce, cpu), | ||
| 1054 | &bank_attrs[i]); | ||
| 871 | sysdev_unregister(&per_cpu(device_mce,cpu)); | 1055 | sysdev_unregister(&per_cpu(device_mce,cpu)); |
| 872 | cpu_clear(cpu, mce_device_initialized); | 1056 | cpumask_clear_cpu(cpu, mce_device_initialized); |
| 1057 | } | ||
| 1058 | |||
| 1059 | /* Make sure there are no machine checks on offlined CPUs. */ | ||
| 1060 | static void mce_disable_cpu(void *h) | ||
| 1061 | { | ||
| 1062 | int i; | ||
| 1063 | unsigned long action = *(unsigned long *)h; | ||
| 1064 | |||
| 1065 | if (!mce_available(&current_cpu_data)) | ||
| 1066 | return; | ||
| 1067 | if (!(action & CPU_TASKS_FROZEN)) | ||
| 1068 | cmci_clear(); | ||
| 1069 | for (i = 0; i < banks; i++) | ||
| 1070 | wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); | ||
| 1071 | } | ||
| 1072 | |||
| 1073 | static void mce_reenable_cpu(void *h) | ||
| 1074 | { | ||
| 1075 | int i; | ||
| 1076 | unsigned long action = *(unsigned long *)h; | ||
| 1077 | |||
| 1078 | if (!mce_available(&current_cpu_data)) | ||
| 1079 | return; | ||
| 1080 | if (!(action & CPU_TASKS_FROZEN)) | ||
| 1081 | cmci_reenable(); | ||
| 1082 | for (i = 0; i < banks; i++) | ||
| 1083 | wrmsrl(MSR_IA32_MC0_CTL + i*4, bank[i]); | ||
| 873 | } | 1084 | } |
| 874 | 1085 | ||
| 875 | /* Get notified when a cpu comes on/off. Be hotplug friendly. */ | 1086 | /* Get notified when a cpu comes on/off. Be hotplug friendly. */ |
| @@ -877,6 +1088,7 @@ static int __cpuinit mce_cpu_callback(struct notifier_block *nfb, | |||
| 877 | unsigned long action, void *hcpu) | 1088 | unsigned long action, void *hcpu) |
| 878 | { | 1089 | { |
| 879 | unsigned int cpu = (unsigned long)hcpu; | 1090 | unsigned int cpu = (unsigned long)hcpu; |
| 1091 | struct timer_list *t = &per_cpu(mce_timer, cpu); | ||
| 880 | 1092 | ||
| 881 | switch (action) { | 1093 | switch (action) { |
| 882 | case CPU_ONLINE: | 1094 | case CPU_ONLINE: |
| @@ -891,6 +1103,21 @@ static int __cpuinit mce_cpu_callback(struct notifier_block *nfb, | |||
| 891 | threshold_cpu_callback(action, cpu); | 1103 | threshold_cpu_callback(action, cpu); |
| 892 | mce_remove_device(cpu); | 1104 | mce_remove_device(cpu); |
| 893 | break; | 1105 | break; |
| 1106 | case CPU_DOWN_PREPARE: | ||
| 1107 | case CPU_DOWN_PREPARE_FROZEN: | ||
| 1108 | del_timer_sync(t); | ||
| 1109 | smp_call_function_single(cpu, mce_disable_cpu, &action, 1); | ||
| 1110 | break; | ||
| 1111 | case CPU_DOWN_FAILED: | ||
| 1112 | case CPU_DOWN_FAILED_FROZEN: | ||
| 1113 | t->expires = round_jiffies(jiffies + next_interval); | ||
| 1114 | add_timer_on(t, cpu); | ||
| 1115 | smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); | ||
| 1116 | break; | ||
| 1117 | case CPU_POST_DEAD: | ||
| 1118 | /* intentionally ignoring frozen here */ | ||
| 1119 | cmci_rediscover(cpu); | ||
| 1120 | break; | ||
| 894 | } | 1121 | } |
| 895 | return NOTIFY_OK; | 1122 | return NOTIFY_OK; |
| 896 | } | 1123 | } |
| @@ -899,6 +1126,34 @@ static struct notifier_block mce_cpu_notifier __cpuinitdata = { | |||
| 899 | .notifier_call = mce_cpu_callback, | 1126 | .notifier_call = mce_cpu_callback, |
| 900 | }; | 1127 | }; |
| 901 | 1128 | ||
| 1129 | static __init int mce_init_banks(void) | ||
| 1130 | { | ||
| 1131 | int i; | ||
| 1132 | |||
| 1133 | bank_attrs = kzalloc(sizeof(struct sysdev_attribute) * banks, | ||
| 1134 | GFP_KERNEL); | ||
| 1135 | if (!bank_attrs) | ||
| 1136 | return -ENOMEM; | ||
| 1137 | |||
| 1138 | for (i = 0; i < banks; i++) { | ||
| 1139 | struct sysdev_attribute *a = &bank_attrs[i]; | ||
| 1140 | a->attr.name = kasprintf(GFP_KERNEL, "bank%d", i); | ||
| 1141 | if (!a->attr.name) | ||
| 1142 | goto nomem; | ||
| 1143 | a->attr.mode = 0644; | ||
| 1144 | a->show = show_bank; | ||
| 1145 | a->store = set_bank; | ||
| 1146 | } | ||
| 1147 | return 0; | ||
| 1148 | |||
| 1149 | nomem: | ||
| 1150 | while (--i >= 0) | ||
| 1151 | kfree(bank_attrs[i].attr.name); | ||
| 1152 | kfree(bank_attrs); | ||
| 1153 | bank_attrs = NULL; | ||
| 1154 | return -ENOMEM; | ||
| 1155 | } | ||
| 1156 | |||
| 902 | static __init int mce_init_device(void) | 1157 | static __init int mce_init_device(void) |
| 903 | { | 1158 | { |
| 904 | int err; | 1159 | int err; |
| @@ -906,6 +1161,13 @@ static __init int mce_init_device(void) | |||
| 906 | 1161 | ||
| 907 | if (!mce_available(&boot_cpu_data)) | 1162 | if (!mce_available(&boot_cpu_data)) |
| 908 | return -EIO; | 1163 | return -EIO; |
| 1164 | |||
| 1165 | alloc_cpumask_var(&mce_device_initialized, GFP_KERNEL); | ||
| 1166 | |||
| 1167 | err = mce_init_banks(); | ||
| 1168 | if (err) | ||
| 1169 | return err; | ||
| 1170 | |||
| 909 | err = sysdev_class_register(&mce_sysclass); | 1171 | err = sysdev_class_register(&mce_sysclass); |
| 910 | if (err) | 1172 | if (err) |
| 911 | return err; | 1173 | return err; |
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index f2ee0ae29bd6..1f429ee3477d 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c | |||
| @@ -67,7 +67,7 @@ static struct threshold_block threshold_defaults = { | |||
| 67 | struct threshold_bank { | 67 | struct threshold_bank { |
| 68 | struct kobject *kobj; | 68 | struct kobject *kobj; |
| 69 | struct threshold_block *blocks; | 69 | struct threshold_block *blocks; |
| 70 | cpumask_t cpus; | 70 | cpumask_var_t cpus; |
| 71 | }; | 71 | }; |
| 72 | static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]); | 72 | static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]); |
| 73 | 73 | ||
| @@ -79,6 +79,8 @@ static unsigned char shared_bank[NR_BANKS] = { | |||
| 79 | 79 | ||
| 80 | static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */ | 80 | static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */ |
| 81 | 81 | ||
| 82 | static void amd_threshold_interrupt(void); | ||
| 83 | |||
| 82 | /* | 84 | /* |
| 83 | * CPU Initialization | 85 | * CPU Initialization |
| 84 | */ | 86 | */ |
| @@ -174,6 +176,8 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) | |||
| 174 | tr.reset = 0; | 176 | tr.reset = 0; |
| 175 | tr.old_limit = 0; | 177 | tr.old_limit = 0; |
| 176 | threshold_restart_bank(&tr); | 178 | threshold_restart_bank(&tr); |
| 179 | |||
| 180 | mce_threshold_vector = amd_threshold_interrupt; | ||
| 177 | } | 181 | } |
| 178 | } | 182 | } |
| 179 | } | 183 | } |
| @@ -187,19 +191,13 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) | |||
| 187 | * the interrupt goes off when error_count reaches threshold_limit. | 191 | * the interrupt goes off when error_count reaches threshold_limit. |
| 188 | * the handler will simply log mcelog w/ software defined bank number. | 192 | * the handler will simply log mcelog w/ software defined bank number. |
| 189 | */ | 193 | */ |
| 190 | asmlinkage void mce_threshold_interrupt(void) | 194 | static void amd_threshold_interrupt(void) |
| 191 | { | 195 | { |
| 192 | unsigned int bank, block; | 196 | unsigned int bank, block; |
| 193 | struct mce m; | 197 | struct mce m; |
| 194 | u32 low = 0, high = 0, address = 0; | 198 | u32 low = 0, high = 0, address = 0; |
| 195 | 199 | ||
| 196 | ack_APIC_irq(); | 200 | mce_setup(&m); |
| 197 | exit_idle(); | ||
| 198 | irq_enter(); | ||
| 199 | |||
| 200 | memset(&m, 0, sizeof(m)); | ||
| 201 | rdtscll(m.tsc); | ||
| 202 | m.cpu = smp_processor_id(); | ||
| 203 | 201 | ||
| 204 | /* assume first bank caused it */ | 202 | /* assume first bank caused it */ |
| 205 | for (bank = 0; bank < NR_BANKS; ++bank) { | 203 | for (bank = 0; bank < NR_BANKS; ++bank) { |
| @@ -233,7 +231,8 @@ asmlinkage void mce_threshold_interrupt(void) | |||
| 233 | 231 | ||
| 234 | /* Log the machine check that caused the threshold | 232 | /* Log the machine check that caused the threshold |
| 235 | event. */ | 233 | event. */ |
| 236 | do_machine_check(NULL, 0); | 234 | machine_check_poll(MCP_TIMESTAMP, |
| 235 | &__get_cpu_var(mce_poll_banks)); | ||
| 237 | 236 | ||
| 238 | if (high & MASK_OVERFLOW_HI) { | 237 | if (high & MASK_OVERFLOW_HI) { |
| 239 | rdmsrl(address, m.misc); | 238 | rdmsrl(address, m.misc); |
| @@ -243,13 +242,10 @@ asmlinkage void mce_threshold_interrupt(void) | |||
| 243 | + bank * NR_BLOCKS | 242 | + bank * NR_BLOCKS |
| 244 | + block; | 243 | + block; |
| 245 | mce_log(&m); | 244 | mce_log(&m); |
| 246 | goto out; | 245 | return; |
| 247 | } | 246 | } |
| 248 | } | 247 | } |
| 249 | } | 248 | } |
| 250 | out: | ||
| 251 | inc_irq_stat(irq_threshold_count); | ||
| 252 | irq_exit(); | ||
| 253 | } | 249 | } |
| 254 | 250 | ||
| 255 | /* | 251 | /* |
| @@ -481,7 +477,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
| 481 | 477 | ||
| 482 | #ifdef CONFIG_SMP | 478 | #ifdef CONFIG_SMP |
| 483 | if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */ | 479 | if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */ |
| 484 | i = first_cpu(per_cpu(cpu_core_map, cpu)); | 480 | i = cpumask_first(cpu_core_mask(cpu)); |
| 485 | 481 | ||
| 486 | /* first core not up yet */ | 482 | /* first core not up yet */ |
| 487 | if (cpu_data(i).cpu_core_id) | 483 | if (cpu_data(i).cpu_core_id) |
| @@ -501,7 +497,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
| 501 | if (err) | 497 | if (err) |
| 502 | goto out; | 498 | goto out; |
| 503 | 499 | ||
| 504 | b->cpus = per_cpu(cpu_core_map, cpu); | 500 | cpumask_copy(b->cpus, cpu_core_mask(cpu)); |
| 505 | per_cpu(threshold_banks, cpu)[bank] = b; | 501 | per_cpu(threshold_banks, cpu)[bank] = b; |
| 506 | goto out; | 502 | goto out; |
| 507 | } | 503 | } |
| @@ -512,15 +508,20 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
| 512 | err = -ENOMEM; | 508 | err = -ENOMEM; |
| 513 | goto out; | 509 | goto out; |
| 514 | } | 510 | } |
| 511 | if (!alloc_cpumask_var(&b->cpus, GFP_KERNEL)) { | ||
| 512 | kfree(b); | ||
| 513 | err = -ENOMEM; | ||
| 514 | goto out; | ||
| 515 | } | ||
| 515 | 516 | ||
| 516 | b->kobj = kobject_create_and_add(name, &per_cpu(device_mce, cpu).kobj); | 517 | b->kobj = kobject_create_and_add(name, &per_cpu(device_mce, cpu).kobj); |
| 517 | if (!b->kobj) | 518 | if (!b->kobj) |
| 518 | goto out_free; | 519 | goto out_free; |
| 519 | 520 | ||
| 520 | #ifndef CONFIG_SMP | 521 | #ifndef CONFIG_SMP |
| 521 | b->cpus = CPU_MASK_ALL; | 522 | cpumask_setall(b->cpus); |
| 522 | #else | 523 | #else |
| 523 | b->cpus = per_cpu(cpu_core_map, cpu); | 524 | cpumask_copy(b->cpus, cpu_core_mask(cpu)); |
| 524 | #endif | 525 | #endif |
| 525 | 526 | ||
| 526 | per_cpu(threshold_banks, cpu)[bank] = b; | 527 | per_cpu(threshold_banks, cpu)[bank] = b; |
| @@ -529,7 +530,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
| 529 | if (err) | 530 | if (err) |
| 530 | goto out_free; | 531 | goto out_free; |
| 531 | 532 | ||
| 532 | for_each_cpu_mask_nr(i, b->cpus) { | 533 | for_each_cpu(i, b->cpus) { |
| 533 | if (i == cpu) | 534 | if (i == cpu) |
| 534 | continue; | 535 | continue; |
| 535 | 536 | ||
| @@ -545,6 +546,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
| 545 | 546 | ||
| 546 | out_free: | 547 | out_free: |
| 547 | per_cpu(threshold_banks, cpu)[bank] = NULL; | 548 | per_cpu(threshold_banks, cpu)[bank] = NULL; |
| 549 | free_cpumask_var(b->cpus); | ||
| 548 | kfree(b); | 550 | kfree(b); |
| 549 | out: | 551 | out: |
| 550 | return err; | 552 | return err; |
| @@ -619,7 +621,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank) | |||
| 619 | #endif | 621 | #endif |
| 620 | 622 | ||
| 621 | /* remove all sibling symlinks before unregistering */ | 623 | /* remove all sibling symlinks before unregistering */ |
| 622 | for_each_cpu_mask_nr(i, b->cpus) { | 624 | for_each_cpu(i, b->cpus) { |
| 623 | if (i == cpu) | 625 | if (i == cpu) |
| 624 | continue; | 626 | continue; |
| 625 | 627 | ||
| @@ -632,6 +634,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank) | |||
| 632 | free_out: | 634 | free_out: |
| 633 | kobject_del(b->kobj); | 635 | kobject_del(b->kobj); |
| 634 | kobject_put(b->kobj); | 636 | kobject_put(b->kobj); |
| 637 | free_cpumask_var(b->cpus); | ||
| 635 | kfree(b); | 638 | kfree(b); |
| 636 | per_cpu(threshold_banks, cpu)[bank] = NULL; | 639 | per_cpu(threshold_banks, cpu)[bank] = NULL; |
| 637 | } | 640 | } |
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c index f44c36624360..96b2a85545aa 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c | |||
| @@ -1,17 +1,21 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Intel specific MCE features. | 2 | * Intel specific MCE features. |
| 3 | * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca> | 3 | * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca> |
| 4 | * Copyright (C) 2008, 2009 Intel Corporation | ||
| 5 | * Author: Andi Kleen | ||
| 4 | */ | 6 | */ |
| 5 | 7 | ||
| 6 | #include <linux/init.h> | 8 | #include <linux/init.h> |
| 7 | #include <linux/interrupt.h> | 9 | #include <linux/interrupt.h> |
| 8 | #include <linux/percpu.h> | 10 | #include <linux/percpu.h> |
| 9 | #include <asm/processor.h> | 11 | #include <asm/processor.h> |
| 12 | #include <asm/apic.h> | ||
| 10 | #include <asm/msr.h> | 13 | #include <asm/msr.h> |
| 11 | #include <asm/mce.h> | 14 | #include <asm/mce.h> |
| 12 | #include <asm/hw_irq.h> | 15 | #include <asm/hw_irq.h> |
| 13 | #include <asm/idle.h> | 16 | #include <asm/idle.h> |
| 14 | #include <asm/therm_throt.h> | 17 | #include <asm/therm_throt.h> |
| 18 | #include <asm/apic.h> | ||
| 15 | 19 | ||
| 16 | asmlinkage void smp_thermal_interrupt(void) | 20 | asmlinkage void smp_thermal_interrupt(void) |
| 17 | { | 21 | { |
| @@ -24,7 +28,7 @@ asmlinkage void smp_thermal_interrupt(void) | |||
| 24 | 28 | ||
| 25 | rdmsrl(MSR_IA32_THERM_STATUS, msr_val); | 29 | rdmsrl(MSR_IA32_THERM_STATUS, msr_val); |
| 26 | if (therm_throt_process(msr_val & 1)) | 30 | if (therm_throt_process(msr_val & 1)) |
| 27 | mce_log_therm_throt_event(smp_processor_id(), msr_val); | 31 | mce_log_therm_throt_event(msr_val); |
| 28 | 32 | ||
| 29 | inc_irq_stat(irq_thermal_count); | 33 | inc_irq_stat(irq_thermal_count); |
| 30 | irq_exit(); | 34 | irq_exit(); |
| @@ -48,13 +52,13 @@ static void intel_init_thermal(struct cpuinfo_x86 *c) | |||
| 48 | */ | 52 | */ |
| 49 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | 53 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); |
| 50 | h = apic_read(APIC_LVTTHMR); | 54 | h = apic_read(APIC_LVTTHMR); |
| 51 | if ((l & (1 << 3)) && (h & APIC_DM_SMI)) { | 55 | if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { |
| 52 | printk(KERN_DEBUG | 56 | printk(KERN_DEBUG |
| 53 | "CPU%d: Thermal monitoring handled by SMI\n", cpu); | 57 | "CPU%d: Thermal monitoring handled by SMI\n", cpu); |
| 54 | return; | 58 | return; |
| 55 | } | 59 | } |
| 56 | 60 | ||
| 57 | if (cpu_has(c, X86_FEATURE_TM2) && (l & (1 << 13))) | 61 | if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2)) |
| 58 | tm2 = 1; | 62 | tm2 = 1; |
| 59 | 63 | ||
| 60 | if (h & APIC_VECTOR_MASK) { | 64 | if (h & APIC_VECTOR_MASK) { |
| @@ -72,7 +76,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c) | |||
| 72 | wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03, h); | 76 | wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03, h); |
| 73 | 77 | ||
| 74 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | 78 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); |
| 75 | wrmsr(MSR_IA32_MISC_ENABLE, l | (1 << 3), h); | 79 | wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h); |
| 76 | 80 | ||
| 77 | l = apic_read(APIC_LVTTHMR); | 81 | l = apic_read(APIC_LVTTHMR); |
| 78 | apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); | 82 | apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); |
| @@ -84,7 +88,209 @@ static void intel_init_thermal(struct cpuinfo_x86 *c) | |||
| 84 | return; | 88 | return; |
| 85 | } | 89 | } |
| 86 | 90 | ||
| 91 | /* | ||
| 92 | * Support for Intel Correct Machine Check Interrupts. This allows | ||
| 93 | * the CPU to raise an interrupt when a corrected machine check happened. | ||
| 94 | * Normally we pick those up using a regular polling timer. | ||
| 95 | * Also supports reliable discovery of shared banks. | ||
| 96 | */ | ||
| 97 | |||
| 98 | static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned); | ||
| 99 | |||
| 100 | /* | ||
| 101 | * cmci_discover_lock protects against parallel discovery attempts | ||
| 102 | * which could race against each other. | ||
| 103 | */ | ||
| 104 | static DEFINE_SPINLOCK(cmci_discover_lock); | ||
| 105 | |||
| 106 | #define CMCI_THRESHOLD 1 | ||
| 107 | |||
| 108 | static int cmci_supported(int *banks) | ||
| 109 | { | ||
| 110 | u64 cap; | ||
| 111 | |||
| 112 | /* | ||
| 113 | * Vendor check is not strictly needed, but the initial | ||
| 114 | * initialization is vendor keyed and this | ||
| 115 | * makes sure none of the backdoors are entered otherwise. | ||
| 116 | */ | ||
| 117 | if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) | ||
| 118 | return 0; | ||
| 119 | if (!cpu_has_apic || lapic_get_maxlvt() < 6) | ||
| 120 | return 0; | ||
| 121 | rdmsrl(MSR_IA32_MCG_CAP, cap); | ||
| 122 | *banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff); | ||
| 123 | return !!(cap & MCG_CMCI_P); | ||
| 124 | } | ||
| 125 | |||
| 126 | /* | ||
| 127 | * The interrupt handler. This is called on every event. | ||
| 128 | * Just call the poller directly to log any events. | ||
| 129 | * This could in theory increase the threshold under high load, | ||
| 130 | * but doesn't for now. | ||
| 131 | */ | ||
| 132 | static void intel_threshold_interrupt(void) | ||
| 133 | { | ||
| 134 | machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); | ||
| 135 | mce_notify_user(); | ||
| 136 | } | ||
| 137 | |||
| 138 | static void print_update(char *type, int *hdr, int num) | ||
| 139 | { | ||
| 140 | if (*hdr == 0) | ||
| 141 | printk(KERN_INFO "CPU %d MCA banks", smp_processor_id()); | ||
| 142 | *hdr = 1; | ||
| 143 | printk(KERN_CONT " %s:%d", type, num); | ||
| 144 | } | ||
| 145 | |||
| 146 | /* | ||
| 147 | * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks | ||
| 148 | * on this CPU. Use the algorithm recommended in the SDM to discover shared | ||
| 149 | * banks. | ||
| 150 | */ | ||
| 151 | static void cmci_discover(int banks, int boot) | ||
| 152 | { | ||
| 153 | unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned); | ||
| 154 | int hdr = 0; | ||
| 155 | int i; | ||
| 156 | |||
| 157 | spin_lock(&cmci_discover_lock); | ||
| 158 | for (i = 0; i < banks; i++) { | ||
| 159 | u64 val; | ||
| 160 | |||
| 161 | if (test_bit(i, owned)) | ||
| 162 | continue; | ||
| 163 | |||
| 164 | rdmsrl(MSR_IA32_MC0_CTL2 + i, val); | ||
| 165 | |||
| 166 | /* Already owned by someone else? */ | ||
| 167 | if (val & CMCI_EN) { | ||
| 168 | if (test_and_clear_bit(i, owned) || boot) | ||
| 169 | print_update("SHD", &hdr, i); | ||
| 170 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); | ||
| 171 | continue; | ||
| 172 | } | ||
| 173 | |||
| 174 | val |= CMCI_EN | CMCI_THRESHOLD; | ||
| 175 | wrmsrl(MSR_IA32_MC0_CTL2 + i, val); | ||
| 176 | rdmsrl(MSR_IA32_MC0_CTL2 + i, val); | ||
| 177 | |||
| 178 | /* Did the enable bit stick? -- the bank supports CMCI */ | ||
| 179 | if (val & CMCI_EN) { | ||
| 180 | if (!test_and_set_bit(i, owned) || boot) | ||
| 181 | print_update("CMCI", &hdr, i); | ||
| 182 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); | ||
| 183 | } else { | ||
| 184 | WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks))); | ||
| 185 | } | ||
| 186 | } | ||
| 187 | spin_unlock(&cmci_discover_lock); | ||
| 188 | if (hdr) | ||
| 189 | printk(KERN_CONT "\n"); | ||
| 190 | } | ||
| 191 | |||
| 192 | /* | ||
| 193 | * Just in case we missed an event during initialization check | ||
| 194 | * all the CMCI owned banks. | ||
| 195 | */ | ||
| 196 | void cmci_recheck(void) | ||
| 197 | { | ||
| 198 | unsigned long flags; | ||
| 199 | int banks; | ||
| 200 | |||
| 201 | if (!mce_available(&current_cpu_data) || !cmci_supported(&banks)) | ||
| 202 | return; | ||
| 203 | local_irq_save(flags); | ||
| 204 | machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); | ||
| 205 | local_irq_restore(flags); | ||
| 206 | } | ||
| 207 | |||
| 208 | /* | ||
| 209 | * Disable CMCI on this CPU for all banks it owns when it goes down. | ||
| 210 | * This allows other CPUs to claim the banks on rediscovery. | ||
| 211 | */ | ||
| 212 | void cmci_clear(void) | ||
| 213 | { | ||
| 214 | int i; | ||
| 215 | int banks; | ||
| 216 | u64 val; | ||
| 217 | |||
| 218 | if (!cmci_supported(&banks)) | ||
| 219 | return; | ||
| 220 | spin_lock(&cmci_discover_lock); | ||
| 221 | for (i = 0; i < banks; i++) { | ||
| 222 | if (!test_bit(i, __get_cpu_var(mce_banks_owned))) | ||
| 223 | continue; | ||
| 224 | /* Disable CMCI */ | ||
| 225 | rdmsrl(MSR_IA32_MC0_CTL2 + i, val); | ||
| 226 | val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK); | ||
| 227 | wrmsrl(MSR_IA32_MC0_CTL2 + i, val); | ||
| 228 | __clear_bit(i, __get_cpu_var(mce_banks_owned)); | ||
| 229 | } | ||
| 230 | spin_unlock(&cmci_discover_lock); | ||
| 231 | } | ||
| 232 | |||
| 233 | /* | ||
| 234 | * After a CPU went down cycle through all the others and rediscover | ||
| 235 | * Must run in process context. | ||
| 236 | */ | ||
| 237 | void cmci_rediscover(int dying) | ||
| 238 | { | ||
| 239 | int banks; | ||
| 240 | int cpu; | ||
| 241 | cpumask_var_t old; | ||
| 242 | |||
| 243 | if (!cmci_supported(&banks)) | ||
| 244 | return; | ||
| 245 | if (!alloc_cpumask_var(&old, GFP_KERNEL)) | ||
| 246 | return; | ||
| 247 | cpumask_copy(old, &current->cpus_allowed); | ||
| 248 | |||
| 249 | for_each_online_cpu (cpu) { | ||
| 250 | if (cpu == dying) | ||
| 251 | continue; | ||
| 252 | if (set_cpus_allowed_ptr(current, cpumask_of(cpu))) | ||
| 253 | continue; | ||
| 254 | /* Recheck banks in case CPUs don't all have the same */ | ||
| 255 | if (cmci_supported(&banks)) | ||
| 256 | cmci_discover(banks, 0); | ||
| 257 | } | ||
| 258 | |||
| 259 | set_cpus_allowed_ptr(current, old); | ||
| 260 | free_cpumask_var(old); | ||
| 261 | } | ||
| 262 | |||
| 263 | /* | ||
| 264 | * Reenable CMCI on this CPU in case a CPU down failed. | ||
| 265 | */ | ||
| 266 | void cmci_reenable(void) | ||
| 267 | { | ||
| 268 | int banks; | ||
| 269 | if (cmci_supported(&banks)) | ||
| 270 | cmci_discover(banks, 0); | ||
| 271 | } | ||
| 272 | |||
| 273 | static __cpuinit void intel_init_cmci(void) | ||
| 274 | { | ||
| 275 | int banks; | ||
| 276 | |||
| 277 | if (!cmci_supported(&banks)) | ||
| 278 | return; | ||
| 279 | |||
| 280 | mce_threshold_vector = intel_threshold_interrupt; | ||
| 281 | cmci_discover(banks, 1); | ||
| 282 | /* | ||
| 283 | * For CPU #0 this runs with still disabled APIC, but that's | ||
| 284 | * ok because only the vector is set up. We still do another | ||
| 285 | * check for the banks later for CPU #0 just to make sure | ||
| 286 | * to not miss any events. | ||
| 287 | */ | ||
| 288 | apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED); | ||
| 289 | cmci_recheck(); | ||
| 290 | } | ||
| 291 | |||
| 87 | void mce_intel_feature_init(struct cpuinfo_x86 *c) | 292 | void mce_intel_feature_init(struct cpuinfo_x86 *c) |
| 88 | { | 293 | { |
| 89 | intel_init_thermal(c); | 294 | intel_init_thermal(c); |
| 295 | intel_init_cmci(); | ||
| 90 | } | 296 | } |
diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c index 9b60fce09f75..f53bdcbaf382 100644 --- a/arch/x86/kernel/cpu/mcheck/p4.c +++ b/arch/x86/kernel/cpu/mcheck/p4.c | |||
| @@ -85,7 +85,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c) | |||
| 85 | */ | 85 | */ |
| 86 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | 86 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); |
| 87 | h = apic_read(APIC_LVTTHMR); | 87 | h = apic_read(APIC_LVTTHMR); |
| 88 | if ((l & (1<<3)) && (h & APIC_DM_SMI)) { | 88 | if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { |
| 89 | printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", | 89 | printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", |
| 90 | cpu); | 90 | cpu); |
| 91 | return; /* -EBUSY */ | 91 | return; /* -EBUSY */ |
| @@ -111,7 +111,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c) | |||
| 111 | vendor_thermal_interrupt = intel_thermal_interrupt; | 111 | vendor_thermal_interrupt = intel_thermal_interrupt; |
| 112 | 112 | ||
| 113 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | 113 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); |
| 114 | wrmsr(MSR_IA32_MISC_ENABLE, l | (1<<3), h); | 114 | wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h); |
| 115 | 115 | ||
| 116 | l = apic_read(APIC_LVTTHMR); | 116 | l = apic_read(APIC_LVTTHMR); |
| 117 | apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); | 117 | apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); |
diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c new file mode 100644 index 000000000000..23ee9e730f78 --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/threshold.c | |||
| @@ -0,0 +1,29 @@ | |||
| 1 | /* | ||
| 2 | * Common corrected MCE threshold handler code: | ||
| 3 | */ | ||
| 4 | #include <linux/interrupt.h> | ||
| 5 | #include <linux/kernel.h> | ||
| 6 | |||
| 7 | #include <asm/irq_vectors.h> | ||
| 8 | #include <asm/apic.h> | ||
| 9 | #include <asm/idle.h> | ||
| 10 | #include <asm/mce.h> | ||
| 11 | |||
| 12 | static void default_threshold_interrupt(void) | ||
| 13 | { | ||
| 14 | printk(KERN_ERR "Unexpected threshold interrupt at vector %x\n", | ||
| 15 | THRESHOLD_APIC_VECTOR); | ||
| 16 | } | ||
| 17 | |||
| 18 | void (*mce_threshold_vector)(void) = default_threshold_interrupt; | ||
| 19 | |||
| 20 | asmlinkage void mce_threshold_interrupt(void) | ||
| 21 | { | ||
| 22 | exit_idle(); | ||
| 23 | irq_enter(); | ||
| 24 | inc_irq_stat(irq_threshold_count); | ||
| 25 | mce_threshold_vector(); | ||
| 26 | irq_exit(); | ||
| 27 | /* Ack only at the end to avoid potential reentry */ | ||
| 28 | ack_APIC_irq(); | ||
| 29 | } | ||
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 9abd48b22674..f6c70a164e32 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c | |||
| @@ -19,7 +19,7 @@ | |||
| 19 | #include <linux/nmi.h> | 19 | #include <linux/nmi.h> |
| 20 | #include <linux/kprobes.h> | 20 | #include <linux/kprobes.h> |
| 21 | 21 | ||
| 22 | #include <asm/apic.h> | 22 | #include <asm/genapic.h> |
| 23 | #include <asm/intel_arch_perfmon.h> | 23 | #include <asm/intel_arch_perfmon.h> |
| 24 | 24 | ||
| 25 | struct nmi_watchdog_ctlblk { | 25 | struct nmi_watchdog_ctlblk { |
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 01b1244ef1c0..f93047fed791 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c | |||
| @@ -7,15 +7,14 @@ | |||
| 7 | /* | 7 | /* |
| 8 | * Get CPU information for use by the procfs. | 8 | * Get CPU information for use by the procfs. |
| 9 | */ | 9 | */ |
| 10 | #ifdef CONFIG_X86_32 | ||
| 11 | static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c, | 10 | static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c, |
| 12 | unsigned int cpu) | 11 | unsigned int cpu) |
| 13 | { | 12 | { |
| 14 | #ifdef CONFIG_X86_HT | 13 | #ifdef CONFIG_SMP |
| 15 | if (c->x86_max_cores * smp_num_siblings > 1) { | 14 | if (c->x86_max_cores * smp_num_siblings > 1) { |
| 16 | seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); | 15 | seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); |
| 17 | seq_printf(m, "siblings\t: %d\n", | 16 | seq_printf(m, "siblings\t: %d\n", |
| 18 | cpus_weight(per_cpu(cpu_core_map, cpu))); | 17 | cpumask_weight(cpu_sibling_mask(cpu))); |
| 19 | seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); | 18 | seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); |
| 20 | seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); | 19 | seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); |
| 21 | seq_printf(m, "apicid\t\t: %d\n", c->apicid); | 20 | seq_printf(m, "apicid\t\t: %d\n", c->apicid); |
| @@ -24,6 +23,7 @@ static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c, | |||
| 24 | #endif | 23 | #endif |
| 25 | } | 24 | } |
| 26 | 25 | ||
| 26 | #ifdef CONFIG_X86_32 | ||
| 27 | static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c) | 27 | static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c) |
| 28 | { | 28 | { |
| 29 | /* | 29 | /* |
| @@ -50,22 +50,6 @@ static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c) | |||
| 50 | c->wp_works_ok ? "yes" : "no"); | 50 | c->wp_works_ok ? "yes" : "no"); |
| 51 | } | 51 | } |
| 52 | #else | 52 | #else |
| 53 | static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c, | ||
| 54 | unsigned int cpu) | ||
| 55 | { | ||
| 56 | #ifdef CONFIG_SMP | ||
| 57 | if (c->x86_max_cores * smp_num_siblings > 1) { | ||
| 58 | seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); | ||
| 59 | seq_printf(m, "siblings\t: %d\n", | ||
| 60 | cpus_weight(per_cpu(cpu_core_map, cpu))); | ||
| 61 | seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); | ||
| 62 | seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); | ||
| 63 | seq_printf(m, "apicid\t\t: %d\n", c->apicid); | ||
| 64 | seq_printf(m, "initial apicid\t: %d\n", c->initial_apicid); | ||
| 65 | } | ||
| 66 | #endif | ||
| 67 | } | ||
| 68 | |||
| 69 | static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c) | 53 | static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c) |
| 70 | { | 54 | { |
| 71 | seq_printf(m, | 55 | seq_printf(m, |
| @@ -159,9 +143,9 @@ static int show_cpuinfo(struct seq_file *m, void *v) | |||
| 159 | static void *c_start(struct seq_file *m, loff_t *pos) | 143 | static void *c_start(struct seq_file *m, loff_t *pos) |
| 160 | { | 144 | { |
| 161 | if (*pos == 0) /* just in case, cpu 0 is not the first */ | 145 | if (*pos == 0) /* just in case, cpu 0 is not the first */ |
| 162 | *pos = first_cpu(cpu_online_map); | 146 | *pos = cpumask_first(cpu_online_mask); |
| 163 | else | 147 | else |
| 164 | *pos = next_cpu_nr(*pos - 1, cpu_online_map); | 148 | *pos = cpumask_next(*pos - 1, cpu_online_mask); |
| 165 | if ((*pos) < nr_cpu_ids) | 149 | if ((*pos) < nr_cpu_ids) |
| 166 | return &cpu_data(*pos); | 150 | return &cpu_data(*pos); |
| 167 | return NULL; | 151 | return NULL; |
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index c689d19e35ab..ff958248e61d 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c | |||
| @@ -24,12 +24,10 @@ | |||
| 24 | #include <asm/apic.h> | 24 | #include <asm/apic.h> |
| 25 | #include <asm/hpet.h> | 25 | #include <asm/hpet.h> |
| 26 | #include <linux/kdebug.h> | 26 | #include <linux/kdebug.h> |
| 27 | #include <asm/smp.h> | 27 | #include <asm/cpu.h> |
| 28 | #include <asm/reboot.h> | 28 | #include <asm/reboot.h> |
| 29 | #include <asm/virtext.h> | 29 | #include <asm/virtext.h> |
| 30 | 30 | ||
| 31 | #include <mach_ipi.h> | ||
| 32 | |||
| 33 | 31 | ||
| 34 | #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) | 32 | #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) |
| 35 | 33 | ||
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 6b1f6f6f8661..87d103ded1c3 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c | |||
| @@ -99,7 +99,7 @@ print_context_stack(struct thread_info *tinfo, | |||
| 99 | frame = frame->next_frame; | 99 | frame = frame->next_frame; |
| 100 | bp = (unsigned long) frame; | 100 | bp = (unsigned long) frame; |
| 101 | } else { | 101 | } else { |
| 102 | ops->address(data, addr, bp == 0); | 102 | ops->address(data, addr, 0); |
| 103 | } | 103 | } |
| 104 | print_ftrace_graph_addr(addr, data, ops, tinfo, graph); | 104 | print_ftrace_graph_addr(addr, data, ops, tinfo, graph); |
| 105 | } | 105 | } |
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index c302d0707048..d35db5993fd6 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c | |||
| @@ -106,7 +106,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
| 106 | const struct stacktrace_ops *ops, void *data) | 106 | const struct stacktrace_ops *ops, void *data) |
| 107 | { | 107 | { |
| 108 | const unsigned cpu = get_cpu(); | 108 | const unsigned cpu = get_cpu(); |
| 109 | unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; | 109 | unsigned long *irq_stack_end = |
| 110 | (unsigned long *)per_cpu(irq_stack_ptr, cpu); | ||
| 110 | unsigned used = 0; | 111 | unsigned used = 0; |
| 111 | struct thread_info *tinfo; | 112 | struct thread_info *tinfo; |
| 112 | int graph = 0; | 113 | int graph = 0; |
| @@ -160,23 +161,23 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
| 160 | stack = (unsigned long *) estack_end[-2]; | 161 | stack = (unsigned long *) estack_end[-2]; |
| 161 | continue; | 162 | continue; |
| 162 | } | 163 | } |
| 163 | if (irqstack_end) { | 164 | if (irq_stack_end) { |
| 164 | unsigned long *irqstack; | 165 | unsigned long *irq_stack; |
| 165 | irqstack = irqstack_end - | 166 | irq_stack = irq_stack_end - |
| 166 | (IRQSTACKSIZE - 64) / sizeof(*irqstack); | 167 | (IRQ_STACK_SIZE - 64) / sizeof(*irq_stack); |
| 167 | 168 | ||
| 168 | if (stack >= irqstack && stack < irqstack_end) { | 169 | if (stack >= irq_stack && stack < irq_stack_end) { |
| 169 | if (ops->stack(data, "IRQ") < 0) | 170 | if (ops->stack(data, "IRQ") < 0) |
| 170 | break; | 171 | break; |
| 171 | bp = print_context_stack(tinfo, stack, bp, | 172 | bp = print_context_stack(tinfo, stack, bp, |
| 172 | ops, data, irqstack_end, &graph); | 173 | ops, data, irq_stack_end, &graph); |
| 173 | /* | 174 | /* |
| 174 | * We link to the next stack (which would be | 175 | * We link to the next stack (which would be |
| 175 | * the process stack normally) the last | 176 | * the process stack normally) the last |
| 176 | * pointer (index -1 to end) in the IRQ stack: | 177 | * pointer (index -1 to end) in the IRQ stack: |
| 177 | */ | 178 | */ |
| 178 | stack = (unsigned long *) (irqstack_end[-1]); | 179 | stack = (unsigned long *) (irq_stack_end[-1]); |
| 179 | irqstack_end = NULL; | 180 | irq_stack_end = NULL; |
| 180 | ops->stack(data, "EOI"); | 181 | ops->stack(data, "EOI"); |
| 181 | continue; | 182 | continue; |
| 182 | } | 183 | } |
| @@ -199,10 +200,10 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | |||
| 199 | unsigned long *stack; | 200 | unsigned long *stack; |
| 200 | int i; | 201 | int i; |
| 201 | const int cpu = smp_processor_id(); | 202 | const int cpu = smp_processor_id(); |
| 202 | unsigned long *irqstack_end = | 203 | unsigned long *irq_stack_end = |
| 203 | (unsigned long *) (cpu_pda(cpu)->irqstackptr); | 204 | (unsigned long *)(per_cpu(irq_stack_ptr, cpu)); |
| 204 | unsigned long *irqstack = | 205 | unsigned long *irq_stack = |
| 205 | (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE); | 206 | (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE); |
| 206 | 207 | ||
| 207 | /* | 208 | /* |
| 208 | * debugging aid: "show_stack(NULL, NULL);" prints the | 209 | * debugging aid: "show_stack(NULL, NULL);" prints the |
| @@ -218,9 +219,9 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | |||
| 218 | 219 | ||
| 219 | stack = sp; | 220 | stack = sp; |
| 220 | for (i = 0; i < kstack_depth_to_print; i++) { | 221 | for (i = 0; i < kstack_depth_to_print; i++) { |
| 221 | if (stack >= irqstack && stack <= irqstack_end) { | 222 | if (stack >= irq_stack && stack <= irq_stack_end) { |
| 222 | if (stack == irqstack_end) { | 223 | if (stack == irq_stack_end) { |
| 223 | stack = (unsigned long *) (irqstack_end[-1]); | 224 | stack = (unsigned long *) (irq_stack_end[-1]); |
| 224 | printk(" <EOI> "); | 225 | printk(" <EOI> "); |
| 225 | } | 226 | } |
| 226 | } else { | 227 | } else { |
| @@ -241,7 +242,7 @@ void show_registers(struct pt_regs *regs) | |||
| 241 | int i; | 242 | int i; |
| 242 | unsigned long sp; | 243 | unsigned long sp; |
| 243 | const int cpu = smp_processor_id(); | 244 | const int cpu = smp_processor_id(); |
| 244 | struct task_struct *cur = cpu_pda(cpu)->pcurrent; | 245 | struct task_struct *cur = current; |
| 245 | 246 | ||
| 246 | sp = regs->sp; | 247 | sp = regs->sp; |
| 247 | printk("CPU %d ", cpu); | 248 | printk("CPU %d ", cpu); |
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index e85826829cf2..508bec1cee27 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c | |||
| @@ -858,6 +858,9 @@ void __init reserve_early_overlap_ok(u64 start, u64 end, char *name) | |||
| 858 | */ | 858 | */ |
| 859 | void __init reserve_early(u64 start, u64 end, char *name) | 859 | void __init reserve_early(u64 start, u64 end, char *name) |
| 860 | { | 860 | { |
| 861 | if (start >= end) | ||
| 862 | return; | ||
| 863 | |||
| 861 | drop_overlaps_that_are_ok(start, end); | 864 | drop_overlaps_that_are_ok(start, end); |
| 862 | __reserve_early(start, end, name, 0); | 865 | __reserve_early(start, end, name, 0); |
| 863 | } | 866 | } |
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 504ad198e4ad..639ad98238a2 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c | |||
| @@ -13,8 +13,8 @@ | |||
| 13 | #include <asm/setup.h> | 13 | #include <asm/setup.h> |
| 14 | #include <xen/hvc-console.h> | 14 | #include <xen/hvc-console.h> |
| 15 | #include <asm/pci-direct.h> | 15 | #include <asm/pci-direct.h> |
| 16 | #include <asm/pgtable.h> | ||
| 17 | #include <asm/fixmap.h> | 16 | #include <asm/fixmap.h> |
| 17 | #include <asm/pgtable.h> | ||
| 18 | #include <linux/usb/ehci_def.h> | 18 | #include <linux/usb/ehci_def.h> |
| 19 | 19 | ||
| 20 | /* Simple VGA output */ | 20 | /* Simple VGA output */ |
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index eb1ef3b67dd5..1736acc4d7aa 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c | |||
| @@ -366,10 +366,12 @@ void __init efi_init(void) | |||
| 366 | SMBIOS_TABLE_GUID)) { | 366 | SMBIOS_TABLE_GUID)) { |
| 367 | efi.smbios = config_tables[i].table; | 367 | efi.smbios = config_tables[i].table; |
| 368 | printk(" SMBIOS=0x%lx ", config_tables[i].table); | 368 | printk(" SMBIOS=0x%lx ", config_tables[i].table); |
| 369 | #ifdef CONFIG_X86_UV | ||
| 369 | } else if (!efi_guidcmp(config_tables[i].guid, | 370 | } else if (!efi_guidcmp(config_tables[i].guid, |
| 370 | UV_SYSTEM_TABLE_GUID)) { | 371 | UV_SYSTEM_TABLE_GUID)) { |
| 371 | efi.uv_systab = config_tables[i].table; | 372 | efi.uv_systab = config_tables[i].table; |
| 372 | printk(" UVsystab=0x%lx ", config_tables[i].table); | 373 | printk(" UVsystab=0x%lx ", config_tables[i].table); |
| 374 | #endif | ||
| 373 | } else if (!efi_guidcmp(config_tables[i].guid, | 375 | } else if (!efi_guidcmp(config_tables[i].guid, |
| 374 | HCDP_TABLE_GUID)) { | 376 | HCDP_TABLE_GUID)) { |
| 375 | efi.hcdp = config_tables[i].table; | 377 | efi.hcdp = config_tables[i].table; |
diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c index cb783b92c50c..22c3b7828c50 100644 --- a/arch/x86/kernel/efi_64.c +++ b/arch/x86/kernel/efi_64.c | |||
| @@ -36,6 +36,7 @@ | |||
| 36 | #include <asm/proto.h> | 36 | #include <asm/proto.h> |
| 37 | #include <asm/efi.h> | 37 | #include <asm/efi.h> |
| 38 | #include <asm/cacheflush.h> | 38 | #include <asm/cacheflush.h> |
| 39 | #include <asm/fixmap.h> | ||
| 39 | 40 | ||
| 40 | static pgd_t save_pgd __initdata; | 41 | static pgd_t save_pgd __initdata; |
| 41 | static unsigned long efi_flags __initdata; | 42 | static unsigned long efi_flags __initdata; |
diff --git a/arch/x86/kernel/efi_stub_32.S b/arch/x86/kernel/efi_stub_32.S index ef00bb77d7e4..fbe66e626c09 100644 --- a/arch/x86/kernel/efi_stub_32.S +++ b/arch/x86/kernel/efi_stub_32.S | |||
| @@ -6,7 +6,7 @@ | |||
| 6 | */ | 6 | */ |
| 7 | 7 | ||
| 8 | #include <linux/linkage.h> | 8 | #include <linux/linkage.h> |
| 9 | #include <asm/page.h> | 9 | #include <asm/page_types.h> |
| 10 | 10 | ||
| 11 | /* | 11 | /* |
| 12 | * efi_call_phys(void *, ...) is a function with variable parameters. | 12 | * efi_call_phys(void *, ...) is a function with variable parameters. |
| @@ -113,6 +113,7 @@ ENTRY(efi_call_phys) | |||
| 113 | movl (%edx), %ecx | 113 | movl (%edx), %ecx |
| 114 | pushl %ecx | 114 | pushl %ecx |
| 115 | ret | 115 | ret |
| 116 | ENDPROC(efi_call_phys) | ||
| 116 | .previous | 117 | .previous |
| 117 | 118 | ||
| 118 | .data | 119 | .data |
diff --git a/arch/x86/kernel/efi_stub_64.S b/arch/x86/kernel/efi_stub_64.S index 99b47d48c9f4..4c07ccab8146 100644 --- a/arch/x86/kernel/efi_stub_64.S +++ b/arch/x86/kernel/efi_stub_64.S | |||
| @@ -41,6 +41,7 @@ ENTRY(efi_call0) | |||
| 41 | addq $32, %rsp | 41 | addq $32, %rsp |
| 42 | RESTORE_XMM | 42 | RESTORE_XMM |
| 43 | ret | 43 | ret |
| 44 | ENDPROC(efi_call0) | ||
| 44 | 45 | ||
| 45 | ENTRY(efi_call1) | 46 | ENTRY(efi_call1) |
| 46 | SAVE_XMM | 47 | SAVE_XMM |
| @@ -50,6 +51,7 @@ ENTRY(efi_call1) | |||
| 50 | addq $32, %rsp | 51 | addq $32, %rsp |
| 51 | RESTORE_XMM | 52 | RESTORE_XMM |
| 52 | ret | 53 | ret |
| 54 | ENDPROC(efi_call1) | ||
| 53 | 55 | ||
| 54 | ENTRY(efi_call2) | 56 | ENTRY(efi_call2) |
| 55 | SAVE_XMM | 57 | SAVE_XMM |
| @@ -59,6 +61,7 @@ ENTRY(efi_call2) | |||
| 59 | addq $32, %rsp | 61 | addq $32, %rsp |
| 60 | RESTORE_XMM | 62 | RESTORE_XMM |
| 61 | ret | 63 | ret |
| 64 | ENDPROC(efi_call2) | ||
| 62 | 65 | ||
| 63 | ENTRY(efi_call3) | 66 | ENTRY(efi_call3) |
| 64 | SAVE_XMM | 67 | SAVE_XMM |
| @@ -69,6 +72,7 @@ ENTRY(efi_call3) | |||
| 69 | addq $32, %rsp | 72 | addq $32, %rsp |
| 70 | RESTORE_XMM | 73 | RESTORE_XMM |
| 71 | ret | 74 | ret |
| 75 | ENDPROC(efi_call3) | ||
| 72 | 76 | ||
| 73 | ENTRY(efi_call4) | 77 | ENTRY(efi_call4) |
| 74 | SAVE_XMM | 78 | SAVE_XMM |
| @@ -80,6 +84,7 @@ ENTRY(efi_call4) | |||
| 80 | addq $32, %rsp | 84 | addq $32, %rsp |
| 81 | RESTORE_XMM | 85 | RESTORE_XMM |
| 82 | ret | 86 | ret |
| 87 | ENDPROC(efi_call4) | ||
| 83 | 88 | ||
| 84 | ENTRY(efi_call5) | 89 | ENTRY(efi_call5) |
| 85 | SAVE_XMM | 90 | SAVE_XMM |
| @@ -92,6 +97,7 @@ ENTRY(efi_call5) | |||
| 92 | addq $48, %rsp | 97 | addq $48, %rsp |
| 93 | RESTORE_XMM | 98 | RESTORE_XMM |
| 94 | ret | 99 | ret |
| 100 | ENDPROC(efi_call5) | ||
| 95 | 101 | ||
| 96 | ENTRY(efi_call6) | 102 | ENTRY(efi_call6) |
| 97 | SAVE_XMM | 103 | SAVE_XMM |
| @@ -107,3 +113,4 @@ ENTRY(efi_call6) | |||
| 107 | addq $48, %rsp | 113 | addq $48, %rsp |
| 108 | RESTORE_XMM | 114 | RESTORE_XMM |
| 109 | ret | 115 | ret |
| 116 | ENDPROC(efi_call6) | ||
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 46469029e9d3..899e8938e79f 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
| @@ -30,12 +30,13 @@ | |||
| 30 | * 1C(%esp) - %ds | 30 | * 1C(%esp) - %ds |
| 31 | * 20(%esp) - %es | 31 | * 20(%esp) - %es |
| 32 | * 24(%esp) - %fs | 32 | * 24(%esp) - %fs |
| 33 | * 28(%esp) - orig_eax | 33 | * 28(%esp) - %gs saved iff !CONFIG_X86_32_LAZY_GS |
| 34 | * 2C(%esp) - %eip | 34 | * 2C(%esp) - orig_eax |
| 35 | * 30(%esp) - %cs | 35 | * 30(%esp) - %eip |
| 36 | * 34(%esp) - %eflags | 36 | * 34(%esp) - %cs |
| 37 | * 38(%esp) - %oldesp | 37 | * 38(%esp) - %eflags |
| 38 | * 3C(%esp) - %oldss | 38 | * 3C(%esp) - %oldesp |
| 39 | * 40(%esp) - %oldss | ||
| 39 | * | 40 | * |
| 40 | * "current" is in register %ebx during any slow entries. | 41 | * "current" is in register %ebx during any slow entries. |
| 41 | */ | 42 | */ |
| @@ -46,7 +47,7 @@ | |||
| 46 | #include <asm/errno.h> | 47 | #include <asm/errno.h> |
| 47 | #include <asm/segment.h> | 48 | #include <asm/segment.h> |
| 48 | #include <asm/smp.h> | 49 | #include <asm/smp.h> |
| 49 | #include <asm/page.h> | 50 | #include <asm/page_types.h> |
| 50 | #include <asm/desc.h> | 51 | #include <asm/desc.h> |
| 51 | #include <asm/percpu.h> | 52 | #include <asm/percpu.h> |
| 52 | #include <asm/dwarf2.h> | 53 | #include <asm/dwarf2.h> |
| @@ -101,121 +102,221 @@ | |||
| 101 | #define resume_userspace_sig resume_userspace | 102 | #define resume_userspace_sig resume_userspace |
| 102 | #endif | 103 | #endif |
| 103 | 104 | ||
| 104 | #define SAVE_ALL \ | 105 | /* |
| 105 | cld; \ | 106 | * User gs save/restore |
| 106 | pushl %fs; \ | 107 | * |
| 107 | CFI_ADJUST_CFA_OFFSET 4;\ | 108 | * %gs is used for userland TLS and kernel only uses it for stack |
| 108 | /*CFI_REL_OFFSET fs, 0;*/\ | 109 | * canary which is required to be at %gs:20 by gcc. Read the comment |
| 109 | pushl %es; \ | 110 | * at the top of stackprotector.h for more info. |
| 110 | CFI_ADJUST_CFA_OFFSET 4;\ | 111 | * |
| 111 | /*CFI_REL_OFFSET es, 0;*/\ | 112 | * Local labels 98 and 99 are used. |
| 112 | pushl %ds; \ | 113 | */ |
| 113 | CFI_ADJUST_CFA_OFFSET 4;\ | 114 | #ifdef CONFIG_X86_32_LAZY_GS |
| 114 | /*CFI_REL_OFFSET ds, 0;*/\ | 115 | |
| 115 | pushl %eax; \ | 116 | /* unfortunately push/pop can't be no-op */ |
| 116 | CFI_ADJUST_CFA_OFFSET 4;\ | 117 | .macro PUSH_GS |
| 117 | CFI_REL_OFFSET eax, 0;\ | 118 | pushl $0 |
| 118 | pushl %ebp; \ | 119 | CFI_ADJUST_CFA_OFFSET 4 |
| 119 | CFI_ADJUST_CFA_OFFSET 4;\ | 120 | .endm |
| 120 | CFI_REL_OFFSET ebp, 0;\ | 121 | .macro POP_GS pop=0 |
| 121 | pushl %edi; \ | 122 | addl $(4 + \pop), %esp |
| 122 | CFI_ADJUST_CFA_OFFSET 4;\ | 123 | CFI_ADJUST_CFA_OFFSET -(4 + \pop) |
| 123 | CFI_REL_OFFSET edi, 0;\ | 124 | .endm |
| 124 | pushl %esi; \ | 125 | .macro POP_GS_EX |
| 125 | CFI_ADJUST_CFA_OFFSET 4;\ | 126 | .endm |
| 126 | CFI_REL_OFFSET esi, 0;\ | 127 | |
| 127 | pushl %edx; \ | 128 | /* all the rest are no-op */ |
| 128 | CFI_ADJUST_CFA_OFFSET 4;\ | 129 | .macro PTGS_TO_GS |
| 129 | CFI_REL_OFFSET edx, 0;\ | 130 | .endm |
| 130 | pushl %ecx; \ | 131 | .macro PTGS_TO_GS_EX |
| 131 | CFI_ADJUST_CFA_OFFSET 4;\ | 132 | .endm |
| 132 | CFI_REL_OFFSET ecx, 0;\ | 133 | .macro GS_TO_REG reg |
| 133 | pushl %ebx; \ | 134 | .endm |
| 134 | CFI_ADJUST_CFA_OFFSET 4;\ | 135 | .macro REG_TO_PTGS reg |
| 135 | CFI_REL_OFFSET ebx, 0;\ | 136 | .endm |
| 136 | movl $(__USER_DS), %edx; \ | 137 | .macro SET_KERNEL_GS reg |
| 137 | movl %edx, %ds; \ | 138 | .endm |
| 138 | movl %edx, %es; \ | 139 | |
| 139 | movl $(__KERNEL_PERCPU), %edx; \ | 140 | #else /* CONFIG_X86_32_LAZY_GS */ |
| 141 | |||
| 142 | .macro PUSH_GS | ||
| 143 | pushl %gs | ||
| 144 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 145 | /*CFI_REL_OFFSET gs, 0*/ | ||
| 146 | .endm | ||
| 147 | |||
| 148 | .macro POP_GS pop=0 | ||
| 149 | 98: popl %gs | ||
| 150 | CFI_ADJUST_CFA_OFFSET -4 | ||
| 151 | /*CFI_RESTORE gs*/ | ||
| 152 | .if \pop <> 0 | ||
| 153 | add $\pop, %esp | ||
| 154 | CFI_ADJUST_CFA_OFFSET -\pop | ||
| 155 | .endif | ||
| 156 | .endm | ||
| 157 | .macro POP_GS_EX | ||
| 158 | .pushsection .fixup, "ax" | ||
| 159 | 99: movl $0, (%esp) | ||
| 160 | jmp 98b | ||
| 161 | .section __ex_table, "a" | ||
| 162 | .align 4 | ||
| 163 | .long 98b, 99b | ||
| 164 | .popsection | ||
| 165 | .endm | ||
| 166 | |||
| 167 | .macro PTGS_TO_GS | ||
| 168 | 98: mov PT_GS(%esp), %gs | ||
| 169 | .endm | ||
| 170 | .macro PTGS_TO_GS_EX | ||
| 171 | .pushsection .fixup, "ax" | ||
| 172 | 99: movl $0, PT_GS(%esp) | ||
| 173 | jmp 98b | ||
| 174 | .section __ex_table, "a" | ||
| 175 | .align 4 | ||
| 176 | .long 98b, 99b | ||
| 177 | .popsection | ||
| 178 | .endm | ||
| 179 | |||
| 180 | .macro GS_TO_REG reg | ||
| 181 | movl %gs, \reg | ||
| 182 | /*CFI_REGISTER gs, \reg*/ | ||
| 183 | .endm | ||
| 184 | .macro REG_TO_PTGS reg | ||
| 185 | movl \reg, PT_GS(%esp) | ||
| 186 | /*CFI_REL_OFFSET gs, PT_GS*/ | ||
| 187 | .endm | ||
| 188 | .macro SET_KERNEL_GS reg | ||
| 189 | movl $(__KERNEL_STACK_CANARY), \reg | ||
| 190 | movl \reg, %gs | ||
| 191 | .endm | ||
| 192 | |||
| 193 | #endif /* CONFIG_X86_32_LAZY_GS */ | ||
| 194 | |||
| 195 | .macro SAVE_ALL | ||
| 196 | cld | ||
| 197 | PUSH_GS | ||
| 198 | pushl %fs | ||
| 199 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 200 | /*CFI_REL_OFFSET fs, 0;*/ | ||
| 201 | pushl %es | ||
| 202 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 203 | /*CFI_REL_OFFSET es, 0;*/ | ||
| 204 | pushl %ds | ||
| 205 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 206 | /*CFI_REL_OFFSET ds, 0;*/ | ||
| 207 | pushl %eax | ||
| 208 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 209 | CFI_REL_OFFSET eax, 0 | ||
| 210 | pushl %ebp | ||
| 211 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 212 | CFI_REL_OFFSET ebp, 0 | ||
| 213 | pushl %edi | ||
| 214 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 215 | CFI_REL_OFFSET edi, 0 | ||
| 216 | pushl %esi | ||
| 217 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 218 | CFI_REL_OFFSET esi, 0 | ||
| 219 | pushl %edx | ||
| 220 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 221 | CFI_REL_OFFSET edx, 0 | ||
| 222 | pushl %ecx | ||
| 223 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 224 | CFI_REL_OFFSET ecx, 0 | ||
| 225 | pushl %ebx | ||
| 226 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 227 | CFI_REL_OFFSET ebx, 0 | ||
| 228 | movl $(__USER_DS), %edx | ||
| 229 | movl %edx, %ds | ||
| 230 | movl %edx, %es | ||
| 231 | movl $(__KERNEL_PERCPU), %edx | ||
| 140 | movl %edx, %fs | 232 | movl %edx, %fs |
| 233 | SET_KERNEL_GS %edx | ||
| 234 | .endm | ||
| 141 | 235 | ||
| 142 | #define RESTORE_INT_REGS \ | 236 | .macro RESTORE_INT_REGS |
| 143 | popl %ebx; \ | 237 | popl %ebx |
| 144 | CFI_ADJUST_CFA_OFFSET -4;\ | 238 | CFI_ADJUST_CFA_OFFSET -4 |
| 145 | CFI_RESTORE ebx;\ | 239 | CFI_RESTORE ebx |
| 146 | popl %ecx; \ | 240 | popl %ecx |
| 147 | CFI_ADJUST_CFA_OFFSET -4;\ | 241 | CFI_ADJUST_CFA_OFFSET -4 |
| 148 | CFI_RESTORE ecx;\ | 242 | CFI_RESTORE ecx |
| 149 | popl %edx; \ | 243 | popl %edx |
| 150 | CFI_ADJUST_CFA_OFFSET -4;\ | 244 | CFI_ADJUST_CFA_OFFSET -4 |
| 151 | CFI_RESTORE edx;\ | 245 | CFI_RESTORE edx |
| 152 | popl %esi; \ | 246 | popl %esi |
| 153 | CFI_ADJUST_CFA_OFFSET -4;\ | 247 | CFI_ADJUST_CFA_OFFSET -4 |
| 154 | CFI_RESTORE esi;\ | 248 | CFI_RESTORE esi |
| 155 | popl %edi; \ | 249 | popl %edi |
| 156 | CFI_ADJUST_CFA_OFFSET -4;\ | 250 | CFI_ADJUST_CFA_OFFSET -4 |
| 157 | CFI_RESTORE edi;\ | 251 | CFI_RESTORE edi |
| 158 | popl %ebp; \ | 252 | popl %ebp |
| 159 | CFI_ADJUST_CFA_OFFSET -4;\ | 253 | CFI_ADJUST_CFA_OFFSET -4 |
| 160 | CFI_RESTORE ebp;\ | 254 | CFI_RESTORE ebp |
| 161 | popl %eax; \ | 255 | popl %eax |
| 162 | CFI_ADJUST_CFA_OFFSET -4;\ | 256 | CFI_ADJUST_CFA_OFFSET -4 |
| 163 | CFI_RESTORE eax | 257 | CFI_RESTORE eax |
| 258 | .endm | ||
| 164 | 259 | ||
| 165 | #define RESTORE_REGS \ | 260 | .macro RESTORE_REGS pop=0 |
| 166 | RESTORE_INT_REGS; \ | 261 | RESTORE_INT_REGS |
| 167 | 1: popl %ds; \ | 262 | 1: popl %ds |
| 168 | CFI_ADJUST_CFA_OFFSET -4;\ | 263 | CFI_ADJUST_CFA_OFFSET -4 |
| 169 | /*CFI_RESTORE ds;*/\ | 264 | /*CFI_RESTORE ds;*/ |
| 170 | 2: popl %es; \ | 265 | 2: popl %es |
| 171 | CFI_ADJUST_CFA_OFFSET -4;\ | 266 | CFI_ADJUST_CFA_OFFSET -4 |
| 172 | /*CFI_RESTORE es;*/\ | 267 | /*CFI_RESTORE es;*/ |
| 173 | 3: popl %fs; \ | 268 | 3: popl %fs |
| 174 | CFI_ADJUST_CFA_OFFSET -4;\ | 269 | CFI_ADJUST_CFA_OFFSET -4 |
| 175 | /*CFI_RESTORE fs;*/\ | 270 | /*CFI_RESTORE fs;*/ |
| 176 | .pushsection .fixup,"ax"; \ | 271 | POP_GS \pop |
| 177 | 4: movl $0,(%esp); \ | 272 | .pushsection .fixup, "ax" |
| 178 | jmp 1b; \ | 273 | 4: movl $0, (%esp) |
| 179 | 5: movl $0,(%esp); \ | 274 | jmp 1b |
| 180 | jmp 2b; \ | 275 | 5: movl $0, (%esp) |
| 181 | 6: movl $0,(%esp); \ | 276 | jmp 2b |
| 182 | jmp 3b; \ | 277 | 6: movl $0, (%esp) |
| 183 | .section __ex_table,"a";\ | 278 | jmp 3b |
| 184 | .align 4; \ | 279 | .section __ex_table, "a" |
| 185 | .long 1b,4b; \ | 280 | .align 4 |
| 186 | .long 2b,5b; \ | 281 | .long 1b, 4b |
| 187 | .long 3b,6b; \ | 282 | .long 2b, 5b |
| 283 | .long 3b, 6b | ||
| 188 | .popsection | 284 | .popsection |
| 285 | POP_GS_EX | ||
| 286 | .endm | ||
| 189 | 287 | ||
| 190 | #define RING0_INT_FRAME \ | 288 | .macro RING0_INT_FRAME |
| 191 | CFI_STARTPROC simple;\ | 289 | CFI_STARTPROC simple |
| 192 | CFI_SIGNAL_FRAME;\ | 290 | CFI_SIGNAL_FRAME |
| 193 | CFI_DEF_CFA esp, 3*4;\ | 291 | CFI_DEF_CFA esp, 3*4 |
| 194 | /*CFI_OFFSET cs, -2*4;*/\ | 292 | /*CFI_OFFSET cs, -2*4;*/ |
| 195 | CFI_OFFSET eip, -3*4 | 293 | CFI_OFFSET eip, -3*4 |
| 294 | .endm | ||
| 196 | 295 | ||
| 197 | #define RING0_EC_FRAME \ | 296 | .macro RING0_EC_FRAME |
| 198 | CFI_STARTPROC simple;\ | 297 | CFI_STARTPROC simple |
| 199 | CFI_SIGNAL_FRAME;\ | 298 | CFI_SIGNAL_FRAME |
| 200 | CFI_DEF_CFA esp, 4*4;\ | 299 | CFI_DEF_CFA esp, 4*4 |
| 201 | /*CFI_OFFSET cs, -2*4;*/\ | 300 | /*CFI_OFFSET cs, -2*4;*/ |
| 202 | CFI_OFFSET eip, -3*4 | 301 | CFI_OFFSET eip, -3*4 |
| 302 | .endm | ||
| 203 | 303 | ||
| 204 | #define RING0_PTREGS_FRAME \ | 304 | .macro RING0_PTREGS_FRAME |
| 205 | CFI_STARTPROC simple;\ | 305 | CFI_STARTPROC simple |
| 206 | CFI_SIGNAL_FRAME;\ | 306 | CFI_SIGNAL_FRAME |
| 207 | CFI_DEF_CFA esp, PT_OLDESP-PT_EBX;\ | 307 | CFI_DEF_CFA esp, PT_OLDESP-PT_EBX |
| 208 | /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/\ | 308 | /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/ |
| 209 | CFI_OFFSET eip, PT_EIP-PT_OLDESP;\ | 309 | CFI_OFFSET eip, PT_EIP-PT_OLDESP |
| 210 | /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/\ | 310 | /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/ |
| 211 | /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/\ | 311 | /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/ |
| 212 | CFI_OFFSET eax, PT_EAX-PT_OLDESP;\ | 312 | CFI_OFFSET eax, PT_EAX-PT_OLDESP |
| 213 | CFI_OFFSET ebp, PT_EBP-PT_OLDESP;\ | 313 | CFI_OFFSET ebp, PT_EBP-PT_OLDESP |
| 214 | CFI_OFFSET edi, PT_EDI-PT_OLDESP;\ | 314 | CFI_OFFSET edi, PT_EDI-PT_OLDESP |
| 215 | CFI_OFFSET esi, PT_ESI-PT_OLDESP;\ | 315 | CFI_OFFSET esi, PT_ESI-PT_OLDESP |
| 216 | CFI_OFFSET edx, PT_EDX-PT_OLDESP;\ | 316 | CFI_OFFSET edx, PT_EDX-PT_OLDESP |
| 217 | CFI_OFFSET ecx, PT_ECX-PT_OLDESP;\ | 317 | CFI_OFFSET ecx, PT_ECX-PT_OLDESP |
| 218 | CFI_OFFSET ebx, PT_EBX-PT_OLDESP | 318 | CFI_OFFSET ebx, PT_EBX-PT_OLDESP |
| 319 | .endm | ||
| 219 | 320 | ||
| 220 | ENTRY(ret_from_fork) | 321 | ENTRY(ret_from_fork) |
| 221 | CFI_STARTPROC | 322 | CFI_STARTPROC |
| @@ -362,6 +463,7 @@ sysenter_exit: | |||
| 362 | xorl %ebp,%ebp | 463 | xorl %ebp,%ebp |
| 363 | TRACE_IRQS_ON | 464 | TRACE_IRQS_ON |
| 364 | 1: mov PT_FS(%esp), %fs | 465 | 1: mov PT_FS(%esp), %fs |
| 466 | PTGS_TO_GS | ||
| 365 | ENABLE_INTERRUPTS_SYSEXIT | 467 | ENABLE_INTERRUPTS_SYSEXIT |
| 366 | 468 | ||
| 367 | #ifdef CONFIG_AUDITSYSCALL | 469 | #ifdef CONFIG_AUDITSYSCALL |
| @@ -410,6 +512,7 @@ sysexit_audit: | |||
| 410 | .align 4 | 512 | .align 4 |
| 411 | .long 1b,2b | 513 | .long 1b,2b |
| 412 | .popsection | 514 | .popsection |
| 515 | PTGS_TO_GS_EX | ||
| 413 | ENDPROC(ia32_sysenter_target) | 516 | ENDPROC(ia32_sysenter_target) |
| 414 | 517 | ||
| 415 | # system call handler stub | 518 | # system call handler stub |
| @@ -452,8 +555,7 @@ restore_all: | |||
| 452 | restore_nocheck: | 555 | restore_nocheck: |
| 453 | TRACE_IRQS_IRET | 556 | TRACE_IRQS_IRET |
| 454 | restore_nocheck_notrace: | 557 | restore_nocheck_notrace: |
| 455 | RESTORE_REGS | 558 | RESTORE_REGS 4 # skip orig_eax/error_code |
| 456 | addl $4, %esp # skip orig_eax/error_code | ||
| 457 | CFI_ADJUST_CFA_OFFSET -4 | 559 | CFI_ADJUST_CFA_OFFSET -4 |
| 458 | irq_return: | 560 | irq_return: |
| 459 | INTERRUPT_RETURN | 561 | INTERRUPT_RETURN |
| @@ -595,28 +697,50 @@ syscall_badsys: | |||
| 595 | END(syscall_badsys) | 697 | END(syscall_badsys) |
| 596 | CFI_ENDPROC | 698 | CFI_ENDPROC |
| 597 | 699 | ||
| 598 | #define FIXUP_ESPFIX_STACK \ | 700 | /* |
| 599 | /* since we are on a wrong stack, we cant make it a C code :( */ \ | 701 | * System calls that need a pt_regs pointer. |
| 600 | PER_CPU(gdt_page, %ebx); \ | 702 | */ |
| 601 | GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \ | 703 | #define PTREGSCALL(name) \ |
| 602 | addl %esp, %eax; \ | 704 | ALIGN; \ |
| 603 | pushl $__KERNEL_DS; \ | 705 | ptregs_##name: \ |
| 604 | CFI_ADJUST_CFA_OFFSET 4; \ | 706 | leal 4(%esp),%eax; \ |
| 605 | pushl %eax; \ | 707 | jmp sys_##name; |
| 606 | CFI_ADJUST_CFA_OFFSET 4; \ | 708 | |
| 607 | lss (%esp), %esp; \ | 709 | PTREGSCALL(iopl) |
| 608 | CFI_ADJUST_CFA_OFFSET -8; | 710 | PTREGSCALL(fork) |
| 609 | #define UNWIND_ESPFIX_STACK \ | 711 | PTREGSCALL(clone) |
| 610 | movl %ss, %eax; \ | 712 | PTREGSCALL(vfork) |
| 611 | /* see if on espfix stack */ \ | 713 | PTREGSCALL(execve) |
| 612 | cmpw $__ESPFIX_SS, %ax; \ | 714 | PTREGSCALL(sigaltstack) |
| 613 | jne 27f; \ | 715 | PTREGSCALL(sigreturn) |
| 614 | movl $__KERNEL_DS, %eax; \ | 716 | PTREGSCALL(rt_sigreturn) |
| 615 | movl %eax, %ds; \ | 717 | PTREGSCALL(vm86) |
| 616 | movl %eax, %es; \ | 718 | PTREGSCALL(vm86old) |
| 617 | /* switch to normal stack */ \ | 719 | |
| 618 | FIXUP_ESPFIX_STACK; \ | 720 | .macro FIXUP_ESPFIX_STACK |
| 619 | 27:; | 721 | /* since we are on a wrong stack, we cant make it a C code :( */ |
| 722 | PER_CPU(gdt_page, %ebx) | ||
| 723 | GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah) | ||
| 724 | addl %esp, %eax | ||
| 725 | pushl $__KERNEL_DS | ||
| 726 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 727 | pushl %eax | ||
| 728 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 729 | lss (%esp), %esp | ||
| 730 | CFI_ADJUST_CFA_OFFSET -8 | ||
| 731 | .endm | ||
| 732 | .macro UNWIND_ESPFIX_STACK | ||
| 733 | movl %ss, %eax | ||
| 734 | /* see if on espfix stack */ | ||
| 735 | cmpw $__ESPFIX_SS, %ax | ||
| 736 | jne 27f | ||
| 737 | movl $__KERNEL_DS, %eax | ||
| 738 | movl %eax, %ds | ||
| 739 | movl %eax, %es | ||
| 740 | /* switch to normal stack */ | ||
| 741 | FIXUP_ESPFIX_STACK | ||
| 742 | 27: | ||
| 743 | .endm | ||
| 620 | 744 | ||
| 621 | /* | 745 | /* |
| 622 | * Build the entry stubs and pointer table with some assembler magic. | 746 | * Build the entry stubs and pointer table with some assembler magic. |
| @@ -672,7 +796,7 @@ common_interrupt: | |||
| 672 | ENDPROC(common_interrupt) | 796 | ENDPROC(common_interrupt) |
| 673 | CFI_ENDPROC | 797 | CFI_ENDPROC |
| 674 | 798 | ||
| 675 | #define BUILD_INTERRUPT(name, nr) \ | 799 | #define BUILD_INTERRUPT3(name, nr, fn) \ |
| 676 | ENTRY(name) \ | 800 | ENTRY(name) \ |
| 677 | RING0_INT_FRAME; \ | 801 | RING0_INT_FRAME; \ |
| 678 | pushl $~(nr); \ | 802 | pushl $~(nr); \ |
| @@ -680,13 +804,15 @@ ENTRY(name) \ | |||
| 680 | SAVE_ALL; \ | 804 | SAVE_ALL; \ |
| 681 | TRACE_IRQS_OFF \ | 805 | TRACE_IRQS_OFF \ |
| 682 | movl %esp,%eax; \ | 806 | movl %esp,%eax; \ |
| 683 | call smp_##name; \ | 807 | call fn; \ |
| 684 | jmp ret_from_intr; \ | 808 | jmp ret_from_intr; \ |
| 685 | CFI_ENDPROC; \ | 809 | CFI_ENDPROC; \ |
| 686 | ENDPROC(name) | 810 | ENDPROC(name) |
| 687 | 811 | ||
| 812 | #define BUILD_INTERRUPT(name, nr) BUILD_INTERRUPT3(name, nr, smp_##name) | ||
| 813 | |||
| 688 | /* The include is where all of the SMP etc. interrupts come from */ | 814 | /* The include is where all of the SMP etc. interrupts come from */ |
| 689 | #include "entry_arch.h" | 815 | #include <asm/entry_arch.h> |
| 690 | 816 | ||
| 691 | ENTRY(coprocessor_error) | 817 | ENTRY(coprocessor_error) |
| 692 | RING0_INT_FRAME | 818 | RING0_INT_FRAME |
| @@ -1068,7 +1194,10 @@ ENTRY(page_fault) | |||
| 1068 | CFI_ADJUST_CFA_OFFSET 4 | 1194 | CFI_ADJUST_CFA_OFFSET 4 |
| 1069 | ALIGN | 1195 | ALIGN |
| 1070 | error_code: | 1196 | error_code: |
| 1071 | /* the function address is in %fs's slot on the stack */ | 1197 | /* the function address is in %gs's slot on the stack */ |
| 1198 | pushl %fs | ||
| 1199 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1200 | /*CFI_REL_OFFSET fs, 0*/ | ||
| 1072 | pushl %es | 1201 | pushl %es |
| 1073 | CFI_ADJUST_CFA_OFFSET 4 | 1202 | CFI_ADJUST_CFA_OFFSET 4 |
| 1074 | /*CFI_REL_OFFSET es, 0*/ | 1203 | /*CFI_REL_OFFSET es, 0*/ |
| @@ -1097,20 +1226,15 @@ error_code: | |||
| 1097 | CFI_ADJUST_CFA_OFFSET 4 | 1226 | CFI_ADJUST_CFA_OFFSET 4 |
| 1098 | CFI_REL_OFFSET ebx, 0 | 1227 | CFI_REL_OFFSET ebx, 0 |
| 1099 | cld | 1228 | cld |
| 1100 | pushl %fs | ||
| 1101 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1102 | /*CFI_REL_OFFSET fs, 0*/ | ||
| 1103 | movl $(__KERNEL_PERCPU), %ecx | 1229 | movl $(__KERNEL_PERCPU), %ecx |
| 1104 | movl %ecx, %fs | 1230 | movl %ecx, %fs |
| 1105 | UNWIND_ESPFIX_STACK | 1231 | UNWIND_ESPFIX_STACK |
| 1106 | popl %ecx | 1232 | GS_TO_REG %ecx |
| 1107 | CFI_ADJUST_CFA_OFFSET -4 | 1233 | movl PT_GS(%esp), %edi # get the function address |
| 1108 | /*CFI_REGISTER es, ecx*/ | ||
| 1109 | movl PT_FS(%esp), %edi # get the function address | ||
| 1110 | movl PT_ORIG_EAX(%esp), %edx # get the error code | 1234 | movl PT_ORIG_EAX(%esp), %edx # get the error code |
| 1111 | movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart | 1235 | movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart |
| 1112 | mov %ecx, PT_FS(%esp) | 1236 | REG_TO_PTGS %ecx |
| 1113 | /*CFI_REL_OFFSET fs, ES*/ | 1237 | SET_KERNEL_GS %ecx |
| 1114 | movl $(__USER_DS), %ecx | 1238 | movl $(__USER_DS), %ecx |
| 1115 | movl %ecx, %ds | 1239 | movl %ecx, %ds |
| 1116 | movl %ecx, %es | 1240 | movl %ecx, %es |
| @@ -1134,26 +1258,27 @@ END(page_fault) | |||
| 1134 | * by hand onto the new stack - while updating the return eip past | 1258 | * by hand onto the new stack - while updating the return eip past |
| 1135 | * the instruction that would have done it for sysenter. | 1259 | * the instruction that would have done it for sysenter. |
| 1136 | */ | 1260 | */ |
| 1137 | #define FIX_STACK(offset, ok, label) \ | 1261 | .macro FIX_STACK offset ok label |
| 1138 | cmpw $__KERNEL_CS,4(%esp); \ | 1262 | cmpw $__KERNEL_CS, 4(%esp) |
| 1139 | jne ok; \ | 1263 | jne \ok |
| 1140 | label: \ | 1264 | \label: |
| 1141 | movl TSS_sysenter_sp0+offset(%esp),%esp; \ | 1265 | movl TSS_sysenter_sp0 + \offset(%esp), %esp |
| 1142 | CFI_DEF_CFA esp, 0; \ | 1266 | CFI_DEF_CFA esp, 0 |
| 1143 | CFI_UNDEFINED eip; \ | 1267 | CFI_UNDEFINED eip |
| 1144 | pushfl; \ | 1268 | pushfl |
| 1145 | CFI_ADJUST_CFA_OFFSET 4; \ | 1269 | CFI_ADJUST_CFA_OFFSET 4 |
| 1146 | pushl $__KERNEL_CS; \ | 1270 | pushl $__KERNEL_CS |
| 1147 | CFI_ADJUST_CFA_OFFSET 4; \ | 1271 | CFI_ADJUST_CFA_OFFSET 4 |
| 1148 | pushl $sysenter_past_esp; \ | 1272 | pushl $sysenter_past_esp |
| 1149 | CFI_ADJUST_CFA_OFFSET 4; \ | 1273 | CFI_ADJUST_CFA_OFFSET 4 |
| 1150 | CFI_REL_OFFSET eip, 0 | 1274 | CFI_REL_OFFSET eip, 0 |
| 1275 | .endm | ||
| 1151 | 1276 | ||
| 1152 | ENTRY(debug) | 1277 | ENTRY(debug) |
| 1153 | RING0_INT_FRAME | 1278 | RING0_INT_FRAME |
| 1154 | cmpl $ia32_sysenter_target,(%esp) | 1279 | cmpl $ia32_sysenter_target,(%esp) |
| 1155 | jne debug_stack_correct | 1280 | jne debug_stack_correct |
| 1156 | FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn) | 1281 | FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn |
| 1157 | debug_stack_correct: | 1282 | debug_stack_correct: |
| 1158 | pushl $-1 # mark this as an int | 1283 | pushl $-1 # mark this as an int |
| 1159 | CFI_ADJUST_CFA_OFFSET 4 | 1284 | CFI_ADJUST_CFA_OFFSET 4 |
| @@ -1211,7 +1336,7 @@ nmi_stack_correct: | |||
| 1211 | 1336 | ||
| 1212 | nmi_stack_fixup: | 1337 | nmi_stack_fixup: |
| 1213 | RING0_INT_FRAME | 1338 | RING0_INT_FRAME |
| 1214 | FIX_STACK(12,nmi_stack_correct, 1) | 1339 | FIX_STACK 12, nmi_stack_correct, 1 |
| 1215 | jmp nmi_stack_correct | 1340 | jmp nmi_stack_correct |
| 1216 | 1341 | ||
| 1217 | nmi_debug_stack_check: | 1342 | nmi_debug_stack_check: |
| @@ -1222,7 +1347,7 @@ nmi_debug_stack_check: | |||
| 1222 | jb nmi_stack_correct | 1347 | jb nmi_stack_correct |
| 1223 | cmpl $debug_esp_fix_insn,(%esp) | 1348 | cmpl $debug_esp_fix_insn,(%esp) |
| 1224 | ja nmi_stack_correct | 1349 | ja nmi_stack_correct |
| 1225 | FIX_STACK(24,nmi_stack_correct, 1) | 1350 | FIX_STACK 24, nmi_stack_correct, 1 |
| 1226 | jmp nmi_stack_correct | 1351 | jmp nmi_stack_correct |
| 1227 | 1352 | ||
| 1228 | nmi_espfix_stack: | 1353 | nmi_espfix_stack: |
| @@ -1234,7 +1359,7 @@ nmi_espfix_stack: | |||
| 1234 | CFI_ADJUST_CFA_OFFSET 4 | 1359 | CFI_ADJUST_CFA_OFFSET 4 |
| 1235 | pushl %esp | 1360 | pushl %esp |
| 1236 | CFI_ADJUST_CFA_OFFSET 4 | 1361 | CFI_ADJUST_CFA_OFFSET 4 |
| 1237 | addw $4, (%esp) | 1362 | addl $4, (%esp) |
| 1238 | /* copy the iret frame of 12 bytes */ | 1363 | /* copy the iret frame of 12 bytes */ |
| 1239 | .rept 3 | 1364 | .rept 3 |
| 1240 | pushl 16(%esp) | 1365 | pushl 16(%esp) |
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index a1346217e43c..7ba4621c0dfa 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
| @@ -48,10 +48,11 @@ | |||
| 48 | #include <asm/unistd.h> | 48 | #include <asm/unistd.h> |
| 49 | #include <asm/thread_info.h> | 49 | #include <asm/thread_info.h> |
| 50 | #include <asm/hw_irq.h> | 50 | #include <asm/hw_irq.h> |
| 51 | #include <asm/page.h> | 51 | #include <asm/page_types.h> |
| 52 | #include <asm/irqflags.h> | 52 | #include <asm/irqflags.h> |
| 53 | #include <asm/paravirt.h> | 53 | #include <asm/paravirt.h> |
| 54 | #include <asm/ftrace.h> | 54 | #include <asm/ftrace.h> |
| 55 | #include <asm/percpu.h> | ||
| 55 | 56 | ||
| 56 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ | 57 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ |
| 57 | #include <linux/elf-em.h> | 58 | #include <linux/elf-em.h> |
| @@ -76,20 +77,17 @@ ENTRY(ftrace_caller) | |||
| 76 | movq 8(%rbp), %rsi | 77 | movq 8(%rbp), %rsi |
| 77 | subq $MCOUNT_INSN_SIZE, %rdi | 78 | subq $MCOUNT_INSN_SIZE, %rdi |
| 78 | 79 | ||
| 79 | .globl ftrace_call | 80 | GLOBAL(ftrace_call) |
| 80 | ftrace_call: | ||
| 81 | call ftrace_stub | 81 | call ftrace_stub |
| 82 | 82 | ||
| 83 | MCOUNT_RESTORE_FRAME | 83 | MCOUNT_RESTORE_FRAME |
| 84 | 84 | ||
| 85 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | 85 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER |
| 86 | .globl ftrace_graph_call | 86 | GLOBAL(ftrace_graph_call) |
| 87 | ftrace_graph_call: | ||
| 88 | jmp ftrace_stub | 87 | jmp ftrace_stub |
| 89 | #endif | 88 | #endif |
| 90 | 89 | ||
| 91 | .globl ftrace_stub | 90 | GLOBAL(ftrace_stub) |
| 92 | ftrace_stub: | ||
| 93 | retq | 91 | retq |
| 94 | END(ftrace_caller) | 92 | END(ftrace_caller) |
| 95 | 93 | ||
| @@ -109,8 +107,7 @@ ENTRY(mcount) | |||
| 109 | jnz ftrace_graph_caller | 107 | jnz ftrace_graph_caller |
| 110 | #endif | 108 | #endif |
| 111 | 109 | ||
| 112 | .globl ftrace_stub | 110 | GLOBAL(ftrace_stub) |
| 113 | ftrace_stub: | ||
| 114 | retq | 111 | retq |
| 115 | 112 | ||
| 116 | trace: | 113 | trace: |
| @@ -147,9 +144,7 @@ ENTRY(ftrace_graph_caller) | |||
| 147 | retq | 144 | retq |
| 148 | END(ftrace_graph_caller) | 145 | END(ftrace_graph_caller) |
| 149 | 146 | ||
| 150 | 147 | GLOBAL(return_to_handler) | |
| 151 | .globl return_to_handler | ||
| 152 | return_to_handler: | ||
| 153 | subq $80, %rsp | 148 | subq $80, %rsp |
| 154 | 149 | ||
| 155 | movq %rax, (%rsp) | 150 | movq %rax, (%rsp) |
| @@ -187,6 +182,7 @@ return_to_handler: | |||
| 187 | ENTRY(native_usergs_sysret64) | 182 | ENTRY(native_usergs_sysret64) |
| 188 | swapgs | 183 | swapgs |
| 189 | sysretq | 184 | sysretq |
| 185 | ENDPROC(native_usergs_sysret64) | ||
| 190 | #endif /* CONFIG_PARAVIRT */ | 186 | #endif /* CONFIG_PARAVIRT */ |
| 191 | 187 | ||
| 192 | 188 | ||
| @@ -209,7 +205,7 @@ ENTRY(native_usergs_sysret64) | |||
| 209 | 205 | ||
| 210 | /* %rsp:at FRAMEEND */ | 206 | /* %rsp:at FRAMEEND */ |
| 211 | .macro FIXUP_TOP_OF_STACK tmp offset=0 | 207 | .macro FIXUP_TOP_OF_STACK tmp offset=0 |
| 212 | movq %gs:pda_oldrsp,\tmp | 208 | movq PER_CPU_VAR(old_rsp),\tmp |
| 213 | movq \tmp,RSP+\offset(%rsp) | 209 | movq \tmp,RSP+\offset(%rsp) |
| 214 | movq $__USER_DS,SS+\offset(%rsp) | 210 | movq $__USER_DS,SS+\offset(%rsp) |
| 215 | movq $__USER_CS,CS+\offset(%rsp) | 211 | movq $__USER_CS,CS+\offset(%rsp) |
| @@ -220,7 +216,7 @@ ENTRY(native_usergs_sysret64) | |||
| 220 | 216 | ||
| 221 | .macro RESTORE_TOP_OF_STACK tmp offset=0 | 217 | .macro RESTORE_TOP_OF_STACK tmp offset=0 |
| 222 | movq RSP+\offset(%rsp),\tmp | 218 | movq RSP+\offset(%rsp),\tmp |
| 223 | movq \tmp,%gs:pda_oldrsp | 219 | movq \tmp,PER_CPU_VAR(old_rsp) |
| 224 | movq EFLAGS+\offset(%rsp),\tmp | 220 | movq EFLAGS+\offset(%rsp),\tmp |
| 225 | movq \tmp,R11+\offset(%rsp) | 221 | movq \tmp,R11+\offset(%rsp) |
| 226 | .endm | 222 | .endm |
| @@ -336,15 +332,15 @@ ENTRY(save_args) | |||
| 336 | je 1f | 332 | je 1f |
| 337 | SWAPGS | 333 | SWAPGS |
| 338 | /* | 334 | /* |
| 339 | * irqcount is used to check if a CPU is already on an interrupt stack | 335 | * irq_count is used to check if a CPU is already on an interrupt stack |
| 340 | * or not. While this is essentially redundant with preempt_count it is | 336 | * or not. While this is essentially redundant with preempt_count it is |
| 341 | * a little cheaper to use a separate counter in the PDA (short of | 337 | * a little cheaper to use a separate counter in the PDA (short of |
| 342 | * moving irq_enter into assembly, which would be too much work) | 338 | * moving irq_enter into assembly, which would be too much work) |
| 343 | */ | 339 | */ |
| 344 | 1: incl %gs:pda_irqcount | 340 | 1: incl PER_CPU_VAR(irq_count) |
| 345 | jne 2f | 341 | jne 2f |
| 346 | popq_cfi %rax /* move return address... */ | 342 | popq_cfi %rax /* move return address... */ |
| 347 | mov %gs:pda_irqstackptr,%rsp | 343 | mov PER_CPU_VAR(irq_stack_ptr),%rsp |
| 348 | EMPTY_FRAME 0 | 344 | EMPTY_FRAME 0 |
| 349 | pushq_cfi %rbp /* backlink for unwinder */ | 345 | pushq_cfi %rbp /* backlink for unwinder */ |
| 350 | pushq_cfi %rax /* ... to the new stack */ | 346 | pushq_cfi %rax /* ... to the new stack */ |
| @@ -409,6 +405,8 @@ END(save_paranoid) | |||
| 409 | ENTRY(ret_from_fork) | 405 | ENTRY(ret_from_fork) |
| 410 | DEFAULT_FRAME | 406 | DEFAULT_FRAME |
| 411 | 407 | ||
| 408 | LOCK ; btr $TIF_FORK,TI_flags(%r8) | ||
| 409 | |||
| 412 | push kernel_eflags(%rip) | 410 | push kernel_eflags(%rip) |
| 413 | CFI_ADJUST_CFA_OFFSET 8 | 411 | CFI_ADJUST_CFA_OFFSET 8 |
| 414 | popf # reset kernel eflags | 412 | popf # reset kernel eflags |
| @@ -468,7 +466,7 @@ END(ret_from_fork) | |||
| 468 | ENTRY(system_call) | 466 | ENTRY(system_call) |
| 469 | CFI_STARTPROC simple | 467 | CFI_STARTPROC simple |
| 470 | CFI_SIGNAL_FRAME | 468 | CFI_SIGNAL_FRAME |
| 471 | CFI_DEF_CFA rsp,PDA_STACKOFFSET | 469 | CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET |
| 472 | CFI_REGISTER rip,rcx | 470 | CFI_REGISTER rip,rcx |
| 473 | /*CFI_REGISTER rflags,r11*/ | 471 | /*CFI_REGISTER rflags,r11*/ |
| 474 | SWAPGS_UNSAFE_STACK | 472 | SWAPGS_UNSAFE_STACK |
| @@ -479,8 +477,8 @@ ENTRY(system_call) | |||
| 479 | */ | 477 | */ |
| 480 | ENTRY(system_call_after_swapgs) | 478 | ENTRY(system_call_after_swapgs) |
| 481 | 479 | ||
| 482 | movq %rsp,%gs:pda_oldrsp | 480 | movq %rsp,PER_CPU_VAR(old_rsp) |
| 483 | movq %gs:pda_kernelstack,%rsp | 481 | movq PER_CPU_VAR(kernel_stack),%rsp |
| 484 | /* | 482 | /* |
| 485 | * No need to follow this irqs off/on section - it's straight | 483 | * No need to follow this irqs off/on section - it's straight |
| 486 | * and short: | 484 | * and short: |
| @@ -523,7 +521,7 @@ sysret_check: | |||
| 523 | CFI_REGISTER rip,rcx | 521 | CFI_REGISTER rip,rcx |
| 524 | RESTORE_ARGS 0,-ARG_SKIP,1 | 522 | RESTORE_ARGS 0,-ARG_SKIP,1 |
| 525 | /*CFI_REGISTER rflags,r11*/ | 523 | /*CFI_REGISTER rflags,r11*/ |
| 526 | movq %gs:pda_oldrsp, %rsp | 524 | movq PER_CPU_VAR(old_rsp), %rsp |
| 527 | USERGS_SYSRET64 | 525 | USERGS_SYSRET64 |
| 528 | 526 | ||
| 529 | CFI_RESTORE_STATE | 527 | CFI_RESTORE_STATE |
| @@ -630,16 +628,14 @@ tracesys: | |||
| 630 | * Syscall return path ending with IRET. | 628 | * Syscall return path ending with IRET. |
| 631 | * Has correct top of stack, but partial stack frame. | 629 | * Has correct top of stack, but partial stack frame. |
| 632 | */ | 630 | */ |
| 633 | .globl int_ret_from_sys_call | 631 | GLOBAL(int_ret_from_sys_call) |
| 634 | .globl int_with_check | ||
| 635 | int_ret_from_sys_call: | ||
| 636 | DISABLE_INTERRUPTS(CLBR_NONE) | 632 | DISABLE_INTERRUPTS(CLBR_NONE) |
| 637 | TRACE_IRQS_OFF | 633 | TRACE_IRQS_OFF |
| 638 | testl $3,CS-ARGOFFSET(%rsp) | 634 | testl $3,CS-ARGOFFSET(%rsp) |
| 639 | je retint_restore_args | 635 | je retint_restore_args |
| 640 | movl $_TIF_ALLWORK_MASK,%edi | 636 | movl $_TIF_ALLWORK_MASK,%edi |
| 641 | /* edi: mask to check */ | 637 | /* edi: mask to check */ |
| 642 | int_with_check: | 638 | GLOBAL(int_with_check) |
| 643 | LOCKDEP_SYS_EXIT_IRQ | 639 | LOCKDEP_SYS_EXIT_IRQ |
| 644 | GET_THREAD_INFO(%rcx) | 640 | GET_THREAD_INFO(%rcx) |
| 645 | movl TI_flags(%rcx),%edx | 641 | movl TI_flags(%rcx),%edx |
| @@ -833,11 +829,11 @@ common_interrupt: | |||
| 833 | XCPT_FRAME | 829 | XCPT_FRAME |
| 834 | addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ | 830 | addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ |
| 835 | interrupt do_IRQ | 831 | interrupt do_IRQ |
| 836 | /* 0(%rsp): oldrsp-ARGOFFSET */ | 832 | /* 0(%rsp): old_rsp-ARGOFFSET */ |
| 837 | ret_from_intr: | 833 | ret_from_intr: |
| 838 | DISABLE_INTERRUPTS(CLBR_NONE) | 834 | DISABLE_INTERRUPTS(CLBR_NONE) |
| 839 | TRACE_IRQS_OFF | 835 | TRACE_IRQS_OFF |
| 840 | decl %gs:pda_irqcount | 836 | decl PER_CPU_VAR(irq_count) |
| 841 | leaveq | 837 | leaveq |
| 842 | CFI_DEF_CFA_REGISTER rsp | 838 | CFI_DEF_CFA_REGISTER rsp |
| 843 | CFI_ADJUST_CFA_OFFSET -8 | 839 | CFI_ADJUST_CFA_OFFSET -8 |
| @@ -982,10 +978,14 @@ apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \ | |||
| 982 | irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt | 978 | irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt |
| 983 | #endif | 979 | #endif |
| 984 | 980 | ||
| 981 | #ifdef CONFIG_X86_UV | ||
| 985 | apicinterrupt UV_BAU_MESSAGE \ | 982 | apicinterrupt UV_BAU_MESSAGE \ |
| 986 | uv_bau_message_intr1 uv_bau_message_interrupt | 983 | uv_bau_message_intr1 uv_bau_message_interrupt |
| 984 | #endif | ||
| 987 | apicinterrupt LOCAL_TIMER_VECTOR \ | 985 | apicinterrupt LOCAL_TIMER_VECTOR \ |
| 988 | apic_timer_interrupt smp_apic_timer_interrupt | 986 | apic_timer_interrupt smp_apic_timer_interrupt |
| 987 | apicinterrupt GENERIC_INTERRUPT_VECTOR \ | ||
| 988 | generic_interrupt smp_generic_interrupt | ||
| 989 | 989 | ||
| 990 | #ifdef CONFIG_SMP | 990 | #ifdef CONFIG_SMP |
| 991 | apicinterrupt INVALIDATE_TLB_VECTOR_START+0 \ | 991 | apicinterrupt INVALIDATE_TLB_VECTOR_START+0 \ |
| @@ -1073,10 +1073,10 @@ ENTRY(\sym) | |||
| 1073 | TRACE_IRQS_OFF | 1073 | TRACE_IRQS_OFF |
| 1074 | movq %rsp,%rdi /* pt_regs pointer */ | 1074 | movq %rsp,%rdi /* pt_regs pointer */ |
| 1075 | xorl %esi,%esi /* no error code */ | 1075 | xorl %esi,%esi /* no error code */ |
| 1076 | movq %gs:pda_data_offset, %rbp | 1076 | PER_CPU(init_tss, %rbp) |
| 1077 | subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) | 1077 | subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp) |
| 1078 | call \do_sym | 1078 | call \do_sym |
| 1079 | addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) | 1079 | addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp) |
| 1080 | jmp paranoid_exit /* %ebx: no swapgs flag */ | 1080 | jmp paranoid_exit /* %ebx: no swapgs flag */ |
| 1081 | CFI_ENDPROC | 1081 | CFI_ENDPROC |
| 1082 | END(\sym) | 1082 | END(\sym) |
| @@ -1138,7 +1138,7 @@ ENTRY(native_load_gs_index) | |||
| 1138 | CFI_STARTPROC | 1138 | CFI_STARTPROC |
| 1139 | pushf | 1139 | pushf |
| 1140 | CFI_ADJUST_CFA_OFFSET 8 | 1140 | CFI_ADJUST_CFA_OFFSET 8 |
| 1141 | DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI)) | 1141 | DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI) |
| 1142 | SWAPGS | 1142 | SWAPGS |
| 1143 | gs_change: | 1143 | gs_change: |
| 1144 | movl %edi,%gs | 1144 | movl %edi,%gs |
| @@ -1260,14 +1260,14 @@ ENTRY(call_softirq) | |||
| 1260 | CFI_REL_OFFSET rbp,0 | 1260 | CFI_REL_OFFSET rbp,0 |
| 1261 | mov %rsp,%rbp | 1261 | mov %rsp,%rbp |
| 1262 | CFI_DEF_CFA_REGISTER rbp | 1262 | CFI_DEF_CFA_REGISTER rbp |
| 1263 | incl %gs:pda_irqcount | 1263 | incl PER_CPU_VAR(irq_count) |
| 1264 | cmove %gs:pda_irqstackptr,%rsp | 1264 | cmove PER_CPU_VAR(irq_stack_ptr),%rsp |
| 1265 | push %rbp # backlink for old unwinder | 1265 | push %rbp # backlink for old unwinder |
| 1266 | call __do_softirq | 1266 | call __do_softirq |
| 1267 | leaveq | 1267 | leaveq |
| 1268 | CFI_DEF_CFA_REGISTER rsp | 1268 | CFI_DEF_CFA_REGISTER rsp |
| 1269 | CFI_ADJUST_CFA_OFFSET -8 | 1269 | CFI_ADJUST_CFA_OFFSET -8 |
| 1270 | decl %gs:pda_irqcount | 1270 | decl PER_CPU_VAR(irq_count) |
| 1271 | ret | 1271 | ret |
| 1272 | CFI_ENDPROC | 1272 | CFI_ENDPROC |
| 1273 | END(call_softirq) | 1273 | END(call_softirq) |
| @@ -1297,15 +1297,15 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs) | |||
| 1297 | movq %rdi, %rsp # we don't return, adjust the stack frame | 1297 | movq %rdi, %rsp # we don't return, adjust the stack frame |
| 1298 | CFI_ENDPROC | 1298 | CFI_ENDPROC |
| 1299 | DEFAULT_FRAME | 1299 | DEFAULT_FRAME |
| 1300 | 11: incl %gs:pda_irqcount | 1300 | 11: incl PER_CPU_VAR(irq_count) |
| 1301 | movq %rsp,%rbp | 1301 | movq %rsp,%rbp |
| 1302 | CFI_DEF_CFA_REGISTER rbp | 1302 | CFI_DEF_CFA_REGISTER rbp |
| 1303 | cmovzq %gs:pda_irqstackptr,%rsp | 1303 | cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp |
| 1304 | pushq %rbp # backlink for old unwinder | 1304 | pushq %rbp # backlink for old unwinder |
| 1305 | call xen_evtchn_do_upcall | 1305 | call xen_evtchn_do_upcall |
| 1306 | popq %rsp | 1306 | popq %rsp |
| 1307 | CFI_DEF_CFA_REGISTER rsp | 1307 | CFI_DEF_CFA_REGISTER rsp |
| 1308 | decl %gs:pda_irqcount | 1308 | decl PER_CPU_VAR(irq_count) |
| 1309 | jmp error_exit | 1309 | jmp error_exit |
| 1310 | CFI_ENDPROC | 1310 | CFI_ENDPROC |
| 1311 | END(do_hypervisor_callback) | 1311 | END(do_hypervisor_callback) |
diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c deleted file mode 100644 index 53699c931ad4..000000000000 --- a/arch/x86/kernel/es7000_32.c +++ /dev/null | |||
| @@ -1,378 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * Written by: Garry Forsgren, Unisys Corporation | ||
| 3 | * Natalie Protasevich, Unisys Corporation | ||
| 4 | * This file contains the code to configure and interface | ||
| 5 | * with Unisys ES7000 series hardware system manager. | ||
| 6 | * | ||
| 7 | * Copyright (c) 2003 Unisys Corporation. All Rights Reserved. | ||
| 8 | * | ||
| 9 | * This program is free software; you can redistribute it and/or modify it | ||
| 10 | * under the terms of version 2 of the GNU General Public License as | ||
| 11 | * published by the Free Software Foundation. | ||
| 12 | * | ||
| 13 | * This program is distributed in the hope that it would be useful, but | ||
| 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
| 16 | * | ||
| 17 | * You should have received a copy of the GNU General Public License along | ||
| 18 | * with this program; if not, write the Free Software Foundation, Inc., 59 | ||
| 19 | * Temple Place - Suite 330, Boston MA 02111-1307, USA. | ||
| 20 | * | ||
| 21 | * Contact information: Unisys Corporation, Township Line & Union Meeting | ||
| 22 | * Roads-A, Unisys Way, Blue Bell, Pennsylvania, 19424, or: | ||
| 23 | * | ||
| 24 | * http://www.unisys.com | ||
| 25 | */ | ||
| 26 | |||
| 27 | #include <linux/module.h> | ||
| 28 | #include <linux/types.h> | ||
| 29 | #include <linux/kernel.h> | ||
| 30 | #include <linux/smp.h> | ||
| 31 | #include <linux/string.h> | ||
| 32 | #include <linux/spinlock.h> | ||
| 33 | #include <linux/errno.h> | ||
| 34 | #include <linux/notifier.h> | ||
| 35 | #include <linux/reboot.h> | ||
| 36 | #include <linux/init.h> | ||
| 37 | #include <linux/acpi.h> | ||
| 38 | #include <asm/io.h> | ||
| 39 | #include <asm/nmi.h> | ||
| 40 | #include <asm/smp.h> | ||
| 41 | #include <asm/atomic.h> | ||
| 42 | #include <asm/apicdef.h> | ||
| 43 | #include <mach_mpparse.h> | ||
| 44 | #include <asm/genapic.h> | ||
| 45 | #include <asm/setup.h> | ||
| 46 | |||
| 47 | /* | ||
| 48 | * ES7000 chipsets | ||
| 49 | */ | ||
| 50 | |||
| 51 | #define NON_UNISYS 0 | ||
| 52 | #define ES7000_CLASSIC 1 | ||
| 53 | #define ES7000_ZORRO 2 | ||
| 54 | |||
| 55 | |||
| 56 | #define MIP_REG 1 | ||
| 57 | #define MIP_PSAI_REG 4 | ||
| 58 | |||
| 59 | #define MIP_BUSY 1 | ||
| 60 | #define MIP_SPIN 0xf0000 | ||
| 61 | #define MIP_VALID 0x0100000000000000ULL | ||
| 62 | #define MIP_PORT(VALUE) ((VALUE >> 32) & 0xffff) | ||
| 63 | |||
| 64 | #define MIP_RD_LO(VALUE) (VALUE & 0xffffffff) | ||
| 65 | |||
| 66 | struct mip_reg_info { | ||
| 67 | unsigned long long mip_info; | ||
| 68 | unsigned long long delivery_info; | ||
| 69 | unsigned long long host_reg; | ||
| 70 | unsigned long long mip_reg; | ||
| 71 | }; | ||
| 72 | |||
| 73 | struct part_info { | ||
| 74 | unsigned char type; | ||
| 75 | unsigned char length; | ||
| 76 | unsigned char part_id; | ||
| 77 | unsigned char apic_mode; | ||
| 78 | unsigned long snum; | ||
| 79 | char ptype[16]; | ||
| 80 | char sname[64]; | ||
| 81 | char pname[64]; | ||
| 82 | }; | ||
| 83 | |||
| 84 | struct psai { | ||
| 85 | unsigned long long entry_type; | ||
| 86 | unsigned long long addr; | ||
| 87 | unsigned long long bep_addr; | ||
| 88 | }; | ||
| 89 | |||
| 90 | struct es7000_mem_info { | ||
| 91 | unsigned char type; | ||
| 92 | unsigned char length; | ||
| 93 | unsigned char resv[6]; | ||
| 94 | unsigned long long start; | ||
| 95 | unsigned long long size; | ||
| 96 | }; | ||
| 97 | |||
| 98 | struct es7000_oem_table { | ||
| 99 | unsigned long long hdr; | ||
| 100 | struct mip_reg_info mip; | ||
| 101 | struct part_info pif; | ||
| 102 | struct es7000_mem_info shm; | ||
| 103 | struct psai psai; | ||
| 104 | }; | ||
| 105 | |||
| 106 | #ifdef CONFIG_ACPI | ||
| 107 | |||
| 108 | struct oem_table { | ||
| 109 | struct acpi_table_header Header; | ||
| 110 | u32 OEMTableAddr; | ||
| 111 | u32 OEMTableSize; | ||
| 112 | }; | ||
| 113 | |||
| 114 | extern int find_unisys_acpi_oem_table(unsigned long *oem_addr); | ||
| 115 | extern void unmap_unisys_acpi_oem_table(unsigned long oem_addr); | ||
| 116 | #endif | ||
| 117 | |||
| 118 | struct mip_reg { | ||
| 119 | unsigned long long off_0; | ||
| 120 | unsigned long long off_8; | ||
| 121 | unsigned long long off_10; | ||
| 122 | unsigned long long off_18; | ||
| 123 | unsigned long long off_20; | ||
| 124 | unsigned long long off_28; | ||
| 125 | unsigned long long off_30; | ||
| 126 | unsigned long long off_38; | ||
| 127 | }; | ||
| 128 | |||
| 129 | #define MIP_SW_APIC 0x1020b | ||
| 130 | #define MIP_FUNC(VALUE) (VALUE & 0xff) | ||
| 131 | |||
| 132 | /* | ||
| 133 | * ES7000 Globals | ||
| 134 | */ | ||
| 135 | |||
| 136 | static volatile unsigned long *psai = NULL; | ||
| 137 | static struct mip_reg *mip_reg; | ||
| 138 | static struct mip_reg *host_reg; | ||
| 139 | static int mip_port; | ||
| 140 | static unsigned long mip_addr, host_addr; | ||
| 141 | |||
| 142 | int es7000_plat; | ||
| 143 | |||
| 144 | /* | ||
| 145 | * GSI override for ES7000 platforms. | ||
| 146 | */ | ||
| 147 | |||
| 148 | static unsigned int base; | ||
| 149 | |||
| 150 | static int | ||
| 151 | es7000_rename_gsi(int ioapic, int gsi) | ||
| 152 | { | ||
| 153 | if (es7000_plat == ES7000_ZORRO) | ||
| 154 | return gsi; | ||
| 155 | |||
| 156 | if (!base) { | ||
| 157 | int i; | ||
| 158 | for (i = 0; i < nr_ioapics; i++) | ||
| 159 | base += nr_ioapic_registers[i]; | ||
| 160 | } | ||
| 161 | |||
| 162 | if (!ioapic && (gsi < 16)) | ||
| 163 | gsi += base; | ||
| 164 | return gsi; | ||
| 165 | } | ||
| 166 | |||
| 167 | static int wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip) | ||
| 168 | { | ||
| 169 | unsigned long vect = 0, psaival = 0; | ||
| 170 | |||
| 171 | if (psai == NULL) | ||
| 172 | return -1; | ||
| 173 | |||
| 174 | vect = ((unsigned long)__pa(eip)/0x1000) << 16; | ||
| 175 | psaival = (0x1000000 | vect | cpu); | ||
| 176 | |||
| 177 | while (*psai & 0x1000000) | ||
| 178 | ; | ||
| 179 | |||
| 180 | *psai = psaival; | ||
| 181 | |||
| 182 | return 0; | ||
| 183 | } | ||
| 184 | |||
| 185 | static void noop_wait_for_deassert(atomic_t *deassert_not_used) | ||
| 186 | { | ||
| 187 | } | ||
| 188 | |||
| 189 | static int __init es7000_update_genapic(void) | ||
| 190 | { | ||
| 191 | genapic->wakeup_cpu = wakeup_secondary_cpu_via_mip; | ||
| 192 | |||
| 193 | /* MPENTIUMIII */ | ||
| 194 | if (boot_cpu_data.x86 == 6 && | ||
| 195 | (boot_cpu_data.x86_model >= 7 || boot_cpu_data.x86_model <= 11)) { | ||
| 196 | es7000_update_genapic_to_cluster(); | ||
| 197 | genapic->wait_for_init_deassert = noop_wait_for_deassert; | ||
| 198 | genapic->wakeup_cpu = wakeup_secondary_cpu_via_mip; | ||
| 199 | } | ||
| 200 | |||
| 201 | return 0; | ||
| 202 | } | ||
| 203 | |||
| 204 | void __init | ||
| 205 | setup_unisys(void) | ||
| 206 | { | ||
| 207 | /* | ||
| 208 | * Determine the generation of the ES7000 currently running. | ||
| 209 | * | ||
| 210 | * es7000_plat = 1 if the machine is a 5xx ES7000 box | ||
| 211 | * es7000_plat = 2 if the machine is a x86_64 ES7000 box | ||
| 212 | * | ||
| 213 | */ | ||
| 214 | if (!(boot_cpu_data.x86 <= 15 && boot_cpu_data.x86_model <= 2)) | ||
| 215 | es7000_plat = ES7000_ZORRO; | ||
| 216 | else | ||
| 217 | es7000_plat = ES7000_CLASSIC; | ||
| 218 | ioapic_renumber_irq = es7000_rename_gsi; | ||
| 219 | |||
| 220 | x86_quirks->update_genapic = es7000_update_genapic; | ||
| 221 | } | ||
| 222 | |||
| 223 | /* | ||
| 224 | * Parse the OEM Table | ||
| 225 | */ | ||
| 226 | |||
| 227 | int __init | ||
| 228 | parse_unisys_oem (char *oemptr) | ||
| 229 | { | ||
| 230 | int i; | ||
| 231 | int success = 0; | ||
| 232 | unsigned char type, size; | ||
| 233 | unsigned long val; | ||
| 234 | char *tp = NULL; | ||
| 235 | struct psai *psaip = NULL; | ||
| 236 | struct mip_reg_info *mi; | ||
| 237 | struct mip_reg *host, *mip; | ||
| 238 | |||
| 239 | tp = oemptr; | ||
| 240 | |||
| 241 | tp += 8; | ||
| 242 | |||
| 243 | for (i=0; i <= 6; i++) { | ||
| 244 | type = *tp++; | ||
| 245 | size = *tp++; | ||
| 246 | tp -= 2; | ||
| 247 | switch (type) { | ||
| 248 | case MIP_REG: | ||
| 249 | mi = (struct mip_reg_info *)tp; | ||
| 250 | val = MIP_RD_LO(mi->host_reg); | ||
| 251 | host_addr = val; | ||
| 252 | host = (struct mip_reg *)val; | ||
| 253 | host_reg = __va(host); | ||
| 254 | val = MIP_RD_LO(mi->mip_reg); | ||
| 255 | mip_port = MIP_PORT(mi->mip_info); | ||
| 256 | mip_addr = val; | ||
| 257 | mip = (struct mip_reg *)val; | ||
| 258 | mip_reg = __va(mip); | ||
| 259 | pr_debug("es7000_mipcfg: host_reg = 0x%lx \n", | ||
| 260 | (unsigned long)host_reg); | ||
| 261 | pr_debug("es7000_mipcfg: mip_reg = 0x%lx \n", | ||
| 262 | (unsigned long)mip_reg); | ||
| 263 | success++; | ||
| 264 | break; | ||
| 265 | case MIP_PSAI_REG: | ||
| 266 | psaip = (struct psai *)tp; | ||
| 267 | if (tp != NULL) { | ||
| 268 | if (psaip->addr) | ||
| 269 | psai = __va(psaip->addr); | ||
| 270 | else | ||
| 271 | psai = NULL; | ||
| 272 | success++; | ||
| 273 | } | ||
| 274 | break; | ||
| 275 | default: | ||
| 276 | break; | ||
| 277 | } | ||
| 278 | tp += size; | ||
| 279 | } | ||
| 280 | |||
| 281 | if (success < 2) { | ||
| 282 | es7000_plat = NON_UNISYS; | ||
| 283 | } else | ||
| 284 | setup_unisys(); | ||
| 285 | return es7000_plat; | ||
| 286 | } | ||
| 287 | |||
| 288 | #ifdef CONFIG_ACPI | ||
| 289 | static unsigned long oem_addrX; | ||
| 290 | static unsigned long oem_size; | ||
| 291 | int __init find_unisys_acpi_oem_table(unsigned long *oem_addr) | ||
| 292 | { | ||
| 293 | struct acpi_table_header *header = NULL; | ||
| 294 | int i = 0; | ||
| 295 | |||
| 296 | while (ACPI_SUCCESS(acpi_get_table("OEM1", i++, &header))) { | ||
| 297 | if (!memcmp((char *) &header->oem_id, "UNISYS", 6)) { | ||
| 298 | struct oem_table *t = (struct oem_table *)header; | ||
| 299 | |||
| 300 | oem_addrX = t->OEMTableAddr; | ||
| 301 | oem_size = t->OEMTableSize; | ||
| 302 | |||
| 303 | *oem_addr = (unsigned long)__acpi_map_table(oem_addrX, | ||
| 304 | oem_size); | ||
| 305 | return 0; | ||
| 306 | } | ||
| 307 | } | ||
| 308 | return -1; | ||
| 309 | } | ||
| 310 | |||
/*
 * Counterpart of find_unisys_acpi_oem_table(); the body is empty, so the
 * mapping is left in place and @oem_addr is ignored.
 */
void __init unmap_unisys_acpi_oem_table(unsigned long oem_addr)
{
}
| 314 | #endif | ||
| 315 | |||
/*
 * es7000_spin - busy-wait by issuing rep_nop() @n times.
 * @n: number of iterations; zero or negative means no delay at all
 */
static void
es7000_spin(int n)
{
	int count;

	for (count = 0; count < n; count++)
		rep_nop();
}
| 324 | |||
| 325 | static int __init | ||
| 326 | es7000_mip_write(struct mip_reg *mip_reg) | ||
| 327 | { | ||
| 328 | int status = 0; | ||
| 329 | int spin; | ||
| 330 | |||
| 331 | spin = MIP_SPIN; | ||
| 332 | while (((unsigned long long)host_reg->off_38 & | ||
| 333 | (unsigned long long)MIP_VALID) != 0) { | ||
| 334 | if (--spin <= 0) { | ||
| 335 | printk("es7000_mip_write: Timeout waiting for Host Valid Flag"); | ||
| 336 | return -1; | ||
| 337 | } | ||
| 338 | es7000_spin(MIP_SPIN); | ||
| 339 | } | ||
| 340 | |||
| 341 | memcpy(host_reg, mip_reg, sizeof(struct mip_reg)); | ||
| 342 | outb(1, mip_port); | ||
| 343 | |||
| 344 | spin = MIP_SPIN; | ||
| 345 | |||
| 346 | while (((unsigned long long)mip_reg->off_38 & | ||
| 347 | (unsigned long long)MIP_VALID) == 0) { | ||
| 348 | if (--spin <= 0) { | ||
| 349 | printk("es7000_mip_write: Timeout waiting for MIP Valid Flag"); | ||
| 350 | return -1; | ||
| 351 | } | ||
| 352 | es7000_spin(MIP_SPIN); | ||
| 353 | } | ||
| 354 | |||
| 355 | status = ((unsigned long long)mip_reg->off_0 & | ||
| 356 | (unsigned long long)0xffff0000000000ULL) >> 48; | ||
| 357 | mip_reg->off_38 = ((unsigned long long)mip_reg->off_38 & | ||
| 358 | (unsigned long long)~MIP_VALID); | ||
| 359 | return status; | ||
| 360 | } | ||
| 361 | |||
| 362 | void __init | ||
| 363 | es7000_sw_apic(void) | ||
| 364 | { | ||
| 365 | if (es7000_plat) { | ||
| 366 | int mip_status; | ||
| 367 | struct mip_reg es7000_mip_reg; | ||
| 368 | |||
| 369 | printk("ES7000: Enabling APIC mode.\n"); | ||
| 370 | memset(&es7000_mip_reg, 0, sizeof(struct mip_reg)); | ||
| 371 | es7000_mip_reg.off_0 = MIP_SW_APIC; | ||
| 372 | es7000_mip_reg.off_38 = (MIP_VALID); | ||
| 373 | while ((mip_status = es7000_mip_write(&es7000_mip_reg)) != 0) | ||
| 374 | printk("es7000_sw_apic: command failed, status = %x\n", | ||
| 375 | mip_status); | ||
| 376 | return; | ||
| 377 | } | ||
| 378 | } | ||
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index b9a4d8c4b935..f5b272247690 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c | |||
| @@ -26,27 +26,6 @@ | |||
| 26 | #include <asm/bios_ebda.h> | 26 | #include <asm/bios_ebda.h> |
| 27 | #include <asm/trampoline.h> | 27 | #include <asm/trampoline.h> |
| 28 | 28 | ||
/* boot cpu pda */
static struct x8664_pda _boot_cpu_pda;

#ifdef CONFIG_SMP
/*
 * We install an empty cpu_pda pointer table to indicate to early users
 * (numa_set_node) that the cpu_pda pointer table for cpus other than
 * the boot cpu is not yet setup.
 */
static struct x8664_pda *__cpu_pda[NR_CPUS] __initdata;
#else
/* Without SMP the table is placed in __read_mostly instead of __initdata. */
static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly;
#endif
| 42 | |||
/*
 * Point _cpu_pda at the static pointer table, make slot 0 refer to the boot
 * cpu's pda, then run pda_init() for cpu 0.
 */
void __init x86_64_init_pda(void)
{
	_cpu_pda = __cpu_pda;
	cpu_pda(0) = &_boot_cpu_pda;
	pda_init(0);
}
| 49 | |||
| 50 | static void __init zap_identity_mappings(void) | 29 | static void __init zap_identity_mappings(void) |
| 51 | { | 30 | { |
| 52 | pgd_t *pgd = pgd_offset_k(0UL); | 31 | pgd_t *pgd = pgd_offset_k(0UL); |
| @@ -112,8 +91,6 @@ void __init x86_64_start_kernel(char * real_mode_data) | |||
| 112 | if (console_loglevel == 10) | 91 | if (console_loglevel == 10) |
| 113 | early_printk("Kernel alive\n"); | 92 | early_printk("Kernel alive\n"); |
| 114 | 93 | ||
| 115 | x86_64_init_pda(); | ||
| 116 | |||
| 117 | x86_64_start_reservations(real_mode_data); | 94 | x86_64_start_reservations(real_mode_data); |
| 118 | } | 95 | } |
| 119 | 96 | ||
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index e835b4eea70b..c32ca19d591a 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S | |||
| @@ -11,14 +11,15 @@ | |||
| 11 | #include <linux/init.h> | 11 | #include <linux/init.h> |
| 12 | #include <linux/linkage.h> | 12 | #include <linux/linkage.h> |
| 13 | #include <asm/segment.h> | 13 | #include <asm/segment.h> |
| 14 | #include <asm/page.h> | 14 | #include <asm/page_types.h> |
| 15 | #include <asm/pgtable.h> | 15 | #include <asm/pgtable_types.h> |
| 16 | #include <asm/desc.h> | 16 | #include <asm/desc.h> |
| 17 | #include <asm/cache.h> | 17 | #include <asm/cache.h> |
| 18 | #include <asm/thread_info.h> | 18 | #include <asm/thread_info.h> |
| 19 | #include <asm/asm-offsets.h> | 19 | #include <asm/asm-offsets.h> |
| 20 | #include <asm/setup.h> | 20 | #include <asm/setup.h> |
| 21 | #include <asm/processor-flags.h> | 21 | #include <asm/processor-flags.h> |
| 22 | #include <asm/percpu.h> | ||
| 22 | 23 | ||
| 23 | /* Physical address */ | 24 | /* Physical address */ |
| 24 | #define pa(X) ((X) - __PAGE_OFFSET) | 25 | #define pa(X) ((X) - __PAGE_OFFSET) |
| @@ -429,14 +430,34 @@ is386: movl $2,%ecx # set MP | |||
| 429 | ljmp $(__KERNEL_CS),$1f | 430 | ljmp $(__KERNEL_CS),$1f |
| 430 | 1: movl $(__KERNEL_DS),%eax # reload all the segment registers | 431 | 1: movl $(__KERNEL_DS),%eax # reload all the segment registers |
| 431 | movl %eax,%ss # after changing gdt. | 432 | movl %eax,%ss # after changing gdt. |
| 432 | movl %eax,%fs # gets reset once there's real percpu | ||
| 433 | 433 | ||
| 434 | movl $(__USER_DS),%eax # DS/ES contains default USER segment | 434 | movl $(__USER_DS),%eax # DS/ES contains default USER segment |
| 435 | movl %eax,%ds | 435 | movl %eax,%ds |
| 436 | movl %eax,%es | 436 | movl %eax,%es |
| 437 | 437 | ||
| 438 | xorl %eax,%eax # Clear GS and LDT | 438 | movl $(__KERNEL_PERCPU), %eax |
| 439 | movl %eax,%fs # set this cpu's percpu | ||
| 440 | |||
| 441 | #ifdef CONFIG_CC_STACKPROTECTOR | ||
| 442 | /* | ||
| 443 | * The linker can't handle this by relocation. Manually set | ||
| 444 | * base address in stack canary segment descriptor. | ||
| 445 | */ | ||
| 446 | cmpb $0,ready | ||
| 447 | jne 1f | ||
| 448 | movl $per_cpu__gdt_page,%eax | ||
| 449 | movl $per_cpu__stack_canary,%ecx | ||
| 450 | subl $20, %ecx | ||
| 451 | movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax) | ||
| 452 | shrl $16, %ecx | ||
| 453 | movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax) | ||
| 454 | movb %ch, 8 * GDT_ENTRY_STACK_CANARY + 7(%eax) | ||
| 455 | 1: | ||
| 456 | #endif | ||
| 457 | movl $(__KERNEL_STACK_CANARY),%eax | ||
| 439 | movl %eax,%gs | 458 | movl %eax,%gs |
| 459 | |||
| 460 | xorl %eax,%eax # Clear LDT | ||
| 440 | lldt %ax | 461 | lldt %ax |
| 441 | 462 | ||
| 442 | cld # gcc2 wants the direction flag cleared at all times | 463 | cld # gcc2 wants the direction flag cleared at all times |
| @@ -446,8 +467,6 @@ is386: movl $2,%ecx # set MP | |||
| 446 | movb $1, ready | 467 | movb $1, ready |
| 447 | cmpb $0,%cl # the first CPU calls start_kernel | 468 | cmpb $0,%cl # the first CPU calls start_kernel |
| 448 | je 1f | 469 | je 1f |
| 449 | movl $(__KERNEL_PERCPU), %eax | ||
| 450 | movl %eax,%fs # set this cpu's percpu | ||
| 451 | movl (stack_start), %esp | 470 | movl (stack_start), %esp |
| 452 | 1: | 471 | 1: |
| 453 | #endif /* CONFIG_SMP */ | 472 | #endif /* CONFIG_SMP */ |
| @@ -548,12 +567,8 @@ early_fault: | |||
| 548 | pushl %eax | 567 | pushl %eax |
| 549 | pushl %edx /* trapno */ | 568 | pushl %edx /* trapno */ |
| 550 | pushl $fault_msg | 569 | pushl $fault_msg |
| 551 | #ifdef CONFIG_EARLY_PRINTK | ||
| 552 | call early_printk | ||
| 553 | #else | ||
| 554 | call printk | 570 | call printk |
| 555 | #endif | 571 | #endif |
| 556 | #endif | ||
| 557 | call dump_stack | 572 | call dump_stack |
| 558 | hlt_loop: | 573 | hlt_loop: |
| 559 | hlt | 574 | hlt |
| @@ -580,11 +595,10 @@ ignore_int: | |||
| 580 | pushl 32(%esp) | 595 | pushl 32(%esp) |
| 581 | pushl 40(%esp) | 596 | pushl 40(%esp) |
| 582 | pushl $int_msg | 597 | pushl $int_msg |
| 583 | #ifdef CONFIG_EARLY_PRINTK | ||
| 584 | call early_printk | ||
| 585 | #else | ||
| 586 | call printk | 598 | call printk |
| 587 | #endif | 599 | |
| 600 | call dump_stack | ||
| 601 | |||
| 588 | addl $(5*4),%esp | 602 | addl $(5*4),%esp |
| 589 | popl %ds | 603 | popl %ds |
| 590 | popl %es | 604 | popl %es |
| @@ -660,7 +674,7 @@ early_recursion_flag: | |||
| 660 | .long 0 | 674 | .long 0 |
| 661 | 675 | ||
| 662 | int_msg: | 676 | int_msg: |
| 663 | .asciz "Unknown interrupt or fault at EIP %p %p %p\n" | 677 | .asciz "Unknown interrupt or fault at: %p %p %p\n" |
| 664 | 678 | ||
| 665 | fault_msg: | 679 | fault_msg: |
| 666 | /* fault info: */ | 680 | /* fault info: */ |
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 0e275d495563..54b29bb24e71 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S | |||
| @@ -19,6 +19,7 @@ | |||
| 19 | #include <asm/msr.h> | 19 | #include <asm/msr.h> |
| 20 | #include <asm/cache.h> | 20 | #include <asm/cache.h> |
| 21 | #include <asm/processor-flags.h> | 21 | #include <asm/processor-flags.h> |
| 22 | #include <asm/percpu.h> | ||
| 22 | 23 | ||
| 23 | #ifdef CONFIG_PARAVIRT | 24 | #ifdef CONFIG_PARAVIRT |
| 24 | #include <asm/asm-offsets.h> | 25 | #include <asm/asm-offsets.h> |
| @@ -226,12 +227,15 @@ ENTRY(secondary_startup_64) | |||
| 226 | movl %eax,%fs | 227 | movl %eax,%fs |
| 227 | movl %eax,%gs | 228 | movl %eax,%gs |
| 228 | 229 | ||
| 229 | /* | 230 | /* Set up %gs. |
| 230 | * Setup up a dummy PDA. this is just for some early bootup code | 231 | * |
| 231 | * that does in_interrupt() | 232 | * The base of %gs always points to the bottom of the irqstack |
| 232 | */ | 233 | * union. If the stack protector canary is enabled, it is |
| 234 | * located at %gs:40. Note that, on SMP, the boot cpu uses | ||
| 235 | * init data section till per cpu areas are set up. | ||
| 236 | */ | ||
| 233 | movl $MSR_GS_BASE,%ecx | 237 | movl $MSR_GS_BASE,%ecx |
| 234 | movq $empty_zero_page,%rax | 238 | movq initial_gs(%rip),%rax |
| 235 | movq %rax,%rdx | 239 | movq %rax,%rdx |
| 236 | shrq $32,%rdx | 240 | shrq $32,%rdx |
| 237 | wrmsr | 241 | wrmsr |
| @@ -257,6 +261,8 @@ ENTRY(secondary_startup_64) | |||
| 257 | .align 8 | 261 | .align 8 |
| 258 | ENTRY(initial_code) | 262 | ENTRY(initial_code) |
| 259 | .quad x86_64_start_kernel | 263 | .quad x86_64_start_kernel |
| 264 | ENTRY(initial_gs) | ||
| 265 | .quad INIT_PER_CPU_VAR(irq_stack_union) | ||
| 260 | __FINITDATA | 266 | __FINITDATA |
| 261 | 267 | ||
| 262 | ENTRY(stack_start) | 268 | ENTRY(stack_start) |
| @@ -323,8 +329,6 @@ early_idt_ripmsg: | |||
| 323 | #endif /* CONFIG_EARLY_PRINTK */ | 329 | #endif /* CONFIG_EARLY_PRINTK */ |
| 324 | .previous | 330 | .previous |
| 325 | 331 | ||
| 326 | .balign PAGE_SIZE | ||
| 327 | |||
| 328 | #define NEXT_PAGE(name) \ | 332 | #define NEXT_PAGE(name) \ |
| 329 | .balign PAGE_SIZE; \ | 333 | .balign PAGE_SIZE; \ |
| 330 | ENTRY(name) | 334 | ENTRY(name) |
| @@ -401,7 +405,8 @@ NEXT_PAGE(level2_spare_pgt) | |||
| 401 | .globl early_gdt_descr | 405 | .globl early_gdt_descr |
| 402 | early_gdt_descr: | 406 | early_gdt_descr: |
| 403 | .word GDT_ENTRIES*8-1 | 407 | .word GDT_ENTRIES*8-1 |
| 404 | .quad per_cpu__gdt_page | 408 | early_gdt_descr_base: |
| 409 | .quad INIT_PER_CPU_VAR(gdt_page) | ||
| 405 | 410 | ||
| 406 | ENTRY(phys_base) | 411 | ENTRY(phys_base) |
| 407 | /* This must match the first entry in level2_kernel_pgt */ | 412 | /* This must match the first entry in level2_kernel_pgt */ |
| @@ -412,7 +417,7 @@ ENTRY(phys_base) | |||
| 412 | .section .bss, "aw", @nobits | 417 | .section .bss, "aw", @nobits |
| 413 | .align L1_CACHE_BYTES | 418 | .align L1_CACHE_BYTES |
| 414 | ENTRY(idt_table) | 419 | ENTRY(idt_table) |
| 415 | .skip 256 * 16 | 420 | .skip IDT_ENTRIES * 16 |
| 416 | 421 | ||
| 417 | .section .bss.page_aligned, "aw", @nobits | 422 | .section .bss.page_aligned, "aw", @nobits |
| 418 | .align PAGE_SIZE | 423 | .align PAGE_SIZE |
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 11d5093eb281..df89102bef80 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c | |||
| @@ -22,7 +22,6 @@ | |||
| 22 | #include <asm/pgtable.h> | 22 | #include <asm/pgtable.h> |
| 23 | #include <asm/desc.h> | 23 | #include <asm/desc.h> |
| 24 | #include <asm/apic.h> | 24 | #include <asm/apic.h> |
| 25 | #include <asm/arch_hooks.h> | ||
| 26 | #include <asm/i8259.h> | 25 | #include <asm/i8259.h> |
| 27 | 26 | ||
| 28 | /* | 27 | /* |
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index b12208f4dfee..99c4d308f16b 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c | |||
| @@ -85,19 +85,8 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) | |||
| 85 | 85 | ||
| 86 | t->io_bitmap_max = bytes; | 86 | t->io_bitmap_max = bytes; |
| 87 | 87 | ||
| 88 | #ifdef CONFIG_X86_32 | ||
| 89 | /* | ||
| 90 | * Sets the lazy trigger so that the next I/O operation will | ||
| 91 | * reload the correct bitmap. | ||
| 92 | * Reset the owner so that a process switch will not set | ||
| 93 | * tss->io_bitmap_base to IO_BITMAP_OFFSET. | ||
| 94 | */ | ||
| 95 | tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY; | ||
| 96 | tss->io_bitmap_owner = NULL; | ||
| 97 | #else | ||
| 98 | /* Update the TSS: */ | 88 | /* Update the TSS: */ |
| 99 | memcpy(tss->io_bitmap, t->io_bitmap_ptr, bytes_updated); | 89 | memcpy(tss->io_bitmap, t->io_bitmap_ptr, bytes_updated); |
| 100 | #endif | ||
| 101 | 90 | ||
| 102 | put_cpu(); | 91 | put_cpu(); |
| 103 | 92 | ||
| @@ -131,9 +120,8 @@ static int do_iopl(unsigned int level, struct pt_regs *regs) | |||
| 131 | } | 120 | } |
| 132 | 121 | ||
| 133 | #ifdef CONFIG_X86_32 | 122 | #ifdef CONFIG_X86_32 |
| 134 | asmlinkage long sys_iopl(unsigned long regsp) | 123 | long sys_iopl(struct pt_regs *regs) |
| 135 | { | 124 | { |
| 136 | struct pt_regs *regs = (struct pt_regs *)&regsp; | ||
| 137 | unsigned int level = regs->bx; | 125 | unsigned int level = regs->bx; |
| 138 | struct thread_struct *t = &current->thread; | 126 | struct thread_struct *t = &current->thread; |
| 139 | int rc; | 127 | int rc; |
diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c deleted file mode 100644 index 285bbf8831fa..000000000000 --- a/arch/x86/kernel/ipi.c +++ /dev/null | |||
| @@ -1,190 +0,0 @@ | |||
| 1 | #include <linux/cpumask.h> | ||
| 2 | #include <linux/interrupt.h> | ||
| 3 | #include <linux/init.h> | ||
| 4 | |||
| 5 | #include <linux/mm.h> | ||
| 6 | #include <linux/delay.h> | ||
| 7 | #include <linux/spinlock.h> | ||
| 8 | #include <linux/kernel_stat.h> | ||
| 9 | #include <linux/mc146818rtc.h> | ||
| 10 | #include <linux/cache.h> | ||
| 11 | #include <linux/cpu.h> | ||
| 12 | #include <linux/module.h> | ||
| 13 | |||
| 14 | #include <asm/smp.h> | ||
| 15 | #include <asm/mtrr.h> | ||
| 16 | #include <asm/tlbflush.h> | ||
| 17 | #include <asm/mmu_context.h> | ||
| 18 | #include <asm/apic.h> | ||
| 19 | #include <asm/proto.h> | ||
| 20 | |||
| 21 | #ifdef CONFIG_X86_32 | ||
| 22 | #include <mach_apic.h> | ||
| 23 | #include <mach_ipi.h> | ||
| 24 | |||
| 25 | /* | ||
| 26 | * the following functions deal with sending IPIs between CPUs. | ||
| 27 | * | ||
| 28 | * We use 'broadcast', CPU->CPU IPIs and self-IPIs too. | ||
| 29 | */ | ||
| 30 | |||
| 31 | static inline int __prepare_ICR(unsigned int shortcut, int vector) | ||
| 32 | { | ||
| 33 | unsigned int icr = shortcut | APIC_DEST_LOGICAL; | ||
| 34 | |||
| 35 | switch (vector) { | ||
| 36 | default: | ||
| 37 | icr |= APIC_DM_FIXED | vector; | ||
| 38 | break; | ||
| 39 | case NMI_VECTOR: | ||
| 40 | icr |= APIC_DM_NMI; | ||
| 41 | break; | ||
| 42 | } | ||
| 43 | return icr; | ||
| 44 | } | ||
| 45 | |||
/* Build the APIC_ICR2 value for @mask via SET_APIC_DEST_FIELD(). */
static inline int __prepare_ICR2(unsigned int mask)
{
	return SET_APIC_DEST_FIELD(mask);
}
| 50 | |||
| 51 | void __send_IPI_shortcut(unsigned int shortcut, int vector) | ||
| 52 | { | ||
| 53 | /* | ||
| 54 | * Subtle. In the case of the 'never do double writes' workaround | ||
| 55 | * we have to lock out interrupts to be safe. As we don't care | ||
| 56 | * of the value read we use an atomic rmw access to avoid costly | ||
| 57 | * cli/sti. Otherwise we use an even cheaper single atomic write | ||
| 58 | * to the APIC. | ||
| 59 | */ | ||
| 60 | unsigned int cfg; | ||
| 61 | |||
| 62 | /* | ||
| 63 | * Wait for idle. | ||
| 64 | */ | ||
| 65 | apic_wait_icr_idle(); | ||
| 66 | |||
| 67 | /* | ||
| 68 | * No need to touch the target chip field | ||
| 69 | */ | ||
| 70 | cfg = __prepare_ICR(shortcut, vector); | ||
| 71 | |||
| 72 | /* | ||
| 73 | * Send the IPI. The write to APIC_ICR fires this off. | ||
| 74 | */ | ||
| 75 | apic_write(APIC_ICR, cfg); | ||
| 76 | } | ||
| 77 | |||
/* Send @vector to the calling CPU using the APIC_DEST_SELF shorthand. */
void send_IPI_self(int vector)
{
	__send_IPI_shortcut(APIC_DEST_SELF, vector);
}
| 82 | |||
| 83 | /* | ||
| 84 | * This is used to send an IPI with no shorthand notation (the destination is | ||
| 85 | * specified in bits 56 to 63 of the ICR). | ||
| 86 | */ | ||
| 87 | static inline void __send_IPI_dest_field(unsigned long mask, int vector) | ||
| 88 | { | ||
| 89 | unsigned long cfg; | ||
| 90 | |||
| 91 | /* | ||
| 92 | * Wait for idle. | ||
| 93 | */ | ||
| 94 | if (unlikely(vector == NMI_VECTOR)) | ||
| 95 | safe_apic_wait_icr_idle(); | ||
| 96 | else | ||
| 97 | apic_wait_icr_idle(); | ||
| 98 | |||
| 99 | /* | ||
| 100 | * prepare target chip field | ||
| 101 | */ | ||
| 102 | cfg = __prepare_ICR2(mask); | ||
| 103 | apic_write(APIC_ICR2, cfg); | ||
| 104 | |||
| 105 | /* | ||
| 106 | * program the ICR | ||
| 107 | */ | ||
| 108 | cfg = __prepare_ICR(0, vector); | ||
| 109 | |||
| 110 | /* | ||
| 111 | * Send the IPI. The write to APIC_ICR fires this off. | ||
| 112 | */ | ||
| 113 | apic_write(APIC_ICR, cfg); | ||
| 114 | } | ||
| 115 | |||
| 116 | /* | ||
| 117 | * This is only used on smaller machines. | ||
| 118 | */ | ||
| 119 | void send_IPI_mask_bitmask(const struct cpumask *cpumask, int vector) | ||
| 120 | { | ||
| 121 | unsigned long mask = cpumask_bits(cpumask)[0]; | ||
| 122 | unsigned long flags; | ||
| 123 | |||
| 124 | local_irq_save(flags); | ||
| 125 | WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]); | ||
| 126 | __send_IPI_dest_field(mask, vector); | ||
| 127 | local_irq_restore(flags); | ||
| 128 | } | ||
| 129 | |||
| 130 | void send_IPI_mask_sequence(const struct cpumask *mask, int vector) | ||
| 131 | { | ||
| 132 | unsigned long flags; | ||
| 133 | unsigned int query_cpu; | ||
| 134 | |||
| 135 | /* | ||
| 136 | * Hack. The clustered APIC addressing mode doesn't allow us to send | ||
| 137 | * to an arbitrary mask, so I do a unicasts to each CPU instead. This | ||
| 138 | * should be modified to do 1 message per cluster ID - mbligh | ||
| 139 | */ | ||
| 140 | |||
| 141 | local_irq_save(flags); | ||
| 142 | for_each_cpu(query_cpu, mask) | ||
| 143 | __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), vector); | ||
| 144 | local_irq_restore(flags); | ||
| 145 | } | ||
| 146 | |||
| 147 | void send_IPI_mask_allbutself(const struct cpumask *mask, int vector) | ||
| 148 | { | ||
| 149 | unsigned long flags; | ||
| 150 | unsigned int query_cpu; | ||
| 151 | unsigned int this_cpu = smp_processor_id(); | ||
| 152 | |||
| 153 | /* See Hack comment above */ | ||
| 154 | |||
| 155 | local_irq_save(flags); | ||
| 156 | for_each_cpu(query_cpu, mask) | ||
| 157 | if (query_cpu != this_cpu) | ||
| 158 | __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), | ||
| 159 | vector); | ||
| 160 | local_irq_restore(flags); | ||
| 161 | } | ||
| 162 | |||
| 163 | /* must come after the send_IPI functions above for inlining */ | ||
| 164 | static int convert_apicid_to_cpu(int apic_id) | ||
| 165 | { | ||
| 166 | int i; | ||
| 167 | |||
| 168 | for_each_possible_cpu(i) { | ||
| 169 | if (per_cpu(x86_cpu_to_apicid, i) == apic_id) | ||
| 170 | return i; | ||
| 171 | } | ||
| 172 | return -1; | ||
| 173 | } | ||
| 174 | |||
| 175 | int safe_smp_processor_id(void) | ||
| 176 | { | ||
| 177 | int apicid, cpuid; | ||
| 178 | |||
| 179 | if (!boot_cpu_has(X86_FEATURE_APIC)) | ||
| 180 | return 0; | ||
| 181 | |||
| 182 | apicid = hard_smp_processor_id(); | ||
| 183 | if (apicid == BAD_APICID) | ||
| 184 | return 0; | ||
| 185 | |||
| 186 | cpuid = convert_apicid_to_cpu(apicid); | ||
| 187 | |||
| 188 | return cpuid >= 0 ? cpuid : 0; | ||
| 189 | } | ||
| 190 | #endif | ||
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 3973e2df7f87..b864341dcc45 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c | |||
| @@ -6,13 +6,18 @@ | |||
| 6 | #include <linux/kernel_stat.h> | 6 | #include <linux/kernel_stat.h> |
| 7 | #include <linux/seq_file.h> | 7 | #include <linux/seq_file.h> |
| 8 | #include <linux/smp.h> | 8 | #include <linux/smp.h> |
| 9 | #include <linux/ftrace.h> | ||
| 9 | 10 | ||
| 10 | #include <asm/apic.h> | 11 | #include <asm/apic.h> |
| 11 | #include <asm/io_apic.h> | 12 | #include <asm/io_apic.h> |
| 12 | #include <asm/irq.h> | 13 | #include <asm/irq.h> |
| 14 | #include <asm/idle.h> | ||
| 13 | 15 | ||
| 14 | atomic_t irq_err_count; | 16 | atomic_t irq_err_count; |
| 15 | 17 | ||
| 18 | /* Function pointer for generic interrupt vector handling */ | ||
| 19 | void (*generic_interrupt_extension)(void) = NULL; | ||
| 20 | |||
| 16 | /* | 21 | /* |
| 17 | * 'what should we do if we get a hw irq event on an illegal vector'. | 22 | * 'what should we do if we get a hw irq event on an illegal vector'. |
| 18 | * each architecture has to answer this themselves. | 23 | * each architecture has to answer this themselves. |
| @@ -36,11 +41,7 @@ void ack_bad_irq(unsigned int irq) | |||
| 36 | #endif | 41 | #endif |
| 37 | } | 42 | } |
| 38 | 43 | ||
| 39 | #ifdef CONFIG_X86_32 | 44 | #define irq_stats(x) (&per_cpu(irq_stat, x)) |
| 40 | # define irq_stats(x) (&per_cpu(irq_stat, x)) | ||
| 41 | #else | ||
| 42 | # define irq_stats(x) cpu_pda(x) | ||
| 43 | #endif | ||
| 44 | /* | 45 | /* |
| 45 | * /proc/interrupts printing: | 46 | * /proc/interrupts printing: |
| 46 | */ | 47 | */ |
| @@ -58,6 +59,12 @@ static int show_other_interrupts(struct seq_file *p) | |||
| 58 | seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs); | 59 | seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs); |
| 59 | seq_printf(p, " Local timer interrupts\n"); | 60 | seq_printf(p, " Local timer interrupts\n"); |
| 60 | #endif | 61 | #endif |
| 62 | if (generic_interrupt_extension) { | ||
| 63 | seq_printf(p, "PLT: "); | ||
| 64 | for_each_online_cpu(j) | ||
| 65 | seq_printf(p, "%10u ", irq_stats(j)->generic_irqs); | ||
| 66 | seq_printf(p, " Platform interrupts\n"); | ||
| 67 | } | ||
| 61 | #ifdef CONFIG_SMP | 68 | #ifdef CONFIG_SMP |
| 62 | seq_printf(p, "RES: "); | 69 | seq_printf(p, "RES: "); |
| 63 | for_each_online_cpu(j) | 70 | for_each_online_cpu(j) |
| @@ -165,6 +172,8 @@ u64 arch_irq_stat_cpu(unsigned int cpu) | |||
| 165 | #ifdef CONFIG_X86_LOCAL_APIC | 172 | #ifdef CONFIG_X86_LOCAL_APIC |
| 166 | sum += irq_stats(cpu)->apic_timer_irqs; | 173 | sum += irq_stats(cpu)->apic_timer_irqs; |
| 167 | #endif | 174 | #endif |
| 175 | if (generic_interrupt_extension) | ||
| 176 | sum += irq_stats(cpu)->generic_irqs; | ||
| 168 | #ifdef CONFIG_SMP | 177 | #ifdef CONFIG_SMP |
| 169 | sum += irq_stats(cpu)->irq_resched_count; | 178 | sum += irq_stats(cpu)->irq_resched_count; |
| 170 | sum += irq_stats(cpu)->irq_call_count; | 179 | sum += irq_stats(cpu)->irq_call_count; |
| @@ -192,4 +201,63 @@ u64 arch_irq_stat(void) | |||
| 192 | return sum; | 201 | return sum; |
| 193 | } | 202 | } |
| 194 | 203 | ||
| 204 | |||
| 205 | /* | ||
| 206 | * do_IRQ handles all normal device IRQ's (the special | ||
| 207 | * SMP cross-CPU interrupts have their own specific | ||
| 208 | * handlers). | ||
| 209 | */ | ||
| 210 | unsigned int __irq_entry do_IRQ(struct pt_regs *regs) | ||
| 211 | { | ||
| 212 | struct pt_regs *old_regs = set_irq_regs(regs); | ||
| 213 | |||
| 214 | /* high bit used in ret_from_ code */ | ||
| 215 | unsigned vector = ~regs->orig_ax; | ||
| 216 | unsigned irq; | ||
| 217 | |||
| 218 | exit_idle(); | ||
| 219 | irq_enter(); | ||
| 220 | |||
| 221 | irq = __get_cpu_var(vector_irq)[vector]; | ||
| 222 | |||
| 223 | if (!handle_irq(irq, regs)) { | ||
| 224 | #ifdef CONFIG_X86_64 | ||
| 225 | if (!disable_apic) | ||
| 226 | ack_APIC_irq(); | ||
| 227 | #endif | ||
| 228 | |||
| 229 | if (printk_ratelimit()) | ||
| 230 | printk(KERN_EMERG "%s: %d.%d No irq handler for vector (irq %d)\n", | ||
| 231 | __func__, smp_processor_id(), vector, irq); | ||
| 232 | } | ||
| 233 | |||
| 234 | irq_exit(); | ||
| 235 | |||
| 236 | set_irq_regs(old_regs); | ||
| 237 | return 1; | ||
| 238 | } | ||
| 239 | |||
| 240 | /* | ||
| 241 | * Handler for GENERIC_INTERRUPT_VECTOR. | ||
| 242 | */ | ||
| 243 | void smp_generic_interrupt(struct pt_regs *regs) | ||
| 244 | { | ||
| 245 | struct pt_regs *old_regs = set_irq_regs(regs); | ||
| 246 | |||
| 247 | ack_APIC_irq(); | ||
| 248 | |||
| 249 | exit_idle(); | ||
| 250 | |||
| 251 | irq_enter(); | ||
| 252 | |||
| 253 | inc_irq_stat(generic_irqs); | ||
| 254 | |||
| 255 | if (generic_interrupt_extension) | ||
| 256 | generic_interrupt_extension(); | ||
| 257 | |||
| 258 | irq_exit(); | ||
| 259 | |||
| 260 | set_irq_regs(old_regs); | ||
| 261 | } | ||
| 262 | |||
| 195 | EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); | 263 | EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); |
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 74b9ff7341e9..3b09634a5153 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c | |||
| @@ -16,6 +16,7 @@ | |||
| 16 | #include <linux/cpu.h> | 16 | #include <linux/cpu.h> |
| 17 | #include <linux/delay.h> | 17 | #include <linux/delay.h> |
| 18 | #include <linux/uaccess.h> | 18 | #include <linux/uaccess.h> |
| 19 | #include <linux/percpu.h> | ||
| 19 | 20 | ||
| 20 | #include <asm/apic.h> | 21 | #include <asm/apic.h> |
| 21 | 22 | ||
| @@ -55,13 +56,13 @@ static inline void print_stack_overflow(void) { } | |||
| 55 | union irq_ctx { | 56 | union irq_ctx { |
| 56 | struct thread_info tinfo; | 57 | struct thread_info tinfo; |
| 57 | u32 stack[THREAD_SIZE/sizeof(u32)]; | 58 | u32 stack[THREAD_SIZE/sizeof(u32)]; |
| 58 | }; | 59 | } __attribute__((aligned(PAGE_SIZE))); |
| 59 | 60 | ||
| 60 | static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly; | 61 | static DEFINE_PER_CPU(union irq_ctx *, hardirq_ctx); |
| 61 | static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly; | 62 | static DEFINE_PER_CPU(union irq_ctx *, softirq_ctx); |
| 62 | 63 | ||
| 63 | static char softirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss; | 64 | static DEFINE_PER_CPU_PAGE_ALIGNED(union irq_ctx, hardirq_stack); |
| 64 | static char hardirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss; | 65 | static DEFINE_PER_CPU_PAGE_ALIGNED(union irq_ctx, softirq_stack); |
| 65 | 66 | ||
| 66 | static void call_on_stack(void *func, void *stack) | 67 | static void call_on_stack(void *func, void *stack) |
| 67 | { | 68 | { |
| @@ -81,7 +82,7 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) | |||
| 81 | u32 *isp, arg1, arg2; | 82 | u32 *isp, arg1, arg2; |
| 82 | 83 | ||
| 83 | curctx = (union irq_ctx *) current_thread_info(); | 84 | curctx = (union irq_ctx *) current_thread_info(); |
| 84 | irqctx = hardirq_ctx[smp_processor_id()]; | 85 | irqctx = __get_cpu_var(hardirq_ctx); |
| 85 | 86 | ||
| 86 | /* | 87 | /* |
| 87 | * this is where we switch to the IRQ stack. However, if we are | 88 | * this is where we switch to the IRQ stack. However, if we are |
| @@ -125,34 +126,34 @@ void __cpuinit irq_ctx_init(int cpu) | |||
| 125 | { | 126 | { |
| 126 | union irq_ctx *irqctx; | 127 | union irq_ctx *irqctx; |
| 127 | 128 | ||
| 128 | if (hardirq_ctx[cpu]) | 129 | if (per_cpu(hardirq_ctx, cpu)) |
| 129 | return; | 130 | return; |
| 130 | 131 | ||
| 131 | irqctx = (union irq_ctx*) &hardirq_stack[cpu*THREAD_SIZE]; | 132 | irqctx = &per_cpu(hardirq_stack, cpu); |
| 132 | irqctx->tinfo.task = NULL; | 133 | irqctx->tinfo.task = NULL; |
| 133 | irqctx->tinfo.exec_domain = NULL; | 134 | irqctx->tinfo.exec_domain = NULL; |
| 134 | irqctx->tinfo.cpu = cpu; | 135 | irqctx->tinfo.cpu = cpu; |
| 135 | irqctx->tinfo.preempt_count = HARDIRQ_OFFSET; | 136 | irqctx->tinfo.preempt_count = HARDIRQ_OFFSET; |
| 136 | irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); | 137 | irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); |
| 137 | 138 | ||
| 138 | hardirq_ctx[cpu] = irqctx; | 139 | per_cpu(hardirq_ctx, cpu) = irqctx; |
| 139 | 140 | ||
| 140 | irqctx = (union irq_ctx *) &softirq_stack[cpu*THREAD_SIZE]; | 141 | irqctx = &per_cpu(softirq_stack, cpu); |
| 141 | irqctx->tinfo.task = NULL; | 142 | irqctx->tinfo.task = NULL; |
| 142 | irqctx->tinfo.exec_domain = NULL; | 143 | irqctx->tinfo.exec_domain = NULL; |
| 143 | irqctx->tinfo.cpu = cpu; | 144 | irqctx->tinfo.cpu = cpu; |
| 144 | irqctx->tinfo.preempt_count = 0; | 145 | irqctx->tinfo.preempt_count = 0; |
| 145 | irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); | 146 | irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); |
| 146 | 147 | ||
| 147 | softirq_ctx[cpu] = irqctx; | 148 | per_cpu(softirq_ctx, cpu) = irqctx; |
| 148 | 149 | ||
| 149 | printk(KERN_DEBUG "CPU %u irqstacks, hard=%p soft=%p\n", | 150 | printk(KERN_DEBUG "CPU %u irqstacks, hard=%p soft=%p\n", |
| 150 | cpu, hardirq_ctx[cpu], softirq_ctx[cpu]); | 151 | cpu, per_cpu(hardirq_ctx, cpu), per_cpu(softirq_ctx, cpu)); |
| 151 | } | 152 | } |
| 152 | 153 | ||
| 153 | void irq_ctx_exit(int cpu) | 154 | void irq_ctx_exit(int cpu) |
| 154 | { | 155 | { |
| 155 | hardirq_ctx[cpu] = NULL; | 156 | per_cpu(hardirq_ctx, cpu) = NULL; |
| 156 | } | 157 | } |
| 157 | 158 | ||
| 158 | asmlinkage void do_softirq(void) | 159 | asmlinkage void do_softirq(void) |
| @@ -169,7 +170,7 @@ asmlinkage void do_softirq(void) | |||
| 169 | 170 | ||
| 170 | if (local_softirq_pending()) { | 171 | if (local_softirq_pending()) { |
| 171 | curctx = current_thread_info(); | 172 | curctx = current_thread_info(); |
| 172 | irqctx = softirq_ctx[smp_processor_id()]; | 173 | irqctx = __get_cpu_var(softirq_ctx); |
| 173 | irqctx->tinfo.task = curctx->task; | 174 | irqctx->tinfo.task = curctx->task; |
| 174 | irqctx->tinfo.previous_esp = current_stack_pointer; | 175 | irqctx->tinfo.previous_esp = current_stack_pointer; |
| 175 | 176 | ||
| @@ -191,33 +192,16 @@ static inline int | |||
| 191 | execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) { return 0; } | 192 | execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) { return 0; } |
| 192 | #endif | 193 | #endif |
| 193 | 194 | ||
| 194 | /* | 195 | bool handle_irq(unsigned irq, struct pt_regs *regs) |
| 195 | * do_IRQ handles all normal device IRQ's (the special | ||
| 196 | * SMP cross-CPU interrupts have their own specific | ||
| 197 | * handlers). | ||
| 198 | */ | ||
| 199 | unsigned int do_IRQ(struct pt_regs *regs) | ||
| 200 | { | 196 | { |
| 201 | struct pt_regs *old_regs; | ||
| 202 | /* high bit used in ret_from_ code */ | ||
| 203 | int overflow; | ||
| 204 | unsigned vector = ~regs->orig_ax; | ||
| 205 | struct irq_desc *desc; | 197 | struct irq_desc *desc; |
| 206 | unsigned irq; | 198 | int overflow; |
| 207 | |||
| 208 | |||
| 209 | old_regs = set_irq_regs(regs); | ||
| 210 | irq_enter(); | ||
| 211 | irq = __get_cpu_var(vector_irq)[vector]; | ||
| 212 | 199 | ||
| 213 | overflow = check_stack_overflow(); | 200 | overflow = check_stack_overflow(); |
| 214 | 201 | ||
| 215 | desc = irq_to_desc(irq); | 202 | desc = irq_to_desc(irq); |
| 216 | if (unlikely(!desc)) { | 203 | if (unlikely(!desc)) |
| 217 | printk(KERN_EMERG "%s: cannot handle IRQ %d vector %#x cpu %d\n", | 204 | return false; |
| 218 | __func__, irq, vector, smp_processor_id()); | ||
| 219 | BUG(); | ||
| 220 | } | ||
| 221 | 205 | ||
| 222 | if (!execute_on_irq_stack(overflow, desc, irq)) { | 206 | if (!execute_on_irq_stack(overflow, desc, irq)) { |
| 223 | if (unlikely(overflow)) | 207 | if (unlikely(overflow)) |
| @@ -225,13 +209,10 @@ unsigned int do_IRQ(struct pt_regs *regs) | |||
| 225 | desc->handle_irq(irq, desc); | 209 | desc->handle_irq(irq, desc); |
| 226 | } | 210 | } |
| 227 | 211 | ||
| 228 | irq_exit(); | 212 | return true; |
| 229 | set_irq_regs(old_regs); | ||
| 230 | return 1; | ||
| 231 | } | 213 | } |
| 232 | 214 | ||
| 233 | #ifdef CONFIG_HOTPLUG_CPU | 215 | #ifdef CONFIG_HOTPLUG_CPU |
| 234 | #include <mach_apic.h> | ||
| 235 | 216 | ||
| 236 | /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ | 217 | /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ |
| 237 | void fixup_irqs(void) | 218 | void fixup_irqs(void) |
| @@ -248,7 +229,7 @@ void fixup_irqs(void) | |||
| 248 | if (irq == 2) | 229 | if (irq == 2) |
| 249 | continue; | 230 | continue; |
| 250 | 231 | ||
| 251 | affinity = &desc->affinity; | 232 | affinity = desc->affinity; |
| 252 | if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { | 233 | if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { |
| 253 | printk("Breaking affinity for irq %i\n", irq); | 234 | printk("Breaking affinity for irq %i\n", irq); |
| 254 | affinity = cpu_all_mask; | 235 | affinity = cpu_all_mask; |
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 63c88e6ec025..977d8b43a0dd 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c | |||
| @@ -18,6 +18,13 @@ | |||
| 18 | #include <linux/smp.h> | 18 | #include <linux/smp.h> |
| 19 | #include <asm/io_apic.h> | 19 | #include <asm/io_apic.h> |
| 20 | #include <asm/idle.h> | 20 | #include <asm/idle.h> |
| 21 | #include <asm/apic.h> | ||
| 22 | |||
| 23 | DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); | ||
| 24 | EXPORT_PER_CPU_SYMBOL(irq_stat); | ||
| 25 | |||
| 26 | DEFINE_PER_CPU(struct pt_regs *, irq_regs); | ||
| 27 | EXPORT_PER_CPU_SYMBOL(irq_regs); | ||
| 21 | 28 | ||
| 22 | /* | 29 | /* |
| 23 | * Probabilistic stack overflow check: | 30 | * Probabilistic stack overflow check: |
| @@ -41,42 +48,18 @@ static inline void stack_overflow_check(struct pt_regs *regs) | |||
| 41 | #endif | 48 | #endif |
| 42 | } | 49 | } |
| 43 | 50 | ||
| 44 | /* | 51 | bool handle_irq(unsigned irq, struct pt_regs *regs) |
| 45 | * do_IRQ handles all normal device IRQ's (the special | ||
| 46 | * SMP cross-CPU interrupts have their own specific | ||
| 47 | * handlers). | ||
| 48 | */ | ||
| 49 | asmlinkage unsigned int __irq_entry do_IRQ(struct pt_regs *regs) | ||
| 50 | { | 52 | { |
| 51 | struct pt_regs *old_regs = set_irq_regs(regs); | ||
| 52 | struct irq_desc *desc; | 53 | struct irq_desc *desc; |
| 53 | 54 | ||
| 54 | /* high bit used in ret_from_ code */ | ||
| 55 | unsigned vector = ~regs->orig_ax; | ||
| 56 | unsigned irq; | ||
| 57 | |||
| 58 | exit_idle(); | ||
| 59 | irq_enter(); | ||
| 60 | irq = __get_cpu_var(vector_irq)[vector]; | ||
| 61 | |||
| 62 | stack_overflow_check(regs); | 55 | stack_overflow_check(regs); |
| 63 | 56 | ||
| 64 | desc = irq_to_desc(irq); | 57 | desc = irq_to_desc(irq); |
| 65 | if (likely(desc)) | 58 | if (unlikely(!desc)) |
| 66 | generic_handle_irq_desc(irq, desc); | 59 | return false; |
| 67 | else { | ||
| 68 | if (!disable_apic) | ||
| 69 | ack_APIC_irq(); | ||
| 70 | |||
| 71 | if (printk_ratelimit()) | ||
| 72 | printk(KERN_EMERG "%s: %d.%d No irq handler for vector\n", | ||
| 73 | __func__, smp_processor_id(), vector); | ||
| 74 | } | ||
| 75 | |||
| 76 | irq_exit(); | ||
| 77 | 60 | ||
| 78 | set_irq_regs(old_regs); | 61 | generic_handle_irq_desc(irq, desc); |
| 79 | return 1; | 62 | return true; |
| 80 | } | 63 | } |
| 81 | 64 | ||
| 82 | #ifdef CONFIG_HOTPLUG_CPU | 65 | #ifdef CONFIG_HOTPLUG_CPU |
| @@ -100,7 +83,7 @@ void fixup_irqs(void) | |||
| 100 | /* interrupt's are disabled at this point */ | 83 | /* interrupt's are disabled at this point */ |
| 101 | spin_lock(&desc->lock); | 84 | spin_lock(&desc->lock); |
| 102 | 85 | ||
| 103 | affinity = &desc->affinity; | 86 | affinity = desc->affinity; |
| 104 | if (!irq_has_action(irq) || | 87 | if (!irq_has_action(irq) || |
| 105 | cpumask_equal(affinity, cpu_online_mask)) { | 88 | cpumask_equal(affinity, cpu_online_mask)) { |
| 106 | spin_unlock(&desc->lock); | 89 | spin_unlock(&desc->lock); |
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index 10a09c2f1828..bc1326105448 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c | |||
| @@ -18,7 +18,7 @@ | |||
| 18 | #include <asm/pgtable.h> | 18 | #include <asm/pgtable.h> |
| 19 | #include <asm/desc.h> | 19 | #include <asm/desc.h> |
| 20 | #include <asm/apic.h> | 20 | #include <asm/apic.h> |
| 21 | #include <asm/arch_hooks.h> | 21 | #include <asm/setup.h> |
| 22 | #include <asm/i8259.h> | 22 | #include <asm/i8259.h> |
| 23 | #include <asm/traps.h> | 23 | #include <asm/traps.h> |
| 24 | 24 | ||
| @@ -78,6 +78,15 @@ void __init init_ISA_irqs(void) | |||
| 78 | } | 78 | } |
| 79 | } | 79 | } |
| 80 | 80 | ||
| 81 | /* | ||
| 82 | * IRQ2 is cascade interrupt to second interrupt controller | ||
| 83 | */ | ||
| 84 | static struct irqaction irq2 = { | ||
| 85 | .handler = no_action, | ||
| 86 | .mask = CPU_MASK_NONE, | ||
| 87 | .name = "cascade", | ||
| 88 | }; | ||
| 89 | |||
| 81 | DEFINE_PER_CPU(vector_irq_t, vector_irq) = { | 90 | DEFINE_PER_CPU(vector_irq_t, vector_irq) = { |
| 82 | [0 ... IRQ0_VECTOR - 1] = -1, | 91 | [0 ... IRQ0_VECTOR - 1] = -1, |
| 83 | [IRQ0_VECTOR] = 0, | 92 | [IRQ0_VECTOR] = 0, |
| @@ -118,8 +127,8 @@ void __init native_init_IRQ(void) | |||
| 118 | { | 127 | { |
| 119 | int i; | 128 | int i; |
| 120 | 129 | ||
| 121 | /* all the set up before the call gates are initialised */ | 130 | /* Execute any quirks before the call gates are initialised: */ |
| 122 | pre_intr_init_hook(); | 131 | x86_quirk_pre_intr_init(); |
| 123 | 132 | ||
| 124 | /* | 133 | /* |
| 125 | * Cover the whole vector space, no vector can escape | 134 | * Cover the whole vector space, no vector can escape |
| @@ -140,8 +149,15 @@ void __init native_init_IRQ(void) | |||
| 140 | */ | 149 | */ |
| 141 | alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); | 150 | alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); |
| 142 | 151 | ||
| 143 | /* IPI for invalidation */ | 152 | /* IPIs for invalidation */ |
| 144 | alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt); | 153 | alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0); |
| 154 | alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1); | ||
| 155 | alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2); | ||
| 156 | alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3); | ||
| 157 | alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4); | ||
| 158 | alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5); | ||
| 159 | alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6); | ||
| 160 | alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7); | ||
| 145 | 161 | ||
| 146 | /* IPI for generic function call */ | 162 | /* IPI for generic function call */ |
| 147 | alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); | 163 | alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); |
| @@ -159,6 +175,9 @@ void __init native_init_IRQ(void) | |||
| 159 | /* self generated IPI for local APIC timer */ | 175 | /* self generated IPI for local APIC timer */ |
| 160 | alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); | 176 | alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); |
| 161 | 177 | ||
| 178 | /* generic IPI for platform specific use */ | ||
| 179 | alloc_intr_gate(GENERIC_INTERRUPT_VECTOR, generic_interrupt); | ||
| 180 | |||
| 162 | /* IPI vectors for APIC spurious and error interrupts */ | 181 | /* IPI vectors for APIC spurious and error interrupts */ |
| 163 | alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); | 182 | alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); |
| 164 | alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); | 183 | alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); |
| @@ -169,10 +188,14 @@ void __init native_init_IRQ(void) | |||
| 169 | alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); | 188 | alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); |
| 170 | #endif | 189 | #endif |
| 171 | 190 | ||
| 172 | /* setup after call gates are initialised (usually add in | 191 | if (!acpi_ioapic) |
| 173 | * the architecture specific gates) | 192 | setup_irq(2, &irq2); |
| 193 | |||
| 194 | /* | ||
| 195 | * Call quirks after call gates are initialised (usually add in | ||
| 196 | * the architecture specific gates): | ||
| 174 | */ | 197 | */ |
| 175 | intr_init_hook(); | 198 | x86_quirk_intr_init(); |
| 176 | 199 | ||
| 177 | /* | 200 | /* |
| 178 | * External FPU? Set up irq13 if so, for | 201 | * External FPU? Set up irq13 if so, for |
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c index da481a1e3f30..c7a49e0ffbfb 100644 --- a/arch/x86/kernel/irqinit_64.c +++ b/arch/x86/kernel/irqinit_64.c | |||
| @@ -147,6 +147,9 @@ static void __init apic_intr_init(void) | |||
| 147 | /* self generated IPI for local APIC timer */ | 147 | /* self generated IPI for local APIC timer */ |
| 148 | alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); | 148 | alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); |
| 149 | 149 | ||
| 150 | /* generic IPI for platform specific use */ | ||
| 151 | alloc_intr_gate(GENERIC_INTERRUPT_VECTOR, generic_interrupt); | ||
| 152 | |||
| 150 | /* IPI vectors for APIC spurious and error interrupts */ | 153 | /* IPI vectors for APIC spurious and error interrupts */ |
| 151 | alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); | 154 | alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); |
| 152 | alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); | 155 | alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); |
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 10435a120d22..eedfaebe1063 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c | |||
| @@ -46,7 +46,7 @@ | |||
| 46 | #include <asm/apicdef.h> | 46 | #include <asm/apicdef.h> |
| 47 | #include <asm/system.h> | 47 | #include <asm/system.h> |
| 48 | 48 | ||
| 49 | #include <mach_ipi.h> | 49 | #include <asm/apic.h> |
| 50 | 50 | ||
| 51 | /* | 51 | /* |
| 52 | * Put the error code here just in case the user cares: | 52 | * Put the error code here just in case the user cares: |
| @@ -347,7 +347,7 @@ void kgdb_post_primary_code(struct pt_regs *regs, int e_vector, int err_code) | |||
| 347 | */ | 347 | */ |
| 348 | void kgdb_roundup_cpus(unsigned long flags) | 348 | void kgdb_roundup_cpus(unsigned long flags) |
| 349 | { | 349 | { |
| 350 | send_IPI_allbutself(APIC_DM_NMI); | 350 | apic->send_IPI_allbutself(APIC_DM_NMI); |
| 351 | } | 351 | } |
| 352 | #endif | 352 | #endif |
| 353 | 353 | ||
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 652fce6d2cce..137f2e8132df 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c | |||
| @@ -19,7 +19,6 @@ | |||
| 19 | #include <linux/clocksource.h> | 19 | #include <linux/clocksource.h> |
| 20 | #include <linux/kvm_para.h> | 20 | #include <linux/kvm_para.h> |
| 21 | #include <asm/pvclock.h> | 21 | #include <asm/pvclock.h> |
| 22 | #include <asm/arch_hooks.h> | ||
| 23 | #include <asm/msr.h> | 22 | #include <asm/msr.h> |
| 24 | #include <asm/apic.h> | 23 | #include <asm/apic.h> |
| 25 | #include <linux/percpu.h> | 24 | #include <linux/percpu.h> |
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 37f420018a41..e7368c1da01d 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c | |||
| @@ -14,12 +14,12 @@ | |||
| 14 | #include <linux/ftrace.h> | 14 | #include <linux/ftrace.h> |
| 15 | #include <linux/suspend.h> | 15 | #include <linux/suspend.h> |
| 16 | #include <linux/gfp.h> | 16 | #include <linux/gfp.h> |
| 17 | #include <linux/io.h> | ||
| 17 | 18 | ||
| 18 | #include <asm/pgtable.h> | 19 | #include <asm/pgtable.h> |
| 19 | #include <asm/pgalloc.h> | 20 | #include <asm/pgalloc.h> |
| 20 | #include <asm/tlbflush.h> | 21 | #include <asm/tlbflush.h> |
| 21 | #include <asm/mmu_context.h> | 22 | #include <asm/mmu_context.h> |
| 22 | #include <asm/io.h> | ||
| 23 | #include <asm/apic.h> | 23 | #include <asm/apic.h> |
| 24 | #include <asm/cpufeature.h> | 24 | #include <asm/cpufeature.h> |
| 25 | #include <asm/desc.h> | 25 | #include <asm/desc.h> |
| @@ -63,7 +63,7 @@ static void load_segments(void) | |||
| 63 | "\tmovl %%eax,%%fs\n" | 63 | "\tmovl %%eax,%%fs\n" |
| 64 | "\tmovl %%eax,%%gs\n" | 64 | "\tmovl %%eax,%%gs\n" |
| 65 | "\tmovl %%eax,%%ss\n" | 65 | "\tmovl %%eax,%%ss\n" |
| 66 | ::: "eax", "memory"); | 66 | : : : "eax", "memory"); |
| 67 | #undef STR | 67 | #undef STR |
| 68 | #undef __STR | 68 | #undef __STR |
| 69 | } | 69 | } |
| @@ -121,7 +121,7 @@ static void machine_kexec_page_table_set_one( | |||
| 121 | static void machine_kexec_prepare_page_tables(struct kimage *image) | 121 | static void machine_kexec_prepare_page_tables(struct kimage *image) |
| 122 | { | 122 | { |
| 123 | void *control_page; | 123 | void *control_page; |
| 124 | pmd_t *pmd = 0; | 124 | pmd_t *pmd = NULL; |
| 125 | 125 | ||
| 126 | control_page = page_address(image->control_code_page); | 126 | control_page = page_address(image->control_code_page); |
| 127 | #ifdef CONFIG_X86_PAE | 127 | #ifdef CONFIG_X86_PAE |
| @@ -205,7 +205,8 @@ void machine_kexec(struct kimage *image) | |||
| 205 | 205 | ||
| 206 | if (image->preserve_context) { | 206 | if (image->preserve_context) { |
| 207 | #ifdef CONFIG_X86_IO_APIC | 207 | #ifdef CONFIG_X86_IO_APIC |
| 208 | /* We need to put APICs in legacy mode so that we can | 208 | /* |
| 209 | * We need to put APICs in legacy mode so that we can | ||
| 209 | * get timer interrupts in second kernel. kexec/kdump | 210 | * get timer interrupts in second kernel. kexec/kdump |
| 210 | * paths already have calls to disable_IO_APIC() in | 211 | * paths already have calls to disable_IO_APIC() in |
| 211 | * one form or other. kexec jump path also need | 212 | * one form or other. kexec jump path also need |
| @@ -227,7 +228,8 @@ void machine_kexec(struct kimage *image) | |||
| 227 | page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) | 228 | page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) |
| 228 | << PAGE_SHIFT); | 229 | << PAGE_SHIFT); |
| 229 | 230 | ||
| 230 | /* The segment registers are funny things, they have both a | 231 | /* |
| 232 | * The segment registers are funny things, they have both a | ||
| 231 | * visible and an invisible part. Whenever the visible part is | 233 | * visible and an invisible part. Whenever the visible part is |
| 232 | * set to a specific selector, the invisible part is loaded | 234 | * set to a specific selector, the invisible part is loaded |
| 233 | * with from a table in memory. At no other time is the | 235 | * with from a table in memory. At no other time is the |
| @@ -237,11 +239,12 @@ void machine_kexec(struct kimage *image) | |||
| 237 | * segments, before I zap the gdt with an invalid value. | 239 | * segments, before I zap the gdt with an invalid value. |
| 238 | */ | 240 | */ |
| 239 | load_segments(); | 241 | load_segments(); |
| 240 | /* The gdt & idt are now invalid. | 242 | /* |
| 243 | * The gdt & idt are now invalid. | ||
| 241 | * If you want to load them you must set up your own idt & gdt. | 244 | * If you want to load them you must set up your own idt & gdt. |
| 242 | */ | 245 | */ |
| 243 | set_gdt(phys_to_virt(0),0); | 246 | set_gdt(phys_to_virt(0), 0); |
| 244 | set_idt(phys_to_virt(0),0); | 247 | set_idt(phys_to_virt(0), 0); |
| 245 | 248 | ||
| 246 | /* now call it */ | 249 | /* now call it */ |
| 247 | image->start = relocate_kernel_ptr((unsigned long)image->head, | 250 | image->start = relocate_kernel_ptr((unsigned long)image->head, |
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index c43caa3a91f3..89cea4d44679 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c | |||
| @@ -12,20 +12,47 @@ | |||
| 12 | #include <linux/reboot.h> | 12 | #include <linux/reboot.h> |
| 13 | #include <linux/numa.h> | 13 | #include <linux/numa.h> |
| 14 | #include <linux/ftrace.h> | 14 | #include <linux/ftrace.h> |
| 15 | #include <linux/io.h> | ||
| 16 | #include <linux/suspend.h> | ||
| 15 | 17 | ||
| 16 | #include <asm/pgtable.h> | 18 | #include <asm/pgtable.h> |
| 17 | #include <asm/tlbflush.h> | 19 | #include <asm/tlbflush.h> |
| 18 | #include <asm/mmu_context.h> | 20 | #include <asm/mmu_context.h> |
| 19 | #include <asm/io.h> | ||
| 20 | 21 | ||
| 21 | #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) | 22 | static int init_one_level2_page(struct kimage *image, pgd_t *pgd, |
| 22 | static u64 kexec_pgd[512] PAGE_ALIGNED; | 23 | unsigned long addr) |
| 23 | static u64 kexec_pud0[512] PAGE_ALIGNED; | 24 | { |
| 24 | static u64 kexec_pmd0[512] PAGE_ALIGNED; | 25 | pud_t *pud; |
| 25 | static u64 kexec_pte0[512] PAGE_ALIGNED; | 26 | pmd_t *pmd; |
| 26 | static u64 kexec_pud1[512] PAGE_ALIGNED; | 27 | struct page *page; |
| 27 | static u64 kexec_pmd1[512] PAGE_ALIGNED; | 28 | int result = -ENOMEM; |
| 28 | static u64 kexec_pte1[512] PAGE_ALIGNED; | 29 | |
| 30 | addr &= PMD_MASK; | ||
| 31 | pgd += pgd_index(addr); | ||
| 32 | if (!pgd_present(*pgd)) { | ||
| 33 | page = kimage_alloc_control_pages(image, 0); | ||
| 34 | if (!page) | ||
| 35 | goto out; | ||
| 36 | pud = (pud_t *)page_address(page); | ||
| 37 | memset(pud, 0, PAGE_SIZE); | ||
| 38 | set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE)); | ||
| 39 | } | ||
| 40 | pud = pud_offset(pgd, addr); | ||
| 41 | if (!pud_present(*pud)) { | ||
| 42 | page = kimage_alloc_control_pages(image, 0); | ||
| 43 | if (!page) | ||
| 44 | goto out; | ||
| 45 | pmd = (pmd_t *)page_address(page); | ||
| 46 | memset(pmd, 0, PAGE_SIZE); | ||
| 47 | set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); | ||
| 48 | } | ||
| 49 | pmd = pmd_offset(pud, addr); | ||
| 50 | if (!pmd_present(*pmd)) | ||
| 51 | set_pmd(pmd, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC)); | ||
| 52 | result = 0; | ||
| 53 | out: | ||
| 54 | return result; | ||
| 55 | } | ||
| 29 | 56 | ||
| 30 | static void init_level2_page(pmd_t *level2p, unsigned long addr) | 57 | static void init_level2_page(pmd_t *level2p, unsigned long addr) |
| 31 | { | 58 | { |
| @@ -92,9 +119,8 @@ static int init_level4_page(struct kimage *image, pgd_t *level4p, | |||
| 92 | } | 119 | } |
| 93 | level3p = (pud_t *)page_address(page); | 120 | level3p = (pud_t *)page_address(page); |
| 94 | result = init_level3_page(image, level3p, addr, last_addr); | 121 | result = init_level3_page(image, level3p, addr, last_addr); |
| 95 | if (result) { | 122 | if (result) |
| 96 | goto out; | 123 | goto out; |
| 97 | } | ||
| 98 | set_pgd(level4p++, __pgd(__pa(level3p) | _KERNPG_TABLE)); | 124 | set_pgd(level4p++, __pgd(__pa(level3p) | _KERNPG_TABLE)); |
| 99 | addr += PGDIR_SIZE; | 125 | addr += PGDIR_SIZE; |
| 100 | } | 126 | } |
| @@ -107,12 +133,72 @@ out: | |||
| 107 | return result; | 133 | return result; |
| 108 | } | 134 | } |
| 109 | 135 | ||
| 136 | static void free_transition_pgtable(struct kimage *image) | ||
| 137 | { | ||
| 138 | free_page((unsigned long)image->arch.pud); | ||
| 139 | free_page((unsigned long)image->arch.pmd); | ||
| 140 | free_page((unsigned long)image->arch.pte); | ||
| 141 | } | ||
| 142 | |||
| 143 | static int init_transition_pgtable(struct kimage *image, pgd_t *pgd) | ||
| 144 | { | ||
| 145 | pud_t *pud; | ||
| 146 | pmd_t *pmd; | ||
| 147 | pte_t *pte; | ||
| 148 | unsigned long vaddr, paddr; | ||
| 149 | int result = -ENOMEM; | ||
| 150 | |||
| 151 | vaddr = (unsigned long)relocate_kernel; | ||
| 152 | paddr = __pa(page_address(image->control_code_page)+PAGE_SIZE); | ||
| 153 | pgd += pgd_index(vaddr); | ||
| 154 | if (!pgd_present(*pgd)) { | ||
| 155 | pud = (pud_t *)get_zeroed_page(GFP_KERNEL); | ||
| 156 | if (!pud) | ||
| 157 | goto err; | ||
| 158 | image->arch.pud = pud; | ||
| 159 | set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE)); | ||
| 160 | } | ||
| 161 | pud = pud_offset(pgd, vaddr); | ||
| 162 | if (!pud_present(*pud)) { | ||
| 163 | pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL); | ||
| 164 | if (!pmd) | ||
| 165 | goto err; | ||
| 166 | image->arch.pmd = pmd; | ||
| 167 | set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); | ||
| 168 | } | ||
| 169 | pmd = pmd_offset(pud, vaddr); | ||
| 170 | if (!pmd_present(*pmd)) { | ||
| 171 | pte = (pte_t *)get_zeroed_page(GFP_KERNEL); | ||
| 172 | if (!pte) | ||
| 173 | goto err; | ||
| 174 | image->arch.pte = pte; | ||
| 175 | set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE)); | ||
| 176 | } | ||
| 177 | pte = pte_offset_kernel(pmd, vaddr); | ||
| 178 | set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC)); | ||
| 179 | return 0; | ||
| 180 | err: | ||
| 181 | free_transition_pgtable(image); | ||
| 182 | return result; | ||
| 183 | } | ||
| 184 | |||
| 110 | 185 | ||
| 111 | static int init_pgtable(struct kimage *image, unsigned long start_pgtable) | 186 | static int init_pgtable(struct kimage *image, unsigned long start_pgtable) |
| 112 | { | 187 | { |
| 113 | pgd_t *level4p; | 188 | pgd_t *level4p; |
| 189 | int result; | ||
| 114 | level4p = (pgd_t *)__va(start_pgtable); | 190 | level4p = (pgd_t *)__va(start_pgtable); |
| 115 | return init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT); | 191 | result = init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT); |
| 192 | if (result) | ||
| 193 | return result; | ||
| 194 | /* | ||
| 195 | * image->start may be outside 0 ~ max_pfn, for example when | ||
| 196 | * jump back to original kernel from kexeced kernel | ||
| 197 | */ | ||
| 198 | result = init_one_level2_page(image, level4p, image->start); | ||
| 199 | if (result) | ||
| 200 | return result; | ||
| 201 | return init_transition_pgtable(image, level4p); | ||
| 116 | } | 202 | } |
| 117 | 203 | ||
| 118 | static void set_idt(void *newidt, u16 limit) | 204 | static void set_idt(void *newidt, u16 limit) |
| @@ -174,7 +260,7 @@ int machine_kexec_prepare(struct kimage *image) | |||
| 174 | 260 | ||
| 175 | void machine_kexec_cleanup(struct kimage *image) | 261 | void machine_kexec_cleanup(struct kimage *image) |
| 176 | { | 262 | { |
| 177 | return; | 263 | free_transition_pgtable(image); |
| 178 | } | 264 | } |
| 179 | 265 | ||
| 180 | /* | 266 | /* |
| @@ -185,36 +271,45 @@ void machine_kexec(struct kimage *image) | |||
| 185 | { | 271 | { |
| 186 | unsigned long page_list[PAGES_NR]; | 272 | unsigned long page_list[PAGES_NR]; |
| 187 | void *control_page; | 273 | void *control_page; |
| 274 | int save_ftrace_enabled; | ||
| 188 | 275 | ||
| 189 | tracer_disable(); | 276 | #ifdef CONFIG_KEXEC_JUMP |
| 277 | if (kexec_image->preserve_context) | ||
| 278 | save_processor_state(); | ||
| 279 | #endif | ||
| 280 | |||
| 281 | save_ftrace_enabled = __ftrace_enabled_save(); | ||
| 190 | 282 | ||
| 191 | /* Interrupts aren't acceptable while we reboot */ | 283 | /* Interrupts aren't acceptable while we reboot */ |
| 192 | local_irq_disable(); | 284 | local_irq_disable(); |
| 193 | 285 | ||
| 286 | if (image->preserve_context) { | ||
| 287 | #ifdef CONFIG_X86_IO_APIC | ||
| 288 | /* | ||
| 289 | * We need to put APICs in legacy mode so that we can | ||
| 290 | * get timer interrupts in second kernel. kexec/kdump | ||
| 291 | * paths already have calls to disable_IO_APIC() in | ||
| 292 | * one form or other. kexec jump path also need | ||
| 293 | * one. | ||
| 294 | */ | ||
| 295 | disable_IO_APIC(); | ||
| 296 | #endif | ||
| 297 | } | ||
| 298 | |||
| 194 | control_page = page_address(image->control_code_page) + PAGE_SIZE; | 299 | control_page = page_address(image->control_code_page) + PAGE_SIZE; |
| 195 | memcpy(control_page, relocate_kernel, PAGE_SIZE); | 300 | memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE); |
| 196 | 301 | ||
| 197 | page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page); | 302 | page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page); |
| 198 | page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel; | 303 | page_list[VA_CONTROL_PAGE] = (unsigned long)control_page; |
| 199 | page_list[PA_PGD] = virt_to_phys(&kexec_pgd); | ||
| 200 | page_list[VA_PGD] = (unsigned long)kexec_pgd; | ||
| 201 | page_list[PA_PUD_0] = virt_to_phys(&kexec_pud0); | ||
| 202 | page_list[VA_PUD_0] = (unsigned long)kexec_pud0; | ||
| 203 | page_list[PA_PMD_0] = virt_to_phys(&kexec_pmd0); | ||
| 204 | page_list[VA_PMD_0] = (unsigned long)kexec_pmd0; | ||
| 205 | page_list[PA_PTE_0] = virt_to_phys(&kexec_pte0); | ||
| 206 | page_list[VA_PTE_0] = (unsigned long)kexec_pte0; | ||
| 207 | page_list[PA_PUD_1] = virt_to_phys(&kexec_pud1); | ||
| 208 | page_list[VA_PUD_1] = (unsigned long)kexec_pud1; | ||
| 209 | page_list[PA_PMD_1] = virt_to_phys(&kexec_pmd1); | ||
| 210 | page_list[VA_PMD_1] = (unsigned long)kexec_pmd1; | ||
| 211 | page_list[PA_PTE_1] = virt_to_phys(&kexec_pte1); | ||
| 212 | page_list[VA_PTE_1] = (unsigned long)kexec_pte1; | ||
| 213 | |||
| 214 | page_list[PA_TABLE_PAGE] = | 304 | page_list[PA_TABLE_PAGE] = |
| 215 | (unsigned long)__pa(page_address(image->control_code_page)); | 305 | (unsigned long)__pa(page_address(image->control_code_page)); |
| 216 | 306 | ||
| 217 | /* The segment registers are funny things, they have both a | 307 | if (image->type == KEXEC_TYPE_DEFAULT) |
| 308 | page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) | ||
| 309 | << PAGE_SHIFT); | ||
| 310 | |||
| 311 | /* | ||
| 312 | * The segment registers are funny things, they have both a | ||
| 218 | * visible and an invisible part. Whenever the visible part is | 313 | * visible and an invisible part. Whenever the visible part is |
| 219 | * set to a specific selector, the invisible part is loaded | 314 | * set to a specific selector, the invisible part is loaded |
| 220 | * with from a table in memory. At no other time is the | 315 | * with from a table in memory. At no other time is the |
| @@ -224,15 +319,25 @@ void machine_kexec(struct kimage *image) | |||
| 224 | * segments, before I zap the gdt with an invalid value. | 319 | * segments, before I zap the gdt with an invalid value. |
| 225 | */ | 320 | */ |
| 226 | load_segments(); | 321 | load_segments(); |
| 227 | /* The gdt & idt are now invalid. | 322 | /* |
| 323 | * The gdt & idt are now invalid. | ||
| 228 | * If you want to load them you must set up your own idt & gdt. | 324 | * If you want to load them you must set up your own idt & gdt. |
| 229 | */ | 325 | */ |
| 230 | set_gdt(phys_to_virt(0),0); | 326 | set_gdt(phys_to_virt(0), 0); |
| 231 | set_idt(phys_to_virt(0),0); | 327 | set_idt(phys_to_virt(0), 0); |
| 232 | 328 | ||
| 233 | /* now call it */ | 329 | /* now call it */ |
| 234 | relocate_kernel((unsigned long)image->head, (unsigned long)page_list, | 330 | image->start = relocate_kernel((unsigned long)image->head, |
| 235 | image->start); | 331 | (unsigned long)page_list, |
| 332 | image->start, | ||
| 333 | image->preserve_context); | ||
| 334 | |||
| 335 | #ifdef CONFIG_KEXEC_JUMP | ||
| 336 | if (kexec_image->preserve_context) | ||
| 337 | restore_processor_state(); | ||
| 338 | #endif | ||
| 339 | |||
| 340 | __ftrace_enabled_restore(save_ftrace_enabled); | ||
| 236 | } | 341 | } |
| 237 | 342 | ||
| 238 | void arch_crash_save_vmcoreinfo(void) | 343 | void arch_crash_save_vmcoreinfo(void) |
diff --git a/arch/x86/kernel/mca_32.c b/arch/x86/kernel/mca_32.c index 2dc183758be3..845d80ce1ef1 100644 --- a/arch/x86/kernel/mca_32.c +++ b/arch/x86/kernel/mca_32.c | |||
| @@ -51,7 +51,6 @@ | |||
| 51 | #include <linux/ioport.h> | 51 | #include <linux/ioport.h> |
| 52 | #include <asm/uaccess.h> | 52 | #include <asm/uaccess.h> |
| 53 | #include <linux/init.h> | 53 | #include <linux/init.h> |
| 54 | #include <asm/arch_hooks.h> | ||
| 55 | 54 | ||
| 56 | static unsigned char which_scsi; | 55 | static unsigned char which_scsi; |
| 57 | 56 | ||
| @@ -474,6 +473,4 @@ void __kprobes mca_handle_nmi(void) | |||
| 474 | * adapter was responsible for the error. | 473 | * adapter was responsible for the error. |
| 475 | */ | 474 | */ |
| 476 | bus_for_each_dev(&mca_bus_type, NULL, NULL, mca_handle_nmi_callback); | 475 | bus_for_each_dev(&mca_bus_type, NULL, NULL, mca_handle_nmi_callback); |
| 477 | 476 | } | |
| 478 | mca_nmi_hook(); | ||
| 479 | } /* mca_handle_nmi */ | ||
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index c25fdb382292..453b5795a5c6 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c | |||
| @@ -12,31 +12,30 @@ | |||
| 12 | * | 12 | * |
| 13 | * Licensed under the terms of the GNU General Public | 13 | * Licensed under the terms of the GNU General Public |
| 14 | * License version 2. See file COPYING for details. | 14 | * License version 2. See file COPYING for details. |
| 15 | */ | 15 | */ |
| 16 | 16 | #include <linux/platform_device.h> | |
| 17 | #include <linux/capability.h> | 17 | #include <linux/capability.h> |
| 18 | #include <linux/kernel.h> | ||
| 19 | #include <linux/init.h> | ||
| 20 | #include <linux/sched.h> | ||
| 21 | #include <linux/cpumask.h> | ||
| 22 | #include <linux/module.h> | ||
| 23 | #include <linux/slab.h> | ||
| 24 | #include <linux/vmalloc.h> | ||
| 25 | #include <linux/miscdevice.h> | 18 | #include <linux/miscdevice.h> |
| 19 | #include <linux/firmware.h> | ||
| 26 | #include <linux/spinlock.h> | 20 | #include <linux/spinlock.h> |
| 27 | #include <linux/mm.h> | 21 | #include <linux/cpumask.h> |
| 28 | #include <linux/fs.h> | 22 | #include <linux/pci_ids.h> |
| 23 | #include <linux/uaccess.h> | ||
| 24 | #include <linux/vmalloc.h> | ||
| 25 | #include <linux/kernel.h> | ||
| 26 | #include <linux/module.h> | ||
| 29 | #include <linux/mutex.h> | 27 | #include <linux/mutex.h> |
| 28 | #include <linux/sched.h> | ||
| 29 | #include <linux/init.h> | ||
| 30 | #include <linux/slab.h> | ||
| 30 | #include <linux/cpu.h> | 31 | #include <linux/cpu.h> |
| 31 | #include <linux/firmware.h> | ||
| 32 | #include <linux/platform_device.h> | ||
| 33 | #include <linux/pci.h> | 32 | #include <linux/pci.h> |
| 34 | #include <linux/pci_ids.h> | 33 | #include <linux/fs.h> |
| 35 | #include <linux/uaccess.h> | 34 | #include <linux/mm.h> |
| 36 | 35 | ||
| 37 | #include <asm/msr.h> | ||
| 38 | #include <asm/processor.h> | ||
| 39 | #include <asm/microcode.h> | 36 | #include <asm/microcode.h> |
| 37 | #include <asm/processor.h> | ||
| 38 | #include <asm/msr.h> | ||
| 40 | 39 | ||
| 41 | MODULE_DESCRIPTION("AMD Microcode Update Driver"); | 40 | MODULE_DESCRIPTION("AMD Microcode Update Driver"); |
| 42 | MODULE_AUTHOR("Peter Oruba"); | 41 | MODULE_AUTHOR("Peter Oruba"); |
| @@ -72,8 +71,8 @@ struct microcode_header_amd { | |||
| 72 | } __attribute__((packed)); | 71 | } __attribute__((packed)); |
| 73 | 72 | ||
| 74 | struct microcode_amd { | 73 | struct microcode_amd { |
| 75 | struct microcode_header_amd hdr; | 74 | struct microcode_header_amd hdr; |
| 76 | unsigned int mpb[0]; | 75 | unsigned int mpb[0]; |
| 77 | }; | 76 | }; |
| 78 | 77 | ||
| 79 | #define UCODE_MAX_SIZE 2048 | 78 | #define UCODE_MAX_SIZE 2048 |
| @@ -184,8 +183,8 @@ static int get_ucode_data(void *to, const u8 *from, size_t n) | |||
| 184 | return 0; | 183 | return 0; |
| 185 | } | 184 | } |
| 186 | 185 | ||
| 187 | static void *get_next_ucode(const u8 *buf, unsigned int size, | 186 | static void * |
| 188 | unsigned int *mc_size) | 187 | get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size) |
| 189 | { | 188 | { |
| 190 | unsigned int total_size; | 189 | unsigned int total_size; |
| 191 | u8 section_hdr[UCODE_CONTAINER_SECTION_HDR]; | 190 | u8 section_hdr[UCODE_CONTAINER_SECTION_HDR]; |
| @@ -223,7 +222,6 @@ static void *get_next_ucode(const u8 *buf, unsigned int size, | |||
| 223 | return mc; | 222 | return mc; |
| 224 | } | 223 | } |
| 225 | 224 | ||
| 226 | |||
| 227 | static int install_equiv_cpu_table(const u8 *buf) | 225 | static int install_equiv_cpu_table(const u8 *buf) |
| 228 | { | 226 | { |
| 229 | u8 *container_hdr[UCODE_CONTAINER_HEADER_SIZE]; | 227 | u8 *container_hdr[UCODE_CONTAINER_HEADER_SIZE]; |
| @@ -372,4 +370,3 @@ struct microcode_ops * __init init_amd_microcode(void) | |||
| 372 | { | 370 | { |
| 373 | return &microcode_amd_ops; | 371 | return &microcode_amd_ops; |
| 374 | } | 372 | } |
| 375 | |||
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index c9b721ba968c..a0f3851ef310 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c | |||
| @@ -70,67 +70,78 @@ | |||
| 70 | * Fix sigmatch() macro to handle old CPUs with pf == 0. | 70 | * Fix sigmatch() macro to handle old CPUs with pf == 0. |
| 71 | * Thanks to Stuart Swales for pointing out this bug. | 71 | * Thanks to Stuart Swales for pointing out this bug. |
| 72 | */ | 72 | */ |
| 73 | #include <linux/platform_device.h> | ||
| 73 | #include <linux/capability.h> | 74 | #include <linux/capability.h> |
| 74 | #include <linux/kernel.h> | 75 | #include <linux/miscdevice.h> |
| 75 | #include <linux/init.h> | 76 | #include <linux/firmware.h> |
| 76 | #include <linux/sched.h> | ||
| 77 | #include <linux/smp_lock.h> | 77 | #include <linux/smp_lock.h> |
| 78 | #include <linux/spinlock.h> | ||
| 78 | #include <linux/cpumask.h> | 79 | #include <linux/cpumask.h> |
| 79 | #include <linux/module.h> | 80 | #include <linux/uaccess.h> |
| 80 | #include <linux/slab.h> | ||
| 81 | #include <linux/vmalloc.h> | 81 | #include <linux/vmalloc.h> |
| 82 | #include <linux/miscdevice.h> | 82 | #include <linux/kernel.h> |
| 83 | #include <linux/spinlock.h> | 83 | #include <linux/module.h> |
| 84 | #include <linux/mm.h> | ||
| 85 | #include <linux/fs.h> | ||
| 86 | #include <linux/mutex.h> | 84 | #include <linux/mutex.h> |
| 85 | #include <linux/sched.h> | ||
| 86 | #include <linux/init.h> | ||
| 87 | #include <linux/slab.h> | ||
| 87 | #include <linux/cpu.h> | 88 | #include <linux/cpu.h> |
| 88 | #include <linux/firmware.h> | 89 | #include <linux/fs.h> |
| 89 | #include <linux/platform_device.h> | 90 | #include <linux/mm.h> |
| 90 | 91 | ||
| 91 | #include <asm/msr.h> | ||
| 92 | #include <asm/uaccess.h> | ||
| 93 | #include <asm/processor.h> | ||
| 94 | #include <asm/microcode.h> | 92 | #include <asm/microcode.h> |
| 93 | #include <asm/processor.h> | ||
| 94 | #include <asm/msr.h> | ||
| 95 | 95 | ||
| 96 | MODULE_DESCRIPTION("Microcode Update Driver"); | 96 | MODULE_DESCRIPTION("Microcode Update Driver"); |
| 97 | MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>"); | 97 | MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>"); |
| 98 | MODULE_LICENSE("GPL"); | 98 | MODULE_LICENSE("GPL"); |
| 99 | 99 | ||
| 100 | #define MICROCODE_VERSION "2.00" | 100 | #define MICROCODE_VERSION "2.00" |
| 101 | 101 | ||
| 102 | static struct microcode_ops *microcode_ops; | 102 | static struct microcode_ops *microcode_ops; |
| 103 | 103 | ||
| 104 | /* no concurrent ->write()s are allowed on /dev/cpu/microcode */ | 104 | /* no concurrent ->write()s are allowed on /dev/cpu/microcode */ |
| 105 | static DEFINE_MUTEX(microcode_mutex); | 105 | static DEFINE_MUTEX(microcode_mutex); |
| 106 | 106 | ||
| 107 | struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; | 107 | struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; |
| 108 | EXPORT_SYMBOL_GPL(ucode_cpu_info); | 108 | EXPORT_SYMBOL_GPL(ucode_cpu_info); |
| 109 | 109 | ||
| 110 | #ifdef CONFIG_MICROCODE_OLD_INTERFACE | 110 | #ifdef CONFIG_MICROCODE_OLD_INTERFACE |
| 111 | struct update_for_cpu { | ||
| 112 | const void __user *buf; | ||
| 113 | size_t size; | ||
| 114 | }; | ||
| 115 | |||
| 116 | static long update_for_cpu(void *_ufc) | ||
| 117 | { | ||
| 118 | struct update_for_cpu *ufc = _ufc; | ||
| 119 | int error; | ||
| 120 | |||
| 121 | error = microcode_ops->request_microcode_user(smp_processor_id(), | ||
| 122 | ufc->buf, ufc->size); | ||
| 123 | if (error < 0) | ||
| 124 | return error; | ||
| 125 | if (!error) | ||
| 126 | microcode_ops->apply_microcode(smp_processor_id()); | ||
| 127 | return error; | ||
| 128 | } | ||
| 129 | |||
| 111 | static int do_microcode_update(const void __user *buf, size_t size) | 130 | static int do_microcode_update(const void __user *buf, size_t size) |
| 112 | { | 131 | { |
| 113 | cpumask_t old; | ||
| 114 | int error = 0; | 132 | int error = 0; |
| 115 | int cpu; | 133 | int cpu; |
| 116 | 134 | struct update_for_cpu ufc = { .buf = buf, .size = size }; | |
| 117 | old = current->cpus_allowed; | ||
| 118 | 135 | ||
| 119 | for_each_online_cpu(cpu) { | 136 | for_each_online_cpu(cpu) { |
| 120 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | 137 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; |
| 121 | 138 | ||
| 122 | if (!uci->valid) | 139 | if (!uci->valid) |
| 123 | continue; | 140 | continue; |
| 124 | 141 | error = work_on_cpu(cpu, update_for_cpu, &ufc); | |
| 125 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); | ||
| 126 | error = microcode_ops->request_microcode_user(cpu, buf, size); | ||
| 127 | if (error < 0) | 142 | if (error < 0) |
| 128 | goto out; | 143 | break; |
| 129 | if (!error) | ||
| 130 | microcode_ops->apply_microcode(cpu); | ||
| 131 | } | 144 | } |
| 132 | out: | ||
| 133 | set_cpus_allowed_ptr(current, &old); | ||
| 134 | return error; | 145 | return error; |
| 135 | } | 146 | } |
| 136 | 147 | ||
| @@ -198,18 +209,33 @@ static void microcode_dev_exit(void) | |||
| 198 | 209 | ||
| 199 | MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); | 210 | MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); |
| 200 | #else | 211 | #else |
| 201 | #define microcode_dev_init() 0 | 212 | #define microcode_dev_init() 0 |
| 202 | #define microcode_dev_exit() do { } while (0) | 213 | #define microcode_dev_exit() do { } while (0) |
| 203 | #endif | 214 | #endif |
| 204 | 215 | ||
| 205 | /* fake device for request_firmware */ | 216 | /* fake device for request_firmware */ |
| 206 | static struct platform_device *microcode_pdev; | 217 | static struct platform_device *microcode_pdev; |
| 218 | |||
| 219 | static long reload_for_cpu(void *unused) | ||
| 220 | { | ||
| 221 | struct ucode_cpu_info *uci = ucode_cpu_info + smp_processor_id(); | ||
| 222 | int err = 0; | ||
| 223 | |||
| | | 224 | mutex_lock(&microcode_mutex); | |
| | | 225 | if (uci->valid) { | |
| | | 226 | err = microcode_ops->request_microcode_fw(smp_processor_id(), | |
| | | 227 | &microcode_pdev->dev); | |
| | | 228 | if (!err) | |
| | | 229 | microcode_ops->apply_microcode(smp_processor_id()); | |
| | | 230 | } | |
| | | 231 | mutex_unlock(&microcode_mutex); | |
| 232 | return err; | ||
| 233 | } | ||
| 207 | 234 | ||
| 208 | static ssize_t reload_store(struct sys_device *dev, | 235 | static ssize_t reload_store(struct sys_device *dev, |
| 209 | struct sysdev_attribute *attr, | 236 | struct sysdev_attribute *attr, |
| 210 | const char *buf, size_t sz) | 237 | const char *buf, size_t sz) |
| 211 | { | 238 | { |
| 212 | struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; | ||
| 213 | char *end; | 239 | char *end; |
| 214 | unsigned long val = simple_strtoul(buf, &end, 0); | 240 | unsigned long val = simple_strtoul(buf, &end, 0); |
| 215 | int err = 0; | 241 | int err = 0; |
| @@ -218,21 +244,9 @@ static ssize_t reload_store(struct sys_device *dev, | |||
| 218 | if (end == buf) | 244 | if (end == buf) |
| 219 | return -EINVAL; | 245 | return -EINVAL; |
| 220 | if (val == 1) { | 246 | if (val == 1) { |
| 221 | cpumask_t old = current->cpus_allowed; | ||
| 222 | |||
| 223 | get_online_cpus(); | 247 | get_online_cpus(); |
| 224 | if (cpu_online(cpu)) { | 248 | if (cpu_online(cpu)) |
| 225 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); | 249 | err = work_on_cpu(cpu, reload_for_cpu, NULL); |
| 226 | mutex_lock(&microcode_mutex); | | |
| 227 | if (uci->valid) { | | |
| 228 | err = microcode_ops->request_microcode_fw(cpu, | | |
| 229 | &microcode_pdev->dev); | | |
| 230 | if (!err) | | |
| 231 | microcode_ops->apply_microcode(cpu); | | |
| 232 | } | | |
| 233 | mutex_unlock(&microcode_mutex); | | |
| 234 | set_cpus_allowed_ptr(current, &old); | ||
| 235 | } | ||
| 236 | put_online_cpus(); | 250 | put_online_cpus(); |
| 237 | } | 251 | } |
| 238 | if (err) | 252 | if (err) |
| @@ -268,8 +282,8 @@ static struct attribute *mc_default_attrs[] = { | |||
| 268 | }; | 282 | }; |
| 269 | 283 | ||
| 270 | static struct attribute_group mc_attr_group = { | 284 | static struct attribute_group mc_attr_group = { |
| 271 | .attrs = mc_default_attrs, | 285 | .attrs = mc_default_attrs, |
| 272 | .name = "microcode", | 286 | .name = "microcode", |
| 273 | }; | 287 | }; |
| 274 | 288 | ||
| 275 | static void __microcode_fini_cpu(int cpu) | 289 | static void __microcode_fini_cpu(int cpu) |
| @@ -328,9 +342,9 @@ static int microcode_resume_cpu(int cpu) | |||
| 328 | return 0; | 342 | return 0; |
| 329 | } | 343 | } |
| 330 | 344 | ||
| 331 | static void microcode_update_cpu(int cpu) | 345 | static long microcode_update_cpu(void *unused) |
| 332 | { | 346 | { |
| 333 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | 347 | struct ucode_cpu_info *uci = ucode_cpu_info + smp_processor_id(); |
| 334 | int err = 0; | 348 | int err = 0; |
| 335 | 349 | ||
| 336 | /* | 350 | /* |
| @@ -338,30 +352,27 @@ static void microcode_update_cpu(int cpu) | |||
| 338 | * otherwise just request a firmware: | 352 | * otherwise just request a firmware: |
| 339 | */ | 353 | */ |
| 340 | if (uci->valid) { | 354 | if (uci->valid) { |
| 341 | err = microcode_resume_cpu(cpu); | 355 | err = microcode_resume_cpu(smp_processor_id()); |
| 342 | } else { | 356 | } else { |
| 343 | collect_cpu_info(cpu); | 357 | collect_cpu_info(smp_processor_id()); |
| 344 | if (uci->valid && system_state == SYSTEM_RUNNING) | 358 | if (uci->valid && system_state == SYSTEM_RUNNING) |
| 345 | err = microcode_ops->request_microcode_fw(cpu, | 359 | err = microcode_ops->request_microcode_fw( |
| 360 | smp_processor_id(), | ||
| 346 | &microcode_pdev->dev); | 361 | &microcode_pdev->dev); |
| 347 | } | 362 | } |
| 348 | if (!err) | 363 | if (!err) |
| 349 | microcode_ops->apply_microcode(cpu); | 364 | microcode_ops->apply_microcode(smp_processor_id()); |
| 365 | return err; | ||
| 350 | } | 366 | } |
| 351 | 367 | ||
| 352 | static void microcode_init_cpu(int cpu) | 368 | static int microcode_init_cpu(int cpu) |
| 353 | { | 369 | { |
| 354 | cpumask_t old = current->cpus_allowed; | 370 | int err; |
| 355 | |||
| 356 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); | ||
| 357 | /* We should bind the task to the CPU */ | ||
| 358 | BUG_ON(raw_smp_processor_id() != cpu); | ||
| 359 | |||
| 360 | mutex_lock(&microcode_mutex); | 371 | mutex_lock(&microcode_mutex); |
| 361 | microcode_update_cpu(cpu); | 372 | err = work_on_cpu(cpu, microcode_update_cpu, NULL); |
| 362 | mutex_unlock(&microcode_mutex); | 373 | mutex_unlock(&microcode_mutex); |
| 363 | 374 | ||
| 364 | set_cpus_allowed_ptr(current, &old); | 375 | return err; |
| 365 | } | 376 | } |
| 366 | 377 | ||
| 367 | static int mc_sysdev_add(struct sys_device *sys_dev) | 378 | static int mc_sysdev_add(struct sys_device *sys_dev) |
| @@ -379,8 +390,11 @@ static int mc_sysdev_add(struct sys_device *sys_dev) | |||
| 379 | if (err) | 390 | if (err) |
| 380 | return err; | 391 | return err; |
| 381 | 392 | ||
| 382 | microcode_init_cpu(cpu); | 393 | err = microcode_init_cpu(cpu); |
| 383 | return 0; | 394 | if (err) |
| 395 | sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); | ||
| 396 | |||
| 397 | return err; | ||
| 384 | } | 398 | } |
| 385 | 399 | ||
| 386 | static int mc_sysdev_remove(struct sys_device *sys_dev) | 400 | static int mc_sysdev_remove(struct sys_device *sys_dev) |
| @@ -404,14 +418,14 @@ static int mc_sysdev_resume(struct sys_device *dev) | |||
| 404 | return 0; | 418 | return 0; |
| 405 | 419 | ||
| 406 | /* only CPU 0 will apply ucode here */ | 420 | /* only CPU 0 will apply ucode here */ |
| 407 | microcode_update_cpu(0); | 421 | microcode_update_cpu(NULL); |
| 408 | return 0; | 422 | return 0; |
| 409 | } | 423 | } |
| 410 | 424 | ||
| 411 | static struct sysdev_driver mc_sysdev_driver = { | 425 | static struct sysdev_driver mc_sysdev_driver = { |
| 412 | .add = mc_sysdev_add, | 426 | .add = mc_sysdev_add, |
| 413 | .remove = mc_sysdev_remove, | 427 | .remove = mc_sysdev_remove, |
| 414 | .resume = mc_sysdev_resume, | 428 | .resume = mc_sysdev_resume, |
| 415 | }; | 429 | }; |
| 416 | 430 | ||
| 417 | static __cpuinit int | 431 | static __cpuinit int |
| @@ -424,7 +438,9 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) | |||
| 424 | switch (action) { | 438 | switch (action) { |
| 425 | case CPU_ONLINE: | 439 | case CPU_ONLINE: |
| 426 | case CPU_ONLINE_FROZEN: | 440 | case CPU_ONLINE_FROZEN: |
| 427 | microcode_init_cpu(cpu); | 441 | if (microcode_init_cpu(cpu)) |
| 442 | printk(KERN_ERR "microcode: failed to init CPU%d\n", | ||
| 443 | cpu); | ||
| 428 | case CPU_DOWN_FAILED: | 444 | case CPU_DOWN_FAILED: |
| 429 | case CPU_DOWN_FAILED_FROZEN: | 445 | case CPU_DOWN_FAILED_FROZEN: |
| 430 | pr_debug("microcode: CPU%d added\n", cpu); | 446 | pr_debug("microcode: CPU%d added\n", cpu); |
| @@ -448,7 +464,7 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) | |||
| 448 | } | 464 | } |
| 449 | 465 | ||
| 450 | static struct notifier_block __refdata mc_cpu_notifier = { | 466 | static struct notifier_block __refdata mc_cpu_notifier = { |
| 451 | .notifier_call = mc_cpu_callback, | 467 | .notifier_call = mc_cpu_callback, |
| 452 | }; | 468 | }; |
| 453 | 469 | ||
| 454 | static int __init microcode_init(void) | 470 | static int __init microcode_init(void) |
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index b7f4c929e615..149b9ec7c1ab 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c | |||
| @@ -70,28 +70,28 @@ | |||
| 70 | * Fix sigmatch() macro to handle old CPUs with pf == 0. | 70 | * Fix sigmatch() macro to handle old CPUs with pf == 0. |
| 71 | * Thanks to Stuart Swales for pointing out this bug. | 71 | * Thanks to Stuart Swales for pointing out this bug. |
| 72 | */ | 72 | */ |
| 73 | #include <linux/platform_device.h> | ||
| 73 | #include <linux/capability.h> | 74 | #include <linux/capability.h> |
| 74 | #include <linux/kernel.h> | 75 | #include <linux/miscdevice.h> |
| 75 | #include <linux/init.h> | 76 | #include <linux/firmware.h> |
| 76 | #include <linux/sched.h> | ||
| 77 | #include <linux/smp_lock.h> | 77 | #include <linux/smp_lock.h> |
| 78 | #include <linux/spinlock.h> | ||
| 78 | #include <linux/cpumask.h> | 79 | #include <linux/cpumask.h> |
| 79 | #include <linux/module.h> | 80 | #include <linux/uaccess.h> |
| 80 | #include <linux/slab.h> | ||
| 81 | #include <linux/vmalloc.h> | 81 | #include <linux/vmalloc.h> |
| 82 | #include <linux/miscdevice.h> | 82 | #include <linux/kernel.h> |
| 83 | #include <linux/spinlock.h> | 83 | #include <linux/module.h> |
| 84 | #include <linux/mm.h> | ||
| 85 | #include <linux/fs.h> | ||
| 86 | #include <linux/mutex.h> | 84 | #include <linux/mutex.h> |
| 85 | #include <linux/sched.h> | ||
| 86 | #include <linux/init.h> | ||
| 87 | #include <linux/slab.h> | ||
| 87 | #include <linux/cpu.h> | 88 | #include <linux/cpu.h> |
| 88 | #include <linux/firmware.h> | 89 | #include <linux/fs.h> |
| 89 | #include <linux/platform_device.h> | 90 | #include <linux/mm.h> |
| 90 | 91 | ||
| 91 | #include <asm/msr.h> | ||
| 92 | #include <asm/uaccess.h> | ||
| 93 | #include <asm/processor.h> | ||
| 94 | #include <asm/microcode.h> | 92 | #include <asm/microcode.h> |
| 93 | #include <asm/processor.h> | ||
| 94 | #include <asm/msr.h> | ||
| 95 | 95 | ||
| 96 | MODULE_DESCRIPTION("Microcode Update Driver"); | 96 | MODULE_DESCRIPTION("Microcode Update Driver"); |
| 97 | MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>"); | 97 | MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>"); |
| @@ -129,12 +129,13 @@ struct extended_sigtable { | |||
| 129 | struct extended_signature sigs[0]; | 129 | struct extended_signature sigs[0]; |
| 130 | }; | 130 | }; |
| 131 | 131 | ||
| 132 | #define DEFAULT_UCODE_DATASIZE (2000) | 132 | #define DEFAULT_UCODE_DATASIZE (2000) |
| 133 | #define MC_HEADER_SIZE (sizeof(struct microcode_header_intel)) | 133 | #define MC_HEADER_SIZE (sizeof(struct microcode_header_intel)) |
| 134 | #define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) | 134 | #define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) |
| 135 | #define EXT_HEADER_SIZE (sizeof(struct extended_sigtable)) | 135 | #define EXT_HEADER_SIZE (sizeof(struct extended_sigtable)) |
| 136 | #define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature)) | 136 | #define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature)) |
| 137 | #define DWSIZE (sizeof(u32)) | 137 | #define DWSIZE (sizeof(u32)) |
| 138 | |||
| 138 | #define get_totalsize(mc) \ | 139 | #define get_totalsize(mc) \ |
| 139 | (((struct microcode_intel *)mc)->hdr.totalsize ? \ | 140 | (((struct microcode_intel *)mc)->hdr.totalsize ? \ |
| 140 | ((struct microcode_intel *)mc)->hdr.totalsize : \ | 141 | ((struct microcode_intel *)mc)->hdr.totalsize : \ |
| @@ -196,31 +197,32 @@ static inline int update_match_cpu(struct cpu_signature *csig, int sig, int pf) | |||
| 196 | return (!sigmatch(sig, csig->sig, pf, csig->pf)) ? 0 : 1; | 197 | return (!sigmatch(sig, csig->sig, pf, csig->pf)) ? 0 : 1; |
| 197 | } | 198 | } |
| 198 | 199 | ||
| 199 | static inline int | 200 | static inline int |
| 200 | update_match_revision(struct microcode_header_intel *mc_header, int rev) | 201 | update_match_revision(struct microcode_header_intel *mc_header, int rev) |
| 201 | { | 202 | { |
| 202 | return (mc_header->rev <= rev) ? 0 : 1; | 203 | return (mc_header->rev <= rev) ? 0 : 1; |
| 203 | } | 204 | } |
| 204 | 205 | ||
| 205 | static int microcode_sanity_check(void *mc) | 206 | static int microcode_sanity_check(void *mc) |
| 206 | { | 207 | { |
| 208 | unsigned long total_size, data_size, ext_table_size; | ||
| 207 | struct microcode_header_intel *mc_header = mc; | 209 | struct microcode_header_intel *mc_header = mc; |
| 208 | struct extended_sigtable *ext_header = NULL; | 210 | struct extended_sigtable *ext_header = NULL; |
| 209 | struct extended_signature *ext_sig; | ||
| 210 | unsigned long total_size, data_size, ext_table_size; | ||
| 211 | int sum, orig_sum, ext_sigcount = 0, i; | 211 | int sum, orig_sum, ext_sigcount = 0, i; |
| 212 | struct extended_signature *ext_sig; | ||
| 212 | 213 | ||
| 213 | total_size = get_totalsize(mc_header); | 214 | total_size = get_totalsize(mc_header); |
| 214 | data_size = get_datasize(mc_header); | 215 | data_size = get_datasize(mc_header); |
| 216 | |||
| 215 | if (data_size + MC_HEADER_SIZE > total_size) { | 217 | if (data_size + MC_HEADER_SIZE > total_size) { |
| 216 | printk(KERN_ERR "microcode: error! " | 218 | printk(KERN_ERR "microcode: error! " |
| 217 | "Bad data size in microcode data file\n"); | 219 | "Bad data size in microcode data file\n"); |
| 218 | return -EINVAL; | 220 | return -EINVAL; |
| 219 | } | 221 | } |
| 220 | 222 | ||
| 221 | if (mc_header->ldrver != 1 || mc_header->hdrver != 1) { | 223 | if (mc_header->ldrver != 1 || mc_header->hdrver != 1) { |
| 222 | printk(KERN_ERR "microcode: error! " | 224 | printk(KERN_ERR "microcode: error! " |
| 223 | "Unknown microcode update format\n"); | 225 | "Unknown microcode update format\n"); |
| 224 | return -EINVAL; | 226 | return -EINVAL; |
| 225 | } | 227 | } |
| 226 | ext_table_size = total_size - (MC_HEADER_SIZE + data_size); | 228 | ext_table_size = total_size - (MC_HEADER_SIZE + data_size); |
| @@ -318,11 +320,15 @@ get_matching_microcode(struct cpu_signature *cpu_sig, void *mc, int rev) | |||
| 318 | 320 | ||
| 319 | static void apply_microcode(int cpu) | 321 | static void apply_microcode(int cpu) |
| 320 | { | 322 | { |
| 323 | struct microcode_intel *mc_intel; | ||
| 324 | struct ucode_cpu_info *uci; | ||
| 321 | unsigned long flags; | 325 | unsigned long flags; |
| 322 | unsigned int val[2]; | 326 | unsigned int val[2]; |
| 323 | int cpu_num = raw_smp_processor_id(); | 327 | int cpu_num; |
| 324 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | 328 | |
| 325 | struct microcode_intel *mc_intel = uci->mc; | 329 | cpu_num = raw_smp_processor_id(); |
| 330 | uci = ucode_cpu_info + cpu; | ||
| 331 | mc_intel = uci->mc; | ||
| 326 | 332 | ||
| 327 | /* We should bind the task to the CPU */ | 333 | /* We should bind the task to the CPU */ |
| 328 | BUG_ON(cpu_num != cpu); | 334 | BUG_ON(cpu_num != cpu); |
| @@ -348,15 +354,17 @@ static void apply_microcode(int cpu) | |||
| 348 | spin_unlock_irqrestore(&microcode_update_lock, flags); | 354 | spin_unlock_irqrestore(&microcode_update_lock, flags); |
| 349 | if (val[1] != mc_intel->hdr.rev) { | 355 | if (val[1] != mc_intel->hdr.rev) { |
| 350 | printk(KERN_ERR "microcode: CPU%d update from revision " | 356 | printk(KERN_ERR "microcode: CPU%d update from revision " |
| 351 | "0x%x to 0x%x failed\n", cpu_num, uci->cpu_sig.rev, val[1]); | 357 | "0x%x to 0x%x failed\n", |
| 358 | cpu_num, uci->cpu_sig.rev, val[1]); | ||
| 352 | return; | 359 | return; |
| 353 | } | 360 | } |
| 354 | printk(KERN_INFO "microcode: CPU%d updated from revision " | 361 | printk(KERN_INFO "microcode: CPU%d updated from revision " |
| 355 | "0x%x to 0x%x, date = %04x-%02x-%02x \n", | 362 | "0x%x to 0x%x, date = %04x-%02x-%02x \n", |
| 356 | cpu_num, uci->cpu_sig.rev, val[1], | 363 | cpu_num, uci->cpu_sig.rev, val[1], |
| 357 | mc_intel->hdr.date & 0xffff, | 364 | mc_intel->hdr.date & 0xffff, |
| 358 | mc_intel->hdr.date >> 24, | 365 | mc_intel->hdr.date >> 24, |
| 359 | (mc_intel->hdr.date >> 16) & 0xff); | 366 | (mc_intel->hdr.date >> 16) & 0xff); |
| 367 | |||
| 360 | uci->cpu_sig.rev = val[1]; | 368 | uci->cpu_sig.rev = val[1]; |
| 361 | } | 369 | } |
| 362 | 370 | ||
| @@ -404,18 +412,23 @@ static int generic_load_microcode(int cpu, void *data, size_t size, | |||
| 404 | leftover -= mc_size; | 412 | leftover -= mc_size; |
| 405 | } | 413 | } |
| 406 | 414 | ||
| 407 | if (new_mc) { | 415 | if (!new_mc) |
| 408 | if (!leftover) { | 416 | goto out; |
| 409 | if (uci->mc) | 417 | |
| 410 | vfree(uci->mc); | 418 | if (leftover) { |
| 411 | uci->mc = (struct microcode_intel *)new_mc; | 419 | vfree(new_mc); |
| 412 | pr_debug("microcode: CPU%d found a matching microcode update with" | 420 | goto out; |
| 413 | " version 0x%x (current=0x%x)\n", | ||
| 414 | cpu, new_rev, uci->cpu_sig.rev); | ||
| 415 | } else | ||
| 416 | vfree(new_mc); | ||
| 417 | } | 421 | } |
| 418 | 422 | ||
| 423 | if (uci->mc) | ||
| 424 | vfree(uci->mc); | ||
| 425 | uci->mc = (struct microcode_intel *)new_mc; | ||
| 426 | |||
| 427 | pr_debug("microcode: CPU%d found a matching microcode update with" | ||
| 428 | " version 0x%x (current=0x%x)\n", | ||
| 429 | cpu, new_rev, uci->cpu_sig.rev); | ||
| 430 | |||
| 431 | out: | ||
| 419 | return (int)leftover; | 432 | return (int)leftover; |
| 420 | } | 433 | } |
| 421 | 434 | ||
| @@ -442,8 +455,8 @@ static int request_microcode_fw(int cpu, struct device *device) | |||
| 442 | return ret; | 455 | return ret; |
| 443 | } | 456 | } |
| 444 | 457 | ||
| 445 | ret = generic_load_microcode(cpu, (void*)firmware->data, firmware->size, | 458 | ret = generic_load_microcode(cpu, (void *)firmware->data, |
| 446 | &get_ucode_fw); | 459 | firmware->size, &get_ucode_fw); |
| 447 | 460 | ||
| 448 | release_firmware(firmware); | 461 | release_firmware(firmware); |
| 449 | 462 | ||
| @@ -460,7 +473,7 @@ static int request_microcode_user(int cpu, const void __user *buf, size_t size) | |||
| 460 | /* We should bind the task to the CPU */ | 473 | /* We should bind the task to the CPU */ |
| 461 | BUG_ON(cpu != raw_smp_processor_id()); | 474 | BUG_ON(cpu != raw_smp_processor_id()); |
| 462 | 475 | ||
| 463 | return generic_load_microcode(cpu, (void*)buf, size, &get_ucode_user); | 476 | return generic_load_microcode(cpu, (void *)buf, size, &get_ucode_user); |
| 464 | } | 477 | } |
| 465 | 478 | ||
| 466 | static void microcode_fini_cpu(int cpu) | 479 | static void microcode_fini_cpu(int cpu) |
diff --git a/arch/x86/kernel/module_32.c b/arch/x86/kernel/module_32.c index 3db0a5442eb1..0edd819050e7 100644 --- a/arch/x86/kernel/module_32.c +++ b/arch/x86/kernel/module_32.c | |||
| @@ -42,7 +42,7 @@ void module_free(struct module *mod, void *module_region) | |||
| 42 | { | 42 | { |
| 43 | vfree(module_region); | 43 | vfree(module_region); |
| 44 | /* FIXME: If module_region == mod->init_region, trim exception | 44 | /* FIXME: If module_region == mod->init_region, trim exception |
| 45 | table entries. */ | 45 | table entries. */ |
| 46 | } | 46 | } |
| 47 | 47 | ||
| 48 | /* We don't need anything special. */ | 48 | /* We don't need anything special. */ |
| @@ -113,13 +113,13 @@ int module_finalize(const Elf_Ehdr *hdr, | |||
| 113 | *para = NULL; | 113 | *para = NULL; |
| 114 | char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; | 114 | char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; |
| 115 | 115 | ||
| 116 | for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { | 116 | for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { |
| 117 | if (!strcmp(".text", secstrings + s->sh_name)) | 117 | if (!strcmp(".text", secstrings + s->sh_name)) |
| 118 | text = s; | 118 | text = s; |
| 119 | if (!strcmp(".altinstructions", secstrings + s->sh_name)) | 119 | if (!strcmp(".altinstructions", secstrings + s->sh_name)) |
| 120 | alt = s; | 120 | alt = s; |
| 121 | if (!strcmp(".smp_locks", secstrings + s->sh_name)) | 121 | if (!strcmp(".smp_locks", secstrings + s->sh_name)) |
| 122 | locks= s; | 122 | locks = s; |
| 123 | if (!strcmp(".parainstructions", secstrings + s->sh_name)) | 123 | if (!strcmp(".parainstructions", secstrings + s->sh_name)) |
| 124 | para = s; | 124 | para = s; |
| 125 | } | 125 | } |
diff --git a/arch/x86/kernel/module_64.c b/arch/x86/kernel/module_64.c index 6ba87830d4b1..c23880b90b5c 100644 --- a/arch/x86/kernel/module_64.c +++ b/arch/x86/kernel/module_64.c | |||
| @@ -30,14 +30,14 @@ | |||
| 30 | #include <asm/page.h> | 30 | #include <asm/page.h> |
| 31 | #include <asm/pgtable.h> | 31 | #include <asm/pgtable.h> |
| 32 | 32 | ||
| 33 | #define DEBUGP(fmt...) | 33 | #define DEBUGP(fmt...) |
| 34 | 34 | ||
| 35 | #ifndef CONFIG_UML | 35 | #ifndef CONFIG_UML |
| 36 | void module_free(struct module *mod, void *module_region) | 36 | void module_free(struct module *mod, void *module_region) |
| 37 | { | 37 | { |
| 38 | vfree(module_region); | 38 | vfree(module_region); |
| 39 | /* FIXME: If module_region == mod->init_region, trim exception | 39 | /* FIXME: If module_region == mod->init_region, trim exception |
| 40 | table entries. */ | 40 | table entries. */ |
| 41 | } | 41 | } |
| 42 | 42 | ||
| 43 | void *module_alloc(unsigned long size) | 43 | void *module_alloc(unsigned long size) |
| @@ -77,7 +77,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, | |||
| 77 | Elf64_Rela *rel = (void *)sechdrs[relsec].sh_addr; | 77 | Elf64_Rela *rel = (void *)sechdrs[relsec].sh_addr; |
| 78 | Elf64_Sym *sym; | 78 | Elf64_Sym *sym; |
| 79 | void *loc; | 79 | void *loc; |
| 80 | u64 val; | 80 | u64 val; |
| 81 | 81 | ||
| 82 | DEBUGP("Applying relocate section %u to %u\n", relsec, | 82 | DEBUGP("Applying relocate section %u to %u\n", relsec, |
| 83 | sechdrs[relsec].sh_info); | 83 | sechdrs[relsec].sh_info); |
| @@ -91,11 +91,11 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, | |||
| 91 | sym = (Elf64_Sym *)sechdrs[symindex].sh_addr | 91 | sym = (Elf64_Sym *)sechdrs[symindex].sh_addr |
| 92 | + ELF64_R_SYM(rel[i].r_info); | 92 | + ELF64_R_SYM(rel[i].r_info); |
| 93 | 93 | ||
| 94 | DEBUGP("type %d st_value %Lx r_addend %Lx loc %Lx\n", | 94 | DEBUGP("type %d st_value %Lx r_addend %Lx loc %Lx\n", |
| 95 | (int)ELF64_R_TYPE(rel[i].r_info), | 95 | (int)ELF64_R_TYPE(rel[i].r_info), |
| 96 | sym->st_value, rel[i].r_addend, (u64)loc); | 96 | sym->st_value, rel[i].r_addend, (u64)loc); |
| 97 | 97 | ||
| 98 | val = sym->st_value + rel[i].r_addend; | 98 | val = sym->st_value + rel[i].r_addend; |
| 99 | 99 | ||
| 100 | switch (ELF64_R_TYPE(rel[i].r_info)) { | 100 | switch (ELF64_R_TYPE(rel[i].r_info)) { |
| 101 | case R_X86_64_NONE: | 101 | case R_X86_64_NONE: |
| @@ -113,16 +113,16 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, | |||
| 113 | if ((s64)val != *(s32 *)loc) | 113 | if ((s64)val != *(s32 *)loc) |
| 114 | goto overflow; | 114 | goto overflow; |
| 115 | break; | 115 | break; |
| 116 | case R_X86_64_PC32: | 116 | case R_X86_64_PC32: |
| 117 | val -= (u64)loc; | 117 | val -= (u64)loc; |
| 118 | *(u32 *)loc = val; | 118 | *(u32 *)loc = val; |
| 119 | #if 0 | 119 | #if 0 |
| 120 | if ((s64)val != *(s32 *)loc) | 120 | if ((s64)val != *(s32 *)loc) |
| 121 | goto overflow; | 121 | goto overflow; |
| 122 | #endif | 122 | #endif |
| 123 | break; | 123 | break; |
| 124 | default: | 124 | default: |
| 125 | printk(KERN_ERR "module %s: Unknown rela relocation: %Lu\n", | 125 | printk(KERN_ERR "module %s: Unknown rela relocation: %llu\n", |
| 126 | me->name, ELF64_R_TYPE(rel[i].r_info)); | 126 | me->name, ELF64_R_TYPE(rel[i].r_info)); |
| 127 | return -ENOEXEC; | 127 | return -ENOEXEC; |
| 128 | } | 128 | } |
| @@ -130,7 +130,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, | |||
| 130 | return 0; | 130 | return 0; |
| 131 | 131 | ||
| 132 | overflow: | 132 | overflow: |
| 133 | printk(KERN_ERR "overflow in relocation type %d val %Lx\n", | 133 | printk(KERN_ERR "overflow in relocation type %d val %Lx\n", |
| 134 | (int)ELF64_R_TYPE(rel[i].r_info), val); | 134 | (int)ELF64_R_TYPE(rel[i].r_info), val); |
| 135 | printk(KERN_ERR "`%s' likely not compiled with -mcmodel=kernel\n", | 135 | printk(KERN_ERR "`%s' likely not compiled with -mcmodel=kernel\n", |
| 136 | me->name); | 136 | me->name); |
| @@ -143,13 +143,13 @@ int apply_relocate(Elf_Shdr *sechdrs, | |||
| 143 | unsigned int relsec, | 143 | unsigned int relsec, |
| 144 | struct module *me) | 144 | struct module *me) |
| 145 | { | 145 | { |
| 146 | printk("non add relocation not supported\n"); | 146 | printk(KERN_ERR "non add relocation not supported\n"); |
| 147 | return -ENOSYS; | 147 | return -ENOSYS; |
| 148 | } | 148 | } |
| 149 | 149 | ||
| 150 | int module_finalize(const Elf_Ehdr *hdr, | 150 | int module_finalize(const Elf_Ehdr *hdr, |
| 151 | const Elf_Shdr *sechdrs, | 151 | const Elf_Shdr *sechdrs, |
| 152 | struct module *me) | 152 | struct module *me) |
| 153 | { | 153 | { |
| 154 | const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, | 154 | const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, |
| 155 | *para = NULL; | 155 | *para = NULL; |
| @@ -161,7 +161,7 @@ int module_finalize(const Elf_Ehdr *hdr, | |||
| 161 | if (!strcmp(".altinstructions", secstrings + s->sh_name)) | 161 | if (!strcmp(".altinstructions", secstrings + s->sh_name)) |
| 162 | alt = s; | 162 | alt = s; |
| 163 | if (!strcmp(".smp_locks", secstrings + s->sh_name)) | 163 | if (!strcmp(".smp_locks", secstrings + s->sh_name)) |
| 164 | locks= s; | 164 | locks = s; |
| 165 | if (!strcmp(".parainstructions", secstrings + s->sh_name)) | 165 | if (!strcmp(".parainstructions", secstrings + s->sh_name)) |
| 166 | para = s; | 166 | para = s; |
| 167 | } | 167 | } |
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index a649a4ccad43..e8192401da47 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c | |||
| @@ -3,7 +3,7 @@ | |||
| 3 | * compliant MP-table parsing routines. | 3 | * compliant MP-table parsing routines. |
| 4 | * | 4 | * |
| 5 | * (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk> | 5 | * (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk> |
| 6 | * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com> | 6 | * (c) 1998, 1999, 2000, 2009 Ingo Molnar <mingo@redhat.com> |
| 7 | * (c) 2008 Alexey Starikovskiy <astarikovskiy@suse.de> | 7 | * (c) 2008 Alexey Starikovskiy <astarikovskiy@suse.de> |
| 8 | */ | 8 | */ |
| 9 | 9 | ||
| @@ -29,12 +29,7 @@ | |||
| 29 | #include <asm/setup.h> | 29 | #include <asm/setup.h> |
| 30 | #include <asm/smp.h> | 30 | #include <asm/smp.h> |
| 31 | 31 | ||
| 32 | #include <mach_apic.h> | 32 | #include <asm/apic.h> |
| 33 | #ifdef CONFIG_X86_32 | ||
| 34 | #include <mach_apicdef.h> | ||
| 35 | #include <mach_mpparse.h> | ||
| 36 | #endif | ||
| 37 | |||
| 38 | /* | 33 | /* |
| 39 | * Checksum an MP configuration block. | 34 | * Checksum an MP configuration block. |
| 40 | */ | 35 | */ |
| @@ -144,11 +139,11 @@ static void __init MP_ioapic_info(struct mpc_ioapic *m) | |||
| 144 | if (bad_ioapic(m->apicaddr)) | 139 | if (bad_ioapic(m->apicaddr)) |
| 145 | return; | 140 | return; |
| 146 | 141 | ||
| 147 | mp_ioapics[nr_ioapics].mp_apicaddr = m->apicaddr; | 142 | mp_ioapics[nr_ioapics].apicaddr = m->apicaddr; |
| 148 | mp_ioapics[nr_ioapics].mp_apicid = m->apicid; | 143 | mp_ioapics[nr_ioapics].apicid = m->apicid; |
| 149 | mp_ioapics[nr_ioapics].mp_type = m->type; | 144 | mp_ioapics[nr_ioapics].type = m->type; |
| 150 | mp_ioapics[nr_ioapics].mp_apicver = m->apicver; | 145 | mp_ioapics[nr_ioapics].apicver = m->apicver; |
| 151 | mp_ioapics[nr_ioapics].mp_flags = m->flags; | 146 | mp_ioapics[nr_ioapics].flags = m->flags; |
| 152 | nr_ioapics++; | 147 | nr_ioapics++; |
| 153 | } | 148 | } |
| 154 | 149 | ||
| @@ -160,55 +155,55 @@ static void print_MP_intsrc_info(struct mpc_intsrc *m) | |||
| 160 | m->srcbusirq, m->dstapic, m->dstirq); | 155 | m->srcbusirq, m->dstapic, m->dstirq); |
| 161 | } | 156 | } |
| 162 | 157 | ||
| 163 | static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq) | 158 | static void __init print_mp_irq_info(struct mpc_intsrc *mp_irq) |
| 164 | { | 159 | { |
| 165 | apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x," | 160 | apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x," |
| 166 | " IRQ %02x, APIC ID %x, APIC INT %02x\n", | 161 | " IRQ %02x, APIC ID %x, APIC INT %02x\n", |
| 167 | mp_irq->mp_irqtype, mp_irq->mp_irqflag & 3, | 162 | mp_irq->irqtype, mp_irq->irqflag & 3, |
| 168 | (mp_irq->mp_irqflag >> 2) & 3, mp_irq->mp_srcbus, | 163 | (mp_irq->irqflag >> 2) & 3, mp_irq->srcbus, |
| 169 | mp_irq->mp_srcbusirq, mp_irq->mp_dstapic, mp_irq->mp_dstirq); | 164 | mp_irq->srcbusirq, mp_irq->dstapic, mp_irq->dstirq); |
| 170 | } | 165 | } |
| 171 | 166 | ||
| 172 | static void __init assign_to_mp_irq(struct mpc_intsrc *m, | 167 | static void __init assign_to_mp_irq(struct mpc_intsrc *m, |
| 173 | struct mp_config_intsrc *mp_irq) | 168 | struct mpc_intsrc *mp_irq) |
| 174 | { | 169 | { |
| 175 | mp_irq->mp_dstapic = m->dstapic; | 170 | mp_irq->dstapic = m->dstapic; |
| 176 | mp_irq->mp_type = m->type; | 171 | mp_irq->type = m->type; |
| 177 | mp_irq->mp_irqtype = m->irqtype; | 172 | mp_irq->irqtype = m->irqtype; |
| 178 | mp_irq->mp_irqflag = m->irqflag; | 173 | mp_irq->irqflag = m->irqflag; |
| 179 | mp_irq->mp_srcbus = m->srcbus; | 174 | mp_irq->srcbus = m->srcbus; |
| 180 | mp_irq->mp_srcbusirq = m->srcbusirq; | 175 | mp_irq->srcbusirq = m->srcbusirq; |
| 181 | mp_irq->mp_dstirq = m->dstirq; | 176 | mp_irq->dstirq = m->dstirq; |
| 182 | } | 177 | } |
| 183 | 178 | ||
| 184 | static void __init assign_to_mpc_intsrc(struct mp_config_intsrc *mp_irq, | 179 | static void __init assign_to_mpc_intsrc(struct mpc_intsrc *mp_irq, |
| 185 | struct mpc_intsrc *m) | 180 | struct mpc_intsrc *m) |
| 186 | { | 181 | { |
| 187 | m->dstapic = mp_irq->mp_dstapic; | 182 | m->dstapic = mp_irq->dstapic; |
| 188 | m->type = mp_irq->mp_type; | 183 | m->type = mp_irq->type; |
| 189 | m->irqtype = mp_irq->mp_irqtype; | 184 | m->irqtype = mp_irq->irqtype; |
| 190 | m->irqflag = mp_irq->mp_irqflag; | 185 | m->irqflag = mp_irq->irqflag; |
| 191 | m->srcbus = mp_irq->mp_srcbus; | 186 | m->srcbus = mp_irq->srcbus; |
| 192 | m->srcbusirq = mp_irq->mp_srcbusirq; | 187 | m->srcbusirq = mp_irq->srcbusirq; |
| 193 | m->dstirq = mp_irq->mp_dstirq; | 188 | m->dstirq = mp_irq->dstirq; |
| 194 | } | 189 | } |
| 195 | 190 | ||
| 196 | static int __init mp_irq_mpc_intsrc_cmp(struct mp_config_intsrc *mp_irq, | 191 | static int __init mp_irq_mpc_intsrc_cmp(struct mpc_intsrc *mp_irq, |
| 197 | struct mpc_intsrc *m) | 192 | struct mpc_intsrc *m) |
| 198 | { | 193 | { |
| 199 | if (mp_irq->mp_dstapic != m->dstapic) | 194 | if (mp_irq->dstapic != m->dstapic) |
| 200 | return 1; | 195 | return 1; |
| 201 | if (mp_irq->mp_type != m->type) | 196 | if (mp_irq->type != m->type) |
| 202 | return 2; | 197 | return 2; |
| 203 | if (mp_irq->mp_irqtype != m->irqtype) | 198 | if (mp_irq->irqtype != m->irqtype) |
| 204 | return 3; | 199 | return 3; |
| 205 | if (mp_irq->mp_irqflag != m->irqflag) | 200 | if (mp_irq->irqflag != m->irqflag) |
| 206 | return 4; | 201 | return 4; |
| 207 | if (mp_irq->mp_srcbus != m->srcbus) | 202 | if (mp_irq->srcbus != m->srcbus) |
| 208 | return 5; | 203 | return 5; |
| 209 | if (mp_irq->mp_srcbusirq != m->srcbusirq) | 204 | if (mp_irq->srcbusirq != m->srcbusirq) |
| 210 | return 6; | 205 | return 6; |
| 211 | if (mp_irq->mp_dstirq != m->dstirq) | 206 | if (mp_irq->dstirq != m->dstirq) |
| 212 | return 7; | 207 | return 7; |
| 213 | 208 | ||
| 214 | return 0; | 209 | return 0; |
| @@ -292,16 +287,7 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) | |||
| 292 | return 0; | 287 | return 0; |
| 293 | 288 | ||
| 294 | #ifdef CONFIG_X86_32 | 289 | #ifdef CONFIG_X86_32 |
| 295 | /* | 290 | generic_mps_oem_check(mpc, oem, str); |
| 296 | * need to make sure summit and es7000's mps_oem_check is safe to be | ||
| 297 | * called early via genericarch 's mps_oem_check | ||
| 298 | */ | ||
| 299 | if (early) { | ||
| 300 | #ifdef CONFIG_X86_NUMAQ | ||
| 301 | numaq_mps_oem_check(mpc, oem, str); | ||
| 302 | #endif | ||
| 303 | } else | ||
| 304 | mps_oem_check(mpc, oem, str); | ||
| 305 | #endif | 291 | #endif |
| 306 | /* save the local APIC address, it might be non-default */ | 292 | /* save the local APIC address, it might be non-default */ |
| 307 | if (!acpi_lapic) | 293 | if (!acpi_lapic) |
| @@ -386,13 +372,13 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) | |||
| 386 | (*x86_quirks->mpc_record)++; | 372 | (*x86_quirks->mpc_record)++; |
| 387 | } | 373 | } |
| 388 | 374 | ||
| 389 | #ifdef CONFIG_X86_GENERICARCH | 375 | #ifdef CONFIG_X86_BIGSMP |
| 390 | generic_bigsmp_probe(); | 376 | generic_bigsmp_probe(); |
| 391 | #endif | 377 | #endif |
| 392 | 378 | ||
| 393 | #ifdef CONFIG_X86_32 | 379 | if (apic->setup_apic_routing) |
| 394 | setup_apic_routing(); | 380 | apic->setup_apic_routing(); |
| 395 | #endif | 381 | |
| 396 | if (!num_processors) | 382 | if (!num_processors) |
| 397 | printk(KERN_ERR "MPTABLE: no processors registered!\n"); | 383 | printk(KERN_ERR "MPTABLE: no processors registered!\n"); |
| 398 | return num_processors; | 384 | return num_processors; |
| @@ -417,7 +403,7 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type) | |||
| 417 | intsrc.type = MP_INTSRC; | 403 | intsrc.type = MP_INTSRC; |
| 418 | intsrc.irqflag = 0; /* conforming */ | 404 | intsrc.irqflag = 0; /* conforming */ |
| 419 | intsrc.srcbus = 0; | 405 | intsrc.srcbus = 0; |
| 420 | intsrc.dstapic = mp_ioapics[0].mp_apicid; | 406 | intsrc.dstapic = mp_ioapics[0].apicid; |
| 421 | 407 | ||
| 422 | intsrc.irqtype = mp_INT; | 408 | intsrc.irqtype = mp_INT; |
| 423 | 409 | ||
| @@ -570,14 +556,27 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) | |||
| 570 | } | 556 | } |
| 571 | } | 557 | } |
| 572 | 558 | ||
| 573 | static struct intel_mp_floating *mpf_found; | 559 | static struct mpf_intel *mpf_found; |
| 560 | |||
| 561 | static unsigned long __init get_mpc_size(unsigned long physptr) | ||
| 562 | { | ||
| 563 | struct mpc_table *mpc; | ||
| 564 | unsigned long size; | ||
| 565 | |||
| 566 | mpc = early_ioremap(physptr, PAGE_SIZE); | ||
| 567 | size = mpc->length; | ||
| 568 | early_iounmap(mpc, PAGE_SIZE); | ||
| 569 | apic_printk(APIC_VERBOSE, " mpc: %lx-%lx\n", physptr, physptr + size); | ||
| 570 | |||
| 571 | return size; | ||
| 572 | } | ||
| 574 | 573 | ||
| 575 | /* | 574 | /* |
| 576 | * Scan the memory blocks for an SMP configuration block. | 575 | * Scan the memory blocks for an SMP configuration block. |
| 577 | */ | 576 | */ |
| 578 | static void __init __get_smp_config(unsigned int early) | 577 | static void __init __get_smp_config(unsigned int early) |
| 579 | { | 578 | { |
| 580 | struct intel_mp_floating *mpf = mpf_found; | 579 | struct mpf_intel *mpf = mpf_found; |
| 581 | 580 | ||
| 582 | if (!mpf) | 581 | if (!mpf) |
| 583 | return; | 582 | return; |
| @@ -598,9 +597,9 @@ static void __init __get_smp_config(unsigned int early) | |||
| 598 | } | 597 | } |
| 599 | 598 | ||
| 600 | printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", | 599 | printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", |
| 601 | mpf->mpf_specification); | 600 | mpf->specification); |
| 602 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) | 601 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) |
| 603 | if (mpf->mpf_feature2 & (1 << 7)) { | 602 | if (mpf->feature2 & (1 << 7)) { |
| 604 | printk(KERN_INFO " IMCR and PIC compatibility mode.\n"); | 603 | printk(KERN_INFO " IMCR and PIC compatibility mode.\n"); |
| 605 | pic_mode = 1; | 604 | pic_mode = 1; |
| 606 | } else { | 605 | } else { |
| @@ -611,7 +610,7 @@ static void __init __get_smp_config(unsigned int early) | |||
| 611 | /* | 610 | /* |
| 612 | * Now see if we need to read further. | 611 | * Now see if we need to read further. |
| 613 | */ | 612 | */ |
| 614 | if (mpf->mpf_feature1 != 0) { | 613 | if (mpf->feature1 != 0) { |
| 615 | if (early) { | 614 | if (early) { |
| 616 | /* | 615 | /* |
| 617 | * local APIC has default address | 616 | * local APIC has default address |
| @@ -621,16 +620,20 @@ static void __init __get_smp_config(unsigned int early) | |||
| 621 | } | 620 | } |
| 622 | 621 | ||
| 623 | printk(KERN_INFO "Default MP configuration #%d\n", | 622 | printk(KERN_INFO "Default MP configuration #%d\n", |
| 624 | mpf->mpf_feature1); | 623 | mpf->feature1); |
| 625 | construct_default_ISA_mptable(mpf->mpf_feature1); | 624 | construct_default_ISA_mptable(mpf->feature1); |
| 626 | 625 | ||
| 627 | } else if (mpf->mpf_physptr) { | 626 | } else if (mpf->physptr) { |
| 627 | struct mpc_table *mpc; | ||
| 628 | unsigned long size; | ||
| 628 | 629 | ||
| 630 | size = get_mpc_size(mpf->physptr); | ||
| 631 | mpc = early_ioremap(mpf->physptr, size); | ||
| 629 | /* | 632 | /* |
| 630 | * Read the physical hardware table. Anything here will | 633 | * Read the physical hardware table. Anything here will |
| 631 | * override the defaults. | 634 | * override the defaults. |
| 632 | */ | 635 | */ |
| 633 | if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr), early)) { | 636 | if (!smp_read_mpc(mpc, early)) { |
| 634 | #ifdef CONFIG_X86_LOCAL_APIC | 637 | #ifdef CONFIG_X86_LOCAL_APIC |
| 635 | smp_found_config = 0; | 638 | smp_found_config = 0; |
| 636 | #endif | 639 | #endif |
| @@ -638,8 +641,10 @@ static void __init __get_smp_config(unsigned int early) | |||
| 638 | "BIOS bug, MP table errors detected!...\n"); | 641 | "BIOS bug, MP table errors detected!...\n"); |
| 639 | printk(KERN_ERR "... disabling SMP support. " | 642 | printk(KERN_ERR "... disabling SMP support. " |
| 640 | "(tell your hw vendor)\n"); | 643 | "(tell your hw vendor)\n"); |
| 644 | early_iounmap(mpc, size); | ||
| 641 | return; | 645 | return; |
| 642 | } | 646 | } |
| 647 | early_iounmap(mpc, size); | ||
| 643 | 648 | ||
| 644 | if (early) | 649 | if (early) |
| 645 | return; | 650 | return; |
| @@ -688,33 +693,33 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, | |||
| 688 | unsigned reserve) | 693 | unsigned reserve) |
| 689 | { | 694 | { |
| 690 | unsigned int *bp = phys_to_virt(base); | 695 | unsigned int *bp = phys_to_virt(base); |
| 691 | struct intel_mp_floating *mpf; | 696 | struct mpf_intel *mpf; |
| 692 | 697 | ||
| 693 | apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n", | 698 | apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n", |
| 694 | bp, length); | 699 | bp, length); |
| 695 | BUILD_BUG_ON(sizeof(*mpf) != 16); | 700 | BUILD_BUG_ON(sizeof(*mpf) != 16); |
| 696 | 701 | ||
| 697 | while (length > 0) { | 702 | while (length > 0) { |
| 698 | mpf = (struct intel_mp_floating *)bp; | 703 | mpf = (struct mpf_intel *)bp; |
| 699 | if ((*bp == SMP_MAGIC_IDENT) && | 704 | if ((*bp == SMP_MAGIC_IDENT) && |
| 700 | (mpf->mpf_length == 1) && | 705 | (mpf->length == 1) && |
| 701 | !mpf_checksum((unsigned char *)bp, 16) && | 706 | !mpf_checksum((unsigned char *)bp, 16) && |
| 702 | ((mpf->mpf_specification == 1) | 707 | ((mpf->specification == 1) |
| 703 | || (mpf->mpf_specification == 4))) { | 708 | || (mpf->specification == 4))) { |
| 704 | #ifdef CONFIG_X86_LOCAL_APIC | 709 | #ifdef CONFIG_X86_LOCAL_APIC |
| 705 | smp_found_config = 1; | 710 | smp_found_config = 1; |
| 706 | #endif | 711 | #endif |
| 707 | mpf_found = mpf; | 712 | mpf_found = mpf; |
| 708 | 713 | ||
| 709 | printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n", | 714 | printk(KERN_INFO "found SMP MP-table at [%p] %llx\n", |
| 710 | mpf, virt_to_phys(mpf)); | 715 | mpf, (u64)virt_to_phys(mpf)); |
| 711 | 716 | ||
| 712 | if (!reserve) | 717 | if (!reserve) |
| 713 | return 1; | 718 | return 1; |
| 714 | reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE, | 719 | reserve_bootmem_generic(virt_to_phys(mpf), sizeof(*mpf), |
| 715 | BOOTMEM_DEFAULT); | 720 | BOOTMEM_DEFAULT); |
| 716 | if (mpf->mpf_physptr) { | 721 | if (mpf->physptr) { |
| 717 | unsigned long size = PAGE_SIZE; | 722 | unsigned long size = get_mpc_size(mpf->physptr); |
| 718 | #ifdef CONFIG_X86_32 | 723 | #ifdef CONFIG_X86_32 |
| 719 | /* | 724 | /* |
| 720 | * We cannot access to MPC table to compute | 725 | * We cannot access to MPC table to compute |
| @@ -722,15 +727,24 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, | |||
| 722 | * the bottom is mapped now. | 727 | * the bottom is mapped now. |
| 723 | * PC-9800's MPC table places on the very last | 728 | * PC-9800's MPC table places on the very last |
| 724 | * of physical memory; so that simply reserving | 729 | * of physical memory; so that simply reserving |
| 725 | * PAGE_SIZE from mpg->mpf_physptr yields BUG() | 730 | * PAGE_SIZE from mpf->physptr yields BUG() |
| 726 | * in reserve_bootmem. | 731 | * in reserve_bootmem. |
| 732 | * also need to make sure physptr is below than | ||
| 733 | * max_low_pfn | ||
| 734 | * we don't need reserve the area above max_low_pfn | ||
| 727 | */ | 735 | */ |
| 728 | unsigned long end = max_low_pfn * PAGE_SIZE; | 736 | unsigned long end = max_low_pfn * PAGE_SIZE; |
| 729 | if (mpf->mpf_physptr + size > end) | 737 | |
| 730 | size = end - mpf->mpf_physptr; | 738 | if (mpf->physptr < end) { |
| 731 | #endif | 739 | if (mpf->physptr + size > end) |
| 732 | reserve_bootmem_generic(mpf->mpf_physptr, size, | 740 | size = end - mpf->physptr; |
| 741 | reserve_bootmem_generic(mpf->physptr, size, | ||
| 742 | BOOTMEM_DEFAULT); | ||
| 743 | } | ||
| 744 | #else | ||
| 745 | reserve_bootmem_generic(mpf->physptr, size, | ||
| 733 | BOOTMEM_DEFAULT); | 746 | BOOTMEM_DEFAULT); |
| 747 | #endif | ||
| 734 | } | 748 | } |
| 735 | 749 | ||
| 736 | return 1; | 750 | return 1; |
| @@ -809,15 +823,15 @@ static int __init get_MP_intsrc_index(struct mpc_intsrc *m) | |||
| 809 | /* not legacy */ | 823 | /* not legacy */ |
| 810 | 824 | ||
| 811 | for (i = 0; i < mp_irq_entries; i++) { | 825 | for (i = 0; i < mp_irq_entries; i++) { |
| 812 | if (mp_irqs[i].mp_irqtype != mp_INT) | 826 | if (mp_irqs[i].irqtype != mp_INT) |
| 813 | continue; | 827 | continue; |
| 814 | 828 | ||
| 815 | if (mp_irqs[i].mp_irqflag != 0x0f) | 829 | if (mp_irqs[i].irqflag != 0x0f) |
| 816 | continue; | 830 | continue; |
| 817 | 831 | ||
| 818 | if (mp_irqs[i].mp_srcbus != m->srcbus) | 832 | if (mp_irqs[i].srcbus != m->srcbus) |
| 819 | continue; | 833 | continue; |
| 820 | if (mp_irqs[i].mp_srcbusirq != m->srcbusirq) | 834 | if (mp_irqs[i].srcbusirq != m->srcbusirq) |
| 821 | continue; | 835 | continue; |
| 822 | if (irq_used[i]) { | 836 | if (irq_used[i]) { |
| 823 | /* already claimed */ | 837 | /* already claimed */ |
| @@ -922,10 +936,10 @@ static int __init replace_intsrc_all(struct mpc_table *mpc, | |||
| 922 | if (irq_used[i]) | 936 | if (irq_used[i]) |
| 923 | continue; | 937 | continue; |
| 924 | 938 | ||
| 925 | if (mp_irqs[i].mp_irqtype != mp_INT) | 939 | if (mp_irqs[i].irqtype != mp_INT) |
| 926 | continue; | 940 | continue; |
| 927 | 941 | ||
| 928 | if (mp_irqs[i].mp_irqflag != 0x0f) | 942 | if (mp_irqs[i].irqflag != 0x0f) |
| 929 | continue; | 943 | continue; |
| 930 | 944 | ||
| 931 | if (nr_m_spare > 0) { | 945 | if (nr_m_spare > 0) { |
| @@ -1001,7 +1015,7 @@ static int __init update_mp_table(void) | |||
| 1001 | { | 1015 | { |
| 1002 | char str[16]; | 1016 | char str[16]; |
| 1003 | char oem[10]; | 1017 | char oem[10]; |
| 1004 | struct intel_mp_floating *mpf; | 1018 | struct mpf_intel *mpf; |
| 1005 | struct mpc_table *mpc, *mpc_new; | 1019 | struct mpc_table *mpc, *mpc_new; |
| 1006 | 1020 | ||
| 1007 | if (!enable_update_mptable) | 1021 | if (!enable_update_mptable) |
| @@ -1014,19 +1028,19 @@ static int __init update_mp_table(void) | |||
| 1014 | /* | 1028 | /* |
| 1015 | * Now see if we need to go further. | 1029 | * Now see if we need to go further. |
| 1016 | */ | 1030 | */ |
| 1017 | if (mpf->mpf_feature1 != 0) | 1031 | if (mpf->feature1 != 0) |
| 1018 | return 0; | 1032 | return 0; |
| 1019 | 1033 | ||
| 1020 | if (!mpf->mpf_physptr) | 1034 | if (!mpf->physptr) |
| 1021 | return 0; | 1035 | return 0; |
| 1022 | 1036 | ||
| 1023 | mpc = phys_to_virt(mpf->mpf_physptr); | 1037 | mpc = phys_to_virt(mpf->physptr); |
| 1024 | 1038 | ||
| 1025 | if (!smp_check_mpc(mpc, oem, str)) | 1039 | if (!smp_check_mpc(mpc, oem, str)) |
| 1026 | return 0; | 1040 | return 0; |
| 1027 | 1041 | ||
| 1028 | printk(KERN_INFO "mpf: %lx\n", virt_to_phys(mpf)); | 1042 | printk(KERN_INFO "mpf: %llx\n", (u64)virt_to_phys(mpf)); |
| 1029 | printk(KERN_INFO "mpf_physptr: %x\n", mpf->mpf_physptr); | 1043 | printk(KERN_INFO "physptr: %x\n", mpf->physptr); |
| 1030 | 1044 | ||
| 1031 | if (mpc_new_phys && mpc->length > mpc_new_length) { | 1045 | if (mpc_new_phys && mpc->length > mpc_new_length) { |
| 1032 | mpc_new_phys = 0; | 1046 | mpc_new_phys = 0; |
| @@ -1047,23 +1061,23 @@ static int __init update_mp_table(void) | |||
| 1047 | } | 1061 | } |
| 1048 | printk(KERN_INFO "use in-positon replacing\n"); | 1062 | printk(KERN_INFO "use in-positon replacing\n"); |
| 1049 | } else { | 1063 | } else { |
| 1050 | mpf->mpf_physptr = mpc_new_phys; | 1064 | mpf->physptr = mpc_new_phys; |
| 1051 | mpc_new = phys_to_virt(mpc_new_phys); | 1065 | mpc_new = phys_to_virt(mpc_new_phys); |
| 1052 | memcpy(mpc_new, mpc, mpc->length); | 1066 | memcpy(mpc_new, mpc, mpc->length); |
| 1053 | mpc = mpc_new; | 1067 | mpc = mpc_new; |
| 1054 | /* check if we can modify that */ | 1068 | /* check if we can modify that */ |
| 1055 | if (mpc_new_phys - mpf->mpf_physptr) { | 1069 | if (mpc_new_phys - mpf->physptr) { |
| 1056 | struct intel_mp_floating *mpf_new; | 1070 | struct mpf_intel *mpf_new; |
| 1057 | /* steal 16 bytes from [0, 1k) */ | 1071 | /* steal 16 bytes from [0, 1k) */ |
| 1058 | printk(KERN_INFO "mpf new: %x\n", 0x400 - 16); | 1072 | printk(KERN_INFO "mpf new: %x\n", 0x400 - 16); |
| 1059 | mpf_new = phys_to_virt(0x400 - 16); | 1073 | mpf_new = phys_to_virt(0x400 - 16); |
| 1060 | memcpy(mpf_new, mpf, 16); | 1074 | memcpy(mpf_new, mpf, 16); |
| 1061 | mpf = mpf_new; | 1075 | mpf = mpf_new; |
| 1062 | mpf->mpf_physptr = mpc_new_phys; | 1076 | mpf->physptr = mpc_new_phys; |
| 1063 | } | 1077 | } |
| 1064 | mpf->mpf_checksum = 0; | 1078 | mpf->checksum = 0; |
| 1065 | mpf->mpf_checksum -= mpf_checksum((unsigned char *)mpf, 16); | 1079 | mpf->checksum -= mpf_checksum((unsigned char *)mpf, 16); |
| 1066 | printk(KERN_INFO "mpf_physptr new: %x\n", mpf->mpf_physptr); | 1080 | printk(KERN_INFO "physptr new: %x\n", mpf->physptr); |
| 1067 | } | 1081 | } |
| 1068 | 1082 | ||
| 1069 | /* | 1083 | /* |
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 726266695b2c..3cf3413ec626 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c | |||
| @@ -35,10 +35,10 @@ | |||
| 35 | #include <linux/device.h> | 35 | #include <linux/device.h> |
| 36 | #include <linux/cpu.h> | 36 | #include <linux/cpu.h> |
| 37 | #include <linux/notifier.h> | 37 | #include <linux/notifier.h> |
| 38 | #include <linux/uaccess.h> | ||
| 38 | 39 | ||
| 39 | #include <asm/processor.h> | 40 | #include <asm/processor.h> |
| 40 | #include <asm/msr.h> | 41 | #include <asm/msr.h> |
| 41 | #include <asm/uaccess.h> | ||
| 42 | #include <asm/system.h> | 42 | #include <asm/system.h> |
| 43 | 43 | ||
| 44 | static struct class *msr_class; | 44 | static struct class *msr_class; |
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c deleted file mode 100644 index f2191d4f2717..000000000000 --- a/arch/x86/kernel/numaq_32.c +++ /dev/null | |||
| @@ -1,293 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * Written by: Patricia Gaughen, IBM Corporation | ||
| 3 | * | ||
| 4 | * Copyright (C) 2002, IBM Corp. | ||
| 5 | * | ||
| 6 | * All rights reserved. | ||
| 7 | * | ||
| 8 | * This program is free software; you can redistribute it and/or modify | ||
| 9 | * it under the terms of the GNU General Public License as published by | ||
| 10 | * the Free Software Foundation; either version 2 of the License, or | ||
| 11 | * (at your option) any later version. | ||
| 12 | * | ||
| 13 | * This program is distributed in the hope that it will be useful, but | ||
| 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 15 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
| 16 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
| 17 | * details. | ||
| 18 | * | ||
| 19 | * You should have received a copy of the GNU General Public License | ||
| 20 | * along with this program; if not, write to the Free Software | ||
| 21 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
| 22 | * | ||
| 23 | * Send feedback to <gone@us.ibm.com> | ||
| 24 | */ | ||
| 25 | |||
| 26 | #include <linux/mm.h> | ||
| 27 | #include <linux/bootmem.h> | ||
| 28 | #include <linux/mmzone.h> | ||
| 29 | #include <linux/module.h> | ||
| 30 | #include <linux/nodemask.h> | ||
| 31 | #include <asm/numaq.h> | ||
| 32 | #include <asm/topology.h> | ||
| 33 | #include <asm/processor.h> | ||
| 34 | #include <asm/genapic.h> | ||
| 35 | #include <asm/e820.h> | ||
| 36 | #include <asm/setup.h> | ||
| 37 | |||
| 38 | #define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT)) | ||
| 39 | |||
| 40 | /* | ||
| 41 | * Function: smp_dump_qct() | ||
| 42 | * | ||
| 43 | * Description: gets memory layout from the quad config table. This | ||
| 44 | * function also updates node_online_map with the nodes (quads) present. | ||
| 45 | */ | ||
| 46 | static void __init smp_dump_qct(void) | ||
| 47 | { | ||
| 48 | int node; | ||
| 49 | struct eachquadmem *eq; | ||
| 50 | struct sys_cfg_data *scd = | ||
| 51 | (struct sys_cfg_data *)__va(SYS_CFG_DATA_PRIV_ADDR); | ||
| 52 | |||
| 53 | nodes_clear(node_online_map); | ||
| 54 | for_each_node(node) { | ||
| 55 | if (scd->quads_present31_0 & (1 << node)) { | ||
| 56 | node_set_online(node); | ||
| 57 | eq = &scd->eq[node]; | ||
| 58 | /* Convert to pages */ | ||
| 59 | node_start_pfn[node] = MB_TO_PAGES( | ||
| 60 | eq->hi_shrd_mem_start - eq->priv_mem_size); | ||
| 61 | node_end_pfn[node] = MB_TO_PAGES( | ||
| 62 | eq->hi_shrd_mem_start + eq->hi_shrd_mem_size); | ||
| 63 | |||
| 64 | e820_register_active_regions(node, node_start_pfn[node], | ||
| 65 | node_end_pfn[node]); | ||
| 66 | memory_present(node, | ||
| 67 | node_start_pfn[node], node_end_pfn[node]); | ||
| 68 | node_remap_size[node] = node_memmap_size_bytes(node, | ||
| 69 | node_start_pfn[node], | ||
| 70 | node_end_pfn[node]); | ||
| 71 | } | ||
| 72 | } | ||
| 73 | } | ||
| 74 | |||
| 75 | |||
| 76 | void __cpuinit numaq_tsc_disable(void) | ||
| 77 | { | ||
| 78 | if (!found_numaq) | ||
| 79 | return; | ||
| 80 | |||
| 81 | if (num_online_nodes() > 1) { | ||
| 82 | printk(KERN_DEBUG "NUMAQ: disabling TSC\n"); | ||
| 83 | setup_clear_cpu_cap(X86_FEATURE_TSC); | ||
| 84 | } | ||
| 85 | } | ||
| 86 | |||
| 87 | static int __init numaq_pre_time_init(void) | ||
| 88 | { | ||
| 89 | numaq_tsc_disable(); | ||
| 90 | return 0; | ||
| 91 | } | ||
| 92 | |||
| 93 | int found_numaq; | ||
| 94 | /* | ||
| 95 | * Have to match translation table entries to main table entries by counter | ||
| 96 | * hence the mpc_record variable .... can't see a less disgusting way of | ||
| 97 | * doing this .... | ||
| 98 | */ | ||
| 99 | struct mpc_config_translation { | ||
| 100 | unsigned char mpc_type; | ||
| 101 | unsigned char trans_len; | ||
| 102 | unsigned char trans_type; | ||
| 103 | unsigned char trans_quad; | ||
| 104 | unsigned char trans_global; | ||
| 105 | unsigned char trans_local; | ||
| 106 | unsigned short trans_reserved; | ||
| 107 | }; | ||
| 108 | |||
| 109 | /* x86_quirks member */ | ||
| 110 | static int mpc_record; | ||
| 111 | static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] | ||
| 112 | __cpuinitdata; | ||
| 113 | |||
| 114 | static inline int generate_logical_apicid(int quad, int phys_apicid) | ||
| 115 | { | ||
| 116 | return (quad << 4) + (phys_apicid ? phys_apicid << 1 : 1); | ||
| 117 | } | ||
| 118 | |||
| 119 | /* x86_quirks member */ | ||
| 120 | static int mpc_apic_id(struct mpc_cpu *m) | ||
| 121 | { | ||
| 122 | int quad = translation_table[mpc_record]->trans_quad; | ||
| 123 | int logical_apicid = generate_logical_apicid(quad, m->apicid); | ||
| 124 | |||
| 125 | printk(KERN_DEBUG "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n", | ||
| 126 | m->apicid, (m->cpufeature & CPU_FAMILY_MASK) >> 8, | ||
| 127 | (m->cpufeature & CPU_MODEL_MASK) >> 4, | ||
| 128 | m->apicver, quad, logical_apicid); | ||
| 129 | return logical_apicid; | ||
| 130 | } | ||
| 131 | |||
| 132 | int mp_bus_id_to_node[MAX_MP_BUSSES]; | ||
| 133 | |||
| 134 | int mp_bus_id_to_local[MAX_MP_BUSSES]; | ||
| 135 | |||
| 136 | /* x86_quirks member */ | ||
| 137 | static void mpc_oem_bus_info(struct mpc_bus *m, char *name) | ||
| 138 | { | ||
| 139 | int quad = translation_table[mpc_record]->trans_quad; | ||
| 140 | int local = translation_table[mpc_record]->trans_local; | ||
| 141 | |||
| 142 | mp_bus_id_to_node[m->busid] = quad; | ||
| 143 | mp_bus_id_to_local[m->busid] = local; | ||
| 144 | printk(KERN_INFO "Bus #%d is %s (node %d)\n", | ||
| 145 | m->busid, name, quad); | ||
| 146 | } | ||
| 147 | |||
| 148 | int quad_local_to_mp_bus_id [NR_CPUS/4][4]; | ||
| 149 | |||
| 150 | /* x86_quirks member */ | ||
| 151 | static void mpc_oem_pci_bus(struct mpc_bus *m) | ||
| 152 | { | ||
| 153 | int quad = translation_table[mpc_record]->trans_quad; | ||
| 154 | int local = translation_table[mpc_record]->trans_local; | ||
| 155 | |||
| 156 | quad_local_to_mp_bus_id[quad][local] = m->busid; | ||
| 157 | } | ||
| 158 | |||
| 159 | static void __init MP_translation_info(struct mpc_config_translation *m) | ||
| 160 | { | ||
| 161 | printk(KERN_INFO | ||
| 162 | "Translation: record %d, type %d, quad %d, global %d, local %d\n", | ||
| 163 | mpc_record, m->trans_type, m->trans_quad, m->trans_global, | ||
| 164 | m->trans_local); | ||
| 165 | |||
| 166 | if (mpc_record >= MAX_MPC_ENTRY) | ||
| 167 | printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n"); | ||
| 168 | else | ||
| 169 | translation_table[mpc_record] = m; /* stash this for later */ | ||
| 170 | if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad)) | ||
| 171 | node_set_online(m->trans_quad); | ||
| 172 | } | ||
| 173 | |||
| 174 | static int __init mpf_checksum(unsigned char *mp, int len) | ||
| 175 | { | ||
| 176 | int sum = 0; | ||
| 177 | |||
| 178 | while (len--) | ||
| 179 | sum += *mp++; | ||
| 180 | |||
| 181 | return sum & 0xFF; | ||
| 182 | } | ||
| 183 | |||
| 184 | /* | ||
| 185 | * Read/parse the MPC oem tables | ||
| 186 | */ | ||
| 187 | |||
| 188 | static void __init smp_read_mpc_oem(struct mpc_oemtable *oemtable, | ||
| 189 | unsigned short oemsize) | ||
| 190 | { | ||
| 191 | int count = sizeof(*oemtable); /* the header size */ | ||
| 192 | unsigned char *oemptr = ((unsigned char *)oemtable) + count; | ||
| 193 | |||
| 194 | mpc_record = 0; | ||
| 195 | printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n", | ||
| 196 | oemtable); | ||
| 197 | if (memcmp(oemtable->signature, MPC_OEM_SIGNATURE, 4)) { | ||
| 198 | printk(KERN_WARNING | ||
| 199 | "SMP mpc oemtable: bad signature [%c%c%c%c]!\n", | ||
| 200 | oemtable->signature[0], oemtable->signature[1], | ||
| 201 | oemtable->signature[2], oemtable->signature[3]); | ||
| 202 | return; | ||
| 203 | } | ||
| 204 | if (mpf_checksum((unsigned char *)oemtable, oemtable->length)) { | ||
| 205 | printk(KERN_WARNING "SMP oem mptable: checksum error!\n"); | ||
| 206 | return; | ||
| 207 | } | ||
| 208 | while (count < oemtable->length) { | ||
| 209 | switch (*oemptr) { | ||
| 210 | case MP_TRANSLATION: | ||
| 211 | { | ||
| 212 | struct mpc_config_translation *m = | ||
| 213 | (struct mpc_config_translation *)oemptr; | ||
| 214 | MP_translation_info(m); | ||
| 215 | oemptr += sizeof(*m); | ||
| 216 | count += sizeof(*m); | ||
| 217 | ++mpc_record; | ||
| 218 | break; | ||
| 219 | } | ||
| 220 | default: | ||
| 221 | { | ||
| 222 | printk(KERN_WARNING | ||
| 223 | "Unrecognised OEM table entry type! - %d\n", | ||
| 224 | (int)*oemptr); | ||
| 225 | return; | ||
| 226 | } | ||
| 227 | } | ||
| 228 | } | ||
| 229 | } | ||
| 230 | |||
| 231 | static int __init numaq_setup_ioapic_ids(void) | ||
| 232 | { | ||
| 233 | /* so can skip it */ | ||
| 234 | return 1; | ||
| 235 | } | ||
| 236 | |||
| 237 | static int __init numaq_update_genapic(void) | ||
| 238 | { | ||
| 239 | genapic->wakeup_cpu = wakeup_secondary_cpu_via_nmi; | ||
| 240 | |||
| 241 | return 0; | ||
| 242 | } | ||
| 243 | |||
| 244 | static struct x86_quirks numaq_x86_quirks __initdata = { | ||
| 245 | .arch_pre_time_init = numaq_pre_time_init, | ||
| 246 | .arch_time_init = NULL, | ||
| 247 | .arch_pre_intr_init = NULL, | ||
| 248 | .arch_memory_setup = NULL, | ||
| 249 | .arch_intr_init = NULL, | ||
| 250 | .arch_trap_init = NULL, | ||
| 251 | .mach_get_smp_config = NULL, | ||
| 252 | .mach_find_smp_config = NULL, | ||
| 253 | .mpc_record = &mpc_record, | ||
| 254 | .mpc_apic_id = mpc_apic_id, | ||
| 255 | .mpc_oem_bus_info = mpc_oem_bus_info, | ||
| 256 | .mpc_oem_pci_bus = mpc_oem_pci_bus, | ||
| 257 | .smp_read_mpc_oem = smp_read_mpc_oem, | ||
| 258 | .setup_ioapic_ids = numaq_setup_ioapic_ids, | ||
| 259 | .update_genapic = numaq_update_genapic, | ||
| 260 | }; | ||
| 261 | |||
| 262 | void numaq_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) | ||
| 263 | { | ||
| 264 | if (strncmp(oem, "IBM NUMA", 8)) | ||
| 265 | printk("Warning! Not a NUMA-Q system!\n"); | ||
| 266 | else | ||
| 267 | found_numaq = 1; | ||
| 268 | } | ||
| 269 | |||
| 270 | static __init void early_check_numaq(void) | ||
| 271 | { | ||
| 272 | /* | ||
| 273 | * Find possible boot-time SMP configuration: | ||
| 274 | */ | ||
| 275 | early_find_smp_config(); | ||
| 276 | /* | ||
| 277 | * get boot-time SMP configuration: | ||
| 278 | */ | ||
| 279 | if (smp_found_config) | ||
| 280 | early_get_smp_config(); | ||
| 281 | |||
| 282 | if (found_numaq) | ||
| 283 | x86_quirks = &numaq_x86_quirks; | ||
| 284 | } | ||
| 285 | |||
| 286 | int __init get_memcfg_numaq(void) | ||
| 287 | { | ||
| 288 | early_check_numaq(); | ||
| 289 | if (!found_numaq) | ||
| 290 | return 0; | ||
| 291 | smp_dump_qct(); | ||
| 292 | return 1; | ||
| 293 | } | ||
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c index 95777b0faa73..3a7c5a44082e 100644 --- a/arch/x86/kernel/paravirt-spinlocks.c +++ b/arch/x86/kernel/paravirt-spinlocks.c | |||
| @@ -26,13 +26,3 @@ struct pv_lock_ops pv_lock_ops = { | |||
| 26 | }; | 26 | }; |
| 27 | EXPORT_SYMBOL(pv_lock_ops); | 27 | EXPORT_SYMBOL(pv_lock_ops); |
| 28 | 28 | ||
| 29 | void __init paravirt_use_bytelocks(void) | ||
| 30 | { | ||
| 31 | #ifdef CONFIG_SMP | ||
| 32 | pv_lock_ops.spin_is_locked = __byte_spin_is_locked; | ||
| 33 | pv_lock_ops.spin_is_contended = __byte_spin_is_contended; | ||
| 34 | pv_lock_ops.spin_lock = __byte_spin_lock; | ||
| 35 | pv_lock_ops.spin_trylock = __byte_spin_trylock; | ||
| 36 | pv_lock_ops.spin_unlock = __byte_spin_unlock; | ||
| 37 | #endif | ||
| 38 | } | ||
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index c6520a4e85d4..63dd358d8ee1 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c | |||
| @@ -28,7 +28,6 @@ | |||
| 28 | #include <asm/paravirt.h> | 28 | #include <asm/paravirt.h> |
| 29 | #include <asm/desc.h> | 29 | #include <asm/desc.h> |
| 30 | #include <asm/setup.h> | 30 | #include <asm/setup.h> |
| 31 | #include <asm/arch_hooks.h> | ||
| 32 | #include <asm/pgtable.h> | 31 | #include <asm/pgtable.h> |
| 33 | #include <asm/time.h> | 32 | #include <asm/time.h> |
| 34 | #include <asm/pgalloc.h> | 33 | #include <asm/pgalloc.h> |
| @@ -44,6 +43,17 @@ void _paravirt_nop(void) | |||
| 44 | { | 43 | { |
| 45 | } | 44 | } |
| 46 | 45 | ||
| 46 | /* identity function, which can be inlined */ | ||
| 47 | u32 _paravirt_ident_32(u32 x) | ||
| 48 | { | ||
| 49 | return x; | ||
| 50 | } | ||
| 51 | |||
| 52 | u64 _paravirt_ident_64(u64 x) | ||
| 53 | { | ||
| 54 | return x; | ||
| 55 | } | ||
| 56 | |||
| 47 | static void __init default_banner(void) | 57 | static void __init default_banner(void) |
| 48 | { | 58 | { |
| 49 | printk(KERN_INFO "Booting paravirtualized kernel on %s\n", | 59 | printk(KERN_INFO "Booting paravirtualized kernel on %s\n", |
| @@ -138,9 +148,16 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, | |||
| 138 | if (opfunc == NULL) | 148 | if (opfunc == NULL) |
| 139 | /* If there's no function, patch it with a ud2a (BUG) */ | 149 | /* If there's no function, patch it with a ud2a (BUG) */ |
| 140 | ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a)); | 150 | ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a)); |
| 141 | else if (opfunc == paravirt_nop) | 151 | else if (opfunc == _paravirt_nop) |
| 142 | /* If the operation is a nop, then nop the callsite */ | 152 | /* If the operation is a nop, then nop the callsite */ |
| 143 | ret = paravirt_patch_nop(); | 153 | ret = paravirt_patch_nop(); |
| 154 | |||
| 155 | /* identity functions just return their single argument */ | ||
| 156 | else if (opfunc == _paravirt_ident_32) | ||
| 157 | ret = paravirt_patch_ident_32(insnbuf, len); | ||
| 158 | else if (opfunc == _paravirt_ident_64) | ||
| 159 | ret = paravirt_patch_ident_64(insnbuf, len); | ||
| 160 | |||
| 144 | else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) || | 161 | else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) || |
| 145 | type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) || | 162 | type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) || |
| 146 | type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) || | 163 | type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) || |
| @@ -318,10 +335,10 @@ struct pv_time_ops pv_time_ops = { | |||
| 318 | 335 | ||
| 319 | struct pv_irq_ops pv_irq_ops = { | 336 | struct pv_irq_ops pv_irq_ops = { |
| 320 | .init_IRQ = native_init_IRQ, | 337 | .init_IRQ = native_init_IRQ, |
| 321 | .save_fl = native_save_fl, | 338 | .save_fl = __PV_IS_CALLEE_SAVE(native_save_fl), |
| 322 | .restore_fl = native_restore_fl, | 339 | .restore_fl = __PV_IS_CALLEE_SAVE(native_restore_fl), |
| 323 | .irq_disable = native_irq_disable, | 340 | .irq_disable = __PV_IS_CALLEE_SAVE(native_irq_disable), |
| 324 | .irq_enable = native_irq_enable, | 341 | .irq_enable = __PV_IS_CALLEE_SAVE(native_irq_enable), |
| 325 | .safe_halt = native_safe_halt, | 342 | .safe_halt = native_safe_halt, |
| 326 | .halt = native_halt, | 343 | .halt = native_halt, |
| 327 | #ifdef CONFIG_X86_64 | 344 | #ifdef CONFIG_X86_64 |
| @@ -399,6 +416,14 @@ struct pv_apic_ops pv_apic_ops = { | |||
| 399 | #endif | 416 | #endif |
| 400 | }; | 417 | }; |
| 401 | 418 | ||
| 419 | #if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE) | ||
| 420 | /* 32-bit pagetable entries */ | ||
| 421 | #define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_32) | ||
| 422 | #else | ||
| 423 | /* 64-bit pagetable entries */ | ||
| 424 | #define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_64) | ||
| 425 | #endif | ||
| 426 | |||
| 402 | struct pv_mmu_ops pv_mmu_ops = { | 427 | struct pv_mmu_ops pv_mmu_ops = { |
| 403 | #ifndef CONFIG_X86_64 | 428 | #ifndef CONFIG_X86_64 |
| 404 | .pagetable_setup_start = native_pagetable_setup_start, | 429 | .pagetable_setup_start = native_pagetable_setup_start, |
| @@ -450,22 +475,23 @@ struct pv_mmu_ops pv_mmu_ops = { | |||
| 450 | .pmd_clear = native_pmd_clear, | 475 | .pmd_clear = native_pmd_clear, |
| 451 | #endif | 476 | #endif |
| 452 | .set_pud = native_set_pud, | 477 | .set_pud = native_set_pud, |
| 453 | .pmd_val = native_pmd_val, | 478 | |
| 454 | .make_pmd = native_make_pmd, | 479 | .pmd_val = PTE_IDENT, |
| 480 | .make_pmd = PTE_IDENT, | ||
| 455 | 481 | ||
| 456 | #if PAGETABLE_LEVELS == 4 | 482 | #if PAGETABLE_LEVELS == 4 |
| 457 | .pud_val = native_pud_val, | 483 | .pud_val = PTE_IDENT, |
| 458 | .make_pud = native_make_pud, | 484 | .make_pud = PTE_IDENT, |
| 485 | |||
| 459 | .set_pgd = native_set_pgd, | 486 | .set_pgd = native_set_pgd, |
| 460 | #endif | 487 | #endif |
| 461 | #endif /* PAGETABLE_LEVELS >= 3 */ | 488 | #endif /* PAGETABLE_LEVELS >= 3 */ |
| 462 | 489 | ||
| 463 | .pte_val = native_pte_val, | 490 | .pte_val = PTE_IDENT, |
| 464 | .pte_flags = native_pte_flags, | 491 | .pgd_val = PTE_IDENT, |
| 465 | .pgd_val = native_pgd_val, | ||
| 466 | 492 | ||
| 467 | .make_pte = native_make_pte, | 493 | .make_pte = PTE_IDENT, |
| 468 | .make_pgd = native_make_pgd, | 494 | .make_pgd = PTE_IDENT, |
| 469 | 495 | ||
| 470 | .dup_mmap = paravirt_nop, | 496 | .dup_mmap = paravirt_nop, |
| 471 | .exit_mmap = paravirt_nop, | 497 | .exit_mmap = paravirt_nop, |
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c index 9fe644f4861d..d9f32e6d6ab6 100644 --- a/arch/x86/kernel/paravirt_patch_32.c +++ b/arch/x86/kernel/paravirt_patch_32.c | |||
| @@ -12,6 +12,18 @@ DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax"); | |||
| 12 | DEF_NATIVE(pv_cpu_ops, clts, "clts"); | 12 | DEF_NATIVE(pv_cpu_ops, clts, "clts"); |
| 13 | DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc"); | 13 | DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc"); |
| 14 | 14 | ||
| 15 | unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len) | ||
| 16 | { | ||
| 17 | /* arg in %eax, return in %eax */ | ||
| 18 | return 0; | ||
| 19 | } | ||
| 20 | |||
| 21 | unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len) | ||
| 22 | { | ||
| 23 | /* arg in %edx:%eax, return in %edx:%eax */ | ||
| 24 | return 0; | ||
| 25 | } | ||
| 26 | |||
| 15 | unsigned native_patch(u8 type, u16 clobbers, void *ibuf, | 27 | unsigned native_patch(u8 type, u16 clobbers, void *ibuf, |
| 16 | unsigned long addr, unsigned len) | 28 | unsigned long addr, unsigned len) |
| 17 | { | 29 | { |
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c index 061d01df9ae6..3f08f34f93eb 100644 --- a/arch/x86/kernel/paravirt_patch_64.c +++ b/arch/x86/kernel/paravirt_patch_64.c | |||
| @@ -19,6 +19,21 @@ DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq"); | |||
| 19 | DEF_NATIVE(pv_cpu_ops, usergs_sysret32, "swapgs; sysretl"); | 19 | DEF_NATIVE(pv_cpu_ops, usergs_sysret32, "swapgs; sysretl"); |
| 20 | DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs"); | 20 | DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs"); |
| 21 | 21 | ||
| 22 | DEF_NATIVE(, mov32, "mov %edi, %eax"); | ||
| 23 | DEF_NATIVE(, mov64, "mov %rdi, %rax"); | ||
| 24 | |||
| 25 | unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len) | ||
| 26 | { | ||
| 27 | return paravirt_patch_insns(insnbuf, len, | ||
| 28 | start__mov32, end__mov32); | ||
| 29 | } | ||
| 30 | |||
| 31 | unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len) | ||
| 32 | { | ||
| 33 | return paravirt_patch_insns(insnbuf, len, | ||
| 34 | start__mov64, end__mov64); | ||
| 35 | } | ||
| 36 | |||
| 22 | unsigned native_patch(u8 type, u16 clobbers, void *ibuf, | 37 | unsigned native_patch(u8 type, u16 clobbers, void *ibuf, |
| 23 | unsigned long addr, unsigned len) | 38 | unsigned long addr, unsigned len) |
| 24 | { | 39 | { |
diff --git a/arch/x86/kernel/probe_roms_32.c b/arch/x86/kernel/probe_roms_32.c index 675a48c404a5..071e7fea42e5 100644 --- a/arch/x86/kernel/probe_roms_32.c +++ b/arch/x86/kernel/probe_roms_32.c | |||
| @@ -18,7 +18,7 @@ | |||
| 18 | #include <asm/setup.h> | 18 | #include <asm/setup.h> |
| 19 | #include <asm/sections.h> | 19 | #include <asm/sections.h> |
| 20 | #include <asm/io.h> | 20 | #include <asm/io.h> |
| 21 | #include <setup_arch.h> | 21 | #include <asm/setup_arch.h> |
| 22 | 22 | ||
| 23 | static struct resource system_rom_resource = { | 23 | static struct resource system_rom_resource = { |
| 24 | .name = "System ROM", | 24 | .name = "System ROM", |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 6d12f7e37f8c..78533a519d8f 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
| @@ -1,8 +1,8 @@ | |||
| 1 | #include <linux/errno.h> | 1 | #include <linux/errno.h> |
| 2 | #include <linux/kernel.h> | 2 | #include <linux/kernel.h> |
| 3 | #include <linux/mm.h> | 3 | #include <linux/mm.h> |
| 4 | #include <asm/idle.h> | ||
| 5 | #include <linux/smp.h> | 4 | #include <linux/smp.h> |
| 5 | #include <linux/prctl.h> | ||
| 6 | #include <linux/slab.h> | 6 | #include <linux/slab.h> |
| 7 | #include <linux/sched.h> | 7 | #include <linux/sched.h> |
| 8 | #include <linux/module.h> | 8 | #include <linux/module.h> |
| @@ -11,6 +11,9 @@ | |||
| 11 | #include <linux/ftrace.h> | 11 | #include <linux/ftrace.h> |
| 12 | #include <asm/system.h> | 12 | #include <asm/system.h> |
| 13 | #include <asm/apic.h> | 13 | #include <asm/apic.h> |
| 14 | #include <asm/idle.h> | ||
| 15 | #include <asm/uaccess.h> | ||
| 16 | #include <asm/i387.h> | ||
| 14 | 17 | ||
| 15 | unsigned long idle_halt; | 18 | unsigned long idle_halt; |
| 16 | EXPORT_SYMBOL(idle_halt); | 19 | EXPORT_SYMBOL(idle_halt); |
| @@ -56,6 +59,192 @@ void arch_task_cache_init(void) | |||
| 56 | } | 59 | } |
| 57 | 60 | ||
| 58 | /* | 61 | /* |
| 62 | * Free current thread data structures etc.. | ||
| 63 | */ | ||
| 64 | void exit_thread(void) | ||
| 65 | { | ||
| 66 | struct task_struct *me = current; | ||
| 67 | struct thread_struct *t = &me->thread; | ||
| 68 | |||
| 69 | if (me->thread.io_bitmap_ptr) { | ||
| 70 | struct tss_struct *tss = &per_cpu(init_tss, get_cpu()); | ||
| 71 | |||
| 72 | kfree(t->io_bitmap_ptr); | ||
| 73 | t->io_bitmap_ptr = NULL; | ||
| 74 | clear_thread_flag(TIF_IO_BITMAP); | ||
| 75 | /* | ||
| 76 | * Careful, clear this in the TSS too: | ||
| 77 | */ | ||
| 78 | memset(tss->io_bitmap, 0xff, t->io_bitmap_max); | ||
| 79 | t->io_bitmap_max = 0; | ||
| 80 | put_cpu(); | ||
| 81 | } | ||
| 82 | |||
| 83 | ds_exit_thread(current); | ||
| 84 | } | ||
| 85 | |||
| 86 | void flush_thread(void) | ||
| 87 | { | ||
| 88 | struct task_struct *tsk = current; | ||
| 89 | |||
| 90 | #ifdef CONFIG_X86_64 | ||
| 91 | if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) { | ||
| 92 | clear_tsk_thread_flag(tsk, TIF_ABI_PENDING); | ||
| 93 | if (test_tsk_thread_flag(tsk, TIF_IA32)) { | ||
| 94 | clear_tsk_thread_flag(tsk, TIF_IA32); | ||
| 95 | } else { | ||
| 96 | set_tsk_thread_flag(tsk, TIF_IA32); | ||
| 97 | current_thread_info()->status |= TS_COMPAT; | ||
| 98 | } | ||
| 99 | } | ||
| 100 | #endif | ||
| 101 | |||
| 102 | clear_tsk_thread_flag(tsk, TIF_DEBUG); | ||
| 103 | |||
| 104 | tsk->thread.debugreg0 = 0; | ||
| 105 | tsk->thread.debugreg1 = 0; | ||
| 106 | tsk->thread.debugreg2 = 0; | ||
| 107 | tsk->thread.debugreg3 = 0; | ||
| 108 | tsk->thread.debugreg6 = 0; | ||
| 109 | tsk->thread.debugreg7 = 0; | ||
| 110 | memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); | ||
| 111 | /* | ||
| 112 | * Forget coprocessor state.. | ||
| 113 | */ | ||
| 114 | tsk->fpu_counter = 0; | ||
| 115 | clear_fpu(tsk); | ||
| 116 | clear_used_math(); | ||
| 117 | } | ||
| 118 | |||
| 119 | static void hard_disable_TSC(void) | ||
| 120 | { | ||
| 121 | write_cr4(read_cr4() | X86_CR4_TSD); | ||
| 122 | } | ||
| 123 | |||
| 124 | void disable_TSC(void) | ||
| 125 | { | ||
| 126 | preempt_disable(); | ||
| 127 | if (!test_and_set_thread_flag(TIF_NOTSC)) | ||
| 128 | /* | ||
| 129 | * Must flip the CPU state synchronously with | ||
| 130 | * TIF_NOTSC in the current running context. | ||
| 131 | */ | ||
| 132 | hard_disable_TSC(); | ||
| 133 | preempt_enable(); | ||
| 134 | } | ||
| 135 | |||
| 136 | static void hard_enable_TSC(void) | ||
| 137 | { | ||
| 138 | write_cr4(read_cr4() & ~X86_CR4_TSD); | ||
| 139 | } | ||
| 140 | |||
| 141 | static void enable_TSC(void) | ||
| 142 | { | ||
| 143 | preempt_disable(); | ||
| 144 | if (test_and_clear_thread_flag(TIF_NOTSC)) | ||
| 145 | /* | ||
| 146 | * Must flip the CPU state synchronously with | ||
| 147 | * TIF_NOTSC in the current running context. | ||
| 148 | */ | ||
| 149 | hard_enable_TSC(); | ||
| 150 | preempt_enable(); | ||
| 151 | } | ||
| 152 | |||
| 153 | int get_tsc_mode(unsigned long adr) | ||
| 154 | { | ||
| 155 | unsigned int val; | ||
| 156 | |||
| 157 | if (test_thread_flag(TIF_NOTSC)) | ||
| 158 | val = PR_TSC_SIGSEGV; | ||
| 159 | else | ||
| 160 | val = PR_TSC_ENABLE; | ||
| 161 | |||
| 162 | return put_user(val, (unsigned int __user *)adr); | ||
| 163 | } | ||
| 164 | |||
| 165 | int set_tsc_mode(unsigned int val) | ||
| 166 | { | ||
| 167 | if (val == PR_TSC_SIGSEGV) | ||
| 168 | disable_TSC(); | ||
| 169 | else if (val == PR_TSC_ENABLE) | ||
| 170 | enable_TSC(); | ||
| 171 | else | ||
| 172 | return -EINVAL; | ||
| 173 | |||
| 174 | return 0; | ||
| 175 | } | ||
| 176 | |||
| 177 | void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | ||
| 178 | struct tss_struct *tss) | ||
| 179 | { | ||
| 180 | struct thread_struct *prev, *next; | ||
| 181 | |||
| 182 | prev = &prev_p->thread; | ||
| 183 | next = &next_p->thread; | ||
| 184 | |||
| 185 | if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) || | ||
| 186 | test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR)) | ||
| 187 | ds_switch_to(prev_p, next_p); | ||
| 188 | else if (next->debugctlmsr != prev->debugctlmsr) | ||
| 189 | update_debugctlmsr(next->debugctlmsr); | ||
| 190 | |||
| 191 | if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { | ||
| 192 | set_debugreg(next->debugreg0, 0); | ||
| 193 | set_debugreg(next->debugreg1, 1); | ||
| 194 | set_debugreg(next->debugreg2, 2); | ||
| 195 | set_debugreg(next->debugreg3, 3); | ||
| 196 | /* no 4 and 5 */ | ||
| 197 | set_debugreg(next->debugreg6, 6); | ||
| 198 | set_debugreg(next->debugreg7, 7); | ||
| 199 | } | ||
| 200 | |||
| 201 | if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ | ||
| 202 | test_tsk_thread_flag(next_p, TIF_NOTSC)) { | ||
| 203 | /* prev and next are different */ | ||
| 204 | if (test_tsk_thread_flag(next_p, TIF_NOTSC)) | ||
| 205 | hard_disable_TSC(); | ||
| 206 | else | ||
| 207 | hard_enable_TSC(); | ||
| 208 | } | ||
| 209 | |||
| 210 | if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { | ||
| 211 | /* | ||
| 212 | * Copy the relevant range of the IO bitmap. | ||
| 213 | * Normally this is 128 bytes or less: | ||
| 214 | */ | ||
| 215 | memcpy(tss->io_bitmap, next->io_bitmap_ptr, | ||
| 216 | max(prev->io_bitmap_max, next->io_bitmap_max)); | ||
| 217 | } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) { | ||
| 218 | /* | ||
| 219 | * Clear any possible leftover bits: | ||
| 220 | */ | ||
| 221 | memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); | ||
| 222 | } | ||
| 223 | } | ||
| 224 | |||
| 225 | int sys_fork(struct pt_regs *regs) | ||
| 226 | { | ||
| 227 | return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL); | ||
| 228 | } | ||
| 229 | |||
| 230 | /* | ||
| 231 | * This is trivial, and on the face of it looks like it | ||
| 232 | * could equally well be done in user mode. | ||
| 233 | * | ||
| 234 | * Not so, for quite unobvious reasons - register pressure. | ||
| 235 | * In user mode vfork() cannot have a stack frame, and if | ||
| 236 | * done by calling the "clone()" system call directly, you | ||
| 237 | * do not have enough call-clobbered registers to hold all | ||
| 238 | * the information you need. | ||
| 239 | */ | ||
| 240 | int sys_vfork(struct pt_regs *regs) | ||
| 241 | { | ||
| 242 | return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0, | ||
| 243 | NULL, NULL); | ||
| 244 | } | ||
| 245 | |||
| 246 | |||
| 247 | /* | ||
| 59 | * Idle related variables and functions | 248 | * Idle related variables and functions |
| 60 | */ | 249 | */ |
| 61 | unsigned long boot_option_idle_override = 0; | 250 | unsigned long boot_option_idle_override = 0; |
| @@ -135,7 +324,7 @@ void stop_this_cpu(void *dummy) | |||
| 135 | /* | 324 | /* |
| 136 | * Remove this CPU: | 325 | * Remove this CPU: |
| 137 | */ | 326 | */ |
| 138 | cpu_clear(smp_processor_id(), cpu_online_map); | 327 | set_cpu_online(smp_processor_id(), false); |
| 139 | disable_local_APIC(); | 328 | disable_local_APIC(); |
| 140 | 329 | ||
| 141 | for (;;) { | 330 | for (;;) { |
| @@ -285,12 +474,13 @@ static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c) | |||
| 285 | return 1; | 474 | return 1; |
| 286 | } | 475 | } |
| 287 | 476 | ||
| 288 | static cpumask_t c1e_mask = CPU_MASK_NONE; | 477 | static cpumask_var_t c1e_mask; |
| 289 | static int c1e_detected; | 478 | static int c1e_detected; |
| 290 | 479 | ||
| 291 | void c1e_remove_cpu(int cpu) | 480 | void c1e_remove_cpu(int cpu) |
| 292 | { | 481 | { |
| 293 | cpu_clear(cpu, c1e_mask); | 482 | if (c1e_mask != NULL) |
| 483 | cpumask_clear_cpu(cpu, c1e_mask); | ||
| 294 | } | 484 | } |
| 295 | 485 | ||
| 296 | /* | 486 | /* |
| @@ -319,8 +509,8 @@ static void c1e_idle(void) | |||
| 319 | if (c1e_detected) { | 509 | if (c1e_detected) { |
| 320 | int cpu = smp_processor_id(); | 510 | int cpu = smp_processor_id(); |
| 321 | 511 | ||
| 322 | if (!cpu_isset(cpu, c1e_mask)) { | 512 | if (!cpumask_test_cpu(cpu, c1e_mask)) { |
| 323 | cpu_set(cpu, c1e_mask); | 513 | cpumask_set_cpu(cpu, c1e_mask); |
| 324 | /* | 514 | /* |
| 325 | * Force broadcast so ACPI can not interfere. Needs | 515 | * Force broadcast so ACPI can not interfere. Needs |
| 326 | * to run with interrupts enabled as it uses | 516 | * to run with interrupts enabled as it uses |
| @@ -350,7 +540,7 @@ static void c1e_idle(void) | |||
| 350 | 540 | ||
| 351 | void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) | 541 | void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) |
| 352 | { | 542 | { |
| 353 | #ifdef CONFIG_X86_SMP | 543 | #ifdef CONFIG_SMP |
| 354 | if (pm_idle == poll_idle && smp_num_siblings > 1) { | 544 | if (pm_idle == poll_idle && smp_num_siblings > 1) { |
| 355 | printk(KERN_WARNING "WARNING: polling idle and HT enabled," | 545 | printk(KERN_WARNING "WARNING: polling idle and HT enabled," |
| 356 | " performance may degrade.\n"); | 546 | " performance may degrade.\n"); |
| @@ -372,6 +562,15 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) | |||
| 372 | pm_idle = default_idle; | 562 | pm_idle = default_idle; |
| 373 | } | 563 | } |
| 374 | 564 | ||
| 565 | void __init init_c1e_mask(void) | ||
| 566 | { | ||
| 567 | /* If we're using c1e_idle, we need to allocate c1e_mask. */ | ||
| 568 | if (pm_idle == c1e_idle) { | ||
| 569 | alloc_cpumask_var(&c1e_mask, GFP_KERNEL); | ||
| 570 | cpumask_clear(c1e_mask); | ||
| 571 | } | ||
| 572 | } | ||
| 573 | |||
| 375 | static int __init idle_setup(char *str) | 574 | static int __init idle_setup(char *str) |
| 376 | { | 575 | { |
| 377 | if (!str) | 576 | if (!str) |
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index bd4da2af08ae..14014d766cad 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
| @@ -11,6 +11,7 @@ | |||
| 11 | 11 | ||
| 12 | #include <stdarg.h> | 12 | #include <stdarg.h> |
| 13 | 13 | ||
| 14 | #include <linux/stackprotector.h> | ||
| 14 | #include <linux/cpu.h> | 15 | #include <linux/cpu.h> |
| 15 | #include <linux/errno.h> | 16 | #include <linux/errno.h> |
| 16 | #include <linux/sched.h> | 17 | #include <linux/sched.h> |
| @@ -66,9 +67,6 @@ asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); | |||
| 66 | DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; | 67 | DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; |
| 67 | EXPORT_PER_CPU_SYMBOL(current_task); | 68 | EXPORT_PER_CPU_SYMBOL(current_task); |
| 68 | 69 | ||
| 69 | DEFINE_PER_CPU(int, cpu_number); | ||
| 70 | EXPORT_PER_CPU_SYMBOL(cpu_number); | ||
| 71 | |||
| 72 | /* | 70 | /* |
| 73 | * Return saved PC of a blocked thread. | 71 | * Return saved PC of a blocked thread. |
| 74 | */ | 72 | */ |
| @@ -94,6 +92,15 @@ void cpu_idle(void) | |||
| 94 | { | 92 | { |
| 95 | int cpu = smp_processor_id(); | 93 | int cpu = smp_processor_id(); |
| 96 | 94 | ||
| 95 | /* | ||
| 96 | * If we're the non-boot CPU, nothing set the stack canary up | ||
| 97 | * for us. CPU0 already has it initialized but no harm in | ||
| 98 | * doing it again. This is a good place for updating it, as | ||
| 99 | * we wont ever return from this function (so the invalid | ||
| 100 | * canaries already on the stack wont ever trigger). | ||
| 101 | */ | ||
| 102 | boot_init_stack_canary(); | ||
| 103 | |||
| 97 | current_thread_info()->status |= TS_POLLING; | 104 | current_thread_info()->status |= TS_POLLING; |
| 98 | 105 | ||
| 99 | /* endless idle loop with no priority at all */ | 106 | /* endless idle loop with no priority at all */ |
| @@ -108,7 +115,6 @@ void cpu_idle(void) | |||
| 108 | play_dead(); | 115 | play_dead(); |
| 109 | 116 | ||
| 110 | local_irq_disable(); | 117 | local_irq_disable(); |
| 111 | __get_cpu_var(irq_stat).idle_timestamp = jiffies; | ||
| 112 | /* Don't trace irqs off for idle */ | 118 | /* Don't trace irqs off for idle */ |
| 113 | stop_critical_timings(); | 119 | stop_critical_timings(); |
| 114 | pm_idle(); | 120 | pm_idle(); |
| @@ -132,7 +138,7 @@ void __show_regs(struct pt_regs *regs, int all) | |||
| 132 | if (user_mode_vm(regs)) { | 138 | if (user_mode_vm(regs)) { |
| 133 | sp = regs->sp; | 139 | sp = regs->sp; |
| 134 | ss = regs->ss & 0xffff; | 140 | ss = regs->ss & 0xffff; |
| 135 | savesegment(gs, gs); | 141 | gs = get_user_gs(regs); |
| 136 | } else { | 142 | } else { |
| 137 | sp = (unsigned long) (&regs->sp); | 143 | sp = (unsigned long) (&regs->sp); |
| 138 | savesegment(ss, ss); | 144 | savesegment(ss, ss); |
| @@ -213,6 +219,7 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) | |||
| 213 | regs.ds = __USER_DS; | 219 | regs.ds = __USER_DS; |
| 214 | regs.es = __USER_DS; | 220 | regs.es = __USER_DS; |
| 215 | regs.fs = __KERNEL_PERCPU; | 221 | regs.fs = __KERNEL_PERCPU; |
| 222 | regs.gs = __KERNEL_STACK_CANARY; | ||
| 216 | regs.orig_ax = -1; | 223 | regs.orig_ax = -1; |
| 217 | regs.ip = (unsigned long) kernel_thread_helper; | 224 | regs.ip = (unsigned long) kernel_thread_helper; |
| 218 | regs.cs = __KERNEL_CS | get_kernel_rpl(); | 225 | regs.cs = __KERNEL_CS | get_kernel_rpl(); |
| @@ -223,55 +230,6 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) | |||
| 223 | } | 230 | } |
| 224 | EXPORT_SYMBOL(kernel_thread); | 231 | EXPORT_SYMBOL(kernel_thread); |
| 225 | 232 | ||
| 226 | /* | ||
| 227 | * Free current thread data structures etc.. | ||
| 228 | */ | ||
| 229 | void exit_thread(void) | ||
| 230 | { | ||
| 231 | /* The process may have allocated an io port bitmap... nuke it. */ | ||
| 232 | if (unlikely(test_thread_flag(TIF_IO_BITMAP))) { | ||
| 233 | struct task_struct *tsk = current; | ||
| 234 | struct thread_struct *t = &tsk->thread; | ||
| 235 | int cpu = get_cpu(); | ||
| 236 | struct tss_struct *tss = &per_cpu(init_tss, cpu); | ||
| 237 | |||
| 238 | kfree(t->io_bitmap_ptr); | ||
| 239 | t->io_bitmap_ptr = NULL; | ||
| 240 | clear_thread_flag(TIF_IO_BITMAP); | ||
| 241 | /* | ||
| 242 | * Careful, clear this in the TSS too: | ||
| 243 | */ | ||
| 244 | memset(tss->io_bitmap, 0xff, tss->io_bitmap_max); | ||
| 245 | t->io_bitmap_max = 0; | ||
| 246 | tss->io_bitmap_owner = NULL; | ||
| 247 | tss->io_bitmap_max = 0; | ||
| 248 | tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; | ||
| 249 | put_cpu(); | ||
| 250 | } | ||
| 251 | |||
| 252 | ds_exit_thread(current); | ||
| 253 | } | ||
| 254 | |||
| 255 | void flush_thread(void) | ||
| 256 | { | ||
| 257 | struct task_struct *tsk = current; | ||
| 258 | |||
| 259 | tsk->thread.debugreg0 = 0; | ||
| 260 | tsk->thread.debugreg1 = 0; | ||
| 261 | tsk->thread.debugreg2 = 0; | ||
| 262 | tsk->thread.debugreg3 = 0; | ||
| 263 | tsk->thread.debugreg6 = 0; | ||
| 264 | tsk->thread.debugreg7 = 0; | ||
| 265 | memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); | ||
| 266 | clear_tsk_thread_flag(tsk, TIF_DEBUG); | ||
| 267 | /* | ||
| 268 | * Forget coprocessor state.. | ||
| 269 | */ | ||
| 270 | tsk->fpu_counter = 0; | ||
| 271 | clear_fpu(tsk); | ||
| 272 | clear_used_math(); | ||
| 273 | } | ||
| 274 | |||
| 275 | void release_thread(struct task_struct *dead_task) | 233 | void release_thread(struct task_struct *dead_task) |
| 276 | { | 234 | { |
| 277 | BUG_ON(dead_task->mm); | 235 | BUG_ON(dead_task->mm); |
| @@ -305,7 +263,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, | |||
| 305 | 263 | ||
| 306 | p->thread.ip = (unsigned long) ret_from_fork; | 264 | p->thread.ip = (unsigned long) ret_from_fork; |
| 307 | 265 | ||
| 308 | savesegment(gs, p->thread.gs); | 266 | task_user_gs(p) = get_user_gs(regs); |
| 309 | 267 | ||
| 310 | tsk = current; | 268 | tsk = current; |
| 311 | if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { | 269 | if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { |
| @@ -343,7 +301,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, | |||
| 343 | void | 301 | void |
| 344 | start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) | 302 | start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) |
| 345 | { | 303 | { |
| 346 | __asm__("movl %0, %%gs" : : "r"(0)); | 304 | set_user_gs(regs, 0); |
| 347 | regs->fs = 0; | 305 | regs->fs = 0; |
| 348 | set_fs(USER_DS); | 306 | set_fs(USER_DS); |
| 349 | regs->ds = __USER_DS; | 307 | regs->ds = __USER_DS; |
| @@ -359,127 +317,6 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) | |||
| 359 | } | 317 | } |
| 360 | EXPORT_SYMBOL_GPL(start_thread); | 318 | EXPORT_SYMBOL_GPL(start_thread); |
| 361 | 319 | ||
| 362 | static void hard_disable_TSC(void) | ||
| 363 | { | ||
| 364 | write_cr4(read_cr4() | X86_CR4_TSD); | ||
| 365 | } | ||
| 366 | |||
| 367 | void disable_TSC(void) | ||
| 368 | { | ||
| 369 | preempt_disable(); | ||
| 370 | if (!test_and_set_thread_flag(TIF_NOTSC)) | ||
| 371 | /* | ||
| 372 | * Must flip the CPU state synchronously with | ||
| 373 | * TIF_NOTSC in the current running context. | ||
| 374 | */ | ||
| 375 | hard_disable_TSC(); | ||
| 376 | preempt_enable(); | ||
| 377 | } | ||
| 378 | |||
| 379 | static void hard_enable_TSC(void) | ||
| 380 | { | ||
| 381 | write_cr4(read_cr4() & ~X86_CR4_TSD); | ||
| 382 | } | ||
| 383 | |||
| 384 | static void enable_TSC(void) | ||
| 385 | { | ||
| 386 | preempt_disable(); | ||
| 387 | if (test_and_clear_thread_flag(TIF_NOTSC)) | ||
| 388 | /* | ||
| 389 | * Must flip the CPU state synchronously with | ||
| 390 | * TIF_NOTSC in the current running context. | ||
| 391 | */ | ||
| 392 | hard_enable_TSC(); | ||
| 393 | preempt_enable(); | ||
| 394 | } | ||
| 395 | |||
| 396 | int get_tsc_mode(unsigned long adr) | ||
| 397 | { | ||
| 398 | unsigned int val; | ||
| 399 | |||
| 400 | if (test_thread_flag(TIF_NOTSC)) | ||
| 401 | val = PR_TSC_SIGSEGV; | ||
| 402 | else | ||
| 403 | val = PR_TSC_ENABLE; | ||
| 404 | |||
| 405 | return put_user(val, (unsigned int __user *)adr); | ||
| 406 | } | ||
| 407 | |||
| 408 | int set_tsc_mode(unsigned int val) | ||
| 409 | { | ||
| 410 | if (val == PR_TSC_SIGSEGV) | ||
| 411 | disable_TSC(); | ||
| 412 | else if (val == PR_TSC_ENABLE) | ||
| 413 | enable_TSC(); | ||
| 414 | else | ||
| 415 | return -EINVAL; | ||
| 416 | |||
| 417 | return 0; | ||
| 418 | } | ||
| 419 | |||
| 420 | static noinline void | ||
| 421 | __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | ||
| 422 | struct tss_struct *tss) | ||
| 423 | { | ||
| 424 | struct thread_struct *prev, *next; | ||
| 425 | |||
| 426 | prev = &prev_p->thread; | ||
| 427 | next = &next_p->thread; | ||
| 428 | |||
| 429 | if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) || | ||
| 430 | test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR)) | ||
| 431 | ds_switch_to(prev_p, next_p); | ||
| 432 | else if (next->debugctlmsr != prev->debugctlmsr) | ||
| 433 | update_debugctlmsr(next->debugctlmsr); | ||
| 434 | |||
| 435 | if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { | ||
| 436 | set_debugreg(next->debugreg0, 0); | ||
| 437 | set_debugreg(next->debugreg1, 1); | ||
| 438 | set_debugreg(next->debugreg2, 2); | ||
| 439 | set_debugreg(next->debugreg3, 3); | ||
| 440 | /* no 4 and 5 */ | ||
| 441 | set_debugreg(next->debugreg6, 6); | ||
| 442 | set_debugreg(next->debugreg7, 7); | ||
| 443 | } | ||
| 444 | |||
| 445 | if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ | ||
| 446 | test_tsk_thread_flag(next_p, TIF_NOTSC)) { | ||
| 447 | /* prev and next are different */ | ||
| 448 | if (test_tsk_thread_flag(next_p, TIF_NOTSC)) | ||
| 449 | hard_disable_TSC(); | ||
| 450 | else | ||
| 451 | hard_enable_TSC(); | ||
| 452 | } | ||
| 453 | |||
| 454 | if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { | ||
| 455 | /* | ||
| 456 | * Disable the bitmap via an invalid offset. We still cache | ||
| 457 | * the previous bitmap owner and the IO bitmap contents: | ||
| 458 | */ | ||
| 459 | tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; | ||
| 460 | return; | ||
| 461 | } | ||
| 462 | |||
| 463 | if (likely(next == tss->io_bitmap_owner)) { | ||
| 464 | /* | ||
| 465 | * Previous owner of the bitmap (hence the bitmap content) | ||
| 466 | * matches the next task, we dont have to do anything but | ||
| 467 | * to set a valid offset in the TSS: | ||
| 468 | */ | ||
| 469 | tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; | ||
| 470 | return; | ||
| 471 | } | ||
| 472 | /* | ||
| 473 | * Lazy TSS's I/O bitmap copy. We set an invalid offset here | ||
| 474 | * and we let the task to get a GPF in case an I/O instruction | ||
| 475 | * is performed. The handler of the GPF will verify that the | ||
| 476 | * faulting task has a valid I/O bitmap and, it true, does the | ||
| 477 | * real copy and restart the instruction. This will save us | ||
| 478 | * redundant copies when the currently switched task does not | ||
| 479 | * perform any I/O during its timeslice. | ||
| 480 | */ | ||
| 481 | tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY; | ||
| 482 | } | ||
| 483 | 320 | ||
| 484 | /* | 321 | /* |
| 485 | * switch_to(x,yn) should switch tasks from x to y. | 322 | * switch_to(x,yn) should switch tasks from x to y. |
| @@ -540,7 +377,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
| 540 | * used %fs or %gs (it does not today), or if the kernel is | 377 | * used %fs or %gs (it does not today), or if the kernel is |
| 541 | * running inside of a hypervisor layer. | 378 | * running inside of a hypervisor layer. |
| 542 | */ | 379 | */ |
| 543 | savesegment(gs, prev->gs); | 380 | lazy_save_gs(prev->gs); |
| 544 | 381 | ||
| 545 | /* | 382 | /* |
| 546 | * Load the per-thread Thread-Local Storage descriptor. | 383 | * Load the per-thread Thread-Local Storage descriptor. |
| @@ -586,64 +423,44 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
| 586 | * Restore %gs if needed (which is common) | 423 | * Restore %gs if needed (which is common) |
| 587 | */ | 424 | */ |
| 588 | if (prev->gs | next->gs) | 425 | if (prev->gs | next->gs) |
| 589 | loadsegment(gs, next->gs); | 426 | lazy_load_gs(next->gs); |
| 590 | 427 | ||
| 591 | x86_write_percpu(current_task, next_p); | 428 | percpu_write(current_task, next_p); |
| 592 | 429 | ||
| 593 | return prev_p; | 430 | return prev_p; |
| 594 | } | 431 | } |
| 595 | 432 | ||
| 596 | asmlinkage int sys_fork(struct pt_regs regs) | 433 | int sys_clone(struct pt_regs *regs) |
| 597 | { | ||
| 598 | return do_fork(SIGCHLD, regs.sp, ®s, 0, NULL, NULL); | ||
| 599 | } | ||
| 600 | |||
| 601 | asmlinkage int sys_clone(struct pt_regs regs) | ||
| 602 | { | 434 | { |
| 603 | unsigned long clone_flags; | 435 | unsigned long clone_flags; |
| 604 | unsigned long newsp; | 436 | unsigned long newsp; |
| 605 | int __user *parent_tidptr, *child_tidptr; | 437 | int __user *parent_tidptr, *child_tidptr; |
| 606 | 438 | ||
| 607 | clone_flags = regs.bx; | 439 | clone_flags = regs->bx; |
| 608 | newsp = regs.cx; | 440 | newsp = regs->cx; |
| 609 | parent_tidptr = (int __user *)regs.dx; | 441 | parent_tidptr = (int __user *)regs->dx; |
| 610 | child_tidptr = (int __user *)regs.di; | 442 | child_tidptr = (int __user *)regs->di; |
| 611 | if (!newsp) | 443 | if (!newsp) |
| 612 | newsp = regs.sp; | 444 | newsp = regs->sp; |
| 613 | return do_fork(clone_flags, newsp, ®s, 0, parent_tidptr, child_tidptr); | 445 | return do_fork(clone_flags, newsp, regs, 0, parent_tidptr, child_tidptr); |
| 614 | } | ||
| 615 | |||
| 616 | /* | ||
| 617 | * This is trivial, and on the face of it looks like it | ||
| 618 | * could equally well be done in user mode. | ||
| 619 | * | ||
| 620 | * Not so, for quite unobvious reasons - register pressure. | ||
| 621 | * In user mode vfork() cannot have a stack frame, and if | ||
| 622 | * done by calling the "clone()" system call directly, you | ||
| 623 | * do not have enough call-clobbered registers to hold all | ||
| 624 | * the information you need. | ||
| 625 | */ | ||
| 626 | asmlinkage int sys_vfork(struct pt_regs regs) | ||
| 627 | { | ||
| 628 | return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.sp, ®s, 0, NULL, NULL); | ||
| 629 | } | 446 | } |
| 630 | 447 | ||
| 631 | /* | 448 | /* |
| 632 | * sys_execve() executes a new program. | 449 | * sys_execve() executes a new program. |
| 633 | */ | 450 | */ |
| 634 | asmlinkage int sys_execve(struct pt_regs regs) | 451 | int sys_execve(struct pt_regs *regs) |
| 635 | { | 452 | { |
| 636 | int error; | 453 | int error; |
| 637 | char *filename; | 454 | char *filename; |
| 638 | 455 | ||
| 639 | filename = getname((char __user *) regs.bx); | 456 | filename = getname((char __user *) regs->bx); |
| 640 | error = PTR_ERR(filename); | 457 | error = PTR_ERR(filename); |
| 641 | if (IS_ERR(filename)) | 458 | if (IS_ERR(filename)) |
| 642 | goto out; | 459 | goto out; |
| 643 | error = do_execve(filename, | 460 | error = do_execve(filename, |
| 644 | (char __user * __user *) regs.cx, | 461 | (char __user * __user *) regs->cx, |
| 645 | (char __user * __user *) regs.dx, | 462 | (char __user * __user *) regs->dx, |
| 646 | ®s); | 463 | regs); |
| 647 | if (error == 0) { | 464 | if (error == 0) { |
| 648 | /* Make sure we don't return using sysenter.. */ | 465 | /* Make sure we don't return using sysenter.. */ |
| 649 | set_thread_flag(TIF_IRET); | 466 | set_thread_flag(TIF_IRET); |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 85b4cb5c1980..abb7e6a7f0c6 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
| @@ -16,6 +16,7 @@ | |||
| 16 | 16 | ||
| 17 | #include <stdarg.h> | 17 | #include <stdarg.h> |
| 18 | 18 | ||
| 19 | #include <linux/stackprotector.h> | ||
| 19 | #include <linux/cpu.h> | 20 | #include <linux/cpu.h> |
| 20 | #include <linux/errno.h> | 21 | #include <linux/errno.h> |
| 21 | #include <linux/sched.h> | 22 | #include <linux/sched.h> |
| @@ -47,7 +48,6 @@ | |||
| 47 | #include <asm/processor.h> | 48 | #include <asm/processor.h> |
| 48 | #include <asm/i387.h> | 49 | #include <asm/i387.h> |
| 49 | #include <asm/mmu_context.h> | 50 | #include <asm/mmu_context.h> |
| 50 | #include <asm/pda.h> | ||
| 51 | #include <asm/prctl.h> | 51 | #include <asm/prctl.h> |
| 52 | #include <asm/desc.h> | 52 | #include <asm/desc.h> |
| 53 | #include <asm/proto.h> | 53 | #include <asm/proto.h> |
| @@ -58,6 +58,12 @@ | |||
| 58 | 58 | ||
| 59 | asmlinkage extern void ret_from_fork(void); | 59 | asmlinkage extern void ret_from_fork(void); |
| 60 | 60 | ||
| 61 | DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; | ||
| 62 | EXPORT_PER_CPU_SYMBOL(current_task); | ||
| 63 | |||
| 64 | DEFINE_PER_CPU(unsigned long, old_rsp); | ||
| 65 | static DEFINE_PER_CPU(unsigned char, is_idle); | ||
| 66 | |||
| 61 | unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED; | 67 | unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED; |
| 62 | 68 | ||
| 63 | static ATOMIC_NOTIFIER_HEAD(idle_notifier); | 69 | static ATOMIC_NOTIFIER_HEAD(idle_notifier); |
| @@ -76,13 +82,13 @@ EXPORT_SYMBOL_GPL(idle_notifier_unregister); | |||
| 76 | 82 | ||
| 77 | void enter_idle(void) | 83 | void enter_idle(void) |
| 78 | { | 84 | { |
| 79 | write_pda(isidle, 1); | 85 | percpu_write(is_idle, 1); |
| 80 | atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL); | 86 | atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL); |
| 81 | } | 87 | } |
| 82 | 88 | ||
| 83 | static void __exit_idle(void) | 89 | static void __exit_idle(void) |
| 84 | { | 90 | { |
| 85 | if (test_and_clear_bit_pda(0, isidle) == 0) | 91 | if (x86_test_and_clear_bit_percpu(0, is_idle) == 0) |
| 86 | return; | 92 | return; |
| 87 | atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL); | 93 | atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL); |
| 88 | } | 94 | } |
| @@ -112,6 +118,16 @@ static inline void play_dead(void) | |||
| 112 | void cpu_idle(void) | 118 | void cpu_idle(void) |
| 113 | { | 119 | { |
| 114 | current_thread_info()->status |= TS_POLLING; | 120 | current_thread_info()->status |= TS_POLLING; |
| 121 | |||
| 122 | /* | ||
| 123 | * If we're the non-boot CPU, nothing set the stack canary up | ||
| 124 | * for us. CPU0 already has it initialized but no harm in | ||
| 125 | * doing it again. This is a good place for updating it, as | ||
| 126 | * we wont ever return from this function (so the invalid | ||
| 127 | * canaries already on the stack wont ever trigger). | ||
| 128 | */ | ||
| 129 | boot_init_stack_canary(); | ||
| 130 | |||
| 115 | /* endless idle loop with no priority at all */ | 131 | /* endless idle loop with no priority at all */ |
| 116 | while (1) { | 132 | while (1) { |
| 117 | tick_nohz_stop_sched_tick(1); | 133 | tick_nohz_stop_sched_tick(1); |
| @@ -221,61 +237,6 @@ void show_regs(struct pt_regs *regs) | |||
| 221 | show_trace(NULL, regs, (void *)(regs + 1), regs->bp); | 237 | show_trace(NULL, regs, (void *)(regs + 1), regs->bp); |
| 222 | } | 238 | } |
| 223 | 239 | ||
| 224 | /* | ||
| 225 | * Free current thread data structures etc.. | ||
| 226 | */ | ||
| 227 | void exit_thread(void) | ||
| 228 | { | ||
| 229 | struct task_struct *me = current; | ||
| 230 | struct thread_struct *t = &me->thread; | ||
| 231 | |||
| 232 | if (me->thread.io_bitmap_ptr) { | ||
| 233 | struct tss_struct *tss = &per_cpu(init_tss, get_cpu()); | ||
| 234 | |||
| 235 | kfree(t->io_bitmap_ptr); | ||
| 236 | t->io_bitmap_ptr = NULL; | ||
| 237 | clear_thread_flag(TIF_IO_BITMAP); | ||
| 238 | /* | ||
| 239 | * Careful, clear this in the TSS too: | ||
| 240 | */ | ||
| 241 | memset(tss->io_bitmap, 0xff, t->io_bitmap_max); | ||
| 242 | t->io_bitmap_max = 0; | ||
| 243 | put_cpu(); | ||
| 244 | } | ||
| 245 | |||
| 246 | ds_exit_thread(current); | ||
| 247 | } | ||
| 248 | |||
| 249 | void flush_thread(void) | ||
| 250 | { | ||
| 251 | struct task_struct *tsk = current; | ||
| 252 | |||
| 253 | if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) { | ||
| 254 | clear_tsk_thread_flag(tsk, TIF_ABI_PENDING); | ||
| 255 | if (test_tsk_thread_flag(tsk, TIF_IA32)) { | ||
| 256 | clear_tsk_thread_flag(tsk, TIF_IA32); | ||
| 257 | } else { | ||
| 258 | set_tsk_thread_flag(tsk, TIF_IA32); | ||
| 259 | current_thread_info()->status |= TS_COMPAT; | ||
| 260 | } | ||
| 261 | } | ||
| 262 | clear_tsk_thread_flag(tsk, TIF_DEBUG); | ||
| 263 | |||
| 264 | tsk->thread.debugreg0 = 0; | ||
| 265 | tsk->thread.debugreg1 = 0; | ||
| 266 | tsk->thread.debugreg2 = 0; | ||
| 267 | tsk->thread.debugreg3 = 0; | ||
| 268 | tsk->thread.debugreg6 = 0; | ||
| 269 | tsk->thread.debugreg7 = 0; | ||
| 270 | memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); | ||
| 271 | /* | ||
| 272 | * Forget coprocessor state.. | ||
| 273 | */ | ||
| 274 | tsk->fpu_counter = 0; | ||
| 275 | clear_fpu(tsk); | ||
| 276 | clear_used_math(); | ||
| 277 | } | ||
| 278 | |||
| 279 | void release_thread(struct task_struct *dead_task) | 240 | void release_thread(struct task_struct *dead_task) |
| 280 | { | 241 | { |
| 281 | if (dead_task->mm) { | 242 | if (dead_task->mm) { |
| @@ -397,7 +358,7 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) | |||
| 397 | load_gs_index(0); | 358 | load_gs_index(0); |
| 398 | regs->ip = new_ip; | 359 | regs->ip = new_ip; |
| 399 | regs->sp = new_sp; | 360 | regs->sp = new_sp; |
| 400 | write_pda(oldrsp, new_sp); | 361 | percpu_write(old_rsp, new_sp); |
| 401 | regs->cs = __USER_CS; | 362 | regs->cs = __USER_CS; |
| 402 | regs->ss = __USER_DS; | 363 | regs->ss = __USER_DS; |
| 403 | regs->flags = 0x200; | 364 | regs->flags = 0x200; |
| @@ -409,118 +370,6 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) | |||
| 409 | } | 370 | } |
| 410 | EXPORT_SYMBOL_GPL(start_thread); | 371 | EXPORT_SYMBOL_GPL(start_thread); |
| 411 | 372 | ||
| 412 | static void hard_disable_TSC(void) | ||
| 413 | { | ||
| 414 | write_cr4(read_cr4() | X86_CR4_TSD); | ||
| 415 | } | ||
| 416 | |||
| 417 | void disable_TSC(void) | ||
| 418 | { | ||
| 419 | preempt_disable(); | ||
| 420 | if (!test_and_set_thread_flag(TIF_NOTSC)) | ||
| 421 | /* | ||
| 422 | * Must flip the CPU state synchronously with | ||
| 423 | * TIF_NOTSC in the current running context. | ||
| 424 | */ | ||
| 425 | hard_disable_TSC(); | ||
| 426 | preempt_enable(); | ||
| 427 | } | ||
| 428 | |||
| 429 | static void hard_enable_TSC(void) | ||
| 430 | { | ||
| 431 | write_cr4(read_cr4() & ~X86_CR4_TSD); | ||
| 432 | } | ||
| 433 | |||
| 434 | static void enable_TSC(void) | ||
| 435 | { | ||
| 436 | preempt_disable(); | ||
| 437 | if (test_and_clear_thread_flag(TIF_NOTSC)) | ||
| 438 | /* | ||
| 439 | * Must flip the CPU state synchronously with | ||
| 440 | * TIF_NOTSC in the current running context. | ||
| 441 | */ | ||
| 442 | hard_enable_TSC(); | ||
| 443 | preempt_enable(); | ||
| 444 | } | ||
| 445 | |||
| 446 | int get_tsc_mode(unsigned long adr) | ||
| 447 | { | ||
| 448 | unsigned int val; | ||
| 449 | |||
| 450 | if (test_thread_flag(TIF_NOTSC)) | ||
| 451 | val = PR_TSC_SIGSEGV; | ||
| 452 | else | ||
| 453 | val = PR_TSC_ENABLE; | ||
| 454 | |||
| 455 | return put_user(val, (unsigned int __user *)adr); | ||
| 456 | } | ||
| 457 | |||
| 458 | int set_tsc_mode(unsigned int val) | ||
| 459 | { | ||
| 460 | if (val == PR_TSC_SIGSEGV) | ||
| 461 | disable_TSC(); | ||
| 462 | else if (val == PR_TSC_ENABLE) | ||
| 463 | enable_TSC(); | ||
| 464 | else | ||
| 465 | return -EINVAL; | ||
| 466 | |||
| 467 | return 0; | ||
| 468 | } | ||
| 469 | |||
| 470 | /* | ||
| 471 | * This special macro can be used to load a debugging register | ||
| 472 | */ | ||
| 473 | #define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r) | ||
| 474 | |||
| 475 | static inline void __switch_to_xtra(struct task_struct *prev_p, | ||
| 476 | struct task_struct *next_p, | ||
| 477 | struct tss_struct *tss) | ||
| 478 | { | ||
| 479 | struct thread_struct *prev, *next; | ||
| 480 | |||
| 481 | prev = &prev_p->thread, | ||
| 482 | next = &next_p->thread; | ||
| 483 | |||
| 484 | if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) || | ||
| 485 | test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR)) | ||
| 486 | ds_switch_to(prev_p, next_p); | ||
| 487 | else if (next->debugctlmsr != prev->debugctlmsr) | ||
| 488 | update_debugctlmsr(next->debugctlmsr); | ||
| 489 | |||
| 490 | if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { | ||
| 491 | loaddebug(next, 0); | ||
| 492 | loaddebug(next, 1); | ||
| 493 | loaddebug(next, 2); | ||
| 494 | loaddebug(next, 3); | ||
| 495 | /* no 4 and 5 */ | ||
| 496 | loaddebug(next, 6); | ||
| 497 | loaddebug(next, 7); | ||
| 498 | } | ||
| 499 | |||
| 500 | if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ | ||
| 501 | test_tsk_thread_flag(next_p, TIF_NOTSC)) { | ||
| 502 | /* prev and next are different */ | ||
| 503 | if (test_tsk_thread_flag(next_p, TIF_NOTSC)) | ||
| 504 | hard_disable_TSC(); | ||
| 505 | else | ||
| 506 | hard_enable_TSC(); | ||
| 507 | } | ||
| 508 | |||
| 509 | if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { | ||
| 510 | /* | ||
| 511 | * Copy the relevant range of the IO bitmap. | ||
| 512 | * Normally this is 128 bytes or less: | ||
| 513 | */ | ||
| 514 | memcpy(tss->io_bitmap, next->io_bitmap_ptr, | ||
| 515 | max(prev->io_bitmap_max, next->io_bitmap_max)); | ||
| 516 | } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) { | ||
| 517 | /* | ||
| 518 | * Clear any possible leftover bits: | ||
| 519 | */ | ||
| 520 | memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); | ||
| 521 | } | ||
| 522 | } | ||
| 523 | |||
| 524 | /* | 373 | /* |
| 525 | * switch_to(x,y) should switch tasks from x to y. | 374 | * switch_to(x,y) should switch tasks from x to y. |
| 526 | * | 375 | * |
| @@ -618,21 +467,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
| 618 | /* | 467 | /* |
| 619 | * Switch the PDA and FPU contexts. | 468 | * Switch the PDA and FPU contexts. |
| 620 | */ | 469 | */ |
| 621 | prev->usersp = read_pda(oldrsp); | 470 | prev->usersp = percpu_read(old_rsp); |
| 622 | write_pda(oldrsp, next->usersp); | 471 | percpu_write(old_rsp, next->usersp); |
| 623 | write_pda(pcurrent, next_p); | 472 | percpu_write(current_task, next_p); |
| 624 | 473 | ||
| 625 | write_pda(kernelstack, | 474 | percpu_write(kernel_stack, |
| 626 | (unsigned long)task_stack_page(next_p) + | 475 | (unsigned long)task_stack_page(next_p) + |
| 627 | THREAD_SIZE - PDA_STACKOFFSET); | 476 | THREAD_SIZE - KERNEL_STACK_OFFSET); |
| 628 | #ifdef CONFIG_CC_STACKPROTECTOR | ||
| 629 | write_pda(stack_canary, next_p->stack_canary); | ||
| 630 | /* | ||
| 631 | * Build time only check to make sure the stack_canary is at | ||
| 632 | * offset 40 in the pda; this is a gcc ABI requirement | ||
| 633 | */ | ||
| 634 | BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40); | ||
| 635 | #endif | ||
| 636 | 477 | ||
| 637 | /* | 478 | /* |
| 638 | * Now maybe reload the debug registers and handle I/O bitmaps | 479 | * Now maybe reload the debug registers and handle I/O bitmaps |
| @@ -686,11 +527,6 @@ void set_personality_64bit(void) | |||
| 686 | current->personality &= ~READ_IMPLIES_EXEC; | 527 | current->personality &= ~READ_IMPLIES_EXEC; |
| 687 | } | 528 | } |
| 688 | 529 | ||
| 689 | asmlinkage long sys_fork(struct pt_regs *regs) | ||
| 690 | { | ||
| 691 | return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL); | ||
| 692 | } | ||
| 693 | |||
| 694 | asmlinkage long | 530 | asmlinkage long |
| 695 | sys_clone(unsigned long clone_flags, unsigned long newsp, | 531 | sys_clone(unsigned long clone_flags, unsigned long newsp, |
| 696 | void __user *parent_tid, void __user *child_tid, struct pt_regs *regs) | 532 | void __user *parent_tid, void __user *child_tid, struct pt_regs *regs) |
| @@ -700,22 +536,6 @@ sys_clone(unsigned long clone_flags, unsigned long newsp, | |||
| 700 | return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid); | 536 | return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid); |
| 701 | } | 537 | } |
| 702 | 538 | ||
| 703 | /* | ||
| 704 | * This is trivial, and on the face of it looks like it | ||
| 705 | * could equally well be done in user mode. | ||
| 706 | * | ||
| 707 | * Not so, for quite unobvious reasons - register pressure. | ||
| 708 | * In user mode vfork() cannot have a stack frame, and if | ||
| 709 | * done by calling the "clone()" system call directly, you | ||
| 710 | * do not have enough call-clobbered registers to hold all | ||
| 711 | * the information you need. | ||
| 712 | */ | ||
| 713 | asmlinkage long sys_vfork(struct pt_regs *regs) | ||
| 714 | { | ||
| 715 | return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0, | ||
| 716 | NULL, NULL); | ||
| 717 | } | ||
| 718 | |||
| 719 | unsigned long get_wchan(struct task_struct *p) | 539 | unsigned long get_wchan(struct task_struct *p) |
| 720 | { | 540 | { |
| 721 | unsigned long stack; | 541 | unsigned long stack; |
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 06ca07f6ad86..3d9672e59c16 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
| @@ -75,10 +75,7 @@ static inline bool invalid_selector(u16 value) | |||
| 75 | static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno) | 75 | static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno) |
| 76 | { | 76 | { |
| 77 | BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0); | 77 | BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0); |
| 78 | regno >>= 2; | 78 | return ®s->bx + (regno >> 2); |
| 79 | if (regno > FS) | ||
| 80 | --regno; | ||
| 81 | return ®s->bx + regno; | ||
| 82 | } | 79 | } |
| 83 | 80 | ||
| 84 | static u16 get_segment_reg(struct task_struct *task, unsigned long offset) | 81 | static u16 get_segment_reg(struct task_struct *task, unsigned long offset) |
| @@ -90,9 +87,10 @@ static u16 get_segment_reg(struct task_struct *task, unsigned long offset) | |||
| 90 | if (offset != offsetof(struct user_regs_struct, gs)) | 87 | if (offset != offsetof(struct user_regs_struct, gs)) |
| 91 | retval = *pt_regs_access(task_pt_regs(task), offset); | 88 | retval = *pt_regs_access(task_pt_regs(task), offset); |
| 92 | else { | 89 | else { |
| 93 | retval = task->thread.gs; | ||
| 94 | if (task == current) | 90 | if (task == current) |
| 95 | savesegment(gs, retval); | 91 | retval = get_user_gs(task_pt_regs(task)); |
| 92 | else | ||
| 93 | retval = task_user_gs(task); | ||
| 96 | } | 94 | } |
| 97 | return retval; | 95 | return retval; |
| 98 | } | 96 | } |
| @@ -126,13 +124,10 @@ static int set_segment_reg(struct task_struct *task, | |||
| 126 | break; | 124 | break; |
| 127 | 125 | ||
| 128 | case offsetof(struct user_regs_struct, gs): | 126 | case offsetof(struct user_regs_struct, gs): |
| 129 | task->thread.gs = value; | ||
| 130 | if (task == current) | 127 | if (task == current) |
| 131 | /* | 128 | set_user_gs(task_pt_regs(task), value); |
| 132 | * The user-mode %gs is not affected by | 129 | else |
| 133 | * kernel entry, so we must update the CPU. | 130 | task_user_gs(task) = value; |
| 134 | */ | ||
| 135 | loadsegment(gs, value); | ||
| 136 | } | 131 | } |
| 137 | 132 | ||
| 138 | return 0; | 133 | return 0; |
| @@ -273,7 +268,7 @@ static unsigned long debugreg_addr_limit(struct task_struct *task) | |||
| 273 | if (test_tsk_thread_flag(task, TIF_IA32)) | 268 | if (test_tsk_thread_flag(task, TIF_IA32)) |
| 274 | return IA32_PAGE_OFFSET - 3; | 269 | return IA32_PAGE_OFFSET - 3; |
| 275 | #endif | 270 | #endif |
| 276 | return TASK_SIZE64 - 7; | 271 | return TASK_SIZE_MAX - 7; |
| 277 | } | 272 | } |
| 278 | 273 | ||
| 279 | #endif /* CONFIG_X86_32 */ | 274 | #endif /* CONFIG_X86_32 */ |
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 4526b3a75ed2..2aef36d8aca2 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c | |||
| @@ -14,6 +14,7 @@ | |||
| 14 | #include <asm/reboot.h> | 14 | #include <asm/reboot.h> |
| 15 | #include <asm/pci_x86.h> | 15 | #include <asm/pci_x86.h> |
| 16 | #include <asm/virtext.h> | 16 | #include <asm/virtext.h> |
| 17 | #include <asm/cpu.h> | ||
| 17 | 18 | ||
| 18 | #ifdef CONFIG_X86_32 | 19 | #ifdef CONFIG_X86_32 |
| 19 | # include <linux/dmi.h> | 20 | # include <linux/dmi.h> |
| @@ -23,8 +24,6 @@ | |||
| 23 | # include <asm/iommu.h> | 24 | # include <asm/iommu.h> |
| 24 | #endif | 25 | #endif |
| 25 | 26 | ||
| 26 | #include <mach_ipi.h> | ||
| 27 | |||
| 28 | /* | 27 | /* |
| 29 | * Power off function, if any | 28 | * Power off function, if any |
| 30 | */ | 29 | */ |
| @@ -658,7 +657,7 @@ static int crash_nmi_callback(struct notifier_block *self, | |||
| 658 | 657 | ||
| 659 | static void smp_send_nmi_allbutself(void) | 658 | static void smp_send_nmi_allbutself(void) |
| 660 | { | 659 | { |
| 661 | send_IPI_allbutself(NMI_VECTOR); | 660 | apic->send_IPI_allbutself(NMI_VECTOR); |
| 662 | } | 661 | } |
| 663 | 662 | ||
| 664 | static struct notifier_block crash_nmi_nb = { | 663 | static struct notifier_block crash_nmi_nb = { |
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S index a160f3119725..41235531b11c 100644 --- a/arch/x86/kernel/relocate_kernel_32.S +++ b/arch/x86/kernel/relocate_kernel_32.S | |||
| @@ -7,7 +7,7 @@ | |||
| 7 | */ | 7 | */ |
| 8 | 8 | ||
| 9 | #include <linux/linkage.h> | 9 | #include <linux/linkage.h> |
| 10 | #include <asm/page.h> | 10 | #include <asm/page_types.h> |
| 11 | #include <asm/kexec.h> | 11 | #include <asm/kexec.h> |
| 12 | #include <asm/processor-flags.h> | 12 | #include <asm/processor-flags.h> |
| 13 | 13 | ||
| @@ -17,7 +17,8 @@ | |||
| 17 | 17 | ||
| 18 | #define PTR(x) (x << 2) | 18 | #define PTR(x) (x << 2) |
| 19 | 19 | ||
| 20 | /* control_page + KEXEC_CONTROL_CODE_MAX_SIZE | 20 | /* |
| 21 | * control_page + KEXEC_CONTROL_CODE_MAX_SIZE | ||
| 21 | * ~ control_page + PAGE_SIZE are used as data storage and stack for | 22 | * ~ control_page + PAGE_SIZE are used as data storage and stack for |
| 22 | * jumping back | 23 | * jumping back |
| 23 | */ | 24 | */ |
| @@ -76,8 +77,10 @@ relocate_kernel: | |||
| 76 | movl %eax, CP_PA_SWAP_PAGE(%edi) | 77 | movl %eax, CP_PA_SWAP_PAGE(%edi) |
| 77 | movl %ebx, CP_PA_BACKUP_PAGES_MAP(%edi) | 78 | movl %ebx, CP_PA_BACKUP_PAGES_MAP(%edi) |
| 78 | 79 | ||
| 79 | /* get physical address of control page now */ | 80 | /* |
| 80 | /* this is impossible after page table switch */ | 81 | * get physical address of control page now |
| 82 | * this is impossible after page table switch | ||
| 83 | */ | ||
| 81 | movl PTR(PA_CONTROL_PAGE)(%ebp), %edi | 84 | movl PTR(PA_CONTROL_PAGE)(%ebp), %edi |
| 82 | 85 | ||
| 83 | /* switch to new set of page tables */ | 86 | /* switch to new set of page tables */ |
| @@ -97,7 +100,8 @@ identity_mapped: | |||
| 97 | /* store the start address on the stack */ | 100 | /* store the start address on the stack */ |
| 98 | pushl %edx | 101 | pushl %edx |
| 99 | 102 | ||
| 100 | /* Set cr0 to a known state: | 103 | /* |
| 104 | * Set cr0 to a known state: | ||
| 101 | * - Paging disabled | 105 | * - Paging disabled |
| 102 | * - Alignment check disabled | 106 | * - Alignment check disabled |
| 103 | * - Write protect disabled | 107 | * - Write protect disabled |
| @@ -113,7 +117,8 @@ identity_mapped: | |||
| 113 | /* clear cr4 if applicable */ | 117 | /* clear cr4 if applicable */ |
| 114 | testl %ecx, %ecx | 118 | testl %ecx, %ecx |
| 115 | jz 1f | 119 | jz 1f |
| 116 | /* Set cr4 to a known state: | 120 | /* |
| 121 | * Set cr4 to a known state: | ||
| 117 | * Setting everything to zero seems safe. | 122 | * Setting everything to zero seems safe. |
| 118 | */ | 123 | */ |
| 119 | xorl %eax, %eax | 124 | xorl %eax, %eax |
| @@ -132,15 +137,18 @@ identity_mapped: | |||
| 132 | call swap_pages | 137 | call swap_pages |
| 133 | addl $8, %esp | 138 | addl $8, %esp |
| 134 | 139 | ||
| 135 | /* To be certain of avoiding problems with self-modifying code | 140 | /* |
| 141 | * To be certain of avoiding problems with self-modifying code | ||
| 136 | * I need to execute a serializing instruction here. | 142 | * I need to execute a serializing instruction here. |
| 137 | * So I flush the TLB, it's handy, and not processor dependent. | 143 | * So I flush the TLB, it's handy, and not processor dependent. |
| 138 | */ | 144 | */ |
| 139 | xorl %eax, %eax | 145 | xorl %eax, %eax |
| 140 | movl %eax, %cr3 | 146 | movl %eax, %cr3 |
| 141 | 147 | ||
| 142 | /* set all of the registers to known values */ | 148 | /* |
| 143 | /* leave %esp alone */ | 149 | * set all of the registers to known values |
| 150 | * leave %esp alone | ||
| 151 | */ | ||
| 144 | 152 | ||
| 145 | testl %esi, %esi | 153 | testl %esi, %esi |
| 146 | jnz 1f | 154 | jnz 1f |
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index f5afe665a82b..4de8f5b3d476 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S | |||
| @@ -7,10 +7,10 @@ | |||
| 7 | */ | 7 | */ |
| 8 | 8 | ||
| 9 | #include <linux/linkage.h> | 9 | #include <linux/linkage.h> |
| 10 | #include <asm/page.h> | 10 | #include <asm/page_types.h> |
| 11 | #include <asm/kexec.h> | 11 | #include <asm/kexec.h> |
| 12 | #include <asm/processor-flags.h> | 12 | #include <asm/processor-flags.h> |
| 13 | #include <asm/pgtable.h> | 13 | #include <asm/pgtable_types.h> |
| 14 | 14 | ||
| 15 | /* | 15 | /* |
| 16 | * Must be relocatable PIC code callable as a C function | 16 | * Must be relocatable PIC code callable as a C function |
| @@ -19,145 +19,76 @@ | |||
| 19 | #define PTR(x) (x << 3) | 19 | #define PTR(x) (x << 3) |
| 20 | #define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) | 20 | #define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) |
| 21 | 21 | ||
| 22 | /* | ||
| 23 | * control_page + KEXEC_CONTROL_CODE_MAX_SIZE | ||
| 24 | * ~ control_page + PAGE_SIZE are used as data storage and stack for | ||
| 25 | * jumping back | ||
| 26 | */ | ||
| 27 | #define DATA(offset) (KEXEC_CONTROL_CODE_MAX_SIZE+(offset)) | ||
| 28 | |||
| 29 | /* Minimal CPU state */ | ||
| 30 | #define RSP DATA(0x0) | ||
| 31 | #define CR0 DATA(0x8) | ||
| 32 | #define CR3 DATA(0x10) | ||
| 33 | #define CR4 DATA(0x18) | ||
| 34 | |||
| 35 | /* other data */ | ||
| 36 | #define CP_PA_TABLE_PAGE DATA(0x20) | ||
| 37 | #define CP_PA_SWAP_PAGE DATA(0x28) | ||
| 38 | #define CP_PA_BACKUP_PAGES_MAP DATA(0x30) | ||
| 39 | |||
| 22 | .text | 40 | .text |
| 23 | .align PAGE_SIZE | 41 | .align PAGE_SIZE |
| 24 | .code64 | 42 | .code64 |
| 25 | .globl relocate_kernel | 43 | .globl relocate_kernel |
| 26 | relocate_kernel: | 44 | relocate_kernel: |
| 27 | /* %rdi indirection_page | 45 | /* |
| 46 | * %rdi indirection_page | ||
| 28 | * %rsi page_list | 47 | * %rsi page_list |
| 29 | * %rdx start address | 48 | * %rdx start address |
| 49 | * %rcx preserve_context | ||
| 30 | */ | 50 | */ |
| 31 | 51 | ||
| 32 | /* map the control page at its virtual address */ | 52 | /* Save the CPU context, used for jumping back */ |
| 33 | 53 | pushq %rbx | |
| 34 | movq $0x0000ff8000000000, %r10 /* mask */ | 54 | pushq %rbp |
| 35 | mov $(39 - 3), %cl /* bits to shift */ | 55 | pushq %r12 |
| 36 | movq PTR(VA_CONTROL_PAGE)(%rsi), %r11 /* address to map */ | 56 | pushq %r13 |
| 37 | 57 | pushq %r14 | |
| 38 | movq %r11, %r9 | 58 | pushq %r15 |
| 39 | andq %r10, %r9 | 59 | pushf |
| 40 | shrq %cl, %r9 | 60 | |
| 41 | 61 | movq PTR(VA_CONTROL_PAGE)(%rsi), %r11 | |
| 42 | movq PTR(VA_PGD)(%rsi), %r8 | 62 | movq %rsp, RSP(%r11) |
| 43 | addq %r8, %r9 | 63 | movq %cr0, %rax |
| 44 | movq PTR(PA_PUD_0)(%rsi), %r8 | 64 | movq %rax, CR0(%r11) |
| 45 | orq $PAGE_ATTR, %r8 | 65 | movq %cr3, %rax |
| 46 | movq %r8, (%r9) | 66 | movq %rax, CR3(%r11) |
| 47 | 67 | movq %cr4, %rax | |
| 48 | shrq $9, %r10 | 68 | movq %rax, CR4(%r11) |
| 49 | sub $9, %cl | ||
| 50 | |||
| 51 | movq %r11, %r9 | ||
| 52 | andq %r10, %r9 | ||
| 53 | shrq %cl, %r9 | ||
| 54 | |||
| 55 | movq PTR(VA_PUD_0)(%rsi), %r8 | ||
| 56 | addq %r8, %r9 | ||
| 57 | movq PTR(PA_PMD_0)(%rsi), %r8 | ||
| 58 | orq $PAGE_ATTR, %r8 | ||
| 59 | movq %r8, (%r9) | ||
| 60 | |||
| 61 | shrq $9, %r10 | ||
| 62 | sub $9, %cl | ||
| 63 | |||
| 64 | movq %r11, %r9 | ||
| 65 | andq %r10, %r9 | ||
| 66 | shrq %cl, %r9 | ||
| 67 | |||
| 68 | movq PTR(VA_PMD_0)(%rsi), %r8 | ||
| 69 | addq %r8, %r9 | ||
| 70 | movq PTR(PA_PTE_0)(%rsi), %r8 | ||
| 71 | orq $PAGE_ATTR, %r8 | ||
| 72 | movq %r8, (%r9) | ||
| 73 | |||
| 74 | shrq $9, %r10 | ||
| 75 | sub $9, %cl | ||
| 76 | |||
| 77 | movq %r11, %r9 | ||
| 78 | andq %r10, %r9 | ||
| 79 | shrq %cl, %r9 | ||
| 80 | |||
| 81 | movq PTR(VA_PTE_0)(%rsi), %r8 | ||
| 82 | addq %r8, %r9 | ||
| 83 | movq PTR(PA_CONTROL_PAGE)(%rsi), %r8 | ||
| 84 | orq $PAGE_ATTR, %r8 | ||
| 85 | movq %r8, (%r9) | ||
| 86 | |||
| 87 | /* identity map the control page at its physical address */ | ||
| 88 | |||
| 89 | movq $0x0000ff8000000000, %r10 /* mask */ | ||
| 90 | mov $(39 - 3), %cl /* bits to shift */ | ||
| 91 | movq PTR(PA_CONTROL_PAGE)(%rsi), %r11 /* address to map */ | ||
| 92 | |||
| 93 | movq %r11, %r9 | ||
| 94 | andq %r10, %r9 | ||
| 95 | shrq %cl, %r9 | ||
| 96 | |||
| 97 | movq PTR(VA_PGD)(%rsi), %r8 | ||
| 98 | addq %r8, %r9 | ||
| 99 | movq PTR(PA_PUD_1)(%rsi), %r8 | ||
| 100 | orq $PAGE_ATTR, %r8 | ||
| 101 | movq %r8, (%r9) | ||
| 102 | |||
| 103 | shrq $9, %r10 | ||
| 104 | sub $9, %cl | ||
| 105 | |||
| 106 | movq %r11, %r9 | ||
| 107 | andq %r10, %r9 | ||
| 108 | shrq %cl, %r9 | ||
| 109 | |||
| 110 | movq PTR(VA_PUD_1)(%rsi), %r8 | ||
| 111 | addq %r8, %r9 | ||
| 112 | movq PTR(PA_PMD_1)(%rsi), %r8 | ||
| 113 | orq $PAGE_ATTR, %r8 | ||
| 114 | movq %r8, (%r9) | ||
| 115 | |||
| 116 | shrq $9, %r10 | ||
| 117 | sub $9, %cl | ||
| 118 | |||
| 119 | movq %r11, %r9 | ||
| 120 | andq %r10, %r9 | ||
| 121 | shrq %cl, %r9 | ||
| 122 | |||
| 123 | movq PTR(VA_PMD_1)(%rsi), %r8 | ||
| 124 | addq %r8, %r9 | ||
| 125 | movq PTR(PA_PTE_1)(%rsi), %r8 | ||
| 126 | orq $PAGE_ATTR, %r8 | ||
| 127 | movq %r8, (%r9) | ||
| 128 | |||
| 129 | shrq $9, %r10 | ||
| 130 | sub $9, %cl | ||
| 131 | |||
| 132 | movq %r11, %r9 | ||
| 133 | andq %r10, %r9 | ||
| 134 | shrq %cl, %r9 | ||
| 135 | |||
| 136 | movq PTR(VA_PTE_1)(%rsi), %r8 | ||
| 137 | addq %r8, %r9 | ||
| 138 | movq PTR(PA_CONTROL_PAGE)(%rsi), %r8 | ||
| 139 | orq $PAGE_ATTR, %r8 | ||
| 140 | movq %r8, (%r9) | ||
| 141 | |||
| 142 | relocate_new_kernel: | ||
| 143 | /* %rdi indirection_page | ||
| 144 | * %rsi page_list | ||
| 145 | * %rdx start address | ||
| 146 | */ | ||
| 147 | 69 | ||
| 148 | /* zero out flags, and disable interrupts */ | 70 | /* zero out flags, and disable interrupts */ |
| 149 | pushq $0 | 71 | pushq $0 |
| 150 | popfq | 72 | popfq |
| 151 | 73 | ||
| 152 | /* get physical address of control page now */ | 74 | /* |
| 153 | /* this is impossible after page table switch */ | 75 | * get physical address of control page now |
| 76 | * this is impossible after page table switch | ||
| 77 | */ | ||
| 154 | movq PTR(PA_CONTROL_PAGE)(%rsi), %r8 | 78 | movq PTR(PA_CONTROL_PAGE)(%rsi), %r8 |
| 155 | 79 | ||
| 156 | /* get physical address of page table now too */ | 80 | /* get physical address of page table now too */ |
| 157 | movq PTR(PA_TABLE_PAGE)(%rsi), %rcx | 81 | movq PTR(PA_TABLE_PAGE)(%rsi), %r9 |
| 82 | |||
| 83 | /* get physical address of swap page now */ | ||
| 84 | movq PTR(PA_SWAP_PAGE)(%rsi), %r10 | ||
| 158 | 85 | ||
| 159 | /* switch to new set of page tables */ | 86 | /* save some information for jumping back */ |
| 160 | movq PTR(PA_PGD)(%rsi), %r9 | 87 | movq %r9, CP_PA_TABLE_PAGE(%r11) |
| 88 | movq %r10, CP_PA_SWAP_PAGE(%r11) | ||
| 89 | movq %rdi, CP_PA_BACKUP_PAGES_MAP(%r11) | ||
| 90 | |||
| 91 | /* Switch to the identity mapped page tables */ | ||
| 161 | movq %r9, %cr3 | 92 | movq %r9, %cr3 |
| 162 | 93 | ||
| 163 | /* setup a new stack at the end of the physical control page */ | 94 | /* setup a new stack at the end of the physical control page */ |
| @@ -172,7 +103,8 @@ identity_mapped: | |||
| 172 | /* store the start address on the stack */ | 103 | /* store the start address on the stack */ |
| 173 | pushq %rdx | 104 | pushq %rdx |
| 174 | 105 | ||
| 175 | /* Set cr0 to a known state: | 106 | /* |
| 107 | * Set cr0 to a known state: | ||
| 176 | * - Paging enabled | 108 | * - Paging enabled |
| 177 | * - Alignment check disabled | 109 | * - Alignment check disabled |
| 178 | * - Write protect disabled | 110 | * - Write protect disabled |
| @@ -185,7 +117,8 @@ identity_mapped: | |||
| 185 | orl $(X86_CR0_PG | X86_CR0_PE), %eax | 117 | orl $(X86_CR0_PG | X86_CR0_PE), %eax |
| 186 | movq %rax, %cr0 | 118 | movq %rax, %cr0 |
| 187 | 119 | ||
| 188 | /* Set cr4 to a known state: | 120 | /* |
| 121 | * Set cr4 to a known state: | ||
| 189 | * - physical address extension enabled | 122 | * - physical address extension enabled |
| 190 | */ | 123 | */ |
| 191 | movq $X86_CR4_PAE, %rax | 124 | movq $X86_CR4_PAE, %rax |
| @@ -194,12 +127,88 @@ identity_mapped: | |||
| 194 | jmp 1f | 127 | jmp 1f |
| 195 | 1: | 128 | 1: |
| 196 | 129 | ||
| 197 | /* Switch to the identity mapped page tables, | 130 | /* Flush the TLB (needed?) */ |
| 198 | * and flush the TLB. | 131 | movq %r9, %cr3 |
| 199 | */ | 132 | |
| 200 | movq %rcx, %cr3 | 133 | movq %rcx, %r11 |
| 134 | call swap_pages | ||
| 135 | |||
| 136 | /* | ||
| 137 | * To be certain of avoiding problems with self-modifying code | ||
| 138 | * I need to execute a serializing instruction here. | ||
| 139 | * So I flush the TLB by reloading %cr3 here, it's handy, | ||
| 140 | * and not processor dependent. | ||
| 141 | */ | ||
| 142 | movq %cr3, %rax | ||
| 143 | movq %rax, %cr3 | ||
| 144 | |||
| 145 | /* | ||
| 146 | * set all of the registers to known values | ||
| 147 | * leave %rsp alone | ||
| 148 | */ | ||
| 149 | |||
| 150 | testq %r11, %r11 | ||
| 151 | jnz 1f | ||
| 152 | xorq %rax, %rax | ||
| 153 | xorq %rbx, %rbx | ||
| 154 | xorq %rcx, %rcx | ||
| 155 | xorq %rdx, %rdx | ||
| 156 | xorq %rsi, %rsi | ||
| 157 | xorq %rdi, %rdi | ||
| 158 | xorq %rbp, %rbp | ||
| 159 | xorq %r8, %r8 | ||
| 160 | xorq %r9, %r9 | ||
| 161 | xorq %r10, %r9 | ||
| 162 | xorq %r11, %r11 | ||
| 163 | xorq %r12, %r12 | ||
| 164 | xorq %r13, %r13 | ||
| 165 | xorq %r14, %r14 | ||
| 166 | xorq %r15, %r15 | ||
| 167 | |||
| 168 | ret | ||
| 169 | |||
| 170 | 1: | ||
| 171 | popq %rdx | ||
| 172 | leaq PAGE_SIZE(%r10), %rsp | ||
| 173 | call *%rdx | ||
| 174 | |||
| 175 | /* get the re-entry point of the peer system */ | ||
| 176 | movq 0(%rsp), %rbp | ||
| 177 | call 1f | ||
| 178 | 1: | ||
| 179 | popq %r8 | ||
| 180 | subq $(1b - relocate_kernel), %r8 | ||
| 181 | movq CP_PA_SWAP_PAGE(%r8), %r10 | ||
| 182 | movq CP_PA_BACKUP_PAGES_MAP(%r8), %rdi | ||
| 183 | movq CP_PA_TABLE_PAGE(%r8), %rax | ||
| 184 | movq %rax, %cr3 | ||
| 185 | lea PAGE_SIZE(%r8), %rsp | ||
| 186 | call swap_pages | ||
| 187 | movq $virtual_mapped, %rax | ||
| 188 | pushq %rax | ||
| 189 | ret | ||
| 190 | |||
| 191 | virtual_mapped: | ||
| 192 | movq RSP(%r8), %rsp | ||
| 193 | movq CR4(%r8), %rax | ||
| 194 | movq %rax, %cr4 | ||
| 195 | movq CR3(%r8), %rax | ||
| 196 | movq CR0(%r8), %r8 | ||
| 197 | movq %rax, %cr3 | ||
| 198 | movq %r8, %cr0 | ||
| 199 | movq %rbp, %rax | ||
| 200 | |||
| 201 | popf | ||
| 202 | popq %r15 | ||
| 203 | popq %r14 | ||
| 204 | popq %r13 | ||
| 205 | popq %r12 | ||
| 206 | popq %rbp | ||
| 207 | popq %rbx | ||
| 208 | ret | ||
| 201 | 209 | ||
| 202 | /* Do the copies */ | 210 | /* Do the copies */ |
| 211 | swap_pages: | ||
| 203 | movq %rdi, %rcx /* Put the page_list in %rcx */ | 212 | movq %rdi, %rcx /* Put the page_list in %rcx */ |
| 204 | xorq %rdi, %rdi | 213 | xorq %rdi, %rdi |
| 205 | xorq %rsi, %rsi | 214 | xorq %rsi, %rsi |
| @@ -231,36 +240,27 @@ identity_mapped: | |||
| 231 | movq %rcx, %rsi /* For ever source page do a copy */ | 240 | movq %rcx, %rsi /* For ever source page do a copy */ |
| 232 | andq $0xfffffffffffff000, %rsi | 241 | andq $0xfffffffffffff000, %rsi |
| 233 | 242 | ||
| 243 | movq %rdi, %rdx | ||
| 244 | movq %rsi, %rax | ||
| 245 | |||
| 246 | movq %r10, %rdi | ||
| 234 | movq $512, %rcx | 247 | movq $512, %rcx |
| 235 | rep ; movsq | 248 | rep ; movsq |
| 236 | jmp 0b | ||
| 237 | 3: | ||
| 238 | |||
| 239 | /* To be certain of avoiding problems with self-modifying code | ||
| 240 | * I need to execute a serializing instruction here. | ||
| 241 | * So I flush the TLB by reloading %cr3 here, it's handy, | ||
| 242 | * and not processor dependent. | ||
| 243 | */ | ||
| 244 | movq %cr3, %rax | ||
| 245 | movq %rax, %cr3 | ||
| 246 | 249 | ||
| 247 | /* set all of the registers to known values */ | 250 | movq %rax, %rdi |
| 248 | /* leave %rsp alone */ | 251 | movq %rdx, %rsi |
| 252 | movq $512, %rcx | ||
| 253 | rep ; movsq | ||
| 249 | 254 | ||
| 250 | xorq %rax, %rax | 255 | movq %rdx, %rdi |
| 251 | xorq %rbx, %rbx | 256 | movq %r10, %rsi |
| 252 | xorq %rcx, %rcx | 257 | movq $512, %rcx |
| 253 | xorq %rdx, %rdx | 258 | rep ; movsq |
| 254 | xorq %rsi, %rsi | ||
| 255 | xorq %rdi, %rdi | ||
| 256 | xorq %rbp, %rbp | ||
| 257 | xorq %r8, %r8 | ||
| 258 | xorq %r9, %r9 | ||
| 259 | xorq %r10, %r9 | ||
| 260 | xorq %r11, %r11 | ||
| 261 | xorq %r12, %r12 | ||
| 262 | xorq %r13, %r13 | ||
| 263 | xorq %r14, %r14 | ||
| 264 | xorq %r15, %r15 | ||
| 265 | 259 | ||
| 260 | lea PAGE_SIZE(%rax), %rsi | ||
| 261 | jmp 0b | ||
| 262 | 3: | ||
| 266 | ret | 263 | ret |
| 264 | |||
| 265 | .globl kexec_control_code_size | ||
| 266 | .set kexec_control_code_size, . - relocate_kernel | ||
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 6a8811a69324..f28c56e6bf94 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
| @@ -74,14 +74,15 @@ | |||
| 74 | #include <asm/e820.h> | 74 | #include <asm/e820.h> |
| 75 | #include <asm/mpspec.h> | 75 | #include <asm/mpspec.h> |
| 76 | #include <asm/setup.h> | 76 | #include <asm/setup.h> |
| 77 | #include <asm/arch_hooks.h> | ||
| 78 | #include <asm/efi.h> | 77 | #include <asm/efi.h> |
| 78 | #include <asm/timer.h> | ||
| 79 | #include <asm/i8259.h> | ||
| 79 | #include <asm/sections.h> | 80 | #include <asm/sections.h> |
| 80 | #include <asm/dmi.h> | 81 | #include <asm/dmi.h> |
| 81 | #include <asm/io_apic.h> | 82 | #include <asm/io_apic.h> |
| 82 | #include <asm/ist.h> | 83 | #include <asm/ist.h> |
| 83 | #include <asm/vmi.h> | 84 | #include <asm/vmi.h> |
| 84 | #include <setup_arch.h> | 85 | #include <asm/setup_arch.h> |
| 85 | #include <asm/bios_ebda.h> | 86 | #include <asm/bios_ebda.h> |
| 86 | #include <asm/cacheflush.h> | 87 | #include <asm/cacheflush.h> |
| 87 | #include <asm/processor.h> | 88 | #include <asm/processor.h> |
| @@ -89,7 +90,7 @@ | |||
| 89 | 90 | ||
| 90 | #include <asm/system.h> | 91 | #include <asm/system.h> |
| 91 | #include <asm/vsyscall.h> | 92 | #include <asm/vsyscall.h> |
| 92 | #include <asm/smp.h> | 93 | #include <asm/cpu.h> |
| 93 | #include <asm/desc.h> | 94 | #include <asm/desc.h> |
| 94 | #include <asm/dma.h> | 95 | #include <asm/dma.h> |
| 95 | #include <asm/iommu.h> | 96 | #include <asm/iommu.h> |
| @@ -97,7 +98,6 @@ | |||
| 97 | #include <asm/mmu_context.h> | 98 | #include <asm/mmu_context.h> |
| 98 | #include <asm/proto.h> | 99 | #include <asm/proto.h> |
| 99 | 100 | ||
| 100 | #include <mach_apic.h> | ||
| 101 | #include <asm/paravirt.h> | 101 | #include <asm/paravirt.h> |
| 102 | #include <asm/hypervisor.h> | 102 | #include <asm/hypervisor.h> |
| 103 | 103 | ||
| @@ -112,6 +112,20 @@ | |||
| 112 | #define ARCH_SETUP | 112 | #define ARCH_SETUP |
| 113 | #endif | 113 | #endif |
| 114 | 114 | ||
| 115 | unsigned int boot_cpu_id __read_mostly; | ||
| 116 | |||
| 117 | #ifdef CONFIG_X86_64 | ||
| 118 | int default_cpu_present_to_apicid(int mps_cpu) | ||
| 119 | { | ||
| 120 | return __default_cpu_present_to_apicid(mps_cpu); | ||
| 121 | } | ||
| 122 | |||
| 123 | int default_check_phys_apicid_present(int boot_cpu_physical_apicid) | ||
| 124 | { | ||
| 125 | return __default_check_phys_apicid_present(boot_cpu_physical_apicid); | ||
| 126 | } | ||
| 127 | #endif | ||
| 128 | |||
| 115 | #ifndef CONFIG_DEBUG_BOOT_PARAMS | 129 | #ifndef CONFIG_DEBUG_BOOT_PARAMS |
| 116 | struct boot_params __initdata boot_params; | 130 | struct boot_params __initdata boot_params; |
| 117 | #else | 131 | #else |
| @@ -188,7 +202,9 @@ struct ist_info ist_info; | |||
| 188 | #endif | 202 | #endif |
| 189 | 203 | ||
| 190 | #else | 204 | #else |
| 191 | struct cpuinfo_x86 boot_cpu_data __read_mostly; | 205 | struct cpuinfo_x86 boot_cpu_data __read_mostly = { |
| 206 | .x86_phys_bits = MAX_PHYSMEM_BITS, | ||
| 207 | }; | ||
| 192 | EXPORT_SYMBOL(boot_cpu_data); | 208 | EXPORT_SYMBOL(boot_cpu_data); |
| 193 | #endif | 209 | #endif |
| 194 | 210 | ||
| @@ -586,20 +602,7 @@ static int __init setup_elfcorehdr(char *arg) | |||
| 586 | early_param("elfcorehdr", setup_elfcorehdr); | 602 | early_param("elfcorehdr", setup_elfcorehdr); |
| 587 | #endif | 603 | #endif |
| 588 | 604 | ||
| 589 | static int __init default_update_genapic(void) | 605 | static struct x86_quirks default_x86_quirks __initdata; |
| 590 | { | ||
| 591 | #ifdef CONFIG_X86_SMP | ||
| 592 | # if defined(CONFIG_X86_GENERICARCH) || defined(CONFIG_X86_64) | ||
| 593 | genapic->wakeup_cpu = wakeup_secondary_cpu_via_init; | ||
| 594 | # endif | ||
| 595 | #endif | ||
| 596 | |||
| 597 | return 0; | ||
| 598 | } | ||
| 599 | |||
| 600 | static struct x86_quirks default_x86_quirks __initdata = { | ||
| 601 | .update_genapic = default_update_genapic, | ||
| 602 | }; | ||
| 603 | 606 | ||
| 604 | struct x86_quirks *x86_quirks __initdata = &default_x86_quirks; | 607 | struct x86_quirks *x86_quirks __initdata = &default_x86_quirks; |
| 605 | 608 | ||
| @@ -656,7 +659,6 @@ void __init setup_arch(char **cmdline_p) | |||
| 656 | #ifdef CONFIG_X86_32 | 659 | #ifdef CONFIG_X86_32 |
| 657 | memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); | 660 | memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); |
| 658 | visws_early_detect(); | 661 | visws_early_detect(); |
| 659 | pre_setup_arch_hook(); | ||
| 660 | #else | 662 | #else |
| 661 | printk(KERN_INFO "Command line: %s\n", boot_command_line); | 663 | printk(KERN_INFO "Command line: %s\n", boot_command_line); |
| 662 | #endif | 664 | #endif |
| @@ -824,8 +826,7 @@ void __init setup_arch(char **cmdline_p) | |||
| 824 | #else | 826 | #else |
| 825 | num_physpages = max_pfn; | 827 | num_physpages = max_pfn; |
| 826 | 828 | ||
| 827 | if (cpu_has_x2apic) | 829 | check_x2apic(); |
| 828 | check_x2apic(); | ||
| 829 | 830 | ||
| 830 | /* How many end-of-memory variables you have, grandma! */ | 831 | /* How many end-of-memory variables you have, grandma! */ |
| 831 | /* need this before calling reserve_initrd */ | 832 | /* need this before calling reserve_initrd */ |
| @@ -865,9 +866,7 @@ void __init setup_arch(char **cmdline_p) | |||
| 865 | 866 | ||
| 866 | reserve_initrd(); | 867 | reserve_initrd(); |
| 867 | 868 | ||
| 868 | #ifdef CONFIG_X86_64 | ||
| 869 | vsmp_init(); | 869 | vsmp_init(); |
| 870 | #endif | ||
| 871 | 870 | ||
| 872 | io_delay_init(); | 871 | io_delay_init(); |
| 873 | 872 | ||
| @@ -893,12 +892,11 @@ void __init setup_arch(char **cmdline_p) | |||
| 893 | */ | 892 | */ |
| 894 | acpi_reserve_bootmem(); | 893 | acpi_reserve_bootmem(); |
| 895 | #endif | 894 | #endif |
| 896 | #ifdef CONFIG_X86_FIND_SMP_CONFIG | ||
| 897 | /* | 895 | /* |
| 898 | * Find and reserve possible boot-time SMP configuration: | 896 | * Find and reserve possible boot-time SMP configuration: |
| 899 | */ | 897 | */ |
| 900 | find_smp_config(); | 898 | find_smp_config(); |
| 901 | #endif | 899 | |
| 902 | reserve_crashkernel(); | 900 | reserve_crashkernel(); |
| 903 | 901 | ||
| 904 | #ifdef CONFIG_X86_64 | 902 | #ifdef CONFIG_X86_64 |
| @@ -925,9 +923,7 @@ void __init setup_arch(char **cmdline_p) | |||
| 925 | map_vsyscall(); | 923 | map_vsyscall(); |
| 926 | #endif | 924 | #endif |
| 927 | 925 | ||
| 928 | #ifdef CONFIG_X86_GENERICARCH | ||
| 929 | generic_apic_probe(); | 926 | generic_apic_probe(); |
| 930 | #endif | ||
| 931 | 927 | ||
| 932 | early_quirks(); | 928 | early_quirks(); |
| 933 | 929 | ||
| @@ -978,4 +974,95 @@ void __init setup_arch(char **cmdline_p) | |||
| 978 | #endif | 974 | #endif |
| 979 | } | 975 | } |
| 980 | 976 | ||
| 977 | #ifdef CONFIG_X86_32 | ||
| 978 | |||
| 979 | /** | ||
| 980 | * x86_quirk_pre_intr_init - initialisation prior to setting up interrupt vectors | ||
| 981 | * | ||
| 982 | * Description: | ||
| 983 | * Perform any necessary interrupt initialisation prior to setting up | ||
| 984 | * the "ordinary" interrupt call gates. For legacy reasons, the ISA | ||
| 985 | * interrupts should be initialised here if the machine emulates a PC | ||
| 986 | * in any way. | ||
| 987 | **/ | ||
| 988 | void __init x86_quirk_pre_intr_init(void) | ||
| 989 | { | ||
| 990 | if (x86_quirks->arch_pre_intr_init) { | ||
| 991 | if (x86_quirks->arch_pre_intr_init()) | ||
| 992 | return; | ||
| 993 | } | ||
| 994 | init_ISA_irqs(); | ||
| 995 | } | ||
| 996 | |||
| 997 | /** | ||
| 998 | * x86_quirk_intr_init - post gate setup interrupt initialisation | ||
| 999 | * | ||
| 1000 | * Description: | ||
| 1001 | * Fill in any interrupts that may have been left out by the general | ||
| 1002 | * init_IRQ() routine. interrupts having to do with the machine rather | ||
| 1003 | * than the devices on the I/O bus (like APIC interrupts in intel MP | ||
| 1004 | * systems) are started here. | ||
| 1005 | **/ | ||
| 1006 | void __init x86_quirk_intr_init(void) | ||
| 1007 | { | ||
| 1008 | if (x86_quirks->arch_intr_init) { | ||
| 1009 | if (x86_quirks->arch_intr_init()) | ||
| 1010 | return; | ||
| 1011 | } | ||
| 1012 | } | ||
| 1013 | |||
| 1014 | /** | ||
| 1015 | * x86_quirk_trap_init - initialise system specific traps | ||
| 1016 | * | ||
| 1017 | * Description: | ||
| 1018 | * Called as the final act of trap_init(). Used in VISWS to initialise | ||
| 1019 | * the various board specific APIC traps. | ||
| 1020 | **/ | ||
| 1021 | void __init x86_quirk_trap_init(void) | ||
| 1022 | { | ||
| 1023 | if (x86_quirks->arch_trap_init) { | ||
| 1024 | if (x86_quirks->arch_trap_init()) | ||
| 1025 | return; | ||
| 1026 | } | ||
| 1027 | } | ||
| 1028 | |||
| 1029 | static struct irqaction irq0 = { | ||
| 1030 | .handler = timer_interrupt, | ||
| 1031 | .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER, | ||
| 1032 | .mask = CPU_MASK_NONE, | ||
| 1033 | .name = "timer" | ||
| 1034 | }; | ||
| 1035 | |||
| 1036 | /** | ||
| 1037 | * x86_quirk_pre_time_init - do any specific initialisations before. | ||
| 1038 | * | ||
| 1039 | **/ | ||
| 1040 | void __init x86_quirk_pre_time_init(void) | ||
| 1041 | { | ||
| 1042 | if (x86_quirks->arch_pre_time_init) | ||
| 1043 | x86_quirks->arch_pre_time_init(); | ||
| 1044 | } | ||
| 981 | 1045 | ||
| 1046 | /** | ||
| 1047 | * x86_quirk_time_init - do any specific initialisations for the system timer. | ||
| 1048 | * | ||
| 1049 | * Description: | ||
| 1050 | * Must plug the system timer interrupt source at HZ into the IRQ listed | ||
| 1051 | * in irq_vectors.h:TIMER_IRQ | ||
| 1052 | **/ | ||
| 1053 | void __init x86_quirk_time_init(void) | ||
| 1054 | { | ||
| 1055 | if (x86_quirks->arch_time_init) { | ||
| 1056 | /* | ||
| 1057 | * A nonzero return code does not mean failure, it means | ||
| 1058 | * that the architecture quirk does not want any | ||
| 1059 | * generic (timer) setup to be performed after this: | ||
| 1060 | */ | ||
| 1061 | if (x86_quirks->arch_time_init()) | ||
| 1062 | return; | ||
| 1063 | } | ||
| 1064 | |||
| 1065 | irq0.mask = cpumask_of_cpu(0); | ||
| 1066 | setup_irq(0, &irq0); | ||
| 1067 | } | ||
| 1068 | #endif /* CONFIG_X86_32 */ | ||
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 01161077a49c..efa615f2bf43 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c | |||
| @@ -7,402 +7,482 @@ | |||
| 7 | #include <linux/crash_dump.h> | 7 | #include <linux/crash_dump.h> |
| 8 | #include <linux/smp.h> | 8 | #include <linux/smp.h> |
| 9 | #include <linux/topology.h> | 9 | #include <linux/topology.h> |
| 10 | #include <linux/pfn.h> | ||
| 10 | #include <asm/sections.h> | 11 | #include <asm/sections.h> |
| 11 | #include <asm/processor.h> | 12 | #include <asm/processor.h> |
| 12 | #include <asm/setup.h> | 13 | #include <asm/setup.h> |
| 13 | #include <asm/mpspec.h> | 14 | #include <asm/mpspec.h> |
| 14 | #include <asm/apicdef.h> | 15 | #include <asm/apicdef.h> |
| 15 | #include <asm/highmem.h> | 16 | #include <asm/highmem.h> |
| 17 | #include <asm/proto.h> | ||
| 18 | #include <asm/cpumask.h> | ||
| 19 | #include <asm/cpu.h> | ||
| 20 | #include <asm/stackprotector.h> | ||
| 16 | 21 | ||
| 17 | #ifdef CONFIG_X86_LOCAL_APIC | 22 | #ifdef CONFIG_DEBUG_PER_CPU_MAPS |
| 18 | unsigned int num_processors; | 23 | # define DBG(x...) printk(KERN_DEBUG x) |
| 19 | unsigned disabled_cpus __cpuinitdata; | 24 | #else |
| 20 | /* Processor that is doing the boot up */ | 25 | # define DBG(x...) |
| 21 | unsigned int boot_cpu_physical_apicid = -1U; | ||
| 22 | EXPORT_SYMBOL(boot_cpu_physical_apicid); | ||
| 23 | unsigned int max_physical_apicid; | ||
| 24 | |||
| 25 | /* Bitmask of physically existing CPUs */ | ||
| 26 | physid_mask_t phys_cpu_present_map; | ||
| 27 | #endif | 26 | #endif |
| 28 | 27 | ||
| 29 | /* map cpu index to physical APIC ID */ | 28 | DEFINE_PER_CPU(int, cpu_number); |
| 30 | DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID); | 29 | EXPORT_PER_CPU_SYMBOL(cpu_number); |
| 31 | DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID); | ||
| 32 | EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); | ||
| 33 | EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); | ||
| 34 | |||
| 35 | #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) | ||
| 36 | #define X86_64_NUMA 1 | ||
| 37 | 30 | ||
| 38 | /* map cpu index to node index */ | 31 | #ifdef CONFIG_X86_64 |
| 39 | DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE); | 32 | #define BOOT_PERCPU_OFFSET ((unsigned long)__per_cpu_load) |
| 40 | EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map); | 33 | #else |
| 34 | #define BOOT_PERCPU_OFFSET 0 | ||
| 35 | #endif | ||
| 41 | 36 | ||
| 42 | /* which logical CPUs are on which nodes */ | 37 | DEFINE_PER_CPU(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET; |
| 43 | cpumask_t *node_to_cpumask_map; | 38 | EXPORT_PER_CPU_SYMBOL(this_cpu_off); |
| 44 | EXPORT_SYMBOL(node_to_cpumask_map); | ||
| 45 | 39 | ||
| 46 | /* setup node_to_cpumask_map */ | 40 | unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = { |
| 47 | static void __init setup_node_to_cpumask_map(void); | 41 | [0 ... NR_CPUS-1] = BOOT_PERCPU_OFFSET, |
| 42 | }; | ||
| 43 | EXPORT_SYMBOL(__per_cpu_offset); | ||
| 48 | 44 | ||
| 45 | /* | ||
| 46 | * On x86_64 symbols referenced from code should be reachable using | ||
| 47 | * 32bit relocations. Reserve space for static percpu variables in | ||
| 48 | * modules so that they are always served from the first chunk which | ||
| 49 | * is located at the percpu segment base. On x86_32, anything can | ||
| 50 | * address anywhere. No need to reserve space in the first chunk. | ||
| 51 | */ | ||
| 52 | #ifdef CONFIG_X86_64 | ||
| 53 | #define PERCPU_FIRST_CHUNK_RESERVE PERCPU_MODULE_RESERVE | ||
| 49 | #else | 54 | #else |
| 50 | static inline void setup_node_to_cpumask_map(void) { } | 55 | #define PERCPU_FIRST_CHUNK_RESERVE 0 |
| 51 | #endif | 56 | #endif |
| 52 | 57 | ||
| 53 | #if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP) | 58 | /** |
| 54 | /* | 59 | * pcpu_need_numa - determine percpu allocation needs to consider NUMA |
| 55 | * Copy data used in early init routines from the initial arrays to the | 60 | * |
| 56 | * per cpu data areas. These arrays then become expendable and the | 61 | * If NUMA is not configured or there is only one NUMA node available, |
| 57 | * *_early_ptr's are zeroed indicating that the static arrays are gone. | 62 | * there is no reason to consider NUMA. This function determines |
| 63 | * whether percpu allocation should consider NUMA or not. | ||
| 64 | * | ||
| 65 | * RETURNS: | ||
| 66 | * true if NUMA should be considered; otherwise, false. | ||
| 58 | */ | 67 | */ |
| 59 | static void __init setup_per_cpu_maps(void) | 68 | static bool __init pcpu_need_numa(void) |
| 60 | { | 69 | { |
| 61 | int cpu; | 70 | #ifdef CONFIG_NEED_MULTIPLE_NODES |
| 71 | pg_data_t *last = NULL; | ||
| 72 | unsigned int cpu; | ||
| 62 | 73 | ||
| 63 | for_each_possible_cpu(cpu) { | 74 | for_each_possible_cpu(cpu) { |
| 64 | per_cpu(x86_cpu_to_apicid, cpu) = | 75 | int node = early_cpu_to_node(cpu); |
| 65 | early_per_cpu_map(x86_cpu_to_apicid, cpu); | ||
| 66 | per_cpu(x86_bios_cpu_apicid, cpu) = | ||
| 67 | early_per_cpu_map(x86_bios_cpu_apicid, cpu); | ||
| 68 | #ifdef X86_64_NUMA | ||
| 69 | per_cpu(x86_cpu_to_node_map, cpu) = | ||
| 70 | early_per_cpu_map(x86_cpu_to_node_map, cpu); | ||
| 71 | #endif | ||
| 72 | } | ||
| 73 | 76 | ||
| 74 | /* indicate the early static arrays will soon be gone */ | 77 | if (node_online(node) && NODE_DATA(node) && |
| 75 | early_per_cpu_ptr(x86_cpu_to_apicid) = NULL; | 78 | last && last != NODE_DATA(node)) |
| 76 | early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL; | 79 | return true; |
| 77 | #ifdef X86_64_NUMA | 80 | |
| 78 | early_per_cpu_ptr(x86_cpu_to_node_map) = NULL; | 81 | last = NODE_DATA(node); |
| 82 | } | ||
| 79 | #endif | 83 | #endif |
| 84 | return false; | ||
| 80 | } | 85 | } |
| 81 | 86 | ||
| 82 | #ifdef CONFIG_X86_32 | 87 | /** |
| 83 | /* | 88 | * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu |
| 84 | * Great future not-so-futuristic plan: make i386 and x86_64 do it | 89 | * @cpu: cpu to allocate for |
| 85 | * the same way | 90 | * @size: size allocation in bytes |
| 86 | */ | 91 | * @align: alignment |
| 87 | unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; | 92 | * |
| 88 | EXPORT_SYMBOL(__per_cpu_offset); | 93 | * Allocate @size bytes aligned at @align for cpu @cpu. This wrapper |
| 89 | static inline void setup_cpu_pda_map(void) { } | 94 | * does the right thing for NUMA regardless of the current |
| 90 | 95 | * configuration. | |
| 91 | #elif !defined(CONFIG_SMP) | 96 | * |
| 92 | static inline void setup_cpu_pda_map(void) { } | 97 | * RETURNS: |
| 93 | 98 | * Pointer to the allocated area on success, NULL on failure. | |
| 94 | #else /* CONFIG_SMP && CONFIG_X86_64 */ | ||
| 95 | |||
| 96 | /* | ||
| 97 | * Allocate cpu_pda pointer table and array via alloc_bootmem. | ||
| 98 | */ | 99 | */ |
| 99 | static void __init setup_cpu_pda_map(void) | 100 | static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, |
| 101 | unsigned long align) | ||
| 100 | { | 102 | { |
| 101 | char *pda; | 103 | const unsigned long goal = __pa(MAX_DMA_ADDRESS); |
| 102 | struct x8664_pda **new_cpu_pda; | 104 | #ifdef CONFIG_NEED_MULTIPLE_NODES |
| 103 | unsigned long size; | 105 | int node = early_cpu_to_node(cpu); |
| 104 | int cpu; | 106 | void *ptr; |
| 105 | 107 | ||
| 106 | size = roundup(sizeof(struct x8664_pda), cache_line_size()); | 108 | if (!node_online(node) || !NODE_DATA(node)) { |
| 107 | 109 | ptr = __alloc_bootmem_nopanic(size, align, goal); | |
| 108 | /* allocate cpu_pda array and pointer table */ | 110 | pr_info("cpu %d has no node %d or node-local memory\n", |
| 109 | { | 111 | cpu, node); |
| 110 | unsigned long tsize = nr_cpu_ids * sizeof(void *); | 112 | pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n", |
| 111 | unsigned long asize = size * (nr_cpu_ids - 1); | 113 | cpu, size, __pa(ptr)); |
| 112 | 114 | } else { | |
| 113 | tsize = roundup(tsize, cache_line_size()); | 115 | ptr = __alloc_bootmem_node_nopanic(NODE_DATA(node), |
| 114 | new_cpu_pda = alloc_bootmem(tsize + asize); | 116 | size, align, goal); |
| 115 | pda = (char *)new_cpu_pda + tsize; | 117 | pr_debug("per cpu data for cpu%d %lu bytes on node%d at " |
| 118 | "%016lx\n", cpu, size, node, __pa(ptr)); | ||
| 116 | } | 119 | } |
| 117 | 120 | return ptr; | |
| 118 | /* initialize pointer table to static pda's */ | 121 | #else |
| 119 | for_each_possible_cpu(cpu) { | 122 | return __alloc_bootmem_nopanic(size, align, goal); |
| 120 | if (cpu == 0) { | 123 | #endif |
| 121 | /* leave boot cpu pda in place */ | ||
| 122 | new_cpu_pda[0] = cpu_pda(0); | ||
| 123 | continue; | ||
| 124 | } | ||
| 125 | new_cpu_pda[cpu] = (struct x8664_pda *)pda; | ||
| 126 | new_cpu_pda[cpu]->in_bootmem = 1; | ||
| 127 | pda += size; | ||
| 128 | } | ||
| 129 | |||
| 130 | /* point to new pointer table */ | ||
| 131 | _cpu_pda = new_cpu_pda; | ||
| 132 | } | 124 | } |
| 133 | 125 | ||
| 134 | #endif /* CONFIG_SMP && CONFIG_X86_64 */ | 126 | /* |
| 135 | 127 | * Remap allocator | |
| 136 | #ifdef CONFIG_X86_64 | 128 | * |
| 129 | * This allocator uses PMD page as unit. A PMD page is allocated for | ||
| 130 | * each cpu and each is remapped into vmalloc area using PMD mapping. | ||
| 131 | * As PMD page is quite large, only part of it is used for the first | ||
| 132 | * chunk. Unused part is returned to the bootmem allocator. | ||
| 133 | * | ||
| 134 | * So, the PMD pages are mapped twice - once to the physical mapping | ||
| 135 | * and to the vmalloc area for the first percpu chunk. The double | ||
| 136 | * mapping does add one more PMD TLB entry pressure but still is much | ||
| 137 | * better than only using 4k mappings while still being NUMA friendly. | ||
| 138 | */ | ||
| 139 | #ifdef CONFIG_NEED_MULTIPLE_NODES | ||
| 140 | static size_t pcpur_size __initdata; | ||
| 141 | static void **pcpur_ptrs __initdata; | ||
| 137 | 142 | ||
| 138 | /* correctly size the local cpu masks */ | 143 | static struct page * __init pcpur_get_page(unsigned int cpu, int pageno) |
| 139 | static void __init setup_cpu_local_masks(void) | ||
| 140 | { | 144 | { |
| 141 | alloc_bootmem_cpumask_var(&cpu_initialized_mask); | 145 | size_t off = (size_t)pageno << PAGE_SHIFT; |
| 142 | alloc_bootmem_cpumask_var(&cpu_callin_mask); | ||
| 143 | alloc_bootmem_cpumask_var(&cpu_callout_mask); | ||
| 144 | alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); | ||
| 145 | } | ||
| 146 | 146 | ||
| 147 | #else /* CONFIG_X86_32 */ | 147 | if (off >= pcpur_size) |
| 148 | return NULL; | ||
| 148 | 149 | ||
| 149 | static inline void setup_cpu_local_masks(void) | 150 | return virt_to_page(pcpur_ptrs[cpu] + off); |
| 150 | { | ||
| 151 | } | 151 | } |
| 152 | 152 | ||
| 153 | #endif /* CONFIG_X86_32 */ | 153 | static ssize_t __init setup_pcpu_remap(size_t static_size) |
| 154 | |||
| 155 | /* | ||
| 156 | * Great future plan: | ||
| 157 | * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data. | ||
| 158 | * Always point %gs to its beginning | ||
| 159 | */ | ||
| 160 | void __init setup_per_cpu_areas(void) | ||
| 161 | { | 154 | { |
| 162 | ssize_t size, old_size; | 155 | static struct vm_struct vm; |
| 163 | char *ptr; | 156 | pg_data_t *last; |
| 164 | int cpu; | 157 | size_t ptrs_size, dyn_size; |
| 165 | unsigned long align = 1; | 158 | unsigned int cpu; |
| 166 | 159 | ssize_t ret; | |
| 167 | /* Setup cpu_pda map */ | 160 | |
| 168 | setup_cpu_pda_map(); | 161 | /* |
| 162 | * If large page isn't supported, there's no benefit in doing | ||
| 163 | * this. Also, on non-NUMA, embedding is better. | ||
| 164 | */ | ||
| 165 | if (!cpu_has_pse || pcpu_need_numa()) | ||
| 166 | return -EINVAL; | ||
| 167 | |||
| 168 | last = NULL; | ||
| 169 | for_each_possible_cpu(cpu) { | ||
| 170 | int node = early_cpu_to_node(cpu); | ||
| 169 | 171 | ||
| 170 | /* Copy section for each CPU (we discard the original) */ | 172 | if (node_online(node) && NODE_DATA(node) && |
| 171 | old_size = PERCPU_ENOUGH_ROOM; | 173 | last && last != NODE_DATA(node)) |
| 172 | align = max_t(unsigned long, PAGE_SIZE, align); | 174 | goto proceed; |
| 173 | size = roundup(old_size, align); | ||
| 174 | 175 | ||
| 175 | pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n", | 176 | last = NODE_DATA(node); |
| 176 | NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids); | 177 | } |
| 178 | return -EINVAL; | ||
| 179 | |||
| 180 | proceed: | ||
| 181 | /* | ||
| 182 | * Currently supports only single page. Supporting multiple | ||
| 183 | * pages won't be too difficult if it ever becomes necessary. | ||
| 184 | */ | ||
| 185 | pcpur_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + | ||
| 186 | PERCPU_DYNAMIC_RESERVE); | ||
| 187 | if (pcpur_size > PMD_SIZE) { | ||
| 188 | pr_warning("PERCPU: static data is larger than large page, " | ||
| 189 | "can't use large page\n"); | ||
| 190 | return -EINVAL; | ||
| 191 | } | ||
| 192 | dyn_size = pcpur_size - static_size - PERCPU_FIRST_CHUNK_RESERVE; | ||
| 177 | 193 | ||
| 178 | pr_info("PERCPU: Allocating %zd bytes of per cpu data\n", size); | 194 | /* allocate pointer array and alloc large pages */ |
| 195 | ptrs_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpur_ptrs[0])); | ||
| 196 | pcpur_ptrs = alloc_bootmem(ptrs_size); | ||
| 179 | 197 | ||
| 180 | for_each_possible_cpu(cpu) { | 198 | for_each_possible_cpu(cpu) { |
| 181 | #ifndef CONFIG_NEED_MULTIPLE_NODES | 199 | pcpur_ptrs[cpu] = pcpu_alloc_bootmem(cpu, PMD_SIZE, PMD_SIZE); |
| 182 | ptr = __alloc_bootmem(size, align, | 200 | if (!pcpur_ptrs[cpu]) |
| 183 | __pa(MAX_DMA_ADDRESS)); | 201 | goto enomem; |
| 184 | #else | 202 | |
| 185 | int node = early_cpu_to_node(cpu); | 203 | /* |
| 186 | if (!node_online(node) || !NODE_DATA(node)) { | 204 | * Only use pcpur_size bytes and give back the rest. |
| 187 | ptr = __alloc_bootmem(size, align, | 205 | * |
| 188 | __pa(MAX_DMA_ADDRESS)); | 206 | * Ingo: The 2MB up-rounding bootmem is needed to make |
| 189 | pr_info("cpu %d has no node %d or node-local memory\n", | 207 | * sure the partial 2MB page is still fully RAM - it's |
| 190 | cpu, node); | 208 | * not well-specified to have a PAT-incompatible area |
| 191 | pr_debug("per cpu data for cpu%d at %016lx\n", | 209 | * (unmapped RAM, device memory, etc.) in that hole. |
| 192 | cpu, __pa(ptr)); | 210 | */ |
| 193 | } else { | 211 | free_bootmem(__pa(pcpur_ptrs[cpu] + pcpur_size), |
| 194 | ptr = __alloc_bootmem_node(NODE_DATA(node), size, align, | 212 | PMD_SIZE - pcpur_size); |
| 195 | __pa(MAX_DMA_ADDRESS)); | 213 | |
| 196 | pr_debug("per cpu data for cpu%d on node%d at %016lx\n", | 214 | memcpy(pcpur_ptrs[cpu], __per_cpu_load, static_size); |
| 197 | cpu, node, __pa(ptr)); | ||
| 198 | } | ||
| 199 | #endif | ||
| 200 | per_cpu_offset(cpu) = ptr - __per_cpu_start; | ||
| 201 | memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); | ||
| 202 | } | 215 | } |
| 203 | 216 | ||
| 204 | /* Setup percpu data maps */ | 217 | /* allocate address and map */ |
| 205 | setup_per_cpu_maps(); | 218 | vm.flags = VM_ALLOC; |
| 219 | vm.size = num_possible_cpus() * PMD_SIZE; | ||
| 220 | vm_area_register_early(&vm, PMD_SIZE); | ||
| 206 | 221 | ||
| 207 | /* Setup node to cpumask map */ | 222 | for_each_possible_cpu(cpu) { |
| 208 | setup_node_to_cpumask_map(); | 223 | pmd_t *pmd; |
| 209 | 224 | ||
| 210 | /* Setup cpu initialized, callin, callout masks */ | 225 | pmd = populate_extra_pmd((unsigned long)vm.addr |
| 211 | setup_cpu_local_masks(); | 226 | + cpu * PMD_SIZE); |
| 212 | } | 227 | set_pmd(pmd, pfn_pmd(page_to_pfn(virt_to_page(pcpur_ptrs[cpu])), |
| 228 | PAGE_KERNEL_LARGE)); | ||
| 229 | } | ||
| 213 | 230 | ||
| 231 | /* we're ready, commit */ | ||
| 232 | pr_info("PERCPU: Remapped at %p with large pages, static data " | ||
| 233 | "%zu bytes\n", vm.addr, static_size); | ||
| 234 | |||
| 235 | ret = pcpu_setup_first_chunk(pcpur_get_page, static_size, | ||
| 236 | PERCPU_FIRST_CHUNK_RESERVE, | ||
| 237 | PMD_SIZE, dyn_size, vm.addr, NULL); | ||
| 238 | goto out_free_ar; | ||
| 239 | |||
| 240 | enomem: | ||
| 241 | for_each_possible_cpu(cpu) | ||
| 242 | if (pcpur_ptrs[cpu]) | ||
| 243 | free_bootmem(__pa(pcpur_ptrs[cpu]), PMD_SIZE); | ||
| 244 | ret = -ENOMEM; | ||
| 245 | out_free_ar: | ||
| 246 | free_bootmem(__pa(pcpur_ptrs), ptrs_size); | ||
| 247 | return ret; | ||
| 248 | } | ||
| 249 | #else | ||
| 250 | static ssize_t __init setup_pcpu_remap(size_t static_size) | ||
| 251 | { | ||
| 252 | return -EINVAL; | ||
| 253 | } | ||
| 214 | #endif | 254 | #endif |
| 215 | 255 | ||
| 216 | #ifdef X86_64_NUMA | ||
| 217 | |||
| 218 | /* | 256 | /* |
| 219 | * Allocate node_to_cpumask_map based on number of available nodes | 257 | * Embedding allocator |
| 220 | * Requires node_possible_map to be valid. | ||
| 221 | * | 258 | * |
| 222 | * Note: node_to_cpumask() is not valid until after this is done. | 259 | * The first chunk is sized to just contain the static area plus |
| 260 | * module and dynamic reserves, and allocated as a contiguous area | ||
| 261 | * using bootmem allocator and used as-is without being mapped into | ||
| 262 | * vmalloc area. This enables the first chunk to piggy back on the | ||
| 263 | * linear physical PMD mapping and doesn't add any additional pressure | ||
| 264 | * to TLB. Note that if the needed size is smaller than the minimum | ||
| 265 | * unit size, the leftover is returned to the bootmem allocator. | ||
| 223 | */ | 266 | */ |
| 224 | static void __init setup_node_to_cpumask_map(void) | 267 | static void *pcpue_ptr __initdata; |
| 225 | { | 268 | static size_t pcpue_size __initdata; |
| 226 | unsigned int node, num = 0; | 269 | static size_t pcpue_unit_size __initdata; |
| 227 | cpumask_t *map; | ||
| 228 | |||
| 229 | /* setup nr_node_ids if not done yet */ | ||
| 230 | if (nr_node_ids == MAX_NUMNODES) { | ||
| 231 | for_each_node_mask(node, node_possible_map) | ||
| 232 | num = node; | ||
| 233 | nr_node_ids = num + 1; | ||
| 234 | } | ||
| 235 | 270 | ||
| 236 | /* allocate the map */ | 271 | static struct page * __init pcpue_get_page(unsigned int cpu, int pageno) |
| 237 | map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t)); | 272 | { |
| 273 | size_t off = (size_t)pageno << PAGE_SHIFT; | ||
| 238 | 274 | ||
| 239 | pr_debug("Node to cpumask map at %p for %d nodes\n", | 275 | if (off >= pcpue_size) |
| 240 | map, nr_node_ids); | 276 | return NULL; |
| 241 | 277 | ||
| 242 | /* node_to_cpumask() will now work */ | 278 | return virt_to_page(pcpue_ptr + cpu * pcpue_unit_size + off); |
| 243 | node_to_cpumask_map = map; | ||
| 244 | } | 279 | } |
| 245 | 280 | ||
| 246 | void __cpuinit numa_set_node(int cpu, int node) | 281 | static ssize_t __init setup_pcpu_embed(size_t static_size) |
| 247 | { | 282 | { |
| 248 | int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map); | 283 | unsigned int cpu; |
| 249 | 284 | size_t dyn_size; | |
| 250 | if (cpu_pda(cpu) && node != NUMA_NO_NODE) | 285 | |
| 251 | cpu_pda(cpu)->nodenumber = node; | 286 | /* |
| 287 | * If large page isn't supported, there's no benefit in doing | ||
| 288 | * this. Also, embedding allocation doesn't play well with | ||
| 289 | * NUMA. | ||
| 290 | */ | ||
| 291 | if (!cpu_has_pse || pcpu_need_numa()) | ||
| 292 | return -EINVAL; | ||
| 293 | |||
| 294 | /* allocate and copy */ | ||
| 295 | pcpue_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + | ||
| 296 | PERCPU_DYNAMIC_RESERVE); | ||
| 297 | pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE); | ||
| 298 | dyn_size = pcpue_size - static_size - PERCPU_FIRST_CHUNK_RESERVE; | ||
| 299 | |||
| 300 | pcpue_ptr = pcpu_alloc_bootmem(0, num_possible_cpus() * pcpue_unit_size, | ||
| 301 | PAGE_SIZE); | ||
| 302 | if (!pcpue_ptr) | ||
| 303 | return -ENOMEM; | ||
| 252 | 304 | ||
| 253 | if (cpu_to_node_map) | 305 | for_each_possible_cpu(cpu) { |
| 254 | cpu_to_node_map[cpu] = node; | 306 | void *ptr = pcpue_ptr + cpu * pcpue_unit_size; |
| 255 | 307 | ||
| 256 | else if (per_cpu_offset(cpu)) | 308 | free_bootmem(__pa(ptr + pcpue_size), |
| 257 | per_cpu(x86_cpu_to_node_map, cpu) = node; | 309 | pcpue_unit_size - pcpue_size); |
| 310 | memcpy(ptr, __per_cpu_load, static_size); | ||
| 311 | } | ||
| 258 | 312 | ||
| 259 | else | 313 | /* we're ready, commit */ |
| 260 | pr_debug("Setting node for non-present cpu %d\n", cpu); | 314 | pr_info("PERCPU: Embedded %zu pages at %p, static data %zu bytes\n", |
| 261 | } | 315 | pcpue_size >> PAGE_SHIFT, pcpue_ptr, static_size); |
| 262 | 316 | ||
| 263 | void __cpuinit numa_clear_node(int cpu) | 317 | return pcpu_setup_first_chunk(pcpue_get_page, static_size, |
| 264 | { | 318 | PERCPU_FIRST_CHUNK_RESERVE, |
| 265 | numa_set_node(cpu, NUMA_NO_NODE); | 319 | pcpue_unit_size, dyn_size, |
| 320 | pcpue_ptr, NULL); | ||
| 266 | } | 321 | } |
| 267 | 322 | ||
| 268 | #ifndef CONFIG_DEBUG_PER_CPU_MAPS | 323 | /* |
| 324 | * 4k page allocator | ||
| 325 | * | ||
| 326 | * This is the basic allocator. Static percpu area is allocated | ||
| 327 | * page-by-page and most of initialization is done by the generic | ||
| 328 | * setup function. | ||
| 329 | */ | ||
| 330 | static struct page **pcpu4k_pages __initdata; | ||
| 331 | static int pcpu4k_nr_static_pages __initdata; | ||
| 269 | 332 | ||
| 270 | void __cpuinit numa_add_cpu(int cpu) | 333 | static struct page * __init pcpu4k_get_page(unsigned int cpu, int pageno) |
| 271 | { | 334 | { |
| 272 | cpu_set(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); | 335 | if (pageno < pcpu4k_nr_static_pages) |
| 336 | return pcpu4k_pages[cpu * pcpu4k_nr_static_pages + pageno]; | ||
| 337 | return NULL; | ||
| 273 | } | 338 | } |
| 274 | 339 | ||
| 275 | void __cpuinit numa_remove_cpu(int cpu) | 340 | static void __init pcpu4k_populate_pte(unsigned long addr) |
| 276 | { | 341 | { |
| 277 | cpu_clear(cpu, node_to_cpumask_map[cpu_to_node(cpu)]); | 342 | populate_extra_pte(addr); |
| 278 | } | 343 | } |
| 279 | 344 | ||
| 280 | #else /* CONFIG_DEBUG_PER_CPU_MAPS */ | 345 | static ssize_t __init setup_pcpu_4k(size_t static_size) |
| 281 | |||
| 282 | /* | ||
| 283 | * --------- debug versions of the numa functions --------- | ||
| 284 | */ | ||
| 285 | static void __cpuinit numa_set_cpumask(int cpu, int enable) | ||
| 286 | { | 346 | { |
| 287 | int node = cpu_to_node(cpu); | 347 | size_t pages_size; |
| 288 | cpumask_t *mask; | 348 | unsigned int cpu; |
| 289 | char buf[64]; | 349 | int i, j; |
| 290 | 350 | ssize_t ret; | |
| 291 | if (node_to_cpumask_map == NULL) { | 351 | |
| 292 | printk(KERN_ERR "node_to_cpumask_map NULL\n"); | 352 | pcpu4k_nr_static_pages = PFN_UP(static_size); |
| 293 | dump_stack(); | 353 | |
| 294 | return; | 354 | /* unaligned allocations can't be freed, round up to page size */ |
| 295 | } | 355 | pages_size = PFN_ALIGN(pcpu4k_nr_static_pages * num_possible_cpus() |
| 296 | 356 | * sizeof(pcpu4k_pages[0])); | |
| 297 | mask = &node_to_cpumask_map[node]; | 357 | pcpu4k_pages = alloc_bootmem(pages_size); |
| 298 | if (enable) | 358 | |
| 299 | cpu_set(cpu, *mask); | 359 | /* allocate and copy */ |
| 300 | else | 360 | j = 0; |
| 301 | cpu_clear(cpu, *mask); | 361 | for_each_possible_cpu(cpu) |
| 302 | 362 | for (i = 0; i < pcpu4k_nr_static_pages; i++) { | |
| 303 | cpulist_scnprintf(buf, sizeof(buf), mask); | 363 | void *ptr; |
| 304 | printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n", | 364 | |
| 305 | enable ? "numa_add_cpu" : "numa_remove_cpu", cpu, node, buf); | 365 | ptr = pcpu_alloc_bootmem(cpu, PAGE_SIZE, PAGE_SIZE); |
| 306 | } | 366 | if (!ptr) |
| 367 | goto enomem; | ||
| 368 | |||
| 369 | memcpy(ptr, __per_cpu_load + i * PAGE_SIZE, PAGE_SIZE); | ||
| 370 | pcpu4k_pages[j++] = virt_to_page(ptr); | ||
| 371 | } | ||
| 307 | 372 | ||
| 308 | void __cpuinit numa_add_cpu(int cpu) | 373 | /* we're ready, commit */ |
| 309 | { | 374 | pr_info("PERCPU: Allocated %d 4k pages, static data %zu bytes\n", |
| 310 | numa_set_cpumask(cpu, 1); | 375 | pcpu4k_nr_static_pages, static_size); |
| 376 | |||
| 377 | ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, | ||
| 378 | PERCPU_FIRST_CHUNK_RESERVE, -1, -1, NULL, | ||
| 379 | pcpu4k_populate_pte); | ||
| 380 | goto out_free_ar; | ||
| 381 | |||
| 382 | enomem: | ||
| 383 | while (--j >= 0) | ||
| 384 | free_bootmem(__pa(page_address(pcpu4k_pages[j])), PAGE_SIZE); | ||
| 385 | ret = -ENOMEM; | ||
| 386 | out_free_ar: | ||
| 387 | free_bootmem(__pa(pcpu4k_pages), pages_size); | ||
| 388 | return ret; | ||
| 311 | } | 389 | } |
| 312 | 390 | ||
| 313 | void __cpuinit numa_remove_cpu(int cpu) | 391 | static inline void setup_percpu_segment(int cpu) |
| 314 | { | 392 | { |
| 315 | numa_set_cpumask(cpu, 0); | 393 | #ifdef CONFIG_X86_32 |
| 316 | } | 394 | struct desc_struct gdt; |
| 317 | 395 | ||
| 318 | int cpu_to_node(int cpu) | 396 | pack_descriptor(&gdt, per_cpu_offset(cpu), 0xFFFFF, |
| 319 | { | 397 | 0x2 | DESCTYPE_S, 0x8); |
| 320 | if (early_per_cpu_ptr(x86_cpu_to_node_map)) { | 398 | gdt.s = 1; |
| 321 | printk(KERN_WARNING | 399 | write_gdt_entry(get_cpu_gdt_table(cpu), |
| 322 | "cpu_to_node(%d): usage too early!\n", cpu); | 400 | GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S); |
| 323 | dump_stack(); | 401 | #endif |
| 324 | return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; | ||
| 325 | } | ||
| 326 | return per_cpu(x86_cpu_to_node_map, cpu); | ||
| 327 | } | 402 | } |
| 328 | EXPORT_SYMBOL(cpu_to_node); | ||
| 329 | 403 | ||
| 330 | /* | 404 | /* |
| 331 | * Same function as cpu_to_node() but used if called before the | 405 | * Great future plan: |
| 332 | * per_cpu areas are setup. | 406 | * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data. |
| 407 | * Always point %gs to its beginning | ||
| 333 | */ | 408 | */ |
| 334 | int early_cpu_to_node(int cpu) | 409 | void __init setup_per_cpu_areas(void) |
| 335 | { | 410 | { |
| 336 | if (early_per_cpu_ptr(x86_cpu_to_node_map)) | 411 | size_t static_size = __per_cpu_end - __per_cpu_start; |
| 337 | return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; | 412 | unsigned int cpu; |
| 338 | 413 | unsigned long delta; | |
| 339 | if (!per_cpu_offset(cpu)) { | 414 | size_t pcpu_unit_size; |
| 340 | printk(KERN_WARNING | 415 | ssize_t ret; |
| 341 | "early_cpu_to_node(%d): no per_cpu area!\n", cpu); | ||
| 342 | dump_stack(); | ||
| 343 | return NUMA_NO_NODE; | ||
| 344 | } | ||
| 345 | return per_cpu(x86_cpu_to_node_map, cpu); | ||
| 346 | } | ||
| 347 | |||
| 348 | |||
| 349 | /* empty cpumask */ | ||
| 350 | static const cpumask_t cpu_mask_none; | ||
| 351 | 416 | ||
| 352 | /* | 417 | pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n", |
| 353 | * Returns a pointer to the bitmask of CPUs on Node 'node'. | 418 | NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids); |
| 354 | */ | ||
| 355 | const cpumask_t *cpumask_of_node(int node) | ||
| 356 | { | ||
| 357 | if (node_to_cpumask_map == NULL) { | ||
| 358 | printk(KERN_WARNING | ||
| 359 | "cpumask_of_node(%d): no node_to_cpumask_map!\n", | ||
| 360 | node); | ||
| 361 | dump_stack(); | ||
| 362 | return (const cpumask_t *)&cpu_online_map; | ||
| 363 | } | ||
| 364 | if (node >= nr_node_ids) { | ||
| 365 | printk(KERN_WARNING | ||
| 366 | "cpumask_of_node(%d): node > nr_node_ids(%d)\n", | ||
| 367 | node, nr_node_ids); | ||
| 368 | dump_stack(); | ||
| 369 | return &cpu_mask_none; | ||
| 370 | } | ||
| 371 | return &node_to_cpumask_map[node]; | ||
| 372 | } | ||
| 373 | EXPORT_SYMBOL(cpumask_of_node); | ||
| 374 | 419 | ||
| 375 | /* | 420 | /* |
| 376 | * Returns a bitmask of CPUs on Node 'node'. | 421 | * Allocate percpu area. If PSE is supported, try to make use |
| 377 | * | 422 | * of large page mappings. Please read comments on top of |
| 378 | * Side note: this function creates the returned cpumask on the stack | 423 | * each allocator for details. |
| 379 | * so with a high NR_CPUS count, excessive stack space is used. The | 424 | */ |
| 380 | * node_to_cpumask_ptr function should be used whenever possible. | 425 | ret = setup_pcpu_remap(static_size); |
| 381 | */ | 426 | if (ret < 0) |
| 382 | cpumask_t node_to_cpumask(int node) | 427 | ret = setup_pcpu_embed(static_size); |
| 383 | { | 428 | if (ret < 0) |
| 384 | if (node_to_cpumask_map == NULL) { | 429 | ret = setup_pcpu_4k(static_size); |
| 385 | printk(KERN_WARNING | 430 | if (ret < 0) |
| 386 | "node_to_cpumask(%d): no node_to_cpumask_map!\n", node); | 431 | panic("cannot allocate static percpu area (%zu bytes, err=%zd)", |
| 387 | dump_stack(); | 432 | static_size, ret); |
| 388 | return cpu_online_map; | 433 | |
| 389 | } | 434 | pcpu_unit_size = ret; |
| 390 | if (node >= nr_node_ids) { | 435 | |
| 391 | printk(KERN_WARNING | 436 | /* alrighty, percpu areas up and running */ |
| 392 | "node_to_cpumask(%d): node > nr_node_ids(%d)\n", | 437 | delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; |
| 393 | node, nr_node_ids); | 438 | for_each_possible_cpu(cpu) { |
| 394 | dump_stack(); | 439 | per_cpu_offset(cpu) = delta + cpu * pcpu_unit_size; |
| 395 | return cpu_mask_none; | 440 | per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); |
| 441 | per_cpu(cpu_number, cpu) = cpu; | ||
| 442 | setup_percpu_segment(cpu); | ||
| 443 | setup_stack_canary_segment(cpu); | ||
| 444 | /* | ||
| 445 | * Copy data used in early init routines from the | ||
| 446 | * initial arrays to the per cpu data areas. These | ||
| 447 | * arrays then become expendable and the *_early_ptr's | ||
| 448 | * are zeroed indicating that the static arrays are | ||
| 449 | * gone. | ||
| 450 | */ | ||
| 451 | #ifdef CONFIG_X86_LOCAL_APIC | ||
| 452 | per_cpu(x86_cpu_to_apicid, cpu) = | ||
| 453 | early_per_cpu_map(x86_cpu_to_apicid, cpu); | ||
| 454 | per_cpu(x86_bios_cpu_apicid, cpu) = | ||
| 455 | early_per_cpu_map(x86_bios_cpu_apicid, cpu); | ||
| 456 | #endif | ||
| 457 | #ifdef CONFIG_X86_64 | ||
| 458 | per_cpu(irq_stack_ptr, cpu) = | ||
| 459 | per_cpu(irq_stack_union.irq_stack, cpu) + | ||
| 460 | IRQ_STACK_SIZE - 64; | ||
| 461 | #ifdef CONFIG_NUMA | ||
| 462 | per_cpu(x86_cpu_to_node_map, cpu) = | ||
| 463 | early_per_cpu_map(x86_cpu_to_node_map, cpu); | ||
| 464 | #endif | ||
| 465 | #endif | ||
| 466 | /* | ||
| 467 | * Up to this point, the boot CPU has been using .data.init | ||
| 468 | * area. Reload any changed state for the boot CPU. | ||
| 469 | */ | ||
| 470 | if (cpu == boot_cpu_id) | ||
| 471 | switch_to_new_gdt(cpu); | ||
| 396 | } | 472 | } |
| 397 | return node_to_cpumask_map[node]; | ||
| 398 | } | ||
| 399 | EXPORT_SYMBOL(node_to_cpumask); | ||
| 400 | |||
| 401 | /* | ||
| 402 | * --------- end of debug versions of the numa functions --------- | ||
| 403 | */ | ||
| 404 | 473 | ||
| 405 | #endif /* CONFIG_DEBUG_PER_CPU_MAPS */ | 474 | /* indicate the early static arrays will soon be gone */ |
| 475 | #ifdef CONFIG_X86_LOCAL_APIC | ||
| 476 | early_per_cpu_ptr(x86_cpu_to_apicid) = NULL; | ||
| 477 | early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL; | ||
| 478 | #endif | ||
| 479 | #if defined(CONFIG_X86_64) && defined(CONFIG_NUMA) | ||
| 480 | early_per_cpu_ptr(x86_cpu_to_node_map) = NULL; | ||
| 481 | #endif | ||
| 406 | 482 | ||
| 407 | #endif /* X86_64_NUMA */ | 483 | /* Setup node to cpumask map */ |
| 484 | setup_node_to_cpumask_map(); | ||
| 408 | 485 | ||
| 486 | /* Setup cpu initialized, callin, callout masks */ | ||
| 487 | setup_cpu_local_masks(); | ||
| 488 | } | ||
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index df0587f24c54..d2cc6428c587 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c | |||
| @@ -50,27 +50,23 @@ | |||
| 50 | # define FIX_EFLAGS __FIX_EFLAGS | 50 | # define FIX_EFLAGS __FIX_EFLAGS |
| 51 | #endif | 51 | #endif |
| 52 | 52 | ||
| 53 | #define COPY(x) { \ | 53 | #define COPY(x) do { \ |
| 54 | err |= __get_user(regs->x, &sc->x); \ | 54 | get_user_ex(regs->x, &sc->x); \ |
| 55 | } | 55 | } while (0) |
| 56 | 56 | ||
| 57 | #define COPY_SEG(seg) { \ | 57 | #define GET_SEG(seg) ({ \ |
| 58 | unsigned short tmp; \ | 58 | unsigned short tmp; \ |
| 59 | err |= __get_user(tmp, &sc->seg); \ | 59 | get_user_ex(tmp, &sc->seg); \ |
| 60 | regs->seg = tmp; \ | 60 | tmp; \ |
| 61 | } | 61 | }) |
| 62 | 62 | ||
| 63 | #define COPY_SEG_CPL3(seg) { \ | 63 | #define COPY_SEG(seg) do { \ |
| 64 | unsigned short tmp; \ | 64 | regs->seg = GET_SEG(seg); \ |
| 65 | err |= __get_user(tmp, &sc->seg); \ | 65 | } while (0) |
| 66 | regs->seg = tmp | 3; \ | ||
| 67 | } | ||
| 68 | 66 | ||
| 69 | #define GET_SEG(seg) { \ | 67 | #define COPY_SEG_CPL3(seg) do { \ |
| 70 | unsigned short tmp; \ | 68 | regs->seg = GET_SEG(seg) | 3; \ |
| 71 | err |= __get_user(tmp, &sc->seg); \ | 69 | } while (0) |
| 72 | loadsegment(seg, tmp); \ | ||
| 73 | } | ||
| 74 | 70 | ||
| 75 | static int | 71 | static int |
| 76 | restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, | 72 | restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, |
| @@ -83,45 +79,49 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, | |||
| 83 | /* Always make any pending restarted system calls return -EINTR */ | 79 | /* Always make any pending restarted system calls return -EINTR */ |
| 84 | current_thread_info()->restart_block.fn = do_no_restart_syscall; | 80 | current_thread_info()->restart_block.fn = do_no_restart_syscall; |
| 85 | 81 | ||
| 82 | get_user_try { | ||
| 83 | |||
| 86 | #ifdef CONFIG_X86_32 | 84 | #ifdef CONFIG_X86_32 |
| 87 | GET_SEG(gs); | 85 | set_user_gs(regs, GET_SEG(gs)); |
| 88 | COPY_SEG(fs); | 86 | COPY_SEG(fs); |
| 89 | COPY_SEG(es); | 87 | COPY_SEG(es); |
| 90 | COPY_SEG(ds); | 88 | COPY_SEG(ds); |
| 91 | #endif /* CONFIG_X86_32 */ | 89 | #endif /* CONFIG_X86_32 */ |
| 92 | 90 | ||
| 93 | COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); | 91 | COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); |
| 94 | COPY(dx); COPY(cx); COPY(ip); | 92 | COPY(dx); COPY(cx); COPY(ip); |
| 95 | 93 | ||
| 96 | #ifdef CONFIG_X86_64 | 94 | #ifdef CONFIG_X86_64 |
| 97 | COPY(r8); | 95 | COPY(r8); |
| 98 | COPY(r9); | 96 | COPY(r9); |
| 99 | COPY(r10); | 97 | COPY(r10); |
| 100 | COPY(r11); | 98 | COPY(r11); |
| 101 | COPY(r12); | 99 | COPY(r12); |
| 102 | COPY(r13); | 100 | COPY(r13); |
| 103 | COPY(r14); | 101 | COPY(r14); |
| 104 | COPY(r15); | 102 | COPY(r15); |
| 105 | #endif /* CONFIG_X86_64 */ | 103 | #endif /* CONFIG_X86_64 */ |
| 106 | 104 | ||
| 107 | #ifdef CONFIG_X86_32 | 105 | #ifdef CONFIG_X86_32 |
| 108 | COPY_SEG_CPL3(cs); | 106 | COPY_SEG_CPL3(cs); |
| 109 | COPY_SEG_CPL3(ss); | 107 | COPY_SEG_CPL3(ss); |
| 110 | #else /* !CONFIG_X86_32 */ | 108 | #else /* !CONFIG_X86_32 */ |
| 111 | /* Kernel saves and restores only the CS segment register on signals, | 109 | /* Kernel saves and restores only the CS segment register on signals, |
| 112 | * which is the bare minimum needed to allow mixed 32/64-bit code. | 110 | * which is the bare minimum needed to allow mixed 32/64-bit code. |
| 113 | * App's signal handler can save/restore other segments if needed. */ | 111 | * App's signal handler can save/restore other segments if needed. */ |
| 114 | COPY_SEG_CPL3(cs); | 112 | COPY_SEG_CPL3(cs); |
| 115 | #endif /* CONFIG_X86_32 */ | 113 | #endif /* CONFIG_X86_32 */ |
| 116 | 114 | ||
| 117 | err |= __get_user(tmpflags, &sc->flags); | 115 | get_user_ex(tmpflags, &sc->flags); |
| 118 | regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); | 116 | regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); |
| 119 | regs->orig_ax = -1; /* disable syscall checks */ | 117 | regs->orig_ax = -1; /* disable syscall checks */ |
| 120 | 118 | ||
| 121 | err |= __get_user(buf, &sc->fpstate); | 119 | get_user_ex(buf, &sc->fpstate); |
| 122 | err |= restore_i387_xstate(buf); | 120 | err |= restore_i387_xstate(buf); |
| 121 | |||
| 122 | get_user_ex(*pax, &sc->ax); | ||
| 123 | } get_user_catch(err); | ||
| 123 | 124 | ||
| 124 | err |= __get_user(*pax, &sc->ax); | ||
| 125 | return err; | 125 | return err; |
| 126 | } | 126 | } |
| 127 | 127 | ||
| @@ -131,57 +131,55 @@ setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, | |||
| 131 | { | 131 | { |
| 132 | int err = 0; | 132 | int err = 0; |
| 133 | 133 | ||
| 134 | #ifdef CONFIG_X86_32 | 134 | put_user_try { |
| 135 | { | ||
| 136 | unsigned int tmp; | ||
| 137 | 135 | ||
| 138 | savesegment(gs, tmp); | 136 | #ifdef CONFIG_X86_32 |
| 139 | err |= __put_user(tmp, (unsigned int __user *)&sc->gs); | 137 | put_user_ex(get_user_gs(regs), (unsigned int __user *)&sc->gs); |
| 140 | } | 138 | put_user_ex(regs->fs, (unsigned int __user *)&sc->fs); |
| 141 | err |= __put_user(regs->fs, (unsigned int __user *)&sc->fs); | 139 | put_user_ex(regs->es, (unsigned int __user *)&sc->es); |
| 142 | err |= __put_user(regs->es, (unsigned int __user *)&sc->es); | 140 | put_user_ex(regs->ds, (unsigned int __user *)&sc->ds); |
| 143 | err |= __put_user(regs->ds, (unsigned int __user *)&sc->ds); | ||
| 144 | #endif /* CONFIG_X86_32 */ | 141 | #endif /* CONFIG_X86_32 */ |
| 145 | 142 | ||
| 146 | err |= __put_user(regs->di, &sc->di); | 143 | put_user_ex(regs->di, &sc->di); |
| 147 | err |= __put_user(regs->si, &sc->si); | 144 | put_user_ex(regs->si, &sc->si); |
| 148 | err |= __put_user(regs->bp, &sc->bp); | 145 | put_user_ex(regs->bp, &sc->bp); |
| 149 | err |= __put_user(regs->sp, &sc->sp); | 146 | put_user_ex(regs->sp, &sc->sp); |
| 150 | err |= __put_user(regs->bx, &sc->bx); | 147 | put_user_ex(regs->bx, &sc->bx); |
| 151 | err |= __put_user(regs->dx, &sc->dx); | 148 | put_user_ex(regs->dx, &sc->dx); |
| 152 | err |= __put_user(regs->cx, &sc->cx); | 149 | put_user_ex(regs->cx, &sc->cx); |
| 153 | err |= __put_user(regs->ax, &sc->ax); | 150 | put_user_ex(regs->ax, &sc->ax); |
| 154 | #ifdef CONFIG_X86_64 | 151 | #ifdef CONFIG_X86_64 |
| 155 | err |= __put_user(regs->r8, &sc->r8); | 152 | put_user_ex(regs->r8, &sc->r8); |
| 156 | err |= __put_user(regs->r9, &sc->r9); | 153 | put_user_ex(regs->r9, &sc->r9); |
| 157 | err |= __put_user(regs->r10, &sc->r10); | 154 | put_user_ex(regs->r10, &sc->r10); |
| 158 | err |= __put_user(regs->r11, &sc->r11); | 155 | put_user_ex(regs->r11, &sc->r11); |
| 159 | err |= __put_user(regs->r12, &sc->r12); | 156 | put_user_ex(regs->r12, &sc->r12); |
| 160 | err |= __put_user(regs->r13, &sc->r13); | 157 | put_user_ex(regs->r13, &sc->r13); |
| 161 | err |= __put_user(regs->r14, &sc->r14); | 158 | put_user_ex(regs->r14, &sc->r14); |
| 162 | err |= __put_user(regs->r15, &sc->r15); | 159 | put_user_ex(regs->r15, &sc->r15); |
| 163 | #endif /* CONFIG_X86_64 */ | 160 | #endif /* CONFIG_X86_64 */ |
| 164 | 161 | ||
| 165 | err |= __put_user(current->thread.trap_no, &sc->trapno); | 162 | put_user_ex(current->thread.trap_no, &sc->trapno); |
| 166 | err |= __put_user(current->thread.error_code, &sc->err); | 163 | put_user_ex(current->thread.error_code, &sc->err); |
| 167 | err |= __put_user(regs->ip, &sc->ip); | 164 | put_user_ex(regs->ip, &sc->ip); |
| 168 | #ifdef CONFIG_X86_32 | 165 | #ifdef CONFIG_X86_32 |
| 169 | err |= __put_user(regs->cs, (unsigned int __user *)&sc->cs); | 166 | put_user_ex(regs->cs, (unsigned int __user *)&sc->cs); |
| 170 | err |= __put_user(regs->flags, &sc->flags); | 167 | put_user_ex(regs->flags, &sc->flags); |
| 171 | err |= __put_user(regs->sp, &sc->sp_at_signal); | 168 | put_user_ex(regs->sp, &sc->sp_at_signal); |
| 172 | err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss); | 169 | put_user_ex(regs->ss, (unsigned int __user *)&sc->ss); |
| 173 | #else /* !CONFIG_X86_32 */ | 170 | #else /* !CONFIG_X86_32 */ |
| 174 | err |= __put_user(regs->flags, &sc->flags); | 171 | put_user_ex(regs->flags, &sc->flags); |
| 175 | err |= __put_user(regs->cs, &sc->cs); | 172 | put_user_ex(regs->cs, &sc->cs); |
| 176 | err |= __put_user(0, &sc->gs); | 173 | put_user_ex(0, &sc->gs); |
| 177 | err |= __put_user(0, &sc->fs); | 174 | put_user_ex(0, &sc->fs); |
| 178 | #endif /* CONFIG_X86_32 */ | 175 | #endif /* CONFIG_X86_32 */ |
| 179 | 176 | ||
| 180 | err |= __put_user(fpstate, &sc->fpstate); | 177 | put_user_ex(fpstate, &sc->fpstate); |
| 181 | 178 | ||
| 182 | /* non-iBCS2 extensions.. */ | 179 | /* non-iBCS2 extensions.. */ |
| 183 | err |= __put_user(mask, &sc->oldmask); | 180 | put_user_ex(mask, &sc->oldmask); |
| 184 | err |= __put_user(current->thread.cr2, &sc->cr2); | 181 | put_user_ex(current->thread.cr2, &sc->cr2); |
| 182 | } put_user_catch(err); | ||
| 185 | 183 | ||
| 186 | return err; | 184 | return err; |
| 187 | } | 185 | } |
| @@ -189,40 +187,35 @@ setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, | |||
| 189 | /* | 187 | /* |
| 190 | * Set up a signal frame. | 188 | * Set up a signal frame. |
| 191 | */ | 189 | */ |
| 192 | #ifdef CONFIG_X86_32 | ||
| 193 | static const struct { | ||
| 194 | u16 poplmovl; | ||
| 195 | u32 val; | ||
| 196 | u16 int80; | ||
| 197 | } __attribute__((packed)) retcode = { | ||
| 198 | 0xb858, /* popl %eax; movl $..., %eax */ | ||
| 199 | __NR_sigreturn, | ||
| 200 | 0x80cd, /* int $0x80 */ | ||
| 201 | }; | ||
| 202 | |||
| 203 | static const struct { | ||
| 204 | u8 movl; | ||
| 205 | u32 val; | ||
| 206 | u16 int80; | ||
| 207 | u8 pad; | ||
| 208 | } __attribute__((packed)) rt_retcode = { | ||
| 209 | 0xb8, /* movl $..., %eax */ | ||
| 210 | __NR_rt_sigreturn, | ||
| 211 | 0x80cd, /* int $0x80 */ | ||
| 212 | 0 | ||
| 213 | }; | ||
| 214 | 190 | ||
| 215 | /* | 191 | /* |
| 216 | * Determine which stack to use.. | 192 | * Determine which stack to use.. |
| 217 | */ | 193 | */ |
| 194 | static unsigned long align_sigframe(unsigned long sp) | ||
| 195 | { | ||
| 196 | #ifdef CONFIG_X86_32 | ||
| 197 | /* | ||
| 198 | * Align the stack pointer according to the i386 ABI, | ||
| 199 | * i.e. so that on function entry ((sp + 4) & 15) == 0. | ||
| 200 | */ | ||
| 201 | sp = ((sp + 4) & -16ul) - 4; | ||
| 202 | #else /* !CONFIG_X86_32 */ | ||
| 203 | sp = round_down(sp, 16) - 8; | ||
| 204 | #endif | ||
| 205 | return sp; | ||
| 206 | } | ||
| 207 | |||
| 218 | static inline void __user * | 208 | static inline void __user * |
| 219 | get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, | 209 | get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, |
| 220 | void **fpstate) | 210 | void __user **fpstate) |
| 221 | { | 211 | { |
| 222 | unsigned long sp; | ||
| 223 | |||
| 224 | /* Default to using normal stack */ | 212 | /* Default to using normal stack */ |
| 225 | sp = regs->sp; | 213 | unsigned long sp = regs->sp; |
| 214 | |||
| 215 | #ifdef CONFIG_X86_64 | ||
| 216 | /* redzone */ | ||
| 217 | sp -= 128; | ||
| 218 | #endif /* CONFIG_X86_64 */ | ||
| 226 | 219 | ||
| 227 | /* | 220 | /* |
| 228 | * If we are on the alternate signal stack and would overflow it, don't. | 221 | * If we are on the alternate signal stack and would overflow it, don't. |
| @@ -236,30 +229,52 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, | |||
| 236 | if (sas_ss_flags(sp) == 0) | 229 | if (sas_ss_flags(sp) == 0) |
| 237 | sp = current->sas_ss_sp + current->sas_ss_size; | 230 | sp = current->sas_ss_sp + current->sas_ss_size; |
| 238 | } else { | 231 | } else { |
| 232 | #ifdef CONFIG_X86_32 | ||
| 239 | /* This is the legacy signal stack switching. */ | 233 | /* This is the legacy signal stack switching. */ |
| 240 | if ((regs->ss & 0xffff) != __USER_DS && | 234 | if ((regs->ss & 0xffff) != __USER_DS && |
| 241 | !(ka->sa.sa_flags & SA_RESTORER) && | 235 | !(ka->sa.sa_flags & SA_RESTORER) && |
| 242 | ka->sa.sa_restorer) | 236 | ka->sa.sa_restorer) |
| 243 | sp = (unsigned long) ka->sa.sa_restorer; | 237 | sp = (unsigned long) ka->sa.sa_restorer; |
| 238 | #endif /* CONFIG_X86_32 */ | ||
| 244 | } | 239 | } |
| 245 | 240 | ||
| 246 | if (used_math()) { | 241 | if (used_math()) { |
| 247 | sp = sp - sig_xstate_size; | 242 | sp -= sig_xstate_size; |
| 248 | *fpstate = (struct _fpstate *) sp; | 243 | #ifdef CONFIG_X86_64 |
| 244 | sp = round_down(sp, 64); | ||
| 245 | #endif /* CONFIG_X86_64 */ | ||
| 246 | *fpstate = (void __user *)sp; | ||
| 247 | |||
| 249 | if (save_i387_xstate(*fpstate) < 0) | 248 | if (save_i387_xstate(*fpstate) < 0) |
| 250 | return (void __user *)-1L; | 249 | return (void __user *)-1L; |
| 251 | } | 250 | } |
| 252 | 251 | ||
| 253 | sp -= frame_size; | 252 | return (void __user *)align_sigframe(sp - frame_size); |
| 254 | /* | ||
| 255 | * Align the stack pointer according to the i386 ABI, | ||
| 256 | * i.e. so that on function entry ((sp + 4) & 15) == 0. | ||
| 257 | */ | ||
| 258 | sp = ((sp + 4) & -16ul) - 4; | ||
| 259 | |||
| 260 | return (void __user *) sp; | ||
| 261 | } | 253 | } |
| 262 | 254 | ||
| 255 | #ifdef CONFIG_X86_32 | ||
| 256 | static const struct { | ||
| 257 | u16 poplmovl; | ||
| 258 | u32 val; | ||
| 259 | u16 int80; | ||
| 260 | } __attribute__((packed)) retcode = { | ||
| 261 | 0xb858, /* popl %eax; movl $..., %eax */ | ||
| 262 | __NR_sigreturn, | ||
| 263 | 0x80cd, /* int $0x80 */ | ||
| 264 | }; | ||
| 265 | |||
| 266 | static const struct { | ||
| 267 | u8 movl; | ||
| 268 | u32 val; | ||
| 269 | u16 int80; | ||
| 270 | u8 pad; | ||
| 271 | } __attribute__((packed)) rt_retcode = { | ||
| 272 | 0xb8, /* movl $..., %eax */ | ||
| 273 | __NR_rt_sigreturn, | ||
| 274 | 0x80cd, /* int $0x80 */ | ||
| 275 | 0 | ||
| 276 | }; | ||
| 277 | |||
| 263 | static int | 278 | static int |
| 264 | __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, | 279 | __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, |
| 265 | struct pt_regs *regs) | 280 | struct pt_regs *regs) |
| @@ -336,43 +351,41 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
| 336 | if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) | 351 | if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) |
| 337 | return -EFAULT; | 352 | return -EFAULT; |
| 338 | 353 | ||
| 339 | err |= __put_user(sig, &frame->sig); | 354 | put_user_try { |
| 340 | err |= __put_user(&frame->info, &frame->pinfo); | 355 | put_user_ex(sig, &frame->sig); |
| 341 | err |= __put_user(&frame->uc, &frame->puc); | 356 | put_user_ex(&frame->info, &frame->pinfo); |
| 342 | err |= copy_siginfo_to_user(&frame->info, info); | 357 | put_user_ex(&frame->uc, &frame->puc); |
| 343 | if (err) | 358 | err |= copy_siginfo_to_user(&frame->info, info); |
| 344 | return -EFAULT; | ||
| 345 | 359 | ||
| 346 | /* Create the ucontext. */ | 360 | /* Create the ucontext. */ |
| 347 | if (cpu_has_xsave) | 361 | if (cpu_has_xsave) |
| 348 | err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags); | 362 | put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); |
| 349 | else | 363 | else |
| 350 | err |= __put_user(0, &frame->uc.uc_flags); | 364 | put_user_ex(0, &frame->uc.uc_flags); |
| 351 | err |= __put_user(0, &frame->uc.uc_link); | 365 | put_user_ex(0, &frame->uc.uc_link); |
| 352 | err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); | 366 | put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); |
| 353 | err |= __put_user(sas_ss_flags(regs->sp), | 367 | put_user_ex(sas_ss_flags(regs->sp), |
| 354 | &frame->uc.uc_stack.ss_flags); | 368 | &frame->uc.uc_stack.ss_flags); |
| 355 | err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); | 369 | put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size); |
| 356 | err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, | 370 | err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, |
| 357 | regs, set->sig[0]); | 371 | regs, set->sig[0]); |
| 358 | err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); | 372 | err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); |
| 359 | if (err) | 373 | |
| 360 | return -EFAULT; | 374 | /* Set up to return from userspace. */ |
| 361 | 375 | restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); | |
| 362 | /* Set up to return from userspace. */ | 376 | if (ka->sa.sa_flags & SA_RESTORER) |
| 363 | restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); | 377 | restorer = ka->sa.sa_restorer; |
| 364 | if (ka->sa.sa_flags & SA_RESTORER) | 378 | put_user_ex(restorer, &frame->pretcode); |
| 365 | restorer = ka->sa.sa_restorer; | ||
| 366 | err |= __put_user(restorer, &frame->pretcode); | ||
| 367 | 379 | ||
| 368 | /* | 380 | /* |
| 369 | * This is movl $__NR_rt_sigreturn, %ax ; int $0x80 | 381 | * This is movl $__NR_rt_sigreturn, %ax ; int $0x80 |
| 370 | * | 382 | * |
| 371 | * WE DO NOT USE IT ANY MORE! It's only left here for historical | 383 | * WE DO NOT USE IT ANY MORE! It's only left here for historical |
| 372 | * reasons and because gdb uses it as a signature to notice | 384 | * reasons and because gdb uses it as a signature to notice |
| 373 | * signal handler stack frames. | 385 | * signal handler stack frames. |
| 374 | */ | 386 | */ |
| 375 | err |= __put_user(*((u64 *)&rt_retcode), (u64 *)frame->retcode); | 387 | put_user_ex(*((u64 *)&rt_retcode), (u64 *)frame->retcode); |
| 388 | } put_user_catch(err); | ||
| 376 | 389 | ||
| 377 | if (err) | 390 | if (err) |
| 378 | return -EFAULT; | 391 | return -EFAULT; |
| @@ -392,24 +405,6 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
| 392 | return 0; | 405 | return 0; |
| 393 | } | 406 | } |
| 394 | #else /* !CONFIG_X86_32 */ | 407 | #else /* !CONFIG_X86_32 */ |
| 395 | /* | ||
| 396 | * Determine which stack to use.. | ||
| 397 | */ | ||
| 398 | static void __user * | ||
| 399 | get_stack(struct k_sigaction *ka, unsigned long sp, unsigned long size) | ||
| 400 | { | ||
| 401 | /* Default to using normal stack - redzone*/ | ||
| 402 | sp -= 128; | ||
| 403 | |||
| 404 | /* This is the X/Open sanctioned signal stack switching. */ | ||
| 405 | if (ka->sa.sa_flags & SA_ONSTACK) { | ||
| 406 | if (sas_ss_flags(sp) == 0) | ||
| 407 | sp = current->sas_ss_sp + current->sas_ss_size; | ||
| 408 | } | ||
| 409 | |||
| 410 | return (void __user *)round_down(sp - size, 64); | ||
| 411 | } | ||
| 412 | |||
| 413 | static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | 408 | static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, |
| 414 | sigset_t *set, struct pt_regs *regs) | 409 | sigset_t *set, struct pt_regs *regs) |
| 415 | { | 410 | { |
| @@ -418,15 +413,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
| 418 | int err = 0; | 413 | int err = 0; |
| 419 | struct task_struct *me = current; | 414 | struct task_struct *me = current; |
| 420 | 415 | ||
| 421 | if (used_math()) { | 416 | frame = get_sigframe(ka, regs, sizeof(struct rt_sigframe), &fp); |
| 422 | fp = get_stack(ka, regs->sp, sig_xstate_size); | ||
| 423 | frame = (void __user *)round_down( | ||
| 424 | (unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8; | ||
| 425 | |||
| 426 | if (save_i387_xstate(fp) < 0) | ||
| 427 | return -EFAULT; | ||
| 428 | } else | ||
| 429 | frame = get_stack(ka, regs->sp, sizeof(struct rt_sigframe)) - 8; | ||
| 430 | 417 | ||
| 431 | if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) | 418 | if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) |
| 432 | return -EFAULT; | 419 | return -EFAULT; |
| @@ -436,28 +423,30 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
| 436 | return -EFAULT; | 423 | return -EFAULT; |
| 437 | } | 424 | } |
| 438 | 425 | ||
| 439 | /* Create the ucontext. */ | 426 | put_user_try { |
| 440 | if (cpu_has_xsave) | 427 | /* Create the ucontext. */ |
| 441 | err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags); | 428 | if (cpu_has_xsave) |
| 442 | else | 429 | put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); |
| 443 | err |= __put_user(0, &frame->uc.uc_flags); | 430 | else |
| 444 | err |= __put_user(0, &frame->uc.uc_link); | 431 | put_user_ex(0, &frame->uc.uc_flags); |
| 445 | err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp); | 432 | put_user_ex(0, &frame->uc.uc_link); |
| 446 | err |= __put_user(sas_ss_flags(regs->sp), | 433 | put_user_ex(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp); |
| 447 | &frame->uc.uc_stack.ss_flags); | 434 | put_user_ex(sas_ss_flags(regs->sp), |
| 448 | err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size); | 435 | &frame->uc.uc_stack.ss_flags); |
| 449 | err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]); | 436 | put_user_ex(me->sas_ss_size, &frame->uc.uc_stack.ss_size); |
| 450 | err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); | 437 | err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]); |
| 451 | 438 | err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); | |
| 452 | /* Set up to return from userspace. If provided, use a stub | 439 | |
| 453 | already in userspace. */ | 440 | /* Set up to return from userspace. If provided, use a stub |
| 454 | /* x86-64 should always use SA_RESTORER. */ | 441 | already in userspace. */ |
| 455 | if (ka->sa.sa_flags & SA_RESTORER) { | 442 | /* x86-64 should always use SA_RESTORER. */ |
| 456 | err |= __put_user(ka->sa.sa_restorer, &frame->pretcode); | 443 | if (ka->sa.sa_flags & SA_RESTORER) { |
| 457 | } else { | 444 | put_user_ex(ka->sa.sa_restorer, &frame->pretcode); |
| 458 | /* could use a vstub here */ | 445 | } else { |
| 459 | return -EFAULT; | 446 | /* could use a vstub here */ |
| 460 | } | 447 | err |= -EFAULT; |
| 448 | } | ||
| 449 | } put_user_catch(err); | ||
| 461 | 450 | ||
| 462 | if (err) | 451 | if (err) |
| 463 | return -EFAULT; | 452 | return -EFAULT; |
| @@ -509,31 +498,41 @@ sys_sigaction(int sig, const struct old_sigaction __user *act, | |||
| 509 | struct old_sigaction __user *oact) | 498 | struct old_sigaction __user *oact) |
| 510 | { | 499 | { |
| 511 | struct k_sigaction new_ka, old_ka; | 500 | struct k_sigaction new_ka, old_ka; |
| 512 | int ret; | 501 | int ret = 0; |
| 513 | 502 | ||
| 514 | if (act) { | 503 | if (act) { |
| 515 | old_sigset_t mask; | 504 | old_sigset_t mask; |
| 516 | 505 | ||
| 517 | if (!access_ok(VERIFY_READ, act, sizeof(*act)) || | 506 | if (!access_ok(VERIFY_READ, act, sizeof(*act))) |
| 518 | __get_user(new_ka.sa.sa_handler, &act->sa_handler) || | ||
| 519 | __get_user(new_ka.sa.sa_restorer, &act->sa_restorer)) | ||
| 520 | return -EFAULT; | 507 | return -EFAULT; |
| 521 | 508 | ||
| 522 | __get_user(new_ka.sa.sa_flags, &act->sa_flags); | 509 | get_user_try { |
| 523 | __get_user(mask, &act->sa_mask); | 510 | get_user_ex(new_ka.sa.sa_handler, &act->sa_handler); |
| 511 | get_user_ex(new_ka.sa.sa_flags, &act->sa_flags); | ||
| 512 | get_user_ex(mask, &act->sa_mask); | ||
| 513 | get_user_ex(new_ka.sa.sa_restorer, &act->sa_restorer); | ||
| 514 | } get_user_catch(ret); | ||
| 515 | |||
| 516 | if (ret) | ||
| 517 | return -EFAULT; | ||
| 524 | siginitset(&new_ka.sa.sa_mask, mask); | 518 | siginitset(&new_ka.sa.sa_mask, mask); |
| 525 | } | 519 | } |
| 526 | 520 | ||
| 527 | ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); | 521 | ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); |
| 528 | 522 | ||
| 529 | if (!ret && oact) { | 523 | if (!ret && oact) { |
| 530 | if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || | 524 | if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact))) |
| 531 | __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || | ||
| 532 | __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer)) | ||
| 533 | return -EFAULT; | 525 | return -EFAULT; |
| 534 | 526 | ||
| 535 | __put_user(old_ka.sa.sa_flags, &oact->sa_flags); | 527 | put_user_try { |
| 536 | __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); | 528 | put_user_ex(old_ka.sa.sa_handler, &oact->sa_handler); |
| 529 | put_user_ex(old_ka.sa.sa_flags, &oact->sa_flags); | ||
| 530 | put_user_ex(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); | ||
| 531 | put_user_ex(old_ka.sa.sa_restorer, &oact->sa_restorer); | ||
| 532 | } put_user_catch(ret); | ||
| 533 | |||
| 534 | if (ret) | ||
| 535 | return -EFAULT; | ||
| 537 | } | 536 | } |
| 538 | 537 | ||
| 539 | return ret; | 538 | return ret; |
| @@ -541,14 +540,9 @@ sys_sigaction(int sig, const struct old_sigaction __user *act, | |||
| 541 | #endif /* CONFIG_X86_32 */ | 540 | #endif /* CONFIG_X86_32 */ |
| 542 | 541 | ||
| 543 | #ifdef CONFIG_X86_32 | 542 | #ifdef CONFIG_X86_32 |
| 544 | asmlinkage int sys_sigaltstack(unsigned long bx) | 543 | int sys_sigaltstack(struct pt_regs *regs) |
| 545 | { | 544 | { |
| 546 | /* | 545 | const stack_t __user *uss = (const stack_t __user *)regs->bx; |
| 547 | * This is needed to make gcc realize it doesn't own the | ||
| 548 | * "struct pt_regs" | ||
| 549 | */ | ||
| 550 | struct pt_regs *regs = (struct pt_regs *)&bx; | ||
| 551 | const stack_t __user *uss = (const stack_t __user *)bx; | ||
| 552 | stack_t __user *uoss = (stack_t __user *)regs->cx; | 546 | stack_t __user *uoss = (stack_t __user *)regs->cx; |
| 553 | 547 | ||
| 554 | return do_sigaltstack(uss, uoss, regs->sp); | 548 | return do_sigaltstack(uss, uoss, regs->sp); |
| @@ -566,14 +560,12 @@ sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, | |||
| 566 | * Do a signal return; undo the signal stack. | 560 | * Do a signal return; undo the signal stack. |
| 567 | */ | 561 | */ |
| 568 | #ifdef CONFIG_X86_32 | 562 | #ifdef CONFIG_X86_32 |
| 569 | asmlinkage unsigned long sys_sigreturn(unsigned long __unused) | 563 | unsigned long sys_sigreturn(struct pt_regs *regs) |
| 570 | { | 564 | { |
| 571 | struct sigframe __user *frame; | 565 | struct sigframe __user *frame; |
| 572 | struct pt_regs *regs; | ||
| 573 | unsigned long ax; | 566 | unsigned long ax; |
| 574 | sigset_t set; | 567 | sigset_t set; |
| 575 | 568 | ||
| 576 | regs = (struct pt_regs *) &__unused; | ||
| 577 | frame = (struct sigframe __user *)(regs->sp - 8); | 569 | frame = (struct sigframe __user *)(regs->sp - 8); |
| 578 | 570 | ||
| 579 | if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) | 571 | if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) |
| @@ -600,7 +592,7 @@ badframe: | |||
| 600 | } | 592 | } |
| 601 | #endif /* CONFIG_X86_32 */ | 593 | #endif /* CONFIG_X86_32 */ |
| 602 | 594 | ||
| 603 | static long do_rt_sigreturn(struct pt_regs *regs) | 595 | long sys_rt_sigreturn(struct pt_regs *regs) |
| 604 | { | 596 | { |
| 605 | struct rt_sigframe __user *frame; | 597 | struct rt_sigframe __user *frame; |
| 606 | unsigned long ax; | 598 | unsigned long ax; |
| @@ -631,25 +623,6 @@ badframe: | |||
| 631 | return 0; | 623 | return 0; |
| 632 | } | 624 | } |
| 633 | 625 | ||
| 634 | #ifdef CONFIG_X86_32 | ||
| 635 | /* | ||
| 636 | * Note: do not pass in pt_regs directly as with tail-call optimization | ||
| 637 | * GCC will incorrectly stomp on the caller's frame and corrupt user-space | ||
| 638 | * register state: | ||
| 639 | */ | ||
| 640 | asmlinkage int sys_rt_sigreturn(unsigned long __unused) | ||
| 641 | { | ||
| 642 | struct pt_regs *regs = (struct pt_regs *)&__unused; | ||
| 643 | |||
| 644 | return do_rt_sigreturn(regs); | ||
| 645 | } | ||
| 646 | #else /* !CONFIG_X86_32 */ | ||
| 647 | asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) | ||
| 648 | { | ||
| 649 | return do_rt_sigreturn(regs); | ||
| 650 | } | ||
| 651 | #endif /* CONFIG_X86_32 */ | ||
| 652 | |||
| 653 | /* | 626 | /* |
| 654 | * OK, we're invoking a handler: | 627 | * OK, we're invoking a handler: |
| 655 | */ | 628 | */ |
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index e6faa3316bd2..13f33ea8ccaa 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c | |||
| @@ -2,7 +2,7 @@ | |||
| 2 | * Intel SMP support routines. | 2 | * Intel SMP support routines. |
| 3 | * | 3 | * |
| 4 | * (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk> | 4 | * (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk> |
| 5 | * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com> | 5 | * (c) 1998-99, 2000, 2009 Ingo Molnar <mingo@redhat.com> |
| 6 | * (c) 2002,2003 Andi Kleen, SuSE Labs. | 6 | * (c) 2002,2003 Andi Kleen, SuSE Labs. |
| 7 | * | 7 | * |
| 8 | * i386 and x86_64 integration by Glauber Costa <gcosta@redhat.com> | 8 | * i386 and x86_64 integration by Glauber Costa <gcosta@redhat.com> |
| @@ -26,8 +26,7 @@ | |||
| 26 | #include <asm/tlbflush.h> | 26 | #include <asm/tlbflush.h> |
| 27 | #include <asm/mmu_context.h> | 27 | #include <asm/mmu_context.h> |
| 28 | #include <asm/proto.h> | 28 | #include <asm/proto.h> |
| 29 | #include <mach_ipi.h> | 29 | #include <asm/apic.h> |
| 30 | #include <mach_apic.h> | ||
| 31 | /* | 30 | /* |
| 32 | * Some notes on x86 processor bugs affecting SMP operation: | 31 | * Some notes on x86 processor bugs affecting SMP operation: |
| 33 | * | 32 | * |
| @@ -118,12 +117,12 @@ static void native_smp_send_reschedule(int cpu) | |||
| 118 | WARN_ON(1); | 117 | WARN_ON(1); |
| 119 | return; | 118 | return; |
| 120 | } | 119 | } |
| 121 | send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR); | 120 | apic->send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR); |
| 122 | } | 121 | } |
| 123 | 122 | ||
| 124 | void native_send_call_func_single_ipi(int cpu) | 123 | void native_send_call_func_single_ipi(int cpu) |
| 125 | { | 124 | { |
| 126 | send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR); | 125 | apic->send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR); |
| 127 | } | 126 | } |
| 128 | 127 | ||
| 129 | void native_send_call_func_ipi(const struct cpumask *mask) | 128 | void native_send_call_func_ipi(const struct cpumask *mask) |
| @@ -131,7 +130,7 @@ void native_send_call_func_ipi(const struct cpumask *mask) | |||
| 131 | cpumask_var_t allbutself; | 130 | cpumask_var_t allbutself; |
| 132 | 131 | ||
| 133 | if (!alloc_cpumask_var(&allbutself, GFP_ATOMIC)) { | 132 | if (!alloc_cpumask_var(&allbutself, GFP_ATOMIC)) { |
| 134 | send_IPI_mask(mask, CALL_FUNCTION_VECTOR); | 133 | apic->send_IPI_mask(mask, CALL_FUNCTION_VECTOR); |
| 135 | return; | 134 | return; |
| 136 | } | 135 | } |
| 137 | 136 | ||
| @@ -140,9 +139,9 @@ void native_send_call_func_ipi(const struct cpumask *mask) | |||
| 140 | 139 | ||
| 141 | if (cpumask_equal(mask, allbutself) && | 140 | if (cpumask_equal(mask, allbutself) && |
| 142 | cpumask_equal(cpu_online_mask, cpu_callout_mask)) | 141 | cpumask_equal(cpu_online_mask, cpu_callout_mask)) |
| 143 | send_IPI_allbutself(CALL_FUNCTION_VECTOR); | 142 | apic->send_IPI_allbutself(CALL_FUNCTION_VECTOR); |
| 144 | else | 143 | else |
| 145 | send_IPI_mask(mask, CALL_FUNCTION_VECTOR); | 144 | apic->send_IPI_mask(mask, CALL_FUNCTION_VECTOR); |
| 146 | 145 | ||
| 147 | free_cpumask_var(allbutself); | 146 | free_cpumask_var(allbutself); |
| 148 | } | 147 | } |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index bb1a3b1fc87f..58d24ef917d8 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
| @@ -2,7 +2,7 @@ | |||
| 2 | * x86 SMP booting functions | 2 | * x86 SMP booting functions |
| 3 | * | 3 | * |
| 4 | * (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk> | 4 | * (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk> |
| 5 | * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com> | 5 | * (c) 1998, 1999, 2000, 2009 Ingo Molnar <mingo@redhat.com> |
| 6 | * Copyright 2001 Andi Kleen, SuSE Labs. | 6 | * Copyright 2001 Andi Kleen, SuSE Labs. |
| 7 | * | 7 | * |
| 8 | * Much of the core SMP work is based on previous work by Thomas Radke, to | 8 | * Much of the core SMP work is based on previous work by Thomas Radke, to |
| @@ -53,7 +53,6 @@ | |||
| 53 | #include <asm/nmi.h> | 53 | #include <asm/nmi.h> |
| 54 | #include <asm/irq.h> | 54 | #include <asm/irq.h> |
| 55 | #include <asm/idle.h> | 55 | #include <asm/idle.h> |
| 56 | #include <asm/smp.h> | ||
| 57 | #include <asm/trampoline.h> | 56 | #include <asm/trampoline.h> |
| 58 | #include <asm/cpu.h> | 57 | #include <asm/cpu.h> |
| 59 | #include <asm/numa.h> | 58 | #include <asm/numa.h> |
| @@ -61,13 +60,12 @@ | |||
| 61 | #include <asm/tlbflush.h> | 60 | #include <asm/tlbflush.h> |
| 62 | #include <asm/mtrr.h> | 61 | #include <asm/mtrr.h> |
| 63 | #include <asm/vmi.h> | 62 | #include <asm/vmi.h> |
| 64 | #include <asm/genapic.h> | 63 | #include <asm/apic.h> |
| 65 | #include <asm/setup.h> | 64 | #include <asm/setup.h> |
| 65 | #include <asm/uv/uv.h> | ||
| 66 | #include <linux/mc146818rtc.h> | 66 | #include <linux/mc146818rtc.h> |
| 67 | 67 | ||
| 68 | #include <mach_apic.h> | 68 | #include <asm/smpboot_hooks.h> |
| 69 | #include <mach_wakecpu.h> | ||
| 70 | #include <smpboot_hooks.h> | ||
| 71 | 69 | ||
| 72 | #ifdef CONFIG_X86_32 | 70 | #ifdef CONFIG_X86_32 |
| 73 | u8 apicid_2_node[MAX_APICID]; | 71 | u8 apicid_2_node[MAX_APICID]; |
| @@ -103,29 +101,20 @@ EXPORT_SYMBOL(smp_num_siblings); | |||
| 103 | DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID; | 101 | DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID; |
| 104 | 102 | ||
| 105 | /* representing HT siblings of each logical CPU */ | 103 | /* representing HT siblings of each logical CPU */ |
| 106 | DEFINE_PER_CPU(cpumask_t, cpu_sibling_map); | 104 | DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map); |
| 107 | EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); | 105 | EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); |
| 108 | 106 | ||
| 109 | /* representing HT and core siblings of each logical CPU */ | 107 | /* representing HT and core siblings of each logical CPU */ |
| 110 | DEFINE_PER_CPU(cpumask_t, cpu_core_map); | 108 | DEFINE_PER_CPU(cpumask_var_t, cpu_core_map); |
| 111 | EXPORT_PER_CPU_SYMBOL(cpu_core_map); | 109 | EXPORT_PER_CPU_SYMBOL(cpu_core_map); |
| 112 | 110 | ||
| 113 | /* Per CPU bogomips and other parameters */ | 111 | /* Per CPU bogomips and other parameters */ |
| 114 | DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); | 112 | DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); |
| 115 | EXPORT_PER_CPU_SYMBOL(cpu_info); | 113 | EXPORT_PER_CPU_SYMBOL(cpu_info); |
| 116 | 114 | ||
| 117 | static atomic_t init_deasserted; | 115 | atomic_t init_deasserted; |
| 118 | |||
| 119 | |||
| 120 | /* Set if we find a B stepping CPU */ | ||
| 121 | static int __cpuinitdata smp_b_stepping; | ||
| 122 | 116 | ||
| 123 | #if defined(CONFIG_NUMA) && defined(CONFIG_X86_32) | 117 | #if defined(CONFIG_NUMA) && defined(CONFIG_X86_32) |
| 124 | |||
| 125 | /* which logical CPUs are on which nodes */ | ||
| 126 | cpumask_t node_to_cpumask_map[MAX_NUMNODES] __read_mostly = | ||
| 127 | { [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE }; | ||
| 128 | EXPORT_SYMBOL(node_to_cpumask_map); | ||
| 129 | /* which node each logical CPU is on */ | 118 | /* which node each logical CPU is on */ |
| 130 | int cpu_to_node_map[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 }; | 119 | int cpu_to_node_map[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 }; |
| 131 | EXPORT_SYMBOL(cpu_to_node_map); | 120 | EXPORT_SYMBOL(cpu_to_node_map); |
| @@ -134,7 +123,7 @@ EXPORT_SYMBOL(cpu_to_node_map); | |||
| 134 | static void map_cpu_to_node(int cpu, int node) | 123 | static void map_cpu_to_node(int cpu, int node) |
| 135 | { | 124 | { |
| 136 | printk(KERN_INFO "Mapping cpu %d to node %d\n", cpu, node); | 125 | printk(KERN_INFO "Mapping cpu %d to node %d\n", cpu, node); |
| 137 | cpumask_set_cpu(cpu, &node_to_cpumask_map[node]); | 126 | cpumask_set_cpu(cpu, node_to_cpumask_map[node]); |
| 138 | cpu_to_node_map[cpu] = node; | 127 | cpu_to_node_map[cpu] = node; |
| 139 | } | 128 | } |
| 140 | 129 | ||
| @@ -145,7 +134,7 @@ static void unmap_cpu_to_node(int cpu) | |||
| 145 | 134 | ||
| 146 | printk(KERN_INFO "Unmapping cpu %d from all nodes\n", cpu); | 135 | printk(KERN_INFO "Unmapping cpu %d from all nodes\n", cpu); |
| 147 | for (node = 0; node < MAX_NUMNODES; node++) | 136 | for (node = 0; node < MAX_NUMNODES; node++) |
| 148 | cpumask_clear_cpu(cpu, &node_to_cpumask_map[node]); | 137 | cpumask_clear_cpu(cpu, node_to_cpumask_map[node]); |
| 149 | cpu_to_node_map[cpu] = 0; | 138 | cpu_to_node_map[cpu] = 0; |
| 150 | } | 139 | } |
| 151 | #else /* !(CONFIG_NUMA && CONFIG_X86_32) */ | 140 | #else /* !(CONFIG_NUMA && CONFIG_X86_32) */ |
| @@ -163,7 +152,7 @@ static void map_cpu_to_logical_apicid(void) | |||
| 163 | { | 152 | { |
| 164 | int cpu = smp_processor_id(); | 153 | int cpu = smp_processor_id(); |
| 165 | int apicid = logical_smp_processor_id(); | 154 | int apicid = logical_smp_processor_id(); |
| 166 | int node = apicid_to_node(apicid); | 155 | int node = apic->apicid_to_node(apicid); |
| 167 | 156 | ||
| 168 | if (!node_online(node)) | 157 | if (!node_online(node)) |
| 169 | node = first_online_node; | 158 | node = first_online_node; |
| @@ -196,7 +185,8 @@ static void __cpuinit smp_callin(void) | |||
| 196 | * our local APIC. We have to wait for the IPI or we'll | 185 | * our local APIC. We have to wait for the IPI or we'll |
| 197 | * lock up on an APIC access. | 186 | * lock up on an APIC access. |
| 198 | */ | 187 | */ |
| 199 | wait_for_init_deassert(&init_deasserted); | 188 | if (apic->wait_for_init_deassert) |
| 189 | apic->wait_for_init_deassert(&init_deasserted); | ||
| 200 | 190 | ||
| 201 | /* | 191 | /* |
| 202 | * (This works even if the APIC is not enabled.) | 192 | * (This works even if the APIC is not enabled.) |
| @@ -243,7 +233,8 @@ static void __cpuinit smp_callin(void) | |||
| 243 | */ | 233 | */ |
| 244 | 234 | ||
| 245 | pr_debug("CALLIN, before setup_local_APIC().\n"); | 235 | pr_debug("CALLIN, before setup_local_APIC().\n"); |
| 246 | smp_callin_clear_local_apic(); | 236 | if (apic->smp_callin_clear_local_apic) |
| 237 | apic->smp_callin_clear_local_apic(); | ||
| 247 | setup_local_APIC(); | 238 | setup_local_APIC(); |
| 248 | end_local_APIC_setup(); | 239 | end_local_APIC_setup(); |
| 249 | map_cpu_to_logical_apicid(); | 240 | map_cpu_to_logical_apicid(); |
| @@ -271,8 +262,6 @@ static void __cpuinit smp_callin(void) | |||
| 271 | cpumask_set_cpu(cpuid, cpu_callin_mask); | 262 | cpumask_set_cpu(cpuid, cpu_callin_mask); |
| 272 | } | 263 | } |
| 273 | 264 | ||
| 274 | static int __cpuinitdata unsafe_smp; | ||
| 275 | |||
| 276 | /* | 265 | /* |
| 277 | * Activate a secondary processor. | 266 | * Activate a secondary processor. |
| 278 | */ | 267 | */ |
| @@ -307,7 +296,7 @@ notrace static void __cpuinit start_secondary(void *unused) | |||
| 307 | __flush_tlb_all(); | 296 | __flush_tlb_all(); |
| 308 | #endif | 297 | #endif |
| 309 | 298 | ||
| 310 | /* This must be done before setting cpu_online_map */ | 299 | /* This must be done before setting cpu_online_mask */ |
| 311 | set_cpu_sibling_map(raw_smp_processor_id()); | 300 | set_cpu_sibling_map(raw_smp_processor_id()); |
| 312 | wmb(); | 301 | wmb(); |
| 313 | 302 | ||
| @@ -340,75 +329,22 @@ notrace static void __cpuinit start_secondary(void *unused) | |||
| 340 | cpu_idle(); | 329 | cpu_idle(); |
| 341 | } | 330 | } |
| 342 | 331 | ||
| 343 | static void __cpuinit smp_apply_quirks(struct cpuinfo_x86 *c) | 332 | #ifdef CONFIG_CPUMASK_OFFSTACK |
| 333 | /* In this case, llc_shared_map is a pointer to a cpumask. */ | ||
| 334 | static inline void copy_cpuinfo_x86(struct cpuinfo_x86 *dst, | ||
| 335 | const struct cpuinfo_x86 *src) | ||
| 344 | { | 336 | { |
| 345 | /* | 337 | struct cpumask *llc = dst->llc_shared_map; |
| 346 | * Mask B, Pentium, but not Pentium MMX | 338 | *dst = *src; |
| 347 | */ | 339 | dst->llc_shared_map = llc; |
| 348 | if (c->x86_vendor == X86_VENDOR_INTEL && | ||
| 349 | c->x86 == 5 && | ||
| 350 | c->x86_mask >= 1 && c->x86_mask <= 4 && | ||
| 351 | c->x86_model <= 3) | ||
| 352 | /* | ||
| 353 | * Remember we have B step Pentia with bugs | ||
| 354 | */ | ||
| 355 | smp_b_stepping = 1; | ||
| 356 | |||
| 357 | /* | ||
| 358 | * Certain Athlons might work (for various values of 'work') in SMP | ||
| 359 | * but they are not certified as MP capable. | ||
| 360 | */ | ||
| 361 | if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) { | ||
| 362 | |||
| 363 | if (num_possible_cpus() == 1) | ||
| 364 | goto valid_k7; | ||
| 365 | |||
| 366 | /* Athlon 660/661 is valid. */ | ||
| 367 | if ((c->x86_model == 6) && ((c->x86_mask == 0) || | ||
| 368 | (c->x86_mask == 1))) | ||
| 369 | goto valid_k7; | ||
| 370 | |||
| 371 | /* Duron 670 is valid */ | ||
| 372 | if ((c->x86_model == 7) && (c->x86_mask == 0)) | ||
| 373 | goto valid_k7; | ||
| 374 | |||
| 375 | /* | ||
| 376 | * Athlon 662, Duron 671, and Athlon >model 7 have capability | ||
| 377 | * bit. It's worth noting that the A5 stepping (662) of some | ||
| 378 | * Athlon XP's have the MP bit set. | ||
| 379 | * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for | ||
| 380 | * more. | ||
| 381 | */ | ||
| 382 | if (((c->x86_model == 6) && (c->x86_mask >= 2)) || | ||
| 383 | ((c->x86_model == 7) && (c->x86_mask >= 1)) || | ||
| 384 | (c->x86_model > 7)) | ||
| 385 | if (cpu_has_mp) | ||
| 386 | goto valid_k7; | ||
| 387 | |||
| 388 | /* If we get here, not a certified SMP capable AMD system. */ | ||
| 389 | unsafe_smp = 1; | ||
| 390 | } | ||
| 391 | |||
| 392 | valid_k7: | ||
| 393 | ; | ||
| 394 | } | 340 | } |
| 395 | 341 | #else | |
| 396 | static void __cpuinit smp_checks(void) | 342 | static inline void copy_cpuinfo_x86(struct cpuinfo_x86 *dst, |
| 343 | const struct cpuinfo_x86 *src) | ||
| 397 | { | 344 | { |
| 398 | if (smp_b_stepping) | 345 | *dst = *src; |
| 399 | printk(KERN_WARNING "WARNING: SMP operation may be unreliable" | ||
| 400 | "with B stepping processors.\n"); | ||
| 401 | |||
| 402 | /* | ||
| 403 | * Don't taint if we are running SMP kernel on a single non-MP | ||
| 404 | * approved Athlon | ||
| 405 | */ | ||
| 406 | if (unsafe_smp && num_online_cpus() > 1) { | ||
| 407 | printk(KERN_INFO "WARNING: This combination of AMD" | ||
| 408 | "processors is not suitable for SMP.\n"); | ||
| 409 | add_taint(TAINT_UNSAFE_SMP); | ||
| 410 | } | ||
| 411 | } | 346 | } |
| 347 | #endif /* CONFIG_CPUMASK_OFFSTACK */ | ||
| 412 | 348 | ||
| 413 | /* | 349 | /* |
| 414 | * The bootstrap kernel entry code has set these up. Save them for | 350 | * The bootstrap kernel entry code has set these up. Save them for |
| @@ -419,11 +355,10 @@ void __cpuinit smp_store_cpu_info(int id) | |||
| 419 | { | 355 | { |
| 420 | struct cpuinfo_x86 *c = &cpu_data(id); | 356 | struct cpuinfo_x86 *c = &cpu_data(id); |
| 421 | 357 | ||
| 422 | *c = boot_cpu_data; | 358 | copy_cpuinfo_x86(c, &boot_cpu_data); |
| 423 | c->cpu_index = id; | 359 | c->cpu_index = id; |
| 424 | if (id != 0) | 360 | if (id != 0) |
| 425 | identify_secondary_cpu(c); | 361 | identify_secondary_cpu(c); |
| 426 | smp_apply_quirks(c); | ||
| 427 | } | 362 | } |
| 428 | 363 | ||
| 429 | 364 | ||
| @@ -444,15 +379,15 @@ void __cpuinit set_cpu_sibling_map(int cpu) | |||
| 444 | cpumask_set_cpu(cpu, cpu_sibling_mask(i)); | 379 | cpumask_set_cpu(cpu, cpu_sibling_mask(i)); |
| 445 | cpumask_set_cpu(i, cpu_core_mask(cpu)); | 380 | cpumask_set_cpu(i, cpu_core_mask(cpu)); |
| 446 | cpumask_set_cpu(cpu, cpu_core_mask(i)); | 381 | cpumask_set_cpu(cpu, cpu_core_mask(i)); |
| 447 | cpumask_set_cpu(i, &c->llc_shared_map); | 382 | cpumask_set_cpu(i, c->llc_shared_map); |
| 448 | cpumask_set_cpu(cpu, &o->llc_shared_map); | 383 | cpumask_set_cpu(cpu, o->llc_shared_map); |
| 449 | } | 384 | } |
| 450 | } | 385 | } |
| 451 | } else { | 386 | } else { |
| 452 | cpumask_set_cpu(cpu, cpu_sibling_mask(cpu)); | 387 | cpumask_set_cpu(cpu, cpu_sibling_mask(cpu)); |
| 453 | } | 388 | } |
| 454 | 389 | ||
| 455 | cpumask_set_cpu(cpu, &c->llc_shared_map); | 390 | cpumask_set_cpu(cpu, c->llc_shared_map); |
| 456 | 391 | ||
| 457 | if (current_cpu_data.x86_max_cores == 1) { | 392 | if (current_cpu_data.x86_max_cores == 1) { |
| 458 | cpumask_copy(cpu_core_mask(cpu), cpu_sibling_mask(cpu)); | 393 | cpumask_copy(cpu_core_mask(cpu), cpu_sibling_mask(cpu)); |
| @@ -463,8 +398,8 @@ void __cpuinit set_cpu_sibling_map(int cpu) | |||
| 463 | for_each_cpu(i, cpu_sibling_setup_mask) { | 398 | for_each_cpu(i, cpu_sibling_setup_mask) { |
| 464 | if (per_cpu(cpu_llc_id, cpu) != BAD_APICID && | 399 | if (per_cpu(cpu_llc_id, cpu) != BAD_APICID && |
| 465 | per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) { | 400 | per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) { |
| 466 | cpumask_set_cpu(i, &c->llc_shared_map); | 401 | cpumask_set_cpu(i, c->llc_shared_map); |
| 467 | cpumask_set_cpu(cpu, &cpu_data(i).llc_shared_map); | 402 | cpumask_set_cpu(cpu, cpu_data(i).llc_shared_map); |
| 468 | } | 403 | } |
| 469 | if (c->phys_proc_id == cpu_data(i).phys_proc_id) { | 404 | if (c->phys_proc_id == cpu_data(i).phys_proc_id) { |
| 470 | cpumask_set_cpu(i, cpu_core_mask(cpu)); | 405 | cpumask_set_cpu(i, cpu_core_mask(cpu)); |
| @@ -502,12 +437,7 @@ const struct cpumask *cpu_coregroup_mask(int cpu) | |||
| 502 | if (sched_mc_power_savings || sched_smt_power_savings) | 437 | if (sched_mc_power_savings || sched_smt_power_savings) |
| 503 | return cpu_core_mask(cpu); | 438 | return cpu_core_mask(cpu); |
| 504 | else | 439 | else |
| 505 | return &c->llc_shared_map; | 440 | return c->llc_shared_map; |
| 506 | } | ||
| 507 | |||
| 508 | cpumask_t cpu_coregroup_map(int cpu) | ||
| 509 | { | ||
| 510 | return *cpu_coregroup_mask(cpu); | ||
| 511 | } | 441 | } |
| 512 | 442 | ||
| 513 | static void impress_friends(void) | 443 | static void impress_friends(void) |
| @@ -583,7 +513,7 @@ wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip) | |||
| 583 | /* Target chip */ | 513 | /* Target chip */ |
| 584 | /* Boot on the stack */ | 514 | /* Boot on the stack */ |
| 585 | /* Kick the second */ | 515 | /* Kick the second */ |
| 586 | apic_icr_write(APIC_DM_NMI | APIC_DEST_LOGICAL, logical_apicid); | 516 | apic_icr_write(APIC_DM_NMI | apic->dest_logical, logical_apicid); |
| 587 | 517 | ||
| 588 | pr_debug("Waiting for send to finish...\n"); | 518 | pr_debug("Waiting for send to finish...\n"); |
| 589 | send_status = safe_apic_wait_icr_idle(); | 519 | send_status = safe_apic_wait_icr_idle(); |
| @@ -614,12 +544,6 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) | |||
| 614 | unsigned long send_status, accept_status = 0; | 544 | unsigned long send_status, accept_status = 0; |
| 615 | int maxlvt, num_starts, j; | 545 | int maxlvt, num_starts, j; |
| 616 | 546 | ||
| 617 | if (get_uv_system_type() == UV_NON_UNIQUE_APIC) { | ||
| 618 | send_status = uv_wakeup_secondary(phys_apicid, start_eip); | ||
| 619 | atomic_set(&init_deasserted, 1); | ||
| 620 | return send_status; | ||
| 621 | } | ||
| 622 | |||
| 623 | maxlvt = lapic_get_maxlvt(); | 547 | maxlvt = lapic_get_maxlvt(); |
| 624 | 548 | ||
| 625 | /* | 549 | /* |
| @@ -745,78 +669,23 @@ static void __cpuinit do_fork_idle(struct work_struct *work) | |||
| 745 | complete(&c_idle->done); | 669 | complete(&c_idle->done); |
| 746 | } | 670 | } |
| 747 | 671 | ||
| 748 | #ifdef CONFIG_X86_64 | ||
| 749 | |||
| 750 | /* __ref because it's safe to call free_bootmem when after_bootmem == 0. */ | ||
| 751 | static void __ref free_bootmem_pda(struct x8664_pda *oldpda) | ||
| 752 | { | ||
| 753 | if (!after_bootmem) | ||
| 754 | free_bootmem((unsigned long)oldpda, sizeof(*oldpda)); | ||
| 755 | } | ||
| 756 | |||
| 757 | /* | ||
| 758 | * Allocate node local memory for the AP pda. | ||
| 759 | * | ||
| 760 | * Must be called after the _cpu_pda pointer table is initialized. | ||
| 761 | */ | ||
| 762 | int __cpuinit get_local_pda(int cpu) | ||
| 763 | { | ||
| 764 | struct x8664_pda *oldpda, *newpda; | ||
| 765 | unsigned long size = sizeof(struct x8664_pda); | ||
| 766 | int node = cpu_to_node(cpu); | ||
| 767 | |||
| 768 | if (cpu_pda(cpu) && !cpu_pda(cpu)->in_bootmem) | ||
| 769 | return 0; | ||
| 770 | |||
| 771 | oldpda = cpu_pda(cpu); | ||
| 772 | newpda = kmalloc_node(size, GFP_ATOMIC, node); | ||
| 773 | if (!newpda) { | ||
| 774 | printk(KERN_ERR "Could not allocate node local PDA " | ||
| 775 | "for CPU %d on node %d\n", cpu, node); | ||
| 776 | |||
| 777 | if (oldpda) | ||
| 778 | return 0; /* have a usable pda */ | ||
| 779 | else | ||
| 780 | return -1; | ||
| 781 | } | ||
| 782 | |||
| 783 | if (oldpda) { | ||
| 784 | memcpy(newpda, oldpda, size); | ||
| 785 | free_bootmem_pda(oldpda); | ||
| 786 | } | ||
| 787 | |||
| 788 | newpda->in_bootmem = 0; | ||
| 789 | cpu_pda(cpu) = newpda; | ||
| 790 | return 0; | ||
| 791 | } | ||
| 792 | #endif /* CONFIG_X86_64 */ | ||
| 793 | |||
| 794 | static int __cpuinit do_boot_cpu(int apicid, int cpu) | ||
| 795 | /* | 672 | /* |
| 796 | * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad | 673 | * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad |
| 797 | * (ie clustered apic addressing mode), this is a LOGICAL apic ID. | 674 | * (ie clustered apic addressing mode), this is a LOGICAL apic ID. |
| 798 | * Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu. | 675 | * Returns zero if CPU booted OK, else error code from |
| 676 | * ->wakeup_secondary_cpu. | ||
| 799 | */ | 677 | */ |
| 678 | static int __cpuinit do_boot_cpu(int apicid, int cpu) | ||
| 800 | { | 679 | { |
| 801 | unsigned long boot_error = 0; | 680 | unsigned long boot_error = 0; |
| 802 | int timeout; | ||
| 803 | unsigned long start_ip; | 681 | unsigned long start_ip; |
| 804 | unsigned short nmi_high = 0, nmi_low = 0; | 682 | int timeout; |
| 805 | struct create_idle c_idle = { | 683 | struct create_idle c_idle = { |
| 806 | .cpu = cpu, | 684 | .cpu = cpu, |
| 807 | .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done), | 685 | .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done), |
| 808 | }; | 686 | }; |
| 809 | INIT_WORK(&c_idle.work, do_fork_idle); | ||
| 810 | 687 | ||
| 811 | #ifdef CONFIG_X86_64 | 688 | INIT_WORK(&c_idle.work, do_fork_idle); |
| 812 | /* Allocate node local memory for AP pdas */ | ||
| 813 | if (cpu > 0) { | ||
| 814 | boot_error = get_local_pda(cpu); | ||
| 815 | if (boot_error) | ||
| 816 | goto restore_state; | ||
| 817 | /* if can't get pda memory, can't start cpu */ | ||
| 818 | } | ||
| 819 | #endif | ||
| 820 | 689 | ||
| 821 | alternatives_smp_switch(1); | 690 | alternatives_smp_switch(1); |
| 822 | 691 | ||
| @@ -847,14 +716,16 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) | |||
| 847 | 716 | ||
| 848 | set_idle_for_cpu(cpu, c_idle.idle); | 717 | set_idle_for_cpu(cpu, c_idle.idle); |
| 849 | do_rest: | 718 | do_rest: |
| 850 | #ifdef CONFIG_X86_32 | ||
| 851 | per_cpu(current_task, cpu) = c_idle.idle; | 719 | per_cpu(current_task, cpu) = c_idle.idle; |
| 852 | init_gdt(cpu); | 720 | #ifdef CONFIG_X86_32 |
| 853 | /* Stack for startup_32 can be just as for start_secondary onwards */ | 721 | /* Stack for startup_32 can be just as for start_secondary onwards */ |
| 854 | irq_ctx_init(cpu); | 722 | irq_ctx_init(cpu); |
| 855 | #else | 723 | #else |
| 856 | cpu_pda(cpu)->pcurrent = c_idle.idle; | ||
| 857 | clear_tsk_thread_flag(c_idle.idle, TIF_FORK); | 724 | clear_tsk_thread_flag(c_idle.idle, TIF_FORK); |
| 725 | initial_gs = per_cpu_offset(cpu); | ||
| 726 | per_cpu(kernel_stack, cpu) = | ||
| 727 | (unsigned long)task_stack_page(c_idle.idle) - | ||
| 728 | KERNEL_STACK_OFFSET + THREAD_SIZE; | ||
| 858 | #endif | 729 | #endif |
| 859 | early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); | 730 | early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); |
| 860 | initial_code = (unsigned long)start_secondary; | 731 | initial_code = (unsigned long)start_secondary; |
| @@ -878,8 +749,6 @@ do_rest: | |||
| 878 | 749 | ||
| 879 | pr_debug("Setting warm reset code and vector.\n"); | 750 | pr_debug("Setting warm reset code and vector.\n"); |
| 880 | 751 | ||
| 881 | store_NMI_vector(&nmi_high, &nmi_low); | ||
| 882 | |||
| 883 | smpboot_setup_warm_reset_vector(start_ip); | 752 | smpboot_setup_warm_reset_vector(start_ip); |
| 884 | /* | 753 | /* |
| 885 | * Be paranoid about clearing APIC errors. | 754 | * Be paranoid about clearing APIC errors. |
| @@ -891,9 +760,13 @@ do_rest: | |||
| 891 | } | 760 | } |
| 892 | 761 | ||
| 893 | /* | 762 | /* |
| 894 | * Starting actual IPI sequence... | 763 | * Kick the secondary CPU. Use the method in the APIC driver |
| 764 | * if it's defined - or use an INIT boot APIC message otherwise: | ||
| 895 | */ | 765 | */ |
| 896 | boot_error = wakeup_secondary_cpu(apicid, start_ip); | 766 | if (apic->wakeup_secondary_cpu) |
| 767 | boot_error = apic->wakeup_secondary_cpu(apicid, start_ip); | ||
| 768 | else | ||
| 769 | boot_error = wakeup_secondary_cpu_via_init(apicid, start_ip); | ||
| 897 | 770 | ||
| 898 | if (!boot_error) { | 771 | if (!boot_error) { |
| 899 | /* | 772 | /* |
| @@ -927,13 +800,11 @@ do_rest: | |||
| 927 | else | 800 | else |
| 928 | /* trampoline code not run */ | 801 | /* trampoline code not run */ |
| 929 | printk(KERN_ERR "Not responding.\n"); | 802 | printk(KERN_ERR "Not responding.\n"); |
| 930 | if (get_uv_system_type() != UV_NON_UNIQUE_APIC) | 803 | if (apic->inquire_remote_apic) |
| 931 | inquire_remote_apic(apicid); | 804 | apic->inquire_remote_apic(apicid); |
| 932 | } | 805 | } |
| 933 | } | 806 | } |
| 934 | #ifdef CONFIG_X86_64 | 807 | |
| 935 | restore_state: | ||
| 936 | #endif | ||
| 937 | if (boot_error) { | 808 | if (boot_error) { |
| 938 | /* Try to put things back the way they were before ... */ | 809 | /* Try to put things back the way they were before ... */ |
| 939 | numa_remove_cpu(cpu); /* was set by numa_add_cpu */ | 810 | numa_remove_cpu(cpu); /* was set by numa_add_cpu */ |
| @@ -961,7 +832,7 @@ restore_state: | |||
| 961 | 832 | ||
| 962 | int __cpuinit native_cpu_up(unsigned int cpu) | 833 | int __cpuinit native_cpu_up(unsigned int cpu) |
| 963 | { | 834 | { |
| 964 | int apicid = cpu_present_to_apicid(cpu); | 835 | int apicid = apic->cpu_present_to_apicid(cpu); |
| 965 | unsigned long flags; | 836 | unsigned long flags; |
| 966 | int err; | 837 | int err; |
| 967 | 838 | ||
| @@ -1033,9 +904,8 @@ int __cpuinit native_cpu_up(unsigned int cpu) | |||
| 1033 | */ | 904 | */ |
| 1034 | static __init void disable_smp(void) | 905 | static __init void disable_smp(void) |
| 1035 | { | 906 | { |
| 1036 | /* use the read/write pointers to the present and possible maps */ | 907 | init_cpu_present(cpumask_of(0)); |
| 1037 | cpumask_copy(&cpu_present_map, cpumask_of(0)); | 908 | init_cpu_possible(cpumask_of(0)); |
| 1038 | cpumask_copy(&cpu_possible_map, cpumask_of(0)); | ||
| 1039 | smpboot_clear_io_apic_irqs(); | 909 | smpboot_clear_io_apic_irqs(); |
| 1040 | 910 | ||
| 1041 | if (smp_found_config) | 911 | if (smp_found_config) |
| @@ -1054,14 +924,14 @@ static int __init smp_sanity_check(unsigned max_cpus) | |||
| 1054 | { | 924 | { |
| 1055 | preempt_disable(); | 925 | preempt_disable(); |
| 1056 | 926 | ||
| 1057 | #if defined(CONFIG_X86_PC) && defined(CONFIG_X86_32) | 927 | #if !defined(CONFIG_X86_BIGSMP) && defined(CONFIG_X86_32) |
| 1058 | if (def_to_bigsmp && nr_cpu_ids > 8) { | 928 | if (def_to_bigsmp && nr_cpu_ids > 8) { |
| 1059 | unsigned int cpu; | 929 | unsigned int cpu; |
| 1060 | unsigned nr; | 930 | unsigned nr; |
| 1061 | 931 | ||
| 1062 | printk(KERN_WARNING | 932 | printk(KERN_WARNING |
| 1063 | "More than 8 CPUs detected - skipping them.\n" | 933 | "More than 8 CPUs detected - skipping them.\n" |
| 1064 | "Use CONFIG_X86_GENERICARCH and CONFIG_X86_BIGSMP.\n"); | 934 | "Use CONFIG_X86_BIGSMP.\n"); |
| 1065 | 935 | ||
| 1066 | nr = 0; | 936 | nr = 0; |
| 1067 | for_each_present_cpu(cpu) { | 937 | for_each_present_cpu(cpu) { |
| @@ -1107,7 +977,7 @@ static int __init smp_sanity_check(unsigned max_cpus) | |||
| 1107 | * Should not be necessary because the MP table should list the boot | 977 | * Should not be necessary because the MP table should list the boot |
| 1108 | * CPU too, but we do it for the sake of robustness anyway. | 978 | * CPU too, but we do it for the sake of robustness anyway. |
| 1109 | */ | 979 | */ |
| 1110 | if (!check_phys_apicid_present(boot_cpu_physical_apicid)) { | 980 | if (!apic->check_phys_apicid_present(boot_cpu_physical_apicid)) { |
| 1111 | printk(KERN_NOTICE | 981 | printk(KERN_NOTICE |
| 1112 | "weird, boot CPU (#%d) not listed by the BIOS.\n", | 982 | "weird, boot CPU (#%d) not listed by the BIOS.\n", |
| 1113 | boot_cpu_physical_apicid); | 983 | boot_cpu_physical_apicid); |
| @@ -1125,6 +995,7 @@ static int __init smp_sanity_check(unsigned max_cpus) | |||
| 1125 | printk(KERN_ERR "... forcing use of dummy APIC emulation." | 995 | printk(KERN_ERR "... forcing use of dummy APIC emulation." |
| 1126 | "(tell your hw vendor)\n"); | 996 | "(tell your hw vendor)\n"); |
| 1127 | smpboot_clear_io_apic(); | 997 | smpboot_clear_io_apic(); |
| 998 | arch_disable_smp_support(); | ||
| 1128 | return -1; | 999 | return -1; |
| 1129 | } | 1000 | } |
| 1130 | 1001 | ||
| @@ -1166,6 +1037,8 @@ static void __init smp_cpu_index_default(void) | |||
| 1166 | */ | 1037 | */ |
| 1167 | void __init native_smp_prepare_cpus(unsigned int max_cpus) | 1038 | void __init native_smp_prepare_cpus(unsigned int max_cpus) |
| 1168 | { | 1039 | { |
| 1040 | unsigned int i; | ||
| 1041 | |||
| 1169 | preempt_disable(); | 1042 | preempt_disable(); |
| 1170 | smp_cpu_index_default(); | 1043 | smp_cpu_index_default(); |
| 1171 | current_cpu_data = boot_cpu_data; | 1044 | current_cpu_data = boot_cpu_data; |
| @@ -1179,11 +1052,19 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) | |||
| 1179 | boot_cpu_logical_apicid = logical_smp_processor_id(); | 1052 | boot_cpu_logical_apicid = logical_smp_processor_id(); |
| 1180 | #endif | 1053 | #endif |
| 1181 | current_thread_info()->cpu = 0; /* needed? */ | 1054 | current_thread_info()->cpu = 0; /* needed? */ |
| 1055 | for_each_possible_cpu(i) { | ||
| 1056 | alloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL); | ||
| 1057 | alloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL); | ||
| 1058 | alloc_cpumask_var(&cpu_data(i).llc_shared_map, GFP_KERNEL); | ||
| 1059 | cpumask_clear(per_cpu(cpu_core_map, i)); | ||
| 1060 | cpumask_clear(per_cpu(cpu_sibling_map, i)); | ||
| 1061 | cpumask_clear(cpu_data(i).llc_shared_map); | ||
| 1062 | } | ||
| 1182 | set_cpu_sibling_map(0); | 1063 | set_cpu_sibling_map(0); |
| 1183 | 1064 | ||
| 1184 | #ifdef CONFIG_X86_64 | ||
| 1185 | enable_IR_x2apic(); | 1065 | enable_IR_x2apic(); |
| 1186 | setup_apic_routing(); | 1066 | #ifdef CONFIG_X86_64 |
| 1067 | default_setup_apic_routing(); | ||
| 1187 | #endif | 1068 | #endif |
| 1188 | 1069 | ||
| 1189 | if (smp_sanity_check(max_cpus) < 0) { | 1070 | if (smp_sanity_check(max_cpus) < 0) { |
| @@ -1207,18 +1088,18 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) | |||
| 1207 | */ | 1088 | */ |
| 1208 | setup_local_APIC(); | 1089 | setup_local_APIC(); |
| 1209 | 1090 | ||
| 1210 | #ifdef CONFIG_X86_64 | ||
| 1211 | /* | 1091 | /* |
| 1212 | * Enable IO APIC before setting up error vector | 1092 | * Enable IO APIC before setting up error vector |
| 1213 | */ | 1093 | */ |
| 1214 | if (!skip_ioapic_setup && nr_ioapics) | 1094 | if (!skip_ioapic_setup && nr_ioapics) |
| 1215 | enable_IO_APIC(); | 1095 | enable_IO_APIC(); |
| 1216 | #endif | 1096 | |
| 1217 | end_local_APIC_setup(); | 1097 | end_local_APIC_setup(); |
| 1218 | 1098 | ||
| 1219 | map_cpu_to_logical_apicid(); | 1099 | map_cpu_to_logical_apicid(); |
| 1220 | 1100 | ||
| 1221 | setup_portio_remap(); | 1101 | if (apic->setup_portio_remap) |
| 1102 | apic->setup_portio_remap(); | ||
| 1222 | 1103 | ||
| 1223 | smpboot_setup_io_apic(); | 1104 | smpboot_setup_io_apic(); |
| 1224 | /* | 1105 | /* |
| @@ -1240,10 +1121,7 @@ out: | |||
| 1240 | void __init native_smp_prepare_boot_cpu(void) | 1121 | void __init native_smp_prepare_boot_cpu(void) |
| 1241 | { | 1122 | { |
| 1242 | int me = smp_processor_id(); | 1123 | int me = smp_processor_id(); |
| 1243 | #ifdef CONFIG_X86_32 | 1124 | switch_to_new_gdt(me); |
| 1244 | init_gdt(me); | ||
| 1245 | #endif | ||
| 1246 | switch_to_new_gdt(); | ||
| 1247 | /* already set me in cpu_online_mask in boot_cpu_init() */ | 1125 | /* already set me in cpu_online_mask in boot_cpu_init() */ |
| 1248 | cpumask_set_cpu(me, cpu_callout_mask); | 1126 | cpumask_set_cpu(me, cpu_callout_mask); |
| 1249 | per_cpu(cpu_state, me) = CPU_ONLINE; | 1127 | per_cpu(cpu_state, me) = CPU_ONLINE; |
| @@ -1254,7 +1132,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus) | |||
| 1254 | pr_debug("Boot done.\n"); | 1132 | pr_debug("Boot done.\n"); |
| 1255 | 1133 | ||
| 1256 | impress_friends(); | 1134 | impress_friends(); |
| 1257 | smp_checks(); | ||
| 1258 | #ifdef CONFIG_X86_IO_APIC | 1135 | #ifdef CONFIG_X86_IO_APIC |
| 1259 | setup_ioapic_dest(); | 1136 | setup_ioapic_dest(); |
| 1260 | #endif | 1137 | #endif |
| @@ -1271,11 +1148,11 @@ early_param("possible_cpus", _setup_possible_cpus); | |||
| 1271 | 1148 | ||
| 1272 | 1149 | ||
| 1273 | /* | 1150 | /* |
| 1274 | * cpu_possible_map should be static, it cannot change as cpu's | 1151 | * cpu_possible_mask should be static, it cannot change as cpu's |
| 1275 | * are onlined, or offlined. The reason is per-cpu data-structures | 1152 | * are onlined, or offlined. The reason is per-cpu data-structures |
| 1276 | * are allocated by some modules at init time, and dont expect to | 1153 | * are allocated by some modules at init time, and dont expect to |
| 1277 | * do this dynamically on cpu arrival/departure. | 1154 | * do this dynamically on cpu arrival/departure. |
| 1278 | * cpu_present_map on the other hand can change dynamically. | 1155 | * cpu_present_mask on the other hand can change dynamically. |
| 1279 | * In case when cpu_hotplug is not compiled, then we resort to current | 1156 | * In case when cpu_hotplug is not compiled, then we resort to current |
| 1280 | * behaviour, which is cpu_possible == cpu_present. | 1157 | * behaviour, which is cpu_possible == cpu_present. |
| 1281 | * - Ashok Raj | 1158 | * - Ashok Raj |
diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c deleted file mode 100644 index 397e309839dd..000000000000 --- a/arch/x86/kernel/smpcommon.c +++ /dev/null | |||
| @@ -1,30 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * SMP stuff which is common to all sub-architectures. | ||
| 3 | */ | ||
| 4 | #include <linux/module.h> | ||
| 5 | #include <asm/smp.h> | ||
| 6 | |||
| 7 | #ifdef CONFIG_X86_32 | ||
| 8 | DEFINE_PER_CPU(unsigned long, this_cpu_off); | ||
| 9 | EXPORT_PER_CPU_SYMBOL(this_cpu_off); | ||
| 10 | |||
| 11 | /* | ||
| 12 | * Initialize the CPU's GDT. This is either the boot CPU doing itself | ||
| 13 | * (still using the master per-cpu area), or a CPU doing it for a | ||
| 14 | * secondary which will soon come up. | ||
| 15 | */ | ||
| 16 | __cpuinit void init_gdt(int cpu) | ||
| 17 | { | ||
| 18 | struct desc_struct gdt; | ||
| 19 | |||
| 20 | pack_descriptor(&gdt, __per_cpu_offset[cpu], 0xFFFFF, | ||
| 21 | 0x2 | DESCTYPE_S, 0x8); | ||
| 22 | gdt.s = 1; | ||
| 23 | |||
| 24 | write_gdt_entry(get_cpu_gdt_table(cpu), | ||
| 25 | GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S); | ||
| 26 | |||
| 27 | per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu]; | ||
| 28 | per_cpu(cpu_number, cpu) = cpu; | ||
| 29 | } | ||
| 30 | #endif | ||
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index 10786af95545..f7bddc2e37d1 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c | |||
| @@ -1,7 +1,7 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Stack trace management functions | 2 | * Stack trace management functions |
| 3 | * | 3 | * |
| 4 | * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> | 4 | * Copyright (C) 2006-2009 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> |
| 5 | */ | 5 | */ |
| 6 | #include <linux/sched.h> | 6 | #include <linux/sched.h> |
| 7 | #include <linux/stacktrace.h> | 7 | #include <linux/stacktrace.h> |
diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c deleted file mode 100644 index 7b987852e876..000000000000 --- a/arch/x86/kernel/summit_32.c +++ /dev/null | |||
| @@ -1,188 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * IBM Summit-Specific Code | ||
| 3 | * | ||
| 4 | * Written By: Matthew Dobson, IBM Corporation | ||
| 5 | * | ||
| 6 | * Copyright (c) 2003 IBM Corp. | ||
| 7 | * | ||
| 8 | * All rights reserved. | ||
| 9 | * | ||
| 10 | * This program is free software; you can redistribute it and/or modify | ||
| 11 | * it under the terms of the GNU General Public License as published by | ||
| 12 | * the Free Software Foundation; either version 2 of the License, or (at | ||
| 13 | * your option) any later version. | ||
| 14 | * | ||
| 15 | * This program is distributed in the hope that it will be useful, but | ||
| 16 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 17 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
| 18 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
| 19 | * details. | ||
| 20 | * | ||
| 21 | * You should have received a copy of the GNU General Public License | ||
| 22 | * along with this program; if not, write to the Free Software | ||
| 23 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
| 24 | * | ||
| 25 | * Send feedback to <colpatch@us.ibm.com> | ||
| 26 | * | ||
| 27 | */ | ||
| 28 | |||
| 29 | #include <linux/mm.h> | ||
| 30 | #include <linux/init.h> | ||
| 31 | #include <asm/io.h> | ||
| 32 | #include <asm/bios_ebda.h> | ||
| 33 | #include <asm/summit/mpparse.h> | ||
| 34 | |||
| 35 | static struct rio_table_hdr *rio_table_hdr __initdata; | ||
| 36 | static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata; | ||
| 37 | static struct rio_detail *rio_devs[MAX_NUMNODES*4] __initdata; | ||
| 38 | |||
| 39 | #ifndef CONFIG_X86_NUMAQ | ||
| 40 | static int mp_bus_id_to_node[MAX_MP_BUSSES] __initdata; | ||
| 41 | #endif | ||
| 42 | |||
| 43 | static int __init setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus) | ||
| 44 | { | ||
| 45 | int twister = 0, node = 0; | ||
| 46 | int i, bus, num_buses; | ||
| 47 | |||
| 48 | for (i = 0; i < rio_table_hdr->num_rio_dev; i++) { | ||
| 49 | if (rio_devs[i]->node_id == rio_devs[wpeg_num]->owner_id) { | ||
| 50 | twister = rio_devs[i]->owner_id; | ||
| 51 | break; | ||
| 52 | } | ||
| 53 | } | ||
| 54 | if (i == rio_table_hdr->num_rio_dev) { | ||
| 55 | printk(KERN_ERR "%s: Couldn't find owner Cyclone for Winnipeg!\n", __func__); | ||
| 56 | return last_bus; | ||
| 57 | } | ||
| 58 | |||
| 59 | for (i = 0; i < rio_table_hdr->num_scal_dev; i++) { | ||
| 60 | if (scal_devs[i]->node_id == twister) { | ||
| 61 | node = scal_devs[i]->node_id; | ||
| 62 | break; | ||
| 63 | } | ||
| 64 | } | ||
| 65 | if (i == rio_table_hdr->num_scal_dev) { | ||
| 66 | printk(KERN_ERR "%s: Couldn't find owner Twister for Cyclone!\n", __func__); | ||
| 67 | return last_bus; | ||
| 68 | } | ||
| 69 | |||
| 70 | switch (rio_devs[wpeg_num]->type) { | ||
| 71 | case CompatWPEG: | ||
| 72 | /* | ||
| 73 | * The Compatibility Winnipeg controls the 2 legacy buses, | ||
| 74 | * the 66MHz PCI bus [2 slots] and the 2 "extra" buses in case | ||
| 75 | * a PCI-PCI bridge card is used in either slot: total 5 buses. | ||
| 76 | */ | ||
| 77 | num_buses = 5; | ||
| 78 | break; | ||
| 79 | case AltWPEG: | ||
| 80 | /* | ||
| 81 | * The Alternate Winnipeg controls the 2 133MHz buses [1 slot | ||
| 82 | * each], their 2 "extra" buses, the 100MHz bus [2 slots] and | ||
| 83 | * the "extra" buses for each of those slots: total 7 buses. | ||
| 84 | */ | ||
| 85 | num_buses = 7; | ||
| 86 | break; | ||
| 87 | case LookOutAWPEG: | ||
| 88 | case LookOutBWPEG: | ||
| 89 | /* | ||
| 90 | * A Lookout Winnipeg controls 3 100MHz buses [2 slots each] | ||
| 91 | * & the "extra" buses for each of those slots: total 9 buses. | ||
| 92 | */ | ||
| 93 | num_buses = 9; | ||
| 94 | break; | ||
| 95 | default: | ||
| 96 | printk(KERN_INFO "%s: Unsupported Winnipeg type!\n", __func__); | ||
| 97 | return last_bus; | ||
| 98 | } | ||
| 99 | |||
| 100 | for (bus = last_bus; bus < last_bus + num_buses; bus++) | ||
| 101 | mp_bus_id_to_node[bus] = node; | ||
| 102 | return bus; | ||
| 103 | } | ||
| 104 | |||
| 105 | static int __init build_detail_arrays(void) | ||
| 106 | { | ||
| 107 | unsigned long ptr; | ||
| 108 | int i, scal_detail_size, rio_detail_size; | ||
| 109 | |||
| 110 | if (rio_table_hdr->num_scal_dev > MAX_NUMNODES) { | ||
| 111 | printk(KERN_WARNING "%s: MAX_NUMNODES too low! Defined as %d, but system has %d nodes.\n", __func__, MAX_NUMNODES, rio_table_hdr->num_scal_dev); | ||
| 112 | return 0; | ||
| 113 | } | ||
| 114 | |||
| 115 | switch (rio_table_hdr->version) { | ||
| 116 | default: | ||
| 117 | printk(KERN_WARNING "%s: Invalid Rio Grande Table Version: %d\n", __func__, rio_table_hdr->version); | ||
| 118 | return 0; | ||
| 119 | case 2: | ||
| 120 | scal_detail_size = 11; | ||
| 121 | rio_detail_size = 13; | ||
| 122 | break; | ||
| 123 | case 3: | ||
| 124 | scal_detail_size = 12; | ||
| 125 | rio_detail_size = 15; | ||
| 126 | break; | ||
| 127 | } | ||
| 128 | |||
| 129 | ptr = (unsigned long)rio_table_hdr + 3; | ||
| 130 | for (i = 0; i < rio_table_hdr->num_scal_dev; i++, ptr += scal_detail_size) | ||
| 131 | scal_devs[i] = (struct scal_detail *)ptr; | ||
| 132 | |||
| 133 | for (i = 0; i < rio_table_hdr->num_rio_dev; i++, ptr += rio_detail_size) | ||
| 134 | rio_devs[i] = (struct rio_detail *)ptr; | ||
| 135 | |||
| 136 | return 1; | ||
| 137 | } | ||
| 138 | |||
| 139 | void __init setup_summit(void) | ||
| 140 | { | ||
| 141 | unsigned long ptr; | ||
| 142 | unsigned short offset; | ||
| 143 | int i, next_wpeg, next_bus = 0; | ||
| 144 | |||
| 145 | /* The pointer to the EBDA is stored in the word @ phys 0x40E(40:0E) */ | ||
| 146 | ptr = get_bios_ebda(); | ||
| 147 | ptr = (unsigned long)phys_to_virt(ptr); | ||
| 148 | |||
| 149 | rio_table_hdr = NULL; | ||
| 150 | offset = 0x180; | ||
| 151 | while (offset) { | ||
| 152 | /* The block id is stored in the 2nd word */ | ||
| 153 | if (*((unsigned short *)(ptr + offset + 2)) == 0x4752) { | ||
| 154 | /* set the pointer past the offset & block id */ | ||
| 155 | rio_table_hdr = (struct rio_table_hdr *)(ptr + offset + 4); | ||
| 156 | break; | ||
| 157 | } | ||
| 158 | /* The next offset is stored in the 1st word. 0 means no more */ | ||
| 159 | offset = *((unsigned short *)(ptr + offset)); | ||
| 160 | } | ||
| 161 | if (!rio_table_hdr) { | ||
| 162 | printk(KERN_ERR "%s: Unable to locate Rio Grande Table in EBDA - bailing!\n", __func__); | ||
| 163 | return; | ||
| 164 | } | ||
| 165 | |||
| 166 | if (!build_detail_arrays()) | ||
| 167 | return; | ||
| 168 | |||
| 169 | /* The first Winnipeg we're looking for has an index of 0 */ | ||
| 170 | next_wpeg = 0; | ||
| 171 | do { | ||
| 172 | for (i = 0; i < rio_table_hdr->num_rio_dev; i++) { | ||
| 173 | if (is_WPEG(rio_devs[i]) && rio_devs[i]->WP_index == next_wpeg) { | ||
| 174 | /* It's the Winnipeg we're looking for! */ | ||
| 175 | next_bus = setup_pci_node_map_for_wpeg(i, next_bus); | ||
| 176 | next_wpeg++; | ||
| 177 | break; | ||
| 178 | } | ||
| 179 | } | ||
| 180 | /* | ||
| 181 | * If we go through all Rio devices and don't find one with | ||
| 182 | * the next index, it means we've found all the Winnipegs, | ||
| 183 | * and thus all the PCI buses. | ||
| 184 | */ | ||
| 185 | if (i == rio_table_hdr->num_rio_dev) | ||
| 186 | next_wpeg = 0; | ||
| 187 | } while (next_wpeg != 0); | ||
| 188 | } | ||
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index e2e86a08f31d..3bdb64829b82 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S | |||
| @@ -1,7 +1,7 @@ | |||
| 1 | ENTRY(sys_call_table) | 1 | ENTRY(sys_call_table) |
| 2 | .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */ | 2 | .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */ |
| 3 | .long sys_exit | 3 | .long sys_exit |
| 4 | .long sys_fork | 4 | .long ptregs_fork |
| 5 | .long sys_read | 5 | .long sys_read |
| 6 | .long sys_write | 6 | .long sys_write |
| 7 | .long sys_open /* 5 */ | 7 | .long sys_open /* 5 */ |
| @@ -10,7 +10,7 @@ ENTRY(sys_call_table) | |||
| 10 | .long sys_creat | 10 | .long sys_creat |
| 11 | .long sys_link | 11 | .long sys_link |
| 12 | .long sys_unlink /* 10 */ | 12 | .long sys_unlink /* 10 */ |
| 13 | .long sys_execve | 13 | .long ptregs_execve |
| 14 | .long sys_chdir | 14 | .long sys_chdir |
| 15 | .long sys_time | 15 | .long sys_time |
| 16 | .long sys_mknod | 16 | .long sys_mknod |
| @@ -109,17 +109,17 @@ ENTRY(sys_call_table) | |||
| 109 | .long sys_newlstat | 109 | .long sys_newlstat |
| 110 | .long sys_newfstat | 110 | .long sys_newfstat |
| 111 | .long sys_uname | 111 | .long sys_uname |
| 112 | .long sys_iopl /* 110 */ | 112 | .long ptregs_iopl /* 110 */ |
| 113 | .long sys_vhangup | 113 | .long sys_vhangup |
| 114 | .long sys_ni_syscall /* old "idle" system call */ | 114 | .long sys_ni_syscall /* old "idle" system call */ |
| 115 | .long sys_vm86old | 115 | .long ptregs_vm86old |
| 116 | .long sys_wait4 | 116 | .long sys_wait4 |
| 117 | .long sys_swapoff /* 115 */ | 117 | .long sys_swapoff /* 115 */ |
| 118 | .long sys_sysinfo | 118 | .long sys_sysinfo |
| 119 | .long sys_ipc | 119 | .long sys_ipc |
| 120 | .long sys_fsync | 120 | .long sys_fsync |
| 121 | .long sys_sigreturn | 121 | .long ptregs_sigreturn |
| 122 | .long sys_clone /* 120 */ | 122 | .long ptregs_clone /* 120 */ |
| 123 | .long sys_setdomainname | 123 | .long sys_setdomainname |
| 124 | .long sys_newuname | 124 | .long sys_newuname |
| 125 | .long sys_modify_ldt | 125 | .long sys_modify_ldt |
| @@ -165,14 +165,14 @@ ENTRY(sys_call_table) | |||
| 165 | .long sys_mremap | 165 | .long sys_mremap |
| 166 | .long sys_setresuid16 | 166 | .long sys_setresuid16 |
| 167 | .long sys_getresuid16 /* 165 */ | 167 | .long sys_getresuid16 /* 165 */ |
| 168 | .long sys_vm86 | 168 | .long ptregs_vm86 |
| 169 | .long sys_ni_syscall /* Old sys_query_module */ | 169 | .long sys_ni_syscall /* Old sys_query_module */ |
| 170 | .long sys_poll | 170 | .long sys_poll |
| 171 | .long sys_nfsservctl | 171 | .long sys_nfsservctl |
| 172 | .long sys_setresgid16 /* 170 */ | 172 | .long sys_setresgid16 /* 170 */ |
| 173 | .long sys_getresgid16 | 173 | .long sys_getresgid16 |
| 174 | .long sys_prctl | 174 | .long sys_prctl |
| 175 | .long sys_rt_sigreturn | 175 | .long ptregs_rt_sigreturn |
| 176 | .long sys_rt_sigaction | 176 | .long sys_rt_sigaction |
| 177 | .long sys_rt_sigprocmask /* 175 */ | 177 | .long sys_rt_sigprocmask /* 175 */ |
| 178 | .long sys_rt_sigpending | 178 | .long sys_rt_sigpending |
| @@ -185,11 +185,11 @@ ENTRY(sys_call_table) | |||
| 185 | .long sys_getcwd | 185 | .long sys_getcwd |
| 186 | .long sys_capget | 186 | .long sys_capget |
| 187 | .long sys_capset /* 185 */ | 187 | .long sys_capset /* 185 */ |
| 188 | .long sys_sigaltstack | 188 | .long ptregs_sigaltstack |
| 189 | .long sys_sendfile | 189 | .long sys_sendfile |
| 190 | .long sys_ni_syscall /* reserved for streams1 */ | 190 | .long sys_ni_syscall /* reserved for streams1 */ |
| 191 | .long sys_ni_syscall /* reserved for streams2 */ | 191 | .long sys_ni_syscall /* reserved for streams2 */ |
| 192 | .long sys_vfork /* 190 */ | 192 | .long ptregs_vfork /* 190 */ |
| 193 | .long sys_getrlimit | 193 | .long sys_getrlimit |
| 194 | .long sys_mmap2 | 194 | .long sys_mmap2 |
| 195 | .long sys_truncate64 | 195 | .long sys_truncate64 |
diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c index 3985cac0ed47..5c5d87f0b2e1 100644 --- a/arch/x86/kernel/time_32.c +++ b/arch/x86/kernel/time_32.c | |||
| @@ -33,12 +33,12 @@ | |||
| 33 | #include <linux/time.h> | 33 | #include <linux/time.h> |
| 34 | #include <linux/mca.h> | 34 | #include <linux/mca.h> |
| 35 | 35 | ||
| 36 | #include <asm/arch_hooks.h> | 36 | #include <asm/setup.h> |
| 37 | #include <asm/hpet.h> | 37 | #include <asm/hpet.h> |
| 38 | #include <asm/time.h> | 38 | #include <asm/time.h> |
| 39 | #include <asm/timer.h> | 39 | #include <asm/timer.h> |
| 40 | 40 | ||
| 41 | #include "do_timer.h" | 41 | #include <asm/do_timer.h> |
| 42 | 42 | ||
| 43 | int timer_ack; | 43 | int timer_ack; |
| 44 | 44 | ||
| @@ -118,7 +118,7 @@ void __init hpet_time_init(void) | |||
| 118 | { | 118 | { |
| 119 | if (!hpet_enable()) | 119 | if (!hpet_enable()) |
| 120 | setup_pit_timer(); | 120 | setup_pit_timer(); |
| 121 | time_init_hook(); | 121 | x86_quirk_time_init(); |
| 122 | } | 122 | } |
| 123 | 123 | ||
| 124 | /* | 124 | /* |
| @@ -131,7 +131,7 @@ void __init hpet_time_init(void) | |||
| 131 | */ | 131 | */ |
| 132 | void __init time_init(void) | 132 | void __init time_init(void) |
| 133 | { | 133 | { |
| 134 | pre_time_init_hook(); | 134 | x86_quirk_pre_time_init(); |
| 135 | tsc_init(); | 135 | tsc_init(); |
| 136 | late_time_init = choose_time_init(); | 136 | late_time_init = choose_time_init(); |
| 137 | } | 137 | } |
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c deleted file mode 100644 index ce5054642247..000000000000 --- a/arch/x86/kernel/tlb_32.c +++ /dev/null | |||
| @@ -1,256 +0,0 @@ | |||
| 1 | #include <linux/spinlock.h> | ||
| 2 | #include <linux/cpu.h> | ||
| 3 | #include <linux/interrupt.h> | ||
| 4 | |||
| 5 | #include <asm/tlbflush.h> | ||
| 6 | |||
| 7 | DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) | ||
| 8 | ____cacheline_aligned = { &init_mm, 0, }; | ||
| 9 | |||
| 10 | /* must come after the send_IPI functions above for inlining */ | ||
| 11 | #include <mach_ipi.h> | ||
| 12 | |||
| 13 | /* | ||
| 14 | * Smarter SMP flushing macros. | ||
| 15 | * c/o Linus Torvalds. | ||
| 16 | * | ||
| 17 | * These mean you can really definitely utterly forget about | ||
| 18 | * writing to user space from interrupts. (Its not allowed anyway). | ||
| 19 | * | ||
| 20 | * Optimizations Manfred Spraul <manfred@colorfullife.com> | ||
| 21 | */ | ||
| 22 | |||
| 23 | static cpumask_t flush_cpumask; | ||
| 24 | static struct mm_struct *flush_mm; | ||
| 25 | static unsigned long flush_va; | ||
| 26 | static DEFINE_SPINLOCK(tlbstate_lock); | ||
| 27 | |||
| 28 | /* | ||
| 29 | * We cannot call mmdrop() because we are in interrupt context, | ||
| 30 | * instead update mm->cpu_vm_mask. | ||
| 31 | * | ||
| 32 | * We need to reload %cr3 since the page tables may be going | ||
| 33 | * away from under us.. | ||
| 34 | */ | ||
| 35 | void leave_mm(int cpu) | ||
| 36 | { | ||
| 37 | BUG_ON(x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK); | ||
| 38 | cpu_clear(cpu, x86_read_percpu(cpu_tlbstate.active_mm)->cpu_vm_mask); | ||
| 39 | load_cr3(swapper_pg_dir); | ||
| 40 | } | ||
| 41 | EXPORT_SYMBOL_GPL(leave_mm); | ||
| 42 | |||
| 43 | /* | ||
| 44 | * | ||
| 45 | * The flush IPI assumes that a thread switch happens in this order: | ||
| 46 | * [cpu0: the cpu that switches] | ||
| 47 | * 1) switch_mm() either 1a) or 1b) | ||
| 48 | * 1a) thread switch to a different mm | ||
| 49 | * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask); | ||
| 50 | * Stop ipi delivery for the old mm. This is not synchronized with | ||
| 51 | * the other cpus, but smp_invalidate_interrupt ignore flush ipis | ||
| 52 | * for the wrong mm, and in the worst case we perform a superfluous | ||
| 53 | * tlb flush. | ||
| 54 | * 1a2) set cpu_tlbstate to TLBSTATE_OK | ||
| 55 | * Now the smp_invalidate_interrupt won't call leave_mm if cpu0 | ||
| 56 | * was in lazy tlb mode. | ||
| 57 | * 1a3) update cpu_tlbstate[].active_mm | ||
| 58 | * Now cpu0 accepts tlb flushes for the new mm. | ||
| 59 | * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask); | ||
| 60 | * Now the other cpus will send tlb flush ipis. | ||
| 61 | * 1a4) change cr3. | ||
| 62 | * 1b) thread switch without mm change | ||
| 63 | * cpu_tlbstate[].active_mm is correct, cpu0 already handles | ||
| 64 | * flush ipis. | ||
| 65 | * 1b1) set cpu_tlbstate to TLBSTATE_OK | ||
| 66 | * 1b2) test_and_set the cpu bit in cpu_vm_mask. | ||
| 67 | * Atomically set the bit [other cpus will start sending flush ipis], | ||
| 68 | * and test the bit. | ||
| 69 | * 1b3) if the bit was 0: leave_mm was called, flush the tlb. | ||
| 70 | * 2) switch %%esp, ie current | ||
| 71 | * | ||
| 72 | * The interrupt must handle 2 special cases: | ||
| 73 | * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm. | ||
| 74 | * - the cpu performs speculative tlb reads, i.e. even if the cpu only | ||
| 75 | * runs in kernel space, the cpu could load tlb entries for user space | ||
| 76 | * pages. | ||
| 77 | * | ||
| 78 | * The good news is that cpu_tlbstate is local to each cpu, no | ||
| 79 | * write/read ordering problems. | ||
| 80 | */ | ||
| 81 | |||
| 82 | /* | ||
| 83 | * TLB flush IPI: | ||
| 84 | * | ||
| 85 | * 1) Flush the tlb entries if the cpu uses the mm that's being flushed. | ||
| 86 | * 2) Leave the mm if we are in the lazy tlb mode. | ||
| 87 | */ | ||
| 88 | |||
| 89 | void smp_invalidate_interrupt(struct pt_regs *regs) | ||
| 90 | { | ||
| 91 | unsigned long cpu; | ||
| 92 | |||
| 93 | cpu = get_cpu(); | ||
| 94 | |||
| 95 | if (!cpu_isset(cpu, flush_cpumask)) | ||
| 96 | goto out; | ||
| 97 | /* | ||
| 98 | * This was a BUG() but until someone can quote me the | ||
| 99 | * line from the intel manual that guarantees an IPI to | ||
| 100 | * multiple CPUs is retried _only_ on the erroring CPUs | ||
| 101 | * its staying as a return | ||
| 102 | * | ||
| 103 | * BUG(); | ||
| 104 | */ | ||
| 105 | |||
| 106 | if (flush_mm == x86_read_percpu(cpu_tlbstate.active_mm)) { | ||
| 107 | if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK) { | ||
| 108 | if (flush_va == TLB_FLUSH_ALL) | ||
| 109 | local_flush_tlb(); | ||
| 110 | else | ||
| 111 | __flush_tlb_one(flush_va); | ||
| 112 | } else | ||
| 113 | leave_mm(cpu); | ||
| 114 | } | ||
| 115 | ack_APIC_irq(); | ||
| 116 | smp_mb__before_clear_bit(); | ||
| 117 | cpu_clear(cpu, flush_cpumask); | ||
| 118 | smp_mb__after_clear_bit(); | ||
| 119 | out: | ||
| 120 | put_cpu_no_resched(); | ||
| 121 | inc_irq_stat(irq_tlb_count); | ||
| 122 | } | ||
| 123 | |||
| 124 | void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, | ||
| 125 | unsigned long va) | ||
| 126 | { | ||
| 127 | cpumask_t cpumask = *cpumaskp; | ||
| 128 | |||
| 129 | /* | ||
| 130 | * A couple of (to be removed) sanity checks: | ||
| 131 | * | ||
| 132 | * - current CPU must not be in mask | ||
| 133 | * - mask must exist :) | ||
| 134 | */ | ||
| 135 | BUG_ON(cpus_empty(cpumask)); | ||
| 136 | BUG_ON(cpu_isset(smp_processor_id(), cpumask)); | ||
| 137 | BUG_ON(!mm); | ||
| 138 | |||
| 139 | #ifdef CONFIG_HOTPLUG_CPU | ||
| 140 | /* If a CPU which we ran on has gone down, OK. */ | ||
| 141 | cpus_and(cpumask, cpumask, cpu_online_map); | ||
| 142 | if (unlikely(cpus_empty(cpumask))) | ||
| 143 | return; | ||
| 144 | #endif | ||
| 145 | |||
| 146 | /* | ||
| 147 | * i'm not happy about this global shared spinlock in the | ||
| 148 | * MM hot path, but we'll see how contended it is. | ||
| 149 | * AK: x86-64 has a faster method that could be ported. | ||
| 150 | */ | ||
| 151 | spin_lock(&tlbstate_lock); | ||
| 152 | |||
| 153 | flush_mm = mm; | ||
| 154 | flush_va = va; | ||
| 155 | cpus_or(flush_cpumask, cpumask, flush_cpumask); | ||
| 156 | |||
| 157 | /* | ||
| 158 | * Make the above memory operations globally visible before | ||
| 159 | * sending the IPI. | ||
| 160 | */ | ||
| 161 | smp_mb(); | ||
| 162 | /* | ||
| 163 | * We have to send the IPI only to | ||
| 164 | * CPUs affected. | ||
| 165 | */ | ||
| 166 | send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR); | ||
| 167 | |||
| 168 | while (!cpus_empty(flush_cpumask)) | ||
| 169 | /* nothing. lockup detection does not belong here */ | ||
| 170 | cpu_relax(); | ||
| 171 | |||
| 172 | flush_mm = NULL; | ||
| 173 | flush_va = 0; | ||
| 174 | spin_unlock(&tlbstate_lock); | ||
| 175 | } | ||
| 176 | |||
| 177 | void flush_tlb_current_task(void) | ||
| 178 | { | ||
| 179 | struct mm_struct *mm = current->mm; | ||
| 180 | cpumask_t cpu_mask; | ||
| 181 | |||
| 182 | preempt_disable(); | ||
| 183 | cpu_mask = mm->cpu_vm_mask; | ||
| 184 | cpu_clear(smp_processor_id(), cpu_mask); | ||
| 185 | |||
| 186 | local_flush_tlb(); | ||
| 187 | if (!cpus_empty(cpu_mask)) | ||
| 188 | flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); | ||
| 189 | preempt_enable(); | ||
| 190 | } | ||
| 191 | |||
| 192 | void flush_tlb_mm(struct mm_struct *mm) | ||
| 193 | { | ||
| 194 | cpumask_t cpu_mask; | ||
| 195 | |||
| 196 | preempt_disable(); | ||
| 197 | cpu_mask = mm->cpu_vm_mask; | ||
| 198 | cpu_clear(smp_processor_id(), cpu_mask); | ||
| 199 | |||
| 200 | if (current->active_mm == mm) { | ||
| 201 | if (current->mm) | ||
| 202 | local_flush_tlb(); | ||
| 203 | else | ||
| 204 | leave_mm(smp_processor_id()); | ||
| 205 | } | ||
| 206 | if (!cpus_empty(cpu_mask)) | ||
| 207 | flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); | ||
| 208 | |||
| 209 | preempt_enable(); | ||
| 210 | } | ||
| 211 | |||
| 212 | void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) | ||
| 213 | { | ||
| 214 | struct mm_struct *mm = vma->vm_mm; | ||
| 215 | cpumask_t cpu_mask; | ||
| 216 | |||
| 217 | preempt_disable(); | ||
| 218 | cpu_mask = mm->cpu_vm_mask; | ||
| 219 | cpu_clear(smp_processor_id(), cpu_mask); | ||
| 220 | |||
| 221 | if (current->active_mm == mm) { | ||
| 222 | if (current->mm) | ||
| 223 | __flush_tlb_one(va); | ||
| 224 | else | ||
| 225 | leave_mm(smp_processor_id()); | ||
| 226 | } | ||
| 227 | |||
| 228 | if (!cpus_empty(cpu_mask)) | ||
| 229 | flush_tlb_others(cpu_mask, mm, va); | ||
| 230 | |||
| 231 | preempt_enable(); | ||
| 232 | } | ||
| 233 | EXPORT_SYMBOL(flush_tlb_page); | ||
| 234 | |||
| 235 | static void do_flush_tlb_all(void *info) | ||
| 236 | { | ||
| 237 | unsigned long cpu = smp_processor_id(); | ||
| 238 | |||
| 239 | __flush_tlb_all(); | ||
| 240 | if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_LAZY) | ||
| 241 | leave_mm(cpu); | ||
| 242 | } | ||
| 243 | |||
| 244 | void flush_tlb_all(void) | ||
| 245 | { | ||
| 246 | on_each_cpu(do_flush_tlb_all, NULL, 1); | ||
| 247 | } | ||
| 248 | |||
| 249 | void reset_lazy_tlbstate(void) | ||
| 250 | { | ||
| 251 | int cpu = raw_smp_processor_id(); | ||
| 252 | |||
| 253 | per_cpu(cpu_tlbstate, cpu).state = 0; | ||
| 254 | per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; | ||
| 255 | } | ||
| 256 | |||
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c deleted file mode 100644 index f8be6f1d2e48..000000000000 --- a/arch/x86/kernel/tlb_64.c +++ /dev/null | |||
| @@ -1,284 +0,0 @@ | |||
| 1 | #include <linux/init.h> | ||
| 2 | |||
| 3 | #include <linux/mm.h> | ||
| 4 | #include <linux/delay.h> | ||
| 5 | #include <linux/spinlock.h> | ||
| 6 | #include <linux/smp.h> | ||
| 7 | #include <linux/kernel_stat.h> | ||
| 8 | #include <linux/mc146818rtc.h> | ||
| 9 | #include <linux/interrupt.h> | ||
| 10 | |||
| 11 | #include <asm/mtrr.h> | ||
| 12 | #include <asm/pgalloc.h> | ||
| 13 | #include <asm/tlbflush.h> | ||
| 14 | #include <asm/mmu_context.h> | ||
| 15 | #include <asm/proto.h> | ||
| 16 | #include <asm/apicdef.h> | ||
| 17 | #include <asm/idle.h> | ||
| 18 | #include <asm/uv/uv_hub.h> | ||
| 19 | #include <asm/uv/uv_bau.h> | ||
| 20 | |||
| 21 | #include <mach_ipi.h> | ||
| 22 | /* | ||
| 23 | * Smarter SMP flushing macros. | ||
| 24 | * c/o Linus Torvalds. | ||
| 25 | * | ||
| 26 | * These mean you can really definitely utterly forget about | ||
| 27 | * writing to user space from interrupts. (Its not allowed anyway). | ||
| 28 | * | ||
| 29 | * Optimizations Manfred Spraul <manfred@colorfullife.com> | ||
| 30 | * | ||
| 31 | * More scalable flush, from Andi Kleen | ||
| 32 | * | ||
| 33 | * To avoid global state use 8 different call vectors. | ||
| 34 | * Each CPU uses a specific vector to trigger flushes on other | ||
| 35 | * CPUs. Depending on the received vector the target CPUs look into | ||
| 36 | * the right per cpu variable for the flush data. | ||
| 37 | * | ||
| 38 | * With more than 8 CPUs they are hashed to the 8 available | ||
| 39 | * vectors. The limited global vector space forces us to this right now. | ||
| 40 | * In future when interrupts are split into per CPU domains this could be | ||
| 41 | * fixed, at the cost of triggering multiple IPIs in some cases. | ||
| 42 | */ | ||
| 43 | |||
| 44 | union smp_flush_state { | ||
| 45 | struct { | ||
| 46 | cpumask_t flush_cpumask; | ||
| 47 | struct mm_struct *flush_mm; | ||
| 48 | unsigned long flush_va; | ||
| 49 | spinlock_t tlbstate_lock; | ||
| 50 | }; | ||
| 51 | char pad[SMP_CACHE_BYTES]; | ||
| 52 | } ____cacheline_aligned; | ||
| 53 | |||
| 54 | /* State is put into the per CPU data section, but padded | ||
| 55 | to a full cache line because other CPUs can access it and we don't | ||
| 56 | want false sharing in the per cpu data segment. */ | ||
| 57 | static DEFINE_PER_CPU(union smp_flush_state, flush_state); | ||
| 58 | |||
| 59 | /* | ||
| 60 | * We cannot call mmdrop() because we are in interrupt context, | ||
| 61 | * instead update mm->cpu_vm_mask. | ||
| 62 | */ | ||
| 63 | void leave_mm(int cpu) | ||
| 64 | { | ||
| 65 | if (read_pda(mmu_state) == TLBSTATE_OK) | ||
| 66 | BUG(); | ||
| 67 | cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask); | ||
| 68 | load_cr3(swapper_pg_dir); | ||
| 69 | } | ||
| 70 | EXPORT_SYMBOL_GPL(leave_mm); | ||
| 71 | |||
| 72 | /* | ||
| 73 | * | ||
| 74 | * The flush IPI assumes that a thread switch happens in this order: | ||
| 75 | * [cpu0: the cpu that switches] | ||
| 76 | * 1) switch_mm() either 1a) or 1b) | ||
| 77 | * 1a) thread switch to a different mm | ||
| 78 | * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask); | ||
| 79 | * Stop ipi delivery for the old mm. This is not synchronized with | ||
| 80 | * the other cpus, but smp_invalidate_interrupt ignore flush ipis | ||
| 81 | * for the wrong mm, and in the worst case we perform a superfluous | ||
| 82 | * tlb flush. | ||
| 83 | * 1a2) set cpu mmu_state to TLBSTATE_OK | ||
| 84 | * Now the smp_invalidate_interrupt won't call leave_mm if cpu0 | ||
| 85 | * was in lazy tlb mode. | ||
| 86 | * 1a3) update cpu active_mm | ||
| 87 | * Now cpu0 accepts tlb flushes for the new mm. | ||
| 88 | * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask); | ||
| 89 | * Now the other cpus will send tlb flush ipis. | ||
| 90 | * 1a4) change cr3. | ||
| 91 | * 1b) thread switch without mm change | ||
| 92 | * cpu active_mm is correct, cpu0 already handles | ||
| 93 | * flush ipis. | ||
| 94 | * 1b1) set cpu mmu_state to TLBSTATE_OK | ||
| 95 | * 1b2) test_and_set the cpu bit in cpu_vm_mask. | ||
| 96 | * Atomically set the bit [other cpus will start sending flush ipis], | ||
| 97 | * and test the bit. | ||
| 98 | * 1b3) if the bit was 0: leave_mm was called, flush the tlb. | ||
| 99 | * 2) switch %%esp, ie current | ||
| 100 | * | ||
| 101 | * The interrupt must handle 2 special cases: | ||
| 102 | * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm. | ||
| 103 | * - the cpu performs speculative tlb reads, i.e. even if the cpu only | ||
| 104 | * runs in kernel space, the cpu could load tlb entries for user space | ||
| 105 | * pages. | ||
| 106 | * | ||
| 107 | * The good news is that cpu mmu_state is local to each cpu, no | ||
| 108 | * write/read ordering problems. | ||
| 109 | */ | ||
| 110 | |||
| 111 | /* | ||
| 112 | * TLB flush IPI: | ||
| 113 | * | ||
| 114 | * 1) Flush the tlb entries if the cpu uses the mm that's being flushed. | ||
| 115 | * 2) Leave the mm if we are in the lazy tlb mode. | ||
| 116 | * | ||
| 117 | * Interrupts are disabled. | ||
| 118 | */ | ||
| 119 | |||
| 120 | asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) | ||
| 121 | { | ||
| 122 | int cpu; | ||
| 123 | int sender; | ||
| 124 | union smp_flush_state *f; | ||
| 125 | |||
| 126 | cpu = smp_processor_id(); | ||
| 127 | /* | ||
| 128 | * orig_rax contains the negated interrupt vector. | ||
| 129 | * Use that to determine where the sender put the data. | ||
| 130 | */ | ||
| 131 | sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START; | ||
| 132 | f = &per_cpu(flush_state, sender); | ||
| 133 | |||
| 134 | if (!cpu_isset(cpu, f->flush_cpumask)) | ||
| 135 | goto out; | ||
| 136 | /* | ||
| 137 | * This was a BUG() but until someone can quote me the | ||
| 138 | * line from the intel manual that guarantees an IPI to | ||
| 139 | * multiple CPUs is retried _only_ on the erroring CPUs | ||
| 140 | * its staying as a return | ||
| 141 | * | ||
| 142 | * BUG(); | ||
| 143 | */ | ||
| 144 | |||
| 145 | if (f->flush_mm == read_pda(active_mm)) { | ||
| 146 | if (read_pda(mmu_state) == TLBSTATE_OK) { | ||
| 147 | if (f->flush_va == TLB_FLUSH_ALL) | ||
| 148 | local_flush_tlb(); | ||
| 149 | else | ||
| 150 | __flush_tlb_one(f->flush_va); | ||
| 151 | } else | ||
| 152 | leave_mm(cpu); | ||
| 153 | } | ||
| 154 | out: | ||
| 155 | ack_APIC_irq(); | ||
| 156 | cpu_clear(cpu, f->flush_cpumask); | ||
| 157 | inc_irq_stat(irq_tlb_count); | ||
| 158 | } | ||
| 159 | |||
| 160 | void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, | ||
| 161 | unsigned long va) | ||
| 162 | { | ||
| 163 | int sender; | ||
| 164 | union smp_flush_state *f; | ||
| 165 | cpumask_t cpumask = *cpumaskp; | ||
| 166 | |||
| 167 | if (is_uv_system() && uv_flush_tlb_others(&cpumask, mm, va)) | ||
| 168 | return; | ||
| 169 | |||
| 170 | /* Caller has disabled preemption */ | ||
| 171 | sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS; | ||
| 172 | f = &per_cpu(flush_state, sender); | ||
| 173 | |||
| 174 | /* | ||
| 175 | * Could avoid this lock when | ||
| 176 | * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is | ||
| 177 | * probably not worth checking this for a cache-hot lock. | ||
| 178 | */ | ||
| 179 | spin_lock(&f->tlbstate_lock); | ||
| 180 | |||
| 181 | f->flush_mm = mm; | ||
| 182 | f->flush_va = va; | ||
| 183 | cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask); | ||
| 184 | |||
| 185 | /* | ||
| 186 | * Make the above memory operations globally visible before | ||
| 187 | * sending the IPI. | ||
| 188 | */ | ||
| 189 | smp_mb(); | ||
| 190 | /* | ||
| 191 | * We have to send the IPI only to | ||
| 192 | * CPUs affected. | ||
| 193 | */ | ||
| 194 | send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR_START + sender); | ||
| 195 | |||
| 196 | while (!cpus_empty(f->flush_cpumask)) | ||
| 197 | cpu_relax(); | ||
| 198 | |||
| 199 | f->flush_mm = NULL; | ||
| 200 | f->flush_va = 0; | ||
| 201 | spin_unlock(&f->tlbstate_lock); | ||
| 202 | } | ||
| 203 | |||
| 204 | static int __cpuinit init_smp_flush(void) | ||
| 205 | { | ||
| 206 | int i; | ||
| 207 | |||
| 208 | for_each_possible_cpu(i) | ||
| 209 | spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock); | ||
| 210 | |||
| 211 | return 0; | ||
| 212 | } | ||
| 213 | core_initcall(init_smp_flush); | ||
| 214 | |||
| 215 | void flush_tlb_current_task(void) | ||
| 216 | { | ||
| 217 | struct mm_struct *mm = current->mm; | ||
| 218 | cpumask_t cpu_mask; | ||
| 219 | |||
| 220 | preempt_disable(); | ||
| 221 | cpu_mask = mm->cpu_vm_mask; | ||
| 222 | cpu_clear(smp_processor_id(), cpu_mask); | ||
| 223 | |||
| 224 | local_flush_tlb(); | ||
| 225 | if (!cpus_empty(cpu_mask)) | ||
| 226 | flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); | ||
| 227 | preempt_enable(); | ||
| 228 | } | ||
| 229 | |||
| 230 | void flush_tlb_mm(struct mm_struct *mm) | ||
| 231 | { | ||
| 232 | cpumask_t cpu_mask; | ||
| 233 | |||
| 234 | preempt_disable(); | ||
| 235 | cpu_mask = mm->cpu_vm_mask; | ||
| 236 | cpu_clear(smp_processor_id(), cpu_mask); | ||
| 237 | |||
| 238 | if (current->active_mm == mm) { | ||
| 239 | if (current->mm) | ||
| 240 | local_flush_tlb(); | ||
| 241 | else | ||
| 242 | leave_mm(smp_processor_id()); | ||
| 243 | } | ||
| 244 | if (!cpus_empty(cpu_mask)) | ||
| 245 | flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); | ||
| 246 | |||
| 247 | preempt_enable(); | ||
| 248 | } | ||
| 249 | |||
| 250 | void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) | ||
| 251 | { | ||
| 252 | struct mm_struct *mm = vma->vm_mm; | ||
| 253 | cpumask_t cpu_mask; | ||
| 254 | |||
| 255 | preempt_disable(); | ||
| 256 | cpu_mask = mm->cpu_vm_mask; | ||
| 257 | cpu_clear(smp_processor_id(), cpu_mask); | ||
| 258 | |||
| 259 | if (current->active_mm == mm) { | ||
| 260 | if (current->mm) | ||
| 261 | __flush_tlb_one(va); | ||
| 262 | else | ||
| 263 | leave_mm(smp_processor_id()); | ||
| 264 | } | ||
| 265 | |||
| 266 | if (!cpus_empty(cpu_mask)) | ||
| 267 | flush_tlb_others(cpu_mask, mm, va); | ||
| 268 | |||
| 269 | preempt_enable(); | ||
| 270 | } | ||
| 271 | |||
| 272 | static void do_flush_tlb_all(void *info) | ||
| 273 | { | ||
| 274 | unsigned long cpu = smp_processor_id(); | ||
| 275 | |||
| 276 | __flush_tlb_all(); | ||
| 277 | if (read_pda(mmu_state) == TLBSTATE_LAZY) | ||
| 278 | leave_mm(cpu); | ||
| 279 | } | ||
| 280 | |||
| 281 | void flush_tlb_all(void) | ||
| 282 | { | ||
| 283 | on_each_cpu(do_flush_tlb_all, NULL, 1); | ||
| 284 | } | ||
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index 6812b829ed83..8afb69180c9b 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c | |||
| @@ -11,16 +11,15 @@ | |||
| 11 | #include <linux/kernel.h> | 11 | #include <linux/kernel.h> |
| 12 | 12 | ||
| 13 | #include <asm/mmu_context.h> | 13 | #include <asm/mmu_context.h> |
| 14 | #include <asm/uv/uv.h> | ||
| 14 | #include <asm/uv/uv_mmrs.h> | 15 | #include <asm/uv/uv_mmrs.h> |
| 15 | #include <asm/uv/uv_hub.h> | 16 | #include <asm/uv/uv_hub.h> |
| 16 | #include <asm/uv/uv_bau.h> | 17 | #include <asm/uv/uv_bau.h> |
| 17 | #include <asm/genapic.h> | 18 | #include <asm/apic.h> |
| 18 | #include <asm/idle.h> | 19 | #include <asm/idle.h> |
| 19 | #include <asm/tsc.h> | 20 | #include <asm/tsc.h> |
| 20 | #include <asm/irq_vectors.h> | 21 | #include <asm/irq_vectors.h> |
| 21 | 22 | ||
| 22 | #include <mach_apic.h> | ||
| 23 | |||
| 24 | static struct bau_control **uv_bau_table_bases __read_mostly; | 23 | static struct bau_control **uv_bau_table_bases __read_mostly; |
| 25 | static int uv_bau_retry_limit __read_mostly; | 24 | static int uv_bau_retry_limit __read_mostly; |
| 26 | 25 | ||
| @@ -210,14 +209,15 @@ static int uv_wait_completion(struct bau_desc *bau_desc, | |||
| 210 | * | 209 | * |
| 211 | * Send a broadcast and wait for a broadcast message to complete. | 210 | * Send a broadcast and wait for a broadcast message to complete. |
| 212 | * | 211 | * |
| 213 | * The cpumaskp mask contains the cpus the broadcast was sent to. | 212 | * The flush_mask contains the cpus the broadcast was sent to. |
| 214 | * | 213 | * |
| 215 | * Returns 1 if all remote flushing was done. The mask is zeroed. | 214 | * Returns NULL if all remote flushing was done. The mask is zeroed. |
| 216 | * Returns 0 if some remote flushing remains to be done. The mask is left | 215 | * Returns @flush_mask if some remote flushing remains to be done. The |
| 217 | * unchanged. | 216 | * mask will have some bits still set. |
| 218 | */ | 217 | */ |
| 219 | int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc, | 218 | const struct cpumask *uv_flush_send_and_wait(int cpu, int this_blade, |
| 220 | cpumask_t *cpumaskp) | 219 | struct bau_desc *bau_desc, |
| 220 | struct cpumask *flush_mask) | ||
| 221 | { | 221 | { |
| 222 | int completion_status = 0; | 222 | int completion_status = 0; |
| 223 | int right_shift; | 223 | int right_shift; |
| @@ -257,66 +257,75 @@ int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc, | |||
| 257 | * the cpu's, all of which are still in the mask. | 257 | * the cpu's, all of which are still in the mask. |
| 258 | */ | 258 | */ |
| 259 | __get_cpu_var(ptcstats).ptc_i++; | 259 | __get_cpu_var(ptcstats).ptc_i++; |
| 260 | return 0; | 260 | return flush_mask; |
| 261 | } | 261 | } |
| 262 | 262 | ||
| 263 | /* | 263 | /* |
| 264 | * Success, so clear the remote cpu's from the mask so we don't | 264 | * Success, so clear the remote cpu's from the mask so we don't |
| 265 | * use the IPI method of shootdown on them. | 265 | * use the IPI method of shootdown on them. |
| 266 | */ | 266 | */ |
| 267 | for_each_cpu_mask(bit, *cpumaskp) { | 267 | for_each_cpu(bit, flush_mask) { |
| 268 | blade = uv_cpu_to_blade_id(bit); | 268 | blade = uv_cpu_to_blade_id(bit); |
| 269 | if (blade == this_blade) | 269 | if (blade == this_blade) |
| 270 | continue; | 270 | continue; |
| 271 | cpu_clear(bit, *cpumaskp); | 271 | cpumask_clear_cpu(bit, flush_mask); |
| 272 | } | 272 | } |
| 273 | if (!cpus_empty(*cpumaskp)) | 273 | if (!cpumask_empty(flush_mask)) |
| 274 | return 0; | 274 | return flush_mask; |
| 275 | return 1; | 275 | return NULL; |
| 276 | } | 276 | } |
| 277 | 277 | ||
| 278 | static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask); | ||
| 279 | |||
| 278 | /** | 280 | /** |
| 279 | * uv_flush_tlb_others - globally purge translation cache of a virtual | 281 | * uv_flush_tlb_others - globally purge translation cache of a virtual |
| 280 | * address or all TLB's | 282 | * address or all TLB's |
| 281 | * @cpumaskp: mask of all cpu's in which the address is to be removed | 283 | * @cpumask: mask of all cpu's in which the address is to be removed |
| 282 | * @mm: mm_struct containing virtual address range | 284 | * @mm: mm_struct containing virtual address range |
| 283 | * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu) | 285 | * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu) |
| 286 | * @cpu: the current cpu | ||
| 284 | * | 287 | * |
| 285 | * This is the entry point for initiating any UV global TLB shootdown. | 288 | * This is the entry point for initiating any UV global TLB shootdown. |
| 286 | * | 289 | * |
| 287 | * Purges the translation caches of all specified processors of the given | 290 | * Purges the translation caches of all specified processors of the given |
| 288 | * virtual address, or purges all TLB's on specified processors. | 291 | * virtual address, or purges all TLB's on specified processors. |
| 289 | * | 292 | * |
| 290 | * The caller has derived the cpumaskp from the mm_struct and has subtracted | 293 | * The caller has derived the cpumask from the mm_struct. This function |
| 291 | * the local cpu from the mask. This function is called only if there | 294 | * is called only if there are bits set in the mask. (e.g. flush_tlb_page()) |
| 292 | * are bits set in the mask. (e.g. flush_tlb_page()) | ||
| 293 | * | 295 | * |
| 294 | * The cpumaskp is converted into a nodemask of the nodes containing | 296 | * The cpumask is converted into a nodemask of the nodes containing |
| 295 | * the cpus. | 297 | * the cpus. |
| 296 | * | 298 | * |
| 297 | * Returns 1 if all remote flushing was done. | 299 | * Note that this function should be called with preemption disabled. |
| 298 | * Returns 0 if some remote flushing remains to be done. | 300 | * |
| 301 | * Returns NULL if all remote flushing was done. | ||
| 302 | * Returns pointer to cpumask if some remote flushing remains to be | ||
| 303 | * done. The returned pointer is valid till preemption is re-enabled. | ||
| 299 | */ | 304 | */ |
| 300 | int uv_flush_tlb_others(cpumask_t *cpumaskp, struct mm_struct *mm, | 305 | const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, |
| 301 | unsigned long va) | 306 | struct mm_struct *mm, |
| 307 | unsigned long va, unsigned int cpu) | ||
| 302 | { | 308 | { |
| 309 | struct cpumask *flush_mask = __get_cpu_var(uv_flush_tlb_mask); | ||
| 303 | int i; | 310 | int i; |
| 304 | int bit; | 311 | int bit; |
| 305 | int blade; | 312 | int blade; |
| 306 | int cpu; | 313 | int uv_cpu; |
| 307 | int this_blade; | 314 | int this_blade; |
| 308 | int locals = 0; | 315 | int locals = 0; |
| 309 | struct bau_desc *bau_desc; | 316 | struct bau_desc *bau_desc; |
| 310 | 317 | ||
| 311 | cpu = uv_blade_processor_id(); | 318 | cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu)); |
| 319 | |||
| 320 | uv_cpu = uv_blade_processor_id(); | ||
| 312 | this_blade = uv_numa_blade_id(); | 321 | this_blade = uv_numa_blade_id(); |
| 313 | bau_desc = __get_cpu_var(bau_control).descriptor_base; | 322 | bau_desc = __get_cpu_var(bau_control).descriptor_base; |
| 314 | bau_desc += UV_ITEMS_PER_DESCRIPTOR * cpu; | 323 | bau_desc += UV_ITEMS_PER_DESCRIPTOR * uv_cpu; |
| 315 | 324 | ||
| 316 | bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); | 325 | bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); |
| 317 | 326 | ||
| 318 | i = 0; | 327 | i = 0; |
| 319 | for_each_cpu_mask(bit, *cpumaskp) { | 328 | for_each_cpu(bit, flush_mask) { |
| 320 | blade = uv_cpu_to_blade_id(bit); | 329 | blade = uv_cpu_to_blade_id(bit); |
| 321 | BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1)); | 330 | BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1)); |
| 322 | if (blade == this_blade) { | 331 | if (blade == this_blade) { |
| @@ -331,17 +340,17 @@ int uv_flush_tlb_others(cpumask_t *cpumaskp, struct mm_struct *mm, | |||
| 331 | * no off_node flushing; return status for local node | 340 | * no off_node flushing; return status for local node |
| 332 | */ | 341 | */ |
| 333 | if (locals) | 342 | if (locals) |
| 334 | return 0; | 343 | return flush_mask; |
| 335 | else | 344 | else |
| 336 | return 1; | 345 | return NULL; |
| 337 | } | 346 | } |
| 338 | __get_cpu_var(ptcstats).requestor++; | 347 | __get_cpu_var(ptcstats).requestor++; |
| 339 | __get_cpu_var(ptcstats).ntargeted += i; | 348 | __get_cpu_var(ptcstats).ntargeted += i; |
| 340 | 349 | ||
| 341 | bau_desc->payload.address = va; | 350 | bau_desc->payload.address = va; |
| 342 | bau_desc->payload.sending_cpu = smp_processor_id(); | 351 | bau_desc->payload.sending_cpu = cpu; |
| 343 | 352 | ||
| 344 | return uv_flush_send_and_wait(cpu, this_blade, bau_desc, cpumaskp); | 353 | return uv_flush_send_and_wait(uv_cpu, this_blade, bau_desc, flush_mask); |
| 345 | } | 354 | } |
| 346 | 355 | ||
| 347 | /* | 356 | /* |
| @@ -747,6 +756,10 @@ static int __init uv_bau_init(void) | |||
| 747 | if (!is_uv_system()) | 756 | if (!is_uv_system()) |
| 748 | return 0; | 757 | return 0; |
| 749 | 758 | ||
| 759 | for_each_possible_cpu(cur_cpu) | ||
| 760 | alloc_cpumask_var_node(&per_cpu(uv_flush_tlb_mask, cur_cpu), | ||
| 761 | GFP_KERNEL, cpu_to_node(cur_cpu)); | ||
| 762 | |||
| 750 | uv_bau_retry_limit = 1; | 763 | uv_bau_retry_limit = 1; |
| 751 | uv_nshift = uv_hub_info->n_val; | 764 | uv_nshift = uv_hub_info->n_val; |
| 752 | uv_mmask = (1UL << uv_hub_info->n_val) - 1; | 765 | uv_mmask = (1UL << uv_hub_info->n_val) - 1; |
diff --git a/arch/x86/kernel/trampoline_32.S b/arch/x86/kernel/trampoline_32.S index d8ccc3c6552f..66d874e5404c 100644 --- a/arch/x86/kernel/trampoline_32.S +++ b/arch/x86/kernel/trampoline_32.S | |||
| @@ -29,7 +29,7 @@ | |||
| 29 | 29 | ||
| 30 | #include <linux/linkage.h> | 30 | #include <linux/linkage.h> |
| 31 | #include <asm/segment.h> | 31 | #include <asm/segment.h> |
| 32 | #include <asm/page.h> | 32 | #include <asm/page_types.h> |
| 33 | 33 | ||
| 34 | /* We can free up trampoline after bootup if cpu hotplug is not supported. */ | 34 | /* We can free up trampoline after bootup if cpu hotplug is not supported. */ |
| 35 | #ifndef CONFIG_HOTPLUG_CPU | 35 | #ifndef CONFIG_HOTPLUG_CPU |
diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S index 894293c598db..cddfb8d386b9 100644 --- a/arch/x86/kernel/trampoline_64.S +++ b/arch/x86/kernel/trampoline_64.S | |||
| @@ -25,10 +25,11 @@ | |||
| 25 | */ | 25 | */ |
| 26 | 26 | ||
| 27 | #include <linux/linkage.h> | 27 | #include <linux/linkage.h> |
| 28 | #include <asm/pgtable.h> | 28 | #include <asm/pgtable_types.h> |
| 29 | #include <asm/page.h> | 29 | #include <asm/page_types.h> |
| 30 | #include <asm/msr.h> | 30 | #include <asm/msr.h> |
| 31 | #include <asm/segment.h> | 31 | #include <asm/segment.h> |
| 32 | #include <asm/processor-flags.h> | ||
| 32 | 33 | ||
| 33 | .section .rodata, "a", @progbits | 34 | .section .rodata, "a", @progbits |
| 34 | 35 | ||
| @@ -37,7 +38,7 @@ | |||
| 37 | ENTRY(trampoline_data) | 38 | ENTRY(trampoline_data) |
| 38 | r_base = . | 39 | r_base = . |
| 39 | cli # We should be safe anyway | 40 | cli # We should be safe anyway |
| 40 | wbinvd | 41 | wbinvd |
| 41 | mov %cs, %ax # Code and data in the same place | 42 | mov %cs, %ax # Code and data in the same place |
| 42 | mov %ax, %ds | 43 | mov %ax, %ds |
| 43 | mov %ax, %es | 44 | mov %ax, %es |
| @@ -73,9 +74,8 @@ r_base = . | |||
| 73 | lidtl tidt - r_base # load idt with 0, 0 | 74 | lidtl tidt - r_base # load idt with 0, 0 |
| 74 | lgdtl tgdt - r_base # load gdt with whatever is appropriate | 75 | lgdtl tgdt - r_base # load gdt with whatever is appropriate |
| 75 | 76 | ||
| 76 | xor %ax, %ax | 77 | mov $X86_CR0_PE, %ax # protected mode (PE) bit |
| 77 | inc %ax # protected mode (PE) bit | 78 | lmsw %ax # into protected mode |
| 78 | lmsw %ax # into protected mode | ||
| 79 | 79 | ||
| 80 | # flush prefetch and jump to startup_32 | 80 | # flush prefetch and jump to startup_32 |
| 81 | ljmpl *(startup_32_vector - r_base) | 81 | ljmpl *(startup_32_vector - r_base) |
| @@ -86,9 +86,8 @@ startup_32: | |||
| 86 | movl $__KERNEL_DS, %eax # Initialize the %ds segment register | 86 | movl $__KERNEL_DS, %eax # Initialize the %ds segment register |
| 87 | movl %eax, %ds | 87 | movl %eax, %ds |
| 88 | 88 | ||
| 89 | xorl %eax, %eax | 89 | movl $X86_CR4_PAE, %eax |
| 90 | btsl $5, %eax # Enable PAE mode | 90 | movl %eax, %cr4 # Enable PAE mode |
| 91 | movl %eax, %cr4 | ||
| 92 | 91 | ||
| 93 | # Setup trampoline 4 level pagetables | 92 | # Setup trampoline 4 level pagetables |
| 94 | leal (trampoline_level4_pgt - r_base)(%esi), %eax | 93 | leal (trampoline_level4_pgt - r_base)(%esi), %eax |
| @@ -99,9 +98,9 @@ startup_32: | |||
| 99 | xorl %edx, %edx | 98 | xorl %edx, %edx |
| 100 | wrmsr | 99 | wrmsr |
| 101 | 100 | ||
| 102 | xorl %eax, %eax | 101 | # Enable paging and in turn activate Long Mode |
| 103 | btsl $31, %eax # Enable paging and in turn activate Long Mode | 102 | # Enable protected mode |
| 104 | btsl $0, %eax # Enable protected mode | 103 | movl $(X86_CR0_PG | X86_CR0_PE), %eax |
| 105 | movl %eax, %cr0 | 104 | movl %eax, %cr0 |
| 106 | 105 | ||
| 107 | /* | 106 | /* |
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index a9e7548e1790..a1d288327ff0 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
| @@ -54,15 +54,14 @@ | |||
| 54 | #include <asm/desc.h> | 54 | #include <asm/desc.h> |
| 55 | #include <asm/i387.h> | 55 | #include <asm/i387.h> |
| 56 | 56 | ||
| 57 | #include <mach_traps.h> | 57 | #include <asm/mach_traps.h> |
| 58 | 58 | ||
| 59 | #ifdef CONFIG_X86_64 | 59 | #ifdef CONFIG_X86_64 |
| 60 | #include <asm/pgalloc.h> | 60 | #include <asm/pgalloc.h> |
| 61 | #include <asm/proto.h> | 61 | #include <asm/proto.h> |
| 62 | #include <asm/pda.h> | ||
| 63 | #else | 62 | #else |
| 64 | #include <asm/processor-flags.h> | 63 | #include <asm/processor-flags.h> |
| 65 | #include <asm/arch_hooks.h> | 64 | #include <asm/setup.h> |
| 66 | #include <asm/traps.h> | 65 | #include <asm/traps.h> |
| 67 | 66 | ||
| 68 | #include "cpu/mcheck/mce.h" | 67 | #include "cpu/mcheck/mce.h" |
| @@ -119,47 +118,6 @@ die_if_kernel(const char *str, struct pt_regs *regs, long err) | |||
| 119 | if (!user_mode_vm(regs)) | 118 | if (!user_mode_vm(regs)) |
| 120 | die(str, regs, err); | 119 | die(str, regs, err); |
| 121 | } | 120 | } |
| 122 | |||
| 123 | /* | ||
| 124 | * Perform the lazy TSS's I/O bitmap copy. If the TSS has an | ||
| 125 | * invalid offset set (the LAZY one) and the faulting thread has | ||
| 126 | * a valid I/O bitmap pointer, we copy the I/O bitmap in the TSS, | ||
| 127 | * we set the offset field correctly and return 1. | ||
| 128 | */ | ||
| 129 | static int lazy_iobitmap_copy(void) | ||
| 130 | { | ||
| 131 | struct thread_struct *thread; | ||
| 132 | struct tss_struct *tss; | ||
| 133 | int cpu; | ||
| 134 | |||
| 135 | cpu = get_cpu(); | ||
| 136 | tss = &per_cpu(init_tss, cpu); | ||
| 137 | thread = &current->thread; | ||
| 138 | |||
| 139 | if (tss->x86_tss.io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY && | ||
| 140 | thread->io_bitmap_ptr) { | ||
| 141 | memcpy(tss->io_bitmap, thread->io_bitmap_ptr, | ||
| 142 | thread->io_bitmap_max); | ||
| 143 | /* | ||
| 144 | * If the previously set map was extending to higher ports | ||
| 145 | * than the current one, pad extra space with 0xff (no access). | ||
| 146 | */ | ||
| 147 | if (thread->io_bitmap_max < tss->io_bitmap_max) { | ||
| 148 | memset((char *) tss->io_bitmap + | ||
| 149 | thread->io_bitmap_max, 0xff, | ||
| 150 | tss->io_bitmap_max - thread->io_bitmap_max); | ||
| 151 | } | ||
| 152 | tss->io_bitmap_max = thread->io_bitmap_max; | ||
| 153 | tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; | ||
| 154 | tss->io_bitmap_owner = thread; | ||
| 155 | put_cpu(); | ||
| 156 | |||
| 157 | return 1; | ||
| 158 | } | ||
| 159 | put_cpu(); | ||
| 160 | |||
| 161 | return 0; | ||
| 162 | } | ||
| 163 | #endif | 121 | #endif |
| 164 | 122 | ||
| 165 | static void __kprobes | 123 | static void __kprobes |
| @@ -310,11 +268,6 @@ do_general_protection(struct pt_regs *regs, long error_code) | |||
| 310 | conditional_sti(regs); | 268 | conditional_sti(regs); |
| 311 | 269 | ||
| 312 | #ifdef CONFIG_X86_32 | 270 | #ifdef CONFIG_X86_32 |
| 313 | if (lazy_iobitmap_copy()) { | ||
| 314 | /* restart the faulting instruction */ | ||
| 315 | return; | ||
| 316 | } | ||
| 317 | |||
| 318 | if (regs->flags & X86_VM_MASK) | 271 | if (regs->flags & X86_VM_MASK) |
| 319 | goto gp_in_vm86; | 272 | goto gp_in_vm86; |
| 320 | #endif | 273 | #endif |
| @@ -914,19 +867,20 @@ void math_emulate(struct math_emu_info *info) | |||
| 914 | } | 867 | } |
| 915 | #endif /* CONFIG_MATH_EMULATION */ | 868 | #endif /* CONFIG_MATH_EMULATION */ |
| 916 | 869 | ||
| 917 | dotraplinkage void __kprobes do_device_not_available(struct pt_regs regs) | 870 | dotraplinkage void __kprobes |
| 871 | do_device_not_available(struct pt_regs *regs, long error_code) | ||
| 918 | { | 872 | { |
| 919 | #ifdef CONFIG_X86_32 | 873 | #ifdef CONFIG_X86_32 |
| 920 | if (read_cr0() & X86_CR0_EM) { | 874 | if (read_cr0() & X86_CR0_EM) { |
| 921 | struct math_emu_info info = { }; | 875 | struct math_emu_info info = { }; |
| 922 | 876 | ||
| 923 | conditional_sti(&regs); | 877 | conditional_sti(regs); |
| 924 | 878 | ||
| 925 | info.regs = &regs; | 879 | info.regs = regs; |
| 926 | math_emulate(&info); | 880 | math_emulate(&info); |
| 927 | } else { | 881 | } else { |
| 928 | math_state_restore(); /* interrupts still off */ | 882 | math_state_restore(); /* interrupts still off */ |
| 929 | conditional_sti(&regs); | 883 | conditional_sti(regs); |
| 930 | } | 884 | } |
| 931 | #else | 885 | #else |
| 932 | math_state_restore(); | 886 | math_state_restore(); |
| @@ -942,7 +896,7 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) | |||
| 942 | info.si_signo = SIGILL; | 896 | info.si_signo = SIGILL; |
| 943 | info.si_errno = 0; | 897 | info.si_errno = 0; |
| 944 | info.si_code = ILL_BADSTK; | 898 | info.si_code = ILL_BADSTK; |
| 945 | info.si_addr = 0; | 899 | info.si_addr = NULL; |
| 946 | if (notify_die(DIE_TRAP, "iret exception", | 900 | if (notify_die(DIE_TRAP, "iret exception", |
| 947 | regs, error_code, 32, SIGILL) == NOTIFY_STOP) | 901 | regs, error_code, 32, SIGILL) == NOTIFY_STOP) |
| 948 | return; | 902 | return; |
| @@ -1026,6 +980,6 @@ void __init trap_init(void) | |||
| 1026 | cpu_init(); | 980 | cpu_init(); |
| 1027 | 981 | ||
| 1028 | #ifdef CONFIG_X86_32 | 982 | #ifdef CONFIG_X86_32 |
| 1029 | trap_init_hook(); | 983 | x86_quirk_trap_init(); |
| 1030 | #endif | 984 | #endif |
| 1031 | } | 985 | } |
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index d5cebb52d45b..462b9ba67e92 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c | |||
| @@ -793,7 +793,7 @@ __cpuinit int unsynchronized_tsc(void) | |||
| 793 | if (!cpu_has_tsc || tsc_unstable) | 793 | if (!cpu_has_tsc || tsc_unstable) |
| 794 | return 1; | 794 | return 1; |
| 795 | 795 | ||
| 796 | #ifdef CONFIG_X86_SMP | 796 | #ifdef CONFIG_SMP |
| 797 | if (apic_is_clustered_box()) | 797 | if (apic_is_clustered_box()) |
| 798 | return 1; | 798 | return 1; |
| 799 | #endif | 799 | #endif |
diff --git a/arch/x86/kernel/uv_time.c b/arch/x86/kernel/uv_time.c new file mode 100644 index 000000000000..2ffb6c53326e --- /dev/null +++ b/arch/x86/kernel/uv_time.c | |||
| @@ -0,0 +1,393 @@ | |||
| 1 | /* | ||
| 2 | * SGI RTC clock/timer routines. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify | ||
| 5 | * it under the terms of the GNU General Public License as published by | ||
| 6 | * the Free Software Foundation; either version 2 of the License, or | ||
| 7 | * (at your option) any later version. | ||
| 8 | * | ||
| 9 | * This program is distributed in the hope that it will be useful, | ||
| 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 12 | * GNU General Public License for more details. | ||
| 13 | * | ||
| 14 | * You should have received a copy of the GNU General Public License | ||
| 15 | * along with this program; if not, write to the Free Software | ||
| 16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
| 17 | * | ||
| 18 | * Copyright (c) 2009 Silicon Graphics, Inc. All Rights Reserved. | ||
| 19 | * Copyright (c) Dimitri Sivanich | ||
| 20 | */ | ||
| 21 | #include <linux/clockchips.h> | ||
| 22 | |||
| 23 | #include <asm/uv/uv_mmrs.h> | ||
| 24 | #include <asm/uv/uv_hub.h> | ||
| 25 | #include <asm/uv/bios.h> | ||
| 26 | #include <asm/uv/uv.h> | ||
| 27 | #include <asm/apic.h> | ||
| 28 | #include <asm/cpu.h> | ||
| 29 | |||
| 30 | #define RTC_NAME "sgi_rtc" | ||
| 31 | |||
| 32 | static cycle_t uv_read_rtc(void); | ||
| 33 | static int uv_rtc_next_event(unsigned long, struct clock_event_device *); | ||
| 34 | static void uv_rtc_timer_setup(enum clock_event_mode, | ||
| 35 | struct clock_event_device *); | ||
| 36 | |||
| 37 | static struct clocksource clocksource_uv = { | ||
| 38 | .name = RTC_NAME, | ||
| 39 | .rating = 400, | ||
| 40 | .read = uv_read_rtc, | ||
| 41 | .mask = (cycle_t)UVH_RTC_REAL_TIME_CLOCK_MASK, | ||
| 42 | .shift = 10, | ||
| 43 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, | ||
| 44 | }; | ||
| 45 | |||
| 46 | static struct clock_event_device clock_event_device_uv = { | ||
| 47 | .name = RTC_NAME, | ||
| 48 | .features = CLOCK_EVT_FEAT_ONESHOT, | ||
| 49 | .shift = 20, | ||
| 50 | .rating = 400, | ||
| 51 | .irq = -1, | ||
| 52 | .set_next_event = uv_rtc_next_event, | ||
| 53 | .set_mode = uv_rtc_timer_setup, | ||
| 54 | .event_handler = NULL, | ||
| 55 | }; | ||
| 56 | |||
| 57 | static DEFINE_PER_CPU(struct clock_event_device, cpu_ced); | ||
| 58 | |||
| 59 | /* There is one of these allocated per node */ | ||
| 60 | struct uv_rtc_timer_head { | ||
| 61 | spinlock_t lock; | ||
| 62 | /* next cpu waiting for timer, local node relative: */ | ||
| 63 | int next_cpu; | ||
| 64 | /* number of cpus on this node: */ | ||
| 65 | int ncpus; | ||
| 66 | struct { | ||
| 67 | int lcpu; /* systemwide logical cpu number */ | ||
| 68 | u64 expires; /* next timer expiration for this cpu */ | ||
| 69 | } cpu[1]; | ||
| 70 | }; | ||
| 71 | |||
| 72 | /* | ||
| 73 | * Access to uv_rtc_timer_head via blade id. | ||
| 74 | */ | ||
| 75 | static struct uv_rtc_timer_head **blade_info __read_mostly; | ||
| 76 | |||
| 77 | static int uv_rtc_enable; | ||
| 78 | |||
| 79 | /* | ||
| 80 | * Hardware interface routines | ||
| 81 | */ | ||
| 82 | |||
| 83 | /* Send IPIs to another node */ | ||
| 84 | static void uv_rtc_send_IPI(int cpu) | ||
| 85 | { | ||
| 86 | unsigned long apicid, val; | ||
| 87 | int pnode; | ||
| 88 | |||
| 89 | apicid = cpu_physical_id(cpu); | ||
| 90 | pnode = uv_apicid_to_pnode(apicid); | ||
| 91 | val = (1UL << UVH_IPI_INT_SEND_SHFT) | | ||
| 92 | (apicid << UVH_IPI_INT_APIC_ID_SHFT) | | ||
| 93 | (GENERIC_INTERRUPT_VECTOR << UVH_IPI_INT_VECTOR_SHFT); | ||
| 94 | |||
| 95 | uv_write_global_mmr64(pnode, UVH_IPI_INT, val); | ||
| 96 | } | ||
| 97 | |||
| 98 | /* Check for an RTC interrupt pending */ | ||
| 99 | static int uv_intr_pending(int pnode) | ||
| 100 | { | ||
| 101 | return uv_read_global_mmr64(pnode, UVH_EVENT_OCCURRED0) & | ||
| 102 | UVH_EVENT_OCCURRED0_RTC1_MASK; | ||
| 103 | } | ||
| 104 | |||
| 105 | /* Setup interrupt and return non-zero if early expiration occurred. */ | ||
| 106 | static int uv_setup_intr(int cpu, u64 expires) | ||
| 107 | { | ||
| 108 | u64 val; | ||
| 109 | int pnode = uv_cpu_to_pnode(cpu); | ||
| 110 | |||
| 111 | uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, | ||
| 112 | UVH_RTC1_INT_CONFIG_M_MASK); | ||
| 113 | uv_write_global_mmr64(pnode, UVH_INT_CMPB, -1L); | ||
| 114 | |||
| 115 | uv_write_global_mmr64(pnode, UVH_EVENT_OCCURRED0_ALIAS, | ||
| 116 | UVH_EVENT_OCCURRED0_RTC1_MASK); | ||
| 117 | |||
| 118 | val = (GENERIC_INTERRUPT_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) | | ||
| 119 | ((u64)cpu_physical_id(cpu) << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT); | ||
| 120 | |||
| 121 | /* Set configuration */ | ||
| 122 | uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, val); | ||
| 123 | /* Initialize comparator value */ | ||
| 124 | uv_write_global_mmr64(pnode, UVH_INT_CMPB, expires); | ||
| 125 | |||
| 126 | return (expires < uv_read_rtc() && !uv_intr_pending(pnode)); | ||
| 127 | } | ||
| 128 | |||
| 129 | /* | ||
| 130 | * Per-cpu timer tracking routines | ||
| 131 | */ | ||
| 132 | |||
| 133 | static __init void uv_rtc_deallocate_timers(void) | ||
| 134 | { | ||
| 135 | int bid; | ||
| 136 | |||
| 137 | for_each_possible_blade(bid) { | ||
| 138 | kfree(blade_info[bid]); | ||
| 139 | } | ||
| 140 | kfree(blade_info); | ||
| 141 | } | ||
| 142 | |||
| 143 | /* Allocate per-node list of cpu timer expiration times. */ | ||
| 144 | static __init int uv_rtc_allocate_timers(void) | ||
| 145 | { | ||
| 146 | int cpu; | ||
| 147 | |||
| 148 | blade_info = kmalloc(uv_possible_blades * sizeof(void *), GFP_KERNEL); | ||
| 149 | if (!blade_info) | ||
| 150 | return -ENOMEM; | ||
| 151 | memset(blade_info, 0, uv_possible_blades * sizeof(void *)); | ||
| 152 | |||
| 153 | for_each_present_cpu(cpu) { | ||
| 154 | int nid = cpu_to_node(cpu); | ||
| 155 | int bid = uv_cpu_to_blade_id(cpu); | ||
| 156 | int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id; | ||
| 157 | struct uv_rtc_timer_head *head = blade_info[bid]; | ||
| 158 | |||
| 159 | if (!head) { | ||
| 160 | head = kmalloc_node(sizeof(struct uv_rtc_timer_head) + | ||
| 161 | (uv_blade_nr_possible_cpus(bid) * | ||
| 162 | 2 * sizeof(u64)), | ||
| 163 | GFP_KERNEL, nid); | ||
| 164 | if (!head) { | ||
| 165 | uv_rtc_deallocate_timers(); | ||
| 166 | return -ENOMEM; | ||
| 167 | } | ||
| 168 | spin_lock_init(&head->lock); | ||
| 169 | head->ncpus = uv_blade_nr_possible_cpus(bid); | ||
| 170 | head->next_cpu = -1; | ||
| 171 | blade_info[bid] = head; | ||
| 172 | } | ||
| 173 | |||
| 174 | head->cpu[bcpu].lcpu = cpu; | ||
| 175 | head->cpu[bcpu].expires = ULLONG_MAX; | ||
| 176 | } | ||
| 177 | |||
| 178 | return 0; | ||
| 179 | } | ||
| 180 | |||
| 181 | /* Find and set the next expiring timer. */ | ||
| 182 | static void uv_rtc_find_next_timer(struct uv_rtc_timer_head *head, int pnode) | ||
| 183 | { | ||
| 184 | u64 lowest = ULLONG_MAX; | ||
| 185 | int c, bcpu = -1; | ||
| 186 | |||
| 187 | head->next_cpu = -1; | ||
| 188 | for (c = 0; c < head->ncpus; c++) { | ||
| 189 | u64 exp = head->cpu[c].expires; | ||
| 190 | if (exp < lowest) { | ||
| 191 | bcpu = c; | ||
| 192 | lowest = exp; | ||
| 193 | } | ||
| 194 | } | ||
| 195 | if (bcpu >= 0) { | ||
| 196 | head->next_cpu = bcpu; | ||
| 197 | c = head->cpu[bcpu].lcpu; | ||
| 198 | if (uv_setup_intr(c, lowest)) | ||
| 199 | /* If we didn't set it up in time, trigger */ | ||
| 200 | uv_rtc_send_IPI(c); | ||
| 201 | } else { | ||
| 202 | uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, | ||
| 203 | UVH_RTC1_INT_CONFIG_M_MASK); | ||
| 204 | } | ||
| 205 | } | ||
| 206 | |||
| 207 | /* | ||
| 208 | * Set expiration time for current cpu. | ||
| 209 | * | ||
| 210 | * Returns 1 if we missed the expiration time. | ||
| 211 | */ | ||
| 212 | static int uv_rtc_set_timer(int cpu, u64 expires) | ||
| 213 | { | ||
| 214 | int pnode = uv_cpu_to_pnode(cpu); | ||
| 215 | int bid = uv_cpu_to_blade_id(cpu); | ||
| 216 | struct uv_rtc_timer_head *head = blade_info[bid]; | ||
| 217 | int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id; | ||
| 218 | u64 *t = &head->cpu[bcpu].expires; | ||
| 219 | unsigned long flags; | ||
| 220 | int next_cpu; | ||
| 221 | |||
| 222 | spin_lock_irqsave(&head->lock, flags); | ||
| 223 | |||
| 224 | next_cpu = head->next_cpu; | ||
| 225 | *t = expires; | ||
| 226 | /* Will this one be next to go off? */ | ||
| 227 | if (next_cpu < 0 || bcpu == next_cpu || | ||
| 228 | expires < head->cpu[next_cpu].expires) { | ||
| 229 | head->next_cpu = bcpu; | ||
| 230 | if (uv_setup_intr(cpu, expires)) { | ||
| 231 | *t = ULLONG_MAX; | ||
| 232 | uv_rtc_find_next_timer(head, pnode); | ||
| 233 | spin_unlock_irqrestore(&head->lock, flags); | ||
| 234 | return 1; | ||
| 235 | } | ||
| 236 | } | ||
| 237 | |||
| 238 | spin_unlock_irqrestore(&head->lock, flags); | ||
| 239 | return 0; | ||
| 240 | } | ||
| 241 | |||
| 242 | /* | ||
| 243 | * Unset expiration time for current cpu. | ||
| 244 | * | ||
| 245 | * Returns 1 if this timer was pending. | ||
| 246 | */ | ||
| 247 | static int uv_rtc_unset_timer(int cpu) | ||
| 248 | { | ||
| 249 | int pnode = uv_cpu_to_pnode(cpu); | ||
| 250 | int bid = uv_cpu_to_blade_id(cpu); | ||
| 251 | struct uv_rtc_timer_head *head = blade_info[bid]; | ||
| 252 | int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id; | ||
| 253 | u64 *t = &head->cpu[bcpu].expires; | ||
| 254 | unsigned long flags; | ||
| 255 | int rc = 0; | ||
| 256 | |||
| 257 | spin_lock_irqsave(&head->lock, flags); | ||
| 258 | |||
| 259 | if (head->next_cpu == bcpu && uv_read_rtc() >= *t) | ||
| 260 | rc = 1; | ||
| 261 | |||
| 262 | *t = ULLONG_MAX; | ||
| 263 | |||
| 264 | /* Was the hardware setup for this timer? */ | ||
| 265 | if (head->next_cpu == bcpu) | ||
| 266 | uv_rtc_find_next_timer(head, pnode); | ||
| 267 | |||
| 268 | spin_unlock_irqrestore(&head->lock, flags); | ||
| 269 | |||
| 270 | return rc; | ||
| 271 | } | ||
| 272 | |||
| 273 | |||
| 274 | /* | ||
| 275 | * Kernel interface routines. | ||
| 276 | */ | ||
| 277 | |||
| 278 | /* | ||
| 279 | * Read the RTC. | ||
| 280 | */ | ||
| 281 | static cycle_t uv_read_rtc(void) | ||
| 282 | { | ||
| 283 | return (cycle_t)uv_read_local_mmr(UVH_RTC); | ||
| 284 | } | ||
| 285 | |||
| 286 | /* | ||
| 287 | * Program the next event, relative to now | ||
| 288 | */ | ||
| 289 | static int uv_rtc_next_event(unsigned long delta, | ||
| 290 | struct clock_event_device *ced) | ||
| 291 | { | ||
| 292 | int ced_cpu = cpumask_first(ced->cpumask); | ||
| 293 | |||
| 294 | return uv_rtc_set_timer(ced_cpu, delta + uv_read_rtc()); | ||
| 295 | } | ||
| 296 | |||
| 297 | /* | ||
| 298 | * Setup the RTC timer in oneshot mode | ||
| 299 | */ | ||
| 300 | static void uv_rtc_timer_setup(enum clock_event_mode mode, | ||
| 301 | struct clock_event_device *evt) | ||
| 302 | { | ||
| 303 | int ced_cpu = cpumask_first(evt->cpumask); | ||
| 304 | |||
| 305 | switch (mode) { | ||
| 306 | case CLOCK_EVT_MODE_PERIODIC: | ||
| 307 | case CLOCK_EVT_MODE_ONESHOT: | ||
| 308 | case CLOCK_EVT_MODE_RESUME: | ||
| 309 | /* Nothing to do here yet */ | ||
| 310 | break; | ||
| 311 | case CLOCK_EVT_MODE_UNUSED: | ||
| 312 | case CLOCK_EVT_MODE_SHUTDOWN: | ||
| 313 | uv_rtc_unset_timer(ced_cpu); | ||
| 314 | break; | ||
| 315 | } | ||
| 316 | } | ||
| 317 | |||
| 318 | static void uv_rtc_interrupt(void) | ||
| 319 | { | ||
| 320 | struct clock_event_device *ced = &__get_cpu_var(cpu_ced); | ||
| 321 | int cpu = smp_processor_id(); | ||
| 322 | |||
| 323 | if (!ced || !ced->event_handler) | ||
| 324 | return; | ||
| 325 | |||
| 326 | if (uv_rtc_unset_timer(cpu) != 1) | ||
| 327 | return; | ||
| 328 | |||
| 329 | ced->event_handler(ced); | ||
| 330 | } | ||
| 331 | |||
| 332 | static int __init uv_enable_rtc(char *str) | ||
| 333 | { | ||
| 334 | uv_rtc_enable = 1; | ||
| 335 | |||
| 336 | return 1; | ||
| 337 | } | ||
| 338 | __setup("uvrtc", uv_enable_rtc); | ||
| 339 | |||
| 340 | static __init void uv_rtc_register_clockevents(struct work_struct *dummy) | ||
| 341 | { | ||
| 342 | struct clock_event_device *ced = &__get_cpu_var(cpu_ced); | ||
| 343 | |||
| 344 | *ced = clock_event_device_uv; | ||
| 345 | ced->cpumask = cpumask_of(smp_processor_id()); | ||
| 346 | clockevents_register_device(ced); | ||
| 347 | } | ||
| 348 | |||
| 349 | static __init int uv_rtc_setup_clock(void) | ||
| 350 | { | ||
| 351 | int rc; | ||
| 352 | |||
| 353 | if (!uv_rtc_enable || !is_uv_system() || generic_interrupt_extension) | ||
| 354 | return -ENODEV; | ||
| 355 | |||
| 356 | generic_interrupt_extension = uv_rtc_interrupt; | ||
| 357 | |||
| 358 | clocksource_uv.mult = clocksource_hz2mult(sn_rtc_cycles_per_second, | ||
| 359 | clocksource_uv.shift); | ||
| 360 | |||
| 361 | rc = clocksource_register(&clocksource_uv); | ||
| 362 | if (rc) { | ||
| 363 | generic_interrupt_extension = NULL; | ||
| 364 | return rc; | ||
| 365 | } | ||
| 366 | |||
| 367 | /* Setup and register clockevents */ | ||
| 368 | rc = uv_rtc_allocate_timers(); | ||
| 369 | if (rc) { | ||
| 370 | clocksource_unregister(&clocksource_uv); | ||
| 371 | generic_interrupt_extension = NULL; | ||
| 372 | return rc; | ||
| 373 | } | ||
| 374 | |||
| 375 | clock_event_device_uv.mult = div_sc(sn_rtc_cycles_per_second, | ||
| 376 | NSEC_PER_SEC, clock_event_device_uv.shift); | ||
| 377 | |||
| 378 | clock_event_device_uv.min_delta_ns = NSEC_PER_SEC / | ||
| 379 | sn_rtc_cycles_per_second; | ||
| 380 | |||
| 381 | clock_event_device_uv.max_delta_ns = clocksource_uv.mask * | ||
| 382 | (NSEC_PER_SEC / sn_rtc_cycles_per_second); | ||
| 383 | |||
| 384 | rc = schedule_on_each_cpu(uv_rtc_register_clockevents); | ||
| 385 | if (rc) { | ||
| 386 | clocksource_unregister(&clocksource_uv); | ||
| 387 | generic_interrupt_extension = NULL; | ||
| 388 | uv_rtc_deallocate_timers(); | ||
| 389 | } | ||
| 390 | |||
| 391 | return rc; | ||
| 392 | } | ||
| 393 | arch_initcall(uv_rtc_setup_clock); | ||
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index d801d06af068..31ffc24eec4d 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c | |||
| @@ -24,18 +24,14 @@ | |||
| 24 | 24 | ||
| 25 | #include <asm/visws/cobalt.h> | 25 | #include <asm/visws/cobalt.h> |
| 26 | #include <asm/visws/piix4.h> | 26 | #include <asm/visws/piix4.h> |
| 27 | #include <asm/arch_hooks.h> | ||
| 28 | #include <asm/io_apic.h> | 27 | #include <asm/io_apic.h> |
| 29 | #include <asm/fixmap.h> | 28 | #include <asm/fixmap.h> |
| 30 | #include <asm/reboot.h> | 29 | #include <asm/reboot.h> |
| 31 | #include <asm/setup.h> | 30 | #include <asm/setup.h> |
| 31 | #include <asm/apic.h> | ||
| 32 | #include <asm/e820.h> | 32 | #include <asm/e820.h> |
| 33 | #include <asm/io.h> | 33 | #include <asm/io.h> |
| 34 | 34 | ||
| 35 | #include <mach_ipi.h> | ||
| 36 | |||
| 37 | #include "mach_apic.h" | ||
| 38 | |||
| 39 | #include <linux/kernel_stat.h> | 35 | #include <linux/kernel_stat.h> |
| 40 | 36 | ||
| 41 | #include <asm/i8259.h> | 37 | #include <asm/i8259.h> |
| @@ -49,8 +45,6 @@ | |||
| 49 | 45 | ||
| 50 | extern int no_broadcast; | 46 | extern int no_broadcast; |
| 51 | 47 | ||
| 52 | #include <asm/apic.h> | ||
| 53 | |||
| 54 | char visws_board_type = -1; | 48 | char visws_board_type = -1; |
| 55 | char visws_board_rev = -1; | 49 | char visws_board_rev = -1; |
| 56 | 50 | ||
| @@ -200,7 +194,7 @@ static void __init MP_processor_info(struct mpc_cpu *m) | |||
| 200 | return; | 194 | return; |
| 201 | } | 195 | } |
| 202 | 196 | ||
| 203 | apic_cpus = apicid_to_cpu_present(m->apicid); | 197 | apic_cpus = apic->apicid_to_cpu_present(m->apicid); |
| 204 | physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus); | 198 | physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus); |
| 205 | /* | 199 | /* |
| 206 | * Validate version | 200 | * Validate version |
| @@ -584,7 +578,7 @@ static struct irq_chip piix4_virtual_irq_type = { | |||
| 584 | static irqreturn_t piix4_master_intr(int irq, void *dev_id) | 578 | static irqreturn_t piix4_master_intr(int irq, void *dev_id) |
| 585 | { | 579 | { |
| 586 | int realirq; | 580 | int realirq; |
| 587 | irq_desc_t *desc; | 581 | struct irq_desc *desc; |
| 588 | unsigned long flags; | 582 | unsigned long flags; |
| 589 | 583 | ||
| 590 | spin_lock_irqsave(&i8259A_lock, flags); | 584 | spin_lock_irqsave(&i8259A_lock, flags); |
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 4eeb5cf9720d..d7ac84e7fc1c 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c | |||
| @@ -158,7 +158,7 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs) | |||
| 158 | ret = KVM86->regs32; | 158 | ret = KVM86->regs32; |
| 159 | 159 | ||
| 160 | ret->fs = current->thread.saved_fs; | 160 | ret->fs = current->thread.saved_fs; |
| 161 | loadsegment(gs, current->thread.saved_gs); | 161 | set_user_gs(ret, current->thread.saved_gs); |
| 162 | 162 | ||
| 163 | return ret; | 163 | return ret; |
| 164 | } | 164 | } |
| @@ -197,9 +197,9 @@ out: | |||
| 197 | static int do_vm86_irq_handling(int subfunction, int irqnumber); | 197 | static int do_vm86_irq_handling(int subfunction, int irqnumber); |
| 198 | static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk); | 198 | static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk); |
| 199 | 199 | ||
| 200 | asmlinkage int sys_vm86old(struct pt_regs regs) | 200 | int sys_vm86old(struct pt_regs *regs) |
| 201 | { | 201 | { |
| 202 | struct vm86_struct __user *v86 = (struct vm86_struct __user *)regs.bx; | 202 | struct vm86_struct __user *v86 = (struct vm86_struct __user *)regs->bx; |
| 203 | struct kernel_vm86_struct info; /* declare this _on top_, | 203 | struct kernel_vm86_struct info; /* declare this _on top_, |
| 204 | * this avoids wasting of stack space. | 204 | * this avoids wasting of stack space. |
| 205 | * This remains on the stack until we | 205 | * This remains on the stack until we |
| @@ -218,7 +218,7 @@ asmlinkage int sys_vm86old(struct pt_regs regs) | |||
| 218 | if (tmp) | 218 | if (tmp) |
| 219 | goto out; | 219 | goto out; |
| 220 | memset(&info.vm86plus, 0, (int)&info.regs32 - (int)&info.vm86plus); | 220 | memset(&info.vm86plus, 0, (int)&info.regs32 - (int)&info.vm86plus); |
| 221 | info.regs32 = &regs; | 221 | info.regs32 = regs; |
| 222 | tsk->thread.vm86_info = v86; | 222 | tsk->thread.vm86_info = v86; |
| 223 | do_sys_vm86(&info, tsk); | 223 | do_sys_vm86(&info, tsk); |
| 224 | ret = 0; /* we never return here */ | 224 | ret = 0; /* we never return here */ |
| @@ -227,7 +227,7 @@ out: | |||
| 227 | } | 227 | } |
| 228 | 228 | ||
| 229 | 229 | ||
| 230 | asmlinkage int sys_vm86(struct pt_regs regs) | 230 | int sys_vm86(struct pt_regs *regs) |
| 231 | { | 231 | { |
| 232 | struct kernel_vm86_struct info; /* declare this _on top_, | 232 | struct kernel_vm86_struct info; /* declare this _on top_, |
| 233 | * this avoids wasting of stack space. | 233 | * this avoids wasting of stack space. |
| @@ -239,12 +239,12 @@ asmlinkage int sys_vm86(struct pt_regs regs) | |||
| 239 | struct vm86plus_struct __user *v86; | 239 | struct vm86plus_struct __user *v86; |
| 240 | 240 | ||
| 241 | tsk = current; | 241 | tsk = current; |
| 242 | switch (regs.bx) { | 242 | switch (regs->bx) { |
| 243 | case VM86_REQUEST_IRQ: | 243 | case VM86_REQUEST_IRQ: |
| 244 | case VM86_FREE_IRQ: | 244 | case VM86_FREE_IRQ: |
| 245 | case VM86_GET_IRQ_BITS: | 245 | case VM86_GET_IRQ_BITS: |
| 246 | case VM86_GET_AND_RESET_IRQ: | 246 | case VM86_GET_AND_RESET_IRQ: |
| 247 | ret = do_vm86_irq_handling(regs.bx, (int)regs.cx); | 247 | ret = do_vm86_irq_handling(regs->bx, (int)regs->cx); |
| 248 | goto out; | 248 | goto out; |
| 249 | case VM86_PLUS_INSTALL_CHECK: | 249 | case VM86_PLUS_INSTALL_CHECK: |
| 250 | /* | 250 | /* |
| @@ -261,14 +261,14 @@ asmlinkage int sys_vm86(struct pt_regs regs) | |||
| 261 | ret = -EPERM; | 261 | ret = -EPERM; |
| 262 | if (tsk->thread.saved_sp0) | 262 | if (tsk->thread.saved_sp0) |
| 263 | goto out; | 263 | goto out; |
| 264 | v86 = (struct vm86plus_struct __user *)regs.cx; | 264 | v86 = (struct vm86plus_struct __user *)regs->cx; |
| 265 | tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs, | 265 | tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs, |
| 266 | offsetof(struct kernel_vm86_struct, regs32) - | 266 | offsetof(struct kernel_vm86_struct, regs32) - |
| 267 | sizeof(info.regs)); | 267 | sizeof(info.regs)); |
| 268 | ret = -EFAULT; | 268 | ret = -EFAULT; |
| 269 | if (tmp) | 269 | if (tmp) |
| 270 | goto out; | 270 | goto out; |
| 271 | info.regs32 = &regs; | 271 | info.regs32 = regs; |
| 272 | info.vm86plus.is_vm86pus = 1; | 272 | info.vm86plus.is_vm86pus = 1; |
| 273 | tsk->thread.vm86_info = (struct vm86_struct __user *)v86; | 273 | tsk->thread.vm86_info = (struct vm86_struct __user *)v86; |
| 274 | do_sys_vm86(&info, tsk); | 274 | do_sys_vm86(&info, tsk); |
| @@ -323,7 +323,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk | |||
| 323 | info->regs32->ax = 0; | 323 | info->regs32->ax = 0; |
| 324 | tsk->thread.saved_sp0 = tsk->thread.sp0; | 324 | tsk->thread.saved_sp0 = tsk->thread.sp0; |
| 325 | tsk->thread.saved_fs = info->regs32->fs; | 325 | tsk->thread.saved_fs = info->regs32->fs; |
| 326 | savesegment(gs, tsk->thread.saved_gs); | 326 | tsk->thread.saved_gs = get_user_gs(info->regs32); |
| 327 | 327 | ||
| 328 | tss = &per_cpu(init_tss, get_cpu()); | 328 | tss = &per_cpu(init_tss, get_cpu()); |
| 329 | tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0; | 329 | tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0; |
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index bef58b4982db..2cc4a90e2cb3 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c | |||
| @@ -680,10 +680,11 @@ static inline int __init activate_vmi(void) | |||
| 680 | para_fill(pv_mmu_ops.write_cr2, SetCR2); | 680 | para_fill(pv_mmu_ops.write_cr2, SetCR2); |
| 681 | para_fill(pv_mmu_ops.write_cr3, SetCR3); | 681 | para_fill(pv_mmu_ops.write_cr3, SetCR3); |
| 682 | para_fill(pv_cpu_ops.write_cr4, SetCR4); | 682 | para_fill(pv_cpu_ops.write_cr4, SetCR4); |
| 683 | para_fill(pv_irq_ops.save_fl, GetInterruptMask); | 683 | |
| 684 | para_fill(pv_irq_ops.restore_fl, SetInterruptMask); | 684 | para_fill(pv_irq_ops.save_fl.func, GetInterruptMask); |
| 685 | para_fill(pv_irq_ops.irq_disable, DisableInterrupts); | 685 | para_fill(pv_irq_ops.restore_fl.func, SetInterruptMask); |
| 686 | para_fill(pv_irq_ops.irq_enable, EnableInterrupts); | 686 | para_fill(pv_irq_ops.irq_disable.func, DisableInterrupts); |
| 687 | para_fill(pv_irq_ops.irq_enable.func, EnableInterrupts); | ||
| 687 | 688 | ||
| 688 | para_fill(pv_cpu_ops.wbinvd, WBINVD); | 689 | para_fill(pv_cpu_ops.wbinvd, WBINVD); |
| 689 | para_fill(pv_cpu_ops.read_tsc, RDTSC); | 690 | para_fill(pv_cpu_ops.read_tsc, RDTSC); |
| @@ -797,8 +798,8 @@ static inline int __init activate_vmi(void) | |||
| 797 | #endif | 798 | #endif |
| 798 | 799 | ||
| 799 | #ifdef CONFIG_X86_LOCAL_APIC | 800 | #ifdef CONFIG_X86_LOCAL_APIC |
| 800 | para_fill(apic_ops->read, APICRead); | 801 | para_fill(apic->read, APICRead); |
| 801 | para_fill(apic_ops->write, APICWrite); | 802 | para_fill(apic->write, APICWrite); |
| 802 | #endif | 803 | #endif |
| 803 | 804 | ||
| 804 | /* | 805 | /* |
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c index e5b088fffa40..33a788d5879c 100644 --- a/arch/x86/kernel/vmiclock_32.c +++ b/arch/x86/kernel/vmiclock_32.c | |||
| @@ -28,7 +28,6 @@ | |||
| 28 | 28 | ||
| 29 | #include <asm/vmi.h> | 29 | #include <asm/vmi.h> |
| 30 | #include <asm/vmi_time.h> | 30 | #include <asm/vmi_time.h> |
| 31 | #include <asm/arch_hooks.h> | ||
| 32 | #include <asm/apicdef.h> | 31 | #include <asm/apicdef.h> |
| 33 | #include <asm/apic.h> | 32 | #include <asm/apic.h> |
| 34 | #include <asm/timer.h> | 33 | #include <asm/timer.h> |
| @@ -256,7 +255,7 @@ void __devinit vmi_time_bsp_init(void) | |||
| 256 | */ | 255 | */ |
| 257 | clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL); | 256 | clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL); |
| 258 | local_irq_disable(); | 257 | local_irq_disable(); |
| 259 | #ifdef CONFIG_X86_SMP | 258 | #ifdef CONFIG_SMP |
| 260 | /* | 259 | /* |
| 261 | * XXX handle_percpu_irq only defined for SMP; we need to switch over | 260 | * XXX handle_percpu_irq only defined for SMP; we need to switch over |
| 262 | * to using it, since this is a local interrupt, which each CPU must | 261 | * to using it, since this is a local interrupt, which each CPU must |
| @@ -288,8 +287,7 @@ static struct clocksource clocksource_vmi; | |||
| 288 | static cycle_t read_real_cycles(void) | 287 | static cycle_t read_real_cycles(void) |
| 289 | { | 288 | { |
| 290 | cycle_t ret = (cycle_t)vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL); | 289 | cycle_t ret = (cycle_t)vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL); |
| 291 | return ret >= clocksource_vmi.cycle_last ? | 290 | return max(ret, clocksource_vmi.cycle_last); |
| 292 | ret : clocksource_vmi.cycle_last; | ||
| 293 | } | 291 | } |
| 294 | 292 | ||
| 295 | static struct clocksource clocksource_vmi = { | 293 | static struct clocksource clocksource_vmi = { |
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index 82c67559dde7..0d860963f268 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S | |||
| @@ -12,7 +12,7 @@ | |||
| 12 | 12 | ||
| 13 | #include <asm-generic/vmlinux.lds.h> | 13 | #include <asm-generic/vmlinux.lds.h> |
| 14 | #include <asm/thread_info.h> | 14 | #include <asm/thread_info.h> |
| 15 | #include <asm/page.h> | 15 | #include <asm/page_types.h> |
| 16 | #include <asm/cache.h> | 16 | #include <asm/cache.h> |
| 17 | #include <asm/boot.h> | 17 | #include <asm/boot.h> |
| 18 | 18 | ||
| @@ -178,14 +178,7 @@ SECTIONS | |||
| 178 | __initramfs_end = .; | 178 | __initramfs_end = .; |
| 179 | } | 179 | } |
| 180 | #endif | 180 | #endif |
| 181 | . = ALIGN(PAGE_SIZE); | 181 | PERCPU(PAGE_SIZE) |
| 182 | .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { | ||
| 183 | __per_cpu_start = .; | ||
| 184 | *(.data.percpu.page_aligned) | ||
| 185 | *(.data.percpu) | ||
| 186 | *(.data.percpu.shared_aligned) | ||
| 187 | __per_cpu_end = .; | ||
| 188 | } | ||
| 189 | . = ALIGN(PAGE_SIZE); | 182 | . = ALIGN(PAGE_SIZE); |
| 190 | /* freed after init ends here */ | 183 | /* freed after init ends here */ |
| 191 | 184 | ||
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index 1a614c0e6bef..5bf54e40c6ef 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S | |||
| @@ -5,7 +5,8 @@ | |||
| 5 | #define LOAD_OFFSET __START_KERNEL_map | 5 | #define LOAD_OFFSET __START_KERNEL_map |
| 6 | 6 | ||
| 7 | #include <asm-generic/vmlinux.lds.h> | 7 | #include <asm-generic/vmlinux.lds.h> |
| 8 | #include <asm/page.h> | 8 | #include <asm/asm-offsets.h> |
| 9 | #include <asm/page_types.h> | ||
| 9 | 10 | ||
| 10 | #undef i386 /* in case the preprocessor is a 32bit one */ | 11 | #undef i386 /* in case the preprocessor is a 32bit one */ |
| 11 | 12 | ||
| @@ -13,12 +14,15 @@ OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64") | |||
| 13 | OUTPUT_ARCH(i386:x86-64) | 14 | OUTPUT_ARCH(i386:x86-64) |
| 14 | ENTRY(phys_startup_64) | 15 | ENTRY(phys_startup_64) |
| 15 | jiffies_64 = jiffies; | 16 | jiffies_64 = jiffies; |
| 16 | _proxy_pda = 1; | ||
| 17 | PHDRS { | 17 | PHDRS { |
| 18 | text PT_LOAD FLAGS(5); /* R_E */ | 18 | text PT_LOAD FLAGS(5); /* R_E */ |
| 19 | data PT_LOAD FLAGS(7); /* RWE */ | 19 | data PT_LOAD FLAGS(7); /* RWE */ |
| 20 | user PT_LOAD FLAGS(7); /* RWE */ | 20 | user PT_LOAD FLAGS(7); /* RWE */ |
| 21 | data.init PT_LOAD FLAGS(7); /* RWE */ | 21 | data.init PT_LOAD FLAGS(7); /* RWE */ |
| 22 | #ifdef CONFIG_SMP | ||
| 23 | percpu PT_LOAD FLAGS(7); /* RWE */ | ||
| 24 | #endif | ||
| 25 | data.init2 PT_LOAD FLAGS(7); /* RWE */ | ||
| 22 | note PT_NOTE FLAGS(0); /* ___ */ | 26 | note PT_NOTE FLAGS(0); /* ___ */ |
| 23 | } | 27 | } |
| 24 | SECTIONS | 28 | SECTIONS |
| @@ -208,14 +212,28 @@ SECTIONS | |||
| 208 | __initramfs_end = .; | 212 | __initramfs_end = .; |
| 209 | #endif | 213 | #endif |
| 210 | 214 | ||
| 215 | #ifdef CONFIG_SMP | ||
| 216 | /* | ||
| 217 | * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the | ||
| 218 | * output PHDR, so the next output section - __data_nosave - should | ||
| 219 | * start another section data.init2. Also, pda should be at the head of | ||
| 220 | * percpu area. Preallocate it and define the percpu offset symbol | ||
| 221 | * so that it can be accessed as a percpu variable. | ||
| 222 | */ | ||
| 223 | . = ALIGN(PAGE_SIZE); | ||
| 224 | PERCPU_VADDR(0, :percpu) | ||
| 225 | #else | ||
| 211 | PERCPU(PAGE_SIZE) | 226 | PERCPU(PAGE_SIZE) |
| 227 | #endif | ||
| 212 | 228 | ||
| 213 | . = ALIGN(PAGE_SIZE); | 229 | . = ALIGN(PAGE_SIZE); |
| 214 | __init_end = .; | 230 | __init_end = .; |
| 215 | 231 | ||
| 216 | . = ALIGN(PAGE_SIZE); | 232 | . = ALIGN(PAGE_SIZE); |
| 217 | __nosave_begin = .; | 233 | __nosave_begin = .; |
| 218 | .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { *(.data.nosave) } | 234 | .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { |
| 235 | *(.data.nosave) | ||
| 236 | } :data.init2 /* use another section data.init2, see PERCPU_VADDR() above */ | ||
| 219 | . = ALIGN(PAGE_SIZE); | 237 | . = ALIGN(PAGE_SIZE); |
| 220 | __nosave_end = .; | 238 | __nosave_end = .; |
| 221 | 239 | ||
| @@ -239,8 +257,28 @@ SECTIONS | |||
| 239 | DWARF_DEBUG | 257 | DWARF_DEBUG |
| 240 | } | 258 | } |
| 241 | 259 | ||
| 260 | /* | ||
| 261 | * Per-cpu symbols which need to be offset from __per_cpu_load | ||
| 262 | * for the boot processor. | ||
| 263 | */ | ||
| 264 | #define INIT_PER_CPU(x) init_per_cpu__##x = per_cpu__##x + __per_cpu_load | ||
| 265 | INIT_PER_CPU(gdt_page); | ||
| 266 | INIT_PER_CPU(irq_stack_union); | ||
| 267 | |||
| 242 | /* | 268 | /* |
| 243 | * Build-time check on the image size: | 269 | * Build-time check on the image size: |
| 244 | */ | 270 | */ |
| 245 | ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), | 271 | ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), |
| 246 | "kernel image bigger than KERNEL_IMAGE_SIZE") | 272 | "kernel image bigger than KERNEL_IMAGE_SIZE") |
| 273 | |||
| 274 | #ifdef CONFIG_SMP | ||
| 275 | ASSERT((per_cpu__irq_stack_union == 0), | ||
| 276 | "irq_stack_union is not at start of per-cpu area"); | ||
| 277 | #endif | ||
| 278 | |||
| 279 | #ifdef CONFIG_KEXEC | ||
| 280 | #include <asm/kexec.h> | ||
| 281 | |||
| 282 | ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, | ||
| 283 | "kexec control code size is too big") | ||
| 284 | #endif | ||
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index a688f3bfaec2..74de562812cc 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c | |||
| @@ -22,7 +22,7 @@ | |||
| 22 | #include <asm/paravirt.h> | 22 | #include <asm/paravirt.h> |
| 23 | #include <asm/setup.h> | 23 | #include <asm/setup.h> |
| 24 | 24 | ||
| 25 | #if defined CONFIG_PCI && defined CONFIG_PARAVIRT | 25 | #ifdef CONFIG_PARAVIRT |
| 26 | /* | 26 | /* |
| 27 | * Interrupt control on vSMPowered systems: | 27 | * Interrupt control on vSMPowered systems: |
| 28 | * ~AC is a shadow of IF. If IF is 'on' AC should be 'off' | 28 | * ~AC is a shadow of IF. If IF is 'on' AC should be 'off' |
| @@ -37,6 +37,7 @@ static unsigned long vsmp_save_fl(void) | |||
| 37 | flags &= ~X86_EFLAGS_IF; | 37 | flags &= ~X86_EFLAGS_IF; |
| 38 | return flags; | 38 | return flags; |
| 39 | } | 39 | } |
| 40 | PV_CALLEE_SAVE_REGS_THUNK(vsmp_save_fl); | ||
| 40 | 41 | ||
| 41 | static void vsmp_restore_fl(unsigned long flags) | 42 | static void vsmp_restore_fl(unsigned long flags) |
| 42 | { | 43 | { |
| @@ -46,6 +47,7 @@ static void vsmp_restore_fl(unsigned long flags) | |||
| 46 | flags |= X86_EFLAGS_AC; | 47 | flags |= X86_EFLAGS_AC; |
| 47 | native_restore_fl(flags); | 48 | native_restore_fl(flags); |
| 48 | } | 49 | } |
| 50 | PV_CALLEE_SAVE_REGS_THUNK(vsmp_restore_fl); | ||
| 49 | 51 | ||
| 50 | static void vsmp_irq_disable(void) | 52 | static void vsmp_irq_disable(void) |
| 51 | { | 53 | { |
| @@ -53,6 +55,7 @@ static void vsmp_irq_disable(void) | |||
| 53 | 55 | ||
| 54 | native_restore_fl((flags & ~X86_EFLAGS_IF) | X86_EFLAGS_AC); | 56 | native_restore_fl((flags & ~X86_EFLAGS_IF) | X86_EFLAGS_AC); |
| 55 | } | 57 | } |
| 58 | PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_disable); | ||
| 56 | 59 | ||
| 57 | static void vsmp_irq_enable(void) | 60 | static void vsmp_irq_enable(void) |
| 58 | { | 61 | { |
| @@ -60,6 +63,7 @@ static void vsmp_irq_enable(void) | |||
| 60 | 63 | ||
| 61 | native_restore_fl((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC)); | 64 | native_restore_fl((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC)); |
| 62 | } | 65 | } |
| 66 | PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_enable); | ||
| 63 | 67 | ||
| 64 | static unsigned __init_or_module vsmp_patch(u8 type, u16 clobbers, void *ibuf, | 68 | static unsigned __init_or_module vsmp_patch(u8 type, u16 clobbers, void *ibuf, |
| 65 | unsigned long addr, unsigned len) | 69 | unsigned long addr, unsigned len) |
| @@ -90,10 +94,10 @@ static void __init set_vsmp_pv_ops(void) | |||
| 90 | cap, ctl); | 94 | cap, ctl); |
| 91 | if (cap & ctl & (1 << 4)) { | 95 | if (cap & ctl & (1 << 4)) { |
| 92 | /* Setup irq ops and turn on vSMP IRQ fastpath handling */ | 96 | /* Setup irq ops and turn on vSMP IRQ fastpath handling */ |
| 93 | pv_irq_ops.irq_disable = vsmp_irq_disable; | 97 | pv_irq_ops.irq_disable = PV_CALLEE_SAVE(vsmp_irq_disable); |
| 94 | pv_irq_ops.irq_enable = vsmp_irq_enable; | 98 | pv_irq_ops.irq_enable = PV_CALLEE_SAVE(vsmp_irq_enable); |
| 95 | pv_irq_ops.save_fl = vsmp_save_fl; | 99 | pv_irq_ops.save_fl = PV_CALLEE_SAVE(vsmp_save_fl); |
| 96 | pv_irq_ops.restore_fl = vsmp_restore_fl; | 100 | pv_irq_ops.restore_fl = PV_CALLEE_SAVE(vsmp_restore_fl); |
| 97 | pv_init_ops.patch = vsmp_patch; | 101 | pv_init_ops.patch = vsmp_patch; |
| 98 | 102 | ||
| 99 | ctl &= ~(1 << 4); | 103 | ctl &= ~(1 << 4); |
| @@ -110,7 +114,6 @@ static void __init set_vsmp_pv_ops(void) | |||
| 110 | } | 114 | } |
| 111 | #endif | 115 | #endif |
| 112 | 116 | ||
| 113 | #ifdef CONFIG_PCI | ||
| 114 | static int is_vsmp = -1; | 117 | static int is_vsmp = -1; |
| 115 | 118 | ||
| 116 | static void __init detect_vsmp_box(void) | 119 | static void __init detect_vsmp_box(void) |
| @@ -135,15 +138,6 @@ int is_vsmp_box(void) | |||
| 135 | return 0; | 138 | return 0; |
| 136 | } | 139 | } |
| 137 | } | 140 | } |
| 138 | #else | ||
| 139 | static void __init detect_vsmp_box(void) | ||
| 140 | { | ||
| 141 | } | ||
| 142 | int is_vsmp_box(void) | ||
| 143 | { | ||
| 144 | return 0; | ||
| 145 | } | ||
| 146 | #endif | ||
| 147 | 141 | ||
| 148 | void __init vsmp_init(void) | 142 | void __init vsmp_init(void) |
| 149 | { | 143 | { |
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 695e426aa354..3909e3ba5ce3 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c | |||
| @@ -58,5 +58,3 @@ EXPORT_SYMBOL(__memcpy); | |||
| 58 | EXPORT_SYMBOL(empty_zero_page); | 58 | EXPORT_SYMBOL(empty_zero_page); |
| 59 | EXPORT_SYMBOL(init_level4_pgt); | 59 | EXPORT_SYMBOL(init_level4_pgt); |
| 60 | EXPORT_SYMBOL(load_gs_index); | 60 | EXPORT_SYMBOL(load_gs_index); |
| 61 | |||
| 62 | EXPORT_SYMBOL(_proxy_pda); | ||
