| field | value | date |
|---|---|---|
| author | Ingo Molnar <mingo@elte.hu> | 2008-10-28 11:26:12 -0400 |
| committer | Ingo Molnar <mingo@elte.hu> | 2008-10-28 11:26:12 -0400 |
| commit | 7a9787e1eba95a166265e6a260cf30af04ef0a99 (patch) | |
| tree | e730a4565e0318140d2fbd2f0415d18a339d7336 /arch/x86/kernel | |
| parent | 41b9eb264c8407655db57b60b4457fe1b2ec9977 (diff) | |
| parent | 0173a3265b228da319ceb9c1ec6a5682fd1b2d92 (diff) | |
Merge commit 'v2.6.28-rc2' into x86/pci-ioapic-boot-irq-quirks
Diffstat (limited to 'arch/x86/kernel')
167 files changed, 16356 insertions, 13064 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index da140611bb57..d7e5a58ee22f 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
| @@ -7,9 +7,10 @@ extra-y := head_$(BITS).o head$(BITS).o head.o init_task.o vmlinu | |||
| 7 | CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE) | 7 | CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE) |
| 8 | 8 | ||
| 9 | ifdef CONFIG_FTRACE | 9 | ifdef CONFIG_FTRACE |
| 10 | # Do not profile debug utilities | 10 | # Do not profile debug and lowlevel utilities |
| 11 | CFLAGS_REMOVE_tsc.o = -pg | 11 | CFLAGS_REMOVE_tsc.o = -pg |
| 12 | CFLAGS_REMOVE_rtc.o = -pg | 12 | CFLAGS_REMOVE_rtc.o = -pg |
| 13 | CFLAGS_REMOVE_paravirt-spinlocks.o = -pg | ||
| 13 | endif | 14 | endif |
| 14 | 15 | ||
| 15 | # | 16 | # |
| @@ -22,7 +23,7 @@ CFLAGS_hpet.o := $(nostackp) | |||
| 22 | CFLAGS_tsc.o := $(nostackp) | 23 | CFLAGS_tsc.o := $(nostackp) |
| 23 | 24 | ||
| 24 | obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o | 25 | obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o |
| 25 | obj-y += traps_$(BITS).o irq_$(BITS).o | 26 | obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o |
| 26 | obj-y += time_$(BITS).o ioport.o ldt.o | 27 | obj-y += time_$(BITS).o ioport.o ldt.o |
| 27 | obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o | 28 | obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o |
| 28 | obj-$(CONFIG_X86_VISWS) += visws_quirks.o | 29 | obj-$(CONFIG_X86_VISWS) += visws_quirks.o |
| @@ -37,7 +38,7 @@ obj-y += tsc.o io_delay.o rtc.o | |||
| 37 | 38 | ||
| 38 | obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o | 39 | obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o |
| 39 | obj-y += process.o | 40 | obj-y += process.o |
| 40 | obj-y += i387.o | 41 | obj-y += i387.o xsave.o |
| 41 | obj-y += ptrace.o | 42 | obj-y += ptrace.o |
| 42 | obj-y += ds.o | 43 | obj-y += ds.o |
| 43 | obj-$(CONFIG_X86_32) += tls.o | 44 | obj-$(CONFIG_X86_32) += tls.o |
| @@ -50,7 +51,6 @@ obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o | |||
| 50 | obj-$(CONFIG_MCA) += mca_32.o | 51 | obj-$(CONFIG_MCA) += mca_32.o |
| 51 | obj-$(CONFIG_X86_MSR) += msr.o | 52 | obj-$(CONFIG_X86_MSR) += msr.o |
| 52 | obj-$(CONFIG_X86_CPUID) += cpuid.o | 53 | obj-$(CONFIG_X86_CPUID) += cpuid.o |
| 53 | obj-$(CONFIG_MICROCODE) += microcode.o | ||
| 54 | obj-$(CONFIG_PCI) += early-quirks.o | 54 | obj-$(CONFIG_PCI) += early-quirks.o |
| 55 | apm-y := apm_32.o | 55 | apm-y := apm_32.o |
| 56 | obj-$(CONFIG_APM) += apm.o | 56 | obj-$(CONFIG_APM) += apm.o |
| @@ -60,14 +60,15 @@ obj-$(CONFIG_X86_32_SMP) += smpcommon.o | |||
| 60 | obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o | 60 | obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o |
| 61 | obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o | 61 | obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o |
| 62 | obj-$(CONFIG_X86_MPPARSE) += mpparse.o | 62 | obj-$(CONFIG_X86_MPPARSE) += mpparse.o |
| 63 | obj-$(CONFIG_X86_LOCAL_APIC) += apic_$(BITS).o nmi.o | 63 | obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o |
| 64 | obj-$(CONFIG_X86_IO_APIC) += io_apic_$(BITS).o | 64 | obj-$(CONFIG_X86_IO_APIC) += io_apic.o |
| 65 | obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o | 65 | obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o |
| 66 | obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o | 66 | obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o |
| 67 | obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o | 67 | obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o |
| 68 | obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o | 68 | obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o |
| 69 | obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o | 69 | obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o |
| 70 | obj-$(CONFIG_X86_NUMAQ) += numaq_32.o | 70 | obj-$(CONFIG_X86_NUMAQ) += numaq_32.o |
| 71 | obj-$(CONFIG_X86_ES7000) += es7000_32.o | ||
| 71 | obj-$(CONFIG_X86_SUMMIT_NUMA) += summit_32.o | 72 | obj-$(CONFIG_X86_SUMMIT_NUMA) += summit_32.o |
| 72 | obj-y += vsmp_64.o | 73 | obj-y += vsmp_64.o |
| 73 | obj-$(CONFIG_KPROBES) += kprobes.o | 74 | obj-$(CONFIG_KPROBES) += kprobes.o |
| @@ -88,7 +89,7 @@ obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o | |||
| 88 | obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o | 89 | obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o |
| 89 | obj-$(CONFIG_KVM_GUEST) += kvm.o | 90 | obj-$(CONFIG_KVM_GUEST) += kvm.o |
| 90 | obj-$(CONFIG_KVM_CLOCK) += kvmclock.o | 91 | obj-$(CONFIG_KVM_CLOCK) += kvmclock.o |
| 91 | obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o | 92 | obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o paravirt-spinlocks.o |
| 92 | obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o | 93 | obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o |
| 93 | 94 | ||
| 94 | obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o | 95 | obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o |
| @@ -98,10 +99,18 @@ scx200-y += scx200_32.o | |||
| 98 | 99 | ||
| 99 | obj-$(CONFIG_OLPC) += olpc.o | 100 | obj-$(CONFIG_OLPC) += olpc.o |
| 100 | 101 | ||
| 102 | microcode-y := microcode_core.o | ||
| 103 | microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o | ||
| 104 | microcode-$(CONFIG_MICROCODE_AMD) += microcode_amd.o | ||
| 105 | obj-$(CONFIG_MICROCODE) += microcode.o | ||
| 106 | |||
| 101 | ### | 107 | ### |
| 102 | # 64 bit specific files | 108 | # 64 bit specific files |
| 103 | ifeq ($(CONFIG_X86_64),y) | 109 | ifeq ($(CONFIG_X86_64),y) |
| 104 | obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o | 110 | obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o |
| 111 | obj-y += bios_uv.o uv_irq.o uv_sysfs.o | ||
| 112 | obj-y += genx2apic_cluster.o | ||
| 113 | obj-y += genx2apic_phys.o | ||
| 105 | obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o | 114 | obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o |
| 106 | obj-$(CONFIG_AUDIT) += audit_64.o | 115 | obj-$(CONFIG_AUDIT) += audit_64.o |
| 107 | 116 | ||
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index f489d7a9be92..8c1f76abae9e 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
| @@ -58,7 +58,6 @@ EXPORT_SYMBOL(acpi_disabled); | |||
| 58 | #ifdef CONFIG_X86_64 | 58 | #ifdef CONFIG_X86_64 |
| 59 | 59 | ||
| 60 | #include <asm/proto.h> | 60 | #include <asm/proto.h> |
| 61 | #include <asm/genapic.h> | ||
| 62 | 61 | ||
| 63 | #else /* X86 */ | 62 | #else /* X86 */ |
| 64 | 63 | ||
| @@ -154,10 +153,21 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size) | |||
| 154 | } | 153 | } |
| 155 | 154 | ||
| 156 | #ifdef CONFIG_PCI_MMCONFIG | 155 | #ifdef CONFIG_PCI_MMCONFIG |
| 156 | |||
| 157 | static int acpi_mcfg_64bit_base_addr __initdata = FALSE; | ||
| 158 | |||
| 157 | /* The physical address of the MMCONFIG aperture. Set from ACPI tables. */ | 159 | /* The physical address of the MMCONFIG aperture. Set from ACPI tables. */ |
| 158 | struct acpi_mcfg_allocation *pci_mmcfg_config; | 160 | struct acpi_mcfg_allocation *pci_mmcfg_config; |
| 159 | int pci_mmcfg_config_num; | 161 | int pci_mmcfg_config_num; |
| 160 | 162 | ||
| 163 | static int __init acpi_mcfg_oem_check(struct acpi_table_mcfg *mcfg) | ||
| 164 | { | ||
| 165 | if (!strcmp(mcfg->header.oem_id, "SGI")) | ||
| 166 | acpi_mcfg_64bit_base_addr = TRUE; | ||
| 167 | |||
| 168 | return 0; | ||
| 169 | } | ||
| 170 | |||
| 161 | int __init acpi_parse_mcfg(struct acpi_table_header *header) | 171 | int __init acpi_parse_mcfg(struct acpi_table_header *header) |
| 162 | { | 172 | { |
| 163 | struct acpi_table_mcfg *mcfg; | 173 | struct acpi_table_mcfg *mcfg; |
| @@ -190,8 +200,12 @@ int __init acpi_parse_mcfg(struct acpi_table_header *header) | |||
| 190 | } | 200 | } |
| 191 | 201 | ||
| 192 | memcpy(pci_mmcfg_config, &mcfg[1], config_size); | 202 | memcpy(pci_mmcfg_config, &mcfg[1], config_size); |
| 203 | |||
| 204 | acpi_mcfg_oem_check(mcfg); | ||
| 205 | |||
| 193 | for (i = 0; i < pci_mmcfg_config_num; ++i) { | 206 | for (i = 0; i < pci_mmcfg_config_num; ++i) { |
| 194 | if (pci_mmcfg_config[i].address > 0xFFFFFFFF) { | 207 | if ((pci_mmcfg_config[i].address > 0xFFFFFFFF) && |
| 208 | !acpi_mcfg_64bit_base_addr) { | ||
| 195 | printk(KERN_ERR PREFIX | 209 | printk(KERN_ERR PREFIX |
| 196 | "MMCONFIG not in low 4GB of memory\n"); | 210 | "MMCONFIG not in low 4GB of memory\n"); |
| 197 | kfree(pci_mmcfg_config); | 211 | kfree(pci_mmcfg_config); |
| @@ -239,10 +253,8 @@ static void __cpuinit acpi_register_lapic(int id, u8 enabled) | |||
| 239 | return; | 253 | return; |
| 240 | } | 254 | } |
| 241 | 255 | ||
| 242 | #ifdef CONFIG_X86_32 | ||
| 243 | if (boot_cpu_physical_apicid != -1U) | 256 | if (boot_cpu_physical_apicid != -1U) |
| 244 | ver = apic_version[boot_cpu_physical_apicid]; | 257 | ver = apic_version[boot_cpu_physical_apicid]; |
| 245 | #endif | ||
| 246 | 258 | ||
| 247 | generic_processor_info(id, ver); | 259 | generic_processor_info(id, ver); |
| 248 | } | 260 | } |
| @@ -761,11 +773,9 @@ static void __init acpi_register_lapic_address(unsigned long address) | |||
| 761 | 773 | ||
| 762 | set_fixmap_nocache(FIX_APIC_BASE, address); | 774 | set_fixmap_nocache(FIX_APIC_BASE, address); |
| 763 | if (boot_cpu_physical_apicid == -1U) { | 775 | if (boot_cpu_physical_apicid == -1U) { |
| 764 | boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); | 776 | boot_cpu_physical_apicid = read_apic_id(); |
| 765 | #ifdef CONFIG_X86_32 | ||
| 766 | apic_version[boot_cpu_physical_apicid] = | 777 | apic_version[boot_cpu_physical_apicid] = |
| 767 | GET_APIC_VERSION(apic_read(APIC_LVR)); | 778 | GET_APIC_VERSION(apic_read(APIC_LVR)); |
| 768 | #endif | ||
| 769 | } | 779 | } |
| 770 | } | 780 | } |
| 771 | 781 | ||
| @@ -1021,7 +1031,7 @@ void __init mp_config_acpi_legacy_irqs(void) | |||
| 1021 | mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA; | 1031 | mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA; |
| 1022 | #endif | 1032 | #endif |
| 1023 | set_bit(MP_ISA_BUS, mp_bus_not_pci); | 1033 | set_bit(MP_ISA_BUS, mp_bus_not_pci); |
| 1024 | Dprintk("Bus #%d is ISA\n", MP_ISA_BUS); | 1034 | pr_debug("Bus #%d is ISA\n", MP_ISA_BUS); |
| 1025 | 1035 | ||
| 1026 | #ifdef CONFIG_X86_ES7000 | 1036 | #ifdef CONFIG_X86_ES7000 |
| 1027 | /* | 1037 | /* |
| @@ -1127,8 +1137,8 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity) | |||
| 1127 | return gsi; | 1137 | return gsi; |
| 1128 | } | 1138 | } |
| 1129 | if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) { | 1139 | if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) { |
| 1130 | Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n", | 1140 | pr_debug("Pin %d-%d already programmed\n", |
| 1131 | mp_ioapic_routing[ioapic].apic_id, ioapic_pin); | 1141 | mp_ioapic_routing[ioapic].apic_id, ioapic_pin); |
| 1132 | #ifdef CONFIG_X86_32 | 1142 | #ifdef CONFIG_X86_32 |
| 1133 | return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]); | 1143 | return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]); |
| 1134 | #else | 1144 | #else |
| @@ -1247,7 +1257,7 @@ static int __init acpi_parse_madt_ioapic_entries(void) | |||
| 1247 | 1257 | ||
| 1248 | count = | 1258 | count = |
| 1249 | acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE, acpi_parse_int_src_ovr, | 1259 | acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE, acpi_parse_int_src_ovr, |
| 1250 | NR_IRQ_VECTORS); | 1260 | nr_irqs); |
| 1251 | if (count < 0) { | 1261 | if (count < 0) { |
| 1252 | printk(KERN_ERR PREFIX | 1262 | printk(KERN_ERR PREFIX |
| 1253 | "Error parsing interrupt source overrides entry\n"); | 1263 | "Error parsing interrupt source overrides entry\n"); |
| @@ -1267,7 +1277,7 @@ static int __init acpi_parse_madt_ioapic_entries(void) | |||
| 1267 | 1277 | ||
| 1268 | count = | 1278 | count = |
| 1269 | acpi_table_parse_madt(ACPI_MADT_TYPE_NMI_SOURCE, acpi_parse_nmi_src, | 1279 | acpi_table_parse_madt(ACPI_MADT_TYPE_NMI_SOURCE, acpi_parse_nmi_src, |
| 1270 | NR_IRQ_VECTORS); | 1280 | nr_irqs); |
| 1271 | if (count < 0) { | 1281 | if (count < 0) { |
| 1272 | printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n"); | 1282 | printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n"); |
| 1273 | /* TBD: Cleanup to allow fallback to MPS */ | 1283 | /* TBD: Cleanup to allow fallback to MPS */ |
| @@ -1337,7 +1347,9 @@ static void __init acpi_process_madt(void) | |||
| 1337 | acpi_ioapic = 1; | 1347 | acpi_ioapic = 1; |
| 1338 | 1348 | ||
| 1339 | smp_found_config = 1; | 1349 | smp_found_config = 1; |
| 1350 | #ifdef CONFIG_X86_32 | ||
| 1340 | setup_apic_routing(); | 1351 | setup_apic_routing(); |
| 1352 | #endif | ||
| 1341 | } | 1353 | } |
| 1342 | } | 1354 | } |
| 1343 | if (error == -EINVAL) { | 1355 | if (error == -EINVAL) { |
| @@ -1407,8 +1419,16 @@ static int __init force_acpi_ht(const struct dmi_system_id *d) | |||
| 1407 | */ | 1419 | */ |
| 1408 | static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d) | 1420 | static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d) |
| 1409 | { | 1421 | { |
| 1410 | pr_notice("%s detected: Ignoring BIOS IRQ0 pin2 override\n", d->ident); | 1422 | /* |
| 1411 | acpi_skip_timer_override = 1; | 1423 | * The ati_ixp4x0_rev() early PCI quirk should have set |
| 1424 | * the acpi_skip_timer_override flag already: | ||
| 1425 | */ | ||
| 1426 | if (!acpi_skip_timer_override) { | ||
| 1427 | WARN(1, KERN_ERR "ati_ixp4x0 quirk not complete.\n"); | ||
| 1428 | pr_notice("%s detected: Ignoring BIOS IRQ0 pin2 override\n", | ||
| 1429 | d->ident); | ||
| 1430 | acpi_skip_timer_override = 1; | ||
| 1431 | } | ||
| 1412 | return 0; | 1432 | return 0; |
| 1413 | } | 1433 | } |
| 1414 | 1434 | ||
| @@ -1579,6 +1599,11 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = { | |||
| 1579 | DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"), | 1599 | DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"), |
| 1580 | }, | 1600 | }, |
| 1581 | }, | 1601 | }, |
| 1602 | {} | ||
| 1603 | }; | ||
| 1604 | |||
| 1605 | /* second table for DMI checks that should run after early-quirks */ | ||
| 1606 | static struct dmi_system_id __initdata acpi_dmi_table_late[] = { | ||
| 1582 | /* | 1607 | /* |
| 1583 | * HP laptops which use a DSDT reporting as HP/SB400/10000, | 1608 | * HP laptops which use a DSDT reporting as HP/SB400/10000, |
| 1584 | * which includes some code which overrides all temperature | 1609 | * which includes some code which overrides all temperature |
| @@ -1591,6 +1616,14 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = { | |||
| 1591 | */ | 1616 | */ |
| 1592 | { | 1617 | { |
| 1593 | .callback = dmi_ignore_irq0_timer_override, | 1618 | .callback = dmi_ignore_irq0_timer_override, |
| 1619 | .ident = "HP nx6115 laptop", | ||
| 1620 | .matches = { | ||
| 1621 | DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), | ||
| 1622 | DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6115"), | ||
| 1623 | }, | ||
| 1624 | }, | ||
| 1625 | { | ||
| 1626 | .callback = dmi_ignore_irq0_timer_override, | ||
| 1594 | .ident = "HP NX6125 laptop", | 1627 | .ident = "HP NX6125 laptop", |
| 1595 | .matches = { | 1628 | .matches = { |
| 1596 | DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), | 1629 | DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), |
| @@ -1605,6 +1638,14 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = { | |||
| 1605 | DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6325"), | 1638 | DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6325"), |
| 1606 | }, | 1639 | }, |
| 1607 | }, | 1640 | }, |
| 1641 | { | ||
| 1642 | .callback = dmi_ignore_irq0_timer_override, | ||
| 1643 | .ident = "HP 6715b laptop", | ||
| 1644 | .matches = { | ||
| 1645 | DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), | ||
| 1646 | DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq 6715b"), | ||
| 1647 | }, | ||
| 1648 | }, | ||
| 1608 | {} | 1649 | {} |
| 1609 | }; | 1650 | }; |
| 1610 | 1651 | ||
| @@ -1691,6 +1732,9 @@ int __init early_acpi_boot_init(void) | |||
| 1691 | 1732 | ||
| 1692 | int __init acpi_boot_init(void) | 1733 | int __init acpi_boot_init(void) |
| 1693 | { | 1734 | { |
| 1735 | /* those are executed after early-quirks are executed */ | ||
| 1736 | dmi_check_system(acpi_dmi_table_late); | ||
| 1737 | |||
| 1694 | /* | 1738 | /* |
| 1695 | * If acpi_disabled, bail out | 1739 | * If acpi_disabled, bail out |
| 1696 | * One exception: acpi=ht continues far enough to enumerate LAPICs | 1740 | * One exception: acpi=ht continues far enough to enumerate LAPICs |
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 868de3d5c39d..806b4e9051b4 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
| @@ -9,6 +9,8 @@ | |||
| 9 | #include <linux/bootmem.h> | 9 | #include <linux/bootmem.h> |
| 10 | #include <linux/dmi.h> | 10 | #include <linux/dmi.h> |
| 11 | #include <linux/cpumask.h> | 11 | #include <linux/cpumask.h> |
| 12 | #include <asm/segment.h> | ||
| 13 | #include <asm/desc.h> | ||
| 12 | 14 | ||
| 13 | #include "realmode/wakeup.h" | 15 | #include "realmode/wakeup.h" |
| 14 | #include "sleep.h" | 16 | #include "sleep.h" |
| @@ -19,19 +21,10 @@ unsigned long acpi_realmode_flags; | |||
| 19 | /* address in low memory of the wakeup routine. */ | 21 | /* address in low memory of the wakeup routine. */ |
| 20 | static unsigned long acpi_realmode; | 22 | static unsigned long acpi_realmode; |
| 21 | 23 | ||
| 22 | #ifdef CONFIG_64BIT | 24 | #if defined(CONFIG_SMP) && defined(CONFIG_64BIT) |
| 23 | static char temp_stack[10240]; | 25 | static char temp_stack[4096]; |
| 24 | #endif | 26 | #endif |
| 25 | 27 | ||
| 26 | /* XXX: this macro should move to asm-x86/segment.h and be shared with the | ||
| 27 | boot code... */ | ||
| 28 | #define GDT_ENTRY(flags, base, limit) \ | ||
| 29 | (((u64)(base & 0xff000000) << 32) | \ | ||
| 30 | ((u64)flags << 40) | \ | ||
| 31 | ((u64)(limit & 0x00ff0000) << 32) | \ | ||
| 32 | ((u64)(base & 0x00ffffff) << 16) | \ | ||
| 33 | ((u64)(limit & 0x0000ffff))) | ||
| 34 | |||
| 35 | /** | 28 | /** |
| 36 | * acpi_save_state_mem - save kernel state | 29 | * acpi_save_state_mem - save kernel state |
| 37 | * | 30 | * |
| @@ -94,7 +87,7 @@ int acpi_save_state_mem(void) | |||
| 94 | #endif /* !CONFIG_64BIT */ | 87 | #endif /* !CONFIG_64BIT */ |
| 95 | 88 | ||
| 96 | header->pmode_cr0 = read_cr0(); | 89 | header->pmode_cr0 = read_cr0(); |
| 97 | header->pmode_cr4 = read_cr4(); | 90 | header->pmode_cr4 = read_cr4_safe(); |
| 98 | header->realmode_flags = acpi_realmode_flags; | 91 | header->realmode_flags = acpi_realmode_flags; |
| 99 | header->real_magic = 0x12345678; | 92 | header->real_magic = 0x12345678; |
| 100 | 93 | ||
| @@ -105,7 +98,9 @@ int acpi_save_state_mem(void) | |||
| 105 | #else /* CONFIG_64BIT */ | 98 | #else /* CONFIG_64BIT */ |
| 106 | header->trampoline_segment = setup_trampoline() >> 4; | 99 | header->trampoline_segment = setup_trampoline() >> 4; |
| 107 | #ifdef CONFIG_SMP | 100 | #ifdef CONFIG_SMP |
| 108 | stack_start.sp = temp_stack + 4096; | 101 | stack_start.sp = temp_stack + sizeof(temp_stack); |
| 102 | early_gdt_descr.address = | ||
| 103 | (unsigned long)get_cpu_gdt_table(smp_processor_id()); | ||
| 109 | #endif | 104 | #endif |
| 110 | initial_code = (unsigned long)wakeup_long64; | 105 | initial_code = (unsigned long)wakeup_long64; |
| 111 | saved_magic = 0x123456789abcdef0; | 106 | saved_magic = 0x123456789abcdef0; |
| @@ -158,6 +153,10 @@ static int __init acpi_sleep_setup(char *str) | |||
| 158 | acpi_realmode_flags |= 2; | 153 | acpi_realmode_flags |= 2; |
| 159 | if (strncmp(str, "s3_beep", 7) == 0) | 154 | if (strncmp(str, "s3_beep", 7) == 0) |
| 160 | acpi_realmode_flags |= 4; | 155 | acpi_realmode_flags |= 4; |
| 156 | #ifdef CONFIG_HIBERNATION | ||
| 157 | if (strncmp(str, "s4_nohwsig", 10) == 0) | ||
| 158 | acpi_no_s4_hw_signature(); | ||
| 159 | #endif | ||
| 161 | if (strncmp(str, "old_ordering", 12) == 0) | 160 | if (strncmp(str, "old_ordering", 12) == 0) |
| 162 | acpi_old_suspend_ordering(); | 161 | acpi_old_suspend_ordering(); |
| 163 | str = strchr(str, ','); | 162 | str = strchr(str, ','); |
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 2763cb37b553..a84ac7b570e6 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
| @@ -145,35 +145,25 @@ static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = { | |||
| 145 | extern char __vsyscall_0; | 145 | extern char __vsyscall_0; |
| 146 | const unsigned char *const *find_nop_table(void) | 146 | const unsigned char *const *find_nop_table(void) |
| 147 | { | 147 | { |
| 148 | return boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || | 148 | if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && |
| 149 | boot_cpu_data.x86 < 6 ? k8_nops : p6_nops; | 149 | boot_cpu_has(X86_FEATURE_NOPL)) |
| 150 | return p6_nops; | ||
| 151 | else | ||
| 152 | return k8_nops; | ||
| 150 | } | 153 | } |
| 151 | 154 | ||
| 152 | #else /* CONFIG_X86_64 */ | 155 | #else /* CONFIG_X86_64 */ |
| 153 | 156 | ||
| 154 | static const struct nop { | ||
| 155 | int cpuid; | ||
| 156 | const unsigned char *const *noptable; | ||
| 157 | } noptypes[] = { | ||
| 158 | { X86_FEATURE_K8, k8_nops }, | ||
| 159 | { X86_FEATURE_K7, k7_nops }, | ||
| 160 | { X86_FEATURE_P4, p6_nops }, | ||
| 161 | { X86_FEATURE_P3, p6_nops }, | ||
| 162 | { -1, NULL } | ||
| 163 | }; | ||
| 164 | |||
| 165 | const unsigned char *const *find_nop_table(void) | 157 | const unsigned char *const *find_nop_table(void) |
| 166 | { | 158 | { |
| 167 | const unsigned char *const *noptable = intel_nops; | 159 | if (boot_cpu_has(X86_FEATURE_K8)) |
| 168 | int i; | 160 | return k8_nops; |
| 169 | 161 | else if (boot_cpu_has(X86_FEATURE_K7)) | |
| 170 | for (i = 0; noptypes[i].cpuid >= 0; i++) { | 162 | return k7_nops; |
| 171 | if (boot_cpu_has(noptypes[i].cpuid)) { | 163 | else if (boot_cpu_has(X86_FEATURE_NOPL)) |
| 172 | noptable = noptypes[i].noptable; | 164 | return p6_nops; |
| 173 | break; | 165 | else |
| 174 | } | 166 | return intel_nops; |
| 175 | } | ||
| 176 | return noptable; | ||
| 177 | } | 167 | } |
| 178 | 168 | ||
| 179 | #endif /* CONFIG_X86_64 */ | 169 | #endif /* CONFIG_X86_64 */ |
| @@ -241,25 +231,25 @@ static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end) | |||
| 241 | continue; | 231 | continue; |
| 242 | if (*ptr > text_end) | 232 | if (*ptr > text_end) |
| 243 | continue; | 233 | continue; |
| 244 | text_poke(*ptr, ((unsigned char []){0xf0}), 1); /* add lock prefix */ | 234 | /* turn DS segment override prefix into lock prefix */ |
| 235 | text_poke(*ptr, ((unsigned char []){0xf0}), 1); | ||
| 245 | }; | 236 | }; |
| 246 | } | 237 | } |
| 247 | 238 | ||
| 248 | static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end) | 239 | static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end) |
| 249 | { | 240 | { |
| 250 | u8 **ptr; | 241 | u8 **ptr; |
| 251 | char insn[1]; | ||
| 252 | 242 | ||
| 253 | if (noreplace_smp) | 243 | if (noreplace_smp) |
| 254 | return; | 244 | return; |
| 255 | 245 | ||
| 256 | add_nops(insn, 1); | ||
| 257 | for (ptr = start; ptr < end; ptr++) { | 246 | for (ptr = start; ptr < end; ptr++) { |
| 258 | if (*ptr < text) | 247 | if (*ptr < text) |
| 259 | continue; | 248 | continue; |
| 260 | if (*ptr > text_end) | 249 | if (*ptr > text_end) |
| 261 | continue; | 250 | continue; |
| 262 | text_poke(*ptr, insn, 1); | 251 | /* turn lock prefix into DS segment override prefix */ |
| 252 | text_poke(*ptr, ((unsigned char []){0x3E}), 1); | ||
| 263 | }; | 253 | }; |
| 264 | } | 254 | } |
| 265 | 255 | ||
| @@ -454,7 +444,7 @@ void __init alternative_instructions(void) | |||
| 454 | _text, _etext); | 444 | _text, _etext); |
| 455 | 445 | ||
| 456 | /* Only switch to UP mode if we don't immediately boot others */ | 446 | /* Only switch to UP mode if we don't immediately boot others */ |
| 457 | if (num_possible_cpus() == 1 || setup_max_cpus <= 1) | 447 | if (num_present_cpus() == 1 || setup_max_cpus <= 1) |
| 458 | alternatives_smp_switch(0); | 448 | alternatives_smp_switch(0); |
| 459 | } | 449 | } |
| 460 | #endif | 450 | #endif |
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index f2766d84c7a0..a8fd9ebdc8e2 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
| @@ -23,36 +23,149 @@ | |||
| 23 | #include <linux/scatterlist.h> | 23 | #include <linux/scatterlist.h> |
| 24 | #include <linux/iommu-helper.h> | 24 | #include <linux/iommu-helper.h> |
| 25 | #include <asm/proto.h> | 25 | #include <asm/proto.h> |
| 26 | #include <asm/gart.h> | 26 | #include <asm/iommu.h> |
| 27 | #include <asm/amd_iommu_types.h> | 27 | #include <asm/amd_iommu_types.h> |
| 28 | #include <asm/amd_iommu.h> | 28 | #include <asm/amd_iommu.h> |
| 29 | 29 | ||
| 30 | #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28)) | 30 | #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28)) |
| 31 | 31 | ||
| 32 | #define to_pages(addr, size) \ | 32 | #define EXIT_LOOP_COUNT 10000000 |
| 33 | (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT) | ||
| 34 | 33 | ||
| 35 | static DEFINE_RWLOCK(amd_iommu_devtable_lock); | 34 | static DEFINE_RWLOCK(amd_iommu_devtable_lock); |
| 36 | 35 | ||
| 37 | struct command { | 36 | /* A list of preallocated protection domains */ |
| 37 | static LIST_HEAD(iommu_pd_list); | ||
| 38 | static DEFINE_SPINLOCK(iommu_pd_list_lock); | ||
| 39 | |||
| 40 | /* | ||
| 41 | * general struct to manage commands send to an IOMMU | ||
| 42 | */ | ||
| 43 | struct iommu_cmd { | ||
| 38 | u32 data[4]; | 44 | u32 data[4]; |
| 39 | }; | 45 | }; |
| 40 | 46 | ||
| 41 | static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, | 47 | static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, |
| 42 | struct unity_map_entry *e); | 48 | struct unity_map_entry *e); |
| 43 | 49 | ||
| 50 | /* returns !0 if the IOMMU is caching non-present entries in its TLB */ | ||
| 44 | static int iommu_has_npcache(struct amd_iommu *iommu) | 51 | static int iommu_has_npcache(struct amd_iommu *iommu) |
| 45 | { | 52 | { |
| 46 | return iommu->cap & IOMMU_CAP_NPCACHE; | 53 | return iommu->cap & IOMMU_CAP_NPCACHE; |
| 47 | } | 54 | } |
| 48 | 55 | ||
| 49 | static int __iommu_queue_command(struct amd_iommu *iommu, struct command *cmd) | 56 | /**************************************************************************** |
| 57 | * | ||
| 58 | * Interrupt handling functions | ||
| 59 | * | ||
| 60 | ****************************************************************************/ | ||
| 61 | |||
| 62 | static void iommu_print_event(void *__evt) | ||
| 63 | { | ||
| 64 | u32 *event = __evt; | ||
| 65 | int type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK; | ||
| 66 | int devid = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK; | ||
| 67 | int domid = (event[1] >> EVENT_DOMID_SHIFT) & EVENT_DOMID_MASK; | ||
| 68 | int flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK; | ||
| 69 | u64 address = (u64)(((u64)event[3]) << 32) | event[2]; | ||
| 70 | |||
| 71 | printk(KERN_ERR "AMD IOMMU: Event logged ["); | ||
| 72 | |||
| 73 | switch (type) { | ||
| 74 | case EVENT_TYPE_ILL_DEV: | ||
| 75 | printk("ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x " | ||
| 76 | "address=0x%016llx flags=0x%04x]\n", | ||
| 77 | PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), | ||
| 78 | address, flags); | ||
| 79 | break; | ||
| 80 | case EVENT_TYPE_IO_FAULT: | ||
| 81 | printk("IO_PAGE_FAULT device=%02x:%02x.%x " | ||
| 82 | "domain=0x%04x address=0x%016llx flags=0x%04x]\n", | ||
| 83 | PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), | ||
| 84 | domid, address, flags); | ||
| 85 | break; | ||
| 86 | case EVENT_TYPE_DEV_TAB_ERR: | ||
| 87 | printk("DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x " | ||
| 88 | "address=0x%016llx flags=0x%04x]\n", | ||
| 89 | PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), | ||
| 90 | address, flags); | ||
| 91 | break; | ||
| 92 | case EVENT_TYPE_PAGE_TAB_ERR: | ||
| 93 | printk("PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x " | ||
| 94 | "domain=0x%04x address=0x%016llx flags=0x%04x]\n", | ||
| 95 | PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), | ||
| 96 | domid, address, flags); | ||
| 97 | break; | ||
| 98 | case EVENT_TYPE_ILL_CMD: | ||
| 99 | printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address); | ||
| 100 | break; | ||
| 101 | case EVENT_TYPE_CMD_HARD_ERR: | ||
| 102 | printk("COMMAND_HARDWARE_ERROR address=0x%016llx " | ||
| 103 | "flags=0x%04x]\n", address, flags); | ||
| 104 | break; | ||
| 105 | case EVENT_TYPE_IOTLB_INV_TO: | ||
| 106 | printk("IOTLB_INV_TIMEOUT device=%02x:%02x.%x " | ||
| 107 | "address=0x%016llx]\n", | ||
| 108 | PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), | ||
| 109 | address); | ||
| 110 | break; | ||
| 111 | case EVENT_TYPE_INV_DEV_REQ: | ||
| 112 | printk("INVALID_DEVICE_REQUEST device=%02x:%02x.%x " | ||
| 113 | "address=0x%016llx flags=0x%04x]\n", | ||
| 114 | PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), | ||
| 115 | address, flags); | ||
| 116 | break; | ||
| 117 | default: | ||
| 118 | printk(KERN_ERR "UNKNOWN type=0x%02x]\n", type); | ||
| 119 | } | ||
| 120 | } | ||
| 121 | |||
| 122 | static void iommu_poll_events(struct amd_iommu *iommu) | ||
| 123 | { | ||
| 124 | u32 head, tail; | ||
| 125 | unsigned long flags; | ||
| 126 | |||
| 127 | spin_lock_irqsave(&iommu->lock, flags); | ||
| 128 | |||
| 129 | head = readl(iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); | ||
| 130 | tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET); | ||
| 131 | |||
| 132 | while (head != tail) { | ||
| 133 | iommu_print_event(iommu->evt_buf + head); | ||
| 134 | head = (head + EVENT_ENTRY_SIZE) % iommu->evt_buf_size; | ||
| 135 | } | ||
| 136 | |||
| 137 | writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); | ||
| 138 | |||
| 139 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
| 140 | } | ||
| 141 | |||
| 142 | irqreturn_t amd_iommu_int_handler(int irq, void *data) | ||
| 143 | { | ||
| 144 | struct amd_iommu *iommu; | ||
| 145 | |||
| 146 | list_for_each_entry(iommu, &amd_iommu_list, list) | ||
| 147 | iommu_poll_events(iommu); | ||
| 148 | |||
| 149 | return IRQ_HANDLED; | ||
| 150 | } | ||
| 151 | |||
| 152 | /**************************************************************************** | ||
| 153 | * | ||
| 154 | * IOMMU command queuing functions | ||
| 155 | * | ||
| 156 | ****************************************************************************/ | ||
| 157 | |||
| 158 | /* | ||
| 159 | * Writes the command to the IOMMUs command buffer and informs the | ||
| 160 | * hardware about the new command. Must be called with iommu->lock held. | ||
| 161 | */ | ||
| 162 | static int __iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) | ||
| 50 | { | 163 | { |
| 51 | u32 tail, head; | 164 | u32 tail, head; |
| 52 | u8 *target; | 165 | u8 *target; |
| 53 | 166 | ||
| 54 | tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); | 167 | tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); |
| 55 | target = (iommu->cmd_buf + tail); | 168 | target = iommu->cmd_buf + tail; |
| 56 | memcpy_toio(target, cmd, sizeof(*cmd)); | 169 | memcpy_toio(target, cmd, sizeof(*cmd)); |
| 57 | tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size; | 170 | tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size; |
| 58 | head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); | 171 | head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); |
| @@ -63,7 +176,11 @@ static int __iommu_queue_command(struct amd_iommu *iommu, struct command *cmd) | |||
| 63 | return 0; | 176 | return 0; |
| 64 | } | 177 | } |
| 65 | 178 | ||
| 66 | static int iommu_queue_command(struct amd_iommu *iommu, struct command *cmd) | 179 | /* |
| 180 | * General queuing function for commands. Takes iommu->lock and calls | ||
| 181 | * __iommu_queue_command(). | ||
| 182 | */ | ||
| 183 | static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) | ||
| 67 | { | 184 | { |
| 68 | unsigned long flags; | 185 | unsigned long flags; |
| 69 | int ret; | 186 | int ret; |
| @@ -75,35 +192,59 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct command *cmd) | |||
| 75 | return ret; | 192 | return ret; |
| 76 | } | 193 | } |
| 77 | 194 | ||
| 195 | /* | ||
| 196 | * This function is called whenever we need to ensure that the IOMMU has | ||
| 197 | * completed execution of all commands we sent. It sends a | ||
| 198 | * COMPLETION_WAIT command and waits for it to finish. The IOMMU informs | ||
| 199 | * us about that by writing a value to a physical address we pass with | ||
| 200 | * the command. | ||
| 201 | */ | ||
| 78 | static int iommu_completion_wait(struct amd_iommu *iommu) | 202 | static int iommu_completion_wait(struct amd_iommu *iommu) |
| 79 | { | 203 | { |
| 80 | int ret; | 204 | int ret = 0, ready = 0; |
| 81 | struct command cmd; | 205 | unsigned status = 0; |
| 82 | volatile u64 ready = 0; | 206 | struct iommu_cmd cmd; |
| 83 | unsigned long ready_phys = virt_to_phys(&ready); | 207 | unsigned long flags, i = 0; |
| 84 | 208 | ||
| 85 | memset(&cmd, 0, sizeof(cmd)); | 209 | memset(&cmd, 0, sizeof(cmd)); |
| 86 | cmd.data[0] = LOW_U32(ready_phys) | CMD_COMPL_WAIT_STORE_MASK; | 210 | cmd.data[0] = CMD_COMPL_WAIT_INT_MASK; |
| 87 | cmd.data[1] = HIGH_U32(ready_phys); | ||
| 88 | cmd.data[2] = 1; /* value written to 'ready' */ | ||
| 89 | CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT); | 211 | CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT); |
| 90 | 212 | ||
| 91 | iommu->need_sync = 0; | 213 | iommu->need_sync = 0; |
| 92 | 214 | ||
| 93 | ret = iommu_queue_command(iommu, &cmd); | 215 | spin_lock_irqsave(&iommu->lock, flags); |
| 216 | |||
| 217 | ret = __iommu_queue_command(iommu, &cmd); | ||
| 94 | 218 | ||
| 95 | if (ret) | 219 | if (ret) |
| 96 | return ret; | 220 | goto out; |
| 221 | |||
| 222 | while (!ready && (i < EXIT_LOOP_COUNT)) { | ||
| 223 | ++i; | ||
| 224 | /* wait for the bit to become one */ | ||
| 225 | status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); | ||
| 226 | ready = status & MMIO_STATUS_COM_WAIT_INT_MASK; | ||
| 227 | } | ||
| 228 | |||
| 229 | /* set bit back to zero */ | ||
| 230 | status &= ~MMIO_STATUS_COM_WAIT_INT_MASK; | ||
| 231 | writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET); | ||
| 97 | 232 | ||
| 98 | while (!ready) | 233 | if (unlikely((i == EXIT_LOOP_COUNT) && printk_ratelimit())) |
| 99 | cpu_relax(); | 234 | printk(KERN_WARNING "AMD IOMMU: Completion wait loop failed\n"); |
| 235 | out: | ||
| 236 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
| 100 | 237 | ||
| 101 | return 0; | 238 | return 0; |
| 102 | } | 239 | } |
| 103 | 240 | ||
| 241 | /* | ||
| 242 | * Command send function for invalidating a device table entry | ||
| 243 | */ | ||
| 104 | static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) | 244 | static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) |
| 105 | { | 245 | { |
| 106 | struct command cmd; | 246 | struct iommu_cmd cmd; |
| 247 | int ret; | ||
| 107 | 248 | ||
| 108 | BUG_ON(iommu == NULL); | 249 | BUG_ON(iommu == NULL); |
| 109 | 250 | ||
| @@ -111,37 +252,50 @@ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) | |||
| 111 | CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY); | 252 | CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY); |
| 112 | cmd.data[0] = devid; | 253 | cmd.data[0] = devid; |
| 113 | 254 | ||
| 255 | ret = iommu_queue_command(iommu, &cmd); | ||
| 256 | |||
| 114 | iommu->need_sync = 1; | 257 | iommu->need_sync = 1; |
| 115 | 258 | ||
| 116 | return iommu_queue_command(iommu, &cmd); | 259 | return ret; |
| 117 | } | 260 | } |
| 118 | 261 | ||
| 262 | /* | ||
| 263 | * Generic command send function for invalidaing TLB entries | ||
| 264 | */ | ||
| 119 | static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, | 265 | static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, |
| 120 | u64 address, u16 domid, int pde, int s) | 266 | u64 address, u16 domid, int pde, int s) |
| 121 | { | 267 | { |
| 122 | struct command cmd; | 268 | struct iommu_cmd cmd; |
| 269 | int ret; | ||
| 123 | 270 | ||
| 124 | memset(&cmd, 0, sizeof(cmd)); | 271 | memset(&cmd, 0, sizeof(cmd)); |
| 125 | address &= PAGE_MASK; | 272 | address &= PAGE_MASK; |
| 126 | CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES); | 273 | CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES); |
| 127 | cmd.data[1] |= domid; | 274 | cmd.data[1] |= domid; |
| 128 | cmd.data[2] = LOW_U32(address); | 275 | cmd.data[2] = lower_32_bits(address); |
| 129 | cmd.data[3] = HIGH_U32(address); | 276 | cmd.data[3] = upper_32_bits(address); |
| 130 | if (s) | 277 | if (s) /* size bit - we flush more than one 4kb page */ |
| 131 | cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; | 278 | cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; |
| 132 | if (pde) | 279 | if (pde) /* PDE bit - we wan't flush everything not only the PTEs */ |
| 133 | cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; | 280 | cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; |
| 134 | 281 | ||
| 282 | ret = iommu_queue_command(iommu, &cmd); | ||
| 283 | |||
| 135 | iommu->need_sync = 1; | 284 | iommu->need_sync = 1; |
| 136 | 285 | ||
| 137 | return iommu_queue_command(iommu, &cmd); | 286 | return ret; |
| 138 | } | 287 | } |
| 139 | 288 | ||
| 289 | /* | ||
| 290 | * TLB invalidation function which is called from the mapping functions. | ||
| 291 | * It invalidates a single PTE if the range to flush is within a single | ||
| 292 | * page. Otherwise it flushes the whole TLB of the IOMMU. | ||
| 293 | */ | ||
| 140 | static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid, | 294 | static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid, |
| 141 | u64 address, size_t size) | 295 | u64 address, size_t size) |
| 142 | { | 296 | { |
| 143 | int s = 0; | 297 | int s = 0; |
| 144 | unsigned pages = to_pages(address, size); | 298 | unsigned pages = iommu_num_pages(address, size, PAGE_SIZE); |
| 145 | 299 | ||
| 146 | address &= PAGE_MASK; | 300 | address &= PAGE_MASK; |
| 147 | 301 | ||
| @@ -159,6 +313,28 @@ static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid, | |||
| 159 | return 0; | 313 | return 0; |
| 160 | } | 314 | } |
| 161 | 315 | ||
| 316 | /* Flush the whole IO/TLB for a given protection domain */ | ||
| 317 | static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid) | ||
| 318 | { | ||
| 319 | u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; | ||
| 320 | |||
| 321 | iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1); | ||
| 322 | } | ||
| 323 | |||
| 324 | /**************************************************************************** | ||
| 325 | * | ||
| 326 | * The functions below are used the create the page table mappings for | ||
| 327 | * unity mapped regions. | ||
| 328 | * | ||
| 329 | ****************************************************************************/ | ||
| 330 | |||
| 331 | /* | ||
| 332 | * Generic mapping functions. It maps a physical address into a DMA | ||
| 333 | * address space. It allocates the page table pages if necessary. | ||
| 334 | * In the future it can be extended to a generic mapping function | ||
| 335 | * supporting all features of AMD IOMMU page tables like level skipping | ||
| 336 | * and full 64 bit address spaces. | ||
| 337 | */ | ||
| 162 | static int iommu_map(struct protection_domain *dom, | 338 | static int iommu_map(struct protection_domain *dom, |
| 163 | unsigned long bus_addr, | 339 | unsigned long bus_addr, |
| 164 | unsigned long phys_addr, | 340 | unsigned long phys_addr, |
| @@ -209,6 +385,10 @@ static int iommu_map(struct protection_domain *dom, | |||
| 209 | return 0; | 385 | return 0; |
| 210 | } | 386 | } |
| 211 | 387 | ||
| 388 | /* | ||
| 389 | * This function checks if a specific unity mapping entry is needed for | ||
| 390 | * this specific IOMMU. | ||
| 391 | */ | ||
| 212 | static int iommu_for_unity_map(struct amd_iommu *iommu, | 392 | static int iommu_for_unity_map(struct amd_iommu *iommu, |
| 213 | struct unity_map_entry *entry) | 393 | struct unity_map_entry *entry) |
| 214 | { | 394 | { |
| @@ -223,6 +403,12 @@ static int iommu_for_unity_map(struct amd_iommu *iommu, | |||
| 223 | return 0; | 403 | return 0; |
| 224 | } | 404 | } |
| 225 | 405 | ||
| 406 | /* | ||
| 407 | * Init the unity mappings for a specific IOMMU in the system | ||
| 408 | * | ||
| 409 | * Basically iterates over all unity mapping entries and applies them to | ||
| 410 | * the default domain DMA of that IOMMU if necessary. | ||
| 411 | */ | ||
| 226 | static int iommu_init_unity_mappings(struct amd_iommu *iommu) | 412 | static int iommu_init_unity_mappings(struct amd_iommu *iommu) |
| 227 | { | 413 | { |
| 228 | struct unity_map_entry *entry; | 414 | struct unity_map_entry *entry; |
| @@ -239,6 +425,10 @@ static int iommu_init_unity_mappings(struct amd_iommu *iommu) | |||
| 239 | return 0; | 425 | return 0; |
| 240 | } | 426 | } |
| 241 | 427 | ||
| 428 | /* | ||
| 429 | * This function actually applies the mapping to the page table of the | ||
| 430 | * dma_ops domain. | ||
| 431 | */ | ||
| 242 | static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, | 432 | static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, |
| 243 | struct unity_map_entry *e) | 433 | struct unity_map_entry *e) |
| 244 | { | 434 | { |
| @@ -261,6 +451,9 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, | |||
| 261 | return 0; | 451 | return 0; |
| 262 | } | 452 | } |
| 263 | 453 | ||
| 454 | /* | ||
| 455 | * Inits the unity mappings required for a specific device | ||
| 456 | */ | ||
| 264 | static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, | 457 | static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, |
| 265 | u16 devid) | 458 | u16 devid) |
| 266 | { | 459 | { |
| @@ -278,33 +471,48 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, | |||
| 278 | return 0; | 471 | return 0; |
| 279 | } | 472 | } |
| 280 | 473 | ||
| 281 | static unsigned long dma_mask_to_pages(unsigned long mask) | 474 | /**************************************************************************** |
| 282 | { | 475 | * |
| 283 | return (mask >> PAGE_SHIFT) + | 476 | * The next functions belong to the address allocator for the dma_ops |
| 284 | (PAGE_ALIGN(mask & ~PAGE_MASK) >> PAGE_SHIFT); | 477 | * interface functions. They work like the allocators in the other IOMMU |
| 285 | } | 478 | * drivers. Its basically a bitmap which marks the allocated pages in |
| 479 | * the aperture. Maybe it could be enhanced in the future to a more | ||
| 480 | * efficient allocator. | ||
| 481 | * | ||
| 482 | ****************************************************************************/ | ||
| 286 | 483 | ||
| 484 | /* | ||
| 485 | * The address allocator core function. | ||
| 486 | * | ||
| 487 | * called with domain->lock held | ||
| 488 | */ | ||
| 287 | static unsigned long dma_ops_alloc_addresses(struct device *dev, | 489 | static unsigned long dma_ops_alloc_addresses(struct device *dev, |
| 288 | struct dma_ops_domain *dom, | 490 | struct dma_ops_domain *dom, |
| 289 | unsigned int pages) | 491 | unsigned int pages, |
| 492 | unsigned long align_mask, | ||
| 493 | u64 dma_mask) | ||
| 290 | { | 494 | { |
| 291 | unsigned long limit = dma_mask_to_pages(*dev->dma_mask); | 495 | unsigned long limit; |
| 292 | unsigned long address; | 496 | unsigned long address; |
| 293 | unsigned long size = dom->aperture_size >> PAGE_SHIFT; | ||
| 294 | unsigned long boundary_size; | 497 | unsigned long boundary_size; |
| 295 | 498 | ||
| 296 | boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, | 499 | boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, |
| 297 | PAGE_SIZE) >> PAGE_SHIFT; | 500 | PAGE_SIZE) >> PAGE_SHIFT; |
| 298 | limit = limit < size ? limit : size; | 501 | limit = iommu_device_max_index(dom->aperture_size >> PAGE_SHIFT, 0, |
| 502 | dma_mask >> PAGE_SHIFT); | ||
| 299 | 503 | ||
| 300 | if (dom->next_bit >= limit) | 504 | if (dom->next_bit >= limit) { |
| 301 | dom->next_bit = 0; | 505 | dom->next_bit = 0; |
| 506 | dom->need_flush = true; | ||
| 507 | } | ||
| 302 | 508 | ||
| 303 | address = iommu_area_alloc(dom->bitmap, limit, dom->next_bit, pages, | 509 | address = iommu_area_alloc(dom->bitmap, limit, dom->next_bit, pages, |
| 304 | 0 , boundary_size, 0); | 510 | 0 , boundary_size, align_mask); |
| 305 | if (address == -1) | 511 | if (address == -1) { |
| 306 | address = iommu_area_alloc(dom->bitmap, limit, 0, pages, | 512 | address = iommu_area_alloc(dom->bitmap, limit, 0, pages, |
| 307 | 0, boundary_size, 0); | 513 | 0, boundary_size, align_mask); |
| 514 | dom->need_flush = true; | ||
| 515 | } | ||
| 308 | 516 | ||
| 309 | if (likely(address != -1)) { | 517 | if (likely(address != -1)) { |
| 310 | dom->next_bit = address + pages; | 518 | dom->next_bit = address + pages; |
| @@ -317,6 +525,11 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev, | |||
| 317 | return address; | 525 | return address; |
| 318 | } | 526 | } |
| 319 | 527 | ||
| 528 | /* | ||
| 529 | * The address free function. | ||
| 530 | * | ||
| 531 | * called with domain->lock held | ||
| 532 | */ | ||
| 320 | static void dma_ops_free_addresses(struct dma_ops_domain *dom, | 533 | static void dma_ops_free_addresses(struct dma_ops_domain *dom, |
| 321 | unsigned long address, | 534 | unsigned long address, |
| 322 | unsigned int pages) | 535 | unsigned int pages) |
| @@ -325,6 +538,16 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom, | |||
| 325 | iommu_area_free(dom->bitmap, address, pages); | 538 | iommu_area_free(dom->bitmap, address, pages); |
| 326 | } | 539 | } |
| 327 | 540 | ||
| 541 | /**************************************************************************** | ||
| 542 | * | ||
| 543 | * The next functions belong to the domain allocation. A domain is | ||
| 544 | * allocated for every IOMMU as the default domain. If device isolation | ||
| 545 | * is enabled, every device get its own domain. The most important thing | ||
| 546 | * about domains is the page table mapping the DMA address space they | ||
| 547 | * contain. | ||
| 548 | * | ||
| 549 | ****************************************************************************/ | ||
| 550 | |||
| 328 | static u16 domain_id_alloc(void) | 551 | static u16 domain_id_alloc(void) |
| 329 | { | 552 | { |
| 330 | unsigned long flags; | 553 | unsigned long flags; |
| @@ -342,6 +565,10 @@ static u16 domain_id_alloc(void) | |||
| 342 | return id; | 565 | return id; |
| 343 | } | 566 | } |
| 344 | 567 | ||
| 568 | /* | ||
| 569 | * Used to reserve address ranges in the aperture (e.g. for exclusion | ||
| 570 | * ranges. | ||
| 571 | */ | ||
| 345 | static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, | 572 | static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, |
| 346 | unsigned long start_page, | 573 | unsigned long start_page, |
| 347 | unsigned int pages) | 574 | unsigned int pages) |
| @@ -351,7 +578,7 @@ static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, | |||
| 351 | if (start_page + pages > last_page) | 578 | if (start_page + pages > last_page) |
| 352 | pages = last_page - start_page; | 579 | pages = last_page - start_page; |
| 353 | 580 | ||
| 354 | set_bit_string(dom->bitmap, start_page, pages); | 581 | iommu_area_reserve(dom->bitmap, start_page, pages); |
| 355 | } | 582 | } |
| 356 | 583 | ||
| 357 | static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom) | 584 | static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom) |
| @@ -382,6 +609,10 @@ static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom) | |||
| 382 | free_page((unsigned long)p1); | 609 | free_page((unsigned long)p1); |
| 383 | } | 610 | } |
| 384 | 611 | ||
| 612 | /* | ||
| 613 | * Free a domain, only used if something went wrong in the | ||
| 614 | * allocation path and we need to free an already allocated page table | ||
| 615 | */ | ||
| 385 | static void dma_ops_domain_free(struct dma_ops_domain *dom) | 616 | static void dma_ops_domain_free(struct dma_ops_domain *dom) |
| 386 | { | 617 | { |
| 387 | if (!dom) | 618 | if (!dom) |
| @@ -396,6 +627,11 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom) | |||
| 396 | kfree(dom); | 627 | kfree(dom); |
| 397 | } | 628 | } |
| 398 | 629 | ||
| 630 | /* | ||
| 631 | * Allocates a new protection domain usable for the dma_ops functions. | ||
| 632 | * It also intializes the page table and the address allocator data | ||
| 633 | * structures required for the dma_ops interface | ||
| 634 | */ | ||
| 399 | static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, | 635 | static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, |
| 400 | unsigned order) | 636 | unsigned order) |
| 401 | { | 637 | { |
| @@ -436,14 +672,24 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, | |||
| 436 | dma_dom->bitmap[0] = 1; | 672 | dma_dom->bitmap[0] = 1; |
| 437 | dma_dom->next_bit = 0; | 673 | dma_dom->next_bit = 0; |
| 438 | 674 | ||
| 675 | dma_dom->need_flush = false; | ||
| 676 | dma_dom->target_dev = 0xffff; | ||
| 677 | |||
| 678 | /* Intialize the exclusion range if necessary */ | ||
| 439 | if (iommu->exclusion_start && | 679 | if (iommu->exclusion_start && |
| 440 | iommu->exclusion_start < dma_dom->aperture_size) { | 680 | iommu->exclusion_start < dma_dom->aperture_size) { |
| 441 | unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT; | 681 | unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT; |
| 442 | int pages = to_pages(iommu->exclusion_start, | 682 | int pages = iommu_num_pages(iommu->exclusion_start, |
| 443 | iommu->exclusion_length); | 683 | iommu->exclusion_length, |
| 684 | PAGE_SIZE); | ||
| 444 | dma_ops_reserve_addresses(dma_dom, startpage, pages); | 685 | dma_ops_reserve_addresses(dma_dom, startpage, pages); |
| 445 | } | 686 | } |
| 446 | 687 | ||
| 688 | /* | ||
| 689 | * At the last step, build the page tables so we don't need to | ||
| 690 | * allocate page table pages in the dma_ops mapping/unmapping | ||
| 691 | * path. | ||
| 692 | */ | ||
| 447 | num_pte_pages = dma_dom->aperture_size / (PAGE_SIZE * 512); | 693 | num_pte_pages = dma_dom->aperture_size / (PAGE_SIZE * 512); |
| 448 | dma_dom->pte_pages = kzalloc(num_pte_pages * sizeof(void *), | 694 | dma_dom->pte_pages = kzalloc(num_pte_pages * sizeof(void *), |
| 449 | GFP_KERNEL); | 695 | GFP_KERNEL); |
| @@ -472,6 +718,10 @@ free_dma_dom: | |||
| 472 | return NULL; | 718 | return NULL; |
| 473 | } | 719 | } |
| 474 | 720 | ||
| 721 | /* | ||
| 722 | * Find out the protection domain structure for a given PCI device. This | ||
| 723 | * will give us the pointer to the page table root for example. | ||
| 724 | */ | ||
| 475 | static struct protection_domain *domain_for_device(u16 devid) | 725 | static struct protection_domain *domain_for_device(u16 devid) |
| 476 | { | 726 | { |
| 477 | struct protection_domain *dom; | 727 | struct protection_domain *dom; |
| @@ -484,6 +734,10 @@ static struct protection_domain *domain_for_device(u16 devid) | |||
| 484 | return dom; | 734 | return dom; |
| 485 | } | 735 | } |
| 486 | 736 | ||
| 737 | /* | ||
| 738 | * If a device is not yet associated with a domain, this function does | ||
| 739 | * assigns it visible for the hardware | ||
| 740 | */ | ||
| 487 | static void set_device_domain(struct amd_iommu *iommu, | 741 | static void set_device_domain(struct amd_iommu *iommu, |
| 488 | struct protection_domain *domain, | 742 | struct protection_domain *domain, |
| 489 | u16 devid) | 743 | u16 devid) |
| @@ -492,12 +746,13 @@ static void set_device_domain(struct amd_iommu *iommu, | |||
| 492 | 746 | ||
| 493 | u64 pte_root = virt_to_phys(domain->pt_root); | 747 | u64 pte_root = virt_to_phys(domain->pt_root); |
| 494 | 748 | ||
| 495 | pte_root |= (domain->mode & 0x07) << 9; | 749 | pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) |
| 496 | pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | 2; | 750 | << DEV_ENTRY_MODE_SHIFT; |
| 751 | pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; | ||
| 497 | 752 | ||
| 498 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); | 753 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); |
| 499 | amd_iommu_dev_table[devid].data[0] = pte_root; | 754 | amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); |
| 500 | amd_iommu_dev_table[devid].data[1] = pte_root >> 32; | 755 | amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root); |
| 501 | amd_iommu_dev_table[devid].data[2] = domain->id; | 756 | amd_iommu_dev_table[devid].data[2] = domain->id; |
| 502 | 757 | ||
| 503 | amd_iommu_pd_table[devid] = domain; | 758 | amd_iommu_pd_table[devid] = domain; |
| @@ -508,6 +763,58 @@ static void set_device_domain(struct amd_iommu *iommu, | |||
| 508 | iommu->need_sync = 1; | 763 | iommu->need_sync = 1; |
| 509 | } | 764 | } |
| 510 | 765 | ||
| 766 | /***************************************************************************** | ||
| 767 | * | ||
| 768 | * The next functions belong to the dma_ops mapping/unmapping code. | ||
| 769 | * | ||
| 770 | *****************************************************************************/ | ||
| 771 | |||
| 772 | /* | ||
| 773 | * This function checks if the driver got a valid device from the caller to | ||
| 774 | * avoid dereferencing invalid pointers. | ||
| 775 | */ | ||
| 776 | static bool check_device(struct device *dev) | ||
| 777 | { | ||
| 778 | if (!dev || !dev->dma_mask) | ||
| 779 | return false; | ||
| 780 | |||
| 781 | return true; | ||
| 782 | } | ||
| 783 | |||
| 784 | /* | ||
| 785 | * In this function the list of preallocated protection domains is traversed to | ||
| 786 | * find the domain for a specific device | ||
| 787 | */ | ||
| 788 | static struct dma_ops_domain *find_protection_domain(u16 devid) | ||
| 789 | { | ||
| 790 | struct dma_ops_domain *entry, *ret = NULL; | ||
| 791 | unsigned long flags; | ||
| 792 | |||
| 793 | if (list_empty(&iommu_pd_list)) | ||
| 794 | return NULL; | ||
| 795 | |||
| 796 | spin_lock_irqsave(&iommu_pd_list_lock, flags); | ||
| 797 | |||
| 798 | list_for_each_entry(entry, &iommu_pd_list, list) { | ||
| 799 | if (entry->target_dev == devid) { | ||
| 800 | ret = entry; | ||
| 801 | list_del(&ret->list); | ||
| 802 | break; | ||
| 803 | } | ||
| 804 | } | ||
| 805 | |||
| 806 | spin_unlock_irqrestore(&iommu_pd_list_lock, flags); | ||
| 807 | |||
| 808 | return ret; | ||
| 809 | } | ||
| 810 | |||
| 811 | /* | ||
| 812 | * In the dma_ops path we only have the struct device. This function | ||
| 813 | * finds the corresponding IOMMU, the protection domain and the | ||
| 814 | * requestor id for a given device. | ||
| 815 | * If the device is not yet associated with a domain this is also done | ||
| 816 | * in this function. | ||
| 817 | */ | ||
| 511 | static int get_device_resources(struct device *dev, | 818 | static int get_device_resources(struct device *dev, |
| 512 | struct amd_iommu **iommu, | 819 | struct amd_iommu **iommu, |
| 513 | struct protection_domain **domain, | 820 | struct protection_domain **domain, |
| @@ -517,26 +824,30 @@ static int get_device_resources(struct device *dev, | |||
| 517 | struct pci_dev *pcidev; | 824 | struct pci_dev *pcidev; |
| 518 | u16 _bdf; | 825 | u16 _bdf; |
| 519 | 826 | ||
| 520 | BUG_ON(!dev || dev->bus != &pci_bus_type || !dev->dma_mask); | 827 | *iommu = NULL; |
| 828 | *domain = NULL; | ||
| 829 | *bdf = 0xffff; | ||
| 830 | |||
| 831 | if (dev->bus != &pci_bus_type) | ||
| 832 | return 0; | ||
| 521 | 833 | ||
| 522 | pcidev = to_pci_dev(dev); | 834 | pcidev = to_pci_dev(dev); |
| 523 | _bdf = (pcidev->bus->number << 8) | pcidev->devfn; | 835 | _bdf = calc_devid(pcidev->bus->number, pcidev->devfn); |
| 524 | 836 | ||
| 525 | if (_bdf >= amd_iommu_last_bdf) { | 837 | /* device not translated by any IOMMU in the system? */ |
| 526 | *iommu = NULL; | 838 | if (_bdf > amd_iommu_last_bdf) |
| 527 | *domain = NULL; | ||
| 528 | *bdf = 0xffff; | ||
| 529 | return 0; | 839 | return 0; |
| 530 | } | ||
| 531 | 840 | ||
| 532 | *bdf = amd_iommu_alias_table[_bdf]; | 841 | *bdf = amd_iommu_alias_table[_bdf]; |
| 533 | 842 | ||
| 534 | *iommu = amd_iommu_rlookup_table[*bdf]; | 843 | *iommu = amd_iommu_rlookup_table[*bdf]; |
| 535 | if (*iommu == NULL) | 844 | if (*iommu == NULL) |
| 536 | return 0; | 845 | return 0; |
| 537 | dma_dom = (*iommu)->default_dom; | ||
| 538 | *domain = domain_for_device(*bdf); | 846 | *domain = domain_for_device(*bdf); |
| 539 | if (*domain == NULL) { | 847 | if (*domain == NULL) { |
| 848 | dma_dom = find_protection_domain(*bdf); | ||
| 849 | if (!dma_dom) | ||
| 850 | dma_dom = (*iommu)->default_dom; | ||
| 540 | *domain = &dma_dom->domain; | 851 | *domain = &dma_dom->domain; |
| 541 | set_device_domain(*iommu, *domain, *bdf); | 852 | set_device_domain(*iommu, *domain, *bdf); |
| 542 | printk(KERN_INFO "AMD IOMMU: Using protection domain %d for " | 853 | printk(KERN_INFO "AMD IOMMU: Using protection domain %d for " |
| @@ -547,6 +858,10 @@ static int get_device_resources(struct device *dev, | |||
| 547 | return 1; | 858 | return 1; |
| 548 | } | 859 | } |
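The device id used to index the alias, rlookup and protection-domain tables is just the PCI bus/devfn pair packed into 16 bits; calc_devid() is assumed here to encode it exactly like the open-coded (bus->number << 8) | devfn expression it replaces. A minimal sketch of the arithmetic, with made-up function name and example values:

```c
#include <linux/pci.h>

/* Illustrative only; not part of the patch. */
static u16 example_devid(struct pci_dev *pdev)
{
	/* e.g. bus 0x01, slot 5, function 2: devfn == 0x2a, devid == 0x012a */
	u16 devid = (pdev->bus->number << 8) | pdev->devfn;

	pr_debug("devid %#06x = %02x:%02x.%x\n", devid,
		 devid >> 8, PCI_SLOT(devid & 0xff), PCI_FUNC(devid & 0xff));

	return devid;
}
```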
| 549 | 860 | ||
| 861 | /* | ||
| 862 | * This is the generic map function. It maps one 4kb page at paddr to | ||
| 863 | * the given address in the DMA address space for the domain. | ||
| 864 | */ | ||
| 550 | static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu, | 865 | static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu, |
| 551 | struct dma_ops_domain *dom, | 866 | struct dma_ops_domain *dom, |
| 552 | unsigned long address, | 867 | unsigned long address, |
| @@ -578,6 +893,9 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu, | |||
| 578 | return (dma_addr_t)address; | 893 | return (dma_addr_t)address; |
| 579 | } | 894 | } |
| 580 | 895 | ||
| 896 | /* | ||
| 897 | * The generic unmapping function for one page in the DMA address space. | ||
| 898 | */ | ||
| 581 | static void dma_ops_domain_unmap(struct amd_iommu *iommu, | 899 | static void dma_ops_domain_unmap(struct amd_iommu *iommu, |
| 582 | struct dma_ops_domain *dom, | 900 | struct dma_ops_domain *dom, |
| 583 | unsigned long address) | 901 | unsigned long address) |
| @@ -597,22 +915,35 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu, | |||
| 597 | *pte = 0ULL; | 915 | *pte = 0ULL; |
| 598 | } | 916 | } |
| 599 | 917 | ||
| 918 | /* | ||
| 919 | * This function contains common code for mapping of a physically | ||
| 920 | * contiguous memory region into the DMA address space. It is used by all | ||
| 921 | * mapping functions provided by this IOMMU driver. | ||
| 922 | * Must be called with the domain lock held. | ||
| 923 | */ | ||
| 600 | static dma_addr_t __map_single(struct device *dev, | 924 | static dma_addr_t __map_single(struct device *dev, |
| 601 | struct amd_iommu *iommu, | 925 | struct amd_iommu *iommu, |
| 602 | struct dma_ops_domain *dma_dom, | 926 | struct dma_ops_domain *dma_dom, |
| 603 | phys_addr_t paddr, | 927 | phys_addr_t paddr, |
| 604 | size_t size, | 928 | size_t size, |
| 605 | int dir) | 929 | int dir, |
| 930 | bool align, | ||
| 931 | u64 dma_mask) | ||
| 606 | { | 932 | { |
| 607 | dma_addr_t offset = paddr & ~PAGE_MASK; | 933 | dma_addr_t offset = paddr & ~PAGE_MASK; |
| 608 | dma_addr_t address, start; | 934 | dma_addr_t address, start; |
| 609 | unsigned int pages; | 935 | unsigned int pages; |
| 936 | unsigned long align_mask = 0; | ||
| 610 | int i; | 937 | int i; |
| 611 | 938 | ||
| 612 | pages = to_pages(paddr, size); | 939 | pages = iommu_num_pages(paddr, size, PAGE_SIZE); |
| 613 | paddr &= PAGE_MASK; | 940 | paddr &= PAGE_MASK; |
| 614 | 941 | ||
| 615 | address = dma_ops_alloc_addresses(dev, dma_dom, pages); | 942 | if (align) |
| 943 | align_mask = (1UL << get_order(size)) - 1; | ||
| 944 | |||
| 945 | address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask, | ||
| 946 | dma_mask); | ||
| 616 | if (unlikely(address == bad_dma_address)) | 947 | if (unlikely(address == bad_dma_address)) |
| 617 | goto out; | 948 | goto out; |
| 618 | 949 | ||
| @@ -624,10 +955,20 @@ static dma_addr_t __map_single(struct device *dev, | |||
| 624 | } | 955 | } |
| 625 | address += offset; | 956 | address += offset; |
| 626 | 957 | ||
| 958 | if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { | ||
| 959 | iommu_flush_tlb(iommu, dma_dom->domain.id); | ||
| 960 | dma_dom->need_flush = false; | ||
| 961 | } else if (unlikely(iommu_has_npcache(iommu))) | ||
| 962 | iommu_flush_pages(iommu, dma_dom->domain.id, address, size); | ||
| 963 | |||
| 627 | out: | 964 | out: |
| 628 | return address; | 965 | return address; |
| 629 | } | 966 | } |
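The page count and the optional alignment mask introduced above are simple arithmetic. A hedged worked example, assuming 4 KiB pages and the iommu_num_pages() helper from <linux/iommu-helper.h>; the addresses and sizes are made up:

```c
#include <linux/iommu-helper.h>
#include <linux/mm.h>

/* Illustrative only; not part of the patch. */
static void example_map_math(void)
{
	phys_addr_t paddr = 0x1000200;	/* page offset 0x200 */
	size_t size = 0x2400;		/* 9 KiB */

	/* offset 0x200 plus 0x2400 bytes spans three 4 KiB pages */
	unsigned int pages = iommu_num_pages(paddr, size, PAGE_SIZE);

	/* get_order(0x2400) == 2, so the IOVA gets a 16 KiB (4 page) alignment */
	unsigned long align_mask = (1UL << get_order(size)) - 1;

	pr_debug("pages=%u align_mask=%#lx\n", pages, align_mask);	/* 3, 0x3 */
}
```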
| 630 | 967 | ||
| 968 | /* | ||
| 969 | * Does the reverse of the __map_single function. Must be called with | ||
| 970 | * the domain lock held too | ||
| 971 | */ | ||
| 631 | static void __unmap_single(struct amd_iommu *iommu, | 972 | static void __unmap_single(struct amd_iommu *iommu, |
| 632 | struct dma_ops_domain *dma_dom, | 973 | struct dma_ops_domain *dma_dom, |
| 633 | dma_addr_t dma_addr, | 974 | dma_addr_t dma_addr, |
| @@ -640,7 +981,7 @@ static void __unmap_single(struct amd_iommu *iommu, | |||
| 640 | if ((dma_addr == 0) || (dma_addr + size > dma_dom->aperture_size)) | 981 | if ((dma_addr == 0) || (dma_addr + size > dma_dom->aperture_size)) |
| 641 | return; | 982 | return; |
| 642 | 983 | ||
| 643 | pages = to_pages(dma_addr, size); | 984 | pages = iommu_num_pages(dma_addr, size, PAGE_SIZE); |
| 644 | dma_addr &= PAGE_MASK; | 985 | dma_addr &= PAGE_MASK; |
| 645 | start = dma_addr; | 986 | start = dma_addr; |
| 646 | 987 | ||
| @@ -650,8 +991,14 @@ static void __unmap_single(struct amd_iommu *iommu, | |||
| 650 | } | 991 | } |
| 651 | 992 | ||
| 652 | dma_ops_free_addresses(dma_dom, dma_addr, pages); | 993 | dma_ops_free_addresses(dma_dom, dma_addr, pages); |
| 994 | |||
| 995 | if (amd_iommu_unmap_flush) | ||
| 996 | iommu_flush_pages(iommu, dma_dom->domain.id, dma_addr, size); | ||
| 653 | } | 997 | } |
| 654 | 998 | ||
| 999 | /* | ||
| 1000 | * The exported map_single function for dma_ops. | ||
| 1001 | */ | ||
| 655 | static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, | 1002 | static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, |
| 656 | size_t size, int dir) | 1003 | size_t size, int dir) |
| 657 | { | 1004 | { |
| @@ -660,21 +1007,26 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, | |||
| 660 | struct protection_domain *domain; | 1007 | struct protection_domain *domain; |
| 661 | u16 devid; | 1008 | u16 devid; |
| 662 | dma_addr_t addr; | 1009 | dma_addr_t addr; |
| 1010 | u64 dma_mask; | ||
| 1011 | |||
| 1012 | if (!check_device(dev)) | ||
| 1013 | return bad_dma_address; | ||
| 1014 | |||
| 1015 | dma_mask = *dev->dma_mask; | ||
| 663 | 1016 | ||
| 664 | get_device_resources(dev, &iommu, &domain, &devid); | 1017 | get_device_resources(dev, &iommu, &domain, &devid); |
| 665 | 1018 | ||
| 666 | if (iommu == NULL || domain == NULL) | 1019 | if (iommu == NULL || domain == NULL) |
| 1020 | /* device not handled by any AMD IOMMU */ | ||
| 667 | return (dma_addr_t)paddr; | 1021 | return (dma_addr_t)paddr; |
| 668 | 1022 | ||
| 669 | spin_lock_irqsave(&domain->lock, flags); | 1023 | spin_lock_irqsave(&domain->lock, flags); |
| 670 | addr = __map_single(dev, iommu, domain->priv, paddr, size, dir); | 1024 | addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false, |
| 1025 | dma_mask); | ||
| 671 | if (addr == bad_dma_address) | 1026 | if (addr == bad_dma_address) |
| 672 | goto out; | 1027 | goto out; |
| 673 | 1028 | ||
| 674 | if (iommu_has_npcache(iommu)) | 1029 | if (unlikely(iommu->need_sync)) |
| 675 | iommu_flush_pages(iommu, domain->id, addr, size); | ||
| 676 | |||
| 677 | if (iommu->need_sync) | ||
| 678 | iommu_completion_wait(iommu); | 1030 | iommu_completion_wait(iommu); |
| 679 | 1031 | ||
| 680 | out: | 1032 | out: |
| @@ -683,6 +1035,9 @@ out: | |||
| 683 | return addr; | 1035 | return addr; |
| 684 | } | 1036 | } |
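For orientation, once dma_ops points at amd_iommu_dma_ops the map_single() above is what an ordinary DMA API call resolves to. A hypothetical driver-side caller looks roughly like this; the device, buffer and error handling are illustrative:

```c
#include <linux/dma-mapping.h>
#include <linux/errno.h>
#include <linux/pci.h>

/* Illustrative only; not part of the patch. */
static int example_map_buffer(struct pci_dev *pdev, void *buf, size_t len)
{
	dma_addr_t handle;

	/* ends up in map_single() when the AMD IOMMU dma_ops are installed */
	handle = dma_map_single(&pdev->dev, buf, len, DMA_TO_DEVICE);
	if (dma_mapping_error(&pdev->dev, handle))
		return -ENOMEM;

	/* ... program the device with 'handle' and wait for the DMA ... */

	dma_unmap_single(&pdev->dev, handle, len, DMA_TO_DEVICE);
	return 0;
}
```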
| 685 | 1037 | ||
| 1038 | /* | ||
| 1039 | * The exported unmap_single function for dma_ops. | ||
| 1040 | */ | ||
| 686 | static void unmap_single(struct device *dev, dma_addr_t dma_addr, | 1041 | static void unmap_single(struct device *dev, dma_addr_t dma_addr, |
| 687 | size_t size, int dir) | 1042 | size_t size, int dir) |
| 688 | { | 1043 | { |
| @@ -691,21 +1046,25 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr, | |||
| 691 | struct protection_domain *domain; | 1046 | struct protection_domain *domain; |
| 692 | u16 devid; | 1047 | u16 devid; |
| 693 | 1048 | ||
| 694 | if (!get_device_resources(dev, &iommu, &domain, &devid)) | 1049 | if (!check_device(dev) || |
| 1050 | !get_device_resources(dev, &iommu, &domain, &devid)) | ||
| 1051 | /* device not handled by any AMD IOMMU */ | ||
| 695 | return; | 1052 | return; |
| 696 | 1053 | ||
| 697 | spin_lock_irqsave(&domain->lock, flags); | 1054 | spin_lock_irqsave(&domain->lock, flags); |
| 698 | 1055 | ||
| 699 | __unmap_single(iommu, domain->priv, dma_addr, size, dir); | 1056 | __unmap_single(iommu, domain->priv, dma_addr, size, dir); |
| 700 | 1057 | ||
| 701 | iommu_flush_pages(iommu, domain->id, dma_addr, size); | 1058 | if (unlikely(iommu->need_sync)) |
| 702 | |||
| 703 | if (iommu->need_sync) | ||
| 704 | iommu_completion_wait(iommu); | 1059 | iommu_completion_wait(iommu); |
| 705 | 1060 | ||
| 706 | spin_unlock_irqrestore(&domain->lock, flags); | 1061 | spin_unlock_irqrestore(&domain->lock, flags); |
| 707 | } | 1062 | } |
| 708 | 1063 | ||
| 1064 | /* | ||
| 1065 | * This is a special map_sg function which is used if we should map a | ||
| 1066 | * device which is not handled by an AMD IOMMU in the system. | ||
| 1067 | */ | ||
| 709 | static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist, | 1068 | static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist, |
| 710 | int nelems, int dir) | 1069 | int nelems, int dir) |
| 711 | { | 1070 | { |
| @@ -720,6 +1079,10 @@ static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist, | |||
| 720 | return nelems; | 1079 | return nelems; |
| 721 | } | 1080 | } |
| 722 | 1081 | ||
| 1082 | /* | ||
| 1083 | * The exported map_sg function for dma_ops (handles scatter-gather | ||
| 1084 | * lists). | ||
| 1085 | */ | ||
| 723 | static int map_sg(struct device *dev, struct scatterlist *sglist, | 1086 | static int map_sg(struct device *dev, struct scatterlist *sglist, |
| 724 | int nelems, int dir) | 1087 | int nelems, int dir) |
| 725 | { | 1088 | { |
| @@ -731,6 +1094,12 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, | |||
| 731 | struct scatterlist *s; | 1094 | struct scatterlist *s; |
| 732 | phys_addr_t paddr; | 1095 | phys_addr_t paddr; |
| 733 | int mapped_elems = 0; | 1096 | int mapped_elems = 0; |
| 1097 | u64 dma_mask; | ||
| 1098 | |||
| 1099 | if (!check_device(dev)) | ||
| 1100 | return 0; | ||
| 1101 | |||
| 1102 | dma_mask = *dev->dma_mask; | ||
| 734 | 1103 | ||
| 735 | get_device_resources(dev, &iommu, &domain, &devid); | 1104 | get_device_resources(dev, &iommu, &domain, &devid); |
| 736 | 1105 | ||
| @@ -743,19 +1112,17 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, | |||
| 743 | paddr = sg_phys(s); | 1112 | paddr = sg_phys(s); |
| 744 | 1113 | ||
| 745 | s->dma_address = __map_single(dev, iommu, domain->priv, | 1114 | s->dma_address = __map_single(dev, iommu, domain->priv, |
| 746 | paddr, s->length, dir); | 1115 | paddr, s->length, dir, false, |
| 1116 | dma_mask); | ||
| 747 | 1117 | ||
| 748 | if (s->dma_address) { | 1118 | if (s->dma_address) { |
| 749 | s->dma_length = s->length; | 1119 | s->dma_length = s->length; |
| 750 | mapped_elems++; | 1120 | mapped_elems++; |
| 751 | } else | 1121 | } else |
| 752 | goto unmap; | 1122 | goto unmap; |
| 753 | if (iommu_has_npcache(iommu)) | ||
| 754 | iommu_flush_pages(iommu, domain->id, s->dma_address, | ||
| 755 | s->dma_length); | ||
| 756 | } | 1123 | } |
| 757 | 1124 | ||
| 758 | if (iommu->need_sync) | 1125 | if (unlikely(iommu->need_sync)) |
| 759 | iommu_completion_wait(iommu); | 1126 | iommu_completion_wait(iommu); |
| 760 | 1127 | ||
| 761 | out: | 1128 | out: |
| @@ -775,6 +1142,10 @@ unmap: | |||
| 775 | goto out; | 1142 | goto out; |
| 776 | } | 1143 | } |
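The scatter-gather path looks the same from a driver's point of view; a minimal, hypothetical dma_map_sg() caller that is routed to map_sg()/unmap_sg() above:

```c
#include <linux/dma-mapping.h>
#include <linux/scatterlist.h>

/* Illustrative only; not part of the patch. */
static void example_map_sg(struct device *dev, struct scatterlist *sgl, int nents)
{
	struct scatterlist *sg;
	int mapped, i;

	mapped = dma_map_sg(dev, sgl, nents, DMA_FROM_DEVICE);
	if (!mapped)
		return;

	for_each_sg(sgl, sg, mapped, i) {
		/* hand sg_dma_address(sg) / sg_dma_len(sg) to the hardware */
	}

	dma_unmap_sg(dev, sgl, nents, DMA_FROM_DEVICE);
}
```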
| 777 | 1144 | ||
| 1145 | /* | ||
| 1146 | * The exported unmap_sg function for dma_ops (handles scatter-gather | ||
| 1147 | * lists). | ||
| 1148 | */ | ||
| 778 | static void unmap_sg(struct device *dev, struct scatterlist *sglist, | 1149 | static void unmap_sg(struct device *dev, struct scatterlist *sglist, |
| 779 | int nelems, int dir) | 1150 | int nelems, int dir) |
| 780 | { | 1151 | { |
| @@ -785,7 +1156,8 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, | |||
| 785 | u16 devid; | 1156 | u16 devid; |
| 786 | int i; | 1157 | int i; |
| 787 | 1158 | ||
| 788 | if (!get_device_resources(dev, &iommu, &domain, &devid)) | 1159 | if (!check_device(dev) || |
| 1160 | !get_device_resources(dev, &iommu, &domain, &devid)) | ||
| 789 | return; | 1161 | return; |
| 790 | 1162 | ||
| 791 | spin_lock_irqsave(&domain->lock, flags); | 1163 | spin_lock_irqsave(&domain->lock, flags); |
| @@ -793,17 +1165,18 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, | |||
| 793 | for_each_sg(sglist, s, nelems, i) { | 1165 | for_each_sg(sglist, s, nelems, i) { |
| 794 | __unmap_single(iommu, domain->priv, s->dma_address, | 1166 | __unmap_single(iommu, domain->priv, s->dma_address, |
| 795 | s->dma_length, dir); | 1167 | s->dma_length, dir); |
| 796 | iommu_flush_pages(iommu, domain->id, s->dma_address, | ||
| 797 | s->dma_length); | ||
| 798 | s->dma_address = s->dma_length = 0; | 1168 | s->dma_address = s->dma_length = 0; |
| 799 | } | 1169 | } |
| 800 | 1170 | ||
| 801 | if (iommu->need_sync) | 1171 | if (unlikely(iommu->need_sync)) |
| 802 | iommu_completion_wait(iommu); | 1172 | iommu_completion_wait(iommu); |
| 803 | 1173 | ||
| 804 | spin_unlock_irqrestore(&domain->lock, flags); | 1174 | spin_unlock_irqrestore(&domain->lock, flags); |
| 805 | } | 1175 | } |
| 806 | 1176 | ||
| 1177 | /* | ||
| 1178 | * The exported alloc_coherent function for dma_ops. | ||
| 1179 | */ | ||
| 807 | static void *alloc_coherent(struct device *dev, size_t size, | 1180 | static void *alloc_coherent(struct device *dev, size_t size, |
| 808 | dma_addr_t *dma_addr, gfp_t flag) | 1181 | dma_addr_t *dma_addr, gfp_t flag) |
| 809 | { | 1182 | { |
| @@ -813,25 +1186,33 @@ static void *alloc_coherent(struct device *dev, size_t size, | |||
| 813 | struct protection_domain *domain; | 1186 | struct protection_domain *domain; |
| 814 | u16 devid; | 1187 | u16 devid; |
| 815 | phys_addr_t paddr; | 1188 | phys_addr_t paddr; |
| 1189 | u64 dma_mask = dev->coherent_dma_mask; | ||
| 1190 | |||
| 1191 | if (!check_device(dev)) | ||
| 1192 | return NULL; | ||
| 1193 | |||
| 1194 | if (!get_device_resources(dev, &iommu, &domain, &devid)) | ||
| 1195 | flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); | ||
| 816 | 1196 | ||
| 1197 | flag |= __GFP_ZERO; | ||
| 817 | virt_addr = (void *)__get_free_pages(flag, get_order(size)); | 1198 | virt_addr = (void *)__get_free_pages(flag, get_order(size)); |
| 818 | if (!virt_addr) | 1199 | if (!virt_addr) |
| 819 | return 0; | 1200 | return 0; |
| 820 | 1201 | ||
| 821 | memset(virt_addr, 0, size); | ||
| 822 | paddr = virt_to_phys(virt_addr); | 1202 | paddr = virt_to_phys(virt_addr); |
| 823 | 1203 | ||
| 824 | get_device_resources(dev, &iommu, &domain, &devid); | ||
| 825 | |||
| 826 | if (!iommu || !domain) { | 1204 | if (!iommu || !domain) { |
| 827 | *dma_addr = (dma_addr_t)paddr; | 1205 | *dma_addr = (dma_addr_t)paddr; |
| 828 | return virt_addr; | 1206 | return virt_addr; |
| 829 | } | 1207 | } |
| 830 | 1208 | ||
| 1209 | if (!dma_mask) | ||
| 1210 | dma_mask = *dev->dma_mask; | ||
| 1211 | |||
| 831 | spin_lock_irqsave(&domain->lock, flags); | 1212 | spin_lock_irqsave(&domain->lock, flags); |
| 832 | 1213 | ||
| 833 | *dma_addr = __map_single(dev, iommu, domain->priv, paddr, | 1214 | *dma_addr = __map_single(dev, iommu, domain->priv, paddr, |
| 834 | size, DMA_BIDIRECTIONAL); | 1215 | size, DMA_BIDIRECTIONAL, true, dma_mask); |
| 835 | 1216 | ||
| 836 | if (*dma_addr == bad_dma_address) { | 1217 | if (*dma_addr == bad_dma_address) { |
| 837 | free_pages((unsigned long)virt_addr, get_order(size)); | 1218 | free_pages((unsigned long)virt_addr, get_order(size)); |
| @@ -839,10 +1220,7 @@ static void *alloc_coherent(struct device *dev, size_t size, | |||
| 839 | goto out; | 1220 | goto out; |
| 840 | } | 1221 | } |
| 841 | 1222 | ||
| 842 | if (iommu_has_npcache(iommu)) | 1223 | if (unlikely(iommu->need_sync)) |
| 843 | iommu_flush_pages(iommu, domain->id, *dma_addr, size); | ||
| 844 | |||
| 845 | if (iommu->need_sync) | ||
| 846 | iommu_completion_wait(iommu); | 1224 | iommu_completion_wait(iommu); |
| 847 | 1225 | ||
| 848 | out: | 1226 | out: |
| @@ -851,6 +1229,9 @@ out: | |||
| 851 | return virt_addr; | 1229 | return virt_addr; |
| 852 | } | 1230 | } |
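Likewise, alloc_coherent()/free_coherent() sit behind the usual coherent-allocation API; a hedged driver-side sketch, with a made-up one-page ring buffer:

```c
#include <linux/dma-mapping.h>
#include <linux/gfp.h>

/* Illustrative only; not part of the patch. */
static void *example_alloc_ring(struct device *dev, dma_addr_t *dma_handle)
{
	/* routed to alloc_coherent() above; the buffer comes back zeroed */
	return dma_alloc_coherent(dev, PAGE_SIZE, dma_handle, GFP_KERNEL);
}

static void example_free_ring(struct device *dev, void *ring, dma_addr_t dma_handle)
{
	dma_free_coherent(dev, PAGE_SIZE, ring, dma_handle);
}
```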
| 853 | 1231 | ||
| 1232 | /* | ||
| 1233 | * The exported free_coherent function for dma_ops. | ||
| 1234 | */ | ||
| 854 | static void free_coherent(struct device *dev, size_t size, | 1235 | static void free_coherent(struct device *dev, size_t size, |
| 855 | void *virt_addr, dma_addr_t dma_addr) | 1236 | void *virt_addr, dma_addr_t dma_addr) |
| 856 | { | 1237 | { |
| @@ -859,6 +1240,9 @@ static void free_coherent(struct device *dev, size_t size, | |||
| 859 | struct protection_domain *domain; | 1240 | struct protection_domain *domain; |
| 860 | u16 devid; | 1241 | u16 devid; |
| 861 | 1242 | ||
| 1243 | if (!check_device(dev)) | ||
| 1244 | return; | ||
| 1245 | |||
| 862 | get_device_resources(dev, &iommu, &domain, &devid); | 1246 | get_device_resources(dev, &iommu, &domain, &devid); |
| 863 | 1247 | ||
| 864 | if (!iommu || !domain) | 1248 | if (!iommu || !domain) |
| @@ -867,9 +1251,8 @@ static void free_coherent(struct device *dev, size_t size, | |||
| 867 | spin_lock_irqsave(&domain->lock, flags); | 1251 | spin_lock_irqsave(&domain->lock, flags); |
| 868 | 1252 | ||
| 869 | __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); | 1253 | __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); |
| 870 | iommu_flush_pages(iommu, domain->id, dma_addr, size); | ||
| 871 | 1254 | ||
| 872 | if (iommu->need_sync) | 1255 | if (unlikely(iommu->need_sync)) |
| 873 | iommu_completion_wait(iommu); | 1256 | iommu_completion_wait(iommu); |
| 874 | 1257 | ||
| 875 | spin_unlock_irqrestore(&domain->lock, flags); | 1258 | spin_unlock_irqrestore(&domain->lock, flags); |
| @@ -879,6 +1262,32 @@ free_mem: | |||
| 879 | } | 1262 | } |
| 880 | 1263 | ||
| 881 | /* | 1264 | /* |
| 1265 | * This function is called by the DMA layer to find out if we can handle a | ||
| 1266 | * particular device. It is part of the dma_ops. | ||
| 1267 | */ | ||
| 1268 | static int amd_iommu_dma_supported(struct device *dev, u64 mask) | ||
| 1269 | { | ||
| 1270 | u16 bdf; | ||
| 1271 | struct pci_dev *pcidev; | ||
| 1272 | |||
| 1273 | /* No device or no PCI device */ | ||
| 1274 | if (!dev || dev->bus != &pci_bus_type) | ||
| 1275 | return 0; | ||
| 1276 | |||
| 1277 | pcidev = to_pci_dev(dev); | ||
| 1278 | |||
| 1279 | bdf = calc_devid(pcidev->bus->number, pcidev->devfn); | ||
| 1280 | |||
| 1281 | /* Out of our scope? */ | ||
| 1282 | if (bdf > amd_iommu_last_bdf) | ||
| 1283 | return 0; | ||
| 1284 | |||
| 1285 | return 1; | ||
| 1286 | } | ||
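The new dma_supported callback is consulted when a driver negotiates its DMA mask; a hypothetical caller, where the mask value is only an example:

```c
#include <linux/dma-mapping.h>
#include <linux/errno.h>

/* Illustrative only; not part of the patch. */
static int example_set_mask(struct device *dev)
{
	/*
	 * For a device within the IOMMU's scope amd_iommu_dma_supported()
	 * accepts the mask; the address allocator then honours it when
	 * IOVAs are handed out in __map_single().
	 */
	if (dma_set_mask(dev, DMA_BIT_MASK(32)))
		return -EIO;

	return 0;
}
```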
| 1287 | |||
| 1288 | /* | ||
| 1289 | * The function for pre-allocating protection domains. | ||
| 1290 | * | ||
| 882 | * If the driver core informs the DMA layer if a driver grabs a device | 1291 | * If the driver core informs the DMA layer if a driver grabs a device |
| 883 | * we don't need to preallocate the protection domains anymore. | 1292 | * we don't need to preallocate the protection domains anymore. |
| 884 | * For now we have to. | 1293 | * For now we have to. |
| @@ -893,7 +1302,7 @@ void prealloc_protection_domains(void) | |||
| 893 | 1302 | ||
| 894 | while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { | 1303 | while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { |
| 895 | devid = (dev->bus->number << 8) | dev->devfn; | 1304 | devid = (dev->bus->number << 8) | dev->devfn; |
| 896 | if (devid >= amd_iommu_last_bdf) | 1305 | if (devid > amd_iommu_last_bdf) |
| 897 | continue; | 1306 | continue; |
| 898 | devid = amd_iommu_alias_table[devid]; | 1307 | devid = amd_iommu_alias_table[devid]; |
| 899 | if (domain_for_device(devid)) | 1308 | if (domain_for_device(devid)) |
| @@ -905,10 +1314,9 @@ void prealloc_protection_domains(void) | |||
| 905 | if (!dma_dom) | 1314 | if (!dma_dom) |
| 906 | continue; | 1315 | continue; |
| 907 | init_unity_mappings_for_device(dma_dom, devid); | 1316 | init_unity_mappings_for_device(dma_dom, devid); |
| 908 | set_device_domain(iommu, &dma_dom->domain, devid); | 1317 | dma_dom->target_dev = devid; |
| 909 | printk(KERN_INFO "AMD IOMMU: Allocated domain %d for device ", | 1318 | |
| 910 | dma_dom->domain.id); | 1319 | list_add_tail(&dma_dom->list, &iommu_pd_list); |
| 911 | print_devid(devid, 1); | ||
| 912 | } | 1320 | } |
| 913 | } | 1321 | } |
| 914 | 1322 | ||
| @@ -919,14 +1327,23 @@ static struct dma_mapping_ops amd_iommu_dma_ops = { | |||
| 919 | .unmap_single = unmap_single, | 1327 | .unmap_single = unmap_single, |
| 920 | .map_sg = map_sg, | 1328 | .map_sg = map_sg, |
| 921 | .unmap_sg = unmap_sg, | 1329 | .unmap_sg = unmap_sg, |
| 1330 | .dma_supported = amd_iommu_dma_supported, | ||
| 922 | }; | 1331 | }; |
| 923 | 1332 | ||
| 1333 | /* | ||
| 1334 | * The function which glues the AMD IOMMU driver into dma_ops. | ||
| 1335 | */ | ||
| 924 | int __init amd_iommu_init_dma_ops(void) | 1336 | int __init amd_iommu_init_dma_ops(void) |
| 925 | { | 1337 | { |
| 926 | struct amd_iommu *iommu; | 1338 | struct amd_iommu *iommu; |
| 927 | int order = amd_iommu_aperture_order; | 1339 | int order = amd_iommu_aperture_order; |
| 928 | int ret; | 1340 | int ret; |
| 929 | 1341 | ||
| 1342 | /* | ||
| 1343 | * first allocate a default protection domain for every IOMMU we | ||
| 1344 | * found in the system. Devices not assigned to any other | ||
| 1345 | * protection domain will be assigned to the default one. | ||
| 1346 | */ | ||
| 930 | list_for_each_entry(iommu, &amd_iommu_list, list) { | 1347 | list_for_each_entry(iommu, &amd_iommu_list, list) { |
| 931 | iommu->default_dom = dma_ops_domain_alloc(iommu, order); | 1348 | iommu->default_dom = dma_ops_domain_alloc(iommu, order); |
| 932 | if (iommu->default_dom == NULL) | 1349 | if (iommu->default_dom == NULL) |
| @@ -936,6 +1353,10 @@ int __init amd_iommu_init_dma_ops(void) | |||
| 936 | goto free_domains; | 1353 | goto free_domains; |
| 937 | } | 1354 | } |
| 938 | 1355 | ||
| 1356 | /* | ||
| 1357 | * If device isolation is enabled, pre-allocate the protection | ||
| 1358 | * domains for each device. | ||
| 1359 | */ | ||
| 939 | if (amd_iommu_isolate) | 1360 | if (amd_iommu_isolate) |
| 940 | prealloc_protection_domains(); | 1361 | prealloc_protection_domains(); |
| 941 | 1362 | ||
| @@ -947,6 +1368,7 @@ int __init amd_iommu_init_dma_ops(void) | |||
| 947 | gart_iommu_aperture = 0; | 1368 | gart_iommu_aperture = 0; |
| 948 | #endif | 1369 | #endif |
| 949 | 1370 | ||
| 1371 | /* Finally make the driver visible to the other device drivers */ | ||
| 950 | dma_ops = &amd_iommu_dma_ops; | 1372 | dma_ops = &amd_iommu_dma_ops; |
| 951 | 1373 | ||
| 952 | return 0; | 1374 | return 0; |
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 2a13e430437d..0cdcda35a05f 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c | |||
| @@ -22,23 +22,17 @@ | |||
| 22 | #include <linux/gfp.h> | 22 | #include <linux/gfp.h> |
| 23 | #include <linux/list.h> | 23 | #include <linux/list.h> |
| 24 | #include <linux/sysdev.h> | 24 | #include <linux/sysdev.h> |
| 25 | #include <linux/interrupt.h> | ||
| 26 | #include <linux/msi.h> | ||
| 25 | #include <asm/pci-direct.h> | 27 | #include <asm/pci-direct.h> |
| 26 | #include <asm/amd_iommu_types.h> | 28 | #include <asm/amd_iommu_types.h> |
| 27 | #include <asm/amd_iommu.h> | 29 | #include <asm/amd_iommu.h> |
| 28 | #include <asm/gart.h> | 30 | #include <asm/iommu.h> |
| 29 | 31 | ||
| 30 | /* | 32 | /* |
| 31 | * definitions for the ACPI scanning code | 33 | * definitions for the ACPI scanning code |
| 32 | */ | 34 | */ |
| 33 | #define UPDATE_LAST_BDF(x) do {\ | ||
| 34 | if ((x) > amd_iommu_last_bdf) \ | ||
| 35 | amd_iommu_last_bdf = (x); \ | ||
| 36 | } while (0); | ||
| 37 | |||
| 38 | #define DEVID(bus, devfn) (((bus) << 8) | (devfn)) | ||
| 39 | #define PCI_BUS(x) (((x) >> 8) & 0xff) | ||
| 40 | #define IVRS_HEADER_LENGTH 48 | 35 | #define IVRS_HEADER_LENGTH 48 |
| 41 | #define TBL_SIZE(x) (1 << (PAGE_SHIFT + get_order(amd_iommu_last_bdf * (x)))) | ||
| 42 | 36 | ||
| 43 | #define ACPI_IVHD_TYPE 0x10 | 37 | #define ACPI_IVHD_TYPE 0x10 |
| 44 | #define ACPI_IVMD_TYPE_ALL 0x20 | 38 | #define ACPI_IVMD_TYPE_ALL 0x20 |
| @@ -71,6 +65,17 @@ | |||
| 71 | #define ACPI_DEVFLAG_LINT1 0x80 | 65 | #define ACPI_DEVFLAG_LINT1 0x80 |
| 72 | #define ACPI_DEVFLAG_ATSDIS 0x10000000 | 66 | #define ACPI_DEVFLAG_ATSDIS 0x10000000 |
| 73 | 67 | ||
| 68 | /* | ||
| 69 | * ACPI table definitions | ||
| 70 | * | ||
| 71 | * These data structures are laid over the table to parse the important values | ||
| 72 | * out of it. | ||
| 73 | */ | ||
| 74 | |||
| 75 | /* | ||
| 76 | * structure describing one IOMMU in the ACPI table. Typically followed by one | ||
| 77 | * or more ivhd_entrys. | ||
| 78 | */ | ||
| 74 | struct ivhd_header { | 79 | struct ivhd_header { |
| 75 | u8 type; | 80 | u8 type; |
| 76 | u8 flags; | 81 | u8 flags; |
| @@ -83,6 +88,10 @@ struct ivhd_header { | |||
| 83 | u32 reserved; | 88 | u32 reserved; |
| 84 | } __attribute__((packed)); | 89 | } __attribute__((packed)); |
| 85 | 90 | ||
| 91 | /* | ||
| 92 | * A device entry describing which devices a specific IOMMU translates and | ||
| 93 | * which requestor ids they use. | ||
| 94 | */ | ||
| 86 | struct ivhd_entry { | 95 | struct ivhd_entry { |
| 87 | u8 type; | 96 | u8 type; |
| 88 | u16 devid; | 97 | u16 devid; |
| @@ -90,6 +99,10 @@ struct ivhd_entry { | |||
| 90 | u32 ext; | 99 | u32 ext; |
| 91 | } __attribute__((packed)); | 100 | } __attribute__((packed)); |
| 92 | 101 | ||
| 102 | /* | ||
| 103 | * An AMD IOMMU memory definition structure. It defines things like exclusion | ||
| 104 | * ranges for devices and regions that should be unity mapped. | ||
| 105 | */ | ||
| 93 | struct ivmd_header { | 106 | struct ivmd_header { |
| 94 | u8 type; | 107 | u8 type; |
| 95 | u8 flags; | 108 | u8 flags; |
| @@ -103,22 +116,81 @@ struct ivmd_header { | |||
| 103 | 116 | ||
| 104 | static int __initdata amd_iommu_detected; | 117 | static int __initdata amd_iommu_detected; |
| 105 | 118 | ||
| 106 | u16 amd_iommu_last_bdf; | 119 | u16 amd_iommu_last_bdf; /* largest PCI device id we have |
| 107 | struct list_head amd_iommu_unity_map; | 120 | to handle */ |
| 108 | unsigned amd_iommu_aperture_order = 26; | 121 | LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings |
| 109 | int amd_iommu_isolate; | 122 | we find in ACPI */ |
| 123 | unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */ | ||
| 124 | int amd_iommu_isolate; /* if 1, device isolation is enabled */ | ||
| 125 | bool amd_iommu_unmap_flush; /* if true, flush on every unmap */ | ||
| 126 | |||
| 127 | LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the | ||
| 128 | system */ | ||
| 110 | 129 | ||
| 111 | struct list_head amd_iommu_list; | 130 | /* |
| 131 | * Pointer to the device table which is shared by all AMD IOMMUs | ||
| 132 | * it is indexed by the PCI device id or the HT unit id and contains | ||
| 133 | * information about the domain the device belongs to as well as the | ||
| 134 | * page table root pointer. | ||
| 135 | */ | ||
| 112 | struct dev_table_entry *amd_iommu_dev_table; | 136 | struct dev_table_entry *amd_iommu_dev_table; |
| 137 | |||
| 138 | /* | ||
| 139 | * The alias table is a driver specific data structure which contains the | ||
| 140 | * mappings of the PCI device ids to the actual requestor ids on the IOMMU. | ||
| 141 | * More than one device can share the same requestor id. | ||
| 142 | */ | ||
| 113 | u16 *amd_iommu_alias_table; | 143 | u16 *amd_iommu_alias_table; |
| 144 | |||
| 145 | /* | ||
| 146 | * The rlookup table is used to find the IOMMU which is responsible | ||
| 147 | * for a specific device. It is also indexed by the PCI device id. | ||
| 148 | */ | ||
| 114 | struct amd_iommu **amd_iommu_rlookup_table; | 149 | struct amd_iommu **amd_iommu_rlookup_table; |
| 150 | |||
| 151 | /* | ||
| 152 | * The pd table (protection domain table) is used to find the protection domain | ||
| 153 | * data structure a device belongs to. Indexed with the PCI device id too. | ||
| 154 | */ | ||
| 115 | struct protection_domain **amd_iommu_pd_table; | 155 | struct protection_domain **amd_iommu_pd_table; |
| 156 | |||
| 157 | /* | ||
| 158 | * AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap | ||
| 159 | * to know which ones are already in use. | ||
| 160 | */ | ||
| 116 | unsigned long *amd_iommu_pd_alloc_bitmap; | 161 | unsigned long *amd_iommu_pd_alloc_bitmap; |
| 117 | 162 | ||
| 118 | static u32 dev_table_size; | 163 | static u32 dev_table_size; /* size of the device table */ |
| 119 | static u32 alias_table_size; | 164 | static u32 alias_table_size; /* size of the alias table */ |
| 120 | static u32 rlookup_table_size; | 165 | static u32 rlookup_table_size; /* size of the rlookup table */ |
| 166 | |||
| 167 | static inline void update_last_devid(u16 devid) | ||
| 168 | { | ||
| 169 | if (devid > amd_iommu_last_bdf) | ||
| 170 | amd_iommu_last_bdf = devid; | ||
| 171 | } | ||
| 172 | |||
| 173 | static inline unsigned long tbl_size(int entry_size) | ||
| 174 | { | ||
| 175 | unsigned shift = PAGE_SHIFT + | ||
| 176 | get_order(amd_iommu_last_bdf * entry_size); | ||
| 177 | |||
| 178 | return 1UL << shift; | ||
| 179 | } | ||
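To make the sizing concrete: with the worst-case amd_iommu_last_bdf of 0xffff, 4 KiB pages and 32-byte device table entries (the entry size is an assumption of this example), tbl_size() works out as follows:

```c
/*
 * Illustrative only; not part of the patch.
 *
 *   0xffff * 32 bytes          ->  just under 2 MiB of entries
 *   get_order(0xffff * 32)     ==  9    (512 pages)
 *   shift = PAGE_SHIFT + 9     ==  21
 *   tbl_size(32) == 1UL << 21  ==  2 MiB
 *
 * The alias and rlookup tables are sized the same way with their own,
 * smaller entry sizes.
 */
```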
| 121 | 180 | ||
| 181 | /**************************************************************************** | ||
| 182 | * | ||
| 183 | * AMD IOMMU MMIO register space handling functions | ||
| 184 | * | ||
| 185 | * These functions are used to program the IOMMU device registers in | ||
| 186 | * MMIO space required for that driver. | ||
| 187 | * | ||
| 188 | ****************************************************************************/ | ||
| 189 | |||
| 190 | /* | ||
| 191 | * This function set the exclusion range in the IOMMU. DMA accesses to the | ||
| 192 | * exclusion range are passed through untranslated | ||
| 193 | */ | ||
| 122 | static void __init iommu_set_exclusion_range(struct amd_iommu *iommu) | 194 | static void __init iommu_set_exclusion_range(struct amd_iommu *iommu) |
| 123 | { | 195 | { |
| 124 | u64 start = iommu->exclusion_start & PAGE_MASK; | 196 | u64 start = iommu->exclusion_start & PAGE_MASK; |
| @@ -137,9 +209,10 @@ static void __init iommu_set_exclusion_range(struct amd_iommu *iommu) | |||
| 137 | &entry, sizeof(entry)); | 209 | &entry, sizeof(entry)); |
| 138 | } | 210 | } |
| 139 | 211 | ||
| 212 | /* Programs the physical address of the device table into the IOMMU hardware */ | ||
| 140 | static void __init iommu_set_device_table(struct amd_iommu *iommu) | 213 | static void __init iommu_set_device_table(struct amd_iommu *iommu) |
| 141 | { | 214 | { |
| 142 | u32 entry; | 215 | u64 entry; |
| 143 | 216 | ||
| 144 | BUG_ON(iommu->mmio_base == NULL); | 217 | BUG_ON(iommu->mmio_base == NULL); |
| 145 | 218 | ||
| @@ -149,6 +222,7 @@ static void __init iommu_set_device_table(struct amd_iommu *iommu) | |||
| 149 | &entry, sizeof(entry)); | 222 | &entry, sizeof(entry)); |
| 150 | } | 223 | } |
| 151 | 224 | ||
| 225 | /* Generic functions to enable/disable certain features of the IOMMU. */ | ||
| 152 | static void __init iommu_feature_enable(struct amd_iommu *iommu, u8 bit) | 226 | static void __init iommu_feature_enable(struct amd_iommu *iommu, u8 bit) |
| 153 | { | 227 | { |
| 154 | u32 ctrl; | 228 | u32 ctrl; |
| @@ -162,20 +236,35 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit) | |||
| 162 | { | 236 | { |
| 163 | u32 ctrl; | 237 | u32 ctrl; |
| 164 | 238 | ||
| 165 | ctrl = (u64)readl(iommu->mmio_base + MMIO_CONTROL_OFFSET); | 239 | ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET); |
| 166 | ctrl &= ~(1 << bit); | 240 | ctrl &= ~(1 << bit); |
| 167 | writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); | 241 | writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); |
| 168 | } | 242 | } |
| 169 | 243 | ||
| 244 | /* Function to enable the hardware */ | ||
| 170 | void __init iommu_enable(struct amd_iommu *iommu) | 245 | void __init iommu_enable(struct amd_iommu *iommu) |
| 171 | { | 246 | { |
| 172 | printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at "); | 247 | printk(KERN_INFO "AMD IOMMU: Enabling IOMMU " |
| 173 | print_devid(iommu->devid, 0); | 248 | "at %02x:%02x.%x cap 0x%hx\n", |
| 174 | printk(" cap 0x%hx\n", iommu->cap_ptr); | 249 | iommu->dev->bus->number, |
| 250 | PCI_SLOT(iommu->dev->devfn), | ||
| 251 | PCI_FUNC(iommu->dev->devfn), | ||
| 252 | iommu->cap_ptr); | ||
| 175 | 253 | ||
| 176 | iommu_feature_enable(iommu, CONTROL_IOMMU_EN); | 254 | iommu_feature_enable(iommu, CONTROL_IOMMU_EN); |
| 177 | } | 255 | } |
| 178 | 256 | ||
| 257 | /* Function to enable IOMMU event logging and event interrupts */ | ||
| 258 | void __init iommu_enable_event_logging(struct amd_iommu *iommu) | ||
| 259 | { | ||
| 260 | iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); | ||
| 261 | iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); | ||
| 262 | } | ||
| 263 | |||
| 264 | /* | ||
| 265 | * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in | ||
| 266 | * the system has one. | ||
| 267 | */ | ||
| 179 | static u8 * __init iommu_map_mmio_space(u64 address) | 268 | static u8 * __init iommu_map_mmio_space(u64 address) |
| 180 | { | 269 | { |
| 181 | u8 *ret; | 270 | u8 *ret; |
| @@ -199,16 +288,41 @@ static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu) | |||
| 199 | release_mem_region(iommu->mmio_phys, MMIO_REGION_LENGTH); | 288 | release_mem_region(iommu->mmio_phys, MMIO_REGION_LENGTH); |
| 200 | } | 289 | } |
| 201 | 290 | ||
| 291 | /**************************************************************************** | ||
| 292 | * | ||
| 293 | * The functions below belong to the first pass of AMD IOMMU ACPI table | ||
| 294 | * parsing. In this pass we try to find out the highest device id this | ||
| 295 | * code has to handle. Based on this information the size of the shared data | ||
| 296 | * structures is determined later. | ||
| 297 | * | ||
| 298 | ****************************************************************************/ | ||
| 299 | |||
| 300 | /* | ||
| 301 | * This function calculates the length of a given IVHD entry | ||
| 302 | */ | ||
| 303 | static inline int ivhd_entry_length(u8 *ivhd) | ||
| 304 | { | ||
| 305 | return 0x04 << (*ivhd >> 6); | ||
| 306 | } | ||
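The length encoding decoded here packs an IVHD entry's size into the top two bits of its type byte; a hedged worked example, where the 0x42 alias entry type is taken from the driver's definitions and is an assumption of this example:

```c
/*
 * Illustrative only; not part of the patch.
 *
 *   type 0x00..0x3f  ->  0x04 << 0  ==  4 byte entries
 *   type 0x40..0x7f  ->  0x04 << 1  ==  8 byte entries
 *   type 0x80..0xbf  ->  0x04 << 2  == 16 byte entries
 *   type 0xc0..0xff  ->  0x04 << 3  == 32 byte entries
 *
 * e.g. an IVHD_DEV_ALIAS entry (type 0x42) is 8 bytes long, which is why
 * the parser can simply step with p += ivhd_entry_length(p).
 */
```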
| 307 | |||
| 308 | /* | ||
| 309 | * This function reads the last device id the IOMMU has to handle from the PCI | ||
| 310 | * capability header for this IOMMU | ||
| 311 | */ | ||
| 202 | static int __init find_last_devid_on_pci(int bus, int dev, int fn, int cap_ptr) | 312 | static int __init find_last_devid_on_pci(int bus, int dev, int fn, int cap_ptr) |
| 203 | { | 313 | { |
| 204 | u32 cap; | 314 | u32 cap; |
| 205 | 315 | ||
| 206 | cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET); | 316 | cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET); |
| 207 | UPDATE_LAST_BDF(DEVID(MMIO_GET_BUS(cap), MMIO_GET_LD(cap))); | 317 | update_last_devid(calc_devid(MMIO_GET_BUS(cap), MMIO_GET_LD(cap))); |
| 208 | 318 | ||
| 209 | return 0; | 319 | return 0; |
| 210 | } | 320 | } |
| 211 | 321 | ||
| 322 | /* | ||
| 323 | * After reading the highest device id from the IOMMU PCI capability header | ||
| 324 | * this function looks if there is a higher device id defined in the ACPI table | ||
| 325 | */ | ||
| 212 | static int __init find_last_devid_from_ivhd(struct ivhd_header *h) | 326 | static int __init find_last_devid_from_ivhd(struct ivhd_header *h) |
| 213 | { | 327 | { |
| 214 | u8 *p = (void *)h, *end = (void *)h; | 328 | u8 *p = (void *)h, *end = (void *)h; |
| @@ -229,12 +343,13 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h) | |||
| 229 | case IVHD_DEV_RANGE_END: | 343 | case IVHD_DEV_RANGE_END: |
| 230 | case IVHD_DEV_ALIAS: | 344 | case IVHD_DEV_ALIAS: |
| 231 | case IVHD_DEV_EXT_SELECT: | 345 | case IVHD_DEV_EXT_SELECT: |
| 232 | UPDATE_LAST_BDF(dev->devid); | 346 | /* all the above subfield types refer to device ids */ |
| 347 | update_last_devid(dev->devid); | ||
| 233 | break; | 348 | break; |
| 234 | default: | 349 | default: |
| 235 | break; | 350 | break; |
| 236 | } | 351 | } |
| 237 | p += 0x04 << (*p >> 6); | 352 | p += ivhd_entry_length(p); |
| 238 | } | 353 | } |
| 239 | 354 | ||
| 240 | WARN_ON(p != end); | 355 | WARN_ON(p != end); |
| @@ -242,6 +357,11 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h) | |||
| 242 | return 0; | 357 | return 0; |
| 243 | } | 358 | } |
| 244 | 359 | ||
| 360 | /* | ||
| 361 | * Iterate over all IVHD entries in the ACPI table and find the highest device | ||
| 362 | * id which we need to handle. This is the first of three functions which parse | ||
| 363 | * the ACPI table. So we check the checksum here. | ||
| 364 | */ | ||
| 245 | static int __init find_last_devid_acpi(struct acpi_table_header *table) | 365 | static int __init find_last_devid_acpi(struct acpi_table_header *table) |
| 246 | { | 366 | { |
| 247 | int i; | 367 | int i; |
| @@ -277,19 +397,31 @@ static int __init find_last_devid_acpi(struct acpi_table_header *table) | |||
| 277 | return 0; | 397 | return 0; |
| 278 | } | 398 | } |
| 279 | 399 | ||
| 400 | /**************************************************************************** | ||
| 401 | * | ||
| 402 | * The following functions belong to the code path which parses the ACPI table | ||
| 403 | * the second time. In this ACPI parsing iteration we allocate IOMMU specific | ||
| 404 | * data structures, initialize the device/alias/rlookup table and also | ||
| 405 | * basically initialize the hardware. | ||
| 406 | * | ||
| 407 | ****************************************************************************/ | ||
| 408 | |||
| 409 | /* | ||
| 410 | * Allocates the command buffer. This buffer is per AMD IOMMU. We can | ||
| 411 | * write commands to that buffer later and the IOMMU will execute them | ||
| 412 | * asynchronously | ||
| 413 | */ | ||
| 280 | static u8 * __init alloc_command_buffer(struct amd_iommu *iommu) | 414 | static u8 * __init alloc_command_buffer(struct amd_iommu *iommu) |
| 281 | { | 415 | { |
| 282 | u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL, | 416 | u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, |
| 283 | get_order(CMD_BUFFER_SIZE)); | 417 | get_order(CMD_BUFFER_SIZE)); |
| 284 | u64 entry = 0; | 418 | u64 entry; |
| 285 | 419 | ||
| 286 | if (cmd_buf == NULL) | 420 | if (cmd_buf == NULL) |
| 287 | return NULL; | 421 | return NULL; |
| 288 | 422 | ||
| 289 | iommu->cmd_buf_size = CMD_BUFFER_SIZE; | 423 | iommu->cmd_buf_size = CMD_BUFFER_SIZE; |
| 290 | 424 | ||
| 291 | memset(cmd_buf, 0, CMD_BUFFER_SIZE); | ||
| 292 | |||
| 293 | entry = (u64)virt_to_phys(cmd_buf); | 425 | entry = (u64)virt_to_phys(cmd_buf); |
| 294 | entry |= MMIO_CMD_SIZE_512; | 426 | entry |= MMIO_CMD_SIZE_512; |
| 295 | memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, | 427 | memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, |
| @@ -302,11 +434,35 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu) | |||
| 302 | 434 | ||
| 303 | static void __init free_command_buffer(struct amd_iommu *iommu) | 435 | static void __init free_command_buffer(struct amd_iommu *iommu) |
| 304 | { | 436 | { |
| 305 | if (iommu->cmd_buf) | 437 | free_pages((unsigned long)iommu->cmd_buf, |
| 306 | free_pages((unsigned long)iommu->cmd_buf, | 438 | get_order(iommu->cmd_buf_size)); |
| 307 | get_order(CMD_BUFFER_SIZE)); | 439 | } |
| 440 | |||
| 441 | /* allocates the memory where the IOMMU will log its events to */ | ||
| 442 | static u8 * __init alloc_event_buffer(struct amd_iommu *iommu) | ||
| 443 | { | ||
| 444 | u64 entry; | ||
| 445 | iommu->evt_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, | ||
| 446 | get_order(EVT_BUFFER_SIZE)); | ||
| 447 | |||
| 448 | if (iommu->evt_buf == NULL) | ||
| 449 | return NULL; | ||
| 450 | |||
| 451 | entry = (u64)virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK; | ||
| 452 | memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET, | ||
| 453 | &entry, sizeof(entry)); | ||
| 454 | |||
| 455 | iommu->evt_buf_size = EVT_BUFFER_SIZE; | ||
| 456 | |||
| 457 | return iommu->evt_buf; | ||
| 458 | } | ||
| 459 | |||
| 460 | static void __init free_event_buffer(struct amd_iommu *iommu) | ||
| 461 | { | ||
| 462 | free_pages((unsigned long)iommu->evt_buf, get_order(EVT_BUFFER_SIZE)); | ||
| 308 | } | 463 | } |
| 309 | 464 | ||
| 465 | /* sets a specific bit in the device table entry. */ | ||
| 310 | static void set_dev_entry_bit(u16 devid, u8 bit) | 466 | static void set_dev_entry_bit(u16 devid, u8 bit) |
| 311 | { | 467 | { |
| 312 | int i = (bit >> 5) & 0x07; | 468 | int i = (bit >> 5) & 0x07; |
| @@ -315,7 +471,18 @@ static void set_dev_entry_bit(u16 devid, u8 bit) | |||
| 315 | amd_iommu_dev_table[devid].data[i] |= (1 << _bit); | 471 | amd_iommu_dev_table[devid].data[i] |= (1 << _bit); |
| 316 | } | 472 | } |
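set_dev_entry_bit() addresses the 256-bit device table entry (eight u32 words) by splitting the flat bit number into a word index and a bit offset; assuming the usual split from the surrounding, elided lines, a worked example:

```c
/*
 * Illustrative only; not part of the patch. Assuming
 *
 *   i    = (bit >> 5) & 0x07;    bit / 32 -> which u32 word
 *   _bit = bit & 0x1f;           bit % 32 -> bit inside that word
 *
 * a request for bit 62 gives i == 1 and _bit == 30, so the helper does
 * amd_iommu_dev_table[devid].data[1] |= 1 << 30.
 */
```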
| 317 | 473 | ||
| 318 | static void __init set_dev_entry_from_acpi(u16 devid, u32 flags, u32 ext_flags) | 474 | /* Writes the specific IOMMU for a device into the rlookup table */ |
| 475 | static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid) | ||
| 476 | { | ||
| 477 | amd_iommu_rlookup_table[devid] = iommu; | ||
| 478 | } | ||
| 479 | |||
| 480 | /* | ||
| 481 | * This function takes the device specific flags read from the ACPI | ||
| 482 | * table and sets up the device table entry with that information | ||
| 483 | */ | ||
| 484 | static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu, | ||
| 485 | u16 devid, u32 flags, u32 ext_flags) | ||
| 319 | { | 486 | { |
| 320 | if (flags & ACPI_DEVFLAG_INITPASS) | 487 | if (flags & ACPI_DEVFLAG_INITPASS) |
| 321 | set_dev_entry_bit(devid, DEV_ENTRY_INIT_PASS); | 488 | set_dev_entry_bit(devid, DEV_ENTRY_INIT_PASS); |
| @@ -331,13 +498,14 @@ static void __init set_dev_entry_from_acpi(u16 devid, u32 flags, u32 ext_flags) | |||
| 331 | set_dev_entry_bit(devid, DEV_ENTRY_LINT0_PASS); | 498 | set_dev_entry_bit(devid, DEV_ENTRY_LINT0_PASS); |
| 332 | if (flags & ACPI_DEVFLAG_LINT1) | 499 | if (flags & ACPI_DEVFLAG_LINT1) |
| 333 | set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS); | 500 | set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS); |
| 334 | } | ||
| 335 | 501 | ||
| 336 | static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid) | 502 | set_iommu_for_device(iommu, devid); |
| 337 | { | ||
| 338 | amd_iommu_rlookup_table[devid] = iommu; | ||
| 339 | } | 503 | } |
| 340 | 504 | ||
| 505 | /* | ||
| 506 | * Reads the device exclusion range from ACPI and initialize IOMMU with | ||
| 507 | * it | ||
| 508 | */ | ||
| 341 | static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m) | 509 | static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m) |
| 342 | { | 510 | { |
| 343 | struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; | 511 | struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; |
| @@ -346,27 +514,45 @@ static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m) | |||
| 346 | return; | 514 | return; |
| 347 | 515 | ||
| 348 | if (iommu) { | 516 | if (iommu) { |
| 517 | /* | ||
| 518 | * We can only configure exclusion ranges per IOMMU, not | ||
| 519 | * per device. But we can enable the exclusion range per | ||
| 520 | * device. This is done here | ||
| 521 | */ | ||
| 349 | set_dev_entry_bit(m->devid, DEV_ENTRY_EX); | 522 | set_dev_entry_bit(m->devid, DEV_ENTRY_EX); |
| 350 | iommu->exclusion_start = m->range_start; | 523 | iommu->exclusion_start = m->range_start; |
| 351 | iommu->exclusion_length = m->range_length; | 524 | iommu->exclusion_length = m->range_length; |
| 352 | } | 525 | } |
| 353 | } | 526 | } |
| 354 | 527 | ||
| 528 | /* | ||
| 529 | * This function reads some important data from the IOMMU PCI space and | ||
| 530 | * initializes the driver data structure with it. It reads the hardware | ||
| 531 | * capabilities and the first/last device entries | ||
| 532 | */ | ||
| 355 | static void __init init_iommu_from_pci(struct amd_iommu *iommu) | 533 | static void __init init_iommu_from_pci(struct amd_iommu *iommu) |
| 356 | { | 534 | { |
| 357 | int bus = PCI_BUS(iommu->devid); | ||
| 358 | int dev = PCI_SLOT(iommu->devid); | ||
| 359 | int fn = PCI_FUNC(iommu->devid); | ||
| 360 | int cap_ptr = iommu->cap_ptr; | 535 | int cap_ptr = iommu->cap_ptr; |
| 361 | u32 range; | 536 | u32 range, misc; |
| 362 | 537 | ||
| 363 | iommu->cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_CAP_HDR_OFFSET); | 538 | pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET, |
| 364 | 539 | &iommu->cap); | |
| 365 | range = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET); | 540 | pci_read_config_dword(iommu->dev, cap_ptr + MMIO_RANGE_OFFSET, |
| 366 | iommu->first_device = DEVID(MMIO_GET_BUS(range), MMIO_GET_FD(range)); | 541 | &range); |
| 367 | iommu->last_device = DEVID(MMIO_GET_BUS(range), MMIO_GET_LD(range)); | 542 | pci_read_config_dword(iommu->dev, cap_ptr + MMIO_MISC_OFFSET, |
| 543 | &misc); | ||
| 544 | |||
| 545 | iommu->first_device = calc_devid(MMIO_GET_BUS(range), | ||
| 546 | MMIO_GET_FD(range)); | ||
| 547 | iommu->last_device = calc_devid(MMIO_GET_BUS(range), | ||
| 548 | MMIO_GET_LD(range)); | ||
| 549 | iommu->evt_msi_num = MMIO_MSI_NUM(misc); | ||
| 368 | } | 550 | } |
| 369 | 551 | ||
| 552 | /* | ||
| 553 | * Takes a pointer to an AMD IOMMU entry in the ACPI table and | ||
| 554 | * initializes the hardware and our data structures with it. | ||
| 555 | */ | ||
| 370 | static void __init init_iommu_from_acpi(struct amd_iommu *iommu, | 556 | static void __init init_iommu_from_acpi(struct amd_iommu *iommu, |
| 371 | struct ivhd_header *h) | 557 | struct ivhd_header *h) |
| 372 | { | 558 | { |
| @@ -374,7 +560,7 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, | |||
| 374 | u8 *end = p, flags = 0; | 560 | u8 *end = p, flags = 0; |
| 375 | u16 dev_i, devid = 0, devid_start = 0, devid_to = 0; | 561 | u16 dev_i, devid = 0, devid_start = 0, devid_to = 0; |
| 376 | u32 ext_flags = 0; | 562 | u32 ext_flags = 0; |
| 377 | bool alias = 0; | 563 | bool alias = false; |
| 378 | struct ivhd_entry *e; | 564 | struct ivhd_entry *e; |
| 379 | 565 | ||
| 380 | /* | 566 | /* |
| @@ -414,22 +600,23 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, | |||
| 414 | case IVHD_DEV_ALL: | 600 | case IVHD_DEV_ALL: |
| 415 | for (dev_i = iommu->first_device; | 601 | for (dev_i = iommu->first_device; |
| 416 | dev_i <= iommu->last_device; ++dev_i) | 602 | dev_i <= iommu->last_device; ++dev_i) |
| 417 | set_dev_entry_from_acpi(dev_i, e->flags, 0); | 603 | set_dev_entry_from_acpi(iommu, dev_i, |
| 604 | e->flags, 0); | ||
| 418 | break; | 605 | break; |
| 419 | case IVHD_DEV_SELECT: | 606 | case IVHD_DEV_SELECT: |
| 420 | devid = e->devid; | 607 | devid = e->devid; |
| 421 | set_dev_entry_from_acpi(devid, e->flags, 0); | 608 | set_dev_entry_from_acpi(iommu, devid, e->flags, 0); |
| 422 | break; | 609 | break; |
| 423 | case IVHD_DEV_SELECT_RANGE_START: | 610 | case IVHD_DEV_SELECT_RANGE_START: |
| 424 | devid_start = e->devid; | 611 | devid_start = e->devid; |
| 425 | flags = e->flags; | 612 | flags = e->flags; |
| 426 | ext_flags = 0; | 613 | ext_flags = 0; |
| 427 | alias = 0; | 614 | alias = false; |
| 428 | break; | 615 | break; |
| 429 | case IVHD_DEV_ALIAS: | 616 | case IVHD_DEV_ALIAS: |
| 430 | devid = e->devid; | 617 | devid = e->devid; |
| 431 | devid_to = e->ext >> 8; | 618 | devid_to = e->ext >> 8; |
| 432 | set_dev_entry_from_acpi(devid, e->flags, 0); | 619 | set_dev_entry_from_acpi(iommu, devid, e->flags, 0); |
| 433 | amd_iommu_alias_table[devid] = devid_to; | 620 | amd_iommu_alias_table[devid] = devid_to; |
| 434 | break; | 621 | break; |
| 435 | case IVHD_DEV_ALIAS_RANGE: | 622 | case IVHD_DEV_ALIAS_RANGE: |
| @@ -437,24 +624,25 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, | |||
| 437 | flags = e->flags; | 624 | flags = e->flags; |
| 438 | devid_to = e->ext >> 8; | 625 | devid_to = e->ext >> 8; |
| 439 | ext_flags = 0; | 626 | ext_flags = 0; |
| 440 | alias = 1; | 627 | alias = true; |
| 441 | break; | 628 | break; |
| 442 | case IVHD_DEV_EXT_SELECT: | 629 | case IVHD_DEV_EXT_SELECT: |
| 443 | devid = e->devid; | 630 | devid = e->devid; |
| 444 | set_dev_entry_from_acpi(devid, e->flags, e->ext); | 631 | set_dev_entry_from_acpi(iommu, devid, e->flags, |
| 632 | e->ext); | ||
| 445 | break; | 633 | break; |
| 446 | case IVHD_DEV_EXT_SELECT_RANGE: | 634 | case IVHD_DEV_EXT_SELECT_RANGE: |
| 447 | devid_start = e->devid; | 635 | devid_start = e->devid; |
| 448 | flags = e->flags; | 636 | flags = e->flags; |
| 449 | ext_flags = e->ext; | 637 | ext_flags = e->ext; |
| 450 | alias = 0; | 638 | alias = false; |
| 451 | break; | 639 | break; |
| 452 | case IVHD_DEV_RANGE_END: | 640 | case IVHD_DEV_RANGE_END: |
| 453 | devid = e->devid; | 641 | devid = e->devid; |
| 454 | for (dev_i = devid_start; dev_i <= devid; ++dev_i) { | 642 | for (dev_i = devid_start; dev_i <= devid; ++dev_i) { |
| 455 | if (alias) | 643 | if (alias) |
| 456 | amd_iommu_alias_table[dev_i] = devid_to; | 644 | amd_iommu_alias_table[dev_i] = devid_to; |
| 457 | set_dev_entry_from_acpi( | 645 | set_dev_entry_from_acpi(iommu, |
| 458 | amd_iommu_alias_table[dev_i], | 646 | amd_iommu_alias_table[dev_i], |
| 459 | flags, ext_flags); | 647 | flags, ext_flags); |
| 460 | } | 648 | } |
| @@ -463,10 +651,11 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, | |||
| 463 | break; | 651 | break; |
| 464 | } | 652 | } |
| 465 | 653 | ||
| 466 | p += 0x04 << (e->type >> 6); | 654 | p += ivhd_entry_length(p); |
| 467 | } | 655 | } |
| 468 | } | 656 | } |
| 469 | 657 | ||
| 658 | /* Initializes the device->iommu mapping for the driver */ | ||
| 470 | static int __init init_iommu_devices(struct amd_iommu *iommu) | 659 | static int __init init_iommu_devices(struct amd_iommu *iommu) |
| 471 | { | 660 | { |
| 472 | u16 i; | 661 | u16 i; |
| @@ -480,6 +669,7 @@ static int __init init_iommu_devices(struct amd_iommu *iommu) | |||
| 480 | static void __init free_iommu_one(struct amd_iommu *iommu) | 669 | static void __init free_iommu_one(struct amd_iommu *iommu) |
| 481 | { | 670 | { |
| 482 | free_command_buffer(iommu); | 671 | free_command_buffer(iommu); |
| 672 | free_event_buffer(iommu); | ||
| 483 | iommu_unmap_mmio_space(iommu); | 673 | iommu_unmap_mmio_space(iommu); |
| 484 | } | 674 | } |
| 485 | 675 | ||
| @@ -494,6 +684,11 @@ static void __init free_iommu_all(void) | |||
| 494 | } | 684 | } |
| 495 | } | 685 | } |
| 496 | 686 | ||
| 687 | /* | ||
| 688 | * This function glues the initialization of a single IOMMU | ||
| 689 | * together and also allocates the command buffer and programs the | ||
| 690 | * hardware. It does NOT enable the IOMMU. This is done afterwards. | ||
| 691 | */ | ||
| 497 | static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) | 692 | static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) |
| 498 | { | 693 | { |
| 499 | spin_lock_init(&iommu->lock); | 694 | spin_lock_init(&iommu->lock); |
| @@ -502,8 +697,12 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) | |||
| 502 | /* | 697 | /* |
| 503 | * Copy data from ACPI table entry to the iommu struct | 698 | * Copy data from ACPI table entry to the iommu struct |
| 504 | */ | 699 | */ |
| 505 | iommu->devid = h->devid; | 700 | iommu->dev = pci_get_bus_and_slot(PCI_BUS(h->devid), h->devid & 0xff); |
| 701 | if (!iommu->dev) | ||
| 702 | return 1; | ||
| 703 | |||
| 506 | iommu->cap_ptr = h->cap_ptr; | 704 | iommu->cap_ptr = h->cap_ptr; |
| 705 | iommu->pci_seg = h->pci_seg; | ||
| 507 | iommu->mmio_phys = h->mmio_phys; | 706 | iommu->mmio_phys = h->mmio_phys; |
| 508 | iommu->mmio_base = iommu_map_mmio_space(h->mmio_phys); | 707 | iommu->mmio_base = iommu_map_mmio_space(h->mmio_phys); |
| 509 | if (!iommu->mmio_base) | 708 | if (!iommu->mmio_base) |
| @@ -514,13 +713,23 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) | |||
| 514 | if (!iommu->cmd_buf) | 713 | if (!iommu->cmd_buf) |
| 515 | return -ENOMEM; | 714 | return -ENOMEM; |
| 516 | 715 | ||
| 716 | iommu->evt_buf = alloc_event_buffer(iommu); | ||
| 717 | if (!iommu->evt_buf) | ||
| 718 | return -ENOMEM; | ||
| 719 | |||
| 720 | iommu->int_enabled = false; | ||
| 721 | |||
| 517 | init_iommu_from_pci(iommu); | 722 | init_iommu_from_pci(iommu); |
| 518 | init_iommu_from_acpi(iommu, h); | 723 | init_iommu_from_acpi(iommu, h); |
| 519 | init_iommu_devices(iommu); | 724 | init_iommu_devices(iommu); |
| 520 | 725 | ||
| 521 | return 0; | 726 | return pci_enable_device(iommu->dev); |
| 522 | } | 727 | } |
| 523 | 728 | ||
| 729 | /* | ||
| 730 | * Iterates over all IOMMU entries in the ACPI table, allocates the | ||
| 731 | * IOMMU structure and initializes it with init_iommu_one() | ||
| 732 | */ | ||
| 524 | static int __init init_iommu_all(struct acpi_table_header *table) | 733 | static int __init init_iommu_all(struct acpi_table_header *table) |
| 525 | { | 734 | { |
| 526 | u8 *p = (u8 *)table, *end = (u8 *)table; | 735 | u8 *p = (u8 *)table, *end = (u8 *)table; |
| @@ -528,8 +737,6 @@ static int __init init_iommu_all(struct acpi_table_header *table) | |||
| 528 | struct amd_iommu *iommu; | 737 | struct amd_iommu *iommu; |
| 529 | int ret; | 738 | int ret; |
| 530 | 739 | ||
| 531 | INIT_LIST_HEAD(&amd_iommu_list); | ||
| 532 | |||
| 533 | end += table->length; | 740 | end += table->length; |
| 534 | p += IVRS_HEADER_LENGTH; | 741 | p += IVRS_HEADER_LENGTH; |
| 535 | 742 | ||
| @@ -555,6 +762,103 @@ static int __init init_iommu_all(struct acpi_table_header *table) | |||
| 555 | return 0; | 762 | return 0; |
| 556 | } | 763 | } |
| 557 | 764 | ||
| 765 | /**************************************************************************** | ||
| 766 | * | ||
| 767 | * The following functions initialize the MSI interrupts for all IOMMUs | ||
| 768 | * in the system. It's a bit challenging because there could be multiple | ||
| 769 | * IOMMUs per PCI BDF but we can call pci_enable_msi(x) only once per | ||
| 770 | * pci_dev. | ||
| 771 | * | ||
| 772 | ****************************************************************************/ | ||
| 773 | |||
| 774 | static int __init iommu_setup_msix(struct amd_iommu *iommu) | ||
| 775 | { | ||
| 776 | struct amd_iommu *curr; | ||
| 777 | struct msix_entry entries[32]; /* only 32 supported by AMD IOMMU */ | ||
| 778 | int nvec = 0, i; | ||
| 779 | |||
| 780 | list_for_each_entry(curr, &amd_iommu_list, list) { | ||
| 781 | if (curr->dev == iommu->dev) { | ||
| 782 | entries[nvec].entry = curr->evt_msi_num; | ||
| 783 | entries[nvec].vector = 0; | ||
| 784 | curr->int_enabled = true; | ||
| 785 | nvec++; | ||
| 786 | } | ||
| 787 | } | ||
| 788 | |||
| 789 | if (pci_enable_msix(iommu->dev, entries, nvec)) { | ||
| 790 | pci_disable_msix(iommu->dev); | ||
| 791 | return 1; | ||
| 792 | } | ||
| 793 | |||
| 794 | for (i = 0; i < nvec; ++i) { | ||
| 795 | int r = request_irq(entries->vector, amd_iommu_int_handler, | ||
| 796 | IRQF_SAMPLE_RANDOM, | ||
| 797 | "AMD IOMMU", | ||
| 798 | NULL); | ||
| 799 | if (r) | ||
| 800 | goto out_free; | ||
| 801 | } | ||
| 802 | |||
| 803 | return 0; | ||
| 804 | |||
| 805 | out_free: | ||
| 806 | for (i -= 1; i >= 0; --i) | ||
| 807 | free_irq(entries->vector, NULL); | ||
| 808 | |||
| 809 | pci_disable_msix(iommu->dev); | ||
| 810 | |||
| 811 | return 1; | ||
| 812 | } | ||
| 813 | |||
| 814 | static int __init iommu_setup_msi(struct amd_iommu *iommu) | ||
| 815 | { | ||
| 816 | int r; | ||
| 817 | struct amd_iommu *curr; | ||
| 818 | |||
| 819 | list_for_each_entry(curr, &amd_iommu_list, list) { | ||
| 820 | if (curr->dev == iommu->dev) | ||
| 821 | curr->int_enabled = true; | ||
| 822 | } | ||
| 823 | |||
| 824 | |||
| 825 | if (pci_enable_msi(iommu->dev)) | ||
| 826 | return 1; | ||
| 827 | |||
| 828 | r = request_irq(iommu->dev->irq, amd_iommu_int_handler, | ||
| 829 | IRQF_SAMPLE_RANDOM, | ||
| 830 | "AMD IOMMU", | ||
| 831 | NULL); | ||
| 832 | |||
| 833 | if (r) { | ||
| 834 | pci_disable_msi(iommu->dev); | ||
| 835 | return 1; | ||
| 836 | } | ||
| 837 | |||
| 838 | return 0; | ||
| 839 | } | ||
| 840 | |||
| 841 | static int __init iommu_init_msi(struct amd_iommu *iommu) | ||
| 842 | { | ||
| 843 | if (iommu->int_enabled) | ||
| 844 | return 0; | ||
| 845 | |||
| 846 | if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSIX)) | ||
| 847 | return iommu_setup_msix(iommu); | ||
| 848 | else if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSI)) | ||
| 849 | return iommu_setup_msi(iommu); | ||
| 850 | |||
| 851 | return 1; | ||
| 852 | } | ||
| 853 | |||
| 854 | /**************************************************************************** | ||
| 855 | * | ||
| 856 | * The next functions belong to the third pass of parsing the ACPI | ||
| 857 | * table. In this last pass the memory mapping requirements are | ||
| 858 | * gathered (like exclusion and unity mapping ranges). | ||
| 859 | * | ||
| 860 | ****************************************************************************/ | ||
| 861 | |||
| 558 | static void __init free_unity_maps(void) | 862 | static void __init free_unity_maps(void) |
| 559 | { | 863 | { |
| 560 | struct unity_map_entry *entry, *next; | 864 | struct unity_map_entry *entry, *next; |
| @@ -565,6 +869,7 @@ static void __init free_unity_maps(void) | |||
| 565 | } | 869 | } |
| 566 | } | 870 | } |
| 567 | 871 | ||
| 872 | /* called when we find an exclusion range definition in ACPI */ | ||
| 568 | static int __init init_exclusion_range(struct ivmd_header *m) | 873 | static int __init init_exclusion_range(struct ivmd_header *m) |
| 569 | { | 874 | { |
| 570 | int i; | 875 | int i; |
| @@ -574,7 +879,7 @@ static int __init init_exclusion_range(struct ivmd_header *m) | |||
| 574 | set_device_exclusion_range(m->devid, m); | 879 | set_device_exclusion_range(m->devid, m); |
| 575 | break; | 880 | break; |
| 576 | case ACPI_IVMD_TYPE_ALL: | 881 | case ACPI_IVMD_TYPE_ALL: |
| 577 | for (i = 0; i < amd_iommu_last_bdf; ++i) | 882 | for (i = 0; i <= amd_iommu_last_bdf; ++i) |
| 578 | set_device_exclusion_range(i, m); | 883 | set_device_exclusion_range(i, m); |
| 579 | break; | 884 | break; |
| 580 | case ACPI_IVMD_TYPE_RANGE: | 885 | case ACPI_IVMD_TYPE_RANGE: |
| @@ -588,6 +893,7 @@ static int __init init_exclusion_range(struct ivmd_header *m) | |||
| 588 | return 0; | 893 | return 0; |
| 589 | } | 894 | } |
| 590 | 895 | ||
| 896 | /* called for unity map ACPI definition */ | ||
| 591 | static int __init init_unity_map_range(struct ivmd_header *m) | 897 | static int __init init_unity_map_range(struct ivmd_header *m) |
| 592 | { | 898 | { |
| 593 | struct unity_map_entry *e = 0; | 899 | struct unity_map_entry *e = 0; |
| @@ -619,13 +925,12 @@ static int __init init_unity_map_range(struct ivmd_header *m) | |||
| 619 | return 0; | 925 | return 0; |
| 620 | } | 926 | } |
| 621 | 927 | ||
| 928 | /* iterates over all memory definitions we find in the ACPI table */ | ||
| 622 | static int __init init_memory_definitions(struct acpi_table_header *table) | 929 | static int __init init_memory_definitions(struct acpi_table_header *table) |
| 623 | { | 930 | { |
| 624 | u8 *p = (u8 *)table, *end = (u8 *)table; | 931 | u8 *p = (u8 *)table, *end = (u8 *)table; |
| 625 | struct ivmd_header *m; | 932 | struct ivmd_header *m; |
| 626 | 933 | ||
| 627 | INIT_LIST_HEAD(&amd_iommu_unity_map); | ||
| 628 | |||
| 629 | end += table->length; | 934 | end += table->length; |
| 630 | p += IVRS_HEADER_LENGTH; | 935 | p += IVRS_HEADER_LENGTH; |
| 631 | 936 | ||
| @@ -642,12 +947,32 @@ static int __init init_memory_definitions(struct acpi_table_header *table) | |||
| 642 | return 0; | 947 | return 0; |
| 643 | } | 948 | } |
| 644 | 949 | ||
| 950 | /* | ||
| 951 | * Init the device table to not allow DMA access for devices and | ||
| 952 | * suppress all page faults | ||
| 953 | */ | ||
| 954 | static void init_device_table(void) | ||
| 955 | { | ||
| 956 | u16 devid; | ||
| 957 | |||
| 958 | for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) { | ||
| 959 | set_dev_entry_bit(devid, DEV_ENTRY_VALID); | ||
| 960 | set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION); | ||
| 961 | } | ||
| 962 | } | ||
| 963 | |||
| 964 | /* | ||
| 965 | * This function finally enables all IOMMUs found in the system after | ||
| 966 | * they have been initialized | ||
| 967 | */ | ||
| 645 | static void __init enable_iommus(void) | 968 | static void __init enable_iommus(void) |
| 646 | { | 969 | { |
| 647 | struct amd_iommu *iommu; | 970 | struct amd_iommu *iommu; |
| 648 | 971 | ||
| 649 | list_for_each_entry(iommu, &amd_iommu_list, list) { | 972 | list_for_each_entry(iommu, &amd_iommu_list, list) { |
| 650 | iommu_set_exclusion_range(iommu); | 973 | iommu_set_exclusion_range(iommu); |
| 974 | iommu_init_msi(iommu); | ||
| 975 | iommu_enable_event_logging(iommu); | ||
| 651 | iommu_enable(iommu); | 976 | iommu_enable(iommu); |
| 652 | } | 977 | } |
| 653 | } | 978 | } |
| @@ -678,6 +1003,34 @@ static struct sys_device device_amd_iommu = { | |||
| 678 | .cls = &amd_iommu_sysdev_class, | 1003 | .cls = &amd_iommu_sysdev_class, |
| 679 | }; | 1004 | }; |
| 680 | 1005 | ||
| 1006 | /* | ||
| 1007 | * This is the core init function for AMD IOMMU hardware in the system. | ||
| 1008 | * This function is called from the generic x86 DMA layer initialization | ||
| 1009 | * code. | ||
| 1010 | * | ||
| 1011 | * This function basically parses the ACPI table for AMD IOMMU (IVRS) | ||
| 1012 | * three times: | ||
| 1013 | * | ||
| 1014 | * 1 pass) Find the highest PCI device id the driver has to handle. | ||
| 1015 | * Based on this information the size of the data structures | ||
| 1016 | * that need to be allocated is determined. | ||
| 1017 | * | ||
| 1018 | * 2 pass) Initialize the data structures just allocated with the | ||
| 1019 | * information in the ACPI table about available AMD IOMMUs | ||
| 1020 | * in the system. It also maps the PCI devices in the | ||
| 1021 | * system to specific IOMMUs | ||
| 1022 | * | ||
| 1023 | * 3 pass) After the basic data structures are allocated and | ||
| 1024 | * initialized we update them with information about memory | ||
| 1025 | * remapping requirements parsed out of the ACPI table in | ||
| 1026 | * this last pass. | ||
| 1027 | * | ||
| 1028 | * After that the hardware is initialized and ready to go. In the last | ||
| 1029 | * step we do some Linux specific things like registering the driver in | ||
| 1030 | * the dma_ops interface and initializing the suspend/resume support | ||
| 1031 | * functions. Finally it prints some information about AMD IOMMUs and | ||
| 1032 | * the driver state and enables the hardware. | ||
| 1033 | */ | ||
| 681 | int __init amd_iommu_init(void) | 1034 | int __init amd_iommu_init(void) |
| 682 | { | 1035 | { |
| 683 | int i, ret = 0; | 1036 | int i, ret = 0; |
| @@ -699,14 +1052,14 @@ int __init amd_iommu_init(void) | |||
| 699 | if (acpi_table_parse("IVRS", find_last_devid_acpi) != 0) | 1052 | if (acpi_table_parse("IVRS", find_last_devid_acpi) != 0) |
| 700 | return -ENODEV; | 1053 | return -ENODEV; |
| 701 | 1054 | ||
| 702 | dev_table_size = TBL_SIZE(DEV_TABLE_ENTRY_SIZE); | 1055 | dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE); |
| 703 | alias_table_size = TBL_SIZE(ALIAS_TABLE_ENTRY_SIZE); | 1056 | alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE); |
| 704 | rlookup_table_size = TBL_SIZE(RLOOKUP_TABLE_ENTRY_SIZE); | 1057 | rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE); |
| 705 | 1058 | ||
| 706 | ret = -ENOMEM; | 1059 | ret = -ENOMEM; |
| 707 | 1060 | ||
| 708 | /* Device table - directly used by all IOMMUs */ | 1061 | /* Device table - directly used by all IOMMUs */ |
| 709 | amd_iommu_dev_table = (void *)__get_free_pages(GFP_KERNEL, | 1062 | amd_iommu_dev_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, |
| 710 | get_order(dev_table_size)); | 1063 | get_order(dev_table_size)); |
| 711 | if (amd_iommu_dev_table == NULL) | 1064 | if (amd_iommu_dev_table == NULL) |
| 712 | goto out; | 1065 | goto out; |
| @@ -730,27 +1083,26 @@ int __init amd_iommu_init(void) | |||
| 730 | * Protection Domain table - maps devices to protection domains | 1083 | * Protection Domain table - maps devices to protection domains |
| 731 | * This table has the same size as the rlookup_table | 1084 | * This table has the same size as the rlookup_table |
| 732 | */ | 1085 | */ |
| 733 | amd_iommu_pd_table = (void *)__get_free_pages(GFP_KERNEL, | 1086 | amd_iommu_pd_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, |
| 734 | get_order(rlookup_table_size)); | 1087 | get_order(rlookup_table_size)); |
| 735 | if (amd_iommu_pd_table == NULL) | 1088 | if (amd_iommu_pd_table == NULL) |
| 736 | goto free; | 1089 | goto free; |
| 737 | 1090 | ||
| 738 | amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages(GFP_KERNEL, | 1091 | amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages( |
| 1092 | GFP_KERNEL | __GFP_ZERO, | ||
| 739 | get_order(MAX_DOMAIN_ID/8)); | 1093 | get_order(MAX_DOMAIN_ID/8)); |
| 740 | if (amd_iommu_pd_alloc_bitmap == NULL) | 1094 | if (amd_iommu_pd_alloc_bitmap == NULL) |
| 741 | goto free; | 1095 | goto free; |
| 742 | 1096 | ||
| 1097 | /* init the device table */ | ||
| 1098 | init_device_table(); | ||
| 1099 | |||
| 743 | /* | 1100 | /* |
| 744 | * memory is allocated now; initialize the device table with all zeroes | 1101 | * let all alias entries point to itself |
| 745 | * and let all alias entries point to itself | ||
| 746 | */ | 1102 | */ |
| 747 | memset(amd_iommu_dev_table, 0, dev_table_size); | 1103 | for (i = 0; i <= amd_iommu_last_bdf; ++i) |
| 748 | for (i = 0; i < amd_iommu_last_bdf; ++i) | ||
| 749 | amd_iommu_alias_table[i] = i; | 1104 | amd_iommu_alias_table[i] = i; |
| 750 | 1105 | ||
| 751 | memset(amd_iommu_pd_table, 0, rlookup_table_size); | ||
| 752 | memset(amd_iommu_pd_alloc_bitmap, 0, MAX_DOMAIN_ID / 8); | ||
| 753 | |||
| 754 | /* | 1106 | /* |
| 755 | * never allocate domain 0 because its used as the non-allocated and | 1107 | * never allocate domain 0 because its used as the non-allocated and |
| 756 | * error value placeholder | 1108 | * error value placeholder |
| @@ -768,15 +1120,15 @@ int __init amd_iommu_init(void) | |||
| 768 | if (acpi_table_parse("IVRS", init_memory_definitions) != 0) | 1120 | if (acpi_table_parse("IVRS", init_memory_definitions) != 0) |
| 769 | goto free; | 1121 | goto free; |
| 770 | 1122 | ||
| 771 | ret = amd_iommu_init_dma_ops(); | 1123 | ret = sysdev_class_register(&amd_iommu_sysdev_class); |
| 772 | if (ret) | 1124 | if (ret) |
| 773 | goto free; | 1125 | goto free; |
| 774 | 1126 | ||
| 775 | ret = sysdev_class_register(&amd_iommu_sysdev_class); | 1127 | ret = sysdev_register(&device_amd_iommu); |
| 776 | if (ret) | 1128 | if (ret) |
| 777 | goto free; | 1129 | goto free; |
| 778 | 1130 | ||
| 779 | ret = sysdev_register(&device_amd_iommu); | 1131 | ret = amd_iommu_init_dma_ops(); |
| 780 | if (ret) | 1132 | if (ret) |
| 781 | goto free; | 1133 | goto free; |
| 782 | 1134 | ||
| @@ -791,28 +1143,29 @@ int __init amd_iommu_init(void) | |||
| 791 | else | 1143 | else |
| 792 | printk("disabled\n"); | 1144 | printk("disabled\n"); |
| 793 | 1145 | ||
| 1146 | if (amd_iommu_unmap_flush) | ||
| 1147 | printk(KERN_INFO "AMD IOMMU: IO/TLB flush on unmap enabled\n"); | ||
| 1148 | else | ||
| 1149 | printk(KERN_INFO "AMD IOMMU: Lazy IO/TLB flushing enabled\n"); | ||
| 1150 | |||
| 794 | out: | 1151 | out: |
| 795 | return ret; | 1152 | return ret; |
| 796 | 1153 | ||
| 797 | free: | 1154 | free: |
| 798 | if (amd_iommu_pd_alloc_bitmap) | 1155 | free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, |
| 799 | free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, 1); | 1156 | get_order(MAX_DOMAIN_ID/8)); |
| 800 | 1157 | ||
| 801 | if (amd_iommu_pd_table) | 1158 | free_pages((unsigned long)amd_iommu_pd_table, |
| 802 | free_pages((unsigned long)amd_iommu_pd_table, | 1159 | get_order(rlookup_table_size)); |
| 803 | get_order(rlookup_table_size)); | ||
| 804 | 1160 | ||
| 805 | if (amd_iommu_rlookup_table) | 1161 | free_pages((unsigned long)amd_iommu_rlookup_table, |
| 806 | free_pages((unsigned long)amd_iommu_rlookup_table, | 1162 | get_order(rlookup_table_size)); |
| 807 | get_order(rlookup_table_size)); | ||
| 808 | 1163 | ||
| 809 | if (amd_iommu_alias_table) | 1164 | free_pages((unsigned long)amd_iommu_alias_table, |
| 810 | free_pages((unsigned long)amd_iommu_alias_table, | 1165 | get_order(alias_table_size)); |
| 811 | get_order(alias_table_size)); | ||
| 812 | 1166 | ||
| 813 | if (amd_iommu_dev_table) | 1167 | free_pages((unsigned long)amd_iommu_dev_table, |
| 814 | free_pages((unsigned long)amd_iommu_dev_table, | 1168 | get_order(dev_table_size)); |
| 815 | get_order(dev_table_size)); | ||
| 816 | 1169 | ||
| 817 | free_iommu_all(); | 1170 | free_iommu_all(); |
| 818 | 1171 | ||
| @@ -821,6 +1174,13 @@ free: | |||
| 821 | goto out; | 1174 | goto out; |
| 822 | } | 1175 | } |
| 823 | 1176 | ||
| 1177 | /**************************************************************************** | ||
| 1178 | * | ||
| 1179 | * Early detect code. This code runs at IOMMU detection time in the DMA | ||
| 1180 | * layer. It just checks whether there is an IVRS ACPI table to detect AMD | ||
| 1181 | * IOMMUs | ||
| 1182 | * | ||
| 1183 | ****************************************************************************/ | ||
| 824 | static int __init early_amd_iommu_detect(struct acpi_table_header *table) | 1184 | static int __init early_amd_iommu_detect(struct acpi_table_header *table) |
| 825 | { | 1185 | { |
| 826 | return 0; | 1186 | return 0; |
| @@ -828,7 +1188,7 @@ static int __init early_amd_iommu_detect(struct acpi_table_header *table) | |||
| 828 | 1188 | ||
| 829 | void __init amd_iommu_detect(void) | 1189 | void __init amd_iommu_detect(void) |
| 830 | { | 1190 | { |
| 831 | if (swiotlb || no_iommu || iommu_detected) | 1191 | if (swiotlb || no_iommu || (iommu_detected && !gart_iommu_aperture)) |
| 832 | return; | 1192 | return; |
| 833 | 1193 | ||
| 834 | if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) { | 1194 | if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) { |
| @@ -841,11 +1201,20 @@ void __init amd_iommu_detect(void) | |||
| 841 | } | 1201 | } |
| 842 | } | 1202 | } |
| 843 | 1203 | ||
| 1204 | /**************************************************************************** | ||
| 1205 | * | ||
| 1206 | * Parsing functions for the AMD IOMMU specific kernel command line | ||
| 1207 | * options. | ||
| 1208 | * | ||
| 1209 | ****************************************************************************/ | ||
| 1210 | |||
| 844 | static int __init parse_amd_iommu_options(char *str) | 1211 | static int __init parse_amd_iommu_options(char *str) |
| 845 | { | 1212 | { |
| 846 | for (; *str; ++str) { | 1213 | for (; *str; ++str) { |
| 847 | if (strcmp(str, "isolate") == 0) | 1214 | if (strncmp(str, "isolate", 7) == 0) |
| 848 | amd_iommu_isolate = 1; | 1215 | amd_iommu_isolate = 1; |
| 1216 | if (strncmp(str, "fullflush", 11) == 0) | ||
| 1217 | amd_iommu_unmap_flush = true; | ||
| 849 | } | 1218 | } |
| 850 | 1219 | ||
| 851 | return 1; | 1220 | return 1; |
| @@ -853,20 +1222,10 @@ static int __init parse_amd_iommu_options(char *str) | |||
| 853 | 1222 | ||
| 854 | static int __init parse_amd_iommu_size_options(char *str) | 1223 | static int __init parse_amd_iommu_size_options(char *str) |
| 855 | { | 1224 | { |
| 856 | for (; *str; ++str) { | 1225 | unsigned order = PAGE_SHIFT + get_order(memparse(str, &str)); |
| 857 | if (strcmp(str, "32M") == 0) | 1226 | |
| 858 | amd_iommu_aperture_order = 25; | 1227 | if ((order > 24) && (order < 31)) |
| 859 | if (strcmp(str, "64M") == 0) | 1228 | amd_iommu_aperture_order = order; |
| 860 | amd_iommu_aperture_order = 26; | ||
| 861 | if (strcmp(str, "128M") == 0) | ||
| 862 | amd_iommu_aperture_order = 27; | ||
| 863 | if (strcmp(str, "256M") == 0) | ||
| 864 | amd_iommu_aperture_order = 28; | ||
| 865 | if (strcmp(str, "512M") == 0) | ||
| 866 | amd_iommu_aperture_order = 29; | ||
| 867 | if (strcmp(str, "1G") == 0) | ||
| 868 | amd_iommu_aperture_order = 30; | ||
| 869 | } | ||
| 870 | 1229 | ||
| 871 | return 1; | 1230 | return 1; |
| 872 | } | 1231 | } |
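The rewritten parse_amd_iommu_size_options() above drops the hard-coded size strings in favour of memparse() plus get_order(), so any size between 32M and 1G maps onto an aperture order of 25..30. The following standalone sketch illustrates that arithmetic only — memparse() and get_order() are reimplemented here in simplified, hypothetical form, and this is not the kernel code itself:

```c
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SHIFT 12

/* simplified stand-in for the kernel's memparse(): number plus K/M/G suffix */
static unsigned long long memparse_sketch(const char *s)
{
	char *end;
	unsigned long long v = strtoull(s, &end, 0);

	switch (*end) {
	case 'G': case 'g': v <<= 30; break;
	case 'M': case 'm': v <<= 20; break;
	case 'K': case 'k': v <<= 10; break;
	}
	return v;
}

/* simplified stand-in for get_order(): pages needed, rounded up, as a power of two */
static unsigned get_order_sketch(unsigned long long size)
{
	unsigned order = 0;

	size = (size - 1) >> PAGE_SHIFT;
	while (size) {
		order++;
		size >>= 1;
	}
	return order;
}

int main(void)
{
	const char *args[] = { "32M", "64M", "256M", "1G" };
	unsigned i;

	for (i = 0; i < 4; i++) {
		unsigned order = PAGE_SHIFT +
				 get_order_sketch(memparse_sketch(args[i]));

		/* only orders 25..30 (32M..1G) are accepted by the parser */
		printf("amd_iommu_size=%s -> aperture order %u%s\n",
		       args[i], order,
		       (order > 24 && order < 31) ? "" : " (rejected)");
	}
	return 0;
}
```

Running the sketch shows, for example, that amd_iommu_size=32M maps to order 25 and amd_iommu_size=1G to order 30 — the same values the removed string comparisons assigned, but now for any well-formed size in between as well.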
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 9f907806c1a5..9a32b37ee2ee 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c | |||
| @@ -21,6 +21,7 @@ | |||
| 21 | #include <linux/suspend.h> | 21 | #include <linux/suspend.h> |
| 22 | #include <asm/e820.h> | 22 | #include <asm/e820.h> |
| 23 | #include <asm/io.h> | 23 | #include <asm/io.h> |
| 24 | #include <asm/iommu.h> | ||
| 24 | #include <asm/gart.h> | 25 | #include <asm/gart.h> |
| 25 | #include <asm/pci-direct.h> | 26 | #include <asm/pci-direct.h> |
| 26 | #include <asm/dma.h> | 27 | #include <asm/dma.h> |
| @@ -454,11 +455,11 @@ out: | |||
| 454 | force_iommu || | 455 | force_iommu || |
| 455 | valid_agp || | 456 | valid_agp || |
| 456 | fallback_aper_force) { | 457 | fallback_aper_force) { |
| 457 | printk(KERN_ERR | 458 | printk(KERN_INFO |
| 458 | "Your BIOS doesn't leave a aperture memory hole\n"); | 459 | "Your BIOS doesn't leave a aperture memory hole\n"); |
| 459 | printk(KERN_ERR | 460 | printk(KERN_INFO |
| 460 | "Please enable the IOMMU option in the BIOS setup\n"); | 461 | "Please enable the IOMMU option in the BIOS setup\n"); |
| 461 | printk(KERN_ERR | 462 | printk(KERN_INFO |
| 462 | "This costs you %d MB of RAM\n", | 463 | "This costs you %d MB of RAM\n", |
| 463 | 32 << fallback_aper_order); | 464 | 32 << fallback_aper_order); |
| 464 | 465 | ||
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic.c index a437d027f20b..04a7f960bbc0 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic.c | |||
| @@ -23,11 +23,13 @@ | |||
| 23 | #include <linux/mc146818rtc.h> | 23 | #include <linux/mc146818rtc.h> |
| 24 | #include <linux/kernel_stat.h> | 24 | #include <linux/kernel_stat.h> |
| 25 | #include <linux/sysdev.h> | 25 | #include <linux/sysdev.h> |
| 26 | #include <linux/ioport.h> | ||
| 26 | #include <linux/cpu.h> | 27 | #include <linux/cpu.h> |
| 27 | #include <linux/clockchips.h> | 28 | #include <linux/clockchips.h> |
| 28 | #include <linux/acpi_pmtmr.h> | 29 | #include <linux/acpi_pmtmr.h> |
| 29 | #include <linux/module.h> | 30 | #include <linux/module.h> |
| 30 | #include <linux/dmi.h> | 31 | #include <linux/dmi.h> |
| 32 | #include <linux/dmar.h> | ||
| 31 | 33 | ||
| 32 | #include <asm/atomic.h> | 34 | #include <asm/atomic.h> |
| 33 | #include <asm/smp.h> | 35 | #include <asm/smp.h> |
| @@ -36,8 +38,14 @@ | |||
| 36 | #include <asm/desc.h> | 38 | #include <asm/desc.h> |
| 37 | #include <asm/arch_hooks.h> | 39 | #include <asm/arch_hooks.h> |
| 38 | #include <asm/hpet.h> | 40 | #include <asm/hpet.h> |
| 41 | #include <asm/pgalloc.h> | ||
| 39 | #include <asm/i8253.h> | 42 | #include <asm/i8253.h> |
| 40 | #include <asm/nmi.h> | 43 | #include <asm/nmi.h> |
| 44 | #include <asm/idle.h> | ||
| 45 | #include <asm/proto.h> | ||
| 46 | #include <asm/timex.h> | ||
| 47 | #include <asm/apic.h> | ||
| 48 | #include <asm/i8259.h> | ||
| 41 | 49 | ||
| 42 | #include <mach_apic.h> | 50 | #include <mach_apic.h> |
| 43 | #include <mach_apicdef.h> | 51 | #include <mach_apicdef.h> |
| @@ -50,20 +58,60 @@ | |||
| 50 | # error SPURIOUS_APIC_VECTOR definition error | 58 | # error SPURIOUS_APIC_VECTOR definition error |
| 51 | #endif | 59 | #endif |
| 52 | 60 | ||
| 53 | unsigned long mp_lapic_addr; | 61 | #ifdef CONFIG_X86_32 |
| 54 | |||
| 55 | /* | 62 | /* |
| 56 | * Knob to control our willingness to enable the local APIC. | 63 | * Knob to control our willingness to enable the local APIC. |
| 57 | * | 64 | * |
| 58 | * +1=force-enable | 65 | * +1=force-enable |
| 59 | */ | 66 | */ |
| 60 | static int force_enable_local_apic; | 67 | static int force_enable_local_apic; |
| 61 | int disable_apic; | 68 | /* |
| 69 | * APIC command line parameters | ||
| 70 | */ | ||
| 71 | static int __init parse_lapic(char *arg) | ||
| 72 | { | ||
| 73 | force_enable_local_apic = 1; | ||
| 74 | return 0; | ||
| 75 | } | ||
| 76 | early_param("lapic", parse_lapic); | ||
| 77 | /* Local APIC was disabled by the BIOS and enabled by the kernel */ | ||
| 78 | static int enabled_via_apicbase; | ||
| 79 | |||
| 80 | #endif | ||
| 62 | 81 | ||
| 63 | /* Local APIC timer verification ok */ | 82 | #ifdef CONFIG_X86_64 |
| 64 | static int local_apic_timer_verify_ok; | 83 | static int apic_calibrate_pmtmr __initdata; |
| 84 | static __init int setup_apicpmtimer(char *s) | ||
| 85 | { | ||
| 86 | apic_calibrate_pmtmr = 1; | ||
| 87 | notsc_setup(NULL); | ||
| 88 | return 0; | ||
| 89 | } | ||
| 90 | __setup("apicpmtimer", setup_apicpmtimer); | ||
| 91 | #endif | ||
| 92 | |||
| 93 | #ifdef CONFIG_X86_64 | ||
| 94 | #define HAVE_X2APIC | ||
| 95 | #endif | ||
| 96 | |||
| 97 | #ifdef HAVE_X2APIC | ||
| 98 | int x2apic; | ||
| 99 | /* x2apic enabled before OS handover */ | ||
| 100 | int x2apic_preenabled; | ||
| 101 | int disable_x2apic; | ||
| 102 | static __init int setup_nox2apic(char *str) | ||
| 103 | { | ||
| 104 | disable_x2apic = 1; | ||
| 105 | setup_clear_cpu_cap(X86_FEATURE_X2APIC); | ||
| 106 | return 0; | ||
| 107 | } | ||
| 108 | early_param("nox2apic", setup_nox2apic); | ||
| 109 | #endif | ||
| 110 | |||
| 111 | unsigned long mp_lapic_addr; | ||
| 112 | int disable_apic; | ||
| 65 | /* Disable local APIC timer from the kernel commandline or via dmi quirk */ | 113 | /* Disable local APIC timer from the kernel commandline or via dmi quirk */ |
| 66 | static int local_apic_timer_disabled; | 114 | static int disable_apic_timer __cpuinitdata; |
| 67 | /* Local APIC timer works in C2 */ | 115 | /* Local APIC timer works in C2 */ |
| 68 | int local_apic_timer_c2_ok; | 116 | int local_apic_timer_c2_ok; |
| 69 | EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); | 117 | EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); |
| @@ -75,7 +123,7 @@ char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE}; | |||
| 75 | /* | 123 | /* |
| 76 | * Debug level, exported for io_apic.c | 124 | * Debug level, exported for io_apic.c |
| 77 | */ | 125 | */ |
| 78 | int apic_verbosity; | 126 | unsigned int apic_verbosity; |
| 79 | 127 | ||
| 80 | int pic_mode; | 128 | int pic_mode; |
| 81 | 129 | ||
| @@ -112,9 +160,6 @@ static struct clock_event_device lapic_clockevent = { | |||
| 112 | }; | 160 | }; |
| 113 | static DEFINE_PER_CPU(struct clock_event_device, lapic_events); | 161 | static DEFINE_PER_CPU(struct clock_event_device, lapic_events); |
| 114 | 162 | ||
| 115 | /* Local APIC was disabled by the BIOS and enabled by the kernel */ | ||
| 116 | static int enabled_via_apicbase; | ||
| 117 | |||
| 118 | static unsigned long apic_phys; | 163 | static unsigned long apic_phys; |
| 119 | 164 | ||
| 120 | /* | 165 | /* |
| @@ -130,7 +175,11 @@ static inline int lapic_get_version(void) | |||
| 130 | */ | 175 | */ |
| 131 | static inline int lapic_is_integrated(void) | 176 | static inline int lapic_is_integrated(void) |
| 132 | { | 177 | { |
| 178 | #ifdef CONFIG_X86_64 | ||
| 179 | return 1; | ||
| 180 | #else | ||
| 133 | return APIC_INTEGRATED(lapic_get_version()); | 181 | return APIC_INTEGRATED(lapic_get_version()); |
| 182 | #endif | ||
| 134 | } | 183 | } |
| 135 | 184 | ||
| 136 | /* | 185 | /* |
| @@ -145,13 +194,18 @@ static int modern_apic(void) | |||
| 145 | return lapic_get_version() >= 0x14; | 194 | return lapic_get_version() >= 0x14; |
| 146 | } | 195 | } |
| 147 | 196 | ||
| 148 | void apic_wait_icr_idle(void) | 197 | /* |
| 198 | * Paravirt kernels also might be using these below ops. So we still | ||
| 199 | * use generic apic_read()/apic_write(), which might be pointing to different | ||
| 200 | * ops in PARAVIRT case. | ||
| 201 | */ | ||
| 202 | void xapic_wait_icr_idle(void) | ||
| 149 | { | 203 | { |
| 150 | while (apic_read(APIC_ICR) & APIC_ICR_BUSY) | 204 | while (apic_read(APIC_ICR) & APIC_ICR_BUSY) |
| 151 | cpu_relax(); | 205 | cpu_relax(); |
| 152 | } | 206 | } |
| 153 | 207 | ||
| 154 | u32 safe_apic_wait_icr_idle(void) | 208 | u32 safe_xapic_wait_icr_idle(void) |
| 155 | { | 209 | { |
| 156 | u32 send_status; | 210 | u32 send_status; |
| 157 | int timeout; | 211 | int timeout; |
| @@ -167,19 +221,88 @@ u32 safe_apic_wait_icr_idle(void) | |||
| 167 | return send_status; | 221 | return send_status; |
| 168 | } | 222 | } |
| 169 | 223 | ||
| 224 | void xapic_icr_write(u32 low, u32 id) | ||
| 225 | { | ||
| 226 | apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id)); | ||
| 227 | apic_write(APIC_ICR, low); | ||
| 228 | } | ||
| 229 | |||
| 230 | u64 xapic_icr_read(void) | ||
| 231 | { | ||
| 232 | u32 icr1, icr2; | ||
| 233 | |||
| 234 | icr2 = apic_read(APIC_ICR2); | ||
| 235 | icr1 = apic_read(APIC_ICR); | ||
| 236 | |||
| 237 | return icr1 | ((u64)icr2 << 32); | ||
| 238 | } | ||
| 239 | |||
| 240 | static struct apic_ops xapic_ops = { | ||
| 241 | .read = native_apic_mem_read, | ||
| 242 | .write = native_apic_mem_write, | ||
| 243 | .icr_read = xapic_icr_read, | ||
| 244 | .icr_write = xapic_icr_write, | ||
| 245 | .wait_icr_idle = xapic_wait_icr_idle, | ||
| 246 | .safe_wait_icr_idle = safe_xapic_wait_icr_idle, | ||
| 247 | }; | ||
| 248 | |||
| 249 | struct apic_ops __read_mostly *apic_ops = &xapic_ops; | ||
| 250 | EXPORT_SYMBOL_GPL(apic_ops); | ||
| 251 | |||
| 252 | #ifdef HAVE_X2APIC | ||
| 253 | static void x2apic_wait_icr_idle(void) | ||
| 254 | { | ||
| 255 | /* no need to wait for icr idle in x2apic */ | ||
| 256 | return; | ||
| 257 | } | ||
| 258 | |||
| 259 | static u32 safe_x2apic_wait_icr_idle(void) | ||
| 260 | { | ||
| 261 | /* no need to wait for icr idle in x2apic */ | ||
| 262 | return 0; | ||
| 263 | } | ||
| 264 | |||
| 265 | void x2apic_icr_write(u32 low, u32 id) | ||
| 266 | { | ||
| 267 | wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low); | ||
| 268 | } | ||
| 269 | |||
| 270 | u64 x2apic_icr_read(void) | ||
| 271 | { | ||
| 272 | unsigned long val; | ||
| 273 | |||
| 274 | rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val); | ||
| 275 | return val; | ||
| 276 | } | ||
| 277 | |||
| 278 | static struct apic_ops x2apic_ops = { | ||
| 279 | .read = native_apic_msr_read, | ||
| 280 | .write = native_apic_msr_write, | ||
| 281 | .icr_read = x2apic_icr_read, | ||
| 282 | .icr_write = x2apic_icr_write, | ||
| 283 | .wait_icr_idle = x2apic_wait_icr_idle, | ||
| 284 | .safe_wait_icr_idle = safe_x2apic_wait_icr_idle, | ||
| 285 | }; | ||
| 286 | #endif | ||
| 287 | |||
| 170 | /** | 288 | /** |
| 171 | * enable_NMI_through_LVT0 - enable NMI through local vector table 0 | 289 | * enable_NMI_through_LVT0 - enable NMI through local vector table 0 |
| 172 | */ | 290 | */ |
| 173 | void __cpuinit enable_NMI_through_LVT0(void) | 291 | void __cpuinit enable_NMI_through_LVT0(void) |
| 174 | { | 292 | { |
| 175 | unsigned int v = APIC_DM_NMI; | 293 | unsigned int v; |
| 294 | |||
| 295 | /* unmask and set to NMI */ | ||
| 296 | v = APIC_DM_NMI; | ||
| 176 | 297 | ||
| 177 | /* Level triggered for 82489DX */ | 298 | /* Level triggered for 82489DX (32bit mode) */ |
| 178 | if (!lapic_is_integrated()) | 299 | if (!lapic_is_integrated()) |
| 179 | v |= APIC_LVT_LEVEL_TRIGGER; | 300 | v |= APIC_LVT_LEVEL_TRIGGER; |
| 180 | apic_write_around(APIC_LVT0, v); | 301 | |
| 302 | apic_write(APIC_LVT0, v); | ||
| 181 | } | 303 | } |
| 182 | 304 | ||
| 305 | #ifdef CONFIG_X86_32 | ||
| 183 | /** | 306 | /** |
| 184 | * get_physical_broadcast - Get number of physical broadcast IDs | 307 | * get_physical_broadcast - Get number of physical broadcast IDs |
| 185 | */ | 308 | */ |
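The hunk above routes every APIC register access through an apic_ops table (native_apic_mem_read/write for xAPIC MMIO, native_apic_msr_read/write for x2APIC MSRs), so the backend can be switched at runtime. As a rough illustration of that function-pointer indirection — a standalone sketch with made-up register offsets and print statements, not the kernel API — consider:

```c
#include <stdio.h>
#include <stdint.h>

/* illustrative stand-in for the kernel's apic_ops indirection */
struct apic_ops_sketch {
	uint32_t (*read)(uint32_t reg);
	void     (*write)(uint32_t reg, uint32_t val);
};

static uint32_t mmio_read(uint32_t reg)
{
	printf("xAPIC:  MMIO read  of reg 0x%03x\n", (unsigned)reg);
	return 0;
}

static void mmio_write(uint32_t reg, uint32_t val)
{
	printf("xAPIC:  MMIO write of reg 0x%03x = 0x%08x\n",
	       (unsigned)reg, (unsigned)val);
}

static uint32_t msr_read(uint32_t reg)
{
	printf("x2APIC: MSR read   of reg 0x%03x\n", (unsigned)reg);
	return 0;
}

static void msr_write(uint32_t reg, uint32_t val)
{
	printf("x2APIC: MSR write  of reg 0x%03x = 0x%08x\n",
	       (unsigned)reg, (unsigned)val);
}

static const struct apic_ops_sketch xapic  = { mmio_read, mmio_write };
static const struct apic_ops_sketch x2apic = { msr_read,  msr_write  };

/* all callers go through this pointer, like apic_read()/apic_write() */
static const struct apic_ops_sketch *ops = &xapic;

int main(void)
{
	ops->write(0x80, 0);	/* e.g. TPR, via the MMIO backend */

	ops = &x2apic;		/* switch backend once x2apic is enabled */
	ops->write(0x80, 0);	/* same call site, MSR backend now */

	return ops->read(0x30) ? 1 : 0;
}
```

The benefit mirrors the kernel change: callers keep a single read/write entry point, while the ops pointer decides whether the access goes out as an MMIO store or an MSR write.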
| @@ -187,15 +310,20 @@ int get_physical_broadcast(void) | |||
| 187 | { | 310 | { |
| 188 | return modern_apic() ? 0xff : 0xf; | 311 | return modern_apic() ? 0xff : 0xf; |
| 189 | } | 312 | } |
| 313 | #endif | ||
| 190 | 314 | ||
| 191 | /** | 315 | /** |
| 192 | * lapic_get_maxlvt - get the maximum number of local vector table entries | 316 | * lapic_get_maxlvt - get the maximum number of local vector table entries |
| 193 | */ | 317 | */ |
| 194 | int lapic_get_maxlvt(void) | 318 | int lapic_get_maxlvt(void) |
| 195 | { | 319 | { |
| 196 | unsigned int v = apic_read(APIC_LVR); | 320 | unsigned int v; |
| 197 | 321 | ||
| 198 | /* 82489DXs do not report # of LVT entries. */ | 322 | v = apic_read(APIC_LVR); |
| 323 | /* | ||
| 324 | * - we always have APIC integrated on 64bit mode | ||
| 325 | * - 82489DXs do not report # of LVT entries | ||
| 326 | */ | ||
| 199 | return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2; | 327 | return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2; |
| 200 | } | 328 | } |
| 201 | 329 | ||
| @@ -203,7 +331,7 @@ int lapic_get_maxlvt(void) | |||
| 203 | * Local APIC timer | 331 | * Local APIC timer |
| 204 | */ | 332 | */ |
| 205 | 333 | ||
| 206 | /* Clock divisor is set to 16 */ | 334 | /* Clock divisor */ |
| 207 | #define APIC_DIVISOR 16 | 335 | #define APIC_DIVISOR 16 |
| 208 | 336 | ||
| 209 | /* | 337 | /* |
| @@ -229,27 +357,61 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) | |||
| 229 | if (!irqen) | 357 | if (!irqen) |
| 230 | lvtt_value |= APIC_LVT_MASKED; | 358 | lvtt_value |= APIC_LVT_MASKED; |
| 231 | 359 | ||
| 232 | apic_write_around(APIC_LVTT, lvtt_value); | 360 | apic_write(APIC_LVTT, lvtt_value); |
| 233 | 361 | ||
| 234 | /* | 362 | /* |
| 235 | * Divide PICLK by 16 | 363 | * Divide PICLK by 16 |
| 236 | */ | 364 | */ |
| 237 | tmp_value = apic_read(APIC_TDCR); | 365 | tmp_value = apic_read(APIC_TDCR); |
| 238 | apic_write_around(APIC_TDCR, (tmp_value | 366 | apic_write(APIC_TDCR, |
| 239 | & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | 367 | (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | |
| 240 | | APIC_TDR_DIV_16); | 368 | APIC_TDR_DIV_16); |
| 241 | 369 | ||
| 242 | if (!oneshot) | 370 | if (!oneshot) |
| 243 | apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR); | 371 | apic_write(APIC_TMICT, clocks / APIC_DIVISOR); |
| 244 | } | 372 | } |
| 245 | 373 | ||
| 246 | /* | 374 | /* |
| 375 | * Setup extended LVT, AMD specific (K8, family 10h) | ||
| 376 | * | ||
| 377 | * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and | ||
| 378 | * MCE interrupts are supported. Thus MCE offset must be set to 0. | ||
| 379 | * | ||
| 380 | * If mask=1, the LVT entry does not generate interrupts while mask=0 | ||
| 381 | * enables the vector. See also the BKDGs. | ||
| 382 | */ | ||
| 383 | |||
| 384 | #define APIC_EILVT_LVTOFF_MCE 0 | ||
| 385 | #define APIC_EILVT_LVTOFF_IBS 1 | ||
| 386 | |||
| 387 | static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask) | ||
| 388 | { | ||
| 389 | unsigned long reg = (lvt_off << 4) + APIC_EILVT0; | ||
| 390 | unsigned int v = (mask << 16) | (msg_type << 8) | vector; | ||
| 391 | |||
| 392 | apic_write(reg, v); | ||
| 393 | } | ||
| 394 | |||
| 395 | u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask) | ||
| 396 | { | ||
| 397 | setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask); | ||
| 398 | return APIC_EILVT_LVTOFF_MCE; | ||
| 399 | } | ||
| 400 | |||
| 401 | u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask) | ||
| 402 | { | ||
| 403 | setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask); | ||
| 404 | return APIC_EILVT_LVTOFF_IBS; | ||
| 405 | } | ||
| 406 | EXPORT_SYMBOL_GPL(setup_APIC_eilvt_ibs); | ||
| 407 | |||
| 408 | /* | ||
| 247 | * Program the next event, relative to now | 409 | * Program the next event, relative to now |
| 248 | */ | 410 | */ |
| 249 | static int lapic_next_event(unsigned long delta, | 411 | static int lapic_next_event(unsigned long delta, |
| 250 | struct clock_event_device *evt) | 412 | struct clock_event_device *evt) |
| 251 | { | 413 | { |
| 252 | apic_write_around(APIC_TMICT, delta); | 414 | apic_write(APIC_TMICT, delta); |
| 253 | return 0; | 415 | return 0; |
| 254 | } | 416 | } |
| 255 | 417 | ||
| @@ -262,8 +424,8 @@ static void lapic_timer_setup(enum clock_event_mode mode, | |||
| 262 | unsigned long flags; | 424 | unsigned long flags; |
| 263 | unsigned int v; | 425 | unsigned int v; |
| 264 | 426 | ||
| 265 | /* Lapic used for broadcast ? */ | 427 | /* Lapic used as dummy for broadcast ? */ |
| 266 | if (!local_apic_timer_verify_ok) | 428 | if (evt->features & CLOCK_EVT_FEAT_DUMMY) |
| 267 | return; | 429 | return; |
| 268 | 430 | ||
| 269 | local_irq_save(flags); | 431 | local_irq_save(flags); |
| @@ -278,7 +440,7 @@ static void lapic_timer_setup(enum clock_event_mode mode, | |||
| 278 | case CLOCK_EVT_MODE_SHUTDOWN: | 440 | case CLOCK_EVT_MODE_SHUTDOWN: |
| 279 | v = apic_read(APIC_LVTT); | 441 | v = apic_read(APIC_LVTT); |
| 280 | v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); | 442 | v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); |
| 281 | apic_write_around(APIC_LVTT, v); | 443 | apic_write(APIC_LVTT, v); |
| 282 | break; | 444 | break; |
| 283 | case CLOCK_EVT_MODE_RESUME: | 445 | case CLOCK_EVT_MODE_RESUME: |
| 284 | /* Nothing to do here */ | 446 | /* Nothing to do here */ |
| @@ -302,7 +464,7 @@ static void lapic_timer_broadcast(cpumask_t mask) | |||
| 302 | * Setup the local APIC timer for this CPU. Copy the initilized values | 464 | * Setup the local APIC timer for this CPU. Copy the initilized values |
| 303 | * of the boot CPU and register the clock event in the framework. | 465 | * of the boot CPU and register the clock event in the framework. |
| 304 | */ | 466 | */ |
| 305 | static void __devinit setup_APIC_timer(void) | 467 | static void __cpuinit setup_APIC_timer(void) |
| 306 | { | 468 | { |
| 307 | struct clock_event_device *levt = &__get_cpu_var(lapic_events); | 469 | struct clock_event_device *levt = &__get_cpu_var(lapic_events); |
| 308 | 470 | ||
| @@ -372,39 +534,53 @@ static void __init lapic_cal_handler(struct clock_event_device *dev) | |||
| 372 | } | 534 | } |
| 373 | } | 535 | } |
| 374 | 536 | ||
| 375 | /* | 537 | static int __init calibrate_by_pmtimer(long deltapm, long *delta) |
| 376 | * Setup the boot APIC | 538 | { |
| 377 | * | 539 | const long pm_100ms = PMTMR_TICKS_PER_SEC / 10; |
| 378 | * Calibrate and verify the result. | 540 | const long pm_thresh = pm_100ms / 100; |
| 379 | */ | 541 | unsigned long mult; |
| 380 | void __init setup_boot_APIC_clock(void) | 542 | u64 res; |
| 543 | |||
| 544 | #ifndef CONFIG_X86_PM_TIMER | ||
| 545 | return -1; | ||
| 546 | #endif | ||
| 547 | |||
| 548 | apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm); | ||
| 549 | |||
| 550 | /* Check, if the PM timer is available */ | ||
| 551 | if (!deltapm) | ||
| 552 | return -1; | ||
| 553 | |||
| 554 | mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22); | ||
| 555 | |||
| 556 | if (deltapm > (pm_100ms - pm_thresh) && | ||
| 557 | deltapm < (pm_100ms + pm_thresh)) { | ||
| 558 | apic_printk(APIC_VERBOSE, "... PM timer result ok\n"); | ||
| 559 | } else { | ||
| 560 | res = (((u64)deltapm) * mult) >> 22; | ||
| 561 | do_div(res, 1000000); | ||
| 562 | printk(KERN_WARNING "APIC calibration not consistent " | ||
| 563 | "with PM Timer: %ldms instead of 100ms\n", | ||
| 564 | (long)res); | ||
| 565 | /* Correct the lapic counter value */ | ||
| 566 | res = (((u64)(*delta)) * pm_100ms); | ||
| 567 | do_div(res, deltapm); | ||
| 568 | printk(KERN_INFO "APIC delta adjusted to PM-Timer: " | ||
| 569 | "%lu (%ld)\n", (unsigned long)res, *delta); | ||
| 570 | *delta = (long)res; | ||
| 571 | } | ||
| 572 | |||
| 573 | return 0; | ||
| 574 | } | ||
| 575 | |||
| 576 | static int __init calibrate_APIC_clock(void) | ||
| 381 | { | 577 | { |
| 382 | struct clock_event_device *levt = &__get_cpu_var(lapic_events); | 578 | struct clock_event_device *levt = &__get_cpu_var(lapic_events); |
| 383 | const long pm_100ms = PMTMR_TICKS_PER_SEC/10; | ||
| 384 | const long pm_thresh = pm_100ms/100; | ||
| 385 | void (*real_handler)(struct clock_event_device *dev); | 579 | void (*real_handler)(struct clock_event_device *dev); |
| 386 | unsigned long deltaj; | 580 | unsigned long deltaj; |
| 387 | long delta, deltapm; | 581 | long delta; |
| 388 | int pm_referenced = 0; | 582 | int pm_referenced = 0; |
| 389 | 583 | ||
| 390 | /* | ||
| 391 | * The local apic timer can be disabled via the kernel | ||
| 392 | * commandline or from the CPU detection code. Register the lapic | ||
| 393 | * timer as a dummy clock event source on SMP systems, so the | ||
| 394 | * broadcast mechanism is used. On UP systems simply ignore it. | ||
| 395 | */ | ||
| 396 | if (local_apic_timer_disabled) { | ||
| 397 | /* No broadcast on UP ! */ | ||
| 398 | if (num_possible_cpus() > 1) { | ||
| 399 | lapic_clockevent.mult = 1; | ||
| 400 | setup_APIC_timer(); | ||
| 401 | } | ||
| 402 | return; | ||
| 403 | } | ||
| 404 | |||
| 405 | apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n" | ||
| 406 | "calibrating APIC timer ...\n"); | ||
| 407 | |||
| 408 | local_irq_disable(); | 584 | local_irq_disable(); |
| 409 | 585 | ||
| 410 | /* Replace the global interrupt handler */ | 586 | /* Replace the global interrupt handler */ |
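The calibrate_by_pmtimer() helper factored out above cross-checks the lapic calibration window against the ACPI PM timer: 100 ms should correspond to PMTMR_TICKS_PER_SEC/10 PM ticks within a 1% band, otherwise the measured lapic delta is rescaled by pm_100ms/deltapm. A minimal sketch of that correction arithmetic, using made-up sample numbers rather than real measurements and assuming only the constants visible in the hunk:

```c
#include <stdio.h>

#define PMTMR_TICKS_PER_SEC 3579545	/* 3.579545 MHz ACPI PM timer */

/* sketch of the rescaling in calibrate_by_pmtimer(): if the PM timer says
 * the "100ms" window was really longer or shorter, scale the measured
 * lapic delta accordingly */
static long rescale_lapic_delta(long delta, long deltapm)
{
	const long pm_100ms  = PMTMR_TICKS_PER_SEC / 10;	/* 357954 */
	const long pm_thresh = pm_100ms / 100;			/* 1% band */

	if (deltapm > (pm_100ms - pm_thresh) &&
	    deltapm < (pm_100ms + pm_thresh))
		return delta;		/* within 1%: trust the result as-is */

	/* correct the lapic count to what a true 100ms window would give */
	return (long)(((long long)delta * pm_100ms) / deltapm);
}

int main(void)
{
	/* pretend the lapic counted 1,250,000 ticks while the PM timer saw
	 * 393,750 ticks, i.e. the window was really about 110ms */
	long delta = 1250000, deltapm = 393750;

	printf("raw lapic delta: %ld, adjusted: %ld\n",
	       delta, rescale_lapic_delta(delta, deltapm));
	return 0;
}
```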
| @@ -412,10 +588,10 @@ void __init setup_boot_APIC_clock(void) | |||
| 412 | global_clock_event->event_handler = lapic_cal_handler; | 588 | global_clock_event->event_handler = lapic_cal_handler; |
| 413 | 589 | ||
| 414 | /* | 590 | /* |
| 415 | * Setup the APIC counter to 1e9. There is no way the lapic | 591 | * Setup the APIC counter to maximum. There is no way the lapic |
| 416 | * can underflow in the 100ms detection time frame | 592 | * can underflow in the 100ms detection time frame |
| 417 | */ | 593 | */ |
| 418 | __setup_APIC_LVTT(1000000000, 0, 0); | 594 | __setup_APIC_LVTT(0xffffffff, 0, 0); |
| 419 | 595 | ||
| 420 | /* Let the interrupts run */ | 596 | /* Let the interrupts run */ |
| 421 | local_irq_enable(); | 597 | local_irq_enable(); |
| @@ -432,34 +608,9 @@ void __init setup_boot_APIC_clock(void) | |||
| 432 | delta = lapic_cal_t1 - lapic_cal_t2; | 608 | delta = lapic_cal_t1 - lapic_cal_t2; |
| 433 | apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta); | 609 | apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta); |
| 434 | 610 | ||
| 435 | /* Check, if the PM timer is available */ | 611 | /* we trust the PM based calibration if possible */ |
| 436 | deltapm = lapic_cal_pm2 - lapic_cal_pm1; | 612 | pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1, |
| 437 | apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm); | 613 | &delta); |
| 438 | |||
| 439 | if (deltapm) { | ||
| 440 | unsigned long mult; | ||
| 441 | u64 res; | ||
| 442 | |||
| 443 | mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22); | ||
| 444 | |||
| 445 | if (deltapm > (pm_100ms - pm_thresh) && | ||
| 446 | deltapm < (pm_100ms + pm_thresh)) { | ||
| 447 | apic_printk(APIC_VERBOSE, "... PM timer result ok\n"); | ||
| 448 | } else { | ||
| 449 | res = (((u64) deltapm) * mult) >> 22; | ||
| 450 | do_div(res, 1000000); | ||
| 451 | printk(KERN_WARNING "APIC calibration not consistent " | ||
| 452 | "with PM Timer: %ldms instead of 100ms\n", | ||
| 453 | (long)res); | ||
| 454 | /* Correct the lapic counter value */ | ||
| 455 | res = (((u64) delta) * pm_100ms); | ||
| 456 | do_div(res, deltapm); | ||
| 457 | printk(KERN_INFO "APIC delta adjusted to PM-Timer: " | ||
| 458 | "%lu (%ld)\n", (unsigned long) res, delta); | ||
| 459 | delta = (long) res; | ||
| 460 | } | ||
| 461 | pm_referenced = 1; | ||
| 462 | } | ||
| 463 | 614 | ||
| 464 | /* Calculate the scaled math multiplication factor */ | 615 | /* Calculate the scaled math multiplication factor */ |
| 465 | lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS, | 616 | lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS, |
| @@ -489,8 +640,6 @@ void __init setup_boot_APIC_clock(void) | |||
| 489 | calibration_result / (1000000 / HZ), | 640 | calibration_result / (1000000 / HZ), |
| 490 | calibration_result % (1000000 / HZ)); | 641 | calibration_result % (1000000 / HZ)); |
| 491 | 642 | ||
| 492 | local_apic_timer_verify_ok = 1; | ||
| 493 | |||
| 494 | /* | 643 | /* |
| 495 | * Do a sanity check on the APIC calibration result | 644 | * Do a sanity check on the APIC calibration result |
| 496 | */ | 645 | */ |
| @@ -498,13 +647,15 @@ void __init setup_boot_APIC_clock(void) | |||
| 498 | local_irq_enable(); | 647 | local_irq_enable(); |
| 499 | printk(KERN_WARNING | 648 | printk(KERN_WARNING |
| 500 | "APIC frequency too slow, disabling apic timer\n"); | 649 | "APIC frequency too slow, disabling apic timer\n"); |
| 501 | /* No broadcast on UP ! */ | 650 | return -1; |
| 502 | if (num_possible_cpus() > 1) | ||
| 503 | setup_APIC_timer(); | ||
| 504 | return; | ||
| 505 | } | 651 | } |
| 506 | 652 | ||
| 507 | /* We trust the pm timer based calibration */ | 653 | levt->features &= ~CLOCK_EVT_FEAT_DUMMY; |
| 654 | |||
| 655 | /* | ||
| 656 | * PM timer calibration failed or not turned on | ||
| 657 | * so lets try APIC timer based calibration | ||
| 658 | */ | ||
| 508 | if (!pm_referenced) { | 659 | if (!pm_referenced) { |
| 509 | apic_printk(APIC_VERBOSE, "... verify APIC timer\n"); | 660 | apic_printk(APIC_VERBOSE, "... verify APIC timer\n"); |
| 510 | 661 | ||
| @@ -536,34 +687,68 @@ void __init setup_boot_APIC_clock(void) | |||
| 536 | if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2) | 687 | if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2) |
| 537 | apic_printk(APIC_VERBOSE, "... jiffies result ok\n"); | 688 | apic_printk(APIC_VERBOSE, "... jiffies result ok\n"); |
| 538 | else | 689 | else |
| 539 | local_apic_timer_verify_ok = 0; | 690 | levt->features |= CLOCK_EVT_FEAT_DUMMY; |
| 540 | } else | 691 | } else |
| 541 | local_irq_enable(); | 692 | local_irq_enable(); |
| 542 | 693 | ||
| 543 | if (!local_apic_timer_verify_ok) { | 694 | if (levt->features & CLOCK_EVT_FEAT_DUMMY) { |
| 544 | printk(KERN_WARNING | 695 | printk(KERN_WARNING |
| 545 | "APIC timer disabled due to verification failure.\n"); | 696 | "APIC timer disabled due to verification failure.\n"); |
| 697 | return -1; | ||
| 698 | } | ||
| 699 | |||
| 700 | return 0; | ||
| 701 | } | ||
| 702 | |||
| 703 | /* | ||
| 704 | * Setup the boot APIC | ||
| 705 | * | ||
| 706 | * Calibrate and verify the result. | ||
| 707 | */ | ||
| 708 | void __init setup_boot_APIC_clock(void) | ||
| 709 | { | ||
| 710 | /* | ||
| 711 | * The local apic timer can be disabled via the kernel | ||
| 712 | * commandline or from the CPU detection code. Register the lapic | ||
| 713 | * timer as a dummy clock event source on SMP systems, so the | ||
| 714 | * broadcast mechanism is used. On UP systems simply ignore it. | ||
| 715 | */ | ||
| 716 | if (disable_apic_timer) { | ||
| 717 | printk(KERN_INFO "Disabling APIC timer\n"); | ||
| 546 | /* No broadcast on UP ! */ | 718 | /* No broadcast on UP ! */ |
| 547 | if (num_possible_cpus() == 1) | 719 | if (num_possible_cpus() > 1) { |
| 548 | return; | 720 | lapic_clockevent.mult = 1; |
| 549 | } else { | 721 | setup_APIC_timer(); |
| 550 | /* | 722 | } |
| 551 | * If nmi_watchdog is set to IO_APIC, we need the | 723 | return; |
| 552 | * PIT/HPET going. Otherwise register lapic as a dummy | 724 | } |
| 553 | * device. | 725 | |
| 554 | */ | 726 | apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n" |
| 555 | if (nmi_watchdog != NMI_IO_APIC) | 727 | "calibrating APIC timer ...\n"); |
| 556 | lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; | 728 | |
| 557 | else | 729 | if (calibrate_APIC_clock()) { |
| 558 | printk(KERN_WARNING "APIC timer registered as dummy," | 730 | /* No broadcast on UP ! */ |
| 559 | " due to nmi_watchdog=%d!\n", nmi_watchdog); | 731 | if (num_possible_cpus() > 1) |
| 732 | setup_APIC_timer(); | ||
| 733 | return; | ||
| 560 | } | 734 | } |
| 561 | 735 | ||
| 736 | /* | ||
| 737 | * If nmi_watchdog is set to IO_APIC, we need the | ||
| 738 | * PIT/HPET going. Otherwise register lapic as a dummy | ||
| 739 | * device. | ||
| 740 | */ | ||
| 741 | if (nmi_watchdog != NMI_IO_APIC) | ||
| 742 | lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; | ||
| 743 | else | ||
| 744 | printk(KERN_WARNING "APIC timer registered as dummy," | ||
| 745 | " due to nmi_watchdog=%d!\n", nmi_watchdog); | ||
| 746 | |||
| 562 | /* Setup the lapic or request the broadcast */ | 747 | /* Setup the lapic or request the broadcast */ |
| 563 | setup_APIC_timer(); | 748 | setup_APIC_timer(); |
| 564 | } | 749 | } |
| 565 | 750 | ||
| 566 | void __devinit setup_secondary_APIC_clock(void) | 751 | void __cpuinit setup_secondary_APIC_clock(void) |
| 567 | { | 752 | { |
| 568 | setup_APIC_timer(); | 753 | setup_APIC_timer(); |
| 569 | } | 754 | } |
| @@ -598,7 +783,11 @@ static void local_apic_timer_interrupt(void) | |||
| 598 | /* | 783 | /* |
| 599 | * the NMI deadlock-detector uses this. | 784 | * the NMI deadlock-detector uses this. |
| 600 | */ | 785 | */ |
| 786 | #ifdef CONFIG_X86_64 | ||
| 787 | add_pda(apic_timer_irqs, 1); | ||
| 788 | #else | ||
| 601 | per_cpu(irq_stat, cpu).apic_timer_irqs++; | 789 | per_cpu(irq_stat, cpu).apic_timer_irqs++; |
| 790 | #endif | ||
| 602 | 791 | ||
| 603 | evt->event_handler(evt); | 792 | evt->event_handler(evt); |
| 604 | } | 793 | } |
| @@ -625,6 +814,9 @@ void smp_apic_timer_interrupt(struct pt_regs *regs) | |||
| 625 | * Besides, if we don't timer interrupts ignore the global | 814 | * Besides, if we don't timer interrupts ignore the global |
| 626 | * interrupt lock, which is the WrongThing (tm) to do. | 815 | * interrupt lock, which is the WrongThing (tm) to do. |
| 627 | */ | 816 | */ |
| 817 | #ifdef CONFIG_X86_64 | ||
| 818 | exit_idle(); | ||
| 819 | #endif | ||
| 628 | irq_enter(); | 820 | irq_enter(); |
| 629 | local_apic_timer_interrupt(); | 821 | local_apic_timer_interrupt(); |
| 630 | irq_exit(); | 822 | irq_exit(); |
| @@ -638,35 +830,6 @@ int setup_profiling_timer(unsigned int multiplier) | |||
| 638 | } | 830 | } |
| 639 | 831 | ||
| 640 | /* | 832 | /* |
| 641 | * Setup extended LVT, AMD specific (K8, family 10h) | ||
| 642 | * | ||
| 643 | * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and | ||
| 644 | * MCE interrupts are supported. Thus MCE offset must be set to 0. | ||
| 645 | */ | ||
| 646 | |||
| 647 | #define APIC_EILVT_LVTOFF_MCE 0 | ||
| 648 | #define APIC_EILVT_LVTOFF_IBS 1 | ||
| 649 | |||
| 650 | static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask) | ||
| 651 | { | ||
| 652 | unsigned long reg = (lvt_off << 4) + APIC_EILVT0; | ||
| 653 | unsigned int v = (mask << 16) | (msg_type << 8) | vector; | ||
| 654 | apic_write(reg, v); | ||
| 655 | } | ||
| 656 | |||
| 657 | u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask) | ||
| 658 | { | ||
| 659 | setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask); | ||
| 660 | return APIC_EILVT_LVTOFF_MCE; | ||
| 661 | } | ||
| 662 | |||
| 663 | u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask) | ||
| 664 | { | ||
| 665 | setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask); | ||
| 666 | return APIC_EILVT_LVTOFF_IBS; | ||
| 667 | } | ||
| 668 | |||
| 669 | /* | ||
| 670 | * Local APIC start and shutdown | 833 | * Local APIC start and shutdown |
| 671 | */ | 834 | */ |
| 672 | 835 | ||
| @@ -693,45 +856,41 @@ void clear_local_APIC(void) | |||
| 693 | */ | 856 | */ |
| 694 | if (maxlvt >= 3) { | 857 | if (maxlvt >= 3) { |
| 695 | v = ERROR_APIC_VECTOR; /* any non-zero vector will do */ | 858 | v = ERROR_APIC_VECTOR; /* any non-zero vector will do */ |
| 696 | apic_write_around(APIC_LVTERR, v | APIC_LVT_MASKED); | 859 | apic_write(APIC_LVTERR, v | APIC_LVT_MASKED); |
| 697 | } | 860 | } |
| 698 | /* | 861 | /* |
| 699 | * Careful: we have to set masks only first to deassert | 862 | * Careful: we have to set masks only first to deassert |
| 700 | * any level-triggered sources. | 863 | * any level-triggered sources. |
| 701 | */ | 864 | */ |
| 702 | v = apic_read(APIC_LVTT); | 865 | v = apic_read(APIC_LVTT); |
| 703 | apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED); | 866 | apic_write(APIC_LVTT, v | APIC_LVT_MASKED); |
| 704 | v = apic_read(APIC_LVT0); | 867 | v = apic_read(APIC_LVT0); |
| 705 | apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED); | 868 | apic_write(APIC_LVT0, v | APIC_LVT_MASKED); |
| 706 | v = apic_read(APIC_LVT1); | 869 | v = apic_read(APIC_LVT1); |
| 707 | apic_write_around(APIC_LVT1, v | APIC_LVT_MASKED); | 870 | apic_write(APIC_LVT1, v | APIC_LVT_MASKED); |
| 708 | if (maxlvt >= 4) { | 871 | if (maxlvt >= 4) { |
| 709 | v = apic_read(APIC_LVTPC); | 872 | v = apic_read(APIC_LVTPC); |
| 710 | apic_write_around(APIC_LVTPC, v | APIC_LVT_MASKED); | 873 | apic_write(APIC_LVTPC, v | APIC_LVT_MASKED); |
| 711 | } | 874 | } |
| 712 | 875 | ||
| 713 | /* lets not touch this if we didn't frob it */ | 876 | /* lets not touch this if we didn't frob it */ |
| 714 | #ifdef CONFIG_X86_MCE_P4THERMAL | 877 | #if defined(CONFIG_X86_MCE_P4THERMAL) || defined(X86_MCE_INTEL) |
| 715 | if (maxlvt >= 5) { | 878 | if (maxlvt >= 5) { |
| 716 | v = apic_read(APIC_LVTTHMR); | 879 | v = apic_read(APIC_LVTTHMR); |
| 717 | apic_write_around(APIC_LVTTHMR, v | APIC_LVT_MASKED); | 880 | apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED); |
| 718 | } | 881 | } |
| 719 | #endif | 882 | #endif |
| 720 | /* | 883 | /* |
| 721 | * Clean APIC state for other OSs: | 884 | * Clean APIC state for other OSs: |
| 722 | */ | 885 | */ |
| 723 | apic_write_around(APIC_LVTT, APIC_LVT_MASKED); | 886 | apic_write(APIC_LVTT, APIC_LVT_MASKED); |
| 724 | apic_write_around(APIC_LVT0, APIC_LVT_MASKED); | 887 | apic_write(APIC_LVT0, APIC_LVT_MASKED); |
| 725 | apic_write_around(APIC_LVT1, APIC_LVT_MASKED); | 888 | apic_write(APIC_LVT1, APIC_LVT_MASKED); |
| 726 | if (maxlvt >= 3) | 889 | if (maxlvt >= 3) |
| 727 | apic_write_around(APIC_LVTERR, APIC_LVT_MASKED); | 890 | apic_write(APIC_LVTERR, APIC_LVT_MASKED); |
| 728 | if (maxlvt >= 4) | 891 | if (maxlvt >= 4) |
| 729 | apic_write_around(APIC_LVTPC, APIC_LVT_MASKED); | 892 | apic_write(APIC_LVTPC, APIC_LVT_MASKED); |
| 730 | 893 | ||
| 731 | #ifdef CONFIG_X86_MCE_P4THERMAL | ||
| 732 | if (maxlvt >= 5) | ||
| 733 | apic_write_around(APIC_LVTTHMR, APIC_LVT_MASKED); | ||
| 734 | #endif | ||
| 735 | /* Integrated APIC (!82489DX) ? */ | 894 | /* Integrated APIC (!82489DX) ? */ |
| 736 | if (lapic_is_integrated()) { | 895 | if (lapic_is_integrated()) { |
| 737 | if (maxlvt > 3) | 896 | if (maxlvt > 3) |
| @@ -746,7 +905,7 @@ void clear_local_APIC(void) | |||
| 746 | */ | 905 | */ |
| 747 | void disable_local_APIC(void) | 906 | void disable_local_APIC(void) |
| 748 | { | 907 | { |
| 749 | unsigned long value; | 908 | unsigned int value; |
| 750 | 909 | ||
| 751 | clear_local_APIC(); | 910 | clear_local_APIC(); |
| 752 | 911 | ||
| @@ -756,8 +915,9 @@ void disable_local_APIC(void) | |||
| 756 | */ | 915 | */ |
| 757 | value = apic_read(APIC_SPIV); | 916 | value = apic_read(APIC_SPIV); |
| 758 | value &= ~APIC_SPIV_APIC_ENABLED; | 917 | value &= ~APIC_SPIV_APIC_ENABLED; |
| 759 | apic_write_around(APIC_SPIV, value); | 918 | apic_write(APIC_SPIV, value); |
| 760 | 919 | ||
| 920 | #ifdef CONFIG_X86_32 | ||
| 761 | /* | 921 | /* |
| 762 | * When LAPIC was disabled by the BIOS and enabled by the kernel, | 922 | * When LAPIC was disabled by the BIOS and enabled by the kernel, |
| 763 | * restore the disabled state. | 923 | * restore the disabled state. |
| @@ -769,6 +929,7 @@ void disable_local_APIC(void) | |||
| 769 | l &= ~MSR_IA32_APICBASE_ENABLE; | 929 | l &= ~MSR_IA32_APICBASE_ENABLE; |
| 770 | wrmsr(MSR_IA32_APICBASE, l, h); | 930 | wrmsr(MSR_IA32_APICBASE, l, h); |
| 771 | } | 931 | } |
| 932 | #endif | ||
| 772 | } | 933 | } |
| 773 | 934 | ||
| 774 | /* | 935 | /* |
| @@ -785,11 +946,15 @@ void lapic_shutdown(void) | |||
| 785 | return; | 946 | return; |
| 786 | 947 | ||
| 787 | local_irq_save(flags); | 948 | local_irq_save(flags); |
| 788 | clear_local_APIC(); | ||
| 789 | 949 | ||
| 790 | if (enabled_via_apicbase) | 950 | #ifdef CONFIG_X86_32 |
| 951 | if (!enabled_via_apicbase) | ||
| 952 | clear_local_APIC(); | ||
| 953 | else | ||
| 954 | #endif | ||
| 791 | disable_local_APIC(); | 955 | disable_local_APIC(); |
| 792 | 956 | ||
| 957 | |||
| 793 | local_irq_restore(flags); | 958 | local_irq_restore(flags); |
| 794 | } | 959 | } |
| 795 | 960 | ||
| @@ -834,6 +999,12 @@ int __init verify_local_APIC(void) | |||
| 834 | */ | 999 | */ |
| 835 | reg0 = apic_read(APIC_ID); | 1000 | reg0 = apic_read(APIC_ID); |
| 836 | apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); | 1001 | apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); |
| 1002 | apic_write(APIC_ID, reg0 ^ APIC_ID_MASK); | ||
| 1003 | reg1 = apic_read(APIC_ID); | ||
| 1004 | apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); | ||
| 1005 | apic_write(APIC_ID, reg0); | ||
| 1006 | if (reg1 != (reg0 ^ APIC_ID_MASK)) | ||
| 1007 | return 0; | ||
| 837 | 1008 | ||
| 838 | /* | 1009 | /* |
| 839 | * The next two are just to see if we have sane values. | 1010 | * The next two are just to see if we have sane values. |
| @@ -859,14 +1030,15 @@ void __init sync_Arb_IDs(void) | |||
| 859 | */ | 1030 | */ |
| 860 | if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD) | 1031 | if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD) |
| 861 | return; | 1032 | return; |
| 1033 | |||
| 862 | /* | 1034 | /* |
| 863 | * Wait for idle. | 1035 | * Wait for idle. |
| 864 | */ | 1036 | */ |
| 865 | apic_wait_icr_idle(); | 1037 | apic_wait_icr_idle(); |
| 866 | 1038 | ||
| 867 | apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); | 1039 | apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); |
| 868 | apic_write_around(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | 1040 | apic_write(APIC_ICR, APIC_DEST_ALLINC | |
| 869 | | APIC_DM_INIT); | 1041 | APIC_INT_LEVELTRIG | APIC_DM_INIT); |
| 870 | } | 1042 | } |
| 871 | 1043 | ||
| 872 | /* | 1044 | /* |
| @@ -874,7 +1046,7 @@ void __init sync_Arb_IDs(void) | |||
| 874 | */ | 1046 | */ |
| 875 | void __init init_bsp_APIC(void) | 1047 | void __init init_bsp_APIC(void) |
| 876 | { | 1048 | { |
| 877 | unsigned long value; | 1049 | unsigned int value; |
| 878 | 1050 | ||
| 879 | /* | 1051 | /* |
| 880 | * Don't do the setup now if we have a SMP BIOS as the | 1052 | * Don't do the setup now if we have a SMP BIOS as the |
| @@ -895,60 +1067,66 @@ void __init init_bsp_APIC(void) | |||
| 895 | value &= ~APIC_VECTOR_MASK; | 1067 | value &= ~APIC_VECTOR_MASK; |
| 896 | value |= APIC_SPIV_APIC_ENABLED; | 1068 | value |= APIC_SPIV_APIC_ENABLED; |
| 897 | 1069 | ||
| 1070 | #ifdef CONFIG_X86_32 | ||
| 898 | /* This bit is reserved on P4/Xeon and should be cleared */ | 1071 | /* This bit is reserved on P4/Xeon and should be cleared */ |
| 899 | if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && | 1072 | if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && |
| 900 | (boot_cpu_data.x86 == 15)) | 1073 | (boot_cpu_data.x86 == 15)) |
| 901 | value &= ~APIC_SPIV_FOCUS_DISABLED; | 1074 | value &= ~APIC_SPIV_FOCUS_DISABLED; |
| 902 | else | 1075 | else |
| 1076 | #endif | ||
| 903 | value |= APIC_SPIV_FOCUS_DISABLED; | 1077 | value |= APIC_SPIV_FOCUS_DISABLED; |
| 904 | value |= SPURIOUS_APIC_VECTOR; | 1078 | value |= SPURIOUS_APIC_VECTOR; |
| 905 | apic_write_around(APIC_SPIV, value); | 1079 | apic_write(APIC_SPIV, value); |
| 906 | 1080 | ||
| 907 | /* | 1081 | /* |
| 908 | * Set up the virtual wire mode. | 1082 | * Set up the virtual wire mode. |
| 909 | */ | 1083 | */ |
| 910 | apic_write_around(APIC_LVT0, APIC_DM_EXTINT); | 1084 | apic_write(APIC_LVT0, APIC_DM_EXTINT); |
| 911 | value = APIC_DM_NMI; | 1085 | value = APIC_DM_NMI; |
| 912 | if (!lapic_is_integrated()) /* 82489DX */ | 1086 | if (!lapic_is_integrated()) /* 82489DX */ |
| 913 | value |= APIC_LVT_LEVEL_TRIGGER; | 1087 | value |= APIC_LVT_LEVEL_TRIGGER; |
| 914 | apic_write_around(APIC_LVT1, value); | 1088 | apic_write(APIC_LVT1, value); |
| 915 | } | 1089 | } |
| 916 | 1090 | ||
| 917 | static void __cpuinit lapic_setup_esr(void) | 1091 | static void __cpuinit lapic_setup_esr(void) |
| 918 | { | 1092 | { |
| 919 | unsigned long oldvalue, value, maxlvt; | 1093 | unsigned int oldvalue, value, maxlvt; |
| 920 | if (lapic_is_integrated() && !esr_disable) { | ||
| 921 | /* !82489DX */ | ||
| 922 | maxlvt = lapic_get_maxlvt(); | ||
| 923 | if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ | ||
| 924 | apic_write(APIC_ESR, 0); | ||
| 925 | oldvalue = apic_read(APIC_ESR); | ||
| 926 | 1094 | ||
| 927 | /* enables sending errors */ | 1095 | if (!lapic_is_integrated()) { |
| 928 | value = ERROR_APIC_VECTOR; | 1096 | printk(KERN_INFO "No ESR for 82489DX.\n"); |
| 929 | apic_write_around(APIC_LVTERR, value); | 1097 | return; |
| 1098 | } | ||
| 1099 | |||
| 1100 | if (esr_disable) { | ||
| 930 | /* | 1101 | /* |
| 931 | * spec says clear errors after enabling vector. | 1102 | * Something untraceable is creating bad interrupts on |
| 1103 | * secondary quads ... for the moment, just leave the | ||
| 1104 | * ESR disabled - we can't do anything useful with the | ||
| 1105 | * errors anyway - mbligh | ||
| 932 | */ | 1106 | */ |
| 933 | if (maxlvt > 3) | 1107 | printk(KERN_INFO "Leaving ESR disabled.\n"); |
| 934 | apic_write(APIC_ESR, 0); | 1108 | return; |
| 935 | value = apic_read(APIC_ESR); | ||
| 936 | if (value != oldvalue) | ||
| 937 | apic_printk(APIC_VERBOSE, "ESR value before enabling " | ||
| 938 | "vector: 0x%08lx after: 0x%08lx\n", | ||
| 939 | oldvalue, value); | ||
| 940 | } else { | ||
| 941 | if (esr_disable) | ||
| 942 | /* | ||
| 943 | * Something untraceable is creating bad interrupts on | ||
| 944 | * secondary quads ... for the moment, just leave the | ||
| 945 | * ESR disabled - we can't do anything useful with the | ||
| 946 | * errors anyway - mbligh | ||
| 947 | */ | ||
| 948 | printk(KERN_INFO "Leaving ESR disabled.\n"); | ||
| 949 | else | ||
| 950 | printk(KERN_INFO "No ESR for 82489DX.\n"); | ||
| 951 | } | 1109 | } |
| 1110 | |||
| 1111 | maxlvt = lapic_get_maxlvt(); | ||
| 1112 | if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ | ||
| 1113 | apic_write(APIC_ESR, 0); | ||
| 1114 | oldvalue = apic_read(APIC_ESR); | ||
| 1115 | |||
| 1116 | /* enables sending errors */ | ||
| 1117 | value = ERROR_APIC_VECTOR; | ||
| 1118 | apic_write(APIC_LVTERR, value); | ||
| 1119 | |||
| 1120 | /* | ||
| 1121 | * spec says clear errors after enabling vector. | ||
| 1122 | */ | ||
| 1123 | if (maxlvt > 3) | ||
| 1124 | apic_write(APIC_ESR, 0); | ||
| 1125 | value = apic_read(APIC_ESR); | ||
| 1126 | if (value != oldvalue) | ||
| 1127 | apic_printk(APIC_VERBOSE, "ESR value before enabling " | ||
| 1128 | "vector: 0x%08x after: 0x%08x\n", | ||
| 1129 | oldvalue, value); | ||
| 952 | } | 1130 | } |
| 953 | 1131 | ||
| 954 | 1132 | ||
| @@ -957,24 +1135,27 @@ static void __cpuinit lapic_setup_esr(void) | |||
| 957 | */ | 1135 | */ |
| 958 | void __cpuinit setup_local_APIC(void) | 1136 | void __cpuinit setup_local_APIC(void) |
| 959 | { | 1137 | { |
| 960 | unsigned long value, integrated; | 1138 | unsigned int value; |
| 961 | int i, j; | 1139 | int i, j; |
| 962 | 1140 | ||
| 1141 | #ifdef CONFIG_X86_32 | ||
| 963 | /* Pound the ESR really hard over the head with a big hammer - mbligh */ | 1142 | /* Pound the ESR really hard over the head with a big hammer - mbligh */ |
| 964 | if (esr_disable) { | 1143 | if (lapic_is_integrated() && esr_disable) { |
| 965 | apic_write(APIC_ESR, 0); | 1144 | apic_write(APIC_ESR, 0); |
| 966 | apic_write(APIC_ESR, 0); | 1145 | apic_write(APIC_ESR, 0); |
| 967 | apic_write(APIC_ESR, 0); | 1146 | apic_write(APIC_ESR, 0); |
| 968 | apic_write(APIC_ESR, 0); | 1147 | apic_write(APIC_ESR, 0); |
| 969 | } | 1148 | } |
| 1149 | #endif | ||
| 970 | 1150 | ||
| 971 | integrated = lapic_is_integrated(); | 1151 | preempt_disable(); |
| 972 | 1152 | ||
| 973 | /* | 1153 | /* |
| 974 | * Double-check whether this APIC is really registered. | 1154 | * Double-check whether this APIC is really registered. |
| 1155 | * This is meaningless in clustered apic mode, so we skip it. | ||
| 975 | */ | 1156 | */ |
| 976 | if (!apic_id_registered()) | 1157 | if (!apic_id_registered()) |
| 977 | WARN_ON_ONCE(1); | 1158 | BUG(); |
| 978 | 1159 | ||
| 979 | /* | 1160 | /* |
| 980 | * Intel recommends to set DFR, LDR and TPR before enabling | 1161 | * Intel recommends to set DFR, LDR and TPR before enabling |
| @@ -989,7 +1170,7 @@ void __cpuinit setup_local_APIC(void) | |||
| 989 | */ | 1170 | */ |
| 990 | value = apic_read(APIC_TASKPRI); | 1171 | value = apic_read(APIC_TASKPRI); |
| 991 | value &= ~APIC_TPRI_MASK; | 1172 | value &= ~APIC_TPRI_MASK; |
| 992 | apic_write_around(APIC_TASKPRI, value); | 1173 | apic_write(APIC_TASKPRI, value); |
| 993 | 1174 | ||
| 994 | /* | 1175 | /* |
| 995 | * After a crash, we no longer service the interrupts and a pending | 1176 | * After a crash, we no longer service the interrupts and a pending |
| @@ -1020,6 +1201,7 @@ void __cpuinit setup_local_APIC(void) | |||
| 1020 | */ | 1201 | */ |
| 1021 | value |= APIC_SPIV_APIC_ENABLED; | 1202 | value |= APIC_SPIV_APIC_ENABLED; |
| 1022 | 1203 | ||
| 1204 | #ifdef CONFIG_X86_32 | ||
| 1023 | /* | 1205 | /* |
| 1024 | * Some unknown Intel IO/APIC (or APIC) errata is biting us with | 1206 | * Some unknown Intel IO/APIC (or APIC) errata is biting us with |
| 1025 | * certain networking cards. If high frequency interrupts are | 1207 | * certain networking cards. If high frequency interrupts are |
| @@ -1040,14 +1222,19 @@ void __cpuinit setup_local_APIC(void) | |||
| 1040 | * See also the comment in end_level_ioapic_irq(). --macro | 1222 | * See also the comment in end_level_ioapic_irq(). --macro |
| 1041 | */ | 1223 | */ |
| 1042 | 1224 | ||
| 1043 | /* Enable focus processor (bit==0) */ | 1225 | /* |
| 1226 | * - enable focus processor (bit==0) | ||
| 1227 | * - 64-bit mode always uses processor focus, | ||
| 1228 | * so there is no need to set it | ||
| 1229 | */ | ||
| 1044 | value &= ~APIC_SPIV_FOCUS_DISABLED; | 1230 | value &= ~APIC_SPIV_FOCUS_DISABLED; |
| 1231 | #endif | ||
| 1045 | 1232 | ||
| 1046 | /* | 1233 | /* |
| 1047 | * Set spurious IRQ vector | 1234 | * Set spurious IRQ vector |
| 1048 | */ | 1235 | */ |
| 1049 | value |= SPURIOUS_APIC_VECTOR; | 1236 | value |= SPURIOUS_APIC_VECTOR; |
| 1050 | apic_write_around(APIC_SPIV, value); | 1237 | apic_write(APIC_SPIV, value); |
| 1051 | 1238 | ||
| 1052 | /* | 1239 | /* |
| 1053 | * Set up LVT0, LVT1: | 1240 | * Set up LVT0, LVT1: |
| @@ -1069,7 +1256,7 @@ void __cpuinit setup_local_APIC(void) | |||
| 1069 | apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n", | 1256 | apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n", |
| 1070 | smp_processor_id()); | 1257 | smp_processor_id()); |
| 1071 | } | 1258 | } |
| 1072 | apic_write_around(APIC_LVT0, value); | 1259 | apic_write(APIC_LVT0, value); |
| 1073 | 1260 | ||
| 1074 | /* | 1261 | /* |
| 1075 | * only the BP should see the LINT1 NMI signal, obviously. | 1262 | * only the BP should see the LINT1 NMI signal, obviously. |
| @@ -1078,25 +1265,178 @@ void __cpuinit setup_local_APIC(void) | |||
| 1078 | value = APIC_DM_NMI; | 1265 | value = APIC_DM_NMI; |
| 1079 | else | 1266 | else |
| 1080 | value = APIC_DM_NMI | APIC_LVT_MASKED; | 1267 | value = APIC_DM_NMI | APIC_LVT_MASKED; |
| 1081 | if (!integrated) /* 82489DX */ | 1268 | if (!lapic_is_integrated()) /* 82489DX */ |
| 1082 | value |= APIC_LVT_LEVEL_TRIGGER; | 1269 | value |= APIC_LVT_LEVEL_TRIGGER; |
| 1083 | apic_write_around(APIC_LVT1, value); | 1270 | apic_write(APIC_LVT1, value); |
| 1271 | |||
| 1272 | preempt_enable(); | ||
| 1084 | } | 1273 | } |
| 1085 | 1274 | ||
| 1086 | void __cpuinit end_local_APIC_setup(void) | 1275 | void __cpuinit end_local_APIC_setup(void) |
| 1087 | { | 1276 | { |
| 1088 | unsigned long value; | ||
| 1089 | |||
| 1090 | lapic_setup_esr(); | 1277 | lapic_setup_esr(); |
| 1091 | /* Disable the local apic timer */ | 1278 | |
| 1092 | value = apic_read(APIC_LVTT); | 1279 | #ifdef CONFIG_X86_32 |
| 1093 | value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); | 1280 | { |
| 1094 | apic_write_around(APIC_LVTT, value); | 1281 | unsigned int value; |
| 1282 | /* Disable the local apic timer */ | ||
| 1283 | value = apic_read(APIC_LVTT); | ||
| 1284 | value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); | ||
| 1285 | apic_write(APIC_LVTT, value); | ||
| 1286 | } | ||
| 1287 | #endif | ||
| 1095 | 1288 | ||
| 1096 | setup_apic_nmi_watchdog(NULL); | 1289 | setup_apic_nmi_watchdog(NULL); |
| 1097 | apic_pm_activate(); | 1290 | apic_pm_activate(); |
| 1098 | } | 1291 | } |
| 1099 | 1292 | ||
| 1293 | #ifdef HAVE_X2APIC | ||
| 1294 | void check_x2apic(void) | ||
| 1295 | { | ||
| 1296 | int msr, msr2; | ||
| 1297 | |||
| 1298 | rdmsr(MSR_IA32_APICBASE, msr, msr2); | ||
| 1299 | |||
| 1300 | if (msr & X2APIC_ENABLE) { | ||
| 1301 | printk("x2apic enabled by BIOS, switching to x2apic ops\n"); | ||
| 1302 | x2apic_preenabled = x2apic = 1; | ||
| 1303 | apic_ops = &x2apic_ops; | ||
| 1304 | } | ||
| 1305 | } | ||
| 1306 | |||
| 1307 | void enable_x2apic(void) | ||
| 1308 | { | ||
| 1309 | int msr, msr2; | ||
| 1310 | |||
| 1311 | rdmsr(MSR_IA32_APICBASE, msr, msr2); | ||
| 1312 | if (!(msr & X2APIC_ENABLE)) { | ||
| 1313 | printk("Enabling x2apic\n"); | ||
| 1314 | wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0); | ||
| 1315 | } | ||
| 1316 | } | ||
| 1317 | |||
| 1318 | void enable_IR_x2apic(void) | ||
| 1319 | { | ||
| 1320 | #ifdef CONFIG_INTR_REMAP | ||
| 1321 | int ret; | ||
| 1322 | unsigned long flags; | ||
| 1323 | |||
| 1324 | if (!cpu_has_x2apic) | ||
| 1325 | return; | ||
| 1326 | |||
| 1327 | if (!x2apic_preenabled && disable_x2apic) { | ||
| 1328 | printk(KERN_INFO | ||
| 1329 | "Skipped enabling x2apic and Interrupt-remapping " | ||
| 1330 | "because of nox2apic\n"); | ||
| 1331 | return; | ||
| 1332 | } | ||
| 1333 | |||
| 1334 | if (x2apic_preenabled && disable_x2apic) | ||
| 1335 | panic("Bios already enabled x2apic, can't enforce nox2apic"); | ||
| 1336 | |||
| 1337 | if (!x2apic_preenabled && skip_ioapic_setup) { | ||
| 1338 | printk(KERN_INFO | ||
| 1339 | "Skipped enabling x2apic and Interrupt-remapping " | ||
| 1340 | "because of skipping io-apic setup\n"); | ||
| 1341 | return; | ||
| 1342 | } | ||
| 1343 | |||
| 1344 | ret = dmar_table_init(); | ||
| 1345 | if (ret) { | ||
| 1346 | printk(KERN_INFO | ||
| 1347 | "dmar_table_init() failed with %d:\n", ret); | ||
| 1348 | |||
| 1349 | if (x2apic_preenabled) | ||
| 1350 | panic("x2apic enabled by bios. But IR enabling failed"); | ||
| 1351 | else | ||
| 1352 | printk(KERN_INFO | ||
| 1353 | "Not enabling x2apic,Intr-remapping\n"); | ||
| 1354 | return; | ||
| 1355 | } | ||
| 1356 | |||
| 1357 | local_irq_save(flags); | ||
| 1358 | mask_8259A(); | ||
| 1359 | |||
| 1360 | ret = save_mask_IO_APIC_setup(); | ||
| 1361 | if (ret) { | ||
| 1362 | printk(KERN_INFO "Saving IO-APIC state failed: %d\n", ret); | ||
| 1363 | goto end; | ||
| 1364 | } | ||
| 1365 | |||
| 1366 | ret = enable_intr_remapping(1); | ||
| 1367 | |||
| 1368 | if (ret && x2apic_preenabled) { | ||
| 1369 | local_irq_restore(flags); | ||
| 1370 | panic("x2apic enabled by bios. But IR enabling failed"); | ||
| 1371 | } | ||
| 1372 | |||
| 1373 | if (ret) | ||
| 1374 | goto end_restore; | ||
| 1375 | |||
| 1376 | if (!x2apic) { | ||
| 1377 | x2apic = 1; | ||
| 1378 | apic_ops = &x2apic_ops; | ||
| 1379 | enable_x2apic(); | ||
| 1380 | } | ||
| 1381 | |||
| 1382 | end_restore: | ||
| 1383 | if (ret) | ||
| 1384 | /* | ||
| 1385 | * IR enabling failed | ||
| 1386 | */ | ||
| 1387 | restore_IO_APIC_setup(); | ||
| 1388 | else | ||
| 1389 | reinit_intr_remapped_IO_APIC(x2apic_preenabled); | ||
| 1390 | |||
| 1391 | end: | ||
| 1392 | unmask_8259A(); | ||
| 1393 | local_irq_restore(flags); | ||
| 1394 | |||
| 1395 | if (!ret) { | ||
| 1396 | if (!x2apic_preenabled) | ||
| 1397 | printk(KERN_INFO | ||
| 1398 | "Enabled x2apic and interrupt-remapping\n"); | ||
| 1399 | else | ||
| 1400 | printk(KERN_INFO | ||
| 1401 | "Enabled Interrupt-remapping\n"); | ||
| 1402 | } else | ||
| 1403 | printk(KERN_ERR | ||
| 1404 | "Failed to enable Interrupt-remapping and x2apic\n"); | ||
| 1405 | #else | ||
| 1406 | if (!cpu_has_x2apic) | ||
| 1407 | return; | ||
| 1408 | |||
| 1409 | if (x2apic_preenabled) | ||
| 1410 | panic("x2apic enabled prior OS handover," | ||
| 1411 | " enable CONFIG_INTR_REMAP"); | ||
| 1412 | |||
| 1413 | printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping " | ||
| 1414 | " and x2apic\n"); | ||
| 1415 | #endif | ||
| 1416 | |||
| 1417 | return; | ||
| 1418 | } | ||
| 1419 | #endif /* HAVE_X2APIC */ | ||
| 1420 | |||
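The check_x2apic()/enable_x2apic() pair added above boils down to a read-modify-write of the IA32_APICBASE MSR: read it, test the x2apic enable bit, and set that bit if the firmware has not already done so. A minimal user-space sketch of just that bit manipulation, assuming the architectural bit layout (bit 11 = APIC global enable, bit 10 = x2apic mode) and made-up constant names in place of the kernel's X2APIC_ENABLE and rdmsr/wrmsr helpers:

#include <stdint.h>
#include <stdio.h>

#define APICBASE_ENABLE (1ULL << 11)   /* global APIC enable (architectural bit 11) */
#define APICBASE_EXTD   (1ULL << 10)   /* x2apic (extended) mode enable (bit 10) */

/* Compute what enable_x2apic() would write back, given the current MSR value. */
static uint64_t x2apic_enable_value(uint64_t apicbase)
{
        if (apicbase & APICBASE_EXTD)
                return apicbase;                 /* already enabled by firmware */
        return apicbase | APICBASE_EXTD;         /* switch into x2apic mode */
}

int main(void)
{
        /* Simulated MSR contents: APIC enabled, xapic mode, base 0xfee00000. */
        uint64_t msr = 0xfee00000ULL | APICBASE_ENABLE;

        printf("before: %#llx x2apic=%d\n", (unsigned long long)msr, !!(msr & APICBASE_EXTD));
        msr = x2apic_enable_value(msr);
        printf("after:  %#llx x2apic=%d\n", (unsigned long long)msr, !!(msr & APICBASE_EXTD));
        return 0;
}

The real code splits the MSR into two 32-bit halves (the msr/msr2 pair passed to rdmsr()); the single 64-bit value here is only a simplification.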
| 1421 | #ifdef CONFIG_X86_64 | ||
| 1422 | /* | ||
| 1423 | * Detect and enable local APICs on non-SMP boards. | ||
| 1424 | * Original code written by Keir Fraser. | ||
| 1425 | * On AMD64 we trust the BIOS - if it says no APIC it is likely | ||
| 1426 | * not correctly set up (usually the APIC timer won't work etc.) | ||
| 1427 | */ | ||
| 1428 | static int __init detect_init_APIC(void) | ||
| 1429 | { | ||
| 1430 | if (!cpu_has_apic) { | ||
| 1431 | printk(KERN_INFO "No local APIC present\n"); | ||
| 1432 | return -1; | ||
| 1433 | } | ||
| 1434 | |||
| 1435 | mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; | ||
| 1436 | boot_cpu_physical_apicid = 0; | ||
| 1437 | return 0; | ||
| 1438 | } | ||
| 1439 | #else | ||
| 1100 | /* | 1440 | /* |
| 1101 | * Detect and initialize APIC | 1441 | * Detect and initialize APIC |
| 1102 | */ | 1442 | */ |
| @@ -1175,12 +1515,46 @@ no_apic: | |||
| 1175 | printk(KERN_INFO "No local APIC present or hardware disabled\n"); | 1515 | printk(KERN_INFO "No local APIC present or hardware disabled\n"); |
| 1176 | return -1; | 1516 | return -1; |
| 1177 | } | 1517 | } |
| 1518 | #endif | ||
| 1519 | |||
| 1520 | #ifdef CONFIG_X86_64 | ||
| 1521 | void __init early_init_lapic_mapping(void) | ||
| 1522 | { | ||
| 1523 | unsigned long phys_addr; | ||
| 1524 | |||
| 1525 | /* | ||
| 1526 | * If no local APIC can be found then bail out: | ||
| 1527 | * it means there is no mptable and no MADT | ||
| 1528 | */ | ||
| 1529 | if (!smp_found_config) | ||
| 1530 | return; | ||
| 1531 | |||
| 1532 | phys_addr = mp_lapic_addr; | ||
| 1533 | |||
| 1534 | set_fixmap_nocache(FIX_APIC_BASE, phys_addr); | ||
| 1535 | apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", | ||
| 1536 | APIC_BASE, phys_addr); | ||
| 1537 | |||
| 1538 | /* | ||
| 1539 | * Fetch the APIC ID of the BSP in case we have a | ||
| 1540 | * default configuration (or the MP table is broken). | ||
| 1541 | */ | ||
| 1542 | boot_cpu_physical_apicid = read_apic_id(); | ||
| 1543 | } | ||
| 1544 | #endif | ||
| 1178 | 1545 | ||
| 1179 | /** | 1546 | /** |
| 1180 | * init_apic_mappings - initialize APIC mappings | 1547 | * init_apic_mappings - initialize APIC mappings |
| 1181 | */ | 1548 | */ |
| 1182 | void __init init_apic_mappings(void) | 1549 | void __init init_apic_mappings(void) |
| 1183 | { | 1550 | { |
| 1551 | #ifdef HAVE_X2APIC | ||
| 1552 | if (x2apic) { | ||
| 1553 | boot_cpu_physical_apicid = read_apic_id(); | ||
| 1554 | return; | ||
| 1555 | } | ||
| 1556 | #endif | ||
| 1557 | |||
| 1184 | /* | 1558 | /* |
| 1185 | * If no local APIC can be found then set up a fake all | 1559 | * If no local APIC can be found then set up a fake all |
| 1186 | * zeroes page to simulate the local APIC and another | 1560 | * zeroes page to simulate the local APIC and another |
| @@ -1193,30 +1567,36 @@ void __init init_apic_mappings(void) | |||
| 1193 | apic_phys = mp_lapic_addr; | 1567 | apic_phys = mp_lapic_addr; |
| 1194 | 1568 | ||
| 1195 | set_fixmap_nocache(FIX_APIC_BASE, apic_phys); | 1569 | set_fixmap_nocache(FIX_APIC_BASE, apic_phys); |
| 1196 | printk(KERN_DEBUG "mapped APIC to %08lx (%08lx)\n", APIC_BASE, | 1570 | apic_printk(APIC_VERBOSE, "mapped APIC to %08lx (%08lx)\n", |
| 1197 | apic_phys); | 1571 | APIC_BASE, apic_phys); |
| 1198 | 1572 | ||
| 1199 | /* | 1573 | /* |
| 1200 | * Fetch the APIC ID of the BSP in case we have a | 1574 | * Fetch the APIC ID of the BSP in case we have a |
| 1201 | * default configuration (or the MP table is broken). | 1575 | * default configuration (or the MP table is broken). |
| 1202 | */ | 1576 | */ |
| 1203 | if (boot_cpu_physical_apicid == -1U) | 1577 | if (boot_cpu_physical_apicid == -1U) |
| 1204 | boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); | 1578 | boot_cpu_physical_apicid = read_apic_id(); |
| 1205 | |||
| 1206 | } | 1579 | } |
| 1207 | 1580 | ||
| 1208 | /* | 1581 | /* |
| 1209 | * This initializes the IO-APIC and APIC hardware if this is | 1582 | * This initializes the IO-APIC and APIC hardware if this is |
| 1210 | * a UP kernel. | 1583 | * a UP kernel. |
| 1211 | */ | 1584 | */ |
| 1212 | |||
| 1213 | int apic_version[MAX_APICS]; | 1585 | int apic_version[MAX_APICS]; |
| 1214 | 1586 | ||
| 1215 | int __init APIC_init_uniprocessor(void) | 1587 | int __init APIC_init_uniprocessor(void) |
| 1216 | { | 1588 | { |
| 1217 | if (disable_apic) | 1589 | #ifdef CONFIG_X86_64 |
| 1218 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); | 1590 | if (disable_apic) { |
| 1219 | 1591 | printk(KERN_INFO "Apic disabled\n"); | |
| 1592 | return -1; | ||
| 1593 | } | ||
| 1594 | if (!cpu_has_apic) { | ||
| 1595 | disable_apic = 1; | ||
| 1596 | printk(KERN_INFO "Apic disabled by BIOS\n"); | ||
| 1597 | return -1; | ||
| 1598 | } | ||
| 1599 | #else | ||
| 1220 | if (!smp_found_config && !cpu_has_apic) | 1600 | if (!smp_found_config && !cpu_has_apic) |
| 1221 | return -1; | 1601 | return -1; |
| 1222 | 1602 | ||
| @@ -1225,39 +1605,68 @@ int __init APIC_init_uniprocessor(void) | |||
| 1225 | */ | 1605 | */ |
| 1226 | if (!cpu_has_apic && | 1606 | if (!cpu_has_apic && |
| 1227 | APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { | 1607 | APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { |
| 1228 | printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", | 1608 | printk(KERN_ERR "BIOS bug, local APIC 0x%x not detected!...\n", |
| 1229 | boot_cpu_physical_apicid); | 1609 | boot_cpu_physical_apicid); |
| 1230 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); | 1610 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); |
| 1231 | return -1; | 1611 | return -1; |
| 1232 | } | 1612 | } |
| 1613 | #endif | ||
| 1233 | 1614 | ||
| 1234 | verify_local_APIC(); | 1615 | #ifdef HAVE_X2APIC |
| 1616 | enable_IR_x2apic(); | ||
| 1617 | #endif | ||
| 1618 | #ifdef CONFIG_X86_64 | ||
| 1619 | setup_apic_routing(); | ||
| 1620 | #endif | ||
| 1235 | 1621 | ||
| 1622 | verify_local_APIC(); | ||
| 1236 | connect_bsp_APIC(); | 1623 | connect_bsp_APIC(); |
| 1237 | 1624 | ||
| 1625 | #ifdef CONFIG_X86_64 | ||
| 1626 | apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid)); | ||
| 1627 | #else | ||
| 1238 | /* | 1628 | /* |
| 1239 | * Hack: In case of kdump, after a crash, kernel might be booting | 1629 | * Hack: In case of kdump, after a crash, kernel might be booting |
| 1240 | * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid | 1630 | * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid |
| 1241 | * might be zero if read from MP tables. Get it from LAPIC. | 1631 | * might be zero if read from MP tables. Get it from LAPIC. |
| 1242 | */ | 1632 | */ |
| 1243 | #ifdef CONFIG_CRASH_DUMP | 1633 | # ifdef CONFIG_CRASH_DUMP |
| 1244 | boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); | 1634 | boot_cpu_physical_apicid = read_apic_id(); |
| 1635 | # endif | ||
| 1245 | #endif | 1636 | #endif |
| 1246 | physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); | 1637 | physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); |
| 1247 | |||
| 1248 | setup_local_APIC(); | 1638 | setup_local_APIC(); |
| 1249 | 1639 | ||
| 1640 | #ifdef CONFIG_X86_64 | ||
| 1641 | /* | ||
| 1642 | * Now enable IO-APICs, actually call clear_IO_APIC | ||
| 1643 | * We need clear_IO_APIC before enabling vector on BP | ||
| 1644 | */ | ||
| 1645 | if (!skip_ioapic_setup && nr_ioapics) | ||
| 1646 | enable_IO_APIC(); | ||
| 1647 | #endif | ||
| 1648 | |||
| 1250 | #ifdef CONFIG_X86_IO_APIC | 1649 | #ifdef CONFIG_X86_IO_APIC |
| 1251 | if (!smp_found_config || skip_ioapic_setup || !nr_ioapics) | 1650 | if (!smp_found_config || skip_ioapic_setup || !nr_ioapics) |
| 1252 | #endif | 1651 | #endif |
| 1253 | localise_nmi_watchdog(); | 1652 | localise_nmi_watchdog(); |
| 1254 | end_local_APIC_setup(); | 1653 | end_local_APIC_setup(); |
| 1654 | |||
| 1255 | #ifdef CONFIG_X86_IO_APIC | 1655 | #ifdef CONFIG_X86_IO_APIC |
| 1256 | if (smp_found_config) | 1656 | if (smp_found_config && !skip_ioapic_setup && nr_ioapics) |
| 1257 | if (!skip_ioapic_setup && nr_ioapics) | 1657 | setup_IO_APIC(); |
| 1258 | setup_IO_APIC(); | 1658 | # ifdef CONFIG_X86_64 |
| 1659 | else | ||
| 1660 | nr_ioapics = 0; | ||
| 1661 | # endif | ||
| 1259 | #endif | 1662 | #endif |
| 1663 | |||
| 1664 | #ifdef CONFIG_X86_64 | ||
| 1665 | setup_boot_APIC_clock(); | ||
| 1666 | check_nmi_watchdog(); | ||
| 1667 | #else | ||
| 1260 | setup_boot_clock(); | 1668 | setup_boot_clock(); |
| 1669 | #endif | ||
| 1261 | 1670 | ||
| 1262 | return 0; | 1671 | return 0; |
| 1263 | } | 1672 | } |
| @@ -1271,8 +1680,11 @@ int __init APIC_init_uniprocessor(void) | |||
| 1271 | */ | 1680 | */ |
| 1272 | void smp_spurious_interrupt(struct pt_regs *regs) | 1681 | void smp_spurious_interrupt(struct pt_regs *regs) |
| 1273 | { | 1682 | { |
| 1274 | unsigned long v; | 1683 | u32 v; |
| 1275 | 1684 | ||
| 1685 | #ifdef CONFIG_X86_64 | ||
| 1686 | exit_idle(); | ||
| 1687 | #endif | ||
| 1276 | irq_enter(); | 1688 | irq_enter(); |
| 1277 | /* | 1689 | /* |
| 1278 | * Check if this really is a spurious interrupt and ACK it | 1690 | * Check if this really is a spurious interrupt and ACK it |
| @@ -1283,10 +1695,14 @@ void smp_spurious_interrupt(struct pt_regs *regs) | |||
| 1283 | if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) | 1695 | if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) |
| 1284 | ack_APIC_irq(); | 1696 | ack_APIC_irq(); |
| 1285 | 1697 | ||
| 1698 | #ifdef CONFIG_X86_64 | ||
| 1699 | add_pda(irq_spurious_count, 1); | ||
| 1700 | #else | ||
| 1286 | /* see sw-dev-man vol 3, chapter 7.4.13.5 */ | 1701 | /* see sw-dev-man vol 3, chapter 7.4.13.5 */ |
| 1287 | printk(KERN_INFO "spurious APIC interrupt on CPU#%d, " | 1702 | printk(KERN_INFO "spurious APIC interrupt on CPU#%d, " |
| 1288 | "should never happen.\n", smp_processor_id()); | 1703 | "should never happen.\n", smp_processor_id()); |
| 1289 | __get_cpu_var(irq_stat).irq_spurious_count++; | 1704 | __get_cpu_var(irq_stat).irq_spurious_count++; |
| 1705 | #endif | ||
| 1290 | irq_exit(); | 1706 | irq_exit(); |
| 1291 | } | 1707 | } |
| 1292 | 1708 | ||
| @@ -1295,8 +1711,11 @@ void smp_spurious_interrupt(struct pt_regs *regs) | |||
| 1295 | */ | 1711 | */ |
| 1296 | void smp_error_interrupt(struct pt_regs *regs) | 1712 | void smp_error_interrupt(struct pt_regs *regs) |
| 1297 | { | 1713 | { |
| 1298 | unsigned long v, v1; | 1714 | u32 v, v1; |
| 1299 | 1715 | ||
| 1716 | #ifdef CONFIG_X86_64 | ||
| 1717 | exit_idle(); | ||
| 1718 | #endif | ||
| 1300 | irq_enter(); | 1719 | irq_enter(); |
| 1301 | /* First tickle the hardware, only then report what went on. -- REW */ | 1720 | /* First tickle the hardware, only then report what went on. -- REW */ |
| 1302 | v = apic_read(APIC_ESR); | 1721 | v = apic_read(APIC_ESR); |
| @@ -1315,64 +1734,17 @@ void smp_error_interrupt(struct pt_regs *regs) | |||
| 1315 | 6: Received illegal vector | 1734 | 6: Received illegal vector |
| 1316 | 7: Illegal register address | 1735 | 7: Illegal register address |
| 1317 | */ | 1736 | */ |
| 1318 | printk(KERN_DEBUG "APIC error on CPU%d: %02lx(%02lx)\n", | 1737 | printk(KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n", |
| 1319 | smp_processor_id(), v, v1); | 1738 | smp_processor_id(), v, v1); |
| 1320 | irq_exit(); | 1739 | irq_exit(); |
| 1321 | } | 1740 | } |
| 1322 | 1741 | ||
| 1323 | #ifdef CONFIG_SMP | ||
| 1324 | void __init smp_intr_init(void) | ||
| 1325 | { | ||
| 1326 | /* | ||
| 1327 | * IRQ0 must be given a fixed assignment and initialized, | ||
| 1328 | * because it's used before the IO-APIC is set up. | ||
| 1329 | */ | ||
| 1330 | set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]); | ||
| 1331 | |||
| 1332 | /* | ||
| 1333 | * The reschedule interrupt is a CPU-to-CPU reschedule-helper | ||
| 1334 | * IPI, driven by wakeup. | ||
| 1335 | */ | ||
| 1336 | alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); | ||
| 1337 | |||
| 1338 | /* IPI for invalidation */ | ||
| 1339 | alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt); | ||
| 1340 | |||
| 1341 | /* IPI for generic function call */ | ||
| 1342 | alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); | ||
| 1343 | |||
| 1344 | /* IPI for single call function */ | ||
| 1345 | set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, | ||
| 1346 | call_function_single_interrupt); | ||
| 1347 | } | ||
| 1348 | #endif | ||
| 1349 | |||
| 1350 | /* | ||
| 1351 | * Initialize APIC interrupts | ||
| 1352 | */ | ||
| 1353 | void __init apic_intr_init(void) | ||
| 1354 | { | ||
| 1355 | #ifdef CONFIG_SMP | ||
| 1356 | smp_intr_init(); | ||
| 1357 | #endif | ||
| 1358 | /* self generated IPI for local APIC timer */ | ||
| 1359 | alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); | ||
| 1360 | |||
| 1361 | /* IPI vectors for APIC spurious and error interrupts */ | ||
| 1362 | alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); | ||
| 1363 | alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); | ||
| 1364 | |||
| 1365 | /* thermal monitor LVT interrupt */ | ||
| 1366 | #ifdef CONFIG_X86_MCE_P4THERMAL | ||
| 1367 | alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); | ||
| 1368 | #endif | ||
| 1369 | } | ||
| 1370 | |||
| 1371 | /** | 1742 | /** |
| 1372 | * connect_bsp_APIC - attach the APIC to the interrupt system | 1743 | * connect_bsp_APIC - attach the APIC to the interrupt system |
| 1373 | */ | 1744 | */ |
| 1374 | void __init connect_bsp_APIC(void) | 1745 | void __init connect_bsp_APIC(void) |
| 1375 | { | 1746 | { |
| 1747 | #ifdef CONFIG_X86_32 | ||
| 1376 | if (pic_mode) { | 1748 | if (pic_mode) { |
| 1377 | /* | 1749 | /* |
| 1378 | * Do not trust the local APIC being empty at bootup. | 1750 | * Do not trust the local APIC being empty at bootup. |
| @@ -1387,6 +1759,7 @@ void __init connect_bsp_APIC(void) | |||
| 1387 | outb(0x70, 0x22); | 1759 | outb(0x70, 0x22); |
| 1388 | outb(0x01, 0x23); | 1760 | outb(0x01, 0x23); |
| 1389 | } | 1761 | } |
| 1762 | #endif | ||
| 1390 | enable_apic_mode(); | 1763 | enable_apic_mode(); |
| 1391 | } | 1764 | } |
| 1392 | 1765 | ||
| @@ -1399,6 +1772,9 @@ void __init connect_bsp_APIC(void) | |||
| 1399 | */ | 1772 | */ |
| 1400 | void disconnect_bsp_APIC(int virt_wire_setup) | 1773 | void disconnect_bsp_APIC(int virt_wire_setup) |
| 1401 | { | 1774 | { |
| 1775 | unsigned int value; | ||
| 1776 | |||
| 1777 | #ifdef CONFIG_X86_32 | ||
| 1402 | if (pic_mode) { | 1778 | if (pic_mode) { |
| 1403 | /* | 1779 | /* |
| 1404 | * Put the board back into PIC mode (has an effect only on | 1780 | * Put the board back into PIC mode (has an effect only on |
| @@ -1410,56 +1786,53 @@ void disconnect_bsp_APIC(int virt_wire_setup) | |||
| 1410 | "entering PIC mode.\n"); | 1786 | "entering PIC mode.\n"); |
| 1411 | outb(0x70, 0x22); | 1787 | outb(0x70, 0x22); |
| 1412 | outb(0x00, 0x23); | 1788 | outb(0x00, 0x23); |
| 1413 | } else { | 1789 | return; |
| 1414 | /* Go back to Virtual Wire compatibility mode */ | 1790 | } |
| 1415 | unsigned long value; | 1791 | #endif |
| 1416 | 1792 | ||
| 1417 | /* For the spurious interrupt use vector F, and enable it */ | 1793 | /* Go back to Virtual Wire compatibility mode */ |
| 1418 | value = apic_read(APIC_SPIV); | 1794 | |
| 1419 | value &= ~APIC_VECTOR_MASK; | 1795 | /* For the spurious interrupt use vector F, and enable it */ |
| 1420 | value |= APIC_SPIV_APIC_ENABLED; | 1796 | value = apic_read(APIC_SPIV); |
| 1421 | value |= 0xf; | 1797 | value &= ~APIC_VECTOR_MASK; |
| 1422 | apic_write_around(APIC_SPIV, value); | 1798 | value |= APIC_SPIV_APIC_ENABLED; |
| 1423 | 1799 | value |= 0xf; | |
| 1424 | if (!virt_wire_setup) { | 1800 | apic_write(APIC_SPIV, value); |
| 1425 | /* | ||
| 1426 | * For LVT0 make it edge triggered, active high, | ||
| 1427 | * external and enabled | ||
| 1428 | */ | ||
| 1429 | value = apic_read(APIC_LVT0); | ||
| 1430 | value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | | ||
| 1431 | APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | | ||
| 1432 | APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); | ||
| 1433 | value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; | ||
| 1434 | value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); | ||
| 1435 | apic_write_around(APIC_LVT0, value); | ||
| 1436 | } else { | ||
| 1437 | /* Disable LVT0 */ | ||
| 1438 | apic_write_around(APIC_LVT0, APIC_LVT_MASKED); | ||
| 1439 | } | ||
| 1440 | 1801 | ||
| 1802 | if (!virt_wire_setup) { | ||
| 1441 | /* | 1803 | /* |
| 1442 | * For LVT1 make it edge triggered, active high, nmi and | 1804 | * For LVT0 make it edge triggered, active high, |
| 1443 | * enabled | 1805 | * external and enabled |
| 1444 | */ | 1806 | */ |
| 1445 | value = apic_read(APIC_LVT1); | 1807 | value = apic_read(APIC_LVT0); |
| 1446 | value &= ~( | 1808 | value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | |
| 1447 | APIC_MODE_MASK | APIC_SEND_PENDING | | ||
| 1448 | APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | | 1809 | APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | |
| 1449 | APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); | 1810 | APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); |
| 1450 | value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; | 1811 | value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; |
| 1451 | value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); | 1812 | value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); |
| 1452 | apic_write_around(APIC_LVT1, value); | 1813 | apic_write(APIC_LVT0, value); |
| 1814 | } else { | ||
| 1815 | /* Disable LVT0 */ | ||
| 1816 | apic_write(APIC_LVT0, APIC_LVT_MASKED); | ||
| 1453 | } | 1817 | } |
| 1454 | } | ||
| 1455 | 1818 | ||
| 1456 | unsigned int __cpuinitdata maxcpus = NR_CPUS; | 1819 | /* |
| 1820 | * For LVT1 make it edge triggered, active high, | ||
| 1821 | * nmi and enabled | ||
| 1822 | */ | ||
| 1823 | value = apic_read(APIC_LVT1); | ||
| 1824 | value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | | ||
| 1825 | APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | | ||
| 1826 | APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); | ||
| 1827 | value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; | ||
| 1828 | value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); | ||
| 1829 | apic_write(APIC_LVT1, value); | ||
| 1830 | } | ||
| 1457 | 1831 | ||
| 1458 | void __cpuinit generic_processor_info(int apicid, int version) | 1832 | void __cpuinit generic_processor_info(int apicid, int version) |
| 1459 | { | 1833 | { |
| 1460 | int cpu; | 1834 | int cpu; |
| 1461 | cpumask_t tmp_map; | 1835 | cpumask_t tmp_map; |
| 1462 | physid_mask_t phys_cpu; | ||
| 1463 | 1836 | ||
| 1464 | /* | 1837 | /* |
| 1465 | * Validate version | 1838 | * Validate version |
| @@ -1472,36 +1845,29 @@ void __cpuinit generic_processor_info(int apicid, int version) | |||
| 1472 | } | 1845 | } |
| 1473 | apic_version[apicid] = version; | 1846 | apic_version[apicid] = version; |
| 1474 | 1847 | ||
| 1475 | phys_cpu = apicid_to_cpu_present(apicid); | ||
| 1476 | physids_or(phys_cpu_present_map, phys_cpu_present_map, phys_cpu); | ||
| 1477 | |||
| 1478 | if (num_processors >= NR_CPUS) { | 1848 | if (num_processors >= NR_CPUS) { |
| 1479 | printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." | 1849 | printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." |
| 1480 | " Processor ignored.\n", NR_CPUS); | 1850 | " Processor ignored.\n", NR_CPUS); |
| 1481 | return; | 1851 | return; |
| 1482 | } | 1852 | } |
| 1483 | 1853 | ||
| 1484 | if (num_processors >= maxcpus) { | ||
| 1485 | printk(KERN_WARNING "WARNING: maxcpus limit of %i reached." | ||
| 1486 | " Processor ignored.\n", maxcpus); | ||
| 1487 | return; | ||
| 1488 | } | ||
| 1489 | |||
| 1490 | num_processors++; | 1854 | num_processors++; |
| 1491 | cpus_complement(tmp_map, cpu_present_map); | 1855 | cpus_complement(tmp_map, cpu_present_map); |
| 1492 | cpu = first_cpu(tmp_map); | 1856 | cpu = first_cpu(tmp_map); |
| 1493 | 1857 | ||
| 1494 | if (apicid == boot_cpu_physical_apicid) | 1858 | physid_set(apicid, phys_cpu_present_map); |
| 1859 | if (apicid == boot_cpu_physical_apicid) { | ||
| 1495 | /* | 1860 | /* |
| 1496 | * x86_bios_cpu_apicid is required to have processors listed | 1861 | * x86_bios_cpu_apicid is required to have processors listed |
| 1497 | * in same order as logical cpu numbers. Hence the first | 1862 | * in same order as logical cpu numbers. Hence the first |
| 1498 | * entry is BSP, and so on. | 1863 | * entry is BSP, and so on. |
| 1499 | */ | 1864 | */ |
| 1500 | cpu = 0; | 1865 | cpu = 0; |
| 1501 | 1866 | } | |
| 1502 | if (apicid > max_physical_apicid) | 1867 | if (apicid > max_physical_apicid) |
| 1503 | max_physical_apicid = apicid; | 1868 | max_physical_apicid = apicid; |
| 1504 | 1869 | ||
| 1870 | #ifdef CONFIG_X86_32 | ||
| 1505 | /* | 1871 | /* |
| 1506 | * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y | 1872 | * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y |
| 1507 | * but we need to work out other dependencies like SMP_SUSPEND etc | 1873 | * but we need to work out other dependencies like SMP_SUSPEND etc |
| @@ -1521,7 +1887,9 @@ void __cpuinit generic_processor_info(int apicid, int version) | |||
| 1521 | def_to_bigsmp = 1; | 1887 | def_to_bigsmp = 1; |
| 1522 | } | 1888 | } |
| 1523 | } | 1889 | } |
| 1524 | #ifdef CONFIG_SMP | 1890 | #endif |
| 1891 | |||
| 1892 | #if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64) | ||
| 1525 | /* are we being called early in kernel startup? */ | 1893 | /* are we being called early in kernel startup? */ |
| 1526 | if (early_per_cpu_ptr(x86_cpu_to_apicid)) { | 1894 | if (early_per_cpu_ptr(x86_cpu_to_apicid)) { |
| 1527 | u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); | 1895 | u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); |
| @@ -1534,16 +1902,29 @@ void __cpuinit generic_processor_info(int apicid, int version) | |||
| 1534 | per_cpu(x86_bios_cpu_apicid, cpu) = apicid; | 1902 | per_cpu(x86_bios_cpu_apicid, cpu) = apicid; |
| 1535 | } | 1903 | } |
| 1536 | #endif | 1904 | #endif |
| 1905 | |||
| 1537 | cpu_set(cpu, cpu_possible_map); | 1906 | cpu_set(cpu, cpu_possible_map); |
| 1538 | cpu_set(cpu, cpu_present_map); | 1907 | cpu_set(cpu, cpu_present_map); |
| 1539 | } | 1908 | } |
| 1540 | 1909 | ||
| 1910 | #ifdef CONFIG_X86_64 | ||
| 1911 | int hard_smp_processor_id(void) | ||
| 1912 | { | ||
| 1913 | return read_apic_id(); | ||
| 1914 | } | ||
| 1915 | #endif | ||
| 1916 | |||
| 1541 | /* | 1917 | /* |
| 1542 | * Power management | 1918 | * Power management |
| 1543 | */ | 1919 | */ |
| 1544 | #ifdef CONFIG_PM | 1920 | #ifdef CONFIG_PM |
| 1545 | 1921 | ||
| 1546 | static struct { | 1922 | static struct { |
| 1923 | /* | ||
| 1924 | * 'active' is true if the local APIC was enabled by us and | ||
| 1925 | * not the BIOS; this signifies that we are also responsible | ||
| 1926 | * for disabling it before entering apm/acpi suspend | ||
| 1927 | */ | ||
| 1547 | int active; | 1928 | int active; |
| 1548 | /* r/w apic fields */ | 1929 | /* r/w apic fields */ |
| 1549 | unsigned int apic_id; | 1930 | unsigned int apic_id; |
| @@ -1584,7 +1965,7 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state) | |||
| 1584 | apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); | 1965 | apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); |
| 1585 | apic_pm_state.apic_tmict = apic_read(APIC_TMICT); | 1966 | apic_pm_state.apic_tmict = apic_read(APIC_TMICT); |
| 1586 | apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); | 1967 | apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); |
| 1587 | #ifdef CONFIG_X86_MCE_P4THERMAL | 1968 | #if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) |
| 1588 | if (maxlvt >= 5) | 1969 | if (maxlvt >= 5) |
| 1589 | apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); | 1970 | apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); |
| 1590 | #endif | 1971 | #endif |
| @@ -1608,16 +1989,23 @@ static int lapic_resume(struct sys_device *dev) | |||
| 1608 | 1989 | ||
| 1609 | local_irq_save(flags); | 1990 | local_irq_save(flags); |
| 1610 | 1991 | ||
| 1611 | /* | 1992 | #ifdef HAVE_X2APIC |
| 1612 | * Make sure the APICBASE points to the right address | 1993 | if (x2apic) |
| 1613 | * | 1994 | enable_x2apic(); |
| 1614 | * FIXME! This will be wrong if we ever support suspend on | 1995 | else |
| 1615 | * SMP! We'll need to do this as part of the CPU restore! | 1996 | #endif |
| 1616 | */ | 1997 | { |
| 1617 | rdmsr(MSR_IA32_APICBASE, l, h); | 1998 | /* |
| 1618 | l &= ~MSR_IA32_APICBASE_BASE; | 1999 | * Make sure the APICBASE points to the right address |
| 1619 | l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; | 2000 | * |
| 1620 | wrmsr(MSR_IA32_APICBASE, l, h); | 2001 | * FIXME! This will be wrong if we ever support suspend on |
| 2002 | * SMP! We'll need to do this as part of the CPU restore! | ||
| 2003 | */ | ||
| 2004 | rdmsr(MSR_IA32_APICBASE, l, h); | ||
| 2005 | l &= ~MSR_IA32_APICBASE_BASE; | ||
| 2006 | l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; | ||
| 2007 | wrmsr(MSR_IA32_APICBASE, l, h); | ||
| 2008 | } | ||
| 1621 | 2009 | ||
| 1622 | apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); | 2010 | apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); |
| 1623 | apic_write(APIC_ID, apic_pm_state.apic_id); | 2011 | apic_write(APIC_ID, apic_pm_state.apic_id); |
| @@ -1627,7 +2015,7 @@ static int lapic_resume(struct sys_device *dev) | |||
| 1627 | apic_write(APIC_SPIV, apic_pm_state.apic_spiv); | 2015 | apic_write(APIC_SPIV, apic_pm_state.apic_spiv); |
| 1628 | apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); | 2016 | apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); |
| 1629 | apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); | 2017 | apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); |
| 1630 | #ifdef CONFIG_X86_MCE_P4THERMAL | 2018 | #if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) |
| 1631 | if (maxlvt >= 5) | 2019 | if (maxlvt >= 5) |
| 1632 | apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); | 2020 | apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); |
| 1633 | #endif | 2021 | #endif |
| @@ -1641,7 +2029,9 @@ static int lapic_resume(struct sys_device *dev) | |||
| 1641 | apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); | 2029 | apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); |
| 1642 | apic_write(APIC_ESR, 0); | 2030 | apic_write(APIC_ESR, 0); |
| 1643 | apic_read(APIC_ESR); | 2031 | apic_read(APIC_ESR); |
| 2032 | |||
| 1644 | local_irq_restore(flags); | 2033 | local_irq_restore(flags); |
| 2034 | |||
| 1645 | return 0; | 2035 | return 0; |
| 1646 | } | 2036 | } |
| 1647 | 2037 | ||
| @@ -1661,7 +2051,7 @@ static struct sys_device device_lapic = { | |||
| 1661 | .cls = &lapic_sysclass, | 2051 | .cls = &lapic_sysclass, |
| 1662 | }; | 2052 | }; |
| 1663 | 2053 | ||
| 1664 | static void __devinit apic_pm_activate(void) | 2054 | static void __cpuinit apic_pm_activate(void) |
| 1665 | { | 2055 | { |
| 1666 | apic_pm_state.active = 1; | 2056 | apic_pm_state.active = 1; |
| 1667 | } | 2057 | } |
| @@ -1687,30 +2077,101 @@ static void apic_pm_activate(void) { } | |||
| 1687 | 2077 | ||
| 1688 | #endif /* CONFIG_PM */ | 2078 | #endif /* CONFIG_PM */ |
| 1689 | 2079 | ||
| 2080 | #ifdef CONFIG_X86_64 | ||
| 1690 | /* | 2081 | /* |
| 1691 | * APIC command line parameters | 2082 | * apic_is_clustered_box() -- Check if we can expect good TSC |
| 2083 | * | ||
| 2084 | * Thus far, the major user of this is IBM's Summit2 series: | ||
| 2085 | * | ||
| 2086 | * Clustered boxes may have unsynced TSC problems if they are | ||
| 2087 | * multi-chassis. Use available data to take a good guess. | ||
| 2088 | * If in doubt, go HPET. | ||
| 1692 | */ | 2089 | */ |
| 1693 | static int __init parse_lapic(char *arg) | 2090 | __cpuinit int apic_is_clustered_box(void) |
| 1694 | { | 2091 | { |
| 1695 | force_enable_local_apic = 1; | 2092 | int i, clusters, zeros; |
| 1696 | return 0; | 2093 | unsigned id; |
| 2094 | u16 *bios_cpu_apicid; | ||
| 2095 | DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS); | ||
| 2096 | |||
| 2097 | /* | ||
| 2098 | * There is no such box with AMD CPUs yet. | ||
| 2099 | * Some AMD boxes with quad-core CPUs and 8 sockets have | ||
| 2100 | * APIC IDs in [4, 0x23] or [8, 0x27] and could be mistaken | ||
| 2101 | * for vsmp boxes; this still needs checking... | ||
| 2102 | */ | ||
| 2103 | if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box()) | ||
| 2104 | return 0; | ||
| 2105 | |||
| 2106 | bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); | ||
| 2107 | bitmap_zero(clustermap, NUM_APIC_CLUSTERS); | ||
| 2108 | |||
| 2109 | for (i = 0; i < NR_CPUS; i++) { | ||
| 2110 | /* are we being called early in kernel startup? */ | ||
| 2111 | if (bios_cpu_apicid) { | ||
| 2112 | id = bios_cpu_apicid[i]; | ||
| 2113 | } | ||
| 2114 | else if (i < nr_cpu_ids) { | ||
| 2115 | if (cpu_present(i)) | ||
| 2116 | id = per_cpu(x86_bios_cpu_apicid, i); | ||
| 2117 | else | ||
| 2118 | continue; | ||
| 2119 | } | ||
| 2120 | else | ||
| 2121 | break; | ||
| 2122 | |||
| 2123 | if (id != BAD_APICID) | ||
| 2124 | __set_bit(APIC_CLUSTERID(id), clustermap); | ||
| 2125 | } | ||
| 2126 | |||
| 2127 | /* Problem: Partially populated chassis may not have CPUs in some of | ||
| 2128 | * the APIC clusters they have been allocated. Only present CPUs have | ||
| 2129 | * x86_bios_cpu_apicid entries, thus causing zeroes in the bitmap. | ||
| 2130 | * Since clusters are allocated sequentially, count zeros only if | ||
| 2131 | * they are bounded by ones. | ||
| 2132 | */ | ||
| 2133 | clusters = 0; | ||
| 2134 | zeros = 0; | ||
| 2135 | for (i = 0; i < NUM_APIC_CLUSTERS; i++) { | ||
| 2136 | if (test_bit(i, clustermap)) { | ||
| 2137 | clusters += 1 + zeros; | ||
| 2138 | zeros = 0; | ||
| 2139 | } else | ||
| 2140 | ++zeros; | ||
| 2141 | } | ||
| 2142 | |||
| 2143 | /* ScaleMP vSMPowered boxes have one cluster per board and TSCs are | ||
| 2144 | * not guaranteed to be synced between boards | ||
| 2145 | */ | ||
| 2146 | if (is_vsmp_box() && clusters > 1) | ||
| 2147 | return 1; | ||
| 2148 | |||
| 2149 | /* | ||
| 2150 | * If clusters > 2, then this should be a multi-chassis box. | ||
| 2151 | * May have to revisit this when multi-core + hyperthreaded CPUs come | ||
| 2152 | * out, but AFAIK this will work even for them. | ||
| 2153 | */ | ||
| 2154 | return (clusters > 2); | ||
| 1697 | } | 2155 | } |
| 1698 | early_param("lapic", parse_lapic); | 2156 | #endif |
| 1699 | 2157 | ||
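The cluster-counting loop above credits an empty cluster only when it sits between two populated ones, which is what the "count zeros only if they are bounded by ones" comment describes for partially populated chassis. A self-contained sketch of that counting rule, with a plain int array and a stand-in NUM_CLUSTERS instead of the kernel's bitmap and NUM_APIC_CLUSTERS:

#include <stdio.h>

#define NUM_CLUSTERS 16                /* stand-in for NUM_APIC_CLUSTERS */

/* Count populated clusters; gaps of zeros count only when bounded by ones. */
static int count_clusters(const int *clustermap)
{
        int clusters = 0, zeros = 0, i;

        for (i = 0; i < NUM_CLUSTERS; i++) {
                if (clustermap[i]) {
                        clusters += 1 + zeros;
                        zeros = 0;
                } else {
                        ++zeros;
                }
        }
        return clusters;
}

int main(void)
{
        /* Clusters 0, 1 and 4 populated: the 2..3 gap is bounded by ones and
         * counts, giving 5; the trailing zeros after cluster 4 do not. */
        int map[NUM_CLUSTERS] = { 1, 1, 0, 0, 1 };

        printf("clusters = %d\n", count_clusters(map));   /* prints 5 */
        return 0;
}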
| 1700 | static int __init parse_nolapic(char *arg) | 2158 | /* |
| 2159 | * APIC command line parameters | ||
| 2160 | */ | ||
| 2161 | static int __init setup_disableapic(char *arg) | ||
| 1701 | { | 2162 | { |
| 1702 | disable_apic = 1; | 2163 | disable_apic = 1; |
| 1703 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); | 2164 | setup_clear_cpu_cap(X86_FEATURE_APIC); |
| 1704 | return 0; | 2165 | return 0; |
| 1705 | } | 2166 | } |
| 1706 | early_param("nolapic", parse_nolapic); | 2167 | early_param("disableapic", setup_disableapic); |
| 1707 | 2168 | ||
| 1708 | static int __init parse_disable_lapic_timer(char *arg) | 2169 | /* same as disableapic, for compatibility */ |
| 2170 | static int __init setup_nolapic(char *arg) | ||
| 1709 | { | 2171 | { |
| 1710 | local_apic_timer_disabled = 1; | 2172 | return setup_disableapic(arg); |
| 1711 | return 0; | ||
| 1712 | } | 2173 | } |
| 1713 | early_param("nolapic_timer", parse_disable_lapic_timer); | 2174 | early_param("nolapic", setup_nolapic); |
| 1714 | 2175 | ||
| 1715 | static int __init parse_lapic_timer_c2_ok(char *arg) | 2176 | static int __init parse_lapic_timer_c2_ok(char *arg) |
| 1716 | { | 2177 | { |
| @@ -1719,15 +2180,43 @@ static int __init parse_lapic_timer_c2_ok(char *arg) | |||
| 1719 | } | 2180 | } |
| 1720 | early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); | 2181 | early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); |
| 1721 | 2182 | ||
| 1722 | static int __init apic_set_verbosity(char *str) | 2183 | static int __init parse_disable_apic_timer(char *arg) |
| 1723 | { | 2184 | { |
| 1724 | if (strcmp("debug", str) == 0) | 2185 | disable_apic_timer = 1; |
| 2186 | return 0; | ||
| 2187 | } | ||
| 2188 | early_param("noapictimer", parse_disable_apic_timer); | ||
| 2189 | |||
| 2190 | static int __init parse_nolapic_timer(char *arg) | ||
| 2191 | { | ||
| 2192 | disable_apic_timer = 1; | ||
| 2193 | return 0; | ||
| 2194 | } | ||
| 2195 | early_param("nolapic_timer", parse_nolapic_timer); | ||
| 2196 | |||
| 2197 | static int __init apic_set_verbosity(char *arg) | ||
| 2198 | { | ||
| 2199 | if (!arg) { | ||
| 2200 | #ifdef CONFIG_X86_64 | ||
| 2201 | skip_ioapic_setup = 0; | ||
| 2202 | return 0; | ||
| 2203 | #endif | ||
| 2204 | return -EINVAL; | ||
| 2205 | } | ||
| 2206 | |||
| 2207 | if (strcmp("debug", arg) == 0) | ||
| 1725 | apic_verbosity = APIC_DEBUG; | 2208 | apic_verbosity = APIC_DEBUG; |
| 1726 | else if (strcmp("verbose", str) == 0) | 2209 | else if (strcmp("verbose", arg) == 0) |
| 1727 | apic_verbosity = APIC_VERBOSE; | 2210 | apic_verbosity = APIC_VERBOSE; |
| 1728 | return 1; | 2211 | else { |
| 2212 | printk(KERN_WARNING "APIC Verbosity level %s not recognised" | ||
| 2213 | " use apic=verbose or apic=debug\n", arg); | ||
| 2214 | return -EINVAL; | ||
| 2215 | } | ||
| 2216 | |||
| 2217 | return 0; | ||
| 1729 | } | 2218 | } |
| 1730 | __setup("apic=", apic_set_verbosity); | 2219 | early_param("apic", apic_set_verbosity); |
| 1731 | 2220 | ||
| 1732 | static int __init lapic_insert_resource(void) | 2221 | static int __init lapic_insert_resource(void) |
| 1733 | { | 2222 | { |
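The apic= handler just above, now registered with early_param() instead of __setup(), is a plain string dispatch: a missing argument is accepted only on 64-bit (where a bare "apic" re-enables IO-APIC setup), "debug" and "verbose" pick the log level, and anything else is rejected. A stand-alone sketch of that dispatch, with invented verbosity constants in place of the kernel's APIC_DEBUG/APIC_VERBOSE:

#include <stdio.h>
#include <string.h>

enum { QUIET = 0, VERBOSE = 1, DEBUG = 2 };    /* stand-ins for the APIC_* levels */

/* Return the selected verbosity, or -1 for a missing/unrecognised argument. */
static int parse_apic_verbosity(const char *arg)
{
        if (!arg || !*arg)
                return -1;      /* the kernel additionally special-cases this on 64-bit */
        if (strcmp(arg, "debug") == 0)
                return DEBUG;
        if (strcmp(arg, "verbose") == 0)
                return VERBOSE;
        return -1;
}

int main(void)
{
        const char *tests[] = { "debug", "verbose", "loud", "" };
        unsigned int i;

        for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++)
                printf("apic=%s -> %d\n", tests[i], parse_apic_verbosity(tests[i]));
        return 0;
}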
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c deleted file mode 100644 index 1e3d32e27c14..000000000000 --- a/arch/x86/kernel/apic_64.c +++ /dev/null | |||
| @@ -1,1393 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * Local APIC handling, local APIC timers | ||
| 3 | * | ||
| 4 | * (c) 1999, 2000 Ingo Molnar <mingo@redhat.com> | ||
| 5 | * | ||
| 6 | * Fixes | ||
| 7 | * Maciej W. Rozycki : Bits for genuine 82489DX APICs; | ||
| 8 | * thanks to Eric Gilmore | ||
| 9 | * and Rolf G. Tews | ||
| 10 | * for testing these extensively. | ||
| 11 | * Maciej W. Rozycki : Various updates and fixes. | ||
| 12 | * Mikael Pettersson : Power Management for UP-APIC. | ||
| 13 | * Pavel Machek and | ||
| 14 | * Mikael Pettersson : PM converted to driver model. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include <linux/init.h> | ||
| 18 | |||
| 19 | #include <linux/mm.h> | ||
| 20 | #include <linux/delay.h> | ||
| 21 | #include <linux/bootmem.h> | ||
| 22 | #include <linux/interrupt.h> | ||
| 23 | #include <linux/mc146818rtc.h> | ||
| 24 | #include <linux/kernel_stat.h> | ||
| 25 | #include <linux/sysdev.h> | ||
| 26 | #include <linux/ioport.h> | ||
| 27 | #include <linux/clockchips.h> | ||
| 28 | #include <linux/acpi_pmtmr.h> | ||
| 29 | #include <linux/module.h> | ||
| 30 | |||
| 31 | #include <asm/atomic.h> | ||
| 32 | #include <asm/smp.h> | ||
| 33 | #include <asm/mtrr.h> | ||
| 34 | #include <asm/mpspec.h> | ||
| 35 | #include <asm/hpet.h> | ||
| 36 | #include <asm/pgalloc.h> | ||
| 37 | #include <asm/nmi.h> | ||
| 38 | #include <asm/idle.h> | ||
| 39 | #include <asm/proto.h> | ||
| 40 | #include <asm/timex.h> | ||
| 41 | #include <asm/apic.h> | ||
| 42 | |||
| 43 | #include <mach_ipi.h> | ||
| 44 | #include <mach_apic.h> | ||
| 45 | |||
| 46 | static int disable_apic_timer __cpuinitdata; | ||
| 47 | static int apic_calibrate_pmtmr __initdata; | ||
| 48 | int disable_apic; | ||
| 49 | |||
| 50 | /* Local APIC timer works in C2 */ | ||
| 51 | int local_apic_timer_c2_ok; | ||
| 52 | EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); | ||
| 53 | |||
| 54 | /* | ||
| 55 | * Debug level, exported for io_apic.c | ||
| 56 | */ | ||
| 57 | int apic_verbosity; | ||
| 58 | |||
| 59 | /* Have we found an MP table */ | ||
| 60 | int smp_found_config; | ||
| 61 | |||
| 62 | static struct resource lapic_resource = { | ||
| 63 | .name = "Local APIC", | ||
| 64 | .flags = IORESOURCE_MEM | IORESOURCE_BUSY, | ||
| 65 | }; | ||
| 66 | |||
| 67 | static unsigned int calibration_result; | ||
| 68 | |||
| 69 | static int lapic_next_event(unsigned long delta, | ||
| 70 | struct clock_event_device *evt); | ||
| 71 | static void lapic_timer_setup(enum clock_event_mode mode, | ||
| 72 | struct clock_event_device *evt); | ||
| 73 | static void lapic_timer_broadcast(cpumask_t mask); | ||
| 74 | static void apic_pm_activate(void); | ||
| 75 | |||
| 76 | static struct clock_event_device lapic_clockevent = { | ||
| 77 | .name = "lapic", | ||
| 78 | .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT | ||
| 79 | | CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY, | ||
| 80 | .shift = 32, | ||
| 81 | .set_mode = lapic_timer_setup, | ||
| 82 | .set_next_event = lapic_next_event, | ||
| 83 | .broadcast = lapic_timer_broadcast, | ||
| 84 | .rating = 100, | ||
| 85 | .irq = -1, | ||
| 86 | }; | ||
| 87 | static DEFINE_PER_CPU(struct clock_event_device, lapic_events); | ||
| 88 | |||
| 89 | static unsigned long apic_phys; | ||
| 90 | |||
| 91 | unsigned long mp_lapic_addr; | ||
| 92 | |||
| 93 | unsigned int __cpuinitdata maxcpus = NR_CPUS; | ||
| 94 | /* | ||
| 95 | * Get the LAPIC version | ||
| 96 | */ | ||
| 97 | static inline int lapic_get_version(void) | ||
| 98 | { | ||
| 99 | return GET_APIC_VERSION(apic_read(APIC_LVR)); | ||
| 100 | } | ||
| 101 | |||
| 102 | /* | ||
| 103 | * Check if the APIC is integrated or a separate chip | ||
| 104 | */ | ||
| 105 | static inline int lapic_is_integrated(void) | ||
| 106 | { | ||
| 107 | return 1; | ||
| 108 | } | ||
| 109 | |||
| 110 | /* | ||
| 111 | * Check whether this is a modern or a first-generation APIC | ||
| 112 | */ | ||
| 113 | static int modern_apic(void) | ||
| 114 | { | ||
| 115 | /* AMD systems use old APIC versions, so check the CPU */ | ||
| 116 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && | ||
| 117 | boot_cpu_data.x86 >= 0xf) | ||
| 118 | return 1; | ||
| 119 | return lapic_get_version() >= 0x14; | ||
| 120 | } | ||
| 121 | |||
| 122 | void apic_wait_icr_idle(void) | ||
| 123 | { | ||
| 124 | while (apic_read(APIC_ICR) & APIC_ICR_BUSY) | ||
| 125 | cpu_relax(); | ||
| 126 | } | ||
| 127 | |||
| 128 | u32 safe_apic_wait_icr_idle(void) | ||
| 129 | { | ||
| 130 | u32 send_status; | ||
| 131 | int timeout; | ||
| 132 | |||
| 133 | timeout = 0; | ||
| 134 | do { | ||
| 135 | send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; | ||
| 136 | if (!send_status) | ||
| 137 | break; | ||
| 138 | udelay(100); | ||
| 139 | } while (timeout++ < 1000); | ||
| 140 | |||
| 141 | return send_status; | ||
| 142 | } | ||
| 143 | |||
| 144 | /** | ||
| 145 | * enable_NMI_through_LVT0 - enable NMI through local vector table 0 | ||
| 146 | */ | ||
| 147 | void __cpuinit enable_NMI_through_LVT0(void) | ||
| 148 | { | ||
| 149 | unsigned int v; | ||
| 150 | |||
| 151 | /* unmask and set to NMI */ | ||
| 152 | v = APIC_DM_NMI; | ||
| 153 | apic_write(APIC_LVT0, v); | ||
| 154 | } | ||
| 155 | |||
| 156 | /** | ||
| 157 | * lapic_get_maxlvt - get the maximum number of local vector table entries | ||
| 158 | */ | ||
| 159 | int lapic_get_maxlvt(void) | ||
| 160 | { | ||
| 161 | unsigned int v, maxlvt; | ||
| 162 | |||
| 163 | v = apic_read(APIC_LVR); | ||
| 164 | maxlvt = GET_APIC_MAXLVT(v); | ||
| 165 | return maxlvt; | ||
| 166 | } | ||
| 167 | |||
| 168 | /* | ||
| 169 | * This function sets up the local APIC timer, with a timeout of | ||
| 170 | * 'clocks' APIC bus clock. During calibration we actually call | ||
| 171 | * this function twice on the boot CPU, once with a bogus timeout | ||
| 172 | * value, second time for real. The other (noncalibrating) CPUs | ||
| 173 | * call this function only once, with the real, calibrated value. | ||
| 174 | * | ||
| 175 | * We do reads before writes even if unnecessary, to get around the | ||
| 176 | * P5 APIC double write bug. | ||
| 177 | */ | ||
| 178 | |||
| 179 | static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) | ||
| 180 | { | ||
| 181 | unsigned int lvtt_value, tmp_value; | ||
| 182 | |||
| 183 | lvtt_value = LOCAL_TIMER_VECTOR; | ||
| 184 | if (!oneshot) | ||
| 185 | lvtt_value |= APIC_LVT_TIMER_PERIODIC; | ||
| 186 | if (!irqen) | ||
| 187 | lvtt_value |= APIC_LVT_MASKED; | ||
| 188 | |||
| 189 | apic_write(APIC_LVTT, lvtt_value); | ||
| 190 | |||
| 191 | /* | ||
| 192 | * Divide PICLK by 16 | ||
| 193 | */ | ||
| 194 | tmp_value = apic_read(APIC_TDCR); | ||
| 195 | apic_write(APIC_TDCR, (tmp_value | ||
| 196 | & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | ||
| 197 | | APIC_TDR_DIV_16); | ||
| 198 | |||
| 199 | if (!oneshot) | ||
| 200 | apic_write(APIC_TMICT, clocks); | ||
| 201 | } | ||
| 202 | |||
| 203 | /* | ||
| 204 | * Setup extended LVT, AMD specific (K8, family 10h) | ||
| 205 | * | ||
| 206 | * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and | ||
| 207 | * MCE interrupts are supported. Thus MCE offset must be set to 0. | ||
| 208 | */ | ||
| 209 | |||
| 210 | #define APIC_EILVT_LVTOFF_MCE 0 | ||
| 211 | #define APIC_EILVT_LVTOFF_IBS 1 | ||
| 212 | |||
| 213 | static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask) | ||
| 214 | { | ||
| 215 | unsigned long reg = (lvt_off << 4) + APIC_EILVT0; | ||
| 216 | unsigned int v = (mask << 16) | (msg_type << 8) | vector; | ||
| 217 | |||
| 218 | apic_write(reg, v); | ||
| 219 | } | ||
| 220 | |||
| 221 | u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask) | ||
| 222 | { | ||
| 223 | setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask); | ||
| 224 | return APIC_EILVT_LVTOFF_MCE; | ||
| 225 | } | ||
| 226 | |||
| 227 | u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask) | ||
| 228 | { | ||
| 229 | setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask); | ||
| 230 | return APIC_EILVT_LVTOFF_IBS; | ||
| 231 | } | ||
| 232 | |||
| 233 | /* | ||
| 234 | * Program the next event, relative to now | ||
| 235 | */ | ||
| 236 | static int lapic_next_event(unsigned long delta, | ||
| 237 | struct clock_event_device *evt) | ||
| 238 | { | ||
| 239 | apic_write(APIC_TMICT, delta); | ||
| 240 | return 0; | ||
| 241 | } | ||
| 242 | |||
| 243 | /* | ||
| 244 | * Setup the lapic timer in periodic or oneshot mode | ||
| 245 | */ | ||
| 246 | static void lapic_timer_setup(enum clock_event_mode mode, | ||
| 247 | struct clock_event_device *evt) | ||
| 248 | { | ||
| 249 | unsigned long flags; | ||
| 250 | unsigned int v; | ||
| 251 | |||
| 252 | /* Lapic used as dummy for broadcast ? */ | ||
| 253 | if (evt->features & CLOCK_EVT_FEAT_DUMMY) | ||
| 254 | return; | ||
| 255 | |||
| 256 | local_irq_save(flags); | ||
| 257 | |||
| 258 | switch (mode) { | ||
| 259 | case CLOCK_EVT_MODE_PERIODIC: | ||
| 260 | case CLOCK_EVT_MODE_ONESHOT: | ||
| 261 | __setup_APIC_LVTT(calibration_result, | ||
| 262 | mode != CLOCK_EVT_MODE_PERIODIC, 1); | ||
| 263 | break; | ||
| 264 | case CLOCK_EVT_MODE_UNUSED: | ||
| 265 | case CLOCK_EVT_MODE_SHUTDOWN: | ||
| 266 | v = apic_read(APIC_LVTT); | ||
| 267 | v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); | ||
| 268 | apic_write(APIC_LVTT, v); | ||
| 269 | break; | ||
| 270 | case CLOCK_EVT_MODE_RESUME: | ||
| 271 | /* Nothing to do here */ | ||
| 272 | break; | ||
| 273 | } | ||
| 274 | |||
| 275 | local_irq_restore(flags); | ||
| 276 | } | ||
| 277 | |||
| 278 | /* | ||
| 279 | * Local APIC timer broadcast function | ||
| 280 | */ | ||
| 281 | static void lapic_timer_broadcast(cpumask_t mask) | ||
| 282 | { | ||
| 283 | #ifdef CONFIG_SMP | ||
| 284 | send_IPI_mask(mask, LOCAL_TIMER_VECTOR); | ||
| 285 | #endif | ||
| 286 | } | ||
| 287 | |||
| 288 | /* | ||
| 289 | * Set up the local APIC timer for this CPU. Copy the initialized values | ||
| 290 | * of the boot CPU and register the clock event in the framework. | ||
| 291 | */ | ||
| 292 | static void setup_APIC_timer(void) | ||
| 293 | { | ||
| 294 | struct clock_event_device *levt = &__get_cpu_var(lapic_events); | ||
| 295 | |||
| 296 | memcpy(levt, &lapic_clockevent, sizeof(*levt)); | ||
| 297 | levt->cpumask = cpumask_of_cpu(smp_processor_id()); | ||
| 298 | |||
| 299 | clockevents_register_device(levt); | ||
| 300 | } | ||
| 301 | |||
| 302 | /* | ||
| 303 | * In this function we calibrate APIC bus clocks to the external | ||
| 304 | * timer. Unfortunately we cannot use jiffies and the timer irq | ||
| 305 | * to calibrate, since some later bootup code depends on getting | ||
| 306 | * the first irq. Ugh. | ||
| 307 | * | ||
| 308 | * We want to do the calibration only once since we | ||
| 309 | * want to have local timer irqs synchronized. CPUs connected | ||
| 310 | * by the same APIC bus have the very same bus frequency. | ||
| 311 | * And we want to have irqs off anyway, no accidental | ||
| 312 | * APIC irq that way. | ||
| 313 | */ | ||
| 314 | |||
| 315 | #define TICK_COUNT 100000000 | ||
| 316 | |||
| 317 | static void __init calibrate_APIC_clock(void) | ||
| 318 | { | ||
| 319 | unsigned apic, apic_start; | ||
| 320 | unsigned long tsc, tsc_start; | ||
| 321 | int result; | ||
| 322 | |||
| 323 | local_irq_disable(); | ||
| 324 | |||
| 325 | /* | ||
| 326 | * Put whatever arbitrary (but long enough) timeout | ||
| 327 | * value into the APIC clock, we just want to get the | ||
| 328 | * counter running for calibration. | ||
| 329 | * | ||
| 330 | * No interrupt enable ! | ||
| 331 | */ | ||
| 332 | __setup_APIC_LVTT(250000000, 0, 0); | ||
| 333 | |||
| 334 | apic_start = apic_read(APIC_TMCCT); | ||
| 335 | #ifdef CONFIG_X86_PM_TIMER | ||
| 336 | if (apic_calibrate_pmtmr && pmtmr_ioport) { | ||
| 337 | pmtimer_wait(5000); /* 5ms wait */ | ||
| 338 | apic = apic_read(APIC_TMCCT); | ||
| 339 | result = (apic_start - apic) * 1000L / 5; | ||
| 340 | } else | ||
| 341 | #endif | ||
| 342 | { | ||
| 343 | rdtscll(tsc_start); | ||
| 344 | |||
| 345 | do { | ||
| 346 | apic = apic_read(APIC_TMCCT); | ||
| 347 | rdtscll(tsc); | ||
| 348 | } while ((tsc - tsc_start) < TICK_COUNT && | ||
| 349 | (apic_start - apic) < TICK_COUNT); | ||
| 350 | |||
| 351 | result = (apic_start - apic) * 1000L * tsc_khz / | ||
| 352 | (tsc - tsc_start); | ||
| 353 | } | ||
| 354 | |||
| 355 | local_irq_enable(); | ||
| 356 | |||
| 357 | printk(KERN_DEBUG "APIC timer calibration result %d\n", result); | ||
| 358 | |||
| 359 | printk(KERN_INFO "Detected %d.%03d MHz APIC timer.\n", | ||
| 360 | result / 1000 / 1000, result / 1000 % 1000); | ||
| 361 | |||
| 362 | /* Calculate the scaled math multiplication factor */ | ||
| 363 | lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC, | ||
| 364 | lapic_clockevent.shift); | ||
| 365 | lapic_clockevent.max_delta_ns = | ||
| 366 | clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); | ||
| 367 | lapic_clockevent.min_delta_ns = | ||
| 368 | clockevent_delta2ns(0xF, &lapic_clockevent); | ||
| 369 | |||
| 370 | calibration_result = result / HZ; | ||
| 371 | } | ||
| 372 | |||
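For illustration only: the mult/shift pair computed above is the usual clockevents fixed-point conversion. The following is a minimal, self-contained userspace sketch of that arithmetic; the 200 MHz timer frequency and the shift value of 32 are assumed example inputs, not values taken from this file, and the local div_sc() merely mirrors the kernel helper of the same name.

#include <stdint.h>
#include <stdio.h>

/* Mirrors the kernel's div_sc(): mult = (ticks << shift) / nsec */
static uint32_t div_sc(uint64_t ticks, uint64_t nsec, int shift)
{
        return (uint32_t)((ticks << shift) / nsec);
}

int main(void)
{
        uint64_t apic_hz = 200000000;   /* assumed 200 MHz APIC timer clock */
        int shift = 32;                 /* assumed clockevent shift */
        uint32_t mult = div_sc(apic_hz, 1000000000, shift);

        /* clockevent_delta2ns(): ns = (delta << shift) / mult */
        uint64_t max_ns = ((uint64_t)0x7FFFFF << shift) / mult;

        printf("mult=%u max_delta_ns=%llu\n",   /* ~42 ms at 200 MHz */
               mult, (unsigned long long)max_ns);
        return 0;
}

min_delta_ns follows the same formula with 0xF as the delta.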
| 373 | /* | ||
| 374 | * Setup the boot APIC | ||
| 375 | * | ||
| 376 | * Calibrate and verify the result. | ||
| 377 | */ | ||
| 378 | void __init setup_boot_APIC_clock(void) | ||
| 379 | { | ||
| 380 | /* | ||
| 381 | * The local apic timer can be disabled via the kernel commandline. | ||
| 382 | * Register the lapic timer as a dummy clock event source on SMP | ||
| 383 | * systems, so the broadcast mechanism is used. On UP systems simply | ||
| 384 | * ignore it. | ||
| 385 | */ | ||
| 386 | if (disable_apic_timer) { | ||
| 387 | printk(KERN_INFO "Disabling APIC timer\n"); | ||
| 388 | /* No broadcast on UP ! */ | ||
| 389 | if (num_possible_cpus() > 1) { | ||
| 390 | lapic_clockevent.mult = 1; | ||
| 391 | setup_APIC_timer(); | ||
| 392 | } | ||
| 393 | return; | ||
| 394 | } | ||
| 395 | |||
| 396 | printk(KERN_INFO "Using local APIC timer interrupts.\n"); | ||
| 397 | calibrate_APIC_clock(); | ||
| 398 | |||
| 399 | /* | ||
| 400 | * Do a sanity check on the APIC calibration result | ||
| 401 | */ | ||
| 402 | if (calibration_result < (1000000 / HZ)) { | ||
| 403 | printk(KERN_WARNING | ||
| 404 | "APIC frequency too slow, disabling apic timer\n"); | ||
| 405 | /* No broadcast on UP ! */ | ||
| 406 | if (num_possible_cpus() > 1) | ||
| 407 | setup_APIC_timer(); | ||
| 408 | return; | ||
| 409 | } | ||
| 410 | |||
| 411 | /* | ||
| 412 | * If nmi_watchdog is set to IO_APIC, we need the | ||
| 413 | * PIT/HPET going. Otherwise register lapic as a dummy | ||
| 414 | * device. | ||
| 415 | */ | ||
| 416 | if (nmi_watchdog != NMI_IO_APIC) | ||
| 417 | lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; | ||
| 418 | else | ||
| 419 | printk(KERN_WARNING "APIC timer registered as dummy," | ||
| 420 | " due to nmi_watchdog=%d!\n", nmi_watchdog); | ||
| 421 | |||
| 422 | setup_APIC_timer(); | ||
| 423 | } | ||
| 424 | |||
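For illustration: as a worked example of the sanity check above (assuming HZ=250), calibration_result holds APIC timer ticks per jiffy, so any result below 1000000/250 = 4000 ticks per 4 ms period is rejected; in other words, an APIC timer clocked slower than roughly 1 MHz is treated as unusable, and on SMP the lapic timer is then only registered as a broadcast dummy.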
| 425 | void __cpuinit setup_secondary_APIC_clock(void) | ||
| 426 | { | ||
| 427 | setup_APIC_timer(); | ||
| 428 | } | ||
| 429 | |||
| 430 | /* | ||
| 431 | * The guts of the apic timer interrupt | ||
| 432 | */ | ||
| 433 | static void local_apic_timer_interrupt(void) | ||
| 434 | { | ||
| 435 | int cpu = smp_processor_id(); | ||
| 436 | struct clock_event_device *evt = &per_cpu(lapic_events, cpu); | ||
| 437 | |||
| 438 | /* | ||
| 439 | * Normally we should not be here until the LAPIC has been initialized, | ||
| 440 | * but in some cases, such as kdump, it's possible that a pending LAPIC | ||
| 441 | * timer interrupt from the previous kernel's context is delivered in the | ||
| 442 | * new kernel the moment interrupts are enabled. | ||
| 443 | * | ||
| 444 | * Interrupts are enabled early and the LAPIC is set up much later, so | ||
| 445 | * it's possible that when we get here evt->event_handler is NULL. | ||
| 446 | * Check for event_handler being NULL and discard the interrupt as | ||
| 447 | * spurious. | ||
| 448 | */ | ||
| 449 | if (!evt->event_handler) { | ||
| 450 | printk(KERN_WARNING | ||
| 451 | "Spurious LAPIC timer interrupt on cpu %d\n", cpu); | ||
| 452 | /* Switch it off */ | ||
| 453 | lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt); | ||
| 454 | return; | ||
| 455 | } | ||
| 456 | |||
| 457 | /* | ||
| 458 | * the NMI deadlock-detector uses this. | ||
| 459 | */ | ||
| 460 | add_pda(apic_timer_irqs, 1); | ||
| 461 | |||
| 462 | evt->event_handler(evt); | ||
| 463 | } | ||
| 464 | |||
| 465 | /* | ||
| 466 | * Local APIC timer interrupt. This is the most natural way for doing | ||
| 467 | * local interrupts, but local timer interrupts can be emulated by | ||
| 468 | * broadcast interrupts too. [in case the hw doesn't support APIC timers] | ||
| 469 | * | ||
| 470 | * [ if a single-CPU system runs an SMP kernel then we call the local | ||
| 471 | * interrupt as well. Thus we cannot inline the local irq ... ] | ||
| 472 | */ | ||
| 473 | void smp_apic_timer_interrupt(struct pt_regs *regs) | ||
| 474 | { | ||
| 475 | struct pt_regs *old_regs = set_irq_regs(regs); | ||
| 476 | |||
| 477 | /* | ||
| 478 | * NOTE! We'd better ACK the irq immediately, | ||
| 479 | * because timer handling can be slow. | ||
| 480 | */ | ||
| 481 | ack_APIC_irq(); | ||
| 482 | /* | ||
| 483 | * update_process_times() expects us to have done irq_enter(). | ||
| 484 | * Besides, if we don't do it, timer interrupts ignore the global | ||
| 485 | * interrupt lock, which is the WrongThing (tm) to do. | ||
| 486 | */ | ||
| 487 | exit_idle(); | ||
| 488 | irq_enter(); | ||
| 489 | local_apic_timer_interrupt(); | ||
| 490 | irq_exit(); | ||
| 491 | set_irq_regs(old_regs); | ||
| 492 | } | ||
| 493 | |||
| 494 | int setup_profiling_timer(unsigned int multiplier) | ||
| 495 | { | ||
| 496 | return -EINVAL; | ||
| 497 | } | ||
| 498 | |||
| 499 | |||
| 500 | /* | ||
| 501 | * Local APIC start and shutdown | ||
| 502 | */ | ||
| 503 | |||
| 504 | /** | ||
| 505 | * clear_local_APIC - shutdown the local APIC | ||
| 506 | * | ||
| 507 | * This is called when a CPU is disabled and before rebooting, so the state of | ||
| 508 | the local APIC has no dangling leftovers. Also used to clean out any BIOS | ||
| 509 | * leftovers during boot. | ||
| 510 | */ | ||
| 511 | void clear_local_APIC(void) | ||
| 512 | { | ||
| 513 | int maxlvt; | ||
| 514 | u32 v; | ||
| 515 | |||
| 516 | /* APIC hasn't been mapped yet */ | ||
| 517 | if (!apic_phys) | ||
| 518 | return; | ||
| 519 | |||
| 520 | maxlvt = lapic_get_maxlvt(); | ||
| 521 | /* | ||
| 522 | * Masking an LVT entry can trigger a local APIC error | ||
| 523 | * if the vector is zero. Mask LVTERR first to prevent this. | ||
| 524 | */ | ||
| 525 | if (maxlvt >= 3) { | ||
| 526 | v = ERROR_APIC_VECTOR; /* any non-zero vector will do */ | ||
| 527 | apic_write(APIC_LVTERR, v | APIC_LVT_MASKED); | ||
| 528 | } | ||
| 529 | /* | ||
| 530 | * Careful: we have to set masks only first to deassert | ||
| 531 | * any level-triggered sources. | ||
| 532 | */ | ||
| 533 | v = apic_read(APIC_LVTT); | ||
| 534 | apic_write(APIC_LVTT, v | APIC_LVT_MASKED); | ||
| 535 | v = apic_read(APIC_LVT0); | ||
| 536 | apic_write(APIC_LVT0, v | APIC_LVT_MASKED); | ||
| 537 | v = apic_read(APIC_LVT1); | ||
| 538 | apic_write(APIC_LVT1, v | APIC_LVT_MASKED); | ||
| 539 | if (maxlvt >= 4) { | ||
| 540 | v = apic_read(APIC_LVTPC); | ||
| 541 | apic_write(APIC_LVTPC, v | APIC_LVT_MASKED); | ||
| 542 | } | ||
| 543 | |||
| 544 | /* | ||
| 545 | * Clean APIC state for other OSs: | ||
| 546 | */ | ||
| 547 | apic_write(APIC_LVTT, APIC_LVT_MASKED); | ||
| 548 | apic_write(APIC_LVT0, APIC_LVT_MASKED); | ||
| 549 | apic_write(APIC_LVT1, APIC_LVT_MASKED); | ||
| 550 | if (maxlvt >= 3) | ||
| 551 | apic_write(APIC_LVTERR, APIC_LVT_MASKED); | ||
| 552 | if (maxlvt >= 4) | ||
| 553 | apic_write(APIC_LVTPC, APIC_LVT_MASKED); | ||
| 554 | apic_write(APIC_ESR, 0); | ||
| 555 | apic_read(APIC_ESR); | ||
| 556 | } | ||
| 557 | |||
| 558 | /** | ||
| 559 | * disable_local_APIC - clear and disable the local APIC | ||
| 560 | */ | ||
| 561 | void disable_local_APIC(void) | ||
| 562 | { | ||
| 563 | unsigned int value; | ||
| 564 | |||
| 565 | clear_local_APIC(); | ||
| 566 | |||
| 567 | /* | ||
| 568 | * Disable APIC (implies clearing of registers | ||
| 569 | * for 82489DX!). | ||
| 570 | */ | ||
| 571 | value = apic_read(APIC_SPIV); | ||
| 572 | value &= ~APIC_SPIV_APIC_ENABLED; | ||
| 573 | apic_write(APIC_SPIV, value); | ||
| 574 | } | ||
| 575 | |||
| 576 | void lapic_shutdown(void) | ||
| 577 | { | ||
| 578 | unsigned long flags; | ||
| 579 | |||
| 580 | if (!cpu_has_apic) | ||
| 581 | return; | ||
| 582 | |||
| 583 | local_irq_save(flags); | ||
| 584 | |||
| 585 | disable_local_APIC(); | ||
| 586 | |||
| 587 | local_irq_restore(flags); | ||
| 588 | } | ||
| 589 | |||
| 590 | /* | ||
| 591 | * This is to verify that we're looking at a real local APIC. | ||
| 592 | * Check these against your board if the CPUs aren't getting | ||
| 593 | * started for no apparent reason. | ||
| 594 | */ | ||
| 595 | int __init verify_local_APIC(void) | ||
| 596 | { | ||
| 597 | unsigned int reg0, reg1; | ||
| 598 | |||
| 599 | /* | ||
| 600 | * The version register is read-only in a real APIC. | ||
| 601 | */ | ||
| 602 | reg0 = apic_read(APIC_LVR); | ||
| 603 | apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0); | ||
| 604 | apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK); | ||
| 605 | reg1 = apic_read(APIC_LVR); | ||
| 606 | apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1); | ||
| 607 | |||
| 608 | /* | ||
| 609 | * The two version reads above should print the same | ||
| 610 | * numbers. If the second one is different, then we | ||
| 611 | * poke at a non-APIC. | ||
| 612 | */ | ||
| 613 | if (reg1 != reg0) | ||
| 614 | return 0; | ||
| 615 | |||
| 616 | /* | ||
| 617 | * Check if the version looks reasonable. | ||
| 618 | */ | ||
| 619 | reg1 = GET_APIC_VERSION(reg0); | ||
| 620 | if (reg1 == 0x00 || reg1 == 0xff) | ||
| 621 | return 0; | ||
| 622 | reg1 = lapic_get_maxlvt(); | ||
| 623 | if (reg1 < 0x02 || reg1 == 0xff) | ||
| 624 | return 0; | ||
| 625 | |||
| 626 | /* | ||
| 627 | * The ID register is read/write in a real APIC. | ||
| 628 | */ | ||
| 629 | reg0 = read_apic_id(); | ||
| 630 | apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); | ||
| 631 | apic_write(APIC_ID, reg0 ^ APIC_ID_MASK); | ||
| 632 | reg1 = read_apic_id(); | ||
| 633 | apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); | ||
| 634 | apic_write(APIC_ID, reg0); | ||
| 635 | if (reg1 != (reg0 ^ APIC_ID_MASK)) | ||
| 636 | return 0; | ||
| 637 | |||
| 638 | /* | ||
| 639 | * The next two are just to see if we have sane values. | ||
| 640 | * They're only really relevant if we're in Virtual Wire | ||
| 641 | * compatibility mode, but most boxes are these days. | ||
| 642 | */ | ||
| 643 | reg0 = apic_read(APIC_LVT0); | ||
| 644 | apic_printk(APIC_DEBUG, "Getting LVT0: %x\n", reg0); | ||
| 645 | reg1 = apic_read(APIC_LVT1); | ||
| 646 | apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1); | ||
| 647 | |||
| 648 | return 1; | ||
| 649 | } | ||
| 650 | |||
| 651 | /** | ||
| 652 | * sync_Arb_IDs - synchronize APIC bus arbitration IDs | ||
| 653 | */ | ||
| 654 | void __init sync_Arb_IDs(void) | ||
| 655 | { | ||
| 656 | /* Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 */ | ||
| 657 | if (modern_apic()) | ||
| 658 | return; | ||
| 659 | |||
| 660 | /* | ||
| 661 | * Wait for idle. | ||
| 662 | */ | ||
| 663 | apic_wait_icr_idle(); | ||
| 664 | |||
| 665 | apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); | ||
| 666 | apic_write(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | ||
| 667 | | APIC_DM_INIT); | ||
| 668 | } | ||
| 669 | |||
| 670 | /* | ||
| 671 | * An initial setup of the virtual wire mode. | ||
| 672 | */ | ||
| 673 | void __init init_bsp_APIC(void) | ||
| 674 | { | ||
| 675 | unsigned int value; | ||
| 676 | |||
| 677 | /* | ||
| 678 | * Don't do the setup now if we have an SMP BIOS, as the | ||
| 679 | * through-I/O-APIC virtual wire mode might be active. | ||
| 680 | */ | ||
| 681 | if (smp_found_config || !cpu_has_apic) | ||
| 682 | return; | ||
| 683 | |||
| 684 | value = apic_read(APIC_LVR); | ||
| 685 | |||
| 686 | /* | ||
| 687 | * Do not trust the local APIC being empty at bootup. | ||
| 688 | */ | ||
| 689 | clear_local_APIC(); | ||
| 690 | |||
| 691 | /* | ||
| 692 | * Enable APIC. | ||
| 693 | */ | ||
| 694 | value = apic_read(APIC_SPIV); | ||
| 695 | value &= ~APIC_VECTOR_MASK; | ||
| 696 | value |= APIC_SPIV_APIC_ENABLED; | ||
| 697 | value |= APIC_SPIV_FOCUS_DISABLED; | ||
| 698 | value |= SPURIOUS_APIC_VECTOR; | ||
| 699 | apic_write(APIC_SPIV, value); | ||
| 700 | |||
| 701 | /* | ||
| 702 | * Set up the virtual wire mode. | ||
| 703 | */ | ||
| 704 | apic_write(APIC_LVT0, APIC_DM_EXTINT); | ||
| 705 | value = APIC_DM_NMI; | ||
| 706 | apic_write(APIC_LVT1, value); | ||
| 707 | } | ||
| 708 | |||
| 709 | /** | ||
| 710 | * setup_local_APIC - setup the local APIC | ||
| 711 | */ | ||
| 712 | void __cpuinit setup_local_APIC(void) | ||
| 713 | { | ||
| 714 | unsigned int value; | ||
| 715 | int i, j; | ||
| 716 | |||
| 717 | preempt_disable(); | ||
| 718 | value = apic_read(APIC_LVR); | ||
| 719 | |||
| 720 | BUILD_BUG_ON((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f); | ||
| 721 | |||
| 722 | /* | ||
| 723 | * Double-check whether this APIC is really registered. | ||
| 724 | * This is meaningless in clustered apic mode, so we skip it. | ||
| 725 | */ | ||
| 726 | if (!apic_id_registered()) | ||
| 727 | BUG(); | ||
| 728 | |||
| 729 | /* | ||
| 730 | * Intel recommends setting DFR, LDR and TPR before enabling | ||
| 731 | * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel | ||
| 732 | * document number 292116). So here it goes... | ||
| 733 | */ | ||
| 734 | init_apic_ldr(); | ||
| 735 | |||
| 736 | /* | ||
| 737 | * Set Task Priority to 'accept all'. We never change this | ||
| 738 | * later on. | ||
| 739 | */ | ||
| 740 | value = apic_read(APIC_TASKPRI); | ||
| 741 | value &= ~APIC_TPRI_MASK; | ||
| 742 | apic_write(APIC_TASKPRI, value); | ||
| 743 | |||
| 744 | /* | ||
| 745 | * After a crash, we no longer service the interrupts and a pending | ||
| 746 | * interrupt from previous kernel might still have ISR bit set. | ||
| 747 | * | ||
| 748 | * Most probably the CPU has serviced that pending interrupt by now, | ||
| 749 | * but it might not have done the ack_APIC_irq() because it thought the | ||
| 750 | * interrupt came from the i8259 as ExtInt. The LAPIC did not get an EOI, | ||
| 751 | * so it does not clear the ISR bit and the CPU thinks it has already | ||
| 752 | * serviced the interrupt. Hence a vector might get locked. It was noticed | ||
| 753 | * for timer irq (vector 0x31). Issue an extra EOI to clear ISR. | ||
| 754 | */ | ||
| 755 | for (i = APIC_ISR_NR - 1; i >= 0; i--) { | ||
| 756 | value = apic_read(APIC_ISR + i*0x10); | ||
| 757 | for (j = 31; j >= 0; j--) { | ||
| 758 | if (value & (1<<j)) | ||
| 759 | ack_APIC_irq(); | ||
| 760 | } | ||
| 761 | } | ||
| 762 | |||
| 763 | /* | ||
| 764 | * Now that we are all set up, enable the APIC | ||
| 765 | */ | ||
| 766 | value = apic_read(APIC_SPIV); | ||
| 767 | value &= ~APIC_VECTOR_MASK; | ||
| 768 | /* | ||
| 769 | * Enable APIC | ||
| 770 | */ | ||
| 771 | value |= APIC_SPIV_APIC_ENABLED; | ||
| 772 | |||
| 773 | /* We always use processor focus */ | ||
| 774 | |||
| 775 | /* | ||
| 776 | * Set spurious IRQ vector | ||
| 777 | */ | ||
| 778 | value |= SPURIOUS_APIC_VECTOR; | ||
| 779 | apic_write(APIC_SPIV, value); | ||
| 780 | |||
| 781 | /* | ||
| 782 | * Set up LVT0, LVT1: | ||
| 783 | * | ||
| 784 | * set up through-local-APIC on the BP's LINT0. This is not | ||
| 785 | * strictly necessary in pure symmetric-IO mode, but sometimes | ||
| 786 | * we delegate interrupts to the 8259A. | ||
| 787 | */ | ||
| 788 | /* | ||
| 789 | * TODO: set up through-local-APIC from through-I/O-APIC? --macro | ||
| 790 | */ | ||
| 791 | value = apic_read(APIC_LVT0) & APIC_LVT_MASKED; | ||
| 792 | if (!smp_processor_id() && !value) { | ||
| 793 | value = APIC_DM_EXTINT; | ||
| 794 | apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n", | ||
| 795 | smp_processor_id()); | ||
| 796 | } else { | ||
| 797 | value = APIC_DM_EXTINT | APIC_LVT_MASKED; | ||
| 798 | apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n", | ||
| 799 | smp_processor_id()); | ||
| 800 | } | ||
| 801 | apic_write(APIC_LVT0, value); | ||
| 802 | |||
| 803 | /* | ||
| 804 | * only the BP should see the LINT1 NMI signal, obviously. | ||
| 805 | */ | ||
| 806 | if (!smp_processor_id()) | ||
| 807 | value = APIC_DM_NMI; | ||
| 808 | else | ||
| 809 | value = APIC_DM_NMI | APIC_LVT_MASKED; | ||
| 810 | apic_write(APIC_LVT1, value); | ||
| 811 | preempt_enable(); | ||
| 812 | } | ||
| 813 | |||
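For illustration only: a minimal userspace sketch of the vector-to-register mapping behind the ISR-draining loop above. Each 32-bit ISR word covers 32 vectors and consecutive words sit 0x10 apart, which is why the loop reads APIC_ISR + i*0x10; the vector 0x31 cited in the comment is used as the assumed example input.

#include <stdio.h>

int main(void)
{
        unsigned int vector = 0x31;             /* timer vector cited in the comment */
        unsigned int word = vector >> 5;        /* which 32-bit ISR word */
        unsigned int bit = vector & 0x1f;       /* bit within that word */

        printf("vector 0x%02x -> APIC_ISR + 0x%x, bit %u\n",
               vector, word * 0x10, bit);       /* APIC_ISR + 0x10, bit 17 */
        return 0;
}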
| 814 | static void __cpuinit lapic_setup_esr(void) | ||
| 815 | { | ||
| 816 | unsigned maxlvt = lapic_get_maxlvt(); | ||
| 817 | |||
| 818 | apic_write(APIC_LVTERR, ERROR_APIC_VECTOR); | ||
| 819 | /* | ||
| 820 | * The spec says to clear errors after enabling the vector. | ||
| 821 | */ | ||
| 822 | if (maxlvt > 3) | ||
| 823 | apic_write(APIC_ESR, 0); | ||
| 824 | } | ||
| 825 | |||
| 826 | void __cpuinit end_local_APIC_setup(void) | ||
| 827 | { | ||
| 828 | lapic_setup_esr(); | ||
| 829 | setup_apic_nmi_watchdog(NULL); | ||
| 830 | apic_pm_activate(); | ||
| 831 | } | ||
| 832 | |||
| 833 | /* | ||
| 834 | * Detect and enable local APICs on non-SMP boards. | ||
| 835 | * Original code written by Keir Fraser. | ||
| 836 | * On AMD64 we trust the BIOS - if it says no APIC it is likely | ||
| 837 | * not correctly set up (usually the APIC timer won't work etc.) | ||
| 838 | */ | ||
| 839 | static int __init detect_init_APIC(void) | ||
| 840 | { | ||
| 841 | if (!cpu_has_apic) { | ||
| 842 | printk(KERN_INFO "No local APIC present\n"); | ||
| 843 | return -1; | ||
| 844 | } | ||
| 845 | |||
| 846 | mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; | ||
| 847 | boot_cpu_physical_apicid = 0; | ||
| 848 | return 0; | ||
| 849 | } | ||
| 850 | |||
| 851 | void __init early_init_lapic_mapping(void) | ||
| 852 | { | ||
| 853 | unsigned long phys_addr; | ||
| 854 | |||
| 855 | /* | ||
| 856 | * If no local APIC can be found then bail out: | ||
| 857 | * it means there is neither an MP table nor an MADT | ||
| 858 | */ | ||
| 859 | if (!smp_found_config) | ||
| 860 | return; | ||
| 861 | |||
| 862 | phys_addr = mp_lapic_addr; | ||
| 863 | |||
| 864 | set_fixmap_nocache(FIX_APIC_BASE, phys_addr); | ||
| 865 | apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", | ||
| 866 | APIC_BASE, phys_addr); | ||
| 867 | |||
| 868 | /* | ||
| 869 | * Fetch the APIC ID of the BSP in case we have a | ||
| 870 | * default configuration (or the MP table is broken). | ||
| 871 | */ | ||
| 872 | boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); | ||
| 873 | } | ||
| 874 | |||
| 875 | /** | ||
| 876 | * init_apic_mappings - initialize APIC mappings | ||
| 877 | */ | ||
| 878 | void __init init_apic_mappings(void) | ||
| 879 | { | ||
| 880 | /* | ||
| 881 | * If no local APIC can be found then set up a fake all | ||
| 882 | * zeroes page to simulate the local APIC and another | ||
| 883 | * one for the IO-APIC. | ||
| 884 | */ | ||
| 885 | if (!smp_found_config && detect_init_APIC()) { | ||
| 886 | apic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE); | ||
| 887 | apic_phys = __pa(apic_phys); | ||
| 888 | } else | ||
| 889 | apic_phys = mp_lapic_addr; | ||
| 890 | |||
| 891 | set_fixmap_nocache(FIX_APIC_BASE, apic_phys); | ||
| 892 | apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", | ||
| 893 | APIC_BASE, apic_phys); | ||
| 894 | |||
| 895 | /* | ||
| 896 | * Fetch the APIC ID of the BSP in case we have a | ||
| 897 | * default configuration (or the MP table is broken). | ||
| 898 | */ | ||
| 899 | boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); | ||
| 900 | } | ||
| 901 | |||
| 902 | /* | ||
| 903 | * This initializes the IO-APIC and APIC hardware if this is | ||
| 904 | * a UP kernel. | ||
| 905 | */ | ||
| 906 | int __init APIC_init_uniprocessor(void) | ||
| 907 | { | ||
| 908 | if (disable_apic) { | ||
| 909 | printk(KERN_INFO "Apic disabled\n"); | ||
| 910 | return -1; | ||
| 911 | } | ||
| 912 | if (!cpu_has_apic) { | ||
| 913 | disable_apic = 1; | ||
| 914 | printk(KERN_INFO "Apic disabled by BIOS\n"); | ||
| 915 | return -1; | ||
| 916 | } | ||
| 917 | |||
| 918 | verify_local_APIC(); | ||
| 919 | |||
| 920 | connect_bsp_APIC(); | ||
| 921 | |||
| 922 | physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); | ||
| 923 | apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid)); | ||
| 924 | |||
| 925 | setup_local_APIC(); | ||
| 926 | |||
| 927 | /* | ||
| 928 | * Now enable the IO-APICs, actually calling clear_IO_APIC. | ||
| 929 | * We need clear_IO_APIC before enabling any vector on the BP. | ||
| 930 | */ | ||
| 931 | if (!skip_ioapic_setup && nr_ioapics) | ||
| 932 | enable_IO_APIC(); | ||
| 933 | |||
| 934 | if (!smp_found_config || skip_ioapic_setup || !nr_ioapics) | ||
| 935 | localise_nmi_watchdog(); | ||
| 936 | end_local_APIC_setup(); | ||
| 937 | |||
| 938 | if (smp_found_config && !skip_ioapic_setup && nr_ioapics) | ||
| 939 | setup_IO_APIC(); | ||
| 940 | else | ||
| 941 | nr_ioapics = 0; | ||
| 942 | setup_boot_APIC_clock(); | ||
| 943 | check_nmi_watchdog(); | ||
| 944 | return 0; | ||
| 945 | } | ||
| 946 | |||
| 947 | /* | ||
| 948 | * Local APIC interrupts | ||
| 949 | */ | ||
| 950 | |||
| 951 | /* | ||
| 952 | * This interrupt should _never_ happen with our APIC/SMP architecture | ||
| 953 | */ | ||
| 954 | asmlinkage void smp_spurious_interrupt(void) | ||
| 955 | { | ||
| 956 | unsigned int v; | ||
| 957 | exit_idle(); | ||
| 958 | irq_enter(); | ||
| 959 | /* | ||
| 960 | * Check if this really is a spurious interrupt and ACK it | ||
| 961 | * if it is a vectored one. Just in case... | ||
| 962 | * Spurious interrupts should not be ACKed. | ||
| 963 | */ | ||
| 964 | v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1)); | ||
| 965 | if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) | ||
| 966 | ack_APIC_irq(); | ||
| 967 | |||
| 968 | add_pda(irq_spurious_count, 1); | ||
| 969 | irq_exit(); | ||
| 970 | } | ||
| 971 | |||
| 972 | /* | ||
| 973 | * This interrupt should never happen with our APIC/SMP architecture | ||
| 974 | */ | ||
| 975 | asmlinkage void smp_error_interrupt(void) | ||
| 976 | { | ||
| 977 | unsigned int v, v1; | ||
| 978 | |||
| 979 | exit_idle(); | ||
| 980 | irq_enter(); | ||
| 981 | /* First tickle the hardware, only then report what went on. -- REW */ | ||
| 982 | v = apic_read(APIC_ESR); | ||
| 983 | apic_write(APIC_ESR, 0); | ||
| 984 | v1 = apic_read(APIC_ESR); | ||
| 985 | ack_APIC_irq(); | ||
| 986 | atomic_inc(&irq_err_count); | ||
| 987 | |||
| 988 | /* Here is what the APIC error bits mean: | ||
| 989 | 0: Send CS error | ||
| 990 | 1: Receive CS error | ||
| 991 | 2: Send accept error | ||
| 992 | 3: Receive accept error | ||
| 993 | 4: Reserved | ||
| 994 | 5: Send illegal vector | ||
| 995 | 6: Received illegal vector | ||
| 996 | 7: Illegal register address | ||
| 997 | */ | ||
| 998 | printk(KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n", | ||
| 999 | smp_processor_id(), v , v1); | ||
| 1000 | irq_exit(); | ||
| 1001 | } | ||
| 1002 | |||
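For illustration only: the error bits listed in the comment above lend themselves to a tiny decoder. The sketch below is userspace-only; the 0x40 sample value (received illegal vector) is an assumed input.

#include <stdio.h>

static const char *esr_bits[8] = {
        "Send CS error",                "Receive CS error",
        "Send accept error",            "Receive accept error",
        "Reserved",                     "Send illegal vector",
        "Received illegal vector",      "Illegal register address",
};

int main(void)
{
        unsigned int esr = 0x40;        /* assumed sample ESR value */
        int i;

        for (i = 0; i < 8; i++)
                if (esr & (1u << i))
                        printf("ESR bit %d: %s\n", i, esr_bits[i]);
        return 0;
}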
| 1003 | /** | ||
| 1004 | * connect_bsp_APIC - attach the APIC to the interrupt system | ||
| 1005 | */ | ||
| 1006 | void __init connect_bsp_APIC(void) | ||
| 1007 | { | ||
| 1008 | enable_apic_mode(); | ||
| 1009 | } | ||
| 1010 | |||
| 1011 | void disconnect_bsp_APIC(int virt_wire_setup) | ||
| 1012 | { | ||
| 1013 | /* Go back to Virtual Wire compatibility mode */ | ||
| 1014 | unsigned long value; | ||
| 1015 | |||
| 1016 | /* For the spurious interrupt use vector F, and enable it */ | ||
| 1017 | value = apic_read(APIC_SPIV); | ||
| 1018 | value &= ~APIC_VECTOR_MASK; | ||
| 1019 | value |= APIC_SPIV_APIC_ENABLED; | ||
| 1020 | value |= 0xf; | ||
| 1021 | apic_write(APIC_SPIV, value); | ||
| 1022 | |||
| 1023 | if (!virt_wire_setup) { | ||
| 1024 | /* | ||
| 1025 | * For LVT0 make it edge triggered, active high, | ||
| 1026 | * external and enabled | ||
| 1027 | */ | ||
| 1028 | value = apic_read(APIC_LVT0); | ||
| 1029 | value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | | ||
| 1030 | APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | | ||
| 1031 | APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); | ||
| 1032 | value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; | ||
| 1033 | value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); | ||
| 1034 | apic_write(APIC_LVT0, value); | ||
| 1035 | } else { | ||
| 1036 | /* Disable LVT0 */ | ||
| 1037 | apic_write(APIC_LVT0, APIC_LVT_MASKED); | ||
| 1038 | } | ||
| 1039 | |||
| 1040 | /* For LVT1 make it edge triggered, active high, nmi and enabled */ | ||
| 1041 | value = apic_read(APIC_LVT1); | ||
| 1042 | value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | | ||
| 1043 | APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | | ||
| 1044 | APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); | ||
| 1045 | value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; | ||
| 1046 | value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); | ||
| 1047 | apic_write(APIC_LVT1, value); | ||
| 1048 | } | ||
| 1049 | |||
| 1050 | void __cpuinit generic_processor_info(int apicid, int version) | ||
| 1051 | { | ||
| 1052 | int cpu; | ||
| 1053 | cpumask_t tmp_map; | ||
| 1054 | |||
| 1055 | if (num_processors >= NR_CPUS) { | ||
| 1056 | printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." | ||
| 1057 | " Processor ignored.\n", NR_CPUS); | ||
| 1058 | return; | ||
| 1059 | } | ||
| 1060 | |||
| 1061 | if (num_processors >= maxcpus) { | ||
| 1062 | printk(KERN_WARNING "WARNING: maxcpus limit of %i reached." | ||
| 1063 | " Processor ignored.\n", maxcpus); | ||
| 1064 | return; | ||
| 1065 | } | ||
| 1066 | |||
| 1067 | num_processors++; | ||
| 1068 | cpus_complement(tmp_map, cpu_present_map); | ||
| 1069 | cpu = first_cpu(tmp_map); | ||
| 1070 | |||
| 1071 | physid_set(apicid, phys_cpu_present_map); | ||
| 1072 | if (apicid == boot_cpu_physical_apicid) { | ||
| 1073 | /* | ||
| 1074 | * x86_bios_cpu_apicid is required to have processors listed | ||
| 1075 | * in the same order as logical cpu numbers. Hence the first | ||
| 1076 | * entry is BSP, and so on. | ||
| 1077 | */ | ||
| 1078 | cpu = 0; | ||
| 1079 | } | ||
| 1080 | if (apicid > max_physical_apicid) | ||
| 1081 | max_physical_apicid = apicid; | ||
| 1082 | |||
| 1083 | /* are we being called early in kernel startup? */ | ||
| 1084 | if (early_per_cpu_ptr(x86_cpu_to_apicid)) { | ||
| 1085 | u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); | ||
| 1086 | u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); | ||
| 1087 | |||
| 1088 | cpu_to_apicid[cpu] = apicid; | ||
| 1089 | bios_cpu_apicid[cpu] = apicid; | ||
| 1090 | } else { | ||
| 1091 | per_cpu(x86_cpu_to_apicid, cpu) = apicid; | ||
| 1092 | per_cpu(x86_bios_cpu_apicid, cpu) = apicid; | ||
| 1093 | } | ||
| 1094 | |||
| 1095 | cpu_set(cpu, cpu_possible_map); | ||
| 1096 | cpu_set(cpu, cpu_present_map); | ||
| 1097 | } | ||
| 1098 | |||
| 1099 | /* | ||
| 1100 | * Power management | ||
| 1101 | */ | ||
| 1102 | #ifdef CONFIG_PM | ||
| 1103 | |||
| 1104 | static struct { | ||
| 1105 | /* 'active' is true if the local APIC was enabled by us and | ||
| 1106 | not the BIOS; this signifies that we are also responsible | ||
| 1107 | for disabling it before entering apm/acpi suspend */ | ||
| 1108 | int active; | ||
| 1109 | /* r/w apic fields */ | ||
| 1110 | unsigned int apic_id; | ||
| 1111 | unsigned int apic_taskpri; | ||
| 1112 | unsigned int apic_ldr; | ||
| 1113 | unsigned int apic_dfr; | ||
| 1114 | unsigned int apic_spiv; | ||
| 1115 | unsigned int apic_lvtt; | ||
| 1116 | unsigned int apic_lvtpc; | ||
| 1117 | unsigned int apic_lvt0; | ||
| 1118 | unsigned int apic_lvt1; | ||
| 1119 | unsigned int apic_lvterr; | ||
| 1120 | unsigned int apic_tmict; | ||
| 1121 | unsigned int apic_tdcr; | ||
| 1122 | unsigned int apic_thmr; | ||
| 1123 | } apic_pm_state; | ||
| 1124 | |||
| 1125 | static int lapic_suspend(struct sys_device *dev, pm_message_t state) | ||
| 1126 | { | ||
| 1127 | unsigned long flags; | ||
| 1128 | int maxlvt; | ||
| 1129 | |||
| 1130 | if (!apic_pm_state.active) | ||
| 1131 | return 0; | ||
| 1132 | |||
| 1133 | maxlvt = lapic_get_maxlvt(); | ||
| 1134 | |||
| 1135 | apic_pm_state.apic_id = read_apic_id(); | ||
| 1136 | apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); | ||
| 1137 | apic_pm_state.apic_ldr = apic_read(APIC_LDR); | ||
| 1138 | apic_pm_state.apic_dfr = apic_read(APIC_DFR); | ||
| 1139 | apic_pm_state.apic_spiv = apic_read(APIC_SPIV); | ||
| 1140 | apic_pm_state.apic_lvtt = apic_read(APIC_LVTT); | ||
| 1141 | if (maxlvt >= 4) | ||
| 1142 | apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC); | ||
| 1143 | apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0); | ||
| 1144 | apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1); | ||
| 1145 | apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); | ||
| 1146 | apic_pm_state.apic_tmict = apic_read(APIC_TMICT); | ||
| 1147 | apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); | ||
| 1148 | #ifdef CONFIG_X86_MCE_INTEL | ||
| 1149 | if (maxlvt >= 5) | ||
| 1150 | apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); | ||
| 1151 | #endif | ||
| 1152 | local_irq_save(flags); | ||
| 1153 | disable_local_APIC(); | ||
| 1154 | local_irq_restore(flags); | ||
| 1155 | return 0; | ||
| 1156 | } | ||
| 1157 | |||
| 1158 | static int lapic_resume(struct sys_device *dev) | ||
| 1159 | { | ||
| 1160 | unsigned int l, h; | ||
| 1161 | unsigned long flags; | ||
| 1162 | int maxlvt; | ||
| 1163 | |||
| 1164 | if (!apic_pm_state.active) | ||
| 1165 | return 0; | ||
| 1166 | |||
| 1167 | maxlvt = lapic_get_maxlvt(); | ||
| 1168 | |||
| 1169 | local_irq_save(flags); | ||
| 1170 | rdmsr(MSR_IA32_APICBASE, l, h); | ||
| 1171 | l &= ~MSR_IA32_APICBASE_BASE; | ||
| 1172 | l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; | ||
| 1173 | wrmsr(MSR_IA32_APICBASE, l, h); | ||
| 1174 | apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); | ||
| 1175 | apic_write(APIC_ID, apic_pm_state.apic_id); | ||
| 1176 | apic_write(APIC_DFR, apic_pm_state.apic_dfr); | ||
| 1177 | apic_write(APIC_LDR, apic_pm_state.apic_ldr); | ||
| 1178 | apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri); | ||
| 1179 | apic_write(APIC_SPIV, apic_pm_state.apic_spiv); | ||
| 1180 | apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); | ||
| 1181 | apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); | ||
| 1182 | #ifdef CONFIG_X86_MCE_INTEL | ||
| 1183 | if (maxlvt >= 5) | ||
| 1184 | apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); | ||
| 1185 | #endif | ||
| 1186 | if (maxlvt >= 4) | ||
| 1187 | apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc); | ||
| 1188 | apic_write(APIC_LVTT, apic_pm_state.apic_lvtt); | ||
| 1189 | apic_write(APIC_TDCR, apic_pm_state.apic_tdcr); | ||
| 1190 | apic_write(APIC_TMICT, apic_pm_state.apic_tmict); | ||
| 1191 | apic_write(APIC_ESR, 0); | ||
| 1192 | apic_read(APIC_ESR); | ||
| 1193 | apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); | ||
| 1194 | apic_write(APIC_ESR, 0); | ||
| 1195 | apic_read(APIC_ESR); | ||
| 1196 | local_irq_restore(flags); | ||
| 1197 | return 0; | ||
| 1198 | } | ||
| 1199 | |||
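For illustration only: lapic_resume() re-enables the APIC by rewriting the low word of the IA32_APIC_BASE MSR. A minimal userspace sketch of that bit manipulation follows; the stale low word and the 0xfee00000 default base are assumed example inputs, and bit 11 is the architectural xAPIC global-enable bit.

#include <stdio.h>

#define APICBASE_ENABLE         (1u << 11)      /* xAPIC global enable */
#define APICBASE_BASE_MASK      0xfffff000u     /* base field in the low word */

int main(void)
{
        unsigned int l = 0x00000900u;           /* assumed stale low word */
        unsigned int mp_lapic_addr = 0xfee00000u;

        l &= ~APICBASE_BASE_MASK;
        l |= APICBASE_ENABLE | mp_lapic_addr;
        printf("APICBASE low word: 0x%08x\n", l);       /* 0xfee00900 */
        return 0;
}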
| 1200 | static struct sysdev_class lapic_sysclass = { | ||
| 1201 | .name = "lapic", | ||
| 1202 | .resume = lapic_resume, | ||
| 1203 | .suspend = lapic_suspend, | ||
| 1204 | }; | ||
| 1205 | |||
| 1206 | static struct sys_device device_lapic = { | ||
| 1207 | .id = 0, | ||
| 1208 | .cls = &lapic_sysclass, | ||
| 1209 | }; | ||
| 1210 | |||
| 1211 | static void __cpuinit apic_pm_activate(void) | ||
| 1212 | { | ||
| 1213 | apic_pm_state.active = 1; | ||
| 1214 | } | ||
| 1215 | |||
| 1216 | static int __init init_lapic_sysfs(void) | ||
| 1217 | { | ||
| 1218 | int error; | ||
| 1219 | |||
| 1220 | if (!cpu_has_apic) | ||
| 1221 | return 0; | ||
| 1222 | /* XXX: remove suspend/resume procs if !apic_pm_state.active? */ | ||
| 1223 | |||
| 1224 | error = sysdev_class_register(&lapic_sysclass); | ||
| 1225 | if (!error) | ||
| 1226 | error = sysdev_register(&device_lapic); | ||
| 1227 | return error; | ||
| 1228 | } | ||
| 1229 | device_initcall(init_lapic_sysfs); | ||
| 1230 | |||
| 1231 | #else /* CONFIG_PM */ | ||
| 1232 | |||
| 1233 | static void apic_pm_activate(void) { } | ||
| 1234 | |||
| 1235 | #endif /* CONFIG_PM */ | ||
| 1236 | |||
| 1237 | /* | ||
| 1238 | * apic_is_clustered_box() -- Check if we can expect good TSC | ||
| 1239 | * | ||
| 1240 | * Thus far, the major user of this is IBM's Summit2 series: | ||
| 1241 | * | ||
| 1242 | * Clustered boxes may have unsynced TSC problems if they are | ||
| 1243 | * multi-chassis. Use available data to take a good guess. | ||
| 1244 | * If in doubt, go HPET. | ||
| 1245 | */ | ||
| 1246 | __cpuinit int apic_is_clustered_box(void) | ||
| 1247 | { | ||
| 1248 | int i, clusters, zeros; | ||
| 1249 | unsigned id; | ||
| 1250 | u16 *bios_cpu_apicid; | ||
| 1251 | DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS); | ||
| 1252 | |||
| 1253 | /* | ||
| 1254 | * There is no such kind of box with an AMD CPU yet. | ||
| 1255 | * Some AMD boxes with quad-core CPUs and 8 sockets have APIC IDs | ||
| 1256 | * in [4, 0x23] or [8, 0x27] and could be mistaken for vsmp boxes; | ||
| 1257 | * this still needs checking... | ||
| 1258 | */ | ||
| 1259 | if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box()) | ||
| 1260 | return 0; | ||
| 1261 | |||
| 1262 | bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); | ||
| 1263 | bitmap_zero(clustermap, NUM_APIC_CLUSTERS); | ||
| 1264 | |||
| 1265 | for (i = 0; i < NR_CPUS; i++) { | ||
| 1266 | /* are we being called early in kernel startup? */ | ||
| 1267 | if (bios_cpu_apicid) { | ||
| 1268 | id = bios_cpu_apicid[i]; | ||
| 1269 | } | ||
| 1270 | else if (i < nr_cpu_ids) { | ||
| 1271 | if (cpu_present(i)) | ||
| 1272 | id = per_cpu(x86_bios_cpu_apicid, i); | ||
| 1273 | else | ||
| 1274 | continue; | ||
| 1275 | } | ||
| 1276 | else | ||
| 1277 | break; | ||
| 1278 | |||
| 1279 | if (id != BAD_APICID) | ||
| 1280 | __set_bit(APIC_CLUSTERID(id), clustermap); | ||
| 1281 | } | ||
| 1282 | |||
| 1283 | /* Problem: Partially populated chassis may not have CPUs in some of | ||
| 1284 | * the APIC clusters they have been allocated. Only present CPUs have | ||
| 1285 | * x86_bios_cpu_apicid entries, thus causing zeroes in the bitmap. | ||
| 1286 | * Since clusters are allocated sequentially, count zeros only if | ||
| 1287 | * they are bounded by ones. | ||
| 1288 | */ | ||
| 1289 | clusters = 0; | ||
| 1290 | zeros = 0; | ||
| 1291 | for (i = 0; i < NUM_APIC_CLUSTERS; i++) { | ||
| 1292 | if (test_bit(i, clustermap)) { | ||
| 1293 | clusters += 1 + zeros; | ||
| 1294 | zeros = 0; | ||
| 1295 | } else | ||
| 1296 | ++zeros; | ||
| 1297 | } | ||
| 1298 | |||
| 1299 | /* ScaleMP vSMPowered boxes have one cluster per board and TSCs are | ||
| 1300 | * not guaranteed to be synced between boards | ||
| 1301 | */ | ||
| 1302 | if (is_vsmp_box() && clusters > 1) | ||
| 1303 | return 1; | ||
| 1304 | |||
| 1305 | /* | ||
| 1306 | * If clusters > 2, then it should be multi-chassis. | ||
| 1307 | * May have to revisit this when multi-core + hyperthreaded CPUs come | ||
| 1308 | * out, but AFAIK this will work even for them. | ||
| 1309 | */ | ||
| 1310 | return (clusters > 2); | ||
| 1311 | } | ||
| 1312 | |||
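For illustration only: the zeros-bounded-by-ones counting rule described in the comment above is easiest to see on a concrete bitmap. The sketch below is userspace-only, and the four-entry cluster map is made up for this example.

#include <stdio.h>

int main(void)
{
        /* assumed map: clusters 0, 1 and 3 have CPUs, cluster 2 is a
         * partially populated chassis with no present CPUs */
        int clustermap[] = { 1, 1, 0, 1 };
        int n = sizeof(clustermap) / sizeof(clustermap[0]);
        int clusters = 0, zeros = 0, i;

        for (i = 0; i < n; i++) {
                if (clustermap[i]) {
                        clusters += 1 + zeros;  /* count the bounded gap too */
                        zeros = 0;
                } else {
                        ++zeros;
                }
        }
        printf("clusters = %d\n", clusters);    /* prints 4 */
        return 0;
}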
| 1313 | /* | ||
| 1314 | * APIC command line parameters | ||
| 1315 | */ | ||
| 1316 | static int __init apic_set_verbosity(char *str) | ||
| 1317 | { | ||
| 1318 | if (str == NULL) { | ||
| 1319 | skip_ioapic_setup = 0; | ||
| 1320 | ioapic_force = 1; | ||
| 1321 | return 0; | ||
| 1322 | } | ||
| 1323 | if (strcmp("debug", str) == 0) | ||
| 1324 | apic_verbosity = APIC_DEBUG; | ||
| 1325 | else if (strcmp("verbose", str) == 0) | ||
| 1326 | apic_verbosity = APIC_VERBOSE; | ||
| 1327 | else { | ||
| 1328 | printk(KERN_WARNING "APIC Verbosity level %s not recognised;" | ||
| 1329 | " use apic=verbose or apic=debug\n", str); | ||
| 1330 | return -EINVAL; | ||
| 1331 | } | ||
| 1332 | |||
| 1333 | return 0; | ||
| 1334 | } | ||
| 1335 | early_param("apic", apic_set_verbosity); | ||
| 1336 | |||
| 1337 | static __init int setup_disableapic(char *str) | ||
| 1338 | { | ||
| 1339 | disable_apic = 1; | ||
| 1340 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); | ||
| 1341 | return 0; | ||
| 1342 | } | ||
| 1343 | early_param("disableapic", setup_disableapic); | ||
| 1344 | |||
| 1345 | /* same as disableapic, for compatibility */ | ||
| 1346 | static __init int setup_nolapic(char *str) | ||
| 1347 | { | ||
| 1348 | return setup_disableapic(str); | ||
| 1349 | } | ||
| 1350 | early_param("nolapic", setup_nolapic); | ||
| 1351 | |||
| 1352 | static int __init parse_lapic_timer_c2_ok(char *arg) | ||
| 1353 | { | ||
| 1354 | local_apic_timer_c2_ok = 1; | ||
| 1355 | return 0; | ||
| 1356 | } | ||
| 1357 | early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); | ||
| 1358 | |||
| 1359 | static __init int setup_noapictimer(char *str) | ||
| 1360 | { | ||
| 1361 | if (str[0] != ' ' && str[0] != 0) | ||
| 1362 | return 0; | ||
| 1363 | disable_apic_timer = 1; | ||
| 1364 | return 1; | ||
| 1365 | } | ||
| 1366 | __setup("noapictimer", setup_noapictimer); | ||
| 1367 | |||
| 1368 | static __init int setup_apicpmtimer(char *s) | ||
| 1369 | { | ||
| 1370 | apic_calibrate_pmtmr = 1; | ||
| 1371 | notsc_setup(NULL); | ||
| 1372 | return 0; | ||
| 1373 | } | ||
| 1374 | __setup("apicpmtimer", setup_apicpmtimer); | ||
| 1375 | |||
| 1376 | static int __init lapic_insert_resource(void) | ||
| 1377 | { | ||
| 1378 | if (!apic_phys) | ||
| 1379 | return -1; | ||
| 1380 | |||
| 1381 | /* Put local APIC into the resource map. */ | ||
| 1382 | lapic_resource.start = apic_phys; | ||
| 1383 | lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1; | ||
| 1384 | insert_resource(&iomem_resource, &lapic_resource); | ||
| 1385 | |||
| 1386 | return 0; | ||
| 1387 | } | ||
| 1388 | |||
| 1389 | /* | ||
| 1390 | * We need to call this insert after e820_reserve_resources(), | ||
| 1391 | * which uses request_resource() | ||
| 1392 | */ | ||
| 1393 | late_initcall(lapic_insert_resource); | ||
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index bf9b441331e9..5145a6e72bbb 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c | |||
| @@ -219,7 +219,6 @@ | |||
| 219 | #include <linux/time.h> | 219 | #include <linux/time.h> |
| 220 | #include <linux/sched.h> | 220 | #include <linux/sched.h> |
| 221 | #include <linux/pm.h> | 221 | #include <linux/pm.h> |
| 222 | #include <linux/pm_legacy.h> | ||
| 223 | #include <linux/capability.h> | 222 | #include <linux/capability.h> |
| 224 | #include <linux/device.h> | 223 | #include <linux/device.h> |
| 225 | #include <linux/kernel.h> | 224 | #include <linux/kernel.h> |
| @@ -229,12 +228,12 @@ | |||
| 229 | #include <linux/suspend.h> | 228 | #include <linux/suspend.h> |
| 230 | #include <linux/kthread.h> | 229 | #include <linux/kthread.h> |
| 231 | #include <linux/jiffies.h> | 230 | #include <linux/jiffies.h> |
| 232 | #include <linux/smp_lock.h> | ||
| 233 | 231 | ||
| 234 | #include <asm/system.h> | 232 | #include <asm/system.h> |
| 235 | #include <asm/uaccess.h> | 233 | #include <asm/uaccess.h> |
| 236 | #include <asm/desc.h> | 234 | #include <asm/desc.h> |
| 237 | #include <asm/i8253.h> | 235 | #include <asm/i8253.h> |
| 236 | #include <asm/olpc.h> | ||
| 238 | #include <asm/paravirt.h> | 237 | #include <asm/paravirt.h> |
| 239 | #include <asm/reboot.h> | 238 | #include <asm/reboot.h> |
| 240 | 239 | ||
| @@ -2218,7 +2217,7 @@ static int __init apm_init(void) | |||
| 2218 | 2217 | ||
| 2219 | dmi_check_system(apm_dmi_table); | 2218 | dmi_check_system(apm_dmi_table); |
| 2220 | 2219 | ||
| 2221 | if (apm_info.bios.version == 0 || paravirt_enabled()) { | 2220 | if (apm_info.bios.version == 0 || paravirt_enabled() || machine_is_olpc()) { |
| 2222 | printk(KERN_INFO "apm: BIOS not found.\n"); | 2221 | printk(KERN_INFO "apm: BIOS not found.\n"); |
| 2223 | return -ENODEV; | 2222 | return -ENODEV; |
| 2224 | } | 2223 | } |
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index bacf5deeec2d..7fcf63d22f8b 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c | |||
| @@ -18,9 +18,11 @@ | |||
| 18 | #include <asm/ia32.h> | 18 | #include <asm/ia32.h> |
| 19 | #include <asm/bootparam.h> | 19 | #include <asm/bootparam.h> |
| 20 | 20 | ||
| 21 | #include <xen/interface/xen.h> | ||
| 22 | |||
| 21 | #define __NO_STUBS 1 | 23 | #define __NO_STUBS 1 |
| 22 | #undef __SYSCALL | 24 | #undef __SYSCALL |
| 23 | #undef _ASM_X86_64_UNISTD_H_ | 25 | #undef _ASM_X86_UNISTD_64_H |
| 24 | #define __SYSCALL(nr, sym) [nr] = 1, | 26 | #define __SYSCALL(nr, sym) [nr] = 1, |
| 25 | static char syscalls[] = { | 27 | static char syscalls[] = { |
| 26 | #include <asm/unistd.h> | 28 | #include <asm/unistd.h> |
| @@ -131,5 +133,14 @@ int main(void) | |||
| 131 | OFFSET(BP_loadflags, boot_params, hdr.loadflags); | 133 | OFFSET(BP_loadflags, boot_params, hdr.loadflags); |
| 132 | OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch); | 134 | OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch); |
| 133 | OFFSET(BP_version, boot_params, hdr.version); | 135 | OFFSET(BP_version, boot_params, hdr.version); |
| 136 | |||
| 137 | BLANK(); | ||
| 138 | DEFINE(PAGE_SIZE_asm, PAGE_SIZE); | ||
| 139 | #ifdef CONFIG_XEN | ||
| 140 | BLANK(); | ||
| 141 | OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask); | ||
| 142 | OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending); | ||
| 143 | #undef ENTRY | ||
| 144 | #endif | ||
| 134 | return 0; | 145 | return 0; |
| 135 | } | 146 | } |
diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c new file mode 100644 index 000000000000..f0dfe6f17e7e --- /dev/null +++ b/arch/x86/kernel/bios_uv.c | |||
| @@ -0,0 +1,141 @@ | |||
| 1 | /* | ||
| 2 | * BIOS run time interface routines. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify | ||
| 5 | * it under the terms of the GNU General Public License as published by | ||
| 6 | * the Free Software Foundation; either version 2 of the License, or | ||
| 7 | * (at your option) any later version. | ||
| 8 | * | ||
| 9 | * This program is distributed in the hope that it will be useful, | ||
| 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 12 | * GNU General Public License for more details. | ||
| 13 | * | ||
| 14 | * You should have received a copy of the GNU General Public License | ||
| 15 | * along with this program; if not, write to the Free Software | ||
| 16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
| 17 | * | ||
| 18 | * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. | ||
| 19 | * Copyright (c) Russ Anderson | ||
| 20 | */ | ||
| 21 | |||
| 22 | #include <linux/efi.h> | ||
| 23 | #include <asm/efi.h> | ||
| 24 | #include <linux/io.h> | ||
| 25 | #include <asm/uv/bios.h> | ||
| 26 | #include <asm/uv/uv_hub.h> | ||
| 27 | |||
| 28 | struct uv_systab uv_systab; | ||
| 29 | |||
| 30 | s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) | ||
| 31 | { | ||
| 32 | struct uv_systab *tab = &uv_systab; | ||
| 33 | |||
| 34 | if (!tab->function) | ||
| 35 | /* | ||
| 36 | * BIOS does not support UV systab | ||
| 37 | */ | ||
| 38 | return BIOS_STATUS_UNIMPLEMENTED; | ||
| 39 | |||
| 40 | return efi_call6((void *)__va(tab->function), | ||
| 41 | (u64)which, a1, a2, a3, a4, a5); | ||
| 42 | } | ||
| 43 | |||
| 44 | s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, | ||
| 45 | u64 a4, u64 a5) | ||
| 46 | { | ||
| 47 | unsigned long bios_flags; | ||
| 48 | s64 ret; | ||
| 49 | |||
| 50 | local_irq_save(bios_flags); | ||
| 51 | ret = uv_bios_call(which, a1, a2, a3, a4, a5); | ||
| 52 | local_irq_restore(bios_flags); | ||
| 53 | |||
| 54 | return ret; | ||
| 55 | } | ||
| 56 | |||
| 57 | s64 uv_bios_call_reentrant(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, | ||
| 58 | u64 a4, u64 a5) | ||
| 59 | { | ||
| 60 | s64 ret; | ||
| 61 | |||
| 62 | preempt_disable(); | ||
| 63 | ret = uv_bios_call(which, a1, a2, a3, a4, a5); | ||
| 64 | preempt_enable(); | ||
| 65 | |||
| 66 | return ret; | ||
| 67 | } | ||
| 68 | |||
| 69 | |||
| 70 | long sn_partition_id; | ||
| 71 | EXPORT_SYMBOL_GPL(sn_partition_id); | ||
| 72 | long uv_coherency_id; | ||
| 73 | EXPORT_SYMBOL_GPL(uv_coherency_id); | ||
| 74 | long uv_region_size; | ||
| 75 | EXPORT_SYMBOL_GPL(uv_region_size); | ||
| 76 | int uv_type; | ||
| 77 | |||
| 78 | |||
| 79 | s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher, | ||
| 80 | long *region) | ||
| 81 | { | ||
| 82 | s64 ret; | ||
| 83 | u64 v0, v1; | ||
| 84 | union partition_info_u part; | ||
| 85 | |||
| 86 | ret = uv_bios_call_irqsave(UV_BIOS_GET_SN_INFO, fc, | ||
| 87 | (u64)(&v0), (u64)(&v1), 0, 0); | ||
| 88 | if (ret != BIOS_STATUS_SUCCESS) | ||
| 89 | return ret; | ||
| 90 | |||
| 91 | part.val = v0; | ||
| 92 | if (uvtype) | ||
| 93 | *uvtype = part.hub_version; | ||
| 94 | if (partid) | ||
| 95 | *partid = part.partition_id; | ||
| 96 | if (coher) | ||
| 97 | *coher = part.coherence_id; | ||
| 98 | if (region) | ||
| 99 | *region = part.region_size; | ||
| 100 | return ret; | ||
| 101 | } | ||
| 102 | |||
| 103 | |||
| 104 | s64 uv_bios_freq_base(u64 clock_type, u64 *ticks_per_second) | ||
| 105 | { | ||
| 106 | return uv_bios_call(UV_BIOS_FREQ_BASE, clock_type, | ||
| 107 | (u64)ticks_per_second, 0, 0, 0); | ||
| 108 | } | ||
| 109 | EXPORT_SYMBOL_GPL(uv_bios_freq_base); | ||
| 110 | |||
| 111 | |||
| 112 | #ifdef CONFIG_EFI | ||
| 113 | void uv_bios_init(void) | ||
| 114 | { | ||
| 115 | struct uv_systab *tab; | ||
| 116 | |||
| 117 | if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) || | ||
| 118 | (efi.uv_systab == (unsigned long)NULL)) { | ||
| 119 | printk(KERN_CRIT "No EFI UV System Table.\n"); | ||
| 120 | uv_systab.function = (unsigned long)NULL; | ||
| 121 | return; | ||
| 122 | } | ||
| 123 | |||
| 124 | tab = (struct uv_systab *)ioremap(efi.uv_systab, | ||
| 125 | sizeof(struct uv_systab)); | ||
| 126 | if (strncmp(tab->signature, "UVST", 4) != 0) | ||
| 127 | printk(KERN_ERR "bad signature in UV system table!"); | ||
| 128 | |||
| 129 | /* | ||
| 130 | * Copy table to permanent spot for later use. | ||
| 131 | */ | ||
| 132 | memcpy(&uv_systab, tab, sizeof(struct uv_systab)); | ||
| 133 | iounmap(tab); | ||
| 134 | |||
| 135 | printk(KERN_INFO "EFI UV System Table Revision %d\n", tab->revision); | ||
| 136 | } | ||
| 137 | #else /* !CONFIG_EFI */ | ||
| 138 | |||
| 139 | void uv_bios_init(void) { } | ||
| 140 | #endif | ||
| 141 | |||
diff --git a/arch/x86/kernel/cpu/.gitignore b/arch/x86/kernel/cpu/.gitignore new file mode 100644 index 000000000000..667df55a4399 --- /dev/null +++ b/arch/x86/kernel/cpu/.gitignore | |||
| @@ -0,0 +1 @@ | |||
| capflags.c | |||
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index ee76eaad3001..82ec6075c057 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile | |||
| @@ -3,22 +3,30 @@ | |||
| 3 | # | 3 | # |
| 4 | 4 | ||
| 5 | obj-y := intel_cacheinfo.o addon_cpuid_features.o | 5 | obj-y := intel_cacheinfo.o addon_cpuid_features.o |
| 6 | obj-y += proc.o feature_names.o | 6 | obj-y += proc.o capflags.o powerflags.o common.o |
| 7 | 7 | ||
| 8 | obj-$(CONFIG_X86_32) += common.o bugs.o | 8 | obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o |
| 9 | obj-$(CONFIG_X86_64) += common_64.o bugs_64.o | 9 | obj-$(CONFIG_X86_64) += bugs_64.o |
| 10 | obj-$(CONFIG_X86_32) += amd.o | 10 | |
| 11 | obj-$(CONFIG_X86_64) += amd_64.o | 11 | obj-$(CONFIG_CPU_SUP_INTEL) += intel.o |
| 12 | obj-$(CONFIG_X86_32) += cyrix.o | 12 | obj-$(CONFIG_CPU_SUP_AMD) += amd.o |
| 13 | obj-$(CONFIG_X86_32) += centaur.o | 13 | obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o |
| 14 | obj-$(CONFIG_X86_64) += centaur_64.o | 14 | obj-$(CONFIG_CPU_SUP_CENTAUR_32) += centaur.o |
| 15 | obj-$(CONFIG_X86_32) += transmeta.o | 15 | obj-$(CONFIG_CPU_SUP_CENTAUR_64) += centaur_64.o |
| 16 | obj-$(CONFIG_X86_32) += intel.o | 16 | obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o |
| 17 | obj-$(CONFIG_X86_64) += intel_64.o | 17 | obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o |
| 18 | obj-$(CONFIG_X86_32) += umc.o | ||
| 19 | 18 | ||
| 20 | obj-$(CONFIG_X86_MCE) += mcheck/ | 19 | obj-$(CONFIG_X86_MCE) += mcheck/ |
| 21 | obj-$(CONFIG_MTRR) += mtrr/ | 20 | obj-$(CONFIG_MTRR) += mtrr/ |
| 22 | obj-$(CONFIG_CPU_FREQ) += cpufreq/ | 21 | obj-$(CONFIG_CPU_FREQ) += cpufreq/ |
| 23 | 22 | ||
| 24 | obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o | 23 | obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o |
| 24 | |||
| 25 | quiet_cmd_mkcapflags = MKCAP $@ | ||
| 26 | cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@ | ||
| 27 | |||
| 28 | cpufeature = $(src)/../../include/asm/cpufeature.h | ||
| 29 | |||
| 30 | targets += capflags.c | ||
| 31 | $(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.pl FORCE | ||
| 32 | $(call if_changed,mkcapflags) | ||
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index 84a8220a6072..0d9c993aa93e 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c | |||
| @@ -7,6 +7,8 @@ | |||
| 7 | #include <asm/pat.h> | 7 | #include <asm/pat.h> |
| 8 | #include <asm/processor.h> | 8 | #include <asm/processor.h> |
| 9 | 9 | ||
| 10 | #include <mach_apic.h> | ||
| 11 | |||
| 10 | struct cpuid_bit { | 12 | struct cpuid_bit { |
| 11 | u16 feature; | 13 | u16 feature; |
| 12 | u8 reg; | 14 | u8 reg; |
| @@ -48,6 +50,92 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) | |||
| 48 | } | 50 | } |
| 49 | } | 51 | } |
| 50 | 52 | ||
| 53 | /* leaf 0xb SMT level */ | ||
| 54 | #define SMT_LEVEL 0 | ||
| 55 | |||
| 56 | /* leaf 0xb sub-leaf types */ | ||
| 57 | #define INVALID_TYPE 0 | ||
| 58 | #define SMT_TYPE 1 | ||
| 59 | #define CORE_TYPE 2 | ||
| 60 | |||
| 61 | #define LEAFB_SUBTYPE(ecx) (((ecx) >> 8) & 0xff) | ||
| 62 | #define BITS_SHIFT_NEXT_LEVEL(eax) ((eax) & 0x1f) | ||
| 63 | #define LEVEL_MAX_SIBLINGS(ebx) ((ebx) & 0xffff) | ||
| 64 | |||
| 65 | /* | ||
| 66 | * Check for extended topology enumeration cpuid leaf 0xb and if it | ||
| 67 | * exists, use it for populating initial_apicid and cpu topology | ||
| 68 | * detection. | ||
| 69 | */ | ||
| 70 | void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) | ||
| 71 | { | ||
| 72 | #ifdef CONFIG_SMP | ||
| 73 | unsigned int eax, ebx, ecx, edx, sub_index; | ||
| 74 | unsigned int ht_mask_width, core_plus_mask_width; | ||
| 75 | unsigned int core_select_mask, core_level_siblings; | ||
| 76 | |||
| 77 | if (c->cpuid_level < 0xb) | ||
| 78 | return; | ||
| 79 | |||
| 80 | cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx); | ||
| 81 | |||
| 82 | /* | ||
| 83 | * check if the cpuid leaf 0xb is actually implemented. | ||
| 84 | */ | ||
| 85 | if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE)) | ||
| 86 | return; | ||
| 87 | |||
| 88 | set_cpu_cap(c, X86_FEATURE_XTOPOLOGY); | ||
| 89 | |||
| 90 | /* | ||
| 91 | * initial apic id, which also represents 32-bit extended x2apic id. | ||
| 92 | */ | ||
| 93 | c->initial_apicid = edx; | ||
| 94 | |||
| 95 | /* | ||
| 96 | * Populate HT related information from sub-leaf level 0. | ||
| 97 | */ | ||
| 98 | core_level_siblings = smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx); | ||
| 99 | core_plus_mask_width = ht_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); | ||
| 100 | |||
| 101 | sub_index = 1; | ||
| 102 | do { | ||
| 103 | cpuid_count(0xb, sub_index, &eax, &ebx, &ecx, &edx); | ||
| 104 | |||
| 105 | /* | ||
| 106 | * Check for the Core type in the implemented sub leaves. | ||
| 107 | */ | ||
| 108 | if (LEAFB_SUBTYPE(ecx) == CORE_TYPE) { | ||
| 109 | core_level_siblings = LEVEL_MAX_SIBLINGS(ebx); | ||
| 110 | core_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); | ||
| 111 | break; | ||
| 112 | } | ||
| 113 | |||
| 114 | sub_index++; | ||
| 115 | } while (LEAFB_SUBTYPE(ecx) != INVALID_TYPE); | ||
| 116 | |||
| 117 | core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width; | ||
| 118 | |||
| 119 | #ifdef CONFIG_X86_32 | ||
| 120 | c->cpu_core_id = phys_pkg_id(c->initial_apicid, ht_mask_width) | ||
| 121 | & core_select_mask; | ||
| 122 | c->phys_proc_id = phys_pkg_id(c->initial_apicid, core_plus_mask_width); | ||
| 123 | #else | ||
| 124 | c->cpu_core_id = phys_pkg_id(ht_mask_width) & core_select_mask; | ||
| 125 | c->phys_proc_id = phys_pkg_id(core_plus_mask_width); | ||
| 126 | #endif | ||
| 127 | c->x86_max_cores = (core_level_siblings / smp_num_siblings); | ||
| 128 | |||
| 129 | |||
| 130 | printk(KERN_INFO "CPU: Physical Processor ID: %d\n", | ||
| 131 | c->phys_proc_id); | ||
| 132 | if (c->x86_max_cores > 1) | ||
| 133 | printk(KERN_INFO "CPU: Processor Core ID: %d\n", | ||
| 134 | c->cpu_core_id); | ||
| 135 | return; | ||
| 136 | #endif | ||
| 137 | } | ||
| 138 | |||
| 51 | #ifdef CONFIG_X86_PAT | 139 | #ifdef CONFIG_X86_PAT |
| 52 | void __cpuinit validate_pat_support(struct cpuinfo_x86 *c) | 140 | void __cpuinit validate_pat_support(struct cpuinfo_x86 *c) |
| 53 | { | 141 | { |
| @@ -56,9 +144,22 @@ void __cpuinit validate_pat_support(struct cpuinfo_x86 *c) | |||
| 56 | 144 | ||
| 57 | switch (c->x86_vendor) { | 145 | switch (c->x86_vendor) { |
| 58 | case X86_VENDOR_INTEL: | 146 | case X86_VENDOR_INTEL: |
| 59 | if (c->x86 == 0xF || (c->x86 == 6 && c->x86_model >= 15)) | 147 | /* |
| 148 | * There is a known erratum on Pentium III and Core Solo | ||
| 149 | * and Core Duo CPUs. | ||
| 150 | * " Page with PAT set to WC while associated MTRR is UC | ||
| 151 | * may consolidate to UC " | ||
| 152 | * Because of this erratum, it is better to stick with | ||
| 153 | * setting WC in MTRR rather than using PAT on these CPUs. | ||
| 154 | * | ||
| 155 | * Enable PAT WC only on P4, Core 2 or later CPUs. | ||
| 156 | */ | ||
| 157 | if (c->x86 > 0x6 || (c->x86 == 6 && c->x86_model >= 15)) | ||
| 60 | return; | 158 | return; |
| 61 | break; | 159 | |
| 160 | pat_disable("PAT WC disabled due to known CPU erratum."); | ||
| 161 | return; | ||
| 162 | |||
| 62 | case X86_VENDOR_AMD: | 163 | case X86_VENDOR_AMD: |
| 63 | case X86_VENDOR_CENTAUR: | 164 | case X86_VENDOR_CENTAUR: |
| 64 | case X86_VENDOR_TRANSMETA: | 165 | case X86_VENDOR_TRANSMETA: |
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 81a07ca65d44..8f1e31db2ad5 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
| @@ -1,13 +1,22 @@ | |||
| 1 | #include <linux/init.h> | 1 | #include <linux/init.h> |
| 2 | #include <linux/bitops.h> | 2 | #include <linux/bitops.h> |
| 3 | #include <linux/mm.h> | 3 | #include <linux/mm.h> |
| 4 | |||
| 4 | #include <asm/io.h> | 5 | #include <asm/io.h> |
| 5 | #include <asm/processor.h> | 6 | #include <asm/processor.h> |
| 6 | #include <asm/apic.h> | 7 | #include <asm/apic.h> |
| 7 | 8 | ||
| 9 | #ifdef CONFIG_X86_64 | ||
| 10 | # include <asm/numa_64.h> | ||
| 11 | # include <asm/mmconfig.h> | ||
| 12 | # include <asm/cacheflush.h> | ||
| 13 | #endif | ||
| 14 | |||
| 8 | #include <mach_apic.h> | 15 | #include <mach_apic.h> |
| 16 | |||
| 9 | #include "cpu.h" | 17 | #include "cpu.h" |
| 10 | 18 | ||
| 19 | #ifdef CONFIG_X86_32 | ||
| 11 | /* | 20 | /* |
| 12 | * B step AMD K6 before B 9730xxxx have hardware bugs that can cause | 21 | * B step AMD K6 before B 9730xxxx have hardware bugs that can cause |
| 13 | * misexecution of code under Linux. Owners of such processors should | 22 | * misexecution of code under Linux. Owners of such processors should |
| @@ -24,23 +33,273 @@ | |||
| 24 | extern void vide(void); | 33 | extern void vide(void); |
| 25 | __asm__(".align 4\nvide: ret"); | 34 | __asm__(".align 4\nvide: ret"); |
| 26 | 35 | ||
| 27 | int force_mwait __cpuinitdata; | 36 | static void __cpuinit init_amd_k5(struct cpuinfo_x86 *c) |
| 28 | |||
| 29 | static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) | ||
| 30 | { | 37 | { |
| 31 | if (cpuid_eax(0x80000000) >= 0x80000007) { | 38 | /* |
| 32 | c->x86_power = cpuid_edx(0x80000007); | 39 | * General Systems BIOSen alias the cpu frequency registers |
| 33 | if (c->x86_power & (1<<8)) | 40 | * of the Elan at 0x000df000. Unfortunately, one of the Linux |
| 34 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | 41 | * drivers subsequently pokes it, and changes the CPU speed. |
| 42 | * Workaround : Remove the unneeded alias. | ||
| 43 | */ | ||
| 44 | #define CBAR (0xfffc) /* Configuration Base Address (32-bit) */ | ||
| 45 | #define CBAR_ENB (0x80000000) | ||
| 46 | #define CBAR_KEY (0X000000CB) | ||
| 47 | if (c->x86_model == 9 || c->x86_model == 10) { | ||
| 48 | if (inl (CBAR) & CBAR_ENB) | ||
| 49 | outl (0 | CBAR_KEY, CBAR); | ||
| 35 | } | 50 | } |
| 36 | } | 51 | } |
| 37 | 52 | ||
| 38 | static void __cpuinit init_amd(struct cpuinfo_x86 *c) | 53 | |
| 54 | static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c) | ||
| 39 | { | 55 | { |
| 40 | u32 l, h; | 56 | u32 l, h; |
| 41 | int mbytes = num_physpages >> (20-PAGE_SHIFT); | 57 | int mbytes = num_physpages >> (20-PAGE_SHIFT); |
| 42 | int r; | ||
| 43 | 58 | ||
| 59 | if (c->x86_model < 6) { | ||
| 60 | /* Based on AMD doc 20734R - June 2000 */ | ||
| 61 | if (c->x86_model == 0) { | ||
| 62 | clear_cpu_cap(c, X86_FEATURE_APIC); | ||
| 63 | set_cpu_cap(c, X86_FEATURE_PGE); | ||
| 64 | } | ||
| 65 | return; | ||
| 66 | } | ||
| 67 | |||
| 68 | if (c->x86_model == 6 && c->x86_mask == 1) { | ||
| 69 | const int K6_BUG_LOOP = 1000000; | ||
| 70 | int n; | ||
| 71 | void (*f_vide)(void); | ||
| 72 | unsigned long d, d2; | ||
| 73 | |||
| 74 | printk(KERN_INFO "AMD K6 stepping B detected - "); | ||
| 75 | |||
| 76 | /* | ||
| 77 | * It looks like AMD fixed the 2.6.2 bug and improved indirect | ||
| 78 | * calls at the same time. | ||
| 79 | */ | ||
| 80 | |||
| 81 | n = K6_BUG_LOOP; | ||
| 82 | f_vide = vide; | ||
| 83 | rdtscl(d); | ||
| 84 | while (n--) | ||
| 85 | f_vide(); | ||
| 86 | rdtscl(d2); | ||
| 87 | d = d2-d; | ||
| 88 | |||
| 89 | if (d > 20*K6_BUG_LOOP) | ||
| 90 | printk("system stability may be impaired when more than 32 MB are used.\n"); | ||
| 91 | else | ||
| 92 | printk("probably OK (after B9730xxxx).\n"); | ||
| 93 | printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n"); | ||
| 94 | } | ||
| 95 | |||
| 96 | /* K6 with old style WHCR */ | ||
| 97 | if (c->x86_model < 8 || | ||
| 98 | (c->x86_model == 8 && c->x86_mask < 8)) { | ||
| 99 | /* We can only write allocate on the low 508Mb */ | ||
| 100 | if (mbytes > 508) | ||
| 101 | mbytes = 508; | ||
| 102 | |||
| 103 | rdmsr(MSR_K6_WHCR, l, h); | ||
| 104 | if ((l&0x0000FFFF) == 0) { | ||
| 105 | unsigned long flags; | ||
| 106 | l = (1<<0)|((mbytes/4)<<1); | ||
| 107 | local_irq_save(flags); | ||
| 108 | wbinvd(); | ||
| 109 | wrmsr(MSR_K6_WHCR, l, h); | ||
| 110 | local_irq_restore(flags); | ||
| 111 | printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n", | ||
| 112 | mbytes); | ||
| 113 | } | ||
| 114 | return; | ||
| 115 | } | ||
| 116 | |||
| 117 | if ((c->x86_model == 8 && c->x86_mask > 7) || | ||
| 118 | c->x86_model == 9 || c->x86_model == 13) { | ||
| 119 | /* The more serious chips .. */ | ||
| 120 | |||
| 121 | if (mbytes > 4092) | ||
| 122 | mbytes = 4092; | ||
| 123 | |||
| 124 | rdmsr(MSR_K6_WHCR, l, h); | ||
| 125 | if ((l&0xFFFF0000) == 0) { | ||
| 126 | unsigned long flags; | ||
| 127 | l = ((mbytes>>2)<<22)|(1<<16); | ||
| 128 | local_irq_save(flags); | ||
| 129 | wbinvd(); | ||
| 130 | wrmsr(MSR_K6_WHCR, l, h); | ||
| 131 | local_irq_restore(flags); | ||
| 132 | printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n", | ||
| 133 | mbytes); | ||
| 134 | } | ||
| 135 | |||
| 136 | return; | ||
| 137 | } | ||
| 138 | |||
| 139 | if (c->x86_model == 10) { | ||
| 140 | /* AMD Geode LX is model 10 */ | ||
| 141 | /* placeholder for any needed mods */ | ||
| 142 | return; | ||
| 143 | } | ||
| 144 | } | ||
| 145 | |||
| 146 | static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c) | ||
| 147 | { | ||
| 148 | u32 l, h; | ||
| 149 | |||
| 150 | /* | ||
| 151 | * Bit 15 of Athlon specific MSR 15 needs to be 0 | ||
| 152 | * to enable SSE on Palomino/Morgan/Barton CPUs. | ||
| 153 | * If the BIOS didn't enable it already, enable it here. | ||
| 154 | */ | ||
| 155 | if (c->x86_model >= 6 && c->x86_model <= 10) { | ||
| 156 | if (!cpu_has(c, X86_FEATURE_XMM)) { | ||
| 157 | printk(KERN_INFO "Enabling disabled K7/SSE Support.\n"); | ||
| 158 | rdmsr(MSR_K7_HWCR, l, h); | ||
| 159 | l &= ~0x00008000; | ||
| 160 | wrmsr(MSR_K7_HWCR, l, h); | ||
| 161 | set_cpu_cap(c, X86_FEATURE_XMM); | ||
| 162 | } | ||
| 163 | } | ||
| 164 | |||
| 165 | /* | ||
| 166 | * It's been determined by AMD that Athlons since model 8 stepping 1 | ||
| 167 | * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx | ||
| 168 | * As per AMD technical note 27212 0.2 | ||
| 169 | */ | ||
| 170 | if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) { | ||
| 171 | rdmsr(MSR_K7_CLK_CTL, l, h); | ||
| 172 | if ((l & 0xfff00000) != 0x20000000) { | ||
| 173 | printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l, | ||
| 174 | ((l & 0x000fffff)|0x20000000)); | ||
| 175 | wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h); | ||
| 176 | } | ||
| 177 | } | ||
| 178 | |||
| 179 | set_cpu_cap(c, X86_FEATURE_K7); | ||
| 180 | } | ||
| 181 | #endif | ||
| 182 | |||
| 183 | #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) | ||
| 184 | static int __cpuinit nearby_node(int apicid) | ||
| 185 | { | ||
| 186 | int i, node; | ||
| 187 | |||
| 188 | for (i = apicid - 1; i >= 0; i--) { | ||
| 189 | node = apicid_to_node[i]; | ||
| 190 | if (node != NUMA_NO_NODE && node_online(node)) | ||
| 191 | return node; | ||
| 192 | } | ||
| 193 | for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) { | ||
| 194 | node = apicid_to_node[i]; | ||
| 195 | if (node != NUMA_NO_NODE && node_online(node)) | ||
| 196 | return node; | ||
| 197 | } | ||
| 198 | return first_node(node_online_map); /* Shouldn't happen */ | ||
| 199 | } | ||
| 200 | #endif | ||
| 201 | |||
| 202 | /* | ||
| 203 | * On an AMD dual core setup the lower bits of the APIC id distinguish the cores. | ||
| 204 | * Assumes number of cores is a power of two. | ||
| 205 | */ | ||
| 206 | static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) | ||
| 207 | { | ||
| 208 | #ifdef CONFIG_X86_HT | ||
| 209 | unsigned bits; | ||
| 210 | |||
| 211 | bits = c->x86_coreid_bits; | ||
| 212 | |||
| 213 | /* Low order bits define the core id (index of core in socket) */ | ||
| 214 | c->cpu_core_id = c->initial_apicid & ((1 << bits)-1); | ||
| 215 | /* Convert the initial APIC ID into the socket ID */ | ||
| 216 | c->phys_proc_id = c->initial_apicid >> bits; | ||
| 217 | #endif | ||
| 218 | } | ||
| 219 | |||
| 220 | static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) | ||
| 221 | { | ||
| 222 | #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) | ||
| 223 | int cpu = smp_processor_id(); | ||
| 224 | int node; | ||
| 225 | unsigned apicid = hard_smp_processor_id(); | ||
| 226 | |||
| 227 | node = c->phys_proc_id; | ||
| 228 | if (apicid_to_node[apicid] != NUMA_NO_NODE) | ||
| 229 | node = apicid_to_node[apicid]; | ||
| 230 | if (!node_online(node)) { | ||
| 231 | /* Two possibilities here: | ||
| 232 | - The CPU is missing memory and no node was created. | ||
| 233 | In that case try picking one from a nearby CPU | ||
| 234 | - The APIC IDs differ from the HyperTransport node IDs | ||
| 235 | which the K8 northbridge parsing fills in. | ||
| 236 | Assume they are all increased by a constant offset, | ||
| 237 | but in the same order as the HT nodeids. | ||
| 238 | If that doesn't result in a usable node fall back to the | ||
| 239 | path for the previous case. */ | ||
| 240 | |||
| 241 | int ht_nodeid = c->initial_apicid; | ||
| 242 | |||
| 243 | if (ht_nodeid >= 0 && | ||
| 244 | apicid_to_node[ht_nodeid] != NUMA_NO_NODE) | ||
| 245 | node = apicid_to_node[ht_nodeid]; | ||
| 246 | /* Pick a nearby node */ | ||
| 247 | if (!node_online(node)) | ||
| 248 | node = nearby_node(apicid); | ||
| 249 | } | ||
| 250 | numa_set_node(cpu, node); | ||
| 251 | |||
| 252 | printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node); | ||
| 253 | #endif | ||
| 254 | } | ||
| 255 | |||
| 256 | static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) | ||
| 257 | { | ||
| 258 | #ifdef CONFIG_X86_HT | ||
| 259 | unsigned bits, ecx; | ||
| 260 | |||
| 261 | /* Multi core CPU? */ | ||
| 262 | if (c->extended_cpuid_level < 0x80000008) | ||
| 263 | return; | ||
| 264 | |||
| 265 | ecx = cpuid_ecx(0x80000008); | ||
| 266 | |||
| 267 | c->x86_max_cores = (ecx & 0xff) + 1; | ||
| 268 | |||
| 269 | /* CPU telling us the core id bits shift? */ | ||
| 270 | bits = (ecx >> 12) & 0xF; | ||
| 271 | |||
| 272 | /* Otherwise recompute */ | ||
| 273 | if (bits == 0) { | ||
| 274 | while ((1 << bits) < c->x86_max_cores) | ||
| 275 | bits++; | ||
| 276 | } | ||
| 277 | |||
| 278 | c->x86_coreid_bits = bits; | ||
| 279 | #endif | ||
| 280 | } | ||
| 281 | |||
| 282 | static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) | ||
| 283 | { | ||
| 284 | early_init_amd_mc(c); | ||
| 285 | |||
| 286 | /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */ | ||
| 287 | if (c->x86_power & (1<<8)) | ||
| 288 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | ||
| 289 | |||
| 290 | #ifdef CONFIG_X86_64 | ||
| 291 | set_cpu_cap(c, X86_FEATURE_SYSCALL32); | ||
| 292 | #else | ||
| 293 | /* Set MTRR capability flag if appropriate */ | ||
| 294 | if (c->x86 == 5) | ||
| 295 | if (c->x86_model == 13 || c->x86_model == 9 || | ||
| 296 | (c->x86_model == 8 && c->x86_mask >= 8)) | ||
| 297 | set_cpu_cap(c, X86_FEATURE_K6_MTRR); | ||
| 298 | #endif | ||
| 299 | } | ||
| 300 | |||
| 301 | static void __cpuinit init_amd(struct cpuinfo_x86 *c) | ||
| 302 | { | ||
| 44 | #ifdef CONFIG_SMP | 303 | #ifdef CONFIG_SMP |
| 45 | unsigned long long value; | 304 | unsigned long long value; |
| 46 | 305 | ||
| @@ -51,7 +310,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
| 51 | * Errata 63 for SH-B3 steppings | 310 | * Errata 63 for SH-B3 steppings |
| 52 | * Errata 122 for all steppings (F+ have it disabled by default) | 311 | * Errata 122 for all steppings (F+ have it disabled by default) |
| 53 | */ | 312 | */ |
| 54 | if (c->x86 == 15) { | 313 | if (c->x86 == 0xf) { |
| 55 | rdmsrl(MSR_K7_HWCR, value); | 314 | rdmsrl(MSR_K7_HWCR, value); |
| 56 | value |= 1 << 6; | 315 | value |= 1 << 6; |
| 57 | wrmsrl(MSR_K7_HWCR, value); | 316 | wrmsrl(MSR_K7_HWCR, value); |
| @@ -61,213 +320,119 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
| 61 | early_init_amd(c); | 320 | early_init_amd(c); |
| 62 | 321 | ||
| 63 | /* | 322 | /* |
| 64 | * FIXME: We should handle the K5 here. Set up the write | ||
| 65 | * range and also turn on MSR 83 bits 4 and 31 (write alloc, | ||
| 66 | * no bus pipeline) | ||
| 67 | */ | ||
| 68 | |||
| 69 | /* | ||
| 70 | * Bit 31 in normal CPUID used for nonstandard 3DNow ID; | 323 | * Bit 31 in normal CPUID used for nonstandard 3DNow ID; |
| 71 | * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway | 324 | * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway |
| 72 | */ | 325 | */ |
| 73 | clear_cpu_cap(c, 0*32+31); | 326 | clear_cpu_cap(c, 0*32+31); |
| 74 | 327 | ||
| 75 | r = get_model_name(c); | 328 | #ifdef CONFIG_X86_64 |
| 76 | 329 | /* On C+ stepping K8 rep microcode works well for copy/memset */ | |
| 77 | switch (c->x86) { | 330 | if (c->x86 == 0xf) { |
| 78 | case 4: | 331 | u32 level; |
| 79 | /* | ||
| 80 | * General Systems BIOSen alias the cpu frequency registers | ||
| 81 | * of the Elan at 0x000df000. Unfortunately, one of the Linux | ||
| 82 | * drivers subsequently pokes it, and changes the CPU speed. | ||
| 83 | * Workaround : Remove the unneeded alias. | ||
| 84 | */ | ||
| 85 | #define CBAR (0xfffc) /* Configuration Base Address (32-bit) */ | ||
| 86 | #define CBAR_ENB (0x80000000) | ||
| 87 | #define CBAR_KEY (0X000000CB) | ||
| 88 | if (c->x86_model == 9 || c->x86_model == 10) { | ||
| 89 | if (inl (CBAR) & CBAR_ENB) | ||
| 90 | outl (0 | CBAR_KEY, CBAR); | ||
| 91 | } | ||
| 92 | break; | ||
| 93 | case 5: | ||
| 94 | if (c->x86_model < 6) { | ||
| 95 | /* Based on AMD doc 20734R - June 2000 */ | ||
| 96 | if (c->x86_model == 0) { | ||
| 97 | clear_cpu_cap(c, X86_FEATURE_APIC); | ||
| 98 | set_cpu_cap(c, X86_FEATURE_PGE); | ||
| 99 | } | ||
| 100 | break; | ||
| 101 | } | ||
| 102 | |||
| 103 | if (c->x86_model == 6 && c->x86_mask == 1) { | ||
| 104 | const int K6_BUG_LOOP = 1000000; | ||
| 105 | int n; | ||
| 106 | void (*f_vide)(void); | ||
| 107 | unsigned long d, d2; | ||
| 108 | |||
| 109 | printk(KERN_INFO "AMD K6 stepping B detected - "); | ||
| 110 | |||
| 111 | /* | ||
| 112 | * It looks like AMD fixed the 2.6.2 bug and improved indirect | ||
| 113 | * calls at the same time. | ||
| 114 | */ | ||
| 115 | |||
| 116 | n = K6_BUG_LOOP; | ||
| 117 | f_vide = vide; | ||
| 118 | rdtscl(d); | ||
| 119 | while (n--) | ||
| 120 | f_vide(); | ||
| 121 | rdtscl(d2); | ||
| 122 | d = d2-d; | ||
| 123 | |||
| 124 | if (d > 20*K6_BUG_LOOP) | ||
| 125 | printk("system stability may be impaired when more than 32 MB are used.\n"); | ||
| 126 | else | ||
| 127 | printk("probably OK (after B9730xxxx).\n"); | ||
| 128 | printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n"); | ||
| 129 | } | ||
| 130 | |||
| 131 | /* K6 with old style WHCR */ | ||
| 132 | if (c->x86_model < 8 || | ||
| 133 | (c->x86_model == 8 && c->x86_mask < 8)) { | ||
| 134 | /* We can only write allocate on the low 508Mb */ | ||
| 135 | if (mbytes > 508) | ||
| 136 | mbytes = 508; | ||
| 137 | |||
| 138 | rdmsr(MSR_K6_WHCR, l, h); | ||
| 139 | if ((l&0x0000FFFF) == 0) { | ||
| 140 | unsigned long flags; | ||
| 141 | l = (1<<0)|((mbytes/4)<<1); | ||
| 142 | local_irq_save(flags); | ||
| 143 | wbinvd(); | ||
| 144 | wrmsr(MSR_K6_WHCR, l, h); | ||
| 145 | local_irq_restore(flags); | ||
| 146 | printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n", | ||
| 147 | mbytes); | ||
| 148 | } | ||
| 149 | break; | ||
| 150 | } | ||
| 151 | |||
| 152 | if ((c->x86_model == 8 && c->x86_mask > 7) || | ||
| 153 | c->x86_model == 9 || c->x86_model == 13) { | ||
| 154 | /* The more serious chips .. */ | ||
| 155 | |||
| 156 | if (mbytes > 4092) | ||
| 157 | mbytes = 4092; | ||
| 158 | |||
| 159 | rdmsr(MSR_K6_WHCR, l, h); | ||
| 160 | if ((l&0xFFFF0000) == 0) { | ||
| 161 | unsigned long flags; | ||
| 162 | l = ((mbytes>>2)<<22)|(1<<16); | ||
| 163 | local_irq_save(flags); | ||
| 164 | wbinvd(); | ||
| 165 | wrmsr(MSR_K6_WHCR, l, h); | ||
| 166 | local_irq_restore(flags); | ||
| 167 | printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n", | ||
| 168 | mbytes); | ||
| 169 | } | ||
| 170 | |||
| 171 | /* Set MTRR capability flag if appropriate */ | ||
| 172 | if (c->x86_model == 13 || c->x86_model == 9 || | ||
| 173 | (c->x86_model == 8 && c->x86_mask >= 8)) | ||
| 174 | set_cpu_cap(c, X86_FEATURE_K6_MTRR); | ||
| 175 | break; | ||
| 176 | } | ||
| 177 | |||
| 178 | if (c->x86_model == 10) { | ||
| 179 | /* AMD Geode LX is model 10 */ | ||
| 180 | /* placeholder for any needed mods */ | ||
| 181 | break; | ||
| 182 | } | ||
| 183 | break; | ||
| 184 | case 6: /* An Athlon/Duron */ | ||
| 185 | 332 | ||
| 186 | /* | 333 | level = cpuid_eax(1); |
| 187 | * Bit 15 of Athlon specific MSR 15, needs to be 0 | 334 | if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) |
| 188 | * to enable SSE on Palomino/Morgan/Barton CPU's. | 335 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); |
| 189 | * If the BIOS didn't enable it already, enable it here. | ||
| 190 | */ | ||
| 191 | if (c->x86_model >= 6 && c->x86_model <= 10) { | ||
| 192 | if (!cpu_has(c, X86_FEATURE_XMM)) { | ||
| 193 | printk(KERN_INFO "Enabling disabled K7/SSE Support.\n"); | ||
| 194 | rdmsr(MSR_K7_HWCR, l, h); | ||
| 195 | l &= ~0x00008000; | ||
| 196 | wrmsr(MSR_K7_HWCR, l, h); | ||
| 197 | set_cpu_cap(c, X86_FEATURE_XMM); | ||
| 198 | } | ||
| 199 | } | ||
| 200 | |||
| 201 | /* | ||
| 202 | * It's been determined by AMD that Athlons since model 8 stepping 1 | ||
| 203 | * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx | ||
| 204 | * As per AMD technical note 27212 0.2 | ||
| 205 | */ | ||
| 206 | if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) { | ||
| 207 | rdmsr(MSR_K7_CLK_CTL, l, h); | ||
| 208 | if ((l & 0xfff00000) != 0x20000000) { | ||
| 209 | printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l, | ||
| 210 | ((l & 0x000fffff)|0x20000000)); | ||
| 211 | wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h); | ||
| 212 | } | ||
| 213 | } | ||
| 214 | break; | ||
| 215 | } | 336 | } |
| 337 | if (c->x86 == 0x10 || c->x86 == 0x11) | ||
| 338 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); | ||
| 339 | #else | ||
| 340 | |||
| 341 | /* | ||
| 342 | * FIXME: We should handle the K5 here. Set up the write | ||
| 343 | * range and also turn on MSR 83 bits 4 and 31 (write alloc, | ||
| 344 | * no bus pipeline) | ||
| 345 | */ | ||
| 216 | 346 | ||
| 217 | switch (c->x86) { | 347 | switch (c->x86) { |
| 218 | case 15: | 348 | case 4: |
| 219 | /* Use K8 tuning for Fam10h and Fam11h */ | 349 | init_amd_k5(c); |
| 220 | case 0x10: | ||
| 221 | case 0x11: | ||
| 222 | set_cpu_cap(c, X86_FEATURE_K8); | ||
| 223 | break; | 350 | break; |
| 224 | case 6: | 351 | case 5: |
| 225 | set_cpu_cap(c, X86_FEATURE_K7); | 352 | init_amd_k6(c); |
| 353 | break; | ||
| 354 | case 6: /* An Athlon/Duron */ | ||
| 355 | init_amd_k7(c); | ||
| 226 | break; | 356 | break; |
| 227 | } | 357 | } |
| 358 | |||
| 359 | /* K6s report MCEs but don't actually have all the MSRs */ | ||
| 360 | if (c->x86 < 6) | ||
| 361 | clear_cpu_cap(c, X86_FEATURE_MCE); | ||
| 362 | #endif | ||
| 363 | |||
| 364 | /* Enable workaround for FXSAVE leak */ | ||
| 228 | if (c->x86 >= 6) | 365 | if (c->x86 >= 6) |
| 229 | set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); | 366 | set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); |
| 230 | 367 | ||
| 231 | display_cacheinfo(c); | 368 | if (!c->x86_model_id[0]) { |
| 232 | 369 | switch (c->x86) { | |
| 233 | if (cpuid_eax(0x80000000) >= 0x80000008) | 370 | case 0xf: |
| 234 | c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; | 371 | /* Should distinguish Models here, but this is only |
| 372 | a fallback anyways. */ | ||
| 373 | strcpy(c->x86_model_id, "Hammer"); | ||
| 374 | break; | ||
| 375 | } | ||
| 376 | } | ||
| 235 | 377 | ||
| 236 | #ifdef CONFIG_X86_HT | 378 | display_cacheinfo(c); |
| 237 | /* | ||
| 238 | * On an AMD multi core setup the lower bits of the APIC id | ||
| 239 | * distinguish the cores. | ||
| 240 | */ | ||
| 241 | if (c->x86_max_cores > 1) { | ||
| 242 | int cpu = smp_processor_id(); | ||
| 243 | unsigned bits = (cpuid_ecx(0x80000008) >> 12) & 0xf; | ||
| 244 | 379 | ||
| 245 | if (bits == 0) { | 380 | /* Multi core CPU? */ |
| 246 | while ((1 << bits) < c->x86_max_cores) | 381 | if (c->extended_cpuid_level >= 0x80000008) { |
| 247 | bits++; | 382 | amd_detect_cmp(c); |
| 248 | } | 383 | srat_detect_node(c); |
| 249 | c->cpu_core_id = c->phys_proc_id & ((1<<bits)-1); | ||
| 250 | c->phys_proc_id >>= bits; | ||
| 251 | printk(KERN_INFO "CPU %d(%d) -> Core %d\n", | ||
| 252 | cpu, c->x86_max_cores, c->cpu_core_id); | ||
| 253 | } | 384 | } |
| 385 | |||
| 386 | #ifdef CONFIG_X86_32 | ||
| 387 | detect_ht(c); | ||
| 254 | #endif | 388 | #endif |
| 255 | 389 | ||
| 256 | if (cpuid_eax(0x80000000) >= 0x80000006) { | 390 | if (c->extended_cpuid_level >= 0x80000006) { |
| 257 | if ((c->x86 == 0x10) && (cpuid_edx(0x80000006) & 0xf000)) | 391 | if ((c->x86 >= 0x0f) && (cpuid_edx(0x80000006) & 0xf000)) |
| 258 | num_cache_leaves = 4; | 392 | num_cache_leaves = 4; |
| 259 | else | 393 | else |
| 260 | num_cache_leaves = 3; | 394 | num_cache_leaves = 3; |
| 261 | } | 395 | } |
| 262 | 396 | ||
| 263 | /* K6s report MCEs but don't actually have all the MSRs */ | 397 | if (c->x86 >= 0xf && c->x86 <= 0x11) |
| 264 | if (c->x86 < 6) | 398 | set_cpu_cap(c, X86_FEATURE_K8); |
| 265 | clear_cpu_cap(c, X86_FEATURE_MCE); | ||
| 266 | 399 | ||
| 267 | if (cpu_has_xmm2) | 400 | if (cpu_has_xmm2) { |
| 401 | /* MFENCE stops RDTSC speculation */ | ||
| 268 | set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); | 402 | set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); |
| 403 | } | ||
| 404 | |||
| 405 | #ifdef CONFIG_X86_64 | ||
| 406 | if (c->x86 == 0x10) { | ||
| 407 | /* do this for boot cpu */ | ||
| 408 | if (c == &boot_cpu_data) | ||
| 409 | check_enable_amd_mmconf_dmi(); | ||
| 410 | |||
| 411 | fam10h_check_enable_mmcfg(); | ||
| 412 | } | ||
| 413 | |||
| 414 | if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) { | ||
| 415 | unsigned long long tseg; | ||
| 416 | |||
| 417 | /* | ||
| 418 | * Split up direct mapping around the TSEG SMM area. | ||
| 419 | * Don't do it for gbpages because there seems very little | ||
| 420 | * benefit in doing so. | ||
| 421 | */ | ||
| 422 | if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) { | ||
| 423 | printk(KERN_DEBUG "tseg: %010llx\n", tseg); | ||
| 424 | if ((tseg>>PMD_SHIFT) < | ||
| 425 | (max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) || | ||
| 426 | ((tseg>>PMD_SHIFT) < | ||
| 427 | (max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) && | ||
| 428 | (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT)))) | ||
| 429 | set_memory_4k((unsigned long)__va(tseg), 1); | ||
| 430 | } | ||
| 431 | } | ||
| 432 | #endif | ||
| 269 | } | 433 | } |
| 270 | 434 | ||
| 435 | #ifdef CONFIG_X86_32 | ||
| 271 | static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) | 436 | static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) |
| 272 | { | 437 | { |
| 273 | /* AMD errata T13 (order #21922) */ | 438 | /* AMD errata T13 (order #21922) */ |
| @@ -280,10 +445,12 @@ static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int | |||
| 280 | } | 445 | } |
| 281 | return size; | 446 | return size; |
| 282 | } | 447 | } |
| 448 | #endif | ||
| 283 | 449 | ||
| 284 | static struct cpu_dev amd_cpu_dev __cpuinitdata = { | 450 | static struct cpu_dev amd_cpu_dev __cpuinitdata = { |
| 285 | .c_vendor = "AMD", | 451 | .c_vendor = "AMD", |
| 286 | .c_ident = { "AuthenticAMD" }, | 452 | .c_ident = { "AuthenticAMD" }, |
| 453 | #ifdef CONFIG_X86_32 | ||
| 287 | .c_models = { | 454 | .c_models = { |
| 288 | { .vendor = X86_VENDOR_AMD, .family = 4, .model_names = | 455 | { .vendor = X86_VENDOR_AMD, .family = 4, .model_names = |
| 289 | { | 456 | { |
| @@ -296,9 +463,11 @@ static struct cpu_dev amd_cpu_dev __cpuinitdata = { | |||
| 296 | } | 463 | } |
| 297 | }, | 464 | }, |
| 298 | }, | 465 | }, |
| 466 | .c_size_cache = amd_size_cache, | ||
| 467 | #endif | ||
| 299 | .c_early_init = early_init_amd, | 468 | .c_early_init = early_init_amd, |
| 300 | .c_init = init_amd, | 469 | .c_init = init_amd, |
| 301 | .c_size_cache = amd_size_cache, | 470 | .c_x86_vendor = X86_VENDOR_AMD, |
| 302 | }; | 471 | }; |
| 303 | 472 | ||
| 304 | cpu_vendor_dev_register(X86_VENDOR_AMD, &amd_cpu_dev); | 473 | cpu_dev_register(amd_cpu_dev); |
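The amd_detect_cmp()/early_init_amd_mc() pair folded into amd.c above splits the initial APIC ID into a core index (the low x86_coreid_bits bits, taken from CPUID 0x80000008 ECX[15:12] or derived from the core count when that field is zero) and a socket ID (the remaining high bits). A small self-contained sketch of that arithmetic, with made-up example values:

    /* Stand-alone illustration of the APIC-ID split performed by
     * amd_detect_cmp() above; the values below are hypothetical. */
    #include <stdio.h>

    int main(void)
    {
            unsigned int initial_apicid = 0x5; /* binary 101 */
            unsigned int coreid_bits    = 2;   /* from CPUID 0x80000008 ECX[15:12] */

            unsigned int cpu_core_id  = initial_apicid & ((1u << coreid_bits) - 1);
            unsigned int phys_proc_id = initial_apicid >> coreid_bits;

            /* Prints "core 1 of socket 1" for the values above. */
            printf("core %u of socket %u\n", cpu_core_id, phys_proc_id);
            return 0;
    }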
diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c deleted file mode 100644 index 7c36fb8a28d4..000000000000 --- a/arch/x86/kernel/cpu/amd_64.c +++ /dev/null | |||
| @@ -1,222 +0,0 @@ | |||
| 1 | #include <linux/init.h> | ||
| 2 | #include <linux/mm.h> | ||
| 3 | |||
| 4 | #include <asm/numa_64.h> | ||
| 5 | #include <asm/mmconfig.h> | ||
| 6 | #include <asm/cacheflush.h> | ||
| 7 | |||
| 8 | #include <mach_apic.h> | ||
| 9 | |||
| 10 | #include "cpu.h" | ||
| 11 | |||
| 12 | int force_mwait __cpuinitdata; | ||
| 13 | |||
| 14 | #ifdef CONFIG_NUMA | ||
| 15 | static int __cpuinit nearby_node(int apicid) | ||
| 16 | { | ||
| 17 | int i, node; | ||
| 18 | |||
| 19 | for (i = apicid - 1; i >= 0; i--) { | ||
| 20 | node = apicid_to_node[i]; | ||
| 21 | if (node != NUMA_NO_NODE && node_online(node)) | ||
| 22 | return node; | ||
| 23 | } | ||
| 24 | for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) { | ||
| 25 | node = apicid_to_node[i]; | ||
| 26 | if (node != NUMA_NO_NODE && node_online(node)) | ||
| 27 | return node; | ||
| 28 | } | ||
| 29 | return first_node(node_online_map); /* Shouldn't happen */ | ||
| 30 | } | ||
| 31 | #endif | ||
| 32 | |||
| 33 | /* | ||
| 34 | * On an AMD dual core setup the lower bits of the APIC id distinguish the cores. | ||
| 35 | * Assumes number of cores is a power of two. | ||
| 36 | */ | ||
| 37 | static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) | ||
| 38 | { | ||
| 39 | #ifdef CONFIG_SMP | ||
| 40 | unsigned bits; | ||
| 41 | #ifdef CONFIG_NUMA | ||
| 42 | int cpu = smp_processor_id(); | ||
| 43 | int node = 0; | ||
| 44 | unsigned apicid = hard_smp_processor_id(); | ||
| 45 | #endif | ||
| 46 | bits = c->x86_coreid_bits; | ||
| 47 | |||
| 48 | /* Low order bits define the core id (index of core in socket) */ | ||
| 49 | c->cpu_core_id = c->initial_apicid & ((1 << bits)-1); | ||
| 50 | /* Convert the initial APIC ID into the socket ID */ | ||
| 51 | c->phys_proc_id = c->initial_apicid >> bits; | ||
| 52 | |||
| 53 | #ifdef CONFIG_NUMA | ||
| 54 | node = c->phys_proc_id; | ||
| 55 | if (apicid_to_node[apicid] != NUMA_NO_NODE) | ||
| 56 | node = apicid_to_node[apicid]; | ||
| 57 | if (!node_online(node)) { | ||
| 58 | /* Two possibilities here: | ||
| 59 | - The CPU is missing memory and no node was created. | ||
| 60 | In that case try picking one from a nearby CPU | ||
| 61 | - The APIC IDs differ from the HyperTransport node IDs | ||
| 62 | which the K8 northbridge parsing fills in. | ||
| 63 | Assume they are all increased by a constant offset, | ||
| 64 | but in the same order as the HT nodeids. | ||
| 65 | If that doesn't result in a usable node fall back to the | ||
| 66 | path for the previous case. */ | ||
| 67 | |||
| 68 | int ht_nodeid = c->initial_apicid; | ||
| 69 | |||
| 70 | if (ht_nodeid >= 0 && | ||
| 71 | apicid_to_node[ht_nodeid] != NUMA_NO_NODE) | ||
| 72 | node = apicid_to_node[ht_nodeid]; | ||
| 73 | /* Pick a nearby node */ | ||
| 74 | if (!node_online(node)) | ||
| 75 | node = nearby_node(apicid); | ||
| 76 | } | ||
| 77 | numa_set_node(cpu, node); | ||
| 78 | |||
| 79 | printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); | ||
| 80 | #endif | ||
| 81 | #endif | ||
| 82 | } | ||
| 83 | |||
| 84 | static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) | ||
| 85 | { | ||
| 86 | #ifdef CONFIG_SMP | ||
| 87 | unsigned bits, ecx; | ||
| 88 | |||
| 89 | /* Multi core CPU? */ | ||
| 90 | if (c->extended_cpuid_level < 0x80000008) | ||
| 91 | return; | ||
| 92 | |||
| 93 | ecx = cpuid_ecx(0x80000008); | ||
| 94 | |||
| 95 | c->x86_max_cores = (ecx & 0xff) + 1; | ||
| 96 | |||
| 97 | /* CPU telling us the core id bits shift? */ | ||
| 98 | bits = (ecx >> 12) & 0xF; | ||
| 99 | |||
| 100 | /* Otherwise recompute */ | ||
| 101 | if (bits == 0) { | ||
| 102 | while ((1 << bits) < c->x86_max_cores) | ||
| 103 | bits++; | ||
| 104 | } | ||
| 105 | |||
| 106 | c->x86_coreid_bits = bits; | ||
| 107 | |||
| 108 | #endif | ||
| 109 | } | ||
| 110 | |||
| 111 | static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) | ||
| 112 | { | ||
| 113 | early_init_amd_mc(c); | ||
| 114 | |||
| 115 | /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */ | ||
| 116 | if (c->x86_power & (1<<8)) | ||
| 117 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | ||
| 118 | } | ||
| 119 | |||
| 120 | static void __cpuinit init_amd(struct cpuinfo_x86 *c) | ||
| 121 | { | ||
| 122 | unsigned level; | ||
| 123 | |||
| 124 | #ifdef CONFIG_SMP | ||
| 125 | unsigned long value; | ||
| 126 | |||
| 127 | /* | ||
| 128 | * Disable TLB flush filter by setting HWCR.FFDIS on K8 | ||
| 129 | * bit 6 of msr C001_0015 | ||
| 130 | * | ||
| 131 | * Errata 63 for SH-B3 steppings | ||
| 132 | * Errata 122 for all steppings (F+ have it disabled by default) | ||
| 133 | */ | ||
| 134 | if (c->x86 == 0xf) { | ||
| 135 | rdmsrl(MSR_K8_HWCR, value); | ||
| 136 | value |= 1 << 6; | ||
| 137 | wrmsrl(MSR_K8_HWCR, value); | ||
| 138 | } | ||
| 139 | #endif | ||
| 140 | |||
| 141 | /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; | ||
| 142 | 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ | ||
| 143 | clear_cpu_cap(c, 0*32+31); | ||
| 144 | |||
| 145 | /* On C+ stepping K8 rep microcode works well for copy/memset */ | ||
| 146 | if (c->x86 == 0xf) { | ||
| 147 | level = cpuid_eax(1); | ||
| 148 | if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) | ||
| 149 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); | ||
| 150 | } | ||
| 151 | if (c->x86 == 0x10 || c->x86 == 0x11) | ||
| 152 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); | ||
| 153 | |||
| 154 | /* Enable workaround for FXSAVE leak */ | ||
| 155 | if (c->x86 >= 6) | ||
| 156 | set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); | ||
| 157 | |||
| 158 | level = get_model_name(c); | ||
| 159 | if (!level) { | ||
| 160 | switch (c->x86) { | ||
| 161 | case 0xf: | ||
| 162 | /* Should distinguish Models here, but this is only | ||
| 163 | a fallback anyways. */ | ||
| 164 | strcpy(c->x86_model_id, "Hammer"); | ||
| 165 | break; | ||
| 166 | } | ||
| 167 | } | ||
| 168 | display_cacheinfo(c); | ||
| 169 | |||
| 170 | /* Multi core CPU? */ | ||
| 171 | if (c->extended_cpuid_level >= 0x80000008) | ||
| 172 | amd_detect_cmp(c); | ||
| 173 | |||
| 174 | if (c->extended_cpuid_level >= 0x80000006 && | ||
| 175 | (cpuid_edx(0x80000006) & 0xf000)) | ||
| 176 | num_cache_leaves = 4; | ||
| 177 | else | ||
| 178 | num_cache_leaves = 3; | ||
| 179 | |||
| 180 | if (c->x86 >= 0xf && c->x86 <= 0x11) | ||
| 181 | set_cpu_cap(c, X86_FEATURE_K8); | ||
| 182 | |||
| 183 | /* MFENCE stops RDTSC speculation */ | ||
| 184 | set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); | ||
| 185 | |||
| 186 | if (c->x86 == 0x10) { | ||
| 187 | /* do this for boot cpu */ | ||
| 188 | if (c == &boot_cpu_data) | ||
| 189 | check_enable_amd_mmconf_dmi(); | ||
| 190 | |||
| 191 | fam10h_check_enable_mmcfg(); | ||
| 192 | } | ||
| 193 | |||
| 194 | if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) { | ||
| 195 | unsigned long long tseg; | ||
| 196 | |||
| 197 | /* | ||
| 198 | * Split up direct mapping around the TSEG SMM area. | ||
| 199 | * Don't do it for gbpages because there seems very little | ||
| 200 | * benefit in doing so. | ||
| 201 | */ | ||
| 202 | if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) { | ||
| 203 | printk(KERN_DEBUG "tseg: %010llx\n", tseg); | ||
| 204 | if ((tseg>>PMD_SHIFT) < | ||
| 205 | (max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) || | ||
| 206 | ((tseg>>PMD_SHIFT) < | ||
| 207 | (max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) && | ||
| 208 | (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT)))) | ||
| 209 | set_memory_4k((unsigned long)__va(tseg), 1); | ||
| 210 | } | ||
| 211 | } | ||
| 212 | } | ||
| 213 | |||
| 214 | static struct cpu_dev amd_cpu_dev __cpuinitdata = { | ||
| 215 | .c_vendor = "AMD", | ||
| 216 | .c_ident = { "AuthenticAMD" }, | ||
| 217 | .c_early_init = early_init_amd, | ||
| 218 | .c_init = init_amd, | ||
| 219 | }; | ||
| 220 | |||
| 221 | cpu_vendor_dev_register(X86_VENDOR_AMD, &amd_cpu_dev); | ||
| 222 | |||
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 1b1c56bb338f..c8e315f1aa83 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c | |||
| @@ -50,6 +50,8 @@ static double __initdata y = 3145727.0; | |||
| 50 | */ | 50 | */ |
| 51 | static void __init check_fpu(void) | 51 | static void __init check_fpu(void) |
| 52 | { | 52 | { |
| 53 | s32 fdiv_bug; | ||
| 54 | |||
| 53 | if (!boot_cpu_data.hard_math) { | 55 | if (!boot_cpu_data.hard_math) { |
| 54 | #ifndef CONFIG_MATH_EMULATION | 56 | #ifndef CONFIG_MATH_EMULATION |
| 55 | printk(KERN_EMERG "No coprocessor found and no math emulation present.\n"); | 57 | printk(KERN_EMERG "No coprocessor found and no math emulation present.\n"); |
| @@ -74,8 +76,10 @@ static void __init check_fpu(void) | |||
| 74 | "fistpl %0\n\t" | 76 | "fistpl %0\n\t" |
| 75 | "fwait\n\t" | 77 | "fwait\n\t" |
| 76 | "fninit" | 78 | "fninit" |
| 77 | : "=m" (*&boot_cpu_data.fdiv_bug) | 79 | : "=m" (*&fdiv_bug) |
| 78 | : "m" (*&x), "m" (*&y)); | 80 | : "m" (*&x), "m" (*&y)); |
| 81 | |||
| 82 | boot_cpu_data.fdiv_bug = fdiv_bug; | ||
| 79 | if (boot_cpu_data.fdiv_bug) | 83 | if (boot_cpu_data.fdiv_bug) |
| 80 | printk("Hmm, FPU with FDIV bug.\n"); | 84 | printk("Hmm, FPU with FDIV bug.\n"); |
| 81 | } | 85 | } |
| @@ -131,13 +135,7 @@ static void __init check_popad(void) | |||
| 131 | * (for due to lack of "invlpg" and working WP on a i386) | 135 | * (for due to lack of "invlpg" and working WP on a i386) |
| 132 | * - In order to run on anything without a TSC, we need to be | 136 | * - In order to run on anything without a TSC, we need to be |
| 133 | * compiled for a i486. | 137 | * compiled for a i486. |
| 134 | * - In order to support the local APIC on a buggy Pentium machine, | 138 | */ |
| 135 | * we need to be compiled with CONFIG_X86_GOOD_APIC disabled, | ||
| 136 | * which happens implicitly if compiled for a Pentium or lower | ||
| 137 | * (unless an advanced selection of CPU features is used) as an | ||
| 138 | * otherwise config implies a properly working local APIC without | ||
| 139 | * the need to do extra reads from the APIC. | ||
| 140 | */ | ||
| 141 | 139 | ||
| 142 | static void __init check_config(void) | 140 | static void __init check_config(void) |
| 143 | { | 141 | { |
| @@ -151,21 +149,6 @@ static void __init check_config(void) | |||
| 151 | if (boot_cpu_data.x86 == 3) | 149 | if (boot_cpu_data.x86 == 3) |
| 152 | panic("Kernel requires i486+ for 'invlpg' and other features"); | 150 | panic("Kernel requires i486+ for 'invlpg' and other features"); |
| 153 | #endif | 151 | #endif |
| 154 | |||
| 155 | /* | ||
| 156 | * If we were told we had a good local APIC, check for buggy Pentia, | ||
| 157 | * i.e. all B steppings and the C2 stepping of P54C when using their | ||
| 158 | * integrated APIC (see 11AP erratum in "Pentium Processor | ||
| 159 | * Specification Update"). | ||
| 160 | */ | ||
| 161 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_GOOD_APIC) | ||
| 162 | if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL | ||
| 163 | && cpu_has_apic | ||
| 164 | && boot_cpu_data.x86 == 5 | ||
| 165 | && boot_cpu_data.x86_model == 2 | ||
| 166 | && (boot_cpu_data.x86_mask < 6 || boot_cpu_data.x86_mask == 11)) | ||
| 167 | panic("Kernel compiled for PMMX+, assumes a local APIC without the read-before-write bug!"); | ||
| 168 | #endif | ||
| 169 | } | 152 | } |
| 170 | 153 | ||
| 171 | 154 | ||
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index e0f45edd6a55..89bfdd9cacc6 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c | |||
| @@ -289,7 +289,6 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c) | |||
| 289 | if (c->x86_model >= 6 && c->x86_model < 9) | 289 | if (c->x86_model >= 6 && c->x86_model < 9) |
| 290 | set_cpu_cap(c, X86_FEATURE_3DNOW); | 290 | set_cpu_cap(c, X86_FEATURE_3DNOW); |
| 291 | 291 | ||
| 292 | get_model_name(c); | ||
| 293 | display_cacheinfo(c); | 292 | display_cacheinfo(c); |
| 294 | } | 293 | } |
| 295 | 294 | ||
| @@ -314,6 +313,16 @@ enum { | |||
| 314 | EAMD3D = 1<<20, | 313 | EAMD3D = 1<<20, |
| 315 | }; | 314 | }; |
| 316 | 315 | ||
| 316 | static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c) | ||
| 317 | { | ||
| 318 | switch (c->x86) { | ||
| 319 | case 5: | ||
| 320 | /* Emulate MTRRs using Centaur's MCR. */ | ||
| 321 | set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR); | ||
| 322 | break; | ||
| 323 | } | ||
| 324 | } | ||
| 325 | |||
| 317 | static void __cpuinit init_centaur(struct cpuinfo_x86 *c) | 326 | static void __cpuinit init_centaur(struct cpuinfo_x86 *c) |
| 318 | { | 327 | { |
| 319 | 328 | ||
| @@ -462,8 +471,10 @@ centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size) | |||
| 462 | static struct cpu_dev centaur_cpu_dev __cpuinitdata = { | 471 | static struct cpu_dev centaur_cpu_dev __cpuinitdata = { |
| 463 | .c_vendor = "Centaur", | 472 | .c_vendor = "Centaur", |
| 464 | .c_ident = { "CentaurHauls" }, | 473 | .c_ident = { "CentaurHauls" }, |
| 474 | .c_early_init = early_init_centaur, | ||
| 465 | .c_init = init_centaur, | 475 | .c_init = init_centaur, |
| 466 | .c_size_cache = centaur_size_cache, | 476 | .c_size_cache = centaur_size_cache, |
| 477 | .c_x86_vendor = X86_VENDOR_CENTAUR, | ||
| 467 | }; | 478 | }; |
| 468 | 479 | ||
| 469 | cpu_vendor_dev_register(X86_VENDOR_CENTAUR, ¢aur_cpu_dev); | 480 | cpu_dev_register(centaur_cpu_dev); |
diff --git a/arch/x86/kernel/cpu/centaur_64.c b/arch/x86/kernel/cpu/centaur_64.c index 1d181c40e2e1..a1625f5a1e78 100644 --- a/arch/x86/kernel/cpu/centaur_64.c +++ b/arch/x86/kernel/cpu/centaur_64.c | |||
| @@ -16,9 +16,10 @@ static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c) | |||
| 16 | 16 | ||
| 17 | static void __cpuinit init_centaur(struct cpuinfo_x86 *c) | 17 | static void __cpuinit init_centaur(struct cpuinfo_x86 *c) |
| 18 | { | 18 | { |
| 19 | early_init_centaur(c); | ||
| 20 | |||
| 19 | if (c->x86 == 0x6 && c->x86_model >= 0xf) { | 21 | if (c->x86 == 0x6 && c->x86_model >= 0xf) { |
| 20 | c->x86_cache_alignment = c->x86_clflush_size * 2; | 22 | c->x86_cache_alignment = c->x86_clflush_size * 2; |
| 21 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | ||
| 22 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); | 23 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); |
| 23 | } | 24 | } |
| 24 | set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); | 25 | set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); |
| @@ -29,7 +30,8 @@ static struct cpu_dev centaur_cpu_dev __cpuinitdata = { | |||
| 29 | .c_ident = { "CentaurHauls" }, | 30 | .c_ident = { "CentaurHauls" }, |
| 30 | .c_early_init = early_init_centaur, | 31 | .c_early_init = early_init_centaur, |
| 31 | .c_init = init_centaur, | 32 | .c_init = init_centaur, |
| 33 | .c_x86_vendor = X86_VENDOR_CENTAUR, | ||
| 32 | }; | 34 | }; |
| 33 | 35 | ||
| 34 | cpu_vendor_dev_register(X86_VENDOR_CENTAUR, ¢aur_cpu_dev); | 36 | cpu_dev_register(centaur_cpu_dev); |
| 35 | 37 | ||
diff --git a/arch/x86/kernel/cpu/cmpxchg.c b/arch/x86/kernel/cpu/cmpxchg.c new file mode 100644 index 000000000000..2056ccf572cc --- /dev/null +++ b/arch/x86/kernel/cpu/cmpxchg.c | |||
| @@ -0,0 +1,72 @@ | |||
| 1 | /* | ||
| 2 | * cmpxchg*() fallbacks for CPUs not supporting these instructions | ||
| 3 | */ | ||
| 4 | |||
| 5 | #include <linux/kernel.h> | ||
| 6 | #include <linux/smp.h> | ||
| 7 | #include <linux/module.h> | ||
| 8 | |||
| 9 | #ifndef CONFIG_X86_CMPXCHG | ||
| 10 | unsigned long cmpxchg_386_u8(volatile void *ptr, u8 old, u8 new) | ||
| 11 | { | ||
| 12 | u8 prev; | ||
| 13 | unsigned long flags; | ||
| 14 | |||
| 15 | /* Poor man's cmpxchg for 386. Unsuitable for SMP */ | ||
| 16 | local_irq_save(flags); | ||
| 17 | prev = *(u8 *)ptr; | ||
| 18 | if (prev == old) | ||
| 19 | *(u8 *)ptr = new; | ||
| 20 | local_irq_restore(flags); | ||
| 21 | return prev; | ||
| 22 | } | ||
| 23 | EXPORT_SYMBOL(cmpxchg_386_u8); | ||
| 24 | |||
| 25 | unsigned long cmpxchg_386_u16(volatile void *ptr, u16 old, u16 new) | ||
| 26 | { | ||
| 27 | u16 prev; | ||
| 28 | unsigned long flags; | ||
| 29 | |||
| 30 | /* Poor man's cmpxchg for 386. Unsuitable for SMP */ | ||
| 31 | local_irq_save(flags); | ||
| 32 | prev = *(u16 *)ptr; | ||
| 33 | if (prev == old) | ||
| 34 | *(u16 *)ptr = new; | ||
| 35 | local_irq_restore(flags); | ||
| 36 | return prev; | ||
| 37 | } | ||
| 38 | EXPORT_SYMBOL(cmpxchg_386_u16); | ||
| 39 | |||
| 40 | unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new) | ||
| 41 | { | ||
| 42 | u32 prev; | ||
| 43 | unsigned long flags; | ||
| 44 | |||
| 45 | /* Poor man's cmpxchg for 386. Unsuitable for SMP */ | ||
| 46 | local_irq_save(flags); | ||
| 47 | prev = *(u32 *)ptr; | ||
| 48 | if (prev == old) | ||
| 49 | *(u32 *)ptr = new; | ||
| 50 | local_irq_restore(flags); | ||
| 51 | return prev; | ||
| 52 | } | ||
| 53 | EXPORT_SYMBOL(cmpxchg_386_u32); | ||
| 54 | #endif | ||
| 55 | |||
| 56 | #ifndef CONFIG_X86_CMPXCHG64 | ||
| 57 | unsigned long long cmpxchg_486_u64(volatile void *ptr, u64 old, u64 new) | ||
| 58 | { | ||
| 59 | u64 prev; | ||
| 60 | unsigned long flags; | ||
| 61 | |||
| 62 | /* Poor man's cmpxchg8b for 386 and 486. Unsuitable for SMP */ | ||
| 63 | local_irq_save(flags); | ||
| 64 | prev = *(u64 *)ptr; | ||
| 65 | if (prev == old) | ||
| 66 | *(u64 *)ptr = new; | ||
| 67 | local_irq_restore(flags); | ||
| 68 | return prev; | ||
| 69 | } | ||
| 70 | EXPORT_SYMBOL(cmpxchg_486_u64); | ||
| 71 | #endif | ||
| 72 | |||
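The new cmpxchg.c helpers are the out-of-line fallbacks used when the kernel is built for a CPU without the CMPXCHG (or, for the 64-bit variant, CMPXCHG8B) instruction: they emulate compare-and-exchange by disabling local interrupts around a plain load/compare/store, which is fine on UP but, as the comments note, unsuitable for SMP. A hypothetical caller, only to show the retry-loop pattern such a primitive supports (not part of the patch):

    /* Illustrative kernel-style snippet: increment a counter using the
     * 386 fallback above.  It relies only on cmpxchg_386_u32() as
     * defined in the new file: the helper returns the previous value of
     * *ptr and stores 'new' only when that previous value equals 'old'. */
    static void counter_inc(volatile u32 *ctr)
    {
            u32 old;

            do {
                    old = *ctr;
            } while (cmpxchg_386_u32((volatile void *)ctr, old, old + 1) != old);
    }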
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 80ab20d4fa39..25581dcb280e 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
| @@ -1,27 +1,62 @@ | |||
| 1 | #include <linux/init.h> | 1 | #include <linux/init.h> |
| 2 | #include <linux/kernel.h> | ||
| 3 | #include <linux/sched.h> | ||
| 2 | #include <linux/string.h> | 4 | #include <linux/string.h> |
| 5 | #include <linux/bootmem.h> | ||
| 6 | #include <linux/bitops.h> | ||
| 7 | #include <linux/module.h> | ||
| 8 | #include <linux/kgdb.h> | ||
| 9 | #include <linux/topology.h> | ||
| 3 | #include <linux/delay.h> | 10 | #include <linux/delay.h> |
| 4 | #include <linux/smp.h> | 11 | #include <linux/smp.h> |
| 5 | #include <linux/module.h> | ||
| 6 | #include <linux/percpu.h> | 12 | #include <linux/percpu.h> |
| 7 | #include <linux/bootmem.h> | ||
| 8 | #include <asm/processor.h> | ||
| 9 | #include <asm/i387.h> | 13 | #include <asm/i387.h> |
| 10 | #include <asm/msr.h> | 14 | #include <asm/msr.h> |
| 11 | #include <asm/io.h> | 15 | #include <asm/io.h> |
| 16 | #include <asm/linkage.h> | ||
| 12 | #include <asm/mmu_context.h> | 17 | #include <asm/mmu_context.h> |
| 13 | #include <asm/mtrr.h> | 18 | #include <asm/mtrr.h> |
| 14 | #include <asm/mce.h> | 19 | #include <asm/mce.h> |
| 15 | #include <asm/pat.h> | 20 | #include <asm/pat.h> |
| 21 | #include <asm/asm.h> | ||
| 22 | #include <asm/numa.h> | ||
| 16 | #ifdef CONFIG_X86_LOCAL_APIC | 23 | #ifdef CONFIG_X86_LOCAL_APIC |
| 17 | #include <asm/mpspec.h> | 24 | #include <asm/mpspec.h> |
| 18 | #include <asm/apic.h> | 25 | #include <asm/apic.h> |
| 19 | #include <mach_apic.h> | 26 | #include <mach_apic.h> |
| 27 | #include <asm/genapic.h> | ||
| 20 | #endif | 28 | #endif |
| 21 | 29 | ||
| 30 | #include <asm/pda.h> | ||
| 31 | #include <asm/pgtable.h> | ||
| 32 | #include <asm/processor.h> | ||
| 33 | #include <asm/desc.h> | ||
| 34 | #include <asm/atomic.h> | ||
| 35 | #include <asm/proto.h> | ||
| 36 | #include <asm/sections.h> | ||
| 37 | #include <asm/setup.h> | ||
| 38 | |||
| 22 | #include "cpu.h" | 39 | #include "cpu.h" |
| 23 | 40 | ||
| 41 | static struct cpu_dev *this_cpu __cpuinitdata; | ||
| 42 | |||
| 43 | #ifdef CONFIG_X86_64 | ||
| 44 | /* We need valid kernel segments for data and code in long mode too | ||
| 45 | * IRET will check the segment types kkeil 2000/10/28 | ||
| 46 | * Also sysret mandates a special GDT layout | ||
| 47 | */ | ||
| 48 | /* The TLS descriptors are currently at a different place compared to i386. | ||
| 49 | Hopefully nobody expects them at a fixed place (Wine?) */ | ||
| 24 | DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { | 50 | DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { |
| 51 | [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, | ||
| 52 | [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, | ||
| 53 | [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, | ||
| 54 | [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, | ||
| 55 | [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, | ||
| 56 | [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, | ||
| 57 | } }; | ||
| 58 | #else | ||
| 59 | DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { | ||
| 25 | [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, | 60 | [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, |
| 26 | [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, | 61 | [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, |
| 27 | [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, | 62 | [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, |
| @@ -55,17 +90,157 @@ DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { | |||
| 55 | [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, | 90 | [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, |
| 56 | [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } }, | 91 | [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } }, |
| 57 | } }; | 92 | } }; |
| 93 | #endif | ||
| 58 | EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); | 94 | EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); |
| 59 | 95 | ||
| 60 | __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; | 96 | #ifdef CONFIG_X86_32 |
| 61 | |||
| 62 | static int cachesize_override __cpuinitdata = -1; | 97 | static int cachesize_override __cpuinitdata = -1; |
| 63 | static int disable_x86_serial_nr __cpuinitdata = 1; | 98 | static int disable_x86_serial_nr __cpuinitdata = 1; |
| 64 | 99 | ||
| 65 | struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; | 100 | static int __init cachesize_setup(char *str) |
| 101 | { | ||
| 102 | get_option(&str, &cachesize_override); | ||
| 103 | return 1; | ||
| 104 | } | ||
| 105 | __setup("cachesize=", cachesize_setup); | ||
| 106 | |||
| 107 | static int __init x86_fxsr_setup(char *s) | ||
| 108 | { | ||
| 109 | setup_clear_cpu_cap(X86_FEATURE_FXSR); | ||
| 110 | setup_clear_cpu_cap(X86_FEATURE_XMM); | ||
| 111 | return 1; | ||
| 112 | } | ||
| 113 | __setup("nofxsr", x86_fxsr_setup); | ||
| 114 | |||
| 115 | static int __init x86_sep_setup(char *s) | ||
| 116 | { | ||
| 117 | setup_clear_cpu_cap(X86_FEATURE_SEP); | ||
| 118 | return 1; | ||
| 119 | } | ||
| 120 | __setup("nosep", x86_sep_setup); | ||
| 121 | |||
| 122 | /* Standard macro to see if a specific flag is changeable */ | ||
| 123 | static inline int flag_is_changeable_p(u32 flag) | ||
| 124 | { | ||
| 125 | u32 f1, f2; | ||
| 126 | |||
| 127 | /* | ||
| 128 | * Cyrix and IDT cpus allow disabling of CPUID | ||
| 129 | * so the code below may return different results | ||
| 130 | * when it is executed before and after enabling | ||
| 131 | * the CPUID. Add "volatile" to not allow gcc to | ||
| 132 | * optimize the subsequent calls to this function. | ||
| 133 | */ | ||
| 134 | asm volatile ("pushfl\n\t" | ||
| 135 | "pushfl\n\t" | ||
| 136 | "popl %0\n\t" | ||
| 137 | "movl %0,%1\n\t" | ||
| 138 | "xorl %2,%0\n\t" | ||
| 139 | "pushl %0\n\t" | ||
| 140 | "popfl\n\t" | ||
| 141 | "pushfl\n\t" | ||
| 142 | "popl %0\n\t" | ||
| 143 | "popfl\n\t" | ||
| 144 | : "=&r" (f1), "=&r" (f2) | ||
| 145 | : "ir" (flag)); | ||
| 146 | |||
| 147 | return ((f1^f2) & flag) != 0; | ||
| 148 | } | ||
| 149 | |||
| 150 | /* Probe for the CPUID instruction */ | ||
| 151 | static int __cpuinit have_cpuid_p(void) | ||
| 152 | { | ||
| 153 | return flag_is_changeable_p(X86_EFLAGS_ID); | ||
| 154 | } | ||
| 155 | |||
| 156 | static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) | ||
| 157 | { | ||
| 158 | if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) { | ||
| 159 | /* Disable processor serial number */ | ||
| 160 | unsigned long lo, hi; | ||
| 161 | rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi); | ||
| 162 | lo |= 0x200000; | ||
| 163 | wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi); | ||
| 164 | printk(KERN_NOTICE "CPU serial number disabled.\n"); | ||
| 165 | clear_cpu_cap(c, X86_FEATURE_PN); | ||
| 166 | |||
| 167 | /* Disabling the serial number may affect the cpuid level */ | ||
| 168 | c->cpuid_level = cpuid_eax(0); | ||
| 169 | } | ||
| 170 | } | ||
| 171 | |||
| 172 | static int __init x86_serial_nr_setup(char *s) | ||
| 173 | { | ||
| 174 | disable_x86_serial_nr = 0; | ||
| 175 | return 1; | ||
| 176 | } | ||
| 177 | __setup("serialnumber", x86_serial_nr_setup); | ||
| 178 | #else | ||
| 179 | static inline int flag_is_changeable_p(u32 flag) | ||
| 180 | { | ||
| 181 | return 1; | ||
| 182 | } | ||
| 183 | /* Probe for the CPUID instruction */ | ||
| 184 | static inline int have_cpuid_p(void) | ||
| 185 | { | ||
| 186 | return 1; | ||
| 187 | } | ||
| 188 | static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) | ||
| 189 | { | ||
| 190 | } | ||
| 191 | #endif | ||
| 192 | |||
| 193 | /* | ||
| 194 | * Naming convention should be: <Name> [(<Codename>)] | ||
| 195 | * This table only is used unless init_<vendor>() below doesn't set it; | ||
| 196 | * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used | ||
| 197 | * | ||
| 198 | */ | ||
| 199 | |||
| 200 | /* Look up CPU names by table lookup. */ | ||
| 201 | static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) | ||
| 202 | { | ||
| 203 | struct cpu_model_info *info; | ||
| 204 | |||
| 205 | if (c->x86_model >= 16) | ||
| 206 | return NULL; /* Range check */ | ||
| 207 | |||
| 208 | if (!this_cpu) | ||
| 209 | return NULL; | ||
| 210 | |||
| 211 | info = this_cpu->c_models; | ||
| 212 | |||
| 213 | while (info && info->family) { | ||
| 214 | if (info->family == c->x86) | ||
| 215 | return info->model_names[c->x86_model]; | ||
| 216 | info++; | ||
| 217 | } | ||
| 218 | return NULL; /* Not found */ | ||
| 219 | } | ||
| 220 | |||
| 221 | __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; | ||
| 222 | |||
| 223 | /* Current gdt points %fs at the "master" per-cpu area: after this, | ||
| 224 | * it's on the real one. */ | ||
| 225 | void switch_to_new_gdt(void) | ||
| 226 | { | ||
| 227 | struct desc_ptr gdt_descr; | ||
| 228 | |||
| 229 | gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); | ||
| 230 | gdt_descr.size = GDT_SIZE - 1; | ||
| 231 | load_gdt(&gdt_descr); | ||
| 232 | #ifdef CONFIG_X86_32 | ||
| 233 | asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); | ||
| 234 | #endif | ||
| 235 | } | ||
| 236 | |||
| 237 | static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; | ||
| 66 | 238 | ||
| 67 | static void __cpuinit default_init(struct cpuinfo_x86 *c) | 239 | static void __cpuinit default_init(struct cpuinfo_x86 *c) |
| 68 | { | 240 | { |
| 241 | #ifdef CONFIG_X86_64 | ||
| 242 | display_cacheinfo(c); | ||
| 243 | #else | ||
| 69 | /* Not much we can do here... */ | 244 | /* Not much we can do here... */ |
| 70 | /* Check if at least it has cpuid */ | 245 | /* Check if at least it has cpuid */ |
| 71 | if (c->cpuid_level == -1) { | 246 | if (c->cpuid_level == -1) { |
| @@ -75,28 +250,22 @@ static void __cpuinit default_init(struct cpuinfo_x86 *c) | |||
| 75 | else if (c->x86 == 3) | 250 | else if (c->x86 == 3) |
| 76 | strcpy(c->x86_model_id, "386"); | 251 | strcpy(c->x86_model_id, "386"); |
| 77 | } | 252 | } |
| 253 | #endif | ||
| 78 | } | 254 | } |
| 79 | 255 | ||
| 80 | static struct cpu_dev __cpuinitdata default_cpu = { | 256 | static struct cpu_dev __cpuinitdata default_cpu = { |
| 81 | .c_init = default_init, | 257 | .c_init = default_init, |
| 82 | .c_vendor = "Unknown", | 258 | .c_vendor = "Unknown", |
| 259 | .c_x86_vendor = X86_VENDOR_UNKNOWN, | ||
| 83 | }; | 260 | }; |
| 84 | static struct cpu_dev *this_cpu __cpuinitdata = &default_cpu; | ||
| 85 | |||
| 86 | static int __init cachesize_setup(char *str) | ||
| 87 | { | ||
| 88 | get_option(&str, &cachesize_override); | ||
| 89 | return 1; | ||
| 90 | } | ||
| 91 | __setup("cachesize=", cachesize_setup); | ||
| 92 | 261 | ||
| 93 | int __cpuinit get_model_name(struct cpuinfo_x86 *c) | 262 | static void __cpuinit get_model_name(struct cpuinfo_x86 *c) |
| 94 | { | 263 | { |
| 95 | unsigned int *v; | 264 | unsigned int *v; |
| 96 | char *p, *q; | 265 | char *p, *q; |
| 97 | 266 | ||
| 98 | if (cpuid_eax(0x80000000) < 0x80000004) | 267 | if (c->extended_cpuid_level < 0x80000004) |
| 99 | return 0; | 268 | return; |
| 100 | 269 | ||
| 101 | v = (unsigned int *) c->x86_model_id; | 270 | v = (unsigned int *) c->x86_model_id; |
| 102 | cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); | 271 | cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); |
| @@ -115,30 +284,34 @@ int __cpuinit get_model_name(struct cpuinfo_x86 *c) | |||
| 115 | while (q <= &c->x86_model_id[48]) | 284 | while (q <= &c->x86_model_id[48]) |
| 116 | *q++ = '\0'; /* Zero-pad the rest */ | 285 | *q++ = '\0'; /* Zero-pad the rest */ |
| 117 | } | 286 | } |
| 118 | |||
| 119 | return 1; | ||
| 120 | } | 287 | } |
| 121 | 288 | ||
| 122 | |||
| 123 | void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) | 289 | void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) |
| 124 | { | 290 | { |
| 125 | unsigned int n, dummy, ecx, edx, l2size; | 291 | unsigned int n, dummy, ebx, ecx, edx, l2size; |
| 126 | 292 | ||
| 127 | n = cpuid_eax(0x80000000); | 293 | n = c->extended_cpuid_level; |
| 128 | 294 | ||
| 129 | if (n >= 0x80000005) { | 295 | if (n >= 0x80000005) { |
| 130 | cpuid(0x80000005, &dummy, &dummy, &ecx, &edx); | 296 | cpuid(0x80000005, &dummy, &ebx, &ecx, &edx); |
| 131 | printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", | 297 | printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", |
| 132 | edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); | 298 | edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); |
| 133 | c->x86_cache_size = (ecx>>24)+(edx>>24); | 299 | c->x86_cache_size = (ecx>>24) + (edx>>24); |
| 300 | #ifdef CONFIG_X86_64 | ||
| 301 | /* On K8 L1 TLB is inclusive, so don't count it */ | ||
| 302 | c->x86_tlbsize = 0; | ||
| 303 | #endif | ||
| 134 | } | 304 | } |
| 135 | 305 | ||
| 136 | if (n < 0x80000006) /* Some chips just has a large L1. */ | 306 | if (n < 0x80000006) /* Some chips just has a large L1. */ |
| 137 | return; | 307 | return; |
| 138 | 308 | ||
| 139 | ecx = cpuid_ecx(0x80000006); | 309 | cpuid(0x80000006, &dummy, &ebx, &ecx, &edx); |
| 140 | l2size = ecx >> 16; | 310 | l2size = ecx >> 16; |
| 141 | 311 | ||
| 312 | #ifdef CONFIG_X86_64 | ||
| 313 | c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff); | ||
| 314 | #else | ||
| 142 | /* do processor-specific cache resizing */ | 315 | /* do processor-specific cache resizing */ |
| 143 | if (this_cpu->c_size_cache) | 316 | if (this_cpu->c_size_cache) |
| 144 | l2size = this_cpu->c_size_cache(c, l2size); | 317 | l2size = this_cpu->c_size_cache(c, l2size); |
| @@ -149,116 +322,106 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) | |||
| 149 | 322 | ||
| 150 | if (l2size == 0) | 323 | if (l2size == 0) |
| 151 | return; /* Again, no L2 cache is possible */ | 324 | return; /* Again, no L2 cache is possible */ |
| 325 | #endif | ||
| 152 | 326 | ||
| 153 | c->x86_cache_size = l2size; | 327 | c->x86_cache_size = l2size; |
| 154 | 328 | ||
| 155 | printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", | 329 | printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", |
| 156 | l2size, ecx & 0xFF); | 330 | l2size, ecx & 0xFF); |
| 157 | } | 331 | } |
| 158 | 332 | ||
| 159 | /* | 333 | void __cpuinit detect_ht(struct cpuinfo_x86 *c) |
| 160 | * Naming convention should be: <Name> [(<Codename>)] | ||
| 161 | * This table only is used unless init_<vendor>() below doesn't set it; | ||
| 162 | * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used | ||
| 163 | * | ||
| 164 | */ | ||
| 165 | |||
| 166 | /* Look up CPU names by table lookup. */ | ||
| 167 | static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) | ||
| 168 | { | 334 | { |
| 169 | struct cpu_model_info *info; | 335 | #ifdef CONFIG_X86_HT |
| 336 | u32 eax, ebx, ecx, edx; | ||
| 337 | int index_msb, core_bits; | ||
| 170 | 338 | ||
| 171 | if (c->x86_model >= 16) | 339 | if (!cpu_has(c, X86_FEATURE_HT)) |
| 172 | return NULL; /* Range check */ | 340 | return; |
| 173 | 341 | ||
| 174 | if (!this_cpu) | 342 | if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) |
| 175 | return NULL; | 343 | goto out; |
| 176 | 344 | ||
| 177 | info = this_cpu->c_models; | 345 | if (cpu_has(c, X86_FEATURE_XTOPOLOGY)) |
| 346 | return; | ||
| 178 | 347 | ||
| 179 | while (info && info->family) { | 348 | cpuid(1, &eax, &ebx, &ecx, &edx); |
| 180 | if (info->family == c->x86) | 349 | |
| 181 | return info->model_names[c->x86_model]; | 350 | smp_num_siblings = (ebx & 0xff0000) >> 16; |
| 182 | info++; | 351 | |
| 352 | if (smp_num_siblings == 1) { | ||
| 353 | printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); | ||
| 354 | } else if (smp_num_siblings > 1) { | ||
| 355 | |||
| 356 | if (smp_num_siblings > NR_CPUS) { | ||
| 357 | printk(KERN_WARNING "CPU: Unsupported number of siblings %d", | ||
| 358 | smp_num_siblings); | ||
| 359 | smp_num_siblings = 1; | ||
| 360 | return; | ||
| 361 | } | ||
| 362 | |||
| 363 | index_msb = get_count_order(smp_num_siblings); | ||
| 364 | #ifdef CONFIG_X86_64 | ||
| 365 | c->phys_proc_id = phys_pkg_id(index_msb); | ||
| 366 | #else | ||
| 367 | c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb); | ||
| 368 | #endif | ||
| 369 | |||
| 370 | smp_num_siblings = smp_num_siblings / c->x86_max_cores; | ||
| 371 | |||
| 372 | index_msb = get_count_order(smp_num_siblings); | ||
| 373 | |||
| 374 | core_bits = get_count_order(c->x86_max_cores); | ||
| 375 | |||
| 376 | #ifdef CONFIG_X86_64 | ||
| 377 | c->cpu_core_id = phys_pkg_id(index_msb) & | ||
| 378 | ((1 << core_bits) - 1); | ||
| 379 | #else | ||
| 380 | c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) & | ||
| 381 | ((1 << core_bits) - 1); | ||
| 382 | #endif | ||
| 183 | } | 383 | } |
| 184 | return NULL; /* Not found */ | ||
| 185 | } | ||
| 186 | 384 | ||
| 385 | out: | ||
| 386 | if ((c->x86_max_cores * smp_num_siblings) > 1) { | ||
| 387 | printk(KERN_INFO "CPU: Physical Processor ID: %d\n", | ||
| 388 | c->phys_proc_id); | ||
| 389 | printk(KERN_INFO "CPU: Processor Core ID: %d\n", | ||
| 390 | c->cpu_core_id); | ||
| 391 | } | ||
| 392 | #endif | ||
| 393 | } | ||
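The detect_ht() hunk above splits the initial APIC ID into a physical-package number and a per-package core number using get_count_order(), a ceil(log2) helper. Below is an illustrative user-space sketch of that arithmetic, not part of this commit; the APIC ID, sibling count and core count are made-up sample values for a one-package, two-core, two-thread part.

/*
 * Illustrative sketch, not part of this commit: the APIC-ID arithmetic
 * detect_ht() performs above.  count_order() models the kernel's
 * get_count_order() (ceil(log2(n)) for n >= 1); all input values are
 * made-up samples for a 1-package, 2-core, 2-thread CPU.
 */
#include <stdio.h>

static int count_order(unsigned int n)
{
	int order = 0;

	while ((1u << order) < n)
		order++;
	return order;
}

int main(void)
{
	unsigned int apicid    = 0x3;	/* sample initial APIC ID: pkg 0, core 1, thread 1 */
	unsigned int siblings  = 4;	/* logical CPUs per package (CPUID.1 EBX[23:16]) */
	unsigned int max_cores = 2;	/* cores per package */

	int pkg_bits  = count_order(siblings);			/* bits covering the whole package */
	int smt_bits  = count_order(siblings / max_cores);	/* bits covering one core's threads */
	int core_bits = count_order(max_cores);

	unsigned int phys_proc_id = apicid >> pkg_bits;
	unsigned int cpu_core_id  = (apicid >> smt_bits) & ((1 << core_bits) - 1);

	printf("package %u, core %u\n", phys_proc_id, cpu_core_id);	/* prints "package 0, core 1" */
	return 0;
}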
| 187 | 394 | ||
| 188 | static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early) | 395 | static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) |
| 189 | { | 396 | { |
| 190 | char *v = c->x86_vendor_id; | 397 | char *v = c->x86_vendor_id; |
| 191 | int i; | 398 | int i; |
| 192 | static int printed; | 399 | static int printed; |
| 193 | 400 | ||
| 194 | for (i = 0; i < X86_VENDOR_NUM; i++) { | 401 | for (i = 0; i < X86_VENDOR_NUM; i++) { |
| 195 | if (cpu_devs[i]) { | 402 | if (!cpu_devs[i]) |
| 196 | if (!strcmp(v, cpu_devs[i]->c_ident[0]) || | 403 | break; |
| 197 | (cpu_devs[i]->c_ident[1] && | 404 | |
| 198 | !strcmp(v, cpu_devs[i]->c_ident[1]))) { | 405 | if (!strcmp(v, cpu_devs[i]->c_ident[0]) || |
| 199 | c->x86_vendor = i; | 406 | (cpu_devs[i]->c_ident[1] && |
| 200 | if (!early) | 407 | !strcmp(v, cpu_devs[i]->c_ident[1]))) { |
| 201 | this_cpu = cpu_devs[i]; | 408 | this_cpu = cpu_devs[i]; |
| 202 | return; | 409 | c->x86_vendor = this_cpu->c_x86_vendor; |
| 203 | } | 410 | return; |
| 204 | } | 411 | } |
| 205 | } | 412 | } |
| 413 | |||
| 206 | if (!printed) { | 414 | if (!printed) { |
| 207 | printed++; | 415 | printed++; |
| 208 | printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n"); | 416 | printk(KERN_ERR "CPU: vendor_id '%s' unknown, using generic init.\n", v); |
| 209 | printk(KERN_ERR "CPU: Your system may be unstable.\n"); | 417 | printk(KERN_ERR "CPU: Your system may be unstable.\n"); |
| 210 | } | 418 | } |
| 419 | |||
| 211 | c->x86_vendor = X86_VENDOR_UNKNOWN; | 420 | c->x86_vendor = X86_VENDOR_UNKNOWN; |
| 212 | this_cpu = &default_cpu; | 421 | this_cpu = &default_cpu; |
| 213 | } | 422 | } |
| 214 | 423 | ||
| 215 | 424 | void __cpuinit cpu_detect(struct cpuinfo_x86 *c) | |
| 216 | static int __init x86_fxsr_setup(char *s) | ||
| 217 | { | ||
| 218 | setup_clear_cpu_cap(X86_FEATURE_FXSR); | ||
| 219 | setup_clear_cpu_cap(X86_FEATURE_XMM); | ||
| 220 | return 1; | ||
| 221 | } | ||
| 222 | __setup("nofxsr", x86_fxsr_setup); | ||
| 223 | |||
| 224 | |||
| 225 | static int __init x86_sep_setup(char *s) | ||
| 226 | { | ||
| 227 | setup_clear_cpu_cap(X86_FEATURE_SEP); | ||
| 228 | return 1; | ||
| 229 | } | ||
| 230 | __setup("nosep", x86_sep_setup); | ||
| 231 | |||
| 232 | |||
| 233 | /* Standard macro to see if a specific flag is changeable */ | ||
| 234 | static inline int flag_is_changeable_p(u32 flag) | ||
| 235 | { | ||
| 236 | u32 f1, f2; | ||
| 237 | |||
| 238 | asm("pushfl\n\t" | ||
| 239 | "pushfl\n\t" | ||
| 240 | "popl %0\n\t" | ||
| 241 | "movl %0,%1\n\t" | ||
| 242 | "xorl %2,%0\n\t" | ||
| 243 | "pushl %0\n\t" | ||
| 244 | "popfl\n\t" | ||
| 245 | "pushfl\n\t" | ||
| 246 | "popl %0\n\t" | ||
| 247 | "popfl\n\t" | ||
| 248 | : "=&r" (f1), "=&r" (f2) | ||
| 249 | : "ir" (flag)); | ||
| 250 | |||
| 251 | return ((f1^f2) & flag) != 0; | ||
| 252 | } | ||
| 253 | |||
| 254 | |||
| 255 | /* Probe for the CPUID instruction */ | ||
| 256 | static int __cpuinit have_cpuid_p(void) | ||
| 257 | { | ||
| 258 | return flag_is_changeable_p(X86_EFLAGS_ID); | ||
| 259 | } | ||
| 260 | |||
| 261 | void __init cpu_detect(struct cpuinfo_x86 *c) | ||
| 262 | { | 425 | { |
| 263 | /* Get vendor name */ | 426 | /* Get vendor name */ |
| 264 | cpuid(0x00000000, (unsigned int *)&c->cpuid_level, | 427 | cpuid(0x00000000, (unsigned int *)&c->cpuid_level, |
| @@ -267,50 +430,87 @@ void __init cpu_detect(struct cpuinfo_x86 *c) | |||
| 267 | (unsigned int *)&c->x86_vendor_id[4]); | 430 | (unsigned int *)&c->x86_vendor_id[4]); |
| 268 | 431 | ||
| 269 | c->x86 = 4; | 432 | c->x86 = 4; |
| 433 | /* Intel-defined flags: level 0x00000001 */ | ||
| 270 | if (c->cpuid_level >= 0x00000001) { | 434 | if (c->cpuid_level >= 0x00000001) { |
| 271 | u32 junk, tfms, cap0, misc; | 435 | u32 junk, tfms, cap0, misc; |
| 272 | cpuid(0x00000001, &tfms, &misc, &junk, &cap0); | 436 | cpuid(0x00000001, &tfms, &misc, &junk, &cap0); |
| 273 | c->x86 = (tfms >> 8) & 15; | 437 | c->x86 = (tfms >> 8) & 0xf; |
| 274 | c->x86_model = (tfms >> 4) & 15; | 438 | c->x86_model = (tfms >> 4) & 0xf; |
| 439 | c->x86_mask = tfms & 0xf; | ||
| 275 | if (c->x86 == 0xf) | 440 | if (c->x86 == 0xf) |
| 276 | c->x86 += (tfms >> 20) & 0xff; | 441 | c->x86 += (tfms >> 20) & 0xff; |
| 277 | if (c->x86 >= 0x6) | 442 | if (c->x86 >= 0x6) |
| 278 | c->x86_model += ((tfms >> 16) & 0xF) << 4; | 443 | c->x86_model += ((tfms >> 16) & 0xf) << 4; |
| 279 | c->x86_mask = tfms & 15; | ||
| 280 | if (cap0 & (1<<19)) { | 444 | if (cap0 & (1<<19)) { |
| 281 | c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8; | ||
| 282 | c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; | 445 | c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; |
| 446 | c->x86_cache_alignment = c->x86_clflush_size; | ||
| 283 | } | 447 | } |
| 284 | } | 448 | } |
| 285 | } | 449 | } |
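cpu_detect() above unpacks the CPUID leaf 1 signature into family, model and stepping, folding in the extended family and model fields. As a worked example, not part of this commit, the sample signature 0x00010676 decodes to family 6, model 0x17, stepping 6:

/*
 * Illustrative worked example, not part of this commit: the bit fields
 * cpu_detect() extracts from CPUID leaf 1 EAX.  0x00010676 is a sample
 * signature; it decodes to family 6, model 0x17, stepping 6.
 */
#include <stdio.h>

int main(void)
{
	unsigned int tfms = 0x00010676;		/* sample EAX returned by CPUID(1) */
	unsigned int family   = (tfms >> 8) & 0xf;
	unsigned int model    = (tfms >> 4) & 0xf;
	unsigned int stepping = tfms & 0xf;

	if (family == 0xf)			/* extended family only extends family 15 */
		family += (tfms >> 20) & 0xff;
	if (family >= 0x6)			/* extended model applies from family 6 up */
		model += ((tfms >> 16) & 0xf) << 4;

	printf("family 0x%x, model 0x%x, stepping 0x%x\n", family, model, stepping);
	return 0;
}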
| 286 | static void __cpuinit early_get_cap(struct cpuinfo_x86 *c) | 450 | |
| 451 | static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) | ||
| 287 | { | 452 | { |
| 288 | u32 tfms, xlvl; | 453 | u32 tfms, xlvl; |
| 289 | unsigned int ebx; | 454 | u32 ebx; |
| 290 | 455 | ||
| 291 | memset(&c->x86_capability, 0, sizeof c->x86_capability); | 456 | /* Intel-defined flags: level 0x00000001 */ |
| 292 | if (have_cpuid_p()) { | 457 | if (c->cpuid_level >= 0x00000001) { |
| 293 | /* Intel-defined flags: level 0x00000001 */ | 458 | u32 capability, excap; |
| 294 | if (c->cpuid_level >= 0x00000001) { | 459 | cpuid(0x00000001, &tfms, &ebx, &excap, &capability); |
| 295 | u32 capability, excap; | 460 | c->x86_capability[0] = capability; |
| 296 | cpuid(0x00000001, &tfms, &ebx, &excap, &capability); | 461 | c->x86_capability[4] = excap; |
| 297 | c->x86_capability[0] = capability; | 462 | } |
| 298 | c->x86_capability[4] = excap; | ||
| 299 | } | ||
| 300 | 463 | ||
| 301 | /* AMD-defined flags: level 0x80000001 */ | 464 | /* AMD-defined flags: level 0x80000001 */ |
| 302 | xlvl = cpuid_eax(0x80000000); | 465 | xlvl = cpuid_eax(0x80000000); |
| 303 | if ((xlvl & 0xffff0000) == 0x80000000) { | 466 | c->extended_cpuid_level = xlvl; |
| 304 | if (xlvl >= 0x80000001) { | 467 | if ((xlvl & 0xffff0000) == 0x80000000) { |
| 305 | c->x86_capability[1] = cpuid_edx(0x80000001); | 468 | if (xlvl >= 0x80000001) { |
| 306 | c->x86_capability[6] = cpuid_ecx(0x80000001); | 469 | c->x86_capability[1] = cpuid_edx(0x80000001); |
| 307 | } | 470 | c->x86_capability[6] = cpuid_ecx(0x80000001); |
| 308 | } | 471 | } |
| 472 | } | ||
| 473 | |||
| 474 | #ifdef CONFIG_X86_64 | ||
| 475 | if (c->extended_cpuid_level >= 0x80000008) { | ||
| 476 | u32 eax = cpuid_eax(0x80000008); | ||
| 309 | 477 | ||
| 478 | c->x86_virt_bits = (eax >> 8) & 0xff; | ||
| 479 | c->x86_phys_bits = eax & 0xff; | ||
| 310 | } | 480 | } |
| 481 | #endif | ||
| 482 | |||
| 483 | if (c->extended_cpuid_level >= 0x80000007) | ||
| 484 | c->x86_power = cpuid_edx(0x80000007); | ||
| 311 | 485 | ||
| 312 | } | 486 | } |
| 313 | 487 | ||
| 488 | static void __cpuinit identify_cpu_without_cpuid(struct cpuinfo_x86 *c) | ||
| 489 | { | ||
| 490 | #ifdef CONFIG_X86_32 | ||
| 491 | int i; | ||
| 492 | |||
| 493 | /* | ||
| 494 | * First of all, decide if this is a 486 or higher | ||
| 495 | * It's a 486 if we can modify the AC flag | ||
| 496 | */ | ||
| 497 | if (flag_is_changeable_p(X86_EFLAGS_AC)) | ||
| 498 | c->x86 = 4; | ||
| 499 | else | ||
| 500 | c->x86 = 3; | ||
| 501 | |||
| 502 | for (i = 0; i < X86_VENDOR_NUM; i++) | ||
| 503 | if (cpu_devs[i] && cpu_devs[i]->c_identify) { | ||
| 504 | c->x86_vendor_id[0] = 0; | ||
| 505 | cpu_devs[i]->c_identify(c); | ||
| 506 | if (c->x86_vendor_id[0]) { | ||
| 507 | get_cpu_vendor(c); | ||
| 508 | break; | ||
| 509 | } | ||
| 510 | } | ||
| 511 | #endif | ||
| 512 | } | ||
| 513 | |||
| 314 | /* | 514 | /* |
| 315 | * Do minimum CPU detection early. | 515 | * Do minimum CPU detection early. |
| 316 | * Fields really needed: vendor, cpuid_level, family, model, mask, | 516 | * Fields really needed: vendor, cpuid_level, family, model, mask, |
| @@ -320,109 +520,113 @@ static void __cpuinit early_get_cap(struct cpuinfo_x86 *c) | |||
| 320 | * WARNING: this function is only called on the BP. Don't add code here | 520 | * WARNING: this function is only called on the BP. Don't add code here |
| 321 | * that is supposed to run on all CPUs. | 521 | * that is supposed to run on all CPUs. |
| 322 | */ | 522 | */ |
| 323 | static void __init early_cpu_detect(void) | 523 | static void __init early_identify_cpu(struct cpuinfo_x86 *c) |
| 324 | { | 524 | { |
| 325 | struct cpuinfo_x86 *c = &boot_cpu_data; | 525 | #ifdef CONFIG_X86_64 |
| 326 | 526 | c->x86_clflush_size = 64; | |
| 327 | c->x86_cache_alignment = 32; | 527 | #else |
| 328 | c->x86_clflush_size = 32; | 528 | c->x86_clflush_size = 32; |
| 529 | #endif | ||
| 530 | c->x86_cache_alignment = c->x86_clflush_size; | ||
| 531 | |||
| 532 | memset(&c->x86_capability, 0, sizeof c->x86_capability); | ||
| 533 | c->extended_cpuid_level = 0; | ||
| 534 | |||
| 535 | if (!have_cpuid_p()) | ||
| 536 | identify_cpu_without_cpuid(c); | ||
| 329 | 537 | ||
| 538 | /* Cyrix could have cpuid enabled via c_identify() */ | ||
| 330 | if (!have_cpuid_p()) | 539 | if (!have_cpuid_p()) |
| 331 | return; | 540 | return; |
| 332 | 541 | ||
| 333 | cpu_detect(c); | 542 | cpu_detect(c); |
| 334 | 543 | ||
| 335 | get_cpu_vendor(c, 1); | 544 | get_cpu_vendor(c); |
| 545 | |||
| 546 | get_cpu_cap(c); | ||
| 336 | 547 | ||
| 337 | if (c->x86_vendor != X86_VENDOR_UNKNOWN && | 548 | if (this_cpu->c_early_init) |
| 338 | cpu_devs[c->x86_vendor]->c_early_init) | 549 | this_cpu->c_early_init(c); |
| 339 | cpu_devs[c->x86_vendor]->c_early_init(c); | ||
| 340 | 550 | ||
| 341 | early_get_cap(c); | 551 | validate_pat_support(c); |
| 342 | } | 552 | } |
| 343 | 553 | ||
| 344 | static void __cpuinit generic_identify(struct cpuinfo_x86 *c) | 554 | void __init early_cpu_init(void) |
| 345 | { | 555 | { |
| 346 | u32 tfms, xlvl; | 556 | struct cpu_dev **cdev; |
| 347 | unsigned int ebx; | 557 | int count = 0; |
| 348 | 558 | ||
| 349 | if (have_cpuid_p()) { | 559 | printk("KERNEL supported cpus:\n"); |
| 350 | /* Get vendor name */ | 560 | for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) { |
| 351 | cpuid(0x00000000, (unsigned int *)&c->cpuid_level, | 561 | struct cpu_dev *cpudev = *cdev; |
| 352 | (unsigned int *)&c->x86_vendor_id[0], | 562 | unsigned int j; |
| 353 | (unsigned int *)&c->x86_vendor_id[8], | 563 | |
| 354 | (unsigned int *)&c->x86_vendor_id[4]); | 564 | if (count >= X86_VENDOR_NUM) |
| 355 | 565 | break; | |
| 356 | get_cpu_vendor(c, 0); | 566 | cpu_devs[count] = cpudev; |
| 357 | /* Initialize the standard set of capabilities */ | 567 | count++; |
| 358 | /* Note that the vendor-specific code below might override */ | 568 | |
| 359 | /* Intel-defined flags: level 0x00000001 */ | 569 | for (j = 0; j < 2; j++) { |
| 360 | if (c->cpuid_level >= 0x00000001) { | 570 | if (!cpudev->c_ident[j]) |
| 361 | u32 capability, excap; | 571 | continue; |
| 362 | cpuid(0x00000001, &tfms, &ebx, &excap, &capability); | 572 | printk(" %s %s\n", cpudev->c_vendor, |
| 363 | c->x86_capability[0] = capability; | 573 | cpudev->c_ident[j]); |
| 364 | c->x86_capability[4] = excap; | ||
| 365 | c->x86 = (tfms >> 8) & 15; | ||
| 366 | c->x86_model = (tfms >> 4) & 15; | ||
| 367 | if (c->x86 == 0xf) | ||
| 368 | c->x86 += (tfms >> 20) & 0xff; | ||
| 369 | if (c->x86 >= 0x6) | ||
| 370 | c->x86_model += ((tfms >> 16) & 0xF) << 4; | ||
| 371 | c->x86_mask = tfms & 15; | ||
| 372 | c->initial_apicid = (ebx >> 24) & 0xFF; | ||
| 373 | #ifdef CONFIG_X86_HT | ||
| 374 | c->apicid = phys_pkg_id(c->initial_apicid, 0); | ||
| 375 | c->phys_proc_id = c->initial_apicid; | ||
| 376 | #else | ||
| 377 | c->apicid = c->initial_apicid; | ||
| 378 | #endif | ||
| 379 | if (test_cpu_cap(c, X86_FEATURE_CLFLSH)) | ||
| 380 | c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8; | ||
| 381 | } else { | ||
| 382 | /* Have CPUID level 0 only - unheard of */ | ||
| 383 | c->x86 = 4; | ||
| 384 | } | ||
| 385 | |||
| 386 | /* AMD-defined flags: level 0x80000001 */ | ||
| 387 | xlvl = cpuid_eax(0x80000000); | ||
| 388 | if ((xlvl & 0xffff0000) == 0x80000000) { | ||
| 389 | if (xlvl >= 0x80000001) { | ||
| 390 | c->x86_capability[1] = cpuid_edx(0x80000001); | ||
| 391 | c->x86_capability[6] = cpuid_ecx(0x80000001); | ||
| 392 | } | ||
| 393 | if (xlvl >= 0x80000004) | ||
| 394 | get_model_name(c); /* Default name */ | ||
| 395 | } | 574 | } |
| 396 | |||
| 397 | init_scattered_cpuid_features(c); | ||
| 398 | } | 575 | } |
| 399 | 576 | ||
| 577 | early_identify_cpu(&boot_cpu_data); | ||
| 400 | } | 578 | } |
| 401 | 579 | ||
| 402 | static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) | 580 | /* |
| 581 | * The NOPL instruction is supposed to exist on all CPUs with | ||
| 582 | * family >= 6; unfortunately, that's not true in practice because | ||
| 583 | * of early VIA chips and (more importantly) broken virtualizers that | ||
| 584 | * are not easy to detect. In the latter case it doesn't even *fail* | ||
| 585 | * reliably, so probing for it doesn't even work. Disable it completely | ||
| 586 | * unless we can find a reliable way to detect all the broken cases. | ||
| 587 | */ | ||
| 588 | static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) | ||
| 403 | { | 589 | { |
| 404 | if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) { | 590 | clear_cpu_cap(c, X86_FEATURE_NOPL); |
| 405 | /* Disable processor serial number */ | ||
| 406 | unsigned long lo, hi; | ||
| 407 | rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi); | ||
| 408 | lo |= 0x200000; | ||
| 409 | wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi); | ||
| 410 | printk(KERN_NOTICE "CPU serial number disabled.\n"); | ||
| 411 | clear_cpu_cap(c, X86_FEATURE_PN); | ||
| 412 | |||
| 413 | /* Disabling the serial number may affect the cpuid level */ | ||
| 414 | c->cpuid_level = cpuid_eax(0); | ||
| 415 | } | ||
| 416 | } | 591 | } |
| 417 | 592 | ||
| 418 | static int __init x86_serial_nr_setup(char *s) | 593 | static void __cpuinit generic_identify(struct cpuinfo_x86 *c) |
| 419 | { | 594 | { |
| 420 | disable_x86_serial_nr = 0; | 595 | c->extended_cpuid_level = 0; |
| 421 | return 1; | 596 | |
| 422 | } | 597 | if (!have_cpuid_p()) |
| 423 | __setup("serialnumber", x86_serial_nr_setup); | 598 | identify_cpu_without_cpuid(c); |
| 599 | |||
| 600 | /* Cyrix could have cpuid enabled via c_identify() */ | ||
| 601 | if (!have_cpuid_p()) | ||
| 602 | return; | ||
| 603 | |||
| 604 | cpu_detect(c); | ||
| 605 | |||
| 606 | get_cpu_vendor(c); | ||
| 424 | 607 | ||
| 608 | get_cpu_cap(c); | ||
| 425 | 609 | ||
| 610 | if (c->cpuid_level >= 0x00000001) { | ||
| 611 | c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF; | ||
| 612 | #ifdef CONFIG_X86_32 | ||
| 613 | # ifdef CONFIG_X86_HT | ||
| 614 | c->apicid = phys_pkg_id(c->initial_apicid, 0); | ||
| 615 | # else | ||
| 616 | c->apicid = c->initial_apicid; | ||
| 617 | # endif | ||
| 618 | #endif | ||
| 619 | |||
| 620 | #ifdef CONFIG_X86_HT | ||
| 621 | c->phys_proc_id = c->initial_apicid; | ||
| 622 | #endif | ||
| 623 | } | ||
| 624 | |||
| 625 | get_model_name(c); /* Default name */ | ||
| 626 | |||
| 627 | init_scattered_cpuid_features(c); | ||
| 628 | detect_nopl(c); | ||
| 629 | } | ||
| 426 | 630 | ||
| 427 | /* | 631 | /* |
| 428 | * This does the hard work of actually picking apart the CPU stuff... | 632 | * This does the hard work of actually picking apart the CPU stuff... |
| @@ -434,30 +638,29 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | |||
| 434 | c->loops_per_jiffy = loops_per_jiffy; | 638 | c->loops_per_jiffy = loops_per_jiffy; |
| 435 | c->x86_cache_size = -1; | 639 | c->x86_cache_size = -1; |
| 436 | c->x86_vendor = X86_VENDOR_UNKNOWN; | 640 | c->x86_vendor = X86_VENDOR_UNKNOWN; |
| 437 | c->cpuid_level = -1; /* CPUID not detected */ | ||
| 438 | c->x86_model = c->x86_mask = 0; /* So far unknown... */ | 641 | c->x86_model = c->x86_mask = 0; /* So far unknown... */ |
| 439 | c->x86_vendor_id[0] = '\0'; /* Unset */ | 642 | c->x86_vendor_id[0] = '\0'; /* Unset */ |
| 440 | c->x86_model_id[0] = '\0'; /* Unset */ | 643 | c->x86_model_id[0] = '\0'; /* Unset */ |
| 441 | c->x86_max_cores = 1; | 644 | c->x86_max_cores = 1; |
| 645 | c->x86_coreid_bits = 0; | ||
| 646 | #ifdef CONFIG_X86_64 | ||
| 647 | c->x86_clflush_size = 64; | ||
| 648 | #else | ||
| 649 | c->cpuid_level = -1; /* CPUID not detected */ | ||
| 442 | c->x86_clflush_size = 32; | 650 | c->x86_clflush_size = 32; |
| 651 | #endif | ||
| 652 | c->x86_cache_alignment = c->x86_clflush_size; | ||
| 443 | memset(&c->x86_capability, 0, sizeof c->x86_capability); | 653 | memset(&c->x86_capability, 0, sizeof c->x86_capability); |
| 444 | 654 | ||
| 445 | if (!have_cpuid_p()) { | ||
| 446 | /* | ||
| 447 | * First of all, decide if this is a 486 or higher | ||
| 448 | * It's a 486 if we can modify the AC flag | ||
| 449 | */ | ||
| 450 | if (flag_is_changeable_p(X86_EFLAGS_AC)) | ||
| 451 | c->x86 = 4; | ||
| 452 | else | ||
| 453 | c->x86 = 3; | ||
| 454 | } | ||
| 455 | |||
| 456 | generic_identify(c); | 655 | generic_identify(c); |
| 457 | 656 | ||
| 458 | if (this_cpu->c_identify) | 657 | if (this_cpu->c_identify) |
| 459 | this_cpu->c_identify(c); | 658 | this_cpu->c_identify(c); |
| 460 | 659 | ||
| 660 | #ifdef CONFIG_X86_64 | ||
| 661 | c->apicid = phys_pkg_id(0); | ||
| 662 | #endif | ||
| 663 | |||
| 461 | /* | 664 | /* |
| 462 | * Vendor-specific initialization. In this section we | 665 | * Vendor-specific initialization. In this section we |
| 463 | * canonicalize the feature flags, meaning if there are | 666 | * canonicalize the feature flags, meaning if there are |
| @@ -491,6 +694,10 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | |||
| 491 | c->x86, c->x86_model); | 694 | c->x86, c->x86_model); |
| 492 | } | 695 | } |
| 493 | 696 | ||
| 697 | #ifdef CONFIG_X86_64 | ||
| 698 | detect_ht(c); | ||
| 699 | #endif | ||
| 700 | |||
| 494 | /* | 701 | /* |
| 495 | * On SMP, boot_cpu_data holds the common feature set between | 702 | * On SMP, boot_cpu_data holds the common feature set between |
| 496 | * all CPUs; so make sure that we indicate which features are | 703 | * all CPUs; so make sure that we indicate which features are |
| @@ -499,7 +706,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | |||
| 499 | */ | 706 | */ |
| 500 | if (c != &boot_cpu_data) { | 707 | if (c != &boot_cpu_data) { |
| 501 | /* AND the already accumulated flags with these */ | 708 | /* AND the already accumulated flags with these */ |
| 502 | for (i = 0 ; i < NCAPINTS ; i++) | 709 | for (i = 0; i < NCAPINTS; i++) |
| 503 | boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; | 710 | boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; |
| 504 | } | 711 | } |
| 505 | 712 | ||
| @@ -507,72 +714,91 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | |||
| 507 | for (i = 0; i < NCAPINTS; i++) | 714 | for (i = 0; i < NCAPINTS; i++) |
| 508 | c->x86_capability[i] &= ~cleared_cpu_caps[i]; | 715 | c->x86_capability[i] &= ~cleared_cpu_caps[i]; |
| 509 | 716 | ||
| 717 | #ifdef CONFIG_X86_MCE | ||
| 510 | /* Init Machine Check Exception if available. */ | 718 | /* Init Machine Check Exception if available. */ |
| 511 | mcheck_init(c); | 719 | mcheck_init(c); |
| 720 | #endif | ||
| 512 | 721 | ||
| 513 | select_idle_routine(c); | 722 | select_idle_routine(c); |
| 723 | |||
| 724 | #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) | ||
| 725 | numa_add_cpu(smp_processor_id()); | ||
| 726 | #endif | ||
| 514 | } | 727 | } |
| 515 | 728 | ||
| 729 | #ifdef CONFIG_X86_64 | ||
| 730 | static void vgetcpu_set_mode(void) | ||
| 731 | { | ||
| 732 | if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP)) | ||
| 733 | vgetcpu_mode = VGETCPU_RDTSCP; | ||
| 734 | else | ||
| 735 | vgetcpu_mode = VGETCPU_LSL; | ||
| 736 | } | ||
| 737 | #endif | ||
| 738 | |||
| 516 | void __init identify_boot_cpu(void) | 739 | void __init identify_boot_cpu(void) |
| 517 | { | 740 | { |
| 518 | identify_cpu(&boot_cpu_data); | 741 | identify_cpu(&boot_cpu_data); |
| 742 | #ifdef CONFIG_X86_32 | ||
| 519 | sysenter_setup(); | 743 | sysenter_setup(); |
| 520 | enable_sep_cpu(); | 744 | enable_sep_cpu(); |
| 745 | #else | ||
| 746 | vgetcpu_set_mode(); | ||
| 747 | #endif | ||
| 521 | } | 748 | } |
| 522 | 749 | ||
| 523 | void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) | 750 | void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) |
| 524 | { | 751 | { |
| 525 | BUG_ON(c == &boot_cpu_data); | 752 | BUG_ON(c == &boot_cpu_data); |
| 526 | identify_cpu(c); | 753 | identify_cpu(c); |
| 754 | #ifdef CONFIG_X86_32 | ||
| 527 | enable_sep_cpu(); | 755 | enable_sep_cpu(); |
| 756 | #endif | ||
| 528 | mtrr_ap_init(); | 757 | mtrr_ap_init(); |
| 529 | } | 758 | } |
| 530 | 759 | ||
| 531 | #ifdef CONFIG_X86_HT | 760 | struct msr_range { |
| 532 | void __cpuinit detect_ht(struct cpuinfo_x86 *c) | 761 | unsigned min; |
| 533 | { | 762 | unsigned max; |
| 534 | u32 eax, ebx, ecx, edx; | 763 | }; |
| 535 | int index_msb, core_bits; | ||
| 536 | |||
| 537 | cpuid(1, &eax, &ebx, &ecx, &edx); | ||
| 538 | |||
| 539 | if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) | ||
| 540 | return; | ||
| 541 | |||
| 542 | smp_num_siblings = (ebx & 0xff0000) >> 16; | ||
| 543 | 764 | ||
| 544 | if (smp_num_siblings == 1) { | 765 | static struct msr_range msr_range_array[] __cpuinitdata = { |
| 545 | printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); | 766 | { 0x00000000, 0x00000418}, |
| 546 | } else if (smp_num_siblings > 1) { | 767 | { 0xc0000000, 0xc000040b}, |
| 768 | { 0xc0010000, 0xc0010142}, | ||
| 769 | { 0xc0011000, 0xc001103b}, | ||
| 770 | }; | ||
| 547 | 771 | ||
| 548 | if (smp_num_siblings > NR_CPUS) { | 772 | static void __cpuinit print_cpu_msr(void) |
| 549 | printk(KERN_WARNING "CPU: Unsupported number of the " | 773 | { |
| 550 | "siblings %d", smp_num_siblings); | 774 | unsigned index; |
| 551 | smp_num_siblings = 1; | 775 | u64 val; |
| 552 | return; | 776 | int i; |
| 777 | unsigned index_min, index_max; | ||
| 778 | |||
| 779 | for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) { | ||
| 780 | index_min = msr_range_array[i].min; | ||
| 781 | index_max = msr_range_array[i].max; | ||
| 782 | for (index = index_min; index < index_max; index++) { | ||
| 783 | if (rdmsrl_amd_safe(index, &val)) | ||
| 784 | continue; | ||
| 785 | printk(KERN_INFO " MSR%08x: %016llx\n", index, val); | ||
| 553 | } | 786 | } |
| 787 | } | ||
| 788 | } | ||
| 554 | 789 | ||
| 555 | index_msb = get_count_order(smp_num_siblings); | 790 | static int show_msr __cpuinitdata; |
| 556 | c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb); | 791 | static __init int setup_show_msr(char *arg) |
| 557 | 792 | { | |
| 558 | printk(KERN_INFO "CPU: Physical Processor ID: %d\n", | 793 | int num; |
| 559 | c->phys_proc_id); | ||
| 560 | |||
| 561 | smp_num_siblings = smp_num_siblings / c->x86_max_cores; | ||
| 562 | |||
| 563 | index_msb = get_count_order(smp_num_siblings) ; | ||
| 564 | |||
| 565 | core_bits = get_count_order(c->x86_max_cores); | ||
| 566 | 794 | ||
| 567 | c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) & | 795 | get_option(&arg, &num); |
| 568 | ((1 << core_bits) - 1); | ||
| 569 | 796 | ||
| 570 | if (c->x86_max_cores > 1) | 797 | if (num > 0) |
| 571 | printk(KERN_INFO "CPU: Processor Core ID: %d\n", | 798 | show_msr = num; |
| 572 | c->cpu_core_id); | 799 | return 1; |
| 573 | } | ||
| 574 | } | 800 | } |
| 575 | #endif | 801 | __setup("show_msr=", setup_show_msr); |
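With the new show_msr= option, booting with e.g. show_msr=1 makes print_cpu_msr() dump the MSR ranges above for the first CPU (cpu_index 0). For completeness, here is an illustrative user-space sketch, not part of this commit, that reads a single MSR through the existing msr driver (CONFIG_X86_MSR, /dev/cpu/N/msr); the MSR index 0x10 (the TSC) is just a sample and the program needs root.

/*
 * Illustrative sketch, not part of this commit: reading one MSR from user
 * space via the msr driver (CONFIG_X86_MSR, /dev/cpu/N/msr), in the same
 * spirit as the print_cpu_msr() boot-time dump above.  Requires root; the
 * MSR index 0x10 (the TSC) is just a sample.
 */
#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	uint64_t val;
	int fd = open("/dev/cpu/0/msr", O_RDONLY);

	if (fd < 0)
		return 1;
	/* the file offset selects the MSR index; each read returns 8 bytes */
	if (pread(fd, &val, sizeof(val), 0x10) == sizeof(val))
		printf("MSR 0x10 = %016llx\n", (unsigned long long)val);
	close(fd);
	return 0;
}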
| 576 | 802 | ||
| 577 | static __init int setup_noclflush(char *arg) | 803 | static __init int setup_noclflush(char *arg) |
| 578 | { | 804 | { |
| @@ -590,18 +816,26 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) | |||
| 590 | else if (c->cpuid_level >= 0) | 816 | else if (c->cpuid_level >= 0) |
| 591 | vendor = c->x86_vendor_id; | 817 | vendor = c->x86_vendor_id; |
| 592 | 818 | ||
| 593 | if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor))) | 819 | if (vendor && !strstr(c->x86_model_id, vendor)) |
| 594 | printk("%s ", vendor); | 820 | printk(KERN_CONT "%s ", vendor); |
| 595 | 821 | ||
| 596 | if (!c->x86_model_id[0]) | 822 | if (c->x86_model_id[0]) |
| 597 | printk("%d86", c->x86); | 823 | printk(KERN_CONT "%s", c->x86_model_id); |
| 598 | else | 824 | else |
| 599 | printk("%s", c->x86_model_id); | 825 | printk(KERN_CONT "%d86", c->x86); |
| 600 | 826 | ||
| 601 | if (c->x86_mask || c->cpuid_level >= 0) | 827 | if (c->x86_mask || c->cpuid_level >= 0) |
| 602 | printk(" stepping %02x\n", c->x86_mask); | 828 | printk(KERN_CONT " stepping %02x\n", c->x86_mask); |
| 603 | else | 829 | else |
| 604 | printk("\n"); | 830 | printk(KERN_CONT "\n"); |
| 831 | |||
| 832 | #ifdef CONFIG_SMP | ||
| 833 | if (c->cpu_index < show_msr) | ||
| 834 | print_cpu_msr(); | ||
| 835 | #else | ||
| 836 | if (show_msr) | ||
| 837 | print_cpu_msr(); | ||
| 838 | #endif | ||
| 605 | } | 839 | } |
| 606 | 840 | ||
| 607 | static __init int setup_disablecpuid(char *arg) | 841 | static __init int setup_disablecpuid(char *arg) |
| @@ -617,19 +851,89 @@ __setup("clearcpuid=", setup_disablecpuid); | |||
| 617 | 851 | ||
| 618 | cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; | 852 | cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; |
| 619 | 853 | ||
| 620 | void __init early_cpu_init(void) | 854 | #ifdef CONFIG_X86_64 |
| 855 | struct x8664_pda **_cpu_pda __read_mostly; | ||
| 856 | EXPORT_SYMBOL(_cpu_pda); | ||
| 857 | |||
| 858 | struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; | ||
| 859 | |||
| 860 | char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; | ||
| 861 | |||
| 862 | void __cpuinit pda_init(int cpu) | ||
| 621 | { | 863 | { |
| 622 | struct cpu_vendor_dev *cvdev; | 864 | struct x8664_pda *pda = cpu_pda(cpu); |
| 865 | |||
| 866 | /* Set up data that may be needed in __get_free_pages early */ | ||
| 867 | loadsegment(fs, 0); | ||
| 868 | loadsegment(gs, 0); | ||
| 869 | /* Memory clobbers used to order PDA accesses */ | ||
| 870 | mb(); | ||
| 871 | wrmsrl(MSR_GS_BASE, pda); | ||
| 872 | mb(); | ||
| 873 | |||
| 874 | pda->cpunumber = cpu; | ||
| 875 | pda->irqcount = -1; | ||
| 876 | pda->kernelstack = (unsigned long)stack_thread_info() - | ||
| 877 | PDA_STACKOFFSET + THREAD_SIZE; | ||
| 878 | pda->active_mm = &init_mm; | ||
| 879 | pda->mmu_state = 0; | ||
| 880 | |||
| 881 | if (cpu == 0) { | ||
| 882 | /* others are initialized in smpboot.c */ | ||
| 883 | pda->pcurrent = &init_task; | ||
| 884 | pda->irqstackptr = boot_cpu_stack; | ||
| 885 | pda->irqstackptr += IRQSTACKSIZE - 64; | ||
| 886 | } else { | ||
| 887 | if (!pda->irqstackptr) { | ||
| 888 | pda->irqstackptr = (char *) | ||
| 889 | __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); | ||
| 890 | if (!pda->irqstackptr) | ||
| 891 | panic("cannot allocate irqstack for cpu %d", | ||
| 892 | cpu); | ||
| 893 | pda->irqstackptr += IRQSTACKSIZE - 64; | ||
| 894 | } | ||
| 895 | |||
| 896 | if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) | ||
| 897 | pda->nodenumber = cpu_to_node(cpu); | ||
| 898 | } | ||
| 899 | } | ||
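pda_init() above points MSR_GS_BASE at the per-CPU PDA so later code can reach PDA fields with %gs-relative addressing (the read_pda()/write_pda() helpers). The following is a simplified, illustrative sketch of that access pattern, not part of this commit; the stand-in struct and its field offsets are invented for the example and do not match the real struct x8664_pda layout.

/*
 * Illustrative sketch, not part of this commit: once MSR_GS_BASE points at
 * the PDA, fields are read with %gs-relative addressing.  The struct below
 * is an invented stand-in (the real layout lives in <asm/pda.h>), and the
 * inline asm is a simplified model of the kernel's read_pda() helper.
 */
#include <stddef.h>

struct pda_sketch {			/* stand-in, not the real struct x8664_pda */
	void *pcurrent;
	unsigned long dummy[4];
	int cpunumber;
};

static inline int read_pda_cpunumber(void)
{
	int cpu;

	/* movl %gs:OFFSET, cpu -- OFFSET is the byte offset of ->cpunumber */
	asm("movl %%gs:%c1, %0"
	    : "=r" (cpu)
	    : "i" (offsetof(struct pda_sketch, cpunumber)));
	return cpu;
}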
| 900 | |||
| 901 | char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + | ||
| 902 | DEBUG_STKSZ] __page_aligned_bss; | ||
| 623 | 903 | ||
| 624 | for (cvdev = __x86cpuvendor_start ; | 904 | extern asmlinkage void ignore_sysret(void); |
| 625 | cvdev < __x86cpuvendor_end ; | ||
| 626 | cvdev++) | ||
| 627 | cpu_devs[cvdev->vendor] = cvdev->cpu_dev; | ||
| 628 | 905 | ||
| 629 | early_cpu_detect(); | 906 | /* May not be marked __init: used by software suspend */ |
| 630 | validate_pat_support(&boot_cpu_data); | 907 | void syscall_init(void) |
| 908 | { | ||
| 909 | /* | ||
| 910 | * LSTAR and STAR live in a bit strange symbiosis. | ||
| 911 | * They both write to the same internal register. STAR allows setting | ||
| 912 | * CS/DS, but only a 32-bit target. LSTAR sets the 64-bit rip. | ||
| 913 | */ | ||
| 914 | wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32); | ||
| 915 | wrmsrl(MSR_LSTAR, system_call); | ||
| 916 | wrmsrl(MSR_CSTAR, ignore_sysret); | ||
| 917 | |||
| 918 | #ifdef CONFIG_IA32_EMULATION | ||
| 919 | syscall32_cpu_init(); | ||
| 920 | #endif | ||
| 921 | |||
| 922 | /* Flags to clear on syscall */ | ||
| 923 | wrmsrl(MSR_SYSCALL_MASK, | ||
| 924 | X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL); | ||
| 631 | } | 925 | } |
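The MSR_STAR value written in syscall_init() above packs __USER32_CS into bits 63:48 and __KERNEL_CS into bits 47:32. On SYSCALL the CPU loads CS from STAR[47:32] and SS from STAR[47:32]+8; on 64-bit SYSRET it loads CS from STAR[63:48]+16 and SS from STAR[63:48]+8, which is why the GDT keeps KERNEL_CS/KERNEL_DS and USER32_CS/USER_DS/USER_CS in that fixed order. Below is an illustrative sketch, not part of this commit, with assumed selector values (0x10 for __KERNEL_CS, 0x23 for __USER32_CS) matching that layout.

/*
 * Illustrative sketch, not part of this commit: how the hardware consumes
 * the MSR_STAR value written in syscall_init() above.  The selector values
 * are assumed (0x10 for __KERNEL_CS, 0x23 for __USER32_CS) to match the
 * GDT ordering this file relies on.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint16_t kernel_cs = 0x10;	/* assumed __KERNEL_CS (GDT entry 2) */
	uint16_t user32_cs = 0x23;	/* assumed __USER32_CS (GDT entry 4, RPL 3) */
	uint64_t star = ((uint64_t)user32_cs << 48) | ((uint64_t)kernel_cs << 32);

	printf("STAR             = %016llx\n", (unsigned long long)star);
	/* SYSCALL: CS = STAR[47:32], SS = STAR[47:32] + 8 */
	printf("SYSCALL  CS=%02x SS=%02x\n",
	       (unsigned)((star >> 32) & 0xffff),
	       (unsigned)((star >> 32) & 0xffff) + 8);
	/* 64-bit SYSRET: CS = STAR[63:48] + 16, SS = STAR[63:48] + 8 */
	printf("SYSRET64 CS=%02x SS=%02x\n",
	       (unsigned)((star >> 48) & 0xffff) + 16,
	       (unsigned)((star >> 48) & 0xffff) + 8);
	return 0;
}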
| 632 | 926 | ||
| 927 | unsigned long kernel_eflags; | ||
| 928 | |||
| 929 | /* | ||
| 930 | * Copies of the original ist values from the tss are only accessed during | ||
| 931 | * debugging, no special alignment required. | ||
| 932 | */ | ||
| 933 | DEFINE_PER_CPU(struct orig_ist, orig_ist); | ||
| 934 | |||
| 935 | #else | ||
| 936 | |||
| 633 | /* Make sure %fs is initialized properly in idle threads */ | 937 | /* Make sure %fs is initialized properly in idle threads */ |
| 634 | struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) | 938 | struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) |
| 635 | { | 939 | { |
| @@ -637,25 +941,136 @@ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) | |||
| 637 | regs->fs = __KERNEL_PERCPU; | 941 | regs->fs = __KERNEL_PERCPU; |
| 638 | return regs; | 942 | return regs; |
| 639 | } | 943 | } |
| 640 | 944 | #endif | |
| 641 | /* Current gdt points %fs at the "master" per-cpu area: after this, | ||
| 642 | * it's on the real one. */ | ||
| 643 | void switch_to_new_gdt(void) | ||
| 644 | { | ||
| 645 | struct desc_ptr gdt_descr; | ||
| 646 | |||
| 647 | gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); | ||
| 648 | gdt_descr.size = GDT_SIZE - 1; | ||
| 649 | load_gdt(&gdt_descr); | ||
| 650 | asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); | ||
| 651 | } | ||
| 652 | 945 | ||
| 653 | /* | 946 | /* |
| 654 | * cpu_init() initializes state that is per-CPU. Some data is already | 947 | * cpu_init() initializes state that is per-CPU. Some data is already |
| 655 | * initialized (naturally) in the bootstrap process, such as the GDT | 948 | * initialized (naturally) in the bootstrap process, such as the GDT |
| 656 | * and IDT. We reload them nevertheless, this function acts as a | 949 | * and IDT. We reload them nevertheless, this function acts as a |
| 657 | * 'CPU state barrier', nothing should get across. | 950 | * 'CPU state barrier', nothing should get across. |
| 951 | * A lot of state is already set up in PDA init for 64 bit | ||
| 658 | */ | 952 | */ |
| 953 | #ifdef CONFIG_X86_64 | ||
| 954 | void __cpuinit cpu_init(void) | ||
| 955 | { | ||
| 956 | int cpu = stack_smp_processor_id(); | ||
| 957 | struct tss_struct *t = &per_cpu(init_tss, cpu); | ||
| 958 | struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu); | ||
| 959 | unsigned long v; | ||
| 960 | char *estacks = NULL; | ||
| 961 | struct task_struct *me; | ||
| 962 | int i; | ||
| 963 | |||
| 964 | /* CPU 0 is initialised in head64.c */ | ||
| 965 | if (cpu != 0) | ||
| 966 | pda_init(cpu); | ||
| 967 | else | ||
| 968 | estacks = boot_exception_stacks; | ||
| 969 | |||
| 970 | me = current; | ||
| 971 | |||
| 972 | if (cpu_test_and_set(cpu, cpu_initialized)) | ||
| 973 | panic("CPU#%d already initialized!\n", cpu); | ||
| 974 | |||
| 975 | printk(KERN_INFO "Initializing CPU#%d\n", cpu); | ||
| 976 | |||
| 977 | clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); | ||
| 978 | |||
| 979 | /* | ||
| 980 | * Initialize the per-CPU GDT with the boot GDT, | ||
| 981 | * and set up the GDT descriptor: | ||
| 982 | */ | ||
| 983 | |||
| 984 | switch_to_new_gdt(); | ||
| 985 | load_idt((const struct desc_ptr *)&idt_descr); | ||
| 986 | |||
| 987 | memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); | ||
| 988 | syscall_init(); | ||
| 989 | |||
| 990 | wrmsrl(MSR_FS_BASE, 0); | ||
| 991 | wrmsrl(MSR_KERNEL_GS_BASE, 0); | ||
| 992 | barrier(); | ||
| 993 | |||
| 994 | check_efer(); | ||
| 995 | if (cpu != 0 && x2apic) | ||
| 996 | enable_x2apic(); | ||
| 997 | |||
| 998 | /* | ||
| 999 | * set up and load the per-CPU TSS | ||
| 1000 | */ | ||
| 1001 | if (!orig_ist->ist[0]) { | ||
| 1002 | static const unsigned int order[N_EXCEPTION_STACKS] = { | ||
| 1003 | [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, | ||
| 1004 | [DEBUG_STACK - 1] = DEBUG_STACK_ORDER | ||
| 1005 | }; | ||
| 1006 | for (v = 0; v < N_EXCEPTION_STACKS; v++) { | ||
| 1007 | if (cpu) { | ||
| 1008 | estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); | ||
| 1009 | if (!estacks) | ||
| 1010 | panic("Cannot allocate exception " | ||
| 1011 | "stack %ld %d\n", v, cpu); | ||
| 1012 | } | ||
| 1013 | estacks += PAGE_SIZE << order[v]; | ||
| 1014 | orig_ist->ist[v] = t->x86_tss.ist[v] = | ||
| 1015 | (unsigned long)estacks; | ||
| 1016 | } | ||
| 1017 | } | ||
| 1018 | |||
| 1019 | t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); | ||
| 1020 | /* | ||
| 1021 | * <= is required because the CPU will access up to | ||
| 1022 | * 8 bits beyond the end of the IO permission bitmap. | ||
| 1023 | */ | ||
| 1024 | for (i = 0; i <= IO_BITMAP_LONGS; i++) | ||
| 1025 | t->io_bitmap[i] = ~0UL; | ||
| 1026 | |||
| 1027 | atomic_inc(&init_mm.mm_count); | ||
| 1028 | me->active_mm = &init_mm; | ||
| 1029 | if (me->mm) | ||
| 1030 | BUG(); | ||
| 1031 | enter_lazy_tlb(&init_mm, me); | ||
| 1032 | |||
| 1033 | load_sp0(t, ¤t->thread); | ||
| 1034 | set_tss_desc(cpu, t); | ||
| 1035 | load_TR_desc(); | ||
| 1036 | load_LDT(&init_mm.context); | ||
| 1037 | |||
| 1038 | #ifdef CONFIG_KGDB | ||
| 1039 | /* | ||
| 1040 | * If the kgdb is connected no debug regs should be altered. This | ||
| 1041 | * is only applicable when KGDB and a KGDB I/O module are built | ||
| 1042 | * into the kernel and you are using early debugging with | ||
| 1043 | * kgdbwait. KGDB will control the kernel HW breakpoint registers. | ||
| 1044 | */ | ||
| 1045 | if (kgdb_connected && arch_kgdb_ops.correct_hw_break) | ||
| 1046 | arch_kgdb_ops.correct_hw_break(); | ||
| 1047 | else { | ||
| 1048 | #endif | ||
| 1049 | /* | ||
| 1050 | * Clear all 6 debug registers: | ||
| 1051 | */ | ||
| 1052 | |||
| 1053 | set_debugreg(0UL, 0); | ||
| 1054 | set_debugreg(0UL, 1); | ||
| 1055 | set_debugreg(0UL, 2); | ||
| 1056 | set_debugreg(0UL, 3); | ||
| 1057 | set_debugreg(0UL, 6); | ||
| 1058 | set_debugreg(0UL, 7); | ||
| 1059 | #ifdef CONFIG_KGDB | ||
| 1060 | /* If the kgdb is connected no debug regs should be altered. */ | ||
| 1061 | } | ||
| 1062 | #endif | ||
| 1063 | |||
| 1064 | fpu_init(); | ||
| 1065 | |||
| 1066 | raw_local_save_flags(kernel_eflags); | ||
| 1067 | |||
| 1068 | if (is_uv_system()) | ||
| 1069 | uv_cpu_init(); | ||
| 1070 | } | ||
| 1071 | |||
| 1072 | #else | ||
| 1073 | |||
| 659 | void __cpuinit cpu_init(void) | 1074 | void __cpuinit cpu_init(void) |
| 660 | { | 1075 | { |
| 661 | int cpu = smp_processor_id(); | 1076 | int cpu = smp_processor_id(); |
| @@ -709,19 +1124,21 @@ void __cpuinit cpu_init(void) | |||
| 709 | /* | 1124 | /* |
| 710 | * Force FPU initialization: | 1125 | * Force FPU initialization: |
| 711 | */ | 1126 | */ |
| 712 | current_thread_info()->status = 0; | 1127 | if (cpu_has_xsave) |
| 1128 | current_thread_info()->status = TS_XSAVE; | ||
| 1129 | else | ||
| 1130 | current_thread_info()->status = 0; | ||
| 713 | clear_used_math(); | 1131 | clear_used_math(); |
| 714 | mxcsr_feature_mask_init(); | 1132 | mxcsr_feature_mask_init(); |
| 715 | } | ||
| 716 | 1133 | ||
| 717 | #ifdef CONFIG_HOTPLUG_CPU | 1134 | /* |
| 718 | void __cpuinit cpu_uninit(void) | 1135 | * Boot processor to setup the FP and extended state context info. |
| 719 | { | 1136 | */ |
| 720 | int cpu = raw_smp_processor_id(); | 1137 | if (!smp_processor_id()) |
| 721 | cpu_clear(cpu, cpu_initialized); | 1138 | init_thread_xstate(); |
| 722 | 1139 | ||
| 723 | /* lazy TLB state */ | 1140 | xsave_init(); |
| 724 | per_cpu(cpu_tlbstate, cpu).state = 0; | ||
| 725 | per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; | ||
| 726 | } | 1141 | } |
| 1142 | |||
| 1143 | |||
| 727 | #endif | 1144 | #endif |
diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c deleted file mode 100644 index 7b8cc72feb40..000000000000 --- a/arch/x86/kernel/cpu/common_64.c +++ /dev/null | |||
| @@ -1,681 +0,0 @@ | |||
| 1 | #include <linux/init.h> | ||
| 2 | #include <linux/kernel.h> | ||
| 3 | #include <linux/sched.h> | ||
| 4 | #include <linux/string.h> | ||
| 5 | #include <linux/bootmem.h> | ||
| 6 | #include <linux/bitops.h> | ||
| 7 | #include <linux/module.h> | ||
| 8 | #include <linux/kgdb.h> | ||
| 9 | #include <linux/topology.h> | ||
| 10 | #include <linux/string.h> | ||
| 11 | #include <linux/delay.h> | ||
| 12 | #include <linux/smp.h> | ||
| 13 | #include <linux/module.h> | ||
| 14 | #include <linux/percpu.h> | ||
| 15 | #include <asm/processor.h> | ||
| 16 | #include <asm/i387.h> | ||
| 17 | #include <asm/msr.h> | ||
| 18 | #include <asm/io.h> | ||
| 19 | #include <asm/mmu_context.h> | ||
| 20 | #include <asm/mtrr.h> | ||
| 21 | #include <asm/mce.h> | ||
| 22 | #include <asm/pat.h> | ||
| 23 | #include <asm/numa.h> | ||
| 24 | #ifdef CONFIG_X86_LOCAL_APIC | ||
| 25 | #include <asm/mpspec.h> | ||
| 26 | #include <asm/apic.h> | ||
| 27 | #include <mach_apic.h> | ||
| 28 | #endif | ||
| 29 | #include <asm/pda.h> | ||
| 30 | #include <asm/pgtable.h> | ||
| 31 | #include <asm/processor.h> | ||
| 32 | #include <asm/desc.h> | ||
| 33 | #include <asm/atomic.h> | ||
| 34 | #include <asm/proto.h> | ||
| 35 | #include <asm/sections.h> | ||
| 36 | #include <asm/setup.h> | ||
| 37 | #include <asm/genapic.h> | ||
| 38 | |||
| 39 | #include "cpu.h" | ||
| 40 | |||
| 41 | /* We need valid kernel segments for data and code in long mode too | ||
| 42 | * IRET will check the segment types kkeil 2000/10/28 | ||
| 43 | * Also sysret mandates a special GDT layout | ||
| 44 | */ | ||
| 45 | /* The TLS descriptors are currently at a different place compared to i386. | ||
| 46 | Hopefully nobody expects them at a fixed place (Wine?) */ | ||
| 47 | DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { | ||
| 48 | [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, | ||
| 49 | [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, | ||
| 50 | [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, | ||
| 51 | [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, | ||
| 52 | [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, | ||
| 53 | [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, | ||
| 54 | } }; | ||
| 55 | EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); | ||
| 56 | |||
| 57 | __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; | ||
| 58 | |||
| 59 | /* Current gdt points %fs at the "master" per-cpu area: after this, | ||
| 60 | * it's on the real one. */ | ||
| 61 | void switch_to_new_gdt(void) | ||
| 62 | { | ||
| 63 | struct desc_ptr gdt_descr; | ||
| 64 | |||
| 65 | gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); | ||
| 66 | gdt_descr.size = GDT_SIZE - 1; | ||
| 67 | load_gdt(&gdt_descr); | ||
| 68 | } | ||
| 69 | |||
| 70 | struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; | ||
| 71 | |||
| 72 | static void __cpuinit default_init(struct cpuinfo_x86 *c) | ||
| 73 | { | ||
| 74 | display_cacheinfo(c); | ||
| 75 | } | ||
| 76 | |||
| 77 | static struct cpu_dev __cpuinitdata default_cpu = { | ||
| 78 | .c_init = default_init, | ||
| 79 | .c_vendor = "Unknown", | ||
| 80 | }; | ||
| 81 | static struct cpu_dev *this_cpu __cpuinitdata = &default_cpu; | ||
| 82 | |||
| 83 | int __cpuinit get_model_name(struct cpuinfo_x86 *c) | ||
| 84 | { | ||
| 85 | unsigned int *v; | ||
| 86 | |||
| 87 | if (c->extended_cpuid_level < 0x80000004) | ||
| 88 | return 0; | ||
| 89 | |||
| 90 | v = (unsigned int *) c->x86_model_id; | ||
| 91 | cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); | ||
| 92 | cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]); | ||
| 93 | cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); | ||
| 94 | c->x86_model_id[48] = 0; | ||
| 95 | return 1; | ||
| 96 | } | ||
| 97 | |||
| 98 | |||
| 99 | void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) | ||
| 100 | { | ||
| 101 | unsigned int n, dummy, ebx, ecx, edx; | ||
| 102 | |||
| 103 | n = c->extended_cpuid_level; | ||
| 104 | |||
| 105 | if (n >= 0x80000005) { | ||
| 106 | cpuid(0x80000005, &dummy, &ebx, &ecx, &edx); | ||
| 107 | printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), " | ||
| 108 | "D cache %dK (%d bytes/line)\n", | ||
| 109 | edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); | ||
| 110 | c->x86_cache_size = (ecx>>24) + (edx>>24); | ||
| 111 | /* On K8 L1 TLB is inclusive, so don't count it */ | ||
| 112 | c->x86_tlbsize = 0; | ||
| 113 | } | ||
| 114 | |||
| 115 | if (n >= 0x80000006) { | ||
| 116 | cpuid(0x80000006, &dummy, &ebx, &ecx, &edx); | ||
| 117 | ecx = cpuid_ecx(0x80000006); | ||
| 118 | c->x86_cache_size = ecx >> 16; | ||
| 119 | c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff); | ||
| 120 | |||
| 121 | printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", | ||
| 122 | c->x86_cache_size, ecx & 0xFF); | ||
| 123 | } | ||
| 124 | } | ||
| 125 | |||
| 126 | void __cpuinit detect_ht(struct cpuinfo_x86 *c) | ||
| 127 | { | ||
| 128 | #ifdef CONFIG_SMP | ||
| 129 | u32 eax, ebx, ecx, edx; | ||
| 130 | int index_msb, core_bits; | ||
| 131 | |||
| 132 | cpuid(1, &eax, &ebx, &ecx, &edx); | ||
| 133 | |||
| 134 | |||
| 135 | if (!cpu_has(c, X86_FEATURE_HT)) | ||
| 136 | return; | ||
| 137 | if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) | ||
| 138 | goto out; | ||
| 139 | |||
| 140 | smp_num_siblings = (ebx & 0xff0000) >> 16; | ||
| 141 | |||
| 142 | if (smp_num_siblings == 1) { | ||
| 143 | printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); | ||
| 144 | } else if (smp_num_siblings > 1) { | ||
| 145 | |||
| 146 | if (smp_num_siblings > NR_CPUS) { | ||
| 147 | printk(KERN_WARNING "CPU: Unsupported number of " | ||
| 148 | "siblings %d", smp_num_siblings); | ||
| 149 | smp_num_siblings = 1; | ||
| 150 | return; | ||
| 151 | } | ||
| 152 | |||
| 153 | index_msb = get_count_order(smp_num_siblings); | ||
| 154 | c->phys_proc_id = phys_pkg_id(index_msb); | ||
| 155 | |||
| 156 | smp_num_siblings = smp_num_siblings / c->x86_max_cores; | ||
| 157 | |||
| 158 | index_msb = get_count_order(smp_num_siblings); | ||
| 159 | |||
| 160 | core_bits = get_count_order(c->x86_max_cores); | ||
| 161 | |||
| 162 | c->cpu_core_id = phys_pkg_id(index_msb) & | ||
| 163 | ((1 << core_bits) - 1); | ||
| 164 | } | ||
| 165 | out: | ||
| 166 | if ((c->x86_max_cores * smp_num_siblings) > 1) { | ||
| 167 | printk(KERN_INFO "CPU: Physical Processor ID: %d\n", | ||
| 168 | c->phys_proc_id); | ||
| 169 | printk(KERN_INFO "CPU: Processor Core ID: %d\n", | ||
| 170 | c->cpu_core_id); | ||
| 171 | } | ||
| 172 | |||
| 173 | #endif | ||
| 174 | } | ||
| 175 | |||
| 176 | static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) | ||
| 177 | { | ||
| 178 | char *v = c->x86_vendor_id; | ||
| 179 | int i; | ||
| 180 | static int printed; | ||
| 181 | |||
| 182 | for (i = 0; i < X86_VENDOR_NUM; i++) { | ||
| 183 | if (cpu_devs[i]) { | ||
| 184 | if (!strcmp(v, cpu_devs[i]->c_ident[0]) || | ||
| 185 | (cpu_devs[i]->c_ident[1] && | ||
| 186 | !strcmp(v, cpu_devs[i]->c_ident[1]))) { | ||
| 187 | c->x86_vendor = i; | ||
| 188 | this_cpu = cpu_devs[i]; | ||
| 189 | return; | ||
| 190 | } | ||
| 191 | } | ||
| 192 | } | ||
| 193 | if (!printed) { | ||
| 194 | printed++; | ||
| 195 | printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n"); | ||
| 196 | printk(KERN_ERR "CPU: Your system may be unstable.\n"); | ||
| 197 | } | ||
| 198 | c->x86_vendor = X86_VENDOR_UNKNOWN; | ||
| 199 | } | ||
| 200 | |||
| 201 | static void __init early_cpu_support_print(void) | ||
| 202 | { | ||
| 203 | int i,j; | ||
| 204 | struct cpu_dev *cpu_devx; | ||
| 205 | |||
| 206 | printk("KERNEL supported cpus:\n"); | ||
| 207 | for (i = 0; i < X86_VENDOR_NUM; i++) { | ||
| 208 | cpu_devx = cpu_devs[i]; | ||
| 209 | if (!cpu_devx) | ||
| 210 | continue; | ||
| 211 | for (j = 0; j < 2; j++) { | ||
| 212 | if (!cpu_devx->c_ident[j]) | ||
| 213 | continue; | ||
| 214 | printk(" %s %s\n", cpu_devx->c_vendor, | ||
| 215 | cpu_devx->c_ident[j]); | ||
| 216 | } | ||
| 217 | } | ||
| 218 | } | ||
| 219 | |||
| 220 | static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c); | ||
| 221 | |||
| 222 | void __init early_cpu_init(void) | ||
| 223 | { | ||
| 224 | struct cpu_vendor_dev *cvdev; | ||
| 225 | |||
| 226 | for (cvdev = __x86cpuvendor_start ; | ||
| 227 | cvdev < __x86cpuvendor_end ; | ||
| 228 | cvdev++) | ||
| 229 | cpu_devs[cvdev->vendor] = cvdev->cpu_dev; | ||
| 230 | early_cpu_support_print(); | ||
| 231 | early_identify_cpu(&boot_cpu_data); | ||
| 232 | } | ||
| 233 | |||
| 234 | /* Do some early cpuid on the boot CPU to get some parameters that are | ||
| 235 | needed before check_bugs. Everything advanced is in identify_cpu | ||
| 236 | below. */ | ||
| 237 | static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) | ||
| 238 | { | ||
| 239 | u32 tfms, xlvl; | ||
| 240 | |||
| 241 | c->loops_per_jiffy = loops_per_jiffy; | ||
| 242 | c->x86_cache_size = -1; | ||
| 243 | c->x86_vendor = X86_VENDOR_UNKNOWN; | ||
| 244 | c->x86_model = c->x86_mask = 0; /* So far unknown... */ | ||
| 245 | c->x86_vendor_id[0] = '\0'; /* Unset */ | ||
| 246 | c->x86_model_id[0] = '\0'; /* Unset */ | ||
| 247 | c->x86_clflush_size = 64; | ||
| 248 | c->x86_cache_alignment = c->x86_clflush_size; | ||
| 249 | c->x86_max_cores = 1; | ||
| 250 | c->x86_coreid_bits = 0; | ||
| 251 | c->extended_cpuid_level = 0; | ||
| 252 | memset(&c->x86_capability, 0, sizeof c->x86_capability); | ||
| 253 | |||
| 254 | /* Get vendor name */ | ||
| 255 | cpuid(0x00000000, (unsigned int *)&c->cpuid_level, | ||
| 256 | (unsigned int *)&c->x86_vendor_id[0], | ||
| 257 | (unsigned int *)&c->x86_vendor_id[8], | ||
| 258 | (unsigned int *)&c->x86_vendor_id[4]); | ||
| 259 | |||
| 260 | get_cpu_vendor(c); | ||
| 261 | |||
| 262 | /* Initialize the standard set of capabilities */ | ||
| 263 | /* Note that the vendor-specific code below might override */ | ||
| 264 | |||
| 265 | /* Intel-defined flags: level 0x00000001 */ | ||
| 266 | if (c->cpuid_level >= 0x00000001) { | ||
| 267 | __u32 misc; | ||
| 268 | cpuid(0x00000001, &tfms, &misc, &c->x86_capability[4], | ||
| 269 | &c->x86_capability[0]); | ||
| 270 | c->x86 = (tfms >> 8) & 0xf; | ||
| 271 | c->x86_model = (tfms >> 4) & 0xf; | ||
| 272 | c->x86_mask = tfms & 0xf; | ||
| 273 | if (c->x86 == 0xf) | ||
| 274 | c->x86 += (tfms >> 20) & 0xff; | ||
| 275 | if (c->x86 >= 0x6) | ||
| 276 | c->x86_model += ((tfms >> 16) & 0xF) << 4; | ||
| 277 | if (test_cpu_cap(c, X86_FEATURE_CLFLSH)) | ||
| 278 | c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; | ||
| 279 | } else { | ||
| 280 | /* Have CPUID level 0 only - unheard of */ | ||
| 281 | c->x86 = 4; | ||
| 282 | } | ||
| 283 | |||
| 284 | c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xff; | ||
| 285 | #ifdef CONFIG_SMP | ||
| 286 | c->phys_proc_id = c->initial_apicid; | ||
| 287 | #endif | ||
| 288 | /* AMD-defined flags: level 0x80000001 */ | ||
| 289 | xlvl = cpuid_eax(0x80000000); | ||
| 290 | c->extended_cpuid_level = xlvl; | ||
| 291 | if ((xlvl & 0xffff0000) == 0x80000000) { | ||
| 292 | if (xlvl >= 0x80000001) { | ||
| 293 | c->x86_capability[1] = cpuid_edx(0x80000001); | ||
| 294 | c->x86_capability[6] = cpuid_ecx(0x80000001); | ||
| 295 | } | ||
| 296 | if (xlvl >= 0x80000004) | ||
| 297 | get_model_name(c); /* Default name */ | ||
| 298 | } | ||
| 299 | |||
| 300 | /* Transmeta-defined flags: level 0x80860001 */ | ||
| 301 | xlvl = cpuid_eax(0x80860000); | ||
| 302 | if ((xlvl & 0xffff0000) == 0x80860000) { | ||
| 303 | /* Don't set x86_cpuid_level here for now, to avoid confusion. */ | ||
| 304 | if (xlvl >= 0x80860001) | ||
| 305 | c->x86_capability[2] = cpuid_edx(0x80860001); | ||
| 306 | } | ||
| 307 | |||
| 308 | c->extended_cpuid_level = cpuid_eax(0x80000000); | ||
| 309 | if (c->extended_cpuid_level >= 0x80000007) | ||
| 310 | c->x86_power = cpuid_edx(0x80000007); | ||
| 311 | |||
| 312 | if (c->extended_cpuid_level >= 0x80000008) { | ||
| 313 | u32 eax = cpuid_eax(0x80000008); | ||
| 314 | |||
| 315 | c->x86_virt_bits = (eax >> 8) & 0xff; | ||
| 316 | c->x86_phys_bits = eax & 0xff; | ||
| 317 | } | ||
| 318 | |||
| 319 | /* Assume all 64-bit CPUs support 32-bit syscall */ | ||
| 320 | set_cpu_cap(c, X86_FEATURE_SYSCALL32); | ||
| 321 | |||
| 322 | if (c->x86_vendor != X86_VENDOR_UNKNOWN && | ||
| 323 | cpu_devs[c->x86_vendor]->c_early_init) | ||
| 324 | cpu_devs[c->x86_vendor]->c_early_init(c); | ||
| 325 | |||
| 326 | validate_pat_support(c); | ||
| 327 | |||
| 328 | /* an early_param could have cleared this, but cpuid detection set it again, so clear it once more */ | ||
| 329 | if (disable_apic) | ||
| 330 | clear_cpu_cap(c, X86_FEATURE_APIC); | ||
| 331 | } | ||
| 332 | |||
| 333 | /* | ||
| 334 | * This does the hard work of actually picking apart the CPU stuff... | ||
| 335 | */ | ||
| 336 | static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | ||
| 337 | { | ||
| 338 | int i; | ||
| 339 | |||
| 340 | early_identify_cpu(c); | ||
| 341 | |||
| 342 | init_scattered_cpuid_features(c); | ||
| 343 | |||
| 344 | c->apicid = phys_pkg_id(0); | ||
| 345 | |||
| 346 | /* | ||
| 347 | * Vendor-specific initialization. In this section we | ||
| 348 | * canonicalize the feature flags, meaning if there are | ||
| 349 | * features a certain CPU supports which CPUID doesn't | ||
| 350 | * tell us, CPUID claiming incorrect flags, or other bugs, | ||
| 351 | * we handle them here. | ||
| 352 | * | ||
| 353 | * At the end of this section, c->x86_capability better | ||
| 354 | * indicate the features this CPU genuinely supports! | ||
| 355 | */ | ||
| 356 | if (this_cpu->c_init) | ||
| 357 | this_cpu->c_init(c); | ||
| 358 | |||
| 359 | detect_ht(c); | ||
| 360 | |||
| 361 | /* | ||
| 362 | * On SMP, boot_cpu_data holds the common feature set between | ||
| 363 | * all CPUs; so make sure that we indicate which features are | ||
| 364 | * common between the CPUs. The first time this routine gets | ||
| 365 | * executed, c == &boot_cpu_data. | ||
| 366 | */ | ||
| 367 | if (c != &boot_cpu_data) { | ||
| 368 | /* AND the already accumulated flags with these */ | ||
| 369 | for (i = 0; i < NCAPINTS; i++) | ||
| 370 | boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; | ||
| 371 | } | ||
| 372 | |||
| 373 | /* Clear all flags overridden by options */ | ||
| 374 | for (i = 0; i < NCAPINTS; i++) | ||
| 375 | c->x86_capability[i] &= ~cleared_cpu_caps[i]; | ||
| 376 | |||
| 377 | #ifdef CONFIG_X86_MCE | ||
| 378 | mcheck_init(c); | ||
| 379 | #endif | ||
| 380 | select_idle_routine(c); | ||
| 381 | |||
| 382 | #ifdef CONFIG_NUMA | ||
| 383 | numa_add_cpu(smp_processor_id()); | ||
| 384 | #endif | ||
| 385 | |||
| 386 | } | ||
| 387 | |||
| 388 | void __cpuinit identify_boot_cpu(void) | ||
| 389 | { | ||
| 390 | identify_cpu(&boot_cpu_data); | ||
| 391 | } | ||
| 392 | |||
| 393 | void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) | ||
| 394 | { | ||
| 395 | BUG_ON(c == &boot_cpu_data); | ||
| 396 | identify_cpu(c); | ||
| 397 | mtrr_ap_init(); | ||
| 398 | } | ||
| 399 | |||
| 400 | static __init int setup_noclflush(char *arg) | ||
| 401 | { | ||
| 402 | setup_clear_cpu_cap(X86_FEATURE_CLFLSH); | ||
| 403 | return 1; | ||
| 404 | } | ||
| 405 | __setup("noclflush", setup_noclflush); | ||
| 406 | |||
| 407 | void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) | ||
| 408 | { | ||
| 409 | if (c->x86_model_id[0]) | ||
| 410 | printk(KERN_CONT "%s", c->x86_model_id); | ||
| 411 | |||
| 412 | if (c->x86_mask || c->cpuid_level >= 0) | ||
| 413 | printk(KERN_CONT " stepping %02x\n", c->x86_mask); | ||
| 414 | else | ||
| 415 | printk(KERN_CONT "\n"); | ||
| 416 | } | ||
| 417 | |||
| 418 | static __init int setup_disablecpuid(char *arg) | ||
| 419 | { | ||
| 420 | int bit; | ||
| 421 | if (get_option(&arg, &bit) && bit < NCAPINTS*32) | ||
| 422 | setup_clear_cpu_cap(bit); | ||
| 423 | else | ||
| 424 | return 0; | ||
| 425 | return 1; | ||
| 426 | } | ||
| 427 | __setup("clearcpuid=", setup_disablecpuid); | ||
| 428 | |||
| 429 | cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; | ||
| 430 | |||
| 431 | struct x8664_pda **_cpu_pda __read_mostly; | ||
| 432 | EXPORT_SYMBOL(_cpu_pda); | ||
| 433 | |||
| 434 | struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; | ||
| 435 | |||
| 436 | char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; | ||
| 437 | |||
| 438 | unsigned long __supported_pte_mask __read_mostly = ~0UL; | ||
| 439 | EXPORT_SYMBOL_GPL(__supported_pte_mask); | ||
| 440 | |||
| 441 | static int do_not_nx __cpuinitdata; | ||
| 442 | |||
| 443 | /* noexec=on|off | ||
| 444 | Control non-executable mappings for 64-bit processes. | ||
| 445 | |||
| 446 | on Enable (default) | ||
| 447 | off Disable | ||
| 448 | */ | ||
| 449 | static int __init nonx_setup(char *str) | ||
| 450 | { | ||
| 451 | if (!str) | ||
| 452 | return -EINVAL; | ||
| 453 | if (!strncmp(str, "on", 2)) { | ||
| 454 | __supported_pte_mask |= _PAGE_NX; | ||
| 455 | do_not_nx = 0; | ||
| 456 | } else if (!strncmp(str, "off", 3)) { | ||
| 457 | do_not_nx = 1; | ||
| 458 | __supported_pte_mask &= ~_PAGE_NX; | ||
| 459 | } | ||
| 460 | return 0; | ||
| 461 | } | ||
| 462 | early_param("noexec", nonx_setup); | ||
| 463 | |||
| 464 | int force_personality32; | ||
| 465 | |||
| 466 | /* noexec32=on|off | ||
| 467 | Control non-executable heap for 32-bit processes. | ||
| 468 | To control the stack too, use noexec=off. | ||
| 469 | |||
| 470 | on PROT_READ does not imply PROT_EXEC for 32-bit processes (default) | ||
| 471 | off PROT_READ implies PROT_EXEC | ||
| 472 | */ | ||
| 473 | static int __init nonx32_setup(char *str) | ||
| 474 | { | ||
| 475 | if (!strcmp(str, "on")) | ||
| 476 | force_personality32 &= ~READ_IMPLIES_EXEC; | ||
| 477 | else if (!strcmp(str, "off")) | ||
| 478 | force_personality32 |= READ_IMPLIES_EXEC; | ||
| 479 | return 1; | ||
| 480 | } | ||
| 481 | __setup("noexec32=", nonx32_setup); | ||
| 482 | |||
| 483 | void pda_init(int cpu) | ||
| 484 | { | ||
| 485 | struct x8664_pda *pda = cpu_pda(cpu); | ||
| 486 | |||
| 487 | /* Set up data that may be needed in __get_free_pages early */ | ||
| 488 | loadsegment(fs, 0); | ||
| 489 | loadsegment(gs, 0); | ||
| 490 | /* Memory clobbers used to order PDA accesses */ | ||
| 491 | mb(); | ||
| 492 | wrmsrl(MSR_GS_BASE, pda); | ||
| 493 | mb(); | ||
| 494 | |||
| 495 | pda->cpunumber = cpu; | ||
| 496 | pda->irqcount = -1; | ||
| 497 | pda->kernelstack = (unsigned long)stack_thread_info() - | ||
| 498 | PDA_STACKOFFSET + THREAD_SIZE; | ||
| 499 | pda->active_mm = &init_mm; | ||
| 500 | pda->mmu_state = 0; | ||
| 501 | |||
| 502 | if (cpu == 0) { | ||
| 503 | /* others are initialized in smpboot.c */ | ||
| 504 | pda->pcurrent = &init_task; | ||
| 505 | pda->irqstackptr = boot_cpu_stack; | ||
| 506 | } else { | ||
| 507 | pda->irqstackptr = (char *) | ||
| 508 | __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); | ||
| 509 | if (!pda->irqstackptr) | ||
| 510 | panic("cannot allocate irqstack for cpu %d", cpu); | ||
| 511 | |||
| 512 | if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) | ||
| 513 | pda->nodenumber = cpu_to_node(cpu); | ||
| 514 | } | ||
| 515 | |||
| 516 | pda->irqstackptr += IRQSTACKSIZE-64; | ||
| 517 | } | ||
| 518 | |||
| 519 | char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + | ||
| 520 | DEBUG_STKSZ] | ||
| 521 | __attribute__((section(".bss.page_aligned"))); | ||
| 522 | |||
| 523 | extern asmlinkage void ignore_sysret(void); | ||
| 524 | |||
| 525 | /* May not be marked __init: used by software suspend */ | ||
| 526 | void syscall_init(void) | ||
| 527 | { | ||
| 528 | /* | ||
| 529 | * LSTAR and STAR live in a somewhat strange symbiosis. | ||
| 530 | * They both write to the same internal register. STAR allows setting | ||
| 531 | * CS/DS but only a 32-bit target. LSTAR sets the 64-bit rip. | ||
| 532 | */ | ||
| 533 | wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32); | ||
| 534 | wrmsrl(MSR_LSTAR, system_call); | ||
| 535 | wrmsrl(MSR_CSTAR, ignore_sysret); | ||
| 536 | |||
| 537 | #ifdef CONFIG_IA32_EMULATION | ||
| 538 | syscall32_cpu_init(); | ||
| 539 | #endif | ||
| 540 | |||
| 541 | /* Flags to clear on syscall */ | ||
| 542 | wrmsrl(MSR_SYSCALL_MASK, | ||
| 543 | X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL); | ||
| 544 | } | ||
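The `MSR_STAR` write above packs two segment-selector bases into one 64-bit value: the SYSRET base in bits 63:48 and the SYSCALL base in bits 47:32. A small sketch of that packing arithmetic, using placeholder selector values rather than the real `__USER32_CS`/`__KERNEL_CS`:

```c
/* Sketch of the MSR_STAR layout written in syscall_init() above. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t user32_cs = 0x23;	/* hypothetical selector values */
	uint64_t kernel_cs = 0x10;

	uint64_t star = (user32_cs << 48) | (kernel_cs << 32);

	printf("MSR_STAR would be %#018llx\n", (unsigned long long)star);
	printf("  sysret  base: %#llx\n", (unsigned long long)((star >> 48) & 0xffff));
	printf("  syscall base: %#llx\n", (unsigned long long)((star >> 32) & 0xffff));
	return 0;
}
```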
| 545 | |||
| 546 | void __cpuinit check_efer(void) | ||
| 547 | { | ||
| 548 | unsigned long efer; | ||
| 549 | |||
| 550 | rdmsrl(MSR_EFER, efer); | ||
| 551 | if (!(efer & EFER_NX) || do_not_nx) | ||
| 552 | __supported_pte_mask &= ~_PAGE_NX; | ||
| 553 | } | ||
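`check_efer()` simply reads EFER and clears `_PAGE_NX` from the supported mask if the NX-enable bit is absent (or `noexec=off` was given). A hedged userspace sketch of the same read via the msr character device; it assumes `CONFIG_X86_MSR`, root privileges, and the EFER address/bit positions from the architecture manuals:

```c
/* Sketch: read EFER through /dev/cpu/0/msr and test the NX-enable bit,
 * roughly what check_efer() does with rdmsrl(). */
#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>

#define MSR_EFER_ADDR 0xc0000080u	/* assumed EFER MSR address */
#define EFER_NX_BIT   (1ULL << 11)	/* assumed NX-enable bit */

int main(void)
{
	uint64_t efer;
	int fd = open("/dev/cpu/0/msr", O_RDONLY);

	if (fd < 0 || pread(fd, &efer, sizeof(efer), MSR_EFER_ADDR) != sizeof(efer)) {
		perror("msr read");
		return 1;
	}
	printf("EFER=%#llx NX %s\n", (unsigned long long)efer,
	       (efer & EFER_NX_BIT) ? "enabled" : "disabled");
	close(fd);
	return 0;
}
```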
| 554 | |||
| 555 | unsigned long kernel_eflags; | ||
| 556 | |||
| 557 | /* | ||
| 558 | * Copies of the original ist values from the tss are only accessed during | ||
| 559 | * debugging; no special alignment is required. | ||
| 560 | */ | ||
| 561 | DEFINE_PER_CPU(struct orig_ist, orig_ist); | ||
| 562 | |||
| 563 | /* | ||
| 564 | * cpu_init() initializes state that is per-CPU. Some data is already | ||
| 565 | * initialized (naturally) in the bootstrap process, such as the GDT | ||
| 566 | * and IDT. We reload them nevertheless; this function acts as a | ||
| 567 | * 'CPU state barrier', and nothing should get across. | ||
| 568 | * A lot of state is already set up in PDA init. | ||
| 569 | */ | ||
| 570 | void __cpuinit cpu_init(void) | ||
| 571 | { | ||
| 572 | int cpu = stack_smp_processor_id(); | ||
| 573 | struct tss_struct *t = &per_cpu(init_tss, cpu); | ||
| 574 | struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu); | ||
| 575 | unsigned long v; | ||
| 576 | char *estacks = NULL; | ||
| 577 | struct task_struct *me; | ||
| 578 | int i; | ||
| 579 | |||
| 580 | /* CPU 0 is initialised in head64.c */ | ||
| 581 | if (cpu != 0) | ||
| 582 | pda_init(cpu); | ||
| 583 | else | ||
| 584 | estacks = boot_exception_stacks; | ||
| 585 | |||
| 586 | me = current; | ||
| 587 | |||
| 588 | if (cpu_test_and_set(cpu, cpu_initialized)) | ||
| 589 | panic("CPU#%d already initialized!\n", cpu); | ||
| 590 | |||
| 591 | printk(KERN_INFO "Initializing CPU#%d\n", cpu); | ||
| 592 | |||
| 593 | clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); | ||
| 594 | |||
| 595 | /* | ||
| 596 | * Initialize the per-CPU GDT with the boot GDT, | ||
| 597 | * and set up the GDT descriptor: | ||
| 598 | */ | ||
| 599 | |||
| 600 | switch_to_new_gdt(); | ||
| 601 | load_idt((const struct desc_ptr *)&idt_descr); | ||
| 602 | |||
| 603 | memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); | ||
| 604 | syscall_init(); | ||
| 605 | |||
| 606 | wrmsrl(MSR_FS_BASE, 0); | ||
| 607 | wrmsrl(MSR_KERNEL_GS_BASE, 0); | ||
| 608 | barrier(); | ||
| 609 | |||
| 610 | check_efer(); | ||
| 611 | |||
| 612 | /* | ||
| 613 | * set up and load the per-CPU TSS | ||
| 614 | */ | ||
| 615 | for (v = 0; v < N_EXCEPTION_STACKS; v++) { | ||
| 616 | static const unsigned int order[N_EXCEPTION_STACKS] = { | ||
| 617 | [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, | ||
| 618 | [DEBUG_STACK - 1] = DEBUG_STACK_ORDER | ||
| 619 | }; | ||
| 620 | if (cpu) { | ||
| 621 | estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); | ||
| 622 | if (!estacks) | ||
| 623 | panic("Cannot allocate exception stack %ld %d\n", | ||
| 624 | v, cpu); | ||
| 625 | } | ||
| 626 | estacks += PAGE_SIZE << order[v]; | ||
| 627 | orig_ist->ist[v] = t->x86_tss.ist[v] = (unsigned long)estacks; | ||
| 628 | } | ||
| 629 | |||
| 630 | t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); | ||
| 631 | /* | ||
| 632 | * <= is required because the CPU will access up to | ||
| 633 | * 8 bits beyond the end of the IO permission bitmap. | ||
| 634 | */ | ||
| 635 | for (i = 0; i <= IO_BITMAP_LONGS; i++) | ||
| 636 | t->io_bitmap[i] = ~0UL; | ||
| 637 | |||
| 638 | atomic_inc(&init_mm.mm_count); | ||
| 639 | me->active_mm = &init_mm; | ||
| 640 | if (me->mm) | ||
| 641 | BUG(); | ||
| 642 | enter_lazy_tlb(&init_mm, me); | ||
| 643 | |||
| 644 | load_sp0(t, ¤t->thread); | ||
| 645 | set_tss_desc(cpu, t); | ||
| 646 | load_TR_desc(); | ||
| 647 | load_LDT(&init_mm.context); | ||
| 648 | |||
| 649 | #ifdef CONFIG_KGDB | ||
| 650 | /* | ||
| 651 | * If the kgdb is connected no debug regs should be altered. This | ||
| 652 | * is only applicable when KGDB and a KGDB I/O module are built | ||
| 653 | * into the kernel and you are using early debugging with | ||
| 654 | * kgdbwait. KGDB will control the kernel HW breakpoint registers. | ||
| 655 | */ | ||
| 656 | if (kgdb_connected && arch_kgdb_ops.correct_hw_break) | ||
| 657 | arch_kgdb_ops.correct_hw_break(); | ||
| 658 | else { | ||
| 659 | #endif | ||
| 660 | /* | ||
| 661 | * Clear all 6 debug registers: | ||
| 662 | */ | ||
| 663 | |||
| 664 | set_debugreg(0UL, 0); | ||
| 665 | set_debugreg(0UL, 1); | ||
| 666 | set_debugreg(0UL, 2); | ||
| 667 | set_debugreg(0UL, 3); | ||
| 668 | set_debugreg(0UL, 6); | ||
| 669 | set_debugreg(0UL, 7); | ||
| 670 | #ifdef CONFIG_KGDB | ||
| 671 | /* If the kgdb is connected no debug regs should be altered. */ | ||
| 672 | } | ||
| 673 | #endif | ||
| 674 | |||
| 675 | fpu_init(); | ||
| 676 | |||
| 677 | raw_local_save_flags(kernel_eflags); | ||
| 678 | |||
| 679 | if (is_uv_system()) | ||
| 680 | uv_cpu_init(); | ||
| 681 | } | ||
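The `<=` loop in `cpu_init()` above deliberately writes one extra word past the nominal bitmap length, because the CPU may read up to 8 bits beyond the end of the I/O permission bitmap. A self-contained sketch of that terminator-word idea, with a shrunken illustrative size:

```c
/* Sketch of the io_bitmap fill: run to <= LONGS so one extra all-ones
 * word acts as a guard beyond the nominal end of the bitmap. */
#include <stdio.h>

#define FAKE_IO_BITMAP_LONGS 8		/* real value is much larger */

int main(void)
{
	unsigned long io_bitmap[FAKE_IO_BITMAP_LONGS + 1];	/* +1 terminator */
	int i;

	for (i = 0; i <= FAKE_IO_BITMAP_LONGS; i++)
		io_bitmap[i] = ~0UL;	/* all ports denied, incl. the pad */

	printf("filled %d words (last is the guard word)\n", i);
	return 0;
}
```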
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 4d894e8565fe..de4094a39210 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h | |||
| @@ -21,23 +21,16 @@ struct cpu_dev { | |||
| 21 | void (*c_init)(struct cpuinfo_x86 * c); | 21 | void (*c_init)(struct cpuinfo_x86 * c); |
| 22 | void (*c_identify)(struct cpuinfo_x86 * c); | 22 | void (*c_identify)(struct cpuinfo_x86 * c); |
| 23 | unsigned int (*c_size_cache)(struct cpuinfo_x86 * c, unsigned int size); | 23 | unsigned int (*c_size_cache)(struct cpuinfo_x86 * c, unsigned int size); |
| 24 | int c_x86_vendor; | ||
| 24 | }; | 25 | }; |
| 25 | 26 | ||
| 26 | extern struct cpu_dev * cpu_devs [X86_VENDOR_NUM]; | 27 | #define cpu_dev_register(cpu_devX) \ |
| 28 | static struct cpu_dev *__cpu_dev_##cpu_devX __used \ | ||
| 29 | __attribute__((__section__(".x86_cpu_dev.init"))) = \ | ||
| 30 | &cpu_devX; | ||
| 27 | 31 | ||
| 28 | struct cpu_vendor_dev { | 32 | extern struct cpu_dev *__x86_cpu_dev_start[], *__x86_cpu_dev_end[]; |
| 29 | int vendor; | ||
| 30 | struct cpu_dev *cpu_dev; | ||
| 31 | }; | ||
| 32 | |||
| 33 | #define cpu_vendor_dev_register(cpu_vendor_id, cpu_dev) \ | ||
| 34 | static struct cpu_vendor_dev __cpu_vendor_dev_##cpu_vendor_id __used \ | ||
| 35 | __attribute__((__section__(".x86cpuvendor.init"))) = \ | ||
| 36 | { cpu_vendor_id, cpu_dev } | ||
| 37 | |||
| 38 | extern struct cpu_vendor_dev __x86cpuvendor_start[], __x86cpuvendor_end[]; | ||
| 39 | 33 | ||
| 40 | extern int get_model_name(struct cpuinfo_x86 *c); | ||
| 41 | extern void display_cacheinfo(struct cpuinfo_x86 *c); | 34 | extern void display_cacheinfo(struct cpuinfo_x86 *c); |
| 42 | 35 | ||
| 43 | #endif | 36 | #endif |
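The new `cpu_dev_register()` macro drops a pointer into a dedicated `.x86_cpu_dev.init` section, and generic code walks that section between `__x86_cpu_dev_start[]` and `__x86_cpu_dev_end[]` instead of indexing a fixed vendor table. A hedged, self-contained sketch of the same section-array pattern; the names are made up, and it leans on GNU ld's automatic `__start_*`/`__stop_*` symbols rather than a linker script:

```c
/* Sketch: register objects by placing pointers in a named section, then
 * iterate the section as an array.  Compile with gcc on a GNU/Linux
 * toolchain; section name must be a valid C identifier for the automatic
 * __start_/__stop_ symbols to appear. */
#include <stdio.h>

struct demo_dev {
	const char *name;
};

#define demo_dev_register(dev)                                        \
	static struct demo_dev *__demo_dev_##dev                      \
	__attribute__((__used__, __section__("demo_dev_ptrs"))) = &dev

static struct demo_dev dev_a = { "a" };
static struct demo_dev dev_b = { "b" };
demo_dev_register(dev_a);
demo_dev_register(dev_b);

/* Bounds provided by the linker for the orphan section. */
extern struct demo_dev *__start_demo_dev_ptrs[];
extern struct demo_dev *__stop_demo_dev_ptrs[];

int main(void)
{
	struct demo_dev **p;

	for (p = __start_demo_dev_ptrs; p < __stop_demo_dev_ptrs; p++)
		printf("registered: %s\n", (*p)->name);
	return 0;
}
```

The design win is the same in both cases: adding a vendor driver means adding one object file, with no central table to edit.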
diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig index cb7a5715596d..efae3b22a0ff 100644 --- a/arch/x86/kernel/cpu/cpufreq/Kconfig +++ b/arch/x86/kernel/cpu/cpufreq/Kconfig | |||
| @@ -235,9 +235,9 @@ config X86_LONGHAUL | |||
| 235 | If in doubt, say N. | 235 | If in doubt, say N. |
| 236 | 236 | ||
| 237 | config X86_E_POWERSAVER | 237 | config X86_E_POWERSAVER |
| 238 | tristate "VIA C7 Enhanced PowerSaver (EXPERIMENTAL)" | 238 | tristate "VIA C7 Enhanced PowerSaver" |
| 239 | select CPU_FREQ_TABLE | 239 | select CPU_FREQ_TABLE |
| 240 | depends on X86_32 && EXPERIMENTAL | 240 | depends on X86_32 |
| 241 | help | 241 | help |
| 242 | This adds the CPUFreq driver for VIA C7 processors. | 242 | This adds the CPUFreq driver for VIA C7 processors. |
| 243 | 243 | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index b0c8208df9fa..8e48c5d4467d 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | |||
| @@ -202,7 +202,7 @@ static void drv_write(struct drv_cmd *cmd) | |||
| 202 | cpumask_t saved_mask = current->cpus_allowed; | 202 | cpumask_t saved_mask = current->cpus_allowed; |
| 203 | unsigned int i; | 203 | unsigned int i; |
| 204 | 204 | ||
| 205 | for_each_cpu_mask(i, cmd->mask) { | 205 | for_each_cpu_mask_nr(i, cmd->mask) { |
| 206 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(i)); | 206 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(i)); |
| 207 | do_drv_write(cmd); | 207 | do_drv_write(cmd); |
| 208 | } | 208 | } |
| @@ -256,7 +256,8 @@ static u32 get_cur_val(const cpumask_t *mask) | |||
| 256 | * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and | 256 | * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and |
| 257 | * no meaning should be associated with absolute values of these MSRs. | 257 | * no meaning should be associated with absolute values of these MSRs. |
| 258 | */ | 258 | */ |
| 259 | static unsigned int get_measured_perf(unsigned int cpu) | 259 | static unsigned int get_measured_perf(struct cpufreq_policy *policy, |
| 260 | unsigned int cpu) | ||
| 260 | { | 261 | { |
| 261 | union { | 262 | union { |
| 262 | struct { | 263 | struct { |
| @@ -326,7 +327,7 @@ static unsigned int get_measured_perf(unsigned int cpu) | |||
| 326 | 327 | ||
| 327 | #endif | 328 | #endif |
| 328 | 329 | ||
| 329 | retval = per_cpu(drv_data, cpu)->max_freq * perf_percent / 100; | 330 | retval = per_cpu(drv_data, policy->cpu)->max_freq * perf_percent / 100; |
| 330 | 331 | ||
| 331 | put_cpu(); | 332 | put_cpu(); |
| 332 | set_cpus_allowed_ptr(current, &saved_mask); | 333 | set_cpus_allowed_ptr(current, &saved_mask); |
| @@ -451,7 +452,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, | |||
| 451 | 452 | ||
| 452 | freqs.old = perf->states[perf->state].core_frequency * 1000; | 453 | freqs.old = perf->states[perf->state].core_frequency * 1000; |
| 453 | freqs.new = data->freq_table[next_state].frequency; | 454 | freqs.new = data->freq_table[next_state].frequency; |
| 454 | for_each_cpu_mask(i, cmd.mask) { | 455 | for_each_cpu_mask_nr(i, cmd.mask) { |
| 455 | freqs.cpu = i; | 456 | freqs.cpu = i; |
| 456 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | 457 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); |
| 457 | } | 458 | } |
| @@ -466,7 +467,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, | |||
| 466 | } | 467 | } |
| 467 | } | 468 | } |
| 468 | 469 | ||
| 469 | for_each_cpu_mask(i, cmd.mask) { | 470 | for_each_cpu_mask_nr(i, cmd.mask) { |
| 470 | freqs.cpu = i; | 471 | freqs.cpu = i; |
| 471 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | 472 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); |
| 472 | } | 473 | } |
| @@ -779,13 +780,20 @@ static int __init acpi_cpufreq_init(void) | |||
| 779 | { | 780 | { |
| 780 | int ret; | 781 | int ret; |
| 781 | 782 | ||
| 783 | if (acpi_disabled) | ||
| 784 | return 0; | ||
| 785 | |||
| 782 | dprintk("acpi_cpufreq_init\n"); | 786 | dprintk("acpi_cpufreq_init\n"); |
| 783 | 787 | ||
| 784 | ret = acpi_cpufreq_early_init(); | 788 | ret = acpi_cpufreq_early_init(); |
| 785 | if (ret) | 789 | if (ret) |
| 786 | return ret; | 790 | return ret; |
| 787 | 791 | ||
| 788 | return cpufreq_register_driver(&acpi_cpufreq_driver); | 792 | ret = cpufreq_register_driver(&acpi_cpufreq_driver); |
| 793 | if (ret) | ||
| 794 | free_percpu(acpi_perf_data); | ||
| 795 | |||
| 796 | return ret; | ||
| 789 | } | 797 | } |
| 790 | 798 | ||
| 791 | static void __exit acpi_cpufreq_exit(void) | 799 | static void __exit acpi_cpufreq_exit(void) |
| @@ -795,8 +803,6 @@ static void __exit acpi_cpufreq_exit(void) | |||
| 795 | cpufreq_unregister_driver(&acpi_cpufreq_driver); | 803 | cpufreq_unregister_driver(&acpi_cpufreq_driver); |
| 796 | 804 | ||
| 797 | free_percpu(acpi_perf_data); | 805 | free_percpu(acpi_perf_data); |
| 798 | |||
| 799 | return; | ||
| 800 | } | 806 | } |
| 801 | 807 | ||
| 802 | module_param(acpi_pstate_strict, uint, 0644); | 808 | module_param(acpi_pstate_strict, uint, 0644); |
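Besides the early `acpi_disabled` bail-out, the init-path change above stops leaking `acpi_perf_data` when driver registration fails. A tiny sketch of that allocate/register/unwind shape, with stand-in names rather than the real cpufreq API:

```c
/* Sketch: release the earlier allocation if registration fails, so the
 * failure path leaves nothing behind. */
#include <stdio.h>
#include <stdlib.h>

static int register_driver_stub(void)
{
	return -1;	/* pretend registration failed */
}

int main(void)
{
	void *perf_data = malloc(128);	/* stands in for acpi_perf_data */
	if (!perf_data)
		return 1;

	if (register_driver_stub() != 0) {
		free(perf_data);	/* the unwind the patch adds */
		fprintf(stderr, "registration failed, freed perf data\n");
		return 1;
	}
	return 0;
}
```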
diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c index 94619c22f563..fe613c93b366 100644 --- a/arch/x86/kernel/cpu/cpufreq/elanfreq.c +++ b/arch/x86/kernel/cpu/cpufreq/elanfreq.c | |||
| @@ -25,8 +25,8 @@ | |||
| 25 | #include <linux/cpufreq.h> | 25 | #include <linux/cpufreq.h> |
| 26 | 26 | ||
| 27 | #include <asm/msr.h> | 27 | #include <asm/msr.h> |
| 28 | #include <asm/timex.h> | 28 | #include <linux/timex.h> |
| 29 | #include <asm/io.h> | 29 | #include <linux/io.h> |
| 30 | 30 | ||
| 31 | #define REG_CSCIR 0x22 /* Chip Setup and Control Index Register */ | 31 | #define REG_CSCIR 0x22 /* Chip Setup and Control Index Register */ |
| 32 | #define REG_CSCDR 0x23 /* Chip Setup and Control Data Register */ | 32 | #define REG_CSCDR 0x23 /* Chip Setup and Control Data Register */ |
| @@ -44,7 +44,7 @@ struct s_elan_multiplier { | |||
| 44 | * It is important that the frequencies | 44 | * It is important that the frequencies |
| 45 | * are listed in ascending order here! | 45 | * are listed in ascending order here! |
| 46 | */ | 46 | */ |
| 47 | struct s_elan_multiplier elan_multiplier[] = { | 47 | static struct s_elan_multiplier elan_multiplier[] = { |
| 48 | {1000, 0x02, 0x18}, | 48 | {1000, 0x02, 0x18}, |
| 49 | {2000, 0x02, 0x10}, | 49 | {2000, 0x02, 0x10}, |
| 50 | {4000, 0x02, 0x08}, | 50 | {4000, 0x02, 0x08}, |
| @@ -82,7 +82,7 @@ static unsigned int elanfreq_get_cpu_frequency(unsigned int cpu) | |||
| 82 | u8 clockspeed_reg; /* Clock Speed Register */ | 82 | u8 clockspeed_reg; /* Clock Speed Register */ |
| 83 | 83 | ||
| 84 | local_irq_disable(); | 84 | local_irq_disable(); |
| 85 | outb_p(0x80,REG_CSCIR); | 85 | outb_p(0x80, REG_CSCIR); |
| 86 | clockspeed_reg = inb_p(REG_CSCDR); | 86 | clockspeed_reg = inb_p(REG_CSCDR); |
| 87 | local_irq_enable(); | 87 | local_irq_enable(); |
| 88 | 88 | ||
| @@ -98,10 +98,10 @@ static unsigned int elanfreq_get_cpu_frequency(unsigned int cpu) | |||
| 98 | } | 98 | } |
| 99 | 99 | ||
| 100 | /* 33 MHz is not 32 MHz... */ | 100 | /* 33 MHz is not 32 MHz... */ |
| 101 | if ((clockspeed_reg & 0xE0)==0xA0) | 101 | if ((clockspeed_reg & 0xE0) == 0xA0) |
| 102 | return 33000; | 102 | return 33000; |
| 103 | 103 | ||
| 104 | return ((1<<((clockspeed_reg & 0xE0) >> 5)) * 1000); | 104 | return (1<<((clockspeed_reg & 0xE0) >> 5)) * 1000; |
| 105 | } | 105 | } |
| 106 | 106 | ||
| 107 | 107 | ||
| @@ -117,7 +117,7 @@ static unsigned int elanfreq_get_cpu_frequency(unsigned int cpu) | |||
| 117 | * There is no return value. | 117 | * There is no return value. |
| 118 | */ | 118 | */ |
| 119 | 119 | ||
| 120 | static void elanfreq_set_cpu_state (unsigned int state) | 120 | static void elanfreq_set_cpu_state(unsigned int state) |
| 121 | { | 121 | { |
| 122 | struct cpufreq_freqs freqs; | 122 | struct cpufreq_freqs freqs; |
| 123 | 123 | ||
| @@ -144,20 +144,20 @@ static void elanfreq_set_cpu_state (unsigned int state) | |||
| 144 | */ | 144 | */ |
| 145 | 145 | ||
| 146 | local_irq_disable(); | 146 | local_irq_disable(); |
| 147 | outb_p(0x40,REG_CSCIR); /* Disable hyperspeed mode */ | 147 | outb_p(0x40, REG_CSCIR); /* Disable hyperspeed mode */ |
| 148 | outb_p(0x00,REG_CSCDR); | 148 | outb_p(0x00, REG_CSCDR); |
| 149 | local_irq_enable(); /* wait till internal pipelines and */ | 149 | local_irq_enable(); /* wait till internal pipelines and */ |
| 150 | udelay(1000); /* buffers have cleaned up */ | 150 | udelay(1000); /* buffers have cleaned up */ |
| 151 | 151 | ||
| 152 | local_irq_disable(); | 152 | local_irq_disable(); |
| 153 | 153 | ||
| 154 | /* now, set the CPU clock speed register (0x80) */ | 154 | /* now, set the CPU clock speed register (0x80) */ |
| 155 | outb_p(0x80,REG_CSCIR); | 155 | outb_p(0x80, REG_CSCIR); |
| 156 | outb_p(elan_multiplier[state].val80h,REG_CSCDR); | 156 | outb_p(elan_multiplier[state].val80h, REG_CSCDR); |
| 157 | 157 | ||
| 158 | /* now, the hyperspeed bit in PMU Force Mode Register (0x40) */ | 158 | /* now, the hyperspeed bit in PMU Force Mode Register (0x40) */ |
| 159 | outb_p(0x40,REG_CSCIR); | 159 | outb_p(0x40, REG_CSCIR); |
| 160 | outb_p(elan_multiplier[state].val40h,REG_CSCDR); | 160 | outb_p(elan_multiplier[state].val40h, REG_CSCDR); |
| 161 | udelay(10000); | 161 | udelay(10000); |
| 162 | local_irq_enable(); | 162 | local_irq_enable(); |
| 163 | 163 | ||
| @@ -173,12 +173,12 @@ static void elanfreq_set_cpu_state (unsigned int state) | |||
| 173 | * for the hardware supported by the driver. | 173 | * for the hardware supported by the driver. |
| 174 | */ | 174 | */ |
| 175 | 175 | ||
| 176 | static int elanfreq_verify (struct cpufreq_policy *policy) | 176 | static int elanfreq_verify(struct cpufreq_policy *policy) |
| 177 | { | 177 | { |
| 178 | return cpufreq_frequency_table_verify(policy, &elanfreq_table[0]); | 178 | return cpufreq_frequency_table_verify(policy, &elanfreq_table[0]); |
| 179 | } | 179 | } |
| 180 | 180 | ||
| 181 | static int elanfreq_target (struct cpufreq_policy *policy, | 181 | static int elanfreq_target(struct cpufreq_policy *policy, |
| 182 | unsigned int target_freq, | 182 | unsigned int target_freq, |
| 183 | unsigned int relation) | 183 | unsigned int relation) |
| 184 | { | 184 | { |
| @@ -205,7 +205,7 @@ static int elanfreq_cpu_init(struct cpufreq_policy *policy) | |||
| 205 | 205 | ||
| 206 | /* capability check */ | 206 | /* capability check */ |
| 207 | if ((c->x86_vendor != X86_VENDOR_AMD) || | 207 | if ((c->x86_vendor != X86_VENDOR_AMD) || |
| 208 | (c->x86 != 4) || (c->x86_model!=10)) | 208 | (c->x86 != 4) || (c->x86_model != 10)) |
| 209 | return -ENODEV; | 209 | return -ENODEV; |
| 210 | 210 | ||
| 211 | /* max freq */ | 211 | /* max freq */ |
| @@ -213,7 +213,7 @@ static int elanfreq_cpu_init(struct cpufreq_policy *policy) | |||
| 213 | max_freq = elanfreq_get_cpu_frequency(0); | 213 | max_freq = elanfreq_get_cpu_frequency(0); |
| 214 | 214 | ||
| 215 | /* table init */ | 215 | /* table init */ |
| 216 | for (i=0; (elanfreq_table[i].frequency != CPUFREQ_TABLE_END); i++) { | 216 | for (i = 0; (elanfreq_table[i].frequency != CPUFREQ_TABLE_END); i++) { |
| 217 | if (elanfreq_table[i].frequency > max_freq) | 217 | if (elanfreq_table[i].frequency > max_freq) |
| 218 | elanfreq_table[i].frequency = CPUFREQ_ENTRY_INVALID; | 218 | elanfreq_table[i].frequency = CPUFREQ_ENTRY_INVALID; |
| 219 | } | 219 | } |
| @@ -224,7 +224,7 @@ static int elanfreq_cpu_init(struct cpufreq_policy *policy) | |||
| 224 | 224 | ||
| 225 | result = cpufreq_frequency_table_cpuinfo(policy, elanfreq_table); | 225 | result = cpufreq_frequency_table_cpuinfo(policy, elanfreq_table); |
| 226 | if (result) | 226 | if (result) |
| 227 | return (result); | 227 | return result; |
| 228 | 228 | ||
| 229 | cpufreq_frequency_table_get_attr(elanfreq_table, policy->cpu); | 229 | cpufreq_frequency_table_get_attr(elanfreq_table, policy->cpu); |
| 230 | return 0; | 230 | return 0; |
| @@ -260,7 +260,7 @@ __setup("elanfreq=", elanfreq_setup); | |||
| 260 | #endif | 260 | #endif |
| 261 | 261 | ||
| 262 | 262 | ||
| 263 | static struct freq_attr* elanfreq_attr[] = { | 263 | static struct freq_attr *elanfreq_attr[] = { |
| 264 | &cpufreq_freq_attr_scaling_available_freqs, | 264 | &cpufreq_freq_attr_scaling_available_freqs, |
| 265 | NULL, | 265 | NULL, |
| 266 | }; | 266 | }; |
| @@ -284,9 +284,9 @@ static int __init elanfreq_init(void) | |||
| 284 | 284 | ||
| 285 | /* Test if we have the right hardware */ | 285 | /* Test if we have the right hardware */ |
| 286 | if ((c->x86_vendor != X86_VENDOR_AMD) || | 286 | if ((c->x86_vendor != X86_VENDOR_AMD) || |
| 287 | (c->x86 != 4) || (c->x86_model!=10)) { | 287 | (c->x86 != 4) || (c->x86_model != 10)) { |
| 288 | printk(KERN_INFO "elanfreq: error: no Elan processor found!\n"); | 288 | printk(KERN_INFO "elanfreq: error: no Elan processor found!\n"); |
| 289 | return -ENODEV; | 289 | return -ENODEV; |
| 290 | } | 290 | } |
| 291 | return cpufreq_register_driver(&elanfreq_driver); | 291 | return cpufreq_register_driver(&elanfreq_driver); |
| 292 | } | 292 | } |
| @@ -298,7 +298,7 @@ static void __exit elanfreq_exit(void) | |||
| 298 | } | 298 | } |
| 299 | 299 | ||
| 300 | 300 | ||
| 301 | module_param (max_freq, int, 0444); | 301 | module_param(max_freq, int, 0444); |
| 302 | 302 | ||
| 303 | MODULE_LICENSE("GPL"); | 303 | MODULE_LICENSE("GPL"); |
| 304 | MODULE_AUTHOR("Robert Schwebel <r.schwebel@pengutronix.de>, Sven Geggus <sven@geggus.net>"); | 304 | MODULE_AUTHOR("Robert Schwebel <r.schwebel@pengutronix.de>, Sven Geggus <sven@geggus.net>"); |
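The elanfreq cleanups are mostly checkpatch-style, but the frequency decode they touch is worth spelling out: bits 7:5 of the clock-speed register select a power-of-two multiple of 1 MHz (reported in kHz), with `0xA0` special-cased as 33 MHz. A sketch of just that decode, with a made-up register value and ignoring the driver's other special cases:

```c
/* Sketch of the decode in elanfreq_get_cpu_frequency(). */
#include <stdio.h>

static unsigned int decode_elan_khz(unsigned char clockspeed_reg)
{
	if ((clockspeed_reg & 0xE0) == 0xA0)	/* "33 MHz is not 32 MHz" */
		return 33000;
	return (1 << ((clockspeed_reg & 0xE0) >> 5)) * 1000;
}

int main(void)
{
	unsigned char sample = 0x60;	/* hypothetical register value */

	printf("reg=%#x -> %u kHz\n", sample, decode_elan_khz(sample));
	return 0;
}
```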
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c index 06fcce516d51..b0461856acfb 100644 --- a/arch/x86/kernel/cpu/cpufreq/longhaul.c +++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * (C) 2001-2004 Dave Jones. <davej@codemonkey.org.uk> | 2 | * (C) 2001-2004 Dave Jones. <davej@redhat.com> |
| 3 | * (C) 2002 Padraig Brady. <padraig@antefacto.com> | 3 | * (C) 2002 Padraig Brady. <padraig@antefacto.com> |
| 4 | * | 4 | * |
| 5 | * Licensed under the terms of the GNU GPL License version 2. | 5 | * Licensed under the terms of the GNU GPL License version 2. |
| @@ -1019,7 +1019,7 @@ MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor"); | |||
| 1019 | module_param(revid_errata, int, 0644); | 1019 | module_param(revid_errata, int, 0644); |
| 1020 | MODULE_PARM_DESC(revid_errata, "Ignore CPU Revision ID"); | 1020 | MODULE_PARM_DESC(revid_errata, "Ignore CPU Revision ID"); |
| 1021 | 1021 | ||
| 1022 | MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>"); | 1022 | MODULE_AUTHOR ("Dave Jones <davej@redhat.com>"); |
| 1023 | MODULE_DESCRIPTION ("Longhaul driver for VIA Cyrix processors."); | 1023 | MODULE_DESCRIPTION ("Longhaul driver for VIA Cyrix processors."); |
| 1024 | MODULE_LICENSE ("GPL"); | 1024 | MODULE_LICENSE ("GPL"); |
| 1025 | 1025 | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c index 199e4e05e5dc..b8e05ee4f736 100644 --- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c | |||
| @@ -122,7 +122,7 @@ static int cpufreq_p4_target(struct cpufreq_policy *policy, | |||
| 122 | return 0; | 122 | return 0; |
| 123 | 123 | ||
| 124 | /* notifiers */ | 124 | /* notifiers */ |
| 125 | for_each_cpu_mask(i, policy->cpus) { | 125 | for_each_cpu_mask_nr(i, policy->cpus) { |
| 126 | freqs.cpu = i; | 126 | freqs.cpu = i; |
| 127 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | 127 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); |
| 128 | } | 128 | } |
| @@ -130,11 +130,11 @@ static int cpufreq_p4_target(struct cpufreq_policy *policy, | |||
| 130 | /* run on each logical CPU, see section 13.15.3 of IA32 Intel Architecture Software | 130 | /* run on each logical CPU, see section 13.15.3 of IA32 Intel Architecture Software |
| 131 | * Developer's Manual, Volume 3 | 131 | * Developer's Manual, Volume 3 |
| 132 | */ | 132 | */ |
| 133 | for_each_cpu_mask(i, policy->cpus) | 133 | for_each_cpu_mask_nr(i, policy->cpus) |
| 134 | cpufreq_p4_setdc(i, p4clockmod_table[newstate].index); | 134 | cpufreq_p4_setdc(i, p4clockmod_table[newstate].index); |
| 135 | 135 | ||
| 136 | /* notifiers */ | 136 | /* notifiers */ |
| 137 | for_each_cpu_mask(i, policy->cpus) { | 137 | for_each_cpu_mask_nr(i, policy->cpus) { |
| 138 | freqs.cpu = i; | 138 | freqs.cpu = i; |
| 139 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | 139 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); |
| 140 | } | 140 | } |
| @@ -171,7 +171,7 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c) | |||
| 171 | } | 171 | } |
| 172 | 172 | ||
| 173 | if (c->x86 != 0xF) { | 173 | if (c->x86 != 0xF) { |
| 174 | printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. Please send an e-mail to <cpufreq@lists.linux.org.uk>\n"); | 174 | printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. Please send an e-mail to <cpufreq@vger.kernel.org>\n"); |
| 175 | return 0; | 175 | return 0; |
| 176 | } | 176 | } |
| 177 | 177 | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c index eb9b62b0830c..c1ac5790c63e 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c | |||
| @@ -15,12 +15,11 @@ | |||
| 15 | #include <linux/slab.h> | 15 | #include <linux/slab.h> |
| 16 | 16 | ||
| 17 | #include <asm/msr.h> | 17 | #include <asm/msr.h> |
| 18 | #include <asm/timex.h> | 18 | #include <linux/timex.h> |
| 19 | #include <asm/io.h> | 19 | #include <linux/io.h> |
| 20 | 20 | ||
| 21 | 21 | #define POWERNOW_IOPORT 0xfff0 /* it doesn't matter where, as long | |
| 22 | #define POWERNOW_IOPORT 0xfff0 /* it doesn't matter where, as long | 22 | as it is unused */ |
| 23 | as it is unused */ | ||
| 24 | 23 | ||
| 25 | static unsigned int busfreq; /* FSB, in 10 kHz */ | 24 | static unsigned int busfreq; /* FSB, in 10 kHz */ |
| 26 | static unsigned int max_multiplier; | 25 | static unsigned int max_multiplier; |
| @@ -53,7 +52,7 @@ static int powernow_k6_get_cpu_multiplier(void) | |||
| 53 | 52 | ||
| 54 | msrval = POWERNOW_IOPORT + 0x1; | 53 | msrval = POWERNOW_IOPORT + 0x1; |
| 55 | wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */ | 54 | wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */ |
| 56 | invalue=inl(POWERNOW_IOPORT + 0x8); | 55 | invalue = inl(POWERNOW_IOPORT + 0x8); |
| 57 | msrval = POWERNOW_IOPORT + 0x0; | 56 | msrval = POWERNOW_IOPORT + 0x0; |
| 58 | wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */ | 57 | wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */ |
| 59 | 58 | ||
| @@ -67,9 +66,9 @@ static int powernow_k6_get_cpu_multiplier(void) | |||
| 67 | * | 66 | * |
| 68 | * Tries to change the PowerNow! multiplier | 67 | * Tries to change the PowerNow! multiplier |
| 69 | */ | 68 | */ |
| 70 | static void powernow_k6_set_state (unsigned int best_i) | 69 | static void powernow_k6_set_state(unsigned int best_i) |
| 71 | { | 70 | { |
| 72 | unsigned long outvalue=0, invalue=0; | 71 | unsigned long outvalue = 0, invalue = 0; |
| 73 | unsigned long msrval; | 72 | unsigned long msrval; |
| 74 | struct cpufreq_freqs freqs; | 73 | struct cpufreq_freqs freqs; |
| 75 | 74 | ||
| @@ -90,10 +89,10 @@ static void powernow_k6_set_state (unsigned int best_i) | |||
| 90 | 89 | ||
| 91 | msrval = POWERNOW_IOPORT + 0x1; | 90 | msrval = POWERNOW_IOPORT + 0x1; |
| 92 | wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */ | 91 | wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */ |
| 93 | invalue=inl(POWERNOW_IOPORT + 0x8); | 92 | invalue = inl(POWERNOW_IOPORT + 0x8); |
| 94 | invalue = invalue & 0xf; | 93 | invalue = invalue & 0xf; |
| 95 | outvalue = outvalue | invalue; | 94 | outvalue = outvalue | invalue; |
| 96 | outl(outvalue ,(POWERNOW_IOPORT + 0x8)); | 95 | outl(outvalue , (POWERNOW_IOPORT + 0x8)); |
| 97 | msrval = POWERNOW_IOPORT + 0x0; | 96 | msrval = POWERNOW_IOPORT + 0x0; |
| 98 | wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */ | 97 | wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */ |
| 99 | 98 | ||
| @@ -124,7 +123,7 @@ static int powernow_k6_verify(struct cpufreq_policy *policy) | |||
| 124 | * | 123 | * |
| 125 | * sets a new CPUFreq policy | 124 | * sets a new CPUFreq policy |
| 126 | */ | 125 | */ |
| 127 | static int powernow_k6_target (struct cpufreq_policy *policy, | 126 | static int powernow_k6_target(struct cpufreq_policy *policy, |
| 128 | unsigned int target_freq, | 127 | unsigned int target_freq, |
| 129 | unsigned int relation) | 128 | unsigned int relation) |
| 130 | { | 129 | { |
| @@ -152,7 +151,7 @@ static int powernow_k6_cpu_init(struct cpufreq_policy *policy) | |||
| 152 | busfreq = cpu_khz / max_multiplier; | 151 | busfreq = cpu_khz / max_multiplier; |
| 153 | 152 | ||
| 154 | /* table init */ | 153 | /* table init */ |
| 155 | for (i=0; (clock_ratio[i].frequency != CPUFREQ_TABLE_END); i++) { | 154 | for (i = 0; (clock_ratio[i].frequency != CPUFREQ_TABLE_END); i++) { |
| 156 | if (clock_ratio[i].index > max_multiplier) | 155 | if (clock_ratio[i].index > max_multiplier) |
| 157 | clock_ratio[i].frequency = CPUFREQ_ENTRY_INVALID; | 156 | clock_ratio[i].frequency = CPUFREQ_ENTRY_INVALID; |
| 158 | else | 157 | else |
| @@ -165,7 +164,7 @@ static int powernow_k6_cpu_init(struct cpufreq_policy *policy) | |||
| 165 | 164 | ||
| 166 | result = cpufreq_frequency_table_cpuinfo(policy, clock_ratio); | 165 | result = cpufreq_frequency_table_cpuinfo(policy, clock_ratio); |
| 167 | if (result) | 166 | if (result) |
| 168 | return (result); | 167 | return result; |
| 169 | 168 | ||
| 170 | cpufreq_frequency_table_get_attr(clock_ratio, policy->cpu); | 169 | cpufreq_frequency_table_get_attr(clock_ratio, policy->cpu); |
| 171 | 170 | ||
| @@ -176,8 +175,8 @@ static int powernow_k6_cpu_init(struct cpufreq_policy *policy) | |||
| 176 | static int powernow_k6_cpu_exit(struct cpufreq_policy *policy) | 175 | static int powernow_k6_cpu_exit(struct cpufreq_policy *policy) |
| 177 | { | 176 | { |
| 178 | unsigned int i; | 177 | unsigned int i; |
| 179 | for (i=0; i<8; i++) { | 178 | for (i = 0; i < 8; i++) { |
| 180 | if (i==max_multiplier) | 179 | if (i == max_multiplier) |
| 181 | powernow_k6_set_state(i); | 180 | powernow_k6_set_state(i); |
| 182 | } | 181 | } |
| 183 | cpufreq_frequency_table_put_attr(policy->cpu); | 182 | cpufreq_frequency_table_put_attr(policy->cpu); |
| @@ -189,7 +188,7 @@ static unsigned int powernow_k6_get(unsigned int cpu) | |||
| 189 | return busfreq * powernow_k6_get_cpu_multiplier(); | 188 | return busfreq * powernow_k6_get_cpu_multiplier(); |
| 190 | } | 189 | } |
| 191 | 190 | ||
| 192 | static struct freq_attr* powernow_k6_attr[] = { | 191 | static struct freq_attr *powernow_k6_attr[] = { |
| 193 | &cpufreq_freq_attr_scaling_available_freqs, | 192 | &cpufreq_freq_attr_scaling_available_freqs, |
| 194 | NULL, | 193 | NULL, |
| 195 | }; | 194 | }; |
| @@ -227,7 +226,7 @@ static int __init powernow_k6_init(void) | |||
| 227 | } | 226 | } |
| 228 | 227 | ||
| 229 | if (cpufreq_register_driver(&powernow_k6_driver)) { | 228 | if (cpufreq_register_driver(&powernow_k6_driver)) { |
| 230 | release_region (POWERNOW_IOPORT, 16); | 229 | release_region(POWERNOW_IOPORT, 16); |
| 231 | return -EINVAL; | 230 | return -EINVAL; |
| 232 | } | 231 | } |
| 233 | 232 | ||
| @@ -243,13 +242,13 @@ static int __init powernow_k6_init(void) | |||
| 243 | static void __exit powernow_k6_exit(void) | 242 | static void __exit powernow_k6_exit(void) |
| 244 | { | 243 | { |
| 245 | cpufreq_unregister_driver(&powernow_k6_driver); | 244 | cpufreq_unregister_driver(&powernow_k6_driver); |
| 246 | release_region (POWERNOW_IOPORT, 16); | 245 | release_region(POWERNOW_IOPORT, 16); |
| 247 | } | 246 | } |
| 248 | 247 | ||
| 249 | 248 | ||
| 250 | MODULE_AUTHOR ("Arjan van de Ven <arjanv@redhat.com>, Dave Jones <davej@codemonkey.org.uk>, Dominik Brodowski <linux@brodo.de>"); | 249 | MODULE_AUTHOR("Arjan van de Ven, Dave Jones <davej@redhat.com>, Dominik Brodowski <linux@brodo.de>"); |
| 251 | MODULE_DESCRIPTION ("PowerNow! driver for AMD K6-2+ / K6-3+ processors."); | 250 | MODULE_DESCRIPTION("PowerNow! driver for AMD K6-2+ / K6-3+ processors."); |
| 252 | MODULE_LICENSE ("GPL"); | 251 | MODULE_LICENSE("GPL"); |
| 253 | 252 | ||
| 254 | module_init(powernow_k6_init); | 253 | module_init(powernow_k6_init); |
| 255 | module_exit(powernow_k6_exit); | 254 | module_exit(powernow_k6_exit); |
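The powernow-k6 changes are cosmetic, but the arithmetic the driver relies on is easy to restate: the bus clock (kept in 10 kHz units) times the current multiplier gives the CPU frequency in kHz. A sketch with illustrative, not measured, numbers:

```c
/* Sketch of the busfreq * multiplier arithmetic used by powernow-k6;
 * the multiplier is assumed to be stored scaled by 10 (e.g. 35 == 3.5x). */
#include <stdio.h>

int main(void)
{
	unsigned int cpu_khz = 450000;		/* hypothetical K6-2+ at 450 MHz */
	unsigned int max_multiplier = 45;	/* 4.5x stored as x10 */
	unsigned int busfreq = cpu_khz / max_multiplier;	/* FSB, 10 kHz units */

	unsigned int multiplier = 35;		/* throttled to 3.5x */
	printf("bus=%u (10 kHz units), cpu=%u kHz\n",
	       busfreq, busfreq * multiplier);
	return 0;
}
```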
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c index 0a61159d7b71..7c7d56b43136 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c | |||
| @@ -1,6 +1,6 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * AMD K7 Powernow driver. | 2 | * AMD K7 Powernow driver. |
| 3 | * (C) 2003 Dave Jones <davej@codemonkey.org.uk> on behalf of SuSE Labs. | 3 | * (C) 2003 Dave Jones on behalf of SuSE Labs. |
| 4 | * (C) 2003-2004 Dave Jones <davej@redhat.com> | 4 | * (C) 2003-2004 Dave Jones <davej@redhat.com> |
| 5 | * | 5 | * |
| 6 | * Licensed under the terms of the GNU GPL License version 2. | 6 | * Licensed under the terms of the GNU GPL License version 2. |
| @@ -692,7 +692,7 @@ static void __exit powernow_exit (void) | |||
| 692 | module_param(acpi_force, int, 0444); | 692 | module_param(acpi_force, int, 0444); |
| 693 | MODULE_PARM_DESC(acpi_force, "Force ACPI to be used."); | 693 | MODULE_PARM_DESC(acpi_force, "Force ACPI to be used."); |
| 694 | 694 | ||
| 695 | MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>"); | 695 | MODULE_AUTHOR ("Dave Jones <davej@redhat.com>"); |
| 696 | MODULE_DESCRIPTION ("Powernow driver for AMD K7 processors."); | 696 | MODULE_DESCRIPTION ("Powernow driver for AMD K7 processors."); |
| 697 | MODULE_LICENSE ("GPL"); | 697 | MODULE_LICENSE ("GPL"); |
| 698 | 698 | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.h b/arch/x86/kernel/cpu/cpufreq/powernow-k7.h index f8a63b3664e3..35fb4eaf6e1c 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.h +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.h | |||
| @@ -1,5 +1,4 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * $Id: powernow-k7.h,v 1.2 2003/02/10 18:26:01 davej Exp $ | ||
| 3 | * (C) 2003 Dave Jones. | 2 | * (C) 2003 Dave Jones. |
| 4 | * | 3 | * |
| 5 | * Licensed under the terms of the GNU GPL License version 2. | 4 | * Licensed under the terms of the GNU GPL License version 2. |
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 206791eb46e3..d3dcd58b87cd 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c | |||
| @@ -7,7 +7,7 @@ | |||
| 7 | * Support : mark.langsdorf@amd.com | 7 | * Support : mark.langsdorf@amd.com |
| 8 | * | 8 | * |
| 9 | * Based on the powernow-k7.c module written by Dave Jones. | 9 | * Based on the powernow-k7.c module written by Dave Jones. |
| 10 | * (C) 2003 Dave Jones <davej@codemonkey.org.uk> on behalf of SuSE Labs | 10 | * (C) 2003 Dave Jones on behalf of SuSE Labs |
| 11 | * (C) 2004 Dominik Brodowski <linux@brodo.de> | 11 | * (C) 2004 Dominik Brodowski <linux@brodo.de> |
| 12 | * (C) 2004 Pavel Machek <pavel@suse.cz> | 12 | * (C) 2004 Pavel Machek <pavel@suse.cz> |
| 13 | * Licensed under the terms of the GNU GPL License version 2. | 13 | * Licensed under the terms of the GNU GPL License version 2. |
| @@ -45,7 +45,6 @@ | |||
| 45 | #endif | 45 | #endif |
| 46 | 46 | ||
| 47 | #define PFX "powernow-k8: " | 47 | #define PFX "powernow-k8: " |
| 48 | #define BFX PFX "BIOS error: " | ||
| 49 | #define VERSION "version 2.20.00" | 48 | #define VERSION "version 2.20.00" |
| 50 | #include "powernow-k8.h" | 49 | #include "powernow-k8.h" |
| 51 | 50 | ||
| @@ -66,7 +65,6 @@ static u32 find_freq_from_fid(u32 fid) | |||
| 66 | return 800 + (fid * 100); | 65 | return 800 + (fid * 100); |
| 67 | } | 66 | } |
| 68 | 67 | ||
| 69 | |||
| 70 | /* Return a frequency in KHz, given an input fid */ | 68 | /* Return a frequency in KHz, given an input fid */ |
| 71 | static u32 find_khz_freq_from_fid(u32 fid) | 69 | static u32 find_khz_freq_from_fid(u32 fid) |
| 72 | { | 70 | { |
| @@ -78,7 +76,6 @@ static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data, u32 p | |||
| 78 | return data[pstate].frequency; | 76 | return data[pstate].frequency; |
| 79 | } | 77 | } |
| 80 | 78 | ||
| 81 | |||
| 82 | /* Return the vco fid for an input fid | 79 | /* Return the vco fid for an input fid |
| 83 | * | 80 | * |
| 84 | * Each "low" fid has corresponding "high" fid, and you can get to "low" fids | 81 | * Each "low" fid has corresponding "high" fid, and you can get to "low" fids |
| @@ -166,7 +163,6 @@ static void fidvid_msr_init(void) | |||
| 166 | wrmsr(MSR_FIDVID_CTL, lo, hi); | 163 | wrmsr(MSR_FIDVID_CTL, lo, hi); |
| 167 | } | 164 | } |
| 168 | 165 | ||
| 169 | |||
| 170 | /* write the new fid value along with the other control fields to the msr */ | 166 | /* write the new fid value along with the other control fields to the msr */ |
| 171 | static int write_new_fid(struct powernow_k8_data *data, u32 fid) | 167 | static int write_new_fid(struct powernow_k8_data *data, u32 fid) |
| 172 | { | 168 | { |
| @@ -539,35 +535,40 @@ static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, u8 | |||
| 539 | 535 | ||
| 540 | for (j = 0; j < data->numps; j++) { | 536 | for (j = 0; j < data->numps; j++) { |
| 541 | if (pst[j].vid > LEAST_VID) { | 537 | if (pst[j].vid > LEAST_VID) { |
| 542 | printk(KERN_ERR PFX "vid %d invalid : 0x%x\n", j, pst[j].vid); | 538 | printk(KERN_ERR FW_BUG PFX "vid %d invalid : 0x%x\n", |
| 539 | j, pst[j].vid); | ||
| 543 | return -EINVAL; | 540 | return -EINVAL; |
| 544 | } | 541 | } |
| 545 | if (pst[j].vid < data->rvo) { /* vid + rvo >= 0 */ | 542 | if (pst[j].vid < data->rvo) { /* vid + rvo >= 0 */ |
| 546 | printk(KERN_ERR BFX "0 vid exceeded with pstate %d\n", j); | 543 | printk(KERN_ERR FW_BUG PFX "0 vid exceeded with pstate" |
| 544 | " %d\n", j); | ||
| 547 | return -ENODEV; | 545 | return -ENODEV; |
| 548 | } | 546 | } |
| 549 | if (pst[j].vid < maxvid + data->rvo) { /* vid + rvo >= maxvid */ | 547 | if (pst[j].vid < maxvid + data->rvo) { /* vid + rvo >= maxvid */ |
| 550 | printk(KERN_ERR BFX "maxvid exceeded with pstate %d\n", j); | 548 | printk(KERN_ERR FW_BUG PFX "maxvid exceeded with pstate" |
| 549 | " %d\n", j); | ||
| 551 | return -ENODEV; | 550 | return -ENODEV; |
| 552 | } | 551 | } |
| 553 | if (pst[j].fid > MAX_FID) { | 552 | if (pst[j].fid > MAX_FID) { |
| 554 | printk(KERN_ERR BFX "maxfid exceeded with pstate %d\n", j); | 553 | printk(KERN_ERR FW_BUG PFX "maxfid exceeded with pstate" |
| 554 | " %d\n", j); | ||
| 555 | return -ENODEV; | 555 | return -ENODEV; |
| 556 | } | 556 | } |
| 557 | if (j && (pst[j].fid < HI_FID_TABLE_BOTTOM)) { | 557 | if (j && (pst[j].fid < HI_FID_TABLE_BOTTOM)) { |
| 558 | /* Only first fid is allowed to be in "low" range */ | 558 | /* Only first fid is allowed to be in "low" range */ |
| 559 | printk(KERN_ERR BFX "two low fids - %d : 0x%x\n", j, pst[j].fid); | 559 | printk(KERN_ERR FW_BUG PFX "two low fids - %d : " |
| 560 | "0x%x\n", j, pst[j].fid); | ||
| 560 | return -EINVAL; | 561 | return -EINVAL; |
| 561 | } | 562 | } |
| 562 | if (pst[j].fid < lastfid) | 563 | if (pst[j].fid < lastfid) |
| 563 | lastfid = pst[j].fid; | 564 | lastfid = pst[j].fid; |
| 564 | } | 565 | } |
| 565 | if (lastfid & 1) { | 566 | if (lastfid & 1) { |
| 566 | printk(KERN_ERR BFX "lastfid invalid\n"); | 567 | printk(KERN_ERR FW_BUG PFX "lastfid invalid\n"); |
| 567 | return -EINVAL; | 568 | return -EINVAL; |
| 568 | } | 569 | } |
| 569 | if (lastfid > LO_FID_TABLE_TOP) | 570 | if (lastfid > LO_FID_TABLE_TOP) |
| 570 | printk(KERN_INFO BFX "first fid not from lo freq table\n"); | 571 | printk(KERN_INFO FW_BUG PFX "first fid not from lo freq table\n"); |
| 571 | 572 | ||
| 572 | return 0; | 573 | return 0; |
| 573 | } | 574 | } |
| @@ -675,13 +676,13 @@ static int find_psb_table(struct powernow_k8_data *data) | |||
| 675 | 676 | ||
| 676 | dprintk("table vers: 0x%x\n", psb->tableversion); | 677 | dprintk("table vers: 0x%x\n", psb->tableversion); |
| 677 | if (psb->tableversion != PSB_VERSION_1_4) { | 678 | if (psb->tableversion != PSB_VERSION_1_4) { |
| 678 | printk(KERN_ERR BFX "PSB table is not v1.4\n"); | 679 | printk(KERN_ERR FW_BUG PFX "PSB table is not v1.4\n"); |
| 679 | return -ENODEV; | 680 | return -ENODEV; |
| 680 | } | 681 | } |
| 681 | 682 | ||
| 682 | dprintk("flags: 0x%x\n", psb->flags1); | 683 | dprintk("flags: 0x%x\n", psb->flags1); |
| 683 | if (psb->flags1) { | 684 | if (psb->flags1) { |
| 684 | printk(KERN_ERR BFX "unknown flags\n"); | 685 | printk(KERN_ERR FW_BUG PFX "unknown flags\n"); |
| 685 | return -ENODEV; | 686 | return -ENODEV; |
| 686 | } | 687 | } |
| 687 | 688 | ||
| @@ -708,7 +709,7 @@ static int find_psb_table(struct powernow_k8_data *data) | |||
| 708 | } | 709 | } |
| 709 | } | 710 | } |
| 710 | if (cpst != 1) { | 711 | if (cpst != 1) { |
| 711 | printk(KERN_ERR BFX "numpst must be 1\n"); | 712 | printk(KERN_ERR FW_BUG PFX "numpst must be 1\n"); |
| 712 | return -ENODEV; | 713 | return -ENODEV; |
| 713 | } | 714 | } |
| 714 | 715 | ||
| @@ -966,7 +967,7 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned i | |||
| 966 | freqs.old = find_khz_freq_from_fid(data->currfid); | 967 | freqs.old = find_khz_freq_from_fid(data->currfid); |
| 967 | freqs.new = find_khz_freq_from_fid(fid); | 968 | freqs.new = find_khz_freq_from_fid(fid); |
| 968 | 969 | ||
| 969 | for_each_cpu_mask(i, *(data->available_cores)) { | 970 | for_each_cpu_mask_nr(i, *(data->available_cores)) { |
| 970 | freqs.cpu = i; | 971 | freqs.cpu = i; |
| 971 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | 972 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); |
| 972 | } | 973 | } |
| @@ -974,7 +975,7 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned i | |||
| 974 | res = transition_fid_vid(data, fid, vid); | 975 | res = transition_fid_vid(data, fid, vid); |
| 975 | freqs.new = find_khz_freq_from_fid(data->currfid); | 976 | freqs.new = find_khz_freq_from_fid(data->currfid); |
| 976 | 977 | ||
| 977 | for_each_cpu_mask(i, *(data->available_cores)) { | 978 | for_each_cpu_mask_nr(i, *(data->available_cores)) { |
| 978 | freqs.cpu = i; | 979 | freqs.cpu = i; |
| 979 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | 980 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); |
| 980 | } | 981 | } |
| @@ -997,7 +998,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i | |||
| 997 | freqs.old = find_khz_freq_from_pstate(data->powernow_table, data->currpstate); | 998 | freqs.old = find_khz_freq_from_pstate(data->powernow_table, data->currpstate); |
| 998 | freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate); | 999 | freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate); |
| 999 | 1000 | ||
| 1000 | for_each_cpu_mask(i, *(data->available_cores)) { | 1001 | for_each_cpu_mask_nr(i, *(data->available_cores)) { |
| 1001 | freqs.cpu = i; | 1002 | freqs.cpu = i; |
| 1002 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | 1003 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); |
| 1003 | } | 1004 | } |
| @@ -1005,7 +1006,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i | |||
| 1005 | res = transition_pstate(data, pstate); | 1006 | res = transition_pstate(data, pstate); |
| 1006 | freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate); | 1007 | freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate); |
| 1007 | 1008 | ||
| 1008 | for_each_cpu_mask(i, *(data->available_cores)) { | 1009 | for_each_cpu_mask_nr(i, *(data->available_cores)) { |
| 1009 | freqs.cpu = i; | 1010 | freqs.cpu = i; |
| 1010 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | 1011 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); |
| 1011 | } | 1012 | } |
| @@ -1133,17 +1134,19 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) | |||
| 1133 | "ACPI Processor module before starting this " | 1134 | "ACPI Processor module before starting this " |
| 1134 | "driver.\n"); | 1135 | "driver.\n"); |
| 1135 | #else | 1136 | #else |
| 1136 | printk(KERN_ERR PFX "Your BIOS does not provide ACPI " | 1137 | printk(KERN_ERR FW_BUG PFX "Your BIOS does not provide" |
| 1137 | "_PSS objects in a way that Linux understands. " | 1138 | " ACPI _PSS objects in a way that Linux " |
| 1138 | "Please report this to the Linux ACPI maintainers" | 1139 | "understands. Please report this to the Linux " |
| 1139 | " and complain to your BIOS vendor.\n"); | 1140 | "ACPI maintainers and complain to your BIOS " |
| 1141 | "vendor.\n"); | ||
| 1140 | #endif | 1142 | #endif |
| 1141 | kfree(data); | 1143 | kfree(data); |
| 1142 | return -ENODEV; | 1144 | return -ENODEV; |
| 1143 | } | 1145 | } |
| 1144 | if (pol->cpu != 0) { | 1146 | if (pol->cpu != 0) { |
| 1145 | printk(KERN_ERR PFX "No ACPI _PSS objects for CPU other than " | 1147 | printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for " |
| 1146 | "CPU0. Complain to your BIOS vendor.\n"); | 1148 | "CPU other than CPU0. Complain to your BIOS " |
| 1149 | "vendor.\n"); | ||
| 1147 | kfree(data); | 1150 | kfree(data); |
| 1148 | return -ENODEV; | 1151 | return -ENODEV; |
| 1149 | } | 1152 | } |
| @@ -1196,7 +1199,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) | |||
| 1196 | 1199 | ||
| 1197 | /* min/max the cpu is capable of */ | 1200 | /* min/max the cpu is capable of */ |
| 1198 | if (cpufreq_frequency_table_cpuinfo(pol, data->powernow_table)) { | 1201 | if (cpufreq_frequency_table_cpuinfo(pol, data->powernow_table)) { |
| 1199 | printk(KERN_ERR PFX "invalid powernow_table\n"); | 1202 | printk(KERN_ERR FW_BUG PFX "invalid powernow_table\n"); |
| 1200 | powernow_k8_cpu_exit_acpi(data); | 1203 | powernow_k8_cpu_exit_acpi(data); |
| 1201 | kfree(data->powernow_table); | 1204 | kfree(data->powernow_table); |
| 1202 | kfree(data); | 1205 | kfree(data); |
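The powernow-k8 changes replace the driver-local `BFX` prefix with the generic `FW_BUG` tag, so firmware-caused failures become grep-able across drivers. Since `FW_BUG` and `PFX` are plain string literals, the prefixes concatenate at compile time. A sketch with the strings written out locally rather than taken from kernel headers:

```c
/* Sketch: string-literal prefixes compose into one tagged message. */
#include <stdio.h>

#define DEMO_FW_BUG "[Firmware Bug]: "	/* assumed wording of FW_BUG */
#define DEMO_PFX    "powernow-k8: "

int main(void)
{
	int pstate = 3;

	printf(DEMO_FW_BUG DEMO_PFX "vid %d invalid : 0x%x\n", pstate, 0x1f);
	return 0;
}
```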
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c index 908dd347c67e..3b5f06423e77 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c | |||
| @@ -26,9 +26,10 @@ | |||
| 26 | #include <asm/cpufeature.h> | 26 | #include <asm/cpufeature.h> |
| 27 | 27 | ||
| 28 | #define PFX "speedstep-centrino: " | 28 | #define PFX "speedstep-centrino: " |
| 29 | #define MAINTAINER "cpufreq@lists.linux.org.uk" | 29 | #define MAINTAINER "cpufreq@vger.kernel.org" |
| 30 | 30 | ||
| 31 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg) | 31 | #define dprintk(msg...) \ |
| 32 | cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg) | ||
| 32 | 33 | ||
| 33 | #define INTEL_MSR_RANGE (0xffff) | 34 | #define INTEL_MSR_RANGE (0xffff) |
| 34 | 35 | ||
| @@ -66,11 +67,12 @@ struct cpu_model | |||
| 66 | 67 | ||
| 67 | struct cpufreq_frequency_table *op_points; /* clock/voltage pairs */ | 68 | struct cpufreq_frequency_table *op_points; /* clock/voltage pairs */ |
| 68 | }; | 69 | }; |
| 69 | static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, const struct cpu_id *x); | 70 | static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, |
| 71 | const struct cpu_id *x); | ||
| 70 | 72 | ||
| 71 | /* Operating points for current CPU */ | 73 | /* Operating points for current CPU */ |
| 72 | static struct cpu_model *centrino_model[NR_CPUS]; | 74 | static DEFINE_PER_CPU(struct cpu_model *, centrino_model); |
| 73 | static const struct cpu_id *centrino_cpu[NR_CPUS]; | 75 | static DEFINE_PER_CPU(const struct cpu_id *, centrino_cpu); |
| 74 | 76 | ||
| 75 | static struct cpufreq_driver centrino_driver; | 77 | static struct cpufreq_driver centrino_driver; |
| 76 | 78 | ||
| @@ -255,7 +257,7 @@ static int centrino_cpu_init_table(struct cpufreq_policy *policy) | |||
| 255 | return -ENOENT; | 257 | return -ENOENT; |
| 256 | } | 258 | } |
| 257 | 259 | ||
| 258 | centrino_model[policy->cpu] = model; | 260 | per_cpu(centrino_model, policy->cpu) = model; |
| 259 | 261 | ||
| 260 | dprintk("found \"%s\": max frequency: %dkHz\n", | 262 | dprintk("found \"%s\": max frequency: %dkHz\n", |
| 261 | model->model_name, model->max_freq); | 263 | model->model_name, model->max_freq); |
| @@ -264,10 +266,14 @@ static int centrino_cpu_init_table(struct cpufreq_policy *policy) | |||
| 264 | } | 266 | } |
| 265 | 267 | ||
| 266 | #else | 268 | #else |
| 267 | static inline int centrino_cpu_init_table(struct cpufreq_policy *policy) { return -ENODEV; } | 269 | static inline int centrino_cpu_init_table(struct cpufreq_policy *policy) |
| 270 | { | ||
| 271 | return -ENODEV; | ||
| 272 | } | ||
| 268 | #endif /* CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE */ | 273 | #endif /* CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE */ |
| 269 | 274 | ||
| 270 | static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, const struct cpu_id *x) | 275 | static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, |
| 276 | const struct cpu_id *x) | ||
| 271 | { | 277 | { |
| 272 | if ((c->x86 == x->x86) && | 278 | if ((c->x86 == x->x86) && |
| 273 | (c->x86_model == x->x86_model) && | 279 | (c->x86_model == x->x86_model) && |
| @@ -286,23 +292,28 @@ static unsigned extract_clock(unsigned msr, unsigned int cpu, int failsafe) | |||
| 286 | * for centrino, as some DSDTs are buggy. | 292 | * for centrino, as some DSDTs are buggy. |
| 287 | * Ideally, this can be done using the acpi_data structure. | 293 | * Ideally, this can be done using the acpi_data structure. |
| 288 | */ | 294 | */ |
| 289 | if ((centrino_cpu[cpu] == &cpu_ids[CPU_BANIAS]) || | 295 | if ((per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_BANIAS]) || |
| 290 | (centrino_cpu[cpu] == &cpu_ids[CPU_DOTHAN_A1]) || | 296 | (per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_DOTHAN_A1]) || |
| 291 | (centrino_cpu[cpu] == &cpu_ids[CPU_DOTHAN_B0])) { | 297 | (per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_DOTHAN_B0])) { |
| 292 | msr = (msr >> 8) & 0xff; | 298 | msr = (msr >> 8) & 0xff; |
| 293 | return msr * 100000; | 299 | return msr * 100000; |
| 294 | } | 300 | } |
| 295 | 301 | ||
| 296 | if ((!centrino_model[cpu]) || (!centrino_model[cpu]->op_points)) | 302 | if ((!per_cpu(centrino_model, cpu)) || |
| 303 | (!per_cpu(centrino_model, cpu)->op_points)) | ||
| 297 | return 0; | 304 | return 0; |
| 298 | 305 | ||
| 299 | msr &= 0xffff; | 306 | msr &= 0xffff; |
| 300 | for (i=0;centrino_model[cpu]->op_points[i].frequency != CPUFREQ_TABLE_END; i++) { | 307 | for (i = 0; |
| 301 | if (msr == centrino_model[cpu]->op_points[i].index) | 308 | per_cpu(centrino_model, cpu)->op_points[i].frequency |
| 302 | return centrino_model[cpu]->op_points[i].frequency; | 309 | != CPUFREQ_TABLE_END; |
| 310 | i++) { | ||
| 311 | if (msr == per_cpu(centrino_model, cpu)->op_points[i].index) | ||
| 312 | return per_cpu(centrino_model, cpu)-> | ||
| 313 | op_points[i].frequency; | ||
| 303 | } | 314 | } |
| 304 | if (failsafe) | 315 | if (failsafe) |
| 305 | return centrino_model[cpu]->op_points[i-1].frequency; | 316 | return per_cpu(centrino_model, cpu)->op_points[i-1].frequency; |
| 306 | else | 317 | else |
| 307 | return 0; | 318 | return 0; |
| 308 | } | 319 | } |
| @@ -347,7 +358,8 @@ static int centrino_cpu_init(struct cpufreq_policy *policy) | |||
| 347 | int i; | 358 | int i; |
| 348 | 359 | ||
| 349 | /* Only Intel makes Enhanced Speedstep-capable CPUs */ | 360 | /* Only Intel makes Enhanced Speedstep-capable CPUs */ |
| 350 | if (cpu->x86_vendor != X86_VENDOR_INTEL || !cpu_has(cpu, X86_FEATURE_EST)) | 361 | if (cpu->x86_vendor != X86_VENDOR_INTEL || |
| 362 | !cpu_has(cpu, X86_FEATURE_EST)) | ||
| 351 | return -ENODEV; | 363 | return -ENODEV; |
| 352 | 364 | ||
| 353 | if (cpu_has(cpu, X86_FEATURE_CONSTANT_TSC)) | 365 | if (cpu_has(cpu, X86_FEATURE_CONSTANT_TSC)) |
| @@ -361,9 +373,9 @@ static int centrino_cpu_init(struct cpufreq_policy *policy) | |||
| 361 | break; | 373 | break; |
| 362 | 374 | ||
| 363 | if (i != N_IDS) | 375 | if (i != N_IDS) |
| 364 | centrino_cpu[policy->cpu] = &cpu_ids[i]; | 376 | per_cpu(centrino_cpu, policy->cpu) = &cpu_ids[i]; |
| 365 | 377 | ||
| 366 | if (!centrino_cpu[policy->cpu]) { | 378 | if (!per_cpu(centrino_cpu, policy->cpu)) { |
| 367 | dprintk("found unsupported CPU with " | 379 | dprintk("found unsupported CPU with " |
| 368 | "Enhanced SpeedStep: send /proc/cpuinfo to " | 380 | "Enhanced SpeedStep: send /proc/cpuinfo to " |
| 369 | MAINTAINER "\n"); | 381 | MAINTAINER "\n"); |
| @@ -386,23 +398,26 @@ static int centrino_cpu_init(struct cpufreq_policy *policy) | |||
| 386 | /* check to see if it stuck */ | 398 | /* check to see if it stuck */ |
| 387 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | 399 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); |
| 388 | if (!(l & (1<<16))) { | 400 | if (!(l & (1<<16))) { |
| 389 | printk(KERN_INFO PFX "couldn't enable Enhanced SpeedStep\n"); | 401 | printk(KERN_INFO PFX |
| 402 | "couldn't enable Enhanced SpeedStep\n"); | ||
| 390 | return -ENODEV; | 403 | return -ENODEV; |
| 391 | } | 404 | } |
| 392 | } | 405 | } |
| 393 | 406 | ||
| 394 | freq = get_cur_freq(policy->cpu); | 407 | freq = get_cur_freq(policy->cpu); |
| 395 | 408 | policy->cpuinfo.transition_latency = 10000; | |
| 396 | policy->cpuinfo.transition_latency = 10000; /* 10uS transition latency */ | 409 | /* 10uS transition latency */ |
| 397 | policy->cur = freq; | 410 | policy->cur = freq; |
| 398 | 411 | ||
| 399 | dprintk("centrino_cpu_init: cur=%dkHz\n", policy->cur); | 412 | dprintk("centrino_cpu_init: cur=%dkHz\n", policy->cur); |
| 400 | 413 | ||
| 401 | ret = cpufreq_frequency_table_cpuinfo(policy, centrino_model[policy->cpu]->op_points); | 414 | ret = cpufreq_frequency_table_cpuinfo(policy, |
| 415 | per_cpu(centrino_model, policy->cpu)->op_points); | ||
| 402 | if (ret) | 416 | if (ret) |
| 403 | return (ret); | 417 | return (ret); |
| 404 | 418 | ||
| 405 | cpufreq_frequency_table_get_attr(centrino_model[policy->cpu]->op_points, policy->cpu); | 419 | cpufreq_frequency_table_get_attr( |
| 420 | per_cpu(centrino_model, policy->cpu)->op_points, policy->cpu); | ||
| 406 | 421 | ||
| 407 | return 0; | 422 | return 0; |
| 408 | } | 423 | } |
| @@ -411,12 +426,12 @@ static int centrino_cpu_exit(struct cpufreq_policy *policy) | |||
| 411 | { | 426 | { |
| 412 | unsigned int cpu = policy->cpu; | 427 | unsigned int cpu = policy->cpu; |
| 413 | 428 | ||
| 414 | if (!centrino_model[cpu]) | 429 | if (!per_cpu(centrino_model, cpu)) |
| 415 | return -ENODEV; | 430 | return -ENODEV; |
| 416 | 431 | ||
| 417 | cpufreq_frequency_table_put_attr(cpu); | 432 | cpufreq_frequency_table_put_attr(cpu); |
| 418 | 433 | ||
| 419 | centrino_model[cpu] = NULL; | 434 | per_cpu(centrino_model, cpu) = NULL; |
| 420 | 435 | ||
| 421 | return 0; | 436 | return 0; |
| 422 | } | 437 | } |
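The speedstep-centrino conversion replaces fixed `[NR_CPUS]` arrays with `DEFINE_PER_CPU` storage, so call sites change from `centrino_model[cpu]` to `per_cpu(centrino_model, cpu)`. A hedged sketch of that access-pattern change, with local stand-in macros instead of the real per-CPU machinery:

```c
/* Sketch: an indexed array becomes "per-CPU" storage reached through an
 * accessor macro; DEMO_* are stand-ins, not the kernel's DEFINE_PER_CPU. */
#include <stdio.h>

#define DEMO_NR_CPUS 4

#define DEMO_DEFINE_PER_CPU(type, name) type name##_pcpu[DEMO_NR_CPUS]
#define demo_per_cpu(name, cpu)         (name##_pcpu[(cpu)])

struct cpu_model_demo {
	unsigned int max_freq;
};

DEMO_DEFINE_PER_CPU(struct cpu_model_demo *, centrino_model);

int main(void)
{
	static struct cpu_model_demo m = { .max_freq = 1600000 };
	int cpu = 2;

	demo_per_cpu(centrino_model, cpu) = &m;	/* was: centrino_model[cpu] = &m */
	printf("cpu%d max freq: %u kHz\n", cpu,
	       demo_per_cpu(centrino_model, cpu)->max_freq);
	return 0;
}
```

The real per-CPU variant additionally places each CPU's copy in that CPU's per-CPU area, which is friendlier to cache and NUMA placement than one shared array.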
| @@ -430,17 +445,26 @@ static int centrino_cpu_exit(struct cpufreq_policy *policy) | |||
| 430 | */ | 445 | */ |
| 431 | static int centrino_verify (struct cpufreq_policy *policy) | 446 | static int centrino_verify (struct cpufreq_policy *policy) |
| 432 | { | 447 | { |
| 433 | return cpufreq_frequency_table_verify(policy, centrino_model[policy->cpu]->op_points); | 448 | return cpufreq_frequency_table_verify(policy, |
| 449 | per_cpu(centrino_model, policy->cpu)->op_points); | ||
| 434 | } | 450 | } |
| 435 | 451 | ||
| 436 | /** | 452 | /** |
| 437 | * centrino_setpolicy - set a new CPUFreq policy | 453 | * centrino_setpolicy - set a new CPUFreq policy |
| 438 | * @policy: new policy | 454 | * @policy: new policy |
| 439 | * @target_freq: the target frequency | 455 | * @target_freq: the target frequency |
| 440 | * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) | 456 | * @relation: how that frequency relates to achieved frequency |
| 457 | * (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) | ||
| 441 | * | 458 | * |
| 442 | * Sets a new CPUFreq policy. | 459 | * Sets a new CPUFreq policy. |
| 443 | */ | 460 | */ |
| 461 | struct allmasks { | ||
| 462 | cpumask_t online_policy_cpus; | ||
| 463 | cpumask_t saved_mask; | ||
| 464 | cpumask_t set_mask; | ||
| 465 | cpumask_t covered_cpus; | ||
| 466 | }; | ||
| 467 | |||
| 444 | static int centrino_target (struct cpufreq_policy *policy, | 468 | static int centrino_target (struct cpufreq_policy *policy, |
| 445 | unsigned int target_freq, | 469 | unsigned int target_freq, |
| 446 | unsigned int relation) | 470 | unsigned int relation) |
| @@ -448,48 +472,55 @@ static int centrino_target (struct cpufreq_policy *policy, | |||
| 448 | unsigned int newstate = 0; | 472 | unsigned int newstate = 0; |
| 449 | unsigned int msr, oldmsr = 0, h = 0, cpu = policy->cpu; | 473 | unsigned int msr, oldmsr = 0, h = 0, cpu = policy->cpu; |
| 450 | struct cpufreq_freqs freqs; | 474 | struct cpufreq_freqs freqs; |
| 451 | cpumask_t online_policy_cpus; | ||
| 452 | cpumask_t saved_mask; | ||
| 453 | cpumask_t set_mask; | ||
| 454 | cpumask_t covered_cpus; | ||
| 455 | int retval = 0; | 475 | int retval = 0; |
| 456 | unsigned int j, k, first_cpu, tmp; | 476 | unsigned int j, k, first_cpu, tmp; |
| 457 | 477 | CPUMASK_ALLOC(allmasks); | |
| 458 | if (unlikely(centrino_model[cpu] == NULL)) | 478 | CPUMASK_PTR(online_policy_cpus, allmasks); |
| 459 | return -ENODEV; | 479 | CPUMASK_PTR(saved_mask, allmasks); |
| 480 | CPUMASK_PTR(set_mask, allmasks); | ||
| 481 | CPUMASK_PTR(covered_cpus, allmasks); | ||
| 482 | |||
| 483 | if (unlikely(allmasks == NULL)) | ||
| 484 | return -ENOMEM; | ||
| 485 | |||
| 486 | if (unlikely(per_cpu(centrino_model, cpu) == NULL)) { | ||
| 487 | retval = -ENODEV; | ||
| 488 | goto out; | ||
| 489 | } | ||
| 460 | 490 | ||
| 461 | if (unlikely(cpufreq_frequency_table_target(policy, | 491 | if (unlikely(cpufreq_frequency_table_target(policy, |
| 462 | centrino_model[cpu]->op_points, | 492 | per_cpu(centrino_model, cpu)->op_points, |
| 463 | target_freq, | 493 | target_freq, |
| 464 | relation, | 494 | relation, |
| 465 | &newstate))) { | 495 | &newstate))) { |
| 466 | return -EINVAL; | 496 | retval = -EINVAL; |
| 497 | goto out; | ||
| 467 | } | 498 | } |
| 468 | 499 | ||
| 469 | #ifdef CONFIG_HOTPLUG_CPU | 500 | #ifdef CONFIG_HOTPLUG_CPU |
| 470 | /* cpufreq holds the hotplug lock, so we are safe from here on */ | 501 | /* cpufreq holds the hotplug lock, so we are safe from here on */ |
| 471 | cpus_and(online_policy_cpus, cpu_online_map, policy->cpus); | 502 | cpus_and(*online_policy_cpus, cpu_online_map, policy->cpus); |
| 472 | #else | 503 | #else |
| 473 | online_policy_cpus = policy->cpus; | 504 | *online_policy_cpus = policy->cpus; |
| 474 | #endif | 505 | #endif |
| 475 | 506 | ||
| 476 | saved_mask = current->cpus_allowed; | 507 | *saved_mask = current->cpus_allowed; |
| 477 | first_cpu = 1; | 508 | first_cpu = 1; |
| 478 | cpus_clear(covered_cpus); | 509 | cpus_clear(*covered_cpus); |
| 479 | for_each_cpu_mask(j, online_policy_cpus) { | 510 | for_each_cpu_mask_nr(j, *online_policy_cpus) { |
| 480 | /* | 511 | /* |
| 481 | * Support for SMP systems. | 512 | * Support for SMP systems. |
| 482 | * Make sure we are running on CPU that wants to change freq | 513 | * Make sure we are running on CPU that wants to change freq |
| 483 | */ | 514 | */ |
| 484 | cpus_clear(set_mask); | 515 | cpus_clear(*set_mask); |
| 485 | if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) | 516 | if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) |
| 486 | cpus_or(set_mask, set_mask, online_policy_cpus); | 517 | cpus_or(*set_mask, *set_mask, *online_policy_cpus); |
| 487 | else | 518 | else |
| 488 | cpu_set(j, set_mask); | 519 | cpu_set(j, *set_mask); |
| 489 | 520 | ||
| 490 | set_cpus_allowed_ptr(current, &set_mask); | 521 | set_cpus_allowed_ptr(current, set_mask); |
| 491 | preempt_disable(); | 522 | preempt_disable(); |
| 492 | if (unlikely(!cpu_isset(smp_processor_id(), set_mask))) { | 523 | if (unlikely(!cpu_isset(smp_processor_id(), *set_mask))) { |
| 493 | dprintk("couldn't limit to CPUs in this domain\n"); | 524 | dprintk("couldn't limit to CPUs in this domain\n"); |
| 494 | retval = -EAGAIN; | 525 | retval = -EAGAIN; |
| 495 | if (first_cpu) { | 526 | if (first_cpu) { |
| @@ -500,7 +531,7 @@ static int centrino_target (struct cpufreq_policy *policy, | |||
| 500 | break; | 531 | break; |
| 501 | } | 532 | } |
| 502 | 533 | ||
| 503 | msr = centrino_model[cpu]->op_points[newstate].index; | 534 | msr = per_cpu(centrino_model, cpu)->op_points[newstate].index; |
| 504 | 535 | ||
| 505 | if (first_cpu) { | 536 | if (first_cpu) { |
| 506 | rdmsr(MSR_IA32_PERF_CTL, oldmsr, h); | 537 | rdmsr(MSR_IA32_PERF_CTL, oldmsr, h); |
| @@ -517,7 +548,7 @@ static int centrino_target (struct cpufreq_policy *policy, | |||
| 517 | dprintk("target=%dkHz old=%d new=%d msr=%04x\n", | 548 | dprintk("target=%dkHz old=%d new=%d msr=%04x\n", |
| 518 | target_freq, freqs.old, freqs.new, msr); | 549 | target_freq, freqs.old, freqs.new, msr); |
| 519 | 550 | ||
| 520 | for_each_cpu_mask(k, online_policy_cpus) { | 551 | for_each_cpu_mask_nr(k, *online_policy_cpus) { |
| 521 | freqs.cpu = k; | 552 | freqs.cpu = k; |
| 522 | cpufreq_notify_transition(&freqs, | 553 | cpufreq_notify_transition(&freqs, |
| 523 | CPUFREQ_PRECHANGE); | 554 | CPUFREQ_PRECHANGE); |
| @@ -536,11 +567,11 @@ static int centrino_target (struct cpufreq_policy *policy, | |||
| 536 | break; | 567 | break; |
| 537 | } | 568 | } |
| 538 | 569 | ||
| 539 | cpu_set(j, covered_cpus); | 570 | cpu_set(j, *covered_cpus); |
| 540 | preempt_enable(); | 571 | preempt_enable(); |
| 541 | } | 572 | } |
| 542 | 573 | ||
| 543 | for_each_cpu_mask(k, online_policy_cpus) { | 574 | for_each_cpu_mask_nr(k, *online_policy_cpus) { |
| 544 | freqs.cpu = k; | 575 | freqs.cpu = k; |
| 545 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | 576 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); |
| 546 | } | 577 | } |
| @@ -553,30 +584,32 @@ static int centrino_target (struct cpufreq_policy *policy, | |||
| 553 | * Best effort undo.. | 584 | * Best effort undo.. |
| 554 | */ | 585 | */ |
| 555 | 586 | ||
| 556 | if (!cpus_empty(covered_cpus)) { | 587 | if (!cpus_empty(*covered_cpus)) |
| 557 | for_each_cpu_mask(j, covered_cpus) { | 588 | for_each_cpu_mask_nr(j, *covered_cpus) { |
| 558 | set_cpus_allowed_ptr(current, | 589 | set_cpus_allowed_ptr(current, |
| 559 | &cpumask_of_cpu(j)); | 590 | &cpumask_of_cpu(j)); |
| 560 | wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); | 591 | wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); |
| 561 | } | 592 | } |
| 562 | } | ||
| 563 | 593 | ||
| 564 | tmp = freqs.new; | 594 | tmp = freqs.new; |
| 565 | freqs.new = freqs.old; | 595 | freqs.new = freqs.old; |
| 566 | freqs.old = tmp; | 596 | freqs.old = tmp; |
| 567 | for_each_cpu_mask(j, online_policy_cpus) { | 597 | for_each_cpu_mask_nr(j, *online_policy_cpus) { |
| 568 | freqs.cpu = j; | 598 | freqs.cpu = j; |
| 569 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | 599 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); |
| 570 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | 600 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); |
| 571 | } | 601 | } |
| 572 | } | 602 | } |
| 573 | set_cpus_allowed_ptr(current, &saved_mask); | 603 | set_cpus_allowed_ptr(current, saved_mask); |
| 574 | return 0; | 604 | retval = 0; |
| 605 | goto out; | ||
| 575 | 606 | ||
| 576 | migrate_end: | 607 | migrate_end: |
| 577 | preempt_enable(); | 608 | preempt_enable(); |
| 578 | set_cpus_allowed_ptr(current, &saved_mask); | 609 | set_cpus_allowed_ptr(current, saved_mask); |
| 579 | return 0; | 610 | out: |
| 611 | CPUMASK_FREE(allmasks); | ||
| 612 | return retval; | ||
| 580 | } | 613 | } |
| 581 | 614 | ||
| 582 | static struct freq_attr* centrino_attr[] = { | 615 | static struct freq_attr* centrino_attr[] = { |
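Editorial note on the centrino_target() conversion above: the four on-stack cpumask_t variables become members of the new struct allmasks, obtained with CPUMASK_ALLOC() and released with CPUMASK_FREE() on the shared out: path, while CPUMASK_PTR() hands each call site a pointer. That is why every cpus_and()/cpus_clear()/cpu_set() in the hunk now dereferences *set_mask, *covered_cpus and friends instead of naming a local variable. The same file also switches the centrino_model lookup from an array index to per_cpu(centrino_model, cpu), so the backing storage is presumably a per-CPU variable declared elsewhere in the driver. A minimal sketch of what those cpumask helpers expand to, assuming definitions along the lines of the 2.6.27-era <linux/cpumask.h> (the NR_CPUS threshold and the GFP flag are assumptions, not taken from this diff):

    #if NR_CPUS > BITS_PER_LONG
    /* Large configs: keep the struct full of cpumask_t's off the stack. */
    #define CPUMASK_ALLOC(m)    struct m *m = kmalloc(sizeof(*m), GFP_KERNEL)
    #define CPUMASK_FREE(m)     kfree(m)
    #else
    /* Small configs: the struct is cheap, keep it on the stack. */
    #define CPUMASK_ALLOC(m)    struct m _m, *m = &_m
    #define CPUMASK_FREE(m)
    #endif
    /* Either way, callers work through a pointer to each member mask. */
    #define CPUMASK_PTR(v, m)   cpumask_t *v = &(m->v)

Under this reading, the new NULL check on allmasks and the -ENOMEM return only matter in the kmalloc() case; on small-NR_CPUS builds the pointer can never be NULL.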
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c index 1b50244b1fdf..04d0376b64b0 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c | |||
| @@ -279,7 +279,7 @@ static int speedstep_target (struct cpufreq_policy *policy, | |||
| 279 | 279 | ||
| 280 | cpus_allowed = current->cpus_allowed; | 280 | cpus_allowed = current->cpus_allowed; |
| 281 | 281 | ||
| 282 | for_each_cpu_mask(i, policy->cpus) { | 282 | for_each_cpu_mask_nr(i, policy->cpus) { |
| 283 | freqs.cpu = i; | 283 | freqs.cpu = i; |
| 284 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | 284 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); |
| 285 | } | 285 | } |
| @@ -292,7 +292,7 @@ static int speedstep_target (struct cpufreq_policy *policy, | |||
| 292 | /* allow to be run on all CPUs */ | 292 | /* allow to be run on all CPUs */ |
| 293 | set_cpus_allowed_ptr(current, &cpus_allowed); | 293 | set_cpus_allowed_ptr(current, &cpus_allowed); |
| 294 | 294 | ||
| 295 | for_each_cpu_mask(i, policy->cpus) { | 295 | for_each_cpu_mask_nr(i, policy->cpus) { |
| 296 | freqs.cpu = i; | 296 | freqs.cpu = i; |
| 297 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | 297 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); |
| 298 | } | 298 | } |
| @@ -431,7 +431,7 @@ static void __exit speedstep_exit(void) | |||
| 431 | } | 431 | } |
| 432 | 432 | ||
| 433 | 433 | ||
| 434 | MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>, Dominik Brodowski <linux@brodo.de>"); | 434 | MODULE_AUTHOR ("Dave Jones <davej@redhat.com>, Dominik Brodowski <linux@brodo.de>"); |
| 435 | MODULE_DESCRIPTION ("Speedstep driver for Intel mobile processors on chipsets with ICH-M southbridges."); | 435 | MODULE_DESCRIPTION ("Speedstep driver for Intel mobile processors on chipsets with ICH-M southbridges."); |
| 436 | MODULE_LICENSE ("GPL"); | 436 | MODULE_LICENSE ("GPL"); |
| 437 | 437 | ||
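Both cpufreq drivers in this patch also move from for_each_cpu_mask() to for_each_cpu_mask_nr(). The practical difference, as I understand the helpers of this era, is the loop bound: the _nr variant stops at nr_cpu_ids (the number of CPU ids actually possible on the running system) rather than scanning all NR_CPUS compile-time slots, which matters on kernels built with a very large NR_CPUS. A rough sketch of the shape of the two macros; the exact expansion is an assumption, only the bound is the point:

    /* Illustrative shape only -- not copied from <linux/cpumask.h>. */
    #define for_each_cpu_mask(cpu, mask)                    \
            for ((cpu) = first_cpu(mask);                   \
                 (cpu) < NR_CPUS;                           \
                 (cpu) = next_cpu((cpu), (mask)))

    #define for_each_cpu_mask_nr(cpu, mask)                 \
            for ((cpu) = first_cpu(mask);                   \
                 (cpu) < nr_cpu_ids;                        \
                 (cpu) = next_cpu((cpu), (mask)))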
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 3fd7a67bb06a..ffd0f5ed071a 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c | |||
| @@ -15,13 +15,11 @@ | |||
| 15 | /* | 15 | /* |
| 16 | * Read NSC/Cyrix DEVID registers (DIR) to get more detailed info. about the CPU | 16 | * Read NSC/Cyrix DEVID registers (DIR) to get more detailed info. about the CPU |
| 17 | */ | 17 | */ |
| 18 | static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) | 18 | static void __cpuinit __do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) |
| 19 | { | 19 | { |
| 20 | unsigned char ccr2, ccr3; | 20 | unsigned char ccr2, ccr3; |
| 21 | unsigned long flags; | ||
| 22 | 21 | ||
| 23 | /* we test for DEVID by checking whether CCR3 is writable */ | 22 | /* we test for DEVID by checking whether CCR3 is writable */ |
| 24 | local_irq_save(flags); | ||
| 25 | ccr3 = getCx86(CX86_CCR3); | 23 | ccr3 = getCx86(CX86_CCR3); |
| 26 | setCx86(CX86_CCR3, ccr3 ^ 0x80); | 24 | setCx86(CX86_CCR3, ccr3 ^ 0x80); |
| 27 | getCx86(0xc0); /* dummy to change bus */ | 25 | getCx86(0xc0); /* dummy to change bus */ |
| @@ -44,9 +42,16 @@ static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) | |||
| 44 | *dir0 = getCx86(CX86_DIR0); | 42 | *dir0 = getCx86(CX86_DIR0); |
| 45 | *dir1 = getCx86(CX86_DIR1); | 43 | *dir1 = getCx86(CX86_DIR1); |
| 46 | } | 44 | } |
| 47 | local_irq_restore(flags); | ||
| 48 | } | 45 | } |
| 49 | 46 | ||
| 47 | static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) | ||
| 48 | { | ||
| 49 | unsigned long flags; | ||
| 50 | |||
| 51 | local_irq_save(flags); | ||
| 52 | __do_cyrix_devid(dir0, dir1); | ||
| 53 | local_irq_restore(flags); | ||
| 54 | } | ||
| 50 | /* | 55 | /* |
| 51 | * Cx86_dir0_msb is a HACK needed by check_cx686_cpuid/slop in bugs.h in | 56 | * Cx86_dir0_msb is a HACK needed by check_cx686_cpuid/slop in bugs.h in |
| 52 | * order to identify the Cyrix CPU model after we're out of setup.c | 57 | * order to identify the Cyrix CPU model after we're out of setup.c |
| @@ -116,7 +121,7 @@ static void __cpuinit set_cx86_reorder(void) | |||
| 116 | setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ | 121 | setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ |
| 117 | 122 | ||
| 118 | /* Load/Store Serialize to mem access disable (=reorder it) */ | 123 | /* Load/Store Serialize to mem access disable (=reorder it) */ |
| 119 | setCx86(CX86_PCR0, getCx86(CX86_PCR0) & ~0x80); | 124 | setCx86_old(CX86_PCR0, getCx86_old(CX86_PCR0) & ~0x80); |
| 120 | /* set load/store serialize from 1GB to 4GB */ | 125 | /* set load/store serialize from 1GB to 4GB */ |
| 121 | ccr3 |= 0xe0; | 126 | ccr3 |= 0xe0; |
| 122 | setCx86(CX86_CCR3, ccr3); | 127 | setCx86(CX86_CCR3, ccr3); |
| @@ -127,28 +132,11 @@ static void __cpuinit set_cx86_memwb(void) | |||
| 127 | printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n"); | 132 | printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n"); |
| 128 | 133 | ||
| 129 | /* CCR2 bit 2: unlock NW bit */ | 134 | /* CCR2 bit 2: unlock NW bit */ |
| 130 | setCx86(CX86_CCR2, getCx86(CX86_CCR2) & ~0x04); | 135 | setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) & ~0x04); |
| 131 | /* set 'Not Write-through' */ | 136 | /* set 'Not Write-through' */ |
| 132 | write_cr0(read_cr0() | X86_CR0_NW); | 137 | write_cr0(read_cr0() | X86_CR0_NW); |
| 133 | /* CCR2 bit 2: lock NW bit and set WT1 */ | 138 | /* CCR2 bit 2: lock NW bit and set WT1 */ |
| 134 | setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14); | 139 | setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) | 0x14); |
| 135 | } | ||
| 136 | |||
| 137 | static void __cpuinit set_cx86_inc(void) | ||
| 138 | { | ||
| 139 | unsigned char ccr3; | ||
| 140 | |||
| 141 | printk(KERN_INFO "Enable Incrementor on Cyrix/NSC processor.\n"); | ||
| 142 | |||
| 143 | ccr3 = getCx86(CX86_CCR3); | ||
| 144 | setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ | ||
| 145 | /* PCR1 -- Performance Control */ | ||
| 146 | /* Incrementor on, whatever that is */ | ||
| 147 | setCx86(CX86_PCR1, getCx86(CX86_PCR1) | 0x02); | ||
| 148 | /* PCR0 -- Performance Control */ | ||
| 149 | /* Incrementor Margin 10 */ | ||
| 150 | setCx86(CX86_PCR0, getCx86(CX86_PCR0) | 0x04); | ||
| 151 | setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ | ||
| 152 | } | 140 | } |
| 153 | 141 | ||
| 154 | /* | 142 | /* |
| @@ -162,23 +150,40 @@ static void __cpuinit geode_configure(void) | |||
| 162 | local_irq_save(flags); | 150 | local_irq_save(flags); |
| 163 | 151 | ||
| 164 | /* Suspend on halt power saving and enable #SUSP pin */ | 152 | /* Suspend on halt power saving and enable #SUSP pin */ |
| 165 | setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88); | 153 | setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) | 0x88); |
| 166 | 154 | ||
| 167 | ccr3 = getCx86(CX86_CCR3); | 155 | ccr3 = getCx86(CX86_CCR3); |
| 168 | setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ | 156 | setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ |
| 169 | 157 | ||
| 170 | 158 | ||
| 171 | /* FPU fast, DTE cache, Mem bypass */ | 159 | /* FPU fast, DTE cache, Mem bypass */ |
| 172 | setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x38); | 160 | setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x38); |
| 173 | setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ | 161 | setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ |
| 174 | 162 | ||
| 175 | set_cx86_memwb(); | 163 | set_cx86_memwb(); |
| 176 | set_cx86_reorder(); | 164 | set_cx86_reorder(); |
| 177 | set_cx86_inc(); | ||
| 178 | 165 | ||
| 179 | local_irq_restore(flags); | 166 | local_irq_restore(flags); |
| 180 | } | 167 | } |
| 181 | 168 | ||
| 169 | static void __cpuinit early_init_cyrix(struct cpuinfo_x86 *c) | ||
| 170 | { | ||
| 171 | unsigned char dir0, dir0_msn, dir1 = 0; | ||
| 172 | |||
| 173 | __do_cyrix_devid(&dir0, &dir1); | ||
| 174 | dir0_msn = dir0 >> 4; /* identifies CPU "family" */ | ||
| 175 | |||
| 176 | switch (dir0_msn) { | ||
| 177 | case 3: /* 6x86/6x86L */ | ||
| 178 | /* Emulate MTRRs using Cyrix's ARRs. */ | ||
| 179 | set_cpu_cap(c, X86_FEATURE_CYRIX_ARR); | ||
| 180 | break; | ||
| 181 | case 5: /* 6x86MX/M II */ | ||
| 182 | /* Emulate MTRRs using Cyrix's ARRs. */ | ||
| 183 | set_cpu_cap(c, X86_FEATURE_CYRIX_ARR); | ||
| 184 | break; | ||
| 185 | } | ||
| 186 | } | ||
| 182 | 187 | ||
| 183 | static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) | 188 | static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) |
| 184 | { | 189 | { |
| @@ -286,7 +291,7 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) | |||
| 286 | /* GXm supports extended cpuid levels 'ala' AMD */ | 291 | /* GXm supports extended cpuid levels 'ala' AMD */ |
| 287 | if (c->cpuid_level == 2) { | 292 | if (c->cpuid_level == 2) { |
| 288 | /* Enable cxMMX extensions (GX1 Datasheet 54) */ | 293 | /* Enable cxMMX extensions (GX1 Datasheet 54) */ |
| 289 | setCx86(CX86_CCR7, getCx86(CX86_CCR7) | 1); | 294 | setCx86_old(CX86_CCR7, getCx86_old(CX86_CCR7) | 1); |
| 290 | 295 | ||
| 291 | /* | 296 | /* |
| 292 | * GXm : 0x30 ... 0x5f GXm datasheet 51 | 297 | * GXm : 0x30 ... 0x5f GXm datasheet 51 |
| @@ -296,7 +301,6 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) | |||
| 296 | */ | 301 | */ |
| 297 | if ((0x30 <= dir1 && dir1 <= 0x6f) || (0x80 <= dir1 && dir1 <= 0x8f)) | 302 | if ((0x30 <= dir1 && dir1 <= 0x6f) || (0x80 <= dir1 && dir1 <= 0x8f)) |
| 298 | geode_configure(); | 303 | geode_configure(); |
| 299 | get_model_name(c); /* get CPU marketing name */ | ||
| 300 | return; | 304 | return; |
| 301 | } else { /* MediaGX */ | 305 | } else { /* MediaGX */ |
| 302 | Cx86_cb[2] = (dir0_lsn & 1) ? '3' : '4'; | 306 | Cx86_cb[2] = (dir0_lsn & 1) ? '3' : '4'; |
| @@ -309,7 +313,7 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) | |||
| 309 | if (dir1 > 7) { | 313 | if (dir1 > 7) { |
| 310 | dir0_msn++; /* M II */ | 314 | dir0_msn++; /* M II */ |
| 311 | /* Enable MMX extensions (App note 108) */ | 315 | /* Enable MMX extensions (App note 108) */ |
| 312 | setCx86(CX86_CCR7, getCx86(CX86_CCR7)|1); | 316 | setCx86_old(CX86_CCR7, getCx86_old(CX86_CCR7)|1); |
| 313 | } else { | 317 | } else { |
| 314 | c->coma_bug = 1; /* 6x86MX, it has the bug. */ | 318 | c->coma_bug = 1; /* 6x86MX, it has the bug. */ |
| 315 | } | 319 | } |
| @@ -424,7 +428,7 @@ static void __cpuinit cyrix_identify(struct cpuinfo_x86 *c) | |||
| 424 | local_irq_save(flags); | 428 | local_irq_save(flags); |
| 425 | ccr3 = getCx86(CX86_CCR3); | 429 | ccr3 = getCx86(CX86_CCR3); |
| 426 | setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ | 430 | setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ |
| 427 | setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x80); /* enable cpuid */ | 431 | setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x80); /* enable cpuid */ |
| 428 | setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ | 432 | setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ |
| 429 | local_irq_restore(flags); | 433 | local_irq_restore(flags); |
| 430 | } | 434 | } |
| @@ -434,16 +438,19 @@ static void __cpuinit cyrix_identify(struct cpuinfo_x86 *c) | |||
| 434 | static struct cpu_dev cyrix_cpu_dev __cpuinitdata = { | 438 | static struct cpu_dev cyrix_cpu_dev __cpuinitdata = { |
| 435 | .c_vendor = "Cyrix", | 439 | .c_vendor = "Cyrix", |
| 436 | .c_ident = { "CyrixInstead" }, | 440 | .c_ident = { "CyrixInstead" }, |
| 441 | .c_early_init = early_init_cyrix, | ||
| 437 | .c_init = init_cyrix, | 442 | .c_init = init_cyrix, |
| 438 | .c_identify = cyrix_identify, | 443 | .c_identify = cyrix_identify, |
| 444 | .c_x86_vendor = X86_VENDOR_CYRIX, | ||
| 439 | }; | 445 | }; |
| 440 | 446 | ||
| 441 | cpu_vendor_dev_register(X86_VENDOR_CYRIX, &cyrix_cpu_dev); | 447 | cpu_dev_register(cyrix_cpu_dev); |
| 442 | 448 | ||
| 443 | static struct cpu_dev nsc_cpu_dev __cpuinitdata = { | 449 | static struct cpu_dev nsc_cpu_dev __cpuinitdata = { |
| 444 | .c_vendor = "NSC", | 450 | .c_vendor = "NSC", |
| 445 | .c_ident = { "Geode by NSC" }, | 451 | .c_ident = { "Geode by NSC" }, |
| 446 | .c_init = init_nsc, | 452 | .c_init = init_nsc, |
| 453 | .c_x86_vendor = X86_VENDOR_NSC, | ||
| 447 | }; | 454 | }; |
| 448 | 455 | ||
| 449 | cpu_vendor_dev_register(X86_VENDOR_NSC, &nsc_cpu_dev); | 456 | cpu_dev_register(nsc_cpu_dev); |
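The cyrix.c half of the patch illustrates a common refactoring: the DEVID probe body moves into __do_cyrix_devid(), which assumes interrupts are already masked, while do_cyrix_devid() stays behind as a thin local_irq_save()/local_irq_restore() wrapper; the new early_init_cyrix() hook can then call the double-underscore form directly. A minimal sketch of the pattern with hypothetical names (nothing here is taken verbatim from the patch):

    /* Inner helper: caller guarantees interrupts are masked on this CPU. */
    static void __probe_devid(unsigned char *dir0, unsigned char *dir1)
    {
            /* ... poke the configuration registers ... */
    }

    /* Public form: safe to call from normal context. */
    static void probe_devid(unsigned char *dir0, unsigned char *dir1)
    {
            unsigned long flags;

            local_irq_save(flags);      /* mask IRQs, remember prior state */
            __probe_devid(dir0, dir1);
            local_irq_restore(flags);   /* put the IRQ state back */
    }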
diff --git a/arch/x86/kernel/cpu/feature_names.c b/arch/x86/kernel/cpu/feature_names.c deleted file mode 100644 index e43ad4ad4cba..000000000000 --- a/arch/x86/kernel/cpu/feature_names.c +++ /dev/null | |||
| @@ -1,83 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * Strings for the various x86 capability flags. | ||
| 3 | * | ||
| 4 | * This file must not contain any executable code. | ||
| 5 | */ | ||
| 6 | |||
| 7 | #include <asm/cpufeature.h> | ||
| 8 | |||
| 9 | /* | ||
| 10 | * These flag bits must match the definitions in <asm/cpufeature.h>. | ||
| 11 | * NULL means this bit is undefined or reserved; either way it doesn't | ||
| 12 | * have meaning as far as Linux is concerned. Note that it's important | ||
| 13 | * to realize there is a difference between this table and CPUID -- if | ||
| 14 | * applications want to get the raw CPUID data, they should access | ||
| 15 | * /dev/cpu/<cpu_nr>/cpuid instead. | ||
| 16 | */ | ||
| 17 | const char * const x86_cap_flags[NCAPINTS*32] = { | ||
| 18 | /* Intel-defined */ | ||
| 19 | "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce", | ||
| 20 | "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov", | ||
| 21 | "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx", | ||
| 22 | "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe", | ||
| 23 | |||
| 24 | /* AMD-defined */ | ||
| 25 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
| 26 | NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, | ||
| 27 | NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL, | ||
| 28 | NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm", | ||
| 29 | "3dnowext", "3dnow", | ||
| 30 | |||
| 31 | /* Transmeta-defined */ | ||
| 32 | "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, | ||
| 33 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
| 34 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
| 35 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
| 36 | |||
| 37 | /* Other (Linux-defined) */ | ||
| 38 | "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr", | ||
| 39 | NULL, NULL, NULL, NULL, | ||
| 40 | "constant_tsc", "up", NULL, "arch_perfmon", | ||
| 41 | "pebs", "bts", NULL, NULL, | ||
| 42 | "rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
| 43 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
| 44 | |||
| 45 | /* Intel-defined (#2) */ | ||
| 46 | "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", | ||
| 47 | "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, | ||
| 48 | NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt", | ||
| 49 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
| 50 | |||
| 51 | /* VIA/Cyrix/Centaur-defined */ | ||
| 52 | NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en", | ||
| 53 | "ace2", "ace2_en", "phe", "phe_en", "pmm", "pmm_en", NULL, NULL, | ||
| 54 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
| 55 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
| 56 | |||
| 57 | /* AMD-defined (#2) */ | ||
| 58 | "lahf_lm", "cmp_legacy", "svm", "extapic", | ||
| 59 | "cr8_legacy", "abm", "sse4a", "misalignsse", | ||
| 60 | "3dnowprefetch", "osvw", "ibs", "sse5", | ||
| 61 | "skinit", "wdt", NULL, NULL, | ||
| 62 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
| 63 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
| 64 | |||
| 65 | /* Auxiliary (Linux-defined) */ | ||
| 66 | "ida", NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
| 67 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
| 68 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
| 69 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
| 70 | }; | ||
| 71 | |||
| 72 | const char *const x86_power_flags[32] = { | ||
| 73 | "ts", /* temperature sensor */ | ||
| 74 | "fid", /* frequency id control */ | ||
| 75 | "vid", /* voltage id control */ | ||
| 76 | "ttp", /* thermal trip */ | ||
| 77 | "tm", | ||
| 78 | "stc", | ||
| 79 | "100mhzsteps", | ||
| 80 | "hwpstate", | ||
| 81 | "", /* tsc invariant mapped to constant_tsc */ | ||
| 82 | /* nothing */ | ||
| 83 | }; | ||
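The deleted x86_cap_flags[] table is laid out as NCAPINTS consecutive 32-entry blocks, one block per capability word, so a feature sitting at word w, bit b is simply x86_cap_flags[w * 32 + b]. A small worked example using a position read off the table above; the program is illustrative and only demonstrates the indexing:

    #include <stdio.h>

    int main(void)
    {
            /* Word 4 is the second Intel-defined block above; bit 9 in
             * that block is "ssse3". */
            unsigned int word = 4, bit = 9;

            printf("x86_cap_flags[%u] == \"ssse3\"\n", word * 32 + bit);
            return 0;
    }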
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 70609efdf1da..cce0b6118d55 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
| @@ -15,6 +15,11 @@ | |||
| 15 | #include <asm/ds.h> | 15 | #include <asm/ds.h> |
| 16 | #include <asm/bugs.h> | 16 | #include <asm/bugs.h> |
| 17 | 17 | ||
| 18 | #ifdef CONFIG_X86_64 | ||
| 19 | #include <asm/topology.h> | ||
| 20 | #include <asm/numa_64.h> | ||
| 21 | #endif | ||
| 22 | |||
| 18 | #include "cpu.h" | 23 | #include "cpu.h" |
| 19 | 24 | ||
| 20 | #ifdef CONFIG_X86_LOCAL_APIC | 25 | #ifdef CONFIG_X86_LOCAL_APIC |
| @@ -23,23 +28,22 @@ | |||
| 23 | #include <mach_apic.h> | 28 | #include <mach_apic.h> |
| 24 | #endif | 29 | #endif |
| 25 | 30 | ||
| 26 | #ifdef CONFIG_X86_INTEL_USERCOPY | ||
| 27 | /* | ||
| 28 | * Alignment at which movsl is preferred for bulk memory copies. | ||
| 29 | */ | ||
| 30 | struct movsl_mask movsl_mask __read_mostly; | ||
| 31 | #endif | ||
| 32 | |||
| 33 | static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) | 31 | static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) |
| 34 | { | 32 | { |
| 35 | /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */ | ||
| 36 | if (c->x86 == 15 && c->x86_cache_alignment == 64) | ||
| 37 | c->x86_cache_alignment = 128; | ||
| 38 | if ((c->x86 == 0xf && c->x86_model >= 0x03) || | 33 | if ((c->x86 == 0xf && c->x86_model >= 0x03) || |
| 39 | (c->x86 == 0x6 && c->x86_model >= 0x0e)) | 34 | (c->x86 == 0x6 && c->x86_model >= 0x0e)) |
| 40 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | 35 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); |
| 36 | |||
| 37 | #ifdef CONFIG_X86_64 | ||
| 38 | set_cpu_cap(c, X86_FEATURE_SYSENTER32); | ||
| 39 | #else | ||
| 40 | /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */ | ||
| 41 | if (c->x86 == 15 && c->x86_cache_alignment == 64) | ||
| 42 | c->x86_cache_alignment = 128; | ||
| 43 | #endif | ||
| 41 | } | 44 | } |
| 42 | 45 | ||
| 46 | #ifdef CONFIG_X86_32 | ||
| 43 | /* | 47 | /* |
| 44 | * Early probe support logic for ppro memory erratum #50 | 48 | * Early probe support logic for ppro memory erratum #50 |
| 45 | * | 49 | * |
| @@ -59,15 +63,54 @@ int __cpuinit ppro_with_ram_bug(void) | |||
| 59 | return 0; | 63 | return 0; |
| 60 | } | 64 | } |
| 61 | 65 | ||
| 66 | #ifdef CONFIG_X86_F00F_BUG | ||
| 67 | static void __cpuinit trap_init_f00f_bug(void) | ||
| 68 | { | ||
| 69 | __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); | ||
| 62 | 70 | ||
| 63 | /* | 71 | /* |
| 64 | * P4 Xeon errata 037 workaround. | 72 | * Update the IDT descriptor and reload the IDT so that |
| 65 | * Hardware prefetcher may cause stale data to be loaded into the cache. | 73 | * it uses the read-only mapped virtual address. |
| 66 | */ | 74 | */ |
| 67 | static void __cpuinit Intel_errata_workarounds(struct cpuinfo_x86 *c) | 75 | idt_descr.address = fix_to_virt(FIX_F00F_IDT); |
| 76 | load_idt(&idt_descr); | ||
| 77 | } | ||
| 78 | #endif | ||
| 79 | |||
| 80 | static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) | ||
| 68 | { | 81 | { |
| 69 | unsigned long lo, hi; | 82 | unsigned long lo, hi; |
| 70 | 83 | ||
| 84 | #ifdef CONFIG_X86_F00F_BUG | ||
| 85 | /* | ||
| 86 | * All current models of Pentium and Pentium with MMX technology CPUs | ||
| 87 | * have the F0 0F bug, which lets nonprivileged users lock up the system. | ||
| 88 | * Note that the workaround only should be initialized once... | ||
| 89 | */ | ||
| 90 | c->f00f_bug = 0; | ||
| 91 | if (!paravirt_enabled() && c->x86 == 5) { | ||
| 92 | static int f00f_workaround_enabled; | ||
| 93 | |||
| 94 | c->f00f_bug = 1; | ||
| 95 | if (!f00f_workaround_enabled) { | ||
| 96 | trap_init_f00f_bug(); | ||
| 97 | printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n"); | ||
| 98 | f00f_workaround_enabled = 1; | ||
| 99 | } | ||
| 100 | } | ||
| 101 | #endif | ||
| 102 | |||
| 103 | /* | ||
| 104 | * SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until | ||
| 105 | * model 3 mask 3 | ||
| 106 | */ | ||
| 107 | if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) | ||
| 108 | clear_cpu_cap(c, X86_FEATURE_SEP); | ||
| 109 | |||
| 110 | /* | ||
| 111 | * P4 Xeon errata 037 workaround. | ||
| 112 | * Hardware prefetcher may cause stale data to be loaded into the cache. | ||
| 113 | */ | ||
| 71 | if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { | 114 | if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { |
| 72 | rdmsr(MSR_IA32_MISC_ENABLE, lo, hi); | 115 | rdmsr(MSR_IA32_MISC_ENABLE, lo, hi); |
| 73 | if ((lo & (1<<9)) == 0) { | 116 | if ((lo & (1<<9)) == 0) { |
| @@ -77,13 +120,68 @@ static void __cpuinit Intel_errata_workarounds(struct cpuinfo_x86 *c) | |||
| 77 | wrmsr (MSR_IA32_MISC_ENABLE, lo, hi); | 120 | wrmsr (MSR_IA32_MISC_ENABLE, lo, hi); |
| 78 | } | 121 | } |
| 79 | } | 122 | } |
| 123 | |||
| 124 | /* | ||
| 125 | * See if we have a good local APIC by checking for buggy Pentia, | ||
| 126 | * i.e. all B steppings and the C2 stepping of P54C when using their | ||
| 127 | * integrated APIC (see 11AP erratum in "Pentium Processor | ||
| 128 | * Specification Update"). | ||
| 129 | */ | ||
| 130 | if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 && | ||
| 131 | (c->x86_mask < 0x6 || c->x86_mask == 0xb)) | ||
| 132 | set_cpu_cap(c, X86_FEATURE_11AP); | ||
| 133 | |||
| 134 | |||
| 135 | #ifdef CONFIG_X86_INTEL_USERCOPY | ||
| 136 | /* | ||
| 137 | * Set up the preferred alignment for movsl bulk memory moves | ||
| 138 | */ | ||
| 139 | switch (c->x86) { | ||
| 140 | case 4: /* 486: untested */ | ||
| 141 | break; | ||
| 142 | case 5: /* Old Pentia: untested */ | ||
| 143 | break; | ||
| 144 | case 6: /* PII/PIII only like movsl with 8-byte alignment */ | ||
| 145 | movsl_mask.mask = 7; | ||
| 146 | break; | ||
| 147 | case 15: /* P4 is OK down to 8-byte alignment */ | ||
| 148 | movsl_mask.mask = 7; | ||
| 149 | break; | ||
| 150 | } | ||
| 151 | #endif | ||
| 152 | |||
| 153 | #ifdef CONFIG_X86_NUMAQ | ||
| 154 | numaq_tsc_disable(); | ||
| 155 | #endif | ||
| 80 | } | 156 | } |
| 157 | #else | ||
| 158 | static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) | ||
| 159 | { | ||
| 160 | } | ||
| 161 | #endif | ||
| 81 | 162 | ||
| 163 | static void __cpuinit srat_detect_node(void) | ||
| 164 | { | ||
| 165 | #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) | ||
| 166 | unsigned node; | ||
| 167 | int cpu = smp_processor_id(); | ||
| 168 | int apicid = hard_smp_processor_id(); | ||
| 169 | |||
| 170 | /* Don't do the funky fallback heuristics the AMD version employs | ||
| 171 | for now. */ | ||
| 172 | node = apicid_to_node[apicid]; | ||
| 173 | if (node == NUMA_NO_NODE || !node_online(node)) | ||
| 174 | node = first_node(node_online_map); | ||
| 175 | numa_set_node(cpu, node); | ||
| 176 | |||
| 177 | printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node); | ||
| 178 | #endif | ||
| 179 | } | ||
| 82 | 180 | ||
| 83 | /* | 181 | /* |
| 84 | * find out the number of processor cores on the die | 182 | * find out the number of processor cores on the die |
| 85 | */ | 183 | */ |
| 86 | static int __cpuinit num_cpu_cores(struct cpuinfo_x86 *c) | 184 | static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) |
| 87 | { | 185 | { |
| 88 | unsigned int eax, ebx, ecx, edx; | 186 | unsigned int eax, ebx, ecx, edx; |
| 89 | 187 | ||
| @@ -98,45 +196,51 @@ static int __cpuinit num_cpu_cores(struct cpuinfo_x86 *c) | |||
| 98 | return 1; | 196 | return 1; |
| 99 | } | 197 | } |
| 100 | 198 | ||
| 101 | #ifdef CONFIG_X86_F00F_BUG | 199 | static void __cpuinit detect_vmx_virtcap(struct cpuinfo_x86 *c) |
| 102 | static void __cpuinit trap_init_f00f_bug(void) | ||
| 103 | { | 200 | { |
| 104 | __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); | 201 | /* Intel VMX MSR indicated features */ |
| 105 | 202 | #define X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW 0x00200000 | |
| 106 | /* | 203 | #define X86_VMX_FEATURE_PROC_CTLS_VNMI 0x00400000 |
| 107 | * Update the IDT descriptor and reload the IDT so that | 204 | #define X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS 0x80000000 |
| 108 | * it uses the read-only mapped virtual address. | 205 | #define X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC 0x00000001 |
| 109 | */ | 206 | #define X86_VMX_FEATURE_PROC_CTLS2_EPT 0x00000002 |
| 110 | idt_descr.address = fix_to_virt(FIX_F00F_IDT); | 207 | #define X86_VMX_FEATURE_PROC_CTLS2_VPID 0x00000020 |
| 111 | load_idt(&idt_descr); | 208 | |
| 209 | u32 vmx_msr_low, vmx_msr_high, msr_ctl, msr_ctl2; | ||
| 210 | |||
| 211 | clear_cpu_cap(c, X86_FEATURE_TPR_SHADOW); | ||
| 212 | clear_cpu_cap(c, X86_FEATURE_VNMI); | ||
| 213 | clear_cpu_cap(c, X86_FEATURE_FLEXPRIORITY); | ||
| 214 | clear_cpu_cap(c, X86_FEATURE_EPT); | ||
| 215 | clear_cpu_cap(c, X86_FEATURE_VPID); | ||
| 216 | |||
| 217 | rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, vmx_msr_low, vmx_msr_high); | ||
| 218 | msr_ctl = vmx_msr_high | vmx_msr_low; | ||
| 219 | if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW) | ||
| 220 | set_cpu_cap(c, X86_FEATURE_TPR_SHADOW); | ||
| 221 | if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_VNMI) | ||
| 222 | set_cpu_cap(c, X86_FEATURE_VNMI); | ||
| 223 | if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS) { | ||
| 224 | rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2, | ||
| 225 | vmx_msr_low, vmx_msr_high); | ||
| 226 | msr_ctl2 = vmx_msr_high | vmx_msr_low; | ||
| 227 | if ((msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC) && | ||
| 228 | (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW)) | ||
| 229 | set_cpu_cap(c, X86_FEATURE_FLEXPRIORITY); | ||
| 230 | if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_EPT) | ||
| 231 | set_cpu_cap(c, X86_FEATURE_EPT); | ||
| 232 | if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VPID) | ||
| 233 | set_cpu_cap(c, X86_FEATURE_VPID); | ||
| 234 | } | ||
| 112 | } | 235 | } |
| 113 | #endif | ||
| 114 | 236 | ||
| 115 | static void __cpuinit init_intel(struct cpuinfo_x86 *c) | 237 | static void __cpuinit init_intel(struct cpuinfo_x86 *c) |
| 116 | { | 238 | { |
| 117 | unsigned int l2 = 0; | 239 | unsigned int l2 = 0; |
| 118 | char *p = NULL; | ||
| 119 | 240 | ||
| 120 | early_init_intel(c); | 241 | early_init_intel(c); |
| 121 | 242 | ||
| 122 | #ifdef CONFIG_X86_F00F_BUG | 243 | intel_workarounds(c); |
| 123 | /* | ||
| 124 | * All current models of Pentium and Pentium with MMX technology CPUs | ||
| 125 | * have the F0 0F bug, which lets nonprivileged users lock up the system. | ||
| 126 | * Note that the workaround only should be initialized once... | ||
| 127 | */ | ||
| 128 | c->f00f_bug = 0; | ||
| 129 | if (!paravirt_enabled() && c->x86 == 5) { | ||
| 130 | static int f00f_workaround_enabled; | ||
| 131 | |||
| 132 | c->f00f_bug = 1; | ||
| 133 | if (!f00f_workaround_enabled) { | ||
| 134 | trap_init_f00f_bug(); | ||
| 135 | printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n"); | ||
| 136 | f00f_workaround_enabled = 1; | ||
| 137 | } | ||
| 138 | } | ||
| 139 | #endif | ||
| 140 | 244 | ||
| 141 | l2 = init_intel_cacheinfo(c); | 245 | l2 = init_intel_cacheinfo(c); |
| 142 | if (c->cpuid_level > 9) { | 246 | if (c->cpuid_level > 9) { |
| @@ -146,16 +250,32 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
| 146 | set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); | 250 | set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); |
| 147 | } | 251 | } |
| 148 | 252 | ||
| 149 | /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */ | 253 | if (cpu_has_xmm2) |
| 150 | if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) | 254 | set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); |
| 151 | clear_cpu_cap(c, X86_FEATURE_SEP); | 255 | if (cpu_has_ds) { |
| 256 | unsigned int l1; | ||
| 257 | rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); | ||
| 258 | if (!(l1 & (1<<11))) | ||
| 259 | set_cpu_cap(c, X86_FEATURE_BTS); | ||
| 260 | if (!(l1 & (1<<12))) | ||
| 261 | set_cpu_cap(c, X86_FEATURE_PEBS); | ||
| 262 | ds_init_intel(c); | ||
| 263 | } | ||
| 152 | 264 | ||
| 265 | #ifdef CONFIG_X86_64 | ||
| 266 | if (c->x86 == 15) | ||
| 267 | c->x86_cache_alignment = c->x86_clflush_size * 2; | ||
| 268 | if (c->x86 == 6) | ||
| 269 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); | ||
| 270 | #else | ||
| 153 | /* | 271 | /* |
| 154 | * Names for the Pentium II/Celeron processors | 272 | * Names for the Pentium II/Celeron processors |
| 155 | * detectable only by also checking the cache size. | 273 | * detectable only by also checking the cache size. |
| 156 | * Dixon is NOT a Celeron. | 274 | * Dixon is NOT a Celeron. |
| 157 | */ | 275 | */ |
| 158 | if (c->x86 == 6) { | 276 | if (c->x86 == 6) { |
| 277 | char *p = NULL; | ||
| 278 | |||
| 159 | switch (c->x86_model) { | 279 | switch (c->x86_model) { |
| 160 | case 5: | 280 | case 5: |
| 161 | if (c->x86_mask == 0) { | 281 | if (c->x86_mask == 0) { |
| @@ -178,60 +298,41 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
| 178 | p = "Celeron (Coppermine)"; | 298 | p = "Celeron (Coppermine)"; |
| 179 | break; | 299 | break; |
| 180 | } | 300 | } |
| 181 | } | ||
| 182 | 301 | ||
| 183 | if (p) | 302 | if (p) |
| 184 | strcpy(c->x86_model_id, p); | 303 | strcpy(c->x86_model_id, p); |
| 185 | |||
| 186 | c->x86_max_cores = num_cpu_cores(c); | ||
| 187 | |||
| 188 | detect_ht(c); | ||
| 189 | |||
| 190 | /* Work around errata */ | ||
| 191 | Intel_errata_workarounds(c); | ||
| 192 | |||
| 193 | #ifdef CONFIG_X86_INTEL_USERCOPY | ||
| 194 | /* | ||
| 195 | * Set up the preferred alignment for movsl bulk memory moves | ||
| 196 | */ | ||
| 197 | switch (c->x86) { | ||
| 198 | case 4: /* 486: untested */ | ||
| 199 | break; | ||
| 200 | case 5: /* Old Pentia: untested */ | ||
| 201 | break; | ||
| 202 | case 6: /* PII/PIII only like movsl with 8-byte alignment */ | ||
| 203 | movsl_mask.mask = 7; | ||
| 204 | break; | ||
| 205 | case 15: /* P4 is OK down to 8-byte alignment */ | ||
| 206 | movsl_mask.mask = 7; | ||
| 207 | break; | ||
| 208 | } | 304 | } |
| 209 | #endif | ||
| 210 | 305 | ||
| 211 | if (cpu_has_xmm2) | 306 | if (c->x86 == 15) |
| 212 | set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); | ||
| 213 | if (c->x86 == 15) { | ||
| 214 | set_cpu_cap(c, X86_FEATURE_P4); | 307 | set_cpu_cap(c, X86_FEATURE_P4); |
| 215 | } | ||
| 216 | if (c->x86 == 6) | 308 | if (c->x86 == 6) |
| 217 | set_cpu_cap(c, X86_FEATURE_P3); | 309 | set_cpu_cap(c, X86_FEATURE_P3); |
| 218 | if (cpu_has_ds) { | ||
| 219 | unsigned int l1; | ||
| 220 | rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); | ||
| 221 | if (!(l1 & (1<<11))) | ||
| 222 | set_cpu_cap(c, X86_FEATURE_BTS); | ||
| 223 | if (!(l1 & (1<<12))) | ||
| 224 | set_cpu_cap(c, X86_FEATURE_PEBS); | ||
| 225 | } | ||
| 226 | 310 | ||
| 227 | if (cpu_has_bts) | 311 | if (cpu_has_bts) |
| 228 | ds_init_intel(c); | 312 | ptrace_bts_init_intel(c); |
| 229 | 313 | ||
| 230 | #ifdef CONFIG_X86_NUMAQ | ||
| 231 | numaq_tsc_disable(); | ||
| 232 | #endif | 314 | #endif |
| 315 | |||
| 316 | detect_extended_topology(c); | ||
| 317 | if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { | ||
| 318 | /* | ||
| 319 | * let's use the legacy cpuid vector 0x1 and 0x4 for topology | ||
| 320 | * detection. | ||
| 321 | */ | ||
| 322 | c->x86_max_cores = intel_num_cpu_cores(c); | ||
| 323 | #ifdef CONFIG_X86_32 | ||
| 324 | detect_ht(c); | ||
| 325 | #endif | ||
| 326 | } | ||
| 327 | |||
| 328 | /* Work around errata */ | ||
| 329 | srat_detect_node(); | ||
| 330 | |||
| 331 | if (cpu_has(c, X86_FEATURE_VMX)) | ||
| 332 | detect_vmx_virtcap(c); | ||
| 233 | } | 333 | } |
| 234 | 334 | ||
| 335 | #ifdef CONFIG_X86_32 | ||
| 235 | static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) | 336 | static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) |
| 236 | { | 337 | { |
| 237 | /* | 338 | /* |
| @@ -244,10 +345,12 @@ static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned i | |||
| 244 | size = 256; | 345 | size = 256; |
| 245 | return size; | 346 | return size; |
| 246 | } | 347 | } |
| 348 | #endif | ||
| 247 | 349 | ||
| 248 | static struct cpu_dev intel_cpu_dev __cpuinitdata = { | 350 | static struct cpu_dev intel_cpu_dev __cpuinitdata = { |
| 249 | .c_vendor = "Intel", | 351 | .c_vendor = "Intel", |
| 250 | .c_ident = { "GenuineIntel" }, | 352 | .c_ident = { "GenuineIntel" }, |
| 353 | #ifdef CONFIG_X86_32 | ||
| 251 | .c_models = { | 354 | .c_models = { |
| 252 | { .vendor = X86_VENDOR_INTEL, .family = 4, .model_names = | 355 | { .vendor = X86_VENDOR_INTEL, .family = 4, .model_names = |
| 253 | { | 356 | { |
| @@ -297,76 +400,12 @@ static struct cpu_dev intel_cpu_dev __cpuinitdata = { | |||
| 297 | } | 400 | } |
| 298 | }, | 401 | }, |
| 299 | }, | 402 | }, |
| 403 | .c_size_cache = intel_size_cache, | ||
| 404 | #endif | ||
| 300 | .c_early_init = early_init_intel, | 405 | .c_early_init = early_init_intel, |
| 301 | .c_init = init_intel, | 406 | .c_init = init_intel, |
| 302 | .c_size_cache = intel_size_cache, | 407 | .c_x86_vendor = X86_VENDOR_INTEL, |
| 303 | }; | 408 | }; |
| 304 | 409 | ||
| 305 | cpu_vendor_dev_register(X86_VENDOR_INTEL, &intel_cpu_dev); | 410 | cpu_dev_register(intel_cpu_dev); |
| 306 | |||
| 307 | #ifndef CONFIG_X86_CMPXCHG | ||
| 308 | unsigned long cmpxchg_386_u8(volatile void *ptr, u8 old, u8 new) | ||
| 309 | { | ||
| 310 | u8 prev; | ||
| 311 | unsigned long flags; | ||
| 312 | |||
| 313 | /* Poor man's cmpxchg for 386. Unsuitable for SMP */ | ||
| 314 | local_irq_save(flags); | ||
| 315 | prev = *(u8 *)ptr; | ||
| 316 | if (prev == old) | ||
| 317 | *(u8 *)ptr = new; | ||
| 318 | local_irq_restore(flags); | ||
| 319 | return prev; | ||
| 320 | } | ||
| 321 | EXPORT_SYMBOL(cmpxchg_386_u8); | ||
| 322 | |||
| 323 | unsigned long cmpxchg_386_u16(volatile void *ptr, u16 old, u16 new) | ||
| 324 | { | ||
| 325 | u16 prev; | ||
| 326 | unsigned long flags; | ||
| 327 | |||
| 328 | /* Poor man's cmpxchg for 386. Unsuitable for SMP */ | ||
| 329 | local_irq_save(flags); | ||
| 330 | prev = *(u16 *)ptr; | ||
| 331 | if (prev == old) | ||
| 332 | *(u16 *)ptr = new; | ||
| 333 | local_irq_restore(flags); | ||
| 334 | return prev; | ||
| 335 | } | ||
| 336 | EXPORT_SYMBOL(cmpxchg_386_u16); | ||
| 337 | |||
| 338 | unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new) | ||
| 339 | { | ||
| 340 | u32 prev; | ||
| 341 | unsigned long flags; | ||
| 342 | |||
| 343 | /* Poor man's cmpxchg for 386. Unsuitable for SMP */ | ||
| 344 | local_irq_save(flags); | ||
| 345 | prev = *(u32 *)ptr; | ||
| 346 | if (prev == old) | ||
| 347 | *(u32 *)ptr = new; | ||
| 348 | local_irq_restore(flags); | ||
| 349 | return prev; | ||
| 350 | } | ||
| 351 | EXPORT_SYMBOL(cmpxchg_386_u32); | ||
| 352 | #endif | ||
| 353 | |||
| 354 | #ifndef CONFIG_X86_CMPXCHG64 | ||
| 355 | unsigned long long cmpxchg_486_u64(volatile void *ptr, u64 old, u64 new) | ||
| 356 | { | ||
| 357 | u64 prev; | ||
| 358 | unsigned long flags; | ||
| 359 | |||
| 360 | /* Poor man's cmpxchg8b for 386 and 486. Unsuitable for SMP */ | ||
| 361 | local_irq_save(flags); | ||
| 362 | prev = *(u64 *)ptr; | ||
| 363 | if (prev == old) | ||
| 364 | *(u64 *)ptr = new; | ||
| 365 | local_irq_restore(flags); | ||
| 366 | return prev; | ||
| 367 | } | ||
| 368 | EXPORT_SYMBOL(cmpxchg_486_u64); | ||
| 369 | #endif | ||
| 370 | |||
| 371 | /* arch_initcall(intel_cpu_init); */ | ||
| 372 | 411 | ||
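One detail worth calling out in the consolidated intel_workarounds(): the Pentium Pro SEP erratum test packs family, model and stepping into a single value, family<<8 | model<<4 | stepping, and clears X86_FEATURE_SEP whenever that value is below 0x633 (family 6, model 3, stepping 3). The packing is only unambiguous while model and stepping each fit in four bits, which holds for the parts this check targets. A tiny worked example with made-up CPU values:

    #include <stdio.h>

    int main(void)
    {
            /* Hypothetical early Pentium Pro: family 6, model 1, stepping 9. */
            unsigned int family = 6, model = 1, stepping = 9;
            unsigned int packed = family << 8 | model << 4 | stepping;

            /* 0x633 encodes family 6, model 3, stepping 3. */
            printf("packed=0x%03x -> SEP %s\n", packed,
                   packed < 0x633 ? "cleared (erratum range)"
                                  : "left as reported by CPUID");
            return 0;
    }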
diff --git a/arch/x86/kernel/cpu/intel_64.c b/arch/x86/kernel/cpu/intel_64.c deleted file mode 100644 index 1019c58d39f0..000000000000 --- a/arch/x86/kernel/cpu/intel_64.c +++ /dev/null | |||
| @@ -1,95 +0,0 @@ | |||
| 1 | #include <linux/init.h> | ||
| 2 | #include <linux/smp.h> | ||
| 3 | #include <asm/processor.h> | ||
| 4 | #include <asm/ptrace.h> | ||
| 5 | #include <asm/topology.h> | ||
| 6 | #include <asm/numa_64.h> | ||
| 7 | |||
| 8 | #include "cpu.h" | ||
| 9 | |||
| 10 | static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) | ||
| 11 | { | ||
| 12 | if ((c->x86 == 0xf && c->x86_model >= 0x03) || | ||
| 13 | (c->x86 == 0x6 && c->x86_model >= 0x0e)) | ||
| 14 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | ||
| 15 | |||
| 16 | set_cpu_cap(c, X86_FEATURE_SYSENTER32); | ||
| 17 | } | ||
| 18 | |||
| 19 | /* | ||
| 20 | * find out the number of processor cores on the die | ||
| 21 | */ | ||
| 22 | static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) | ||
| 23 | { | ||
| 24 | unsigned int eax, t; | ||
| 25 | |||
| 26 | if (c->cpuid_level < 4) | ||
| 27 | return 1; | ||
| 28 | |||
| 29 | cpuid_count(4, 0, &eax, &t, &t, &t); | ||
| 30 | |||
| 31 | if (eax & 0x1f) | ||
| 32 | return ((eax >> 26) + 1); | ||
| 33 | else | ||
| 34 | return 1; | ||
| 35 | } | ||
| 36 | |||
| 37 | static void __cpuinit srat_detect_node(void) | ||
| 38 | { | ||
| 39 | #ifdef CONFIG_NUMA | ||
| 40 | unsigned node; | ||
| 41 | int cpu = smp_processor_id(); | ||
| 42 | int apicid = hard_smp_processor_id(); | ||
| 43 | |||
| 44 | /* Don't do the funky fallback heuristics the AMD version employs | ||
| 45 | for now. */ | ||
| 46 | node = apicid_to_node[apicid]; | ||
| 47 | if (node == NUMA_NO_NODE || !node_online(node)) | ||
| 48 | node = first_node(node_online_map); | ||
| 49 | numa_set_node(cpu, node); | ||
| 50 | |||
| 51 | printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); | ||
| 52 | #endif | ||
| 53 | } | ||
| 54 | |||
| 55 | static void __cpuinit init_intel(struct cpuinfo_x86 *c) | ||
| 56 | { | ||
| 57 | init_intel_cacheinfo(c); | ||
| 58 | if (c->cpuid_level > 9) { | ||
| 59 | unsigned eax = cpuid_eax(10); | ||
| 60 | /* Check for version and the number of counters */ | ||
| 61 | if ((eax & 0xff) && (((eax>>8) & 0xff) > 1)) | ||
| 62 | set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); | ||
| 63 | } | ||
| 64 | |||
| 65 | if (cpu_has_ds) { | ||
| 66 | unsigned int l1, l2; | ||
| 67 | rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); | ||
| 68 | if (!(l1 & (1<<11))) | ||
| 69 | set_cpu_cap(c, X86_FEATURE_BTS); | ||
| 70 | if (!(l1 & (1<<12))) | ||
| 71 | set_cpu_cap(c, X86_FEATURE_PEBS); | ||
| 72 | } | ||
| 73 | |||
| 74 | |||
| 75 | if (cpu_has_bts) | ||
| 76 | ds_init_intel(c); | ||
| 77 | |||
| 78 | if (c->x86 == 15) | ||
| 79 | c->x86_cache_alignment = c->x86_clflush_size * 2; | ||
| 80 | if (c->x86 == 6) | ||
| 81 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); | ||
| 82 | set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); | ||
| 83 | c->x86_max_cores = intel_num_cpu_cores(c); | ||
| 84 | |||
| 85 | srat_detect_node(); | ||
| 86 | } | ||
| 87 | |||
| 88 | static struct cpu_dev intel_cpu_dev __cpuinitdata = { | ||
| 89 | .c_vendor = "Intel", | ||
| 90 | .c_ident = { "GenuineIntel" }, | ||
| 91 | .c_early_init = early_init_intel, | ||
| 92 | .c_init = init_intel, | ||
| 93 | }; | ||
| 94 | cpu_vendor_dev_register(X86_VENDOR_INTEL, &intel_cpu_dev); | ||
| 95 | |||
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 2c8afafa18e8..3f46afbb1cf1 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c | |||
| @@ -1,8 +1,8 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Routines to indentify caches on Intel CPU. | 2 | * Routines to indentify caches on Intel CPU. |
| 3 | * | 3 | * |
| 4 | * Changes: | 4 | * Changes: |
| 5 | * Venkatesh Pallipadi : Adding cache identification through cpuid(4) | 5 | * Venkatesh Pallipadi : Adding cache identification through cpuid(4) |
| 6 | * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure. | 6 | * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure. |
| 7 | * Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD. | 7 | * Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD. |
| 8 | */ | 8 | */ |
| @@ -13,6 +13,7 @@ | |||
| 13 | #include <linux/compiler.h> | 13 | #include <linux/compiler.h> |
| 14 | #include <linux/cpu.h> | 14 | #include <linux/cpu.h> |
| 15 | #include <linux/sched.h> | 15 | #include <linux/sched.h> |
| 16 | #include <linux/pci.h> | ||
| 16 | 17 | ||
| 17 | #include <asm/processor.h> | 18 | #include <asm/processor.h> |
| 18 | #include <asm/smp.h> | 19 | #include <asm/smp.h> |
| @@ -130,9 +131,18 @@ struct _cpuid4_info { | |||
| 130 | union _cpuid4_leaf_ebx ebx; | 131 | union _cpuid4_leaf_ebx ebx; |
| 131 | union _cpuid4_leaf_ecx ecx; | 132 | union _cpuid4_leaf_ecx ecx; |
| 132 | unsigned long size; | 133 | unsigned long size; |
| 134 | unsigned long can_disable; | ||
| 133 | cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */ | 135 | cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */ |
| 134 | }; | 136 | }; |
| 135 | 137 | ||
| 138 | #ifdef CONFIG_PCI | ||
| 139 | static struct pci_device_id k8_nb_id[] = { | ||
| 140 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) }, | ||
| 141 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) }, | ||
| 142 | {} | ||
| 143 | }; | ||
| 144 | #endif | ||
| 145 | |||
| 136 | unsigned short num_cache_leaves; | 146 | unsigned short num_cache_leaves; |
| 137 | 147 | ||
| 138 | /* AMD doesn't have CPUID4. Emulate it here to report the same | 148 | /* AMD doesn't have CPUID4. Emulate it here to report the same |
| @@ -182,9 +192,10 @@ static unsigned short assocs[] __cpuinitdata = { | |||
| 182 | static unsigned char levels[] __cpuinitdata = { 1, 1, 2, 3 }; | 192 | static unsigned char levels[] __cpuinitdata = { 1, 1, 2, 3 }; |
| 183 | static unsigned char types[] __cpuinitdata = { 1, 2, 3, 3 }; | 193 | static unsigned char types[] __cpuinitdata = { 1, 2, 3, 3 }; |
| 184 | 194 | ||
| 185 | static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, | 195 | static void __cpuinit |
| 186 | union _cpuid4_leaf_ebx *ebx, | 196 | amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, |
| 187 | union _cpuid4_leaf_ecx *ecx) | 197 | union _cpuid4_leaf_ebx *ebx, |
| 198 | union _cpuid4_leaf_ecx *ecx) | ||
| 188 | { | 199 | { |
| 189 | unsigned dummy; | 200 | unsigned dummy; |
| 190 | unsigned line_size, lines_per_tag, assoc, size_in_kb; | 201 | unsigned line_size, lines_per_tag, assoc, size_in_kb; |
| @@ -251,27 +262,40 @@ static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, | |||
| 251 | (ebx->split.ways_of_associativity + 1) - 1; | 262 | (ebx->split.ways_of_associativity + 1) - 1; |
| 252 | } | 263 | } |
| 253 | 264 | ||
| 254 | static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) | 265 | static void __cpuinit |
| 266 | amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf) | ||
| 267 | { | ||
| 268 | if (index < 3) | ||
| 269 | return; | ||
| 270 | this_leaf->can_disable = 1; | ||
| 271 | } | ||
| 272 | |||
| 273 | static int | ||
| 274 | __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) | ||
| 255 | { | 275 | { |
| 256 | union _cpuid4_leaf_eax eax; | 276 | union _cpuid4_leaf_eax eax; |
| 257 | union _cpuid4_leaf_ebx ebx; | 277 | union _cpuid4_leaf_ebx ebx; |
| 258 | union _cpuid4_leaf_ecx ecx; | 278 | union _cpuid4_leaf_ecx ecx; |
| 259 | unsigned edx; | 279 | unsigned edx; |
| 260 | 280 | ||
| 261 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) | 281 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { |
| 262 | amd_cpuid4(index, &eax, &ebx, &ecx); | 282 | amd_cpuid4(index, &eax, &ebx, &ecx); |
| 263 | else | 283 | if (boot_cpu_data.x86 >= 0x10) |
| 264 | cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); | 284 | amd_check_l3_disable(index, this_leaf); |
| 285 | } else { | ||
| 286 | cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); | ||
| 287 | } | ||
| 288 | |||
| 265 | if (eax.split.type == CACHE_TYPE_NULL) | 289 | if (eax.split.type == CACHE_TYPE_NULL) |
| 266 | return -EIO; /* better error ? */ | 290 | return -EIO; /* better error ? */ |
| 267 | 291 | ||
| 268 | this_leaf->eax = eax; | 292 | this_leaf->eax = eax; |
| 269 | this_leaf->ebx = ebx; | 293 | this_leaf->ebx = ebx; |
| 270 | this_leaf->ecx = ecx; | 294 | this_leaf->ecx = ecx; |
| 271 | this_leaf->size = (ecx.split.number_of_sets + 1) * | 295 | this_leaf->size = (ecx.split.number_of_sets + 1) * |
| 272 | (ebx.split.coherency_line_size + 1) * | 296 | (ebx.split.coherency_line_size + 1) * |
| 273 | (ebx.split.physical_line_partition + 1) * | 297 | (ebx.split.physical_line_partition + 1) * |
| 274 | (ebx.split.ways_of_associativity + 1); | 298 | (ebx.split.ways_of_associativity + 1); |
| 275 | return 0; | 299 | return 0; |
| 276 | } | 300 | } |
| 277 | 301 | ||
| @@ -453,7 +477,7 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) | |||
| 453 | 477 | ||
| 454 | /* pointer to _cpuid4_info array (for each cache leaf) */ | 478 | /* pointer to _cpuid4_info array (for each cache leaf) */ |
| 455 | static DEFINE_PER_CPU(struct _cpuid4_info *, cpuid4_info); | 479 | static DEFINE_PER_CPU(struct _cpuid4_info *, cpuid4_info); |
| 456 | #define CPUID4_INFO_IDX(x, y) (&((per_cpu(cpuid4_info, x))[y])) | 480 | #define CPUID4_INFO_IDX(x, y) (&((per_cpu(cpuid4_info, x))[y])) |
| 457 | 481 | ||
| 458 | #ifdef CONFIG_SMP | 482 | #ifdef CONFIG_SMP |
| 459 | static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) | 483 | static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) |
| @@ -489,8 +513,8 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) | |||
| 489 | int sibling; | 513 | int sibling; |
| 490 | 514 | ||
| 491 | this_leaf = CPUID4_INFO_IDX(cpu, index); | 515 | this_leaf = CPUID4_INFO_IDX(cpu, index); |
| 492 | for_each_cpu_mask(sibling, this_leaf->shared_cpu_map) { | 516 | for_each_cpu_mask_nr(sibling, this_leaf->shared_cpu_map) { |
| 493 | sibling_leaf = CPUID4_INFO_IDX(sibling, index); | 517 | sibling_leaf = CPUID4_INFO_IDX(sibling, index); |
| 494 | cpu_clear(cpu, sibling_leaf->shared_cpu_map); | 518 | cpu_clear(cpu, sibling_leaf->shared_cpu_map); |
| 495 | } | 519 | } |
| 496 | } | 520 | } |
| @@ -572,7 +596,7 @@ struct _index_kobject { | |||
| 572 | 596 | ||
| 573 | /* pointer to array of kobjects for cpuX/cache/indexY */ | 597 | /* pointer to array of kobjects for cpuX/cache/indexY */ |
| 574 | static DEFINE_PER_CPU(struct _index_kobject *, index_kobject); | 598 | static DEFINE_PER_CPU(struct _index_kobject *, index_kobject); |
| 575 | #define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(index_kobject, x))[y])) | 599 | #define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(index_kobject, x))[y])) |
| 576 | 600 | ||
| 577 | #define show_one_plus(file_name, object, val) \ | 601 | #define show_one_plus(file_name, object, val) \ |
| 578 | static ssize_t show_##file_name \ | 602 | static ssize_t show_##file_name \ |
| @@ -637,6 +661,99 @@ static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) { | |||
| 637 | } | 661 | } |
| 638 | } | 662 | } |
| 639 | 663 | ||
| 664 | #define to_object(k) container_of(k, struct _index_kobject, kobj) | ||
| 665 | #define to_attr(a) container_of(a, struct _cache_attr, attr) | ||
| 666 | |||
| 667 | #ifdef CONFIG_PCI | ||
| 668 | static struct pci_dev *get_k8_northbridge(int node) | ||
| 669 | { | ||
| 670 | struct pci_dev *dev = NULL; | ||
| 671 | int i; | ||
| 672 | |||
| 673 | for (i = 0; i <= node; i++) { | ||
| 674 | do { | ||
| 675 | dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev); | ||
| 676 | if (!dev) | ||
| 677 | break; | ||
| 678 | } while (!pci_match_id(&k8_nb_id[0], dev)); | ||
| 679 | if (!dev) | ||
| 680 | break; | ||
| 681 | } | ||
| 682 | return dev; | ||
| 683 | } | ||
| 684 | #else | ||
| 685 | static struct pci_dev *get_k8_northbridge(int node) | ||
| 686 | { | ||
| 687 | return NULL; | ||
| 688 | } | ||
| 689 | #endif | ||
| 690 | |||
| 691 | static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf) | ||
| 692 | { | ||
| 693 | int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); | ||
| 694 | struct pci_dev *dev = NULL; | ||
| 695 | ssize_t ret = 0; | ||
| 696 | int i; | ||
| 697 | |||
| 698 | if (!this_leaf->can_disable) | ||
| 699 | return sprintf(buf, "Feature not enabled\n"); | ||
| 700 | |||
| 701 | dev = get_k8_northbridge(node); | ||
| 702 | if (!dev) { | ||
| 703 | printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n"); | ||
| 704 | return -EINVAL; | ||
| 705 | } | ||
| 706 | |||
| 707 | for (i = 0; i < 2; i++) { | ||
| 708 | unsigned int reg; | ||
| 709 | |||
| 710 | pci_read_config_dword(dev, 0x1BC + i * 4, ®); | ||
| 711 | |||
| 712 | ret += sprintf(buf, "%sEntry: %d\n", buf, i); | ||
| 713 | ret += sprintf(buf, "%sReads: %s\tNew Entries: %s\n", | ||
| 714 | buf, | ||
| 715 | reg & 0x80000000 ? "Disabled" : "Allowed", | ||
| 716 | reg & 0x40000000 ? "Disabled" : "Allowed"); | ||
| 717 | ret += sprintf(buf, "%sSubCache: %x\tIndex: %x\n", | ||
| 718 | buf, (reg & 0x30000) >> 16, reg & 0xfff); | ||
| 719 | } | ||
| 720 | return ret; | ||
| 721 | } | ||
| 722 | |||
| 723 | static ssize_t | ||
| 724 | store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf, | ||
| 725 | size_t count) | ||
| 726 | { | ||
| 727 | int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); | ||
| 728 | struct pci_dev *dev = NULL; | ||
| 729 | unsigned int ret, index, val; | ||
| 730 | |||
| 731 | if (!this_leaf->can_disable) | ||
| 732 | return 0; | ||
| 733 | |||
| 734 | if (strlen(buf) > 15) | ||
| 735 | return -EINVAL; | ||
| 736 | |||
| 737 | ret = sscanf(buf, "%x %x", &index, &val); | ||
| 738 | if (ret != 2) | ||
| 739 | return -EINVAL; | ||
| 740 | if (index > 1) | ||
| 741 | return -EINVAL; | ||
| 742 | |||
| 743 | val |= 0xc0000000; | ||
| 744 | dev = get_k8_northbridge(node); | ||
| 745 | if (!dev) { | ||
| 746 | printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n"); | ||
| 747 | return -EINVAL; | ||
| 748 | } | ||
| 749 | |||
| 750 | pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000); | ||
| 751 | wbinvd(); | ||
| 752 | pci_write_config_dword(dev, 0x1BC + index * 4, val); | ||
| 753 | |||
| 754 | return 1; | ||
| 755 | } | ||
| 756 | |||
| 640 | struct _cache_attr { | 757 | struct _cache_attr { |
| 641 | struct attribute attr; | 758 | struct attribute attr; |
| 642 | ssize_t (*show)(struct _cpuid4_info *, char *); | 759 | ssize_t (*show)(struct _cpuid4_info *, char *); |
| @@ -657,6 +774,8 @@ define_one_ro(size); | |||
| 657 | define_one_ro(shared_cpu_map); | 774 | define_one_ro(shared_cpu_map); |
| 658 | define_one_ro(shared_cpu_list); | 775 | define_one_ro(shared_cpu_list); |
| 659 | 776 | ||
| 777 | static struct _cache_attr cache_disable = __ATTR(cache_disable, 0644, show_cache_disable, store_cache_disable); | ||
| 778 | |||
| 660 | static struct attribute * default_attrs[] = { | 779 | static struct attribute * default_attrs[] = { |
| 661 | &type.attr, | 780 | &type.attr, |
| 662 | &level.attr, | 781 | &level.attr, |
| @@ -667,12 +786,10 @@ static struct attribute * default_attrs[] = { | |||
| 667 | &size.attr, | 786 | &size.attr, |
| 668 | &shared_cpu_map.attr, | 787 | &shared_cpu_map.attr, |
| 669 | &shared_cpu_list.attr, | 788 | &shared_cpu_list.attr, |
| 789 | &cache_disable.attr, | ||
| 670 | NULL | 790 | NULL |
| 671 | }; | 791 | }; |
| 672 | 792 | ||
| 673 | #define to_object(k) container_of(k, struct _index_kobject, kobj) | ||
| 674 | #define to_attr(a) container_of(a, struct _cache_attr, attr) | ||
| 675 | |||
| 676 | static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf) | 793 | static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf) |
| 677 | { | 794 | { |
| 678 | struct _cache_attr *fattr = to_attr(attr); | 795 | struct _cache_attr *fattr = to_attr(attr); |
| @@ -682,14 +799,22 @@ static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf) | |||
| 682 | ret = fattr->show ? | 799 | ret = fattr->show ? |
| 683 | fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index), | 800 | fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index), |
| 684 | buf) : | 801 | buf) : |
| 685 | 0; | 802 | 0; |
| 686 | return ret; | 803 | return ret; |
| 687 | } | 804 | } |
| 688 | 805 | ||
| 689 | static ssize_t store(struct kobject * kobj, struct attribute * attr, | 806 | static ssize_t store(struct kobject * kobj, struct attribute * attr, |
| 690 | const char * buf, size_t count) | 807 | const char * buf, size_t count) |
| 691 | { | 808 | { |
| 692 | return 0; | 809 | struct _cache_attr *fattr = to_attr(attr); |
| 810 | struct _index_kobject *this_leaf = to_object(kobj); | ||
| 811 | ssize_t ret; | ||
| 812 | |||
| 813 | ret = fattr->store ? | ||
| 814 | fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index), | ||
| 815 | buf, count) : | ||
| 816 | 0; | ||
| 817 | return ret; | ||
| 693 | } | 818 | } |
| 694 | 819 | ||
| 695 | static struct sysfs_ops sysfs_ops = { | 820 | static struct sysfs_ops sysfs_ops = { |
| @@ -780,15 +905,14 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) | |||
| 780 | } | 905 | } |
| 781 | kobject_put(per_cpu(cache_kobject, cpu)); | 906 | kobject_put(per_cpu(cache_kobject, cpu)); |
| 782 | cpuid4_cache_sysfs_exit(cpu); | 907 | cpuid4_cache_sysfs_exit(cpu); |
| 783 | break; | 908 | return retval; |
| 784 | } | 909 | } |
| 785 | kobject_uevent(&(this_object->kobj), KOBJ_ADD); | 910 | kobject_uevent(&(this_object->kobj), KOBJ_ADD); |
| 786 | } | 911 | } |
| 787 | if (!retval) | 912 | cpu_set(cpu, cache_dev_map); |
| 788 | cpu_set(cpu, cache_dev_map); | ||
| 789 | 913 | ||
| 790 | kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD); | 914 | kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD); |
| 791 | return retval; | 915 | return 0; |
| 792 | } | 916 | } |
| 793 | 917 | ||
| 794 | static void __cpuinit cache_remove_dev(struct sys_device * sys_dev) | 918 | static void __cpuinit cache_remove_dev(struct sys_device * sys_dev) |
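The cache_disable attribute added above is an ordinary read/write sysfs file: the generic show()/store() handlers recover the typed attribute with container_of(), and the store side parses a hex "index value" pair before poking the northbridge. The following user-space sketch shows only that dispatch-and-parse pattern; the type and function names (cache_attr, demo_store) are illustrative stand-ins, not the kernel's.

#include <stdio.h>
#include <stddef.h>
#include <string.h>
#include <sys/types.h>

/* Illustrative stand-ins for struct attribute / struct _cache_attr. */
struct attribute { const char *name; };

struct cache_attr {
	struct attribute attr;
	ssize_t (*show)(char *buf);
	ssize_t (*store)(const char *buf, size_t count);
};

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

static ssize_t demo_store(const char *buf, size_t count)
{
	unsigned int index, val;

	/* Same validation as store_cache_disable(): "index value" in hex. */
	if (strlen(buf) > 15)
		return -1;
	if (sscanf(buf, "%x %x", &index, &val) != 2)
		return -1;
	if (index > 1)
		return -1;
	printf("would write entry %u with value 0x%x\n", index, val | 0xc0000000);
	return (ssize_t)count;
}

static struct cache_attr cache_disable = {
	.attr  = { .name = "cache_disable" },
	.store = demo_store,
};

int main(void)
{
	/* The generic store() only ever sees the embedded attribute ... */
	struct attribute *attr = &cache_disable.attr;
	/* ... and recovers the full typed attribute with container_of(). */
	struct cache_attr *fattr = container_of(attr, struct cache_attr, attr);
	const char *input = "1 3f";

	return fattr->store(input, strlen(input)) < 0;
}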
diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c index f390c9f66351..dd3af6e7b39a 100644 --- a/arch/x86/kernel/cpu/mcheck/k7.c +++ b/arch/x86/kernel/cpu/mcheck/k7.c | |||
| @@ -1,6 +1,6 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Athlon/Hammer specific Machine Check Exception Reporting | 2 | * Athlon specific Machine Check Exception Reporting |
| 3 | * (C) Copyright 2002 Dave Jones <davej@codemonkey.org.uk> | 3 | * (C) Copyright 2002 Dave Jones <davej@redhat.com> |
| 4 | */ | 4 | */ |
| 5 | 5 | ||
| 6 | #include <linux/init.h> | 6 | #include <linux/init.h> |
diff --git a/arch/x86/kernel/cpu/mcheck/mce_32.c b/arch/x86/kernel/cpu/mcheck/mce_32.c index 774d87cfd8cd..0ebf3fc6a610 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_32.c +++ b/arch/x86/kernel/cpu/mcheck/mce_32.c | |||
| @@ -1,6 +1,6 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * mce.c - x86 Machine Check Exception Reporting | 2 | * mce.c - x86 Machine Check Exception Reporting |
| 3 | * (c) 2002 Alan Cox <alan@redhat.com>, Dave Jones <davej@codemonkey.org.uk> | 3 | * (c) 2002 Alan Cox <alan@redhat.com>, Dave Jones <davej@redhat.com> |
| 4 | */ | 4 | */ |
| 5 | 5 | ||
| 6 | #include <linux/init.h> | 6 | #include <linux/init.h> |
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index c4a7ec31394c..4b031a4ac856 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c | |||
| @@ -580,7 +580,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, | |||
| 580 | char __user *buf = ubuf; | 580 | char __user *buf = ubuf; |
| 581 | int i, err; | 581 | int i, err; |
| 582 | 582 | ||
| 583 | cpu_tsc = kmalloc(NR_CPUS * sizeof(long), GFP_KERNEL); | 583 | cpu_tsc = kmalloc(nr_cpu_ids * sizeof(long), GFP_KERNEL); |
| 584 | if (!cpu_tsc) | 584 | if (!cpu_tsc) |
| 585 | return -ENOMEM; | 585 | return -ENOMEM; |
| 586 | 586 | ||
| @@ -759,13 +759,18 @@ static struct sysdev_class mce_sysclass = { | |||
| 759 | }; | 759 | }; |
| 760 | 760 | ||
| 761 | DEFINE_PER_CPU(struct sys_device, device_mce); | 761 | DEFINE_PER_CPU(struct sys_device, device_mce); |
| 762 | void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu) __cpuinitdata; | ||
| 762 | 763 | ||
| 763 | /* Why are there no generic functions for this? */ | 764 | /* Why are there no generic functions for this? */ |
| 764 | #define ACCESSOR(name, var, start) \ | 765 | #define ACCESSOR(name, var, start) \ |
| 765 | static ssize_t show_ ## name(struct sys_device *s, char *buf) { \ | 766 | static ssize_t show_ ## name(struct sys_device *s, \ |
| 767 | struct sysdev_attribute *attr, \ | ||
| 768 | char *buf) { \ | ||
| 766 | return sprintf(buf, "%lx\n", (unsigned long)var); \ | 769 | return sprintf(buf, "%lx\n", (unsigned long)var); \ |
| 767 | } \ | 770 | } \ |
| 768 | static ssize_t set_ ## name(struct sys_device *s,const char *buf,size_t siz) { \ | 771 | static ssize_t set_ ## name(struct sys_device *s, \ |
| 772 | struct sysdev_attribute *attr, \ | ||
| 773 | const char *buf, size_t siz) { \ | ||
| 769 | char *end; \ | 774 | char *end; \ |
| 770 | unsigned long new = simple_strtoul(buf, &end, 0); \ | 775 | unsigned long new = simple_strtoul(buf, &end, 0); \ |
| 771 | if (end == buf) return -EINVAL; \ | 776 | if (end == buf) return -EINVAL; \ |
| @@ -786,14 +791,16 @@ ACCESSOR(bank3ctl,bank[3],mce_restart()) | |||
| 786 | ACCESSOR(bank4ctl,bank[4],mce_restart()) | 791 | ACCESSOR(bank4ctl,bank[4],mce_restart()) |
| 787 | ACCESSOR(bank5ctl,bank[5],mce_restart()) | 792 | ACCESSOR(bank5ctl,bank[5],mce_restart()) |
| 788 | 793 | ||
| 789 | static ssize_t show_trigger(struct sys_device *s, char *buf) | 794 | static ssize_t show_trigger(struct sys_device *s, struct sysdev_attribute *attr, |
| 795 | char *buf) | ||
| 790 | { | 796 | { |
| 791 | strcpy(buf, trigger); | 797 | strcpy(buf, trigger); |
| 792 | strcat(buf, "\n"); | 798 | strcat(buf, "\n"); |
| 793 | return strlen(trigger) + 1; | 799 | return strlen(trigger) + 1; |
| 794 | } | 800 | } |
| 795 | 801 | ||
| 796 | static ssize_t set_trigger(struct sys_device *s,const char *buf,size_t siz) | 802 | static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, |
| 803 | const char *buf,size_t siz) | ||
| 797 | { | 804 | { |
| 798 | char *p; | 805 | char *p; |
| 799 | int len; | 806 | int len; |
| @@ -806,12 +813,12 @@ static ssize_t set_trigger(struct sys_device *s,const char *buf,size_t siz) | |||
| 806 | } | 813 | } |
| 807 | 814 | ||
| 808 | static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger); | 815 | static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger); |
| 809 | ACCESSOR(tolerant,tolerant,) | 816 | static SYSDEV_INT_ATTR(tolerant, 0644, tolerant); |
| 810 | ACCESSOR(check_interval,check_interval,mce_restart()) | 817 | ACCESSOR(check_interval,check_interval,mce_restart()) |
| 811 | static struct sysdev_attribute *mce_attributes[] = { | 818 | static struct sysdev_attribute *mce_attributes[] = { |
| 812 | &attr_bank0ctl, &attr_bank1ctl, &attr_bank2ctl, | 819 | &attr_bank0ctl, &attr_bank1ctl, &attr_bank2ctl, |
| 813 | &attr_bank3ctl, &attr_bank4ctl, &attr_bank5ctl, | 820 | &attr_bank3ctl, &attr_bank4ctl, &attr_bank5ctl, |
| 814 | &attr_tolerant, &attr_check_interval, &attr_trigger, | 821 | &attr_tolerant.attr, &attr_check_interval, &attr_trigger, |
| 815 | NULL | 822 | NULL |
| 816 | }; | 823 | }; |
| 817 | 824 | ||
| @@ -853,7 +860,7 @@ error: | |||
| 853 | return err; | 860 | return err; |
| 854 | } | 861 | } |
| 855 | 862 | ||
| 856 | static void mce_remove_device(unsigned int cpu) | 863 | static __cpuinit void mce_remove_device(unsigned int cpu) |
| 857 | { | 864 | { |
| 858 | int i; | 865 | int i; |
| 859 | 866 | ||
| @@ -877,9 +884,13 @@ static int __cpuinit mce_cpu_callback(struct notifier_block *nfb, | |||
| 877 | case CPU_ONLINE: | 884 | case CPU_ONLINE: |
| 878 | case CPU_ONLINE_FROZEN: | 885 | case CPU_ONLINE_FROZEN: |
| 879 | mce_create_device(cpu); | 886 | mce_create_device(cpu); |
| 887 | if (threshold_cpu_callback) | ||
| 888 | threshold_cpu_callback(action, cpu); | ||
| 880 | break; | 889 | break; |
| 881 | case CPU_DEAD: | 890 | case CPU_DEAD: |
| 882 | case CPU_DEAD_FROZEN: | 891 | case CPU_DEAD_FROZEN: |
| 892 | if (threshold_cpu_callback) | ||
| 893 | threshold_cpu_callback(action, cpu); | ||
| 883 | mce_remove_device(cpu); | 894 | mce_remove_device(cpu); |
| 884 | break; | 895 | break; |
| 885 | } | 896 | } |
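The mce_64.c hunk replaces the AMD-specific hotplug notifier with a single function pointer, threshold_cpu_callback, that the generic MCE hotplug handler calls only when a sub-driver has installed it. A minimal sketch of that optional-hook pattern, with invented names, follows.

#include <stdio.h>

/* Optional hook, NULL until a sub-driver registers one (invented names). */
static void (*threshold_hook)(unsigned long action, unsigned int cpu);

static void amd_threshold_hook(unsigned long action, unsigned int cpu)
{
	printf("threshold hook: action=%lu cpu=%u\n", action, cpu);
}

/* Generic CPU-hotplug path: call the hook only if someone installed it. */
static void cpu_callback(unsigned long action, unsigned int cpu)
{
	if (threshold_hook)
		threshold_hook(action, cpu);
}

int main(void)
{
	cpu_callback(1, 0);			/* no hook installed: nothing happens */
	threshold_hook = amd_threshold_hook;	/* what threshold_init_device() now does */
	cpu_callback(1, 0);			/* the AMD handler runs */
	return 0;
}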
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index 7c9a813e1193..5eb390a4b2e9 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c | |||
| @@ -527,7 +527,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
| 527 | if (err) | 527 | if (err) |
| 528 | goto out_free; | 528 | goto out_free; |
| 529 | 529 | ||
| 530 | for_each_cpu_mask(i, b->cpus) { | 530 | for_each_cpu_mask_nr(i, b->cpus) { |
| 531 | if (i == cpu) | 531 | if (i == cpu) |
| 532 | continue; | 532 | continue; |
| 533 | 533 | ||
| @@ -617,7 +617,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank) | |||
| 617 | #endif | 617 | #endif |
| 618 | 618 | ||
| 619 | /* remove all sibling symlinks before unregistering */ | 619 | /* remove all sibling symlinks before unregistering */ |
| 620 | for_each_cpu_mask(i, b->cpus) { | 620 | for_each_cpu_mask_nr(i, b->cpus) { |
| 621 | if (i == cpu) | 621 | if (i == cpu) |
| 622 | continue; | 622 | continue; |
| 623 | 623 | ||
| @@ -628,6 +628,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank) | |||
| 628 | deallocate_threshold_block(cpu, bank); | 628 | deallocate_threshold_block(cpu, bank); |
| 629 | 629 | ||
| 630 | free_out: | 630 | free_out: |
| 631 | kobject_del(b->kobj); | ||
| 631 | kobject_put(b->kobj); | 632 | kobject_put(b->kobj); |
| 632 | kfree(b); | 633 | kfree(b); |
| 633 | per_cpu(threshold_banks, cpu)[bank] = NULL; | 634 | per_cpu(threshold_banks, cpu)[bank] = NULL; |
| @@ -645,14 +646,11 @@ static void threshold_remove_device(unsigned int cpu) | |||
| 645 | } | 646 | } |
| 646 | 647 | ||
| 647 | /* get notified when a cpu comes on/off */ | 648 | /* get notified when a cpu comes on/off */ |
| 648 | static int __cpuinit threshold_cpu_callback(struct notifier_block *nfb, | 649 | static void __cpuinit amd_64_threshold_cpu_callback(unsigned long action, |
| 649 | unsigned long action, void *hcpu) | 650 | unsigned int cpu) |
| 650 | { | 651 | { |
| 651 | /* cpu was unsigned int to begin with */ | ||
| 652 | unsigned int cpu = (unsigned long)hcpu; | ||
| 653 | |||
| 654 | if (cpu >= NR_CPUS) | 652 | if (cpu >= NR_CPUS) |
| 655 | goto out; | 653 | return; |
| 656 | 654 | ||
| 657 | switch (action) { | 655 | switch (action) { |
| 658 | case CPU_ONLINE: | 656 | case CPU_ONLINE: |
| @@ -666,14 +664,8 @@ static int __cpuinit threshold_cpu_callback(struct notifier_block *nfb, | |||
| 666 | default: | 664 | default: |
| 667 | break; | 665 | break; |
| 668 | } | 666 | } |
| 669 | out: | ||
| 670 | return NOTIFY_OK; | ||
| 671 | } | 667 | } |
| 672 | 668 | ||
| 673 | static struct notifier_block threshold_cpu_notifier __cpuinitdata = { | ||
| 674 | .notifier_call = threshold_cpu_callback, | ||
| 675 | }; | ||
| 676 | |||
| 677 | static __init int threshold_init_device(void) | 669 | static __init int threshold_init_device(void) |
| 678 | { | 670 | { |
| 679 | unsigned lcpu = 0; | 671 | unsigned lcpu = 0; |
| @@ -684,7 +676,7 @@ static __init int threshold_init_device(void) | |||
| 684 | if (err) | 676 | if (err) |
| 685 | return err; | 677 | return err; |
| 686 | } | 678 | } |
| 687 | register_hotcpu_notifier(&threshold_cpu_notifier); | 679 | threshold_cpu_callback = amd_64_threshold_cpu_callback; |
| 688 | return 0; | 680 | return 0; |
| 689 | } | 681 | } |
| 690 | 682 | ||
diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c index cc1fccdd31e0..a74af128efc9 100644 --- a/arch/x86/kernel/cpu/mcheck/non-fatal.c +++ b/arch/x86/kernel/cpu/mcheck/non-fatal.c | |||
| @@ -1,7 +1,7 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Non Fatal Machine Check Exception Reporting | 2 | * Non Fatal Machine Check Exception Reporting |
| 3 | * | 3 | * |
| 4 | * (C) Copyright 2002 Dave Jones. <davej@codemonkey.org.uk> | 4 | * (C) Copyright 2002 Dave Jones. <davej@redhat.com> |
| 5 | * | 5 | * |
| 6 | * This file contains routines to check for non-fatal MCEs every 15s | 6 | * This file contains routines to check for non-fatal MCEs every 15s |
| 7 | * | 7 | * |
diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c index eef001ad3bde..9b60fce09f75 100644 --- a/arch/x86/kernel/cpu/mcheck/p4.c +++ b/arch/x86/kernel/cpu/mcheck/p4.c | |||
| @@ -102,7 +102,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c) | |||
| 102 | /* The temperature transition interrupt handler setup */ | 102 | /* The temperature transition interrupt handler setup */ |
| 103 | h = THERMAL_APIC_VECTOR; /* our delivery vector */ | 103 | h = THERMAL_APIC_VECTOR; /* our delivery vector */ |
| 104 | h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */ | 104 | h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */ |
| 105 | apic_write_around(APIC_LVTTHMR, h); | 105 | apic_write(APIC_LVTTHMR, h); |
| 106 | 106 | ||
| 107 | rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); | 107 | rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); |
| 108 | wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03 , h); | 108 | wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03 , h); |
| @@ -114,7 +114,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c) | |||
| 114 | wrmsr(MSR_IA32_MISC_ENABLE, l | (1<<3), h); | 114 | wrmsr(MSR_IA32_MISC_ENABLE, l | (1<<3), h); |
| 115 | 115 | ||
| 116 | l = apic_read(APIC_LVTTHMR); | 116 | l = apic_read(APIC_LVTTHMR); |
| 117 | apic_write_around(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); | 117 | apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); |
| 118 | printk(KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu); | 118 | printk(KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu); |
| 119 | 119 | ||
| 120 | /* enable thermal throttle processing */ | 120 | /* enable thermal throttle processing */ |
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 1f4cc48c14c6..d5ae2243f0b9 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c | |||
| @@ -35,6 +35,7 @@ atomic_t therm_throt_en = ATOMIC_INIT(0); | |||
| 35 | 35 | ||
| 36 | #define define_therm_throt_sysdev_show_func(name) \ | 36 | #define define_therm_throt_sysdev_show_func(name) \ |
| 37 | static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \ | 37 | static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \ |
| 38 | struct sysdev_attribute *attr, \ | ||
| 38 | char *buf) \ | 39 | char *buf) \ |
| 39 | { \ | 40 | { \ |
| 40 | unsigned int cpu = dev->id; \ | 41 | unsigned int cpu = dev->id; \ |
diff --git a/arch/x86/kernel/cpu/mkcapflags.pl b/arch/x86/kernel/cpu/mkcapflags.pl new file mode 100644 index 000000000000..dfea390e1608 --- /dev/null +++ b/arch/x86/kernel/cpu/mkcapflags.pl | |||
| @@ -0,0 +1,32 @@ | |||
| 1 | #!/usr/bin/perl | ||
| 2 | # | ||
| 3 | # Generate the x86_cap_flags[] array from include/asm-x86/cpufeature.h | ||
| 4 | # | ||
| 5 | |||
| 6 | ($in, $out) = @ARGV; | ||
| 7 | |||
| 8 | open(IN, "< $in\0") or die "$0: cannot open: $in: $!\n"; | ||
| 9 | open(OUT, "> $out\0") or die "$0: cannot create: $out: $!\n"; | ||
| 10 | |||
| 11 | print OUT "#include <asm/cpufeature.h>\n\n"; | ||
| 12 | print OUT "const char * const x86_cap_flags[NCAPINTS*32] = {\n"; | ||
| 13 | |||
| 14 | while (defined($line = <IN>)) { | ||
| 15 | if ($line =~ /^\s*\#\s*define\s+(X86_FEATURE_(\S+))\s+(.*)$/) { | ||
| 16 | $macro = $1; | ||
| 17 | $feature = $2; | ||
| 18 | $tail = $3; | ||
| 19 | if ($tail =~ /\/\*\s*\"([^"]*)\".*\*\//) { | ||
| 20 | $feature = $1; | ||
| 21 | } | ||
| 22 | |||
| 23 | if ($feature ne '') { | ||
| 24 | printf OUT "\t%-32s = \"%s\",\n", | ||
| 25 | "[$macro]", "\L$feature"; | ||
| 26 | } | ||
| 27 | } | ||
| 28 | } | ||
| 29 | print OUT "};\n"; | ||
| 30 | |||
| 31 | close(IN); | ||
| 32 | close(OUT); | ||
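mkcapflags.pl turns each X86_FEATURE_* definition in cpufeature.h into one designated-initializer entry, taking the name from the macro suffix (lowercased) unless the trailing comment carries a quoted override. The generated file would look roughly like the compilable fragment below; the feature macros and their values here are placeholders, the real ones come from include/asm-x86/cpufeature.h.

/* Illustrative sketch of what mkcapflags.pl emits, not the shipped file. */
#define NCAPINTS 9
#define X86_FEATURE_FPU  (0*32 + 0)
#define X86_FEATURE_VME  (0*32 + 1)
#define X86_FEATURE_PSE  (0*32 + 3)

const char * const x86_cap_flags[NCAPINTS*32] = {
	[X86_FEATURE_FPU] = "fpu",
	[X86_FEATURE_VME] = "vme",
	[X86_FEATURE_PSE] = "pse",
};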
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 509bd3d9eacd..4e8d77f01eeb 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c | |||
| @@ -379,6 +379,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, | |||
| 379 | unsigned long *size, mtrr_type *type) | 379 | unsigned long *size, mtrr_type *type) |
| 380 | { | 380 | { |
| 381 | unsigned int mask_lo, mask_hi, base_lo, base_hi; | 381 | unsigned int mask_lo, mask_hi, base_lo, base_hi; |
| 382 | unsigned int tmp, hi; | ||
| 382 | 383 | ||
| 383 | rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi); | 384 | rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi); |
| 384 | if ((mask_lo & 0x800) == 0) { | 385 | if ((mask_lo & 0x800) == 0) { |
| @@ -392,8 +393,18 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, | |||
| 392 | rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi); | 393 | rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi); |
| 393 | 394 | ||
| 394 | /* Work out the shifted address mask. */ | 395 | /* Work out the shifted address mask. */ |
| 395 | mask_lo = size_or_mask | mask_hi << (32 - PAGE_SHIFT) | 396 | tmp = mask_hi << (32 - PAGE_SHIFT) | mask_lo >> PAGE_SHIFT; |
| 396 | | mask_lo >> PAGE_SHIFT; | 397 | mask_lo = size_or_mask | tmp; |
| 398 | /* Expand tmp with high bits to all 1s*/ | ||
| 399 | hi = fls(tmp); | ||
| 400 | if (hi > 0) { | ||
| 401 | tmp |= ~((1<<(hi - 1)) - 1); | ||
| 402 | |||
| 403 | if (tmp != mask_lo) { | ||
| 404 | WARN_ONCE(1, KERN_INFO "mtrr: your BIOS has set up an incorrect mask, fixing it up.\n"); | ||
| 405 | mask_lo = tmp; | ||
| 406 | } | ||
| 407 | } | ||
| 397 | 408 | ||
| 398 | /* This works correctly if size is a power of two, i.e. a | 409 | /* This works correctly if size is a power of two, i.e. a |
| 399 | contiguous range. */ | 410 | contiguous range. */ |
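The generic_get_mtrr() change works around BIOSes that program an MTRR mask for fewer physical address bits than the CPU actually has: fls() finds the top set bit of the shifted mask, everything above it is forced to 1, and if that differs from what size_or_mask alone would have produced the corrected mask is used. A stand-alone sketch of the fix-up, with a user-space fls() stand-in and made-up mask values, is:

#include <stdio.h>

/* User-space stand-in for the kernel's fls(): 1-based position of the
 * most significant set bit, 0 if no bit is set. */
static int fls_demo(unsigned int x)
{
	int r = 0;

	while (x) {
		x >>= 1;
		r++;
	}
	return r;
}

int main(void)
{
	/* Mask as the BIOS programmed it, already >> PAGE_SHIFT: a 1GB block
	 * set up as if the machine had only 36 physical address bits. */
	unsigned int tmp = 0x00fc0000;
	/* Bits the kernel knows lie beyond the CPU's real 40 physical bits. */
	unsigned int size_or_mask = 0xf0000000;
	unsigned int mask = size_or_mask | tmp;
	int hi = fls_demo(tmp);

	if (hi > 0) {
		/* Force every bit above the top set bit to 1. */
		tmp |= ~((1u << (hi - 1)) - 1);
		if (tmp != mask) {
			printf("BIOS mask too narrow, fixing 0x%08x -> 0x%08x\n",
			       mask, tmp);
			mask = tmp;
		}
	}
	printf("final mask: 0x%08x\n", mask);
	return 0;
}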
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c index 84c480bb3715..4c4214690dd1 100644 --- a/arch/x86/kernel/cpu/mtrr/if.c +++ b/arch/x86/kernel/cpu/mtrr/if.c | |||
| @@ -405,9 +405,9 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset) | |||
| 405 | } | 405 | } |
| 406 | /* RED-PEN: base can be > 32bit */ | 406 | /* RED-PEN: base can be > 32bit */ |
| 407 | len += seq_printf(seq, | 407 | len += seq_printf(seq, |
| 408 | "reg%02i: base=0x%05lx000 (%4luMB), size=%4lu%cB: %s, count=%d\n", | 408 | "reg%02i: base=0x%06lx000 (%5luMB), size=%5lu%cB, count=%d: %s\n", |
| 409 | i, base, base >> (20 - PAGE_SHIFT), size, factor, | 409 | i, base, base >> (20 - PAGE_SHIFT), size, factor, |
| 410 | mtrr_attrib_to_str(type), mtrr_usage_table[i]); | 410 | mtrr_usage_table[i], mtrr_attrib_to_str(type)); |
| 411 | } | 411 | } |
| 412 | } | 412 | } |
| 413 | return 0; | 413 | return 0; |
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 6f23969c8faf..c78c04821ea1 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c | |||
| @@ -729,7 +729,7 @@ struct var_mtrr_range_state { | |||
| 729 | mtrr_type type; | 729 | mtrr_type type; |
| 730 | }; | 730 | }; |
| 731 | 731 | ||
| 732 | struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; | 732 | static struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; |
| 733 | static int __initdata debug_print; | 733 | static int __initdata debug_print; |
| 734 | 734 | ||
| 735 | static int __init | 735 | static int __init |
| @@ -759,7 +759,8 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range, | |||
| 759 | /* take out UC ranges */ | 759 | /* take out UC ranges */ |
| 760 | for (i = 0; i < num_var_ranges; i++) { | 760 | for (i = 0; i < num_var_ranges; i++) { |
| 761 | type = range_state[i].type; | 761 | type = range_state[i].type; |
| 762 | if (type != MTRR_TYPE_UNCACHABLE) | 762 | if (type != MTRR_TYPE_UNCACHABLE && |
| 763 | type != MTRR_TYPE_WRPROT) | ||
| 763 | continue; | 764 | continue; |
| 764 | size = range_state[i].size_pfn; | 765 | size = range_state[i].size_pfn; |
| 765 | if (!size) | 766 | if (!size) |
| @@ -834,7 +835,14 @@ static int __init enable_mtrr_cleanup_setup(char *str) | |||
| 834 | enable_mtrr_cleanup = 1; | 835 | enable_mtrr_cleanup = 1; |
| 835 | return 0; | 836 | return 0; |
| 836 | } | 837 | } |
| 837 | early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup); | 838 | early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup); |
| 839 | |||
| 840 | static int __init mtrr_cleanup_debug_setup(char *str) | ||
| 841 | { | ||
| 842 | debug_print = 1; | ||
| 843 | return 0; | ||
| 844 | } | ||
| 845 | early_param("mtrr_cleanup_debug", mtrr_cleanup_debug_setup); | ||
| 838 | 846 | ||
| 839 | struct var_mtrr_state { | 847 | struct var_mtrr_state { |
| 840 | unsigned long range_startk; | 848 | unsigned long range_startk; |
| @@ -898,6 +906,27 @@ set_var_mtrr_all(unsigned int address_bits) | |||
| 898 | } | 906 | } |
| 899 | } | 907 | } |
| 900 | 908 | ||
| 909 | static unsigned long to_size_factor(unsigned long sizek, char *factorp) | ||
| 910 | { | ||
| 911 | char factor; | ||
| 912 | unsigned long base = sizek; | ||
| 913 | |||
| 914 | if (base & ((1<<10) - 1)) { | ||
| 915 | /* not MB alignment */ | ||
| 916 | factor = 'K'; | ||
| 917 | } else if (base & ((1<<20) - 1)){ | ||
| 918 | factor = 'M'; | ||
| 919 | base >>= 10; | ||
| 920 | } else { | ||
| 921 | factor = 'G'; | ||
| 922 | base >>= 20; | ||
| 923 | } | ||
| 924 | |||
| 925 | *factorp = factor; | ||
| 926 | |||
| 927 | return base; | ||
| 928 | } | ||
| 929 | |||
| 901 | static unsigned int __init | 930 | static unsigned int __init |
| 902 | range_to_mtrr(unsigned int reg, unsigned long range_startk, | 931 | range_to_mtrr(unsigned int reg, unsigned long range_startk, |
| 903 | unsigned long range_sizek, unsigned char type) | 932 | unsigned long range_sizek, unsigned char type) |
| @@ -919,13 +948,21 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk, | |||
| 919 | align = max_align; | 948 | align = max_align; |
| 920 | 949 | ||
| 921 | sizek = 1 << align; | 950 | sizek = 1 << align; |
| 922 | if (debug_print) | 951 | if (debug_print) { |
| 952 | char start_factor = 'K', size_factor = 'K'; | ||
| 953 | unsigned long start_base, size_base; | ||
| 954 | |||
| 955 | start_base = to_size_factor(range_startk, &start_factor), | ||
| 956 | size_base = to_size_factor(sizek, &size_factor), | ||
| 957 | |||
| 923 | printk(KERN_DEBUG "Setting variable MTRR %d, " | 958 | printk(KERN_DEBUG "Setting variable MTRR %d, " |
| 924 | "base: %ldMB, range: %ldMB, type %s\n", | 959 | "base: %ld%cB, range: %ld%cB, type %s\n", |
| 925 | reg, range_startk >> 10, sizek >> 10, | 960 | reg, start_base, start_factor, |
| 961 | size_base, size_factor, | ||
| 926 | (type == MTRR_TYPE_UNCACHABLE)?"UC": | 962 | (type == MTRR_TYPE_UNCACHABLE)?"UC": |
| 927 | ((type == MTRR_TYPE_WRBACK)?"WB":"Other") | 963 | ((type == MTRR_TYPE_WRBACK)?"WB":"Other") |
| 928 | ); | 964 | ); |
| 965 | } | ||
| 929 | save_var_mtrr(reg++, range_startk, sizek, type); | 966 | save_var_mtrr(reg++, range_startk, sizek, type); |
| 930 | range_startk += sizek; | 967 | range_startk += sizek; |
| 931 | range_sizek -= sizek; | 968 | range_sizek -= sizek; |
| @@ -970,6 +1007,8 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, | |||
| 970 | /* try to append some small hole */ | 1007 | /* try to append some small hole */ |
| 971 | range0_basek = state->range_startk; | 1008 | range0_basek = state->range_startk; |
| 972 | range0_sizek = ALIGN(state->range_sizek, chunk_sizek); | 1009 | range0_sizek = ALIGN(state->range_sizek, chunk_sizek); |
| 1010 | |||
| 1011 | /* no increase */ | ||
| 973 | if (range0_sizek == state->range_sizek) { | 1012 | if (range0_sizek == state->range_sizek) { |
| 974 | if (debug_print) | 1013 | if (debug_print) |
| 975 | printk(KERN_DEBUG "rangeX: %016lx - %016lx\n", | 1014 | printk(KERN_DEBUG "rangeX: %016lx - %016lx\n", |
| @@ -980,13 +1019,40 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, | |||
| 980 | return 0; | 1019 | return 0; |
| 981 | } | 1020 | } |
| 982 | 1021 | ||
| 983 | range0_sizek -= chunk_sizek; | 1022 | /* only cut back, when it is not the last */ |
| 984 | if (range0_sizek && sizek) { | 1023 | if (sizek) { |
| 985 | while (range0_basek + range0_sizek > (basek + sizek)) { | 1024 | while (range0_basek + range0_sizek > (basek + sizek)) { |
| 986 | range0_sizek -= chunk_sizek; | 1025 | if (range0_sizek >= chunk_sizek) |
| 987 | if (!range0_sizek) | 1026 | range0_sizek -= chunk_sizek; |
| 988 | break; | 1027 | else |
| 989 | } | 1028 | range0_sizek = 0; |
| 1029 | |||
| 1030 | if (!range0_sizek) | ||
| 1031 | break; | ||
| 1032 | } | ||
| 1033 | } | ||
| 1034 | |||
| 1035 | second_try: | ||
| 1036 | range_basek = range0_basek + range0_sizek; | ||
| 1037 | |||
| 1038 | /* one hole in the middle */ | ||
| 1039 | if (range_basek > basek && range_basek <= (basek + sizek)) | ||
| 1040 | second_sizek = range_basek - basek; | ||
| 1041 | |||
| 1042 | if (range0_sizek > state->range_sizek) { | ||
| 1043 | |||
| 1044 | /* one hole in middle or at end */ | ||
| 1045 | hole_sizek = range0_sizek - state->range_sizek - second_sizek; | ||
| 1046 | |||
| 1047 | /* hole size should be less than half of range0 size */ | ||
| 1048 | if (hole_sizek >= (range0_sizek >> 1) && | ||
| 1049 | range0_sizek >= chunk_sizek) { | ||
| 1050 | range0_sizek -= chunk_sizek; | ||
| 1051 | second_sizek = 0; | ||
| 1052 | hole_sizek = 0; | ||
| 1053 | |||
| 1054 | goto second_try; | ||
| 1055 | } | ||
| 990 | } | 1056 | } |
| 991 | 1057 | ||
| 992 | if (range0_sizek) { | 1058 | if (range0_sizek) { |
| @@ -996,50 +1062,28 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, | |||
| 996 | (range0_basek + range0_sizek)<<10); | 1062 | (range0_basek + range0_sizek)<<10); |
| 997 | state->reg = range_to_mtrr(state->reg, range0_basek, | 1063 | state->reg = range_to_mtrr(state->reg, range0_basek, |
| 998 | range0_sizek, MTRR_TYPE_WRBACK); | 1064 | range0_sizek, MTRR_TYPE_WRBACK); |
| 999 | |||
| 1000 | } | ||
| 1001 | |||
| 1002 | range_basek = range0_basek + range0_sizek; | ||
| 1003 | range_sizek = chunk_sizek; | ||
| 1004 | |||
| 1005 | if (range_basek + range_sizek > basek && | ||
| 1006 | range_basek + range_sizek <= (basek + sizek)) { | ||
| 1007 | /* one hole */ | ||
| 1008 | second_basek = basek; | ||
| 1009 | second_sizek = range_basek + range_sizek - basek; | ||
| 1010 | } | 1065 | } |
| 1011 | 1066 | ||
| 1012 | /* if last piece, only could one hole near end */ | 1067 | if (range0_sizek < state->range_sizek) { |
| 1013 | if ((second_basek || !basek) && | 1068 | /* need to handle left over */ |
| 1014 | range_sizek - (state->range_sizek - range0_sizek) - second_sizek < | ||
| 1015 | (chunk_sizek >> 1)) { | ||
| 1016 | /* | ||
| 1017 | * one hole in middle (second_sizek is 0) or at end | ||
| 1018 | * (second_sizek is 0 ) | ||
| 1019 | */ | ||
| 1020 | hole_sizek = range_sizek - (state->range_sizek - range0_sizek) | ||
| 1021 | - second_sizek; | ||
| 1022 | hole_basek = range_basek + range_sizek - hole_sizek | ||
| 1023 | - second_sizek; | ||
| 1024 | } else { | ||
| 1025 | /* fallback for big hole, or several holes */ | ||
| 1026 | range_sizek = state->range_sizek - range0_sizek; | 1069 | range_sizek = state->range_sizek - range0_sizek; |
| 1027 | second_basek = 0; | 1070 | |
| 1028 | second_sizek = 0; | 1071 | if (debug_print) |
| 1072 | printk(KERN_DEBUG "range: %016lx - %016lx\n", | ||
| 1073 | range_basek<<10, | ||
| 1074 | (range_basek + range_sizek)<<10); | ||
| 1075 | state->reg = range_to_mtrr(state->reg, range_basek, | ||
| 1076 | range_sizek, MTRR_TYPE_WRBACK); | ||
| 1029 | } | 1077 | } |
| 1030 | 1078 | ||
| 1031 | if (debug_print) | ||
| 1032 | printk(KERN_DEBUG "range: %016lx - %016lx\n", range_basek<<10, | ||
| 1033 | (range_basek + range_sizek)<<10); | ||
| 1034 | state->reg = range_to_mtrr(state->reg, range_basek, range_sizek, | ||
| 1035 | MTRR_TYPE_WRBACK); | ||
| 1036 | if (hole_sizek) { | 1079 | if (hole_sizek) { |
| 1080 | hole_basek = range_basek - hole_sizek - second_sizek; | ||
| 1037 | if (debug_print) | 1081 | if (debug_print) |
| 1038 | printk(KERN_DEBUG "hole: %016lx - %016lx\n", | 1082 | printk(KERN_DEBUG "hole: %016lx - %016lx\n", |
| 1039 | hole_basek<<10, (hole_basek + hole_sizek)<<10); | 1083 | hole_basek<<10, |
| 1040 | state->reg = range_to_mtrr(state->reg, hole_basek, hole_sizek, | 1084 | (hole_basek + hole_sizek)<<10); |
| 1041 | MTRR_TYPE_UNCACHABLE); | 1085 | state->reg = range_to_mtrr(state->reg, hole_basek, |
| 1042 | 1086 | hole_sizek, MTRR_TYPE_UNCACHABLE); | |
| 1043 | } | 1087 | } |
| 1044 | 1088 | ||
| 1045 | return second_sizek; | 1089 | return second_sizek; |
| @@ -1154,11 +1198,11 @@ struct mtrr_cleanup_result { | |||
| 1154 | }; | 1198 | }; |
| 1155 | 1199 | ||
| 1156 | /* | 1200 | /* |
| 1157 | * gran_size: 1M, 2M, ..., 2G | 1201 | * gran_size: 64K, 128K, 256K, 512K, 1M, 2M, ..., 2G |
| 1158 | * chunk size: gran_size, ..., 4G | 1202 | * chunk size: gran_size, ..., 2G |
| 1159 | * so we need (2+13)*6 | 1203 | * so we need (1+16)*8 |
| 1160 | */ | 1204 | */ |
| 1161 | #define NUM_RESULT 90 | 1205 | #define NUM_RESULT 136 |
| 1162 | #define PSHIFT (PAGE_SHIFT - 10) | 1206 | #define PSHIFT (PAGE_SHIFT - 10) |
| 1163 | 1207 | ||
| 1164 | static struct mtrr_cleanup_result __initdata result[NUM_RESULT]; | 1208 | static struct mtrr_cleanup_result __initdata result[NUM_RESULT]; |
| @@ -1168,13 +1212,14 @@ static unsigned long __initdata min_loss_pfn[RANGE_NUM]; | |||
| 1168 | static int __init mtrr_cleanup(unsigned address_bits) | 1212 | static int __init mtrr_cleanup(unsigned address_bits) |
| 1169 | { | 1213 | { |
| 1170 | unsigned long extra_remove_base, extra_remove_size; | 1214 | unsigned long extra_remove_base, extra_remove_size; |
| 1171 | unsigned long i, base, size, def, dummy; | 1215 | unsigned long base, size, def, dummy; |
| 1172 | mtrr_type type; | 1216 | mtrr_type type; |
| 1173 | int nr_range, nr_range_new; | 1217 | int nr_range, nr_range_new; |
| 1174 | u64 chunk_size, gran_size; | 1218 | u64 chunk_size, gran_size; |
| 1175 | unsigned long range_sums, range_sums_new; | 1219 | unsigned long range_sums, range_sums_new; |
| 1176 | int index_good; | 1220 | int index_good; |
| 1177 | int num_reg_good; | 1221 | int num_reg_good; |
| 1222 | int i; | ||
| 1178 | 1223 | ||
| 1179 | /* extra one for all 0 */ | 1224 | /* extra one for all 0 */ |
| 1180 | int num[MTRR_NUM_TYPES + 1]; | 1225 | int num[MTRR_NUM_TYPES + 1]; |
| @@ -1204,6 +1249,8 @@ static int __init mtrr_cleanup(unsigned address_bits) | |||
| 1204 | continue; | 1249 | continue; |
| 1205 | if (!size) | 1250 | if (!size) |
| 1206 | type = MTRR_NUM_TYPES; | 1251 | type = MTRR_NUM_TYPES; |
| 1252 | if (type == MTRR_TYPE_WRPROT) | ||
| 1253 | type = MTRR_TYPE_UNCACHABLE; | ||
| 1207 | num[type]++; | 1254 | num[type]++; |
| 1208 | } | 1255 | } |
| 1209 | 1256 | ||
| @@ -1216,23 +1263,57 @@ static int __init mtrr_cleanup(unsigned address_bits) | |||
| 1216 | num_var_ranges - num[MTRR_NUM_TYPES]) | 1263 | num_var_ranges - num[MTRR_NUM_TYPES]) |
| 1217 | return 0; | 1264 | return 0; |
| 1218 | 1265 | ||
| 1266 | /* print original var MTRRs at first, for debugging: */ | ||
| 1267 | printk(KERN_DEBUG "original variable MTRRs\n"); | ||
| 1268 | for (i = 0; i < num_var_ranges; i++) { | ||
| 1269 | char start_factor = 'K', size_factor = 'K'; | ||
| 1270 | unsigned long start_base, size_base; | ||
| 1271 | |||
| 1272 | size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10); | ||
| 1273 | if (!size_base) | ||
| 1274 | continue; | ||
| 1275 | |||
| 1276 | size_base = to_size_factor(size_base, &size_factor), | ||
| 1277 | start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10); | ||
| 1278 | start_base = to_size_factor(start_base, &start_factor), | ||
| 1279 | type = range_state[i].type; | ||
| 1280 | |||
| 1281 | printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n", | ||
| 1282 | i, start_base, start_factor, | ||
| 1283 | size_base, size_factor, | ||
| 1284 | (type == MTRR_TYPE_UNCACHABLE) ? "UC" : | ||
| 1285 | ((type == MTRR_TYPE_WRPROT) ? "WP" : | ||
| 1286 | ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other")) | ||
| 1287 | ); | ||
| 1288 | } | ||
| 1289 | |||
| 1219 | memset(range, 0, sizeof(range)); | 1290 | memset(range, 0, sizeof(range)); |
| 1220 | extra_remove_size = 0; | 1291 | extra_remove_size = 0; |
| 1221 | if (mtrr_tom2) { | 1292 | extra_remove_base = 1 << (32 - PAGE_SHIFT); |
| 1222 | extra_remove_base = 1 << (32 - PAGE_SHIFT); | 1293 | if (mtrr_tom2) |
| 1223 | extra_remove_size = | 1294 | extra_remove_size = |
| 1224 | (mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base; | 1295 | (mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base; |
| 1225 | } | ||
| 1226 | nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base, | 1296 | nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base, |
| 1227 | extra_remove_size); | 1297 | extra_remove_size); |
| 1298 | /* | ||
| 1299 | * [0, 1M) should always be coverred by var mtrr with WB | ||
| 1300 | * and fixed mtrrs should take effective before var mtrr for it | ||
| 1301 | */ | ||
| 1302 | nr_range = add_range_with_merge(range, nr_range, 0, | ||
| 1303 | (1ULL<<(20 - PAGE_SHIFT)) - 1); | ||
| 1304 | /* sort the ranges */ | ||
| 1305 | sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); | ||
| 1306 | |||
| 1228 | range_sums = sum_ranges(range, nr_range); | 1307 | range_sums = sum_ranges(range, nr_range); |
| 1229 | printk(KERN_INFO "total RAM coverred: %ldM\n", | 1308 | printk(KERN_INFO "total RAM coverred: %ldM\n", |
| 1230 | range_sums >> (20 - PAGE_SHIFT)); | 1309 | range_sums >> (20 - PAGE_SHIFT)); |
| 1231 | 1310 | ||
| 1232 | if (mtrr_chunk_size && mtrr_gran_size) { | 1311 | if (mtrr_chunk_size && mtrr_gran_size) { |
| 1233 | int num_reg; | 1312 | int num_reg; |
| 1313 | char gran_factor, chunk_factor, lose_factor; | ||
| 1314 | unsigned long gran_base, chunk_base, lose_base; | ||
| 1234 | 1315 | ||
| 1235 | debug_print = 1; | 1316 | debug_print++; |
| 1236 | /* convert ranges to var ranges state */ | 1317 | /* convert ranges to var ranges state */ |
| 1237 | num_reg = x86_setup_var_mtrrs(range, nr_range, mtrr_chunk_size, | 1318 | num_reg = x86_setup_var_mtrrs(range, nr_range, mtrr_chunk_size, |
| 1238 | mtrr_gran_size); | 1319 | mtrr_gran_size); |
| @@ -1256,34 +1337,48 @@ static int __init mtrr_cleanup(unsigned address_bits) | |||
| 1256 | result[i].lose_cover_sizek = | 1337 | result[i].lose_cover_sizek = |
| 1257 | (range_sums - range_sums_new) << PSHIFT; | 1338 | (range_sums - range_sums_new) << PSHIFT; |
| 1258 | 1339 | ||
| 1259 | printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t", | 1340 | gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), |
| 1260 | result[i].bad?"*BAD*":" ", result[i].gran_sizek >> 10, | 1341 | chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), |
| 1261 | result[i].chunk_sizek >> 10); | 1342 | lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), |
| 1262 | printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ldM \n", | 1343 | printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t", |
| 1344 | result[i].bad?"*BAD*":" ", | ||
| 1345 | gran_base, gran_factor, chunk_base, chunk_factor); | ||
| 1346 | printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n", | ||
| 1263 | result[i].num_reg, result[i].bad?"-":"", | 1347 | result[i].num_reg, result[i].bad?"-":"", |
| 1264 | result[i].lose_cover_sizek >> 10); | 1348 | lose_base, lose_factor); |
| 1265 | if (!result[i].bad) { | 1349 | if (!result[i].bad) { |
| 1266 | set_var_mtrr_all(address_bits); | 1350 | set_var_mtrr_all(address_bits); |
| 1267 | return 1; | 1351 | return 1; |
| 1268 | } | 1352 | } |
| 1269 | printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, " | 1353 | printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, " |
| 1270 | "will find optimal one\n"); | 1354 | "will find optimal one\n"); |
| 1271 | debug_print = 0; | 1355 | debug_print--; |
| 1272 | memset(result, 0, sizeof(result[0])); | 1356 | memset(result, 0, sizeof(result[0])); |
| 1273 | } | 1357 | } |
| 1274 | 1358 | ||
| 1275 | i = 0; | 1359 | i = 0; |
| 1276 | memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn)); | 1360 | memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn)); |
| 1277 | memset(result, 0, sizeof(result)); | 1361 | memset(result, 0, sizeof(result)); |
| 1278 | for (gran_size = (1ULL<<20); gran_size < (1ULL<<32); gran_size <<= 1) { | 1362 | for (gran_size = (1ULL<<16); gran_size < (1ULL<<32); gran_size <<= 1) { |
| 1279 | for (chunk_size = gran_size; chunk_size < (1ULL<<33); | 1363 | char gran_factor; |
| 1364 | unsigned long gran_base; | ||
| 1365 | |||
| 1366 | if (debug_print) | ||
| 1367 | gran_base = to_size_factor(gran_size >> 10, &gran_factor); | ||
| 1368 | |||
| 1369 | for (chunk_size = gran_size; chunk_size < (1ULL<<32); | ||
| 1280 | chunk_size <<= 1) { | 1370 | chunk_size <<= 1) { |
| 1281 | int num_reg; | 1371 | int num_reg; |
| 1282 | 1372 | ||
| 1283 | if (debug_print) | 1373 | if (debug_print) { |
| 1284 | printk(KERN_INFO | 1374 | char chunk_factor; |
| 1285 | "\ngran_size: %lldM chunk_size_size: %lldM\n", | 1375 | unsigned long chunk_base; |
| 1286 | gran_size >> 20, chunk_size >> 20); | 1376 | |
| 1377 | chunk_base = to_size_factor(chunk_size>>10, &chunk_factor), | ||
| 1378 | printk(KERN_INFO "\n"); | ||
| 1379 | printk(KERN_INFO "gran_size: %ld%c chunk_size: %ld%c \n", | ||
| 1380 | gran_base, gran_factor, chunk_base, chunk_factor); | ||
| 1381 | } | ||
| 1287 | if (i >= NUM_RESULT) | 1382 | if (i >= NUM_RESULT) |
| 1288 | continue; | 1383 | continue; |
| 1289 | 1384 | ||
| @@ -1326,12 +1421,18 @@ static int __init mtrr_cleanup(unsigned address_bits) | |||
| 1326 | 1421 | ||
| 1327 | /* print out all */ | 1422 | /* print out all */ |
| 1328 | for (i = 0; i < NUM_RESULT; i++) { | 1423 | for (i = 0; i < NUM_RESULT; i++) { |
| 1329 | printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t", | 1424 | char gran_factor, chunk_factor, lose_factor; |
| 1330 | result[i].bad?"*BAD* ":" ", result[i].gran_sizek >> 10, | 1425 | unsigned long gran_base, chunk_base, lose_base; |
| 1331 | result[i].chunk_sizek >> 10); | 1426 | |
| 1332 | printk(KERN_CONT "num_reg: %d \tlose RAM: %s%ldM\n", | 1427 | gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), |
| 1333 | result[i].num_reg, result[i].bad?"-":"", | 1428 | chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), |
| 1334 | result[i].lose_cover_sizek >> 10); | 1429 | lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), |
| 1430 | printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t", | ||
| 1431 | result[i].bad?"*BAD*":" ", | ||
| 1432 | gran_base, gran_factor, chunk_base, chunk_factor); | ||
| 1433 | printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n", | ||
| 1434 | result[i].num_reg, result[i].bad?"-":"", | ||
| 1435 | lose_base, lose_factor); | ||
| 1335 | } | 1436 | } |
| 1336 | 1437 | ||
| 1337 | /* try to find the optimal index */ | 1438 | /* try to find the optimal index */ |
| @@ -1339,10 +1440,8 @@ static int __init mtrr_cleanup(unsigned address_bits) | |||
| 1339 | nr_mtrr_spare_reg = num_var_ranges - 1; | 1440 | nr_mtrr_spare_reg = num_var_ranges - 1; |
| 1340 | num_reg_good = -1; | 1441 | num_reg_good = -1; |
| 1341 | for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) { | 1442 | for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) { |
| 1342 | if (!min_loss_pfn[i]) { | 1443 | if (!min_loss_pfn[i]) |
| 1343 | num_reg_good = i; | 1444 | num_reg_good = i; |
| 1344 | break; | ||
| 1345 | } | ||
| 1346 | } | 1445 | } |
| 1347 | 1446 | ||
| 1348 | index_good = -1; | 1447 | index_good = -1; |
| @@ -1358,21 +1457,26 @@ static int __init mtrr_cleanup(unsigned address_bits) | |||
| 1358 | } | 1457 | } |
| 1359 | 1458 | ||
| 1360 | if (index_good != -1) { | 1459 | if (index_good != -1) { |
| 1460 | char gran_factor, chunk_factor, lose_factor; | ||
| 1461 | unsigned long gran_base, chunk_base, lose_base; | ||
| 1462 | |||
| 1361 | printk(KERN_INFO "Found optimal setting for mtrr clean up\n"); | 1463 | printk(KERN_INFO "Found optimal setting for mtrr clean up\n"); |
| 1362 | i = index_good; | 1464 | i = index_good; |
| 1363 | printk(KERN_INFO "gran_size: %ldM \tchunk_size: %ldM \t", | 1465 | gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), |
| 1364 | result[i].gran_sizek >> 10, | 1466 | chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), |
| 1365 | result[i].chunk_sizek >> 10); | 1467 | lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), |
| 1366 | printk(KERN_CONT "num_reg: %d \tlose RAM: %ldM\n", | 1468 | printk(KERN_INFO "gran_size: %ld%c \tchunk_size: %ld%c \t", |
| 1367 | result[i].num_reg, | 1469 | gran_base, gran_factor, chunk_base, chunk_factor); |
| 1368 | result[i].lose_cover_sizek >> 10); | 1470 | printk(KERN_CONT "num_reg: %d \tlose RAM: %ld%c\n", |
| 1471 | result[i].num_reg, lose_base, lose_factor); | ||
| 1369 | /* convert ranges to var ranges state */ | 1472 | /* convert ranges to var ranges state */ |
| 1370 | chunk_size = result[i].chunk_sizek; | 1473 | chunk_size = result[i].chunk_sizek; |
| 1371 | chunk_size <<= 10; | 1474 | chunk_size <<= 10; |
| 1372 | gran_size = result[i].gran_sizek; | 1475 | gran_size = result[i].gran_sizek; |
| 1373 | gran_size <<= 10; | 1476 | gran_size <<= 10; |
| 1374 | debug_print = 1; | 1477 | debug_print++; |
| 1375 | x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size); | 1478 | x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size); |
| 1479 | debug_print--; | ||
| 1376 | set_var_mtrr_all(address_bits); | 1480 | set_var_mtrr_all(address_bits); |
| 1377 | return 1; | 1481 | return 1; |
| 1378 | } | 1482 | } |
| @@ -1496,11 +1600,8 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) | |||
| 1496 | 1600 | ||
| 1497 | /* kvm/qemu doesn't have mtrr set right, don't trim them all */ | 1601 | /* kvm/qemu doesn't have mtrr set right, don't trim them all */ |
| 1498 | if (!highest_pfn) { | 1602 | if (!highest_pfn) { |
| 1499 | if (!kvm_para_available()) { | 1603 | WARN(!kvm_para_available(), KERN_WARNING |
| 1500 | printk(KERN_WARNING | ||
| 1501 | "WARNING: strange, CPU MTRRs all blank?\n"); | 1604 | "WARNING: strange, CPU MTRRs all blank?\n"); |
| 1502 | WARN_ON(1); | ||
| 1503 | } | ||
| 1504 | return 0; | 1605 | return 0; |
| 1505 | } | 1606 | } |
| 1506 | 1607 | ||
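to_size_factor() picks the largest unit (K, M or G) that still divides the kilobyte count evenly, which is what the new debug printks use instead of always printing megabytes. A user-space copy of the helper, exercised on a few sample sizes, behaves like this:

#include <stdio.h>

/* Same logic as to_size_factor(): sizek is a size in KiB. */
static unsigned long to_size_factor(unsigned long sizek, char *factorp)
{
	char factor;
	unsigned long base = sizek;

	if (base & ((1UL << 10) - 1)) {
		factor = 'K';			/* not MiB aligned */
	} else if (base & ((1UL << 20) - 1)) {
		factor = 'M';
		base >>= 10;
	} else {
		factor = 'G';
		base >>= 20;
	}
	*factorp = factor;
	return base;
}

int main(void)
{
	unsigned long sizes[] = { 64, 512UL << 10, 2UL << 20 };	/* 64K, 512M, 2G */
	char factor;

	for (int i = 0; i < 3; i++) {
		unsigned long base = to_size_factor(sizes[i], &factor);
		printf("%lu%cB\n", base, factor);
	}
	return 0;
}

This prints 64KB, 512MB and 2GB, matching the gran_size/chunk_size lines the cleanup code now emits.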
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 6d4bdc02388a..9abd48b22674 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c | |||
| @@ -17,6 +17,8 @@ | |||
| 17 | #include <linux/bitops.h> | 17 | #include <linux/bitops.h> |
| 18 | #include <linux/smp.h> | 18 | #include <linux/smp.h> |
| 19 | #include <linux/nmi.h> | 19 | #include <linux/nmi.h> |
| 20 | #include <linux/kprobes.h> | ||
| 21 | |||
| 20 | #include <asm/apic.h> | 22 | #include <asm/apic.h> |
| 21 | #include <asm/intel_arch_perfmon.h> | 23 | #include <asm/intel_arch_perfmon.h> |
| 22 | 24 | ||
| @@ -250,7 +252,7 @@ static void write_watchdog_counter(unsigned int perfctr_msr, | |||
| 250 | 252 | ||
| 251 | do_div(count, nmi_hz); | 253 | do_div(count, nmi_hz); |
| 252 | if(descr) | 254 | if(descr) |
| 253 | Dprintk("setting %s to -0x%08Lx\n", descr, count); | 255 | pr_debug("setting %s to -0x%08Lx\n", descr, count); |
| 254 | wrmsrl(perfctr_msr, 0 - count); | 256 | wrmsrl(perfctr_msr, 0 - count); |
| 255 | } | 257 | } |
| 256 | 258 | ||
| @@ -261,7 +263,7 @@ static void write_watchdog_counter32(unsigned int perfctr_msr, | |||
| 261 | 263 | ||
| 262 | do_div(count, nmi_hz); | 264 | do_div(count, nmi_hz); |
| 263 | if(descr) | 265 | if(descr) |
| 264 | Dprintk("setting %s to -0x%08Lx\n", descr, count); | 266 | pr_debug("setting %s to -0x%08Lx\n", descr, count); |
| 265 | wrmsr(perfctr_msr, (u32)(-count), 0); | 267 | wrmsr(perfctr_msr, (u32)(-count), 0); |
| 266 | } | 268 | } |
| 267 | 269 | ||
| @@ -295,13 +297,19 @@ static int setup_k7_watchdog(unsigned nmi_hz) | |||
| 295 | /* setup the timer */ | 297 | /* setup the timer */ |
| 296 | wrmsr(evntsel_msr, evntsel, 0); | 298 | wrmsr(evntsel_msr, evntsel, 0); |
| 297 | write_watchdog_counter(perfctr_msr, "K7_PERFCTR0",nmi_hz); | 299 | write_watchdog_counter(perfctr_msr, "K7_PERFCTR0",nmi_hz); |
| 298 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
| 299 | evntsel |= K7_EVNTSEL_ENABLE; | ||
| 300 | wrmsr(evntsel_msr, evntsel, 0); | ||
| 301 | 300 | ||
| 301 | /* initialize the wd struct before enabling */ | ||
| 302 | wd->perfctr_msr = perfctr_msr; | 302 | wd->perfctr_msr = perfctr_msr; |
| 303 | wd->evntsel_msr = evntsel_msr; | 303 | wd->evntsel_msr = evntsel_msr; |
| 304 | wd->cccr_msr = 0; /* unused */ | 304 | wd->cccr_msr = 0; /* unused */ |
| 305 | |||
| 306 | /* ok, everything is initialized, announce that we're set */ | ||
| 307 | cpu_nmi_set_wd_enabled(); | ||
| 308 | |||
| 309 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
| 310 | evntsel |= K7_EVNTSEL_ENABLE; | ||
| 311 | wrmsr(evntsel_msr, evntsel, 0); | ||
| 312 | |||
| 305 | return 1; | 313 | return 1; |
| 306 | } | 314 | } |
| 307 | 315 | ||
| @@ -330,7 +338,8 @@ static void single_msr_unreserve(void) | |||
| 330 | release_perfctr_nmi(wd_ops->perfctr); | 338 | release_perfctr_nmi(wd_ops->perfctr); |
| 331 | } | 339 | } |
| 332 | 340 | ||
| 333 | static void single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) | 341 | static void __kprobes |
| 342 | single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) | ||
| 334 | { | 343 | { |
| 335 | /* start the cycle over again */ | 344 | /* start the cycle over again */ |
| 336 | write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz); | 345 | write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz); |
| @@ -379,17 +388,23 @@ static int setup_p6_watchdog(unsigned nmi_hz) | |||
| 379 | wrmsr(evntsel_msr, evntsel, 0); | 388 | wrmsr(evntsel_msr, evntsel, 0); |
| 380 | nmi_hz = adjust_for_32bit_ctr(nmi_hz); | 389 | nmi_hz = adjust_for_32bit_ctr(nmi_hz); |
| 381 | write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0",nmi_hz); | 390 | write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0",nmi_hz); |
| 382 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
| 383 | evntsel |= P6_EVNTSEL0_ENABLE; | ||
| 384 | wrmsr(evntsel_msr, evntsel, 0); | ||
| 385 | 391 | ||
| 392 | /* initialize the wd struct before enabling */ | ||
| 386 | wd->perfctr_msr = perfctr_msr; | 393 | wd->perfctr_msr = perfctr_msr; |
| 387 | wd->evntsel_msr = evntsel_msr; | 394 | wd->evntsel_msr = evntsel_msr; |
| 388 | wd->cccr_msr = 0; /* unused */ | 395 | wd->cccr_msr = 0; /* unused */ |
| 396 | |||
| 397 | /* ok, everything is initialized, announce that we're set */ | ||
| 398 | cpu_nmi_set_wd_enabled(); | ||
| 399 | |||
| 400 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
| 401 | evntsel |= P6_EVNTSEL0_ENABLE; | ||
| 402 | wrmsr(evntsel_msr, evntsel, 0); | ||
| 403 | |||
| 389 | return 1; | 404 | return 1; |
| 390 | } | 405 | } |
| 391 | 406 | ||
| 392 | static void p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) | 407 | static void __kprobes p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) |
| 393 | { | 408 | { |
| 394 | /* | 409 | /* |
| 395 | * P6 based Pentium M need to re-unmask | 410 | * P6 based Pentium M need to re-unmask |
| @@ -432,6 +447,27 @@ static const struct wd_ops p6_wd_ops = { | |||
| 432 | #define P4_CCCR_ENABLE (1 << 12) | 447 | #define P4_CCCR_ENABLE (1 << 12) |
| 433 | #define P4_CCCR_OVF (1 << 31) | 448 | #define P4_CCCR_OVF (1 << 31) |
| 434 | 449 | ||
| 450 | #define P4_CONTROLS 18 | ||
| 451 | static unsigned int p4_controls[18] = { | ||
| 452 | MSR_P4_BPU_CCCR0, | ||
| 453 | MSR_P4_BPU_CCCR1, | ||
| 454 | MSR_P4_BPU_CCCR2, | ||
| 455 | MSR_P4_BPU_CCCR3, | ||
| 456 | MSR_P4_MS_CCCR0, | ||
| 457 | MSR_P4_MS_CCCR1, | ||
| 458 | MSR_P4_MS_CCCR2, | ||
| 459 | MSR_P4_MS_CCCR3, | ||
| 460 | MSR_P4_FLAME_CCCR0, | ||
| 461 | MSR_P4_FLAME_CCCR1, | ||
| 462 | MSR_P4_FLAME_CCCR2, | ||
| 463 | MSR_P4_FLAME_CCCR3, | ||
| 464 | MSR_P4_IQ_CCCR0, | ||
| 465 | MSR_P4_IQ_CCCR1, | ||
| 466 | MSR_P4_IQ_CCCR2, | ||
| 467 | MSR_P4_IQ_CCCR3, | ||
| 468 | MSR_P4_IQ_CCCR4, | ||
| 469 | MSR_P4_IQ_CCCR5, | ||
| 470 | }; | ||
| 435 | /* | 471 | /* |
| 436 | * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter | 472 | * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter |
| 437 | * CRU_ESCR0 (with any non-null event selector) through a complemented | 473 | * CRU_ESCR0 (with any non-null event selector) through a complemented |
| @@ -473,12 +509,38 @@ static int setup_p4_watchdog(unsigned nmi_hz) | |||
| 473 | evntsel_msr = MSR_P4_CRU_ESCR0; | 509 | evntsel_msr = MSR_P4_CRU_ESCR0; |
| 474 | cccr_msr = MSR_P4_IQ_CCCR0; | 510 | cccr_msr = MSR_P4_IQ_CCCR0; |
| 475 | cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4); | 511 | cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4); |
| 512 | |||
| 513 | /* | ||
| 514 | * If we're on the kdump kernel or other situation, we may | ||
| 515 | * still have other performance counter registers set to | ||
| 516 | * interrupt and they'll keep interrupting forever because | ||
| 517 | * of the P4_CCCR_OVF quirk. So we need to ACK all the | ||
| 518 | * pending interrupts and disable all the registers here, | ||
| 519 | * before reenabling the NMI delivery. Refer to p4_rearm() | ||
| 520 | * about the P4_CCCR_OVF quirk. | ||
| 521 | */ | ||
| 522 | if (reset_devices) { | ||
| 523 | unsigned int low, high; | ||
| 524 | int i; | ||
| 525 | |||
| 526 | for (i = 0; i < P4_CONTROLS; i++) { | ||
| 527 | rdmsr(p4_controls[i], low, high); | ||
| 528 | low &= ~(P4_CCCR_ENABLE | P4_CCCR_OVF); | ||
| 529 | wrmsr(p4_controls[i], low, high); | ||
| 530 | } | ||
| 531 | } | ||
| 476 | } else { | 532 | } else { |
| 477 | /* logical cpu 1 */ | 533 | /* logical cpu 1 */ |
| 478 | perfctr_msr = MSR_P4_IQ_PERFCTR1; | 534 | perfctr_msr = MSR_P4_IQ_PERFCTR1; |
| 479 | evntsel_msr = MSR_P4_CRU_ESCR0; | 535 | evntsel_msr = MSR_P4_CRU_ESCR0; |
| 480 | cccr_msr = MSR_P4_IQ_CCCR1; | 536 | cccr_msr = MSR_P4_IQ_CCCR1; |
| 481 | cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4); | 537 | |
| 538 | /* Pentium 4 D processors don't support P4_CCCR_OVF_PMI1 */ | ||
| 539 | if (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask == 4) | ||
| 540 | cccr_val = P4_CCCR_OVF_PMI0; | ||
| 541 | else | ||
| 542 | cccr_val = P4_CCCR_OVF_PMI1; | ||
| 543 | cccr_val |= P4_CCCR_ESCR_SELECT(4); | ||
| 482 | } | 544 | } |
| 483 | 545 | ||
| 484 | evntsel = P4_ESCR_EVENT_SELECT(0x3F) | 546 | evntsel = P4_ESCR_EVENT_SELECT(0x3F) |
| @@ -493,12 +555,17 @@ static int setup_p4_watchdog(unsigned nmi_hz) | |||
| 493 | wrmsr(evntsel_msr, evntsel, 0); | 555 | wrmsr(evntsel_msr, evntsel, 0); |
| 494 | wrmsr(cccr_msr, cccr_val, 0); | 556 | wrmsr(cccr_msr, cccr_val, 0); |
| 495 | write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz); | 557 | write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz); |
| 496 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 558 | |
| 497 | cccr_val |= P4_CCCR_ENABLE; | ||
| 498 | wrmsr(cccr_msr, cccr_val, 0); | ||
| 499 | wd->perfctr_msr = perfctr_msr; | 559 | wd->perfctr_msr = perfctr_msr; |
| 500 | wd->evntsel_msr = evntsel_msr; | 560 | wd->evntsel_msr = evntsel_msr; |
| 501 | wd->cccr_msr = cccr_msr; | 561 | wd->cccr_msr = cccr_msr; |
| 562 | |||
| 563 | /* ok, everything is initialized, announce that we're set */ | ||
| 564 | cpu_nmi_set_wd_enabled(); | ||
| 565 | |||
| 566 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
| 567 | cccr_val |= P4_CCCR_ENABLE; | ||
| 568 | wrmsr(cccr_msr, cccr_val, 0); | ||
| 502 | return 1; | 569 | return 1; |
| 503 | } | 570 | } |
| 504 | 571 | ||
| @@ -541,7 +608,7 @@ static void p4_unreserve(void) | |||
| 541 | release_perfctr_nmi(MSR_P4_IQ_PERFCTR0); | 608 | release_perfctr_nmi(MSR_P4_IQ_PERFCTR0); |
| 542 | } | 609 | } |
| 543 | 610 | ||
| 544 | static void p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) | 611 | static void __kprobes p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) |
| 545 | { | 612 | { |
| 546 | unsigned dummy; | 613 | unsigned dummy; |
| 547 | /* | 614 | /* |
| @@ -614,13 +681,17 @@ static int setup_intel_arch_watchdog(unsigned nmi_hz) | |||
| 614 | wrmsr(evntsel_msr, evntsel, 0); | 681 | wrmsr(evntsel_msr, evntsel, 0); |
| 615 | nmi_hz = adjust_for_32bit_ctr(nmi_hz); | 682 | nmi_hz = adjust_for_32bit_ctr(nmi_hz); |
| 616 | write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz); | 683 | write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz); |
| 617 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
| 618 | evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
| 619 | wrmsr(evntsel_msr, evntsel, 0); | ||
| 620 | 684 | ||
| 621 | wd->perfctr_msr = perfctr_msr; | 685 | wd->perfctr_msr = perfctr_msr; |
| 622 | wd->evntsel_msr = evntsel_msr; | 686 | wd->evntsel_msr = evntsel_msr; |
| 623 | wd->cccr_msr = 0; /* unused */ | 687 | wd->cccr_msr = 0; /* unused */ |
| 688 | |||
| 689 | /* ok, everything is initialized, announce that we're set */ | ||
| 690 | cpu_nmi_set_wd_enabled(); | ||
| 691 | |||
| 692 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
| 693 | evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
| 694 | wrmsr(evntsel_msr, evntsel, 0); | ||
| 624 | intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1); | 695 | intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1); |
| 625 | return 1; | 696 | return 1; |
| 626 | } | 697 | } |
| @@ -716,7 +787,7 @@ unsigned lapic_adjust_nmi_hz(unsigned hz) | |||
| 716 | return hz; | 787 | return hz; |
| 717 | } | 788 | } |
| 718 | 789 | ||
| 719 | int lapic_wd_event(unsigned nmi_hz) | 790 | int __kprobes lapic_wd_event(unsigned nmi_hz) |
| 720 | { | 791 | { |
| 721 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | 792 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); |
| 722 | u64 ctr; | 793 | u64 ctr; |
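Each setup_*_watchdog() variant above is reshuffled into the same shape: fill in the per-CPU wd state, announce the watchdog as enabled, and only then unmask the NMI in LVTPC and set the enable bit in the event-select MSR, so an NMI that fires immediately never sees a half-initialised control block. The sketch below is only a user-space analogy for that publish-before-enable ordering, using a signal handler in place of the NMI; the field names are illustrative.

#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Stand-in for struct nmi_watchdog_ctlblk: the handler must never see
 * this half filled in. */
static struct {
	volatile sig_atomic_t perfctr;	/* illustrative field */
	volatile sig_atomic_t enabled;
} wd;

static volatile sig_atomic_t seen;

static void handler(int sig)
{
	(void)sig;
	if (wd.enabled)			/* only trust published state */
		seen = wd.perfctr;
}

int main(void)
{
	struct sigaction sa;

	memset(&sa, 0, sizeof(sa));
	sa.sa_handler = handler;
	sigaction(SIGALRM, &sa, NULL);

	wd.perfctr = 42;	/* 1. initialise the control block ...       */
	wd.enabled = 1;		/* 2. ... announce that the watchdog is set  */
	alarm(1);		/* 3. ... and only then arm the "NMI" source */

	pause();
	printf("handler saw perfctr=%d\n", (int)seen);
	return 0;
}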
diff --git a/arch/x86/kernel/cpu/powerflags.c b/arch/x86/kernel/cpu/powerflags.c new file mode 100644 index 000000000000..5abbea297e0c --- /dev/null +++ b/arch/x86/kernel/cpu/powerflags.c | |||
| @@ -0,0 +1,20 @@ | |||
| 1 | /* | ||
| 2 | * Strings for the various x86 power flags | ||
| 3 | * | ||
| 4 | * This file must not contain any executable code. | ||
| 5 | */ | ||
| 6 | |||
| 7 | #include <asm/cpufeature.h> | ||
| 8 | |||
| 9 | const char *const x86_power_flags[32] = { | ||
| 10 | "ts", /* temperature sensor */ | ||
| 11 | "fid", /* frequency id control */ | ||
| 12 | "vid", /* voltage id control */ | ||
| 13 | "ttp", /* thermal trip */ | ||
| 14 | "tm", | ||
| 15 | "stc", | ||
| 16 | "100mhzsteps", | ||
| 17 | "hwpstate", | ||
| 18 | "", /* tsc invariant mapped to constant_tsc */ | ||
| 19 | /* nothing */ | ||
| 20 | }; | ||
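The table above is pure data; a consumer such as the /proc/cpuinfo code walks the power-management capability word bit by bit and prints either the matching name or, for unnamed bits, the raw bit number. A hedged sketch of such a consumer, assuming the capability word comes from CPUID leaf 0x80000007 EDX:

    /*
     * Sketch of a consumer; the real one lives in the /proc/cpuinfo show
     * routine.  "power" is assumed to be the CPUID 0x80000007 EDX word.
     */
    static void show_power_flags(struct seq_file *m, unsigned int power)
    {
            int i;

            seq_printf(m, "power management:");
            for (i = 0; i < 32; i++) {
                    if (!(power & (1 << i)))
                            continue;
                    if (x86_power_flags[i] && x86_power_flags[i][0])
                            seq_printf(m, " %s", x86_power_flags[i]);
                    else
                            seq_printf(m, " [%d]", i);  /* unnamed bit */
            }
            seq_printf(m, "\n");
    }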
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 0d0d9057e7c0..01b1244ef1c0 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c | |||
| @@ -160,14 +160,16 @@ static void *c_start(struct seq_file *m, loff_t *pos) | |||
| 160 | { | 160 | { |
| 161 | if (*pos == 0) /* just in case, cpu 0 is not the first */ | 161 | if (*pos == 0) /* just in case, cpu 0 is not the first */ |
| 162 | *pos = first_cpu(cpu_online_map); | 162 | *pos = first_cpu(cpu_online_map); |
| 163 | if ((*pos) < NR_CPUS && cpu_online(*pos)) | 163 | else |
| 164 | *pos = next_cpu_nr(*pos - 1, cpu_online_map); | ||
| 165 | if ((*pos) < nr_cpu_ids) | ||
| 164 | return &cpu_data(*pos); | 166 | return &cpu_data(*pos); |
| 165 | return NULL; | 167 | return NULL; |
| 166 | } | 168 | } |
| 167 | 169 | ||
| 168 | static void *c_next(struct seq_file *m, void *v, loff_t *pos) | 170 | static void *c_next(struct seq_file *m, void *v, loff_t *pos) |
| 169 | { | 171 | { |
| 170 | *pos = next_cpu(*pos, cpu_online_map); | 172 | (*pos)++; |
| 171 | return c_start(m, pos); | 173 | return c_start(m, pos); |
| 172 | } | 174 | } |
| 173 | 175 | ||
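The iterator now terminates on nr_cpu_ids rather than NR_CPUS and advances with next_cpu_nr(), so holes in cpu_online_map and sparse cpu numbering are both handled; c_next() merely stores "previous + 1" and lets c_start() remap that to the next online cpu. An equivalent open-coded form, for illustration only:

    /*
     * Equivalent open-coded form of the new iterator, for illustration only.
     */
    static void *c_start_equivalent(struct seq_file *m, loff_t *pos)
    {
            unsigned int cpu;

            if (*pos == 0)                          /* first call */
                    cpu = first_cpu(cpu_online_map);
            else                                    /* re-entry from c_next() */
                    cpu = next_cpu_nr(*pos - 1, cpu_online_map);

            *pos = cpu;                             /* may be >= nr_cpu_ids */
            return cpu < nr_cpu_ids ? &cpu_data(cpu) : NULL;
    }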
diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index b911a2c61b8f..52b3fefbd5af 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c | |||
| @@ -5,6 +5,18 @@ | |||
| 5 | #include <asm/msr.h> | 5 | #include <asm/msr.h> |
| 6 | #include "cpu.h" | 6 | #include "cpu.h" |
| 7 | 7 | ||
| 8 | static void __cpuinit early_init_transmeta(struct cpuinfo_x86 *c) | ||
| 9 | { | ||
| 10 | u32 xlvl; | ||
| 11 | |||
| 12 | /* Transmeta-defined flags: level 0x80860001 */ | ||
| 13 | xlvl = cpuid_eax(0x80860000); | ||
| 14 | if ((xlvl & 0xffff0000) == 0x80860000) { | ||
| 15 | if (xlvl >= 0x80860001) | ||
| 16 | c->x86_capability[2] = cpuid_edx(0x80860001); | ||
| 17 | } | ||
| 18 | } | ||
| 19 | |||
| 8 | static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) | 20 | static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) |
| 9 | { | 21 | { |
| 10 | unsigned int cap_mask, uk, max, dummy; | 22 | unsigned int cap_mask, uk, max, dummy; |
| @@ -12,7 +24,8 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) | |||
| 12 | unsigned int cpu_rev, cpu_freq = 0, cpu_flags, new_cpu_rev; | 24 | unsigned int cpu_rev, cpu_freq = 0, cpu_flags, new_cpu_rev; |
| 13 | char cpu_info[65]; | 25 | char cpu_info[65]; |
| 14 | 26 | ||
| 15 | get_model_name(c); /* Same as AMD/Cyrix */ | 27 | early_init_transmeta(c); |
| 28 | |||
| 16 | display_cacheinfo(c); | 29 | display_cacheinfo(c); |
| 17 | 30 | ||
| 18 | /* Print CMS and CPU revision */ | 31 | /* Print CMS and CPU revision */ |
| @@ -85,23 +98,12 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) | |||
| 85 | #endif | 98 | #endif |
| 86 | } | 99 | } |
| 87 | 100 | ||
| 88 | static void __cpuinit transmeta_identify(struct cpuinfo_x86 *c) | ||
| 89 | { | ||
| 90 | u32 xlvl; | ||
| 91 | |||
| 92 | /* Transmeta-defined flags: level 0x80860001 */ | ||
| 93 | xlvl = cpuid_eax(0x80860000); | ||
| 94 | if ((xlvl & 0xffff0000) == 0x80860000) { | ||
| 95 | if (xlvl >= 0x80860001) | ||
| 96 | c->x86_capability[2] = cpuid_edx(0x80860001); | ||
| 97 | } | ||
| 98 | } | ||
| 99 | |||
| 100 | static struct cpu_dev transmeta_cpu_dev __cpuinitdata = { | 101 | static struct cpu_dev transmeta_cpu_dev __cpuinitdata = { |
| 101 | .c_vendor = "Transmeta", | 102 | .c_vendor = "Transmeta", |
| 102 | .c_ident = { "GenuineTMx86", "TransmetaCPU" }, | 103 | .c_ident = { "GenuineTMx86", "TransmetaCPU" }, |
| 104 | .c_early_init = early_init_transmeta, | ||
| 103 | .c_init = init_transmeta, | 105 | .c_init = init_transmeta, |
| 104 | .c_identify = transmeta_identify, | 106 | .c_x86_vendor = X86_VENDOR_TRANSMETA, |
| 105 | }; | 107 | }; |
| 106 | 108 | ||
| 107 | cpu_vendor_dev_register(X86_VENDOR_TRANSMETA, &transmeta_cpu_dev); | 109 | cpu_dev_register(transmeta_cpu_dev); |
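Moving the extended-CPUID probe into a ->c_early_init hook (and calling it at the top of init_transmeta()) fills the Transmeta feature word in x86_capability[2] before the common identification path needs it, and the per-vendor registration macro gives way to cpu_dev_register() plus an explicit .c_x86_vendor field. The probe itself follows the usual "check the extended level before trusting it" rule; a standalone sketch of that rule:

    /*
     * Standalone sketch of the probe pattern; not a kernel interface.
     * Returns the EDX feature word of "leaf", or 0 if the vendor range
     * does not exist or does not reach that leaf.
     */
    static unsigned int vendor_leaf_edx(unsigned int base, unsigned int leaf)
    {
            unsigned int max = cpuid_eax(base);

            /* the range is only valid if EAX echoes the base's upper half */
            if ((max & 0xffff0000) != (base & 0xffff0000))
                    return 0;
            if (max < leaf)
                    return 0;
            return cpuid_edx(leaf);
    }

    /* e.g.: c->x86_capability[2] = vendor_leaf_edx(0x80860000, 0x80860001); */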
diff --git a/arch/x86/kernel/cpu/umc.c b/arch/x86/kernel/cpu/umc.c index b1fc90989d75..e777f79e0960 100644 --- a/arch/x86/kernel/cpu/umc.c +++ b/arch/x86/kernel/cpu/umc.c | |||
| @@ -19,7 +19,8 @@ static struct cpu_dev umc_cpu_dev __cpuinitdata = { | |||
| 19 | } | 19 | } |
| 20 | }, | 20 | }, |
| 21 | }, | 21 | }, |
| 22 | .c_x86_vendor = X86_VENDOR_UMC, | ||
| 22 | }; | 23 | }; |
| 23 | 24 | ||
| 24 | cpu_vendor_dev_register(X86_VENDOR_UMC, &umc_cpu_dev); | 25 | cpu_dev_register(umc_cpu_dev); |
| 25 | 26 | ||
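Same mechanical change as for Transmeta: the vendor number is now carried in the structure and cpu_dev_register() presumably drops a pointer into a table that the common setup code scans by .c_x86_vendor instead of indexing a fixed array by vendor number. A sketch of what such a lookup could look like; the start/end symbols below are assumptions about the implementation, not quoted from it:

    /*
     * Assumed shape of the new registration/lookup, for illustration only;
     * the section boundary symbols are an assumption, not quoted code.
     */
    extern struct cpu_dev *__x86_cpu_dev_start[], *__x86_cpu_dev_end[];

    static struct cpu_dev *find_cpu_dev(int vendor)
    {
            struct cpu_dev **p;

            for (p = __x86_cpu_dev_start; p < __x86_cpu_dev_end; p++)
                    if ((*p)->c_x86_vendor == vendor)
                            return *p;
            return NULL;
    }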
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 2de5fa2bbf77..72cefd1e649b 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c | |||
| @@ -36,7 +36,6 @@ | |||
| 36 | #include <linux/smp_lock.h> | 36 | #include <linux/smp_lock.h> |
| 37 | #include <linux/major.h> | 37 | #include <linux/major.h> |
| 38 | #include <linux/fs.h> | 38 | #include <linux/fs.h> |
| 39 | #include <linux/smp_lock.h> | ||
| 40 | #include <linux/device.h> | 39 | #include <linux/device.h> |
| 41 | #include <linux/cpu.h> | 40 | #include <linux/cpu.h> |
| 42 | #include <linux/notifier.h> | 41 | #include <linux/notifier.h> |
| @@ -89,6 +88,8 @@ static ssize_t cpuid_read(struct file *file, char __user *buf, | |||
| 89 | struct cpuid_regs cmd; | 88 | struct cpuid_regs cmd; |
| 90 | int cpu = iminor(file->f_path.dentry->d_inode); | 89 | int cpu = iminor(file->f_path.dentry->d_inode); |
| 91 | u64 pos = *ppos; | 90 | u64 pos = *ppos; |
| 91 | ssize_t bytes = 0; | ||
| 92 | int err = 0; | ||
| 92 | 93 | ||
| 93 | if (count % 16) | 94 | if (count % 16) |
| 94 | return -EINVAL; /* Invalid chunk size */ | 95 | return -EINVAL; /* Invalid chunk size */ |
| @@ -96,14 +97,19 @@ static ssize_t cpuid_read(struct file *file, char __user *buf, | |||
| 96 | for (; count; count -= 16) { | 97 | for (; count; count -= 16) { |
| 97 | cmd.eax = pos; | 98 | cmd.eax = pos; |
| 98 | cmd.ecx = pos >> 32; | 99 | cmd.ecx = pos >> 32; |
| 99 | smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1); | 100 | err = smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1); |
| 100 | if (copy_to_user(tmp, &cmd, 16)) | 101 | if (err) |
| 101 | return -EFAULT; | 102 | break; |
| 103 | if (copy_to_user(tmp, &cmd, 16)) { | ||
| 104 | err = -EFAULT; | ||
| 105 | break; | ||
| 106 | } | ||
| 102 | tmp += 16; | 107 | tmp += 16; |
| 108 | bytes += 16; | ||
| 103 | *ppos = ++pos; | 109 | *ppos = ++pos; |
| 104 | } | 110 | } |
| 105 | 111 | ||
| 106 | return tmp - buf; | 112 | return bytes ? bytes : err; |
| 107 | } | 113 | } |
| 108 | 114 | ||
| 109 | static int cpuid_open(struct inode *inode, struct file *file) | 115 | static int cpuid_open(struct inode *inode, struct file *file) |
| @@ -141,7 +147,7 @@ static __cpuinit int cpuid_device_create(int cpu) | |||
| 141 | { | 147 | { |
| 142 | struct device *dev; | 148 | struct device *dev; |
| 143 | 149 | ||
| 144 | dev = device_create(cpuid_class, NULL, MKDEV(CPUID_MAJOR, cpu), | 150 | dev = device_create(cpuid_class, NULL, MKDEV(CPUID_MAJOR, cpu), NULL, |
| 145 | "cpu%d", cpu); | 151 | "cpu%d", cpu); |
| 146 | return IS_ERR(dev) ? PTR_ERR(dev) : 0; | 152 | return IS_ERR(dev) ? PTR_ERR(dev) : 0; |
| 147 | } | 153 | } |
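cpuid_read() now follows the usual partial-I/O convention: count the bytes actually copied, stop on the first failure (from smp_call_function_single() or copy_to_user()), and return the byte count if any progress was made, otherwise the error. The same shape reduced to a skeleton, with read_chunk() as a hypothetical stand-in for one 16-byte remote CPUID query:

    /*
     * Skeleton of the "bytes ? bytes : err" convention; read_chunk() is a
     * hypothetical stand-in for one 16-byte remote CPUID query.
     */
    static ssize_t chunked_read(char __user *buf, size_t count)
    {
            ssize_t bytes = 0;
            int err = 0;

            while (count) {
                    err = read_chunk(buf + bytes);  /* may fail mid-stream */
                    if (err)
                            break;
                    bytes += 16;
                    count -= 16;
            }

            /* report progress if there was any, otherwise the error */
            return bytes ? bytes : err;
    }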
diff --git a/arch/x86/kernel/crash_dump_32.c b/arch/x86/kernel/crash_dump_32.c index 72d0c56c1b48..f7cdb3b457aa 100644 --- a/arch/x86/kernel/crash_dump_32.c +++ b/arch/x86/kernel/crash_dump_32.c | |||
| @@ -13,6 +13,9 @@ | |||
| 13 | 13 | ||
| 14 | static void *kdump_buf_page; | 14 | static void *kdump_buf_page; |
| 15 | 15 | ||
| 16 | /* Stores the physical address of elf header of crash image. */ | ||
| 17 | unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX; | ||
| 18 | |||
| 16 | /** | 19 | /** |
| 17 | * copy_oldmem_page - copy one page from "oldmem" | 20 | * copy_oldmem_page - copy one page from "oldmem" |
| 18 | * @pfn: page frame number to be copied | 21 | * @pfn: page frame number to be copied |
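elfcorehdr_addr now has an architecture-side definition (one per word size) that defaults to ELFCORE_ADDR_MAX, meaning "no crash image"; presumably it is filled in from the elfcorehdr= boot parameter when a kdump capture kernel starts. copy_oldmem_page() stays the per-arch primitive for reaching the old kernel's memory; a hedged sketch of a caller that reads an arbitrary range through it:

    /*
     * Hedged sketch of a caller; copy_oldmem_page() is the primitive this
     * file implements, everything else here is illustration.
     */
    static ssize_t read_oldmem_range(char *buf, size_t len, unsigned long long paddr)
    {
            ssize_t total = 0;

            while (len) {
                    unsigned long pfn = paddr >> PAGE_SHIFT;
                    unsigned long off = paddr & (PAGE_SIZE - 1);
                    size_t chunk = min_t(size_t, len, PAGE_SIZE - off);
                    ssize_t n = copy_oldmem_page(pfn, buf, chunk, off, 0);

                    if (n < 0)
                            return total ? total : n;
                    total += n;
                    buf   += n;
                    paddr += n;
                    len   -= n;
            }
            return total;
    }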
diff --git a/arch/x86/kernel/crash_dump_64.c b/arch/x86/kernel/crash_dump_64.c index 15e6c6bc4a46..045b36cada65 100644 --- a/arch/x86/kernel/crash_dump_64.c +++ b/arch/x86/kernel/crash_dump_64.c | |||
| @@ -7,9 +7,11 @@ | |||
| 7 | 7 | ||
| 8 | #include <linux/errno.h> | 8 | #include <linux/errno.h> |
| 9 | #include <linux/crash_dump.h> | 9 | #include <linux/crash_dump.h> |
| 10 | #include <linux/uaccess.h> | ||
| 11 | #include <linux/io.h> | ||
| 10 | 12 | ||
| 11 | #include <asm/uaccess.h> | 13 | /* Stores the physical address of elf header of crash image. */ |
| 12 | #include <asm/io.h> | 14 | unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX; |
| 13 | 15 | ||
| 14 | /** | 16 | /** |
| 15 | * copy_oldmem_page - copy one page from "oldmem" | 17 | * copy_oldmem_page - copy one page from "oldmem" |
| @@ -25,7 +27,7 @@ | |||
| 25 | * in the current kernel. We stitch up a pte, similar to kmap_atomic. | 27 | * in the current kernel. We stitch up a pte, similar to kmap_atomic. |
| 26 | */ | 28 | */ |
| 27 | ssize_t copy_oldmem_page(unsigned long pfn, char *buf, | 29 | ssize_t copy_oldmem_page(unsigned long pfn, char *buf, |
| 28 | size_t csize, unsigned long offset, int userbuf) | 30 | size_t csize, unsigned long offset, int userbuf) |
| 29 | { | 31 | { |
| 30 | void *vaddr; | 32 | void *vaddr; |
| 31 | 33 | ||
| @@ -33,14 +35,16 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, | |||
| 33 | return 0; | 35 | return 0; |
| 34 | 36 | ||
| 35 | vaddr = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE); | 37 | vaddr = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE); |
| 38 | if (!vaddr) | ||
| 39 | return -ENOMEM; | ||
| 36 | 40 | ||
| 37 | if (userbuf) { | 41 | if (userbuf) { |
| 38 | if (copy_to_user(buf, (vaddr + offset), csize)) { | 42 | if (copy_to_user(buf, vaddr + offset, csize)) { |
| 39 | iounmap(vaddr); | 43 | iounmap(vaddr); |
| 40 | return -EFAULT; | 44 | return -EFAULT; |
| 41 | } | 45 | } |
| 42 | } else | 46 | } else |
| 43 | memcpy(buf, (vaddr + offset), csize); | 47 | memcpy(buf, vaddr + offset, csize); |
| 44 | 48 | ||
| 45 | iounmap(vaddr); | 49 | iounmap(vaddr); |
| 46 | return csize; | 50 | return csize; |
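Besides gaining the elfcorehdr_addr definition, the 64-bit copy routine now checks the ioremap() result instead of dereferencing a possible NULL mapping, reports the failure as -ENOMEM, and unmaps on every exit path. The map/check/copy/unmap pattern in isolation, kernel-buffer case only:

    /*
     * Illustration of the map/check/copy/unmap pattern used above
     * (kernel-buffer case only).
     */
    static ssize_t copy_one_oldmem_page(unsigned long pfn, char *buf,
                                        size_t csize, unsigned long offset)
    {
            void *vaddr = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE);

            if (!vaddr)                     /* the mapping can fail: say so */
                    return -ENOMEM;

            memcpy(buf, vaddr + offset, csize);
            iounmap(vaddr);                 /* always undo the mapping */
            return csize;
    }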
diff --git a/arch/x86/kernel/doublefault_32.c b/arch/x86/kernel/doublefault_32.c index a47798b59f07..b4f14c6c09d9 100644 --- a/arch/x86/kernel/doublefault_32.c +++ b/arch/x86/kernel/doublefault_32.c | |||
| @@ -66,6 +66,6 @@ struct tss_struct doublefault_tss __cacheline_aligned = { | |||
| 66 | .ds = __USER_DS, | 66 | .ds = __USER_DS, |
| 67 | .fs = __KERNEL_PERCPU, | 67 | .fs = __KERNEL_PERCPU, |
| 68 | 68 | ||
| 69 | .__cr3 = __pa(swapper_pg_dir) | 69 | .__cr3 = __pa_nodebug(swapper_pg_dir), |
| 70 | } | 70 | } |
| 71 | }; | 71 | }; |
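The static doublefault TSS initializer switches to __pa_nodebug(); presumably because, with CONFIG_DEBUG_VIRTUAL, __pa() expands to a checked out-of-line helper that cannot appear in a static initializer, while __pa_nodebug() remains plain address arithmetic. A sketch of that distinction; the macro bodies below are illustrative, not copied from the headers:

    /*
     * Illustrative macro bodies only (assumption, not copied from headers):
     * with CONFIG_DEBUG_VIRTUAL, __pa() goes through a checking helper and
     * is no longer usable in a static initializer; __pa_nodebug() stays
     * plain, constant-foldable arithmetic.
     */
    #ifdef CONFIG_DEBUG_VIRTUAL
    #define __pa(x)          __phys_addr((unsigned long)(x))    /* out-of-line check */
    #else
    #define __pa(x)          ((unsigned long)(x) - PAGE_OFFSET)
    #endif
    #define __pa_nodebug(x)  ((unsigned long)(x) - PAGE_OFFSET) /* plain arithmetic */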
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 11c11b8ec48d..2b69994fd3a8 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c | |||
| @@ -2,26 +2,49 @@ | |||
| 2 | * Debug Store support | 2 | * Debug Store support |
| 3 | * | 3 | * |
| 4 | * This provides a low-level interface to the hardware's Debug Store | 4 | * This provides a low-level interface to the hardware's Debug Store |
| 5 | * feature that is used for last branch recording (LBR) and | 5 | * feature that is used for branch trace store (BTS) and |
| 6 | * precise-event based sampling (PEBS). | 6 | * precise-event based sampling (PEBS). |
| 7 | * | 7 | * |
| 8 | * Different architectures use a different DS layout/pointer size. | 8 | * It manages: |
| 9 | * The below functions therefore work on a void*. | 9 | * - per-thread and per-cpu allocation of BTS and PEBS |
| 10 | * - buffer memory allocation (optional) | ||
| 11 | * - buffer overflow handling | ||
| 12 | * - buffer access | ||
| 10 | * | 13 | * |
| 14 | * It assumes: | ||
| 15 | * - get_task_struct on all parameter tasks | ||
| 16 | * - current is allowed to trace parameter tasks | ||
| 11 | * | 17 | * |
| 12 | * Since there is no user for PEBS, yet, only LBR (or branch | ||
| 13 | * trace store, BTS) is supported. | ||
| 14 | * | 18 | * |
| 15 | * | 19 | * Copyright (C) 2007-2008 Intel Corporation. |
| 16 | * Copyright (C) 2007 Intel Corporation. | 20 | * Markus Metzger <markus.t.metzger@intel.com>, 2007-2008 |
| 17 | * Markus Metzger <markus.t.metzger@intel.com>, Dec 2007 | ||
| 18 | */ | 21 | */ |
| 19 | 22 | ||
| 23 | |||
| 24 | #ifdef CONFIG_X86_DS | ||
| 25 | |||
| 20 | #include <asm/ds.h> | 26 | #include <asm/ds.h> |
| 21 | 27 | ||
| 22 | #include <linux/errno.h> | 28 | #include <linux/errno.h> |
| 23 | #include <linux/string.h> | 29 | #include <linux/string.h> |
| 24 | #include <linux/slab.h> | 30 | #include <linux/slab.h> |
| 31 | #include <linux/sched.h> | ||
| 32 | #include <linux/mm.h> | ||
| 33 | |||
| 34 | |||
| 35 | /* | ||
| 36 | * The configuration for a particular DS hardware implementation. | ||
| 37 | */ | ||
| 38 | struct ds_configuration { | ||
| 39 | /* the size of the DS structure in bytes */ | ||
| 40 | unsigned char sizeof_ds; | ||
| 41 | /* the size of one pointer-typed field in the DS structure in bytes; | ||
| 42 | this covers the first 8 fields related to buffer management. */ | ||
| 43 | unsigned char sizeof_field; | ||
| 44 | /* the size of a BTS/PEBS record in bytes */ | ||
| 45 | unsigned char sizeof_rec[2]; | ||
| 46 | }; | ||
| 47 | static struct ds_configuration ds_cfg; | ||
| 25 | 48 | ||
| 26 | 49 | ||
| 27 | /* | 50 | /* |
| @@ -44,378 +67,747 @@ | |||
| 44 | * (interrupt occurs when write pointer passes interrupt pointer) | 67 | * (interrupt occurs when write pointer passes interrupt pointer) |
| 45 | * - value to which counter is reset following counter overflow | 68 | * - value to which counter is reset following counter overflow |
| 46 | * | 69 | * |
| 47 | * On later architectures, the last branch recording hardware uses | 70 | * Later architectures use 64bit pointers throughout, whereas earlier |
| 48 | * 64bit pointers even in 32bit mode. | 71 | * architectures use 32bit pointers in 32bit mode. |
| 49 | * | ||
| 50 | * | ||
| 51 | * Branch Trace Store (BTS) records store information about control | ||
| 52 | * flow changes. They at least provide the following information: | ||
| 53 | * - source linear address | ||
| 54 | * - destination linear address | ||
| 55 | * | 72 | * |
| 56 | * Netburst supported a predicated bit that had been dropped in later | ||
| 57 | * architectures. We do not suppor it. | ||
| 58 | * | 73 | * |
| 74 | * We compute the base address for the first 8 fields based on: | ||
| 75 | * - the field size stored in the DS configuration | ||
| 76 | * - the relative field position | ||
| 77 | * - an offset giving the start of the respective region | ||
| 59 | * | 78 | * |
| 60 | * In order to abstract from the actual DS and BTS layout, we describe | 79 | * This offset is further used to index various arrays holding |
| 61 | * the access to the relevant fields. | 80 | * information for BTS and PEBS at the respective index. |
| 62 | * Thanks to Andi Kleen for proposing this design. | ||
| 63 | * | 81 | * |
| 64 | * The implementation, however, is not as general as it might seem. In | 82 | * On later 32bit processors, we only access the lower 32bit of the |
| 65 | * order to stay somewhat simple and efficient, we assume an | 83 | * 64bit pointer fields. The upper halves will be zeroed out. |
| 66 | * underlying unsigned type (mostly a pointer type) and we expect the | ||
| 67 | * field to be at least as big as that type. | ||
| 68 | */ | 84 | */ |
| 69 | 85 | ||
| 70 | /* | 86 | enum ds_field { |
| 71 | * A special from_ip address to indicate that the BTS record is an | 87 | ds_buffer_base = 0, |
| 72 | * info record that needs to be interpreted or skipped. | 88 | ds_index, |
| 73 | */ | 89 | ds_absolute_maximum, |
| 74 | #define BTS_ESCAPE_ADDRESS (-1) | 90 | ds_interrupt_threshold, |
| 91 | }; | ||
| 75 | 92 | ||
| 76 | /* | 93 | enum ds_qualifier { |
| 77 | * A field access descriptor | 94 | ds_bts = 0, |
| 78 | */ | 95 | ds_pebs |
| 79 | struct access_desc { | ||
| 80 | unsigned char offset; | ||
| 81 | unsigned char size; | ||
| 82 | }; | 96 | }; |
| 83 | 97 | ||
| 98 | static inline unsigned long ds_get(const unsigned char *base, | ||
| 99 | enum ds_qualifier qual, enum ds_field field) | ||
| 100 | { | ||
| 101 | base += (ds_cfg.sizeof_field * (field + (4 * qual))); | ||
| 102 | return *(unsigned long *)base; | ||
| 103 | } | ||
| 104 | |||
| 105 | static inline void ds_set(unsigned char *base, enum ds_qualifier qual, | ||
| 106 | enum ds_field field, unsigned long value) | ||
| 107 | { | ||
| 108 | base += (ds_cfg.sizeof_field * (field + (4 * qual))); | ||
| 109 | (*(unsigned long *)base) = value; | ||
| 110 | } | ||
| 111 | |||
| 112 | |||
| 84 | /* | 113 | /* |
| 85 | * The configuration for a particular DS/BTS hardware implementation. | 114 | * Locking is done only for allocating BTS or PEBS resources and for |
| 115 | * guarding context and buffer memory allocation. | ||
| 116 | * | ||
| 117 | * Most functions require the current task to own the ds context part | ||
| 118 | * they are going to access. All the locking is done when validating | ||
| 119 | * access to the context. | ||
| 86 | */ | 120 | */ |
| 87 | struct ds_configuration { | 121 | static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock); |
| 88 | /* the DS configuration */ | ||
| 89 | unsigned char sizeof_ds; | ||
| 90 | struct access_desc bts_buffer_base; | ||
| 91 | struct access_desc bts_index; | ||
| 92 | struct access_desc bts_absolute_maximum; | ||
| 93 | struct access_desc bts_interrupt_threshold; | ||
| 94 | /* the BTS configuration */ | ||
| 95 | unsigned char sizeof_bts; | ||
| 96 | struct access_desc from_ip; | ||
| 97 | struct access_desc to_ip; | ||
| 98 | /* BTS variants used to store additional information like | ||
| 99 | timestamps */ | ||
| 100 | struct access_desc info_type; | ||
| 101 | struct access_desc info_data; | ||
| 102 | unsigned long debugctl_mask; | ||
| 103 | }; | ||
| 104 | 122 | ||
| 105 | /* | 123 | /* |
| 106 | * The global configuration used by the below accessor functions | 124 | * Validate that the current task is allowed to access the BTS/PEBS |
| 125 | * buffer of the parameter task. | ||
| 126 | * | ||
| 127 | * Returns 0, if access is granted; -Eerrno, otherwise. | ||
| 107 | */ | 128 | */ |
| 108 | static struct ds_configuration ds_cfg; | 129 | static inline int ds_validate_access(struct ds_context *context, |
| 130 | enum ds_qualifier qual) | ||
| 131 | { | ||
| 132 | if (!context) | ||
| 133 | return -EPERM; | ||
| 134 | |||
| 135 | if (context->owner[qual] == current) | ||
| 136 | return 0; | ||
| 137 | |||
| 138 | return -EPERM; | ||
| 139 | } | ||
| 140 | |||
| 109 | 141 | ||
| 110 | /* | 142 | /* |
| 111 | * Accessor functions for some DS and BTS fields using the above | 143 | * We either support (system-wide) per-cpu or per-thread allocation. |
| 112 | * global ptrace_bts_cfg. | 144 | * We distinguish the two based on the task_struct pointer, where a |
| 145 | * NULL pointer indicates per-cpu allocation for the current cpu. | ||
| 146 | * | ||
| 147 | * Allocations are use-counted. As soon as resources are allocated, | ||
| 148 | * further allocations must be of the same type (per-cpu or | ||
| 149 | * per-thread). We model this by counting allocations (i.e. the number | ||
| 150 | * of tracers of a certain type) for one type negatively: | ||
| 151 | * =0 no tracers | ||
| 152 | * >0 number of per-thread tracers | ||
| 153 | * <0 number of per-cpu tracers | ||
| 154 | * | ||
| 155 | * The below functions to get and put tracers and to check the | ||
| 156 | * allocation type require the ds_lock to be held by the caller. | ||
| 157 | * | ||
| 158 | * Tracers essentially gives the number of ds contexts for a certain | ||
| 159 | * type of allocation. | ||
| 113 | */ | 160 | */ |
| 114 | static inline unsigned long get_bts_buffer_base(char *base) | 161 | static long tracers; |
| 162 | |||
| 163 | static inline void get_tracer(struct task_struct *task) | ||
| 115 | { | 164 | { |
| 116 | return *(unsigned long *)(base + ds_cfg.bts_buffer_base.offset); | 165 | tracers += (task ? 1 : -1); |
| 117 | } | 166 | } |
| 118 | static inline void set_bts_buffer_base(char *base, unsigned long value) | 167 | |
| 168 | static inline void put_tracer(struct task_struct *task) | ||
| 119 | { | 169 | { |
| 120 | (*(unsigned long *)(base + ds_cfg.bts_buffer_base.offset)) = value; | 170 | tracers -= (task ? 1 : -1); |
| 121 | } | 171 | } |
| 122 | static inline unsigned long get_bts_index(char *base) | 172 | |
| 173 | static inline int check_tracer(struct task_struct *task) | ||
| 123 | { | 174 | { |
| 124 | return *(unsigned long *)(base + ds_cfg.bts_index.offset); | 175 | return (task ? (tracers >= 0) : (tracers <= 0)); |
| 125 | } | 176 | } |
| 126 | static inline void set_bts_index(char *base, unsigned long value) | 177 | |
| 178 | |||
| 179 | /* | ||
| 180 | * The DS context is either attached to a thread or to a cpu: | ||
| 181 | * - in the former case, the thread_struct contains a pointer to the | ||
| 182 | * attached context. | ||
| 183 | * - in the latter case, we use a static array of per-cpu context | ||
| 184 | * pointers. | ||
| 185 | * | ||
| 186 | * Contexts are use-counted. They are allocated on first access and | ||
| 187 | * deallocated when the last user puts the context. | ||
| 188 | * | ||
| 189 | * We distinguish between an allocating and a non-allocating get of a | ||
| 190 | * context: | ||
| 191 | * - the allocating get is used for requesting BTS/PEBS resources. It | ||
| 192 | * requires the caller to hold the global ds_lock. | ||
| 193 | * - the non-allocating get is used for all other cases. A | ||
| 194 | * non-existing context indicates an error. It acquires and releases | ||
| 195 | * the ds_lock itself for obtaining the context. | ||
| 196 | * | ||
| 197 | * A context and its DS configuration are allocated and deallocated | ||
| 198 | * together. A context always has a DS configuration of the | ||
| 199 | * appropriate size. | ||
| 200 | */ | ||
| 201 | static DEFINE_PER_CPU(struct ds_context *, system_context); | ||
| 202 | |||
| 203 | #define this_system_context per_cpu(system_context, smp_processor_id()) | ||
| 204 | |||
| 205 | /* | ||
| 206 | * Returns the pointer to the parameter task's context or to the | ||
| 207 | * system-wide context, if task is NULL. | ||
| 208 | * | ||
| 209 | * Increases the use count of the returned context, if not NULL. | ||
| 210 | */ | ||
| 211 | static inline struct ds_context *ds_get_context(struct task_struct *task) | ||
| 127 | { | 212 | { |
| 128 | (*(unsigned long *)(base + ds_cfg.bts_index.offset)) = value; | 213 | struct ds_context *context; |
| 214 | |||
| 215 | spin_lock(&ds_lock); | ||
| 216 | |||
| 217 | context = (task ? task->thread.ds_ctx : this_system_context); | ||
| 218 | if (context) | ||
| 219 | context->count++; | ||
| 220 | |||
| 221 | spin_unlock(&ds_lock); | ||
| 222 | |||
| 223 | return context; | ||
| 129 | } | 224 | } |
| 130 | static inline unsigned long get_bts_absolute_maximum(char *base) | 225 | |
| 226 | /* | ||
| 227 | * Same as ds_get_context, but allocates the context and it's DS | ||
| 228 | * structure, if necessary; returns NULL; if out of memory. | ||
| 229 | * | ||
| 230 | * pre: requires ds_lock to be held | ||
| 231 | */ | ||
| 232 | static inline struct ds_context *ds_alloc_context(struct task_struct *task) | ||
| 131 | { | 233 | { |
| 132 | return *(unsigned long *)(base + ds_cfg.bts_absolute_maximum.offset); | 234 | struct ds_context **p_context = |
| 235 | (task ? &task->thread.ds_ctx : &this_system_context); | ||
| 236 | struct ds_context *context = *p_context; | ||
| 237 | |||
| 238 | if (!context) { | ||
| 239 | context = kzalloc(sizeof(*context), GFP_KERNEL); | ||
| 240 | |||
| 241 | if (!context) | ||
| 242 | return NULL; | ||
| 243 | |||
| 244 | context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL); | ||
| 245 | if (!context->ds) { | ||
| 246 | kfree(context); | ||
| 247 | return NULL; | ||
| 248 | } | ||
| 249 | |||
| 250 | *p_context = context; | ||
| 251 | |||
| 252 | context->this = p_context; | ||
| 253 | context->task = task; | ||
| 254 | |||
| 255 | if (task) | ||
| 256 | set_tsk_thread_flag(task, TIF_DS_AREA_MSR); | ||
| 257 | |||
| 258 | if (!task || (task == current)) | ||
| 259 | wrmsr(MSR_IA32_DS_AREA, (unsigned long)context->ds, 0); | ||
| 260 | |||
| 261 | get_tracer(task); | ||
| 262 | } | ||
| 263 | |||
| 264 | context->count++; | ||
| 265 | |||
| 266 | return context; | ||
| 133 | } | 267 | } |
| 134 | static inline void set_bts_absolute_maximum(char *base, unsigned long value) | 268 | |
| 269 | /* | ||
| 270 | * Decreases the use count of the parameter context, if not NULL. | ||
| 271 | * Deallocates the context, if the use count reaches zero. | ||
| 272 | */ | ||
| 273 | static inline void ds_put_context(struct ds_context *context) | ||
| 135 | { | 274 | { |
| 136 | (*(unsigned long *)(base + ds_cfg.bts_absolute_maximum.offset)) = value; | 275 | if (!context) |
| 276 | return; | ||
| 277 | |||
| 278 | spin_lock(&ds_lock); | ||
| 279 | |||
| 280 | if (--context->count) | ||
| 281 | goto out; | ||
| 282 | |||
| 283 | *(context->this) = NULL; | ||
| 284 | |||
| 285 | if (context->task) | ||
| 286 | clear_tsk_thread_flag(context->task, TIF_DS_AREA_MSR); | ||
| 287 | |||
| 288 | if (!context->task || (context->task == current)) | ||
| 289 | wrmsrl(MSR_IA32_DS_AREA, 0); | ||
| 290 | |||
| 291 | put_tracer(context->task); | ||
| 292 | |||
| 293 | /* free any leftover buffers from tracers that did not | ||
| 294 | * deallocate them properly. */ | ||
| 295 | kfree(context->buffer[ds_bts]); | ||
| 296 | kfree(context->buffer[ds_pebs]); | ||
| 297 | kfree(context->ds); | ||
| 298 | kfree(context); | ||
| 299 | out: | ||
| 300 | spin_unlock(&ds_lock); | ||
| 137 | } | 301 | } |
| 138 | static inline unsigned long get_bts_interrupt_threshold(char *base) | 302 | |
| 303 | |||
| 304 | /* | ||
| 305 | * Handle a buffer overflow | ||
| 306 | * | ||
| 307 | * task: the task whose buffers are overflowing; | ||
| 308 | * NULL for a buffer overflow on the current cpu | ||
| 309 | * context: the ds context | ||
| 310 | * qual: the buffer type | ||
| 311 | */ | ||
| 312 | static void ds_overflow(struct task_struct *task, struct ds_context *context, | ||
| 313 | enum ds_qualifier qual) | ||
| 139 | { | 314 | { |
| 140 | return *(unsigned long *)(base + ds_cfg.bts_interrupt_threshold.offset); | 315 | if (!context) |
| 316 | return; | ||
| 317 | |||
| 318 | if (context->callback[qual]) | ||
| 319 | (*context->callback[qual])(task); | ||
| 320 | |||
| 321 | /* todo: do some more overflow handling */ | ||
| 141 | } | 322 | } |
| 142 | static inline void set_bts_interrupt_threshold(char *base, unsigned long value) | 323 | |
| 324 | |||
| 325 | /* | ||
| 326 | * Allocate a non-pageable buffer of the parameter size. | ||
| 327 | * Checks the memory and the locked memory rlimit. | ||
| 328 | * | ||
| 329 | * Returns the buffer, if successful; | ||
| 330 | * NULL, if out of memory or rlimit exceeded. | ||
| 331 | * | ||
| 332 | * size: the requested buffer size in bytes | ||
| 333 | * pages (out): if not NULL, contains the number of pages reserved | ||
| 334 | */ | ||
| 335 | static inline void *ds_allocate_buffer(size_t size, unsigned int *pages) | ||
| 143 | { | 336 | { |
| 144 | (*(unsigned long *)(base + ds_cfg.bts_interrupt_threshold.offset)) = value; | 337 | unsigned long rlim, vm, pgsz; |
| 338 | void *buffer; | ||
| 339 | |||
| 340 | pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; | ||
| 341 | |||
| 342 | rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; | ||
| 343 | vm = current->mm->total_vm + pgsz; | ||
| 344 | if (rlim < vm) | ||
| 345 | return NULL; | ||
| 346 | |||
| 347 | rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; | ||
| 348 | vm = current->mm->locked_vm + pgsz; | ||
| 349 | if (rlim < vm) | ||
| 350 | return NULL; | ||
| 351 | |||
| 352 | buffer = kzalloc(size, GFP_KERNEL); | ||
| 353 | if (!buffer) | ||
| 354 | return NULL; | ||
| 355 | |||
| 356 | current->mm->total_vm += pgsz; | ||
| 357 | current->mm->locked_vm += pgsz; | ||
| 358 | |||
| 359 | if (pages) | ||
| 360 | *pages = pgsz; | ||
| 361 | |||
| 362 | return buffer; | ||
| 145 | } | 363 | } |
| 146 | static inline unsigned long get_from_ip(char *base) | 364 | |
| 365 | static int ds_request(struct task_struct *task, void *base, size_t size, | ||
| 366 | ds_ovfl_callback_t ovfl, enum ds_qualifier qual) | ||
| 147 | { | 367 | { |
| 148 | return *(unsigned long *)(base + ds_cfg.from_ip.offset); | 368 | struct ds_context *context; |
| 369 | unsigned long buffer, adj; | ||
| 370 | const unsigned long alignment = (1 << 3); | ||
| 371 | int error = 0; | ||
| 372 | |||
| 373 | if (!ds_cfg.sizeof_ds) | ||
| 374 | return -EOPNOTSUPP; | ||
| 375 | |||
| 376 | /* we require some space to do alignment adjustments below */ | ||
| 377 | if (size < (alignment + ds_cfg.sizeof_rec[qual])) | ||
| 378 | return -EINVAL; | ||
| 379 | |||
| 380 | /* buffer overflow notification is not yet implemented */ | ||
| 381 | if (ovfl) | ||
| 382 | return -EOPNOTSUPP; | ||
| 383 | |||
| 384 | |||
| 385 | spin_lock(&ds_lock); | ||
| 386 | |||
| 387 | if (!check_tracer(task)) | ||
| 388 | return -EPERM; | ||
| 389 | |||
| 390 | error = -ENOMEM; | ||
| 391 | context = ds_alloc_context(task); | ||
| 392 | if (!context) | ||
| 393 | goto out_unlock; | ||
| 394 | |||
| 395 | error = -EALREADY; | ||
| 396 | if (context->owner[qual] == current) | ||
| 397 | goto out_unlock; | ||
| 398 | error = -EPERM; | ||
| 399 | if (context->owner[qual] != NULL) | ||
| 400 | goto out_unlock; | ||
| 401 | context->owner[qual] = current; | ||
| 402 | |||
| 403 | spin_unlock(&ds_lock); | ||
| 404 | |||
| 405 | |||
| 406 | error = -ENOMEM; | ||
| 407 | if (!base) { | ||
| 408 | base = ds_allocate_buffer(size, &context->pages[qual]); | ||
| 409 | if (!base) | ||
| 410 | goto out_release; | ||
| 411 | |||
| 412 | context->buffer[qual] = base; | ||
| 413 | } | ||
| 414 | error = 0; | ||
| 415 | |||
| 416 | context->callback[qual] = ovfl; | ||
| 417 | |||
| 418 | /* adjust the buffer address and size to meet alignment | ||
| 419 | * constraints: | ||
| 420 | * - buffer is double-word aligned | ||
| 421 | * - size is multiple of record size | ||
| 422 | * | ||
| 423 | * We checked the size at the very beginning; we have enough | ||
| 424 | * space to do the adjustment. | ||
| 425 | */ | ||
| 426 | buffer = (unsigned long)base; | ||
| 427 | |||
| 428 | adj = ALIGN(buffer, alignment) - buffer; | ||
| 429 | buffer += adj; | ||
| 430 | size -= adj; | ||
| 431 | |||
| 432 | size /= ds_cfg.sizeof_rec[qual]; | ||
| 433 | size *= ds_cfg.sizeof_rec[qual]; | ||
| 434 | |||
| 435 | ds_set(context->ds, qual, ds_buffer_base, buffer); | ||
| 436 | ds_set(context->ds, qual, ds_index, buffer); | ||
| 437 | ds_set(context->ds, qual, ds_absolute_maximum, buffer + size); | ||
| 438 | |||
| 439 | if (ovfl) { | ||
| 440 | /* todo: select a suitable interrupt threshold */ | ||
| 441 | } else | ||
| 442 | ds_set(context->ds, qual, | ||
| 443 | ds_interrupt_threshold, buffer + size + 1); | ||
| 444 | |||
| 445 | /* we keep the context until ds_release */ | ||
| 446 | return error; | ||
| 447 | |||
| 448 | out_release: | ||
| 449 | context->owner[qual] = NULL; | ||
| 450 | ds_put_context(context); | ||
| 451 | return error; | ||
| 452 | |||
| 453 | out_unlock: | ||
| 454 | spin_unlock(&ds_lock); | ||
| 455 | ds_put_context(context); | ||
| 456 | return error; | ||
| 149 | } | 457 | } |
| 150 | static inline void set_from_ip(char *base, unsigned long value) | 458 | |
| 459 | int ds_request_bts(struct task_struct *task, void *base, size_t size, | ||
| 460 | ds_ovfl_callback_t ovfl) | ||
| 151 | { | 461 | { |
| 152 | (*(unsigned long *)(base + ds_cfg.from_ip.offset)) = value; | 462 | return ds_request(task, base, size, ovfl, ds_bts); |
| 153 | } | 463 | } |
| 154 | static inline unsigned long get_to_ip(char *base) | 464 | |
| 465 | int ds_request_pebs(struct task_struct *task, void *base, size_t size, | ||
| 466 | ds_ovfl_callback_t ovfl) | ||
| 155 | { | 467 | { |
| 156 | return *(unsigned long *)(base + ds_cfg.to_ip.offset); | 468 | return ds_request(task, base, size, ovfl, ds_pebs); |
| 157 | } | 469 | } |
| 158 | static inline void set_to_ip(char *base, unsigned long value) | 470 | |
| 471 | static int ds_release(struct task_struct *task, enum ds_qualifier qual) | ||
| 159 | { | 472 | { |
| 160 | (*(unsigned long *)(base + ds_cfg.to_ip.offset)) = value; | 473 | struct ds_context *context; |
| 474 | int error; | ||
| 475 | |||
| 476 | context = ds_get_context(task); | ||
| 477 | error = ds_validate_access(context, qual); | ||
| 478 | if (error < 0) | ||
| 479 | goto out; | ||
| 480 | |||
| 481 | kfree(context->buffer[qual]); | ||
| 482 | context->buffer[qual] = NULL; | ||
| 483 | |||
| 484 | current->mm->total_vm -= context->pages[qual]; | ||
| 485 | current->mm->locked_vm -= context->pages[qual]; | ||
| 486 | context->pages[qual] = 0; | ||
| 487 | context->owner[qual] = NULL; | ||
| 488 | |||
| 489 | /* | ||
| 490 | * we put the context twice: | ||
| 491 | * once for the ds_get_context | ||
| 492 | * once for the corresponding ds_request | ||
| 493 | */ | ||
| 494 | ds_put_context(context); | ||
| 495 | out: | ||
| 496 | ds_put_context(context); | ||
| 497 | return error; | ||
| 161 | } | 498 | } |
| 162 | static inline unsigned char get_info_type(char *base) | 499 | |
| 500 | int ds_release_bts(struct task_struct *task) | ||
| 163 | { | 501 | { |
| 164 | return *(unsigned char *)(base + ds_cfg.info_type.offset); | 502 | return ds_release(task, ds_bts); |
| 165 | } | 503 | } |
| 166 | static inline void set_info_type(char *base, unsigned char value) | 504 | |
| 505 | int ds_release_pebs(struct task_struct *task) | ||
| 167 | { | 506 | { |
| 168 | (*(unsigned char *)(base + ds_cfg.info_type.offset)) = value; | 507 | return ds_release(task, ds_pebs); |
| 169 | } | 508 | } |
| 170 | static inline unsigned long get_info_data(char *base) | 509 | |
| 510 | static int ds_get_index(struct task_struct *task, size_t *pos, | ||
| 511 | enum ds_qualifier qual) | ||
| 171 | { | 512 | { |
| 172 | return *(unsigned long *)(base + ds_cfg.info_data.offset); | 513 | struct ds_context *context; |
| 514 | unsigned long base, index; | ||
| 515 | int error; | ||
| 516 | |||
| 517 | context = ds_get_context(task); | ||
| 518 | error = ds_validate_access(context, qual); | ||
| 519 | if (error < 0) | ||
| 520 | goto out; | ||
| 521 | |||
| 522 | base = ds_get(context->ds, qual, ds_buffer_base); | ||
| 523 | index = ds_get(context->ds, qual, ds_index); | ||
| 524 | |||
| 525 | error = ((index - base) / ds_cfg.sizeof_rec[qual]); | ||
| 526 | if (pos) | ||
| 527 | *pos = error; | ||
| 528 | out: | ||
| 529 | ds_put_context(context); | ||
| 530 | return error; | ||
| 173 | } | 531 | } |
| 174 | static inline void set_info_data(char *base, unsigned long value) | 532 | |
| 533 | int ds_get_bts_index(struct task_struct *task, size_t *pos) | ||
| 175 | { | 534 | { |
| 176 | (*(unsigned long *)(base + ds_cfg.info_data.offset)) = value; | 535 | return ds_get_index(task, pos, ds_bts); |
| 177 | } | 536 | } |
| 178 | 537 | ||
| 538 | int ds_get_pebs_index(struct task_struct *task, size_t *pos) | ||
| 539 | { | ||
| 540 | return ds_get_index(task, pos, ds_pebs); | ||
| 541 | } | ||
| 179 | 542 | ||
| 180 | int ds_allocate(void **dsp, size_t bts_size_in_bytes) | 543 | static int ds_get_end(struct task_struct *task, size_t *pos, |
| 544 | enum ds_qualifier qual) | ||
| 181 | { | 545 | { |
| 182 | size_t bts_size_in_records; | 546 | struct ds_context *context; |
| 183 | unsigned long bts; | 547 | unsigned long base, end; |
| 184 | void *ds; | 548 | int error; |
| 549 | |||
| 550 | context = ds_get_context(task); | ||
| 551 | error = ds_validate_access(context, qual); | ||
| 552 | if (error < 0) | ||
| 553 | goto out; | ||
| 554 | |||
| 555 | base = ds_get(context->ds, qual, ds_buffer_base); | ||
| 556 | end = ds_get(context->ds, qual, ds_absolute_maximum); | ||
| 557 | |||
| 558 | error = ((end - base) / ds_cfg.sizeof_rec[qual]); | ||
| 559 | if (pos) | ||
| 560 | *pos = error; | ||
| 561 | out: | ||
| 562 | ds_put_context(context); | ||
| 563 | return error; | ||
| 564 | } | ||
| 185 | 565 | ||
| 186 | if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) | 566 | int ds_get_bts_end(struct task_struct *task, size_t *pos) |
| 187 | return -EOPNOTSUPP; | 567 | { |
| 568 | return ds_get_end(task, pos, ds_bts); | ||
| 569 | } | ||
| 188 | 570 | ||
| 189 | if (bts_size_in_bytes < 0) | 571 | int ds_get_pebs_end(struct task_struct *task, size_t *pos) |
| 190 | return -EINVAL; | 572 | { |
| 573 | return ds_get_end(task, pos, ds_pebs); | ||
| 574 | } | ||
| 191 | 575 | ||
| 192 | bts_size_in_records = | 576 | static int ds_access(struct task_struct *task, size_t index, |
| 193 | bts_size_in_bytes / ds_cfg.sizeof_bts; | 577 | const void **record, enum ds_qualifier qual) |
| 194 | bts_size_in_bytes = | 578 | { |
| 195 | bts_size_in_records * ds_cfg.sizeof_bts; | 579 | struct ds_context *context; |
| 580 | unsigned long base, idx; | ||
| 581 | int error; | ||
| 196 | 582 | ||
| 197 | if (bts_size_in_bytes <= 0) | 583 | if (!record) |
| 198 | return -EINVAL; | 584 | return -EINVAL; |
| 199 | 585 | ||
| 200 | bts = (unsigned long)kzalloc(bts_size_in_bytes, GFP_KERNEL); | 586 | context = ds_get_context(task); |
| 201 | 587 | error = ds_validate_access(context, qual); | |
| 202 | if (!bts) | 588 | if (error < 0) |
| 203 | return -ENOMEM; | 589 | goto out; |
| 204 | 590 | ||
| 205 | ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL); | 591 | base = ds_get(context->ds, qual, ds_buffer_base); |
| 592 | idx = base + (index * ds_cfg.sizeof_rec[qual]); | ||
| 206 | 593 | ||
| 207 | if (!ds) { | 594 | error = -EINVAL; |
| 208 | kfree((void *)bts); | 595 | if (idx > ds_get(context->ds, qual, ds_absolute_maximum)) |
| 209 | return -ENOMEM; | 596 | goto out; |
| 210 | } | ||
| 211 | |||
| 212 | set_bts_buffer_base(ds, bts); | ||
| 213 | set_bts_index(ds, bts); | ||
| 214 | set_bts_absolute_maximum(ds, bts + bts_size_in_bytes); | ||
| 215 | set_bts_interrupt_threshold(ds, bts + bts_size_in_bytes + 1); | ||
| 216 | 597 | ||
| 217 | *dsp = ds; | 598 | *record = (const void *)idx; |
| 218 | return 0; | 599 | error = ds_cfg.sizeof_rec[qual]; |
| 600 | out: | ||
| 601 | ds_put_context(context); | ||
| 602 | return error; | ||
| 219 | } | 603 | } |
| 220 | 604 | ||
| 221 | int ds_free(void **dsp) | 605 | int ds_access_bts(struct task_struct *task, size_t index, const void **record) |
| 222 | { | 606 | { |
| 223 | if (*dsp) { | 607 | return ds_access(task, index, record, ds_bts); |
| 224 | kfree((void *)get_bts_buffer_base(*dsp)); | ||
| 225 | kfree(*dsp); | ||
| 226 | *dsp = NULL; | ||
| 227 | } | ||
| 228 | return 0; | ||
| 229 | } | 608 | } |
| 230 | 609 | ||
| 231 | int ds_get_bts_size(void *ds) | 610 | int ds_access_pebs(struct task_struct *task, size_t index, const void **record) |
| 232 | { | 611 | { |
| 233 | int size_in_bytes; | 612 | return ds_access(task, index, record, ds_pebs); |
| 234 | |||
| 235 | if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) | ||
| 236 | return -EOPNOTSUPP; | ||
| 237 | |||
| 238 | if (!ds) | ||
| 239 | return 0; | ||
| 240 | |||
| 241 | size_in_bytes = | ||
| 242 | get_bts_absolute_maximum(ds) - | ||
| 243 | get_bts_buffer_base(ds); | ||
| 244 | return size_in_bytes; | ||
| 245 | } | 613 | } |
| 246 | 614 | ||
| 247 | int ds_get_bts_end(void *ds) | 615 | static int ds_write(struct task_struct *task, const void *record, size_t size, |
| 616 | enum ds_qualifier qual, int force) | ||
| 248 | { | 617 | { |
| 249 | int size_in_bytes = ds_get_bts_size(ds); | 618 | struct ds_context *context; |
| 250 | 619 | int error; | |
| 251 | if (size_in_bytes <= 0) | ||
| 252 | return size_in_bytes; | ||
| 253 | 620 | ||
| 254 | return size_in_bytes / ds_cfg.sizeof_bts; | 621 | if (!record) |
| 255 | } | 622 | return -EINVAL; |
| 256 | 623 | ||
| 257 | int ds_get_bts_index(void *ds) | 624 | error = -EPERM; |
| 258 | { | 625 | context = ds_get_context(task); |
| 259 | int index_offset_in_bytes; | 626 | if (!context) |
| 627 | goto out; | ||
| 260 | 628 | ||
| 261 | if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) | 629 | if (!force) { |
| 262 | return -EOPNOTSUPP; | 630 | error = ds_validate_access(context, qual); |
| 631 | if (error < 0) | ||
| 632 | goto out; | ||
| 633 | } | ||
| 263 | 634 | ||
| 264 | index_offset_in_bytes = | 635 | error = 0; |
| 265 | get_bts_index(ds) - | 636 | while (size) { |
| 266 | get_bts_buffer_base(ds); | 637 | unsigned long base, index, end, write_end, int_th; |
| 638 | unsigned long write_size, adj_write_size; | ||
| 639 | |||
| 640 | /* | ||
| 641 | * write as much as possible without producing an | ||
| 642 | * overflow interrupt. | ||
| 643 | * | ||
| 644 | * interrupt_threshold must either be | ||
| 645 | * - bigger than absolute_maximum or | ||
| 646 | * - point to a record between buffer_base and absolute_maximum | ||
| 647 | * | ||
| 648 | * index points to a valid record. | ||
| 649 | */ | ||
| 650 | base = ds_get(context->ds, qual, ds_buffer_base); | ||
| 651 | index = ds_get(context->ds, qual, ds_index); | ||
| 652 | end = ds_get(context->ds, qual, ds_absolute_maximum); | ||
| 653 | int_th = ds_get(context->ds, qual, ds_interrupt_threshold); | ||
| 654 | |||
| 655 | write_end = min(end, int_th); | ||
| 656 | |||
| 657 | /* if we are already beyond the interrupt threshold, | ||
| 658 | * we fill the entire buffer */ | ||
| 659 | if (write_end <= index) | ||
| 660 | write_end = end; | ||
| 661 | |||
| 662 | if (write_end <= index) | ||
| 663 | goto out; | ||
| 664 | |||
| 665 | write_size = min((unsigned long) size, write_end - index); | ||
| 666 | memcpy((void *)index, record, write_size); | ||
| 667 | |||
| 668 | record = (const char *)record + write_size; | ||
| 669 | size -= write_size; | ||
| 670 | error += write_size; | ||
| 671 | |||
| 672 | adj_write_size = write_size / ds_cfg.sizeof_rec[qual]; | ||
| 673 | adj_write_size *= ds_cfg.sizeof_rec[qual]; | ||
| 674 | |||
| 675 | /* zero out trailing bytes */ | ||
| 676 | memset((char *)index + write_size, 0, | ||
| 677 | adj_write_size - write_size); | ||
| 678 | index += adj_write_size; | ||
| 679 | |||
| 680 | if (index >= end) | ||
| 681 | index = base; | ||
| 682 | ds_set(context->ds, qual, ds_index, index); | ||
| 683 | |||
| 684 | if (index >= int_th) | ||
| 685 | ds_overflow(task, context, qual); | ||
| 686 | } | ||
| 267 | 687 | ||
| 268 | return index_offset_in_bytes / ds_cfg.sizeof_bts; | 688 | out: |
| 689 | ds_put_context(context); | ||
| 690 | return error; | ||
| 269 | } | 691 | } |
| 270 | 692 | ||
| 271 | int ds_set_overflow(void *ds, int method) | 693 | int ds_write_bts(struct task_struct *task, const void *record, size_t size) |
| 272 | { | 694 | { |
| 273 | switch (method) { | 695 | return ds_write(task, record, size, ds_bts, /* force = */ 0); |
| 274 | case DS_O_SIGNAL: | ||
| 275 | return -EOPNOTSUPP; | ||
| 276 | case DS_O_WRAP: | ||
| 277 | return 0; | ||
| 278 | default: | ||
| 279 | return -EINVAL; | ||
| 280 | } | ||
| 281 | } | 696 | } |
| 282 | 697 | ||
| 283 | int ds_get_overflow(void *ds) | 698 | int ds_write_pebs(struct task_struct *task, const void *record, size_t size) |
| 284 | { | 699 | { |
| 285 | return DS_O_WRAP; | 700 | return ds_write(task, record, size, ds_pebs, /* force = */ 0); |
| 286 | } | 701 | } |
| 287 | 702 | ||
| 288 | int ds_clear(void *ds) | 703 | int ds_unchecked_write_bts(struct task_struct *task, |
| 704 | const void *record, size_t size) | ||
| 289 | { | 705 | { |
| 290 | int bts_size = ds_get_bts_size(ds); | 706 | return ds_write(task, record, size, ds_bts, /* force = */ 1); |
| 291 | unsigned long bts_base; | ||
| 292 | |||
| 293 | if (bts_size <= 0) | ||
| 294 | return bts_size; | ||
| 295 | |||
| 296 | bts_base = get_bts_buffer_base(ds); | ||
| 297 | memset((void *)bts_base, 0, bts_size); | ||
| 298 | |||
| 299 | set_bts_index(ds, bts_base); | ||
| 300 | return 0; | ||
| 301 | } | 707 | } |
| 302 | 708 | ||
| 303 | int ds_read_bts(void *ds, int index, struct bts_struct *out) | 709 | int ds_unchecked_write_pebs(struct task_struct *task, |
| 710 | const void *record, size_t size) | ||
| 304 | { | 711 | { |
| 305 | void *bts; | 712 | return ds_write(task, record, size, ds_pebs, /* force = */ 1); |
| 713 | } | ||
| 306 | 714 | ||
| 307 | if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) | 715 | static int ds_reset_or_clear(struct task_struct *task, |
| 308 | return -EOPNOTSUPP; | 716 | enum ds_qualifier qual, int clear) |
| 717 | { | ||
| 718 | struct ds_context *context; | ||
| 719 | unsigned long base, end; | ||
| 720 | int error; | ||
| 309 | 721 | ||
| 310 | if (index < 0) | 722 | context = ds_get_context(task); |
| 311 | return -EINVAL; | 723 | error = ds_validate_access(context, qual); |
| 724 | if (error < 0) | ||
| 725 | goto out; | ||
| 312 | 726 | ||
| 313 | if (index >= ds_get_bts_size(ds)) | 727 | base = ds_get(context->ds, qual, ds_buffer_base); |
| 314 | return -EINVAL; | 728 | end = ds_get(context->ds, qual, ds_absolute_maximum); |
| 315 | 729 | ||
| 316 | bts = (void *)(get_bts_buffer_base(ds) + (index * ds_cfg.sizeof_bts)); | 730 | if (clear) |
| 731 | memset((void *)base, 0, end - base); | ||
| 317 | 732 | ||
| 318 | memset(out, 0, sizeof(*out)); | 733 | ds_set(context->ds, qual, ds_index, base); |
| 319 | if (get_from_ip(bts) == BTS_ESCAPE_ADDRESS) { | ||
| 320 | out->qualifier = get_info_type(bts); | ||
| 321 | out->variant.jiffies = get_info_data(bts); | ||
| 322 | } else { | ||
| 323 | out->qualifier = BTS_BRANCH; | ||
| 324 | out->variant.lbr.from_ip = get_from_ip(bts); | ||
| 325 | out->variant.lbr.to_ip = get_to_ip(bts); | ||
| 326 | } | ||
| 327 | 734 | ||
| 328 | return sizeof(*out);; | 735 | error = 0; |
| 736 | out: | ||
| 737 | ds_put_context(context); | ||
| 738 | return error; | ||
| 329 | } | 739 | } |
| 330 | 740 | ||
| 331 | int ds_write_bts(void *ds, const struct bts_struct *in) | 741 | int ds_reset_bts(struct task_struct *task) |
| 332 | { | 742 | { |
| 333 | unsigned long bts; | 743 | return ds_reset_or_clear(task, ds_bts, /* clear = */ 0); |
| 334 | 744 | } | |
| 335 | if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) | ||
| 336 | return -EOPNOTSUPP; | ||
| 337 | |||
| 338 | if (ds_get_bts_size(ds) <= 0) | ||
| 339 | return -ENXIO; | ||
| 340 | 745 | ||
| 341 | bts = get_bts_index(ds); | 746 | int ds_reset_pebs(struct task_struct *task) |
| 747 | { | ||
| 748 | return ds_reset_or_clear(task, ds_pebs, /* clear = */ 0); | ||
| 749 | } | ||
| 342 | 750 | ||
| 343 | memset((void *)bts, 0, ds_cfg.sizeof_bts); | 751 | int ds_clear_bts(struct task_struct *task) |
| 344 | switch (in->qualifier) { | 752 | { |
| 345 | case BTS_INVALID: | 753 | return ds_reset_or_clear(task, ds_bts, /* clear = */ 1); |
| 346 | break; | 754 | } |
| 347 | 755 | ||
| 348 | case BTS_BRANCH: | 756 | int ds_clear_pebs(struct task_struct *task) |
| 349 | set_from_ip((void *)bts, in->variant.lbr.from_ip); | 757 | { |
| 350 | set_to_ip((void *)bts, in->variant.lbr.to_ip); | 758 | return ds_reset_or_clear(task, ds_pebs, /* clear = */ 1); |
| 351 | break; | 759 | } |
| 352 | 760 | ||
| 353 | case BTS_TASK_ARRIVES: | 761 | int ds_get_pebs_reset(struct task_struct *task, u64 *value) |
| 354 | case BTS_TASK_DEPARTS: | 762 | { |
| 355 | set_from_ip((void *)bts, BTS_ESCAPE_ADDRESS); | 763 | struct ds_context *context; |
| 356 | set_info_type((void *)bts, in->qualifier); | 764 | int error; |
| 357 | set_info_data((void *)bts, in->variant.jiffies); | ||
| 358 | break; | ||
| 359 | 765 | ||
| 360 | default: | 766 | if (!value) |
| 361 | return -EINVAL; | 767 | return -EINVAL; |
| 362 | } | ||
| 363 | 768 | ||
| 364 | bts = bts + ds_cfg.sizeof_bts; | 769 | context = ds_get_context(task); |
| 365 | if (bts >= get_bts_absolute_maximum(ds)) | 770 | error = ds_validate_access(context, ds_pebs); |
| 366 | bts = get_bts_buffer_base(ds); | 771 | if (error < 0) |
| 367 | set_bts_index(ds, bts); | 772 | goto out; |
| 368 | 773 | ||
| 369 | return ds_cfg.sizeof_bts; | 774 | *value = *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)); |
| 775 | |||
| 776 | error = 0; | ||
| 777 | out: | ||
| 778 | ds_put_context(context); | ||
| 779 | return error; | ||
| 370 | } | 780 | } |
| 371 | 781 | ||
| 372 | unsigned long ds_debugctl_mask(void) | 782 | int ds_set_pebs_reset(struct task_struct *task, u64 value) |
| 373 | { | 783 | { |
| 374 | return ds_cfg.debugctl_mask; | 784 | struct ds_context *context; |
| 375 | } | 785 | int error; |
| 376 | 786 | ||
| 377 | #ifdef __i386__ | 787 | context = ds_get_context(task); |
| 378 | static const struct ds_configuration ds_cfg_netburst = { | 788 | error = ds_validate_access(context, ds_pebs); |
| 379 | .sizeof_ds = 9 * 4, | 789 | if (error < 0) |
| 380 | .bts_buffer_base = { 0, 4 }, | 790 | goto out; |
| 381 | .bts_index = { 4, 4 }, | ||
| 382 | .bts_absolute_maximum = { 8, 4 }, | ||
| 383 | .bts_interrupt_threshold = { 12, 4 }, | ||
| 384 | .sizeof_bts = 3 * 4, | ||
| 385 | .from_ip = { 0, 4 }, | ||
| 386 | .to_ip = { 4, 4 }, | ||
| 387 | .info_type = { 4, 1 }, | ||
| 388 | .info_data = { 8, 4 }, | ||
| 389 | .debugctl_mask = (1<<2)|(1<<3) | ||
| 390 | }; | ||
| 391 | 791 | ||
| 392 | static const struct ds_configuration ds_cfg_pentium_m = { | 792 | *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)) = value; |
| 393 | .sizeof_ds = 9 * 4, | 793 | |
| 394 | .bts_buffer_base = { 0, 4 }, | 794 | error = 0; |
| 395 | .bts_index = { 4, 4 }, | 795 | out: |
| 396 | .bts_absolute_maximum = { 8, 4 }, | 796 | ds_put_context(context); |
| 397 | .bts_interrupt_threshold = { 12, 4 }, | 797 | return error; |
| 398 | .sizeof_bts = 3 * 4, | 798 | } |
| 399 | .from_ip = { 0, 4 }, | 799 | |
| 400 | .to_ip = { 4, 4 }, | 800 | static const struct ds_configuration ds_cfg_var = { |
| 401 | .info_type = { 4, 1 }, | 801 | .sizeof_ds = sizeof(long) * 12, |
| 402 | .info_data = { 8, 4 }, | 802 | .sizeof_field = sizeof(long), |
| 403 | .debugctl_mask = (1<<6)|(1<<7) | 803 | .sizeof_rec[ds_bts] = sizeof(long) * 3, |
| 804 | .sizeof_rec[ds_pebs] = sizeof(long) * 10 | ||
| 404 | }; | 805 | }; |
| 405 | #endif /* _i386_ */ | 806 | static const struct ds_configuration ds_cfg_64 = { |
| 406 | 807 | .sizeof_ds = 8 * 12, | |
| 407 | static const struct ds_configuration ds_cfg_core2 = { | 808 | .sizeof_field = 8, |
| 408 | .sizeof_ds = 9 * 8, | 809 | .sizeof_rec[ds_bts] = 8 * 3, |
| 409 | .bts_buffer_base = { 0, 8 }, | 810 | .sizeof_rec[ds_pebs] = 8 * 10 |
| 410 | .bts_index = { 8, 8 }, | ||
| 411 | .bts_absolute_maximum = { 16, 8 }, | ||
| 412 | .bts_interrupt_threshold = { 24, 8 }, | ||
| 413 | .sizeof_bts = 3 * 8, | ||
| 414 | .from_ip = { 0, 8 }, | ||
| 415 | .to_ip = { 8, 8 }, | ||
| 416 | .info_type = { 8, 1 }, | ||
| 417 | .info_data = { 16, 8 }, | ||
| 418 | .debugctl_mask = (1<<6)|(1<<7)|(1<<9) | ||
| 419 | }; | 811 | }; |
| 420 | 812 | ||
| 421 | static inline void | 813 | static inline void |
| @@ -429,14 +821,13 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) | |||
| 429 | switch (c->x86) { | 821 | switch (c->x86) { |
| 430 | case 0x6: | 822 | case 0x6: |
| 431 | switch (c->x86_model) { | 823 | switch (c->x86_model) { |
| 432 | #ifdef __i386__ | ||
| 433 | case 0xD: | 824 | case 0xD: |
| 434 | case 0xE: /* Pentium M */ | 825 | case 0xE: /* Pentium M */ |
| 435 | ds_configure(&ds_cfg_pentium_m); | 826 | ds_configure(&ds_cfg_var); |
| 436 | break; | 827 | break; |
| 437 | #endif /* _i386_ */ | ||
| 438 | case 0xF: /* Core2 */ | 828 | case 0xF: /* Core2 */ |
| 439 | ds_configure(&ds_cfg_core2); | 829 | case 0x1C: /* Atom */ |
| 830 | ds_configure(&ds_cfg_64); | ||
| 440 | break; | 831 | break; |
| 441 | default: | 832 | default: |
| 442 | /* sorry, don't know about them */ | 833 | /* sorry, don't know about them */ |
| @@ -445,13 +836,11 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) | |||
| 445 | break; | 836 | break; |
| 446 | case 0xF: | 837 | case 0xF: |
| 447 | switch (c->x86_model) { | 838 | switch (c->x86_model) { |
| 448 | #ifdef __i386__ | ||
| 449 | case 0x0: | 839 | case 0x0: |
| 450 | case 0x1: | 840 | case 0x1: |
| 451 | case 0x2: /* Netburst */ | 841 | case 0x2: /* Netburst */ |
| 452 | ds_configure(&ds_cfg_netburst); | 842 | ds_configure(&ds_cfg_var); |
| 453 | break; | 843 | break; |
| 454 | #endif /* _i386_ */ | ||
| 455 | default: | 844 | default: |
| 456 | /* sorry, don't know about them */ | 845 | /* sorry, don't know about them */ |
| 457 | break; | 846 | break; |
| @@ -462,3 +851,14 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) | |||
| 462 | break; | 851 | break; |
| 463 | } | 852 | } |
| 464 | } | 853 | } |
| 854 | |||
| 855 | void ds_free(struct ds_context *context) | ||
| 856 | { | ||
| 857 | /* This is called when the task owning the parameter context | ||
| 858 | * is dying. There should not be any user of that context left | ||
| 859 | * to disturb us, anymore. */ | ||
| 860 | unsigned long leftovers = context->count; | ||
| 861 | while (leftovers--) | ||
| 862 | ds_put_context(context); | ||
| 863 | } | ||
| 864 | #endif /* CONFIG_X86_DS */ | ||
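The rewritten DS layer stops hard-coding one accessor pair per field: the first eight buffer-management fields are pointer-sized and laid out as two groups of four (BTS, then PEBS), so a single base + sizeof_field * (field + 4 * qual) computation reaches any of them, while the use-counted ds_context replaces the old raw void * handle. A worked example of the addressing, using the 64-bit configuration from ds_cfg_64:

    /*
     * Worked example of the ds_get()/ds_set() addressing, 64-bit layout
     * (sizeof_field == 8).  Field order per qualifier: buffer_base, index,
     * absolute_maximum, interrupt_threshold; qualifiers: bts = 0, pebs = 1.
     *
     *   offset = sizeof_field * (field + 4 * qual)
     *
     *   BTS  index               ->  8 * (1 + 4*0) =  8
     *   PEBS buffer_base         ->  8 * (0 + 4*1) = 32
     *   PEBS interrupt_threshold ->  8 * (3 + 4*1) = 56
     *   PEBS counter reset value ->  8 * 8         = 64   (ds_set_pebs_reset)
     */
    static unsigned long ds_field_offset(unsigned char sizeof_field,
                                         int field, int qual)
    {
            return sizeof_field * (field + (4 * qual));
    }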
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c new file mode 100644 index 000000000000..b3614752197b --- /dev/null +++ b/arch/x86/kernel/dumpstack_32.c | |||
| @@ -0,0 +1,449 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
| 3 | * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs | ||
| 4 | */ | ||
| 5 | #include <linux/kallsyms.h> | ||
| 6 | #include <linux/kprobes.h> | ||
| 7 | #include <linux/uaccess.h> | ||
| 8 | #include <linux/utsname.h> | ||
| 9 | #include <linux/hardirq.h> | ||
| 10 | #include <linux/kdebug.h> | ||
| 11 | #include <linux/module.h> | ||
| 12 | #include <linux/ptrace.h> | ||
| 13 | #include <linux/kexec.h> | ||
| 14 | #include <linux/bug.h> | ||
| 15 | #include <linux/nmi.h> | ||
| 16 | #include <linux/sysfs.h> | ||
| 17 | |||
| 18 | #include <asm/stacktrace.h> | ||
| 19 | |||
| 20 | #define STACKSLOTS_PER_LINE 8 | ||
| 21 | #define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :) | ||
| 22 | |||
| 23 | int panic_on_unrecovered_nmi; | ||
| 24 | int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE; | ||
| 25 | static unsigned int code_bytes = 64; | ||
| 26 | static int die_counter; | ||
| 27 | |||
| 28 | void printk_address(unsigned long address, int reliable) | ||
| 29 | { | ||
| 30 | printk(" [<%p>] %s%pS\n", (void *) address, | ||
| 31 | reliable ? "" : "? ", (void *) address); | ||
| 32 | } | ||
| 33 | |||
| 34 | static inline int valid_stack_ptr(struct thread_info *tinfo, | ||
| 35 | void *p, unsigned int size, void *end) | ||
| 36 | { | ||
| 37 | void *t = tinfo; | ||
| 38 | if (end) { | ||
| 39 | if (p < end && p >= (end-THREAD_SIZE)) | ||
| 40 | return 1; | ||
| 41 | else | ||
| 42 | return 0; | ||
| 43 | } | ||
| 44 | return p > t && p < t + THREAD_SIZE - size; | ||
| 45 | } | ||
| 46 | |||
| 47 | /* The form of the top of the frame on the stack */ | ||
| 48 | struct stack_frame { | ||
| 49 | struct stack_frame *next_frame; | ||
| 50 | unsigned long return_address; | ||
| 51 | }; | ||
| 52 | |||
| 53 | static inline unsigned long | ||
| 54 | print_context_stack(struct thread_info *tinfo, | ||
| 55 | unsigned long *stack, unsigned long bp, | ||
| 56 | const struct stacktrace_ops *ops, void *data, | ||
| 57 | unsigned long *end) | ||
| 58 | { | ||
| 59 | struct stack_frame *frame = (struct stack_frame *)bp; | ||
| 60 | |||
| 61 | while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) { | ||
| 62 | unsigned long addr; | ||
| 63 | |||
| 64 | addr = *stack; | ||
| 65 | if (__kernel_text_address(addr)) { | ||
| 66 | if ((unsigned long) stack == bp + sizeof(long)) { | ||
| 67 | ops->address(data, addr, 1); | ||
| 68 | frame = frame->next_frame; | ||
| 69 | bp = (unsigned long) frame; | ||
| 70 | } else { | ||
| 71 | ops->address(data, addr, bp == 0); | ||
| 72 | } | ||
| 73 | } | ||
| 74 | stack++; | ||
| 75 | } | ||
| 76 | return bp; | ||
| 77 | } | ||
| 78 | |||
| 79 | void dump_trace(struct task_struct *task, struct pt_regs *regs, | ||
| 80 | unsigned long *stack, unsigned long bp, | ||
| 81 | const struct stacktrace_ops *ops, void *data) | ||
| 82 | { | ||
| 83 | if (!task) | ||
| 84 | task = current; | ||
| 85 | |||
| 86 | if (!stack) { | ||
| 87 | unsigned long dummy; | ||
| 88 | stack = &dummy; | ||
| 89 | if (task && task != current) | ||
| 90 | stack = (unsigned long *)task->thread.sp; | ||
| 91 | } | ||
| 92 | |||
| 93 | #ifdef CONFIG_FRAME_POINTER | ||
| 94 | if (!bp) { | ||
| 95 | if (task == current) { | ||
| 96 | /* Grab bp right from our regs */ | ||
| 97 | get_bp(bp); | ||
| 98 | } else { | ||
| 99 | /* bp is the last reg pushed by switch_to */ | ||
| 100 | bp = *(unsigned long *) task->thread.sp; | ||
| 101 | } | ||
| 102 | } | ||
| 103 | #endif | ||
| 104 | |||
| 105 | for (;;) { | ||
| 106 | struct thread_info *context; | ||
| 107 | |||
| 108 | context = (struct thread_info *) | ||
| 109 | ((unsigned long)stack & (~(THREAD_SIZE - 1))); | ||
| 110 | bp = print_context_stack(context, stack, bp, ops, data, NULL); | ||
| 111 | |||
| 112 | stack = (unsigned long *)context->previous_esp; | ||
| 113 | if (!stack) | ||
| 114 | break; | ||
| 115 | if (ops->stack(data, "IRQ") < 0) | ||
| 116 | break; | ||
| 117 | touch_nmi_watchdog(); | ||
| 118 | } | ||
| 119 | } | ||
| 120 | EXPORT_SYMBOL(dump_trace); | ||
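Editor's aside: the loop in dump_trace() finds the thread_info that sits at the bottom of each stack by masking the current stack address down to a THREAD_SIZE boundary; this works because the kernel stack is THREAD_SIZE-aligned. A tiny stand-alone illustration of that rounding, with an assumed THREAD_SIZE and a made-up address:

#include <stdio.h>
#include <stdint.h>

#define THREAD_SIZE (8 * 1024UL)	/* assumed value, purely for the sketch */

int main(void)
{
	uintptr_t sp   = 0xc0471e54UL;			/* some address inside a stack */
	uintptr_t base = sp & ~(THREAD_SIZE - 1);	/* round down to the stack base */

	printf("stack ptr 0x%lx -> thread_info at 0x%lx\n",
	       (unsigned long)sp, (unsigned long)base);
	return 0;
}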
| 121 | |||
| 122 | static void | ||
| 123 | print_trace_warning_symbol(void *data, char *msg, unsigned long symbol) | ||
| 124 | { | ||
| 125 | printk(data); | ||
| 126 | print_symbol(msg, symbol); | ||
| 127 | printk("\n"); | ||
| 128 | } | ||
| 129 | |||
| 130 | static void print_trace_warning(void *data, char *msg) | ||
| 131 | { | ||
| 132 | printk("%s%s\n", (char *)data, msg); | ||
| 133 | } | ||
| 134 | |||
| 135 | static int print_trace_stack(void *data, char *name) | ||
| 136 | { | ||
| 137 | printk("%s <%s> ", (char *)data, name); | ||
| 138 | return 0; | ||
| 139 | } | ||
| 140 | |||
| 141 | /* | ||
| 142 | * Print one address/symbol entry per line. | ||
| 143 | */ | ||
| 144 | static void print_trace_address(void *data, unsigned long addr, int reliable) | ||
| 145 | { | ||
| 146 | touch_nmi_watchdog(); | ||
| 147 | printk(data); | ||
| 148 | printk_address(addr, reliable); | ||
| 149 | } | ||
| 150 | |||
| 151 | static const struct stacktrace_ops print_trace_ops = { | ||
| 152 | .warning = print_trace_warning, | ||
| 153 | .warning_symbol = print_trace_warning_symbol, | ||
| 154 | .stack = print_trace_stack, | ||
| 155 | .address = print_trace_address, | ||
| 156 | }; | ||
| 157 | |||
| 158 | static void | ||
| 159 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
| 160 | unsigned long *stack, unsigned long bp, char *log_lvl) | ||
| 161 | { | ||
| 162 | printk("%sCall Trace:\n", log_lvl); | ||
| 163 | dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); | ||
| 164 | } | ||
| 165 | |||
| 166 | void show_trace(struct task_struct *task, struct pt_regs *regs, | ||
| 167 | unsigned long *stack, unsigned long bp) | ||
| 168 | { | ||
| 169 | show_trace_log_lvl(task, regs, stack, bp, ""); | ||
| 170 | } | ||
| 171 | |||
| 172 | static void | ||
| 173 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
| 174 | unsigned long *sp, unsigned long bp, char *log_lvl) | ||
| 175 | { | ||
| 176 | unsigned long *stack; | ||
| 177 | int i; | ||
| 178 | |||
| 179 | if (sp == NULL) { | ||
| 180 | if (task) | ||
| 181 | sp = (unsigned long *)task->thread.sp; | ||
| 182 | else | ||
| 183 | sp = (unsigned long *)&sp; | ||
| 184 | } | ||
| 185 | |||
| 186 | stack = sp; | ||
| 187 | for (i = 0; i < kstack_depth_to_print; i++) { | ||
| 188 | if (kstack_end(stack)) | ||
| 189 | break; | ||
| 190 | if (i && ((i % STACKSLOTS_PER_LINE) == 0)) | ||
| 191 | printk("\n%s", log_lvl); | ||
| 192 | printk(" %08lx", *stack++); | ||
| 193 | touch_nmi_watchdog(); | ||
| 194 | } | ||
| 195 | printk("\n"); | ||
| 196 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); | ||
| 197 | } | ||
| 198 | |||
| 199 | void show_stack(struct task_struct *task, unsigned long *sp) | ||
| 200 | { | ||
| 201 | show_stack_log_lvl(task, NULL, sp, 0, ""); | ||
| 202 | } | ||
| 203 | |||
| 204 | /* | ||
| 205 | * The architecture-independent dump_stack generator | ||
| 206 | */ | ||
| 207 | void dump_stack(void) | ||
| 208 | { | ||
| 209 | unsigned long bp = 0; | ||
| 210 | unsigned long stack; | ||
| 211 | |||
| 212 | #ifdef CONFIG_FRAME_POINTER | ||
| 213 | if (!bp) | ||
| 214 | get_bp(bp); | ||
| 215 | #endif | ||
| 216 | |||
| 217 | printk("Pid: %d, comm: %.20s %s %s %.*s\n", | ||
| 218 | current->pid, current->comm, print_tainted(), | ||
| 219 | init_utsname()->release, | ||
| 220 | (int)strcspn(init_utsname()->version, " "), | ||
| 221 | init_utsname()->version); | ||
| 222 | show_trace(NULL, NULL, &stack, bp); | ||
| 223 | } | ||
| 224 | |||
| 225 | EXPORT_SYMBOL(dump_stack); | ||
| 226 | |||
| 227 | void show_registers(struct pt_regs *regs) | ||
| 228 | { | ||
| 229 | int i; | ||
| 230 | |||
| 231 | print_modules(); | ||
| 232 | __show_regs(regs, 0); | ||
| 233 | |||
| 234 | printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n", | ||
| 235 | TASK_COMM_LEN, current->comm, task_pid_nr(current), | ||
| 236 | current_thread_info(), current, task_thread_info(current)); | ||
| 237 | /* | ||
| 238 | * When in-kernel, we also print out the stack and code at the | ||
| 239 | * time of the fault. | ||
| 240 | */ | ||
| 241 | if (!user_mode_vm(regs)) { | ||
| 242 | unsigned int code_prologue = code_bytes * 43 / 64; | ||
| 243 | unsigned int code_len = code_bytes; | ||
| 244 | unsigned char c; | ||
| 245 | u8 *ip; | ||
| 246 | |||
| 247 | printk(KERN_EMERG "Stack:\n"); | ||
| 248 | show_stack_log_lvl(NULL, regs, ®s->sp, | ||
| 249 | 0, KERN_EMERG); | ||
| 250 | |||
| 251 | printk(KERN_EMERG "Code: "); | ||
| 252 | |||
| 253 | ip = (u8 *)regs->ip - code_prologue; | ||
| 254 | if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { | ||
| 255 | /* try starting at IP */ | ||
| 256 | ip = (u8 *)regs->ip; | ||
| 257 | code_len = code_len - code_prologue + 1; | ||
| 258 | } | ||
| 259 | for (i = 0; i < code_len; i++, ip++) { | ||
| 260 | if (ip < (u8 *)PAGE_OFFSET || | ||
| 261 | probe_kernel_address(ip, c)) { | ||
| 262 | printk(" Bad EIP value."); | ||
| 263 | break; | ||
| 264 | } | ||
| 265 | if (ip == (u8 *)regs->ip) | ||
| 266 | printk("<%02x> ", c); | ||
| 267 | else | ||
| 268 | printk("%02x ", c); | ||
| 269 | } | ||
| 270 | } | ||
| 271 | printk("\n"); | ||
| 272 | } | ||
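Editor's aside: the "Code:" dump above places roughly two thirds of the window (code_bytes * 43 / 64 bytes) before the faulting instruction and marks the byte at the instruction pointer with angle brackets. A user-space sketch of the same windowing, with a plain buffer standing in for kernel text and all values illustrative:

#include <stdio.h>
#include <string.h>

static void dump_code(const unsigned char *ip, unsigned int code_bytes)
{
	unsigned int prologue = code_bytes * 43 / 64;	/* bytes shown before the IP */
	const unsigned char *p = ip - prologue;
	unsigned int i;

	printf("Code: ");
	for (i = 0; i < code_bytes; i++, p++)
		printf(p == ip ? "<%02x> " : "%02x ", *p);
	printf("\n");
}

int main(void)
{
	unsigned char text[64];

	memset(text, 0x90, sizeof(text));	/* fill with NOPs as stand-in code */
	dump_code(text + 32, 16);		/* pretend the faulting IP is text+32 */
	return 0;
}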
| 273 | |||
| 274 | int is_valid_bugaddr(unsigned long ip) | ||
| 275 | { | ||
| 276 | unsigned short ud2; | ||
| 277 | |||
| 278 | if (ip < PAGE_OFFSET) | ||
| 279 | return 0; | ||
| 280 | if (probe_kernel_address((unsigned short *)ip, ud2)) | ||
| 281 | return 0; | ||
| 282 | |||
| 283 | return ud2 == 0x0b0f; | ||
| 284 | } | ||
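Editor's aside: the magic constant 0x0b0f that is_valid_bugaddr() checks for is simply the UD2 instruction (the byte sequence 0x0f 0x0b) read as a little-endian 16-bit value, which is the trap BUG() plants in the text. A stand-alone illustration:

#include <stdio.h>
#include <string.h>

int main(void)
{
	unsigned char ud2_bytes[2] = { 0x0f, 0x0b };	/* the UD2 opcode bytes */
	unsigned short v;

	memcpy(&v, ud2_bytes, sizeof(v));	/* little-endian load, as on x86 */
	printf("ud2 as u16: %#06x (matches 0x0b0f: %s)\n",
	       v, v == 0x0b0f ? "yes" : "no");
	return 0;
}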
| 285 | |||
| 286 | static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; | ||
| 287 | static int die_owner = -1; | ||
| 288 | static unsigned int die_nest_count; | ||
| 289 | |||
| 290 | unsigned __kprobes long oops_begin(void) | ||
| 291 | { | ||
| 292 | unsigned long flags; | ||
| 293 | |||
| 294 | oops_enter(); | ||
| 295 | |||
| 296 | if (die_owner != raw_smp_processor_id()) { | ||
| 297 | console_verbose(); | ||
| 298 | raw_local_irq_save(flags); | ||
| 299 | __raw_spin_lock(&die_lock); | ||
| 300 | die_owner = smp_processor_id(); | ||
| 301 | die_nest_count = 0; | ||
| 302 | bust_spinlocks(1); | ||
| 303 | } else { | ||
| 304 | raw_local_irq_save(flags); | ||
| 305 | } | ||
| 306 | die_nest_count++; | ||
| 307 | return flags; | ||
| 308 | } | ||
| 309 | |||
| 310 | void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) | ||
| 311 | { | ||
| 312 | bust_spinlocks(0); | ||
| 313 | die_owner = -1; | ||
| 314 | add_taint(TAINT_DIE); | ||
| 315 | __raw_spin_unlock(&die_lock); | ||
| 316 | raw_local_irq_restore(flags); | ||
| 317 | |||
| 318 | if (!regs) | ||
| 319 | return; | ||
| 320 | |||
| 321 | if (kexec_should_crash(current)) | ||
| 322 | crash_kexec(regs); | ||
| 323 | if (in_interrupt()) | ||
| 324 | panic("Fatal exception in interrupt"); | ||
| 325 | if (panic_on_oops) | ||
| 326 | panic("Fatal exception"); | ||
| 327 | oops_exit(); | ||
| 328 | do_exit(signr); | ||
| 329 | } | ||
| 330 | |||
| 331 | int __kprobes __die(const char *str, struct pt_regs *regs, long err) | ||
| 332 | { | ||
| 333 | unsigned short ss; | ||
| 334 | unsigned long sp; | ||
| 335 | |||
| 336 | printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter); | ||
| 337 | #ifdef CONFIG_PREEMPT | ||
| 338 | printk("PREEMPT "); | ||
| 339 | #endif | ||
| 340 | #ifdef CONFIG_SMP | ||
| 341 | printk("SMP "); | ||
| 342 | #endif | ||
| 343 | #ifdef CONFIG_DEBUG_PAGEALLOC | ||
| 344 | printk("DEBUG_PAGEALLOC"); | ||
| 345 | #endif | ||
| 346 | printk("\n"); | ||
| 347 | sysfs_printk_last_file(); | ||
| 348 | if (notify_die(DIE_OOPS, str, regs, err, | ||
| 349 | current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) | ||
| 350 | return 1; | ||
| 351 | |||
| 352 | show_registers(regs); | ||
| 353 | /* Executive summary in case the oops scrolled away */ | ||
| 354 | sp = (unsigned long) (®s->sp); | ||
| 355 | savesegment(ss, ss); | ||
| 356 | if (user_mode(regs)) { | ||
| 357 | sp = regs->sp; | ||
| 358 | ss = regs->ss & 0xffff; | ||
| 359 | } | ||
| 360 | printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip); | ||
| 361 | print_symbol("%s", regs->ip); | ||
| 362 | printk(" SS:ESP %04x:%08lx\n", ss, sp); | ||
| 363 | return 0; | ||
| 364 | } | ||
| 365 | |||
| 366 | /* | ||
| 367 | * This path is taken when something in the kernel has done something bad | ||
| 368 | * and is about to be terminated: | ||
| 369 | */ | ||
| 370 | void die(const char *str, struct pt_regs *regs, long err) | ||
| 371 | { | ||
| 372 | unsigned long flags = oops_begin(); | ||
| 373 | |||
| 374 | if (die_nest_count < 3) { | ||
| 375 | report_bug(regs->ip, regs); | ||
| 376 | |||
| 377 | if (__die(str, regs, err)) | ||
| 378 | regs = NULL; | ||
| 379 | } else { | ||
| 380 | printk(KERN_EMERG "Recursive die() failure, output suppressed\n"); | ||
| 381 | } | ||
| 382 | |||
| 383 | oops_end(flags, regs, SIGSEGV); | ||
| 384 | } | ||
| 385 | |||
| 386 | static DEFINE_SPINLOCK(nmi_print_lock); | ||
| 387 | |||
| 388 | void notrace __kprobes | ||
| 389 | die_nmi(char *str, struct pt_regs *regs, int do_panic) | ||
| 390 | { | ||
| 391 | if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) | ||
| 392 | return; | ||
| 393 | |||
| 394 | spin_lock(&nmi_print_lock); | ||
| 395 | /* | ||
| 396 | * We are in trouble anyway, let's at least try | ||
| 397 | * to get a message out: | ||
| 398 | */ | ||
| 399 | bust_spinlocks(1); | ||
| 400 | printk(KERN_EMERG "%s", str); | ||
| 401 | printk(" on CPU%d, ip %08lx, registers:\n", | ||
| 402 | smp_processor_id(), regs->ip); | ||
| 403 | show_registers(regs); | ||
| 404 | if (do_panic) | ||
| 405 | panic("Non maskable interrupt"); | ||
| 406 | console_silent(); | ||
| 407 | spin_unlock(&nmi_print_lock); | ||
| 408 | |||
| 409 | /* | ||
| 410 | * If we are in the kernel we are probably nested up pretty badly | ||
| 411 | * and might as well get out now while we still can: | ||
| 412 | */ | ||
| 413 | if (!user_mode_vm(regs)) { | ||
| 414 | current->thread.trap_no = 2; | ||
| 415 | crash_kexec(regs); | ||
| 416 | } | ||
| 417 | |||
| 418 | bust_spinlocks(0); | ||
| 419 | do_exit(SIGSEGV); | ||
| 420 | } | ||
| 421 | |||
| 422 | static int __init oops_setup(char *s) | ||
| 423 | { | ||
| 424 | if (!s) | ||
| 425 | return -EINVAL; | ||
| 426 | if (!strcmp(s, "panic")) | ||
| 427 | panic_on_oops = 1; | ||
| 428 | return 0; | ||
| 429 | } | ||
| 430 | early_param("oops", oops_setup); | ||
| 431 | |||
| 432 | static int __init kstack_setup(char *s) | ||
| 433 | { | ||
| 434 | if (!s) | ||
| 435 | return -EINVAL; | ||
| 436 | kstack_depth_to_print = simple_strtoul(s, NULL, 0); | ||
| 437 | return 0; | ||
| 438 | } | ||
| 439 | early_param("kstack", kstack_setup); | ||
| 440 | |||
| 441 | static int __init code_bytes_setup(char *s) | ||
| 442 | { | ||
| 443 | code_bytes = simple_strtoul(s, NULL, 0); | ||
| 444 | if (code_bytes > 8192) | ||
| 445 | code_bytes = 8192; | ||
| 446 | |||
| 447 | return 1; | ||
| 448 | } | ||
| 449 | __setup("code_bytes=", code_bytes_setup); | ||
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c new file mode 100644 index 000000000000..96a5db7da8a7 --- /dev/null +++ b/arch/x86/kernel/dumpstack_64.c | |||
| @@ -0,0 +1,575 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
| 3 | * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs | ||
| 4 | */ | ||
| 5 | #include <linux/kallsyms.h> | ||
| 6 | #include <linux/kprobes.h> | ||
| 7 | #include <linux/uaccess.h> | ||
| 8 | #include <linux/utsname.h> | ||
| 9 | #include <linux/hardirq.h> | ||
| 10 | #include <linux/kdebug.h> | ||
| 11 | #include <linux/module.h> | ||
| 12 | #include <linux/ptrace.h> | ||
| 13 | #include <linux/kexec.h> | ||
| 14 | #include <linux/bug.h> | ||
| 15 | #include <linux/nmi.h> | ||
| 16 | #include <linux/sysfs.h> | ||
| 17 | |||
| 18 | #include <asm/stacktrace.h> | ||
| 19 | |||
| 20 | #define STACKSLOTS_PER_LINE 4 | ||
| 21 | #define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) | ||
| 22 | |||
| 23 | int panic_on_unrecovered_nmi; | ||
| 24 | int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE; | ||
| 25 | static unsigned int code_bytes = 64; | ||
| 26 | static int die_counter; | ||
| 27 | |||
| 28 | void printk_address(unsigned long address, int reliable) | ||
| 29 | { | ||
| 30 | printk(" [<%p>] %s%pS\n", (void *) address, | ||
| 31 | reliable ? "" : "? ", (void *) address); | ||
| 32 | } | ||
| 33 | |||
| 34 | static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | ||
| 35 | unsigned *usedp, char **idp) | ||
| 36 | { | ||
| 37 | static char ids[][8] = { | ||
| 38 | [DEBUG_STACK - 1] = "#DB", | ||
| 39 | [NMI_STACK - 1] = "NMI", | ||
| 40 | [DOUBLEFAULT_STACK - 1] = "#DF", | ||
| 41 | [STACKFAULT_STACK - 1] = "#SS", | ||
| 42 | [MCE_STACK - 1] = "#MC", | ||
| 43 | #if DEBUG_STKSZ > EXCEPTION_STKSZ | ||
| 44 | [N_EXCEPTION_STACKS ... | ||
| 45 | N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]" | ||
| 46 | #endif | ||
| 47 | }; | ||
| 48 | unsigned k; | ||
| 49 | |||
| 50 | /* | ||
| 51 | * Iterate over all exception stacks, and figure out whether | ||
| 52 | * 'stack' is in one of them: | ||
| 53 | */ | ||
| 54 | for (k = 0; k < N_EXCEPTION_STACKS; k++) { | ||
| 55 | unsigned long end = per_cpu(orig_ist, cpu).ist[k]; | ||
| 56 | /* | ||
| 57 | * Is 'stack' above this exception frame's end? | ||
| 58 | * If yes then skip to the next frame. | ||
| 59 | */ | ||
| 60 | if (stack >= end) | ||
| 61 | continue; | ||
| 62 | /* | ||
| 63 | * Is 'stack' above this exception frame's start address? | ||
| 64 | * If yes then we found the right frame. | ||
| 65 | */ | ||
| 66 | if (stack >= end - EXCEPTION_STKSZ) { | ||
| 67 | /* | ||
| 68 | * Make sure we only iterate through an exception | ||
| 69 | * stack once. If it comes up a second time | ||
| 70 | * then something is going wrong - just | ||
| 71 | * break out and return NULL: | ||
| 72 | */ | ||
| 73 | if (*usedp & (1U << k)) | ||
| 74 | break; | ||
| 75 | *usedp |= 1U << k; | ||
| 76 | *idp = ids[k]; | ||
| 77 | return (unsigned long *)end; | ||
| 78 | } | ||
| 79 | /* | ||
| 80 | * If this is a debug stack, and if it has a larger size than | ||
| 81 | * the usual exception stacks, then 'stack' might still | ||
| 82 | * be within the lower portion of the debug stack: | ||
| 83 | */ | ||
| 84 | #if DEBUG_STKSZ > EXCEPTION_STKSZ | ||
| 85 | if (k == DEBUG_STACK - 1 && stack >= end - DEBUG_STKSZ) { | ||
| 86 | unsigned j = N_EXCEPTION_STACKS - 1; | ||
| 87 | |||
| 88 | /* | ||
| 89 | * Black magic. A large debug stack is composed of | ||
| 90 | * multiple exception stack entries, which we | ||
| 91 | * iterate through now. Don't look: | ||
| 92 | */ | ||
| 93 | do { | ||
| 94 | ++j; | ||
| 95 | end -= EXCEPTION_STKSZ; | ||
| 96 | ids[j][4] = '1' + (j - N_EXCEPTION_STACKS); | ||
| 97 | } while (stack < end - EXCEPTION_STKSZ); | ||
| 98 | if (*usedp & (1U << j)) | ||
| 99 | break; | ||
| 100 | *usedp |= 1U << j; | ||
| 101 | *idp = ids[j]; | ||
| 102 | return (unsigned long *)end; | ||
| 103 | } | ||
| 104 | #endif | ||
| 105 | } | ||
| 106 | return NULL; | ||
| 107 | } | ||
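Editor's aside: the usedp bitmask threaded through in_exception_stack() guarantees that each exception stack is walked at most once, so corrupted linkage between stacks cannot send the unwinder into an endless loop. A stand-alone sketch of just that bookkeeping; the stack names are copied from the table above and the index used in main() is illustrative:

#include <stdio.h>

#define N_STACKS 5

static const char *ids[N_STACKS] = { "#DB", "NMI", "#DF", "#SS", "#MC" };

static const char *visit_stack(unsigned k, unsigned *usedp)
{
	if (*usedp & (1U << k))
		return NULL;		/* already walked: refuse to revisit */
	*usedp |= 1U << k;
	return ids[k];
}

int main(void)
{
	unsigned used = 0;
	const char *id;

	id = visit_stack(1, &used);
	printf("first visit: %s\n", id ? id : "refused");
	id = visit_stack(1, &used);
	printf("second visit: %s\n", id ? id : "refused");
	return 0;
}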
| 108 | |||
| 109 | /* | ||
| 110 | * x86-64 can have up to three kernel stacks: | ||
| 111 | * process stack | ||
| 112 | * interrupt stack | ||
| 113 | * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack | ||
| 114 | */ | ||
| 115 | |||
| 116 | static inline int valid_stack_ptr(struct thread_info *tinfo, | ||
| 117 | void *p, unsigned int size, void *end) | ||
| 118 | { | ||
| 119 | void *t = tinfo; | ||
| 120 | if (end) { | ||
| 121 | if (p < end && p >= (end-THREAD_SIZE)) | ||
| 122 | return 1; | ||
| 123 | else | ||
| 124 | return 0; | ||
| 125 | } | ||
| 126 | return p > t && p < t + THREAD_SIZE - size; | ||
| 127 | } | ||
| 128 | |||
| 129 | /* The form of the top of the frame on the stack */ | ||
| 130 | struct stack_frame { | ||
| 131 | struct stack_frame *next_frame; | ||
| 132 | unsigned long return_address; | ||
| 133 | }; | ||
| 134 | |||
| 135 | static inline unsigned long | ||
| 136 | print_context_stack(struct thread_info *tinfo, | ||
| 137 | unsigned long *stack, unsigned long bp, | ||
| 138 | const struct stacktrace_ops *ops, void *data, | ||
| 139 | unsigned long *end) | ||
| 140 | { | ||
| 141 | struct stack_frame *frame = (struct stack_frame *)bp; | ||
| 142 | |||
| 143 | while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) { | ||
| 144 | unsigned long addr; | ||
| 145 | |||
| 146 | addr = *stack; | ||
| 147 | if (__kernel_text_address(addr)) { | ||
| 148 | if ((unsigned long) stack == bp + sizeof(long)) { | ||
| 149 | ops->address(data, addr, 1); | ||
| 150 | frame = frame->next_frame; | ||
| 151 | bp = (unsigned long) frame; | ||
| 152 | } else { | ||
| 153 | ops->address(data, addr, bp == 0); | ||
| 154 | } | ||
| 155 | } | ||
| 156 | stack++; | ||
| 157 | } | ||
| 158 | return bp; | ||
| 159 | } | ||
| 160 | |||
| 161 | void dump_trace(struct task_struct *task, struct pt_regs *regs, | ||
| 162 | unsigned long *stack, unsigned long bp, | ||
| 163 | const struct stacktrace_ops *ops, void *data) | ||
| 164 | { | ||
| 165 | const unsigned cpu = get_cpu(); | ||
| 166 | unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; | ||
| 167 | unsigned used = 0; | ||
| 168 | struct thread_info *tinfo; | ||
| 169 | |||
| 170 | if (!task) | ||
| 171 | task = current; | ||
| 172 | |||
| 173 | if (!stack) { | ||
| 174 | unsigned long dummy; | ||
| 175 | stack = &dummy; | ||
| 176 | if (task && task != current) | ||
| 177 | stack = (unsigned long *)task->thread.sp; | ||
| 178 | } | ||
| 179 | |||
| 180 | #ifdef CONFIG_FRAME_POINTER | ||
| 181 | if (!bp) { | ||
| 182 | if (task == current) { | ||
| 183 | /* Grab bp right from our regs */ | ||
| 184 | get_bp(bp); | ||
| 185 | } else { | ||
| 186 | /* bp is the last reg pushed by switch_to */ | ||
| 187 | bp = *(unsigned long *) task->thread.sp; | ||
| 188 | } | ||
| 189 | } | ||
| 190 | #endif | ||
| 191 | |||
| 192 | /* | ||
| 193 | * Print function call entries in all stacks, starting at the | ||
| 194 | * current stack address. If the stacks consist of nested | ||
| 195 | * exceptions, unwind them one by one: | ||
| 196 | */ | ||
| 197 | tinfo = task_thread_info(task); | ||
| 198 | for (;;) { | ||
| 199 | char *id; | ||
| 200 | unsigned long *estack_end; | ||
| 201 | estack_end = in_exception_stack(cpu, (unsigned long)stack, | ||
| 202 | &used, &id); | ||
| 203 | |||
| 204 | if (estack_end) { | ||
| 205 | if (ops->stack(data, id) < 0) | ||
| 206 | break; | ||
| 207 | |||
| 208 | bp = print_context_stack(tinfo, stack, bp, ops, | ||
| 209 | data, estack_end); | ||
| 210 | ops->stack(data, "<EOE>"); | ||
| 211 | /* | ||
| 212 | * We link to the next stack via the | ||
| 213 | * second-to-last pointer (index -2 to end) in the | ||
| 214 | * exception stack: | ||
| 215 | */ | ||
| 216 | stack = (unsigned long *) estack_end[-2]; | ||
| 217 | continue; | ||
| 218 | } | ||
| 219 | if (irqstack_end) { | ||
| 220 | unsigned long *irqstack; | ||
| 221 | irqstack = irqstack_end - | ||
| 222 | (IRQSTACKSIZE - 64) / sizeof(*irqstack); | ||
| 223 | |||
| 224 | if (stack >= irqstack && stack < irqstack_end) { | ||
| 225 | if (ops->stack(data, "IRQ") < 0) | ||
| 226 | break; | ||
| 227 | bp = print_context_stack(tinfo, stack, bp, | ||
| 228 | ops, data, irqstack_end); | ||
| 229 | /* | ||
| 230 | * We link to the next stack (which would normally be | ||
| 231 | * the process stack) via the last | ||
| 232 | * pointer (index -1 to end) in the IRQ stack: | ||
| 233 | */ | ||
| 234 | stack = (unsigned long *) (irqstack_end[-1]); | ||
| 235 | irqstack_end = NULL; | ||
| 236 | ops->stack(data, "EOI"); | ||
| 237 | continue; | ||
| 238 | } | ||
| 239 | } | ||
| 240 | break; | ||
| 241 | } | ||
| 242 | |||
| 243 | /* | ||
| 244 | * This handles the process stack: | ||
| 245 | */ | ||
| 246 | bp = print_context_stack(tinfo, stack, bp, ops, data, NULL); | ||
| 247 | put_cpu(); | ||
| 248 | } | ||
| 249 | EXPORT_SYMBOL(dump_trace); | ||
| 250 | |||
| 251 | static void | ||
| 252 | print_trace_warning_symbol(void *data, char *msg, unsigned long symbol) | ||
| 253 | { | ||
| 254 | printk(data); | ||
| 255 | print_symbol(msg, symbol); | ||
| 256 | printk("\n"); | ||
| 257 | } | ||
| 258 | |||
| 259 | static void print_trace_warning(void *data, char *msg) | ||
| 260 | { | ||
| 261 | printk("%s%s\n", (char *)data, msg); | ||
| 262 | } | ||
| 263 | |||
| 264 | static int print_trace_stack(void *data, char *name) | ||
| 265 | { | ||
| 266 | printk("%s <%s> ", (char *)data, name); | ||
| 267 | return 0; | ||
| 268 | } | ||
| 269 | |||
| 270 | /* | ||
| 271 | * Print one address/symbol entry per line. | ||
| 272 | */ | ||
| 273 | static void print_trace_address(void *data, unsigned long addr, int reliable) | ||
| 274 | { | ||
| 275 | touch_nmi_watchdog(); | ||
| 276 | printk(data); | ||
| 277 | printk_address(addr, reliable); | ||
| 278 | } | ||
| 279 | |||
| 280 | static const struct stacktrace_ops print_trace_ops = { | ||
| 281 | .warning = print_trace_warning, | ||
| 282 | .warning_symbol = print_trace_warning_symbol, | ||
| 283 | .stack = print_trace_stack, | ||
| 284 | .address = print_trace_address, | ||
| 285 | }; | ||
| 286 | |||
| 287 | static void | ||
| 288 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
| 289 | unsigned long *stack, unsigned long bp, char *log_lvl) | ||
| 290 | { | ||
| 291 | printk("%sCall Trace:\n", log_lvl); | ||
| 292 | dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); | ||
| 293 | } | ||
| 294 | |||
| 295 | void show_trace(struct task_struct *task, struct pt_regs *regs, | ||
| 296 | unsigned long *stack, unsigned long bp) | ||
| 297 | { | ||
| 298 | show_trace_log_lvl(task, regs, stack, bp, ""); | ||
| 299 | } | ||
| 300 | |||
| 301 | static void | ||
| 302 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
| 303 | unsigned long *sp, unsigned long bp, char *log_lvl) | ||
| 304 | { | ||
| 305 | unsigned long *stack; | ||
| 306 | int i; | ||
| 307 | const int cpu = smp_processor_id(); | ||
| 308 | unsigned long *irqstack_end = | ||
| 309 | (unsigned long *) (cpu_pda(cpu)->irqstackptr); | ||
| 310 | unsigned long *irqstack = | ||
| 311 | (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE); | ||
| 312 | |||
| 313 | /* | ||
| 314 | * debugging aid: "show_stack(NULL, NULL);" prints the | ||
| 315 | * backtrace for this CPU. | ||
| 316 | */ | ||
| 317 | |||
| 318 | if (sp == NULL) { | ||
| 319 | if (task) | ||
| 320 | sp = (unsigned long *)task->thread.sp; | ||
| 321 | else | ||
| 322 | sp = (unsigned long *)&sp; | ||
| 323 | } | ||
| 324 | |||
| 325 | stack = sp; | ||
| 326 | for (i = 0; i < kstack_depth_to_print; i++) { | ||
| 327 | if (stack >= irqstack && stack <= irqstack_end) { | ||
| 328 | if (stack == irqstack_end) { | ||
| 329 | stack = (unsigned long *) (irqstack_end[-1]); | ||
| 330 | printk(" <EOI> "); | ||
| 331 | } | ||
| 332 | } else { | ||
| 333 | if (((long) stack & (THREAD_SIZE-1)) == 0) | ||
| 334 | break; | ||
| 335 | } | ||
| 336 | if (i && ((i % STACKSLOTS_PER_LINE) == 0)) | ||
| 337 | printk("\n%s", log_lvl); | ||
| 338 | printk(" %016lx", *stack++); | ||
| 339 | touch_nmi_watchdog(); | ||
| 340 | } | ||
| 341 | printk("\n"); | ||
| 342 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); | ||
| 343 | } | ||
| 344 | |||
| 345 | void show_stack(struct task_struct *task, unsigned long *sp) | ||
| 346 | { | ||
| 347 | show_stack_log_lvl(task, NULL, sp, 0, ""); | ||
| 348 | } | ||
| 349 | |||
| 350 | /* | ||
| 351 | * The architecture-independent dump_stack generator | ||
| 352 | */ | ||
| 353 | void dump_stack(void) | ||
| 354 | { | ||
| 355 | unsigned long bp = 0; | ||
| 356 | unsigned long stack; | ||
| 357 | |||
| 358 | #ifdef CONFIG_FRAME_POINTER | ||
| 359 | if (!bp) | ||
| 360 | get_bp(bp); | ||
| 361 | #endif | ||
| 362 | |||
| 363 | printk("Pid: %d, comm: %.20s %s %s %.*s\n", | ||
| 364 | current->pid, current->comm, print_tainted(), | ||
| 365 | init_utsname()->release, | ||
| 366 | (int)strcspn(init_utsname()->version, " "), | ||
| 367 | init_utsname()->version); | ||
| 368 | show_trace(NULL, NULL, &stack, bp); | ||
| 369 | } | ||
| 370 | EXPORT_SYMBOL(dump_stack); | ||
| 371 | |||
| 372 | void show_registers(struct pt_regs *regs) | ||
| 373 | { | ||
| 374 | int i; | ||
| 375 | unsigned long sp; | ||
| 376 | const int cpu = smp_processor_id(); | ||
| 377 | struct task_struct *cur = cpu_pda(cpu)->pcurrent; | ||
| 378 | |||
| 379 | sp = regs->sp; | ||
| 380 | printk("CPU %d ", cpu); | ||
| 381 | __show_regs(regs, 1); | ||
| 382 | printk("Process %s (pid: %d, threadinfo %p, task %p)\n", | ||
| 383 | cur->comm, cur->pid, task_thread_info(cur), cur); | ||
| 384 | |||
| 385 | /* | ||
| 386 | * When in-kernel, we also print out the stack and code at the | ||
| 387 | * time of the fault. | ||
| 388 | */ | ||
| 389 | if (!user_mode(regs)) { | ||
| 390 | unsigned int code_prologue = code_bytes * 43 / 64; | ||
| 391 | unsigned int code_len = code_bytes; | ||
| 392 | unsigned char c; | ||
| 393 | u8 *ip; | ||
| 394 | |||
| 395 | printk(KERN_EMERG "Stack:\n"); | ||
| 396 | show_stack_log_lvl(NULL, regs, (unsigned long *)sp, | ||
| 397 | regs->bp, KERN_EMERG); | ||
| 398 | |||
| 399 | printk(KERN_EMERG "Code: "); | ||
| 400 | |||
| 401 | ip = (u8 *)regs->ip - code_prologue; | ||
| 402 | if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { | ||
| 403 | /* try starting at IP */ | ||
| 404 | ip = (u8 *)regs->ip; | ||
| 405 | code_len = code_len - code_prologue + 1; | ||
| 406 | } | ||
| 407 | for (i = 0; i < code_len; i++, ip++) { | ||
| 408 | if (ip < (u8 *)PAGE_OFFSET || | ||
| 409 | probe_kernel_address(ip, c)) { | ||
| 410 | printk(" Bad RIP value."); | ||
| 411 | break; | ||
| 412 | } | ||
| 413 | if (ip == (u8 *)regs->ip) | ||
| 414 | printk("<%02x> ", c); | ||
| 415 | else | ||
| 416 | printk("%02x ", c); | ||
| 417 | } | ||
| 418 | } | ||
| 419 | printk("\n"); | ||
| 420 | } | ||
| 421 | |||
| 422 | int is_valid_bugaddr(unsigned long ip) | ||
| 423 | { | ||
| 424 | unsigned short ud2; | ||
| 425 | |||
| 426 | if (__copy_from_user(&ud2, (const void __user *) ip, sizeof(ud2))) | ||
| 427 | return 0; | ||
| 428 | |||
| 429 | return ud2 == 0x0b0f; | ||
| 430 | } | ||
| 431 | |||
| 432 | static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; | ||
| 433 | static int die_owner = -1; | ||
| 434 | static unsigned int die_nest_count; | ||
| 435 | |||
| 436 | unsigned __kprobes long oops_begin(void) | ||
| 437 | { | ||
| 438 | int cpu; | ||
| 439 | unsigned long flags; | ||
| 440 | |||
| 441 | oops_enter(); | ||
| 442 | |||
| 443 | /* racy, but better than risking deadlock. */ | ||
| 444 | raw_local_irq_save(flags); | ||
| 445 | cpu = smp_processor_id(); | ||
| 446 | if (!__raw_spin_trylock(&die_lock)) { | ||
| 447 | if (cpu == die_owner) | ||
| 448 | /* nested oops. should stop eventually */; | ||
| 449 | else | ||
| 450 | __raw_spin_lock(&die_lock); | ||
| 451 | } | ||
| 452 | die_nest_count++; | ||
| 453 | die_owner = cpu; | ||
| 454 | console_verbose(); | ||
| 455 | bust_spinlocks(1); | ||
| 456 | return flags; | ||
| 457 | } | ||
| 458 | |||
| 459 | void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) | ||
| 460 | { | ||
| 461 | die_owner = -1; | ||
| 462 | bust_spinlocks(0); | ||
| 463 | die_nest_count--; | ||
| 464 | if (!die_nest_count) | ||
| 465 | /* Nest count reached zero, release the lock. */ | ||
| 466 | __raw_spin_unlock(&die_lock); | ||
| 467 | raw_local_irq_restore(flags); | ||
| 468 | if (!regs) { | ||
| 469 | oops_exit(); | ||
| 470 | return; | ||
| 471 | } | ||
| 472 | if (in_interrupt()) | ||
| 473 | panic("Fatal exception in interrupt"); | ||
| 474 | if (panic_on_oops) | ||
| 475 | panic("Fatal exception"); | ||
| 476 | oops_exit(); | ||
| 477 | do_exit(signr); | ||
| 478 | } | ||
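Editor's aside: oops_begin()/oops_end() let the same CPU re-enter (a second fault while reporting the first one) without self-deadlocking on die_lock, and only when die_nest_count drops back to zero is the lock released. Below is a single-threaded sketch of just that bookkeeping, with the real spinlock, IRQ flags and console handling stripped out; it is a simplification, not the kernel's exact locking:

#include <stdio.h>

static int die_owner = -1;
static unsigned int die_nest_count;
static int die_lock_held;

static void oops_begin(int cpu)
{
	if (die_owner != cpu)
		die_lock_held = 1;	/* first entry on this CPU: "take" the lock */
	die_nest_count++;
	die_owner = cpu;
}

static void oops_end(void)
{
	die_owner = -1;
	if (--die_nest_count == 0)
		die_lock_held = 0;	/* outermost exit releases the lock */
}

int main(void)
{
	oops_begin(0);			/* the original oops */
	oops_begin(0);			/* a nested oops on the same CPU */
	printf("nested:  count=%u lock_held=%d\n", die_nest_count, die_lock_held);
	oops_end();
	oops_end();
	printf("unwound: count=%u lock_held=%d\n", die_nest_count, die_lock_held);
	return 0;
}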
| 479 | |||
| 480 | int __kprobes __die(const char *str, struct pt_regs *regs, long err) | ||
| 481 | { | ||
| 482 | printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter); | ||
| 483 | #ifdef CONFIG_PREEMPT | ||
| 484 | printk("PREEMPT "); | ||
| 485 | #endif | ||
| 486 | #ifdef CONFIG_SMP | ||
| 487 | printk("SMP "); | ||
| 488 | #endif | ||
| 489 | #ifdef CONFIG_DEBUG_PAGEALLOC | ||
| 490 | printk("DEBUG_PAGEALLOC"); | ||
| 491 | #endif | ||
| 492 | printk("\n"); | ||
| 493 | sysfs_printk_last_file(); | ||
| 494 | if (notify_die(DIE_OOPS, str, regs, err, | ||
| 495 | current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) | ||
| 496 | return 1; | ||
| 497 | |||
| 498 | show_registers(regs); | ||
| 499 | add_taint(TAINT_DIE); | ||
| 500 | /* Executive summary in case the oops scrolled away */ | ||
| 501 | printk(KERN_ALERT "RIP "); | ||
| 502 | printk_address(regs->ip, 1); | ||
| 503 | printk(" RSP <%016lx>\n", regs->sp); | ||
| 504 | if (kexec_should_crash(current)) | ||
| 505 | crash_kexec(regs); | ||
| 506 | return 0; | ||
| 507 | } | ||
| 508 | |||
| 509 | void die(const char *str, struct pt_regs *regs, long err) | ||
| 510 | { | ||
| 511 | unsigned long flags = oops_begin(); | ||
| 512 | |||
| 513 | if (!user_mode(regs)) | ||
| 514 | report_bug(regs->ip, regs); | ||
| 515 | |||
| 516 | if (__die(str, regs, err)) | ||
| 517 | regs = NULL; | ||
| 518 | oops_end(flags, regs, SIGSEGV); | ||
| 519 | } | ||
| 520 | |||
| 521 | notrace __kprobes void | ||
| 522 | die_nmi(char *str, struct pt_regs *regs, int do_panic) | ||
| 523 | { | ||
| 524 | unsigned long flags; | ||
| 525 | |||
| 526 | if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) | ||
| 527 | return; | ||
| 528 | |||
| 529 | flags = oops_begin(); | ||
| 530 | /* | ||
| 531 | * We are in trouble anyway, let's at least try | ||
| 532 | * to get a message out. | ||
| 533 | */ | ||
| 534 | printk(KERN_EMERG "%s", str); | ||
| 535 | printk(" on CPU%d, ip %08lx, registers:\n", | ||
| 536 | smp_processor_id(), regs->ip); | ||
| 537 | show_registers(regs); | ||
| 538 | if (kexec_should_crash(current)) | ||
| 539 | crash_kexec(regs); | ||
| 540 | if (do_panic || panic_on_oops) | ||
| 541 | panic("Non maskable interrupt"); | ||
| 542 | oops_end(flags, NULL, SIGBUS); | ||
| 543 | nmi_exit(); | ||
| 544 | local_irq_enable(); | ||
| 545 | do_exit(SIGBUS); | ||
| 546 | } | ||
| 547 | |||
| 548 | static int __init oops_setup(char *s) | ||
| 549 | { | ||
| 550 | if (!s) | ||
| 551 | return -EINVAL; | ||
| 552 | if (!strcmp(s, "panic")) | ||
| 553 | panic_on_oops = 1; | ||
| 554 | return 0; | ||
| 555 | } | ||
| 556 | early_param("oops", oops_setup); | ||
| 557 | |||
| 558 | static int __init kstack_setup(char *s) | ||
| 559 | { | ||
| 560 | if (!s) | ||
| 561 | return -EINVAL; | ||
| 562 | kstack_depth_to_print = simple_strtoul(s, NULL, 0); | ||
| 563 | return 0; | ||
| 564 | } | ||
| 565 | early_param("kstack", kstack_setup); | ||
| 566 | |||
| 567 | static int __init code_bytes_setup(char *s) | ||
| 568 | { | ||
| 569 | code_bytes = simple_strtoul(s, NULL, 0); | ||
| 570 | if (code_bytes > 8192) | ||
| 571 | code_bytes = 8192; | ||
| 572 | |||
| 573 | return 1; | ||
| 574 | } | ||
| 575 | __setup("code_bytes=", code_bytes_setup); | ||
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 28c29180b380..ce97bf3bed12 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c | |||
| @@ -148,6 +148,9 @@ void __init e820_print_map(char *who) | |||
| 148 | case E820_NVS: | 148 | case E820_NVS: |
| 149 | printk(KERN_CONT "(ACPI NVS)\n"); | 149 | printk(KERN_CONT "(ACPI NVS)\n"); |
| 150 | break; | 150 | break; |
| 151 | case E820_UNUSABLE: | ||
| 152 | printk("(unusable)\n"); | ||
| 153 | break; | ||
| 151 | default: | 154 | default: |
| 152 | printk(KERN_CONT "type %u\n", e820.map[i].type); | 155 | printk(KERN_CONT "type %u\n", e820.map[i].type); |
| 153 | break; | 156 | break; |
| @@ -877,7 +880,8 @@ void __init early_res_to_bootmem(u64 start, u64 end) | |||
| 877 | for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) | 880 | for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) |
| 878 | count++; | 881 | count++; |
| 879 | 882 | ||
| 880 | printk(KERN_INFO "(%d early reservations) ==> bootmem\n", count); | 883 | printk(KERN_INFO "(%d early reservations) ==> bootmem [%010llx - %010llx]\n", |
| 884 | count, start, end); | ||
| 881 | for (i = 0; i < count; i++) { | 885 | for (i = 0; i < count; i++) { |
| 882 | struct early_res *r = &early_res[i]; | 886 | struct early_res *r = &early_res[i]; |
| 883 | printk(KERN_INFO " #%d [%010llx - %010llx] %16s", i, | 887 | printk(KERN_INFO " #%d [%010llx - %010llx] %16s", i, |
| @@ -1202,7 +1206,7 @@ static int __init parse_memmap_opt(char *p) | |||
| 1202 | if (!p) | 1206 | if (!p) |
| 1203 | return -EINVAL; | 1207 | return -EINVAL; |
| 1204 | 1208 | ||
| 1205 | if (!strcmp(p, "exactmap")) { | 1209 | if (!strncmp(p, "exactmap", 8)) { |
| 1206 | #ifdef CONFIG_CRASH_DUMP | 1210 | #ifdef CONFIG_CRASH_DUMP |
| 1207 | /* | 1211 | /* |
| 1208 | * If we are doing a crash dump, we still need to know | 1212 | * If we are doing a crash dump, we still need to know |
| @@ -1259,6 +1263,7 @@ static inline const char *e820_type_to_string(int e820_type) | |||
| 1259 | case E820_RAM: return "System RAM"; | 1263 | case E820_RAM: return "System RAM"; |
| 1260 | case E820_ACPI: return "ACPI Tables"; | 1264 | case E820_ACPI: return "ACPI Tables"; |
| 1261 | case E820_NVS: return "ACPI Non-volatile Storage"; | 1265 | case E820_NVS: return "ACPI Non-volatile Storage"; |
| 1266 | case E820_UNUSABLE: return "Unusable memory"; | ||
| 1262 | default: return "reserved"; | 1267 | default: return "reserved"; |
| 1263 | } | 1268 | } |
| 1264 | } | 1269 | } |
| @@ -1266,6 +1271,7 @@ static inline const char *e820_type_to_string(int e820_type) | |||
| 1266 | /* | 1271 | /* |
| 1267 | * Mark e820 reserved areas as busy for the resource manager. | 1272 | * Mark e820 reserved areas as busy for the resource manager. |
| 1268 | */ | 1273 | */ |
| 1274 | static struct resource __initdata *e820_res; | ||
| 1269 | void __init e820_reserve_resources(void) | 1275 | void __init e820_reserve_resources(void) |
| 1270 | { | 1276 | { |
| 1271 | int i; | 1277 | int i; |
| @@ -1273,20 +1279,26 @@ void __init e820_reserve_resources(void) | |||
| 1273 | u64 end; | 1279 | u64 end; |
| 1274 | 1280 | ||
| 1275 | res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map); | 1281 | res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map); |
| 1282 | e820_res = res; | ||
| 1276 | for (i = 0; i < e820.nr_map; i++) { | 1283 | for (i = 0; i < e820.nr_map; i++) { |
| 1277 | end = e820.map[i].addr + e820.map[i].size - 1; | 1284 | end = e820.map[i].addr + e820.map[i].size - 1; |
| 1278 | #ifndef CONFIG_RESOURCES_64BIT | 1285 | if (end != (resource_size_t)end) { |
| 1279 | if (end > 0x100000000ULL) { | ||
| 1280 | res++; | 1286 | res++; |
| 1281 | continue; | 1287 | continue; |
| 1282 | } | 1288 | } |
| 1283 | #endif | ||
| 1284 | res->name = e820_type_to_string(e820.map[i].type); | 1289 | res->name = e820_type_to_string(e820.map[i].type); |
| 1285 | res->start = e820.map[i].addr; | 1290 | res->start = e820.map[i].addr; |
| 1286 | res->end = end; | 1291 | res->end = end; |
| 1287 | 1292 | ||
| 1288 | res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; | 1293 | res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; |
| 1289 | insert_resource(&iomem_resource, res); | 1294 | |
| 1295 | /* | ||
| 1296 | * don't register regions that could conflict with a | ||
| 1297 | * PCI device's BAR resource; insert them later in | ||
| 1298 | * pcibios_resource_survey() | ||
| 1299 | */ | ||
| 1300 | if (e820.map[i].type != E820_RESERVED || res->start < (1ULL<<20)) | ||
| 1301 | insert_resource(&iomem_resource, res); | ||
| 1290 | res++; | 1302 | res++; |
| 1291 | } | 1303 | } |
| 1292 | 1304 | ||
| @@ -1298,10 +1310,18 @@ void __init e820_reserve_resources(void) | |||
| 1298 | } | 1310 | } |
| 1299 | } | 1311 | } |
| 1300 | 1312 | ||
| 1301 | /* | 1313 | void __init e820_reserve_resources_late(void) |
| 1302 | * Non-standard memory setup can be specified via this quirk: | 1314 | { |
| 1303 | */ | 1315 | int i; |
| 1304 | char * (*arch_memory_setup_quirk)(void); | 1316 | struct resource *res; |
| 1317 | |||
| 1318 | res = e820_res; | ||
| 1319 | for (i = 0; i < e820.nr_map; i++) { | ||
| 1320 | if (!res->parent && res->end) | ||
| 1321 | reserve_region_with_split(&iomem_resource, res->start, res->end, res->name); | ||
| 1322 | res++; | ||
| 1323 | } | ||
| 1324 | } | ||
| 1305 | 1325 | ||
| 1306 | char *__init default_machine_specific_memory_setup(void) | 1326 | char *__init default_machine_specific_memory_setup(void) |
| 1307 | { | 1327 | { |
| @@ -1343,8 +1363,8 @@ char *__init default_machine_specific_memory_setup(void) | |||
| 1343 | 1363 | ||
| 1344 | char *__init __attribute__((weak)) machine_specific_memory_setup(void) | 1364 | char *__init __attribute__((weak)) machine_specific_memory_setup(void) |
| 1345 | { | 1365 | { |
| 1346 | if (arch_memory_setup_quirk) { | 1366 | if (x86_quirks->arch_memory_setup) { |
| 1347 | char *who = arch_memory_setup_quirk(); | 1367 | char *who = x86_quirks->arch_memory_setup(); |
| 1348 | 1368 | ||
| 1349 | if (who) | 1369 | if (who) |
| 1350 | return who; | 1370 | return who; |
| @@ -1367,24 +1387,3 @@ void __init setup_memory_map(void) | |||
| 1367 | printk(KERN_INFO "BIOS-provided physical RAM map:\n"); | 1387 | printk(KERN_INFO "BIOS-provided physical RAM map:\n"); |
| 1368 | e820_print_map(who); | 1388 | e820_print_map(who); |
| 1369 | } | 1389 | } |
| 1370 | |||
| 1371 | #ifdef CONFIG_X86_64 | ||
| 1372 | int __init arch_get_ram_range(int slot, u64 *addr, u64 *size) | ||
| 1373 | { | ||
| 1374 | int i; | ||
| 1375 | |||
| 1376 | if (slot < 0 || slot >= e820.nr_map) | ||
| 1377 | return -1; | ||
| 1378 | for (i = slot; i < e820.nr_map; i++) { | ||
| 1379 | if (e820.map[i].type != E820_RAM) | ||
| 1380 | continue; | ||
| 1381 | break; | ||
| 1382 | } | ||
| 1383 | if (i == e820.nr_map || e820.map[i].addr > (max_pfn << PAGE_SHIFT)) | ||
| 1384 | return -1; | ||
| 1385 | *addr = e820.map[i].addr; | ||
| 1386 | *size = min_t(u64, e820.map[i].size + e820.map[i].addr, | ||
| 1387 | max_pfn << PAGE_SHIFT) - *addr; | ||
| 1388 | return i + 1; | ||
| 1389 | } | ||
| 1390 | #endif | ||
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index a0e11c0cc872..3ce029ffaa55 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c | |||
| @@ -16,10 +16,7 @@ | |||
| 16 | #include <asm/dma.h> | 16 | #include <asm/dma.h> |
| 17 | #include <asm/io_apic.h> | 17 | #include <asm/io_apic.h> |
| 18 | #include <asm/apic.h> | 18 | #include <asm/apic.h> |
| 19 | 19 | #include <asm/iommu.h> | |
| 20 | #ifdef CONFIG_GART_IOMMU | ||
| 21 | #include <asm/gart.h> | ||
| 22 | #endif | ||
| 23 | 20 | ||
| 24 | static void __init fix_hypertransport_config(int num, int slot, int func) | 21 | static void __init fix_hypertransport_config(int num, int slot, int func) |
| 25 | { | 22 | { |
| @@ -98,6 +95,113 @@ static void __init nvidia_bugs(int num, int slot, int func) | |||
| 98 | 95 | ||
| 99 | } | 96 | } |
| 100 | 97 | ||
| 98 | #if defined(CONFIG_ACPI) && defined(CONFIG_X86_IO_APIC) | ||
| 99 | static u32 __init ati_ixp4x0_rev(int num, int slot, int func) | ||
| 100 | { | ||
| 101 | u32 d; | ||
| 102 | u8 b; | ||
| 103 | |||
| 104 | b = read_pci_config_byte(num, slot, func, 0xac); | ||
| 105 | b &= ~(1<<5); | ||
| 106 | write_pci_config_byte(num, slot, func, 0xac, b); | ||
| 107 | |||
| 108 | d = read_pci_config(num, slot, func, 0x70); | ||
| 109 | d |= 1<<8; | ||
| 110 | write_pci_config(num, slot, func, 0x70, d); | ||
| 111 | |||
| 112 | d = read_pci_config(num, slot, func, 0x8); | ||
| 113 | d &= 0xff; | ||
| 114 | return d; | ||
| 115 | } | ||
| 116 | |||
| 117 | static void __init ati_bugs(int num, int slot, int func) | ||
| 118 | { | ||
| 119 | u32 d; | ||
| 120 | u8 b; | ||
| 121 | |||
| 122 | if (acpi_use_timer_override) | ||
| 123 | return; | ||
| 124 | |||
| 125 | d = ati_ixp4x0_rev(num, slot, func); | ||
| 126 | if (d < 0x82) | ||
| 127 | acpi_skip_timer_override = 1; | ||
| 128 | else { | ||
| 129 | /* check for IRQ0 interrupt swap */ | ||
| 130 | outb(0x72, 0xcd6); b = inb(0xcd7); | ||
| 131 | if (!(b & 0x2)) | ||
| 132 | acpi_skip_timer_override = 1; | ||
| 133 | } | ||
| 134 | |||
| 135 | if (acpi_skip_timer_override) { | ||
| 136 | printk(KERN_INFO "SB4X0 revision 0x%x\n", d); | ||
| 137 | printk(KERN_INFO "Ignoring ACPI timer override.\n"); | ||
| 138 | printk(KERN_INFO "If you got timer trouble " | ||
| 139 | "try acpi_use_timer_override\n"); | ||
| 140 | } | ||
| 141 | } | ||
| 142 | |||
| 143 | static u32 __init ati_sbx00_rev(int num, int slot, int func) | ||
| 144 | { | ||
| 145 | u32 old, d; | ||
| 146 | |||
| 147 | d = read_pci_config(num, slot, func, 0x70); | ||
| 148 | old = d; | ||
| 149 | d &= ~(1<<8); | ||
| 150 | write_pci_config(num, slot, func, 0x70, d); | ||
| 151 | d = read_pci_config(num, slot, func, 0x8); | ||
| 152 | d &= 0xff; | ||
| 153 | write_pci_config(num, slot, func, 0x70, old); | ||
| 154 | |||
| 155 | return d; | ||
| 156 | } | ||
| 157 | |||
| 158 | static void __init ati_bugs_contd(int num, int slot, int func) | ||
| 159 | { | ||
| 160 | u32 d, rev; | ||
| 161 | |||
| 162 | if (acpi_use_timer_override) | ||
| 163 | return; | ||
| 164 | |||
| 165 | rev = ati_sbx00_rev(num, slot, func); | ||
| 166 | if (rev > 0x13) | ||
| 167 | return; | ||
| 168 | |||
| 169 | /* check for IRQ0 interrupt swap */ | ||
| 170 | d = read_pci_config(num, slot, func, 0x64); | ||
| 171 | if (!(d & (1<<14))) | ||
| 172 | acpi_skip_timer_override = 1; | ||
| 173 | |||
| 174 | if (acpi_skip_timer_override) { | ||
| 175 | printk(KERN_INFO "SB600 revision 0x%x\n", rev); | ||
| 176 | printk(KERN_INFO "Ignoring ACPI timer override.\n"); | ||
| 177 | printk(KERN_INFO "If you got timer trouble " | ||
| 178 | "try acpi_use_timer_override\n"); | ||
| 179 | } | ||
| 180 | } | ||
| 181 | #else | ||
| 182 | static void __init ati_bugs(int num, int slot, int func) | ||
| 183 | { | ||
| 184 | } | ||
| 185 | |||
| 186 | static void __init ati_bugs_contd(int num, int slot, int func) | ||
| 187 | { | ||
| 188 | } | ||
| 189 | #endif | ||
| 190 | |||
| 191 | #ifdef CONFIG_DMAR | ||
| 192 | static void __init intel_g33_dmar(int num, int slot, int func) | ||
| 193 | { | ||
| 194 | struct acpi_table_header *dmar_tbl; | ||
| 195 | acpi_status status; | ||
| 196 | |||
| 197 | status = acpi_get_table(ACPI_SIG_DMAR, 0, &dmar_tbl); | ||
| 198 | if (ACPI_SUCCESS(status)) { | ||
| 199 | printk(KERN_INFO "BIOS BUG: DMAR advertised on Intel G31/G33 chipset -- ignoring\n"); | ||
| 200 | dmar_disabled = 1; | ||
| 201 | } | ||
| 202 | } | ||
| 203 | #endif | ||
| 204 | |||
| 101 | #define QFLAG_APPLY_ONCE 0x1 | 205 | #define QFLAG_APPLY_ONCE 0x1 |
| 102 | #define QFLAG_APPLIED 0x2 | 206 | #define QFLAG_APPLIED 0x2 |
| 103 | #define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED) | 207 | #define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED) |
| @@ -117,6 +221,14 @@ static struct chipset early_qrk[] __initdata = { | |||
| 117 | PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, via_bugs }, | 221 | PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, via_bugs }, |
| 118 | { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB, | 222 | { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB, |
| 119 | PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, fix_hypertransport_config }, | 223 | PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, fix_hypertransport_config }, |
| 224 | { PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP400_SMBUS, | ||
| 225 | PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs }, | ||
| 226 | { PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS, | ||
| 227 | PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs_contd }, | ||
| 228 | #ifdef CONFIG_DMAR | ||
| 229 | { PCI_VENDOR_ID_INTEL, 0x29c0, | ||
| 230 | PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, intel_g33_dmar }, | ||
| 231 | #endif | ||
| 120 | {} | 232 | {} |
| 121 | }; | 233 | }; |
| 122 | 234 | ||
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index ff9e7350da54..34ad997d3834 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c | |||
| @@ -3,11 +3,19 @@ | |||
| 3 | #include <linux/init.h> | 3 | #include <linux/init.h> |
| 4 | #include <linux/string.h> | 4 | #include <linux/string.h> |
| 5 | #include <linux/screen_info.h> | 5 | #include <linux/screen_info.h> |
| 6 | #include <linux/usb/ch9.h> | ||
| 7 | #include <linux/pci_regs.h> | ||
| 8 | #include <linux/pci_ids.h> | ||
| 9 | #include <linux/errno.h> | ||
| 6 | #include <asm/io.h> | 10 | #include <asm/io.h> |
| 7 | #include <asm/processor.h> | 11 | #include <asm/processor.h> |
| 8 | #include <asm/fcntl.h> | 12 | #include <asm/fcntl.h> |
| 9 | #include <asm/setup.h> | 13 | #include <asm/setup.h> |
| 10 | #include <xen/hvc-console.h> | 14 | #include <xen/hvc-console.h> |
| 15 | #include <asm/pci-direct.h> | ||
| 16 | #include <asm/pgtable.h> | ||
| 17 | #include <asm/fixmap.h> | ||
| 18 | #include <linux/usb/ehci_def.h> | ||
| 11 | 19 | ||
| 12 | /* Simple VGA output */ | 20 | /* Simple VGA output */ |
| 13 | #define VGABASE (__ISA_IO_base + 0xb8000) | 21 | #define VGABASE (__ISA_IO_base + 0xb8000) |
| @@ -78,6 +86,7 @@ static int early_serial_base = 0x3f8; /* ttyS0 */ | |||
| 78 | static int early_serial_putc(unsigned char ch) | 86 | static int early_serial_putc(unsigned char ch) |
| 79 | { | 87 | { |
| 80 | unsigned timeout = 0xffff; | 88 | unsigned timeout = 0xffff; |
| 89 | |||
| 81 | while ((inb(early_serial_base + LSR) & XMTRDY) == 0 && --timeout) | 90 | while ((inb(early_serial_base + LSR) & XMTRDY) == 0 && --timeout) |
| 82 | cpu_relax(); | 91 | cpu_relax(); |
| 83 | outb(ch, early_serial_base + TXR); | 92 | outb(ch, early_serial_base + TXR); |
| @@ -111,7 +120,7 @@ static __init void early_serial_init(char *s) | |||
| 111 | if (!strncmp(s, "0x", 2)) { | 120 | if (!strncmp(s, "0x", 2)) { |
| 112 | early_serial_base = simple_strtoul(s, &e, 16); | 121 | early_serial_base = simple_strtoul(s, &e, 16); |
| 113 | } else { | 122 | } else { |
| 114 | static int bases[] = { 0x3f8, 0x2f8 }; | 123 | static const int __initconst bases[] = { 0x3f8, 0x2f8 }; |
| 115 | 124 | ||
| 116 | if (!strncmp(s, "ttyS", 4)) | 125 | if (!strncmp(s, "ttyS", 4)) |
| 117 | s += 4; | 126 | s += 4; |
| @@ -151,6 +160,721 @@ static struct console early_serial_console = { | |||
| 151 | .index = -1, | 160 | .index = -1, |
| 152 | }; | 161 | }; |
| 153 | 162 | ||
| 163 | #ifdef CONFIG_EARLY_PRINTK_DBGP | ||
| 164 | |||
| 165 | static struct ehci_caps __iomem *ehci_caps; | ||
| 166 | static struct ehci_regs __iomem *ehci_regs; | ||
| 167 | static struct ehci_dbg_port __iomem *ehci_debug; | ||
| 168 | static unsigned int dbgp_endpoint_out; | ||
| 169 | |||
| 170 | struct ehci_dev { | ||
| 171 | u32 bus; | ||
| 172 | u32 slot; | ||
| 173 | u32 func; | ||
| 174 | }; | ||
| 175 | |||
| 176 | static struct ehci_dev ehci_dev; | ||
| 177 | |||
| 178 | #define USB_DEBUG_DEVNUM 127 | ||
| 179 | |||
| 180 | #define DBGP_DATA_TOGGLE 0x8800 | ||
| 181 | |||
| 182 | static inline u32 dbgp_pid_update(u32 x, u32 tok) | ||
| 183 | { | ||
| 184 | return ((x ^ DBGP_DATA_TOGGLE) & 0xffff00) | (tok & 0xff); | ||
| 185 | } | ||
| 186 | |||
| 187 | static inline u32 dbgp_len_update(u32 x, u32 len) | ||
| 188 | { | ||
| 189 | return (x & ~0x0f) | (len & 0x0f); | ||
| 190 | } | ||
| 191 | |||
| 192 | /* | ||
| 193 | * USB Packet IDs (PIDs) | ||
| 194 | */ | ||
| 195 | |||
| 196 | /* token */ | ||
| 197 | #define USB_PID_OUT 0xe1 | ||
| 198 | #define USB_PID_IN 0x69 | ||
| 199 | #define USB_PID_SOF 0xa5 | ||
| 200 | #define USB_PID_SETUP 0x2d | ||
| 201 | /* handshake */ | ||
| 202 | #define USB_PID_ACK 0xd2 | ||
| 203 | #define USB_PID_NAK 0x5a | ||
| 204 | #define USB_PID_STALL 0x1e | ||
| 205 | #define USB_PID_NYET 0x96 | ||
| 206 | /* data */ | ||
| 207 | #define USB_PID_DATA0 0xc3 | ||
| 208 | #define USB_PID_DATA1 0x4b | ||
| 209 | #define USB_PID_DATA2 0x87 | ||
| 210 | #define USB_PID_MDATA 0x0f | ||
| 211 | /* Special */ | ||
| 212 | #define USB_PID_PREAMBLE 0x3c | ||
| 213 | #define USB_PID_ERR 0x3c | ||
| 214 | #define USB_PID_SPLIT 0x78 | ||
| 215 | #define USB_PID_PING 0xb4 | ||
| 216 | #define USB_PID_UNDEF_0 0xf0 | ||
| 217 | |||
| 218 | #define USB_PID_DATA_TOGGLE 0x88 | ||
| 219 | #define DBGP_CLAIM (DBGP_OWNER | DBGP_ENABLED | DBGP_INUSE) | ||
| 220 | |||
| 221 | #define PCI_CAP_ID_EHCI_DEBUG 0xa | ||
| 222 | |||
| 223 | #define HUB_ROOT_RESET_TIME 50 /* times are in msec */ | ||
| 224 | #define HUB_SHORT_RESET_TIME 10 | ||
| 225 | #define HUB_LONG_RESET_TIME 200 | ||
| 226 | #define HUB_RESET_TIMEOUT 500 | ||
| 227 | |||
| 228 | #define DBGP_MAX_PACKET 8 | ||
| 229 | |||
| 230 | static int dbgp_wait_until_complete(void) | ||
| 231 | { | ||
| 232 | u32 ctrl; | ||
| 233 | int loop = 0x100000; | ||
| 234 | |||
| 235 | do { | ||
| 236 | ctrl = readl(&ehci_debug->control); | ||
| 237 | /* Stop when the transaction is finished */ | ||
| 238 | if (ctrl & DBGP_DONE) | ||
| 239 | break; | ||
| 240 | } while (--loop > 0); | ||
| 241 | |||
| 242 | if (!loop) | ||
| 243 | return -1; | ||
| 244 | |||
| 245 | /* | ||
| 246 | * Now that we have observed the completed transaction, | ||
| 247 | * clear the done bit. | ||
| 248 | */ | ||
| 249 | writel(ctrl | DBGP_DONE, &ehci_debug->control); | ||
| 250 | return (ctrl & DBGP_ERROR) ? -DBGP_ERRCODE(ctrl) : DBGP_LEN(ctrl); | ||
| 251 | } | ||
| 252 | |||
| 253 | static void dbgp_mdelay(int ms) | ||
| 254 | { | ||
| 255 | int i; | ||
| 256 | |||
| 257 | while (ms--) { | ||
| 258 | for (i = 0; i < 1000; i++) | ||
| 259 | outb(0x1, 0x80); | ||
| 260 | } | ||
| 261 | } | ||
| 262 | |||
| 263 | static void dbgp_breath(void) | ||
| 264 | { | ||
| 265 | /* Sleep to give the debug port a chance to breathe */ | ||
| 266 | } | ||
| 267 | |||
| 268 | static int dbgp_wait_until_done(unsigned ctrl) | ||
| 269 | { | ||
| 270 | u32 pids, lpid; | ||
| 271 | int ret; | ||
| 272 | int loop = 3; | ||
| 273 | |||
| 274 | retry: | ||
| 275 | writel(ctrl | DBGP_GO, &ehci_debug->control); | ||
| 276 | ret = dbgp_wait_until_complete(); | ||
| 277 | pids = readl(&ehci_debug->pids); | ||
| 278 | lpid = DBGP_PID_GET(pids); | ||
| 279 | |||
| 280 | if (ret < 0) | ||
| 281 | return ret; | ||
| 282 | |||
| 283 | /* | ||
| 284 | * If the port is getting full or it has dropped data, | ||
| 285 | * start pacing ourselves; not strictly necessary, but it's friendly. | ||
| 286 | */ | ||
| 287 | if ((lpid == USB_PID_NAK) || (lpid == USB_PID_NYET)) | ||
| 288 | dbgp_breath(); | ||
| 289 | |||
| 290 | /* If we get a NAK, reissue the transmission */ | ||
| 291 | if (lpid == USB_PID_NAK) { | ||
| 292 | if (--loop > 0) | ||
| 293 | goto retry; | ||
| 294 | } | ||
| 295 | |||
| 296 | return ret; | ||
| 297 | } | ||
| 298 | |||
| 299 | static void dbgp_set_data(const void *buf, int size) | ||
| 300 | { | ||
| 301 | const unsigned char *bytes = buf; | ||
| 302 | u32 lo, hi; | ||
| 303 | int i; | ||
| 304 | |||
| 305 | lo = hi = 0; | ||
| 306 | for (i = 0; i < 4 && i < size; i++) | ||
| 307 | lo |= bytes[i] << (8*i); | ||
| 308 | for (; i < 8 && i < size; i++) | ||
| 309 | hi |= bytes[i] << (8*(i - 4)); | ||
| 310 | writel(lo, &ehci_debug->data03); | ||
| 311 | writel(hi, &ehci_debug->data47); | ||
| 312 | } | ||
| 313 | |||
| 314 | static void dbgp_get_data(void *buf, int size) | ||
| 315 | { | ||
| 316 | unsigned char *bytes = buf; | ||
| 317 | u32 lo, hi; | ||
| 318 | int i; | ||
| 319 | |||
| 320 | lo = readl(&ehci_debug->data03); | ||
| 321 | hi = readl(&ehci_debug->data47); | ||
| 322 | for (i = 0; i < 4 && i < size; i++) | ||
| 323 | bytes[i] = (lo >> (8*i)) & 0xff; | ||
| 324 | for (; i < 8 && i < size; i++) | ||
| 325 | bytes[i] = (hi >> (8*(i - 4))) & 0xff; | ||
| 326 | } | ||
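Editor's aside: dbgp_set_data()/dbgp_get_data() split a packet of up to eight bytes across the two 32-bit data registers of the EHCI debug port, little-endian within each register (data03 carries bytes 0-3, data47 bytes 4-7). The same packing as a stand-alone sketch that can run anywhere; the variable names and the sample payload are illustrative:

#include <stdio.h>
#include <stdint.h>

static void pack(const void *buf, int size, uint32_t *lo, uint32_t *hi)
{
	const unsigned char *bytes = buf;
	int i;

	*lo = *hi = 0;
	for (i = 0; i < 4 && i < size; i++)
		*lo |= (uint32_t)bytes[i] << (8 * i);
	for (; i < 8 && i < size; i++)
		*hi |= (uint32_t)bytes[i] << (8 * (i - 4));
}

static void unpack(uint32_t lo, uint32_t hi, void *buf, int size)
{
	unsigned char *bytes = buf;
	int i;

	for (i = 0; i < 4 && i < size; i++)
		bytes[i] = (lo >> (8 * i)) & 0xff;
	for (; i < 8 && i < size; i++)
		bytes[i] = (hi >> (8 * (i - 4))) & 0xff;
}

int main(void)
{
	uint32_t lo, hi;
	char out[9] = { 0 };

	pack("DEBUGPRT", 8, &lo, &hi);	/* sample 8-byte payload */
	unpack(lo, hi, out, 8);
	printf("lo=%08x hi=%08x -> \"%s\"\n", lo, hi, out);
	return 0;
}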
| 327 | |||
| 328 | static int dbgp_bulk_write(unsigned devnum, unsigned endpoint, | ||
| 329 | const char *bytes, int size) | ||
| 330 | { | ||
| 331 | u32 pids, addr, ctrl; | ||
| 332 | int ret; | ||
| 333 | |||
| 334 | if (size > DBGP_MAX_PACKET) | ||
| 335 | return -1; | ||
| 336 | |||
| 337 | addr = DBGP_EPADDR(devnum, endpoint); | ||
| 338 | |||
| 339 | pids = readl(&ehci_debug->pids); | ||
| 340 | pids = dbgp_pid_update(pids, USB_PID_OUT); | ||
| 341 | |||
| 342 | ctrl = readl(&ehci_debug->control); | ||
| 343 | ctrl = dbgp_len_update(ctrl, size); | ||
| 344 | ctrl |= DBGP_OUT; | ||
| 345 | ctrl |= DBGP_GO; | ||
| 346 | |||
| 347 | dbgp_set_data(bytes, size); | ||
| 348 | writel(addr, &ehci_debug->address); | ||
| 349 | writel(pids, &ehci_debug->pids); | ||
| 350 | |||
| 351 | ret = dbgp_wait_until_done(ctrl); | ||
| 352 | if (ret < 0) | ||
| 353 | return ret; | ||
| 354 | |||
| 355 | return ret; | ||
| 356 | } | ||
| 357 | |||
| 358 | static int dbgp_bulk_read(unsigned devnum, unsigned endpoint, void *data, | ||
| 359 | int size) | ||
| 360 | { | ||
| 361 | u32 pids, addr, ctrl; | ||
| 362 | int ret; | ||
| 363 | |||
| 364 | if (size > DBGP_MAX_PACKET) | ||
| 365 | return -1; | ||
| 366 | |||
| 367 | addr = DBGP_EPADDR(devnum, endpoint); | ||
| 368 | |||
| 369 | pids = readl(&ehci_debug->pids); | ||
| 370 | pids = dbgp_pid_update(pids, USB_PID_IN); | ||
| 371 | |||
| 372 | ctrl = readl(&ehci_debug->control); | ||
| 373 | ctrl = dbgp_len_update(ctrl, size); | ||
| 374 | ctrl &= ~DBGP_OUT; | ||
| 375 | ctrl |= DBGP_GO; | ||
| 376 | |||
| 377 | writel(addr, &ehci_debug->address); | ||
| 378 | writel(pids, &ehci_debug->pids); | ||
| 379 | ret = dbgp_wait_until_done(ctrl); | ||
| 380 | if (ret < 0) | ||
| 381 | return ret; | ||
| 382 | |||
| 383 | if (size > ret) | ||
| 384 | size = ret; | ||
| 385 | dbgp_get_data(data, size); | ||
| 386 | return ret; | ||
| 387 | } | ||
| 388 | |||
| 389 | static int dbgp_control_msg(unsigned devnum, int requesttype, int request, | ||
| 390 | int value, int index, void *data, int size) | ||
| 391 | { | ||
| 392 | u32 pids, addr, ctrl; | ||
| 393 | struct usb_ctrlrequest req; | ||
| 394 | int read; | ||
| 395 | int ret; | ||
| 396 | |||
| 397 | read = (requesttype & USB_DIR_IN) != 0; | ||
| 398 | if (size > (read ? DBGP_MAX_PACKET:0)) | ||
| 399 | return -1; | ||
| 400 | |||
| 401 | /* Compute the control message */ | ||
| 402 | req.bRequestType = requesttype; | ||
| 403 | req.bRequest = request; | ||
| 404 | req.wValue = cpu_to_le16(value); | ||
| 405 | req.wIndex = cpu_to_le16(index); | ||
| 406 | req.wLength = cpu_to_le16(size); | ||
| 407 | |||
| 408 | pids = DBGP_PID_SET(USB_PID_DATA0, USB_PID_SETUP); | ||
| 409 | addr = DBGP_EPADDR(devnum, 0); | ||
| 410 | |||
| 411 | ctrl = readl(&ehci_debug->control); | ||
| 412 | ctrl = dbgp_len_update(ctrl, sizeof(req)); | ||
| 413 | ctrl |= DBGP_OUT; | ||
| 414 | ctrl |= DBGP_GO; | ||
| 415 | |||
| 416 | /* Send the setup message */ | ||
| 417 | dbgp_set_data(&req, sizeof(req)); | ||
| 418 | writel(addr, &ehci_debug->address); | ||
| 419 | writel(pids, &ehci_debug->pids); | ||
| 420 | ret = dbgp_wait_until_done(ctrl); | ||
| 421 | if (ret < 0) | ||
| 422 | return ret; | ||
| 423 | |||
| 424 | /* Read the result */ | ||
| 425 | return dbgp_bulk_read(devnum, 0, data, size); | ||
| 426 | } | ||
| 427 | |||
| 428 | |||
| 429 | /* Find a PCI capability */ | ||
| 430 | static u32 __init find_cap(u32 num, u32 slot, u32 func, int cap) | ||
| 431 | { | ||
| 432 | u8 pos; | ||
| 433 | int bytes; | ||
| 434 | |||
| 435 | if (!(read_pci_config_16(num, slot, func, PCI_STATUS) & | ||
| 436 | PCI_STATUS_CAP_LIST)) | ||
| 437 | return 0; | ||
| 438 | |||
| 439 | pos = read_pci_config_byte(num, slot, func, PCI_CAPABILITY_LIST); | ||
| 440 | for (bytes = 0; bytes < 48 && pos >= 0x40; bytes++) { | ||
| 441 | u8 id; | ||
| 442 | |||
| 443 | pos &= ~3; | ||
| 444 | id = read_pci_config_byte(num, slot, func, pos+PCI_CAP_LIST_ID); | ||
| 445 | if (id == 0xff) | ||
| 446 | break; | ||
| 447 | if (id == cap) | ||
| 448 | return pos; | ||
| 449 | |||
| 450 | pos = read_pci_config_byte(num, slot, func, | ||
| 451 | pos+PCI_CAP_LIST_NEXT); | ||
| 452 | } | ||
| 453 | return 0; | ||
| 454 | } | ||
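Editor's aside: find_cap() walks the standard PCI capability list: if the status register advertises a list, follow the chain that starts at the pointer stored at offset 0x34, bounded to 48 hops and to offsets at or above 0x40. A stand-alone sketch of the same walk, with configuration space replaced by a plain byte array so it runs anywhere; the fake capability layout built in main() is made up:

#include <stdio.h>
#include <stdint.h>

#define PCI_STATUS		0x06
#define PCI_STATUS_CAP_LIST	0x10
#define PCI_CAPABILITY_LIST	0x34
#define PCI_CAP_LIST_ID		0
#define PCI_CAP_LIST_NEXT	1

static uint8_t cfg[256];	/* fake config space for the sketch */

static int find_cap(int cap)
{
	int bytes;
	uint8_t pos;

	if (!(cfg[PCI_STATUS] & PCI_STATUS_CAP_LIST))
		return 0;

	pos = cfg[PCI_CAPABILITY_LIST];
	for (bytes = 0; bytes < 48 && pos >= 0x40; bytes++) {
		uint8_t id;

		pos &= ~3;
		id = cfg[pos + PCI_CAP_LIST_ID];
		if (id == 0xff)
			break;
		if (id == cap)
			return pos;
		pos = cfg[pos + PCI_CAP_LIST_NEXT];
	}
	return 0;
}

int main(void)
{
	/* two-entry list: power management at 0x50, EHCI debug (0x0a) at 0x58 */
	cfg[PCI_STATUS] = PCI_STATUS_CAP_LIST;
	cfg[PCI_CAPABILITY_LIST] = 0x50;
	cfg[0x50] = 0x01; cfg[0x51] = 0x58;
	cfg[0x58] = 0x0a; cfg[0x59] = 0x00;

	printf("EHCI debug capability at offset %#x\n", find_cap(0x0a));
	return 0;
}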
| 455 | |||
| 456 | static u32 __init __find_dbgp(u32 bus, u32 slot, u32 func) | ||
| 457 | { | ||
| 458 | u32 class; | ||
| 459 | |||
| 460 | class = read_pci_config(bus, slot, func, PCI_CLASS_REVISION); | ||
| 461 | if ((class >> 8) != PCI_CLASS_SERIAL_USB_EHCI) | ||
| 462 | return 0; | ||
| 463 | |||
| 464 | return find_cap(bus, slot, func, PCI_CAP_ID_EHCI_DEBUG); | ||
| 465 | } | ||
| 466 | |||
| 467 | static u32 __init find_dbgp(int ehci_num, u32 *rbus, u32 *rslot, u32 *rfunc) | ||
| 468 | { | ||
| 469 | u32 bus, slot, func; | ||
| 470 | |||
| 471 | for (bus = 0; bus < 256; bus++) { | ||
| 472 | for (slot = 0; slot < 32; slot++) { | ||
| 473 | for (func = 0; func < 8; func++) { | ||
| 474 | unsigned cap; | ||
| 475 | |||
| 476 | cap = __find_dbgp(bus, slot, func); | ||
| 477 | |||
| 478 | if (!cap) | ||
| 479 | continue; | ||
| 480 | if (ehci_num-- != 0) | ||
| 481 | continue; | ||
| 482 | *rbus = bus; | ||
| 483 | *rslot = slot; | ||
| 484 | *rfunc = func; | ||
| 485 | return cap; | ||
| 486 | } | ||
| 487 | } | ||
| 488 | } | ||
| 489 | return 0; | ||
| 490 | } | ||
| 491 | |||
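find_dbgp() scans every bus/slot/function and returns the config-space offset of the Debug Port capability of the ehci_num-th matching EHCI controller, or 0 if none is found. A minimal caller sketch (hypothetical; early_dbgp_init() below is the real user):

```c
/* Hypothetical caller: locate the first EHCI controller with a debug cap. */
static int __init example_locate_dbgp(void)
{
	u32 bus, slot, func;
	u32 cap = find_dbgp(0, &bus, &slot, &func);	/* 0 => first match */

	if (!cap)
		return -ENODEV;
	dbgp_printk("debug cap at %02x:%02x.%x, cfg offset 0x%x\n",
		    bus, slot, func, cap);
	return 0;
}
```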
| 492 | static int ehci_reset_port(int port) | ||
| 493 | { | ||
| 494 | u32 portsc; | ||
| 495 | u32 delay_time, delay; | ||
| 496 | int loop; | ||
| 497 | |||
| 498 | /* Reset the usb debug port */ | ||
| 499 | portsc = readl(&ehci_regs->port_status[port - 1]); | ||
| 500 | portsc &= ~PORT_PE; | ||
| 501 | portsc |= PORT_RESET; | ||
| 502 | writel(portsc, &ehci_regs->port_status[port - 1]); | ||
| 503 | |||
| 504 | delay = HUB_ROOT_RESET_TIME; | ||
| 505 | for (delay_time = 0; delay_time < HUB_RESET_TIMEOUT; | ||
| 506 | delay_time += delay) { | ||
| 507 | dbgp_mdelay(delay); | ||
| 508 | |||
| 509 | portsc = readl(&ehci_regs->port_status[port - 1]); | ||
| 510 | if (portsc & PORT_RESET) { | ||
| 511 | /* force reset to complete */ | ||
| 512 | loop = 2; | ||
| 513 | writel(portsc & ~(PORT_RWC_BITS | PORT_RESET), | ||
| 514 | &ehci_regs->port_status[port - 1]); | ||
| 515 | do { | ||
| 516 | portsc = readl(&ehci_regs->port_status[port-1]); | ||
| 517 | } while ((portsc & PORT_RESET) && (--loop > 0)); | ||
| 518 | } | ||
| 519 | |||
| 520 | /* Device went away? */ | ||
| 521 | if (!(portsc & PORT_CONNECT)) | ||
| 522 | return -ENOTCONN; | ||
| 523 | |||
| 524 | /* bomb out completely if something weird happened */ | ||
| 525 | if ((portsc & PORT_CSC)) | ||
| 526 | return -EINVAL; | ||
| 527 | |||
| 528 | /* If we've finished resetting, then break out of the loop */ | ||
| 529 | if (!(portsc & PORT_RESET) && (portsc & PORT_PE)) | ||
| 530 | return 0; | ||
| 531 | } | ||
| 532 | return -EBUSY; | ||
| 533 | } | ||
| 534 | |||
| 535 | static int ehci_wait_for_port(int port) | ||
| 536 | { | ||
| 537 | u32 status; | ||
| 538 | int ret, reps; | ||
| 539 | |||
| 540 | for (reps = 0; reps < 3; reps++) { | ||
| 541 | dbgp_mdelay(100); | ||
| 542 | status = readl(&ehci_regs->status); | ||
| 543 | if (status & STS_PCD) { | ||
| 544 | ret = ehci_reset_port(port); | ||
| 545 | if (ret == 0) | ||
| 546 | return 0; | ||
| 547 | } | ||
| 548 | } | ||
| 549 | return -ENOTCONN; | ||
| 550 | } | ||
| 551 | |||
| 552 | #ifdef DBGP_DEBUG | ||
| 553 | # define dbgp_printk early_printk | ||
| 554 | #else | ||
| 555 | static inline void dbgp_printk(const char *fmt, ...) { } | ||
| 556 | #endif | ||
| 557 | |||
| 558 | typedef void (*set_debug_port_t)(int port); | ||
| 559 | |||
| 560 | static void default_set_debug_port(int port) | ||
| 561 | { | ||
| 562 | } | ||
| 563 | |||
| 564 | static set_debug_port_t set_debug_port = default_set_debug_port; | ||
| 565 | |||
| 566 | static void nvidia_set_debug_port(int port) | ||
| 567 | { | ||
| 568 | u32 dword; | ||
| 569 | dword = read_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func, | ||
| 570 | 0x74); | ||
| 571 | dword &= ~(0x0f<<12); | ||
| 572 | dword |= ((port & 0x0f)<<12); | ||
| 573 | write_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func, 0x74, | ||
| 574 | dword); | ||
| 575 | dbgp_printk("set debug port to %d\n", port); | ||
| 576 | } | ||
| 577 | |||
| 578 | static void __init detect_set_debug_port(void) | ||
| 579 | { | ||
| 580 | u32 vendorid; | ||
| 581 | |||
| 582 | vendorid = read_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func, | ||
| 583 | 0x00); | ||
| 584 | |||
| 585 | if ((vendorid & 0xffff) == 0x10de) { | ||
| 586 | dbgp_printk("using nvidia set_debug_port\n"); | ||
| 587 | set_debug_port = nvidia_set_debug_port; | ||
| 588 | } | ||
| 589 | } | ||
| 590 | |||
| 591 | static int __init ehci_setup(void) | ||
| 592 | { | ||
| 593 | struct usb_debug_descriptor dbgp_desc; | ||
| 594 | u32 cmd, ctrl, status, portsc, hcs_params; | ||
| 595 | u32 debug_port, new_debug_port = 0, n_ports; | ||
| 596 | u32 devnum; | ||
| 597 | int ret, i; | ||
| 598 | int loop; | ||
| 599 | int port_map_tried; | ||
| 600 | int playtimes = 3; | ||
| 601 | |||
| 602 | try_next_time: | ||
| 603 | port_map_tried = 0; | ||
| 604 | |||
| 605 | try_next_port: | ||
| 606 | |||
| 607 | hcs_params = readl(&ehci_caps->hcs_params); | ||
| 608 | debug_port = HCS_DEBUG_PORT(hcs_params); | ||
| 609 | n_ports = HCS_N_PORTS(hcs_params); | ||
| 610 | |||
| 611 | dbgp_printk("debug_port: %d\n", debug_port); | ||
| 612 | dbgp_printk("n_ports: %d\n", n_ports); | ||
| 613 | |||
| 614 | for (i = 1; i <= n_ports; i++) { | ||
| 615 | portsc = readl(&ehci_regs->port_status[i-1]); | ||
| 616 | dbgp_printk("portstatus%d: %08x\n", i, portsc); | ||
| 617 | } | ||
| 618 | |||
| 619 | if (port_map_tried && (new_debug_port != debug_port)) { | ||
| 620 | if (--playtimes) { | ||
| 621 | set_debug_port(new_debug_port); | ||
| 622 | goto try_next_time; | ||
| 623 | } | ||
| 624 | return -1; | ||
| 625 | } | ||
| 626 | |||
| 627 | loop = 10; | ||
| 628 | /* Reset the EHCI controller */ | ||
| 629 | cmd = readl(&ehci_regs->command); | ||
| 630 | cmd |= CMD_RESET; | ||
| 631 | writel(cmd, &ehci_regs->command); | ||
| 632 | do { | ||
| 633 | cmd = readl(&ehci_regs->command); | ||
| 634 | } while ((cmd & CMD_RESET) && (--loop > 0)); | ||
| 635 | |||
| 636 | if (!loop) { | ||
| 637 | dbgp_printk("can not reset ehci\n"); | ||
| 638 | return -1; | ||
| 639 | } | ||
| 640 | dbgp_printk("ehci reset done\n"); | ||
| 641 | |||
| 642 | /* Claim ownership, but do not enable yet */ | ||
| 643 | ctrl = readl(&ehci_debug->control); | ||
| 644 | ctrl |= DBGP_OWNER; | ||
| 645 | ctrl &= ~(DBGP_ENABLED | DBGP_INUSE); | ||
| 646 | writel(ctrl, &ehci_debug->control); | ||
| 647 | |||
| 648 | /* Start the ehci running */ | ||
| 649 | cmd = readl(&ehci_regs->command); | ||
| 650 | cmd &= ~(CMD_LRESET | CMD_IAAD | CMD_PSE | CMD_ASE | CMD_RESET); | ||
| 651 | cmd |= CMD_RUN; | ||
| 652 | writel(cmd, &ehci_regs->command); | ||
| 653 | |||
| 654 | /* Ensure everything is routed to the EHCI */ | ||
| 655 | writel(FLAG_CF, &ehci_regs->configured_flag); | ||
| 656 | |||
| 657 | /* Wait until the controller is no longer halted */ | ||
| 658 | loop = 10; | ||
| 659 | do { | ||
| 660 | status = readl(&ehci_regs->status); | ||
| 661 | } while ((status & STS_HALT) && (--loop > 0)); | ||
| 662 | |||
| 663 | if (!loop) { | ||
| 664 | dbgp_printk("ehci can not be started\n"); | ||
| 665 | return -1; | ||
| 666 | } | ||
| 667 | dbgp_printk("ehci started\n"); | ||
| 668 | |||
| 669 | /* Wait for a device to show up in the debug port */ | ||
| 670 | ret = ehci_wait_for_port(debug_port); | ||
| 671 | if (ret < 0) { | ||
| 672 | dbgp_printk("No device found in debug port\n"); | ||
| 673 | goto next_debug_port; | ||
| 674 | } | ||
| 675 | dbgp_printk("ehci wait for port done\n"); | ||
| 676 | |||
| 677 | /* Enable the debug port */ | ||
| 678 | ctrl = readl(&ehci_debug->control); | ||
| 679 | ctrl |= DBGP_CLAIM; | ||
| 680 | writel(ctrl, &ehci_debug->control); | ||
| 681 | ctrl = readl(&ehci_debug->control); | ||
| 682 | if ((ctrl & DBGP_CLAIM) != DBGP_CLAIM) { | ||
| 683 | dbgp_printk("No device in debug port\n"); | ||
| 684 | writel(ctrl & ~DBGP_CLAIM, &ehci_debug->control); | ||
| 685 | goto err; | ||
| 686 | } | ||
| 687 | dbgp_printk("debug port enabled\n"); | ||
| 688 | |||
| 689 | /* Completely transfer the debug device to the debug controller */ | ||
| 690 | portsc = readl(&ehci_regs->port_status[debug_port - 1]); | ||
| 691 | portsc &= ~PORT_PE; | ||
| 692 | writel(portsc, &ehci_regs->port_status[debug_port - 1]); | ||
| 693 | |||
| 694 | dbgp_mdelay(100); | ||
| 695 | |||
| 696 | /* Find the debug device and make it device number 127 */ | ||
| 697 | for (devnum = 0; devnum <= 127; devnum++) { | ||
| 698 | ret = dbgp_control_msg(devnum, | ||
| 699 | USB_DIR_IN | USB_TYPE_STANDARD | USB_RECIP_DEVICE, | ||
| 700 | USB_REQ_GET_DESCRIPTOR, (USB_DT_DEBUG << 8), 0, | ||
| 701 | &dbgp_desc, sizeof(dbgp_desc)); | ||
| 702 | if (ret > 0) | ||
| 703 | break; | ||
| 704 | } | ||
| 705 | if (devnum > 127) { | ||
| 706 | dbgp_printk("Could not find attached debug device\n"); | ||
| 707 | goto err; | ||
| 708 | } | ||
| 709 | if (ret < 0) { | ||
| 710 | dbgp_printk("Attached device is not a debug device\n"); | ||
| 711 | goto err; | ||
| 712 | } | ||
| 713 | dbgp_endpoint_out = dbgp_desc.bDebugOutEndpoint; | ||
| 714 | |||
| 715 | /* Move the device to 127 if it isn't already there */ | ||
| 716 | if (devnum != USB_DEBUG_DEVNUM) { | ||
| 717 | ret = dbgp_control_msg(devnum, | ||
| 718 | USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE, | ||
| 719 | USB_REQ_SET_ADDRESS, USB_DEBUG_DEVNUM, 0, NULL, 0); | ||
| 720 | if (ret < 0) { | ||
| 721 | dbgp_printk("Could not move attached device to %d\n", | ||
| 722 | USB_DEBUG_DEVNUM); | ||
| 723 | goto err; | ||
| 724 | } | ||
| 725 | devnum = USB_DEBUG_DEVNUM; | ||
| 726 | dbgp_printk("debug device renamed to 127\n"); | ||
| 727 | } | ||
| 728 | |||
| 729 | /* Enable the debug interface */ | ||
| 730 | ret = dbgp_control_msg(USB_DEBUG_DEVNUM, | ||
| 731 | USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE, | ||
| 732 | USB_REQ_SET_FEATURE, USB_DEVICE_DEBUG_MODE, 0, NULL, 0); | ||
| 733 | if (ret < 0) { | ||
| 734 | dbgp_printk(" Could not enable the debug device\n"); | ||
| 735 | goto err; | ||
| 736 | } | ||
| 737 | dbgp_printk("debug interface enabled\n"); | ||
| 738 | |||
| 739 | /* Perform a small write to get the even/odd data state in sync | ||
| 740 | */ | ||
| 741 | ret = dbgp_bulk_write(USB_DEBUG_DEVNUM, dbgp_endpoint_out, " ", 1); | ||
| 742 | if (ret < 0) { | ||
| 743 | dbgp_printk("dbgp_bulk_write failed: %d\n", ret); | ||
| 744 | goto err; | ||
| 745 | } | ||
| 746 | dbgp_printk("small write done\n"); | ||
| 747 | |||
| 748 | return 0; | ||
| 749 | err: | ||
| 750 | /* Things didn't work so remove my claim */ | ||
| 751 | ctrl = readl(&ehci_debug->control); | ||
| 752 | ctrl &= ~(DBGP_CLAIM | DBGP_OUT); | ||
| 753 | writel(ctrl, &ehci_debug->control); | ||
| 754 | return -1; | ||
| 755 | |||
| 756 | next_debug_port: | ||
| 757 | port_map_tried |= (1<<(debug_port - 1)); | ||
| 758 | new_debug_port = ((debug_port-1+1)%n_ports) + 1; | ||
| 759 | if (port_map_tried != ((1<<n_ports) - 1)) { | ||
| 760 | set_debug_port(new_debug_port); | ||
| 761 | goto try_next_port; | ||
| 762 | } | ||
| 763 | if (--playtimes) { | ||
| 764 | set_debug_port(new_debug_port); | ||
| 765 | goto try_next_time; | ||
| 766 | } | ||
| 767 | |||
| 768 | return -1; | ||
| 769 | } | ||
| 770 | |||
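When no device shows up, ehci_setup() rotates through the root ports: port_map_tried records already-tried ports as a bitmask, and the next candidate wraps around because ports are numbered 1..n_ports. The rotation arithmetic above reduces to the sketch below (e.g. port 1 of 4 maps to 2, port 4 of 4 wraps to 1):

```c
/* Sketch of the wrap-around used by the next_debug_port label above. */
static unsigned int next_candidate_port(unsigned int debug_port,
					unsigned int n_ports)
{
	/* ((debug_port - 1 + 1) % n_ports) + 1 simplifies to this: */
	return (debug_port % n_ports) + 1;
}
```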
| 771 | static int __init early_dbgp_init(char *s) | ||
| 772 | { | ||
| 773 | u32 debug_port, bar, offset; | ||
| 774 | u32 bus, slot, func, cap; | ||
| 775 | void __iomem *ehci_bar; | ||
| 776 | u32 dbgp_num; | ||
| 777 | u32 bar_val; | ||
| 778 | char *e; | ||
| 779 | int ret; | ||
| 780 | u8 byte; | ||
| 781 | |||
| 782 | if (!early_pci_allowed()) | ||
| 783 | return -1; | ||
| 784 | |||
| 785 | dbgp_num = 0; | ||
| 786 | if (*s) | ||
| 787 | dbgp_num = simple_strtoul(s, &e, 10); | ||
| 788 | dbgp_printk("dbgp_num: %d\n", dbgp_num); | ||
| 789 | |||
| 790 | cap = find_dbgp(dbgp_num, &bus, &slot, &func); | ||
| 791 | if (!cap) | ||
| 792 | return -1; | ||
| 793 | |||
| 794 | dbgp_printk("Found EHCI debug port on %02x:%02x.%1x\n", bus, slot, | ||
| 795 | func); | ||
| 796 | |||
| 797 | debug_port = read_pci_config(bus, slot, func, cap); | ||
| 798 | bar = (debug_port >> 29) & 0x7; | ||
| 799 | bar = (bar * 4) + 0xc; | ||
| 800 | offset = (debug_port >> 16) & 0xfff; | ||
| 801 | dbgp_printk("bar: %02x offset: %03x\n", bar, offset); | ||
| 802 | if (bar != PCI_BASE_ADDRESS_0) { | ||
| 803 | dbgp_printk("only debug ports on bar 1 handled.\n"); | ||
| 804 | |||
| 805 | return -1; | ||
| 806 | } | ||
| 807 | |||
| 808 | bar_val = read_pci_config(bus, slot, func, PCI_BASE_ADDRESS_0); | ||
| 809 | dbgp_printk("bar_val: %02x offset: %03x\n", bar_val, offset); | ||
| 810 | if (bar_val & ~PCI_BASE_ADDRESS_MEM_MASK) { | ||
| 811 | dbgp_printk("only simple 32bit mmio bars supported\n"); | ||
| 812 | |||
| 813 | return -1; | ||
| 814 | } | ||
| 815 | |||
| 816 | /* double check if the mem space is enabled */ | ||
| 817 | byte = read_pci_config_byte(bus, slot, func, 0x04); | ||
| 818 | if (!(byte & 0x2)) { | ||
| 819 | byte |= 0x02; | ||
| 820 | write_pci_config_byte(bus, slot, func, 0x04, byte); | ||
| 821 | dbgp_printk("mmio for ehci enabled\n"); | ||
| 822 | } | ||
| 823 | |||
| 824 | /* | ||
| 825 | * FIXME I don't have the bar size so just guess PAGE_SIZE is more | ||
| 826 | * than enough. 1K is the biggest I have seen. | ||
| 827 | */ | ||
| 828 | set_fixmap_nocache(FIX_DBGP_BASE, bar_val & PAGE_MASK); | ||
| 829 | ehci_bar = (void __iomem *)__fix_to_virt(FIX_DBGP_BASE); | ||
| 830 | ehci_bar += bar_val & ~PAGE_MASK; | ||
| 831 | dbgp_printk("ehci_bar: %p\n", ehci_bar); | ||
| 832 | |||
| 833 | ehci_caps = ehci_bar; | ||
| 834 | ehci_regs = ehci_bar + HC_LENGTH(readl(&ehci_caps->hc_capbase)); | ||
| 835 | ehci_debug = ehci_bar + offset; | ||
| 836 | ehci_dev.bus = bus; | ||
| 837 | ehci_dev.slot = slot; | ||
| 838 | ehci_dev.func = func; | ||
| 839 | |||
| 840 | detect_set_debug_port(); | ||
| 841 | |||
| 842 | ret = ehci_setup(); | ||
| 843 | if (ret < 0) { | ||
| 844 | dbgp_printk("ehci_setup failed\n"); | ||
| 845 | ehci_debug = NULL; | ||
| 846 | |||
| 847 | return -1; | ||
| 848 | } | ||
| 849 | |||
| 850 | return 0; | ||
| 851 | } | ||
| 852 | |||
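early_dbgp_init() decodes the Debug Port capability register as read above: bits 31:29 carry a 1-based BAR number and bits 27:16 the byte offset of the debug registers inside that BAR. A small sketch of that decoding (hypothetical helper, same arithmetic as the code above):

```c
struct dbgp_cap_loc {
	unsigned int bar;		/* 1-based BAR number (bits 31:29) */
	unsigned int bar_cfg_off;	/* config-space offset of that BAR */
	unsigned int offset;		/* offset inside the BAR (bits 27:16) */
};

/* Hypothetical helper mirroring the decoding in early_dbgp_init(). */
static struct dbgp_cap_loc decode_dbgp_cap(u32 reg)
{
	struct dbgp_cap_loc loc;

	loc.bar = (reg >> 29) & 0x7;
	loc.bar_cfg_off = loc.bar * 4 + 0xc;	/* bar 1 -> 0x10 == PCI_BASE_ADDRESS_0 */
	loc.offset = (reg >> 16) & 0xfff;
	return loc;
}
```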
| 853 | static void early_dbgp_write(struct console *con, const char *str, u32 n) | ||
| 854 | { | ||
| 855 | int chunk, ret; | ||
| 856 | |||
| 857 | if (!ehci_debug) | ||
| 858 | return; | ||
| 859 | while (n > 0) { | ||
| 860 | chunk = n; | ||
| 861 | if (chunk > DBGP_MAX_PACKET) | ||
| 862 | chunk = DBGP_MAX_PACKET; | ||
| 863 | ret = dbgp_bulk_write(USB_DEBUG_DEVNUM, | ||
| 864 | dbgp_endpoint_out, str, chunk); | ||
| 865 | str += chunk; | ||
| 866 | n -= chunk; | ||
| 867 | } | ||
| 868 | } | ||
| 869 | |||
| 870 | static struct console early_dbgp_console = { | ||
| 871 | .name = "earlydbg", | ||
| 872 | .write = early_dbgp_write, | ||
| 873 | .flags = CON_PRINTBUFFER, | ||
| 874 | .index = -1, | ||
| 875 | }; | ||
| 876 | #endif | ||
| 877 | |||
| 154 | /* Console interface to a host file on AMD's SimNow! */ | 878 | /* Console interface to a host file on AMD's SimNow! */ |
| 155 | 879 | ||
| 156 | static int simnow_fd; | 880 | static int simnow_fd; |
| @@ -165,6 +889,7 @@ enum { | |||
| 165 | static noinline long simnow(long cmd, long a, long b, long c) | 889 | static noinline long simnow(long cmd, long a, long b, long c) |
| 166 | { | 890 | { |
| 167 | long ret; | 891 | long ret; |
| 892 | |||
| 168 | asm volatile("cpuid" : | 893 | asm volatile("cpuid" : |
| 169 | "=a" (ret) : | 894 | "=a" (ret) : |
| 170 | "b" (a), "c" (b), "d" (c), "0" (MAGIC1), "D" (cmd + MAGIC2)); | 895 | "b" (a), "c" (b), "d" (c), "0" (MAGIC1), "D" (cmd + MAGIC2)); |
| @@ -174,6 +899,7 @@ static noinline long simnow(long cmd, long a, long b, long c) | |||
| 174 | static void __init simnow_init(char *str) | 899 | static void __init simnow_init(char *str) |
| 175 | { | 900 | { |
| 176 | char *fn = "klog"; | 901 | char *fn = "klog"; |
| 902 | |||
| 177 | if (*str == '=') | 903 | if (*str == '=') |
| 178 | fn = ++str; | 904 | fn = ++str; |
| 179 | /* error ignored */ | 905 | /* error ignored */ |
| @@ -194,7 +920,7 @@ static struct console simnow_console = { | |||
| 194 | 920 | ||
| 195 | /* Direct interface for emergencies */ | 921 | /* Direct interface for emergencies */ |
| 196 | static struct console *early_console = &early_vga_console; | 922 | static struct console *early_console = &early_vga_console; |
| 197 | static int early_console_initialized; | 923 | static int __initdata early_console_initialized; |
| 198 | 924 | ||
| 199 | asmlinkage void early_printk(const char *fmt, ...) | 925 | asmlinkage void early_printk(const char *fmt, ...) |
| 200 | { | 926 | { |
| @@ -208,10 +934,11 @@ asmlinkage void early_printk(const char *fmt, ...) | |||
| 208 | va_end(ap); | 934 | va_end(ap); |
| 209 | } | 935 | } |
| 210 | 936 | ||
| 211 | static int __initdata keep_early; | ||
| 212 | 937 | ||
| 213 | static int __init setup_early_printk(char *buf) | 938 | static int __init setup_early_printk(char *buf) |
| 214 | { | 939 | { |
| 940 | int keep_early; | ||
| 941 | |||
| 215 | if (!buf) | 942 | if (!buf) |
| 216 | return 0; | 943 | return 0; |
| 217 | 944 | ||
| @@ -219,8 +946,7 @@ static int __init setup_early_printk(char *buf) | |||
| 219 | return 0; | 946 | return 0; |
| 220 | early_console_initialized = 1; | 947 | early_console_initialized = 1; |
| 221 | 948 | ||
| 222 | if (strstr(buf, "keep")) | 949 | keep_early = (strstr(buf, "keep") != NULL); |
| 223 | keep_early = 1; | ||
| 224 | 950 | ||
| 225 | if (!strncmp(buf, "serial", 6)) { | 951 | if (!strncmp(buf, "serial", 6)) { |
| 226 | early_serial_init(buf + 6); | 952 | early_serial_init(buf + 6); |
| @@ -238,6 +964,17 @@ static int __init setup_early_printk(char *buf) | |||
| 238 | simnow_init(buf + 6); | 964 | simnow_init(buf + 6); |
| 239 | early_console = &simnow_console; | 965 | early_console = &simnow_console; |
| 240 | keep_early = 1; | 966 | keep_early = 1; |
| 967 | #ifdef CONFIG_EARLY_PRINTK_DBGP | ||
| 968 | } else if (!strncmp(buf, "dbgp", 4)) { | ||
| 969 | if (early_dbgp_init(buf+4) < 0) | ||
| 970 | return 0; | ||
| 971 | early_console = &early_dbgp_console; | ||
| 972 | /* | ||
| 973 | * usb subsys will reset ehci controller, so don't keep | ||
| 974 | * that early console | ||
| 975 | */ | ||
| 976 | keep_early = 0; | ||
| 977 | #endif | ||
| 241 | #ifdef CONFIG_HVC_XEN | 978 | #ifdef CONFIG_HVC_XEN |
| 242 | } else if (!strncmp(buf, "xen", 3)) { | 979 | } else if (!strncmp(buf, "xen", 3)) { |
| 243 | early_console = &xenboot_console; | 980 | early_console = &xenboot_console; |
| @@ -251,4 +988,5 @@ static int __init setup_early_printk(char *buf) | |||
| 251 | register_console(early_console); | 988 | register_console(early_console); |
| 252 | return 0; | 989 | return 0; |
| 253 | } | 990 | } |
| 991 | |||
| 254 | early_param("earlyprintk", setup_early_printk); | 992 | early_param("earlyprintk", setup_early_printk); |
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index 06cc8d4254b1..1119d247fe11 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c | |||
| @@ -367,6 +367,10 @@ void __init efi_init(void) | |||
| 367 | efi.smbios = config_tables[i].table; | 367 | efi.smbios = config_tables[i].table; |
| 368 | printk(" SMBIOS=0x%lx ", config_tables[i].table); | 368 | printk(" SMBIOS=0x%lx ", config_tables[i].table); |
| 369 | } else if (!efi_guidcmp(config_tables[i].guid, | 369 | } else if (!efi_guidcmp(config_tables[i].guid, |
| 370 | UV_SYSTEM_TABLE_GUID)) { | ||
| 371 | efi.uv_systab = config_tables[i].table; | ||
| 372 | printk(" UVsystab=0x%lx ", config_tables[i].table); | ||
| 373 | } else if (!efi_guidcmp(config_tables[i].guid, | ||
| 370 | HCDP_TABLE_GUID)) { | 374 | HCDP_TABLE_GUID)) { |
| 371 | efi.hcdp = config_tables[i].table; | 375 | efi.hcdp = config_tables[i].table; |
| 372 | printk(" HCDP=0x%lx ", config_tables[i].table); | 376 | printk(" HCDP=0x%lx ", config_tables[i].table); |
| @@ -414,9 +418,11 @@ void __init efi_init(void) | |||
| 414 | if (memmap.map == NULL) | 418 | if (memmap.map == NULL) |
| 415 | printk(KERN_ERR "Could not map the EFI memory map!\n"); | 419 | printk(KERN_ERR "Could not map the EFI memory map!\n"); |
| 416 | memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size); | 420 | memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size); |
| 421 | |||
| 417 | if (memmap.desc_size != sizeof(efi_memory_desc_t)) | 422 | if (memmap.desc_size != sizeof(efi_memory_desc_t)) |
| 418 | printk(KERN_WARNING "Kernel-defined memdesc" | 423 | printk(KERN_WARNING |
| 419 | "doesn't match the one from EFI!\n"); | 424 | "Kernel-defined memdesc doesn't match the one from EFI!\n"); |
| 425 | |||
| 420 | if (add_efi_memmap) | 426 | if (add_efi_memmap) |
| 421 | do_add_efi_memmap(); | 427 | do_add_efi_memmap(); |
| 422 | 428 | ||
diff --git a/arch/x86/kernel/efi_32.c b/arch/x86/kernel/efi_32.c index 4b63c8e1f13b..5cab48ee61a4 100644 --- a/arch/x86/kernel/efi_32.c +++ b/arch/x86/kernel/efi_32.c | |||
| @@ -53,7 +53,7 @@ void efi_call_phys_prelog(void) | |||
| 53 | * directory. If I have PAE, I just need to duplicate one entry in | 53 | * directory. If I have PAE, I just need to duplicate one entry in |
| 54 | * page directory. | 54 | * page directory. |
| 55 | */ | 55 | */ |
| 56 | cr4 = read_cr4(); | 56 | cr4 = read_cr4_safe(); |
| 57 | 57 | ||
| 58 | if (cr4 & X86_CR4_PAE) { | 58 | if (cr4 & X86_CR4_PAE) { |
| 59 | efi_bak_pg_dir_pointer[0].pgd = | 59 | efi_bak_pg_dir_pointer[0].pgd = |
| @@ -91,7 +91,7 @@ void efi_call_phys_epilog(void) | |||
| 91 | gdt_descr.size = GDT_SIZE - 1; | 91 | gdt_descr.size = GDT_SIZE - 1; |
| 92 | load_gdt(&gdt_descr); | 92 | load_gdt(&gdt_descr); |
| 93 | 93 | ||
| 94 | cr4 = read_cr4(); | 94 | cr4 = read_cr4_safe(); |
| 95 | 95 | ||
| 96 | if (cr4 & X86_CR4_PAE) { | 96 | if (cr4 & X86_CR4_PAE) { |
| 97 | swapper_pg_dir[pgd_index(0)].pgd = | 97 | swapper_pg_dir[pgd_index(0)].pgd = |
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 6bc07f0f1202..dd65143941a8 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
| @@ -54,6 +54,16 @@ | |||
| 54 | #include <asm/ftrace.h> | 54 | #include <asm/ftrace.h> |
| 55 | #include <asm/irq_vectors.h> | 55 | #include <asm/irq_vectors.h> |
| 56 | 56 | ||
| 57 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ | ||
| 58 | #include <linux/elf-em.h> | ||
| 59 | #define AUDIT_ARCH_I386 (EM_386|__AUDIT_ARCH_LE) | ||
| 60 | #define __AUDIT_ARCH_LE 0x40000000 | ||
| 61 | |||
| 62 | #ifndef CONFIG_AUDITSYSCALL | ||
| 63 | #define sysenter_audit syscall_trace_entry | ||
| 64 | #define sysexit_audit syscall_exit_work | ||
| 65 | #endif | ||
| 66 | |||
| 57 | /* | 67 | /* |
| 58 | * We use macros for low-level operations which need to be overridden | 68 | * We use macros for low-level operations which need to be overridden |
| 59 | * for paravirtualization. The following will never clobber any registers: | 69 | * for paravirtualization. The following will never clobber any registers: |
| @@ -332,8 +342,9 @@ sysenter_past_esp: | |||
| 332 | GET_THREAD_INFO(%ebp) | 342 | GET_THREAD_INFO(%ebp) |
| 333 | 343 | ||
| 334 | /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ | 344 | /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ |
| 335 | testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) | 345 | testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) |
| 336 | jnz syscall_trace_entry | 346 | jnz sysenter_audit |
| 347 | sysenter_do_call: | ||
| 337 | cmpl $(nr_syscalls), %eax | 348 | cmpl $(nr_syscalls), %eax |
| 338 | jae syscall_badsys | 349 | jae syscall_badsys |
| 339 | call *sys_call_table(,%eax,4) | 350 | call *sys_call_table(,%eax,4) |
| @@ -343,7 +354,8 @@ sysenter_past_esp: | |||
| 343 | TRACE_IRQS_OFF | 354 | TRACE_IRQS_OFF |
| 344 | movl TI_flags(%ebp), %ecx | 355 | movl TI_flags(%ebp), %ecx |
| 345 | testw $_TIF_ALLWORK_MASK, %cx | 356 | testw $_TIF_ALLWORK_MASK, %cx |
| 346 | jne syscall_exit_work | 357 | jne sysexit_audit |
| 358 | sysenter_exit: | ||
| 347 | /* if something modifies registers it must also disable sysexit */ | 359 | /* if something modifies registers it must also disable sysexit */ |
| 348 | movl PT_EIP(%esp), %edx | 360 | movl PT_EIP(%esp), %edx |
| 349 | movl PT_OLDESP(%esp), %ecx | 361 | movl PT_OLDESP(%esp), %ecx |
| @@ -351,6 +363,45 @@ sysenter_past_esp: | |||
| 351 | TRACE_IRQS_ON | 363 | TRACE_IRQS_ON |
| 352 | 1: mov PT_FS(%esp), %fs | 364 | 1: mov PT_FS(%esp), %fs |
| 353 | ENABLE_INTERRUPTS_SYSEXIT | 365 | ENABLE_INTERRUPTS_SYSEXIT |
| 366 | |||
| 367 | #ifdef CONFIG_AUDITSYSCALL | ||
| 368 | sysenter_audit: | ||
| 369 | testw $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp) | ||
| 370 | jnz syscall_trace_entry | ||
| 371 | addl $4,%esp | ||
| 372 | CFI_ADJUST_CFA_OFFSET -4 | ||
| 373 | /* %esi already in 8(%esp) 6th arg: 4th syscall arg */ | ||
| 374 | /* %edx already in 4(%esp) 5th arg: 3rd syscall arg */ | ||
| 375 | /* %ecx already in 0(%esp) 4th arg: 2nd syscall arg */ | ||
| 376 | movl %ebx,%ecx /* 3rd arg: 1st syscall arg */ | ||
| 377 | movl %eax,%edx /* 2nd arg: syscall number */ | ||
| 378 | movl $AUDIT_ARCH_I386,%eax /* 1st arg: audit arch */ | ||
| 379 | call audit_syscall_entry | ||
| 380 | pushl %ebx | ||
| 381 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 382 | movl PT_EAX(%esp),%eax /* reload syscall number */ | ||
| 383 | jmp sysenter_do_call | ||
| 384 | |||
| 385 | sysexit_audit: | ||
| 386 | testw $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %cx | ||
| 387 | jne syscall_exit_work | ||
| 388 | TRACE_IRQS_ON | ||
| 389 | ENABLE_INTERRUPTS(CLBR_ANY) | ||
| 390 | movl %eax,%edx /* second arg, syscall return value */ | ||
| 391 | cmpl $0,%eax /* is it < 0? */ | ||
| 392 | setl %al /* 1 if so, 0 if not */ | ||
| 393 | movzbl %al,%eax /* zero-extend that */ | ||
| 394 | inc %eax /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */ | ||
| 395 | call audit_syscall_exit | ||
| 396 | DISABLE_INTERRUPTS(CLBR_ANY) | ||
| 397 | TRACE_IRQS_OFF | ||
| 398 | movl TI_flags(%ebp), %ecx | ||
| 399 | testw $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %cx | ||
| 400 | jne syscall_exit_work | ||
| 401 | movl PT_EAX(%esp),%eax /* reload syscall return value */ | ||
| 402 | jmp sysenter_exit | ||
| 403 | #endif | ||
| 404 | |||
| 354 | CFI_ENDPROC | 405 | CFI_ENDPROC |
| 355 | .pushsection .fixup,"ax" | 406 | .pushsection .fixup,"ax" |
| 356 | 2: movl $0,PT_FS(%esp) | 407 | 2: movl $0,PT_FS(%esp) |
| @@ -370,7 +421,7 @@ ENTRY(system_call) | |||
| 370 | GET_THREAD_INFO(%ebp) | 421 | GET_THREAD_INFO(%ebp) |
| 371 | # system call tracing in operation / emulation | 422 | # system call tracing in operation / emulation |
| 372 | /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ | 423 | /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ |
| 373 | testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) | 424 | testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) |
| 374 | jnz syscall_trace_entry | 425 | jnz syscall_trace_entry |
| 375 | cmpl $(nr_syscalls), %eax | 426 | cmpl $(nr_syscalls), %eax |
| 376 | jae syscall_badsys | 427 | jae syscall_badsys |
| @@ -383,10 +434,6 @@ syscall_exit: | |||
| 383 | # setting need_resched or sigpending | 434 | # setting need_resched or sigpending |
| 384 | # between sampling and the iret | 435 | # between sampling and the iret |
| 385 | TRACE_IRQS_OFF | 436 | TRACE_IRQS_OFF |
| 386 | testl $X86_EFLAGS_TF,PT_EFLAGS(%esp) # If tracing set singlestep flag on exit | ||
| 387 | jz no_singlestep | ||
| 388 | orl $_TIF_SINGLESTEP,TI_flags(%ebp) | ||
| 389 | no_singlestep: | ||
| 390 | movl TI_flags(%ebp), %ecx | 437 | movl TI_flags(%ebp), %ecx |
| 391 | testw $_TIF_ALLWORK_MASK, %cx # current->work | 438 | testw $_TIF_ALLWORK_MASK, %cx # current->work |
| 392 | jne syscall_exit_work | 439 | jne syscall_exit_work |
| @@ -514,12 +561,8 @@ END(work_pending) | |||
| 514 | syscall_trace_entry: | 561 | syscall_trace_entry: |
| 515 | movl $-ENOSYS,PT_EAX(%esp) | 562 | movl $-ENOSYS,PT_EAX(%esp) |
| 516 | movl %esp, %eax | 563 | movl %esp, %eax |
| 517 | xorl %edx,%edx | 564 | call syscall_trace_enter |
| 518 | call do_syscall_trace | 565 | /* What it returned is what we'll actually use. */ |
| 519 | cmpl $0, %eax | ||
| 520 | jne resume_userspace # ret != 0 -> running under PTRACE_SYSEMU, | ||
| 521 | # so must skip actual syscall | ||
| 522 | movl PT_ORIG_EAX(%esp), %eax | ||
| 523 | cmpl $(nr_syscalls), %eax | 566 | cmpl $(nr_syscalls), %eax |
| 524 | jnae syscall_call | 567 | jnae syscall_call |
| 525 | jmp syscall_exit | 568 | jmp syscall_exit |
| @@ -528,14 +571,13 @@ END(syscall_trace_entry) | |||
| 528 | # perform syscall exit tracing | 571 | # perform syscall exit tracing |
| 529 | ALIGN | 572 | ALIGN |
| 530 | syscall_exit_work: | 573 | syscall_exit_work: |
| 531 | testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl | 574 | testb $_TIF_WORK_SYSCALL_EXIT, %cl |
| 532 | jz work_pending | 575 | jz work_pending |
| 533 | TRACE_IRQS_ON | 576 | TRACE_IRQS_ON |
| 534 | ENABLE_INTERRUPTS(CLBR_ANY) # could let do_syscall_trace() call | 577 | ENABLE_INTERRUPTS(CLBR_ANY) # could let syscall_trace_leave() call |
| 535 | # schedule() instead | 578 | # schedule() instead |
| 536 | movl %esp, %eax | 579 | movl %esp, %eax |
| 537 | movl $1, %edx | 580 | call syscall_trace_leave |
| 538 | call do_syscall_trace | ||
| 539 | jmp resume_userspace | 581 | jmp resume_userspace |
| 540 | END(syscall_exit_work) | 582 | END(syscall_exit_work) |
| 541 | CFI_ENDPROC | 583 | CFI_ENDPROC |
| @@ -587,7 +629,7 @@ ENTRY(interrupt) | |||
| 587 | ENTRY(irq_entries_start) | 629 | ENTRY(irq_entries_start) |
| 588 | RING0_INT_FRAME | 630 | RING0_INT_FRAME |
| 589 | vector=0 | 631 | vector=0 |
| 590 | .rept NR_IRQS | 632 | .rept NR_VECTORS |
| 591 | ALIGN | 633 | ALIGN |
| 592 | .if vector | 634 | .if vector |
| 593 | CFI_ADJUST_CFA_OFFSET -4 | 635 | CFI_ADJUST_CFA_OFFSET -4 |
| @@ -688,6 +730,7 @@ error_code: | |||
| 688 | movl $(__USER_DS), %ecx | 730 | movl $(__USER_DS), %ecx |
| 689 | movl %ecx, %ds | 731 | movl %ecx, %ds |
| 690 | movl %ecx, %es | 732 | movl %ecx, %es |
| 733 | TRACE_IRQS_OFF | ||
| 691 | movl %esp,%eax # pt_regs pointer | 734 | movl %esp,%eax # pt_regs pointer |
| 692 | call *%edi | 735 | call *%edi |
| 693 | jmp ret_from_exception | 736 | jmp ret_from_exception |
| @@ -718,20 +761,9 @@ ENTRY(device_not_available) | |||
| 718 | RING0_INT_FRAME | 761 | RING0_INT_FRAME |
| 719 | pushl $-1 # mark this as an int | 762 | pushl $-1 # mark this as an int |
| 720 | CFI_ADJUST_CFA_OFFSET 4 | 763 | CFI_ADJUST_CFA_OFFSET 4 |
| 721 | SAVE_ALL | 764 | pushl $do_device_not_available |
| 722 | GET_CR0_INTO_EAX | ||
| 723 | testl $0x4, %eax # EM (math emulation bit) | ||
| 724 | jne device_not_available_emulate | ||
| 725 | preempt_stop(CLBR_ANY) | ||
| 726 | call math_state_restore | ||
| 727 | jmp ret_from_exception | ||
| 728 | device_not_available_emulate: | ||
| 729 | pushl $0 # temporary storage for ORIG_EIP | ||
| 730 | CFI_ADJUST_CFA_OFFSET 4 | 765 | CFI_ADJUST_CFA_OFFSET 4 |
| 731 | call math_emulate | 766 | jmp error_code |
| 732 | addl $4, %esp | ||
| 733 | CFI_ADJUST_CFA_OFFSET -4 | ||
| 734 | jmp ret_from_exception | ||
| 735 | CFI_ENDPROC | 767 | CFI_ENDPROC |
| 736 | END(device_not_available) | 768 | END(device_not_available) |
| 737 | 769 | ||
| @@ -772,6 +804,7 @@ debug_stack_correct: | |||
| 772 | pushl $-1 # mark this as an int | 804 | pushl $-1 # mark this as an int |
| 773 | CFI_ADJUST_CFA_OFFSET 4 | 805 | CFI_ADJUST_CFA_OFFSET 4 |
| 774 | SAVE_ALL | 806 | SAVE_ALL |
| 807 | TRACE_IRQS_OFF | ||
| 775 | xorl %edx,%edx # error code 0 | 808 | xorl %edx,%edx # error code 0 |
| 776 | movl %esp,%eax # pt_regs pointer | 809 | movl %esp,%eax # pt_regs pointer |
| 777 | call do_debug | 810 | call do_debug |
| @@ -816,6 +849,7 @@ nmi_stack_correct: | |||
| 816 | pushl %eax | 849 | pushl %eax |
| 817 | CFI_ADJUST_CFA_OFFSET 4 | 850 | CFI_ADJUST_CFA_OFFSET 4 |
| 818 | SAVE_ALL | 851 | SAVE_ALL |
| 852 | TRACE_IRQS_OFF | ||
| 819 | xorl %edx,%edx # zero error code | 853 | xorl %edx,%edx # zero error code |
| 820 | movl %esp,%eax # pt_regs pointer | 854 | movl %esp,%eax # pt_regs pointer |
| 821 | call do_nmi | 855 | call do_nmi |
| @@ -856,6 +890,7 @@ nmi_espfix_stack: | |||
| 856 | pushl %eax | 890 | pushl %eax |
| 857 | CFI_ADJUST_CFA_OFFSET 4 | 891 | CFI_ADJUST_CFA_OFFSET 4 |
| 858 | SAVE_ALL | 892 | SAVE_ALL |
| 893 | TRACE_IRQS_OFF | ||
| 859 | FIXUP_ESPFIX_STACK # %eax == %esp | 894 | FIXUP_ESPFIX_STACK # %eax == %esp |
| 860 | xorl %edx,%edx # zero error code | 895 | xorl %edx,%edx # zero error code |
| 861 | call do_nmi | 896 | call do_nmi |
| @@ -886,6 +921,7 @@ KPROBE_ENTRY(int3) | |||
| 886 | pushl $-1 # mark this as an int | 921 | pushl $-1 # mark this as an int |
| 887 | CFI_ADJUST_CFA_OFFSET 4 | 922 | CFI_ADJUST_CFA_OFFSET 4 |
| 888 | SAVE_ALL | 923 | SAVE_ALL |
| 924 | TRACE_IRQS_OFF | ||
| 889 | xorl %edx,%edx # zero error code | 925 | xorl %edx,%edx # zero error code |
| 890 | movl %esp,%eax # pt_regs pointer | 926 | movl %esp,%eax # pt_regs pointer |
| 891 | call do_int3 | 927 | call do_int3 |
| @@ -1024,6 +1060,7 @@ ENDPROC(kernel_thread_helper) | |||
| 1024 | ENTRY(xen_sysenter_target) | 1060 | ENTRY(xen_sysenter_target) |
| 1025 | RING0_INT_FRAME | 1061 | RING0_INT_FRAME |
| 1026 | addl $5*4, %esp /* remove xen-provided frame */ | 1062 | addl $5*4, %esp /* remove xen-provided frame */ |
| 1063 | CFI_ADJUST_CFA_OFFSET -5*4 | ||
| 1027 | jmp sysenter_past_esp | 1064 | jmp sysenter_past_esp |
| 1028 | CFI_ENDPROC | 1065 | CFI_ENDPROC |
| 1029 | 1066 | ||
| @@ -1116,20 +1153,6 @@ ENDPROC(xen_failsafe_callback) | |||
| 1116 | #ifdef CONFIG_DYNAMIC_FTRACE | 1153 | #ifdef CONFIG_DYNAMIC_FTRACE |
| 1117 | 1154 | ||
| 1118 | ENTRY(mcount) | 1155 | ENTRY(mcount) |
| 1119 | pushl %eax | ||
| 1120 | pushl %ecx | ||
| 1121 | pushl %edx | ||
| 1122 | movl 0xc(%esp), %eax | ||
| 1123 | subl $MCOUNT_INSN_SIZE, %eax | ||
| 1124 | |||
| 1125 | .globl mcount_call | ||
| 1126 | mcount_call: | ||
| 1127 | call ftrace_stub | ||
| 1128 | |||
| 1129 | popl %edx | ||
| 1130 | popl %ecx | ||
| 1131 | popl %eax | ||
| 1132 | |||
| 1133 | ret | 1156 | ret |
| 1134 | END(mcount) | 1157 | END(mcount) |
| 1135 | 1158 | ||
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index ae63e584c340..09e7145484c5 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
| @@ -53,37 +53,17 @@ | |||
| 53 | #include <asm/paravirt.h> | 53 | #include <asm/paravirt.h> |
| 54 | #include <asm/ftrace.h> | 54 | #include <asm/ftrace.h> |
| 55 | 55 | ||
| 56 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ | ||
| 57 | #include <linux/elf-em.h> | ||
| 58 | #define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE) | ||
| 59 | #define __AUDIT_ARCH_64BIT 0x80000000 | ||
| 60 | #define __AUDIT_ARCH_LE 0x40000000 | ||
| 61 | |||
| 56 | .code64 | 62 | .code64 |
| 57 | 63 | ||
| 58 | #ifdef CONFIG_FTRACE | 64 | #ifdef CONFIG_FTRACE |
| 59 | #ifdef CONFIG_DYNAMIC_FTRACE | 65 | #ifdef CONFIG_DYNAMIC_FTRACE |
| 60 | ENTRY(mcount) | 66 | ENTRY(mcount) |
| 61 | |||
| 62 | subq $0x38, %rsp | ||
| 63 | movq %rax, (%rsp) | ||
| 64 | movq %rcx, 8(%rsp) | ||
| 65 | movq %rdx, 16(%rsp) | ||
| 66 | movq %rsi, 24(%rsp) | ||
| 67 | movq %rdi, 32(%rsp) | ||
| 68 | movq %r8, 40(%rsp) | ||
| 69 | movq %r9, 48(%rsp) | ||
| 70 | |||
| 71 | movq 0x38(%rsp), %rdi | ||
| 72 | subq $MCOUNT_INSN_SIZE, %rdi | ||
| 73 | |||
| 74 | .globl mcount_call | ||
| 75 | mcount_call: | ||
| 76 | call ftrace_stub | ||
| 77 | |||
| 78 | movq 48(%rsp), %r9 | ||
| 79 | movq 40(%rsp), %r8 | ||
| 80 | movq 32(%rsp), %rdi | ||
| 81 | movq 24(%rsp), %rsi | ||
| 82 | movq 16(%rsp), %rdx | ||
| 83 | movq 8(%rsp), %rcx | ||
| 84 | movq (%rsp), %rax | ||
| 85 | addq $0x38, %rsp | ||
| 86 | |||
| 87 | retq | 67 | retq |
| 88 | END(mcount) | 68 | END(mcount) |
| 89 | 69 | ||
| @@ -269,9 +249,9 @@ ENTRY(native_usergs_sysret64) | |||
| 269 | ENTRY(ret_from_fork) | 249 | ENTRY(ret_from_fork) |
| 270 | CFI_DEFAULT_STACK | 250 | CFI_DEFAULT_STACK |
| 271 | push kernel_eflags(%rip) | 251 | push kernel_eflags(%rip) |
| 272 | CFI_ADJUST_CFA_OFFSET 4 | 252 | CFI_ADJUST_CFA_OFFSET 8 |
| 273 | popf # reset kernel eflags | 253 | popf # reset kernel eflags |
| 274 | CFI_ADJUST_CFA_OFFSET -4 | 254 | CFI_ADJUST_CFA_OFFSET -8 |
| 275 | call schedule_tail | 255 | call schedule_tail |
| 276 | GET_THREAD_INFO(%rcx) | 256 | GET_THREAD_INFO(%rcx) |
| 277 | testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx) | 257 | testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx) |
| @@ -349,9 +329,9 @@ ENTRY(system_call_after_swapgs) | |||
| 349 | movq %rcx,RIP-ARGOFFSET(%rsp) | 329 | movq %rcx,RIP-ARGOFFSET(%rsp) |
| 350 | CFI_REL_OFFSET rip,RIP-ARGOFFSET | 330 | CFI_REL_OFFSET rip,RIP-ARGOFFSET |
| 351 | GET_THREAD_INFO(%rcx) | 331 | GET_THREAD_INFO(%rcx) |
| 352 | testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \ | 332 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx) |
| 353 | TI_flags(%rcx) | ||
| 354 | jnz tracesys | 333 | jnz tracesys |
| 334 | system_call_fastpath: | ||
| 355 | cmpq $__NR_syscall_max,%rax | 335 | cmpq $__NR_syscall_max,%rax |
| 356 | ja badsys | 336 | ja badsys |
| 357 | movq %r10,%rcx | 337 | movq %r10,%rcx |
| @@ -403,16 +383,16 @@ sysret_careful: | |||
| 403 | sysret_signal: | 383 | sysret_signal: |
| 404 | TRACE_IRQS_ON | 384 | TRACE_IRQS_ON |
| 405 | ENABLE_INTERRUPTS(CLBR_NONE) | 385 | ENABLE_INTERRUPTS(CLBR_NONE) |
| 406 | testl $_TIF_DO_NOTIFY_MASK,%edx | 386 | #ifdef CONFIG_AUDITSYSCALL |
| 407 | jz 1f | 387 | bt $TIF_SYSCALL_AUDIT,%edx |
| 408 | 388 | jc sysret_audit | |
| 409 | /* Really a signal */ | 389 | #endif |
| 410 | /* edx: work flags (arg3) */ | 390 | /* edx: work flags (arg3) */ |
| 411 | leaq do_notify_resume(%rip),%rax | 391 | leaq do_notify_resume(%rip),%rax |
| 412 | leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1 | 392 | leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1 |
| 413 | xorl %esi,%esi # oldset -> arg2 | 393 | xorl %esi,%esi # oldset -> arg2 |
| 414 | call ptregscall_common | 394 | call ptregscall_common |
| 415 | 1: movl $_TIF_WORK_MASK,%edi | 395 | movl $_TIF_WORK_MASK,%edi |
| 416 | /* Use IRET because user could have changed frame. This | 396 | /* Use IRET because user could have changed frame. This |
| 417 | works because ptregscall_common has called FIXUP_TOP_OF_STACK. */ | 397 | works because ptregscall_common has called FIXUP_TOP_OF_STACK. */ |
| 418 | DISABLE_INTERRUPTS(CLBR_NONE) | 398 | DISABLE_INTERRUPTS(CLBR_NONE) |
| @@ -423,14 +403,56 @@ badsys: | |||
| 423 | movq $-ENOSYS,RAX-ARGOFFSET(%rsp) | 403 | movq $-ENOSYS,RAX-ARGOFFSET(%rsp) |
| 424 | jmp ret_from_sys_call | 404 | jmp ret_from_sys_call |
| 425 | 405 | ||
| 406 | #ifdef CONFIG_AUDITSYSCALL | ||
| 407 | /* | ||
| 408 | * Fast path for syscall audit without full syscall trace. | ||
| 409 | * We just call audit_syscall_entry() directly, and then | ||
| 410 | * jump back to the normal fast path. | ||
| 411 | */ | ||
| 412 | auditsys: | ||
| 413 | movq %r10,%r9 /* 6th arg: 4th syscall arg */ | ||
| 414 | movq %rdx,%r8 /* 5th arg: 3rd syscall arg */ | ||
| 415 | movq %rsi,%rcx /* 4th arg: 2nd syscall arg */ | ||
| 416 | movq %rdi,%rdx /* 3rd arg: 1st syscall arg */ | ||
| 417 | movq %rax,%rsi /* 2nd arg: syscall number */ | ||
| 418 | movl $AUDIT_ARCH_X86_64,%edi /* 1st arg: audit arch */ | ||
| 419 | call audit_syscall_entry | ||
| 420 | LOAD_ARGS 0 /* reload call-clobbered registers */ | ||
| 421 | jmp system_call_fastpath | ||
| 422 | |||
| 423 | /* | ||
| 424 | * Return fast path for syscall audit. Call audit_syscall_exit() | ||
| 425 | * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT | ||
| 426 | * masked off. | ||
| 427 | */ | ||
| 428 | sysret_audit: | ||
| 429 | movq %rax,%rsi /* second arg, syscall return value */ | ||
| 430 | cmpq $0,%rax /* is it < 0? */ | ||
| 431 | setl %al /* 1 if so, 0 if not */ | ||
| 432 | movzbl %al,%edi /* zero-extend that into %edi */ | ||
| 433 | inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */ | ||
| 434 | call audit_syscall_exit | ||
| 435 | movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi | ||
| 436 | jmp sysret_check | ||
| 437 | #endif /* CONFIG_AUDITSYSCALL */ | ||
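The auditsys fast path above shuffles the syscall number and its first four arguments into the C calling convention and calls audit_syscall_entry() without building a full pt_regs frame. A hedged C rendering, assuming the six-argument signature implied by the register setup (a sketch, not code from the patch):

```c
/* Sketch only: what the auditsys register shuffle amounts to. */
static inline void auditsys_c(unsigned long nr, unsigned long a1,
			      unsigned long a2, unsigned long a3,
			      unsigned long a4)
{
	audit_syscall_entry(AUDIT_ARCH_X86_64, nr, a1, a2, a3, a4);
}
```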
| 438 | |||
| 426 | /* Do syscall tracing */ | 439 | /* Do syscall tracing */ |
| 427 | tracesys: | 440 | tracesys: |
| 441 | #ifdef CONFIG_AUDITSYSCALL | ||
| 442 | testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx) | ||
| 443 | jz auditsys | ||
| 444 | #endif | ||
| 428 | SAVE_REST | 445 | SAVE_REST |
| 429 | movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ | 446 | movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ |
| 430 | FIXUP_TOP_OF_STACK %rdi | 447 | FIXUP_TOP_OF_STACK %rdi |
| 431 | movq %rsp,%rdi | 448 | movq %rsp,%rdi |
| 432 | call syscall_trace_enter | 449 | call syscall_trace_enter |
| 433 | LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */ | 450 | /* |
| 451 | * Reload arg registers from stack in case ptrace changed them. | ||
| 452 | * We don't reload %rax because syscall_trace_enter() returned | ||
| 453 | * the value it wants us to use in the table lookup. | ||
| 454 | */ | ||
| 455 | LOAD_ARGS ARGOFFSET, 1 | ||
| 434 | RESTORE_REST | 456 | RESTORE_REST |
| 435 | cmpq $__NR_syscall_max,%rax | 457 | cmpq $__NR_syscall_max,%rax |
| 436 | ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */ | 458 | ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */ |
| @@ -444,6 +466,7 @@ tracesys: | |||
| 444 | * Has correct top of stack, but partial stack frame. | 466 | * Has correct top of stack, but partial stack frame. |
| 445 | */ | 467 | */ |
| 446 | .globl int_ret_from_sys_call | 468 | .globl int_ret_from_sys_call |
| 469 | .globl int_with_check | ||
| 447 | int_ret_from_sys_call: | 470 | int_ret_from_sys_call: |
| 448 | DISABLE_INTERRUPTS(CLBR_NONE) | 471 | DISABLE_INTERRUPTS(CLBR_NONE) |
| 449 | TRACE_IRQS_OFF | 472 | TRACE_IRQS_OFF |
| @@ -483,7 +506,7 @@ int_very_careful: | |||
| 483 | ENABLE_INTERRUPTS(CLBR_NONE) | 506 | ENABLE_INTERRUPTS(CLBR_NONE) |
| 484 | SAVE_REST | 507 | SAVE_REST |
| 485 | /* Check for syscall exit trace */ | 508 | /* Check for syscall exit trace */ |
| 486 | testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx | 509 | testl $_TIF_WORK_SYSCALL_EXIT,%edx |
| 487 | jz int_signal | 510 | jz int_signal |
| 488 | pushq %rdi | 511 | pushq %rdi |
| 489 | CFI_ADJUST_CFA_OFFSET 8 | 512 | CFI_ADJUST_CFA_OFFSET 8 |
| @@ -491,7 +514,7 @@ int_very_careful: | |||
| 491 | call syscall_trace_leave | 514 | call syscall_trace_leave |
| 492 | popq %rdi | 515 | popq %rdi |
| 493 | CFI_ADJUST_CFA_OFFSET -8 | 516 | CFI_ADJUST_CFA_OFFSET -8 |
| 494 | andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi | 517 | andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi |
| 495 | jmp int_restore_rest | 518 | jmp int_restore_rest |
| 496 | 519 | ||
| 497 | int_signal: | 520 | int_signal: |
| @@ -618,6 +641,13 @@ END(stub_rt_sigreturn) | |||
| 618 | SAVE_ARGS | 641 | SAVE_ARGS |
| 619 | leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler | 642 | leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler |
| 620 | pushq %rbp | 643 | pushq %rbp |
| 644 | /* | ||
| 645 | * Save rbp twice: One is for marking the stack frame, as usual, and the | ||
| 646 | * other, to fill pt_regs properly. This is because bx comes right | ||
| 647 | * before the last saved register in that structure, and not bp. If the | ||
| 648 | * base pointer were in the place bx is today, this would not be needed. | ||
| 649 | */ | ||
| 650 | movq %rbp, -8(%rsp) | ||
| 621 | CFI_ADJUST_CFA_OFFSET 8 | 651 | CFI_ADJUST_CFA_OFFSET 8 |
| 622 | CFI_REL_OFFSET rbp, 0 | 652 | CFI_REL_OFFSET rbp, 0 |
| 623 | movq %rsp,%rbp | 653 | movq %rsp,%rbp |
| @@ -883,6 +913,9 @@ END(spurious_interrupt) | |||
| 883 | .if \ist | 913 | .if \ist |
| 884 | movq %gs:pda_data_offset, %rbp | 914 | movq %gs:pda_data_offset, %rbp |
| 885 | .endif | 915 | .endif |
| 916 | .if \irqtrace | ||
| 917 | TRACE_IRQS_OFF | ||
| 918 | .endif | ||
| 886 | movq %rsp,%rdi | 919 | movq %rsp,%rdi |
| 887 | movq ORIG_RAX(%rsp),%rsi | 920 | movq ORIG_RAX(%rsp),%rsi |
| 888 | movq $-1,ORIG_RAX(%rsp) | 921 | movq $-1,ORIG_RAX(%rsp) |
| @@ -1009,7 +1042,8 @@ KPROBE_ENTRY(error_entry) | |||
| 1009 | je error_kernelspace | 1042 | je error_kernelspace |
| 1010 | error_swapgs: | 1043 | error_swapgs: |
| 1011 | SWAPGS | 1044 | SWAPGS |
| 1012 | error_sti: | 1045 | error_sti: |
| 1046 | TRACE_IRQS_OFF | ||
| 1013 | movq %rdi,RDI(%rsp) | 1047 | movq %rdi,RDI(%rsp) |
| 1014 | CFI_REL_OFFSET rdi,RDI | 1048 | CFI_REL_OFFSET rdi,RDI |
| 1015 | movq %rsp,%rdi | 1049 | movq %rsp,%rdi |
| @@ -1183,12 +1217,13 @@ ENTRY(simd_coprocessor_error) | |||
| 1183 | END(simd_coprocessor_error) | 1217 | END(simd_coprocessor_error) |
| 1184 | 1218 | ||
| 1185 | ENTRY(device_not_available) | 1219 | ENTRY(device_not_available) |
| 1186 | zeroentry math_state_restore | 1220 | zeroentry do_device_not_available |
| 1187 | END(device_not_available) | 1221 | END(device_not_available) |
| 1188 | 1222 | ||
| 1189 | /* runs on exception stack */ | 1223 | /* runs on exception stack */ |
| 1190 | KPROBE_ENTRY(debug) | 1224 | KPROBE_ENTRY(debug) |
| 1191 | INTR_FRAME | 1225 | INTR_FRAME |
| 1226 | PARAVIRT_ADJUST_EXCEPTION_FRAME | ||
| 1192 | pushq $0 | 1227 | pushq $0 |
| 1193 | CFI_ADJUST_CFA_OFFSET 8 | 1228 | CFI_ADJUST_CFA_OFFSET 8 |
| 1194 | paranoidentry do_debug, DEBUG_STACK | 1229 | paranoidentry do_debug, DEBUG_STACK |
| @@ -1198,6 +1233,7 @@ KPROBE_END(debug) | |||
| 1198 | /* runs on exception stack */ | 1233 | /* runs on exception stack */ |
| 1199 | KPROBE_ENTRY(nmi) | 1234 | KPROBE_ENTRY(nmi) |
| 1200 | INTR_FRAME | 1235 | INTR_FRAME |
| 1236 | PARAVIRT_ADJUST_EXCEPTION_FRAME | ||
| 1201 | pushq $-1 | 1237 | pushq $-1 |
| 1202 | CFI_ADJUST_CFA_OFFSET 8 | 1238 | CFI_ADJUST_CFA_OFFSET 8 |
| 1203 | paranoidentry do_nmi, 0, 0 | 1239 | paranoidentry do_nmi, 0, 0 |
| @@ -1211,6 +1247,7 @@ KPROBE_END(nmi) | |||
| 1211 | 1247 | ||
| 1212 | KPROBE_ENTRY(int3) | 1248 | KPROBE_ENTRY(int3) |
| 1213 | INTR_FRAME | 1249 | INTR_FRAME |
| 1250 | PARAVIRT_ADJUST_EXCEPTION_FRAME | ||
| 1214 | pushq $0 | 1251 | pushq $0 |
| 1215 | CFI_ADJUST_CFA_OFFSET 8 | 1252 | CFI_ADJUST_CFA_OFFSET 8 |
| 1216 | paranoidentry do_int3, DEBUG_STACK | 1253 | paranoidentry do_int3, DEBUG_STACK |
| @@ -1237,6 +1274,7 @@ END(coprocessor_segment_overrun) | |||
| 1237 | /* runs on exception stack */ | 1274 | /* runs on exception stack */ |
| 1238 | ENTRY(double_fault) | 1275 | ENTRY(double_fault) |
| 1239 | XCPT_FRAME | 1276 | XCPT_FRAME |
| 1277 | PARAVIRT_ADJUST_EXCEPTION_FRAME | ||
| 1240 | paranoidentry do_double_fault | 1278 | paranoidentry do_double_fault |
| 1241 | jmp paranoid_exit1 | 1279 | jmp paranoid_exit1 |
| 1242 | CFI_ENDPROC | 1280 | CFI_ENDPROC |
| @@ -1253,6 +1291,7 @@ END(segment_not_present) | |||
| 1253 | /* runs on exception stack */ | 1291 | /* runs on exception stack */ |
| 1254 | ENTRY(stack_segment) | 1292 | ENTRY(stack_segment) |
| 1255 | XCPT_FRAME | 1293 | XCPT_FRAME |
| 1294 | PARAVIRT_ADJUST_EXCEPTION_FRAME | ||
| 1256 | paranoidentry do_stack_segment | 1295 | paranoidentry do_stack_segment |
| 1257 | jmp paranoid_exit1 | 1296 | jmp paranoid_exit1 |
| 1258 | CFI_ENDPROC | 1297 | CFI_ENDPROC |
| @@ -1278,6 +1317,7 @@ END(spurious_interrupt_bug) | |||
| 1278 | /* runs on exception stack */ | 1317 | /* runs on exception stack */ |
| 1279 | ENTRY(machine_check) | 1318 | ENTRY(machine_check) |
| 1280 | INTR_FRAME | 1319 | INTR_FRAME |
| 1320 | PARAVIRT_ADJUST_EXCEPTION_FRAME | ||
| 1281 | pushq $0 | 1321 | pushq $0 |
| 1282 | CFI_ADJUST_CFA_OFFSET 8 | 1322 | CFI_ADJUST_CFA_OFFSET 8 |
| 1283 | paranoidentry do_machine_check | 1323 | paranoidentry do_machine_check |
| @@ -1312,3 +1352,103 @@ KPROBE_ENTRY(ignore_sysret) | |||
| 1312 | sysret | 1352 | sysret |
| 1313 | CFI_ENDPROC | 1353 | CFI_ENDPROC |
| 1314 | ENDPROC(ignore_sysret) | 1354 | ENDPROC(ignore_sysret) |
| 1355 | |||
| 1356 | #ifdef CONFIG_XEN | ||
| 1357 | ENTRY(xen_hypervisor_callback) | ||
| 1358 | zeroentry xen_do_hypervisor_callback | ||
| 1359 | END(xen_hypervisor_callback) | ||
| 1360 | |||
| 1361 | /* | ||
| 1362 | # A note on the "critical region" in our callback handler. | ||
| 1363 | # We want to avoid stacking callback handlers due to events occurring | ||
| 1364 | # during handling of the last event. To do this, we keep events disabled | ||
| 1365 | # until we've done all processing. HOWEVER, we must enable events before | ||
| 1366 | # popping the stack frame (can't be done atomically) and so it would still | ||
| 1367 | # be possible to get enough handler activations to overflow the stack. | ||
| 1368 | # Although unlikely, bugs of that kind are hard to track down, so we'd | ||
| 1369 | # like to avoid the possibility. | ||
| 1370 | # So, on entry to the handler we detect whether we interrupted an | ||
| 1371 | # existing activation in its critical region -- if so, we pop the current | ||
| 1372 | # activation and restart the handler using the previous one. | ||
| 1373 | */ | ||
| 1374 | ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs) | ||
| 1375 | CFI_STARTPROC | ||
| 1376 | /* Since we don't modify %rdi, xen_evtchn_do_upcall(struct pt_regs *) will | ||
| 1377 | see the correct pointer to the pt_regs */ | ||
| 1378 | movq %rdi, %rsp # we don't return, adjust the stack frame | ||
| 1379 | CFI_ENDPROC | ||
| 1380 | CFI_DEFAULT_STACK | ||
| 1381 | 11: incl %gs:pda_irqcount | ||
| 1382 | movq %rsp,%rbp | ||
| 1383 | CFI_DEF_CFA_REGISTER rbp | ||
| 1384 | cmovzq %gs:pda_irqstackptr,%rsp | ||
| 1385 | pushq %rbp # backlink for old unwinder | ||
| 1386 | call xen_evtchn_do_upcall | ||
| 1387 | popq %rsp | ||
| 1388 | CFI_DEF_CFA_REGISTER rsp | ||
| 1389 | decl %gs:pda_irqcount | ||
| 1390 | jmp error_exit | ||
| 1391 | CFI_ENDPROC | ||
| 1392 | END(xen_do_hypervisor_callback) | ||
| 1393 | |||
| 1394 | /* | ||
| 1395 | # Hypervisor uses this for application faults while it executes. | ||
| 1396 | # We get here for two reasons: | ||
| 1397 | # 1. Fault while reloading DS, ES, FS or GS | ||
| 1398 | # 2. Fault while executing IRET | ||
| 1399 | # Category 1 we do not need to fix up as Xen has already reloaded all segment | ||
| 1400 | # registers that could be reloaded and zeroed the others. | ||
| 1401 | # Category 2 we fix up by killing the current process. We cannot use the | ||
| 1402 | # normal Linux return path in this case because if we use the IRET hypercall | ||
| 1403 | # to pop the stack frame we end up in an infinite loop of failsafe callbacks. | ||
| 1404 | # We distinguish between categories by comparing each saved segment register | ||
| 1405 | # with its current contents: any discrepancy means we are in category 1. | ||
| 1406 | */ | ||
| 1407 | ENTRY(xen_failsafe_callback) | ||
| 1408 | framesz = (RIP-0x30) /* workaround buggy gas */ | ||
| 1409 | _frame framesz | ||
| 1410 | CFI_REL_OFFSET rcx, 0 | ||
| 1411 | CFI_REL_OFFSET r11, 8 | ||
| 1412 | movw %ds,%cx | ||
| 1413 | cmpw %cx,0x10(%rsp) | ||
| 1414 | CFI_REMEMBER_STATE | ||
| 1415 | jne 1f | ||
| 1416 | movw %es,%cx | ||
| 1417 | cmpw %cx,0x18(%rsp) | ||
| 1418 | jne 1f | ||
| 1419 | movw %fs,%cx | ||
| 1420 | cmpw %cx,0x20(%rsp) | ||
| 1421 | jne 1f | ||
| 1422 | movw %gs,%cx | ||
| 1423 | cmpw %cx,0x28(%rsp) | ||
| 1424 | jne 1f | ||
| 1425 | /* All segments match their saved values => Category 2 (Bad IRET). */ | ||
| 1426 | movq (%rsp),%rcx | ||
| 1427 | CFI_RESTORE rcx | ||
| 1428 | movq 8(%rsp),%r11 | ||
| 1429 | CFI_RESTORE r11 | ||
| 1430 | addq $0x30,%rsp | ||
| 1431 | CFI_ADJUST_CFA_OFFSET -0x30 | ||
| 1432 | pushq $0 | ||
| 1433 | CFI_ADJUST_CFA_OFFSET 8 | ||
| 1434 | pushq %r11 | ||
| 1435 | CFI_ADJUST_CFA_OFFSET 8 | ||
| 1436 | pushq %rcx | ||
| 1437 | CFI_ADJUST_CFA_OFFSET 8 | ||
| 1438 | jmp general_protection | ||
| 1439 | CFI_RESTORE_STATE | ||
| 1440 | 1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */ | ||
| 1441 | movq (%rsp),%rcx | ||
| 1442 | CFI_RESTORE rcx | ||
| 1443 | movq 8(%rsp),%r11 | ||
| 1444 | CFI_RESTORE r11 | ||
| 1445 | addq $0x30,%rsp | ||
| 1446 | CFI_ADJUST_CFA_OFFSET -0x30 | ||
| 1447 | pushq $0 | ||
| 1448 | CFI_ADJUST_CFA_OFFSET 8 | ||
| 1449 | SAVE_ALL | ||
| 1450 | jmp error_exit | ||
| 1451 | CFI_ENDPROC | ||
| 1452 | END(xen_failsafe_callback) | ||
| 1453 | |||
| 1454 | #endif /* CONFIG_XEN */ | ||
diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c new file mode 100644 index 000000000000..f454c78fcef6 --- /dev/null +++ b/arch/x86/kernel/es7000_32.c | |||
| @@ -0,0 +1,363 @@ | |||
| 1 | /* | ||
| 2 | * Written by: Garry Forsgren, Unisys Corporation | ||
| 3 | * Natalie Protasevich, Unisys Corporation | ||
| 4 | * This file contains the code to configure and interface | ||
| 5 | * with Unisys ES7000 series hardware system manager. | ||
| 6 | * | ||
| 7 | * Copyright (c) 2003 Unisys Corporation. All Rights Reserved. | ||
| 8 | * | ||
| 9 | * This program is free software; you can redistribute it and/or modify it | ||
| 10 | * under the terms of version 2 of the GNU General Public License as | ||
| 11 | * published by the Free Software Foundation. | ||
| 12 | * | ||
| 13 | * This program is distributed in the hope that it would be useful, but | ||
| 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
| 16 | * | ||
| 17 | * You should have received a copy of the GNU General Public License along | ||
| 18 | * with this program; if not, write the Free Software Foundation, Inc., 59 | ||
| 19 | * Temple Place - Suite 330, Boston MA 02111-1307, USA. | ||
| 20 | * | ||
| 21 | * Contact information: Unisys Corporation, Township Line & Union Meeting | ||
| 22 | * Roads-A, Unisys Way, Blue Bell, Pennsylvania, 19424, or: | ||
| 23 | * | ||
| 24 | * http://www.unisys.com | ||
| 25 | */ | ||
| 26 | |||
| 27 | #include <linux/module.h> | ||
| 28 | #include <linux/types.h> | ||
| 29 | #include <linux/kernel.h> | ||
| 30 | #include <linux/smp.h> | ||
| 31 | #include <linux/string.h> | ||
| 32 | #include <linux/spinlock.h> | ||
| 33 | #include <linux/errno.h> | ||
| 34 | #include <linux/notifier.h> | ||
| 35 | #include <linux/reboot.h> | ||
| 36 | #include <linux/init.h> | ||
| 37 | #include <linux/acpi.h> | ||
| 38 | #include <asm/io.h> | ||
| 39 | #include <asm/nmi.h> | ||
| 40 | #include <asm/smp.h> | ||
| 41 | #include <asm/apicdef.h> | ||
| 42 | #include <mach_mpparse.h> | ||
| 43 | |||
| 44 | /* | ||
| 45 | * ES7000 chipsets | ||
| 46 | */ | ||
| 47 | |||
| 48 | #define NON_UNISYS 0 | ||
| 49 | #define ES7000_CLASSIC 1 | ||
| 50 | #define ES7000_ZORRO 2 | ||
| 51 | |||
| 52 | |||
| 53 | #define MIP_REG 1 | ||
| 54 | #define MIP_PSAI_REG 4 | ||
| 55 | |||
| 56 | #define MIP_BUSY 1 | ||
| 57 | #define MIP_SPIN 0xf0000 | ||
| 58 | #define MIP_VALID 0x0100000000000000ULL | ||
| 59 | #define MIP_PORT(VALUE) ((VALUE >> 32) & 0xffff) | ||
| 60 | |||
| 61 | #define MIP_RD_LO(VALUE) (VALUE & 0xffffffff) | ||
| 62 | |||
| 63 | struct mip_reg_info { | ||
| 64 | unsigned long long mip_info; | ||
| 65 | unsigned long long delivery_info; | ||
| 66 | unsigned long long host_reg; | ||
| 67 | unsigned long long mip_reg; | ||
| 68 | }; | ||
| 69 | |||
| 70 | struct part_info { | ||
| 71 | unsigned char type; | ||
| 72 | unsigned char length; | ||
| 73 | unsigned char part_id; | ||
| 74 | unsigned char apic_mode; | ||
| 75 | unsigned long snum; | ||
| 76 | char ptype[16]; | ||
| 77 | char sname[64]; | ||
| 78 | char pname[64]; | ||
| 79 | }; | ||
| 80 | |||
| 81 | struct psai { | ||
| 82 | unsigned long long entry_type; | ||
| 83 | unsigned long long addr; | ||
| 84 | unsigned long long bep_addr; | ||
| 85 | }; | ||
| 86 | |||
| 87 | struct es7000_mem_info { | ||
| 88 | unsigned char type; | ||
| 89 | unsigned char length; | ||
| 90 | unsigned char resv[6]; | ||
| 91 | unsigned long long start; | ||
| 92 | unsigned long long size; | ||
| 93 | }; | ||
| 94 | |||
| 95 | struct es7000_oem_table { | ||
| 96 | unsigned long long hdr; | ||
| 97 | struct mip_reg_info mip; | ||
| 98 | struct part_info pif; | ||
| 99 | struct es7000_mem_info shm; | ||
| 100 | struct psai psai; | ||
| 101 | }; | ||
| 102 | |||
| 103 | #ifdef CONFIG_ACPI | ||
| 104 | |||
| 105 | struct oem_table { | ||
| 106 | struct acpi_table_header Header; | ||
| 107 | u32 OEMTableAddr; | ||
| 108 | u32 OEMTableSize; | ||
| 109 | }; | ||
| 110 | |||
| 111 | extern int find_unisys_acpi_oem_table(unsigned long *oem_addr); | ||
| 112 | extern void unmap_unisys_acpi_oem_table(unsigned long oem_addr); | ||
| 113 | #endif | ||
| 114 | |||
| 115 | struct mip_reg { | ||
| 116 | unsigned long long off_0; | ||
| 117 | unsigned long long off_8; | ||
| 118 | unsigned long long off_10; | ||
| 119 | unsigned long long off_18; | ||
| 120 | unsigned long long off_20; | ||
| 121 | unsigned long long off_28; | ||
| 122 | unsigned long long off_30; | ||
| 123 | unsigned long long off_38; | ||
| 124 | }; | ||
| 125 | |||
| 126 | #define MIP_SW_APIC 0x1020b | ||
| 127 | #define MIP_FUNC(VALUE) ((VALUE) & 0xff) | ||
| 128 | |||
| 129 | /* | ||
| 130 | * ES7000 Globals | ||
| 131 | */ | ||
| 132 | |||
| 133 | static volatile unsigned long *psai = NULL; | ||
| 134 | static struct mip_reg *mip_reg; | ||
| 135 | static struct mip_reg *host_reg; | ||
| 136 | static int mip_port; | ||
| 137 | static unsigned long mip_addr, host_addr; | ||
| 138 | |||
| 139 | int es7000_plat; | ||
| 140 | |||
| 141 | /* | ||
| 142 | * GSI override for ES7000 platforms. | ||
| 143 | */ | ||
| 144 | |||
| 145 | static unsigned int base; | ||
| 146 | |||
| 147 | static int | ||
| 148 | es7000_rename_gsi(int ioapic, int gsi) | ||
| 149 | { | ||
| 150 | if (es7000_plat == ES7000_ZORRO) | ||
| 151 | return gsi; | ||
| 152 | |||
| 153 | if (!base) { | ||
| 154 | int i; | ||
| 155 | for (i = 0; i < nr_ioapics; i++) | ||
| 156 | base += nr_ioapic_registers[i]; | ||
| 157 | } | ||
| 158 | |||
| 159 | if (!ioapic && (gsi < 16)) | ||
| 160 | gsi += base; | ||
| 161 | return gsi; | ||
| 162 | } | ||
| 163 | |||
| 164 | void __init | ||
| 165 | setup_unisys(void) | ||
| 166 | { | ||
| 167 | /* | ||
| 168 | * Determine the generation of the ES7000 currently running. | ||
| 169 | * | ||
| 170 | * es7000_plat = 1 if the machine is a 5xx ES7000 box | ||
| 171 | * es7000_plat = 2 if the machine is an x86_64 ES7000 box | ||
| 172 | * | ||
| 173 | */ | ||
| 174 | if (!(boot_cpu_data.x86 <= 15 && boot_cpu_data.x86_model <= 2)) | ||
| 175 | es7000_plat = ES7000_ZORRO; | ||
| 176 | else | ||
| 177 | es7000_plat = ES7000_CLASSIC; | ||
| 178 | ioapic_renumber_irq = es7000_rename_gsi; | ||
| 179 | } | ||
| 180 | |||
| 181 | /* | ||
| 182 | * Parse the OEM Table | ||
| 183 | */ | ||
| 184 | |||
| 185 | int __init | ||
| 186 | parse_unisys_oem (char *oemptr) | ||
| 187 | { | ||
| 188 | int i; | ||
| 189 | int success = 0; | ||
| 190 | unsigned char type, size; | ||
| 191 | unsigned long val; | ||
| 192 | char *tp = NULL; | ||
| 193 | struct psai *psaip = NULL; | ||
| 194 | struct mip_reg_info *mi; | ||
| 195 | struct mip_reg *host, *mip; | ||
| 196 | |||
| 197 | tp = oemptr; | ||
| 198 | |||
| 199 | tp += 8; | ||
| 200 | |||
| 201 | for (i=0; i <= 6; i++) { | ||
| 202 | type = *tp++; | ||
| 203 | size = *tp++; | ||
| 204 | tp -= 2; | ||
| 205 | switch (type) { | ||
| 206 | case MIP_REG: | ||
| 207 | mi = (struct mip_reg_info *)tp; | ||
| 208 | val = MIP_RD_LO(mi->host_reg); | ||
| 209 | host_addr = val; | ||
| 210 | host = (struct mip_reg *)val; | ||
| 211 | host_reg = __va(host); | ||
| 212 | val = MIP_RD_LO(mi->mip_reg); | ||
| 213 | mip_port = MIP_PORT(mi->mip_info); | ||
| 214 | mip_addr = val; | ||
| 215 | mip = (struct mip_reg *)val; | ||
| 216 | mip_reg = __va(mip); | ||
| 217 | pr_debug("es7000_mipcfg: host_reg = 0x%lx \n", | ||
| 218 | (unsigned long)host_reg); | ||
| 219 | pr_debug("es7000_mipcfg: mip_reg = 0x%lx \n", | ||
| 220 | (unsigned long)mip_reg); | ||
| 221 | success++; | ||
| 222 | break; | ||
| 223 | case MIP_PSAI_REG: | ||
| 224 | psaip = (struct psai *)tp; | ||
| 225 | if (psaip != NULL) { | ||
| 226 | if (psaip->addr) | ||
| 227 | psai = __va(psaip->addr); | ||
| 228 | else | ||
| 229 | psai = NULL; | ||
| 230 | success++; | ||
| 231 | } | ||
| 232 | break; | ||
| 233 | default: | ||
| 234 | break; | ||
| 235 | } | ||
| 236 | tp += size; | ||
| 237 | } | ||
| 238 | |||
| 239 | if (success < 2) { | ||
| 240 | es7000_plat = NON_UNISYS; | ||
| 241 | } else | ||
| 242 | setup_unisys(); | ||
| 243 | return es7000_plat; | ||
| 244 | } | ||
| 245 | |||
| 246 | #ifdef CONFIG_ACPI | ||
| 247 | static unsigned long oem_addrX; | ||
| 248 | static unsigned long oem_size; | ||
| 249 | int __init find_unisys_acpi_oem_table(unsigned long *oem_addr) | ||
| 250 | { | ||
| 251 | struct acpi_table_header *header = NULL; | ||
| 252 | int i = 0; | ||
| 253 | acpi_size tbl_size; | ||
| 254 | |||
| 255 | while (ACPI_SUCCESS(acpi_get_table_with_size("OEM1", i++, &header, &tbl_size))) { | ||
| 256 | if (!memcmp((char *) &header->oem_id, "UNISYS", 6)) { | ||
| 257 | struct oem_table *t = (struct oem_table *)header; | ||
| 258 | |||
| 259 | oem_addrX = t->OEMTableAddr; | ||
| 260 | oem_size = t->OEMTableSize; | ||
| 261 | early_acpi_os_unmap_memory(header, tbl_size); | ||
| 262 | |||
| 263 | *oem_addr = (unsigned long)__acpi_map_table(oem_addrX, | ||
| 264 | oem_size); | ||
| 265 | return 0; | ||
| 266 | } | ||
| 267 | early_acpi_os_unmap_memory(header, tbl_size); | ||
| 268 | } | ||
| 269 | return -1; | ||
| 270 | } | ||
| 271 | |||
| 272 | void __init unmap_unisys_acpi_oem_table(unsigned long oem_addr) | ||
| 273 | { | ||
| 274 | if (!oem_addr) | ||
| 275 | return; | ||
| 276 | |||
| 277 | __acpi_unmap_table((char *)oem_addr, oem_size); | ||
| 278 | } | ||
| 279 | #endif | ||
| 280 | |||
| 281 | static void | ||
| 282 | es7000_spin(int n) | ||
| 283 | { | ||
| 284 | int i = 0; | ||
| 285 | |||
| 286 | while (i++ < n) | ||
| 287 | rep_nop(); | ||
| 288 | } | ||
| 289 | |||
| 290 | static int __init | ||
| 291 | es7000_mip_write(struct mip_reg *mip_reg) | ||
| 292 | { | ||
| 293 | int status = 0; | ||
| 294 | int spin; | ||
| 295 | |||
| 296 | spin = MIP_SPIN; | ||
| 297 | while (((unsigned long long)host_reg->off_38 & | ||
| 298 | (unsigned long long)MIP_VALID) != 0) { | ||
| 299 | if (--spin <= 0) { | ||
| 300 | printk("es7000_mip_write: Timeout waiting for Host Valid Flag"); | ||
| 301 | return -1; | ||
| 302 | } | ||
| 303 | es7000_spin(MIP_SPIN); | ||
| 304 | } | ||
| 305 | |||
| 306 | memcpy(host_reg, mip_reg, sizeof(struct mip_reg)); | ||
| 307 | outb(1, mip_port); | ||
| 308 | |||
| 309 | spin = MIP_SPIN; | ||
| 310 | |||
| 311 | while (((unsigned long long)mip_reg->off_38 & | ||
| 312 | (unsigned long long)MIP_VALID) == 0) { | ||
| 313 | if (--spin <= 0) { | ||
| 314 | printk("es7000_mip_write: Timeout waiting for MIP Valid Flag"); | ||
| 315 | return -1; | ||
| 316 | } | ||
| 317 | es7000_spin(MIP_SPIN); | ||
| 318 | } | ||
| 319 | |||
| 320 | status = ((unsigned long long)mip_reg->off_0 & | ||
| 321 | (unsigned long long)0xffff0000000000ULL) >> 48; | ||
| 322 | mip_reg->off_38 = ((unsigned long long)mip_reg->off_38 & | ||
| 323 | (unsigned long long)~MIP_VALID); | ||
| 324 | return status; | ||
| 325 | } | ||
| 326 | |||
| 327 | int | ||
| 328 | es7000_start_cpu(int cpu, unsigned long eip) | ||
| 329 | { | ||
| 330 | unsigned long vect = 0, psaival = 0; | ||
| 331 | |||
| 332 | if (psai == NULL) | ||
| 333 | return -1; | ||
| 334 | |||
| 335 | vect = ((unsigned long)__pa(eip)/0x1000) << 16; | ||
| 336 | psaival = (0x1000000 | vect | cpu); | ||
| 337 | |||
| 338 | while (*psai & 0x1000000) | ||
| 339 | ; | ||
| 340 | |||
| 341 | *psai = psaival; | ||
| 342 | |||
| 343 | return 0; | ||
| 344 | |||
| 345 | } | ||
| 346 | |||
| 347 | void __init | ||
| 348 | es7000_sw_apic(void) | ||
| 349 | { | ||
| 350 | if (es7000_plat) { | ||
| 351 | int mip_status; | ||
| 352 | struct mip_reg es7000_mip_reg; | ||
| 353 | |||
| 354 | printk("ES7000: Enabling APIC mode.\n"); | ||
| 355 | memset(&es7000_mip_reg, 0, sizeof(struct mip_reg)); | ||
| 356 | es7000_mip_reg.off_0 = MIP_SW_APIC; | ||
| 357 | es7000_mip_reg.off_38 = (MIP_VALID); | ||
| 358 | while ((mip_status = es7000_mip_write(&es7000_mip_reg)) != 0) | ||
| 359 | printk("es7000_sw_apic: command failed, status = %x\n", | ||
| 360 | mip_status); | ||
| 361 | return; | ||
| 362 | } | ||
| 363 | } | ||
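The MIP_PORT() and MIP_RD_LO() macros above slice a 64-bit descriptor from the OEM table into an I/O port number (bits 47:32) and a 32-bit physical address (bits 31:0). A stand-alone sketch of that decoding follows; the descriptor values are made up purely for illustration and are not taken from real ES7000 firmware.

    #include <stdio.h>

    #define MIP_PORT(VALUE)     (((VALUE) >> 32) & 0xffff)
    #define MIP_RD_LO(VALUE)    ((VALUE) & 0xffffffff)

    int main(void)
    {
            /* hypothetical descriptor words, for illustration only */
            unsigned long long mip_info = 0x0000123400000000ULL;
            unsigned long long mip_reg  = 0x00000000fee00000ULL;

            printf("MIP command port:   0x%llx\n", MIP_PORT(mip_info)); /* 0x1234 */
            printf("MIP register paddr: 0x%llx\n", MIP_RD_LO(mip_reg)); /* 0xfee00000 */
            return 0;
    }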
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index ab115cd15fdf..d073d981a730 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c | |||
| @@ -11,17 +11,18 @@ | |||
| 11 | 11 | ||
| 12 | #include <linux/spinlock.h> | 12 | #include <linux/spinlock.h> |
| 13 | #include <linux/hardirq.h> | 13 | #include <linux/hardirq.h> |
| 14 | #include <linux/uaccess.h> | ||
| 14 | #include <linux/ftrace.h> | 15 | #include <linux/ftrace.h> |
| 15 | #include <linux/percpu.h> | 16 | #include <linux/percpu.h> |
| 16 | #include <linux/init.h> | 17 | #include <linux/init.h> |
| 17 | #include <linux/list.h> | 18 | #include <linux/list.h> |
| 18 | 19 | ||
| 19 | #include <asm/alternative.h> | ||
| 20 | #include <asm/ftrace.h> | 20 | #include <asm/ftrace.h> |
| 21 | #include <asm/nops.h> | ||
| 21 | 22 | ||
| 22 | 23 | ||
| 23 | /* Long is fine, even if it is only 4 bytes ;-) */ | 24 | /* Long is fine, even if it is only 4 bytes ;-) */ |
| 24 | static long *ftrace_nop; | 25 | static unsigned long *ftrace_nop; |
| 25 | 26 | ||
| 26 | union ftrace_code_union { | 27 | union ftrace_code_union { |
| 27 | char code[MCOUNT_INSN_SIZE]; | 28 | char code[MCOUNT_INSN_SIZE]; |
| @@ -60,11 +61,7 @@ notrace int | |||
| 60 | ftrace_modify_code(unsigned long ip, unsigned char *old_code, | 61 | ftrace_modify_code(unsigned long ip, unsigned char *old_code, |
| 61 | unsigned char *new_code) | 62 | unsigned char *new_code) |
| 62 | { | 63 | { |
| 63 | unsigned replaced; | 64 | unsigned char replaced[MCOUNT_INSN_SIZE]; |
| 64 | unsigned old = *(unsigned *)old_code; /* 4 bytes */ | ||
| 65 | unsigned new = *(unsigned *)new_code; /* 4 bytes */ | ||
| 66 | unsigned char newch = new_code[4]; | ||
| 67 | int faulted = 0; | ||
| 68 | 65 | ||
| 69 | /* | 66 | /* |
| 70 | * Note: Due to modules and __init, code can | 67 | * Note: Due to modules and __init, code can |
| @@ -72,29 +69,20 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, | |||
| 72 | * as well as code changing. | 69 | * as well as code changing. |
| 73 | * | 70 | * |
| 74 | * No real locking needed, this code is run through | 71 | * No real locking needed, this code is run through |
| 75 | * kstop_machine. | 72 | * kstop_machine, or before SMP starts. |
| 76 | */ | 73 | */ |
| 77 | asm volatile ( | 74 | if (__copy_from_user_inatomic(replaced, (char __user *)ip, MCOUNT_INSN_SIZE)) |
| 78 | "1: lock\n" | 75 | return 1; |
| 79 | " cmpxchg %3, (%2)\n" | 76 | |
| 80 | " jnz 2f\n" | 77 | if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0) |
| 81 | " movb %b4, 4(%2)\n" | 78 | return 2; |
| 82 | "2:\n" | ||
| 83 | ".section .fixup, \"ax\"\n" | ||
| 84 | "3: movl $1, %0\n" | ||
| 85 | " jmp 2b\n" | ||
| 86 | ".previous\n" | ||
| 87 | _ASM_EXTABLE(1b, 3b) | ||
| 88 | : "=r"(faulted), "=a"(replaced) | ||
| 89 | : "r"(ip), "r"(new), "c"(newch), | ||
| 90 | "0"(faulted), "a"(old) | ||
| 91 | : "memory"); | ||
| 92 | sync_core(); | ||
| 93 | 79 | ||
| 94 | if (replaced != old && replaced != new) | 80 | WARN_ON_ONCE(__copy_to_user_inatomic((char __user *)ip, new_code, |
| 95 | faulted = 2; | 81 | MCOUNT_INSN_SIZE)); |
| 96 | 82 | ||
| 97 | return faulted; | 83 | sync_core(); |
| 84 | |||
| 85 | return 0; | ||
| 98 | } | 86 | } |
| 99 | 87 | ||
| 100 | notrace int ftrace_update_ftrace_func(ftrace_func_t func) | 88 | notrace int ftrace_update_ftrace_func(ftrace_func_t func) |
| @@ -112,30 +100,76 @@ notrace int ftrace_update_ftrace_func(ftrace_func_t func) | |||
| 112 | 100 | ||
| 113 | notrace int ftrace_mcount_set(unsigned long *data) | 101 | notrace int ftrace_mcount_set(unsigned long *data) |
| 114 | { | 102 | { |
| 115 | unsigned long ip = (long)(&mcount_call); | 103 | /* mcount is initialized as a nop */ |
| 116 | unsigned long *addr = data; | 104 | *data = 0; |
| 117 | unsigned char old[MCOUNT_INSN_SIZE], *new; | ||
| 118 | |||
| 119 | /* | ||
| 120 | * Replace the mcount stub with a pointer to the | ||
| 121 | * ip recorder function. | ||
| 122 | */ | ||
| 123 | memcpy(old, &mcount_call, MCOUNT_INSN_SIZE); | ||
| 124 | new = ftrace_call_replace(ip, *addr); | ||
| 125 | *addr = ftrace_modify_code(ip, old, new); | ||
| 126 | |||
| 127 | return 0; | 105 | return 0; |
| 128 | } | 106 | } |
| 129 | 107 | ||
| 130 | int __init ftrace_dyn_arch_init(void *data) | 108 | int __init ftrace_dyn_arch_init(void *data) |
| 131 | { | 109 | { |
| 132 | const unsigned char *const *noptable = find_nop_table(); | 110 | extern const unsigned char ftrace_test_p6nop[]; |
| 133 | 111 | extern const unsigned char ftrace_test_nop5[]; | |
| 134 | /* This is running in kstop_machine */ | 112 | extern const unsigned char ftrace_test_jmp[]; |
| 135 | 113 | int faulted = 0; | |
| 136 | ftrace_mcount_set(data); | ||
| 137 | 114 | ||
| 138 | ftrace_nop = (unsigned long *)noptable[MCOUNT_INSN_SIZE]; | 115 | /* |
| 116 | * There is no good nop for all x86 archs. | ||
| 117 | * We will default to using the P6_NOP5, but first we | ||
| 118 | * will test to make sure that the nop will actually | ||
| 119 | * work on this CPU. If it faults, we will then | ||
| 120 | * go to a less efficient 5-byte nop. If that fails | ||
| 121 | * we then just use a jmp as our nop. This isn't the most | ||
| 122 | * efficient nop, but we cannot use a multi-part nop | ||
| 123 | * since we would then risk being preempted in the middle | ||
| 124 | * of that nop, and if we then enabled tracing, it might | ||
| 125 | * cause a system crash. | ||
| 126 | * | ||
| 127 | * TODO: check the cpuid to determine the best nop. | ||
| 128 | */ | ||
| 129 | asm volatile ( | ||
| 130 | "jmp ftrace_test_jmp\n" | ||
| 131 | /* This code needs to stay around */ | ||
| 132 | ".section .text, \"ax\"\n" | ||
| 133 | "ftrace_test_jmp:" | ||
| 134 | "jmp ftrace_test_p6nop\n" | ||
| 135 | "nop\n" | ||
| 136 | "nop\n" | ||
| 137 | "nop\n" /* 2 byte jmp + 3 bytes */ | ||
| 138 | "ftrace_test_p6nop:" | ||
| 139 | P6_NOP5 | ||
| 140 | "jmp 1f\n" | ||
| 141 | "ftrace_test_nop5:" | ||
| 142 | ".byte 0x66,0x66,0x66,0x66,0x90\n" | ||
| 143 | "jmp 1f\n" | ||
| 144 | ".previous\n" | ||
| 145 | "1:" | ||
| 146 | ".section .fixup, \"ax\"\n" | ||
| 147 | "2: movl $1, %0\n" | ||
| 148 | " jmp ftrace_test_nop5\n" | ||
| 149 | "3: movl $2, %0\n" | ||
| 150 | " jmp 1b\n" | ||
| 151 | ".previous\n" | ||
| 152 | _ASM_EXTABLE(ftrace_test_p6nop, 2b) | ||
| 153 | _ASM_EXTABLE(ftrace_test_nop5, 3b) | ||
| 154 | : "=r"(faulted) : "0" (faulted)); | ||
| 155 | |||
| 156 | switch (faulted) { | ||
| 157 | case 0: | ||
| 158 | pr_info("ftrace: converting mcount calls to 0f 1f 44 00 00\n"); | ||
| 159 | ftrace_nop = (unsigned long *)ftrace_test_p6nop; | ||
| 160 | break; | ||
| 161 | case 1: | ||
| 162 | pr_info("ftrace: converting mcount calls to 66 66 66 66 90\n"); | ||
| 163 | ftrace_nop = (unsigned long *)ftrace_test_nop5; | ||
| 164 | break; | ||
| 165 | case 2: | ||
| 166 | pr_info("ftrace: converting mcount calls to jmp . + 5\n"); | ||
| 167 | ftrace_nop = (unsigned long *)ftrace_test_jmp; | ||
| 168 | break; | ||
| 169 | } | ||
| 170 | |||
| 171 | /* The return code is returned via data */ | ||
| 172 | *(unsigned long *)data = 0; | ||
| 139 | 173 | ||
| 140 | return 0; | 174 | return 0; |
| 141 | } | 175 | } |
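For reference, the three mcount replacement candidates that ftrace_dyn_arch_init() probes above correspond to the byte sequences below. The first two are taken directly from the pr_info() strings; the short-jmp form is inferred from the "2 byte jmp + 3 bytes" comment, so treat its encoding as an assumption. A stand-alone sketch that just prints them:

    #include <stdio.h>

    /* candidate 5-byte "nops" used to patch out mcount call sites */
    static const unsigned char p6_nop5[5]  = { 0x0f, 0x1f, 0x44, 0x00, 0x00 }; /* P6_NOP5 */
    static const unsigned char nop5[5]     = { 0x66, 0x66, 0x66, 0x66, 0x90 }; /* prefixed nop */
    static const unsigned char jmp_nop5[5] = { 0xeb, 0x03, 0x90, 0x90, 0x90 }; /* jmp . + 5 (assumed encoding) */

    int main(void)
    {
            const unsigned char *seq[] = { p6_nop5, nop5, jmp_nop5 };
            const char *name[] = { "P6 nop", "66-prefixed nop", "jmp . + 5" };
            int i, j;

            for (i = 0; i < 3; i++) {
                    printf("%-16s:", name[i]);
                    for (j = 0; j < 5; j++)
                            printf(" %02x", seq[i][j]);
                    printf("\n");
            }
            return 0;
    }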
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 1fa8be5bd217..6c9bfc9e1e95 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c | |||
| @@ -16,86 +16,63 @@ | |||
| 16 | #include <linux/ctype.h> | 16 | #include <linux/ctype.h> |
| 17 | #include <linux/init.h> | 17 | #include <linux/init.h> |
| 18 | #include <linux/hardirq.h> | 18 | #include <linux/hardirq.h> |
| 19 | #include <linux/dmar.h> | ||
| 19 | 20 | ||
| 20 | #include <asm/smp.h> | 21 | #include <asm/smp.h> |
| 21 | #include <asm/ipi.h> | 22 | #include <asm/ipi.h> |
| 22 | #include <asm/genapic.h> | 23 | #include <asm/genapic.h> |
| 23 | 24 | ||
| 24 | #ifdef CONFIG_ACPI | 25 | extern struct genapic apic_flat; |
| 25 | #include <acpi/acpi_bus.h> | 26 | extern struct genapic apic_physflat; |
| 26 | #endif | 27 | extern struct genapic apic_x2apic_uv_x;
| 27 | 28 | extern struct genapic apic_x2apic_phys; | |
| 28 | DEFINE_PER_CPU(int, x2apic_extra_bits); | 29 | extern struct genapic apic_x2apic_cluster; |
| 29 | 30 | ||
| 30 | struct genapic __read_mostly *genapic = &apic_flat; | 31 | struct genapic __read_mostly *genapic = &apic_flat; |
| 31 | 32 | ||
| 32 | static enum uv_system_type uv_system_type; | 33 | static struct genapic *apic_probe[] __initdata = { |
| 34 | &apic_x2apic_uv_x, | ||
| 35 | &apic_x2apic_phys, | ||
| 36 | &apic_x2apic_cluster, | ||
| 37 | &apic_physflat, | ||
| 38 | NULL, | ||
| 39 | }; | ||
| 33 | 40 | ||
| 34 | /* | 41 | /* |
| 35 | * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. | 42 | * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. |
| 36 | */ | 43 | */ |
| 37 | void __init setup_apic_routing(void) | 44 | void __init setup_apic_routing(void) |
| 38 | { | 45 | { |
| 39 | if (uv_system_type == UV_NON_UNIQUE_APIC) | 46 | if (genapic == &apic_x2apic_phys || genapic == &apic_x2apic_cluster) { |
| 40 | genapic = &apic_x2apic_uv_x; | 47 | if (!intr_remapping_enabled) |
| 41 | else | 48 | genapic = &apic_flat; |
| 42 | #ifdef CONFIG_ACPI | 49 | } |
| 43 | /* | ||
| 44 | * Quirk: some x86_64 machines can only use physical APIC mode | ||
| 45 | * regardless of how many processors are present (x86_64 ES7000 | ||
| 46 | * is an example). | ||
| 47 | */ | ||
| 48 | if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID && | ||
| 49 | (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) | ||
| 50 | genapic = &apic_physflat; | ||
| 51 | else | ||
| 52 | #endif | ||
| 53 | |||
| 54 | if (max_physical_apicid < 8) | ||
| 55 | genapic = &apic_flat; | ||
| 56 | else | ||
| 57 | genapic = &apic_physflat; | ||
| 58 | 50 | ||
| 59 | printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name); | 51 | if (genapic == &apic_flat) { |
| 52 | if (max_physical_apicid >= 8) | ||
| 53 | genapic = &apic_physflat; | ||
| 54 | printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name); | ||
| 55 | } | ||
| 60 | } | 56 | } |
| 61 | 57 | ||
| 62 | /* Same for both flat and physical. */ | 58 | /* Same for both flat and physical. */ |
| 63 | 59 | ||
| 64 | void send_IPI_self(int vector) | 60 | void apic_send_IPI_self(int vector) |
| 65 | { | 61 | { |
| 66 | __send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL); | 62 | __send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL); |
| 67 | } | 63 | } |
| 68 | 64 | ||
| 69 | int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) | 65 | int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) |
| 70 | { | 66 | { |
| 71 | if (!strcmp(oem_id, "SGI")) { | 67 | int i; |
| 72 | if (!strcmp(oem_table_id, "UVL")) | 68 | |
| 73 | uv_system_type = UV_LEGACY_APIC; | 69 | for (i = 0; apic_probe[i]; ++i) { |
| 74 | else if (!strcmp(oem_table_id, "UVX")) | 70 | if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) { |
| 75 | uv_system_type = UV_X2APIC; | 71 | genapic = apic_probe[i]; |
| 76 | else if (!strcmp(oem_table_id, "UVH")) | 72 | printk(KERN_INFO "Setting APIC routing to %s.\n", |
| 77 | uv_system_type = UV_NON_UNIQUE_APIC; | 73 | genapic->name); |
| 74 | return 1; | ||
| 75 | } | ||
| 78 | } | 76 | } |
| 79 | return 0; | 77 | return 0; |
| 80 | } | 78 | } |
| 81 | |||
| 82 | unsigned int read_apic_id(void) | ||
| 83 | { | ||
| 84 | unsigned int id; | ||
| 85 | |||
| 86 | WARN_ON(preemptible() && num_online_cpus() > 1); | ||
| 87 | id = apic_read(APIC_ID); | ||
| 88 | if (uv_system_type >= UV_X2APIC) | ||
| 89 | id |= __get_cpu_var(x2apic_extra_bits); | ||
| 90 | return id; | ||
| 91 | } | ||
| 92 | |||
| 93 | enum uv_system_type get_uv_system_type(void) | ||
| 94 | { | ||
| 95 | return uv_system_type; | ||
| 96 | } | ||
| 97 | |||
| 98 | int is_uv_system(void) | ||
| 99 | { | ||
| 100 | return uv_system_type != UV_NONE; | ||
| 101 | } | ||
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 1a9c68845ee8..c0262791bda4 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c | |||
| @@ -15,9 +15,20 @@ | |||
| 15 | #include <linux/kernel.h> | 15 | #include <linux/kernel.h> |
| 16 | #include <linux/ctype.h> | 16 | #include <linux/ctype.h> |
| 17 | #include <linux/init.h> | 17 | #include <linux/init.h> |
| 18 | #include <linux/hardirq.h> | ||
| 18 | #include <asm/smp.h> | 19 | #include <asm/smp.h> |
| 19 | #include <asm/ipi.h> | 20 | #include <asm/ipi.h> |
| 20 | #include <asm/genapic.h> | 21 | #include <asm/genapic.h> |
| 22 | #include <mach_apicdef.h> | ||
| 23 | |||
| 24 | #ifdef CONFIG_ACPI | ||
| 25 | #include <acpi/acpi_bus.h> | ||
| 26 | #endif | ||
| 27 | |||
| 28 | static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | ||
| 29 | { | ||
| 30 | return 1; | ||
| 31 | } | ||
| 21 | 32 | ||
| 22 | static cpumask_t flat_target_cpus(void) | 33 | static cpumask_t flat_target_cpus(void) |
| 23 | { | 34 | { |
| @@ -95,9 +106,33 @@ static void flat_send_IPI_all(int vector) | |||
| 95 | __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); | 106 | __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); |
| 96 | } | 107 | } |
| 97 | 108 | ||
| 109 | static unsigned int get_apic_id(unsigned long x) | ||
| 110 | { | ||
| 111 | unsigned int id; | ||
| 112 | |||
| 113 | id = (((x)>>24) & 0xFFu); | ||
| 114 | return id; | ||
| 115 | } | ||
| 116 | |||
| 117 | static unsigned long set_apic_id(unsigned int id) | ||
| 118 | { | ||
| 119 | unsigned long x; | ||
| 120 | |||
| 121 | x = ((id & 0xFFu)<<24); | ||
| 122 | return x; | ||
| 123 | } | ||
| 124 | |||
| 125 | static unsigned int read_xapic_id(void) | ||
| 126 | { | ||
| 127 | unsigned int id; | ||
| 128 | |||
| 129 | id = get_apic_id(apic_read(APIC_ID)); | ||
| 130 | return id; | ||
| 131 | } | ||
| 132 | |||
| 98 | static int flat_apic_id_registered(void) | 133 | static int flat_apic_id_registered(void) |
| 99 | { | 134 | { |
| 100 | return physid_isset(GET_APIC_ID(read_apic_id()), phys_cpu_present_map); | 135 | return physid_isset(read_xapic_id(), phys_cpu_present_map); |
| 101 | } | 136 | } |
| 102 | 137 | ||
| 103 | static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask) | 138 | static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask) |
| @@ -112,6 +147,7 @@ static unsigned int phys_pkg_id(int index_msb) | |||
| 112 | 147 | ||
| 113 | struct genapic apic_flat = { | 148 | struct genapic apic_flat = { |
| 114 | .name = "flat", | 149 | .name = "flat", |
| 150 | .acpi_madt_oem_check = flat_acpi_madt_oem_check, | ||
| 115 | .int_delivery_mode = dest_LowestPrio, | 151 | .int_delivery_mode = dest_LowestPrio, |
| 116 | .int_dest_mode = (APIC_DEST_LOGICAL != 0), | 152 | .int_dest_mode = (APIC_DEST_LOGICAL != 0), |
| 117 | .target_cpus = flat_target_cpus, | 153 | .target_cpus = flat_target_cpus, |
| @@ -121,8 +157,12 @@ struct genapic apic_flat = { | |||
| 121 | .send_IPI_all = flat_send_IPI_all, | 157 | .send_IPI_all = flat_send_IPI_all, |
| 122 | .send_IPI_allbutself = flat_send_IPI_allbutself, | 158 | .send_IPI_allbutself = flat_send_IPI_allbutself, |
| 123 | .send_IPI_mask = flat_send_IPI_mask, | 159 | .send_IPI_mask = flat_send_IPI_mask, |
| 160 | .send_IPI_self = apic_send_IPI_self, | ||
| 124 | .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, | 161 | .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, |
| 125 | .phys_pkg_id = phys_pkg_id, | 162 | .phys_pkg_id = phys_pkg_id, |
| 163 | .get_apic_id = get_apic_id, | ||
| 164 | .set_apic_id = set_apic_id, | ||
| 165 | .apic_id_mask = (0xFFu<<24), | ||
| 126 | }; | 166 | }; |
| 127 | 167 | ||
| 128 | /* | 168 | /* |
| @@ -130,6 +170,23 @@ struct genapic apic_flat = { | |||
| 130 | * We cannot use logical delivery in this case because the mask | 170 | * We cannot use logical delivery in this case because the mask |
| 131 | * overflows, so use physical mode. | 171 | * overflows, so use physical mode. |
| 132 | */ | 172 | */ |
| 173 | static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | ||
| 174 | { | ||
| 175 | #ifdef CONFIG_ACPI | ||
| 176 | /* | ||
| 177 | * Quirk: some x86_64 machines can only use physical APIC mode | ||
| 178 | * regardless of how many processors are present (x86_64 ES7000 | ||
| 179 | * is an example). | ||
| 180 | */ | ||
| 181 | if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID && | ||
| 182 | (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) { | ||
| 183 | printk(KERN_DEBUG "system APIC can only use physical flat"); | ||
| 184 | return 1; | ||
| 185 | } | ||
| 186 | #endif | ||
| 187 | |||
| 188 | return 0; | ||
| 189 | } | ||
| 133 | 190 | ||
| 134 | static cpumask_t physflat_target_cpus(void) | 191 | static cpumask_t physflat_target_cpus(void) |
| 135 | { | 192 | { |
| @@ -168,7 +225,7 @@ static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask) | |||
| 168 | * May as well be the first. | 225 | * May as well be the first. |
| 169 | */ | 226 | */ |
| 170 | cpu = first_cpu(cpumask); | 227 | cpu = first_cpu(cpumask); |
| 171 | if ((unsigned)cpu < NR_CPUS) | 228 | if ((unsigned)cpu < nr_cpu_ids) |
| 172 | return per_cpu(x86_cpu_to_apicid, cpu); | 229 | return per_cpu(x86_cpu_to_apicid, cpu); |
| 173 | else | 230 | else |
| 174 | return BAD_APICID; | 231 | return BAD_APICID; |
| @@ -176,6 +233,7 @@ static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask) | |||
| 176 | 233 | ||
| 177 | struct genapic apic_physflat = { | 234 | struct genapic apic_physflat = { |
| 178 | .name = "physical flat", | 235 | .name = "physical flat", |
| 236 | .acpi_madt_oem_check = physflat_acpi_madt_oem_check, | ||
| 179 | .int_delivery_mode = dest_Fixed, | 237 | .int_delivery_mode = dest_Fixed, |
| 180 | .int_dest_mode = (APIC_DEST_PHYSICAL != 0), | 238 | .int_dest_mode = (APIC_DEST_PHYSICAL != 0), |
| 181 | .target_cpus = physflat_target_cpus, | 239 | .target_cpus = physflat_target_cpus, |
| @@ -185,6 +243,10 @@ struct genapic apic_physflat = { | |||
| 185 | .send_IPI_all = physflat_send_IPI_all, | 243 | .send_IPI_all = physflat_send_IPI_all, |
| 186 | .send_IPI_allbutself = physflat_send_IPI_allbutself, | 244 | .send_IPI_allbutself = physflat_send_IPI_allbutself, |
| 187 | .send_IPI_mask = physflat_send_IPI_mask, | 245 | .send_IPI_mask = physflat_send_IPI_mask, |
| 246 | .send_IPI_self = apic_send_IPI_self, | ||
| 188 | .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, | 247 | .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, |
| 189 | .phys_pkg_id = phys_pkg_id, | 248 | .phys_pkg_id = phys_pkg_id, |
| 249 | .get_apic_id = get_apic_id, | ||
| 250 | .set_apic_id = set_apic_id, | ||
| 251 | .apic_id_mask = (0xFFu<<24), | ||
| 190 | }; | 252 | }; |
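The new get_apic_id()/set_apic_id()/apic_id_mask trio above encodes the xAPIC convention that the 8-bit APIC ID sits in bits 31:24 of the APIC_ID register. A stand-alone, user-space sketch of that packing, mirroring the helpers above:

    #include <assert.h>
    #include <stdio.h>

    /* same bit packing as the flat/physflat helpers */
    static unsigned int get_apic_id(unsigned long x)
    {
            return (x >> 24) & 0xFFu;
    }

    static unsigned long set_apic_id(unsigned int id)
    {
            return ((unsigned long)(id & 0xFFu)) << 24;
    }

    int main(void)
    {
            assert(get_apic_id(0x0b000000UL) == 0x0b);
            assert(set_apic_id(0x0b) == 0x0b000000UL);
            assert(get_apic_id(set_apic_id(0x42)) == 0x42);
            printf("xAPIC ID round-trips through bits 31:24\n");
            return 0;
    }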
diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c new file mode 100644 index 000000000000..f6a2c8eb48a6 --- /dev/null +++ b/arch/x86/kernel/genx2apic_cluster.c | |||
| @@ -0,0 +1,159 @@ | |||
| 1 | #include <linux/threads.h> | ||
| 2 | #include <linux/cpumask.h> | ||
| 3 | #include <linux/string.h> | ||
| 4 | #include <linux/kernel.h> | ||
| 5 | #include <linux/ctype.h> | ||
| 6 | #include <linux/init.h> | ||
| 7 | #include <linux/dmar.h> | ||
| 8 | |||
| 9 | #include <asm/smp.h> | ||
| 10 | #include <asm/ipi.h> | ||
| 11 | #include <asm/genapic.h> | ||
| 12 | |||
| 13 | DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid); | ||
| 14 | |||
| 15 | static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | ||
| 16 | { | ||
| 17 | if (cpu_has_x2apic) | ||
| 18 | return 1; | ||
| 19 | |||
| 20 | return 0; | ||
| 21 | } | ||
| 22 | |||
| 23 | /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ | ||
| 24 | |||
| 25 | static cpumask_t x2apic_target_cpus(void) | ||
| 26 | { | ||
| 27 | return cpumask_of_cpu(0); | ||
| 28 | } | ||
| 29 | |||
| 30 | /* | ||
| 31 | * for now each logical cpu is in its own vector allocation domain. | ||
| 32 | */ | ||
| 33 | static cpumask_t x2apic_vector_allocation_domain(int cpu) | ||
| 34 | { | ||
| 35 | cpumask_t domain = CPU_MASK_NONE; | ||
| 36 | cpu_set(cpu, domain); | ||
| 37 | return domain; | ||
| 38 | } | ||
| 39 | |||
| 40 | static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, | ||
| 41 | unsigned int dest) | ||
| 42 | { | ||
| 43 | unsigned long cfg; | ||
| 44 | |||
| 45 | cfg = __prepare_ICR(0, vector, dest); | ||
| 46 | |||
| 47 | /* | ||
| 48 | * send the IPI. | ||
| 49 | */ | ||
| 50 | x2apic_icr_write(cfg, apicid); | ||
| 51 | } | ||
| 52 | |||
| 53 | /* | ||
| 54 | * for now, we send the IPIs one by one in the cpumask. | ||
| 55 | * TBD: Based on the cpu mask, we can send the IPIs to the cluster group | ||
| 56 | * at once. We have 16 cpus in a cluster. This will minimize IPI register | ||
| 57 | * writes. | ||
| 58 | */ | ||
| 59 | static void x2apic_send_IPI_mask(cpumask_t mask, int vector) | ||
| 60 | { | ||
| 61 | unsigned long flags; | ||
| 62 | unsigned long query_cpu; | ||
| 63 | |||
| 64 | local_irq_save(flags); | ||
| 65 | for_each_cpu_mask(query_cpu, mask) { | ||
| 66 | __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_logical_apicid, query_cpu), | ||
| 67 | vector, APIC_DEST_LOGICAL); | ||
| 68 | } | ||
| 69 | local_irq_restore(flags); | ||
| 70 | } | ||
| 71 | |||
| 72 | static void x2apic_send_IPI_allbutself(int vector) | ||
| 73 | { | ||
| 74 | cpumask_t mask = cpu_online_map; | ||
| 75 | |||
| 76 | cpu_clear(smp_processor_id(), mask); | ||
| 77 | |||
| 78 | if (!cpus_empty(mask)) | ||
| 79 | x2apic_send_IPI_mask(mask, vector); | ||
| 80 | } | ||
| 81 | |||
| 82 | static void x2apic_send_IPI_all(int vector) | ||
| 83 | { | ||
| 84 | x2apic_send_IPI_mask(cpu_online_map, vector); | ||
| 85 | } | ||
| 86 | |||
| 87 | static int x2apic_apic_id_registered(void) | ||
| 88 | { | ||
| 89 | return 1; | ||
| 90 | } | ||
| 91 | |||
| 92 | static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) | ||
| 93 | { | ||
| 94 | int cpu; | ||
| 95 | |||
| 96 | /* | ||
| 97 | * We're using fixed IRQ delivery, can only return one phys APIC ID. | ||
| 98 | * May as well be the first. | ||
| 99 | */ | ||
| 100 | cpu = first_cpu(cpumask); | ||
| 101 | if ((unsigned)cpu < NR_CPUS) | ||
| 102 | return per_cpu(x86_cpu_to_logical_apicid, cpu); | ||
| 103 | else | ||
| 104 | return BAD_APICID; | ||
| 105 | } | ||
| 106 | |||
| 107 | static unsigned int get_apic_id(unsigned long x) | ||
| 108 | { | ||
| 109 | unsigned int id; | ||
| 110 | |||
| 111 | id = x; | ||
| 112 | return id; | ||
| 113 | } | ||
| 114 | |||
| 115 | static unsigned long set_apic_id(unsigned int id) | ||
| 116 | { | ||
| 117 | unsigned long x; | ||
| 118 | |||
| 119 | x = id; | ||
| 120 | return x; | ||
| 121 | } | ||
| 122 | |||
| 123 | static unsigned int phys_pkg_id(int index_msb) | ||
| 124 | { | ||
| 125 | return current_cpu_data.initial_apicid >> index_msb; | ||
| 126 | } | ||
| 127 | |||
| 128 | static void x2apic_send_IPI_self(int vector) | ||
| 129 | { | ||
| 130 | apic_write(APIC_SELF_IPI, vector); | ||
| 131 | } | ||
| 132 | |||
| 133 | static void init_x2apic_ldr(void) | ||
| 134 | { | ||
| 135 | int cpu = smp_processor_id(); | ||
| 136 | |||
| 137 | per_cpu(x86_cpu_to_logical_apicid, cpu) = apic_read(APIC_LDR); | ||
| 138 | return; | ||
| 139 | } | ||
| 140 | |||
| 141 | struct genapic apic_x2apic_cluster = { | ||
| 142 | .name = "cluster x2apic", | ||
| 143 | .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, | ||
| 144 | .int_delivery_mode = dest_LowestPrio, | ||
| 145 | .int_dest_mode = (APIC_DEST_LOGICAL != 0), | ||
| 146 | .target_cpus = x2apic_target_cpus, | ||
| 147 | .vector_allocation_domain = x2apic_vector_allocation_domain, | ||
| 148 | .apic_id_registered = x2apic_apic_id_registered, | ||
| 149 | .init_apic_ldr = init_x2apic_ldr, | ||
| 150 | .send_IPI_all = x2apic_send_IPI_all, | ||
| 151 | .send_IPI_allbutself = x2apic_send_IPI_allbutself, | ||
| 152 | .send_IPI_mask = x2apic_send_IPI_mask, | ||
| 153 | .send_IPI_self = x2apic_send_IPI_self, | ||
| 154 | .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, | ||
| 155 | .phys_pkg_id = phys_pkg_id, | ||
| 156 | .get_apic_id = get_apic_id, | ||
| 157 | .set_apic_id = set_apic_id, | ||
| 158 | .apic_id_mask = (0xFFFFFFFFu), | ||
| 159 | }; | ||
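The TBD comment above hints at batching: in x2APIC cluster mode the logical ID read from APIC_LDR is laid out as <cluster:16><cpu bit:16>, with at most 16 CPUs per cluster, so IPIs for CPUs sharing a cluster could in principle be merged into a single ICR write. The following user-space sketch only illustrates that grouping idea with made-up logical IDs; it is not the eventual kernel implementation.

    #include <stdio.h>

    #define NCPUS 6

    int main(void)
    {
            /* hypothetical per-cpu logical APIC IDs: <cluster:16><bit:16> */
            unsigned int ldr[NCPUS] = {
                    0x00010001, 0x00010002, 0x00010004,     /* cluster 1 */
                    0x00020001, 0x00020002, 0x00020008,     /* cluster 2 */
            };
            unsigned int dest[3] = { 0 };
            int cpu, c;

            for (cpu = 0; cpu < NCPUS; cpu++) {
                    unsigned int cluster = ldr[cpu] >> 16;
                    dest[cluster] |= ldr[cpu];      /* accumulate the per-cluster mask */
            }

            for (c = 1; c <= 2; c++)
                    printf("cluster %d -> single ICR destination 0x%08x\n", c, dest[c]);
            return 0;
    }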
diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c new file mode 100644 index 000000000000..d042211768b7 --- /dev/null +++ b/arch/x86/kernel/genx2apic_phys.c | |||
| @@ -0,0 +1,154 @@ | |||
| 1 | #include <linux/threads.h> | ||
| 2 | #include <linux/cpumask.h> | ||
| 3 | #include <linux/string.h> | ||
| 4 | #include <linux/kernel.h> | ||
| 5 | #include <linux/ctype.h> | ||
| 6 | #include <linux/init.h> | ||
| 7 | #include <linux/dmar.h> | ||
| 8 | |||
| 9 | #include <asm/smp.h> | ||
| 10 | #include <asm/ipi.h> | ||
| 11 | #include <asm/genapic.h> | ||
| 12 | |||
| 13 | static int x2apic_phys; | ||
| 14 | |||
| 15 | static int set_x2apic_phys_mode(char *arg) | ||
| 16 | { | ||
| 17 | x2apic_phys = 1; | ||
| 18 | return 0; | ||
| 19 | } | ||
| 20 | early_param("x2apic_phys", set_x2apic_phys_mode); | ||
| 21 | |||
| 22 | static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | ||
| 23 | { | ||
| 24 | if (cpu_has_x2apic && x2apic_phys) | ||
| 25 | return 1; | ||
| 26 | |||
| 27 | return 0; | ||
| 28 | } | ||
| 29 | |||
| 30 | /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ | ||
| 31 | |||
| 32 | static cpumask_t x2apic_target_cpus(void) | ||
| 33 | { | ||
| 34 | return cpumask_of_cpu(0); | ||
| 35 | } | ||
| 36 | |||
| 37 | static cpumask_t x2apic_vector_allocation_domain(int cpu) | ||
| 38 | { | ||
| 39 | cpumask_t domain = CPU_MASK_NONE; | ||
| 40 | cpu_set(cpu, domain); | ||
| 41 | return domain; | ||
| 42 | } | ||
| 43 | |||
| 44 | static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, | ||
| 45 | unsigned int dest) | ||
| 46 | { | ||
| 47 | unsigned long cfg; | ||
| 48 | |||
| 49 | cfg = __prepare_ICR(0, vector, dest); | ||
| 50 | |||
| 51 | /* | ||
| 52 | * send the IPI. | ||
| 53 | */ | ||
| 54 | x2apic_icr_write(cfg, apicid); | ||
| 55 | } | ||
| 56 | |||
| 57 | static void x2apic_send_IPI_mask(cpumask_t mask, int vector) | ||
| 58 | { | ||
| 59 | unsigned long flags; | ||
| 60 | unsigned long query_cpu; | ||
| 61 | |||
| 62 | local_irq_save(flags); | ||
| 63 | for_each_cpu_mask(query_cpu, mask) { | ||
| 64 | __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), | ||
| 65 | vector, APIC_DEST_PHYSICAL); | ||
| 66 | } | ||
| 67 | local_irq_restore(flags); | ||
| 68 | } | ||
| 69 | |||
| 70 | static void x2apic_send_IPI_allbutself(int vector) | ||
| 71 | { | ||
| 72 | cpumask_t mask = cpu_online_map; | ||
| 73 | |||
| 74 | cpu_clear(smp_processor_id(), mask); | ||
| 75 | |||
| 76 | if (!cpus_empty(mask)) | ||
| 77 | x2apic_send_IPI_mask(mask, vector); | ||
| 78 | } | ||
| 79 | |||
| 80 | static void x2apic_send_IPI_all(int vector) | ||
| 81 | { | ||
| 82 | x2apic_send_IPI_mask(cpu_online_map, vector); | ||
| 83 | } | ||
| 84 | |||
| 85 | static int x2apic_apic_id_registered(void) | ||
| 86 | { | ||
| 87 | return 1; | ||
| 88 | } | ||
| 89 | |||
| 90 | static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) | ||
| 91 | { | ||
| 92 | int cpu; | ||
| 93 | |||
| 94 | /* | ||
| 95 | * We're using fixed IRQ delivery, can only return one phys APIC ID. | ||
| 96 | * May as well be the first. | ||
| 97 | */ | ||
| 98 | cpu = first_cpu(cpumask); | ||
| 99 | if ((unsigned)cpu < NR_CPUS) | ||
| 100 | return per_cpu(x86_cpu_to_apicid, cpu); | ||
| 101 | else | ||
| 102 | return BAD_APICID; | ||
| 103 | } | ||
| 104 | |||
| 105 | static unsigned int get_apic_id(unsigned long x) | ||
| 106 | { | ||
| 107 | unsigned int id; | ||
| 108 | |||
| 109 | id = x; | ||
| 110 | return id; | ||
| 111 | } | ||
| 112 | |||
| 113 | static unsigned long set_apic_id(unsigned int id) | ||
| 114 | { | ||
| 115 | unsigned long x; | ||
| 116 | |||
| 117 | x = id; | ||
| 118 | return x; | ||
| 119 | } | ||
| 120 | |||
| 121 | static unsigned int phys_pkg_id(int index_msb) | ||
| 122 | { | ||
| 123 | return current_cpu_data.initial_apicid >> index_msb; | ||
| 124 | } | ||
| 125 | |||
| 126 | void x2apic_send_IPI_self(int vector) | ||
| 127 | { | ||
| 128 | apic_write(APIC_SELF_IPI, vector); | ||
| 129 | } | ||
| 130 | |||
| 131 | void init_x2apic_ldr(void) | ||
| 132 | { | ||
| 133 | return; | ||
| 134 | } | ||
| 135 | |||
| 136 | struct genapic apic_x2apic_phys = { | ||
| 137 | .name = "physical x2apic", | ||
| 138 | .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, | ||
| 139 | .int_delivery_mode = dest_Fixed, | ||
| 140 | .int_dest_mode = (APIC_DEST_PHYSICAL != 0), | ||
| 141 | .target_cpus = x2apic_target_cpus, | ||
| 142 | .vector_allocation_domain = x2apic_vector_allocation_domain, | ||
| 143 | .apic_id_registered = x2apic_apic_id_registered, | ||
| 144 | .init_apic_ldr = init_x2apic_ldr, | ||
| 145 | .send_IPI_all = x2apic_send_IPI_all, | ||
| 146 | .send_IPI_allbutself = x2apic_send_IPI_allbutself, | ||
| 147 | .send_IPI_mask = x2apic_send_IPI_mask, | ||
| 148 | .send_IPI_self = x2apic_send_IPI_self, | ||
| 149 | .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, | ||
| 150 | .phys_pkg_id = phys_pkg_id, | ||
| 151 | .get_apic_id = get_apic_id, | ||
| 152 | .set_apic_id = set_apic_id, | ||
| 153 | .apic_id_mask = (0xFFFFFFFFu), | ||
| 154 | }; | ||
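With the early_param() hook above, physical-mode x2APIC delivery is opted into explicitly from the kernel command line rather than auto-detected; the flag takes no value. A usage example (the rest of the command line is illustrative):

    linux ... root=/dev/sda1 x2apic_phys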
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 711f11c30b06..680a06557c5e 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c | |||
| @@ -12,18 +12,49 @@ | |||
| 12 | #include <linux/threads.h> | 12 | #include <linux/threads.h> |
| 13 | #include <linux/cpumask.h> | 13 | #include <linux/cpumask.h> |
| 14 | #include <linux/string.h> | 14 | #include <linux/string.h> |
| 15 | #include <linux/kernel.h> | ||
| 16 | #include <linux/ctype.h> | 15 | #include <linux/ctype.h> |
| 17 | #include <linux/init.h> | 16 | #include <linux/init.h> |
| 18 | #include <linux/sched.h> | 17 | #include <linux/sched.h> |
| 19 | #include <linux/bootmem.h> | 18 | #include <linux/bootmem.h> |
| 20 | #include <linux/module.h> | 19 | #include <linux/module.h> |
| 20 | #include <linux/hardirq.h> | ||
| 21 | #include <asm/smp.h> | 21 | #include <asm/smp.h> |
| 22 | #include <asm/ipi.h> | 22 | #include <asm/ipi.h> |
| 23 | #include <asm/genapic.h> | 23 | #include <asm/genapic.h> |
| 24 | #include <asm/pgtable.h> | 24 | #include <asm/pgtable.h> |
| 25 | #include <asm/uv/uv_mmrs.h> | 25 | #include <asm/uv/uv_mmrs.h> |
| 26 | #include <asm/uv/uv_hub.h> | 26 | #include <asm/uv/uv_hub.h> |
| 27 | #include <asm/uv/bios.h> | ||
| 28 | |||
| 29 | DEFINE_PER_CPU(int, x2apic_extra_bits); | ||
| 30 | |||
| 31 | static enum uv_system_type uv_system_type; | ||
| 32 | |||
| 33 | static int uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | ||
| 34 | { | ||
| 35 | if (!strcmp(oem_id, "SGI")) { | ||
| 36 | if (!strcmp(oem_table_id, "UVL")) | ||
| 37 | uv_system_type = UV_LEGACY_APIC; | ||
| 38 | else if (!strcmp(oem_table_id, "UVX")) | ||
| 39 | uv_system_type = UV_X2APIC; | ||
| 40 | else if (!strcmp(oem_table_id, "UVH")) { | ||
| 41 | uv_system_type = UV_NON_UNIQUE_APIC; | ||
| 42 | return 1; | ||
| 43 | } | ||
| 44 | } | ||
| 45 | return 0; | ||
| 46 | } | ||
| 47 | |||
| 48 | enum uv_system_type get_uv_system_type(void) | ||
| 49 | { | ||
| 50 | return uv_system_type; | ||
| 51 | } | ||
| 52 | |||
| 53 | int is_uv_system(void) | ||
| 54 | { | ||
| 55 | return uv_system_type != UV_NONE; | ||
| 56 | } | ||
| 57 | EXPORT_SYMBOL_GPL(is_uv_system); | ||
| 27 | 58 | ||
| 28 | DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); | 59 | DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); |
| 29 | EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info); | 60 | EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info); |
| @@ -40,6 +71,9 @@ EXPORT_SYMBOL_GPL(uv_cpu_to_blade); | |||
| 40 | short uv_possible_blades; | 71 | short uv_possible_blades; |
| 41 | EXPORT_SYMBOL_GPL(uv_possible_blades); | 72 | EXPORT_SYMBOL_GPL(uv_possible_blades); |
| 42 | 73 | ||
| 74 | unsigned long sn_rtc_cycles_per_second; | ||
| 75 | EXPORT_SYMBOL(sn_rtc_cycles_per_second); | ||
| 76 | |||
| 43 | /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ | 77 | /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ |
| 44 | 78 | ||
| 45 | static cpumask_t uv_target_cpus(void) | 79 | static cpumask_t uv_target_cpus(void) |
| @@ -80,7 +114,7 @@ static void uv_send_IPI_one(int cpu, int vector) | |||
| 80 | unsigned long val, apicid, lapicid; | 114 | unsigned long val, apicid, lapicid; |
| 81 | int pnode; | 115 | int pnode; |
| 82 | 116 | ||
| 83 | apicid = per_cpu(x86_cpu_to_apicid, cpu); /* ZZZ - cache node-local ? */ | 117 | apicid = per_cpu(x86_cpu_to_apicid, cpu); |
| 84 | lapicid = apicid & 0x3f; /* ZZZ macro needed */ | 118 | lapicid = apicid & 0x3f; /* ZZZ macro needed */ |
| 85 | pnode = uv_apicid_to_pnode(apicid); | 119 | pnode = uv_apicid_to_pnode(apicid); |
| 86 | val = | 120 | val = |
| @@ -94,7 +128,7 @@ static void uv_send_IPI_mask(cpumask_t mask, int vector) | |||
| 94 | { | 128 | { |
| 95 | unsigned int cpu; | 129 | unsigned int cpu; |
| 96 | 130 | ||
| 97 | for (cpu = 0; cpu < NR_CPUS; ++cpu) | 131 | for_each_possible_cpu(cpu) |
| 98 | if (cpu_isset(cpu, mask)) | 132 | if (cpu_isset(cpu, mask)) |
| 99 | uv_send_IPI_one(cpu, vector); | 133 | uv_send_IPI_one(cpu, vector); |
| 100 | } | 134 | } |
| @@ -119,6 +153,10 @@ static int uv_apic_id_registered(void) | |||
| 119 | return 1; | 153 | return 1; |
| 120 | } | 154 | } |
| 121 | 155 | ||
| 156 | static void uv_init_apic_ldr(void) | ||
| 157 | { | ||
| 158 | } | ||
| 159 | |||
| 122 | static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) | 160 | static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) |
| 123 | { | 161 | { |
| 124 | int cpu; | 162 | int cpu; |
| @@ -128,37 +166,65 @@ static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) | |||
| 128 | * May as well be the first. | 166 | * May as well be the first. |
| 129 | */ | 167 | */ |
| 130 | cpu = first_cpu(cpumask); | 168 | cpu = first_cpu(cpumask); |
| 131 | if ((unsigned)cpu < NR_CPUS) | 169 | if ((unsigned)cpu < nr_cpu_ids) |
| 132 | return per_cpu(x86_cpu_to_apicid, cpu); | 170 | return per_cpu(x86_cpu_to_apicid, cpu); |
| 133 | else | 171 | else |
| 134 | return BAD_APICID; | 172 | return BAD_APICID; |
| 135 | } | 173 | } |
| 136 | 174 | ||
| 175 | static unsigned int get_apic_id(unsigned long x) | ||
| 176 | { | ||
| 177 | unsigned int id; | ||
| 178 | |||
| 179 | WARN_ON(preemptible() && num_online_cpus() > 1); | ||
| 180 | id = x | __get_cpu_var(x2apic_extra_bits); | ||
| 181 | |||
| 182 | return id; | ||
| 183 | } | ||
| 184 | |||
| 185 | static unsigned long set_apic_id(unsigned int id) | ||
| 186 | { | ||
| 187 | unsigned long x; | ||
| 188 | |||
| 189 | /* mask out x2apic_extra_bits ? */ | ||
| 190 | x = id; | ||
| 191 | return x; | ||
| 192 | } | ||
| 193 | |||
| 194 | static unsigned int uv_read_apic_id(void) | ||
| 195 | { | ||
| 196 | |||
| 197 | return get_apic_id(apic_read(APIC_ID)); | ||
| 198 | } | ||
| 199 | |||
| 137 | static unsigned int phys_pkg_id(int index_msb) | 200 | static unsigned int phys_pkg_id(int index_msb) |
| 138 | { | 201 | { |
| 139 | return GET_APIC_ID(read_apic_id()) >> index_msb; | 202 | return uv_read_apic_id() >> index_msb; |
| 140 | } | 203 | } |
| 141 | 204 | ||
| 142 | #ifdef ZZZ /* Needs x2apic patch */ | ||
| 143 | static void uv_send_IPI_self(int vector) | 205 | static void uv_send_IPI_self(int vector) |
| 144 | { | 206 | { |
| 145 | apic_write(APIC_SELF_IPI, vector); | 207 | apic_write(APIC_SELF_IPI, vector); |
| 146 | } | 208 | } |
| 147 | #endif | ||
| 148 | 209 | ||
| 149 | struct genapic apic_x2apic_uv_x = { | 210 | struct genapic apic_x2apic_uv_x = { |
| 150 | .name = "UV large system", | 211 | .name = "UV large system", |
| 212 | .acpi_madt_oem_check = uv_acpi_madt_oem_check, | ||
| 151 | .int_delivery_mode = dest_Fixed, | 213 | .int_delivery_mode = dest_Fixed, |
| 152 | .int_dest_mode = (APIC_DEST_PHYSICAL != 0), | 214 | .int_dest_mode = (APIC_DEST_PHYSICAL != 0), |
| 153 | .target_cpus = uv_target_cpus, | 215 | .target_cpus = uv_target_cpus, |
| 154 | .vector_allocation_domain = uv_vector_allocation_domain,/* Fixme ZZZ */ | 216 | .vector_allocation_domain = uv_vector_allocation_domain, |
| 155 | .apic_id_registered = uv_apic_id_registered, | 217 | .apic_id_registered = uv_apic_id_registered, |
| 218 | .init_apic_ldr = uv_init_apic_ldr, | ||
| 156 | .send_IPI_all = uv_send_IPI_all, | 219 | .send_IPI_all = uv_send_IPI_all, |
| 157 | .send_IPI_allbutself = uv_send_IPI_allbutself, | 220 | .send_IPI_allbutself = uv_send_IPI_allbutself, |
| 158 | .send_IPI_mask = uv_send_IPI_mask, | 221 | .send_IPI_mask = uv_send_IPI_mask, |
| 159 | /* ZZZ.send_IPI_self = uv_send_IPI_self, */ | 222 | .send_IPI_self = uv_send_IPI_self, |
| 160 | .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, | 223 | .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, |
| 161 | .phys_pkg_id = phys_pkg_id, /* Fixme ZZZ */ | 224 | .phys_pkg_id = phys_pkg_id, |
| 225 | .get_apic_id = get_apic_id, | ||
| 226 | .set_apic_id = set_apic_id, | ||
| 227 | .apic_id_mask = (0xFFFFFFFFu), | ||
| 162 | }; | 228 | }; |
| 163 | 229 | ||
| 164 | static __cpuinit void set_x2apic_extra_bits(int pnode) | 230 | static __cpuinit void set_x2apic_extra_bits(int pnode) |
| @@ -218,12 +284,13 @@ static __init void map_low_mmrs(void) | |||
| 218 | 284 | ||
| 219 | enum map_type {map_wb, map_uc}; | 285 | enum map_type {map_wb, map_uc}; |
| 220 | 286 | ||
| 221 | static void map_high(char *id, unsigned long base, int shift, enum map_type map_type) | 287 | static __init void map_high(char *id, unsigned long base, int shift, |
| 288 | int max_pnode, enum map_type map_type) | ||
| 222 | { | 289 | { |
| 223 | unsigned long bytes, paddr; | 290 | unsigned long bytes, paddr; |
| 224 | 291 | ||
| 225 | paddr = base << shift; | 292 | paddr = base << shift; |
| 226 | bytes = (1UL << shift); | 293 | bytes = (1UL << shift) * (max_pnode + 1); |
| 227 | printk(KERN_INFO "UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr, | 294 | printk(KERN_INFO "UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr, |
| 228 | paddr + bytes); | 295 | paddr + bytes); |
| 229 | if (map_type == map_uc) | 296 | if (map_type == map_uc) |
| @@ -239,7 +306,7 @@ static __init void map_gru_high(int max_pnode) | |||
| 239 | 306 | ||
| 240 | gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR); | 307 | gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR); |
| 241 | if (gru.s.enable) | 308 | if (gru.s.enable) |
| 242 | map_high("GRU", gru.s.base, shift, map_wb); | 309 | map_high("GRU", gru.s.base, shift, max_pnode, map_wb); |
| 243 | } | 310 | } |
| 244 | 311 | ||
| 245 | static __init void map_config_high(int max_pnode) | 312 | static __init void map_config_high(int max_pnode) |
| @@ -249,7 +316,7 @@ static __init void map_config_high(int max_pnode) | |||
| 249 | 316 | ||
| 250 | cfg.v = uv_read_local_mmr(UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR); | 317 | cfg.v = uv_read_local_mmr(UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR); |
| 251 | if (cfg.s.enable) | 318 | if (cfg.s.enable) |
| 252 | map_high("CONFIG", cfg.s.base, shift, map_uc); | 319 | map_high("CONFIG", cfg.s.base, shift, max_pnode, map_uc); |
| 253 | } | 320 | } |
| 254 | 321 | ||
| 255 | static __init void map_mmr_high(int max_pnode) | 322 | static __init void map_mmr_high(int max_pnode) |
| @@ -259,7 +326,7 @@ static __init void map_mmr_high(int max_pnode) | |||
| 259 | 326 | ||
| 260 | mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR); | 327 | mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR); |
| 261 | if (mmr.s.enable) | 328 | if (mmr.s.enable) |
| 262 | map_high("MMR", mmr.s.base, shift, map_uc); | 329 | map_high("MMR", mmr.s.base, shift, max_pnode, map_uc); |
| 263 | } | 330 | } |
| 264 | 331 | ||
| 265 | static __init void map_mmioh_high(int max_pnode) | 332 | static __init void map_mmioh_high(int max_pnode) |
| @@ -269,10 +336,44 @@ static __init void map_mmioh_high(int max_pnode) | |||
| 269 | 336 | ||
| 270 | mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR); | 337 | mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR); |
| 271 | if (mmioh.s.enable) | 338 | if (mmioh.s.enable) |
| 272 | map_high("MMIOH", mmioh.s.base, shift, map_uc); | 339 | map_high("MMIOH", mmioh.s.base, shift, max_pnode, map_uc); |
| 340 | } | ||
| 341 | |||
| 342 | static __init void uv_rtc_init(void) | ||
| 343 | { | ||
| 344 | long status; | ||
| 345 | u64 ticks_per_sec; | ||
| 346 | |||
| 347 | status = uv_bios_freq_base(BIOS_FREQ_BASE_REALTIME_CLOCK, | ||
| 348 | &ticks_per_sec); | ||
| 349 | if (status != BIOS_STATUS_SUCCESS || ticks_per_sec < 100000) { | ||
| 350 | printk(KERN_WARNING | ||
| 351 | "unable to determine platform RTC clock frequency, " | ||
| 352 | "guessing.\n"); | ||
| 353 | /* BIOS gives a wrong value for the clock frequency, so guess */ | ||
| 354 | sn_rtc_cycles_per_second = 1000000000000UL / 30000UL; | ||
| 355 | } else | ||
| 356 | sn_rtc_cycles_per_second = ticks_per_sec; | ||
| 273 | } | 357 | } |
| 274 | 358 | ||
| 275 | static __init void uv_system_init(void) | 359 | /* |
| 360 | * Called on each cpu to initialize the per_cpu UV data area. | ||
| 361 | * ZZZ hotplug not supported yet | ||
| 362 | */ | ||
| 363 | void __cpuinit uv_cpu_init(void) | ||
| 364 | { | ||
| 365 | /* CPU 0 initialization will be done via uv_system_init. */ | ||
| 366 | if (!uv_blade_info) | ||
| 367 | return; | ||
| 368 | |||
| 369 | uv_blade_info[uv_numa_blade_id()].nr_online_cpus++; | ||
| 370 | |||
| 371 | if (get_uv_system_type() == UV_NON_UNIQUE_APIC) | ||
| 372 | set_x2apic_extra_bits(uv_hub_info->pnode); | ||
| 373 | } | ||
| 374 | |||
| 375 | |||
| 376 | void __init uv_system_init(void) | ||
| 276 | { | 377 | { |
| 277 | union uvh_si_addr_map_config_u m_n_config; | 378 | union uvh_si_addr_map_config_u m_n_config; |
| 278 | union uvh_node_id_u node_id; | 379 | union uvh_node_id_u node_id; |
| @@ -326,6 +427,11 @@ static __init void uv_system_init(void) | |||
| 326 | gnode_upper = (((unsigned long)node_id.s.node_id) & | 427 | gnode_upper = (((unsigned long)node_id.s.node_id) & |
| 327 | ~((1 << n_val) - 1)) << m_val; | 428 | ~((1 << n_val) - 1)) << m_val; |
| 328 | 429 | ||
| 430 | uv_bios_init(); | ||
| 431 | uv_bios_get_sn_info(0, &uv_type, &sn_partition_id, | ||
| 432 | &uv_coherency_id, &uv_region_size); | ||
| 433 | uv_rtc_init(); | ||
| 434 | |||
| 329 | for_each_present_cpu(cpu) { | 435 | for_each_present_cpu(cpu) { |
| 330 | nid = cpu_to_node(cpu); | 436 | nid = cpu_to_node(cpu); |
| 331 | pnode = uv_apicid_to_pnode(per_cpu(x86_cpu_to_apicid, cpu)); | 437 | pnode = uv_apicid_to_pnode(per_cpu(x86_cpu_to_apicid, cpu)); |
| @@ -345,7 +451,7 @@ static __init void uv_system_init(void) | |||
| 345 | uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1; | 451 | uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1; |
| 346 | uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; | 452 | uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; |
| 347 | uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; | 453 | uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; |
| 348 | uv_cpu_hub_info(cpu)->coherency_domain_number = 0;/* ZZZ */ | 454 | uv_cpu_hub_info(cpu)->coherency_domain_number = uv_coherency_id; |
| 349 | uv_node_to_blade[nid] = blade; | 455 | uv_node_to_blade[nid] = blade; |
| 350 | uv_cpu_to_blade[cpu] = blade; | 456 | uv_cpu_to_blade[cpu] = blade; |
| 351 | max_pnode = max(pnode, max_pnode); | 457 | max_pnode = max(pnode, max_pnode); |
| @@ -360,19 +466,6 @@ static __init void uv_system_init(void) | |||
| 360 | map_mmr_high(max_pnode); | 466 | map_mmr_high(max_pnode); |
| 361 | map_config_high(max_pnode); | 467 | map_config_high(max_pnode); |
| 362 | map_mmioh_high(max_pnode); | 468 | map_mmioh_high(max_pnode); |
| 363 | } | ||
| 364 | 469 | ||
| 365 | /* | 470 | uv_cpu_init(); |
| 366 | * Called on each cpu to initialize the per_cpu UV data area. | ||
| 367 | * ZZZ hotplug not supported yet | ||
| 368 | */ | ||
| 369 | void __cpuinit uv_cpu_init(void) | ||
| 370 | { | ||
| 371 | if (!uv_node_to_blade) | ||
| 372 | uv_system_init(); | ||
| 373 | |||
| 374 | uv_blade_info[uv_numa_blade_id()].nr_online_cpus++; | ||
| 375 | |||
| 376 | if (get_uv_system_type() == UV_NON_UNIQUE_APIC) | ||
| 377 | set_x2apic_extra_bits(uv_hub_info->pnode); | ||
| 378 | } | 471 | } |
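As a quick sanity check on the fallback in uv_rtc_init() above: when the BIOS call fails, or reports fewer than 100,000 ticks per second, the code guesses 1,000,000,000,000 / 30,000 = 33,333,333 cycles per second, i.e. roughly a 33.3 MHz real-time clock.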
diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/head.c index 3e66bd364a9d..1dcb0f13897e 100644 --- a/arch/x86/kernel/head.c +++ b/arch/x86/kernel/head.c | |||
| @@ -35,6 +35,7 @@ void __init reserve_ebda_region(void) | |||
| 35 | 35 | ||
| 36 | /* start of EBDA area */ | 36 | /* start of EBDA area */ |
| 37 | ebda_addr = get_bios_ebda(); | 37 | ebda_addr = get_bios_ebda(); |
| 38 | printk(KERN_INFO "BIOS EBDA/lowmem at: %08x/%08x\n", ebda_addr, lowmem); | ||
| 38 | 39 | ||
| 39 | /* Fixup: bios puts an EBDA in the top 64K segment */ | 40 | /* Fixup: bios puts an EBDA in the top 64K segment */ |
| 40 | /* of conventional memory, but does not adjust lowmem. */ | 41 | /* of conventional memory, but does not adjust lowmem. */ |
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index c97819829146..d16084f90649 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c | |||
| @@ -39,6 +39,13 @@ static struct x8664_pda *__cpu_pda[NR_CPUS] __initdata; | |||
| 39 | static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly; | 39 | static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly; |
| 40 | #endif | 40 | #endif |
| 41 | 41 | ||
| 42 | void __init x86_64_init_pda(void) | ||
| 43 | { | ||
| 44 | _cpu_pda = __cpu_pda; | ||
| 45 | cpu_pda(0) = &_boot_cpu_pda; | ||
| 46 | pda_init(0); | ||
| 47 | } | ||
| 48 | |||
| 42 | static void __init zap_identity_mappings(void) | 49 | static void __init zap_identity_mappings(void) |
| 43 | { | 50 | { |
| 44 | pgd_t *pgd = pgd_offset_k(0UL); | 51 | pgd_t *pgd = pgd_offset_k(0UL); |
| @@ -81,6 +88,7 @@ void __init x86_64_start_kernel(char * real_mode_data) | |||
| 81 | BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL)); | 88 | BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL)); |
| 82 | BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) == | 89 | BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) == |
| 83 | (__START_KERNEL & PGDIR_MASK))); | 90 | (__START_KERNEL & PGDIR_MASK))); |
| 91 | BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END); | ||
| 84 | 92 | ||
| 85 | /* clear bss before set_intr_gate with early_idt_handler */ | 93 | /* clear bss before set_intr_gate with early_idt_handler */ |
| 86 | clear_bss(); | 94 | clear_bss(); |
| @@ -100,13 +108,10 @@ void __init x86_64_start_kernel(char * real_mode_data) | |||
| 100 | } | 108 | } |
| 101 | load_idt((const struct desc_ptr *)&idt_descr); | 109 | load_idt((const struct desc_ptr *)&idt_descr); |
| 102 | 110 | ||
| 103 | early_printk("Kernel alive\n"); | 111 | if (console_loglevel == 10) |
| 104 | 112 | early_printk("Kernel alive\n"); | |
| 105 | _cpu_pda = __cpu_pda; | ||
| 106 | cpu_pda(0) = &_boot_cpu_pda; | ||
| 107 | pda_init(0); | ||
| 108 | 113 | ||
| 109 | early_printk("Kernel really alive\n"); | 114 | x86_64_init_pda(); |
| 110 | 115 | ||
| 111 | x86_64_start_reservations(real_mode_data); | 116 | x86_64_start_reservations(real_mode_data); |
| 112 | } | 117 | } |
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index f67e93441caf..e835b4eea70b 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S | |||
| @@ -172,10 +172,6 @@ num_subarch_entries = (. - subarch_entries) / 4 | |||
| 172 | * | 172 | * |
| 173 | * Note that the stack is not yet set up! | 173 | * Note that the stack is not yet set up! |
| 174 | */ | 174 | */ |
| 175 | #define PTE_ATTR 0x007 /* PRESENT+RW+USER */ | ||
| 176 | #define PDE_ATTR 0x067 /* PRESENT+RW+USER+DIRTY+ACCESSED */ | ||
| 177 | #define PGD_ATTR 0x001 /* PRESENT (no other attributes) */ | ||
| 178 | |||
| 179 | default_entry: | 175 | default_entry: |
| 180 | #ifdef CONFIG_X86_PAE | 176 | #ifdef CONFIG_X86_PAE |
| 181 | 177 | ||
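For reference, the attribute values removed here decompose into the standard x86 page-table flag bits: 0x007 = PRESENT (0x1) + RW (0x2) + USER (0x4), and 0x067 adds ACCESSED (0x20) and DIRTY (0x40) on top of that, while the PGD value 0x001 is PRESENT alone. The PTE_IDENT_ATTR / PDE_IDENT_ATTR names used below are assumed to carry the same values from a shared header, so the generated page-table entries are unchanged.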
| @@ -196,9 +192,9 @@ default_entry: | |||
| 196 | movl $pa(pg0), %edi | 192 | movl $pa(pg0), %edi |
| 197 | movl %edi, pa(init_pg_tables_start) | 193 | movl %edi, pa(init_pg_tables_start) |
| 198 | movl $pa(swapper_pg_pmd), %edx | 194 | movl $pa(swapper_pg_pmd), %edx |
| 199 | movl $PTE_ATTR, %eax | 195 | movl $PTE_IDENT_ATTR, %eax |
| 200 | 10: | 196 | 10: |
| 201 | leal PDE_ATTR(%edi),%ecx /* Create PMD entry */ | 197 | leal PDE_IDENT_ATTR(%edi),%ecx /* Create PMD entry */ |
| 202 | movl %ecx,(%edx) /* Store PMD entry */ | 198 | movl %ecx,(%edx) /* Store PMD entry */ |
| 203 | /* Upper half already zero */ | 199 | /* Upper half already zero */ |
| 204 | addl $8,%edx | 200 | addl $8,%edx |
| @@ -215,7 +211,7 @@ default_entry: | |||
| 215 | * End condition: we must map up to and including INIT_MAP_BEYOND_END | 211 | * End condition: we must map up to and including INIT_MAP_BEYOND_END |
| 216 | * bytes beyond the end of our own page tables. | 212 | * bytes beyond the end of our own page tables. |
| 217 | */ | 213 | */ |
| 218 | leal (INIT_MAP_BEYOND_END+PTE_ATTR)(%edi),%ebp | 214 | leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp |
| 219 | cmpl %ebp,%eax | 215 | cmpl %ebp,%eax |
| 220 | jb 10b | 216 | jb 10b |
| 221 | 1: | 217 | 1: |
| @@ -224,7 +220,7 @@ default_entry: | |||
| 224 | movl %eax, pa(max_pfn_mapped) | 220 | movl %eax, pa(max_pfn_mapped) |
| 225 | 221 | ||
| 226 | /* Do early initialization of the fixmap area */ | 222 | /* Do early initialization of the fixmap area */ |
| 227 | movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax | 223 | movl $pa(swapper_pg_fixmap)+PDE_IDENT_ATTR,%eax |
| 228 | movl %eax,pa(swapper_pg_pmd+0x1000*KPMDS-8) | 224 | movl %eax,pa(swapper_pg_pmd+0x1000*KPMDS-8) |
| 229 | #else /* Not PAE */ | 225 | #else /* Not PAE */ |
| 230 | 226 | ||
| @@ -233,9 +229,9 @@ page_pde_offset = (__PAGE_OFFSET >> 20); | |||
| 233 | movl $pa(pg0), %edi | 229 | movl $pa(pg0), %edi |
| 234 | movl %edi, pa(init_pg_tables_start) | 230 | movl %edi, pa(init_pg_tables_start) |
| 235 | movl $pa(swapper_pg_dir), %edx | 231 | movl $pa(swapper_pg_dir), %edx |
| 236 | movl $PTE_ATTR, %eax | 232 | movl $PTE_IDENT_ATTR, %eax |
| 237 | 10: | 233 | 10: |
| 238 | leal PDE_ATTR(%edi),%ecx /* Create PDE entry */ | 234 | leal PDE_IDENT_ATTR(%edi),%ecx /* Create PDE entry */ |
| 239 | movl %ecx,(%edx) /* Store identity PDE entry */ | 235 | movl %ecx,(%edx) /* Store identity PDE entry */ |
| 240 | movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */ | 236 | movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */ |
| 241 | addl $4,%edx | 237 | addl $4,%edx |
| @@ -249,7 +245,7 @@ page_pde_offset = (__PAGE_OFFSET >> 20); | |||
| 249 | * bytes beyond the end of our own page tables; the +0x007 is | 245 | * bytes beyond the end of our own page tables; the +0x007 is |
| 250 | * the attribute bits | 246 | * the attribute bits |
| 251 | */ | 247 | */ |
| 252 | leal (INIT_MAP_BEYOND_END+PTE_ATTR)(%edi),%ebp | 248 | leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp |
| 253 | cmpl %ebp,%eax | 249 | cmpl %ebp,%eax |
| 254 | jb 10b | 250 | jb 10b |
| 255 | movl %edi,pa(init_pg_tables_end) | 251 | movl %edi,pa(init_pg_tables_end) |
| @@ -257,7 +253,7 @@ page_pde_offset = (__PAGE_OFFSET >> 20); | |||
| 257 | movl %eax, pa(max_pfn_mapped) | 253 | movl %eax, pa(max_pfn_mapped) |
| 258 | 254 | ||
| 259 | /* Do early initialization of the fixmap area */ | 255 | /* Do early initialization of the fixmap area */ |
| 260 | movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax | 256 | movl $pa(swapper_pg_fixmap)+PDE_IDENT_ATTR,%eax |
| 261 | movl %eax,pa(swapper_pg_dir+0xffc) | 257 | movl %eax,pa(swapper_pg_dir+0xffc) |
| 262 | #endif | 258 | #endif |
| 263 | jmp 3f | 259 | jmp 3f |
| @@ -456,9 +452,6 @@ is386: movl $2,%ecx # set MP | |||
| 456 | 1: | 452 | 1: |
| 457 | #endif /* CONFIG_SMP */ | 453 | #endif /* CONFIG_SMP */ |
| 458 | jmp *(initial_code) | 454 | jmp *(initial_code) |
| 459 | .align 4 | ||
| 460 | ENTRY(initial_code) | ||
| 461 | .long i386_start_kernel | ||
| 462 | 455 | ||
| 463 | /* | 456 | /* |
| 464 | * We depend on ET to be correct. This checks for 287/387. | 457 | * We depend on ET to be correct. This checks for 287/387. |
| @@ -601,6 +594,11 @@ ignore_int: | |||
| 601 | #endif | 594 | #endif |
| 602 | iret | 595 | iret |
| 603 | 596 | ||
| 597 | .section .cpuinit.data,"wa" | ||
| 598 | .align 4 | ||
| 599 | ENTRY(initial_code) | ||
| 600 | .long i386_start_kernel | ||
| 601 | |||
| 604 | .section .text | 602 | .section .text |
| 605 | /* | 603 | /* |
| 606 | * Real beginning of normal "text" segment | 604 | * Real beginning of normal "text" segment |
| @@ -632,19 +630,19 @@ ENTRY(empty_zero_page) | |||
| 632 | /* Page-aligned for the benefit of paravirt? */ | 630 | /* Page-aligned for the benefit of paravirt? */ |
| 633 | .align PAGE_SIZE_asm | 631 | .align PAGE_SIZE_asm |
| 634 | ENTRY(swapper_pg_dir) | 632 | ENTRY(swapper_pg_dir) |
| 635 | .long pa(swapper_pg_pmd+PGD_ATTR),0 /* low identity map */ | 633 | .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 /* low identity map */ |
| 636 | # if KPMDS == 3 | 634 | # if KPMDS == 3 |
| 637 | .long pa(swapper_pg_pmd+PGD_ATTR),0 | 635 | .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 |
| 638 | .long pa(swapper_pg_pmd+PGD_ATTR+0x1000),0 | 636 | .long pa(swapper_pg_pmd+PGD_IDENT_ATTR+0x1000),0 |
| 639 | .long pa(swapper_pg_pmd+PGD_ATTR+0x2000),0 | 637 | .long pa(swapper_pg_pmd+PGD_IDENT_ATTR+0x2000),0 |
| 640 | # elif KPMDS == 2 | 638 | # elif KPMDS == 2 |
| 641 | .long 0,0 | 639 | .long 0,0 |
| 642 | .long pa(swapper_pg_pmd+PGD_ATTR),0 | 640 | .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 |
| 643 | .long pa(swapper_pg_pmd+PGD_ATTR+0x1000),0 | 641 | .long pa(swapper_pg_pmd+PGD_IDENT_ATTR+0x1000),0 |
| 644 | # elif KPMDS == 1 | 642 | # elif KPMDS == 1 |
| 645 | .long 0,0 | 643 | .long 0,0 |
| 646 | .long 0,0 | 644 | .long 0,0 |
| 647 | .long pa(swapper_pg_pmd+PGD_ATTR),0 | 645 | .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 |
| 648 | # else | 646 | # else |
| 649 | # error "Kernel PMDs should be 1, 2 or 3" | 647 | # error "Kernel PMDs should be 1, 2 or 3" |
| 650 | # endif | 648 | # endif |
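
The head_32.S hunks above drop the file-local PTE_ATTR/PDE_ATTR/PGD_ATTR definitions in favour of shared PTE_IDENT_ATTR/PDE_IDENT_ATTR/PGD_IDENT_ATTR names. The values are plain combinations of the standard x86 page-table flag bits; the sketch below is a minimal user-space reconstruction of them, assuming the conventional bit positions. The 0x007/0x067/0x001 figures come from the removed comments in this hunk, not from the new header.

```c
#include <stdio.h>
#include <stdint.h>

/* Standard x86 page-table flag bits (low bits of a PTE/PDE). */
#define _PAGE_PRESENT  (1u << 0)
#define _PAGE_RW       (1u << 1)
#define _PAGE_USER     (1u << 2)
#define _PAGE_ACCESSED (1u << 5)
#define _PAGE_DIRTY    (1u << 6)

int main(void)
{
	/* PTE for the boot-time identity map: PRESENT + RW + USER == 0x007 */
	uint32_t pte_ident = _PAGE_PRESENT | _PAGE_RW | _PAGE_USER;
	/* PDE: same plus ACCESSED + DIRTY, so the CPU never needs to set them == 0x067 */
	uint32_t pde_ident = pte_ident | _PAGE_ACCESSED | _PAGE_DIRTY;
	/* PGD entry (PAE): only PRESENT is permitted == 0x001 */
	uint32_t pgd_ident = _PAGE_PRESENT;

	printf("PTE_IDENT_ATTR = 0x%03x\n", pte_ident);	/* 0x007 */
	printf("PDE_IDENT_ATTR = 0x%03x\n", pde_ident);	/* 0x067 */
	printf("PGD_IDENT_ATTR = 0x%03x\n", pgd_ident);	/* 0x001 */
	return 0;
}
```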
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index b07ac7b217cb..26cfdc1d7c7f 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S | |||
| @@ -110,7 +110,7 @@ startup_64: | |||
| 110 | movq %rdi, %rax | 110 | movq %rdi, %rax |
| 111 | shrq $PMD_SHIFT, %rax | 111 | shrq $PMD_SHIFT, %rax |
| 112 | andq $(PTRS_PER_PMD - 1), %rax | 112 | andq $(PTRS_PER_PMD - 1), %rax |
| 113 | leaq __PAGE_KERNEL_LARGE_EXEC(%rdi), %rdx | 113 | leaq __PAGE_KERNEL_IDENT_LARGE_EXEC(%rdi), %rdx |
| 114 | leaq level2_spare_pgt(%rip), %rbx | 114 | leaq level2_spare_pgt(%rip), %rbx |
| 115 | movq %rdx, 0(%rbx, %rax, 8) | 115 | movq %rdx, 0(%rbx, %rax, 8) |
| 116 | ident_complete: | 116 | ident_complete: |
| @@ -374,7 +374,7 @@ NEXT_PAGE(level2_ident_pgt) | |||
| 374 | /* Since I easily can, map the first 1G. | 374 | /* Since I easily can, map the first 1G. |
| 375 | * Don't set NX because code runs from these pages. | 375 | * Don't set NX because code runs from these pages. |
| 376 | */ | 376 | */ |
| 377 | PMDS(0, __PAGE_KERNEL_LARGE_EXEC, PTRS_PER_PMD) | 377 | PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD) |
| 378 | 378 | ||
| 379 | NEXT_PAGE(level2_kernel_pgt) | 379 | NEXT_PAGE(level2_kernel_pgt) |
| 380 | /* | 380 | /* |
| @@ -407,6 +407,7 @@ ENTRY(phys_base) | |||
| 407 | /* This must match the first entry in level2_kernel_pgt */ | 407 | /* This must match the first entry in level2_kernel_pgt */ |
| 408 | .quad 0x0000000000000000 | 408 | .quad 0x0000000000000000 |
| 409 | 409 | ||
| 410 | #include "../../x86/xen/xen-head.S" | ||
| 410 | 411 | ||
| 411 | .section .bss, "aw", @nobits | 412 | .section .bss, "aw", @nobits |
| 412 | .align L1_CACHE_BYTES | 413 | .align L1_CACHE_BYTES |
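
In head_64.S the spare identity-map PMD slot is filled with `leaq __PAGE_KERNEL_IDENT_LARGE_EXEC(%rdi), %rdx`, i.e. the physical address in %rdi plus the 2 MiB large-page kernel attributes. Below is a minimal sketch of that composition; the exact flag set behind __PAGE_KERNEL_IDENT_LARGE_EXEC is an assumption here (conventional PRESENT, RW, ACCESSED, DIRTY and PSE bits, no NX so code can run from the mapping), not copied from the header.

```c
#include <stdio.h>
#include <stdint.h>

#define _PAGE_PRESENT  (1ull << 0)
#define _PAGE_RW       (1ull << 1)
#define _PAGE_ACCESSED (1ull << 5)
#define _PAGE_DIRTY    (1ull << 6)
#define _PAGE_PSE      (1ull << 7)	/* large (2 MiB) page */

#define PMD_SHIFT      21
#define PTRS_PER_PMD   512

/* Build one identity-map PMD entry covering the 2 MiB page containing 'phys'. */
static uint64_t ident_pmd_entry(uint64_t phys)
{
	uint64_t flags = _PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED |
			 _PAGE_DIRTY | _PAGE_PSE;	/* executable: NX left clear */
	return (phys & ~((1ull << PMD_SHIFT) - 1)) | flags;
}

int main(void)
{
	uint64_t phys = 0x1234567ULL;
	/* Slot selection mirrors the shrq/andq pair in startup_64. */
	unsigned idx = (phys >> PMD_SHIFT) & (PTRS_PER_PMD - 1);

	printf("PMD[%u] = 0x%016llx\n", idx,
	       (unsigned long long)ident_pmd_entry(phys));
	return 0;
}
```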
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 0ea6a19bfdfe..77017e834cf7 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c | |||
| @@ -1,29 +1,49 @@ | |||
| 1 | #include <linux/clocksource.h> | 1 | #include <linux/clocksource.h> |
| 2 | #include <linux/clockchips.h> | 2 | #include <linux/clockchips.h> |
| 3 | #include <linux/interrupt.h> | ||
| 4 | #include <linux/sysdev.h> | ||
| 3 | #include <linux/delay.h> | 5 | #include <linux/delay.h> |
| 4 | #include <linux/errno.h> | 6 | #include <linux/errno.h> |
| 5 | #include <linux/hpet.h> | 7 | #include <linux/hpet.h> |
| 6 | #include <linux/init.h> | 8 | #include <linux/init.h> |
| 7 | #include <linux/sysdev.h> | 9 | #include <linux/cpu.h> |
| 8 | #include <linux/pm.h> | 10 | #include <linux/pm.h> |
| 11 | #include <linux/io.h> | ||
| 9 | 12 | ||
| 10 | #include <asm/fixmap.h> | 13 | #include <asm/fixmap.h> |
| 11 | #include <asm/hpet.h> | ||
| 12 | #include <asm/i8253.h> | 14 | #include <asm/i8253.h> |
| 13 | #include <asm/io.h> | 15 | #include <asm/hpet.h> |
| 14 | 16 | ||
| 15 | #define HPET_MASK CLOCKSOURCE_MASK(32) | 17 | #define HPET_MASK CLOCKSOURCE_MASK(32) |
| 16 | #define HPET_SHIFT 22 | 18 | #define HPET_SHIFT 22 |
| 17 | 19 | ||
| 18 | /* FSEC = 10^-15 | 20 | /* FSEC = 10^-15 |
| 19 | NSEC = 10^-9 */ | 21 | NSEC = 10^-9 */ |
| 20 | #define FSEC_PER_NSEC 1000000L | 22 | #define FSEC_PER_NSEC 1000000L |
| 23 | |||
| 24 | #define HPET_DEV_USED_BIT 2 | ||
| 25 | #define HPET_DEV_USED (1 << HPET_DEV_USED_BIT) | ||
| 26 | #define HPET_DEV_VALID 0x8 | ||
| 27 | #define HPET_DEV_FSB_CAP 0x1000 | ||
| 28 | #define HPET_DEV_PERI_CAP 0x2000 | ||
| 29 | |||
| 30 | #define EVT_TO_HPET_DEV(evt) container_of(evt, struct hpet_dev, evt) | ||
| 21 | 31 | ||
| 22 | /* | 32 | /* |
| 23 | * HPET address is set in acpi/boot.c, when an ACPI entry exists | 33 | * HPET address is set in acpi/boot.c, when an ACPI entry exists |
| 24 | */ | 34 | */ |
| 25 | unsigned long hpet_address; | 35 | unsigned long hpet_address; |
| 26 | static void __iomem *hpet_virt_address; | 36 | unsigned long hpet_num_timers; |
| 37 | static void __iomem *hpet_virt_address; | ||
| 38 | |||
| 39 | struct hpet_dev { | ||
| 40 | struct clock_event_device evt; | ||
| 41 | unsigned int num; | ||
| 42 | int cpu; | ||
| 43 | unsigned int irq; | ||
| 44 | unsigned int flags; | ||
| 45 | char name[10]; | ||
| 46 | }; | ||
| 27 | 47 | ||
| 28 | unsigned long hpet_readl(unsigned long a) | 48 | unsigned long hpet_readl(unsigned long a) |
| 29 | { | 49 | { |
| @@ -59,7 +79,7 @@ static inline void hpet_clear_mapping(void) | |||
| 59 | static int boot_hpet_disable; | 79 | static int boot_hpet_disable; |
| 60 | int hpet_force_user; | 80 | int hpet_force_user; |
| 61 | 81 | ||
| 62 | static int __init hpet_setup(char* str) | 82 | static int __init hpet_setup(char *str) |
| 63 | { | 83 | { |
| 64 | if (str) { | 84 | if (str) { |
| 65 | if (!strncmp("disable", str, 7)) | 85 | if (!strncmp("disable", str, 7)) |
| @@ -80,7 +100,7 @@ __setup("nohpet", disable_hpet); | |||
| 80 | 100 | ||
| 81 | static inline int is_hpet_capable(void) | 101 | static inline int is_hpet_capable(void) |
| 82 | { | 102 | { |
| 83 | return (!boot_hpet_disable && hpet_address); | 103 | return !boot_hpet_disable && hpet_address; |
| 84 | } | 104 | } |
| 85 | 105 | ||
| 86 | /* | 106 | /* |
| @@ -102,6 +122,9 @@ EXPORT_SYMBOL_GPL(is_hpet_enabled); | |||
| 102 | * timer 0 and timer 1 in case of RTC emulation. | 122 | * timer 0 and timer 1 in case of RTC emulation. |
| 103 | */ | 123 | */ |
| 104 | #ifdef CONFIG_HPET | 124 | #ifdef CONFIG_HPET |
| 125 | |||
| 126 | static void hpet_reserve_msi_timers(struct hpet_data *hd); | ||
| 127 | |||
| 105 | static void hpet_reserve_platform_timers(unsigned long id) | 128 | static void hpet_reserve_platform_timers(unsigned long id) |
| 106 | { | 129 | { |
| 107 | struct hpet __iomem *hpet = hpet_virt_address; | 130 | struct hpet __iomem *hpet = hpet_virt_address; |
| @@ -111,25 +134,31 @@ static void hpet_reserve_platform_timers(unsigned long id) | |||
| 111 | 134 | ||
| 112 | nrtimers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1; | 135 | nrtimers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1; |
| 113 | 136 | ||
| 114 | memset(&hd, 0, sizeof (hd)); | 137 | memset(&hd, 0, sizeof(hd)); |
| 115 | hd.hd_phys_address = hpet_address; | 138 | hd.hd_phys_address = hpet_address; |
| 116 | hd.hd_address = hpet; | 139 | hd.hd_address = hpet; |
| 117 | hd.hd_nirqs = nrtimers; | 140 | hd.hd_nirqs = nrtimers; |
| 118 | hd.hd_flags = HPET_DATA_PLATFORM; | ||
| 119 | hpet_reserve_timer(&hd, 0); | 141 | hpet_reserve_timer(&hd, 0); |
| 120 | 142 | ||
| 121 | #ifdef CONFIG_HPET_EMULATE_RTC | 143 | #ifdef CONFIG_HPET_EMULATE_RTC |
| 122 | hpet_reserve_timer(&hd, 1); | 144 | hpet_reserve_timer(&hd, 1); |
| 123 | #endif | 145 | #endif |
| 124 | 146 | ||
| 147 | /* | ||
| 148 | * NOTE that hd_irq[] reflects IOAPIC input pins (LEGACY_8254 | ||
| 149 | * is wrong for i8259!) not the output IRQ. Many BIOS writers | ||
| 150 | * don't bother configuring *any* comparator interrupts. | ||
| 151 | */ | ||
| 125 | hd.hd_irq[0] = HPET_LEGACY_8254; | 152 | hd.hd_irq[0] = HPET_LEGACY_8254; |
| 126 | hd.hd_irq[1] = HPET_LEGACY_RTC; | 153 | hd.hd_irq[1] = HPET_LEGACY_RTC; |
| 127 | 154 | ||
| 128 | for (i = 2; i < nrtimers; timer++, i++) { | 155 | for (i = 2; i < nrtimers; timer++, i++) { |
| 129 | hd.hd_irq[i] = (readl(&timer->hpet_config) & Tn_INT_ROUTE_CNF_MASK) >> | 156 | hd.hd_irq[i] = (readl(&timer->hpet_config) & |
| 130 | Tn_INT_ROUTE_CNF_SHIFT; | 157 | Tn_INT_ROUTE_CNF_MASK) >> Tn_INT_ROUTE_CNF_SHIFT; |
| 131 | } | 158 | } |
| 132 | 159 | ||
| 160 | hpet_reserve_msi_timers(&hd); | ||
| 161 | |||
| 133 | hpet_alloc(&hd); | 162 | hpet_alloc(&hd); |
| 134 | 163 | ||
| 135 | } | 164 | } |
| @@ -210,8 +239,8 @@ static void hpet_legacy_clockevent_register(void) | |||
| 210 | /* Calculate the min / max delta */ | 239 | /* Calculate the min / max delta */ |
| 211 | hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, | 240 | hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, |
| 212 | &hpet_clockevent); | 241 | &hpet_clockevent); |
| 213 | hpet_clockevent.min_delta_ns = clockevent_delta2ns(0x30, | 242 | /* 5 usec minimum reprogramming delta. */ |
| 214 | &hpet_clockevent); | 243 | hpet_clockevent.min_delta_ns = 5000; |
| 215 | 244 | ||
| 216 | /* | 245 | /* |
| 217 | * Start hpet with the boot cpu mask and make it | 246 | * Start hpet with the boot cpu mask and make it |
| @@ -223,63 +252,421 @@ static void hpet_legacy_clockevent_register(void) | |||
| 223 | printk(KERN_DEBUG "hpet clockevent registered\n"); | 252 | printk(KERN_DEBUG "hpet clockevent registered\n"); |
| 224 | } | 253 | } |
| 225 | 254 | ||
| 226 | static void hpet_legacy_set_mode(enum clock_event_mode mode, | 255 | static int hpet_setup_msi_irq(unsigned int irq); |
| 227 | struct clock_event_device *evt) | 256 | |
| 257 | static void hpet_set_mode(enum clock_event_mode mode, | ||
| 258 | struct clock_event_device *evt, int timer) | ||
| 228 | { | 259 | { |
| 229 | unsigned long cfg, cmp, now; | 260 | unsigned long cfg, cmp, now; |
| 230 | uint64_t delta; | 261 | uint64_t delta; |
| 231 | 262 | ||
| 232 | switch(mode) { | 263 | switch (mode) { |
| 233 | case CLOCK_EVT_MODE_PERIODIC: | 264 | case CLOCK_EVT_MODE_PERIODIC: |
| 234 | delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * hpet_clockevent.mult; | 265 | delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * evt->mult; |
| 235 | delta >>= hpet_clockevent.shift; | 266 | delta >>= evt->shift; |
| 236 | now = hpet_readl(HPET_COUNTER); | 267 | now = hpet_readl(HPET_COUNTER); |
| 237 | cmp = now + (unsigned long) delta; | 268 | cmp = now + (unsigned long) delta; |
| 238 | cfg = hpet_readl(HPET_T0_CFG); | 269 | cfg = hpet_readl(HPET_Tn_CFG(timer)); |
| 239 | cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | | 270 | cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | |
| 240 | HPET_TN_SETVAL | HPET_TN_32BIT; | 271 | HPET_TN_SETVAL | HPET_TN_32BIT; |
| 241 | hpet_writel(cfg, HPET_T0_CFG); | 272 | hpet_writel(cfg, HPET_Tn_CFG(timer)); |
| 242 | /* | 273 | /* |
| 243 | * The first write after writing TN_SETVAL to the | 274 | * The first write after writing TN_SETVAL to the |
| 244 | * config register sets the counter value, the second | 275 | * config register sets the counter value, the second |
| 245 | * write sets the period. | 276 | * write sets the period. |
| 246 | */ | 277 | */ |
| 247 | hpet_writel(cmp, HPET_T0_CMP); | 278 | hpet_writel(cmp, HPET_Tn_CMP(timer)); |
| 248 | udelay(1); | 279 | udelay(1); |
| 249 | hpet_writel((unsigned long) delta, HPET_T0_CMP); | 280 | hpet_writel((unsigned long) delta, HPET_Tn_CMP(timer)); |
| 250 | break; | 281 | break; |
| 251 | 282 | ||
| 252 | case CLOCK_EVT_MODE_ONESHOT: | 283 | case CLOCK_EVT_MODE_ONESHOT: |
| 253 | cfg = hpet_readl(HPET_T0_CFG); | 284 | cfg = hpet_readl(HPET_Tn_CFG(timer)); |
| 254 | cfg &= ~HPET_TN_PERIODIC; | 285 | cfg &= ~HPET_TN_PERIODIC; |
| 255 | cfg |= HPET_TN_ENABLE | HPET_TN_32BIT; | 286 | cfg |= HPET_TN_ENABLE | HPET_TN_32BIT; |
| 256 | hpet_writel(cfg, HPET_T0_CFG); | 287 | hpet_writel(cfg, HPET_Tn_CFG(timer)); |
| 257 | break; | 288 | break; |
| 258 | 289 | ||
| 259 | case CLOCK_EVT_MODE_UNUSED: | 290 | case CLOCK_EVT_MODE_UNUSED: |
| 260 | case CLOCK_EVT_MODE_SHUTDOWN: | 291 | case CLOCK_EVT_MODE_SHUTDOWN: |
| 261 | cfg = hpet_readl(HPET_T0_CFG); | 292 | cfg = hpet_readl(HPET_Tn_CFG(timer)); |
| 262 | cfg &= ~HPET_TN_ENABLE; | 293 | cfg &= ~HPET_TN_ENABLE; |
| 263 | hpet_writel(cfg, HPET_T0_CFG); | 294 | hpet_writel(cfg, HPET_Tn_CFG(timer)); |
| 264 | break; | 295 | break; |
| 265 | 296 | ||
| 266 | case CLOCK_EVT_MODE_RESUME: | 297 | case CLOCK_EVT_MODE_RESUME: |
| 267 | hpet_enable_legacy_int(); | 298 | if (timer == 0) { |
| 299 | hpet_enable_legacy_int(); | ||
| 300 | } else { | ||
| 301 | struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); | ||
| 302 | hpet_setup_msi_irq(hdev->irq); | ||
| 303 | disable_irq(hdev->irq); | ||
| 304 | irq_set_affinity(hdev->irq, cpumask_of_cpu(hdev->cpu)); | ||
| 305 | enable_irq(hdev->irq); | ||
| 306 | } | ||
| 268 | break; | 307 | break; |
| 269 | } | 308 | } |
| 270 | } | 309 | } |
| 271 | 310 | ||
| 272 | static int hpet_legacy_next_event(unsigned long delta, | 311 | static int hpet_next_event(unsigned long delta, |
| 273 | struct clock_event_device *evt) | 312 | struct clock_event_device *evt, int timer) |
| 274 | { | 313 | { |
| 275 | unsigned long cnt; | 314 | u32 cnt; |
| 276 | 315 | ||
| 277 | cnt = hpet_readl(HPET_COUNTER); | 316 | cnt = hpet_readl(HPET_COUNTER); |
| 278 | cnt += delta; | 317 | cnt += (u32) delta; |
| 279 | hpet_writel(cnt, HPET_T0_CMP); | 318 | hpet_writel(cnt, HPET_Tn_CMP(timer)); |
| 319 | |||
| 320 | /* | ||
| 321 | * We need to read back the CMP register to make sure that | ||
| 322 | * what we wrote hit the chip before we compare it to the | ||
| 323 | * counter. | ||
| 324 | */ | ||
| 325 | WARN_ON((u32)hpet_readl(HPET_T0_CMP) != cnt); | ||
| 326 | |||
| 327 | return (s32)((u32)hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0; | ||
| 328 | } | ||
| 329 | |||
| 330 | static void hpet_legacy_set_mode(enum clock_event_mode mode, | ||
| 331 | struct clock_event_device *evt) | ||
| 332 | { | ||
| 333 | hpet_set_mode(mode, evt, 0); | ||
| 334 | } | ||
| 335 | |||
| 336 | static int hpet_legacy_next_event(unsigned long delta, | ||
| 337 | struct clock_event_device *evt) | ||
| 338 | { | ||
| 339 | return hpet_next_event(delta, evt, 0); | ||
| 340 | } | ||
| 341 | |||
| 342 | /* | ||
| 343 | * HPET MSI Support | ||
| 344 | */ | ||
| 345 | #ifdef CONFIG_PCI_MSI | ||
| 346 | |||
| 347 | static DEFINE_PER_CPU(struct hpet_dev *, cpu_hpet_dev); | ||
| 348 | static struct hpet_dev *hpet_devs; | ||
| 349 | |||
| 350 | void hpet_msi_unmask(unsigned int irq) | ||
| 351 | { | ||
| 352 | struct hpet_dev *hdev = get_irq_data(irq); | ||
| 353 | unsigned long cfg; | ||
| 354 | |||
| 355 | /* unmask it */ | ||
| 356 | cfg = hpet_readl(HPET_Tn_CFG(hdev->num)); | ||
| 357 | cfg |= HPET_TN_FSB; | ||
| 358 | hpet_writel(cfg, HPET_Tn_CFG(hdev->num)); | ||
| 359 | } | ||
| 360 | |||
| 361 | void hpet_msi_mask(unsigned int irq) | ||
| 362 | { | ||
| 363 | unsigned long cfg; | ||
| 364 | struct hpet_dev *hdev = get_irq_data(irq); | ||
| 365 | |||
| 366 | /* mask it */ | ||
| 367 | cfg = hpet_readl(HPET_Tn_CFG(hdev->num)); | ||
| 368 | cfg &= ~HPET_TN_FSB; | ||
| 369 | hpet_writel(cfg, HPET_Tn_CFG(hdev->num)); | ||
| 370 | } | ||
| 371 | |||
| 372 | void hpet_msi_write(unsigned int irq, struct msi_msg *msg) | ||
| 373 | { | ||
| 374 | struct hpet_dev *hdev = get_irq_data(irq); | ||
| 375 | |||
| 376 | hpet_writel(msg->data, HPET_Tn_ROUTE(hdev->num)); | ||
| 377 | hpet_writel(msg->address_lo, HPET_Tn_ROUTE(hdev->num) + 4); | ||
| 378 | } | ||
| 379 | |||
| 380 | void hpet_msi_read(unsigned int irq, struct msi_msg *msg) | ||
| 381 | { | ||
| 382 | struct hpet_dev *hdev = get_irq_data(irq); | ||
| 383 | |||
| 384 | msg->data = hpet_readl(HPET_Tn_ROUTE(hdev->num)); | ||
| 385 | msg->address_lo = hpet_readl(HPET_Tn_ROUTE(hdev->num) + 4); | ||
| 386 | msg->address_hi = 0; | ||
| 387 | } | ||
| 388 | |||
| 389 | static void hpet_msi_set_mode(enum clock_event_mode mode, | ||
| 390 | struct clock_event_device *evt) | ||
| 391 | { | ||
| 392 | struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); | ||
| 393 | hpet_set_mode(mode, evt, hdev->num); | ||
| 394 | } | ||
| 395 | |||
| 396 | static int hpet_msi_next_event(unsigned long delta, | ||
| 397 | struct clock_event_device *evt) | ||
| 398 | { | ||
| 399 | struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); | ||
| 400 | return hpet_next_event(delta, evt, hdev->num); | ||
| 401 | } | ||
| 402 | |||
| 403 | static int hpet_setup_msi_irq(unsigned int irq) | ||
| 404 | { | ||
| 405 | if (arch_setup_hpet_msi(irq)) { | ||
| 406 | destroy_irq(irq); | ||
| 407 | return -EINVAL; | ||
| 408 | } | ||
| 409 | return 0; | ||
| 410 | } | ||
| 411 | |||
| 412 | static int hpet_assign_irq(struct hpet_dev *dev) | ||
| 413 | { | ||
| 414 | unsigned int irq; | ||
| 415 | |||
| 416 | irq = create_irq(); | ||
| 417 | if (!irq) | ||
| 418 | return -EINVAL; | ||
| 419 | |||
| 420 | set_irq_data(irq, dev); | ||
| 421 | |||
| 422 | if (hpet_setup_msi_irq(irq)) | ||
| 423 | return -EINVAL; | ||
| 424 | |||
| 425 | dev->irq = irq; | ||
| 426 | return 0; | ||
| 427 | } | ||
| 428 | |||
| 429 | static irqreturn_t hpet_interrupt_handler(int irq, void *data) | ||
| 430 | { | ||
| 431 | struct hpet_dev *dev = (struct hpet_dev *)data; | ||
| 432 | struct clock_event_device *hevt = &dev->evt; | ||
| 433 | |||
| 434 | if (!hevt->event_handler) { | ||
| 435 | printk(KERN_INFO "Spurious HPET timer interrupt on HPET timer %d\n", | ||
| 436 | dev->num); | ||
| 437 | return IRQ_HANDLED; | ||
| 438 | } | ||
| 439 | |||
| 440 | hevt->event_handler(hevt); | ||
| 441 | return IRQ_HANDLED; | ||
| 442 | } | ||
| 443 | |||
| 444 | static int hpet_setup_irq(struct hpet_dev *dev) | ||
| 445 | { | ||
| 446 | |||
| 447 | if (request_irq(dev->irq, hpet_interrupt_handler, | ||
| 448 | IRQF_SHARED|IRQF_NOBALANCING, dev->name, dev)) | ||
| 449 | return -1; | ||
| 450 | |||
| 451 | disable_irq(dev->irq); | ||
| 452 | irq_set_affinity(dev->irq, cpumask_of_cpu(dev->cpu)); | ||
| 453 | enable_irq(dev->irq); | ||
| 454 | |||
| 455 | printk(KERN_DEBUG "hpet: %s irq %d for MSI\n", | ||
| 456 | dev->name, dev->irq); | ||
| 457 | |||
| 458 | return 0; | ||
| 459 | } | ||
| 460 | |||
| 461 | /* This should be called on the specific @cpu */ | ||
| 462 | static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu) | ||
| 463 | { | ||
| 464 | struct clock_event_device *evt = &hdev->evt; | ||
| 465 | uint64_t hpet_freq; | ||
| 466 | |||
| 467 | WARN_ON(cpu != smp_processor_id()); | ||
| 468 | if (!(hdev->flags & HPET_DEV_VALID)) | ||
| 469 | return; | ||
| 280 | 470 | ||
| 281 | return ((long)(hpet_readl(HPET_COUNTER) - cnt ) > 0) ? -ETIME : 0; | 471 | if (hpet_setup_msi_irq(hdev->irq)) |
| 472 | return; | ||
| 473 | |||
| 474 | hdev->cpu = cpu; | ||
| 475 | per_cpu(cpu_hpet_dev, cpu) = hdev; | ||
| 476 | evt->name = hdev->name; | ||
| 477 | hpet_setup_irq(hdev); | ||
| 478 | evt->irq = hdev->irq; | ||
| 479 | |||
| 480 | evt->rating = 110; | ||
| 481 | evt->features = CLOCK_EVT_FEAT_ONESHOT; | ||
| 482 | if (hdev->flags & HPET_DEV_PERI_CAP) | ||
| 483 | evt->features |= CLOCK_EVT_FEAT_PERIODIC; | ||
| 484 | |||
| 485 | evt->set_mode = hpet_msi_set_mode; | ||
| 486 | evt->set_next_event = hpet_msi_next_event; | ||
| 487 | evt->shift = 32; | ||
| 488 | |||
| 489 | /* | ||
| 490 | * The period is a femtoseconds value. We need to calculate the | ||
| 491 | * scaled math multiplication factor for nanosecond to hpet tick | ||
| 492 | * conversion. | ||
| 493 | */ | ||
| 494 | hpet_freq = 1000000000000000ULL; | ||
| 495 | do_div(hpet_freq, hpet_period); | ||
| 496 | evt->mult = div_sc((unsigned long) hpet_freq, | ||
| 497 | NSEC_PER_SEC, evt->shift); | ||
| 498 | /* Calculate the max delta */ | ||
| 499 | evt->max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, evt); | ||
| 500 | /* 5 usec minimum reprogramming delta. */ | ||
| 501 | evt->min_delta_ns = 5000; | ||
| 502 | |||
| 503 | evt->cpumask = cpumask_of_cpu(hdev->cpu); | ||
| 504 | clockevents_register_device(evt); | ||
| 505 | } | ||
| 506 | |||
| 507 | #ifdef CONFIG_HPET | ||
| 508 | /* Reserve at least one timer for userspace (/dev/hpet) */ | ||
| 509 | #define RESERVE_TIMERS 1 | ||
| 510 | #else | ||
| 511 | #define RESERVE_TIMERS 0 | ||
| 512 | #endif | ||
| 513 | |||
| 514 | static void hpet_msi_capability_lookup(unsigned int start_timer) | ||
| 515 | { | ||
| 516 | unsigned int id; | ||
| 517 | unsigned int num_timers; | ||
| 518 | unsigned int num_timers_used = 0; | ||
| 519 | int i; | ||
| 520 | |||
| 521 | id = hpet_readl(HPET_ID); | ||
| 522 | |||
| 523 | num_timers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT); | ||
| 524 | num_timers++; /* Value read out starts from 0 */ | ||
| 525 | |||
| 526 | hpet_devs = kzalloc(sizeof(struct hpet_dev) * num_timers, GFP_KERNEL); | ||
| 527 | if (!hpet_devs) | ||
| 528 | return; | ||
| 529 | |||
| 530 | hpet_num_timers = num_timers; | ||
| 531 | |||
| 532 | for (i = start_timer; i < num_timers - RESERVE_TIMERS; i++) { | ||
| 533 | struct hpet_dev *hdev = &hpet_devs[num_timers_used]; | ||
| 534 | unsigned long cfg = hpet_readl(HPET_Tn_CFG(i)); | ||
| 535 | |||
| 536 | /* Only consider HPET timer with MSI support */ | ||
| 537 | if (!(cfg & HPET_TN_FSB_CAP)) | ||
| 538 | continue; | ||
| 539 | |||
| 540 | hdev->flags = 0; | ||
| 541 | if (cfg & HPET_TN_PERIODIC_CAP) | ||
| 542 | hdev->flags |= HPET_DEV_PERI_CAP; | ||
| 543 | hdev->num = i; | ||
| 544 | |||
| 545 | sprintf(hdev->name, "hpet%d", i); | ||
| 546 | if (hpet_assign_irq(hdev)) | ||
| 547 | continue; | ||
| 548 | |||
| 549 | hdev->flags |= HPET_DEV_FSB_CAP; | ||
| 550 | hdev->flags |= HPET_DEV_VALID; | ||
| 551 | num_timers_used++; | ||
| 552 | if (num_timers_used == num_possible_cpus()) | ||
| 553 | break; | ||
| 554 | } | ||
| 555 | |||
| 556 | printk(KERN_INFO "HPET: %d timers in total, %d timers will be used for per-cpu timer\n", | ||
| 557 | num_timers, num_timers_used); | ||
| 558 | } | ||
| 559 | |||
| 560 | #ifdef CONFIG_HPET | ||
| 561 | static void hpet_reserve_msi_timers(struct hpet_data *hd) | ||
| 562 | { | ||
| 563 | int i; | ||
| 564 | |||
| 565 | if (!hpet_devs) | ||
| 566 | return; | ||
| 567 | |||
| 568 | for (i = 0; i < hpet_num_timers; i++) { | ||
| 569 | struct hpet_dev *hdev = &hpet_devs[i]; | ||
| 570 | |||
| 571 | if (!(hdev->flags & HPET_DEV_VALID)) | ||
| 572 | continue; | ||
| 573 | |||
| 574 | hd->hd_irq[hdev->num] = hdev->irq; | ||
| 575 | hpet_reserve_timer(hd, hdev->num); | ||
| 576 | } | ||
| 577 | } | ||
| 578 | #endif | ||
| 579 | |||
| 580 | static struct hpet_dev *hpet_get_unused_timer(void) | ||
| 581 | { | ||
| 582 | int i; | ||
| 583 | |||
| 584 | if (!hpet_devs) | ||
| 585 | return NULL; | ||
| 586 | |||
| 587 | for (i = 0; i < hpet_num_timers; i++) { | ||
| 588 | struct hpet_dev *hdev = &hpet_devs[i]; | ||
| 589 | |||
| 590 | if (!(hdev->flags & HPET_DEV_VALID)) | ||
| 591 | continue; | ||
| 592 | if (test_and_set_bit(HPET_DEV_USED_BIT, | ||
| 593 | (unsigned long *)&hdev->flags)) | ||
| 594 | continue; | ||
| 595 | return hdev; | ||
| 596 | } | ||
| 597 | return NULL; | ||
| 598 | } | ||
| 599 | |||
| 600 | struct hpet_work_struct { | ||
| 601 | struct delayed_work work; | ||
| 602 | struct completion complete; | ||
| 603 | }; | ||
| 604 | |||
| 605 | static void hpet_work(struct work_struct *w) | ||
| 606 | { | ||
| 607 | struct hpet_dev *hdev; | ||
| 608 | int cpu = smp_processor_id(); | ||
| 609 | struct hpet_work_struct *hpet_work; | ||
| 610 | |||
| 611 | hpet_work = container_of(w, struct hpet_work_struct, work.work); | ||
| 612 | |||
| 613 | hdev = hpet_get_unused_timer(); | ||
| 614 | if (hdev) | ||
| 615 | init_one_hpet_msi_clockevent(hdev, cpu); | ||
| 616 | |||
| 617 | complete(&hpet_work->complete); | ||
| 618 | } | ||
| 619 | |||
| 620 | static int hpet_cpuhp_notify(struct notifier_block *n, | ||
| 621 | unsigned long action, void *hcpu) | ||
| 622 | { | ||
| 623 | unsigned long cpu = (unsigned long)hcpu; | ||
| 624 | struct hpet_work_struct work; | ||
| 625 | struct hpet_dev *hdev = per_cpu(cpu_hpet_dev, cpu); | ||
| 626 | |||
| 627 | switch (action & 0xf) { | ||
| 628 | case CPU_ONLINE: | ||
| 629 | INIT_DELAYED_WORK(&work.work, hpet_work); | ||
| 630 | init_completion(&work.complete); | ||
| 631 | /* FIXME: add schedule_work_on() */ | ||
| 632 | schedule_delayed_work_on(cpu, &work.work, 0); | ||
| 633 | wait_for_completion(&work.complete); | ||
| 634 | break; | ||
| 635 | case CPU_DEAD: | ||
| 636 | if (hdev) { | ||
| 637 | free_irq(hdev->irq, hdev); | ||
| 638 | hdev->flags &= ~HPET_DEV_USED; | ||
| 639 | per_cpu(cpu_hpet_dev, cpu) = NULL; | ||
| 640 | } | ||
| 641 | break; | ||
| 642 | } | ||
| 643 | return NOTIFY_OK; | ||
| 644 | } | ||
| 645 | #else | ||
| 646 | |||
| 647 | static int hpet_setup_msi_irq(unsigned int irq) | ||
| 648 | { | ||
| 649 | return 0; | ||
| 650 | } | ||
| 651 | static void hpet_msi_capability_lookup(unsigned int start_timer) | ||
| 652 | { | ||
| 653 | return; | ||
| 654 | } | ||
| 655 | |||
| 656 | #ifdef CONFIG_HPET | ||
| 657 | static void hpet_reserve_msi_timers(struct hpet_data *hd) | ||
| 658 | { | ||
| 659 | return; | ||
| 282 | } | 660 | } |
| 661 | #endif | ||
| 662 | |||
| 663 | static int hpet_cpuhp_notify(struct notifier_block *n, | ||
| 664 | unsigned long action, void *hcpu) | ||
| 665 | { | ||
| 666 | return NOTIFY_OK; | ||
| 667 | } | ||
| 668 | |||
| 669 | #endif | ||
| 283 | 670 | ||
| 284 | /* | 671 | /* |
| 285 | * Clock source related code | 672 | * Clock source related code |
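
The new per-timer clockevents added in the hunk above reuse the conversion spelled out in the comment in init_one_hpet_msi_clockevent(): the HPET period is reported in femtoseconds, so the frequency is 10^15 / period, and the mult/shift pair scales nanoseconds into HPET ticks. A standalone sketch of that arithmetic follows, using plain 64-bit integer math instead of the kernel's do_div()/div_sc() helpers; the 69841279 fs example period (~14.318 MHz) is only an illustrative value.

```c
#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_SEC 1000000000ULL
#define FSEC_PER_SEC 1000000000000000ULL

int main(void)
{
	uint64_t hpet_period_fs = 69841279;	/* example: ~14.318 MHz HPET */
	unsigned shift = 32;			/* same shift the MSI clockevent uses */

	uint64_t hpet_freq = FSEC_PER_SEC / hpet_period_fs;	/* ticks per second */
	uint64_t mult = (hpet_freq << shift) / NSEC_PER_SEC;	/* ns -> ticks factor */

	/* Converting a 1 ms (1,000,000 ns) delta into HPET ticks: */
	uint64_t delta_ns = 1000000;
	uint64_t ticks = (delta_ns * mult) >> shift;

	printf("freq = %llu Hz, mult = %llu, 1ms = %llu ticks\n",
	       (unsigned long long)hpet_freq,
	       (unsigned long long)mult,
	       (unsigned long long)ticks);
	return 0;
}
```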
| @@ -359,6 +746,7 @@ static int hpet_clocksource_register(void) | |||
| 359 | int __init hpet_enable(void) | 746 | int __init hpet_enable(void) |
| 360 | { | 747 | { |
| 361 | unsigned long id; | 748 | unsigned long id; |
| 749 | int i; | ||
| 362 | 750 | ||
| 363 | if (!is_hpet_capable()) | 751 | if (!is_hpet_capable()) |
| 364 | return 0; | 752 | return 0; |
| @@ -369,6 +757,29 @@ int __init hpet_enable(void) | |||
| 369 | * Read the period and check for a sane value: | 757 | * Read the period and check for a sane value: |
| 370 | */ | 758 | */ |
| 371 | hpet_period = hpet_readl(HPET_PERIOD); | 759 | hpet_period = hpet_readl(HPET_PERIOD); |
| 760 | |||
| 761 | /* | ||
| 762 | * AMD SB700 based systems with spread spectrum enabled use a | ||
| 763 | * SMM based HPET emulation to provide proper frequency | ||
| 764 | * setting. The SMM code is initialized with the first HPET | ||
| 765 | * register access and takes some time to complete. During | ||
| 766 | * this time the config register reads 0xffffffff. We check | ||
| 767 | * for at most 1000 loops whether the config register reads a non- | ||
| 768 | * 0xffffffff value to make sure that HPET is up and running | ||
| 769 | * before we go further. A counting loop is safe, as the HPET | ||
| 770 | * access takes thousands of CPU cycles. On non SB700 based | ||
| 771 | * machines this check is only done once and has no side | ||
| 772 | * effects. | ||
| 773 | */ | ||
| 774 | for (i = 0; hpet_readl(HPET_CFG) == 0xFFFFFFFF; i++) { | ||
| 775 | if (i == 1000) { | ||
| 776 | printk(KERN_WARNING | ||
| 777 | "HPET config register value = 0xFFFFFFFF. " | ||
| 778 | "Disabling HPET\n"); | ||
| 779 | goto out_nohpet; | ||
| 780 | } | ||
| 781 | } | ||
| 782 | |||
| 372 | if (hpet_period < HPET_MIN_PERIOD || hpet_period > HPET_MAX_PERIOD) | 783 | if (hpet_period < HPET_MIN_PERIOD || hpet_period > HPET_MAX_PERIOD) |
| 373 | goto out_nohpet; | 784 | goto out_nohpet; |
| 374 | 785 | ||
| @@ -392,8 +803,10 @@ int __init hpet_enable(void) | |||
| 392 | 803 | ||
| 393 | if (id & HPET_ID_LEGSUP) { | 804 | if (id & HPET_ID_LEGSUP) { |
| 394 | hpet_legacy_clockevent_register(); | 805 | hpet_legacy_clockevent_register(); |
| 806 | hpet_msi_capability_lookup(2); | ||
| 395 | return 1; | 807 | return 1; |
| 396 | } | 808 | } |
| 809 | hpet_msi_capability_lookup(0); | ||
| 397 | return 0; | 810 | return 0; |
| 398 | 811 | ||
| 399 | out_nohpet: | 812 | out_nohpet: |
| @@ -410,6 +823,8 @@ out_nohpet: | |||
| 410 | */ | 823 | */ |
| 411 | static __init int hpet_late_init(void) | 824 | static __init int hpet_late_init(void) |
| 412 | { | 825 | { |
| 826 | int cpu; | ||
| 827 | |||
| 413 | if (boot_hpet_disable) | 828 | if (boot_hpet_disable) |
| 414 | return -ENODEV; | 829 | return -ENODEV; |
| 415 | 830 | ||
| @@ -425,6 +840,13 @@ static __init int hpet_late_init(void) | |||
| 425 | 840 | ||
| 426 | hpet_reserve_platform_timers(hpet_readl(HPET_ID)); | 841 | hpet_reserve_platform_timers(hpet_readl(HPET_ID)); |
| 427 | 842 | ||
| 843 | for_each_online_cpu(cpu) { | ||
| 844 | hpet_cpuhp_notify(NULL, CPU_ONLINE, (void *)(long)cpu); | ||
| 845 | } | ||
| 846 | |||
| 847 | /* This notifier should be called after workqueue is ready */ | ||
| 848 | hotcpu_notifier(hpet_cpuhp_notify, -20); | ||
| 849 | |||
| 428 | return 0; | 850 | return 0; |
| 429 | } | 851 | } |
| 430 | fs_initcall(hpet_late_init); | 852 | fs_initcall(hpet_late_init); |
| @@ -468,7 +890,7 @@ void hpet_disable(void) | |||
| 468 | #define RTC_NUM_INTS 1 | 890 | #define RTC_NUM_INTS 1 |
| 469 | 891 | ||
| 470 | static unsigned long hpet_rtc_flags; | 892 | static unsigned long hpet_rtc_flags; |
| 471 | static unsigned long hpet_prev_update_sec; | 893 | static int hpet_prev_update_sec; |
| 472 | static struct rtc_time hpet_alarm_time; | 894 | static struct rtc_time hpet_alarm_time; |
| 473 | static unsigned long hpet_pie_count; | 895 | static unsigned long hpet_pie_count; |
| 474 | static unsigned long hpet_t1_cmp; | 896 | static unsigned long hpet_t1_cmp; |
| @@ -575,6 +997,9 @@ int hpet_set_rtc_irq_bit(unsigned long bit_mask) | |||
| 575 | 997 | ||
| 576 | hpet_rtc_flags |= bit_mask; | 998 | hpet_rtc_flags |= bit_mask; |
| 577 | 999 | ||
| 1000 | if ((bit_mask & RTC_UIE) && !(oldbits & RTC_UIE)) | ||
| 1001 | hpet_prev_update_sec = -1; | ||
| 1002 | |||
| 578 | if (!oldbits) | 1003 | if (!oldbits) |
| 579 | hpet_rtc_timer_init(); | 1004 | hpet_rtc_timer_init(); |
| 580 | 1005 | ||
| @@ -652,7 +1077,7 @@ static void hpet_rtc_timer_reinit(void) | |||
| 652 | if (hpet_rtc_flags & RTC_PIE) | 1077 | if (hpet_rtc_flags & RTC_PIE) |
| 653 | hpet_pie_count += lost_ints; | 1078 | hpet_pie_count += lost_ints; |
| 654 | if (printk_ratelimit()) | 1079 | if (printk_ratelimit()) |
| 655 | printk(KERN_WARNING "rtc: lost %d interrupts\n", | 1080 | printk(KERN_WARNING "hpet1: lost %d rtc interrupts\n", |
| 656 | lost_ints); | 1081 | lost_ints); |
| 657 | } | 1082 | } |
| 658 | } | 1083 | } |
| @@ -670,7 +1095,8 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id) | |||
| 670 | 1095 | ||
| 671 | if (hpet_rtc_flags & RTC_UIE && | 1096 | if (hpet_rtc_flags & RTC_UIE && |
| 672 | curr_time.tm_sec != hpet_prev_update_sec) { | 1097 | curr_time.tm_sec != hpet_prev_update_sec) { |
| 673 | rtc_int_flag = RTC_UF; | 1098 | if (hpet_prev_update_sec >= 0) |
| 1099 | rtc_int_flag = RTC_UF; | ||
| 674 | hpet_prev_update_sec = curr_time.tm_sec; | 1100 | hpet_prev_update_sec = curr_time.tm_sec; |
| 675 | } | 1101 | } |
| 676 | 1102 | ||
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index eb9ddd8efb82..1f20608d4ca8 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c | |||
| @@ -21,9 +21,12 @@ | |||
| 21 | # include <asm/sigcontext32.h> | 21 | # include <asm/sigcontext32.h> |
| 22 | # include <asm/user32.h> | 22 | # include <asm/user32.h> |
| 23 | #else | 23 | #else |
| 24 | # define save_i387_ia32 save_i387 | 24 | # define save_i387_xstate_ia32 save_i387_xstate |
| 25 | # define restore_i387_ia32 restore_i387 | 25 | # define restore_i387_xstate_ia32 restore_i387_xstate |
| 26 | # define _fpstate_ia32 _fpstate | 26 | # define _fpstate_ia32 _fpstate |
| 27 | # define _xstate_ia32 _xstate | ||
| 28 | # define sig_xstate_ia32_size sig_xstate_size | ||
| 29 | # define fx_sw_reserved_ia32 fx_sw_reserved | ||
| 27 | # define user_i387_ia32_struct user_i387_struct | 30 | # define user_i387_ia32_struct user_i387_struct |
| 28 | # define user32_fxsr_struct user_fxsr_struct | 31 | # define user32_fxsr_struct user_fxsr_struct |
| 29 | #endif | 32 | #endif |
| @@ -36,6 +39,7 @@ | |||
| 36 | 39 | ||
| 37 | static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; | 40 | static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; |
| 38 | unsigned int xstate_size; | 41 | unsigned int xstate_size; |
| 42 | unsigned int sig_xstate_ia32_size = sizeof(struct _fpstate_ia32); | ||
| 39 | static struct i387_fxsave_struct fx_scratch __cpuinitdata; | 43 | static struct i387_fxsave_struct fx_scratch __cpuinitdata; |
| 40 | 44 | ||
| 41 | void __cpuinit mxcsr_feature_mask_init(void) | 45 | void __cpuinit mxcsr_feature_mask_init(void) |
| @@ -61,6 +65,11 @@ void __init init_thread_xstate(void) | |||
| 61 | return; | 65 | return; |
| 62 | } | 66 | } |
| 63 | 67 | ||
| 68 | if (cpu_has_xsave) { | ||
| 69 | xsave_cntxt_init(); | ||
| 70 | return; | ||
| 71 | } | ||
| 72 | |||
| 64 | if (cpu_has_fxsr) | 73 | if (cpu_has_fxsr) |
| 65 | xstate_size = sizeof(struct i387_fxsave_struct); | 74 | xstate_size = sizeof(struct i387_fxsave_struct); |
| 66 | #ifdef CONFIG_X86_32 | 75 | #ifdef CONFIG_X86_32 |
| @@ -83,9 +92,19 @@ void __cpuinit fpu_init(void) | |||
| 83 | 92 | ||
| 84 | write_cr0(oldcr0 & ~(X86_CR0_TS|X86_CR0_EM)); /* clear TS and EM */ | 93 | write_cr0(oldcr0 & ~(X86_CR0_TS|X86_CR0_EM)); /* clear TS and EM */ |
| 85 | 94 | ||
| 95 | /* | ||
| 96 | * Boot processor to setup the FP and extended state context info. | ||
| 97 | */ | ||
| 98 | if (!smp_processor_id()) | ||
| 99 | init_thread_xstate(); | ||
| 100 | xsave_init(); | ||
| 101 | |||
| 86 | mxcsr_feature_mask_init(); | 102 | mxcsr_feature_mask_init(); |
| 87 | /* clean state in init */ | 103 | /* clean state in init */ |
| 88 | current_thread_info()->status = 0; | 104 | if (cpu_has_xsave) |
| 105 | current_thread_info()->status = TS_XSAVE; | ||
| 106 | else | ||
| 107 | current_thread_info()->status = 0; | ||
| 89 | clear_used_math(); | 108 | clear_used_math(); |
| 90 | } | 109 | } |
| 91 | #endif /* CONFIG_X86_64 */ | 110 | #endif /* CONFIG_X86_64 */ |
| @@ -195,6 +214,13 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, | |||
| 195 | */ | 214 | */ |
| 196 | target->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask; | 215 | target->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask; |
| 197 | 216 | ||
| 217 | /* | ||
| 218 | * update the header bits in the xsave header, indicating the | ||
| 219 | * presence of FP and SSE state. | ||
| 220 | */ | ||
| 221 | if (cpu_has_xsave) | ||
| 222 | target->thread.xstate->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE; | ||
| 223 | |||
| 198 | return ret; | 224 | return ret; |
| 199 | } | 225 | } |
| 200 | 226 | ||
| @@ -395,6 +421,12 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, | |||
| 395 | if (!ret) | 421 | if (!ret) |
| 396 | convert_to_fxsr(target, &env); | 422 | convert_to_fxsr(target, &env); |
| 397 | 423 | ||
| 424 | /* | ||
| 425 | * update the header bit in the xsave header, indicating the | ||
| 426 | * presence of FP. | ||
| 427 | */ | ||
| 428 | if (cpu_has_xsave) | ||
| 429 | target->thread.xstate->xsave.xsave_hdr.xstate_bv |= XSTATE_FP; | ||
| 398 | return ret; | 430 | return ret; |
| 399 | } | 431 | } |
| 400 | 432 | ||
| @@ -407,7 +439,6 @@ static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf) | |||
| 407 | struct task_struct *tsk = current; | 439 | struct task_struct *tsk = current; |
| 408 | struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave; | 440 | struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave; |
| 409 | 441 | ||
| 410 | unlazy_fpu(tsk); | ||
| 411 | fp->status = fp->swd; | 442 | fp->status = fp->swd; |
| 412 | if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct))) | 443 | if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct))) |
| 413 | return -1; | 444 | return -1; |
| @@ -421,8 +452,6 @@ static int save_i387_fxsave(struct _fpstate_ia32 __user *buf) | |||
| 421 | struct user_i387_ia32_struct env; | 452 | struct user_i387_ia32_struct env; |
| 422 | int err = 0; | 453 | int err = 0; |
| 423 | 454 | ||
| 424 | unlazy_fpu(tsk); | ||
| 425 | |||
| 426 | convert_from_fxsr(&env, tsk); | 455 | convert_from_fxsr(&env, tsk); |
| 427 | if (__copy_to_user(buf, &env, sizeof(env))) | 456 | if (__copy_to_user(buf, &env, sizeof(env))) |
| 428 | return -1; | 457 | return -1; |
| @@ -432,16 +461,54 @@ static int save_i387_fxsave(struct _fpstate_ia32 __user *buf) | |||
| 432 | if (err) | 461 | if (err) |
| 433 | return -1; | 462 | return -1; |
| 434 | 463 | ||
| 435 | if (__copy_to_user(&buf->_fxsr_env[0], fx, | 464 | if (__copy_to_user(&buf->_fxsr_env[0], fx, xstate_size)) |
| 436 | sizeof(struct i387_fxsave_struct))) | 465 | return -1; |
| 466 | return 1; | ||
| 467 | } | ||
| 468 | |||
| 469 | static int save_i387_xsave(void __user *buf) | ||
| 470 | { | ||
| 471 | struct task_struct *tsk = current; | ||
| 472 | struct _fpstate_ia32 __user *fx = buf; | ||
| 473 | int err = 0; | ||
| 474 | |||
| 475 | /* | ||
| 476 | * For legacy compatibility, we always set the FP/SSE bits in the bit | ||
| 477 | * vector while saving the state to the user context. | ||
| 478 | * This will enable us to capture any changes (during sigreturn) to | ||
| 479 | * the FP/SSE bits by the legacy applications which don't touch | ||
| 480 | * xstate_bv in the xsave header. | ||
| 481 | * | ||
| 482 | * xsave aware applications can change the xstate_bv in the xsave | ||
| 483 | * header as well as change any contents in the memory layout. | ||
| 484 | * xrestore as part of sigreturn will capture all the changes. | ||
| 485 | */ | ||
| 486 | tsk->thread.xstate->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE; | ||
| 487 | |||
| 488 | if (save_i387_fxsave(fx) < 0) | ||
| 489 | return -1; | ||
| 490 | |||
| 491 | err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved_ia32, | ||
| 492 | sizeof(struct _fpx_sw_bytes)); | ||
| 493 | err |= __put_user(FP_XSTATE_MAGIC2, | ||
| 494 | (__u32 __user *) (buf + sig_xstate_ia32_size | ||
| 495 | - FP_XSTATE_MAGIC2_SIZE)); | ||
| 496 | if (err) | ||
| 437 | return -1; | 497 | return -1; |
| 498 | |||
| 438 | return 1; | 499 | return 1; |
| 439 | } | 500 | } |
| 440 | 501 | ||
| 441 | int save_i387_ia32(struct _fpstate_ia32 __user *buf) | 502 | int save_i387_xstate_ia32(void __user *buf) |
| 442 | { | 503 | { |
| 504 | struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf; | ||
| 505 | struct task_struct *tsk = current; | ||
| 506 | |||
| 443 | if (!used_math()) | 507 | if (!used_math()) |
| 444 | return 0; | 508 | return 0; |
| 509 | |||
| 510 | if (!access_ok(VERIFY_WRITE, buf, sig_xstate_ia32_size)) | ||
| 511 | return -EACCES; | ||
| 445 | /* | 512 | /* |
| 446 | * This will cause a "finit" to be triggered by the next | 513 | * This will cause a "finit" to be triggered by the next |
| 447 | * attempted FPU operation by the 'current' process. | 514 | * attempted FPU operation by the 'current' process. |
| @@ -451,13 +518,17 @@ int save_i387_ia32(struct _fpstate_ia32 __user *buf) | |||
| 451 | if (!HAVE_HWFP) { | 518 | if (!HAVE_HWFP) { |
| 452 | return fpregs_soft_get(current, NULL, | 519 | return fpregs_soft_get(current, NULL, |
| 453 | 0, sizeof(struct user_i387_ia32_struct), | 520 | 0, sizeof(struct user_i387_ia32_struct), |
| 454 | NULL, buf) ? -1 : 1; | 521 | NULL, fp) ? -1 : 1; |
| 455 | } | 522 | } |
| 456 | 523 | ||
| 524 | unlazy_fpu(tsk); | ||
| 525 | |||
| 526 | if (cpu_has_xsave) | ||
| 527 | return save_i387_xsave(fp); | ||
| 457 | if (cpu_has_fxsr) | 528 | if (cpu_has_fxsr) |
| 458 | return save_i387_fxsave(buf); | 529 | return save_i387_fxsave(fp); |
| 459 | else | 530 | else |
| 460 | return save_i387_fsave(buf); | 531 | return save_i387_fsave(fp); |
| 461 | } | 532 | } |
| 462 | 533 | ||
| 463 | static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf) | 534 | static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf) |
| @@ -468,14 +539,15 @@ static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf) | |||
| 468 | sizeof(struct i387_fsave_struct)); | 539 | sizeof(struct i387_fsave_struct)); |
| 469 | } | 540 | } |
| 470 | 541 | ||
| 471 | static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf) | 542 | static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf, |
| 543 | unsigned int size) | ||
| 472 | { | 544 | { |
| 473 | struct task_struct *tsk = current; | 545 | struct task_struct *tsk = current; |
| 474 | struct user_i387_ia32_struct env; | 546 | struct user_i387_ia32_struct env; |
| 475 | int err; | 547 | int err; |
| 476 | 548 | ||
| 477 | err = __copy_from_user(&tsk->thread.xstate->fxsave, &buf->_fxsr_env[0], | 549 | err = __copy_from_user(&tsk->thread.xstate->fxsave, &buf->_fxsr_env[0], |
| 478 | sizeof(struct i387_fxsave_struct)); | 550 | size); |
| 479 | /* mxcsr reserved bits must be masked to zero for security reasons */ | 551 | /* mxcsr reserved bits must be masked to zero for security reasons */ |
| 480 | tsk->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask; | 552 | tsk->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask; |
| 481 | if (err || __copy_from_user(&env, buf, sizeof(env))) | 553 | if (err || __copy_from_user(&env, buf, sizeof(env))) |
| @@ -485,14 +557,69 @@ static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf) | |||
| 485 | return 0; | 557 | return 0; |
| 486 | } | 558 | } |
| 487 | 559 | ||
| 488 | int restore_i387_ia32(struct _fpstate_ia32 __user *buf) | 560 | static int restore_i387_xsave(void __user *buf) |
| 561 | { | ||
| 562 | struct _fpx_sw_bytes fx_sw_user; | ||
| 563 | struct _fpstate_ia32 __user *fx_user = | ||
| 564 | ((struct _fpstate_ia32 __user *) buf); | ||
| 565 | struct i387_fxsave_struct __user *fx = | ||
| 566 | (struct i387_fxsave_struct __user *) &fx_user->_fxsr_env[0]; | ||
| 567 | struct xsave_hdr_struct *xsave_hdr = | ||
| 568 | ¤t->thread.xstate->xsave.xsave_hdr; | ||
| 569 | u64 mask; | ||
| 570 | int err; | ||
| 571 | |||
| 572 | if (check_for_xstate(fx, buf, &fx_sw_user)) | ||
| 573 | goto fx_only; | ||
| 574 | |||
| 575 | mask = fx_sw_user.xstate_bv; | ||
| 576 | |||
| 577 | err = restore_i387_fxsave(buf, fx_sw_user.xstate_size); | ||
| 578 | |||
| 579 | xsave_hdr->xstate_bv &= pcntxt_mask; | ||
| 580 | /* | ||
| 581 | * These bits must be zero. | ||
| 582 | */ | ||
| 583 | xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0; | ||
| 584 | |||
| 585 | /* | ||
| 586 | * Init the state that is not present in the memory layout | ||
| 587 | * and enabled by the OS. | ||
| 588 | */ | ||
| 589 | mask = ~(pcntxt_mask & ~mask); | ||
| 590 | xsave_hdr->xstate_bv &= mask; | ||
| 591 | |||
| 592 | return err; | ||
| 593 | fx_only: | ||
| 594 | /* | ||
| 595 | * Couldn't find the extended state information in the memory | ||
| 596 | * layout. Restore the FP/SSE and init the other extended state | ||
| 597 | * enabled by the OS. | ||
| 598 | */ | ||
| 599 | xsave_hdr->xstate_bv = XSTATE_FPSSE; | ||
| 600 | return restore_i387_fxsave(buf, sizeof(struct i387_fxsave_struct)); | ||
| 601 | } | ||
| 602 | |||
| 603 | int restore_i387_xstate_ia32(void __user *buf) | ||
| 489 | { | 604 | { |
| 490 | int err; | 605 | int err; |
| 491 | struct task_struct *tsk = current; | 606 | struct task_struct *tsk = current; |
| 607 | struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf; | ||
| 492 | 608 | ||
| 493 | if (HAVE_HWFP) | 609 | if (HAVE_HWFP) |
| 494 | clear_fpu(tsk); | 610 | clear_fpu(tsk); |
| 495 | 611 | ||
| 612 | if (!buf) { | ||
| 613 | if (used_math()) { | ||
| 614 | clear_fpu(tsk); | ||
| 615 | clear_used_math(); | ||
| 616 | } | ||
| 617 | |||
| 618 | return 0; | ||
| 619 | } else | ||
| 620 | if (!access_ok(VERIFY_READ, buf, sig_xstate_ia32_size)) | ||
| 621 | return -EACCES; | ||
| 622 | |||
| 496 | if (!used_math()) { | 623 | if (!used_math()) { |
| 497 | err = init_fpu(tsk); | 624 | err = init_fpu(tsk); |
| 498 | if (err) | 625 | if (err) |
| @@ -500,14 +627,17 @@ int restore_i387_ia32(struct _fpstate_ia32 __user *buf) | |||
| 500 | } | 627 | } |
| 501 | 628 | ||
| 502 | if (HAVE_HWFP) { | 629 | if (HAVE_HWFP) { |
| 503 | if (cpu_has_fxsr) | 630 | if (cpu_has_xsave) |
| 504 | err = restore_i387_fxsave(buf); | 631 | err = restore_i387_xsave(buf); |
| 632 | else if (cpu_has_fxsr) | ||
| 633 | err = restore_i387_fxsave(fp, sizeof(struct | ||
| 634 | i387_fxsave_struct)); | ||
| 505 | else | 635 | else |
| 506 | err = restore_i387_fsave(buf); | 636 | err = restore_i387_fsave(fp); |
| 507 | } else { | 637 | } else { |
| 508 | err = fpregs_soft_set(current, NULL, | 638 | err = fpregs_soft_set(current, NULL, |
| 509 | 0, sizeof(struct user_i387_ia32_struct), | 639 | 0, sizeof(struct user_i387_ia32_struct), |
| 510 | NULL, buf) != 0; | 640 | NULL, fp) != 0; |
| 511 | } | 641 | } |
| 512 | set_used_math(); | 642 | set_used_math(); |
| 513 | 643 | ||
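
In the i387.c changes above, save_i387_xsave() appends a software-reserved descriptor plus FP_XSTATE_MAGIC2 at the very end of the (now larger) ia32 signal frame, and restore_i387_xsave() falls back to plain fxsave handling when check_for_xstate() cannot find it. The sketch below illustrates that "trailer magic" idea only; the frame size, magic value and helper names here are placeholders, not the kernel's actual layout.

```c
#include <stdio.h>
#include <stdint.h>
#include <string.h>

/* Placeholder values -- the real sizes/magics live in asm/sigcontext.h. */
#define FRAME_SIZE   512
#define MAGIC2       0x46505845u	/* hypothetical trailing magic */
#define MAGIC2_SIZE  sizeof(uint32_t)

/* Writer: legacy state first, magic word last (only for extended frames). */
static void save_frame(uint8_t *frame, int extended)
{
	memset(frame, 0, FRAME_SIZE);
	if (extended) {
		uint32_t magic = MAGIC2;
		memcpy(frame + FRAME_SIZE - MAGIC2_SIZE, &magic, MAGIC2_SIZE);
	}
}

/* Reader: decide between the extended restore and the fxsave-only fallback. */
static int frame_has_xstate(const uint8_t *frame)
{
	uint32_t magic;

	memcpy(&magic, frame + FRAME_SIZE - MAGIC2_SIZE, MAGIC2_SIZE);
	return magic == MAGIC2;
}

int main(void)
{
	uint8_t frame[FRAME_SIZE];

	save_frame(frame, 1);
	printf("extended frame: %s\n", frame_has_xstate(frame) ? "xsave restore" : "fx_only");
	save_frame(frame, 0);
	printf("legacy frame:   %s\n", frame_has_xstate(frame) ? "xsave restore" : "fx_only");
	return 0;
}
```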
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index dc92b49d9204..4b8a53d841f7 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c | |||
| @@ -282,6 +282,30 @@ static int __init i8259A_init_sysfs(void) | |||
| 282 | 282 | ||
| 283 | device_initcall(i8259A_init_sysfs); | 283 | device_initcall(i8259A_init_sysfs); |
| 284 | 284 | ||
| 285 | void mask_8259A(void) | ||
| 286 | { | ||
| 287 | unsigned long flags; | ||
| 288 | |||
| 289 | spin_lock_irqsave(&i8259A_lock, flags); | ||
| 290 | |||
| 291 | outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ | ||
| 292 | outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ | ||
| 293 | |||
| 294 | spin_unlock_irqrestore(&i8259A_lock, flags); | ||
| 295 | } | ||
| 296 | |||
| 297 | void unmask_8259A(void) | ||
| 298 | { | ||
| 299 | unsigned long flags; | ||
| 300 | |||
| 301 | spin_lock_irqsave(&i8259A_lock, flags); | ||
| 302 | |||
| 303 | outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ | ||
| 304 | outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ | ||
| 305 | |||
| 306 | spin_unlock_irqrestore(&i8259A_lock, flags); | ||
| 307 | } | ||
| 308 | |||
| 285 | void init_8259A(int auto_eoi) | 309 | void init_8259A(int auto_eoi) |
| 286 | { | 310 | { |
| 287 | unsigned long flags; | 311 | unsigned long flags; |
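
The new mask_8259A()/unmask_8259A() helpers write 0xff into the master and slave interrupt-mask registers under the i8259A lock and later restore the cached masks. A likely use (an assumption here, not stated in this hunk) is bracketing a reconfiguration of the interrupt delivery path so no legacy IRQ fires halfway through. A hedged user-space sketch of the same "mask all, reprogram, restore from cache" pattern, with a plain struct standing in for the port I/O:

```c
#include <stdio.h>
#include <stdint.h>

/* Stand-in for the PIC's two interrupt-mask registers (IMR). */
struct pic_state {
	uint8_t master_imr;
	uint8_t slave_imr;
};

static struct pic_state pic = { .master_imr = 0xB8, .slave_imr = 0x8E };

/* Cached copies, mirroring cached_master_mask / cached_slave_mask. */
static uint8_t cached_master = 0xB8, cached_slave = 0x8E;

static void mask_all(void)
{
	pic.master_imr = 0xFF;		/* mask all of 8259A-1 */
	pic.slave_imr  = 0xFF;		/* mask all of 8259A-2 */
}

static void unmask_all(void)
{
	pic.master_imr = cached_master;	/* restore master IRQ mask */
	pic.slave_imr  = cached_slave;	/* restore slave IRQ mask */
}

int main(void)
{
	mask_all();
	/* ... reprogram interrupt routing here, with legacy IRQs quiesced ... */
	unmask_all();
	printf("master=0x%02X slave=0x%02X\n", pic.master_imr, pic.slave_imr);
	return 0;
}
```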
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic.c index 6510cde36b35..b764d7429c61 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic.c | |||
| @@ -27,16 +27,21 @@ | |||
| 27 | #include <linux/sched.h> | 27 | #include <linux/sched.h> |
| 28 | #include <linux/pci.h> | 28 | #include <linux/pci.h> |
| 29 | #include <linux/mc146818rtc.h> | 29 | #include <linux/mc146818rtc.h> |
| 30 | #include <linux/compiler.h> | ||
| 30 | #include <linux/acpi.h> | 31 | #include <linux/acpi.h> |
| 32 | #include <linux/module.h> | ||
| 31 | #include <linux/sysdev.h> | 33 | #include <linux/sysdev.h> |
| 32 | #include <linux/msi.h> | 34 | #include <linux/msi.h> |
| 33 | #include <linux/htirq.h> | 35 | #include <linux/htirq.h> |
| 34 | #include <linux/dmar.h> | 36 | #include <linux/freezer.h> |
| 35 | #include <linux/jiffies.h> | 37 | #include <linux/kthread.h> |
| 38 | #include <linux/jiffies.h> /* time_after() */ | ||
| 36 | #ifdef CONFIG_ACPI | 39 | #ifdef CONFIG_ACPI |
| 37 | #include <acpi/acpi_bus.h> | 40 | #include <acpi/acpi_bus.h> |
| 38 | #endif | 41 | #endif |
| 39 | #include <linux/bootmem.h> | 42 | #include <linux/bootmem.h> |
| 43 | #include <linux/dmar.h> | ||
| 44 | #include <linux/hpet.h> | ||
| 40 | 45 | ||
| 41 | #include <asm/idle.h> | 46 | #include <asm/idle.h> |
| 42 | #include <asm/io.h> | 47 | #include <asm/io.h> |
| @@ -45,62 +50,31 @@ | |||
| 45 | #include <asm/proto.h> | 50 | #include <asm/proto.h> |
| 46 | #include <asm/acpi.h> | 51 | #include <asm/acpi.h> |
| 47 | #include <asm/dma.h> | 52 | #include <asm/dma.h> |
| 53 | #include <asm/timer.h> | ||
| 54 | #include <asm/i8259.h> | ||
| 48 | #include <asm/nmi.h> | 55 | #include <asm/nmi.h> |
| 49 | #include <asm/msidef.h> | 56 | #include <asm/msidef.h> |
| 50 | #include <asm/hypertransport.h> | 57 | #include <asm/hypertransport.h> |
| 58 | #include <asm/setup.h> | ||
| 59 | #include <asm/irq_remapping.h> | ||
| 60 | #include <asm/hpet.h> | ||
| 61 | #include <asm/uv/uv_hub.h> | ||
| 62 | #include <asm/uv/uv_irq.h> | ||
| 51 | 63 | ||
| 52 | #include <mach_ipi.h> | 64 | #include <mach_ipi.h> |
| 53 | #include <mach_apic.h> | 65 | #include <mach_apic.h> |
| 66 | #include <mach_apicdef.h> | ||
| 54 | 67 | ||
| 55 | struct irq_cfg { | 68 | #define __apicdebuginit(type) static type __init |
| 56 | cpumask_t domain; | ||
| 57 | cpumask_t old_domain; | ||
| 58 | unsigned move_cleanup_count; | ||
| 59 | u8 vector; | ||
| 60 | u8 move_in_progress : 1; | ||
| 61 | }; | ||
| 62 | |||
| 63 | /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ | ||
| 64 | static struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = { | ||
| 65 | [0] = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, | ||
| 66 | [1] = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, }, | ||
| 67 | [2] = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, }, | ||
| 68 | [3] = { .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, }, | ||
| 69 | [4] = { .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, }, | ||
| 70 | [5] = { .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, }, | ||
| 71 | [6] = { .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, }, | ||
| 72 | [7] = { .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, }, | ||
| 73 | [8] = { .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, }, | ||
| 74 | [9] = { .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, }, | ||
| 75 | [10] = { .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, }, | ||
| 76 | [11] = { .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, }, | ||
| 77 | [12] = { .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, }, | ||
| 78 | [13] = { .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, }, | ||
| 79 | [14] = { .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, }, | ||
| 80 | [15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, }, | ||
| 81 | }; | ||
| 82 | |||
| 83 | static int assign_irq_vector(int irq, cpumask_t mask); | ||
| 84 | 69 | ||
| 85 | int first_system_vector = 0xfe; | 70 | /* |
| 86 | 71 | * Is the SiS APIC rmw bug present ? | |
| 87 | char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE}; | 72 | * -1 = don't know, 0 = no, 1 = yes |
| 88 | 73 | */ | |
| 89 | #define __apicdebuginit __init | 74 | int sis_apic_bug = -1; |
| 90 | |||
| 91 | int sis_apic_bug; /* not actually supported, dummy for compile */ | ||
| 92 | |||
| 93 | static int no_timer_check; | ||
| 94 | |||
| 95 | static int disable_timer_pin_1 __initdata; | ||
| 96 | |||
| 97 | int timer_through_8259 __initdata; | ||
| 98 | |||
| 99 | /* Where if anywhere is the i8259 connect in external int mode */ | ||
| 100 | static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; | ||
| 101 | 75 | ||
| 102 | static DEFINE_SPINLOCK(ioapic_lock); | 76 | static DEFINE_SPINLOCK(ioapic_lock); |
| 103 | DEFINE_SPINLOCK(vector_lock); | 77 | static DEFINE_SPINLOCK(vector_lock); |
| 104 | 78 | ||
| 105 | /* | 79 | /* |
| 106 | * # of IRQ routing registers | 80 | * # of IRQ routing registers |
| @@ -117,11 +91,69 @@ struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; | |||
| 117 | /* # of MP IRQ source entries */ | 91 | /* # of MP IRQ source entries */ |
| 118 | int mp_irq_entries; | 92 | int mp_irq_entries; |
| 119 | 93 | ||
| 94 | #if defined (CONFIG_MCA) || defined (CONFIG_EISA) | ||
| 95 | int mp_bus_id_to_type[MAX_MP_BUSSES]; | ||
| 96 | #endif | ||
| 97 | |||
| 120 | DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); | 98 | DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); |
| 121 | 99 | ||
| 100 | int skip_ioapic_setup; | ||
| 101 | |||
| 102 | static int __init parse_noapic(char *str) | ||
| 103 | { | ||
| 104 | /* disable IO-APIC */ | ||
| 105 | disable_ioapic_setup(); | ||
| 106 | return 0; | ||
| 107 | } | ||
| 108 | early_param("noapic", parse_noapic); | ||
| 109 | |||
| 110 | struct irq_pin_list; | ||
| 111 | struct irq_cfg { | ||
| 112 | unsigned int irq; | ||
| 113 | struct irq_pin_list *irq_2_pin; | ||
| 114 | cpumask_t domain; | ||
| 115 | cpumask_t old_domain; | ||
| 116 | unsigned move_cleanup_count; | ||
| 117 | u8 vector; | ||
| 118 | u8 move_in_progress : 1; | ||
| 119 | }; | ||
| 120 | |||
| 121 | /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ | ||
| 122 | static struct irq_cfg irq_cfgx[NR_IRQS] = { | ||
| 123 | [0] = { .irq = 0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, | ||
| 124 | [1] = { .irq = 1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, }, | ||
| 125 | [2] = { .irq = 2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, }, | ||
| 126 | [3] = { .irq = 3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, }, | ||
| 127 | [4] = { .irq = 4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, }, | ||
| 128 | [5] = { .irq = 5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, }, | ||
| 129 | [6] = { .irq = 6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, }, | ||
| 130 | [7] = { .irq = 7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, }, | ||
| 131 | [8] = { .irq = 8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, }, | ||
| 132 | [9] = { .irq = 9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, }, | ||
| 133 | [10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, }, | ||
| 134 | [11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, }, | ||
| 135 | [12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, }, | ||
| 136 | [13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, }, | ||
| 137 | [14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, }, | ||
| 138 | [15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, }, | ||
| 139 | }; | ||
| 140 | |||
| 141 | #define for_each_irq_cfg(irq, cfg) \ | ||
| 142 | for (irq = 0, cfg = irq_cfgx; irq < nr_irqs; irq++, cfg++) | ||
| 143 | |||
| 144 | static struct irq_cfg *irq_cfg(unsigned int irq) | ||
| 145 | { | ||
| 146 | return irq < nr_irqs ? irq_cfgx + irq : NULL; | ||
| 147 | } | ||
| 148 | |||
| 149 | static struct irq_cfg *irq_cfg_alloc(unsigned int irq) | ||
| 150 | { | ||
| 151 | return irq_cfg(irq); | ||
| 152 | } | ||
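The new `irq_cfg()` / `irq_cfg_alloc()` helpers wrap what is, for now, a plain statically sized table (`irq_cfgx[]`) indexed by IRQ number, and `for_each_irq_cfg()` iterates over it; keeping every lookup behind these helpers is what later lets the table become dynamically sized without touching callers. A minimal userspace sketch of the same lookup pattern (the table size and vector values below are placeholders, not the kernel's IRQ0_VECTOR/IRQ1_VECTOR):

```c
#include <stdio.h>

#define NR_IRQS 16   /* small table for the demo; the kernel uses its real NR_IRQS */

struct irq_cfg {
    unsigned int irq;
    unsigned char vector;
};

/* One slot per IRQ, mirroring the irq_cfgx[] table above (vector values invented). */
static struct irq_cfg irq_cfgx[NR_IRQS] = {
    [0] = { .irq = 0, .vector = 0x30 },
    [1] = { .irq = 1, .vector = 0x31 },
};

static unsigned int nr_irqs = NR_IRQS;

/* Bounds-checked lookup; returns NULL for an IRQ past the end of the table. */
static struct irq_cfg *irq_cfg(unsigned int irq)
{
    return irq < nr_irqs ? irq_cfgx + irq : NULL;
}

#define for_each_irq_cfg(irq, cfg) \
    for (irq = 0, cfg = irq_cfgx; irq < nr_irqs; irq++, cfg++)

int main(void)
{
    unsigned int irq;
    struct irq_cfg *cfg;

    for_each_irq_cfg(irq, cfg)
        if (cfg->vector)
            printf("irq %u -> vector 0x%02x\n", irq, cfg->vector);

    return irq_cfg(100) == NULL ? 0 : 1;
}
```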
| 153 | |||
| 122 | /* | 154 | /* |
| 123 | * Rough estimation of how many shared IRQs there are, can | 155 | * Rough estimation of how many shared IRQs there are, can be changed |
| 124 | * be changed anytime. | 156 | * anytime. |
| 125 | */ | 157 | */ |
| 126 | #define MAX_PLUS_SHARED_IRQS NR_IRQS | 158 | #define MAX_PLUS_SHARED_IRQS NR_IRQS |
| 127 | #define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS) | 159 | #define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS) |
| @@ -133,9 +165,36 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); | |||
| 133 | * between pins and IRQs. | 165 | * between pins and IRQs. |
| 134 | */ | 166 | */ |
| 135 | 167 | ||
| 136 | static struct irq_pin_list { | 168 | struct irq_pin_list { |
| 137 | short apic, pin, next; | 169 | int apic, pin; |
| 138 | } irq_2_pin[PIN_MAP_SIZE]; | 170 | struct irq_pin_list *next; |
| 171 | }; | ||
| 172 | |||
| 173 | static struct irq_pin_list irq_2_pin_head[PIN_MAP_SIZE]; | ||
| 174 | static struct irq_pin_list *irq_2_pin_ptr; | ||
| 175 | |||
| 176 | static void __init irq_2_pin_init(void) | ||
| 177 | { | ||
| 178 | struct irq_pin_list *pin = irq_2_pin_head; | ||
| 179 | int i; | ||
| 180 | |||
| 181 | for (i = 1; i < PIN_MAP_SIZE; i++) | ||
| 182 | pin[i-1].next = &pin[i]; | ||
| 183 | |||
| 184 | irq_2_pin_ptr = &pin[0]; | ||
| 185 | } | ||
| 186 | |||
| 187 | static struct irq_pin_list *get_one_free_irq_2_pin(void) | ||
| 188 | { | ||
| 189 | struct irq_pin_list *pin = irq_2_pin_ptr; | ||
| 190 | |||
| 191 | if (!pin) | ||
| 192 | panic("can not get more irq_2_pin\n"); | ||
| 193 | |||
| 194 | irq_2_pin_ptr = pin->next; | ||
| 195 | pin->next = NULL; | ||
| 196 | return pin; | ||
| 197 | } | ||
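This replaces the old `irq_2_pin[]` array, which chained entries through a `short next` index, with a real linked list carved out of a static pool: `irq_2_pin_init()` threads the pool into a free list and `get_one_free_irq_2_pin()` pops entries off its head, panicking once the pool is exhausted. A small standalone model of that free-list allocator (the pool size and the harness in `main()` are illustrative only):

```c
#include <stdio.h>
#include <stdlib.h>

#define PIN_MAP_SIZE 8   /* tiny pool for the demo; the kernel sizes this from NR_IRQS */

struct irq_pin_list {
    int apic, pin;
    struct irq_pin_list *next;
};

static struct irq_pin_list irq_2_pin_head[PIN_MAP_SIZE];
static struct irq_pin_list *irq_2_pin_ptr;

/* Chain the static pool into a singly linked free list. */
static void irq_2_pin_init(void)
{
    int i;

    for (i = 1; i < PIN_MAP_SIZE; i++)
        irq_2_pin_head[i - 1].next = &irq_2_pin_head[i];
    irq_2_pin_ptr = &irq_2_pin_head[0];
}

/* Pop one entry off the free list; the kernel panics when the pool runs dry. */
static struct irq_pin_list *get_one_free_irq_2_pin(void)
{
    struct irq_pin_list *pin = irq_2_pin_ptr;

    if (!pin) {
        fprintf(stderr, "out of irq_2_pin entries\n");
        exit(1);
    }
    irq_2_pin_ptr = pin->next;
    pin->next = NULL;
    return pin;
}

int main(void)
{
    struct irq_pin_list *e;

    irq_2_pin_init();
    e = get_one_free_irq_2_pin();
    e->apic = 0;
    e->pin = 2;
    printf("allocated pin entry %d:%d\n", e->apic, e->pin);
    return 0;
}
```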
| 139 | 198 | ||
| 140 | struct io_apic { | 199 | struct io_apic { |
| 141 | unsigned int index; | 200 | unsigned int index; |
| @@ -166,10 +225,15 @@ static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned i | |||
| 166 | /* | 225 | /* |
| 167 | * Re-write a value: to be used for read-modify-write | 226 | * Re-write a value: to be used for read-modify-write |
| 168 | * cycles where the read already set up the index register. | 227 | * cycles where the read already set up the index register. |
| 228 | * | ||
| 229 | * Older SiS APIC requires we rewrite the index register | ||
| 169 | */ | 230 | */ |
| 170 | static inline void io_apic_modify(unsigned int apic, unsigned int value) | 231 | static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) |
| 171 | { | 232 | { |
| 172 | struct io_apic __iomem *io_apic = io_apic_base(apic); | 233 | struct io_apic __iomem *io_apic = io_apic_base(apic); |
| 234 | |||
| 235 | if (sis_apic_bug) | ||
| 236 | writel(reg, &io_apic->index); | ||
| 173 | writel(value, &io_apic->data); | 237 | writel(value, &io_apic->data); |
| 174 | } | 238 | } |
| 175 | 239 | ||
| @@ -177,16 +241,17 @@ static bool io_apic_level_ack_pending(unsigned int irq) | |||
| 177 | { | 241 | { |
| 178 | struct irq_pin_list *entry; | 242 | struct irq_pin_list *entry; |
| 179 | unsigned long flags; | 243 | unsigned long flags; |
| 244 | struct irq_cfg *cfg = irq_cfg(irq); | ||
| 180 | 245 | ||
| 181 | spin_lock_irqsave(&ioapic_lock, flags); | 246 | spin_lock_irqsave(&ioapic_lock, flags); |
| 182 | entry = irq_2_pin + irq; | 247 | entry = cfg->irq_2_pin; |
| 183 | for (;;) { | 248 | for (;;) { |
| 184 | unsigned int reg; | 249 | unsigned int reg; |
| 185 | int pin; | 250 | int pin; |
| 186 | 251 | ||
| 187 | pin = entry->pin; | 252 | if (!entry) |
| 188 | if (pin == -1) | ||
| 189 | break; | 253 | break; |
| 254 | pin = entry->pin; | ||
| 190 | reg = io_apic_read(entry->apic, 0x10 + pin*2); | 255 | reg = io_apic_read(entry->apic, 0x10 + pin*2); |
| 191 | /* Is the remote IRR bit set? */ | 256 | /* Is the remote IRR bit set? */ |
| 192 | if (reg & IO_APIC_REDIR_REMOTE_IRR) { | 257 | if (reg & IO_APIC_REDIR_REMOTE_IRR) { |
| @@ -195,45 +260,13 @@ static bool io_apic_level_ack_pending(unsigned int irq) | |||
| 195 | } | 260 | } |
| 196 | if (!entry->next) | 261 | if (!entry->next) |
| 197 | break; | 262 | break; |
| 198 | entry = irq_2_pin + entry->next; | 263 | entry = entry->next; |
| 199 | } | 264 | } |
| 200 | spin_unlock_irqrestore(&ioapic_lock, flags); | 265 | spin_unlock_irqrestore(&ioapic_lock, flags); |
| 201 | 266 | ||
| 202 | return false; | 267 | return false; |
| 203 | } | 268 | } |
| 204 | 269 | ||
| 205 | /* | ||
| 206 | * Synchronize the IO-APIC and the CPU by doing | ||
| 207 | * a dummy read from the IO-APIC | ||
| 208 | */ | ||
| 209 | static inline void io_apic_sync(unsigned int apic) | ||
| 210 | { | ||
| 211 | struct io_apic __iomem *io_apic = io_apic_base(apic); | ||
| 212 | readl(&io_apic->data); | ||
| 213 | } | ||
| 214 | |||
| 215 | #define __DO_ACTION(R, ACTION, FINAL) \ | ||
| 216 | \ | ||
| 217 | { \ | ||
| 218 | int pin; \ | ||
| 219 | struct irq_pin_list *entry = irq_2_pin + irq; \ | ||
| 220 | \ | ||
| 221 | BUG_ON(irq >= NR_IRQS); \ | ||
| 222 | for (;;) { \ | ||
| 223 | unsigned int reg; \ | ||
| 224 | pin = entry->pin; \ | ||
| 225 | if (pin == -1) \ | ||
| 226 | break; \ | ||
| 227 | reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ | ||
| 228 | reg ACTION; \ | ||
| 229 | io_apic_modify(entry->apic, reg); \ | ||
| 230 | FINAL; \ | ||
| 231 | if (!entry->next) \ | ||
| 232 | break; \ | ||
| 233 | entry = irq_2_pin + entry->next; \ | ||
| 234 | } \ | ||
| 235 | } | ||
| 236 | |||
| 237 | union entry_union { | 270 | union entry_union { |
| 238 | struct { u32 w1, w2; }; | 271 | struct { u32 w1, w2; }; |
| 239 | struct IO_APIC_route_entry entry; | 272 | struct IO_APIC_route_entry entry; |
| @@ -293,54 +326,71 @@ static void ioapic_mask_entry(int apic, int pin) | |||
| 293 | static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) | 326 | static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) |
| 294 | { | 327 | { |
| 295 | int apic, pin; | 328 | int apic, pin; |
| 296 | struct irq_pin_list *entry = irq_2_pin + irq; | 329 | struct irq_cfg *cfg; |
| 330 | struct irq_pin_list *entry; | ||
| 297 | 331 | ||
| 298 | BUG_ON(irq >= NR_IRQS); | 332 | cfg = irq_cfg(irq); |
| 333 | entry = cfg->irq_2_pin; | ||
| 299 | for (;;) { | 334 | for (;;) { |
| 300 | unsigned int reg; | 335 | unsigned int reg; |
| 336 | |||
| 337 | if (!entry) | ||
| 338 | break; | ||
| 339 | |||
| 301 | apic = entry->apic; | 340 | apic = entry->apic; |
| 302 | pin = entry->pin; | 341 | pin = entry->pin; |
| 303 | if (pin == -1) | 342 | #ifdef CONFIG_INTR_REMAP |
| 304 | break; | 343 | /* |
| 344 | * With interrupt-remapping, destination information comes | ||
| 345 | * from interrupt-remapping table entry. | ||
| 346 | */ | ||
| 347 | if (!irq_remapped(irq)) | ||
| 348 | io_apic_write(apic, 0x11 + pin*2, dest); | ||
| 349 | #else | ||
| 305 | io_apic_write(apic, 0x11 + pin*2, dest); | 350 | io_apic_write(apic, 0x11 + pin*2, dest); |
| 351 | #endif | ||
| 306 | reg = io_apic_read(apic, 0x10 + pin*2); | 352 | reg = io_apic_read(apic, 0x10 + pin*2); |
| 307 | reg &= ~IO_APIC_REDIR_VECTOR_MASK; | 353 | reg &= ~IO_APIC_REDIR_VECTOR_MASK; |
| 308 | reg |= vector; | 354 | reg |= vector; |
| 309 | io_apic_modify(apic, reg); | 355 | io_apic_modify(apic, 0x10 + pin*2, reg); |
| 310 | if (!entry->next) | 356 | if (!entry->next) |
| 311 | break; | 357 | break; |
| 312 | entry = irq_2_pin + entry->next; | 358 | entry = entry->next; |
| 313 | } | 359 | } |
| 314 | } | 360 | } |
| 315 | 361 | ||
| 362 | static int assign_irq_vector(int irq, cpumask_t mask); | ||
| 363 | |||
| 316 | static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) | 364 | static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) |
| 317 | { | 365 | { |
| 318 | struct irq_cfg *cfg = irq_cfg + irq; | 366 | struct irq_cfg *cfg; |
| 319 | unsigned long flags; | 367 | unsigned long flags; |
| 320 | unsigned int dest; | 368 | unsigned int dest; |
| 321 | cpumask_t tmp; | 369 | cpumask_t tmp; |
| 370 | struct irq_desc *desc; | ||
| 322 | 371 | ||
| 323 | cpus_and(tmp, mask, cpu_online_map); | 372 | cpus_and(tmp, mask, cpu_online_map); |
| 324 | if (cpus_empty(tmp)) | 373 | if (cpus_empty(tmp)) |
| 325 | return; | 374 | return; |
| 326 | 375 | ||
| 376 | cfg = irq_cfg(irq); | ||
| 327 | if (assign_irq_vector(irq, mask)) | 377 | if (assign_irq_vector(irq, mask)) |
| 328 | return; | 378 | return; |
| 329 | 379 | ||
| 330 | cpus_and(tmp, cfg->domain, mask); | 380 | cpus_and(tmp, cfg->domain, mask); |
| 331 | dest = cpu_mask_to_apicid(tmp); | 381 | dest = cpu_mask_to_apicid(tmp); |
| 332 | |||
| 333 | /* | 382 | /* |
| 334 | * Only the high 8 bits are valid. | 383 | * Only the high 8 bits are valid. |
| 335 | */ | 384 | */ |
| 336 | dest = SET_APIC_LOGICAL_ID(dest); | 385 | dest = SET_APIC_LOGICAL_ID(dest); |
| 337 | 386 | ||
| 387 | desc = irq_to_desc(irq); | ||
| 338 | spin_lock_irqsave(&ioapic_lock, flags); | 388 | spin_lock_irqsave(&ioapic_lock, flags); |
| 339 | __target_IO_APIC_irq(irq, dest, cfg->vector); | 389 | __target_IO_APIC_irq(irq, dest, cfg->vector); |
| 340 | irq_desc[irq].affinity = mask; | 390 | desc->affinity = mask; |
| 341 | spin_unlock_irqrestore(&ioapic_lock, flags); | 391 | spin_unlock_irqrestore(&ioapic_lock, flags); |
| 342 | } | 392 | } |
| 343 | #endif | 393 | #endif /* CONFIG_SMP */ |
| 344 | 394 | ||
| 345 | /* | 395 | /* |
| 346 | * The common case is 1:1 IRQ<->pin mappings. Sometimes there are | 396 | * The common case is 1:1 IRQ<->pin mappings. Sometimes there are |
| @@ -349,19 +399,30 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) | |||
| 349 | */ | 399 | */ |
| 350 | static void add_pin_to_irq(unsigned int irq, int apic, int pin) | 400 | static void add_pin_to_irq(unsigned int irq, int apic, int pin) |
| 351 | { | 401 | { |
| 352 | static int first_free_entry = NR_IRQS; | 402 | struct irq_cfg *cfg; |
| 353 | struct irq_pin_list *entry = irq_2_pin + irq; | 403 | struct irq_pin_list *entry; |
| 404 | |||
| 405 | /* first reference to this irq's irq_cfg, so allocate it */ | ||

| 406 | cfg = irq_cfg_alloc(irq); | ||
| 407 | entry = cfg->irq_2_pin; | ||
| 408 | if (!entry) { | ||
| 409 | entry = get_one_free_irq_2_pin(); | ||
| 410 | cfg->irq_2_pin = entry; | ||
| 411 | entry->apic = apic; | ||
| 412 | entry->pin = pin; | ||
| 413 | return; | ||
| 414 | } | ||
| 354 | 415 | ||
| 355 | BUG_ON(irq >= NR_IRQS); | 416 | while (entry->next) { |
| 356 | while (entry->next) | 417 | /* not again, please */ |
| 357 | entry = irq_2_pin + entry->next; | 418 | if (entry->apic == apic && entry->pin == pin) |
| 419 | return; | ||
| 358 | 420 | ||
| 359 | if (entry->pin != -1) { | 421 | entry = entry->next; |
| 360 | entry->next = first_free_entry; | ||
| 361 | entry = irq_2_pin + entry->next; | ||
| 362 | if (++first_free_entry >= PIN_MAP_SIZE) | ||
| 363 | panic("io_apic.c: ran out of irq_2_pin entries!"); | ||
| 364 | } | 422 | } |
| 423 | |||
| 424 | entry->next = get_one_free_irq_2_pin(); | ||
| 425 | entry = entry->next; | ||
| 365 | entry->apic = apic; | 426 | entry->apic = apic; |
| 366 | entry->pin = pin; | 427 | entry->pin = pin; |
| 367 | } | 428 | } |
| @@ -373,30 +434,86 @@ static void __init replace_pin_at_irq(unsigned int irq, | |||
| 373 | int oldapic, int oldpin, | 434 | int oldapic, int oldpin, |
| 374 | int newapic, int newpin) | 435 | int newapic, int newpin) |
| 375 | { | 436 | { |
| 376 | struct irq_pin_list *entry = irq_2_pin + irq; | 437 | struct irq_cfg *cfg = irq_cfg(irq); |
| 438 | struct irq_pin_list *entry = cfg->irq_2_pin; | ||
| 439 | int replaced = 0; | ||
| 377 | 440 | ||
| 378 | while (1) { | 441 | while (entry) { |
| 379 | if (entry->apic == oldapic && entry->pin == oldpin) { | 442 | if (entry->apic == oldapic && entry->pin == oldpin) { |
| 380 | entry->apic = newapic; | 443 | entry->apic = newapic; |
| 381 | entry->pin = newpin; | 444 | entry->pin = newpin; |
| 382 | } | 445 | replaced = 1; |
| 383 | if (!entry->next) | 446 | /* every one is different, right? */ |
| 384 | break; | 447 | break; |
| 385 | entry = irq_2_pin + entry->next; | 448 | } |
| 449 | entry = entry->next; | ||
| 386 | } | 450 | } |
| 451 | |||
| 452 | /* why? call replace before add? */ | ||
| 453 | if (!replaced) | ||
| 454 | add_pin_to_irq(irq, newapic, newpin); | ||
| 387 | } | 455 | } |
| 388 | 456 | ||
| 457 | static inline void io_apic_modify_irq(unsigned int irq, | ||
| 458 | int mask_and, int mask_or, | ||
| 459 | void (*final)(struct irq_pin_list *entry)) | ||
| 460 | { | ||
| 461 | int pin; | ||
| 462 | struct irq_cfg *cfg; | ||
| 463 | struct irq_pin_list *entry; | ||
| 389 | 464 | ||
| 390 | #define DO_ACTION(name,R,ACTION, FINAL) \ | 465 | cfg = irq_cfg(irq); |
| 391 | \ | 466 | for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) { |
| 392 | static void name##_IO_APIC_irq (unsigned int irq) \ | 467 | unsigned int reg; |
| 393 | __DO_ACTION(R, ACTION, FINAL) | 468 | pin = entry->pin; |
| 469 | reg = io_apic_read(entry->apic, 0x10 + pin * 2); | ||
| 470 | reg &= mask_and; | ||
| 471 | reg |= mask_or; | ||
| 472 | io_apic_modify(entry->apic, 0x10 + pin * 2, reg); | ||
| 473 | if (final) | ||
| 474 | final(entry); | ||
| 475 | } | ||
| 476 | } | ||
| 394 | 477 | ||
| 395 | /* mask = 1 */ | 478 | static void __unmask_IO_APIC_irq(unsigned int irq) |
| 396 | DO_ACTION(__mask, 0, |= IO_APIC_REDIR_MASKED, io_apic_sync(entry->apic)) | 479 | { |
| 480 | io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, 0, NULL); | ||
| 481 | } | ||
| 397 | 482 | ||
| 398 | /* mask = 0 */ | 483 | #ifdef CONFIG_X86_64 |
| 399 | DO_ACTION(__unmask, 0, &= ~IO_APIC_REDIR_MASKED, ) | 484 | void io_apic_sync(struct irq_pin_list *entry) |
| 485 | { | ||
| 486 | /* | ||
| 487 | * Synchronize the IO-APIC and the CPU by doing | ||
| 488 | * a dummy read from the IO-APIC | ||
| 489 | */ | ||
| 490 | struct io_apic __iomem *io_apic; | ||
| 491 | io_apic = io_apic_base(entry->apic); | ||
| 492 | readl(&io_apic->data); | ||
| 493 | } | ||
| 494 | |||
| 495 | static void __mask_IO_APIC_irq(unsigned int irq) | ||
| 496 | { | ||
| 497 | io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); | ||
| 498 | } | ||
| 499 | #else /* CONFIG_X86_32 */ | ||
| 500 | static void __mask_IO_APIC_irq(unsigned int irq) | ||
| 501 | { | ||
| 502 | io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, NULL); | ||
| 503 | } | ||
| 504 | |||
| 505 | static void __mask_and_edge_IO_APIC_irq(unsigned int irq) | ||
| 506 | { | ||
| 507 | io_apic_modify_irq(irq, ~IO_APIC_REDIR_LEVEL_TRIGGER, | ||
| 508 | IO_APIC_REDIR_MASKED, NULL); | ||
| 509 | } | ||
| 510 | |||
| 511 | static void __unmask_and_level_IO_APIC_irq(unsigned int irq) | ||
| 512 | { | ||
| 513 | io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, | ||
| 514 | IO_APIC_REDIR_LEVEL_TRIGGER, NULL); | ||
| 515 | } | ||
| 516 | #endif /* CONFIG_X86_32 */ | ||
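The `__DO_ACTION`/`DO_ACTION` macro pair is gone; each mask/unmask variant is now a call to `io_apic_modify_irq()` with an AND mask, an OR mask and an optional `final` callback (the 64-bit build passes `io_apic_sync` so a dummy read flushes the write). A hedged sketch of the same read-modify-write pattern over a plain array standing in for the RTE low words — the bit values follow the redirection-entry layout assumed elsewhere in this file (bit 16 = mask, bit 15 = trigger mode):

```c
#include <stdio.h>

#define IO_APIC_REDIR_MASKED        0x00010000
#define IO_APIC_REDIR_LEVEL_TRIGGER 0x00008000

/* Stand-in for the per-pin redirection-entry low words; illustrative only. */
static unsigned int rte_low[4];

static void modify_rte(int pin, unsigned int mask_and, unsigned int mask_or)
{
    unsigned int reg = rte_low[pin];   /* io_apic_read() in the kernel */

    reg &= mask_and;
    reg |= mask_or;
    rte_low[pin] = reg;                /* io_apic_modify() in the kernel */
}

int main(void)
{
    /* mask: keep all bits, set the mask bit */
    modify_rte(0, ~0u, IO_APIC_REDIR_MASKED);
    /* unmask: clear the mask bit, set nothing */
    modify_rte(0, ~IO_APIC_REDIR_MASKED, 0);
    /* mask and force edge: clear level-trigger, set mask */
    modify_rte(1, ~IO_APIC_REDIR_LEVEL_TRIGGER, IO_APIC_REDIR_MASKED);

    printf("rte0=%08x rte1=%08x\n", rte_low[0], rte_low[1]);
    return 0;
}
```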
| 400 | 517 | ||
| 401 | static void mask_IO_APIC_irq (unsigned int irq) | 518 | static void mask_IO_APIC_irq (unsigned int irq) |
| 402 | { | 519 | { |
| @@ -439,24 +556,145 @@ static void clear_IO_APIC (void) | |||
| 439 | clear_IO_APIC_pin(apic, pin); | 556 | clear_IO_APIC_pin(apic, pin); |
| 440 | } | 557 | } |
| 441 | 558 | ||
| 442 | int skip_ioapic_setup; | 559 | #if !defined(CONFIG_SMP) && defined(CONFIG_X86_32) |
| 443 | int ioapic_force; | 560 | void send_IPI_self(int vector) |
| 561 | { | ||
| 562 | unsigned int cfg; | ||
| 444 | 563 | ||
| 445 | static int __init parse_noapic(char *str) | 564 | /* |
| 565 | * Wait for idle. | ||
| 566 | */ | ||
| 567 | apic_wait_icr_idle(); | ||
| 568 | cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL; | ||
| 569 | /* | ||
| 570 | * Send the IPI. The write to APIC_ICR fires this off. | ||
| 571 | */ | ||
| 572 | apic_write(APIC_ICR, cfg); | ||
| 573 | } | ||
| 574 | #endif /* !CONFIG_SMP && CONFIG_X86_32*/ | ||
| 575 | |||
| 576 | #ifdef CONFIG_X86_32 | ||
| 577 | /* | ||
| 578 | * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to | ||
| 579 | * specific CPU-side IRQs. | ||
| 580 | */ | ||
| 581 | |||
| 582 | #define MAX_PIRQS 8 | ||
| 583 | static int pirq_entries [MAX_PIRQS]; | ||
| 584 | static int pirqs_enabled; | ||
| 585 | |||
| 586 | static int __init ioapic_pirq_setup(char *str) | ||
| 446 | { | 587 | { |
| 447 | disable_ioapic_setup(); | 588 | int i, max; |
| 589 | int ints[MAX_PIRQS+1]; | ||
| 590 | |||
| 591 | get_options(str, ARRAY_SIZE(ints), ints); | ||
| 592 | |||
| 593 | for (i = 0; i < MAX_PIRQS; i++) | ||
| 594 | pirq_entries[i] = -1; | ||
| 595 | |||
| 596 | pirqs_enabled = 1; | ||
| 597 | apic_printk(APIC_VERBOSE, KERN_INFO | ||
| 598 | "PIRQ redirection, working around broken MP-BIOS.\n"); | ||
| 599 | max = MAX_PIRQS; | ||
| 600 | if (ints[0] < MAX_PIRQS) | ||
| 601 | max = ints[0]; | ||
| 602 | |||
| 603 | for (i = 0; i < max; i++) { | ||
| 604 | apic_printk(APIC_VERBOSE, KERN_DEBUG | ||
| 605 | "... PIRQ%d -> IRQ %d\n", i, ints[i+1]); | ||
| 606 | /* | ||
| 607 | * PIRQs are mapped upside down, usually. | ||
| 608 | */ | ||
| 609 | pirq_entries[MAX_PIRQS-i-1] = ints[i+1]; | ||
| 610 | } | ||
| 611 | return 1; | ||
| 612 | } | ||
| 613 | |||
| 614 | __setup("pirq=", ioapic_pirq_setup); | ||
| 615 | #endif /* CONFIG_X86_32 */ | ||
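`ioapic_pirq_setup()` parses the 32-bit-only `pirq=` boot option into `pirq_entries[]`, filling the slots "upside down" so that PIRQ0 lands in the last slot; `pin_2_irq()` later consults this table for pins 16-23. A small model of that parse step (the example IRQ numbers are invented for the demo):

```c
#include <stdio.h>

#define MAX_PIRQS 8

static int pirq_entries[MAX_PIRQS];

int main(void)
{
    /* Example: booting with "pirq=5,11" (values chosen only for the demo). */
    int ints[] = { 2, 5, 11 };   /* ints[0] = count, then the IRQ numbers */
    int i, max = ints[0] < MAX_PIRQS ? ints[0] : MAX_PIRQS;

    for (i = 0; i < MAX_PIRQS; i++)
        pirq_entries[i] = -1;

    /* PIRQs are mapped upside down: PIRQ0 goes into the last slot. */
    for (i = 0; i < max; i++)
        pirq_entries[MAX_PIRQS - i - 1] = ints[i + 1];

    for (i = 0; i < MAX_PIRQS; i++)
        printf("pirq_entries[%d] = %d\n", i, pirq_entries[i]);
    return 0;
}
```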
| 616 | |||
| 617 | #ifdef CONFIG_INTR_REMAP | ||
| 618 | /* I/O APIC RTE contents at the OS boot up */ | ||
| 619 | static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS]; | ||
| 620 | |||
| 621 | /* | ||
| 622 | * Saves and masks all the unmasked IO-APIC RTE's | ||
| 623 | */ | ||
| 624 | int save_mask_IO_APIC_setup(void) | ||
| 625 | { | ||
| 626 | union IO_APIC_reg_01 reg_01; | ||
| 627 | unsigned long flags; | ||
| 628 | int apic, pin; | ||
| 629 | |||
| 630 | /* | ||
| 631 | * The number of IO-APIC IRQ registers (== #pins): | ||
| 632 | */ | ||
| 633 | for (apic = 0; apic < nr_ioapics; apic++) { | ||
| 634 | spin_lock_irqsave(&ioapic_lock, flags); | ||
| 635 | reg_01.raw = io_apic_read(apic, 1); | ||
| 636 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
| 637 | nr_ioapic_registers[apic] = reg_01.bits.entries+1; | ||
| 638 | } | ||
| 639 | |||
| 640 | for (apic = 0; apic < nr_ioapics; apic++) { | ||
| 641 | early_ioapic_entries[apic] = | ||
| 642 | kzalloc(sizeof(struct IO_APIC_route_entry) * | ||
| 643 | nr_ioapic_registers[apic], GFP_KERNEL); | ||
| 644 | if (!early_ioapic_entries[apic]) | ||
| 645 | goto nomem; | ||
| 646 | } | ||
| 647 | |||
| 648 | for (apic = 0; apic < nr_ioapics; apic++) | ||
| 649 | for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { | ||
| 650 | struct IO_APIC_route_entry entry; | ||
| 651 | |||
| 652 | entry = early_ioapic_entries[apic][pin] = | ||
| 653 | ioapic_read_entry(apic, pin); | ||
| 654 | if (!entry.mask) { | ||
| 655 | entry.mask = 1; | ||
| 656 | ioapic_write_entry(apic, pin, entry); | ||
| 657 | } | ||
| 658 | } | ||
| 659 | |||
| 448 | return 0; | 660 | return 0; |
| 661 | |||
| 662 | nomem: | ||
| 663 | while (apic >= 0) | ||
| 664 | kfree(early_ioapic_entries[apic--]); | ||
| 665 | memset(early_ioapic_entries, 0, | ||
| 666 | sizeof(early_ioapic_entries)); | ||
| 667 | |||
| 668 | return -ENOMEM; | ||
| 449 | } | 669 | } |
| 450 | early_param("noapic", parse_noapic); | ||
| 451 | 670 | ||
| 452 | /* Actually the next is obsolete, but keep it for paranoid reasons -AK */ | 671 | void restore_IO_APIC_setup(void) |
| 453 | static int __init disable_timer_pin_setup(char *arg) | ||
| 454 | { | 672 | { |
| 455 | disable_timer_pin_1 = 1; | 673 | int apic, pin; |
| 456 | return 1; | 674 | |
| 675 | for (apic = 0; apic < nr_ioapics; apic++) { | ||
| 676 | if (!early_ioapic_entries[apic]) | ||
| 677 | break; | ||
| 678 | for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) | ||
| 679 | ioapic_write_entry(apic, pin, | ||
| 680 | early_ioapic_entries[apic][pin]); | ||
| 681 | kfree(early_ioapic_entries[apic]); | ||
| 682 | early_ioapic_entries[apic] = NULL; | ||
| 683 | } | ||
| 457 | } | 684 | } |
| 458 | __setup("disable_timer_pin_1", disable_timer_pin_setup); | ||
| 459 | 685 | ||
| 686 | void reinit_intr_remapped_IO_APIC(int intr_remapping) | ||
| 687 | { | ||
| 688 | /* | ||
| 689 | * For now this is a plain restore of the previous settings. | ||
| 690 | * TBD: when the OS enables interrupt-remapping, the | ||
| 691 | * IO-APIC RTEs need to be set up to point to interrupt-remapping | ||
| 692 | * table entries; until then, do a plain restore and let | ||
| 693 | * setup_IO_APIC_irqs() do the proper initialization. | ||
| 694 | */ | ||
| 695 | restore_IO_APIC_setup(); | ||
| 696 | } | ||
| 697 | #endif | ||
| 460 | 698 | ||
| 461 | /* | 699 | /* |
| 462 | * Find the IRQ entry number of a certain pin. | 700 | * Find the IRQ entry number of a certain pin. |
| @@ -560,22 +798,54 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) | |||
| 560 | best_guess = irq; | 798 | best_guess = irq; |
| 561 | } | 799 | } |
| 562 | } | 800 | } |
| 563 | BUG_ON(best_guess >= NR_IRQS); | ||
| 564 | return best_guess; | 801 | return best_guess; |
| 565 | } | 802 | } |
| 566 | 803 | ||
| 804 | EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); | ||
| 805 | |||
| 806 | #if defined(CONFIG_EISA) || defined(CONFIG_MCA) | ||
| 807 | /* | ||
| 808 | * EISA Edge/Level control register, ELCR | ||
| 809 | */ | ||
| 810 | static int EISA_ELCR(unsigned int irq) | ||
| 811 | { | ||
| 812 | if (irq < 16) { | ||
| 813 | unsigned int port = 0x4d0 + (irq >> 3); | ||
| 814 | return (inb(port) >> (irq & 7)) & 1; | ||
| 815 | } | ||
| 816 | apic_printk(APIC_VERBOSE, KERN_INFO | ||
| 817 | "Broken MPtable reports ISA irq %d\n", irq); | ||
| 818 | return 0; | ||
| 819 | } | ||
| 820 | |||
| 821 | #endif | ||
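`EISA_ELCR()` reads the EISA Edge/Level Control Register at I/O ports 0x4d0/0x4d1: `irq >> 3` selects the port and `irq & 7` the bit, with 1 meaning level triggered. A userspace sketch of the same bit extraction, taking the two port bytes as parameters instead of using `inb()` (the example register contents are made up):

```c
#include <stdio.h>

/*
 * Model of the ELCR lookup: elcr[0] stands for port 0x4d0 (IRQ0-7),
 * elcr[1] for port 0x4d1 (IRQ8-15). Bit set => level triggered.
 */
static int eisa_elcr_bit(unsigned int irq, const unsigned char elcr[2])
{
    if (irq < 16)
        return (elcr[irq >> 3] >> (irq & 7)) & 1;
    return 0;   /* broken MP table: default to edge, as the kernel does */
}

int main(void)
{
    unsigned char elcr[2] = { 0x20, 0x0a };   /* example: IRQ5, IRQ9, IRQ11 level */

    printf("IRQ5  level? %d\n", eisa_elcr_bit(5, elcr));
    printf("IRQ9  level? %d\n", eisa_elcr_bit(9, elcr));
    printf("IRQ3  level? %d\n", eisa_elcr_bit(3, elcr));
    return 0;
}
```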
| 822 | |||
| 567 | /* ISA interrupts are always polarity zero edge triggered, | 823 | /* ISA interrupts are always polarity zero edge triggered, |
| 568 | * when listed as conforming in the MP table. */ | 824 | * when listed as conforming in the MP table. */ |
| 569 | 825 | ||
| 570 | #define default_ISA_trigger(idx) (0) | 826 | #define default_ISA_trigger(idx) (0) |
| 571 | #define default_ISA_polarity(idx) (0) | 827 | #define default_ISA_polarity(idx) (0) |
| 572 | 828 | ||
| 829 | /* EISA interrupts are always polarity zero and can be edge or level | ||
| 830 | * trigger depending on the ELCR value. If an interrupt is listed as | ||
| 831 | * EISA conforming in the MP table, that means its trigger type must | ||
| 832 | * be read in from the ELCR */ | ||
| 833 | |||
| 834 | #define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq)) | ||
| 835 | #define default_EISA_polarity(idx) default_ISA_polarity(idx) | ||
| 836 | |||
| 573 | /* PCI interrupts are always polarity one level triggered, | 837 | /* PCI interrupts are always polarity one level triggered, |
| 574 | * when listed as conforming in the MP table. */ | 838 | * when listed as conforming in the MP table. */ |
| 575 | 839 | ||
| 576 | #define default_PCI_trigger(idx) (1) | 840 | #define default_PCI_trigger(idx) (1) |
| 577 | #define default_PCI_polarity(idx) (1) | 841 | #define default_PCI_polarity(idx) (1) |
| 578 | 842 | ||
| 843 | /* MCA interrupts are always polarity zero level triggered, | ||
| 844 | * when listed as conforming in the MP table. */ | ||
| 845 | |||
| 846 | #define default_MCA_trigger(idx) (1) | ||
| 847 | #define default_MCA_polarity(idx) default_ISA_polarity(idx) | ||
| 848 | |||
| 579 | static int MPBIOS_polarity(int idx) | 849 | static int MPBIOS_polarity(int idx) |
| 580 | { | 850 | { |
| 581 | int bus = mp_irqs[idx].mp_srcbus; | 851 | int bus = mp_irqs[idx].mp_srcbus; |
| @@ -633,6 +903,36 @@ static int MPBIOS_trigger(int idx) | |||
| 633 | trigger = default_ISA_trigger(idx); | 903 | trigger = default_ISA_trigger(idx); |
| 634 | else | 904 | else |
| 635 | trigger = default_PCI_trigger(idx); | 905 | trigger = default_PCI_trigger(idx); |
| 906 | #if defined(CONFIG_EISA) || defined(CONFIG_MCA) | ||
| 907 | switch (mp_bus_id_to_type[bus]) { | ||
| 908 | case MP_BUS_ISA: /* ISA pin */ | ||
| 909 | { | ||
| 910 | /* set before the switch */ | ||
| 911 | break; | ||
| 912 | } | ||
| 913 | case MP_BUS_EISA: /* EISA pin */ | ||
| 914 | { | ||
| 915 | trigger = default_EISA_trigger(idx); | ||
| 916 | break; | ||
| 917 | } | ||
| 918 | case MP_BUS_PCI: /* PCI pin */ | ||
| 919 | { | ||
| 920 | /* set before the switch */ | ||
| 921 | break; | ||
| 922 | } | ||
| 923 | case MP_BUS_MCA: /* MCA pin */ | ||
| 924 | { | ||
| 925 | trigger = default_MCA_trigger(idx); | ||
| 926 | break; | ||
| 927 | } | ||
| 928 | default: | ||
| 929 | { | ||
| 930 | printk(KERN_WARNING "broken BIOS!!\n"); | ||
| 931 | trigger = 1; | ||
| 932 | break; | ||
| 933 | } | ||
| 934 | } | ||
| 935 | #endif | ||
| 636 | break; | 936 | break; |
| 637 | case 1: /* edge */ | 937 | case 1: /* edge */ |
| 638 | { | 938 | { |
| @@ -670,6 +970,7 @@ static inline int irq_trigger(int idx) | |||
| 670 | return MPBIOS_trigger(idx); | 970 | return MPBIOS_trigger(idx); |
| 671 | } | 971 | } |
| 672 | 972 | ||
| 973 | int (*ioapic_renumber_irq)(int ioapic, int irq); | ||
| 673 | static int pin_2_irq(int idx, int apic, int pin) | 974 | static int pin_2_irq(int idx, int apic, int pin) |
| 674 | { | 975 | { |
| 675 | int irq, i; | 976 | int irq, i; |
| @@ -691,11 +992,48 @@ static int pin_2_irq(int idx, int apic, int pin) | |||
| 691 | while (i < apic) | 992 | while (i < apic) |
| 692 | irq += nr_ioapic_registers[i++]; | 993 | irq += nr_ioapic_registers[i++]; |
| 693 | irq += pin; | 994 | irq += pin; |
| 995 | /* | ||
| 996 | * For MPS mode, so far only needed by ES7000 platform | ||
| 997 | */ | ||
| 998 | if (ioapic_renumber_irq) | ||
| 999 | irq = ioapic_renumber_irq(apic, irq); | ||
| 694 | } | 1000 | } |
| 695 | BUG_ON(irq >= NR_IRQS); | 1001 | |
| 1002 | #ifdef CONFIG_X86_32 | ||
| 1003 | /* | ||
| 1004 | * PCI IRQ command line redirection. Yes, limits are hardcoded. | ||
| 1005 | */ | ||
| 1006 | if ((pin >= 16) && (pin <= 23)) { | ||
| 1007 | if (pirq_entries[pin-16] != -1) { | ||
| 1008 | if (!pirq_entries[pin-16]) { | ||
| 1009 | apic_printk(APIC_VERBOSE, KERN_DEBUG | ||
| 1010 | "disabling PIRQ%d\n", pin-16); | ||
| 1011 | } else { | ||
| 1012 | irq = pirq_entries[pin-16]; | ||
| 1013 | apic_printk(APIC_VERBOSE, KERN_DEBUG | ||
| 1014 | "using PIRQ%d -> IRQ %d\n", | ||
| 1015 | pin-16, irq); | ||
| 1016 | } | ||
| 1017 | } | ||
| 1018 | } | ||
| 1019 | #endif | ||
| 1020 | |||
| 696 | return irq; | 1021 | return irq; |
| 697 | } | 1022 | } |
| 698 | 1023 | ||
| 1024 | void lock_vector_lock(void) | ||
| 1025 | { | ||
| 1026 | /* Used so that the online set of cpus does not change | ||
| 1027 | * during assign_irq_vector. | ||
| 1028 | */ | ||
| 1029 | spin_lock(&vector_lock); | ||
| 1030 | } | ||
| 1031 | |||
| 1032 | void unlock_vector_lock(void) | ||
| 1033 | { | ||
| 1034 | spin_unlock(&vector_lock); | ||
| 1035 | } | ||
| 1036 | |||
| 699 | static int __assign_irq_vector(int irq, cpumask_t mask) | 1037 | static int __assign_irq_vector(int irq, cpumask_t mask) |
| 700 | { | 1038 | { |
| 701 | /* | 1039 | /* |
| @@ -714,8 +1052,7 @@ static int __assign_irq_vector(int irq, cpumask_t mask) | |||
| 714 | int cpu; | 1052 | int cpu; |
| 715 | struct irq_cfg *cfg; | 1053 | struct irq_cfg *cfg; |
| 716 | 1054 | ||
| 717 | BUG_ON((unsigned)irq >= NR_IRQS); | 1055 | cfg = irq_cfg(irq); |
| 718 | cfg = &irq_cfg[irq]; | ||
| 719 | 1056 | ||
| 720 | /* Only try and allocate irqs on cpus that are present */ | 1057 | /* Only try and allocate irqs on cpus that are present */ |
| 721 | cpus_and(mask, mask, cpu_online_map); | 1058 | cpus_and(mask, mask, cpu_online_map); |
| @@ -731,7 +1068,7 @@ static int __assign_irq_vector(int irq, cpumask_t mask) | |||
| 731 | return 0; | 1068 | return 0; |
| 732 | } | 1069 | } |
| 733 | 1070 | ||
| 734 | for_each_cpu_mask(cpu, mask) { | 1071 | for_each_cpu_mask_nr(cpu, mask) { |
| 735 | cpumask_t domain, new_mask; | 1072 | cpumask_t domain, new_mask; |
| 736 | int new_cpu; | 1073 | int new_cpu; |
| 737 | int vector, offset; | 1074 | int vector, offset; |
| @@ -750,9 +1087,14 @@ next: | |||
| 750 | } | 1087 | } |
| 751 | if (unlikely(current_vector == vector)) | 1088 | if (unlikely(current_vector == vector)) |
| 752 | continue; | 1089 | continue; |
| 1090 | #ifdef CONFIG_X86_64 | ||
| 753 | if (vector == IA32_SYSCALL_VECTOR) | 1091 | if (vector == IA32_SYSCALL_VECTOR) |
| 754 | goto next; | 1092 | goto next; |
| 755 | for_each_cpu_mask(new_cpu, new_mask) | 1093 | #else |
| 1094 | if (vector == SYSCALL_VECTOR) | ||
| 1095 | goto next; | ||
| 1096 | #endif | ||
| 1097 | for_each_cpu_mask_nr(new_cpu, new_mask) | ||
| 756 | if (per_cpu(vector_irq, new_cpu)[vector] != -1) | 1098 | if (per_cpu(vector_irq, new_cpu)[vector] != -1) |
| 757 | goto next; | 1099 | goto next; |
| 758 | /* Found one! */ | 1100 | /* Found one! */ |
| @@ -762,7 +1104,7 @@ next: | |||
| 762 | cfg->move_in_progress = 1; | 1104 | cfg->move_in_progress = 1; |
| 763 | cfg->old_domain = cfg->domain; | 1105 | cfg->old_domain = cfg->domain; |
| 764 | } | 1106 | } |
| 765 | for_each_cpu_mask(new_cpu, new_mask) | 1107 | for_each_cpu_mask_nr(new_cpu, new_mask) |
| 766 | per_cpu(vector_irq, new_cpu)[vector] = irq; | 1108 | per_cpu(vector_irq, new_cpu)[vector] = irq; |
| 767 | cfg->vector = vector; | 1109 | cfg->vector = vector; |
| 768 | cfg->domain = domain; | 1110 | cfg->domain = domain; |
| @@ -788,30 +1130,30 @@ static void __clear_irq_vector(int irq) | |||
| 788 | cpumask_t mask; | 1130 | cpumask_t mask; |
| 789 | int cpu, vector; | 1131 | int cpu, vector; |
| 790 | 1132 | ||
| 791 | BUG_ON((unsigned)irq >= NR_IRQS); | 1133 | cfg = irq_cfg(irq); |
| 792 | cfg = &irq_cfg[irq]; | ||
| 793 | BUG_ON(!cfg->vector); | 1134 | BUG_ON(!cfg->vector); |
| 794 | 1135 | ||
| 795 | vector = cfg->vector; | 1136 | vector = cfg->vector; |
| 796 | cpus_and(mask, cfg->domain, cpu_online_map); | 1137 | cpus_and(mask, cfg->domain, cpu_online_map); |
| 797 | for_each_cpu_mask(cpu, mask) | 1138 | for_each_cpu_mask_nr(cpu, mask) |
| 798 | per_cpu(vector_irq, cpu)[vector] = -1; | 1139 | per_cpu(vector_irq, cpu)[vector] = -1; |
| 799 | 1140 | ||
| 800 | cfg->vector = 0; | 1141 | cfg->vector = 0; |
| 801 | cpus_clear(cfg->domain); | 1142 | cpus_clear(cfg->domain); |
| 802 | } | 1143 | } |
| 803 | 1144 | ||
| 804 | static void __setup_vector_irq(int cpu) | 1145 | void __setup_vector_irq(int cpu) |
| 805 | { | 1146 | { |
| 806 | /* Initialize vector_irq on a new cpu */ | 1147 | /* Initialize vector_irq on a new cpu */ |
| 807 | /* This function must be called with vector_lock held */ | 1148 | /* This function must be called with vector_lock held */ |
| 808 | int irq, vector; | 1149 | int irq, vector; |
| 1150 | struct irq_cfg *cfg; | ||
| 809 | 1151 | ||
| 810 | /* Mark the inuse vectors */ | 1152 | /* Mark the inuse vectors */ |
| 811 | for (irq = 0; irq < NR_IRQS; ++irq) { | 1153 | for_each_irq_cfg(irq, cfg) { |
| 812 | if (!cpu_isset(cpu, irq_cfg[irq].domain)) | 1154 | if (!cpu_isset(cpu, cfg->domain)) |
| 813 | continue; | 1155 | continue; |
| 814 | vector = irq_cfg[irq].vector; | 1156 | vector = cfg->vector; |
| 815 | per_cpu(vector_irq, cpu)[vector] = irq; | 1157 | per_cpu(vector_irq, cpu)[vector] = irq; |
| 816 | } | 1158 | } |
| 817 | /* Mark the free vectors */ | 1159 | /* Mark the free vectors */ |
| @@ -819,44 +1161,154 @@ static void __setup_vector_irq(int cpu) | |||
| 819 | irq = per_cpu(vector_irq, cpu)[vector]; | 1161 | irq = per_cpu(vector_irq, cpu)[vector]; |
| 820 | if (irq < 0) | 1162 | if (irq < 0) |
| 821 | continue; | 1163 | continue; |
| 822 | if (!cpu_isset(cpu, irq_cfg[irq].domain)) | 1164 | |
| 1165 | cfg = irq_cfg(irq); | ||
| 1166 | if (!cpu_isset(cpu, cfg->domain)) | ||
| 823 | per_cpu(vector_irq, cpu)[vector] = -1; | 1167 | per_cpu(vector_irq, cpu)[vector] = -1; |
| 824 | } | 1168 | } |
| 825 | } | 1169 | } |
| 826 | 1170 | ||
| 827 | void setup_vector_irq(int cpu) | 1171 | static struct irq_chip ioapic_chip; |
| 828 | { | 1172 | #ifdef CONFIG_INTR_REMAP |
| 829 | spin_lock(&vector_lock); | 1173 | static struct irq_chip ir_ioapic_chip; |
| 830 | __setup_vector_irq(smp_processor_id()); | 1174 | #endif |
| 831 | spin_unlock(&vector_lock); | ||
| 832 | } | ||
| 833 | 1175 | ||
| 1176 | #define IOAPIC_AUTO -1 | ||
| 1177 | #define IOAPIC_EDGE 0 | ||
| 1178 | #define IOAPIC_LEVEL 1 | ||
| 834 | 1179 | ||
| 835 | static struct irq_chip ioapic_chip; | 1180 | #ifdef CONFIG_X86_32 |
| 1181 | static inline int IO_APIC_irq_trigger(int irq) | ||
| 1182 | { | ||
| 1183 | int apic, idx, pin; | ||
| 1184 | |||
| 1185 | for (apic = 0; apic < nr_ioapics; apic++) { | ||
| 1186 | for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { | ||
| 1187 | idx = find_irq_entry(apic, pin, mp_INT); | ||
| 1188 | if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin))) | ||
| 1189 | return irq_trigger(idx); | ||
| 1190 | } | ||
| 1191 | } | ||
| 1192 | /* | ||
| 1193 | * nonexistent IRQs are edge default | ||
| 1194 | */ | ||
| 1195 | return 0; | ||
| 1196 | } | ||
| 1197 | #else | ||
| 1198 | static inline int IO_APIC_irq_trigger(int irq) | ||
| 1199 | { | ||
| 1200 | return 1; | ||
| 1201 | } | ||
| 1202 | #endif | ||
| 836 | 1203 | ||
| 837 | static void ioapic_register_intr(int irq, unsigned long trigger) | 1204 | static void ioapic_register_intr(int irq, unsigned long trigger) |
| 838 | { | 1205 | { |
| 839 | if (trigger) { | 1206 | struct irq_desc *desc; |
| 840 | irq_desc[irq].status |= IRQ_LEVEL; | 1207 | |
| 1208 | desc = irq_to_desc(irq); | ||
| 1209 | |||
| 1210 | if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || | ||
| 1211 | trigger == IOAPIC_LEVEL) | ||
| 1212 | desc->status |= IRQ_LEVEL; | ||
| 1213 | else | ||
| 1214 | desc->status &= ~IRQ_LEVEL; | ||
| 1215 | |||
| 1216 | #ifdef CONFIG_INTR_REMAP | ||
| 1217 | if (irq_remapped(irq)) { | ||
| 1218 | desc->status |= IRQ_MOVE_PCNTXT; | ||
| 1219 | if (trigger) | ||
| 1220 | set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, | ||
| 1221 | handle_fasteoi_irq, | ||
| 1222 | "fasteoi"); | ||
| 1223 | else | ||
| 1224 | set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, | ||
| 1225 | handle_edge_irq, "edge"); | ||
| 1226 | return; | ||
| 1227 | } | ||
| 1228 | #endif | ||
| 1229 | if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || | ||
| 1230 | trigger == IOAPIC_LEVEL) | ||
| 841 | set_irq_chip_and_handler_name(irq, &ioapic_chip, | 1231 | set_irq_chip_and_handler_name(irq, &ioapic_chip, |
| 842 | handle_fasteoi_irq, "fasteoi"); | 1232 | handle_fasteoi_irq, |
| 843 | } else { | 1233 | "fasteoi"); |
| 844 | irq_desc[irq].status &= ~IRQ_LEVEL; | 1234 | else |
| 845 | set_irq_chip_and_handler_name(irq, &ioapic_chip, | 1235 | set_irq_chip_and_handler_name(irq, &ioapic_chip, |
| 846 | handle_edge_irq, "edge"); | 1236 | handle_edge_irq, "edge"); |
| 1237 | } | ||
| 1238 | |||
| 1239 | static int setup_ioapic_entry(int apic, int irq, | ||
| 1240 | struct IO_APIC_route_entry *entry, | ||
| 1241 | unsigned int destination, int trigger, | ||
| 1242 | int polarity, int vector) | ||
| 1243 | { | ||
| 1244 | /* | ||
| 1245 | * add it to the IO-APIC irq-routing table: | ||
| 1246 | */ | ||
| 1247 | memset(entry,0,sizeof(*entry)); | ||
| 1248 | |||
| 1249 | #ifdef CONFIG_INTR_REMAP | ||
| 1250 | if (intr_remapping_enabled) { | ||
| 1251 | struct intel_iommu *iommu = map_ioapic_to_ir(apic); | ||
| 1252 | struct irte irte; | ||
| 1253 | struct IR_IO_APIC_route_entry *ir_entry = | ||
| 1254 | (struct IR_IO_APIC_route_entry *) entry; | ||
| 1255 | int index; | ||
| 1256 | |||
| 1257 | if (!iommu) | ||
| 1258 | panic("No mapping iommu for ioapic %d\n", apic); | ||
| 1259 | |||
| 1260 | index = alloc_irte(iommu, irq, 1); | ||
| 1261 | if (index < 0) | ||
| 1262 | panic("Failed to allocate IRTE for ioapic %d\n", apic); | ||
| 1263 | |||
| 1264 | memset(&irte, 0, sizeof(irte)); | ||
| 1265 | |||
| 1266 | irte.present = 1; | ||
| 1267 | irte.dst_mode = INT_DEST_MODE; | ||
| 1268 | irte.trigger_mode = trigger; | ||
| 1269 | irte.dlvry_mode = INT_DELIVERY_MODE; | ||
| 1270 | irte.vector = vector; | ||
| 1271 | irte.dest_id = IRTE_DEST(destination); | ||
| 1272 | |||
| 1273 | modify_irte(irq, &irte); | ||
| 1274 | |||
| 1275 | ir_entry->index2 = (index >> 15) & 0x1; | ||
| 1276 | ir_entry->zero = 0; | ||
| 1277 | ir_entry->format = 1; | ||
| 1278 | ir_entry->index = (index & 0x7fff); | ||
| 1279 | } else | ||
| 1280 | #endif | ||
| 1281 | { | ||
| 1282 | entry->delivery_mode = INT_DELIVERY_MODE; | ||
| 1283 | entry->dest_mode = INT_DEST_MODE; | ||
| 1284 | entry->dest = destination; | ||
| 847 | } | 1285 | } |
| 1286 | |||
| 1287 | entry->mask = 0; /* enable IRQ */ | ||
| 1288 | entry->trigger = trigger; | ||
| 1289 | entry->polarity = polarity; | ||
| 1290 | entry->vector = vector; | ||
| 1291 | |||
| 1292 | /* Mask level triggered irqs. | ||
| 1293 | * Use IRQ_DELAYED_DISABLE for edge triggered irqs. | ||
| 1294 | */ | ||
| 1295 | if (trigger) | ||
| 1296 | entry->mask = 1; | ||
| 1297 | return 0; | ||
| 848 | } | 1298 | } |
| 849 | 1299 | ||
| 850 | static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, | 1300 | static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, |
| 851 | int trigger, int polarity) | 1301 | int trigger, int polarity) |
| 852 | { | 1302 | { |
| 853 | struct irq_cfg *cfg = irq_cfg + irq; | 1303 | struct irq_cfg *cfg; |
| 854 | struct IO_APIC_route_entry entry; | 1304 | struct IO_APIC_route_entry entry; |
| 855 | cpumask_t mask; | 1305 | cpumask_t mask; |
| 856 | 1306 | ||
| 857 | if (!IO_APIC_IRQ(irq)) | 1307 | if (!IO_APIC_IRQ(irq)) |
| 858 | return; | 1308 | return; |
| 859 | 1309 | ||
| 1310 | cfg = irq_cfg(irq); | ||
| 1311 | |||
| 860 | mask = TARGET_CPUS; | 1312 | mask = TARGET_CPUS; |
| 861 | if (assign_irq_vector(irq, mask)) | 1313 | if (assign_irq_vector(irq, mask)) |
| 862 | return; | 1314 | return; |
| @@ -869,24 +1321,15 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, | |||
| 869 | apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, | 1321 | apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, |
| 870 | irq, trigger, polarity); | 1322 | irq, trigger, polarity); |
| 871 | 1323 | ||
| 872 | /* | ||
| 873 | * add it to the IO-APIC irq-routing table: | ||
| 874 | */ | ||
| 875 | memset(&entry,0,sizeof(entry)); | ||
| 876 | 1324 | ||
| 877 | entry.delivery_mode = INT_DELIVERY_MODE; | 1325 | if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry, |
| 878 | entry.dest_mode = INT_DEST_MODE; | 1326 | cpu_mask_to_apicid(mask), trigger, polarity, |
| 879 | entry.dest = cpu_mask_to_apicid(mask); | 1327 | cfg->vector)) { |
| 880 | entry.mask = 0; /* enable IRQ */ | 1328 | printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", |
| 881 | entry.trigger = trigger; | 1329 | mp_ioapics[apic].mp_apicid, pin); |
| 882 | entry.polarity = polarity; | 1330 | __clear_irq_vector(irq); |
| 883 | entry.vector = cfg->vector; | 1331 | return; |
| 884 | 1332 | } | |
| 885 | /* Mask level triggered irqs. | ||
| 886 | * Use IRQ_DELAYED_DISABLE for edge triggered irqs. | ||
| 887 | */ | ||
| 888 | if (trigger) | ||
| 889 | entry.mask = 1; | ||
| 890 | 1333 | ||
| 891 | ioapic_register_intr(irq, trigger); | 1334 | ioapic_register_intr(irq, trigger); |
| 892 | if (irq < 16) | 1335 | if (irq < 16) |
| @@ -897,37 +1340,49 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, | |||
| 897 | 1340 | ||
| 898 | static void __init setup_IO_APIC_irqs(void) | 1341 | static void __init setup_IO_APIC_irqs(void) |
| 899 | { | 1342 | { |
| 900 | int apic, pin, idx, irq, first_notcon = 1; | 1343 | int apic, pin, idx, irq; |
| 1344 | int notcon = 0; | ||
| 901 | 1345 | ||
| 902 | apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); | 1346 | apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); |
| 903 | 1347 | ||
| 904 | for (apic = 0; apic < nr_ioapics; apic++) { | 1348 | for (apic = 0; apic < nr_ioapics; apic++) { |
| 905 | for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { | 1349 | for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { |
| 906 | 1350 | ||
| 907 | idx = find_irq_entry(apic,pin,mp_INT); | 1351 | idx = find_irq_entry(apic, pin, mp_INT); |
| 908 | if (idx == -1) { | 1352 | if (idx == -1) { |
| 909 | if (first_notcon) { | 1353 | if (!notcon) { |
| 910 | apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mp_apicid, pin); | 1354 | notcon = 1; |
| 911 | first_notcon = 0; | 1355 | apic_printk(APIC_VERBOSE, |
| 912 | } else | 1356 | KERN_DEBUG " %d-%d", |
| 913 | apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mp_apicid, pin); | 1357 | mp_ioapics[apic].mp_apicid, |
| 914 | continue; | 1358 | pin); |
| 915 | } | 1359 | } else |
| 916 | if (!first_notcon) { | 1360 | apic_printk(APIC_VERBOSE, " %d-%d", |
| 917 | apic_printk(APIC_VERBOSE, " not connected.\n"); | 1361 | mp_ioapics[apic].mp_apicid, |
| 918 | first_notcon = 1; | 1362 | pin); |
| 919 | } | 1363 | continue; |
| 1364 | } | ||
| 1365 | if (notcon) { | ||
| 1366 | apic_printk(APIC_VERBOSE, | ||
| 1367 | " (apicid-pin) not connected\n"); | ||
| 1368 | notcon = 0; | ||
| 1369 | } | ||
| 920 | 1370 | ||
| 921 | irq = pin_2_irq(idx, apic, pin); | 1371 | irq = pin_2_irq(idx, apic, pin); |
| 922 | add_pin_to_irq(irq, apic, pin); | 1372 | #ifdef CONFIG_X86_32 |
| 1373 | if (multi_timer_check(apic, irq)) | ||
| 1374 | continue; | ||
| 1375 | #endif | ||
| 1376 | add_pin_to_irq(irq, apic, pin); | ||
| 923 | 1377 | ||
| 924 | setup_IO_APIC_irq(apic, pin, irq, | 1378 | setup_IO_APIC_irq(apic, pin, irq, |
| 925 | irq_trigger(idx), irq_polarity(idx)); | 1379 | irq_trigger(idx), irq_polarity(idx)); |
| 926 | } | 1380 | } |
| 927 | } | 1381 | } |
| 928 | 1382 | ||
| 929 | if (!first_notcon) | 1383 | if (notcon) |
| 930 | apic_printk(APIC_VERBOSE, " not connected.\n"); | 1384 | apic_printk(APIC_VERBOSE, |
| 1385 | " (apicid-pin) not connected\n"); | ||
| 931 | } | 1386 | } |
| 932 | 1387 | ||
| 933 | /* | 1388 | /* |
| @@ -938,6 +1393,11 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, | |||
| 938 | { | 1393 | { |
| 939 | struct IO_APIC_route_entry entry; | 1394 | struct IO_APIC_route_entry entry; |
| 940 | 1395 | ||
| 1396 | #ifdef CONFIG_INTR_REMAP | ||
| 1397 | if (intr_remapping_enabled) | ||
| 1398 | return; | ||
| 1399 | #endif | ||
| 1400 | |||
| 941 | memset(&entry, 0, sizeof(entry)); | 1401 | memset(&entry, 0, sizeof(entry)); |
| 942 | 1402 | ||
| 943 | /* | 1403 | /* |
| @@ -964,13 +1424,17 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, | |||
| 964 | ioapic_write_entry(apic, pin, entry); | 1424 | ioapic_write_entry(apic, pin, entry); |
| 965 | } | 1425 | } |
| 966 | 1426 | ||
| 967 | void __apicdebuginit print_IO_APIC(void) | 1427 | |
| 1428 | __apicdebuginit(void) print_IO_APIC(void) | ||
| 968 | { | 1429 | { |
| 969 | int apic, i; | 1430 | int apic, i; |
| 970 | union IO_APIC_reg_00 reg_00; | 1431 | union IO_APIC_reg_00 reg_00; |
| 971 | union IO_APIC_reg_01 reg_01; | 1432 | union IO_APIC_reg_01 reg_01; |
| 972 | union IO_APIC_reg_02 reg_02; | 1433 | union IO_APIC_reg_02 reg_02; |
| 1434 | union IO_APIC_reg_03 reg_03; | ||
| 973 | unsigned long flags; | 1435 | unsigned long flags; |
| 1436 | struct irq_cfg *cfg; | ||
| 1437 | unsigned int irq; | ||
| 974 | 1438 | ||
| 975 | if (apic_verbosity == APIC_QUIET) | 1439 | if (apic_verbosity == APIC_QUIET) |
| 976 | return; | 1440 | return; |
| @@ -993,12 +1457,16 @@ void __apicdebuginit print_IO_APIC(void) | |||
| 993 | reg_01.raw = io_apic_read(apic, 1); | 1457 | reg_01.raw = io_apic_read(apic, 1); |
| 994 | if (reg_01.bits.version >= 0x10) | 1458 | if (reg_01.bits.version >= 0x10) |
| 995 | reg_02.raw = io_apic_read(apic, 2); | 1459 | reg_02.raw = io_apic_read(apic, 2); |
| 1460 | if (reg_01.bits.version >= 0x20) | ||
| 1461 | reg_03.raw = io_apic_read(apic, 3); | ||
| 996 | spin_unlock_irqrestore(&ioapic_lock, flags); | 1462 | spin_unlock_irqrestore(&ioapic_lock, flags); |
| 997 | 1463 | ||
| 998 | printk("\n"); | 1464 | printk("\n"); |
| 999 | printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid); | 1465 | printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid); |
| 1000 | printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); | 1466 | printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); |
| 1001 | printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); | 1467 | printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); |
| 1468 | printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); | ||
| 1469 | printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS); | ||
| 1002 | 1470 | ||
| 1003 | printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)®_01); | 1471 | printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)®_01); |
| 1004 | printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries); | 1472 | printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries); |
| @@ -1006,11 +1474,27 @@ void __apicdebuginit print_IO_APIC(void) | |||
| 1006 | printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); | 1474 | printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); |
| 1007 | printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version); | 1475 | printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version); |
| 1008 | 1476 | ||
| 1009 | if (reg_01.bits.version >= 0x10) { | 1477 | /* |
| 1478 | * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02, | ||
| 1479 | * but the value of reg_02 is read as the previous read register | ||
| 1480 | * value, so ignore it if reg_02 == reg_01. | ||
| 1481 | */ | ||
| 1482 | if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) { | ||
| 1010 | printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw); | 1483 | printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw); |
| 1011 | printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration); | 1484 | printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration); |
| 1012 | } | 1485 | } |
| 1013 | 1486 | ||
| 1487 | /* | ||
| 1488 | * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02 | ||
| 1489 | * or reg_03, but the value of reg_0[23] is read as the previous read | ||
| 1490 | * register value, so ignore it if reg_03 == reg_0[12]. | ||
| 1491 | */ | ||
| 1492 | if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw && | ||
| 1493 | reg_03.raw != reg_01.raw) { | ||
| 1494 | printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw); | ||
| 1495 | printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT); | ||
| 1496 | } | ||
| 1497 | |||
| 1014 | printk(KERN_DEBUG ".... IRQ redirection table:\n"); | 1498 | printk(KERN_DEBUG ".... IRQ redirection table:\n"); |
| 1015 | 1499 | ||
| 1016 | printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol" | 1500 | printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol" |
| @@ -1039,16 +1523,16 @@ void __apicdebuginit print_IO_APIC(void) | |||
| 1039 | } | 1523 | } |
| 1040 | } | 1524 | } |
| 1041 | printk(KERN_DEBUG "IRQ to pin mappings:\n"); | 1525 | printk(KERN_DEBUG "IRQ to pin mappings:\n"); |
| 1042 | for (i = 0; i < NR_IRQS; i++) { | 1526 | for_each_irq_cfg(irq, cfg) { |
| 1043 | struct irq_pin_list *entry = irq_2_pin + i; | 1527 | struct irq_pin_list *entry = cfg->irq_2_pin; |
| 1044 | if (entry->pin < 0) | 1528 | if (!entry) |
| 1045 | continue; | 1529 | continue; |
| 1046 | printk(KERN_DEBUG "IRQ%d ", i); | 1530 | printk(KERN_DEBUG "IRQ%d ", irq); |
| 1047 | for (;;) { | 1531 | for (;;) { |
| 1048 | printk("-> %d:%d", entry->apic, entry->pin); | 1532 | printk("-> %d:%d", entry->apic, entry->pin); |
| 1049 | if (!entry->next) | 1533 | if (!entry->next) |
| 1050 | break; | 1534 | break; |
| 1051 | entry = irq_2_pin + entry->next; | 1535 | entry = entry->next; |
| 1052 | } | 1536 | } |
| 1053 | printk("\n"); | 1537 | printk("\n"); |
| 1054 | } | 1538 | } |
| @@ -1058,9 +1542,7 @@ void __apicdebuginit print_IO_APIC(void) | |||
| 1058 | return; | 1542 | return; |
| 1059 | } | 1543 | } |
| 1060 | 1544 | ||
| 1061 | #if 0 | 1545 | __apicdebuginit(void) print_APIC_bitfield(int base) |
| 1062 | |||
| 1063 | static __apicdebuginit void print_APIC_bitfield (int base) | ||
| 1064 | { | 1546 | { |
| 1065 | unsigned int v; | 1547 | unsigned int v; |
| 1066 | int i, j; | 1548 | int i, j; |
| @@ -1081,9 +1563,10 @@ static __apicdebuginit void print_APIC_bitfield (int base) | |||
| 1081 | } | 1563 | } |
| 1082 | } | 1564 | } |
| 1083 | 1565 | ||
| 1084 | void __apicdebuginit print_local_APIC(void * dummy) | 1566 | __apicdebuginit(void) print_local_APIC(void *dummy) |
| 1085 | { | 1567 | { |
| 1086 | unsigned int v, ver, maxlvt; | 1568 | unsigned int v, ver, maxlvt; |
| 1569 | u64 icr; | ||
| 1087 | 1570 | ||
| 1088 | if (apic_verbosity == APIC_QUIET) | 1571 | if (apic_verbosity == APIC_QUIET) |
| 1089 | return; | 1572 | return; |
| @@ -1091,7 +1574,7 @@ void __apicdebuginit print_local_APIC(void * dummy) | |||
| 1091 | printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", | 1574 | printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", |
| 1092 | smp_processor_id(), hard_smp_processor_id()); | 1575 | smp_processor_id(), hard_smp_processor_id()); |
| 1093 | v = apic_read(APIC_ID); | 1576 | v = apic_read(APIC_ID); |
| 1094 | printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(read_apic_id())); | 1577 | printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, read_apic_id()); |
| 1095 | v = apic_read(APIC_LVR); | 1578 | v = apic_read(APIC_LVR); |
| 1096 | printk(KERN_INFO "... APIC VERSION: %08x\n", v); | 1579 | printk(KERN_INFO "... APIC VERSION: %08x\n", v); |
| 1097 | ver = GET_APIC_VERSION(v); | 1580 | ver = GET_APIC_VERSION(v); |
| @@ -1100,20 +1583,31 @@ void __apicdebuginit print_local_APIC(void * dummy) | |||
| 1100 | v = apic_read(APIC_TASKPRI); | 1583 | v = apic_read(APIC_TASKPRI); |
| 1101 | printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK); | 1584 | printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK); |
| 1102 | 1585 | ||
| 1103 | v = apic_read(APIC_ARBPRI); | 1586 | if (APIC_INTEGRATED(ver)) { /* !82489DX */ |
| 1104 | printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v, | 1587 | if (!APIC_XAPIC(ver)) { |
| 1105 | v & APIC_ARBPRI_MASK); | 1588 | v = apic_read(APIC_ARBPRI); |
| 1106 | v = apic_read(APIC_PROCPRI); | 1589 | printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v, |
| 1107 | printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v); | 1590 | v & APIC_ARBPRI_MASK); |
| 1591 | } | ||
| 1592 | v = apic_read(APIC_PROCPRI); | ||
| 1593 | printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v); | ||
| 1594 | } | ||
| 1595 | |||
| 1596 | /* | ||
| 1597 | * Remote read supported only in the 82489DX and local APIC for | ||
| 1598 | * Pentium processors. | ||
| 1599 | */ | ||
| 1600 | if (!APIC_INTEGRATED(ver) || maxlvt == 3) { | ||
| 1601 | v = apic_read(APIC_RRR); | ||
| 1602 | printk(KERN_DEBUG "... APIC RRR: %08x\n", v); | ||
| 1603 | } | ||
| 1108 | 1604 | ||
| 1109 | v = apic_read(APIC_EOI); | ||
| 1110 | printk(KERN_DEBUG "... APIC EOI: %08x\n", v); | ||
| 1111 | v = apic_read(APIC_RRR); | ||
| 1112 | printk(KERN_DEBUG "... APIC RRR: %08x\n", v); | ||
| 1113 | v = apic_read(APIC_LDR); | 1605 | v = apic_read(APIC_LDR); |
| 1114 | printk(KERN_DEBUG "... APIC LDR: %08x\n", v); | 1606 | printk(KERN_DEBUG "... APIC LDR: %08x\n", v); |
| 1115 | v = apic_read(APIC_DFR); | 1607 | if (!x2apic_enabled()) { |
| 1116 | printk(KERN_DEBUG "... APIC DFR: %08x\n", v); | 1608 | v = apic_read(APIC_DFR); |
| 1609 | printk(KERN_DEBUG "... APIC DFR: %08x\n", v); | ||
| 1610 | } | ||
| 1117 | v = apic_read(APIC_SPIV); | 1611 | v = apic_read(APIC_SPIV); |
| 1118 | printk(KERN_DEBUG "... APIC SPIV: %08x\n", v); | 1612 | printk(KERN_DEBUG "... APIC SPIV: %08x\n", v); |
| 1119 | 1613 | ||
| @@ -1124,13 +1618,17 @@ void __apicdebuginit print_local_APIC(void * dummy) | |||
| 1124 | printk(KERN_DEBUG "... APIC IRR field:\n"); | 1618 | printk(KERN_DEBUG "... APIC IRR field:\n"); |
| 1125 | print_APIC_bitfield(APIC_IRR); | 1619 | print_APIC_bitfield(APIC_IRR); |
| 1126 | 1620 | ||
| 1127 | v = apic_read(APIC_ESR); | 1621 | if (APIC_INTEGRATED(ver)) { /* !82489DX */ |
| 1128 | printk(KERN_DEBUG "... APIC ESR: %08x\n", v); | 1622 | if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ |
| 1623 | apic_write(APIC_ESR, 0); | ||
| 1129 | 1624 | ||
| 1130 | v = apic_read(APIC_ICR); | 1625 | v = apic_read(APIC_ESR); |
| 1131 | printk(KERN_DEBUG "... APIC ICR: %08x\n", v); | 1626 | printk(KERN_DEBUG "... APIC ESR: %08x\n", v); |
| 1132 | v = apic_read(APIC_ICR2); | 1627 | } |
| 1133 | printk(KERN_DEBUG "... APIC ICR2: %08x\n", v); | 1628 | |
| 1629 | icr = apic_icr_read(); | ||
| 1630 | printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr); | ||
| 1631 | printk(KERN_DEBUG "... APIC ICR2: %08x\n", (u32)(icr >> 32)); | ||
| 1134 | 1632 | ||
| 1135 | v = apic_read(APIC_LVTT); | 1633 | v = apic_read(APIC_LVTT); |
| 1136 | printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); | 1634 | printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); |
| @@ -1158,12 +1656,17 @@ void __apicdebuginit print_local_APIC(void * dummy) | |||
| 1158 | printk("\n"); | 1656 | printk("\n"); |
| 1159 | } | 1657 | } |
| 1160 | 1658 | ||
| 1161 | void print_all_local_APICs (void) | 1659 | __apicdebuginit(void) print_all_local_APICs(void) |
| 1162 | { | 1660 | { |
| 1163 | on_each_cpu(print_local_APIC, NULL, 1); | 1661 | int cpu; |
| 1662 | |||
| 1663 | preempt_disable(); | ||
| 1664 | for_each_online_cpu(cpu) | ||
| 1665 | smp_call_function_single(cpu, print_local_APIC, NULL, 1); | ||
| 1666 | preempt_enable(); | ||
| 1164 | } | 1667 | } |
| 1165 | 1668 | ||
| 1166 | void __apicdebuginit print_PIC(void) | 1669 | __apicdebuginit(void) print_PIC(void) |
| 1167 | { | 1670 | { |
| 1168 | unsigned int v; | 1671 | unsigned int v; |
| 1169 | unsigned long flags; | 1672 | unsigned long flags; |
| @@ -1195,19 +1698,34 @@ void __apicdebuginit print_PIC(void) | |||
| 1195 | printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); | 1698 | printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); |
| 1196 | } | 1699 | } |
| 1197 | 1700 | ||
| 1198 | #endif /* 0 */ | 1701 | __apicdebuginit(int) print_all_ICs(void) |
| 1702 | { | ||
| 1703 | print_PIC(); | ||
| 1704 | print_all_local_APICs(); | ||
| 1705 | print_IO_APIC(); | ||
| 1706 | |||
| 1707 | return 0; | ||
| 1708 | } | ||
| 1709 | |||
| 1710 | fs_initcall(print_all_ICs); | ||
| 1711 | |||
| 1712 | |||
| 1713 | /* Where, if anywhere, is the i8259 connected in external int mode */ | ||
| 1714 | static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; | ||
| 1199 | 1715 | ||
| 1200 | void __init enable_IO_APIC(void) | 1716 | void __init enable_IO_APIC(void) |
| 1201 | { | 1717 | { |
| 1202 | union IO_APIC_reg_01 reg_01; | 1718 | union IO_APIC_reg_01 reg_01; |
| 1203 | int i8259_apic, i8259_pin; | 1719 | int i8259_apic, i8259_pin; |
| 1204 | int i, apic; | 1720 | int apic; |
| 1205 | unsigned long flags; | 1721 | unsigned long flags; |
| 1206 | 1722 | ||
| 1207 | for (i = 0; i < PIN_MAP_SIZE; i++) { | 1723 | #ifdef CONFIG_X86_32 |
| 1208 | irq_2_pin[i].pin = -1; | 1724 | int i; |
| 1209 | irq_2_pin[i].next = 0; | 1725 | if (!pirqs_enabled) |
| 1210 | } | 1726 | for (i = 0; i < MAX_PIRQS; i++) |
| 1727 | pirq_entries[i] = -1; | ||
| 1728 | #endif | ||
| 1211 | 1729 | ||
| 1212 | /* | 1730 | /* |
| 1213 | * The number of IO-APIC IRQ registers (== #pins): | 1731 | * The number of IO-APIC IRQ registers (== #pins): |
| @@ -1237,6 +1755,10 @@ void __init enable_IO_APIC(void) | |||
| 1237 | } | 1755 | } |
| 1238 | found_i8259: | 1756 | found_i8259: |
| 1239 | /* Look to see what if the MP table has reported the ExtINT */ | 1757 | /* Look to see what if the MP table has reported the ExtINT */ |
| 1758 | /* If we could not find the appropriate pin by looking at the ioapic | ||
| 1759 | * the i8259 probably is not connected to the ioapic but give the | ||
| 1760 | * mptable a chance anyway. | ||
| 1761 | */ | ||
| 1240 | i8259_pin = find_isa_irq_pin(0, mp_ExtINT); | 1762 | i8259_pin = find_isa_irq_pin(0, mp_ExtINT); |
| 1241 | i8259_apic = find_isa_irq_apic(0, mp_ExtINT); | 1763 | i8259_apic = find_isa_irq_apic(0, mp_ExtINT); |
| 1242 | /* Trust the MP table if nothing is setup in the hardware */ | 1764 | /* Trust the MP table if nothing is setup in the hardware */ |
| @@ -1285,7 +1807,7 @@ void disable_IO_APIC(void) | |||
| 1285 | entry.dest_mode = 0; /* Physical */ | 1807 | entry.dest_mode = 0; /* Physical */ |
| 1286 | entry.delivery_mode = dest_ExtINT; /* ExtInt */ | 1808 | entry.delivery_mode = dest_ExtINT; /* ExtInt */ |
| 1287 | entry.vector = 0; | 1809 | entry.vector = 0; |
| 1288 | entry.dest = GET_APIC_ID(read_apic_id()); | 1810 | entry.dest = read_apic_id(); |
| 1289 | 1811 | ||
| 1290 | /* | 1812 | /* |
| 1291 | * Add it to the IO-APIC irq-routing table: | 1813 | * Add it to the IO-APIC irq-routing table: |
| @@ -1296,6 +1818,133 @@ void disable_IO_APIC(void) | |||
| 1296 | disconnect_bsp_APIC(ioapic_i8259.pin != -1); | 1818 | disconnect_bsp_APIC(ioapic_i8259.pin != -1); |
| 1297 | } | 1819 | } |
| 1298 | 1820 | ||
| 1821 | #ifdef CONFIG_X86_32 | ||
| 1822 | /* | ||
| 1823 | * function to set the IO-APIC physical IDs based on the | ||
| 1824 | * values stored in the MPC table. | ||
| 1825 | * | ||
| 1826 | * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999 | ||
| 1827 | */ | ||
| 1828 | |||
| 1829 | static void __init setup_ioapic_ids_from_mpc(void) | ||
| 1830 | { | ||
| 1831 | union IO_APIC_reg_00 reg_00; | ||
| 1832 | physid_mask_t phys_id_present_map; | ||
| 1833 | int apic; | ||
| 1834 | int i; | ||
| 1835 | unsigned char old_id; | ||
| 1836 | unsigned long flags; | ||
| 1837 | |||
| 1838 | if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids()) | ||
| 1839 | return; | ||
| 1840 | |||
| 1841 | /* | ||
| 1842 | * Don't check I/O APIC IDs for xAPIC systems. They have | ||
| 1843 | * no meaning without the serial APIC bus. | ||
| 1844 | */ | ||
| 1845 | if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) | ||
| 1846 | || APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) | ||
| 1847 | return; | ||
| 1848 | /* | ||
| 1849 | * This is broken; anything with a real cpu count has to | ||
| 1850 | * circumvent this idiocy regardless. | ||
| 1851 | */ | ||
| 1852 | phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map); | ||
| 1853 | |||
| 1854 | /* | ||
| 1855 | * Set the IOAPIC ID to the value stored in the MPC table. | ||
| 1856 | */ | ||
| 1857 | for (apic = 0; apic < nr_ioapics; apic++) { | ||
| 1858 | |||
| 1859 | /* Read the register 0 value */ | ||
| 1860 | spin_lock_irqsave(&ioapic_lock, flags); | ||
| 1861 | reg_00.raw = io_apic_read(apic, 0); | ||
| 1862 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
| 1863 | |||
| 1864 | old_id = mp_ioapics[apic].mp_apicid; | ||
| 1865 | |||
| 1866 | if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) { | ||
| 1867 | printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", | ||
| 1868 | apic, mp_ioapics[apic].mp_apicid); | ||
| 1869 | printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", | ||
| 1870 | reg_00.bits.ID); | ||
| 1871 | mp_ioapics[apic].mp_apicid = reg_00.bits.ID; | ||
| 1872 | } | ||
| 1873 | |||
| 1874 | /* | ||
| 1875 | * Sanity check, is the ID really free? Every APIC in a | ||
| 1876 | * system must have a unique ID or we get lots of nice | ||
| 1877 | * 'stuck on smp_invalidate_needed IPI wait' messages. | ||
| 1878 | */ | ||
| 1879 | if (check_apicid_used(phys_id_present_map, | ||
| 1880 | mp_ioapics[apic].mp_apicid)) { | ||
| 1881 | printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", | ||
| 1882 | apic, mp_ioapics[apic].mp_apicid); | ||
| 1883 | for (i = 0; i < get_physical_broadcast(); i++) | ||
| 1884 | if (!physid_isset(i, phys_id_present_map)) | ||
| 1885 | break; | ||
| 1886 | if (i >= get_physical_broadcast()) | ||
| 1887 | panic("Max APIC ID exceeded!\n"); | ||
| 1888 | printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", | ||
| 1889 | i); | ||
| 1890 | physid_set(i, phys_id_present_map); | ||
| 1891 | mp_ioapics[apic].mp_apicid = i; | ||
| 1892 | } else { | ||
| 1893 | physid_mask_t tmp; | ||
| 1894 | tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid); | ||
| 1895 | apic_printk(APIC_VERBOSE, "Setting %d in the " | ||
| 1896 | "phys_id_present_map\n", | ||
| 1897 | mp_ioapics[apic].mp_apicid); | ||
| 1898 | physids_or(phys_id_present_map, phys_id_present_map, tmp); | ||
| 1899 | } | ||
| 1900 | |||
| 1901 | |||
| 1902 | /* | ||
| 1903 | * We need to adjust the IRQ routing table | ||
| 1904 | * if the ID changed. | ||
| 1905 | */ | ||
| 1906 | if (old_id != mp_ioapics[apic].mp_apicid) | ||
| 1907 | for (i = 0; i < mp_irq_entries; i++) | ||
| 1908 | if (mp_irqs[i].mp_dstapic == old_id) | ||
| 1909 | mp_irqs[i].mp_dstapic | ||
| 1910 | = mp_ioapics[apic].mp_apicid; | ||
| 1911 | |||
| 1912 | /* | ||
| 1913 | * Read the right value from the MPC table and | ||
| 1914 | * write it into the ID register. | ||
| 1915 | */ | ||
| 1916 | apic_printk(APIC_VERBOSE, KERN_INFO | ||
| 1917 | "...changing IO-APIC physical APIC ID to %d ...", | ||
| 1918 | mp_ioapics[apic].mp_apicid); | ||
| 1919 | |||
| 1920 | reg_00.bits.ID = mp_ioapics[apic].mp_apicid; | ||
| 1921 | spin_lock_irqsave(&ioapic_lock, flags); | ||
| 1922 | io_apic_write(apic, 0, reg_00.raw); | ||
| 1923 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
| 1924 | |||
| 1925 | /* | ||
| 1926 | * Sanity check | ||
| 1927 | */ | ||
| 1928 | spin_lock_irqsave(&ioapic_lock, flags); | ||
| 1929 | reg_00.raw = io_apic_read(apic, 0); | ||
| 1930 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
| 1931 | if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid) | ||
| 1932 | printk("could not set ID!\n"); | ||
| 1933 | else | ||
| 1934 | apic_printk(APIC_VERBOSE, " ok.\n"); | ||
| 1935 | } | ||
| 1936 | } | ||
| 1937 | #endif | ||
| 1938 | |||
| 1939 | int no_timer_check __initdata; | ||
| 1940 | |||
| 1941 | static int __init notimercheck(char *s) | ||
| 1942 | { | ||
| 1943 | no_timer_check = 1; | ||
| 1944 | return 1; | ||
| 1945 | } | ||
| 1946 | __setup("no_timer_check", notimercheck); | ||
| 1947 | |||
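A usage note on the parameter registered above (not stated in the patch itself): passing `no_timer_check` on the kernel command line sets the flag, and the next hunk makes timer_irq_works() honour it.

```c
/* Hedged illustration only: effect of booting with "no_timer_check".
 *   notimercheck("")    -> no_timer_check = 1
 *   timer_irq_works()   -> returns 1 immediately (see the next hunk) instead
 *                          of sampling jiffies for ten ticks.
 * Typically used where the PIT-through-IO-APIC test is unreliable; whether
 * that applies to a given platform is an assumption, not part of this diff. */
```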
| 1299 | /* | 1948 | /* |
| 1300 | * There is a nasty bug in some older SMP boards, their mptable lies | 1949 | * There is a nasty bug in some older SMP boards, their mptable lies |
| 1301 | * about the timer IRQ. We do the following to work around the situation: | 1950 | * about the timer IRQ. We do the following to work around the situation: |
| @@ -1309,6 +1958,9 @@ static int __init timer_irq_works(void) | |||
| 1309 | unsigned long t1 = jiffies; | 1958 | unsigned long t1 = jiffies; |
| 1310 | unsigned long flags; | 1959 | unsigned long flags; |
| 1311 | 1960 | ||
| 1961 | if (no_timer_check) | ||
| 1962 | return 1; | ||
| 1963 | |||
| 1312 | local_save_flags(flags); | 1964 | local_save_flags(flags); |
| 1313 | local_irq_enable(); | 1965 | local_irq_enable(); |
| 1314 | /* Let ten ticks pass... */ | 1966 | /* Let ten ticks pass... */ |
| @@ -1369,19 +2021,27 @@ static unsigned int startup_ioapic_irq(unsigned int irq) | |||
| 1369 | return was_pending; | 2021 | return was_pending; |
| 1370 | } | 2022 | } |
| 1371 | 2023 | ||
| 2024 | #ifdef CONFIG_X86_64 | ||
| 1372 | static int ioapic_retrigger_irq(unsigned int irq) | 2025 | static int ioapic_retrigger_irq(unsigned int irq) |
| 1373 | { | 2026 | { |
| 1374 | struct irq_cfg *cfg = &irq_cfg[irq]; | 2027 | |
| 1375 | cpumask_t mask; | 2028 | struct irq_cfg *cfg = irq_cfg(irq); |
| 1376 | unsigned long flags; | 2029 | unsigned long flags; |
| 1377 | 2030 | ||
| 1378 | spin_lock_irqsave(&vector_lock, flags); | 2031 | spin_lock_irqsave(&vector_lock, flags); |
| 1379 | mask = cpumask_of_cpu(first_cpu(cfg->domain)); | 2032 | send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector); |
| 1380 | send_IPI_mask(mask, cfg->vector); | ||
| 1381 | spin_unlock_irqrestore(&vector_lock, flags); | 2033 | spin_unlock_irqrestore(&vector_lock, flags); |
| 1382 | 2034 | ||
| 1383 | return 1; | 2035 | return 1; |
| 1384 | } | 2036 | } |
| 2037 | #else | ||
| 2038 | static int ioapic_retrigger_irq(unsigned int irq) | ||
| 2039 | { | ||
| 2040 | send_IPI_self(irq_cfg(irq)->vector); | ||
| 2041 | |||
| 2042 | return 1; | ||
| 2043 | } | ||
| 2044 | #endif | ||
| 1385 | 2045 | ||
| 1386 | /* | 2046 | /* |
| 1387 | * Level and edge triggered IO-APIC interrupts need different handling, | 2047 | * Level and edge triggered IO-APIC interrupts need different handling, |
| @@ -1393,11 +2053,159 @@ static int ioapic_retrigger_irq(unsigned int irq) | |||
| 1393 | */ | 2053 | */ |
| 1394 | 2054 | ||
| 1395 | #ifdef CONFIG_SMP | 2055 | #ifdef CONFIG_SMP |
| 2056 | |||
| 2057 | #ifdef CONFIG_INTR_REMAP | ||
| 2058 | static void ir_irq_migration(struct work_struct *work); | ||
| 2059 | |||
| 2060 | static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration); | ||
| 2061 | |||
| 2062 | /* | ||
| 2063 | * Migrate the IO-APIC irq in the presence of intr-remapping. | ||
| 2064 | * | ||
| 2065 | * For edge triggered, irq migration is a simple atomic update(of vector | ||
| 2066 | * and cpu destination) of IRTE and flush the hardware cache. | ||
| 2067 | * | ||
| 2068 | * For level triggered, we need to modify the io-apic RTE as well with the updated | ||
| 2069 | * vector information, along with modifying IRTE with vector and destination. | ||
| 2070 | * So irq migration for level triggered is a little more complex compared to | ||
| 2071 | * edge triggered migration. But the good news is, we use the same algorithm | ||
| 2072 | * for level triggered migration as we have today, the only difference being that | ||
| 2073 | * we now initiate the irq migration from process context instead of the | ||
| 2074 | * interrupt context. | ||
| 2075 | * | ||
| 2076 | * In future, when we do a directed EOI (combined with cpu EOI broadcast | ||
| 2077 | * suppression) to the IO-APIC, level triggered irq migration will also be | ||
| 2078 | * as simple as edge triggered migration and we can do the irq migration | ||
| 2079 | * with a simple atomic update to IO-APIC RTE. | ||
| 2080 | */ | ||
| 2081 | static void migrate_ioapic_irq(int irq, cpumask_t mask) | ||
| 2082 | { | ||
| 2083 | struct irq_cfg *cfg; | ||
| 2084 | struct irq_desc *desc; | ||
| 2085 | cpumask_t tmp, cleanup_mask; | ||
| 2086 | struct irte irte; | ||
| 2087 | int modify_ioapic_rte; | ||
| 2088 | unsigned int dest; | ||
| 2089 | unsigned long flags; | ||
| 2090 | |||
| 2091 | cpus_and(tmp, mask, cpu_online_map); | ||
| 2092 | if (cpus_empty(tmp)) | ||
| 2093 | return; | ||
| 2094 | |||
| 2095 | if (get_irte(irq, &irte)) | ||
| 2096 | return; | ||
| 2097 | |||
| 2098 | if (assign_irq_vector(irq, mask)) | ||
| 2099 | return; | ||
| 2100 | |||
| 2101 | cfg = irq_cfg(irq); | ||
| 2102 | cpus_and(tmp, cfg->domain, mask); | ||
| 2103 | dest = cpu_mask_to_apicid(tmp); | ||
| 2104 | |||
| 2105 | desc = irq_to_desc(irq); | ||
| 2106 | modify_ioapic_rte = desc->status & IRQ_LEVEL; | ||
| 2107 | if (modify_ioapic_rte) { | ||
| 2108 | spin_lock_irqsave(&ioapic_lock, flags); | ||
| 2109 | __target_IO_APIC_irq(irq, dest, cfg->vector); | ||
| 2110 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
| 2111 | } | ||
| 2112 | |||
| 2113 | irte.vector = cfg->vector; | ||
| 2114 | irte.dest_id = IRTE_DEST(dest); | ||
| 2115 | |||
| 2116 | /* | ||
| 2117 | * Modified the IRTE and flushes the Interrupt entry cache. | ||
| 2118 | */ | ||
| 2119 | modify_irte(irq, &irte); | ||
| 2120 | |||
| 2121 | if (cfg->move_in_progress) { | ||
| 2122 | cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); | ||
| 2123 | cfg->move_cleanup_count = cpus_weight(cleanup_mask); | ||
| 2124 | send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); | ||
| 2125 | cfg->move_in_progress = 0; | ||
| 2126 | } | ||
| 2127 | |||
| 2128 | desc->affinity = mask; | ||
| 2129 | } | ||
| 2130 | |||
| 2131 | static int migrate_irq_remapped_level(int irq) | ||
| 2132 | { | ||
| 2133 | int ret = -1; | ||
| 2134 | struct irq_desc *desc = irq_to_desc(irq); | ||
| 2135 | |||
| 2136 | mask_IO_APIC_irq(irq); | ||
| 2137 | |||
| 2138 | if (io_apic_level_ack_pending(irq)) { | ||
| 2139 | /* | ||
| 2140 | * Interrupt in progress. Migrating irq now will change the | ||
| 2141 | * vector information in the IO-APIC RTE and that will confuse | ||
| 2142 | * the EOI broadcast performed by cpu. | ||
| 2143 | * So, delay the irq migration to the next instance. | ||
| 2144 | */ | ||
| 2145 | schedule_delayed_work(&ir_migration_work, 1); | ||
| 2146 | goto unmask; | ||
| 2147 | } | ||
| 2148 | |||
| 2149 | /* everything is clear, we have right of way */ | ||
| 2150 | migrate_ioapic_irq(irq, desc->pending_mask); | ||
| 2151 | |||
| 2152 | ret = 0; | ||
| 2153 | desc->status &= ~IRQ_MOVE_PENDING; | ||
| 2154 | cpus_clear(desc->pending_mask); | ||
| 2155 | |||
| 2156 | unmask: | ||
| 2157 | unmask_IO_APIC_irq(irq); | ||
| 2158 | return ret; | ||
| 2159 | } | ||
| 2160 | |||
| 2161 | static void ir_irq_migration(struct work_struct *work) | ||
| 2162 | { | ||
| 2163 | unsigned int irq; | ||
| 2164 | struct irq_desc *desc; | ||
| 2165 | |||
| 2166 | for_each_irq_desc(irq, desc) { | ||
| 2167 | if (desc->status & IRQ_MOVE_PENDING) { | ||
| 2168 | unsigned long flags; | ||
| 2169 | |||
| 2170 | spin_lock_irqsave(&desc->lock, flags); | ||
| 2171 | if (!desc->chip->set_affinity || | ||
| 2172 | !(desc->status & IRQ_MOVE_PENDING)) { | ||
| 2173 | desc->status &= ~IRQ_MOVE_PENDING; | ||
| 2174 | spin_unlock_irqrestore(&desc->lock, flags); | ||
| 2175 | continue; | ||
| 2176 | } | ||
| 2177 | |||
| 2178 | desc->chip->set_affinity(irq, desc->pending_mask); | ||
| 2179 | spin_unlock_irqrestore(&desc->lock, flags); | ||
| 2180 | } | ||
| 2181 | } | ||
| 2182 | } | ||
| 2183 | |||
| 2184 | /* | ||
| 2185 | * Migrates the IRQ destination in the process context. | ||
| 2186 | */ | ||
| 2187 | static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) | ||
| 2188 | { | ||
| 2189 | struct irq_desc *desc = irq_to_desc(irq); | ||
| 2190 | |||
| 2191 | if (desc->status & IRQ_LEVEL) { | ||
| 2192 | desc->status |= IRQ_MOVE_PENDING; | ||
| 2193 | desc->pending_mask = mask; | ||
| 2194 | migrate_irq_remapped_level(irq); | ||
| 2195 | return; | ||
| 2196 | } | ||
| 2197 | |||
| 2198 | migrate_ioapic_irq(irq, mask); | ||
| 2199 | } | ||
| 2200 | #endif | ||
| 2201 | |||
| 1396 | asmlinkage void smp_irq_move_cleanup_interrupt(void) | 2202 | asmlinkage void smp_irq_move_cleanup_interrupt(void) |
| 1397 | { | 2203 | { |
| 1398 | unsigned vector, me; | 2204 | unsigned vector, me; |
| 1399 | ack_APIC_irq(); | 2205 | ack_APIC_irq(); |
| 2206 | #ifdef CONFIG_X86_64 | ||
| 1400 | exit_idle(); | 2207 | exit_idle(); |
| 2208 | #endif | ||
| 1401 | irq_enter(); | 2209 | irq_enter(); |
| 1402 | 2210 | ||
| 1403 | me = smp_processor_id(); | 2211 | me = smp_processor_id(); |
| @@ -1406,11 +2214,12 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) | |||
| 1406 | struct irq_desc *desc; | 2214 | struct irq_desc *desc; |
| 1407 | struct irq_cfg *cfg; | 2215 | struct irq_cfg *cfg; |
| 1408 | irq = __get_cpu_var(vector_irq)[vector]; | 2216 | irq = __get_cpu_var(vector_irq)[vector]; |
| 1409 | if (irq >= NR_IRQS) | 2217 | |
| 2218 | desc = irq_to_desc(irq); | ||
| 2219 | if (!desc) | ||
| 1410 | continue; | 2220 | continue; |
| 1411 | 2221 | ||
| 1412 | desc = irq_desc + irq; | 2222 | cfg = irq_cfg(irq); |
| 1413 | cfg = irq_cfg + irq; | ||
| 1414 | spin_lock(&desc->lock); | 2223 | spin_lock(&desc->lock); |
| 1415 | if (!cfg->move_cleanup_count) | 2224 | if (!cfg->move_cleanup_count) |
| 1416 | goto unlock; | 2225 | goto unlock; |
| @@ -1429,7 +2238,7 @@ unlock: | |||
| 1429 | 2238 | ||
| 1430 | static void irq_complete_move(unsigned int irq) | 2239 | static void irq_complete_move(unsigned int irq) |
| 1431 | { | 2240 | { |
| 1432 | struct irq_cfg *cfg = irq_cfg + irq; | 2241 | struct irq_cfg *cfg = irq_cfg(irq); |
| 1433 | unsigned vector, me; | 2242 | unsigned vector, me; |
| 1434 | 2243 | ||
| 1435 | if (likely(!cfg->move_in_progress)) | 2244 | if (likely(!cfg->move_in_progress)) |
| @@ -1449,6 +2258,17 @@ static void irq_complete_move(unsigned int irq) | |||
| 1449 | #else | 2258 | #else |
| 1450 | static inline void irq_complete_move(unsigned int irq) {} | 2259 | static inline void irq_complete_move(unsigned int irq) {} |
| 1451 | #endif | 2260 | #endif |
| 2261 | #ifdef CONFIG_INTR_REMAP | ||
| 2262 | static void ack_x2apic_level(unsigned int irq) | ||
| 2263 | { | ||
| 2264 | ack_x2APIC_irq(); | ||
| 2265 | } | ||
| 2266 | |||
| 2267 | static void ack_x2apic_edge(unsigned int irq) | ||
| 2268 | { | ||
| 2269 | ack_x2APIC_irq(); | ||
| 2270 | } | ||
| 2271 | #endif | ||
| 1452 | 2272 | ||
| 1453 | static void ack_apic_edge(unsigned int irq) | 2273 | static void ack_apic_edge(unsigned int irq) |
| 1454 | { | 2274 | { |
| @@ -1457,19 +2277,50 @@ static void ack_apic_edge(unsigned int irq) | |||
| 1457 | ack_APIC_irq(); | 2277 | ack_APIC_irq(); |
| 1458 | } | 2278 | } |
| 1459 | 2279 | ||
| 2280 | atomic_t irq_mis_count; | ||
| 2281 | |||
| 1460 | static void ack_apic_level(unsigned int irq) | 2282 | static void ack_apic_level(unsigned int irq) |
| 1461 | { | 2283 | { |
| 2284 | #ifdef CONFIG_X86_32 | ||
| 2285 | unsigned long v; | ||
| 2286 | int i; | ||
| 2287 | #endif | ||
| 1462 | int do_unmask_irq = 0; | 2288 | int do_unmask_irq = 0; |
| 1463 | 2289 | ||
| 1464 | irq_complete_move(irq); | 2290 | irq_complete_move(irq); |
| 1465 | #ifdef CONFIG_GENERIC_PENDING_IRQ | 2291 | #ifdef CONFIG_GENERIC_PENDING_IRQ |
| 1466 | /* If we are moving the irq we need to mask it */ | 2292 | /* If we are moving the irq we need to mask it */ |
| 1467 | if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) { | 2293 | if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) { |
| 1468 | do_unmask_irq = 1; | 2294 | do_unmask_irq = 1; |
| 1469 | mask_IO_APIC_irq(irq); | 2295 | mask_IO_APIC_irq(irq); |
| 1470 | } | 2296 | } |
| 1471 | #endif | 2297 | #endif |
| 1472 | 2298 | ||
| 2299 | #ifdef CONFIG_X86_32 | ||
| 2300 | /* | ||
| 2301 | * It appears there is an erratum which affects at least version 0x11 | ||
| 2302 | * of I/O APIC (that's the 82093AA and cores integrated into various | ||
| 2303 | * chipsets). Under certain conditions a level-triggered interrupt is | ||
| 2304 | * erroneously delivered as edge-triggered one but the respective IRR | ||
| 2305 | * bit gets set nevertheless. As a result the I/O unit expects an EOI | ||
| 2306 | * message but it will never arrive and further interrupts are blocked | ||
| 2307 | * from the source. The exact reason is so far unknown, but the | ||
| 2308 | * phenomenon was observed when two consecutive interrupt requests | ||
| 2309 | * from a given source get delivered to the same CPU and the source is | ||
| 2310 | * temporarily disabled in between. | ||
| 2311 | * | ||
| 2312 | * A workaround is to simulate an EOI message manually. We achieve it | ||
| 2313 | * by setting the trigger mode to edge and then to level when the edge | ||
| 2314 | * trigger mode gets detected in the TMR of a local APIC for a | ||
| 2315 | * level-triggered interrupt. We mask the source for the time of the | ||
| 2316 | * operation to prevent an edge-triggered interrupt escaping meanwhile. | ||
| 2317 | * The idea is from Manfred Spraul. --macro | ||
| 2318 | */ | ||
| 2319 | i = irq_cfg(irq)->vector; | ||
| 2320 | |||
| 2321 | v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); | ||
| 2322 | #endif | ||
| 2323 | |||
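The TMR lookup just added packs the 256 vector bits into eight 32-bit registers spaced 0x10 apart. A worked example of the indexing, using vector 0x31 purely as an assumed illustration:

```c
/* Illustrative sketch only; the vector value 0x31 is an assumption. */
unsigned int i = 0x31;                          /* vector 49 */
unsigned long v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
                                                /* word 1, i.e. APIC_TMR + 0x10 */
int level_latched = !!(v & (1 << (i & 0x1f)));  /* bit 17 of that word */
/* level_latched == 0 is the erratum case described above: the IRR bit is set
 * but the interrupt was latched as edge, so the code further down simulates
 * the missing EOI by flipping the RTE to edge and back to level while the
 * source is masked. */
```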
| 1473 | /* | 2324 | /* |
| 1474 | * We must acknowledge the irq before we move it or the acknowledge will | 2325 | * We must acknowledge the irq before we move it or the acknowledge will |
| 1475 | * not propagate properly. | 2326 | * not propagate properly. |
| @@ -1508,24 +2359,51 @@ static void ack_apic_level(unsigned int irq) | |||
| 1508 | move_masked_irq(irq); | 2359 | move_masked_irq(irq); |
| 1509 | unmask_IO_APIC_irq(irq); | 2360 | unmask_IO_APIC_irq(irq); |
| 1510 | } | 2361 | } |
| 2362 | |||
| 2363 | #ifdef CONFIG_X86_32 | ||
| 2364 | if (!(v & (1 << (i & 0x1f)))) { | ||
| 2365 | atomic_inc(&irq_mis_count); | ||
| 2366 | spin_lock(&ioapic_lock); | ||
| 2367 | __mask_and_edge_IO_APIC_irq(irq); | ||
| 2368 | __unmask_and_level_IO_APIC_irq(irq); | ||
| 2369 | spin_unlock(&ioapic_lock); | ||
| 2370 | } | ||
| 2371 | #endif | ||
| 1511 | } | 2372 | } |
| 1512 | 2373 | ||
| 1513 | static struct irq_chip ioapic_chip __read_mostly = { | 2374 | static struct irq_chip ioapic_chip __read_mostly = { |
| 1514 | .name = "IO-APIC", | 2375 | .name = "IO-APIC", |
| 1515 | .startup = startup_ioapic_irq, | 2376 | .startup = startup_ioapic_irq, |
| 1516 | .mask = mask_IO_APIC_irq, | 2377 | .mask = mask_IO_APIC_irq, |
| 1517 | .unmask = unmask_IO_APIC_irq, | 2378 | .unmask = unmask_IO_APIC_irq, |
| 1518 | .ack = ack_apic_edge, | 2379 | .ack = ack_apic_edge, |
| 1519 | .eoi = ack_apic_level, | 2380 | .eoi = ack_apic_level, |
| 1520 | #ifdef CONFIG_SMP | 2381 | #ifdef CONFIG_SMP |
| 1521 | .set_affinity = set_ioapic_affinity_irq, | 2382 | .set_affinity = set_ioapic_affinity_irq, |
| 1522 | #endif | 2383 | #endif |
| 1523 | .retrigger = ioapic_retrigger_irq, | 2384 | .retrigger = ioapic_retrigger_irq, |
| 1524 | }; | 2385 | }; |
| 1525 | 2386 | ||
| 2387 | #ifdef CONFIG_INTR_REMAP | ||
| 2388 | static struct irq_chip ir_ioapic_chip __read_mostly = { | ||
| 2389 | .name = "IR-IO-APIC", | ||
| 2390 | .startup = startup_ioapic_irq, | ||
| 2391 | .mask = mask_IO_APIC_irq, | ||
| 2392 | .unmask = unmask_IO_APIC_irq, | ||
| 2393 | .ack = ack_x2apic_edge, | ||
| 2394 | .eoi = ack_x2apic_level, | ||
| 2395 | #ifdef CONFIG_SMP | ||
| 2396 | .set_affinity = set_ir_ioapic_affinity_irq, | ||
| 2397 | #endif | ||
| 2398 | .retrigger = ioapic_retrigger_irq, | ||
| 2399 | }; | ||
| 2400 | #endif | ||
| 2401 | |||
| 1526 | static inline void init_IO_APIC_traps(void) | 2402 | static inline void init_IO_APIC_traps(void) |
| 1527 | { | 2403 | { |
| 1528 | int irq; | 2404 | int irq; |
| 2405 | struct irq_desc *desc; | ||
| 2406 | struct irq_cfg *cfg; | ||
| 1529 | 2407 | ||
| 1530 | /* | 2408 | /* |
| 1531 | * NOTE! The local APIC isn't very good at handling | 2409 | * NOTE! The local APIC isn't very good at handling |
| @@ -1538,8 +2416,8 @@ static inline void init_IO_APIC_traps(void) | |||
| 1538 | * Also, we've got to be careful not to trash gate | 2416 | * Also, we've got to be careful not to trash gate |
| 1539 | * 0x80, because int 0x80 is hm, kind of importantish. ;) | 2417 | * 0x80, because int 0x80 is hm, kind of importantish. ;) |
| 1540 | */ | 2418 | */ |
| 1541 | for (irq = 0; irq < NR_IRQS ; irq++) { | 2419 | for_each_irq_cfg(irq, cfg) { |
| 1542 | if (IO_APIC_IRQ(irq) && !irq_cfg[irq].vector) { | 2420 | if (IO_APIC_IRQ(irq) && !cfg->vector) { |
| 1543 | /* | 2421 | /* |
| 1544 | * Hmm.. We don't have an entry for this, | 2422 | * Hmm.. We don't have an entry for this, |
| 1545 | * so default to an old-fashioned 8259 | 2423 | * so default to an old-fashioned 8259 |
| @@ -1547,27 +2425,33 @@ static inline void init_IO_APIC_traps(void) | |||
| 1547 | */ | 2425 | */ |
| 1548 | if (irq < 16) | 2426 | if (irq < 16) |
| 1549 | make_8259A_irq(irq); | 2427 | make_8259A_irq(irq); |
| 1550 | else | 2428 | else { |
| 2429 | desc = irq_to_desc(irq); | ||
| 1551 | /* Strange. Oh, well.. */ | 2430 | /* Strange. Oh, well.. */ |
| 1552 | irq_desc[irq].chip = &no_irq_chip; | 2431 | desc->chip = &no_irq_chip; |
| 2432 | } | ||
| 1553 | } | 2433 | } |
| 1554 | } | 2434 | } |
| 1555 | } | 2435 | } |
| 1556 | 2436 | ||
| 1557 | static void unmask_lapic_irq(unsigned int irq) | 2437 | /* |
| 2438 | * The local APIC irq-chip implementation: | ||
| 2439 | */ | ||
| 2440 | |||
| 2441 | static void mask_lapic_irq(unsigned int irq) | ||
| 1558 | { | 2442 | { |
| 1559 | unsigned long v; | 2443 | unsigned long v; |
| 1560 | 2444 | ||
| 1561 | v = apic_read(APIC_LVT0); | 2445 | v = apic_read(APIC_LVT0); |
| 1562 | apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); | 2446 | apic_write(APIC_LVT0, v | APIC_LVT_MASKED); |
| 1563 | } | 2447 | } |
| 1564 | 2448 | ||
| 1565 | static void mask_lapic_irq(unsigned int irq) | 2449 | static void unmask_lapic_irq(unsigned int irq) |
| 1566 | { | 2450 | { |
| 1567 | unsigned long v; | 2451 | unsigned long v; |
| 1568 | 2452 | ||
| 1569 | v = apic_read(APIC_LVT0); | 2453 | v = apic_read(APIC_LVT0); |
| 1570 | apic_write(APIC_LVT0, v | APIC_LVT_MASKED); | 2454 | apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); |
| 1571 | } | 2455 | } |
| 1572 | 2456 | ||
| 1573 | static void ack_lapic_irq (unsigned int irq) | 2457 | static void ack_lapic_irq (unsigned int irq) |
| @@ -1584,7 +2468,10 @@ static struct irq_chip lapic_chip __read_mostly = { | |||
| 1584 | 2468 | ||
| 1585 | static void lapic_register_intr(int irq) | 2469 | static void lapic_register_intr(int irq) |
| 1586 | { | 2470 | { |
| 1587 | irq_desc[irq].status &= ~IRQ_LEVEL; | 2471 | struct irq_desc *desc; |
| 2472 | |||
| 2473 | desc = irq_to_desc(irq); | ||
| 2474 | desc->status &= ~IRQ_LEVEL; | ||
| 1588 | set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, | 2475 | set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, |
| 1589 | "edge"); | 2476 | "edge"); |
| 1590 | } | 2477 | } |
| @@ -1592,19 +2479,19 @@ static void lapic_register_intr(int irq) | |||
| 1592 | static void __init setup_nmi(void) | 2479 | static void __init setup_nmi(void) |
| 1593 | { | 2480 | { |
| 1594 | /* | 2481 | /* |
| 1595 | * Dirty trick to enable the NMI watchdog ... | 2482 | * Dirty trick to enable the NMI watchdog ... |
| 1596 | * We put the 8259A master into AEOI mode and | 2483 | * We put the 8259A master into AEOI mode and |
| 1597 | * unmask on all local APICs LVT0 as NMI. | 2484 | * unmask on all local APICs LVT0 as NMI. |
| 1598 | * | 2485 | * |
| 1599 | * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire') | 2486 | * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire') |
| 1600 | * is from Maciej W. Rozycki - so we do not have to EOI from | 2487 | * is from Maciej W. Rozycki - so we do not have to EOI from |
| 1601 | * the NMI handler or the timer interrupt. | 2488 | * the NMI handler or the timer interrupt. |
| 1602 | */ | 2489 | */ |
| 1603 | printk(KERN_INFO "activating NMI Watchdog ..."); | 2490 | apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ..."); |
| 1604 | 2491 | ||
| 1605 | enable_NMI_through_LVT0(); | 2492 | enable_NMI_through_LVT0(); |
| 1606 | 2493 | ||
| 1607 | printk(" done.\n"); | 2494 | apic_printk(APIC_VERBOSE, " done.\n"); |
| 1608 | } | 2495 | } |
| 1609 | 2496 | ||
| 1610 | /* | 2497 | /* |
| @@ -1621,12 +2508,17 @@ static inline void __init unlock_ExtINT_logic(void) | |||
| 1621 | unsigned char save_control, save_freq_select; | 2508 | unsigned char save_control, save_freq_select; |
| 1622 | 2509 | ||
| 1623 | pin = find_isa_irq_pin(8, mp_INT); | 2510 | pin = find_isa_irq_pin(8, mp_INT); |
| 2511 | if (pin == -1) { | ||
| 2512 | WARN_ON_ONCE(1); | ||
| 2513 | return; | ||
| 2514 | } | ||
| 1624 | apic = find_isa_irq_apic(8, mp_INT); | 2515 | apic = find_isa_irq_apic(8, mp_INT); |
| 1625 | if (pin == -1) | 2516 | if (apic == -1) { |
| 2517 | WARN_ON_ONCE(1); | ||
| 1626 | return; | 2518 | return; |
| 2519 | } | ||
| 1627 | 2520 | ||
| 1628 | entry0 = ioapic_read_entry(apic, pin); | 2521 | entry0 = ioapic_read_entry(apic, pin); |
| 1629 | |||
| 1630 | clear_IO_APIC_pin(apic, pin); | 2522 | clear_IO_APIC_pin(apic, pin); |
| 1631 | 2523 | ||
| 1632 | memset(&entry1, 0, sizeof(entry1)); | 2524 | memset(&entry1, 0, sizeof(entry1)); |
| @@ -1661,23 +2553,38 @@ static inline void __init unlock_ExtINT_logic(void) | |||
| 1661 | ioapic_write_entry(apic, pin, entry0); | 2553 | ioapic_write_entry(apic, pin, entry0); |
| 1662 | } | 2554 | } |
| 1663 | 2555 | ||
| 2556 | static int disable_timer_pin_1 __initdata; | ||
| 2557 | /* Actually the next is obsolete, but keep it for paranoid reasons -AK */ | ||
| 2558 | static int __init disable_timer_pin_setup(char *arg) | ||
| 2559 | { | ||
| 2560 | disable_timer_pin_1 = 1; | ||
| 2561 | return 0; | ||
| 2562 | } | ||
| 2563 | early_param("disable_timer_pin_1", disable_timer_pin_setup); | ||
| 2564 | |||
| 2565 | int timer_through_8259 __initdata; | ||
| 2566 | |||
| 1664 | /* | 2567 | /* |
| 1665 | * This code may look a bit paranoid, but it's supposed to cooperate with | 2568 | * This code may look a bit paranoid, but it's supposed to cooperate with |
| 1666 | * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ | 2569 | * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ |
| 1667 | * is so screwy. Thanks to Brian Perkins for testing/hacking this beast | 2570 | * is so screwy. Thanks to Brian Perkins for testing/hacking this beast |
| 1668 | * fanatically on his truly buggy board. | 2571 | * fanatically on his truly buggy board. |
| 1669 | * | 2572 | * |
| 1670 | * FIXME: really need to revamp this for modern platforms only. | 2573 | * FIXME: really need to revamp this for all platforms. |
| 1671 | */ | 2574 | */ |
| 1672 | static inline void __init check_timer(void) | 2575 | static inline void __init check_timer(void) |
| 1673 | { | 2576 | { |
| 1674 | struct irq_cfg *cfg = irq_cfg + 0; | 2577 | struct irq_cfg *cfg = irq_cfg(0); |
| 1675 | int apic1, pin1, apic2, pin2; | 2578 | int apic1, pin1, apic2, pin2; |
| 1676 | unsigned long flags; | 2579 | unsigned long flags; |
| 2580 | unsigned int ver; | ||
| 1677 | int no_pin1 = 0; | 2581 | int no_pin1 = 0; |
| 1678 | 2582 | ||
| 1679 | local_irq_save(flags); | 2583 | local_irq_save(flags); |
| 1680 | 2584 | ||
| 2585 | ver = apic_read(APIC_LVR); | ||
| 2586 | ver = GET_APIC_VERSION(ver); | ||
| 2587 | |||
| 1681 | /* | 2588 | /* |
| 1682 | * get/set the timer IRQ vector: | 2589 | * get/set the timer IRQ vector: |
| 1683 | */ | 2590 | */ |
| @@ -1686,18 +2593,27 @@ static inline void __init check_timer(void) | |||
| 1686 | 2593 | ||
| 1687 | /* | 2594 | /* |
| 1688 | * As IRQ0 is to be enabled in the 8259A, the virtual | 2595 | * As IRQ0 is to be enabled in the 8259A, the virtual |
| 1689 | * wire has to be disabled in the local APIC. | 2596 | * wire has to be disabled in the local APIC. Also |
| 2597 | * timer interrupts need to be acknowledged manually in | ||
| 2598 | * the 8259A for the i82489DX when using the NMI | ||
| 2599 | * watchdog as that APIC treats NMIs as level-triggered. | ||
| 2600 | * The AEOI mode will finish them in the 8259A | ||
| 2601 | * automatically. | ||
| 1690 | */ | 2602 | */ |
| 1691 | apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); | 2603 | apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); |
| 1692 | init_8259A(1); | 2604 | init_8259A(1); |
| 2605 | #ifdef CONFIG_X86_32 | ||
| 2606 | timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); | ||
| 2607 | #endif | ||
| 1693 | 2608 | ||
| 1694 | pin1 = find_isa_irq_pin(0, mp_INT); | 2609 | pin1 = find_isa_irq_pin(0, mp_INT); |
| 1695 | apic1 = find_isa_irq_apic(0, mp_INT); | 2610 | apic1 = find_isa_irq_apic(0, mp_INT); |
| 1696 | pin2 = ioapic_i8259.pin; | 2611 | pin2 = ioapic_i8259.pin; |
| 1697 | apic2 = ioapic_i8259.apic; | 2612 | apic2 = ioapic_i8259.apic; |
| 1698 | 2613 | ||
| 1699 | apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", | 2614 | apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X " |
| 1700 | cfg->vector, apic1, pin1, apic2, pin2); | 2615 | "apic1=%d pin1=%d apic2=%d pin2=%d\n", |
| 2616 | cfg->vector, apic1, pin1, apic2, pin2); | ||
| 1701 | 2617 | ||
| 1702 | /* | 2618 | /* |
| 1703 | * Some BIOS writers are clueless and report the ExtINTA | 2619 | * Some BIOS writers are clueless and report the ExtINTA |
| @@ -1707,6 +2623,10 @@ static inline void __init check_timer(void) | |||
| 1707 | * 8259A. | 2623 | * 8259A. |
| 1708 | */ | 2624 | */ |
| 1709 | if (pin1 == -1) { | 2625 | if (pin1 == -1) { |
| 2626 | #ifdef CONFIG_INTR_REMAP | ||
| 2627 | if (intr_remapping_enabled) | ||
| 2628 | panic("BIOS bug: timer not connected to IO-APIC"); | ||
| 2629 | #endif | ||
| 1710 | pin1 = pin2; | 2630 | pin1 = pin2; |
| 1711 | apic1 = apic2; | 2631 | apic1 = apic2; |
| 1712 | no_pin1 = 1; | 2632 | no_pin1 = 1; |
| @@ -1724,7 +2644,7 @@ static inline void __init check_timer(void) | |||
| 1724 | setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); | 2644 | setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); |
| 1725 | } | 2645 | } |
| 1726 | unmask_IO_APIC_irq(0); | 2646 | unmask_IO_APIC_irq(0); |
| 1727 | if (!no_timer_check && timer_irq_works()) { | 2647 | if (timer_irq_works()) { |
| 1728 | if (nmi_watchdog == NMI_IO_APIC) { | 2648 | if (nmi_watchdog == NMI_IO_APIC) { |
| 1729 | setup_nmi(); | 2649 | setup_nmi(); |
| 1730 | enable_8259A_irq(0); | 2650 | enable_8259A_irq(0); |
| @@ -1733,16 +2653,19 @@ static inline void __init check_timer(void) | |||
| 1733 | clear_IO_APIC_pin(0, pin1); | 2653 | clear_IO_APIC_pin(0, pin1); |
| 1734 | goto out; | 2654 | goto out; |
| 1735 | } | 2655 | } |
| 2656 | #ifdef CONFIG_INTR_REMAP | ||
| 2657 | if (intr_remapping_enabled) | ||
| 2658 | panic("timer doesn't work through Interrupt-remapped IO-APIC"); | ||
| 2659 | #endif | ||
| 1736 | clear_IO_APIC_pin(apic1, pin1); | 2660 | clear_IO_APIC_pin(apic1, pin1); |
| 1737 | if (!no_pin1) | 2661 | if (!no_pin1) |
| 1738 | apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: " | 2662 | apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " |
| 1739 | "8254 timer not connected to IO-APIC\n"); | 2663 | "8254 timer not connected to IO-APIC\n"); |
| 1740 | 2664 | ||
| 1741 | apic_printk(APIC_VERBOSE,KERN_INFO | 2665 | apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer " |
| 1742 | "...trying to set up timer (IRQ0) " | 2666 | "(IRQ0) through the 8259A ...\n"); |
| 1743 | "through the 8259A ... "); | 2667 | apic_printk(APIC_QUIET, KERN_INFO |
| 1744 | apic_printk(APIC_VERBOSE,"\n..... (found apic %d pin %d) ...", | 2668 | "..... (found apic %d pin %d) ...\n", apic2, pin2); |
| 1745 | apic2, pin2); | ||
| 1746 | /* | 2669 | /* |
| 1747 | * legacy devices should be connected to IO APIC #0 | 2670 | * legacy devices should be connected to IO APIC #0 |
| 1748 | */ | 2671 | */ |
| @@ -1751,7 +2674,7 @@ static inline void __init check_timer(void) | |||
| 1751 | unmask_IO_APIC_irq(0); | 2674 | unmask_IO_APIC_irq(0); |
| 1752 | enable_8259A_irq(0); | 2675 | enable_8259A_irq(0); |
| 1753 | if (timer_irq_works()) { | 2676 | if (timer_irq_works()) { |
| 1754 | apic_printk(APIC_VERBOSE," works.\n"); | 2677 | apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); |
| 1755 | timer_through_8259 = 1; | 2678 | timer_through_8259 = 1; |
| 1756 | if (nmi_watchdog == NMI_IO_APIC) { | 2679 | if (nmi_watchdog == NMI_IO_APIC) { |
| 1757 | disable_8259A_irq(0); | 2680 | disable_8259A_irq(0); |
| @@ -1765,29 +2688,35 @@ static inline void __init check_timer(void) | |||
| 1765 | */ | 2688 | */ |
| 1766 | disable_8259A_irq(0); | 2689 | disable_8259A_irq(0); |
| 1767 | clear_IO_APIC_pin(apic2, pin2); | 2690 | clear_IO_APIC_pin(apic2, pin2); |
| 1768 | apic_printk(APIC_VERBOSE," failed.\n"); | 2691 | apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); |
| 1769 | } | 2692 | } |
| 1770 | 2693 | ||
| 1771 | if (nmi_watchdog == NMI_IO_APIC) { | 2694 | if (nmi_watchdog == NMI_IO_APIC) { |
| 1772 | printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n"); | 2695 | apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work " |
| 2696 | "through the IO-APIC - disabling NMI Watchdog!\n"); | ||
| 1773 | nmi_watchdog = NMI_NONE; | 2697 | nmi_watchdog = NMI_NONE; |
| 1774 | } | 2698 | } |
| 2699 | #ifdef CONFIG_X86_32 | ||
| 2700 | timer_ack = 0; | ||
| 2701 | #endif | ||
| 1775 | 2702 | ||
| 1776 | apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as Virtual Wire IRQ..."); | 2703 | apic_printk(APIC_QUIET, KERN_INFO |
| 2704 | "...trying to set up timer as Virtual Wire IRQ...\n"); | ||
| 1777 | 2705 | ||
| 1778 | lapic_register_intr(0); | 2706 | lapic_register_intr(0); |
| 1779 | apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ | 2707 | apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ |
| 1780 | enable_8259A_irq(0); | 2708 | enable_8259A_irq(0); |
| 1781 | 2709 | ||
| 1782 | if (timer_irq_works()) { | 2710 | if (timer_irq_works()) { |
| 1783 | apic_printk(APIC_VERBOSE," works.\n"); | 2711 | apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); |
| 1784 | goto out; | 2712 | goto out; |
| 1785 | } | 2713 | } |
| 1786 | disable_8259A_irq(0); | 2714 | disable_8259A_irq(0); |
| 1787 | apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); | 2715 | apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); |
| 1788 | apic_printk(APIC_VERBOSE," failed.\n"); | 2716 | apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); |
| 1789 | 2717 | ||
| 1790 | apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as ExtINT IRQ..."); | 2718 | apic_printk(APIC_QUIET, KERN_INFO |
| 2719 | "...trying to set up timer as ExtINT IRQ...\n"); | ||
| 1791 | 2720 | ||
| 1792 | init_8259A(0); | 2721 | init_8259A(0); |
| 1793 | make_8259A_irq(0); | 2722 | make_8259A_irq(0); |
| @@ -1796,22 +2725,16 @@ static inline void __init check_timer(void) | |||
| 1796 | unlock_ExtINT_logic(); | 2725 | unlock_ExtINT_logic(); |
| 1797 | 2726 | ||
| 1798 | if (timer_irq_works()) { | 2727 | if (timer_irq_works()) { |
| 1799 | apic_printk(APIC_VERBOSE," works.\n"); | 2728 | apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); |
| 1800 | goto out; | 2729 | goto out; |
| 1801 | } | 2730 | } |
| 1802 | apic_printk(APIC_VERBOSE," failed :(.\n"); | 2731 | apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n"); |
| 1803 | panic("IO-APIC + timer doesn't work! Try using the 'noapic' kernel parameter\n"); | 2732 | panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " |
| 2733 | "report. Then try booting with the 'noapic' option.\n"); | ||
| 1804 | out: | 2734 | out: |
| 1805 | local_irq_restore(flags); | 2735 | local_irq_restore(flags); |
| 1806 | } | 2736 | } |
| 1807 | 2737 | ||
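As a reading aid, the fallback ladder check_timer() above walks through (a summary of the code, no new behaviour):

```c
/* Summary of check_timer() above:
 *   1. route IRQ0 through the IO-APIC pin reported by the MP table (pin1);
 *   2. failing that, try the pin fed by the i8259 ExtINT output (pin2), i.e.
 *      8254 -> 8259 -> IO-APIC, and remember it as timer_through_8259;
 *   3. failing that, program the local APIC's LVT0 as a fixed vector and let
 *      the 8259 deliver IRQ0 as a virtual wire;
 *   4. as a last resort switch LVT0 to ExtINT and unlock the RTC/ExtINT
 *      logic; if even that fails the kernel panics and asks for an
 *      apic=debug report or a 'noapic' boot.
 */
```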
| 1808 | static int __init notimercheck(char *s) | ||
| 1809 | { | ||
| 1810 | no_timer_check = 1; | ||
| 1811 | return 1; | ||
| 1812 | } | ||
| 1813 | __setup("no_timer_check", notimercheck); | ||
| 1814 | |||
| 1815 | /* | 2738 | /* |
| 1816 | * Traditionally ISA IRQ2 is the cascade IRQ, and is not available | 2739 | * Traditionally ISA IRQ2 is the cascade IRQ, and is not available |
| 1817 | * to devices. However there may be an I/O APIC pin available for | 2740 | * to devices. However there may be an I/O APIC pin available for |
| @@ -1829,27 +2752,49 @@ __setup("no_timer_check", notimercheck); | |||
| 1829 | * the I/O APIC in all cases now. No actual device should request | 2752 | * the I/O APIC in all cases now. No actual device should request |
| 1830 | * it anyway. --macro | 2753 | * it anyway. --macro |
| 1831 | */ | 2754 | */ |
| 1832 | #define PIC_IRQS (1<<2) | 2755 | #define PIC_IRQS (1 << PIC_CASCADE_IR) |
| 1833 | 2756 | ||
| 1834 | void __init setup_IO_APIC(void) | 2757 | void __init setup_IO_APIC(void) |
| 1835 | { | 2758 | { |
| 1836 | 2759 | ||
| 2760 | #ifdef CONFIG_X86_32 | ||
| 2761 | enable_IO_APIC(); | ||
| 2762 | #else | ||
| 1837 | /* | 2763 | /* |
| 1838 | * calling enable_IO_APIC() is moved to setup_local_APIC for BP | 2764 | * calling enable_IO_APIC() is moved to setup_local_APIC for BP |
| 1839 | */ | 2765 | */ |
| 2766 | #endif | ||
| 1840 | 2767 | ||
| 1841 | io_apic_irqs = ~PIC_IRQS; | 2768 | io_apic_irqs = ~PIC_IRQS; |
| 1842 | 2769 | ||
| 1843 | apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); | 2770 | apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); |
| 1844 | 2771 | /* | |
| 2772 | * Set up IO-APIC IRQ routing. | ||
| 2773 | */ | ||
| 2774 | #ifdef CONFIG_X86_32 | ||
| 2775 | if (!acpi_ioapic) | ||
| 2776 | setup_ioapic_ids_from_mpc(); | ||
| 2777 | #endif | ||
| 1845 | sync_Arb_IDs(); | 2778 | sync_Arb_IDs(); |
| 1846 | setup_IO_APIC_irqs(); | 2779 | setup_IO_APIC_irqs(); |
| 1847 | init_IO_APIC_traps(); | 2780 | init_IO_APIC_traps(); |
| 1848 | check_timer(); | 2781 | check_timer(); |
| 1849 | if (!acpi_ioapic) | ||
| 1850 | print_IO_APIC(); | ||
| 1851 | } | 2782 | } |
| 1852 | 2783 | ||
| 2784 | /* | ||
| 2785 | * Called after all the initialization is done. If we didn't find any | ||
| 2786 | * APIC bugs then we can allow the modify fast path | ||
| 2787 | */ | ||
| 2788 | |||
| 2789 | static int __init io_apic_bug_finalize(void) | ||
| 2790 | { | ||
| 2791 | if (sis_apic_bug == -1) | ||
| 2792 | sis_apic_bug = 0; | ||
| 2793 | return 0; | ||
| 2794 | } | ||
| 2795 | |||
| 2796 | late_initcall(io_apic_bug_finalize); | ||
| 2797 | |||
| 1853 | struct sysfs_ioapic_data { | 2798 | struct sysfs_ioapic_data { |
| 1854 | struct sys_device dev; | 2799 | struct sys_device dev; |
| 1855 | struct IO_APIC_route_entry entry[0]; | 2800 | struct IO_APIC_route_entry entry[0]; |
| @@ -1937,38 +2882,60 @@ device_initcall(ioapic_init_sysfs); | |||
| 1937 | /* | 2882 | /* |
| 1938 | * Dynamic irq allocate and deallocation | 2883 | * Dynamic irq allocate and deallocation |
| 1939 | */ | 2884 | */ |
| 1940 | int create_irq(void) | 2885 | unsigned int create_irq_nr(unsigned int irq_want) |
| 1941 | { | 2886 | { |
| 1942 | /* Allocate an unused irq */ | 2887 | /* Allocate an unused irq */ |
| 1943 | int irq; | 2888 | unsigned int irq; |
| 1944 | int new; | 2889 | unsigned int new; |
| 1945 | unsigned long flags; | 2890 | unsigned long flags; |
| 2891 | struct irq_cfg *cfg_new; | ||
| 2892 | |||
| 2893 | irq_want = nr_irqs - 1; | ||
| 1946 | 2894 | ||
| 1947 | irq = -ENOSPC; | 2895 | irq = 0; |
| 1948 | spin_lock_irqsave(&vector_lock, flags); | 2896 | spin_lock_irqsave(&vector_lock, flags); |
| 1949 | for (new = (NR_IRQS - 1); new >= 0; new--) { | 2897 | for (new = irq_want; new > 0; new--) { |
| 1950 | if (platform_legacy_irq(new)) | 2898 | if (platform_legacy_irq(new)) |
| 1951 | continue; | 2899 | continue; |
| 1952 | if (irq_cfg[new].vector != 0) | 2900 | cfg_new = irq_cfg(new); |
| 2901 | if (cfg_new && cfg_new->vector != 0) | ||
| 1953 | continue; | 2902 | continue; |
| 2903 | /* check if need to create one */ | ||
| 2904 | if (!cfg_new) | ||
| 2905 | cfg_new = irq_cfg_alloc(new); | ||
| 1954 | if (__assign_irq_vector(new, TARGET_CPUS) == 0) | 2906 | if (__assign_irq_vector(new, TARGET_CPUS) == 0) |
| 1955 | irq = new; | 2907 | irq = new; |
| 1956 | break; | 2908 | break; |
| 1957 | } | 2909 | } |
| 1958 | spin_unlock_irqrestore(&vector_lock, flags); | 2910 | spin_unlock_irqrestore(&vector_lock, flags); |
| 1959 | 2911 | ||
| 1960 | if (irq >= 0) { | 2912 | if (irq > 0) { |
| 1961 | dynamic_irq_init(irq); | 2913 | dynamic_irq_init(irq); |
| 1962 | } | 2914 | } |
| 1963 | return irq; | 2915 | return irq; |
| 1964 | } | 2916 | } |
| 1965 | 2917 | ||
| 2918 | int create_irq(void) | ||
| 2919 | { | ||
| 2920 | int irq; | ||
| 2921 | |||
| 2922 | irq = create_irq_nr(nr_irqs - 1); | ||
| 2923 | |||
| 2924 | if (irq == 0) | ||
| 2925 | irq = -1; | ||
| 2926 | |||
| 2927 | return irq; | ||
| 2928 | } | ||
| 2929 | |||
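A hypothetical consumer of the dynamic IRQ interface above (not in this patch), showing the changed failure convention: create_irq_nr() now returns 0 on failure and create_irq() maps that to -1.

```c
/* Hypothetical usage sketch; the function name is made up. */
static int example_grab_dynamic_irq(void)
{
        int irq = create_irq();         /* picks an unused vector-backed irq */

        if (irq < 0)
                return -ENOSPC;

        /* ...set_irq_chip_and_handler_name(), request_irq(), use it... */

        destroy_irq(irq);               /* frees the vector and, with interrupt
                                           remapping, the IRTE */
        return 0;
}
```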
| 1966 | void destroy_irq(unsigned int irq) | 2930 | void destroy_irq(unsigned int irq) |
| 1967 | { | 2931 | { |
| 1968 | unsigned long flags; | 2932 | unsigned long flags; |
| 1969 | 2933 | ||
| 1970 | dynamic_irq_cleanup(irq); | 2934 | dynamic_irq_cleanup(irq); |
| 1971 | 2935 | ||
| 2936 | #ifdef CONFIG_INTR_REMAP | ||
| 2937 | free_irte(irq); | ||
| 2938 | #endif | ||
| 1972 | spin_lock_irqsave(&vector_lock, flags); | 2939 | spin_lock_irqsave(&vector_lock, flags); |
| 1973 | __clear_irq_vector(irq); | 2940 | __clear_irq_vector(irq); |
| 1974 | spin_unlock_irqrestore(&vector_lock, flags); | 2941 | spin_unlock_irqrestore(&vector_lock, flags); |
| @@ -1980,18 +2947,50 @@ void destroy_irq(unsigned int irq) | |||
| 1980 | #ifdef CONFIG_PCI_MSI | 2947 | #ifdef CONFIG_PCI_MSI |
| 1981 | static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) | 2948 | static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) |
| 1982 | { | 2949 | { |
| 1983 | struct irq_cfg *cfg = irq_cfg + irq; | 2950 | struct irq_cfg *cfg; |
| 1984 | int err; | 2951 | int err; |
| 1985 | unsigned dest; | 2952 | unsigned dest; |
| 1986 | cpumask_t tmp; | 2953 | cpumask_t tmp; |
| 1987 | 2954 | ||
| 1988 | tmp = TARGET_CPUS; | 2955 | tmp = TARGET_CPUS; |
| 1989 | err = assign_irq_vector(irq, tmp); | 2956 | err = assign_irq_vector(irq, tmp); |
| 1990 | if (!err) { | 2957 | if (err) |
| 1991 | cpus_and(tmp, cfg->domain, tmp); | 2958 | return err; |
| 1992 | dest = cpu_mask_to_apicid(tmp); | 2959 | |
| 2960 | cfg = irq_cfg(irq); | ||
| 2961 | cpus_and(tmp, cfg->domain, tmp); | ||
| 2962 | dest = cpu_mask_to_apicid(tmp); | ||
| 2963 | |||
| 2964 | #ifdef CONFIG_INTR_REMAP | ||
| 2965 | if (irq_remapped(irq)) { | ||
| 2966 | struct irte irte; | ||
| 2967 | int ir_index; | ||
| 2968 | u16 sub_handle; | ||
| 2969 | |||
| 2970 | ir_index = map_irq_to_irte_handle(irq, &sub_handle); | ||
| 2971 | BUG_ON(ir_index == -1); | ||
| 2972 | |||
| 2973 | memset (&irte, 0, sizeof(irte)); | ||
| 2974 | |||
| 2975 | irte.present = 1; | ||
| 2976 | irte.dst_mode = INT_DEST_MODE; | ||
| 2977 | irte.trigger_mode = 0; /* edge */ | ||
| 2978 | irte.dlvry_mode = INT_DELIVERY_MODE; | ||
| 2979 | irte.vector = cfg->vector; | ||
| 2980 | irte.dest_id = IRTE_DEST(dest); | ||
| 2981 | |||
| 2982 | modify_irte(irq, &irte); | ||
| 1993 | 2983 | ||
| 1994 | msg->address_hi = MSI_ADDR_BASE_HI; | 2984 | msg->address_hi = MSI_ADDR_BASE_HI; |
| 2985 | msg->data = sub_handle; | ||
| 2986 | msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT | | ||
| 2987 | MSI_ADDR_IR_SHV | | ||
| 2988 | MSI_ADDR_IR_INDEX1(ir_index) | | ||
| 2989 | MSI_ADDR_IR_INDEX2(ir_index); | ||
| 2990 | } else | ||
| 2991 | #endif | ||
| 2992 | { | ||
| 2993 | msg->address_hi = MSI_ADDR_BASE_HI; | ||
| 1995 | msg->address_lo = | 2994 | msg->address_lo = |
| 1996 | MSI_ADDR_BASE_LO | | 2995 | MSI_ADDR_BASE_LO | |
| 1997 | ((INT_DEST_MODE == 0) ? | 2996 | ((INT_DEST_MODE == 0) ? |
| @@ -2016,10 +3015,11 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms | |||
| 2016 | #ifdef CONFIG_SMP | 3015 | #ifdef CONFIG_SMP |
| 2017 | static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) | 3016 | static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) |
| 2018 | { | 3017 | { |
| 2019 | struct irq_cfg *cfg = irq_cfg + irq; | 3018 | struct irq_cfg *cfg; |
| 2020 | struct msi_msg msg; | 3019 | struct msi_msg msg; |
| 2021 | unsigned int dest; | 3020 | unsigned int dest; |
| 2022 | cpumask_t tmp; | 3021 | cpumask_t tmp; |
| 3022 | struct irq_desc *desc; | ||
| 2023 | 3023 | ||
| 2024 | cpus_and(tmp, mask, cpu_online_map); | 3024 | cpus_and(tmp, mask, cpu_online_map); |
| 2025 | if (cpus_empty(tmp)) | 3025 | if (cpus_empty(tmp)) |
| @@ -2028,6 +3028,7 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) | |||
| 2028 | if (assign_irq_vector(irq, mask)) | 3028 | if (assign_irq_vector(irq, mask)) |
| 2029 | return; | 3029 | return; |
| 2030 | 3030 | ||
| 3031 | cfg = irq_cfg(irq); | ||
| 2031 | cpus_and(tmp, cfg->domain, mask); | 3032 | cpus_and(tmp, cfg->domain, mask); |
| 2032 | dest = cpu_mask_to_apicid(tmp); | 3033 | dest = cpu_mask_to_apicid(tmp); |
| 2033 | 3034 | ||
| @@ -2039,8 +3040,61 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) | |||
| 2039 | msg.address_lo |= MSI_ADDR_DEST_ID(dest); | 3040 | msg.address_lo |= MSI_ADDR_DEST_ID(dest); |
| 2040 | 3041 | ||
| 2041 | write_msi_msg(irq, &msg); | 3042 | write_msi_msg(irq, &msg); |
| 2042 | irq_desc[irq].affinity = mask; | 3043 | desc = irq_to_desc(irq); |
| 3044 | desc->affinity = mask; | ||
| 2043 | } | 3045 | } |
| 3046 | |||
| 3047 | #ifdef CONFIG_INTR_REMAP | ||
| 3048 | /* | ||
| 3049 | * Migrate the MSI irq to another cpumask. This migration is | ||
| 3050 | * done in the process context using interrupt-remapping hardware. | ||
| 3051 | */ | ||
| 3052 | static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) | ||
| 3053 | { | ||
| 3054 | struct irq_cfg *cfg; | ||
| 3055 | unsigned int dest; | ||
| 3056 | cpumask_t tmp, cleanup_mask; | ||
| 3057 | struct irte irte; | ||
| 3058 | struct irq_desc *desc; | ||
| 3059 | |||
| 3060 | cpus_and(tmp, mask, cpu_online_map); | ||
| 3061 | if (cpus_empty(tmp)) | ||
| 3062 | return; | ||
| 3063 | |||
| 3064 | if (get_irte(irq, &irte)) | ||
| 3065 | return; | ||
| 3066 | |||
| 3067 | if (assign_irq_vector(irq, mask)) | ||
| 3068 | return; | ||
| 3069 | |||
| 3070 | cfg = irq_cfg(irq); | ||
| 3071 | cpus_and(tmp, cfg->domain, mask); | ||
| 3072 | dest = cpu_mask_to_apicid(tmp); | ||
| 3073 | |||
| 3074 | irte.vector = cfg->vector; | ||
| 3075 | irte.dest_id = IRTE_DEST(dest); | ||
| 3076 | |||
| 3077 | /* | ||
| 3078 | * atomically update the IRTE with the new destination and vector. | ||
| 3079 | */ | ||
| 3080 | modify_irte(irq, &irte); | ||
| 3081 | |||
| 3082 | /* | ||
| 3083 | * After this point, all the interrupts will start arriving | ||
| 3084 | * at the new destination. So, time to cleanup the previous | ||
| 3085 | * vector allocation. | ||
| 3086 | */ | ||
| 3087 | if (cfg->move_in_progress) { | ||
| 3088 | cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); | ||
| 3089 | cfg->move_cleanup_count = cpus_weight(cleanup_mask); | ||
| 3090 | send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); | ||
| 3091 | cfg->move_in_progress = 0; | ||
| 3092 | } | ||
| 3093 | |||
| 3094 | desc = irq_to_desc(irq); | ||
| 3095 | desc->affinity = mask; | ||
| 3096 | } | ||
| 3097 | #endif | ||
| 2044 | #endif /* CONFIG_SMP */ | 3098 | #endif /* CONFIG_SMP */ |
| 2045 | 3099 | ||
| 2046 | /* | 3100 | /* |
| @@ -2058,26 +3112,179 @@ static struct irq_chip msi_chip = { | |||
| 2058 | .retrigger = ioapic_retrigger_irq, | 3112 | .retrigger = ioapic_retrigger_irq, |
| 2059 | }; | 3113 | }; |
| 2060 | 3114 | ||
| 2061 | int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) | 3115 | #ifdef CONFIG_INTR_REMAP |
| 3116 | static struct irq_chip msi_ir_chip = { | ||
| 3117 | .name = "IR-PCI-MSI", | ||
| 3118 | .unmask = unmask_msi_irq, | ||
| 3119 | .mask = mask_msi_irq, | ||
| 3120 | .ack = ack_x2apic_edge, | ||
| 3121 | #ifdef CONFIG_SMP | ||
| 3122 | .set_affinity = ir_set_msi_irq_affinity, | ||
| 3123 | #endif | ||
| 3124 | .retrigger = ioapic_retrigger_irq, | ||
| 3125 | }; | ||
| 3126 | |||
| 3127 | /* | ||
| 3128 | * Map the PCI dev to the corresponding remapping hardware unit | ||
| 3129 | * and allocate 'nvec' consecutive interrupt-remapping table entries | ||
| 3130 | * in it. | ||
| 3131 | */ | ||
| 3132 | static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec) | ||
| 2062 | { | 3133 | { |
| 3134 | struct intel_iommu *iommu; | ||
| 3135 | int index; | ||
| 3136 | |||
| 3137 | iommu = map_dev_to_ir(dev); | ||
| 3138 | if (!iommu) { | ||
| 3139 | printk(KERN_ERR | ||
| 3140 | "Unable to map PCI %s to iommu\n", pci_name(dev)); | ||
| 3141 | return -ENOENT; | ||
| 3142 | } | ||
| 3143 | |||
| 3144 | index = alloc_irte(iommu, irq, nvec); | ||
| 3145 | if (index < 0) { | ||
| 3146 | printk(KERN_ERR | ||
| 3147 | "Unable to allocate %d IRTE for PCI %s\n", nvec, | ||
| 3148 | pci_name(dev)); | ||
| 3149 | return -ENOSPC; | ||
| 3150 | } | ||
| 3151 | return index; | ||
| 3152 | } | ||
| 3153 | #endif | ||
| 3154 | |||
| 3155 | static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) | ||
| 3156 | { | ||
| 3157 | int ret; | ||
| 2063 | struct msi_msg msg; | 3158 | struct msi_msg msg; |
| 2064 | int irq, ret; | ||
| 2065 | irq = create_irq(); | ||
| 2066 | if (irq < 0) | ||
| 2067 | return irq; | ||
| 2068 | 3159 | ||
| 2069 | ret = msi_compose_msg(dev, irq, &msg); | 3160 | ret = msi_compose_msg(dev, irq, &msg); |
| 3161 | if (ret < 0) | ||
| 3162 | return ret; | ||
| 3163 | |||
| 3164 | set_irq_msi(irq, desc); | ||
| 3165 | write_msi_msg(irq, &msg); | ||
| 3166 | |||
| 3167 | #ifdef CONFIG_INTR_REMAP | ||
| 3168 | if (irq_remapped(irq)) { | ||
| 3169 | struct irq_desc *desc = irq_to_desc(irq); | ||
| 3170 | /* | ||
| 3171 | * irq migration in process context | ||
| 3172 | */ | ||
| 3173 | desc->status |= IRQ_MOVE_PCNTXT; | ||
| 3174 | set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge"); | ||
| 3175 | } else | ||
| 3176 | #endif | ||
| 3177 | set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); | ||
| 3178 | |||
| 3179 | dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq); | ||
| 3180 | |||
| 3181 | return 0; | ||
| 3182 | } | ||
| 3183 | |||
| 3184 | static unsigned int build_irq_for_pci_dev(struct pci_dev *dev) | ||
| 3185 | { | ||
| 3186 | unsigned int irq; | ||
| 3187 | |||
| 3188 | irq = dev->bus->number; | ||
| 3189 | irq <<= 8; | ||
| 3190 | irq |= dev->devfn; | ||
| 3191 | irq <<= 12; | ||
| 3192 | |||
| 3193 | return irq; | ||
| 3194 | } | ||
| 3195 | |||
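A worked example of the hint computed above, assuming a device at bus 0x02, devfn 0x18 (slot 3, function 0, made-up numbers):

```c
/* Illustrative arithmetic only; the device location is an assumption.
 *   irq  = 0x02;                      bus number
 *   irq  = (irq << 8) | 0x18;      -> 0x218      bus:devfn
 *   irq <<= 12;                    -> 0x218000
 * arch_setup_msi_irq() below then passes 0x218000 + 0x100 as the hint to
 * create_irq_nr(), although in this snapshot create_irq_nr() still overrides
 * the hint with nr_irqs - 1 before scanning downward for a free entry. */
```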
| 3196 | int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) | ||
| 3197 | { | ||
| 3198 | unsigned int irq; | ||
| 3199 | int ret; | ||
| 3200 | unsigned int irq_want; | ||
| 3201 | |||
| 3202 | irq_want = build_irq_for_pci_dev(dev) + 0x100; | ||
| 3203 | |||
| 3204 | irq = create_irq_nr(irq_want); | ||
| 3205 | if (irq == 0) | ||
| 3206 | return -1; | ||
| 3207 | |||
| 3208 | #ifdef CONFIG_INTR_REMAP | ||
| 3209 | if (!intr_remapping_enabled) | ||
| 3210 | goto no_ir; | ||
| 3211 | |||
| 3212 | ret = msi_alloc_irte(dev, irq, 1); | ||
| 3213 | if (ret < 0) | ||
| 3214 | goto error; | ||
| 3215 | no_ir: | ||
| 3216 | #endif | ||
| 3217 | ret = setup_msi_irq(dev, desc, irq); | ||
| 2070 | if (ret < 0) { | 3218 | if (ret < 0) { |
| 2071 | destroy_irq(irq); | 3219 | destroy_irq(irq); |
| 2072 | return ret; | 3220 | return ret; |
| 2073 | } | 3221 | } |
| 3222 | return 0; | ||
| 2074 | 3223 | ||
| 2075 | set_irq_msi(irq, desc); | 3224 | #ifdef CONFIG_INTR_REMAP |
| 2076 | write_msi_msg(irq, &msg); | 3225 | error: |
| 3226 | destroy_irq(irq); | ||
| 3227 | return ret; | ||
| 3228 | #endif | ||
| 3229 | } | ||
| 2077 | 3230 | ||
| 2078 | set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); | 3231 | int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) |
| 3232 | { | ||
| 3233 | unsigned int irq; | ||
| 3234 | int ret, sub_handle; | ||
| 3235 | struct msi_desc *desc; | ||
| 3236 | unsigned int irq_want; | ||
| 2079 | 3237 | ||
| 3238 | #ifdef CONFIG_INTR_REMAP | ||
| 3239 | struct intel_iommu *iommu = 0; | ||
| 3240 | int index = 0; | ||
| 3241 | #endif | ||
| 3242 | |||
| 3243 | irq_want = build_irq_for_pci_dev(dev) + 0x100; | ||
| 3244 | sub_handle = 0; | ||
| 3245 | list_for_each_entry(desc, &dev->msi_list, list) { | ||
| 3246 | irq = create_irq_nr(irq_want--); | ||
| 3247 | if (irq == 0) | ||
| 3248 | return -1; | ||
| 3249 | #ifdef CONFIG_INTR_REMAP | ||
| 3250 | if (!intr_remapping_enabled) | ||
| 3251 | goto no_ir; | ||
| 3252 | |||
| 3253 | if (!sub_handle) { | ||
| 3254 | /* | ||
| 3255 | * allocate the consecutive block of IRTE's | ||
| 3256 | * for 'nvec' | ||
| 3257 | */ | ||
| 3258 | index = msi_alloc_irte(dev, irq, nvec); | ||
| 3259 | if (index < 0) { | ||
| 3260 | ret = index; | ||
| 3261 | goto error; | ||
| 3262 | } | ||
| 3263 | } else { | ||
| 3264 | iommu = map_dev_to_ir(dev); | ||
| 3265 | if (!iommu) { | ||
| 3266 | ret = -ENOENT; | ||
| 3267 | goto error; | ||
| 3268 | } | ||
| 3269 | /* | ||
| 3270 | * setup the mapping between the irq and the IRTE | ||
| 3271 | * base index, the sub_handle pointing to the | ||
| 3272 | * appropriate interrupt remap table entry. | ||
| 3273 | */ | ||
| 3274 | set_irte_irq(irq, iommu, index, sub_handle); | ||
| 3275 | } | ||
| 3276 | no_ir: | ||
| 3277 | #endif | ||
| 3278 | ret = setup_msi_irq(dev, desc, irq); | ||
| 3279 | if (ret < 0) | ||
| 3280 | goto error; | ||
| 3281 | sub_handle++; | ||
| 3282 | } | ||
| 2080 | return 0; | 3283 | return 0; |
| 3284 | |||
| 3285 | error: | ||
| 3286 | destroy_irq(irq); | ||
| 3287 | return ret; | ||
| 2081 | } | 3288 | } |
| 2082 | 3289 | ||
| 2083 | void arch_teardown_msi_irq(unsigned int irq) | 3290 | void arch_teardown_msi_irq(unsigned int irq) |
| @@ -2089,10 +3296,11 @@ void arch_teardown_msi_irq(unsigned int irq) | |||
| 2089 | #ifdef CONFIG_SMP | 3296 | #ifdef CONFIG_SMP |
| 2090 | static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) | 3297 | static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) |
| 2091 | { | 3298 | { |
| 2092 | struct irq_cfg *cfg = irq_cfg + irq; | 3299 | struct irq_cfg *cfg; |
| 2093 | struct msi_msg msg; | 3300 | struct msi_msg msg; |
| 2094 | unsigned int dest; | 3301 | unsigned int dest; |
| 2095 | cpumask_t tmp; | 3302 | cpumask_t tmp; |
| 3303 | struct irq_desc *desc; | ||
| 2096 | 3304 | ||
| 2097 | cpus_and(tmp, mask, cpu_online_map); | 3305 | cpus_and(tmp, mask, cpu_online_map); |
| 2098 | if (cpus_empty(tmp)) | 3306 | if (cpus_empty(tmp)) |
| @@ -2101,6 +3309,7 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) | |||
| 2101 | if (assign_irq_vector(irq, mask)) | 3309 | if (assign_irq_vector(irq, mask)) |
| 2102 | return; | 3310 | return; |
| 2103 | 3311 | ||
| 3312 | cfg = irq_cfg(irq); | ||
| 2104 | cpus_and(tmp, cfg->domain, mask); | 3313 | cpus_and(tmp, cfg->domain, mask); |
| 2105 | dest = cpu_mask_to_apicid(tmp); | 3314 | dest = cpu_mask_to_apicid(tmp); |
| 2106 | 3315 | ||
| @@ -2112,7 +3321,8 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) | |||
| 2112 | msg.address_lo |= MSI_ADDR_DEST_ID(dest); | 3321 | msg.address_lo |= MSI_ADDR_DEST_ID(dest); |
| 2113 | 3322 | ||
| 2114 | dmar_msi_write(irq, &msg); | 3323 | dmar_msi_write(irq, &msg); |
| 2115 | irq_desc[irq].affinity = mask; | 3324 | desc = irq_to_desc(irq); |
| 3325 | desc->affinity = mask; | ||
| 2116 | } | 3326 | } |
| 2117 | #endif /* CONFIG_SMP */ | 3327 | #endif /* CONFIG_SMP */ |
| 2118 | 3328 | ||
| @@ -2142,6 +3352,69 @@ int arch_setup_dmar_msi(unsigned int irq) | |||
| 2142 | } | 3352 | } |
| 2143 | #endif | 3353 | #endif |
| 2144 | 3354 | ||
| 3355 | #ifdef CONFIG_HPET_TIMER | ||
| 3356 | |||
| 3357 | #ifdef CONFIG_SMP | ||
| 3358 | static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask) | ||
| 3359 | { | ||
| 3360 | struct irq_cfg *cfg; | ||
| 3361 | struct irq_desc *desc; | ||
| 3362 | struct msi_msg msg; | ||
| 3363 | unsigned int dest; | ||
| 3364 | cpumask_t tmp; | ||
| 3365 | |||
| 3366 | cpus_and(tmp, mask, cpu_online_map); | ||
| 3367 | if (cpus_empty(tmp)) | ||
| 3368 | return; | ||
| 3369 | |||
| 3370 | if (assign_irq_vector(irq, mask)) | ||
| 3371 | return; | ||
| 3372 | |||
| 3373 | cfg = irq_cfg(irq); | ||
| 3374 | cpus_and(tmp, cfg->domain, mask); | ||
| 3375 | dest = cpu_mask_to_apicid(tmp); | ||
| 3376 | |||
| 3377 | hpet_msi_read(irq, &msg); | ||
| 3378 | |||
| 3379 | msg.data &= ~MSI_DATA_VECTOR_MASK; | ||
| 3380 | msg.data |= MSI_DATA_VECTOR(cfg->vector); | ||
| 3381 | msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; | ||
| 3382 | msg.address_lo |= MSI_ADDR_DEST_ID(dest); | ||
| 3383 | |||
| 3384 | hpet_msi_write(irq, &msg); | ||
| 3385 | desc = irq_to_desc(irq); | ||
| 3386 | desc->affinity = mask; | ||
| 3387 | } | ||
| 3388 | #endif /* CONFIG_SMP */ | ||
| 3389 | |||
| 3390 | struct irq_chip hpet_msi_type = { | ||
| 3391 | .name = "HPET_MSI", | ||
| 3392 | .unmask = hpet_msi_unmask, | ||
| 3393 | .mask = hpet_msi_mask, | ||
| 3394 | .ack = ack_apic_edge, | ||
| 3395 | #ifdef CONFIG_SMP | ||
| 3396 | .set_affinity = hpet_msi_set_affinity, | ||
| 3397 | #endif | ||
| 3398 | .retrigger = ioapic_retrigger_irq, | ||
| 3399 | }; | ||
| 3400 | |||
| 3401 | int arch_setup_hpet_msi(unsigned int irq) | ||
| 3402 | { | ||
| 3403 | int ret; | ||
| 3404 | struct msi_msg msg; | ||
| 3405 | |||
| 3406 | ret = msi_compose_msg(NULL, irq, &msg); | ||
| 3407 | if (ret < 0) | ||
| 3408 | return ret; | ||
| 3409 | |||
| 3410 | hpet_msi_write(irq, &msg); | ||
| 3411 | set_irq_chip_and_handler_name(irq, &hpet_msi_type, handle_edge_irq, | ||
| 3412 | "edge"); | ||
| 3413 | |||
| 3414 | return 0; | ||
| 3415 | } | ||
| 3416 | #endif | ||
| 3417 | |||
| 2145 | #endif /* CONFIG_PCI_MSI */ | 3418 | #endif /* CONFIG_PCI_MSI */ |
| 2146 | /* | 3419 | /* |
| 2147 | * Hypertransport interrupt support | 3420 | * Hypertransport interrupt support |
| @@ -2166,9 +3439,10 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) | |||
| 2166 | 3439 | ||
| 2167 | static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) | 3440 | static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) |
| 2168 | { | 3441 | { |
| 2169 | struct irq_cfg *cfg = irq_cfg + irq; | 3442 | struct irq_cfg *cfg; |
| 2170 | unsigned int dest; | 3443 | unsigned int dest; |
| 2171 | cpumask_t tmp; | 3444 | cpumask_t tmp; |
| 3445 | struct irq_desc *desc; | ||
| 2172 | 3446 | ||
| 2173 | cpus_and(tmp, mask, cpu_online_map); | 3447 | cpus_and(tmp, mask, cpu_online_map); |
| 2174 | if (cpus_empty(tmp)) | 3448 | if (cpus_empty(tmp)) |
| @@ -2177,11 +3451,13 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) | |||
| 2177 | if (assign_irq_vector(irq, mask)) | 3451 | if (assign_irq_vector(irq, mask)) |
| 2178 | return; | 3452 | return; |
| 2179 | 3453 | ||
| 3454 | cfg = irq_cfg(irq); | ||
| 2180 | cpus_and(tmp, cfg->domain, mask); | 3455 | cpus_and(tmp, cfg->domain, mask); |
| 2181 | dest = cpu_mask_to_apicid(tmp); | 3456 | dest = cpu_mask_to_apicid(tmp); |
| 2182 | 3457 | ||
| 2183 | target_ht_irq(irq, dest, cfg->vector); | 3458 | target_ht_irq(irq, dest, cfg->vector); |
| 2184 | irq_desc[irq].affinity = mask; | 3459 | desc = irq_to_desc(irq); |
| 3460 | desc->affinity = mask; | ||
| 2185 | } | 3461 | } |
| 2186 | #endif | 3462 | #endif |
| 2187 | 3463 | ||
| @@ -2198,7 +3474,7 @@ static struct irq_chip ht_irq_chip = { | |||
| 2198 | 3474 | ||
| 2199 | int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) | 3475 | int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) |
| 2200 | { | 3476 | { |
| 2201 | struct irq_cfg *cfg = irq_cfg + irq; | 3477 | struct irq_cfg *cfg; |
| 2202 | int err; | 3478 | int err; |
| 2203 | cpumask_t tmp; | 3479 | cpumask_t tmp; |
| 2204 | 3480 | ||
| @@ -2208,6 +3484,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) | |||
| 2208 | struct ht_irq_msg msg; | 3484 | struct ht_irq_msg msg; |
| 2209 | unsigned dest; | 3485 | unsigned dest; |
| 2210 | 3486 | ||
| 3487 | cfg = irq_cfg(irq); | ||
| 2211 | cpus_and(tmp, cfg->domain, tmp); | 3488 | cpus_and(tmp, cfg->domain, tmp); |
| 2212 | dest = cpu_mask_to_apicid(tmp); | 3489 | dest = cpu_mask_to_apicid(tmp); |
| 2213 | 3490 | ||
| @@ -2230,20 +3507,196 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) | |||
| 2230 | 3507 | ||
| 2231 | set_irq_chip_and_handler_name(irq, &ht_irq_chip, | 3508 | set_irq_chip_and_handler_name(irq, &ht_irq_chip, |
| 2232 | handle_edge_irq, "edge"); | 3509 | handle_edge_irq, "edge"); |
| 3510 | |||
| 3511 | dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq); | ||
| 2233 | } | 3512 | } |
| 2234 | return err; | 3513 | return err; |
| 2235 | } | 3514 | } |
| 2236 | #endif /* CONFIG_HT_IRQ */ | 3515 | #endif /* CONFIG_HT_IRQ */ |
| 2237 | 3516 | ||
| 3517 | #ifdef CONFIG_X86_64 | ||
| 3518 | /* | ||
| 3519 | * Re-target the irq to the specified CPU and enable the MMR located on the | ||
| 3520 | * specified blade, so that MSIs can be sent to that CPU. | ||
| 3521 | */ | ||
| 3522 | int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, | ||
| 3523 | unsigned long mmr_offset) | ||
| 3524 | { | ||
| 3525 | const cpumask_t *eligible_cpu = get_cpu_mask(cpu); | ||
| 3526 | struct irq_cfg *cfg; | ||
| 3527 | int mmr_pnode; | ||
| 3528 | unsigned long mmr_value; | ||
| 3529 | struct uv_IO_APIC_route_entry *entry; | ||
| 3530 | unsigned long flags; | ||
| 3531 | int err; | ||
| 3532 | |||
| 3533 | err = assign_irq_vector(irq, *eligible_cpu); | ||
| 3534 | if (err != 0) | ||
| 3535 | return err; | ||
| 3536 | |||
| 3537 | spin_lock_irqsave(&vector_lock, flags); | ||
| 3538 | set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq, | ||
| 3539 | irq_name); | ||
| 3540 | spin_unlock_irqrestore(&vector_lock, flags); | ||
| 3541 | |||
| 3542 | cfg = irq_cfg(irq); | ||
| 3543 | |||
| 3544 | mmr_value = 0; | ||
| 3545 | entry = (struct uv_IO_APIC_route_entry *)&mmr_value; | ||
| 3546 | BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); | ||
| 3547 | |||
| 3548 | entry->vector = cfg->vector; | ||
| 3549 | entry->delivery_mode = INT_DELIVERY_MODE; | ||
| 3550 | entry->dest_mode = INT_DEST_MODE; | ||
| 3551 | entry->polarity = 0; | ||
| 3552 | entry->trigger = 0; | ||
| 3553 | entry->mask = 0; | ||
| 3554 | entry->dest = cpu_mask_to_apicid(*eligible_cpu); | ||
| 3555 | |||
| 3556 | mmr_pnode = uv_blade_to_pnode(mmr_blade); | ||
| 3557 | uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); | ||
| 3558 | |||
| 3559 | return irq; | ||
| 3560 | } | ||
| 3561 | |||
| 3562 | /* | ||
| 3563 | * Disable the specified MMR located on the specified blade so that MSIs are | ||
| 3564 | * no longer allowed to be sent. | ||
| 3565 | */ | ||
| 3566 | void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset) | ||
| 3567 | { | ||
| 3568 | unsigned long mmr_value; | ||
| 3569 | struct uv_IO_APIC_route_entry *entry; | ||
| 3570 | int mmr_pnode; | ||
| 3571 | |||
| 3572 | mmr_value = 0; | ||
| 3573 | entry = (struct uv_IO_APIC_route_entry *)&mmr_value; | ||
| 3574 | BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); | ||
| 3575 | |||
| 3576 | entry->mask = 1; | ||
| 3577 | |||
| 3578 | mmr_pnode = uv_blade_to_pnode(mmr_blade); | ||
| 3579 | uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); | ||
| 3580 | } | ||
| 3581 | #endif /* CONFIG_X86_64 */ | ||
| 3582 | |||
| 3583 | int __init io_apic_get_redir_entries (int ioapic) | ||
| 3584 | { | ||
| 3585 | union IO_APIC_reg_01 reg_01; | ||
| 3586 | unsigned long flags; | ||
| 3587 | |||
| 3588 | spin_lock_irqsave(&ioapic_lock, flags); | ||
| 3589 | reg_01.raw = io_apic_read(ioapic, 1); | ||
| 3590 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
| 3591 | |||
| 3592 | return reg_01.bits.entries; | ||
| 3593 | } | ||
| 3594 | |||
| 3595 | int __init probe_nr_irqs(void) | ||
| 3596 | { | ||
| 3597 | int idx; | ||
| 3598 | int nr = 0; | ||
| 3599 | #ifndef CONFIG_XEN | ||
| 3600 | int nr_min = 32; | ||
| 3601 | #else | ||
| 3602 | int nr_min = NR_IRQS; | ||
| 3603 | #endif | ||
| 3604 | |||
| 3605 | for (idx = 0; idx < nr_ioapics; idx++) | ||
| 3606 | nr += io_apic_get_redir_entries(idx) + 1; | ||
| 3607 | |||
| 3608 | /* double it to leave room for hotplug, MSI and NMI */ | ||
| 3609 | nr <<= 1; | ||
| 3610 | |||
| 3611 | /* something wrong? never go below the minimum */ | ||
| 3612 | if (nr < nr_min) | ||
| 3613 | nr = nr_min; | ||
| 3614 | |||
| 3615 | return nr; | ||
| 3616 | } | ||
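A worked example of the sizing heuristic above may help; the numbers are hypothetical and only illustrate the arithmetic in probe_nr_irqs().

/*
 * A box with a single 24-pin IO-APIC (non-Xen): io_apic_get_redir_entries(0)
 * returns 23, so nr = 23 + 1 = 24 pins; doubling for hotplug/MSI/NMI gives
 * nr = 48, which is already above nr_min = 32, so probe_nr_irqs() returns 48.
 * A tiny 8-pin IO-APIC would give 8 * 2 = 16 and be clamped up to 32.
 */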
| 3617 | |||
| 2238 | /* -------------------------------------------------------------------------- | 3618 | /* -------------------------------------------------------------------------- |
| 2239 | ACPI-based IOAPIC Configuration | 3619 | ACPI-based IOAPIC Configuration |
| 2240 | -------------------------------------------------------------------------- */ | 3620 | -------------------------------------------------------------------------- */ |
| 2241 | 3621 | ||
| 2242 | #ifdef CONFIG_ACPI | 3622 | #ifdef CONFIG_ACPI |
| 2243 | 3623 | ||
| 2244 | #define IO_APIC_MAX_ID 0xFE | 3624 | #ifdef CONFIG_X86_32 |
| 3625 | int __init io_apic_get_unique_id(int ioapic, int apic_id) | ||
| 3626 | { | ||
| 3627 | union IO_APIC_reg_00 reg_00; | ||
| 3628 | static physid_mask_t apic_id_map = PHYSID_MASK_NONE; | ||
| 3629 | physid_mask_t tmp; | ||
| 3630 | unsigned long flags; | ||
| 3631 | int i = 0; | ||
| 2245 | 3632 | ||
| 2246 | int __init io_apic_get_redir_entries (int ioapic) | 3633 | /* |
| 3634 | * The P4 platform supports up to 256 APIC IDs on two separate APIC | ||
| 3635 | * buses (one for LAPICs, one for IOAPICs), whereas its predecessors only | ||
| 3636 | * support up to 16 on one shared APIC bus. | ||
| 3637 | * | ||
| 3638 | * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full | ||
| 3639 | * advantage of new APIC bus architecture. | ||
| 3640 | */ | ||
| 3641 | |||
| 3642 | if (physids_empty(apic_id_map)) | ||
| 3643 | apic_id_map = ioapic_phys_id_map(phys_cpu_present_map); | ||
| 3644 | |||
| 3645 | spin_lock_irqsave(&ioapic_lock, flags); | ||
| 3646 | reg_00.raw = io_apic_read(ioapic, 0); | ||
| 3647 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
| 3648 | |||
| 3649 | if (apic_id >= get_physical_broadcast()) { | ||
| 3650 | printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " | ||
| 3651 | "%d\n", ioapic, apic_id, reg_00.bits.ID); | ||
| 3652 | apic_id = reg_00.bits.ID; | ||
| 3653 | } | ||
| 3654 | |||
| 3655 | /* | ||
| 3656 | * Every APIC in a system must have a unique ID or we get lots of nice | ||
| 3657 | * 'stuck on smp_invalidate_needed IPI wait' messages. | ||
| 3658 | */ | ||
| 3659 | if (check_apicid_used(apic_id_map, apic_id)) { | ||
| 3660 | |||
| 3661 | for (i = 0; i < get_physical_broadcast(); i++) { | ||
| 3662 | if (!check_apicid_used(apic_id_map, i)) | ||
| 3663 | break; | ||
| 3664 | } | ||
| 3665 | |||
| 3666 | if (i == get_physical_broadcast()) | ||
| 3667 | panic("Max apic_id exceeded!\n"); | ||
| 3668 | |||
| 3669 | printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, " | ||
| 3670 | "trying %d\n", ioapic, apic_id, i); | ||
| 3671 | |||
| 3672 | apic_id = i; | ||
| 3673 | } | ||
| 3674 | |||
| 3675 | tmp = apicid_to_cpu_present(apic_id); | ||
| 3676 | physids_or(apic_id_map, apic_id_map, tmp); | ||
| 3677 | |||
| 3678 | if (reg_00.bits.ID != apic_id) { | ||
| 3679 | reg_00.bits.ID = apic_id; | ||
| 3680 | |||
| 3681 | spin_lock_irqsave(&ioapic_lock, flags); | ||
| 3682 | io_apic_write(ioapic, 0, reg_00.raw); | ||
| 3683 | reg_00.raw = io_apic_read(ioapic, 0); | ||
| 3684 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
| 3685 | |||
| 3686 | /* Sanity check */ | ||
| 3687 | if (reg_00.bits.ID != apic_id) { | ||
| 3688 | printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic); | ||
| 3689 | return -1; | ||
| 3690 | } | ||
| 3691 | } | ||
| 3692 | |||
| 3693 | apic_printk(APIC_VERBOSE, KERN_INFO | ||
| 3694 | "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id); | ||
| 3695 | |||
| 3696 | return apic_id; | ||
| 3697 | } | ||
| 3698 | |||
| 3699 | int __init io_apic_get_version(int ioapic) | ||
| 2247 | { | 3700 | { |
| 2248 | union IO_APIC_reg_01 reg_01; | 3701 | union IO_APIC_reg_01 reg_01; |
| 2249 | unsigned long flags; | 3702 | unsigned long flags; |
| @@ -2252,9 +3705,9 @@ int __init io_apic_get_redir_entries (int ioapic) | |||
| 2252 | reg_01.raw = io_apic_read(ioapic, 1); | 3705 | reg_01.raw = io_apic_read(ioapic, 1); |
| 2253 | spin_unlock_irqrestore(&ioapic_lock, flags); | 3706 | spin_unlock_irqrestore(&ioapic_lock, flags); |
| 2254 | 3707 | ||
| 2255 | return reg_01.bits.entries; | 3708 | return reg_01.bits.version; |
| 2256 | } | 3709 | } |
| 2257 | 3710 | #endif | |
| 2258 | 3711 | ||
| 2259 | int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity) | 3712 | int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity) |
| 2260 | { | 3713 | { |
| @@ -2306,6 +3759,7 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) | |||
| 2306 | void __init setup_ioapic_dest(void) | 3759 | void __init setup_ioapic_dest(void) |
| 2307 | { | 3760 | { |
| 2308 | int pin, ioapic, irq, irq_entry; | 3761 | int pin, ioapic, irq, irq_entry; |
| 3762 | struct irq_cfg *cfg; | ||
| 2309 | 3763 | ||
| 2310 | if (skip_ioapic_setup == 1) | 3764 | if (skip_ioapic_setup == 1) |
| 2311 | return; | 3765 | return; |
| @@ -2321,10 +3775,15 @@ void __init setup_ioapic_dest(void) | |||
| 2321 | * when you have too many devices, because at that time only boot | 3775 | * when you have too many devices, because at that time only boot |
| 2322 | * cpu is online. | 3776 | * cpu is online. |
| 2323 | */ | 3777 | */ |
| 2324 | if (!irq_cfg[irq].vector) | 3778 | cfg = irq_cfg(irq); |
| 3779 | if (!cfg->vector) | ||
| 2325 | setup_IO_APIC_irq(ioapic, pin, irq, | 3780 | setup_IO_APIC_irq(ioapic, pin, irq, |
| 2326 | irq_trigger(irq_entry), | 3781 | irq_trigger(irq_entry), |
| 2327 | irq_polarity(irq_entry)); | 3782 | irq_polarity(irq_entry)); |
| 3783 | #ifdef CONFIG_INTR_REMAP | ||
| 3784 | else if (intr_remapping_enabled) | ||
| 3785 | set_ir_ioapic_affinity_irq(irq, TARGET_CPUS); | ||
| 3786 | #endif | ||
| 2328 | else | 3787 | else |
| 2329 | set_ioapic_affinity_irq(irq, TARGET_CPUS); | 3788 | set_ioapic_affinity_irq(irq, TARGET_CPUS); |
| 2330 | } | 3789 | } |
| @@ -2375,18 +3834,33 @@ void __init ioapic_init_mappings(void) | |||
| 2375 | struct resource *ioapic_res; | 3834 | struct resource *ioapic_res; |
| 2376 | int i; | 3835 | int i; |
| 2377 | 3836 | ||
| 3837 | irq_2_pin_init(); | ||
| 2378 | ioapic_res = ioapic_setup_resources(); | 3838 | ioapic_res = ioapic_setup_resources(); |
| 2379 | for (i = 0; i < nr_ioapics; i++) { | 3839 | for (i = 0; i < nr_ioapics; i++) { |
| 2380 | if (smp_found_config) { | 3840 | if (smp_found_config) { |
| 2381 | ioapic_phys = mp_ioapics[i].mp_apicaddr; | 3841 | ioapic_phys = mp_ioapics[i].mp_apicaddr; |
| 3842 | #ifdef CONFIG_X86_32 | ||
| 3843 | if (!ioapic_phys) { | ||
| 3844 | printk(KERN_ERR | ||
| 3845 | "WARNING: bogus zero IO-APIC " | ||
| 3846 | "address found in MPTABLE, " | ||
| 3847 | "disabling IO/APIC support!\n"); | ||
| 3848 | smp_found_config = 0; | ||
| 3849 | skip_ioapic_setup = 1; | ||
| 3850 | goto fake_ioapic_page; | ||
| 3851 | } | ||
| 3852 | #endif | ||
| 2382 | } else { | 3853 | } else { |
| 3854 | #ifdef CONFIG_X86_32 | ||
| 3855 | fake_ioapic_page: | ||
| 3856 | #endif | ||
| 2383 | ioapic_phys = (unsigned long) | 3857 | ioapic_phys = (unsigned long) |
| 2384 | alloc_bootmem_pages(PAGE_SIZE); | 3858 | alloc_bootmem_pages(PAGE_SIZE); |
| 2385 | ioapic_phys = __pa(ioapic_phys); | 3859 | ioapic_phys = __pa(ioapic_phys); |
| 2386 | } | 3860 | } |
| 2387 | set_fixmap_nocache(idx, ioapic_phys); | 3861 | set_fixmap_nocache(idx, ioapic_phys); |
| 2388 | apic_printk(APIC_VERBOSE, | 3862 | apic_printk(APIC_VERBOSE, |
| 2389 | "mapped IOAPIC to %016lx (%016lx)\n", | 3863 | "mapped IOAPIC to %08lx (%08lx)\n", |
| 2390 | __fix_to_virt(idx), ioapic_phys); | 3864 | __fix_to_virt(idx), ioapic_phys); |
| 2391 | idx++; | 3865 | idx++; |
| 2392 | 3866 | ||
| @@ -2420,4 +3894,3 @@ static int __init ioapic_insert_resources(void) | |||
| 2420 | /* Insert the IO APIC resources after PCI initialization has occurred to handle | 3894 | /* Insert the IO APIC resources after PCI initialization has occurred to handle |
| 2421 | * IO APICs that are mapped in on a BAR in PCI space. */ | 3895 | * IO APICs that are mapped in on a BAR in PCI space. */ |
| 2422 | late_initcall(ioapic_insert_resources); | 3896 | late_initcall(ioapic_insert_resources); |
| 2423 | |||
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c deleted file mode 100644 index 558abf4c796a..000000000000 --- a/arch/x86/kernel/io_apic_32.c +++ /dev/null | |||
| @@ -1,2900 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * Intel IO-APIC support for multi-Pentium hosts. | ||
| 3 | * | ||
| 4 | * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo | ||
| 5 | * | ||
| 6 | * Many thanks to Stig Venaas for trying out countless experimental | ||
| 7 | * patches and reporting/debugging problems patiently! | ||
| 8 | * | ||
| 9 | * (c) 1999, Multiple IO-APIC support, developed by | ||
| 10 | * Ken-ichi Yaku <yaku@css1.kbnes.nec.co.jp> and | ||
| 11 | * Hidemi Kishimoto <kisimoto@css1.kbnes.nec.co.jp>, | ||
| 12 | * further tested and cleaned up by Zach Brown <zab@redhat.com> | ||
| 13 | * and Ingo Molnar <mingo@redhat.com> | ||
| 14 | * | ||
| 15 | * Fixes | ||
| 16 | * Maciej W. Rozycki : Bits for genuine 82489DX APICs; | ||
| 17 | * thanks to Eric Gilmore | ||
| 18 | * and Rolf G. Tews | ||
| 19 | * for testing these extensively | ||
| 20 | * Paul Diefenbaugh : Added full ACPI support | ||
| 21 | */ | ||
| 22 | |||
| 23 | #include <linux/mm.h> | ||
| 24 | #include <linux/interrupt.h> | ||
| 25 | #include <linux/init.h> | ||
| 26 | #include <linux/delay.h> | ||
| 27 | #include <linux/sched.h> | ||
| 28 | #include <linux/bootmem.h> | ||
| 29 | #include <linux/mc146818rtc.h> | ||
| 30 | #include <linux/compiler.h> | ||
| 31 | #include <linux/acpi.h> | ||
| 32 | #include <linux/module.h> | ||
| 33 | #include <linux/sysdev.h> | ||
| 34 | #include <linux/pci.h> | ||
| 35 | #include <linux/msi.h> | ||
| 36 | #include <linux/htirq.h> | ||
| 37 | #include <linux/freezer.h> | ||
| 38 | #include <linux/kthread.h> | ||
| 39 | #include <linux/jiffies.h> /* time_after() */ | ||
| 40 | |||
| 41 | #include <asm/io.h> | ||
| 42 | #include <asm/smp.h> | ||
| 43 | #include <asm/desc.h> | ||
| 44 | #include <asm/timer.h> | ||
| 45 | #include <asm/i8259.h> | ||
| 46 | #include <asm/nmi.h> | ||
| 47 | #include <asm/msidef.h> | ||
| 48 | #include <asm/hypertransport.h> | ||
| 49 | |||
| 50 | #include <mach_apic.h> | ||
| 51 | #include <mach_apicdef.h> | ||
| 52 | |||
| 53 | int (*ioapic_renumber_irq)(int ioapic, int irq); | ||
| 54 | atomic_t irq_mis_count; | ||
| 55 | |||
| 56 | /* Where, if anywhere, is the i8259 connected in external int mode */ | ||
| 57 | static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; | ||
| 58 | |||
| 59 | static DEFINE_SPINLOCK(ioapic_lock); | ||
| 60 | static DEFINE_SPINLOCK(vector_lock); | ||
| 61 | |||
| 62 | int timer_through_8259 __initdata; | ||
| 63 | |||
| 64 | /* | ||
| 65 | * Is the SiS APIC rmw bug present ? | ||
| 66 | * -1 = don't know, 0 = no, 1 = yes | ||
| 67 | */ | ||
| 68 | int sis_apic_bug = -1; | ||
| 69 | |||
| 70 | /* | ||
| 71 | * # of IRQ routing registers | ||
| 72 | */ | ||
| 73 | int nr_ioapic_registers[MAX_IO_APICS]; | ||
| 74 | |||
| 75 | /* I/O APIC entries */ | ||
| 76 | struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; | ||
| 77 | int nr_ioapics; | ||
| 78 | |||
| 79 | /* MP IRQ source entries */ | ||
| 80 | struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; | ||
| 81 | |||
| 82 | /* # of MP IRQ source entries */ | ||
| 83 | int mp_irq_entries; | ||
| 84 | |||
| 85 | #if defined (CONFIG_MCA) || defined (CONFIG_EISA) | ||
| 86 | int mp_bus_id_to_type[MAX_MP_BUSSES]; | ||
| 87 | #endif | ||
| 88 | |||
| 89 | DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); | ||
| 90 | |||
| 91 | static int disable_timer_pin_1 __initdata; | ||
| 92 | |||
| 93 | /* | ||
| 94 | * Rough estimate of how many shared IRQs there are; can | ||
| 95 | * be changed anytime. | ||
| 96 | */ | ||
| 97 | #define MAX_PLUS_SHARED_IRQS NR_IRQS | ||
| 98 | #define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS) | ||
| 99 | |||
| 100 | /* | ||
| 101 | * This is performance-critical; we want to do it in O(1) | ||
| 102 | * | ||
| 103 | * The indexing order of this array favors 1:1 mappings | ||
| 104 | * between pins and IRQs. | ||
| 105 | */ | ||
| 106 | |||
| 107 | static struct irq_pin_list { | ||
| 108 | int apic, pin, next; | ||
| 109 | } irq_2_pin[PIN_MAP_SIZE]; | ||
| 110 | |||
| 111 | struct io_apic { | ||
| 112 | unsigned int index; | ||
| 113 | unsigned int unused[3]; | ||
| 114 | unsigned int data; | ||
| 115 | }; | ||
| 116 | |||
| 117 | static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) | ||
| 118 | { | ||
| 119 | return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) | ||
| 120 | + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK); | ||
| 121 | } | ||
| 122 | |||
| 123 | static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) | ||
| 124 | { | ||
| 125 | struct io_apic __iomem *io_apic = io_apic_base(apic); | ||
| 126 | writel(reg, &io_apic->index); | ||
| 127 | return readl(&io_apic->data); | ||
| 128 | } | ||
| 129 | |||
| 130 | static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) | ||
| 131 | { | ||
| 132 | struct io_apic __iomem *io_apic = io_apic_base(apic); | ||
| 133 | writel(reg, &io_apic->index); | ||
| 134 | writel(value, &io_apic->data); | ||
| 135 | } | ||
| 136 | |||
| 137 | /* | ||
| 138 | * Re-write a value: to be used for read-modify-write | ||
| 139 | * cycles where the read already set up the index register. | ||
| 140 | * | ||
| 141 | * The older SiS APIC requires that we rewrite the index register | ||
| 142 | */ | ||
| 143 | static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) | ||
| 144 | { | ||
| 145 | volatile struct io_apic __iomem *io_apic = io_apic_base(apic); | ||
| 146 | if (sis_apic_bug) | ||
| 147 | writel(reg, &io_apic->index); | ||
| 148 | writel(value, &io_apic->data); | ||
| 149 | } | ||
| 150 | |||
| 151 | union entry_union { | ||
| 152 | struct { u32 w1, w2; }; | ||
| 153 | struct IO_APIC_route_entry entry; | ||
| 154 | }; | ||
| 155 | |||
| 156 | static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) | ||
| 157 | { | ||
| 158 | union entry_union eu; | ||
| 159 | unsigned long flags; | ||
| 160 | spin_lock_irqsave(&ioapic_lock, flags); | ||
| 161 | eu.w1 = io_apic_read(apic, 0x10 + 2 * pin); | ||
| 162 | eu.w2 = io_apic_read(apic, 0x11 + 2 * pin); | ||
| 163 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
| 164 | return eu.entry; | ||
| 165 | } | ||
| 166 | |||
| 167 | /* | ||
| 168 | * When we write a new IO APIC routing entry, we need to write the high | ||
| 169 | * word first! If the mask bit in the low word is clear, we will enable | ||
| 170 | * the interrupt, and we need to make sure the entry is fully populated | ||
| 171 | * before that happens. | ||
| 172 | */ | ||
| 173 | static void | ||
| 174 | __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) | ||
| 175 | { | ||
| 176 | union entry_union eu; | ||
| 177 | eu.entry = e; | ||
| 178 | io_apic_write(apic, 0x11 + 2*pin, eu.w2); | ||
| 179 | io_apic_write(apic, 0x10 + 2*pin, eu.w1); | ||
| 180 | } | ||
| 181 | |||
| 182 | static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) | ||
| 183 | { | ||
| 184 | unsigned long flags; | ||
| 185 | spin_lock_irqsave(&ioapic_lock, flags); | ||
| 186 | __ioapic_write_entry(apic, pin, e); | ||
| 187 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
| 188 | } | ||
| 189 | |||
| 190 | /* | ||
| 191 | * When we mask an IO APIC routing entry, we need to write the low | ||
| 192 | * word first, in order to set the mask bit before we change the | ||
| 193 | * high bits! | ||
| 194 | */ | ||
| 195 | static void ioapic_mask_entry(int apic, int pin) | ||
| 196 | { | ||
| 197 | unsigned long flags; | ||
| 198 | union entry_union eu = { .entry.mask = 1 }; | ||
| 199 | |||
| 200 | spin_lock_irqsave(&ioapic_lock, flags); | ||
| 201 | io_apic_write(apic, 0x10 + 2*pin, eu.w1); | ||
| 202 | io_apic_write(apic, 0x11 + 2*pin, eu.w2); | ||
| 203 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
| 204 | } | ||
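For contrast with the two helpers above, a minimal sketch of the write order they are careful to avoid; unsafe_write_entry() is hypothetical and exists only to illustrate the hazard.

/* Counter-example (hypothetical, do not use): */
static void unsafe_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
{
	union entry_union eu;

	eu.entry = e;
	/* WRONG: if e.mask is clear, the vector goes live here ... */
	io_apic_write(apic, 0x10 + 2*pin, eu.w1);
	/* ... while the destination in the high word is still stale. */
	io_apic_write(apic, 0x11 + 2*pin, eu.w2);
}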
| 205 | |||
| 206 | /* | ||
| 207 | * The common case is 1:1 IRQ<->pin mappings. Sometimes there are | ||
| 208 | * shared ISA-space IRQs, so we have to support them. We are super | ||
| 209 | * fast in the common case, and fast for shared ISA-space IRQs. | ||
| 210 | */ | ||
| 211 | static void add_pin_to_irq(unsigned int irq, int apic, int pin) | ||
| 212 | { | ||
| 213 | static int first_free_entry = NR_IRQS; | ||
| 214 | struct irq_pin_list *entry = irq_2_pin + irq; | ||
| 215 | |||
| 216 | while (entry->next) | ||
| 217 | entry = irq_2_pin + entry->next; | ||
| 218 | |||
| 219 | if (entry->pin != -1) { | ||
| 220 | entry->next = first_free_entry; | ||
| 221 | entry = irq_2_pin + entry->next; | ||
| 222 | if (++first_free_entry >= PIN_MAP_SIZE) | ||
| 223 | panic("io_apic.c: whoops"); | ||
| 224 | } | ||
| 225 | entry->apic = apic; | ||
| 226 | entry->pin = pin; | ||
| 227 | } | ||
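To make the chained irq_2_pin[] layout that add_pin_to_irq() maintains concrete, a hypothetical shared-IRQ example:

/*
 * IRQ 9 reported on both (apic 0, pin 9) and (apic 1, pin 3):
 *   1st call: the head slot irq_2_pin[9] records apic 0, pin 9;
 *   2nd call: irq_2_pin[9].next is set to first_free_entry (which starts
 *             at NR_IRQS) and that spill slot records apic 1, pin 3.
 * Lookups walk the ->next chain, so the common 1:1 case stays O(1).
 */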
| 228 | |||
| 229 | /* | ||
| 230 | * Reroute an IRQ to a different pin. | ||
| 231 | */ | ||
| 232 | static void __init replace_pin_at_irq(unsigned int irq, | ||
| 233 | int oldapic, int oldpin, | ||
| 234 | int newapic, int newpin) | ||
| 235 | { | ||
| 236 | struct irq_pin_list *entry = irq_2_pin + irq; | ||
| 237 | |||
| 238 | while (1) { | ||
| 239 | if (entry->apic == oldapic && entry->pin == oldpin) { | ||
| 240 | entry->apic = newapic; | ||
| 241 | entry->pin = newpin; | ||
| 242 | } | ||
| 243 | if (!entry->next) | ||
| 244 | break; | ||
| 245 | entry = irq_2_pin + entry->next; | ||
| 246 | } | ||
| 247 | } | ||
| 248 | |||
| 249 | static void __modify_IO_APIC_irq(unsigned int irq, unsigned long enable, unsigned long disable) | ||
| 250 | { | ||
| 251 | struct irq_pin_list *entry = irq_2_pin + irq; | ||
| 252 | unsigned int pin, reg; | ||
| 253 | |||
| 254 | for (;;) { | ||
| 255 | pin = entry->pin; | ||
| 256 | if (pin == -1) | ||
| 257 | break; | ||
| 258 | reg = io_apic_read(entry->apic, 0x10 + pin*2); | ||
| 259 | reg &= ~disable; | ||
| 260 | reg |= enable; | ||
| 261 | io_apic_modify(entry->apic, 0x10 + pin*2, reg); | ||
| 262 | if (!entry->next) | ||
| 263 | break; | ||
| 264 | entry = irq_2_pin + entry->next; | ||
| 265 | } | ||
| 266 | } | ||
| 267 | |||
| 268 | /* mask = 1 */ | ||
| 269 | static void __mask_IO_APIC_irq(unsigned int irq) | ||
| 270 | { | ||
| 271 | __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, 0); | ||
| 272 | } | ||
| 273 | |||
| 274 | /* mask = 0 */ | ||
| 275 | static void __unmask_IO_APIC_irq(unsigned int irq) | ||
| 276 | { | ||
| 277 | __modify_IO_APIC_irq(irq, 0, IO_APIC_REDIR_MASKED); | ||
| 278 | } | ||
| 279 | |||
| 280 | /* mask = 1, trigger = 0 */ | ||
| 281 | static void __mask_and_edge_IO_APIC_irq(unsigned int irq) | ||
| 282 | { | ||
| 283 | __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, | ||
| 284 | IO_APIC_REDIR_LEVEL_TRIGGER); | ||
| 285 | } | ||
| 286 | |||
| 287 | /* mask = 0, trigger = 1 */ | ||
| 288 | static void __unmask_and_level_IO_APIC_irq(unsigned int irq) | ||
| 289 | { | ||
| 290 | __modify_IO_APIC_irq(irq, IO_APIC_REDIR_LEVEL_TRIGGER, | ||
| 291 | IO_APIC_REDIR_MASKED); | ||
| 292 | } | ||
| 293 | |||
| 294 | static void mask_IO_APIC_irq(unsigned int irq) | ||
| 295 | { | ||
| 296 | unsigned long flags; | ||
| 297 | |||
| 298 | spin_lock_irqsave(&ioapic_lock, flags); | ||
| 299 | __mask_IO_APIC_irq(irq); | ||
| 300 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
| 301 | } | ||
| 302 | |||
| 303 | static void unmask_IO_APIC_irq(unsigned int irq) | ||
| 304 | { | ||
| 305 | unsigned long flags; | ||
| 306 | |||
| 307 | spin_lock_irqsave(&ioapic_lock, flags); | ||
| 308 | __unmask_IO_APIC_irq(irq); | ||
| 309 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
| 310 | } | ||
| 311 | |||
| 312 | static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) | ||
| 313 | { | ||
| 314 | struct IO_APIC_route_entry entry; | ||
| 315 | |||
| 316 | /* Check delivery_mode to be sure we're not clearing an SMI pin */ | ||
| 317 | entry = ioapic_read_entry(apic, pin); | ||
| 318 | if (entry.delivery_mode == dest_SMI) | ||
| 319 | return; | ||
| 320 | |||
| 321 | /* | ||
| 322 | * Disable it in the IO-APIC irq-routing table: | ||
| 323 | */ | ||
| 324 | ioapic_mask_entry(apic, pin); | ||
| 325 | } | ||
| 326 | |||
| 327 | static void clear_IO_APIC(void) | ||
| 328 | { | ||
| 329 | int apic, pin; | ||
| 330 | |||
| 331 | for (apic = 0; apic < nr_ioapics; apic++) | ||
| 332 | for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) | ||
| 333 | clear_IO_APIC_pin(apic, pin); | ||
| 334 | } | ||
| 335 | |||
| 336 | #ifdef CONFIG_SMP | ||
| 337 | static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) | ||
| 338 | { | ||
| 339 | unsigned long flags; | ||
| 340 | int pin; | ||
| 341 | struct irq_pin_list *entry = irq_2_pin + irq; | ||
| 342 | unsigned int apicid_value; | ||
| 343 | cpumask_t tmp; | ||
| 344 | |||
| 345 | cpus_and(tmp, cpumask, cpu_online_map); | ||
| 346 | if (cpus_empty(tmp)) | ||
| 347 | tmp = TARGET_CPUS; | ||
| 348 | |||
| 349 | cpus_and(cpumask, tmp, CPU_MASK_ALL); | ||
| 350 | |||
| 351 | apicid_value = cpu_mask_to_apicid(cpumask); | ||
| 352 | /* Prepare to do the io_apic_write */ | ||
| 353 | apicid_value = apicid_value << 24; | ||
| 354 | spin_lock_irqsave(&ioapic_lock, flags); | ||
| 355 | for (;;) { | ||
| 356 | pin = entry->pin; | ||
| 357 | if (pin == -1) | ||
| 358 | break; | ||
| 359 | io_apic_write(entry->apic, 0x10 + 1 + pin*2, apicid_value); | ||
| 360 | if (!entry->next) | ||
| 361 | break; | ||
| 362 | entry = irq_2_pin + entry->next; | ||
| 363 | } | ||
| 364 | irq_desc[irq].affinity = cpumask; | ||
| 365 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
| 366 | } | ||
| 367 | |||
| 368 | #if defined(CONFIG_IRQBALANCE) | ||
| 369 | # include <asm/processor.h> /* kernel_thread() */ | ||
| 370 | # include <linux/kernel_stat.h> /* kstat */ | ||
| 371 | # include <linux/slab.h> /* kmalloc() */ | ||
| 372 | # include <linux/timer.h> | ||
| 373 | |||
| 374 | #define IRQBALANCE_CHECK_ARCH -999 | ||
| 375 | #define MAX_BALANCED_IRQ_INTERVAL (5*HZ) | ||
| 376 | #define MIN_BALANCED_IRQ_INTERVAL (HZ/2) | ||
| 377 | #define BALANCED_IRQ_MORE_DELTA (HZ/10) | ||
| 378 | #define BALANCED_IRQ_LESS_DELTA (HZ) | ||
| 379 | |||
| 380 | static int irqbalance_disabled __read_mostly = IRQBALANCE_CHECK_ARCH; | ||
| 381 | static int physical_balance __read_mostly; | ||
| 382 | static long balanced_irq_interval __read_mostly = MAX_BALANCED_IRQ_INTERVAL; | ||
| 383 | |||
| 384 | static struct irq_cpu_info { | ||
| 385 | unsigned long *last_irq; | ||
| 386 | unsigned long *irq_delta; | ||
| 387 | unsigned long irq; | ||
| 388 | } irq_cpu_data[NR_CPUS]; | ||
| 389 | |||
| 390 | #define CPU_IRQ(cpu) (irq_cpu_data[cpu].irq) | ||
| 391 | #define LAST_CPU_IRQ(cpu, irq) (irq_cpu_data[cpu].last_irq[irq]) | ||
| 392 | #define IRQ_DELTA(cpu, irq) (irq_cpu_data[cpu].irq_delta[irq]) | ||
| 393 | |||
| 394 | #define IDLE_ENOUGH(cpu,now) \ | ||
| 395 | (idle_cpu(cpu) && ((now) - per_cpu(irq_stat, (cpu)).idle_timestamp > 1)) | ||
| 396 | |||
| 397 | #define IRQ_ALLOWED(cpu, allowed_mask) cpu_isset(cpu, allowed_mask) | ||
| 398 | |||
| 399 | #define CPU_TO_PACKAGEINDEX(i) (first_cpu(per_cpu(cpu_sibling_map, i))) | ||
| 400 | |||
| 401 | static cpumask_t balance_irq_affinity[NR_IRQS] = { | ||
| 402 | [0 ... NR_IRQS-1] = CPU_MASK_ALL | ||
| 403 | }; | ||
| 404 | |||
| 405 | void set_balance_irq_affinity(unsigned int irq, cpumask_t mask) | ||
| 406 | { | ||
| 407 | balance_irq_affinity[irq] = mask; | ||
| 408 | } | ||
| 409 | |||
| 410 | static unsigned long move(int curr_cpu, cpumask_t allowed_mask, | ||
| 411 | unsigned long now, int direction) | ||
| 412 | { | ||
| 413 | int search_idle = 1; | ||
| 414 | int cpu = curr_cpu; | ||
| 415 | |||
| 416 | goto inside; | ||
| 417 | |||
| 418 | do { | ||
| 419 | if (unlikely(cpu == curr_cpu)) | ||
| 420 | search_idle = 0; | ||
| 421 | inside: | ||
| 422 | if (direction == 1) { | ||
| 423 | cpu++; | ||
| 424 | if (cpu >= NR_CPUS) | ||
| 425 | cpu = 0; | ||
| 426 | } else { | ||
| 427 | cpu--; | ||
| 428 | if (cpu == -1) | ||
| 429 | cpu = NR_CPUS-1; | ||
| 430 | } | ||
| 431 | } while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu, allowed_mask) || | ||
| 432 | (search_idle && !IDLE_ENOUGH(cpu, now))); | ||
| 433 | |||
| 434 | return cpu; | ||
| 435 | } | ||
| 436 | |||
| 437 | static inline void balance_irq(int cpu, int irq) | ||
| 438 | { | ||
| 439 | unsigned long now = jiffies; | ||
| 440 | cpumask_t allowed_mask; | ||
| 441 | unsigned int new_cpu; | ||
| 442 | |||
| 443 | if (irqbalance_disabled) | ||
| 444 | return; | ||
| 445 | |||
| 446 | cpus_and(allowed_mask, cpu_online_map, balance_irq_affinity[irq]); | ||
| 447 | new_cpu = move(cpu, allowed_mask, now, 1); | ||
| 448 | if (cpu != new_cpu) | ||
| 449 | set_pending_irq(irq, cpumask_of_cpu(new_cpu)); | ||
| 450 | } | ||
| 451 | |||
| 452 | static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold) | ||
| 453 | { | ||
| 454 | int i, j; | ||
| 455 | |||
| 456 | for_each_online_cpu(i) { | ||
| 457 | for (j = 0; j < NR_IRQS; j++) { | ||
| 458 | if (!irq_desc[j].action) | ||
| 459 | continue; | ||
| 460 | /* Is it a significant load ? */ | ||
| 461 | if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i), j) < | ||
| 462 | useful_load_threshold) | ||
| 463 | continue; | ||
| 464 | balance_irq(i, j); | ||
| 465 | } | ||
| 466 | } | ||
| 467 | balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL, | ||
| 468 | balanced_irq_interval - BALANCED_IRQ_LESS_DELTA); | ||
| 469 | return; | ||
| 470 | } | ||
| 471 | |||
| 472 | static void do_irq_balance(void) | ||
| 473 | { | ||
| 474 | int i, j; | ||
| 475 | unsigned long max_cpu_irq = 0, min_cpu_irq = (~0); | ||
| 476 | unsigned long move_this_load = 0; | ||
| 477 | int max_loaded = 0, min_loaded = 0; | ||
| 478 | int load; | ||
| 479 | unsigned long useful_load_threshold = balanced_irq_interval + 10; | ||
| 480 | int selected_irq; | ||
| 481 | int tmp_loaded, first_attempt = 1; | ||
| 482 | unsigned long tmp_cpu_irq; | ||
| 483 | unsigned long imbalance = 0; | ||
| 484 | cpumask_t allowed_mask, target_cpu_mask, tmp; | ||
| 485 | |||
| 486 | for_each_possible_cpu(i) { | ||
| 487 | int package_index; | ||
| 488 | CPU_IRQ(i) = 0; | ||
| 489 | if (!cpu_online(i)) | ||
| 490 | continue; | ||
| 491 | package_index = CPU_TO_PACKAGEINDEX(i); | ||
| 492 | for (j = 0; j < NR_IRQS; j++) { | ||
| 493 | unsigned long value_now, delta; | ||
| 494 | /* Is this an active IRQ or balancing disabled ? */ | ||
| 495 | if (!irq_desc[j].action || irq_balancing_disabled(j)) | ||
| 496 | continue; | ||
| 497 | if (package_index == i) | ||
| 498 | IRQ_DELTA(package_index, j) = 0; | ||
| 499 | /* Determine the total count per processor per IRQ */ | ||
| 500 | value_now = (unsigned long) kstat_cpu(i).irqs[j]; | ||
| 501 | |||
| 502 | /* Determine the activity per processor per IRQ */ | ||
| 503 | delta = value_now - LAST_CPU_IRQ(i, j); | ||
| 504 | |||
| 505 | /* Update last_cpu_irq[][] for the next time */ | ||
| 506 | LAST_CPU_IRQ(i, j) = value_now; | ||
| 507 | |||
| 508 | /* Ignore IRQs whose rate is less than the clock */ | ||
| 509 | if (delta < useful_load_threshold) | ||
| 510 | continue; | ||
| 511 | /* update the load for the processor or package total */ | ||
| 512 | IRQ_DELTA(package_index, j) += delta; | ||
| 513 | |||
| 514 | /* Keep track of the higher numbered sibling as well */ | ||
| 515 | if (i != package_index) | ||
| 516 | CPU_IRQ(i) += delta; | ||
| 517 | /* | ||
| 518 | * We have sibling A and sibling B in the package | ||
| 519 | * | ||
| 520 | * cpu_irq[A] = load for cpu A + load for cpu B | ||
| 521 | * cpu_irq[B] = load for cpu B | ||
| 522 | */ | ||
| 523 | CPU_IRQ(package_index) += delta; | ||
| 524 | } | ||
| 525 | } | ||
| 526 | /* Find the least loaded processor package */ | ||
| 527 | for_each_online_cpu(i) { | ||
| 528 | if (i != CPU_TO_PACKAGEINDEX(i)) | ||
| 529 | continue; | ||
| 530 | if (min_cpu_irq > CPU_IRQ(i)) { | ||
| 531 | min_cpu_irq = CPU_IRQ(i); | ||
| 532 | min_loaded = i; | ||
| 533 | } | ||
| 534 | } | ||
| 535 | max_cpu_irq = ULONG_MAX; | ||
| 536 | |||
| 537 | tryanothercpu: | ||
| 538 | /* | ||
| 539 | * Look for heaviest loaded processor. | ||
| 540 | * We may come back to get the next heaviest loaded processor. | ||
| 541 | * Skip processors with trivial loads. | ||
| 542 | */ | ||
| 543 | tmp_cpu_irq = 0; | ||
| 544 | tmp_loaded = -1; | ||
| 545 | for_each_online_cpu(i) { | ||
| 546 | if (i != CPU_TO_PACKAGEINDEX(i)) | ||
| 547 | continue; | ||
| 548 | if (max_cpu_irq <= CPU_IRQ(i)) | ||
| 549 | continue; | ||
| 550 | if (tmp_cpu_irq < CPU_IRQ(i)) { | ||
| 551 | tmp_cpu_irq = CPU_IRQ(i); | ||
| 552 | tmp_loaded = i; | ||
| 553 | } | ||
| 554 | } | ||
| 555 | |||
| 556 | if (tmp_loaded == -1) { | ||
| 557 | /* | ||
| 558 | * In the case of a small number of heavy interrupt sources, | ||
| 559 | * we may load some of the cpus too much. We use Ingo's original | ||
| 560 | * approach to rotate them around. | ||
| 561 | */ | ||
| 562 | if (!first_attempt && imbalance >= useful_load_threshold) { | ||
| 563 | rotate_irqs_among_cpus(useful_load_threshold); | ||
| 564 | return; | ||
| 565 | } | ||
| 566 | goto not_worth_the_effort; | ||
| 567 | } | ||
| 568 | |||
| 569 | first_attempt = 0; /* heaviest search */ | ||
| 570 | max_cpu_irq = tmp_cpu_irq; /* load */ | ||
| 571 | max_loaded = tmp_loaded; /* processor */ | ||
| 572 | imbalance = (max_cpu_irq - min_cpu_irq) / 2; | ||
| 573 | |||
| 574 | /* | ||
| 575 | * if the imbalance is less than roughly an eighth (~12%) of the max load, | ||
| 576 | * we have hit diminishing returns, so quit. | ||
| 577 | */ | ||
| 578 | if (imbalance < (max_cpu_irq >> 3)) | ||
| 579 | goto not_worth_the_effort; | ||
| 580 | |||
| 581 | tryanotherirq: | ||
| 582 | /* if we select an IRQ to move that can't go where we want, then | ||
| 583 | * see if there is another one to try. | ||
| 584 | */ | ||
| 585 | move_this_load = 0; | ||
| 586 | selected_irq = -1; | ||
| 587 | for (j = 0; j < NR_IRQS; j++) { | ||
| 588 | /* Is this an active IRQ? */ | ||
| 589 | if (!irq_desc[j].action) | ||
| 590 | continue; | ||
| 591 | if (imbalance <= IRQ_DELTA(max_loaded, j)) | ||
| 592 | continue; | ||
| 593 | /* Try to find the IRQ that is closest to the imbalance | ||
| 594 | * without going over. | ||
| 595 | */ | ||
| 596 | if (move_this_load < IRQ_DELTA(max_loaded, j)) { | ||
| 597 | move_this_load = IRQ_DELTA(max_loaded, j); | ||
| 598 | selected_irq = j; | ||
| 599 | } | ||
| 600 | } | ||
| 601 | if (selected_irq == -1) | ||
| 602 | goto tryanothercpu; | ||
| 603 | |||
| 604 | imbalance = move_this_load; | ||
| 605 | |||
| 606 | /* For physical_balance case, we accumulated both load | ||
| 607 | * values in the one of the siblings cpu_irq[], | ||
| 608 | * to use the same code for physical and logical processors | ||
| 609 | * as much as possible. | ||
| 610 | * | ||
| 611 | * NOTE: the cpu_irq[] array holds the sum of the load for | ||
| 612 | * sibling A and sibling B in the slot for the lowest numbered | ||
| 613 | * sibling (A), _AND_ the load for sibling B in the slot for | ||
| 614 | * the higher numbered sibling. | ||
| 615 | * | ||
| 616 | * We seek the least loaded sibling by making the comparison | ||
| 617 | * (A+B)/2 vs B | ||
| 618 | */ | ||
| 619 | load = CPU_IRQ(min_loaded) >> 1; | ||
| 620 | for_each_cpu_mask(j, per_cpu(cpu_sibling_map, min_loaded)) { | ||
| 621 | if (load > CPU_IRQ(j)) { | ||
| 622 | /* This won't change cpu_sibling_map[min_loaded] */ | ||
| 623 | load = CPU_IRQ(j); | ||
| 624 | min_loaded = j; | ||
| 625 | } | ||
| 626 | } | ||
| 627 | |||
| 628 | cpus_and(allowed_mask, | ||
| 629 | cpu_online_map, | ||
| 630 | balance_irq_affinity[selected_irq]); | ||
| 631 | target_cpu_mask = cpumask_of_cpu(min_loaded); | ||
| 632 | cpus_and(tmp, target_cpu_mask, allowed_mask); | ||
| 633 | |||
| 634 | if (!cpus_empty(tmp)) { | ||
| 635 | /* mark for change destination */ | ||
| 636 | set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded)); | ||
| 637 | |||
| 638 | /* Since we made a change, come back sooner to | ||
| 639 | * check for more variation. | ||
| 640 | */ | ||
| 641 | balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL, | ||
| 642 | balanced_irq_interval - BALANCED_IRQ_LESS_DELTA); | ||
| 643 | return; | ||
| 644 | } | ||
| 645 | goto tryanotherirq; | ||
| 646 | |||
| 647 | not_worth_the_effort: | ||
| 648 | /* | ||
| 649 | * if we did not find an IRQ to move, then adjust the time interval | ||
| 650 | * upward | ||
| 651 | */ | ||
| 652 | balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL, | ||
| 653 | balanced_irq_interval + BALANCED_IRQ_MORE_DELTA); | ||
| 654 | return; | ||
| 655 | } | ||
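A worked example (hypothetical counts) of the (A+B)/2 vs B sibling-load comparison described in the long comment inside do_irq_balance() above:

/*
 * Sibling A is the package index, sibling B the other thread.  If A handled
 * 600 interrupts and B handled 200 over the last interval:
 *   CPU_IRQ(A) = 600 + 200 = 800   (the package slot accumulates both)
 *   CPU_IRQ(B) = 200
 * load = CPU_IRQ(A) >> 1 = 400; since 400 > CPU_IRQ(B), min_loaded becomes
 * B, i.e. the comparison picks the idler thread as the target.
 */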
| 656 | |||
| 657 | static int balanced_irq(void *unused) | ||
| 658 | { | ||
| 659 | int i; | ||
| 660 | unsigned long prev_balance_time = jiffies; | ||
| 661 | long time_remaining = balanced_irq_interval; | ||
| 662 | |||
| 663 | /* push everything to CPU 0 to give us a starting point. */ | ||
| 664 | for (i = 0 ; i < NR_IRQS ; i++) { | ||
| 665 | irq_desc[i].pending_mask = cpumask_of_cpu(0); | ||
| 666 | set_pending_irq(i, cpumask_of_cpu(0)); | ||
| 667 | } | ||
| 668 | |||
| 669 | set_freezable(); | ||
| 670 | for ( ; ; ) { | ||
| 671 | time_remaining = schedule_timeout_interruptible(time_remaining); | ||
| 672 | try_to_freeze(); | ||
| 673 | if (time_after(jiffies, | ||
| 674 | prev_balance_time+balanced_irq_interval)) { | ||
| 675 | preempt_disable(); | ||
| 676 | do_irq_balance(); | ||
| 677 | prev_balance_time = jiffies; | ||
| 678 | time_remaining = balanced_irq_interval; | ||
| 679 | preempt_enable(); | ||
| 680 | } | ||
| 681 | } | ||
| 682 | return 0; | ||
| 683 | } | ||
| 684 | |||
| 685 | static int __init balanced_irq_init(void) | ||
| 686 | { | ||
| 687 | int i; | ||
| 688 | struct cpuinfo_x86 *c; | ||
| 689 | cpumask_t tmp; | ||
| 690 | |||
| 691 | cpus_shift_right(tmp, cpu_online_map, 2); | ||
| 692 | c = &boot_cpu_data; | ||
| 693 | /* When not overridden on the command line, ask the subarchitecture. */ | ||
| 694 | if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH) | ||
| 695 | irqbalance_disabled = NO_BALANCE_IRQ; | ||
| 696 | if (irqbalance_disabled) | ||
| 697 | return 0; | ||
| 698 | |||
| 699 | /* disable irqbalance completely if there is only one processor online */ | ||
| 700 | if (num_online_cpus() < 2) { | ||
| 701 | irqbalance_disabled = 1; | ||
| 702 | return 0; | ||
| 703 | } | ||
| 704 | /* | ||
| 705 | * Enable physical balance only if more than 1 physical processor | ||
| 706 | * is present | ||
| 707 | */ | ||
| 708 | if (smp_num_siblings > 1 && !cpus_empty(tmp)) | ||
| 709 | physical_balance = 1; | ||
| 710 | |||
| 711 | for_each_online_cpu(i) { | ||
| 712 | irq_cpu_data[i].irq_delta = kzalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); | ||
| 713 | irq_cpu_data[i].last_irq = kzalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); | ||
| 714 | if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) { | ||
| 715 | printk(KERN_ERR "balanced_irq_init: out of memory"); | ||
| 716 | goto failed; | ||
| 717 | } | ||
| 718 | } | ||
| 719 | |||
| 720 | printk(KERN_INFO "Starting balanced_irq\n"); | ||
| 721 | if (!IS_ERR(kthread_run(balanced_irq, NULL, "kirqd"))) | ||
| 722 | return 0; | ||
| 723 | printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq"); | ||
| 724 | failed: | ||
| 725 | for_each_possible_cpu(i) { | ||
| 726 | kfree(irq_cpu_data[i].irq_delta); | ||
| 727 | irq_cpu_data[i].irq_delta = NULL; | ||
| 728 | kfree(irq_cpu_data[i].last_irq); | ||
| 729 | irq_cpu_data[i].last_irq = NULL; | ||
| 730 | } | ||
| 731 | return 0; | ||
| 732 | } | ||
| 733 | |||
| 734 | int __devinit irqbalance_disable(char *str) | ||
| 735 | { | ||
| 736 | irqbalance_disabled = 1; | ||
| 737 | return 1; | ||
| 738 | } | ||
| 739 | |||
| 740 | __setup("noirqbalance", irqbalance_disable); | ||
| 741 | |||
| 742 | late_initcall(balanced_irq_init); | ||
| 743 | #endif /* CONFIG_IRQBALANCE */ | ||
| 744 | #endif /* CONFIG_SMP */ | ||
| 745 | |||
| 746 | #ifndef CONFIG_SMP | ||
| 747 | void send_IPI_self(int vector) | ||
| 748 | { | ||
| 749 | unsigned int cfg; | ||
| 750 | |||
| 751 | /* | ||
| 752 | * Wait for idle. | ||
| 753 | */ | ||
| 754 | apic_wait_icr_idle(); | ||
| 755 | cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL; | ||
| 756 | /* | ||
| 757 | * Send the IPI. The write to APIC_ICR fires this off. | ||
| 758 | */ | ||
| 759 | apic_write_around(APIC_ICR, cfg); | ||
| 760 | } | ||
| 761 | #endif /* !CONFIG_SMP */ | ||
| 762 | |||
| 763 | |||
| 764 | /* | ||
| 765 | * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to | ||
| 766 | * specific CPU-side IRQs. | ||
| 767 | */ | ||
| 768 | |||
| 769 | #define MAX_PIRQS 8 | ||
| 770 | static int pirq_entries [MAX_PIRQS]; | ||
| 771 | static int pirqs_enabled; | ||
| 772 | int skip_ioapic_setup; | ||
| 773 | |||
| 774 | static int __init ioapic_pirq_setup(char *str) | ||
| 775 | { | ||
| 776 | int i, max; | ||
| 777 | int ints[MAX_PIRQS+1]; | ||
| 778 | |||
| 779 | get_options(str, ARRAY_SIZE(ints), ints); | ||
| 780 | |||
| 781 | for (i = 0; i < MAX_PIRQS; i++) | ||
| 782 | pirq_entries[i] = -1; | ||
| 783 | |||
| 784 | pirqs_enabled = 1; | ||
| 785 | apic_printk(APIC_VERBOSE, KERN_INFO | ||
| 786 | "PIRQ redirection, working around broken MP-BIOS.\n"); | ||
| 787 | max = MAX_PIRQS; | ||
| 788 | if (ints[0] < MAX_PIRQS) | ||
| 789 | max = ints[0]; | ||
| 790 | |||
| 791 | for (i = 0; i < max; i++) { | ||
| 792 | apic_printk(APIC_VERBOSE, KERN_DEBUG | ||
| 793 | "... PIRQ%d -> IRQ %d\n", i, ints[i+1]); | ||
| 794 | /* | ||
| 795 | * PIRQs are mapped upside down, usually. | ||
| 796 | */ | ||
| 797 | pirq_entries[MAX_PIRQS-i-1] = ints[i+1]; | ||
| 798 | } | ||
| 799 | return 1; | ||
| 800 | } | ||
| 801 | |||
| 802 | __setup("pirq=", ioapic_pirq_setup); | ||
| 803 | |||
| 804 | /* | ||
| 805 | * Find the IRQ entry number of a certain pin. | ||
| 806 | */ | ||
| 807 | static int find_irq_entry(int apic, int pin, int type) | ||
| 808 | { | ||
| 809 | int i; | ||
| 810 | |||
| 811 | for (i = 0; i < mp_irq_entries; i++) | ||
| 812 | if (mp_irqs[i].mp_irqtype == type && | ||
| 813 | (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid || | ||
| 814 | mp_irqs[i].mp_dstapic == MP_APIC_ALL) && | ||
| 815 | mp_irqs[i].mp_dstirq == pin) | ||
| 816 | return i; | ||
| 817 | |||
| 818 | return -1; | ||
| 819 | } | ||
| 820 | |||
| 821 | /* | ||
| 822 | * Find the pin to which IRQ[irq] (ISA) is connected | ||
| 823 | */ | ||
| 824 | static int __init find_isa_irq_pin(int irq, int type) | ||
| 825 | { | ||
| 826 | int i; | ||
| 827 | |||
| 828 | for (i = 0; i < mp_irq_entries; i++) { | ||
| 829 | int lbus = mp_irqs[i].mp_srcbus; | ||
| 830 | |||
| 831 | if (test_bit(lbus, mp_bus_not_pci) && | ||
| 832 | (mp_irqs[i].mp_irqtype == type) && | ||
| 833 | (mp_irqs[i].mp_srcbusirq == irq)) | ||
| 834 | |||
| 835 | return mp_irqs[i].mp_dstirq; | ||
| 836 | } | ||
| 837 | return -1; | ||
| 838 | } | ||
| 839 | |||
| 840 | static int __init find_isa_irq_apic(int irq, int type) | ||
| 841 | { | ||
| 842 | int i; | ||
| 843 | |||
| 844 | for (i = 0; i < mp_irq_entries; i++) { | ||
| 845 | int lbus = mp_irqs[i].mp_srcbus; | ||
| 846 | |||
| 847 | if (test_bit(lbus, mp_bus_not_pci) && | ||
| 848 | (mp_irqs[i].mp_irqtype == type) && | ||
| 849 | (mp_irqs[i].mp_srcbusirq == irq)) | ||
| 850 | break; | ||
| 851 | } | ||
| 852 | if (i < mp_irq_entries) { | ||
| 853 | int apic; | ||
| 854 | for (apic = 0; apic < nr_ioapics; apic++) { | ||
| 855 | if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic) | ||
| 856 | return apic; | ||
| 857 | } | ||
| 858 | } | ||
| 859 | |||
| 860 | return -1; | ||
| 861 | } | ||
| 862 | |||
| 863 | /* | ||
| 864 | * Find a specific PCI IRQ entry. | ||
| 865 | * Not an __init, possibly needed by modules | ||
| 866 | */ | ||
| 867 | static int pin_2_irq(int idx, int apic, int pin); | ||
| 868 | |||
| 869 | int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) | ||
| 870 | { | ||
| 871 | int apic, i, best_guess = -1; | ||
| 872 | |||
| 873 | apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, " | ||
| 874 | "slot:%d, pin:%d.\n", bus, slot, pin); | ||
| 875 | if (test_bit(bus, mp_bus_not_pci)) { | ||
| 876 | printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus); | ||
| 877 | return -1; | ||
| 878 | } | ||
| 879 | for (i = 0; i < mp_irq_entries; i++) { | ||
| 880 | int lbus = mp_irqs[i].mp_srcbus; | ||
| 881 | |||
| 882 | for (apic = 0; apic < nr_ioapics; apic++) | ||
| 883 | if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic || | ||
| 884 | mp_irqs[i].mp_dstapic == MP_APIC_ALL) | ||
| 885 | break; | ||
| 886 | |||
| 887 | if (!test_bit(lbus, mp_bus_not_pci) && | ||
| 888 | !mp_irqs[i].mp_irqtype && | ||
| 889 | (bus == lbus) && | ||
| 890 | (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) { | ||
| 891 | int irq = pin_2_irq(i, apic, mp_irqs[i].mp_dstirq); | ||
| 892 | |||
| 893 | if (!(apic || IO_APIC_IRQ(irq))) | ||
| 894 | continue; | ||
| 895 | |||
| 896 | if (pin == (mp_irqs[i].mp_srcbusirq & 3)) | ||
| 897 | return irq; | ||
| 898 | /* | ||
| 899 | * Use the first all-but-pin matching entry as a | ||
| 900 | * best-guess fuzzy result for broken mptables. | ||
| 901 | */ | ||
| 902 | if (best_guess < 0) | ||
| 903 | best_guess = irq; | ||
| 904 | } | ||
| 905 | } | ||
| 906 | return best_guess; | ||
| 907 | } | ||
| 908 | EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); | ||
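The slot/pin decode in IO_APIC_get_PCI_irq_vector() above relies on mp_srcbusirq packing the PCI device number and INTx pin together; a worked example with hypothetical values:

/*
 * For PCI buses, mp_srcbusirq = (device << 2) | pin, with pin 0..3 for
 * INTA..INTD.  E.g. device 3, INTB: mp_srcbusirq = (3 << 2) | 1 = 13, so
 *   slot = (13 >> 2) & 0x1f = 3
 *   pin  =  13 & 3         = 1
 */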
| 909 | |||
| 910 | /* | ||
| 911 | * This function is currently only a helper for the i386 smp boot process, where | ||
| 912 | * we need to reprogram the ioredtbls to cater for the cpus which have come online, | ||
| 913 | * so the mask in all cases should simply be TARGET_CPUS. | ||
| 914 | */ | ||
| 915 | #ifdef CONFIG_SMP | ||
| 916 | void __init setup_ioapic_dest(void) | ||
| 917 | { | ||
| 918 | int pin, ioapic, irq, irq_entry; | ||
| 919 | |||
| 920 | if (skip_ioapic_setup == 1) | ||
| 921 | return; | ||
| 922 | |||
| 923 | for (ioapic = 0; ioapic < nr_ioapics; ioapic++) { | ||
| 924 | for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { | ||
| 925 | irq_entry = find_irq_entry(ioapic, pin, mp_INT); | ||
| 926 | if (irq_entry == -1) | ||
| 927 | continue; | ||
| 928 | irq = pin_2_irq(irq_entry, ioapic, pin); | ||
| 929 | set_ioapic_affinity_irq(irq, TARGET_CPUS); | ||
| 930 | } | ||
| 931 | |||
| 932 | } | ||
| 933 | } | ||
| 934 | #endif | ||
| 935 | |||
| 936 | #if defined(CONFIG_EISA) || defined(CONFIG_MCA) | ||
| 937 | /* | ||
| 938 | * EISA Edge/Level control register, ELCR | ||
| 939 | */ | ||
| 940 | static int EISA_ELCR(unsigned int irq) | ||
| 941 | { | ||
| 942 | if (irq < 16) { | ||
| 943 | unsigned int port = 0x4d0 + (irq >> 3); | ||
| 944 | return (inb(port) >> (irq & 7)) & 1; | ||
| 945 | } | ||
| 946 | apic_printk(APIC_VERBOSE, KERN_INFO | ||
| 947 | "Broken MPtable reports ISA irq %d\n", irq); | ||
| 948 | return 0; | ||
| 949 | } | ||
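A quick worked example of the ELCR lookup above (the IRQ number is hypothetical); the two ELCR ports at 0x4d0/0x4d1 hold one trigger bit per ISA IRQ:

/*
 * IRQ 10: port = 0x4d0 + (10 >> 3) = 0x4d1, bit = 10 & 7 = 2;
 * a set bit means the line is level triggered, a clear bit means edge.
 */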
| 950 | #endif | ||
| 951 | |||
| 952 | /* ISA interrupts are always polarity zero edge triggered, | ||
| 953 | * when listed as conforming in the MP table. */ | ||
| 954 | |||
| 955 | #define default_ISA_trigger(idx) (0) | ||
| 956 | #define default_ISA_polarity(idx) (0) | ||
| 957 | |||
| 958 | /* EISA interrupts are always polarity zero and can be edge or level | ||
| 959 | * trigger depending on the ELCR value. If an interrupt is listed as | ||
| 960 | * EISA conforming in the MP table, that means its trigger type must | ||
| 961 | * be read in from the ELCR */ | ||
| 962 | |||
| 963 | #define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq)) | ||
| 964 | #define default_EISA_polarity(idx) default_ISA_polarity(idx) | ||
| 965 | |||
| 966 | /* PCI interrupts are always polarity one level triggered, | ||
| 967 | * when listed as conforming in the MP table. */ | ||
| 968 | |||
| 969 | #define default_PCI_trigger(idx) (1) | ||
| 970 | #define default_PCI_polarity(idx) (1) | ||
| 971 | |||
| 972 | /* MCA interrupts are always polarity zero level triggered, | ||
| 973 | * when listed as conforming in the MP table. */ | ||
| 974 | |||
| 975 | #define default_MCA_trigger(idx) (1) | ||
| 976 | #define default_MCA_polarity(idx) default_ISA_polarity(idx) | ||
| 977 | |||
| 978 | static int MPBIOS_polarity(int idx) | ||
| 979 | { | ||
| 980 | int bus = mp_irqs[idx].mp_srcbus; | ||
| 981 | int polarity; | ||
| 982 | |||
| 983 | /* | ||
| 984 | * Determine IRQ line polarity (high active or low active): | ||
| 985 | */ | ||
| 986 | switch (mp_irqs[idx].mp_irqflag & 3) { | ||
| 987 | case 0: /* conforms, ie. bus-type dependent polarity */ | ||
| 988 | { | ||
| 989 | polarity = test_bit(bus, mp_bus_not_pci)? | ||
| 990 | default_ISA_polarity(idx): | ||
| 991 | default_PCI_polarity(idx); | ||
| 992 | break; | ||
| 993 | } | ||
| 994 | case 1: /* high active */ | ||
| 995 | { | ||
| 996 | polarity = 0; | ||
| 997 | break; | ||
| 998 | } | ||
| 999 | case 2: /* reserved */ | ||
| 1000 | { | ||
| 1001 | printk(KERN_WARNING "broken BIOS!!\n"); | ||
| 1002 | polarity = 1; | ||
| 1003 | break; | ||
| 1004 | } | ||
| 1005 | case 3: /* low active */ | ||
| 1006 | { | ||
| 1007 | polarity = 1; | ||
| 1008 | break; | ||
| 1009 | } | ||
| 1010 | default: /* invalid */ | ||
| 1011 | { | ||
| 1012 | printk(KERN_WARNING "broken BIOS!!\n"); | ||
| 1013 | polarity = 1; | ||
| 1014 | break; | ||
| 1015 | } | ||
| 1016 | } | ||
| 1017 | return polarity; | ||
| 1018 | } | ||
| 1019 | |||
| 1020 | static int MPBIOS_trigger(int idx) | ||
| 1021 | { | ||
| 1022 | int bus = mp_irqs[idx].mp_srcbus; | ||
| 1023 | int trigger; | ||
| 1024 | |||
| 1025 | /* | ||
| 1026 | * Determine IRQ trigger mode (edge or level sensitive): | ||
| 1027 | */ | ||
| 1028 | switch ((mp_irqs[idx].mp_irqflag>>2) & 3) { | ||
| 1029 | case 0: /* conforms, ie. bus-type dependent */ | ||
| 1030 | { | ||
| 1031 | trigger = test_bit(bus, mp_bus_not_pci)? | ||
| 1032 | default_ISA_trigger(idx): | ||
| 1033 | default_PCI_trigger(idx); | ||
| 1034 | #if defined(CONFIG_EISA) || defined(CONFIG_MCA) | ||
| 1035 | switch (mp_bus_id_to_type[bus]) { | ||
| 1036 | case MP_BUS_ISA: /* ISA pin */ | ||
| 1037 | { | ||
| 1038 | /* set before the switch */ | ||
| 1039 | break; | ||
| 1040 | } | ||
| 1041 | case MP_BUS_EISA: /* EISA pin */ | ||
| 1042 | { | ||
| 1043 | trigger = default_EISA_trigger(idx); | ||
| 1044 | break; | ||
| 1045 | } | ||
| 1046 | case MP_BUS_PCI: /* PCI pin */ | ||
| 1047 | { | ||
| 1048 | /* set before the switch */ | ||
| 1049 | break; | ||
| 1050 | } | ||
| 1051 | case MP_BUS_MCA: /* MCA pin */ | ||
| 1052 | { | ||
| 1053 | trigger = default_MCA_trigger(idx); | ||
| 1054 | break; | ||
| 1055 | } | ||
| 1056 | default: | ||
| 1057 | { | ||
| 1058 | printk(KERN_WARNING "broken BIOS!!\n"); | ||
| 1059 | trigger = 1; | ||
| 1060 | break; | ||
| 1061 | } | ||
| 1062 | } | ||
| 1063 | #endif | ||
| 1064 | break; | ||
| 1065 | } | ||
| 1066 | case 1: /* edge */ | ||
| 1067 | { | ||
| 1068 | trigger = 0; | ||
| 1069 | break; | ||
| 1070 | } | ||
| 1071 | case 2: /* reserved */ | ||
| 1072 | { | ||
| 1073 | printk(KERN_WARNING "broken BIOS!!\n"); | ||
| 1074 | trigger = 1; | ||
| 1075 | break; | ||
| 1076 | } | ||
| 1077 | case 3: /* level */ | ||
| 1078 | { | ||
| 1079 | trigger = 1; | ||
| 1080 | break; | ||
| 1081 | } | ||
| 1082 | default: /* invalid */ | ||
| 1083 | { | ||
| 1084 | printk(KERN_WARNING "broken BIOS!!\n"); | ||
| 1085 | trigger = 0; | ||
| 1086 | break; | ||
| 1087 | } | ||
| 1088 | } | ||
| 1089 | return trigger; | ||
| 1090 | } | ||
| 1091 | |||
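MPBIOS_polarity() and MPBIOS_trigger() above both decode the MP-table irqflag field: bits 0-1 carry the polarity and bits 2-3 carry the trigger mode. A minimal userspace sketch of the same decoding (the sample flag values are made up):

```c
#include <stdio.h>

/* Minimal sketch (not kernel code): irqflag packs polarity in bits 0-1 and
 * trigger mode in bits 2-3, exactly as the two switch statements above
 * interpret it. */
static const char *mp_polarity(unsigned int irqflag)
{
        switch (irqflag & 3) {
        case 0:  return "conforms to bus";
        case 1:  return "active high";
        case 3:  return "active low";
        default: return "reserved (broken BIOS)";
        }
}

static const char *mp_trigger(unsigned int irqflag)
{
        switch ((irqflag >> 2) & 3) {
        case 0:  return "conforms to bus";
        case 1:  return "edge";
        case 3:  return "level";
        default: return "reserved (broken BIOS)";
        }
}

int main(void)
{
        unsigned int samples[] = { 0x0, 0x5, 0xd, 0xf };
        int i;

        for (i = 0; i < 4; i++)
                printf("irqflag=0x%x: polarity=%s, trigger=%s\n", samples[i],
                       mp_polarity(samples[i]), mp_trigger(samples[i]));
        return 0;
}
```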
| 1092 | static inline int irq_polarity(int idx) | ||
| 1093 | { | ||
| 1094 | return MPBIOS_polarity(idx); | ||
| 1095 | } | ||
| 1096 | |||
| 1097 | static inline int irq_trigger(int idx) | ||
| 1098 | { | ||
| 1099 | return MPBIOS_trigger(idx); | ||
| 1100 | } | ||
| 1101 | |||
| 1102 | static int pin_2_irq(int idx, int apic, int pin) | ||
| 1103 | { | ||
| 1104 | int irq, i; | ||
| 1105 | int bus = mp_irqs[idx].mp_srcbus; | ||
| 1106 | |||
| 1107 | /* | ||
| 1108 | * Debugging check, we are in big trouble if this message pops up! | ||
| 1109 | */ | ||
| 1110 | if (mp_irqs[idx].mp_dstirq != pin) | ||
| 1111 | printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); | ||
| 1112 | |||
| 1113 | if (test_bit(bus, mp_bus_not_pci)) | ||
| 1114 | irq = mp_irqs[idx].mp_srcbusirq; | ||
| 1115 | else { | ||
| 1116 | /* | ||
| 1117 | * PCI IRQs are mapped in order | ||
| 1118 | */ | ||
| 1119 | i = irq = 0; | ||
| 1120 | while (i < apic) | ||
| 1121 | irq += nr_ioapic_registers[i++]; | ||
| 1122 | irq += pin; | ||
| 1123 | |||
| 1124 | /* | ||
| 1125 | * For MPS mode, so far only needed by ES7000 platform | ||
| 1126 | */ | ||
| 1127 | if (ioapic_renumber_irq) | ||
| 1128 | irq = ioapic_renumber_irq(apic, irq); | ||
| 1129 | } | ||
| 1130 | |||
| 1131 | /* | ||
| 1132 | * PCI IRQ command line redirection. Yes, limits are hardcoded. | ||
| 1133 | */ | ||
| 1134 | if ((pin >= 16) && (pin <= 23)) { | ||
| 1135 | if (pirq_entries[pin-16] != -1) { | ||
| 1136 | if (!pirq_entries[pin-16]) { | ||
| 1137 | apic_printk(APIC_VERBOSE, KERN_DEBUG | ||
| 1138 | "disabling PIRQ%d\n", pin-16); | ||
| 1139 | } else { | ||
| 1140 | irq = pirq_entries[pin-16]; | ||
| 1141 | apic_printk(APIC_VERBOSE, KERN_DEBUG | ||
| 1142 | "using PIRQ%d -> IRQ %d\n", | ||
| 1143 | pin-16, irq); | ||
| 1144 | } | ||
| 1145 | } | ||
| 1146 | } | ||
| 1147 | return irq; | ||
| 1148 | } | ||
| 1149 | |||
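For PCI pins, pin_2_irq() above numbers IRQs by summing the redirection-entry counts of all lower-numbered I/O APICs and adding the pin index. A small sketch of that arithmetic (the per-APIC register counts below are hypothetical):

```c
#include <stdio.h>

/* Minimal sketch of the PCI-pin numbering used by pin_2_irq() above. */
static int nr_registers[] = { 24, 24, 16 };     /* hypothetical pin counts */

static int pci_pin_to_irq(int apic, int pin)
{
        int i, irq = 0;

        for (i = 0; i < apic; i++)
                irq += nr_registers[i];
        return irq + pin;
}

int main(void)
{
        printf("apic 0, pin  3 -> irq %d\n", pci_pin_to_irq(0, 3));   /* 3  */
        printf("apic 1, pin  3 -> irq %d\n", pci_pin_to_irq(1, 3));   /* 27 */
        printf("apic 2, pin 10 -> irq %d\n", pci_pin_to_irq(2, 10));  /* 58 */
        return 0;
}
```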
| 1150 | static inline int IO_APIC_irq_trigger(int irq) | ||
| 1151 | { | ||
| 1152 | int apic, idx, pin; | ||
| 1153 | |||
| 1154 | for (apic = 0; apic < nr_ioapics; apic++) { | ||
| 1155 | for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { | ||
| 1156 | idx = find_irq_entry(apic, pin, mp_INT); | ||
| 1157 | if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin))) | ||
| 1158 | return irq_trigger(idx); | ||
| 1159 | } | ||
| 1160 | } | ||
| 1161 | /* | ||
| 1162 | * nonexistent IRQs are edge default | ||
| 1163 | */ | ||
| 1164 | return 0; | ||
| 1165 | } | ||
| 1166 | |||
| 1167 | /* irq_vector[] is indexed by the sum of all RTEs in all I/O APICs. */ | ||
| 1168 | static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 }; | ||
| 1169 | |||
| 1170 | static int __assign_irq_vector(int irq) | ||
| 1171 | { | ||
| 1172 | static int current_vector = FIRST_DEVICE_VECTOR, current_offset; | ||
| 1173 | int vector, offset; | ||
| 1174 | |||
| 1175 | BUG_ON((unsigned)irq >= NR_IRQ_VECTORS); | ||
| 1176 | |||
| 1177 | if (irq_vector[irq] > 0) | ||
| 1178 | return irq_vector[irq]; | ||
| 1179 | |||
| 1180 | vector = current_vector; | ||
| 1181 | offset = current_offset; | ||
| 1182 | next: | ||
| 1183 | vector += 8; | ||
| 1184 | if (vector >= first_system_vector) { | ||
| 1185 | offset = (offset + 1) % 8; | ||
| 1186 | vector = FIRST_DEVICE_VECTOR + offset; | ||
| 1187 | } | ||
| 1188 | if (vector == current_vector) | ||
| 1189 | return -ENOSPC; | ||
| 1190 | if (test_and_set_bit(vector, used_vectors)) | ||
| 1191 | goto next; | ||
| 1192 | |||
| 1193 | current_vector = vector; | ||
| 1194 | current_offset = offset; | ||
| 1195 | irq_vector[irq] = vector; | ||
| 1196 | |||
| 1197 | return vector; | ||
| 1198 | } | ||
| 1199 | |||
| 1200 | static int assign_irq_vector(int irq) | ||
| 1201 | { | ||
| 1202 | unsigned long flags; | ||
| 1203 | int vector; | ||
| 1204 | |||
| 1205 | spin_lock_irqsave(&vector_lock, flags); | ||
| 1206 | vector = __assign_irq_vector(irq); | ||
| 1207 | spin_unlock_irqrestore(&vector_lock, flags); | ||
| 1208 | |||
| 1209 | return vector; | ||
| 1210 | } | ||
| 1211 | |||
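__assign_irq_vector() above hands out vectors in steps of 8 and, on wrap-around, restarts at FIRST_DEVICE_VECTOR plus a rotating offset, so consecutive IRQs land in different priority levels (a priority level is vector >> 4). The following standalone sketch mimics that walk with assumed vector constants and without the used_vectors bitmap:

```c
#include <stdio.h>

/* Standalone sketch of the allocation walk in __assign_irq_vector() above;
 * the vector constants are assumptions, not the kernel's definitions. */
#define FIRST_DEVICE_VECTOR     0x31    /* assumed */
#define FIRST_SYSTEM_VECTOR     0xef    /* assumed */

int main(void)
{
        int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
        int irq;

        for (irq = 0; irq < 8; irq++) {
                int vector = current_vector + 8;

                if (vector >= FIRST_SYSTEM_VECTOR) {
                        current_offset = (current_offset + 1) % 8;
                        vector = FIRST_DEVICE_VECTOR + current_offset;
                }
                current_vector = vector;
                printf("irq %d -> vector 0x%02x (priority level %d)\n",
                       irq, vector, vector >> 4);
        }
        return 0;
}
```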
| 1212 | void setup_vector_irq(int cpu) | ||
| 1213 | { | ||
| 1214 | } | ||
| 1215 | |||
| 1216 | static struct irq_chip ioapic_chip; | ||
| 1217 | |||
| 1218 | #define IOAPIC_AUTO -1 | ||
| 1219 | #define IOAPIC_EDGE 0 | ||
| 1220 | #define IOAPIC_LEVEL 1 | ||
| 1221 | |||
| 1222 | static void ioapic_register_intr(int irq, int vector, unsigned long trigger) | ||
| 1223 | { | ||
| 1224 | if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || | ||
| 1225 | trigger == IOAPIC_LEVEL) { | ||
| 1226 | irq_desc[irq].status |= IRQ_LEVEL; | ||
| 1227 | set_irq_chip_and_handler_name(irq, &ioapic_chip, | ||
| 1228 | handle_fasteoi_irq, "fasteoi"); | ||
| 1229 | } else { | ||
| 1230 | irq_desc[irq].status &= ~IRQ_LEVEL; | ||
| 1231 | set_irq_chip_and_handler_name(irq, &ioapic_chip, | ||
| 1232 | handle_edge_irq, "edge"); | ||
| 1233 | } | ||
| 1234 | set_intr_gate(vector, interrupt[irq]); | ||
| 1235 | } | ||
| 1236 | |||
| 1237 | static void __init setup_IO_APIC_irqs(void) | ||
| 1238 | { | ||
| 1239 | struct IO_APIC_route_entry entry; | ||
| 1240 | int apic, pin, idx, irq, first_notcon = 1, vector; | ||
| 1241 | |||
| 1242 | apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); | ||
| 1243 | |||
| 1244 | for (apic = 0; apic < nr_ioapics; apic++) { | ||
| 1245 | for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { | ||
| 1246 | |||
| 1247 | /* | ||
| 1248 | * add it to the IO-APIC irq-routing table: | ||
| 1249 | */ | ||
| 1250 | memset(&entry, 0, sizeof(entry)); | ||
| 1251 | |||
| 1252 | entry.delivery_mode = INT_DELIVERY_MODE; | ||
| 1253 | entry.dest_mode = INT_DEST_MODE; | ||
| 1254 | entry.mask = 0; /* enable IRQ */ | ||
| 1255 | entry.dest.logical.logical_dest = | ||
| 1256 | cpu_mask_to_apicid(TARGET_CPUS); | ||
| 1257 | |||
| 1258 | idx = find_irq_entry(apic, pin, mp_INT); | ||
| 1259 | if (idx == -1) { | ||
| 1260 | if (first_notcon) { | ||
| 1261 | apic_printk(APIC_VERBOSE, KERN_DEBUG | ||
| 1262 | " IO-APIC (apicid-pin) %d-%d", | ||
| 1263 | mp_ioapics[apic].mp_apicid, | ||
| 1264 | pin); | ||
| 1265 | first_notcon = 0; | ||
| 1266 | } else | ||
| 1267 | apic_printk(APIC_VERBOSE, ", %d-%d", | ||
| 1268 | mp_ioapics[apic].mp_apicid, pin); | ||
| 1269 | continue; | ||
| 1270 | } | ||
| 1271 | |||
| 1272 | if (!first_notcon) { | ||
| 1273 | apic_printk(APIC_VERBOSE, " not connected.\n"); | ||
| 1274 | first_notcon = 1; | ||
| 1275 | } | ||
| 1276 | |||
| 1277 | entry.trigger = irq_trigger(idx); | ||
| 1278 | entry.polarity = irq_polarity(idx); | ||
| 1279 | |||
| 1280 | if (irq_trigger(idx)) { | ||
| 1281 | entry.trigger = 1; | ||
| 1282 | entry.mask = 1; | ||
| 1283 | } | ||
| 1284 | |||
| 1285 | irq = pin_2_irq(idx, apic, pin); | ||
| 1286 | /* | ||
| 1287 | * skip adding the timer int on secondary nodes, which causes | ||
| 1288 | * a small but painful rift in the time-space continuum | ||
| 1289 | */ | ||
| 1290 | if (multi_timer_check(apic, irq)) | ||
| 1291 | continue; | ||
| 1292 | else | ||
| 1293 | add_pin_to_irq(irq, apic, pin); | ||
| 1294 | |||
| 1295 | if (!apic && !IO_APIC_IRQ(irq)) | ||
| 1296 | continue; | ||
| 1297 | |||
| 1298 | if (IO_APIC_IRQ(irq)) { | ||
| 1299 | vector = assign_irq_vector(irq); | ||
| 1300 | entry.vector = vector; | ||
| 1301 | ioapic_register_intr(irq, vector, IOAPIC_AUTO); | ||
| 1302 | |||
| 1303 | if (!apic && (irq < 16)) | ||
| 1304 | disable_8259A_irq(irq); | ||
| 1305 | } | ||
| 1306 | ioapic_write_entry(apic, pin, entry); | ||
| 1307 | } | ||
| 1308 | } | ||
| 1309 | |||
| 1310 | if (!first_notcon) | ||
| 1311 | apic_printk(APIC_VERBOSE, " not connected.\n"); | ||
| 1312 | } | ||
| 1313 | |||
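setup_IO_APIC_irqs() above fills in the IO_APIC_route_entry fields that ioapic_write_entry() programs into the hardware. As a rough hand-packed sketch of the 64-bit redirection table entry, using bit positions from the 82093AA datasheet rather than the kernel's struct layout:

```c
#include <stdio.h>
#include <stdint.h>

/* Rough sketch of one 64-bit redirection table entry; all field values are
 * arbitrary examples. */
int main(void)
{
        uint64_t rte = 0;

        rte |= 0x41;                    /* vector (assumed example)       */
        rte |= (uint64_t)0 << 8;        /* delivery mode: fixed           */
        rte |= (uint64_t)1 << 11;       /* destination mode: logical      */
        rte |= (uint64_t)1 << 13;       /* polarity: active low           */
        rte |= (uint64_t)1 << 15;       /* trigger mode: level            */
        rte |= (uint64_t)0 << 16;       /* mask bit: interrupt enabled    */
        rte |= (uint64_t)0x0f << 56;    /* logical destination: CPUs 0-3  */

        printf("RTE = 0x%016llx\n", (unsigned long long)rte);
        return 0;
}
```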
| 1314 | /* | ||
| 1315 | * Set up the timer pin, possibly with the 8259A-master behind. | ||
| 1316 | */ | ||
| 1317 | static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, | ||
| 1318 | int vector) | ||
| 1319 | { | ||
| 1320 | struct IO_APIC_route_entry entry; | ||
| 1321 | |||
| 1322 | memset(&entry, 0, sizeof(entry)); | ||
| 1323 | |||
| 1324 | /* | ||
| 1325 | * We use logical delivery to get the timer IRQ | ||
| 1326 | * to the first CPU. | ||
| 1327 | */ | ||
| 1328 | entry.dest_mode = INT_DEST_MODE; | ||
| 1329 | entry.mask = 1; /* mask IRQ now */ | ||
| 1330 | entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); | ||
| 1331 | entry.delivery_mode = INT_DELIVERY_MODE; | ||
| 1332 | entry.polarity = 0; | ||
| 1333 | entry.trigger = 0; | ||
| 1334 | entry.vector = vector; | ||
| 1335 | |||
| 1336 | /* | ||
| 1337 | * The timer IRQ doesn't have to know that behind the | ||
| 1338 | * scenes we may have an 8259A-master in AEOI mode ... | ||
| 1339 | */ | ||
| 1340 | ioapic_register_intr(0, vector, IOAPIC_EDGE); | ||
| 1341 | |||
| 1342 | /* | ||
| 1343 | * Add it to the IO-APIC irq-routing table: | ||
| 1344 | */ | ||
| 1345 | ioapic_write_entry(apic, pin, entry); | ||
| 1346 | } | ||
| 1347 | |||
| 1348 | void __init print_IO_APIC(void) | ||
| 1349 | { | ||
| 1350 | int apic, i; | ||
| 1351 | union IO_APIC_reg_00 reg_00; | ||
| 1352 | union IO_APIC_reg_01 reg_01; | ||
| 1353 | union IO_APIC_reg_02 reg_02; | ||
| 1354 | union IO_APIC_reg_03 reg_03; | ||
| 1355 | unsigned long flags; | ||
| 1356 | |||
| 1357 | if (apic_verbosity == APIC_QUIET) | ||
| 1358 | return; | ||
| 1359 | |||
| 1360 | printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); | ||
| 1361 | for (i = 0; i < nr_ioapics; i++) | ||
| 1362 | printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", | ||
| 1363 | mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]); | ||
| 1364 | |||
| 1365 | /* | ||
| 1366 | * We are a bit conservative about what we expect. We have to | ||
| 1367 | * know about every hardware change ASAP. | ||
| 1368 | */ | ||
| 1369 | printk(KERN_INFO "testing the IO APIC.......................\n"); | ||
| 1370 | |||
| 1371 | for (apic = 0; apic < nr_ioapics; apic++) { | ||
| 1372 | |||
| 1373 | spin_lock_irqsave(&ioapic_lock, flags); | ||
| 1374 | reg_00.raw = io_apic_read(apic, 0); | ||
| 1375 | reg_01.raw = io_apic_read(apic, 1); | ||
| 1376 | if (reg_01.bits.version >= 0x10) | ||
| 1377 | reg_02.raw = io_apic_read(apic, 2); | ||
| 1378 | if (reg_01.bits.version >= 0x20) | ||
| 1379 | reg_03.raw = io_apic_read(apic, 3); | ||
| 1380 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
| 1381 | |||
| 1382 | printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid); | ||
| 1383 | printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); | ||
| 1384 | printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); | ||
| 1385 | printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); | ||
| 1386 | printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS); | ||
| 1387 | |||
| 1388 | printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw); | ||
| 1389 | printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries); | ||
| 1390 | |||
| 1391 | printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); | ||
| 1392 | printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version); | ||
| 1393 | |||
| 1394 | /* | ||
| 1395 | * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02, | ||
| 1396 | * but the value of reg_02 is read as the previous read register | ||
| 1397 | * value, so ignore it if reg_02 == reg_01. | ||
| 1398 | */ | ||
| 1399 | if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) { | ||
| 1400 | printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw); | ||
| 1401 | printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration); | ||
| 1402 | } | ||
| 1403 | |||
| 1404 | /* | ||
| 1405 | * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02 | ||
| 1406 | * or reg_03, but the value of reg_0[23] is read as the previous read | ||
| 1407 | * register value, so ignore it if reg_03 == reg_0[12]. | ||
| 1408 | */ | ||
| 1409 | if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw && | ||
| 1410 | reg_03.raw != reg_01.raw) { | ||
| 1411 | printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw); | ||
| 1412 | printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT); | ||
| 1413 | } | ||
| 1414 | |||
| 1415 | printk(KERN_DEBUG ".... IRQ redirection table:\n"); | ||
| 1416 | |||
| 1417 | printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol" | ||
| 1418 | " Stat Dest Deli Vect: \n"); | ||
| 1419 | |||
| 1420 | for (i = 0; i <= reg_01.bits.entries; i++) { | ||
| 1421 | struct IO_APIC_route_entry entry; | ||
| 1422 | |||
| 1423 | entry = ioapic_read_entry(apic, i); | ||
| 1424 | |||
| 1425 | printk(KERN_DEBUG " %02x %03X %02X ", | ||
| 1426 | i, | ||
| 1427 | entry.dest.logical.logical_dest, | ||
| 1428 | entry.dest.physical.physical_dest | ||
| 1429 | ); | ||
| 1430 | |||
| 1431 | printk("%1d %1d %1d %1d %1d %1d %1d %02X\n", | ||
| 1432 | entry.mask, | ||
| 1433 | entry.trigger, | ||
| 1434 | entry.irr, | ||
| 1435 | entry.polarity, | ||
| 1436 | entry.delivery_status, | ||
| 1437 | entry.dest_mode, | ||
| 1438 | entry.delivery_mode, | ||
| 1439 | entry.vector | ||
| 1440 | ); | ||
| 1441 | } | ||
| 1442 | } | ||
| 1443 | printk(KERN_DEBUG "IRQ to pin mappings:\n"); | ||
| 1444 | for (i = 0; i < NR_IRQS; i++) { | ||
| 1445 | struct irq_pin_list *entry = irq_2_pin + i; | ||
| 1446 | if (entry->pin < 0) | ||
| 1447 | continue; | ||
| 1448 | printk(KERN_DEBUG "IRQ%d ", i); | ||
| 1449 | for (;;) { | ||
| 1450 | printk("-> %d:%d", entry->apic, entry->pin); | ||
| 1451 | if (!entry->next) | ||
| 1452 | break; | ||
| 1453 | entry = irq_2_pin + entry->next; | ||
| 1454 | } | ||
| 1455 | printk("\n"); | ||
| 1456 | } | ||
| 1457 | |||
| 1458 | printk(KERN_INFO ".................................... done.\n"); | ||
| 1459 | |||
| 1460 | return; | ||
| 1461 | } | ||
| 1462 | |||
| 1463 | #if 0 | ||
| 1464 | |||
| 1465 | static void print_APIC_bitfield(int base) | ||
| 1466 | { | ||
| 1467 | unsigned int v; | ||
| 1468 | int i, j; | ||
| 1469 | |||
| 1470 | if (apic_verbosity == APIC_QUIET) | ||
| 1471 | return; | ||
| 1472 | |||
| 1473 | printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG); | ||
| 1474 | for (i = 0; i < 8; i++) { | ||
| 1475 | v = apic_read(base + i*0x10); | ||
| 1476 | for (j = 0; j < 32; j++) { | ||
| 1477 | if (v & (1<<j)) | ||
| 1478 | printk("1"); | ||
| 1479 | else | ||
| 1480 | printk("0"); | ||
| 1481 | } | ||
| 1482 | printk("\n"); | ||
| 1483 | } | ||
| 1484 | } | ||
| 1485 | |||
| 1486 | void /*__init*/ print_local_APIC(void *dummy) | ||
| 1487 | { | ||
| 1488 | unsigned int v, ver, maxlvt; | ||
| 1489 | |||
| 1490 | if (apic_verbosity == APIC_QUIET) | ||
| 1491 | return; | ||
| 1492 | |||
| 1493 | printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", | ||
| 1494 | smp_processor_id(), hard_smp_processor_id()); | ||
| 1495 | v = apic_read(APIC_ID); | ||
| 1496 | printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, | ||
| 1497 | GET_APIC_ID(read_apic_id())); | ||
| 1498 | v = apic_read(APIC_LVR); | ||
| 1499 | printk(KERN_INFO "... APIC VERSION: %08x\n", v); | ||
| 1500 | ver = GET_APIC_VERSION(v); | ||
| 1501 | maxlvt = lapic_get_maxlvt(); | ||
| 1502 | |||
| 1503 | v = apic_read(APIC_TASKPRI); | ||
| 1504 | printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK); | ||
| 1505 | |||
| 1506 | if (APIC_INTEGRATED(ver)) { /* !82489DX */ | ||
| 1507 | v = apic_read(APIC_ARBPRI); | ||
| 1508 | printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v, | ||
| 1509 | v & APIC_ARBPRI_MASK); | ||
| 1510 | v = apic_read(APIC_PROCPRI); | ||
| 1511 | printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v); | ||
| 1512 | } | ||
| 1513 | |||
| 1514 | v = apic_read(APIC_EOI); | ||
| 1515 | printk(KERN_DEBUG "... APIC EOI: %08x\n", v); | ||
| 1516 | v = apic_read(APIC_RRR); | ||
| 1517 | printk(KERN_DEBUG "... APIC RRR: %08x\n", v); | ||
| 1518 | v = apic_read(APIC_LDR); | ||
| 1519 | printk(KERN_DEBUG "... APIC LDR: %08x\n", v); | ||
| 1520 | v = apic_read(APIC_DFR); | ||
| 1521 | printk(KERN_DEBUG "... APIC DFR: %08x\n", v); | ||
| 1522 | v = apic_read(APIC_SPIV); | ||
| 1523 | printk(KERN_DEBUG "... APIC SPIV: %08x\n", v); | ||
| 1524 | |||
| 1525 | printk(KERN_DEBUG "... APIC ISR field:\n"); | ||
| 1526 | print_APIC_bitfield(APIC_ISR); | ||
| 1527 | printk(KERN_DEBUG "... APIC TMR field:\n"); | ||
| 1528 | print_APIC_bitfield(APIC_TMR); | ||
| 1529 | printk(KERN_DEBUG "... APIC IRR field:\n"); | ||
| 1530 | print_APIC_bitfield(APIC_IRR); | ||
| 1531 | |||
| 1532 | if (APIC_INTEGRATED(ver)) { /* !82489DX */ | ||
| 1533 | if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ | ||
| 1534 | apic_write(APIC_ESR, 0); | ||
| 1535 | v = apic_read(APIC_ESR); | ||
| 1536 | printk(KERN_DEBUG "... APIC ESR: %08x\n", v); | ||
| 1537 | } | ||
| 1538 | |||
| 1539 | v = apic_read(APIC_ICR); | ||
| 1540 | printk(KERN_DEBUG "... APIC ICR: %08x\n", v); | ||
| 1541 | v = apic_read(APIC_ICR2); | ||
| 1542 | printk(KERN_DEBUG "... APIC ICR2: %08x\n", v); | ||
| 1543 | |||
| 1544 | v = apic_read(APIC_LVTT); | ||
| 1545 | printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); | ||
| 1546 | |||
| 1547 | if (maxlvt > 3) { /* PC is LVT#4. */ | ||
| 1548 | v = apic_read(APIC_LVTPC); | ||
| 1549 | printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v); | ||
| 1550 | } | ||
| 1551 | v = apic_read(APIC_LVT0); | ||
| 1552 | printk(KERN_DEBUG "... APIC LVT0: %08x\n", v); | ||
| 1553 | v = apic_read(APIC_LVT1); | ||
| 1554 | printk(KERN_DEBUG "... APIC LVT1: %08x\n", v); | ||
| 1555 | |||
| 1556 | if (maxlvt > 2) { /* ERR is LVT#3. */ | ||
| 1557 | v = apic_read(APIC_LVTERR); | ||
| 1558 | printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v); | ||
| 1559 | } | ||
| 1560 | |||
| 1561 | v = apic_read(APIC_TMICT); | ||
| 1562 | printk(KERN_DEBUG "... APIC TMICT: %08x\n", v); | ||
| 1563 | v = apic_read(APIC_TMCCT); | ||
| 1564 | printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v); | ||
| 1565 | v = apic_read(APIC_TDCR); | ||
| 1566 | printk(KERN_DEBUG "... APIC TDCR: %08x\n", v); | ||
| 1567 | printk("\n"); | ||
| 1568 | } | ||
| 1569 | |||
| 1570 | void print_all_local_APICs(void) | ||
| 1571 | { | ||
| 1572 | on_each_cpu(print_local_APIC, NULL, 1); | ||
| 1573 | } | ||
| 1574 | |||
| 1575 | void /*__init*/ print_PIC(void) | ||
| 1576 | { | ||
| 1577 | unsigned int v; | ||
| 1578 | unsigned long flags; | ||
| 1579 | |||
| 1580 | if (apic_verbosity == APIC_QUIET) | ||
| 1581 | return; | ||
| 1582 | |||
| 1583 | printk(KERN_DEBUG "\nprinting PIC contents\n"); | ||
| 1584 | |||
| 1585 | spin_lock_irqsave(&i8259A_lock, flags); | ||
| 1586 | |||
| 1587 | v = inb(0xa1) << 8 | inb(0x21); | ||
| 1588 | printk(KERN_DEBUG "... PIC IMR: %04x\n", v); | ||
| 1589 | |||
| 1590 | v = inb(0xa0) << 8 | inb(0x20); | ||
| 1591 | printk(KERN_DEBUG "... PIC IRR: %04x\n", v); | ||
| 1592 | |||
| 1593 | outb(0x0b, 0xa0); | ||
| 1594 | outb(0x0b, 0x20); | ||
| 1595 | v = inb(0xa0) << 8 | inb(0x20); | ||
| 1596 | outb(0x0a, 0xa0); | ||
| 1597 | outb(0x0a, 0x20); | ||
| 1598 | |||
| 1599 | spin_unlock_irqrestore(&i8259A_lock, flags); | ||
| 1600 | |||
| 1601 | printk(KERN_DEBUG "... PIC ISR: %04x\n", v); | ||
| 1602 | |||
| 1603 | v = inb(0x4d1) << 8 | inb(0x4d0); | ||
| 1604 | printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); | ||
| 1605 | } | ||
| 1606 | |||
| 1607 | #endif /* 0 */ | ||
| 1608 | |||
| 1609 | static void __init enable_IO_APIC(void) | ||
| 1610 | { | ||
| 1611 | union IO_APIC_reg_01 reg_01; | ||
| 1612 | int i8259_apic, i8259_pin; | ||
| 1613 | int i, apic; | ||
| 1614 | unsigned long flags; | ||
| 1615 | |||
| 1616 | for (i = 0; i < PIN_MAP_SIZE; i++) { | ||
| 1617 | irq_2_pin[i].pin = -1; | ||
| 1618 | irq_2_pin[i].next = 0; | ||
| 1619 | } | ||
| 1620 | if (!pirqs_enabled) | ||
| 1621 | for (i = 0; i < MAX_PIRQS; i++) | ||
| 1622 | pirq_entries[i] = -1; | ||
| 1623 | |||
| 1624 | /* | ||
| 1625 | * The number of IO-APIC IRQ registers (== #pins): | ||
| 1626 | */ | ||
| 1627 | for (apic = 0; apic < nr_ioapics; apic++) { | ||
| 1628 | spin_lock_irqsave(&ioapic_lock, flags); | ||
| 1629 | reg_01.raw = io_apic_read(apic, 1); | ||
| 1630 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
| 1631 | nr_ioapic_registers[apic] = reg_01.bits.entries+1; | ||
| 1632 | } | ||
| 1633 | for (apic = 0; apic < nr_ioapics; apic++) { | ||
| 1634 | int pin; | ||
| 1635 | /* See if any of the pins is in ExtINT mode */ | ||
| 1636 | for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { | ||
| 1637 | struct IO_APIC_route_entry entry; | ||
| 1638 | entry = ioapic_read_entry(apic, pin); | ||
| 1639 | |||
| 1640 | |||
| 1641 | /* If the interrupt line is enabled and in ExtInt mode | ||
| 1642 | * I have found the pin where the i8259 is connected. | ||
| 1643 | */ | ||
| 1644 | if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) { | ||
| 1645 | ioapic_i8259.apic = apic; | ||
| 1646 | ioapic_i8259.pin = pin; | ||
| 1647 | goto found_i8259; | ||
| 1648 | } | ||
| 1649 | } | ||
| 1650 | } | ||
| 1651 | found_i8259: | ||
| 1652 | /* Look to see if the MP table has reported the ExtINT */ | ||
| 1653 | /* If we could not find the appropriate pin by looking at the ioapic, | ||
| 1654 | * the i8259 probably is not connected to the ioapic, but give the | ||
| 1655 | * mptable a chance anyway. | ||
| 1656 | */ | ||
| 1657 | i8259_pin = find_isa_irq_pin(0, mp_ExtINT); | ||
| 1658 | i8259_apic = find_isa_irq_apic(0, mp_ExtINT); | ||
| 1659 | /* Trust the MP table if nothing is setup in the hardware */ | ||
| 1660 | if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) { | ||
| 1661 | printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n"); | ||
| 1662 | ioapic_i8259.pin = i8259_pin; | ||
| 1663 | ioapic_i8259.apic = i8259_apic; | ||
| 1664 | } | ||
| 1665 | /* Complain if the MP table and the hardware disagree */ | ||
| 1666 | if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) && | ||
| 1667 | (i8259_pin >= 0) && (ioapic_i8259.pin >= 0)) | ||
| 1668 | { | ||
| 1669 | printk(KERN_WARNING "ExtINT in hardware and MP table differ\n"); | ||
| 1670 | } | ||
| 1671 | |||
| 1672 | /* | ||
| 1673 | * Do not trust the IO-APIC being empty at bootup | ||
| 1674 | */ | ||
| 1675 | clear_IO_APIC(); | ||
| 1676 | } | ||
| 1677 | |||
| 1678 | /* | ||
| 1679 | * Not an __init, needed by the reboot code | ||
| 1680 | */ | ||
| 1681 | void disable_IO_APIC(void) | ||
| 1682 | { | ||
| 1683 | /* | ||
| 1684 | * Clear the IO-APIC before rebooting: | ||
| 1685 | */ | ||
| 1686 | clear_IO_APIC(); | ||
| 1687 | |||
| 1688 | /* | ||
| 1689 | * If the i8259 is routed through an IOAPIC, | ||
| 1690 | * put that IOAPIC in virtual wire mode | ||
| 1691 | * so legacy interrupts can be delivered. | ||
| 1692 | */ | ||
| 1693 | if (ioapic_i8259.pin != -1) { | ||
| 1694 | struct IO_APIC_route_entry entry; | ||
| 1695 | |||
| 1696 | memset(&entry, 0, sizeof(entry)); | ||
| 1697 | entry.mask = 0; /* Enabled */ | ||
| 1698 | entry.trigger = 0; /* Edge */ | ||
| 1699 | entry.irr = 0; | ||
| 1700 | entry.polarity = 0; /* High */ | ||
| 1701 | entry.delivery_status = 0; | ||
| 1702 | entry.dest_mode = 0; /* Physical */ | ||
| 1703 | entry.delivery_mode = dest_ExtINT; /* ExtInt */ | ||
| 1704 | entry.vector = 0; | ||
| 1705 | entry.dest.physical.physical_dest = | ||
| 1706 | GET_APIC_ID(read_apic_id()); | ||
| 1707 | |||
| 1708 | /* | ||
| 1709 | * Add it to the IO-APIC irq-routing table: | ||
| 1710 | */ | ||
| 1711 | ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry); | ||
| 1712 | } | ||
| 1713 | disconnect_bsp_APIC(ioapic_i8259.pin != -1); | ||
| 1714 | } | ||
| 1715 | |||
| 1716 | /* | ||
| 1717 | * function to set the IO-APIC physical IDs based on the | ||
| 1718 | * values stored in the MPC table. | ||
| 1719 | * | ||
| 1720 | * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999 | ||
| 1721 | */ | ||
| 1722 | |||
| 1723 | static void __init setup_ioapic_ids_from_mpc(void) | ||
| 1724 | { | ||
| 1725 | union IO_APIC_reg_00 reg_00; | ||
| 1726 | physid_mask_t phys_id_present_map; | ||
| 1727 | int apic; | ||
| 1728 | int i; | ||
| 1729 | unsigned char old_id; | ||
| 1730 | unsigned long flags; | ||
| 1731 | |||
| 1732 | #ifdef CONFIG_X86_NUMAQ | ||
| 1733 | if (found_numaq) | ||
| 1734 | return; | ||
| 1735 | #endif | ||
| 1736 | |||
| 1737 | /* | ||
| 1738 | * Don't check I/O APIC IDs for xAPIC systems. They have | ||
| 1739 | * no meaning without the serial APIC bus. | ||
| 1740 | */ | ||
| 1741 | if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) | ||
| 1742 | || APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) | ||
| 1743 | return; | ||
| 1744 | /* | ||
| 1745 | * This is broken; anything with a real cpu count has to | ||
| 1746 | * circumvent this idiocy regardless. | ||
| 1747 | */ | ||
| 1748 | phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map); | ||
| 1749 | |||
| 1750 | /* | ||
| 1751 | * Set the IOAPIC ID to the value stored in the MPC table. | ||
| 1752 | */ | ||
| 1753 | for (apic = 0; apic < nr_ioapics; apic++) { | ||
| 1754 | |||
| 1755 | /* Read the register 0 value */ | ||
| 1756 | spin_lock_irqsave(&ioapic_lock, flags); | ||
| 1757 | reg_00.raw = io_apic_read(apic, 0); | ||
| 1758 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
| 1759 | |||
| 1760 | old_id = mp_ioapics[apic].mp_apicid; | ||
| 1761 | |||
| 1762 | if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) { | ||
| 1763 | printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", | ||
| 1764 | apic, mp_ioapics[apic].mp_apicid); | ||
| 1765 | printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", | ||
| 1766 | reg_00.bits.ID); | ||
| 1767 | mp_ioapics[apic].mp_apicid = reg_00.bits.ID; | ||
| 1768 | } | ||
| 1769 | |||
| 1770 | /* | ||
| 1771 | * Sanity check, is the ID really free? Every APIC in a | ||
| 1772 | * system must have a unique ID or we get lots of nice | ||
| 1773 | * 'stuck on smp_invalidate_needed IPI wait' messages. | ||
| 1774 | */ | ||
| 1775 | if (check_apicid_used(phys_id_present_map, | ||
| 1776 | mp_ioapics[apic].mp_apicid)) { | ||
| 1777 | printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", | ||
| 1778 | apic, mp_ioapics[apic].mp_apicid); | ||
| 1779 | for (i = 0; i < get_physical_broadcast(); i++) | ||
| 1780 | if (!physid_isset(i, phys_id_present_map)) | ||
| 1781 | break; | ||
| 1782 | if (i >= get_physical_broadcast()) | ||
| 1783 | panic("Max APIC ID exceeded!\n"); | ||
| 1784 | printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", | ||
| 1785 | i); | ||
| 1786 | physid_set(i, phys_id_present_map); | ||
| 1787 | mp_ioapics[apic].mp_apicid = i; | ||
| 1788 | } else { | ||
| 1789 | physid_mask_t tmp; | ||
| 1790 | tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid); | ||
| 1791 | apic_printk(APIC_VERBOSE, "Setting %d in the " | ||
| 1792 | "phys_id_present_map\n", | ||
| 1793 | mp_ioapics[apic].mp_apicid); | ||
| 1794 | physids_or(phys_id_present_map, phys_id_present_map, tmp); | ||
| 1795 | } | ||
| 1796 | |||
| 1797 | |||
| 1798 | /* | ||
| 1799 | * We need to adjust the IRQ routing table | ||
| 1800 | * if the ID changed. | ||
| 1801 | */ | ||
| 1802 | if (old_id != mp_ioapics[apic].mp_apicid) | ||
| 1803 | for (i = 0; i < mp_irq_entries; i++) | ||
| 1804 | if (mp_irqs[i].mp_dstapic == old_id) | ||
| 1805 | mp_irqs[i].mp_dstapic | ||
| 1806 | = mp_ioapics[apic].mp_apicid; | ||
| 1807 | |||
| 1808 | /* | ||
| 1809 | * Read the right value from the MPC table and | ||
| 1810 | * write it into the ID register. | ||
| 1811 | */ | ||
| 1812 | apic_printk(APIC_VERBOSE, KERN_INFO | ||
| 1813 | "...changing IO-APIC physical APIC ID to %d ...", | ||
| 1814 | mp_ioapics[apic].mp_apicid); | ||
| 1815 | |||
| 1816 | reg_00.bits.ID = mp_ioapics[apic].mp_apicid; | ||
| 1817 | spin_lock_irqsave(&ioapic_lock, flags); | ||
| 1818 | io_apic_write(apic, 0, reg_00.raw); | ||
| 1819 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
| 1820 | |||
| 1821 | /* | ||
| 1822 | * Sanity check | ||
| 1823 | */ | ||
| 1824 | spin_lock_irqsave(&ioapic_lock, flags); | ||
| 1825 | reg_00.raw = io_apic_read(apic, 0); | ||
| 1826 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
| 1827 | if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid) | ||
| 1828 | printk("could not set ID!\n"); | ||
| 1829 | else | ||
| 1830 | apic_printk(APIC_VERBOSE, " ok.\n"); | ||
| 1831 | } | ||
| 1832 | } | ||
| 1833 | |||
| 1834 | int no_timer_check __initdata; | ||
| 1835 | |||
| 1836 | static int __init notimercheck(char *s) | ||
| 1837 | { | ||
| 1838 | no_timer_check = 1; | ||
| 1839 | return 1; | ||
| 1840 | } | ||
| 1841 | __setup("no_timer_check", notimercheck); | ||
| 1842 | |||
| 1843 | /* | ||
| 1844 | * There is a nasty bug in some older SMP boards: their mptable lies | ||
| 1845 | * about the timer IRQ. We do the following to work around the situation: | ||
| 1846 | * | ||
| 1847 | * - timer IRQ defaults to IO-APIC IRQ | ||
| 1848 | * - if this function detects that timer IRQs are defunct, then we fall | ||
| 1849 | * back to ISA timer IRQs | ||
| 1850 | */ | ||
| 1851 | static int __init timer_irq_works(void) | ||
| 1852 | { | ||
| 1853 | unsigned long t1 = jiffies; | ||
| 1854 | unsigned long flags; | ||
| 1855 | |||
| 1856 | if (no_timer_check) | ||
| 1857 | return 1; | ||
| 1858 | |||
| 1859 | local_save_flags(flags); | ||
| 1860 | local_irq_enable(); | ||
| 1861 | /* Let ten ticks pass... */ | ||
| 1862 | mdelay((10 * 1000) / HZ); | ||
| 1863 | local_irq_restore(flags); | ||
| 1864 | |||
| 1865 | /* | ||
| 1866 | * Expect a few ticks at least, to be sure some possible | ||
| 1867 | * glue logic does not lock up after the first one or two | ||
| 1868 | * ticks in a non-ExtINT mode. Also the local APIC | ||
| 1869 | * might have cached one ExtINT interrupt. Finally, at | ||
| 1870 | * least one tick may be lost due to delays. | ||
| 1871 | */ | ||
| 1872 | if (time_after(jiffies, t1 + 4)) | ||
| 1873 | return 1; | ||
| 1874 | |||
| 1875 | return 0; | ||
| 1876 | } | ||
| 1877 | |||
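timer_irq_works() above spins for mdelay((10 * 1000) / HZ) milliseconds, i.e. roughly ten timer ticks, and then requires jiffies to have advanced by at least five. A quick sketch of that arithmetic for a few common HZ values:

```c
#include <stdio.h>

/* Quick sketch of the delay arithmetic in timer_irq_works() above. */
int main(void)
{
        int hz_values[] = { 100, 250, 300, 1000 };
        int i;

        for (i = 0; i < 4; i++)
                printf("HZ=%4d: (10 * 1000) / HZ = %d ms for ~10 ticks\n",
                       hz_values[i], (10 * 1000) / hz_values[i]);
        return 0;
}
```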
| 1878 | /* | ||
| 1879 | * In the SMP+IOAPIC case it might happen that there are an unspecified | ||
| 1880 | * number of pending IRQ events unhandled. These cases are very rare, | ||
| 1881 | * so we 'resend' these IRQs via IPIs, to the same CPU. It's much | ||
| 1882 | * better to do it this way, as then we do not have to be aware of | ||
| 1883 | * 'pending' interrupts in the IRQ path, except at this point. | ||
| 1884 | */ | ||
| 1885 | /* | ||
| 1886 | * Edge-triggered handling needs to resend any interrupt | ||
| 1887 | * that was delayed, but this is now handled in the device- | ||
| 1888 | * independent code. | ||
| 1889 | */ | ||
| 1890 | |||
| 1891 | /* | ||
| 1892 | * Startup quirk: | ||
| 1893 | * | ||
| 1894 | * Starting up an edge-triggered IO-APIC interrupt is | ||
| 1895 | * nasty - we need to make sure that we get the edge. | ||
| 1896 | * If it is already asserted for some reason, we need | ||
| 1897 | * return 1 to indicate that is was pending. | ||
| 1898 | * | ||
| 1899 | * This is not complete - we should be able to fake | ||
| 1900 | * an edge even if it isn't on the 8259A... | ||
| 1901 | * | ||
| 1902 | * (We do this for level-triggered IRQs too - it cannot hurt.) | ||
| 1903 | */ | ||
| 1904 | static unsigned int startup_ioapic_irq(unsigned int irq) | ||
| 1905 | { | ||
| 1906 | int was_pending = 0; | ||
| 1907 | unsigned long flags; | ||
| 1908 | |||
| 1909 | spin_lock_irqsave(&ioapic_lock, flags); | ||
| 1910 | if (irq < 16) { | ||
| 1911 | disable_8259A_irq(irq); | ||
| 1912 | if (i8259A_irq_pending(irq)) | ||
| 1913 | was_pending = 1; | ||
| 1914 | } | ||
| 1915 | __unmask_IO_APIC_irq(irq); | ||
| 1916 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
| 1917 | |||
| 1918 | return was_pending; | ||
| 1919 | } | ||
| 1920 | |||
| 1921 | static void ack_ioapic_irq(unsigned int irq) | ||
| 1922 | { | ||
| 1923 | move_native_irq(irq); | ||
| 1924 | ack_APIC_irq(); | ||
| 1925 | } | ||
| 1926 | |||
| 1927 | static void ack_ioapic_quirk_irq(unsigned int irq) | ||
| 1928 | { | ||
| 1929 | unsigned long v; | ||
| 1930 | int i; | ||
| 1931 | |||
| 1932 | move_native_irq(irq); | ||
| 1933 | /* | ||
| 1934 | * It appears there is an erratum which affects at least version 0x11 | ||
| 1935 | * of I/O APIC (that's the 82093AA and cores integrated into various | ||
| 1936 | * chipsets). Under certain conditions a level-triggered interrupt is | ||
| 1937 | * erroneously delivered as edge-triggered one but the respective IRR | ||
| 1938 | * bit gets set nevertheless. As a result the I/O unit expects an EOI | ||
| 1939 | * message but it will never arrive and further interrupts are blocked | ||
| 1940 | * from the source. The exact reason is so far unknown, but the | ||
| 1941 | * phenomenon was observed when two consecutive interrupt requests | ||
| 1942 | * from a given source get delivered to the same CPU and the source is | ||
| 1943 | * temporarily disabled in between. | ||
| 1944 | * | ||
| 1945 | * A workaround is to simulate an EOI message manually. We achieve it | ||
| 1946 | * by setting the trigger mode to edge and then to level when the edge | ||
| 1947 | * trigger mode gets detected in the TMR of a local APIC for a | ||
| 1948 | * level-triggered interrupt. We mask the source for the time of the | ||
| 1949 | * operation to prevent an edge-triggered interrupt escaping meanwhile. | ||
| 1950 | * The idea is from Manfred Spraul. --macro | ||
| 1951 | */ | ||
| 1952 | i = irq_vector[irq]; | ||
| 1953 | |||
| 1954 | v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); | ||
| 1955 | |||
| 1956 | ack_APIC_irq(); | ||
| 1957 | |||
| 1958 | if (!(v & (1 << (i & 0x1f)))) { | ||
| 1959 | atomic_inc(&irq_mis_count); | ||
| 1960 | spin_lock(&ioapic_lock); | ||
| 1961 | __mask_and_edge_IO_APIC_irq(irq); | ||
| 1962 | __unmask_and_level_IO_APIC_irq(irq); | ||
| 1963 | spin_unlock(&ioapic_lock); | ||
| 1964 | } | ||
| 1965 | } | ||
| 1966 | |||
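ack_ioapic_quirk_irq() above checks the vector's bit in the local APIC TMR to detect the erratum. The TMR is a 256-bit register file exposed as eight 32-bit registers spaced 0x10 apart, which is what the expression APIC_TMR + ((i & ~0x1f) >> 1) indexes. A standalone sketch of the same offset/bit arithmetic (TMR_BASE is a stand-in for the APIC_TMR MMIO offset):

```c
#include <stdio.h>

/* Standalone sketch: vector v lives in register TMR_BASE + (v / 32) * 0x10
 * at bit (v % 32).  The sample vectors are arbitrary. */
#define TMR_BASE 0x180

int main(void)
{
        unsigned int vectors[] = { 0x31, 0x59, 0xc8 };
        int i;

        for (i = 0; i < 3; i++) {
                unsigned int v = vectors[i];
                unsigned int reg = TMR_BASE + ((v & ~0x1fu) >> 1); /* == (v/32)*0x10 */
                unsigned int bit = v & 0x1f;

                printf("vector 0x%02x -> TMR register 0x%03x, bit %u\n",
                       v, reg, bit);
        }
        return 0;
}
```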
| 1967 | static int ioapic_retrigger_irq(unsigned int irq) | ||
| 1968 | { | ||
| 1969 | send_IPI_self(irq_vector[irq]); | ||
| 1970 | |||
| 1971 | return 1; | ||
| 1972 | } | ||
| 1973 | |||
| 1974 | static struct irq_chip ioapic_chip __read_mostly = { | ||
| 1975 | .name = "IO-APIC", | ||
| 1976 | .startup = startup_ioapic_irq, | ||
| 1977 | .mask = mask_IO_APIC_irq, | ||
| 1978 | .unmask = unmask_IO_APIC_irq, | ||
| 1979 | .ack = ack_ioapic_irq, | ||
| 1980 | .eoi = ack_ioapic_quirk_irq, | ||
| 1981 | #ifdef CONFIG_SMP | ||
| 1982 | .set_affinity = set_ioapic_affinity_irq, | ||
| 1983 | #endif | ||
| 1984 | .retrigger = ioapic_retrigger_irq, | ||
| 1985 | }; | ||
| 1986 | |||
| 1987 | |||
| 1988 | static inline void init_IO_APIC_traps(void) | ||
| 1989 | { | ||
| 1990 | int irq; | ||
| 1991 | |||
| 1992 | /* | ||
| 1993 | * NOTE! The local APIC isn't very good at handling | ||
| 1994 | * multiple interrupts at the same interrupt level. | ||
| 1995 | * As the interrupt level is determined by taking the | ||
| 1996 | * vector number and shifting that right by 4, we | ||
| 1997 | * want to spread these out a bit so that they don't | ||
| 1998 | * all fall in the same interrupt level. | ||
| 1999 | * | ||
| 2000 | * Also, we've got to be careful not to trash gate | ||
| 2001 | * 0x80, because int 0x80 is hm, kind of importantish. ;) | ||
| 2002 | */ | ||
| 2003 | for (irq = 0; irq < NR_IRQS ; irq++) { | ||
| 2004 | if (IO_APIC_IRQ(irq) && !irq_vector[irq]) { | ||
| 2005 | /* | ||
| 2006 | * Hmm.. We don't have an entry for this, | ||
| 2007 | * so default to an old-fashioned 8259 | ||
| 2008 | * interrupt if we can.. | ||
| 2009 | */ | ||
| 2010 | if (irq < 16) | ||
| 2011 | make_8259A_irq(irq); | ||
| 2012 | else | ||
| 2013 | /* Strange. Oh, well.. */ | ||
| 2014 | irq_desc[irq].chip = &no_irq_chip; | ||
| 2015 | } | ||
| 2016 | } | ||
| 2017 | } | ||
| 2018 | |||
| 2019 | /* | ||
| 2020 | * The local APIC irq-chip implementation: | ||
| 2021 | */ | ||
| 2022 | |||
| 2023 | static void ack_lapic_irq(unsigned int irq) | ||
| 2024 | { | ||
| 2025 | ack_APIC_irq(); | ||
| 2026 | } | ||
| 2027 | |||
| 2028 | static void mask_lapic_irq(unsigned int irq) | ||
| 2029 | { | ||
| 2030 | unsigned long v; | ||
| 2031 | |||
| 2032 | v = apic_read(APIC_LVT0); | ||
| 2033 | apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED); | ||
| 2034 | } | ||
| 2035 | |||
| 2036 | static void unmask_lapic_irq(unsigned int irq) | ||
| 2037 | { | ||
| 2038 | unsigned long v; | ||
| 2039 | |||
| 2040 | v = apic_read(APIC_LVT0); | ||
| 2041 | apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED); | ||
| 2042 | } | ||
| 2043 | |||
| 2044 | static struct irq_chip lapic_chip __read_mostly = { | ||
| 2045 | .name = "local-APIC", | ||
| 2046 | .mask = mask_lapic_irq, | ||
| 2047 | .unmask = unmask_lapic_irq, | ||
| 2048 | .ack = ack_lapic_irq, | ||
| 2049 | }; | ||
| 2050 | |||
| 2051 | static void lapic_register_intr(int irq, int vector) | ||
| 2052 | { | ||
| 2053 | irq_desc[irq].status &= ~IRQ_LEVEL; | ||
| 2054 | set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, | ||
| 2055 | "edge"); | ||
| 2056 | set_intr_gate(vector, interrupt[irq]); | ||
| 2057 | } | ||
| 2058 | |||
| 2059 | static void __init setup_nmi(void) | ||
| 2060 | { | ||
| 2061 | /* | ||
| 2062 | * Dirty trick to enable the NMI watchdog ... | ||
| 2063 | * We put the 8259A master into AEOI mode and | ||
| 2064 | * unmask LVT0 on all local APICs as NMI. | ||
| 2065 | * | ||
| 2066 | * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire') | ||
| 2067 | * is from Maciej W. Rozycki - so we do not have to EOI from | ||
| 2068 | * the NMI handler or the timer interrupt. | ||
| 2069 | */ | ||
| 2070 | apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ..."); | ||
| 2071 | |||
| 2072 | enable_NMI_through_LVT0(); | ||
| 2073 | |||
| 2074 | apic_printk(APIC_VERBOSE, " done.\n"); | ||
| 2075 | } | ||
| 2076 | |||
| 2077 | /* | ||
| 2078 | * This looks a bit hackish but it's about the only way of sending | ||
| 2079 | * a few INTA cycles to 8259As and any associated glue logic. ICR does | ||
| 2080 | * not support the ExtINT mode, unfortunately. We need to send these | ||
| 2081 | * cycles as some i82489DX-based boards have glue logic that keeps the | ||
| 2082 | * 8259A interrupt line asserted until INTA. --macro | ||
| 2083 | */ | ||
| 2084 | static inline void __init unlock_ExtINT_logic(void) | ||
| 2085 | { | ||
| 2086 | int apic, pin, i; | ||
| 2087 | struct IO_APIC_route_entry entry0, entry1; | ||
| 2088 | unsigned char save_control, save_freq_select; | ||
| 2089 | |||
| 2090 | pin = find_isa_irq_pin(8, mp_INT); | ||
| 2091 | if (pin == -1) { | ||
| 2092 | WARN_ON_ONCE(1); | ||
| 2093 | return; | ||
| 2094 | } | ||
| 2095 | apic = find_isa_irq_apic(8, mp_INT); | ||
| 2096 | if (apic == -1) { | ||
| 2097 | WARN_ON_ONCE(1); | ||
| 2098 | return; | ||
| 2099 | } | ||
| 2100 | |||
| 2101 | entry0 = ioapic_read_entry(apic, pin); | ||
| 2102 | clear_IO_APIC_pin(apic, pin); | ||
| 2103 | |||
| 2104 | memset(&entry1, 0, sizeof(entry1)); | ||
| 2105 | |||
| 2106 | entry1.dest_mode = 0; /* physical delivery */ | ||
| 2107 | entry1.mask = 0; /* unmask IRQ now */ | ||
| 2108 | entry1.dest.physical.physical_dest = hard_smp_processor_id(); | ||
| 2109 | entry1.delivery_mode = dest_ExtINT; | ||
| 2110 | entry1.polarity = entry0.polarity; | ||
| 2111 | entry1.trigger = 0; | ||
| 2112 | entry1.vector = 0; | ||
| 2113 | |||
| 2114 | ioapic_write_entry(apic, pin, entry1); | ||
| 2115 | |||
| 2116 | save_control = CMOS_READ(RTC_CONTROL); | ||
| 2117 | save_freq_select = CMOS_READ(RTC_FREQ_SELECT); | ||
| 2118 | CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6, | ||
| 2119 | RTC_FREQ_SELECT); | ||
| 2120 | CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL); | ||
| 2121 | |||
| 2122 | i = 100; | ||
| 2123 | while (i-- > 0) { | ||
| 2124 | mdelay(10); | ||
| 2125 | if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF) | ||
| 2126 | i -= 10; | ||
| 2127 | } | ||
| 2128 | |||
| 2129 | CMOS_WRITE(save_control, RTC_CONTROL); | ||
| 2130 | CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); | ||
| 2131 | clear_IO_APIC_pin(apic, pin); | ||
| 2132 | |||
| 2133 | ioapic_write_entry(apic, pin, entry0); | ||
| 2134 | } | ||
| 2135 | |||
| 2136 | /* | ||
| 2137 | * This code may look a bit paranoid, but it's supposed to cooperate with | ||
| 2138 | * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ | ||
| 2139 | * is so screwy. Thanks to Brian Perkins for testing/hacking this beast | ||
| 2140 | * fanatically on his truly buggy board. | ||
| 2141 | */ | ||
| 2142 | static inline void __init check_timer(void) | ||
| 2143 | { | ||
| 2144 | int apic1, pin1, apic2, pin2; | ||
| 2145 | int no_pin1 = 0; | ||
| 2146 | int vector; | ||
| 2147 | unsigned int ver; | ||
| 2148 | unsigned long flags; | ||
| 2149 | |||
| 2150 | local_irq_save(flags); | ||
| 2151 | |||
| 2152 | ver = apic_read(APIC_LVR); | ||
| 2153 | ver = GET_APIC_VERSION(ver); | ||
| 2154 | |||
| 2155 | /* | ||
| 2156 | * get/set the timer IRQ vector: | ||
| 2157 | */ | ||
| 2158 | disable_8259A_irq(0); | ||
| 2159 | vector = assign_irq_vector(0); | ||
| 2160 | set_intr_gate(vector, interrupt[0]); | ||
| 2161 | |||
| 2162 | /* | ||
| 2163 | * As IRQ0 is to be enabled in the 8259A, the virtual | ||
| 2164 | * wire has to be disabled in the local APIC. Also | ||
| 2165 | * timer interrupts need to be acknowledged manually in | ||
| 2166 | * the 8259A for the i82489DX when using the NMI | ||
| 2167 | * watchdog as that APIC treats NMIs as level-triggered. | ||
| 2168 | * The AEOI mode will finish them in the 8259A | ||
| 2169 | * automatically. | ||
| 2170 | */ | ||
| 2171 | apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); | ||
| 2172 | init_8259A(1); | ||
| 2173 | timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); | ||
| 2174 | |||
| 2175 | pin1 = find_isa_irq_pin(0, mp_INT); | ||
| 2176 | apic1 = find_isa_irq_apic(0, mp_INT); | ||
| 2177 | pin2 = ioapic_i8259.pin; | ||
| 2178 | apic2 = ioapic_i8259.apic; | ||
| 2179 | |||
| 2180 | printk(KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", | ||
| 2181 | vector, apic1, pin1, apic2, pin2); | ||
| 2182 | |||
| 2183 | /* | ||
| 2184 | * Some BIOS writers are clueless and report the ExtINTA | ||
| 2185 | * I/O APIC input from the cascaded 8259A as the timer | ||
| 2186 | * interrupt input. So just in case, if only one pin | ||
| 2187 | * was found above, try it both directly and through the | ||
| 2188 | * 8259A. | ||
| 2189 | */ | ||
| 2190 | if (pin1 == -1) { | ||
| 2191 | pin1 = pin2; | ||
| 2192 | apic1 = apic2; | ||
| 2193 | no_pin1 = 1; | ||
| 2194 | } else if (pin2 == -1) { | ||
| 2195 | pin2 = pin1; | ||
| 2196 | apic2 = apic1; | ||
| 2197 | } | ||
| 2198 | |||
| 2199 | if (pin1 != -1) { | ||
| 2200 | /* | ||
| 2201 | * Ok, does IRQ0 through the IOAPIC work? | ||
| 2202 | */ | ||
| 2203 | if (no_pin1) { | ||
| 2204 | add_pin_to_irq(0, apic1, pin1); | ||
| 2205 | setup_timer_IRQ0_pin(apic1, pin1, vector); | ||
| 2206 | } | ||
| 2207 | unmask_IO_APIC_irq(0); | ||
| 2208 | if (timer_irq_works()) { | ||
| 2209 | if (nmi_watchdog == NMI_IO_APIC) { | ||
| 2210 | setup_nmi(); | ||
| 2211 | enable_8259A_irq(0); | ||
| 2212 | } | ||
| 2213 | if (disable_timer_pin_1 > 0) | ||
| 2214 | clear_IO_APIC_pin(0, pin1); | ||
| 2215 | goto out; | ||
| 2216 | } | ||
| 2217 | clear_IO_APIC_pin(apic1, pin1); | ||
| 2218 | if (!no_pin1) | ||
| 2219 | printk(KERN_ERR "..MP-BIOS bug: " | ||
| 2220 | "8254 timer not connected to IO-APIC\n"); | ||
| 2221 | |||
| 2222 | printk(KERN_INFO "...trying to set up timer (IRQ0) " | ||
| 2223 | "through the 8259A ... "); | ||
| 2224 | printk("\n..... (found pin %d) ...", pin2); | ||
| 2225 | /* | ||
| 2226 | * legacy devices should be connected to IO APIC #0 | ||
| 2227 | */ | ||
| 2228 | replace_pin_at_irq(0, apic1, pin1, apic2, pin2); | ||
| 2229 | setup_timer_IRQ0_pin(apic2, pin2, vector); | ||
| 2230 | unmask_IO_APIC_irq(0); | ||
| 2231 | enable_8259A_irq(0); | ||
| 2232 | if (timer_irq_works()) { | ||
| 2233 | printk("works.\n"); | ||
| 2234 | timer_through_8259 = 1; | ||
| 2235 | if (nmi_watchdog == NMI_IO_APIC) { | ||
| 2236 | disable_8259A_irq(0); | ||
| 2237 | setup_nmi(); | ||
| 2238 | enable_8259A_irq(0); | ||
| 2239 | } | ||
| 2240 | goto out; | ||
| 2241 | } | ||
| 2242 | /* | ||
| 2243 | * Cleanup, just in case ... | ||
| 2244 | */ | ||
| 2245 | disable_8259A_irq(0); | ||
| 2246 | clear_IO_APIC_pin(apic2, pin2); | ||
| 2247 | printk(" failed.\n"); | ||
| 2248 | } | ||
| 2249 | |||
| 2250 | if (nmi_watchdog == NMI_IO_APIC) { | ||
| 2251 | printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n"); | ||
| 2252 | nmi_watchdog = NMI_NONE; | ||
| 2253 | } | ||
| 2254 | timer_ack = 0; | ||
| 2255 | |||
| 2256 | printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ..."); | ||
| 2257 | |||
| 2258 | lapic_register_intr(0, vector); | ||
| 2259 | apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ | ||
| 2260 | enable_8259A_irq(0); | ||
| 2261 | |||
| 2262 | if (timer_irq_works()) { | ||
| 2263 | printk(" works.\n"); | ||
| 2264 | goto out; | ||
| 2265 | } | ||
| 2266 | disable_8259A_irq(0); | ||
| 2267 | apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector); | ||
| 2268 | printk(" failed.\n"); | ||
| 2269 | |||
| 2270 | printk(KERN_INFO "...trying to set up timer as ExtINT IRQ..."); | ||
| 2271 | |||
| 2272 | init_8259A(0); | ||
| 2273 | make_8259A_irq(0); | ||
| 2274 | apic_write_around(APIC_LVT0, APIC_DM_EXTINT); | ||
| 2275 | |||
| 2276 | unlock_ExtINT_logic(); | ||
| 2277 | |||
| 2278 | if (timer_irq_works()) { | ||
| 2279 | printk(" works.\n"); | ||
| 2280 | goto out; | ||
| 2281 | } | ||
| 2282 | printk(" failed :(.\n"); | ||
| 2283 | panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " | ||
| 2284 | "report. Then try booting with the 'noapic' option"); | ||
| 2285 | out: | ||
| 2286 | local_irq_restore(flags); | ||
| 2287 | } | ||
| 2288 | |||
| 2289 | /* | ||
| 2290 | * Traditionally ISA IRQ2 is the cascade IRQ, and is not available | ||
| 2291 | * to devices. However there may be an I/O APIC pin available for | ||
| 2292 | * this interrupt regardless. The pin may be left unconnected, but | ||
| 2293 | * typically it will be reused as an ExtINT cascade interrupt for | ||
| 2294 | * the master 8259A. In the MPS case such a pin will normally be | ||
| 2295 | * reported as an ExtINT interrupt in the MP table. With ACPI | ||
| 2296 | * there is no provision for ExtINT interrupts, and in the absence | ||
| 2297 | * of an override it would be treated as an ordinary ISA I/O APIC | ||
| 2298 | * interrupt, that is edge-triggered and unmasked by default. We | ||
| 2299 | * used to do this, but it caused problems on some systems because | ||
| 2300 | * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using | ||
| 2301 | * the same ExtINT cascade interrupt to drive the local APIC of the | ||
| 2302 | * bootstrap processor. Therefore we refrain from routing IRQ2 to | ||
| 2303 | * the I/O APIC in all cases now. No actual device should request | ||
| 2304 | * it anyway. --macro | ||
| 2305 | */ | ||
| 2306 | #define PIC_IRQS (1 << PIC_CASCADE_IR) | ||
| 2307 | |||
| 2308 | void __init setup_IO_APIC(void) | ||
| 2309 | { | ||
| 2310 | int i; | ||
| 2311 | |||
| 2312 | /* Reserve all the system vectors. */ | ||
| 2313 | for (i = first_system_vector; i < NR_VECTORS; i++) | ||
| 2314 | set_bit(i, used_vectors); | ||
| 2315 | |||
| 2316 | enable_IO_APIC(); | ||
| 2317 | |||
| 2318 | io_apic_irqs = ~PIC_IRQS; | ||
| 2319 | |||
| 2320 | printk("ENABLING IO-APIC IRQs\n"); | ||
| 2321 | |||
| 2322 | /* | ||
| 2323 | * Set up IO-APIC IRQ routing. | ||
| 2324 | */ | ||
| 2325 | if (!acpi_ioapic) | ||
| 2326 | setup_ioapic_ids_from_mpc(); | ||
| 2327 | sync_Arb_IDs(); | ||
| 2328 | setup_IO_APIC_irqs(); | ||
| 2329 | init_IO_APIC_traps(); | ||
| 2330 | check_timer(); | ||
| 2331 | if (!acpi_ioapic) | ||
| 2332 | print_IO_APIC(); | ||
| 2333 | } | ||
| 2334 | |||
| 2335 | /* | ||
| 2336 | * Called after all the initialization is done. If we didn't find any | ||
| 2337 | * APIC bugs then we can allow the modify fast path | ||
| 2338 | */ | ||
| 2339 | |||
| 2340 | static int __init io_apic_bug_finalize(void) | ||
| 2341 | { | ||
| 2342 | if (sis_apic_bug == -1) | ||
| 2343 | sis_apic_bug = 0; | ||
| 2344 | return 0; | ||
| 2345 | } | ||
| 2346 | |||
| 2347 | late_initcall(io_apic_bug_finalize); | ||
| 2348 | |||
| 2349 | struct sysfs_ioapic_data { | ||
| 2350 | struct sys_device dev; | ||
| 2351 | struct IO_APIC_route_entry entry[0]; | ||
| 2352 | }; | ||
| 2353 | static struct sysfs_ioapic_data *mp_ioapic_data[MAX_IO_APICS]; | ||
| 2354 | |||
| 2355 | static int ioapic_suspend(struct sys_device *dev, pm_message_t state) | ||
| 2356 | { | ||
| 2357 | struct IO_APIC_route_entry *entry; | ||
| 2358 | struct sysfs_ioapic_data *data; | ||
| 2359 | int i; | ||
| 2360 | |||
| 2361 | data = container_of(dev, struct sysfs_ioapic_data, dev); | ||
| 2362 | entry = data->entry; | ||
| 2363 | for (i = 0; i < nr_ioapic_registers[dev->id]; i++) | ||
| 2364 | entry[i] = ioapic_read_entry(dev->id, i); | ||
| 2365 | |||
| 2366 | return 0; | ||
| 2367 | } | ||
| 2368 | |||
| 2369 | static int ioapic_resume(struct sys_device *dev) | ||
| 2370 | { | ||
| 2371 | struct IO_APIC_route_entry *entry; | ||
| 2372 | struct sysfs_ioapic_data *data; | ||
| 2373 | unsigned long flags; | ||
| 2374 | union IO_APIC_reg_00 reg_00; | ||
| 2375 | int i; | ||
| 2376 | |||
| 2377 | data = container_of(dev, struct sysfs_ioapic_data, dev); | ||
| 2378 | entry = data->entry; | ||
| 2379 | |||
| 2380 | spin_lock_irqsave(&ioapic_lock, flags); | ||
| 2381 | reg_00.raw = io_apic_read(dev->id, 0); | ||
| 2382 | if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) { | ||
| 2383 | reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid; | ||
| 2384 | io_apic_write(dev->id, 0, reg_00.raw); | ||
| 2385 | } | ||
| 2386 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
| 2387 | for (i = 0; i < nr_ioapic_registers[dev->id]; i++) | ||
| 2388 | ioapic_write_entry(dev->id, i, entry[i]); | ||
| 2389 | |||
| 2390 | return 0; | ||
| 2391 | } | ||
| 2392 | |||
| 2393 | static struct sysdev_class ioapic_sysdev_class = { | ||
| 2394 | .name = "ioapic", | ||
| 2395 | .suspend = ioapic_suspend, | ||
| 2396 | .resume = ioapic_resume, | ||
| 2397 | }; | ||
| 2398 | |||
| 2399 | static int __init ioapic_init_sysfs(void) | ||
| 2400 | { | ||
| 2401 | struct sys_device *dev; | ||
| 2402 | int i, size, error = 0; | ||
| 2403 | |||
| 2404 | error = sysdev_class_register(&ioapic_sysdev_class); | ||
| 2405 | if (error) | ||
| 2406 | return error; | ||
| 2407 | |||
| 2408 | for (i = 0; i < nr_ioapics; i++) { | ||
| 2409 | size = sizeof(struct sys_device) + nr_ioapic_registers[i] | ||
| 2410 | * sizeof(struct IO_APIC_route_entry); | ||
| 2411 | mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL); | ||
| 2412 | if (!mp_ioapic_data[i]) { | ||
| 2413 | printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); | ||
| 2414 | continue; | ||
| 2415 | } | ||
| 2416 | dev = &mp_ioapic_data[i]->dev; | ||
| 2417 | dev->id = i; | ||
| 2418 | dev->cls = &ioapic_sysdev_class; | ||
| 2419 | error = sysdev_register(dev); | ||
| 2420 | if (error) { | ||
| 2421 | kfree(mp_ioapic_data[i]); | ||
| 2422 | mp_ioapic_data[i] = NULL; | ||
| 2423 | printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); | ||
| 2424 | continue; | ||
| 2425 | } | ||
| 2426 | } | ||
| 2427 | |||
| 2428 | return 0; | ||
| 2429 | } | ||
| 2430 | |||
| 2431 | device_initcall(ioapic_init_sysfs); | ||
| 2432 | |||
| 2433 | /* | ||
| 2434 | * Dynamic irq allocate and deallocation | ||
| 2435 | */ | ||
| 2436 | int create_irq(void) | ||
| 2437 | { | ||
| 2438 | /* Allocate an unused irq */ | ||
| 2439 | int irq, new, vector = 0; | ||
| 2440 | unsigned long flags; | ||
| 2441 | |||
| 2442 | irq = -ENOSPC; | ||
| 2443 | spin_lock_irqsave(&vector_lock, flags); | ||
| 2444 | for (new = (NR_IRQS - 1); new >= 0; new--) { | ||
| 2445 | if (platform_legacy_irq(new)) | ||
| 2446 | continue; | ||
| 2447 | if (irq_vector[new] != 0) | ||
| 2448 | continue; | ||
| 2449 | vector = __assign_irq_vector(new); | ||
| 2450 | if (likely(vector > 0)) | ||
| 2451 | irq = new; | ||
| 2452 | break; | ||
| 2453 | } | ||
| 2454 | spin_unlock_irqrestore(&vector_lock, flags); | ||
| 2455 | |||
| 2456 | if (irq >= 0) { | ||
| 2457 | set_intr_gate(vector, interrupt[irq]); | ||
| 2458 | dynamic_irq_init(irq); | ||
| 2459 | } | ||
| 2460 | return irq; | ||
| 2461 | } | ||
| 2462 | |||
| 2463 | void destroy_irq(unsigned int irq) | ||
| 2464 | { | ||
| 2465 | unsigned long flags; | ||
| 2466 | |||
| 2467 | dynamic_irq_cleanup(irq); | ||
| 2468 | |||
| 2469 | spin_lock_irqsave(&vector_lock, flags); | ||
| 2470 | clear_bit(irq_vector[irq], used_vectors); | ||
| 2471 | irq_vector[irq] = 0; | ||
| 2472 | spin_unlock_irqrestore(&vector_lock, flags); | ||
| 2473 | } | ||
| 2474 | |||
| 2475 | /* | ||
| 2476 | * MSI message composition | ||
| 2477 | */ | ||
| 2478 | #ifdef CONFIG_PCI_MSI | ||
| 2479 | static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) | ||
| 2480 | { | ||
| 2481 | int vector; | ||
| 2482 | unsigned dest; | ||
| 2483 | |||
| 2484 | vector = assign_irq_vector(irq); | ||
| 2485 | if (vector >= 0) { | ||
| 2486 | dest = cpu_mask_to_apicid(TARGET_CPUS); | ||
| 2487 | |||
| 2488 | msg->address_hi = MSI_ADDR_BASE_HI; | ||
| 2489 | msg->address_lo = | ||
| 2490 | MSI_ADDR_BASE_LO | | ||
| 2491 | ((INT_DEST_MODE == 0) ? | ||
| 2492 | MSI_ADDR_DEST_MODE_PHYSICAL: | ||
| 2493 | MSI_ADDR_DEST_MODE_LOGICAL) | | ||
| 2494 | ((INT_DELIVERY_MODE != dest_LowestPrio) ? | ||
| 2495 | MSI_ADDR_REDIRECTION_CPU: | ||
| 2496 | MSI_ADDR_REDIRECTION_LOWPRI) | | ||
| 2497 | MSI_ADDR_DEST_ID(dest); | ||
| 2498 | |||
| 2499 | msg->data = | ||
| 2500 | MSI_DATA_TRIGGER_EDGE | | ||
| 2501 | MSI_DATA_LEVEL_ASSERT | | ||
| 2502 | ((INT_DELIVERY_MODE != dest_LowestPrio) ? | ||
| 2503 | MSI_DATA_DELIVERY_FIXED: | ||
| 2504 | MSI_DATA_DELIVERY_LOWPRI) | | ||
| 2505 | MSI_DATA_VECTOR(vector); | ||
| 2506 | } | ||
| 2507 | return vector; | ||
| 2508 | } | ||
| 2509 | |||
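msi_compose_msg() above assembles the MSI address and data words from the MSI_* macros. As a rough, hand-packed sketch of the same layout using bit positions from the Intel SDM (the destination ID and vector below are arbitrary examples, not kernel constants):

```c
#include <stdio.h>
#include <stdint.h>

/* Hand-packed MSI message sketch: address holds the destination ID and
 * addressing mode, data holds trigger/delivery mode and the vector. */
int main(void)
{
        uint32_t dest_id = 0x05, vector = 0x41;

        uint32_t address_lo = 0xfee00000u       /* MSI address base        */
                            | (dest_id << 12)   /* destination APIC ID     */
                            | (0u << 3)         /* RH=0: no redirection    */
                            | (0u << 2);        /* DM=0: physical mode     */
        uint32_t data = (0u << 15)              /* edge triggered          */
                      | (1u << 14)              /* level assert            */
                      | (0u << 8)               /* fixed delivery mode     */
                      | vector;

        printf("MSI address_lo = 0x%08x\n", address_lo);
        printf("MSI data       = 0x%08x\n", data);
        return 0;
}
```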
| 2510 | #ifdef CONFIG_SMP | ||
| 2511 | static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) | ||
| 2512 | { | ||
| 2513 | struct msi_msg msg; | ||
| 2514 | unsigned int dest; | ||
| 2515 | cpumask_t tmp; | ||
| 2516 | int vector; | ||
| 2517 | |||
| 2518 | cpus_and(tmp, mask, cpu_online_map); | ||
| 2519 | if (cpus_empty(tmp)) | ||
| 2520 | tmp = TARGET_CPUS; | ||
| 2521 | |||
| 2522 | vector = assign_irq_vector(irq); | ||
| 2523 | if (vector < 0) | ||
| 2524 | return; | ||
| 2525 | |||
| 2526 | dest = cpu_mask_to_apicid(mask); | ||
| 2527 | |||
| 2528 | read_msi_msg(irq, &msg); | ||
| 2529 | |||
| 2530 | msg.data &= ~MSI_DATA_VECTOR_MASK; | ||
| 2531 | msg.data |= MSI_DATA_VECTOR(vector); | ||
| 2532 | msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; | ||
| 2533 | msg.address_lo |= MSI_ADDR_DEST_ID(dest); | ||
| 2534 | |||
| 2535 | write_msi_msg(irq, &msg); | ||
| 2536 | irq_desc[irq].affinity = mask; | ||
| 2537 | } | ||
| 2538 | #endif /* CONFIG_SMP */ | ||
| 2539 | |||
| 2540 | /* | ||
| 2541 | * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, | ||
| 2542 | * which implement the MSI or MSI-X Capability Structure. | ||
| 2543 | */ | ||
| 2544 | static struct irq_chip msi_chip = { | ||
| 2545 | .name = "PCI-MSI", | ||
| 2546 | .unmask = unmask_msi_irq, | ||
| 2547 | .mask = mask_msi_irq, | ||
| 2548 | .ack = ack_ioapic_irq, | ||
| 2549 | #ifdef CONFIG_SMP | ||
| 2550 | .set_affinity = set_msi_irq_affinity, | ||
| 2551 | #endif | ||
| 2552 | .retrigger = ioapic_retrigger_irq, | ||
| 2553 | }; | ||
| 2554 | |||
| 2555 | int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) | ||
| 2556 | { | ||
| 2557 | struct msi_msg msg; | ||
| 2558 | int irq, ret; | ||
| 2559 | irq = create_irq(); | ||
| 2560 | if (irq < 0) | ||
| 2561 | return irq; | ||
| 2562 | |||
| 2563 | ret = msi_compose_msg(dev, irq, &msg); | ||
| 2564 | if (ret < 0) { | ||
| 2565 | destroy_irq(irq); | ||
| 2566 | return ret; | ||
| 2567 | } | ||
| 2568 | |||
| 2569 | set_irq_msi(irq, desc); | ||
| 2570 | write_msi_msg(irq, &msg); | ||
| 2571 | |||
| 2572 | set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, | ||
| 2573 | "edge"); | ||
| 2574 | |||
| 2575 | return 0; | ||
| 2576 | } | ||
| 2577 | |||
| 2578 | void arch_teardown_msi_irq(unsigned int irq) | ||
| 2579 | { | ||
| 2580 | destroy_irq(irq); | ||
| 2581 | } | ||
| 2582 | |||
| 2583 | #endif /* CONFIG_PCI_MSI */ | ||
| 2584 | |||
| 2585 | /* | ||
| 2586 | * Hypertransport interrupt support | ||
| 2587 | */ | ||
| 2588 | #ifdef CONFIG_HT_IRQ | ||
| 2589 | |||
| 2590 | #ifdef CONFIG_SMP | ||
| 2591 | |||
| 2592 | static void target_ht_irq(unsigned int irq, unsigned int dest) | ||
| 2593 | { | ||
| 2594 | struct ht_irq_msg msg; | ||
| 2595 | fetch_ht_irq_msg(irq, &msg); | ||
| 2596 | |||
| 2597 | msg.address_lo &= ~(HT_IRQ_LOW_DEST_ID_MASK); | ||
| 2598 | msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK); | ||
| 2599 | |||
| 2600 | msg.address_lo |= HT_IRQ_LOW_DEST_ID(dest); | ||
| 2601 | msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest); | ||
| 2602 | |||
| 2603 | write_ht_irq_msg(irq, &msg); | ||
| 2604 | } | ||
| 2605 | |||
| 2606 | static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) | ||
| 2607 | { | ||
| 2608 | unsigned int dest; | ||
| 2609 | cpumask_t tmp; | ||
| 2610 | |||
| 2611 | cpus_and(tmp, mask, cpu_online_map); | ||
| 2612 | if (cpus_empty(tmp)) | ||
| 2613 | tmp = TARGET_CPUS; | ||
| 2614 | |||
| 2615 | cpus_and(mask, tmp, CPU_MASK_ALL); | ||
| 2616 | |||
| 2617 | dest = cpu_mask_to_apicid(mask); | ||
| 2618 | |||
| 2619 | target_ht_irq(irq, dest); | ||
| 2620 | irq_desc[irq].affinity = mask; | ||
| 2621 | } | ||
| 2622 | #endif | ||
| 2623 | |||
| 2624 | static struct irq_chip ht_irq_chip = { | ||
| 2625 | .name = "PCI-HT", | ||
| 2626 | .mask = mask_ht_irq, | ||
| 2627 | .unmask = unmask_ht_irq, | ||
| 2628 | .ack = ack_ioapic_irq, | ||
| 2629 | #ifdef CONFIG_SMP | ||
| 2630 | .set_affinity = set_ht_irq_affinity, | ||
| 2631 | #endif | ||
| 2632 | .retrigger = ioapic_retrigger_irq, | ||
| 2633 | }; | ||
| 2634 | |||
| 2635 | int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) | ||
| 2636 | { | ||
| 2637 | int vector; | ||
| 2638 | |||
| 2639 | vector = assign_irq_vector(irq); | ||
| 2640 | if (vector >= 0) { | ||
| 2641 | struct ht_irq_msg msg; | ||
| 2642 | unsigned dest; | ||
| 2643 | cpumask_t tmp; | ||
| 2644 | |||
| 2645 | cpus_clear(tmp); | ||
| 2646 | cpu_set(vector >> 8, tmp); | ||
| 2647 | dest = cpu_mask_to_apicid(tmp); | ||
| 2648 | |||
| 2649 | msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); | ||
| 2650 | |||
| 2651 | msg.address_lo = | ||
| 2652 | HT_IRQ_LOW_BASE | | ||
| 2653 | HT_IRQ_LOW_DEST_ID(dest) | | ||
| 2654 | HT_IRQ_LOW_VECTOR(vector) | | ||
| 2655 | ((INT_DEST_MODE == 0) ? | ||
| 2656 | HT_IRQ_LOW_DM_PHYSICAL : | ||
| 2657 | HT_IRQ_LOW_DM_LOGICAL) | | ||
| 2658 | HT_IRQ_LOW_RQEOI_EDGE | | ||
| 2659 | ((INT_DELIVERY_MODE != dest_LowestPrio) ? | ||
| 2660 | HT_IRQ_LOW_MT_FIXED : | ||
| 2661 | HT_IRQ_LOW_MT_ARBITRATED) | | ||
| 2662 | HT_IRQ_LOW_IRQ_MASKED; | ||
| 2663 | |||
| 2664 | write_ht_irq_msg(irq, &msg); | ||
| 2665 | |||
| 2666 | set_irq_chip_and_handler_name(irq, &ht_irq_chip, | ||
| 2667 | handle_edge_irq, "edge"); | ||
| 2668 | } | ||
| 2669 | return vector; | ||
| 2670 | } | ||
| 2671 | #endif /* CONFIG_HT_IRQ */ | ||
| 2672 | |||
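
arch_setup_ht_irq() above is only the architecture half of HyperTransport interrupt support; the bus-level allocation is expected to go through the generic helpers in drivers/pci/htirq.c. A hedged sketch of that usage (ht_create_irq()/ht_destroy_irq() are assumed from that file and are not part of this patch):

#include <linux/htirq.h>
#include <linux/pci.h>

/* Sketch only: "foo" is a hypothetical driver. */
static int foo_setup_ht_irq(struct pci_dev *pdev)
{
	int irq = ht_create_irq(pdev, 0);	/* index 0 of the HT irq capability */

	if (irq < 0)
		return irq;
	/* ... request_irq(irq, ...) as in the MSI sketch earlier ... */
	return irq;
}
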
| 2673 | /* -------------------------------------------------------------------------- | ||
| 2674 | ACPI-based IOAPIC Configuration | ||
| 2675 | -------------------------------------------------------------------------- */ | ||
| 2676 | |||
| 2677 | #ifdef CONFIG_ACPI | ||
| 2678 | |||
| 2679 | int __init io_apic_get_unique_id(int ioapic, int apic_id) | ||
| 2680 | { | ||
| 2681 | union IO_APIC_reg_00 reg_00; | ||
| 2682 | static physid_mask_t apic_id_map = PHYSID_MASK_NONE; | ||
| 2683 | physid_mask_t tmp; | ||
| 2684 | unsigned long flags; | ||
| 2685 | int i = 0; | ||
| 2686 | |||
| 2687 | /* | ||
| 2688 | * The P4 platform supports up to 256 APIC IDs on two separate APIC | ||
| 2689 | * buses (one for LAPICs, one for IOAPICs), whereas its predecessors | ||
| 2690 | * supported only up to 16 on one shared APIC bus. | ||
| 2691 | * | ||
| 2692 | * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full | ||
| 2693 | * advantage of new APIC bus architecture. | ||
| 2694 | */ | ||
| 2695 | |||
| 2696 | if (physids_empty(apic_id_map)) | ||
| 2697 | apic_id_map = ioapic_phys_id_map(phys_cpu_present_map); | ||
| 2698 | |||
| 2699 | spin_lock_irqsave(&ioapic_lock, flags); | ||
| 2700 | reg_00.raw = io_apic_read(ioapic, 0); | ||
| 2701 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
| 2702 | |||
| 2703 | if (apic_id >= get_physical_broadcast()) { | ||
| 2704 | printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " | ||
| 2705 | "%d\n", ioapic, apic_id, reg_00.bits.ID); | ||
| 2706 | apic_id = reg_00.bits.ID; | ||
| 2707 | } | ||
| 2708 | |||
| 2709 | /* | ||
| 2710 | * Every APIC in a system must have a unique ID or we get lots of nice | ||
| 2711 | * 'stuck on smp_invalidate_needed IPI wait' messages. | ||
| 2712 | */ | ||
| 2713 | if (check_apicid_used(apic_id_map, apic_id)) { | ||
| 2714 | |||
| 2715 | for (i = 0; i < get_physical_broadcast(); i++) { | ||
| 2716 | if (!check_apicid_used(apic_id_map, i)) | ||
| 2717 | break; | ||
| 2718 | } | ||
| 2719 | |||
| 2720 | if (i == get_physical_broadcast()) | ||
| 2721 | panic("Max apic_id exceeded!\n"); | ||
| 2722 | |||
| 2723 | printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, " | ||
| 2724 | "trying %d\n", ioapic, apic_id, i); | ||
| 2725 | |||
| 2726 | apic_id = i; | ||
| 2727 | } | ||
| 2728 | |||
| 2729 | tmp = apicid_to_cpu_present(apic_id); | ||
| 2730 | physids_or(apic_id_map, apic_id_map, tmp); | ||
| 2731 | |||
| 2732 | if (reg_00.bits.ID != apic_id) { | ||
| 2733 | reg_00.bits.ID = apic_id; | ||
| 2734 | |||
| 2735 | spin_lock_irqsave(&ioapic_lock, flags); | ||
| 2736 | io_apic_write(ioapic, 0, reg_00.raw); | ||
| 2737 | reg_00.raw = io_apic_read(ioapic, 0); | ||
| 2738 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
| 2739 | |||
| 2740 | /* Sanity check */ | ||
| 2741 | if (reg_00.bits.ID != apic_id) { | ||
| 2742 | printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic); | ||
| 2743 | return -1; | ||
| 2744 | } | ||
| 2745 | } | ||
| 2746 | |||
| 2747 | apic_printk(APIC_VERBOSE, KERN_INFO | ||
| 2748 | "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id); | ||
| 2749 | |||
| 2750 | return apic_id; | ||
| 2751 | } | ||
| 2752 | |||
| 2753 | |||
| 2754 | int __init io_apic_get_version(int ioapic) | ||
| 2755 | { | ||
| 2756 | union IO_APIC_reg_01 reg_01; | ||
| 2757 | unsigned long flags; | ||
| 2758 | |||
| 2759 | spin_lock_irqsave(&ioapic_lock, flags); | ||
| 2760 | reg_01.raw = io_apic_read(ioapic, 1); | ||
| 2761 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
| 2762 | |||
| 2763 | return reg_01.bits.version; | ||
| 2764 | } | ||
| 2765 | |||
| 2766 | |||
| 2767 | int __init io_apic_get_redir_entries(int ioapic) | ||
| 2768 | { | ||
| 2769 | union IO_APIC_reg_01 reg_01; | ||
| 2770 | unsigned long flags; | ||
| 2771 | |||
| 2772 | spin_lock_irqsave(&ioapic_lock, flags); | ||
| 2773 | reg_01.raw = io_apic_read(ioapic, 1); | ||
| 2774 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
| 2775 | |||
| 2776 | return reg_01.bits.entries; | ||
| 2777 | } | ||
| 2778 | |||
| 2779 | |||
| 2780 | int io_apic_set_pci_routing(int ioapic, int pin, int irq, int edge_level, int active_high_low) | ||
| 2781 | { | ||
| 2782 | struct IO_APIC_route_entry entry; | ||
| 2783 | |||
| 2784 | if (!IO_APIC_IRQ(irq)) { | ||
| 2785 | printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", | ||
| 2786 | ioapic); | ||
| 2787 | return -EINVAL; | ||
| 2788 | } | ||
| 2789 | |||
| 2790 | /* | ||
| 2791 | * Generate a PCI IRQ routing entry and program the IOAPIC accordingly. | ||
| 2792 | * Note that we mask (disable) IRQs now -- these get enabled when the | ||
| 2793 | * corresponding device driver registers for this IRQ. | ||
| 2794 | */ | ||
| 2795 | |||
| 2796 | memset(&entry, 0, sizeof(entry)); | ||
| 2797 | |||
| 2798 | entry.delivery_mode = INT_DELIVERY_MODE; | ||
| 2799 | entry.dest_mode = INT_DEST_MODE; | ||
| 2800 | entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); | ||
| 2801 | entry.trigger = edge_level; | ||
| 2802 | entry.polarity = active_high_low; | ||
| 2803 | entry.mask = 1; | ||
| 2804 | |||
| 2805 | /* | ||
| 2806 | * IRQs < 16 are already in the irq_2_pin[] map | ||
| 2807 | */ | ||
| 2808 | if (irq >= 16) | ||
| 2809 | add_pin_to_irq(irq, ioapic, pin); | ||
| 2810 | |||
| 2811 | entry.vector = assign_irq_vector(irq); | ||
| 2812 | |||
| 2813 | apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry " | ||
| 2814 | "(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic, | ||
| 2815 | mp_ioapics[ioapic].mp_apicid, pin, entry.vector, irq, | ||
| 2816 | edge_level, active_high_low); | ||
| 2817 | |||
| 2818 | ioapic_register_intr(irq, entry.vector, edge_level); | ||
| 2819 | |||
| 2820 | if (!ioapic && (irq < 16)) | ||
| 2821 | disable_8259A_irq(irq); | ||
| 2822 | |||
| 2823 | ioapic_write_entry(ioapic, pin, entry); | ||
| 2824 | |||
| 2825 | return 0; | ||
| 2826 | } | ||
| 2827 | |||
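
io_apic_set_pci_routing() is what the ACPI boot code calls once a GSI has been translated into an (ioapic, pin) pair. A hypothetical caller, just to show how the arguments line up (the helper name and parameter plumbing are assumptions, not the literal mp_register_gsi() code):

#include <linux/acpi.h>

/* Sketch only: trigger/polarity would come from the MADT interrupt source
 * override, gsi_base from the matching I/O-APIC entry. */
static int __init foo_register_gsi(u32 gsi, u32 gsi_base, int ioapic,
				   int trigger, int polarity)
{
	int pin = gsi - gsi_base;	/* pin index within this I/O-APIC */

	return io_apic_set_pci_routing(ioapic, pin, gsi,
				       trigger == ACPI_LEVEL_SENSITIVE,
				       polarity == ACPI_ACTIVE_LOW);
}
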
| 2828 | int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) | ||
| 2829 | { | ||
| 2830 | int i; | ||
| 2831 | |||
| 2832 | if (skip_ioapic_setup) | ||
| 2833 | return -1; | ||
| 2834 | |||
| 2835 | for (i = 0; i < mp_irq_entries; i++) | ||
| 2836 | if (mp_irqs[i].mp_irqtype == mp_INT && | ||
| 2837 | mp_irqs[i].mp_srcbusirq == bus_irq) | ||
| 2838 | break; | ||
| 2839 | if (i >= mp_irq_entries) | ||
| 2840 | return -1; | ||
| 2841 | |||
| 2842 | *trigger = irq_trigger(i); | ||
| 2843 | *polarity = irq_polarity(i); | ||
| 2844 | return 0; | ||
| 2845 | } | ||
| 2846 | |||
| 2847 | #endif /* CONFIG_ACPI */ | ||
| 2848 | |||
| 2849 | static int __init parse_disable_timer_pin_1(char *arg) | ||
| 2850 | { | ||
| 2851 | disable_timer_pin_1 = 1; | ||
| 2852 | return 0; | ||
| 2853 | } | ||
| 2854 | early_param("disable_timer_pin_1", parse_disable_timer_pin_1); | ||
| 2855 | |||
| 2856 | static int __init parse_enable_timer_pin_1(char *arg) | ||
| 2857 | { | ||
| 2858 | disable_timer_pin_1 = -1; | ||
| 2859 | return 0; | ||
| 2860 | } | ||
| 2861 | early_param("enable_timer_pin_1", parse_enable_timer_pin_1); | ||
| 2862 | |||
| 2863 | static int __init parse_noapic(char *arg) | ||
| 2864 | { | ||
| 2865 | /* disable IO-APIC */ | ||
| 2866 | disable_ioapic_setup(); | ||
| 2867 | return 0; | ||
| 2868 | } | ||
| 2869 | early_param("noapic", parse_noapic); | ||
| 2870 | |||
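
The three boot parameters above all follow the same early_param() pattern: a tiny parser that runs before the normal __setup() machinery and flips a flag. Adding another one follows the same shape; the flag and parser below are purely hypothetical:

/* Hypothetical example of the early_param() pattern; neither "ioapic_foo"
 * nor parse_ioapic_foo() exists in the kernel. */
static int ioapic_foo;

static int __init parse_ioapic_foo(char *arg)
{
	ioapic_foo = 1;
	return 0;
}
early_param("ioapic_foo", parse_ioapic_foo);
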
| 2871 | void __init ioapic_init_mappings(void) | ||
| 2872 | { | ||
| 2873 | unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; | ||
| 2874 | int i; | ||
| 2875 | |||
| 2876 | for (i = 0; i < nr_ioapics; i++) { | ||
| 2877 | if (smp_found_config) { | ||
| 2878 | ioapic_phys = mp_ioapics[i].mp_apicaddr; | ||
| 2879 | if (!ioapic_phys) { | ||
| 2880 | printk(KERN_ERR | ||
| 2881 | "WARNING: bogus zero IO-APIC " | ||
| 2882 | "address found in MPTABLE, " | ||
| 2883 | "disabling IO/APIC support!\n"); | ||
| 2884 | smp_found_config = 0; | ||
| 2885 | skip_ioapic_setup = 1; | ||
| 2886 | goto fake_ioapic_page; | ||
| 2887 | } | ||
| 2888 | } else { | ||
| 2889 | fake_ioapic_page: | ||
| 2890 | ioapic_phys = (unsigned long) | ||
| 2891 | alloc_bootmem_pages(PAGE_SIZE); | ||
| 2892 | ioapic_phys = __pa(ioapic_phys); | ||
| 2893 | } | ||
| 2894 | set_fixmap_nocache(idx, ioapic_phys); | ||
| 2895 | printk(KERN_DEBUG "mapped IOAPIC to %08lx (%08lx)\n", | ||
| 2896 | __fix_to_virt(idx), ioapic_phys); | ||
| 2897 | idx++; | ||
| 2898 | } | ||
| 2899 | } | ||
| 2900 | |||
diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c index 5921e5f0a640..720d2607aacb 100644 --- a/arch/x86/kernel/io_delay.c +++ b/arch/x86/kernel/io_delay.c | |||
| @@ -92,6 +92,14 @@ static struct dmi_system_id __initdata io_delay_0xed_port_dmi_table[] = { | |||
| 92 | DMI_MATCH(DMI_BOARD_NAME, "30BF") | 92 | DMI_MATCH(DMI_BOARD_NAME, "30BF") |
| 93 | } | 93 | } |
| 94 | }, | 94 | }, |
| 95 | { | ||
| 96 | .callback = dmi_io_delay_0xed_port, | ||
| 97 | .ident = "Presario F700", | ||
| 98 | .matches = { | ||
| 99 | DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), | ||
| 100 | DMI_MATCH(DMI_BOARD_NAME, "30D3") | ||
| 101 | } | ||
| 102 | }, | ||
| 95 | { } | 103 | { } |
| 96 | }; | 104 | }; |
| 97 | 105 | ||
| @@ -103,6 +111,9 @@ void __init io_delay_init(void) | |||
| 103 | 111 | ||
| 104 | static int __init io_delay_param(char *s) | 112 | static int __init io_delay_param(char *s) |
| 105 | { | 113 | { |
| 114 | if (!s) | ||
| 115 | return -EINVAL; | ||
| 116 | |||
| 106 | if (!strcmp(s, "0x80")) | 117 | if (!strcmp(s, "0x80")) |
| 107 | io_delay_type = CONFIG_IO_DELAY_TYPE_0X80; | 118 | io_delay_type = CONFIG_IO_DELAY_TYPE_0X80; |
| 108 | else if (!strcmp(s, "0xed")) | 119 | else if (!strcmp(s, "0xed")) |
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 50e5e4a31c85..191914302744 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c | |||
| @@ -14,6 +14,7 @@ | |||
| 14 | #include <linux/slab.h> | 14 | #include <linux/slab.h> |
| 15 | #include <linux/thread_info.h> | 15 | #include <linux/thread_info.h> |
| 16 | #include <linux/syscalls.h> | 16 | #include <linux/syscalls.h> |
| 17 | #include <asm/syscalls.h> | ||
| 17 | 18 | ||
| 18 | /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ | 19 | /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ |
| 19 | static void set_bitmap(unsigned long *bitmap, unsigned int base, | 20 | static void set_bitmap(unsigned long *bitmap, unsigned int base, |
diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c index 9d98cda39ad9..f1c688e46f35 100644 --- a/arch/x86/kernel/ipi.c +++ b/arch/x86/kernel/ipi.c | |||
| @@ -20,6 +20,8 @@ | |||
| 20 | 20 | ||
| 21 | #ifdef CONFIG_X86_32 | 21 | #ifdef CONFIG_X86_32 |
| 22 | #include <mach_apic.h> | 22 | #include <mach_apic.h> |
| 23 | #include <mach_ipi.h> | ||
| 24 | |||
| 23 | /* | 25 | /* |
| 24 | * the following functions deal with sending IPIs between CPUs. | 26 | * the following functions deal with sending IPIs between CPUs. |
| 25 | * | 27 | * |
| @@ -70,7 +72,7 @@ void __send_IPI_shortcut(unsigned int shortcut, int vector) | |||
| 70 | /* | 72 | /* |
| 71 | * Send the IPI. The write to APIC_ICR fires this off. | 73 | * Send the IPI. The write to APIC_ICR fires this off. |
| 72 | */ | 74 | */ |
| 73 | apic_write_around(APIC_ICR, cfg); | 75 | apic_write(APIC_ICR, cfg); |
| 74 | } | 76 | } |
| 75 | 77 | ||
| 76 | void send_IPI_self(int vector) | 78 | void send_IPI_self(int vector) |
| @@ -98,7 +100,7 @@ static inline void __send_IPI_dest_field(unsigned long mask, int vector) | |||
| 98 | * prepare target chip field | 100 | * prepare target chip field |
| 99 | */ | 101 | */ |
| 100 | cfg = __prepare_ICR2(mask); | 102 | cfg = __prepare_ICR2(mask); |
| 101 | apic_write_around(APIC_ICR2, cfg); | 103 | apic_write(APIC_ICR2, cfg); |
| 102 | 104 | ||
| 103 | /* | 105 | /* |
| 104 | * program the ICR | 106 | * program the ICR |
| @@ -108,7 +110,7 @@ static inline void __send_IPI_dest_field(unsigned long mask, int vector) | |||
| 108 | /* | 110 | /* |
| 109 | * Send the IPI. The write to APIC_ICR fires this off. | 111 | * Send the IPI. The write to APIC_ICR fires this off. |
| 110 | */ | 112 | */ |
| 111 | apic_write_around(APIC_ICR, cfg); | 113 | apic_write(APIC_ICR, cfg); |
| 112 | } | 114 | } |
| 113 | 115 | ||
| 114 | /* | 116 | /* |
| @@ -147,7 +149,6 @@ void send_IPI_mask_sequence(cpumask_t mask, int vector) | |||
| 147 | } | 149 | } |
| 148 | 150 | ||
| 149 | /* must come after the send_IPI functions above for inlining */ | 151 | /* must come after the send_IPI functions above for inlining */ |
| 150 | #include <mach_ipi.h> | ||
| 151 | static int convert_apicid_to_cpu(int apic_id) | 152 | static int convert_apicid_to_cpu(int apic_id) |
| 152 | { | 153 | { |
| 153 | int i; | 154 | int i; |
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c new file mode 100644 index 000000000000..d1d4dc52f649 --- /dev/null +++ b/arch/x86/kernel/irq.c | |||
| @@ -0,0 +1,189 @@ | |||
| 1 | /* | ||
| 2 | * Common interrupt code for 32 and 64 bit | ||
| 3 | */ | ||
| 4 | #include <linux/cpu.h> | ||
| 5 | #include <linux/interrupt.h> | ||
| 6 | #include <linux/kernel_stat.h> | ||
| 7 | #include <linux/seq_file.h> | ||
| 8 | |||
| 9 | #include <asm/apic.h> | ||
| 10 | #include <asm/io_apic.h> | ||
| 11 | #include <asm/smp.h> | ||
| 12 | |||
| 13 | atomic_t irq_err_count; | ||
| 14 | |||
| 15 | /* | ||
| 16 | * 'what should we do if we get a hw irq event on an illegal vector'. | ||
| 17 | * Each architecture has to answer this itself. | ||

| 18 | */ | ||
| 19 | void ack_bad_irq(unsigned int irq) | ||
| 20 | { | ||
| 21 | printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq); | ||
| 22 | |||
| 23 | #ifdef CONFIG_X86_LOCAL_APIC | ||
| 24 | /* | ||
| 25 | * Currently unexpected vectors happen only on SMP and APIC. | ||
| 26 | * We _must_ ack these because every local APIC has only N | ||
| 27 | * irq slots per priority level, and a 'hanging, unacked' IRQ | ||
| 28 | * holds up an irq slot - in excessive cases (when multiple | ||
| 29 | * unexpected vectors occur) that might lock up the APIC | ||
| 30 | * completely. | ||
| 31 | * But only ack when the APIC is enabled -AK | ||
| 32 | */ | ||
| 33 | if (cpu_has_apic) | ||
| 34 | ack_APIC_irq(); | ||
| 35 | #endif | ||
| 36 | } | ||
| 37 | |||
| 38 | #ifdef CONFIG_X86_32 | ||
| 39 | # define irq_stats(x) (&per_cpu(irq_stat, x)) | ||
| 40 | #else | ||
| 41 | # define irq_stats(x) cpu_pda(x) | ||
| 42 | #endif | ||
| 43 | /* | ||
| 44 | * /proc/interrupts printing: | ||
| 45 | */ | ||
| 46 | static int show_other_interrupts(struct seq_file *p) | ||
| 47 | { | ||
| 48 | int j; | ||
| 49 | |||
| 50 | seq_printf(p, "NMI: "); | ||
| 51 | for_each_online_cpu(j) | ||
| 52 | seq_printf(p, "%10u ", irq_stats(j)->__nmi_count); | ||
| 53 | seq_printf(p, " Non-maskable interrupts\n"); | ||
| 54 | #ifdef CONFIG_X86_LOCAL_APIC | ||
| 55 | seq_printf(p, "LOC: "); | ||
| 56 | for_each_online_cpu(j) | ||
| 57 | seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs); | ||
| 58 | seq_printf(p, " Local timer interrupts\n"); | ||
| 59 | #endif | ||
| 60 | #ifdef CONFIG_SMP | ||
| 61 | seq_printf(p, "RES: "); | ||
| 62 | for_each_online_cpu(j) | ||
| 63 | seq_printf(p, "%10u ", irq_stats(j)->irq_resched_count); | ||
| 64 | seq_printf(p, " Rescheduling interrupts\n"); | ||
| 65 | seq_printf(p, "CAL: "); | ||
| 66 | for_each_online_cpu(j) | ||
| 67 | seq_printf(p, "%10u ", irq_stats(j)->irq_call_count); | ||
| 68 | seq_printf(p, " Function call interrupts\n"); | ||
| 69 | seq_printf(p, "TLB: "); | ||
| 70 | for_each_online_cpu(j) | ||
| 71 | seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count); | ||
| 72 | seq_printf(p, " TLB shootdowns\n"); | ||
| 73 | #endif | ||
| 74 | #ifdef CONFIG_X86_MCE | ||
| 75 | seq_printf(p, "TRM: "); | ||
| 76 | for_each_online_cpu(j) | ||
| 77 | seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count); | ||
| 78 | seq_printf(p, " Thermal event interrupts\n"); | ||
| 79 | # ifdef CONFIG_X86_64 | ||
| 80 | seq_printf(p, "THR: "); | ||
| 81 | for_each_online_cpu(j) | ||
| 82 | seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count); | ||
| 83 | seq_printf(p, " Threshold APIC interrupts\n"); | ||
| 84 | # endif | ||
| 85 | #endif | ||
| 86 | #ifdef CONFIG_X86_LOCAL_APIC | ||
| 87 | seq_printf(p, "SPU: "); | ||
| 88 | for_each_online_cpu(j) | ||
| 89 | seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count); | ||
| 90 | seq_printf(p, " Spurious interrupts\n"); | ||
| 91 | #endif | ||
| 92 | seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); | ||
| 93 | #if defined(CONFIG_X86_IO_APIC) | ||
| 94 | seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count)); | ||
| 95 | #endif | ||
| 96 | return 0; | ||
| 97 | } | ||
| 98 | |||
| 99 | int show_interrupts(struct seq_file *p, void *v) | ||
| 100 | { | ||
| 101 | unsigned long flags, any_count = 0; | ||
| 102 | int i = *(loff_t *) v, j; | ||
| 103 | struct irqaction *action; | ||
| 104 | struct irq_desc *desc; | ||
| 105 | |||
| 106 | if (i > nr_irqs) | ||
| 107 | return 0; | ||
| 108 | |||
| 109 | if (i == nr_irqs) | ||
| 110 | return show_other_interrupts(p); | ||
| 111 | |||
| 112 | /* print header */ | ||
| 113 | if (i == 0) { | ||
| 114 | seq_printf(p, " "); | ||
| 115 | for_each_online_cpu(j) | ||
| 116 | seq_printf(p, "CPU%-8d", j); | ||
| 117 | seq_putc(p, '\n'); | ||
| 118 | } | ||
| 119 | |||
| 120 | desc = irq_to_desc(i); | ||
| 121 | spin_lock_irqsave(&desc->lock, flags); | ||
| 122 | #ifndef CONFIG_SMP | ||
| 123 | any_count = kstat_irqs(i); | ||
| 124 | #else | ||
| 125 | for_each_online_cpu(j) | ||
| 126 | any_count |= kstat_irqs_cpu(i, j); | ||
| 127 | #endif | ||
| 128 | action = desc->action; | ||
| 129 | if (!action && !any_count) | ||
| 130 | goto out; | ||
| 131 | |||
| 132 | seq_printf(p, "%3d: ", i); | ||
| 133 | #ifndef CONFIG_SMP | ||
| 134 | seq_printf(p, "%10u ", kstat_irqs(i)); | ||
| 135 | #else | ||
| 136 | for_each_online_cpu(j) | ||
| 137 | seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); | ||
| 138 | #endif | ||
| 139 | seq_printf(p, " %8s", desc->chip->name); | ||
| 140 | seq_printf(p, "-%-8s", desc->name); | ||
| 141 | |||
| 142 | if (action) { | ||
| 143 | seq_printf(p, " %s", action->name); | ||
| 144 | while ((action = action->next) != NULL) | ||
| 145 | seq_printf(p, ", %s", action->name); | ||
| 146 | } | ||
| 147 | |||
| 148 | seq_putc(p, '\n'); | ||
| 149 | out: | ||
| 150 | spin_unlock_irqrestore(&desc->lock, flags); | ||
| 151 | return 0; | ||
| 152 | } | ||
| 153 | |||
| 154 | /* | ||
| 155 | * /proc/stat helpers | ||
| 156 | */ | ||
| 157 | u64 arch_irq_stat_cpu(unsigned int cpu) | ||
| 158 | { | ||
| 159 | u64 sum = irq_stats(cpu)->__nmi_count; | ||
| 160 | |||
| 161 | #ifdef CONFIG_X86_LOCAL_APIC | ||
| 162 | sum += irq_stats(cpu)->apic_timer_irqs; | ||
| 163 | #endif | ||
| 164 | #ifdef CONFIG_SMP | ||
| 165 | sum += irq_stats(cpu)->irq_resched_count; | ||
| 166 | sum += irq_stats(cpu)->irq_call_count; | ||
| 167 | sum += irq_stats(cpu)->irq_tlb_count; | ||
| 168 | #endif | ||
| 169 | #ifdef CONFIG_X86_MCE | ||
| 170 | sum += irq_stats(cpu)->irq_thermal_count; | ||
| 171 | # ifdef CONFIG_X86_64 | ||
| 172 | sum += irq_stats(cpu)->irq_threshold_count; | ||
| 173 | #endif | ||
| 174 | #endif | ||
| 175 | #ifdef CONFIG_X86_LOCAL_APIC | ||
| 176 | sum += irq_stats(cpu)->irq_spurious_count; | ||
| 177 | #endif | ||
| 178 | return sum; | ||
| 179 | } | ||
| 180 | |||
| 181 | u64 arch_irq_stat(void) | ||
| 182 | { | ||
| 183 | u64 sum = atomic_read(&irq_err_count); | ||
| 184 | |||
| 185 | #ifdef CONFIG_X86_IO_APIC | ||
| 186 | sum += atomic_read(&irq_mis_count); | ||
| 187 | #endif | ||
| 188 | return sum; | ||
| 189 | } | ||
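
arch_irq_stat_cpu() and arch_irq_stat() are not called from this file; they are the architecture hooks that the generic /proc code folds into the "intr" totals of /proc/stat. A simplified sketch of that consumer side (hypothetical helper name, not the literal fs/proc code):

static u64 foo_total_arch_irqs(void)
{
	u64 total = arch_irq_stat();		/* global ERR/MIS style counts */
	unsigned int cpu;

	for_each_possible_cpu(cpu)
		total += arch_irq_stat_cpu(cpu);	/* NMI, LOC, RES, CAL, ... */
	return total;
}
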
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 47a6f6f12478..a51382672de0 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c | |||
| @@ -25,29 +25,6 @@ EXPORT_PER_CPU_SYMBOL(irq_stat); | |||
| 25 | DEFINE_PER_CPU(struct pt_regs *, irq_regs); | 25 | DEFINE_PER_CPU(struct pt_regs *, irq_regs); |
| 26 | EXPORT_PER_CPU_SYMBOL(irq_regs); | 26 | EXPORT_PER_CPU_SYMBOL(irq_regs); |
| 27 | 27 | ||
| 28 | /* | ||
| 29 | * 'what should we do if we get a hw irq event on an illegal vector'. | ||
| 30 | * each architecture has to answer this themselves. | ||
| 31 | */ | ||
| 32 | void ack_bad_irq(unsigned int irq) | ||
| 33 | { | ||
| 34 | printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq); | ||
| 35 | |||
| 36 | #ifdef CONFIG_X86_LOCAL_APIC | ||
| 37 | /* | ||
| 38 | * Currently unexpected vectors happen only on SMP and APIC. | ||
| 39 | * We _must_ ack these because every local APIC has only N | ||
| 40 | * irq slots per priority level, and a 'hanging, unacked' IRQ | ||
| 41 | * holds up an irq slot - in excessive cases (when multiple | ||
| 42 | * unexpected vectors occur) that might lock up the APIC | ||
| 43 | * completely. | ||
| 44 | * But only ack when the APIC is enabled -AK | ||
| 45 | */ | ||
| 46 | if (cpu_has_apic) | ||
| 47 | ack_APIC_irq(); | ||
| 48 | #endif | ||
| 49 | } | ||
| 50 | |||
| 51 | #ifdef CONFIG_DEBUG_STACKOVERFLOW | 28 | #ifdef CONFIG_DEBUG_STACKOVERFLOW |
| 52 | /* Debugging check for stack overflow: is there less than 1KB free? */ | 29 | /* Debugging check for stack overflow: is there less than 1KB free? */ |
| 53 | static int check_stack_overflow(void) | 30 | static int check_stack_overflow(void) |
| @@ -83,11 +60,8 @@ union irq_ctx { | |||
| 83 | static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly; | 60 | static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly; |
| 84 | static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly; | 61 | static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly; |
| 85 | 62 | ||
| 86 | static char softirq_stack[NR_CPUS * THREAD_SIZE] | 63 | static char softirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss; |
| 87 | __attribute__((__section__(".bss.page_aligned"))); | 64 | static char hardirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss; |
| 88 | |||
| 89 | static char hardirq_stack[NR_CPUS * THREAD_SIZE] | ||
| 90 | __attribute__((__section__(".bss.page_aligned"))); | ||
| 91 | 65 | ||
| 92 | static void call_on_stack(void *func, void *stack) | 66 | static void call_on_stack(void *func, void *stack) |
| 93 | { | 67 | { |
| @@ -226,20 +200,25 @@ unsigned int do_IRQ(struct pt_regs *regs) | |||
| 226 | { | 200 | { |
| 227 | struct pt_regs *old_regs; | 201 | struct pt_regs *old_regs; |
| 228 | /* high bit used in ret_from_ code */ | 202 | /* high bit used in ret_from_ code */ |
| 229 | int overflow, irq = ~regs->orig_ax; | 203 | int overflow; |
| 230 | struct irq_desc *desc = irq_desc + irq; | 204 | unsigned vector = ~regs->orig_ax; |
| 205 | struct irq_desc *desc; | ||
| 206 | unsigned irq; | ||
| 231 | 207 | ||
| 232 | if (unlikely((unsigned)irq >= NR_IRQS)) { | ||
| 233 | printk(KERN_EMERG "%s: cannot handle IRQ %d\n", | ||
| 234 | __func__, irq); | ||
| 235 | BUG(); | ||
| 236 | } | ||
| 237 | 208 | ||
| 238 | old_regs = set_irq_regs(regs); | 209 | old_regs = set_irq_regs(regs); |
| 239 | irq_enter(); | 210 | irq_enter(); |
| 211 | irq = __get_cpu_var(vector_irq)[vector]; | ||
| 240 | 212 | ||
| 241 | overflow = check_stack_overflow(); | 213 | overflow = check_stack_overflow(); |
| 242 | 214 | ||
| 215 | desc = irq_to_desc(irq); | ||
| 216 | if (unlikely(!desc)) { | ||
| 217 | printk(KERN_EMERG "%s: cannot handle IRQ %d vector %#x cpu %d\n", | ||
| 218 | __func__, irq, vector, smp_processor_id()); | ||
| 219 | BUG(); | ||
| 220 | } | ||
| 221 | |||
| 243 | if (!execute_on_irq_stack(overflow, desc, irq)) { | 222 | if (!execute_on_irq_stack(overflow, desc, irq)) { |
| 244 | if (unlikely(overflow)) | 223 | if (unlikely(overflow)) |
| 245 | print_stack_overflow(); | 224 | print_stack_overflow(); |
| @@ -251,146 +230,6 @@ unsigned int do_IRQ(struct pt_regs *regs) | |||
| 251 | return 1; | 230 | return 1; |
| 252 | } | 231 | } |
| 253 | 232 | ||
| 254 | /* | ||
| 255 | * Interrupt statistics: | ||
| 256 | */ | ||
| 257 | |||
| 258 | atomic_t irq_err_count; | ||
| 259 | |||
| 260 | /* | ||
| 261 | * /proc/interrupts printing: | ||
| 262 | */ | ||
| 263 | |||
| 264 | int show_interrupts(struct seq_file *p, void *v) | ||
| 265 | { | ||
| 266 | int i = *(loff_t *) v, j; | ||
| 267 | struct irqaction * action; | ||
| 268 | unsigned long flags; | ||
| 269 | |||
| 270 | if (i == 0) { | ||
| 271 | seq_printf(p, " "); | ||
| 272 | for_each_online_cpu(j) | ||
| 273 | seq_printf(p, "CPU%-8d",j); | ||
| 274 | seq_putc(p, '\n'); | ||
| 275 | } | ||
| 276 | |||
| 277 | if (i < NR_IRQS) { | ||
| 278 | unsigned any_count = 0; | ||
| 279 | |||
| 280 | spin_lock_irqsave(&irq_desc[i].lock, flags); | ||
| 281 | #ifndef CONFIG_SMP | ||
| 282 | any_count = kstat_irqs(i); | ||
| 283 | #else | ||
| 284 | for_each_online_cpu(j) | ||
| 285 | any_count |= kstat_cpu(j).irqs[i]; | ||
| 286 | #endif | ||
| 287 | action = irq_desc[i].action; | ||
| 288 | if (!action && !any_count) | ||
| 289 | goto skip; | ||
| 290 | seq_printf(p, "%3d: ",i); | ||
| 291 | #ifndef CONFIG_SMP | ||
| 292 | seq_printf(p, "%10u ", kstat_irqs(i)); | ||
| 293 | #else | ||
| 294 | for_each_online_cpu(j) | ||
| 295 | seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); | ||
| 296 | #endif | ||
| 297 | seq_printf(p, " %8s", irq_desc[i].chip->name); | ||
| 298 | seq_printf(p, "-%-8s", irq_desc[i].name); | ||
| 299 | |||
| 300 | if (action) { | ||
| 301 | seq_printf(p, " %s", action->name); | ||
| 302 | while ((action = action->next) != NULL) | ||
| 303 | seq_printf(p, ", %s", action->name); | ||
| 304 | } | ||
| 305 | |||
| 306 | seq_putc(p, '\n'); | ||
| 307 | skip: | ||
| 308 | spin_unlock_irqrestore(&irq_desc[i].lock, flags); | ||
| 309 | } else if (i == NR_IRQS) { | ||
| 310 | seq_printf(p, "NMI: "); | ||
| 311 | for_each_online_cpu(j) | ||
| 312 | seq_printf(p, "%10u ", nmi_count(j)); | ||
| 313 | seq_printf(p, " Non-maskable interrupts\n"); | ||
| 314 | #ifdef CONFIG_X86_LOCAL_APIC | ||
| 315 | seq_printf(p, "LOC: "); | ||
| 316 | for_each_online_cpu(j) | ||
| 317 | seq_printf(p, "%10u ", | ||
| 318 | per_cpu(irq_stat,j).apic_timer_irqs); | ||
| 319 | seq_printf(p, " Local timer interrupts\n"); | ||
| 320 | #endif | ||
| 321 | #ifdef CONFIG_SMP | ||
| 322 | seq_printf(p, "RES: "); | ||
| 323 | for_each_online_cpu(j) | ||
| 324 | seq_printf(p, "%10u ", | ||
| 325 | per_cpu(irq_stat,j).irq_resched_count); | ||
| 326 | seq_printf(p, " Rescheduling interrupts\n"); | ||
| 327 | seq_printf(p, "CAL: "); | ||
| 328 | for_each_online_cpu(j) | ||
| 329 | seq_printf(p, "%10u ", | ||
| 330 | per_cpu(irq_stat,j).irq_call_count); | ||
| 331 | seq_printf(p, " function call interrupts\n"); | ||
| 332 | seq_printf(p, "TLB: "); | ||
| 333 | for_each_online_cpu(j) | ||
| 334 | seq_printf(p, "%10u ", | ||
| 335 | per_cpu(irq_stat,j).irq_tlb_count); | ||
| 336 | seq_printf(p, " TLB shootdowns\n"); | ||
| 337 | #endif | ||
| 338 | #ifdef CONFIG_X86_MCE | ||
| 339 | seq_printf(p, "TRM: "); | ||
| 340 | for_each_online_cpu(j) | ||
| 341 | seq_printf(p, "%10u ", | ||
| 342 | per_cpu(irq_stat,j).irq_thermal_count); | ||
| 343 | seq_printf(p, " Thermal event interrupts\n"); | ||
| 344 | #endif | ||
| 345 | #ifdef CONFIG_X86_LOCAL_APIC | ||
| 346 | seq_printf(p, "SPU: "); | ||
| 347 | for_each_online_cpu(j) | ||
| 348 | seq_printf(p, "%10u ", | ||
| 349 | per_cpu(irq_stat,j).irq_spurious_count); | ||
| 350 | seq_printf(p, " Spurious interrupts\n"); | ||
| 351 | #endif | ||
| 352 | seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); | ||
| 353 | #if defined(CONFIG_X86_IO_APIC) | ||
| 354 | seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count)); | ||
| 355 | #endif | ||
| 356 | } | ||
| 357 | return 0; | ||
| 358 | } | ||
| 359 | |||
| 360 | /* | ||
| 361 | * /proc/stat helpers | ||
| 362 | */ | ||
| 363 | u64 arch_irq_stat_cpu(unsigned int cpu) | ||
| 364 | { | ||
| 365 | u64 sum = nmi_count(cpu); | ||
| 366 | |||
| 367 | #ifdef CONFIG_X86_LOCAL_APIC | ||
| 368 | sum += per_cpu(irq_stat, cpu).apic_timer_irqs; | ||
| 369 | #endif | ||
| 370 | #ifdef CONFIG_SMP | ||
| 371 | sum += per_cpu(irq_stat, cpu).irq_resched_count; | ||
| 372 | sum += per_cpu(irq_stat, cpu).irq_call_count; | ||
| 373 | sum += per_cpu(irq_stat, cpu).irq_tlb_count; | ||
| 374 | #endif | ||
| 375 | #ifdef CONFIG_X86_MCE | ||
| 376 | sum += per_cpu(irq_stat, cpu).irq_thermal_count; | ||
| 377 | #endif | ||
| 378 | #ifdef CONFIG_X86_LOCAL_APIC | ||
| 379 | sum += per_cpu(irq_stat, cpu).irq_spurious_count; | ||
| 380 | #endif | ||
| 381 | return sum; | ||
| 382 | } | ||
| 383 | |||
| 384 | u64 arch_irq_stat(void) | ||
| 385 | { | ||
| 386 | u64 sum = atomic_read(&irq_err_count); | ||
| 387 | |||
| 388 | #ifdef CONFIG_X86_IO_APIC | ||
| 389 | sum += atomic_read(&irq_mis_count); | ||
| 390 | #endif | ||
| 391 | return sum; | ||
| 392 | } | ||
| 393 | |||
| 394 | #ifdef CONFIG_HOTPLUG_CPU | 233 | #ifdef CONFIG_HOTPLUG_CPU |
| 395 | #include <mach_apic.h> | 234 | #include <mach_apic.h> |
| 396 | 235 | ||
| @@ -398,20 +237,22 @@ void fixup_irqs(cpumask_t map) | |||
| 398 | { | 237 | { |
| 399 | unsigned int irq; | 238 | unsigned int irq; |
| 400 | static int warned; | 239 | static int warned; |
| 240 | struct irq_desc *desc; | ||
| 401 | 241 | ||
| 402 | for (irq = 0; irq < NR_IRQS; irq++) { | 242 | for_each_irq_desc(irq, desc) { |
| 403 | cpumask_t mask; | 243 | cpumask_t mask; |
| 244 | |||
| 404 | if (irq == 2) | 245 | if (irq == 2) |
| 405 | continue; | 246 | continue; |
| 406 | 247 | ||
| 407 | cpus_and(mask, irq_desc[irq].affinity, map); | 248 | cpus_and(mask, desc->affinity, map); |
| 408 | if (any_online_cpu(mask) == NR_CPUS) { | 249 | if (any_online_cpu(mask) == NR_CPUS) { |
| 409 | printk("Breaking affinity for irq %i\n", irq); | 250 | printk("Breaking affinity for irq %i\n", irq); |
| 410 | mask = map; | 251 | mask = map; |
| 411 | } | 252 | } |
| 412 | if (irq_desc[irq].chip->set_affinity) | 253 | if (desc->chip->set_affinity) |
| 413 | irq_desc[irq].chip->set_affinity(irq, mask); | 254 | desc->chip->set_affinity(irq, mask); |
| 414 | else if (irq_desc[irq].action && !(warned++)) | 255 | else if (desc->action && !(warned++)) |
| 415 | printk("Cannot set affinity for irq %i\n", irq); | 256 | printk("Cannot set affinity for irq %i\n", irq); |
| 416 | } | 257 | } |
| 417 | 258 | ||
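
The fixup_irqs() rewrite above is the recurring pattern in this series: code that used to index the fixed irq_desc[NR_IRQS] array now walks only the descriptors that actually exist. The idiom in isolation, using the same calls as the hunk above (the wrapper function is hypothetical):

/* Sketch of the for_each_irq_desc() idiom used throughout this patch. */
static void foo_walk_irqs(cpumask_t mask)
{
	struct irq_desc *desc;
	unsigned int irq;

	for_each_irq_desc(irq, desc) {
		if (!desc->action)		/* nothing registered on this line */
			continue;
		if (desc->chip->set_affinity)
			desc->chip->set_affinity(irq, mask);
	}
}
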
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 1f78b238d8d2..60eb84eb77a0 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c | |||
| @@ -18,28 +18,6 @@ | |||
| 18 | #include <asm/idle.h> | 18 | #include <asm/idle.h> |
| 19 | #include <asm/smp.h> | 19 | #include <asm/smp.h> |
| 20 | 20 | ||
| 21 | atomic_t irq_err_count; | ||
| 22 | |||
| 23 | /* | ||
| 24 | * 'what should we do if we get a hw irq event on an illegal vector'. | ||
| 25 | * each architecture has to answer this themselves. | ||
| 26 | */ | ||
| 27 | void ack_bad_irq(unsigned int irq) | ||
| 28 | { | ||
| 29 | printk(KERN_WARNING "unexpected IRQ trap at vector %02x\n", irq); | ||
| 30 | /* | ||
| 31 | * Currently unexpected vectors happen only on SMP and APIC. | ||
| 32 | * We _must_ ack these because every local APIC has only N | ||
| 33 | * irq slots per priority level, and a 'hanging, unacked' IRQ | ||
| 34 | * holds up an irq slot - in excessive cases (when multiple | ||
| 35 | * unexpected vectors occur) that might lock up the APIC | ||
| 36 | * completely. | ||
| 37 | * But don't ack when the APIC is disabled. -AK | ||
| 38 | */ | ||
| 39 | if (!disable_apic) | ||
| 40 | ack_APIC_irq(); | ||
| 41 | } | ||
| 42 | |||
| 43 | #ifdef CONFIG_DEBUG_STACKOVERFLOW | 21 | #ifdef CONFIG_DEBUG_STACKOVERFLOW |
| 44 | /* | 22 | /* |
| 45 | * Probabilistic stack overflow check: | 23 | * Probabilistic stack overflow check: |
| @@ -65,122 +43,6 @@ static inline void stack_overflow_check(struct pt_regs *regs) | |||
| 65 | #endif | 43 | #endif |
| 66 | 44 | ||
| 67 | /* | 45 | /* |
| 68 | * Generic, controller-independent functions: | ||
| 69 | */ | ||
| 70 | |||
| 71 | int show_interrupts(struct seq_file *p, void *v) | ||
| 72 | { | ||
| 73 | int i = *(loff_t *) v, j; | ||
| 74 | struct irqaction * action; | ||
| 75 | unsigned long flags; | ||
| 76 | |||
| 77 | if (i == 0) { | ||
| 78 | seq_printf(p, " "); | ||
| 79 | for_each_online_cpu(j) | ||
| 80 | seq_printf(p, "CPU%-8d",j); | ||
| 81 | seq_putc(p, '\n'); | ||
| 82 | } | ||
| 83 | |||
| 84 | if (i < NR_IRQS) { | ||
| 85 | unsigned any_count = 0; | ||
| 86 | |||
| 87 | spin_lock_irqsave(&irq_desc[i].lock, flags); | ||
| 88 | #ifndef CONFIG_SMP | ||
| 89 | any_count = kstat_irqs(i); | ||
| 90 | #else | ||
| 91 | for_each_online_cpu(j) | ||
| 92 | any_count |= kstat_cpu(j).irqs[i]; | ||
| 93 | #endif | ||
| 94 | action = irq_desc[i].action; | ||
| 95 | if (!action && !any_count) | ||
| 96 | goto skip; | ||
| 97 | seq_printf(p, "%3d: ",i); | ||
| 98 | #ifndef CONFIG_SMP | ||
| 99 | seq_printf(p, "%10u ", kstat_irqs(i)); | ||
| 100 | #else | ||
| 101 | for_each_online_cpu(j) | ||
| 102 | seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); | ||
| 103 | #endif | ||
| 104 | seq_printf(p, " %8s", irq_desc[i].chip->name); | ||
| 105 | seq_printf(p, "-%-8s", irq_desc[i].name); | ||
| 106 | |||
| 107 | if (action) { | ||
| 108 | seq_printf(p, " %s", action->name); | ||
| 109 | while ((action = action->next) != NULL) | ||
| 110 | seq_printf(p, ", %s", action->name); | ||
| 111 | } | ||
| 112 | seq_putc(p, '\n'); | ||
| 113 | skip: | ||
| 114 | spin_unlock_irqrestore(&irq_desc[i].lock, flags); | ||
| 115 | } else if (i == NR_IRQS) { | ||
| 116 | seq_printf(p, "NMI: "); | ||
| 117 | for_each_online_cpu(j) | ||
| 118 | seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count); | ||
| 119 | seq_printf(p, " Non-maskable interrupts\n"); | ||
| 120 | seq_printf(p, "LOC: "); | ||
| 121 | for_each_online_cpu(j) | ||
| 122 | seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs); | ||
| 123 | seq_printf(p, " Local timer interrupts\n"); | ||
| 124 | #ifdef CONFIG_SMP | ||
| 125 | seq_printf(p, "RES: "); | ||
| 126 | for_each_online_cpu(j) | ||
| 127 | seq_printf(p, "%10u ", cpu_pda(j)->irq_resched_count); | ||
| 128 | seq_printf(p, " Rescheduling interrupts\n"); | ||
| 129 | seq_printf(p, "CAL: "); | ||
| 130 | for_each_online_cpu(j) | ||
| 131 | seq_printf(p, "%10u ", cpu_pda(j)->irq_call_count); | ||
| 132 | seq_printf(p, " function call interrupts\n"); | ||
| 133 | seq_printf(p, "TLB: "); | ||
| 134 | for_each_online_cpu(j) | ||
| 135 | seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count); | ||
| 136 | seq_printf(p, " TLB shootdowns\n"); | ||
| 137 | #endif | ||
| 138 | #ifdef CONFIG_X86_MCE | ||
| 139 | seq_printf(p, "TRM: "); | ||
| 140 | for_each_online_cpu(j) | ||
| 141 | seq_printf(p, "%10u ", cpu_pda(j)->irq_thermal_count); | ||
| 142 | seq_printf(p, " Thermal event interrupts\n"); | ||
| 143 | seq_printf(p, "THR: "); | ||
| 144 | for_each_online_cpu(j) | ||
| 145 | seq_printf(p, "%10u ", cpu_pda(j)->irq_threshold_count); | ||
| 146 | seq_printf(p, " Threshold APIC interrupts\n"); | ||
| 147 | #endif | ||
| 148 | seq_printf(p, "SPU: "); | ||
| 149 | for_each_online_cpu(j) | ||
| 150 | seq_printf(p, "%10u ", cpu_pda(j)->irq_spurious_count); | ||
| 151 | seq_printf(p, " Spurious interrupts\n"); | ||
| 152 | seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); | ||
| 153 | } | ||
| 154 | return 0; | ||
| 155 | } | ||
| 156 | |||
| 157 | /* | ||
| 158 | * /proc/stat helpers | ||
| 159 | */ | ||
| 160 | u64 arch_irq_stat_cpu(unsigned int cpu) | ||
| 161 | { | ||
| 162 | u64 sum = cpu_pda(cpu)->__nmi_count; | ||
| 163 | |||
| 164 | sum += cpu_pda(cpu)->apic_timer_irqs; | ||
| 165 | #ifdef CONFIG_SMP | ||
| 166 | sum += cpu_pda(cpu)->irq_resched_count; | ||
| 167 | sum += cpu_pda(cpu)->irq_call_count; | ||
| 168 | sum += cpu_pda(cpu)->irq_tlb_count; | ||
| 169 | #endif | ||
| 170 | #ifdef CONFIG_X86_MCE | ||
| 171 | sum += cpu_pda(cpu)->irq_thermal_count; | ||
| 172 | sum += cpu_pda(cpu)->irq_threshold_count; | ||
| 173 | #endif | ||
| 174 | sum += cpu_pda(cpu)->irq_spurious_count; | ||
| 175 | return sum; | ||
| 176 | } | ||
| 177 | |||
| 178 | u64 arch_irq_stat(void) | ||
| 179 | { | ||
| 180 | return atomic_read(&irq_err_count); | ||
| 181 | } | ||
| 182 | |||
| 183 | /* | ||
| 184 | * do_IRQ handles all normal device IRQ's (the special | 46 | * do_IRQ handles all normal device IRQ's (the special |
| 185 | * SMP cross-CPU interrupts have their own specific | 47 | * SMP cross-CPU interrupts have their own specific |
| 186 | * handlers). | 48 | * handlers). |
| @@ -188,6 +50,7 @@ u64 arch_irq_stat(void) | |||
| 188 | asmlinkage unsigned int do_IRQ(struct pt_regs *regs) | 50 | asmlinkage unsigned int do_IRQ(struct pt_regs *regs) |
| 189 | { | 51 | { |
| 190 | struct pt_regs *old_regs = set_irq_regs(regs); | 52 | struct pt_regs *old_regs = set_irq_regs(regs); |
| 53 | struct irq_desc *desc; | ||
| 191 | 54 | ||
| 192 | /* high bit used in ret_from_ code */ | 55 | /* high bit used in ret_from_ code */ |
| 193 | unsigned vector = ~regs->orig_ax; | 56 | unsigned vector = ~regs->orig_ax; |
| @@ -201,8 +64,9 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs) | |||
| 201 | stack_overflow_check(regs); | 64 | stack_overflow_check(regs); |
| 202 | #endif | 65 | #endif |
| 203 | 66 | ||
| 204 | if (likely(irq < NR_IRQS)) | 67 | desc = irq_to_desc(irq); |
| 205 | generic_handle_irq(irq); | 68 | if (likely(desc)) |
| 69 | generic_handle_irq_desc(irq, desc); | ||
| 206 | else { | 70 | else { |
| 207 | if (!disable_apic) | 71 | if (!disable_apic) |
| 208 | ack_APIC_irq(); | 72 | ack_APIC_irq(); |
| @@ -223,8 +87,9 @@ void fixup_irqs(cpumask_t map) | |||
| 223 | { | 87 | { |
| 224 | unsigned int irq; | 88 | unsigned int irq; |
| 225 | static int warned; | 89 | static int warned; |
| 90 | struct irq_desc *desc; | ||
| 226 | 91 | ||
| 227 | for (irq = 0; irq < NR_IRQS; irq++) { | 92 | for_each_irq_desc(irq, desc) { |
| 228 | cpumask_t mask; | 93 | cpumask_t mask; |
| 229 | int break_affinity = 0; | 94 | int break_affinity = 0; |
| 230 | int set_affinity = 1; | 95 | int set_affinity = 1; |
| @@ -233,32 +98,32 @@ void fixup_irqs(cpumask_t map) | |||
| 233 | continue; | 98 | continue; |
| 234 | 99 | ||
| 235 | /* interrupts are disabled at this point */ | 100 | /* interrupts are disabled at this point */
| 236 | spin_lock(&irq_desc[irq].lock); | 101 | spin_lock(&desc->lock); |
| 237 | 102 | ||
| 238 | if (!irq_has_action(irq) || | 103 | if (!irq_has_action(irq) || |
| 239 | cpus_equal(irq_desc[irq].affinity, map)) { | 104 | cpus_equal(desc->affinity, map)) { |
| 240 | spin_unlock(&irq_desc[irq].lock); | 105 | spin_unlock(&desc->lock); |
| 241 | continue; | 106 | continue; |
| 242 | } | 107 | } |
| 243 | 108 | ||
| 244 | cpus_and(mask, irq_desc[irq].affinity, map); | 109 | cpus_and(mask, desc->affinity, map); |
| 245 | if (cpus_empty(mask)) { | 110 | if (cpus_empty(mask)) { |
| 246 | break_affinity = 1; | 111 | break_affinity = 1; |
| 247 | mask = map; | 112 | mask = map; |
| 248 | } | 113 | } |
| 249 | 114 | ||
| 250 | if (irq_desc[irq].chip->mask) | 115 | if (desc->chip->mask) |
| 251 | irq_desc[irq].chip->mask(irq); | 116 | desc->chip->mask(irq); |
| 252 | 117 | ||
| 253 | if (irq_desc[irq].chip->set_affinity) | 118 | if (desc->chip->set_affinity) |
| 254 | irq_desc[irq].chip->set_affinity(irq, mask); | 119 | desc->chip->set_affinity(irq, mask); |
| 255 | else if (!(warned++)) | 120 | else if (!(warned++)) |
| 256 | set_affinity = 0; | 121 | set_affinity = 0; |
| 257 | 122 | ||
| 258 | if (irq_desc[irq].chip->unmask) | 123 | if (desc->chip->unmask) |
| 259 | irq_desc[irq].chip->unmask(irq); | 124 | desc->chip->unmask(irq); |
| 260 | 125 | ||
| 261 | spin_unlock(&irq_desc[irq].lock); | 126 | spin_unlock(&desc->lock); |
| 262 | 127 | ||
| 263 | if (break_affinity && set_affinity) | 128 | if (break_affinity && set_affinity) |
| 264 | printk("Broke affinity for irq %i\n", irq); | 129 | printk("Broke affinity for irq %i\n", irq); |
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index d66914287ee1..845aa9803e80 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c | |||
| @@ -69,11 +69,48 @@ void __init init_ISA_irqs (void) | |||
| 69 | * 16 old-style INTA-cycle interrupts: | 69 | * 16 old-style INTA-cycle interrupts: |
| 70 | */ | 70 | */ |
| 71 | for (i = 0; i < 16; i++) { | 71 | for (i = 0; i < 16; i++) { |
| 72 | /* first time call this irq_desc */ | ||
| 73 | struct irq_desc *desc = irq_to_desc(i); | ||
| 74 | |||
| 75 | desc->status = IRQ_DISABLED; | ||
| 76 | desc->action = NULL; | ||
| 77 | desc->depth = 1; | ||
| 78 | |||
| 72 | set_irq_chip_and_handler_name(i, &i8259A_chip, | 79 | set_irq_chip_and_handler_name(i, &i8259A_chip, |
| 73 | handle_level_irq, "XT"); | 80 | handle_level_irq, "XT"); |
| 74 | } | 81 | } |
| 75 | } | 82 | } |
| 76 | 83 | ||
| 84 | /* | ||
| 85 | * IRQ2 is cascade interrupt to second interrupt controller | ||
| 86 | */ | ||
| 87 | static struct irqaction irq2 = { | ||
| 88 | .handler = no_action, | ||
| 89 | .mask = CPU_MASK_NONE, | ||
| 90 | .name = "cascade", | ||
| 91 | }; | ||
| 92 | |||
| 93 | DEFINE_PER_CPU(vector_irq_t, vector_irq) = { | ||
| 94 | [0 ... IRQ0_VECTOR - 1] = -1, | ||
| 95 | [IRQ0_VECTOR] = 0, | ||
| 96 | [IRQ1_VECTOR] = 1, | ||
| 97 | [IRQ2_VECTOR] = 2, | ||
| 98 | [IRQ3_VECTOR] = 3, | ||
| 99 | [IRQ4_VECTOR] = 4, | ||
| 100 | [IRQ5_VECTOR] = 5, | ||
| 101 | [IRQ6_VECTOR] = 6, | ||
| 102 | [IRQ7_VECTOR] = 7, | ||
| 103 | [IRQ8_VECTOR] = 8, | ||
| 104 | [IRQ9_VECTOR] = 9, | ||
| 105 | [IRQ10_VECTOR] = 10, | ||
| 106 | [IRQ11_VECTOR] = 11, | ||
| 107 | [IRQ12_VECTOR] = 12, | ||
| 108 | [IRQ13_VECTOR] = 13, | ||
| 109 | [IRQ14_VECTOR] = 14, | ||
| 110 | [IRQ15_VECTOR] = 15, | ||
| 111 | [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 | ||
| 112 | }; | ||
| 113 | |||
| 77 | /* Overridden in paravirt.c */ | 114 | /* Overridden in paravirt.c */ |
| 78 | void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); | 115 | void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); |
| 79 | 116 | ||
| @@ -89,15 +126,50 @@ void __init native_init_IRQ(void) | |||
| 89 | * us. (some of these will be overridden and become | 126 | * us. (some of these will be overridden and become |
| 90 | * 'special' SMP interrupts) | 127 | * 'special' SMP interrupts) |
| 91 | */ | 128 | */ |
| 92 | for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { | 129 | for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { |
| 93 | int vector = FIRST_EXTERNAL_VECTOR + i; | ||
| 94 | if (i >= NR_IRQS) | ||
| 95 | break; | ||
| 96 | /* SYSCALL_VECTOR was reserved in trap_init. */ | 130 | /* SYSCALL_VECTOR was reserved in trap_init. */ |
| 97 | if (!test_bit(vector, used_vectors)) | 131 | if (i != SYSCALL_VECTOR) |
| 98 | set_intr_gate(vector, interrupt[i]); | 132 | set_intr_gate(i, interrupt[i]); |
| 99 | } | 133 | } |
| 100 | 134 | ||
| 135 | |||
| 136 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP) | ||
| 137 | /* | ||
| 138 | * The reschedule interrupt is a CPU-to-CPU reschedule-helper | ||
| 139 | * IPI, driven by wakeup. | ||
| 140 | */ | ||
| 141 | alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); | ||
| 142 | |||
| 143 | /* IPI for invalidation */ | ||
| 144 | alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt); | ||
| 145 | |||
| 146 | /* IPI for generic function call */ | ||
| 147 | alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); | ||
| 148 | |||
| 149 | /* IPI for single call function */ | ||
| 150 | set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt); | ||
| 151 | |||
| 152 | /* Low priority IPI to cleanup after moving an irq */ | ||
| 153 | set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); | ||
| 154 | #endif | ||
| 155 | |||
| 156 | #ifdef CONFIG_X86_LOCAL_APIC | ||
| 157 | /* self generated IPI for local APIC timer */ | ||
| 158 | alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); | ||
| 159 | |||
| 160 | /* IPI vectors for APIC spurious and error interrupts */ | ||
| 161 | alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); | ||
| 162 | alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); | ||
| 163 | #endif | ||
| 164 | |||
| 165 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL) | ||
| 166 | /* thermal monitor LVT interrupt */ | ||
| 167 | alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); | ||
| 168 | #endif | ||
| 169 | |||
| 170 | if (!acpi_ioapic) | ||
| 171 | setup_irq(2, &irq2); | ||
| 172 | |||
| 101 | /* setup after call gates are initialised (usually add in | 173 | /* setup after call gates are initialised (usually add in |
| 102 | * the architecture specific gates) | 174 | * the architecture specific gates) |
| 103 | */ | 175 | */ |
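
A note on the two kinds of gate setup above: set_intr_gate() only installs the handler, while alloc_intr_gate() is also expected to reserve the vector in used_vectors so the dynamic allocator (create_irq() earlier in this patch) cannot hand it out again. Roughly, from memory rather than from this patch:

/* Rough sketch of what alloc_intr_gate() adds over set_intr_gate();
 * the real definition lives in the descriptor headers, not here. */
static inline void alloc_intr_gate_sketch(unsigned int n, void *addr)
{
	BUG_ON(test_bit(n, used_vectors));	/* vector must still be free */
	set_bit(n, used_vectors);		/* reserve it */
	set_intr_gate(n, addr);			/* then install the gate */
}
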
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c index 0373e88de95a..ff0235391285 100644 --- a/arch/x86/kernel/irqinit_64.c +++ b/arch/x86/kernel/irqinit_64.c | |||
| @@ -43,10 +43,11 @@ | |||
| 43 | 43 | ||
| 44 | #define BUILD_IRQ(nr) \ | 44 | #define BUILD_IRQ(nr) \ |
| 45 | asmlinkage void IRQ_NAME(nr); \ | 45 | asmlinkage void IRQ_NAME(nr); \ |
| 46 | asm("\n.p2align\n" \ | 46 | asm("\n.text\n.p2align\n" \ |
| 47 | "IRQ" #nr "_interrupt:\n\t" \ | 47 | "IRQ" #nr "_interrupt:\n\t" \ |
| 48 | "push $~(" #nr ") ; " \ | 48 | "push $~(" #nr ") ; " \ |
| 49 | "jmp common_interrupt"); | 49 | "jmp common_interrupt\n" \ |
| 50 | ".previous"); | ||
| 50 | 51 | ||
| 51 | #define BI(x,y) \ | 52 | #define BI(x,y) \ |
| 52 | BUILD_IRQ(x##y) | 53 | BUILD_IRQ(x##y) |
| @@ -134,51 +135,33 @@ DEFINE_PER_CPU(vector_irq_t, vector_irq) = { | |||
| 134 | [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 | 135 | [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 |
| 135 | }; | 136 | }; |
| 136 | 137 | ||
| 137 | static void __init init_ISA_irqs (void) | 138 | void __init init_ISA_irqs(void) |
| 138 | { | 139 | { |
| 139 | int i; | 140 | int i; |
| 140 | 141 | ||
| 141 | init_bsp_APIC(); | 142 | init_bsp_APIC(); |
| 142 | init_8259A(0); | 143 | init_8259A(0); |
| 143 | 144 | ||
| 144 | for (i = 0; i < NR_IRQS; i++) { | 145 | for (i = 0; i < 16; i++) { |
| 145 | irq_desc[i].status = IRQ_DISABLED; | 146 | /* first time call this irq_desc */ |
| 146 | irq_desc[i].action = NULL; | 147 | struct irq_desc *desc = irq_to_desc(i); |
| 147 | irq_desc[i].depth = 1; | ||
| 148 | 148 | ||
| 149 | if (i < 16) { | 149 | desc->status = IRQ_DISABLED; |
| 150 | /* | 150 | desc->action = NULL; |
| 151 | * 16 old-style INTA-cycle interrupts: | 151 | desc->depth = 1; |
| 152 | */ | 152 | |
| 153 | set_irq_chip_and_handler_name(i, &i8259A_chip, | 153 | /* |
| 154 | * 16 old-style INTA-cycle interrupts: | ||
| 155 | */ | ||
| 156 | set_irq_chip_and_handler_name(i, &i8259A_chip, | ||
| 154 | handle_level_irq, "XT"); | 157 | handle_level_irq, "XT"); |
| 155 | } else { | ||
| 156 | /* | ||
| 157 | * 'high' PCI IRQs filled in on demand | ||
| 158 | */ | ||
| 159 | irq_desc[i].chip = &no_irq_chip; | ||
| 160 | } | ||
| 161 | } | 158 | } |
| 162 | } | 159 | } |
| 163 | 160 | ||
| 164 | void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); | 161 | void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); |
| 165 | 162 | ||
| 166 | void __init native_init_IRQ(void) | 163 | static void __init smp_intr_init(void) |
| 167 | { | 164 | { |
| 168 | int i; | ||
| 169 | |||
| 170 | init_ISA_irqs(); | ||
| 171 | /* | ||
| 172 | * Cover the whole vector space, no vector can escape | ||
| 173 | * us. (some of these will be overridden and become | ||
| 174 | * 'special' SMP interrupts) | ||
| 175 | */ | ||
| 176 | for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { | ||
| 177 | int vector = FIRST_EXTERNAL_VECTOR + i; | ||
| 178 | if (vector != IA32_SYSCALL_VECTOR) | ||
| 179 | set_intr_gate(vector, interrupt[i]); | ||
| 180 | } | ||
| 181 | |||
| 182 | #ifdef CONFIG_SMP | 165 | #ifdef CONFIG_SMP |
| 183 | /* | 166 | /* |
| 184 | * The reschedule interrupt is a CPU-to-CPU reschedule-helper | 167 | * The reschedule interrupt is a CPU-to-CPU reschedule-helper |
| @@ -206,6 +189,12 @@ void __init native_init_IRQ(void) | |||
| 206 | /* Low priority IPI to cleanup after moving an irq */ | 189 | /* Low priority IPI to cleanup after moving an irq */ |
| 207 | set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); | 190 | set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); |
| 208 | #endif | 191 | #endif |
| 192 | } | ||
| 193 | |||
| 194 | static void __init apic_intr_init(void) | ||
| 195 | { | ||
| 196 | smp_intr_init(); | ||
| 197 | |||
| 209 | alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); | 198 | alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); |
| 210 | alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); | 199 | alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); |
| 211 | 200 | ||
| @@ -215,6 +204,25 @@ void __init native_init_IRQ(void) | |||
| 215 | /* IPI vectors for APIC spurious and error interrupts */ | 204 | /* IPI vectors for APIC spurious and error interrupts */ |
| 216 | alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); | 205 | alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); |
| 217 | alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); | 206 | alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); |
| 207 | } | ||
| 208 | |||
| 209 | void __init native_init_IRQ(void) | ||
| 210 | { | ||
| 211 | int i; | ||
| 212 | |||
| 213 | init_ISA_irqs(); | ||
| 214 | /* | ||
| 215 | * Cover the whole vector space, no vector can escape | ||
| 216 | * us. (some of these will be overridden and become | ||
| 217 | * 'special' SMP interrupts) | ||
| 218 | */ | ||
| 219 | for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { | ||
| 220 | int vector = FIRST_EXTERNAL_VECTOR + i; | ||
| 221 | if (vector != IA32_SYSCALL_VECTOR) | ||
| 222 | set_intr_gate(vector, interrupt[i]); | ||
| 223 | } | ||
| 224 | |||
| 225 | apic_intr_init(); | ||
| 218 | 226 | ||
| 219 | if (!acpi_ioapic) | 227 | if (!acpi_ioapic) |
| 220 | setup_irq(2, &irq2); | 228 | setup_irq(2, &irq2); |
diff --git a/arch/x86/kernel/k8.c b/arch/x86/kernel/k8.c index 7377ccb21335..304d8bad6559 100644 --- a/arch/x86/kernel/k8.c +++ b/arch/x86/kernel/k8.c | |||
| @@ -16,8 +16,9 @@ EXPORT_SYMBOL(num_k8_northbridges); | |||
| 16 | static u32 *flush_words; | 16 | static u32 *flush_words; |
| 17 | 17 | ||
| 18 | struct pci_device_id k8_nb_ids[] = { | 18 | struct pci_device_id k8_nb_ids[] = { |
| 19 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) }, | 19 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, |
| 20 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) }, | 20 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, |
| 21 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_11H_NB_MISC) }, | ||
| 21 | {} | 22 | {} |
| 22 | }; | 23 | }; |
| 23 | EXPORT_SYMBOL(k8_nb_ids); | 24 | EXPORT_SYMBOL(k8_nb_ids); |
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c index c03205991718..ff7d3b0124f1 100644 --- a/arch/x86/kernel/kdebugfs.c +++ b/arch/x86/kernel/kdebugfs.c | |||
| @@ -12,9 +12,13 @@ | |||
| 12 | #include <linux/init.h> | 12 | #include <linux/init.h> |
| 13 | #include <linux/io.h> | 13 | #include <linux/io.h> |
| 14 | #include <linux/mm.h> | 14 | #include <linux/mm.h> |
| 15 | #include <linux/module.h> | ||
| 15 | 16 | ||
| 16 | #include <asm/setup.h> | 17 | #include <asm/setup.h> |
| 17 | 18 | ||
| 19 | struct dentry *arch_debugfs_dir; | ||
| 20 | EXPORT_SYMBOL(arch_debugfs_dir); | ||
| 21 | |||
| 18 | #ifdef CONFIG_DEBUG_BOOT_PARAMS | 22 | #ifdef CONFIG_DEBUG_BOOT_PARAMS |
| 19 | struct setup_data_node { | 23 | struct setup_data_node { |
| 20 | u64 paddr; | 24 | u64 paddr; |
| @@ -135,6 +139,7 @@ static int __init create_setup_data_nodes(struct dentry *parent) | |||
| 135 | if (PageHighMem(pg)) { | 139 | if (PageHighMem(pg)) { |
| 136 | data = ioremap_cache(pa_data, sizeof(*data)); | 140 | data = ioremap_cache(pa_data, sizeof(*data)); |
| 137 | if (!data) { | 141 | if (!data) { |
| 142 | kfree(node); | ||
| 138 | error = -ENXIO; | 143 | error = -ENXIO; |
| 139 | goto err_dir; | 144 | goto err_dir; |
| 140 | } | 145 | } |
| @@ -209,6 +214,10 @@ static int __init arch_kdebugfs_init(void) | |||
| 209 | { | 214 | { |
| 210 | int error = 0; | 215 | int error = 0; |
| 211 | 216 | ||
| 217 | arch_debugfs_dir = debugfs_create_dir("x86", NULL); | ||
| 218 | if (!arch_debugfs_dir) | ||
| 219 | return -ENOMEM; | ||
| 220 | |||
| 212 | #ifdef CONFIG_DEBUG_BOOT_PARAMS | 221 | #ifdef CONFIG_DEBUG_BOOT_PARAMS |
| 213 | error = boot_params_kdebugfs_init(); | 222 | error = boot_params_kdebugfs_init(); |
| 214 | #endif | 223 | #endif |
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index f47f0eb886b8..10435a120d22 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c | |||
| @@ -69,6 +69,9 @@ static int gdb_x86vector = -1; | |||
| 69 | */ | 69 | */ |
| 70 | void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) | 70 | void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) |
| 71 | { | 71 | { |
| 72 | #ifndef CONFIG_X86_32 | ||
| 73 | u32 *gdb_regs32 = (u32 *)gdb_regs; | ||
| 74 | #endif | ||
| 72 | gdb_regs[GDB_AX] = regs->ax; | 75 | gdb_regs[GDB_AX] = regs->ax; |
| 73 | gdb_regs[GDB_BX] = regs->bx; | 76 | gdb_regs[GDB_BX] = regs->bx; |
| 74 | gdb_regs[GDB_CX] = regs->cx; | 77 | gdb_regs[GDB_CX] = regs->cx; |
| @@ -76,9 +79,9 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) | |||
| 76 | gdb_regs[GDB_SI] = regs->si; | 79 | gdb_regs[GDB_SI] = regs->si; |
| 77 | gdb_regs[GDB_DI] = regs->di; | 80 | gdb_regs[GDB_DI] = regs->di; |
| 78 | gdb_regs[GDB_BP] = regs->bp; | 81 | gdb_regs[GDB_BP] = regs->bp; |
| 79 | gdb_regs[GDB_PS] = regs->flags; | ||
| 80 | gdb_regs[GDB_PC] = regs->ip; | 82 | gdb_regs[GDB_PC] = regs->ip; |
| 81 | #ifdef CONFIG_X86_32 | 83 | #ifdef CONFIG_X86_32 |
| 84 | gdb_regs[GDB_PS] = regs->flags; | ||
| 82 | gdb_regs[GDB_DS] = regs->ds; | 85 | gdb_regs[GDB_DS] = regs->ds; |
| 83 | gdb_regs[GDB_ES] = regs->es; | 86 | gdb_regs[GDB_ES] = regs->es; |
| 84 | gdb_regs[GDB_CS] = regs->cs; | 87 | gdb_regs[GDB_CS] = regs->cs; |
| @@ -94,6 +97,9 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) | |||
| 94 | gdb_regs[GDB_R13] = regs->r13; | 97 | gdb_regs[GDB_R13] = regs->r13; |
| 95 | gdb_regs[GDB_R14] = regs->r14; | 98 | gdb_regs[GDB_R14] = regs->r14; |
| 96 | gdb_regs[GDB_R15] = regs->r15; | 99 | gdb_regs[GDB_R15] = regs->r15; |
| 100 | gdb_regs32[GDB_PS] = regs->flags; | ||
| 101 | gdb_regs32[GDB_CS] = regs->cs; | ||
| 102 | gdb_regs32[GDB_SS] = regs->ss; | ||
| 97 | #endif | 103 | #endif |
| 98 | gdb_regs[GDB_SP] = regs->sp; | 104 | gdb_regs[GDB_SP] = regs->sp; |
| 99 | } | 105 | } |
| @@ -112,6 +118,9 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) | |||
| 112 | */ | 118 | */ |
| 113 | void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) | 119 | void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) |
| 114 | { | 120 | { |
| 121 | #ifndef CONFIG_X86_32 | ||
| 122 | u32 *gdb_regs32 = (u32 *)gdb_regs; | ||
| 123 | #endif | ||
| 115 | gdb_regs[GDB_AX] = 0; | 124 | gdb_regs[GDB_AX] = 0; |
| 116 | gdb_regs[GDB_BX] = 0; | 125 | gdb_regs[GDB_BX] = 0; |
| 117 | gdb_regs[GDB_CX] = 0; | 126 | gdb_regs[GDB_CX] = 0; |
| @@ -129,8 +138,10 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) | |||
| 129 | gdb_regs[GDB_FS] = 0xFFFF; | 138 | gdb_regs[GDB_FS] = 0xFFFF; |
| 130 | gdb_regs[GDB_GS] = 0xFFFF; | 139 | gdb_regs[GDB_GS] = 0xFFFF; |
| 131 | #else | 140 | #else |
| 132 | gdb_regs[GDB_PS] = *(unsigned long *)(p->thread.sp + 8); | 141 | gdb_regs32[GDB_PS] = *(unsigned long *)(p->thread.sp + 8); |
| 133 | gdb_regs[GDB_PC] = 0; | 142 | gdb_regs32[GDB_CS] = __KERNEL_CS; |
| 143 | gdb_regs32[GDB_SS] = __KERNEL_DS; | ||
| 144 | gdb_regs[GDB_PC] = p->thread.ip; | ||
| 134 | gdb_regs[GDB_R8] = 0; | 145 | gdb_regs[GDB_R8] = 0; |
| 135 | gdb_regs[GDB_R9] = 0; | 146 | gdb_regs[GDB_R9] = 0; |
| 136 | gdb_regs[GDB_R10] = 0; | 147 | gdb_regs[GDB_R10] = 0; |
| @@ -153,6 +164,9 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) | |||
| 153 | */ | 164 | */ |
| 154 | void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs) | 165 | void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs) |
| 155 | { | 166 | { |
| 167 | #ifndef CONFIG_X86_32 | ||
| 168 | u32 *gdb_regs32 = (u32 *)gdb_regs; | ||
| 169 | #endif | ||
| 156 | regs->ax = gdb_regs[GDB_AX]; | 170 | regs->ax = gdb_regs[GDB_AX]; |
| 157 | regs->bx = gdb_regs[GDB_BX]; | 171 | regs->bx = gdb_regs[GDB_BX]; |
| 158 | regs->cx = gdb_regs[GDB_CX]; | 172 | regs->cx = gdb_regs[GDB_CX]; |
| @@ -160,9 +174,9 @@ void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs) | |||
| 160 | regs->si = gdb_regs[GDB_SI]; | 174 | regs->si = gdb_regs[GDB_SI]; |
| 161 | regs->di = gdb_regs[GDB_DI]; | 175 | regs->di = gdb_regs[GDB_DI]; |
| 162 | regs->bp = gdb_regs[GDB_BP]; | 176 | regs->bp = gdb_regs[GDB_BP]; |
| 163 | regs->flags = gdb_regs[GDB_PS]; | ||
| 164 | regs->ip = gdb_regs[GDB_PC]; | 177 | regs->ip = gdb_regs[GDB_PC]; |
| 165 | #ifdef CONFIG_X86_32 | 178 | #ifdef CONFIG_X86_32 |
| 179 | regs->flags = gdb_regs[GDB_PS]; | ||
| 166 | regs->ds = gdb_regs[GDB_DS]; | 180 | regs->ds = gdb_regs[GDB_DS]; |
| 167 | regs->es = gdb_regs[GDB_ES]; | 181 | regs->es = gdb_regs[GDB_ES]; |
| 168 | regs->cs = gdb_regs[GDB_CS]; | 182 | regs->cs = gdb_regs[GDB_CS]; |
| @@ -175,6 +189,9 @@ void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs) | |||
| 175 | regs->r13 = gdb_regs[GDB_R13]; | 189 | regs->r13 = gdb_regs[GDB_R13]; |
| 176 | regs->r14 = gdb_regs[GDB_R14]; | 190 | regs->r14 = gdb_regs[GDB_R14]; |
| 177 | regs->r15 = gdb_regs[GDB_R15]; | 191 | regs->r15 = gdb_regs[GDB_R15]; |
| 192 | regs->flags = gdb_regs32[GDB_PS]; | ||
| 193 | regs->cs = gdb_regs32[GDB_CS]; | ||
| 194 | regs->ss = gdb_regs32[GDB_SS]; | ||
| 178 | #endif | 195 | #endif |
| 179 | } | 196 | } |
| 180 | 197 | ||
| @@ -378,10 +395,8 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, | |||
| 378 | if (remcomInBuffer[0] == 's') { | 395 | if (remcomInBuffer[0] == 's') { |
| 379 | linux_regs->flags |= X86_EFLAGS_TF; | 396 | linux_regs->flags |= X86_EFLAGS_TF; |
| 380 | kgdb_single_step = 1; | 397 | kgdb_single_step = 1; |
| 381 | if (kgdb_contthread) { | 398 | atomic_set(&kgdb_cpu_doing_single_step, |
| 382 | atomic_set(&kgdb_cpu_doing_single_step, | 399 | raw_smp_processor_id()); |
| 383 | raw_smp_processor_id()); | ||
| 384 | } | ||
| 385 | } | 400 | } |
| 386 | 401 | ||
| 387 | get_debugreg(dr6, 6); | 402 | get_debugreg(dr6, 6); |
| @@ -440,12 +455,7 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd) | |||
| 440 | return NOTIFY_DONE; | 455 | return NOTIFY_DONE; |
| 441 | 456 | ||
| 442 | case DIE_NMI_IPI: | 457 | case DIE_NMI_IPI: |
| 443 | if (atomic_read(&kgdb_active) != -1) { | 458 | /* Just ignore, we will handle the roundup on DIE_NMI. */ |
| 444 | /* KGDB CPU roundup */ | ||
| 445 | kgdb_nmicallback(raw_smp_processor_id(), regs); | ||
| 446 | was_in_debug_nmi[raw_smp_processor_id()] = 1; | ||
| 447 | touch_nmi_watchdog(); | ||
| 448 | } | ||
| 449 | return NOTIFY_DONE; | 459 | return NOTIFY_DONE; |
| 450 | 460 | ||
| 451 | case DIE_NMIUNKNOWN: | 461 | case DIE_NMIUNKNOWN: |
| @@ -466,9 +476,15 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd) | |||
| 466 | 476 | ||
| 467 | case DIE_DEBUG: | 477 | case DIE_DEBUG: |
| 468 | if (atomic_read(&kgdb_cpu_doing_single_step) == | 478 | if (atomic_read(&kgdb_cpu_doing_single_step) == |
| 469 | raw_smp_processor_id() && | 479 | raw_smp_processor_id()) { |
| 470 | user_mode(regs)) | 480 | if (user_mode(regs)) |
| 471 | return single_step_cont(regs, args); | 481 | return single_step_cont(regs, args); |
| 482 | break; | ||
| 483 | } else if (test_thread_flag(TIF_SINGLESTEP)) | ||
| 484 | /* This means a user thread is single stepping | ||
| 485 | * a system call which should be ignored | ||
| 486 | */ | ||
| 487 | return NOTIFY_DONE; | ||
| 472 | /* fall through */ | 488 | /* fall through */ |
| 473 | default: | 489 | default: |
| 474 | if (user_mode(regs)) | 490 | if (user_mode(regs)) |
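The kgdb rework above follows gdb's 64-bit register layout, in which eflags, cs and ss travel as 32-bit slots packed after the 64-bit general-purpose registers, so the same buffer is written through a u32 view on x86-64. A small standalone sketch of that aliasing trick (the slot indices below are made up for the demo and are not the kernel's GDB_* values):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        /* One buffer, viewed both as 64-bit words and as 32-bit slots,
         * the same aliasing trick the kgdb code uses. */
        uint64_t gdb_regs[20] = { 0 };
        uint32_t *gdb_regs32 = (uint32_t *)gdb_regs;

        /* Illustrative indices only -- not the kernel's GDB_* numbering. */
        enum { GDB_PS = 34, GDB_CS = 35, GDB_SS = 36 };

        gdb_regs32[GDB_PS] = 0x246;     /* flags, stored in a 32-bit slot */
        gdb_regs32[GDB_CS] = 0x10;
        gdb_regs32[GDB_SS] = 0x18;

        /* The 32-bit slots 34..36 live inside 64-bit words 17 and 18. */
        printf("word 17 = %#llx\n", (unsigned long long)gdb_regs[17]);
        printf("word 18 = %#llx\n", (unsigned long long)gdb_regs[18]);
        return 0;
}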
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index b8c6743a13da..6c27679ec6aa 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c | |||
| @@ -431,7 +431,6 @@ static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) | |||
| 431 | regs->ip = (unsigned long)p->ainsn.insn; | 431 | regs->ip = (unsigned long)p->ainsn.insn; |
| 432 | } | 432 | } |
| 433 | 433 | ||
| 434 | /* Called with kretprobe_lock held */ | ||
| 435 | void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, | 434 | void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, |
| 436 | struct pt_regs *regs) | 435 | struct pt_regs *regs) |
| 437 | { | 436 | { |
| @@ -682,8 +681,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) | |||
| 682 | unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; | 681 | unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; |
| 683 | 682 | ||
| 684 | INIT_HLIST_HEAD(&empty_rp); | 683 | INIT_HLIST_HEAD(&empty_rp); |
| 685 | spin_lock_irqsave(&kretprobe_lock, flags); | 684 | kretprobe_hash_lock(current, &head, &flags); |
| 686 | head = kretprobe_inst_table_head(current); | ||
| 687 | /* fixup registers */ | 685 | /* fixup registers */ |
| 688 | #ifdef CONFIG_X86_64 | 686 | #ifdef CONFIG_X86_64 |
| 689 | regs->cs = __KERNEL_CS; | 687 | regs->cs = __KERNEL_CS; |
| @@ -732,7 +730,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) | |||
| 732 | 730 | ||
| 733 | kretprobe_assert(ri, orig_ret_address, trampoline_address); | 731 | kretprobe_assert(ri, orig_ret_address, trampoline_address); |
| 734 | 732 | ||
| 735 | spin_unlock_irqrestore(&kretprobe_lock, flags); | 733 | kretprobe_hash_unlock(current, &flags); |
| 736 | 734 | ||
| 737 | hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { | 735 | hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { |
| 738 | hlist_del(&ri->hlist); | 736 | hlist_del(&ri->hlist); |
| @@ -860,7 +858,6 @@ static int __kprobes post_kprobe_handler(struct pt_regs *regs) | |||
| 860 | 858 | ||
| 861 | resume_execution(cur, regs, kcb); | 859 | resume_execution(cur, regs, kcb); |
| 862 | regs->flags |= kcb->kprobe_saved_flags; | 860 | regs->flags |= kcb->kprobe_saved_flags; |
| 863 | trace_hardirqs_fixup_flags(regs->flags); | ||
| 864 | 861 | ||
| 865 | if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { | 862 | if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { |
| 866 | kcb->kprobe_status = KPROBE_HIT_SSDONE; | 863 | kcb->kprobe_status = KPROBE_HIT_SSDONE; |
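The kprobes hunk drops the global kretprobe_lock in favour of kretprobe_hash_lock()/kretprobe_hash_unlock(), which hash the task to a bucket with its own spinlock so return-probe handlers on different tasks stop contending on one lock. A rough, simplified sketch of the hashed-bucket idea (the hash function and bucket count below are illustrative, not the kernel's implementation):

#include <stdio.h>
#include <stdint.h>

#define NR_BUCKETS 64                   /* power-of-two table, like the kernel's */

/* Simplified stand-in for hash_ptr(): mix the task pointer down to a bucket. */
static unsigned int task_hash(const void *task)
{
        uintptr_t v = (uintptr_t)task;
        return (unsigned int)((v >> 4) ^ (v >> 12)) & (NR_BUCKETS - 1);
}

struct task { int pid; };

int main(void)
{
        struct task a = { 100 }, b = { 200 };

        /* Each task maps to one bucket; only handlers that hash to the same
         * bucket contend for the same lock, instead of all of them sharing
         * one global lock. */
        printf("task a -> bucket %u\n", task_hash(&a));
        printf("task b -> bucket %u\n", task_hash(&b));
        return 0;
}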
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 8b7a3cf37d2b..478bca986eca 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c | |||
| @@ -178,7 +178,7 @@ static void kvm_flush_tlb(void) | |||
| 178 | kvm_deferred_mmu_op(&ftlb, sizeof ftlb); | 178 | kvm_deferred_mmu_op(&ftlb, sizeof ftlb); |
| 179 | } | 179 | } |
| 180 | 180 | ||
| 181 | static void kvm_release_pt(u32 pfn) | 181 | static void kvm_release_pt(unsigned long pfn) |
| 182 | { | 182 | { |
| 183 | struct kvm_mmu_op_release_pt rpt = { | 183 | struct kvm_mmu_op_release_pt rpt = { |
| 184 | .header.op = KVM_MMU_OP_RELEASE_PT, | 184 | .header.op = KVM_MMU_OP_RELEASE_PT, |
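Widening kvm_release_pt()'s argument from u32 to unsigned long matches the paravirt page-table hooks, and it matters because a 32-bit frame number tops out at 16 TiB of physical address space with 4 KiB pages. A quick arithmetic check (standalone, nothing KVM-specific):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t page_size = 4096;                        /* 4 KiB pages */
        uint64_t max_u32_pfn = (uint64_t)UINT32_MAX + 1;  /* 2^32 frame numbers */

        /* Highest physical byte reachable with a 32-bit pfn: 16 TiB. */
        printf("u32 pfn covers %llu TiB\n",
               (unsigned long long)(max_u32_pfn * page_size >> 40));

        /* A pfn for an address above that would be silently truncated. */
        uint64_t pfn = (1ULL << 45) >> 12;                /* address at 32 TiB */
        printf("pfn %#llx truncated to u32: %#llx\n",
               (unsigned long long)pfn, (unsigned long long)(uint32_t)pfn);
        return 0;
}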
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 87edf1ceb1df..774ac4991568 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c | |||
| @@ -78,6 +78,34 @@ static cycle_t kvm_clock_read(void) | |||
| 78 | return ret; | 78 | return ret; |
| 79 | } | 79 | } |
| 80 | 80 | ||
| 81 | /* | ||
| 82 | * If we don't preset lpj, the guest may calibrate its delay loop | ||
| 83 | * under heavy load - thus getting a lower lpj - and then execute | ||
| 84 | * the delays without that load. This is wrong, because delay loops | ||
| 85 | * calibrated that way finish earlier than they should. | ||
| 86 | * Any heuristic is bound to fail, because ultimately a large pool | ||
| 87 | * of guests can be running and disturbing each other. So we preset | ||
| 88 | * lpj here. | ||
| 89 | */ | ||
| 90 | static unsigned long kvm_get_tsc_khz(void) | ||
| 91 | { | ||
| 92 | return preset_lpj; | ||
| 93 | } | ||
| 94 | |||
| 95 | static void kvm_get_preset_lpj(void) | ||
| 96 | { | ||
| 97 | struct pvclock_vcpu_time_info *src; | ||
| 98 | unsigned long khz; | ||
| 99 | u64 lpj; | ||
| 100 | |||
| 101 | src = &per_cpu(hv_clock, 0); | ||
| 102 | khz = pvclock_tsc_khz(src); | ||
| 103 | |||
| 104 | lpj = ((u64)khz * 1000); | ||
| 105 | do_div(lpj, HZ); | ||
| 106 | preset_lpj = lpj; | ||
| 107 | } | ||
| 108 | |||
| 81 | static struct clocksource kvm_clock = { | 109 | static struct clocksource kvm_clock = { |
| 82 | .name = "kvm-clock", | 110 | .name = "kvm-clock", |
| 83 | .read = kvm_clock_read, | 111 | .read = kvm_clock_read, |
| @@ -113,7 +141,7 @@ static void kvm_setup_secondary_clock(void) | |||
| 113 | #endif | 141 | #endif |
| 114 | 142 | ||
| 115 | #ifdef CONFIG_SMP | 143 | #ifdef CONFIG_SMP |
| 116 | void __init kvm_smp_prepare_boot_cpu(void) | 144 | static void __init kvm_smp_prepare_boot_cpu(void) |
| 117 | { | 145 | { |
| 118 | WARN_ON(kvm_register_clock("primary cpu clock")); | 146 | WARN_ON(kvm_register_clock("primary cpu clock")); |
| 119 | native_smp_prepare_boot_cpu(); | 147 | native_smp_prepare_boot_cpu(); |
| @@ -153,6 +181,7 @@ void __init kvmclock_init(void) | |||
| 153 | pv_time_ops.get_wallclock = kvm_get_wallclock; | 181 | pv_time_ops.get_wallclock = kvm_get_wallclock; |
| 154 | pv_time_ops.set_wallclock = kvm_set_wallclock; | 182 | pv_time_ops.set_wallclock = kvm_set_wallclock; |
| 155 | pv_time_ops.sched_clock = kvm_clock_read; | 183 | pv_time_ops.sched_clock = kvm_clock_read; |
| 184 | pv_time_ops.get_tsc_khz = kvm_get_tsc_khz; | ||
| 156 | #ifdef CONFIG_X86_LOCAL_APIC | 185 | #ifdef CONFIG_X86_LOCAL_APIC |
| 157 | pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock; | 186 | pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock; |
| 158 | #endif | 187 | #endif |
| @@ -163,6 +192,7 @@ void __init kvmclock_init(void) | |||
| 163 | #ifdef CONFIG_KEXEC | 192 | #ifdef CONFIG_KEXEC |
| 164 | machine_ops.crash_shutdown = kvm_crash_shutdown; | 193 | machine_ops.crash_shutdown = kvm_crash_shutdown; |
| 165 | #endif | 194 | #endif |
| 195 | kvm_get_preset_lpj(); | ||
| 166 | clocksource_register(&kvm_clock); | 196 | clocksource_register(&kvm_clock); |
| 167 | } | 197 | } |
| 168 | } | 198 | } |
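The preset_lpj value computed above is simply the TSC rate scaled to loops per jiffy, lpj = tsc_khz * 1000 / HZ. A worked example with assumed numbers (a 2.4 GHz TSC and HZ=250 are made up for illustration):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        unsigned long khz = 2400000;    /* assumed: pvclock reports a 2.4 GHz TSC */
        unsigned long hz  = 250;        /* assumed: CONFIG_HZ=250 */

        /* Mirrors: lpj = ((u64)khz * 1000); do_div(lpj, HZ); */
        uint64_t lpj = (uint64_t)khz * 1000;
        lpj /= hz;

        printf("preset_lpj = %llu\n", (unsigned long long)lpj); /* 9600000 */
        return 0;
}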
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index a8449571858a..eee32b43fee3 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c | |||
| @@ -18,6 +18,7 @@ | |||
| 18 | #include <asm/ldt.h> | 18 | #include <asm/ldt.h> |
| 19 | #include <asm/desc.h> | 19 | #include <asm/desc.h> |
| 20 | #include <asm/mmu_context.h> | 20 | #include <asm/mmu_context.h> |
| 21 | #include <asm/syscalls.h> | ||
| 21 | 22 | ||
| 22 | #ifdef CONFIG_SMP | 23 | #ifdef CONFIG_SMP |
| 23 | static void flush_ldt(void *current_mm) | 24 | static void flush_ldt(void *current_mm) |
| @@ -51,6 +52,8 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) | |||
| 51 | memset(newldt + oldsize * LDT_ENTRY_SIZE, 0, | 52 | memset(newldt + oldsize * LDT_ENTRY_SIZE, 0, |
| 52 | (mincount - oldsize) * LDT_ENTRY_SIZE); | 53 | (mincount - oldsize) * LDT_ENTRY_SIZE); |
| 53 | 54 | ||
| 55 | paravirt_alloc_ldt(newldt, mincount); | ||
| 56 | |||
| 54 | #ifdef CONFIG_X86_64 | 57 | #ifdef CONFIG_X86_64 |
| 55 | /* CHECKME: Do we really need this ? */ | 58 | /* CHECKME: Do we really need this ? */ |
| 56 | wmb(); | 59 | wmb(); |
| @@ -62,12 +65,10 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) | |||
| 62 | 65 | ||
| 63 | if (reload) { | 66 | if (reload) { |
| 64 | #ifdef CONFIG_SMP | 67 | #ifdef CONFIG_SMP |
| 65 | cpumask_t mask; | ||
| 66 | |||
| 67 | preempt_disable(); | 68 | preempt_disable(); |
| 68 | load_LDT(pc); | 69 | load_LDT(pc); |
| 69 | mask = cpumask_of_cpu(smp_processor_id()); | 70 | if (!cpus_equal(current->mm->cpu_vm_mask, |
| 70 | if (!cpus_equal(current->mm->cpu_vm_mask, mask)) | 71 | cpumask_of_cpu(smp_processor_id()))) |
| 71 | smp_call_function(flush_ldt, current->mm, 1); | 72 | smp_call_function(flush_ldt, current->mm, 1); |
| 72 | preempt_enable(); | 73 | preempt_enable(); |
| 73 | #else | 74 | #else |
| @@ -75,6 +76,7 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) | |||
| 75 | #endif | 76 | #endif |
| 76 | } | 77 | } |
| 77 | if (oldsize) { | 78 | if (oldsize) { |
| 79 | paravirt_free_ldt(oldldt, oldsize); | ||
| 78 | if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE) | 80 | if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE) |
| 79 | vfree(oldldt); | 81 | vfree(oldldt); |
| 80 | else | 82 | else |
| @@ -86,10 +88,13 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) | |||
| 86 | static inline int copy_ldt(mm_context_t *new, mm_context_t *old) | 88 | static inline int copy_ldt(mm_context_t *new, mm_context_t *old) |
| 87 | { | 89 | { |
| 88 | int err = alloc_ldt(new, old->size, 0); | 90 | int err = alloc_ldt(new, old->size, 0); |
| 91 | int i; | ||
| 89 | 92 | ||
| 90 | if (err < 0) | 93 | if (err < 0) |
| 91 | return err; | 94 | return err; |
| 92 | memcpy(new->ldt, old->ldt, old->size * LDT_ENTRY_SIZE); | 95 | |
| 96 | for(i = 0; i < old->size; i++) | ||
| 97 | write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE); | ||
| 93 | return 0; | 98 | return 0; |
| 94 | } | 99 | } |
| 95 | 100 | ||
| @@ -126,6 +131,7 @@ void destroy_context(struct mm_struct *mm) | |||
| 126 | if (mm == current->active_mm) | 131 | if (mm == current->active_mm) |
| 127 | clear_LDT(); | 132 | clear_LDT(); |
| 128 | #endif | 133 | #endif |
| 134 | paravirt_free_ldt(mm->context.ldt, mm->context.size); | ||
| 129 | if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE) | 135 | if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE) |
| 130 | vfree(mm->context.ldt); | 136 | vfree(mm->context.ldt); |
| 131 | else | 137 | else |
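copy_ldt() now hands each descriptor to write_ldt_entry() instead of doing one bulk memcpy, so a paravirt backend sees (and can validate or substitute) every entry it is given. A loose userspace analogy of that per-entry pattern (the 8-byte descriptor struct and the validation printout are invented for illustration; this is not Xen's interface):

#include <stdio.h>
#include <string.h>
#include <stdint.h>

#define LDT_ENTRY_SIZE 8

struct desc { uint8_t bytes[LDT_ENTRY_SIZE]; };

/* Stand-in for the paravirt hook: gets a look at each entry as it is written. */
static void write_entry(struct desc *table, int idx, const void *src)
{
        printf("validating entry %d\n", idx);   /* e.g. a hypervisor check */
        memcpy(&table[idx], src, LDT_ENTRY_SIZE);
}

int main(void)
{
        struct desc old_ldt[3] = { { {1} }, { {2} }, { {3} } }, new_ldt[3];
        int i, size = 3;

        /* Per-entry copy, mirroring the new copy_ldt() loop. */
        for (i = 0; i < size; i++)
                write_entry(new_ldt, i, (uint8_t *)old_ldt + i * LDT_ENTRY_SIZE);

        return 0;
}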
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 8864230d55af..0732adba05ca 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | #include <linux/init.h> | 12 | #include <linux/init.h> |
| 13 | #include <linux/numa.h> | 13 | #include <linux/numa.h> |
| 14 | #include <linux/ftrace.h> | 14 | #include <linux/ftrace.h> |
| 15 | #include <linux/suspend.h> | ||
| 15 | 16 | ||
| 16 | #include <asm/pgtable.h> | 17 | #include <asm/pgtable.h> |
| 17 | #include <asm/pgalloc.h> | 18 | #include <asm/pgalloc.h> |
| @@ -22,6 +23,7 @@ | |||
| 22 | #include <asm/cpufeature.h> | 23 | #include <asm/cpufeature.h> |
| 23 | #include <asm/desc.h> | 24 | #include <asm/desc.h> |
| 24 | #include <asm/system.h> | 25 | #include <asm/system.h> |
| 26 | #include <asm/cacheflush.h> | ||
| 25 | 27 | ||
| 26 | #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) | 28 | #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) |
| 27 | static u32 kexec_pgd[1024] PAGE_ALIGNED; | 29 | static u32 kexec_pgd[1024] PAGE_ALIGNED; |
| @@ -77,7 +79,7 @@ static void load_segments(void) | |||
| 77 | /* | 79 | /* |
| 78 | * An architecture hook called to validate the | 80 | * An architecture hook called to validate the |
| 79 | * proposed image and prepare the control pages | 81 | * proposed image and prepare the control pages |
| 80 | * as needed. The pages for KEXEC_CONTROL_CODE_SIZE | 82 | * as needed. The pages for KEXEC_CONTROL_PAGE_SIZE |
| 81 | * have been allocated, but the segments have not yet | 83 | * have been allocated, but the segments have not yet |
| 82 | * been copied into the kernel. | 84 | * been copied into the kernel. |
| 83 | * | 85 | * |
| @@ -85,10 +87,12 @@ static void load_segments(void) | |||
| 85 | * reboot code buffer to allow us to avoid allocations | 87 | * reboot code buffer to allow us to avoid allocations |
| 86 | * later. | 88 | * later. |
| 87 | * | 89 | * |
| 88 | * Currently nothing. | 90 | * Make control page executable. |
| 89 | */ | 91 | */ |
| 90 | int machine_kexec_prepare(struct kimage *image) | 92 | int machine_kexec_prepare(struct kimage *image) |
| 91 | { | 93 | { |
| 94 | if (nx_enabled) | ||
| 95 | set_pages_x(image->control_code_page, 1); | ||
| 92 | return 0; | 96 | return 0; |
| 93 | } | 97 | } |
| 94 | 98 | ||
| @@ -98,27 +102,54 @@ int machine_kexec_prepare(struct kimage *image) | |||
| 98 | */ | 102 | */ |
| 99 | void machine_kexec_cleanup(struct kimage *image) | 103 | void machine_kexec_cleanup(struct kimage *image) |
| 100 | { | 104 | { |
| 105 | if (nx_enabled) | ||
| 106 | set_pages_nx(image->control_code_page, 1); | ||
| 101 | } | 107 | } |
| 102 | 108 | ||
| 103 | /* | 109 | /* |
| 104 | * Do not allocate memory (or fail in any way) in machine_kexec(). | 110 | * Do not allocate memory (or fail in any way) in machine_kexec(). |
| 105 | * We are past the point of no return, committed to rebooting now. | 111 | * We are past the point of no return, committed to rebooting now. |
| 106 | */ | 112 | */ |
| 107 | NORET_TYPE void machine_kexec(struct kimage *image) | 113 | void machine_kexec(struct kimage *image) |
| 108 | { | 114 | { |
| 109 | unsigned long page_list[PAGES_NR]; | 115 | unsigned long page_list[PAGES_NR]; |
| 110 | void *control_page; | 116 | void *control_page; |
| 117 | int save_ftrace_enabled; | ||
| 118 | asmlinkage unsigned long | ||
| 119 | (*relocate_kernel_ptr)(unsigned long indirection_page, | ||
| 120 | unsigned long control_page, | ||
| 121 | unsigned long start_address, | ||
| 122 | unsigned int has_pae, | ||
| 123 | unsigned int preserve_context); | ||
| 124 | |||
| 125 | #ifdef CONFIG_KEXEC_JUMP | ||
| 126 | if (kexec_image->preserve_context) | ||
| 127 | save_processor_state(); | ||
| 128 | #endif | ||
| 111 | 129 | ||
| 112 | tracer_disable(); | 130 | save_ftrace_enabled = __ftrace_enabled_save(); |
| 113 | 131 | ||
| 114 | /* Interrupts aren't acceptable while we reboot */ | 132 | /* Interrupts aren't acceptable while we reboot */ |
| 115 | local_irq_disable(); | 133 | local_irq_disable(); |
| 116 | 134 | ||
| 135 | if (image->preserve_context) { | ||
| 136 | #ifdef CONFIG_X86_IO_APIC | ||
| 137 | /* We need to put APICs in legacy mode so that we can | ||
| 138 | * get timer interrupts in the second kernel. The kexec/kdump | ||
| 139 | * paths already have calls to disable_IO_APIC() in | ||
| 140 | * one form or another; the kexec jump path also needs | ||
| 141 | * one. | ||
| 142 | */ | ||
| 143 | disable_IO_APIC(); | ||
| 144 | #endif | ||
| 145 | } | ||
| 146 | |||
| 117 | control_page = page_address(image->control_code_page); | 147 | control_page = page_address(image->control_code_page); |
| 118 | memcpy(control_page, relocate_kernel, PAGE_SIZE); | 148 | memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE); |
| 119 | 149 | ||
| 150 | relocate_kernel_ptr = control_page; | ||
| 120 | page_list[PA_CONTROL_PAGE] = __pa(control_page); | 151 | page_list[PA_CONTROL_PAGE] = __pa(control_page); |
| 121 | page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel; | 152 | page_list[VA_CONTROL_PAGE] = (unsigned long)control_page; |
| 122 | page_list[PA_PGD] = __pa(kexec_pgd); | 153 | page_list[PA_PGD] = __pa(kexec_pgd); |
| 123 | page_list[VA_PGD] = (unsigned long)kexec_pgd; | 154 | page_list[VA_PGD] = (unsigned long)kexec_pgd; |
| 124 | #ifdef CONFIG_X86_PAE | 155 | #ifdef CONFIG_X86_PAE |
| @@ -131,6 +162,7 @@ NORET_TYPE void machine_kexec(struct kimage *image) | |||
| 131 | page_list[VA_PTE_0] = (unsigned long)kexec_pte0; | 162 | page_list[VA_PTE_0] = (unsigned long)kexec_pte0; |
| 132 | page_list[PA_PTE_1] = __pa(kexec_pte1); | 163 | page_list[PA_PTE_1] = __pa(kexec_pte1); |
| 133 | page_list[VA_PTE_1] = (unsigned long)kexec_pte1; | 164 | page_list[VA_PTE_1] = (unsigned long)kexec_pte1; |
| 165 | page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) << PAGE_SHIFT); | ||
| 134 | 166 | ||
| 135 | /* The segment registers are funny things, they have both a | 167 | /* The segment registers are funny things, they have both a |
| 136 | * visible and an invisible part. Whenever the visible part is | 168 | * visible and an invisible part. Whenever the visible part is |
| @@ -149,8 +181,17 @@ NORET_TYPE void machine_kexec(struct kimage *image) | |||
| 149 | set_idt(phys_to_virt(0),0); | 181 | set_idt(phys_to_virt(0),0); |
| 150 | 182 | ||
| 151 | /* now call it */ | 183 | /* now call it */ |
| 152 | relocate_kernel((unsigned long)image->head, (unsigned long)page_list, | 184 | image->start = relocate_kernel_ptr((unsigned long)image->head, |
| 153 | image->start, cpu_has_pae); | 185 | (unsigned long)page_list, |
| 186 | image->start, cpu_has_pae, | ||
| 187 | image->preserve_context); | ||
| 188 | |||
| 189 | #ifdef CONFIG_KEXEC_JUMP | ||
| 190 | if (kexec_image->preserve_context) | ||
| 191 | restore_processor_state(); | ||
| 192 | #endif | ||
| 193 | |||
| 194 | __ftrace_enabled_restore(save_ftrace_enabled); | ||
| 154 | } | 195 | } |
| 155 | 196 | ||
| 156 | void arch_crash_save_vmcoreinfo(void) | 197 | void arch_crash_save_vmcoreinfo(void) |
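With CONFIG_KEXEC_JUMP, machine_kexec() is no longer a one-way trip: processor state is saved, relocate_kernel() may return a new image->start, and processor state is restored afterwards. A stub-only sketch of just that ordering (every function below is a placeholder, not the kernel routine of the same purpose):

#include <stdio.h>

/* Placeholders standing in for the kernel calls in machine_kexec(). */
static void save_processor_state(void)    { printf("save state\n"); }
static void restore_processor_state(void) { printf("restore state\n"); }
static unsigned long relocate(unsigned long start, int preserve_context)
{
        printf("jump to the other kernel (preserve_context=%d)\n", preserve_context);
        return start + 1;       /* pretend the other kernel hands back a new start */
}

int main(void)
{
        int preserve_context = 1;
        unsigned long start = 0x100000;

        if (preserve_context)
                save_processor_state();

        start = relocate(start, preserve_context);   /* may return when preserving */

        if (preserve_context)
                restore_processor_state();

        printf("back in the first kernel, start=%#lx\n", start);
        return 0;
}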
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 9dd9262693a3..c43caa3a91f3 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c | |||
| @@ -181,7 +181,7 @@ void machine_kexec_cleanup(struct kimage *image) | |||
| 181 | * Do not allocate memory (or fail in any way) in machine_kexec(). | 181 | * Do not allocate memory (or fail in any way) in machine_kexec(). |
| 182 | * We are past the point of no return, committed to rebooting now. | 182 | * We are past the point of no return, committed to rebooting now. |
| 183 | */ | 183 | */ |
| 184 | NORET_TYPE void machine_kexec(struct kimage *image) | 184 | void machine_kexec(struct kimage *image) |
| 185 | { | 185 | { |
| 186 | unsigned long page_list[PAGES_NR]; | 186 | unsigned long page_list[PAGES_NR]; |
| 187 | void *control_page; | 187 | void *control_page; |
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c index 07c0f828f488..3b599518c322 100644 --- a/arch/x86/kernel/mfgpt_32.c +++ b/arch/x86/kernel/mfgpt_32.c | |||
| @@ -33,6 +33,8 @@ | |||
| 33 | #include <linux/module.h> | 33 | #include <linux/module.h> |
| 34 | #include <asm/geode.h> | 34 | #include <asm/geode.h> |
| 35 | 35 | ||
| 36 | #define MFGPT_DEFAULT_IRQ 7 | ||
| 37 | |||
| 36 | static struct mfgpt_timer_t { | 38 | static struct mfgpt_timer_t { |
| 37 | unsigned int avail:1; | 39 | unsigned int avail:1; |
| 38 | } mfgpt_timers[MFGPT_MAX_TIMERS]; | 40 | } mfgpt_timers[MFGPT_MAX_TIMERS]; |
| @@ -157,29 +159,48 @@ int geode_mfgpt_toggle_event(int timer, int cmp, int event, int enable) | |||
| 157 | } | 159 | } |
| 158 | EXPORT_SYMBOL_GPL(geode_mfgpt_toggle_event); | 160 | EXPORT_SYMBOL_GPL(geode_mfgpt_toggle_event); |
| 159 | 161 | ||
| 160 | int geode_mfgpt_set_irq(int timer, int cmp, int irq, int enable) | 162 | int geode_mfgpt_set_irq(int timer, int cmp, int *irq, int enable) |
| 161 | { | 163 | { |
| 162 | u32 val, dummy; | 164 | u32 zsel, lpc, dummy; |
| 163 | int offset; | 165 | int shift; |
| 164 | 166 | ||
| 165 | if (timer < 0 || timer >= MFGPT_MAX_TIMERS) | 167 | if (timer < 0 || timer >= MFGPT_MAX_TIMERS) |
| 166 | return -EIO; | 168 | return -EIO; |
| 167 | 169 | ||
| 168 | if (geode_mfgpt_toggle_event(timer, cmp, MFGPT_EVENT_IRQ, enable)) | 170 | /* |
| 171 | * Unfortunately, MFGPTs come in pairs sharing their IRQ lines. If VSA | ||
| 172 | * is using the same CMP of the timer's Siamese twin, the IRQ is set to | ||
| 173 | * 2, and we must neither use nor change it. | ||
| 174 | * XXX: Likewise, two Linux drivers might clash if the 2nd overwrites the | ||
| 175 | * IRQ of the 1st. This can only happen when an IRQ is forced; calling | ||
| 176 | * this with *irq==0 is safe. Currently there _are_ no two such drivers. | ||
| 177 | */ | ||
| 178 | rdmsr(MSR_PIC_ZSEL_LOW, zsel, dummy); | ||
| 179 | shift = ((cmp == MFGPT_CMP1 ? 0 : 4) + timer % 4) * 4; | ||
| 180 | if (((zsel >> shift) & 0xF) == 2) | ||
| 169 | return -EIO; | 181 | return -EIO; |
| 170 | 182 | ||
| 171 | rdmsr(MSR_PIC_ZSEL_LOW, val, dummy); | 183 | /* Choose IRQ: if none supplied, keep IRQ already set or use default */ |
| 184 | if (!*irq) | ||
| 185 | *irq = (zsel >> shift) & 0xF; | ||
| 186 | if (!*irq) | ||
| 187 | *irq = MFGPT_DEFAULT_IRQ; | ||
| 172 | 188 | ||
| 173 | offset = (timer % 4) * 4; | 189 | /* Can't use IRQ if it's 0 (=disabled), 2, or routed to LPC */ |
| 174 | 190 | if (*irq < 1 || *irq == 2 || *irq > 15) | |
| 175 | val &= ~((0xF << offset) | (0xF << (offset + 16))); | 191 | return -EIO; |
| 192 | rdmsr(MSR_PIC_IRQM_LPC, lpc, dummy); | ||
| 193 | if (lpc & (1 << *irq)) | ||
| 194 | return -EIO; | ||
| 176 | 195 | ||
| 196 | /* All chosen and checked - go for it */ | ||
| 197 | if (geode_mfgpt_toggle_event(timer, cmp, MFGPT_EVENT_IRQ, enable)) | ||
| 198 | return -EIO; | ||
| 177 | if (enable) { | 199 | if (enable) { |
| 178 | val |= (irq & 0x0F) << (offset); | 200 | zsel = (zsel & ~(0xF << shift)) | (*irq << shift); |
| 179 | val |= (irq & 0x0F) << (offset + 16); | 201 | wrmsr(MSR_PIC_ZSEL_LOW, zsel, dummy); |
| 180 | } | 202 | } |
| 181 | 203 | ||
| 182 | wrmsr(MSR_PIC_ZSEL_LOW, val, dummy); | ||
| 183 | return 0; | 204 | return 0; |
| 184 | } | 205 | } |
| 185 | 206 | ||
| @@ -242,7 +263,7 @@ EXPORT_SYMBOL_GPL(geode_mfgpt_alloc_timer); | |||
| 242 | static unsigned int mfgpt_tick_mode = CLOCK_EVT_MODE_SHUTDOWN; | 263 | static unsigned int mfgpt_tick_mode = CLOCK_EVT_MODE_SHUTDOWN; |
| 243 | static u16 mfgpt_event_clock; | 264 | static u16 mfgpt_event_clock; |
| 244 | 265 | ||
| 245 | static int irq = 7; | 266 | static int irq; |
| 246 | static int __init mfgpt_setup(char *str) | 267 | static int __init mfgpt_setup(char *str) |
| 247 | { | 268 | { |
| 248 | get_option(&str, &irq); | 269 | get_option(&str, &irq); |
| @@ -346,7 +367,7 @@ int __init mfgpt_timer_setup(void) | |||
| 346 | mfgpt_event_clock = timer; | 367 | mfgpt_event_clock = timer; |
| 347 | 368 | ||
| 348 | /* Set up the IRQ on the MFGPT side */ | 369 | /* Set up the IRQ on the MFGPT side */ |
| 349 | if (geode_mfgpt_setup_irq(mfgpt_event_clock, MFGPT_CMP2, irq)) { | 370 | if (geode_mfgpt_setup_irq(mfgpt_event_clock, MFGPT_CMP2, &irq)) { |
| 350 | printk(KERN_ERR "mfgpt-timer: Could not set up IRQ %d\n", irq); | 371 | printk(KERN_ERR "mfgpt-timer: Could not set up IRQ %d\n", irq); |
| 351 | return -EIO; | 372 | return -EIO; |
| 352 | } | 373 | } |
| @@ -374,13 +395,14 @@ int __init mfgpt_timer_setup(void) | |||
| 374 | &mfgpt_clockevent); | 395 | &mfgpt_clockevent); |
| 375 | 396 | ||
| 376 | printk(KERN_INFO | 397 | printk(KERN_INFO |
| 377 | "mfgpt-timer: registering the MFGPT timer as a clock event.\n"); | 398 | "mfgpt-timer: Registering MFGPT timer %d as a clock event, using IRQ %d\n", |
| 399 | timer, irq); | ||
| 378 | clockevents_register_device(&mfgpt_clockevent); | 400 | clockevents_register_device(&mfgpt_clockevent); |
| 379 | 401 | ||
| 380 | return 0; | 402 | return 0; |
| 381 | 403 | ||
| 382 | err: | 404 | err: |
| 383 | geode_mfgpt_release_irq(mfgpt_event_clock, MFGPT_CMP2, irq); | 405 | geode_mfgpt_release_irq(mfgpt_event_clock, MFGPT_CMP2, &irq); |
| 384 | printk(KERN_ERR | 406 | printk(KERN_ERR |
| 385 | "mfgpt-timer: Unable to set up the MFGPT clock source\n"); | 407 | "mfgpt-timer: Unable to set up the MFGPT clock source\n"); |
| 386 | return -EIO; | 408 | return -EIO; |
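The rewritten geode_mfgpt_set_irq() treats MSR_PIC_ZSEL_LOW as eight 4-bit routing fields, located with shift = ((cmp == MFGPT_CMP1 ? 0 : 4) + timer % 4) * 4, and then read-modify-writes only the field it owns. A standalone check of that arithmetic (the MFGPT_CMP1/MFGPT_CMP2 encodings and the sample register value are assumptions made for the demo):

#include <stdio.h>
#include <stdint.h>

enum { MFGPT_CMP1, MFGPT_CMP2 };   /* assumed encodings; only the comparison matters */

int main(void)
{
        int timer = 6, cmp = MFGPT_CMP2, irq = 7;
        uint32_t zsel = 0x00200000;     /* made-up current routing register value */

        /* Same arithmetic as geode_mfgpt_set_irq(): 4 bits per timer, with the
         * CMP2 fields sitting in the upper half of the register. */
        int shift = ((cmp == MFGPT_CMP1 ? 0 : 4) + timer % 4) * 4;

        printf("shift = %d, current nibble = %#x\n", shift, (zsel >> shift) & 0xF);

        /* Insert the chosen IRQ into that nibble, leaving the others untouched. */
        zsel = (zsel & ~(0xFu << shift)) | ((uint32_t)irq << shift);
        printf("new zsel = %#010x\n", zsel);
        return 0;
}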
diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c deleted file mode 100644 index 56b933119a04..000000000000 --- a/arch/x86/kernel/microcode.c +++ /dev/null | |||
| @@ -1,851 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * Intel CPU Microcode Update Driver for Linux | ||
| 3 | * | ||
| 4 | * Copyright (C) 2000-2006 Tigran Aivazian <tigran@aivazian.fsnet.co.uk> | ||
| 5 | * 2006 Shaohua Li <shaohua.li@intel.com> | ||
| 6 | * | ||
| 7 | * This driver allows upgrading the microcode on Intel processors | ||
| 8 | * belonging to IA-32 family - PentiumPro, Pentium II, | ||
| 9 | * Pentium III, Xeon, Pentium 4, etc. | ||
| 10 | * | ||
| 11 | * Reference: Section 8.11 of Volume 3a, IA-32 Intel® Architecture | ||
| 12 | * Software Developer's Manual | ||
| 13 | * Order Number 253668 or free download from: | ||
| 14 | * | ||
| 15 | * http://developer.intel.com/design/pentium4/manuals/253668.htm | ||
| 16 | * | ||
| 17 | * For more information, go to http://www.urbanmyth.org/microcode | ||
| 18 | * | ||
| 19 | * This program is free software; you can redistribute it and/or | ||
| 20 | * modify it under the terms of the GNU General Public License | ||
| 21 | * as published by the Free Software Foundation; either version | ||
| 22 | * 2 of the License, or (at your option) any later version. | ||
| 23 | * | ||
| 24 | * 1.0 16 Feb 2000, Tigran Aivazian <tigran@sco.com> | ||
| 25 | * Initial release. | ||
| 26 | * 1.01 18 Feb 2000, Tigran Aivazian <tigran@sco.com> | ||
| 27 | * Added read() support + cleanups. | ||
| 28 | * 1.02 21 Feb 2000, Tigran Aivazian <tigran@sco.com> | ||
| 29 | * Added 'device trimming' support. open(O_WRONLY) zeroes | ||
| 30 | * and frees the saved copy of applied microcode. | ||
| 31 | * 1.03 29 Feb 2000, Tigran Aivazian <tigran@sco.com> | ||
| 32 | * Made to use devfs (/dev/cpu/microcode) + cleanups. | ||
| 33 | * 1.04 06 Jun 2000, Simon Trimmer <simon@veritas.com> | ||
| 34 | * Added misc device support (now uses both devfs and misc). | ||
| 35 | * Added MICROCODE_IOCFREE ioctl to clear memory. | ||
| 36 | * 1.05 09 Jun 2000, Simon Trimmer <simon@veritas.com> | ||
| 37 | * Messages for error cases (non Intel & no suitable microcode). | ||
| 38 | * 1.06 03 Aug 2000, Tigran Aivazian <tigran@veritas.com> | ||
| 39 | * Removed ->release(). Removed exclusive open and status bitmap. | ||
| 40 | * Added microcode_rwsem to serialize read()/write()/ioctl(). | ||
| 41 | * Removed global kernel lock usage. | ||
| 42 | * 1.07 07 Sep 2000, Tigran Aivazian <tigran@veritas.com> | ||
| 43 | * Write 0 to 0x8B msr and then cpuid before reading revision, | ||
| 44 | * so that it works even if there were no update done by the | ||
| 45 | * BIOS. Otherwise, reading from 0x8B gives junk (which happened | ||
| 46 | * to be 0 on my machine which is why it worked even when I | ||
| 47 | * disabled update by the BIOS) | ||
| 48 | * Thanks to Eric W. Biederman <ebiederman@lnxi.com> for the fix. | ||
| 49 | * 1.08 11 Dec 2000, Richard Schaal <richard.schaal@intel.com> and | ||
| 50 | * Tigran Aivazian <tigran@veritas.com> | ||
| 51 | * Intel Pentium 4 processor support and bugfixes. | ||
| 52 | * 1.09 30 Oct 2001, Tigran Aivazian <tigran@veritas.com> | ||
| 53 | * Bugfix for HT (Hyper-Threading) enabled processors | ||
| 54 | * whereby processor resources are shared by all logical processors | ||
| 55 | * in a single CPU package. | ||
| 56 | * 1.10 28 Feb 2002 Asit K Mallick <asit.k.mallick@intel.com> and | ||
| 57 | * Tigran Aivazian <tigran@veritas.com>, | ||
| 58 | * Serialize updates as required on HT processors due to speculative | ||
| 59 | * nature of implementation. | ||
| 60 | * 1.11 22 Mar 2002 Tigran Aivazian <tigran@veritas.com> | ||
| 61 | * Fix the panic when writing zero-length microcode chunk. | ||
| 62 | * 1.12 29 Sep 2003 Nitin Kamble <nitin.a.kamble@intel.com>, | ||
| 63 | * Jun Nakajima <jun.nakajima@intel.com> | ||
| 64 | * Support for the microcode updates in the new format. | ||
| 65 | * 1.13 10 Oct 2003 Tigran Aivazian <tigran@veritas.com> | ||
| 66 | * Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl | ||
| 67 | * because we no longer hold a copy of applied microcode | ||
| 68 | * in kernel memory. | ||
| 69 | * 1.14 25 Jun 2004 Tigran Aivazian <tigran@veritas.com> | ||
| 70 | * Fix sigmatch() macro to handle old CPUs with pf == 0. | ||
| 71 | * Thanks to Stuart Swales for pointing out this bug. | ||
| 72 | */ | ||
| 73 | |||
| 74 | //#define DEBUG /* pr_debug */ | ||
| 75 | #include <linux/capability.h> | ||
| 76 | #include <linux/kernel.h> | ||
| 77 | #include <linux/init.h> | ||
| 78 | #include <linux/sched.h> | ||
| 79 | #include <linux/smp_lock.h> | ||
| 80 | #include <linux/cpumask.h> | ||
| 81 | #include <linux/module.h> | ||
| 82 | #include <linux/slab.h> | ||
| 83 | #include <linux/vmalloc.h> | ||
| 84 | #include <linux/miscdevice.h> | ||
| 85 | #include <linux/spinlock.h> | ||
| 86 | #include <linux/mm.h> | ||
| 87 | #include <linux/fs.h> | ||
| 88 | #include <linux/mutex.h> | ||
| 89 | #include <linux/cpu.h> | ||
| 90 | #include <linux/firmware.h> | ||
| 91 | #include <linux/platform_device.h> | ||
| 92 | |||
| 93 | #include <asm/msr.h> | ||
| 94 | #include <asm/uaccess.h> | ||
| 95 | #include <asm/processor.h> | ||
| 96 | |||
| 97 | MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver"); | ||
| 98 | MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>"); | ||
| 99 | MODULE_LICENSE("GPL"); | ||
| 100 | |||
| 101 | #define MICROCODE_VERSION "1.14a" | ||
| 102 | |||
| 103 | #define DEFAULT_UCODE_DATASIZE (2000) /* 2000 bytes */ | ||
| 104 | #define MC_HEADER_SIZE (sizeof (microcode_header_t)) /* 48 bytes */ | ||
| 105 | #define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) /* 2048 bytes */ | ||
| 106 | #define EXT_HEADER_SIZE (sizeof (struct extended_sigtable)) /* 20 bytes */ | ||
| 107 | #define EXT_SIGNATURE_SIZE (sizeof (struct extended_signature)) /* 12 bytes */ | ||
| 108 | #define DWSIZE (sizeof (u32)) | ||
| 109 | #define get_totalsize(mc) \ | ||
| 110 | (((microcode_t *)mc)->hdr.totalsize ? \ | ||
| 111 | ((microcode_t *)mc)->hdr.totalsize : DEFAULT_UCODE_TOTALSIZE) | ||
| 112 | #define get_datasize(mc) \ | ||
| 113 | (((microcode_t *)mc)->hdr.datasize ? \ | ||
| 114 | ((microcode_t *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE) | ||
| 115 | |||
| 116 | #define sigmatch(s1, s2, p1, p2) \ | ||
| 117 | (((s1) == (s2)) && (((p1) & (p2)) || (((p1) == 0) && ((p2) == 0)))) | ||
| 118 | |||
| 119 | #define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE) | ||
| 120 | |||
| 121 | /* serialize access to the physical write to MSR 0x79 */ | ||
| 122 | static DEFINE_SPINLOCK(microcode_update_lock); | ||
| 123 | |||
| 124 | /* no concurrent ->write()s are allowed on /dev/cpu/microcode */ | ||
| 125 | static DEFINE_MUTEX(microcode_mutex); | ||
| 126 | |||
| 127 | static struct ucode_cpu_info { | ||
| 128 | int valid; | ||
| 129 | unsigned int sig; | ||
| 130 | unsigned int pf; | ||
| 131 | unsigned int rev; | ||
| 132 | microcode_t *mc; | ||
| 133 | } ucode_cpu_info[NR_CPUS]; | ||
| 134 | |||
| 135 | static void collect_cpu_info(int cpu_num) | ||
| 136 | { | ||
| 137 | struct cpuinfo_x86 *c = &cpu_data(cpu_num); | ||
| 138 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; | ||
| 139 | unsigned int val[2]; | ||
| 140 | |||
| 141 | /* We should bind the task to the CPU */ | ||
| 142 | BUG_ON(raw_smp_processor_id() != cpu_num); | ||
| 143 | uci->pf = uci->rev = 0; | ||
| 144 | uci->mc = NULL; | ||
| 145 | uci->valid = 1; | ||
| 146 | |||
| 147 | if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || | ||
| 148 | cpu_has(c, X86_FEATURE_IA64)) { | ||
| 149 | printk(KERN_ERR "microcode: CPU%d not a capable Intel " | ||
| 150 | "processor\n", cpu_num); | ||
| 151 | uci->valid = 0; | ||
| 152 | return; | ||
| 153 | } | ||
| 154 | |||
| 155 | uci->sig = cpuid_eax(0x00000001); | ||
| 156 | |||
| 157 | if ((c->x86_model >= 5) || (c->x86 > 6)) { | ||
| 158 | /* get processor flags from MSR 0x17 */ | ||
| 159 | rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); | ||
| 160 | uci->pf = 1 << ((val[1] >> 18) & 7); | ||
| 161 | } | ||
| 162 | |||
| 163 | wrmsr(MSR_IA32_UCODE_REV, 0, 0); | ||
| 164 | /* see notes above for revision 1.07. Apparent chip bug */ | ||
| 165 | sync_core(); | ||
| 166 | /* get the current revision from MSR 0x8B */ | ||
| 167 | rdmsr(MSR_IA32_UCODE_REV, val[0], uci->rev); | ||
| 168 | pr_debug("microcode: collect_cpu_info : sig=0x%x, pf=0x%x, rev=0x%x\n", | ||
| 169 | uci->sig, uci->pf, uci->rev); | ||
| 170 | } | ||
| 171 | |||
| 172 | static inline int microcode_update_match(int cpu_num, | ||
| 173 | microcode_header_t *mc_header, int sig, int pf) | ||
| 174 | { | ||
| 175 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; | ||
| 176 | |||
| 177 | if (!sigmatch(sig, uci->sig, pf, uci->pf) | ||
| 178 | || mc_header->rev <= uci->rev) | ||
| 179 | return 0; | ||
| 180 | return 1; | ||
| 181 | } | ||
| 182 | |||
| 183 | static int microcode_sanity_check(void *mc) | ||
| 184 | { | ||
| 185 | microcode_header_t *mc_header = mc; | ||
| 186 | struct extended_sigtable *ext_header = NULL; | ||
| 187 | struct extended_signature *ext_sig; | ||
| 188 | unsigned long total_size, data_size, ext_table_size; | ||
| 189 | int sum, orig_sum, ext_sigcount = 0, i; | ||
| 190 | |||
| 191 | total_size = get_totalsize(mc_header); | ||
| 192 | data_size = get_datasize(mc_header); | ||
| 193 | if (data_size + MC_HEADER_SIZE > total_size) { | ||
| 194 | printk(KERN_ERR "microcode: error! " | ||
| 195 | "Bad data size in microcode data file\n"); | ||
| 196 | return -EINVAL; | ||
| 197 | } | ||
| 198 | |||
| 199 | if (mc_header->ldrver != 1 || mc_header->hdrver != 1) { | ||
| 200 | printk(KERN_ERR "microcode: error! " | ||
| 201 | "Unknown microcode update format\n"); | ||
| 202 | return -EINVAL; | ||
| 203 | } | ||
| 204 | ext_table_size = total_size - (MC_HEADER_SIZE + data_size); | ||
| 205 | if (ext_table_size) { | ||
| 206 | if ((ext_table_size < EXT_HEADER_SIZE) | ||
| 207 | || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) { | ||
| 208 | printk(KERN_ERR "microcode: error! " | ||
| 209 | "Small exttable size in microcode data file\n"); | ||
| 210 | return -EINVAL; | ||
| 211 | } | ||
| 212 | ext_header = mc + MC_HEADER_SIZE + data_size; | ||
| 213 | if (ext_table_size != exttable_size(ext_header)) { | ||
| 214 | printk(KERN_ERR "microcode: error! " | ||
| 215 | "Bad exttable size in microcode data file\n"); | ||
| 216 | return -EFAULT; | ||
| 217 | } | ||
| 218 | ext_sigcount = ext_header->count; | ||
| 219 | } | ||
| 220 | |||
| 221 | /* check extended table checksum */ | ||
| 222 | if (ext_table_size) { | ||
| 223 | int ext_table_sum = 0; | ||
| 224 | int *ext_tablep = (int *)ext_header; | ||
| 225 | |||
| 226 | i = ext_table_size / DWSIZE; | ||
| 227 | while (i--) | ||
| 228 | ext_table_sum += ext_tablep[i]; | ||
| 229 | if (ext_table_sum) { | ||
| 230 | printk(KERN_WARNING "microcode: aborting, " | ||
| 231 | "bad extended signature table checksum\n"); | ||
| 232 | return -EINVAL; | ||
| 233 | } | ||
| 234 | } | ||
| 235 | |||
| 236 | /* calculate the checksum */ | ||
| 237 | orig_sum = 0; | ||
| 238 | i = (MC_HEADER_SIZE + data_size) / DWSIZE; | ||
| 239 | while (i--) | ||
| 240 | orig_sum += ((int *)mc)[i]; | ||
| 241 | if (orig_sum) { | ||
| 242 | printk(KERN_ERR "microcode: aborting, bad checksum\n"); | ||
| 243 | return -EINVAL; | ||
| 244 | } | ||
| 245 | if (!ext_table_size) | ||
| 246 | return 0; | ||
| 247 | /* check extended signature checksum */ | ||
| 248 | for (i = 0; i < ext_sigcount; i++) { | ||
| 249 | ext_sig = (void *)ext_header + EXT_HEADER_SIZE + | ||
| 250 | EXT_SIGNATURE_SIZE * i; | ||
| 251 | sum = orig_sum | ||
| 252 | - (mc_header->sig + mc_header->pf + mc_header->cksum) | ||
| 253 | + (ext_sig->sig + ext_sig->pf + ext_sig->cksum); | ||
| 254 | if (sum) { | ||
| 255 | printk(KERN_ERR "microcode: aborting, bad checksum\n"); | ||
| 256 | return -EINVAL; | ||
| 257 | } | ||
| 258 | } | ||
| 259 | return 0; | ||
| 260 | } | ||
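The sanity check above leans on Intel's rule that every 32-bit word of the update (header plus data) must sum to zero modulo 2^32, which is why a clean image leaves orig_sum at 0. A tiny standalone demonstration of building and then verifying such a checksum (the four-dword "image" is fabricated and obviously not real microcode):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        /* Fabricated 4-dword "image"; the last dword acts as the checksum field. */
        uint32_t image[4] = { 0x00000001, 0xdeadbeef, 0x12345678, 0 };
        uint32_t sum = 0;
        int i;

        for (i = 0; i < 3; i++)
                sum += image[i];
        image[3] = (uint32_t)(0u - sum);        /* make the whole thing sum to 0 */

        /* Verification, as in microcode_sanity_check(): the total must be zero. */
        sum = 0;
        for (i = 0; i < 4; i++)
                sum += image[i];
        printf("checksum %s\n", sum == 0 ? "ok" : "bad");
        return 0;
}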
| 261 | |||
| 262 | /* | ||
| 263 | * return 0 - no update found | ||
| 264 | * return 1 - found update | ||
| 265 | * return < 0 - error | ||
| 266 | */ | ||
| 267 | static int get_maching_microcode(void *mc, int cpu) | ||
| 268 | { | ||
| 269 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | ||
| 270 | microcode_header_t *mc_header = mc; | ||
| 271 | struct extended_sigtable *ext_header; | ||
| 272 | unsigned long total_size = get_totalsize(mc_header); | ||
| 273 | int ext_sigcount, i; | ||
| 274 | struct extended_signature *ext_sig; | ||
| 275 | void *new_mc; | ||
| 276 | |||
| 277 | if (microcode_update_match(cpu, mc_header, | ||
| 278 | mc_header->sig, mc_header->pf)) | ||
| 279 | goto find; | ||
| 280 | |||
| 281 | if (total_size <= get_datasize(mc_header) + MC_HEADER_SIZE) | ||
| 282 | return 0; | ||
| 283 | |||
| 284 | ext_header = mc + get_datasize(mc_header) + MC_HEADER_SIZE; | ||
| 285 | ext_sigcount = ext_header->count; | ||
| 286 | ext_sig = (void *)ext_header + EXT_HEADER_SIZE; | ||
| 287 | for (i = 0; i < ext_sigcount; i++) { | ||
| 288 | if (microcode_update_match(cpu, mc_header, | ||
| 289 | ext_sig->sig, ext_sig->pf)) | ||
| 290 | goto find; | ||
| 291 | ext_sig++; | ||
| 292 | } | ||
| 293 | return 0; | ||
| 294 | find: | ||
| 295 | pr_debug("microcode: CPU%d found a matching microcode update with" | ||
| 296 | " version 0x%x (current=0x%x)\n", cpu, mc_header->rev,uci->rev); | ||
| 297 | new_mc = vmalloc(total_size); | ||
| 298 | if (!new_mc) { | ||
| 299 | printk(KERN_ERR "microcode: error! Can not allocate memory\n"); | ||
| 300 | return -ENOMEM; | ||
| 301 | } | ||
| 302 | |||
| 303 | /* free previous update file */ | ||
| 304 | vfree(uci->mc); | ||
| 305 | |||
| 306 | memcpy(new_mc, mc, total_size); | ||
| 307 | uci->mc = new_mc; | ||
| 308 | return 1; | ||
| 309 | } | ||
| 310 | |||
| 311 | static void apply_microcode(int cpu) | ||
| 312 | { | ||
| 313 | unsigned long flags; | ||
| 314 | unsigned int val[2]; | ||
| 315 | int cpu_num = raw_smp_processor_id(); | ||
| 316 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; | ||
| 317 | |||
| 318 | /* We should bind the task to the CPU */ | ||
| 319 | BUG_ON(cpu_num != cpu); | ||
| 320 | |||
| 321 | if (uci->mc == NULL) | ||
| 322 | return; | ||
| 323 | |||
| 324 | /* serialize access to the physical write to MSR 0x79 */ | ||
| 325 | spin_lock_irqsave(&microcode_update_lock, flags); | ||
| 326 | |||
| 327 | /* write microcode via MSR 0x79 */ | ||
| 328 | wrmsr(MSR_IA32_UCODE_WRITE, | ||
| 329 | (unsigned long) uci->mc->bits, | ||
| 330 | (unsigned long) uci->mc->bits >> 16 >> 16); | ||
| 331 | wrmsr(MSR_IA32_UCODE_REV, 0, 0); | ||
| 332 | |||
| 333 | /* see notes above for revision 1.07. Apparent chip bug */ | ||
| 334 | sync_core(); | ||
| 335 | |||
| 336 | /* get the current revision from MSR 0x8B */ | ||
| 337 | rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); | ||
| 338 | |||
| 339 | spin_unlock_irqrestore(&microcode_update_lock, flags); | ||
| 340 | if (val[1] != uci->mc->hdr.rev) { | ||
| 341 | printk(KERN_ERR "microcode: CPU%d update from revision " | ||
| 342 | "0x%x to 0x%x failed\n", cpu_num, uci->rev, val[1]); | ||
| 343 | return; | ||
| 344 | } | ||
| 345 | printk(KERN_INFO "microcode: CPU%d updated from revision " | ||
| 346 | "0x%x to 0x%x, date = %08x \n", | ||
| 347 | cpu_num, uci->rev, val[1], uci->mc->hdr.date); | ||
| 348 | uci->rev = val[1]; | ||
| 349 | } | ||
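A small aside on the wrmsr() call in apply_microcode(): the high half of the buffer address is produced with two 16-bit shifts rather than one ">> 32", because shifting a 32-bit unsigned long by its full width is undefined in C; the double shift yields 0 on 32-bit builds and the real upper word on 64-bit ones. A quick standalone illustration:

#include <stdio.h>

int main(void)
{
        /* Build a test value with (possibly) non-zero upper bits, using the
         * same two-step shift so this compiles cleanly on 32-bit too. */
        unsigned long addr = (0x1234UL << 16 << 16) | 0x9abcdef0UL;

        unsigned long low  = addr;              /* bits 0..31            */
        unsigned long high = addr >> 16 >> 16;  /* bits 32..63, or 0 on 32-bit */

        /* A single ">> 32" here would be undefined when unsigned long is 32 bits. */
        printf("high:low = %#lx : %#lx\n", high, low & 0xffffffffUL);
        return 0;
}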
| 350 | |||
| 351 | #ifdef CONFIG_MICROCODE_OLD_INTERFACE | ||
| 352 | static void __user *user_buffer; /* user area microcode data buffer */ | ||
| 353 | static unsigned int user_buffer_size; /* its size */ | ||
| 354 | |||
| 355 | static long get_next_ucode(void **mc, long offset) | ||
| 356 | { | ||
| 357 | microcode_header_t mc_header; | ||
| 358 | unsigned long total_size; | ||
| 359 | |||
| 360 | /* No more data */ | ||
| 361 | if (offset >= user_buffer_size) | ||
| 362 | return 0; | ||
| 363 | if (copy_from_user(&mc_header, user_buffer + offset, MC_HEADER_SIZE)) { | ||
| 364 | printk(KERN_ERR "microcode: error! Can not read user data\n"); | ||
| 365 | return -EFAULT; | ||
| 366 | } | ||
| 367 | total_size = get_totalsize(&mc_header); | ||
| 368 | if (offset + total_size > user_buffer_size) { | ||
| 369 | printk(KERN_ERR "microcode: error! Bad total size in microcode " | ||
| 370 | "data file\n"); | ||
| 371 | return -EINVAL; | ||
| 372 | } | ||
| 373 | *mc = vmalloc(total_size); | ||
| 374 | if (!*mc) | ||
| 375 | return -ENOMEM; | ||
| 376 | if (copy_from_user(*mc, user_buffer + offset, total_size)) { | ||
| 377 | printk(KERN_ERR "microcode: error! Can not read user data\n"); | ||
| 378 | vfree(*mc); | ||
| 379 | return -EFAULT; | ||
| 380 | } | ||
| 381 | return offset + total_size; | ||
| 382 | } | ||
| 383 | |||
| 384 | static int do_microcode_update (void) | ||
| 385 | { | ||
| 386 | long cursor = 0; | ||
| 387 | int error = 0; | ||
| 388 | void *new_mc = NULL; | ||
| 389 | int cpu; | ||
| 390 | cpumask_t old; | ||
| 391 | |||
| 392 | old = current->cpus_allowed; | ||
| 393 | |||
| 394 | while ((cursor = get_next_ucode(&new_mc, cursor)) > 0) { | ||
| 395 | error = microcode_sanity_check(new_mc); | ||
| 396 | if (error) | ||
| 397 | goto out; | ||
| 398 | /* | ||
| 399 | * It's possible the data file has multiple matching ucode, | ||
| 400 | * lets keep searching till the latest version | ||
| 401 | */ | ||
| 402 | for_each_online_cpu(cpu) { | ||
| 403 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | ||
| 404 | |||
| 405 | if (!uci->valid) | ||
| 406 | continue; | ||
| 407 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); | ||
| 408 | error = get_maching_microcode(new_mc, cpu); | ||
| 409 | if (error < 0) | ||
| 410 | goto out; | ||
| 411 | if (error == 1) | ||
| 412 | apply_microcode(cpu); | ||
| 413 | } | ||
| 414 | vfree(new_mc); | ||
| 415 | } | ||
| 416 | out: | ||
| 417 | if (cursor > 0) | ||
| 418 | vfree(new_mc); | ||
| 419 | if (cursor < 0) | ||
| 420 | error = cursor; | ||
| 421 | set_cpus_allowed_ptr(current, &old); | ||
| 422 | return error; | ||
| 423 | } | ||
| 424 | |||
| 425 | static int microcode_open (struct inode *unused1, struct file *unused2) | ||
| 426 | { | ||
| 427 | cycle_kernel_lock(); | ||
| 428 | return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; | ||
| 429 | } | ||
| 430 | |||
| 431 | static ssize_t microcode_write (struct file *file, const char __user *buf, size_t len, loff_t *ppos) | ||
| 432 | { | ||
| 433 | ssize_t ret; | ||
| 434 | |||
| 435 | if ((len >> PAGE_SHIFT) > num_physpages) { | ||
| 436 | printk(KERN_ERR "microcode: too much data (max %ld pages)\n", num_physpages); | ||
| 437 | return -EINVAL; | ||
| 438 | } | ||
| 439 | |||
| 440 | get_online_cpus(); | ||
| 441 | mutex_lock(&microcode_mutex); | ||
| 442 | |||
| 443 | user_buffer = (void __user *) buf; | ||
| 444 | user_buffer_size = (int) len; | ||
| 445 | |||
| 446 | ret = do_microcode_update(); | ||
| 447 | if (!ret) | ||
| 448 | ret = (ssize_t)len; | ||
| 449 | |||
| 450 | mutex_unlock(&microcode_mutex); | ||
| 451 | put_online_cpus(); | ||
| 452 | |||
| 453 | return ret; | ||
| 454 | } | ||
| 455 | |||
| 456 | static const struct file_operations microcode_fops = { | ||
| 457 | .owner = THIS_MODULE, | ||
| 458 | .write = microcode_write, | ||
| 459 | .open = microcode_open, | ||
| 460 | }; | ||
| 461 | |||
| 462 | static struct miscdevice microcode_dev = { | ||
| 463 | .minor = MICROCODE_MINOR, | ||
| 464 | .name = "microcode", | ||
| 465 | .fops = &microcode_fops, | ||
| 466 | }; | ||
| 467 | |||
| 468 | static int __init microcode_dev_init (void) | ||
| 469 | { | ||
| 470 | int error; | ||
| 471 | |||
| 472 | error = misc_register(&microcode_dev); | ||
| 473 | if (error) { | ||
| 474 | printk(KERN_ERR | ||
| 475 | "microcode: can't misc_register on minor=%d\n", | ||
| 476 | MICROCODE_MINOR); | ||
| 477 | return error; | ||
| 478 | } | ||
| 479 | |||
| 480 | return 0; | ||
| 481 | } | ||
| 482 | |||
| 483 | static void microcode_dev_exit (void) | ||
| 484 | { | ||
| 485 | misc_deregister(&microcode_dev); | ||
| 486 | } | ||
| 487 | |||
| 488 | MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); | ||
| 489 | #else | ||
| 490 | #define microcode_dev_init() 0 | ||
| 491 | #define microcode_dev_exit() do { } while(0) | ||
| 492 | #endif | ||
| 493 | |||
| 494 | static long get_next_ucode_from_buffer(void **mc, const u8 *buf, | ||
| 495 | unsigned long size, long offset) | ||
| 496 | { | ||
| 497 | microcode_header_t *mc_header; | ||
| 498 | unsigned long total_size; | ||
| 499 | |||
| 500 | /* No more data */ | ||
| 501 | if (offset >= size) | ||
| 502 | return 0; | ||
| 503 | mc_header = (microcode_header_t *)(buf + offset); | ||
| 504 | total_size = get_totalsize(mc_header); | ||
| 505 | |||
| 506 | if (offset + total_size > size) { | ||
| 507 | printk(KERN_ERR "microcode: error! Bad data in microcode data file\n"); | ||
| 508 | return -EINVAL; | ||
| 509 | } | ||
| 510 | |||
| 511 | *mc = vmalloc(total_size); | ||
| 512 | if (!*mc) { | ||
| 513 | printk(KERN_ERR "microcode: error! Can not allocate memory\n"); | ||
| 514 | return -ENOMEM; | ||
| 515 | } | ||
| 516 | memcpy(*mc, buf + offset, total_size); | ||
| 517 | return offset + total_size; | ||
| 518 | } | ||
| 519 | |||
| 520 | /* fake device for request_firmware */ | ||
| 521 | static struct platform_device *microcode_pdev; | ||
| 522 | |||
| 523 | static int cpu_request_microcode(int cpu) | ||
| 524 | { | ||
| 525 | char name[30]; | ||
| 526 | struct cpuinfo_x86 *c = &cpu_data(cpu); | ||
| 527 | const struct firmware *firmware; | ||
| 528 | const u8 *buf; | ||
| 529 | unsigned long size; | ||
| 530 | long offset = 0; | ||
| 531 | int error; | ||
| 532 | void *mc; | ||
| 533 | |||
| 534 | /* We should bind the task to the CPU */ | ||
| 535 | BUG_ON(cpu != raw_smp_processor_id()); | ||
| 536 | sprintf(name,"intel-ucode/%02x-%02x-%02x", | ||
| 537 | c->x86, c->x86_model, c->x86_mask); | ||
| 538 | error = request_firmware(&firmware, name, &microcode_pdev->dev); | ||
| 539 | if (error) { | ||
| 540 | pr_debug("microcode: data file %s load failed\n", name); | ||
| 541 | return error; | ||
| 542 | } | ||
| 543 | buf = firmware->data; | ||
| 544 | size = firmware->size; | ||
| 545 | while ((offset = get_next_ucode_from_buffer(&mc, buf, size, offset)) | ||
| 546 | > 0) { | ||
| 547 | error = microcode_sanity_check(mc); | ||
| 548 | if (error) | ||
| 549 | break; | ||
| 550 | error = get_maching_microcode(mc, cpu); | ||
| 551 | if (error < 0) | ||
| 552 | break; | ||
| 553 | /* | ||
| 554 | * It's possible the data file has multiple matching ucode, | ||
| 555 | * lets keep searching till the latest version | ||
| 556 | */ | ||
| 557 | if (error == 1) { | ||
| 558 | apply_microcode(cpu); | ||
| 559 | error = 0; | ||
| 560 | } | ||
| 561 | vfree(mc); | ||
| 562 | } | ||
| 563 | if (offset > 0) | ||
| 564 | vfree(mc); | ||
| 565 | if (offset < 0) | ||
| 566 | error = offset; | ||
| 567 | release_firmware(firmware); | ||
| 568 | |||
| 569 | return error; | ||
| 570 | } | ||
| 571 | |||
| 572 | static int apply_microcode_check_cpu(int cpu) | ||
| 573 | { | ||
| 574 | struct cpuinfo_x86 *c = &cpu_data(cpu); | ||
| 575 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | ||
| 576 | cpumask_t old; | ||
| 577 | unsigned int val[2]; | ||
| 578 | int err = 0; | ||
| 579 | |||
| 580 | /* Check if the microcode is available */ | ||
| 581 | if (!uci->mc) | ||
| 582 | return 0; | ||
| 583 | |||
| 584 | old = current->cpus_allowed; | ||
| 585 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); | ||
| 586 | |||
| 587 | /* Check if the microcode we have in memory matches the CPU */ | ||
| 588 | if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || | ||
| 589 | cpu_has(c, X86_FEATURE_IA64) || uci->sig != cpuid_eax(0x00000001)) | ||
| 590 | err = -EINVAL; | ||
| 591 | |||
| 592 | if (!err && ((c->x86_model >= 5) || (c->x86 > 6))) { | ||
| 593 | /* get processor flags from MSR 0x17 */ | ||
| 594 | rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); | ||
| 595 | if (uci->pf != (1 << ((val[1] >> 18) & 7))) | ||
| 596 | err = -EINVAL; | ||
| 597 | } | ||
| 598 | |||
| 599 | if (!err) { | ||
| 600 | wrmsr(MSR_IA32_UCODE_REV, 0, 0); | ||
| 601 | /* see notes above for revision 1.07. Apparent chip bug */ | ||
| 602 | sync_core(); | ||
| 603 | /* get the current revision from MSR 0x8B */ | ||
| 604 | rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); | ||
| 605 | if (uci->rev != val[1]) | ||
| 606 | err = -EINVAL; | ||
| 607 | } | ||
| 608 | |||
| 609 | if (!err) | ||
| 610 | apply_microcode(cpu); | ||
| 611 | else | ||
| 612 | printk(KERN_ERR "microcode: Could not apply microcode to CPU%d:" | ||
| 613 | " sig=0x%x, pf=0x%x, rev=0x%x\n", | ||
| 614 | cpu, uci->sig, uci->pf, uci->rev); | ||
| 615 | |||
| 616 | set_cpus_allowed_ptr(current, &old); | ||
| 617 | return err; | ||
| 618 | } | ||
| 619 | |||
| 620 | static void microcode_init_cpu(int cpu, int resume) | ||
| 621 | { | ||
| 622 | cpumask_t old; | ||
| 623 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | ||
| 624 | |||
| 625 | old = current->cpus_allowed; | ||
| 626 | |||
| 627 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); | ||
| 628 | mutex_lock(&microcode_mutex); | ||
| 629 | collect_cpu_info(cpu); | ||
| 630 | if (uci->valid && system_state == SYSTEM_RUNNING && !resume) | ||
| 631 | cpu_request_microcode(cpu); | ||
| 632 | mutex_unlock(&microcode_mutex); | ||
| 633 | set_cpus_allowed_ptr(current, &old); | ||
| 634 | } | ||
| 635 | |||
| 636 | static void microcode_fini_cpu(int cpu) | ||
| 637 | { | ||
| 638 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | ||
| 639 | |||
| 640 | mutex_lock(&microcode_mutex); | ||
| 641 | uci->valid = 0; | ||
| 642 | vfree(uci->mc); | ||
| 643 | uci->mc = NULL; | ||
| 644 | mutex_unlock(&microcode_mutex); | ||
| 645 | } | ||
| 646 | |||
| 647 | static ssize_t reload_store(struct sys_device *dev, const char *buf, size_t sz) | ||
| 648 | { | ||
| 649 | struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; | ||
| 650 | char *end; | ||
| 651 | unsigned long val = simple_strtoul(buf, &end, 0); | ||
| 652 | int err = 0; | ||
| 653 | int cpu = dev->id; | ||
| 654 | |||
| 655 | if (end == buf) | ||
| 656 | return -EINVAL; | ||
| 657 | if (val == 1) { | ||
| 658 | cpumask_t old; | ||
| 659 | |||
| 660 | old = current->cpus_allowed; | ||
| 661 | |||
| 662 | get_online_cpus(); | ||
| 663 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); | ||
| 664 | |||
| 665 | mutex_lock(&microcode_mutex); | ||
| 666 | if (uci->valid) | ||
| 667 | err = cpu_request_microcode(cpu); | ||
| 668 | mutex_unlock(&microcode_mutex); | ||
| 669 | put_online_cpus(); | ||
| 670 | set_cpus_allowed_ptr(current, &old); | ||
| 671 | } | ||
| 672 | if (err) | ||
| 673 | return err; | ||
| 674 | return sz; | ||
| 675 | } | ||
| 676 | |||
| 677 | static ssize_t version_show(struct sys_device *dev, char *buf) | ||
| 678 | { | ||
| 679 | struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; | ||
| 680 | |||
| 681 | return sprintf(buf, "0x%x\n", uci->rev); | ||
| 682 | } | ||
| 683 | |||
| 684 | static ssize_t pf_show(struct sys_device *dev, char *buf) | ||
| 685 | { | ||
| 686 | struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; | ||
| 687 | |||
| 688 | return sprintf(buf, "0x%x\n", uci->pf); | ||
| 689 | } | ||
| 690 | |||
| 691 | static SYSDEV_ATTR(reload, 0200, NULL, reload_store); | ||
| 692 | static SYSDEV_ATTR(version, 0400, version_show, NULL); | ||
| 693 | static SYSDEV_ATTR(processor_flags, 0400, pf_show, NULL); | ||
| 694 | |||
| 695 | static struct attribute *mc_default_attrs[] = { | ||
| 696 | &attr_reload.attr, | ||
| 697 | &attr_version.attr, | ||
| 698 | &attr_processor_flags.attr, | ||
| 699 | NULL | ||
| 700 | }; | ||
| 701 | |||
| 702 | static struct attribute_group mc_attr_group = { | ||
| 703 | .attrs = mc_default_attrs, | ||
| 704 | .name = "microcode", | ||
| 705 | }; | ||
| 706 | |||
| 707 | static int __mc_sysdev_add(struct sys_device *sys_dev, int resume) | ||
| 708 | { | ||
| 709 | int err, cpu = sys_dev->id; | ||
| 710 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | ||
| 711 | |||
| 712 | if (!cpu_online(cpu)) | ||
| 713 | return 0; | ||
| 714 | |||
| 715 | pr_debug("microcode: CPU%d added\n", cpu); | ||
| 716 | memset(uci, 0, sizeof(*uci)); | ||
| 717 | |||
| 718 | err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group); | ||
| 719 | if (err) | ||
| 720 | return err; | ||
| 721 | |||
| 722 | microcode_init_cpu(cpu, resume); | ||
| 723 | |||
| 724 | return 0; | ||
| 725 | } | ||
| 726 | |||
| 727 | static int mc_sysdev_add(struct sys_device *sys_dev) | ||
| 728 | { | ||
| 729 | return __mc_sysdev_add(sys_dev, 0); | ||
| 730 | } | ||
| 731 | |||
| 732 | static int mc_sysdev_remove(struct sys_device *sys_dev) | ||
| 733 | { | ||
| 734 | int cpu = sys_dev->id; | ||
| 735 | |||
| 736 | if (!cpu_online(cpu)) | ||
| 737 | return 0; | ||
| 738 | |||
| 739 | pr_debug("microcode: CPU%d removed\n", cpu); | ||
| 740 | microcode_fini_cpu(cpu); | ||
| 741 | sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); | ||
| 742 | return 0; | ||
| 743 | } | ||
| 744 | |||
| 745 | static int mc_sysdev_resume(struct sys_device *dev) | ||
| 746 | { | ||
| 747 | int cpu = dev->id; | ||
| 748 | |||
| 749 | if (!cpu_online(cpu)) | ||
| 750 | return 0; | ||
| 751 | pr_debug("microcode: CPU%d resumed\n", cpu); | ||
| 752 | /* only CPU 0 will apply ucode here */ | ||
| 753 | apply_microcode(0); | ||
| 754 | return 0; | ||
| 755 | } | ||
| 756 | |||
| 757 | static struct sysdev_driver mc_sysdev_driver = { | ||
| 758 | .add = mc_sysdev_add, | ||
| 759 | .remove = mc_sysdev_remove, | ||
| 760 | .resume = mc_sysdev_resume, | ||
| 761 | }; | ||
| 762 | |||
| 763 | static __cpuinit int | ||
| 764 | mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) | ||
| 765 | { | ||
| 766 | unsigned int cpu = (unsigned long)hcpu; | ||
| 767 | struct sys_device *sys_dev; | ||
| 768 | |||
| 769 | sys_dev = get_cpu_sysdev(cpu); | ||
| 770 | switch (action) { | ||
| 771 | case CPU_UP_CANCELED_FROZEN: | ||
| 772 | /* The CPU refused to come up during a system resume */ | ||
| 773 | microcode_fini_cpu(cpu); | ||
| 774 | break; | ||
| 775 | case CPU_ONLINE: | ||
| 776 | case CPU_DOWN_FAILED: | ||
| 777 | mc_sysdev_add(sys_dev); | ||
| 778 | break; | ||
| 779 | case CPU_ONLINE_FROZEN: | ||
| 780 | /* System-wide resume is in progress, try to apply microcode */ | ||
| 781 | if (apply_microcode_check_cpu(cpu)) { | ||
| 782 | /* The application of microcode failed */ | ||
| 783 | microcode_fini_cpu(cpu); | ||
| 784 | __mc_sysdev_add(sys_dev, 1); | ||
| 785 | break; | ||
| 786 | } | ||
| 787 | case CPU_DOWN_FAILED_FROZEN: | ||
| 788 | if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group)) | ||
| 789 | printk(KERN_ERR "microcode: Failed to create the sysfs " | ||
| 790 | "group for CPU%d\n", cpu); | ||
| 791 | break; | ||
| 792 | case CPU_DOWN_PREPARE: | ||
| 793 | mc_sysdev_remove(sys_dev); | ||
| 794 | break; | ||
| 795 | case CPU_DOWN_PREPARE_FROZEN: | ||
| 796 | /* Suspend is in progress, only remove the interface */ | ||
| 797 | sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); | ||
| 798 | break; | ||
| 799 | } | ||
| 800 | return NOTIFY_OK; | ||
| 801 | } | ||
| 802 | |||
| 803 | static struct notifier_block __refdata mc_cpu_notifier = { | ||
| 804 | .notifier_call = mc_cpu_callback, | ||
| 805 | }; | ||
| 806 | |||
| 807 | static int __init microcode_init (void) | ||
| 808 | { | ||
| 809 | int error; | ||
| 810 | |||
| 811 | printk(KERN_INFO | ||
| 812 | "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@aivazian.fsnet.co.uk>\n"); | ||
| 813 | |||
| 814 | error = microcode_dev_init(); | ||
| 815 | if (error) | ||
| 816 | return error; | ||
| 817 | microcode_pdev = platform_device_register_simple("microcode", -1, | ||
| 818 | NULL, 0); | ||
| 819 | if (IS_ERR(microcode_pdev)) { | ||
| 820 | microcode_dev_exit(); | ||
| 821 | return PTR_ERR(microcode_pdev); | ||
| 822 | } | ||
| 823 | |||
| 824 | get_online_cpus(); | ||
| 825 | error = sysdev_driver_register(&cpu_sysdev_class, &mc_sysdev_driver); | ||
| 826 | put_online_cpus(); | ||
| 827 | if (error) { | ||
| 828 | microcode_dev_exit(); | ||
| 829 | platform_device_unregister(microcode_pdev); | ||
| 830 | return error; | ||
| 831 | } | ||
| 832 | |||
| 833 | register_hotcpu_notifier(&mc_cpu_notifier); | ||
| 834 | return 0; | ||
| 835 | } | ||
| 836 | |||
| 837 | static void __exit microcode_exit (void) | ||
| 838 | { | ||
| 839 | microcode_dev_exit(); | ||
| 840 | |||
| 841 | unregister_hotcpu_notifier(&mc_cpu_notifier); | ||
| 842 | |||
| 843 | get_online_cpus(); | ||
| 844 | sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver); | ||
| 845 | put_online_cpus(); | ||
| 846 | |||
| 847 | platform_device_unregister(microcode_pdev); | ||
| 848 | } | ||
| 849 | |||
| 850 | module_init(microcode_init) | ||
| 851 | module_exit(microcode_exit) | ||
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c new file mode 100644 index 000000000000..7a1f8eeac2c7 --- /dev/null +++ b/arch/x86/kernel/microcode_amd.c | |||
| @@ -0,0 +1,435 @@ | |||
| 1 | /* | ||
| 2 | * AMD CPU Microcode Update Driver for Linux | ||
| 3 | * Copyright (C) 2008 Advanced Micro Devices Inc. | ||
| 4 | * | ||
| 5 | * Author: Peter Oruba <peter.oruba@amd.com> | ||
| 6 | * | ||
| 7 | * Based on work by: | ||
| 8 | * Tigran Aivazian <tigran@aivazian.fsnet.co.uk> | ||
| 9 | * | ||
| 10 | * This driver allows one to upgrade microcode on AMD | ||
| 11 | * family 0x10 and 0x11 processors. | ||
| 12 | * | ||
| 13 | * Licensed under the terms of the GNU General Public | ||
| 14 | * License version 2. See file COPYING for details. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include <linux/capability.h> | ||
| 18 | #include <linux/kernel.h> | ||
| 19 | #include <linux/init.h> | ||
| 20 | #include <linux/sched.h> | ||
| 21 | #include <linux/cpumask.h> | ||
| 22 | #include <linux/module.h> | ||
| 23 | #include <linux/slab.h> | ||
| 24 | #include <linux/vmalloc.h> | ||
| 25 | #include <linux/miscdevice.h> | ||
| 26 | #include <linux/spinlock.h> | ||
| 27 | #include <linux/mm.h> | ||
| 28 | #include <linux/fs.h> | ||
| 29 | #include <linux/mutex.h> | ||
| 30 | #include <linux/cpu.h> | ||
| 31 | #include <linux/firmware.h> | ||
| 32 | #include <linux/platform_device.h> | ||
| 33 | #include <linux/pci.h> | ||
| 34 | #include <linux/pci_ids.h> | ||
| 35 | |||
| 36 | #include <asm/msr.h> | ||
| 37 | #include <asm/uaccess.h> | ||
| 38 | #include <asm/processor.h> | ||
| 39 | #include <asm/microcode.h> | ||
| 40 | |||
| 41 | MODULE_DESCRIPTION("AMD Microcode Update Driver"); | ||
| 42 | MODULE_AUTHOR("Peter Oruba <peter.oruba@amd.com>"); | ||
| 43 | MODULE_LICENSE("GPL v2"); | ||
| 44 | |||
| 45 | #define UCODE_MAGIC 0x00414d44 | ||
| 46 | #define UCODE_EQUIV_CPU_TABLE_TYPE 0x00000000 | ||
| 47 | #define UCODE_UCODE_TYPE 0x00000001 | ||
| 48 | |||
| 49 | struct equiv_cpu_entry { | ||
| 50 | unsigned int installed_cpu; | ||
| 51 | unsigned int fixed_errata_mask; | ||
| 52 | unsigned int fixed_errata_compare; | ||
| 53 | unsigned int equiv_cpu; | ||
| 54 | }; | ||
| 55 | |||
| 56 | struct microcode_header_amd { | ||
| 57 | unsigned int data_code; | ||
| 58 | unsigned int patch_id; | ||
| 59 | unsigned char mc_patch_data_id[2]; | ||
| 60 | unsigned char mc_patch_data_len; | ||
| 61 | unsigned char init_flag; | ||
| 62 | unsigned int mc_patch_data_checksum; | ||
| 63 | unsigned int nb_dev_id; | ||
| 64 | unsigned int sb_dev_id; | ||
| 65 | unsigned char processor_rev_id[2]; | ||
| 66 | unsigned char nb_rev_id; | ||
| 67 | unsigned char sb_rev_id; | ||
| 68 | unsigned char bios_api_rev; | ||
| 69 | unsigned char reserved1[3]; | ||
| 70 | unsigned int match_reg[8]; | ||
| 71 | }; | ||
| 72 | |||
| 73 | struct microcode_amd { | ||
| 74 | struct microcode_header_amd hdr; | ||
| 75 | unsigned int mpb[0]; | ||
| 76 | }; | ||
| 77 | |||
| 78 | #define UCODE_MAX_SIZE (2048) | ||
| 79 | #define DEFAULT_UCODE_DATASIZE (896) | ||
| 80 | #define MC_HEADER_SIZE (sizeof(struct microcode_header_amd)) | ||
| 81 | #define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) | ||
| 82 | #define DWSIZE (sizeof(u32)) | ||
| 83 | /* For now we support a fixed ucode total size only */ | ||
| 84 | #define get_totalsize(mc) \ | ||
| 85 | ((((struct microcode_amd *)mc)->hdr.mc_patch_data_len * 28) \ | ||
| 86 | + MC_HEADER_SIZE) | ||
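Note: the fixed-granularity size computation above can be checked stand-alone. A minimal sketch of the same arithmetic (the 64-byte header size is assumed from the struct layout above; the values are illustrative only):

```c
#include <stdio.h>

/* Sketch of get_totalsize(): patch data comes in 28-byte units, plus the
 * fixed AMD microcode header (64 bytes given the fields above). */
int main(void)
{
	unsigned int mc_patch_data_len = 32;  /* example: 32 * 28 = 896, i.e. DEFAULT_UCODE_DATASIZE */
	unsigned int header_size = 64;        /* assumed sizeof(struct microcode_header_amd) */
	unsigned int total = mc_patch_data_len * 28 + header_size;

	printf("total patch size: %u bytes\n", total);  /* 960 = DEFAULT_UCODE_TOTALSIZE */
	return 0;
}
```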
| 87 | |||
| 88 | /* serialize access to the physical write */ | ||
| 89 | static DEFINE_SPINLOCK(microcode_update_lock); | ||
| 90 | |||
| 91 | static struct equiv_cpu_entry *equiv_cpu_table; | ||
| 92 | |||
| 93 | static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) | ||
| 94 | { | ||
| 95 | struct cpuinfo_x86 *c = &cpu_data(cpu); | ||
| 96 | |||
| 97 | memset(csig, 0, sizeof(*csig)); | ||
| 98 | |||
| 99 | if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) { | ||
| 100 | printk(KERN_ERR "microcode: CPU%d not a capable AMD processor\n", | ||
| 101 | cpu); | ||
| 102 | return -1; | ||
| 103 | } | ||
| 104 | |||
| 105 | asm volatile("movl %1, %%ecx; rdmsr" | ||
| 106 | : "=a" (csig->rev) | ||
| 107 | : "i" (0x0000008B) : "ecx"); | ||
| 108 | |||
| 109 | printk(KERN_INFO "microcode: collect_cpu_info_amd : patch_id=0x%x\n", | ||
| 110 | csig->rev); | ||
| 111 | |||
| 112 | return 0; | ||
| 113 | } | ||
| 114 | |||
| 115 | static int get_matching_microcode(int cpu, void *mc, int rev) | ||
| 116 | { | ||
| 117 | struct microcode_header_amd *mc_header = mc; | ||
| 118 | struct pci_dev *nb_pci_dev, *sb_pci_dev; | ||
| 119 | unsigned int current_cpu_id; | ||
| 120 | unsigned int equiv_cpu_id = 0x00; | ||
| 121 | unsigned int i = 0; | ||
| 122 | |||
| 123 | BUG_ON(equiv_cpu_table == NULL); | ||
| 124 | current_cpu_id = cpuid_eax(0x00000001); | ||
| 125 | |||
| 126 | while (equiv_cpu_table[i].installed_cpu != 0) { | ||
| 127 | if (current_cpu_id == equiv_cpu_table[i].installed_cpu) { | ||
| 128 | equiv_cpu_id = equiv_cpu_table[i].equiv_cpu; | ||
| 129 | break; | ||
| 130 | } | ||
| 131 | i++; | ||
| 132 | } | ||
| 133 | |||
| 134 | if (!equiv_cpu_id) { | ||
| 135 | printk(KERN_ERR "microcode: CPU%d cpu_id " | ||
| 136 | "not found in equivalent cpu table \n", cpu); | ||
| 137 | return 0; | ||
| 138 | } | ||
| 139 | |||
| 140 | if ((mc_header->processor_rev_id[0]) != (equiv_cpu_id & 0xff)) { | ||
| 141 | printk(KERN_ERR | ||
| 142 | "microcode: CPU%d patch does not match " | ||
| 143 | "(patch is %x, cpu extended is %x) \n", | ||
| 144 | cpu, mc_header->processor_rev_id[0], | ||
| 145 | (equiv_cpu_id & 0xff)); | ||
| 146 | return 0; | ||
| 147 | } | ||
| 148 | |||
| 149 | if ((mc_header->processor_rev_id[1]) != ((equiv_cpu_id >> 16) & 0xff)) { | ||
| 150 | printk(KERN_ERR "microcode: CPU%d patch does not match " | ||
| 151 | "(patch is %x, cpu base id is %x) \n", | ||
| 152 | cpu, mc_header->processor_rev_id[1], | ||
| 153 | ((equiv_cpu_id >> 16) & 0xff)); | ||
| 154 | |||
| 155 | return 0; | ||
| 156 | } | ||
| 157 | |||
| 158 | /* ucode may be northbridge specific */ | ||
| 159 | if (mc_header->nb_dev_id) { | ||
| 160 | nb_pci_dev = pci_get_device(PCI_VENDOR_ID_AMD, | ||
| 161 | (mc_header->nb_dev_id & 0xff), | ||
| 162 | NULL); | ||
| 163 | if ((!nb_pci_dev) || | ||
| 164 | (mc_header->nb_rev_id != nb_pci_dev->revision)) { | ||
| 165 | printk(KERN_ERR "microcode: CPU%d NB mismatch \n", cpu); | ||
| 166 | pci_dev_put(nb_pci_dev); | ||
| 167 | return 0; | ||
| 168 | } | ||
| 169 | pci_dev_put(nb_pci_dev); | ||
| 170 | } | ||
| 171 | |||
| 172 | /* ucode may be southbridge specific */ | ||
| 173 | if (mc_header->sb_dev_id) { | ||
| 174 | sb_pci_dev = pci_get_device(PCI_VENDOR_ID_AMD, | ||
| 175 | (mc_header->sb_dev_id & 0xff), | ||
| 176 | NULL); | ||
| 177 | if ((!sb_pci_dev) || | ||
| 178 | (mc_header->sb_rev_id != sb_pci_dev->revision)) { | ||
| 179 | printk(KERN_ERR "microcode: CPU%d SB mismatch \n", cpu); | ||
| 180 | pci_dev_put(sb_pci_dev); | ||
| 181 | return 0; | ||
| 182 | } | ||
| 183 | pci_dev_put(sb_pci_dev); | ||
| 184 | } | ||
| 185 | |||
| 186 | if (mc_header->patch_id <= rev) | ||
| 187 | return 0; | ||
| 188 | |||
| 189 | return 1; | ||
| 190 | } | ||
| 191 | |||
| 192 | static void apply_microcode_amd(int cpu) | ||
| 193 | { | ||
| 194 | unsigned long flags; | ||
| 195 | unsigned int eax, edx; | ||
| 196 | unsigned int rev; | ||
| 197 | int cpu_num = raw_smp_processor_id(); | ||
| 198 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; | ||
| 199 | struct microcode_amd *mc_amd = uci->mc; | ||
| 200 | unsigned long addr; | ||
| 201 | |||
| 202 | /* We should bind the task to the CPU */ | ||
| 203 | BUG_ON(cpu_num != cpu); | ||
| 204 | |||
| 205 | if (mc_amd == NULL) | ||
| 206 | return; | ||
| 207 | |||
| 208 | spin_lock_irqsave(&microcode_update_lock, flags); | ||
| 209 | |||
| 210 | addr = (unsigned long)&mc_amd->hdr.data_code; | ||
| 211 | edx = (unsigned int)(((unsigned long)upper_32_bits(addr))); | ||
| 212 | eax = (unsigned int)(((unsigned long)lower_32_bits(addr))); | ||
| 213 | |||
| 214 | asm volatile("movl %0, %%ecx; wrmsr" : | ||
| 215 | : "i" (0xc0010020), "a" (eax), "d" (edx) : "ecx"); | ||
| 216 | |||
| 217 | /* get patch id after patching */ | ||
| 218 | asm volatile("movl %1, %%ecx; rdmsr" | ||
| 219 | : "=a" (rev) | ||
| 220 | : "i" (0x0000008B) : "ecx"); | ||
| 221 | |||
| 222 | spin_unlock_irqrestore(&microcode_update_lock, flags); | ||
| 223 | |||
| 224 | /* check current patch id and patch's id for match */ | ||
| 225 | if (rev != mc_amd->hdr.patch_id) { | ||
| 226 | printk(KERN_ERR "microcode: CPU%d update from revision " | ||
| 227 | "0x%x to 0x%x failed\n", cpu_num, | ||
| 228 | mc_amd->hdr.patch_id, rev); | ||
| 229 | return; | ||
| 230 | } | ||
| 231 | |||
| 232 | printk(KERN_INFO "microcode: CPU%d updated from revision " | ||
| 233 | "0x%x to 0x%x \n", | ||
| 234 | cpu_num, uci->cpu_sig.rev, mc_amd->hdr.patch_id); | ||
| 235 | |||
| 236 | uci->cpu_sig.rev = rev; | ||
| 237 | } | ||
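Note: the edx:eax pair passed to the WRMSR at 0xc0010020 above is just the 64-bit patch address split into 32-bit halves. A stand-alone sketch of that split (hypothetical helper, not part of the driver):

```c
#include <stdint.h>

/* Split a 64-bit linear address into the low (eax) and high (edx)
 * 32-bit halves expected by WRMSR. */
static void split_wrmsr_address(uint64_t addr, uint32_t *eax, uint32_t *edx)
{
	*eax = (uint32_t)(addr & 0xffffffffULL);  /* lower_32_bits() */
	*edx = (uint32_t)(addr >> 32);            /* upper_32_bits() */
}
```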
| 238 | |||
| 239 | static void * get_next_ucode(u8 *buf, unsigned int size, | ||
| 240 | int (*get_ucode_data)(void *, const void *, size_t), | ||
| 241 | unsigned int *mc_size) | ||
| 242 | { | ||
| 243 | unsigned int total_size; | ||
| 244 | #define UCODE_CONTAINER_SECTION_HDR 8 | ||
| 245 | u8 section_hdr[UCODE_CONTAINER_SECTION_HDR]; | ||
| 246 | void *mc; | ||
| 247 | |||
| 248 | if (get_ucode_data(section_hdr, buf, UCODE_CONTAINER_SECTION_HDR)) | ||
| 249 | return NULL; | ||
| 250 | |||
| 251 | if (section_hdr[0] != UCODE_UCODE_TYPE) { | ||
| 252 | printk(KERN_ERR "microcode: error! " | ||
| 253 | "Wrong microcode payload type field\n"); | ||
| 254 | return NULL; | ||
| 255 | } | ||
| 256 | |||
| 257 | total_size = (unsigned long) (section_hdr[4] + (section_hdr[5] << 8)); | ||
| 258 | |||
| 259 | printk(KERN_INFO "microcode: size %u, total_size %u\n", | ||
| 260 | size, total_size); | ||
| 261 | |||
| 262 | if (total_size > size || total_size > UCODE_MAX_SIZE) { | ||
| 263 | printk(KERN_ERR "microcode: error! Bad data in microcode data file\n"); | ||
| 264 | return NULL; | ||
| 265 | } | ||
| 266 | |||
| 267 | mc = vmalloc(UCODE_MAX_SIZE); | ||
| 268 | if (mc) { | ||
| 269 | memset(mc, 0, UCODE_MAX_SIZE); | ||
| 270 | if (get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, total_size)) { | ||
| 271 | vfree(mc); | ||
| 272 | mc = NULL; | ||
| 273 | } else | ||
| 274 | *mc_size = total_size + UCODE_CONTAINER_SECTION_HDR; | ||
| 275 | } | ||
| 276 | #undef UCODE_CONTAINER_SECTION_HDR | ||
| 277 | return mc; | ||
| 278 | } | ||
| 279 | |||
| 280 | |||
| 281 | static int install_equiv_cpu_table(u8 *buf, | ||
| 282 | int (*get_ucode_data)(void *, const void *, size_t)) | ||
| 283 | { | ||
| 284 | #define UCODE_CONTAINER_HEADER_SIZE 12 | ||
| 285 | u8 *container_hdr[UCODE_CONTAINER_HEADER_SIZE]; | ||
| 286 | unsigned int *buf_pos = (unsigned int *)container_hdr; | ||
| 287 | unsigned long size; | ||
| 288 | |||
| 289 | if (get_ucode_data(&container_hdr, buf, UCODE_CONTAINER_HEADER_SIZE)) | ||
| 290 | return 0; | ||
| 291 | |||
| 292 | size = buf_pos[2]; | ||
| 293 | |||
| 294 | if (buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE || !size) { | ||
| 295 | printk(KERN_ERR "microcode: error! " | ||
| 296 | "Wrong microcode equivalnet cpu table\n"); | ||
| 297 | return 0; | ||
| 298 | } | ||
| 299 | |||
| 300 | equiv_cpu_table = (struct equiv_cpu_entry *) vmalloc(size); | ||
| 301 | if (!equiv_cpu_table) { | ||
| 302 | printk(KERN_ERR "microcode: error, can't allocate memory for equiv CPU table\n"); | ||
| 303 | return 0; | ||
| 304 | } | ||
| 305 | |||
| 306 | buf += UCODE_CONTAINER_HEADER_SIZE; | ||
| 307 | if (get_ucode_data(equiv_cpu_table, buf, size)) { | ||
| 308 | vfree(equiv_cpu_table); | ||
| 309 | return 0; | ||
| 310 | } | ||
| 311 | |||
| 312 | return size + UCODE_CONTAINER_HEADER_SIZE; /* add header length */ | ||
| 313 | #undef UCODE_CONTAINER_HEADER_SIZE | ||
| 314 | } | ||
| 315 | |||
| 316 | static void free_equiv_cpu_table(void) | ||
| 317 | { | ||
| 318 | if (equiv_cpu_table) { | ||
| 319 | vfree(equiv_cpu_table); | ||
| 320 | equiv_cpu_table = NULL; | ||
| 321 | } | ||
| 322 | } | ||
| 323 | |||
| 324 | static int generic_load_microcode(int cpu, void *data, size_t size, | ||
| 325 | int (*get_ucode_data)(void *, const void *, size_t)) | ||
| 326 | { | ||
| 327 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | ||
| 328 | u8 *ucode_ptr = data, *new_mc = NULL, *mc; | ||
| 329 | int new_rev = uci->cpu_sig.rev; | ||
| 330 | unsigned int leftover; | ||
| 331 | unsigned long offset; | ||
| 332 | |||
| 333 | offset = install_equiv_cpu_table(ucode_ptr, get_ucode_data); | ||
| 334 | if (!offset) { | ||
| 335 | printk(KERN_ERR "microcode: installing equivalent cpu table failed\n"); | ||
| 336 | return -EINVAL; | ||
| 337 | } | ||
| 338 | |||
| 339 | ucode_ptr += offset; | ||
| 340 | leftover = size - offset; | ||
| 341 | |||
| 342 | while (leftover) { | ||
| 343 | unsigned int uninitialized_var(mc_size); | ||
| 344 | struct microcode_header_amd *mc_header; | ||
| 345 | |||
| 346 | mc = get_next_ucode(ucode_ptr, leftover, get_ucode_data, &mc_size); | ||
| 347 | if (!mc) | ||
| 348 | break; | ||
| 349 | |||
| 350 | mc_header = (struct microcode_header_amd *)mc; | ||
| 351 | if (get_matching_microcode(cpu, mc, new_rev)) { | ||
| 352 | if (new_mc) | ||
| 353 | vfree(new_mc); | ||
| 354 | new_rev = mc_header->patch_id; | ||
| 355 | new_mc = mc; | ||
| 356 | } else | ||
| 357 | vfree(mc); | ||
| 358 | |||
| 359 | ucode_ptr += mc_size; | ||
| 360 | leftover -= mc_size; | ||
| 361 | } | ||
| 362 | |||
| 363 | if (new_mc) { | ||
| 364 | if (!leftover) { | ||
| 365 | if (uci->mc) | ||
| 366 | vfree(uci->mc); | ||
| 367 | uci->mc = new_mc; | ||
| 368 | pr_debug("microcode: CPU%d found a matching microcode update with" | ||
| 369 | " version 0x%x (current=0x%x)\n", | ||
| 370 | cpu, new_rev, uci->cpu_sig.rev); | ||
| 371 | } else | ||
| 372 | vfree(new_mc); | ||
| 373 | } | ||
| 374 | |||
| 375 | free_equiv_cpu_table(); | ||
| 376 | |||
| 377 | return (int)leftover; | ||
| 378 | } | ||
| 379 | |||
| 380 | static int get_ucode_fw(void *to, const void *from, size_t n) | ||
| 381 | { | ||
| 382 | memcpy(to, from, n); | ||
| 383 | return 0; | ||
| 384 | } | ||
| 385 | |||
| 386 | static int request_microcode_fw(int cpu, struct device *device) | ||
| 387 | { | ||
| 388 | const char *fw_name = "amd-ucode/microcode_amd.bin"; | ||
| 389 | const struct firmware *firmware; | ||
| 390 | int ret; | ||
| 391 | |||
| 392 | /* We should bind the task to the CPU */ | ||
| 393 | BUG_ON(cpu != raw_smp_processor_id()); | ||
| 394 | |||
| 395 | ret = request_firmware(&firmware, fw_name, device); | ||
| 396 | if (ret) { | ||
| 397 | printk(KERN_ERR "microcode: ucode data file %s load failed\n", fw_name); | ||
| 398 | return ret; | ||
| 399 | } | ||
| 400 | |||
| 401 | ret = generic_load_microcode(cpu, (void*)firmware->data, firmware->size, | ||
| 402 | &get_ucode_fw); | ||
| 403 | |||
| 404 | release_firmware(firmware); | ||
| 405 | |||
| 406 | return ret; | ||
| 407 | } | ||
| 408 | |||
| 409 | static int request_microcode_user(int cpu, const void __user *buf, size_t size) | ||
| 410 | { | ||
| 411 | printk(KERN_WARNING "microcode: AMD microcode update via /dev/cpu/microcode" | ||
| 412 | "is not supported\n"); | ||
| 413 | return -1; | ||
| 414 | } | ||
| 415 | |||
| 416 | static void microcode_fini_cpu_amd(int cpu) | ||
| 417 | { | ||
| 418 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | ||
| 419 | |||
| 420 | vfree(uci->mc); | ||
| 421 | uci->mc = NULL; | ||
| 422 | } | ||
| 423 | |||
| 424 | static struct microcode_ops microcode_amd_ops = { | ||
| 425 | .request_microcode_user = request_microcode_user, | ||
| 426 | .request_microcode_fw = request_microcode_fw, | ||
| 427 | .collect_cpu_info = collect_cpu_info_amd, | ||
| 428 | .apply_microcode = apply_microcode_amd, | ||
| 429 | .microcode_fini_cpu = microcode_fini_cpu_amd, | ||
| 430 | }; | ||
| 431 | |||
| 432 | struct microcode_ops * __init init_amd_microcode(void) | ||
| 433 | { | ||
| 434 | return &microcode_amd_ops; | ||
| 435 | } | ||
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c new file mode 100644 index 000000000000..936d8d55f230 --- /dev/null +++ b/arch/x86/kernel/microcode_core.c | |||
| @@ -0,0 +1,508 @@ | |||
| 1 | /* | ||
| 2 | * Intel CPU Microcode Update Driver for Linux | ||
| 3 | * | ||
| 4 | * Copyright (C) 2000-2006 Tigran Aivazian <tigran@aivazian.fsnet.co.uk> | ||
| 5 | * 2006 Shaohua Li <shaohua.li@intel.com> | ||
| 6 | * | ||
| 7 | * This driver allows one to upgrade microcode on Intel processors | ||
| 8 | * belonging to IA-32 family - PentiumPro, Pentium II, | ||
| 9 | * Pentium III, Xeon, Pentium 4, etc. | ||
| 10 | * | ||
| 11 | * Reference: Section 8.11 of Volume 3a, IA-32 Intel® Architecture | ||
| 12 | * Software Developer's Manual | ||
| 13 | * Order Number 253668 or free download from: | ||
| 14 | * | ||
| 15 | * http://developer.intel.com/design/pentium4/manuals/253668.htm | ||
| 16 | * | ||
| 17 | * For more information, go to http://www.urbanmyth.org/microcode | ||
| 18 | * | ||
| 19 | * This program is free software; you can redistribute it and/or | ||
| 20 | * modify it under the terms of the GNU General Public License | ||
| 21 | * as published by the Free Software Foundation; either version | ||
| 22 | * 2 of the License, or (at your option) any later version. | ||
| 23 | * | ||
| 24 | * 1.0 16 Feb 2000, Tigran Aivazian <tigran@sco.com> | ||
| 25 | * Initial release. | ||
| 26 | * 1.01 18 Feb 2000, Tigran Aivazian <tigran@sco.com> | ||
| 27 | * Added read() support + cleanups. | ||
| 28 | * 1.02 21 Feb 2000, Tigran Aivazian <tigran@sco.com> | ||
| 29 | * Added 'device trimming' support. open(O_WRONLY) zeroes | ||
| 30 | * and frees the saved copy of applied microcode. | ||
| 31 | * 1.03 29 Feb 2000, Tigran Aivazian <tigran@sco.com> | ||
| 32 | * Made to use devfs (/dev/cpu/microcode) + cleanups. | ||
| 33 | * 1.04 06 Jun 2000, Simon Trimmer <simon@veritas.com> | ||
| 34 | * Added misc device support (now uses both devfs and misc). | ||
| 35 | * Added MICROCODE_IOCFREE ioctl to clear memory. | ||
| 36 | * 1.05 09 Jun 2000, Simon Trimmer <simon@veritas.com> | ||
| 37 | * Messages for error cases (non Intel & no suitable microcode). | ||
| 38 | * 1.06 03 Aug 2000, Tigran Aivazian <tigran@veritas.com> | ||
| 39 | * Removed ->release(). Removed exclusive open and status bitmap. | ||
| 40 | * Added microcode_rwsem to serialize read()/write()/ioctl(). | ||
| 41 | * Removed global kernel lock usage. | ||
| 42 | * 1.07 07 Sep 2000, Tigran Aivazian <tigran@veritas.com> | ||
| 43 | * Write 0 to 0x8B msr and then cpuid before reading revision, | ||
| 44 | * so that it works even if there were no update done by the | ||
| 45 | * BIOS. Otherwise, reading from 0x8B gives junk (which happened | ||
| 46 | * to be 0 on my machine which is why it worked even when I | ||
| 47 | * disabled update by the BIOS) | ||
| 48 | * Thanks to Eric W. Biederman <ebiederman@lnxi.com> for the fix. | ||
| 49 | * 1.08 11 Dec 2000, Richard Schaal <richard.schaal@intel.com> and | ||
| 50 | * Tigran Aivazian <tigran@veritas.com> | ||
| 51 | * Intel Pentium 4 processor support and bugfixes. | ||
| 52 | * 1.09 30 Oct 2001, Tigran Aivazian <tigran@veritas.com> | ||
| 53 | * Bugfix for HT (Hyper-Threading) enabled processors | ||
| 54 | * whereby processor resources are shared by all logical processors | ||
| 55 | * in a single CPU package. | ||
| 56 | * 1.10 28 Feb 2002 Asit K Mallick <asit.k.mallick@intel.com> and | ||
| 57 | * Tigran Aivazian <tigran@veritas.com>, | ||
| 58 | * Serialize updates as required on HT processors due to | ||
| 59 | * speculative nature of implementation. | ||
| 60 | * 1.11 22 Mar 2002 Tigran Aivazian <tigran@veritas.com> | ||
| 61 | * Fix the panic when writing zero-length microcode chunk. | ||
| 62 | * 1.12 29 Sep 2003 Nitin Kamble <nitin.a.kamble@intel.com>, | ||
| 63 | * Jun Nakajima <jun.nakajima@intel.com> | ||
| 64 | * Support for the microcode updates in the new format. | ||
| 65 | * 1.13 10 Oct 2003 Tigran Aivazian <tigran@veritas.com> | ||
| 66 | * Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl | ||
| 67 | * because we no longer hold a copy of applied microcode | ||
| 68 | * in kernel memory. | ||
| 69 | * 1.14 25 Jun 2004 Tigran Aivazian <tigran@veritas.com> | ||
| 70 | * Fix sigmatch() macro to handle old CPUs with pf == 0. | ||
| 71 | * Thanks to Stuart Swales for pointing out this bug. | ||
| 72 | */ | ||
| 73 | #include <linux/capability.h> | ||
| 74 | #include <linux/kernel.h> | ||
| 75 | #include <linux/init.h> | ||
| 76 | #include <linux/sched.h> | ||
| 77 | #include <linux/smp_lock.h> | ||
| 78 | #include <linux/cpumask.h> | ||
| 79 | #include <linux/module.h> | ||
| 80 | #include <linux/slab.h> | ||
| 81 | #include <linux/vmalloc.h> | ||
| 82 | #include <linux/miscdevice.h> | ||
| 83 | #include <linux/spinlock.h> | ||
| 84 | #include <linux/mm.h> | ||
| 85 | #include <linux/fs.h> | ||
| 86 | #include <linux/mutex.h> | ||
| 87 | #include <linux/cpu.h> | ||
| 88 | #include <linux/firmware.h> | ||
| 89 | #include <linux/platform_device.h> | ||
| 90 | |||
| 91 | #include <asm/msr.h> | ||
| 92 | #include <asm/uaccess.h> | ||
| 93 | #include <asm/processor.h> | ||
| 94 | #include <asm/microcode.h> | ||
| 95 | |||
| 96 | MODULE_DESCRIPTION("Microcode Update Driver"); | ||
| 97 | MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>"); | ||
| 98 | MODULE_LICENSE("GPL"); | ||
| 99 | |||
| 100 | #define MICROCODE_VERSION "2.00" | ||
| 101 | |||
| 102 | struct microcode_ops *microcode_ops; | ||
| 103 | |||
| 104 | /* no concurrent ->write()s are allowed on /dev/cpu/microcode */ | ||
| 105 | static DEFINE_MUTEX(microcode_mutex); | ||
| 106 | |||
| 107 | struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; | ||
| 108 | EXPORT_SYMBOL_GPL(ucode_cpu_info); | ||
| 109 | |||
| 110 | #ifdef CONFIG_MICROCODE_OLD_INTERFACE | ||
| 111 | static int do_microcode_update(const void __user *buf, size_t size) | ||
| 112 | { | ||
| 113 | cpumask_t old; | ||
| 114 | int error = 0; | ||
| 115 | int cpu; | ||
| 116 | |||
| 117 | old = current->cpus_allowed; | ||
| 118 | |||
| 119 | for_each_online_cpu(cpu) { | ||
| 120 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | ||
| 121 | |||
| 122 | if (!uci->valid) | ||
| 123 | continue; | ||
| 124 | |||
| 125 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); | ||
| 126 | error = microcode_ops->request_microcode_user(cpu, buf, size); | ||
| 127 | if (error < 0) | ||
| 128 | goto out; | ||
| 129 | if (!error) | ||
| 130 | microcode_ops->apply_microcode(cpu); | ||
| 131 | } | ||
| 132 | out: | ||
| 133 | set_cpus_allowed_ptr(current, &old); | ||
| 134 | return error; | ||
| 135 | } | ||
| 136 | |||
| 137 | static int microcode_open(struct inode *unused1, struct file *unused2) | ||
| 138 | { | ||
| 139 | cycle_kernel_lock(); | ||
| 140 | return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; | ||
| 141 | } | ||
| 142 | |||
| 143 | static ssize_t microcode_write(struct file *file, const char __user *buf, | ||
| 144 | size_t len, loff_t *ppos) | ||
| 145 | { | ||
| 146 | ssize_t ret; | ||
| 147 | |||
| 148 | if ((len >> PAGE_SHIFT) > num_physpages) { | ||
| 149 | printk(KERN_ERR "microcode: too much data (max %ld pages)\n", | ||
| 150 | num_physpages); | ||
| 151 | return -EINVAL; | ||
| 152 | } | ||
| 153 | |||
| 154 | get_online_cpus(); | ||
| 155 | mutex_lock(&microcode_mutex); | ||
| 156 | |||
| 157 | ret = do_microcode_update(buf, len); | ||
| 158 | if (!ret) | ||
| 159 | ret = (ssize_t)len; | ||
| 160 | |||
| 161 | mutex_unlock(&microcode_mutex); | ||
| 162 | put_online_cpus(); | ||
| 163 | |||
| 164 | return ret; | ||
| 165 | } | ||
| 166 | |||
| 167 | static const struct file_operations microcode_fops = { | ||
| 168 | .owner = THIS_MODULE, | ||
| 169 | .write = microcode_write, | ||
| 170 | .open = microcode_open, | ||
| 171 | }; | ||
| 172 | |||
| 173 | static struct miscdevice microcode_dev = { | ||
| 174 | .minor = MICROCODE_MINOR, | ||
| 175 | .name = "microcode", | ||
| 176 | .fops = &microcode_fops, | ||
| 177 | }; | ||
| 178 | |||
| 179 | static int __init microcode_dev_init(void) | ||
| 180 | { | ||
| 181 | int error; | ||
| 182 | |||
| 183 | error = misc_register(&microcode_dev); | ||
| 184 | if (error) { | ||
| 185 | printk(KERN_ERR | ||
| 186 | "microcode: can't misc_register on minor=%d\n", | ||
| 187 | MICROCODE_MINOR); | ||
| 188 | return error; | ||
| 189 | } | ||
| 190 | |||
| 191 | return 0; | ||
| 192 | } | ||
| 193 | |||
| 194 | static void microcode_dev_exit(void) | ||
| 195 | { | ||
| 196 | misc_deregister(&microcode_dev); | ||
| 197 | } | ||
| 198 | |||
| 199 | MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); | ||
| 200 | #else | ||
| 201 | #define microcode_dev_init() 0 | ||
| 202 | #define microcode_dev_exit() do { } while (0) | ||
| 203 | #endif | ||
| 204 | |||
| 205 | /* fake device for request_firmware */ | ||
| 206 | struct platform_device *microcode_pdev; | ||
| 207 | |||
| 208 | static ssize_t reload_store(struct sys_device *dev, | ||
| 209 | struct sysdev_attribute *attr, | ||
| 210 | const char *buf, size_t sz) | ||
| 211 | { | ||
| 212 | struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; | ||
| 213 | char *end; | ||
| 214 | unsigned long val = simple_strtoul(buf, &end, 0); | ||
| 215 | int err = 0; | ||
| 216 | int cpu = dev->id; | ||
| 217 | |||
| 218 | if (end == buf) | ||
| 219 | return -EINVAL; | ||
| 220 | if (val == 1) { | ||
| 221 | cpumask_t old = current->cpus_allowed; | ||
| 222 | |||
| 223 | get_online_cpus(); | ||
| 224 | if (cpu_online(cpu)) { | ||
| 225 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); | ||
| 226 | mutex_lock(&microcode_mutex); | ||
| 227 | if (uci->valid) { | ||
| 228 | err = microcode_ops->request_microcode_fw(cpu, | ||
| 229 | &microcode_pdev->dev); | ||
| 230 | if (!err) | ||
| 231 | microcode_ops->apply_microcode(cpu); | ||
| 232 | } | ||
| 233 | mutex_unlock(&microcode_mutex); | ||
| 234 | set_cpus_allowed_ptr(current, &old); | ||
| 235 | } | ||
| 236 | put_online_cpus(); | ||
| 237 | } | ||
| 238 | if (err) | ||
| 239 | return err; | ||
| 240 | return sz; | ||
| 241 | } | ||
| 242 | |||
| 243 | static ssize_t version_show(struct sys_device *dev, | ||
| 244 | struct sysdev_attribute *attr, char *buf) | ||
| 245 | { | ||
| 246 | struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; | ||
| 247 | |||
| 248 | return sprintf(buf, "0x%x\n", uci->cpu_sig.rev); | ||
| 249 | } | ||
| 250 | |||
| 251 | static ssize_t pf_show(struct sys_device *dev, | ||
| 252 | struct sysdev_attribute *attr, char *buf) | ||
| 253 | { | ||
| 254 | struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; | ||
| 255 | |||
| 256 | return sprintf(buf, "0x%x\n", uci->cpu_sig.pf); | ||
| 257 | } | ||
| 258 | |||
| 259 | static SYSDEV_ATTR(reload, 0200, NULL, reload_store); | ||
| 260 | static SYSDEV_ATTR(version, 0400, version_show, NULL); | ||
| 261 | static SYSDEV_ATTR(processor_flags, 0400, pf_show, NULL); | ||
| 262 | |||
| 263 | static struct attribute *mc_default_attrs[] = { | ||
| 264 | &attr_reload.attr, | ||
| 265 | &attr_version.attr, | ||
| 266 | &attr_processor_flags.attr, | ||
| 267 | NULL | ||
| 268 | }; | ||
| 269 | |||
| 270 | static struct attribute_group mc_attr_group = { | ||
| 271 | .attrs = mc_default_attrs, | ||
| 272 | .name = "microcode", | ||
| 273 | }; | ||
| 274 | |||
| 275 | static void microcode_fini_cpu(int cpu) | ||
| 276 | { | ||
| 277 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | ||
| 278 | |||
| 279 | mutex_lock(&microcode_mutex); | ||
| 280 | microcode_ops->microcode_fini_cpu(cpu); | ||
| 281 | uci->valid = 0; | ||
| 282 | mutex_unlock(&microcode_mutex); | ||
| 283 | } | ||
| 284 | |||
| 285 | static void collect_cpu_info(int cpu) | ||
| 286 | { | ||
| 287 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | ||
| 288 | |||
| 289 | memset(uci, 0, sizeof(*uci)); | ||
| 290 | if (!microcode_ops->collect_cpu_info(cpu, &uci->cpu_sig)) | ||
| 291 | uci->valid = 1; | ||
| 292 | } | ||
| 293 | |||
| 294 | static int microcode_resume_cpu(int cpu) | ||
| 295 | { | ||
| 296 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | ||
| 297 | struct cpu_signature nsig; | ||
| 298 | |||
| 299 | pr_debug("microcode: CPU%d resumed\n", cpu); | ||
| 300 | |||
| 301 | if (!uci->mc) | ||
| 302 | return 1; | ||
| 303 | |||
| 304 | /* | ||
| 305 | * Let's verify that the 'cached' ucode does belong | ||
| 306 | * to this cpu (a bit of paranoia): | ||
| 307 | */ | ||
| 308 | if (microcode_ops->collect_cpu_info(cpu, &nsig)) { | ||
| 309 | microcode_fini_cpu(cpu); | ||
| 310 | return -1; | ||
| 311 | } | ||
| 312 | |||
| 313 | if (memcmp(&nsig, &uci->cpu_sig, sizeof(nsig))) { | ||
| 314 | microcode_fini_cpu(cpu); | ||
| 315 | /* Should we look for a new ucode here? */ | ||
| 316 | return 1; | ||
| 317 | } | ||
| 318 | |||
| 319 | return 0; | ||
| 320 | } | ||
| 321 | |||
| 322 | void microcode_update_cpu(int cpu) | ||
| 323 | { | ||
| 324 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | ||
| 325 | int err = 0; | ||
| 326 | |||
| 327 | /* | ||
| 328 | * Check if the system resume is in progress (uci->valid != NULL), | ||
| 329 | * otherwise just request a firmware: | ||
| 330 | */ | ||
| 331 | if (uci->valid) { | ||
| 332 | err = microcode_resume_cpu(cpu); | ||
| 333 | } else { | ||
| 334 | collect_cpu_info(cpu); | ||
| 335 | if (uci->valid && system_state == SYSTEM_RUNNING) | ||
| 336 | err = microcode_ops->request_microcode_fw(cpu, | ||
| 337 | &microcode_pdev->dev); | ||
| 338 | } | ||
| 339 | if (!err) | ||
| 340 | microcode_ops->apply_microcode(cpu); | ||
| 341 | } | ||
| 342 | |||
| 343 | static void microcode_init_cpu(int cpu) | ||
| 344 | { | ||
| 345 | cpumask_t old = current->cpus_allowed; | ||
| 346 | |||
| 347 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); | ||
| 348 | /* We should bind the task to the CPU */ | ||
| 349 | BUG_ON(raw_smp_processor_id() != cpu); | ||
| 350 | |||
| 351 | mutex_lock(&microcode_mutex); | ||
| 352 | microcode_update_cpu(cpu); | ||
| 353 | mutex_unlock(&microcode_mutex); | ||
| 354 | |||
| 355 | set_cpus_allowed_ptr(current, &old); | ||
| 356 | } | ||
| 357 | |||
| 358 | static int mc_sysdev_add(struct sys_device *sys_dev) | ||
| 359 | { | ||
| 360 | int err, cpu = sys_dev->id; | ||
| 361 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | ||
| 362 | |||
| 363 | if (!cpu_online(cpu)) | ||
| 364 | return 0; | ||
| 365 | |||
| 366 | pr_debug("microcode: CPU%d added\n", cpu); | ||
| 367 | memset(uci, 0, sizeof(*uci)); | ||
| 368 | |||
| 369 | err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group); | ||
| 370 | if (err) | ||
| 371 | return err; | ||
| 372 | |||
| 373 | microcode_init_cpu(cpu); | ||
| 374 | return 0; | ||
| 375 | } | ||
| 376 | |||
| 377 | static int mc_sysdev_remove(struct sys_device *sys_dev) | ||
| 378 | { | ||
| 379 | int cpu = sys_dev->id; | ||
| 380 | |||
| 381 | if (!cpu_online(cpu)) | ||
| 382 | return 0; | ||
| 383 | |||
| 384 | pr_debug("microcode: CPU%d removed\n", cpu); | ||
| 385 | microcode_fini_cpu(cpu); | ||
| 386 | sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); | ||
| 387 | return 0; | ||
| 388 | } | ||
| 389 | |||
| 390 | static int mc_sysdev_resume(struct sys_device *dev) | ||
| 391 | { | ||
| 392 | int cpu = dev->id; | ||
| 393 | |||
| 394 | if (!cpu_online(cpu)) | ||
| 395 | return 0; | ||
| 396 | |||
| 397 | /* only CPU 0 will apply ucode here */ | ||
| 398 | microcode_update_cpu(0); | ||
| 399 | return 0; | ||
| 400 | } | ||
| 401 | |||
| 402 | static struct sysdev_driver mc_sysdev_driver = { | ||
| 403 | .add = mc_sysdev_add, | ||
| 404 | .remove = mc_sysdev_remove, | ||
| 405 | .resume = mc_sysdev_resume, | ||
| 406 | }; | ||
| 407 | |||
| 408 | static __cpuinit int | ||
| 409 | mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) | ||
| 410 | { | ||
| 411 | unsigned int cpu = (unsigned long)hcpu; | ||
| 412 | struct sys_device *sys_dev; | ||
| 413 | |||
| 414 | sys_dev = get_cpu_sysdev(cpu); | ||
| 415 | switch (action) { | ||
| 416 | case CPU_ONLINE: | ||
| 417 | case CPU_ONLINE_FROZEN: | ||
| 418 | microcode_init_cpu(cpu); | ||
| 419 | case CPU_DOWN_FAILED: | ||
| 420 | case CPU_DOWN_FAILED_FROZEN: | ||
| 421 | pr_debug("microcode: CPU%d added\n", cpu); | ||
| 422 | if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group)) | ||
| 423 | printk(KERN_ERR "microcode: Failed to create the sysfs " | ||
| 424 | "group for CPU%d\n", cpu); | ||
| 425 | break; | ||
| 426 | case CPU_DOWN_PREPARE: | ||
| 427 | case CPU_DOWN_PREPARE_FROZEN: | ||
| 428 | /* Suspend is in progress, only remove the interface */ | ||
| 429 | sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); | ||
| 430 | pr_debug("microcode: CPU%d removed\n", cpu); | ||
| 431 | break; | ||
| 432 | case CPU_DEAD: | ||
| 433 | case CPU_UP_CANCELED_FROZEN: | ||
| 434 | /* The CPU refused to come up during a system resume */ | ||
| 435 | microcode_fini_cpu(cpu); | ||
| 436 | break; | ||
| 437 | } | ||
| 438 | return NOTIFY_OK; | ||
| 439 | } | ||
| 440 | |||
| 441 | static struct notifier_block __refdata mc_cpu_notifier = { | ||
| 442 | .notifier_call = mc_cpu_callback, | ||
| 443 | }; | ||
| 444 | |||
| 445 | static int __init microcode_init(void) | ||
| 446 | { | ||
| 447 | struct cpuinfo_x86 *c = &cpu_data(0); | ||
| 448 | int error; | ||
| 449 | |||
| 450 | if (c->x86_vendor == X86_VENDOR_INTEL) | ||
| 451 | microcode_ops = init_intel_microcode(); | ||
| 452 | else if (c->x86_vendor == X86_VENDOR_AMD) | ||
| 453 | microcode_ops = init_amd_microcode(); | ||
| 454 | |||
| 455 | if (!microcode_ops) { | ||
| 456 | printk(KERN_ERR "microcode: no support for this CPU vendor\n"); | ||
| 457 | return -ENODEV; | ||
| 458 | } | ||
| 459 | |||
| 460 | error = microcode_dev_init(); | ||
| 461 | if (error) | ||
| 462 | return error; | ||
| 463 | microcode_pdev = platform_device_register_simple("microcode", -1, | ||
| 464 | NULL, 0); | ||
| 465 | if (IS_ERR(microcode_pdev)) { | ||
| 466 | microcode_dev_exit(); | ||
| 467 | return PTR_ERR(microcode_pdev); | ||
| 468 | } | ||
| 469 | |||
| 470 | get_online_cpus(); | ||
| 471 | error = sysdev_driver_register(&cpu_sysdev_class, &mc_sysdev_driver); | ||
| 472 | put_online_cpus(); | ||
| 473 | if (error) { | ||
| 474 | microcode_dev_exit(); | ||
| 475 | platform_device_unregister(microcode_pdev); | ||
| 476 | return error; | ||
| 477 | } | ||
| 478 | |||
| 479 | register_hotcpu_notifier(&mc_cpu_notifier); | ||
| 480 | |||
| 481 | printk(KERN_INFO | ||
| 482 | "Microcode Update Driver: v" MICROCODE_VERSION | ||
| 483 | " <tigran@aivazian.fsnet.co.uk>" | ||
| 484 | " <peter.oruba@amd.com>\n"); | ||
| 485 | |||
| 486 | return 0; | ||
| 487 | } | ||
| 488 | |||
| 489 | static void __exit microcode_exit(void) | ||
| 490 | { | ||
| 491 | microcode_dev_exit(); | ||
| 492 | |||
| 493 | unregister_hotcpu_notifier(&mc_cpu_notifier); | ||
| 494 | |||
| 495 | get_online_cpus(); | ||
| 496 | sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver); | ||
| 497 | put_online_cpus(); | ||
| 498 | |||
| 499 | platform_device_unregister(microcode_pdev); | ||
| 500 | |||
| 501 | microcode_ops = NULL; | ||
| 502 | |||
| 503 | printk(KERN_INFO | ||
| 504 | "Microcode Update Driver: v" MICROCODE_VERSION " removed.\n"); | ||
| 505 | } | ||
| 506 | |||
| 507 | module_init(microcode_init); | ||
| 508 | module_exit(microcode_exit); | ||
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c new file mode 100644 index 000000000000..622dc4a21784 --- /dev/null +++ b/arch/x86/kernel/microcode_intel.c | |||
| @@ -0,0 +1,480 @@ | |||
| 1 | /* | ||
| 2 | * Intel CPU Microcode Update Driver for Linux | ||
| 3 | * | ||
| 4 | * Copyright (C) 2000-2006 Tigran Aivazian <tigran@aivazian.fsnet.co.uk> | ||
| 5 | * 2006 Shaohua Li <shaohua.li@intel.com> | ||
| 6 | * | ||
| 7 | * This driver allows one to upgrade microcode on Intel processors | ||
| 8 | * belonging to IA-32 family - PentiumPro, Pentium II, | ||
| 9 | * Pentium III, Xeon, Pentium 4, etc. | ||
| 10 | * | ||
| 11 | * Reference: Section 8.11 of Volume 3a, IA-32 Intel® Architecture | ||
| 12 | * Software Developer's Manual | ||
| 13 | * Order Number 253668 or free download from: | ||
| 14 | * | ||
| 15 | * http://developer.intel.com/design/pentium4/manuals/253668.htm | ||
| 16 | * | ||
| 17 | * For more information, go to http://www.urbanmyth.org/microcode | ||
| 18 | * | ||
| 19 | * This program is free software; you can redistribute it and/or | ||
| 20 | * modify it under the terms of the GNU General Public License | ||
| 21 | * as published by the Free Software Foundation; either version | ||
| 22 | * 2 of the License, or (at your option) any later version. | ||
| 23 | * | ||
| 24 | * 1.0 16 Feb 2000, Tigran Aivazian <tigran@sco.com> | ||
| 25 | * Initial release. | ||
| 26 | * 1.01 18 Feb 2000, Tigran Aivazian <tigran@sco.com> | ||
| 27 | * Added read() support + cleanups. | ||
| 28 | * 1.02 21 Feb 2000, Tigran Aivazian <tigran@sco.com> | ||
| 29 | * Added 'device trimming' support. open(O_WRONLY) zeroes | ||
| 30 | * and frees the saved copy of applied microcode. | ||
| 31 | * 1.03 29 Feb 2000, Tigran Aivazian <tigran@sco.com> | ||
| 32 | * Made to use devfs (/dev/cpu/microcode) + cleanups. | ||
| 33 | * 1.04 06 Jun 2000, Simon Trimmer <simon@veritas.com> | ||
| 34 | * Added misc device support (now uses both devfs and misc). | ||
| 35 | * Added MICROCODE_IOCFREE ioctl to clear memory. | ||
| 36 | * 1.05 09 Jun 2000, Simon Trimmer <simon@veritas.com> | ||
| 37 | * Messages for error cases (non Intel & no suitable microcode). | ||
| 38 | * 1.06 03 Aug 2000, Tigran Aivazian <tigran@veritas.com> | ||
| 39 | * Removed ->release(). Removed exclusive open and status bitmap. | ||
| 40 | * Added microcode_rwsem to serialize read()/write()/ioctl(). | ||
| 41 | * Removed global kernel lock usage. | ||
| 42 | * 1.07 07 Sep 2000, Tigran Aivazian <tigran@veritas.com> | ||
| 43 | * Write 0 to 0x8B msr and then cpuid before reading revision, | ||
| 44 | * so that it works even if there were no update done by the | ||
| 45 | * BIOS. Otherwise, reading from 0x8B gives junk (which happened | ||
| 46 | * to be 0 on my machine which is why it worked even when I | ||
| 47 | * disabled update by the BIOS) | ||
| 48 | * Thanks to Eric W. Biederman <ebiederman@lnxi.com> for the fix. | ||
| 49 | * 1.08 11 Dec 2000, Richard Schaal <richard.schaal@intel.com> and | ||
| 50 | * Tigran Aivazian <tigran@veritas.com> | ||
| 51 | * Intel Pentium 4 processor support and bugfixes. | ||
| 52 | * 1.09 30 Oct 2001, Tigran Aivazian <tigran@veritas.com> | ||
| 53 | * Bugfix for HT (Hyper-Threading) enabled processors | ||
| 54 | * whereby processor resources are shared by all logical processors | ||
| 55 | * in a single CPU package. | ||
| 56 | * 1.10 28 Feb 2002 Asit K Mallick <asit.k.mallick@intel.com> and | ||
| 57 | * Tigran Aivazian <tigran@veritas.com>, | ||
| 58 | * Serialize updates as required on HT processors due to | ||
| 59 | * speculative nature of implementation. | ||
| 60 | * 1.11 22 Mar 2002 Tigran Aivazian <tigran@veritas.com> | ||
| 61 | * Fix the panic when writing zero-length microcode chunk. | ||
| 62 | * 1.12 29 Sep 2003 Nitin Kamble <nitin.a.kamble@intel.com>, | ||
| 63 | * Jun Nakajima <jun.nakajima@intel.com> | ||
| 64 | * Support for the microcode updates in the new format. | ||
| 65 | * 1.13 10 Oct 2003 Tigran Aivazian <tigran@veritas.com> | ||
| 66 | * Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl | ||
| 67 | * because we no longer hold a copy of applied microcode | ||
| 68 | * in kernel memory. | ||
| 69 | * 1.14 25 Jun 2004 Tigran Aivazian <tigran@veritas.com> | ||
| 70 | * Fix sigmatch() macro to handle old CPUs with pf == 0. | ||
| 71 | * Thanks to Stuart Swales for pointing out this bug. | ||
| 72 | */ | ||
| 73 | #include <linux/capability.h> | ||
| 74 | #include <linux/kernel.h> | ||
| 75 | #include <linux/init.h> | ||
| 76 | #include <linux/sched.h> | ||
| 77 | #include <linux/smp_lock.h> | ||
| 78 | #include <linux/cpumask.h> | ||
| 79 | #include <linux/module.h> | ||
| 80 | #include <linux/slab.h> | ||
| 81 | #include <linux/vmalloc.h> | ||
| 82 | #include <linux/miscdevice.h> | ||
| 83 | #include <linux/spinlock.h> | ||
| 84 | #include <linux/mm.h> | ||
| 85 | #include <linux/fs.h> | ||
| 86 | #include <linux/mutex.h> | ||
| 87 | #include <linux/cpu.h> | ||
| 88 | #include <linux/firmware.h> | ||
| 89 | #include <linux/platform_device.h> | ||
| 90 | |||
| 91 | #include <asm/msr.h> | ||
| 92 | #include <asm/uaccess.h> | ||
| 93 | #include <asm/processor.h> | ||
| 94 | #include <asm/microcode.h> | ||
| 95 | |||
| 96 | MODULE_DESCRIPTION("Microcode Update Driver"); | ||
| 97 | MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>"); | ||
| 98 | MODULE_LICENSE("GPL"); | ||
| 99 | |||
| 100 | struct microcode_header_intel { | ||
| 101 | unsigned int hdrver; | ||
| 102 | unsigned int rev; | ||
| 103 | unsigned int date; | ||
| 104 | unsigned int sig; | ||
| 105 | unsigned int cksum; | ||
| 106 | unsigned int ldrver; | ||
| 107 | unsigned int pf; | ||
| 108 | unsigned int datasize; | ||
| 109 | unsigned int totalsize; | ||
| 110 | unsigned int reserved[3]; | ||
| 111 | }; | ||
| 112 | |||
| 113 | struct microcode_intel { | ||
| 114 | struct microcode_header_intel hdr; | ||
| 115 | unsigned int bits[0]; | ||
| 116 | }; | ||
| 117 | |||
| 118 | /* microcode format is extended from prescott processors */ | ||
| 119 | struct extended_signature { | ||
| 120 | unsigned int sig; | ||
| 121 | unsigned int pf; | ||
| 122 | unsigned int cksum; | ||
| 123 | }; | ||
| 124 | |||
| 125 | struct extended_sigtable { | ||
| 126 | unsigned int count; | ||
| 127 | unsigned int cksum; | ||
| 128 | unsigned int reserved[3]; | ||
| 129 | struct extended_signature sigs[0]; | ||
| 130 | }; | ||
| 131 | |||
| 132 | #define DEFAULT_UCODE_DATASIZE (2000) | ||
| 133 | #define MC_HEADER_SIZE (sizeof(struct microcode_header_intel)) | ||
| 134 | #define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) | ||
| 135 | #define EXT_HEADER_SIZE (sizeof(struct extended_sigtable)) | ||
| 136 | #define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature)) | ||
| 137 | #define DWSIZE (sizeof(u32)) | ||
| 138 | #define get_totalsize(mc) \ | ||
| 139 | (((struct microcode_intel *)mc)->hdr.totalsize ? \ | ||
| 140 | ((struct microcode_intel *)mc)->hdr.totalsize : \ | ||
| 141 | DEFAULT_UCODE_TOTALSIZE) | ||
| 142 | |||
| 143 | #define get_datasize(mc) \ | ||
| 144 | (((struct microcode_intel *)mc)->hdr.datasize ? \ | ||
| 145 | ((struct microcode_intel *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE) | ||
| 146 | |||
| 147 | #define sigmatch(s1, s2, p1, p2) \ | ||
| 148 | (((s1) == (s2)) && (((p1) & (p2)) || (((p1) == 0) && ((p2) == 0)))) | ||
| 149 | |||
| 150 | #define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE) | ||
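Note: the sigmatch() macro above encodes the Intel matching rule: the signatures must be equal, and the platform-flag masks must overlap, with an exception for old parts where both pf fields are 0. A stand-alone restatement of that predicate (name is hypothetical):

```c
#include <stdbool.h>

/* Same rule as sigmatch(): equal signatures, and either overlapping
 * platform-flag masks or both masks zero (pre-pf CPUs). */
static bool sig_matches(unsigned int s1, unsigned int s2,
			unsigned int p1, unsigned int p2)
{
	return (s1 == s2) && ((p1 & p2) || (p1 == 0 && p2 == 0));
}
```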
| 151 | |||
| 152 | /* serialize access to the physical write to MSR 0x79 */ | ||
| 153 | static DEFINE_SPINLOCK(microcode_update_lock); | ||
| 154 | |||
| 155 | static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) | ||
| 156 | { | ||
| 157 | struct cpuinfo_x86 *c = &cpu_data(cpu_num); | ||
| 158 | unsigned int val[2]; | ||
| 159 | |||
| 160 | memset(csig, 0, sizeof(*csig)); | ||
| 161 | |||
| 162 | if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || | ||
| 163 | cpu_has(c, X86_FEATURE_IA64)) { | ||
| 164 | printk(KERN_ERR "microcode: CPU%d not a capable Intel " | ||
| 165 | "processor\n", cpu_num); | ||
| 166 | return -1; | ||
| 167 | } | ||
| 168 | |||
| 169 | csig->sig = cpuid_eax(0x00000001); | ||
| 170 | |||
| 171 | if ((c->x86_model >= 5) || (c->x86 > 6)) { | ||
| 172 | /* get processor flags from MSR 0x17 */ | ||
| 173 | rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); | ||
| 174 | csig->pf = 1 << ((val[1] >> 18) & 7); | ||
| 175 | } | ||
| 176 | |||
| 177 | wrmsr(MSR_IA32_UCODE_REV, 0, 0); | ||
| 178 | /* see notes above for revision 1.07. Apparent chip bug */ | ||
| 179 | sync_core(); | ||
| 180 | /* get the current revision from MSR 0x8B */ | ||
| 181 | rdmsr(MSR_IA32_UCODE_REV, val[0], csig->rev); | ||
| 182 | pr_debug("microcode: collect_cpu_info : sig=0x%x, pf=0x%x, rev=0x%x\n", | ||
| 183 | csig->sig, csig->pf, csig->rev); | ||
| 184 | |||
| 185 | return 0; | ||
| 186 | } | ||
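Note: the processor-flags value computed in collect_cpu_info() is a one-hot mask derived from the 3-bit platform ID in MSR_IA32_PLATFORM_ID (bits 20:18 of its high dword, i.e. bits 52:50 of the MSR). A minimal sketch of that derivation (hypothetical helper):

```c
#include <stdint.h>

/* Turn the 3-bit platform ID from the high dword of MSR_IA32_PLATFORM_ID
 * into the single-bit processor-flags mask used for sigmatch(). */
static uint32_t platform_flags(uint32_t platform_id_high)
{
	return 1u << ((platform_id_high >> 18) & 7);
}
```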
| 187 | |||
| 188 | static inline int update_match_cpu(struct cpu_signature *csig, int sig, int pf) | ||
| 189 | { | ||
| 190 | return (!sigmatch(sig, csig->sig, pf, csig->pf)) ? 0 : 1; | ||
| 191 | } | ||
| 192 | |||
| 193 | static inline int | ||
| 194 | update_match_revision(struct microcode_header_intel *mc_header, int rev) | ||
| 195 | { | ||
| 196 | return (mc_header->rev <= rev) ? 0 : 1; | ||
| 197 | } | ||
| 198 | |||
| 199 | static int microcode_sanity_check(void *mc) | ||
| 200 | { | ||
| 201 | struct microcode_header_intel *mc_header = mc; | ||
| 202 | struct extended_sigtable *ext_header = NULL; | ||
| 203 | struct extended_signature *ext_sig; | ||
| 204 | unsigned long total_size, data_size, ext_table_size; | ||
| 205 | int sum, orig_sum, ext_sigcount = 0, i; | ||
| 206 | |||
| 207 | total_size = get_totalsize(mc_header); | ||
| 208 | data_size = get_datasize(mc_header); | ||
| 209 | if (data_size + MC_HEADER_SIZE > total_size) { | ||
| 210 | printk(KERN_ERR "microcode: error! " | ||
| 211 | "Bad data size in microcode data file\n"); | ||
| 212 | return -EINVAL; | ||
| 213 | } | ||
| 214 | |||
| 215 | if (mc_header->ldrver != 1 || mc_header->hdrver != 1) { | ||
| 216 | printk(KERN_ERR "microcode: error! " | ||
| 217 | "Unknown microcode update format\n"); | ||
| 218 | return -EINVAL; | ||
| 219 | } | ||
| 220 | ext_table_size = total_size - (MC_HEADER_SIZE + data_size); | ||
| 221 | if (ext_table_size) { | ||
| 222 | if ((ext_table_size < EXT_HEADER_SIZE) | ||
| 223 | || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) { | ||
| 224 | printk(KERN_ERR "microcode: error! " | ||
| 225 | "Small exttable size in microcode data file\n"); | ||
| 226 | return -EINVAL; | ||
| 227 | } | ||
| 228 | ext_header = mc + MC_HEADER_SIZE + data_size; | ||
| 229 | if (ext_table_size != exttable_size(ext_header)) { | ||
| 230 | printk(KERN_ERR "microcode: error! " | ||
| 231 | "Bad exttable size in microcode data file\n"); | ||
| 232 | return -EFAULT; | ||
| 233 | } | ||
| 234 | ext_sigcount = ext_header->count; | ||
| 235 | } | ||
| 236 | |||
| 237 | /* check extended table checksum */ | ||
| 238 | if (ext_table_size) { | ||
| 239 | int ext_table_sum = 0; | ||
| 240 | int *ext_tablep = (int *)ext_header; | ||
| 241 | |||
| 242 | i = ext_table_size / DWSIZE; | ||
| 243 | while (i--) | ||
| 244 | ext_table_sum += ext_tablep[i]; | ||
| 245 | if (ext_table_sum) { | ||
| 246 | printk(KERN_WARNING "microcode: aborting, " | ||
| 247 | "bad extended signature table checksum\n"); | ||
| 248 | return -EINVAL; | ||
| 249 | } | ||
| 250 | } | ||
| 251 | |||
| 252 | /* calculate the checksum */ | ||
| 253 | orig_sum = 0; | ||
| 254 | i = (MC_HEADER_SIZE + data_size) / DWSIZE; | ||
| 255 | while (i--) | ||
| 256 | orig_sum += ((int *)mc)[i]; | ||
| 257 | if (orig_sum) { | ||
| 258 | printk(KERN_ERR "microcode: aborting, bad checksum\n"); | ||
| 259 | return -EINVAL; | ||
| 260 | } | ||
| 261 | if (!ext_table_size) | ||
| 262 | return 0; | ||
| 263 | /* check extended signature checksum */ | ||
| 264 | for (i = 0; i < ext_sigcount; i++) { | ||
| 265 | ext_sig = (void *)ext_header + EXT_HEADER_SIZE + | ||
| 266 | EXT_SIGNATURE_SIZE * i; | ||
| 267 | sum = orig_sum | ||
| 268 | - (mc_header->sig + mc_header->pf + mc_header->cksum) | ||
| 269 | + (ext_sig->sig + ext_sig->pf + ext_sig->cksum); | ||
| 270 | if (sum) { | ||
| 271 | printk(KERN_ERR "microcode: aborting, bad checksum\n"); | ||
| 272 | return -EINVAL; | ||
| 273 | } | ||
| 274 | } | ||
| 275 | return 0; | ||
| 276 | } | ||
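Note: the checksum rule enforced by microcode_sanity_check() above is that all 32-bit words of the header-plus-data area sum to zero modulo 2^32 (and likewise for the extended signature table). A stand-alone sketch of that check (hypothetical helper):

```c
#include <stdint.h>
#include <stddef.h>

/* Returns 1 when the dword sum over the area is zero (mod 2^32),
 * i.e. the microcode checksum is satisfied. */
static int ucode_checksum_ok(const uint32_t *words, size_t nwords)
{
	uint32_t sum = 0;
	size_t i;

	for (i = 0; i < nwords; i++)
		sum += words[i];
	return sum == 0;
}
```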
| 277 | |||
| 278 | /* | ||
| 279 | * return 0 - no update found | ||
| 280 | * return 1 - found update | ||
| 281 | */ | ||
| 282 | static int | ||
| 283 | get_matching_microcode(struct cpu_signature *cpu_sig, void *mc, int rev) | ||
| 284 | { | ||
| 285 | struct microcode_header_intel *mc_header = mc; | ||
| 286 | struct extended_sigtable *ext_header; | ||
| 287 | unsigned long total_size = get_totalsize(mc_header); | ||
| 288 | int ext_sigcount, i; | ||
| 289 | struct extended_signature *ext_sig; | ||
| 290 | |||
| 291 | if (!update_match_revision(mc_header, rev)) | ||
| 292 | return 0; | ||
| 293 | |||
| 294 | if (update_match_cpu(cpu_sig, mc_header->sig, mc_header->pf)) | ||
| 295 | return 1; | ||
| 296 | |||
| 297 | /* Look for ext. headers: */ | ||
| 298 | if (total_size <= get_datasize(mc_header) + MC_HEADER_SIZE) | ||
| 299 | return 0; | ||
| 300 | |||
| 301 | ext_header = mc + get_datasize(mc_header) + MC_HEADER_SIZE; | ||
| 302 | ext_sigcount = ext_header->count; | ||
| 303 | ext_sig = (void *)ext_header + EXT_HEADER_SIZE; | ||
| 304 | |||
| 305 | for (i = 0; i < ext_sigcount; i++) { | ||
| 306 | if (update_match_cpu(cpu_sig, ext_sig->sig, ext_sig->pf)) | ||
| 307 | return 1; | ||
| 308 | ext_sig++; | ||
| 309 | } | ||
| 310 | return 0; | ||
| 311 | } | ||
| 312 | |||
| 313 | static void apply_microcode(int cpu) | ||
| 314 | { | ||
| 315 | unsigned long flags; | ||
| 316 | unsigned int val[2]; | ||
| 317 | int cpu_num = raw_smp_processor_id(); | ||
| 318 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | ||
| 319 | struct microcode_intel *mc_intel = uci->mc; | ||
| 320 | |||
| 321 | /* We should bind the task to the CPU */ | ||
| 322 | BUG_ON(cpu_num != cpu); | ||
| 323 | |||
| 324 | if (mc_intel == NULL) | ||
| 325 | return; | ||
| 326 | |||
| 327 | /* serialize access to the physical write to MSR 0x79 */ | ||
| 328 | spin_lock_irqsave(&microcode_update_lock, flags); | ||
| 329 | |||
| 330 | /* write microcode via MSR 0x79 */ | ||
| 331 | wrmsr(MSR_IA32_UCODE_WRITE, | ||
| 332 | (unsigned long) mc_intel->bits, | ||
| 333 | (unsigned long) mc_intel->bits >> 16 >> 16); | ||
| 334 | wrmsr(MSR_IA32_UCODE_REV, 0, 0); | ||
| 335 | |||
| 336 | /* see notes above for revision 1.07. Apparent chip bug */ | ||
| 337 | sync_core(); | ||
| 338 | |||
| 339 | /* get the current revision from MSR 0x8B */ | ||
| 340 | rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); | ||
| 341 | |||
| 342 | spin_unlock_irqrestore(&microcode_update_lock, flags); | ||
| 343 | if (val[1] != mc_intel->hdr.rev) { | ||
| 344 | printk(KERN_ERR "microcode: CPU%d update from revision " | ||
| 345 | "0x%x to 0x%x failed\n", cpu_num, uci->cpu_sig.rev, val[1]); | ||
| 346 | return; | ||
| 347 | } | ||
| 348 | printk(KERN_INFO "microcode: CPU%d updated from revision " | ||
| 349 | "0x%x to 0x%x, date = %04x-%02x-%02x \n", | ||
| 350 | cpu_num, uci->cpu_sig.rev, val[1], | ||
| 351 | mc_intel->hdr.date & 0xffff, | ||
| 352 | mc_intel->hdr.date >> 24, | ||
| 353 | (mc_intel->hdr.date >> 16) & 0xff); | ||
| 354 | uci->cpu_sig.rev = val[1]; | ||
| 355 | } | ||
| 356 | |||
| 357 | static int generic_load_microcode(int cpu, void *data, size_t size, | ||
| 358 | int (*get_ucode_data)(void *, const void *, size_t)) | ||
| 359 | { | ||
| 360 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | ||
| 361 | u8 *ucode_ptr = data, *new_mc = NULL, *mc; | ||
| 362 | int new_rev = uci->cpu_sig.rev; | ||
| 363 | unsigned int leftover = size; | ||
| 364 | |||
| 365 | while (leftover) { | ||
| 366 | struct microcode_header_intel mc_header; | ||
| 367 | unsigned int mc_size; | ||
| 368 | |||
| 369 | if (get_ucode_data(&mc_header, ucode_ptr, sizeof(mc_header))) | ||
| 370 | break; | ||
| 371 | |||
| 372 | mc_size = get_totalsize(&mc_header); | ||
| 373 | if (!mc_size || mc_size > leftover) { | ||
| 374 | printk(KERN_ERR "microcode: error!" | ||
| 375 | "Bad data in microcode data file\n"); | ||
| 376 | break; | ||
| 377 | } | ||
| 378 | |||
| 379 | mc = vmalloc(mc_size); | ||
| 380 | if (!mc) | ||
| 381 | break; | ||
| 382 | |||
| 383 | if (get_ucode_data(mc, ucode_ptr, mc_size) || | ||
| 384 | microcode_sanity_check(mc) < 0) { | ||
| 385 | vfree(mc); | ||
| 386 | break; | ||
| 387 | } | ||
| 388 | |||
| 389 | if (get_matching_microcode(&uci->cpu_sig, mc, new_rev)) { | ||
| 390 | if (new_mc) | ||
| 391 | vfree(new_mc); | ||
| 392 | new_rev = mc_header.rev; | ||
| 393 | new_mc = mc; | ||
| 394 | } else | ||
| 395 | vfree(mc); | ||
| 396 | |||
| 397 | ucode_ptr += mc_size; | ||
| 398 | leftover -= mc_size; | ||
| 399 | } | ||
| 400 | |||
| 401 | if (new_mc) { | ||
| 402 | if (!leftover) { | ||
| 403 | if (uci->mc) | ||
| 404 | vfree(uci->mc); | ||
| 405 | uci->mc = (struct microcode_intel *)new_mc; | ||
| 406 | pr_debug("microcode: CPU%d found a matching microcode update with" | ||
| 407 | " version 0x%x (current=0x%x)\n", | ||
| 408 | cpu, new_rev, uci->cpu_sig.rev); | ||
| 409 | } else | ||
| 410 | vfree(new_mc); | ||
| 411 | } | ||
| 412 | |||
| 413 | return (int)leftover; | ||
| 414 | } | ||
| 415 | |||
| 416 | static int get_ucode_fw(void *to, const void *from, size_t n) | ||
| 417 | { | ||
| 418 | memcpy(to, from, n); | ||
| 419 | return 0; | ||
| 420 | } | ||
| 421 | |||
| 422 | static int request_microcode_fw(int cpu, struct device *device) | ||
| 423 | { | ||
| 424 | char name[30]; | ||
| 425 | struct cpuinfo_x86 *c = &cpu_data(cpu); | ||
| 426 | const struct firmware *firmware; | ||
| 427 | int ret; | ||
| 428 | |||
| 429 | /* We should bind the task to the CPU */ | ||
| 430 | BUG_ON(cpu != raw_smp_processor_id()); | ||
| 431 | sprintf(name, "intel-ucode/%02x-%02x-%02x", | ||
| 432 | c->x86, c->x86_model, c->x86_mask); | ||
| 433 | ret = request_firmware(&firmware, name, device); | ||
| 434 | if (ret) { | ||
| 435 | pr_debug("microcode: data file %s load failed\n", name); | ||
| 436 | return ret; | ||
| 437 | } | ||
| 438 | |||
| 439 | ret = generic_load_microcode(cpu, (void*)firmware->data, firmware->size, | ||
| 440 | &get_ucode_fw); | ||
| 441 | |||
| 442 | release_firmware(firmware); | ||
| 443 | |||
| 444 | return ret; | ||
| 445 | } | ||
| 446 | |||
| 447 | static int get_ucode_user(void *to, const void *from, size_t n) | ||
| 448 | { | ||
| 449 | return copy_from_user(to, from, n); | ||
| 450 | } | ||
| 451 | |||
| 452 | static int request_microcode_user(int cpu, const void __user *buf, size_t size) | ||
| 453 | { | ||
| 454 | /* We should bind the task to the CPU */ | ||
| 455 | BUG_ON(cpu != raw_smp_processor_id()); | ||
| 456 | |||
| 457 | return generic_load_microcode(cpu, (void*)buf, size, &get_ucode_user); | ||
| 458 | } | ||
| 459 | |||
| 460 | static void microcode_fini_cpu(int cpu) | ||
| 461 | { | ||
| 462 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | ||
| 463 | |||
| 464 | vfree(uci->mc); | ||
| 465 | uci->mc = NULL; | ||
| 466 | } | ||
| 467 | |||
| 468 | struct microcode_ops microcode_intel_ops = { | ||
| 469 | .request_microcode_user = request_microcode_user, | ||
| 470 | .request_microcode_fw = request_microcode_fw, | ||
| 471 | .collect_cpu_info = collect_cpu_info, | ||
| 472 | .apply_microcode = apply_microcode, | ||
| 473 | .microcode_fini_cpu = microcode_fini_cpu, | ||
| 474 | }; | ||
| 475 | |||
| 476 | struct microcode_ops * __init init_intel_microcode(void) | ||
| 477 | { | ||
| 478 | return &microcode_intel_ops; | ||
| 479 | } | ||
| 480 | |||
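The checksum logic above treats the whole update as an array of 32-bit words whose sum must be zero, and validates each extended signature by swapping the header's sig/pf/cksum contribution for the extended one. A minimal user-space sketch of the dword-sum part follows; the field sizes are assumptions for illustration, not the kernel's header definitions.

#include <stdint.h>
#include <stddef.h>

/* Sketch of the dword checksum used above: every 32-bit word of
 * (header + data) must sum to zero, otherwise the update is corrupt. */
static int ucode_checksum_ok(const void *mc, size_t header_size, size_t data_size)
{
	const uint32_t *p = mc;
	size_t n = (header_size + data_size) / sizeof(uint32_t);
	uint32_t sum = 0;

	while (n--)
		sum += *p++;

	return sum == 0;
}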
diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c index fdfdc550b366..efc2f361fe85 100644 --- a/arch/x86/kernel/mmconf-fam10h_64.c +++ b/arch/x86/kernel/mmconf-fam10h_64.c | |||
| @@ -238,7 +238,7 @@ static struct dmi_system_id __devinitdata mmconf_dmi_table[] = { | |||
| 238 | {} | 238 | {} |
| 239 | }; | 239 | }; |
| 240 | 240 | ||
| 241 | void __init check_enable_amd_mmconf_dmi(void) | 241 | void __cpuinit check_enable_amd_mmconf_dmi(void) |
| 242 | { | 242 | { |
| 243 | dmi_check_system(mmconf_dmi_table); | 243 | dmi_check_system(mmconf_dmi_table); |
| 244 | } | 244 | } |
diff --git a/arch/x86/kernel/module_64.c b/arch/x86/kernel/module_64.c index a888e67f5874..6ba87830d4b1 100644 --- a/arch/x86/kernel/module_64.c +++ b/arch/x86/kernel/module_64.c | |||
| @@ -22,6 +22,7 @@ | |||
| 22 | #include <linux/fs.h> | 22 | #include <linux/fs.h> |
| 23 | #include <linux/string.h> | 23 | #include <linux/string.h> |
| 24 | #include <linux/kernel.h> | 24 | #include <linux/kernel.h> |
| 25 | #include <linux/mm.h> | ||
| 25 | #include <linux/slab.h> | 26 | #include <linux/slab.h> |
| 26 | #include <linux/bug.h> | 27 | #include <linux/bug.h> |
| 27 | 28 | ||
| @@ -150,7 +151,8 @@ int module_finalize(const Elf_Ehdr *hdr, | |||
| 150 | const Elf_Shdr *sechdrs, | 151 | const Elf_Shdr *sechdrs, |
| 151 | struct module *me) | 152 | struct module *me) |
| 152 | { | 153 | { |
| 153 | const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL; | 154 | const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, |
| 155 | *para = NULL; | ||
| 154 | char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; | 156 | char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; |
| 155 | 157 | ||
| 156 | for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { | 158 | for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { |
| @@ -160,6 +162,8 @@ int module_finalize(const Elf_Ehdr *hdr, | |||
| 160 | alt = s; | 162 | alt = s; |
| 161 | if (!strcmp(".smp_locks", secstrings + s->sh_name)) | 163 | if (!strcmp(".smp_locks", secstrings + s->sh_name)) |
| 162 | locks= s; | 164 | locks= s; |
| 165 | if (!strcmp(".parainstructions", secstrings + s->sh_name)) | ||
| 166 | para = s; | ||
| 163 | } | 167 | } |
| 164 | 168 | ||
| 165 | if (alt) { | 169 | if (alt) { |
| @@ -175,6 +179,11 @@ int module_finalize(const Elf_Ehdr *hdr, | |||
| 175 | tseg, tseg + text->sh_size); | 179 | tseg, tseg + text->sh_size); |
| 176 | } | 180 | } |
| 177 | 181 | ||
| 182 | if (para) { | ||
| 183 | void *pseg = (void *)para->sh_addr; | ||
| 184 | apply_paravirt(pseg, pseg + para->sh_size); | ||
| 185 | } | ||
| 186 | |||
| 178 | return module_bug_finalize(hdr, sechdrs, me); | 187 | return module_bug_finalize(hdr, sechdrs, me); |
| 179 | } | 188 | } |
| 180 | 189 | ||
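module_finalize() above walks the section header table by name to find .text, .altinstructions, .smp_locks and now .parainstructions. The same scan, written for a user-space buffer holding an ELF64 image, looks roughly like the sketch below (illustrative only, not the kernel's module loader):

#include <elf.h>
#include <string.h>

/* Find the first section with the given name in an in-memory ELF64 image. */
static const Elf64_Shdr *find_section(const void *image, const char *name)
{
	const Elf64_Ehdr *ehdr = image;
	const Elf64_Shdr *shdrs = (const void *)((const char *)image + ehdr->e_shoff);
	const char *strtab = (const char *)image + shdrs[ehdr->e_shstrndx].sh_offset;
	int i;

	for (i = 0; i < ehdr->e_shnum; i++)
		if (!strcmp(name, strtab + shdrs[i].sh_name))
			return &shdrs[i];
	return NULL;
}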
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 3b25e49380c6..f98f4e1dba09 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c | |||
| @@ -27,6 +27,7 @@ | |||
| 27 | #include <asm/bios_ebda.h> | 27 | #include <asm/bios_ebda.h> |
| 28 | #include <asm/e820.h> | 28 | #include <asm/e820.h> |
| 29 | #include <asm/trampoline.h> | 29 | #include <asm/trampoline.h> |
| 30 | #include <asm/setup.h> | ||
| 30 | 31 | ||
| 31 | #include <mach_apic.h> | 32 | #include <mach_apic.h> |
| 32 | #ifdef CONFIG_X86_32 | 33 | #ifdef CONFIG_X86_32 |
| @@ -48,77 +49,7 @@ static int __init mpf_checksum(unsigned char *mp, int len) | |||
| 48 | return sum & 0xFF; | 49 | return sum & 0xFF; |
| 49 | } | 50 | } |
| 50 | 51 | ||
| 51 | #ifdef CONFIG_X86_NUMAQ | 52 | static void __init MP_processor_info(struct mpc_config_processor *m) |
| 52 | int found_numaq; | ||
| 53 | /* | ||
| 54 | * Have to match translation table entries to main table entries by counter | ||
| 55 | * hence the mpc_record variable .... can't see a less disgusting way of | ||
| 56 | * doing this .... | ||
| 57 | */ | ||
| 58 | struct mpc_config_translation { | ||
| 59 | unsigned char mpc_type; | ||
| 60 | unsigned char trans_len; | ||
| 61 | unsigned char trans_type; | ||
| 62 | unsigned char trans_quad; | ||
| 63 | unsigned char trans_global; | ||
| 64 | unsigned char trans_local; | ||
| 65 | unsigned short trans_reserved; | ||
| 66 | }; | ||
| 67 | |||
| 68 | |||
| 69 | static int mpc_record; | ||
| 70 | static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] | ||
| 71 | __cpuinitdata; | ||
| 72 | |||
| 73 | static inline int generate_logical_apicid(int quad, int phys_apicid) | ||
| 74 | { | ||
| 75 | return (quad << 4) + (phys_apicid ? phys_apicid << 1 : 1); | ||
| 76 | } | ||
| 77 | |||
| 78 | |||
| 79 | static inline int mpc_apic_id(struct mpc_config_processor *m, | ||
| 80 | struct mpc_config_translation *translation_record) | ||
| 81 | { | ||
| 82 | int quad = translation_record->trans_quad; | ||
| 83 | int logical_apicid = generate_logical_apicid(quad, m->mpc_apicid); | ||
| 84 | |||
| 85 | printk(KERN_DEBUG "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n", | ||
| 86 | m->mpc_apicid, | ||
| 87 | (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8, | ||
| 88 | (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4, | ||
| 89 | m->mpc_apicver, quad, logical_apicid); | ||
| 90 | return logical_apicid; | ||
| 91 | } | ||
| 92 | |||
| 93 | int mp_bus_id_to_node[MAX_MP_BUSSES]; | ||
| 94 | |||
| 95 | int mp_bus_id_to_local[MAX_MP_BUSSES]; | ||
| 96 | |||
| 97 | static void mpc_oem_bus_info(struct mpc_config_bus *m, char *name, | ||
| 98 | struct mpc_config_translation *translation) | ||
| 99 | { | ||
| 100 | int quad = translation->trans_quad; | ||
| 101 | int local = translation->trans_local; | ||
| 102 | |||
| 103 | mp_bus_id_to_node[m->mpc_busid] = quad; | ||
| 104 | mp_bus_id_to_local[m->mpc_busid] = local; | ||
| 105 | printk(KERN_INFO "Bus #%d is %s (node %d)\n", | ||
| 106 | m->mpc_busid, name, quad); | ||
| 107 | } | ||
| 108 | |||
| 109 | int quad_local_to_mp_bus_id [NR_CPUS/4][4]; | ||
| 110 | static void mpc_oem_pci_bus(struct mpc_config_bus *m, | ||
| 111 | struct mpc_config_translation *translation) | ||
| 112 | { | ||
| 113 | int quad = translation->trans_quad; | ||
| 114 | int local = translation->trans_local; | ||
| 115 | |||
| 116 | quad_local_to_mp_bus_id[quad][local] = m->mpc_busid; | ||
| 117 | } | ||
| 118 | |||
| 119 | #endif | ||
| 120 | |||
| 121 | static void __cpuinit MP_processor_info(struct mpc_config_processor *m) | ||
| 122 | { | 53 | { |
| 123 | int apicid; | 54 | int apicid; |
| 124 | char *bootup_cpu = ""; | 55 | char *bootup_cpu = ""; |
| @@ -127,14 +58,12 @@ static void __cpuinit MP_processor_info(struct mpc_config_processor *m) | |||
| 127 | disabled_cpus++; | 58 | disabled_cpus++; |
| 128 | return; | 59 | return; |
| 129 | } | 60 | } |
| 130 | #ifdef CONFIG_X86_NUMAQ | 61 | |
| 131 | if (found_numaq) | 62 | if (x86_quirks->mpc_apic_id) |
| 132 | apicid = mpc_apic_id(m, translation_table[mpc_record]); | 63 | apicid = x86_quirks->mpc_apic_id(m); |
| 133 | else | 64 | else |
| 134 | apicid = m->mpc_apicid; | 65 | apicid = m->mpc_apicid; |
| 135 | #else | 66 | |
| 136 | apicid = m->mpc_apicid; | ||
| 137 | #endif | ||
| 138 | if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { | 67 | if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { |
| 139 | bootup_cpu = " (Bootup-CPU)"; | 68 | bootup_cpu = " (Bootup-CPU)"; |
| 140 | boot_cpu_physical_apicid = m->mpc_apicid; | 69 | boot_cpu_physical_apicid = m->mpc_apicid; |
| @@ -151,12 +80,10 @@ static void __init MP_bus_info(struct mpc_config_bus *m) | |||
| 151 | memcpy(str, m->mpc_bustype, 6); | 80 | memcpy(str, m->mpc_bustype, 6); |
| 152 | str[6] = 0; | 81 | str[6] = 0; |
| 153 | 82 | ||
| 154 | #ifdef CONFIG_X86_NUMAQ | 83 | if (x86_quirks->mpc_oem_bus_info) |
| 155 | if (found_numaq) | 84 | x86_quirks->mpc_oem_bus_info(m, str); |
| 156 | mpc_oem_bus_info(m, str, translation_table[mpc_record]); | 85 | else |
| 157 | #else | 86 | apic_printk(APIC_VERBOSE, "Bus #%d is %s\n", m->mpc_busid, str); |
| 158 | printk(KERN_INFO "Bus #%d is %s\n", m->mpc_busid, str); | ||
| 159 | #endif | ||
| 160 | 87 | ||
| 161 | #if MAX_MP_BUSSES < 256 | 88 | #if MAX_MP_BUSSES < 256 |
| 162 | if (m->mpc_busid >= MAX_MP_BUSSES) { | 89 | if (m->mpc_busid >= MAX_MP_BUSSES) { |
| @@ -173,10 +100,9 @@ static void __init MP_bus_info(struct mpc_config_bus *m) | |||
| 173 | mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; | 100 | mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; |
| 174 | #endif | 101 | #endif |
| 175 | } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) { | 102 | } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) { |
| 176 | #ifdef CONFIG_X86_NUMAQ | 103 | if (x86_quirks->mpc_oem_pci_bus) |
| 177 | if (found_numaq) | 104 | x86_quirks->mpc_oem_pci_bus(m); |
| 178 | mpc_oem_pci_bus(m, translation_table[mpc_record]); | 105 | |
| 179 | #endif | ||
| 180 | clear_bit(m->mpc_busid, mp_bus_not_pci); | 106 | clear_bit(m->mpc_busid, mp_bus_not_pci); |
| 181 | #if defined(CONFIG_EISA) || defined (CONFIG_MCA) | 107 | #if defined(CONFIG_EISA) || defined (CONFIG_MCA) |
| 182 | mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; | 108 | mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; |
| @@ -228,7 +154,7 @@ static void __init MP_ioapic_info(struct mpc_config_ioapic *m) | |||
| 228 | 154 | ||
| 229 | static void print_MP_intsrc_info(struct mpc_config_intsrc *m) | 155 | static void print_MP_intsrc_info(struct mpc_config_intsrc *m) |
| 230 | { | 156 | { |
| 231 | printk(KERN_CONT "Int: type %d, pol %d, trig %d, bus %02x," | 157 | apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x," |
| 232 | " IRQ %02x, APIC ID %x, APIC INT %02x\n", | 158 | " IRQ %02x, APIC ID %x, APIC INT %02x\n", |
| 233 | m->mpc_irqtype, m->mpc_irqflag & 3, | 159 | m->mpc_irqtype, m->mpc_irqflag & 3, |
| 234 | (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, | 160 | (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, |
| @@ -237,7 +163,7 @@ static void print_MP_intsrc_info(struct mpc_config_intsrc *m) | |||
| 237 | 163 | ||
| 238 | static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq) | 164 | static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq) |
| 239 | { | 165 | { |
| 240 | printk(KERN_CONT "Int: type %d, pol %d, trig %d, bus %02x," | 166 | apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x," |
| 241 | " IRQ %02x, APIC ID %x, APIC INT %02x\n", | 167 | " IRQ %02x, APIC ID %x, APIC INT %02x\n", |
| 242 | mp_irq->mp_irqtype, mp_irq->mp_irqflag & 3, | 168 | mp_irq->mp_irqtype, mp_irq->mp_irqflag & 3, |
| 243 | (mp_irq->mp_irqflag >> 2) & 3, mp_irq->mp_srcbus, | 169 | (mp_irq->mp_irqflag >> 2) & 3, mp_irq->mp_srcbus, |
| @@ -309,90 +235,13 @@ static void __init MP_intsrc_info(struct mpc_config_intsrc *m) | |||
| 309 | 235 | ||
| 310 | static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m) | 236 | static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m) |
| 311 | { | 237 | { |
| 312 | printk(KERN_INFO "Lint: type %d, pol %d, trig %d, bus %02x," | 238 | apic_printk(APIC_VERBOSE, "Lint: type %d, pol %d, trig %d, bus %02x," |
| 313 | " IRQ %02x, APIC ID %x, APIC LINT %02x\n", | 239 | " IRQ %02x, APIC ID %x, APIC LINT %02x\n", |
| 314 | m->mpc_irqtype, m->mpc_irqflag & 3, | 240 | m->mpc_irqtype, m->mpc_irqflag & 3, |
| 315 | (m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid, | 241 | (m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid, |
| 316 | m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint); | 242 | m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint); |
| 317 | } | 243 | } |
| 318 | 244 | ||
| 319 | #ifdef CONFIG_X86_NUMAQ | ||
| 320 | static void __init MP_translation_info(struct mpc_config_translation *m) | ||
| 321 | { | ||
| 322 | printk(KERN_INFO | ||
| 323 | "Translation: record %d, type %d, quad %d, global %d, local %d\n", | ||
| 324 | mpc_record, m->trans_type, m->trans_quad, m->trans_global, | ||
| 325 | m->trans_local); | ||
| 326 | |||
| 327 | if (mpc_record >= MAX_MPC_ENTRY) | ||
| 328 | printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n"); | ||
| 329 | else | ||
| 330 | translation_table[mpc_record] = m; /* stash this for later */ | ||
| 331 | if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad)) | ||
| 332 | node_set_online(m->trans_quad); | ||
| 333 | } | ||
| 334 | |||
| 335 | /* | ||
| 336 | * Read/parse the MPC oem tables | ||
| 337 | */ | ||
| 338 | |||
| 339 | static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, | ||
| 340 | unsigned short oemsize) | ||
| 341 | { | ||
| 342 | int count = sizeof(*oemtable); /* the header size */ | ||
| 343 | unsigned char *oemptr = ((unsigned char *)oemtable) + count; | ||
| 344 | |||
| 345 | mpc_record = 0; | ||
| 346 | printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n", | ||
| 347 | oemtable); | ||
| 348 | if (memcmp(oemtable->oem_signature, MPC_OEM_SIGNATURE, 4)) { | ||
| 349 | printk(KERN_WARNING | ||
| 350 | "SMP mpc oemtable: bad signature [%c%c%c%c]!\n", | ||
| 351 | oemtable->oem_signature[0], oemtable->oem_signature[1], | ||
| 352 | oemtable->oem_signature[2], oemtable->oem_signature[3]); | ||
| 353 | return; | ||
| 354 | } | ||
| 355 | if (mpf_checksum((unsigned char *)oemtable, oemtable->oem_length)) { | ||
| 356 | printk(KERN_WARNING "SMP oem mptable: checksum error!\n"); | ||
| 357 | return; | ||
| 358 | } | ||
| 359 | while (count < oemtable->oem_length) { | ||
| 360 | switch (*oemptr) { | ||
| 361 | case MP_TRANSLATION: | ||
| 362 | { | ||
| 363 | struct mpc_config_translation *m = | ||
| 364 | (struct mpc_config_translation *)oemptr; | ||
| 365 | MP_translation_info(m); | ||
| 366 | oemptr += sizeof(*m); | ||
| 367 | count += sizeof(*m); | ||
| 368 | ++mpc_record; | ||
| 369 | break; | ||
| 370 | } | ||
| 371 | default: | ||
| 372 | { | ||
| 373 | printk(KERN_WARNING | ||
| 374 | "Unrecognised OEM table entry type! - %d\n", | ||
| 375 | (int)*oemptr); | ||
| 376 | return; | ||
| 377 | } | ||
| 378 | } | ||
| 379 | } | ||
| 380 | } | ||
| 381 | |||
| 382 | void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, | ||
| 383 | char *productid) | ||
| 384 | { | ||
| 385 | if (strncmp(oem, "IBM NUMA", 8)) | ||
| 386 | printk("Warning! Not a NUMA-Q system!\n"); | ||
| 387 | else | ||
| 388 | found_numaq = 1; | ||
| 389 | |||
| 390 | if (mpc->mpc_oemptr) | ||
| 391 | smp_read_mpc_oem((struct mp_config_oemtable *)mpc->mpc_oemptr, | ||
| 392 | mpc->mpc_oemsize); | ||
| 393 | } | ||
| 394 | #endif /* CONFIG_X86_NUMAQ */ | ||
| 395 | |||
| 396 | /* | 245 | /* |
| 397 | * Read/parse the MPC | 246 | * Read/parse the MPC |
| 398 | */ | 247 | */ |
| @@ -457,7 +306,6 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) | |||
| 457 | } else | 306 | } else |
| 458 | mps_oem_check(mpc, oem, str); | 307 | mps_oem_check(mpc, oem, str); |
| 459 | #endif | 308 | #endif |
| 460 | |||
| 461 | /* save the local APIC address, it might be non-default */ | 309 | /* save the local APIC address, it might be non-default */ |
| 462 | if (!acpi_lapic) | 310 | if (!acpi_lapic) |
| 463 | mp_lapic_addr = mpc->mpc_lapic; | 311 | mp_lapic_addr = mpc->mpc_lapic; |
| @@ -465,12 +313,17 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) | |||
| 465 | if (early) | 313 | if (early) |
| 466 | return 1; | 314 | return 1; |
| 467 | 315 | ||
| 316 | if (mpc->mpc_oemptr && x86_quirks->smp_read_mpc_oem) { | ||
| 317 | struct mp_config_oemtable *oem_table = (struct mp_config_oemtable *)(unsigned long)mpc->mpc_oemptr; | ||
| 318 | x86_quirks->smp_read_mpc_oem(oem_table, mpc->mpc_oemsize); | ||
| 319 | } | ||
| 320 | |||
| 468 | /* | 321 | /* |
| 469 | * Now process the configuration blocks. | 322 | * Now process the configuration blocks. |
| 470 | */ | 323 | */ |
| 471 | #ifdef CONFIG_X86_NUMAQ | 324 | if (x86_quirks->mpc_record) |
| 472 | mpc_record = 0; | 325 | *x86_quirks->mpc_record = 0; |
| 473 | #endif | 326 | |
| 474 | while (count < mpc->mpc_length) { | 327 | while (count < mpc->mpc_length) { |
| 475 | switch (*mpt) { | 328 | switch (*mpt) { |
| 476 | case MP_PROCESSOR: | 329 | case MP_PROCESSOR: |
| @@ -536,16 +389,17 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) | |||
| 536 | count = mpc->mpc_length; | 389 | count = mpc->mpc_length; |
| 537 | break; | 390 | break; |
| 538 | } | 391 | } |
| 539 | #ifdef CONFIG_X86_NUMAQ | 392 | if (x86_quirks->mpc_record) |
| 540 | ++mpc_record; | 393 | (*x86_quirks->mpc_record)++; |
| 541 | #endif | ||
| 542 | } | 394 | } |
| 543 | 395 | ||
| 544 | #ifdef CONFIG_X86_GENERICARCH | 396 | #ifdef CONFIG_X86_GENERICARCH |
| 545 | generic_bigsmp_probe(); | 397 | generic_bigsmp_probe(); |
| 546 | #endif | 398 | #endif |
| 547 | 399 | ||
| 400 | #ifdef CONFIG_X86_32 | ||
| 548 | setup_apic_routing(); | 401 | setup_apic_routing(); |
| 402 | #endif | ||
| 549 | if (!num_processors) | 403 | if (!num_processors) |
| 550 | printk(KERN_ERR "MPTABLE: no processors registered!\n"); | 404 | printk(KERN_ERR "MPTABLE: no processors registered!\n"); |
| 551 | return num_processors; | 405 | return num_processors; |
| @@ -632,7 +486,7 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type) | |||
| 632 | } | 486 | } |
| 633 | 487 | ||
| 634 | 488 | ||
| 635 | static void construct_ioapic_table(int mpc_default_type) | 489 | static void __init construct_ioapic_table(int mpc_default_type) |
| 636 | { | 490 | { |
| 637 | struct mpc_config_ioapic ioapic; | 491 | struct mpc_config_ioapic ioapic; |
| 638 | struct mpc_config_bus bus; | 492 | struct mpc_config_bus bus; |
| @@ -677,7 +531,7 @@ static void construct_ioapic_table(int mpc_default_type) | |||
| 677 | construct_default_ioirq_mptable(mpc_default_type); | 531 | construct_default_ioirq_mptable(mpc_default_type); |
| 678 | } | 532 | } |
| 679 | #else | 533 | #else |
| 680 | static inline void construct_ioapic_table(int mpc_default_type) { } | 534 | static inline void __init construct_ioapic_table(int mpc_default_type) { } |
| 681 | #endif | 535 | #endif |
| 682 | 536 | ||
| 683 | static inline void __init construct_default_ISA_mptable(int mpc_default_type) | 537 | static inline void __init construct_default_ISA_mptable(int mpc_default_type) |
| @@ -726,20 +580,14 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) | |||
| 726 | static struct intel_mp_floating *mpf_found; | 580 | static struct intel_mp_floating *mpf_found; |
| 727 | 581 | ||
| 728 | /* | 582 | /* |
| 729 | * Machine specific quirk for finding the SMP config before other setup | ||
| 730 | * activities destroy the table: | ||
| 731 | */ | ||
| 732 | int (*mach_get_smp_config_quirk)(unsigned int early); | ||
| 733 | |||
| 734 | /* | ||
| 735 | * Scan the memory blocks for an SMP configuration block. | 583 | * Scan the memory blocks for an SMP configuration block. |
| 736 | */ | 584 | */ |
| 737 | static void __init __get_smp_config(unsigned int early) | 585 | static void __init __get_smp_config(unsigned int early) |
| 738 | { | 586 | { |
| 739 | struct intel_mp_floating *mpf = mpf_found; | 587 | struct intel_mp_floating *mpf = mpf_found; |
| 740 | 588 | ||
| 741 | if (mach_get_smp_config_quirk) { | 589 | if (x86_quirks->mach_get_smp_config) { |
| 742 | if (mach_get_smp_config_quirk(early)) | 590 | if (x86_quirks->mach_get_smp_config(early)) |
| 743 | return; | 591 | return; |
| 744 | } | 592 | } |
| 745 | if (acpi_lapic && early) | 593 | if (acpi_lapic && early) |
| @@ -849,7 +697,8 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, | |||
| 849 | unsigned int *bp = phys_to_virt(base); | 697 | unsigned int *bp = phys_to_virt(base); |
| 850 | struct intel_mp_floating *mpf; | 698 | struct intel_mp_floating *mpf; |
| 851 | 699 | ||
| 852 | printk(KERN_DEBUG "Scan SMP from %p for %ld bytes.\n", bp, length); | 700 | apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n", |
| 701 | bp, length); | ||
| 853 | BUILD_BUG_ON(sizeof(*mpf) != 16); | 702 | BUILD_BUG_ON(sizeof(*mpf) != 16); |
| 854 | 703 | ||
| 855 | while (length > 0) { | 704 | while (length > 0) { |
| @@ -899,14 +748,12 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, | |||
| 899 | return 0; | 748 | return 0; |
| 900 | } | 749 | } |
| 901 | 750 | ||
| 902 | int (*mach_find_smp_config_quirk)(unsigned int reserve); | ||
| 903 | |||
| 904 | static void __init __find_smp_config(unsigned int reserve) | 751 | static void __init __find_smp_config(unsigned int reserve) |
| 905 | { | 752 | { |
| 906 | unsigned int address; | 753 | unsigned int address; |
| 907 | 754 | ||
| 908 | if (mach_find_smp_config_quirk) { | 755 | if (x86_quirks->mach_find_smp_config) { |
| 909 | if (mach_find_smp_config_quirk(reserve)) | 756 | if (x86_quirks->mach_find_smp_config(reserve)) |
| 910 | return; | 757 | return; |
| 911 | } | 758 | } |
| 912 | /* | 759 | /* |
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index a153b3905f60..82a7c7ed6d45 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c | |||
| @@ -72,21 +72,28 @@ static ssize_t msr_read(struct file *file, char __user *buf, | |||
| 72 | u32 data[2]; | 72 | u32 data[2]; |
| 73 | u32 reg = *ppos; | 73 | u32 reg = *ppos; |
| 74 | int cpu = iminor(file->f_path.dentry->d_inode); | 74 | int cpu = iminor(file->f_path.dentry->d_inode); |
| 75 | int err; | 75 | int err = 0; |
| 76 | ssize_t bytes = 0; | ||
| 76 | 77 | ||
| 77 | if (count % 8) | 78 | if (count % 8) |
| 78 | return -EINVAL; /* Invalid chunk size */ | 79 | return -EINVAL; /* Invalid chunk size */ |
| 79 | 80 | ||
| 80 | for (; count; count -= 8) { | 81 | for (; count; count -= 8) { |
| 81 | err = rdmsr_safe_on_cpu(cpu, reg, &data[0], &data[1]); | 82 | err = rdmsr_safe_on_cpu(cpu, reg, &data[0], &data[1]); |
| 82 | if (err) | 83 | if (err) { |
| 83 | return -EIO; | 84 | if (err == -EFAULT) /* Fix idiotic error code */ |
| 84 | if (copy_to_user(tmp, &data, 8)) | 85 | err = -EIO; |
| 85 | return -EFAULT; | 86 | break; |
| 87 | } | ||
| 88 | if (copy_to_user(tmp, &data, 8)) { | ||
| 89 | err = -EFAULT; | ||
| 90 | break; | ||
| 91 | } | ||
| 86 | tmp += 2; | 92 | tmp += 2; |
| 93 | bytes += 8; | ||
| 87 | } | 94 | } |
| 88 | 95 | ||
| 89 | return ((char __user *)tmp) - buf; | 96 | return bytes ? bytes : err; |
| 90 | } | 97 | } |
| 91 | 98 | ||
| 92 | static ssize_t msr_write(struct file *file, const char __user *buf, | 99 | static ssize_t msr_write(struct file *file, const char __user *buf, |
| @@ -96,21 +103,28 @@ static ssize_t msr_write(struct file *file, const char __user *buf, | |||
| 96 | u32 data[2]; | 103 | u32 data[2]; |
| 97 | u32 reg = *ppos; | 104 | u32 reg = *ppos; |
| 98 | int cpu = iminor(file->f_path.dentry->d_inode); | 105 | int cpu = iminor(file->f_path.dentry->d_inode); |
| 99 | int err; | 106 | int err = 0; |
| 107 | ssize_t bytes = 0; | ||
| 100 | 108 | ||
| 101 | if (count % 8) | 109 | if (count % 8) |
| 102 | return -EINVAL; /* Invalid chunk size */ | 110 | return -EINVAL; /* Invalid chunk size */ |
| 103 | 111 | ||
| 104 | for (; count; count -= 8) { | 112 | for (; count; count -= 8) { |
| 105 | if (copy_from_user(&data, tmp, 8)) | 113 | if (copy_from_user(&data, tmp, 8)) { |
| 106 | return -EFAULT; | 114 | err = -EFAULT; |
| 115 | break; | ||
| 116 | } | ||
| 107 | err = wrmsr_safe_on_cpu(cpu, reg, data[0], data[1]); | 117 | err = wrmsr_safe_on_cpu(cpu, reg, data[0], data[1]); |
| 108 | if (err) | 118 | if (err) { |
| 109 | return -EIO; | 119 | if (err == -EFAULT) /* Fix idiotic error code */ |
| 120 | err = -EIO; | ||
| 121 | break; | ||
| 122 | } | ||
| 110 | tmp += 2; | 123 | tmp += 2; |
| 124 | bytes += 8; | ||
| 111 | } | 125 | } |
| 112 | 126 | ||
| 113 | return ((char __user *)tmp) - buf; | 127 | return bytes ? bytes : err; |
| 114 | } | 128 | } |
| 115 | 129 | ||
| 116 | static int msr_open(struct inode *inode, struct file *file) | 130 | static int msr_open(struct inode *inode, struct file *file) |
| @@ -131,7 +145,7 @@ static int msr_open(struct inode *inode, struct file *file) | |||
| 131 | ret = -EIO; /* MSR not supported */ | 145 | ret = -EIO; /* MSR not supported */ |
| 132 | out: | 146 | out: |
| 133 | unlock_kernel(); | 147 | unlock_kernel(); |
| 134 | return 0; | 148 | return ret; |
| 135 | } | 149 | } |
| 136 | 150 | ||
| 137 | /* | 151 | /* |
| @@ -149,7 +163,7 @@ static int __cpuinit msr_device_create(int cpu) | |||
| 149 | { | 163 | { |
| 150 | struct device *dev; | 164 | struct device *dev; |
| 151 | 165 | ||
| 152 | dev = device_create(msr_class, NULL, MKDEV(MSR_MAJOR, cpu), | 166 | dev = device_create(msr_class, NULL, MKDEV(MSR_MAJOR, cpu), NULL, |
| 153 | "msr%d", cpu); | 167 | "msr%d", cpu); |
| 154 | return IS_ERR(dev) ? PTR_ERR(dev) : 0; | 168 | return IS_ERR(dev) ? PTR_ERR(dev) : 0; |
| 155 | } | 169 | } |
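With the change above, msr_read()/msr_write() report the bytes actually transferred and only return an error when nothing was moved, matching ordinary read(2)/write(2) semantics. From user space the device is driven in 8-byte chunks with the file offset selecting the MSR number; a hypothetical reader (run as root) might look like:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	uint64_t val;
	int fd = open("/dev/cpu/0/msr", O_RDONLY);

	if (fd < 0)
		return 1;
	/* offset = MSR number; 0x10 (IA32_TIME_STAMP_COUNTER) is just an example */
	if (pread(fd, &val, sizeof(val), 0x10) != sizeof(val)) {
		close(fd);
		return 1;
	}
	printf("TSC: %llu\n", (unsigned long long)val);
	close(fd);
	return 0;
}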
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index ec024b3baad0..2c97f07f1c2c 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c | |||
| @@ -114,6 +114,23 @@ static __init void nmi_cpu_busy(void *data) | |||
| 114 | } | 114 | } |
| 115 | #endif | 115 | #endif |
| 116 | 116 | ||
| 117 | static void report_broken_nmi(int cpu, int *prev_nmi_count) | ||
| 118 | { | ||
| 119 | printk(KERN_CONT "\n"); | ||
| 120 | |||
| 121 | printk(KERN_WARNING | ||
| 122 | "WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n", | ||
| 123 | cpu, prev_nmi_count[cpu], get_nmi_count(cpu)); | ||
| 124 | |||
| 125 | printk(KERN_WARNING | ||
| 126 | "Please report this to bugzilla.kernel.org,\n"); | ||
| 127 | printk(KERN_WARNING | ||
| 128 | "and attach the output of the 'dmesg' command.\n"); | ||
| 129 | |||
| 130 | per_cpu(wd_enabled, cpu) = 0; | ||
| 131 | atomic_dec(&nmi_active); | ||
| 132 | } | ||
| 133 | |||
| 117 | int __init check_nmi_watchdog(void) | 134 | int __init check_nmi_watchdog(void) |
| 118 | { | 135 | { |
| 119 | unsigned int *prev_nmi_count; | 136 | unsigned int *prev_nmi_count; |
| @@ -141,15 +158,8 @@ int __init check_nmi_watchdog(void) | |||
| 141 | for_each_online_cpu(cpu) { | 158 | for_each_online_cpu(cpu) { |
| 142 | if (!per_cpu(wd_enabled, cpu)) | 159 | if (!per_cpu(wd_enabled, cpu)) |
| 143 | continue; | 160 | continue; |
| 144 | if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5) { | 161 | if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5) |
| 145 | printk(KERN_WARNING "WARNING: CPU#%d: NMI " | 162 | report_broken_nmi(cpu, prev_nmi_count); |
| 146 | "appears to be stuck (%d->%d)!\n", | ||
| 147 | cpu, | ||
| 148 | prev_nmi_count[cpu], | ||
| 149 | get_nmi_count(cpu)); | ||
| 150 | per_cpu(wd_enabled, cpu) = 0; | ||
| 151 | atomic_dec(&nmi_active); | ||
| 152 | } | ||
| 153 | } | 163 | } |
| 154 | endflag = 1; | 164 | endflag = 1; |
| 155 | if (!atomic_read(&nmi_active)) { | 165 | if (!atomic_read(&nmi_active)) { |
| @@ -263,7 +273,7 @@ late_initcall(init_lapic_nmi_sysfs); | |||
| 263 | 273 | ||
| 264 | static void __acpi_nmi_enable(void *__unused) | 274 | static void __acpi_nmi_enable(void *__unused) |
| 265 | { | 275 | { |
| 266 | apic_write_around(APIC_LVT0, APIC_DM_NMI); | 276 | apic_write(APIC_LVT0, APIC_DM_NMI); |
| 267 | } | 277 | } |
| 268 | 278 | ||
| 269 | /* | 279 | /* |
| @@ -277,7 +287,7 @@ void acpi_nmi_enable(void) | |||
| 277 | 287 | ||
| 278 | static void __acpi_nmi_disable(void *__unused) | 288 | static void __acpi_nmi_disable(void *__unused) |
| 279 | { | 289 | { |
| 280 | apic_write_around(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); | 290 | apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); |
| 281 | } | 291 | } |
| 282 | 292 | ||
| 283 | /* | 293 | /* |
| @@ -289,6 +299,15 @@ void acpi_nmi_disable(void) | |||
| 289 | on_each_cpu(__acpi_nmi_disable, NULL, 1); | 299 | on_each_cpu(__acpi_nmi_disable, NULL, 1); |
| 290 | } | 300 | } |
| 291 | 301 | ||
| 302 | /* | ||
| 303 | * This function is called as soon the LAPIC NMI watchdog driver has everything | ||
| 304 | * in place and it's ready to check if the NMIs belong to the NMI watchdog | ||
| 305 | */ | ||
| 306 | void cpu_nmi_set_wd_enabled(void) | ||
| 307 | { | ||
| 308 | __get_cpu_var(wd_enabled) = 1; | ||
| 309 | } | ||
| 310 | |||
| 292 | void setup_apic_nmi_watchdog(void *unused) | 311 | void setup_apic_nmi_watchdog(void *unused) |
| 293 | { | 312 | { |
| 294 | if (__get_cpu_var(wd_enabled)) | 313 | if (__get_cpu_var(wd_enabled)) |
| @@ -301,8 +320,6 @@ void setup_apic_nmi_watchdog(void *unused) | |||
| 301 | 320 | ||
| 302 | switch (nmi_watchdog) { | 321 | switch (nmi_watchdog) { |
| 303 | case NMI_LOCAL_APIC: | 322 | case NMI_LOCAL_APIC: |
| 304 | /* enable it before to avoid race with handler */ | ||
| 305 | __get_cpu_var(wd_enabled) = 1; | ||
| 306 | if (lapic_watchdog_init(nmi_hz) < 0) { | 323 | if (lapic_watchdog_init(nmi_hz) < 0) { |
| 307 | __get_cpu_var(wd_enabled) = 0; | 324 | __get_cpu_var(wd_enabled) = 0; |
| 308 | return; | 325 | return; |
| @@ -448,6 +465,13 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) | |||
| 448 | 465 | ||
| 449 | #ifdef CONFIG_SYSCTL | 466 | #ifdef CONFIG_SYSCTL |
| 450 | 467 | ||
| 468 | static int __init setup_unknown_nmi_panic(char *str) | ||
| 469 | { | ||
| 470 | unknown_nmi_panic = 1; | ||
| 471 | return 1; | ||
| 472 | } | ||
| 473 | __setup("unknown_nmi_panic", setup_unknown_nmi_panic); | ||
| 474 | |||
| 451 | static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) | 475 | static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) |
| 452 | { | 476 | { |
| 453 | unsigned char reason = get_nmi_reason(); | 477 | unsigned char reason = get_nmi_reason(); |
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index a23e8233b9ac..4caff39078e0 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c | |||
| @@ -33,6 +33,7 @@ | |||
| 33 | #include <asm/processor.h> | 33 | #include <asm/processor.h> |
| 34 | #include <asm/mpspec.h> | 34 | #include <asm/mpspec.h> |
| 35 | #include <asm/e820.h> | 35 | #include <asm/e820.h> |
| 36 | #include <asm/setup.h> | ||
| 36 | 37 | ||
| 37 | #define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT)) | 38 | #define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT)) |
| 38 | 39 | ||
| @@ -71,6 +72,195 @@ static void __init smp_dump_qct(void) | |||
| 71 | } | 72 | } |
| 72 | } | 73 | } |
| 73 | 74 | ||
| 75 | |||
| 76 | void __cpuinit numaq_tsc_disable(void) | ||
| 77 | { | ||
| 78 | if (!found_numaq) | ||
| 79 | return; | ||
| 80 | |||
| 81 | if (num_online_nodes() > 1) { | ||
| 82 | printk(KERN_DEBUG "NUMAQ: disabling TSC\n"); | ||
| 83 | setup_clear_cpu_cap(X86_FEATURE_TSC); | ||
| 84 | } | ||
| 85 | } | ||
| 86 | |||
| 87 | static int __init numaq_pre_time_init(void) | ||
| 88 | { | ||
| 89 | numaq_tsc_disable(); | ||
| 90 | return 0; | ||
| 91 | } | ||
| 92 | |||
| 93 | int found_numaq; | ||
| 94 | /* | ||
| 95 | * Have to match translation table entries to main table entries by counter | ||
| 96 | * hence the mpc_record variable .... can't see a less disgusting way of | ||
| 97 | * doing this .... | ||
| 98 | */ | ||
| 99 | struct mpc_config_translation { | ||
| 100 | unsigned char mpc_type; | ||
| 101 | unsigned char trans_len; | ||
| 102 | unsigned char trans_type; | ||
| 103 | unsigned char trans_quad; | ||
| 104 | unsigned char trans_global; | ||
| 105 | unsigned char trans_local; | ||
| 106 | unsigned short trans_reserved; | ||
| 107 | }; | ||
| 108 | |||
| 109 | /* x86_quirks member */ | ||
| 110 | static int mpc_record; | ||
| 111 | static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] | ||
| 112 | __cpuinitdata; | ||
| 113 | |||
| 114 | static inline int generate_logical_apicid(int quad, int phys_apicid) | ||
| 115 | { | ||
| 116 | return (quad << 4) + (phys_apicid ? phys_apicid << 1 : 1); | ||
| 117 | } | ||
| 118 | |||
| 119 | /* x86_quirks member */ | ||
| 120 | static int mpc_apic_id(struct mpc_config_processor *m) | ||
| 121 | { | ||
| 122 | int quad = translation_table[mpc_record]->trans_quad; | ||
| 123 | int logical_apicid = generate_logical_apicid(quad, m->mpc_apicid); | ||
| 124 | |||
| 125 | printk(KERN_DEBUG "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n", | ||
| 126 | m->mpc_apicid, | ||
| 127 | (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8, | ||
| 128 | (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4, | ||
| 129 | m->mpc_apicver, quad, logical_apicid); | ||
| 130 | return logical_apicid; | ||
| 131 | } | ||
| 132 | |||
| 133 | int mp_bus_id_to_node[MAX_MP_BUSSES]; | ||
| 134 | |||
| 135 | int mp_bus_id_to_local[MAX_MP_BUSSES]; | ||
| 136 | |||
| 137 | /* x86_quirks member */ | ||
| 138 | static void mpc_oem_bus_info(struct mpc_config_bus *m, char *name) | ||
| 139 | { | ||
| 140 | int quad = translation_table[mpc_record]->trans_quad; | ||
| 141 | int local = translation_table[mpc_record]->trans_local; | ||
| 142 | |||
| 143 | mp_bus_id_to_node[m->mpc_busid] = quad; | ||
| 144 | mp_bus_id_to_local[m->mpc_busid] = local; | ||
| 145 | printk(KERN_INFO "Bus #%d is %s (node %d)\n", | ||
| 146 | m->mpc_busid, name, quad); | ||
| 147 | } | ||
| 148 | |||
| 149 | int quad_local_to_mp_bus_id [NR_CPUS/4][4]; | ||
| 150 | |||
| 151 | /* x86_quirks member */ | ||
| 152 | static void mpc_oem_pci_bus(struct mpc_config_bus *m) | ||
| 153 | { | ||
| 154 | int quad = translation_table[mpc_record]->trans_quad; | ||
| 155 | int local = translation_table[mpc_record]->trans_local; | ||
| 156 | |||
| 157 | quad_local_to_mp_bus_id[quad][local] = m->mpc_busid; | ||
| 158 | } | ||
| 159 | |||
| 160 | static void __init MP_translation_info(struct mpc_config_translation *m) | ||
| 161 | { | ||
| 162 | printk(KERN_INFO | ||
| 163 | "Translation: record %d, type %d, quad %d, global %d, local %d\n", | ||
| 164 | mpc_record, m->trans_type, m->trans_quad, m->trans_global, | ||
| 165 | m->trans_local); | ||
| 166 | |||
| 167 | if (mpc_record >= MAX_MPC_ENTRY) | ||
| 168 | printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n"); | ||
| 169 | else | ||
| 170 | translation_table[mpc_record] = m; /* stash this for later */ | ||
| 171 | if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad)) | ||
| 172 | node_set_online(m->trans_quad); | ||
| 173 | } | ||
| 174 | |||
| 175 | static int __init mpf_checksum(unsigned char *mp, int len) | ||
| 176 | { | ||
| 177 | int sum = 0; | ||
| 178 | |||
| 179 | while (len--) | ||
| 180 | sum += *mp++; | ||
| 181 | |||
| 182 | return sum & 0xFF; | ||
| 183 | } | ||
| 184 | |||
| 185 | /* | ||
| 186 | * Read/parse the MPC oem tables | ||
| 187 | */ | ||
| 188 | |||
| 189 | static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, | ||
| 190 | unsigned short oemsize) | ||
| 191 | { | ||
| 192 | int count = sizeof(*oemtable); /* the header size */ | ||
| 193 | unsigned char *oemptr = ((unsigned char *)oemtable) + count; | ||
| 194 | |||
| 195 | mpc_record = 0; | ||
| 196 | printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n", | ||
| 197 | oemtable); | ||
| 198 | if (memcmp(oemtable->oem_signature, MPC_OEM_SIGNATURE, 4)) { | ||
| 199 | printk(KERN_WARNING | ||
| 200 | "SMP mpc oemtable: bad signature [%c%c%c%c]!\n", | ||
| 201 | oemtable->oem_signature[0], oemtable->oem_signature[1], | ||
| 202 | oemtable->oem_signature[2], oemtable->oem_signature[3]); | ||
| 203 | return; | ||
| 204 | } | ||
| 205 | if (mpf_checksum((unsigned char *)oemtable, oemtable->oem_length)) { | ||
| 206 | printk(KERN_WARNING "SMP oem mptable: checksum error!\n"); | ||
| 207 | return; | ||
| 208 | } | ||
| 209 | while (count < oemtable->oem_length) { | ||
| 210 | switch (*oemptr) { | ||
| 211 | case MP_TRANSLATION: | ||
| 212 | { | ||
| 213 | struct mpc_config_translation *m = | ||
| 214 | (struct mpc_config_translation *)oemptr; | ||
| 215 | MP_translation_info(m); | ||
| 216 | oemptr += sizeof(*m); | ||
| 217 | count += sizeof(*m); | ||
| 218 | ++mpc_record; | ||
| 219 | break; | ||
| 220 | } | ||
| 221 | default: | ||
| 222 | { | ||
| 223 | printk(KERN_WARNING | ||
| 224 | "Unrecognised OEM table entry type! - %d\n", | ||
| 225 | (int)*oemptr); | ||
| 226 | return; | ||
| 227 | } | ||
| 228 | } | ||
| 229 | } | ||
| 230 | } | ||
| 231 | |||
| 232 | static int __init numaq_setup_ioapic_ids(void) | ||
| 233 | { | ||
| 234 | /* so can skip it */ | ||
| 235 | return 1; | ||
| 236 | } | ||
| 237 | |||
| 238 | static struct x86_quirks numaq_x86_quirks __initdata = { | ||
| 239 | .arch_pre_time_init = numaq_pre_time_init, | ||
| 240 | .arch_time_init = NULL, | ||
| 241 | .arch_pre_intr_init = NULL, | ||
| 242 | .arch_memory_setup = NULL, | ||
| 243 | .arch_intr_init = NULL, | ||
| 244 | .arch_trap_init = NULL, | ||
| 245 | .mach_get_smp_config = NULL, | ||
| 246 | .mach_find_smp_config = NULL, | ||
| 247 | .mpc_record = &mpc_record, | ||
| 248 | .mpc_apic_id = mpc_apic_id, | ||
| 249 | .mpc_oem_bus_info = mpc_oem_bus_info, | ||
| 250 | .mpc_oem_pci_bus = mpc_oem_pci_bus, | ||
| 251 | .smp_read_mpc_oem = smp_read_mpc_oem, | ||
| 252 | .setup_ioapic_ids = numaq_setup_ioapic_ids, | ||
| 253 | }; | ||
| 254 | |||
| 255 | void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, | ||
| 256 | char *productid) | ||
| 257 | { | ||
| 258 | if (strncmp(oem, "IBM NUMA", 8)) | ||
| 259 | printk("Warning! Not a NUMA-Q system!\n"); | ||
| 260 | else | ||
| 261 | found_numaq = 1; | ||
| 262 | } | ||
| 263 | |||
| 74 | static __init void early_check_numaq(void) | 264 | static __init void early_check_numaq(void) |
| 75 | { | 265 | { |
| 76 | /* | 266 | /* |
| @@ -82,6 +272,9 @@ static __init void early_check_numaq(void) | |||
| 82 | */ | 272 | */ |
| 83 | if (smp_found_config) | 273 | if (smp_found_config) |
| 84 | early_get_smp_config(); | 274 | early_get_smp_config(); |
| 275 | |||
| 276 | if (found_numaq) | ||
| 277 | x86_quirks = &numaq_x86_quirks; | ||
| 85 | } | 278 | } |
| 86 | 279 | ||
| 87 | int __init get_memcfg_numaq(void) | 280 | int __init get_memcfg_numaq(void) |
| @@ -92,14 +285,3 @@ int __init get_memcfg_numaq(void) | |||
| 92 | smp_dump_qct(); | 285 | smp_dump_qct(); |
| 93 | return 1; | 286 | return 1; |
| 94 | } | 287 | } |
| 95 | |||
| 96 | void __init numaq_tsc_disable(void) | ||
| 97 | { | ||
| 98 | if (!found_numaq) | ||
| 99 | return; | ||
| 100 | |||
| 101 | if (num_online_nodes() > 1) { | ||
| 102 | printk(KERN_DEBUG "NUMAQ: disabling TSC\n"); | ||
| 103 | setup_clear_cpu_cap(X86_FEATURE_TSC); | ||
| 104 | } | ||
| 105 | } | ||
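The NUMA-Q code moved here packs the quad number into the high nibble of the logical APIC id and the doubled physical id (or 1 for CPU 0) into the low nibble. A couple of worked values, as a self-checking sketch:

#include <assert.h>

/* Same mapping as generate_logical_apicid() above. */
static int logical_apicid(int quad, int phys_apicid)
{
	return (quad << 4) + (phys_apicid ? phys_apicid << 1 : 1);
}

int main(void)
{
	assert(logical_apicid(2, 3) == 0x26);	/* quad 2, cpu 3 */
	assert(logical_apicid(1, 0) == 0x11);	/* quad 1, cpu 0 */
	return 0;
}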
diff --git a/arch/x86/kernel/olpc.c b/arch/x86/kernel/olpc.c index 3e6672274807..7a13fac63a1f 100644 --- a/arch/x86/kernel/olpc.c +++ b/arch/x86/kernel/olpc.c | |||
| @@ -190,12 +190,12 @@ EXPORT_SYMBOL_GPL(olpc_ec_cmd); | |||
| 190 | static void __init platform_detect(void) | 190 | static void __init platform_detect(void) |
| 191 | { | 191 | { |
| 192 | size_t propsize; | 192 | size_t propsize; |
| 193 | u32 rev; | 193 | __be32 rev; |
| 194 | 194 | ||
| 195 | if (ofw("getprop", 4, 1, NULL, "board-revision-int", &rev, 4, | 195 | if (ofw("getprop", 4, 1, NULL, "board-revision-int", &rev, 4, |
| 196 | &propsize) || propsize != 4) { | 196 | &propsize) || propsize != 4) { |
| 197 | printk(KERN_ERR "ofw: getprop call failed!\n"); | 197 | printk(KERN_ERR "ofw: getprop call failed!\n"); |
| 198 | rev = 0; | 198 | rev = cpu_to_be32(0); |
| 199 | } | 199 | } |
| 200 | olpc_platform_info.boardrev = be32_to_cpu(rev); | 200 | olpc_platform_info.boardrev = be32_to_cpu(rev); |
| 201 | } | 201 | } |
| @@ -203,7 +203,7 @@ static void __init platform_detect(void) | |||
| 203 | static void __init platform_detect(void) | 203 | static void __init platform_detect(void) |
| 204 | { | 204 | { |
| 205 | /* stopgap until OFW support is added to the kernel */ | 205 | /* stopgap until OFW support is added to the kernel */ |
| 206 | olpc_platform_info.boardrev = be32_to_cpu(0xc2); | 206 | olpc_platform_info.boardrev = 0xc2; |
| 207 | } | 207 | } |
| 208 | #endif | 208 | #endif |
| 209 | 209 | ||
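The OLPC change above keeps the Open Firmware property in wire (big-endian) order until it is consumed, so the fallback value must also be stored big-endian and the conversion happens exactly once. A small sketch of that rule, using ntohl()/htonl() as stand-ins for be32_to_cpu()/cpu_to_be32():

#include <stdint.h>
#include <arpa/inet.h>

/* Hypothetical helper: 'be_rev' holds the raw big-endian property value. */
static uint32_t board_rev_from_prop(uint32_t be_rev, int prop_ok)
{
	if (!prop_ok)
		be_rev = htonl(0);	/* keep the stored value big-endian */
	return ntohl(be_rev);		/* convert exactly once when consuming */
}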
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c new file mode 100644 index 000000000000..0e9f1982b1dd --- /dev/null +++ b/arch/x86/kernel/paravirt-spinlocks.c | |||
| @@ -0,0 +1,37 @@ | |||
| 1 | /* | ||
| 2 | * Split spinlock implementation out into its own file, so it can be | ||
| 3 | * compiled in a FTRACE-compatible way. | ||
| 4 | */ | ||
| 5 | #include <linux/spinlock.h> | ||
| 6 | #include <linux/module.h> | ||
| 7 | |||
| 8 | #include <asm/paravirt.h> | ||
| 9 | |||
| 10 | static void default_spin_lock_flags(struct raw_spinlock *lock, unsigned long flags) | ||
| 11 | { | ||
| 12 | __raw_spin_lock(lock); | ||
| 13 | } | ||
| 14 | |||
| 15 | struct pv_lock_ops pv_lock_ops = { | ||
| 16 | #ifdef CONFIG_SMP | ||
| 17 | .spin_is_locked = __ticket_spin_is_locked, | ||
| 18 | .spin_is_contended = __ticket_spin_is_contended, | ||
| 19 | |||
| 20 | .spin_lock = __ticket_spin_lock, | ||
| 21 | .spin_lock_flags = default_spin_lock_flags, | ||
| 22 | .spin_trylock = __ticket_spin_trylock, | ||
| 23 | .spin_unlock = __ticket_spin_unlock, | ||
| 24 | #endif | ||
| 25 | }; | ||
| 26 | EXPORT_SYMBOL(pv_lock_ops); | ||
| 27 | |||
| 28 | void __init paravirt_use_bytelocks(void) | ||
| 29 | { | ||
| 30 | #ifdef CONFIG_SMP | ||
| 31 | pv_lock_ops.spin_is_locked = __byte_spin_is_locked; | ||
| 32 | pv_lock_ops.spin_is_contended = __byte_spin_is_contended; | ||
| 33 | pv_lock_ops.spin_lock = __byte_spin_lock; | ||
| 34 | pv_lock_ops.spin_trylock = __byte_spin_trylock; | ||
| 35 | pv_lock_ops.spin_unlock = __byte_spin_unlock; | ||
| 36 | #endif | ||
| 37 | } | ||
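pv_lock_ops above is the usual paravirt ops-table pattern: spinlock entry points go through function pointers that default to the native ticket-lock routines and can be repointed (e.g. to byte locks) early in boot. A generic sketch of the pattern, with made-up types and names:

/* Illustrative ops table; not the kernel's pv_lock_ops types. */
struct lock_ops {
	void (*lock)(int *l);
	void (*unlock)(int *l);
};

static void native_lock(int *l)   { while (__sync_lock_test_and_set(l, 1)) ; }
static void native_unlock(int *l) { __sync_lock_release(l); }

static struct lock_ops lock_ops = {
	.lock   = native_lock,
	.unlock = native_unlock,
};

/* A paravirt backend would override the pointers during early boot. */
void use_backend(void (*lk)(int *), void (*ul)(int *))
{
	lock_ops.lock = lk;
	lock_ops.unlock = ul;
}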
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index e0f571d58c19..e4c8fb608873 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c | |||
| @@ -29,6 +29,7 @@ | |||
| 29 | #include <asm/desc.h> | 29 | #include <asm/desc.h> |
| 30 | #include <asm/setup.h> | 30 | #include <asm/setup.h> |
| 31 | #include <asm/arch_hooks.h> | 31 | #include <asm/arch_hooks.h> |
| 32 | #include <asm/pgtable.h> | ||
| 32 | #include <asm/time.h> | 33 | #include <asm/time.h> |
| 33 | #include <asm/pgalloc.h> | 34 | #include <asm/pgalloc.h> |
| 34 | #include <asm/irq.h> | 35 | #include <asm/irq.h> |
| @@ -123,6 +124,7 @@ static void *get_call_destination(u8 type) | |||
| 123 | .pv_irq_ops = pv_irq_ops, | 124 | .pv_irq_ops = pv_irq_ops, |
| 124 | .pv_apic_ops = pv_apic_ops, | 125 | .pv_apic_ops = pv_apic_ops, |
| 125 | .pv_mmu_ops = pv_mmu_ops, | 126 | .pv_mmu_ops = pv_mmu_ops, |
| 127 | .pv_lock_ops = pv_lock_ops, | ||
| 126 | }; | 128 | }; |
| 127 | return *((void **)&tmpl + type); | 129 | return *((void **)&tmpl + type); |
| 128 | } | 130 | } |
| @@ -317,6 +319,7 @@ struct pv_cpu_ops pv_cpu_ops = { | |||
| 317 | #endif | 319 | #endif |
| 318 | .wbinvd = native_wbinvd, | 320 | .wbinvd = native_wbinvd, |
| 319 | .read_msr = native_read_msr_safe, | 321 | .read_msr = native_read_msr_safe, |
| 322 | .read_msr_amd = native_read_msr_amd_safe, | ||
| 320 | .write_msr = native_write_msr_safe, | 323 | .write_msr = native_write_msr_safe, |
| 321 | .read_tsc = native_read_tsc, | 324 | .read_tsc = native_read_tsc, |
| 322 | .read_pmc = native_read_pmc, | 325 | .read_pmc = native_read_pmc, |
| @@ -335,6 +338,10 @@ struct pv_cpu_ops pv_cpu_ops = { | |||
| 335 | .write_ldt_entry = native_write_ldt_entry, | 338 | .write_ldt_entry = native_write_ldt_entry, |
| 336 | .write_gdt_entry = native_write_gdt_entry, | 339 | .write_gdt_entry = native_write_gdt_entry, |
| 337 | .write_idt_entry = native_write_idt_entry, | 340 | .write_idt_entry = native_write_idt_entry, |
| 341 | |||
| 342 | .alloc_ldt = paravirt_nop, | ||
| 343 | .free_ldt = paravirt_nop, | ||
| 344 | |||
| 338 | .load_sp0 = native_load_sp0, | 345 | .load_sp0 = native_load_sp0, |
| 339 | 346 | ||
| 340 | #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) | 347 | #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) |
| @@ -360,9 +367,6 @@ struct pv_cpu_ops pv_cpu_ops = { | |||
| 360 | 367 | ||
| 361 | struct pv_apic_ops pv_apic_ops = { | 368 | struct pv_apic_ops pv_apic_ops = { |
| 362 | #ifdef CONFIG_X86_LOCAL_APIC | 369 | #ifdef CONFIG_X86_LOCAL_APIC |
| 363 | .apic_write = native_apic_write, | ||
| 364 | .apic_write_atomic = native_apic_write_atomic, | ||
| 365 | .apic_read = native_apic_read, | ||
| 366 | .setup_boot_clock = setup_boot_APIC_clock, | 370 | .setup_boot_clock = setup_boot_APIC_clock, |
| 367 | .setup_secondary_clock = setup_secondary_APIC_clock, | 371 | .setup_secondary_clock = setup_secondary_APIC_clock, |
| 368 | .startup_ipi_hook = paravirt_nop, | 372 | .startup_ipi_hook = paravirt_nop, |
| @@ -373,6 +377,9 @@ struct pv_mmu_ops pv_mmu_ops = { | |||
| 373 | #ifndef CONFIG_X86_64 | 377 | #ifndef CONFIG_X86_64 |
| 374 | .pagetable_setup_start = native_pagetable_setup_start, | 378 | .pagetable_setup_start = native_pagetable_setup_start, |
| 375 | .pagetable_setup_done = native_pagetable_setup_done, | 379 | .pagetable_setup_done = native_pagetable_setup_done, |
| 380 | #else | ||
| 381 | .pagetable_setup_start = paravirt_nop, | ||
| 382 | .pagetable_setup_done = paravirt_nop, | ||
| 376 | #endif | 383 | #endif |
| 377 | 384 | ||
| 378 | .read_cr2 = native_read_cr2, | 385 | .read_cr2 = native_read_cr2, |
| @@ -428,7 +435,7 @@ struct pv_mmu_ops pv_mmu_ops = { | |||
| 428 | #endif /* PAGETABLE_LEVELS >= 3 */ | 435 | #endif /* PAGETABLE_LEVELS >= 3 */ |
| 429 | 436 | ||
| 430 | .pte_val = native_pte_val, | 437 | .pte_val = native_pte_val, |
| 431 | .pte_flags = native_pte_val, | 438 | .pte_flags = native_pte_flags, |
| 432 | .pgd_val = native_pgd_val, | 439 | .pgd_val = native_pgd_val, |
| 433 | 440 | ||
| 434 | .make_pte = native_make_pte, | 441 | .make_pte = native_make_pte, |
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c index 58262218781b..9fe644f4861d 100644 --- a/arch/x86/kernel/paravirt_patch_32.c +++ b/arch/x86/kernel/paravirt_patch_32.c | |||
| @@ -23,7 +23,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, | |||
| 23 | start = start_##ops##_##x; \ | 23 | start = start_##ops##_##x; \ |
| 24 | end = end_##ops##_##x; \ | 24 | end = end_##ops##_##x; \ |
| 25 | goto patch_site | 25 | goto patch_site |
| 26 | switch(type) { | 26 | switch (type) { |
| 27 | PATCH_SITE(pv_irq_ops, irq_disable); | 27 | PATCH_SITE(pv_irq_ops, irq_disable); |
| 28 | PATCH_SITE(pv_irq_ops, irq_enable); | 28 | PATCH_SITE(pv_irq_ops, irq_enable); |
| 29 | PATCH_SITE(pv_irq_ops, restore_fl); | 29 | PATCH_SITE(pv_irq_ops, restore_fl); |
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 6959b5c45df4..e1e731d78f38 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c | |||
| @@ -29,6 +29,7 @@ | |||
| 29 | #include <linux/mm.h> | 29 | #include <linux/mm.h> |
| 30 | #include <linux/spinlock.h> | 30 | #include <linux/spinlock.h> |
| 31 | #include <linux/string.h> | 31 | #include <linux/string.h> |
| 32 | #include <linux/crash_dump.h> | ||
| 32 | #include <linux/dma-mapping.h> | 33 | #include <linux/dma-mapping.h> |
| 33 | #include <linux/bitops.h> | 34 | #include <linux/bitops.h> |
| 34 | #include <linux/pci_ids.h> | 35 | #include <linux/pci_ids.h> |
| @@ -36,7 +37,8 @@ | |||
| 36 | #include <linux/delay.h> | 37 | #include <linux/delay.h> |
| 37 | #include <linux/scatterlist.h> | 38 | #include <linux/scatterlist.h> |
| 38 | #include <linux/iommu-helper.h> | 39 | #include <linux/iommu-helper.h> |
| 39 | #include <asm/gart.h> | 40 | |
| 41 | #include <asm/iommu.h> | ||
| 40 | #include <asm/calgary.h> | 42 | #include <asm/calgary.h> |
| 41 | #include <asm/tce.h> | 43 | #include <asm/tce.h> |
| 42 | #include <asm/pci-direct.h> | 44 | #include <asm/pci-direct.h> |
| @@ -167,6 +169,8 @@ static void calgary_dump_error_regs(struct iommu_table *tbl); | |||
| 167 | static void calioc2_handle_quirks(struct iommu_table *tbl, struct pci_dev *dev); | 169 | static void calioc2_handle_quirks(struct iommu_table *tbl, struct pci_dev *dev); |
| 168 | static void calioc2_tce_cache_blast(struct iommu_table *tbl); | 170 | static void calioc2_tce_cache_blast(struct iommu_table *tbl); |
| 169 | static void calioc2_dump_error_regs(struct iommu_table *tbl); | 171 | static void calioc2_dump_error_regs(struct iommu_table *tbl); |
| 172 | static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl); | ||
| 173 | static void get_tce_space_from_tar(void); | ||
| 170 | 174 | ||
| 171 | static struct cal_chipset_ops calgary_chip_ops = { | 175 | static struct cal_chipset_ops calgary_chip_ops = { |
| 172 | .handle_quirks = calgary_handle_quirks, | 176 | .handle_quirks = calgary_handle_quirks, |
| @@ -213,16 +217,6 @@ static inline unsigned long verify_bit_range(unsigned long* bitmap, | |||
| 213 | 217 | ||
| 214 | #endif /* CONFIG_IOMMU_DEBUG */ | 218 | #endif /* CONFIG_IOMMU_DEBUG */ |
| 215 | 219 | ||
| 216 | static inline unsigned int num_dma_pages(unsigned long dma, unsigned int dmalen) | ||
| 217 | { | ||
| 218 | unsigned int npages; | ||
| 219 | |||
| 220 | npages = PAGE_ALIGN(dma + dmalen) - (dma & PAGE_MASK); | ||
| 221 | npages >>= PAGE_SHIFT; | ||
| 222 | |||
| 223 | return npages; | ||
| 224 | } | ||
| 225 | |||
| 226 | static inline int translation_enabled(struct iommu_table *tbl) | 220 | static inline int translation_enabled(struct iommu_table *tbl) |
| 227 | { | 221 | { |
| 228 | /* only PHBs with translation enabled have an IOMMU table */ | 222 | /* only PHBs with translation enabled have an IOMMU table */ |
| @@ -257,7 +251,7 @@ static void iommu_range_reserve(struct iommu_table *tbl, | |||
| 257 | badbit, tbl, start_addr, npages); | 251 | badbit, tbl, start_addr, npages); |
| 258 | } | 252 | } |
| 259 | 253 | ||
| 260 | set_bit_string(tbl->it_map, index, npages); | 254 | iommu_area_reserve(tbl->it_map, index, npages); |
| 261 | 255 | ||
| 262 | spin_unlock_irqrestore(&tbl->it_lock, flags); | 256 | spin_unlock_irqrestore(&tbl->it_lock, flags); |
| 263 | } | 257 | } |
| @@ -339,9 +333,8 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, | |||
| 339 | /* were we called with bad_dma_address? */ | 333 | /* were we called with bad_dma_address? */ |
| 340 | badend = bad_dma_address + (EMERGENCY_PAGES * PAGE_SIZE); | 334 | badend = bad_dma_address + (EMERGENCY_PAGES * PAGE_SIZE); |
| 341 | if (unlikely((dma_addr >= bad_dma_address) && (dma_addr < badend))) { | 335 | if (unlikely((dma_addr >= bad_dma_address) && (dma_addr < badend))) { |
| 342 | printk(KERN_ERR "Calgary: driver tried unmapping bad DMA " | 336 | WARN(1, KERN_ERR "Calgary: driver tried unmapping bad DMA " |
| 343 | "address 0x%Lx\n", dma_addr); | 337 | "address 0x%Lx\n", dma_addr); |
| 344 | WARN_ON(1); | ||
| 345 | return; | 338 | return; |
| 346 | } | 339 | } |
| 347 | 340 | ||
| @@ -405,27 +398,11 @@ static void calgary_unmap_sg(struct device *dev, | |||
| 405 | if (dmalen == 0) | 398 | if (dmalen == 0) |
| 406 | break; | 399 | break; |
| 407 | 400 | ||
| 408 | npages = num_dma_pages(dma, dmalen); | 401 | npages = iommu_num_pages(dma, dmalen, PAGE_SIZE); |
| 409 | iommu_free(tbl, dma, npages); | 402 | iommu_free(tbl, dma, npages); |
| 410 | } | 403 | } |
| 411 | } | 404 | } |
| 412 | 405 | ||
| 413 | static int calgary_nontranslate_map_sg(struct device* dev, | ||
| 414 | struct scatterlist *sg, int nelems, int direction) | ||
| 415 | { | ||
| 416 | struct scatterlist *s; | ||
| 417 | int i; | ||
| 418 | |||
| 419 | for_each_sg(sg, s, nelems, i) { | ||
| 420 | struct page *p = sg_page(s); | ||
| 421 | |||
| 422 | BUG_ON(!p); | ||
| 423 | s->dma_address = virt_to_bus(sg_virt(s)); | ||
| 424 | s->dma_length = s->length; | ||
| 425 | } | ||
| 426 | return nelems; | ||
| 427 | } | ||
| 428 | |||
| 429 | static int calgary_map_sg(struct device *dev, struct scatterlist *sg, | 406 | static int calgary_map_sg(struct device *dev, struct scatterlist *sg, |
| 430 | int nelems, int direction) | 407 | int nelems, int direction) |
| 431 | { | 408 | { |
| @@ -436,14 +413,11 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg, | |||
| 436 | unsigned long entry; | 413 | unsigned long entry; |
| 437 | int i; | 414 | int i; |
| 438 | 415 | ||
| 439 | if (!translation_enabled(tbl)) | ||
| 440 | return calgary_nontranslate_map_sg(dev, sg, nelems, direction); | ||
| 441 | |||
| 442 | for_each_sg(sg, s, nelems, i) { | 416 | for_each_sg(sg, s, nelems, i) { |
| 443 | BUG_ON(!sg_page(s)); | 417 | BUG_ON(!sg_page(s)); |
| 444 | 418 | ||
| 445 | vaddr = (unsigned long) sg_virt(s); | 419 | vaddr = (unsigned long) sg_virt(s); |
| 446 | npages = num_dma_pages(vaddr, s->length); | 420 | npages = iommu_num_pages(vaddr, s->length, PAGE_SIZE); |
| 447 | 421 | ||
| 448 | entry = iommu_range_alloc(dev, tbl, npages); | 422 | entry = iommu_range_alloc(dev, tbl, npages); |
| 449 | if (entry == bad_dma_address) { | 423 | if (entry == bad_dma_address) { |
| @@ -474,21 +448,15 @@ error: | |||
| 474 | static dma_addr_t calgary_map_single(struct device *dev, phys_addr_t paddr, | 448 | static dma_addr_t calgary_map_single(struct device *dev, phys_addr_t paddr, |
| 475 | size_t size, int direction) | 449 | size_t size, int direction) |
| 476 | { | 450 | { |
| 477 | dma_addr_t dma_handle = bad_dma_address; | ||
| 478 | void *vaddr = phys_to_virt(paddr); | 451 | void *vaddr = phys_to_virt(paddr); |
| 479 | unsigned long uaddr; | 452 | unsigned long uaddr; |
| 480 | unsigned int npages; | 453 | unsigned int npages; |
| 481 | struct iommu_table *tbl = find_iommu_table(dev); | 454 | struct iommu_table *tbl = find_iommu_table(dev); |
| 482 | 455 | ||
| 483 | uaddr = (unsigned long)vaddr; | 456 | uaddr = (unsigned long)vaddr; |
| 484 | npages = num_dma_pages(uaddr, size); | 457 | npages = iommu_num_pages(uaddr, size, PAGE_SIZE); |
| 485 | 458 | ||
| 486 | if (translation_enabled(tbl)) | 459 | return iommu_alloc(dev, tbl, vaddr, npages, direction); |
| 487 | dma_handle = iommu_alloc(dev, tbl, vaddr, npages, direction); | ||
| 488 | else | ||
| 489 | dma_handle = virt_to_bus(vaddr); | ||
| 490 | |||
| 491 | return dma_handle; | ||
| 492 | } | 460 | } |
| 493 | 461 | ||
| 494 | static void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle, | 462 | static void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle, |
| @@ -497,10 +465,7 @@ static void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle, | |||
| 497 | struct iommu_table *tbl = find_iommu_table(dev); | 465 | struct iommu_table *tbl = find_iommu_table(dev); |
| 498 | unsigned int npages; | 466 | unsigned int npages; |
| 499 | 467 | ||
| 500 | if (!translation_enabled(tbl)) | 468 | npages = iommu_num_pages(dma_handle, size, PAGE_SIZE); |
| 501 | return; | ||
| 502 | |||
| 503 | npages = num_dma_pages(dma_handle, size); | ||
| 504 | iommu_free(tbl, dma_handle, npages); | 469 | iommu_free(tbl, dma_handle, npages); |
| 505 | } | 470 | } |
| 506 | 471 | ||
| @@ -516,24 +481,20 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size, | |||
| 516 | npages = size >> PAGE_SHIFT; | 481 | npages = size >> PAGE_SHIFT; |
| 517 | order = get_order(size); | 482 | order = get_order(size); |
| 518 | 483 | ||
| 484 | flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); | ||
| 485 | |||
| 519 | /* alloc enough pages (and possibly more) */ | 486 | /* alloc enough pages (and possibly more) */ |
| 520 | ret = (void *)__get_free_pages(flag, order); | 487 | ret = (void *)__get_free_pages(flag, order); |
| 521 | if (!ret) | 488 | if (!ret) |
| 522 | goto error; | 489 | goto error; |
| 523 | memset(ret, 0, size); | 490 | memset(ret, 0, size); |
| 524 | 491 | ||
| 525 | if (translation_enabled(tbl)) { | 492 | /* set up tces to cover the allocated range */ |
| 526 | /* set up tces to cover the allocated range */ | 493 | mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL); |
| 527 | mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL); | 494 | if (mapping == bad_dma_address) |
| 528 | if (mapping == bad_dma_address) | 495 | goto free; |
| 529 | goto free; | 496 | *dma_handle = mapping; |
| 530 | |||
| 531 | *dma_handle = mapping; | ||
| 532 | } else /* non translated slot */ | ||
| 533 | *dma_handle = virt_to_bus(ret); | ||
| 534 | |||
| 535 | return ret; | 497 | return ret; |
| 536 | |||
| 537 | free: | 498 | free: |
| 538 | free_pages((unsigned long)ret, get_order(size)); | 499 | free_pages((unsigned long)ret, get_order(size)); |
| 539 | ret = NULL; | 500 | ret = NULL; |
| @@ -541,8 +502,22 @@ error: | |||
| 541 | return ret; | 502 | return ret; |
| 542 | } | 503 | } |
| 543 | 504 | ||
| 544 | static const struct dma_mapping_ops calgary_dma_ops = { | 505 | static void calgary_free_coherent(struct device *dev, size_t size, |
| 506 | void *vaddr, dma_addr_t dma_handle) | ||
| 507 | { | ||
| 508 | unsigned int npages; | ||
| 509 | struct iommu_table *tbl = find_iommu_table(dev); | ||
| 510 | |||
| 511 | size = PAGE_ALIGN(size); | ||
| 512 | npages = size >> PAGE_SHIFT; | ||
| 513 | |||
| 514 | iommu_free(tbl, dma_handle, npages); | ||
| 515 | free_pages((unsigned long)vaddr, get_order(size)); | ||
| 516 | } | ||
| 517 | |||
| 518 | static struct dma_mapping_ops calgary_dma_ops = { | ||
| 545 | .alloc_coherent = calgary_alloc_coherent, | 519 | .alloc_coherent = calgary_alloc_coherent, |
| 520 | .free_coherent = calgary_free_coherent, | ||
| 546 | .map_single = calgary_map_single, | 521 | .map_single = calgary_map_single, |
| 547 | .unmap_single = calgary_unmap_single, | 522 | .unmap_single = calgary_unmap_single, |
| 548 | .map_sg = calgary_map_sg, | 523 | .map_sg = calgary_map_sg, |
| @@ -830,7 +805,11 @@ static int __init calgary_setup_tar(struct pci_dev *dev, void __iomem *bbar) | |||
| 830 | 805 | ||
| 831 | tbl = pci_iommu(dev->bus); | 806 | tbl = pci_iommu(dev->bus); |
| 832 | tbl->it_base = (unsigned long)bus_info[dev->bus->number].tce_space; | 807 | tbl->it_base = (unsigned long)bus_info[dev->bus->number].tce_space; |
| 833 | tce_free(tbl, 0, tbl->it_size); | 808 | |
| 809 | if (is_kdump_kernel()) | ||
| 810 | calgary_init_bitmap_from_tce_table(tbl); | ||
| 811 | else | ||
| 812 | tce_free(tbl, 0, tbl->it_size); | ||
| 834 | 813 | ||
| 835 | if (is_calgary(dev->device)) | 814 | if (is_calgary(dev->device)) |
| 836 | tbl->chip_ops = &calgary_chip_ops; | 815 | tbl->chip_ops = &calgary_chip_ops; |
| @@ -1209,6 +1188,10 @@ static int __init calgary_init(void) | |||
| 1209 | if (ret) | 1188 | if (ret) |
| 1210 | return ret; | 1189 | return ret; |
| 1211 | 1190 | ||
| 1191 | /* Purely for kdump kernel case */ | ||
| 1192 | if (is_kdump_kernel()) | ||
| 1193 | get_tce_space_from_tar(); | ||
| 1194 | |||
| 1212 | do { | 1195 | do { |
| 1213 | dev = pci_get_device(PCI_VENDOR_ID_IBM, PCI_ANY_ID, dev); | 1196 | dev = pci_get_device(PCI_VENDOR_ID_IBM, PCI_ANY_ID, dev); |
| 1214 | if (!dev) | 1197 | if (!dev) |
| @@ -1230,6 +1213,16 @@ static int __init calgary_init(void) | |||
| 1230 | goto error; | 1213 | goto error; |
| 1231 | } while (1); | 1214 | } while (1); |
| 1232 | 1215 | ||
| 1216 | dev = NULL; | ||
| 1217 | for_each_pci_dev(dev) { | ||
| 1218 | struct iommu_table *tbl; | ||
| 1219 | |||
| 1220 | tbl = find_iommu_table(&dev->dev); | ||
| 1221 | |||
| 1222 | if (translation_enabled(tbl)) | ||
| 1223 | dev->dev.archdata.dma_ops = &calgary_dma_ops; | ||
| 1224 | } | ||
| 1225 | |||
| 1233 | return ret; | 1226 | return ret; |
| 1234 | 1227 | ||
| 1235 | error: | 1228 | error: |
| @@ -1251,6 +1244,7 @@ error: | |||
| 1251 | calgary_disable_translation(dev); | 1244 | calgary_disable_translation(dev); |
| 1252 | calgary_free_bus(dev); | 1245 | calgary_free_bus(dev); |
| 1253 | pci_dev_put(dev); /* Undo calgary_init_one()'s pci_dev_get() */ | 1246 | pci_dev_put(dev); /* Undo calgary_init_one()'s pci_dev_get() */ |
| 1247 | dev->dev.archdata.dma_ops = NULL; | ||
| 1254 | } while (1); | 1248 | } while (1); |
| 1255 | 1249 | ||
| 1256 | return ret; | 1250 | return ret; |
| @@ -1280,13 +1274,15 @@ static inline int __init determine_tce_table_size(u64 ram) | |||
| 1280 | static int __init build_detail_arrays(void) | 1274 | static int __init build_detail_arrays(void) |
| 1281 | { | 1275 | { |
| 1282 | unsigned long ptr; | 1276 | unsigned long ptr; |
| 1283 | int i, scal_detail_size, rio_detail_size; | 1277 | unsigned numnodes, i; |
| 1278 | int scal_detail_size, rio_detail_size; | ||
| 1284 | 1279 | ||
| 1285 | if (rio_table_hdr->num_scal_dev > MAX_NUMNODES){ | 1280 | numnodes = rio_table_hdr->num_scal_dev; |
| 1281 | if (numnodes > MAX_NUMNODES){ | ||
| 1286 | printk(KERN_WARNING | 1282 | printk(KERN_WARNING |
| 1287 | "Calgary: MAX_NUMNODES too low! Defined as %d, " | 1283 | "Calgary: MAX_NUMNODES too low! Defined as %d, " |
| 1288 | "but system has %d nodes.\n", | 1284 | "but system has %d nodes.\n", |
| 1289 | MAX_NUMNODES, rio_table_hdr->num_scal_dev); | 1285 | MAX_NUMNODES, numnodes); |
| 1290 | return -ENODEV; | 1286 | return -ENODEV; |
| 1291 | } | 1287 | } |
| 1292 | 1288 | ||
| @@ -1307,8 +1303,7 @@ static int __init build_detail_arrays(void) | |||
| 1307 | } | 1303 | } |
| 1308 | 1304 | ||
| 1309 | ptr = ((unsigned long)rio_table_hdr) + 3; | 1305 | ptr = ((unsigned long)rio_table_hdr) + 3; |
| 1310 | for (i = 0; i < rio_table_hdr->num_scal_dev; | 1306 | for (i = 0; i < numnodes; i++, ptr += scal_detail_size) |
| 1311 | i++, ptr += scal_detail_size) | ||
| 1312 | scal_devs[i] = (struct scal_detail *)ptr; | 1307 | scal_devs[i] = (struct scal_detail *)ptr; |
| 1313 | 1308 | ||
| 1314 | for (i = 0; i < rio_table_hdr->num_rio_dev; | 1309 | for (i = 0; i < rio_table_hdr->num_rio_dev; |
| @@ -1339,6 +1334,61 @@ static int __init calgary_bus_has_devices(int bus, unsigned short pci_dev) | |||
| 1339 | return (val != 0xffffffff); | 1334 | return (val != 0xffffffff); |
| 1340 | } | 1335 | } |
| 1341 | 1336 | ||
| 1337 | /* | ||
| 1338 | * calgary_init_bitmap_from_tce_table(): | ||
| 1339 | * Function for kdump case. In the second/kdump kernel initialize | ||
| 1340 | * the bitmap based on the tce table entries obtained from first kernel | ||
| 1341 | */ | ||
| 1342 | static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl) | ||
| 1343 | { | ||
| 1344 | u64 *tp; | ||
| 1345 | unsigned int index; | ||
| 1346 | tp = ((u64 *)tbl->it_base); | ||
| 1347 | for (index = 0 ; index < tbl->it_size; index++) { | ||
| 1348 | if (*tp != 0x0) | ||
| 1349 | set_bit(index, tbl->it_map); | ||
| 1350 | tp++; | ||
| 1351 | } | ||
| 1352 | } | ||
| 1353 | |||
| 1354 | /* | ||
| 1355 | * get_tce_space_from_tar(): | ||
| 1356 | * Function for kdump case. Get the tce tables from first kernel | ||
| 1357 | * by reading the contents of the base address register of calgary iommu | ||
| 1358 | */ | ||
| 1359 | static void __init get_tce_space_from_tar(void) | ||
| 1360 | { | ||
| 1361 | int bus; | ||
| 1362 | void __iomem *target; | ||
| 1363 | unsigned long tce_space; | ||
| 1364 | |||
| 1365 | for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) { | ||
| 1366 | struct calgary_bus_info *info = &bus_info[bus]; | ||
| 1367 | unsigned short pci_device; | ||
| 1368 | u32 val; | ||
| 1369 | |||
| 1370 | val = read_pci_config(bus, 0, 0, 0); | ||
| 1371 | pci_device = (val & 0xFFFF0000) >> 16; | ||
| 1372 | |||
| 1373 | if (!is_cal_pci_dev(pci_device)) | ||
| 1374 | continue; | ||
| 1375 | if (info->translation_disabled) | ||
| 1376 | continue; | ||
| 1377 | |||
| 1378 | if (calgary_bus_has_devices(bus, pci_device) || | ||
| 1379 | translate_empty_slots) { | ||
| 1380 | target = calgary_reg(bus_info[bus].bbar, | ||
| 1381 | tar_offset(bus)); | ||
| 1382 | tce_space = be64_to_cpu(readq(target)); | ||
| 1383 | tce_space = tce_space & TAR_SW_BITS; | ||
| 1384 | |||
| 1385 | tce_space = tce_space & (~specified_table_size); | ||
| 1386 | info->tce_space = (u64 *)__va(tce_space); | ||
| 1387 | } | ||
| 1388 | } | ||
| 1389 | return; | ||
| 1390 | } | ||
| 1391 | |||
| 1342 | void __init detect_calgary(void) | 1392 | void __init detect_calgary(void) |
| 1343 | { | 1393 | { |
| 1344 | int bus; | 1394 | int bus; |
| @@ -1394,7 +1444,8 @@ void __init detect_calgary(void) | |||
| 1394 | return; | 1444 | return; |
| 1395 | } | 1445 | } |
| 1396 | 1446 | ||
| 1397 | specified_table_size = determine_tce_table_size(max_pfn * PAGE_SIZE); | 1447 | specified_table_size = determine_tce_table_size((is_kdump_kernel() ? |
| 1448 | saved_max_pfn : max_pfn) * PAGE_SIZE); | ||
| 1398 | 1449 | ||
| 1399 | for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) { | 1450 | for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) { |
| 1400 | struct calgary_bus_info *info = &bus_info[bus]; | 1451 | struct calgary_bus_info *info = &bus_info[bus]; |
| @@ -1412,10 +1463,16 @@ void __init detect_calgary(void) | |||
| 1412 | 1463 | ||
| 1413 | if (calgary_bus_has_devices(bus, pci_device) || | 1464 | if (calgary_bus_has_devices(bus, pci_device) || |
| 1414 | translate_empty_slots) { | 1465 | translate_empty_slots) { |
| 1415 | tbl = alloc_tce_table(); | 1466 | /* |
| 1416 | if (!tbl) | 1467 | * If it is kdump kernel, find and use tce tables |
| 1417 | goto cleanup; | 1468 | * from first kernel, else allocate tce tables here |
| 1418 | info->tce_space = tbl; | 1469 | */ |
| 1470 | if (!is_kdump_kernel()) { | ||
| 1471 | tbl = alloc_tce_table(); | ||
| 1472 | if (!tbl) | ||
| 1473 | goto cleanup; | ||
| 1474 | info->tce_space = tbl; | ||
| 1475 | } | ||
| 1419 | calgary_found = 1; | 1476 | calgary_found = 1; |
| 1420 | } | 1477 | } |
| 1421 | } | 1478 | } |
| @@ -1430,6 +1487,10 @@ void __init detect_calgary(void) | |||
| 1430 | printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d, " | 1487 | printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d, " |
| 1431 | "CONFIG_IOMMU_DEBUG is %s.\n", specified_table_size, | 1488 | "CONFIG_IOMMU_DEBUG is %s.\n", specified_table_size, |
| 1432 | debugging ? "enabled" : "disabled"); | 1489 | debugging ? "enabled" : "disabled"); |
| 1490 | |||
| 1491 | /* swiotlb for devices that aren't behind the Calgary. */ | ||
| 1492 | if (max_pfn > MAX_DMA32_PFN) | ||
| 1493 | swiotlb = 1; | ||
| 1433 | } | 1494 | } |
| 1434 | return; | 1495 | return; |
| 1435 | 1496 | ||
| @@ -1446,7 +1507,7 @@ int __init calgary_iommu_init(void) | |||
| 1446 | { | 1507 | { |
| 1447 | int ret; | 1508 | int ret; |
| 1448 | 1509 | ||
| 1449 | if (no_iommu || swiotlb) | 1510 | if (no_iommu || (swiotlb && !calgary_detected)) |
| 1450 | return -ENODEV; | 1511 | return -ENODEV; |
| 1451 | 1512 | ||
| 1452 | if (!calgary_detected) | 1513 | if (!calgary_detected) |
| @@ -1459,15 +1520,14 @@ int __init calgary_iommu_init(void) | |||
| 1459 | if (ret) { | 1520 | if (ret) { |
| 1460 | printk(KERN_ERR "PCI-DMA: Calgary init failed %d, " | 1521 | printk(KERN_ERR "PCI-DMA: Calgary init failed %d, " |
| 1461 | "falling back to no_iommu\n", ret); | 1522 | "falling back to no_iommu\n", ret); |
| 1462 | if (max_pfn > MAX_DMA32_PFN) | ||
| 1463 | printk(KERN_ERR "WARNING more than 4GB of memory, " | ||
| 1464 | "32bit PCI may malfunction.\n"); | ||
| 1465 | return ret; | 1523 | return ret; |
| 1466 | } | 1524 | } |
| 1467 | 1525 | ||
| 1468 | force_iommu = 1; | 1526 | force_iommu = 1; |
| 1469 | bad_dma_address = 0x0; | 1527 | bad_dma_address = 0x0; |
| 1470 | dma_ops = &calgary_dma_ops; | 1528 | /* dma_ops is set to swiotlb or nommu */ |
| 1529 | if (!dma_ops) | ||
| 1530 | dma_ops = &nommu_dma_ops; | ||
| 1471 | 1531 | ||
| 1472 | return 0; | 1532 | return 0; |
| 1473 | } | 1533 | } |
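The Calgary hunks above drop the driver-local num_dma_pages() helper in favor of the generic iommu_num_pages(addr, len, PAGE_SIZE). As an illustration only (not part of the patch), here is a minimal user-space C sketch of that page-count arithmetic; the 4 KiB IO page size and the num_io_pages() name are assumptions for the example, not the kernel implementation:

#include <stdio.h>

#define IO_PAGE_SIZE 4096UL

/* how many IO pages a buffer at `addr` spanning `len` bytes touches */
static unsigned long num_io_pages(unsigned long addr, unsigned long len)
{
        unsigned long span = (addr & (IO_PAGE_SIZE - 1)) + len;

        /* round the in-page offset plus length up to whole pages */
        return (span + IO_PAGE_SIZE - 1) / IO_PAGE_SIZE;
}

int main(void)
{
        /* a 100-byte buffer that straddles a page boundary needs 2 pages */
        printf("%lu\n", num_io_pages(0x1fe0, 100));
        return 0;
}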
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 8467ec2320f1..1972266e8ba5 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c | |||
| @@ -5,14 +5,11 @@ | |||
| 5 | 5 | ||
| 6 | #include <asm/proto.h> | 6 | #include <asm/proto.h> |
| 7 | #include <asm/dma.h> | 7 | #include <asm/dma.h> |
| 8 | #include <asm/gart.h> | 8 | #include <asm/iommu.h> |
| 9 | #include <asm/calgary.h> | 9 | #include <asm/calgary.h> |
| 10 | #include <asm/amd_iommu.h> | 10 | #include <asm/amd_iommu.h> |
| 11 | 11 | ||
| 12 | int forbid_dac __read_mostly; | 12 | struct dma_mapping_ops *dma_ops; |
| 13 | EXPORT_SYMBOL(forbid_dac); | ||
| 14 | |||
| 15 | const struct dma_mapping_ops *dma_ops; | ||
| 16 | EXPORT_SYMBOL(dma_ops); | 13 | EXPORT_SYMBOL(dma_ops); |
| 17 | 14 | ||
| 18 | static int iommu_sac_force __read_mostly; | 15 | static int iommu_sac_force __read_mostly; |
| @@ -42,11 +39,12 @@ EXPORT_SYMBOL(bad_dma_address); | |||
| 42 | /* Dummy device used for NULL arguments (normally ISA). Better would | 39 | /* Dummy device used for NULL arguments (normally ISA). Better would |
| 43 | be probably a smaller DMA mask, but this is bug-to-bug compatible | 40 | be probably a smaller DMA mask, but this is bug-to-bug compatible |
| 44 | to older i386. */ | 41 | to older i386. */ |
| 45 | struct device fallback_dev = { | 42 | struct device x86_dma_fallback_dev = { |
| 46 | .bus_id = "fallback device", | 43 | .bus_id = "fallback device", |
| 47 | .coherent_dma_mask = DMA_32BIT_MASK, | 44 | .coherent_dma_mask = DMA_32BIT_MASK, |
| 48 | .dma_mask = &fallback_dev.coherent_dma_mask, | 45 | .dma_mask = &x86_dma_fallback_dev.coherent_dma_mask, |
| 49 | }; | 46 | }; |
| 47 | EXPORT_SYMBOL(x86_dma_fallback_dev); | ||
| 50 | 48 | ||
| 51 | int dma_set_mask(struct device *dev, u64 mask) | 49 | int dma_set_mask(struct device *dev, u64 mask) |
| 52 | { | 50 | { |
| @@ -83,7 +81,7 @@ void __init dma32_reserve_bootmem(void) | |||
| 83 | * using 512M as goal | 81 | * using 512M as goal |
| 84 | */ | 82 | */ |
| 85 | align = 64ULL<<20; | 83 | align = 64ULL<<20; |
| 86 | size = round_up(dma32_bootmem_size, align); | 84 | size = roundup(dma32_bootmem_size, align); |
| 87 | dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align, | 85 | dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align, |
| 88 | 512ULL<<20); | 86 | 512ULL<<20); |
| 89 | if (dma32_bootmem_ptr) | 87 | if (dma32_bootmem_ptr) |
| @@ -114,24 +112,57 @@ void __init pci_iommu_alloc(void) | |||
| 114 | * The order of these functions is important for | 112 | * The order of these functions is important for |
| 115 | * fall-back/fail-over reasons | 113 | * fall-back/fail-over reasons |
| 116 | */ | 114 | */ |
| 117 | #ifdef CONFIG_GART_IOMMU | ||
| 118 | gart_iommu_hole_init(); | 115 | gart_iommu_hole_init(); |
| 119 | #endif | ||
| 120 | 116 | ||
| 121 | #ifdef CONFIG_CALGARY_IOMMU | ||
| 122 | detect_calgary(); | 117 | detect_calgary(); |
| 123 | #endif | ||
| 124 | 118 | ||
| 125 | detect_intel_iommu(); | 119 | detect_intel_iommu(); |
| 126 | 120 | ||
| 127 | amd_iommu_detect(); | 121 | amd_iommu_detect(); |
| 128 | 122 | ||
| 129 | #ifdef CONFIG_SWIOTLB | ||
| 130 | pci_swiotlb_init(); | 123 | pci_swiotlb_init(); |
| 131 | #endif | ||
| 132 | } | 124 | } |
| 125 | |||
| 126 | unsigned long iommu_nr_pages(unsigned long addr, unsigned long len) | ||
| 127 | { | ||
| 128 | unsigned long size = roundup((addr & ~PAGE_MASK) + len, PAGE_SIZE); | ||
| 129 | |||
| 130 | return size >> PAGE_SHIFT; | ||
| 131 | } | ||
| 132 | EXPORT_SYMBOL(iommu_nr_pages); | ||
| 133 | #endif | 133 | #endif |
| 134 | 134 | ||
| 135 | void *dma_generic_alloc_coherent(struct device *dev, size_t size, | ||
| 136 | dma_addr_t *dma_addr, gfp_t flag) | ||
| 137 | { | ||
| 138 | unsigned long dma_mask; | ||
| 139 | struct page *page; | ||
| 140 | dma_addr_t addr; | ||
| 141 | |||
| 142 | dma_mask = dma_alloc_coherent_mask(dev, flag); | ||
| 143 | |||
| 144 | flag |= __GFP_ZERO; | ||
| 145 | again: | ||
| 146 | page = alloc_pages_node(dev_to_node(dev), flag, get_order(size)); | ||
| 147 | if (!page) | ||
| 148 | return NULL; | ||
| 149 | |||
| 150 | addr = page_to_phys(page); | ||
| 151 | if (!is_buffer_dma_capable(dma_mask, addr, size)) { | ||
| 152 | __free_pages(page, get_order(size)); | ||
| 153 | |||
| 154 | if (dma_mask < DMA_32BIT_MASK && !(flag & GFP_DMA)) { | ||
| 155 | flag = (flag & ~GFP_DMA32) | GFP_DMA; | ||
| 156 | goto again; | ||
| 157 | } | ||
| 158 | |||
| 159 | return NULL; | ||
| 160 | } | ||
| 161 | |||
| 162 | *dma_addr = addr; | ||
| 163 | return page_address(page); | ||
| 164 | } | ||
| 165 | |||
| 135 | /* | 166 | /* |
| 136 | * See <Documentation/x86_64/boot-options.txt> for the iommu kernel parameter | 167 | * See <Documentation/x86_64/boot-options.txt> for the iommu kernel parameter |
| 137 | * documentation. | 168 | * documentation. |
| @@ -184,9 +215,7 @@ static __init int iommu_setup(char *p) | |||
| 184 | swiotlb = 1; | 215 | swiotlb = 1; |
| 185 | #endif | 216 | #endif |
| 186 | 217 | ||
| 187 | #ifdef CONFIG_GART_IOMMU | ||
| 188 | gart_parse_options(p); | 218 | gart_parse_options(p); |
| 189 | #endif | ||
| 190 | 219 | ||
| 191 | #ifdef CONFIG_CALGARY_IOMMU | 220 | #ifdef CONFIG_CALGARY_IOMMU |
| 192 | if (!strncmp(p, "calgary", 7)) | 221 | if (!strncmp(p, "calgary", 7)) |
| @@ -201,136 +230,19 @@ static __init int iommu_setup(char *p) | |||
| 201 | } | 230 | } |
| 202 | early_param("iommu", iommu_setup); | 231 | early_param("iommu", iommu_setup); |
| 203 | 232 | ||
| 204 | #ifdef CONFIG_X86_32 | ||
| 205 | int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, | ||
| 206 | dma_addr_t device_addr, size_t size, int flags) | ||
| 207 | { | ||
| 208 | void __iomem *mem_base = NULL; | ||
| 209 | int pages = size >> PAGE_SHIFT; | ||
| 210 | int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long); | ||
| 211 | |||
| 212 | if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0) | ||
| 213 | goto out; | ||
| 214 | if (!size) | ||
| 215 | goto out; | ||
| 216 | if (dev->dma_mem) | ||
| 217 | goto out; | ||
| 218 | |||
| 219 | /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */ | ||
| 220 | |||
| 221 | mem_base = ioremap(bus_addr, size); | ||
| 222 | if (!mem_base) | ||
| 223 | goto out; | ||
| 224 | |||
| 225 | dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); | ||
| 226 | if (!dev->dma_mem) | ||
| 227 | goto out; | ||
| 228 | dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL); | ||
| 229 | if (!dev->dma_mem->bitmap) | ||
| 230 | goto free1_out; | ||
| 231 | |||
| 232 | dev->dma_mem->virt_base = mem_base; | ||
| 233 | dev->dma_mem->device_base = device_addr; | ||
| 234 | dev->dma_mem->size = pages; | ||
| 235 | dev->dma_mem->flags = flags; | ||
| 236 | |||
| 237 | if (flags & DMA_MEMORY_MAP) | ||
| 238 | return DMA_MEMORY_MAP; | ||
| 239 | |||
| 240 | return DMA_MEMORY_IO; | ||
| 241 | |||
| 242 | free1_out: | ||
| 243 | kfree(dev->dma_mem); | ||
| 244 | out: | ||
| 245 | if (mem_base) | ||
| 246 | iounmap(mem_base); | ||
| 247 | return 0; | ||
| 248 | } | ||
| 249 | EXPORT_SYMBOL(dma_declare_coherent_memory); | ||
| 250 | |||
| 251 | void dma_release_declared_memory(struct device *dev) | ||
| 252 | { | ||
| 253 | struct dma_coherent_mem *mem = dev->dma_mem; | ||
| 254 | |||
| 255 | if (!mem) | ||
| 256 | return; | ||
| 257 | dev->dma_mem = NULL; | ||
| 258 | iounmap(mem->virt_base); | ||
| 259 | kfree(mem->bitmap); | ||
| 260 | kfree(mem); | ||
| 261 | } | ||
| 262 | EXPORT_SYMBOL(dma_release_declared_memory); | ||
| 263 | |||
| 264 | void *dma_mark_declared_memory_occupied(struct device *dev, | ||
| 265 | dma_addr_t device_addr, size_t size) | ||
| 266 | { | ||
| 267 | struct dma_coherent_mem *mem = dev->dma_mem; | ||
| 268 | int pos, err; | ||
| 269 | int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1); | ||
| 270 | |||
| 271 | pages >>= PAGE_SHIFT; | ||
| 272 | |||
| 273 | if (!mem) | ||
| 274 | return ERR_PTR(-EINVAL); | ||
| 275 | |||
| 276 | pos = (device_addr - mem->device_base) >> PAGE_SHIFT; | ||
| 277 | err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages)); | ||
| 278 | if (err != 0) | ||
| 279 | return ERR_PTR(err); | ||
| 280 | return mem->virt_base + (pos << PAGE_SHIFT); | ||
| 281 | } | ||
| 282 | EXPORT_SYMBOL(dma_mark_declared_memory_occupied); | ||
| 283 | |||
| 284 | static int dma_alloc_from_coherent_mem(struct device *dev, ssize_t size, | ||
| 285 | dma_addr_t *dma_handle, void **ret) | ||
| 286 | { | ||
| 287 | struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; | ||
| 288 | int order = get_order(size); | ||
| 289 | |||
| 290 | if (mem) { | ||
| 291 | int page = bitmap_find_free_region(mem->bitmap, mem->size, | ||
| 292 | order); | ||
| 293 | if (page >= 0) { | ||
| 294 | *dma_handle = mem->device_base + (page << PAGE_SHIFT); | ||
| 295 | *ret = mem->virt_base + (page << PAGE_SHIFT); | ||
| 296 | memset(*ret, 0, size); | ||
| 297 | } | ||
| 298 | if (mem->flags & DMA_MEMORY_EXCLUSIVE) | ||
| 299 | *ret = NULL; | ||
| 300 | } | ||
| 301 | return (mem != NULL); | ||
| 302 | } | ||
| 303 | |||
| 304 | static int dma_release_coherent(struct device *dev, int order, void *vaddr) | ||
| 305 | { | ||
| 306 | struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; | ||
| 307 | |||
| 308 | if (mem && vaddr >= mem->virt_base && vaddr < | ||
| 309 | (mem->virt_base + (mem->size << PAGE_SHIFT))) { | ||
| 310 | int page = (vaddr - mem->virt_base) >> PAGE_SHIFT; | ||
| 311 | |||
| 312 | bitmap_release_region(mem->bitmap, page, order); | ||
| 313 | return 1; | ||
| 314 | } | ||
| 315 | return 0; | ||
| 316 | } | ||
| 317 | #else | ||
| 318 | #define dma_alloc_from_coherent_mem(dev, size, handle, ret) (0) | ||
| 319 | #define dma_release_coherent(dev, order, vaddr) (0) | ||
| 320 | #endif /* CONFIG_X86_32 */ | ||
| 321 | |||
| 322 | int dma_supported(struct device *dev, u64 mask) | 233 | int dma_supported(struct device *dev, u64 mask) |
| 323 | { | 234 | { |
| 235 | struct dma_mapping_ops *ops = get_dma_ops(dev); | ||
| 236 | |||
| 324 | #ifdef CONFIG_PCI | 237 | #ifdef CONFIG_PCI |
| 325 | if (mask > 0xffffffff && forbid_dac > 0) { | 238 | if (mask > 0xffffffff && forbid_dac > 0) { |
| 326 | printk(KERN_INFO "PCI: Disallowing DAC for device %s\n", | 239 | dev_info(dev, "PCI: Disallowing DAC for device\n"); |
| 327 | dev->bus_id); | ||
| 328 | return 0; | 240 | return 0; |
| 329 | } | 241 | } |
| 330 | #endif | 242 | #endif |
| 331 | 243 | ||
| 332 | if (dma_ops->dma_supported) | 244 | if (ops->dma_supported) |
| 333 | return dma_ops->dma_supported(dev, mask); | 245 | return ops->dma_supported(dev, mask); |
| 334 | 246 | ||
| 335 | /* Copied from i386. Doesn't make much sense, because it will | 247 | /* Copied from i386. Doesn't make much sense, because it will |
| 336 | only work for pci_alloc_coherent. | 248 | only work for pci_alloc_coherent. |
| @@ -351,8 +263,7 @@ int dma_supported(struct device *dev, u64 mask) | |||
| 351 | type. Normally this doesn't make any difference, but gives | 263 | type. Normally this doesn't make any difference, but gives |
| 352 | more gentle handling of IOMMU overflow. */ | 264 | more gentle handling of IOMMU overflow. */ |
| 353 | if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) { | 265 | if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) { |
| 354 | printk(KERN_INFO "%s: Force SAC with mask %Lx\n", | 266 | dev_info(dev, "Force SAC with mask %Lx\n", mask); |
| 355 | dev->bus_id, mask); | ||
| 356 | return 0; | 267 | return 0; |
| 357 | } | 268 | } |
| 358 | 269 | ||
| @@ -360,157 +271,15 @@ int dma_supported(struct device *dev, u64 mask) | |||
| 360 | } | 271 | } |
| 361 | EXPORT_SYMBOL(dma_supported); | 272 | EXPORT_SYMBOL(dma_supported); |
| 362 | 273 | ||
| 363 | /* Allocate DMA memory on node near device */ | ||
| 364 | static noinline struct page * | ||
| 365 | dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order) | ||
| 366 | { | ||
| 367 | int node; | ||
| 368 | |||
| 369 | node = dev_to_node(dev); | ||
| 370 | |||
| 371 | return alloc_pages_node(node, gfp, order); | ||
| 372 | } | ||
| 373 | |||
| 374 | /* | ||
| 375 | * Allocate memory for a coherent mapping. | ||
| 376 | */ | ||
| 377 | void * | ||
| 378 | dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, | ||
| 379 | gfp_t gfp) | ||
| 380 | { | ||
| 381 | void *memory = NULL; | ||
| 382 | struct page *page; | ||
| 383 | unsigned long dma_mask = 0; | ||
| 384 | dma_addr_t bus; | ||
| 385 | int noretry = 0; | ||
| 386 | |||
| 387 | /* ignore region specifiers */ | ||
| 388 | gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); | ||
| 389 | |||
| 390 | if (dma_alloc_from_coherent_mem(dev, size, dma_handle, &memory)) | ||
| 391 | return memory; | ||
| 392 | |||
| 393 | if (!dev) { | ||
| 394 | dev = &fallback_dev; | ||
| 395 | gfp |= GFP_DMA; | ||
| 396 | } | ||
| 397 | dma_mask = dev->coherent_dma_mask; | ||
| 398 | if (dma_mask == 0) | ||
| 399 | dma_mask = (gfp & GFP_DMA) ? DMA_24BIT_MASK : DMA_32BIT_MASK; | ||
| 400 | |||
| 401 | /* Device not DMA able */ | ||
| 402 | if (dev->dma_mask == NULL) | ||
| 403 | return NULL; | ||
| 404 | |||
| 405 | /* Don't invoke OOM killer or retry in lower 16MB DMA zone */ | ||
| 406 | if (gfp & __GFP_DMA) | ||
| 407 | noretry = 1; | ||
| 408 | |||
| 409 | #ifdef CONFIG_X86_64 | ||
| 410 | /* Why <=? Even when the mask is smaller than 4GB it is often | ||
| 411 | larger than 16MB and in this case we have a chance of | ||
| 412 | finding fitting memory in the next higher zone first. If | ||
| 413 | not retry with true GFP_DMA. -AK */ | ||
| 414 | if (dma_mask <= DMA_32BIT_MASK && !(gfp & GFP_DMA)) { | ||
| 415 | gfp |= GFP_DMA32; | ||
| 416 | if (dma_mask < DMA_32BIT_MASK) | ||
| 417 | noretry = 1; | ||
| 418 | } | ||
| 419 | #endif | ||
| 420 | |||
| 421 | again: | ||
| 422 | page = dma_alloc_pages(dev, | ||
| 423 | noretry ? gfp | __GFP_NORETRY : gfp, get_order(size)); | ||
| 424 | if (page == NULL) | ||
| 425 | return NULL; | ||
| 426 | |||
| 427 | { | ||
| 428 | int high, mmu; | ||
| 429 | bus = page_to_phys(page); | ||
| 430 | memory = page_address(page); | ||
| 431 | high = (bus + size) >= dma_mask; | ||
| 432 | mmu = high; | ||
| 433 | if (force_iommu && !(gfp & GFP_DMA)) | ||
| 434 | mmu = 1; | ||
| 435 | else if (high) { | ||
| 436 | free_pages((unsigned long)memory, | ||
| 437 | get_order(size)); | ||
| 438 | |||
| 439 | /* Don't use the 16MB ZONE_DMA unless absolutely | ||
| 440 | needed. It's better to use remapping first. */ | ||
| 441 | if (dma_mask < DMA_32BIT_MASK && !(gfp & GFP_DMA)) { | ||
| 442 | gfp = (gfp & ~GFP_DMA32) | GFP_DMA; | ||
| 443 | goto again; | ||
| 444 | } | ||
| 445 | |||
| 446 | /* Let low level make its own zone decisions */ | ||
| 447 | gfp &= ~(GFP_DMA32|GFP_DMA); | ||
| 448 | |||
| 449 | if (dma_ops->alloc_coherent) | ||
| 450 | return dma_ops->alloc_coherent(dev, size, | ||
| 451 | dma_handle, gfp); | ||
| 452 | return NULL; | ||
| 453 | } | ||
| 454 | |||
| 455 | memset(memory, 0, size); | ||
| 456 | if (!mmu) { | ||
| 457 | *dma_handle = bus; | ||
| 458 | return memory; | ||
| 459 | } | ||
| 460 | } | ||
| 461 | |||
| 462 | if (dma_ops->alloc_coherent) { | ||
| 463 | free_pages((unsigned long)memory, get_order(size)); | ||
| 464 | gfp &= ~(GFP_DMA|GFP_DMA32); | ||
| 465 | return dma_ops->alloc_coherent(dev, size, dma_handle, gfp); | ||
| 466 | } | ||
| 467 | |||
| 468 | if (dma_ops->map_simple) { | ||
| 469 | *dma_handle = dma_ops->map_simple(dev, virt_to_phys(memory), | ||
| 470 | size, | ||
| 471 | PCI_DMA_BIDIRECTIONAL); | ||
| 472 | if (*dma_handle != bad_dma_address) | ||
| 473 | return memory; | ||
| 474 | } | ||
| 475 | |||
| 476 | if (panic_on_overflow) | ||
| 477 | panic("dma_alloc_coherent: IOMMU overflow by %lu bytes\n", | ||
| 478 | (unsigned long)size); | ||
| 479 | free_pages((unsigned long)memory, get_order(size)); | ||
| 480 | return NULL; | ||
| 481 | } | ||
| 482 | EXPORT_SYMBOL(dma_alloc_coherent); | ||
| 483 | |||
| 484 | /* | ||
| 485 | * Unmap coherent memory. | ||
| 486 | * The caller must ensure that the device has finished accessing the mapping. | ||
| 487 | */ | ||
| 488 | void dma_free_coherent(struct device *dev, size_t size, | ||
| 489 | void *vaddr, dma_addr_t bus) | ||
| 490 | { | ||
| 491 | int order = get_order(size); | ||
| 492 | WARN_ON(irqs_disabled()); /* for portability */ | ||
| 493 | if (dma_release_coherent(dev, order, vaddr)) | ||
| 494 | return; | ||
| 495 | if (dma_ops->unmap_single) | ||
| 496 | dma_ops->unmap_single(dev, bus, size, 0); | ||
| 497 | free_pages((unsigned long)vaddr, order); | ||
| 498 | } | ||
| 499 | EXPORT_SYMBOL(dma_free_coherent); | ||
| 500 | |||
| 501 | static int __init pci_iommu_init(void) | 274 | static int __init pci_iommu_init(void) |
| 502 | { | 275 | { |
| 503 | #ifdef CONFIG_CALGARY_IOMMU | ||
| 504 | calgary_iommu_init(); | 276 | calgary_iommu_init(); |
| 505 | #endif | ||
| 506 | 277 | ||
| 507 | intel_iommu_init(); | 278 | intel_iommu_init(); |
| 508 | 279 | ||
| 509 | amd_iommu_init(); | 280 | amd_iommu_init(); |
| 510 | 281 | ||
| 511 | #ifdef CONFIG_GART_IOMMU | ||
| 512 | gart_iommu_init(); | 282 | gart_iommu_init(); |
| 513 | #endif | ||
| 514 | 283 | ||
| 515 | no_iommu_init(); | 284 | no_iommu_init(); |
| 516 | return 0; | 285 | return 0; |
| @@ -522,17 +291,3 @@ void pci_iommu_shutdown(void) | |||
| 522 | } | 291 | } |
| 523 | /* Must execute after PCI subsystem */ | 292 | /* Must execute after PCI subsystem */ |
| 524 | fs_initcall(pci_iommu_init); | 293 | fs_initcall(pci_iommu_init); |
| 525 | |||
| 526 | #ifdef CONFIG_PCI | ||
| 527 | /* Many VIA bridges seem to corrupt data for DAC. Disable it here */ | ||
| 528 | |||
| 529 | static __devinit void via_no_dac(struct pci_dev *dev) | ||
| 530 | { | ||
| 531 | if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) { | ||
| 532 | printk(KERN_INFO "PCI: VIA PCI bridge detected." | ||
| 533 | "Disabling DAC.\n"); | ||
| 534 | forbid_dac = 1; | ||
| 535 | } | ||
| 536 | } | ||
| 537 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac); | ||
| 538 | #endif | ||
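The pci-dma.c hunks introduce dma_generic_alloc_coherent(), which allocates pages, checks the result against the device's coherent DMA mask via is_buffer_dma_capable(), and, when the mask is below 32 bits, frees the pages and retries with GFP_DMA. The user-space sketch below shows only that reachability check; buffer_dma_capable() and the sample addresses are illustrative assumptions, not the kernel API:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* the whole buffer must lie at or below the device mask */
static bool buffer_dma_capable(uint64_t mask, uint64_t addr, uint64_t size)
{
        return addr + size <= mask;
}

int main(void)
{
        uint64_t mask_32bit = 0xffffffffULL;

        /* a buffer just above 4 GiB is out of reach for a 32-bit device */
        printf("%d\n", buffer_dma_capable(mask_32bit, 0x100000000ULL, 0x1000));
        /* one well below 4 GiB is reachable */
        printf("%d\n", buffer_dma_capable(mask_32bit, 0x10000000ULL, 0x1000));
        return 0;
}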
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index c3fe78406d18..e3f75bbcedea 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c | |||
| @@ -27,11 +27,12 @@ | |||
| 27 | #include <linux/scatterlist.h> | 27 | #include <linux/scatterlist.h> |
| 28 | #include <linux/iommu-helper.h> | 28 | #include <linux/iommu-helper.h> |
| 29 | #include <linux/sysdev.h> | 29 | #include <linux/sysdev.h> |
| 30 | #include <linux/io.h> | ||
| 30 | #include <asm/atomic.h> | 31 | #include <asm/atomic.h> |
| 31 | #include <asm/io.h> | ||
| 32 | #include <asm/mtrr.h> | 32 | #include <asm/mtrr.h> |
| 33 | #include <asm/pgtable.h> | 33 | #include <asm/pgtable.h> |
| 34 | #include <asm/proto.h> | 34 | #include <asm/proto.h> |
| 35 | #include <asm/iommu.h> | ||
| 35 | #include <asm/gart.h> | 36 | #include <asm/gart.h> |
| 36 | #include <asm/cacheflush.h> | 37 | #include <asm/cacheflush.h> |
| 37 | #include <asm/swiotlb.h> | 38 | #include <asm/swiotlb.h> |
| @@ -66,9 +67,6 @@ static u32 gart_unmapped_entry; | |||
| 66 | (((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT) | 67 | (((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT) |
| 67 | #define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28)) | 68 | #define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28)) |
| 68 | 69 | ||
| 69 | #define to_pages(addr, size) \ | ||
| 70 | (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT) | ||
| 71 | |||
| 72 | #define EMERGENCY_PAGES 32 /* = 128KB */ | 70 | #define EMERGENCY_PAGES 32 /* = 128KB */ |
| 73 | 71 | ||
| 74 | #ifdef CONFIG_AGP | 72 | #ifdef CONFIG_AGP |
| @@ -82,9 +80,10 @@ AGPEXTERN int agp_memory_reserved; | |||
| 82 | AGPEXTERN __u32 *agp_gatt_table; | 80 | AGPEXTERN __u32 *agp_gatt_table; |
| 83 | 81 | ||
| 84 | static unsigned long next_bit; /* protected by iommu_bitmap_lock */ | 82 | static unsigned long next_bit; /* protected by iommu_bitmap_lock */ |
| 85 | static int need_flush; /* global flush state. set for each gart wrap */ | 83 | static bool need_flush; /* global flush state. set for each gart wrap */ |
| 86 | 84 | ||
| 87 | static unsigned long alloc_iommu(struct device *dev, int size) | 85 | static unsigned long alloc_iommu(struct device *dev, int size, |
| 86 | unsigned long align_mask) | ||
| 88 | { | 87 | { |
| 89 | unsigned long offset, flags; | 88 | unsigned long offset, flags; |
| 90 | unsigned long boundary_size; | 89 | unsigned long boundary_size; |
| @@ -92,26 +91,27 @@ static unsigned long alloc_iommu(struct device *dev, int size) | |||
| 92 | 91 | ||
| 93 | base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev), | 92 | base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev), |
| 94 | PAGE_SIZE) >> PAGE_SHIFT; | 93 | PAGE_SIZE) >> PAGE_SHIFT; |
| 95 | boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, | 94 | boundary_size = ALIGN((unsigned long long)dma_get_seg_boundary(dev) + 1, |
| 96 | PAGE_SIZE) >> PAGE_SHIFT; | 95 | PAGE_SIZE) >> PAGE_SHIFT; |
| 97 | 96 | ||
| 98 | spin_lock_irqsave(&iommu_bitmap_lock, flags); | 97 | spin_lock_irqsave(&iommu_bitmap_lock, flags); |
| 99 | offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit, | 98 | offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit, |
| 100 | size, base_index, boundary_size, 0); | 99 | size, base_index, boundary_size, align_mask); |
| 101 | if (offset == -1) { | 100 | if (offset == -1) { |
| 102 | need_flush = 1; | 101 | need_flush = true; |
| 103 | offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, 0, | 102 | offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, 0, |
| 104 | size, base_index, boundary_size, 0); | 103 | size, base_index, boundary_size, |
| 104 | align_mask); | ||
| 105 | } | 105 | } |
| 106 | if (offset != -1) { | 106 | if (offset != -1) { |
| 107 | next_bit = offset+size; | 107 | next_bit = offset+size; |
| 108 | if (next_bit >= iommu_pages) { | 108 | if (next_bit >= iommu_pages) { |
| 109 | next_bit = 0; | 109 | next_bit = 0; |
| 110 | need_flush = 1; | 110 | need_flush = true; |
| 111 | } | 111 | } |
| 112 | } | 112 | } |
| 113 | if (iommu_fullflush) | 113 | if (iommu_fullflush) |
| 114 | need_flush = 1; | 114 | need_flush = true; |
| 115 | spin_unlock_irqrestore(&iommu_bitmap_lock, flags); | 115 | spin_unlock_irqrestore(&iommu_bitmap_lock, flags); |
| 116 | 116 | ||
| 117 | return offset; | 117 | return offset; |
| @@ -136,7 +136,7 @@ static void flush_gart(void) | |||
| 136 | spin_lock_irqsave(&iommu_bitmap_lock, flags); | 136 | spin_lock_irqsave(&iommu_bitmap_lock, flags); |
| 137 | if (need_flush) { | 137 | if (need_flush) { |
| 138 | k8_flush_garts(); | 138 | k8_flush_garts(); |
| 139 | need_flush = 0; | 139 | need_flush = false; |
| 140 | } | 140 | } |
| 141 | spin_unlock_irqrestore(&iommu_bitmap_lock, flags); | 141 | spin_unlock_irqrestore(&iommu_bitmap_lock, flags); |
| 142 | } | 142 | } |
| @@ -175,7 +175,8 @@ static void dump_leak(void) | |||
| 175 | iommu_leak_pages); | 175 | iommu_leak_pages); |
| 176 | for (i = 0; i < iommu_leak_pages; i += 2) { | 176 | for (i = 0; i < iommu_leak_pages; i += 2) { |
| 177 | printk(KERN_DEBUG "%lu: ", iommu_pages-i); | 177 | printk(KERN_DEBUG "%lu: ", iommu_pages-i); |
| 178 | printk_address((unsigned long) iommu_leak_tab[iommu_pages-i], 0); | 178 | printk_address((unsigned long) iommu_leak_tab[iommu_pages-i], |
| 179 | 0); | ||
| 179 | printk(KERN_CONT "%c", (i+1)%2 == 0 ? '\n' : ' '); | 180 | printk(KERN_CONT "%c", (i+1)%2 == 0 ? '\n' : ' '); |
| 180 | } | 181 | } |
| 181 | printk(KERN_DEBUG "\n"); | 182 | printk(KERN_DEBUG "\n"); |
| @@ -197,9 +198,7 @@ static void iommu_full(struct device *dev, size_t size, int dir) | |||
| 197 | * out. Hopefully no network devices use single mappings that big. | 198 | * out. Hopefully no network devices use single mappings that big. |
| 198 | */ | 199 | */ |
| 199 | 200 | ||
| 200 | printk(KERN_ERR | 201 | dev_err(dev, "PCI-DMA: Out of IOMMU space for %lu bytes\n", size); |
| 201 | "PCI-DMA: Out of IOMMU space for %lu bytes at device %s\n", | ||
| 202 | size, dev->bus_id); | ||
| 203 | 202 | ||
| 204 | if (size > PAGE_SIZE*EMERGENCY_PAGES) { | 203 | if (size > PAGE_SIZE*EMERGENCY_PAGES) { |
| 205 | if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL) | 204 | if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL) |
| @@ -216,34 +215,24 @@ static void iommu_full(struct device *dev, size_t size, int dir) | |||
| 216 | static inline int | 215 | static inline int |
| 217 | need_iommu(struct device *dev, unsigned long addr, size_t size) | 216 | need_iommu(struct device *dev, unsigned long addr, size_t size) |
| 218 | { | 217 | { |
| 219 | u64 mask = *dev->dma_mask; | 218 | return force_iommu || |
| 220 | int high = addr + size > mask; | 219 | !is_buffer_dma_capable(*dev->dma_mask, addr, size); |
| 221 | int mmu = high; | ||
| 222 | |||
| 223 | if (force_iommu) | ||
| 224 | mmu = 1; | ||
| 225 | |||
| 226 | return mmu; | ||
| 227 | } | 220 | } |
| 228 | 221 | ||
| 229 | static inline int | 222 | static inline int |
| 230 | nonforced_iommu(struct device *dev, unsigned long addr, size_t size) | 223 | nonforced_iommu(struct device *dev, unsigned long addr, size_t size) |
| 231 | { | 224 | { |
| 232 | u64 mask = *dev->dma_mask; | 225 | return !is_buffer_dma_capable(*dev->dma_mask, addr, size); |
| 233 | int high = addr + size > mask; | ||
| 234 | int mmu = high; | ||
| 235 | |||
| 236 | return mmu; | ||
| 237 | } | 226 | } |
| 238 | 227 | ||
| 239 | /* Map a single continuous physical area into the IOMMU. | 228 | /* Map a single continuous physical area into the IOMMU. |
| 240 | * Caller needs to check if the iommu is needed and flush. | 229 | * Caller needs to check if the iommu is needed and flush. |
| 241 | */ | 230 | */ |
| 242 | static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, | 231 | static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, |
| 243 | size_t size, int dir) | 232 | size_t size, int dir, unsigned long align_mask) |
| 244 | { | 233 | { |
| 245 | unsigned long npages = to_pages(phys_mem, size); | 234 | unsigned long npages = iommu_num_pages(phys_mem, size, PAGE_SIZE); |
| 246 | unsigned long iommu_page = alloc_iommu(dev, npages); | 235 | unsigned long iommu_page = alloc_iommu(dev, npages, align_mask); |
| 247 | int i; | 236 | int i; |
| 248 | 237 | ||
| 249 | if (iommu_page == -1) { | 238 | if (iommu_page == -1) { |
| @@ -263,16 +252,6 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, | |||
| 263 | return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK); | 252 | return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK); |
| 264 | } | 253 | } |
| 265 | 254 | ||
| 266 | static dma_addr_t | ||
| 267 | gart_map_simple(struct device *dev, phys_addr_t paddr, size_t size, int dir) | ||
| 268 | { | ||
| 269 | dma_addr_t map = dma_map_area(dev, paddr, size, dir); | ||
| 270 | |||
| 271 | flush_gart(); | ||
| 272 | |||
| 273 | return map; | ||
| 274 | } | ||
| 275 | |||
| 276 | /* Map a single area into the IOMMU */ | 255 | /* Map a single area into the IOMMU */ |
| 277 | static dma_addr_t | 256 | static dma_addr_t |
| 278 | gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir) | 257 | gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir) |
| @@ -280,12 +259,13 @@ gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir) | |||
| 280 | unsigned long bus; | 259 | unsigned long bus; |
| 281 | 260 | ||
| 282 | if (!dev) | 261 | if (!dev) |
| 283 | dev = &fallback_dev; | 262 | dev = &x86_dma_fallback_dev; |
| 284 | 263 | ||
| 285 | if (!need_iommu(dev, paddr, size)) | 264 | if (!need_iommu(dev, paddr, size)) |
| 286 | return paddr; | 265 | return paddr; |
| 287 | 266 | ||
| 288 | bus = gart_map_simple(dev, paddr, size, dir); | 267 | bus = dma_map_area(dev, paddr, size, dir, 0); |
| 268 | flush_gart(); | ||
| 289 | 269 | ||
| 290 | return bus; | 270 | return bus; |
| 291 | } | 271 | } |
| @@ -305,7 +285,7 @@ static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr, | |||
| 305 | return; | 285 | return; |
| 306 | 286 | ||
| 307 | iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT; | 287 | iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT; |
| 308 | npages = to_pages(dma_addr, size); | 288 | npages = iommu_num_pages(dma_addr, size, PAGE_SIZE); |
| 309 | for (i = 0; i < npages; i++) { | 289 | for (i = 0; i < npages; i++) { |
| 310 | iommu_gatt_base[iommu_page + i] = gart_unmapped_entry; | 290 | iommu_gatt_base[iommu_page + i] = gart_unmapped_entry; |
| 311 | CLEAR_LEAK(iommu_page + i); | 291 | CLEAR_LEAK(iommu_page + i); |
| @@ -344,7 +324,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, | |||
| 344 | unsigned long addr = sg_phys(s); | 324 | unsigned long addr = sg_phys(s); |
| 345 | 325 | ||
| 346 | if (nonforced_iommu(dev, addr, s->length)) { | 326 | if (nonforced_iommu(dev, addr, s->length)) { |
| 347 | addr = dma_map_area(dev, addr, s->length, dir); | 327 | addr = dma_map_area(dev, addr, s->length, dir, 0); |
| 348 | if (addr == bad_dma_address) { | 328 | if (addr == bad_dma_address) { |
| 349 | if (i > 0) | 329 | if (i > 0) |
| 350 | gart_unmap_sg(dev, sg, i, dir); | 330 | gart_unmap_sg(dev, sg, i, dir); |
| @@ -366,7 +346,7 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start, | |||
| 366 | int nelems, struct scatterlist *sout, | 346 | int nelems, struct scatterlist *sout, |
| 367 | unsigned long pages) | 347 | unsigned long pages) |
| 368 | { | 348 | { |
| 369 | unsigned long iommu_start = alloc_iommu(dev, pages); | 349 | unsigned long iommu_start = alloc_iommu(dev, pages, 0); |
| 370 | unsigned long iommu_page = iommu_start; | 350 | unsigned long iommu_page = iommu_start; |
| 371 | struct scatterlist *s; | 351 | struct scatterlist *s; |
| 372 | int i; | 352 | int i; |
| @@ -388,7 +368,7 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start, | |||
| 388 | } | 368 | } |
| 389 | 369 | ||
| 390 | addr = phys_addr; | 370 | addr = phys_addr; |
| 391 | pages = to_pages(s->offset, s->length); | 371 | pages = iommu_num_pages(s->offset, s->length, PAGE_SIZE); |
| 392 | while (pages--) { | 372 | while (pages--) { |
| 393 | iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr); | 373 | iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr); |
| 394 | SET_LEAK(iommu_page); | 374 | SET_LEAK(iommu_page); |
| @@ -431,7 +411,7 @@ gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) | |||
| 431 | return 0; | 411 | return 0; |
| 432 | 412 | ||
| 433 | if (!dev) | 413 | if (!dev) |
| 434 | dev = &fallback_dev; | 414 | dev = &x86_dma_fallback_dev; |
| 435 | 415 | ||
| 436 | out = 0; | 416 | out = 0; |
| 437 | start = 0; | 417 | start = 0; |
| @@ -471,7 +451,7 @@ gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) | |||
| 471 | 451 | ||
| 472 | seg_size += s->length; | 452 | seg_size += s->length; |
| 473 | need = nextneed; | 453 | need = nextneed; |
| 474 | pages += to_pages(s->offset, s->length); | 454 | pages += iommu_num_pages(s->offset, s->length, PAGE_SIZE); |
| 475 | ps = s; | 455 | ps = s; |
| 476 | } | 456 | } |
| 477 | if (dma_map_cont(dev, start_sg, i - start, sgmap, pages, need) < 0) | 457 | if (dma_map_cont(dev, start_sg, i - start, sgmap, pages, need) < 0) |
| @@ -503,6 +483,46 @@ error: | |||
| 503 | return 0; | 483 | return 0; |
| 504 | } | 484 | } |
| 505 | 485 | ||
| 486 | /* allocate and map a coherent mapping */ | ||
| 487 | static void * | ||
| 488 | gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, | ||
| 489 | gfp_t flag) | ||
| 490 | { | ||
| 491 | dma_addr_t paddr; | ||
| 492 | unsigned long align_mask; | ||
| 493 | struct page *page; | ||
| 494 | |||
| 495 | if (force_iommu && !(flag & GFP_DMA)) { | ||
| 496 | flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); | ||
| 497 | page = alloc_pages(flag | __GFP_ZERO, get_order(size)); | ||
| 498 | if (!page) | ||
| 499 | return NULL; | ||
| 500 | |||
| 501 | align_mask = (1UL << get_order(size)) - 1; | ||
| 502 | paddr = dma_map_area(dev, page_to_phys(page), size, | ||
| 503 | DMA_BIDIRECTIONAL, align_mask); | ||
| 504 | |||
| 505 | flush_gart(); | ||
| 506 | if (paddr != bad_dma_address) { | ||
| 507 | *dma_addr = paddr; | ||
| 508 | return page_address(page); | ||
| 509 | } | ||
| 510 | __free_pages(page, get_order(size)); | ||
| 511 | } else | ||
| 512 | return dma_generic_alloc_coherent(dev, size, dma_addr, flag); | ||
| 513 | |||
| 514 | return NULL; | ||
| 515 | } | ||
| 516 | |||
| 517 | /* free a coherent mapping */ | ||
| 518 | static void | ||
| 519 | gart_free_coherent(struct device *dev, size_t size, void *vaddr, | ||
| 520 | dma_addr_t dma_addr) | ||
| 521 | { | ||
| 522 | gart_unmap_single(dev, dma_addr, size, DMA_BIDIRECTIONAL); | ||
| 523 | free_pages((unsigned long)vaddr, get_order(size)); | ||
| 524 | } | ||
| 525 | |||
| 506 | static int no_agp; | 526 | static int no_agp; |
| 507 | 527 | ||
| 508 | static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) | 528 | static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) |
| @@ -630,7 +650,6 @@ static __init int init_k8_gatt(struct agp_kern_info *info) | |||
| 630 | struct pci_dev *dev; | 650 | struct pci_dev *dev; |
| 631 | void *gatt; | 651 | void *gatt; |
| 632 | int i, error; | 652 | int i, error; |
| 633 | unsigned long start_pfn, end_pfn; | ||
| 634 | 653 | ||
| 635 | printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); | 654 | printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); |
| 636 | aper_size = aper_base = info->aper_size = 0; | 655 | aper_size = aper_base = info->aper_size = 0; |
| @@ -654,13 +673,13 @@ static __init int init_k8_gatt(struct agp_kern_info *info) | |||
| 654 | info->aper_size = aper_size >> 20; | 673 | info->aper_size = aper_size >> 20; |
| 655 | 674 | ||
| 656 | gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32); | 675 | gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32); |
| 657 | gatt = (void *)__get_free_pages(GFP_KERNEL, get_order(gatt_size)); | 676 | gatt = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, |
| 677 | get_order(gatt_size)); | ||
| 658 | if (!gatt) | 678 | if (!gatt) |
| 659 | panic("Cannot allocate GATT table"); | 679 | panic("Cannot allocate GATT table"); |
| 660 | if (set_memory_uc((unsigned long)gatt, gatt_size >> PAGE_SHIFT)) | 680 | if (set_memory_uc((unsigned long)gatt, gatt_size >> PAGE_SHIFT)) |
| 661 | panic("Could not set GART PTEs to uncacheable pages"); | 681 | panic("Could not set GART PTEs to uncacheable pages"); |
| 662 | 682 | ||
| 663 | memset(gatt, 0, gatt_size); | ||
| 664 | agp_gatt_table = gatt; | 683 | agp_gatt_table = gatt; |
| 665 | 684 | ||
| 666 | enable_gart_translations(); | 685 | enable_gart_translations(); |
| @@ -669,19 +688,14 @@ static __init int init_k8_gatt(struct agp_kern_info *info) | |||
| 669 | if (!error) | 688 | if (!error) |
| 670 | error = sysdev_register(&device_gart); | 689 | error = sysdev_register(&device_gart); |
| 671 | if (error) | 690 | if (error) |
| 672 | panic("Could not register gart_sysdev -- would corrupt data on next suspend"); | 691 | panic("Could not register gart_sysdev -- " |
| 692 | "would corrupt data on next suspend"); | ||
| 673 | 693 | ||
| 674 | flush_gart(); | 694 | flush_gart(); |
| 675 | 695 | ||
| 676 | printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n", | 696 | printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n", |
| 677 | aper_base, aper_size>>10); | 697 | aper_base, aper_size>>10); |
| 678 | 698 | ||
| 679 | /* need to map that range */ | ||
| 680 | end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT); | ||
| 681 | if (end_pfn > max_low_pfn_mapped) { | ||
| 682 | start_pfn = (aper_base>>PAGE_SHIFT); | ||
| 683 | init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT); | ||
| 684 | } | ||
| 685 | return 0; | 699 | return 0; |
| 686 | 700 | ||
| 687 | nommu: | 701 | nommu: |
| @@ -691,21 +705,13 @@ static __init int init_k8_gatt(struct agp_kern_info *info) | |||
| 691 | return -1; | 705 | return -1; |
| 692 | } | 706 | } |
| 693 | 707 | ||
| 694 | extern int agp_amd64_init(void); | 708 | static struct dma_mapping_ops gart_dma_ops = { |
| 695 | |||
| 696 | static const struct dma_mapping_ops gart_dma_ops = { | ||
| 697 | .mapping_error = NULL, | ||
| 698 | .map_single = gart_map_single, | 709 | .map_single = gart_map_single, |
| 699 | .map_simple = gart_map_simple, | ||
| 700 | .unmap_single = gart_unmap_single, | 710 | .unmap_single = gart_unmap_single, |
| 701 | .sync_single_for_cpu = NULL, | ||
| 702 | .sync_single_for_device = NULL, | ||
| 703 | .sync_single_range_for_cpu = NULL, | ||
| 704 | .sync_single_range_for_device = NULL, | ||
| 705 | .sync_sg_for_cpu = NULL, | ||
| 706 | .sync_sg_for_device = NULL, | ||
| 707 | .map_sg = gart_map_sg, | 711 | .map_sg = gart_map_sg, |
| 708 | .unmap_sg = gart_unmap_sg, | 712 | .unmap_sg = gart_unmap_sg, |
| 713 | .alloc_coherent = gart_alloc_coherent, | ||
| 714 | .free_coherent = gart_free_coherent, | ||
| 709 | }; | 715 | }; |
| 710 | 716 | ||
| 711 | void gart_iommu_shutdown(void) | 717 | void gart_iommu_shutdown(void) |
| @@ -732,7 +738,8 @@ void __init gart_iommu_init(void) | |||
| 732 | { | 738 | { |
| 733 | struct agp_kern_info info; | 739 | struct agp_kern_info info; |
| 734 | unsigned long iommu_start; | 740 | unsigned long iommu_start; |
| 735 | unsigned long aper_size; | 741 | unsigned long aper_base, aper_size; |
| 742 | unsigned long start_pfn, end_pfn; | ||
| 736 | unsigned long scratch; | 743 | unsigned long scratch; |
| 737 | long i; | 744 | long i; |
| 738 | 745 | ||
| @@ -764,30 +771,35 @@ void __init gart_iommu_init(void) | |||
| 764 | (no_agp && init_k8_gatt(&info) < 0)) { | 771 | (no_agp && init_k8_gatt(&info) < 0)) { |
| 765 | if (max_pfn > MAX_DMA32_PFN) { | 772 | if (max_pfn > MAX_DMA32_PFN) { |
| 766 | printk(KERN_WARNING "More than 4GB of memory " | 773 | printk(KERN_WARNING "More than 4GB of memory " |
| 767 | "but GART IOMMU not available.\n" | 774 | "but GART IOMMU not available.\n"); |
| 768 | KERN_WARNING "falling back to iommu=soft.\n"); | 775 | printk(KERN_WARNING "falling back to iommu=soft.\n"); |
| 769 | } | 776 | } |
| 770 | return; | 777 | return; |
| 771 | } | 778 | } |
| 772 | 779 | ||
| 780 | /* need to map that range */ | ||
| 781 | aper_size = info.aper_size << 20; | ||
| 782 | aper_base = info.aper_base; | ||
| 783 | end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT); | ||
| 784 | if (end_pfn > max_low_pfn_mapped) { | ||
| 785 | start_pfn = (aper_base>>PAGE_SHIFT); | ||
| 786 | init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT); | ||
| 787 | } | ||
| 788 | |||
| 773 | printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n"); | 789 | printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n"); |
| 774 | aper_size = info.aper_size * 1024 * 1024; | ||
| 775 | iommu_size = check_iommu_size(info.aper_base, aper_size); | 790 | iommu_size = check_iommu_size(info.aper_base, aper_size); |
| 776 | iommu_pages = iommu_size >> PAGE_SHIFT; | 791 | iommu_pages = iommu_size >> PAGE_SHIFT; |
| 777 | 792 | ||
| 778 | iommu_gart_bitmap = (void *) __get_free_pages(GFP_KERNEL, | 793 | iommu_gart_bitmap = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, |
| 779 | get_order(iommu_pages/8)); | 794 | get_order(iommu_pages/8)); |
| 780 | if (!iommu_gart_bitmap) | 795 | if (!iommu_gart_bitmap) |
| 781 | panic("Cannot allocate iommu bitmap\n"); | 796 | panic("Cannot allocate iommu bitmap\n"); |
| 782 | memset(iommu_gart_bitmap, 0, iommu_pages/8); | ||
| 783 | 797 | ||
| 784 | #ifdef CONFIG_IOMMU_LEAK | 798 | #ifdef CONFIG_IOMMU_LEAK |
| 785 | if (leak_trace) { | 799 | if (leak_trace) { |
| 786 | iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL, | 800 | iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, |
| 787 | get_order(iommu_pages*sizeof(void *))); | 801 | get_order(iommu_pages*sizeof(void *))); |
| 788 | if (iommu_leak_tab) | 802 | if (!iommu_leak_tab) |
| 789 | memset(iommu_leak_tab, 0, iommu_pages * 8); | ||
| 790 | else | ||
| 791 | printk(KERN_DEBUG | 803 | printk(KERN_DEBUG |
| 792 | "PCI-DMA: Cannot allocate leak trace area\n"); | 804 | "PCI-DMA: Cannot allocate leak trace area\n"); |
| 793 | } | 805 | } |
| @@ -797,7 +809,7 @@ void __init gart_iommu_init(void) | |||
| 797 | * Out of IOMMU space handling. | 809 | * Out of IOMMU space handling. |
| 798 | * Reserve some invalid pages at the beginning of the GART. | 810 | * Reserve some invalid pages at the beginning of the GART. |
| 799 | */ | 811 | */ |
| 800 | set_bit_string(iommu_gart_bitmap, 0, EMERGENCY_PAGES); | 812 | iommu_area_reserve(iommu_gart_bitmap, 0, EMERGENCY_PAGES); |
| 801 | 813 | ||
| 802 | agp_memory_reserved = iommu_size; | 814 | agp_memory_reserved = iommu_size; |
| 803 | printk(KERN_INFO | 815 | printk(KERN_INFO |
| @@ -855,7 +867,8 @@ void __init gart_parse_options(char *p) | |||
| 855 | if (!strncmp(p, "leak", 4)) { | 867 | if (!strncmp(p, "leak", 4)) { |
| 856 | leak_trace = 1; | 868 | leak_trace = 1; |
| 857 | p += 4; | 869 | p += 4; |
| 858 | if (*p == '=') ++p; | 870 | if (*p == '=') |
| 871 | ++p; | ||
| 859 | if (isdigit(*p) && get_option(&p, &arg)) | 872 | if (isdigit(*p) && get_option(&p, &arg)) |
| 860 | iommu_leak_pages = arg; | 873 | iommu_leak_pages = arg; |
| 861 | } | 874 | } |
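In the GART hunks, gart_alloc_coherent() maps forced-IOMMU coherent buffers with align_mask = (1UL << get_order(size)) - 1, so the IOMMU mapping is naturally aligned to the allocation order. A rough user-space sketch of that arithmetic follows; get_order() here is a simplified reimplementation for illustration and a 4 KiB page size is assumed:

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE (1UL << PAGE_SHIFT)

/* smallest order N such that 2^N pages cover `size` bytes (size > 0) */
static unsigned int get_order(unsigned long size)
{
        unsigned int order = 0;

        size = (size - 1) >> PAGE_SHIFT;
        while (size) {
                order++;
                size >>= 1;
        }
        return order;
}

int main(void)
{
        unsigned long size = 3 * PAGE_SIZE + 123;   /* rounds up to order 2 */
        unsigned long align_mask = (1UL << get_order(size)) - 1;

        printf("order=%u align_mask=%#lx\n", get_order(size), align_mask);
        return 0;
}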
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index aec43d56f49c..c70ab5a5d4c8 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c | |||
| @@ -7,14 +7,14 @@ | |||
| 7 | #include <linux/dma-mapping.h> | 7 | #include <linux/dma-mapping.h> |
| 8 | #include <linux/scatterlist.h> | 8 | #include <linux/scatterlist.h> |
| 9 | 9 | ||
| 10 | #include <asm/gart.h> | 10 | #include <asm/iommu.h> |
| 11 | #include <asm/processor.h> | 11 | #include <asm/processor.h> |
| 12 | #include <asm/dma.h> | 12 | #include <asm/dma.h> |
| 13 | 13 | ||
| 14 | static int | 14 | static int |
| 15 | check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) | 15 | check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) |
| 16 | { | 16 | { |
| 17 | if (hwdev && bus + size > *hwdev->dma_mask) { | 17 | if (hwdev && !is_buffer_dma_capable(*hwdev->dma_mask, bus, size)) { |
| 18 | if (*hwdev->dma_mask >= DMA_32BIT_MASK) | 18 | if (*hwdev->dma_mask >= DMA_32BIT_MASK) |
| 19 | printk(KERN_ERR | 19 | printk(KERN_ERR |
| 20 | "nommu_%s: overflow %Lx+%zu of device mask %Lx\n", | 20 | "nommu_%s: overflow %Lx+%zu of device mask %Lx\n", |
| @@ -72,21 +72,17 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, | |||
| 72 | return nents; | 72 | return nents; |
| 73 | } | 73 | } |
| 74 | 74 | ||
| 75 | /* Make sure we keep the same behaviour */ | 75 | static void nommu_free_coherent(struct device *dev, size_t size, void *vaddr, |
| 76 | static int nommu_mapping_error(dma_addr_t dma_addr) | 76 | dma_addr_t dma_addr) |
| 77 | { | 77 | { |
| 78 | #ifdef CONFIG_X86_32 | 78 | free_pages((unsigned long)vaddr, get_order(size)); |
| 79 | return 0; | ||
| 80 | #else | ||
| 81 | return (dma_addr == bad_dma_address); | ||
| 82 | #endif | ||
| 83 | } | 79 | } |
| 84 | 80 | ||
| 85 | 81 | struct dma_mapping_ops nommu_dma_ops = { | |
| 86 | const struct dma_mapping_ops nommu_dma_ops = { | 82 | .alloc_coherent = dma_generic_alloc_coherent, |
| 83 | .free_coherent = nommu_free_coherent, | ||
| 87 | .map_single = nommu_map_single, | 84 | .map_single = nommu_map_single, |
| 88 | .map_sg = nommu_map_sg, | 85 | .map_sg = nommu_map_sg, |
| 89 | .mapping_error = nommu_mapping_error, | ||
| 90 | .is_phys = 1, | 86 | .is_phys = 1, |
| 91 | }; | 87 | }; |
| 92 | 88 | ||
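In the pci-nommu.c hunk above, the open-coded bus + size > *hwdev->dma_mask test is replaced by is_buffer_dma_capable(), and a free_coherent callback joins the ops table while the per-arch mapping_error hack goes away. Below is a minimal user-space restatement of the addressing test, assuming the usual "last byte must still fit under the device's DMA mask" semantics; it is a sketch, not the kernel helper.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* a buffer is usable without an IOMMU only if it ends at or below the mask */
static bool buffer_dma_capable(uint64_t mask, uint64_t addr, size_t size)
{
	return addr + size - 1 <= mask;
}

int main(void)
{
	uint64_t dma32 = 0xffffffffULL;	/* a 32-bit capable device */

	printf("%d\n", buffer_dma_capable(dma32, 0xfffff000ULL, 4096)); /* 1: fits */
	printf("%d\n", buffer_dma_capable(dma32, 0xfffff001ULL, 4096)); /* 0: crosses 4 GiB */
	return 0;
}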
diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c index 82299cd1d04d..c4ce0332759e 100644 --- a/arch/x86/kernel/pci-swiotlb_64.c +++ b/arch/x86/kernel/pci-swiotlb_64.c | |||
| @@ -5,7 +5,7 @@ | |||
| 5 | #include <linux/module.h> | 5 | #include <linux/module.h> |
| 6 | #include <linux/dma-mapping.h> | 6 | #include <linux/dma-mapping.h> |
| 7 | 7 | ||
| 8 | #include <asm/gart.h> | 8 | #include <asm/iommu.h> |
| 9 | #include <asm/swiotlb.h> | 9 | #include <asm/swiotlb.h> |
| 10 | #include <asm/dma.h> | 10 | #include <asm/dma.h> |
| 11 | 11 | ||
| @@ -18,7 +18,7 @@ swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size, | |||
| 18 | return swiotlb_map_single(hwdev, phys_to_virt(paddr), size, direction); | 18 | return swiotlb_map_single(hwdev, phys_to_virt(paddr), size, direction); |
| 19 | } | 19 | } |
| 20 | 20 | ||
| 21 | const struct dma_mapping_ops swiotlb_dma_ops = { | 21 | struct dma_mapping_ops swiotlb_dma_ops = { |
| 22 | .mapping_error = swiotlb_dma_mapping_error, | 22 | .mapping_error = swiotlb_dma_mapping_error, |
| 23 | .alloc_coherent = swiotlb_alloc_coherent, | 23 | .alloc_coherent = swiotlb_alloc_coherent, |
| 24 | .free_coherent = swiotlb_free_coherent, | 24 | .free_coherent = swiotlb_free_coherent, |
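The only substantive change to pci-swiotlb_64.c above is the header switch and dropping const from swiotlb_dma_ops. One plausible reason, stated here purely as an assumption, is that a writable operations table lets individual callbacks be replaced at initialisation time. The toy below shows that pattern with invented names (mapping_ops, generic_map, bounce_map); it is not the kernel structure.

#include <stdio.h>

struct mapping_ops {
	int (*map)(unsigned long addr);
};

static int generic_map(unsigned long addr)
{
	return printf("generic map of %#lx\n", addr);
}

static int bounce_map(unsigned long addr)
{
	return printf("bounce-buffer map of %#lx\n", addr);
}

/* non-const, so setup code may patch individual callbacks */
static struct mapping_ops ops = {
	.map = generic_map,
};

int main(void)
{
	ops.map(0x1000);	/* default path */
	ops.map = bounce_map;	/* runtime override, which a writable ops table allows */
	ops.map(0x1000);
	return 0;
}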
diff --git a/arch/x86/kernel/pcspeaker.c b/arch/x86/kernel/pcspeaker.c index bc1f2d3ea277..a311ffcaad16 100644 --- a/arch/x86/kernel/pcspeaker.c +++ b/arch/x86/kernel/pcspeaker.c | |||
| @@ -1,20 +1,13 @@ | |||
| 1 | #include <linux/platform_device.h> | 1 | #include <linux/platform_device.h> |
| 2 | #include <linux/errno.h> | 2 | #include <linux/err.h> |
| 3 | #include <linux/init.h> | 3 | #include <linux/init.h> |
| 4 | 4 | ||
| 5 | static __init int add_pcspkr(void) | 5 | static __init int add_pcspkr(void) |
| 6 | { | 6 | { |
| 7 | struct platform_device *pd; | 7 | struct platform_device *pd; |
| 8 | int ret; | ||
| 9 | 8 | ||
| 10 | pd = platform_device_alloc("pcspkr", -1); | 9 | pd = platform_device_register_simple("pcspkr", -1, NULL, 0); |
| 11 | if (!pd) | ||
| 12 | return -ENOMEM; | ||
| 13 | 10 | ||
| 14 | ret = platform_device_add(pd); | 11 | return IS_ERR(pd) ? PTR_ERR(pd) : 0; |
| 15 | if (ret) | ||
| 16 | platform_device_put(pd); | ||
| 17 | |||
| 18 | return ret; | ||
| 19 | } | 12 | } |
| 20 | device_initcall(add_pcspkr); | 13 | device_initcall(add_pcspkr); |
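The pcspeaker.c rewrite above leans on the kernel's error-pointer convention: platform_device_register_simple() returns either a valid pointer or an encoded errno, which IS_ERR()/PTR_ERR() unpack, so the manual alloc/add/put sequence collapses into one call. The user-space sketch below mimics those macros to show the shape of the final return statement; the helpers here are simplified stand-ins, not the kernel definitions.

#include <errno.h>
#include <stdio.h>

#define MAX_ERRNO	4095

static inline void *ERR_PTR(long error)
{
	return (void *)error;
}

static inline long PTR_ERR(const void *ptr)
{
	return (long)ptr;
}

static inline int IS_ERR(const void *ptr)
{
	/* errno values are encoded into the top MAX_ERRNO addresses */
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

static void *register_device(int fail)
{
	static int device;		/* stand-in for a real object */

	return fail ? ERR_PTR(-ENOMEM) : (void *)&device;
}

int main(void)
{
	void *pd = register_device(1);

	/* same shape as "return IS_ERR(pd) ? PTR_ERR(pd) : 0;" above: prints -12 */
	printf("%ld\n", IS_ERR(pd) ? PTR_ERR(pd) : 0L);
	return 0;
}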
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 4d629c62f4f8..c622772744d8 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
| @@ -184,7 +184,8 @@ static void mwait_idle(void) | |||
| 184 | static void poll_idle(void) | 184 | static void poll_idle(void) |
| 185 | { | 185 | { |
| 186 | local_irq_enable(); | 186 | local_irq_enable(); |
| 187 | cpu_relax(); | 187 | while (!need_resched()) |
| 188 | cpu_relax(); | ||
| 188 | } | 189 | } |
| 189 | 190 | ||
| 190 | /* | 191 | /* |
| @@ -199,6 +200,7 @@ static void poll_idle(void) | |||
| 199 | * | 200 | * |
| 200 | * idle=mwait overrides this decision and forces the usage of mwait. | 201 | * idle=mwait overrides this decision and forces the usage of mwait. |
| 201 | */ | 202 | */ |
| 203 | static int __cpuinitdata force_mwait; | ||
| 202 | 204 | ||
| 203 | #define MWAIT_INFO 0x05 | 205 | #define MWAIT_INFO 0x05 |
| 204 | #define MWAIT_ECX_EXTENDED_INFO 0x01 | 206 | #define MWAIT_ECX_EXTENDED_INFO 0x01 |
| @@ -244,6 +246,14 @@ static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c) | |||
| 244 | return 1; | 246 | return 1; |
| 245 | } | 247 | } |
| 246 | 248 | ||
| 249 | static cpumask_t c1e_mask = CPU_MASK_NONE; | ||
| 250 | static int c1e_detected; | ||
| 251 | |||
| 252 | void c1e_remove_cpu(int cpu) | ||
| 253 | { | ||
| 254 | cpu_clear(cpu, c1e_mask); | ||
| 255 | } | ||
| 256 | |||
| 247 | /* | 257 | /* |
| 248 | * C1E aware idle routine. We check for C1E active in the interrupt | 258 | * C1E aware idle routine. We check for C1E active in the interrupt |
| 249 | * pending message MSR. If we detect C1E, then we handle it the same | 259 | * pending message MSR. If we detect C1E, then we handle it the same |
| @@ -251,9 +261,6 @@ static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c) | |||
| 251 | */ | 261 | */ |
| 252 | static void c1e_idle(void) | 262 | static void c1e_idle(void) |
| 253 | { | 263 | { |
| 254 | static cpumask_t c1e_mask = CPU_MASK_NONE; | ||
| 255 | static int c1e_detected; | ||
| 256 | |||
| 257 | if (need_resched()) | 264 | if (need_resched()) |
| 258 | return; | 265 | return; |
| 259 | 266 | ||
| @@ -263,8 +270,10 @@ static void c1e_idle(void) | |||
| 263 | rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); | 270 | rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); |
| 264 | if (lo & K8_INTP_C1E_ACTIVE_MASK) { | 271 | if (lo & K8_INTP_C1E_ACTIVE_MASK) { |
| 265 | c1e_detected = 1; | 272 | c1e_detected = 1; |
| 266 | mark_tsc_unstable("TSC halt in C1E"); | 273 | if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) |
| 267 | printk(KERN_INFO "System has C1E enabled\n"); | 274 | mark_tsc_unstable("TSC halt in AMD C1E"); |
| 275 | printk(KERN_INFO "System has AMD C1E enabled\n"); | ||
| 276 | set_cpu_cap(&boot_cpu_data, X86_FEATURE_AMDC1E); | ||
| 268 | } | 277 | } |
| 269 | } | 278 | } |
| 270 | 279 | ||
| @@ -326,6 +335,9 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) | |||
| 326 | 335 | ||
| 327 | static int __init idle_setup(char *str) | 336 | static int __init idle_setup(char *str) |
| 328 | { | 337 | { |
| 338 | if (!str) | ||
| 339 | return -EINVAL; | ||
| 340 | |||
| 329 | if (!strcmp(str, "poll")) { | 341 | if (!strcmp(str, "poll")) { |
| 330 | printk("using polling idle threads.\n"); | 342 | printk("using polling idle threads.\n"); |
| 331 | pm_idle = poll_idle; | 343 | pm_idle = poll_idle; |
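The poll_idle() fix above turns a single cpu_relax() into a loop that keeps polling until need_resched() reports pending work; the same file also moves the C1E bookkeeping (c1e_mask, c1e_detected) out of the idle routine and only calls mark_tsc_unstable() when the CPU lacks a constant TSC. The snippet below is a user-space stand-in for the polling loop only: the atomic flag and relax() replace need_resched() and cpu_relax() and are not the kernel interfaces.

#include <stdatomic.h>
#include <stdio.h>

static atomic_int resched_pending;

static void relax(void)
{
#if defined(__x86_64__) || defined(__i386__)
	__asm__ __volatile__("pause");	/* the same hint cpu_relax() uses on x86 */
#endif
}

static void poll_idle(void)
{
	/* the point of the patch: keep polling while there is no work */
	while (!atomic_load(&resched_pending))
		relax();
}

int main(void)
{
	atomic_store(&resched_pending, 1);	/* pretend work arrived */
	poll_idle();				/* returns immediately */
	puts("idle loop exited");
	return 0;
}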
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 0c3927accb00..0a1302fe6d45 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
| @@ -37,6 +37,7 @@ | |||
| 37 | #include <linux/tick.h> | 37 | #include <linux/tick.h> |
| 38 | #include <linux/percpu.h> | 38 | #include <linux/percpu.h> |
| 39 | #include <linux/prctl.h> | 39 | #include <linux/prctl.h> |
| 40 | #include <linux/dmi.h> | ||
| 40 | 41 | ||
| 41 | #include <asm/uaccess.h> | 42 | #include <asm/uaccess.h> |
| 42 | #include <asm/pgtable.h> | 43 | #include <asm/pgtable.h> |
| @@ -55,6 +56,9 @@ | |||
| 55 | #include <asm/tlbflush.h> | 56 | #include <asm/tlbflush.h> |
| 56 | #include <asm/cpu.h> | 57 | #include <asm/cpu.h> |
| 57 | #include <asm/kdebug.h> | 58 | #include <asm/kdebug.h> |
| 59 | #include <asm/idle.h> | ||
| 60 | #include <asm/syscalls.h> | ||
| 61 | #include <asm/smp.h> | ||
| 58 | 62 | ||
| 59 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); | 63 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); |
| 60 | 64 | ||
| @@ -72,47 +76,12 @@ unsigned long thread_saved_pc(struct task_struct *tsk) | |||
| 72 | return ((unsigned long *)tsk->thread.sp)[3]; | 76 | return ((unsigned long *)tsk->thread.sp)[3]; |
| 73 | } | 77 | } |
| 74 | 78 | ||
| 75 | #ifdef CONFIG_HOTPLUG_CPU | 79 | #ifndef CONFIG_SMP |
| 76 | #include <asm/nmi.h> | ||
| 77 | |||
| 78 | static void cpu_exit_clear(void) | ||
| 79 | { | ||
| 80 | int cpu = raw_smp_processor_id(); | ||
| 81 | |||
| 82 | idle_task_exit(); | ||
| 83 | |||
| 84 | cpu_uninit(); | ||
| 85 | irq_ctx_exit(cpu); | ||
| 86 | |||
| 87 | cpu_clear(cpu, cpu_callout_map); | ||
| 88 | cpu_clear(cpu, cpu_callin_map); | ||
| 89 | |||
| 90 | numa_remove_cpu(cpu); | ||
| 91 | } | ||
| 92 | |||
| 93 | /* We don't actually take CPU down, just spin without interrupts. */ | ||
| 94 | static inline void play_dead(void) | ||
| 95 | { | ||
| 96 | /* This must be done before dead CPU ack */ | ||
| 97 | cpu_exit_clear(); | ||
| 98 | wbinvd(); | ||
| 99 | mb(); | ||
| 100 | /* Ack it */ | ||
| 101 | __get_cpu_var(cpu_state) = CPU_DEAD; | ||
| 102 | |||
| 103 | /* | ||
| 104 | * With physical CPU hotplug, we should halt the cpu | ||
| 105 | */ | ||
| 106 | local_irq_disable(); | ||
| 107 | while (1) | ||
| 108 | halt(); | ||
| 109 | } | ||
| 110 | #else | ||
| 111 | static inline void play_dead(void) | 80 | static inline void play_dead(void) |
| 112 | { | 81 | { |
| 113 | BUG(); | 82 | BUG(); |
| 114 | } | 83 | } |
| 115 | #endif /* CONFIG_HOTPLUG_CPU */ | 84 | #endif |
| 116 | 85 | ||
| 117 | /* | 86 | /* |
| 118 | * The idle thread. There's no useful work to be | 87 | * The idle thread. There's no useful work to be |
| @@ -128,7 +97,7 @@ void cpu_idle(void) | |||
| 128 | 97 | ||
| 129 | /* endless idle loop with no priority at all */ | 98 | /* endless idle loop with no priority at all */ |
| 130 | while (1) { | 99 | while (1) { |
| 131 | tick_nohz_stop_sched_tick(); | 100 | tick_nohz_stop_sched_tick(1); |
| 132 | while (!need_resched()) { | 101 | while (!need_resched()) { |
| 133 | 102 | ||
| 134 | check_pgt_cache(); | 103 | check_pgt_cache(); |
| @@ -154,12 +123,13 @@ void cpu_idle(void) | |||
| 154 | } | 123 | } |
| 155 | } | 124 | } |
| 156 | 125 | ||
| 157 | void __show_registers(struct pt_regs *regs, int all) | 126 | void __show_regs(struct pt_regs *regs, int all) |
| 158 | { | 127 | { |
| 159 | unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; | 128 | unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; |
| 160 | unsigned long d0, d1, d2, d3, d6, d7; | 129 | unsigned long d0, d1, d2, d3, d6, d7; |
| 161 | unsigned long sp; | 130 | unsigned long sp; |
| 162 | unsigned short ss, gs; | 131 | unsigned short ss, gs; |
| 132 | const char *board; | ||
| 163 | 133 | ||
| 164 | if (user_mode_vm(regs)) { | 134 | if (user_mode_vm(regs)) { |
| 165 | sp = regs->sp; | 135 | sp = regs->sp; |
| @@ -172,11 +142,15 @@ void __show_registers(struct pt_regs *regs, int all) | |||
| 172 | } | 142 | } |
| 173 | 143 | ||
| 174 | printk("\n"); | 144 | printk("\n"); |
| 175 | printk("Pid: %d, comm: %s %s (%s %.*s)\n", | 145 | |
| 146 | board = dmi_get_system_info(DMI_PRODUCT_NAME); | ||
| 147 | if (!board) | ||
| 148 | board = ""; | ||
| 149 | printk("Pid: %d, comm: %s %s (%s %.*s) %s\n", | ||
| 176 | task_pid_nr(current), current->comm, | 150 | task_pid_nr(current), current->comm, |
| 177 | print_tainted(), init_utsname()->release, | 151 | print_tainted(), init_utsname()->release, |
| 178 | (int)strcspn(init_utsname()->version, " "), | 152 | (int)strcspn(init_utsname()->version, " "), |
| 179 | init_utsname()->version); | 153 | init_utsname()->version, board); |
| 180 | 154 | ||
| 181 | printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n", | 155 | printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n", |
| 182 | (u16)regs->cs, regs->ip, regs->flags, | 156 | (u16)regs->cs, regs->ip, regs->flags, |
| @@ -215,7 +189,7 @@ void __show_registers(struct pt_regs *regs, int all) | |||
| 215 | 189 | ||
| 216 | void show_regs(struct pt_regs *regs) | 190 | void show_regs(struct pt_regs *regs) |
| 217 | { | 191 | { |
| 218 | __show_registers(regs, 1); | 192 | __show_regs(regs, 1); |
| 219 | show_trace(NULL, regs, ®s->sp, regs->bp); | 193 | show_trace(NULL, regs, ®s->sp, regs->bp); |
| 220 | } | 194 | } |
| 221 | 195 | ||
| @@ -276,6 +250,14 @@ void exit_thread(void) | |||
| 276 | tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; | 250 | tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; |
| 277 | put_cpu(); | 251 | put_cpu(); |
| 278 | } | 252 | } |
| 253 | #ifdef CONFIG_X86_DS | ||
| 254 | /* Free any DS contexts that have not been properly released. */ | ||
| 255 | if (unlikely(current->thread.ds_ctx)) { | ||
| 256 | /* we clear debugctl to make sure DS is not used. */ | ||
| 257 | update_debugctlmsr(0); | ||
| 258 | ds_free(current->thread.ds_ctx); | ||
| 259 | } | ||
| 260 | #endif /* CONFIG_X86_DS */ | ||
| 279 | } | 261 | } |
| 280 | 262 | ||
| 281 | void flush_thread(void) | 263 | void flush_thread(void) |
| @@ -437,6 +419,35 @@ int set_tsc_mode(unsigned int val) | |||
| 437 | return 0; | 419 | return 0; |
| 438 | } | 420 | } |
| 439 | 421 | ||
| 422 | #ifdef CONFIG_X86_DS | ||
| 423 | static int update_debugctl(struct thread_struct *prev, | ||
| 424 | struct thread_struct *next, unsigned long debugctl) | ||
| 425 | { | ||
| 426 | unsigned long ds_prev = 0; | ||
| 427 | unsigned long ds_next = 0; | ||
| 428 | |||
| 429 | if (prev->ds_ctx) | ||
| 430 | ds_prev = (unsigned long)prev->ds_ctx->ds; | ||
| 431 | if (next->ds_ctx) | ||
| 432 | ds_next = (unsigned long)next->ds_ctx->ds; | ||
| 433 | |||
| 434 | if (ds_next != ds_prev) { | ||
| 435 | /* we clear debugctl to make sure DS | ||
| 436 | * is not in use when we change it */ | ||
| 437 | debugctl = 0; | ||
| 438 | update_debugctlmsr(0); | ||
| 439 | wrmsr(MSR_IA32_DS_AREA, ds_next, 0); | ||
| 440 | } | ||
| 441 | return debugctl; | ||
| 442 | } | ||
| 443 | #else | ||
| 444 | static int update_debugctl(struct thread_struct *prev, | ||
| 445 | struct thread_struct *next, unsigned long debugctl) | ||
| 446 | { | ||
| 447 | return debugctl; | ||
| 448 | } | ||
| 449 | #endif /* CONFIG_X86_DS */ | ||
| 450 | |||
| 440 | static noinline void | 451 | static noinline void |
| 441 | __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | 452 | __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, |
| 442 | struct tss_struct *tss) | 453 | struct tss_struct *tss) |
| @@ -447,14 +458,7 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | |||
| 447 | prev = &prev_p->thread; | 458 | prev = &prev_p->thread; |
| 448 | next = &next_p->thread; | 459 | next = &next_p->thread; |
| 449 | 460 | ||
| 450 | debugctl = prev->debugctlmsr; | 461 | debugctl = update_debugctl(prev, next, prev->debugctlmsr); |
| 451 | if (next->ds_area_msr != prev->ds_area_msr) { | ||
| 452 | /* we clear debugctl to make sure DS | ||
| 453 | * is not in use when we change it */ | ||
| 454 | debugctl = 0; | ||
| 455 | update_debugctlmsr(0); | ||
| 456 | wrmsr(MSR_IA32_DS_AREA, next->ds_area_msr, 0); | ||
| 457 | } | ||
| 458 | 462 | ||
| 459 | if (next->debugctlmsr != debugctl) | 463 | if (next->debugctlmsr != debugctl) |
| 460 | update_debugctlmsr(next->debugctlmsr); | 464 | update_debugctlmsr(next->debugctlmsr); |
| @@ -478,13 +482,13 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | |||
| 478 | hard_enable_TSC(); | 482 | hard_enable_TSC(); |
| 479 | } | 483 | } |
| 480 | 484 | ||
| 481 | #ifdef X86_BTS | 485 | #ifdef CONFIG_X86_PTRACE_BTS |
| 482 | if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) | 486 | if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) |
| 483 | ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); | 487 | ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); |
| 484 | 488 | ||
| 485 | if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) | 489 | if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) |
| 486 | ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); | 490 | ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); |
| 487 | #endif | 491 | #endif /* CONFIG_X86_PTRACE_BTS */ |
| 488 | 492 | ||
| 489 | 493 | ||
| 490 | if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { | 494 | if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { |
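The largest functional change to process_32.c above is the DS/BTS handling: the old ds_area_msr comparison in __switch_to_xtra() becomes an update_debugctl() helper that compares the two threads' ds_ctx pointers and zeroes DEBUGCTL before the DS area is switched. The sketch below restates that helper in plain user-space C; struct thread, struct ds_context and write_msr() are stand-ins invented for the example, not the kernel types.

#include <stdio.h>

struct ds_context { void *ds; };

struct thread {
	struct ds_context *ds_ctx;
	unsigned long debugctlmsr;
};

static void write_msr(const char *name, unsigned long val)
{
	printf("wrmsr %s <- %#lx\n", name, val);
}

static unsigned long update_debugctl(struct thread *prev, struct thread *next,
				     unsigned long debugctl)
{
	unsigned long ds_prev = prev->ds_ctx ? (unsigned long)prev->ds_ctx->ds : 0;
	unsigned long ds_next = next->ds_ctx ? (unsigned long)next->ds_ctx->ds : 0;

	if (ds_next != ds_prev) {
		/* clear debugctl so DS is not in use while the area changes */
		debugctl = 0;
		write_msr("DEBUGCTL", 0);
		write_msr("DS_AREA", ds_next);
	}
	return debugctl;
}

int main(void)
{
	static char area_a, area_b;
	struct ds_context a = { &area_a }, b = { &area_b };
	struct thread prev = { &a, 0x1 }, next = { &b, 0x1 };
	unsigned long debugctl = update_debugctl(&prev, &next, prev.debugctlmsr);

	/* mirrors the caller in __switch_to_xtra(): only rewrite if it changed */
	if (next.debugctlmsr != debugctl)
		write_msr("DEBUGCTL", next.debugctlmsr);
	return 0;
}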
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index a8e53626ac9a..c958120fb1b6 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
| @@ -37,11 +37,11 @@ | |||
| 37 | #include <linux/kdebug.h> | 37 | #include <linux/kdebug.h> |
| 38 | #include <linux/tick.h> | 38 | #include <linux/tick.h> |
| 39 | #include <linux/prctl.h> | 39 | #include <linux/prctl.h> |
| 40 | #include <linux/uaccess.h> | ||
| 41 | #include <linux/io.h> | ||
| 40 | 42 | ||
| 41 | #include <asm/uaccess.h> | ||
| 42 | #include <asm/pgtable.h> | 43 | #include <asm/pgtable.h> |
| 43 | #include <asm/system.h> | 44 | #include <asm/system.h> |
| 44 | #include <asm/io.h> | ||
| 45 | #include <asm/processor.h> | 45 | #include <asm/processor.h> |
| 46 | #include <asm/i387.h> | 46 | #include <asm/i387.h> |
| 47 | #include <asm/mmu_context.h> | 47 | #include <asm/mmu_context.h> |
| @@ -51,6 +51,7 @@ | |||
| 51 | #include <asm/proto.h> | 51 | #include <asm/proto.h> |
| 52 | #include <asm/ia32.h> | 52 | #include <asm/ia32.h> |
| 53 | #include <asm/idle.h> | 53 | #include <asm/idle.h> |
| 54 | #include <asm/syscalls.h> | ||
| 54 | 55 | ||
| 55 | asmlinkage extern void ret_from_fork(void); | 56 | asmlinkage extern void ret_from_fork(void); |
| 56 | 57 | ||
| @@ -62,6 +63,13 @@ void idle_notifier_register(struct notifier_block *n) | |||
| 62 | { | 63 | { |
| 63 | atomic_notifier_chain_register(&idle_notifier, n); | 64 | atomic_notifier_chain_register(&idle_notifier, n); |
| 64 | } | 65 | } |
| 66 | EXPORT_SYMBOL_GPL(idle_notifier_register); | ||
| 67 | |||
| 68 | void idle_notifier_unregister(struct notifier_block *n) | ||
| 69 | { | ||
| 70 | atomic_notifier_chain_unregister(&idle_notifier, n); | ||
| 71 | } | ||
| 72 | EXPORT_SYMBOL_GPL(idle_notifier_unregister); | ||
| 65 | 73 | ||
| 66 | void enter_idle(void) | 74 | void enter_idle(void) |
| 67 | { | 75 | { |
| @@ -85,29 +93,12 @@ void exit_idle(void) | |||
| 85 | __exit_idle(); | 93 | __exit_idle(); |
| 86 | } | 94 | } |
| 87 | 95 | ||
| 88 | #ifdef CONFIG_HOTPLUG_CPU | 96 | #ifndef CONFIG_SMP |
| 89 | DECLARE_PER_CPU(int, cpu_state); | ||
| 90 | |||
| 91 | #include <asm/nmi.h> | ||
| 92 | /* We halt the CPU with physical CPU hotplug */ | ||
| 93 | static inline void play_dead(void) | ||
| 94 | { | ||
| 95 | idle_task_exit(); | ||
| 96 | wbinvd(); | ||
| 97 | mb(); | ||
| 98 | /* Ack it */ | ||
| 99 | __get_cpu_var(cpu_state) = CPU_DEAD; | ||
| 100 | |||
| 101 | local_irq_disable(); | ||
| 102 | while (1) | ||
| 103 | halt(); | ||
| 104 | } | ||
| 105 | #else | ||
| 106 | static inline void play_dead(void) | 97 | static inline void play_dead(void) |
| 107 | { | 98 | { |
| 108 | BUG(); | 99 | BUG(); |
| 109 | } | 100 | } |
| 110 | #endif /* CONFIG_HOTPLUG_CPU */ | 101 | #endif |
| 111 | 102 | ||
| 112 | /* | 103 | /* |
| 113 | * The idle thread. There's no useful work to be | 104 | * The idle thread. There's no useful work to be |
| @@ -120,7 +111,7 @@ void cpu_idle(void) | |||
| 120 | current_thread_info()->status |= TS_POLLING; | 111 | current_thread_info()->status |= TS_POLLING; |
| 121 | /* endless idle loop with no priority at all */ | 112 | /* endless idle loop with no priority at all */ |
| 122 | while (1) { | 113 | while (1) { |
| 123 | tick_nohz_stop_sched_tick(); | 114 | tick_nohz_stop_sched_tick(1); |
| 124 | while (!need_resched()) { | 115 | while (!need_resched()) { |
| 125 | 116 | ||
| 126 | rmb(); | 117 | rmb(); |
| @@ -152,7 +143,7 @@ void cpu_idle(void) | |||
| 152 | } | 143 | } |
| 153 | 144 | ||
| 154 | /* Prints also some state that isn't saved in the pt_regs */ | 145 | /* Prints also some state that isn't saved in the pt_regs */ |
| 155 | void __show_regs(struct pt_regs * regs) | 146 | void __show_regs(struct pt_regs *regs, int all) |
| 156 | { | 147 | { |
| 157 | unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs; | 148 | unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs; |
| 158 | unsigned long d0, d1, d2, d3, d6, d7; | 149 | unsigned long d0, d1, d2, d3, d6, d7; |
| @@ -161,60 +152,65 @@ void __show_regs(struct pt_regs * regs) | |||
| 161 | 152 | ||
| 162 | printk("\n"); | 153 | printk("\n"); |
| 163 | print_modules(); | 154 | print_modules(); |
| 164 | printk("Pid: %d, comm: %.20s %s %s %.*s\n", | 155 | printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s\n", |
| 165 | current->pid, current->comm, print_tainted(), | 156 | current->pid, current->comm, print_tainted(), |
| 166 | init_utsname()->release, | 157 | init_utsname()->release, |
| 167 | (int)strcspn(init_utsname()->version, " "), | 158 | (int)strcspn(init_utsname()->version, " "), |
| 168 | init_utsname()->version); | 159 | init_utsname()->version); |
| 169 | printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); | 160 | printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); |
| 170 | printk_address(regs->ip, 1); | 161 | printk_address(regs->ip, 1); |
| 171 | printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->sp, | 162 | printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, |
| 172 | regs->flags); | 163 | regs->sp, regs->flags); |
| 173 | printk("RAX: %016lx RBX: %016lx RCX: %016lx\n", | 164 | printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n", |
| 174 | regs->ax, regs->bx, regs->cx); | 165 | regs->ax, regs->bx, regs->cx); |
| 175 | printk("RDX: %016lx RSI: %016lx RDI: %016lx\n", | 166 | printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n", |
| 176 | regs->dx, regs->si, regs->di); | 167 | regs->dx, regs->si, regs->di); |
| 177 | printk("RBP: %016lx R08: %016lx R09: %016lx\n", | 168 | printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n", |
| 178 | regs->bp, regs->r8, regs->r9); | 169 | regs->bp, regs->r8, regs->r9); |
| 179 | printk("R10: %016lx R11: %016lx R12: %016lx\n", | 170 | printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n", |
| 180 | regs->r10, regs->r11, regs->r12); | 171 | regs->r10, regs->r11, regs->r12); |
| 181 | printk("R13: %016lx R14: %016lx R15: %016lx\n", | 172 | printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n", |
| 182 | regs->r13, regs->r14, regs->r15); | 173 | regs->r13, regs->r14, regs->r15); |
| 183 | 174 | ||
| 184 | asm("movl %%ds,%0" : "=r" (ds)); | 175 | asm("movl %%ds,%0" : "=r" (ds)); |
| 185 | asm("movl %%cs,%0" : "=r" (cs)); | 176 | asm("movl %%cs,%0" : "=r" (cs)); |
| 186 | asm("movl %%es,%0" : "=r" (es)); | 177 | asm("movl %%es,%0" : "=r" (es)); |
| 187 | asm("movl %%fs,%0" : "=r" (fsindex)); | 178 | asm("movl %%fs,%0" : "=r" (fsindex)); |
| 188 | asm("movl %%gs,%0" : "=r" (gsindex)); | 179 | asm("movl %%gs,%0" : "=r" (gsindex)); |
| 189 | 180 | ||
| 190 | rdmsrl(MSR_FS_BASE, fs); | 181 | rdmsrl(MSR_FS_BASE, fs); |
| 191 | rdmsrl(MSR_GS_BASE, gs); | 182 | rdmsrl(MSR_GS_BASE, gs); |
| 192 | rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); | 183 | rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); |
| 184 | |||
| 185 | if (!all) | ||
| 186 | return; | ||
| 193 | 187 | ||
| 194 | cr0 = read_cr0(); | 188 | cr0 = read_cr0(); |
| 195 | cr2 = read_cr2(); | 189 | cr2 = read_cr2(); |
| 196 | cr3 = read_cr3(); | 190 | cr3 = read_cr3(); |
| 197 | cr4 = read_cr4(); | 191 | cr4 = read_cr4(); |
| 198 | 192 | ||
| 199 | printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", | 193 | printk(KERN_INFO "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", |
| 200 | fs,fsindex,gs,gsindex,shadowgs); | 194 | fs, fsindex, gs, gsindex, shadowgs); |
| 201 | printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0); | 195 | printk(KERN_INFO "CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, |
| 202 | printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4); | 196 | es, cr0); |
| 197 | printk(KERN_INFO "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, | ||
| 198 | cr4); | ||
| 203 | 199 | ||
| 204 | get_debugreg(d0, 0); | 200 | get_debugreg(d0, 0); |
| 205 | get_debugreg(d1, 1); | 201 | get_debugreg(d1, 1); |
| 206 | get_debugreg(d2, 2); | 202 | get_debugreg(d2, 2); |
| 207 | printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2); | 203 | printk(KERN_INFO "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2); |
| 208 | get_debugreg(d3, 3); | 204 | get_debugreg(d3, 3); |
| 209 | get_debugreg(d6, 6); | 205 | get_debugreg(d6, 6); |
| 210 | get_debugreg(d7, 7); | 206 | get_debugreg(d7, 7); |
| 211 | printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7); | 207 | printk(KERN_INFO "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7); |
| 212 | } | 208 | } |
| 213 | 209 | ||
| 214 | void show_regs(struct pt_regs *regs) | 210 | void show_regs(struct pt_regs *regs) |
| 215 | { | 211 | { |
| 216 | printk("CPU %d:", smp_processor_id()); | 212 | printk(KERN_INFO "CPU %d:", smp_processor_id()); |
| 217 | __show_regs(regs); | 213 | __show_regs(regs, 1); |
| 218 | show_trace(NULL, regs, (void *)(regs + 1), regs->bp); | 214 | show_trace(NULL, regs, (void *)(regs + 1), regs->bp); |
| 219 | } | 215 | } |
| 220 | 216 | ||
| @@ -239,6 +235,14 @@ void exit_thread(void) | |||
| 239 | t->io_bitmap_max = 0; | 235 | t->io_bitmap_max = 0; |
| 240 | put_cpu(); | 236 | put_cpu(); |
| 241 | } | 237 | } |
| 238 | #ifdef CONFIG_X86_DS | ||
| 239 | /* Free any DS contexts that have not been properly released. */ | ||
| 240 | if (unlikely(t->ds_ctx)) { | ||
| 241 | /* we clear debugctl to make sure DS is not used. */ | ||
| 242 | update_debugctlmsr(0); | ||
| 243 | ds_free(t->ds_ctx); | ||
| 244 | } | ||
| 245 | #endif /* CONFIG_X86_DS */ | ||
| 242 | } | 246 | } |
| 243 | 247 | ||
| 244 | void flush_thread(void) | 248 | void flush_thread(void) |
| @@ -314,10 +318,10 @@ void prepare_to_copy(struct task_struct *tsk) | |||
| 314 | 318 | ||
| 315 | int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, | 319 | int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, |
| 316 | unsigned long unused, | 320 | unsigned long unused, |
| 317 | struct task_struct * p, struct pt_regs * regs) | 321 | struct task_struct *p, struct pt_regs *regs) |
| 318 | { | 322 | { |
| 319 | int err; | 323 | int err; |
| 320 | struct pt_regs * childregs; | 324 | struct pt_regs *childregs; |
| 321 | struct task_struct *me = current; | 325 | struct task_struct *me = current; |
| 322 | 326 | ||
| 323 | childregs = ((struct pt_regs *) | 327 | childregs = ((struct pt_regs *) |
| @@ -362,10 +366,10 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, | |||
| 362 | if (test_thread_flag(TIF_IA32)) | 366 | if (test_thread_flag(TIF_IA32)) |
| 363 | err = do_set_thread_area(p, -1, | 367 | err = do_set_thread_area(p, -1, |
| 364 | (struct user_desc __user *)childregs->si, 0); | 368 | (struct user_desc __user *)childregs->si, 0); |
| 365 | else | 369 | else |
| 366 | #endif | 370 | #endif |
| 367 | err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8); | 371 | err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8); |
| 368 | if (err) | 372 | if (err) |
| 369 | goto out; | 373 | goto out; |
| 370 | } | 374 | } |
| 371 | err = 0; | 375 | err = 0; |
| @@ -472,13 +476,27 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, | |||
| 472 | next = &next_p->thread; | 476 | next = &next_p->thread; |
| 473 | 477 | ||
| 474 | debugctl = prev->debugctlmsr; | 478 | debugctl = prev->debugctlmsr; |
| 475 | if (next->ds_area_msr != prev->ds_area_msr) { | 479 | |
| 476 | /* we clear debugctl to make sure DS | 480 | #ifdef CONFIG_X86_DS |
| 477 | * is not in use when we change it */ | 481 | { |
| 478 | debugctl = 0; | 482 | unsigned long ds_prev = 0, ds_next = 0; |
| 479 | update_debugctlmsr(0); | 483 | |
| 480 | wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr); | 484 | if (prev->ds_ctx) |
| 485 | ds_prev = (unsigned long)prev->ds_ctx->ds; | ||
| 486 | if (next->ds_ctx) | ||
| 487 | ds_next = (unsigned long)next->ds_ctx->ds; | ||
| 488 | |||
| 489 | if (ds_next != ds_prev) { | ||
| 490 | /* | ||
| 491 | * We clear debugctl to make sure DS | ||
| 492 | * is not in use when we change it: | ||
| 493 | */ | ||
| 494 | debugctl = 0; | ||
| 495 | update_debugctlmsr(0); | ||
| 496 | wrmsrl(MSR_IA32_DS_AREA, ds_next); | ||
| 497 | } | ||
| 481 | } | 498 | } |
| 499 | #endif /* CONFIG_X86_DS */ | ||
| 482 | 500 | ||
| 483 | if (next->debugctlmsr != debugctl) | 501 | if (next->debugctlmsr != debugctl) |
| 484 | update_debugctlmsr(next->debugctlmsr); | 502 | update_debugctlmsr(next->debugctlmsr); |
| @@ -516,13 +534,13 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, | |||
| 516 | memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); | 534 | memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); |
| 517 | } | 535 | } |
| 518 | 536 | ||
| 519 | #ifdef X86_BTS | 537 | #ifdef CONFIG_X86_PTRACE_BTS |
| 520 | if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) | 538 | if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) |
| 521 | ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); | 539 | ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); |
| 522 | 540 | ||
| 523 | if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) | 541 | if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) |
| 524 | ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); | 542 | ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); |
| 525 | #endif | 543 | #endif /* CONFIG_X86_PTRACE_BTS */ |
| 526 | } | 544 | } |
| 527 | 545 | ||
| 528 | /* | 546 | /* |
| @@ -537,14 +555,14 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, | |||
| 537 | struct task_struct * | 555 | struct task_struct * |
| 538 | __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | 556 | __switch_to(struct task_struct *prev_p, struct task_struct *next_p) |
| 539 | { | 557 | { |
| 540 | struct thread_struct *prev = &prev_p->thread, | 558 | struct thread_struct *prev = &prev_p->thread; |
| 541 | *next = &next_p->thread; | 559 | struct thread_struct *next = &next_p->thread; |
| 542 | int cpu = smp_processor_id(); | 560 | int cpu = smp_processor_id(); |
| 543 | struct tss_struct *tss = &per_cpu(init_tss, cpu); | 561 | struct tss_struct *tss = &per_cpu(init_tss, cpu); |
| 544 | unsigned fsindex, gsindex; | 562 | unsigned fsindex, gsindex; |
| 545 | 563 | ||
| 546 | /* we're going to use this soon, after a few expensive things */ | 564 | /* we're going to use this soon, after a few expensive things */ |
| 547 | if (next_p->fpu_counter>5) | 565 | if (next_p->fpu_counter > 5) |
| 548 | prefetch(next->xstate); | 566 | prefetch(next->xstate); |
| 549 | 567 | ||
| 550 | /* | 568 | /* |
| @@ -552,13 +570,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
| 552 | */ | 570 | */ |
| 553 | load_sp0(tss, next); | 571 | load_sp0(tss, next); |
| 554 | 572 | ||
| 555 | /* | 573 | /* |
| 556 | * Switch DS and ES. | 574 | * Switch DS and ES. |
| 557 | * This won't pick up thread selector changes, but I guess that is ok. | 575 | * This won't pick up thread selector changes, but I guess that is ok. |
| 558 | */ | 576 | */ |
| 559 | savesegment(es, prev->es); | 577 | savesegment(es, prev->es); |
| 560 | if (unlikely(next->es | prev->es)) | 578 | if (unlikely(next->es | prev->es)) |
| 561 | loadsegment(es, next->es); | 579 | loadsegment(es, next->es); |
| 562 | 580 | ||
| 563 | savesegment(ds, prev->ds); | 581 | savesegment(ds, prev->ds); |
| 564 | if (unlikely(next->ds | prev->ds)) | 582 | if (unlikely(next->ds | prev->ds)) |
| @@ -584,50 +602,50 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
| 584 | */ | 602 | */ |
| 585 | arch_leave_lazy_cpu_mode(); | 603 | arch_leave_lazy_cpu_mode(); |
| 586 | 604 | ||
| 587 | /* | 605 | /* |
| 588 | * Switch FS and GS. | 606 | * Switch FS and GS. |
| 607 | * | ||
| 608 | * Segment register != 0 always requires a reload. Also | ||
| 609 | * reload when it has changed. When prev process used 64bit | ||
| 610 | * base always reload to avoid an information leak. | ||
| 589 | */ | 611 | */ |
| 590 | { | 612 | if (unlikely(fsindex | next->fsindex | prev->fs)) { |
| 591 | /* segment register != 0 always requires a reload. | 613 | loadsegment(fs, next->fsindex); |
| 592 | also reload when it has changed. | 614 | /* |
| 593 | when prev process used 64bit base always reload | 615 | * Check if the user used a selector != 0; if yes |
| 594 | to avoid an information leak. */ | 616 | * clear 64bit base, since overloaded base is always |
| 595 | if (unlikely(fsindex | next->fsindex | prev->fs)) { | 617 | * mapped to the Null selector |
| 596 | loadsegment(fs, next->fsindex); | 618 | */ |
| 597 | /* check if the user used a selector != 0 | 619 | if (fsindex) |
| 598 | * if yes clear 64bit base, since overloaded base | 620 | prev->fs = 0; |
| 599 | * is always mapped to the Null selector | 621 | } |
| 600 | */ | 622 | /* when next process has a 64bit base use it */ |
| 601 | if (fsindex) | 623 | if (next->fs) |
| 602 | prev->fs = 0; | 624 | wrmsrl(MSR_FS_BASE, next->fs); |
| 603 | } | 625 | prev->fsindex = fsindex; |
| 604 | /* when next process has a 64bit base use it */ | 626 | |
| 605 | if (next->fs) | 627 | if (unlikely(gsindex | next->gsindex | prev->gs)) { |
| 606 | wrmsrl(MSR_FS_BASE, next->fs); | 628 | load_gs_index(next->gsindex); |
| 607 | prev->fsindex = fsindex; | 629 | if (gsindex) |
| 608 | 630 | prev->gs = 0; | |
| 609 | if (unlikely(gsindex | next->gsindex | prev->gs)) { | ||
| 610 | load_gs_index(next->gsindex); | ||
| 611 | if (gsindex) | ||
| 612 | prev->gs = 0; | ||
| 613 | } | ||
| 614 | if (next->gs) | ||
| 615 | wrmsrl(MSR_KERNEL_GS_BASE, next->gs); | ||
| 616 | prev->gsindex = gsindex; | ||
| 617 | } | 631 | } |
| 632 | if (next->gs) | ||
| 633 | wrmsrl(MSR_KERNEL_GS_BASE, next->gs); | ||
| 634 | prev->gsindex = gsindex; | ||
| 618 | 635 | ||
| 619 | /* Must be after DS reload */ | 636 | /* Must be after DS reload */ |
| 620 | unlazy_fpu(prev_p); | 637 | unlazy_fpu(prev_p); |
| 621 | 638 | ||
| 622 | /* | 639 | /* |
| 623 | * Switch the PDA and FPU contexts. | 640 | * Switch the PDA and FPU contexts. |
| 624 | */ | 641 | */ |
| 625 | prev->usersp = read_pda(oldrsp); | 642 | prev->usersp = read_pda(oldrsp); |
| 626 | write_pda(oldrsp, next->usersp); | 643 | write_pda(oldrsp, next->usersp); |
| 627 | write_pda(pcurrent, next_p); | 644 | write_pda(pcurrent, next_p); |
| 628 | 645 | ||
| 629 | write_pda(kernelstack, | 646 | write_pda(kernelstack, |
| 630 | (unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET); | 647 | (unsigned long)task_stack_page(next_p) + |
| 648 | THREAD_SIZE - PDA_STACKOFFSET); | ||
| 631 | #ifdef CONFIG_CC_STACKPROTECTOR | 649 | #ifdef CONFIG_CC_STACKPROTECTOR |
| 632 | write_pda(stack_canary, next_p->stack_canary); | 650 | write_pda(stack_canary, next_p->stack_canary); |
| 633 | /* | 651 | /* |
| @@ -664,7 +682,7 @@ long sys_execve(char __user *name, char __user * __user *argv, | |||
| 664 | char __user * __user *envp, struct pt_regs *regs) | 682 | char __user * __user *envp, struct pt_regs *regs) |
| 665 | { | 683 | { |
| 666 | long error; | 684 | long error; |
| 667 | char * filename; | 685 | char *filename; |
| 668 | 686 | ||
| 669 | filename = getname(name); | 687 | filename = getname(name); |
| 670 | error = PTR_ERR(filename); | 688 | error = PTR_ERR(filename); |
| @@ -722,55 +740,55 @@ asmlinkage long sys_vfork(struct pt_regs *regs) | |||
| 722 | unsigned long get_wchan(struct task_struct *p) | 740 | unsigned long get_wchan(struct task_struct *p) |
| 723 | { | 741 | { |
| 724 | unsigned long stack; | 742 | unsigned long stack; |
| 725 | u64 fp,ip; | 743 | u64 fp, ip; |
| 726 | int count = 0; | 744 | int count = 0; |
| 727 | 745 | ||
| 728 | if (!p || p == current || p->state==TASK_RUNNING) | 746 | if (!p || p == current || p->state == TASK_RUNNING) |
| 729 | return 0; | 747 | return 0; |
| 730 | stack = (unsigned long)task_stack_page(p); | 748 | stack = (unsigned long)task_stack_page(p); |
| 731 | if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE) | 749 | if (p->thread.sp < stack || p->thread.sp >= stack+THREAD_SIZE) |
| 732 | return 0; | 750 | return 0; |
| 733 | fp = *(u64 *)(p->thread.sp); | 751 | fp = *(u64 *)(p->thread.sp); |
| 734 | do { | 752 | do { |
| 735 | if (fp < (unsigned long)stack || | 753 | if (fp < (unsigned long)stack || |
| 736 | fp > (unsigned long)stack+THREAD_SIZE) | 754 | fp >= (unsigned long)stack+THREAD_SIZE) |
| 737 | return 0; | 755 | return 0; |
| 738 | ip = *(u64 *)(fp+8); | 756 | ip = *(u64 *)(fp+8); |
| 739 | if (!in_sched_functions(ip)) | 757 | if (!in_sched_functions(ip)) |
| 740 | return ip; | 758 | return ip; |
| 741 | fp = *(u64 *)fp; | 759 | fp = *(u64 *)fp; |
| 742 | } while (count++ < 16); | 760 | } while (count++ < 16); |
| 743 | return 0; | 761 | return 0; |
| 744 | } | 762 | } |
| 745 | 763 | ||
| 746 | long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) | 764 | long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) |
| 747 | { | 765 | { |
| 748 | int ret = 0; | 766 | int ret = 0; |
| 749 | int doit = task == current; | 767 | int doit = task == current; |
| 750 | int cpu; | 768 | int cpu; |
| 751 | 769 | ||
| 752 | switch (code) { | 770 | switch (code) { |
| 753 | case ARCH_SET_GS: | 771 | case ARCH_SET_GS: |
| 754 | if (addr >= TASK_SIZE_OF(task)) | 772 | if (addr >= TASK_SIZE_OF(task)) |
| 755 | return -EPERM; | 773 | return -EPERM; |
| 756 | cpu = get_cpu(); | 774 | cpu = get_cpu(); |
| 757 | /* handle small bases via the GDT because that's faster to | 775 | /* handle small bases via the GDT because that's faster to |
| 758 | switch. */ | 776 | switch. */ |
| 759 | if (addr <= 0xffffffff) { | 777 | if (addr <= 0xffffffff) { |
| 760 | set_32bit_tls(task, GS_TLS, addr); | 778 | set_32bit_tls(task, GS_TLS, addr); |
| 761 | if (doit) { | 779 | if (doit) { |
| 762 | load_TLS(&task->thread, cpu); | 780 | load_TLS(&task->thread, cpu); |
| 763 | load_gs_index(GS_TLS_SEL); | 781 | load_gs_index(GS_TLS_SEL); |
| 764 | } | 782 | } |
| 765 | task->thread.gsindex = GS_TLS_SEL; | 783 | task->thread.gsindex = GS_TLS_SEL; |
| 766 | task->thread.gs = 0; | 784 | task->thread.gs = 0; |
| 767 | } else { | 785 | } else { |
| 768 | task->thread.gsindex = 0; | 786 | task->thread.gsindex = 0; |
| 769 | task->thread.gs = addr; | 787 | task->thread.gs = addr; |
| 770 | if (doit) { | 788 | if (doit) { |
| 771 | load_gs_index(0); | 789 | load_gs_index(0); |
| 772 | ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr); | 790 | ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr); |
| 773 | } | 791 | } |
| 774 | } | 792 | } |
| 775 | put_cpu(); | 793 | put_cpu(); |
| 776 | break; | 794 | break; |
| @@ -824,8 +842,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) | |||
| 824 | rdmsrl(MSR_KERNEL_GS_BASE, base); | 842 | rdmsrl(MSR_KERNEL_GS_BASE, base); |
| 825 | else | 843 | else |
| 826 | base = task->thread.gs; | 844 | base = task->thread.gs; |
| 827 | } | 845 | } else |
| 828 | else | ||
| 829 | base = task->thread.gs; | 846 | base = task->thread.gs; |
| 830 | ret = put_user(base, (unsigned long __user *)addr); | 847 | ret = put_user(base, (unsigned long __user *)addr); |
| 831 | break; | 848 | break; |
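Among the process_64.c cleanups above, the one behavioural fix is in get_wchan(): both stack-bounds tests change from ">" to ">=", so a saved frame pointer sitting exactly at stack + THREAD_SIZE, one past the end of the stack, is now rejected as well. The check below restates that half-open range test in user space, with a made-up THREAD_SIZE.

#include <stdbool.h>
#include <stdio.h>

#define THREAD_SIZE 8192UL

/* valid frame pointers live in [stack, stack + THREAD_SIZE) */
static bool on_stack(unsigned long stack, unsigned long p)
{
	return p >= stack && p < stack + THREAD_SIZE;
}

int main(void)
{
	unsigned long stack = 0x10000;

	printf("%d\n", on_stack(stack, stack + THREAD_SIZE - 8)); /* 1: inside */
	printf("%d\n", on_stack(stack, stack + THREAD_SIZE));     /* 0: one past the end */
	return 0;
}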
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 77040b6070e1..0a6d8c12e10d 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
| @@ -14,6 +14,7 @@ | |||
| 14 | #include <linux/errno.h> | 14 | #include <linux/errno.h> |
| 15 | #include <linux/ptrace.h> | 15 | #include <linux/ptrace.h> |
| 16 | #include <linux/regset.h> | 16 | #include <linux/regset.h> |
| 17 | #include <linux/tracehook.h> | ||
| 17 | #include <linux/user.h> | 18 | #include <linux/user.h> |
| 18 | #include <linux/elf.h> | 19 | #include <linux/elf.h> |
| 19 | #include <linux/security.h> | 20 | #include <linux/security.h> |
| @@ -39,7 +40,9 @@ enum x86_regset { | |||
| 39 | REGSET_GENERAL, | 40 | REGSET_GENERAL, |
| 40 | REGSET_FP, | 41 | REGSET_FP, |
| 41 | REGSET_XFP, | 42 | REGSET_XFP, |
| 43 | REGSET_IOPERM64 = REGSET_XFP, | ||
| 42 | REGSET_TLS, | 44 | REGSET_TLS, |
| 45 | REGSET_IOPERM32, | ||
| 43 | }; | 46 | }; |
| 44 | 47 | ||
| 45 | /* | 48 | /* |
| @@ -69,7 +72,7 @@ static inline bool invalid_selector(u16 value) | |||
| 69 | 72 | ||
| 70 | #define FLAG_MASK FLAG_MASK_32 | 73 | #define FLAG_MASK FLAG_MASK_32 |
| 71 | 74 | ||
| 72 | static long *pt_regs_access(struct pt_regs *regs, unsigned long regno) | 75 | static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno) |
| 73 | { | 76 | { |
| 74 | BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0); | 77 | BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0); |
| 75 | regno >>= 2; | 78 | regno >>= 2; |
| @@ -554,45 +557,138 @@ static int ptrace_set_debugreg(struct task_struct *child, | |||
| 554 | return 0; | 557 | return 0; |
| 555 | } | 558 | } |
| 556 | 559 | ||
| 557 | #ifdef X86_BTS | 560 | /* |
| 561 | * These access the current or another (stopped) task's io permission | ||
| 562 | * bitmap for debugging or core dump. | ||
| 563 | */ | ||
| 564 | static int ioperm_active(struct task_struct *target, | ||
| 565 | const struct user_regset *regset) | ||
| 566 | { | ||
| 567 | return target->thread.io_bitmap_max / regset->size; | ||
| 568 | } | ||
| 558 | 569 | ||
| 559 | static int ptrace_bts_get_size(struct task_struct *child) | 570 | static int ioperm_get(struct task_struct *target, |
| 571 | const struct user_regset *regset, | ||
| 572 | unsigned int pos, unsigned int count, | ||
| 573 | void *kbuf, void __user *ubuf) | ||
| 560 | { | 574 | { |
| 561 | if (!child->thread.ds_area_msr) | 575 | if (!target->thread.io_bitmap_ptr) |
| 562 | return -ENXIO; | 576 | return -ENXIO; |
| 563 | 577 | ||
| 564 | return ds_get_bts_index((void *)child->thread.ds_area_msr); | 578 | return user_regset_copyout(&pos, &count, &kbuf, &ubuf, |
| 579 | target->thread.io_bitmap_ptr, | ||
| 580 | 0, IO_BITMAP_BYTES); | ||
| 581 | } | ||
| 582 | |||
| 583 | #ifdef CONFIG_X86_PTRACE_BTS | ||
| 584 | /* | ||
| 585 | * The configuration for a particular BTS hardware implementation. | ||
| 586 | */ | ||
| 587 | struct bts_configuration { | ||
| 588 | /* the size of a BTS record in bytes; at most BTS_MAX_RECORD_SIZE */ | ||
| 589 | unsigned char sizeof_bts; | ||
| 590 | /* the size of a field in the BTS record in bytes */ | ||
| 591 | unsigned char sizeof_field; | ||
| 592 | /* a bitmask to enable/disable BTS in DEBUGCTL MSR */ | ||
| 593 | unsigned long debugctl_mask; | ||
| 594 | }; | ||
| 595 | static struct bts_configuration bts_cfg; | ||
| 596 | |||
| 597 | #define BTS_MAX_RECORD_SIZE (8 * 3) | ||
| 598 | |||
| 599 | |||
| 600 | /* | ||
| 601 | * Branch Trace Store (BTS) uses the following format. Different | ||
| 602 | * architectures vary in the size of those fields. | ||
| 603 | * - source linear address | ||
| 604 | * - destination linear address | ||
| 605 | * - flags | ||
| 606 | * | ||
| 607 | * Later architectures use 64bit pointers throughout, whereas earlier | ||
| 608 | * architectures use 32bit pointers in 32bit mode. | ||
| 609 | * | ||
| 610 | * We compute the base address for the first 8 fields based on: | ||
| 611 | * - the field size stored in the DS configuration | ||
| 612 | * - the relative field position | ||
| 613 | * | ||
| 614 | * In order to store additional information in the BTS buffer, we use | ||
| 615 | * a special source address to indicate that the record requires | ||
| 616 | * special interpretation. | ||
| 617 | * | ||
| 618 | * Netburst indicated via a bit in the flags field whether the branch | ||
| 619 | * was predicted; this is ignored. | ||
| 620 | */ | ||
| 621 | |||
| 622 | enum bts_field { | ||
| 623 | bts_from = 0, | ||
| 624 | bts_to, | ||
| 625 | bts_flags, | ||
| 626 | |||
| 627 | bts_escape = (unsigned long)-1, | ||
| 628 | bts_qual = bts_to, | ||
| 629 | bts_jiffies = bts_flags | ||
| 630 | }; | ||
| 631 | |||
| 632 | static inline unsigned long bts_get(const char *base, enum bts_field field) | ||
| 633 | { | ||
| 634 | base += (bts_cfg.sizeof_field * field); | ||
| 635 | return *(unsigned long *)base; | ||
| 565 | } | 636 | } |
| 566 | 637 | ||
| 567 | static int ptrace_bts_read_record(struct task_struct *child, | 638 | static inline void bts_set(char *base, enum bts_field field, unsigned long val) |
| 568 | long index, | 639 | { |
| 640 | base += (bts_cfg.sizeof_field * field);; | ||
| 641 | (*(unsigned long *)base) = val; | ||
| 642 | } | ||
| 643 | |||
| 644 | /* | ||
| 645 | * Translate a BTS record from the raw format into the bts_struct format | ||
| 646 | * | ||
| 647 | * out (out): bts_struct interpretation | ||
| 648 | * raw: raw BTS record | ||
| 649 | */ | ||
| 650 | static void ptrace_bts_translate_record(struct bts_struct *out, const void *raw) | ||
| 651 | { | ||
| 652 | memset(out, 0, sizeof(*out)); | ||
| 653 | if (bts_get(raw, bts_from) == bts_escape) { | ||
| 654 | out->qualifier = bts_get(raw, bts_qual); | ||
| 655 | out->variant.jiffies = bts_get(raw, bts_jiffies); | ||
| 656 | } else { | ||
| 657 | out->qualifier = BTS_BRANCH; | ||
| 658 | out->variant.lbr.from_ip = bts_get(raw, bts_from); | ||
| 659 | out->variant.lbr.to_ip = bts_get(raw, bts_to); | ||
| 660 | } | ||
| 661 | } | ||
| 662 | |||
| 663 | static int ptrace_bts_read_record(struct task_struct *child, size_t index, | ||
| 569 | struct bts_struct __user *out) | 664 | struct bts_struct __user *out) |
| 570 | { | 665 | { |
| 571 | struct bts_struct ret; | 666 | struct bts_struct ret; |
| 572 | int retval; | 667 | const void *bts_record; |
| 573 | int bts_end; | 668 | size_t bts_index, bts_end; |
| 574 | int bts_index; | 669 | int error; |
| 575 | 670 | ||
| 576 | if (!child->thread.ds_area_msr) | 671 | error = ds_get_bts_end(child, &bts_end); |
| 577 | return -ENXIO; | 672 | if (error < 0) |
| 673 | return error; | ||
| 578 | 674 | ||
| 579 | if (index < 0) | ||
| 580 | return -EINVAL; | ||
| 581 | |||
| 582 | bts_end = ds_get_bts_end((void *)child->thread.ds_area_msr); | ||
| 583 | if (bts_end <= index) | 675 | if (bts_end <= index) |
| 584 | return -EINVAL; | 676 | return -EINVAL; |
| 585 | 677 | ||
| 678 | error = ds_get_bts_index(child, &bts_index); | ||
| 679 | if (error < 0) | ||
| 680 | return error; | ||
| 681 | |||
| 586 | /* translate the ptrace bts index into the ds bts index */ | 682 | /* translate the ptrace bts index into the ds bts index */ |
| 587 | bts_index = ds_get_bts_index((void *)child->thread.ds_area_msr); | 683 | bts_index += bts_end - (index + 1); |
| 588 | bts_index -= (index + 1); | 684 | if (bts_end <= bts_index) |
| 589 | if (bts_index < 0) | 685 | bts_index -= bts_end; |
| 590 | bts_index += bts_end; | 686 | |
| 687 | error = ds_access_bts(child, bts_index, &bts_record); | ||
| 688 | if (error < 0) | ||
| 689 | return error; | ||
| 591 | 690 | ||
| 592 | retval = ds_read_bts((void *)child->thread.ds_area_msr, | 691 | ptrace_bts_translate_record(&ret, bts_record); |
| 593 | bts_index, &ret); | ||
| 594 | if (retval < 0) | ||
| 595 | return retval; | ||
| 596 | 692 | ||
| 597 | if (copy_to_user(out, &ret, sizeof(ret))) | 693 | if (copy_to_user(out, &ret, sizeof(ret))) |
| 598 | return -EFAULT; | 694 | return -EFAULT; |
| @@ -600,101 +696,106 @@ static int ptrace_bts_read_record(struct task_struct *child, | |||
| 600 | return sizeof(ret); | 696 | return sizeof(ret); |
| 601 | } | 697 | } |
| 602 | 698 | ||
| 603 | static int ptrace_bts_clear(struct task_struct *child) | ||
| 604 | { | ||
| 605 | if (!child->thread.ds_area_msr) | ||
| 606 | return -ENXIO; | ||
| 607 | |||
| 608 | return ds_clear((void *)child->thread.ds_area_msr); | ||
| 609 | } | ||
| 610 | |||
| 611 | static int ptrace_bts_drain(struct task_struct *child, | 699 | static int ptrace_bts_drain(struct task_struct *child, |
| 612 | long size, | 700 | long size, |
| 613 | struct bts_struct __user *out) | 701 | struct bts_struct __user *out) |
| 614 | { | 702 | { |
| 615 | int end, i; | 703 | struct bts_struct ret; |
| 616 | void *ds = (void *)child->thread.ds_area_msr; | 704 | const unsigned char *raw; |
| 617 | 705 | size_t end, i; | |
| 618 | if (!ds) | 706 | int error; |
| 619 | return -ENXIO; | ||
| 620 | 707 | ||
| 621 | end = ds_get_bts_index(ds); | 708 | error = ds_get_bts_index(child, &end); |
| 622 | if (end <= 0) | 709 | if (error < 0) |
| 623 | return end; | 710 | return error; |
| 624 | 711 | ||
| 625 | if (size < (end * sizeof(struct bts_struct))) | 712 | if (size < (end * sizeof(struct bts_struct))) |
| 626 | return -EIO; | 713 | return -EIO; |
| 627 | 714 | ||
| 628 | for (i = 0; i < end; i++, out++) { | 715 | error = ds_access_bts(child, 0, (const void **)&raw); |
| 629 | struct bts_struct ret; | 716 | if (error < 0) |
| 630 | int retval; | 717 | return error; |
| 631 | 718 | ||
| 632 | retval = ds_read_bts(ds, i, &ret); | 719 | for (i = 0; i < end; i++, out++, raw += bts_cfg.sizeof_bts) { |
| 633 | if (retval < 0) | 720 | ptrace_bts_translate_record(&ret, raw); |
| 634 | return retval; | ||
| 635 | 721 | ||
| 636 | if (copy_to_user(out, &ret, sizeof(ret))) | 722 | if (copy_to_user(out, &ret, sizeof(ret))) |
| 637 | return -EFAULT; | 723 | return -EFAULT; |
| 638 | } | 724 | } |
| 639 | 725 | ||
| 640 | ds_clear(ds); | 726 | error = ds_clear_bts(child); |
| 727 | if (error < 0) | ||
| 728 | return error; | ||
| 641 | 729 | ||
| 642 | return end; | 730 | return end; |
| 643 | } | 731 | } |
| 644 | 732 | ||
| 733 | static void ptrace_bts_ovfl(struct task_struct *child) | ||
| 734 | { | ||
| 735 | send_sig(child->thread.bts_ovfl_signal, child, 0); | ||
| 736 | } | ||
| 737 | |||
| 645 | static int ptrace_bts_config(struct task_struct *child, | 738 | static int ptrace_bts_config(struct task_struct *child, |
| 646 | long cfg_size, | 739 | long cfg_size, |
| 647 | const struct ptrace_bts_config __user *ucfg) | 740 | const struct ptrace_bts_config __user *ucfg) |
| 648 | { | 741 | { |
| 649 | struct ptrace_bts_config cfg; | 742 | struct ptrace_bts_config cfg; |
| 650 | int bts_size, ret = 0; | 743 | int error = 0; |
| 651 | void *ds; | ||
| 652 | 744 | ||
| 745 | error = -EOPNOTSUPP; | ||
| 746 | if (!bts_cfg.sizeof_bts) | ||
| 747 | goto errout; | ||
| 748 | |||
| 749 | error = -EIO; | ||
| 653 | if (cfg_size < sizeof(cfg)) | 750 | if (cfg_size < sizeof(cfg)) |
| 654 | return -EIO; | 751 | goto errout; |
| 655 | 752 | ||
| 753 | error = -EFAULT; | ||
| 656 | if (copy_from_user(&cfg, ucfg, sizeof(cfg))) | 754 | if (copy_from_user(&cfg, ucfg, sizeof(cfg))) |
| 657 | return -EFAULT; | 755 | goto errout; |
| 658 | 756 | ||
| 659 | if ((int)cfg.size < 0) | 757 | error = -EINVAL; |
| 660 | return -EINVAL; | 758 | if ((cfg.flags & PTRACE_BTS_O_SIGNAL) && |
| 759 | !(cfg.flags & PTRACE_BTS_O_ALLOC)) | ||
| 760 | goto errout; | ||
| 661 | 761 | ||
| 662 | bts_size = 0; | 762 | if (cfg.flags & PTRACE_BTS_O_ALLOC) { |
| 663 | ds = (void *)child->thread.ds_area_msr; | 763 | ds_ovfl_callback_t ovfl = NULL; |
| 664 | if (ds) { | 764 | unsigned int sig = 0; |
| 665 | bts_size = ds_get_bts_size(ds); | ||
| 666 | if (bts_size < 0) | ||
| 667 | return bts_size; | ||
| 668 | } | ||
| 669 | cfg.size = PAGE_ALIGN(cfg.size); | ||
| 670 | 765 | ||
| 671 | if (bts_size != cfg.size) { | 766 | /* we ignore the error in case we were not tracing child */ |
| 672 | ret = ptrace_bts_realloc(child, cfg.size, | 767 | (void)ds_release_bts(child); |
| 673 | cfg.flags & PTRACE_BTS_O_CUT_SIZE); | 768 | |
| 674 | if (ret < 0) | 769 | if (cfg.flags & PTRACE_BTS_O_SIGNAL) { |
| 770 | if (!cfg.signal) | ||
| 771 | goto errout; | ||
| 772 | |||
| 773 | sig = cfg.signal; | ||
| 774 | ovfl = ptrace_bts_ovfl; | ||
| 775 | } | ||
| 776 | |||
| 777 | error = ds_request_bts(child, /* base = */ NULL, cfg.size, ovfl); | ||
| 778 | if (error < 0) | ||
| 675 | goto errout; | 779 | goto errout; |
| 676 | 780 | ||
| 677 | ds = (void *)child->thread.ds_area_msr; | 781 | child->thread.bts_ovfl_signal = sig; |
| 678 | } | 782 | } |
| 679 | 783 | ||
| 680 | if (cfg.flags & PTRACE_BTS_O_SIGNAL) | 784 | error = -EINVAL; |
| 681 | ret = ds_set_overflow(ds, DS_O_SIGNAL); | 785 | if (!child->thread.ds_ctx && cfg.flags) |
| 682 | else | ||
| 683 | ret = ds_set_overflow(ds, DS_O_WRAP); | ||
| 684 | if (ret < 0) | ||
| 685 | goto errout; | 786 | goto errout; |
| 686 | 787 | ||
| 687 | if (cfg.flags & PTRACE_BTS_O_TRACE) | 788 | if (cfg.flags & PTRACE_BTS_O_TRACE) |
| 688 | child->thread.debugctlmsr |= ds_debugctl_mask(); | 789 | child->thread.debugctlmsr |= bts_cfg.debugctl_mask; |
| 689 | else | 790 | else |
| 690 | child->thread.debugctlmsr &= ~ds_debugctl_mask(); | 791 | child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; |
| 691 | 792 | ||
| 692 | if (cfg.flags & PTRACE_BTS_O_SCHED) | 793 | if (cfg.flags & PTRACE_BTS_O_SCHED) |
| 693 | set_tsk_thread_flag(child, TIF_BTS_TRACE_TS); | 794 | set_tsk_thread_flag(child, TIF_BTS_TRACE_TS); |
| 694 | else | 795 | else |
| 695 | clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); | 796 | clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); |
| 696 | 797 | ||
| 697 | ret = sizeof(cfg); | 798 | error = sizeof(cfg); |
| 698 | 799 | ||
| 699 | out: | 800 | out: |
| 700 | if (child->thread.debugctlmsr) | 801 | if (child->thread.debugctlmsr) |
| @@ -702,10 +803,10 @@ out: | |||
| 702 | else | 803 | else |
| 703 | clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); | 804 | clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); |
| 704 | 805 | ||
| 705 | return ret; | 806 | return error; |
| 706 | 807 | ||
| 707 | errout: | 808 | errout: |
| 708 | child->thread.debugctlmsr &= ~ds_debugctl_mask(); | 809 | child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; |
| 709 | clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); | 810 | clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); |
| 710 | goto out; | 811 | goto out; |
| 711 | } | 812 | } |
| @@ -714,29 +815,40 @@ static int ptrace_bts_status(struct task_struct *child, | |||
| 714 | long cfg_size, | 815 | long cfg_size, |
| 715 | struct ptrace_bts_config __user *ucfg) | 816 | struct ptrace_bts_config __user *ucfg) |
| 716 | { | 817 | { |
| 717 | void *ds = (void *)child->thread.ds_area_msr; | ||
| 718 | struct ptrace_bts_config cfg; | 818 | struct ptrace_bts_config cfg; |
| 819 | size_t end; | ||
| 820 | const void *base, *max; | ||
| 821 | int error; | ||
| 719 | 822 | ||
| 720 | if (cfg_size < sizeof(cfg)) | 823 | if (cfg_size < sizeof(cfg)) |
| 721 | return -EIO; | 824 | return -EIO; |
| 722 | 825 | ||
| 723 | memset(&cfg, 0, sizeof(cfg)); | 826 | error = ds_get_bts_end(child, &end); |
| 827 | if (error < 0) | ||
| 828 | return error; | ||
| 829 | |||
| 830 | error = ds_access_bts(child, /* index = */ 0, &base); | ||
| 831 | if (error < 0) | ||
| 832 | return error; | ||
| 724 | 833 | ||
| 725 | if (ds) { | 834 | error = ds_access_bts(child, /* index = */ end, &max); |
| 726 | cfg.size = ds_get_bts_size(ds); | 835 | if (error < 0) |
| 836 | return error; | ||
| 727 | 837 | ||
| 728 | if (ds_get_overflow(ds) == DS_O_SIGNAL) | 838 | memset(&cfg, 0, sizeof(cfg)); |
| 729 | cfg.flags |= PTRACE_BTS_O_SIGNAL; | 839 | cfg.size = (max - base); |
| 840 | cfg.signal = child->thread.bts_ovfl_signal; | ||
| 841 | cfg.bts_size = sizeof(struct bts_struct); | ||
| 730 | 842 | ||
| 731 | if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) && | 843 | if (cfg.signal) |
| 732 | child->thread.debugctlmsr & ds_debugctl_mask()) | 844 | cfg.flags |= PTRACE_BTS_O_SIGNAL; |
| 733 | cfg.flags |= PTRACE_BTS_O_TRACE; | ||
| 734 | 845 | ||
| 735 | if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS)) | 846 | if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) && |
| 736 | cfg.flags |= PTRACE_BTS_O_SCHED; | 847 | child->thread.debugctlmsr & bts_cfg.debugctl_mask) |
| 737 | } | 848 | cfg.flags |= PTRACE_BTS_O_TRACE; |
| 738 | 849 | ||
| 739 | cfg.bts_size = sizeof(struct bts_struct); | 850 | if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS)) |
| 851 | cfg.flags |= PTRACE_BTS_O_SCHED; | ||
| 740 | 852 | ||
| 741 | if (copy_to_user(ucfg, &cfg, sizeof(cfg))) | 853 | if (copy_to_user(ucfg, &cfg, sizeof(cfg))) |
| 742 | return -EFAULT; | 854 | return -EFAULT; |
| @@ -744,89 +856,38 @@ static int ptrace_bts_status(struct task_struct *child, | |||
| 744 | return sizeof(cfg); | 856 | return sizeof(cfg); |
| 745 | } | 857 | } |
| 746 | 858 | ||
| 747 | |||
| 748 | static int ptrace_bts_write_record(struct task_struct *child, | 859 | static int ptrace_bts_write_record(struct task_struct *child, |
| 749 | const struct bts_struct *in) | 860 | const struct bts_struct *in) |
| 750 | { | 861 | { |
| 751 | int retval; | 862 | unsigned char bts_record[BTS_MAX_RECORD_SIZE]; |
| 752 | 863 | ||
| 753 | if (!child->thread.ds_area_msr) | 864 | BUG_ON(BTS_MAX_RECORD_SIZE < bts_cfg.sizeof_bts); |
| 754 | return -ENXIO; | ||
| 755 | 865 | ||
| 756 | retval = ds_write_bts((void *)child->thread.ds_area_msr, in); | 866 | memset(bts_record, 0, bts_cfg.sizeof_bts); |
| 757 | if (retval) | 867 | switch (in->qualifier) { |
| 758 | return retval; | 868 | case BTS_INVALID: |
| 869 | break; | ||
| 759 | 870 | ||
| 760 | return sizeof(*in); | 871 | case BTS_BRANCH: |
| 761 | } | 872 | bts_set(bts_record, bts_from, in->variant.lbr.from_ip); |
| 873 | bts_set(bts_record, bts_to, in->variant.lbr.to_ip); | ||
| 874 | break; | ||
| 762 | 875 | ||
| 763 | static int ptrace_bts_realloc(struct task_struct *child, | 876 | case BTS_TASK_ARRIVES: |
| 764 | int size, int reduce_size) | 877 | case BTS_TASK_DEPARTS: |
| 765 | { | 878 | bts_set(bts_record, bts_from, bts_escape); |
| 766 | unsigned long rlim, vm; | 879 | bts_set(bts_record, bts_qual, in->qualifier); |
| 767 | int ret, old_size; | 880 | bts_set(bts_record, bts_jiffies, in->variant.jiffies); |
| 881 | break; | ||
| 768 | 882 | ||
| 769 | if (size < 0) | 883 | default: |
| 770 | return -EINVAL; | 884 | return -EINVAL; |
| 771 | |||
| 772 | old_size = ds_get_bts_size((void *)child->thread.ds_area_msr); | ||
| 773 | if (old_size < 0) | ||
| 774 | return old_size; | ||
| 775 | |||
| 776 | ret = ds_free((void **)&child->thread.ds_area_msr); | ||
| 777 | if (ret < 0) | ||
| 778 | goto out; | ||
| 779 | |||
| 780 | size >>= PAGE_SHIFT; | ||
| 781 | old_size >>= PAGE_SHIFT; | ||
| 782 | |||
| 783 | current->mm->total_vm -= old_size; | ||
| 784 | current->mm->locked_vm -= old_size; | ||
| 785 | |||
| 786 | if (size == 0) | ||
| 787 | goto out; | ||
| 788 | |||
| 789 | rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; | ||
| 790 | vm = current->mm->total_vm + size; | ||
| 791 | if (rlim < vm) { | ||
| 792 | ret = -ENOMEM; | ||
| 793 | |||
| 794 | if (!reduce_size) | ||
| 795 | goto out; | ||
| 796 | |||
| 797 | size = rlim - current->mm->total_vm; | ||
| 798 | if (size <= 0) | ||
| 799 | goto out; | ||
| 800 | } | 885 | } |
| 801 | 886 | ||
| 802 | rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; | 887 | /* The writing task will be the switched-to task on a context |
| 803 | vm = current->mm->locked_vm + size; | 888 | * switch. It needs to write into the switched-from task's BTS |
| 804 | if (rlim < vm) { | 889 | * buffer. */ |
| 805 | ret = -ENOMEM; | 890 | return ds_unchecked_write_bts(child, bts_record, bts_cfg.sizeof_bts); |
| 806 | |||
| 807 | if (!reduce_size) | ||
| 808 | goto out; | ||
| 809 | |||
| 810 | size = rlim - current->mm->locked_vm; | ||
| 811 | if (size <= 0) | ||
| 812 | goto out; | ||
| 813 | } | ||
| 814 | |||
| 815 | ret = ds_allocate((void **)&child->thread.ds_area_msr, | ||
| 816 | size << PAGE_SHIFT); | ||
| 817 | if (ret < 0) | ||
| 818 | goto out; | ||
| 819 | |||
| 820 | current->mm->total_vm += size; | ||
| 821 | current->mm->locked_vm += size; | ||
| 822 | |||
| 823 | out: | ||
| 824 | if (child->thread.ds_area_msr) | ||
| 825 | set_tsk_thread_flag(child, TIF_DS_AREA_MSR); | ||
| 826 | else | ||
| 827 | clear_tsk_thread_flag(child, TIF_DS_AREA_MSR); | ||
| 828 | |||
| 829 | return ret; | ||
| 830 | } | 891 | } |
| 831 | 892 | ||
| 832 | void ptrace_bts_take_timestamp(struct task_struct *tsk, | 893 | void ptrace_bts_take_timestamp(struct task_struct *tsk, |
| @@ -839,7 +900,66 @@ void ptrace_bts_take_timestamp(struct task_struct *tsk, | |||
| 839 | 900 | ||
| 840 | ptrace_bts_write_record(tsk, &rec); | 901 | ptrace_bts_write_record(tsk, &rec); |
| 841 | } | 902 | } |
| 842 | #endif /* X86_BTS */ | 903 | |
| 904 | static const struct bts_configuration bts_cfg_netburst = { | ||
| 905 | .sizeof_bts = sizeof(long) * 3, | ||
| 906 | .sizeof_field = sizeof(long), | ||
| 907 | .debugctl_mask = (1<<2)|(1<<3)|(1<<5) | ||
| 908 | }; | ||
| 909 | |||
| 910 | static const struct bts_configuration bts_cfg_pentium_m = { | ||
| 911 | .sizeof_bts = sizeof(long) * 3, | ||
| 912 | .sizeof_field = sizeof(long), | ||
| 913 | .debugctl_mask = (1<<6)|(1<<7) | ||
| 914 | }; | ||
| 915 | |||
| 916 | static const struct bts_configuration bts_cfg_core2 = { | ||
| 917 | .sizeof_bts = 8 * 3, | ||
| 918 | .sizeof_field = 8, | ||
| 919 | .debugctl_mask = (1<<6)|(1<<7)|(1<<9) | ||
| 920 | }; | ||
| 921 | |||
| 922 | static inline void bts_configure(const struct bts_configuration *cfg) | ||
| 923 | { | ||
| 924 | bts_cfg = *cfg; | ||
| 925 | } | ||
| 926 | |||
| 927 | void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *c) | ||
| 928 | { | ||
| 929 | switch (c->x86) { | ||
| 930 | case 0x6: | ||
| 931 | switch (c->x86_model) { | ||
| 932 | case 0xD: | ||
| 933 | case 0xE: /* Pentium M */ | ||
| 934 | bts_configure(&bts_cfg_pentium_m); | ||
| 935 | break; | ||
| 936 | case 0xF: /* Core2 */ | ||
| 937 | case 0x1C: /* Atom */ | ||
| 938 | bts_configure(&bts_cfg_core2); | ||
| 939 | break; | ||
| 940 | default: | ||
| 941 | /* sorry, don't know about them */ | ||
| 942 | break; | ||
| 943 | } | ||
| 944 | break; | ||
| 945 | case 0xF: | ||
| 946 | switch (c->x86_model) { | ||
| 947 | case 0x0: | ||
| 948 | case 0x1: | ||
| 949 | case 0x2: /* Netburst */ | ||
| 950 | bts_configure(&bts_cfg_netburst); | ||
| 951 | break; | ||
| 952 | default: | ||
| 953 | /* sorry, don't know about them */ | ||
| 954 | break; | ||
| 955 | } | ||
| 956 | break; | ||
| 957 | default: | ||
| 958 | /* sorry, don't know about them */ | ||
| 959 | break; | ||
| 960 | } | ||
| 961 | } | ||
| 962 | #endif /* CONFIG_X86_PTRACE_BTS */ | ||
| 843 | 963 | ||
| 844 | /* | 964 | /* |
| 845 | * Called by kernel/ptrace.c when detaching.. | 965 | * Called by kernel/ptrace.c when detaching.. |
| @@ -852,15 +972,15 @@ void ptrace_disable(struct task_struct *child) | |||
| 852 | #ifdef TIF_SYSCALL_EMU | 972 | #ifdef TIF_SYSCALL_EMU |
| 853 | clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); | 973 | clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); |
| 854 | #endif | 974 | #endif |
| 855 | if (child->thread.ds_area_msr) { | 975 | #ifdef CONFIG_X86_PTRACE_BTS |
| 856 | #ifdef X86_BTS | 976 | (void)ds_release_bts(child); |
| 857 | ptrace_bts_realloc(child, 0, 0); | 977 | |
| 858 | #endif | 978 | child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; |
| 859 | child->thread.debugctlmsr &= ~ds_debugctl_mask(); | 979 | if (!child->thread.debugctlmsr) |
| 860 | if (!child->thread.debugctlmsr) | 980 | clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); |
| 861 | clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); | 981 | |
| 862 | clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); | 982 | clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); |
| 863 | } | 983 | #endif /* CONFIG_X86_PTRACE_BTS */ |
| 864 | } | 984 | } |
| 865 | 985 | ||
| 866 | #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION | 986 | #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION |
| @@ -980,7 +1100,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) | |||
| 980 | /* | 1100 | /* |
| 981 | * These bits need more cooking - not enabled yet: | 1101 | * These bits need more cooking - not enabled yet: |
| 982 | */ | 1102 | */ |
| 983 | #ifdef X86_BTS | 1103 | #ifdef CONFIG_X86_PTRACE_BTS |
| 984 | case PTRACE_BTS_CONFIG: | 1104 | case PTRACE_BTS_CONFIG: |
| 985 | ret = ptrace_bts_config | 1105 | ret = ptrace_bts_config |
| 986 | (child, data, (struct ptrace_bts_config __user *)addr); | 1106 | (child, data, (struct ptrace_bts_config __user *)addr); |
| @@ -992,7 +1112,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) | |||
| 992 | break; | 1112 | break; |
| 993 | 1113 | ||
| 994 | case PTRACE_BTS_SIZE: | 1114 | case PTRACE_BTS_SIZE: |
| 995 | ret = ptrace_bts_get_size(child); | 1115 | ret = ds_get_bts_index(child, /* pos = */ NULL); |
| 996 | break; | 1116 | break; |
| 997 | 1117 | ||
| 998 | case PTRACE_BTS_GET: | 1118 | case PTRACE_BTS_GET: |
| @@ -1001,14 +1121,14 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) | |||
| 1001 | break; | 1121 | break; |
| 1002 | 1122 | ||
| 1003 | case PTRACE_BTS_CLEAR: | 1123 | case PTRACE_BTS_CLEAR: |
| 1004 | ret = ptrace_bts_clear(child); | 1124 | ret = ds_clear_bts(child); |
| 1005 | break; | 1125 | break; |
| 1006 | 1126 | ||
| 1007 | case PTRACE_BTS_DRAIN: | 1127 | case PTRACE_BTS_DRAIN: |
| 1008 | ret = ptrace_bts_drain | 1128 | ret = ptrace_bts_drain |
| 1009 | (child, data, (struct bts_struct __user *) addr); | 1129 | (child, data, (struct bts_struct __user *) addr); |
| 1010 | break; | 1130 | break; |
| 1011 | #endif | 1131 | #endif /* CONFIG_X86_PTRACE_BTS */ |
| 1012 | 1132 | ||
| 1013 | default: | 1133 | default: |
| 1014 | ret = ptrace_request(child, request, addr, data); | 1134 | ret = ptrace_request(child, request, addr, data); |
| @@ -1290,6 +1410,12 @@ static const struct user_regset x86_64_regsets[] = { | |||
| 1290 | .size = sizeof(long), .align = sizeof(long), | 1410 | .size = sizeof(long), .align = sizeof(long), |
| 1291 | .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set | 1411 | .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set |
| 1292 | }, | 1412 | }, |
| 1413 | [REGSET_IOPERM64] = { | ||
| 1414 | .core_note_type = NT_386_IOPERM, | ||
| 1415 | .n = IO_BITMAP_LONGS, | ||
| 1416 | .size = sizeof(long), .align = sizeof(long), | ||
| 1417 | .active = ioperm_active, .get = ioperm_get | ||
| 1418 | }, | ||
| 1293 | }; | 1419 | }; |
| 1294 | 1420 | ||
| 1295 | static const struct user_regset_view user_x86_64_view = { | 1421 | static const struct user_regset_view user_x86_64_view = { |
| @@ -1336,6 +1462,12 @@ static const struct user_regset x86_32_regsets[] = { | |||
| 1336 | .active = regset_tls_active, | 1462 | .active = regset_tls_active, |
| 1337 | .get = regset_tls_get, .set = regset_tls_set | 1463 | .get = regset_tls_get, .set = regset_tls_set |
| 1338 | }, | 1464 | }, |
| 1465 | [REGSET_IOPERM32] = { | ||
| 1466 | .core_note_type = NT_386_IOPERM, | ||
| 1467 | .n = IO_BITMAP_BYTES / sizeof(u32), | ||
| 1468 | .size = sizeof(u32), .align = sizeof(u32), | ||
| 1469 | .active = ioperm_active, .get = ioperm_get | ||
| 1470 | }, | ||
| 1339 | }; | 1471 | }; |
| 1340 | 1472 | ||
| 1341 | static const struct user_regset_view user_x86_32_view = { | 1473 | static const struct user_regset_view user_x86_32_view = { |
| @@ -1357,9 +1489,8 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task) | |||
| 1357 | #endif | 1489 | #endif |
| 1358 | } | 1490 | } |
| 1359 | 1491 | ||
| 1360 | #ifdef CONFIG_X86_32 | 1492 | void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, |
| 1361 | 1493 | int error_code, int si_code) | |
| 1362 | void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) | ||
| 1363 | { | 1494 | { |
| 1364 | struct siginfo info; | 1495 | struct siginfo info; |
| 1365 | 1496 | ||
| @@ -1368,7 +1499,7 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) | |||
| 1368 | 1499 | ||
| 1369 | memset(&info, 0, sizeof(info)); | 1500 | memset(&info, 0, sizeof(info)); |
| 1370 | info.si_signo = SIGTRAP; | 1501 | info.si_signo = SIGTRAP; |
| 1371 | info.si_code = TRAP_BRKPT; | 1502 | info.si_code = si_code; |
| 1372 | 1503 | ||
| 1373 | /* User-mode ip? */ | 1504 | /* User-mode ip? */ |
| 1374 | info.si_addr = user_mode_vm(regs) ? (void __user *) regs->ip : NULL; | 1505 | info.si_addr = user_mode_vm(regs) ? (void __user *) regs->ip : NULL; |
| @@ -1377,143 +1508,83 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) | |||
| 1377 | force_sig_info(SIGTRAP, &info, tsk); | 1508 | force_sig_info(SIGTRAP, &info, tsk); |
| 1378 | } | 1509 | } |
| 1379 | 1510 | ||
| 1380 | /* notification of system call entry/exit | ||
| 1381 | * - triggered by current->work.syscall_trace | ||
| 1382 | */ | ||
| 1383 | int do_syscall_trace(struct pt_regs *regs, int entryexit) | ||
| 1384 | { | ||
| 1385 | int is_sysemu = test_thread_flag(TIF_SYSCALL_EMU); | ||
| 1386 | /* | ||
| 1387 | * With TIF_SYSCALL_EMU set we want to ignore TIF_SINGLESTEP for syscall | ||
| 1388 | * interception | ||
| 1389 | */ | ||
| 1390 | int is_singlestep = !is_sysemu && test_thread_flag(TIF_SINGLESTEP); | ||
| 1391 | int ret = 0; | ||
| 1392 | |||
| 1393 | /* do the secure computing check first */ | ||
| 1394 | if (!entryexit) | ||
| 1395 | secure_computing(regs->orig_ax); | ||
| 1396 | |||
| 1397 | if (unlikely(current->audit_context)) { | ||
| 1398 | if (entryexit) | ||
| 1399 | audit_syscall_exit(AUDITSC_RESULT(regs->ax), | ||
| 1400 | regs->ax); | ||
| 1401 | /* Debug traps, when using PTRACE_SINGLESTEP, must be sent only | ||
| 1402 | * on the syscall exit path. Normally, when TIF_SYSCALL_AUDIT is | ||
| 1403 | * not used, entry.S will call us only on syscall exit, not | ||
| 1404 | * entry; so when TIF_SYSCALL_AUDIT is used we must avoid | ||
| 1405 | * calling send_sigtrap() on syscall entry. | ||
| 1406 | * | ||
| 1407 | * Note that when PTRACE_SYSEMU_SINGLESTEP is used, | ||
| 1408 | * is_singlestep is false, despite his name, so we will still do | ||
| 1409 | * the correct thing. | ||
| 1410 | */ | ||
| 1411 | else if (is_singlestep) | ||
| 1412 | goto out; | ||
| 1413 | } | ||
| 1414 | |||
| 1415 | if (!(current->ptrace & PT_PTRACED)) | ||
| 1416 | goto out; | ||
| 1417 | |||
| 1418 | /* If a process stops on the 1st tracepoint with SYSCALL_TRACE | ||
| 1419 | * and then is resumed with SYSEMU_SINGLESTEP, it will come in | ||
| 1420 | * here. We have to check this and return */ | ||
| 1421 | if (is_sysemu && entryexit) | ||
| 1422 | return 0; | ||
| 1423 | |||
| 1424 | /* Fake a debug trap */ | ||
| 1425 | if (is_singlestep) | ||
| 1426 | send_sigtrap(current, regs, 0); | ||
| 1427 | |||
| 1428 | if (!test_thread_flag(TIF_SYSCALL_TRACE) && !is_sysemu) | ||
| 1429 | goto out; | ||
| 1430 | |||
| 1431 | /* the 0x80 provides a way for the tracing parent to distinguish | ||
| 1432 | between a syscall stop and SIGTRAP delivery */ | ||
| 1433 | /* Note that the debugger could change the result of test_thread_flag!*/ | ||
| 1434 | ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80:0)); | ||
| 1435 | 1511 | ||
| 1436 | /* | 1512 | #ifdef CONFIG_X86_32 |
| 1437 | * this isn't the same as continuing with a signal, but it will do | 1513 | # define IS_IA32 1 |
| 1438 | * for normal use. strace only continues with a signal if the | 1514 | #elif defined CONFIG_IA32_EMULATION |
| 1439 | * stopping signal is not SIGTRAP. -brl | 1515 | # define IS_IA32 test_thread_flag(TIF_IA32) |
| 1440 | */ | 1516 | #else |
| 1441 | if (current->exit_code) { | 1517 | # define IS_IA32 0 |
| 1442 | send_sig(current->exit_code, current, 1); | 1518 | #endif |
| 1443 | current->exit_code = 0; | ||
| 1444 | } | ||
| 1445 | ret = is_sysemu; | ||
| 1446 | out: | ||
| 1447 | if (unlikely(current->audit_context) && !entryexit) | ||
| 1448 | audit_syscall_entry(AUDIT_ARCH_I386, regs->orig_ax, | ||
| 1449 | regs->bx, regs->cx, regs->dx, regs->si); | ||
| 1450 | if (ret == 0) | ||
| 1451 | return 0; | ||
| 1452 | |||
| 1453 | regs->orig_ax = -1; /* force skip of syscall restarting */ | ||
| 1454 | if (unlikely(current->audit_context)) | ||
| 1455 | audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); | ||
| 1456 | return 1; | ||
| 1457 | } | ||
| 1458 | |||
| 1459 | #else /* CONFIG_X86_64 */ | ||
| 1460 | 1519 | ||
| 1461 | static void syscall_trace(struct pt_regs *regs) | 1520 | /* |
| 1521 | * We must return the syscall number to actually look up in the table. | ||
| 1522 | * This can be -1L to skip running any syscall at all. | ||
| 1523 | */ | ||
| 1524 | asmregparm long syscall_trace_enter(struct pt_regs *regs) | ||
| 1462 | { | 1525 | { |
| 1526 | long ret = 0; | ||
| 1463 | 1527 | ||
| 1464 | #if 0 | ||
| 1465 | printk("trace %s ip %lx sp %lx ax %d origrax %d caller %lx tiflags %x ptrace %x\n", | ||
| 1466 | current->comm, | ||
| 1467 | regs->ip, regs->sp, regs->ax, regs->orig_ax, __builtin_return_address(0), | ||
| 1468 | current_thread_info()->flags, current->ptrace); | ||
| 1469 | #endif | ||
| 1470 | |||
| 1471 | ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) | ||
| 1472 | ? 0x80 : 0)); | ||
| 1473 | /* | 1528 | /* |
| 1474 | * this isn't the same as continuing with a signal, but it will do | 1529 | * If we stepped into a sysenter/syscall insn, it trapped in |
| 1475 | * for normal use. strace only continues with a signal if the | 1530 | * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP. |
| 1476 | * stopping signal is not SIGTRAP. -brl | 1531 | * If user-mode had set TF itself, then it's still clear from |
| 1532 | * do_debug() and we need to set it again to restore the user | ||
| 1533 | * state. If we entered on the slow path, TF was already set. | ||
| 1477 | */ | 1534 | */ |
| 1478 | if (current->exit_code) { | 1535 | if (test_thread_flag(TIF_SINGLESTEP)) |
| 1479 | send_sig(current->exit_code, current, 1); | 1536 | regs->flags |= X86_EFLAGS_TF; |
| 1480 | current->exit_code = 0; | ||
| 1481 | } | ||
| 1482 | } | ||
| 1483 | 1537 | ||
| 1484 | asmlinkage void syscall_trace_enter(struct pt_regs *regs) | ||
| 1485 | { | ||
| 1486 | /* do the secure computing check first */ | 1538 | /* do the secure computing check first */ |
| 1487 | secure_computing(regs->orig_ax); | 1539 | secure_computing(regs->orig_ax); |
| 1488 | 1540 | ||
| 1489 | if (test_thread_flag(TIF_SYSCALL_TRACE) | 1541 | if (unlikely(test_thread_flag(TIF_SYSCALL_EMU))) |
| 1490 | && (current->ptrace & PT_PTRACED)) | 1542 | ret = -1L; |
| 1491 | syscall_trace(regs); | 1543 | |
| 1544 | if ((ret || test_thread_flag(TIF_SYSCALL_TRACE)) && | ||
| 1545 | tracehook_report_syscall_entry(regs)) | ||
| 1546 | ret = -1L; | ||
| 1492 | 1547 | ||
| 1493 | if (unlikely(current->audit_context)) { | 1548 | if (unlikely(current->audit_context)) { |
| 1494 | if (test_thread_flag(TIF_IA32)) { | 1549 | if (IS_IA32) |
| 1495 | audit_syscall_entry(AUDIT_ARCH_I386, | 1550 | audit_syscall_entry(AUDIT_ARCH_I386, |
| 1496 | regs->orig_ax, | 1551 | regs->orig_ax, |
| 1497 | regs->bx, regs->cx, | 1552 | regs->bx, regs->cx, |
| 1498 | regs->dx, regs->si); | 1553 | regs->dx, regs->si); |
| 1499 | } else { | 1554 | #ifdef CONFIG_X86_64 |
| 1555 | else | ||
| 1500 | audit_syscall_entry(AUDIT_ARCH_X86_64, | 1556 | audit_syscall_entry(AUDIT_ARCH_X86_64, |
| 1501 | regs->orig_ax, | 1557 | regs->orig_ax, |
| 1502 | regs->di, regs->si, | 1558 | regs->di, regs->si, |
| 1503 | regs->dx, regs->r10); | 1559 | regs->dx, regs->r10); |
| 1504 | } | 1560 | #endif |
| 1505 | } | 1561 | } |
| 1562 | |||
| 1563 | return ret ?: regs->orig_ax; | ||
| 1506 | } | 1564 | } |
| 1507 | 1565 | ||
| 1508 | asmlinkage void syscall_trace_leave(struct pt_regs *regs) | 1566 | asmregparm void syscall_trace_leave(struct pt_regs *regs) |
| 1509 | { | 1567 | { |
| 1510 | if (unlikely(current->audit_context)) | 1568 | if (unlikely(current->audit_context)) |
| 1511 | audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); | 1569 | audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); |
| 1512 | 1570 | ||
| 1513 | if ((test_thread_flag(TIF_SYSCALL_TRACE) | 1571 | if (test_thread_flag(TIF_SYSCALL_TRACE)) |
| 1514 | || test_thread_flag(TIF_SINGLESTEP)) | 1572 | tracehook_report_syscall_exit(regs, 0); |
| 1515 | && (current->ptrace & PT_PTRACED)) | ||
| 1516 | syscall_trace(regs); | ||
| 1517 | } | ||
| 1518 | 1573 | ||
| 1519 | #endif /* CONFIG_X86_32 */ | 1574 | /* |
| 1575 | * If TIF_SYSCALL_EMU is set, we only get here because of | ||
| 1576 | * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP). | ||
| 1577 | * We already reported this syscall instruction in | ||
| 1578 | * syscall_trace_enter(), so don't do any more now. | ||
| 1579 | */ | ||
| 1580 | if (unlikely(test_thread_flag(TIF_SYSCALL_EMU))) | ||
| 1581 | return; | ||
| 1582 | |||
| 1583 | /* | ||
| 1584 | * If we are single-stepping, synthesize a trap to follow the | ||
| 1585 | * system call instruction. | ||
| 1586 | */ | ||
| 1587 | if (test_thread_flag(TIF_SINGLESTEP) && | ||
| 1588 | tracehook_consider_fatal_signal(current, SIGTRAP, SIG_DFL)) | ||
| 1589 | send_sigtrap(current, regs, 0, TRAP_BRKPT); | ||
| 1590 | } | ||
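
The reworked `syscall_trace_enter()` now returns the syscall number to dispatch, or `-1L` to skip the call entirely (when `TIF_SYSCALL_EMU` is set or the tracehook vetoes entry). Its final `return ret ?: regs->orig_ax;` uses GCC's two-operand conditional, which evaluates to the first operand when that is non-zero and to the second otherwise. A tiny standalone demo of that operator, with made-up values (this is only an illustration of the expression, not of the entry-path dispatch itself):

```c
#include <stdio.h>

/* GNU C's "a ?: b" shorthand, as used by syscall_trace_enter():
 * it yields a when a is non-zero, otherwise b.  Values are made up. */
int main(void)
{
    long ret = 0, orig_ax = 39;          /* no veto: dispatch syscall 39 */
    printf("%ld\n", ret ?: orig_ax);     /* prints 39 */

    ret = -1L;                           /* tracer vetoed the syscall */
    printf("%ld\n", ret ?: orig_ax);     /* prints -1 */
    return 0;
}
```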
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 05fbe9a0325a..4f9c55f3a7c0 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c | |||
| @@ -97,6 +97,18 @@ static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst, | |||
| 97 | return dst->version; | 97 | return dst->version; |
| 98 | } | 98 | } |
| 99 | 99 | ||
| 100 | unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src) | ||
| 101 | { | ||
| 102 | u64 pv_tsc_khz = 1000000ULL << 32; | ||
| 103 | |||
| 104 | do_div(pv_tsc_khz, src->tsc_to_system_mul); | ||
| 105 | if (src->tsc_shift < 0) | ||
| 106 | pv_tsc_khz <<= -src->tsc_shift; | ||
| 107 | else | ||
| 108 | pv_tsc_khz >>= src->tsc_shift; | ||
| 109 | return pv_tsc_khz; | ||
| 110 | } | ||
| 111 | |||
| 100 | cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) | 112 | cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) |
| 101 | { | 113 | { |
| 102 | struct pvclock_shadow_time shadow; | 114 | struct pvclock_shadow_time shadow; |
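
The new `pvclock_tsc_khz()` inverts the guest clock parameters: `tsc_to_system_mul` is a 32.32 fixed-point multiplier that converts a (pre-shifted) TSC delta to nanoseconds, so dividing `10^6 << 32` by it, corrected by `tsc_shift`, yields the TSC frequency in kHz. A user-space restatement of the same arithmetic, with a hypothetical input value:

```c
#include <stdint.h>
#include <stdio.h>

/* Same arithmetic as pvclock_tsc_khz(), done outside the kernel.
 * Assumes the pvclock convention that
 *   ns = ((tsc_delta << tsc_shift) * tsc_to_system_mul) >> 32
 * where a negative tsc_shift means a right shift of the delta. */
static uint64_t pvclock_khz(uint32_t tsc_to_system_mul, int8_t tsc_shift)
{
    uint64_t khz = 1000000ULL << 32;    /* 10^6 * 2^32 */

    khz /= tsc_to_system_mul;           /* TSC kHz before the shift */
    if (tsc_shift < 0)
        khz <<= -tsc_shift;             /* delta pre-shifted right -> faster TSC */
    else
        khz >>= tsc_shift;
    return khz;
}

int main(void)
{
    /* Hypothetical guest values: mul ~= 2^32 / 2.5 models a 2.5 GHz TSC,
     * so this prints roughly 2500000 kHz. */
    printf("%llu kHz\n", (unsigned long long)pvclock_khz(1717986918u, 0));
    return 0;
}
```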
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index d13858818100..67465ed89310 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c | |||
| @@ -35,9 +35,6 @@ static void __devinit quirk_intel_irqbalance(struct pci_dev *dev) | |||
| 35 | if (!(word & (1 << 13))) { | 35 | if (!(word & (1 << 13))) { |
| 36 | dev_info(&dev->dev, "Intel E7520/7320/7525 detected; " | 36 | dev_info(&dev->dev, "Intel E7520/7320/7525 detected; " |
| 37 | "disabling irq balancing and affinity\n"); | 37 | "disabling irq balancing and affinity\n"); |
| 38 | #ifdef CONFIG_IRQBALANCE | ||
| 39 | irqbalance_disable(""); | ||
| 40 | #endif | ||
| 41 | noirqdebug_setup(""); | 38 | noirqdebug_setup(""); |
| 42 | #ifdef CONFIG_PROC_FS | 39 | #ifdef CONFIG_PROC_FS |
| 43 | no_irq_affinity = 1; | 40 | no_irq_affinity = 1; |
| @@ -354,9 +351,27 @@ static void ati_force_hpet_resume(void) | |||
| 354 | printk(KERN_DEBUG "Force enabled HPET at resume\n"); | 351 | printk(KERN_DEBUG "Force enabled HPET at resume\n"); |
| 355 | } | 352 | } |
| 356 | 353 | ||
| 354 | static u32 ati_ixp4x0_rev(struct pci_dev *dev) | ||
| 355 | { | ||
| 356 | u32 d; | ||
| 357 | u8 b; | ||
| 358 | |||
| 359 | pci_read_config_byte(dev, 0xac, &b); | ||
| 360 | b &= ~(1<<5); | ||
| 361 | pci_write_config_byte(dev, 0xac, b); | ||
| 362 | pci_read_config_dword(dev, 0x70, &d); | ||
| 363 | d |= 1<<8; | ||
| 364 | pci_write_config_dword(dev, 0x70, d); | ||
| 365 | pci_read_config_dword(dev, 0x8, &d); | ||
| 366 | d &= 0xff; | ||
| 367 | dev_printk(KERN_DEBUG, &dev->dev, "SB4X0 revision 0x%x\n", d); | ||
| 368 | return d; | ||
| 369 | } | ||
| 370 | |||
| 357 | static void ati_force_enable_hpet(struct pci_dev *dev) | 371 | static void ati_force_enable_hpet(struct pci_dev *dev) |
| 358 | { | 372 | { |
| 359 | u32 uninitialized_var(val); | 373 | u32 d, val; |
| 374 | u8 b; | ||
| 360 | 375 | ||
| 361 | if (hpet_address || force_hpet_address) | 376 | if (hpet_address || force_hpet_address) |
| 362 | return; | 377 | return; |
| @@ -366,14 +381,33 @@ static void ati_force_enable_hpet(struct pci_dev *dev) | |||
| 366 | return; | 381 | return; |
| 367 | } | 382 | } |
| 368 | 383 | ||
| 384 | d = ati_ixp4x0_rev(dev); | ||
| 385 | if (d < 0x82) | ||
| 386 | return; | ||
| 387 | |||
| 388 | /* base address */ | ||
| 369 | pci_write_config_dword(dev, 0x14, 0xfed00000); | 389 | pci_write_config_dword(dev, 0x14, 0xfed00000); |
| 370 | pci_read_config_dword(dev, 0x14, &val); | 390 | pci_read_config_dword(dev, 0x14, &val); |
| 391 | |||
| 392 | /* enable interrupt */ | ||
| 393 | outb(0x72, 0xcd6); b = inb(0xcd7); | ||
| 394 | b |= 0x1; | ||
| 395 | outb(0x72, 0xcd6); outb(b, 0xcd7); | ||
| 396 | outb(0x72, 0xcd6); b = inb(0xcd7); | ||
| 397 | if (!(b & 0x1)) | ||
| 398 | return; | ||
| 399 | pci_read_config_dword(dev, 0x64, &d); | ||
| 400 | d |= (1<<10); | ||
| 401 | pci_write_config_dword(dev, 0x64, d); | ||
| 402 | pci_read_config_dword(dev, 0x64, &d); | ||
| 403 | if (!(d & (1<<10))) | ||
| 404 | return; | ||
| 405 | |||
| 371 | force_hpet_address = val; | 406 | force_hpet_address = val; |
| 372 | force_hpet_resume_type = ATI_FORCE_HPET_RESUME; | 407 | force_hpet_resume_type = ATI_FORCE_HPET_RESUME; |
| 373 | dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at 0x%lx\n", | 408 | dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at 0x%lx\n", |
| 374 | force_hpet_address); | 409 | force_hpet_address); |
| 375 | cached_dev = dev; | 410 | cached_dev = dev; |
| 376 | return; | ||
| 377 | } | 411 | } |
| 378 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP400_SMBUS, | 412 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP400_SMBUS, |
| 379 | ati_force_enable_hpet); | 413 | ati_force_enable_hpet); |
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index f8a62160e151..f4c93f1cfc19 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c | |||
| @@ -29,7 +29,11 @@ EXPORT_SYMBOL(pm_power_off); | |||
| 29 | 29 | ||
| 30 | static const struct desc_ptr no_idt = {}; | 30 | static const struct desc_ptr no_idt = {}; |
| 31 | static int reboot_mode; | 31 | static int reboot_mode; |
| 32 | enum reboot_type reboot_type = BOOT_KBD; | 32 | /* |
| 33 | * Keyboard reset and triple fault may result in INIT, not RESET, which | ||
| 34 | * doesn't work when we're in vmx root mode. Try ACPI first. | ||
| 35 | */ | ||
| 36 | enum reboot_type reboot_type = BOOT_ACPI; | ||
| 33 | int reboot_force; | 37 | int reboot_force; |
| 34 | 38 | ||
| 35 | #if defined(CONFIG_X86_32) && defined(CONFIG_SMP) | 39 | #if defined(CONFIG_X86_32) && defined(CONFIG_SMP) |
| @@ -177,6 +181,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { | |||
| 177 | DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 2400"), | 181 | DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 2400"), |
| 178 | }, | 182 | }, |
| 179 | }, | 183 | }, |
| 184 | { /* Handle problems with rebooting on Dell T5400's */ | ||
| 185 | .callback = set_bios_reboot, | ||
| 186 | .ident = "Dell Precision T5400", | ||
| 187 | .matches = { | ||
| 188 | DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), | ||
| 189 | DMI_MATCH(DMI_PRODUCT_NAME, "Precision WorkStation T5400"), | ||
| 190 | }, | ||
| 191 | }, | ||
| 180 | { /* Handle problems with rebooting on HP laptops */ | 192 | { /* Handle problems with rebooting on HP laptops */ |
| 181 | .callback = set_bios_reboot, | 193 | .callback = set_bios_reboot, |
| 182 | .ident = "HP Compaq Laptop", | 194 | .ident = "HP Compaq Laptop", |
| @@ -403,10 +415,9 @@ void native_machine_shutdown(void) | |||
| 403 | { | 415 | { |
| 404 | /* Stop the cpus and apics */ | 416 | /* Stop the cpus and apics */ |
| 405 | #ifdef CONFIG_SMP | 417 | #ifdef CONFIG_SMP |
| 406 | int reboot_cpu_id; | ||
| 407 | 418 | ||
| 408 | /* The boot cpu is always logical cpu 0 */ | 419 | /* The boot cpu is always logical cpu 0 */ |
| 409 | reboot_cpu_id = 0; | 420 | int reboot_cpu_id = 0; |
| 410 | 421 | ||
| 411 | #ifdef CONFIG_X86_32 | 422 | #ifdef CONFIG_X86_32 |
| 412 | /* See if there has been given a command line override */ | 423 | /* See if there has been given a command line override */ |
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S index c30fe25d470d..6f50664b2ba5 100644 --- a/arch/x86/kernel/relocate_kernel_32.S +++ b/arch/x86/kernel/relocate_kernel_32.S | |||
| @@ -20,11 +20,45 @@ | |||
| 20 | #define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) | 20 | #define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) |
| 21 | #define PAE_PGD_ATTR (_PAGE_PRESENT) | 21 | #define PAE_PGD_ATTR (_PAGE_PRESENT) |
| 22 | 22 | ||
| 23 | /* control_page + KEXEC_CONTROL_CODE_MAX_SIZE | ||
| 24 | * ~ control_page + PAGE_SIZE are used as data storage and stack for | ||
| 25 | * jumping back | ||
| 26 | */ | ||
| 27 | #define DATA(offset) (KEXEC_CONTROL_CODE_MAX_SIZE+(offset)) | ||
| 28 | |||
| 29 | /* Minimal CPU state */ | ||
| 30 | #define ESP DATA(0x0) | ||
| 31 | #define CR0 DATA(0x4) | ||
| 32 | #define CR3 DATA(0x8) | ||
| 33 | #define CR4 DATA(0xc) | ||
| 34 | |||
| 35 | /* other data */ | ||
| 36 | #define CP_VA_CONTROL_PAGE DATA(0x10) | ||
| 37 | #define CP_PA_PGD DATA(0x14) | ||
| 38 | #define CP_PA_SWAP_PAGE DATA(0x18) | ||
| 39 | #define CP_PA_BACKUP_PAGES_MAP DATA(0x1c) | ||
| 40 | |||
| 23 | .text | 41 | .text |
| 24 | .align PAGE_SIZE | 42 | .align PAGE_SIZE |
| 25 | .globl relocate_kernel | 43 | .globl relocate_kernel |
| 26 | relocate_kernel: | 44 | relocate_kernel: |
| 27 | movl 8(%esp), %ebp /* list of pages */ | 45 | /* Save the CPU context, used for jumping back */ |
| 46 | |||
| 47 | pushl %ebx | ||
| 48 | pushl %esi | ||
| 49 | pushl %edi | ||
| 50 | pushl %ebp | ||
| 51 | pushf | ||
| 52 | |||
| 53 | movl 20+8(%esp), %ebp /* list of pages */ | ||
| 54 | movl PTR(VA_CONTROL_PAGE)(%ebp), %edi | ||
| 55 | movl %esp, ESP(%edi) | ||
| 56 | movl %cr0, %eax | ||
| 57 | movl %eax, CR0(%edi) | ||
| 58 | movl %cr3, %eax | ||
| 59 | movl %eax, CR3(%edi) | ||
| 60 | movl %cr4, %eax | ||
| 61 | movl %eax, CR4(%edi) | ||
| 28 | 62 | ||
| 29 | #ifdef CONFIG_X86_PAE | 63 | #ifdef CONFIG_X86_PAE |
| 30 | /* map the control page at its virtual address */ | 64 | /* map the control page at its virtual address */ |
| @@ -138,15 +172,25 @@ relocate_kernel: | |||
| 138 | 172 | ||
| 139 | relocate_new_kernel: | 173 | relocate_new_kernel: |
| 140 | /* read the arguments and say goodbye to the stack */ | 174 | /* read the arguments and say goodbye to the stack */ |
| 141 | movl 4(%esp), %ebx /* page_list */ | 175 | movl 20+4(%esp), %ebx /* page_list */ |
| 142 | movl 8(%esp), %ebp /* list of pages */ | 176 | movl 20+8(%esp), %ebp /* list of pages */ |
| 143 | movl 12(%esp), %edx /* start address */ | 177 | movl 20+12(%esp), %edx /* start address */ |
| 144 | movl 16(%esp), %ecx /* cpu_has_pae */ | 178 | movl 20+16(%esp), %ecx /* cpu_has_pae */ |
| 179 | movl 20+20(%esp), %esi /* preserve_context */ | ||
| 145 | 180 | ||
| 146 | /* zero out flags, and disable interrupts */ | 181 | /* zero out flags, and disable interrupts */ |
| 147 | pushl $0 | 182 | pushl $0 |
| 148 | popfl | 183 | popfl |
| 149 | 184 | ||
| 185 | /* save some information for jumping back */ | ||
| 186 | movl PTR(VA_CONTROL_PAGE)(%ebp), %edi | ||
| 187 | movl %edi, CP_VA_CONTROL_PAGE(%edi) | ||
| 188 | movl PTR(PA_PGD)(%ebp), %eax | ||
| 189 | movl %eax, CP_PA_PGD(%edi) | ||
| 190 | movl PTR(PA_SWAP_PAGE)(%ebp), %eax | ||
| 191 | movl %eax, CP_PA_SWAP_PAGE(%edi) | ||
| 192 | movl %ebx, CP_PA_BACKUP_PAGES_MAP(%edi) | ||
| 193 | |||
| 150 | /* get physical address of control page now */ | 194 | /* get physical address of control page now */ |
| 151 | /* this is impossible after page table switch */ | 195 | /* this is impossible after page table switch */ |
| 152 | movl PTR(PA_CONTROL_PAGE)(%ebp), %edi | 196 | movl PTR(PA_CONTROL_PAGE)(%ebp), %edi |
| @@ -197,8 +241,90 @@ identity_mapped: | |||
| 197 | xorl %eax, %eax | 241 | xorl %eax, %eax |
| 198 | movl %eax, %cr3 | 242 | movl %eax, %cr3 |
| 199 | 243 | ||
| 244 | movl CP_PA_SWAP_PAGE(%edi), %eax | ||
| 245 | pushl %eax | ||
| 246 | pushl %ebx | ||
| 247 | call swap_pages | ||
| 248 | addl $8, %esp | ||
| 249 | |||
| 250 | /* To be certain of avoiding problems with self-modifying code | ||
| 251 | * I need to execute a serializing instruction here. | ||
| 252 | * So I flush the TLB, it's handy, and not processor dependent. | ||
| 253 | */ | ||
| 254 | xorl %eax, %eax | ||
| 255 | movl %eax, %cr3 | ||
| 256 | |||
| 257 | /* set all of the registers to known values */ | ||
| 258 | /* leave %esp alone */ | ||
| 259 | |||
| 260 | testl %esi, %esi | ||
| 261 | jnz 1f | ||
| 262 | xorl %edi, %edi | ||
| 263 | xorl %eax, %eax | ||
| 264 | xorl %ebx, %ebx | ||
| 265 | xorl %ecx, %ecx | ||
| 266 | xorl %edx, %edx | ||
| 267 | xorl %esi, %esi | ||
| 268 | xorl %ebp, %ebp | ||
| 269 | ret | ||
| 270 | 1: | ||
| 271 | popl %edx | ||
| 272 | movl CP_PA_SWAP_PAGE(%edi), %esp | ||
| 273 | addl $PAGE_SIZE, %esp | ||
| 274 | 2: | ||
| 275 | call *%edx | ||
| 276 | |||
| 277 | /* get the re-entry point of the peer system */ | ||
| 278 | movl 0(%esp), %ebp | ||
| 279 | call 1f | ||
| 280 | 1: | ||
| 281 | popl %ebx | ||
| 282 | subl $(1b - relocate_kernel), %ebx | ||
| 283 | movl CP_VA_CONTROL_PAGE(%ebx), %edi | ||
| 284 | lea PAGE_SIZE(%ebx), %esp | ||
| 285 | movl CP_PA_SWAP_PAGE(%ebx), %eax | ||
| 286 | movl CP_PA_BACKUP_PAGES_MAP(%ebx), %edx | ||
| 287 | pushl %eax | ||
| 288 | pushl %edx | ||
| 289 | call swap_pages | ||
| 290 | addl $8, %esp | ||
| 291 | movl CP_PA_PGD(%ebx), %eax | ||
| 292 | movl %eax, %cr3 | ||
| 293 | movl %cr0, %eax | ||
| 294 | orl $(1<<31), %eax | ||
| 295 | movl %eax, %cr0 | ||
| 296 | lea PAGE_SIZE(%edi), %esp | ||
| 297 | movl %edi, %eax | ||
| 298 | addl $(virtual_mapped - relocate_kernel), %eax | ||
| 299 | pushl %eax | ||
| 300 | ret | ||
| 301 | |||
| 302 | virtual_mapped: | ||
| 303 | movl CR4(%edi), %eax | ||
| 304 | movl %eax, %cr4 | ||
| 305 | movl CR3(%edi), %eax | ||
| 306 | movl %eax, %cr3 | ||
| 307 | movl CR0(%edi), %eax | ||
| 308 | movl %eax, %cr0 | ||
| 309 | movl ESP(%edi), %esp | ||
| 310 | movl %ebp, %eax | ||
| 311 | |||
| 312 | popf | ||
| 313 | popl %ebp | ||
| 314 | popl %edi | ||
| 315 | popl %esi | ||
| 316 | popl %ebx | ||
| 317 | ret | ||
| 318 | |||
| 200 | /* Do the copies */ | 319 | /* Do the copies */ |
| 201 | movl %ebx, %ecx | 320 | swap_pages: |
| 321 | movl 8(%esp), %edx | ||
| 322 | movl 4(%esp), %ecx | ||
| 323 | pushl %ebp | ||
| 324 | pushl %ebx | ||
| 325 | pushl %edi | ||
| 326 | pushl %esi | ||
| 327 | movl %ecx, %ebx | ||
| 202 | jmp 1f | 328 | jmp 1f |
| 203 | 329 | ||
| 204 | 0: /* top, read another word from the indirection page */ | 330 | 0: /* top, read another word from the indirection page */ |
| @@ -226,27 +352,31 @@ identity_mapped: | |||
| 226 | movl %ecx, %esi /* For every source page do a copy */ | 352 | movl %ecx, %esi /* For every source page do a copy */ |
| 227 | andl $0xfffff000, %esi | 353 | andl $0xfffff000, %esi |
| 228 | 354 | ||
| 355 | movl %edi, %eax | ||
| 356 | movl %esi, %ebp | ||
| 357 | |||
| 358 | movl %edx, %edi | ||
| 229 | movl $1024, %ecx | 359 | movl $1024, %ecx |
| 230 | rep ; movsl | 360 | rep ; movsl |
| 231 | jmp 0b | ||
| 232 | |||
| 233 | 3: | ||
| 234 | 361 | ||
| 235 | /* To be certain of avoiding problems with self-modifying code | 362 | movl %ebp, %edi |
| 236 | * I need to execute a serializing instruction here. | 363 | movl %eax, %esi |
| 237 | * So I flush the TLB, it's handy, and not processor dependent. | 364 | movl $1024, %ecx |
| 238 | */ | 365 | rep ; movsl |
| 239 | xorl %eax, %eax | ||
| 240 | movl %eax, %cr3 | ||
| 241 | 366 | ||
| 242 | /* set all of the registers to known values */ | 367 | movl %eax, %edi |
| 243 | /* leave %esp alone */ | 368 | movl %edx, %esi |
| 369 | movl $1024, %ecx | ||
| 370 | rep ; movsl | ||
| 244 | 371 | ||
| 245 | xorl %eax, %eax | 372 | lea PAGE_SIZE(%ebp), %esi |
| 246 | xorl %ebx, %ebx | 373 | jmp 0b |
| 247 | xorl %ecx, %ecx | 374 | 3: |
| 248 | xorl %edx, %edx | 375 | popl %esi |
| 249 | xorl %esi, %esi | 376 | popl %edi |
| 250 | xorl %edi, %edi | 377 | popl %ebx |
| 251 | xorl %ebp, %ebp | 378 | popl %ebp |
| 252 | ret | 379 | ret |
| 380 | |||
| 381 | .globl kexec_control_code_size | ||
| 382 | .set kexec_control_code_size, . - relocate_kernel | ||
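
`relocate_kernel_32.S` now saves minimal CPU state in the control page and replaces the straight copy loop with `swap_pages`, which exchanges each destination page with its source through a dedicated swap page so the overwritten pages survive and execution can later return to the old kernel. Reading the register shuffle above (save `%edi`/`%esi`, then three `rep ; movsl` blocks through the swap page), the per-page operation amounts to the following C sketch; `PAGE_SIZE` and the driver `main()` are illustrative only:

```c
#include <string.h>

#define PAGE_SIZE 4096

/* What swap_pages appears to do for one destination/source pair:
 * exchange the two pages' contents via the swap page as a temporary. */
static void swap_one_page(void *dst, void *src, void *swap)
{
    memcpy(swap, src, PAGE_SIZE);   /* source page      -> swap page   */
    memcpy(src, dst, PAGE_SIZE);    /* destination page -> source page */
    memcpy(dst, swap, PAGE_SIZE);   /* swap page copy   -> destination */
}

int main(void)
{
    static unsigned char a[PAGE_SIZE] = { 1 }, b[PAGE_SIZE] = { 2 }, tmp[PAGE_SIZE];

    swap_one_page(a, b, tmp);
    return (a[0] == 2 && b[0] == 1) ? 0 : 1;   /* contents swapped */
}
```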
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index 05191bbc68b8..dd6f2b71561b 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c | |||
| @@ -52,7 +52,7 @@ int mach_set_rtc_mmss(unsigned long nowtime) | |||
| 52 | 52 | ||
| 53 | cmos_minutes = CMOS_READ(RTC_MINUTES); | 53 | cmos_minutes = CMOS_READ(RTC_MINUTES); |
| 54 | if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) | 54 | if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) |
| 55 | BCD_TO_BIN(cmos_minutes); | 55 | cmos_minutes = bcd2bin(cmos_minutes); |
| 56 | 56 | ||
| 57 | /* | 57 | /* |
| 58 | * since we're only adjusting minutes and seconds, | 58 | * since we're only adjusting minutes and seconds, |
| @@ -69,8 +69,8 @@ int mach_set_rtc_mmss(unsigned long nowtime) | |||
| 69 | 69 | ||
| 70 | if (abs(real_minutes - cmos_minutes) < 30) { | 70 | if (abs(real_minutes - cmos_minutes) < 30) { |
| 71 | if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { | 71 | if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { |
| 72 | BIN_TO_BCD(real_seconds); | 72 | real_seconds = bin2bcd(real_seconds); |
| 73 | BIN_TO_BCD(real_minutes); | 73 | real_minutes = bin2bcd(real_minutes); |
| 74 | } | 74 | } |
| 75 | CMOS_WRITE(real_seconds,RTC_SECONDS); | 75 | CMOS_WRITE(real_seconds,RTC_SECONDS); |
| 76 | CMOS_WRITE(real_minutes,RTC_MINUTES); | 76 | CMOS_WRITE(real_minutes,RTC_MINUTES); |
| @@ -124,16 +124,16 @@ unsigned long mach_get_cmos_time(void) | |||
| 124 | WARN_ON_ONCE(RTC_ALWAYS_BCD && (status & RTC_DM_BINARY)); | 124 | WARN_ON_ONCE(RTC_ALWAYS_BCD && (status & RTC_DM_BINARY)); |
| 125 | 125 | ||
| 126 | if (RTC_ALWAYS_BCD || !(status & RTC_DM_BINARY)) { | 126 | if (RTC_ALWAYS_BCD || !(status & RTC_DM_BINARY)) { |
| 127 | BCD_TO_BIN(sec); | 127 | sec = bcd2bin(sec); |
| 128 | BCD_TO_BIN(min); | 128 | min = bcd2bin(min); |
| 129 | BCD_TO_BIN(hour); | 129 | hour = bcd2bin(hour); |
| 130 | BCD_TO_BIN(day); | 130 | day = bcd2bin(day); |
| 131 | BCD_TO_BIN(mon); | 131 | mon = bcd2bin(mon); |
| 132 | BCD_TO_BIN(year); | 132 | year = bcd2bin(year); |
| 133 | } | 133 | } |
| 134 | 134 | ||
| 135 | if (century) { | 135 | if (century) { |
| 136 | BCD_TO_BIN(century); | 136 | century = bcd2bin(century); |
| 137 | year += century * 100; | 137 | year += century * 100; |
| 138 | printk(KERN_INFO "Extended CMOS year: %d\n", century * 100); | 138 | printk(KERN_INFO "Extended CMOS year: %d\n", century * 100); |
| 139 | } else | 139 | } else |
| @@ -223,11 +223,25 @@ static struct platform_device rtc_device = { | |||
| 223 | static __init int add_rtc_cmos(void) | 223 | static __init int add_rtc_cmos(void) |
| 224 | { | 224 | { |
| 225 | #ifdef CONFIG_PNP | 225 | #ifdef CONFIG_PNP |
| 226 | if (!pnp_platform_devices) | 226 | static const char *ids[] __initconst = |
| 227 | platform_device_register(&rtc_device); | 227 | { "PNP0b00", "PNP0b01", "PNP0b02", }; |
| 228 | #else | 228 | struct pnp_dev *dev; |
| 229 | struct pnp_id *id; | ||
| 230 | int i; | ||
| 231 | |||
| 232 | pnp_for_each_dev(dev) { | ||
| 233 | for (id = dev->id; id; id = id->next) { | ||
| 234 | for (i = 0; i < ARRAY_SIZE(ids); i++) { | ||
| 235 | if (compare_pnp_id(id, ids[i]) != 0) | ||
| 236 | return 0; | ||
| 237 | } | ||
| 238 | } | ||
| 239 | } | ||
| 240 | #endif | ||
| 241 | |||
| 229 | platform_device_register(&rtc_device); | 242 | platform_device_register(&rtc_device); |
| 230 | #endif /* CONFIG_PNP */ | 243 | dev_info(&rtc_device.dev, |
| 244 | "registered platform RTC device (no PNP device found)\n"); | ||
| 231 | return 0; | 245 | return 0; |
| 232 | } | 246 | } |
| 233 | device_initcall(add_rtc_cmos); | 247 | device_initcall(add_rtc_cmos); |
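
`mach_set_rtc_mmss()` and `mach_get_cmos_time()` now use the generic value-returning `bcd2bin()`/`bin2bcd()` helpers instead of the in-place `BCD_TO_BIN`/`BIN_TO_BCD` macros, which avoids the macros' hidden write to their argument. The conversion itself is the usual packed-BCD arithmetic (two decimal digits per byte, as the CMOS RTC stores them); a standalone sketch:

```c
#include <stdio.h>

/* The arithmetic behind the value-returning BCD helpers. */
static unsigned bcd2bin(unsigned char val)
{
    return (val & 0x0f) + (val >> 4) * 10;
}

static unsigned char bin2bcd(unsigned val)
{
    return ((val / 10) << 4) | (val % 10);
}

int main(void)
{
    /* 0x59 read from the CMOS seconds register means 59 decimal */
    printf("%u 0x%02x\n", bcd2bin(0x59), bin2bcd(59));
    return 0;
}
```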
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 531b55b8e81a..0fa6790c1dd3 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
| @@ -57,12 +57,8 @@ | |||
| 57 | #include <linux/slab.h> | 57 | #include <linux/slab.h> |
| 58 | #include <linux/user.h> | 58 | #include <linux/user.h> |
| 59 | #include <linux/delay.h> | 59 | #include <linux/delay.h> |
| 60 | #include <linux/highmem.h> | ||
| 61 | 60 | ||
| 62 | #include <linux/kallsyms.h> | 61 | #include <linux/kallsyms.h> |
| 63 | #include <linux/edd.h> | ||
| 64 | #include <linux/iscsi_ibft.h> | ||
| 65 | #include <linux/kexec.h> | ||
| 66 | #include <linux/cpufreq.h> | 62 | #include <linux/cpufreq.h> |
| 67 | #include <linux/dma-mapping.h> | 63 | #include <linux/dma-mapping.h> |
| 68 | #include <linux/ctype.h> | 64 | #include <linux/ctype.h> |
| @@ -96,7 +92,7 @@ | |||
| 96 | #include <asm/smp.h> | 92 | #include <asm/smp.h> |
| 97 | #include <asm/desc.h> | 93 | #include <asm/desc.h> |
| 98 | #include <asm/dma.h> | 94 | #include <asm/dma.h> |
| 99 | #include <asm/gart.h> | 95 | #include <asm/iommu.h> |
| 100 | #include <asm/mmu_context.h> | 96 | #include <asm/mmu_context.h> |
| 101 | #include <asm/proto.h> | 97 | #include <asm/proto.h> |
| 102 | 98 | ||
| @@ -104,7 +100,6 @@ | |||
| 104 | #include <asm/paravirt.h> | 100 | #include <asm/paravirt.h> |
| 105 | 101 | ||
| 106 | #include <asm/percpu.h> | 102 | #include <asm/percpu.h> |
| 107 | #include <asm/sections.h> | ||
| 108 | #include <asm/topology.h> | 103 | #include <asm/topology.h> |
| 109 | #include <asm/apicdef.h> | 104 | #include <asm/apicdef.h> |
| 110 | #ifdef CONFIG_X86_64 | 105 | #ifdef CONFIG_X86_64 |
| @@ -228,6 +223,9 @@ unsigned long saved_video_mode; | |||
| 228 | #define RAMDISK_LOAD_FLAG 0x4000 | 223 | #define RAMDISK_LOAD_FLAG 0x4000 |
| 229 | 224 | ||
| 230 | static char __initdata command_line[COMMAND_LINE_SIZE]; | 225 | static char __initdata command_line[COMMAND_LINE_SIZE]; |
| 226 | #ifdef CONFIG_CMDLINE_BOOL | ||
| 227 | static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE; | ||
| 228 | #endif | ||
| 231 | 229 | ||
| 232 | #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) | 230 | #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) |
| 233 | struct edd edd; | 231 | struct edd edd; |
| @@ -304,7 +302,7 @@ static void __init relocate_initrd(void) | |||
| 304 | if (clen > MAX_MAP_CHUNK-slop) | 302 | if (clen > MAX_MAP_CHUNK-slop) |
| 305 | clen = MAX_MAP_CHUNK-slop; | 303 | clen = MAX_MAP_CHUNK-slop; |
| 306 | mapaddr = ramdisk_image & PAGE_MASK; | 304 | mapaddr = ramdisk_image & PAGE_MASK; |
| 307 | p = early_ioremap(mapaddr, clen+slop); | 305 | p = early_memremap(mapaddr, clen+slop); |
| 308 | memcpy(q, p+slop, clen); | 306 | memcpy(q, p+slop, clen); |
| 309 | early_iounmap(p, clen+slop); | 307 | early_iounmap(p, clen+slop); |
| 310 | q += clen; | 308 | q += clen; |
| @@ -381,7 +379,7 @@ static void __init parse_setup_data(void) | |||
| 381 | return; | 379 | return; |
| 382 | pa_data = boot_params.hdr.setup_data; | 380 | pa_data = boot_params.hdr.setup_data; |
| 383 | while (pa_data) { | 381 | while (pa_data) { |
| 384 | data = early_ioremap(pa_data, PAGE_SIZE); | 382 | data = early_memremap(pa_data, PAGE_SIZE); |
| 385 | switch (data->type) { | 383 | switch (data->type) { |
| 386 | case SETUP_E820_EXT: | 384 | case SETUP_E820_EXT: |
| 387 | parse_e820_ext(data, pa_data); | 385 | parse_e820_ext(data, pa_data); |
| @@ -404,7 +402,7 @@ static void __init e820_reserve_setup_data(void) | |||
| 404 | return; | 402 | return; |
| 405 | pa_data = boot_params.hdr.setup_data; | 403 | pa_data = boot_params.hdr.setup_data; |
| 406 | while (pa_data) { | 404 | while (pa_data) { |
| 407 | data = early_ioremap(pa_data, sizeof(*data)); | 405 | data = early_memremap(pa_data, sizeof(*data)); |
| 408 | e820_update_range(pa_data, sizeof(*data)+data->len, | 406 | e820_update_range(pa_data, sizeof(*data)+data->len, |
| 409 | E820_RAM, E820_RESERVED_KERN); | 407 | E820_RAM, E820_RESERVED_KERN); |
| 410 | found = 1; | 408 | found = 1; |
| @@ -430,7 +428,7 @@ static void __init reserve_early_setup_data(void) | |||
| 430 | return; | 428 | return; |
| 431 | pa_data = boot_params.hdr.setup_data; | 429 | pa_data = boot_params.hdr.setup_data; |
| 432 | while (pa_data) { | 430 | while (pa_data) { |
| 433 | data = early_ioremap(pa_data, sizeof(*data)); | 431 | data = early_memremap(pa_data, sizeof(*data)); |
| 434 | sprintf(buf, "setup data %x", data->type); | 432 | sprintf(buf, "setup data %x", data->type); |
| 435 | reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf); | 433 | reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf); |
| 436 | pa_data = data->next; | 434 | pa_data = data->next; |
| @@ -450,7 +448,7 @@ static void __init reserve_early_setup_data(void) | |||
| 450 | * @size: Size of the crashkernel memory to reserve. | 448 | * @size: Size of the crashkernel memory to reserve. |
| 451 | * Returns the base address on success, and -1ULL on failure. | 449 | * Returns the base address on success, and -1ULL on failure. |
| 452 | */ | 450 | */ |
| 453 | unsigned long long find_and_reserve_crashkernel(unsigned long long size) | 451 | unsigned long long __init find_and_reserve_crashkernel(unsigned long long size) |
| 454 | { | 452 | { |
| 455 | const unsigned long long alignment = 16<<20; /* 16M */ | 453 | const unsigned long long alignment = 16<<20; /* 16M */ |
| 456 | unsigned long long start = 0LL; | 454 | unsigned long long start = 0LL; |
| @@ -563,7 +561,13 @@ static void __init reserve_standard_io_resources(void) | |||
| 563 | 561 | ||
| 564 | } | 562 | } |
| 565 | 563 | ||
| 566 | #ifdef CONFIG_PROC_VMCORE | 564 | /* |
| 565 | * Note: elfcorehdr_addr is not just limited to vmcore. It is also used by | ||
| 566 | * is_kdump_kernel() to determine if we are booting after a panic. Hence | ||
| 567 | * ifdef it under CONFIG_CRASH_DUMP and not CONFIG_PROC_VMCORE. | ||
| 568 | */ | ||
| 569 | |||
| 570 | #ifdef CONFIG_CRASH_DUMP | ||
| 567 | /* elfcorehdr= specifies the location of elf core header | 571 | /* elfcorehdr= specifies the location of elf core header |
| 568 | * stored by the crashed kernel. This option will be passed | 572 | * stored by the crashed kernel. This option will be passed |
| 569 | * by kexec loader to the capture kernel. | 573 | * by kexec loader to the capture kernel. |
| @@ -579,6 +583,194 @@ static int __init setup_elfcorehdr(char *arg) | |||
| 579 | early_param("elfcorehdr", setup_elfcorehdr); | 583 | early_param("elfcorehdr", setup_elfcorehdr); |
| 580 | #endif | 584 | #endif |
| 581 | 585 | ||
| 586 | static struct x86_quirks default_x86_quirks __initdata; | ||
| 587 | |||
| 588 | struct x86_quirks *x86_quirks __initdata = &default_x86_quirks; | ||
| 589 | |||
| 590 | /* | ||
| 591 | * Some BIOSes seem to corrupt the low 64k of memory during events | ||
| 592 | * like suspend/resume and unplugging an HDMI cable. Reserve all | ||
| 593 | * remaining free memory in that area and fill it with a distinct | ||
| 594 | * pattern. | ||
| 595 | */ | ||
| 596 | #ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION | ||
| 597 | #define MAX_SCAN_AREAS 8 | ||
| 598 | |||
| 599 | static int __read_mostly memory_corruption_check = -1; | ||
| 600 | |||
| 601 | static unsigned __read_mostly corruption_check_size = 64*1024; | ||
| 602 | static unsigned __read_mostly corruption_check_period = 60; /* seconds */ | ||
| 603 | |||
| 604 | static struct e820entry scan_areas[MAX_SCAN_AREAS]; | ||
| 605 | static int num_scan_areas; | ||
| 606 | |||
| 607 | |||
| 608 | static int set_corruption_check(char *arg) | ||
| 609 | { | ||
| 610 | char *end; | ||
| 611 | |||
| 612 | memory_corruption_check = simple_strtol(arg, &end, 10); | ||
| 613 | |||
| 614 | return (*end == 0) ? 0 : -EINVAL; | ||
| 615 | } | ||
| 616 | early_param("memory_corruption_check", set_corruption_check); | ||
| 617 | |||
| 618 | static int set_corruption_check_period(char *arg) | ||
| 619 | { | ||
| 620 | char *end; | ||
| 621 | |||
| 622 | corruption_check_period = simple_strtoul(arg, &end, 10); | ||
| 623 | |||
| 624 | return (*end == 0) ? 0 : -EINVAL; | ||
| 625 | } | ||
| 626 | early_param("memory_corruption_check_period", set_corruption_check_period); | ||
| 627 | |||
| 628 | static int set_corruption_check_size(char *arg) | ||
| 629 | { | ||
| 630 | char *end; | ||
| 631 | unsigned size; | ||
| 632 | |||
| 633 | size = memparse(arg, &end); | ||
| 634 | |||
| 635 | if (*end == '\0') | ||
| 636 | corruption_check_size = size; | ||
| 637 | |||
| 638 | return (size == corruption_check_size) ? 0 : -EINVAL; | ||
| 639 | } | ||
| 640 | early_param("memory_corruption_check_size", set_corruption_check_size); | ||
| 641 | |||
| 642 | |||
| 643 | static void __init setup_bios_corruption_check(void) | ||
| 644 | { | ||
| 645 | u64 addr = PAGE_SIZE; /* assume first page is reserved anyway */ | ||
| 646 | |||
| 647 | if (memory_corruption_check == -1) { | ||
| 648 | memory_corruption_check = | ||
| 649 | #ifdef CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK | ||
| 650 | 1 | ||
| 651 | #else | ||
| 652 | 0 | ||
| 653 | #endif | ||
| 654 | ; | ||
| 655 | } | ||
| 656 | |||
| 657 | if (corruption_check_size == 0) | ||
| 658 | memory_corruption_check = 0; | ||
| 659 | |||
| 660 | if (!memory_corruption_check) | ||
| 661 | return; | ||
| 662 | |||
| 663 | corruption_check_size = round_up(corruption_check_size, PAGE_SIZE); | ||
| 664 | |||
| 665 | while(addr < corruption_check_size && num_scan_areas < MAX_SCAN_AREAS) { | ||
| 666 | u64 size; | ||
| 667 | addr = find_e820_area_size(addr, &size, PAGE_SIZE); | ||
| 668 | |||
| 669 | if (addr == 0) | ||
| 670 | break; | ||
| 671 | |||
| 672 | if ((addr + size) > corruption_check_size) | ||
| 673 | size = corruption_check_size - addr; | ||
| 674 | |||
| 675 | if (size == 0) | ||
| 676 | break; | ||
| 677 | |||
| 678 | e820_update_range(addr, size, E820_RAM, E820_RESERVED); | ||
| 679 | scan_areas[num_scan_areas].addr = addr; | ||
| 680 | scan_areas[num_scan_areas].size = size; | ||
| 681 | num_scan_areas++; | ||
| 682 | |||
| 683 | /* Assume we've already mapped this early memory */ | ||
| 684 | memset(__va(addr), 0, size); | ||
| 685 | |||
| 686 | addr += size; | ||
| 687 | } | ||
| 688 | |||
| 689 | printk(KERN_INFO "Scanning %d areas for low memory corruption\n", | ||
| 690 | num_scan_areas); | ||
| 691 | update_e820(); | ||
| 692 | } | ||
| 693 | |||
| 694 | static struct timer_list periodic_check_timer; | ||
| 695 | |||
| 696 | void check_for_bios_corruption(void) | ||
| 697 | { | ||
| 698 | int i; | ||
| 699 | int corruption = 0; | ||
| 700 | |||
| 701 | if (!memory_corruption_check) | ||
| 702 | return; | ||
| 703 | |||
| 704 | for(i = 0; i < num_scan_areas; i++) { | ||
| 705 | unsigned long *addr = __va(scan_areas[i].addr); | ||
| 706 | unsigned long size = scan_areas[i].size; | ||
| 707 | |||
| 708 | for(; size; addr++, size -= sizeof(unsigned long)) { | ||
| 709 | if (!*addr) | ||
| 710 | continue; | ||
| 711 | printk(KERN_ERR "Corrupted low memory at %p (%lx phys) = %08lx\n", | ||
| 712 | addr, __pa(addr), *addr); | ||
| 713 | corruption = 1; | ||
| 714 | *addr = 0; | ||
| 715 | } | ||
| 716 | } | ||
| 717 | |||
| 718 | WARN(corruption, KERN_ERR "Memory corruption detected in low memory\n"); | ||
| 719 | } | ||
| 720 | |||
| 721 | static void periodic_check_for_corruption(unsigned long data) | ||
| 722 | { | ||
| 723 | check_for_bios_corruption(); | ||
| 724 | mod_timer(&periodic_check_timer, round_jiffies(jiffies + corruption_check_period*HZ)); | ||
| 725 | } | ||
| 726 | |||
| 727 | void start_periodic_check_for_corruption(void) | ||
| 728 | { | ||
| 729 | if (!memory_corruption_check || corruption_check_period == 0) | ||
| 730 | return; | ||
| 731 | |||
| 732 | printk(KERN_INFO "Scanning for low memory corruption every %d seconds\n", | ||
| 733 | corruption_check_period); | ||
| 734 | |||
| 735 | init_timer(&periodic_check_timer); | ||
| 736 | periodic_check_timer.function = &periodic_check_for_corruption; | ||
| 737 | periodic_check_for_corruption(0); | ||
| 738 | } | ||
| 739 | #endif | ||
| 740 | |||
| 741 | static int __init dmi_low_memory_corruption(const struct dmi_system_id *d) | ||
| 742 | { | ||
| 743 | printk(KERN_NOTICE | ||
| 744 | "%s detected: BIOS may corrupt low RAM, working it around.\n", | ||
| 745 | d->ident); | ||
| 746 | |||
| 747 | e820_update_range(0, 0x10000, E820_RAM, E820_RESERVED); | ||
| 748 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); | ||
| 749 | |||
| 750 | return 0; | ||
| 751 | } | ||
| 752 | |||
| 753 | /* List of systems that have known low memory corruption BIOS problems */ | ||
| 754 | static struct dmi_system_id __initdata bad_bios_dmi_table[] = { | ||
| 755 | #ifdef CONFIG_X86_RESERVE_LOW_64K | ||
| 756 | { | ||
| 757 | .callback = dmi_low_memory_corruption, | ||
| 758 | .ident = "AMI BIOS", | ||
| 759 | .matches = { | ||
| 760 | DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."), | ||
| 761 | }, | ||
| 762 | }, | ||
| 763 | { | ||
| 764 | .callback = dmi_low_memory_corruption, | ||
| 765 | .ident = "Phoenix BIOS", | ||
| 766 | .matches = { | ||
| 767 | DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies, LTD"), | ||
| 768 | }, | ||
| 769 | }, | ||
| 770 | #endif | ||
| 771 | {} | ||
| 772 | }; | ||
| 773 | |||
| 582 | /* | 774 | /* |
| 583 | * Determine if we were loaded by an EFI loader. If so, then we have also been | 775 | * Determine if we were loaded by an EFI loader. If so, then we have also been |
| 584 | * passed the efi memmap, systab, etc., so we should use these data structures | 776 | * passed the efi memmap, systab, etc., so we should use these data structures |
| @@ -598,11 +790,11 @@ void __init setup_arch(char **cmdline_p) | |||
| 598 | memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); | 790 | memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); |
| 599 | visws_early_detect(); | 791 | visws_early_detect(); |
| 600 | pre_setup_arch_hook(); | 792 | pre_setup_arch_hook(); |
| 601 | early_cpu_init(); | ||
| 602 | #else | 793 | #else |
| 603 | printk(KERN_INFO "Command line: %s\n", boot_command_line); | 794 | printk(KERN_INFO "Command line: %s\n", boot_command_line); |
| 604 | #endif | 795 | #endif |
| 605 | 796 | ||
| 797 | early_cpu_init(); | ||
| 606 | early_ioremap_init(); | 798 | early_ioremap_init(); |
| 607 | 799 | ||
| 608 | ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev); | 800 | ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev); |
| @@ -666,14 +858,36 @@ void __init setup_arch(char **cmdline_p) | |||
| 666 | bss_resource.start = virt_to_phys(&__bss_start); | 858 | bss_resource.start = virt_to_phys(&__bss_start); |
| 667 | bss_resource.end = virt_to_phys(&__bss_stop)-1; | 859 | bss_resource.end = virt_to_phys(&__bss_stop)-1; |
| 668 | 860 | ||
| 669 | #ifdef CONFIG_X86_64 | 861 | #ifdef CONFIG_CMDLINE_BOOL |
| 670 | early_cpu_init(); | 862 | #ifdef CONFIG_CMDLINE_OVERRIDE |
| 863 | strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); | ||
| 864 | #else | ||
| 865 | if (builtin_cmdline[0]) { | ||
| 866 | /* append boot loader cmdline to builtin */ | ||
| 867 | strlcat(builtin_cmdline, " ", COMMAND_LINE_SIZE); | ||
| 868 | strlcat(builtin_cmdline, boot_command_line, COMMAND_LINE_SIZE); | ||
| 869 | strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); | ||
| 870 | } | ||
| 671 | #endif | 871 | #endif |
| 872 | #endif | ||
| 873 | |||
| 672 | strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); | 874 | strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); |
| 673 | *cmdline_p = command_line; | 875 | *cmdline_p = command_line; |
| 674 | 876 | ||
| 675 | parse_early_param(); | 877 | parse_early_param(); |
| 676 | 878 | ||
| 879 | #ifdef CONFIG_X86_64 | ||
| 880 | check_efer(); | ||
| 881 | #endif | ||
| 882 | |||
| 883 | #if defined(CONFIG_VMI) && defined(CONFIG_X86_32) | ||
| 884 | /* | ||
| 885 | * Must be before kernel pagetables are setup | ||
| 886 | * or fixmap area is touched. | ||
| 887 | */ | ||
| 888 | vmi_init(); | ||
| 889 | #endif | ||
| 890 | |||
| 677 | /* after early param, so could get panic from serial */ | 891 | /* after early param, so could get panic from serial */ |
| 678 | reserve_early_setup_data(); | 892 | reserve_early_setup_data(); |
| 679 | 893 | ||
| @@ -681,7 +895,7 @@ void __init setup_arch(char **cmdline_p) | |||
| 681 | #ifdef CONFIG_X86_LOCAL_APIC | 895 | #ifdef CONFIG_X86_LOCAL_APIC |
| 682 | disable_apic = 1; | 896 | disable_apic = 1; |
| 683 | #endif | 897 | #endif |
| 684 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); | 898 | setup_clear_cpu_cap(X86_FEATURE_APIC); |
| 685 | } | 899 | } |
| 686 | 900 | ||
| 687 | #ifdef CONFIG_PCI | 901 | #ifdef CONFIG_PCI |
| @@ -691,6 +905,10 @@ void __init setup_arch(char **cmdline_p) | |||
| 691 | 905 | ||
| 692 | finish_e820_parsing(); | 906 | finish_e820_parsing(); |
| 693 | 907 | ||
| 908 | dmi_scan_machine(); | ||
| 909 | |||
| 910 | dmi_check_system(bad_bios_dmi_table); | ||
| 911 | |||
| 694 | #ifdef CONFIG_X86_32 | 912 | #ifdef CONFIG_X86_32 |
| 695 | probe_roms(); | 913 | probe_roms(); |
| 696 | #endif | 914 | #endif |
| @@ -734,7 +952,8 @@ void __init setup_arch(char **cmdline_p) | |||
| 734 | #else | 952 | #else |
| 735 | num_physpages = max_pfn; | 953 | num_physpages = max_pfn; |
| 736 | 954 | ||
| 737 | check_efer(); | 955 | if (cpu_has_x2apic) |
| 956 | check_x2apic(); | ||
| 738 | 957 | ||
| 739 | /* How many end-of-memory variables you have, grandma! */ | 958 | /* How many end-of-memory variables you have, grandma! */ |
| 740 | /* need this before calling reserve_initrd */ | 959 | /* need this before calling reserve_initrd */ |
| @@ -746,6 +965,10 @@ void __init setup_arch(char **cmdline_p) | |||
| 746 | high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1; | 965 | high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1; |
| 747 | #endif | 966 | #endif |
| 748 | 967 | ||
| 968 | #ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION | ||
| 969 | setup_bios_corruption_check(); | ||
| 970 | #endif | ||
| 971 | |||
| 749 | /* max_pfn_mapped is updated here */ | 972 | /* max_pfn_mapped is updated here */ |
| 750 | max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT); | 973 | max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT); |
| 751 | max_pfn_mapped = max_low_pfn_mapped; | 974 | max_pfn_mapped = max_low_pfn_mapped; |
| @@ -774,8 +997,6 @@ void __init setup_arch(char **cmdline_p) | |||
| 774 | vsmp_init(); | 997 | vsmp_init(); |
| 775 | #endif | 998 | #endif |
| 776 | 999 | ||
| 777 | dmi_scan_machine(); | ||
| 778 | |||
| 779 | io_delay_init(); | 1000 | io_delay_init(); |
| 780 | 1001 | ||
| 781 | /* | 1002 | /* |
| @@ -783,6 +1004,8 @@ void __init setup_arch(char **cmdline_p) | |||
| 783 | */ | 1004 | */ |
| 784 | acpi_boot_table_init(); | 1005 | acpi_boot_table_init(); |
| 785 | 1006 | ||
| 1007 | early_acpi_boot_init(); | ||
| 1008 | |||
| 786 | #ifdef CONFIG_ACPI_NUMA | 1009 | #ifdef CONFIG_ACPI_NUMA |
| 787 | /* | 1010 | /* |
| 788 | * Parse SRAT to discover nodes. | 1011 | * Parse SRAT to discover nodes. |
| @@ -792,10 +1015,6 @@ void __init setup_arch(char **cmdline_p) | |||
| 792 | 1015 | ||
| 793 | initmem_init(0, max_pfn); | 1016 | initmem_init(0, max_pfn); |
| 794 | 1017 | ||
| 795 | #ifdef CONFIG_X86_64 | ||
| 796 | dma32_reserve_bootmem(); | ||
| 797 | #endif | ||
| 798 | |||
| 799 | #ifdef CONFIG_ACPI_SLEEP | 1018 | #ifdef CONFIG_ACPI_SLEEP |
| 800 | /* | 1019 | /* |
| 801 | * Reserve low memory region for sleep support. | 1020 | * Reserve low memory region for sleep support. |
| @@ -810,21 +1029,25 @@ void __init setup_arch(char **cmdline_p) | |||
| 810 | #endif | 1029 | #endif |
| 811 | reserve_crashkernel(); | 1030 | reserve_crashkernel(); |
| 812 | 1031 | ||
| 1032 | #ifdef CONFIG_X86_64 | ||
| 1033 | /* | ||
| 1034 | * dma32_reserve_bootmem() allocates bootmem which may conflict | ||
| 1035 | * with the crashkernel command line, so do that after | ||
| 1036 | * reserve_crashkernel() | ||
| 1037 | */ | ||
| 1038 | dma32_reserve_bootmem(); | ||
| 1039 | #endif | ||
| 1040 | |||
| 813 | reserve_ibft_region(); | 1041 | reserve_ibft_region(); |
| 814 | 1042 | ||
| 815 | #ifdef CONFIG_KVM_CLOCK | 1043 | #ifdef CONFIG_KVM_CLOCK |
| 816 | kvmclock_init(); | 1044 | kvmclock_init(); |
| 817 | #endif | 1045 | #endif |
| 818 | 1046 | ||
| 819 | #if defined(CONFIG_VMI) && defined(CONFIG_X86_32) | 1047 | paravirt_pagetable_setup_start(swapper_pg_dir); |
| 820 | /* | ||
| 821 | * Must be after max_low_pfn is determined, and before kernel | ||
| 822 | * pagetables are setup. | ||
| 823 | */ | ||
| 824 | vmi_init(); | ||
| 825 | #endif | ||
| 826 | |||
| 827 | paging_init(); | 1048 | paging_init(); |
| 1049 | paravirt_pagetable_setup_done(swapper_pg_dir); | ||
| 1050 | paravirt_post_allocator_init(); | ||
| 828 | 1051 | ||
| 829 | #ifdef CONFIG_X86_64 | 1052 | #ifdef CONFIG_X86_64 |
| 830 | map_vsyscall(); | 1053 | map_vsyscall(); |
| @@ -850,27 +1073,17 @@ void __init setup_arch(char **cmdline_p) | |||
| 850 | #endif | 1073 | #endif |
| 851 | 1074 | ||
| 852 | prefill_possible_map(); | 1075 | prefill_possible_map(); |
| 1076 | |||
| 853 | #ifdef CONFIG_X86_64 | 1077 | #ifdef CONFIG_X86_64 |
| 854 | init_cpu_to_node(); | 1078 | init_cpu_to_node(); |
| 855 | #endif | 1079 | #endif |
| 856 | 1080 | ||
| 857 | #ifdef CONFIG_X86_NUMAQ | ||
| 858 | /* | ||
| 859 | * need to check online nodes num, call it | ||
| 860 | * here before time_init/tsc_init | ||
| 861 | */ | ||
| 862 | numaq_tsc_disable(); | ||
| 863 | #endif | ||
| 864 | |||
| 865 | init_apic_mappings(); | 1081 | init_apic_mappings(); |
| 866 | ioapic_init_mappings(); | 1082 | ioapic_init_mappings(); |
| 867 | 1083 | ||
| 868 | #if defined(CONFIG_SMP) && defined(CONFIG_X86_PC) && defined(CONFIG_X86_32) | 1084 | /* needs to wait until the io_apic is mapped */ |
| 869 | if (def_to_bigsmp) | 1085 | nr_irqs = probe_nr_irqs(); |
| 870 | printk(KERN_WARNING "More than 8 CPUs detected and " | 1086 | |
| 871 | "CONFIG_X86_PC cannot handle it.\nUse " | ||
| 872 | "CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n"); | ||
| 873 | #endif | ||
| 874 | kvm_guest_init(); | 1087 | kvm_guest_init(); |
| 875 | 1088 | ||
| 876 | e820_reserve_resources(); | 1089 | e820_reserve_resources(); |
| @@ -892,3 +1105,5 @@ void __init setup_arch(char **cmdline_p) | |||
| 892 | #endif | 1105 | #endif |
| 893 | #endif | 1106 | #endif |
| 894 | } | 1107 | } |
| 1108 | |||
| 1109 | |||
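Editor's note: the CONFIG_CMDLINE_BOOL hunk above merges the kernel's built-in command line with the one handed over by the boot loader, either overriding it outright (CONFIG_CMDLINE_OVERRIDE) or appending the boot loader's line to the built-in one. A minimal userspace sketch of that policy follows; it is not kernel code, and the buffer size, names, and the override flag are illustrative only.

#include <stdio.h>

#define COMMAND_LINE_SIZE 256

/* Mimics the merge done in setup_arch(): "override" stands in for
 * CONFIG_CMDLINE_OVERRIDE, "builtin" for the compiled-in cmdline. */
static void merge_cmdline(char *boot_cmdline, const char *builtin, int override)
{
	char merged[COMMAND_LINE_SIZE];

	if (override) {
		/* built-in line wins outright, boot loader line is dropped */
		snprintf(merged, sizeof(merged), "%s", builtin);
	} else if (builtin[0]) {
		/* append boot loader cmdline to builtin, as in the hunk */
		snprintf(merged, sizeof(merged), "%s %s", builtin, boot_cmdline);
	} else {
		/* no built-in line configured: keep the boot loader's */
		snprintf(merged, sizeof(merged), "%s", boot_cmdline);
	}
	snprintf(boot_cmdline, COMMAND_LINE_SIZE, "%s", merged);
}

int main(void)
{
	char boot[COMMAND_LINE_SIZE] = "root=/dev/sda1 quiet";

	merge_cmdline(boot, "console=ttyS0", 0);
	printf("%s\n", boot);	/* prints "console=ttyS0 root=/dev/sda1 quiet" */
	return 0;
}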
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index cac68430d31f..ae0c0d3bb770 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c | |||
| @@ -80,24 +80,6 @@ static void __init setup_per_cpu_maps(void) | |||
| 80 | #endif | 80 | #endif |
| 81 | } | 81 | } |
| 82 | 82 | ||
| 83 | #ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP | ||
| 84 | cpumask_t *cpumask_of_cpu_map __read_mostly; | ||
| 85 | EXPORT_SYMBOL(cpumask_of_cpu_map); | ||
| 86 | |||
| 87 | /* requires nr_cpu_ids to be initialized */ | ||
| 88 | static void __init setup_cpumask_of_cpu(void) | ||
| 89 | { | ||
| 90 | int i; | ||
| 91 | |||
| 92 | /* alloc_bootmem zeroes memory */ | ||
| 93 | cpumask_of_cpu_map = alloc_bootmem_low(sizeof(cpumask_t) * nr_cpu_ids); | ||
| 94 | for (i = 0; i < nr_cpu_ids; i++) | ||
| 95 | cpu_set(i, cpumask_of_cpu_map[i]); | ||
| 96 | } | ||
| 97 | #else | ||
| 98 | static inline void setup_cpumask_of_cpu(void) { } | ||
| 99 | #endif | ||
| 100 | |||
| 101 | #ifdef CONFIG_X86_32 | 83 | #ifdef CONFIG_X86_32 |
| 102 | /* | 84 | /* |
| 103 | * Great future not-so-futuristic plan: make i386 and x86_64 do it | 85 | * Great future not-so-futuristic plan: make i386 and x86_64 do it |
| @@ -158,35 +140,47 @@ static void __init setup_cpu_pda_map(void) | |||
| 158 | */ | 140 | */ |
| 159 | void __init setup_per_cpu_areas(void) | 141 | void __init setup_per_cpu_areas(void) |
| 160 | { | 142 | { |
| 161 | ssize_t size = PERCPU_ENOUGH_ROOM; | 143 | ssize_t size, old_size; |
| 162 | char *ptr; | 144 | char *ptr; |
| 163 | int cpu; | 145 | int cpu; |
| 146 | unsigned long align = 1; | ||
| 164 | 147 | ||
| 165 | /* Setup cpu_pda map */ | 148 | /* Setup cpu_pda map */ |
| 166 | setup_cpu_pda_map(); | 149 | setup_cpu_pda_map(); |
| 167 | 150 | ||
| 168 | /* Copy section for each CPU (we discard the original) */ | 151 | /* Copy section for each CPU (we discard the original) */ |
| 169 | size = PERCPU_ENOUGH_ROOM; | 152 | old_size = PERCPU_ENOUGH_ROOM; |
| 153 | align = max_t(unsigned long, PAGE_SIZE, align); | ||
| 154 | size = roundup(old_size, align); | ||
| 170 | printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n", | 155 | printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n", |
| 171 | size); | 156 | size); |
| 172 | 157 | ||
| 173 | for_each_possible_cpu(cpu) { | 158 | for_each_possible_cpu(cpu) { |
| 174 | #ifndef CONFIG_NEED_MULTIPLE_NODES | 159 | #ifndef CONFIG_NEED_MULTIPLE_NODES |
| 175 | ptr = alloc_bootmem_pages(size); | 160 | ptr = __alloc_bootmem(size, align, |
| 161 | __pa(MAX_DMA_ADDRESS)); | ||
| 176 | #else | 162 | #else |
| 177 | int node = early_cpu_to_node(cpu); | 163 | int node = early_cpu_to_node(cpu); |
| 178 | if (!node_online(node) || !NODE_DATA(node)) { | 164 | if (!node_online(node) || !NODE_DATA(node)) { |
| 179 | ptr = alloc_bootmem_pages(size); | 165 | ptr = __alloc_bootmem(size, align, |
| 166 | __pa(MAX_DMA_ADDRESS)); | ||
| 180 | printk(KERN_INFO | 167 | printk(KERN_INFO |
| 181 | "cpu %d has no node %d or node-local memory\n", | 168 | "cpu %d has no node %d or node-local memory\n", |
| 182 | cpu, node); | 169 | cpu, node); |
| 170 | if (ptr) | ||
| 171 | printk(KERN_DEBUG "per cpu data for cpu%d at %016lx\n", | ||
| 172 | cpu, __pa(ptr)); | ||
| 173 | } | ||
| 174 | else { | ||
| 175 | ptr = __alloc_bootmem_node(NODE_DATA(node), size, align, | ||
| 176 | __pa(MAX_DMA_ADDRESS)); | ||
| 177 | if (ptr) | ||
| 178 | printk(KERN_DEBUG "per cpu data for cpu%d on node%d at %016lx\n", | ||
| 179 | cpu, node, __pa(ptr)); | ||
| 183 | } | 180 | } |
| 184 | else | ||
| 185 | ptr = alloc_bootmem_pages_node(NODE_DATA(node), size); | ||
| 186 | #endif | 181 | #endif |
| 187 | per_cpu_offset(cpu) = ptr - __per_cpu_start; | 182 | per_cpu_offset(cpu) = ptr - __per_cpu_start; |
| 188 | memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); | 183 | memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); |
| 189 | |||
| 190 | } | 184 | } |
| 191 | 185 | ||
| 192 | printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n", | 186 | printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n", |
| @@ -197,9 +191,6 @@ void __init setup_per_cpu_areas(void) | |||
| 197 | 191 | ||
| 198 | /* Setup node to cpumask map */ | 192 | /* Setup node to cpumask map */ |
| 199 | setup_node_to_cpumask_map(); | 193 | setup_node_to_cpumask_map(); |
| 200 | |||
| 201 | /* Setup cpumask_of_cpu map */ | ||
| 202 | setup_cpumask_of_cpu(); | ||
| 203 | } | 194 | } |
| 204 | 195 | ||
| 205 | #endif | 196 | #endif |
| @@ -227,8 +218,8 @@ static void __init setup_node_to_cpumask_map(void) | |||
| 227 | /* allocate the map */ | 218 | /* allocate the map */ |
| 228 | map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t)); | 219 | map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t)); |
| 229 | 220 | ||
| 230 | Dprintk(KERN_DEBUG "Node to cpumask map at %p for %d nodes\n", | 221 | pr_debug("Node to cpumask map at %p for %d nodes\n", |
| 231 | map, nr_node_ids); | 222 | map, nr_node_ids); |
| 232 | 223 | ||
| 233 | /* node_to_cpumask() will now work */ | 224 | /* node_to_cpumask() will now work */ |
| 234 | node_to_cpumask_map = map; | 225 | node_to_cpumask_map = map; |
| @@ -248,7 +239,7 @@ void __cpuinit numa_set_node(int cpu, int node) | |||
| 248 | per_cpu(x86_cpu_to_node_map, cpu) = node; | 239 | per_cpu(x86_cpu_to_node_map, cpu) = node; |
| 249 | 240 | ||
| 250 | else | 241 | else |
| 251 | Dprintk(KERN_INFO "Setting node for non-present cpu %d\n", cpu); | 242 | pr_debug("Setting node for non-present cpu %d\n", cpu); |
| 252 | } | 243 | } |
| 253 | 244 | ||
| 254 | void __cpuinit numa_clear_node(int cpu) | 245 | void __cpuinit numa_clear_node(int cpu) |
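Editor's note: the setup_per_cpu_areas() change above stops using alloc_bootmem_pages() and instead rounds the per-cpu copy up to an explicit alignment (at least PAGE_SIZE) before handing it to __alloc_bootmem() or __alloc_bootmem_node(). A small standalone sketch of just the sizing step; the PERCPU_ENOUGH_ROOM value here is an assumed example, not the real computed macro.

#include <stdio.h>

#define PAGE_SIZE		4096UL
#define PERCPU_ENOUGH_ROOM	45000UL		/* illustrative value only */

/* Equivalent of roundup(old_size, align) used in the hunk above. */
static unsigned long roundup_to(unsigned long x, unsigned long align)
{
	return ((x + align - 1) / align) * align;
}

int main(void)
{
	unsigned long align = PAGE_SIZE;	/* max_t(unsigned long, PAGE_SIZE, 1) */
	unsigned long size  = roundup_to(PERCPU_ENOUGH_ROOM, align);

	/* 45000 rounds up to 45056 with a 4096-byte alignment */
	printf("PERCPU: Allocating %lu bytes of per cpu data (align %lu)\n",
	       size, align);
	return 0;
}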
diff --git a/arch/x86/kernel/sigframe.h b/arch/x86/kernel/sigframe.h index 72bbb519d2dc..cc673aa55ce4 100644 --- a/arch/x86/kernel/sigframe.h +++ b/arch/x86/kernel/sigframe.h | |||
| @@ -3,9 +3,18 @@ struct sigframe { | |||
| 3 | char __user *pretcode; | 3 | char __user *pretcode; |
| 4 | int sig; | 4 | int sig; |
| 5 | struct sigcontext sc; | 5 | struct sigcontext sc; |
| 6 | struct _fpstate fpstate; | 6 | /* |
| 7 | * fpstate is unused. The FP state is moved/allocated after | ||
| 8 | * retcode[] below, which lets the FP state and the future | ||
| 9 | * state extensions (xsave) stay together. | ||
| 10 | * At the same time, retaining the unused fpstate keeps the offset of | ||
| 11 | * extramask[] in the sigframe unchanged, so legacy applications that | ||
| 12 | * access or modify it keep working. | ||
| 13 | */ | ||
| 14 | struct _fpstate fpstate_unused; | ||
| 7 | unsigned long extramask[_NSIG_WORDS-1]; | 15 | unsigned long extramask[_NSIG_WORDS-1]; |
| 8 | char retcode[8]; | 16 | char retcode[8]; |
| 17 | /* fp state follows here */ | ||
| 9 | }; | 18 | }; |
| 10 | 19 | ||
| 11 | struct rt_sigframe { | 20 | struct rt_sigframe { |
| @@ -15,13 +24,19 @@ struct rt_sigframe { | |||
| 15 | void __user *puc; | 24 | void __user *puc; |
| 16 | struct siginfo info; | 25 | struct siginfo info; |
| 17 | struct ucontext uc; | 26 | struct ucontext uc; |
| 18 | struct _fpstate fpstate; | ||
| 19 | char retcode[8]; | 27 | char retcode[8]; |
| 28 | /* fp state follows here */ | ||
| 20 | }; | 29 | }; |
| 21 | #else | 30 | #else |
| 22 | struct rt_sigframe { | 31 | struct rt_sigframe { |
| 23 | char __user *pretcode; | 32 | char __user *pretcode; |
| 24 | struct ucontext uc; | 33 | struct ucontext uc; |
| 25 | struct siginfo info; | 34 | struct siginfo info; |
| 35 | /* fp state follows here */ | ||
| 26 | }; | 36 | }; |
| 37 | |||
| 38 | int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | ||
| 39 | sigset_t *set, struct pt_regs *regs); | ||
| 40 | int ia32_setup_frame(int sig, struct k_sigaction *ka, | ||
| 41 | sigset_t *set, struct pt_regs *regs); | ||
| 27 | #endif | 42 | #endif |
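Editor's note: the sigframe.h hunk above keeps a dead fpstate_unused member purely so that extramask[] stays at the offset old userspace expects, while the live FP/xsave data now follows retcode[] on the signal stack. The sketch below shows why dropping the padding member would shift that offset; the stand-in struct sizes are assumptions, not the real struct _fpstate and struct sigcontext layouts.

#include <stdio.h>
#include <stddef.h>

struct fpstate_stub { char bytes[112]; };	/* placeholder size only */
struct sigcontext_stub { char bytes[88]; };	/* placeholder size only */

struct sigframe_sketch {
	void *pretcode;
	int sig;
	struct sigcontext_stub sc;
	struct fpstate_stub fpstate_unused;	/* kept so extramask[] stays put */
	unsigned long extramask[1];		/* _NSIG_WORDS - 1 */
	char retcode[8];
	/* live fp/xsave state is placed after retcode[] on the signal stack */
};

int main(void)
{
	printf("extramask[] sits at offset %zu; removing fpstate_unused would shift it\n",
	       offsetof(struct sigframe_sketch, extramask));
	return 0;
}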
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index d92373630963..d6dd057d0f22 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | #include <linux/errno.h> | 17 | #include <linux/errno.h> |
| 18 | #include <linux/sched.h> | 18 | #include <linux/sched.h> |
| 19 | #include <linux/wait.h> | 19 | #include <linux/wait.h> |
| 20 | #include <linux/tracehook.h> | ||
| 20 | #include <linux/elf.h> | 21 | #include <linux/elf.h> |
| 21 | #include <linux/smp.h> | 22 | #include <linux/smp.h> |
| 22 | #include <linux/mm.h> | 23 | #include <linux/mm.h> |
| @@ -26,6 +27,8 @@ | |||
| 26 | #include <asm/uaccess.h> | 27 | #include <asm/uaccess.h> |
| 27 | #include <asm/i387.h> | 28 | #include <asm/i387.h> |
| 28 | #include <asm/vdso.h> | 29 | #include <asm/vdso.h> |
| 30 | #include <asm/syscall.h> | ||
| 31 | #include <asm/syscalls.h> | ||
| 29 | 32 | ||
| 30 | #include "sigframe.h" | 33 | #include "sigframe.h" |
| 31 | 34 | ||
| @@ -110,6 +113,27 @@ asmlinkage int sys_sigaltstack(unsigned long bx) | |||
| 110 | return do_sigaltstack(uss, uoss, regs->sp); | 113 | return do_sigaltstack(uss, uoss, regs->sp); |
| 111 | } | 114 | } |
| 112 | 115 | ||
| 116 | #define COPY(x) { \ | ||
| 117 | err |= __get_user(regs->x, &sc->x); \ | ||
| 118 | } | ||
| 119 | |||
| 120 | #define COPY_SEG(seg) { \ | ||
| 121 | unsigned short tmp; \ | ||
| 122 | err |= __get_user(tmp, &sc->seg); \ | ||
| 123 | regs->seg = tmp; \ | ||
| 124 | } | ||
| 125 | |||
| 126 | #define COPY_SEG_STRICT(seg) { \ | ||
| 127 | unsigned short tmp; \ | ||
| 128 | err |= __get_user(tmp, &sc->seg); \ | ||
| 129 | regs->seg = tmp | 3; \ | ||
| 130 | } | ||
| 131 | |||
| 132 | #define GET_SEG(seg) { \ | ||
| 133 | unsigned short tmp; \ | ||
| 134 | err |= __get_user(tmp, &sc->seg); \ | ||
| 135 | loadsegment(seg, tmp); \ | ||
| 136 | } | ||
| 113 | 137 | ||
| 114 | /* | 138 | /* |
| 115 | * Do a signal return; undo the signal stack. | 139 | * Do a signal return; undo the signal stack. |
| @@ -118,28 +142,13 @@ static int | |||
| 118 | restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, | 142 | restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, |
| 119 | unsigned long *pax) | 143 | unsigned long *pax) |
| 120 | { | 144 | { |
| 145 | void __user *buf; | ||
| 146 | unsigned int tmpflags; | ||
| 121 | unsigned int err = 0; | 147 | unsigned int err = 0; |
| 122 | 148 | ||
| 123 | /* Always make any pending restarted system calls return -EINTR */ | 149 | /* Always make any pending restarted system calls return -EINTR */ |
| 124 | current_thread_info()->restart_block.fn = do_no_restart_syscall; | 150 | current_thread_info()->restart_block.fn = do_no_restart_syscall; |
| 125 | 151 | ||
| 126 | #define COPY(x) err |= __get_user(regs->x, &sc->x) | ||
| 127 | |||
| 128 | #define COPY_SEG(seg) \ | ||
| 129 | { unsigned short tmp; \ | ||
| 130 | err |= __get_user(tmp, &sc->seg); \ | ||
| 131 | regs->seg = tmp; } | ||
| 132 | |||
| 133 | #define COPY_SEG_STRICT(seg) \ | ||
| 134 | { unsigned short tmp; \ | ||
| 135 | err |= __get_user(tmp, &sc->seg); \ | ||
| 136 | regs->seg = tmp|3; } | ||
| 137 | |||
| 138 | #define GET_SEG(seg) \ | ||
| 139 | { unsigned short tmp; \ | ||
| 140 | err |= __get_user(tmp, &sc->seg); \ | ||
| 141 | loadsegment(seg, tmp); } | ||
| 142 | |||
| 143 | GET_SEG(gs); | 152 | GET_SEG(gs); |
| 144 | COPY_SEG(fs); | 153 | COPY_SEG(fs); |
| 145 | COPY_SEG(es); | 154 | COPY_SEG(es); |
| @@ -149,38 +158,15 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, | |||
| 149 | COPY_SEG_STRICT(cs); | 158 | COPY_SEG_STRICT(cs); |
| 150 | COPY_SEG_STRICT(ss); | 159 | COPY_SEG_STRICT(ss); |
| 151 | 160 | ||
| 152 | { | 161 | err |= __get_user(tmpflags, &sc->flags); |
| 153 | unsigned int tmpflags; | 162 | regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); |
| 154 | 163 | regs->orig_ax = -1; /* disable syscall checks */ | |
| 155 | err |= __get_user(tmpflags, &sc->flags); | ||
| 156 | regs->flags = (regs->flags & ~FIX_EFLAGS) | | ||
| 157 | (tmpflags & FIX_EFLAGS); | ||
| 158 | regs->orig_ax = -1; /* disable syscall checks */ | ||
| 159 | } | ||
| 160 | 164 | ||
| 161 | { | 165 | err |= __get_user(buf, &sc->fpstate); |
| 162 | struct _fpstate __user *buf; | 166 | err |= restore_i387_xstate(buf); |
| 163 | |||
| 164 | err |= __get_user(buf, &sc->fpstate); | ||
| 165 | if (buf) { | ||
| 166 | if (!access_ok(VERIFY_READ, buf, sizeof(*buf))) | ||
| 167 | goto badframe; | ||
| 168 | err |= restore_i387(buf); | ||
| 169 | } else { | ||
| 170 | struct task_struct *me = current; | ||
| 171 | |||
| 172 | if (used_math()) { | ||
| 173 | clear_fpu(me); | ||
| 174 | clear_used_math(); | ||
| 175 | } | ||
| 176 | } | ||
| 177 | } | ||
| 178 | 167 | ||
| 179 | err |= __get_user(*pax, &sc->ax); | 168 | err |= __get_user(*pax, &sc->ax); |
| 180 | return err; | 169 | return err; |
| 181 | |||
| 182 | badframe: | ||
| 183 | return 1; | ||
| 184 | } | 170 | } |
| 185 | 171 | ||
| 186 | asmlinkage unsigned long sys_sigreturn(unsigned long __unused) | 172 | asmlinkage unsigned long sys_sigreturn(unsigned long __unused) |
| @@ -212,7 +198,7 @@ asmlinkage unsigned long sys_sigreturn(unsigned long __unused) | |||
| 212 | 198 | ||
| 213 | badframe: | 199 | badframe: |
| 214 | if (show_unhandled_signals && printk_ratelimit()) { | 200 | if (show_unhandled_signals && printk_ratelimit()) { |
| 215 | printk(KERN_INFO "%s%s[%d] bad frame in sigreturn frame:" | 201 | printk("%s%s[%d] bad frame in sigreturn frame:" |
| 216 | "%p ip:%lx sp:%lx oeax:%lx", | 202 | "%p ip:%lx sp:%lx oeax:%lx", |
| 217 | task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG, | 203 | task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG, |
| 218 | current->comm, task_pid_nr(current), frame, regs->ip, | 204 | current->comm, task_pid_nr(current), frame, regs->ip, |
| @@ -226,9 +212,8 @@ badframe: | |||
| 226 | return 0; | 212 | return 0; |
| 227 | } | 213 | } |
| 228 | 214 | ||
| 229 | asmlinkage int sys_rt_sigreturn(unsigned long __unused) | 215 | static long do_rt_sigreturn(struct pt_regs *regs) |
| 230 | { | 216 | { |
| 231 | struct pt_regs *regs = (struct pt_regs *)&__unused; | ||
| 232 | struct rt_sigframe __user *frame; | 217 | struct rt_sigframe __user *frame; |
| 233 | unsigned long ax; | 218 | unsigned long ax; |
| 234 | sigset_t set; | 219 | sigset_t set; |
| @@ -254,15 +239,22 @@ asmlinkage int sys_rt_sigreturn(unsigned long __unused) | |||
| 254 | return ax; | 239 | return ax; |
| 255 | 240 | ||
| 256 | badframe: | 241 | badframe: |
| 257 | force_sig(SIGSEGV, current); | 242 | signal_fault(regs, frame, "rt_sigreturn"); |
| 258 | return 0; | 243 | return 0; |
| 259 | } | 244 | } |
| 260 | 245 | ||
| 246 | asmlinkage int sys_rt_sigreturn(unsigned long __unused) | ||
| 247 | { | ||
| 248 | struct pt_regs *regs = (struct pt_regs *)&__unused; | ||
| 249 | |||
| 250 | return do_rt_sigreturn(regs); | ||
| 251 | } | ||
| 252 | |||
| 261 | /* | 253 | /* |
| 262 | * Set up a signal frame. | 254 | * Set up a signal frame. |
| 263 | */ | 255 | */ |
| 264 | static int | 256 | static int |
| 265 | setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate, | 257 | setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, |
| 266 | struct pt_regs *regs, unsigned long mask) | 258 | struct pt_regs *regs, unsigned long mask) |
| 267 | { | 259 | { |
| 268 | int tmp, err = 0; | 260 | int tmp, err = 0; |
| @@ -289,7 +281,7 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate, | |||
| 289 | err |= __put_user(regs->sp, &sc->sp_at_signal); | 281 | err |= __put_user(regs->sp, &sc->sp_at_signal); |
| 290 | err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss); | 282 | err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss); |
| 291 | 283 | ||
| 292 | tmp = save_i387(fpstate); | 284 | tmp = save_i387_xstate(fpstate); |
| 293 | if (tmp < 0) | 285 | if (tmp < 0) |
| 294 | err = 1; | 286 | err = 1; |
| 295 | else | 287 | else |
| @@ -306,7 +298,8 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate, | |||
| 306 | * Determine which stack to use.. | 298 | * Determine which stack to use.. |
| 307 | */ | 299 | */ |
| 308 | static inline void __user * | 300 | static inline void __user * |
| 309 | get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size) | 301 | get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, |
| 302 | void **fpstate) | ||
| 310 | { | 303 | { |
| 311 | unsigned long sp; | 304 | unsigned long sp; |
| 312 | 305 | ||
| @@ -332,6 +325,11 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size) | |||
| 332 | sp = (unsigned long) ka->sa.sa_restorer; | 325 | sp = (unsigned long) ka->sa.sa_restorer; |
| 333 | } | 326 | } |
| 334 | 327 | ||
| 328 | if (used_math()) { | ||
| 329 | sp = sp - sig_xstate_size; | ||
| 330 | *fpstate = (struct _fpstate *) sp; | ||
| 331 | } | ||
| 332 | |||
| 335 | sp -= frame_size; | 333 | sp -= frame_size; |
| 336 | /* | 334 | /* |
| 337 | * Align the stack pointer according to the i386 ABI, | 335 | * Align the stack pointer according to the i386 ABI, |
| @@ -343,38 +341,29 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size) | |||
| 343 | } | 341 | } |
| 344 | 342 | ||
| 345 | static int | 343 | static int |
| 346 | setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, | 344 | __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, |
| 347 | struct pt_regs *regs) | 345 | struct pt_regs *regs) |
| 348 | { | 346 | { |
| 349 | struct sigframe __user *frame; | 347 | struct sigframe __user *frame; |
| 350 | void __user *restorer; | 348 | void __user *restorer; |
| 351 | int err = 0; | 349 | int err = 0; |
| 352 | int usig; | 350 | void __user *fpstate = NULL; |
| 353 | 351 | ||
| 354 | frame = get_sigframe(ka, regs, sizeof(*frame)); | 352 | frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); |
| 355 | 353 | ||
| 356 | if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) | 354 | if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) |
| 357 | goto give_sigsegv; | 355 | return -EFAULT; |
| 358 | 356 | ||
| 359 | usig = current_thread_info()->exec_domain | 357 | if (__put_user(sig, &frame->sig)) |
| 360 | && current_thread_info()->exec_domain->signal_invmap | 358 | return -EFAULT; |
| 361 | && sig < 32 | ||
| 362 | ? current_thread_info()->exec_domain->signal_invmap[sig] | ||
| 363 | : sig; | ||
| 364 | 359 | ||
| 365 | err = __put_user(usig, &frame->sig); | 360 | if (setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0])) |
| 366 | if (err) | 361 | return -EFAULT; |
| 367 | goto give_sigsegv; | ||
| 368 | |||
| 369 | err = setup_sigcontext(&frame->sc, &frame->fpstate, regs, set->sig[0]); | ||
| 370 | if (err) | ||
| 371 | goto give_sigsegv; | ||
| 372 | 362 | ||
| 373 | if (_NSIG_WORDS > 1) { | 363 | if (_NSIG_WORDS > 1) { |
| 374 | err = __copy_to_user(&frame->extramask, &set->sig[1], | 364 | if (__copy_to_user(&frame->extramask, &set->sig[1], |
| 375 | sizeof(frame->extramask)); | 365 | sizeof(frame->extramask))) |
| 376 | if (err) | 366 | return -EFAULT; |
| 377 | goto give_sigsegv; | ||
| 378 | } | 367 | } |
| 379 | 368 | ||
| 380 | if (current->mm->context.vdso) | 369 | if (current->mm->context.vdso) |
| @@ -399,7 +388,7 @@ setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, | |||
| 399 | err |= __put_user(0x80cd, (short __user *)(frame->retcode+6)); | 388 | err |= __put_user(0x80cd, (short __user *)(frame->retcode+6)); |
| 400 | 389 | ||
| 401 | if (err) | 390 | if (err) |
| 402 | goto give_sigsegv; | 391 | return -EFAULT; |
| 403 | 392 | ||
| 404 | /* Set up registers for signal handler */ | 393 | /* Set up registers for signal handler */ |
| 405 | regs->sp = (unsigned long)frame; | 394 | regs->sp = (unsigned long)frame; |
| @@ -414,50 +403,43 @@ setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, | |||
| 414 | regs->cs = __USER_CS; | 403 | regs->cs = __USER_CS; |
| 415 | 404 | ||
| 416 | return 0; | 405 | return 0; |
| 417 | |||
| 418 | give_sigsegv: | ||
| 419 | force_sigsegv(sig, current); | ||
| 420 | return -EFAULT; | ||
| 421 | } | 406 | } |
| 422 | 407 | ||
| 423 | static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | 408 | static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, |
| 424 | sigset_t *set, struct pt_regs *regs) | 409 | sigset_t *set, struct pt_regs *regs) |
| 425 | { | 410 | { |
| 426 | struct rt_sigframe __user *frame; | 411 | struct rt_sigframe __user *frame; |
| 427 | void __user *restorer; | 412 | void __user *restorer; |
| 428 | int err = 0; | 413 | int err = 0; |
| 429 | int usig; | 414 | void __user *fpstate = NULL; |
| 430 | 415 | ||
| 431 | frame = get_sigframe(ka, regs, sizeof(*frame)); | 416 | frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); |
| 432 | 417 | ||
| 433 | if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) | 418 | if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) |
| 434 | goto give_sigsegv; | 419 | return -EFAULT; |
| 435 | |||
| 436 | usig = current_thread_info()->exec_domain | ||
| 437 | && current_thread_info()->exec_domain->signal_invmap | ||
| 438 | && sig < 32 | ||
| 439 | ? current_thread_info()->exec_domain->signal_invmap[sig] | ||
| 440 | : sig; | ||
| 441 | 420 | ||
| 442 | err |= __put_user(usig, &frame->sig); | 421 | err |= __put_user(sig, &frame->sig); |
| 443 | err |= __put_user(&frame->info, &frame->pinfo); | 422 | err |= __put_user(&frame->info, &frame->pinfo); |
| 444 | err |= __put_user(&frame->uc, &frame->puc); | 423 | err |= __put_user(&frame->uc, &frame->puc); |
| 445 | err |= copy_siginfo_to_user(&frame->info, info); | 424 | err |= copy_siginfo_to_user(&frame->info, info); |
| 446 | if (err) | 425 | if (err) |
| 447 | goto give_sigsegv; | 426 | return -EFAULT; |
| 448 | 427 | ||
| 449 | /* Create the ucontext. */ | 428 | /* Create the ucontext. */ |
| 450 | err |= __put_user(0, &frame->uc.uc_flags); | 429 | if (cpu_has_xsave) |
| 430 | err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags); | ||
| 431 | else | ||
| 432 | err |= __put_user(0, &frame->uc.uc_flags); | ||
| 451 | err |= __put_user(0, &frame->uc.uc_link); | 433 | err |= __put_user(0, &frame->uc.uc_link); |
| 452 | err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); | 434 | err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); |
| 453 | err |= __put_user(sas_ss_flags(regs->sp), | 435 | err |= __put_user(sas_ss_flags(regs->sp), |
| 454 | &frame->uc.uc_stack.ss_flags); | 436 | &frame->uc.uc_stack.ss_flags); |
| 455 | err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); | 437 | err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); |
| 456 | err |= setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate, | 438 | err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, |
| 457 | regs, set->sig[0]); | 439 | regs, set->sig[0]); |
| 458 | err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); | 440 | err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); |
| 459 | if (err) | 441 | if (err) |
| 460 | goto give_sigsegv; | 442 | return -EFAULT; |
| 461 | 443 | ||
| 462 | /* Set up to return from userspace. */ | 444 | /* Set up to return from userspace. */ |
| 463 | restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); | 445 | restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); |
| @@ -477,12 +459,12 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
| 477 | err |= __put_user(0x80cd, (short __user *)(frame->retcode+5)); | 459 | err |= __put_user(0x80cd, (short __user *)(frame->retcode+5)); |
| 478 | 460 | ||
| 479 | if (err) | 461 | if (err) |
| 480 | goto give_sigsegv; | 462 | return -EFAULT; |
| 481 | 463 | ||
| 482 | /* Set up registers for signal handler */ | 464 | /* Set up registers for signal handler */ |
| 483 | regs->sp = (unsigned long)frame; | 465 | regs->sp = (unsigned long)frame; |
| 484 | regs->ip = (unsigned long)ka->sa.sa_handler; | 466 | regs->ip = (unsigned long)ka->sa.sa_handler; |
| 485 | regs->ax = (unsigned long)usig; | 467 | regs->ax = (unsigned long)sig; |
| 486 | regs->dx = (unsigned long)&frame->info; | 468 | regs->dx = (unsigned long)&frame->info; |
| 487 | regs->cx = (unsigned long)&frame->uc; | 469 | regs->cx = (unsigned long)&frame->uc; |
| 488 | 470 | ||
| @@ -492,15 +474,48 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
| 492 | regs->cs = __USER_CS; | 474 | regs->cs = __USER_CS; |
| 493 | 475 | ||
| 494 | return 0; | 476 | return 0; |
| 495 | |||
| 496 | give_sigsegv: | ||
| 497 | force_sigsegv(sig, current); | ||
| 498 | return -EFAULT; | ||
| 499 | } | 477 | } |
| 500 | 478 | ||
| 501 | /* | 479 | /* |
| 502 | * OK, we're invoking a handler: | 480 | * OK, we're invoking a handler: |
| 503 | */ | 481 | */ |
| 482 | static int signr_convert(int sig) | ||
| 483 | { | ||
| 484 | struct thread_info *info = current_thread_info(); | ||
| 485 | |||
| 486 | if (info->exec_domain && info->exec_domain->signal_invmap && sig < 32) | ||
| 487 | return info->exec_domain->signal_invmap[sig]; | ||
| 488 | return sig; | ||
| 489 | } | ||
| 490 | |||
| 491 | #define is_ia32 1 | ||
| 492 | #define ia32_setup_frame __setup_frame | ||
| 493 | #define ia32_setup_rt_frame __setup_rt_frame | ||
| 494 | |||
| 495 | static int | ||
| 496 | setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | ||
| 497 | sigset_t *set, struct pt_regs *regs) | ||
| 498 | { | ||
| 499 | int usig = signr_convert(sig); | ||
| 500 | int ret; | ||
| 501 | |||
| 502 | /* Set up the stack frame */ | ||
| 503 | if (is_ia32) { | ||
| 504 | if (ka->sa.sa_flags & SA_SIGINFO) | ||
| 505 | ret = ia32_setup_rt_frame(usig, ka, info, set, regs); | ||
| 506 | else | ||
| 507 | ret = ia32_setup_frame(usig, ka, set, regs); | ||
| 508 | } else | ||
| 509 | ret = __setup_rt_frame(sig, ka, info, set, regs); | ||
| 510 | |||
| 511 | if (ret) { | ||
| 512 | force_sigsegv(sig, current); | ||
| 513 | return -EFAULT; | ||
| 514 | } | ||
| 515 | |||
| 516 | return ret; | ||
| 517 | } | ||
| 518 | |||
| 504 | static int | 519 | static int |
| 505 | handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | 520 | handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, |
| 506 | sigset_t *oldset, struct pt_regs *regs) | 521 | sigset_t *oldset, struct pt_regs *regs) |
| @@ -508,9 +523,9 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | |||
| 508 | int ret; | 523 | int ret; |
| 509 | 524 | ||
| 510 | /* Are we from a system call? */ | 525 | /* Are we from a system call? */ |
| 511 | if ((long)regs->orig_ax >= 0) { | 526 | if (syscall_get_nr(current, regs) >= 0) { |
| 512 | /* If so, check system call restarting.. */ | 527 | /* If so, check system call restarting.. */ |
| 513 | switch (regs->ax) { | 528 | switch (syscall_get_error(current, regs)) { |
| 514 | case -ERESTART_RESTARTBLOCK: | 529 | case -ERESTART_RESTARTBLOCK: |
| 515 | case -ERESTARTNOHAND: | 530 | case -ERESTARTNOHAND: |
| 516 | regs->ax = -EINTR; | 531 | regs->ax = -EINTR; |
| @@ -537,15 +552,20 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | |||
| 537 | likely(test_and_clear_thread_flag(TIF_FORCED_TF))) | 552 | likely(test_and_clear_thread_flag(TIF_FORCED_TF))) |
| 538 | regs->flags &= ~X86_EFLAGS_TF; | 553 | regs->flags &= ~X86_EFLAGS_TF; |
| 539 | 554 | ||
| 540 | /* Set up the stack frame */ | 555 | ret = setup_rt_frame(sig, ka, info, oldset, regs); |
| 541 | if (ka->sa.sa_flags & SA_SIGINFO) | ||
| 542 | ret = setup_rt_frame(sig, ka, info, oldset, regs); | ||
| 543 | else | ||
| 544 | ret = setup_frame(sig, ka, oldset, regs); | ||
| 545 | 556 | ||
| 546 | if (ret) | 557 | if (ret) |
| 547 | return ret; | 558 | return ret; |
| 548 | 559 | ||
| 560 | #ifdef CONFIG_X86_64 | ||
| 561 | /* | ||
| 562 | * This has nothing to do with segment registers, | ||
| 563 | * despite the name. This magic affects uaccess.h | ||
| 564 | * macros' behavior. Reset it to the normal setting. | ||
| 565 | */ | ||
| 566 | set_fs(USER_DS); | ||
| 567 | #endif | ||
| 568 | |||
| 549 | /* | 569 | /* |
| 550 | * Clear the direction flag as per the ABI for function entry. | 570 | * Clear the direction flag as per the ABI for function entry. |
| 551 | */ | 571 | */ |
| @@ -558,8 +578,6 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | |||
| 558 | * handler too. | 578 | * handler too. |
| 559 | */ | 579 | */ |
| 560 | regs->flags &= ~X86_EFLAGS_TF; | 580 | regs->flags &= ~X86_EFLAGS_TF; |
| 561 | if (test_thread_flag(TIF_SINGLESTEP)) | ||
| 562 | ptrace_notify(SIGTRAP); | ||
| 563 | 581 | ||
| 564 | spin_lock_irq(¤t->sighand->siglock); | 582 | spin_lock_irq(¤t->sighand->siglock); |
| 565 | sigorsets(¤t->blocked, ¤t->blocked, &ka->sa.sa_mask); | 583 | sigorsets(¤t->blocked, ¤t->blocked, &ka->sa.sa_mask); |
| @@ -568,9 +586,13 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | |||
| 568 | recalc_sigpending(); | 586 | recalc_sigpending(); |
| 569 | spin_unlock_irq(¤t->sighand->siglock); | 587 | spin_unlock_irq(¤t->sighand->siglock); |
| 570 | 588 | ||
| 589 | tracehook_signal_handler(sig, info, ka, regs, | ||
| 590 | test_thread_flag(TIF_SINGLESTEP)); | ||
| 591 | |||
| 571 | return 0; | 592 | return 0; |
| 572 | } | 593 | } |
| 573 | 594 | ||
| 595 | #define NR_restart_syscall __NR_restart_syscall | ||
| 574 | /* | 596 | /* |
| 575 | * Note that 'init' is a special process: it doesn't get signals it doesn't | 597 | * Note that 'init' is a special process: it doesn't get signals it doesn't |
| 576 | * want to handle. Thus you cannot kill init even with a SIGKILL even by | 598 | * want to handle. Thus you cannot kill init even with a SIGKILL even by |
| @@ -623,9 +645,9 @@ static void do_signal(struct pt_regs *regs) | |||
| 623 | } | 645 | } |
| 624 | 646 | ||
| 625 | /* Did we come from a system call? */ | 647 | /* Did we come from a system call? */ |
| 626 | if ((long)regs->orig_ax >= 0) { | 648 | if (syscall_get_nr(current, regs) >= 0) { |
| 627 | /* Restart the system call - no handlers present */ | 649 | /* Restart the system call - no handlers present */ |
| 628 | switch (regs->ax) { | 650 | switch (syscall_get_error(current, regs)) { |
| 629 | case -ERESTARTNOHAND: | 651 | case -ERESTARTNOHAND: |
| 630 | case -ERESTARTSYS: | 652 | case -ERESTARTSYS: |
| 631 | case -ERESTARTNOINTR: | 653 | case -ERESTARTNOINTR: |
| @@ -634,7 +656,7 @@ static void do_signal(struct pt_regs *regs) | |||
| 634 | break; | 656 | break; |
| 635 | 657 | ||
| 636 | case -ERESTART_RESTARTBLOCK: | 658 | case -ERESTART_RESTARTBLOCK: |
| 637 | regs->ax = __NR_restart_syscall; | 659 | regs->ax = NR_restart_syscall; |
| 638 | regs->ip -= 2; | 660 | regs->ip -= 2; |
| 639 | break; | 661 | break; |
| 640 | } | 662 | } |
| @@ -657,18 +679,38 @@ static void do_signal(struct pt_regs *regs) | |||
| 657 | void | 679 | void |
| 658 | do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) | 680 | do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) |
| 659 | { | 681 | { |
| 660 | /* Pending single-step? */ | 682 | #if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE) |
| 661 | if (thread_info_flags & _TIF_SINGLESTEP) { | 683 | /* notify userspace of pending MCEs */ |
| 662 | regs->flags |= X86_EFLAGS_TF; | 684 | if (thread_info_flags & _TIF_MCE_NOTIFY) |
| 663 | clear_thread_flag(TIF_SINGLESTEP); | 685 | mce_notify_user(); |
| 664 | } | 686 | #endif /* CONFIG_X86_64 && CONFIG_X86_MCE */ |
| 665 | 687 | ||
| 666 | /* deal with pending signal delivery */ | 688 | /* deal with pending signal delivery */ |
| 667 | if (thread_info_flags & _TIF_SIGPENDING) | 689 | if (thread_info_flags & _TIF_SIGPENDING) |
| 668 | do_signal(regs); | 690 | do_signal(regs); |
| 669 | 691 | ||
| 670 | if (thread_info_flags & _TIF_HRTICK_RESCHED) | 692 | if (thread_info_flags & _TIF_NOTIFY_RESUME) { |
| 671 | hrtick_resched(); | 693 | clear_thread_flag(TIF_NOTIFY_RESUME); |
| 694 | tracehook_notify_resume(regs); | ||
| 695 | } | ||
| 672 | 696 | ||
| 697 | #ifdef CONFIG_X86_32 | ||
| 673 | clear_thread_flag(TIF_IRET); | 698 | clear_thread_flag(TIF_IRET); |
| 699 | #endif /* CONFIG_X86_32 */ | ||
| 700 | } | ||
| 701 | |||
| 702 | void signal_fault(struct pt_regs *regs, void __user *frame, char *where) | ||
| 703 | { | ||
| 704 | struct task_struct *me = current; | ||
| 705 | |||
| 706 | if (show_unhandled_signals && printk_ratelimit()) { | ||
| 707 | printk(KERN_INFO | ||
| 708 | "%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx", | ||
| 709 | me->comm, me->pid, where, frame, | ||
| 710 | regs->ip, regs->sp, regs->orig_ax); | ||
| 711 | print_vma_addr(" in ", regs->ip); | ||
| 712 | printk(KERN_CONT "\n"); | ||
| 713 | } | ||
| 714 | |||
| 715 | force_sig(SIGSEGV, me); | ||
| 674 | } | 716 | } |
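Editor's note: both the signal_32.c changes above and the signal_64.c changes below keep the same syscall-restart policy in handle_signal(), now driven by syscall_get_nr()/syscall_get_error(). A userspace sketch of that switch follows; restoring orig_ax before the restart is omitted, and the error values are the usual kernel ones, used here only for illustration.

#include <stdio.h>

#define EINTR			4
#define ERESTARTSYS		512
#define ERESTARTNOINTR		513
#define ERESTARTNOHAND		514
#define ERESTART_RESTARTBLOCK	516

/* Returns 1 if the interrupted syscall should be re-issued (ip -= 2),
 * 0 if it should just return -EINTR or its current value. */
static int restart_after_signal(long *ax, int sa_restart)
{
	switch (*ax) {
	case -ERESTART_RESTARTBLOCK:
	case -ERESTARTNOHAND:
		*ax = -EINTR;
		return 0;
	case -ERESTARTSYS:
		if (!sa_restart) {
			*ax = -EINTR;
			return 0;
		}
		/* fall through */
	case -ERESTARTNOINTR:
		return 1;
	default:
		return 0;
	}
}

int main(void)
{
	long ax = -ERESTARTSYS;
	int restart = restart_after_signal(&ax, 0);	/* handler without SA_RESTART */

	printf("restart=%d ax=%ld\n", restart, ax);	/* restart=0 ax=-4 (-EINTR) */
	return 0;
}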
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index e53b267662e7..a5c9627f4db9 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c | |||
| @@ -15,17 +15,21 @@ | |||
| 15 | #include <linux/errno.h> | 15 | #include <linux/errno.h> |
| 16 | #include <linux/wait.h> | 16 | #include <linux/wait.h> |
| 17 | #include <linux/ptrace.h> | 17 | #include <linux/ptrace.h> |
| 18 | #include <linux/tracehook.h> | ||
| 18 | #include <linux/unistd.h> | 19 | #include <linux/unistd.h> |
| 19 | #include <linux/stddef.h> | 20 | #include <linux/stddef.h> |
| 20 | #include <linux/personality.h> | 21 | #include <linux/personality.h> |
| 21 | #include <linux/compiler.h> | 22 | #include <linux/compiler.h> |
| 23 | #include <linux/uaccess.h> | ||
| 24 | |||
| 22 | #include <asm/processor.h> | 25 | #include <asm/processor.h> |
| 23 | #include <asm/ucontext.h> | 26 | #include <asm/ucontext.h> |
| 24 | #include <asm/uaccess.h> | ||
| 25 | #include <asm/i387.h> | 27 | #include <asm/i387.h> |
| 26 | #include <asm/proto.h> | 28 | #include <asm/proto.h> |
| 27 | #include <asm/ia32_unistd.h> | 29 | #include <asm/ia32_unistd.h> |
| 28 | #include <asm/mce.h> | 30 | #include <asm/mce.h> |
| 31 | #include <asm/syscall.h> | ||
| 32 | #include <asm/syscalls.h> | ||
| 29 | #include "sigframe.h" | 33 | #include "sigframe.h" |
| 30 | 34 | ||
| 31 | #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) | 35 | #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) |
| @@ -41,11 +45,6 @@ | |||
| 41 | # define FIX_EFLAGS __FIX_EFLAGS | 45 | # define FIX_EFLAGS __FIX_EFLAGS |
| 42 | #endif | 46 | #endif |
| 43 | 47 | ||
| 44 | int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | ||
| 45 | sigset_t *set, struct pt_regs * regs); | ||
| 46 | int ia32_setup_frame(int sig, struct k_sigaction *ka, | ||
| 47 | sigset_t *set, struct pt_regs * regs); | ||
| 48 | |||
| 49 | asmlinkage long | 48 | asmlinkage long |
| 50 | sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, | 49 | sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, |
| 51 | struct pt_regs *regs) | 50 | struct pt_regs *regs) |
| @@ -53,6 +52,15 @@ sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, | |||
| 53 | return do_sigaltstack(uss, uoss, regs->sp); | 52 | return do_sigaltstack(uss, uoss, regs->sp); |
| 54 | } | 53 | } |
| 55 | 54 | ||
| 55 | #define COPY(x) { \ | ||
| 56 | err |= __get_user(regs->x, &sc->x); \ | ||
| 57 | } | ||
| 58 | |||
| 59 | #define COPY_SEG_STRICT(seg) { \ | ||
| 60 | unsigned short tmp; \ | ||
| 61 | err |= __get_user(tmp, &sc->seg); \ | ||
| 62 | regs->seg = tmp | 3; \ | ||
| 63 | } | ||
| 56 | 64 | ||
| 57 | /* | 65 | /* |
| 58 | * Do a signal return; undo the signal stack. | 66 | * Do a signal return; undo the signal stack. |
| @@ -61,13 +69,13 @@ static int | |||
| 61 | restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, | 69 | restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, |
| 62 | unsigned long *pax) | 70 | unsigned long *pax) |
| 63 | { | 71 | { |
| 72 | void __user *buf; | ||
| 73 | unsigned int tmpflags; | ||
| 64 | unsigned int err = 0; | 74 | unsigned int err = 0; |
| 65 | 75 | ||
| 66 | /* Always make any pending restarted system calls return -EINTR */ | 76 | /* Always make any pending restarted system calls return -EINTR */ |
| 67 | current_thread_info()->restart_block.fn = do_no_restart_syscall; | 77 | current_thread_info()->restart_block.fn = do_no_restart_syscall; |
| 68 | 78 | ||
| 69 | #define COPY(x) err |= __get_user(regs->x, &sc->x) | ||
| 70 | |||
| 71 | COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); | 79 | COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); |
| 72 | COPY(dx); COPY(cx); COPY(ip); | 80 | COPY(dx); COPY(cx); COPY(ip); |
| 73 | COPY(r8); | 81 | COPY(r8); |
| @@ -82,48 +90,24 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, | |||
| 82 | /* Kernel saves and restores only the CS segment register on signals, | 90 | /* Kernel saves and restores only the CS segment register on signals, |
| 83 | * which is the bare minimum needed to allow mixed 32/64-bit code. | 91 | * which is the bare minimum needed to allow mixed 32/64-bit code. |
| 84 | * App's signal handler can save/restore other segments if needed. */ | 92 | * App's signal handler can save/restore other segments if needed. */ |
| 85 | { | 93 | COPY_SEG_STRICT(cs); |
| 86 | unsigned cs; | ||
| 87 | err |= __get_user(cs, &sc->cs); | ||
| 88 | regs->cs = cs | 3; /* Force into user mode */ | ||
| 89 | } | ||
| 90 | 94 | ||
| 91 | { | 95 | err |= __get_user(tmpflags, &sc->flags); |
| 92 | unsigned int tmpflags; | 96 | regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); |
| 93 | err |= __get_user(tmpflags, &sc->flags); | 97 | regs->orig_ax = -1; /* disable syscall checks */ |
| 94 | regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); | ||
| 95 | regs->orig_ax = -1; /* disable syscall checks */ | ||
| 96 | } | ||
| 97 | 98 | ||
| 98 | { | 99 | err |= __get_user(buf, &sc->fpstate); |
| 99 | struct _fpstate __user * buf; | 100 | err |= restore_i387_xstate(buf); |
| 100 | err |= __get_user(buf, &sc->fpstate); | ||
| 101 | |||
| 102 | if (buf) { | ||
| 103 | if (!access_ok(VERIFY_READ, buf, sizeof(*buf))) | ||
| 104 | goto badframe; | ||
| 105 | err |= restore_i387(buf); | ||
| 106 | } else { | ||
| 107 | struct task_struct *me = current; | ||
| 108 | if (used_math()) { | ||
| 109 | clear_fpu(me); | ||
| 110 | clear_used_math(); | ||
| 111 | } | ||
| 112 | } | ||
| 113 | } | ||
| 114 | 101 | ||
| 115 | err |= __get_user(*pax, &sc->ax); | 102 | err |= __get_user(*pax, &sc->ax); |
| 116 | return err; | 103 | return err; |
| 117 | |||
| 118 | badframe: | ||
| 119 | return 1; | ||
| 120 | } | 104 | } |
| 121 | 105 | ||
| 122 | asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) | 106 | static long do_rt_sigreturn(struct pt_regs *regs) |
| 123 | { | 107 | { |
| 124 | struct rt_sigframe __user *frame; | 108 | struct rt_sigframe __user *frame; |
| 125 | sigset_t set; | ||
| 126 | unsigned long ax; | 109 | unsigned long ax; |
| 110 | sigset_t set; | ||
| 127 | 111 | ||
| 128 | frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long)); | 112 | frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long)); |
| 129 | if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) | 113 | if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) |
| @@ -136,7 +120,7 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) | |||
| 136 | current->blocked = set; | 120 | current->blocked = set; |
| 137 | recalc_sigpending(); | 121 | recalc_sigpending(); |
| 138 | spin_unlock_irq(¤t->sighand->siglock); | 122 | spin_unlock_irq(¤t->sighand->siglock); |
| 139 | 123 | ||
| 140 | if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) | 124 | if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) |
| 141 | goto badframe; | 125 | goto badframe; |
| 142 | 126 | ||
| @@ -146,16 +130,22 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) | |||
| 146 | return ax; | 130 | return ax; |
| 147 | 131 | ||
| 148 | badframe: | 132 | badframe: |
| 149 | signal_fault(regs,frame,"sigreturn"); | 133 | signal_fault(regs, frame, "rt_sigreturn"); |
| 150 | return 0; | 134 | return 0; |
| 151 | } | 135 | } |
| 136 | |||
| 137 | asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) | ||
| 138 | { | ||
| 139 | return do_rt_sigreturn(regs); | ||
| 140 | } | ||
| 152 | 141 | ||
| 153 | /* | 142 | /* |
| 154 | * Set up a signal frame. | 143 | * Set up a signal frame. |
| 155 | */ | 144 | */ |
| 156 | 145 | ||
| 157 | static inline int | 146 | static inline int |
| 158 | setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, unsigned long mask, struct task_struct *me) | 147 | setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, |
| 148 | unsigned long mask, struct task_struct *me) | ||
| 159 | { | 149 | { |
| 160 | int err = 0; | 150 | int err = 0; |
| 161 | 151 | ||
| @@ -207,41 +197,40 @@ get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size) | |||
| 207 | sp = current->sas_ss_sp + current->sas_ss_size; | 197 | sp = current->sas_ss_sp + current->sas_ss_size; |
| 208 | } | 198 | } |
| 209 | 199 | ||
| 210 | return (void __user *)round_down(sp - size, 16); | 200 | return (void __user *)round_down(sp - size, 64); |
| 211 | } | 201 | } |
| 212 | 202 | ||
| 213 | static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | 203 | static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, |
| 214 | sigset_t *set, struct pt_regs * regs) | 204 | sigset_t *set, struct pt_regs *regs) |
| 215 | { | 205 | { |
| 216 | struct rt_sigframe __user *frame; | 206 | struct rt_sigframe __user *frame; |
| 217 | struct _fpstate __user *fp = NULL; | 207 | void __user *fp = NULL; |
| 218 | int err = 0; | 208 | int err = 0; |
| 219 | struct task_struct *me = current; | 209 | struct task_struct *me = current; |
| 220 | 210 | ||
| 221 | if (used_math()) { | 211 | if (used_math()) { |
| 222 | fp = get_stack(ka, regs, sizeof(struct _fpstate)); | 212 | fp = get_stack(ka, regs, sig_xstate_size); |
| 223 | frame = (void __user *)round_down( | 213 | frame = (void __user *)round_down( |
| 224 | (unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8; | 214 | (unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8; |
| 225 | 215 | ||
| 226 | if (!access_ok(VERIFY_WRITE, fp, sizeof(struct _fpstate))) | 216 | if (save_i387_xstate(fp) < 0) |
| 227 | goto give_sigsegv; | 217 | return -EFAULT; |
| 228 | |||
| 229 | if (save_i387(fp) < 0) | ||
| 230 | err |= -1; | ||
| 231 | } else | 218 | } else |
| 232 | frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8; | 219 | frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8; |
| 233 | 220 | ||
| 234 | if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) | 221 | if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) |
| 235 | goto give_sigsegv; | 222 | return -EFAULT; |
| 236 | 223 | ||
| 237 | if (ka->sa.sa_flags & SA_SIGINFO) { | 224 | if (ka->sa.sa_flags & SA_SIGINFO) { |
| 238 | err |= copy_siginfo_to_user(&frame->info, info); | 225 | if (copy_siginfo_to_user(&frame->info, info)) |
| 239 | if (err) | 226 | return -EFAULT; |
| 240 | goto give_sigsegv; | ||
| 241 | } | 227 | } |
| 242 | 228 | ||
| 243 | /* Create the ucontext. */ | 229 | /* Create the ucontext. */ |
| 244 | err |= __put_user(0, &frame->uc.uc_flags); | 230 | if (cpu_has_xsave) |
| 231 | err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags); | ||
| 232 | else | ||
| 233 | err |= __put_user(0, &frame->uc.uc_flags); | ||
| 245 | err |= __put_user(0, &frame->uc.uc_link); | 234 | err |= __put_user(0, &frame->uc.uc_link); |
| 246 | err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp); | 235 | err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp); |
| 247 | err |= __put_user(sas_ss_flags(regs->sp), | 236 | err |= __put_user(sas_ss_flags(regs->sp), |
| @@ -249,9 +238,9 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
| 249 | err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size); | 238 | err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size); |
| 250 | err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, set->sig[0], me); | 239 | err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, set->sig[0], me); |
| 251 | err |= __put_user(fp, &frame->uc.uc_mcontext.fpstate); | 240 | err |= __put_user(fp, &frame->uc.uc_mcontext.fpstate); |
| 252 | if (sizeof(*set) == 16) { | 241 | if (sizeof(*set) == 16) { |
| 253 | __put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]); | 242 | __put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]); |
| 254 | __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]); | 243 | __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]); |
| 255 | } else | 244 | } else |
| 256 | err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); | 245 | err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); |
| 257 | 246 | ||
| @@ -262,15 +251,15 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
| 262 | err |= __put_user(ka->sa.sa_restorer, &frame->pretcode); | 251 | err |= __put_user(ka->sa.sa_restorer, &frame->pretcode); |
| 263 | } else { | 252 | } else { |
| 264 | /* could use a vstub here */ | 253 | /* could use a vstub here */ |
| 265 | goto give_sigsegv; | 254 | return -EFAULT; |
| 266 | } | 255 | } |
| 267 | 256 | ||
| 268 | if (err) | 257 | if (err) |
| 269 | goto give_sigsegv; | 258 | return -EFAULT; |
| 270 | 259 | ||
| 271 | /* Set up registers for signal handler */ | 260 | /* Set up registers for signal handler */ |
| 272 | regs->di = sig; | 261 | regs->di = sig; |
| 273 | /* In case the signal handler was declared without prototypes */ | 262 | /* In case the signal handler was declared without prototypes */ |
| 274 | regs->ax = 0; | 263 | regs->ax = 0; |
| 275 | 264 | ||
| 276 | /* This also works for non SA_SIGINFO handlers because they expect the | 265 | /* This also works for non SA_SIGINFO handlers because they expect the |
| @@ -286,44 +275,45 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
| 286 | regs->cs = __USER_CS; | 275 | regs->cs = __USER_CS; |
| 287 | 276 | ||
| 288 | return 0; | 277 | return 0; |
| 289 | |||
| 290 | give_sigsegv: | ||
| 291 | force_sigsegv(sig, current); | ||
| 292 | return -EFAULT; | ||
| 293 | } | 278 | } |
| 294 | 279 | ||
| 295 | /* | 280 | /* |
| 296 | * Return -1L or the syscall number that @regs is executing. | 281 | * OK, we're invoking a handler |
| 297 | */ | 282 | */ |
| 298 | static long current_syscall(struct pt_regs *regs) | 283 | static int signr_convert(int sig) |
| 299 | { | 284 | { |
| 300 | /* | 285 | return sig; |
| 301 | * We always sign-extend a -1 value being set here, | ||
| 302 | * so this is always either -1L or a syscall number. | ||
| 303 | */ | ||
| 304 | return regs->orig_ax; | ||
| 305 | } | 286 | } |
| 306 | 287 | ||
| 307 | /* | ||
| 308 | * Return a value that is -EFOO if the system call in @regs->orig_ax | ||
| 309 | * returned an error. This only works for @regs from @current. | ||
| 310 | */ | ||
| 311 | static long current_syscall_ret(struct pt_regs *regs) | ||
| 312 | { | ||
| 313 | #ifdef CONFIG_IA32_EMULATION | 288 | #ifdef CONFIG_IA32_EMULATION |
| 314 | if (test_thread_flag(TIF_IA32)) | 289 | #define is_ia32 test_thread_flag(TIF_IA32) |
| 315 | /* | 290 | #else |
| 316 | * Sign-extend the value so (int)-EFOO becomes (long)-EFOO | 291 | #define is_ia32 0 |
| 317 | * and will match correctly in comparisons. | ||
| 318 | */ | ||
| 319 | return (int) regs->ax; | ||
| 320 | #endif | 292 | #endif |
| 321 | return regs->ax; | ||
| 322 | } | ||
| 323 | 293 | ||
| 324 | /* | 294 | static int |
| 325 | * OK, we're invoking a handler | 295 | setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, |
| 326 | */ | 296 | sigset_t *set, struct pt_regs *regs) |
| 297 | { | ||
| 298 | int usig = signr_convert(sig); | ||
| 299 | int ret; | ||
| 300 | |||
| 301 | /* Set up the stack frame */ | ||
| 302 | if (is_ia32) { | ||
| 303 | if (ka->sa.sa_flags & SA_SIGINFO) | ||
| 304 | ret = ia32_setup_rt_frame(usig, ka, info, set, regs); | ||
| 305 | else | ||
| 306 | ret = ia32_setup_frame(usig, ka, set, regs); | ||
| 307 | } else | ||
| 308 | ret = __setup_rt_frame(sig, ka, info, set, regs); | ||
| 309 | |||
| 310 | if (ret) { | ||
| 311 | force_sigsegv(sig, current); | ||
| 312 | return -EFAULT; | ||
| 313 | } | ||
| 314 | |||
| 315 | return ret; | ||
| 316 | } | ||
| 327 | 317 | ||
| 328 | static int | 318 | static int |
| 329 | handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | 319 | handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, |
| @@ -332,9 +322,9 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | |||
| 332 | int ret; | 322 | int ret; |
| 333 | 323 | ||
| 334 | /* Are we from a system call? */ | 324 | /* Are we from a system call? */ |
| 335 | if (current_syscall(regs) >= 0) { | 325 | if (syscall_get_nr(current, regs) >= 0) { |
| 336 | /* If so, check system call restarting.. */ | 326 | /* If so, check system call restarting.. */ |
| 337 | switch (current_syscall_ret(regs)) { | 327 | switch (syscall_get_error(current, regs)) { |
| 338 | case -ERESTART_RESTARTBLOCK: | 328 | case -ERESTART_RESTARTBLOCK: |
| 339 | case -ERESTARTNOHAND: | 329 | case -ERESTARTNOHAND: |
| 340 | regs->ax = -EINTR; | 330 | regs->ax = -EINTR; |
| @@ -361,50 +351,48 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | |||
| 361 | likely(test_and_clear_thread_flag(TIF_FORCED_TF))) | 351 | likely(test_and_clear_thread_flag(TIF_FORCED_TF))) |
| 362 | regs->flags &= ~X86_EFLAGS_TF; | 352 | regs->flags &= ~X86_EFLAGS_TF; |
| 363 | 353 | ||
| 364 | #ifdef CONFIG_IA32_EMULATION | ||
| 365 | if (test_thread_flag(TIF_IA32)) { | ||
| 366 | if (ka->sa.sa_flags & SA_SIGINFO) | ||
| 367 | ret = ia32_setup_rt_frame(sig, ka, info, oldset, regs); | ||
| 368 | else | ||
| 369 | ret = ia32_setup_frame(sig, ka, oldset, regs); | ||
| 370 | } else | ||
| 371 | #endif | ||
| 372 | ret = setup_rt_frame(sig, ka, info, oldset, regs); | 354 | ret = setup_rt_frame(sig, ka, info, oldset, regs); |
| 373 | 355 | ||
| 374 | if (ret == 0) { | 356 | if (ret) |
| 375 | /* | 357 | return ret; |
| 376 | * This has nothing to do with segment registers, | ||
| 377 | * despite the name. This magic affects uaccess.h | ||
| 378 | * macros' behavior. Reset it to the normal setting. | ||
| 379 | */ | ||
| 380 | set_fs(USER_DS); | ||
| 381 | 358 | ||
| 382 | /* | 359 | #ifdef CONFIG_X86_64 |
| 383 | * Clear the direction flag as per the ABI for function entry. | 360 | /* |
| 384 | */ | 361 | * This has nothing to do with segment registers, |
| 385 | regs->flags &= ~X86_EFLAGS_DF; | 362 | * despite the name. This magic affects uaccess.h |
| 363 | * macros' behavior. Reset it to the normal setting. | ||
| 364 | */ | ||
| 365 | set_fs(USER_DS); | ||
| 366 | #endif | ||
| 386 | 367 | ||
| 387 | /* | 368 | /* |
| 388 | * Clear TF when entering the signal handler, but | 369 | * Clear the direction flag as per the ABI for function entry. |
| 389 | * notify any tracer that was single-stepping it. | 370 | */ |
| 390 | * The tracer may want to single-step inside the | 371 | regs->flags &= ~X86_EFLAGS_DF; |
| 391 | * handler too. | ||
| 392 | */ | ||
| 393 | regs->flags &= ~X86_EFLAGS_TF; | ||
| 394 | if (test_thread_flag(TIF_SINGLESTEP)) | ||
| 395 | ptrace_notify(SIGTRAP); | ||
| 396 | |||
| 397 | spin_lock_irq(¤t->sighand->siglock); | ||
| 398 | sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask); | ||
| 399 | if (!(ka->sa.sa_flags & SA_NODEFER)) | ||
| 400 | sigaddset(¤t->blocked,sig); | ||
| 401 | recalc_sigpending(); | ||
| 402 | spin_unlock_irq(¤t->sighand->siglock); | ||
| 403 | } | ||
| 404 | 372 | ||
| 405 | return ret; | 373 | /* |
| 374 | * Clear TF when entering the signal handler, but | ||
| 375 | * notify any tracer that was single-stepping it. | ||
| 376 | * The tracer may want to single-step inside the | ||
| 377 | * handler too. | ||
| 378 | */ | ||
| 379 | regs->flags &= ~X86_EFLAGS_TF; | ||
| 380 | |||
| 381 | spin_lock_irq(¤t->sighand->siglock); | ||
| 382 | sigorsets(¤t->blocked, ¤t->blocked, &ka->sa.sa_mask); | ||
| 383 | if (!(ka->sa.sa_flags & SA_NODEFER)) | ||
| 384 | sigaddset(¤t->blocked, sig); | ||
| 385 | recalc_sigpending(); | ||
| 386 | spin_unlock_irq(¤t->sighand->siglock); | ||
| 387 | |||
| 388 | tracehook_signal_handler(sig, info, ka, regs, | ||
| 389 | test_thread_flag(TIF_SINGLESTEP)); | ||
| 390 | |||
| 391 | return 0; | ||
| 406 | } | 392 | } |
| 407 | 393 | ||
| 394 | #define NR_restart_syscall \ | ||
| 395 | test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall : __NR_restart_syscall | ||
| 408 | /* | 396 | /* |
| 409 | * Note that 'init' is a special process: it doesn't get signals it doesn't | 397 | * Note that 'init' is a special process: it doesn't get signals it doesn't |
| 410 | * want to handle. Thus you cannot kill init even with a SIGKILL even by | 398 | * want to handle. Thus you cannot kill init even with a SIGKILL even by |
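With the ia32 frame selection folded into the unified setup_rt_frame() earlier in this file, handle_signal() reduces to frame setup, flag and sigmask bookkeeping, and a tracehook notification; the 64-bit-only set_fs(USER_DS) reset stays behind an #ifdef. A condensed sketch of the resulting flow, using only names that appear in the hunk above (error paths trimmed; not a drop-in replacement):

    /* Condensed sketch of the unified delivery path. */
    ret = setup_rt_frame(sig, ka, info, oldset, regs);  /* picks ia32 vs. native frame */
    if (ret)
            return ret;                                 /* frame setup already forced SIGSEGV */

    regs->flags &= ~X86_EFLAGS_DF;                      /* ABI-mandated state on handler entry */
    regs->flags &= ~X86_EFLAGS_TF;                      /* no single-step inside the handler */

    spin_lock_irq(&current->sighand->siglock);
    sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
    if (!(ka->sa.sa_flags & SA_NODEFER))
            sigaddset(&current->blocked, sig);
    recalc_sigpending();
    spin_unlock_irq(&current->sighand->siglock);

    tracehook_signal_handler(sig, info, ka, regs,
                             test_thread_flag(TIF_SINGLESTEP)); /* replaces open-coded ptrace_notify(SIGTRAP) */
    return 0;

The NR_restart_syscall macro added just above keeps the TIF_IA32 distinction in one place, so do_signal() below can restart a compat syscall without repeating the test.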
| @@ -434,7 +422,8 @@ static void do_signal(struct pt_regs *regs) | |||
| 434 | 422 | ||
| 435 | signr = get_signal_to_deliver(&info, &ka, regs, NULL); | 423 | signr = get_signal_to_deliver(&info, &ka, regs, NULL); |
| 436 | if (signr > 0) { | 424 | if (signr > 0) { |
| 437 | /* Re-enable any watchpoints before delivering the | 425 | /* |
| 426 | * Re-enable any watchpoints before delivering the | ||
| 438 | * signal to user space. The processor register will | 427 | * signal to user space. The processor register will |
| 439 | * have been cleared if the watchpoint triggered | 428 | * have been cleared if the watchpoint triggered |
| 440 | * inside the kernel. | 429 | * inside the kernel. |
| @@ -442,7 +431,7 @@ static void do_signal(struct pt_regs *regs) | |||
| 442 | if (current->thread.debugreg7) | 431 | if (current->thread.debugreg7) |
| 443 | set_debugreg(current->thread.debugreg7, 7); | 432 | set_debugreg(current->thread.debugreg7, 7); |
| 444 | 433 | ||
| 445 | /* Whee! Actually deliver the signal. */ | 434 | /* Whee! Actually deliver the signal. */ |
| 446 | if (handle_signal(signr, &info, &ka, oldset, regs) == 0) { | 435 | if (handle_signal(signr, &info, &ka, oldset, regs) == 0) { |
| 447 | /* | 436 | /* |
| 448 | * A signal was successfully delivered; the saved | 437 | * A signal was successfully delivered; the saved |
| @@ -456,19 +445,18 @@ static void do_signal(struct pt_regs *regs) | |||
| 456 | } | 445 | } |
| 457 | 446 | ||
| 458 | /* Did we come from a system call? */ | 447 | /* Did we come from a system call? */ |
| 459 | if (current_syscall(regs) >= 0) { | 448 | if (syscall_get_nr(current, regs) >= 0) { |
| 460 | /* Restart the system call - no handlers present */ | 449 | /* Restart the system call - no handlers present */ |
| 461 | switch (current_syscall_ret(regs)) { | 450 | switch (syscall_get_error(current, regs)) { |
| 462 | case -ERESTARTNOHAND: | 451 | case -ERESTARTNOHAND: |
| 463 | case -ERESTARTSYS: | 452 | case -ERESTARTSYS: |
| 464 | case -ERESTARTNOINTR: | 453 | case -ERESTARTNOINTR: |
| 465 | regs->ax = regs->orig_ax; | 454 | regs->ax = regs->orig_ax; |
| 466 | regs->ip -= 2; | 455 | regs->ip -= 2; |
| 467 | break; | 456 | break; |
| 457 | |||
| 468 | case -ERESTART_RESTARTBLOCK: | 458 | case -ERESTART_RESTARTBLOCK: |
| 469 | regs->ax = test_thread_flag(TIF_IA32) ? | 459 | regs->ax = NR_restart_syscall; |
| 470 | __NR_ia32_restart_syscall : | ||
| 471 | __NR_restart_syscall; | ||
| 472 | regs->ip -= 2; | 460 | regs->ip -= 2; |
| 473 | break; | 461 | break; |
| 474 | } | 462 | } |
| @@ -484,38 +472,45 @@ static void do_signal(struct pt_regs *regs) | |||
| 484 | } | 472 | } |
| 485 | } | 473 | } |
| 486 | 474 | ||
| 487 | void do_notify_resume(struct pt_regs *regs, void *unused, | 475 | /* |
| 488 | __u32 thread_info_flags) | 476 | * notification of userspace execution resumption |
| 477 | * - triggered by the TIF_WORK_MASK flags | ||
| 478 | */ | ||
| 479 | void | ||
| 480 | do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) | ||
| 489 | { | 481 | { |
| 490 | /* Pending single-step? */ | 482 | #if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE) |
| 491 | if (thread_info_flags & _TIF_SINGLESTEP) { | ||
| 492 | regs->flags |= X86_EFLAGS_TF; | ||
| 493 | clear_thread_flag(TIF_SINGLESTEP); | ||
| 494 | } | ||
| 495 | |||
| 496 | #ifdef CONFIG_X86_MCE | ||
| 497 | /* notify userspace of pending MCEs */ | 483 | /* notify userspace of pending MCEs */ |
| 498 | if (thread_info_flags & _TIF_MCE_NOTIFY) | 484 | if (thread_info_flags & _TIF_MCE_NOTIFY) |
| 499 | mce_notify_user(); | 485 | mce_notify_user(); |
| 500 | #endif /* CONFIG_X86_MCE */ | 486 | #endif /* CONFIG_X86_64 && CONFIG_X86_MCE */ |
| 501 | 487 | ||
| 502 | /* deal with pending signal delivery */ | 488 | /* deal with pending signal delivery */ |
| 503 | if (thread_info_flags & _TIF_SIGPENDING) | 489 | if (thread_info_flags & _TIF_SIGPENDING) |
| 504 | do_signal(regs); | 490 | do_signal(regs); |
| 505 | 491 | ||
| 506 | if (thread_info_flags & _TIF_HRTICK_RESCHED) | 492 | if (thread_info_flags & _TIF_NOTIFY_RESUME) { |
| 507 | hrtick_resched(); | 493 | clear_thread_flag(TIF_NOTIFY_RESUME); |
| 494 | tracehook_notify_resume(regs); | ||
| 495 | } | ||
| 496 | |||
| 497 | #ifdef CONFIG_X86_32 | ||
| 498 | clear_thread_flag(TIF_IRET); | ||
| 499 | #endif /* CONFIG_X86_32 */ | ||
| 508 | } | 500 | } |
| 509 | 501 | ||
| 510 | void signal_fault(struct pt_regs *regs, void __user *frame, char *where) | 502 | void signal_fault(struct pt_regs *regs, void __user *frame, char *where) |
| 511 | { | 503 | { |
| 512 | struct task_struct *me = current; | 504 | struct task_struct *me = current; |
| 505 | |||
| 513 | if (show_unhandled_signals && printk_ratelimit()) { | 506 | if (show_unhandled_signals && printk_ratelimit()) { |
| 514 | printk("%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx", | 507 | printk(KERN_INFO |
| 515 | me->comm,me->pid,where,frame,regs->ip,regs->sp,regs->orig_ax); | 508 | "%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx", |
| 509 | me->comm, me->pid, where, frame, | ||
| 510 | regs->ip, regs->sp, regs->orig_ax); | ||
| 516 | print_vma_addr(" in ", regs->ip); | 511 | print_vma_addr(" in ", regs->ip); |
| 517 | printk("\n"); | 512 | printk(KERN_CONT "\n"); |
| 518 | } | 513 | } |
| 519 | 514 | ||
| 520 | force_sig(SIGSEGV, me); | 515 | force_sig(SIGSEGV, me); |
| 521 | } | 516 | } |
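Taken together, the signal.c changes leave do_notify_resume() with three jobs: MCE notification (now 64-bit only), pending-signal delivery, and the new TIF_NOTIFY_RESUME leg that hands control to tracehook_notify_resume(); the explicit single-step and hrtick handling is gone, and signal_fault() gains proper printk log levels. A small sketch of how the new leg is typically driven, assuming the tracehook helper set_notify_resume() from <linux/tracehook.h> (that helper is not part of this diff):

    /* Some subsystem arms the flag on a target task... */
    set_notify_resume(task);               /* sets TIF_NOTIFY_RESUME and kicks the task */

    /* ...and on the next return to user mode, do_notify_resume() consumes it: */
    if (thread_info_flags & _TIF_NOTIFY_RESUME) {
            clear_thread_flag(TIF_NOTIFY_RESUME);
            tracehook_notify_resume(regs); /* generic hook; ptrace-style engines run here */
    }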
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 361b7a4c640c..18f9b19f5f8f 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c | |||
| @@ -214,12 +214,16 @@ void smp_call_function_single_interrupt(struct pt_regs *regs) | |||
| 214 | struct smp_ops smp_ops = { | 214 | struct smp_ops smp_ops = { |
| 215 | .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu, | 215 | .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu, |
| 216 | .smp_prepare_cpus = native_smp_prepare_cpus, | 216 | .smp_prepare_cpus = native_smp_prepare_cpus, |
| 217 | .cpu_up = native_cpu_up, | ||
| 218 | .smp_cpus_done = native_smp_cpus_done, | 217 | .smp_cpus_done = native_smp_cpus_done, |
| 219 | 218 | ||
| 220 | .smp_send_stop = native_smp_send_stop, | 219 | .smp_send_stop = native_smp_send_stop, |
| 221 | .smp_send_reschedule = native_smp_send_reschedule, | 220 | .smp_send_reschedule = native_smp_send_reschedule, |
| 222 | 221 | ||
| 222 | .cpu_up = native_cpu_up, | ||
| 223 | .cpu_die = native_cpu_die, | ||
| 224 | .cpu_disable = native_cpu_disable, | ||
| 225 | .play_dead = native_play_dead, | ||
| 226 | |||
| 223 | .send_call_func_ipi = native_send_call_func_ipi, | 227 | .send_call_func_ipi = native_send_call_func_ipi, |
| 224 | .send_call_func_single_ipi = native_send_call_func_single_ipi, | 228 | .send_call_func_single_ipi = native_send_call_func_single_ipi, |
| 225 | }; | 229 | }; |
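The smp_ops table grows hotplug callbacks (cpu_disable, cpu_die, play_dead) next to cpu_up, so a paravirtualized backend can override CPU teardown as well as bring-up. The generic entry points are expected to be thin wrappers over this table; a sketch of what those wrappers look like (they live in the x86 smp header, not in this hunk, so treat the exact form as an assumption):

    /* Illustrative wrappers dispatching through smp_ops. */
    static inline int __cpu_disable(void)          { return smp_ops.cpu_disable(); }
    static inline void __cpu_die(unsigned int cpu) { smp_ops.cpu_die(cpu); }
    static inline void play_dead(void)             { smp_ops.play_dead(); }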
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 687376ab07e8..7b1093397319 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
| @@ -52,6 +52,7 @@ | |||
| 52 | #include <asm/desc.h> | 52 | #include <asm/desc.h> |
| 53 | #include <asm/nmi.h> | 53 | #include <asm/nmi.h> |
| 54 | #include <asm/irq.h> | 54 | #include <asm/irq.h> |
| 55 | #include <asm/idle.h> | ||
| 55 | #include <asm/smp.h> | 56 | #include <asm/smp.h> |
| 56 | #include <asm/trampoline.h> | 57 | #include <asm/trampoline.h> |
| 57 | #include <asm/cpu.h> | 58 | #include <asm/cpu.h> |
| @@ -88,7 +89,7 @@ static DEFINE_PER_CPU(struct task_struct *, idle_thread_array); | |||
| 88 | #define get_idle_for_cpu(x) (per_cpu(idle_thread_array, x)) | 89 | #define get_idle_for_cpu(x) (per_cpu(idle_thread_array, x)) |
| 89 | #define set_idle_for_cpu(x, p) (per_cpu(idle_thread_array, x) = (p)) | 90 | #define set_idle_for_cpu(x, p) (per_cpu(idle_thread_array, x) = (p)) |
| 90 | #else | 91 | #else |
| 91 | struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ; | 92 | static struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ; |
| 92 | #define get_idle_for_cpu(x) (idle_thread_array[(x)]) | 93 | #define get_idle_for_cpu(x) (idle_thread_array[(x)]) |
| 93 | #define set_idle_for_cpu(x, p) (idle_thread_array[(x)] = (p)) | 94 | #define set_idle_for_cpu(x, p) (idle_thread_array[(x)] = (p)) |
| 94 | #endif | 95 | #endif |
| @@ -123,13 +124,12 @@ EXPORT_PER_CPU_SYMBOL(cpu_info); | |||
| 123 | 124 | ||
| 124 | static atomic_t init_deasserted; | 125 | static atomic_t init_deasserted; |
| 125 | 126 | ||
| 126 | static int boot_cpu_logical_apicid; | ||
| 127 | 127 | ||
| 128 | /* representing cpus for which sibling maps can be computed */ | 128 | /* representing cpus for which sibling maps can be computed */ |
| 129 | static cpumask_t cpu_sibling_setup_map; | 129 | static cpumask_t cpu_sibling_setup_map; |
| 130 | 130 | ||
| 131 | /* Set if we find a B stepping CPU */ | 131 | /* Set if we find a B stepping CPU */ |
| 132 | int __cpuinitdata smp_b_stepping; | 132 | static int __cpuinitdata smp_b_stepping; |
| 133 | 133 | ||
| 134 | #if defined(CONFIG_NUMA) && defined(CONFIG_X86_32) | 134 | #if defined(CONFIG_NUMA) && defined(CONFIG_X86_32) |
| 135 | 135 | ||
| @@ -165,6 +165,8 @@ static void unmap_cpu_to_node(int cpu) | |||
| 165 | #endif | 165 | #endif |
| 166 | 166 | ||
| 167 | #ifdef CONFIG_X86_32 | 167 | #ifdef CONFIG_X86_32 |
| 168 | static int boot_cpu_logical_apicid; | ||
| 169 | |||
| 168 | u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly = | 170 | u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly = |
| 169 | { [0 ... NR_CPUS-1] = BAD_APICID }; | 171 | { [0 ... NR_CPUS-1] = BAD_APICID }; |
| 170 | 172 | ||
| @@ -210,13 +212,13 @@ static void __cpuinit smp_callin(void) | |||
| 210 | /* | 212 | /* |
| 211 | * (This works even if the APIC is not enabled.) | 213 | * (This works even if the APIC is not enabled.) |
| 212 | */ | 214 | */ |
| 213 | phys_id = GET_APIC_ID(read_apic_id()); | 215 | phys_id = read_apic_id(); |
| 214 | cpuid = smp_processor_id(); | 216 | cpuid = smp_processor_id(); |
| 215 | if (cpu_isset(cpuid, cpu_callin_map)) { | 217 | if (cpu_isset(cpuid, cpu_callin_map)) { |
| 216 | panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__, | 218 | panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__, |
| 217 | phys_id, cpuid); | 219 | phys_id, cpuid); |
| 218 | } | 220 | } |
| 219 | Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id); | 221 | pr_debug("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id); |
| 220 | 222 | ||
| 221 | /* | 223 | /* |
| 222 | * STARTUP IPIs are fragile beasts as they might sometimes | 224 | * STARTUP IPIs are fragile beasts as they might sometimes |
| @@ -251,12 +253,13 @@ static void __cpuinit smp_callin(void) | |||
| 251 | * boards) | 253 | * boards) |
| 252 | */ | 254 | */ |
| 253 | 255 | ||
| 254 | Dprintk("CALLIN, before setup_local_APIC().\n"); | 256 | pr_debug("CALLIN, before setup_local_APIC().\n"); |
| 255 | smp_callin_clear_local_apic(); | 257 | smp_callin_clear_local_apic(); |
| 256 | setup_local_APIC(); | 258 | setup_local_APIC(); |
| 257 | end_local_APIC_setup(); | 259 | end_local_APIC_setup(); |
| 258 | map_cpu_to_logical_apicid(); | 260 | map_cpu_to_logical_apicid(); |
| 259 | 261 | ||
| 262 | notify_cpu_starting(cpuid); | ||
| 260 | /* | 263 | /* |
| 261 | * Get our bogomips. | 264 | * Get our bogomips. |
| 262 | * | 265 | * |
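Two themes of this hunk recur through the rest of smpboot.c: the ad-hoc Dprintk() calls become pr_debug(), and the freshly booting CPU now announces itself with notify_cpu_starting(cpuid), which raises CPU_STARTING on the hotplug notifier chain before the CPU is marked online. A subscriber would look roughly like this (illustrative only; the callback shape and register_cpu_notifier() are the standard hotplug-notifier API of this era, not part of the diff):

    /* Illustrative CPU_STARTING subscriber; runs on the incoming CPU, irqs off. */
    static int __cpuinit my_cpu_callback(struct notifier_block *nb,
                                         unsigned long action, void *hcpu)
    {
            unsigned int cpu = (unsigned long)hcpu;

            if ((action & ~CPU_TASKS_FROZEN) == CPU_STARTING)
                    pr_debug("CPU%u is starting\n", cpu);
            return NOTIFY_OK;
    }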
| @@ -266,7 +269,7 @@ static void __cpuinit smp_callin(void) | |||
| 266 | local_irq_enable(); | 269 | local_irq_enable(); |
| 267 | calibrate_delay(); | 270 | calibrate_delay(); |
| 268 | local_irq_disable(); | 271 | local_irq_disable(); |
| 269 | Dprintk("Stack at about %p\n", &cpuid); | 272 | pr_debug("Stack at about %p\n", &cpuid); |
| 270 | 273 | ||
| 271 | /* | 274 | /* |
| 272 | * Save our processor parameters | 275 | * Save our processor parameters |
| @@ -279,6 +282,8 @@ static void __cpuinit smp_callin(void) | |||
| 279 | cpu_set(cpuid, cpu_callin_map); | 282 | cpu_set(cpuid, cpu_callin_map); |
| 280 | } | 283 | } |
| 281 | 284 | ||
| 285 | static int __cpuinitdata unsafe_smp; | ||
| 286 | |||
| 282 | /* | 287 | /* |
| 283 | * Activate a secondary processor. | 288 | * Activate a secondary processor. |
| 284 | */ | 289 | */ |
| @@ -326,15 +331,22 @@ static void __cpuinit start_secondary(void *unused) | |||
| 326 | * for which cpus receive the IPI. Holding this | 331 | * for which cpus receive the IPI. Holding this |
| 327 | * lock helps us to not include this cpu in a currently in progress | 332 | * lock helps us to not include this cpu in a currently in progress |
| 328 | * smp_call_function(). | 333 | * smp_call_function(). |
| 334 | * | ||
| 335 | * We need to hold vector_lock so there the set of online cpus | ||
| 336 | * does not change while we are assigning vectors to cpus. Holding | ||
| 337 | * this lock ensures we don't half assign or remove an irq from a cpu. | ||
| 329 | */ | 338 | */ |
| 330 | ipi_call_lock_irq(); | 339 | ipi_call_lock(); |
| 331 | #ifdef CONFIG_X86_IO_APIC | 340 | lock_vector_lock(); |
| 332 | setup_vector_irq(smp_processor_id()); | 341 | __setup_vector_irq(smp_processor_id()); |
| 333 | #endif | ||
| 334 | cpu_set(smp_processor_id(), cpu_online_map); | 342 | cpu_set(smp_processor_id(), cpu_online_map); |
| 335 | ipi_call_unlock_irq(); | 343 | unlock_vector_lock(); |
| 344 | ipi_call_unlock(); | ||
| 336 | per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; | 345 | per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; |
| 337 | 346 | ||
| 347 | /* enable local interrupts */ | ||
| 348 | local_irq_enable(); | ||
| 349 | |||
| 338 | setup_secondary_clock(); | 350 | setup_secondary_clock(); |
| 339 | 351 | ||
| 340 | wmb(); | 352 | wmb(); |
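start_secondary() now nests vector_lock inside the IPI-call lock while it installs its IRQ vectors and flips itself into cpu_online_map, so vector assignment can never observe a half-online CPU; the irq-disabled ipi_call_lock_irq() variant is replaced by the plain lock plus an explicit local_irq_enable() once everything is in place. The ordering, condensed from the hunk above:

    ipi_call_lock();                        /* exclude in-flight smp_call_function() */
    lock_vector_lock();                     /* freeze vector<->cpu assignment */
    __setup_vector_irq(smp_processor_id());
    cpu_set(smp_processor_id(), cpu_online_map);
    unlock_vector_lock();
    ipi_call_unlock();
    per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
    local_irq_enable();                     /* interrupts only after vectors exist */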
| @@ -387,7 +399,7 @@ static void __cpuinit smp_apply_quirks(struct cpuinfo_x86 *c) | |||
| 387 | goto valid_k7; | 399 | goto valid_k7; |
| 388 | 400 | ||
| 389 | /* If we get here, not a certified SMP capable AMD system. */ | 401 | /* If we get here, not a certified SMP capable AMD system. */ |
| 390 | add_taint(TAINT_UNSAFE_SMP); | 402 | unsafe_smp = 1; |
| 391 | } | 403 | } |
| 392 | 404 | ||
| 393 | valid_k7: | 405 | valid_k7: |
| @@ -404,12 +416,10 @@ static void __cpuinit smp_checks(void) | |||
| 404 | * Don't taint if we are running SMP kernel on a single non-MP | 416 | * Don't taint if we are running SMP kernel on a single non-MP |
| 405 | * approved Athlon | 417 | * approved Athlon |
| 406 | */ | 418 | */ |
| 407 | if (tainted & TAINT_UNSAFE_SMP) { | 419 | if (unsafe_smp && num_online_cpus() > 1) { |
| 408 | if (num_online_cpus()) | 420 | printk(KERN_INFO "WARNING: This combination of AMD" |
| 409 | printk(KERN_INFO "WARNING: This combination of AMD" | 421 | "processors is not suitable for SMP.\n"); |
| 410 | "processors is not suitable for SMP.\n"); | 422 | add_taint(TAINT_UNSAFE_SMP); |
| 411 | else | ||
| 412 | tainted &= ~TAINT_UNSAFE_SMP; | ||
| 413 | } | 423 | } |
| 414 | } | 424 | } |
| 415 | 425 | ||
| @@ -438,7 +448,7 @@ void __cpuinit set_cpu_sibling_map(int cpu) | |||
| 438 | cpu_set(cpu, cpu_sibling_setup_map); | 448 | cpu_set(cpu, cpu_sibling_setup_map); |
| 439 | 449 | ||
| 440 | if (smp_num_siblings > 1) { | 450 | if (smp_num_siblings > 1) { |
| 441 | for_each_cpu_mask(i, cpu_sibling_setup_map) { | 451 | for_each_cpu_mask_nr(i, cpu_sibling_setup_map) { |
| 442 | if (c->phys_proc_id == cpu_data(i).phys_proc_id && | 452 | if (c->phys_proc_id == cpu_data(i).phys_proc_id && |
| 443 | c->cpu_core_id == cpu_data(i).cpu_core_id) { | 453 | c->cpu_core_id == cpu_data(i).cpu_core_id) { |
| 444 | cpu_set(i, per_cpu(cpu_sibling_map, cpu)); | 454 | cpu_set(i, per_cpu(cpu_sibling_map, cpu)); |
| @@ -461,7 +471,7 @@ void __cpuinit set_cpu_sibling_map(int cpu) | |||
| 461 | return; | 471 | return; |
| 462 | } | 472 | } |
| 463 | 473 | ||
| 464 | for_each_cpu_mask(i, cpu_sibling_setup_map) { | 474 | for_each_cpu_mask_nr(i, cpu_sibling_setup_map) { |
| 465 | if (per_cpu(cpu_llc_id, cpu) != BAD_APICID && | 475 | if (per_cpu(cpu_llc_id, cpu) != BAD_APICID && |
| 466 | per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) { | 476 | per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) { |
| 467 | cpu_set(i, c->llc_shared_map); | 477 | cpu_set(i, c->llc_shared_map); |
| @@ -513,7 +523,7 @@ static void impress_friends(void) | |||
| 513 | /* | 523 | /* |
| 514 | * Allow the user to impress friends. | 524 | * Allow the user to impress friends. |
| 515 | */ | 525 | */ |
| 516 | Dprintk("Before bogomips.\n"); | 526 | pr_debug("Before bogomips.\n"); |
| 517 | for_each_possible_cpu(cpu) | 527 | for_each_possible_cpu(cpu) |
| 518 | if (cpu_isset(cpu, cpu_callout_map)) | 528 | if (cpu_isset(cpu, cpu_callout_map)) |
| 519 | bogosum += cpu_data(cpu).loops_per_jiffy; | 529 | bogosum += cpu_data(cpu).loops_per_jiffy; |
| @@ -523,7 +533,7 @@ static void impress_friends(void) | |||
| 523 | bogosum/(500000/HZ), | 533 | bogosum/(500000/HZ), |
| 524 | (bogosum/(5000/HZ))%100); | 534 | (bogosum/(5000/HZ))%100); |
| 525 | 535 | ||
| 526 | Dprintk("Before bogocount - setting activated=1.\n"); | 536 | pr_debug("Before bogocount - setting activated=1.\n"); |
| 527 | } | 537 | } |
| 528 | 538 | ||
| 529 | static inline void __inquire_remote_apic(int apicid) | 539 | static inline void __inquire_remote_apic(int apicid) |
| @@ -533,10 +543,10 @@ static inline void __inquire_remote_apic(int apicid) | |||
| 533 | int timeout; | 543 | int timeout; |
| 534 | u32 status; | 544 | u32 status; |
| 535 | 545 | ||
| 536 | printk(KERN_INFO "Inquiring remote APIC #%d...\n", apicid); | 546 | printk(KERN_INFO "Inquiring remote APIC 0x%x...\n", apicid); |
| 537 | 547 | ||
| 538 | for (i = 0; i < ARRAY_SIZE(regs); i++) { | 548 | for (i = 0; i < ARRAY_SIZE(regs); i++) { |
| 539 | printk(KERN_INFO "... APIC #%d %s: ", apicid, names[i]); | 549 | printk(KERN_INFO "... APIC 0x%x %s: ", apicid, names[i]); |
| 540 | 550 | ||
| 541 | /* | 551 | /* |
| 542 | * Wait for idle. | 552 | * Wait for idle. |
| @@ -546,8 +556,7 @@ static inline void __inquire_remote_apic(int apicid) | |||
| 546 | printk(KERN_CONT | 556 | printk(KERN_CONT |
| 547 | "a previous APIC delivery may have failed\n"); | 557 | "a previous APIC delivery may have failed\n"); |
| 548 | 558 | ||
| 549 | apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); | 559 | apic_icr_write(APIC_DM_REMRD | regs[i], apicid); |
| 550 | apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]); | ||
| 551 | 560 | ||
| 552 | timeout = 0; | 561 | timeout = 0; |
| 553 | do { | 562 | do { |
| @@ -579,29 +588,24 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) | |||
| 579 | int maxlvt; | 588 | int maxlvt; |
| 580 | 589 | ||
| 581 | /* Target chip */ | 590 | /* Target chip */ |
| 582 | apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid)); | ||
| 583 | |||
| 584 | /* Boot on the stack */ | 591 | /* Boot on the stack */ |
| 585 | /* Kick the second */ | 592 | /* Kick the second */ |
| 586 | apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL); | 593 | apic_icr_write(APIC_DM_NMI | APIC_DEST_LOGICAL, logical_apicid); |
| 587 | 594 | ||
| 588 | Dprintk("Waiting for send to finish...\n"); | 595 | pr_debug("Waiting for send to finish...\n"); |
| 589 | send_status = safe_apic_wait_icr_idle(); | 596 | send_status = safe_apic_wait_icr_idle(); |
| 590 | 597 | ||
| 591 | /* | 598 | /* |
| 592 | * Give the other CPU some time to accept the IPI. | 599 | * Give the other CPU some time to accept the IPI. |
| 593 | */ | 600 | */ |
| 594 | udelay(200); | 601 | udelay(200); |
| 595 | /* | 602 | if (APIC_INTEGRATED(apic_version[phys_apicid])) { |
| 596 | * Due to the Pentium erratum 3AP. | 603 | maxlvt = lapic_get_maxlvt(); |
| 597 | */ | 604 | if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ |
| 598 | maxlvt = lapic_get_maxlvt(); | 605 | apic_write(APIC_ESR, 0); |
| 599 | if (maxlvt > 3) { | 606 | accept_status = (apic_read(APIC_ESR) & 0xEF); |
| 600 | apic_read_around(APIC_SPIV); | ||
| 601 | apic_write(APIC_ESR, 0); | ||
| 602 | } | 607 | } |
| 603 | accept_status = (apic_read(APIC_ESR) & 0xEF); | 608 | pr_debug("NMI sent.\n"); |
| 604 | Dprintk("NMI sent.\n"); | ||
| 605 | 609 | ||
| 606 | if (send_status) | 610 | if (send_status) |
| 607 | printk(KERN_ERR "APIC never delivered???\n"); | 611 | printk(KERN_ERR "APIC never delivered???\n"); |
| @@ -625,42 +629,40 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) | |||
| 625 | return send_status; | 629 | return send_status; |
| 626 | } | 630 | } |
| 627 | 631 | ||
| 632 | maxlvt = lapic_get_maxlvt(); | ||
| 633 | |||
| 628 | /* | 634 | /* |
| 629 | * Be paranoid about clearing APIC errors. | 635 | * Be paranoid about clearing APIC errors. |
| 630 | */ | 636 | */ |
| 631 | if (APIC_INTEGRATED(apic_version[phys_apicid])) { | 637 | if (APIC_INTEGRATED(apic_version[phys_apicid])) { |
| 632 | apic_read_around(APIC_SPIV); | 638 | if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ |
| 633 | apic_write(APIC_ESR, 0); | 639 | apic_write(APIC_ESR, 0); |
| 634 | apic_read(APIC_ESR); | 640 | apic_read(APIC_ESR); |
| 635 | } | 641 | } |
| 636 | 642 | ||
| 637 | Dprintk("Asserting INIT.\n"); | 643 | pr_debug("Asserting INIT.\n"); |
| 638 | 644 | ||
| 639 | /* | 645 | /* |
| 640 | * Turn INIT on target chip | 646 | * Turn INIT on target chip |
| 641 | */ | 647 | */ |
| 642 | apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); | ||
| 643 | |||
| 644 | /* | 648 | /* |
| 645 | * Send IPI | 649 | * Send IPI |
| 646 | */ | 650 | */ |
| 647 | apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT | 651 | apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT, |
| 648 | | APIC_DM_INIT); | 652 | phys_apicid); |
| 649 | 653 | ||
| 650 | Dprintk("Waiting for send to finish...\n"); | 654 | pr_debug("Waiting for send to finish...\n"); |
| 651 | send_status = safe_apic_wait_icr_idle(); | 655 | send_status = safe_apic_wait_icr_idle(); |
| 652 | 656 | ||
| 653 | mdelay(10); | 657 | mdelay(10); |
| 654 | 658 | ||
| 655 | Dprintk("Deasserting INIT.\n"); | 659 | pr_debug("Deasserting INIT.\n"); |
| 656 | 660 | ||
| 657 | /* Target chip */ | 661 | /* Target chip */ |
| 658 | apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); | ||
| 659 | |||
| 660 | /* Send IPI */ | 662 | /* Send IPI */ |
| 661 | apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); | 663 | apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid); |
| 662 | 664 | ||
| 663 | Dprintk("Waiting for send to finish...\n"); | 665 | pr_debug("Waiting for send to finish...\n"); |
| 664 | send_status = safe_apic_wait_icr_idle(); | 666 | send_status = safe_apic_wait_icr_idle(); |
| 665 | 667 | ||
| 666 | mb(); | 668 | mb(); |
| @@ -687,55 +689,46 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) | |||
| 687 | /* | 689 | /* |
| 688 | * Run STARTUP IPI loop. | 690 | * Run STARTUP IPI loop. |
| 689 | */ | 691 | */ |
| 690 | Dprintk("#startup loops: %d.\n", num_starts); | 692 | pr_debug("#startup loops: %d.\n", num_starts); |
| 691 | |||
| 692 | maxlvt = lapic_get_maxlvt(); | ||
| 693 | 693 | ||
| 694 | for (j = 1; j <= num_starts; j++) { | 694 | for (j = 1; j <= num_starts; j++) { |
| 695 | Dprintk("Sending STARTUP #%d.\n", j); | 695 | pr_debug("Sending STARTUP #%d.\n", j); |
| 696 | apic_read_around(APIC_SPIV); | 696 | if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ |
| 697 | apic_write(APIC_ESR, 0); | 697 | apic_write(APIC_ESR, 0); |
| 698 | apic_read(APIC_ESR); | 698 | apic_read(APIC_ESR); |
| 699 | Dprintk("After apic_write.\n"); | 699 | pr_debug("After apic_write.\n"); |
| 700 | 700 | ||
| 701 | /* | 701 | /* |
| 702 | * STARTUP IPI | 702 | * STARTUP IPI |
| 703 | */ | 703 | */ |
| 704 | 704 | ||
| 705 | /* Target chip */ | 705 | /* Target chip */ |
| 706 | apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); | ||
| 707 | |||
| 708 | /* Boot on the stack */ | 706 | /* Boot on the stack */ |
| 709 | /* Kick the second */ | 707 | /* Kick the second */ |
| 710 | apic_write_around(APIC_ICR, APIC_DM_STARTUP | 708 | apic_icr_write(APIC_DM_STARTUP | (start_eip >> 12), |
| 711 | | (start_eip >> 12)); | 709 | phys_apicid); |
| 712 | 710 | ||
| 713 | /* | 711 | /* |
| 714 | * Give the other CPU some time to accept the IPI. | 712 | * Give the other CPU some time to accept the IPI. |
| 715 | */ | 713 | */ |
| 716 | udelay(300); | 714 | udelay(300); |
| 717 | 715 | ||
| 718 | Dprintk("Startup point 1.\n"); | 716 | pr_debug("Startup point 1.\n"); |
| 719 | 717 | ||
| 720 | Dprintk("Waiting for send to finish...\n"); | 718 | pr_debug("Waiting for send to finish...\n"); |
| 721 | send_status = safe_apic_wait_icr_idle(); | 719 | send_status = safe_apic_wait_icr_idle(); |
| 722 | 720 | ||
| 723 | /* | 721 | /* |
| 724 | * Give the other CPU some time to accept the IPI. | 722 | * Give the other CPU some time to accept the IPI. |
| 725 | */ | 723 | */ |
| 726 | udelay(200); | 724 | udelay(200); |
| 727 | /* | 725 | if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ |
| 728 | * Due to the Pentium erratum 3AP. | ||
| 729 | */ | ||
| 730 | if (maxlvt > 3) { | ||
| 731 | apic_read_around(APIC_SPIV); | ||
| 732 | apic_write(APIC_ESR, 0); | 726 | apic_write(APIC_ESR, 0); |
| 733 | } | ||
| 734 | accept_status = (apic_read(APIC_ESR) & 0xEF); | 727 | accept_status = (apic_read(APIC_ESR) & 0xEF); |
| 735 | if (send_status || accept_status) | 728 | if (send_status || accept_status) |
| 736 | break; | 729 | break; |
| 737 | } | 730 | } |
| 738 | Dprintk("After Startup.\n"); | 731 | pr_debug("After Startup.\n"); |
| 739 | 732 | ||
| 740 | if (send_status) | 733 | if (send_status) |
| 741 | printk(KERN_ERR "APIC never delivered???\n"); | 734 | printk(KERN_ERR "APIC never delivered???\n"); |
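Throughout the INIT/STARTUP wake-up path, the two-step apic_write_around(APIC_ICR2)/apic_write_around(APIC_ICR) sequence is collapsed into a single apic_icr_write(value, destination), giving x2APIC and paravirt implementations one operation to hook, and maxlvt is read once up front so the Pentium erratum-3AP ESR clearing can be gated on maxlvt > 3. Old versus new ICR programming for the STARTUP IPI, condensed from the hunk:

    /* Before: destination in ICR2, then command in ICR. */
    apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
    apic_write_around(APIC_ICR, APIC_DM_STARTUP | (start_eip >> 12));

    /* After: one call; the APIC driver splits command and destination itself. */
    apic_icr_write(APIC_DM_STARTUP | (start_eip >> 12), phys_apicid);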
| @@ -763,12 +756,20 @@ static void __cpuinit do_fork_idle(struct work_struct *work) | |||
| 763 | } | 756 | } |
| 764 | 757 | ||
| 765 | #ifdef CONFIG_X86_64 | 758 | #ifdef CONFIG_X86_64 |
| 759 | |||
| 760 | /* __ref because it's safe to call free_bootmem when after_bootmem == 0. */ | ||
| 761 | static void __ref free_bootmem_pda(struct x8664_pda *oldpda) | ||
| 762 | { | ||
| 763 | if (!after_bootmem) | ||
| 764 | free_bootmem((unsigned long)oldpda, sizeof(*oldpda)); | ||
| 765 | } | ||
| 766 | |||
| 766 | /* | 767 | /* |
| 767 | * Allocate node local memory for the AP pda. | 768 | * Allocate node local memory for the AP pda. |
| 768 | * | 769 | * |
| 769 | * Must be called after the _cpu_pda pointer table is initialized. | 770 | * Must be called after the _cpu_pda pointer table is initialized. |
| 770 | */ | 771 | */ |
| 771 | static int __cpuinit get_local_pda(int cpu) | 772 | int __cpuinit get_local_pda(int cpu) |
| 772 | { | 773 | { |
| 773 | struct x8664_pda *oldpda, *newpda; | 774 | struct x8664_pda *oldpda, *newpda; |
| 774 | unsigned long size = sizeof(struct x8664_pda); | 775 | unsigned long size = sizeof(struct x8664_pda); |
| @@ -791,8 +792,7 @@ static int __cpuinit get_local_pda(int cpu) | |||
| 791 | 792 | ||
| 792 | if (oldpda) { | 793 | if (oldpda) { |
| 793 | memcpy(newpda, oldpda, size); | 794 | memcpy(newpda, oldpda, size); |
| 794 | if (!after_bootmem) | 795 | free_bootmem_pda(oldpda); |
| 795 | free_bootmem((unsigned long)oldpda, size); | ||
| 796 | } | 796 | } |
| 797 | 797 | ||
| 798 | newpda->in_bootmem = 0; | 798 | newpda->in_bootmem = 0; |
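get_local_pda() loses its static so the 64-bit CPU-hotplug path can call it directly, and the conditional free_bootmem() moves into a small __ref helper. The __ref annotation tells the section-mismatch checker that referencing bootmem code from __cpuinit text is intentional here, because the runtime after_bootmem test guarantees the call is only reached while bootmem is still live:

    /* Helper as introduced above; safe because of the after_bootmem guard. */
    static void __ref free_bootmem_pda(struct x8664_pda *oldpda)
    {
            if (!after_bootmem)
                    free_bootmem((unsigned long)oldpda, sizeof(*oldpda));
    }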
| @@ -874,7 +874,7 @@ do_rest: | |||
| 874 | start_ip = setup_trampoline(); | 874 | start_ip = setup_trampoline(); |
| 875 | 875 | ||
| 876 | /* So we see what's up */ | 876 | /* So we see what's up */ |
| 877 | printk(KERN_INFO "Booting processor %d/%d ip %lx\n", | 877 | printk(KERN_INFO "Booting processor %d APIC 0x%x ip 0x%lx\n", |
| 878 | cpu, apicid, start_ip); | 878 | cpu, apicid, start_ip); |
| 879 | 879 | ||
| 880 | /* | 880 | /* |
| @@ -886,16 +886,18 @@ do_rest: | |||
| 886 | 886 | ||
| 887 | if (get_uv_system_type() != UV_NON_UNIQUE_APIC) { | 887 | if (get_uv_system_type() != UV_NON_UNIQUE_APIC) { |
| 888 | 888 | ||
| 889 | Dprintk("Setting warm reset code and vector.\n"); | 889 | pr_debug("Setting warm reset code and vector.\n"); |
| 890 | 890 | ||
| 891 | store_NMI_vector(&nmi_high, &nmi_low); | 891 | store_NMI_vector(&nmi_high, &nmi_low); |
| 892 | 892 | ||
| 893 | smpboot_setup_warm_reset_vector(start_ip); | 893 | smpboot_setup_warm_reset_vector(start_ip); |
| 894 | /* | 894 | /* |
| 895 | * Be paranoid about clearing APIC errors. | 895 | * Be paranoid about clearing APIC errors. |
| 896 | */ | 896 | */ |
| 897 | apic_write(APIC_ESR, 0); | 897 | if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { |
| 898 | apic_read(APIC_ESR); | 898 | apic_write(APIC_ESR, 0); |
| 899 | apic_read(APIC_ESR); | ||
| 900 | } | ||
| 899 | } | 901 | } |
| 900 | 902 | ||
| 901 | /* | 903 | /* |
| @@ -907,9 +909,9 @@ do_rest: | |||
| 907 | /* | 909 | /* |
| 908 | * allow APs to start initializing. | 910 | * allow APs to start initializing. |
| 909 | */ | 911 | */ |
| 910 | Dprintk("Before Callout %d.\n", cpu); | 912 | pr_debug("Before Callout %d.\n", cpu); |
| 911 | cpu_set(cpu, cpu_callout_map); | 913 | cpu_set(cpu, cpu_callout_map); |
| 912 | Dprintk("After Callout %d.\n", cpu); | 914 | pr_debug("After Callout %d.\n", cpu); |
| 913 | 915 | ||
| 914 | /* | 916 | /* |
| 915 | * Wait 5s total for a response | 917 | * Wait 5s total for a response |
| @@ -922,10 +924,10 @@ do_rest: | |||
| 922 | 924 | ||
| 923 | if (cpu_isset(cpu, cpu_callin_map)) { | 925 | if (cpu_isset(cpu, cpu_callin_map)) { |
| 924 | /* number CPUs logically, starting from 1 (BSP is 0) */ | 926 | /* number CPUs logically, starting from 1 (BSP is 0) */ |
| 925 | Dprintk("OK.\n"); | 927 | pr_debug("OK.\n"); |
| 926 | printk(KERN_INFO "CPU%d: ", cpu); | 928 | printk(KERN_INFO "CPU%d: ", cpu); |
| 927 | print_cpu_info(&cpu_data(cpu)); | 929 | print_cpu_info(&cpu_data(cpu)); |
| 928 | Dprintk("CPU has booted.\n"); | 930 | pr_debug("CPU has booted.\n"); |
| 929 | } else { | 931 | } else { |
| 930 | boot_error = 1; | 932 | boot_error = 1; |
| 931 | if (*((volatile unsigned char *)trampoline_base) | 933 | if (*((volatile unsigned char *)trampoline_base) |
| @@ -970,7 +972,7 @@ int __cpuinit native_cpu_up(unsigned int cpu) | |||
| 970 | 972 | ||
| 971 | WARN_ON(irqs_disabled()); | 973 | WARN_ON(irqs_disabled()); |
| 972 | 974 | ||
| 973 | Dprintk("++++++++++++++++++++=_---CPU UP %u\n", cpu); | 975 | pr_debug("++++++++++++++++++++=_---CPU UP %u\n", cpu); |
| 974 | 976 | ||
| 975 | if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid || | 977 | if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid || |
| 976 | !physid_isset(apicid, phys_cpu_present_map)) { | 978 | !physid_isset(apicid, phys_cpu_present_map)) { |
| @@ -982,7 +984,7 @@ int __cpuinit native_cpu_up(unsigned int cpu) | |||
| 982 | * Already booted CPU? | 984 | * Already booted CPU? |
| 983 | */ | 985 | */ |
| 984 | if (cpu_isset(cpu, cpu_callin_map)) { | 986 | if (cpu_isset(cpu, cpu_callin_map)) { |
| 985 | Dprintk("do_boot_cpu %d Already started\n", cpu); | 987 | pr_debug("do_boot_cpu %d Already started\n", cpu); |
| 986 | return -ENOSYS; | 988 | return -ENOSYS; |
| 987 | } | 989 | } |
| 988 | 990 | ||
| @@ -1009,7 +1011,7 @@ int __cpuinit native_cpu_up(unsigned int cpu) | |||
| 1009 | err = do_boot_cpu(apicid, cpu); | 1011 | err = do_boot_cpu(apicid, cpu); |
| 1010 | #endif | 1012 | #endif |
| 1011 | if (err) { | 1013 | if (err) { |
| 1012 | Dprintk("do_boot_cpu failed %d\n", err); | 1014 | pr_debug("do_boot_cpu failed %d\n", err); |
| 1013 | return -EIO; | 1015 | return -EIO; |
| 1014 | } | 1016 | } |
| 1015 | 1017 | ||
| @@ -1055,6 +1057,34 @@ static __init void disable_smp(void) | |||
| 1055 | static int __init smp_sanity_check(unsigned max_cpus) | 1057 | static int __init smp_sanity_check(unsigned max_cpus) |
| 1056 | { | 1058 | { |
| 1057 | preempt_disable(); | 1059 | preempt_disable(); |
| 1060 | |||
| 1061 | #if defined(CONFIG_X86_PC) && defined(CONFIG_X86_32) | ||
| 1062 | if (def_to_bigsmp && nr_cpu_ids > 8) { | ||
| 1063 | unsigned int cpu; | ||
| 1064 | unsigned nr; | ||
| 1065 | |||
| 1066 | printk(KERN_WARNING | ||
| 1067 | "More than 8 CPUs detected - skipping them.\n" | ||
| 1068 | "Use CONFIG_X86_GENERICARCH and CONFIG_X86_BIGSMP.\n"); | ||
| 1069 | |||
| 1070 | nr = 0; | ||
| 1071 | for_each_present_cpu(cpu) { | ||
| 1072 | if (nr >= 8) | ||
| 1073 | cpu_clear(cpu, cpu_present_map); | ||
| 1074 | nr++; | ||
| 1075 | } | ||
| 1076 | |||
| 1077 | nr = 0; | ||
| 1078 | for_each_possible_cpu(cpu) { | ||
| 1079 | if (nr >= 8) | ||
| 1080 | cpu_clear(cpu, cpu_possible_map); | ||
| 1081 | nr++; | ||
| 1082 | } | ||
| 1083 | |||
| 1084 | nr_cpu_ids = 8; | ||
| 1085 | } | ||
| 1086 | #endif | ||
| 1087 | |||
| 1058 | if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) { | 1088 | if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) { |
| 1059 | printk(KERN_WARNING "weird, boot CPU (#%d) not listed" | 1089 | printk(KERN_WARNING "weird, boot CPU (#%d) not listed" |
| 1060 | "by the BIOS.\n", hard_smp_processor_id()); | 1090 | "by the BIOS.\n", hard_smp_processor_id()); |
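On 32-bit CONFIG_X86_PC kernels the default (non-bigsmp) APIC mode can only address 8 CPUs, so when def_to_bigsmp is set and more are enumerated, smp_sanity_check() now trims both cpu_present_map and cpu_possible_map to the first 8 and caps nr_cpu_ids, pointing the user at CONFIG_X86_GENERICARCH/CONFIG_X86_BIGSMP instead of silently misbehaving. The trimming idiom, reduced to one of the two identical loops above:

    /* Keep the first 8 present CPUs, drop the rest from the map. */
    unsigned int cpu, nr = 0;
    for_each_present_cpu(cpu) {
            if (nr >= 8)
                    cpu_clear(cpu, cpu_present_map);
            nr++;
    }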
| @@ -1147,10 +1177,17 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) | |||
| 1147 | * Setup boot CPU information | 1177 | * Setup boot CPU information |
| 1148 | */ | 1178 | */ |
| 1149 | smp_store_cpu_info(0); /* Final full version of the data */ | 1179 | smp_store_cpu_info(0); /* Final full version of the data */ |
| 1180 | #ifdef CONFIG_X86_32 | ||
| 1150 | boot_cpu_logical_apicid = logical_smp_processor_id(); | 1181 | boot_cpu_logical_apicid = logical_smp_processor_id(); |
| 1182 | #endif | ||
| 1151 | current_thread_info()->cpu = 0; /* needed? */ | 1183 | current_thread_info()->cpu = 0; /* needed? */ |
| 1152 | set_cpu_sibling_map(0); | 1184 | set_cpu_sibling_map(0); |
| 1153 | 1185 | ||
| 1186 | #ifdef CONFIG_X86_64 | ||
| 1187 | enable_IR_x2apic(); | ||
| 1188 | setup_apic_routing(); | ||
| 1189 | #endif | ||
| 1190 | |||
| 1154 | if (smp_sanity_check(max_cpus) < 0) { | 1191 | if (smp_sanity_check(max_cpus) < 0) { |
| 1155 | printk(KERN_INFO "SMP disabled\n"); | 1192 | printk(KERN_INFO "SMP disabled\n"); |
| 1156 | disable_smp(); | 1193 | disable_smp(); |
| @@ -1158,9 +1195,9 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) | |||
| 1158 | } | 1195 | } |
| 1159 | 1196 | ||
| 1160 | preempt_disable(); | 1197 | preempt_disable(); |
| 1161 | if (GET_APIC_ID(read_apic_id()) != boot_cpu_physical_apicid) { | 1198 | if (read_apic_id() != boot_cpu_physical_apicid) { |
| 1162 | panic("Boot APIC ID in local APIC unexpected (%d vs %d)", | 1199 | panic("Boot APIC ID in local APIC unexpected (%d vs %d)", |
| 1163 | GET_APIC_ID(read_apic_id()), boot_cpu_physical_apicid); | 1200 | read_apic_id(), boot_cpu_physical_apicid); |
| 1164 | /* Or can we switch back to PIC here? */ | 1201 | /* Or can we switch back to PIC here? */ |
| 1165 | } | 1202 | } |
| 1166 | preempt_enable(); | 1203 | preempt_enable(); |
| @@ -1193,6 +1230,9 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) | |||
| 1193 | printk(KERN_INFO "CPU%d: ", 0); | 1230 | printk(KERN_INFO "CPU%d: ", 0); |
| 1194 | print_cpu_info(&cpu_data(0)); | 1231 | print_cpu_info(&cpu_data(0)); |
| 1195 | setup_boot_clock(); | 1232 | setup_boot_clock(); |
| 1233 | |||
| 1234 | if (is_uv_system()) | ||
| 1235 | uv_system_init(); | ||
| 1196 | out: | 1236 | out: |
| 1197 | preempt_enable(); | 1237 | preempt_enable(); |
| 1198 | } | 1238 | } |
| @@ -1213,7 +1253,7 @@ void __init native_smp_prepare_boot_cpu(void) | |||
| 1213 | 1253 | ||
| 1214 | void __init native_smp_cpus_done(unsigned int max_cpus) | 1254 | void __init native_smp_cpus_done(unsigned int max_cpus) |
| 1215 | { | 1255 | { |
| 1216 | Dprintk("Boot done.\n"); | 1256 | pr_debug("Boot done.\n"); |
| 1217 | 1257 | ||
| 1218 | impress_friends(); | 1258 | impress_friends(); |
| 1219 | smp_checks(); | 1259 | smp_checks(); |
| @@ -1223,39 +1263,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus) | |||
| 1223 | check_nmi_watchdog(); | 1263 | check_nmi_watchdog(); |
| 1224 | } | 1264 | } |
| 1225 | 1265 | ||
| 1226 | #ifdef CONFIG_HOTPLUG_CPU | ||
| 1227 | |||
| 1228 | static void remove_siblinginfo(int cpu) | ||
| 1229 | { | ||
| 1230 | int sibling; | ||
| 1231 | struct cpuinfo_x86 *c = &cpu_data(cpu); | ||
| 1232 | |||
| 1233 | for_each_cpu_mask(sibling, per_cpu(cpu_core_map, cpu)) { | ||
| 1234 | cpu_clear(cpu, per_cpu(cpu_core_map, sibling)); | ||
| 1235 | /* ||
| 1236 | * last thread sibling in this cpu core going down | ||
| 1237 | */ | ||
| 1238 | if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1) | ||
| 1239 | cpu_data(sibling).booted_cores--; | ||
| 1240 | } | ||
| 1241 | |||
| 1242 | for_each_cpu_mask(sibling, per_cpu(cpu_sibling_map, cpu)) | ||
| 1243 | cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling)); | ||
| 1244 | cpus_clear(per_cpu(cpu_sibling_map, cpu)); | ||
| 1245 | cpus_clear(per_cpu(cpu_core_map, cpu)); | ||
| 1246 | c->phys_proc_id = 0; | ||
| 1247 | c->cpu_core_id = 0; | ||
| 1248 | cpu_clear(cpu, cpu_sibling_setup_map); | ||
| 1249 | } | ||
| 1250 | |||
| 1251 | static int additional_cpus __initdata = -1; | ||
| 1252 | |||
| 1253 | static __init int setup_additional_cpus(char *s) | ||
| 1254 | { | ||
| 1255 | return s && get_option(&s, &additional_cpus) ? 0 : -EINVAL; | ||
| 1256 | } | ||
| 1257 | early_param("additional_cpus", setup_additional_cpus); | ||
| 1258 | |||
| 1259 | /* | 1266 | /* |
| 1260 | * cpu_possible_map should be static, it cannot change as cpu's | 1267 | * cpu_possible_map should be static, it cannot change as cpu's |
| 1261 | * are onlined, or offlined. The reason is per-cpu data-structures | 1268 | * are onlined, or offlined. The reason is per-cpu data-structures |
| @@ -1275,24 +1282,13 @@ early_param("additional_cpus", setup_additional_cpus); | |||
| 1275 | */ | 1282 | */ |
| 1276 | __init void prefill_possible_map(void) | 1283 | __init void prefill_possible_map(void) |
| 1277 | { | 1284 | { |
| 1278 | int i; | 1285 | int i, possible; |
| 1279 | int possible; | ||
| 1280 | 1286 | ||
| 1281 | /* no processor from mptable or madt */ | 1287 | /* no processor from mptable or madt */ |
| 1282 | if (!num_processors) | 1288 | if (!num_processors) |
| 1283 | num_processors = 1; | 1289 | num_processors = 1; |
| 1284 | 1290 | ||
| 1285 | #ifdef CONFIG_HOTPLUG_CPU | 1291 | possible = num_processors + disabled_cpus; |
| 1286 | if (additional_cpus == -1) { | ||
| 1287 | if (disabled_cpus > 0) | ||
| 1288 | additional_cpus = disabled_cpus; | ||
| 1289 | else | ||
| 1290 | additional_cpus = 0; | ||
| 1291 | } | ||
| 1292 | #else | ||
| 1293 | additional_cpus = 0; | ||
| 1294 | #endif | ||
| 1295 | possible = num_processors + additional_cpus; | ||
| 1296 | if (possible > NR_CPUS) | 1292 | if (possible > NR_CPUS) |
| 1297 | possible = NR_CPUS; | 1293 | possible = NR_CPUS; |
| 1298 | 1294 | ||
| @@ -1305,17 +1301,64 @@ __init void prefill_possible_map(void) | |||
| 1305 | nr_cpu_ids = possible; | 1301 | nr_cpu_ids = possible; |
| 1306 | } | 1302 | } |
| 1307 | 1303 | ||
| 1304 | #ifdef CONFIG_HOTPLUG_CPU | ||
| 1305 | |||
| 1306 | static void remove_siblinginfo(int cpu) | ||
| 1307 | { | ||
| 1308 | int sibling; | ||
| 1309 | struct cpuinfo_x86 *c = &cpu_data(cpu); | ||
| 1310 | |||
| 1311 | for_each_cpu_mask_nr(sibling, per_cpu(cpu_core_map, cpu)) { | ||
| 1312 | cpu_clear(cpu, per_cpu(cpu_core_map, sibling)); | ||
| 1313 | /* ||
| 1314 | * last thread sibling in this cpu core going down | ||
| 1315 | */ | ||
| 1316 | if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1) | ||
| 1317 | cpu_data(sibling).booted_cores--; | ||
| 1318 | } | ||
| 1319 | |||
| 1320 | for_each_cpu_mask_nr(sibling, per_cpu(cpu_sibling_map, cpu)) | ||
| 1321 | cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling)); | ||
| 1322 | cpus_clear(per_cpu(cpu_sibling_map, cpu)); | ||
| 1323 | cpus_clear(per_cpu(cpu_core_map, cpu)); | ||
| 1324 | c->phys_proc_id = 0; | ||
| 1325 | c->cpu_core_id = 0; | ||
| 1326 | cpu_clear(cpu, cpu_sibling_setup_map); | ||
| 1327 | } | ||
| 1328 | |||
| 1308 | static void __ref remove_cpu_from_maps(int cpu) | 1329 | static void __ref remove_cpu_from_maps(int cpu) |
| 1309 | { | 1330 | { |
| 1310 | cpu_clear(cpu, cpu_online_map); | 1331 | cpu_clear(cpu, cpu_online_map); |
| 1311 | cpu_clear(cpu, cpu_callout_map); | 1332 | cpu_clear(cpu, cpu_callout_map); |
| 1312 | cpu_clear(cpu, cpu_callin_map); | 1333 | cpu_clear(cpu, cpu_callin_map); |
| 1313 | /* was set by cpu_init() */ | 1334 | /* was set by cpu_init() */ |
| 1314 | clear_bit(cpu, (unsigned long *)&cpu_initialized); | 1335 | cpu_clear(cpu, cpu_initialized); |
| 1315 | numa_remove_cpu(cpu); | 1336 | numa_remove_cpu(cpu); |
| 1316 | } | 1337 | } |
| 1317 | 1338 | ||
| 1318 | int __cpu_disable(void) | 1339 | void cpu_disable_common(void) |
| 1340 | { | ||
| 1341 | int cpu = smp_processor_id(); | ||
| 1342 | /* | ||
| 1343 | * HACK: | ||
| 1344 | * Allow any queued timer interrupts to get serviced | ||
| 1345 | * This is only a temporary solution until we cleanup | ||
| 1346 | * fixup_irqs as we do for IA64. | ||
| 1347 | */ | ||
| 1348 | local_irq_enable(); | ||
| 1349 | mdelay(1); | ||
| 1350 | |||
| 1351 | local_irq_disable(); | ||
| 1352 | remove_siblinginfo(cpu); | ||
| 1353 | |||
| 1354 | /* It's now safe to remove this processor from the online map */ | ||
| 1355 | lock_vector_lock(); | ||
| 1356 | remove_cpu_from_maps(cpu); | ||
| 1357 | unlock_vector_lock(); | ||
| 1358 | fixup_irqs(cpu_online_map); | ||
| 1359 | } | ||
| 1360 | |||
| 1361 | int native_cpu_disable(void) | ||
| 1319 | { | 1362 | { |
| 1320 | int cpu = smp_processor_id(); | 1363 | int cpu = smp_processor_id(); |
| 1321 | 1364 | ||
| @@ -1334,25 +1377,11 @@ int __cpu_disable(void) | |||
| 1334 | stop_apic_nmi_watchdog(NULL); | 1377 | stop_apic_nmi_watchdog(NULL); |
| 1335 | clear_local_APIC(); | 1378 | clear_local_APIC(); |
| 1336 | 1379 | ||
| 1337 | /* | 1380 | cpu_disable_common(); |
| 1338 | * HACK: | ||
| 1339 | * Allow any queued timer interrupts to get serviced | ||
| 1340 | * This is only a temporary solution until we cleanup | ||
| 1341 | * fixup_irqs as we do for IA64. | ||
| 1342 | */ | ||
| 1343 | local_irq_enable(); | ||
| 1344 | mdelay(1); | ||
| 1345 | |||
| 1346 | local_irq_disable(); | ||
| 1347 | remove_siblinginfo(cpu); | ||
| 1348 | |||
| 1349 | /* It's now safe to remove this processor from the online map */ | ||
| 1350 | remove_cpu_from_maps(cpu); | ||
| 1351 | fixup_irqs(cpu_online_map); | ||
| 1352 | return 0; | 1381 | return 0; |
| 1353 | } | 1382 | } |
| 1354 | 1383 | ||
| 1355 | void __cpu_die(unsigned int cpu) | 1384 | void native_cpu_die(unsigned int cpu) |
| 1356 | { | 1385 | { |
| 1357 | /* We don't do anything here: idle task is faking death itself. */ | 1386 | /* We don't do anything here: idle task is faking death itself. */ |
| 1358 | unsigned int i; | 1387 | unsigned int i; |
| @@ -1369,28 +1398,45 @@ void __cpu_die(unsigned int cpu) | |||
| 1369 | } | 1398 | } |
| 1370 | printk(KERN_ERR "CPU %u didn't die...\n", cpu); | 1399 | printk(KERN_ERR "CPU %u didn't die...\n", cpu); |
| 1371 | } | 1400 | } |
| 1401 | |||
| 1402 | void play_dead_common(void) | ||
| 1403 | { | ||
| 1404 | idle_task_exit(); | ||
| 1405 | reset_lazy_tlbstate(); | ||
| 1406 | irq_ctx_exit(raw_smp_processor_id()); | ||
| 1407 | c1e_remove_cpu(raw_smp_processor_id()); | ||
| 1408 | |||
| 1409 | mb(); | ||
| 1410 | /* Ack it */ | ||
| 1411 | __get_cpu_var(cpu_state) = CPU_DEAD; | ||
| 1412 | |||
| 1413 | /* | ||
| 1414 | * With physical CPU hotplug, we should halt the cpu | ||
| 1415 | */ | ||
| 1416 | local_irq_disable(); | ||
| 1417 | } | ||
| 1418 | |||
| 1419 | void native_play_dead(void) | ||
| 1420 | { | ||
| 1421 | play_dead_common(); | ||
| 1422 | wbinvd_halt(); | ||
| 1423 | } | ||
| 1424 | |||
| 1372 | #else /* ... !CONFIG_HOTPLUG_CPU */ | 1425 | #else /* ... !CONFIG_HOTPLUG_CPU */ |
| 1373 | int __cpu_disable(void) | 1426 | int native_cpu_disable(void) |
| 1374 | { | 1427 | { |
| 1375 | return -ENOSYS; | 1428 | return -ENOSYS; |
| 1376 | } | 1429 | } |
| 1377 | 1430 | ||
| 1378 | void __cpu_die(unsigned int cpu) | 1431 | void native_cpu_die(unsigned int cpu) |
| 1379 | { | 1432 | { |
| 1380 | /* We said "no" in __cpu_disable */ | 1433 | /* We said "no" in __cpu_disable */ |
| 1381 | BUG(); | 1434 | BUG(); |
| 1382 | } | 1435 | } |
| 1383 | #endif | ||
| 1384 | 1436 | ||
| 1385 | /* | 1437 | void native_play_dead(void) |
| 1386 | * If the BIOS enumerates physical processors before logical, | ||
| 1387 | * maxcpus=N at enumeration-time can be used to disable HT. | ||
| 1388 | */ | ||
| 1389 | static int __init parse_maxcpus(char *arg) | ||
| 1390 | { | 1438 | { |
| 1391 | extern unsigned int maxcpus; | 1439 | BUG(); |
| 1392 | |||
| 1393 | maxcpus = simple_strtoul(arg, NULL, 0); | ||
| 1394 | return 0; | ||
| 1395 | } | 1440 | } |
| 1396 | early_param("maxcpus", parse_maxcpus); | 1441 | |
| 1442 | #endif | ||
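The tail of smpboot.c is reorganized around the new smp_ops hooks: the map/sibling/irq-fixup teardown is factored into cpu_disable_common() (now done under vector_lock), native_cpu_disable()/native_cpu_die()/native_play_dead() become the native callbacks, play_dead_common() carries the shared "mark myself CPU_DEAD" handshake, the old additional_cpus= and maxcpus= parsing leaves this file, and prefill_possible_map() simply uses num_processors + disabled_cpus. A sketch of how an alternative backend could reuse the common pieces (the xen_* names are purely illustrative, not part of this diff):

    /* Hypothetical paravirt backend built on the shared helpers. */
    static int xen_cpu_disable(void)
    {
            cpu_disable_common();   /* sibling maps, online map, fixup_irqs() */
            return 0;
    }

    static void xen_play_dead(void)
    {
            play_dead_common();     /* idle_task_exit(), CPU_DEAD handshake, irqs off */
            /* backend-specific "park this vCPU" call goes here instead of wbinvd_halt() */
    }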
diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c index 99941b37eca0..397e309839dd 100644 --- a/arch/x86/kernel/smpcommon.c +++ b/arch/x86/kernel/smpcommon.c | |||
| @@ -8,18 +8,21 @@ | |||
| 8 | DEFINE_PER_CPU(unsigned long, this_cpu_off); | 8 | DEFINE_PER_CPU(unsigned long, this_cpu_off); |
| 9 | EXPORT_PER_CPU_SYMBOL(this_cpu_off); | 9 | EXPORT_PER_CPU_SYMBOL(this_cpu_off); |
| 10 | 10 | ||
| 11 | /* Initialize the CPU's GDT. This is either the boot CPU doing itself | 11 | /* |
| 12 | (still using the master per-cpu area), or a CPU doing it for a | 12 | * Initialize the CPU's GDT. This is either the boot CPU doing itself |
| 13 | secondary which will soon come up. */ | 13 | * (still using the master per-cpu area), or a CPU doing it for a |
| 14 | * secondary which will soon come up. | ||
| 15 | */ | ||
| 14 | __cpuinit void init_gdt(int cpu) | 16 | __cpuinit void init_gdt(int cpu) |
| 15 | { | 17 | { |
| 16 | struct desc_struct *gdt = get_cpu_gdt_table(cpu); | 18 | struct desc_struct gdt; |
| 17 | 19 | ||
| 18 | pack_descriptor(&gdt[GDT_ENTRY_PERCPU], | 20 | pack_descriptor(&gdt, __per_cpu_offset[cpu], 0xFFFFF, |
| 19 | __per_cpu_offset[cpu], 0xFFFFF, | ||
| 20 | 0x2 | DESCTYPE_S, 0x8); | 21 | 0x2 | DESCTYPE_S, 0x8); |
| 22 | gdt.s = 1; | ||
| 21 | 23 | ||
| 22 | gdt[GDT_ENTRY_PERCPU].s = 1; | 24 | write_gdt_entry(get_cpu_gdt_table(cpu), |
| 25 | GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S); | ||
| 23 | 26 | ||
| 24 | per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu]; | 27 | per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu]; |
| 25 | per_cpu(cpu_number, cpu) = cpu; | 28 | per_cpu(cpu_number, cpu) = cpu; |
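init_gdt() no longer writes the per-CPU GDT slot in place: it builds the descriptor on the stack with pack_descriptor() and installs it through write_gdt_entry(), the accessor that paravirt guests (e.g. Xen) can intercept, since a direct store into a read-only or hypervisor-managed GDT would fault. Before and after, condensed from the hunk (two alternatives shown side by side; they would not coexist in one function):

    /* Before: poke the live GDT entry directly. */
    struct desc_struct *gdt = get_cpu_gdt_table(cpu);
    pack_descriptor(&gdt[GDT_ENTRY_PERCPU], __per_cpu_offset[cpu], 0xFFFFF,
                    0x2 | DESCTYPE_S, 0x8);
    gdt[GDT_ENTRY_PERCPU].s = 1;

    /* After: build locally, install via the (possibly paravirtualized) accessor. */
    struct desc_struct gdt;
    pack_descriptor(&gdt, __per_cpu_offset[cpu], 0xFFFFF, 0x2 | DESCTYPE_S, 0x8);
    gdt.s = 1;
    write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S);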
diff --git a/arch/x86/kernel/smpcommon_32.c b/arch/x86/kernel/smpcommon_32.c deleted file mode 100644 index 8b137891791f..000000000000 --- a/arch/x86/kernel/smpcommon_32.c +++ /dev/null | |||
| @@ -1 +0,0 @@ | |||
| 1 | |||
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index 92c20fee6781..e8b9863ef8c4 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c | |||
| @@ -105,6 +105,20 @@ static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs) | |||
| 105 | static int enable_single_step(struct task_struct *child) | 105 | static int enable_single_step(struct task_struct *child) |
| 106 | { | 106 | { |
| 107 | struct pt_regs *regs = task_pt_regs(child); | 107 | struct pt_regs *regs = task_pt_regs(child); |
| 108 | unsigned long oflags; | ||
| 109 | |||
| 110 | /* | ||
| 111 | * If we stepped into a sysenter/syscall insn, it trapped in | ||
| 112 | * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP. | ||
| 113 | * If user-mode had set TF itself, then it's still clear from | ||
| 114 | * do_debug() and we need to set it again to restore the user | ||
| 115 | * state so we don't wrongly set TIF_FORCED_TF below. | ||
| 116 | * If enable_single_step() was used last and that is what | ||
| 117 | * set TIF_SINGLESTEP, then both TF and TIF_FORCED_TF are | ||
| 118 | * already set and our bookkeeping is fine. | ||
| 119 | */ | ||
| 120 | if (unlikely(test_tsk_thread_flag(child, TIF_SINGLESTEP))) | ||
| 121 | regs->flags |= X86_EFLAGS_TF; | ||
| 108 | 122 | ||
| 109 | /* | 123 | /* |
| 110 | * Always set TIF_SINGLESTEP - this guarantees that | 124 | * Always set TIF_SINGLESTEP - this guarantees that |
| @@ -113,11 +127,7 @@ static int enable_single_step(struct task_struct *child) | |||
| 113 | */ | 127 | */ |
| 114 | set_tsk_thread_flag(child, TIF_SINGLESTEP); | 128 | set_tsk_thread_flag(child, TIF_SINGLESTEP); |
| 115 | 129 | ||
| 116 | /* | 130 | oflags = regs->flags; |
| 117 | * If TF was already set, don't do anything else | ||
| 118 | */ | ||
| 119 | if (regs->flags & X86_EFLAGS_TF) | ||
| 120 | return 0; | ||
| 121 | 131 | ||
| 122 | /* Set TF on the kernel stack.. */ | 132 | /* Set TF on the kernel stack.. */ |
| 123 | regs->flags |= X86_EFLAGS_TF; | 133 | regs->flags |= X86_EFLAGS_TF; |
| @@ -126,9 +136,22 @@ static int enable_single_step(struct task_struct *child) | |||
| 126 | * ..but if TF is changed by the instruction we will trace, | 136 | * ..but if TF is changed by the instruction we will trace, |
| 127 | * don't mark it as being "us" that set it, so that we | 137 | * don't mark it as being "us" that set it, so that we |
| 128 | * won't clear it by hand later. | 138 | * won't clear it by hand later. |
| 139 | * | ||
| 140 | * Note that if we don't actually execute the popf because | ||
| 141 | * of a signal arriving right now or suchlike, we will lose | ||
| 142 | * track of the fact that it really was "us" that set it. | ||
| 129 | */ | 143 | */ |
| 130 | if (is_setting_trap_flag(child, regs)) | 144 | if (is_setting_trap_flag(child, regs)) { |
| 145 | clear_tsk_thread_flag(child, TIF_FORCED_TF); | ||
| 131 | return 0; | 146 | return 0; |
| 147 | } | ||
| 148 | |||
| 149 | /* | ||
| 150 | * If TF was already set, check whether it was us who set it. | ||
| 151 | * If not, we should never attempt a block step. | ||
| 152 | */ | ||
| 153 | if (oflags & X86_EFLAGS_TF) | ||
| 154 | return test_tsk_thread_flag(child, TIF_FORCED_TF); | ||
| 132 | 155 | ||
| 133 | set_tsk_thread_flag(child, TIF_FORCED_TF); | 156 | set_tsk_thread_flag(child, TIF_FORCED_TF); |
| 134 | 157 | ||
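enable_single_step() now keeps closer track of who owns the trap flag: if TIF_SINGLESTEP is already set (we just stepped into a sysenter/syscall and do_debug() cleared TF), TF is restored before the old flags are sampled; if the traced instruction is itself about to set TF, TIF_FORCED_TF is cleared instead of being left stale; and when TF was already set beforehand, block-stepping is only permitted if it was "us" that set it. The decision it encodes, condensed from the hunk above:

    /* oflags is regs->flags sampled before TF was forced on. */
    if (is_setting_trap_flag(child, regs)) {
            clear_tsk_thread_flag(child, TIF_FORCED_TF);        /* the insn owns TF, not us */
            return 0;
    }
    if (oflags & X86_EFLAGS_TF)                                 /* TF predates our forcing... */
            return test_tsk_thread_flag(child, TIF_FORCED_TF);  /* ...block-step only if we set it */
    set_tsk_thread_flag(child, TIF_FORCED_TF);                  /* we own TF from here on */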
diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c index d67ce5f044ba..7b987852e876 100644 --- a/arch/x86/kernel/summit_32.c +++ b/arch/x86/kernel/summit_32.c | |||
| @@ -30,7 +30,7 @@ | |||
| 30 | #include <linux/init.h> | 30 | #include <linux/init.h> |
| 31 | #include <asm/io.h> | 31 | #include <asm/io.h> |
| 32 | #include <asm/bios_ebda.h> | 32 | #include <asm/bios_ebda.h> |
| 33 | #include <asm/mach-summit/mach_mpparse.h> | 33 | #include <asm/summit/mpparse.h> |
| 34 | 34 | ||
| 35 | static struct rio_table_hdr *rio_table_hdr __initdata; | 35 | static struct rio_table_hdr *rio_table_hdr __initdata; |
| 36 | static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata; | 36 | static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata; |
diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c index 7066cb855a60..1884a8d12bfa 100644 --- a/arch/x86/kernel/sys_i386_32.c +++ b/arch/x86/kernel/sys_i386_32.c | |||
| @@ -22,6 +22,8 @@ | |||
| 22 | #include <linux/uaccess.h> | 22 | #include <linux/uaccess.h> |
| 23 | #include <linux/unistd.h> | 23 | #include <linux/unistd.h> |
| 24 | 24 | ||
| 25 | #include <asm/syscalls.h> | ||
| 26 | |||
| 25 | asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, | 27 | asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, |
| 26 | unsigned long prot, unsigned long flags, | 28 | unsigned long prot, unsigned long flags, |
| 27 | unsigned long fd, unsigned long pgoff) | 29 | unsigned long fd, unsigned long pgoff) |
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index 3b360ef33817..6bc211accf08 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c | |||
| @@ -13,15 +13,17 @@ | |||
| 13 | #include <linux/utsname.h> | 13 | #include <linux/utsname.h> |
| 14 | #include <linux/personality.h> | 14 | #include <linux/personality.h> |
| 15 | #include <linux/random.h> | 15 | #include <linux/random.h> |
| 16 | #include <linux/uaccess.h> | ||
| 16 | 17 | ||
| 17 | #include <asm/uaccess.h> | ||
| 18 | #include <asm/ia32.h> | 18 | #include <asm/ia32.h> |
| 19 | #include <asm/syscalls.h> | ||
| 19 | 20 | ||
| 20 | asmlinkage long sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, | 21 | asmlinkage long sys_mmap(unsigned long addr, unsigned long len, |
| 21 | unsigned long fd, unsigned long off) | 22 | unsigned long prot, unsigned long flags, |
| 23 | unsigned long fd, unsigned long off) | ||
| 22 | { | 24 | { |
| 23 | long error; | 25 | long error; |
| 24 | struct file * file; | 26 | struct file *file; |
| 25 | 27 | ||
| 26 | error = -EINVAL; | 28 | error = -EINVAL; |
| 27 | if (off & ~PAGE_MASK) | 29 | if (off & ~PAGE_MASK) |
| @@ -56,9 +58,9 @@ static void find_start_end(unsigned long flags, unsigned long *begin, | |||
| 56 | unmapped base down for this case. This can give | 58 | unmapped base down for this case. This can give |
| 57 | conflicts with the heap, but we assume that glibc | 59 | conflicts with the heap, but we assume that glibc |
| 58 | malloc knows how to fall back to mmap. Give it 1GB | 60 | malloc knows how to fall back to mmap. Give it 1GB |
| 59 | of playground for now. -AK */ | 61 | of playground for now. -AK */ |
| 60 | *begin = 0x40000000; | 62 | *begin = 0x40000000; |
| 61 | *end = 0x80000000; | 63 | *end = 0x80000000; |
| 62 | if (current->flags & PF_RANDOMIZE) { | 64 | if (current->flags & PF_RANDOMIZE) { |
| 63 | new_begin = randomize_range(*begin, *begin + 0x02000000, 0); | 65 | new_begin = randomize_range(*begin, *begin + 0x02000000, 0); |
| 64 | if (new_begin) | 66 | if (new_begin) |
| @@ -66,9 +68,9 @@ static void find_start_end(unsigned long flags, unsigned long *begin, | |||
| 66 | } | 68 | } |
| 67 | } else { | 69 | } else { |
| 68 | *begin = TASK_UNMAPPED_BASE; | 70 | *begin = TASK_UNMAPPED_BASE; |
| 69 | *end = TASK_SIZE; | 71 | *end = TASK_SIZE; |
| 70 | } | 72 | } |
| 71 | } | 73 | } |
| 72 | 74 | ||
| 73 | unsigned long | 75 | unsigned long |
| 74 | arch_get_unmapped_area(struct file *filp, unsigned long addr, | 76 | arch_get_unmapped_area(struct file *filp, unsigned long addr, |
| @@ -78,11 +80,11 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, | |||
| 78 | struct vm_area_struct *vma; | 80 | struct vm_area_struct *vma; |
| 79 | unsigned long start_addr; | 81 | unsigned long start_addr; |
| 80 | unsigned long begin, end; | 82 | unsigned long begin, end; |
| 81 | 83 | ||
| 82 | if (flags & MAP_FIXED) | 84 | if (flags & MAP_FIXED) |
| 83 | return addr; | 85 | return addr; |
| 84 | 86 | ||
| 85 | find_start_end(flags, &begin, &end); | 87 | find_start_end(flags, &begin, &end); |
| 86 | 88 | ||
| 87 | if (len > end) | 89 | if (len > end) |
| 88 | return -ENOMEM; | 90 | return -ENOMEM; |
| @@ -96,12 +98,12 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, | |||
| 96 | } | 98 | } |
| 97 | if (((flags & MAP_32BIT) || test_thread_flag(TIF_IA32)) | 99 | if (((flags & MAP_32BIT) || test_thread_flag(TIF_IA32)) |
| 98 | && len <= mm->cached_hole_size) { | 100 | && len <= mm->cached_hole_size) { |
| 99 | mm->cached_hole_size = 0; | 101 | mm->cached_hole_size = 0; |
| 100 | mm->free_area_cache = begin; | 102 | mm->free_area_cache = begin; |
| 101 | } | 103 | } |
| 102 | addr = mm->free_area_cache; | 104 | addr = mm->free_area_cache; |
| 103 | if (addr < begin) | 105 | if (addr < begin) |
| 104 | addr = begin; | 106 | addr = begin; |
| 105 | start_addr = addr; | 107 | start_addr = addr; |
| 106 | 108 | ||
| 107 | full_search: | 109 | full_search: |
| @@ -127,7 +129,7 @@ full_search: | |||
| 127 | return addr; | 129 | return addr; |
| 128 | } | 130 | } |
| 129 | if (addr + mm->cached_hole_size < vma->vm_start) | 131 | if (addr + mm->cached_hole_size < vma->vm_start) |
| 130 | mm->cached_hole_size = vma->vm_start - addr; | 132 | mm->cached_hole_size = vma->vm_start - addr; |
| 131 | 133 | ||
| 132 | addr = vma->vm_end; | 134 | addr = vma->vm_end; |
| 133 | } | 135 | } |
| @@ -177,7 +179,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, | |||
| 177 | vma = find_vma(mm, addr-len); | 179 | vma = find_vma(mm, addr-len); |
| 178 | if (!vma || addr <= vma->vm_start) | 180 | if (!vma || addr <= vma->vm_start) |
| 179 | /* remember the address as a hint for next time */ | 181 | /* remember the address as a hint for next time */ |
| 180 | return (mm->free_area_cache = addr-len); | 182 | return mm->free_area_cache = addr-len; |
| 181 | } | 183 | } |
| 182 | 184 | ||
| 183 | if (mm->mmap_base < len) | 185 | if (mm->mmap_base < len) |
| @@ -194,7 +196,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, | |||
| 194 | vma = find_vma(mm, addr); | 196 | vma = find_vma(mm, addr); |
| 195 | if (!vma || addr+len <= vma->vm_start) | 197 | if (!vma || addr+len <= vma->vm_start) |
| 196 | /* remember the address as a hint for next time */ | 198 | /* remember the address as a hint for next time */ |
| 197 | return (mm->free_area_cache = addr); | 199 | return mm->free_area_cache = addr; |
| 198 | 200 | ||
| 199 | /* remember the largest hole we saw so far */ | 201 | /* remember the largest hole we saw so far */ |
| 200 | if (addr + mm->cached_hole_size < vma->vm_start) | 202 | if (addr + mm->cached_hole_size < vma->vm_start) |
| @@ -224,13 +226,13 @@ bottomup: | |||
| 224 | } | 226 | } |
| 225 | 227 | ||
| 226 | 228 | ||
| 227 | asmlinkage long sys_uname(struct new_utsname __user * name) | 229 | asmlinkage long sys_uname(struct new_utsname __user *name) |
| 228 | { | 230 | { |
| 229 | int err; | 231 | int err; |
| 230 | down_read(&uts_sem); | 232 | down_read(&uts_sem); |
| 231 | err = copy_to_user(name, utsname(), sizeof (*name)); | 233 | err = copy_to_user(name, utsname(), sizeof(*name)); |
| 232 | up_read(&uts_sem); | 234 | up_read(&uts_sem); |
| 233 | if (personality(current->personality) == PER_LINUX32) | 235 | if (personality(current->personality) == PER_LINUX32) |
| 234 | err |= copy_to_user(&name->machine, "i686", 5); | 236 | err |= copy_to_user(&name->machine, "i686", 5); |
| 235 | return err ? -EFAULT : 0; | 237 | return err ? -EFAULT : 0; |
| 236 | } | 238 | } |
diff --git a/arch/x86/kernel/syscall_64.c b/arch/x86/kernel/syscall_64.c index 170d43c17487..de87d6008295 100644 --- a/arch/x86/kernel/syscall_64.c +++ b/arch/x86/kernel/syscall_64.c | |||
| @@ -8,12 +8,12 @@ | |||
| 8 | #define __NO_STUBS | 8 | #define __NO_STUBS |
| 9 | 9 | ||
| 10 | #define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ; | 10 | #define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ; |
| 11 | #undef _ASM_X86_64_UNISTD_H_ | 11 | #undef _ASM_X86_UNISTD_64_H |
| 12 | #include <asm/unistd_64.h> | 12 | #include <asm/unistd_64.h> |
| 13 | 13 | ||
| 14 | #undef __SYSCALL | 14 | #undef __SYSCALL |
| 15 | #define __SYSCALL(nr, sym) [nr] = sym, | 15 | #define __SYSCALL(nr, sym) [nr] = sym, |
| 16 | #undef _ASM_X86_64_UNISTD_H_ | 16 | #undef _ASM_X86_UNISTD_64_H |
| 17 | 17 | ||
| 18 | typedef void (*sys_call_ptr_t)(void); | 18 | typedef void (*sys_call_ptr_t)(void); |
| 19 | 19 | ||
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index adff5562f5fd..d44395ff34c3 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S | |||
| @@ -326,3 +326,9 @@ ENTRY(sys_call_table) | |||
| 326 | .long sys_fallocate | 326 | .long sys_fallocate |
| 327 | .long sys_timerfd_settime /* 325 */ | 327 | .long sys_timerfd_settime /* 325 */ |
| 328 | .long sys_timerfd_gettime | 328 | .long sys_timerfd_gettime |
| 329 | .long sys_signalfd4 | ||
| 330 | .long sys_eventfd2 | ||
| 331 | .long sys_epoll_create1 | ||
| 332 | .long sys_dup3 /* 330 */ | ||
| 333 | .long sys_pipe2 | ||
| 334 | .long sys_inotify_init1 | ||
diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c index 059ca6ee59b4..77b400f06ea2 100644 --- a/arch/x86/kernel/time_32.c +++ b/arch/x86/kernel/time_32.c | |||
| @@ -36,6 +36,7 @@ | |||
| 36 | #include <asm/arch_hooks.h> | 36 | #include <asm/arch_hooks.h> |
| 37 | #include <asm/hpet.h> | 37 | #include <asm/hpet.h> |
| 38 | #include <asm/time.h> | 38 | #include <asm/time.h> |
| 39 | #include <asm/timer.h> | ||
| 39 | 40 | ||
| 40 | #include "do_timer.h" | 41 | #include "do_timer.h" |
| 41 | 42 | ||
| @@ -46,10 +47,9 @@ unsigned long profile_pc(struct pt_regs *regs) | |||
| 46 | unsigned long pc = instruction_pointer(regs); | 47 | unsigned long pc = instruction_pointer(regs); |
| 47 | 48 | ||
| 48 | #ifdef CONFIG_SMP | 49 | #ifdef CONFIG_SMP |
| 49 | if (!v8086_mode(regs) && SEGMENT_IS_KERNEL_CODE(regs->cs) && | 50 | if (!user_mode_vm(regs) && in_lock_functions(pc)) { |
| 50 | in_lock_functions(pc)) { | ||
| 51 | #ifdef CONFIG_FRAME_POINTER | 51 | #ifdef CONFIG_FRAME_POINTER |
| 52 | return *(unsigned long *)(regs->bp + 4); | 52 | return *(unsigned long *)(regs->bp + sizeof(long)); |
| 53 | #else | 53 | #else |
| 54 | unsigned long *sp = (unsigned long *)&regs->sp; | 54 | unsigned long *sp = (unsigned long *)&regs->sp; |
| 55 | 55 | ||
| @@ -94,6 +94,7 @@ irqreturn_t timer_interrupt(int irq, void *dev_id) | |||
| 94 | 94 | ||
| 95 | do_timer_interrupt_hook(); | 95 | do_timer_interrupt_hook(); |
| 96 | 96 | ||
| 97 | #ifdef CONFIG_MCA | ||
| 97 | if (MCA_bus) { | 98 | if (MCA_bus) { |
| 98 | /* The PS/2 uses level-triggered interrupts. You can't | 99 | /* The PS/2 uses level-triggered interrupts. You can't |
| 99 | turn them off, nor would you want to (any attempt to | 100 | turn them off, nor would you want to (any attempt to |
| @@ -107,6 +108,7 @@ irqreturn_t timer_interrupt(int irq, void *dev_id) | |||
| 107 | u8 irq_v = inb_p( 0x61 ); /* read the current state */ | 108 | u8 irq_v = inb_p( 0x61 ); /* read the current state */ |
| 108 | outb_p( irq_v|0x80, 0x61 ); /* reset the IRQ */ | 109 | outb_p( irq_v|0x80, 0x61 ); /* reset the IRQ */ |
| 109 | } | 110 | } |
| 111 | #endif | ||
| 110 | 112 | ||
| 111 | return IRQ_HANDLED; | 113 | return IRQ_HANDLED; |
| 112 | } | 114 | } |
| @@ -129,6 +131,7 @@ void __init hpet_time_init(void) | |||
| 129 | */ | 131 | */ |
| 130 | void __init time_init(void) | 132 | void __init time_init(void) |
| 131 | { | 133 | { |
| 134 | pre_time_init_hook(); | ||
| 132 | tsc_init(); | 135 | tsc_init(); |
| 133 | late_time_init = choose_time_init(); | 136 | late_time_init = choose_time_init(); |
| 134 | } | 137 | } |
diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c index e3d49c553af2..cb19d650c216 100644 --- a/arch/x86/kernel/time_64.c +++ b/arch/x86/kernel/time_64.c | |||
| @@ -16,6 +16,7 @@ | |||
| 16 | #include <linux/interrupt.h> | 16 | #include <linux/interrupt.h> |
| 17 | #include <linux/module.h> | 17 | #include <linux/module.h> |
| 18 | #include <linux/time.h> | 18 | #include <linux/time.h> |
| 19 | #include <linux/mca.h> | ||
| 19 | 20 | ||
| 20 | #include <asm/i8253.h> | 21 | #include <asm/i8253.h> |
| 21 | #include <asm/hpet.h> | 22 | #include <asm/hpet.h> |
| @@ -33,23 +34,34 @@ unsigned long profile_pc(struct pt_regs *regs) | |||
| 33 | /* Assume the lock function has either no stack frame or a copy | 34 | /* Assume the lock function has either no stack frame or a copy |
| 34 | of flags from PUSHF | 35 | of flags from PUSHF |
| 35 | Eflags always has bits 22 and up cleared unlike kernel addresses. */ | 36 | Eflags always has bits 22 and up cleared unlike kernel addresses. */ |
| 36 | if (!user_mode(regs) && in_lock_functions(pc)) { | 37 | if (!user_mode_vm(regs) && in_lock_functions(pc)) { |
| 38 | #ifdef CONFIG_FRAME_POINTER | ||
| 39 | return *(unsigned long *)(regs->bp + sizeof(long)); | ||
| 40 | #else | ||
| 37 | unsigned long *sp = (unsigned long *)regs->sp; | 41 | unsigned long *sp = (unsigned long *)regs->sp; |
| 38 | if (sp[0] >> 22) | 42 | if (sp[0] >> 22) |
| 39 | return sp[0]; | 43 | return sp[0]; |
| 40 | if (sp[1] >> 22) | 44 | if (sp[1] >> 22) |
| 41 | return sp[1]; | 45 | return sp[1]; |
| 46 | #endif | ||
| 42 | } | 47 | } |
| 43 | return pc; | 48 | return pc; |
| 44 | } | 49 | } |
| 45 | EXPORT_SYMBOL(profile_pc); | 50 | EXPORT_SYMBOL(profile_pc); |
| 46 | 51 | ||
| 47 | static irqreturn_t timer_event_interrupt(int irq, void *dev_id) | 52 | irqreturn_t timer_interrupt(int irq, void *dev_id) |
| 48 | { | 53 | { |
| 49 | add_pda(irq0_irqs, 1); | 54 | add_pda(irq0_irqs, 1); |
| 50 | 55 | ||
| 51 | global_clock_event->event_handler(global_clock_event); | 56 | global_clock_event->event_handler(global_clock_event); |
| 52 | 57 | ||
| 58 | #ifdef CONFIG_MCA | ||
| 59 | if (MCA_bus) { | ||
| 60 | u8 irq_v = inb_p(0x61); /* read the current state */ | ||
| 61 | outb_p(irq_v|0x80, 0x61); /* reset the IRQ */ | ||
| 62 | } | ||
| 63 | #endif | ||
| 64 | |||
| 53 | return IRQ_HANDLED; | 65 | return IRQ_HANDLED; |
| 54 | } | 66 | } |
| 55 | 67 | ||
| @@ -100,7 +112,7 @@ unsigned long __init calibrate_cpu(void) | |||
| 100 | } | 112 | } |
| 101 | 113 | ||
| 102 | static struct irqaction irq0 = { | 114 | static struct irqaction irq0 = { |
| 103 | .handler = timer_event_interrupt, | 115 | .handler = timer_interrupt, |
| 104 | .flags = IRQF_DISABLED | IRQF_IRQPOLL | IRQF_NOBALANCING, | 116 | .flags = IRQF_DISABLED | IRQF_IRQPOLL | IRQF_NOBALANCING, |
| 105 | .mask = CPU_MASK_NONE, | 117 | .mask = CPU_MASK_NONE, |
| 106 | .name = "timer" | 118 | .name = "timer" |
| @@ -111,16 +123,13 @@ void __init hpet_time_init(void) | |||
| 111 | if (!hpet_enable()) | 123 | if (!hpet_enable()) |
| 112 | setup_pit_timer(); | 124 | setup_pit_timer(); |
| 113 | 125 | ||
| 126 | irq0.mask = cpumask_of_cpu(0); | ||
| 114 | setup_irq(0, &irq0); | 127 | setup_irq(0, &irq0); |
| 115 | } | 128 | } |
| 116 | 129 | ||
| 117 | void __init time_init(void) | 130 | void __init time_init(void) |
| 118 | { | 131 | { |
| 119 | tsc_init(); | 132 | tsc_init(); |
| 120 | if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP)) | ||
| 121 | vgetcpu_mode = VGETCPU_RDTSCP; | ||
| 122 | else | ||
| 123 | vgetcpu_mode = VGETCPU_LSL; | ||
| 124 | 133 | ||
| 125 | late_time_init = choose_time_init(); | 134 | late_time_init = choose_time_init(); |
| 126 | } | 135 | } |
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c index fec1ecedc9b7..e00534b33534 100644 --- a/arch/x86/kernel/tlb_32.c +++ b/arch/x86/kernel/tlb_32.c | |||
| @@ -241,3 +241,11 @@ void flush_tlb_all(void) | |||
| 241 | on_each_cpu(do_flush_tlb_all, NULL, 1); | 241 | on_each_cpu(do_flush_tlb_all, NULL, 1); |
| 242 | } | 242 | } |
| 243 | 243 | ||
| 244 | void reset_lazy_tlbstate(void) | ||
| 245 | { | ||
| 246 | int cpu = raw_smp_processor_id(); | ||
| 247 | |||
| 248 | per_cpu(cpu_tlbstate, cpu).state = 0; | ||
| 249 | per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; | ||
| 250 | } | ||
| 251 | |||
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index d0fbb7712ab0..04431f34fd16 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c | |||
| @@ -6,7 +6,7 @@ | |||
| 6 | * This code is released under the GNU General Public License version 2 or | 6 | * This code is released under the GNU General Public License version 2 or |
| 7 | * later. | 7 | * later. |
| 8 | */ | 8 | */ |
| 9 | #include <linux/mc146818rtc.h> | 9 | #include <linux/seq_file.h> |
| 10 | #include <linux/proc_fs.h> | 10 | #include <linux/proc_fs.h> |
| 11 | #include <linux/kernel.h> | 11 | #include <linux/kernel.h> |
| 12 | 12 | ||
| @@ -17,6 +17,7 @@ | |||
| 17 | #include <asm/genapic.h> | 17 | #include <asm/genapic.h> |
| 18 | #include <asm/idle.h> | 18 | #include <asm/idle.h> |
| 19 | #include <asm/tsc.h> | 19 | #include <asm/tsc.h> |
| 20 | #include <asm/irq_vectors.h> | ||
| 20 | 21 | ||
| 21 | #include <mach_apic.h> | 22 | #include <mach_apic.h> |
| 22 | 23 | ||
| @@ -783,7 +784,7 @@ static int __init uv_bau_init(void) | |||
| 783 | uv_init_blade(blade, node, cur_cpu); | 784 | uv_init_blade(blade, node, cur_cpu); |
| 784 | cur_cpu += uv_blade_nr_possible_cpus(blade); | 785 | cur_cpu += uv_blade_nr_possible_cpus(blade); |
| 785 | } | 786 | } |
| 786 | set_intr_gate(UV_BAU_MESSAGE, uv_bau_message_intr1); | 787 | alloc_intr_gate(UV_BAU_MESSAGE, uv_bau_message_intr1); |
| 787 | uv_enable_timeouts(); | 788 | uv_enable_timeouts(); |
| 788 | 789 | ||
| 789 | return 0; | 790 | return 0; |
diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c index ab6bf375a307..6bb7b8579e70 100644 --- a/arch/x86/kernel/tls.c +++ b/arch/x86/kernel/tls.c | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include <asm/ldt.h> | 10 | #include <asm/ldt.h> |
| 11 | #include <asm/processor.h> | 11 | #include <asm/processor.h> |
| 12 | #include <asm/proto.h> | 12 | #include <asm/proto.h> |
| 13 | #include <asm/syscalls.h> | ||
| 13 | 14 | ||
| 14 | #include "tls.h" | 15 | #include "tls.h" |
| 15 | 16 | ||
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps.c index 8a768973c4f0..04d242ab0161 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps.c | |||
| @@ -7,13 +7,11 @@ | |||
| 7 | */ | 7 | */ |
| 8 | 8 | ||
| 9 | /* | 9 | /* |
| 10 | * 'Traps.c' handles hardware traps and faults after we have saved some | 10 | * Handle hardware traps and faults. |
| 11 | * state in 'asm.s'. | ||
| 12 | */ | 11 | */ |
| 13 | #include <linux/interrupt.h> | 12 | #include <linux/interrupt.h> |
| 14 | #include <linux/kallsyms.h> | 13 | #include <linux/kallsyms.h> |
| 15 | #include <linux/spinlock.h> | 14 | #include <linux/spinlock.h> |
| 16 | #include <linux/highmem.h> | ||
| 17 | #include <linux/kprobes.h> | 15 | #include <linux/kprobes.h> |
| 18 | #include <linux/uaccess.h> | 16 | #include <linux/uaccess.h> |
| 19 | #include <linux/utsname.h> | 17 | #include <linux/utsname.h> |
| @@ -32,6 +30,8 @@ | |||
| 32 | #include <linux/bug.h> | 30 | #include <linux/bug.h> |
| 33 | #include <linux/nmi.h> | 31 | #include <linux/nmi.h> |
| 34 | #include <linux/mm.h> | 32 | #include <linux/mm.h> |
| 33 | #include <linux/smp.h> | ||
| 34 | #include <linux/io.h> | ||
| 35 | 35 | ||
| 36 | #ifdef CONFIG_EISA | 36 | #ifdef CONFIG_EISA |
| 37 | #include <linux/ioport.h> | 37 | #include <linux/ioport.h> |
| @@ -46,20 +46,31 @@ | |||
| 46 | #include <linux/edac.h> | 46 | #include <linux/edac.h> |
| 47 | #endif | 47 | #endif |
| 48 | 48 | ||
| 49 | #include <asm/arch_hooks.h> | ||
| 50 | #include <asm/stacktrace.h> | 49 | #include <asm/stacktrace.h> |
| 51 | #include <asm/processor.h> | 50 | #include <asm/processor.h> |
| 52 | #include <asm/debugreg.h> | 51 | #include <asm/debugreg.h> |
| 53 | #include <asm/atomic.h> | 52 | #include <asm/atomic.h> |
| 54 | #include <asm/system.h> | 53 | #include <asm/system.h> |
| 55 | #include <asm/unwind.h> | 54 | #include <asm/unwind.h> |
| 55 | #include <asm/traps.h> | ||
| 56 | #include <asm/desc.h> | 56 | #include <asm/desc.h> |
| 57 | #include <asm/i387.h> | 57 | #include <asm/i387.h> |
| 58 | |||
| 59 | #include <mach_traps.h> | ||
| 60 | |||
| 61 | #ifdef CONFIG_X86_64 | ||
| 62 | #include <asm/pgalloc.h> | ||
| 63 | #include <asm/proto.h> | ||
| 64 | #include <asm/pda.h> | ||
| 65 | #else | ||
| 66 | #include <asm/processor-flags.h> | ||
| 67 | #include <asm/arch_hooks.h> | ||
| 58 | #include <asm/nmi.h> | 68 | #include <asm/nmi.h> |
| 59 | #include <asm/smp.h> | 69 | #include <asm/smp.h> |
| 60 | #include <asm/io.h> | 70 | #include <asm/io.h> |
| 71 | #include <asm/traps.h> | ||
| 61 | 72 | ||
| 62 | #include "mach_traps.h" | 73 | #include "cpu/mcheck/mce.h" |
| 63 | 74 | ||
| 64 | DECLARE_BITMAP(used_vectors, NR_VECTORS); | 75 | DECLARE_BITMAP(used_vectors, NR_VECTORS); |
| 65 | EXPORT_SYMBOL_GPL(used_vectors); | 76 | EXPORT_SYMBOL_GPL(used_vectors); |
| @@ -76,431 +87,104 @@ char ignore_fpu_irq; | |||
| 76 | */ | 87 | */ |
| 77 | gate_desc idt_table[256] | 88 | gate_desc idt_table[256] |
| 78 | __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; | 89 | __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; |
| 79 | |||
| 80 | asmlinkage void divide_error(void); | ||
| 81 | asmlinkage void debug(void); | ||
| 82 | asmlinkage void nmi(void); | ||
| 83 | asmlinkage void int3(void); | ||
| 84 | asmlinkage void overflow(void); | ||
| 85 | asmlinkage void bounds(void); | ||
| 86 | asmlinkage void invalid_op(void); | ||
| 87 | asmlinkage void device_not_available(void); | ||
| 88 | asmlinkage void coprocessor_segment_overrun(void); | ||
| 89 | asmlinkage void invalid_TSS(void); | ||
| 90 | asmlinkage void segment_not_present(void); | ||
| 91 | asmlinkage void stack_segment(void); | ||
| 92 | asmlinkage void general_protection(void); | ||
| 93 | asmlinkage void page_fault(void); | ||
| 94 | asmlinkage void coprocessor_error(void); | ||
| 95 | asmlinkage void simd_coprocessor_error(void); | ||
| 96 | asmlinkage void alignment_check(void); | ||
| 97 | asmlinkage void spurious_interrupt_bug(void); | ||
| 98 | asmlinkage void machine_check(void); | ||
| 99 | |||
| 100 | int panic_on_unrecovered_nmi; | ||
| 101 | int kstack_depth_to_print = 24; | ||
| 102 | static unsigned int code_bytes = 64; | ||
| 103 | static int ignore_nmis; | ||
| 104 | static int die_counter; | ||
| 105 | |||
| 106 | void printk_address(unsigned long address, int reliable) | ||
| 107 | { | ||
| 108 | #ifdef CONFIG_KALLSYMS | ||
| 109 | unsigned long offset = 0; | ||
| 110 | unsigned long symsize; | ||
| 111 | const char *symname; | ||
| 112 | char *modname; | ||
| 113 | char *delim = ":"; | ||
| 114 | char namebuf[KSYM_NAME_LEN]; | ||
| 115 | char reliab[4] = ""; | ||
| 116 | |||
| 117 | symname = kallsyms_lookup(address, &symsize, &offset, | ||
| 118 | &modname, namebuf); | ||
| 119 | if (!symname) { | ||
| 120 | printk(" [<%08lx>]\n", address); | ||
| 121 | return; | ||
| 122 | } | ||
| 123 | if (!reliable) | ||
| 124 | strcpy(reliab, "? "); | ||
| 125 | |||
| 126 | if (!modname) | ||
| 127 | modname = delim = ""; | ||
| 128 | printk(" [<%08lx>] %s%s%s%s%s+0x%lx/0x%lx\n", | ||
| 129 | address, reliab, delim, modname, delim, symname, offset, symsize); | ||
| 130 | #else | ||
| 131 | printk(" [<%08lx>]\n", address); | ||
| 132 | #endif | 90 | #endif |
| 133 | } | ||
| 134 | |||
| 135 | static inline int valid_stack_ptr(struct thread_info *tinfo, | ||
| 136 | void *p, unsigned int size) | ||
| 137 | { | ||
| 138 | void *t = tinfo; | ||
| 139 | return p > t && p <= t + THREAD_SIZE - size; | ||
| 140 | } | ||
| 141 | |||
| 142 | /* The form of the top of the frame on the stack */ | ||
| 143 | struct stack_frame { | ||
| 144 | struct stack_frame *next_frame; | ||
| 145 | unsigned long return_address; | ||
| 146 | }; | ||
| 147 | 91 | ||
| 148 | static inline unsigned long | 92 | static int ignore_nmis; |
| 149 | print_context_stack(struct thread_info *tinfo, | ||
| 150 | unsigned long *stack, unsigned long bp, | ||
| 151 | const struct stacktrace_ops *ops, void *data) | ||
| 152 | { | ||
| 153 | struct stack_frame *frame = (struct stack_frame *)bp; | ||
| 154 | |||
| 155 | while (valid_stack_ptr(tinfo, stack, sizeof(*stack))) { | ||
| 156 | unsigned long addr; | ||
| 157 | |||
| 158 | addr = *stack; | ||
| 159 | if (__kernel_text_address(addr)) { | ||
| 160 | if ((unsigned long) stack == bp + 4) { | ||
| 161 | ops->address(data, addr, 1); | ||
| 162 | frame = frame->next_frame; | ||
| 163 | bp = (unsigned long) frame; | ||
| 164 | } else { | ||
| 165 | ops->address(data, addr, bp == 0); | ||
| 166 | } | ||
| 167 | } | ||
| 168 | stack++; | ||
| 169 | } | ||
| 170 | return bp; | ||
| 171 | } | ||
| 172 | |||
| 173 | void dump_trace(struct task_struct *task, struct pt_regs *regs, | ||
| 174 | unsigned long *stack, unsigned long bp, | ||
| 175 | const struct stacktrace_ops *ops, void *data) | ||
| 176 | { | ||
| 177 | if (!task) | ||
| 178 | task = current; | ||
| 179 | |||
| 180 | if (!stack) { | ||
| 181 | unsigned long dummy; | ||
| 182 | stack = &dummy; | ||
| 183 | if (task != current) | ||
| 184 | stack = (unsigned long *)task->thread.sp; | ||
| 185 | } | ||
| 186 | |||
| 187 | #ifdef CONFIG_FRAME_POINTER | ||
| 188 | if (!bp) { | ||
| 189 | if (task == current) { | ||
| 190 | /* Grab bp right from our regs */ | ||
| 191 | asm("movl %%ebp, %0" : "=r" (bp) :); | ||
| 192 | } else { | ||
| 193 | /* bp is the last reg pushed by switch_to */ | ||
| 194 | bp = *(unsigned long *) task->thread.sp; | ||
| 195 | } | ||
| 196 | } | ||
| 197 | #endif | ||
| 198 | |||
| 199 | for (;;) { | ||
| 200 | struct thread_info *context; | ||
| 201 | |||
| 202 | context = (struct thread_info *) | ||
| 203 | ((unsigned long)stack & (~(THREAD_SIZE - 1))); | ||
| 204 | bp = print_context_stack(context, stack, bp, ops, data); | ||
| 205 | /* | ||
| 206 | * Should be after the line below, but somewhere | ||
| 207 | * in early boot context comes out corrupted and we | ||
| 208 | * can't reference it: | ||
| 209 | */ | ||
| 210 | if (ops->stack(data, "IRQ") < 0) | ||
| 211 | break; | ||
| 212 | stack = (unsigned long *)context->previous_esp; | ||
| 213 | if (!stack) | ||
| 214 | break; | ||
| 215 | touch_nmi_watchdog(); | ||
| 216 | } | ||
| 217 | } | ||
| 218 | EXPORT_SYMBOL(dump_trace); | ||
| 219 | |||
| 220 | static void | ||
| 221 | print_trace_warning_symbol(void *data, char *msg, unsigned long symbol) | ||
| 222 | { | ||
| 223 | printk(data); | ||
| 224 | print_symbol(msg, symbol); | ||
| 225 | printk("\n"); | ||
| 226 | } | ||
| 227 | |||
| 228 | static void print_trace_warning(void *data, char *msg) | ||
| 229 | { | ||
| 230 | printk("%s%s\n", (char *)data, msg); | ||
| 231 | } | ||
| 232 | |||
| 233 | static int print_trace_stack(void *data, char *name) | ||
| 234 | { | ||
| 235 | return 0; | ||
| 236 | } | ||
| 237 | |||
| 238 | /* | ||
| 239 | * Print one address/symbol entries per line. | ||
| 240 | */ | ||
| 241 | static void print_trace_address(void *data, unsigned long addr, int reliable) | ||
| 242 | { | ||
| 243 | printk("%s [<%08lx>] ", (char *)data, addr); | ||
| 244 | if (!reliable) | ||
| 245 | printk("? "); | ||
| 246 | print_symbol("%s\n", addr); | ||
| 247 | touch_nmi_watchdog(); | ||
| 248 | } | ||
| 249 | |||
| 250 | static const struct stacktrace_ops print_trace_ops = { | ||
| 251 | .warning = print_trace_warning, | ||
| 252 | .warning_symbol = print_trace_warning_symbol, | ||
| 253 | .stack = print_trace_stack, | ||
| 254 | .address = print_trace_address, | ||
| 255 | }; | ||
| 256 | 93 | ||
| 257 | static void | 94 | static inline void conditional_sti(struct pt_regs *regs) |
| 258 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
| 259 | unsigned long *stack, unsigned long bp, char *log_lvl) | ||
| 260 | { | 95 | { |
| 261 | dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); | 96 | if (regs->flags & X86_EFLAGS_IF) |
| 262 | printk("%s =======================\n", log_lvl); | 97 | local_irq_enable(); |
| 263 | } | 98 | } |
| 264 | 99 | ||
| 265 | void show_trace(struct task_struct *task, struct pt_regs *regs, | 100 | static inline void preempt_conditional_sti(struct pt_regs *regs) |
| 266 | unsigned long *stack, unsigned long bp) | ||
| 267 | { | 101 | { |
| 268 | show_trace_log_lvl(task, regs, stack, bp, ""); | 102 | inc_preempt_count(); |
| 103 | if (regs->flags & X86_EFLAGS_IF) | ||
| 104 | local_irq_enable(); | ||
| 269 | } | 105 | } |
| 270 | 106 | ||
| 271 | static void | 107 | static inline void preempt_conditional_cli(struct pt_regs *regs) |
| 272 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
| 273 | unsigned long *sp, unsigned long bp, char *log_lvl) | ||
| 274 | { | 108 | { |
| 275 | unsigned long *stack; | 109 | if (regs->flags & X86_EFLAGS_IF) |
| 276 | int i; | 110 | local_irq_disable(); |
| 277 | 111 | dec_preempt_count(); | |
| 278 | if (sp == NULL) { | ||
| 279 | if (task) | ||
| 280 | sp = (unsigned long *)task->thread.sp; | ||
| 281 | else | ||
| 282 | sp = (unsigned long *)&sp; | ||
| 283 | } | ||
| 284 | |||
| 285 | stack = sp; | ||
| 286 | for (i = 0; i < kstack_depth_to_print; i++) { | ||
| 287 | if (kstack_end(stack)) | ||
| 288 | break; | ||
| 289 | if (i && ((i % 8) == 0)) | ||
| 290 | printk("\n%s ", log_lvl); | ||
| 291 | printk("%08lx ", *stack++); | ||
| 292 | } | ||
| 293 | printk("\n%sCall Trace:\n", log_lvl); | ||
| 294 | |||
| 295 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); | ||
| 296 | } | 112 | } |
| 297 | 113 | ||
| 298 | void show_stack(struct task_struct *task, unsigned long *sp) | 114 | #ifdef CONFIG_X86_32 |
| 115 | static inline void | ||
| 116 | die_if_kernel(const char *str, struct pt_regs *regs, long err) | ||
| 299 | { | 117 | { |
| 300 | printk(" "); | 118 | if (!user_mode_vm(regs)) |
| 301 | show_stack_log_lvl(task, NULL, sp, 0, ""); | 119 | die(str, regs, err); |
| 302 | } | 120 | } |
| 303 | 121 | ||
| 304 | /* | 122 | /* |
| 305 | * The architecture-independent dump_stack generator | 123 | * Perform the lazy TSS's I/O bitmap copy. If the TSS has an |
| 124 | * invalid offset set (the LAZY one) and the faulting thread has | ||
| 125 | * a valid I/O bitmap pointer, we copy the I/O bitmap in the TSS, | ||
| 126 | * we set the offset field correctly and return 1. | ||
| 306 | */ | 127 | */ |
| 307 | void dump_stack(void) | 128 | static int lazy_iobitmap_copy(void) |
| 308 | { | ||
| 309 | unsigned long bp = 0; | ||
| 310 | unsigned long stack; | ||
| 311 | |||
| 312 | #ifdef CONFIG_FRAME_POINTER | ||
| 313 | if (!bp) | ||
| 314 | asm("movl %%ebp, %0" : "=r" (bp):); | ||
| 315 | #endif | ||
| 316 | |||
| 317 | printk("Pid: %d, comm: %.20s %s %s %.*s\n", | ||
| 318 | current->pid, current->comm, print_tainted(), | ||
| 319 | init_utsname()->release, | ||
| 320 | (int)strcspn(init_utsname()->version, " "), | ||
| 321 | init_utsname()->version); | ||
| 322 | |||
| 323 | show_trace(current, NULL, &stack, bp); | ||
| 324 | } | ||
| 325 | |||
| 326 | EXPORT_SYMBOL(dump_stack); | ||
| 327 | |||
| 328 | void show_registers(struct pt_regs *regs) | ||
| 329 | { | 129 | { |
| 330 | int i; | 130 | struct thread_struct *thread; |
| 131 | struct tss_struct *tss; | ||
| 132 | int cpu; | ||
| 331 | 133 | ||
| 332 | print_modules(); | 134 | cpu = get_cpu(); |
| 333 | __show_registers(regs, 0); | 135 | tss = &per_cpu(init_tss, cpu); |
| 136 | thread = ¤t->thread; | ||
| 334 | 137 | ||
| 335 | printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)", | 138 | if (tss->x86_tss.io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY && |
| 336 | TASK_COMM_LEN, current->comm, task_pid_nr(current), | 139 | thread->io_bitmap_ptr) { |
| 337 | current_thread_info(), current, task_thread_info(current)); | 140 | memcpy(tss->io_bitmap, thread->io_bitmap_ptr, |
| 338 | /* | 141 | thread->io_bitmap_max); |
| 339 | * When in-kernel, we also print out the stack and code at the | 142 | /* |
| 340 | * time of the fault.. | 143 | * If the previously set map was extending to higher ports |
| 341 | */ | 144 | * than the current one, pad extra space with 0xff (no access). |
| 342 | if (!user_mode_vm(regs)) { | 145 | */ |
| 343 | unsigned int code_prologue = code_bytes * 43 / 64; | 146 | if (thread->io_bitmap_max < tss->io_bitmap_max) { |
| 344 | unsigned int code_len = code_bytes; | 147 | memset((char *) tss->io_bitmap + |
| 345 | unsigned char c; | 148 | thread->io_bitmap_max, 0xff, |
| 346 | u8 *ip; | 149 | tss->io_bitmap_max - thread->io_bitmap_max); |
| 347 | |||
| 348 | printk("\n" KERN_EMERG "Stack: "); | ||
| 349 | show_stack_log_lvl(NULL, regs, &regs->sp, 0, KERN_EMERG); | ||
| 350 | |||
| 351 | printk(KERN_EMERG "Code: "); | ||
| 352 | |||
| 353 | ip = (u8 *)regs->ip - code_prologue; | ||
| 354 | if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { | ||
| 355 | /* try starting at EIP */ | ||
| 356 | ip = (u8 *)regs->ip; | ||
| 357 | code_len = code_len - code_prologue + 1; | ||
| 358 | } | ||
| 359 | for (i = 0; i < code_len; i++, ip++) { | ||
| 360 | if (ip < (u8 *)PAGE_OFFSET || | ||
| 361 | probe_kernel_address(ip, c)) { | ||
| 362 | printk(" Bad EIP value."); | ||
| 363 | break; | ||
| 364 | } | ||
| 365 | if (ip == (u8 *)regs->ip) | ||
| 366 | printk("<%02x> ", c); | ||
| 367 | else | ||
| 368 | printk("%02x ", c); | ||
| 369 | } | 150 | } |
| 370 | } | 151 | tss->io_bitmap_max = thread->io_bitmap_max; |
| 371 | printk("\n"); | 152 | tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; |
| 372 | } | 153 | tss->io_bitmap_owner = thread; |
| 373 | 154 | put_cpu(); | |
| 374 | int is_valid_bugaddr(unsigned long ip) | ||
| 375 | { | ||
| 376 | unsigned short ud2; | ||
| 377 | |||
| 378 | if (ip < PAGE_OFFSET) | ||
| 379 | return 0; | ||
| 380 | if (probe_kernel_address((unsigned short *)ip, ud2)) | ||
| 381 | return 0; | ||
| 382 | |||
| 383 | return ud2 == 0x0b0f; | ||
| 384 | } | ||
| 385 | |||
| 386 | int __kprobes __die(const char *str, struct pt_regs *regs, long err) | ||
| 387 | { | ||
| 388 | unsigned short ss; | ||
| 389 | unsigned long sp; | ||
| 390 | 155 | ||
| 391 | printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter); | ||
| 392 | #ifdef CONFIG_PREEMPT | ||
| 393 | printk("PREEMPT "); | ||
| 394 | #endif | ||
| 395 | #ifdef CONFIG_SMP | ||
| 396 | printk("SMP "); | ||
| 397 | #endif | ||
| 398 | #ifdef CONFIG_DEBUG_PAGEALLOC | ||
| 399 | printk("DEBUG_PAGEALLOC"); | ||
| 400 | #endif | ||
| 401 | printk("\n"); | ||
| 402 | if (notify_die(DIE_OOPS, str, regs, err, | ||
| 403 | current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) | ||
| 404 | return 1; | 156 | return 1; |
| 405 | |||
| 406 | show_registers(regs); | ||
| 407 | /* Executive summary in case the oops scrolled away */ | ||
| 408 | sp = (unsigned long) (&regs->sp); | ||
| 409 | savesegment(ss, ss); | ||
| 410 | if (user_mode(regs)) { | ||
| 411 | sp = regs->sp; | ||
| 412 | ss = regs->ss & 0xffff; | ||
| 413 | } | 157 | } |
| 414 | printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip); | 158 | put_cpu(); |
| 415 | print_symbol("%s", regs->ip); | ||
| 416 | printk(" SS:ESP %04x:%08lx\n", ss, sp); | ||
| 417 | return 0; | ||
| 418 | } | ||
| 419 | |||
| 420 | /* | ||
| 421 | * This is gone through when something in the kernel has done something bad | ||
| 422 | * and is about to be terminated: | ||
| 423 | */ | ||
| 424 | void die(const char *str, struct pt_regs *regs, long err) | ||
| 425 | { | ||
| 426 | static struct { | ||
| 427 | raw_spinlock_t lock; | ||
| 428 | u32 lock_owner; | ||
| 429 | int lock_owner_depth; | ||
| 430 | } die = { | ||
| 431 | .lock = __RAW_SPIN_LOCK_UNLOCKED, | ||
| 432 | .lock_owner = -1, | ||
| 433 | .lock_owner_depth = 0 | ||
| 434 | }; | ||
| 435 | unsigned long flags; | ||
| 436 | |||
| 437 | oops_enter(); | ||
| 438 | |||
| 439 | if (die.lock_owner != raw_smp_processor_id()) { | ||
| 440 | console_verbose(); | ||
| 441 | raw_local_irq_save(flags); | ||
| 442 | __raw_spin_lock(&die.lock); | ||
| 443 | die.lock_owner = smp_processor_id(); | ||
| 444 | die.lock_owner_depth = 0; | ||
| 445 | bust_spinlocks(1); | ||
| 446 | } else { | ||
| 447 | raw_local_irq_save(flags); | ||
| 448 | } | ||
| 449 | |||
| 450 | if (++die.lock_owner_depth < 3) { | ||
| 451 | report_bug(regs->ip, regs); | ||
| 452 | |||
| 453 | if (__die(str, regs, err)) | ||
| 454 | regs = NULL; | ||
| 455 | } else { | ||
| 456 | printk(KERN_EMERG "Recursive die() failure, output suppressed\n"); | ||
| 457 | } | ||
| 458 | |||
| 459 | bust_spinlocks(0); | ||
| 460 | die.lock_owner = -1; | ||
| 461 | add_taint(TAINT_DIE); | ||
| 462 | __raw_spin_unlock(&die.lock); | ||
| 463 | raw_local_irq_restore(flags); | ||
| 464 | |||
| 465 | if (!regs) | ||
| 466 | return; | ||
| 467 | |||
| 468 | if (kexec_should_crash(current)) | ||
| 469 | crash_kexec(regs); | ||
| 470 | |||
| 471 | if (in_interrupt()) | ||
| 472 | panic("Fatal exception in interrupt"); | ||
| 473 | |||
| 474 | if (panic_on_oops) | ||
| 475 | panic("Fatal exception"); | ||
| 476 | |||
| 477 | oops_exit(); | ||
| 478 | do_exit(SIGSEGV); | ||
| 479 | } | ||
| 480 | 159 | ||
| 481 | static inline void | 160 | return 0; |
| 482 | die_if_kernel(const char *str, struct pt_regs *regs, long err) | ||
| 483 | { | ||
| 484 | if (!user_mode_vm(regs)) | ||
| 485 | die(str, regs, err); | ||
| 486 | } | 161 | } |
| 162 | #endif | ||
| 487 | 163 | ||
| 488 | static void __kprobes | 164 | static void __kprobes |
| 489 | do_trap(int trapnr, int signr, char *str, int vm86, struct pt_regs *regs, | 165 | do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, |
| 490 | long error_code, siginfo_t *info) | 166 | long error_code, siginfo_t *info) |
| 491 | { | 167 | { |
| 492 | struct task_struct *tsk = current; | 168 | struct task_struct *tsk = current; |
| 493 | 169 | ||
| 170 | #ifdef CONFIG_X86_32 | ||
| 494 | if (regs->flags & X86_VM_MASK) { | 171 | if (regs->flags & X86_VM_MASK) { |
| 495 | if (vm86) | 172 | /* |
| 173 | * traps 0, 1, 3, 4, and 5 should be forwarded to vm86. | ||
| 174 | * On nmi (interrupt 2), do_trap should not be called. | ||
| 175 | */ | ||
| 176 | if (trapnr < 6) | ||
| 496 | goto vm86_trap; | 177 | goto vm86_trap; |
| 497 | goto trap_signal; | 178 | goto trap_signal; |
| 498 | } | 179 | } |
| 180 | #endif | ||
| 499 | 181 | ||
| 500 | if (!user_mode(regs)) | 182 | if (!user_mode(regs)) |
| 501 | goto kernel_trap; | 183 | goto kernel_trap; |
| 502 | 184 | ||
| 185 | #ifdef CONFIG_X86_32 | ||
| 503 | trap_signal: | 186 | trap_signal: |
| 187 | #endif | ||
| 504 | /* | 188 | /* |
| 505 | * We want error_code and trap_no set for userspace faults and | 189 | * We want error_code and trap_no set for userspace faults and |
| 506 | * kernelspace faults which result in die(), but not | 190 | * kernelspace faults which result in die(), but not |
| @@ -513,6 +197,18 @@ trap_signal: | |||
| 513 | tsk->thread.error_code = error_code; | 197 | tsk->thread.error_code = error_code; |
| 514 | tsk->thread.trap_no = trapnr; | 198 | tsk->thread.trap_no = trapnr; |
| 515 | 199 | ||
| 200 | #ifdef CONFIG_X86_64 | ||
| 201 | if (show_unhandled_signals && unhandled_signal(tsk, signr) && | ||
| 202 | printk_ratelimit()) { | ||
| 203 | printk(KERN_INFO | ||
| 204 | "%s[%d] trap %s ip:%lx sp:%lx error:%lx", | ||
| 205 | tsk->comm, tsk->pid, str, | ||
| 206 | regs->ip, regs->sp, error_code); | ||
| 207 | print_vma_addr(" in ", regs->ip); | ||
| 208 | printk("\n"); | ||
| 209 | } | ||
| 210 | #endif | ||
| 211 | |||
| 516 | if (info) | 212 | if (info) |
| 517 | force_sig_info(signr, info, tsk); | 213 | force_sig_info(signr, info, tsk); |
| 518 | else | 214 | else |
| @@ -527,120 +223,98 @@ kernel_trap: | |||
| 527 | } | 223 | } |
| 528 | return; | 224 | return; |
| 529 | 225 | ||
| 226 | #ifdef CONFIG_X86_32 | ||
| 530 | vm86_trap: | 227 | vm86_trap: |
| 531 | if (handle_vm86_trap((struct kernel_vm86_regs *) regs, | 228 | if (handle_vm86_trap((struct kernel_vm86_regs *) regs, |
| 532 | error_code, trapnr)) | 229 | error_code, trapnr)) |
| 533 | goto trap_signal; | 230 | goto trap_signal; |
| 534 | return; | 231 | return; |
| 232 | #endif | ||
| 535 | } | 233 | } |
| 536 | 234 | ||
| 537 | #define DO_ERROR(trapnr, signr, str, name) \ | 235 | #define DO_ERROR(trapnr, signr, str, name) \ |
| 538 | void do_##name(struct pt_regs *regs, long error_code) \ | 236 | dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ |
| 539 | { \ | ||
| 540 | trace_hardirqs_fixup(); \ | ||
| 541 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ | ||
| 542 | == NOTIFY_STOP) \ | ||
| 543 | return; \ | ||
| 544 | do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \ | ||
| 545 | } | ||
| 546 | |||
| 547 | #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr, irq) \ | ||
| 548 | void do_##name(struct pt_regs *regs, long error_code) \ | ||
| 549 | { \ | ||
| 550 | siginfo_t info; \ | ||
| 551 | if (irq) \ | ||
| 552 | local_irq_enable(); \ | ||
| 553 | info.si_signo = signr; \ | ||
| 554 | info.si_errno = 0; \ | ||
| 555 | info.si_code = sicode; \ | ||
| 556 | info.si_addr = (void __user *)siaddr; \ | ||
| 557 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ | ||
| 558 | == NOTIFY_STOP) \ | ||
| 559 | return; \ | ||
| 560 | do_trap(trapnr, signr, str, 0, regs, error_code, &info); \ | ||
| 561 | } | ||
| 562 | |||
| 563 | #define DO_VM86_ERROR(trapnr, signr, str, name) \ | ||
| 564 | void do_##name(struct pt_regs *regs, long error_code) \ | ||
| 565 | { \ | 237 | { \ |
| 566 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ | 238 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ |
| 567 | == NOTIFY_STOP) \ | 239 | == NOTIFY_STOP) \ |
| 568 | return; \ | 240 | return; \ |
| 569 | do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \ | 241 | conditional_sti(regs); \ |
| 242 | do_trap(trapnr, signr, str, regs, error_code, NULL); \ | ||
| 570 | } | 243 | } |
| 571 | 244 | ||
| 572 | #define DO_VM86_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ | 245 | #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ |
| 573 | void do_##name(struct pt_regs *regs, long error_code) \ | 246 | dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ |
| 574 | { \ | 247 | { \ |
| 575 | siginfo_t info; \ | 248 | siginfo_t info; \ |
| 576 | info.si_signo = signr; \ | 249 | info.si_signo = signr; \ |
| 577 | info.si_errno = 0; \ | 250 | info.si_errno = 0; \ |
| 578 | info.si_code = sicode; \ | 251 | info.si_code = sicode; \ |
| 579 | info.si_addr = (void __user *)siaddr; \ | 252 | info.si_addr = (void __user *)siaddr; \ |
| 580 | trace_hardirqs_fixup(); \ | ||
| 581 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ | 253 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ |
| 582 | == NOTIFY_STOP) \ | 254 | == NOTIFY_STOP) \ |
| 583 | return; \ | 255 | return; \ |
| 584 | do_trap(trapnr, signr, str, 1, regs, error_code, &info); \ | 256 | conditional_sti(regs); \ |
| 257 | do_trap(trapnr, signr, str, regs, error_code, &info); \ | ||
| 585 | } | 258 | } |
| 586 | 259 | ||
| 587 | DO_VM86_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip) | 260 | DO_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip) |
| 588 | #ifndef CONFIG_KPROBES | 261 | DO_ERROR(4, SIGSEGV, "overflow", overflow) |
| 589 | DO_VM86_ERROR(3, SIGTRAP, "int3", int3) | 262 | DO_ERROR(5, SIGSEGV, "bounds", bounds) |
| 590 | #endif | 263 | DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip) |
| 591 | DO_VM86_ERROR(4, SIGSEGV, "overflow", overflow) | ||
| 592 | DO_VM86_ERROR(5, SIGSEGV, "bounds", bounds) | ||
| 593 | DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0) | ||
| 594 | DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) | 264 | DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) |
| 595 | DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) | 265 | DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) |
| 596 | DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) | 266 | DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) |
| 267 | #ifdef CONFIG_X86_32 | ||
| 597 | DO_ERROR(12, SIGBUS, "stack segment", stack_segment) | 268 | DO_ERROR(12, SIGBUS, "stack segment", stack_segment) |
| 598 | DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0) | 269 | #endif |
| 599 | DO_ERROR_INFO(32, SIGILL, "iret exception", iret_error, ILL_BADSTK, 0, 1) | 270 | DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) |
| 271 | |||
| 272 | #ifdef CONFIG_X86_64 | ||
| 273 | /* Runs on IST stack */ | ||
| 274 | dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code) | ||
| 275 | { | ||
| 276 | if (notify_die(DIE_TRAP, "stack segment", regs, error_code, | ||
| 277 | 12, SIGBUS) == NOTIFY_STOP) | ||
| 278 | return; | ||
| 279 | preempt_conditional_sti(regs); | ||
| 280 | do_trap(12, SIGBUS, "stack segment", regs, error_code, NULL); | ||
| 281 | preempt_conditional_cli(regs); | ||
| 282 | } | ||
| 600 | 283 | ||
| 601 | void __kprobes | 284 | dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) |
| 285 | { | ||
| 286 | static const char str[] = "double fault"; | ||
| 287 | struct task_struct *tsk = current; | ||
| 288 | |||
| 289 | /* Return not checked because double check cannot be ignored */ | ||
| 290 | notify_die(DIE_TRAP, str, regs, error_code, 8, SIGSEGV); | ||
| 291 | |||
| 292 | tsk->thread.error_code = error_code; | ||
| 293 | tsk->thread.trap_no = 8; | ||
| 294 | |||
| 295 | /* This is always a kernel trap and never fixable (and thus must | ||
| 296 | never return). */ | ||
| 297 | for (;;) | ||
| 298 | die(str, regs, error_code); | ||
| 299 | } | ||
| 300 | #endif | ||
| 301 | |||
| 302 | dotraplinkage void __kprobes | ||
| 602 | do_general_protection(struct pt_regs *regs, long error_code) | 303 | do_general_protection(struct pt_regs *regs, long error_code) |
| 603 | { | 304 | { |
| 604 | struct task_struct *tsk; | 305 | struct task_struct *tsk; |
| 605 | struct thread_struct *thread; | ||
| 606 | struct tss_struct *tss; | ||
| 607 | int cpu; | ||
| 608 | 306 | ||
| 609 | cpu = get_cpu(); | 307 | conditional_sti(regs); |
| 610 | tss = &per_cpu(init_tss, cpu); | ||
| 611 | thread = &current->thread; | ||
| 612 | |||
| 613 | /* | ||
| 614 | * Perform the lazy TSS's I/O bitmap copy. If the TSS has an | ||
| 615 | * invalid offset set (the LAZY one) and the faulting thread has | ||
| 616 | * a valid I/O bitmap pointer, we copy the I/O bitmap in the TSS | ||
| 617 | * and we set the offset field correctly. Then we let the CPU to | ||
| 618 | * restart the faulting instruction. | ||
| 619 | */ | ||
| 620 | if (tss->x86_tss.io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY && | ||
| 621 | thread->io_bitmap_ptr) { | ||
| 622 | memcpy(tss->io_bitmap, thread->io_bitmap_ptr, | ||
| 623 | thread->io_bitmap_max); | ||
| 624 | /* | ||
| 625 | * If the previously set map was extending to higher ports | ||
| 626 | * than the current one, pad extra space with 0xff (no access). | ||
| 627 | */ | ||
| 628 | if (thread->io_bitmap_max < tss->io_bitmap_max) { | ||
| 629 | memset((char *) tss->io_bitmap + | ||
| 630 | thread->io_bitmap_max, 0xff, | ||
| 631 | tss->io_bitmap_max - thread->io_bitmap_max); | ||
| 632 | } | ||
| 633 | tss->io_bitmap_max = thread->io_bitmap_max; | ||
| 634 | tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; | ||
| 635 | tss->io_bitmap_owner = thread; | ||
| 636 | put_cpu(); | ||
| 637 | 308 | ||
| 309 | #ifdef CONFIG_X86_32 | ||
| 310 | if (lazy_iobitmap_copy()) { | ||
| 311 | /* restart the faulting instruction */ | ||
| 638 | return; | 312 | return; |
| 639 | } | 313 | } |
| 640 | put_cpu(); | ||
| 641 | 314 | ||
| 642 | if (regs->flags & X86_VM_MASK) | 315 | if (regs->flags & X86_VM_MASK) |
| 643 | goto gp_in_vm86; | 316 | goto gp_in_vm86; |
| 317 | #endif | ||
| 644 | 318 | ||
| 645 | tsk = current; | 319 | tsk = current; |
| 646 | if (!user_mode(regs)) | 320 | if (!user_mode(regs)) |
| @@ -662,10 +336,12 @@ do_general_protection(struct pt_regs *regs, long error_code) | |||
| 662 | force_sig(SIGSEGV, tsk); | 336 | force_sig(SIGSEGV, tsk); |
| 663 | return; | 337 | return; |
| 664 | 338 | ||
| 339 | #ifdef CONFIG_X86_32 | ||
| 665 | gp_in_vm86: | 340 | gp_in_vm86: |
| 666 | local_irq_enable(); | 341 | local_irq_enable(); |
| 667 | handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); | 342 | handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); |
| 668 | return; | 343 | return; |
| 344 | #endif | ||
| 669 | 345 | ||
| 670 | gp_in_kernel: | 346 | gp_in_kernel: |
| 671 | if (fixup_exception(regs)) | 347 | if (fixup_exception(regs)) |
| @@ -702,7 +378,8 @@ mem_parity_error(unsigned char reason, struct pt_regs *regs) | |||
| 702 | printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); | 378 | printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); |
| 703 | 379 | ||
| 704 | /* Clear and disable the memory parity error line. */ | 380 | /* Clear and disable the memory parity error line. */ |
| 705 | clear_mem_error(reason); | 381 | reason = (reason & 0xf) | 4; |
| 382 | outb(reason, 0x61); | ||
| 706 | } | 383 | } |
| 707 | 384 | ||
| 708 | static notrace __kprobes void | 385 | static notrace __kprobes void |
| @@ -728,7 +405,8 @@ io_check_error(unsigned char reason, struct pt_regs *regs) | |||
| 728 | static notrace __kprobes void | 405 | static notrace __kprobes void |
| 729 | unknown_nmi_error(unsigned char reason, struct pt_regs *regs) | 406 | unknown_nmi_error(unsigned char reason, struct pt_regs *regs) |
| 730 | { | 407 | { |
| 731 | if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) | 408 | if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == |
| 409 | NOTIFY_STOP) | ||
| 732 | return; | 410 | return; |
| 733 | #ifdef CONFIG_MCA | 411 | #ifdef CONFIG_MCA |
| 734 | /* | 412 | /* |
| @@ -751,41 +429,6 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs) | |||
| 751 | printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); | 429 | printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); |
| 752 | } | 430 | } |
| 753 | 431 | ||
| 754 | static DEFINE_SPINLOCK(nmi_print_lock); | ||
| 755 | |||
| 756 | void notrace __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic) | ||
| 757 | { | ||
| 758 | if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) | ||
| 759 | return; | ||
| 760 | |||
| 761 | spin_lock(&nmi_print_lock); | ||
| 762 | /* | ||
| 763 | * We are in trouble anyway, lets at least try | ||
| 764 | * to get a message out: | ||
| 765 | */ | ||
| 766 | bust_spinlocks(1); | ||
| 767 | printk(KERN_EMERG "%s", str); | ||
| 768 | printk(" on CPU%d, ip %08lx, registers:\n", | ||
| 769 | smp_processor_id(), regs->ip); | ||
| 770 | show_registers(regs); | ||
| 771 | if (do_panic) | ||
| 772 | panic("Non maskable interrupt"); | ||
| 773 | console_silent(); | ||
| 774 | spin_unlock(&nmi_print_lock); | ||
| 775 | bust_spinlocks(0); | ||
| 776 | |||
| 777 | /* | ||
| 778 | * If we are in kernel we are probably nested up pretty bad | ||
| 779 | * and might aswell get out now while we still can: | ||
| 780 | */ | ||
| 781 | if (!user_mode_vm(regs)) { | ||
| 782 | current->thread.trap_no = 2; | ||
| 783 | crash_kexec(regs); | ||
| 784 | } | ||
| 785 | |||
| 786 | do_exit(SIGSEGV); | ||
| 787 | } | ||
| 788 | |||
| 789 | static notrace __kprobes void default_do_nmi(struct pt_regs *regs) | 432 | static notrace __kprobes void default_do_nmi(struct pt_regs *regs) |
| 790 | { | 433 | { |
| 791 | unsigned char reason = 0; | 434 | unsigned char reason = 0; |
| @@ -824,22 +467,25 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) | |||
| 824 | mem_parity_error(reason, regs); | 467 | mem_parity_error(reason, regs); |
| 825 | if (reason & 0x40) | 468 | if (reason & 0x40) |
| 826 | io_check_error(reason, regs); | 469 | io_check_error(reason, regs); |
| 470 | #ifdef CONFIG_X86_32 | ||
| 827 | /* | 471 | /* |
| 828 | * Reassert NMI in case it became active meanwhile | 472 | * Reassert NMI in case it became active meanwhile |
| 829 | * as it's edge-triggered: | 473 | * as it's edge-triggered: |
| 830 | */ | 474 | */ |
| 831 | reassert_nmi(); | 475 | reassert_nmi(); |
| 476 | #endif | ||
| 832 | } | 477 | } |
| 833 | 478 | ||
| 834 | notrace __kprobes void do_nmi(struct pt_regs *regs, long error_code) | 479 | dotraplinkage notrace __kprobes void |
| 480 | do_nmi(struct pt_regs *regs, long error_code) | ||
| 835 | { | 481 | { |
| 836 | int cpu; | ||
| 837 | |||
| 838 | nmi_enter(); | 482 | nmi_enter(); |
| 839 | 483 | ||
| 840 | cpu = smp_processor_id(); | 484 | #ifdef CONFIG_X86_32 |
| 841 | 485 | { int cpu; cpu = smp_processor_id(); ++nmi_count(cpu); } | |
| 842 | ++nmi_count(cpu); | 486 | #else |
| 487 | add_pda(__nmi_count, 1); | ||
| 488 | #endif | ||
| 843 | 489 | ||
| 844 | if (!ignore_nmis) | 490 | if (!ignore_nmis) |
| 845 | default_do_nmi(regs); | 491 | default_do_nmi(regs); |
| @@ -859,21 +505,44 @@ void restart_nmi(void) | |||
| 859 | acpi_nmi_enable(); | 505 | acpi_nmi_enable(); |
| 860 | } | 506 | } |
| 861 | 507 | ||
| 862 | #ifdef CONFIG_KPROBES | 508 | /* May run on IST stack. */ |
| 863 | void __kprobes do_int3(struct pt_regs *regs, long error_code) | 509 | dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) |
| 864 | { | 510 | { |
| 865 | trace_hardirqs_fixup(); | 511 | #ifdef CONFIG_KPROBES |
| 866 | |||
| 867 | if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) | 512 | if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) |
| 868 | == NOTIFY_STOP) | 513 | == NOTIFY_STOP) |
| 869 | return; | 514 | return; |
| 870 | /* | 515 | #else |
| 871 | * This is an interrupt gate, because kprobes wants interrupts | 516 | if (notify_die(DIE_TRAP, "int3", regs, error_code, 3, SIGTRAP) |
| 872 | * disabled. Normal trap handlers don't. | 517 | == NOTIFY_STOP) |
| 873 | */ | 518 | return; |
| 874 | restore_interrupts(regs); | 519 | #endif |
| 875 | 520 | ||
| 876 | do_trap(3, SIGTRAP, "int3", 1, regs, error_code, NULL); | 521 | preempt_conditional_sti(regs); |
| 522 | do_trap(3, SIGTRAP, "int3", regs, error_code, NULL); | ||
| 523 | preempt_conditional_cli(regs); | ||
| 524 | } | ||
| 525 | |||
| 526 | #ifdef CONFIG_X86_64 | ||
| 527 | /* Help handler running on IST stack to switch back to user stack | ||
| 528 | for scheduling or signal handling. The actual stack switch is done in | ||
| 529 | entry.S */ | ||
| 530 | asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) | ||
| 531 | { | ||
| 532 | struct pt_regs *regs = eregs; | ||
| 533 | /* Did already sync */ | ||
| 534 | if (eregs == (struct pt_regs *)eregs->sp) | ||
| 535 | ; | ||
| 536 | /* Exception from user space */ | ||
| 537 | else if (user_mode(eregs)) | ||
| 538 | regs = task_pt_regs(current); | ||
| 539 | /* Exception from kernel and interrupts are enabled. Move to | ||
| 540 | kernel process stack. */ | ||
| 541 | else if (eregs->flags & X86_EFLAGS_IF) | ||
| 542 | regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs)); | ||
| 543 | if (eregs != regs) | ||
| 544 | *regs = *eregs; | ||
| 545 | return regs; | ||
| 877 | } | 546 | } |
| 878 | #endif | 547 | #endif |
| 879 | 548 | ||
| @@ -898,13 +567,14 @@ void __kprobes do_int3(struct pt_regs *regs, long error_code) | |||
| 898 | * about restoring all the debug state, and ptrace doesn't have to | 567 | * about restoring all the debug state, and ptrace doesn't have to |
| 899 | * find every occurrence of the TF bit that could be saved away even | 568 | * find every occurrence of the TF bit that could be saved away even |
| 900 | * by user code) | 569 | * by user code) |
| 570 | * | ||
| 571 | * May run on IST stack. | ||
| 901 | */ | 572 | */ |
| 902 | void __kprobes do_debug(struct pt_regs *regs, long error_code) | 573 | dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) |
| 903 | { | 574 | { |
| 904 | struct task_struct *tsk = current; | 575 | struct task_struct *tsk = current; |
| 905 | unsigned int condition; | 576 | unsigned long condition; |
| 906 | 577 | int si_code; | |
| 907 | trace_hardirqs_fixup(); | ||
| 908 | 578 | ||
| 909 | get_debugreg(condition, 6); | 579 | get_debugreg(condition, 6); |
| 910 | 580 | ||
| @@ -917,9 +587,9 @@ void __kprobes do_debug(struct pt_regs *regs, long error_code) | |||
| 917 | if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, | 587 | if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, |
| 918 | SIGTRAP) == NOTIFY_STOP) | 588 | SIGTRAP) == NOTIFY_STOP) |
| 919 | return; | 589 | return; |
| 590 | |||
| 920 | /* It's safe to allow irq's after DR6 has been saved */ | 591 | /* It's safe to allow irq's after DR6 has been saved */ |
| 921 | if (regs->flags & X86_EFLAGS_IF) | 592 | preempt_conditional_sti(regs); |
| 922 | local_irq_enable(); | ||
| 923 | 593 | ||
| 924 | /* Mask out spurious debug traps due to lazy DR7 setting */ | 594 | /* Mask out spurious debug traps due to lazy DR7 setting */ |
| 925 | if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { | 595 | if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { |
| @@ -927,8 +597,10 @@ void __kprobes do_debug(struct pt_regs *regs, long error_code) | |||
| 927 | goto clear_dr7; | 597 | goto clear_dr7; |
| 928 | } | 598 | } |
| 929 | 599 | ||
| 600 | #ifdef CONFIG_X86_32 | ||
| 930 | if (regs->flags & X86_VM_MASK) | 601 | if (regs->flags & X86_VM_MASK) |
| 931 | goto debug_vm86; | 602 | goto debug_vm86; |
| 603 | #endif | ||
| 932 | 604 | ||
| 933 | /* Save debug status register where ptrace can see it */ | 605 | /* Save debug status register where ptrace can see it */ |
| 934 | tsk->thread.debugreg6 = condition; | 606 | tsk->thread.debugreg6 = condition; |
| @@ -938,17 +610,13 @@ void __kprobes do_debug(struct pt_regs *regs, long error_code) | |||
| 938 | * kernel space (but re-enable TF when returning to user mode). | 610 | * kernel space (but re-enable TF when returning to user mode). |
| 939 | */ | 611 | */ |
| 940 | if (condition & DR_STEP) { | 612 | if (condition & DR_STEP) { |
| 941 | /* | ||
| 942 | * We already checked v86 mode above, so we can | ||
| 943 | * check for kernel mode by just checking the CPL | ||
| 944 | * of CS. | ||
| 945 | */ | ||
| 946 | if (!user_mode(regs)) | 613 | if (!user_mode(regs)) |
| 947 | goto clear_TF_reenable; | 614 | goto clear_TF_reenable; |
| 948 | } | 615 | } |
| 949 | 616 | ||
| 617 | si_code = get_si_code(condition); | ||
| 950 | /* Ok, finally something we can handle */ | 618 | /* Ok, finally something we can handle */ |
| 951 | send_sigtrap(tsk, regs, error_code); | 619 | send_sigtrap(tsk, regs, error_code, si_code); |
| 952 | 620 | ||
| 953 | /* | 621 | /* |
| 954 | * Disable additional traps. They'll be re-enabled when | 622 | * Disable additional traps. They'll be re-enabled when |
| @@ -956,18 +624,37 @@ void __kprobes do_debug(struct pt_regs *regs, long error_code) | |||
| 956 | */ | 624 | */ |
| 957 | clear_dr7: | 625 | clear_dr7: |
| 958 | set_debugreg(0, 7); | 626 | set_debugreg(0, 7); |
| 627 | preempt_conditional_cli(regs); | ||
| 959 | return; | 628 | return; |
| 960 | 629 | ||
| 630 | #ifdef CONFIG_X86_32 | ||
| 961 | debug_vm86: | 631 | debug_vm86: |
| 962 | handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1); | 632 | handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1); |
| 633 | preempt_conditional_cli(regs); | ||
| 963 | return; | 634 | return; |
| 635 | #endif | ||
| 964 | 636 | ||
| 965 | clear_TF_reenable: | 637 | clear_TF_reenable: |
| 966 | set_tsk_thread_flag(tsk, TIF_SINGLESTEP); | 638 | set_tsk_thread_flag(tsk, TIF_SINGLESTEP); |
| 967 | regs->flags &= ~X86_EFLAGS_TF; | 639 | regs->flags &= ~X86_EFLAGS_TF; |
| 640 | preempt_conditional_cli(regs); | ||
| 968 | return; | 641 | return; |
| 969 | } | 642 | } |
| 970 | 643 | ||
| 644 | #ifdef CONFIG_X86_64 | ||
| 645 | static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr) | ||
| 646 | { | ||
| 647 | if (fixup_exception(regs)) | ||
| 648 | return 1; | ||
| 649 | |||
| 650 | notify_die(DIE_GPF, str, regs, 0, trapnr, SIGFPE); | ||
| 651 | /* Illegal floating point operation in the kernel */ | ||
| 652 | current->thread.trap_no = trapnr; | ||
| 653 | die(str, regs, 0); | ||
| 654 | return 0; | ||
| 655 | } | ||
| 656 | #endif | ||
| 657 | |||
| 971 | /* | 658 | /* |
| 972 | * Note that we play around with the 'TS' bit in an attempt to get | 659 | * Note that we play around with the 'TS' bit in an attempt to get |
| 973 | * the correct behaviour even in the presence of the asynchronous | 660 | * the correct behaviour even in the presence of the asynchronous |
| @@ -1004,7 +691,9 @@ void math_error(void __user *ip) | |||
| 1004 | swd = get_fpu_swd(task); | 691 | swd = get_fpu_swd(task); |
| 1005 | switch (swd & ~cwd & 0x3f) { | 692 | switch (swd & ~cwd & 0x3f) { |
| 1006 | case 0x000: /* No unmasked exception */ | 693 | case 0x000: /* No unmasked exception */ |
| 694 | #ifdef CONFIG_X86_32 | ||
| 1007 | return; | 695 | return; |
| 696 | #endif | ||
| 1008 | default: /* Multiple exceptions */ | 697 | default: /* Multiple exceptions */ |
| 1009 | break; | 698 | break; |
| 1010 | case 0x001: /* Invalid Op */ | 699 | case 0x001: /* Invalid Op */ |
| @@ -1032,9 +721,18 @@ void math_error(void __user *ip) | |||
| 1032 | force_sig_info(SIGFPE, &info, task); | 721 | force_sig_info(SIGFPE, &info, task); |
| 1033 | } | 722 | } |
| 1034 | 723 | ||
| 1035 | void do_coprocessor_error(struct pt_regs *regs, long error_code) | 724 | dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) |
| 1036 | { | 725 | { |
| 726 | conditional_sti(regs); | ||
| 727 | |||
| 728 | #ifdef CONFIG_X86_32 | ||
| 1037 | ignore_fpu_irq = 1; | 729 | ignore_fpu_irq = 1; |
| 730 | #else | ||
| 731 | if (!user_mode(regs) && | ||
| 732 | kernel_math_error(regs, "kernel x87 math error", 16)) | ||
| 733 | return; | ||
| 734 | #endif | ||
| 735 | |||
| 1038 | math_error((void __user *)regs->ip); | 736 | math_error((void __user *)regs->ip); |
| 1039 | } | 737 | } |
| 1040 | 738 | ||
| @@ -1086,8 +784,12 @@ static void simd_math_error(void __user *ip) | |||
| 1086 | force_sig_info(SIGFPE, &info, task); | 784 | force_sig_info(SIGFPE, &info, task); |
| 1087 | } | 785 | } |
| 1088 | 786 | ||
| 1089 | void do_simd_coprocessor_error(struct pt_regs *regs, long error_code) | 787 | dotraplinkage void |
| 788 | do_simd_coprocessor_error(struct pt_regs *regs, long error_code) | ||
| 1090 | { | 789 | { |
| 790 | conditional_sti(regs); | ||
| 791 | |||
| 792 | #ifdef CONFIG_X86_32 | ||
| 1091 | if (cpu_has_xmm) { | 793 | if (cpu_has_xmm) { |
| 1092 | /* Handle SIMD FPU exceptions on PIII+ processors. */ | 794 | /* Handle SIMD FPU exceptions on PIII+ processors. */ |
| 1093 | ignore_fpu_irq = 1; | 795 | ignore_fpu_irq = 1; |
| @@ -1106,16 +808,25 @@ void do_simd_coprocessor_error(struct pt_regs *regs, long error_code) | |||
| 1106 | current->thread.error_code = error_code; | 808 | current->thread.error_code = error_code; |
| 1107 | die_if_kernel("cache flush denied", regs, error_code); | 809 | die_if_kernel("cache flush denied", regs, error_code); |
| 1108 | force_sig(SIGSEGV, current); | 810 | force_sig(SIGSEGV, current); |
| 811 | #else | ||
| 812 | if (!user_mode(regs) && | ||
| 813 | kernel_math_error(regs, "kernel simd math error", 19)) | ||
| 814 | return; | ||
| 815 | simd_math_error((void __user *)regs->ip); | ||
| 816 | #endif | ||
| 1109 | } | 817 | } |
| 1110 | 818 | ||
| 1111 | void do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) | 819 | dotraplinkage void |
| 820 | do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) | ||
| 1112 | { | 821 | { |
| 822 | conditional_sti(regs); | ||
| 1113 | #if 0 | 823 | #if 0 |
| 1114 | /* No need to warn about this any longer. */ | 824 | /* No need to warn about this any longer. */ |
| 1115 | printk(KERN_INFO "Ignoring P6 Local APIC Spurious Interrupt Bug...\n"); | 825 | printk(KERN_INFO "Ignoring P6 Local APIC Spurious Interrupt Bug...\n"); |
| 1116 | #endif | 826 | #endif |
| 1117 | } | 827 | } |
| 1118 | 828 | ||
| 829 | #ifdef CONFIG_X86_32 | ||
| 1119 | unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp) | 830 | unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp) |
| 1120 | { | 831 | { |
| 1121 | struct desc_struct *gdt = get_cpu_gdt_table(smp_processor_id()); | 832 | struct desc_struct *gdt = get_cpu_gdt_table(smp_processor_id()); |
| @@ -1134,6 +845,15 @@ unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp) | |||
| 1134 | 845 | ||
| 1135 | return new_kesp; | 846 | return new_kesp; |
| 1136 | } | 847 | } |
| 848 | #else | ||
| 849 | asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void) | ||
| 850 | { | ||
| 851 | } | ||
| 852 | |||
| 853 | asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void) | ||
| 854 | { | ||
| 855 | } | ||
| 856 | #endif | ||
| 1137 | 857 | ||
| 1138 | /* | 858 | /* |
| 1139 | * 'math_state_restore()' saves the current math information in the | 859 | * 'math_state_restore()' saves the current math information in the |
| @@ -1166,14 +886,24 @@ asmlinkage void math_state_restore(void) | |||
| 1166 | } | 886 | } |
| 1167 | 887 | ||
| 1168 | clts(); /* Allow maths ops (or we recurse) */ | 888 | clts(); /* Allow maths ops (or we recurse) */ |
| 889 | #ifdef CONFIG_X86_32 | ||
| 1169 | restore_fpu(tsk); | 890 | restore_fpu(tsk); |
| 891 | #else | ||
| 892 | /* | ||
| 893 | * Paranoid restore. send a SIGSEGV if we fail to restore the state. | ||
| 894 | */ | ||
| 895 | if (unlikely(restore_fpu_checking(tsk))) { | ||
| 896 | stts(); | ||
| 897 | force_sig(SIGSEGV, tsk); | ||
| 898 | return; | ||
| 899 | } | ||
| 900 | #endif | ||
| 1170 | thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ | 901 | thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ |
| 1171 | tsk->fpu_counter++; | 902 | tsk->fpu_counter++; |
| 1172 | } | 903 | } |
| 1173 | EXPORT_SYMBOL_GPL(math_state_restore); | 904 | EXPORT_SYMBOL_GPL(math_state_restore); |
| 1174 | 905 | ||
| 1175 | #ifndef CONFIG_MATH_EMULATION | 906 | #ifndef CONFIG_MATH_EMULATION |
| 1176 | |||
| 1177 | asmlinkage void math_emulate(long arg) | 907 | asmlinkage void math_emulate(long arg) |
| 1178 | { | 908 | { |
| 1179 | printk(KERN_EMERG | 909 | printk(KERN_EMERG |
| @@ -1182,12 +912,46 @@ asmlinkage void math_emulate(long arg) | |||
| 1182 | force_sig(SIGFPE, current); | 912 | force_sig(SIGFPE, current); |
| 1183 | schedule(); | 913 | schedule(); |
| 1184 | } | 914 | } |
| 1185 | |||
| 1186 | #endif /* CONFIG_MATH_EMULATION */ | 915 | #endif /* CONFIG_MATH_EMULATION */ |
| 1187 | 916 | ||
| 917 | dotraplinkage void __kprobes | ||
| 918 | do_device_not_available(struct pt_regs *regs, long error) | ||
| 919 | { | ||
| 920 | #ifdef CONFIG_X86_32 | ||
| 921 | if (read_cr0() & X86_CR0_EM) { | ||
| 922 | conditional_sti(regs); | ||
| 923 | math_emulate(0); | ||
| 924 | } else { | ||
| 925 | math_state_restore(); /* interrupts still off */ | ||
| 926 | conditional_sti(regs); | ||
| 927 | } | ||
| 928 | #else | ||
| 929 | math_state_restore(); | ||
| 930 | #endif | ||
| 931 | } | ||
| 932 | |||
| 933 | #ifdef CONFIG_X86_32 | ||
| 934 | dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) | ||
| 935 | { | ||
| 936 | siginfo_t info; | ||
| 937 | local_irq_enable(); | ||
| 938 | |||
| 939 | info.si_signo = SIGILL; | ||
| 940 | info.si_errno = 0; | ||
| 941 | info.si_code = ILL_BADSTK; | ||
| 942 | info.si_addr = 0; | ||
| 943 | if (notify_die(DIE_TRAP, "iret exception", | ||
| 944 | regs, error_code, 32, SIGILL) == NOTIFY_STOP) | ||
| 945 | return; | ||
| 946 | do_trap(32, SIGILL, "iret exception", regs, error_code, &info); | ||
| 947 | } | ||
| 948 | #endif | ||
| 949 | |||
| 1188 | void __init trap_init(void) | 950 | void __init trap_init(void) |
| 1189 | { | 951 | { |
| 952 | #ifdef CONFIG_X86_32 | ||
| 1190 | int i; | 953 | int i; |
| 954 | #endif | ||
| 1191 | 955 | ||
| 1192 | #ifdef CONFIG_EISA | 956 | #ifdef CONFIG_EISA |
| 1193 | void __iomem *p = early_ioremap(0x0FFFD9, 4); | 957 | void __iomem *p = early_ioremap(0x0FFFD9, 4); |
| @@ -1197,29 +961,40 @@ void __init trap_init(void) | |||
| 1197 | early_iounmap(p, 4); | 961 | early_iounmap(p, 4); |
| 1198 | #endif | 962 | #endif |
| 1199 | 963 | ||
| 1200 | set_trap_gate(0, &divide_error); | 964 | set_intr_gate(0, &divide_error); |
| 1201 | set_intr_gate(1, &debug); | 965 | set_intr_gate_ist(1, &debug, DEBUG_STACK); |
| 1202 | set_intr_gate(2, &nmi); | 966 | set_intr_gate_ist(2, &nmi, NMI_STACK); |
| 1203 | set_system_intr_gate(3, &int3); /* int3 can be called from all */ | 967 | /* int3 can be called from all */ |
| 1204 | set_system_gate(4, &overflow); /* int4 can be called from all */ | 968 | set_system_intr_gate_ist(3, &int3, DEBUG_STACK); |
| 1205 | set_trap_gate(5, &bounds); | 969 | /* int4 can be called from all */ |
| 1206 | set_trap_gate(6, &invalid_op); | 970 | set_system_intr_gate(4, &overflow); |
| 1207 | set_trap_gate(7, &device_not_available); | 971 | set_intr_gate(5, &bounds); |
| 972 | set_intr_gate(6, &invalid_op); | ||
| 973 | set_intr_gate(7, &device_not_available); | ||
| 974 | #ifdef CONFIG_X86_32 | ||
| 1208 | set_task_gate(8, GDT_ENTRY_DOUBLEFAULT_TSS); | 975 | set_task_gate(8, GDT_ENTRY_DOUBLEFAULT_TSS); |
| 1209 | set_trap_gate(9, &coprocessor_segment_overrun); | 976 | #else |
| 1210 | set_trap_gate(10, &invalid_TSS); | 977 | set_intr_gate_ist(8, &double_fault, DOUBLEFAULT_STACK); |
| 1211 | set_trap_gate(11, &segment_not_present); | 978 | #endif |
| 1212 | set_trap_gate(12, &stack_segment); | 979 | set_intr_gate(9, &coprocessor_segment_overrun); |
| 1213 | set_trap_gate(13, &general_protection); | 980 | set_intr_gate(10, &invalid_TSS); |
| 981 | set_intr_gate(11, &segment_not_present); | ||
| 982 | set_intr_gate_ist(12, &stack_segment, STACKFAULT_STACK); | ||
| 983 | set_intr_gate(13, &general_protection); | ||
| 1214 | set_intr_gate(14, &page_fault); | 984 | set_intr_gate(14, &page_fault); |
| 1215 | set_trap_gate(15, &spurious_interrupt_bug); | 985 | set_intr_gate(15, &spurious_interrupt_bug); |
| 1216 | set_trap_gate(16, &coprocessor_error); | 986 | set_intr_gate(16, &coprocessor_error); |
| 1217 | set_trap_gate(17, &alignment_check); | 987 | set_intr_gate(17, &alignment_check); |
| 1218 | #ifdef CONFIG_X86_MCE | 988 | #ifdef CONFIG_X86_MCE |
| 1219 | set_trap_gate(18, &machine_check); | 989 | set_intr_gate_ist(18, &machine_check, MCE_STACK); |
| 1220 | #endif | 990 | #endif |
| 1221 | set_trap_gate(19, &simd_coprocessor_error); | 991 | set_intr_gate(19, &simd_coprocessor_error); |
| 1222 | 992 | ||
| 993 | #ifdef CONFIG_IA32_EMULATION | ||
| 994 | set_system_intr_gate(IA32_SYSCALL_VECTOR, ia32_syscall); | ||
| 995 | #endif | ||
| 996 | |||
| 997 | #ifdef CONFIG_X86_32 | ||
| 1223 | if (cpu_has_fxsr) { | 998 | if (cpu_has_fxsr) { |
| 1224 | printk(KERN_INFO "Enabling fast FPU save and restore... "); | 999 | printk(KERN_INFO "Enabling fast FPU save and restore... "); |
| 1225 | set_in_cr4(X86_CR4_OSFXSR); | 1000 | set_in_cr4(X86_CR4_OSFXSR); |
| @@ -1232,37 +1007,20 @@ void __init trap_init(void) | |||
| 1232 | printk("done.\n"); | 1007 | printk("done.\n"); |
| 1233 | } | 1008 | } |
| 1234 | 1009 | ||
| 1235 | set_system_gate(SYSCALL_VECTOR, &system_call); | 1010 | set_system_trap_gate(SYSCALL_VECTOR, &system_call); |
| 1236 | 1011 | ||
| 1237 | /* Reserve all the builtin and the syscall vector: */ | 1012 | /* Reserve all the builtin and the syscall vector: */ |
| 1238 | for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) | 1013 | for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) |
| 1239 | set_bit(i, used_vectors); | 1014 | set_bit(i, used_vectors); |
| 1240 | 1015 | ||
| 1241 | set_bit(SYSCALL_VECTOR, used_vectors); | 1016 | set_bit(SYSCALL_VECTOR, used_vectors); |
| 1242 | 1017 | #endif | |
| 1243 | init_thread_xstate(); | ||
| 1244 | /* | 1018 | /* |
| 1245 | * Should be a barrier for any external CPU state: | 1019 | * Should be a barrier for any external CPU state: |
| 1246 | */ | 1020 | */ |
| 1247 | cpu_init(); | 1021 | cpu_init(); |
| 1248 | 1022 | ||
| 1023 | #ifdef CONFIG_X86_32 | ||
| 1249 | trap_init_hook(); | 1024 | trap_init_hook(); |
| 1025 | #endif | ||
| 1250 | } | 1026 | } |
| 1251 | |||
| 1252 | static int __init kstack_setup(char *s) | ||
| 1253 | { | ||
| 1254 | kstack_depth_to_print = simple_strtoul(s, NULL, 0); | ||
| 1255 | |||
| 1256 | return 1; | ||
| 1257 | } | ||
| 1258 | __setup("kstack=", kstack_setup); | ||
| 1259 | |||
| 1260 | static int __init code_bytes_setup(char *s) | ||
| 1261 | { | ||
| 1262 | code_bytes = simple_strtoul(s, NULL, 0); | ||
| 1263 | if (code_bytes > 8192) | ||
| 1264 | code_bytes = 8192; | ||
| 1265 | |||
| 1266 | return 1; | ||
| 1267 | } | ||
| 1268 | __setup("code_bytes=", code_bytes_setup); | ||
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c deleted file mode 100644 index 2696a6837782..000000000000 --- a/arch/x86/kernel/traps_64.c +++ /dev/null | |||
| @@ -1,1217 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
| 3 | * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs | ||
| 4 | * | ||
| 5 | * Pentium III FXSR, SSE support | ||
| 6 | * Gareth Hughes <gareth@valinux.com>, May 2000 | ||
| 7 | */ | ||
| 8 | |||
| 9 | /* | ||
| 10 | * 'Traps.c' handles hardware traps and faults after we have saved some | ||
| 11 | * state in 'entry.S'. | ||
| 12 | */ | ||
| 13 | #include <linux/moduleparam.h> | ||
| 14 | #include <linux/interrupt.h> | ||
| 15 | #include <linux/kallsyms.h> | ||
| 16 | #include <linux/spinlock.h> | ||
| 17 | #include <linux/kprobes.h> | ||
| 18 | #include <linux/uaccess.h> | ||
| 19 | #include <linux/utsname.h> | ||
| 20 | #include <linux/kdebug.h> | ||
| 21 | #include <linux/kernel.h> | ||
| 22 | #include <linux/module.h> | ||
| 23 | #include <linux/ptrace.h> | ||
| 24 | #include <linux/string.h> | ||
| 25 | #include <linux/unwind.h> | ||
| 26 | #include <linux/delay.h> | ||
| 27 | #include <linux/errno.h> | ||
| 28 | #include <linux/kexec.h> | ||
| 29 | #include <linux/sched.h> | ||
| 30 | #include <linux/timer.h> | ||
| 31 | #include <linux/init.h> | ||
| 32 | #include <linux/bug.h> | ||
| 33 | #include <linux/nmi.h> | ||
| 34 | #include <linux/mm.h> | ||
| 35 | |||
| 36 | #if defined(CONFIG_EDAC) | ||
| 37 | #include <linux/edac.h> | ||
| 38 | #endif | ||
| 39 | |||
| 40 | #include <asm/stacktrace.h> | ||
| 41 | #include <asm/processor.h> | ||
| 42 | #include <asm/debugreg.h> | ||
| 43 | #include <asm/atomic.h> | ||
| 44 | #include <asm/system.h> | ||
| 45 | #include <asm/unwind.h> | ||
| 46 | #include <asm/desc.h> | ||
| 47 | #include <asm/i387.h> | ||
| 48 | #include <asm/nmi.h> | ||
| 49 | #include <asm/smp.h> | ||
| 50 | #include <asm/io.h> | ||
| 51 | #include <asm/pgalloc.h> | ||
| 52 | #include <asm/proto.h> | ||
| 53 | #include <asm/pda.h> | ||
| 54 | |||
| 55 | #include <mach_traps.h> | ||
| 56 | |||
| 57 | asmlinkage void divide_error(void); | ||
| 58 | asmlinkage void debug(void); | ||
| 59 | asmlinkage void nmi(void); | ||
| 60 | asmlinkage void int3(void); | ||
| 61 | asmlinkage void overflow(void); | ||
| 62 | asmlinkage void bounds(void); | ||
| 63 | asmlinkage void invalid_op(void); | ||
| 64 | asmlinkage void device_not_available(void); | ||
| 65 | asmlinkage void double_fault(void); | ||
| 66 | asmlinkage void coprocessor_segment_overrun(void); | ||
| 67 | asmlinkage void invalid_TSS(void); | ||
| 68 | asmlinkage void segment_not_present(void); | ||
| 69 | asmlinkage void stack_segment(void); | ||
| 70 | asmlinkage void general_protection(void); | ||
| 71 | asmlinkage void page_fault(void); | ||
| 72 | asmlinkage void coprocessor_error(void); | ||
| 73 | asmlinkage void simd_coprocessor_error(void); | ||
| 74 | asmlinkage void alignment_check(void); | ||
| 75 | asmlinkage void spurious_interrupt_bug(void); | ||
| 76 | asmlinkage void machine_check(void); | ||
| 77 | |||
| 78 | int panic_on_unrecovered_nmi; | ||
| 79 | int kstack_depth_to_print = 12; | ||
| 80 | static unsigned int code_bytes = 64; | ||
| 81 | static int ignore_nmis; | ||
| 82 | static int die_counter; | ||
| 83 | |||
| 84 | static inline void conditional_sti(struct pt_regs *regs) | ||
| 85 | { | ||
| 86 | if (regs->flags & X86_EFLAGS_IF) | ||
| 87 | local_irq_enable(); | ||
| 88 | } | ||
| 89 | |||
| 90 | static inline void preempt_conditional_sti(struct pt_regs *regs) | ||
| 91 | { | ||
| 92 | inc_preempt_count(); | ||
| 93 | if (regs->flags & X86_EFLAGS_IF) | ||
| 94 | local_irq_enable(); | ||
| 95 | } | ||
| 96 | |||
| 97 | static inline void preempt_conditional_cli(struct pt_regs *regs) | ||
| 98 | { | ||
| 99 | if (regs->flags & X86_EFLAGS_IF) | ||
| 100 | local_irq_disable(); | ||
| 101 | /* Make sure to not schedule here because we could be running | ||
| 102 | on an exception stack. */ | ||
| 103 | dec_preempt_count(); | ||
| 104 | } | ||
| 105 | |||
| 106 | void printk_address(unsigned long address, int reliable) | ||
| 107 | { | ||
| 108 | printk(" [<%016lx>] %s%pS\n", address, reliable ? "": "? ", (void *) address); | ||
| 109 | } | ||
| 110 | |||
| 111 | static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | ||
| 112 | unsigned *usedp, char **idp) | ||
| 113 | { | ||
| 114 | static char ids[][8] = { | ||
| 115 | [DEBUG_STACK - 1] = "#DB", | ||
| 116 | [NMI_STACK - 1] = "NMI", | ||
| 117 | [DOUBLEFAULT_STACK - 1] = "#DF", | ||
| 118 | [STACKFAULT_STACK - 1] = "#SS", | ||
| 119 | [MCE_STACK - 1] = "#MC", | ||
| 120 | #if DEBUG_STKSZ > EXCEPTION_STKSZ | ||
| 121 | [N_EXCEPTION_STACKS ... N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]" | ||
| 122 | #endif | ||
| 123 | }; | ||
| 124 | unsigned k; | ||
| 125 | |||
| 126 | /* | ||
| 127 | * Iterate over all exception stacks, and figure out whether | ||
| 128 | * 'stack' is in one of them: | ||
| 129 | */ | ||
| 130 | for (k = 0; k < N_EXCEPTION_STACKS; k++) { | ||
| 131 | unsigned long end = per_cpu(orig_ist, cpu).ist[k]; | ||
| 132 | /* | ||
| 133 | * Is 'stack' above this exception frame's end? | ||
| 134 | * If yes then skip to the next frame. | ||
| 135 | */ | ||
| 136 | if (stack >= end) | ||
| 137 | continue; | ||
| 138 | /* | ||
| 139 | * Is 'stack' above this exception frame's start address? | ||
| 140 | * If yes then we found the right frame. | ||
| 141 | */ | ||
| 142 | if (stack >= end - EXCEPTION_STKSZ) { | ||
| 143 | /* | ||
| 144 | * Make sure we only iterate through an exception | ||
| 145 | * stack once. If it comes up for the second time | ||
| 146 | * then there's something wrong going on - just | ||
| 147 | * break out and return NULL: | ||
| 148 | */ | ||
| 149 | if (*usedp & (1U << k)) | ||
| 150 | break; | ||
| 151 | *usedp |= 1U << k; | ||
| 152 | *idp = ids[k]; | ||
| 153 | return (unsigned long *)end; | ||
| 154 | } | ||
| 155 | /* | ||
| 156 | * If this is a debug stack, and if it has a larger size than | ||
| 157 | * the usual exception stacks, then 'stack' might still | ||
| 158 | * be within the lower portion of the debug stack: | ||
| 159 | */ | ||
| 160 | #if DEBUG_STKSZ > EXCEPTION_STKSZ | ||
| 161 | if (k == DEBUG_STACK - 1 && stack >= end - DEBUG_STKSZ) { | ||
| 162 | unsigned j = N_EXCEPTION_STACKS - 1; | ||
| 163 | |||
| 164 | /* | ||
| 165 | * Black magic. A large debug stack is composed of | ||
| 166 | * multiple exception stack entries, which we | ||
| 167 | * iterate through now. Dont look: | ||
| 168 | */ | ||
| 169 | do { | ||
| 170 | ++j; | ||
| 171 | end -= EXCEPTION_STKSZ; | ||
| 172 | ids[j][4] = '1' + (j - N_EXCEPTION_STACKS); | ||
| 173 | } while (stack < end - EXCEPTION_STKSZ); | ||
| 174 | if (*usedp & (1U << j)) | ||
| 175 | break; | ||
| 176 | *usedp |= 1U << j; | ||
| 177 | *idp = ids[j]; | ||
| 178 | return (unsigned long *)end; | ||
| 179 | } | ||
| 180 | #endif | ||
| 181 | } | ||
| 182 | return NULL; | ||
| 183 | } | ||
| 184 | |||
| 185 | /* | ||
| 186 | * x86-64 can have up to three kernel stacks: | ||
| 187 | * process stack | ||
| 188 | * interrupt stack | ||
| 189 | * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack | ||
| 190 | */ | ||
| 191 | |||
| 192 | static inline int valid_stack_ptr(struct thread_info *tinfo, | ||
| 193 | void *p, unsigned int size, void *end) | ||
| 194 | { | ||
| 195 | void *t = tinfo; | ||
| 196 | if (end) { | ||
| 197 | if (p < end && p >= (end-THREAD_SIZE)) | ||
| 198 | return 1; | ||
| 199 | else | ||
| 200 | return 0; | ||
| 201 | } | ||
| 202 | return p > t && p < t + THREAD_SIZE - size; | ||
| 203 | } | ||
| 204 | |||
| 205 | /* The form of the top of the frame on the stack */ | ||
| 206 | struct stack_frame { | ||
| 207 | struct stack_frame *next_frame; | ||
| 208 | unsigned long return_address; | ||
| 209 | }; | ||
| 210 | |||
| 211 | static inline unsigned long | ||
| 212 | print_context_stack(struct thread_info *tinfo, | ||
| 213 | unsigned long *stack, unsigned long bp, | ||
| 214 | const struct stacktrace_ops *ops, void *data, | ||
| 215 | unsigned long *end) | ||
| 216 | { | ||
| 217 | struct stack_frame *frame = (struct stack_frame *)bp; | ||
| 218 | |||
| 219 | while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) { | ||
| 220 | unsigned long addr; | ||
| 221 | |||
| 222 | addr = *stack; | ||
| 223 | if (__kernel_text_address(addr)) { | ||
| 224 | if ((unsigned long) stack == bp + 8) { | ||
| 225 | ops->address(data, addr, 1); | ||
| 226 | frame = frame->next_frame; | ||
| 227 | bp = (unsigned long) frame; | ||
| 228 | } else { | ||
| 229 | ops->address(data, addr, bp == 0); | ||
| 230 | } | ||
| 231 | } | ||
| 232 | stack++; | ||
| 233 | } | ||
| 234 | return bp; | ||
| 235 | } | ||
| 236 | |||
| 237 | void dump_trace(struct task_struct *task, struct pt_regs *regs, | ||
| 238 | unsigned long *stack, unsigned long bp, | ||
| 239 | const struct stacktrace_ops *ops, void *data) | ||
| 240 | { | ||
| 241 | const unsigned cpu = get_cpu(); | ||
| 242 | unsigned long *irqstack_end = (unsigned long*)cpu_pda(cpu)->irqstackptr; | ||
| 243 | unsigned used = 0; | ||
| 244 | struct thread_info *tinfo; | ||
| 245 | |||
| 246 | if (!task) | ||
| 247 | task = current; | ||
| 248 | |||
| 249 | if (!stack) { | ||
| 250 | unsigned long dummy; | ||
| 251 | stack = &dummy; | ||
| 252 | if (task && task != current) | ||
| 253 | stack = (unsigned long *)task->thread.sp; | ||
| 254 | } | ||
| 255 | |||
| 256 | #ifdef CONFIG_FRAME_POINTER | ||
| 257 | if (!bp) { | ||
| 258 | if (task == current) { | ||
| 259 | /* Grab bp right from our regs */ | ||
| 260 | asm("movq %%rbp, %0" : "=r" (bp) :); | ||
| 261 | } else { | ||
| 262 | /* bp is the last reg pushed by switch_to */ | ||
| 263 | bp = *(unsigned long *) task->thread.sp; | ||
| 264 | } | ||
| 265 | } | ||
| 266 | #endif | ||
| 267 | |||
| 268 | /* | ||
| 269 | * Print function call entries in all stacks, starting at the | ||
| 270 | * current stack address. If the stacks consist of nested | ||
| 271 | * exceptions | ||
| 272 | */ | ||
| 273 | tinfo = task_thread_info(task); | ||
| 274 | for (;;) { | ||
| 275 | char *id; | ||
| 276 | unsigned long *estack_end; | ||
| 277 | estack_end = in_exception_stack(cpu, (unsigned long)stack, | ||
| 278 | &used, &id); | ||
| 279 | |||
| 280 | if (estack_end) { | ||
| 281 | if (ops->stack(data, id) < 0) | ||
| 282 | break; | ||
| 283 | |||
| 284 | bp = print_context_stack(tinfo, stack, bp, ops, | ||
| 285 | data, estack_end); | ||
| 286 | ops->stack(data, "<EOE>"); | ||
| 287 | /* | ||
| 288 | * We link to the next stack via the | ||
| 289 | * second-to-last pointer (index -2 to end) in the | ||
| 290 | * exception stack: | ||
| 291 | */ | ||
| 292 | stack = (unsigned long *) estack_end[-2]; | ||
| 293 | continue; | ||
| 294 | } | ||
| 295 | if (irqstack_end) { | ||
| 296 | unsigned long *irqstack; | ||
| 297 | irqstack = irqstack_end - | ||
| 298 | (IRQSTACKSIZE - 64) / sizeof(*irqstack); | ||
| 299 | |||
| 300 | if (stack >= irqstack && stack < irqstack_end) { | ||
| 301 | if (ops->stack(data, "IRQ") < 0) | ||
| 302 | break; | ||
| 303 | bp = print_context_stack(tinfo, stack, bp, | ||
| 304 | ops, data, irqstack_end); | ||
| 305 | /* | ||
| 306 | * We link to the next stack (which would be | ||
| 307 | * the process stack normally) the last | ||
| 308 | * pointer (index -1 to end) in the IRQ stack: | ||
| 309 | */ | ||
| 310 | stack = (unsigned long *) (irqstack_end[-1]); | ||
| 311 | irqstack_end = NULL; | ||
| 312 | ops->stack(data, "EOI"); | ||
| 313 | continue; | ||
| 314 | } | ||
| 315 | } | ||
| 316 | break; | ||
| 317 | } | ||
| 318 | |||
| 319 | /* | ||
| 320 | * This handles the process stack: | ||
| 321 | */ | ||
| 322 | bp = print_context_stack(tinfo, stack, bp, ops, data, NULL); | ||
| 323 | put_cpu(); | ||
| 324 | } | ||
| 325 | EXPORT_SYMBOL(dump_trace); | ||
| 326 | |||
| 327 | static void | ||
| 328 | print_trace_warning_symbol(void *data, char *msg, unsigned long symbol) | ||
| 329 | { | ||
| 330 | print_symbol(msg, symbol); | ||
| 331 | printk("\n"); | ||
| 332 | } | ||
| 333 | |||
| 334 | static void print_trace_warning(void *data, char *msg) | ||
| 335 | { | ||
| 336 | printk("%s\n", msg); | ||
| 337 | } | ||
| 338 | |||
| 339 | static int print_trace_stack(void *data, char *name) | ||
| 340 | { | ||
| 341 | printk(" <%s> ", name); | ||
| 342 | return 0; | ||
| 343 | } | ||
| 344 | |||
| 345 | static void print_trace_address(void *data, unsigned long addr, int reliable) | ||
| 346 | { | ||
| 347 | touch_nmi_watchdog(); | ||
| 348 | printk_address(addr, reliable); | ||
| 349 | } | ||
| 350 | |||
| 351 | static const struct stacktrace_ops print_trace_ops = { | ||
| 352 | .warning = print_trace_warning, | ||
| 353 | .warning_symbol = print_trace_warning_symbol, | ||
| 354 | .stack = print_trace_stack, | ||
| 355 | .address = print_trace_address, | ||
| 356 | }; | ||
| 357 | |||
| 358 | void show_trace(struct task_struct *task, struct pt_regs *regs, | ||
| 359 | unsigned long *stack, unsigned long bp) | ||
| 360 | { | ||
| 361 | printk("\nCall Trace:\n"); | ||
| 362 | dump_trace(task, regs, stack, bp, &print_trace_ops, NULL); | ||
| 363 | printk("\n"); | ||
| 364 | } | ||
| 365 | |||
| 366 | static void | ||
| 367 | _show_stack(struct task_struct *task, struct pt_regs *regs, | ||
| 368 | unsigned long *sp, unsigned long bp) | ||
| 369 | { | ||
| 370 | unsigned long *stack; | ||
| 371 | int i; | ||
| 372 | const int cpu = smp_processor_id(); | ||
| 373 | unsigned long *irqstack_end = (unsigned long *) (cpu_pda(cpu)->irqstackptr); | ||
| 374 | unsigned long *irqstack = (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE); | ||
| 375 | |||
| 376 | // debugging aid: "show_stack(NULL, NULL);" prints the | ||
| 377 | // back trace for this cpu. | ||
| 378 | |||
| 379 | if (sp == NULL) { | ||
| 380 | if (task) | ||
| 381 | sp = (unsigned long *)task->thread.sp; | ||
| 382 | else | ||
| 383 | sp = (unsigned long *)&sp; | ||
| 384 | } | ||
| 385 | |||
| 386 | stack = sp; | ||
| 387 | for (i = 0; i < kstack_depth_to_print; i++) { | ||
| 388 | if (stack >= irqstack && stack <= irqstack_end) { | ||
| 389 | if (stack == irqstack_end) { | ||
| 390 | stack = (unsigned long *) (irqstack_end[-1]); | ||
| 391 | printk(" <EOI> "); | ||
| 392 | } | ||
| 393 | } else { | ||
| 394 | if (((long) stack & (THREAD_SIZE-1)) == 0) | ||
| 395 | break; | ||
| 396 | } | ||
| 397 | if (i && ((i % 4) == 0)) | ||
| 398 | printk("\n"); | ||
| 399 | printk(" %016lx", *stack++); | ||
| 400 | touch_nmi_watchdog(); | ||
| 401 | } | ||
| 402 | show_trace(task, regs, sp, bp); | ||
| 403 | } | ||
| 404 | |||
| 405 | void show_stack(struct task_struct *task, unsigned long *sp) | ||
| 406 | { | ||
| 407 | _show_stack(task, NULL, sp, 0); | ||
| 408 | } | ||
| 409 | |||
| 410 | /* | ||
| 411 | * The architecture-independent dump_stack generator | ||
| 412 | */ | ||
| 413 | void dump_stack(void) | ||
| 414 | { | ||
| 415 | unsigned long bp = 0; | ||
| 416 | unsigned long stack; | ||
| 417 | |||
| 418 | #ifdef CONFIG_FRAME_POINTER | ||
| 419 | if (!bp) | ||
| 420 | asm("movq %%rbp, %0" : "=r" (bp):); | ||
| 421 | #endif | ||
| 422 | |||
| 423 | printk("Pid: %d, comm: %.20s %s %s %.*s\n", | ||
| 424 | current->pid, current->comm, print_tainted(), | ||
| 425 | init_utsname()->release, | ||
| 426 | (int)strcspn(init_utsname()->version, " "), | ||
| 427 | init_utsname()->version); | ||
| 428 | show_trace(NULL, NULL, &stack, bp); | ||
| 429 | } | ||
| 430 | |||
| 431 | EXPORT_SYMBOL(dump_stack); | ||
| 432 | |||
| 433 | void show_registers(struct pt_regs *regs) | ||
| 434 | { | ||
| 435 | int i; | ||
| 436 | unsigned long sp; | ||
| 437 | const int cpu = smp_processor_id(); | ||
| 438 | struct task_struct *cur = cpu_pda(cpu)->pcurrent; | ||
| 439 | |||
| 440 | sp = regs->sp; | ||
| 441 | printk("CPU %d ", cpu); | ||
| 442 | __show_regs(regs); | ||
| 443 | printk("Process %s (pid: %d, threadinfo %p, task %p)\n", | ||
| 444 | cur->comm, cur->pid, task_thread_info(cur), cur); | ||
| 445 | |||
| 446 | /* | ||
| 447 | * When in-kernel, we also print out the stack and code at the | ||
| 448 | * time of the fault.. | ||
| 449 | */ | ||
| 450 | if (!user_mode(regs)) { | ||
| 451 | unsigned int code_prologue = code_bytes * 43 / 64; | ||
| 452 | unsigned int code_len = code_bytes; | ||
| 453 | unsigned char c; | ||
| 454 | u8 *ip; | ||
| 455 | |||
| 456 | printk("Stack: "); | ||
| 457 | _show_stack(NULL, regs, (unsigned long *)sp, regs->bp); | ||
| 458 | printk("\n"); | ||
| 459 | |||
| 460 | printk(KERN_EMERG "Code: "); | ||
| 461 | |||
| 462 | ip = (u8 *)regs->ip - code_prologue; | ||
| 463 | if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { | ||
| 464 | /* try starting at RIP */ | ||
| 465 | ip = (u8 *)regs->ip; | ||
| 466 | code_len = code_len - code_prologue + 1; | ||
| 467 | } | ||
| 468 | for (i = 0; i < code_len; i++, ip++) { | ||
| 469 | if (ip < (u8 *)PAGE_OFFSET || | ||
| 470 | probe_kernel_address(ip, c)) { | ||
| 471 | printk(" Bad RIP value."); | ||
| 472 | break; | ||
| 473 | } | ||
| 474 | if (ip == (u8 *)regs->ip) | ||
| 475 | printk("<%02x> ", c); | ||
| 476 | else | ||
| 477 | printk("%02x ", c); | ||
| 478 | } | ||
| 479 | } | ||
| 480 | printk("\n"); | ||
| 481 | } | ||
| 482 | |||
| 483 | int is_valid_bugaddr(unsigned long ip) | ||
| 484 | { | ||
| 485 | unsigned short ud2; | ||
| 486 | |||
| 487 | if (__copy_from_user(&ud2, (const void __user *) ip, sizeof(ud2))) | ||
| 488 | return 0; | ||
| 489 | |||
| 490 | return ud2 == 0x0b0f; | ||
| 491 | } | ||
| 492 | |||
| 493 | static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; | ||
| 494 | static int die_owner = -1; | ||
| 495 | static unsigned int die_nest_count; | ||
| 496 | |||
| 497 | unsigned __kprobes long oops_begin(void) | ||
| 498 | { | ||
| 499 | int cpu; | ||
| 500 | unsigned long flags; | ||
| 501 | |||
| 502 | oops_enter(); | ||
| 503 | |||
| 504 | /* racy, but better than risking deadlock. */ | ||
| 505 | raw_local_irq_save(flags); | ||
| 506 | cpu = smp_processor_id(); | ||
| 507 | if (!__raw_spin_trylock(&die_lock)) { | ||
| 508 | if (cpu == die_owner) | ||
| 509 | /* nested oops. should stop eventually */; | ||
| 510 | else | ||
| 511 | __raw_spin_lock(&die_lock); | ||
| 512 | } | ||
| 513 | die_nest_count++; | ||
| 514 | die_owner = cpu; | ||
| 515 | console_verbose(); | ||
| 516 | bust_spinlocks(1); | ||
| 517 | return flags; | ||
| 518 | } | ||
| 519 | |||
| 520 | void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) | ||
| 521 | { | ||
| 522 | die_owner = -1; | ||
| 523 | bust_spinlocks(0); | ||
| 524 | die_nest_count--; | ||
| 525 | if (!die_nest_count) | ||
| 526 | /* Nest count reaches zero, release the lock. */ | ||
| 527 | __raw_spin_unlock(&die_lock); | ||
| 528 | raw_local_irq_restore(flags); | ||
| 529 | if (!regs) { | ||
| 530 | oops_exit(); | ||
| 531 | return; | ||
| 532 | } | ||
| 533 | if (panic_on_oops) | ||
| 534 | panic("Fatal exception"); | ||
| 535 | oops_exit(); | ||
| 536 | do_exit(signr); | ||
| 537 | } | ||
| 538 | |||
| 539 | int __kprobes __die(const char *str, struct pt_regs *regs, long err) | ||
| 540 | { | ||
| 541 | printk(KERN_EMERG "%s: %04lx [%u] ", str, err & 0xffff, ++die_counter); | ||
| 542 | #ifdef CONFIG_PREEMPT | ||
| 543 | printk("PREEMPT "); | ||
| 544 | #endif | ||
| 545 | #ifdef CONFIG_SMP | ||
| 546 | printk("SMP "); | ||
| 547 | #endif | ||
| 548 | #ifdef CONFIG_DEBUG_PAGEALLOC | ||
| 549 | printk("DEBUG_PAGEALLOC"); | ||
| 550 | #endif | ||
| 551 | printk("\n"); | ||
| 552 | if (notify_die(DIE_OOPS, str, regs, err, | ||
| 553 | current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) | ||
| 554 | return 1; | ||
| 555 | |||
| 556 | show_registers(regs); | ||
| 557 | add_taint(TAINT_DIE); | ||
| 558 | /* Executive summary in case the oops scrolled away */ | ||
| 559 | printk(KERN_ALERT "RIP "); | ||
| 560 | printk_address(regs->ip, 1); | ||
| 561 | printk(" RSP <%016lx>\n", regs->sp); | ||
| 562 | if (kexec_should_crash(current)) | ||
| 563 | crash_kexec(regs); | ||
| 564 | return 0; | ||
| 565 | } | ||
| 566 | |||
| 567 | void die(const char *str, struct pt_regs *regs, long err) | ||
| 568 | { | ||
| 569 | unsigned long flags = oops_begin(); | ||
| 570 | |||
| 571 | if (!user_mode(regs)) | ||
| 572 | report_bug(regs->ip, regs); | ||
| 573 | |||
| 574 | if (__die(str, regs, err)) | ||
| 575 | regs = NULL; | ||
| 576 | oops_end(flags, regs, SIGSEGV); | ||
| 577 | } | ||
| 578 | |||
| 579 | notrace __kprobes void | ||
| 580 | die_nmi(char *str, struct pt_regs *regs, int do_panic) | ||
| 581 | { | ||
| 582 | unsigned long flags; | ||
| 583 | |||
| 584 | if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) | ||
| 585 | return; | ||
| 586 | |||
| 587 | flags = oops_begin(); | ||
| 588 | /* | ||
| 589 | * We are in trouble anyway, lets at least try | ||
| 590 | * to get a message out. | ||
| 591 | */ | ||
| 592 | printk(KERN_EMERG "%s", str); | ||
| 593 | printk(" on CPU%d, ip %08lx, registers:\n", | ||
| 594 | smp_processor_id(), regs->ip); | ||
| 595 | show_registers(regs); | ||
| 596 | if (kexec_should_crash(current)) | ||
| 597 | crash_kexec(regs); | ||
| 598 | if (do_panic || panic_on_oops) | ||
| 599 | panic("Non maskable interrupt"); | ||
| 600 | oops_end(flags, NULL, SIGBUS); | ||
| 601 | nmi_exit(); | ||
| 602 | local_irq_enable(); | ||
| 603 | do_exit(SIGBUS); | ||
| 604 | } | ||
| 605 | |||
| 606 | static void __kprobes | ||
| 607 | do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, | ||
| 608 | long error_code, siginfo_t *info) | ||
| 609 | { | ||
| 610 | struct task_struct *tsk = current; | ||
| 611 | |||
| 612 | if (!user_mode(regs)) | ||
| 613 | goto kernel_trap; | ||
| 614 | |||
| 615 | /* | ||
| 616 | * We want error_code and trap_no set for userspace faults and | ||
| 617 | * kernelspace faults which result in die(), but not | ||
| 618 | * kernelspace faults which are fixed up. die() gives the | ||
| 619 | * process no chance to handle the signal and notice the | ||
| 620 | * kernel fault information, so that won't result in polluting | ||
| 621 | * the information about previously queued, but not yet | ||
| 622 | * delivered, faults. See also do_general_protection below. | ||
| 623 | */ | ||
| 624 | tsk->thread.error_code = error_code; | ||
| 625 | tsk->thread.trap_no = trapnr; | ||
| 626 | |||
| 627 | if (show_unhandled_signals && unhandled_signal(tsk, signr) && | ||
| 628 | printk_ratelimit()) { | ||
| 629 | printk(KERN_INFO | ||
| 630 | "%s[%d] trap %s ip:%lx sp:%lx error:%lx", | ||
| 631 | tsk->comm, tsk->pid, str, | ||
| 632 | regs->ip, regs->sp, error_code); | ||
| 633 | print_vma_addr(" in ", regs->ip); | ||
| 634 | printk("\n"); | ||
| 635 | } | ||
| 636 | |||
| 637 | if (info) | ||
| 638 | force_sig_info(signr, info, tsk); | ||
| 639 | else | ||
| 640 | force_sig(signr, tsk); | ||
| 641 | return; | ||
| 642 | |||
| 643 | kernel_trap: | ||
| 644 | if (!fixup_exception(regs)) { | ||
| 645 | tsk->thread.error_code = error_code; | ||
| 646 | tsk->thread.trap_no = trapnr; | ||
| 647 | die(str, regs, error_code); | ||
| 648 | } | ||
| 649 | return; | ||
| 650 | } | ||
| 651 | |||
| 652 | #define DO_ERROR(trapnr, signr, str, name) \ | ||
| 653 | asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ | ||
| 654 | { \ | ||
| 655 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ | ||
| 656 | == NOTIFY_STOP) \ | ||
| 657 | return; \ | ||
| 658 | conditional_sti(regs); \ | ||
| 659 | do_trap(trapnr, signr, str, regs, error_code, NULL); \ | ||
| 660 | } | ||
| 661 | |||
| 662 | #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ | ||
| 663 | asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ | ||
| 664 | { \ | ||
| 665 | siginfo_t info; \ | ||
| 666 | info.si_signo = signr; \ | ||
| 667 | info.si_errno = 0; \ | ||
| 668 | info.si_code = sicode; \ | ||
| 669 | info.si_addr = (void __user *)siaddr; \ | ||
| 670 | trace_hardirqs_fixup(); \ | ||
| 671 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ | ||
| 672 | == NOTIFY_STOP) \ | ||
| 673 | return; \ | ||
| 674 | conditional_sti(regs); \ | ||
| 675 | do_trap(trapnr, signr, str, regs, error_code, &info); \ | ||
| 676 | } | ||
| 677 | |||
| 678 | DO_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip) | ||
| 679 | DO_ERROR(4, SIGSEGV, "overflow", overflow) | ||
| 680 | DO_ERROR(5, SIGSEGV, "bounds", bounds) | ||
| 681 | DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip) | ||
| 682 | DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) | ||
| 683 | DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) | ||
| 684 | DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) | ||
| 685 | DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) | ||
| 686 | |||
| 687 | /* Runs on IST stack */ | ||
| 688 | asmlinkage void do_stack_segment(struct pt_regs *regs, long error_code) | ||
| 689 | { | ||
| 690 | if (notify_die(DIE_TRAP, "stack segment", regs, error_code, | ||
| 691 | 12, SIGBUS) == NOTIFY_STOP) | ||
| 692 | return; | ||
| 693 | preempt_conditional_sti(regs); | ||
| 694 | do_trap(12, SIGBUS, "stack segment", regs, error_code, NULL); | ||
| 695 | preempt_conditional_cli(regs); | ||
| 696 | } | ||
| 697 | |||
| 698 | asmlinkage void do_double_fault(struct pt_regs * regs, long error_code) | ||
| 699 | { | ||
| 700 | static const char str[] = "double fault"; | ||
| 701 | struct task_struct *tsk = current; | ||
| 702 | |||
| 703 | /* Return not checked because double check cannot be ignored */ | ||
| 704 | notify_die(DIE_TRAP, str, regs, error_code, 8, SIGSEGV); | ||
| 705 | |||
| 706 | tsk->thread.error_code = error_code; | ||
| 707 | tsk->thread.trap_no = 8; | ||
| 708 | |||
| 709 | /* This is always a kernel trap and never fixable (and thus must | ||
| 710 | never return). */ | ||
| 711 | for (;;) | ||
| 712 | die(str, regs, error_code); | ||
| 713 | } | ||
| 714 | |||
| 715 | asmlinkage void __kprobes | ||
| 716 | do_general_protection(struct pt_regs *regs, long error_code) | ||
| 717 | { | ||
| 718 | struct task_struct *tsk; | ||
| 719 | |||
| 720 | conditional_sti(regs); | ||
| 721 | |||
| 722 | tsk = current; | ||
| 723 | if (!user_mode(regs)) | ||
| 724 | goto gp_in_kernel; | ||
| 725 | |||
| 726 | tsk->thread.error_code = error_code; | ||
| 727 | tsk->thread.trap_no = 13; | ||
| 728 | |||
| 729 | if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && | ||
| 730 | printk_ratelimit()) { | ||
| 731 | printk(KERN_INFO | ||
| 732 | "%s[%d] general protection ip:%lx sp:%lx error:%lx", | ||
| 733 | tsk->comm, tsk->pid, | ||
| 734 | regs->ip, regs->sp, error_code); | ||
| 735 | print_vma_addr(" in ", regs->ip); | ||
| 736 | printk("\n"); | ||
| 737 | } | ||
| 738 | |||
| 739 | force_sig(SIGSEGV, tsk); | ||
| 740 | return; | ||
| 741 | |||
| 742 | gp_in_kernel: | ||
| 743 | if (fixup_exception(regs)) | ||
| 744 | return; | ||
| 745 | |||
| 746 | tsk->thread.error_code = error_code; | ||
| 747 | tsk->thread.trap_no = 13; | ||
| 748 | if (notify_die(DIE_GPF, "general protection fault", regs, | ||
| 749 | error_code, 13, SIGSEGV) == NOTIFY_STOP) | ||
| 750 | return; | ||
| 751 | die("general protection fault", regs, error_code); | ||
| 752 | } | ||
| 753 | |||
| 754 | static notrace __kprobes void | ||
| 755 | mem_parity_error(unsigned char reason, struct pt_regs *regs) | ||
| 756 | { | ||
| 757 | printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n", | ||
| 758 | reason); | ||
| 759 | printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n"); | ||
| 760 | |||
| 761 | #if defined(CONFIG_EDAC) | ||
| 762 | if (edac_handler_set()) { | ||
| 763 | edac_atomic_assert_error(); | ||
| 764 | return; | ||
| 765 | } | ||
| 766 | #endif | ||
| 767 | |||
| 768 | if (panic_on_unrecovered_nmi) | ||
| 769 | panic("NMI: Not continuing"); | ||
| 770 | |||
| 771 | printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); | ||
| 772 | |||
| 773 | /* Clear and disable the memory parity error line. */ | ||
| 774 | reason = (reason & 0xf) | 4; | ||
| 775 | outb(reason, 0x61); | ||
| 776 | } | ||
| 777 | |||
| 778 | static notrace __kprobes void | ||
| 779 | io_check_error(unsigned char reason, struct pt_regs *regs) | ||
| 780 | { | ||
| 781 | printk("NMI: IOCK error (debug interrupt?)\n"); | ||
| 782 | show_registers(regs); | ||
| 783 | |||
| 784 | /* Re-enable the IOCK line, wait for a few seconds */ | ||
| 785 | reason = (reason & 0xf) | 8; | ||
| 786 | outb(reason, 0x61); | ||
| 787 | mdelay(2000); | ||
| 788 | reason &= ~8; | ||
| 789 | outb(reason, 0x61); | ||
| 790 | } | ||
| 791 | |||
| 792 | static notrace __kprobes void | ||
| 793 | unknown_nmi_error(unsigned char reason, struct pt_regs * regs) | ||
| 794 | { | ||
| 795 | if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) | ||
| 796 | return; | ||
| 797 | printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n", | ||
| 798 | reason); | ||
| 799 | printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n"); | ||
| 800 | |||
| 801 | if (panic_on_unrecovered_nmi) | ||
| 802 | panic("NMI: Not continuing"); | ||
| 803 | |||
| 804 | printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); | ||
| 805 | } | ||
| 806 | |||
| 807 | /* Runs on IST stack. This code must keep interrupts off all the time. | ||
| 808 | Nested NMIs are prevented by the CPU. */ | ||
| 809 | asmlinkage notrace __kprobes void default_do_nmi(struct pt_regs *regs) | ||
| 810 | { | ||
| 811 | unsigned char reason = 0; | ||
| 812 | int cpu; | ||
| 813 | |||
| 814 | cpu = smp_processor_id(); | ||
| 815 | |||
| 816 | /* Only the BSP gets external NMIs from the system. */ | ||
| 817 | if (!cpu) | ||
| 818 | reason = get_nmi_reason(); | ||
| 819 | |||
| 820 | if (!(reason & 0xc0)) { | ||
| 821 | if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT) | ||
| 822 | == NOTIFY_STOP) | ||
| 823 | return; | ||
| 824 | /* | ||
| 825 | * Ok, so this is none of the documented NMI sources, | ||
| 826 | * so it must be the NMI watchdog. | ||
| 827 | */ | ||
| 828 | if (nmi_watchdog_tick(regs, reason)) | ||
| 829 | return; | ||
| 830 | if (!do_nmi_callback(regs, cpu)) | ||
| 831 | unknown_nmi_error(reason, regs); | ||
| 832 | |||
| 833 | return; | ||
| 834 | } | ||
| 835 | if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) | ||
| 836 | return; | ||
| 837 | |||
| 838 | /* AK: following checks seem to be broken on modern chipsets. FIXME */ | ||
| 839 | if (reason & 0x80) | ||
| 840 | mem_parity_error(reason, regs); | ||
| 841 | if (reason & 0x40) | ||
| 842 | io_check_error(reason, regs); | ||
| 843 | } | ||
| 844 | |||
| 845 | asmlinkage notrace __kprobes void | ||
| 846 | do_nmi(struct pt_regs *regs, long error_code) | ||
| 847 | { | ||
| 848 | nmi_enter(); | ||
| 849 | |||
| 850 | add_pda(__nmi_count, 1); | ||
| 851 | |||
| 852 | if (!ignore_nmis) | ||
| 853 | default_do_nmi(regs); | ||
| 854 | |||
| 855 | nmi_exit(); | ||
| 856 | } | ||
| 857 | |||
| 858 | void stop_nmi(void) | ||
| 859 | { | ||
| 860 | acpi_nmi_disable(); | ||
| 861 | ignore_nmis++; | ||
| 862 | } | ||
| 863 | |||
| 864 | void restart_nmi(void) | ||
| 865 | { | ||
| 866 | ignore_nmis--; | ||
| 867 | acpi_nmi_enable(); | ||
| 868 | } | ||
| 869 | |||
| 870 | /* runs on IST stack. */ | ||
| 871 | asmlinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) | ||
| 872 | { | ||
| 873 | trace_hardirqs_fixup(); | ||
| 874 | |||
| 875 | if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) | ||
| 876 | == NOTIFY_STOP) | ||
| 877 | return; | ||
| 878 | |||
| 879 | preempt_conditional_sti(regs); | ||
| 880 | do_trap(3, SIGTRAP, "int3", regs, error_code, NULL); | ||
| 881 | preempt_conditional_cli(regs); | ||
| 882 | } | ||
| 883 | |||
| 884 | /* Help handler running on IST stack to switch back to user stack | ||
| 885 | for scheduling or signal handling. The actual stack switch is done in | ||
| 886 | entry.S */ | ||
| 887 | asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) | ||
| 888 | { | ||
| 889 | struct pt_regs *regs = eregs; | ||
| 890 | /* Did already sync */ | ||
| 891 | if (eregs == (struct pt_regs *)eregs->sp) | ||
| 892 | ; | ||
| 893 | /* Exception from user space */ | ||
| 894 | else if (user_mode(eregs)) | ||
| 895 | regs = task_pt_regs(current); | ||
| 896 | /* Exception from kernel and interrupts are enabled. Move to | ||
| 897 | kernel process stack. */ | ||
| 898 | else if (eregs->flags & X86_EFLAGS_IF) | ||
| 899 | regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs)); | ||
| 900 | if (eregs != regs) | ||
| 901 | *regs = *eregs; | ||
| 902 | return regs; | ||
| 903 | } | ||
| 904 | |||
| 905 | /* runs on IST stack. */ | ||
| 906 | asmlinkage void __kprobes do_debug(struct pt_regs * regs, | ||
| 907 | unsigned long error_code) | ||
| 908 | { | ||
| 909 | struct task_struct *tsk = current; | ||
| 910 | unsigned long condition; | ||
| 911 | siginfo_t info; | ||
| 912 | |||
| 913 | trace_hardirqs_fixup(); | ||
| 914 | |||
| 915 | get_debugreg(condition, 6); | ||
| 916 | |||
| 917 | /* | ||
| 918 | * The processor cleared BTF, so don't mark that we need it set. | ||
| 919 | */ | ||
| 920 | clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR); | ||
| 921 | tsk->thread.debugctlmsr = 0; | ||
| 922 | |||
| 923 | if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, | ||
| 924 | SIGTRAP) == NOTIFY_STOP) | ||
| 925 | return; | ||
| 926 | |||
| 927 | preempt_conditional_sti(regs); | ||
| 928 | |||
| 929 | /* Mask out spurious debug traps due to lazy DR7 setting */ | ||
| 930 | if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { | ||
| 931 | if (!tsk->thread.debugreg7) | ||
| 932 | goto clear_dr7; | ||
| 933 | } | ||
| 934 | |||
| 935 | tsk->thread.debugreg6 = condition; | ||
| 936 | |||
| 937 | /* | ||
| 938 | * Single-stepping through TF: make sure we ignore any events in | ||
| 939 | * kernel space (but re-enable TF when returning to user mode). | ||
| 940 | */ | ||
| 941 | if (condition & DR_STEP) { | ||
| 942 | if (!user_mode(regs)) | ||
| 943 | goto clear_TF_reenable; | ||
| 944 | } | ||
| 945 | |||
| 946 | /* Ok, finally something we can handle */ | ||
| 947 | tsk->thread.trap_no = 1; | ||
| 948 | tsk->thread.error_code = error_code; | ||
| 949 | info.si_signo = SIGTRAP; | ||
| 950 | info.si_errno = 0; | ||
| 951 | info.si_code = TRAP_BRKPT; | ||
| 952 | info.si_addr = user_mode(regs) ? (void __user *)regs->ip : NULL; | ||
| 953 | force_sig_info(SIGTRAP, &info, tsk); | ||
| 954 | |||
| 955 | clear_dr7: | ||
| 956 | set_debugreg(0, 7); | ||
| 957 | preempt_conditional_cli(regs); | ||
| 958 | return; | ||
| 959 | |||
| 960 | clear_TF_reenable: | ||
| 961 | set_tsk_thread_flag(tsk, TIF_SINGLESTEP); | ||
| 962 | regs->flags &= ~X86_EFLAGS_TF; | ||
| 963 | preempt_conditional_cli(regs); | ||
| 964 | return; | ||
| 965 | } | ||
| 966 | |||
| 967 | static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr) | ||
| 968 | { | ||
| 969 | if (fixup_exception(regs)) | ||
| 970 | return 1; | ||
| 971 | |||
| 972 | notify_die(DIE_GPF, str, regs, 0, trapnr, SIGFPE); | ||
| 973 | /* Illegal floating point operation in the kernel */ | ||
| 974 | current->thread.trap_no = trapnr; | ||
| 975 | die(str, regs, 0); | ||
| 976 | return 0; | ||
| 977 | } | ||
| 978 | |||
| 979 | /* | ||
| 980 | * Note that we play around with the 'TS' bit in an attempt to get | ||
| 981 | * the correct behaviour even in the presence of the asynchronous | ||
| 982 | * IRQ13 behaviour | ||
| 983 | */ | ||
| 984 | asmlinkage void do_coprocessor_error(struct pt_regs *regs) | ||
| 985 | { | ||
| 986 | void __user *ip = (void __user *)(regs->ip); | ||
| 987 | struct task_struct *task; | ||
| 988 | siginfo_t info; | ||
| 989 | unsigned short cwd, swd; | ||
| 990 | |||
| 991 | conditional_sti(regs); | ||
| 992 | if (!user_mode(regs) && | ||
| 993 | kernel_math_error(regs, "kernel x87 math error", 16)) | ||
| 994 | return; | ||
| 995 | |||
| 996 | /* | ||
| 997 | * Save the info for the exception handler and clear the error. | ||
| 998 | */ | ||
| 999 | task = current; | ||
| 1000 | save_init_fpu(task); | ||
| 1001 | task->thread.trap_no = 16; | ||
| 1002 | task->thread.error_code = 0; | ||
| 1003 | info.si_signo = SIGFPE; | ||
| 1004 | info.si_errno = 0; | ||
| 1005 | info.si_code = __SI_FAULT; | ||
| 1006 | info.si_addr = ip; | ||
| 1007 | /* | ||
| 1008 | * (~cwd & swd) will mask out exceptions that are not set to unmasked | ||
| 1009 | * status. 0x3f is the exception bits in these regs, 0x200 is the | ||
| 1010 | * C1 reg you need in case of a stack fault, 0x040 is the stack | ||
| 1011 | * fault bit. We should only be taking one exception at a time, | ||
| 1012 | * so if this combination doesn't produce any single exception, | ||
| 1013 | * then we have a bad program that isn't synchronizing its FPU usage | ||
| 1014 | * and it will suffer the consequences since we won't be able to | ||
| 1015 | * fully reproduce the context of the exception | ||
| 1016 | */ | ||
| 1017 | cwd = get_fpu_cwd(task); | ||
| 1018 | swd = get_fpu_swd(task); | ||
| 1019 | switch (swd & ~cwd & 0x3f) { | ||
| 1020 | case 0x000: /* No unmasked exception */ | ||
| 1021 | default: /* Multiple exceptions */ | ||
| 1022 | break; | ||
| 1023 | case 0x001: /* Invalid Op */ | ||
| 1024 | /* | ||
| 1025 | * swd & 0x240 == 0x040: Stack Underflow | ||
| 1026 | * swd & 0x240 == 0x240: Stack Overflow | ||
| 1027 | * User must clear the SF bit (0x40) if set | ||
| 1028 | */ | ||
| 1029 | info.si_code = FPE_FLTINV; | ||
| 1030 | break; | ||
| 1031 | case 0x002: /* Denormalize */ | ||
| 1032 | case 0x010: /* Underflow */ | ||
| 1033 | info.si_code = FPE_FLTUND; | ||
| 1034 | break; | ||
| 1035 | case 0x004: /* Zero Divide */ | ||
| 1036 | info.si_code = FPE_FLTDIV; | ||
| 1037 | break; | ||
| 1038 | case 0x008: /* Overflow */ | ||
| 1039 | info.si_code = FPE_FLTOVF; | ||
| 1040 | break; | ||
| 1041 | case 0x020: /* Precision */ | ||
| 1042 | info.si_code = FPE_FLTRES; | ||
| 1043 | break; | ||
| 1044 | } | ||
| 1045 | force_sig_info(SIGFPE, &info, task); | ||
| 1046 | } | ||
| 1047 | |||
| 1048 | asmlinkage void bad_intr(void) | ||
| 1049 | { | ||
| 1050 | printk("bad interrupt"); | ||
| 1051 | } | ||
| 1052 | |||
| 1053 | asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs) | ||
| 1054 | { | ||
| 1055 | void __user *ip = (void __user *)(regs->ip); | ||
| 1056 | struct task_struct *task; | ||
| 1057 | siginfo_t info; | ||
| 1058 | unsigned short mxcsr; | ||
| 1059 | |||
| 1060 | conditional_sti(regs); | ||
| 1061 | if (!user_mode(regs) && | ||
| 1062 | kernel_math_error(regs, "kernel simd math error", 19)) | ||
| 1063 | return; | ||
| 1064 | |||
| 1065 | /* | ||
| 1066 | * Save the info for the exception handler and clear the error. | ||
| 1067 | */ | ||
| 1068 | task = current; | ||
| 1069 | save_init_fpu(task); | ||
| 1070 | task->thread.trap_no = 19; | ||
| 1071 | task->thread.error_code = 0; | ||
| 1072 | info.si_signo = SIGFPE; | ||
| 1073 | info.si_errno = 0; | ||
| 1074 | info.si_code = __SI_FAULT; | ||
| 1075 | info.si_addr = ip; | ||
| 1076 | /* | ||
| 1077 | * The SIMD FPU exceptions are handled a little differently, as there | ||
| 1078 | * is only a single status/control register. Thus, to determine which | ||
| 1079 | * unmasked exception was caught we must mask the exception mask bits | ||
| 1080 | * at 0x1f80, and then use these to mask the exception bits at 0x3f. | ||
| 1081 | */ | ||
| 1082 | mxcsr = get_fpu_mxcsr(task); | ||
| 1083 | switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) { | ||
| 1084 | case 0x000: | ||
| 1085 | default: | ||
| 1086 | break; | ||
| 1087 | case 0x001: /* Invalid Op */ | ||
| 1088 | info.si_code = FPE_FLTINV; | ||
| 1089 | break; | ||
| 1090 | case 0x002: /* Denormalize */ | ||
| 1091 | case 0x010: /* Underflow */ | ||
| 1092 | info.si_code = FPE_FLTUND; | ||
| 1093 | break; | ||
| 1094 | case 0x004: /* Zero Divide */ | ||
| 1095 | info.si_code = FPE_FLTDIV; | ||
| 1096 | break; | ||
| 1097 | case 0x008: /* Overflow */ | ||
| 1098 | info.si_code = FPE_FLTOVF; | ||
| 1099 | break; | ||
| 1100 | case 0x020: /* Precision */ | ||
| 1101 | info.si_code = FPE_FLTRES; | ||
| 1102 | break; | ||
| 1103 | } | ||
| 1104 | force_sig_info(SIGFPE, &info, task); | ||
| 1105 | } | ||
| 1106 | |||
| 1107 | asmlinkage void do_spurious_interrupt_bug(struct pt_regs * regs) | ||
| 1108 | { | ||
| 1109 | } | ||
| 1110 | |||
| 1111 | asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void) | ||
| 1112 | { | ||
| 1113 | } | ||
| 1114 | |||
| 1115 | asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void) | ||
| 1116 | { | ||
| 1117 | } | ||
| 1118 | |||
| 1119 | /* | ||
| 1120 | * 'math_state_restore()' saves the current math information in the | ||
| 1121 | * old math state array, and gets the new ones from the current task | ||
| 1122 | * | ||
| 1123 | * Careful.. There are problems with IBM-designed IRQ13 behaviour. | ||
| 1124 | * Don't touch unless you *really* know how it works. | ||
| 1125 | */ | ||
| 1126 | asmlinkage void math_state_restore(void) | ||
| 1127 | { | ||
| 1128 | struct task_struct *me = current; | ||
| 1129 | |||
| 1130 | if (!used_math()) { | ||
| 1131 | local_irq_enable(); | ||
| 1132 | /* | ||
| 1133 | * does a slab alloc which can sleep | ||
| 1134 | */ | ||
| 1135 | if (init_fpu(me)) { | ||
| 1136 | /* | ||
| 1137 | * ran out of memory! | ||
| 1138 | */ | ||
| 1139 | do_group_exit(SIGKILL); | ||
| 1140 | return; | ||
| 1141 | } | ||
| 1142 | local_irq_disable(); | ||
| 1143 | } | ||
| 1144 | |||
| 1145 | clts(); /* Allow maths ops (or we recurse) */ | ||
| 1146 | restore_fpu_checking(&me->thread.xstate->fxsave); | ||
| 1147 | task_thread_info(me)->status |= TS_USEDFPU; | ||
| 1148 | me->fpu_counter++; | ||
| 1149 | } | ||
| 1150 | EXPORT_SYMBOL_GPL(math_state_restore); | ||
| 1151 | |||
| 1152 | void __init trap_init(void) | ||
| 1153 | { | ||
| 1154 | set_intr_gate(0, ÷_error); | ||
| 1155 | set_intr_gate_ist(1, &debug, DEBUG_STACK); | ||
| 1156 | set_intr_gate_ist(2, &nmi, NMI_STACK); | ||
| 1157 | set_system_gate_ist(3, &int3, DEBUG_STACK); /* int3 can be called from all */ | ||
| 1158 | set_system_gate(4, &overflow); /* int4 can be called from all */ | ||
| 1159 | set_intr_gate(5, &bounds); | ||
| 1160 | set_intr_gate(6, &invalid_op); | ||
| 1161 | set_intr_gate(7, &device_not_available); | ||
| 1162 | set_intr_gate_ist(8, &double_fault, DOUBLEFAULT_STACK); | ||
| 1163 | set_intr_gate(9, &coprocessor_segment_overrun); | ||
| 1164 | set_intr_gate(10, &invalid_TSS); | ||
| 1165 | set_intr_gate(11, &segment_not_present); | ||
| 1166 | set_intr_gate_ist(12, &stack_segment, STACKFAULT_STACK); | ||
| 1167 | set_intr_gate(13, &general_protection); | ||
| 1168 | set_intr_gate(14, &page_fault); | ||
| 1169 | set_intr_gate(15, &spurious_interrupt_bug); | ||
| 1170 | set_intr_gate(16, &coprocessor_error); | ||
| 1171 | set_intr_gate(17, &alignment_check); | ||
| 1172 | #ifdef CONFIG_X86_MCE | ||
| 1173 | set_intr_gate_ist(18, &machine_check, MCE_STACK); | ||
| 1174 | #endif | ||
| 1175 | set_intr_gate(19, &simd_coprocessor_error); | ||
| 1176 | |||
| 1177 | #ifdef CONFIG_IA32_EMULATION | ||
| 1178 | set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall); | ||
| 1179 | #endif | ||
| 1180 | /* | ||
| 1181 | * initialize the per thread extended state: | ||
| 1182 | */ | ||
| 1183 | init_thread_xstate(); | ||
| 1184 | /* | ||
| 1185 | * Should be a barrier for any external CPU state: | ||
| 1186 | */ | ||
| 1187 | cpu_init(); | ||
| 1188 | } | ||
| 1189 | |||
| 1190 | static int __init oops_setup(char *s) | ||
| 1191 | { | ||
| 1192 | if (!s) | ||
| 1193 | return -EINVAL; | ||
| 1194 | if (!strcmp(s, "panic")) | ||
| 1195 | panic_on_oops = 1; | ||
| 1196 | return 0; | ||
| 1197 | } | ||
| 1198 | early_param("oops", oops_setup); | ||
| 1199 | |||
| 1200 | static int __init kstack_setup(char *s) | ||
| 1201 | { | ||
| 1202 | if (!s) | ||
| 1203 | return -EINVAL; | ||
| 1204 | kstack_depth_to_print = simple_strtoul(s, NULL, 0); | ||
| 1205 | return 0; | ||
| 1206 | } | ||
| 1207 | early_param("kstack", kstack_setup); | ||
| 1208 | |||
| 1209 | static int __init code_bytes_setup(char *s) | ||
| 1210 | { | ||
| 1211 | code_bytes = simple_strtoul(s, NULL, 0); | ||
| 1212 | if (code_bytes > 8192) | ||
| 1213 | code_bytes = 8192; | ||
| 1214 | |||
| 1215 | return 1; | ||
| 1216 | } | ||
| 1217 | __setup("code_bytes=", code_bytes_setup); | ||
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 7603c0553909..161bb850fc47 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c | |||
| @@ -104,7 +104,7 @@ __setup("notsc", notsc_setup); | |||
| 104 | /* | 104 | /* |
| 105 | * Read TSC and the reference counters. Take care of SMI disturbance | 105 | * Read TSC and the reference counters. Take care of SMI disturbance |
| 106 | */ | 106 | */ |
| 107 | static u64 __init tsc_read_refs(u64 *pm, u64 *hpet) | 107 | static u64 tsc_read_refs(u64 *p, int hpet) |
| 108 | { | 108 | { |
| 109 | u64 t1, t2; | 109 | u64 t1, t2; |
| 110 | int i; | 110 | int i; |
| @@ -112,9 +112,9 @@ static u64 __init tsc_read_refs(u64 *pm, u64 *hpet) | |||
| 112 | for (i = 0; i < MAX_RETRIES; i++) { | 112 | for (i = 0; i < MAX_RETRIES; i++) { |
| 113 | t1 = get_cycles(); | 113 | t1 = get_cycles(); |
| 114 | if (hpet) | 114 | if (hpet) |
| 115 | *hpet = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF; | 115 | *p = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF; |
| 116 | else | 116 | else |
| 117 | *pm = acpi_pm_read_early(); | 117 | *p = acpi_pm_read_early(); |
| 118 | t2 = get_cycles(); | 118 | t2 = get_cycles(); |
| 119 | if ((t2 - t1) < SMI_TRESHOLD) | 119 | if ((t2 - t1) < SMI_TRESHOLD) |
| 120 | return t2; | 120 | return t2; |
| @@ -122,80 +122,390 @@ static u64 __init tsc_read_refs(u64 *pm, u64 *hpet) | |||
| 122 | return ULLONG_MAX; | 122 | return ULLONG_MAX; |
| 123 | } | 123 | } |
| 124 | 124 | ||
| 125 | /** | 125 | /* |
| 126 | * native_calibrate_tsc - calibrate the tsc on boot | 126 | * Calculate the TSC frequency from HPET reference |
| 127 | */ | 127 | */ |
| 128 | unsigned long native_calibrate_tsc(void) | 128 | static unsigned long calc_hpet_ref(u64 deltatsc, u64 hpet1, u64 hpet2) |
| 129 | { | 129 | { |
| 130 | unsigned long flags; | 130 | u64 tmp; |
| 131 | u64 tsc1, tsc2, tr1, tr2, delta, pm1, pm2, hpet1, hpet2; | ||
| 132 | int hpet = is_hpet_enabled(); | ||
| 133 | unsigned int tsc_khz_val = 0; | ||
| 134 | 131 | ||
| 135 | local_irq_save(flags); | 132 | if (hpet2 < hpet1) |
| 133 | hpet2 += 0x100000000ULL; | ||
| 134 | hpet2 -= hpet1; | ||
| 135 | tmp = ((u64)hpet2 * hpet_readl(HPET_PERIOD)); | ||
| 136 | do_div(tmp, 1000000); | ||
| 137 | do_div(deltatsc, tmp); | ||
| 138 | |||
| 139 | return (unsigned long) deltatsc; | ||
| 140 | } | ||
| 141 | |||
| 142 | /* | ||
| 143 | * Calculate the TSC frequency from PMTimer reference | ||
| 144 | */ | ||
| 145 | static unsigned long calc_pmtimer_ref(u64 deltatsc, u64 pm1, u64 pm2) | ||
| 146 | { | ||
| 147 | u64 tmp; | ||
| 136 | 148 | ||
| 137 | tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL); | 149 | if (!pm1 && !pm2) |
| 150 | return ULONG_MAX; | ||
| 151 | |||
| 152 | if (pm2 < pm1) | ||
| 153 | pm2 += (u64)ACPI_PM_OVRRUN; | ||
| 154 | pm2 -= pm1; | ||
| 155 | tmp = pm2 * 1000000000LL; | ||
| 156 | do_div(tmp, PMTMR_TICKS_PER_SEC); | ||
| 157 | do_div(deltatsc, tmp); | ||
| 158 | |||
| 159 | return (unsigned long) deltatsc; | ||
| 160 | } | ||
| 161 | |||
| 162 | #define CAL_MS 10 | ||
| 163 | #define CAL_LATCH (CLOCK_TICK_RATE / (1000 / CAL_MS)) | ||
| 164 | #define CAL_PIT_LOOPS 1000 | ||
| 165 | |||
| 166 | #define CAL2_MS 50 | ||
| 167 | #define CAL2_LATCH (CLOCK_TICK_RATE / (1000 / CAL2_MS)) | ||
| 168 | #define CAL2_PIT_LOOPS 5000 | ||
| 169 | |||
| 170 | |||
| 171 | /* | ||
| 172 | * Try to calibrate the TSC against the Programmable | ||
| 173 | * Interrupt Timer and return the frequency of the TSC | ||
| 174 | * in kHz. | ||
| 175 | * | ||
| 176 | * Return ULONG_MAX on failure to calibrate. | ||
| 177 | */ | ||
| 178 | static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin) | ||
| 179 | { | ||
| 180 | u64 tsc, t1, t2, delta; | ||
| 181 | unsigned long tscmin, tscmax; | ||
| 182 | int pitcnt; | ||
| 138 | 183 | ||
| 184 | /* Set the Gate high, disable speaker */ | ||
| 139 | outb((inb(0x61) & ~0x02) | 0x01, 0x61); | 185 | outb((inb(0x61) & ~0x02) | 0x01, 0x61); |
| 140 | 186 | ||
| 187 | /* | ||
| 188 | * Set up CTC channel 2* for mode 0 (interrupt on terminal | ||
| 189 | * count mode), binary count. Set the latch register to the | ||
| 190 | * requested timeout (LSB then MSB) to begin countdown. | ||
| 191 | */ | ||
| 141 | outb(0xb0, 0x43); | 192 | outb(0xb0, 0x43); |
| 142 | outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42); | 193 | outb(latch & 0xff, 0x42); |
| 143 | outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42); | 194 | outb(latch >> 8, 0x42); |
| 144 | tr1 = get_cycles(); | 195 | |
| 145 | while ((inb(0x61) & 0x20) == 0); | 196 | tsc = t1 = t2 = get_cycles(); |
| 146 | tr2 = get_cycles(); | 197 | |
| 198 | pitcnt = 0; | ||
| 199 | tscmax = 0; | ||
| 200 | tscmin = ULONG_MAX; | ||
| 201 | while ((inb(0x61) & 0x20) == 0) { | ||
| 202 | t2 = get_cycles(); | ||
| 203 | delta = t2 - tsc; | ||
| 204 | tsc = t2; | ||
| 205 | if ((unsigned long) delta < tscmin) | ||
| 206 | tscmin = (unsigned int) delta; | ||
| 207 | if ((unsigned long) delta > tscmax) | ||
| 208 | tscmax = (unsigned int) delta; | ||
| 209 | pitcnt++; | ||
| 210 | } | ||
| 211 | |||
| 212 | /* | ||
| 213 | * Sanity checks: | ||
| 214 | * | ||
| 215 | * If we were not able to read the PIT more than loopmin | ||
| 216 | * times, then we have been hit by a massive SMI | ||
| 217 | * | ||
| 218 | * If the maximum is 10 times larger than the minimum, | ||
| 219 | * then we got hit by an SMI as well. | ||
| 220 | */ | ||
| 221 | if (pitcnt < loopmin || tscmax > 10 * tscmin) | ||
| 222 | return ULONG_MAX; | ||
| 223 | |||
| 224 | /* Calculate the TSC rate in kHz from the timed interval */ | ||
| 225 | delta = t2 - t1; | ||
| 226 | do_div(delta, ms); | ||
| 227 | return delta; | ||
| 228 | } | ||
| 147 | 229 | ||
| 148 | tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL); | 230 | /* |
| 231 | * This reads the current MSB of the PIT counter, and | ||
| 232 | * checks if we are running on sufficiently fast and | ||
| 233 | * non-virtualized hardware. | ||
| 234 | * | ||
| 235 | * Our expectations are: | ||
| 236 | * | ||
| 237 | * - the PIT is running at roughly 1.19MHz | ||
| 238 | * | ||
| 239 | * - each IO is going to take about 1us on real hardware, | ||
| 240 | * but we allow it to be much faster (by a factor of 10) or | ||
| 241 | * _slightly_ slower (i.e. we allow up to a 2us read+counter | ||
| 242 | * update) - anything else implies an unacceptably slow CPU | ||
| 243 | * or PIT for the fast calibration to work. | ||
| 244 | * | ||
| 245 | * - with 256 PIT ticks to read the value, we have 214us to | ||
| 246 | * see the same MSB (and overhead like doing a single TSC | ||
| 247 | * read per MSB value etc). | ||
| 248 | * | ||
| 249 | * - We're doing 2 reads per loop (LSB, MSB), and we expect | ||
| 250 | * them each to take about a microsecond on real hardware. | ||
| 251 | * So we expect a count value of around 100. But we'll be | ||
| 252 | * generous, and accept anything over 50. | ||
| 253 | * | ||
| 254 | * - if the PIT is stuck, and we see *many* more reads, we | ||
| 255 | * return early (and the next caller of pit_expect_msb() | ||
| 256 | * then considers it a failure when it doesn't see the | ||
| 257 | * next expected value). | ||
| 258 | * | ||
| 259 | * These expectations mean that we know that we have seen the | ||
| 260 | * transition from one expected value to another with a fairly | ||
| 261 | * high accuracy, and we didn't miss any events. We can thus | ||
| 262 | * use the TSC value at the transitions to calculate a pretty | ||
| 263 | * good value for the TSC frequency. | ||
| 264 | */ | ||
| 265 | static inline int pit_expect_msb(unsigned char val) | ||
| 266 | { | ||
| 267 | int count = 0; | ||
| 149 | 268 | ||
| 269 | for (count = 0; count < 50000; count++) { | ||
| 270 | /* Ignore LSB */ | ||
| 271 | inb(0x42); | ||
| 272 | if (inb(0x42) != val) | ||
| 273 | break; | ||
| 274 | } | ||
| 275 | return count > 50; | ||
| 276 | } | ||
| 277 | |||
| 278 | /* | ||
| 279 | * How many MSB values do we want to see? We aim for a | ||
| 280 | * 15ms calibration, which assuming a 2us counter read | ||
| 281 | * error should give us roughly 150 ppm precision for | ||
| 282 | * the calibration. | ||
| 283 | */ | ||
| 284 | #define QUICK_PIT_MS 15 | ||
| 285 | #define QUICK_PIT_ITERATIONS (QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256) | ||
| 286 | |||
| 287 | static unsigned long quick_pit_calibrate(void) | ||
| 288 | { | ||
| 289 | /* Set the Gate high, disable speaker */ | ||
| 290 | outb((inb(0x61) & ~0x02) | 0x01, 0x61); | ||
| 291 | |||
| 292 | /* | ||
| 293 | * Counter 2, mode 0 (one-shot), binary count | ||
| 294 | * | ||
| 295 | * NOTE! Mode 2 decrements by two (and then the | ||
| 296 | * output is flipped each time, giving the same | ||
| 297 | * final output frequency as a decrement-by-one), | ||
| 298 | * so mode 0 is much better when looking at the | ||
| 299 | * individual counts. | ||
| 300 | */ | ||
| 301 | outb(0xb0, 0x43); | ||
| 302 | |||
| 303 | /* Start at 0xffff */ | ||
| 304 | outb(0xff, 0x42); | ||
| 305 | outb(0xff, 0x42); | ||
| 306 | |||
| 307 | if (pit_expect_msb(0xff)) { | ||
| 308 | int i; | ||
| 309 | u64 t1, t2, delta; | ||
| 310 | unsigned char expect = 0xfe; | ||
| 311 | |||
| 312 | t1 = get_cycles(); | ||
| 313 | for (i = 0; i < QUICK_PIT_ITERATIONS; i++, expect--) { | ||
| 314 | if (!pit_expect_msb(expect)) | ||
| 315 | goto failed; | ||
| 316 | } | ||
| 317 | t2 = get_cycles(); | ||
| 318 | |||
| 319 | /* | ||
| 320 | * Make sure we can rely on the second TSC timestamp: | ||
| 321 | */ | ||
| 322 | if (!pit_expect_msb(expect)) | ||
| 323 | goto failed; | ||
| 324 | |||
| 325 | /* | ||
| 326 | * Ok, if we get here, then we've seen the | ||
| 327 | * MSB of the PIT decrement QUICK_PIT_ITERATIONS | ||
| 328 | * times, and each MSB had many hits, so we never | ||
| 329 | * had any sudden jumps. | ||
| 330 | * | ||
| 331 | * As a result, we can depend on there not being | ||
| 332 | * any odd delays anywhere, and the TSC reads are | ||
| 333 | * reliable. | ||
| 334 | * | ||
| 335 | * kHz = ticks / time-in-seconds / 1000; | ||
| 336 | * kHz = (t2 - t1) / (QPI * 256 / PIT_TICK_RATE) / 1000 | ||
| 337 | * kHz = ((t2 - t1) * PIT_TICK_RATE) / (QPI * 256 * 1000) | ||
| 338 | */ | ||
| 339 | delta = (t2 - t1)*PIT_TICK_RATE; | ||
| 340 | do_div(delta, QUICK_PIT_ITERATIONS*256*1000); | ||
| 341 | printk("Fast TSC calibration using PIT\n"); | ||
| 342 | return delta; | ||
| 343 | } | ||
| 344 | failed: | ||
| 345 | return 0; | ||
| 346 | } | ||
| 347 | |||
| 348 | /** | ||
| 349 | * native_calibrate_tsc - calibrate the tsc on boot | ||
| 350 | */ | ||
| 351 | unsigned long native_calibrate_tsc(void) | ||
| 352 | { | ||
| 353 | u64 tsc1, tsc2, delta, ref1, ref2; | ||
| 354 | unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; | ||
| 355 | unsigned long flags, latch, ms, fast_calibrate; | ||
| 356 | int hpet = is_hpet_enabled(), i, loopmin; | ||
| 357 | |||
| 358 | local_irq_save(flags); | ||
| 359 | fast_calibrate = quick_pit_calibrate(); | ||
| 150 | local_irq_restore(flags); | 360 | local_irq_restore(flags); |
| 361 | if (fast_calibrate) | ||
| 362 | return fast_calibrate; | ||
| 151 | 363 | ||
| 152 | /* | 364 | /* |
| 153 | * Preset the result with the raw and inaccurate PIT | 365 | * Run 3 calibration loops to get the lowest frequency value |
| 154 | * calibration value | 366 | * (the best estimate). We use two different calibration modes |
| 367 | * here: | ||
| 368 | * | ||
| 369 | * 1) PIT loop. We set the PIT Channel 2 to oneshot mode and | ||
| 370 | * load a timeout of 10ms (50ms on retry). We read the time right after we | ||
| 371 | * started the timer and wait until the PIT count down reaches | ||
| 372 | * zero. In each wait loop iteration we read the TSC and check | ||
| 373 | * the delta to the previous read. We keep track of the min | ||
| 374 | * and max values of that delta. The delta is mostly defined | ||
| 375 | * by the IO time of the PIT access, so we can detect when a | ||
| 376 | * SMI/SMM disturbance happened between the two reads. If the | ||
| 377 | * maximum time is significantly larger than the minimum time, | ||
| 378 | * then we discard the result and have another try. | ||
| 379 | * | ||
| 380 | * 2) Reference counter. If available we use the HPET or the | ||
| 381 | * PMTIMER as a reference to check the sanity of that value. | ||
| 382 | * We use separate TSC readouts and check inside of the | ||
| 383 | * reference read for an SMI/SMM disturbance. We discard | ||
| 384 | * disturbed values here as well. We do that around the PIT | ||
| 385 | * calibration delay loop as we have to wait for a certain | ||
| 386 | * amount of time anyway. | ||
| 155 | */ | 387 | */ |
| 156 | delta = (tr2 - tr1); | 388 | |
| 157 | do_div(delta, 50); | 389 | /* Preset PIT loop values */ |
| 158 | tsc_khz_val = delta; | 390 | latch = CAL_LATCH; |
| 159 | 391 | ms = CAL_MS; | |
| 160 | /* hpet or pmtimer available ? */ | 392 | loopmin = CAL_PIT_LOOPS; |
| 161 | if (!hpet && !pm1 && !pm2) { | 393 | |
| 162 | printk(KERN_INFO "TSC calibrated against PIT\n"); | 394 | for (i = 0; i < 3; i++) { |
| 163 | goto out; | 395 | unsigned long tsc_pit_khz; |
| 396 | |||
| 397 | /* | ||
| 398 | * Read the start value and the reference count of | ||
| 399 | * hpet/pmtimer when available. Then do the PIT | ||
| 400 | * calibration, which will take at least 50ms, and | ||
| 401 | * read the end value. | ||
| 402 | */ | ||
| 403 | local_irq_save(flags); | ||
| 404 | tsc1 = tsc_read_refs(&ref1, hpet); | ||
| 405 | tsc_pit_khz = pit_calibrate_tsc(latch, ms, loopmin); | ||
| 406 | tsc2 = tsc_read_refs(&ref2, hpet); | ||
| 407 | local_irq_restore(flags); | ||
| 408 | |||
| 409 | /* Pick the lowest PIT TSC calibration so far */ | ||
| 410 | tsc_pit_min = min(tsc_pit_min, tsc_pit_khz); | ||
| 411 | |||
| 412 | /* hpet or pmtimer available ? */ | ||
| 413 | if (!hpet && !ref1 && !ref2) | ||
| 414 | continue; | ||
| 415 | |||
| 416 | /* Check whether the sampling was disturbed by an SMI */ | ||
| 417 | if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX) | ||
| 418 | continue; | ||
| 419 | |||
| 420 | tsc2 = (tsc2 - tsc1) * 1000000LL; | ||
| 421 | if (hpet) | ||
| 422 | tsc2 = calc_hpet_ref(tsc2, ref1, ref2); | ||
| 423 | else | ||
| 424 | tsc2 = calc_pmtimer_ref(tsc2, ref1, ref2); | ||
| 425 | |||
| 426 | tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2); | ||
| 427 | |||
| 428 | /* Check the reference deviation */ | ||
| 429 | delta = ((u64) tsc_pit_min) * 100; | ||
| 430 | do_div(delta, tsc_ref_min); | ||
| 431 | |||
| 432 | /* | ||
| 433 | * If both calibration results are inside a 10% window | ||
| 434 | * then we can be sure that the calibration | ||
| 435 | * succeeded. We break out of the loop right away. We | ||
| 436 | * use the reference value, as it is more precise. | ||
| 437 | */ | ||
| 438 | if (delta >= 90 && delta <= 110) { | ||
| 439 | printk(KERN_INFO | ||
| 440 | "TSC: PIT calibration matches %s. %d loops\n", | ||
| 441 | hpet ? "HPET" : "PMTIMER", i + 1); | ||
| 442 | return tsc_ref_min; | ||
| 443 | } | ||
| 444 | |||
| 445 | /* | ||
| 446 | * Check whether PIT failed more than once. This | ||
| 447 | * happens in virtualized environments. We need to | ||
| 448 | * give the virtual PC a slightly longer timeframe for | ||
| 449 | * the HPET/PMTIMER to make the result precise. | ||
| 450 | */ | ||
| 451 | if (i == 1 && tsc_pit_min == ULONG_MAX) { | ||
| 452 | latch = CAL2_LATCH; | ||
| 453 | ms = CAL2_MS; | ||
| 454 | loopmin = CAL2_PIT_LOOPS; | ||
| 455 | } | ||
| 164 | } | 456 | } |
| 165 | 457 | ||
| 166 | /* Check, whether the sampling was disturbed by an SMI */ | 458 | /* |
| 167 | if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX) { | 459 | * Now check the results. |
| 168 | printk(KERN_WARNING "TSC calibration disturbed by SMI, " | 460 | */ |
| 169 | "using PIT calibration result\n"); | 461 | if (tsc_pit_min == ULONG_MAX) { |
| 170 | goto out; | 462 | /* PIT gave no useful value */ |
| 463 | printk(KERN_WARNING "TSC: Unable to calibrate against PIT\n"); | ||
| 464 | |||
| 465 | /* We don't have an alternative source, disable TSC */ | ||
| 466 | if (!hpet && !ref1 && !ref2) { | ||
| 467 | printk("TSC: No reference (HPET/PMTIMER) available\n"); | ||
| 468 | return 0; | ||
| 469 | } | ||
| 470 | |||
| 471 | /* The alternative source failed as well, disable TSC */ | ||
| 472 | if (tsc_ref_min == ULONG_MAX) { | ||
| 473 | printk(KERN_WARNING "TSC: HPET/PMTIMER calibration " | ||
| 474 | "failed.\n"); | ||
| 475 | return 0; | ||
| 476 | } | ||
| 477 | |||
| 478 | /* Use the alternative source */ | ||
| 479 | printk(KERN_INFO "TSC: using %s reference calibration\n", | ||
| 480 | hpet ? "HPET" : "PMTIMER"); | ||
| 481 | |||
| 482 | return tsc_ref_min; | ||
| 171 | } | 483 | } |
| 172 | 484 | ||
| 173 | tsc2 = (tsc2 - tsc1) * 1000000LL; | 485 | /* We don't have an alternative source, use the PIT calibration value */ |
| 174 | 486 | if (!hpet && !ref1 && !ref2) { | |
| 175 | if (hpet) { | 487 | printk(KERN_INFO "TSC: Using PIT calibration value\n"); |
| 176 | printk(KERN_INFO "TSC calibrated against HPET\n"); | 488 | return tsc_pit_min; |
| 177 | if (hpet2 < hpet1) | ||
| 178 | hpet2 += 0x100000000ULL; | ||
| 179 | hpet2 -= hpet1; | ||
| 180 | tsc1 = ((u64)hpet2 * hpet_readl(HPET_PERIOD)); | ||
| 181 | do_div(tsc1, 1000000); | ||
| 182 | } else { | ||
| 183 | printk(KERN_INFO "TSC calibrated against PM_TIMER\n"); | ||
| 184 | if (pm2 < pm1) | ||
| 185 | pm2 += (u64)ACPI_PM_OVRRUN; | ||
| 186 | pm2 -= pm1; | ||
| 187 | tsc1 = pm2 * 1000000000LL; | ||
| 188 | do_div(tsc1, PMTMR_TICKS_PER_SEC); | ||
| 189 | } | 489 | } |
| 190 | 490 | ||
| 191 | do_div(tsc2, tsc1); | 491 | /* The alternative source failed, use the PIT calibration value */ |
| 192 | tsc_khz_val = tsc2; | 492 | if (tsc_ref_min == ULONG_MAX) { |
| 493 | printk(KERN_WARNING "TSC: HPET/PMTIMER calibration failed. " | ||
| 494 | "Using PIT calibration\n"); | ||
| 495 | return tsc_pit_min; | ||
| 496 | } | ||
| 193 | 497 | ||
| 194 | out: | 498 | /* |
| 195 | return tsc_khz_val; | 499 | * The calibration values differ too much. When in doubt, we use |
| 500 | * the PIT value as we know that there are PMTIMERs around | ||
| 501 | * running at double speed. At least we let the user know: | ||
| 502 | */ | ||
| 503 | printk(KERN_WARNING "TSC: PIT calibration deviates from %s: %lu %lu.\n", | ||
| 504 | hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min); | ||
| 505 | printk(KERN_INFO "TSC: Using PIT calibration value\n"); | ||
| 506 | return tsc_pit_min; | ||
| 196 | } | 507 | } |
| 197 | 508 | ||
| 198 | |||
| 199 | #ifdef CONFIG_X86_32 | 509 | #ifdef CONFIG_X86_32 |
| 200 | /* Only called from the Powernow K7 cpu freq driver */ | 510 | /* Only called from the Powernow K7 cpu freq driver */ |
| 201 | int recalibrate_cpu_khz(void) | 511 | int recalibrate_cpu_khz(void) |
| @@ -314,7 +624,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, | |||
| 314 | mark_tsc_unstable("cpufreq changes"); | 624 | mark_tsc_unstable("cpufreq changes"); |
| 315 | } | 625 | } |
| 316 | 626 | ||
| 317 | set_cyc2ns_scale(tsc_khz_ref, freq->cpu); | 627 | set_cyc2ns_scale(tsc_khz, freq->cpu); |
| 318 | 628 | ||
| 319 | return 0; | 629 | return 0; |
| 320 | } | 630 | } |
| @@ -325,6 +635,10 @@ static struct notifier_block time_cpufreq_notifier_block = { | |||
| 325 | 635 | ||
| 326 | static int __init cpufreq_tsc(void) | 636 | static int __init cpufreq_tsc(void) |
| 327 | { | 637 | { |
| 638 | if (!cpu_has_tsc) | ||
| 639 | return 0; | ||
| 640 | if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) | ||
| 641 | return 0; | ||
| 328 | cpufreq_register_notifier(&time_cpufreq_notifier_block, | 642 | cpufreq_register_notifier(&time_cpufreq_notifier_block, |
| 329 | CPUFREQ_TRANSITION_NOTIFIER); | 643 | CPUFREQ_TRANSITION_NOTIFIER); |
| 330 | return 0; | 644 | return 0; |
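The fast path above boils down to the formula in quick_pit_calibrate(): kHz = ((t2 - t1) * PIT_TICK_RATE) / (QUICK_PIT_ITERATIONS * 256 * 1000). A standalone user-space sketch of that arithmetic with made-up TSC readings (PIT_TICK_RATE is the usual 1193182 Hz; not kernel code):

    #include <stdio.h>
    #include <stdint.h>

    #define PIT_TICK_RATE           1193182ULL      /* the PIT runs at ~1.193 MHz */
    #define QUICK_PIT_MS            15
    #define QUICK_PIT_ITERATIONS    (QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256)

    int main(void)
    {
            /* hypothetical TSC timestamps taken around the PIT MSB countdown */
            uint64_t t1 = 1000000, t2 = 31000000;   /* ~30M cycles elapsed */
            uint64_t delta = (t2 - t1) * PIT_TICK_RATE;

            delta /= QUICK_PIT_ITERATIONS * 256 * 1000;
            printf("TSC ~ %llu kHz\n", (unsigned long long)delta);  /* 2026463, i.e. ~2.0 GHz */
            return 0;
    }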
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index 0577825cf89b..9ffb01c31c40 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c | |||
| @@ -88,11 +88,9 @@ static __cpuinit void check_tsc_warp(void) | |||
| 88 | __raw_spin_unlock(&sync_lock); | 88 | __raw_spin_unlock(&sync_lock); |
| 89 | } | 89 | } |
| 90 | } | 90 | } |
| 91 | if (!(now-start)) { | 91 | WARN(!(now-start), |
| 92 | printk("Warning: zero tsc calibration delta: %Ld [max: %Ld]\n", | 92 | "Warning: zero tsc calibration delta: %Ld [max: %Ld]\n", |
| 93 | now-start, end-start); | 93 | now-start, end-start); |
| 94 | WARN_ON(1); | ||
| 95 | } | ||
| 96 | } | 94 | } |
| 97 | 95 | ||
| 98 | /* | 96 | /* |
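The open-coded printk() + WARN_ON(1) pair is folded into a single WARN(), which only fires (and only prints) when its first argument is true. The same transformation applies elsewhere; a sketch with a hypothetical condition and message, not part of this patch:

    /* before */
    if (bytes_missing) {
            printk(KERN_WARNING "transfer short by %d bytes\n", bytes_missing);
            WARN_ON(1);
    }

    /* after */
    WARN(bytes_missing, "transfer short by %d bytes\n", bytes_missing);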
diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c new file mode 100644 index 000000000000..aeef529917e4 --- /dev/null +++ b/arch/x86/kernel/uv_irq.c | |||
| @@ -0,0 +1,79 @@ | |||
| 1 | /* | ||
| 2 | * This file is subject to the terms and conditions of the GNU General Public | ||
| 3 | * License. See the file "COPYING" in the main directory of this archive | ||
| 4 | * for more details. | ||
| 5 | * | ||
| 6 | * SGI UV IRQ functions | ||
| 7 | * | ||
| 8 | * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved. | ||
| 9 | */ | ||
| 10 | |||
| 11 | #include <linux/module.h> | ||
| 12 | #include <linux/irq.h> | ||
| 13 | |||
| 14 | #include <asm/apic.h> | ||
| 15 | #include <asm/uv/uv_irq.h> | ||
| 16 | |||
| 17 | static void uv_noop(unsigned int irq) | ||
| 18 | { | ||
| 19 | } | ||
| 20 | |||
| 21 | static unsigned int uv_noop_ret(unsigned int irq) | ||
| 22 | { | ||
| 23 | return 0; | ||
| 24 | } | ||
| 25 | |||
| 26 | static void uv_ack_apic(unsigned int irq) | ||
| 27 | { | ||
| 28 | ack_APIC_irq(); | ||
| 29 | } | ||
| 30 | |||
| 31 | struct irq_chip uv_irq_chip = { | ||
| 32 | .name = "UV-CORE", | ||
| 33 | .startup = uv_noop_ret, | ||
| 34 | .shutdown = uv_noop, | ||
| 35 | .enable = uv_noop, | ||
| 36 | .disable = uv_noop, | ||
| 37 | .ack = uv_noop, | ||
| 38 | .mask = uv_noop, | ||
| 39 | .unmask = uv_noop, | ||
| 40 | .eoi = uv_ack_apic, | ||
| 41 | .end = uv_noop, | ||
| 42 | }; | ||
| 43 | |||
| 44 | /* | ||
| 45 | * Set up a mapping of an available irq and vector, and enable the specified | ||
| 46 | * MMR that defines the MSI that is to be sent to the specified CPU when an | ||
| 47 | * interrupt is raised. | ||
| 48 | */ | ||
| 49 | int uv_setup_irq(char *irq_name, int cpu, int mmr_blade, | ||
| 50 | unsigned long mmr_offset) | ||
| 51 | { | ||
| 52 | int irq; | ||
| 53 | int ret; | ||
| 54 | |||
| 55 | irq = create_irq(); | ||
| 56 | if (irq <= 0) | ||
| 57 | return -EBUSY; | ||
| 58 | |||
| 59 | ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset); | ||
| 60 | if (ret != irq) | ||
| 61 | destroy_irq(irq); | ||
| 62 | |||
| 63 | return ret; | ||
| 64 | } | ||
| 65 | EXPORT_SYMBOL_GPL(uv_setup_irq); | ||
| 66 | |||
| 67 | /* | ||
| 68 | * Tear down a mapping of an irq and vector, and disable the specified MMR that | ||
| 69 | * defined the MSI that was to be sent to the specified CPU when an interrupt | ||
| 70 | * was raised. | ||
| 71 | * | ||
| 72 | * Set mmr_blade and mmr_offset to what was passed in on uv_setup_irq(). | ||
| 73 | */ | ||
| 74 | void uv_teardown_irq(unsigned int irq, int mmr_blade, unsigned long mmr_offset) | ||
| 75 | { | ||
| 76 | arch_disable_uv_irq(mmr_blade, mmr_offset); | ||
| 77 | destroy_irq(irq); | ||
| 78 | } | ||
| 79 | EXPORT_SYMBOL_GPL(uv_teardown_irq); | ||
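A driver-side sketch of how these two exports pair up; the handler name and MMR offset are hypothetical, only the uv_setup_irq()/uv_teardown_irq() signatures come from the code above:

    #include <linux/interrupt.h>
    #include <asm/uv/uv_irq.h>

    #define MY_MMR_OFFSET   0x1000UL                /* hypothetical MMR offset */

    static irqreturn_t my_uv_handler(int irq, void *dev_id)
    {
            return IRQ_HANDLED;
    }

    static int my_uv_attach(int cpu, int mmr_blade)
    {
            int irq = uv_setup_irq("my-uv-irq", cpu, mmr_blade, MY_MMR_OFFSET);

            if (irq < 0)
                    return irq;     /* -EBUSY or arch_enable_uv_irq() error */

            if (request_irq(irq, my_uv_handler, 0, "my-uv-irq", NULL)) {
                    uv_teardown_irq(irq, mmr_blade, MY_MMR_OFFSET);
                    return -EBUSY;
            }
            return irq;
    }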
diff --git a/arch/x86/kernel/uv_sysfs.c b/arch/x86/kernel/uv_sysfs.c new file mode 100644 index 000000000000..67f9b9dbf800 --- /dev/null +++ b/arch/x86/kernel/uv_sysfs.c | |||
| @@ -0,0 +1,72 @@ | |||
| 1 | /* | ||
| 2 | * This file supports the /sys/firmware/sgi_uv interfaces for SGI UV. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify | ||
| 5 | * it under the terms of the GNU General Public License as published by | ||
| 6 | * the Free Software Foundation; either version 2 of the License, or | ||
| 7 | * (at your option) any later version. | ||
| 8 | * | ||
| 9 | * This program is distributed in the hope that it will be useful, | ||
| 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 12 | * GNU General Public License for more details. | ||
| 13 | * | ||
| 14 | * You should have received a copy of the GNU General Public License | ||
| 15 | * along with this program; if not, write to the Free Software | ||
| 16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
| 17 | * | ||
| 18 | * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. | ||
| 19 | * Copyright (c) Russ Anderson | ||
| 20 | */ | ||
| 21 | |||
| 22 | #include <linux/sysdev.h> | ||
| 23 | #include <asm/uv/bios.h> | ||
| 24 | |||
| 25 | struct kobject *sgi_uv_kobj; | ||
| 26 | |||
| 27 | static ssize_t partition_id_show(struct kobject *kobj, | ||
| 28 | struct kobj_attribute *attr, char *buf) | ||
| 29 | { | ||
| 30 | return snprintf(buf, PAGE_SIZE, "%ld\n", sn_partition_id); | ||
| 31 | } | ||
| 32 | |||
| 33 | static ssize_t coherence_id_show(struct kobject *kobj, | ||
| 34 | struct kobj_attribute *attr, char *buf) | ||
| 35 | { | ||
| 36 | return snprintf(buf, PAGE_SIZE, "%ld\n", partition_coherence_id()); | ||
| 37 | } | ||
| 38 | |||
| 39 | static struct kobj_attribute partition_id_attr = | ||
| 40 | __ATTR(partition_id, S_IRUGO, partition_id_show, NULL); | ||
| 41 | |||
| 42 | static struct kobj_attribute coherence_id_attr = | ||
| 43 | __ATTR(coherence_id, S_IRUGO, coherence_id_show, NULL); | ||
| 44 | |||
| 45 | |||
| 46 | static int __init sgi_uv_sysfs_init(void) | ||
| 47 | { | ||
| 48 | unsigned long ret; | ||
| 49 | |||
| 50 | if (!sgi_uv_kobj) | ||
| 51 | sgi_uv_kobj = kobject_create_and_add("sgi_uv", firmware_kobj); | ||
| 52 | if (!sgi_uv_kobj) { | ||
| 53 | printk(KERN_WARNING "kobject_create_and_add sgi_uv failed \n"); | ||
| 54 | return -EINVAL; | ||
| 55 | } | ||
| 56 | |||
| 57 | ret = sysfs_create_file(sgi_uv_kobj, &partition_id_attr.attr); | ||
| 58 | if (ret) { | ||
| 59 | printk(KERN_WARNING "sysfs_create_file partition_id failed \n"); | ||
| 60 | return ret; | ||
| 61 | } | ||
| 62 | |||
| 63 | ret = sysfs_create_file(sgi_uv_kobj, &coherence_id_attr.attr); | ||
| 64 | if (ret) { | ||
| 65 | printk(KERN_WARNING "sysfs_create_file coherence_id failed \n"); | ||
| 66 | return ret; | ||
| 67 | } | ||
| 68 | |||
| 69 | return 0; | ||
| 70 | } | ||
| 71 | |||
| 72 | device_initcall(sgi_uv_sysfs_init); | ||
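The two attributes land under /sys/firmware/sgi_uv/ (the kobject is created under firmware_kobj). A minimal user-space sketch for reading one of them, assuming the machine actually exposes the directory:

    #include <stdio.h>

    int main(void)
    {
            char buf[64];
            FILE *f = fopen("/sys/firmware/sgi_uv/partition_id", "r");

            if (!f)
                    return 1;       /* not an SGI UV box, or driver not built in */
            if (fgets(buf, sizeof(buf), f))
                    printf("partition_id: %s", buf);
            fclose(f);
            return 0;
    }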
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index e94bdb6add1d..0c9667f0752a 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c | |||
| @@ -25,45 +25,31 @@ | |||
| 25 | #include <asm/visws/cobalt.h> | 25 | #include <asm/visws/cobalt.h> |
| 26 | #include <asm/visws/piix4.h> | 26 | #include <asm/visws/piix4.h> |
| 27 | #include <asm/arch_hooks.h> | 27 | #include <asm/arch_hooks.h> |
| 28 | #include <asm/io_apic.h> | ||
| 28 | #include <asm/fixmap.h> | 29 | #include <asm/fixmap.h> |
| 29 | #include <asm/reboot.h> | 30 | #include <asm/reboot.h> |
| 30 | #include <asm/setup.h> | 31 | #include <asm/setup.h> |
| 31 | #include <asm/e820.h> | 32 | #include <asm/e820.h> |
| 32 | #include <asm/smp.h> | ||
| 33 | #include <asm/io.h> | 33 | #include <asm/io.h> |
| 34 | 34 | ||
| 35 | #include <mach_ipi.h> | 35 | #include <mach_ipi.h> |
| 36 | 36 | ||
| 37 | #include "mach_apic.h" | 37 | #include "mach_apic.h" |
| 38 | 38 | ||
| 39 | #include <linux/init.h> | ||
| 40 | #include <linux/smp.h> | ||
| 41 | |||
| 42 | #include <linux/kernel_stat.h> | 39 | #include <linux/kernel_stat.h> |
| 43 | #include <linux/interrupt.h> | ||
| 44 | #include <linux/init.h> | ||
| 45 | 40 | ||
| 46 | #include <asm/io.h> | ||
| 47 | #include <asm/apic.h> | ||
| 48 | #include <asm/i8259.h> | 41 | #include <asm/i8259.h> |
| 49 | #include <asm/irq_vectors.h> | 42 | #include <asm/irq_vectors.h> |
| 50 | #include <asm/visws/cobalt.h> | ||
| 51 | #include <asm/visws/lithium.h> | 43 | #include <asm/visws/lithium.h> |
| 52 | #include <asm/visws/piix4.h> | ||
| 53 | 44 | ||
| 54 | #include <linux/sched.h> | 45 | #include <linux/sched.h> |
| 55 | #include <linux/kernel.h> | 46 | #include <linux/kernel.h> |
| 56 | #include <linux/init.h> | ||
| 57 | #include <linux/pci.h> | 47 | #include <linux/pci.h> |
| 58 | #include <linux/pci_ids.h> | 48 | #include <linux/pci_ids.h> |
| 59 | 49 | ||
| 60 | extern int no_broadcast; | 50 | extern int no_broadcast; |
| 61 | 51 | ||
| 62 | #include <asm/io.h> | ||
| 63 | #include <asm/apic.h> | 52 | #include <asm/apic.h> |
| 64 | #include <asm/arch_hooks.h> | ||
| 65 | #include <asm/visws/cobalt.h> | ||
| 66 | #include <asm/visws/lithium.h> | ||
| 67 | 53 | ||
| 68 | char visws_board_type = -1; | 54 | char visws_board_type = -1; |
| 69 | char visws_board_rev = -1; | 55 | char visws_board_rev = -1; |
| @@ -73,7 +59,7 @@ int is_visws_box(void) | |||
| 73 | return visws_board_type >= 0; | 59 | return visws_board_type >= 0; |
| 74 | } | 60 | } |
| 75 | 61 | ||
| 76 | static int __init visws_time_init_quirk(void) | 62 | static int __init visws_time_init(void) |
| 77 | { | 63 | { |
| 78 | printk(KERN_INFO "Starting Cobalt Timer system clock\n"); | 64 | printk(KERN_INFO "Starting Cobalt Timer system clock\n"); |
| 79 | 65 | ||
| @@ -93,7 +79,7 @@ static int __init visws_time_init_quirk(void) | |||
| 93 | return 0; | 79 | return 0; |
| 94 | } | 80 | } |
| 95 | 81 | ||
| 96 | static int __init visws_pre_intr_init_quirk(void) | 82 | static int __init visws_pre_intr_init(void) |
| 97 | { | 83 | { |
| 98 | init_VISWS_APIC_irqs(); | 84 | init_VISWS_APIC_irqs(); |
| 99 | 85 | ||
| @@ -114,7 +100,7 @@ EXPORT_SYMBOL(sgivwfb_mem_size); | |||
| 114 | 100 | ||
| 115 | long long mem_size __initdata = 0; | 101 | long long mem_size __initdata = 0; |
| 116 | 102 | ||
| 117 | static char * __init visws_memory_setup_quirk(void) | 103 | static char * __init visws_memory_setup(void) |
| 118 | { | 104 | { |
| 119 | long long gfx_mem_size = 8 * MB; | 105 | long long gfx_mem_size = 8 * MB; |
| 120 | 106 | ||
| @@ -176,7 +162,7 @@ static void visws_machine_power_off(void) | |||
| 176 | outl(PIIX_SPECIAL_STOP, 0xCFC); | 162 | outl(PIIX_SPECIAL_STOP, 0xCFC); |
| 177 | } | 163 | } |
| 178 | 164 | ||
| 179 | static int __init visws_get_smp_config_quirk(unsigned int early) | 165 | static int __init visws_get_smp_config(unsigned int early) |
| 180 | { | 166 | { |
| 181 | /* | 167 | /* |
| 182 | * Prevent MP-table parsing by the generic code: | 168 | * Prevent MP-table parsing by the generic code: |
| @@ -184,15 +170,13 @@ static int __init visws_get_smp_config_quirk(unsigned int early) | |||
| 184 | return 1; | 170 | return 1; |
| 185 | } | 171 | } |
| 186 | 172 | ||
| 187 | extern unsigned int __cpuinitdata maxcpus; | ||
| 188 | |||
| 189 | /* | 173 | /* |
| 190 | * The Visual Workstation is Intel MP compliant in the hardware | 174 | * The Visual Workstation is Intel MP compliant in the hardware |
| 191 | * sense, but it doesn't have a BIOS(-configuration table). | 175 | * sense, but it doesn't have a BIOS(-configuration table). |
| 192 | * No problem for Linux. | 176 | * No problem for Linux. |
| 193 | */ | 177 | */ |
| 194 | 178 | ||
| 195 | static void __init MP_processor_info (struct mpc_config_processor *m) | 179 | static void __init MP_processor_info(struct mpc_config_processor *m) |
| 196 | { | 180 | { |
| 197 | int ver, logical_apicid; | 181 | int ver, logical_apicid; |
| 198 | physid_mask_t apic_cpus; | 182 | physid_mask_t apic_cpus; |
| @@ -232,7 +216,7 @@ static void __init MP_processor_info (struct mpc_config_processor *m) | |||
| 232 | apic_version[m->mpc_apicid] = ver; | 216 | apic_version[m->mpc_apicid] = ver; |
| 233 | } | 217 | } |
| 234 | 218 | ||
| 235 | int __init visws_find_smp_config_quirk(unsigned int reserve) | 219 | static int __init visws_find_smp_config(unsigned int reserve) |
| 236 | { | 220 | { |
| 237 | struct mpc_config_processor *mp = phys_to_virt(CO_CPU_TAB_PHYS); | 221 | struct mpc_config_processor *mp = phys_to_virt(CO_CPU_TAB_PHYS); |
| 238 | unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS)); | 222 | unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS)); |
| @@ -244,8 +228,8 @@ int __init visws_find_smp_config_quirk(unsigned int reserve) | |||
| 244 | ncpus = CO_CPU_MAX; | 228 | ncpus = CO_CPU_MAX; |
| 245 | } | 229 | } |
| 246 | 230 | ||
| 247 | if (ncpus > maxcpus) | 231 | if (ncpus > setup_max_cpus) |
| 248 | ncpus = maxcpus; | 232 | ncpus = setup_max_cpus; |
| 249 | 233 | ||
| 250 | #ifdef CONFIG_X86_LOCAL_APIC | 234 | #ifdef CONFIG_X86_LOCAL_APIC |
| 251 | smp_found_config = 1; | 235 | smp_found_config = 1; |
| @@ -258,7 +242,17 @@ int __init visws_find_smp_config_quirk(unsigned int reserve) | |||
| 258 | return 1; | 242 | return 1; |
| 259 | } | 243 | } |
| 260 | 244 | ||
| 261 | extern int visws_trap_init_quirk(void); | 245 | static int visws_trap_init(void); |
| 246 | |||
| 247 | static struct x86_quirks visws_x86_quirks __initdata = { | ||
| 248 | .arch_time_init = visws_time_init, | ||
| 249 | .arch_pre_intr_init = visws_pre_intr_init, | ||
| 250 | .arch_memory_setup = visws_memory_setup, | ||
| 251 | .arch_intr_init = NULL, | ||
| 252 | .arch_trap_init = visws_trap_init, | ||
| 253 | .mach_get_smp_config = visws_get_smp_config, | ||
| 254 | .mach_find_smp_config = visws_find_smp_config, | ||
| 255 | }; | ||
| 262 | 256 | ||
| 263 | void __init visws_early_detect(void) | 257 | void __init visws_early_detect(void) |
| 264 | { | 258 | { |
| @@ -272,16 +266,10 @@ void __init visws_early_detect(void) | |||
| 272 | 266 | ||
| 273 | /* | 267 | /* |
| 274 | * Install special quirks for timer, interrupt and memory setup: | 268 | * Install special quirks for timer, interrupt and memory setup: |
| 275 | */ | ||
| 276 | arch_time_init_quirk = visws_time_init_quirk; | ||
| 277 | arch_pre_intr_init_quirk = visws_pre_intr_init_quirk; | ||
| 278 | arch_memory_setup_quirk = visws_memory_setup_quirk; | ||
| 279 | |||
| 280 | /* | ||
| 281 | * Fall back to generic behavior for traps: | 269 | * Fall back to generic behavior for traps: |
| 270 | * Override generic MP-table parsing: | ||
| 282 | */ | 271 | */ |
| 283 | arch_intr_init_quirk = NULL; | 272 | x86_quirks = &visws_x86_quirks; |
| 284 | arch_trap_init_quirk = visws_trap_init_quirk; | ||
| 285 | 273 | ||
| 286 | /* | 274 | /* |
| 287 | * Install reboot quirks: | 275 | * Install reboot quirks: |
| @@ -294,12 +282,6 @@ void __init visws_early_detect(void) | |||
| 294 | */ | 282 | */ |
| 295 | no_broadcast = 0; | 283 | no_broadcast = 0; |
| 296 | 284 | ||
| 297 | /* | ||
| 298 | * Override generic MP-table parsing: | ||
| 299 | */ | ||
| 300 | mach_get_smp_config_quirk = visws_get_smp_config_quirk; | ||
| 301 | mach_find_smp_config_quirk = visws_find_smp_config_quirk; | ||
| 302 | |||
| 303 | #ifdef CONFIG_X86_IO_APIC | 285 | #ifdef CONFIG_X86_IO_APIC |
| 304 | /* | 286 | /* |
| 305 | * Turn off IO-APIC detection and initialization: | 287 | * Turn off IO-APIC detection and initialization: |
| @@ -426,7 +408,7 @@ static __init void cobalt_init(void) | |||
| 426 | co_apic_read(CO_APIC_ID)); | 408 | co_apic_read(CO_APIC_ID)); |
| 427 | } | 409 | } |
| 428 | 410 | ||
| 429 | int __init visws_trap_init_quirk(void) | 411 | static int __init visws_trap_init(void) |
| 430 | { | 412 | { |
| 431 | lithium_init(); | 413 | lithium_init(); |
| 432 | cobalt_init(); | 414 | cobalt_init(); |
| @@ -502,10 +484,11 @@ static void disable_cobalt_irq(unsigned int irq) | |||
| 502 | static unsigned int startup_cobalt_irq(unsigned int irq) | 484 | static unsigned int startup_cobalt_irq(unsigned int irq) |
| 503 | { | 485 | { |
| 504 | unsigned long flags; | 486 | unsigned long flags; |
| 487 | struct irq_desc *desc = irq_to_desc(irq); | ||
| 505 | 488 | ||
| 506 | spin_lock_irqsave(&cobalt_lock, flags); | 489 | spin_lock_irqsave(&cobalt_lock, flags); |
| 507 | if ((irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING))) | 490 | if ((desc->status & (IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING))) |
| 508 | irq_desc[irq].status &= ~(IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING); | 491 | desc->status &= ~(IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING); |
| 509 | enable_cobalt_irq(irq); | 492 | enable_cobalt_irq(irq); |
| 510 | spin_unlock_irqrestore(&cobalt_lock, flags); | 493 | spin_unlock_irqrestore(&cobalt_lock, flags); |
| 511 | return 0; | 494 | return 0; |
| @@ -524,9 +507,10 @@ static void ack_cobalt_irq(unsigned int irq) | |||
| 524 | static void end_cobalt_irq(unsigned int irq) | 507 | static void end_cobalt_irq(unsigned int irq) |
| 525 | { | 508 | { |
| 526 | unsigned long flags; | 509 | unsigned long flags; |
| 510 | struct irq_desc *desc = irq_to_desc(irq); | ||
| 527 | 511 | ||
| 528 | spin_lock_irqsave(&cobalt_lock, flags); | 512 | spin_lock_irqsave(&cobalt_lock, flags); |
| 529 | if (!(irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS))) | 513 | if (!(desc->status & (IRQ_DISABLED | IRQ_INPROGRESS))) |
| 530 | enable_cobalt_irq(irq); | 514 | enable_cobalt_irq(irq); |
| 531 | spin_unlock_irqrestore(&cobalt_lock, flags); | 515 | spin_unlock_irqrestore(&cobalt_lock, flags); |
| 532 | } | 516 | } |
| @@ -644,12 +628,12 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id) | |||
| 644 | 628 | ||
| 645 | spin_unlock_irqrestore(&i8259A_lock, flags); | 629 | spin_unlock_irqrestore(&i8259A_lock, flags); |
| 646 | 630 | ||
| 647 | desc = irq_desc + realirq; | 631 | desc = irq_to_desc(realirq); |
| 648 | 632 | ||
| 649 | /* | 633 | /* |
| 650 | * handle this 'virtual interrupt' as a Cobalt one now. | 634 | * handle this 'virtual interrupt' as a Cobalt one now. |
| 651 | */ | 635 | */ |
| 652 | kstat_cpu(smp_processor_id()).irqs[realirq]++; | 636 | kstat_incr_irqs_this_cpu(realirq, desc); |
| 653 | 637 | ||
| 654 | if (likely(desc->action != NULL)) | 638 | if (likely(desc->action != NULL)) |
| 655 | handle_IRQ_event(realirq, desc->action); | 639 | handle_IRQ_event(realirq, desc->action); |
| @@ -680,27 +664,29 @@ void init_VISWS_APIC_irqs(void) | |||
| 680 | int i; | 664 | int i; |
| 681 | 665 | ||
| 682 | for (i = 0; i < CO_IRQ_APIC0 + CO_APIC_LAST + 1; i++) { | 666 | for (i = 0; i < CO_IRQ_APIC0 + CO_APIC_LAST + 1; i++) { |
| 683 | irq_desc[i].status = IRQ_DISABLED; | 667 | struct irq_desc *desc = irq_to_desc(i); |
| 684 | irq_desc[i].action = 0; | 668 | |
| 685 | irq_desc[i].depth = 1; | 669 | desc->status = IRQ_DISABLED; |
| 670 | desc->action = 0; | ||
| 671 | desc->depth = 1; | ||
| 686 | 672 | ||
| 687 | if (i == 0) { | 673 | if (i == 0) { |
| 688 | irq_desc[i].chip = &cobalt_irq_type; | 674 | desc->chip = &cobalt_irq_type; |
| 689 | } | 675 | } |
| 690 | else if (i == CO_IRQ_IDE0) { | 676 | else if (i == CO_IRQ_IDE0) { |
| 691 | irq_desc[i].chip = &cobalt_irq_type; | 677 | desc->chip = &cobalt_irq_type; |
| 692 | } | 678 | } |
| 693 | else if (i == CO_IRQ_IDE1) { | 679 | else if (i == CO_IRQ_IDE1) { |
| 694 | irq_desc[i].chip = &cobalt_irq_type; | 680 | desc->chip = &cobalt_irq_type; |
| 695 | } | 681 | } |
| 696 | else if (i == CO_IRQ_8259) { | 682 | else if (i == CO_IRQ_8259) { |
| 697 | irq_desc[i].chip = &piix4_master_irq_type; | 683 | desc->chip = &piix4_master_irq_type; |
| 698 | } | 684 | } |
| 699 | else if (i < CO_IRQ_APIC0) { | 685 | else if (i < CO_IRQ_APIC0) { |
| 700 | irq_desc[i].chip = &piix4_virtual_irq_type; | 686 | desc->chip = &piix4_virtual_irq_type; |
| 701 | } | 687 | } |
| 702 | else if (IS_CO_APIC(i)) { | 688 | else if (IS_CO_APIC(i)) { |
| 703 | irq_desc[i].chip = &cobalt_irq_type; | 689 | desc->chip = &cobalt_irq_type; |
| 704 | } | 690 | } |
| 705 | } | 691 | } |
| 706 | 692 | ||
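Instead of assigning a set of loose arch_*_quirk function pointers, the VISWS hooks are now collected in one x86_quirks table. A rough sketch of the consumer side (illustrative only; the real call sites live in the shared setup code): a quirk that returns nonzero, as visws_get_smp_config() does, tells the generic path to skip its default behaviour.

    /* Illustrative only: how a caller can consult one quirk slot */
    static void __init maybe_get_smp_config(unsigned int early)
    {
            if (x86_quirks->mach_get_smp_config &&
                x86_quirks->mach_get_smp_config(early))
                    return; /* quirk handled it; visws_get_smp_config() returns 1 */

            /* ...otherwise fall back to the generic MP-table parsing here */
    }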
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 38f566fa27d2..4eeb5cf9720d 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c | |||
| @@ -46,6 +46,7 @@ | |||
| 46 | #include <asm/io.h> | 46 | #include <asm/io.h> |
| 47 | #include <asm/tlbflush.h> | 47 | #include <asm/tlbflush.h> |
| 48 | #include <asm/irq.h> | 48 | #include <asm/irq.h> |
| 49 | #include <asm/syscalls.h> | ||
| 49 | 50 | ||
| 50 | /* | 51 | /* |
| 51 | * Known problems: | 52 | * Known problems: |
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index b15346092b7b..8b6c393ab9fd 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c | |||
| @@ -37,6 +37,7 @@ | |||
| 37 | #include <asm/timer.h> | 37 | #include <asm/timer.h> |
| 38 | #include <asm/vmi_time.h> | 38 | #include <asm/vmi_time.h> |
| 39 | #include <asm/kmap_types.h> | 39 | #include <asm/kmap_types.h> |
| 40 | #include <asm/setup.h> | ||
| 40 | 41 | ||
| 41 | /* Convenient for calling VMI functions indirectly in the ROM */ | 42 | /* Convenient for calling VMI functions indirectly in the ROM */ |
| 42 | typedef u32 __attribute__((regparm(1))) (VROMFUNC)(void); | 43 | typedef u32 __attribute__((regparm(1))) (VROMFUNC)(void); |
| @@ -234,7 +235,7 @@ static void vmi_write_ldt_entry(struct desc_struct *dt, int entry, | |||
| 234 | const void *desc) | 235 | const void *desc) |
| 235 | { | 236 | { |
| 236 | u32 *ldt_entry = (u32 *)desc; | 237 | u32 *ldt_entry = (u32 *)desc; |
| 237 | vmi_ops.write_idt_entry(dt, entry, ldt_entry[0], ldt_entry[1]); | 238 | vmi_ops.write_ldt_entry(dt, entry, ldt_entry[0], ldt_entry[1]); |
| 238 | } | 239 | } |
| 239 | 240 | ||
| 240 | static void vmi_load_sp0(struct tss_struct *tss, | 241 | static void vmi_load_sp0(struct tss_struct *tss, |
| @@ -392,13 +393,13 @@ static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type) | |||
| 392 | } | 393 | } |
| 393 | #endif | 394 | #endif |
| 394 | 395 | ||
| 395 | static void vmi_allocate_pte(struct mm_struct *mm, u32 pfn) | 396 | static void vmi_allocate_pte(struct mm_struct *mm, unsigned long pfn) |
| 396 | { | 397 | { |
| 397 | vmi_set_page_type(pfn, VMI_PAGE_L1); | 398 | vmi_set_page_type(pfn, VMI_PAGE_L1); |
| 398 | vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0); | 399 | vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0); |
| 399 | } | 400 | } |
| 400 | 401 | ||
| 401 | static void vmi_allocate_pmd(struct mm_struct *mm, u32 pfn) | 402 | static void vmi_allocate_pmd(struct mm_struct *mm, unsigned long pfn) |
| 402 | { | 403 | { |
| 403 | /* | 404 | /* |
| 404 | * This call comes in very early, before mem_map is setup. | 405 | * This call comes in very early, before mem_map is setup. |
| @@ -409,20 +410,20 @@ static void vmi_allocate_pmd(struct mm_struct *mm, u32 pfn) | |||
| 409 | vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0); | 410 | vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0); |
| 410 | } | 411 | } |
| 411 | 412 | ||
| 412 | static void vmi_allocate_pmd_clone(u32 pfn, u32 clonepfn, u32 start, u32 count) | 413 | static void vmi_allocate_pmd_clone(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count) |
| 413 | { | 414 | { |
| 414 | vmi_set_page_type(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE); | 415 | vmi_set_page_type(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE); |
| 415 | vmi_check_page_type(clonepfn, VMI_PAGE_L2); | 416 | vmi_check_page_type(clonepfn, VMI_PAGE_L2); |
| 416 | vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count); | 417 | vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count); |
| 417 | } | 418 | } |
| 418 | 419 | ||
| 419 | static void vmi_release_pte(u32 pfn) | 420 | static void vmi_release_pte(unsigned long pfn) |
| 420 | { | 421 | { |
| 421 | vmi_ops.release_page(pfn, VMI_PAGE_L1); | 422 | vmi_ops.release_page(pfn, VMI_PAGE_L1); |
| 422 | vmi_set_page_type(pfn, VMI_PAGE_NORMAL); | 423 | vmi_set_page_type(pfn, VMI_PAGE_NORMAL); |
| 423 | } | 424 | } |
| 424 | 425 | ||
| 425 | static void vmi_release_pmd(u32 pfn) | 426 | static void vmi_release_pmd(unsigned long pfn) |
| 426 | { | 427 | { |
| 427 | vmi_ops.release_page(pfn, VMI_PAGE_L2); | 428 | vmi_ops.release_page(pfn, VMI_PAGE_L2); |
| 428 | vmi_set_page_type(pfn, VMI_PAGE_NORMAL); | 429 | vmi_set_page_type(pfn, VMI_PAGE_NORMAL); |
| @@ -683,7 +684,7 @@ void vmi_bringup(void) | |||
| 683 | { | 684 | { |
| 684 | /* We must establish the lowmem mapping for MMU ops to work */ | 685 | /* We must establish the lowmem mapping for MMU ops to work */ |
| 685 | if (vmi_ops.set_linear_mapping) | 686 | if (vmi_ops.set_linear_mapping) |
| 686 | vmi_ops.set_linear_mapping(0, (void *)__PAGE_OFFSET, max_low_pfn, 0); | 687 | vmi_ops.set_linear_mapping(0, (void *)__PAGE_OFFSET, MAXMEM_PFN, 0); |
| 687 | } | 688 | } |
| 688 | 689 | ||
| 689 | /* | 690 | /* |
| @@ -904,9 +905,8 @@ static inline int __init activate_vmi(void) | |||
| 904 | #endif | 905 | #endif |
| 905 | 906 | ||
| 906 | #ifdef CONFIG_X86_LOCAL_APIC | 907 | #ifdef CONFIG_X86_LOCAL_APIC |
| 907 | para_fill(pv_apic_ops.apic_read, APICRead); | 908 | para_fill(apic_ops->read, APICRead); |
| 908 | para_fill(pv_apic_ops.apic_write, APICWrite); | 909 | para_fill(apic_ops->write, APICWrite); |
| 909 | para_fill(pv_apic_ops.apic_write_atomic, APICWrite); | ||
| 910 | #endif | 910 | #endif |
| 911 | 911 | ||
| 912 | /* | 912 | /* |
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c index 6953859fe289..254ee07f8635 100644 --- a/arch/x86/kernel/vmiclock_32.c +++ b/arch/x86/kernel/vmiclock_32.c | |||
| @@ -235,11 +235,14 @@ static void __devinit vmi_time_init_clockevent(void) | |||
| 235 | 235 | ||
| 236 | void __init vmi_time_init(void) | 236 | void __init vmi_time_init(void) |
| 237 | { | 237 | { |
| 238 | unsigned int cpu; | ||
| 238 | /* Disable PIT: BIOSes start PIT CH0 with 18.2Hz periodic. */ | 239 | /* Disable PIT: BIOSes start PIT CH0 with 18.2Hz periodic. */ |
| 239 | outb_pit(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */ | 240 | outb_pit(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */ |
| 240 | 241 | ||
| 241 | vmi_time_init_clockevent(); | 242 | vmi_time_init_clockevent(); |
| 242 | setup_irq(0, &vmi_clock_action); | 243 | setup_irq(0, &vmi_clock_action); |
| 244 | for_each_possible_cpu(cpu) | ||
| 245 | per_cpu(vector_irq, cpu)[vmi_get_timer_vector()] = 0; | ||
| 243 | } | 246 | } |
| 244 | 247 | ||
| 245 | #ifdef CONFIG_X86_LOCAL_APIC | 248 | #ifdef CONFIG_X86_LOCAL_APIC |
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index cdb2363697d2..a9b8560adbc2 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S | |||
| @@ -140,10 +140,10 @@ SECTIONS | |||
| 140 | *(.con_initcall.init) | 140 | *(.con_initcall.init) |
| 141 | __con_initcall_end = .; | 141 | __con_initcall_end = .; |
| 142 | } | 142 | } |
| 143 | .x86cpuvendor.init : AT(ADDR(.x86cpuvendor.init) - LOAD_OFFSET) { | 143 | .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { |
| 144 | __x86cpuvendor_start = .; | 144 | __x86_cpu_dev_start = .; |
| 145 | *(.x86cpuvendor.init) | 145 | *(.x86_cpu_dev.init) |
| 146 | __x86cpuvendor_end = .; | 146 | __x86_cpu_dev_end = .; |
| 147 | } | 147 | } |
| 148 | SECURITY_INIT | 148 | SECURITY_INIT |
| 149 | . = ALIGN(4); | 149 | . = ALIGN(4); |
| @@ -180,6 +180,7 @@ SECTIONS | |||
| 180 | . = ALIGN(PAGE_SIZE); | 180 | . = ALIGN(PAGE_SIZE); |
| 181 | .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { | 181 | .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { |
| 182 | __per_cpu_start = .; | 182 | __per_cpu_start = .; |
| 183 | *(.data.percpu.page_aligned) | ||
| 183 | *(.data.percpu) | 184 | *(.data.percpu) |
| 184 | *(.data.percpu.shared_aligned) | 185 | *(.data.percpu.shared_aligned) |
| 185 | __per_cpu_end = .; | 186 | __per_cpu_end = .; |
| @@ -209,3 +210,11 @@ SECTIONS | |||
| 209 | 210 | ||
| 210 | DWARF_DEBUG | 211 | DWARF_DEBUG |
| 211 | } | 212 | } |
| 213 | |||
| 214 | #ifdef CONFIG_KEXEC | ||
| 215 | /* Link time checks */ | ||
| 216 | #include <asm/kexec.h> | ||
| 217 | |||
| 218 | ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, | ||
| 219 | "kexec control code size is too big") | ||
| 220 | #endif | ||
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index 63e5c1a22e88..46e05447405b 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S | |||
| @@ -168,12 +168,11 @@ SECTIONS | |||
| 168 | *(.con_initcall.init) | 168 | *(.con_initcall.init) |
| 169 | } | 169 | } |
| 170 | __con_initcall_end = .; | 170 | __con_initcall_end = .; |
| 171 | . = ALIGN(16); | 171 | __x86_cpu_dev_start = .; |
| 172 | __x86cpuvendor_start = .; | 172 | .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { |
| 173 | .x86cpuvendor.init : AT(ADDR(.x86cpuvendor.init) - LOAD_OFFSET) { | 173 | *(.x86_cpu_dev.init) |
| 174 | *(.x86cpuvendor.init) | ||
| 175 | } | 174 | } |
| 176 | __x86cpuvendor_end = .; | 175 | __x86_cpu_dev_end = .; |
| 177 | SECURITY_INIT | 176 | SECURITY_INIT |
| 178 | 177 | ||
| 179 | . = ALIGN(8); | 178 | . = ALIGN(8); |
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index 0c029e8959c7..7766d36983fc 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c | |||
| @@ -61,7 +61,7 @@ static void vsmp_irq_enable(void) | |||
| 61 | native_restore_fl((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC)); | 61 | native_restore_fl((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC)); |
| 62 | } | 62 | } |
| 63 | 63 | ||
| 64 | static unsigned __init vsmp_patch(u8 type, u16 clobbers, void *ibuf, | 64 | static unsigned __init_or_module vsmp_patch(u8 type, u16 clobbers, void *ibuf, |
| 65 | unsigned long addr, unsigned len) | 65 | unsigned long addr, unsigned len) |
| 66 | { | 66 | { |
| 67 | switch (type) { | 67 | switch (type) { |
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c new file mode 100644 index 000000000000..b13acb75e822 --- /dev/null +++ b/arch/x86/kernel/xsave.c | |||
| @@ -0,0 +1,345 @@ | |||
| 1 | /* | ||
| 2 | * xsave/xrstor support. | ||
| 3 | * | ||
| 4 | * Author: Suresh Siddha <suresh.b.siddha@intel.com> | ||
| 5 | */ | ||
| 6 | #include <linux/bootmem.h> | ||
| 7 | #include <linux/compat.h> | ||
| 8 | #include <asm/i387.h> | ||
| 9 | #ifdef CONFIG_IA32_EMULATION | ||
| 10 | #include <asm/sigcontext32.h> | ||
| 11 | #endif | ||
| 12 | #include <asm/xcr.h> | ||
| 13 | |||
| 14 | /* | ||
| 15 | * Supported feature mask by the CPU and the kernel. | ||
| 16 | */ | ||
| 17 | u64 pcntxt_mask; | ||
| 18 | |||
| 19 | struct _fpx_sw_bytes fx_sw_reserved; | ||
| 20 | #ifdef CONFIG_IA32_EMULATION | ||
| 21 | struct _fpx_sw_bytes fx_sw_reserved_ia32; | ||
| 22 | #endif | ||
| 23 | |||
| 24 | /* | ||
| 25 | * Check for the presence of extended state information in the | ||
| 26 | * user fpstate pointer in the sigcontext. | ||
| 27 | */ | ||
| 28 | int check_for_xstate(struct i387_fxsave_struct __user *buf, | ||
| 29 | void __user *fpstate, | ||
| 30 | struct _fpx_sw_bytes *fx_sw_user) | ||
| 31 | { | ||
| 32 | int min_xstate_size = sizeof(struct i387_fxsave_struct) + | ||
| 33 | sizeof(struct xsave_hdr_struct); | ||
| 34 | unsigned int magic2; | ||
| 35 | int err; | ||
| 36 | |||
| 37 | err = __copy_from_user(fx_sw_user, &buf->sw_reserved[0], | ||
| 38 | sizeof(struct _fpx_sw_bytes)); | ||
| 39 | |||
| 40 | if (err) | ||
| 41 | return err; | ||
| 42 | |||
| 43 | /* | ||
| 44 | * First Magic check failed. | ||
| 45 | */ | ||
| 46 | if (fx_sw_user->magic1 != FP_XSTATE_MAGIC1) | ||
| 47 | return -1; | ||
| 48 | |||
| 49 | /* | ||
| 50 | * Check for error scenarios. | ||
| 51 | */ | ||
| 52 | if (fx_sw_user->xstate_size < min_xstate_size || | ||
| 53 | fx_sw_user->xstate_size > xstate_size || | ||
| 54 | fx_sw_user->xstate_size > fx_sw_user->extended_size) | ||
| 55 | return -1; | ||
| 56 | |||
| 57 | err = __get_user(magic2, (__u32 *) (((void *)fpstate) + | ||
| 58 | fx_sw_user->extended_size - | ||
| 59 | FP_XSTATE_MAGIC2_SIZE)); | ||
| 60 | /* | ||
| 61 | * Check for the presence of second magic word at the end of memory | ||
| 62 | * layout. This detects the case where the user just copied the legacy | ||
| 63 | * fpstate layout without copying the extended state information | ||
| 64 | * in the memory layout. | ||
| 65 | */ | ||
| 66 | if (err || magic2 != FP_XSTATE_MAGIC2) | ||
| 67 | return -1; | ||
| 68 | |||
| 69 | return 0; | ||
| 70 | } | ||
| 71 | |||
| 72 | #ifdef CONFIG_X86_64 | ||
| 73 | /* | ||
| 74 | * Signal frame handlers. | ||
| 75 | */ | ||
| 76 | |||
| 77 | int save_i387_xstate(void __user *buf) | ||
| 78 | { | ||
| 79 | struct task_struct *tsk = current; | ||
| 80 | int err = 0; | ||
| 81 | |||
| 82 | if (!access_ok(VERIFY_WRITE, buf, sig_xstate_size)) | ||
| 83 | return -EACCES; | ||
| 84 | |||
| 85 | BUG_ON(sig_xstate_size < xstate_size); | ||
| 86 | |||
| 87 | if ((unsigned long)buf % 64) | ||
| 88 | printk("save_i387_xstate: bad fpstate %p\n", buf); | ||
| 89 | |||
| 90 | if (!used_math()) | ||
| 91 | return 0; | ||
| 92 | clear_used_math(); /* trigger finit */ | ||
| 93 | if (task_thread_info(tsk)->status & TS_USEDFPU) { | ||
| 94 | /* | ||
| 95 | * Start with clearing the user buffer. This will present a | ||
| 96 | * clean context for the bytes not touched by the fxsave/xsave. | ||
| 97 | */ | ||
| 98 | err = __clear_user(buf, sig_xstate_size); | ||
| 99 | if (err) | ||
| 100 | return err; | ||
| 101 | |||
| 102 | if (task_thread_info(tsk)->status & TS_XSAVE) | ||
| 103 | err = xsave_user(buf); | ||
| 104 | else | ||
| 105 | err = fxsave_user(buf); | ||
| 106 | |||
| 107 | if (err) | ||
| 108 | return err; | ||
| 109 | task_thread_info(tsk)->status &= ~TS_USEDFPU; | ||
| 110 | stts(); | ||
| 111 | } else { | ||
| 112 | if (__copy_to_user(buf, &tsk->thread.xstate->fxsave, | ||
| 113 | xstate_size)) | ||
| 114 | return -1; | ||
| 115 | } | ||
| 116 | |||
| 117 | if (task_thread_info(tsk)->status & TS_XSAVE) { | ||
| 118 | struct _fpstate __user *fx = buf; | ||
| 119 | struct _xstate __user *x = buf; | ||
| 120 | u64 xstate_bv; | ||
| 121 | |||
| 122 | err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved, | ||
| 123 | sizeof(struct _fpx_sw_bytes)); | ||
| 124 | |||
| 125 | err |= __put_user(FP_XSTATE_MAGIC2, | ||
| 126 | (__u32 __user *) (buf + sig_xstate_size | ||
| 127 | - FP_XSTATE_MAGIC2_SIZE)); | ||
| 128 | |||
| 129 | /* | ||
| 130 | * Read the xstate_bv which we copied (directly from the cpu or | ||
| 131 | * from the state in task struct) to the user buffers and | ||
| 132 | * set the FP/SSE bits. | ||
| 133 | */ | ||
| 134 | err |= __get_user(xstate_bv, &x->xstate_hdr.xstate_bv); | ||
| 135 | |||
| 136 | /* | ||
| 137 | * For legacy compatibility, we always set FP/SSE bits in the bit | ||
| 138 | * vector while saving the state to the user context. This will | ||
| 139 | * enable us to capture any changes (during sigreturn) to | ||
| 140 | * the FP/SSE bits by the legacy applications which don't touch | ||
| 141 | * xstate_bv in the xsave header. | ||
| 142 | * | ||
| 143 | * xsave aware apps can change the xstate_bv in the xsave | ||
| 144 | * header as well as change any contents in the memory layout. | ||
| 145 | * xrestore as part of sigreturn will capture all the changes. | ||
| 146 | */ | ||
| 147 | xstate_bv |= XSTATE_FPSSE; | ||
| 148 | |||
| 149 | err |= __put_user(xstate_bv, &x->xstate_hdr.xstate_bv); | ||
| 150 | |||
| 151 | if (err) | ||
| 152 | return err; | ||
| 153 | } | ||
| 154 | |||
| 155 | return 1; | ||
| 156 | } | ||
| 157 | |||
| 158 | /* | ||
| 159 | * Restore the extended state if present. Otherwise, restore the FP/SSE | ||
| 160 | * state. | ||
| 161 | */ | ||
| 162 | int restore_user_xstate(void __user *buf) | ||
| 163 | { | ||
| 164 | struct _fpx_sw_bytes fx_sw_user; | ||
| 165 | u64 mask; | ||
| 166 | int err; | ||
| 167 | |||
| 168 | if (((unsigned long)buf % 64) || | ||
| 169 | check_for_xstate(buf, buf, &fx_sw_user)) | ||
| 170 | goto fx_only; | ||
| 171 | |||
| 172 | mask = fx_sw_user.xstate_bv; | ||
| 173 | |||
| 174 | /* | ||
| 175 | * restore the state passed by the user. | ||
| 176 | */ | ||
| 177 | err = xrestore_user(buf, mask); | ||
| 178 | if (err) | ||
| 179 | return err; | ||
| 180 | |||
| 181 | /* | ||
| 182 | * init the state skipped by the user. | ||
| 183 | */ | ||
| 184 | mask = pcntxt_mask & ~mask; | ||
| 185 | |||
| 186 | xrstor_state(init_xstate_buf, mask); | ||
| 187 | |||
| 188 | return 0; | ||
| 189 | |||
| 190 | fx_only: | ||
| 191 | /* | ||
| 192 | * couldn't find the extended state information in the | ||
| 193 | * memory layout. Restore just the FP/SSE and init all | ||
| 194 | * the other extended state. | ||
| 195 | */ | ||
| 196 | xrstor_state(init_xstate_buf, pcntxt_mask & ~XSTATE_FPSSE); | ||
| 197 | return fxrstor_checking((__force struct i387_fxsave_struct *)buf); | ||
| 198 | } | ||
| 199 | |||
| 200 | /* | ||
| 201 | * This restores directly out of user space. Exceptions are handled. | ||
| 202 | */ | ||
| 203 | int restore_i387_xstate(void __user *buf) | ||
| 204 | { | ||
| 205 | struct task_struct *tsk = current; | ||
| 206 | int err = 0; | ||
| 207 | |||
| 208 | if (!buf) { | ||
| 209 | if (used_math()) | ||
| 210 | goto clear; | ||
| 211 | return 0; | ||
| 212 | } else | ||
| 213 | if (!access_ok(VERIFY_READ, buf, sig_xstate_size)) | ||
| 214 | return -EACCES; | ||
| 215 | |||
| 216 | if (!used_math()) { | ||
| 217 | err = init_fpu(tsk); | ||
| 218 | if (err) | ||
| 219 | return err; | ||
| 220 | } | ||
| 221 | |||
| 222 | if (!(task_thread_info(current)->status & TS_USEDFPU)) { | ||
| 223 | clts(); | ||
| 224 | task_thread_info(current)->status |= TS_USEDFPU; | ||
| 225 | } | ||
| 226 | if (task_thread_info(tsk)->status & TS_XSAVE) | ||
| 227 | err = restore_user_xstate(buf); | ||
| 228 | else | ||
| 229 | err = fxrstor_checking((__force struct i387_fxsave_struct *) | ||
| 230 | buf); | ||
| 231 | if (unlikely(err)) { | ||
| 232 | /* | ||
| 233 | * Encountered an error while restoring from the | ||
| 234 | * user buffer; clear the FPU state. | ||
| 235 | */ | ||
| 236 | clear: | ||
| 237 | clear_fpu(tsk); | ||
| 238 | clear_used_math(); | ||
| 239 | } | ||
| 240 | return err; | ||
| 241 | } | ||
| 242 | #endif | ||
| 243 | |||
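restore_i387_xstate() can restore straight from user memory because the low-level restore helpers rely on the exception-table mechanism: a fault on the user buffer lands in a fixup that reports an error instead of oopsing. The following is only a hedged sketch of that idiom, not the body of the real fxrstor_checking()/xrestore_user() helpers.

```c
/*
 * Sketch of the exception-table idiom only, NOT the body of the real
 * fxrstor_checking()/xrestore_user() helpers: a fault while executing
 * the fxrstor at label 1 is redirected by the exception table to the
 * fixup at label 3, which records -EFAULT instead of oopsing.
 */
#include <linux/errno.h>
#include <asm/asm.h>		/* _ASM_EXTABLE() */
#include <asm/processor.h>	/* struct i387_fxsave_struct */

static int fxrstor_user_sketch(struct i387_fxsave_struct *fx)
{
	int err = 0;

	asm volatile("1:	fxrstor %[fx]\n"
		     "2:\n"
		     ".section .fixup,\"ax\"\n"
		     "3:	movl %[efault], %[err]\n"
		     "	jmp 2b\n"
		     ".previous\n"
		     _ASM_EXTABLE(1b, 3b)
		     : [err] "+r" (err)
		     : [fx] "m" (*fx), [efault] "i" (-EFAULT));
	return err;
}
```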
| 244 | /* | ||
| 245 | * Prepare the SW reserved portion of the fxsave memory layout, indicating | ||
| 246 | * the presence of the extended state information in the memory layout | ||
| 247 | * pointed to by the fpstate pointer in the sigcontext. | ||
| 248 | * This is saved whenever the FP and extended state context is saved | ||
| 249 | * on the user stack during signal delivery to the user. | ||
| 250 | */ | ||
| 251 | static void prepare_fx_sw_frame(void) | ||
| 252 | { | ||
| 253 | int size_extended = (xstate_size - sizeof(struct i387_fxsave_struct)) + | ||
| 254 | FP_XSTATE_MAGIC2_SIZE; | ||
| 255 | |||
| 256 | sig_xstate_size = sizeof(struct _fpstate) + size_extended; | ||
| 257 | |||
| 258 | #ifdef CONFIG_IA32_EMULATION | ||
| 259 | sig_xstate_ia32_size = sizeof(struct _fpstate_ia32) + size_extended; | ||
| 260 | #endif | ||
| 261 | |||
| 262 | memset(&fx_sw_reserved, 0, sizeof(fx_sw_reserved)); | ||
| 263 | |||
| 264 | fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1; | ||
| 265 | fx_sw_reserved.extended_size = sig_xstate_size; | ||
| 266 | fx_sw_reserved.xstate_bv = pcntxt_mask; | ||
| 267 | fx_sw_reserved.xstate_size = xstate_size; | ||
| 268 | #ifdef CONFIG_IA32_EMULATION | ||
| 269 | memcpy(&fx_sw_reserved_ia32, &fx_sw_reserved, | ||
| 270 | sizeof(struct _fpx_sw_bytes)); | ||
| 271 | fx_sw_reserved_ia32.extended_size = sig_xstate_ia32_size; | ||
| 272 | #endif | ||
| 273 | } | ||
| 274 | |||
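prepare_fx_sw_frame() precomputes the magic1/extended_size/xstate_bv/xstate_size words that the save path copies into the software-reserved tail of the fxsave image, with FP_XSTATE_MAGIC2 placed right after the extended state (the __put_user at the top of this hunk). User space can use those words to detect an extended signal frame. A hedged userspace sketch follows (x86-64 only; the constants and struct layout mirror the kernel's asm/sigcontext.h, the program itself is illustrative).

```c
/*
 * Userspace sketch (x86-64 only; illustrative, not part of this patch):
 * detect an extended signal frame from the software-reserved words that
 * the kernel copies into the last 48 bytes of the fxsave image (offset
 * 464), and from FP_XSTATE_MAGIC2 placed right after the extended state.
 * Constants and the struct layout mirror the kernel's asm/sigcontext.h.
 */
#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <ucontext.h>

#define FP_XSTATE_MAGIC1 0x46505853u
#define FP_XSTATE_MAGIC2 0x46505845u

struct fpx_sw_bytes {			/* mirrors struct _fpx_sw_bytes */
	uint32_t magic1;
	uint32_t extended_size;
	uint64_t xstate_bv;
	uint32_t xstate_size;
	uint32_t padding[7];
};

static void handler(int sig, siginfo_t *info, void *ctx)
{
	ucontext_t *uc = ctx;
	unsigned char *fpstate = (unsigned char *)uc->uc_mcontext.fpregs;
	struct fpx_sw_bytes sw;
	uint32_t magic2;

	if (!fpstate)
		return;

	memcpy(&sw, fpstate + 464, sizeof(sw));
	if (sw.magic1 != FP_XSTATE_MAGIC1)
		return;				/* legacy fxsave-only frame */

	memcpy(&magic2, fpstate + sw.extended_size - sizeof(magic2),
	       sizeof(magic2));
	if (magic2 == FP_XSTATE_MAGIC2)		/* printf: fine for a sync demo */
		printf("extended frame: xstate_bv=%#llx xstate_size=%u\n",
		       (unsigned long long)sw.xstate_bv, sw.xstate_size);
}

int main(void)
{
	struct sigaction sa = { .sa_sigaction = handler, .sa_flags = SA_SIGINFO };

	sigemptyset(&sa.sa_mask);
	sigaction(SIGUSR1, &sa, NULL);
	raise(SIGUSR1);
	return 0;
}
```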
| 275 | /* | ||
| 276 | * Represents init state for the supported extended state. | ||
| 277 | */ | ||
| 278 | struct xsave_struct *init_xstate_buf; | ||
| 279 | |||
| 280 | #ifdef CONFIG_X86_64 | ||
| 281 | unsigned int sig_xstate_size = sizeof(struct _fpstate); | ||
| 282 | #endif | ||
| 283 | |||
| 284 | /* | ||
| 285 | * Enable the extended processor state save/restore feature | ||
| 286 | */ | ||
| 287 | void __cpuinit xsave_init(void) | ||
| 288 | { | ||
| 289 | if (!cpu_has_xsave) | ||
| 290 | return; | ||
| 291 | |||
| 292 | set_in_cr4(X86_CR4_OSXSAVE); | ||
| 293 | |||
| 294 | /* | ||
| 295 | * Enable all the features that the HW is capable of | ||
| 296 | * and the Linux kernel is aware of. | ||
| 297 | */ | ||
| 298 | xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask); | ||
| 299 | } | ||
| 300 | |||
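Once xsave_init() has set CR4.OSXSAVE and written pcntxt_mask to XCR0 via xsetbv, user space can read the enabled mask back with xgetbv. A hedged userspace sketch of that check (standard CPUID/xgetbv usage; the program is illustrative):

```c
/*
 * Userspace sketch (illustrative): once CR4.OSXSAVE is set and XCR0 is
 * programmed, xgetbv(0) returns the enabled feature mask, i.e. the value
 * written via xsetbv() above.  CPUID.1:ECX bit 27 (OSXSAVE) is checked
 * before executing xgetbv.
 */
#include <stdint.h>
#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;
	uint32_t lo, hi;

	if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx) || !(ecx & (1u << 27))) {
		puts("OSXSAVE not enabled by the kernel");
		return 0;
	}

	/* xgetbv with ECX = 0 reads XCR0 into EDX:EAX. */
	__asm__ volatile(".byte 0x0f, 0x01, 0xd0"
			 : "=a" (lo), "=d" (hi)
			 : "c" (0));
	printf("XCR0 = %#llx\n", ((unsigned long long)hi << 32) | lo);
	return 0;
}
```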
| 301 | /* | ||
| 302 | * Set up the xstate image representing the init state | ||
| 303 | */ | ||
| 304 | static void __init setup_xstate_init(void) | ||
| 305 | { | ||
| 306 | init_xstate_buf = alloc_bootmem(xstate_size); | ||
| 307 | init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT; | ||
| 308 | } | ||
| 309 | |||
| 310 | /* | ||
| 311 | * Enable and initialize the xsave feature. | ||
| 312 | */ | ||
| 313 | void __init xsave_cntxt_init(void) | ||
| 314 | { | ||
| 315 | unsigned int eax, ebx, ecx, edx; | ||
| 316 | |||
| 317 | cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx); | ||
| 318 | pcntxt_mask = eax + ((u64)edx << 32); | ||
| 319 | |||
| 320 | if ((pcntxt_mask & XSTATE_FPSSE) != XSTATE_FPSSE) { | ||
| 321 | printk(KERN_ERR "FP/SSE not shown under xsave features 0x%llx\n", | ||
| 322 | pcntxt_mask); | ||
| 323 | BUG(); | ||
| 324 | } | ||
| 325 | |||
| 326 | /* | ||
| 327 | * For now, the OS knows only about FP/SSE | ||
| 328 | */ | ||
| 329 | pcntxt_mask = pcntxt_mask & XCNTXT_MASK; | ||
| 330 | xsave_init(); | ||
| 331 | |||
| 332 | /* | ||
| 333 | * Recompute the context size for enabled features | ||
| 334 | */ | ||
| 335 | cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx); | ||
| 336 | xstate_size = ebx; | ||
| 337 | |||
| 338 | prepare_fx_sw_frame(); | ||
| 339 | |||
| 340 | setup_xstate_init(); | ||
| 341 | |||
| 342 | printk(KERN_INFO "xsave/xrstor: enabled xstate_bv 0x%llx, " | ||
| 343 | "cntxt size 0x%x\n", | ||
| 344 | pcntxt_mask, xstate_size); | ||
| 345 | } | ||
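xsave_cntxt_init() relies on CPUID leaf 0xD, sub-leaf 0: EDX:EAX report the supported feature mask, EBX the save-area size for the features currently enabled in XCR0 (hence the second cpuid_count() after xsave_init()), and ECX the maximum size across all supported features. A hedged userspace sketch of the same probe, using GCC's <cpuid.h> helpers for illustration:

```c
/*
 * Userspace probe of CPUID leaf 0xD, sub-leaf 0 (illustrative):
 * EDX:EAX = supported xstate feature mask, EBX = save-area size for the
 * features currently enabled in XCR0, ECX = maximum save-area size.
 */
#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (__get_cpuid_max(0, NULL) < 0xd) {
		puts("CPUID leaf 0xD not available");
		return 0;
	}

	__cpuid_count(0xd, 0, eax, ebx, ecx, edx);
	printf("supported mask        : %#llx\n",
	       ((unsigned long long)edx << 32) | eax);
	printf("size for enabled bits : %u bytes\n", ebx);
	printf("max size, all features: %u bytes\n", ecx);
	return 0;
}
```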
