From ae9b9403644f3ecc76867af042e7e1cfd5c099d0 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 30 Oct 2008 17:43:57 +0100 Subject: AMD IOMMU: fix detection of NP capable IOMMUs This patch changes the code to use IOMMU_CAP_NPCACHE as a shift and not as a mask. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 3b346c6f5514..38e88d40ab10 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -50,7 +50,7 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, /* returns !0 if the IOMMU is caching non-present entries in its TLB */ static int iommu_has_npcache(struct amd_iommu *iommu) { - return iommu->cap & IOMMU_CAP_NPCACHE; + return iommu->cap & (1UL << IOMMU_CAP_NPCACHE); } /**************************************************************************** -- cgit v1.2.2 From 1f98757776eafe31065be9118db6051afcf8643c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 1 Nov 2008 10:17:22 -0700 Subject: x86: Clean up late e820 resource allocation This makes the late e820 resources use 'insert_resource_expand_to_fit()' instead of doing a 'reserve_region_with_split()', and also avoids marking them as IORESOURCE_BUSY. This results in us being perfectly happy to use pre-existing PCI resources even if they were marked as being in a reserved region, while still avoiding any _new_ allocations in the reserved regions. It also makes for a simpler and more accurate resource tree. Example resource allocation from Jonathan Corbet, who has firmware that has an e820 reserved entry that covered a big range (e0000000-fed003ff), and that had various PCI resources in it set up by firmware. With old kernels, the reserved range would force us to re-allocate all pre-existing PCI resources, and his reserved range would end up looking like this: e0000000-fed003ff : reserved fec00000-fec00fff : IOAPIC 0 fed00000-fed003ff : HPET 0 where only the pre-allocated special regions (IOAPIC and HPET) were kept around. With 2.6.28-rc2, which uses 'reserve_region_with_split()', Jonathan's resource tree looked like this: e0000000-fe7fffff : reserved fe800000-fe8fffff : PCI Bus 0000:01 fe800000-fe8fffff : reserved fe900000-fe9d9aff : reserved fe9d9b00-fe9d9bff : 0000:00:1f.3 fe9d9b00-fe9d9bff : reserved fe9d9c00-fe9d9fff : 0000:00:1a.7 fe9d9c00-fe9d9fff : reserved fe9da000-fe9dafff : 0000:00:03.3 fe9da000-fe9dafff : reserved fe9db000-fe9dbfff : 0000:00:19.0 fe9db000-fe9dbfff : reserved fe9dc000-fe9dffff : 0000:00:1b.0 fe9dc000-fe9dffff : reserved fe9e0000-fe9fffff : 0000:00:19.0 fe9e0000-fe9fffff : reserved fea00000-fea7ffff : 0000:00:02.0 fea00000-fea7ffff : reserved fea80000-feafffff : 0000:00:02.1 fea80000-feafffff : reserved feb00000-febfffff : 0000:00:02.0 feb00000-febfffff : reserved fec00000-fed003ff : reserved fec00000-fec00fff : IOAPIC 0 fed00000-fed003ff : HPET 0 and because the reserved entry had been split and moved into the individual resources, and because it used the IORESOURCE_BUSY flag, the drivers that actually wanted to _use_ those resources couldn't actually attach to them: e1000e 0000:00:19.0: BAR 0: can't reserve mem region [0xfe9e0000-0xfe9fffff] HDA Intel 0000:00:1b.0: BAR 0: can't reserve mem region [0xfe9dc000-0xfe9dffff] with this patch, the resource tree instead becomes e0000000-fed003ff : reserved fe800000-fe8fffff : PCI Bus 0000:01 fe9d9b00-fe9d9bff : 0000:00:1f.3 fe9d9c00-fe9d9fff : 0000:00:1a.7 fe9d9c00-fe9d9fff : ehci_hcd fe9da000-fe9dafff : 0000:00:03.3 fe9db000-fe9dbfff : 0000:00:19.0 fe9db000-fe9dbfff : e1000e fe9dc000-fe9dffff : 0000:00:1b.0 fe9dc000-fe9dffff : ICH HD audio fe9e0000-fe9fffff : 0000:00:19.0 fe9e0000-fe9fffff : e1000e fea00000-fea7ffff : 0000:00:02.0 fea80000-feafffff : 0000:00:02.1 feb00000-febfffff : 0000:00:02.0 fec00000-fec00fff : IOAPIC 0 fed00000-fed003ff : HPET 0 ie the one reserved region now ends up surrounding all the PCI resources that were allocated inside of it by firmware, and because it is not marked BUSY, drivers have no problem attaching to the pre-allocated resources. Reported-and-tested-by: Jonathan Corbet Cc: Yinghai Lu Cc: Ingo Molnar Cc: Robert Hancock Signed-off-by: Linus Torvalds --- arch/x86/kernel/e820.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index ce97bf3bed12..7aafeb5263ef 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1290,15 +1290,17 @@ void __init e820_reserve_resources(void) res->start = e820.map[i].addr; res->end = end; - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + res->flags = IORESOURCE_MEM; /* * don't register the region that could be conflicted with * pci device BAR resource and insert them later in * pcibios_resource_survey() */ - if (e820.map[i].type != E820_RESERVED || res->start < (1ULL<<20)) + if (e820.map[i].type != E820_RESERVED || res->start < (1ULL<<20)) { + res->flags |= IORESOURCE_BUSY; insert_resource(&iomem_resource, res); + } res++; } @@ -1318,7 +1320,7 @@ void __init e820_reserve_resources_late(void) res = e820_res; for (i = 0; i < e820.nr_map; i++) { if (!res->parent && res->end) - reserve_region_with_split(&iomem_resource, res->start, res->end, res->name); + insert_resource_expand_to_fit(&iomem_resource, res); res++; } } -- cgit v1.2.2 From 70de9a97049e0ba79dc040868564408d5ce697f9 Mon Sep 17 00:00:00 2001 From: Alok Kataria Date: Mon, 3 Nov 2008 11:18:47 -0800 Subject: x86: don't use tsc_khz to calculate lpj if notsc is passed Impact: fix udelay when "notsc" boot parameter is passed With notsc passed on commandline, tsc may not be used for udelays, make sure that we do not use tsc_khz to calculate the lpj value in such cases. Reported-by: Bartlomiej Zolnierkiewicz Signed-off-by: Alok N Kataria Cc: Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 62348e4fd8d1..2ef80e301925 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -813,10 +813,6 @@ void __init tsc_init(void) cpu_khz = calibrate_cpu(); #endif - lpj = ((u64)tsc_khz * 1000); - do_div(lpj, HZ); - lpj_fine = lpj; - printk("Detected %lu.%03lu MHz processor.\n", (unsigned long)cpu_khz / 1000, (unsigned long)cpu_khz % 1000); @@ -836,6 +832,10 @@ void __init tsc_init(void) /* now allow native_sched_clock() to use rdtsc */ tsc_disabled = 0; + lpj = ((u64)tsc_khz * 1000); + do_div(lpj, HZ); + lpj_fine = lpj; + use_tsc_delay(); /* Check and install the TSC clocksource */ dmi_check_system(bad_tsc_dmi_table); -- cgit v1.2.2 From c78d0cf2925bffae8a6f00e7d9b8e971b0392edd Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 5 Nov 2008 12:04:46 +0000 Subject: x86: don't allow nr_irqs > NR_IRQS Impact: fix boot hang on 32-bit systems with more than 224 IO-APIC pins On some 32-bit systems with a lot of IO-APICs probe_nr_irqs() can return a value larger than NR_IRQS. This will lead to probe_irq_on() overrunning the irq_desc array. I hit this when running net-next-2.6 (close to 2.6.28-rc3) on a Supermicro dual Xeon system. NR_IRQS is 224 but probe_nr_irqs() detects 5 IOAPICs and returns 240. Here are the log messages: Tue Nov 4 16:53:47 2008 ACPI: IOAPIC (id[0x01] address[0xfec00000] gsi_base[0]) Tue Nov 4 16:53:47 2008 IOAPIC[0]: apic_id 1, version 32, address 0xfec00000, GSI 0-23 Tue Nov 4 16:53:47 2008 ACPI: IOAPIC (id[0x02] address[0xfec81000] gsi_base[24]) Tue Nov 4 16:53:47 2008 IOAPIC[1]: apic_id 2, version 32, address 0xfec81000, GSI 24-47 Tue Nov 4 16:53:47 2008 ACPI: IOAPIC (id[0x03] address[0xfec81400] gsi_base[48]) Tue Nov 4 16:53:47 2008 IOAPIC[2]: apic_id 3, version 32, address 0xfec81400, GSI 48-71 Tue Nov 4 16:53:47 2008 ACPI: IOAPIC (id[0x04] address[0xfec82000] gsi_base[72]) Tue Nov 4 16:53:47 2008 IOAPIC[3]: apic_id 4, version 32, address 0xfec82000, GSI 72-95 Tue Nov 4 16:53:47 2008 ACPI: IOAPIC (id[0x05] address[0xfec82400] gsi_base[96]) Tue Nov 4 16:53:47 2008 IOAPIC[4]: apic_id 5, version 32, address 0xfec82400, GSI 96-119 Tue Nov 4 16:53:47 2008 ACPI: INT_SRC_OVR (bus 0 bus_irq 0 global_irq 2 high edge) Tue Nov 4 16:53:47 2008 ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 high level) Tue Nov 4 16:53:47 2008 Enabling APIC mode: Flat. Using 5 I/O APICs Signed-off-by: Ben Hutchings Acked-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index b764d7429c61..7a3f2028e2eb 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -3611,6 +3611,8 @@ int __init probe_nr_irqs(void) /* something wrong ? */ if (nr < nr_min) nr = nr_min; + if (WARN_ON(nr > NR_IRQS)) + nr = NR_IRQS; return nr; } -- cgit v1.2.2 From d6f0f39b7d05e62b347c4352d070e4afb3ade4b5 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 4 Nov 2008 13:53:04 -0800 Subject: x86: add smp_mb() before sending INVALIDATE_TLB_VECTOR Impact: fix rare x2apic hang On x86, x2apic mode accesses for sending IPI's don't have serializing semantics. If the IPI receivner refers(in lock-free fashion) to some memory setup by the sender, the need for smp_mb() before sending the IPI becomes critical in x2apic mode. Add the smp_mb() in native_flush_tlb_others() before sending the IPI. Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/tlb_32.c | 6 ++++++ arch/x86/kernel/tlb_64.c | 5 +++++ 2 files changed, 11 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c index e00534b33534..f4049f3513b6 100644 --- a/arch/x86/kernel/tlb_32.c +++ b/arch/x86/kernel/tlb_32.c @@ -154,6 +154,12 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, flush_mm = mm; flush_va = va; cpus_or(flush_cpumask, cpumask, flush_cpumask); + + /* + * Make the above memory operations globally visible before + * sending the IPI. + */ + smp_mb(); /* * We have to send the IPI only to * CPUs affected. diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c index dcbf7a1159ea..8f919ca69494 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/kernel/tlb_64.c @@ -182,6 +182,11 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, f->flush_va = va; cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask); + /* + * Make the above memory operations globally visible before + * sending the IPI. + */ + smp_mb(); /* * We have to send the IPI only to * CPUs affected. -- cgit v1.2.2 From 80be308dfa3798c7bad0fc81760b2faf83870e91 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 6 Nov 2008 14:59:05 +0100 Subject: AMD IOMMU: fix lazy IO/TLB flushing in unmap path Lazy flushing needs to take care of the unmap path too which is not yet implemented and leads to stale IO/TLB entries. This is fixed by this patch. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 38e88d40ab10..4755bbc7ae5b 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -526,6 +526,9 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom, { address >>= PAGE_SHIFT; iommu_area_free(dom->bitmap, address, pages); + + if (address + pages >= dom->next_bit) + dom->need_flush = true; } /**************************************************************************** @@ -981,8 +984,10 @@ static void __unmap_single(struct amd_iommu *iommu, dma_ops_free_addresses(dma_dom, dma_addr, pages); - if (amd_iommu_unmap_flush) + if (amd_iommu_unmap_flush || dma_dom->need_flush) { iommu_flush_pages(iommu, dma_dom->domain.id, dma_addr, size); + dma_dom->need_flush = false; + } } /* -- cgit v1.2.2 From 8d00450d296dedec9ada38d43b83e79cca6fd5a3 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Tue, 4 Nov 2008 12:52:44 -0200 Subject: Revert "x86: default to reboot via ACPI" This reverts commit c7ffa6c26277b403920e2255d10df849bd613380. the assumptio of this change was that this would not break any existing machine. Andrey Borzenkov reported troubles with the ACPI reboot method: the system would hang on reboot, necessiating a power cycle. Probably more systems are affected as well. Also, there are patches queued up for v2.6.29 to disable virtualization on emergency_restart() - which was the original motivation of this change. Reported-by: Andrey Borzenkov Bisected-by: Andrey Borzenkov Signed-off-by: Eduardo Habkost Acked-by: Avi Kivity Signed-off-by: Ingo Molnar --- arch/x86/kernel/reboot.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index f4c93f1cfc19..724adfc63cb9 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -29,11 +29,7 @@ EXPORT_SYMBOL(pm_power_off); static const struct desc_ptr no_idt = {}; static int reboot_mode; -/* - * Keyboard reset and triple fault may result in INIT, not RESET, which - * doesn't work when we're in vmx root mode. Try ACPI first. - */ -enum reboot_type reboot_type = BOOT_ACPI; +enum reboot_type reboot_type = BOOT_KBD; int reboot_force; #if defined(CONFIG_X86_32) && defined(CONFIG_SMP) -- cgit v1.2.2 From 7cbaef9c83e58bbd4bdd534b09052b6c5ec457d5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 8 Nov 2008 17:05:38 +0100 Subject: sched: optimize sched_clock() a bit sched_clock() uses cycles_2_ns() needlessly - which is an irq-disabling variant of __cycles_2_ns(). Most of the time sched_clock() is called with irqs disabled already. The few places that call it with irqs enabled need to be updated. Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 2ef80e301925..424093b157d3 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -55,7 +55,7 @@ u64 native_sched_clock(void) rdtscll(this_offset); /* return the value in ns */ - return cycles_2_ns(this_offset); + return __cycles_2_ns(this_offset); } /* We need to define a real function for sched_clock, to override the -- cgit v1.2.2 From 6c2e94033df5ca11149e52dd179b8dde3172e9bf Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 7 Nov 2008 12:33:49 +0100 Subject: x86: apic honour irq affinity which was set in early boot setup_ioapic_dest() is called after the non boot cpus have been brought up. It sets the irq affinity of all already configured interrupts to all cpus and ignores affinity settings which were done by the early bootup code. If the IRQ_NO_BALANCING or IRQ_AFFINITY_SET flags are set then use the affinity mask from the irq descriptor and not TARGET_CPUS. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 7a3f2028e2eb..988ee89467d3 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -3761,7 +3761,9 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) void __init setup_ioapic_dest(void) { int pin, ioapic, irq, irq_entry; + struct irq_desc *desc; struct irq_cfg *cfg; + cpumask_t mask; if (skip_ioapic_setup == 1) return; @@ -3778,16 +3780,30 @@ void __init setup_ioapic_dest(void) * cpu is online. */ cfg = irq_cfg(irq); - if (!cfg->vector) + if (!cfg->vector) { setup_IO_APIC_irq(ioapic, pin, irq, irq_trigger(irq_entry), irq_polarity(irq_entry)); + continue; + + } + + /* + * Honour affinities which have been set in early boot + */ + desc = irq_to_desc(irq); + if (desc->status & + (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) + mask = desc->affinity; + else + mask = TARGET_CPUS; + #ifdef CONFIG_INTR_REMAP - else if (intr_remapping_enabled) - set_ir_ioapic_affinity_irq(irq, TARGET_CPUS); -#endif + if (intr_remapping_enabled) + set_ir_ioapic_affinity_irq(irq, mask); else - set_ioapic_affinity_irq(irq, TARGET_CPUS); +#endif + set_ioapic_affinity_irq(irq, mask); } } -- cgit v1.2.2 From 1de5b0854623d30d01d72cd4ea323eb5f39d1f16 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Sun, 2 Nov 2008 16:04:18 +0000 Subject: x86: HPET: convert WARN_ON to WARN_ON_ONCE It is possible to flood the console with call traces if the WARN_ON condition is true because of the frequency with which this function is called. Signed-off-by: Matt Fleming Cc: mingo@elte.hu Cc: venkatesh.pallipadi@intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/hpet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 77017e834cf7..f10f9461a43d 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -322,7 +322,7 @@ static int hpet_next_event(unsigned long delta, * what we wrote hit the chip before we compare it to the * counter. */ - WARN_ON((u32)hpet_readl(HPET_T0_CMP) != cnt); + WARN_ON_ONCE((u32)hpet_readl(HPET_T0_CMP) != cnt); return (s32)((u32)hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0; } -- cgit v1.2.2 From 89d77a1eb60be916d85d9394bedbfa2037af89c5 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Sun, 2 Nov 2008 16:04:20 +0000 Subject: x86: HPET: read from HPET_Tn_CMP() not HPET_T0_CMP In hpet_next_event() we check that the value we just wrote to HPET_Tn_CMP(timer) has reached the chip. Currently, we're checking that the value we wrote to HPET_Tn_CMP(timer) is in HPET_T0_CMP, which, if timer is anything other than timer 0, is likely to fail. Signed-off-by: Matt Fleming Cc: mingo@elte.hu Cc: venkatesh.pallipadi@intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/hpet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index f10f9461a43d..cfe6aa56f71b 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -322,7 +322,7 @@ static int hpet_next_event(unsigned long delta, * what we wrote hit the chip before we compare it to the * counter. */ - WARN_ON_ONCE((u32)hpet_readl(HPET_T0_CMP) != cnt); + WARN_ON_ONCE((u32)hpet_readl(HPET_Tn_CMP(timer)) != cnt); return (s32)((u32)hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0; } -- cgit v1.2.2 From 5ceb1a04187553e08c6ab60d30cee7c454ee139a Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Sun, 2 Nov 2008 22:23:13 +0000 Subject: x86: HPET: enter hpet_interrupt_handler with interrupts disabled Some functions that may be called from this handler require that interrupts are disabled. Also, combining IRQF_DISABLED and IRQF_SHARED does not reliably disable interrupts in a handler, so remove IRQF_SHARED from the irq flags (this irq is not shared anyway). Signed-off-by: Matt Fleming Cc: mingo@elte.hu Cc: venkatesh.pallipadi@intel.com Cc: "Will Newton" Signed-off-by: Thomas Gleixner --- arch/x86/kernel/hpet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index cfe6aa56f71b..067d8de913f6 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -445,7 +445,7 @@ static int hpet_setup_irq(struct hpet_dev *dev) { if (request_irq(dev->irq, hpet_interrupt_handler, - IRQF_SHARED|IRQF_NOBALANCING, dev->name, dev)) + IRQF_DISABLED|IRQF_NOBALANCING, dev->name, dev)) return -1; disable_irq(dev->irq); -- cgit v1.2.2 From a29a2af378f3f6362b68e126e2541c8bde885ead Mon Sep 17 00:00:00 2001 From: Rakib Mullick Date: Wed, 29 Oct 2008 14:13:39 -0700 Subject: x86: KVM guest: fix section mismatch warning in kvmclock.c WARNING: arch/x86/kernel/built-in.o(.text+0x1722c): Section mismatch in reference from the function kvm_setup_secondary_clock() to the function .devinit.text:setup_secondary_APIC_clock() The function kvm_setup_secondary_clock() references the function __devinit setup_secondary_APIC_clock(). This is often because kvm_setup_secondary_clock lacks a __devinit annotation or the annotation of setup_secondary_APIC_clock is wrong. Signed-off-by: Md.Rakib H. Mullick Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Avi Kivity --- arch/x86/kernel/kvmclock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 774ac4991568..1c9cc431ea4f 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -128,7 +128,7 @@ static int kvm_register_clock(char *txt) } #ifdef CONFIG_X86_LOCAL_APIC -static void kvm_setup_secondary_clock(void) +static void __devinit kvm_setup_secondary_clock(void) { /* * Now that the first cpu already had this clocksource initialized, -- cgit v1.2.2 From 32836259ff25ce97010569706cd33ba94de81d62 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 5 Nov 2008 16:17:52 -0700 Subject: ACPI: pci_link: remove acpi_irq_balance_set() interface This removes the acpi_irq_balance_set() interface from the PCI interrupt link driver. x86 used acpi_irq_balance_set() to tell the PCI interrupt link driver to configure links to minimize IRQ sharing. But the link driver can easily figure out whether to turn on IRQ balancing based on the IRQ model (PIC/IOAPIC/etc), so we can get rid of that external interface. It's better for the driver to figure this out at init-time. If we set it externally via the x86 code, the interface reduces modularity, and we depend on the fact that acpi_process_madt() happens before we process the kernel command line. Signed-off-by: Bjorn Helgaas Signed-off-by: Len Brown --- arch/x86/kernel/acpi/boot.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 8c1f76abae9e..4c51a2f8fd31 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1343,7 +1343,6 @@ static void __init acpi_process_madt(void) error = acpi_parse_madt_ioapic_entries(); if (!error) { acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC; - acpi_irq_balance_set(NULL); acpi_ioapic = 1; smp_found_config = 1; -- cgit v1.2.2 From 52168e60f7d86d83124903098ac8c2dba93cd1c4 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 14 Nov 2008 13:47:31 +0000 Subject: Revert "x86: blacklist DMAR on Intel G31/G33 chipsets" This reverts commit e51af6630848406fc97adbd71443818cdcda297b, which was wrongly hoovered up and submitted about a month after a better fix had already been merged. The better fix is commit cbda1ba898647aeb4ee770b803c922f595e97731 ("PCI/iommu: blacklist DMAR on Intel G31/G33 chipsets"), where we do this blacklisting based on the DMI identification for the offending motherboard, since sometimes this chipset (or at least a chipset with the same PCI ID) apparently _does_ actually have an IOMMU. Signed-off-by: David Woodhouse Signed-off-by: Linus Torvalds --- arch/x86/kernel/early-quirks.c | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 3ce029ffaa55..1b894b72c0f5 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -188,20 +188,6 @@ static void __init ati_bugs_contd(int num, int slot, int func) } #endif -#ifdef CONFIG_DMAR -static void __init intel_g33_dmar(int num, int slot, int func) -{ - struct acpi_table_header *dmar_tbl; - acpi_status status; - - status = acpi_get_table(ACPI_SIG_DMAR, 0, &dmar_tbl); - if (ACPI_SUCCESS(status)) { - printk(KERN_INFO "BIOS BUG: DMAR advertised on Intel G31/G33 chipset -- ignoring\n"); - dmar_disabled = 1; - } -} -#endif - #define QFLAG_APPLY_ONCE 0x1 #define QFLAG_APPLIED 0x2 #define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED) @@ -225,10 +211,6 @@ static struct chipset early_qrk[] __initdata = { PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs }, { PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS, PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs_contd }, -#ifdef CONFIG_DMAR - { PCI_VENDOR_ID_INTEL, 0x29c0, - PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, intel_g33_dmar }, -#endif {} }; -- cgit v1.2.2 From d1f1e9c01006b4b050e090055c75278f80c2a5c5 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Sat, 15 Nov 2008 11:00:17 +0100 Subject: x86, bts: fix unlock problem in ds.c Fix a problem where ds_request() returned an error without releasing the ds lock. Reported-by: Stephane Eranian Signed-off-by: Markus Metzger Cc: Signed-off-by: Ingo Molnar --- arch/x86/kernel/ds.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 2b69994fd3a8..ac1d5b0586ba 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -384,8 +384,9 @@ static int ds_request(struct task_struct *task, void *base, size_t size, spin_lock(&ds_lock); + error = -EPERM; if (!check_tracer(task)) - return -EPERM; + goto out_unlock; error = -ENOMEM; context = ds_alloc_context(task); -- cgit v1.2.2 From d3c6aa1e69f705ac3ab64584101b1d38435b1353 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 16 Nov 2008 00:49:31 -0800 Subject: x86: fix es7000 compiling Impact: fix es7000 build CC arch/x86/kernel/es7000_32.o arch/x86/kernel/es7000_32.c: In function find_unisys_acpi_oem_table: arch/x86/kernel/es7000_32.c:255: error: implicit declaration of function acpi_get_table_with_size arch/x86/kernel/es7000_32.c:261: error: implicit declaration of function early_acpi_os_unmap_memory arch/x86/kernel/es7000_32.c: In function unmap_unisys_acpi_oem_table: arch/x86/kernel/es7000_32.c:277: error: implicit declaration of function __acpi_unmap_table make[1]: *** [arch/x86/kernel/es7000_32.o] Error 1 we applied one patch out of order... | commit a73aaedd95703bd49f4c3f9df06fb7b7373ba905 | Author: Yinghai Lu | Date: Sun Sep 14 02:33:14 2008 -0700 | | x86: check dsdt before find oem table for es7000, v2 | | v2: use __acpi_unmap_table() that patch need: x86: use early_ioremap in __acpi_map_table x86: always explicitly map acpi memory acpi: remove final __acpi_map_table mapping before setting acpi_gbl_permanent_mmap acpi/x86: introduce __apci_map_table, v4 submitted to the ACPI tree but not upstream yet. fix it until those patches applied, need to revert this one Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/es7000_32.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c index f454c78fcef6..0aa2c443d600 100644 --- a/arch/x86/kernel/es7000_32.c +++ b/arch/x86/kernel/es7000_32.c @@ -250,31 +250,24 @@ int __init find_unisys_acpi_oem_table(unsigned long *oem_addr) { struct acpi_table_header *header = NULL; int i = 0; - acpi_size tbl_size; - while (ACPI_SUCCESS(acpi_get_table_with_size("OEM1", i++, &header, &tbl_size))) { + while (ACPI_SUCCESS(acpi_get_table("OEM1", i++, &header))) { if (!memcmp((char *) &header->oem_id, "UNISYS", 6)) { struct oem_table *t = (struct oem_table *)header; oem_addrX = t->OEMTableAddr; oem_size = t->OEMTableSize; - early_acpi_os_unmap_memory(header, tbl_size); *oem_addr = (unsigned long)__acpi_map_table(oem_addrX, oem_size); return 0; } - early_acpi_os_unmap_memory(header, tbl_size); } return -1; } void __init unmap_unisys_acpi_oem_table(unsigned long oem_addr) { - if (!oem_addr) - return; - - __acpi_unmap_table((char *)oem_addr, oem_size); } #endif -- cgit v1.2.2 From 93ce99e849433ede4ce8b410b749dc0cad1100b2 Mon Sep 17 00:00:00 2001 From: Venki Pallipadi Date: Mon, 17 Nov 2008 14:43:58 -0800 Subject: x86: add rdtsc barrier to TSC sync check Impact: fix incorrectly marked unstable TSC clock Patch (commit 0d12cdd "sched: improve sched_clock() performance") has a regression on one of the test systems here. With the patch, I see: checking TSC synchronization [CPU#0 -> CPU#1]: Measured 28 cycles TSC warp between CPUs, turning off TSC clock. Marking TSC unstable due to check_tsc_sync_source failed Whereas, without the patch syncs pass fine on all CPUs: checking TSC synchronization [CPU#0 -> CPU#1]: passed. Due to this, TSC is marked unstable, when it is not actually unstable. This is because syncs in check_tsc_wrap() goes away due to this commit. As per the discussion on this thread, correct way to fix this is to add explicit syncs as below? Signed-off-by: Venkatesh Pallipadi Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc_sync.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index 9ffb01c31c40..1c0dfbca87c1 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c @@ -46,7 +46,9 @@ static __cpuinit void check_tsc_warp(void) cycles_t start, now, prev, end; int i; + rdtsc_barrier(); start = get_cycles(); + rdtsc_barrier(); /* * The measurement runs for 20 msecs: */ @@ -61,7 +63,9 @@ static __cpuinit void check_tsc_warp(void) */ __raw_spin_lock(&sync_lock); prev = last_tsc; + rdtsc_barrier(); now = get_cycles(); + rdtsc_barrier(); last_tsc = now; __raw_spin_unlock(&sync_lock); -- cgit v1.2.2 From 10db4ef7b9a65b86e4d047671a1886f4c101a859 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 18 Nov 2008 15:23:08 +0100 Subject: x86, PEBS/DS: fix code flow in ds_request() this compiler warning: arch/x86/kernel/ds.c: In function 'ds_request': arch/x86/kernel/ds.c:368: warning: 'context' may be used uninitialized in this function Shows that the code flow in ds_request() is buggy - it goes into the unlock+release-context path even when the context is not allocated yet. First allocate the context, then do the other checks. Also, take care with GFP allocations under the ds_lock spinlock. Cc: Signed-off-by: Ingo Molnar --- arch/x86/kernel/ds.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index ac1d5b0586ba..d1a121443bde 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -236,17 +236,33 @@ static inline struct ds_context *ds_alloc_context(struct task_struct *task) struct ds_context *context = *p_context; if (!context) { + spin_unlock(&ds_lock); + context = kzalloc(sizeof(*context), GFP_KERNEL); - if (!context) + if (!context) { + spin_lock(&ds_lock); return NULL; + } context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL); if (!context->ds) { kfree(context); + spin_lock(&ds_lock); return NULL; } + spin_lock(&ds_lock); + /* + * Check for race - another CPU could have allocated + * it meanwhile: + */ + if (*p_context) { + kfree(context->ds); + kfree(context); + return *p_context; + } + *p_context = context; context->this = p_context; @@ -384,15 +400,15 @@ static int ds_request(struct task_struct *task, void *base, size_t size, spin_lock(&ds_lock); - error = -EPERM; - if (!check_tracer(task)) - goto out_unlock; - error = -ENOMEM; context = ds_alloc_context(task); if (!context) goto out_unlock; + error = -EPERM; + if (!check_tracer(task)) + goto out_unlock; + error = -EALREADY; if (context->owner[qual] == current) goto out_unlock; -- cgit v1.2.2 From e5e1f606ecbf67e52ebe2df5d14f8b94ec6544d0 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 17 Nov 2008 15:07:17 +0100 Subject: AMD IOMMU: add parameter to disable device isolation Impact: add a new AMD IOMMU kernel command line parameter Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu_init.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 0cdcda35a05f..838a2e1d5bb2 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -1213,6 +1213,8 @@ static int __init parse_amd_iommu_options(char *str) for (; *str; ++str) { if (strncmp(str, "isolate", 7) == 0) amd_iommu_isolate = 1; + if (strncmp(str, "share", 5) == 0) + amd_iommu_isolate = 0; if (strncmp(str, "fullflush", 11) == 0) amd_iommu_unmap_flush = true; } -- cgit v1.2.2 From 3ce1f93c6d53c3f91c3846cf66b018276c8ac2e7 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 17 Nov 2008 15:09:20 +0100 Subject: AMD IOMMU: enable device isolation per default Impact: makes device isolation the default for AMD IOMMU Some device drivers showed double-free bugs of DMA memory while testing them with AMD IOMMU. If all devices share the same protection domain this can lead to data corruption and data loss. Prevent this by putting each device into its own protection domain per default. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 838a2e1d5bb2..595edd2befc6 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -121,7 +121,7 @@ u16 amd_iommu_last_bdf; /* largest PCI device id we have LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings we find in ACPI */ unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */ -int amd_iommu_isolate; /* if 1, device isolation is enabled */ +int amd_iommu_isolate = 1; /* if 1, device isolation is enabled */ bool amd_iommu_unmap_flush; /* if true, flush on every unmap */ LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the -- cgit v1.2.2 From 695b5676c727d80921a2dc8737d5b3322222db85 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 17 Nov 2008 15:16:43 +0100 Subject: AMD IOMMU: fix fullflush comparison length Impact: fix comparison length for 'fullflush' Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 595edd2befc6..30ae2701b3df 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -1215,7 +1215,7 @@ static int __init parse_amd_iommu_options(char *str) amd_iommu_isolate = 1; if (strncmp(str, "share", 5) == 0) amd_iommu_isolate = 0; - if (strncmp(str, "fullflush", 11) == 0) + if (strncmp(str, "fullflush", 9) == 0) amd_iommu_unmap_flush = true; } -- cgit v1.2.2 From 8501c45cc32c311ae755a2d5ac8c4a5f04908d42 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 17 Nov 2008 19:11:46 +0100 Subject: AMD IOMMU: check for next_bit also in unmapped area Impact: fix possible use of stale IO/TLB entries Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 331b318304eb..e4899e0e8787 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -537,7 +537,7 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom, address >>= PAGE_SHIFT; iommu_area_free(dom->bitmap, address, pages); - if (address + pages >= dom->next_bit) + if (address >= dom->next_bit) dom->need_flush = true; } -- cgit v1.2.2 From 0af40a4b1050c050e62eb1dc30b82d5ab22bf221 Mon Sep 17 00:00:00 2001 From: Philipp Kohlbecher Date: Sun, 16 Nov 2008 12:11:01 +0100 Subject: x86: more general identifier for Phoenix BIOS Impact: widen the reach of the low-memory-protect DMI quirk Phoenix BIOSes variously identify their vendor as "Phoenix Technologies, LTD" or "Phoenix Technologies LTD" (without the comma.) This patch makes the identification string in the bad_bios_dmi_table more general (following a suggestion by Ingo Molnar), so that both versions are handled. Again, the patched file compiles cleanly and the patch has been tested successfully on my machine. Signed-off-by: Philipp Kohlbecher Cc: Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 0fa6790c1dd3..9d5674f7b6cc 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -764,7 +764,7 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = { .callback = dmi_low_memory_corruption, .ident = "Phoenix BIOS", .matches = { - DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies, LTD"), + DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies"), }, }, #endif -- cgit v1.2.2 From 093bac154c142fa1fb31a3ac69ae1bc08930231b Mon Sep 17 00:00:00 2001 From: Steve Conklin Date: Fri, 14 Nov 2008 00:55:51 -0600 Subject: x86: quirk for reboot stalls on a Dell Optiplex 330 Dell Optiplex 330 appears to hang on reboot. This is resolved by adding a quirk to set bios reboot. Signed-off-by: Leann Ogasawara Signed-off-by: Steve Conklin Signed-off-by: Ingo Molnar --- arch/x86/kernel/reboot.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 724adfc63cb9..cc5a2545dd41 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -169,6 +169,15 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_BOARD_NAME, "0KW626"), }, }, + { /* Handle problems with rebooting on Dell Optiplex 330 with 0KP561 */ + .callback = set_bios_reboot, + .ident = "Dell OptiPlex 330", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 330"), + DMI_MATCH(DMI_BOARD_NAME, "0KP561"), + }, + }, { /* Handle problems with rebooting on Dell 2400's */ .callback = set_bios_reboot, .ident = "Dell PowerEdge 2400", -- cgit v1.2.2 From 9bc646f163b136684390081262fab0fd8f5343ca Mon Sep 17 00:00:00 2001 From: Rakib Mullick Date: Thu, 20 Nov 2008 19:08:45 +0600 Subject: x86: fix __cpuinit/__init tangle in init_thread_xstate() Impact: fix incorrect __init annotation This patch removes the following section mismatch warning. A patch set was send previously (http://lkml.org/lkml/2008/11/10/407). But introduce some other problem, reported by Rufus (http://lkml.org/lkml/2008/11/11/46). Then Ingo Molnar suggest that, it's best to remove __init from xsave_cntxt_init(void). Which is the second patch in this series. Now, this one removes the following warning. WARNING: arch/x86/kernel/built-in.o(.cpuinit.text+0x2237): Section mismatch in reference from the function cpu_init() to the function .init.text:init_thread_xstate() The function __cpuinit cpu_init() references a function __init init_thread_xstate(). If init_thread_xstate is only used by cpu_init then annotate init_thread_xstate with a matching annotation. Signed-off-by: Rakib Mullick Signed-off-by: Ingo Molnar --- arch/x86/kernel/i387.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 1f20608d4ca8..b0f61f0dcd0a 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -58,7 +58,7 @@ void __cpuinit mxcsr_feature_mask_init(void) stts(); } -void __init init_thread_xstate(void) +void __cpuinit init_thread_xstate(void) { if (!HAVE_HWFP) { xstate_size = sizeof(struct i387_soft_struct); -- cgit v1.2.2 From bfe085f62f98a49e1b864e4950389c7205174e4f Mon Sep 17 00:00:00 2001 From: Rakib Mullick Date: Thu, 20 Nov 2008 19:12:50 +0600 Subject: x86: fixing __cpuinit/__init tangle, xsave_cntxt_init() Annotate xsave_cntxt_init() as "can be called outside of __init". Signed-off-by: Rakib Mullick Signed-off-by: Ingo Molnar --- arch/x86/kernel/xsave.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index b13acb75e822..15c3e6999182 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -310,7 +310,7 @@ static void __init setup_xstate_init(void) /* * Enable and initialize the xsave feature. */ -void __init xsave_cntxt_init(void) +void __ref xsave_cntxt_init(void) { unsigned int eax, ebx, ecx, edx; -- cgit v1.2.2 From 0ca4b6b00113b064c080d26d803d0d7c80fb5dc8 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 20 Nov 2008 14:09:33 -0700 Subject: x86: Fix interrupt leak due to migration When we migrate an interrupt from one CPU to another, we set the move_in_progress flag and clean up the vectors later once they're not being used. If you're unlucky and call destroy_irq() before the vectors become un-used, the move_in_progress flag is never cleared, which causes the interrupt to become unusable. This was discovered by Jesse Brandeburg for whom it manifested as an MSI-X device refusing to use MSI-X mode when the driver was unloaded and reloaded repeatedly. Signed-off-by: Matthew Wilcox Signed-off-by: Linus Torvalds --- arch/x86/kernel/io_apic.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 7a3f2028e2eb..c9513e1ff28d 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -1140,6 +1140,20 @@ static void __clear_irq_vector(int irq) cfg->vector = 0; cpus_clear(cfg->domain); + + if (likely(!cfg->move_in_progress)) + return; + cpus_and(mask, cfg->old_domain, cpu_online_map); + for_each_cpu_mask_nr(cpu, mask) { + for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; + vector++) { + if (per_cpu(vector_irq, cpu)[vector] != irq) + continue; + per_cpu(vector_irq, cpu)[vector] = -1; + break; + } + } + cfg->move_in_progress = 0; } void __setup_vector_irq(int cpu) -- cgit v1.2.2 From a1967d64414dab500e86cbbddf8eae6ad2047903 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 21 Nov 2008 11:16:48 -0800 Subject: x86: revert irq number limitation Impact: fix MSIx not enough irq numbers available regression The manual revert of the sparse_irq patches missed to bring the number of possible irqs back to the .27 status. This resulted in a regression when two multichannel network cards were placed in a system with only one IO_APIC - causing the networking driver to not have the right IRQ and the device not coming up. Remove the dynamic allocation logic leftovers and simply return NR_IRQS in probe_nr_irqs() for now. Fixes: http://lkml.org/lkml/2008/11/19/354 Reported-by: Jesper Dangaard Brouer Signed-off-by: Thomas Gleixner Tested-by: Jesper Dangaard Brouer Acked-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 22 +--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index c9513e1ff28d..1fec0f9b1508 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -3608,27 +3608,7 @@ int __init io_apic_get_redir_entries (int ioapic) int __init probe_nr_irqs(void) { - int idx; - int nr = 0; -#ifndef CONFIG_XEN - int nr_min = 32; -#else - int nr_min = NR_IRQS; -#endif - - for (idx = 0; idx < nr_ioapics; idx++) - nr += io_apic_get_redir_entries(idx) + 1; - - /* double it for hotplug and msi and nmi */ - nr <<= 1; - - /* something wrong ? */ - if (nr < nr_min) - nr = nr_min; - if (WARN_ON(nr > NR_IRQS)) - nr = NR_IRQS; - - return nr; + return NR_IRQS; } /* -------------------------------------------------------------------------- -- cgit v1.2.2 From eff79aee91dd07e944df65fa448c8baeee7709d8 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Tue, 25 Nov 2008 14:13:03 +0100 Subject: arch/x86/kernel/pci-calgary_64.c: change simple_strtol to simple_strtoul Impact: fix theoretical option string parsing overflow Since bridge is unsigned, it would seem better to use simple_strtoul that simple_strtol. A simplified version of the semantic patch that makes this change is as follows: (http://www.emn.fr/x-info/coccinelle/) // @r2@ long e; position p; @@ e = simple_strtol@p(...) @@ position p != r2.p; type T; T e; @@ e = - simple_strtol@p + simple_strtoul (...) // Signed-off-by: Julia Lawall Cc: muli@il.ibm.com Cc: jdmason@kudzu.us Cc: discuss@x86-64.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-calgary_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index e1e731d78f38..d28bbdc35e4e 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -1567,7 +1567,7 @@ static int __init calgary_parse_options(char *p) ++p; if (*p == '\0') break; - bridge = simple_strtol(p, &endp, 0); + bridge = simple_strtoul(p, &endp, 0); if (p == endp) break; -- cgit v1.2.2 From 292c669cd7087a090d6420e223eb1072f3e3c50b Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Tue, 25 Nov 2008 08:45:13 +0100 Subject: x86, bts: exclude ds.c from build when disabled Impact: cleanup Move the CONFIG guard from the .c file into the makefile. Reported-by: Andi Kleen Signed-off-by: Markus Metzger Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/ds.c | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index e489ff9cb3e2..b62a7667828e 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -41,7 +41,7 @@ obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o obj-y += process.o obj-y += i387.o xsave.o obj-y += ptrace.o -obj-y += ds.o +obj-$(CONFIG_X86_DS) += ds.o obj-$(CONFIG_X86_32) += tls.o obj-$(CONFIG_IA32_EMULATION) += tls.o obj-y += step.o diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index d1a121443bde..4c8d57ec9663 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -21,8 +21,6 @@ */ -#ifdef CONFIG_X86_DS - #include #include @@ -878,4 +876,3 @@ void ds_free(struct ds_context *context) while (leftovers--) ds_put_context(context); } -#endif /* CONFIG_X86_DS */ -- cgit v1.2.2 From c4858ffc8f2dc850cb1f609c679b1ac1ad36ef0c Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Tue, 25 Nov 2008 08:49:06 +0100 Subject: x86, pebs: fix PEBS record size configuration Impact: fix DS hw enablement on 64-bit x86 Fix the PEBS record size in the DS configuration. Reported-by: Stephane Eranian Signed-off-by: Markus Metzger Signed-off-by: Ingo Molnar --- arch/x86/kernel/ds.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 4c8d57ec9663..04e38ef646af 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -816,13 +816,21 @@ static const struct ds_configuration ds_cfg_var = { .sizeof_ds = sizeof(long) * 12, .sizeof_field = sizeof(long), .sizeof_rec[ds_bts] = sizeof(long) * 3, +#ifdef __i386__ .sizeof_rec[ds_pebs] = sizeof(long) * 10 +#else + .sizeof_rec[ds_pebs] = sizeof(long) * 18 +#endif }; static const struct ds_configuration ds_cfg_64 = { .sizeof_ds = 8 * 12, .sizeof_field = 8, .sizeof_rec[ds_bts] = 8 * 3, +#ifdef __i386__ .sizeof_rec[ds_pebs] = 8 * 10 +#else + .sizeof_rec[ds_pebs] = 8 * 18 +#endif }; static inline void -- cgit v1.2.2 From de90add30e79261c3b5be68bb0f22d2ef98e8113 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Tue, 25 Nov 2008 08:52:56 +0100 Subject: x86, bts: fix wrmsr and spinlock over kmalloc Impact: fix sleeping-with-spinlock-held bugs/crashes - Turn a wrmsr to write the DS_AREA MSR into a wrmsrl. - Use irqsave variants of spinlocks. - Do not allocate memory while holding spinlocks. Reported-by: Stephane Eranian Reported-by: Ingo Molnar Signed-off-by: Markus Metzger Signed-off-by: Ingo Molnar --- arch/x86/kernel/ds.c | 77 +++++++++++++++++++++++++++------------------------- 1 file changed, 40 insertions(+), 37 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 04e38ef646af..a2d1176c38ee 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -209,14 +209,15 @@ static DEFINE_PER_CPU(struct ds_context *, system_context); static inline struct ds_context *ds_get_context(struct task_struct *task) { struct ds_context *context; + unsigned long irq; - spin_lock(&ds_lock); + spin_lock_irqsave(&ds_lock, irq); context = (task ? task->thread.ds_ctx : this_system_context); if (context) context->count++; - spin_unlock(&ds_lock); + spin_unlock_irqrestore(&ds_lock, irq); return context; } @@ -224,55 +225,46 @@ static inline struct ds_context *ds_get_context(struct task_struct *task) /* * Same as ds_get_context, but allocates the context and it's DS * structure, if necessary; returns NULL; if out of memory. - * - * pre: requires ds_lock to be held */ static inline struct ds_context *ds_alloc_context(struct task_struct *task) { struct ds_context **p_context = (task ? &task->thread.ds_ctx : &this_system_context); struct ds_context *context = *p_context; + unsigned long irq; if (!context) { - spin_unlock(&ds_lock); - context = kzalloc(sizeof(*context), GFP_KERNEL); - - if (!context) { - spin_lock(&ds_lock); + if (!context) return NULL; - } context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL); if (!context->ds) { kfree(context); - spin_lock(&ds_lock); return NULL; } - spin_lock(&ds_lock); - /* - * Check for race - another CPU could have allocated - * it meanwhile: - */ + spin_lock_irqsave(&ds_lock, irq); + if (*p_context) { kfree(context->ds); kfree(context); - return *p_context; - } - - *p_context = context; - context->this = p_context; - context->task = task; + context = *p_context; + } else { + *p_context = context; - if (task) - set_tsk_thread_flag(task, TIF_DS_AREA_MSR); + context->this = p_context; + context->task = task; - if (!task || (task == current)) - wrmsr(MSR_IA32_DS_AREA, (unsigned long)context->ds, 0); + if (task) + set_tsk_thread_flag(task, TIF_DS_AREA_MSR); - get_tracer(task); + if (!task || (task == current)) + wrmsrl(MSR_IA32_DS_AREA, + (unsigned long)context->ds); + } + spin_unlock_irqrestore(&ds_lock, irq); } context->count++; @@ -286,10 +278,12 @@ static inline struct ds_context *ds_alloc_context(struct task_struct *task) */ static inline void ds_put_context(struct ds_context *context) { + unsigned long irq; + if (!context) return; - spin_lock(&ds_lock); + spin_lock_irqsave(&ds_lock, irq); if (--context->count) goto out; @@ -311,7 +305,7 @@ static inline void ds_put_context(struct ds_context *context) kfree(context->ds); kfree(context); out: - spin_unlock(&ds_lock); + spin_unlock_irqrestore(&ds_lock, irq); } @@ -382,6 +376,7 @@ static int ds_request(struct task_struct *task, void *base, size_t size, struct ds_context *context; unsigned long buffer, adj; const unsigned long alignment = (1 << 3); + unsigned long irq; int error = 0; if (!ds_cfg.sizeof_ds) @@ -396,26 +391,27 @@ static int ds_request(struct task_struct *task, void *base, size_t size, return -EOPNOTSUPP; - spin_lock(&ds_lock); - - error = -ENOMEM; context = ds_alloc_context(task); if (!context) - goto out_unlock; + return -ENOMEM; + + spin_lock_irqsave(&ds_lock, irq); error = -EPERM; if (!check_tracer(task)) goto out_unlock; + get_tracer(task); + error = -EALREADY; if (context->owner[qual] == current) - goto out_unlock; + goto out_put_tracer; error = -EPERM; if (context->owner[qual] != NULL) - goto out_unlock; + goto out_put_tracer; context->owner[qual] = current; - spin_unlock(&ds_lock); + spin_unlock_irqrestore(&ds_lock, irq); error = -ENOMEM; @@ -463,10 +459,17 @@ static int ds_request(struct task_struct *task, void *base, size_t size, out_release: context->owner[qual] = NULL; ds_put_context(context); + put_tracer(task); + return error; + + out_put_tracer: + spin_unlock_irqrestore(&ds_lock, irq); + ds_put_context(context); + put_tracer(task); return error; out_unlock: - spin_unlock(&ds_lock); + spin_unlock_irqrestore(&ds_lock, irq); ds_put_context(context); return error; } -- cgit v1.2.2 From a266d9f1253a38ec2d5655ebcd6846298b0554f4 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Fri, 21 Nov 2008 14:49:25 +0100 Subject: [CPUFREQ] powernow-k8: ignore out-of-range PstateStatus value A workaround for AMD CPU family 11h erratum 311 might cause that the P-state Status Register shows a "current P-state" which is larger than the "current P-state limit" in P-state Current Limit Register. For the wrong P-state value there is no ACPI _PSS object defined and powernow-k8/cpufreq can't determine the proper CPU frequency for that state. As a consequence this can cause a panic during boot (potentially with all recent kernel versions -- at least I have reproduced it with various 2.6.27 kernels and with the current .28 series), as an example: powernow-k8: Found 1 AMD Turion(tm)X2 Ultra DualCore Mobile ZM-82 processors (2 \ ) powernow-k8: 0 : pstate 0 (2200 MHz) powernow-k8: 1 : pstate 1 (1100 MHz) powernow-k8: 2 : pstate 2 (600 MHz) BUG: unable to handle kernel paging request at ffff88086e7528b8 IP: [] cpufreq_stats_update+0x4a/0x5f PGD 202063 PUD 0 Oops: 0002 [#1] SMP last sysfs file: CPU 1 Modules linked in: Pid: 1, comm: swapper Not tainted 2.6.28-rc3-dirty #16 RIP: 0010:[] [] cpufreq_stats_update+0x4a/0\ f Synaptics claims to have extended capabilities, but I'm not able to read them.<6\ 6 RAX: 0000000000000000 RBX: 0000000000000001 RCX: ffff88006e7528c0 RDX: 00000000ffffffff RSI: ffff88006e54af00 RDI: ffffffff808f056c RBP: 00000000fffee697 R08: 0000000000000003 R09: ffff88006e73f080 R10: 0000000000000001 R11: 00000000002191c0 R12: ffff88006fb83c10 R13: 00000000ffffffff R14: 0000000000000001 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff88006fb50740(0000) knlGS:0000000000000000 Unable to initialize Synaptics hardware. CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b CR2: ffff88086e7528b8 CR3: 0000000000201000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process swapper (pid: 1, threadinfo ffff88006fb82000, task ffff88006fb816d0) Stack: ffff88006e74da50 0000000000000000 ffff88006e54af00 ffffffff804863c7 ffff88006e74da50 0000000000000000 00000000ffffffff 0000000000000000 ffff88006fb83c10 ffffffff8024b46c ffffffff808f0560 ffff88006fb83c10 Call Trace: [] ? cpufreq_stat_notifier_trans+0x51/0x83 [] ? notifier_call_chain+0x29/0x4c [] ? __srcu_notifier_call_chain+0x46/0x61 [] ? cpufreq_notify_transition+0x93/0xa9 [] ? powernowk8_target+0x1e8/0x5f3 [] ? cpufreq_governor_performance+0x1b/0x20 [] ? __cpufreq_governor+0x71/0xa8 [] ? __cpufreq_set_policy+0x101/0x13e [] ? cpufreq_add_dev+0x3f0/0x4cd [] ? handle_update+0x0/0x8 [] ? sysdev_driver_register+0xb6/0x10d [] ? powernowk8_init+0x0/0x7e [] ? cpufreq_register_driver+0x8f/0x140 [] ? _stext+0x56/0x14f [] ? proc_register+0x122/0x17d [] ? create_proc_entry+0x73/0x8a [] ? register_irq_proc+0x92/0xaa [] ? init_irq_proc+0x57/0x69 [] ? kernel_init+0x116/0x169 [] ? child_rip+0xa/0x11 [] ? kernel_init+0x0/0x169 [] ? child_rip+0x0/0x11 Code: 05 c5 83 36 00 48 c7 c2 48 5d 86 80 48 8b 04 d8 48 8b 40 08 48 8b 34 02 48\ RIP [] cpufreq_stats_update+0x4a/0x5f RSP CR2: ffff88086e7528b8 ---[ end trace 0678bac75e67a2f7 ]--- Kernel panic - not syncing: Attempted to kill init! In short, aftereffect of the wrong P-state is that cpufreq_stats_update() uses "-1" as index for some array in cpufreq_stats_update (unsigned int cpu) { ... if (stat->time_in_state) stat->time_in_state[stat->last_index] = cputime64_add(stat->time_in_state[stat->last_index], cputime_sub(cur_time, stat->last_time)); ... } Fortunately, the wrong P-state value is returned only if the core is in P-state 0. This fix solves the problem by detecting the out-of-range P-state, ignoring it, and using "0" instead. Cc: Mark Langsdorf Signed-off-by: Andreas Herrmann Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 18 +++++++++++++++--- arch/x86/kernel/cpu/cpufreq/powernow-k8.h | 17 ++++++++++++++++- 2 files changed, 31 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index d3dcd58b87cd..7f05f44b97e9 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -115,9 +115,20 @@ static int query_current_values_with_pending_wait(struct powernow_k8_data *data) u32 i = 0; if (cpu_family == CPU_HW_PSTATE) { - rdmsr(MSR_PSTATE_STATUS, lo, hi); - i = lo & HW_PSTATE_MASK; - data->currpstate = i; + if (data->currpstate == HW_PSTATE_INVALID) { + /* read (initial) hw pstate if not yet set */ + rdmsr(MSR_PSTATE_STATUS, lo, hi); + i = lo & HW_PSTATE_MASK; + + /* + * a workaround for family 11h erratum 311 might cause + * an "out-of-range Pstate if the core is in Pstate-0 + */ + if (i >= data->numps) + data->currpstate = HW_PSTATE_0; + else + data->currpstate = i; + } return 0; } do { @@ -1121,6 +1132,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) } data->cpu = pol->cpu; + data->currpstate = HW_PSTATE_INVALID; if (powernow_k8_cpu_init_acpi(data)) { /* diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h index ab48cfed4d96..65cfb5d7f77f 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h @@ -5,6 +5,19 @@ * http://www.gnu.org/licenses/gpl.html */ + +enum pstate { + HW_PSTATE_INVALID = 0xff, + HW_PSTATE_0 = 0, + HW_PSTATE_1 = 1, + HW_PSTATE_2 = 2, + HW_PSTATE_3 = 3, + HW_PSTATE_4 = 4, + HW_PSTATE_5 = 5, + HW_PSTATE_6 = 6, + HW_PSTATE_7 = 7, +}; + struct powernow_k8_data { unsigned int cpu; @@ -23,7 +36,9 @@ struct powernow_k8_data { u32 exttype; /* extended interface = 1 */ /* keep track of the current fid / vid or pstate */ - u32 currvid, currfid, currpstate; + u32 currvid; + u32 currfid; + enum pstate currpstate; /* the powernow_table includes all frequency and vid/fid pairings: * fid are the lower 8 bits of the index, vid are the upper 8 bits. -- cgit v1.2.2 From 2236d252e001ea57d53cec1954f680e503f3b8bc Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 22 Nov 2008 17:37:34 +0000 Subject: enable_IR_x2apic() needs to be __init calls __init, called only from __init Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- arch/x86/kernel/apic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 04a7f960bbc0..16f94879b525 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1315,7 +1315,7 @@ void enable_x2apic(void) } } -void enable_IR_x2apic(void) +void __init enable_IR_x2apic(void) { #ifdef CONFIG_INTR_REMAP int ret; -- cgit v1.2.2 From 23a14b9e9db49ed5f7683857557c26c874d4abb6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 22 Nov 2008 17:37:44 +0000 Subject: kvm_setup_secondary_clock() is cpuinit Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- arch/x86/kernel/kvmclock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 1c9cc431ea4f..e169ae9b6a62 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -128,7 +128,7 @@ static int kvm_register_clock(char *txt) } #ifdef CONFIG_X86_LOCAL_APIC -static void __devinit kvm_setup_secondary_clock(void) +static void __cpuinit kvm_setup_secondary_clock(void) { /* * Now that the first cpu already had this clocksource initialized, -- cgit v1.2.2 From 70d7d357578245f1993fd2d3ccd26088bcd38941 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 2 Dec 2008 20:16:03 +0100 Subject: x86: fix broken flushing in GART nofullflush path Impact: remove stale IOTLB entries In the non-default nofullflush case the GART is only flushed when next_bit wraps around. But it can happen that an unmap operation unmaps memory which is behind the current next_bit location. If these addresses are reused it may result in stale GART IO/TLB entries. Fix this by setting the GART next_bit always behind an unmapped location. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index a42b02b4df68..ba7ad83e20a8 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -123,6 +123,8 @@ static void free_iommu(unsigned long offset, int size) spin_lock_irqsave(&iommu_bitmap_lock, flags); iommu_area_free(iommu_gart_bitmap, offset, size); + if (offset >= next_bit) + next_bit = offset + size; spin_unlock_irqrestore(&iommu_bitmap_lock, flags); } -- cgit v1.2.2 From f91ba190648be4ff127d6aaf3993ac19d66dc2c2 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 25 Nov 2008 12:56:12 +0100 Subject: AMD IOMMU: set device table entry for aliased devices In some rare cases a request can arrive an IOMMU with its originial requestor id even it is aliased. Handle this by setting the device table entry to the same protection domain for the original and the aliased requestor id. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index e4899e0e8787..a232e5a85d48 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -858,6 +858,9 @@ static int get_device_resources(struct device *dev, print_devid(_bdf, 1); } + if (domain_for_device(_bdf) == NULL) + set_device_domain(*iommu, *domain, _bdf); + return 1; } -- cgit v1.2.2 From 09ee17eb8ea89514c13980c4010bdbbaea8630c2 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 3 Dec 2008 12:19:27 +0100 Subject: AMD IOMMU: fix possible race while accessing iommu->need_sync The access to the iommu->need_sync member needs to be protected by the iommu->lock. Otherwise this is a possible race condition. Fix it with this patch. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 33 +++++++++++++-------------------- 1 file changed, 13 insertions(+), 20 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index a232e5a85d48..5662e226b0c9 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -187,6 +187,8 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) spin_lock_irqsave(&iommu->lock, flags); ret = __iommu_queue_command(iommu, cmd); + if (!ret) + iommu->need_sync = 1; spin_unlock_irqrestore(&iommu->lock, flags); return ret; @@ -210,10 +212,13 @@ static int iommu_completion_wait(struct amd_iommu *iommu) cmd.data[0] = CMD_COMPL_WAIT_INT_MASK; CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT); - iommu->need_sync = 0; - spin_lock_irqsave(&iommu->lock, flags); + if (!iommu->need_sync) + goto out; + + iommu->need_sync = 0; + ret = __iommu_queue_command(iommu, &cmd); if (ret) @@ -254,8 +259,6 @@ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) ret = iommu_queue_command(iommu, &cmd); - iommu->need_sync = 1; - return ret; } @@ -281,8 +284,6 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, ret = iommu_queue_command(iommu, &cmd); - iommu->need_sync = 1; - return ret; } @@ -762,8 +763,6 @@ static void set_device_domain(struct amd_iommu *iommu, write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); iommu_queue_inv_dev_entry(iommu, devid); - - iommu->need_sync = 1; } /***************************************************************************** @@ -1034,8 +1033,7 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, if (addr == bad_dma_address) goto out; - if (unlikely(iommu->need_sync)) - iommu_completion_wait(iommu); + iommu_completion_wait(iommu); out: spin_unlock_irqrestore(&domain->lock, flags); @@ -1063,8 +1061,7 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr, __unmap_single(iommu, domain->priv, dma_addr, size, dir); - if (unlikely(iommu->need_sync)) - iommu_completion_wait(iommu); + iommu_completion_wait(iommu); spin_unlock_irqrestore(&domain->lock, flags); } @@ -1130,8 +1127,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, goto unmap; } - if (unlikely(iommu->need_sync)) - iommu_completion_wait(iommu); + iommu_completion_wait(iommu); out: spin_unlock_irqrestore(&domain->lock, flags); @@ -1176,8 +1172,7 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, s->dma_address = s->dma_length = 0; } - if (unlikely(iommu->need_sync)) - iommu_completion_wait(iommu); + iommu_completion_wait(iommu); spin_unlock_irqrestore(&domain->lock, flags); } @@ -1228,8 +1223,7 @@ static void *alloc_coherent(struct device *dev, size_t size, goto out; } - if (unlikely(iommu->need_sync)) - iommu_completion_wait(iommu); + iommu_completion_wait(iommu); out: spin_unlock_irqrestore(&domain->lock, flags); @@ -1260,8 +1254,7 @@ static void free_coherent(struct device *dev, size_t size, __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); - if (unlikely(iommu->need_sync)) - iommu_completion_wait(iommu); + iommu_completion_wait(iommu); spin_unlock_irqrestore(&domain->lock, flags); -- cgit v1.2.2 From 9adc13867ec5fe0cd35434f92954d90e42381f0b Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 4 Dec 2008 13:33:35 +0100 Subject: x86: fix early panic with boot option "nosmp" Impact: fix boot crash with numcpus=0 on certain systems Fix early exception in __get_smp_config with nosmp. Bail out early when there is no MP table. Reported-by: Wu Fengguang Signed-off-by: Andi Kleen Tested-by: Wu Fengguang Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index f98f4e1dba09..0f4c1fd5a1f4 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -604,6 +604,9 @@ static void __init __get_smp_config(unsigned int early) printk(KERN_INFO "Using ACPI for processor (LAPIC) " "configuration information\n"); + if (!mpf) + return; + printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification); #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) -- cgit v1.2.2 From bb9d4ff80bc032d7961815c2ff5eaf458ae3adff Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 4 Dec 2008 15:59:48 +0100 Subject: AMD IOMMU: fix iommu_map_page function Impact: bugfix in iommu_map_page function Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 5662e226b0c9..67970a8c2ee9 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -344,7 +344,7 @@ static int iommu_map(struct protection_domain *dom, u64 __pte, *pte, *page; bus_addr = PAGE_ALIGN(bus_addr); - phys_addr = PAGE_ALIGN(bus_addr); + phys_addr = PAGE_ALIGN(phys_addr); /* only support 512GB address spaces for now */ if (bus_addr > IOMMU_MAP_SIZE_L3 || !(prot & IOMMU_PROT_MASK)) -- cgit v1.2.2 From 3cc3d84bffbd93bdb671ac7961b12cd98fbb9266 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 4 Dec 2008 16:44:31 +0100 Subject: AMD IOMMU: fix loop counter in free_pagetable function Impact: bugfix Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 67970a8c2ee9..0637e65ca0c7 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -600,7 +600,7 @@ static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom) continue; p2 = IOMMU_PTE_PAGE(p1[i]); - for (j = 0; j < 512; ++i) { + for (j = 0; j < 512; ++j) { if (!IOMMU_PTE_PRESENT(p2[j])) continue; p3 = IOMMU_PTE_PAGE(p2[j]); -- cgit v1.2.2 From 24f811603e8ef4b9173f47fa263230168e237128 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 8 Dec 2008 14:25:39 +0100 Subject: AMD IOMMU: fix typo in comment Impact: cleanup Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 0637e65ca0c7..2fde073b6be6 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -922,8 +922,8 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu, /* * This function contains common code for mapping of a physically - * contiguous memory region into DMA address space. It is uses by all - * mapping functions provided by this IOMMU driver. + * contiguous memory region into DMA address space. It is used by all + * mapping functions provided with this IOMMU driver. * Must be called with the domain lock held. */ static dma_addr_t __map_single(struct device *dev, -- cgit v1.2.2 From 8ad909c4c1b91bd35ba6a2af5e7ab3fc8d9fe283 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 8 Dec 2008 14:37:20 +0100 Subject: AMD IOMMU: fix WARN_ON in dma_ops unmap path Impact: minor fix Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 2fde073b6be6..3133a0ea09ff 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -910,7 +910,7 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu, if (address >= dom->aperture_size) return; - WARN_ON(address & 0xfffULL || address > dom->aperture_size); + WARN_ON(address & ~PAGE_MASK || address >= dom->aperture_size); pte = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)]; pte += IOMMU_PTE_L0_INDEX(address); -- cgit v1.2.2 From b8d9905d025d80a2357e8ce4704fde2923f6a1bd Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 8 Dec 2008 14:40:26 +0100 Subject: AMD IOMMU: __unmap_single: check for bad_dma_address instead of 0 Impact: minor fix Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 3133a0ea09ff..a7b6dec6fc3f 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -983,7 +983,8 @@ static void __unmap_single(struct amd_iommu *iommu, dma_addr_t i, start; unsigned int pages; - if ((dma_addr == 0) || (dma_addr + size > dma_dom->aperture_size)) + if ((dma_addr == bad_dma_address) || + (dma_addr + size > dma_dom->aperture_size)) return; pages = iommu_num_pages(dma_addr, size, PAGE_SIZE); -- cgit v1.2.2 From 087052b02f42b50316c6e4d7f2d8dfba3de6fc2e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 17 Oct 2008 16:09:57 +0200 Subject: x86: fix default_spin_lock_flags() prototype MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit these warnings: arch/x86/kernel/paravirt-spinlocks.c: In function ‘default_spin_lock_flags’: arch/x86/kernel/paravirt-spinlocks.c:12: warning: passing argument 1 of ‘__raw_spin_lock’ from incompatible pointer type arch/x86/kernel/paravirt-spinlocks.c: At top level: arch/x86/kernel/paravirt-spinlocks.c:11: warning: ‘default_spin_lock_flags’ defined but not used showed that the prototype of default_spin_lock_flags() was confused about what type spinlocks have. the proper type on UP is raw_spinlock_t. Signed-off-by: Ingo Molnar --- arch/x86/kernel/paravirt-spinlocks.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c index 0e9f1982b1dd..95777b0faa73 100644 --- a/arch/x86/kernel/paravirt-spinlocks.c +++ b/arch/x86/kernel/paravirt-spinlocks.c @@ -7,7 +7,8 @@ #include -static void default_spin_lock_flags(struct raw_spinlock *lock, unsigned long flags) +static inline void +default_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags) { __raw_spin_lock(lock); } -- cgit v1.2.2