From 33042a9ff4d126ba944b9dc3076665a2029e0a34 Mon Sep 17 00:00:00 2001 From: Chris McDermott Date: Sat, 11 Feb 2006 17:55:50 -0800 Subject: [PATCH] x86-64: Fix HPET timer on x460 [description from AK] The IBM Summit 3 chipset doesn't implement the HPET timer replacement option. Since the current Linux code relies on it use a mixed mode with both PIT for the interrupt and HPET counters for the time keeping. That was already implemented, but didn't work properly because it was still using the last interrupt offset in HPET. This resulted in x460 not booting. Fix this up by using the free running HPET counter. Shouldn't affect any other machine because they either use full HPET mode or no HPET at all. TBD needs a similar 32bit fix. Signed-off-by: Andi Kleen Cc: Pallipadi, Venkatesh" Cc: Bob Picco Cc: Bjorn Helgaas Cc: john stultz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/apic.c | 2 +- arch/x86_64/kernel/time.c | 12 +++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c index 6147770b4347..7a0a3e8d5d72 100644 --- a/arch/x86_64/kernel/apic.c +++ b/arch/x86_64/kernel/apic.c @@ -708,7 +708,7 @@ static void setup_APIC_timer(unsigned int clocks) local_irq_save(flags); /* wait for irq slice */ - if (vxtime.hpet_address) { + if (vxtime.hpet_address && hpet_use_timer) { int trigger = hpet_readl(HPET_T0_CMP); while (hpet_readl(HPET_COUNTER) >= trigger) /* do nothing */ ; diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index dba7237be5c1..3c58c30506a1 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -59,7 +59,7 @@ static int notsc __initdata = 0; unsigned int cpu_khz; /* TSC clocks / usec, not used here */ static unsigned long hpet_period; /* fsecs / HPET clock */ unsigned long hpet_tick; /* HPET clocks / interrupt */ -static int hpet_use_timer; /* Use counter of hpet for time keeping, otherwise PIT */ +int hpet_use_timer; /* Use counter of hpet for time keeping, otherwise PIT */ unsigned long vxtime_hz = PIT_TICK_RATE; int report_lost_ticks; /* command line option */ unsigned long long monotonic_base; @@ -326,7 +326,10 @@ static noinline void handle_lost_ticks(int lost, struct pt_regs *regs) print_symbol("rip %s\n", regs->rip); if (vxtime.mode == VXTIME_TSC && vxtime.hpet_address) { printk(KERN_WARNING "Falling back to HPET\n"); - vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick; + if (hpet_use_timer) + vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick; + else + vxtime.last = hpet_readl(HPET_COUNTER); vxtime.mode = VXTIME_HPET; do_gettimeoffset = do_gettimeoffset_hpet; } @@ -988,7 +991,10 @@ void __init time_init_gtod(void) notsc = 1; if (vxtime.hpet_address && notsc) { timetype = hpet_use_timer ? "HPET" : "PIT/HPET"; - vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick; + if (hpet_use_timer) + vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick; + else + vxtime.last = hpet_readl(HPET_COUNTER); vxtime.mode = VXTIME_HPET; do_gettimeoffset = do_gettimeoffset_hpet; #ifdef CONFIG_X86_PM_TIMER -- cgit v1.2.2 From a65d17c9d27a85782cfe1bbc36c747ffa1f81814 Mon Sep 17 00:00:00 2001 From: John Blackwood Date: Sun, 12 Feb 2006 14:34:58 -0800 Subject: [PATCH] arch/x86_64/kernel/traps.c PTRACE_SINGLESTEP oops We found a problem with x86_64 kernels with preemption enabled, where having multiple tasks doing ptrace singlesteps around the same time will cause the system to 'oops'. The problem seems that a task can get preempted out of the do_debug() processing while it is running on the DEBUG_STACK stack. If another task on that same cpu then enters do_debug() and uses the same per-cpu DEBUG_STACK stack, the previous preempted tasks's stack contents can be corrupted, and the system will oops when the preempted task is context switched back in again. The typical oops looks like the following: Unable to handle kernel paging request at ffffffffffffffae RIP: {thread_return+34} PGD 103027 PUD 102429067 PMD 0 Oops: 0002 [1] PREEMPT SMP CPU 0 Modules linked in: Pid: 3786, comm: ssdd Not tainted 2.6.15.2 #1 RIP: 0010:[] {thread_return+34} RSP: 0018:ffffffff80824058 EFLAGS: 000136c2 RAX: ffff81017e12cea0 RBX: 0000000000000000 RCX: 00000000c0000100 RDX: 0000000000000000 RSI: ffff8100f7856e20 RDI: ffff81017e12cea0 RBP: 0000000000000046 R08: ffff8100f68a6000 R09: 0000000000000000 R10: 0000000000000000 R11: ffff81017e12cea0 R12: ffff81000c2d53e8 R13: ffff81017f5b3be8 R14: ffff81000c0036e0 R15: 000001056cbfc899 FS: 00002aaaaaad9b00(0000) GS:ffffffff80883800(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: ffffffffffffffae CR3: 00000000f6fcf000 CR4: 00000000000006e0 Process ssdd (pid: 3786, threadinfo ffff8100f68a6000, task ffff8100f7856e20) Stack: ffffffff808240d8 ffffffff8012a84a ffff8100055f6c00 0000000000000020 0000000000000001 ffff81000c0036e0 ffffffff808240b8 0000000000000000 0000000000000000 0000000000000000 Call Trace: <#DB> {try_to_wake_up+985} {kick_process+87} {signal_wake_up+48} {specific_send_sig_info+179} {_spin_unlock_irqrestore+27} {force_sig_info+159} {do_debug+289} {sync_regs+103} {paranoid_userspace+35} Unable to handle kernel paging request at 00007fffffb7d000 RIP: {show_trace+465} PGD f6f25067 PUD f6fcc067 PMD f6957067 PTE 0 Oops: 0000 [2] PREEMPT SMP This patch disables preemptions for the task upon entry to do_debug(), before interrupts are reenabled, and then disables preemption before exiting do_debug(), after disabling interrupts. I've noticed that the task can be preempted either at the end of an interrupt, or on the call to force_sig_info() on the spin_unlock_irqrestore() processing. It might be better to attempt to code a fix in entry.S around the code that calls do_debug(). Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/traps.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index ee1b2da9e5e7..28d50dc540e8 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -90,6 +90,20 @@ static inline void conditional_sti(struct pt_regs *regs) local_irq_enable(); } +static inline void preempt_conditional_sti(struct pt_regs *regs) +{ + preempt_disable(); + if (regs->eflags & X86_EFLAGS_IF) + local_irq_enable(); +} + +static inline void preempt_conditional_cli(struct pt_regs *regs) +{ + if (regs->eflags & X86_EFLAGS_IF) + local_irq_disable(); + preempt_enable_no_resched(); +} + static int kstack_depth_to_print = 10; #ifdef CONFIG_KALLSYMS @@ -693,7 +707,7 @@ asmlinkage void __kprobes do_debug(struct pt_regs * regs, SIGTRAP) == NOTIFY_STOP) return; - conditional_sti(regs); + preempt_conditional_sti(regs); /* Mask out spurious debug traps due to lazy DR7 setting */ if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { @@ -738,11 +752,13 @@ asmlinkage void __kprobes do_debug(struct pt_regs * regs, clear_dr7: set_debugreg(0UL, 7); + preempt_conditional_cli(regs); return; clear_TF_reenable: set_tsk_thread_flag(tsk, TIF_SINGLESTEP); regs->eflags &= ~TF_MASK; + preempt_conditional_cli(regs); } static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr) -- cgit v1.2.2 From 0d541064e8f58858e11cd34d81b6e83617f6eb4a Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sun, 12 Feb 2006 14:34:59 -0800 Subject: [PATCH] x86_64: GART DMA merging fix Don't touch the non DMA members in the sg list in dma_map_sg in the IOMMU Some drivers (in particular ST) ran into problems because they reused the sg lists after passing them to pci_map_sg(). The merging procedure in the K8 GART IOMMU corrupted the state. This patch changes it to only touch the dma* entries during merging, but not the other fields. Approach suggested by Dave Miller. Signed-off-by: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/pci-gart.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c index 2fe23a6c361b..dd0718dc178b 100644 --- a/arch/x86_64/kernel/pci-gart.c +++ b/arch/x86_64/kernel/pci-gart.c @@ -310,7 +310,7 @@ void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int di for (i = 0; i < nents; i++) { struct scatterlist *s = &sg[i]; - if (!s->dma_length || !s->length) + if (!s->dma_length) break; dma_unmap_single(dev, s->dma_address, s->dma_length, dir); } @@ -364,7 +364,6 @@ static int __dma_map_cont(struct scatterlist *sg, int start, int stopat, BUG_ON(i > start && s->offset); if (i == start) { - *sout = *s; sout->dma_address = iommu_bus_base; sout->dma_address += iommu_page*PAGE_SIZE + s->offset; sout->dma_length = s->length; @@ -379,7 +378,7 @@ static int __dma_map_cont(struct scatterlist *sg, int start, int stopat, SET_LEAK(iommu_page); addr += PAGE_SIZE; iommu_page++; - } + } } BUG_ON(iommu_page - iommu_start != pages); return 0; @@ -391,7 +390,6 @@ static inline int dma_map_cont(struct scatterlist *sg, int start, int stopat, { if (!need) { BUG_ON(stopat - start != 1); - *sout = sg[start]; sout->dma_length = sg[start].length; return 0; } -- cgit v1.2.2 From 99019e919969be88e7e4042f3afa296bd55ad9ec Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 16 Feb 2006 23:41:55 +0100 Subject: [PATCH] x86_64: make touch_nmi_watchdog() not touch impossible cpus' private data Along with that, also suppress the memory touching altogether when the watchdog is not running, to eliminate needless crosstalk. Plus ad a call to it to make things consistent (one could also consider removing the call in enable_timer_nmi_watchdog()). Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/nmi.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c index 8be407a1f62d..5bf17e41cd2d 100644 --- a/arch/x86_64/kernel/nmi.c +++ b/arch/x86_64/kernel/nmi.c @@ -236,6 +236,7 @@ static void enable_lapic_nmi_watchdog(void) { if (nmi_active < 0) { nmi_watchdog = NMI_LOCAL_APIC; + touch_nmi_watchdog(); setup_apic_nmi_watchdog(); } } @@ -456,15 +457,17 @@ static DEFINE_PER_CPU(int, nmi_touch); void touch_nmi_watchdog (void) { - int i; + if (nmi_watchdog > 0) { + unsigned cpu; - /* - * Tell other CPUs to reset their alert counters. We cannot - * do it ourselves because the alert count increase is not - * atomic. - */ - for (i = 0; i < NR_CPUS; i++) - per_cpu(nmi_touch, i) = 1; + /* + * Tell other CPUs to reset their alert counters. We cannot + * do it ourselves because the alert count increase is not + * atomic. + */ + for_each_present_cpu (cpu) + per_cpu(nmi_touch, cpu) = 1; + } touch_softlockup_watchdog(); } -- cgit v1.2.2 From 2391c4b594eb28abd58102de8f4e5d7a4fa39f4c Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 16 Feb 2006 23:42:01 +0100 Subject: [PATCH] x86_64: Don't call do_exit with interrupts disabled after IRET exception This caused a sigreturn with bad argument on a preemptible kernel to complain with Debug: sleeping function called from invalid context at /home/lsrc/quilt/linux/include/linux/rwsem.h:43 in_atomic():0, irqs_disabled():1 Call Trace: {__might_sleep+190} {profile_task_exit+21} {__do_exit+34} {do_wait+0} Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/entry.S | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S index b150c87a08c6..7c10e9009d61 100644 --- a/arch/x86_64/kernel/entry.S +++ b/arch/x86_64/kernel/entry.S @@ -554,6 +554,7 @@ iret_label: /* running with kernel gs */ bad_iret: movq $-9999,%rdi /* better code? */ + sti jmp do_exit .previous -- cgit v1.2.2 From ab68805955ee3dd84a6aa76cd70e61fde996968d Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 16 Feb 2006 23:42:04 +0100 Subject: [PATCH] x86_64: Don't enable ATI apicmaintimer workaround when the machine has C2 or C3 Many laptops have problems with ticking the local APIC timer in C2/C3. The code added earlier to use it by default on ATI didn't really work for them. Don't enable it when the system supports C2/C3. This doesn't fix the problem fully, but at least it's not worse than before. Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/io_apic.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index 4282d72b2a26..2585c1d92b26 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c @@ -30,6 +30,9 @@ #include #include #include +#ifdef CONFIG_ACPI +#include +#endif #include #include @@ -260,6 +263,8 @@ __setup("apic", enable_ioapic_setup); And another hack to disable the IOMMU on VIA chipsets. + ... and others. Really should move this somewhere else. + Kludge-O-Rama. */ void __init check_ioapic(void) { @@ -307,6 +312,17 @@ void __init check_ioapic(void) case PCI_VENDOR_ID_ATI: if (apic_runs_main_timer != 0) break; +#ifdef CONFIG_ACPI + /* Don't do this for laptops right + right now because their timer + doesn't necessarily tick in C2/3 */ + if (acpi_fadt.revision >= 3 && + (acpi_fadt.plvl2_lat + acpi_fadt.plvl3_lat) < 1100) { + printk(KERN_INFO +"ATI board detected, but seems to be a laptop. Timer might be shakey, sorry\n"); + break; + } +#endif printk(KERN_INFO "ATI board detected. Using APIC/PM timer.\n"); apic_runs_main_timer = 1; -- cgit v1.2.2 From 7fd67843b96f90f59c9a244a1bc25137978a3ff9 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 16 Feb 2006 23:42:07 +0100 Subject: [PATCH] x86_64: Disable tsc when apicpmtimer is active Otherwise it has no effect anyways. Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/apic.c | 1 + arch/x86_64/kernel/time.c | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c index 7a0a3e8d5d72..e5b14c57eaa0 100644 --- a/arch/x86_64/kernel/apic.c +++ b/arch/x86_64/kernel/apic.c @@ -1152,6 +1152,7 @@ __setup("noapicmaintimer", setup_noapicmaintimer); static __init int setup_apicpmtimer(char *s) { apic_calibrate_pmtmr = 1; + notsc_setup(NULL); return setup_apicmaintimer(NULL); } __setup("apicpmtimer", setup_apicpmtimer); diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index 3c58c30506a1..67841d11ed1f 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -1327,8 +1327,7 @@ static int __init nohpet_setup(char *s) __setup("nohpet", nohpet_setup); - -static int __init notsc_setup(char *s) +int __init notsc_setup(char *s) { notsc = 1; return 0; -- cgit v1.2.2 From 6574ffd74b03e35026f428a2c820e6ddf45d426c Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 16 Feb 2006 23:42:10 +0100 Subject: [PATCH] x86_64: Resolve the RIP of an early exception using kallsyms But do it after everything else to risk less from recursive crashes. Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/head.S | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S index 692c737feddb..02fc7fa0ea28 100644 --- a/arch/x86_64/kernel/head.S +++ b/arch/x86_64/kernel/head.S @@ -213,6 +213,11 @@ ENTRY(early_idt_handler) cmpl $2,early_recursion_flag(%rip) jz 1f call dump_stack +#ifdef CONFIG_KALLSYMS + leaq early_idt_ripmsg(%rip),%rdi + movq 8(%rsp),%rsi # get rip again + call __print_symbol +#endif 1: hlt jmp 1b early_recursion_flag: @@ -220,6 +225,8 @@ early_recursion_flag: early_idt_msg: .asciz "PANIC: early exception rip %lx error %lx cr2 %lx\n" +early_idt_ripmsg: + .asciz "RIP %s\n" .code32 ENTRY(no_long_mode) -- cgit v1.2.2 From 9ff4ced4676d3cd1f28b14d93a339f263ca304b0 Mon Sep 17 00:00:00 2001 From: Tim Hockin Date: Fri, 17 Feb 2006 13:52:54 -0800 Subject: [PATCH] Remove KERN_INFO from middle of printk line Don't print KERN_INFO in the middle of a printk line. printk(KERN_INFO "OEM ID: %s ",str); is just above this. This is already fixed up in i386 copy. Signed-off-by: Martin J. Bligh Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/mpparse.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c index dc49bfb6db0a..9013a90b5c2e 100644 --- a/arch/x86_64/kernel/mpparse.c +++ b/arch/x86_64/kernel/mpparse.c @@ -288,9 +288,9 @@ static int __init smp_read_mpc(struct mp_config_table *mpc) memcpy(str,mpc->mpc_productid,12); str[12]=0; - printk(KERN_INFO "Product ID: %s ",str); + printk("Product ID: %s ",str); - printk(KERN_INFO "APIC at: 0x%X\n",mpc->mpc_lapic); + printk("APIC at: 0x%X\n",mpc->mpc_lapic); /* save the local APIC address, it might be non-default */ if (!acpi_lapic) -- cgit v1.2.2 From a0124d780d06db711e8a92135d774940588a27da Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Sat, 25 Feb 2006 03:55:38 -0500 Subject: [PATCH] x86-64: react to new topology.c location Commit 9c869edac591977314323a4eaad5f7633fca684f moved the i386 topology.c file. That change broke x86-64 compiles, as it uses the same file. Signed-off-by: Dave Jones Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile index 72fe60c20d39..a098a11e7755 100644 --- a/arch/x86_64/kernel/Makefile +++ b/arch/x86_64/kernel/Makefile @@ -43,7 +43,7 @@ CFLAGS_vsyscall.o := $(PROFILING) -g0 bootflag-y += ../../i386/kernel/bootflag.o cpuid-$(subst m,y,$(CONFIG_X86_CPUID)) += ../../i386/kernel/cpuid.o -topology-y += ../../i386/mach-default/topology.o +topology-y += ../../i386/kernel/topology.o microcode-$(subst m,y,$(CONFIG_MICROCODE)) += ../../i386/kernel/microcode.o intel_cacheinfo-y += ../../i386/kernel/cpu/intel_cacheinfo.o quirks-y += ../../i386/kernel/quirks.o -- cgit v1.2.2 From 60b08c67220cf6faef7410ac6adba23a8a743bf7 Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Sun, 26 Feb 2006 04:18:22 +0100 Subject: [PATCH] x86_64: no_iommu removal in pci-gart.c In previous versions of pci-gart.c, no_iommu was used to determine if IOMMU was disabled in the GART DMA mapping functions. This changed in 2.6.16 and now gart_xxx() functions are only called if gart is enabled. Therefore, uses of no_iommu in the GART code are no longer necessary and can be removed. Also, it removes double deceleration of no_iommu and force_iommu in pci.h and proto.h, by removing the deceleration in pci.h. Lastly, end_pfn off by one error. Tested (along with patch 1/2) on dual opteron with gart enabled, iommu=soft, and iommu=off. Signed-off-by: Jon Mason Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/aperture.c | 2 +- arch/x86_64/kernel/pci-gart.c | 22 +++++----------------- 2 files changed, 6 insertions(+), 18 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/aperture.c b/arch/x86_64/kernel/aperture.c index e4e2b7d01f89..a0f955b9995f 100644 --- a/arch/x86_64/kernel/aperture.c +++ b/arch/x86_64/kernel/aperture.c @@ -248,7 +248,7 @@ void __init iommu_hole_init(void) /* Got the aperture from the AGP bridge */ } else if (swiotlb && !valid_agp) { /* Do nothing */ - } else if ((!no_iommu && end_pfn >= MAX_DMA32_PFN) || + } else if ((!no_iommu && end_pfn > MAX_DMA32_PFN) || force_iommu || valid_agp || fallback_aper_force) { diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c index dd0718dc178b..0c3f052ba6ce 100644 --- a/arch/x86_64/kernel/pci-gart.c +++ b/arch/x86_64/kernel/pci-gart.c @@ -228,11 +228,6 @@ static inline int need_iommu(struct device *dev, unsigned long addr, size_t size int mmu = high; if (force_iommu) mmu = 1; - if (no_iommu) { - if (high) - panic("PCI-DMA: high address but no IOMMU.\n"); - mmu = 0; - } return mmu; } @@ -241,11 +236,6 @@ static inline int nonforced_iommu(struct device *dev, unsigned long addr, size_t u64 mask = *dev->dma_mask; int high = addr + size >= mask; int mmu = high; - if (no_iommu) { - if (high) - panic("PCI-DMA: high address but no IOMMU.\n"); - mmu = 0; - } return mmu; } @@ -310,7 +300,7 @@ void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int di for (i = 0; i < nents; i++) { struct scatterlist *s = &sg[i]; - if (!s->dma_length) + if (!s->dma_length || !s->length) break; dma_unmap_single(dev, s->dma_address, s->dma_length, dir); } @@ -364,6 +354,7 @@ static int __dma_map_cont(struct scatterlist *sg, int start, int stopat, BUG_ON(i > start && s->offset); if (i == start) { + *sout = *s; sout->dma_address = iommu_bus_base; sout->dma_address += iommu_page*PAGE_SIZE + s->offset; sout->dma_length = s->length; @@ -390,6 +381,7 @@ static inline int dma_map_cont(struct scatterlist *sg, int start, int stopat, { if (!need) { BUG_ON(stopat - start != 1); + *sout = sg[start]; sout->dma_length = sg[start].length; return 0; } @@ -632,17 +624,13 @@ static int __init pci_iommu_init(void) (agp_copy_info(agp_bridge, &info) < 0); #endif - if (swiotlb) { - no_iommu = 1; + if (swiotlb) return -1; - } - + if (no_iommu || (!force_iommu && end_pfn <= MAX_DMA32_PFN) || !iommu_aperture || (no_agp && init_k8_gatt(&info) < 0)) { - no_iommu = 1; - no_iommu_init(); printk(KERN_INFO "PCI-DMA: Disabling IOMMU.\n"); if (end_pfn > MAX_DMA32_PFN) { printk(KERN_ERR "WARNING more than 4GB of memory " -- cgit v1.2.2 From 13a229abc25640813f1480c0478dfc6bdbc1c19e Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sun, 26 Feb 2006 04:18:31 +0100 Subject: [PATCH] x86_64: Only do the clustered systems have unsynchronized TSC assumption on IBM systems Big Unisys systems have multiple clusters too, but they have an synchronized TSC. I'm using the SMBIOS to check for vendor == IBM. Cc: Chris McDermott Cc: "Protasevich, Natalie" Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/apic.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c index e5b14c57eaa0..d70605eda333 100644 --- a/arch/x86_64/kernel/apic.c +++ b/arch/x86_64/kernel/apic.c @@ -962,12 +962,14 @@ void smp_apic_timer_interrupt(struct pt_regs *regs) irq_exit(); } +int __initdata unsync_tsc_on_multicluster; + /* * oem_force_hpet_timer -- force HPET mode for some boxes. * * Thus far, the major user of this is IBM's Summit2 series: * - * Clustered boxes may have unsynced TSC problems if they are + * Some clustered boxes may have unsynced TSC problems if they are * multi-chassis. Use available data to take a good guess. * If in doubt, go HPET. */ @@ -977,6 +979,11 @@ __cpuinit int oem_force_hpet_timer(void) unsigned id; DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS); + /* Only do this check on IBM machines - big Unisys systems + use multiple clusters too, but have synchronized TSC */ + if (!unsync_tsc_on_multicluster) + return 0; + bitmap_zero(clustermap, NUM_APIC_CLUSTERS); for (i = 0; i < NR_CPUS; i++) { -- cgit v1.2.2 From 1f9921539208f6d88f600a801e333d718e4a13ff Mon Sep 17 00:00:00 2001 From: Chris McDermott Date: Sun, 26 Feb 2006 04:18:40 +0100 Subject: [PATCH] x86_64: Fix NMI watchdog on x460 [description from AK] Old check for the IO-APIC watchdog during the timer check was wrong - it obviously should only drop into this if the IO-APIC watchdog is used. Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/io_apic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index 2585c1d92b26..ffab8a756664 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c @@ -1850,7 +1850,7 @@ static inline void check_timer(void) } printk(" failed.\n"); - if (nmi_watchdog) { + if (nmi_watchdog == NMI_IO_APIC) { printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n"); nmi_watchdog = 0; } -- cgit v1.2.2 From e2c0388866dc12bef56b178b958f9b778fe6c687 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sun, 26 Feb 2006 04:18:46 +0100 Subject: [PATCH] x86_64: Fix the additional_cpus=.. option It didn't set up the CPU possible map early enough, so the option didn't actually work. Noticed by Heiko Carstens Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/setup.c | 6 ++++++ arch/x86_64/kernel/smpboot.c | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 9435ab7d6fb8..5de7eaf5d97c 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -423,6 +423,12 @@ static __init void parse_cmdline_early (char ** cmdline_p) else if(!memcmp(from, "elfcorehdr=", 11)) elfcorehdr_addr = memparse(from+11, &from); #endif + +#ifdef CONFIG_SMP + else if (!memcmp(from, "additional_cpus=", 16)) + setup_additional_cpus(from+16); +#endif + next_char: c = *(from++); if (!c) diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index 67e4e28f4df8..b82eb86e4f5d 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c @@ -1244,7 +1244,7 @@ void __cpu_die(unsigned int cpu) printk(KERN_ERR "CPU %u didn't die...\n", cpu); } -static __init int setup_additional_cpus(char *s) +__init int setup_additional_cpus(char *s) { return get_option(&s, &additional_cpus); } -- cgit v1.2.2 From e8b917775b572bc27de105f1317c2de4335db5b3 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sun, 26 Feb 2006 04:18:49 +0100 Subject: [PATCH] x86_64: Move the SMP time selection earlier SMP time selection originally ran after all CPUs were brought up because it needed to know the number of CPUs to decide if it needs an MP safe timer or not. This is not needed anymore because we know present CPUs early. This fixes a couple of problems: - apicmaintimer didn't always work because it relied on state that was set up time_init_gtod too late. - The output for the used timer in early kernel log was misleading because time_init_gtod could actually change it later. Now always print the final timer choice Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/smpboot.c | 2 -- arch/x86_64/kernel/time.c | 22 +++++++++++----------- 2 files changed, 11 insertions(+), 13 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index b82eb86e4f5d..66e98659d077 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c @@ -1152,8 +1152,6 @@ void __init smp_cpus_done(unsigned int max_cpus) setup_ioapic_dest(); #endif - time_init_gtod(); - check_nmi_watchdog(); } diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index 67841d11ed1f..3080f84bf7b7 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -48,6 +48,8 @@ static void cpufreq_delayed_get(void); extern void i8254_timer_resume(void); extern int using_apic_timer; +static char *time_init_gtod(void); + DEFINE_SPINLOCK(rtc_lock); DEFINE_SPINLOCK(i8253_lock); @@ -901,6 +903,7 @@ static struct irqaction irq0 = { void __init time_init(void) { char *timename; + char *gtod; #ifdef HPET_HACK_ENABLE_DANGEROUS if (!vxtime.hpet_address) { @@ -945,21 +948,19 @@ void __init time_init(void) timename = "PIT"; } - printk(KERN_INFO "time.c: Using %ld.%06ld MHz %s timer.\n", - vxtime_hz / 1000000, vxtime_hz % 1000000, timename); + vxtime.mode = VXTIME_TSC; + gtod = time_init_gtod(); + + printk(KERN_INFO "time.c: Using %ld.%06ld MHz WALL %s GTOD %s timer.\n", + vxtime_hz / 1000000, vxtime_hz % 1000000, timename, gtod); printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000); - vxtime.mode = VXTIME_TSC; vxtime.quot = (1000000L << 32) / vxtime_hz; vxtime.tsc_quot = (1000L << 32) / cpu_khz; vxtime.last_tsc = get_cycles_sync(); setup_irq(0, &irq0); set_cyc2ns_scale(cpu_khz); - -#ifndef CONFIG_SMP - time_init_gtod(); -#endif } /* @@ -981,9 +982,9 @@ __cpuinit int unsynchronized_tsc(void) } /* - * Decide after all CPUs are booted what mode gettimeofday should use. + * Decide what mode gettimeofday should use. */ -void __init time_init_gtod(void) +__init static char *time_init_gtod(void) { char *timetype; @@ -1011,8 +1012,7 @@ void __init time_init_gtod(void) timetype = hpet_use_timer ? "HPET/TSC" : "PIT/TSC"; vxtime.mode = VXTIME_TSC; } - - printk(KERN_INFO "time.c: Using %s based timekeeping.\n", timetype); + return timetype; } __setup("report_lost_ticks", time_setup); -- cgit v1.2.2 From ab9b32ee626e9b6df4ce2560a70ae15e62423cf4 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sun, 26 Feb 2006 04:18:52 +0100 Subject: [PATCH] x86_64: Better ATI timer fix The previous experiment for using apicmaintimer on ATI systems didn't work out very well. In particular laptops with C2/C3 support often don't let it tick during idle, which makes it useless. There were also some other bugs that made the apicmaintimer often not used at all. I tried some other experiments - running timer over RTC and some other things but they didn't really work well neither. I rechecked the specs now and it turns out this simple change is actually enough to avoid the double ticks on the ATI systems. We just turn off IRQ 0 in the 8254 and only route it directly using the IO-APIC. I tested it on a few ATI systems and it worked there. In fact it worked on all chipsets (NVidia, Intel, AMD, ATI) I tried it on. According to the ACPI spec routing should always work through the IO-APIC so I think it's the correct thing to do anyways (and most of the old gunk in check_timer should be thrown away for x86-64). But for 2.6.16 it's best to do a fairly minimal change: - Use the known to be working everywhere-but-ATI IRQ0 both over 8254 and IO-APIC setup everywhere - Except on ATI disable IRQ0 in the 8254 - Remove the code to select apicmaintimer on ATI chipsets - Add some boot options to allow to override this (just paranoia) In 2.6.17 I hope to switch the default over to this for everybody. Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/io_apic.c | 46 ++++++++++++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 17 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index ffab8a756664..ffed464e6b12 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c @@ -50,6 +50,8 @@ static int no_timer_check; int disable_timer_pin_1 __initdata; +int timer_over_8254 __initdata = 1; + /* Where if anywhere is the i8259 connect in external int mode */ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; @@ -251,6 +253,20 @@ static int __init enable_ioapic_setup(char *str) __setup("noapic", disable_ioapic_setup); __setup("apic", enable_ioapic_setup); +static int __init setup_disable_8254_timer(char *s) +{ + timer_over_8254 = -1; + return 1; +} +static int __init setup_enable_8254_timer(char *s) +{ + timer_over_8254 = 2; + return 1; +} + +__setup("disable_8254_timer", setup_disable_8254_timer); +__setup("enable_8254_timer", setup_enable_8254_timer); + #include #include #include @@ -309,27 +325,20 @@ void __init check_ioapic(void) #endif /* RED-PEN skip them on mptables too? */ return; + + /* This should be actually default, but + for 2.6.16 let's do it for ATI only where + it's really needed. */ case PCI_VENDOR_ID_ATI: - if (apic_runs_main_timer != 0) - break; -#ifdef CONFIG_ACPI - /* Don't do this for laptops right - right now because their timer - doesn't necessarily tick in C2/3 */ - if (acpi_fadt.revision >= 3 && - (acpi_fadt.plvl2_lat + acpi_fadt.plvl3_lat) < 1100) { - printk(KERN_INFO -"ATI board detected, but seems to be a laptop. Timer might be shakey, sorry\n"); - break; - } -#endif + if (timer_over_8254 == 1) { + timer_over_8254 = 0; printk(KERN_INFO - "ATI board detected. Using APIC/PM timer.\n"); - apic_runs_main_timer = 1; - nohpet = 1; + "ATI board detected. Disabling timer routing over 8254.\n"); + } return; } + /* No multi-function device? */ type = read_pci_config_byte(num,slot,func, PCI_HEADER_TYPE); @@ -1773,6 +1782,8 @@ static inline void unlock_ExtINT_logic(void) * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ * is so screwy. Thanks to Brian Perkins for testing/hacking this beast * fanatically on his truly buggy board. + * + * FIXME: really need to revamp this for modern platforms only. */ static inline void check_timer(void) { @@ -1795,7 +1806,8 @@ static inline void check_timer(void) */ apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); init_8259A(1); - enable_8259A_irq(0); + if (timer_over_8254 > 0) + enable_8259A_irq(0); pin1 = find_isa_irq_pin(0, mp_INT); apic1 = find_isa_irq_apic(0, mp_INT); -- cgit v1.2.2 From d51761233d9e3be4cdf10f7482a50463bbd78c78 Mon Sep 17 00:00:00 2001 From: Brian Magnuson Date: Mon, 27 Feb 2006 04:02:04 +0100 Subject: [PATCH] fix build on x86_64 with !CONFIG_HOTPLUG_CPU The commit e2c0388866dc12bef56b178b958f9b778fe6c687 added setup_additional_cpus to setup.c but this is only defined if CONFIG_HOTPLUG_CPU is set. This patch changes the #ifdef to reflect that. Signed-off-by: Brian Magnuson Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 5de7eaf5d97c..aa55e3cec665 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -424,7 +424,7 @@ static __init void parse_cmdline_early (char ** cmdline_p) elfcorehdr_addr = memparse(from+11, &from); #endif -#ifdef CONFIG_SMP +#ifdef CONFIG_HOTPLUG_CPU else if (!memcmp(from, "additional_cpus=", 16)) setup_additional_cpus(from+16); #endif -- cgit v1.2.2 From 637029c6cb5efcbaa3d5831af4c1972bdd629779 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 27 Feb 2006 20:41:56 -0800 Subject: Revert "[PATCH] x86_64: Only do the clustered systems have unsynchronized TSC assumption on IBM systems" This reverts commit 13a229abc25640813f1480c0478dfc6bdbc1c19e. Quoth Andi: "After some consideration and feedback from various people it turns out this wasn't that good an idea. It has some problems and needs more work. Since it was only an optimization anyways it's best to just back it out again for now." Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/apic.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c index d70605eda333..e5b14c57eaa0 100644 --- a/arch/x86_64/kernel/apic.c +++ b/arch/x86_64/kernel/apic.c @@ -962,14 +962,12 @@ void smp_apic_timer_interrupt(struct pt_regs *regs) irq_exit(); } -int __initdata unsync_tsc_on_multicluster; - /* * oem_force_hpet_timer -- force HPET mode for some boxes. * * Thus far, the major user of this is IBM's Summit2 series: * - * Some clustered boxes may have unsynced TSC problems if they are + * Clustered boxes may have unsynced TSC problems if they are * multi-chassis. Use available data to take a good guess. * If in doubt, go HPET. */ @@ -979,11 +977,6 @@ __cpuinit int oem_force_hpet_timer(void) unsigned id; DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS); - /* Only do this check on IBM machines - big Unisys systems - use multiple clusters too, but have synchronized TSC */ - if (!unsync_tsc_on_multicluster) - return 0; - bitmap_zero(clustermap, NUM_APIC_CLUSTERS); for (i = 0; i < NR_CPUS; i++) { -- cgit v1.2.2