From 33042a9ff4d126ba944b9dc3076665a2029e0a34 Mon Sep 17 00:00:00 2001 From: Chris McDermott Date: Sat, 11 Feb 2006 17:55:50 -0800 Subject: [PATCH] x86-64: Fix HPET timer on x460 [description from AK] The IBM Summit 3 chipset doesn't implement the HPET timer replacement option. Since the current Linux code relies on it use a mixed mode with both PIT for the interrupt and HPET counters for the time keeping. That was already implemented, but didn't work properly because it was still using the last interrupt offset in HPET. This resulted in x460 not booting. Fix this up by using the free running HPET counter. Shouldn't affect any other machine because they either use full HPET mode or no HPET at all. TBD needs a similar 32bit fix. Signed-off-by: Andi Kleen Cc: Pallipadi, Venkatesh" Cc: Bob Picco Cc: Bjorn Helgaas Cc: john stultz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/apic.c | 2 +- arch/x86_64/kernel/time.c | 12 +++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c index 6147770b4347..7a0a3e8d5d72 100644 --- a/arch/x86_64/kernel/apic.c +++ b/arch/x86_64/kernel/apic.c @@ -708,7 +708,7 @@ static void setup_APIC_timer(unsigned int clocks) local_irq_save(flags); /* wait for irq slice */ - if (vxtime.hpet_address) { + if (vxtime.hpet_address && hpet_use_timer) { int trigger = hpet_readl(HPET_T0_CMP); while (hpet_readl(HPET_COUNTER) >= trigger) /* do nothing */ ; diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index dba7237be5c1..3c58c30506a1 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -59,7 +59,7 @@ static int notsc __initdata = 0; unsigned int cpu_khz; /* TSC clocks / usec, not used here */ static unsigned long hpet_period; /* fsecs / HPET clock */ unsigned long hpet_tick; /* HPET clocks / interrupt */ -static int hpet_use_timer; /* Use counter of hpet for time keeping, otherwise PIT */ +int hpet_use_timer; /* Use counter of hpet for time keeping, otherwise PIT */ unsigned long vxtime_hz = PIT_TICK_RATE; int report_lost_ticks; /* command line option */ unsigned long long monotonic_base; @@ -326,7 +326,10 @@ static noinline void handle_lost_ticks(int lost, struct pt_regs *regs) print_symbol("rip %s\n", regs->rip); if (vxtime.mode == VXTIME_TSC && vxtime.hpet_address) { printk(KERN_WARNING "Falling back to HPET\n"); - vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick; + if (hpet_use_timer) + vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick; + else + vxtime.last = hpet_readl(HPET_COUNTER); vxtime.mode = VXTIME_HPET; do_gettimeoffset = do_gettimeoffset_hpet; } @@ -988,7 +991,10 @@ void __init time_init_gtod(void) notsc = 1; if (vxtime.hpet_address && notsc) { timetype = hpet_use_timer ? "HPET" : "PIT/HPET"; - vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick; + if (hpet_use_timer) + vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick; + else + vxtime.last = hpet_readl(HPET_COUNTER); vxtime.mode = VXTIME_HPET; do_gettimeoffset = do_gettimeoffset_hpet; #ifdef CONFIG_X86_PM_TIMER -- cgit v1.2.2 From a65d17c9d27a85782cfe1bbc36c747ffa1f81814 Mon Sep 17 00:00:00 2001 From: John Blackwood Date: Sun, 12 Feb 2006 14:34:58 -0800 Subject: [PATCH] arch/x86_64/kernel/traps.c PTRACE_SINGLESTEP oops We found a problem with x86_64 kernels with preemption enabled, where having multiple tasks doing ptrace singlesteps around the same time will cause the system to 'oops'. The problem seems that a task can get preempted out of the do_debug() processing while it is running on the DEBUG_STACK stack. If another task on that same cpu then enters do_debug() and uses the same per-cpu DEBUG_STACK stack, the previous preempted tasks's stack contents can be corrupted, and the system will oops when the preempted task is context switched back in again. The typical oops looks like the following: Unable to handle kernel paging request at ffffffffffffffae RIP: {thread_return+34} PGD 103027 PUD 102429067 PMD 0 Oops: 0002 [1] PREEMPT SMP CPU 0 Modules linked in: Pid: 3786, comm: ssdd Not tainted 2.6.15.2 #1 RIP: 0010:[] {thread_return+34} RSP: 0018:ffffffff80824058 EFLAGS: 000136c2 RAX: ffff81017e12cea0 RBX: 0000000000000000 RCX: 00000000c0000100 RDX: 0000000000000000 RSI: ffff8100f7856e20 RDI: ffff81017e12cea0 RBP: 0000000000000046 R08: ffff8100f68a6000 R09: 0000000000000000 R10: 0000000000000000 R11: ffff81017e12cea0 R12: ffff81000c2d53e8 R13: ffff81017f5b3be8 R14: ffff81000c0036e0 R15: 000001056cbfc899 FS: 00002aaaaaad9b00(0000) GS:ffffffff80883800(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: ffffffffffffffae CR3: 00000000f6fcf000 CR4: 00000000000006e0 Process ssdd (pid: 3786, threadinfo ffff8100f68a6000, task ffff8100f7856e20) Stack: ffffffff808240d8 ffffffff8012a84a ffff8100055f6c00 0000000000000020 0000000000000001 ffff81000c0036e0 ffffffff808240b8 0000000000000000 0000000000000000 0000000000000000 Call Trace: <#DB> {try_to_wake_up+985} {kick_process+87} {signal_wake_up+48} {specific_send_sig_info+179} {_spin_unlock_irqrestore+27} {force_sig_info+159} {do_debug+289} {sync_regs+103} {paranoid_userspace+35} Unable to handle kernel paging request at 00007fffffb7d000 RIP: {show_trace+465} PGD f6f25067 PUD f6fcc067 PMD f6957067 PTE 0 Oops: 0000 [2] PREEMPT SMP This patch disables preemptions for the task upon entry to do_debug(), before interrupts are reenabled, and then disables preemption before exiting do_debug(), after disabling interrupts. I've noticed that the task can be preempted either at the end of an interrupt, or on the call to force_sig_info() on the spin_unlock_irqrestore() processing. It might be better to attempt to code a fix in entry.S around the code that calls do_debug(). Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/traps.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index ee1b2da9e5e7..28d50dc540e8 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -90,6 +90,20 @@ static inline void conditional_sti(struct pt_regs *regs) local_irq_enable(); } +static inline void preempt_conditional_sti(struct pt_regs *regs) +{ + preempt_disable(); + if (regs->eflags & X86_EFLAGS_IF) + local_irq_enable(); +} + +static inline void preempt_conditional_cli(struct pt_regs *regs) +{ + if (regs->eflags & X86_EFLAGS_IF) + local_irq_disable(); + preempt_enable_no_resched(); +} + static int kstack_depth_to_print = 10; #ifdef CONFIG_KALLSYMS @@ -693,7 +707,7 @@ asmlinkage void __kprobes do_debug(struct pt_regs * regs, SIGTRAP) == NOTIFY_STOP) return; - conditional_sti(regs); + preempt_conditional_sti(regs); /* Mask out spurious debug traps due to lazy DR7 setting */ if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { @@ -738,11 +752,13 @@ asmlinkage void __kprobes do_debug(struct pt_regs * regs, clear_dr7: set_debugreg(0UL, 7); + preempt_conditional_cli(regs); return; clear_TF_reenable: set_tsk_thread_flag(tsk, TIF_SINGLESTEP); regs->eflags &= ~TF_MASK; + preempt_conditional_cli(regs); } static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr) -- cgit v1.2.2 From 0d541064e8f58858e11cd34d81b6e83617f6eb4a Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sun, 12 Feb 2006 14:34:59 -0800 Subject: [PATCH] x86_64: GART DMA merging fix Don't touch the non DMA members in the sg list in dma_map_sg in the IOMMU Some drivers (in particular ST) ran into problems because they reused the sg lists after passing them to pci_map_sg(). The merging procedure in the K8 GART IOMMU corrupted the state. This patch changes it to only touch the dma* entries during merging, but not the other fields. Approach suggested by Dave Miller. Signed-off-by: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/pci-gart.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c index 2fe23a6c361b..dd0718dc178b 100644 --- a/arch/x86_64/kernel/pci-gart.c +++ b/arch/x86_64/kernel/pci-gart.c @@ -310,7 +310,7 @@ void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int di for (i = 0; i < nents; i++) { struct scatterlist *s = &sg[i]; - if (!s->dma_length || !s->length) + if (!s->dma_length) break; dma_unmap_single(dev, s->dma_address, s->dma_length, dir); } @@ -364,7 +364,6 @@ static int __dma_map_cont(struct scatterlist *sg, int start, int stopat, BUG_ON(i > start && s->offset); if (i == start) { - *sout = *s; sout->dma_address = iommu_bus_base; sout->dma_address += iommu_page*PAGE_SIZE + s->offset; sout->dma_length = s->length; @@ -379,7 +378,7 @@ static int __dma_map_cont(struct scatterlist *sg, int start, int stopat, SET_LEAK(iommu_page); addr += PAGE_SIZE; iommu_page++; - } + } } BUG_ON(iommu_page - iommu_start != pages); return 0; @@ -391,7 +390,6 @@ static inline int dma_map_cont(struct scatterlist *sg, int start, int stopat, { if (!need) { BUG_ON(stopat - start != 1); - *sout = sg[start]; sout->dma_length = sg[start].length; return 0; } -- cgit v1.2.2 From 99019e919969be88e7e4042f3afa296bd55ad9ec Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 16 Feb 2006 23:41:55 +0100 Subject: [PATCH] x86_64: make touch_nmi_watchdog() not touch impossible cpus' private data Along with that, also suppress the memory touching altogether when the watchdog is not running, to eliminate needless crosstalk. Plus ad a call to it to make things consistent (one could also consider removing the call in enable_timer_nmi_watchdog()). Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/nmi.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c index 8be407a1f62d..5bf17e41cd2d 100644 --- a/arch/x86_64/kernel/nmi.c +++ b/arch/x86_64/kernel/nmi.c @@ -236,6 +236,7 @@ static void enable_lapic_nmi_watchdog(void) { if (nmi_active < 0) { nmi_watchdog = NMI_LOCAL_APIC; + touch_nmi_watchdog(); setup_apic_nmi_watchdog(); } } @@ -456,15 +457,17 @@ static DEFINE_PER_CPU(int, nmi_touch); void touch_nmi_watchdog (void) { - int i; + if (nmi_watchdog > 0) { + unsigned cpu; - /* - * Tell other CPUs to reset their alert counters. We cannot - * do it ourselves because the alert count increase is not - * atomic. - */ - for (i = 0; i < NR_CPUS; i++) - per_cpu(nmi_touch, i) = 1; + /* + * Tell other CPUs to reset their alert counters. We cannot + * do it ourselves because the alert count increase is not + * atomic. + */ + for_each_present_cpu (cpu) + per_cpu(nmi_touch, cpu) = 1; + } touch_softlockup_watchdog(); } -- cgit v1.2.2 From 2391c4b594eb28abd58102de8f4e5d7a4fa39f4c Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 16 Feb 2006 23:42:01 +0100 Subject: [PATCH] x86_64: Don't call do_exit with interrupts disabled after IRET exception This caused a sigreturn with bad argument on a preemptible kernel to complain with Debug: sleeping function called from invalid context at /home/lsrc/quilt/linux/include/linux/rwsem.h:43 in_atomic():0, irqs_disabled():1 Call Trace: {__might_sleep+190} {profile_task_exit+21} {__do_exit+34} {do_wait+0} Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/entry.S | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S index b150c87a08c6..7c10e9009d61 100644 --- a/arch/x86_64/kernel/entry.S +++ b/arch/x86_64/kernel/entry.S @@ -554,6 +554,7 @@ iret_label: /* running with kernel gs */ bad_iret: movq $-9999,%rdi /* better code? */ + sti jmp do_exit .previous -- cgit v1.2.2 From ab68805955ee3dd84a6aa76cd70e61fde996968d Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 16 Feb 2006 23:42:04 +0100 Subject: [PATCH] x86_64: Don't enable ATI apicmaintimer workaround when the machine has C2 or C3 Many laptops have problems with ticking the local APIC timer in C2/C3. The code added earlier to use it by default on ATI didn't really work for them. Don't enable it when the system supports C2/C3. This doesn't fix the problem fully, but at least it's not worse than before. Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/io_apic.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index 4282d72b2a26..2585c1d92b26 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c @@ -30,6 +30,9 @@ #include #include #include +#ifdef CONFIG_ACPI +#include +#endif #include #include @@ -260,6 +263,8 @@ __setup("apic", enable_ioapic_setup); And another hack to disable the IOMMU on VIA chipsets. + ... and others. Really should move this somewhere else. + Kludge-O-Rama. */ void __init check_ioapic(void) { @@ -307,6 +312,17 @@ void __init check_ioapic(void) case PCI_VENDOR_ID_ATI: if (apic_runs_main_timer != 0) break; +#ifdef CONFIG_ACPI + /* Don't do this for laptops right + right now because their timer + doesn't necessarily tick in C2/3 */ + if (acpi_fadt.revision >= 3 && + (acpi_fadt.plvl2_lat + acpi_fadt.plvl3_lat) < 1100) { + printk(KERN_INFO +"ATI board detected, but seems to be a laptop. Timer might be shakey, sorry\n"); + break; + } +#endif printk(KERN_INFO "ATI board detected. Using APIC/PM timer.\n"); apic_runs_main_timer = 1; -- cgit v1.2.2 From 7fd67843b96f90f59c9a244a1bc25137978a3ff9 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 16 Feb 2006 23:42:07 +0100 Subject: [PATCH] x86_64: Disable tsc when apicpmtimer is active Otherwise it has no effect anyways. Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/apic.c | 1 + arch/x86_64/kernel/time.c | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c index 7a0a3e8d5d72..e5b14c57eaa0 100644 --- a/arch/x86_64/kernel/apic.c +++ b/arch/x86_64/kernel/apic.c @@ -1152,6 +1152,7 @@ __setup("noapicmaintimer", setup_noapicmaintimer); static __init int setup_apicpmtimer(char *s) { apic_calibrate_pmtmr = 1; + notsc_setup(NULL); return setup_apicmaintimer(NULL); } __setup("apicpmtimer", setup_apicpmtimer); diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index 3c58c30506a1..67841d11ed1f 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -1327,8 +1327,7 @@ static int __init nohpet_setup(char *s) __setup("nohpet", nohpet_setup); - -static int __init notsc_setup(char *s) +int __init notsc_setup(char *s) { notsc = 1; return 0; -- cgit v1.2.2 From 6574ffd74b03e35026f428a2c820e6ddf45d426c Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 16 Feb 2006 23:42:10 +0100 Subject: [PATCH] x86_64: Resolve the RIP of an early exception using kallsyms But do it after everything else to risk less from recursive crashes. Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/head.S | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S index 692c737feddb..02fc7fa0ea28 100644 --- a/arch/x86_64/kernel/head.S +++ b/arch/x86_64/kernel/head.S @@ -213,6 +213,11 @@ ENTRY(early_idt_handler) cmpl $2,early_recursion_flag(%rip) jz 1f call dump_stack +#ifdef CONFIG_KALLSYMS + leaq early_idt_ripmsg(%rip),%rdi + movq 8(%rsp),%rsi # get rip again + call __print_symbol +#endif 1: hlt jmp 1b early_recursion_flag: @@ -220,6 +225,8 @@ early_recursion_flag: early_idt_msg: .asciz "PANIC: early exception rip %lx error %lx cr2 %lx\n" +early_idt_ripmsg: + .asciz "RIP %s\n" .code32 ENTRY(no_long_mode) -- cgit v1.2.2