From b445e26cbf784cdba10f2b6c3e2cd3ee7bab360a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 27 Jun 2005 15:42:04 -0700 Subject: [SPARC64]: Avoid membar instructions in delay slots. In particular, avoid membar instructions in the delay slot of a jmpl instruction. UltraSPARC-I, II, IIi, and IIe have a bug, documented in the UltraSPARC-IIi User's Manual, Appendix K, Erratum 51 The long and short of it is that if the IMU unit misses on a branch or jmpl, and there is a store buffer synchronizing membar in the delay slot, the chip can stop fetching instructions. If interrupts are enabled or some other trap is enabled, the chip will unwedge itself, but performance will suffer. We already had a workaround for this bug in a few spots, but it's better to have the entire tree sanitized for this rule. Signed-off-by: David S. Miller --- arch/sparc64/kernel/entry.S | 6 ++++-- arch/sparc64/kernel/semaphore.c | 12 ++++++++---- arch/sparc64/kernel/trampoline.S | 3 ++- 3 files changed, 14 insertions(+), 7 deletions(-) (limited to 'arch/sparc64/kernel') diff --git a/arch/sparc64/kernel/entry.S b/arch/sparc64/kernel/entry.S index a47f2d0b1a29..ffe717ab7f83 100644 --- a/arch/sparc64/kernel/entry.S +++ b/arch/sparc64/kernel/entry.S @@ -271,8 +271,9 @@ cplus_fptrap_insn_1: fmuld %f0, %f2, %f26 faddd %f0, %f2, %f28 fmuld %f0, %f2, %f30 + membar #Sync b,pt %xcc, fpdis_exit - membar #Sync + nop 2: andcc %g5, FPRS_DU, %g0 bne,pt %icc, 3f fzero %f32 @@ -301,8 +302,9 @@ cplus_fptrap_insn_2: fmuld %f32, %f34, %f58 faddd %f32, %f34, %f60 fmuld %f32, %f34, %f62 + membar #Sync ba,pt %xcc, fpdis_exit - membar #Sync + nop 3: mov SECONDARY_CONTEXT, %g3 add %g6, TI_FPREGS, %g1 ldxa [%g3] ASI_DMMU, %g5 diff --git a/arch/sparc64/kernel/semaphore.c b/arch/sparc64/kernel/semaphore.c index 63496c43fe17..a809e63f03ef 100644 --- a/arch/sparc64/kernel/semaphore.c +++ b/arch/sparc64/kernel/semaphore.c @@ -32,8 +32,9 @@ static __inline__ int __sem_update_count(struct semaphore *sem, int incr) " 
add %1, %4, %1\n" " cas [%3], %0, %1\n" " cmp %0, %1\n" +" membar #StoreLoad | #StoreStore\n" " bne,pn %%icc, 1b\n" -" membar #StoreLoad | #StoreStore\n" +" nop\n" : "=&r" (old_count), "=&r" (tmp), "=m" (sem->count) : "r" (&sem->count), "r" (incr), "m" (sem->count) : "cc"); @@ -71,8 +72,9 @@ void up(struct semaphore *sem) " cmp %%g1, %%g7\n" " bne,pn %%icc, 1b\n" " addcc %%g7, 1, %%g0\n" +" membar #StoreLoad | #StoreStore\n" " ble,pn %%icc, 3f\n" -" membar #StoreLoad | #StoreStore\n" +" nop\n" "2:\n" " .subsection 2\n" "3: mov %0, %%g1\n" @@ -128,8 +130,9 @@ void __sched down(struct semaphore *sem) " cmp %%g1, %%g7\n" " bne,pn %%icc, 1b\n" " cmp %%g7, 1\n" +" membar #StoreLoad | #StoreStore\n" " bl,pn %%icc, 3f\n" -" membar #StoreLoad | #StoreStore\n" +" nop\n" "2:\n" " .subsection 2\n" "3: mov %0, %%g1\n" @@ -233,8 +236,9 @@ int __sched down_interruptible(struct semaphore *sem) " cmp %%g1, %%g7\n" " bne,pn %%icc, 1b\n" " cmp %%g7, 1\n" +" membar #StoreLoad | #StoreStore\n" " bl,pn %%icc, 3f\n" -" membar #StoreLoad | #StoreStore\n" +" nop\n" "2:\n" " .subsection 2\n" "3: mov %2, %%g1\n" diff --git a/arch/sparc64/kernel/trampoline.S b/arch/sparc64/kernel/trampoline.S index 2c8f9344b4ee..3a145fc39cf2 100644 --- a/arch/sparc64/kernel/trampoline.S +++ b/arch/sparc64/kernel/trampoline.S @@ -98,8 +98,9 @@ startup_continue: sethi %hi(prom_entry_lock), %g2 1: ldstub [%g2 + %lo(prom_entry_lock)], %g1 + membar #StoreLoad | #StoreStore brnz,pn %g1, 1b - membar #StoreLoad | #StoreStore + nop sethi %hi(p1275buf), %g2 or %g2, %lo(p1275buf), %g2 -- cgit v1.2.2 From 63b614522cba5a015923c0e8f284be6e01c13f1a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 27 Jun 2005 17:04:45 -0700 Subject: [SPARC64]: Get rid of fast IRQ feature. The only real user was the assembler floppy interrupt handler, which does not need to be in assembly. This makes it so that there are less pieces of code which know about the internal layout of ivector_table[] and friends. 
Signed-off-by: David S. Miller --- arch/sparc64/kernel/auxio.c | 2 +- arch/sparc64/kernel/entry.S | 110 ----------------------- arch/sparc64/kernel/irq.c | 171 +++++++++++------------------------- arch/sparc64/kernel/sparc64_ksyms.c | 1 - 4 files changed, 51 insertions(+), 233 deletions(-) (limited to 'arch/sparc64/kernel') diff --git a/arch/sparc64/kernel/auxio.c b/arch/sparc64/kernel/auxio.c index a0716ccc2f4a..8852c20c8d99 100644 --- a/arch/sparc64/kernel/auxio.c +++ b/arch/sparc64/kernel/auxio.c @@ -16,7 +16,7 @@ #include #include -/* This cannot be static, as it is referenced in entry.S */ +/* This cannot be static, as it is referenced in irq.c */ void __iomem *auxio_register = NULL; enum auxio_type { diff --git a/arch/sparc64/kernel/entry.S b/arch/sparc64/kernel/entry.S index ffe717ab7f83..eee516a71c14 100644 --- a/arch/sparc64/kernel/entry.S +++ b/arch/sparc64/kernel/entry.S @@ -701,116 +701,6 @@ utrap_ill: ba,pt %xcc, rtrap clr %l6 -#ifdef CONFIG_BLK_DEV_FD - .globl floppy_hardint -floppy_hardint: - wr %g0, (1 << 11), %clear_softint - sethi %hi(doing_pdma), %g1 - ld [%g1 + %lo(doing_pdma)], %g2 - brz,pn %g2, floppy_dosoftint - sethi %hi(fdc_status), %g3 - ldx [%g3 + %lo(fdc_status)], %g3 - sethi %hi(pdma_vaddr), %g5 - ldx [%g5 + %lo(pdma_vaddr)], %g4 - sethi %hi(pdma_size), %g5 - ldx [%g5 + %lo(pdma_size)], %g5 - -next_byte: - lduba [%g3] ASI_PHYS_BYPASS_EC_E, %g7 - andcc %g7, 0x80, %g0 - be,pn %icc, floppy_fifo_emptied - andcc %g7, 0x20, %g0 - be,pn %icc, floppy_overrun - andcc %g7, 0x40, %g0 - be,pn %icc, floppy_write - sub %g5, 1, %g5 - - inc %g3 - lduba [%g3] ASI_PHYS_BYPASS_EC_E, %g7 - dec %g3 - orcc %g0, %g5, %g0 - stb %g7, [%g4] - bne,pn %xcc, next_byte - add %g4, 1, %g4 - - b,pt %xcc, floppy_tdone - nop - -floppy_write: - ldub [%g4], %g7 - orcc %g0, %g5, %g0 - inc %g3 - stba %g7, [%g3] ASI_PHYS_BYPASS_EC_E - dec %g3 - bne,pn %xcc, next_byte - add %g4, 1, %g4 - -floppy_tdone: - sethi %hi(pdma_vaddr), %g1 - stx %g4, [%g1 + %lo(pdma_vaddr)] - sethi 
%hi(pdma_size), %g1 - stx %g5, [%g1 + %lo(pdma_size)] - sethi %hi(auxio_register), %g1 - ldx [%g1 + %lo(auxio_register)], %g7 - lduba [%g7] ASI_PHYS_BYPASS_EC_E, %g5 - or %g5, AUXIO_AUX1_FTCNT, %g5 -/* andn %g5, AUXIO_AUX1_MASK, %g5 */ - stba %g5, [%g7] ASI_PHYS_BYPASS_EC_E - andn %g5, AUXIO_AUX1_FTCNT, %g5 -/* andn %g5, AUXIO_AUX1_MASK, %g5 */ - - nop; nop; nop; nop; nop; nop; - nop; nop; nop; nop; nop; nop; - - stba %g5, [%g7] ASI_PHYS_BYPASS_EC_E - sethi %hi(doing_pdma), %g1 - b,pt %xcc, floppy_dosoftint - st %g0, [%g1 + %lo(doing_pdma)] - -floppy_fifo_emptied: - sethi %hi(pdma_vaddr), %g1 - stx %g4, [%g1 + %lo(pdma_vaddr)] - sethi %hi(pdma_size), %g1 - stx %g5, [%g1 + %lo(pdma_size)] - sethi %hi(irq_action), %g1 - or %g1, %lo(irq_action), %g1 - ldx [%g1 + (11 << 3)], %g3 ! irqaction[floppy_irq] - ldx [%g3 + 0x08], %g4 ! action->flags>>48==ino - sethi %hi(ivector_table), %g3 - srlx %g4, 48, %g4 - or %g3, %lo(ivector_table), %g3 - sllx %g4, 5, %g4 - ldx [%g3 + %g4], %g4 ! &ivector_table[ino] - ldx [%g4 + 0x10], %g4 ! bucket->iclr - stwa %g0, [%g4] ASI_PHYS_BYPASS_EC_E ! ICLR_IDLE - membar #Sync ! probably not needed... - retry - -floppy_overrun: - sethi %hi(pdma_vaddr), %g1 - stx %g4, [%g1 + %lo(pdma_vaddr)] - sethi %hi(pdma_size), %g1 - stx %g5, [%g1 + %lo(pdma_size)] - sethi %hi(doing_pdma), %g1 - st %g0, [%g1 + %lo(doing_pdma)] - -floppy_dosoftint: - rdpr %pil, %g2 - wrpr %g0, 15, %pil - sethi %hi(109f), %g7 - b,pt %xcc, etrap_irq -109: or %g7, %lo(109b), %g7 - - mov 11, %o0 - mov 0, %o1 - call sparc_floppy_irq - add %sp, PTREGS_OFF, %o2 - - b,pt %xcc, rtrap_irq - nop - -#endif /* CONFIG_BLK_DEV_FD */ - /* XXX Here is stuff we still need to write... 
-DaveM XXX */ .globl netbsd_syscall netbsd_syscall: diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c index 4dcb8af94090..424712577307 100644 --- a/arch/sparc64/kernel/irq.c +++ b/arch/sparc64/kernel/irq.c @@ -37,6 +37,7 @@ #include #include #include +#include #ifdef CONFIG_SMP static void distribute_irqs(void); @@ -834,137 +835,65 @@ void handler_irq(int irq, struct pt_regs *regs) } #ifdef CONFIG_BLK_DEV_FD -extern void floppy_interrupt(int irq, void *dev_cookie, struct pt_regs *regs); +extern irqreturn_t floppy_interrupt(int, void *, struct pt_regs *); -void sparc_floppy_irq(int irq, void *dev_cookie, struct pt_regs *regs) -{ - struct irqaction *action = *(irq + irq_action); - struct ino_bucket *bucket; - int cpu = smp_processor_id(); - - irq_enter(); - kstat_this_cpu.irqs[irq]++; - - *(irq_work(cpu, irq)) = 0; - bucket = get_ino_in_irqaction(action) + ivector_table; - - bucket->flags |= IBF_INPROGRESS; - - floppy_interrupt(irq, dev_cookie, regs); - upa_writel(ICLR_IDLE, bucket->iclr); - - bucket->flags &= ~IBF_INPROGRESS; - - irq_exit(); -} -#endif - -/* The following assumes that the branch lies before the place we - * are branching to. This is the case for a trap vector... - * You have been warned. 
- */ -#define SPARC_BRANCH(dest_addr, inst_addr) \ - (0x10800000 | ((((dest_addr)-(inst_addr))>>2)&0x3fffff)) - -#define SPARC_NOP (0x01000000) +/* XXX No easy way to include asm/floppy.h XXX */ +extern unsigned char *pdma_vaddr; +extern unsigned long pdma_size; +extern volatile int doing_pdma; +extern unsigned long fdc_status; -static void install_fast_irq(unsigned int cpu_irq, - irqreturn_t (*handler)(int, void *, struct pt_regs *)) +irqreturn_t sparc_floppy_irq(int irq, void *dev_cookie, struct pt_regs *regs) { - extern unsigned long sparc64_ttable_tl0; - unsigned long ttent = (unsigned long) &sparc64_ttable_tl0; - unsigned int *insns; - - ttent += 0x820; - ttent += (cpu_irq - 1) << 5; - insns = (unsigned int *) ttent; - insns[0] = SPARC_BRANCH(((unsigned long) handler), - ((unsigned long)&insns[0])); - insns[1] = SPARC_NOP; - __asm__ __volatile__("membar #StoreStore; flush %0" : : "r" (ttent)); -} - -int request_fast_irq(unsigned int irq, - irqreturn_t (*handler)(int, void *, struct pt_regs *), - unsigned long irqflags, const char *name, void *dev_id) -{ - struct irqaction *action; - struct ino_bucket *bucket = __bucket(irq); - unsigned long flags; - - /* No pil0 dummy buckets allowed here. 
*/ - if (bucket < &ivector_table[0] || - bucket >= &ivector_table[NUM_IVECS]) { - unsigned int *caller; - - __asm__ __volatile__("mov %%i7, %0" : "=r" (caller)); - printk(KERN_CRIT "request_fast_irq: Old style IRQ registry attempt " - "from %p, irq %08x.\n", caller, irq); - return -EINVAL; - } - - if (!handler) - return -EINVAL; + if (likely(doing_pdma)) { + void __iomem *stat = (void __iomem *) fdc_status; + unsigned char *vaddr = pdma_vaddr; + unsigned long size = pdma_size; + u8 val; + + while (size) { + val = readb(stat); + if (unlikely(!(val & 0x80))) { + pdma_vaddr = vaddr; + pdma_size = size; + return IRQ_HANDLED; + } + if (unlikely(!(val & 0x20))) { + pdma_vaddr = vaddr; + pdma_size = size; + doing_pdma = 0; + goto main_interrupt; + } + if (val & 0x40) { + /* read */ + *vaddr++ = readb(stat + 1); + } else { + unsigned char data = *vaddr++; - if ((bucket->pil == 0) || (bucket->pil == 14)) { - printk("request_fast_irq: Trying to register shared IRQ 0 or 14.\n"); - return -EBUSY; - } + /* write */ + writeb(data, stat + 1); + } + size--; + } - spin_lock_irqsave(&irq_action_lock, flags); + pdma_vaddr = vaddr; + pdma_size = size; - action = *(bucket->pil + irq_action); - if (action) { - if (action->flags & SA_SHIRQ) - panic("Trying to register fast irq when already shared.\n"); - if (irqflags & SA_SHIRQ) - panic("Trying to register fast irq as shared.\n"); - printk("request_fast_irq: Trying to register yet already owned.\n"); - spin_unlock_irqrestore(&irq_action_lock, flags); - return -EBUSY; - } + /* Send Terminal Count pulse to floppy controller. */ + val = readb(auxio_register); + val |= AUXIO_AUX1_FTCNT; + writeb(val, auxio_register); + val &= ~AUXIO_AUX1_FTCNT; /* clear only FTCNT to end the pulse */ + writeb(val, auxio_register); - /* - * We do not check for SA_SAMPLE_RANDOM in this path. Neither do we - * support smp intr affinity in this path. 
- */ - if (irqflags & SA_STATIC_ALLOC) { - if (static_irq_count < MAX_STATIC_ALLOC) - action = &static_irqaction[static_irq_count++]; - else - printk("Request for IRQ%d (%s) SA_STATIC_ALLOC failed " - "using kmalloc\n", bucket->pil, name); - } - if (action == NULL) - action = (struct irqaction *)kmalloc(sizeof(struct irqaction), - GFP_ATOMIC); - if (!action) { - spin_unlock_irqrestore(&irq_action_lock, flags); - return -ENOMEM; + doing_pdma = 0; } - install_fast_irq(bucket->pil, handler); - bucket->irq_info = action; - bucket->flags |= IBF_ACTIVE; - - action->handler = handler; - action->flags = irqflags; - action->dev_id = NULL; - action->name = name; - action->next = NULL; - put_ino_in_irqaction(action, irq); - put_smpaff_in_irqaction(action, CPU_MASK_NONE); - - *(bucket->pil + irq_action) = action; - enable_irq(irq); - - spin_unlock_irqrestore(&irq_action_lock, flags); - -#ifdef CONFIG_SMP - distribute_irqs(); -#endif - return 0; +main_interrupt: + return floppy_interrupt(irq, dev_cookie, regs); } +EXPORT_SYMBOL(sparc_floppy_irq); +#endif /* We really don't need these at all on the Sparc. We only have * stubs here because they are exported to modules. diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c index e78cc53594fa..56cd96f4a5cd 100644 --- a/arch/sparc64/kernel/sparc64_ksyms.c +++ b/arch/sparc64/kernel/sparc64_ksyms.c @@ -227,7 +227,6 @@ EXPORT_SYMBOL(__flush_dcache_range); EXPORT_SYMBOL(mostek_lock); EXPORT_SYMBOL(mstk48t02_regs); -EXPORT_SYMBOL(request_fast_irq); #ifdef CONFIG_SUN_AUXIO EXPORT_SYMBOL(auxio_set_led); EXPORT_SYMBOL(auxio_set_lte); -- cgit v1.2.2