aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@ppc970.osdl.org>2005-06-27 23:10:18 -0400
committerLinus Torvalds <torvalds@ppc970.osdl.org>2005-06-27 23:10:18 -0400
commit6e5a32754c67f0d156c2f196d604b2e9129a1fd5 (patch)
tree2dc77bbae2b2896c6435dc25c013d5d053fba591
parentb6d00f0de9e932e2884b3b7af8e43c0a61a271ee (diff)
parent63b614522cba5a015923c0e8f284be6e01c13f1a (diff)
Merge rsync://rsync.kernel.org/pub/scm/linux/kernel/git/davem/sparc-2.6
-rw-r--r--arch/sparc64/kernel/auxio.c2
-rw-r--r--arch/sparc64/kernel/entry.S116
-rw-r--r--arch/sparc64/kernel/irq.c171
-rw-r--r--arch/sparc64/kernel/semaphore.c12
-rw-r--r--arch/sparc64/kernel/sparc64_ksyms.c1
-rw-r--r--arch/sparc64/kernel/trampoline.S3
-rw-r--r--arch/sparc64/lib/U1memcpy.S103
-rw-r--r--arch/sparc64/lib/VISsave.S15
-rw-r--r--arch/sparc64/lib/atomic.S42
-rw-r--r--arch/sparc64/lib/bitops.S31
-rw-r--r--arch/sparc64/lib/debuglocks.c6
-rw-r--r--arch/sparc64/lib/dec_and_lock.S6
-rw-r--r--arch/sparc64/lib/rwsem.S15
-rw-r--r--arch/sparc64/mm/init.c6
-rw-r--r--arch/sparc64/mm/ultra.S3
-rw-r--r--include/asm-sparc64/auxio.h2
-rw-r--r--include/asm-sparc64/floppy.h16
-rw-r--r--include/asm-sparc64/irq.h7
-rw-r--r--include/asm-sparc64/rwsem.h3
-rw-r--r--include/asm-sparc64/spinlock.h29
-rw-r--r--include/asm-sparc64/spitfire.h1
21 files changed, 233 insertions, 357 deletions
diff --git a/arch/sparc64/kernel/auxio.c b/arch/sparc64/kernel/auxio.c
index a0716ccc2f4..8852c20c8d9 100644
--- a/arch/sparc64/kernel/auxio.c
+++ b/arch/sparc64/kernel/auxio.c
@@ -16,7 +16,7 @@
16#include <asm/ebus.h> 16#include <asm/ebus.h>
17#include <asm/auxio.h> 17#include <asm/auxio.h>
18 18
19/* This cannot be static, as it is referenced in entry.S */ 19/* This cannot be static, as it is referenced in irq.c */
20void __iomem *auxio_register = NULL; 20void __iomem *auxio_register = NULL;
21 21
22enum auxio_type { 22enum auxio_type {
diff --git a/arch/sparc64/kernel/entry.S b/arch/sparc64/kernel/entry.S
index a47f2d0b1a2..eee516a71c1 100644
--- a/arch/sparc64/kernel/entry.S
+++ b/arch/sparc64/kernel/entry.S
@@ -271,8 +271,9 @@ cplus_fptrap_insn_1:
271 fmuld %f0, %f2, %f26 271 fmuld %f0, %f2, %f26
272 faddd %f0, %f2, %f28 272 faddd %f0, %f2, %f28
273 fmuld %f0, %f2, %f30 273 fmuld %f0, %f2, %f30
274 membar #Sync
274 b,pt %xcc, fpdis_exit 275 b,pt %xcc, fpdis_exit
275 membar #Sync 276 nop
2762: andcc %g5, FPRS_DU, %g0 2772: andcc %g5, FPRS_DU, %g0
277 bne,pt %icc, 3f 278 bne,pt %icc, 3f
278 fzero %f32 279 fzero %f32
@@ -301,8 +302,9 @@ cplus_fptrap_insn_2:
301 fmuld %f32, %f34, %f58 302 fmuld %f32, %f34, %f58
302 faddd %f32, %f34, %f60 303 faddd %f32, %f34, %f60
303 fmuld %f32, %f34, %f62 304 fmuld %f32, %f34, %f62
305 membar #Sync
304 ba,pt %xcc, fpdis_exit 306 ba,pt %xcc, fpdis_exit
305 membar #Sync 307 nop
3063: mov SECONDARY_CONTEXT, %g3 3083: mov SECONDARY_CONTEXT, %g3
307 add %g6, TI_FPREGS, %g1 309 add %g6, TI_FPREGS, %g1
308 ldxa [%g3] ASI_DMMU, %g5 310 ldxa [%g3] ASI_DMMU, %g5
@@ -699,116 +701,6 @@ utrap_ill:
699 ba,pt %xcc, rtrap 701 ba,pt %xcc, rtrap
700 clr %l6 702 clr %l6
701 703
702#ifdef CONFIG_BLK_DEV_FD
703 .globl floppy_hardint
704floppy_hardint:
705 wr %g0, (1 << 11), %clear_softint
706 sethi %hi(doing_pdma), %g1
707 ld [%g1 + %lo(doing_pdma)], %g2
708 brz,pn %g2, floppy_dosoftint
709 sethi %hi(fdc_status), %g3
710 ldx [%g3 + %lo(fdc_status)], %g3
711 sethi %hi(pdma_vaddr), %g5
712 ldx [%g5 + %lo(pdma_vaddr)], %g4
713 sethi %hi(pdma_size), %g5
714 ldx [%g5 + %lo(pdma_size)], %g5
715
716next_byte:
717 lduba [%g3] ASI_PHYS_BYPASS_EC_E, %g7
718 andcc %g7, 0x80, %g0
719 be,pn %icc, floppy_fifo_emptied
720 andcc %g7, 0x20, %g0
721 be,pn %icc, floppy_overrun
722 andcc %g7, 0x40, %g0
723 be,pn %icc, floppy_write
724 sub %g5, 1, %g5
725
726 inc %g3
727 lduba [%g3] ASI_PHYS_BYPASS_EC_E, %g7
728 dec %g3
729 orcc %g0, %g5, %g0
730 stb %g7, [%g4]
731 bne,pn %xcc, next_byte
732 add %g4, 1, %g4
733
734 b,pt %xcc, floppy_tdone
735 nop
736
737floppy_write:
738 ldub [%g4], %g7
739 orcc %g0, %g5, %g0
740 inc %g3
741 stba %g7, [%g3] ASI_PHYS_BYPASS_EC_E
742 dec %g3
743 bne,pn %xcc, next_byte
744 add %g4, 1, %g4
745
746floppy_tdone:
747 sethi %hi(pdma_vaddr), %g1
748 stx %g4, [%g1 + %lo(pdma_vaddr)]
749 sethi %hi(pdma_size), %g1
750 stx %g5, [%g1 + %lo(pdma_size)]
751 sethi %hi(auxio_register), %g1
752 ldx [%g1 + %lo(auxio_register)], %g7
753 lduba [%g7] ASI_PHYS_BYPASS_EC_E, %g5
754 or %g5, AUXIO_AUX1_FTCNT, %g5
755/* andn %g5, AUXIO_AUX1_MASK, %g5 */
756 stba %g5, [%g7] ASI_PHYS_BYPASS_EC_E
757 andn %g5, AUXIO_AUX1_FTCNT, %g5
758/* andn %g5, AUXIO_AUX1_MASK, %g5 */
759
760 nop; nop; nop; nop; nop; nop;
761 nop; nop; nop; nop; nop; nop;
762
763 stba %g5, [%g7] ASI_PHYS_BYPASS_EC_E
764 sethi %hi(doing_pdma), %g1
765 b,pt %xcc, floppy_dosoftint
766 st %g0, [%g1 + %lo(doing_pdma)]
767
768floppy_fifo_emptied:
769 sethi %hi(pdma_vaddr), %g1
770 stx %g4, [%g1 + %lo(pdma_vaddr)]
771 sethi %hi(pdma_size), %g1
772 stx %g5, [%g1 + %lo(pdma_size)]
773 sethi %hi(irq_action), %g1
774 or %g1, %lo(irq_action), %g1
775 ldx [%g1 + (11 << 3)], %g3 ! irqaction[floppy_irq]
776 ldx [%g3 + 0x08], %g4 ! action->flags>>48==ino
777 sethi %hi(ivector_table), %g3
778 srlx %g4, 48, %g4
779 or %g3, %lo(ivector_table), %g3
780 sllx %g4, 5, %g4
781 ldx [%g3 + %g4], %g4 ! &ivector_table[ino]
782 ldx [%g4 + 0x10], %g4 ! bucket->iclr
783 stwa %g0, [%g4] ASI_PHYS_BYPASS_EC_E ! ICLR_IDLE
784 membar #Sync ! probably not needed...
785 retry
786
787floppy_overrun:
788 sethi %hi(pdma_vaddr), %g1
789 stx %g4, [%g1 + %lo(pdma_vaddr)]
790 sethi %hi(pdma_size), %g1
791 stx %g5, [%g1 + %lo(pdma_size)]
792 sethi %hi(doing_pdma), %g1
793 st %g0, [%g1 + %lo(doing_pdma)]
794
795floppy_dosoftint:
796 rdpr %pil, %g2
797 wrpr %g0, 15, %pil
798 sethi %hi(109f), %g7
799 b,pt %xcc, etrap_irq
800109: or %g7, %lo(109b), %g7
801
802 mov 11, %o0
803 mov 0, %o1
804 call sparc_floppy_irq
805 add %sp, PTREGS_OFF, %o2
806
807 b,pt %xcc, rtrap_irq
808 nop
809
810#endif /* CONFIG_BLK_DEV_FD */
811
812 /* XXX Here is stuff we still need to write... -DaveM XXX */ 704 /* XXX Here is stuff we still need to write... -DaveM XXX */
813 .globl netbsd_syscall 705 .globl netbsd_syscall
814netbsd_syscall: 706netbsd_syscall:
diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c
index 4dcb8af9409..42471257730 100644
--- a/arch/sparc64/kernel/irq.c
+++ b/arch/sparc64/kernel/irq.c
@@ -37,6 +37,7 @@
37#include <asm/uaccess.h> 37#include <asm/uaccess.h>
38#include <asm/cache.h> 38#include <asm/cache.h>
39#include <asm/cpudata.h> 39#include <asm/cpudata.h>
40#include <asm/auxio.h>
40 41
41#ifdef CONFIG_SMP 42#ifdef CONFIG_SMP
42static void distribute_irqs(void); 43static void distribute_irqs(void);
@@ -834,137 +835,65 @@ void handler_irq(int irq, struct pt_regs *regs)
834} 835}
835 836
836#ifdef CONFIG_BLK_DEV_FD 837#ifdef CONFIG_BLK_DEV_FD
837extern void floppy_interrupt(int irq, void *dev_cookie, struct pt_regs *regs); 838extern irqreturn_t floppy_interrupt(int, void *, struct pt_regs *);;
838 839
839void sparc_floppy_irq(int irq, void *dev_cookie, struct pt_regs *regs) 840/* XXX No easy way to include asm/floppy.h XXX */
840{ 841extern unsigned char *pdma_vaddr;
841 struct irqaction *action = *(irq + irq_action); 842extern unsigned long pdma_size;
842 struct ino_bucket *bucket; 843extern volatile int doing_pdma;
843 int cpu = smp_processor_id(); 844extern unsigned long fdc_status;
844
845 irq_enter();
846 kstat_this_cpu.irqs[irq]++;
847
848 *(irq_work(cpu, irq)) = 0;
849 bucket = get_ino_in_irqaction(action) + ivector_table;
850
851 bucket->flags |= IBF_INPROGRESS;
852
853 floppy_interrupt(irq, dev_cookie, regs);
854 upa_writel(ICLR_IDLE, bucket->iclr);
855
856 bucket->flags &= ~IBF_INPROGRESS;
857
858 irq_exit();
859}
860#endif
861
862/* The following assumes that the branch lies before the place we
863 * are branching to. This is the case for a trap vector...
864 * You have been warned.
865 */
866#define SPARC_BRANCH(dest_addr, inst_addr) \
867 (0x10800000 | ((((dest_addr)-(inst_addr))>>2)&0x3fffff))
868
869#define SPARC_NOP (0x01000000)
870 845
871static void install_fast_irq(unsigned int cpu_irq, 846irqreturn_t sparc_floppy_irq(int irq, void *dev_cookie, struct pt_regs *regs)
872 irqreturn_t (*handler)(int, void *, struct pt_regs *))
873{ 847{
874 extern unsigned long sparc64_ttable_tl0; 848 if (likely(doing_pdma)) {
875 unsigned long ttent = (unsigned long) &sparc64_ttable_tl0; 849 void __iomem *stat = (void __iomem *) fdc_status;
876 unsigned int *insns; 850 unsigned char *vaddr = pdma_vaddr;
877 851 unsigned long size = pdma_size;
878 ttent += 0x820; 852 u8 val;
879 ttent += (cpu_irq - 1) << 5; 853
880 insns = (unsigned int *) ttent; 854 while (size) {
881 insns[0] = SPARC_BRANCH(((unsigned long) handler), 855 val = readb(stat);
882 ((unsigned long)&insns[0])); 856 if (unlikely(!(val & 0x80))) {
883 insns[1] = SPARC_NOP; 857 pdma_vaddr = vaddr;
884 __asm__ __volatile__("membar #StoreStore; flush %0" : : "r" (ttent)); 858 pdma_size = size;
885} 859 return IRQ_HANDLED;
886 860 }
887int request_fast_irq(unsigned int irq, 861 if (unlikely(!(val & 0x20))) {
888 irqreturn_t (*handler)(int, void *, struct pt_regs *), 862 pdma_vaddr = vaddr;
889 unsigned long irqflags, const char *name, void *dev_id) 863 pdma_size = size;
890{ 864 doing_pdma = 0;
891 struct irqaction *action; 865 goto main_interrupt;
892 struct ino_bucket *bucket = __bucket(irq); 866 }
893 unsigned long flags; 867 if (val & 0x40) {
894 868 /* read */
895 /* No pil0 dummy buckets allowed here. */ 869 *vaddr++ = readb(stat + 1);
896 if (bucket < &ivector_table[0] || 870 } else {
897 bucket >= &ivector_table[NUM_IVECS]) { 871 unsigned char data = *vaddr++;
898 unsigned int *caller;
899
900 __asm__ __volatile__("mov %%i7, %0" : "=r" (caller));
901 printk(KERN_CRIT "request_fast_irq: Old style IRQ registry attempt "
902 "from %p, irq %08x.\n", caller, irq);
903 return -EINVAL;
904 }
905
906 if (!handler)
907 return -EINVAL;
908 872
909 if ((bucket->pil == 0) || (bucket->pil == 14)) { 873 /* write */
910 printk("request_fast_irq: Trying to register shared IRQ 0 or 14.\n"); 874 writeb(data, stat + 1);
911 return -EBUSY; 875 }
912 } 876 size--;
877 }
913 878
914 spin_lock_irqsave(&irq_action_lock, flags); 879 pdma_vaddr = vaddr;
880 pdma_size = size;
915 881
916 action = *(bucket->pil + irq_action); 882 /* Send Terminal Count pulse to floppy controller. */
917 if (action) { 883 val = readb(auxio_register);
918 if (action->flags & SA_SHIRQ) 884 val |= AUXIO_AUX1_FTCNT;
919 panic("Trying to register fast irq when already shared.\n"); 885 writeb(val, auxio_register);
920 if (irqflags & SA_SHIRQ) 886 val &= AUXIO_AUX1_FTCNT;
921 panic("Trying to register fast irq as shared.\n"); 887 writeb(val, auxio_register);
922 printk("request_fast_irq: Trying to register yet already owned.\n");
923 spin_unlock_irqrestore(&irq_action_lock, flags);
924 return -EBUSY;
925 }
926 888
927 /* 889 doing_pdma = 0;
928 * We do not check for SA_SAMPLE_RANDOM in this path. Neither do we
929 * support smp intr affinity in this path.
930 */
931 if (irqflags & SA_STATIC_ALLOC) {
932 if (static_irq_count < MAX_STATIC_ALLOC)
933 action = &static_irqaction[static_irq_count++];
934 else
935 printk("Request for IRQ%d (%s) SA_STATIC_ALLOC failed "
936 "using kmalloc\n", bucket->pil, name);
937 }
938 if (action == NULL)
939 action = (struct irqaction *)kmalloc(sizeof(struct irqaction),
940 GFP_ATOMIC);
941 if (!action) {
942 spin_unlock_irqrestore(&irq_action_lock, flags);
943 return -ENOMEM;
944 } 890 }
945 install_fast_irq(bucket->pil, handler);
946 891
947 bucket->irq_info = action; 892main_interrupt:
948 bucket->flags |= IBF_ACTIVE; 893 return floppy_interrupt(irq, dev_cookie, regs);
949
950 action->handler = handler;
951 action->flags = irqflags;
952 action->dev_id = NULL;
953 action->name = name;
954 action->next = NULL;
955 put_ino_in_irqaction(action, irq);
956 put_smpaff_in_irqaction(action, CPU_MASK_NONE);
957
958 *(bucket->pil + irq_action) = action;
959 enable_irq(irq);
960
961 spin_unlock_irqrestore(&irq_action_lock, flags);
962
963#ifdef CONFIG_SMP
964 distribute_irqs();
965#endif
966 return 0;
967} 894}
895EXPORT_SYMBOL(sparc_floppy_irq);
896#endif
968 897
969/* We really don't need these at all on the Sparc. We only have 898/* We really don't need these at all on the Sparc. We only have
970 * stubs here because they are exported to modules. 899 * stubs here because they are exported to modules.
diff --git a/arch/sparc64/kernel/semaphore.c b/arch/sparc64/kernel/semaphore.c
index 63496c43fe1..a809e63f03e 100644
--- a/arch/sparc64/kernel/semaphore.c
+++ b/arch/sparc64/kernel/semaphore.c
@@ -32,8 +32,9 @@ static __inline__ int __sem_update_count(struct semaphore *sem, int incr)
32" add %1, %4, %1\n" 32" add %1, %4, %1\n"
33" cas [%3], %0, %1\n" 33" cas [%3], %0, %1\n"
34" cmp %0, %1\n" 34" cmp %0, %1\n"
35" membar #StoreLoad | #StoreStore\n"
35" bne,pn %%icc, 1b\n" 36" bne,pn %%icc, 1b\n"
36" membar #StoreLoad | #StoreStore\n" 37" nop\n"
37 : "=&r" (old_count), "=&r" (tmp), "=m" (sem->count) 38 : "=&r" (old_count), "=&r" (tmp), "=m" (sem->count)
38 : "r" (&sem->count), "r" (incr), "m" (sem->count) 39 : "r" (&sem->count), "r" (incr), "m" (sem->count)
39 : "cc"); 40 : "cc");
@@ -71,8 +72,9 @@ void up(struct semaphore *sem)
71" cmp %%g1, %%g7\n" 72" cmp %%g1, %%g7\n"
72" bne,pn %%icc, 1b\n" 73" bne,pn %%icc, 1b\n"
73" addcc %%g7, 1, %%g0\n" 74" addcc %%g7, 1, %%g0\n"
75" membar #StoreLoad | #StoreStore\n"
74" ble,pn %%icc, 3f\n" 76" ble,pn %%icc, 3f\n"
75" membar #StoreLoad | #StoreStore\n" 77" nop\n"
76"2:\n" 78"2:\n"
77" .subsection 2\n" 79" .subsection 2\n"
78"3: mov %0, %%g1\n" 80"3: mov %0, %%g1\n"
@@ -128,8 +130,9 @@ void __sched down(struct semaphore *sem)
128" cmp %%g1, %%g7\n" 130" cmp %%g1, %%g7\n"
129" bne,pn %%icc, 1b\n" 131" bne,pn %%icc, 1b\n"
130" cmp %%g7, 1\n" 132" cmp %%g7, 1\n"
133" membar #StoreLoad | #StoreStore\n"
131" bl,pn %%icc, 3f\n" 134" bl,pn %%icc, 3f\n"
132" membar #StoreLoad | #StoreStore\n" 135" nop\n"
133"2:\n" 136"2:\n"
134" .subsection 2\n" 137" .subsection 2\n"
135"3: mov %0, %%g1\n" 138"3: mov %0, %%g1\n"
@@ -233,8 +236,9 @@ int __sched down_interruptible(struct semaphore *sem)
233" cmp %%g1, %%g7\n" 236" cmp %%g1, %%g7\n"
234" bne,pn %%icc, 1b\n" 237" bne,pn %%icc, 1b\n"
235" cmp %%g7, 1\n" 238" cmp %%g7, 1\n"
239" membar #StoreLoad | #StoreStore\n"
236" bl,pn %%icc, 3f\n" 240" bl,pn %%icc, 3f\n"
237" membar #StoreLoad | #StoreStore\n" 241" nop\n"
238"2:\n" 242"2:\n"
239" .subsection 2\n" 243" .subsection 2\n"
240"3: mov %2, %%g1\n" 244"3: mov %2, %%g1\n"
diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c
index e78cc53594f..56cd96f4a5c 100644
--- a/arch/sparc64/kernel/sparc64_ksyms.c
+++ b/arch/sparc64/kernel/sparc64_ksyms.c
@@ -227,7 +227,6 @@ EXPORT_SYMBOL(__flush_dcache_range);
227 227
228EXPORT_SYMBOL(mostek_lock); 228EXPORT_SYMBOL(mostek_lock);
229EXPORT_SYMBOL(mstk48t02_regs); 229EXPORT_SYMBOL(mstk48t02_regs);
230EXPORT_SYMBOL(request_fast_irq);
231#ifdef CONFIG_SUN_AUXIO 230#ifdef CONFIG_SUN_AUXIO
232EXPORT_SYMBOL(auxio_set_led); 231EXPORT_SYMBOL(auxio_set_led);
233EXPORT_SYMBOL(auxio_set_lte); 232EXPORT_SYMBOL(auxio_set_lte);
diff --git a/arch/sparc64/kernel/trampoline.S b/arch/sparc64/kernel/trampoline.S
index 2c8f9344b4e..3a145fc39cf 100644
--- a/arch/sparc64/kernel/trampoline.S
+++ b/arch/sparc64/kernel/trampoline.S
@@ -98,8 +98,9 @@ startup_continue:
98 98
99 sethi %hi(prom_entry_lock), %g2 99 sethi %hi(prom_entry_lock), %g2
1001: ldstub [%g2 + %lo(prom_entry_lock)], %g1 1001: ldstub [%g2 + %lo(prom_entry_lock)], %g1
101 membar #StoreLoad | #StoreStore
101 brnz,pn %g1, 1b 102 brnz,pn %g1, 1b
102 membar #StoreLoad | #StoreStore 103 nop
103 104
104 sethi %hi(p1275buf), %g2 105 sethi %hi(p1275buf), %g2
105 or %g2, %lo(p1275buf), %g2 106 or %g2, %lo(p1275buf), %g2
diff --git a/arch/sparc64/lib/U1memcpy.S b/arch/sparc64/lib/U1memcpy.S
index da9b520c718..bafd2fc07ac 100644
--- a/arch/sparc64/lib/U1memcpy.S
+++ b/arch/sparc64/lib/U1memcpy.S
@@ -87,14 +87,17 @@
87#define LOOP_CHUNK3(src, dest, len, branch_dest) \ 87#define LOOP_CHUNK3(src, dest, len, branch_dest) \
88 MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest) 88 MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest)
89 89
90#define DO_SYNC membar #Sync;
90#define STORE_SYNC(dest, fsrc) \ 91#define STORE_SYNC(dest, fsrc) \
91 EX_ST(STORE_BLK(%fsrc, %dest)); \ 92 EX_ST(STORE_BLK(%fsrc, %dest)); \
92 add %dest, 0x40, %dest; 93 add %dest, 0x40, %dest; \
94 DO_SYNC
93 95
94#define STORE_JUMP(dest, fsrc, target) \ 96#define STORE_JUMP(dest, fsrc, target) \
95 EX_ST(STORE_BLK(%fsrc, %dest)); \ 97 EX_ST(STORE_BLK(%fsrc, %dest)); \
96 add %dest, 0x40, %dest; \ 98 add %dest, 0x40, %dest; \
97 ba,pt %xcc, target; 99 ba,pt %xcc, target; \
100 nop;
98 101
99#define FINISH_VISCHUNK(dest, f0, f1, left) \ 102#define FINISH_VISCHUNK(dest, f0, f1, left) \
100 subcc %left, 8, %left;\ 103 subcc %left, 8, %left;\
@@ -239,17 +242,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
239 ba,pt %xcc, 1b+4 242 ba,pt %xcc, 1b+4
240 faligndata %f0, %f2, %f48 243 faligndata %f0, %f2, %f48
2411: FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) 2441: FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
242 STORE_SYNC(o0, f48) membar #Sync 245 STORE_SYNC(o0, f48)
243 FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) 246 FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
244 STORE_JUMP(o0, f48, 40f) membar #Sync 247 STORE_JUMP(o0, f48, 40f)
2452: FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) 2482: FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
246 STORE_SYNC(o0, f48) membar #Sync 249 STORE_SYNC(o0, f48)
247 FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) 250 FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
248 STORE_JUMP(o0, f48, 48f) membar #Sync 251 STORE_JUMP(o0, f48, 48f)
2493: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) 2523: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
250 STORE_SYNC(o0, f48) membar #Sync 253 STORE_SYNC(o0, f48)
251 FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) 254 FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
252 STORE_JUMP(o0, f48, 56f) membar #Sync 255 STORE_JUMP(o0, f48, 56f)
253 256
2541: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) 2571: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
255 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) 258 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -260,17 +263,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
260 ba,pt %xcc, 1b+4 263 ba,pt %xcc, 1b+4
261 faligndata %f2, %f4, %f48 264 faligndata %f2, %f4, %f48
2621: FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) 2651: FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
263 STORE_SYNC(o0, f48) membar #Sync 266 STORE_SYNC(o0, f48)
264 FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) 267 FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
265 STORE_JUMP(o0, f48, 41f) membar #Sync 268 STORE_JUMP(o0, f48, 41f)
2662: FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) 2692: FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
267 STORE_SYNC(o0, f48) membar #Sync 270 STORE_SYNC(o0, f48)
268 FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) 271 FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
269 STORE_JUMP(o0, f48, 49f) membar #Sync 272 STORE_JUMP(o0, f48, 49f)
2703: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) 2733: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
271 STORE_SYNC(o0, f48) membar #Sync 274 STORE_SYNC(o0, f48)
272 FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) 275 FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
273 STORE_JUMP(o0, f48, 57f) membar #Sync 276 STORE_JUMP(o0, f48, 57f)
274 277
2751: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) 2781: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
276 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) 279 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -281,17 +284,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
281 ba,pt %xcc, 1b+4 284 ba,pt %xcc, 1b+4
282 faligndata %f4, %f6, %f48 285 faligndata %f4, %f6, %f48
2831: FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) 2861: FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
284 STORE_SYNC(o0, f48) membar #Sync 287 STORE_SYNC(o0, f48)
285 FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) 288 FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
286 STORE_JUMP(o0, f48, 42f) membar #Sync 289 STORE_JUMP(o0, f48, 42f)
2872: FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) 2902: FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
288 STORE_SYNC(o0, f48) membar #Sync 291 STORE_SYNC(o0, f48)
289 FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) 292 FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
290 STORE_JUMP(o0, f48, 50f) membar #Sync 293 STORE_JUMP(o0, f48, 50f)
2913: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) 2943: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
292 STORE_SYNC(o0, f48) membar #Sync 295 STORE_SYNC(o0, f48)
293 FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) 296 FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
294 STORE_JUMP(o0, f48, 58f) membar #Sync 297 STORE_JUMP(o0, f48, 58f)
295 298
2961: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) 2991: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
297 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) 300 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -302,17 +305,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
302 ba,pt %xcc, 1b+4 305 ba,pt %xcc, 1b+4
303 faligndata %f6, %f8, %f48 306 faligndata %f6, %f8, %f48
3041: FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) 3071: FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
305 STORE_SYNC(o0, f48) membar #Sync 308 STORE_SYNC(o0, f48)
306 FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) 309 FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
307 STORE_JUMP(o0, f48, 43f) membar #Sync 310 STORE_JUMP(o0, f48, 43f)
3082: FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) 3112: FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
309 STORE_SYNC(o0, f48) membar #Sync 312 STORE_SYNC(o0, f48)
310 FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) 313 FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
311 STORE_JUMP(o0, f48, 51f) membar #Sync 314 STORE_JUMP(o0, f48, 51f)
3123: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) 3153: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
313 STORE_SYNC(o0, f48) membar #Sync 316 STORE_SYNC(o0, f48)
314 FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) 317 FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
315 STORE_JUMP(o0, f48, 59f) membar #Sync 318 STORE_JUMP(o0, f48, 59f)
316 319
3171: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) 3201: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
318 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) 321 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -323,17 +326,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
323 ba,pt %xcc, 1b+4 326 ba,pt %xcc, 1b+4
324 faligndata %f8, %f10, %f48 327 faligndata %f8, %f10, %f48
3251: FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) 3281: FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
326 STORE_SYNC(o0, f48) membar #Sync 329 STORE_SYNC(o0, f48)
327 FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) 330 FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
328 STORE_JUMP(o0, f48, 44f) membar #Sync 331 STORE_JUMP(o0, f48, 44f)
3292: FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) 3322: FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
330 STORE_SYNC(o0, f48) membar #Sync 333 STORE_SYNC(o0, f48)
331 FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) 334 FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
332 STORE_JUMP(o0, f48, 52f) membar #Sync 335 STORE_JUMP(o0, f48, 52f)
3333: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) 3363: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
334 STORE_SYNC(o0, f48) membar #Sync 337 STORE_SYNC(o0, f48)
335 FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) 338 FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
336 STORE_JUMP(o0, f48, 60f) membar #Sync 339 STORE_JUMP(o0, f48, 60f)
337 340
3381: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) 3411: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
339 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) 342 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -344,17 +347,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
344 ba,pt %xcc, 1b+4 347 ba,pt %xcc, 1b+4
345 faligndata %f10, %f12, %f48 348 faligndata %f10, %f12, %f48
3461: FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) 3491: FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
347 STORE_SYNC(o0, f48) membar #Sync 350 STORE_SYNC(o0, f48)
348 FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) 351 FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
349 STORE_JUMP(o0, f48, 45f) membar #Sync 352 STORE_JUMP(o0, f48, 45f)
3502: FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) 3532: FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
351 STORE_SYNC(o0, f48) membar #Sync 354 STORE_SYNC(o0, f48)
352 FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) 355 FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
353 STORE_JUMP(o0, f48, 53f) membar #Sync 356 STORE_JUMP(o0, f48, 53f)
3543: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) 3573: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
355 STORE_SYNC(o0, f48) membar #Sync 358 STORE_SYNC(o0, f48)
356 FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) 359 FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
357 STORE_JUMP(o0, f48, 61f) membar #Sync 360 STORE_JUMP(o0, f48, 61f)
358 361
3591: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) 3621: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
360 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) 363 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -365,17 +368,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
365 ba,pt %xcc, 1b+4 368 ba,pt %xcc, 1b+4
366 faligndata %f12, %f14, %f48 369 faligndata %f12, %f14, %f48
3671: FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) 3701: FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
368 STORE_SYNC(o0, f48) membar #Sync 371 STORE_SYNC(o0, f48)
369 FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) 372 FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
370 STORE_JUMP(o0, f48, 46f) membar #Sync 373 STORE_JUMP(o0, f48, 46f)
3712: FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) 3742: FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
372 STORE_SYNC(o0, f48) membar #Sync 375 STORE_SYNC(o0, f48)
373 FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) 376 FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
374 STORE_JUMP(o0, f48, 54f) membar #Sync 377 STORE_JUMP(o0, f48, 54f)
3753: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) 3783: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
376 STORE_SYNC(o0, f48) membar #Sync 379 STORE_SYNC(o0, f48)
377 FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) 380 FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
378 STORE_JUMP(o0, f48, 62f) membar #Sync 381 STORE_JUMP(o0, f48, 62f)
379 382
3801: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) 3831: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
381 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) 384 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -386,17 +389,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
386 ba,pt %xcc, 1b+4 389 ba,pt %xcc, 1b+4
387 faligndata %f14, %f16, %f48 390 faligndata %f14, %f16, %f48
3881: FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) 3911: FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
389 STORE_SYNC(o0, f48) membar #Sync 392 STORE_SYNC(o0, f48)
390 FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) 393 FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
391 STORE_JUMP(o0, f48, 47f) membar #Sync 394 STORE_JUMP(o0, f48, 47f)
3922: FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) 3952: FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
393 STORE_SYNC(o0, f48) membar #Sync 396 STORE_SYNC(o0, f48)
394 FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) 397 FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
395 STORE_JUMP(o0, f48, 55f) membar #Sync 398 STORE_JUMP(o0, f48, 55f)
3963: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) 3993: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
397 STORE_SYNC(o0, f48) membar #Sync 400 STORE_SYNC(o0, f48)
398 FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) 401 FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
399 STORE_JUMP(o0, f48, 63f) membar #Sync 402 STORE_JUMP(o0, f48, 63f)
400 403
40140: FINISH_VISCHUNK(o0, f0, f2, g3) 40440: FINISH_VISCHUNK(o0, f0, f2, g3)
40241: FINISH_VISCHUNK(o0, f2, f4, g3) 40541: FINISH_VISCHUNK(o0, f2, f4, g3)
diff --git a/arch/sparc64/lib/VISsave.S b/arch/sparc64/lib/VISsave.S
index 65e328d600a..4e18989bd60 100644
--- a/arch/sparc64/lib/VISsave.S
+++ b/arch/sparc64/lib/VISsave.S
@@ -72,7 +72,11 @@ vis1: ldub [%g6 + TI_FPSAVED], %g3
72 72
73 stda %f48, [%g3 + %g1] ASI_BLK_P 73 stda %f48, [%g3 + %g1] ASI_BLK_P
745: membar #Sync 745: membar #Sync
75 jmpl %g7 + %g0, %g0 75 ba,pt %xcc, 80f
76 nop
77
78 .align 32
7980: jmpl %g7 + %g0, %g0
76 nop 80 nop
77 81
786: ldub [%g3 + TI_FPSAVED], %o5 826: ldub [%g3 + TI_FPSAVED], %o5
@@ -87,8 +91,11 @@ vis1: ldub [%g6 + TI_FPSAVED], %g3
87 stda %f32, [%g2 + %g1] ASI_BLK_P 91 stda %f32, [%g2 + %g1] ASI_BLK_P
88 stda %f48, [%g3 + %g1] ASI_BLK_P 92 stda %f48, [%g3 + %g1] ASI_BLK_P
89 membar #Sync 93 membar #Sync
90 jmpl %g7 + %g0, %g0 94 ba,pt %xcc, 80f
95 nop
91 96
97 .align 32
9880: jmpl %g7 + %g0, %g0
92 nop 99 nop
93 100
94 .align 32 101 .align 32
@@ -126,6 +133,10 @@ VISenterhalf:
126 stda %f0, [%g2 + %g1] ASI_BLK_P 133 stda %f0, [%g2 + %g1] ASI_BLK_P
127 stda %f16, [%g3 + %g1] ASI_BLK_P 134 stda %f16, [%g3 + %g1] ASI_BLK_P
128 membar #Sync 135 membar #Sync
136 ba,pt %xcc, 4f
137 nop
138
139 .align 32
1294: and %o5, FPRS_DU, %o5 1404: and %o5, FPRS_DU, %o5
130 jmpl %g7 + %g0, %g0 141 jmpl %g7 + %g0, %g0
131 wr %o5, FPRS_FEF, %fprs 142 wr %o5, FPRS_FEF, %fprs
diff --git a/arch/sparc64/lib/atomic.S b/arch/sparc64/lib/atomic.S
index e528b8d1a3e..faf87c31598 100644
--- a/arch/sparc64/lib/atomic.S
+++ b/arch/sparc64/lib/atomic.S
@@ -7,18 +7,6 @@
7#include <linux/config.h> 7#include <linux/config.h>
8#include <asm/asi.h> 8#include <asm/asi.h>
9 9
10 /* On SMP we need to use memory barriers to ensure
11 * correct memory operation ordering, nop these out
12 * for uniprocessor.
13 */
14#ifdef CONFIG_SMP
15#define ATOMIC_PRE_BARRIER membar #StoreLoad | #LoadLoad
16#define ATOMIC_POST_BARRIER membar #StoreLoad | #StoreStore
17#else
18#define ATOMIC_PRE_BARRIER nop
19#define ATOMIC_POST_BARRIER nop
20#endif
21
22 .text 10 .text
23 11
24 /* Two versions of the atomic routines, one that 12 /* Two versions of the atomic routines, one that
@@ -52,6 +40,24 @@ atomic_sub: /* %o0 = decrement, %o1 = atomic_ptr */
52 nop 40 nop
53 .size atomic_sub, .-atomic_sub 41 .size atomic_sub, .-atomic_sub
54 42
43 /* On SMP we need to use memory barriers to ensure
44 * correct memory operation ordering, nop these out
45 * for uniprocessor.
46 */
47#ifdef CONFIG_SMP
48
49#define ATOMIC_PRE_BARRIER membar #StoreLoad | #LoadLoad;
50#define ATOMIC_POST_BARRIER \
51 ba,pt %xcc, 80b; \
52 membar #StoreLoad | #StoreStore
53
5480: retl
55 nop
56#else
57#define ATOMIC_PRE_BARRIER
58#define ATOMIC_POST_BARRIER
59#endif
60
55 .globl atomic_add_ret 61 .globl atomic_add_ret
56 .type atomic_add_ret,#function 62 .type atomic_add_ret,#function
57atomic_add_ret: /* %o0 = increment, %o1 = atomic_ptr */ 63atomic_add_ret: /* %o0 = increment, %o1 = atomic_ptr */
@@ -62,9 +68,10 @@ atomic_add_ret: /* %o0 = increment, %o1 = atomic_ptr */
62 cmp %g1, %g7 68 cmp %g1, %g7
63 bne,pn %icc, 1b 69 bne,pn %icc, 1b
64 add %g7, %o0, %g7 70 add %g7, %o0, %g7
71 sra %g7, 0, %o0
65 ATOMIC_POST_BARRIER 72 ATOMIC_POST_BARRIER
66 retl 73 retl
67 sra %g7, 0, %o0 74 nop
68 .size atomic_add_ret, .-atomic_add_ret 75 .size atomic_add_ret, .-atomic_add_ret
69 76
70 .globl atomic_sub_ret 77 .globl atomic_sub_ret
@@ -77,9 +84,10 @@ atomic_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */
77 cmp %g1, %g7 84 cmp %g1, %g7
78 bne,pn %icc, 1b 85 bne,pn %icc, 1b
79 sub %g7, %o0, %g7 86 sub %g7, %o0, %g7
87 sra %g7, 0, %o0
80 ATOMIC_POST_BARRIER 88 ATOMIC_POST_BARRIER
81 retl 89 retl
82 sra %g7, 0, %o0 90 nop
83 .size atomic_sub_ret, .-atomic_sub_ret 91 .size atomic_sub_ret, .-atomic_sub_ret
84 92
85 .globl atomic64_add 93 .globl atomic64_add
@@ -118,9 +126,10 @@ atomic64_add_ret: /* %o0 = increment, %o1 = atomic_ptr */
118 cmp %g1, %g7 126 cmp %g1, %g7
119 bne,pn %xcc, 1b 127 bne,pn %xcc, 1b
120 add %g7, %o0, %g7 128 add %g7, %o0, %g7
129 mov %g7, %o0
121 ATOMIC_POST_BARRIER 130 ATOMIC_POST_BARRIER
122 retl 131 retl
123 mov %g7, %o0 132 nop
124 .size atomic64_add_ret, .-atomic64_add_ret 133 .size atomic64_add_ret, .-atomic64_add_ret
125 134
126 .globl atomic64_sub_ret 135 .globl atomic64_sub_ret
@@ -133,7 +142,8 @@ atomic64_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */
133 cmp %g1, %g7 142 cmp %g1, %g7
134 bne,pn %xcc, 1b 143 bne,pn %xcc, 1b
135 sub %g7, %o0, %g7 144 sub %g7, %o0, %g7
145 mov %g7, %o0
136 ATOMIC_POST_BARRIER 146 ATOMIC_POST_BARRIER
137 retl 147 retl
138 mov %g7, %o0 148 nop
139 .size atomic64_sub_ret, .-atomic64_sub_ret 149 .size atomic64_sub_ret, .-atomic64_sub_ret
diff --git a/arch/sparc64/lib/bitops.S b/arch/sparc64/lib/bitops.S
index 886dcd2b376..31afbfe6c1e 100644
--- a/arch/sparc64/lib/bitops.S
+++ b/arch/sparc64/lib/bitops.S
@@ -7,20 +7,26 @@
7#include <linux/config.h> 7#include <linux/config.h>
8#include <asm/asi.h> 8#include <asm/asi.h>
9 9
10 .text
11
10 /* On SMP we need to use memory barriers to ensure 12 /* On SMP we need to use memory barriers to ensure
11 * correct memory operation ordering, nop these out 13 * correct memory operation ordering, nop these out
12 * for uniprocessor. 14 * for uniprocessor.
13 */ 15 */
16
14#ifdef CONFIG_SMP 17#ifdef CONFIG_SMP
15#define BITOP_PRE_BARRIER membar #StoreLoad | #LoadLoad 18#define BITOP_PRE_BARRIER membar #StoreLoad | #LoadLoad
16#define BITOP_POST_BARRIER membar #StoreLoad | #StoreStore 19#define BITOP_POST_BARRIER \
20 ba,pt %xcc, 80b; \
21 membar #StoreLoad | #StoreStore
22
2380: retl
24 nop
17#else 25#else
18#define BITOP_PRE_BARRIER nop 26#define BITOP_PRE_BARRIER
19#define BITOP_POST_BARRIER nop 27#define BITOP_POST_BARRIER
20#endif 28#endif
21 29
22 .text
23
24 .globl test_and_set_bit 30 .globl test_and_set_bit
25 .type test_and_set_bit,#function 31 .type test_and_set_bit,#function
26test_and_set_bit: /* %o0=nr, %o1=addr */ 32test_and_set_bit: /* %o0=nr, %o1=addr */
@@ -37,10 +43,11 @@ test_and_set_bit: /* %o0=nr, %o1=addr */
37 cmp %g7, %g1 43 cmp %g7, %g1
38 bne,pn %xcc, 1b 44 bne,pn %xcc, 1b
39 and %g7, %o2, %g2 45 and %g7, %o2, %g2
40 BITOP_POST_BARRIER
41 clr %o0 46 clr %o0
47 movrne %g2, 1, %o0
48 BITOP_POST_BARRIER
42 retl 49 retl
43 movrne %g2, 1, %o0 50 nop
44 .size test_and_set_bit, .-test_and_set_bit 51 .size test_and_set_bit, .-test_and_set_bit
45 52
46 .globl test_and_clear_bit 53 .globl test_and_clear_bit
@@ -59,10 +66,11 @@ test_and_clear_bit: /* %o0=nr, %o1=addr */
59 cmp %g7, %g1 66 cmp %g7, %g1
60 bne,pn %xcc, 1b 67 bne,pn %xcc, 1b
61 and %g7, %o2, %g2 68 and %g7, %o2, %g2
62 BITOP_POST_BARRIER
63 clr %o0 69 clr %o0
70 movrne %g2, 1, %o0
71 BITOP_POST_BARRIER
64 retl 72 retl
65 movrne %g2, 1, %o0 73 nop
66 .size test_and_clear_bit, .-test_and_clear_bit 74 .size test_and_clear_bit, .-test_and_clear_bit
67 75
68 .globl test_and_change_bit 76 .globl test_and_change_bit
@@ -81,10 +89,11 @@ test_and_change_bit: /* %o0=nr, %o1=addr */
81 cmp %g7, %g1 89 cmp %g7, %g1
82 bne,pn %xcc, 1b 90 bne,pn %xcc, 1b
83 and %g7, %o2, %g2 91 and %g7, %o2, %g2
84 BITOP_POST_BARRIER
85 clr %o0 92 clr %o0
93 movrne %g2, 1, %o0
94 BITOP_POST_BARRIER
86 retl 95 retl
87 movrne %g2, 1, %o0 96 nop
88 .size test_and_change_bit, .-test_and_change_bit 97 .size test_and_change_bit, .-test_and_change_bit
89 98
90 .globl set_bit 99 .globl set_bit
diff --git a/arch/sparc64/lib/debuglocks.c b/arch/sparc64/lib/debuglocks.c
index c421e0c6532..f03344cf784 100644
--- a/arch/sparc64/lib/debuglocks.c
+++ b/arch/sparc64/lib/debuglocks.c
@@ -252,8 +252,9 @@ wlock_again:
252" andn %%g1, %%g3, %%g7\n" 252" andn %%g1, %%g3, %%g7\n"
253" casx [%0], %%g1, %%g7\n" 253" casx [%0], %%g1, %%g7\n"
254" cmp %%g1, %%g7\n" 254" cmp %%g1, %%g7\n"
255" membar #StoreLoad | #StoreStore\n"
255" bne,pn %%xcc, 1b\n" 256" bne,pn %%xcc, 1b\n"
256" membar #StoreLoad | #StoreStore" 257" nop"
257 : /* no outputs */ 258 : /* no outputs */
258 : "r" (&(rw->lock)) 259 : "r" (&(rw->lock))
259 : "g3", "g1", "g7", "cc", "memory"); 260 : "g3", "g1", "g7", "cc", "memory");
@@ -351,8 +352,9 @@ int _do_write_trylock (rwlock_t *rw, char *str)
351" andn %%g1, %%g3, %%g7\n" 352" andn %%g1, %%g3, %%g7\n"
352" casx [%0], %%g1, %%g7\n" 353" casx [%0], %%g1, %%g7\n"
353" cmp %%g1, %%g7\n" 354" cmp %%g1, %%g7\n"
355" membar #StoreLoad | #StoreStore\n"
354" bne,pn %%xcc, 1b\n" 356" bne,pn %%xcc, 1b\n"
355" membar #StoreLoad | #StoreStore" 357" nop"
356 : /* no outputs */ 358 : /* no outputs */
357 : "r" (&(rw->lock)) 359 : "r" (&(rw->lock))
358 : "g3", "g1", "g7", "cc", "memory"); 360 : "g3", "g1", "g7", "cc", "memory");
diff --git a/arch/sparc64/lib/dec_and_lock.S b/arch/sparc64/lib/dec_and_lock.S
index 7e6fdaebedb..8ee288dd0af 100644
--- a/arch/sparc64/lib/dec_and_lock.S
+++ b/arch/sparc64/lib/dec_and_lock.S
@@ -48,8 +48,9 @@ start_to_zero:
48#endif 48#endif
49to_zero: 49to_zero:
50 ldstub [%o1], %g3 50 ldstub [%o1], %g3
51 membar #StoreLoad | #StoreStore
51 brnz,pn %g3, spin_on_lock 52 brnz,pn %g3, spin_on_lock
52 membar #StoreLoad | #StoreStore 53 nop
53loop2: cas [%o0], %g2, %g7 /* ASSERT(g7 == 0) */ 54loop2: cas [%o0], %g2, %g7 /* ASSERT(g7 == 0) */
54 cmp %g2, %g7 55 cmp %g2, %g7
55 56
@@ -71,8 +72,9 @@ loop2: cas [%o0], %g2, %g7 /* ASSERT(g7 == 0) */
71 nop 72 nop
72spin_on_lock: 73spin_on_lock:
73 ldub [%o1], %g3 74 ldub [%o1], %g3
75 membar #LoadLoad
74 brnz,pt %g3, spin_on_lock 76 brnz,pt %g3, spin_on_lock
75 membar #LoadLoad 77 nop
76 ba,pt %xcc, to_zero 78 ba,pt %xcc, to_zero
77 nop 79 nop
78 nop 80 nop
diff --git a/arch/sparc64/lib/rwsem.S b/arch/sparc64/lib/rwsem.S
index 174ff7b9164..75f0e6b951d 100644
--- a/arch/sparc64/lib/rwsem.S
+++ b/arch/sparc64/lib/rwsem.S
@@ -17,8 +17,9 @@ __down_read:
17 bne,pn %icc, 1b 17 bne,pn %icc, 1b
18 add %g7, 1, %g7 18 add %g7, 1, %g7
19 cmp %g7, 0 19 cmp %g7, 0
20 membar #StoreLoad | #StoreStore
20 bl,pn %icc, 3f 21 bl,pn %icc, 3f
21 membar #StoreLoad | #StoreStore 22 nop
222: 232:
23 retl 24 retl
24 nop 25 nop
@@ -57,8 +58,9 @@ __down_write:
57 cmp %g3, %g7 58 cmp %g3, %g7
58 bne,pn %icc, 1b 59 bne,pn %icc, 1b
59 cmp %g7, 0 60 cmp %g7, 0
61 membar #StoreLoad | #StoreStore
60 bne,pn %icc, 3f 62 bne,pn %icc, 3f
61 membar #StoreLoad | #StoreStore 63 nop
622: retl 642: retl
63 nop 65 nop
643: 663:
@@ -97,8 +99,9 @@ __up_read:
97 cmp %g1, %g7 99 cmp %g1, %g7
98 bne,pn %icc, 1b 100 bne,pn %icc, 1b
99 cmp %g7, 0 101 cmp %g7, 0
102 membar #StoreLoad | #StoreStore
100 bl,pn %icc, 3f 103 bl,pn %icc, 3f
101 membar #StoreLoad | #StoreStore 104 nop
1022: retl 1052: retl
103 nop 106 nop
1043: sethi %hi(RWSEM_ACTIVE_MASK), %g1 1073: sethi %hi(RWSEM_ACTIVE_MASK), %g1
@@ -126,8 +129,9 @@ __up_write:
126 bne,pn %icc, 1b 129 bne,pn %icc, 1b
127 sub %g7, %g1, %g7 130 sub %g7, %g1, %g7
128 cmp %g7, 0 131 cmp %g7, 0
132 membar #StoreLoad | #StoreStore
129 bl,pn %icc, 3f 133 bl,pn %icc, 3f
130 membar #StoreLoad | #StoreStore 134 nop
1312: 1352:
132 retl 136 retl
133 nop 137 nop
@@ -151,8 +155,9 @@ __downgrade_write:
151 bne,pn %icc, 1b 155 bne,pn %icc, 1b
152 sub %g7, %g1, %g7 156 sub %g7, %g1, %g7
153 cmp %g7, 0 157 cmp %g7, 0
158 membar #StoreLoad | #StoreStore
154 bl,pn %icc, 3f 159 bl,pn %icc, 3f
155 membar #StoreLoad | #StoreStore 160 nop
1562: 1612:
157 retl 162 retl
158 nop 163 nop
diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
index 9c5222075da..8fc413cb6ac 100644
--- a/arch/sparc64/mm/init.c
+++ b/arch/sparc64/mm/init.c
@@ -136,8 +136,9 @@ static __inline__ void set_dcache_dirty(struct page *page, int this_cpu)
136 "or %%g1, %0, %%g1\n\t" 136 "or %%g1, %0, %%g1\n\t"
137 "casx [%2], %%g7, %%g1\n\t" 137 "casx [%2], %%g7, %%g1\n\t"
138 "cmp %%g7, %%g1\n\t" 138 "cmp %%g7, %%g1\n\t"
139 "membar #StoreLoad | #StoreStore\n\t"
139 "bne,pn %%xcc, 1b\n\t" 140 "bne,pn %%xcc, 1b\n\t"
140 " membar #StoreLoad | #StoreStore" 141 " nop"
141 : /* no outputs */ 142 : /* no outputs */
142 : "r" (mask), "r" (non_cpu_bits), "r" (&page->flags) 143 : "r" (mask), "r" (non_cpu_bits), "r" (&page->flags)
143 : "g1", "g7"); 144 : "g1", "g7");
@@ -157,8 +158,9 @@ static __inline__ void clear_dcache_dirty_cpu(struct page *page, unsigned long c
157 " andn %%g7, %1, %%g1\n\t" 158 " andn %%g7, %1, %%g1\n\t"
158 "casx [%2], %%g7, %%g1\n\t" 159 "casx [%2], %%g7, %%g1\n\t"
159 "cmp %%g7, %%g1\n\t" 160 "cmp %%g7, %%g1\n\t"
161 "membar #StoreLoad | #StoreStore\n\t"
160 "bne,pn %%xcc, 1b\n\t" 162 "bne,pn %%xcc, 1b\n\t"
161 " membar #StoreLoad | #StoreStore\n" 163 " nop\n"
162 "2:" 164 "2:"
163 : /* no outputs */ 165 : /* no outputs */
164 : "r" (cpu), "r" (mask), "r" (&page->flags), 166 : "r" (cpu), "r" (mask), "r" (&page->flags),
diff --git a/arch/sparc64/mm/ultra.S b/arch/sparc64/mm/ultra.S
index 7a093432101..7a2431d3abc 100644
--- a/arch/sparc64/mm/ultra.S
+++ b/arch/sparc64/mm/ultra.S
@@ -266,8 +266,9 @@ __cheetah_flush_tlb_pending: /* 22 insns */
266 andn %o3, 1, %o3 266 andn %o3, 1, %o3
267 stxa %g0, [%o3] ASI_IMMU_DEMAP 267 stxa %g0, [%o3] ASI_IMMU_DEMAP
2682: stxa %g0, [%o3] ASI_DMMU_DEMAP 2682: stxa %g0, [%o3] ASI_DMMU_DEMAP
269 membar #Sync
269 brnz,pt %o1, 1b 270 brnz,pt %o1, 1b
270 membar #Sync 271 nop
271 stxa %g2, [%o4] ASI_DMMU 272 stxa %g2, [%o4] ASI_DMMU
272 flush %g6 273 flush %g6
273 wrpr %g0, 0, %tl 274 wrpr %g0, 0, %tl
diff --git a/include/asm-sparc64/auxio.h b/include/asm-sparc64/auxio.h
index 5eb01dd4715..81a590a50a1 100644
--- a/include/asm-sparc64/auxio.h
+++ b/include/asm-sparc64/auxio.h
@@ -75,6 +75,8 @@
75 75
76#ifndef __ASSEMBLY__ 76#ifndef __ASSEMBLY__
77 77
78extern void __iomem *auxio_register;
79
78#define AUXIO_LTE_ON 1 80#define AUXIO_LTE_ON 1
79#define AUXIO_LTE_OFF 0 81#define AUXIO_LTE_OFF 0
80 82
diff --git a/include/asm-sparc64/floppy.h b/include/asm-sparc64/floppy.h
index e071b4b4edf..49d49a28594 100644
--- a/include/asm-sparc64/floppy.h
+++ b/include/asm-sparc64/floppy.h
@@ -159,7 +159,7 @@ static void sun_82077_fd_outb(unsigned char value, unsigned long port)
159 * underruns. If non-zero, doing_pdma encodes the direction of 159 * underruns. If non-zero, doing_pdma encodes the direction of
160 * the transfer for debugging. 1=read 2=write 160 * the transfer for debugging. 1=read 2=write
161 */ 161 */
162char *pdma_vaddr; 162unsigned char *pdma_vaddr;
163unsigned long pdma_size; 163unsigned long pdma_size;
164volatile int doing_pdma = 0; 164volatile int doing_pdma = 0;
165 165
@@ -209,8 +209,7 @@ static void sun_fd_enable_dma(void)
209 pdma_areasize = pdma_size; 209 pdma_areasize = pdma_size;
210} 210}
211 211
212/* Our low-level entry point in arch/sparc/kernel/entry.S */ 212extern irqreturn_t sparc_floppy_irq(int, void *, struct pt_regs *);
213extern irqreturn_t floppy_hardint(int irq, void *unused, struct pt_regs *regs);
214 213
215static int sun_fd_request_irq(void) 214static int sun_fd_request_irq(void)
216{ 215{
@@ -220,8 +219,8 @@ static int sun_fd_request_irq(void)
220 if(!once) { 219 if(!once) {
221 once = 1; 220 once = 1;
222 221
223 error = request_fast_irq(FLOPPY_IRQ, floppy_hardint, 222 error = request_irq(FLOPPY_IRQ, sparc_floppy_irq,
224 SA_INTERRUPT, "floppy", NULL); 223 SA_INTERRUPT, "floppy", NULL);
225 224
226 return ((error == 0) ? 0 : -1); 225 return ((error == 0) ? 0 : -1);
227 } 226 }
@@ -615,7 +614,7 @@ static unsigned long __init sun_floppy_init(void)
615 struct linux_ebus *ebus; 614 struct linux_ebus *ebus;
616 struct linux_ebus_device *edev = NULL; 615 struct linux_ebus_device *edev = NULL;
617 unsigned long config = 0; 616 unsigned long config = 0;
618 unsigned long auxio_reg; 617 void __iomem *auxio_reg;
619 618
620 for_each_ebus(ebus) { 619 for_each_ebus(ebus) {
621 for_each_ebusdev(edev, ebus) { 620 for_each_ebusdev(edev, ebus) {
@@ -642,7 +641,7 @@ static unsigned long __init sun_floppy_init(void)
642 /* Make sure the high density bit is set, some systems 641 /* Make sure the high density bit is set, some systems
643 * (most notably Ultra5/Ultra10) come up with it clear. 642 * (most notably Ultra5/Ultra10) come up with it clear.
644 */ 643 */
645 auxio_reg = edev->resource[2].start; 644 auxio_reg = (void __iomem *) edev->resource[2].start;
646 writel(readl(auxio_reg)|0x2, auxio_reg); 645 writel(readl(auxio_reg)|0x2, auxio_reg);
647 646
648 sun_pci_ebus_dev = ebus->self; 647 sun_pci_ebus_dev = ebus->self;
@@ -650,7 +649,8 @@ static unsigned long __init sun_floppy_init(void)
650 spin_lock_init(&sun_pci_fd_ebus_dma.lock); 649 spin_lock_init(&sun_pci_fd_ebus_dma.lock);
651 650
652 /* XXX ioremap */ 651 /* XXX ioremap */
653 sun_pci_fd_ebus_dma.regs = edev->resource[1].start; 652 sun_pci_fd_ebus_dma.regs = (void __iomem *)
653 edev->resource[1].start;
654 if (!sun_pci_fd_ebus_dma.regs) 654 if (!sun_pci_fd_ebus_dma.regs)
655 return 0; 655 return 0;
656 656
diff --git a/include/asm-sparc64/irq.h b/include/asm-sparc64/irq.h
index 3aef0ca6775..018e2e46082 100644
--- a/include/asm-sparc64/irq.h
+++ b/include/asm-sparc64/irq.h
@@ -19,7 +19,7 @@
19/* You should not mess with this directly. That's the job of irq.c. 19/* You should not mess with this directly. That's the job of irq.c.
20 * 20 *
21 * If you make changes here, please update hand coded assembler of 21 * If you make changes here, please update hand coded assembler of
22 * SBUS/floppy interrupt handler in entry.S -DaveM 22 * the vectored interrupt trap handler in entry.S -DaveM
23 * 23 *
24 * This is currently one DCACHE line, two buckets per L2 cache 24 * This is currently one DCACHE line, two buckets per L2 cache
25 * line. Keep this in mind please. 25 * line. Keep this in mind please.
@@ -122,11 +122,6 @@ extern void enable_irq(unsigned int);
122extern unsigned int build_irq(int pil, int inofixup, unsigned long iclr, unsigned long imap); 122extern unsigned int build_irq(int pil, int inofixup, unsigned long iclr, unsigned long imap);
123extern unsigned int sbus_build_irq(void *sbus, unsigned int ino); 123extern unsigned int sbus_build_irq(void *sbus, unsigned int ino);
124 124
125extern int request_fast_irq(unsigned int irq,
126 irqreturn_t (*handler)(int, void *, struct pt_regs *),
127 unsigned long flags, __const__ char *devname,
128 void *dev_id);
129
130static __inline__ void set_softint(unsigned long bits) 125static __inline__ void set_softint(unsigned long bits)
131{ 126{
132 __asm__ __volatile__("wr %0, 0x0, %%set_softint" 127 __asm__ __volatile__("wr %0, 0x0, %%set_softint"
diff --git a/include/asm-sparc64/rwsem.h b/include/asm-sparc64/rwsem.h
index bf2ae90ed3d..a1cc94f9598 100644
--- a/include/asm-sparc64/rwsem.h
+++ b/include/asm-sparc64/rwsem.h
@@ -55,8 +55,9 @@ static __inline__ int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
55 "add %%g1, %1, %%g7\n\t" 55 "add %%g1, %1, %%g7\n\t"
56 "cas [%2], %%g1, %%g7\n\t" 56 "cas [%2], %%g1, %%g7\n\t"
57 "cmp %%g1, %%g7\n\t" 57 "cmp %%g1, %%g7\n\t"
58 "membar #StoreLoad | #StoreStore\n\t"
58 "bne,pn %%icc, 1b\n\t" 59 "bne,pn %%icc, 1b\n\t"
59 " membar #StoreLoad | #StoreStore\n\t" 60 " nop\n\t"
60 "mov %%g7, %0\n\t" 61 "mov %%g7, %0\n\t"
61 : "=&r" (tmp) 62 : "=&r" (tmp)
62 : "0" (tmp), "r" (sem) 63 : "0" (tmp), "r" (sem)
diff --git a/include/asm-sparc64/spinlock.h b/include/asm-sparc64/spinlock.h
index db7581bdb53..9cb93a5c2b4 100644
--- a/include/asm-sparc64/spinlock.h
+++ b/include/asm-sparc64/spinlock.h
@@ -52,12 +52,14 @@ static inline void _raw_spin_lock(spinlock_t *lock)
52 52
53 __asm__ __volatile__( 53 __asm__ __volatile__(
54"1: ldstub [%1], %0\n" 54"1: ldstub [%1], %0\n"
55" membar #StoreLoad | #StoreStore\n"
55" brnz,pn %0, 2f\n" 56" brnz,pn %0, 2f\n"
56" membar #StoreLoad | #StoreStore\n" 57" nop\n"
57" .subsection 2\n" 58" .subsection 2\n"
58"2: ldub [%1], %0\n" 59"2: ldub [%1], %0\n"
60" membar #LoadLoad\n"
59" brnz,pt %0, 2b\n" 61" brnz,pt %0, 2b\n"
60" membar #LoadLoad\n" 62" nop\n"
61" ba,a,pt %%xcc, 1b\n" 63" ba,a,pt %%xcc, 1b\n"
62" .previous" 64" .previous"
63 : "=&r" (tmp) 65 : "=&r" (tmp)
@@ -95,16 +97,18 @@ static inline void _raw_spin_lock_flags(spinlock_t *lock, unsigned long flags)
95 97
96 __asm__ __volatile__( 98 __asm__ __volatile__(
97"1: ldstub [%2], %0\n" 99"1: ldstub [%2], %0\n"
98" brnz,pn %0, 2f\n"
99" membar #StoreLoad | #StoreStore\n" 100" membar #StoreLoad | #StoreStore\n"
101" brnz,pn %0, 2f\n"
102" nop\n"
100" .subsection 2\n" 103" .subsection 2\n"
101"2: rdpr %%pil, %1\n" 104"2: rdpr %%pil, %1\n"
102" wrpr %3, %%pil\n" 105" wrpr %3, %%pil\n"
103"3: ldub [%2], %0\n" 106"3: ldub [%2], %0\n"
104" brnz,pt %0, 3b\n"
105" membar #LoadLoad\n" 107" membar #LoadLoad\n"
108" brnz,pt %0, 3b\n"
109" nop\n"
106" ba,pt %%xcc, 1b\n" 110" ba,pt %%xcc, 1b\n"
107" wrpr %1, %%pil\n" 111" wrpr %1, %%pil\n"
108" .previous" 112" .previous"
109 : "=&r" (tmp1), "=&r" (tmp2) 113 : "=&r" (tmp1), "=&r" (tmp2)
110 : "r"(lock), "r"(flags) 114 : "r"(lock), "r"(flags)
@@ -162,12 +166,14 @@ static void inline __read_lock(rwlock_t *lock)
162"4: add %0, 1, %1\n" 166"4: add %0, 1, %1\n"
163" cas [%2], %0, %1\n" 167" cas [%2], %0, %1\n"
164" cmp %0, %1\n" 168" cmp %0, %1\n"
169" membar #StoreLoad | #StoreStore\n"
165" bne,pn %%icc, 1b\n" 170" bne,pn %%icc, 1b\n"
166" membar #StoreLoad | #StoreStore\n" 171" nop\n"
167" .subsection 2\n" 172" .subsection 2\n"
168"2: ldsw [%2], %0\n" 173"2: ldsw [%2], %0\n"
174" membar #LoadLoad\n"
169" brlz,pt %0, 2b\n" 175" brlz,pt %0, 2b\n"
170" membar #LoadLoad\n" 176" nop\n"
171" ba,a,pt %%xcc, 4b\n" 177" ba,a,pt %%xcc, 4b\n"
172" .previous" 178" .previous"
173 : "=&r" (tmp1), "=&r" (tmp2) 179 : "=&r" (tmp1), "=&r" (tmp2)
@@ -204,12 +210,14 @@ static void inline __write_lock(rwlock_t *lock)
204"4: or %0, %3, %1\n" 210"4: or %0, %3, %1\n"
205" cas [%2], %0, %1\n" 211" cas [%2], %0, %1\n"
206" cmp %0, %1\n" 212" cmp %0, %1\n"
213" membar #StoreLoad | #StoreStore\n"
207" bne,pn %%icc, 1b\n" 214" bne,pn %%icc, 1b\n"
208" membar #StoreLoad | #StoreStore\n" 215" nop\n"
209" .subsection 2\n" 216" .subsection 2\n"
210"2: lduw [%2], %0\n" 217"2: lduw [%2], %0\n"
218" membar #LoadLoad\n"
211" brnz,pt %0, 2b\n" 219" brnz,pt %0, 2b\n"
212" membar #LoadLoad\n" 220" nop\n"
213" ba,a,pt %%xcc, 4b\n" 221" ba,a,pt %%xcc, 4b\n"
214" .previous" 222" .previous"
215 : "=&r" (tmp1), "=&r" (tmp2) 223 : "=&r" (tmp1), "=&r" (tmp2)
@@ -240,8 +248,9 @@ static int inline __write_trylock(rwlock_t *lock)
240" or %0, %4, %1\n" 248" or %0, %4, %1\n"
241" cas [%3], %0, %1\n" 249" cas [%3], %0, %1\n"
242" cmp %0, %1\n" 250" cmp %0, %1\n"
251" membar #StoreLoad | #StoreStore\n"
243" bne,pn %%icc, 1b\n" 252" bne,pn %%icc, 1b\n"
244" membar #StoreLoad | #StoreStore\n" 253" nop\n"
245" mov 1, %2\n" 254" mov 1, %2\n"
246"2:" 255"2:"
247 : "=&r" (tmp1), "=&r" (tmp2), "=&r" (result) 256 : "=&r" (tmp1), "=&r" (tmp2), "=&r" (result)
diff --git a/include/asm-sparc64/spitfire.h b/include/asm-sparc64/spitfire.h
index 9d7613eea81..1aa932773af 100644
--- a/include/asm-sparc64/spitfire.h
+++ b/include/asm-sparc64/spitfire.h
@@ -111,7 +111,6 @@ static __inline__ void spitfire_put_dcache_tag(unsigned long addr, unsigned long
111 "membar #Sync" 111 "membar #Sync"
112 : /* No outputs */ 112 : /* No outputs */
113 : "r" (tag), "r" (addr), "i" (ASI_DCACHE_TAG)); 113 : "r" (tag), "r" (addr), "i" (ASI_DCACHE_TAG));
114 __asm__ __volatile__ ("membar #Sync" : : : "memory");
115} 114}
116 115
117/* The instruction cache lines are flushed with this, but note that 116/* The instruction cache lines are flushed with this, but note that