author    Linus Torvalds <torvalds@ppc970.osdl.org>  2005-04-16 18:20:36 -0400
committer Linus Torvalds <torvalds@ppc970.osdl.org>  2005-04-16 18:20:36 -0400
commit    1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree      0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/sparc64/lib
tag       v2.6.12-rc2 (Linux-2.6.12-rc2)
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.
Let it rip!
Diffstat (limited to 'arch/sparc64/lib')
37 files changed, 4850 insertions, 0 deletions
diff --git a/arch/sparc64/lib/Makefile b/arch/sparc64/lib/Makefile
new file mode 100644
index 000000000000..40dbeec7e5d6
--- /dev/null
+++ b/arch/sparc64/lib/Makefile
@@ -0,0 +1,20 @@
# $Id: Makefile,v 1.25 2000/12/14 22:57:25 davem Exp $
# Makefile for Sparc64 library files..
#

EXTRA_AFLAGS := -ansi
EXTRA_CFLAGS := -Werror

lib-y := PeeCeeI.o copy_page.o clear_page.o strlen.o strncmp.o \
	 memscan.o strncpy_from_user.o strlen_user.o memcmp.o checksum.o \
	 bzero.o csum_copy.o csum_copy_from_user.o csum_copy_to_user.o \
	 VISsave.o atomic.o bitops.o \
	 U1memcpy.o U1copy_from_user.o U1copy_to_user.o \
	 U3memcpy.o U3copy_from_user.o U3copy_to_user.o U3patch.o \
	 copy_in_user.o user_fixup.o memmove.o \
	 mcount.o ipcsum.o rwsem.o xor.o find_bit.o delay.o

lib-$(CONFIG_DEBUG_SPINLOCK) += debuglocks.o
lib-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o

obj-y += iomap.o
diff --git a/arch/sparc64/lib/PeeCeeI.c b/arch/sparc64/lib/PeeCeeI.c
new file mode 100644
index 000000000000..3008d536e8c2
--- /dev/null
+++ b/arch/sparc64/lib/PeeCeeI.c
@@ -0,0 +1,237 @@
/* $Id: PeeCeeI.c,v 1.4 1999/09/06 01:17:35 davem Exp $
 * PeeCeeI.c: The emerging standard...
 *
 * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
 */

#include <asm/io.h>
#include <asm/byteorder.h>

void outsb(void __iomem *addr, const void *src, unsigned long count)
{
	const u8 *p = src;

	while(count--)
		outb(*p++, addr);
}

void outsw(void __iomem *addr, const void *src, unsigned long count)
{
	if(count) {
		u16 *ps = (u16 *)src;
		u32 *pi;

		if(((u64)src) & 0x2) {
			u16 val = le16_to_cpup(ps);
			outw(val, addr);
			ps++;
			count--;
		}
		pi = (u32 *)ps;
		while(count >= 2) {
			u32 w = le32_to_cpup(pi);

			pi++;
			outw(w >> 0, addr);
			outw(w >> 16, addr);
			count -= 2;
		}
		ps = (u16 *)pi;
		if(count) {
			u16 val = le16_to_cpup(ps);
			outw(val, addr);
		}
	}
}

void outsl(void __iomem *addr, const void *src, unsigned long count)
{
	if(count) {
		if((((u64)src) & 0x3) == 0) {
			u32 *p = (u32 *)src;
			while(count--) {
				u32 val = cpu_to_le32p(p);
				outl(val, addr);
				p++;
			}
		} else {
			u8 *pb;
			u16 *ps = (u16 *)src;
			u32 l = 0, l2;
			u32 *pi;

			switch(((u64)src) & 0x3) {
			case 0x2:
				count -= 1;
				l = cpu_to_le16p(ps) << 16;
				ps++;
				pi = (u32 *)ps;
				while(count--) {
					l2 = cpu_to_le32p(pi);
					pi++;
					outl(((l >> 16) | (l2 << 16)), addr);
					l = l2;
				}
				ps = (u16 *)pi;
				l2 = cpu_to_le16p(ps);
				outl(((l >> 16) | (l2 << 16)), addr);
				break;

			case 0x1:
				count -= 1;
				pb = (u8 *)src;
				l = (*pb++ << 8);
				ps = (u16 *)pb;
				l2 = cpu_to_le16p(ps);
				ps++;
				l |= (l2 << 16);
				pi = (u32 *)ps;
				while(count--) {
					l2 = cpu_to_le32p(pi);
					pi++;
					outl(((l >> 8) | (l2 << 24)), addr);
					l = l2;
				}
				pb = (u8 *)pi;
				outl(((l >> 8) | (*pb << 24)), addr);
				break;

			case 0x3:
				count -= 1;
				pb = (u8 *)src;
				l = (*pb++ << 24);
				pi = (u32 *)pb;
				while(count--) {
					l2 = cpu_to_le32p(pi);
					pi++;
					outl(((l >> 24) | (l2 << 8)), addr);
					l = l2;
				}
				ps = (u16 *)pi;
				l2 = cpu_to_le16p(ps);
				ps++;
				pb = (u8 *)ps;
				l2 |= (*pb << 16);
				outl(((l >> 24) | (l2 << 8)), addr);
				break;
			}
		}
	}
}

void insb(void __iomem *addr, void *dst, unsigned long count)
{
	if(count) {
		u32 *pi;
		u8 *pb = dst;

		while((((unsigned long)pb) & 0x3) && count--)
			*pb++ = inb(addr);
		pi = (u32 *)pb;
		while(count >= 4) {
			u32 w;

			w = (inb(addr) << 24);
			w |= (inb(addr) << 16);
			w |= (inb(addr) << 8);
			w |= (inb(addr) << 0);
			*pi++ = w;
			count -= 4;
		}
		pb = (u8 *)pi;
		while(count--)
			*pb++ = inb(addr);
	}
}

void insw(void __iomem *addr, void *dst, unsigned long count)
{
	if(count) {
		u16 *ps = dst;
		u32 *pi;

		if(((unsigned long)ps) & 0x2) {
			*ps++ = le16_to_cpu(inw(addr));
			count--;
		}
		pi = (u32 *)ps;
		while(count >= 2) {
			u32 w;

			w = (le16_to_cpu(inw(addr)) << 16);
			w |= (le16_to_cpu(inw(addr)) << 0);
			*pi++ = w;
			count -= 2;
		}
		ps = (u16 *)pi;
		if(count)
			*ps = le16_to_cpu(inw(addr));
	}
}

void insl(void __iomem *addr, void *dst, unsigned long count)
{
	if(count) {
		if((((unsigned long)dst) & 0x3) == 0) {
			u32 *pi = dst;
			while(count--)
				*pi++ = le32_to_cpu(inl(addr));
		} else {
			u32 l = 0, l2, *pi;
			u16 *ps;
			u8 *pb;

			switch(((unsigned long)dst) & 3) {
			case 0x2:
				ps = dst;
				count -= 1;
				l = le32_to_cpu(inl(addr));
				*ps++ = l;
				pi = (u32 *)ps;
				while(count--) {
					l2 = le32_to_cpu(inl(addr));
					*pi++ = (l << 16) | (l2 >> 16);
					l = l2;
				}
				ps = (u16 *)pi;
				*ps = l;
				break;

			case 0x1:
				pb = dst;
				count -= 1;
				l = le32_to_cpu(inl(addr));
				*pb++ = l >> 24;
				ps = (u16 *)pb;
				*ps++ = ((l >> 8) & 0xffff);
				pi = (u32 *)ps;
				while(count--) {
					l2 = le32_to_cpu(inl(addr));
					*pi++ = (l << 24) | (l2 >> 8);
					l = l2;
				}
				pb = (u8 *)pi;
				*pb = l;
				break;

			case 0x3:
				pb = (u8 *)dst;
				count -= 1;
				l = le32_to_cpu(inl(addr));
				*pb++ = l >> 24;
				pi = (u32 *)pb;
				while(count--) {
					l2 = le32_to_cpu(inl(addr));
					*pi++ = (l << 8) | (l2 >> 24);
					l = l2;
				}
				ps = (u16 *)pi;
				*ps++ = ((l >> 8) & 0xffff);
				pb = (u8 *)ps;
				*pb = l;
				break;
			}
		}
	}
}
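The outs*/ins* routines above keep the memory side of string I/O efficient: the port is written one unit at a time, but memory is accessed as aligned 32-bit words, with each switch case shifting bytes across word boundaries for one possible buffer misalignment. For reference, outsw() is behaviorally equivalent to this naive C loop (a sketch, not part of the commit; it assumes 'src' is at least 2-byte aligned and uses only functions already included above):

#include <asm/io.h>
#include <asm/byteorder.h>

static void outsw_naive(void __iomem *addr, const void *src,
			unsigned long count)
{
	const u16 *p = src;

	/* one little-endian 16-bit port write per element */
	while (count--)
		outw(le16_to_cpup(p++), addr);
}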
diff --git a/arch/sparc64/lib/U1copy_from_user.S b/arch/sparc64/lib/U1copy_from_user.S
new file mode 100644
index 000000000000..93146a81e2d3
--- /dev/null
+++ b/arch/sparc64/lib/U1copy_from_user.S
@@ -0,0 +1,33 @@
/* U1copy_from_user.S: UltraSparc-I/II/IIi/IIe optimized copy from userspace.
 *
 * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
 */

#define EX_LD(x)	\
98:	x;	\
	.section .fixup;	\
	.align 4;	\
99:	retl;	\
	mov 1, %o0;	\
	.section __ex_table;	\
	.align 4;	\
	.word 98b, 99b;	\
	.text;	\
	.align 4;

#define FUNC_NAME	___copy_from_user
#define LOAD(type,addr,dest)	type##a [addr] %asi, dest
#define LOAD_BLK(addr,dest)	ldda [addr] ASI_BLK_AIUS, dest
#define EX_RETVAL(x)	0

/* Writing to %asi is _expensive_ so we hardcode it.
 * Reading %asi to check for KERNEL_DS is comparatively
 * cheap.
 */
#define PREAMBLE	\
	rd %asi, %g1;	\
	cmp %g1, ASI_AIUS;	\
	bne,pn %icc, memcpy_user_stub;	\
	nop;	\

#include "U1memcpy.S"
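The EX_LD() wrapper is what makes the copy fault-tolerant: label 98 marks the guarded user-space load, label 99 is the recovery stub that returns 1, and the ".word 98b, 99b" pair records them in the __ex_table section. On a fault, the trap handler looks up the faulting PC in that table and resumes at the fixup address instead of oopsing. Conceptually (a sketch; the struct layout and lookup function here are illustrative, not the kernel's actual definitions):

struct ex_entry {
	unsigned int insn;	/* PC of the guarded instruction (98:) */
	unsigned int fixup;	/* PC to resume at on a fault (99:)    */
};

static unsigned long find_fixup(const struct ex_entry *tbl, unsigned long n,
				unsigned long fault_pc)
{
	unsigned long i;

	for (i = 0; i < n; i++)	/* the real kernel sorts and binary-searches */
		if (tbl[i].insn == fault_pc)
			return tbl[i].fixup;
	return 0;		/* no entry: the fault is a genuine oops */
}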
diff --git a/arch/sparc64/lib/U1copy_to_user.S b/arch/sparc64/lib/U1copy_to_user.S
new file mode 100644
index 000000000000..1fccc521e2bd
--- /dev/null
+++ b/arch/sparc64/lib/U1copy_to_user.S
@@ -0,0 +1,33 @@
/* U1copy_to_user.S: UltraSparc-I/II/IIi/IIe optimized copy to userspace.
 *
 * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
 */

#define EX_ST(x)	\
98:	x;	\
	.section .fixup;	\
	.align 4;	\
99:	retl;	\
	mov 1, %o0;	\
	.section __ex_table;	\
	.align 4;	\
	.word 98b, 99b;	\
	.text;	\
	.align 4;

#define FUNC_NAME	___copy_to_user
#define STORE(type,src,addr)	type##a src, [addr] ASI_AIUS
#define STORE_BLK(src,addr)	stda src, [addr] ASI_BLK_AIUS
#define EX_RETVAL(x)	0

/* Writing to %asi is _expensive_ so we hardcode it.
 * Reading %asi to check for KERNEL_DS is comparatively
 * cheap.
 */
#define PREAMBLE	\
	rd %asi, %g1;	\
	cmp %g1, ASI_AIUS;	\
	bne,pn %icc, memcpy_user_stub;	\
	nop;	\

#include "U1memcpy.S"
diff --git a/arch/sparc64/lib/U1memcpy.S b/arch/sparc64/lib/U1memcpy.S
new file mode 100644
index 000000000000..da9b520c7189
--- /dev/null
+++ b/arch/sparc64/lib/U1memcpy.S
@@ -0,0 +1,560 @@
/* U1memcpy.S: UltraSPARC-I/II/IIi/IIe optimized memcpy.
 *
 * Copyright (C) 1997, 2004 David S. Miller (davem@redhat.com)
 * Copyright (C) 1996, 1997, 1998, 1999 Jakub Jelinek (jj@ultra.linux.cz)
 */

#ifdef __KERNEL__
#include <asm/visasm.h>
#include <asm/asi.h>
#define GLOBAL_SPARE	g7
#else
#define GLOBAL_SPARE	g5
#define ASI_BLK_P 0xf0
#define FPRS_FEF  0x04
#ifdef MEMCPY_DEBUG
#define VISEntry rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs; \
		 clr %g1; clr %g2; clr %g3; subcc %g0, %g0, %g0;
#define VISExit and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
#else
#define VISEntry rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs
#define VISExit and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
#endif
#endif

#ifndef EX_LD
#define EX_LD(x)	x
#endif

#ifndef EX_ST
#define EX_ST(x)	x
#endif

#ifndef EX_RETVAL
#define EX_RETVAL(x)	x
#endif

#ifndef LOAD
#define LOAD(type,addr,dest)	type [addr], dest
#endif

#ifndef LOAD_BLK
#define LOAD_BLK(addr,dest)	ldda [addr] ASI_BLK_P, dest
#endif

#ifndef STORE
#define STORE(type,src,addr)	type src, [addr]
#endif

#ifndef STORE_BLK
#define STORE_BLK(src,addr)	stda src, [addr] ASI_BLK_P
#endif

#ifndef FUNC_NAME
#define FUNC_NAME	memcpy
#endif

#ifndef PREAMBLE
#define PREAMBLE
#endif

#ifndef XCC
#define XCC xcc
#endif

#define FREG_FROB(f1, f2, f3, f4, f5, f6, f7, f8, f9)	\
	faligndata %f1, %f2, %f48;	\
	faligndata %f2, %f3, %f50;	\
	faligndata %f3, %f4, %f52;	\
	faligndata %f4, %f5, %f54;	\
	faligndata %f5, %f6, %f56;	\
	faligndata %f6, %f7, %f58;	\
	faligndata %f7, %f8, %f60;	\
	faligndata %f8, %f9, %f62;

#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, len, jmptgt)	\
	EX_LD(LOAD_BLK(%src, %fdest));	\
	EX_ST(STORE_BLK(%fsrc, %dest));	\
	add %src, 0x40, %src;	\
	subcc %len, 0x40, %len;	\
	be,pn %xcc, jmptgt;	\
	add %dest, 0x40, %dest;	\

#define LOOP_CHUNK1(src, dest, len, branch_dest)	\
	MAIN_LOOP_CHUNK(src, dest, f0, f48, len, branch_dest)
#define LOOP_CHUNK2(src, dest, len, branch_dest)	\
	MAIN_LOOP_CHUNK(src, dest, f16, f48, len, branch_dest)
#define LOOP_CHUNK3(src, dest, len, branch_dest)	\
	MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest)

#define STORE_SYNC(dest, fsrc)	\
	EX_ST(STORE_BLK(%fsrc, %dest));	\
	add %dest, 0x40, %dest;

#define STORE_JUMP(dest, fsrc, target)	\
	EX_ST(STORE_BLK(%fsrc, %dest));	\
	add %dest, 0x40, %dest;	\
	ba,pt %xcc, target;

#define FINISH_VISCHUNK(dest, f0, f1, left)	\
	subcc %left, 8, %left;	\
	bl,pn %xcc, 95f;	\
	faligndata %f0, %f1, %f48;	\
	EX_ST(STORE(std, %f48, %dest));	\
	add %dest, 8, %dest;

#define UNEVEN_VISCHUNK_LAST(dest, f0, f1, left)	\
	subcc %left, 8, %left;	\
	bl,pn %xcc, 95f;	\
	fsrc1 %f0, %f1;

#define UNEVEN_VISCHUNK(dest, f0, f1, left)	\
	UNEVEN_VISCHUNK_LAST(dest, f0, f1, left)	\
	ba,a,pt %xcc, 93f;

	.register %g2,#scratch
	.register %g3,#scratch

	.text
	.align 64

	.globl FUNC_NAME
	.type FUNC_NAME,#function
FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
	srlx %o2, 31, %g2
	cmp %g2, 0
	tne %xcc, 5
	PREAMBLE
	mov %o0, %o4
	cmp %o2, 0
	be,pn %XCC, 85f
	or %o0, %o1, %o3
	cmp %o2, 16
	blu,a,pn %XCC, 80f
	or %o3, %o2, %o3

	cmp %o2, (5 * 64)
	blu,pt %XCC, 70f
	andcc %o3, 0x7, %g0

	/* Clobbers o5/g1/g2/g3/g7/icc/xcc. */
	VISEntry

	/* Is 'dst' already aligned on an 64-byte boundary? */
	andcc %o0, 0x3f, %g2
	be,pt %XCC, 2f

	/* Compute abs((dst & 0x3f) - 0x40) into %g2. This is the number
	 * of bytes to copy to make 'dst' 64-byte aligned. We pre-
	 * subtract this from 'len'.
	 */
	sub %o0, %o1, %GLOBAL_SPARE
	sub %g2, 0x40, %g2
	sub %g0, %g2, %g2
	sub %o2, %g2, %o2
	andcc %g2, 0x7, %g1
	be,pt %icc, 2f
	and %g2, 0x38, %g2

1:	subcc %g1, 0x1, %g1
	EX_LD(LOAD(ldub, %o1 + 0x00, %o3))
	EX_ST(STORE(stb, %o3, %o1 + %GLOBAL_SPARE))
	bgu,pt %XCC, 1b
	add %o1, 0x1, %o1

	add %o1, %GLOBAL_SPARE, %o0

2:	cmp %g2, 0x0
	and %o1, 0x7, %g1
	be,pt %icc, 3f
	alignaddr %o1, %g0, %o1

	EX_LD(LOAD(ldd, %o1, %f4))
1:	EX_LD(LOAD(ldd, %o1 + 0x8, %f6))
	add %o1, 0x8, %o1
	subcc %g2, 0x8, %g2
	faligndata %f4, %f6, %f0
	EX_ST(STORE(std, %f0, %o0))
	be,pn %icc, 3f
	add %o0, 0x8, %o0

	EX_LD(LOAD(ldd, %o1 + 0x8, %f4))
	add %o1, 0x8, %o1
	subcc %g2, 0x8, %g2
	faligndata %f6, %f4, %f0
	EX_ST(STORE(std, %f0, %o0))
	bne,pt %icc, 1b
	add %o0, 0x8, %o0

	/* Destination is 64-byte aligned. */
3:
	membar #LoadStore | #StoreStore | #StoreLoad

	subcc %o2, 0x40, %GLOBAL_SPARE
	add %o1, %g1, %g1
	andncc %GLOBAL_SPARE, (0x40 - 1), %GLOBAL_SPARE
	srl %g1, 3, %g2
	sub %o2, %GLOBAL_SPARE, %g3
	andn %o1, (0x40 - 1), %o1
	and %g2, 7, %g2
	andncc %g3, 0x7, %g3
	fmovd %f0, %f2
	sub %g3, 0x8, %g3
	sub %o2, %GLOBAL_SPARE, %o2

	add %g1, %GLOBAL_SPARE, %g1
	subcc %o2, %g3, %o2

	EX_LD(LOAD_BLK(%o1, %f0))
	add %o1, 0x40, %o1
	add %g1, %g3, %g1
	EX_LD(LOAD_BLK(%o1, %f16))
	add %o1, 0x40, %o1
	sub %GLOBAL_SPARE, 0x80, %GLOBAL_SPARE
	EX_LD(LOAD_BLK(%o1, %f32))
	add %o1, 0x40, %o1

	/* There are 8 instances of the unrolled loop,
	 * one for each possible alignment of the
	 * source buffer. Each loop instance is 452
	 * bytes.
	 */
	sll %g2, 3, %o3
	sub %o3, %g2, %o3
	sllx %o3, 4, %o3
	add %o3, %g2, %o3
	sllx %o3, 2, %g2
1:	rd %pc, %o3
	add %o3, %lo(1f - 1b), %o3
	jmpl %o3 + %g2, %g0
	nop

	.align 64
1:	FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
	FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
	LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
	FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
	LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
	ba,pt %xcc, 1b+4
	faligndata %f0, %f2, %f48
1:	FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
	STORE_SYNC(o0, f48) membar #Sync
	FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
	STORE_JUMP(o0, f48, 40f) membar #Sync
2:	FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
	STORE_SYNC(o0, f48) membar #Sync
	FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
	STORE_JUMP(o0, f48, 48f) membar #Sync
3:	FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
	STORE_SYNC(o0, f48) membar #Sync
	FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
	STORE_JUMP(o0, f48, 56f) membar #Sync

1:	FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
	FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
	LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
	FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
	LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
	ba,pt %xcc, 1b+4
	faligndata %f2, %f4, %f48
1:	FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
	STORE_SYNC(o0, f48) membar #Sync
	FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
	STORE_JUMP(o0, f48, 41f) membar #Sync
2:	FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
	STORE_SYNC(o0, f48) membar #Sync
	FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
	STORE_JUMP(o0, f48, 49f) membar #Sync
3:	FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
	STORE_SYNC(o0, f48) membar #Sync
	FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
	STORE_JUMP(o0, f48, 57f) membar #Sync

1:	FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
	FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
	LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
	FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
	LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
	ba,pt %xcc, 1b+4
	faligndata %f4, %f6, %f48
1:	FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
	STORE_SYNC(o0, f48) membar #Sync
	FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
	STORE_JUMP(o0, f48, 42f) membar #Sync
2:	FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
	STORE_SYNC(o0, f48) membar #Sync
	FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
	STORE_JUMP(o0, f48, 50f) membar #Sync
3:	FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
	STORE_SYNC(o0, f48) membar #Sync
	FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
	STORE_JUMP(o0, f48, 58f) membar #Sync

1:	FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
	FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
	LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
	FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
	LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
	ba,pt %xcc, 1b+4
	faligndata %f6, %f8, %f48
1:	FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
	STORE_SYNC(o0, f48) membar #Sync
	FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
	STORE_JUMP(o0, f48, 43f) membar #Sync
2:	FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
	STORE_SYNC(o0, f48) membar #Sync
	FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
	STORE_JUMP(o0, f48, 51f) membar #Sync
3:	FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
	STORE_SYNC(o0, f48) membar #Sync
	FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
	STORE_JUMP(o0, f48, 59f) membar #Sync

1:	FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
	FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
	LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
	FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
	LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
	ba,pt %xcc, 1b+4
	faligndata %f8, %f10, %f48
1:	FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
	STORE_SYNC(o0, f48) membar #Sync
	FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
	STORE_JUMP(o0, f48, 44f) membar #Sync
2:	FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
	STORE_SYNC(o0, f48) membar #Sync
	FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
	STORE_JUMP(o0, f48, 52f) membar #Sync
3:	FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
	STORE_SYNC(o0, f48) membar #Sync
	FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
	STORE_JUMP(o0, f48, 60f) membar #Sync

1:	FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
	FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
	LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
	FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
	LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
	ba,pt %xcc, 1b+4
	faligndata %f10, %f12, %f48
1:	FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
	STORE_SYNC(o0, f48) membar #Sync
	FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
	STORE_JUMP(o0, f48, 45f) membar #Sync
2:	FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
	STORE_SYNC(o0, f48) membar #Sync
	FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
	STORE_JUMP(o0, f48, 53f) membar #Sync
3:	FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
	STORE_SYNC(o0, f48) membar #Sync
	FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
	STORE_JUMP(o0, f48, 61f) membar #Sync

1:	FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
	FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
	LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
	FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
	LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
	ba,pt %xcc, 1b+4
	faligndata %f12, %f14, %f48
1:	FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
	STORE_SYNC(o0, f48) membar #Sync
	FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
	STORE_JUMP(o0, f48, 46f) membar #Sync
2:	FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
	STORE_SYNC(o0, f48) membar #Sync
	FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
	STORE_JUMP(o0, f48, 54f) membar #Sync
3:	FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
	STORE_SYNC(o0, f48) membar #Sync
	FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
	STORE_JUMP(o0, f48, 62f) membar #Sync

1:	FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
	FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
	LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
	FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
	LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
	ba,pt %xcc, 1b+4
	faligndata %f14, %f16, %f48
1:	FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
	STORE_SYNC(o0, f48) membar #Sync
	FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
	STORE_JUMP(o0, f48, 47f) membar #Sync
2:	FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
	STORE_SYNC(o0, f48) membar #Sync
	FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
	STORE_JUMP(o0, f48, 55f) membar #Sync
3:	FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
	STORE_SYNC(o0, f48) membar #Sync
	FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
	STORE_JUMP(o0, f48, 63f) membar #Sync

40:	FINISH_VISCHUNK(o0, f0, f2, g3)
41:	FINISH_VISCHUNK(o0, f2, f4, g3)
42:	FINISH_VISCHUNK(o0, f4, f6, g3)
43:	FINISH_VISCHUNK(o0, f6, f8, g3)
44:	FINISH_VISCHUNK(o0, f8, f10, g3)
45:	FINISH_VISCHUNK(o0, f10, f12, g3)
46:	FINISH_VISCHUNK(o0, f12, f14, g3)
47:	UNEVEN_VISCHUNK(o0, f14, f0, g3)
48:	FINISH_VISCHUNK(o0, f16, f18, g3)
49:	FINISH_VISCHUNK(o0, f18, f20, g3)
50:	FINISH_VISCHUNK(o0, f20, f22, g3)
51:	FINISH_VISCHUNK(o0, f22, f24, g3)
52:	FINISH_VISCHUNK(o0, f24, f26, g3)
53:	FINISH_VISCHUNK(o0, f26, f28, g3)
54:	FINISH_VISCHUNK(o0, f28, f30, g3)
55:	UNEVEN_VISCHUNK(o0, f30, f0, g3)
56:	FINISH_VISCHUNK(o0, f32, f34, g3)
57:	FINISH_VISCHUNK(o0, f34, f36, g3)
58:	FINISH_VISCHUNK(o0, f36, f38, g3)
59:	FINISH_VISCHUNK(o0, f38, f40, g3)
60:	FINISH_VISCHUNK(o0, f40, f42, g3)
61:	FINISH_VISCHUNK(o0, f42, f44, g3)
62:	FINISH_VISCHUNK(o0, f44, f46, g3)
63:	UNEVEN_VISCHUNK_LAST(o0, f46, f0, g3)

93:	EX_LD(LOAD(ldd, %o1, %f2))
	add %o1, 8, %o1
	subcc %g3, 8, %g3
	faligndata %f0, %f2, %f8
	EX_ST(STORE(std, %f8, %o0))
	bl,pn %xcc, 95f
	add %o0, 8, %o0
	EX_LD(LOAD(ldd, %o1, %f0))
	add %o1, 8, %o1
	subcc %g3, 8, %g3
	faligndata %f2, %f0, %f8
	EX_ST(STORE(std, %f8, %o0))
	bge,pt %xcc, 93b
	add %o0, 8, %o0

95:	brz,pt %o2, 2f
	mov %g1, %o1

1:	EX_LD(LOAD(ldub, %o1, %o3))
	add %o1, 1, %o1
	subcc %o2, 1, %o2
	EX_ST(STORE(stb, %o3, %o0))
	bne,pt %xcc, 1b
	add %o0, 1, %o0

2:	membar #StoreLoad | #StoreStore
	VISExit
	retl
	mov EX_RETVAL(%o4), %o0

	.align 64
70:	/* 16 < len <= (5 * 64) */
	bne,pn %XCC, 75f
	sub %o0, %o1, %o3

72:	andn %o2, 0xf, %GLOBAL_SPARE
	and %o2, 0xf, %o2
1:	EX_LD(LOAD(ldx, %o1 + 0x00, %o5))
	EX_LD(LOAD(ldx, %o1 + 0x08, %g1))
	subcc %GLOBAL_SPARE, 0x10, %GLOBAL_SPARE
	EX_ST(STORE(stx, %o5, %o1 + %o3))
	add %o1, 0x8, %o1
	EX_ST(STORE(stx, %g1, %o1 + %o3))
	bgu,pt %XCC, 1b
	add %o1, 0x8, %o1
73:	andcc %o2, 0x8, %g0
	be,pt %XCC, 1f
	nop
	EX_LD(LOAD(ldx, %o1, %o5))
	sub %o2, 0x8, %o2
	EX_ST(STORE(stx, %o5, %o1 + %o3))
	add %o1, 0x8, %o1
1:	andcc %o2, 0x4, %g0
	be,pt %XCC, 1f
	nop
	EX_LD(LOAD(lduw, %o1, %o5))
	sub %o2, 0x4, %o2
	EX_ST(STORE(stw, %o5, %o1 + %o3))
	add %o1, 0x4, %o1
1:	cmp %o2, 0
	be,pt %XCC, 85f
	nop
	ba,pt %xcc, 90f
	nop

75:	andcc %o0, 0x7, %g1
	sub %g1, 0x8, %g1
	be,pn %icc, 2f
	sub %g0, %g1, %g1
	sub %o2, %g1, %o2

1:	EX_LD(LOAD(ldub, %o1, %o5))
	subcc %g1, 1, %g1
	EX_ST(STORE(stb, %o5, %o1 + %o3))
	bgu,pt %icc, 1b
	add %o1, 1, %o1

2:	add %o1, %o3, %o0
	andcc %o1, 0x7, %g1
	bne,pt %icc, 8f
	sll %g1, 3, %g1

	cmp %o2, 16
	bgeu,pt %icc, 72b
	nop
	ba,a,pt %xcc, 73b

8:	mov 64, %o3
	andn %o1, 0x7, %o1
	EX_LD(LOAD(ldx, %o1, %g2))
	sub %o3, %g1, %o3
	andn %o2, 0x7, %GLOBAL_SPARE
	sllx %g2, %g1, %g2
1:	EX_LD(LOAD(ldx, %o1 + 0x8, %g3))
	subcc %GLOBAL_SPARE, 0x8, %GLOBAL_SPARE
	add %o1, 0x8, %o1
	srlx %g3, %o3, %o5
	or %o5, %g2, %o5
	EX_ST(STORE(stx, %o5, %o0))
	add %o0, 0x8, %o0
	bgu,pt %icc, 1b
	sllx %g3, %g1, %g2

	srl %g1, 3, %g1
	andcc %o2, 0x7, %o2
	be,pn %icc, 85f
	add %o1, %g1, %o1
	ba,pt %xcc, 90f
	sub %o0, %o1, %o3

	.align 64
80:	/* 0 < len <= 16 */
	andcc %o3, 0x3, %g0
	bne,pn %XCC, 90f
	sub %o0, %o1, %o3

1:	EX_LD(LOAD(lduw, %o1, %g1))
	subcc %o2, 4, %o2
	EX_ST(STORE(stw, %g1, %o1 + %o3))
	bgu,pt %XCC, 1b
	add %o1, 4, %o1

85:	retl
	mov EX_RETVAL(%o4), %o0

	.align 32
90:	EX_LD(LOAD(ldub, %o1, %g1))
	subcc %o2, 1, %o2
	EX_ST(STORE(stb, %g1, %o1 + %o3))
	bgu,pt %XCC, 90b
	add %o1, 1, %o1
	retl
	mov EX_RETVAL(%o4), %o0

	.size FUNC_NAME, .-FUNC_NAME
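The dispatch just before the ".align 64" jump table (sll %g2, 3, %o3; sub %o3, %g2, %o3; sllx %o3, 4, %o3; add %o3, %g2, %o3; sllx %o3, 2, %g2) multiplies the source-alignment index %g2 (0..7) by 452, the size of one unrolled loop instance, without using a multiply instruction; the rd %pc / jmpl pair then jumps into the matching instance. The same arithmetic in C (a sketch, not part of the commit):

static unsigned long loop_offset(unsigned long g2)
{
	unsigned long o3;

	o3 = (g2 << 3) - g2;	/* g2 * 7   */
	o3 = (o3 << 4) + g2;	/* g2 * 113 */
	return o3 << 2;		/* g2 * 452 */
}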
diff --git a/arch/sparc64/lib/U3copy_from_user.S b/arch/sparc64/lib/U3copy_from_user.S
new file mode 100644
index 000000000000..df600b667e48
--- /dev/null
+++ b/arch/sparc64/lib/U3copy_from_user.S
@@ -0,0 +1,22 @@
/* U3copy_from_user.S: UltraSparc-III optimized copy from userspace.
 *
 * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
 */

#define EX_LD(x)	\
98:	x;	\
	.section .fixup;	\
	.align 4;	\
99:	retl;	\
	mov 1, %o0;	\
	.section __ex_table;	\
	.align 4;	\
	.word 98b, 99b;	\
	.text;	\
	.align 4;

#define FUNC_NAME	U3copy_from_user
#define LOAD(type,addr,dest)	type##a [addr] %asi, dest
#define EX_RETVAL(x)	0

#include "U3memcpy.S"
diff --git a/arch/sparc64/lib/U3copy_to_user.S b/arch/sparc64/lib/U3copy_to_user.S
new file mode 100644
index 000000000000..f337f22ed82e
--- /dev/null
+++ b/arch/sparc64/lib/U3copy_to_user.S
@@ -0,0 +1,33 @@
/* U3copy_to_user.S: UltraSparc-III optimized copy to userspace.
 *
 * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
 */

#define EX_ST(x)	\
98:	x;	\
	.section .fixup;	\
	.align 4;	\
99:	retl;	\
	mov 1, %o0;	\
	.section __ex_table;	\
	.align 4;	\
	.word 98b, 99b;	\
	.text;	\
	.align 4;

#define FUNC_NAME	U3copy_to_user
#define STORE(type,src,addr)	type##a src, [addr] ASI_AIUS
#define STORE_BLK(src,addr)	stda src, [addr] ASI_BLK_AIUS
#define EX_RETVAL(x)	0

/* Writing to %asi is _expensive_ so we hardcode it.
 * Reading %asi to check for KERNEL_DS is comparatively
 * cheap.
 */
#define PREAMBLE	\
	rd %asi, %g1;	\
	cmp %g1, ASI_AIUS;	\
	bne,pn %icc, memcpy_user_stub;	\
	nop;	\

#include "U3memcpy.S"
diff --git a/arch/sparc64/lib/U3memcpy.S b/arch/sparc64/lib/U3memcpy.S
new file mode 100644
index 000000000000..7cae9cc6a204
--- /dev/null
+++ b/arch/sparc64/lib/U3memcpy.S
@@ -0,0 +1,422 @@
/* U3memcpy.S: UltraSparc-III optimized memcpy.
 *
 * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
 */

#ifdef __KERNEL__
#include <asm/visasm.h>
#include <asm/asi.h>
#define GLOBAL_SPARE	%g7
#else
#define ASI_BLK_P 0xf0
#define FPRS_FEF  0x04
#ifdef MEMCPY_DEBUG
#define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs; \
		     clr %g1; clr %g2; clr %g3; subcc %g0, %g0, %g0;
#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
#else
#define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs
#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
#endif
#define GLOBAL_SPARE	%g5
#endif

#ifndef EX_LD
#define EX_LD(x)	x
#endif

#ifndef EX_ST
#define EX_ST(x)	x
#endif

#ifndef EX_RETVAL
#define EX_RETVAL(x)	x
#endif

#ifndef LOAD
#define LOAD(type,addr,dest)	type [addr], dest
#endif

#ifndef STORE
#define STORE(type,src,addr)	type src, [addr]
#endif

#ifndef STORE_BLK
#define STORE_BLK(src,addr)	stda src, [addr] ASI_BLK_P
#endif

#ifndef FUNC_NAME
#define FUNC_NAME	U3memcpy
#endif

#ifndef PREAMBLE
#define PREAMBLE
#endif

#ifndef XCC
#define XCC xcc
#endif

	.register %g2,#scratch
	.register %g3,#scratch

	/* Special/non-trivial issues of this code:
	 *
	 * 1) %o5 is preserved from VISEntryHalf to VISExitHalf
	 * 2) Only low 32 FPU registers are used so that only the
	 *    lower half of the FPU register set is dirtied by this
	 *    code. This is especially important in the kernel.
	 * 3) This code never prefetches cachelines past the end
	 *    of the source buffer.
	 */

	.text
	.align 64

	/* The cheetah's flexible spine, oversized liver, enlarged heart,
	 * slender muscular body, and claws make it the swiftest hunter
	 * in Africa and the fastest animal on land. Can reach speeds
	 * of up to 2.4GB per second.
	 */

	.globl FUNC_NAME
	.type FUNC_NAME,#function
FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
	srlx %o2, 31, %g2
	cmp %g2, 0
	tne %xcc, 5
	PREAMBLE
	mov %o0, %o4
	cmp %o2, 0
	be,pn %XCC, 85f
	or %o0, %o1, %o3
	cmp %o2, 16
	blu,a,pn %XCC, 80f
	or %o3, %o2, %o3

	cmp %o2, (3 * 64)
	blu,pt %XCC, 70f
	andcc %o3, 0x7, %g0

	/* Clobbers o5/g1/g2/g3/g7/icc/xcc. We must preserve
	 * o5 from here until we hit VISExitHalf.
	 */
	VISEntryHalf

	/* Is 'dst' already aligned on an 64-byte boundary? */
	andcc %o0, 0x3f, %g2
	be,pt %XCC, 2f

	/* Compute abs((dst & 0x3f) - 0x40) into %g2. This is the number
	 * of bytes to copy to make 'dst' 64-byte aligned. We pre-
	 * subtract this from 'len'.
	 */
	sub %o0, %o1, GLOBAL_SPARE
	sub %g2, 0x40, %g2
	sub %g0, %g2, %g2
	sub %o2, %g2, %o2
	andcc %g2, 0x7, %g1
	be,pt %icc, 2f
	and %g2, 0x38, %g2

1:	subcc %g1, 0x1, %g1
	EX_LD(LOAD(ldub, %o1 + 0x00, %o3))
	EX_ST(STORE(stb, %o3, %o1 + GLOBAL_SPARE))
	bgu,pt %XCC, 1b
	add %o1, 0x1, %o1

	add %o1, GLOBAL_SPARE, %o0

2:	cmp %g2, 0x0
	and %o1, 0x7, %g1
	be,pt %icc, 3f
	alignaddr %o1, %g0, %o1

	EX_LD(LOAD(ldd, %o1, %f4))
1:	EX_LD(LOAD(ldd, %o1 + 0x8, %f6))
	add %o1, 0x8, %o1
	subcc %g2, 0x8, %g2
	faligndata %f4, %f6, %f0
	EX_ST(STORE(std, %f0, %o0))
	be,pn %icc, 3f
	add %o0, 0x8, %o0

	EX_LD(LOAD(ldd, %o1 + 0x8, %f4))
	add %o1, 0x8, %o1
	subcc %g2, 0x8, %g2
	faligndata %f6, %f4, %f2
	EX_ST(STORE(std, %f2, %o0))
	bne,pt %icc, 1b
	add %o0, 0x8, %o0

3:	LOAD(prefetch, %o1 + 0x000, #one_read)
	LOAD(prefetch, %o1 + 0x040, #one_read)
	andn %o2, (0x40 - 1), GLOBAL_SPARE
	LOAD(prefetch, %o1 + 0x080, #one_read)
	LOAD(prefetch, %o1 + 0x0c0, #one_read)
	LOAD(prefetch, %o1 + 0x100, #one_read)
	EX_LD(LOAD(ldd, %o1 + 0x000, %f0))
	LOAD(prefetch, %o1 + 0x140, #one_read)
	EX_LD(LOAD(ldd, %o1 + 0x008, %f2))
	LOAD(prefetch, %o1 + 0x180, #one_read)
	EX_LD(LOAD(ldd, %o1 + 0x010, %f4))
	LOAD(prefetch, %o1 + 0x1c0, #one_read)
	faligndata %f0, %f2, %f16
	EX_LD(LOAD(ldd, %o1 + 0x018, %f6))
	faligndata %f2, %f4, %f18
	EX_LD(LOAD(ldd, %o1 + 0x020, %f8))
	faligndata %f4, %f6, %f20
	EX_LD(LOAD(ldd, %o1 + 0x028, %f10))
	faligndata %f6, %f8, %f22

	EX_LD(LOAD(ldd, %o1 + 0x030, %f12))
	faligndata %f8, %f10, %f24
	EX_LD(LOAD(ldd, %o1 + 0x038, %f14))
	faligndata %f10, %f12, %f26
	EX_LD(LOAD(ldd, %o1 + 0x040, %f0))

	subcc GLOBAL_SPARE, 0x80, GLOBAL_SPARE
	add %o1, 0x40, %o1
	bgu,pt %XCC, 1f
	srl GLOBAL_SPARE, 6, %o3
	ba,pt %xcc, 2f
	nop

	.align 64
1:
	EX_LD(LOAD(ldd, %o1 + 0x008, %f2))
	faligndata %f12, %f14, %f28
	EX_LD(LOAD(ldd, %o1 + 0x010, %f4))
	faligndata %f14, %f0, %f30
	EX_ST(STORE_BLK(%f16, %o0))
	EX_LD(LOAD(ldd, %o1 + 0x018, %f6))
	faligndata %f0, %f2, %f16
	add %o0, 0x40, %o0

	EX_LD(LOAD(ldd, %o1 + 0x020, %f8))
	faligndata %f2, %f4, %f18
	EX_LD(LOAD(ldd, %o1 + 0x028, %f10))
	faligndata %f4, %f6, %f20
	EX_LD(LOAD(ldd, %o1 + 0x030, %f12))
	subcc %o3, 0x01, %o3
	faligndata %f6, %f8, %f22
	EX_LD(LOAD(ldd, %o1 + 0x038, %f14))

	faligndata %f8, %f10, %f24
	EX_LD(LOAD(ldd, %o1 + 0x040, %f0))
	LOAD(prefetch, %o1 + 0x1c0, #one_read)
	faligndata %f10, %f12, %f26
	bg,pt %XCC, 1b
	add %o1, 0x40, %o1

	/* Finally we copy the last full 64-byte block. */
2:
	EX_LD(LOAD(ldd, %o1 + 0x008, %f2))
	faligndata %f12, %f14, %f28
	EX_LD(LOAD(ldd, %o1 + 0x010, %f4))
	faligndata %f14, %f0, %f30
	EX_ST(STORE_BLK(%f16, %o0))
	EX_LD(LOAD(ldd, %o1 + 0x018, %f6))
	faligndata %f0, %f2, %f16
	EX_LD(LOAD(ldd, %o1 + 0x020, %f8))
	faligndata %f2, %f4, %f18
	EX_LD(LOAD(ldd, %o1 + 0x028, %f10))
	faligndata %f4, %f6, %f20
	EX_LD(LOAD(ldd, %o1 + 0x030, %f12))
	faligndata %f6, %f8, %f22
	EX_LD(LOAD(ldd, %o1 + 0x038, %f14))
	faligndata %f8, %f10, %f24
	cmp %g1, 0
	be,pt %XCC, 1f
	add %o0, 0x40, %o0
	EX_LD(LOAD(ldd, %o1 + 0x040, %f0))
1:	faligndata %f10, %f12, %f26
	faligndata %f12, %f14, %f28
	faligndata %f14, %f0, %f30
	EX_ST(STORE_BLK(%f16, %o0))
	add %o0, 0x40, %o0
	add %o1, 0x40, %o1
	membar #Sync

	/* Now we copy the (len modulo 64) bytes at the end.
	 * Note how we borrow the %f0 loaded above.
	 *
	 * Also notice how this code is careful not to perform a
	 * load past the end of the src buffer.
	 */
	and %o2, 0x3f, %o2
	andcc %o2, 0x38, %g2
	be,pn %XCC, 2f
	subcc %g2, 0x8, %g2
	be,pn %XCC, 2f
	cmp %g1, 0

	sub %o2, %g2, %o2
	be,a,pt %XCC, 1f
	EX_LD(LOAD(ldd, %o1 + 0x00, %f0))

1:	EX_LD(LOAD(ldd, %o1 + 0x08, %f2))
	add %o1, 0x8, %o1
	subcc %g2, 0x8, %g2
	faligndata %f0, %f2, %f8
	EX_ST(STORE(std, %f8, %o0))
	be,pn %XCC, 2f
	add %o0, 0x8, %o0
	EX_LD(LOAD(ldd, %o1 + 0x08, %f0))
	add %o1, 0x8, %o1
	subcc %g2, 0x8, %g2
	faligndata %f2, %f0, %f8
	EX_ST(STORE(std, %f8, %o0))
	bne,pn %XCC, 1b
	add %o0, 0x8, %o0

	/* If anything is left, we copy it one byte at a time.
	 * Note that %g1 is (src & 0x3) saved above before the
	 * alignaddr was performed.
	 */
2:
	cmp %o2, 0
	add %o1, %g1, %o1
	VISExitHalf
	be,pn %XCC, 85f
	sub %o0, %o1, %o3

	andcc %g1, 0x7, %g0
	bne,pn %icc, 90f
	andcc %o2, 0x8, %g0
	be,pt %icc, 1f
	nop
	EX_LD(LOAD(ldx, %o1, %o5))
	EX_ST(STORE(stx, %o5, %o1 + %o3))
	add %o1, 0x8, %o1

1:	andcc %o2, 0x4, %g0
	be,pt %icc, 1f
	nop
	EX_LD(LOAD(lduw, %o1, %o5))
	EX_ST(STORE(stw, %o5, %o1 + %o3))
	add %o1, 0x4, %o1

1:	andcc %o2, 0x2, %g0
	be,pt %icc, 1f
	nop
	EX_LD(LOAD(lduh, %o1, %o5))
	EX_ST(STORE(sth, %o5, %o1 + %o3))
	add %o1, 0x2, %o1

1:	andcc %o2, 0x1, %g0
	be,pt %icc, 85f
	nop
	EX_LD(LOAD(ldub, %o1, %o5))
	ba,pt %xcc, 85f
	EX_ST(STORE(stb, %o5, %o1 + %o3))

	.align 64
70:	/* 16 < len <= 64 */
	bne,pn %XCC, 75f
	sub %o0, %o1, %o3

72:
	andn %o2, 0xf, GLOBAL_SPARE
	and %o2, 0xf, %o2
1:	subcc GLOBAL_SPARE, 0x10, GLOBAL_SPARE
	EX_LD(LOAD(ldx, %o1 + 0x00, %o5))
	EX_LD(LOAD(ldx, %o1 + 0x08, %g1))
	EX_ST(STORE(stx, %o5, %o1 + %o3))
	add %o1, 0x8, %o1
	EX_ST(STORE(stx, %g1, %o1 + %o3))
	bgu,pt %XCC, 1b
	add %o1, 0x8, %o1
73:	andcc %o2, 0x8, %g0
	be,pt %XCC, 1f
	nop
	sub %o2, 0x8, %o2
	EX_LD(LOAD(ldx, %o1, %o5))
	EX_ST(STORE(stx, %o5, %o1 + %o3))
	add %o1, 0x8, %o1
1:	andcc %o2, 0x4, %g0
	be,pt %XCC, 1f
	nop
	sub %o2, 0x4, %o2
	EX_LD(LOAD(lduw, %o1, %o5))
	EX_ST(STORE(stw, %o5, %o1 + %o3))
	add %o1, 0x4, %o1
1:	cmp %o2, 0
	be,pt %XCC, 85f
	nop
	ba,pt %xcc, 90f
	nop

75:
	andcc %o0, 0x7, %g1
	sub %g1, 0x8, %g1
	be,pn %icc, 2f
	sub %g0, %g1, %g1
	sub %o2, %g1, %o2

1:	subcc %g1, 1, %g1
	EX_LD(LOAD(ldub, %o1, %o5))
	EX_ST(STORE(stb, %o5, %o1 + %o3))
	bgu,pt %icc, 1b
	add %o1, 1, %o1

2:	add %o1, %o3, %o0
	andcc %o1, 0x7, %g1
	bne,pt %icc, 8f
	sll %g1, 3, %g1

	cmp %o2, 16
	bgeu,pt %icc, 72b
	nop
	ba,a,pt %xcc, 73b

8:	mov 64, %o3
	andn %o1, 0x7, %o1
	EX_LD(LOAD(ldx, %o1, %g2))
	sub %o3, %g1, %o3
	andn %o2, 0x7, GLOBAL_SPARE
	sllx %g2, %g1, %g2
1:	EX_LD(LOAD(ldx, %o1 + 0x8, %g3))
	subcc GLOBAL_SPARE, 0x8, GLOBAL_SPARE
	add %o1, 0x8, %o1
	srlx %g3, %o3, %o5
	or %o5, %g2, %o5
	EX_ST(STORE(stx, %o5, %o0))
	add %o0, 0x8, %o0
	bgu,pt %icc, 1b
	sllx %g3, %g1, %g2

	srl %g1, 3, %g1
	andcc %o2, 0x7, %o2
	be,pn %icc, 85f
	add %o1, %g1, %o1
	ba,pt %xcc, 90f
	sub %o0, %o1, %o3

	.align 64
80:	/* 0 < len <= 16 */
	andcc %o3, 0x3, %g0
	bne,pn %XCC, 90f
	sub %o0, %o1, %o3

1:
	subcc %o2, 4, %o2
	EX_LD(LOAD(lduw, %o1, %g1))
	EX_ST(STORE(stw, %g1, %o1 + %o3))
	bgu,pt %XCC, 1b
	add %o1, 4, %o1

85:	retl
	mov EX_RETVAL(%o4), %o0

	.align 32
90:
	subcc %o2, 1, %o2
	EX_LD(LOAD(ldub, %o1, %g1))
	EX_ST(STORE(stb, %g1, %o1 + %o3))
	bgu,pt %XCC, 90b
	add %o1, 1, %o1
	retl
	mov EX_RETVAL(%o4), %o0

	.size FUNC_NAME, .-FUNC_NAME
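The main loop above keeps the prefetch queue a fixed distance ahead of the read pointer and, per special issue #3 in the header comment, never touches a cache line past the end of the source. The shape of that discipline in C (a sketch, not the kernel routine; 0x1c0 matches the deepest prefetch offset used in the assembly):

#include <string.h>

static void copy_with_prefetch(void *dst, const void *src, unsigned long len)
{
	const char *s = src;
	char *d = dst;
	unsigned long off = 0;

	for (; off + 64 <= len; off += 64) {
		if (off + 0x1c0 < len)			/* stay 7 lines ahead */
			__builtin_prefetch(s + off + 0x1c0, 0);	/* #one_read */
		memcpy(d + off, s + off, 64);		/* one 64-byte line */
	}
	memcpy(d + off, s + off, len - off);		/* sub-line tail */
}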
diff --git a/arch/sparc64/lib/U3patch.S b/arch/sparc64/lib/U3patch.S
new file mode 100644
index 000000000000..e2b6c5e4b95a
--- /dev/null
+++ b/arch/sparc64/lib/U3patch.S
@@ -0,0 +1,32 @@
/* U3patch.S: Patch Ultra-I routines with Ultra-III variant.
 *
 * Copyright (C) 2004 David S. Miller <davem@redhat.com>
 */

#define BRANCH_ALWAYS	0x10680000
#define NOP		0x01000000
#define ULTRA3_DO_PATCH(OLD, NEW)	\
	sethi %hi(NEW), %g1;	\
	or %g1, %lo(NEW), %g1;	\
	sethi %hi(OLD), %g2;	\
	or %g2, %lo(OLD), %g2;	\
	sub %g1, %g2, %g1;	\
	sethi %hi(BRANCH_ALWAYS), %g3;	\
	srl %g1, 2, %g1;	\
	or %g3, %lo(BRANCH_ALWAYS), %g3;	\
	or %g3, %g1, %g3;	\
	stw %g3, [%g2];	\
	sethi %hi(NOP), %g3;	\
	or %g3, %lo(NOP), %g3;	\
	stw %g3, [%g2 + 0x4];	\
	flush %g2;

	.globl cheetah_patch_copyops
	.type cheetah_patch_copyops,#function
cheetah_patch_copyops:
	ULTRA3_DO_PATCH(memcpy, U3memcpy)
	ULTRA3_DO_PATCH(___copy_from_user, U3copy_from_user)
	ULTRA3_DO_PATCH(___copy_to_user, U3copy_to_user)
	retl
	nop
	.size cheetah_patch_copyops,.-cheetah_patch_copyops
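What ULTRA3_DO_PATCH() stores, expressed in C (a sketch): the SPARC branch displacement is counted in 32-bit words, hence the >> 2, and the macro assumes the replacement routine is within reach of the branch's displacement field. The real code then executes "flush" to invalidate the patched instruction in the I-cache.

#define BRANCH_ALWAYS	0x10680000	/* ba,pt %xcc, <disp> */
#define NOP		0x01000000

static void patch_branch(unsigned int *old_fn, unsigned int *new_fn)
{
	unsigned int disp =
		(unsigned int)((char *)new_fn - (char *)old_fn) >> 2;

	old_fn[0] = BRANCH_ALWAYS | disp;	/* redirect to new_fn */
	old_fn[1] = NOP;			/* branch delay slot  */
	/* followed by an I-cache flush of old_fn in the assembly */
}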
diff --git a/arch/sparc64/lib/VISsave.S b/arch/sparc64/lib/VISsave.S
new file mode 100644
index 000000000000..65e328d600a8
--- /dev/null
+++ b/arch/sparc64/lib/VISsave.S
@@ -0,0 +1,131 @@
/* $Id: VISsave.S,v 1.6 2002/02/09 19:49:30 davem Exp $
 * VISsave.S: Code for saving FPU register state for
 * VIS routines. One should not call this directly,
 * but use macros provided in <asm/visasm.h>.
 *
 * Copyright (C) 1998 Jakub Jelinek (jj@ultra.linux.cz)
 */

#include <asm/asi.h>
#include <asm/page.h>
#include <asm/ptrace.h>
#include <asm/visasm.h>
#include <asm/thread_info.h>

	.text
	.globl VISenter, VISenterhalf

	/* On entry: %o5=current FPRS value, %g7 is callers address */
	/* May clobber %o5, %g1, %g2, %g3, %g7, %icc, %xcc */

	/* Nothing special need be done here to handle pre-emption, this
	 * FPU save/restore mechanism is already preemption safe.
	 */

	.align 32
VISenter:
	ldub [%g6 + TI_FPDEPTH], %g1
	brnz,a,pn %g1, 1f
	cmp %g1, 1
	stb %g0, [%g6 + TI_FPSAVED]
	stx %fsr, [%g6 + TI_XFSR]
9:	jmpl %g7 + %g0, %g0
	nop
1:	bne,pn %icc, 2f

	srl %g1, 1, %g1
vis1:	ldub [%g6 + TI_FPSAVED], %g3
	stx %fsr, [%g6 + TI_XFSR]
	or %g3, %o5, %g3
	stb %g3, [%g6 + TI_FPSAVED]
	rd %gsr, %g3
	clr %g1
	ba,pt %xcc, 3f

	stx %g3, [%g6 + TI_GSR]
2:	add %g6, %g1, %g3
	cmp %o5, FPRS_DU
	be,pn %icc, 6f
	sll %g1, 3, %g1
	stb %o5, [%g3 + TI_FPSAVED]
	rd %gsr, %g2
	add %g6, %g1, %g3
	stx %g2, [%g3 + TI_GSR]

	add %g6, %g1, %g2
	stx %fsr, [%g2 + TI_XFSR]
	sll %g1, 5, %g1
3:	andcc %o5, FPRS_DL|FPRS_DU, %g0
	be,pn %icc, 9b
	add %g6, TI_FPREGS, %g2
	andcc %o5, FPRS_DL, %g0
	membar #StoreStore | #LoadStore

	be,pn %icc, 4f
	add %g6, TI_FPREGS+0x40, %g3
	stda %f0, [%g2 + %g1] ASI_BLK_P
	stda %f16, [%g3 + %g1] ASI_BLK_P
	andcc %o5, FPRS_DU, %g0
	be,pn %icc, 5f
4:	add %g1, 128, %g1
	stda %f32, [%g2 + %g1] ASI_BLK_P

	stda %f48, [%g3 + %g1] ASI_BLK_P
5:	membar #Sync
	jmpl %g7 + %g0, %g0
	nop

6:	ldub [%g3 + TI_FPSAVED], %o5
	or %o5, FPRS_DU, %o5
	add %g6, TI_FPREGS+0x80, %g2
	stb %o5, [%g3 + TI_FPSAVED]

	sll %g1, 5, %g1
	add %g6, TI_FPREGS+0xc0, %g3
	wr %g0, FPRS_FEF, %fprs
	membar #StoreStore | #LoadStore
	stda %f32, [%g2 + %g1] ASI_BLK_P
	stda %f48, [%g3 + %g1] ASI_BLK_P
	membar #Sync
	jmpl %g7 + %g0, %g0

	nop

	.align 32
VISenterhalf:
	ldub [%g6 + TI_FPDEPTH], %g1
	brnz,a,pn %g1, 1f
	cmp %g1, 1
	stb %g0, [%g6 + TI_FPSAVED]
	stx %fsr, [%g6 + TI_XFSR]
	clr %o5
	jmpl %g7 + %g0, %g0
	wr %g0, FPRS_FEF, %fprs

1:	bne,pn %icc, 2f
	srl %g1, 1, %g1
	ba,pt %xcc, vis1
	sub %g7, 8, %g7
2:	addcc %g6, %g1, %g3
	sll %g1, 3, %g1
	andn %o5, FPRS_DU, %g2
	stb %g2, [%g3 + TI_FPSAVED]

	rd %gsr, %g2
	add %g6, %g1, %g3
	stx %g2, [%g3 + TI_GSR]
	add %g6, %g1, %g2
	stx %fsr, [%g2 + TI_XFSR]
	sll %g1, 5, %g1
3:	andcc %o5, FPRS_DL, %g0
	be,pn %icc, 4f
	add %g6, TI_FPREGS, %g2

	membar #StoreStore | #LoadStore
	add %g6, TI_FPREGS+0x40, %g3
	stda %f0, [%g2 + %g1] ASI_BLK_P
	stda %f16, [%g3 + %g1] ASI_BLK_P
	membar #Sync
4:	and %o5, FPRS_DU, %o5
	jmpl %g7 + %g0, %g0
	wr %o5, FPRS_FEF, %fprs
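VISenter saves only the halves of the FP register file that the FPRS dirty bits mark as modified, which is why the block stores above are gated on FPRS_DL and FPRS_DU. In C (a sketch; the bit values follow SPARC V9, and save_block() is a hypothetical stand-in for the stda ASI_BLK_P pairs, not a kernel function):

#define FPRS_DL	0x01		/* lower half %f0..%f31 modified  */
#define FPRS_DU	0x02		/* upper half %f32..%f63 modified */

static void save_block(void *fpregs, unsigned int off)
{
	/* two 64-byte block stores in the real code */
	(void)fpregs;
	(void)off;
}

static void save_dirty_fpu(unsigned int fprs, void *fpregs)
{
	if (fprs & FPRS_DL)
		save_block(fpregs, 0x00);	/* %f0..%f31  */
	if (fprs & FPRS_DU)
		save_block(fpregs, 0x80);	/* %f32..%f63 */
}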
diff --git a/arch/sparc64/lib/atomic.S b/arch/sparc64/lib/atomic.S new file mode 100644 index 000000000000..e528b8d1a3e6 --- /dev/null +++ b/arch/sparc64/lib/atomic.S | |||
@@ -0,0 +1,139 @@ | |||
1 | /* $Id: atomic.S,v 1.4 2001/11/18 00:12:56 davem Exp $ | ||
2 | * atomic.S: These things are too big to do inline. | ||
3 | * | ||
4 | * Copyright (C) 1999 David S. Miller (davem@redhat.com) | ||
5 | */ | ||
6 | |||
7 | #include <linux/config.h> | ||
8 | #include <asm/asi.h> | ||
9 | |||
10 | /* On SMP we need to use memory barriers to ensure | ||
11 | * correct memory operation ordering, nop these out | ||
12 | * for uniprocessor. | ||
13 | */ | ||
14 | #ifdef CONFIG_SMP | ||
15 | #define ATOMIC_PRE_BARRIER membar #StoreLoad | #LoadLoad | ||
16 | #define ATOMIC_POST_BARRIER membar #StoreLoad | #StoreStore | ||
17 | #else | ||
18 | #define ATOMIC_PRE_BARRIER nop | ||
19 | #define ATOMIC_POST_BARRIER nop | ||
20 | #endif | ||
21 | |||
22 | .text | ||
23 | |||
24 | /* Two versions of the atomic routines, one that | ||
25 | * does not return a value and does not perform | ||
26 | * memory barriers, and a second which returns | ||
27 | * a value and does the barriers. | ||
28 | */ | ||
29 | .globl atomic_add | ||
30 | .type atomic_add,#function | ||
31 | atomic_add: /* %o0 = increment, %o1 = atomic_ptr */ | ||
32 | 1: lduw [%o1], %g1 | ||
33 | add %g1, %o0, %g7 | ||
34 | cas [%o1], %g1, %g7 | ||
35 | cmp %g1, %g7 | ||
36 | bne,pn %icc, 1b | ||
37 | nop | ||
38 | retl | ||
39 | nop | ||
40 | .size atomic_add, .-atomic_add | ||
41 | |||
42 | .globl atomic_sub | ||
43 | .type atomic_sub,#function | ||
44 | atomic_sub: /* %o0 = decrement, %o1 = atomic_ptr */ | ||
45 | 1: lduw [%o1], %g1 | ||
46 | sub %g1, %o0, %g7 | ||
47 | cas [%o1], %g1, %g7 | ||
48 | cmp %g1, %g7 | ||
49 | bne,pn %icc, 1b | ||
50 | nop | ||
51 | retl | ||
52 | nop | ||
53 | .size atomic_sub, .-atomic_sub | ||
54 | |||
55 | .globl atomic_add_ret | ||
56 | .type atomic_add_ret,#function | ||
57 | atomic_add_ret: /* %o0 = increment, %o1 = atomic_ptr */ | ||
58 | ATOMIC_PRE_BARRIER | ||
59 | 1: lduw [%o1], %g1 | ||
60 | add %g1, %o0, %g7 | ||
61 | cas [%o1], %g1, %g7 | ||
62 | cmp %g1, %g7 | ||
63 | bne,pn %icc, 1b | ||
64 | add %g7, %o0, %g7 | ||
65 | ATOMIC_POST_BARRIER | ||
66 | retl | ||
67 | sra %g7, 0, %o0 | ||
68 | .size atomic_add_ret, .-atomic_add_ret | ||
69 | |||
70 | .globl atomic_sub_ret | ||
71 | .type atomic_sub_ret,#function | ||
72 | atomic_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */ | ||
73 | ATOMIC_PRE_BARRIER | ||
74 | 1: lduw [%o1], %g1 | ||
75 | sub %g1, %o0, %g7 | ||
76 | cas [%o1], %g1, %g7 | ||
77 | cmp %g1, %g7 | ||
78 | bne,pn %icc, 1b | ||
79 | sub %g7, %o0, %g7 | ||
80 | ATOMIC_POST_BARRIER | ||
81 | retl | ||
82 | sra %g7, 0, %o0 | ||
83 | .size atomic_sub_ret, .-atomic_sub_ret | ||
84 | |||
85 | .globl atomic64_add | ||
86 | .type atomic64_add,#function | ||
87 | atomic64_add: /* %o0 = increment, %o1 = atomic_ptr */ | ||
88 | 1: ldx [%o1], %g1 | ||
89 | add %g1, %o0, %g7 | ||
90 | casx [%o1], %g1, %g7 | ||
91 | cmp %g1, %g7 | ||
92 | bne,pn %xcc, 1b | ||
93 | nop | ||
94 | retl | ||
95 | nop | ||
96 | .size atomic64_add, .-atomic64_add | ||
97 | |||
98 | .globl atomic64_sub | ||
99 | .type atomic64_sub,#function | ||
100 | atomic64_sub: /* %o0 = decrement, %o1 = atomic_ptr */ | ||
101 | 1: ldx [%o1], %g1 | ||
102 | sub %g1, %o0, %g7 | ||
103 | casx [%o1], %g1, %g7 | ||
104 | cmp %g1, %g7 | ||
105 | bne,pn %xcc, 1b | ||
106 | nop | ||
107 | retl | ||
108 | nop | ||
109 | .size atomic64_sub, .-atomic64_sub | ||
110 | |||
111 | .globl atomic64_add_ret | ||
112 | .type atomic64_add_ret,#function | ||
113 | atomic64_add_ret: /* %o0 = increment, %o1 = atomic_ptr */ | ||
114 | ATOMIC_PRE_BARRIER | ||
115 | 1: ldx [%o1], %g1 | ||
116 | add %g1, %o0, %g7 | ||
117 | casx [%o1], %g1, %g7 | ||
118 | cmp %g1, %g7 | ||
119 | bne,pn %xcc, 1b | ||
120 | add %g7, %o0, %g7 | ||
121 | ATOMIC_POST_BARRIER | ||
122 | retl | ||
123 | mov %g7, %o0 | ||
124 | .size atomic64_add_ret, .-atomic64_add_ret | ||
125 | |||
126 | .globl atomic64_sub_ret | ||
127 | .type atomic64_sub_ret,#function | ||
128 | atomic64_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */ | ||
129 | ATOMIC_PRE_BARRIER | ||
130 | 1: ldx [%o1], %g1 | ||
131 | sub %g1, %o0, %g7 | ||
132 | casx [%o1], %g1, %g7 | ||
133 | cmp %g1, %g7 | ||
134 | bne,pn %xcc, 1b | ||
135 | sub %g7, %o0, %g7 | ||
136 | ATOMIC_POST_BARRIER | ||
137 | retl | ||
138 | mov %g7, %o0 | ||
139 | .size atomic64_sub_ret, .-atomic64_sub_ret | ||
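
Every routine above is the same compare-and-swap retry loop: load the old value, compute the new one, cas it in, and retry if another cpu got there first. A rough C rendering of atomic_add_ret (a sketch only; the GCC __sync builtin stands in for the lduw/cas/cmp/bne sequence, and the helper name is illustrative):

    /* Sketch of the cas retry loop in atomic_add_ret. */
    static int atomic_add_ret_sketch(int i, int *v)
    {
            int old;

            do {
                    old = *v;               /* lduw [%o1], %g1      */
            } while (__sync_val_compare_and_swap(v, old, old + i) != old);
                                            /* cas; cmp; bne,pn 1b  */
            return old + i;                 /* add %g7, %o0; sra    */
    }
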
diff --git a/arch/sparc64/lib/bitops.S b/arch/sparc64/lib/bitops.S new file mode 100644 index 000000000000..886dcd2b376a --- /dev/null +++ b/arch/sparc64/lib/bitops.S | |||
@@ -0,0 +1,145 @@ | |||
1 | /* $Id: bitops.S,v 1.3 2001/11/18 00:12:56 davem Exp $ | ||
2 | * bitops.S: Sparc64 atomic bit operations. | ||
3 | * | ||
4 | * Copyright (C) 2000 David S. Miller (davem@redhat.com) | ||
5 | */ | ||
6 | |||
7 | #include <linux/config.h> | ||
8 | #include <asm/asi.h> | ||
9 | |||
10 | /* On SMP we need to use memory barriers to ensure | ||
11 | * correct memory operation ordering; we nop these out | ||
12 | * for uniprocessor. | ||
13 | */ | ||
14 | #ifdef CONFIG_SMP | ||
15 | #define BITOP_PRE_BARRIER membar #StoreLoad | #LoadLoad | ||
16 | #define BITOP_POST_BARRIER membar #StoreLoad | #StoreStore | ||
17 | #else | ||
18 | #define BITOP_PRE_BARRIER nop | ||
19 | #define BITOP_POST_BARRIER nop | ||
20 | #endif | ||
21 | |||
22 | .text | ||
23 | |||
24 | .globl test_and_set_bit | ||
25 | .type test_and_set_bit,#function | ||
26 | test_and_set_bit: /* %o0=nr, %o1=addr */ | ||
27 | BITOP_PRE_BARRIER | ||
28 | srlx %o0, 6, %g1 | ||
29 | mov 1, %o2 | ||
30 | sllx %g1, 3, %g3 | ||
31 | and %o0, 63, %g2 | ||
32 | sllx %o2, %g2, %o2 | ||
33 | add %o1, %g3, %o1 | ||
34 | 1: ldx [%o1], %g7 | ||
35 | or %g7, %o2, %g1 | ||
36 | casx [%o1], %g7, %g1 | ||
37 | cmp %g7, %g1 | ||
38 | bne,pn %xcc, 1b | ||
39 | and %g7, %o2, %g2 | ||
40 | BITOP_POST_BARRIER | ||
41 | clr %o0 | ||
42 | retl | ||
43 | movrne %g2, 1, %o0 | ||
44 | .size test_and_set_bit, .-test_and_set_bit | ||
45 | |||
46 | .globl test_and_clear_bit | ||
47 | .type test_and_clear_bit,#function | ||
48 | test_and_clear_bit: /* %o0=nr, %o1=addr */ | ||
49 | BITOP_PRE_BARRIER | ||
50 | srlx %o0, 6, %g1 | ||
51 | mov 1, %o2 | ||
52 | sllx %g1, 3, %g3 | ||
53 | and %o0, 63, %g2 | ||
54 | sllx %o2, %g2, %o2 | ||
55 | add %o1, %g3, %o1 | ||
56 | 1: ldx [%o1], %g7 | ||
57 | andn %g7, %o2, %g1 | ||
58 | casx [%o1], %g7, %g1 | ||
59 | cmp %g7, %g1 | ||
60 | bne,pn %xcc, 1b | ||
61 | and %g7, %o2, %g2 | ||
62 | BITOP_POST_BARRIER | ||
63 | clr %o0 | ||
64 | retl | ||
65 | movrne %g2, 1, %o0 | ||
66 | .size test_and_clear_bit, .-test_and_clear_bit | ||
67 | |||
68 | .globl test_and_change_bit | ||
69 | .type test_and_change_bit,#function | ||
70 | test_and_change_bit: /* %o0=nr, %o1=addr */ | ||
71 | BITOP_PRE_BARRIER | ||
72 | srlx %o0, 6, %g1 | ||
73 | mov 1, %o2 | ||
74 | sllx %g1, 3, %g3 | ||
75 | and %o0, 63, %g2 | ||
76 | sllx %o2, %g2, %o2 | ||
77 | add %o1, %g3, %o1 | ||
78 | 1: ldx [%o1], %g7 | ||
79 | xor %g7, %o2, %g1 | ||
80 | casx [%o1], %g7, %g1 | ||
81 | cmp %g7, %g1 | ||
82 | bne,pn %xcc, 1b | ||
83 | and %g7, %o2, %g2 | ||
84 | BITOP_POST_BARRIER | ||
85 | clr %o0 | ||
86 | retl | ||
87 | movrne %g2, 1, %o0 | ||
88 | .size test_and_change_bit, .-test_and_change_bit | ||
89 | |||
90 | .globl set_bit | ||
91 | .type set_bit,#function | ||
92 | set_bit: /* %o0=nr, %o1=addr */ | ||
93 | srlx %o0, 6, %g1 | ||
94 | mov 1, %o2 | ||
95 | sllx %g1, 3, %g3 | ||
96 | and %o0, 63, %g2 | ||
97 | sllx %o2, %g2, %o2 | ||
98 | add %o1, %g3, %o1 | ||
99 | 1: ldx [%o1], %g7 | ||
100 | or %g7, %o2, %g1 | ||
101 | casx [%o1], %g7, %g1 | ||
102 | cmp %g7, %g1 | ||
103 | bne,pn %xcc, 1b | ||
104 | nop | ||
105 | retl | ||
106 | nop | ||
107 | .size set_bit, .-set_bit | ||
108 | |||
109 | .globl clear_bit | ||
110 | .type clear_bit,#function | ||
111 | clear_bit: /* %o0=nr, %o1=addr */ | ||
112 | srlx %o0, 6, %g1 | ||
113 | mov 1, %o2 | ||
114 | sllx %g1, 3, %g3 | ||
115 | and %o0, 63, %g2 | ||
116 | sllx %o2, %g2, %o2 | ||
117 | add %o1, %g3, %o1 | ||
118 | 1: ldx [%o1], %g7 | ||
119 | andn %g7, %o2, %g1 | ||
120 | casx [%o1], %g7, %g1 | ||
121 | cmp %g7, %g1 | ||
122 | bne,pn %xcc, 1b | ||
123 | nop | ||
124 | retl | ||
125 | nop | ||
126 | .size clear_bit, .-clear_bit | ||
127 | |||
128 | .globl change_bit | ||
129 | .type change_bit,#function | ||
130 | change_bit: /* %o0=nr, %o1=addr */ | ||
131 | srlx %o0, 6, %g1 | ||
132 | mov 1, %o2 | ||
133 | sllx %g1, 3, %g3 | ||
134 | and %o0, 63, %g2 | ||
135 | sllx %o2, %g2, %o2 | ||
136 | add %o1, %g3, %o1 | ||
137 | 1: ldx [%o1], %g7 | ||
138 | xor %g7, %o2, %g1 | ||
139 | casx [%o1], %g7, %g1 | ||
140 | cmp %g7, %g1 | ||
141 | bne,pn %xcc, 1b | ||
142 | nop | ||
143 | retl | ||
144 | nop | ||
145 | .size change_bit, .-change_bit | ||
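
All six bitops share the address arithmetic that precedes the cas loop: the bit number is split into a 64-bit word index and a mask within that word. In C terms (a sketch; the builtin again stands in for casx and the function name is illustrative):

    /* Sketch of test_and_set_bit: index/mask split plus cas loop. */
    static int test_and_set_bit_sketch(unsigned long nr, unsigned long *addr)
    {
            unsigned long *word = addr + (nr >> 6);  /* srlx %o0, 6; sllx 3 */
            unsigned long mask = 1UL << (nr & 63);   /* and %o0, 63; sllx   */
            unsigned long old;

            do {
                    old = *word;
            } while (__sync_val_compare_and_swap(word, old, old | mask) != old);

            return (old & mask) != 0;                /* movrne %g2, 1, %o0  */
    }
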
diff --git a/arch/sparc64/lib/bzero.S b/arch/sparc64/lib/bzero.S new file mode 100644 index 000000000000..21a933ffb7c2 --- /dev/null +++ b/arch/sparc64/lib/bzero.S | |||
@@ -0,0 +1,158 @@ | |||
1 | /* bzero.S: Simple prefetching memset, bzero, and clear_user | ||
2 | * implementations. | ||
3 | * | ||
4 | * Copyright (C) 2005 David S. Miller <davem@davemloft.net> | ||
5 | */ | ||
6 | |||
7 | .text | ||
8 | |||
9 | .globl __memset | ||
10 | .type __memset, #function | ||
11 | __memset: /* %o0=buf, %o1=pat, %o2=len */ | ||
12 | |||
13 | .globl memset | ||
14 | .type memset, #function | ||
15 | memset: /* %o0=buf, %o1=pat, %o2=len */ | ||
16 | and %o1, 0xff, %o3 | ||
17 | mov %o2, %o1 | ||
18 | sllx %o3, 8, %g1 | ||
19 | or %g1, %o3, %o2 | ||
20 | sllx %o2, 16, %g1 | ||
21 | or %g1, %o2, %o2 | ||
22 | sllx %o2, 32, %g1 | ||
23 | ba,pt %xcc, 1f | ||
24 | or %g1, %o2, %o2 | ||
25 | |||
26 | .globl __bzero | ||
27 | .type __bzero, #function | ||
28 | __bzero: /* %o0=buf, %o1=len */ | ||
29 | clr %o2 | ||
30 | 1: mov %o0, %o3 | ||
31 | brz,pn %o1, __bzero_done | ||
32 | cmp %o1, 16 | ||
33 | bl,pn %icc, __bzero_tiny | ||
34 | prefetch [%o0 + 0x000], #n_writes | ||
35 | andcc %o0, 0x3, %g0 | ||
36 | be,pt %icc, 2f | ||
37 | 1: stb %o2, [%o0 + 0x00] | ||
38 | add %o0, 1, %o0 | ||
39 | andcc %o0, 0x3, %g0 | ||
40 | bne,pn %icc, 1b | ||
41 | sub %o1, 1, %o1 | ||
42 | 2: andcc %o0, 0x7, %g0 | ||
43 | be,pt %icc, 3f | ||
44 | stw %o2, [%o0 + 0x00] | ||
45 | sub %o1, 4, %o1 | ||
46 | add %o0, 4, %o0 | ||
47 | 3: and %o1, 0x38, %g1 | ||
48 | cmp %o1, 0x40 | ||
49 | andn %o1, 0x3f, %o4 | ||
50 | bl,pn %icc, 5f | ||
51 | and %o1, 0x7, %o1 | ||
52 | prefetch [%o0 + 0x040], #n_writes | ||
53 | prefetch [%o0 + 0x080], #n_writes | ||
54 | prefetch [%o0 + 0x0c0], #n_writes | ||
55 | prefetch [%o0 + 0x100], #n_writes | ||
56 | prefetch [%o0 + 0x140], #n_writes | ||
57 | 4: prefetch [%o0 + 0x180], #n_writes | ||
58 | stx %o2, [%o0 + 0x00] | ||
59 | stx %o2, [%o0 + 0x08] | ||
60 | stx %o2, [%o0 + 0x10] | ||
61 | stx %o2, [%o0 + 0x18] | ||
62 | stx %o2, [%o0 + 0x20] | ||
63 | stx %o2, [%o0 + 0x28] | ||
64 | stx %o2, [%o0 + 0x30] | ||
65 | stx %o2, [%o0 + 0x38] | ||
66 | subcc %o4, 0x40, %o4 | ||
67 | bne,pt %icc, 4b | ||
68 | add %o0, 0x40, %o0 | ||
69 | brz,pn %g1, 6f | ||
70 | nop | ||
71 | 5: stx %o2, [%o0 + 0x00] | ||
72 | subcc %g1, 8, %g1 | ||
73 | bne,pt %icc, 5b | ||
74 | add %o0, 0x8, %o0 | ||
75 | 6: brz,pt %o1, __bzero_done | ||
76 | nop | ||
77 | __bzero_tiny: | ||
78 | 1: stb %o2, [%o0 + 0x00] | ||
79 | subcc %o1, 1, %o1 | ||
80 | bne,pt %icc, 1b | ||
81 | add %o0, 1, %o0 | ||
82 | __bzero_done: | ||
83 | retl | ||
84 | mov %o3, %o0 | ||
85 | .size __bzero, .-__bzero | ||
86 | .size __memset, .-__memset | ||
87 | .size memset, .-memset | ||
88 | |||
89 | #define EX_ST(x,y) \ | ||
90 | 98: x,y; \ | ||
91 | .section .fixup; \ | ||
92 | .align 4; \ | ||
93 | 99: retl; \ | ||
94 | mov %o1, %o0; \ | ||
95 | .section __ex_table; \ | ||
96 | .align 4; \ | ||
97 | .word 98b, 99b; \ | ||
98 | .text; \ | ||
99 | .align 4; | ||
100 | |||
101 | .globl __bzero_noasi | ||
102 | .type __bzero_noasi, #function | ||
103 | __bzero_noasi: /* %o0=buf, %o1=len */ | ||
104 | brz,pn %o1, __bzero_noasi_done | ||
105 | cmp %o1, 16 | ||
106 | bl,pn %icc, __bzero_noasi_tiny | ||
107 | EX_ST(prefetcha [%o0 + 0x00] %asi, #n_writes) | ||
108 | andcc %o0, 0x3, %g0 | ||
109 | be,pt %icc, 2f | ||
110 | 1: EX_ST(stba %g0, [%o0 + 0x00] %asi) | ||
111 | add %o0, 1, %o0 | ||
112 | andcc %o0, 0x3, %g0 | ||
113 | bne,pn %icc, 1b | ||
114 | sub %o1, 1, %o1 | ||
115 | 2: andcc %o0, 0x7, %g0 | ||
116 | be,pt %icc, 3f | ||
117 | EX_ST(stwa %g0, [%o0 + 0x00] %asi) | ||
118 | sub %o1, 4, %o1 | ||
119 | add %o0, 4, %o0 | ||
120 | 3: and %o1, 0x38, %g1 | ||
121 | cmp %o1, 0x40 | ||
122 | andn %o1, 0x3f, %o4 | ||
123 | bl,pn %icc, 5f | ||
124 | and %o1, 0x7, %o1 | ||
125 | EX_ST(prefetcha [%o0 + 0x040] %asi, #n_writes) | ||
126 | EX_ST(prefetcha [%o0 + 0x080] %asi, #n_writes) | ||
127 | EX_ST(prefetcha [%o0 + 0x0c0] %asi, #n_writes) | ||
128 | EX_ST(prefetcha [%o0 + 0x100] %asi, #n_writes) | ||
129 | EX_ST(prefetcha [%o0 + 0x140] %asi, #n_writes) | ||
130 | 4: EX_ST(prefetcha [%o0 + 0x180] %asi, #n_writes) | ||
131 | EX_ST(stxa %g0, [%o0 + 0x00] %asi) | ||
132 | EX_ST(stxa %g0, [%o0 + 0x08] %asi) | ||
133 | EX_ST(stxa %g0, [%o0 + 0x10] %asi) | ||
134 | EX_ST(stxa %g0, [%o0 + 0x18] %asi) | ||
135 | EX_ST(stxa %g0, [%o0 + 0x20] %asi) | ||
136 | EX_ST(stxa %g0, [%o0 + 0x28] %asi) | ||
137 | EX_ST(stxa %g0, [%o0 + 0x30] %asi) | ||
138 | EX_ST(stxa %g0, [%o0 + 0x38] %asi) | ||
139 | subcc %o4, 0x40, %o4 | ||
140 | bne,pt %icc, 4b | ||
141 | add %o0, 0x40, %o0 | ||
142 | brz,pn %g1, 6f | ||
143 | nop | ||
144 | 5: EX_ST(stxa %g0, [%o0 + 0x00] %asi) | ||
145 | subcc %g1, 8, %g1 | ||
146 | bne,pt %icc, 5b | ||
147 | add %o0, 0x8, %o0 | ||
148 | 6: brz,pt %o1, __bzero_noasi_done | ||
149 | nop | ||
150 | __bzero_noasi_tiny: | ||
151 | 1: EX_ST(stba %g0, [%o0 + 0x00] %asi) | ||
152 | subcc %o1, 1, %o1 | ||
153 | bne,pt %icc, 1b | ||
154 | add %o0, 1, %o0 | ||
155 | __bzero_noasi_done: | ||
156 | retl | ||
157 | clr %o0 | ||
158 | .size __bzero_noasi, .-__bzero_noasi | ||
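
The sllx/or prologue of memset widens the fill byte into a full 64-bit pattern so the unrolled loop can emit it with 8-byte stx stores. The same doubling trick in C (a sketch; the function name is illustrative):

    /* Sketch: widen a fill byte into the 64-bit pattern memset stores. */
    static unsigned long spread_byte(unsigned long c)
    {
            unsigned long pat = c & 0xff;   /* and %o1, 0xff, %o3 */
            pat |= pat << 8;                /* sllx 8;  or        */
            pat |= pat << 16;               /* sllx 16; or        */
            pat |= pat << 32;               /* sllx 32; or        */
            return pat;
    }
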
diff --git a/arch/sparc64/lib/checksum.S b/arch/sparc64/lib/checksum.S new file mode 100644 index 000000000000..ba9cd3ccc2b2 --- /dev/null +++ b/arch/sparc64/lib/checksum.S | |||
@@ -0,0 +1,172 @@ | |||
1 | /* checksum.S: Sparc V9 optimized checksum code. | ||
2 | * | ||
3 | * Copyright(C) 1995 Linus Torvalds | ||
4 | * Copyright(C) 1995 Miguel de Icaza | ||
5 | * Copyright(C) 1996, 2000 David S. Miller | ||
6 | * Copyright(C) 1997 Jakub Jelinek | ||
7 | * | ||
8 | * derived from: | ||
9 | * Linux/Alpha checksum c-code | ||
10 | * Linux/ix86 inline checksum assembly | ||
11 | * RFC1071 Computing the Internet Checksum (esp. Jacobson's m68k code) | ||
12 | * David Mosberger-Tang for optimized reference c-code | ||
13 | * BSD4.4 portable checksum routine | ||
14 | */ | ||
15 | |||
16 | .text | ||
17 | |||
18 | csum_partial_fix_alignment: | ||
19 | /* We checked for zero length already, so there must be | ||
20 | * at least one byte. | ||
21 | */ | ||
22 | be,pt %icc, 1f | ||
23 | nop | ||
24 | ldub [%o0 + 0x00], %o4 | ||
25 | add %o0, 1, %o0 | ||
26 | sub %o1, 1, %o1 | ||
27 | 1: andcc %o0, 0x2, %g0 | ||
28 | be,pn %icc, csum_partial_post_align | ||
29 | cmp %o1, 2 | ||
30 | blu,pn %icc, csum_partial_end_cruft | ||
31 | nop | ||
32 | lduh [%o0 + 0x00], %o5 | ||
33 | add %o0, 2, %o0 | ||
34 | sub %o1, 2, %o1 | ||
35 | ba,pt %xcc, csum_partial_post_align | ||
36 | add %o5, %o4, %o4 | ||
37 | |||
38 | .align 32 | ||
39 | .globl csum_partial | ||
40 | csum_partial: /* %o0=buff, %o1=len, %o2=sum */ | ||
41 | prefetch [%o0 + 0x000], #n_reads | ||
42 | clr %o4 | ||
43 | prefetch [%o0 + 0x040], #n_reads | ||
44 | brz,pn %o1, csum_partial_finish | ||
45 | andcc %o0, 0x3, %g0 | ||
46 | |||
47 | /* We "remember" in %g7 whether the lowest bit of the | ||
48 | * address was set, because if it was we must swap the | ||
49 | * upper and lower 8-bit fields of the sum we calculate. | ||
50 | */ | ||
51 | bne,pn %icc, csum_partial_fix_alignment | ||
52 | andcc %o0, 0x1, %g7 | ||
53 | |||
54 | csum_partial_post_align: | ||
55 | prefetch [%o0 + 0x080], #n_reads | ||
56 | andncc %o1, 0x3f, %o3 | ||
57 | |||
58 | prefetch [%o0 + 0x0c0], #n_reads | ||
59 | sub %o1, %o3, %o1 | ||
60 | brz,pn %o3, 2f | ||
61 | prefetch [%o0 + 0x100], #n_reads | ||
62 | |||
63 | /* So that we don't need to use the non-pairing | ||
64 | * add-with-carry instructions we accumulate 32-bit | ||
65 | * values into a 64-bit register. At the end of the | ||
66 | * loop we fold it down to 32 bits, then to 16. | ||
67 | */ | ||
68 | prefetch [%o0 + 0x140], #n_reads | ||
69 | 1: lduw [%o0 + 0x00], %o5 | ||
70 | lduw [%o0 + 0x04], %g1 | ||
71 | lduw [%o0 + 0x08], %g2 | ||
72 | add %o4, %o5, %o4 | ||
73 | lduw [%o0 + 0x0c], %g3 | ||
74 | add %o4, %g1, %o4 | ||
75 | lduw [%o0 + 0x10], %o5 | ||
76 | add %o4, %g2, %o4 | ||
77 | lduw [%o0 + 0x14], %g1 | ||
78 | add %o4, %g3, %o4 | ||
79 | lduw [%o0 + 0x18], %g2 | ||
80 | add %o4, %o5, %o4 | ||
81 | lduw [%o0 + 0x1c], %g3 | ||
82 | add %o4, %g1, %o4 | ||
83 | lduw [%o0 + 0x20], %o5 | ||
84 | add %o4, %g2, %o4 | ||
85 | lduw [%o0 + 0x24], %g1 | ||
86 | add %o4, %g3, %o4 | ||
87 | lduw [%o0 + 0x28], %g2 | ||
88 | add %o4, %o5, %o4 | ||
89 | lduw [%o0 + 0x2c], %g3 | ||
90 | add %o4, %g1, %o4 | ||
91 | lduw [%o0 + 0x30], %o5 | ||
92 | add %o4, %g2, %o4 | ||
93 | lduw [%o0 + 0x34], %g1 | ||
94 | add %o4, %g3, %o4 | ||
95 | lduw [%o0 + 0x38], %g2 | ||
96 | add %o4, %o5, %o4 | ||
97 | lduw [%o0 + 0x3c], %g3 | ||
98 | add %o4, %g1, %o4 | ||
99 | prefetch [%o0 + 0x180], #n_reads | ||
100 | add %o4, %g2, %o4 | ||
101 | subcc %o3, 0x40, %o3 | ||
102 | add %o0, 0x40, %o0 | ||
103 | bne,pt %icc, 1b | ||
104 | add %o4, %g3, %o4 | ||
105 | |||
106 | 2: and %o1, 0x3c, %o3 | ||
107 | brz,pn %o3, 2f | ||
108 | sub %o1, %o3, %o1 | ||
109 | 1: lduw [%o0 + 0x00], %o5 | ||
110 | subcc %o3, 0x4, %o3 | ||
111 | add %o0, 0x4, %o0 | ||
112 | bne,pt %icc, 1b | ||
113 | add %o4, %o5, %o4 | ||
114 | |||
115 | 2: | ||
116 | /* fold 64-->32 */ | ||
117 | srlx %o4, 32, %o5 | ||
118 | srl %o4, 0, %o4 | ||
119 | add %o4, %o5, %o4 | ||
120 | srlx %o4, 32, %o5 | ||
121 | srl %o4, 0, %o4 | ||
122 | add %o4, %o5, %o4 | ||
123 | |||
124 | /* fold 32-->16 */ | ||
125 | sethi %hi(0xffff0000), %g1 | ||
126 | srl %o4, 16, %o5 | ||
127 | andn %o4, %g1, %g2 | ||
128 | add %o5, %g2, %o4 | ||
129 | srl %o4, 16, %o5 | ||
130 | andn %o4, %g1, %g2 | ||
131 | add %o5, %g2, %o4 | ||
132 | |||
133 | csum_partial_end_cruft: | ||
134 | /* %o4 has the 16-bit sum we have calculated so far. */ | ||
135 | cmp %o1, 2 | ||
136 | blu,pt %icc, 1f | ||
137 | nop | ||
138 | lduh [%o0 + 0x00], %o5 | ||
139 | sub %o1, 2, %o1 | ||
140 | add %o0, 2, %o0 | ||
141 | add %o4, %o5, %o4 | ||
142 | 1: brz,pt %o1, 1f | ||
143 | nop | ||
144 | ldub [%o0 + 0x00], %o5 | ||
145 | sub %o1, 1, %o1 | ||
146 | add %o0, 1, %o0 | ||
147 | sllx %o5, 8, %o5 | ||
148 | add %o4, %o5, %o4 | ||
149 | 1: | ||
150 | /* fold 32-->16 */ | ||
151 | sethi %hi(0xffff0000), %g1 | ||
152 | srl %o4, 16, %o5 | ||
153 | andn %o4, %g1, %g2 | ||
154 | add %o5, %g2, %o4 | ||
155 | srl %o4, 16, %o5 | ||
156 | andn %o4, %g1, %g2 | ||
157 | add %o5, %g2, %o4 | ||
158 | |||
159 | 1: brz,pt %g7, 1f | ||
160 | nop | ||
161 | |||
162 | /* We started with an odd byte, byte-swap the result. */ | ||
163 | srl %o4, 8, %o5 | ||
164 | and %o4, 0xff, %g1 | ||
165 | sll %g1, 8, %g1 | ||
166 | or %o5, %g1, %o4 | ||
167 | |||
168 | 1: add %o2, %o4, %o2 | ||
169 | |||
170 | csum_partial_finish: | ||
171 | retl | ||
172 | mov %o2, %o0 | ||
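
The two fold sequences implement the end-around carry of ones-complement arithmetic: the halves are added twice so that a carry out of the first addition is absorbed by the second. As a C sketch:

    /* Sketch of the fold 64-->32 and fold 32-->16 steps. */
    static unsigned long csum_fold_sketch(unsigned long sum)
    {
            sum = (sum >> 32) + (sum & 0xffffffffUL);  /* fold 64 --> 32 */
            sum = (sum >> 32) + (sum & 0xffffffffUL);  /* absorb carry   */
            sum = (sum >> 16) + (sum & 0xffffUL);      /* fold 32 --> 16 */
            sum = (sum >> 16) + (sum & 0xffffUL);      /* absorb carry   */
            return sum;
    }
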
diff --git a/arch/sparc64/lib/clear_page.S b/arch/sparc64/lib/clear_page.S new file mode 100644 index 000000000000..b59884ef051d --- /dev/null +++ b/arch/sparc64/lib/clear_page.S | |||
@@ -0,0 +1,105 @@ | |||
1 | /* clear_page.S: UltraSparc optimized clear page. | ||
2 | * | ||
3 | * Copyright (C) 1996, 1998, 1999, 2000, 2004 David S. Miller (davem@redhat.com) | ||
4 | * Copyright (C) 1997 Jakub Jelinek (jakub@redhat.com) | ||
5 | */ | ||
6 | |||
7 | #include <asm/visasm.h> | ||
8 | #include <asm/thread_info.h> | ||
9 | #include <asm/page.h> | ||
10 | #include <asm/pgtable.h> | ||
11 | #include <asm/spitfire.h> | ||
12 | |||
13 | /* What we used to do was lock a TLB entry into a specific | ||
14 | * TLB slot, clear the page with interrupts disabled, then | ||
15 | * restore the original TLB entry. This was great for | ||
16 | * disturbing the TLB as little as possible, but it meant | ||
17 | * we had to keep interrupts disabled for a long time. | ||
18 | * | ||
19 | * Now, we simply use the normal TLB loading mechanism, | ||
20 | * and this makes the cpu choose a slot all by itself. | ||
21 | * Then we do a normal TLB flush on exit. We need only | ||
22 | * disable preemption during the clear. | ||
23 | */ | ||
24 | |||
25 | #define TTE_BITS_TOP (_PAGE_VALID | _PAGE_SZBITS) | ||
26 | #define TTE_BITS_BOTTOM (_PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_L | _PAGE_W) | ||
27 | |||
28 | .text | ||
29 | |||
30 | .globl _clear_page | ||
31 | _clear_page: /* %o0=dest */ | ||
32 | ba,pt %xcc, clear_page_common | ||
33 | clr %o4 | ||
34 | |||
35 | /* This thing is pretty important, it shows up | ||
36 | * on the profiles via do_anonymous_page(). | ||
37 | */ | ||
38 | .align 32 | ||
39 | .globl clear_user_page | ||
40 | clear_user_page: /* %o0=dest, %o1=vaddr */ | ||
41 | lduw [%g6 + TI_PRE_COUNT], %o2 | ||
42 | sethi %uhi(PAGE_OFFSET), %g2 | ||
43 | sethi %hi(PAGE_SIZE), %o4 | ||
44 | |||
45 | sllx %g2, 32, %g2 | ||
46 | sethi %uhi(TTE_BITS_TOP), %g3 | ||
47 | |||
48 | sllx %g3, 32, %g3 | ||
49 | sub %o0, %g2, %g1 ! paddr | ||
50 | |||
51 | or %g3, TTE_BITS_BOTTOM, %g3 | ||
52 | and %o1, %o4, %o0 ! vaddr D-cache alias bit | ||
53 | |||
54 | or %g1, %g3, %g1 ! TTE data | ||
55 | sethi %hi(TLBTEMP_BASE), %o3 | ||
56 | |||
57 | add %o2, 1, %o4 | ||
58 | add %o0, %o3, %o0 ! TTE vaddr | ||
59 | |||
60 | /* Disable preemption. */ | ||
61 | mov TLB_TAG_ACCESS, %g3 | ||
62 | stw %o4, [%g6 + TI_PRE_COUNT] | ||
63 | |||
64 | /* Load TLB entry. */ | ||
65 | rdpr %pstate, %o4 | ||
66 | wrpr %o4, PSTATE_IE, %pstate | ||
67 | stxa %o0, [%g3] ASI_DMMU | ||
68 | stxa %g1, [%g0] ASI_DTLB_DATA_IN | ||
69 | flush %g6 | ||
70 | wrpr %o4, 0x0, %pstate | ||
71 | |||
72 | mov 1, %o4 | ||
73 | |||
74 | clear_page_common: | ||
75 | VISEntryHalf | ||
76 | membar #StoreLoad | #StoreStore | #LoadStore | ||
77 | fzero %f0 | ||
78 | sethi %hi(PAGE_SIZE/64), %o1 | ||
79 | mov %o0, %g1 ! remember vaddr for tlbflush | ||
80 | fzero %f2 | ||
81 | or %o1, %lo(PAGE_SIZE/64), %o1 | ||
82 | faddd %f0, %f2, %f4 | ||
83 | fmuld %f0, %f2, %f6 | ||
84 | faddd %f0, %f2, %f8 | ||
85 | fmuld %f0, %f2, %f10 | ||
86 | |||
87 | faddd %f0, %f2, %f12 | ||
88 | fmuld %f0, %f2, %f14 | ||
89 | 1: stda %f0, [%o0 + %g0] ASI_BLK_P | ||
90 | subcc %o1, 1, %o1 | ||
91 | bne,pt %icc, 1b | ||
92 | add %o0, 0x40, %o0 | ||
93 | membar #Sync | ||
94 | VISExitHalf | ||
95 | |||
96 | brz,pn %o4, out | ||
97 | nop | ||
98 | |||
99 | stxa %g0, [%g1] ASI_DMMU_DEMAP | ||
100 | membar #Sync | ||
101 | stw %o2, [%g6 + TI_PRE_COUNT] | ||
102 | |||
103 | out: retl | ||
104 | nop | ||
105 | |||
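
In the clear_user_page path the TI_PRE_COUNT load/store pair is an open-coded preempt disable: the count is bumped before the temporary TLB entry is installed and the saved value is written back after the demap. Roughly (a sketch of the intent, not the real call sequence):

    preempt_disable();              /* lduw TI_PRE_COUNT; add 1; stw */
    /* ... install TTE, block-store the page through TLBTEMP ... */
    preempt_enable_no_resched();    /* stw of the saved count        */
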
diff --git a/arch/sparc64/lib/copy_in_user.S b/arch/sparc64/lib/copy_in_user.S new file mode 100644 index 000000000000..816076c0bc06 --- /dev/null +++ b/arch/sparc64/lib/copy_in_user.S | |||
@@ -0,0 +1,119 @@ | |||
1 | /* copy_in_user.S: Copy from userspace to userspace. | ||
2 | * | ||
3 | * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com) | ||
4 | */ | ||
5 | |||
6 | #include <asm/asi.h> | ||
7 | |||
8 | #define XCC xcc | ||
9 | |||
10 | #define EX(x,y) \ | ||
11 | 98: x,y; \ | ||
12 | .section .fixup; \ | ||
13 | .align 4; \ | ||
14 | 99: retl; \ | ||
15 | mov 1, %o0; \ | ||
16 | .section __ex_table; \ | ||
17 | .align 4; \ | ||
18 | .word 98b, 99b; \ | ||
19 | .text; \ | ||
20 | .align 4; | ||
21 | |||
22 | .register %g2,#scratch | ||
23 | .register %g3,#scratch | ||
24 | |||
25 | .text | ||
26 | .align 32 | ||
27 | |||
28 | /* Don't try to get too fancy here, just nice and | ||
29 | * simple. This is predominantly used for well aligned | ||
30 | * small copies in the compat layer. It is also used | ||
31 | * to copy register windows around during thread cloning. | ||
32 | */ | ||
33 | |||
34 | .globl ___copy_in_user | ||
35 | .type ___copy_in_user,#function | ||
36 | ___copy_in_user: /* %o0=dst, %o1=src, %o2=len */ | ||
37 | /* Writing to %asi is _expensive_ so we hardcode it. | ||
38 | * Reading %asi to check for KERNEL_DS is comparatively | ||
39 | * cheap. | ||
40 | */ | ||
41 | rd %asi, %g1 | ||
42 | cmp %g1, ASI_AIUS | ||
43 | bne,pn %icc, memcpy_user_stub | ||
44 | nop | ||
45 | |||
46 | cmp %o2, 0 | ||
47 | be,pn %XCC, 85f | ||
48 | or %o0, %o1, %o3 | ||
49 | cmp %o2, 16 | ||
50 | bleu,a,pn %XCC, 80f | ||
51 | or %o3, %o2, %o3 | ||
52 | |||
53 | /* 16 < len <= 64 */ | ||
54 | andcc %o3, 0x7, %g0 | ||
55 | bne,pn %XCC, 90f | ||
56 | sub %o0, %o1, %o3 | ||
57 | |||
58 | andn %o2, 0x7, %o4 | ||
59 | and %o2, 0x7, %o2 | ||
60 | 1: subcc %o4, 0x8, %o4 | ||
61 | EX(ldxa [%o1] %asi, %o5) | ||
62 | EX(stxa %o5, [%o1 + %o3] ASI_AIUS) | ||
63 | bgu,pt %XCC, 1b | ||
64 | add %o1, 0x8, %o1 | ||
65 | andcc %o2, 0x4, %g0 | ||
66 | be,pt %XCC, 1f | ||
67 | nop | ||
68 | sub %o2, 0x4, %o2 | ||
69 | EX(lduwa [%o1] %asi, %o5) | ||
70 | EX(stwa %o5, [%o1 + %o3] ASI_AIUS) | ||
71 | add %o1, 0x4, %o1 | ||
72 | 1: cmp %o2, 0 | ||
73 | be,pt %XCC, 85f | ||
74 | nop | ||
75 | ba,pt %xcc, 90f | ||
76 | nop | ||
77 | |||
78 | 80: /* 0 < len <= 16 */ | ||
79 | andcc %o3, 0x3, %g0 | ||
80 | bne,pn %XCC, 90f | ||
81 | sub %o0, %o1, %o3 | ||
82 | |||
83 | 82: | ||
84 | subcc %o2, 4, %o2 | ||
85 | EX(lduwa [%o1] %asi, %g1) | ||
86 | EX(stwa %g1, [%o1 + %o3] ASI_AIUS) | ||
87 | bgu,pt %XCC, 82b | ||
88 | add %o1, 4, %o1 | ||
89 | |||
90 | 85: retl | ||
91 | clr %o0 | ||
92 | |||
93 | .align 32 | ||
94 | 90: | ||
95 | subcc %o2, 1, %o2 | ||
96 | EX(lduba [%o1] %asi, %g1) | ||
97 | EX(stba %g1, [%o1 + %o3] ASI_AIUS) | ||
98 | bgu,pt %XCC, 90b | ||
99 | add %o1, 1, %o1 | ||
100 | retl | ||
101 | clr %o0 | ||
102 | |||
103 | .size ___copy_in_user, .-___copy_in_user | ||
104 | |||
105 | /* Act like copy_{to,in}_user(), i.e. return zero instead | ||
106 | * of original destination pointer. This is invoked when | ||
107 | * copy_{to,in}_user() finds that %asi is kernel space. | ||
108 | */ | ||
109 | .globl memcpy_user_stub | ||
110 | .type memcpy_user_stub,#function | ||
111 | memcpy_user_stub: | ||
112 | save %sp, -192, %sp | ||
113 | mov %i0, %o0 | ||
114 | mov %i1, %o1 | ||
115 | call memcpy | ||
116 | mov %i2, %o2 | ||
117 | ret | ||
118 | restore %g0, %g0, %o0 | ||
119 | .size memcpy_user_stub, .-memcpy_user_stub | ||
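
The EX() macro above is the fault-recovery pattern used throughout these files: the `.word 98b, 99b` pair records a (faulting pc, fixup pc) entry in __ex_table, so a trap on the wrapped access resumes at the local 99: stub, which here returns 1 to report a partial copy. For example, EX(ldxa [%o1] %asi, %o5) places the ldxa at label 98 and registers the retl/mov-1 stub as its landing pad.
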
diff --git a/arch/sparc64/lib/copy_page.S b/arch/sparc64/lib/copy_page.S new file mode 100644 index 000000000000..23ebf2c970b7 --- /dev/null +++ b/arch/sparc64/lib/copy_page.S | |||
@@ -0,0 +1,242 @@ | |||
1 | /* copy_page.S: UltraSparc optimized copy page. | ||
2 | * | ||
3 | * Copyright (C) 1996, 1998, 1999, 2000, 2004 David S. Miller (davem@redhat.com) | ||
4 | * Copyright (C) 1997 Jakub Jelinek (jakub@redhat.com) | ||
5 | */ | ||
6 | |||
7 | #include <asm/visasm.h> | ||
8 | #include <asm/thread_info.h> | ||
9 | #include <asm/page.h> | ||
10 | #include <asm/pgtable.h> | ||
11 | #include <asm/spitfire.h> | ||
12 | #include <asm/head.h> | ||
13 | |||
14 | /* What we used to do was lock a TLB entry into a specific | ||
15 | * TLB slot, copy the page with interrupts disabled, then | ||
16 | * restore the original TLB entry. This was great for | ||
17 | * disturbing the TLB as little as possible, but it meant | ||
18 | * we had to keep interrupts disabled for a long time. | ||
19 | * | ||
20 | * Now, we simply use the normal TLB loading mechanism, | ||
21 | * and this makes the cpu choose a slot all by itself. | ||
22 | * Then we do a normal TLB flush on exit. We need only | ||
23 | * disable preemption during the copy. | ||
24 | */ | ||
25 | |||
26 | #define TTE_BITS_TOP (_PAGE_VALID | _PAGE_SZBITS) | ||
27 | #define TTE_BITS_BOTTOM (_PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_L | _PAGE_W) | ||
28 | #define DCACHE_SIZE (PAGE_SIZE * 2) | ||
29 | |||
30 | #if (PAGE_SHIFT == 13) || (PAGE_SHIFT == 19) | ||
31 | #define PAGE_SIZE_REM 0x80 | ||
32 | #elif (PAGE_SHIFT == 16) || (PAGE_SHIFT == 22) | ||
33 | #define PAGE_SIZE_REM 0x100 | ||
34 | #else | ||
35 | #error Wrong PAGE_SHIFT specified | ||
36 | #endif | ||
37 | |||
38 | #define TOUCH(reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7) \ | ||
39 | fmovd %reg0, %f48; fmovd %reg1, %f50; \ | ||
40 | fmovd %reg2, %f52; fmovd %reg3, %f54; \ | ||
41 | fmovd %reg4, %f56; fmovd %reg5, %f58; \ | ||
42 | fmovd %reg6, %f60; fmovd %reg7, %f62; | ||
43 | |||
44 | .text | ||
45 | |||
46 | .align 32 | ||
47 | .globl copy_user_page | ||
48 | .type copy_user_page,#function | ||
49 | copy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */ | ||
50 | lduw [%g6 + TI_PRE_COUNT], %o4 | ||
51 | sethi %uhi(PAGE_OFFSET), %g2 | ||
52 | sethi %hi(PAGE_SIZE), %o3 | ||
53 | |||
54 | sllx %g2, 32, %g2 | ||
55 | sethi %uhi(TTE_BITS_TOP), %g3 | ||
56 | |||
57 | sllx %g3, 32, %g3 | ||
58 | sub %o0, %g2, %g1 ! dest paddr | ||
59 | |||
60 | sub %o1, %g2, %g2 ! src paddr | ||
61 | or %g3, TTE_BITS_BOTTOM, %g3 | ||
62 | |||
63 | and %o2, %o3, %o0 ! vaddr D-cache alias bit | ||
64 | or %g1, %g3, %g1 ! dest TTE data | ||
65 | |||
66 | or %g2, %g3, %g2 ! src TTE data | ||
67 | sethi %hi(TLBTEMP_BASE), %o3 | ||
68 | |||
69 | sethi %hi(DCACHE_SIZE), %o1 | ||
70 | add %o0, %o3, %o0 ! dest TTE vaddr | ||
71 | |||
72 | add %o4, 1, %o2 | ||
73 | add %o0, %o1, %o1 ! src TTE vaddr | ||
74 | |||
75 | /* Disable preemption. */ | ||
76 | mov TLB_TAG_ACCESS, %g3 | ||
77 | stw %o2, [%g6 + TI_PRE_COUNT] | ||
78 | |||
79 | /* Load TLB entries. */ | ||
80 | rdpr %pstate, %o2 | ||
81 | wrpr %o2, PSTATE_IE, %pstate | ||
82 | stxa %o0, [%g3] ASI_DMMU | ||
83 | stxa %g1, [%g0] ASI_DTLB_DATA_IN | ||
84 | membar #Sync | ||
85 | stxa %o1, [%g3] ASI_DMMU | ||
86 | stxa %g2, [%g0] ASI_DTLB_DATA_IN | ||
87 | membar #Sync | ||
88 | wrpr %o2, 0x0, %pstate | ||
89 | |||
90 | BRANCH_IF_ANY_CHEETAH(g3,o2,1f) | ||
91 | ba,pt %xcc, 9f | ||
92 | nop | ||
93 | |||
94 | 1: | ||
95 | VISEntryHalf | ||
96 | membar #StoreLoad | #StoreStore | #LoadStore | ||
97 | sethi %hi((PAGE_SIZE/64)-2), %o2 | ||
98 | mov %o0, %g1 | ||
99 | prefetch [%o1 + 0x000], #one_read | ||
100 | or %o2, %lo((PAGE_SIZE/64)-2), %o2 | ||
101 | prefetch [%o1 + 0x040], #one_read | ||
102 | prefetch [%o1 + 0x080], #one_read | ||
103 | prefetch [%o1 + 0x0c0], #one_read | ||
104 | ldd [%o1 + 0x000], %f0 | ||
105 | prefetch [%o1 + 0x100], #one_read | ||
106 | ldd [%o1 + 0x008], %f2 | ||
107 | prefetch [%o1 + 0x140], #one_read | ||
108 | ldd [%o1 + 0x010], %f4 | ||
109 | prefetch [%o1 + 0x180], #one_read | ||
110 | fmovd %f0, %f16 | ||
111 | ldd [%o1 + 0x018], %f6 | ||
112 | fmovd %f2, %f18 | ||
113 | ldd [%o1 + 0x020], %f8 | ||
114 | fmovd %f4, %f20 | ||
115 | ldd [%o1 + 0x028], %f10 | ||
116 | fmovd %f6, %f22 | ||
117 | ldd [%o1 + 0x030], %f12 | ||
118 | fmovd %f8, %f24 | ||
119 | ldd [%o1 + 0x038], %f14 | ||
120 | fmovd %f10, %f26 | ||
121 | ldd [%o1 + 0x040], %f0 | ||
122 | 1: ldd [%o1 + 0x048], %f2 | ||
123 | fmovd %f12, %f28 | ||
124 | ldd [%o1 + 0x050], %f4 | ||
125 | fmovd %f14, %f30 | ||
126 | stda %f16, [%o0] ASI_BLK_P | ||
127 | ldd [%o1 + 0x058], %f6 | ||
128 | fmovd %f0, %f16 | ||
129 | ldd [%o1 + 0x060], %f8 | ||
130 | fmovd %f2, %f18 | ||
131 | ldd [%o1 + 0x068], %f10 | ||
132 | fmovd %f4, %f20 | ||
133 | ldd [%o1 + 0x070], %f12 | ||
134 | fmovd %f6, %f22 | ||
135 | ldd [%o1 + 0x078], %f14 | ||
136 | fmovd %f8, %f24 | ||
137 | ldd [%o1 + 0x080], %f0 | ||
138 | prefetch [%o1 + 0x180], #one_read | ||
139 | fmovd %f10, %f26 | ||
140 | subcc %o2, 1, %o2 | ||
141 | add %o0, 0x40, %o0 | ||
142 | bne,pt %xcc, 1b | ||
143 | add %o1, 0x40, %o1 | ||
144 | |||
145 | ldd [%o1 + 0x048], %f2 | ||
146 | fmovd %f12, %f28 | ||
147 | ldd [%o1 + 0x050], %f4 | ||
148 | fmovd %f14, %f30 | ||
149 | stda %f16, [%o0] ASI_BLK_P | ||
150 | ldd [%o1 + 0x058], %f6 | ||
151 | fmovd %f0, %f16 | ||
152 | ldd [%o1 + 0x060], %f8 | ||
153 | fmovd %f2, %f18 | ||
154 | ldd [%o1 + 0x068], %f10 | ||
155 | fmovd %f4, %f20 | ||
156 | ldd [%o1 + 0x070], %f12 | ||
157 | fmovd %f6, %f22 | ||
158 | add %o0, 0x40, %o0 | ||
159 | ldd [%o1 + 0x078], %f14 | ||
160 | fmovd %f8, %f24 | ||
161 | fmovd %f10, %f26 | ||
162 | fmovd %f12, %f28 | ||
163 | fmovd %f14, %f30 | ||
164 | stda %f16, [%o0] ASI_BLK_P | ||
165 | membar #Sync | ||
166 | VISExitHalf | ||
167 | ba,pt %xcc, 5f | ||
168 | nop | ||
169 | |||
170 | 9: | ||
171 | VISEntry | ||
172 | ldub [%g6 + TI_FAULT_CODE], %g3 | ||
173 | mov %o0, %g1 | ||
174 | cmp %g3, 0 | ||
175 | rd %asi, %g3 | ||
176 | be,a,pt %icc, 1f | ||
177 | wr %g0, ASI_BLK_P, %asi | ||
178 | wr %g0, ASI_BLK_COMMIT_P, %asi | ||
179 | 1: ldda [%o1] ASI_BLK_P, %f0 | ||
180 | add %o1, 0x40, %o1 | ||
181 | ldda [%o1] ASI_BLK_P, %f16 | ||
182 | add %o1, 0x40, %o1 | ||
183 | sethi %hi(PAGE_SIZE), %o2 | ||
184 | 1: TOUCH(f0, f2, f4, f6, f8, f10, f12, f14) | ||
185 | ldda [%o1] ASI_BLK_P, %f32 | ||
186 | stda %f48, [%o0] %asi | ||
187 | add %o1, 0x40, %o1 | ||
188 | sub %o2, 0x40, %o2 | ||
189 | add %o0, 0x40, %o0 | ||
190 | TOUCH(f16, f18, f20, f22, f24, f26, f28, f30) | ||
191 | ldda [%o1] ASI_BLK_P, %f0 | ||
192 | stda %f48, [%o0] %asi | ||
193 | add %o1, 0x40, %o1 | ||
194 | sub %o2, 0x40, %o2 | ||
195 | add %o0, 0x40, %o0 | ||
196 | TOUCH(f32, f34, f36, f38, f40, f42, f44, f46) | ||
197 | ldda [%o1] ASI_BLK_P, %f16 | ||
198 | stda %f48, [%o0] %asi | ||
199 | sub %o2, 0x40, %o2 | ||
200 | add %o1, 0x40, %o1 | ||
201 | cmp %o2, PAGE_SIZE_REM | ||
202 | bne,pt %xcc, 1b | ||
203 | add %o0, 0x40, %o0 | ||
204 | #if (PAGE_SHIFT == 16) || (PAGE_SHIFT == 22) | ||
205 | TOUCH(f0, f2, f4, f6, f8, f10, f12, f14) | ||
206 | ldda [%o1] ASI_BLK_P, %f32 | ||
207 | stda %f48, [%o0] %asi | ||
208 | add %o1, 0x40, %o1 | ||
209 | sub %o2, 0x40, %o2 | ||
210 | add %o0, 0x40, %o0 | ||
211 | TOUCH(f16, f18, f20, f22, f24, f26, f28, f30) | ||
212 | ldda [%o1] ASI_BLK_P, %f0 | ||
213 | stda %f48, [%o0] %asi | ||
214 | add %o1, 0x40, %o1 | ||
215 | sub %o2, 0x40, %o2 | ||
216 | add %o0, 0x40, %o0 | ||
217 | membar #Sync | ||
218 | stda %f32, [%o0] %asi | ||
219 | add %o0, 0x40, %o0 | ||
220 | stda %f0, [%o0] %asi | ||
221 | #else | ||
222 | membar #Sync | ||
223 | stda %f0, [%o0] %asi | ||
224 | add %o0, 0x40, %o0 | ||
225 | stda %f16, [%o0] %asi | ||
226 | #endif | ||
227 | membar #Sync | ||
228 | wr %g3, 0x0, %asi | ||
229 | VISExit | ||
230 | |||
231 | 5: | ||
232 | stxa %g0, [%g1] ASI_DMMU_DEMAP | ||
233 | membar #Sync | ||
234 | |||
235 | sethi %hi(DCACHE_SIZE), %g2 | ||
236 | stxa %g0, [%g1 + %g2] ASI_DMMU_DEMAP | ||
237 | membar #Sync | ||
238 | |||
239 | retl | ||
240 | stw %o4, [%g6 + TI_PRE_COUNT] | ||
241 | |||
242 | .size copy_user_page, .-copy_user_page | ||
diff --git a/arch/sparc64/lib/csum_copy.S b/arch/sparc64/lib/csum_copy.S new file mode 100644 index 000000000000..71af48839064 --- /dev/null +++ b/arch/sparc64/lib/csum_copy.S | |||
@@ -0,0 +1,308 @@ | |||
1 | /* csum_copy.S: Checksum+copy code for sparc64 | ||
2 | * | ||
3 | * Copyright (C) 2005 David S. Miller <davem@davemloft.net> | ||
4 | */ | ||
5 | |||
6 | #ifdef __KERNEL__ | ||
7 | #define GLOBAL_SPARE %g7 | ||
8 | #else | ||
9 | #define GLOBAL_SPARE %g5 | ||
10 | #endif | ||
11 | |||
12 | #ifndef EX_LD | ||
13 | #define EX_LD(x) x | ||
14 | #endif | ||
15 | |||
16 | #ifndef EX_ST | ||
17 | #define EX_ST(x) x | ||
18 | #endif | ||
19 | |||
20 | #ifndef EX_RETVAL | ||
21 | #define EX_RETVAL(x) x | ||
22 | #endif | ||
23 | |||
24 | #ifndef LOAD | ||
25 | #define LOAD(type,addr,dest) type [addr], dest | ||
26 | #endif | ||
27 | |||
28 | #ifndef STORE | ||
29 | #define STORE(type,src,addr) type src, [addr] | ||
30 | #endif | ||
31 | |||
32 | #ifndef FUNC_NAME | ||
33 | #define FUNC_NAME csum_partial_copy_nocheck | ||
34 | #endif | ||
35 | |||
36 | .register %g2, #scratch | ||
37 | .register %g3, #scratch | ||
38 | |||
39 | .text | ||
40 | |||
41 | 90: | ||
42 | /* We checked for zero length already, so there must be | ||
43 | * at least one byte. | ||
44 | */ | ||
45 | be,pt %icc, 1f | ||
46 | nop | ||
47 | EX_LD(LOAD(ldub, %o0 + 0x00, %o4)) | ||
48 | add %o0, 1, %o0 | ||
49 | sub %o2, 1, %o2 | ||
50 | EX_ST(STORE(stb, %o4, %o1 + 0x00)) | ||
51 | add %o1, 1, %o1 | ||
52 | 1: andcc %o0, 0x2, %g0 | ||
53 | be,pn %icc, 80f | ||
54 | cmp %o2, 2 | ||
55 | blu,pn %icc, 60f | ||
56 | nop | ||
57 | EX_LD(LOAD(lduh, %o0 + 0x00, %o5)) | ||
58 | add %o0, 2, %o0 | ||
59 | sub %o2, 2, %o2 | ||
60 | EX_ST(STORE(sth, %o5, %o1 + 0x00)) | ||
61 | add %o1, 2, %o1 | ||
62 | ba,pt %xcc, 80f | ||
63 | add %o5, %o4, %o4 | ||
64 | |||
65 | .globl FUNC_NAME | ||
66 | FUNC_NAME: /* %o0=src, %o1=dst, %o2=len, %o3=sum */ | ||
67 | LOAD(prefetch, %o0 + 0x000, #n_reads) | ||
68 | xor %o0, %o1, %g1 | ||
69 | clr %o4 | ||
70 | andcc %g1, 0x3, %g0 | ||
71 | bne,pn %icc, 95f | ||
72 | LOAD(prefetch, %o0 + 0x040, #n_reads) | ||
73 | |||
74 | brz,pn %o2, 70f | ||
75 | andcc %o0, 0x3, %g0 | ||
76 | |||
77 | /* We "remember" in GLOBAL_SPARE whether the lowest bit | ||
78 | * of the address was set, because if it was we must swap | ||
79 | * the upper and lower 8-bit fields of the sum we calculate. | ||
80 | */ | ||
81 | bne,pn %icc, 90b | ||
82 | andcc %o0, 0x1, GLOBAL_SPARE | ||
83 | |||
84 | 80: | ||
85 | LOAD(prefetch, %o0 + 0x080, #n_reads) | ||
86 | andncc %o2, 0x3f, %g3 | ||
87 | |||
88 | LOAD(prefetch, %o0 + 0x0c0, #n_reads) | ||
89 | sub %o2, %g3, %o2 | ||
90 | brz,pn %g3, 2f | ||
91 | LOAD(prefetch, %o0 + 0x100, #n_reads) | ||
92 | |||
93 | /* So that we don't need to use the non-pairing | ||
94 | * add-with-carry instructions we accumulate 32-bit | ||
95 | * values into a 64-bit register. At the end of the | ||
96 | * loop we fold it down to 32 bits, then to 16. | ||
97 | */ | ||
98 | ba,pt %xcc, 1f | ||
99 | LOAD(prefetch, %o0 + 0x140, #n_reads) | ||
100 | |||
101 | .align 32 | ||
102 | 1: EX_LD(LOAD(lduw, %o0 + 0x00, %o5)) | ||
103 | EX_LD(LOAD(lduw, %o0 + 0x04, %g1)) | ||
104 | EX_LD(LOAD(lduw, %o0 + 0x08, %g2)) | ||
105 | add %o4, %o5, %o4 | ||
106 | EX_ST(STORE(stw, %o5, %o1 + 0x00)) | ||
107 | EX_LD(LOAD(lduw, %o0 + 0x0c, %o5)) | ||
108 | add %o4, %g1, %o4 | ||
109 | EX_ST(STORE(stw, %g1, %o1 + 0x04)) | ||
110 | EX_LD(LOAD(lduw, %o0 + 0x10, %g1)) | ||
111 | add %o4, %g2, %o4 | ||
112 | EX_ST(STORE(stw, %g2, %o1 + 0x08)) | ||
113 | EX_LD(LOAD(lduw, %o0 + 0x14, %g2)) | ||
114 | add %o4, %o5, %o4 | ||
115 | EX_ST(STORE(stw, %o5, %o1 + 0x0c)) | ||
116 | EX_LD(LOAD(lduw, %o0 + 0x18, %o5)) | ||
117 | add %o4, %g1, %o4 | ||
118 | EX_ST(STORE(stw, %g1, %o1 + 0x10)) | ||
119 | EX_LD(LOAD(lduw, %o0 + 0x1c, %g1)) | ||
120 | add %o4, %g2, %o4 | ||
121 | EX_ST(STORE(stw, %g2, %o1 + 0x14)) | ||
122 | EX_LD(LOAD(lduw, %o0 + 0x20, %g2)) | ||
123 | add %o4, %o5, %o4 | ||
124 | EX_ST(STORE(stw, %o5, %o1 + 0x18)) | ||
125 | EX_LD(LOAD(lduw, %o0 + 0x24, %o5)) | ||
126 | add %o4, %g1, %o4 | ||
127 | EX_ST(STORE(stw, %g1, %o1 + 0x1c)) | ||
128 | EX_LD(LOAD(lduw, %o0 + 0x28, %g1)) | ||
129 | add %o4, %g2, %o4 | ||
130 | EX_ST(STORE(stw, %g2, %o1 + 0x20)) | ||
131 | EX_LD(LOAD(lduw, %o0 + 0x2c, %g2)) | ||
132 | add %o4, %o5, %o4 | ||
133 | EX_ST(STORE(stw, %o5, %o1 + 0x24)) | ||
134 | EX_LD(LOAD(lduw, %o0 + 0x30, %o5)) | ||
135 | add %o4, %g1, %o4 | ||
136 | EX_ST(STORE(stw, %g1, %o1 + 0x28)) | ||
137 | EX_LD(LOAD(lduw, %o0 + 0x34, %g1)) | ||
138 | add %o4, %g2, %o4 | ||
139 | EX_ST(STORE(stw, %g2, %o1 + 0x2c)) | ||
140 | EX_LD(LOAD(lduw, %o0 + 0x38, %g2)) | ||
141 | add %o4, %o5, %o4 | ||
142 | EX_ST(STORE(stw, %o5, %o1 + 0x30)) | ||
143 | EX_LD(LOAD(lduw, %o0 + 0x3c, %o5)) | ||
144 | add %o4, %g1, %o4 | ||
145 | EX_ST(STORE(stw, %g1, %o1 + 0x34)) | ||
146 | LOAD(prefetch, %o0 + 0x180, #n_reads) | ||
147 | add %o4, %g2, %o4 | ||
148 | EX_ST(STORE(stw, %g2, %o1 + 0x38)) | ||
149 | subcc %g3, 0x40, %g3 | ||
150 | add %o0, 0x40, %o0 | ||
151 | add %o4, %o5, %o4 | ||
152 | EX_ST(STORE(stw, %o5, %o1 + 0x3c)) | ||
153 | bne,pt %icc, 1b | ||
154 | add %o1, 0x40, %o1 | ||
155 | |||
156 | 2: and %o2, 0x3c, %g3 | ||
157 | brz,pn %g3, 2f | ||
158 | sub %o2, %g3, %o2 | ||
159 | 1: EX_LD(LOAD(lduw, %o0 + 0x00, %o5)) | ||
160 | subcc %g3, 0x4, %g3 | ||
161 | add %o0, 0x4, %o0 | ||
162 | add %o4, %o5, %o4 | ||
163 | EX_ST(STORE(stw, %o5, %o1 + 0x00)) | ||
164 | bne,pt %icc, 1b | ||
165 | add %o1, 0x4, %o1 | ||
166 | |||
167 | 2: | ||
168 | /* fold 64-->32 */ | ||
169 | srlx %o4, 32, %o5 | ||
170 | srl %o4, 0, %o4 | ||
171 | add %o4, %o5, %o4 | ||
172 | srlx %o4, 32, %o5 | ||
173 | srl %o4, 0, %o4 | ||
174 | add %o4, %o5, %o4 | ||
175 | |||
176 | /* fold 32-->16 */ | ||
177 | sethi %hi(0xffff0000), %g1 | ||
178 | srl %o4, 16, %o5 | ||
179 | andn %o4, %g1, %g2 | ||
180 | add %o5, %g2, %o4 | ||
181 | srl %o4, 16, %o5 | ||
182 | andn %o4, %g1, %g2 | ||
183 | add %o5, %g2, %o4 | ||
184 | |||
185 | 60: | ||
186 | /* %o4 has the 16-bit sum we have calculated so far. */ | ||
187 | cmp %o2, 2 | ||
188 | blu,pt %icc, 1f | ||
189 | nop | ||
190 | EX_LD(LOAD(lduh, %o0 + 0x00, %o5)) | ||
191 | sub %o2, 2, %o2 | ||
192 | add %o0, 2, %o0 | ||
193 | add %o4, %o5, %o4 | ||
194 | EX_ST(STORE(sth, %o5, %o1 + 0x00)) | ||
195 | add %o1, 0x2, %o1 | ||
196 | 1: brz,pt %o2, 1f | ||
197 | nop | ||
198 | EX_LD(LOAD(ldub, %o0 + 0x00, %o5)) | ||
199 | sub %o2, 1, %o2 | ||
200 | add %o0, 1, %o0 | ||
201 | EX_ST(STORE(stb, %o5, %o1 + 0x00)) | ||
202 | sllx %o5, 8, %o5 | ||
203 | add %o1, 1, %o1 | ||
204 | add %o4, %o5, %o4 | ||
205 | 1: | ||
206 | /* fold 32-->16 */ | ||
207 | sethi %hi(0xffff0000), %g1 | ||
208 | srl %o4, 16, %o5 | ||
209 | andn %o4, %g1, %g2 | ||
210 | add %o5, %g2, %o4 | ||
211 | srl %o4, 16, %o5 | ||
212 | andn %o4, %g1, %g2 | ||
213 | add %o5, %g2, %o4 | ||
214 | |||
215 | 1: brz,pt GLOBAL_SPARE, 1f | ||
216 | nop | ||
217 | |||
218 | /* We started with an odd byte, byte-swap the result. */ | ||
219 | srl %o4, 8, %o5 | ||
220 | and %o4, 0xff, %g1 | ||
221 | sll %g1, 8, %g1 | ||
222 | or %o5, %g1, %o4 | ||
223 | |||
224 | 1: add %o3, %o4, %o3 | ||
225 | |||
226 | 70: | ||
227 | retl | ||
228 | mov %o3, %o0 | ||
229 | |||
230 | 95: mov 0, GLOBAL_SPARE | ||
231 | brlez,pn %o2, 4f | ||
232 | andcc %o0, 1, %o5 | ||
233 | be,a,pt %icc, 1f | ||
234 | srl %o2, 1, %g1 | ||
235 | sub %o2, 1, %o2 | ||
236 | EX_LD(LOAD(ldub, %o0, GLOBAL_SPARE)) | ||
237 | add %o0, 1, %o0 | ||
238 | EX_ST(STORE(stb, GLOBAL_SPARE, %o1)) | ||
239 | srl %o2, 1, %g1 | ||
240 | add %o1, 1, %o1 | ||
241 | 1: brz,a,pn %g1, 3f | ||
242 | andcc %o2, 1, %g0 | ||
243 | andcc %o0, 2, %g0 | ||
244 | be,a,pt %icc, 1f | ||
245 | srl %g1, 1, %g1 | ||
246 | EX_LD(LOAD(lduh, %o0, %o4)) | ||
247 | sub %o2, 2, %o2 | ||
248 | srl %o4, 8, %g2 | ||
249 | sub %g1, 1, %g1 | ||
250 | EX_ST(STORE(stb, %g2, %o1)) | ||
251 | add %o4, GLOBAL_SPARE, GLOBAL_SPARE | ||
252 | EX_ST(STORE(stb, %o4, %o1 + 1)) | ||
253 | add %o0, 2, %o0 | ||
254 | srl %g1, 1, %g1 | ||
255 | add %o1, 2, %o1 | ||
256 | 1: brz,a,pn %g1, 2f | ||
257 | andcc %o2, 2, %g0 | ||
258 | EX_LD(LOAD(lduw, %o0, %o4)) | ||
259 | 5: srl %o4, 24, %g2 | ||
260 | srl %o4, 16, %g3 | ||
261 | EX_ST(STORE(stb, %g2, %o1)) | ||
262 | srl %o4, 8, %g2 | ||
263 | EX_ST(STORE(stb, %g3, %o1 + 1)) | ||
264 | add %o0, 4, %o0 | ||
265 | EX_ST(STORE(stb, %g2, %o1 + 2)) | ||
266 | addcc %o4, GLOBAL_SPARE, GLOBAL_SPARE | ||
267 | EX_ST(STORE(stb, %o4, %o1 + 3)) | ||
268 | addc GLOBAL_SPARE, %g0, GLOBAL_SPARE | ||
269 | add %o1, 4, %o1 | ||
270 | subcc %g1, 1, %g1 | ||
271 | bne,a,pt %icc, 5b | ||
272 | EX_LD(LOAD(lduw, %o0, %o4)) | ||
273 | sll GLOBAL_SPARE, 16, %g2 | ||
274 | srl GLOBAL_SPARE, 16, GLOBAL_SPARE | ||
275 | srl %g2, 16, %g2 | ||
276 | andcc %o2, 2, %g0 | ||
277 | add %g2, GLOBAL_SPARE, GLOBAL_SPARE | ||
278 | 2: be,a,pt %icc, 3f | ||
279 | andcc %o2, 1, %g0 | ||
280 | EX_LD(LOAD(lduh, %o0, %o4)) | ||
281 | andcc %o2, 1, %g0 | ||
282 | srl %o4, 8, %g2 | ||
283 | add %o0, 2, %o0 | ||
284 | EX_ST(STORE(stb, %g2, %o1)) | ||
285 | add GLOBAL_SPARE, %o4, GLOBAL_SPARE | ||
286 | EX_ST(STORE(stb, %o4, %o1 + 1)) | ||
287 | add %o1, 2, %o1 | ||
288 | 3: be,a,pt %icc, 1f | ||
289 | sll GLOBAL_SPARE, 16, %o4 | ||
290 | EX_LD(LOAD(ldub, %o0, %g2)) | ||
291 | sll %g2, 8, %o4 | ||
292 | EX_ST(STORE(stb, %g2, %o1)) | ||
293 | add GLOBAL_SPARE, %o4, GLOBAL_SPARE | ||
294 | sll GLOBAL_SPARE, 16, %o4 | ||
295 | 1: addcc %o4, GLOBAL_SPARE, GLOBAL_SPARE | ||
296 | srl GLOBAL_SPARE, 16, %o4 | ||
297 | addc %g0, %o4, GLOBAL_SPARE | ||
298 | brz,pt %o5, 4f | ||
299 | srl GLOBAL_SPARE, 8, %o4 | ||
300 | and GLOBAL_SPARE, 0xff, %g2 | ||
301 | and %o4, 0xff, %o4 | ||
302 | sll %g2, 8, %g2 | ||
303 | or %g2, %o4, GLOBAL_SPARE | ||
304 | 4: addcc %o3, GLOBAL_SPARE, %o3 | ||
305 | addc %g0, %o3, %o0 | ||
306 | retl | ||
307 | srl %o0, 0, %o0 | ||
308 | .size FUNC_NAME, .-FUNC_NAME | ||
diff --git a/arch/sparc64/lib/csum_copy_from_user.S b/arch/sparc64/lib/csum_copy_from_user.S new file mode 100644 index 000000000000..817ebdae39f8 --- /dev/null +++ b/arch/sparc64/lib/csum_copy_from_user.S | |||
@@ -0,0 +1,21 @@ | |||
1 | /* csum_copy_from_user.S: Checksum+copy from userspace. | ||
2 | * | ||
3 | * Copyright (C) 2005 David S. Miller (davem@davemloft.net) | ||
4 | */ | ||
5 | |||
6 | #define EX_LD(x) \ | ||
7 | 98: x; \ | ||
8 | .section .fixup; \ | ||
9 | .align 4; \ | ||
10 | 99: retl; \ | ||
11 | mov -1, %o0; \ | ||
12 | .section __ex_table; \ | ||
13 | .align 4; \ | ||
14 | .word 98b, 99b; \ | ||
15 | .text; \ | ||
16 | .align 4; | ||
17 | |||
18 | #define FUNC_NAME __csum_partial_copy_from_user | ||
19 | #define LOAD(type,addr,dest) type##a [addr] %asi, dest | ||
20 | |||
21 | #include "csum_copy.S" | ||
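
This file is a template instantiation rather than a copy: it overrides FUNC_NAME and the LOAD accessor, then includes the generic body. The ##a token paste turns each plain load into its ASI-relative form, so LOAD(lduw, %o0 + 0x00, %o5) in csum_copy.S expands here to `lduwa [%o0 + 0x00] %asi, %o5`, and one body yields the nocheck, from-user, and to-user variants.
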
diff --git a/arch/sparc64/lib/csum_copy_to_user.S b/arch/sparc64/lib/csum_copy_to_user.S new file mode 100644 index 000000000000..c2f9463ea1e2 --- /dev/null +++ b/arch/sparc64/lib/csum_copy_to_user.S | |||
@@ -0,0 +1,21 @@ | |||
1 | /* csum_copy_to_user.S: Checksum+copy to userspace. | ||
2 | * | ||
3 | * Copyright (C) 2005 David S. Miller (davem@davemloft.net) | ||
4 | */ | ||
5 | |||
6 | #define EX_ST(x) \ | ||
7 | 98: x; \ | ||
8 | .section .fixup; \ | ||
9 | .align 4; \ | ||
10 | 99: retl; \ | ||
11 | mov -1, %o0; \ | ||
12 | .section __ex_table; \ | ||
13 | .align 4; \ | ||
14 | .word 98b, 99b; \ | ||
15 | .text; \ | ||
16 | .align 4; | ||
17 | |||
18 | #define FUNC_NAME __csum_partial_copy_to_user | ||
19 | #define STORE(type,src,addr) type##a src, [addr] %asi | ||
20 | |||
21 | #include "csum_copy.S" | ||
diff --git a/arch/sparc64/lib/debuglocks.c b/arch/sparc64/lib/debuglocks.c new file mode 100644 index 000000000000..c421e0c65325 --- /dev/null +++ b/arch/sparc64/lib/debuglocks.c | |||
@@ -0,0 +1,376 @@ | |||
1 | /* $Id: debuglocks.c,v 1.9 2001/11/17 00:10:48 davem Exp $ | ||
2 | * debuglocks.c: Debugging versions of SMP locking primitives. | ||
3 | * | ||
4 | * Copyright (C) 1998 David S. Miller (davem@redhat.com) | ||
5 | */ | ||
6 | |||
7 | #include <linux/config.h> | ||
8 | #include <linux/kernel.h> | ||
9 | #include <linux/sched.h> | ||
10 | #include <linux/spinlock.h> | ||
11 | #include <asm/system.h> | ||
12 | |||
13 | #ifdef CONFIG_SMP | ||
14 | |||
15 | #define GET_CALLER(PC) __asm__ __volatile__("mov %%i7, %0" : "=r" (PC)) | ||
16 | |||
17 | static inline void show (char *str, spinlock_t *lock, unsigned long caller) | ||
18 | { | ||
19 | int cpu = smp_processor_id(); | ||
20 | |||
21 | printk("%s(%p) CPU#%d stuck at %08x, owner PC(%08x):CPU(%x)\n", | ||
22 | str, lock, cpu, (unsigned int) caller, | ||
23 | lock->owner_pc, lock->owner_cpu); | ||
24 | } | ||
25 | |||
26 | static inline void show_read (char *str, rwlock_t *lock, unsigned long caller) | ||
27 | { | ||
28 | int cpu = smp_processor_id(); | ||
29 | |||
30 | printk("%s(%p) CPU#%d stuck at %08x, writer PC(%08x):CPU(%x)\n", | ||
31 | str, lock, cpu, (unsigned int) caller, | ||
32 | lock->writer_pc, lock->writer_cpu); | ||
33 | } | ||
34 | |||
35 | static inline void show_write (char *str, rwlock_t *lock, unsigned long caller) | ||
36 | { | ||
37 | int cpu = smp_processor_id(); | ||
38 | int i; | ||
39 | |||
40 | printk("%s(%p) CPU#%d stuck at %08x\n", | ||
41 | str, lock, cpu, (unsigned int) caller); | ||
42 | printk("Writer: PC(%08x):CPU(%x)\n", | ||
43 | lock->writer_pc, lock->writer_cpu); | ||
44 | printk("Readers:"); | ||
45 | for (i = 0; i < NR_CPUS; i++) | ||
46 | if (lock->reader_pc[i]) | ||
47 | printk(" %d[%08x]", i, lock->reader_pc[i]); | ||
48 | printk("\n"); | ||
49 | } | ||
50 | |||
51 | #undef INIT_STUCK | ||
52 | #define INIT_STUCK 100000000 | ||
53 | |||
54 | void _do_spin_lock(spinlock_t *lock, char *str) | ||
55 | { | ||
56 | unsigned long caller, val; | ||
57 | int stuck = INIT_STUCK; | ||
58 | int cpu = get_cpu(); | ||
59 | int shown = 0; | ||
60 | |||
61 | GET_CALLER(caller); | ||
62 | again: | ||
63 | __asm__ __volatile__("ldstub [%1], %0" | ||
64 | : "=r" (val) | ||
65 | : "r" (&(lock->lock)) | ||
66 | : "memory"); | ||
67 | membar("#StoreLoad | #StoreStore"); | ||
68 | if (val) { | ||
69 | while (lock->lock) { | ||
70 | if (!--stuck) { | ||
71 | if (shown++ <= 2) | ||
72 | show(str, lock, caller); | ||
73 | stuck = INIT_STUCK; | ||
74 | } | ||
75 | membar("#LoadLoad"); | ||
76 | } | ||
77 | goto again; | ||
78 | } | ||
79 | lock->owner_pc = ((unsigned int)caller); | ||
80 | lock->owner_cpu = cpu; | ||
81 | current->thread.smp_lock_count++; | ||
82 | current->thread.smp_lock_pc = ((unsigned int)caller); | ||
83 | |||
84 | put_cpu(); | ||
85 | } | ||
86 | |||
87 | int _do_spin_trylock(spinlock_t *lock) | ||
88 | { | ||
89 | unsigned long val, caller; | ||
90 | int cpu = get_cpu(); | ||
91 | |||
92 | GET_CALLER(caller); | ||
93 | __asm__ __volatile__("ldstub [%1], %0" | ||
94 | : "=r" (val) | ||
95 | : "r" (&(lock->lock)) | ||
96 | : "memory"); | ||
97 | membar("#StoreLoad | #StoreStore"); | ||
98 | if (!val) { | ||
99 | lock->owner_pc = ((unsigned int)caller); | ||
100 | lock->owner_cpu = cpu; | ||
101 | current->thread.smp_lock_count++; | ||
102 | current->thread.smp_lock_pc = ((unsigned int)caller); | ||
103 | } | ||
104 | |||
105 | put_cpu(); | ||
106 | |||
107 | return val == 0; | ||
108 | } | ||
109 | |||
110 | void _do_spin_unlock(spinlock_t *lock) | ||
111 | { | ||
112 | lock->owner_pc = 0; | ||
113 | lock->owner_cpu = NO_PROC_ID; | ||
114 | membar("#StoreStore | #LoadStore"); | ||
115 | lock->lock = 0; | ||
116 | current->thread.smp_lock_count--; | ||
117 | } | ||
118 | |||
119 | /* Keep INIT_STUCK the same... */ | ||
120 | |||
121 | void _do_read_lock (rwlock_t *rw, char *str) | ||
122 | { | ||
123 | unsigned long caller, val; | ||
124 | int stuck = INIT_STUCK; | ||
125 | int cpu = get_cpu(); | ||
126 | int shown = 0; | ||
127 | |||
128 | GET_CALLER(caller); | ||
129 | wlock_again: | ||
130 | /* Wait for any writer to go away. */ | ||
131 | while (((long)(rw->lock)) < 0) { | ||
132 | if (!--stuck) { | ||
133 | if (shown++ <= 2) | ||
134 | show_read(str, rw, caller); | ||
135 | stuck = INIT_STUCK; | ||
136 | } | ||
137 | membar("#LoadLoad"); | ||
138 | } | ||
139 | /* Try once to increment the counter. */ | ||
140 | __asm__ __volatile__( | ||
141 | " ldx [%0], %%g1\n" | ||
142 | " brlz,a,pn %%g1, 2f\n" | ||
143 | " mov 1, %0\n" | ||
144 | " add %%g1, 1, %%g7\n" | ||
145 | " casx [%0], %%g1, %%g7\n" | ||
146 | " sub %%g1, %%g7, %0\n" | ||
147 | "2:" : "=r" (val) | ||
148 | : "0" (&(rw->lock)) | ||
149 | : "g1", "g7", "memory"); | ||
150 | membar("#StoreLoad | #StoreStore"); | ||
151 | if (val) | ||
152 | goto wlock_again; | ||
153 | rw->reader_pc[cpu] = ((unsigned int)caller); | ||
154 | current->thread.smp_lock_count++; | ||
155 | current->thread.smp_lock_pc = ((unsigned int)caller); | ||
156 | |||
157 | put_cpu(); | ||
158 | } | ||
159 | |||
160 | void _do_read_unlock (rwlock_t *rw, char *str) | ||
161 | { | ||
162 | unsigned long caller, val; | ||
163 | int stuck = INIT_STUCK; | ||
164 | int cpu = get_cpu(); | ||
165 | int shown = 0; | ||
166 | |||
167 | GET_CALLER(caller); | ||
168 | |||
169 | /* Drop our identity _first_. */ | ||
170 | rw->reader_pc[cpu] = 0; | ||
171 | current->thread.smp_lock_count--; | ||
172 | runlock_again: | ||
173 | /* Spin trying to decrement the counter using casx. */ | ||
174 | __asm__ __volatile__( | ||
175 | " membar #StoreLoad | #LoadLoad\n" | ||
176 | " ldx [%0], %%g1\n" | ||
177 | " sub %%g1, 1, %%g7\n" | ||
178 | " casx [%0], %%g1, %%g7\n" | ||
179 | " membar #StoreLoad | #StoreStore\n" | ||
180 | " sub %%g1, %%g7, %0\n" | ||
181 | : "=r" (val) | ||
182 | : "0" (&(rw->lock)) | ||
183 | : "g1", "g7", "memory"); | ||
184 | if (val) { | ||
185 | if (!--stuck) { | ||
186 | if (shown++ <= 2) | ||
187 | show_read(str, rw, caller); | ||
188 | stuck = INIT_STUCK; | ||
189 | } | ||
190 | goto runlock_again; | ||
191 | } | ||
192 | |||
193 | put_cpu(); | ||
194 | } | ||
195 | |||
196 | void _do_write_lock (rwlock_t *rw, char *str) | ||
197 | { | ||
198 | unsigned long caller, val; | ||
199 | int stuck = INIT_STUCK; | ||
200 | int cpu = get_cpu(); | ||
201 | int shown = 0; | ||
202 | |||
203 | GET_CALLER(caller); | ||
204 | wlock_again: | ||
205 | /* Spin while there is another writer. */ | ||
206 | while (((long)rw->lock) < 0) { | ||
207 | if (!--stuck) { | ||
208 | if (shown++ <= 2) | ||
209 | show_write(str, rw, caller); | ||
210 | stuck = INIT_STUCK; | ||
211 | } | ||
212 | membar("#LoadLoad"); | ||
213 | } | ||
214 | |||
215 | /* Try to acquire the write bit. */ | ||
216 | __asm__ __volatile__( | ||
217 | " mov 1, %%g3\n" | ||
218 | " sllx %%g3, 63, %%g3\n" | ||
219 | " ldx [%0], %%g1\n" | ||
220 | " brlz,pn %%g1, 1f\n" | ||
221 | " or %%g1, %%g3, %%g7\n" | ||
222 | " casx [%0], %%g1, %%g7\n" | ||
223 | " membar #StoreLoad | #StoreStore\n" | ||
224 | " ba,pt %%xcc, 2f\n" | ||
225 | " sub %%g1, %%g7, %0\n" | ||
226 | "1: mov 1, %0\n" | ||
227 | "2:" : "=r" (val) | ||
228 | : "0" (&(rw->lock)) | ||
229 | : "g3", "g1", "g7", "memory"); | ||
230 | if (val) { | ||
231 | /* We couldn't get the write bit. */ | ||
232 | if (!--stuck) { | ||
233 | if (shown++ <= 2) | ||
234 | show_write(str, rw, caller); | ||
235 | stuck = INIT_STUCK; | ||
236 | } | ||
237 | goto wlock_again; | ||
238 | } | ||
239 | if ((rw->lock & ((1UL<<63)-1UL)) != 0UL) { | ||
240 | /* Readers still around, drop the write | ||
241 | * lock, spin, and try again. | ||
242 | */ | ||
243 | if (!--stuck) { | ||
244 | if (shown++ <= 2) | ||
245 | show_write(str, rw, caller); | ||
246 | stuck = INIT_STUCK; | ||
247 | } | ||
248 | __asm__ __volatile__( | ||
249 | " mov 1, %%g3\n" | ||
250 | " sllx %%g3, 63, %%g3\n" | ||
251 | "1: ldx [%0], %%g1\n" | ||
252 | " andn %%g1, %%g3, %%g7\n" | ||
253 | " casx [%0], %%g1, %%g7\n" | ||
254 | " cmp %%g1, %%g7\n" | ||
255 | " bne,pn %%xcc, 1b\n" | ||
256 | " membar #StoreLoad | #StoreStore" | ||
257 | : /* no outputs */ | ||
258 | : "r" (&(rw->lock)) | ||
259 | : "g3", "g1", "g7", "cc", "memory"); | ||
260 | while(rw->lock != 0) { | ||
261 | if (!--stuck) { | ||
262 | if (shown++ <= 2) | ||
263 | show_write(str, rw, caller); | ||
264 | stuck = INIT_STUCK; | ||
265 | } | ||
266 | membar("#LoadLoad"); | ||
267 | } | ||
268 | goto wlock_again; | ||
269 | } | ||
270 | |||
271 | /* We have it, say who we are. */ | ||
272 | rw->writer_pc = ((unsigned int)caller); | ||
273 | rw->writer_cpu = cpu; | ||
274 | current->thread.smp_lock_count++; | ||
275 | current->thread.smp_lock_pc = ((unsigned int)caller); | ||
276 | |||
277 | put_cpu(); | ||
278 | } | ||
279 | |||
280 | void _do_write_unlock(rwlock_t *rw) | ||
281 | { | ||
282 | unsigned long caller, val; | ||
283 | int stuck = INIT_STUCK; | ||
284 | int shown = 0; | ||
285 | |||
286 | GET_CALLER(caller); | ||
287 | |||
288 | /* Drop our identity _first_ */ | ||
289 | rw->writer_pc = 0; | ||
290 | rw->writer_cpu = NO_PROC_ID; | ||
291 | current->thread.smp_lock_count--; | ||
292 | wlock_again: | ||
293 | __asm__ __volatile__( | ||
294 | " membar #StoreLoad | #LoadLoad\n" | ||
295 | " mov 1, %%g3\n" | ||
296 | " sllx %%g3, 63, %%g3\n" | ||
297 | " ldx [%0], %%g1\n" | ||
298 | " andn %%g1, %%g3, %%g7\n" | ||
299 | " casx [%0], %%g1, %%g7\n" | ||
300 | " membar #StoreLoad | #StoreStore\n" | ||
301 | " sub %%g1, %%g7, %0\n" | ||
302 | : "=r" (val) | ||
303 | : "0" (&(rw->lock)) | ||
304 | : "g3", "g1", "g7", "memory"); | ||
305 | if (val) { | ||
306 | if (!--stuck) { | ||
307 | if (shown++ <= 2) | ||
308 | show_write("write_unlock", rw, caller); | ||
309 | stuck = INIT_STUCK; | ||
310 | } | ||
311 | goto wlock_again; | ||
312 | } | ||
313 | } | ||
314 | |||
315 | int _do_write_trylock (rwlock_t *rw, char *str) | ||
316 | { | ||
317 | unsigned long caller, val; | ||
318 | int cpu = get_cpu(); | ||
319 | |||
320 | GET_CALLER(caller); | ||
321 | |||
322 | /* Try to acquire the write bit. */ | ||
323 | __asm__ __volatile__( | ||
324 | " mov 1, %%g3\n" | ||
325 | " sllx %%g3, 63, %%g3\n" | ||
326 | " ldx [%0], %%g1\n" | ||
327 | " brlz,pn %%g1, 1f\n" | ||
328 | " or %%g1, %%g3, %%g7\n" | ||
329 | " casx [%0], %%g1, %%g7\n" | ||
330 | " membar #StoreLoad | #StoreStore\n" | ||
331 | " ba,pt %%xcc, 2f\n" | ||
332 | " sub %%g1, %%g7, %0\n" | ||
333 | "1: mov 1, %0\n" | ||
334 | "2:" : "=r" (val) | ||
335 | : "0" (&(rw->lock)) | ||
336 | : "g3", "g1", "g7", "memory"); | ||
337 | |||
338 | if (val) { | ||
339 | put_cpu(); | ||
340 | return 0; | ||
341 | } | ||
342 | |||
343 | if ((rw->lock & ((1UL<<63)-1UL)) != 0UL) { | ||
344 | /* Readers still around, drop the write | ||
345 | * lock, return failure. | ||
346 | */ | ||
347 | __asm__ __volatile__( | ||
348 | " mov 1, %%g3\n" | ||
349 | " sllx %%g3, 63, %%g3\n" | ||
350 | "1: ldx [%0], %%g1\n" | ||
351 | " andn %%g1, %%g3, %%g7\n" | ||
352 | " casx [%0], %%g1, %%g7\n" | ||
353 | " cmp %%g1, %%g7\n" | ||
354 | " bne,pn %%xcc, 1b\n" | ||
355 | " membar #StoreLoad | #StoreStore" | ||
356 | : /* no outputs */ | ||
357 | : "r" (&(rw->lock)) | ||
358 | : "g3", "g1", "g7", "cc", "memory"); | ||
359 | |||
360 | put_cpu(); | ||
361 | |||
362 | return 0; | ||
363 | } | ||
364 | |||
365 | /* We have it, say who we are. */ | ||
366 | rw->writer_pc = ((unsigned int)caller); | ||
367 | rw->writer_cpu = cpu; | ||
368 | current->thread.smp_lock_count++; | ||
369 | current->thread.smp_lock_pc = ((unsigned int)caller); | ||
370 | |||
371 | put_cpu(); | ||
372 | |||
373 | return 1; | ||
374 | } | ||
375 | |||
376 | #endif /* CONFIG_SMP */ | ||
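
The ldstub in the inline assembly is SPARC's atomic test-and-set: it fetches the lock byte and writes 0xff back in one step, so a returned zero means the caller took the lock. Wrapped up as a helper (a sketch built from the same asm used above; the wrapper name is illustrative):

    /* Sketch: ldstub as a byte-sized trylock primitive. */
    static inline int trylock_byte(unsigned char *lock)
    {
            unsigned long old;

            __asm__ __volatile__("ldstub [%1], %0"
                                 : "=r" (old)
                                 : "r" (lock)
                                 : "memory");
            return old == 0;    /* 0 --> lock was free and is now ours */
    }
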
diff --git a/arch/sparc64/lib/dec_and_lock.S b/arch/sparc64/lib/dec_and_lock.S new file mode 100644 index 000000000000..7e6fdaebedba --- /dev/null +++ b/arch/sparc64/lib/dec_and_lock.S | |||
@@ -0,0 +1,78 @@ | |||
1 | /* $Id: dec_and_lock.S,v 1.5 2001/11/18 00:12:56 davem Exp $ | ||
2 | * dec_and_lock.S: Sparc64 version of "atomic_dec_and_lock()" | ||
3 | * using cas and ldstub instructions. | ||
4 | * | ||
5 | * Copyright (C) 2000 David S. Miller (davem@redhat.com) | ||
6 | */ | ||
7 | #include <linux/config.h> | ||
8 | #include <asm/thread_info.h> | ||
9 | |||
10 | .text | ||
11 | .align 64 | ||
12 | |||
13 | /* CAS basically works like this: | ||
14 | * | ||
15 | * void CAS(MEM, REG1, REG2) | ||
16 | * { | ||
17 | * START_ATOMIC(); | ||
18 | * if (*(MEM) == REG1) { | ||
19 | * TMP = *(MEM); | ||
20 | * *(MEM) = REG2; | ||
21 | * REG2 = TMP; | ||
22 | * } else | ||
23 | * REG2 = *(MEM); | ||
24 | * END_ATOMIC(); | ||
25 | * } | ||
26 | */ | ||
27 | |||
28 | .globl _atomic_dec_and_lock | ||
29 | _atomic_dec_and_lock: /* %o0 = counter, %o1 = lock */ | ||
30 | loop1: lduw [%o0], %g2 | ||
31 | subcc %g2, 1, %g7 | ||
32 | be,pn %icc, start_to_zero | ||
33 | nop | ||
34 | nzero: cas [%o0], %g2, %g7 | ||
35 | cmp %g2, %g7 | ||
36 | bne,pn %icc, loop1 | ||
37 | mov 0, %g1 | ||
38 | |||
39 | out: | ||
40 | membar #StoreLoad | #StoreStore | ||
41 | retl | ||
42 | mov %g1, %o0 | ||
43 | start_to_zero: | ||
44 | #ifdef CONFIG_PREEMPT | ||
45 | ldsw [%g6 + TI_PRE_COUNT], %g3 | ||
46 | add %g3, 1, %g3 | ||
47 | stw %g3, [%g6 + TI_PRE_COUNT] | ||
48 | #endif | ||
49 | to_zero: | ||
50 | ldstub [%o1], %g3 | ||
51 | brnz,pn %g3, spin_on_lock | ||
52 | membar #StoreLoad | #StoreStore | ||
53 | loop2: cas [%o0], %g2, %g7 /* ASSERT(g7 == 0) */ | ||
54 | cmp %g2, %g7 | ||
55 | |||
56 | be,pt %icc, out | ||
57 | mov 1, %g1 | ||
58 | lduw [%o0], %g2 | ||
59 | subcc %g2, 1, %g7 | ||
60 | be,pn %icc, loop2 | ||
61 | nop | ||
62 | membar #StoreStore | #LoadStore | ||
63 | stb %g0, [%o1] | ||
64 | #ifdef CONFIG_PREEMPT | ||
65 | ldsw [%g6 + TI_PRE_COUNT], %g3 | ||
66 | sub %g3, 1, %g3 | ||
67 | stw %g3, [%g6 + TI_PRE_COUNT] | ||
68 | #endif | ||
69 | |||
70 | b,pt %xcc, nzero | ||
71 | nop | ||
72 | spin_on_lock: | ||
73 | ldub [%o1], %g3 | ||
74 | brnz,pt %g3, spin_on_lock | ||
75 | membar #LoadLoad | ||
76 | ba,pt %xcc, to_zero | ||
77 | nop | ||
78 | nop | ||
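
In C terms the routine implements the usual atomic_dec_and_lock() contract: decrement lock-free while the counter stays nonzero, and only for the final reference take the spinlock first and re-check under it. A hedged sketch (spin_lock/spin_unlock and the __sync builtin stand in for the ldstub and cas sequences; the asm retries under the lock where this sketch releases and loops):

    /* Sketch: returns 1 with the lock held iff the counter hit zero. */
    static int dec_and_lock_sketch(int *counter, spinlock_t *lock)
    {
            int old;

            for (;;) {
                    old = *counter;
                    if (old != 1) {         /* fast path: loop1/nzero   */
                            if (__sync_val_compare_and_swap(counter, old,
                                                            old - 1) == old)
                                    return 0;
                            continue;       /* lost a race, reload      */
                    }
                    spin_lock(lock);        /* to_zero: ldstub spin     */
                    if (__sync_val_compare_and_swap(counter, 1, 0) == 1)
                            return 1;       /* loop2: zero, lock held   */
                    spin_unlock(lock);      /* counter moved, try again */
            }
    }
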
diff --git a/arch/sparc64/lib/delay.c b/arch/sparc64/lib/delay.c new file mode 100644 index 000000000000..f6b4c784d53e --- /dev/null +++ b/arch/sparc64/lib/delay.c | |||
@@ -0,0 +1,49 @@ | |||
1 | /* delay.c: Delay loops for sparc64 | ||
2 | * | ||
3 | * Copyright (C) 2004 David S. Miller <davem@redhat.com> | ||
4 | * | ||
5 | * Based heavily upon x86 variant which is: | ||
6 | * Copyright (C) 1993 Linus Torvalds | ||
7 | * Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz> | ||
8 | */ | ||
9 | |||
10 | #include <linux/delay.h> | ||
11 | |||
12 | void __delay(unsigned long loops) | ||
13 | { | ||
14 | __asm__ __volatile__( | ||
15 | " b,pt %%xcc, 1f\n" | ||
16 | " cmp %0, 0\n" | ||
17 | " .align 32\n" | ||
18 | "1:\n" | ||
19 | " bne,pt %%xcc, 1b\n" | ||
20 | " subcc %0, 1, %0\n" | ||
21 | : "=&r" (loops) | ||
22 | : "0" (loops) | ||
23 | : "cc"); | ||
24 | } | ||
25 | |||
26 | /* We used to multiply by HZ after shifting down by 32 bits, | ||
27 | * but that runs into problems for higher values of HZ and | ||
28 | * slow CPUs. | ||
29 | */ | ||
30 | void __const_udelay(unsigned long n) | ||
31 | { | ||
32 | n *= 4; | ||
33 | |||
34 | n *= (cpu_data(_smp_processor_id()).udelay_val * (HZ/4)); | ||
35 | n >>= 32; | ||
36 | |||
37 | __delay(n + 1); | ||
38 | } | ||
39 | |||
40 | void __udelay(unsigned long n) | ||
41 | { | ||
42 | __const_udelay(n * 0x10c7UL); | ||
43 | } | ||
44 | |||
45 | |||
46 | void __ndelay(unsigned long n) | ||
47 | { | ||
48 | __const_udelay(n * 0x5UL); | ||
49 | } | ||
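The magic multipliers fall out of 32.32 fixed-point arithmetic: __const_udelay shifts the product down by 32 bits at the end, so __udelay pre-scales microseconds by 2**32/10**6 = 4294.967296, rounded up to 0x10c7, and __ndelay pre-scales nanoseconds by 2**32/10**9 = 4.294..., rounded up to 0x5. A self-contained check of those two constants:

	#include <stdio.h>

	int main(void)
	{
		/* 2**32 / 10**6 = 4294.967296, rounded up: 4295 = 0x10c7 */
		printf("%#llx\n", (1ULL << 32) / 1000000ULL + 1);
		/* 2**32 / 10**9 = 4.294967296, rounded up: 5 */
		printf("%#llx\n", (1ULL << 32) / 1000000000ULL + 1);
		return 0;
	}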
diff --git a/arch/sparc64/lib/find_bit.c b/arch/sparc64/lib/find_bit.c new file mode 100644 index 000000000000..6059557067b4 --- /dev/null +++ b/arch/sparc64/lib/find_bit.c | |||
@@ -0,0 +1,127 @@ | |||
1 | #include <linux/bitops.h> | ||
2 | |||
3 | /** | ||
4 | * find_next_bit - find the next set bit in a memory region | ||
5 | * @addr: The address to base the search on | ||
6 | * @size: The bitmap size in bits | ||
7 | * @offset: The bit number to start searching at | ||
8 | */ | ||
9 | unsigned long find_next_bit(const unsigned long *addr, unsigned long size, | ||
10 | unsigned long offset) | ||
11 | { | ||
12 | const unsigned long *p = addr + (offset >> 6); | ||
13 | unsigned long result = offset & ~63UL; | ||
14 | unsigned long tmp; | ||
15 | |||
16 | if (offset >= size) | ||
17 | return size; | ||
18 | size -= result; | ||
19 | offset &= 63UL; | ||
20 | if (offset) { | ||
21 | tmp = *(p++); | ||
22 | tmp &= (~0UL << offset); | ||
23 | if (size < 64) | ||
24 | goto found_first; | ||
25 | if (tmp) | ||
26 | goto found_middle; | ||
27 | size -= 64; | ||
28 | result += 64; | ||
29 | } | ||
30 | while (size & ~63UL) { | ||
31 | if ((tmp = *(p++))) | ||
32 | goto found_middle; | ||
33 | result += 64; | ||
34 | size -= 64; | ||
35 | } | ||
36 | if (!size) | ||
37 | return result; | ||
38 | tmp = *p; | ||
39 | |||
40 | found_first: | ||
41 | tmp &= (~0UL >> (64 - size)); | ||
42 | if (tmp == 0UL) /* Are any bits set? */ | ||
43 | return result + size; /* Nope. */ | ||
44 | found_middle: | ||
45 | return result + __ffs(tmp); | ||
46 | } | ||
47 | |||
48 | /* find_next_zero_bit() finds the first zero bit in a bit string of length | ||
49 | * 'size' bits, starting the search at bit 'offset'. This is largely based | ||
50 | * on Linus's ALPHA routines, which are pretty portable BTW. | ||
51 | */ | ||
52 | |||
53 | unsigned long find_next_zero_bit(const unsigned long *addr, | ||
54 | unsigned long size, unsigned long offset) | ||
55 | { | ||
56 | const unsigned long *p = addr + (offset >> 6); | ||
57 | unsigned long result = offset & ~63UL; | ||
58 | unsigned long tmp; | ||
59 | |||
60 | if (offset >= size) | ||
61 | return size; | ||
62 | size -= result; | ||
63 | offset &= 63UL; | ||
64 | if (offset) { | ||
65 | tmp = *(p++); | ||
66 | tmp |= ~0UL >> (64-offset); | ||
67 | if (size < 64) | ||
68 | goto found_first; | ||
69 | if (~tmp) | ||
70 | goto found_middle; | ||
71 | size -= 64; | ||
72 | result += 64; | ||
73 | } | ||
74 | while (size & ~63UL) { | ||
75 | if (~(tmp = *(p++))) | ||
76 | goto found_middle; | ||
77 | result += 64; | ||
78 | size -= 64; | ||
79 | } | ||
80 | if (!size) | ||
81 | return result; | ||
82 | tmp = *p; | ||
83 | |||
84 | found_first: | ||
85 | tmp |= ~0UL << size; | ||
86 | if (tmp == ~0UL) /* Are any bits zero? */ | ||
87 | return result + size; /* Nope. */ | ||
88 | found_middle: | ||
89 | return result + ffz(tmp); | ||
90 | } | ||
91 | |||
92 | unsigned long find_next_zero_le_bit(unsigned long *addr, unsigned long size, unsigned long offset) | ||
93 | { | ||
94 | unsigned long *p = addr + (offset >> 6); | ||
95 | unsigned long result = offset & ~63UL; | ||
96 | unsigned long tmp; | ||
97 | |||
98 | if (offset >= size) | ||
99 | return size; | ||
100 | size -= result; | ||
101 | offset &= 63UL; | ||
102 | if (offset) { | ||
103 | tmp = __swab64p(p++); | ||
104 | tmp |= (~0UL >> (64-offset)); | ||
105 | if (size < 64) | ||
106 | goto found_first; | ||
107 | if (~tmp) | ||
108 | goto found_middle; | ||
109 | size -= 64; | ||
110 | result += 64; | ||
111 | } | ||
112 | while (size & ~63UL) { | ||
113 | if (~(tmp = __swab64p(p++))) | ||
114 | goto found_middle; | ||
115 | result += 64; | ||
116 | size -= 64; | ||
117 | } | ||
118 | if (!size) | ||
119 | return result; | ||
120 | tmp = __swab64p(p); | ||
121 | found_first: | ||
122 | tmp |= (~0UL << size); | ||
123 | if (tmp == ~0UL) /* Are any bits zero? */ | ||
124 | return result + size; /* Nope. */ | ||
125 | found_middle: | ||
126 | return result + ffz(tmp); | ||
127 | } | ||
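A typical caller drives find_next_bit in a restart loop. A hedged usage sketch; the 192-bit map and the callback are illustrative, and only the find_next_bit prototype comes from this file:

	extern unsigned long find_next_bit(const unsigned long *addr,
					   unsigned long size,
					   unsigned long offset);

	#define MAP_BITS 192UL

	static void visit_set_bits(const unsigned long *map,
				   void (*visit)(unsigned long bit))
	{
		unsigned long bit;

		/* restart one past the last hit; a return >= MAP_BITS
		 * means the search ran off the end of the bitmap */
		for (bit = find_next_bit(map, MAP_BITS, 0);
		     bit < MAP_BITS;
		     bit = find_next_bit(map, MAP_BITS, bit + 1))
			visit(bit);
	}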
diff --git a/arch/sparc64/lib/iomap.c b/arch/sparc64/lib/iomap.c new file mode 100644 index 000000000000..ac556db06973 --- /dev/null +++ b/arch/sparc64/lib/iomap.c | |||
@@ -0,0 +1,48 @@ | |||
1 | /* | ||
2 | * Implement the sparc64 iomap interfaces | ||
3 | */ | ||
4 | #include <linux/pci.h> | ||
5 | #include <linux/module.h> | ||
6 | #include <asm/io.h> | ||
7 | |||
8 | /* Create a virtual mapping cookie for an IO port range */ | ||
9 | void __iomem *ioport_map(unsigned long port, unsigned int nr) | ||
10 | { | ||
11 | return (void __iomem *) (unsigned long) port; | ||
12 | } | ||
13 | |||
14 | void ioport_unmap(void __iomem *addr) | ||
15 | { | ||
16 | /* Nothing to do */ | ||
17 | } | ||
18 | EXPORT_SYMBOL(ioport_map); | ||
19 | EXPORT_SYMBOL(ioport_unmap); | ||
20 | |||
21 | /* Create a virtual mapping cookie for a PCI BAR (memory or IO) */ | ||
22 | void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) | ||
23 | { | ||
24 | unsigned long start = pci_resource_start(dev, bar); | ||
25 | unsigned long len = pci_resource_len(dev, bar); | ||
26 | unsigned long flags = pci_resource_flags(dev, bar); | ||
27 | |||
28 | if (!len || !start) | ||
29 | return NULL; | ||
30 | if (maxlen && len > maxlen) | ||
31 | len = maxlen; | ||
32 | if (flags & IORESOURCE_IO) | ||
33 | return ioport_map(start, len); | ||
34 | if (flags & IORESOURCE_MEM) { | ||
35 | if (flags & IORESOURCE_CACHEABLE) | ||
36 | return ioremap(start, len); | ||
37 | return ioremap_nocache(start, len); | ||
38 | } | ||
39 | /* What? */ | ||
40 | return NULL; | ||
41 | } | ||
42 | |||
43 | void pci_iounmap(struct pci_dev *dev, void __iomem * addr) | ||
44 | { | ||
45 | /* nothing to do */ | ||
46 | } | ||
47 | EXPORT_SYMBOL(pci_iomap); | ||
48 | EXPORT_SYMBOL(pci_iounmap); | ||
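The point of the cookie is that a driver can touch a BAR without caring whether it is I/O- or memory-mapped. A hedged probe-time sketch (the device, BAR choice, and register layout are illustrative; the pci_* and ioread32 calls are the standard interfaces this file plugs into):

	#include <linux/pci.h>

	static int sketch_probe(struct pci_dev *pdev)
	{
		void __iomem *regs;
		u32 id;

		if (pci_enable_device(pdev))
			return -ENODEV;

		regs = pci_iomap(pdev, 0, 0);	/* BAR 0, no length cap */
		if (!regs) {
			pci_disable_device(pdev);
			return -ENOMEM;
		}

		id = ioread32(regs);	/* same call for IORESOURCE_IO or _MEM */
		dev_info(&pdev->dev, "id register: %08x\n", id);

		pci_iounmap(pdev, regs);
		pci_disable_device(pdev);
		return 0;
	}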
diff --git a/arch/sparc64/lib/ipcsum.S b/arch/sparc64/lib/ipcsum.S new file mode 100644 index 000000000000..58ca5b9a8778 --- /dev/null +++ b/arch/sparc64/lib/ipcsum.S | |||
@@ -0,0 +1,34 @@ | |||
1 | .text | ||
2 | .align 32 | ||
3 | .globl ip_fast_csum | ||
4 | .type ip_fast_csum,#function | ||
5 | ip_fast_csum: /* %o0 = iph, %o1 = ihl */ | ||
6 | sub %o1, 4, %g7 | ||
7 | lduw [%o0 + 0x00], %o2 | ||
8 | lduw [%o0 + 0x04], %g2 | ||
9 | lduw [%o0 + 0x08], %g3 | ||
10 | addcc %g2, %o2, %o2 | ||
11 | lduw [%o0 + 0x0c], %g2 | ||
12 | addccc %g3, %o2, %o2 | ||
13 | lduw [%o0 + 0x10], %g3 | ||
14 | |||
15 | addccc %g2, %o2, %o2 | ||
16 | addc %o2, %g0, %o2 | ||
17 | 1: addcc %g3, %o2, %o2 | ||
18 | add %o0, 4, %o0 | ||
19 | addccc %o2, %g0, %o2 | ||
20 | subcc %g7, 1, %g7 | ||
21 | be,a,pt %icc, 2f | ||
22 | sll %o2, 16, %g2 | ||
23 | |||
24 | lduw [%o0 + 0x10], %g3 | ||
25 | ba,pt %xcc, 1b | ||
26 | nop | ||
27 | 2: addcc %o2, %g2, %g2 | ||
28 | srl %g2, 16, %o2 | ||
29 | addc %o2, %g0, %o2 | ||
30 | xnor %g0, %o2, %o2 | ||
31 | set 0xffff, %o1 | ||
32 | retl | ||
33 | and %o2, %o1, %o0 | ||
34 | .size ip_fast_csum, .-ip_fast_csum | ||
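What the addcc/addccc carry chain computes is the standard RFC 1071 one's-complement checksum of the IP header. A hedged C reference (names are ours; the 4-byte alignment and ihl >= 5 assumptions match the assembly):

	#include <stdint.h>

	/* ihl is the header length in 32-bit words (>= 5). */
	static uint16_t ip_fast_csum_ref(const void *iph, unsigned int ihl)
	{
		const uint32_t *p = iph;
		uint64_t sum = 0;
		unsigned int i;

		for (i = 0; i < ihl; i++)
			sum += p[i];	/* 64-bit accumulator keeps the carries */

		sum = (sum & 0xffffffffULL) + (sum >> 32);	/* fold 64 -> ~33 bits */
		sum = (sum & 0xffff) + (sum >> 16);		/* fold to <= 17 bits */
		sum = (sum & 0xffff) + (sum >> 16);		/* absorb the last carry */
		return (uint16_t)~sum;		/* xnor %g0, %o2, %o2 */
	}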
diff --git a/arch/sparc64/lib/mcount.S b/arch/sparc64/lib/mcount.S new file mode 100644 index 000000000000..2ef2e268bdcf --- /dev/null +++ b/arch/sparc64/lib/mcount.S | |||
@@ -0,0 +1,61 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2000 Anton Blanchard (anton@linuxcare.com) | ||
3 | * | ||
4 | * This file implements mcount(), which is used to collect profiling data. | ||
5 | * This can also be tweaked for kernel stack overflow detection. | ||
6 | */ | ||
7 | |||
8 | #include <linux/config.h> | ||
9 | #include <linux/linkage.h> | ||
10 | |||
11 | #include <asm/ptrace.h> | ||
12 | #include <asm/thread_info.h> | ||
13 | |||
14 | /* | ||
15 | * This is the main variant and is called by C code. GCC's -pg option | ||
16 | * automatically instruments every C function with a call to this. | ||
17 | */ | ||
18 | |||
19 | #ifdef CONFIG_STACK_DEBUG | ||
20 | |||
21 | #define OVSTACKSIZE 4096 /* let's hope this is enough */ | ||
22 | |||
23 | .data | ||
24 | .align 8 | ||
25 | panicstring: | ||
26 | .asciz "Stack overflow\n" | ||
27 | .align 8 | ||
28 | ovstack: | ||
29 | .skip OVSTACKSIZE | ||
30 | #endif | ||
31 | .text | ||
32 | .align 32 | ||
33 | .globl mcount, _mcount | ||
34 | mcount: | ||
35 | _mcount: | ||
36 | #ifdef CONFIG_STACK_DEBUG | ||
37 | /* | ||
38 | * Check whether %sp is dangerously low. | ||
39 | */ | ||
40 | ldub [%g6 + TI_FPDEPTH], %g1 | ||
41 | srl %g1, 1, %g3 | ||
42 | add %g3, 1, %g3 | ||
43 | sllx %g3, 8, %g3 ! each fpregs frame is 256b | ||
44 | add %g3, 192, %g3 | ||
45 | add %g6, %g3, %g3 ! where does task_struct+frame end? | ||
46 | sub %g3, STACK_BIAS, %g3 | ||
47 | cmp %sp, %g3 | ||
48 | bg,pt %xcc, 1f | ||
49 | sethi %hi(panicstring), %g3 | ||
50 | sethi %hi(ovstack), %g7 ! can't move to panic stack fast enough | ||
51 | or %g7, %lo(ovstack), %g7 | ||
52 | add %g7, OVSTACKSIZE, %g7 | ||
53 | sub %g7, STACK_BIAS, %g7 | ||
54 | mov %g7, %sp | ||
55 | call prom_printf | ||
56 | or %g3, %lo(panicstring), %o0 | ||
57 | call prom_halt | ||
58 | nop | ||
59 | #endif | ||
60 | 1: retl | ||
61 | nop | ||
diff --git a/arch/sparc64/lib/memcmp.S b/arch/sparc64/lib/memcmp.S new file mode 100644 index 000000000000..c90ad96c51b9 --- /dev/null +++ b/arch/sparc64/lib/memcmp.S | |||
@@ -0,0 +1,28 @@ | |||
1 | /* $Id: memcmp.S,v 1.3 2000/03/23 07:51:08 davem Exp $ | ||
2 | * Sparc64 optimized memcmp code. | ||
3 | * | ||
4 | * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz) | ||
5 | * Copyright (C) 2000 David S. Miller (davem@redhat.com) | ||
6 | */ | ||
7 | |||
8 | .text | ||
9 | .align 32 | ||
10 | .globl __memcmp, memcmp | ||
11 | __memcmp: | ||
12 | memcmp: | ||
13 | cmp %o2, 0 ! IEU1 Group | ||
14 | loop: be,pn %icc, ret_0 ! CTI | ||
15 | nop ! IEU0 | ||
16 | ldub [%o0], %g7 ! LSU Group | ||
17 | ldub [%o1], %g3 ! LSU Group | ||
18 | sub %o2, 1, %o2 ! IEU0 | ||
19 | add %o0, 1, %o0 ! IEU1 | ||
20 | add %o1, 1, %o1 ! IEU0 Group | ||
21 | subcc %g7, %g3, %g3 ! IEU1 Group | ||
22 | be,pt %icc, loop ! CTI | ||
23 | cmp %o2, 0 ! IEU1 Group | ||
24 | |||
25 | ret_n0: retl | ||
26 | mov %g3, %o0 | ||
27 | ret_0: retl | ||
28 | mov 0, %o0 | ||
diff --git a/arch/sparc64/lib/memmove.S b/arch/sparc64/lib/memmove.S new file mode 100644 index 000000000000..97395802c23c --- /dev/null +++ b/arch/sparc64/lib/memmove.S | |||
@@ -0,0 +1,31 @@ | |||
1 | /* memmove.S: Simple memmove implementation. | ||
2 | * | ||
3 | * Copyright (C) 1997, 2004 David S. Miller (davem@redhat.com) | ||
4 | * Copyright (C) 1996, 1997, 1998, 1999 Jakub Jelinek (jj@ultra.linux.cz) | ||
5 | */ | ||
6 | |||
7 | .text | ||
8 | .align 32 | ||
9 | .globl memmove | ||
10 | .type memmove,#function | ||
11 | memmove: /* o0=dst o1=src o2=len */ | ||
12 | mov %o0, %g1 | ||
13 | cmp %o0, %o1 | ||
14 | bleu,pt %xcc, memcpy | ||
15 | add %o1, %o2, %g7 | ||
16 | cmp %g7, %o0 | ||
17 | bleu,pt %xcc, memcpy | ||
18 | add %o0, %o2, %o5 | ||
19 | sub %g7, 1, %o1 | ||
20 | |||
21 | sub %o5, 1, %o0 | ||
22 | 1: ldub [%o1], %g7 | ||
23 | subcc %o2, 1, %o2 | ||
24 | sub %o1, 1, %o1 | ||
25 | stb %g7, [%o0] | ||
26 | bne,pt %icc, 1b | ||
27 | sub %o0, 1, %o0 | ||
28 | |||
29 | retl | ||
30 | mov %g1, %o0 | ||
31 | .size memmove, .-memmove | ||
diff --git a/arch/sparc64/lib/memscan.S b/arch/sparc64/lib/memscan.S new file mode 100644 index 000000000000..5e72d4911417 --- /dev/null +++ b/arch/sparc64/lib/memscan.S | |||
@@ -0,0 +1,129 @@ | |||
1 | /* $Id: memscan.S,v 1.3 2000/01/31 04:59:10 davem Exp $ | ||
2 | * memscan.S: Optimized memscan for Sparc64. | ||
3 | * | ||
4 | * Copyright (C) 1997,1998 Jakub Jelinek (jj@ultra.linux.cz) | ||
5 | * Copyright (C) 1998 David S. Miller (davem@redhat.com) | ||
6 | */ | ||
7 | |||
8 | #define HI_MAGIC 0x8080808080808080 | ||
9 | #define LO_MAGIC 0x0101010101010101 | ||
10 | #define ASI_PL 0x88 | ||
11 | |||
12 | .text | ||
13 | .align 32 | ||
14 | .globl __memscan_zero, __memscan_generic | ||
15 | .globl memscan | ||
16 | |||
17 | __memscan_zero: | ||
18 | /* %o0 = bufp, %o1 = size */ | ||
19 | brlez,pn %o1, szzero | ||
20 | andcc %o0, 7, %g0 | ||
21 | be,pt %icc, we_are_aligned | ||
22 | sethi %hi(HI_MAGIC), %o4 | ||
23 | ldub [%o0], %o5 | ||
24 | 1: subcc %o1, 1, %o1 | ||
25 | brz,pn %o5, 10f | ||
26 | add %o0, 1, %o0 | ||
27 | |||
28 | be,pn %xcc, szzero | ||
29 | andcc %o0, 7, %g0 | ||
30 | bne,a,pn %icc, 1b | ||
31 | ldub [%o0], %o5 | ||
32 | we_are_aligned: | ||
33 | ldxa [%o0] ASI_PL, %o5 | ||
34 | or %o4, %lo(HI_MAGIC), %o3 | ||
35 | sllx %o3, 32, %o4 | ||
36 | or %o4, %o3, %o3 | ||
37 | |||
38 | srlx %o3, 7, %o2 | ||
39 | msloop: | ||
40 | sub %o1, 8, %o1 | ||
41 | add %o0, 8, %o0 | ||
42 | sub %o5, %o2, %o4 | ||
43 | xor %o4, %o5, %o4 | ||
44 | andcc %o4, %o3, %g3 | ||
45 | bne,pn %xcc, check_bytes | ||
46 | srlx %o4, 32, %g3 | ||
47 | |||
48 | brgz,a,pt %o1, msloop | ||
49 | ldxa [%o0] ASI_PL, %o5 | ||
50 | check_bytes: | ||
51 | bne,a,pn %icc, 2f | ||
52 | andcc %o5, 0xff, %g0 | ||
53 | add %o0, -5, %g2 | ||
54 | ba,pt %xcc, 3f | ||
55 | srlx %o5, 32, %g7 | ||
56 | |||
57 | 2: srlx %o5, 8, %g7 | ||
58 | be,pn %icc, 1f | ||
59 | add %o0, -8, %g2 | ||
60 | andcc %g7, 0xff, %g0 | ||
61 | srlx %g7, 8, %g7 | ||
62 | be,pn %icc, 1f | ||
63 | inc %g2 | ||
64 | andcc %g7, 0xff, %g0 | ||
65 | |||
66 | srlx %g7, 8, %g7 | ||
67 | be,pn %icc, 1f | ||
68 | inc %g2 | ||
69 | andcc %g7, 0xff, %g0 | ||
70 | srlx %g7, 8, %g7 | ||
71 | be,pn %icc, 1f | ||
72 | inc %g2 | ||
73 | andcc %g3, %o3, %g0 | ||
74 | |||
75 | be,a,pn %icc, 2f | ||
76 | mov %o0, %g2 | ||
77 | 3: andcc %g7, 0xff, %g0 | ||
78 | srlx %g7, 8, %g7 | ||
79 | be,pn %icc, 1f | ||
80 | inc %g2 | ||
81 | andcc %g7, 0xff, %g0 | ||
82 | srlx %g7, 8, %g7 | ||
83 | |||
84 | be,pn %icc, 1f | ||
85 | inc %g2 | ||
86 | andcc %g7, 0xff, %g0 | ||
87 | srlx %g7, 8, %g7 | ||
88 | be,pn %icc, 1f | ||
89 | inc %g2 | ||
90 | andcc %g7, 0xff, %g0 | ||
91 | srlx %g7, 8, %g7 | ||
92 | |||
93 | be,pn %icc, 1f | ||
94 | inc %g2 | ||
95 | 2: brgz,a,pt %o1, msloop | ||
96 | ldxa [%o0] ASI_PL, %o5 | ||
97 | inc %g2 | ||
98 | 1: add %o0, %o1, %o0 | ||
99 | cmp %g2, %o0 | ||
100 | retl | ||
101 | |||
102 | movle %xcc, %g2, %o0 | ||
103 | 10: retl | ||
104 | sub %o0, 1, %o0 | ||
105 | szzero: retl | ||
106 | nop | ||
107 | |||
108 | memscan: | ||
109 | __memscan_generic: | ||
110 | /* %o0 = addr, %o1 = c, %o2 = size */ | ||
111 | brz,pn %o2, 3f | ||
112 | add %o0, %o2, %o3 | ||
113 | ldub [%o0], %o5 | ||
114 | sub %g0, %o2, %o4 | ||
115 | 1: | ||
116 | cmp %o5, %o1 | ||
117 | be,pn %icc, 2f | ||
118 | addcc %o4, 1, %o4 | ||
119 | bne,a,pt %xcc, 1b | ||
120 | ldub [%o3 + %o4], %o5 | ||
121 | retl | ||
122 | /* The delay slot is the same as the next insn; this is just to make it look more awful */ | ||
123 | 2: | ||
124 | add %o3, %o4, %o0 | ||
125 | retl | ||
126 | sub %o0, 1, %o0 | ||
127 | 3: | ||
128 | retl | ||
129 | nop | ||
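The msloop test is the classic zero-byte-in-a-word trick: with LO = 0x0101...01 (here derived as HI_MAGIC >> 7) and HI = 0x8080...80, the expression ((w - LO) ^ w) & HI is nonzero whenever w may contain a zero byte. Bytes of 0x80 can trigger it too, which is why check_bytes then inspects every byte. A hedged C reference for the whole scan, with our own names:

	#include <stdint.h>
	#include <stddef.h>

	#define LO 0x0101010101010101UL
	#define HI 0x8080808080808080UL

	/* Returns the address of the first zero byte, or buf + size. */
	static void *memscan_zero_ref(const void *buf, size_t size)
	{
		const unsigned char *p = buf;

		while (size >= 8) {
			uint64_t w;
			size_t i;

			__builtin_memcpy(&w, p, sizeof(w));	/* one 64-bit load */
			if (((w - LO) ^ w) & HI)	/* candidate zero byte */
				for (i = 0; i < 8; i++)	/* confirm, as check_bytes does */
					if (p[i] == 0)
						return (void *)(p + i);
			p += 8;
			size -= 8;
		}
		while (size--) {
			if (*p == 0)
				return (void *)p;
			p++;
		}
		return (void *)p;	/* not found: one past the buffer */
	}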
diff --git a/arch/sparc64/lib/rwsem.S b/arch/sparc64/lib/rwsem.S new file mode 100644 index 000000000000..174ff7b9164c --- /dev/null +++ b/arch/sparc64/lib/rwsem.S | |||
@@ -0,0 +1,165 @@ | |||
1 | /* rwsem.S: RW semaphore assembler. | ||
2 | * | ||
3 | * Written by David S. Miller (davem@redhat.com), 2001. | ||
4 | * Derived from asm-i386/rwsem.h | ||
5 | */ | ||
6 | |||
7 | #include <asm/rwsem-const.h> | ||
8 | |||
9 | .section .sched.text | ||
10 | |||
11 | .globl __down_read | ||
12 | __down_read: | ||
13 | 1: lduw [%o0], %g1 | ||
14 | add %g1, 1, %g7 | ||
15 | cas [%o0], %g1, %g7 | ||
16 | cmp %g1, %g7 | ||
17 | bne,pn %icc, 1b | ||
18 | add %g7, 1, %g7 | ||
19 | cmp %g7, 0 | ||
20 | bl,pn %icc, 3f | ||
21 | membar #StoreLoad | #StoreStore | ||
22 | 2: | ||
23 | retl | ||
24 | nop | ||
25 | 3: | ||
26 | save %sp, -192, %sp | ||
27 | call rwsem_down_read_failed | ||
28 | mov %i0, %o0 | ||
29 | ret | ||
30 | restore | ||
31 | .size __down_read, .-__down_read | ||
32 | |||
33 | .globl __down_read_trylock | ||
34 | __down_read_trylock: | ||
35 | 1: lduw [%o0], %g1 | ||
36 | add %g1, 1, %g7 | ||
37 | cmp %g7, 0 | ||
38 | bl,pn %icc, 2f | ||
39 | mov 0, %o1 | ||
40 | cas [%o0], %g1, %g7 | ||
41 | cmp %g1, %g7 | ||
42 | bne,pn %icc, 1b | ||
43 | mov 1, %o1 | ||
44 | membar #StoreLoad | #StoreStore | ||
45 | 2: retl | ||
46 | mov %o1, %o0 | ||
47 | .size __down_read_trylock, .-__down_read_trylock | ||
48 | |||
49 | .globl __down_write | ||
50 | __down_write: | ||
51 | sethi %hi(RWSEM_ACTIVE_WRITE_BIAS), %g1 | ||
52 | or %g1, %lo(RWSEM_ACTIVE_WRITE_BIAS), %g1 | ||
53 | 1: | ||
54 | lduw [%o0], %g3 | ||
55 | add %g3, %g1, %g7 | ||
56 | cas [%o0], %g3, %g7 | ||
57 | cmp %g3, %g7 | ||
58 | bne,pn %icc, 1b | ||
59 | cmp %g7, 0 | ||
60 | bne,pn %icc, 3f | ||
61 | membar #StoreLoad | #StoreStore | ||
62 | 2: retl | ||
63 | nop | ||
64 | 3: | ||
65 | save %sp, -192, %sp | ||
66 | call rwsem_down_write_failed | ||
67 | mov %i0, %o0 | ||
68 | ret | ||
69 | restore | ||
70 | .size __down_write, .-__down_write | ||
71 | |||
72 | .globl __down_write_trylock | ||
73 | __down_write_trylock: | ||
74 | sethi %hi(RWSEM_ACTIVE_WRITE_BIAS), %g1 | ||
75 | or %g1, %lo(RWSEM_ACTIVE_WRITE_BIAS), %g1 | ||
76 | 1: | ||
77 | lduw [%o0], %g3 | ||
78 | cmp %g3, 0 | ||
79 | bne,pn %icc, 2f | ||
80 | mov 0, %o1 | ||
81 | add %g3, %g1, %g7 | ||
82 | cas [%o0], %g3, %g7 | ||
83 | cmp %g3, %g7 | ||
84 | bne,pn %icc, 1b | ||
85 | mov 1, %o1 | ||
86 | membar #StoreLoad | #StoreStore | ||
87 | 2: retl | ||
88 | mov %o1, %o0 | ||
89 | .size __down_write_trylock, .-__down_write_trylock | ||
90 | |||
91 | .globl __up_read | ||
92 | __up_read: | ||
93 | 1: | ||
94 | lduw [%o0], %g1 | ||
95 | sub %g1, 1, %g7 | ||
96 | cas [%o0], %g1, %g7 | ||
97 | cmp %g1, %g7 | ||
98 | bne,pn %icc, 1b | ||
99 | cmp %g7, 0 | ||
100 | bl,pn %icc, 3f | ||
101 | membar #StoreLoad | #StoreStore | ||
102 | 2: retl | ||
103 | nop | ||
104 | 3: sethi %hi(RWSEM_ACTIVE_MASK), %g1 | ||
105 | sub %g7, 1, %g7 | ||
106 | or %g1, %lo(RWSEM_ACTIVE_MASK), %g1 | ||
107 | andcc %g7, %g1, %g0 | ||
108 | bne,pn %icc, 2b | ||
109 | nop | ||
110 | save %sp, -192, %sp | ||
111 | call rwsem_wake | ||
112 | mov %i0, %o0 | ||
113 | ret | ||
114 | restore | ||
115 | .size __up_read, .-__up_read | ||
116 | |||
117 | .globl __up_write | ||
118 | __up_write: | ||
119 | sethi %hi(RWSEM_ACTIVE_WRITE_BIAS), %g1 | ||
120 | or %g1, %lo(RWSEM_ACTIVE_WRITE_BIAS), %g1 | ||
121 | 1: | ||
122 | lduw [%o0], %g3 | ||
123 | sub %g3, %g1, %g7 | ||
124 | cas [%o0], %g3, %g7 | ||
125 | cmp %g3, %g7 | ||
126 | bne,pn %icc, 1b | ||
127 | sub %g7, %g1, %g7 | ||
128 | cmp %g7, 0 | ||
129 | bl,pn %icc, 3f | ||
130 | membar #StoreLoad | #StoreStore | ||
131 | 2: | ||
132 | retl | ||
133 | nop | ||
134 | 3: | ||
135 | save %sp, -192, %sp | ||
136 | call rwsem_wake | ||
137 | mov %i0, %o0 | ||
138 | ret | ||
139 | restore | ||
140 | .size __up_write, .-__up_write | ||
141 | |||
142 | .globl __downgrade_write | ||
143 | __downgrade_write: | ||
144 | sethi %hi(RWSEM_WAITING_BIAS), %g1 | ||
145 | or %g1, %lo(RWSEM_WAITING_BIAS), %g1 | ||
146 | 1: | ||
147 | lduw [%o0], %g3 | ||
148 | sub %g3, %g1, %g7 | ||
149 | cas [%o0], %g3, %g7 | ||
150 | cmp %g3, %g7 | ||
151 | bne,pn %icc, 1b | ||
152 | sub %g7, %g1, %g7 | ||
153 | cmp %g7, 0 | ||
154 | bl,pn %icc, 3f | ||
155 | membar #StoreLoad | #StoreStore | ||
156 | 2: | ||
157 | retl | ||
158 | nop | ||
159 | 3: | ||
160 | save %sp, -192, %sp | ||
161 | call rwsem_downgrade_wake | ||
162 | mov %i0, %o0 | ||
163 | ret | ||
164 | restore | ||
165 | .size __downgrade_write, .-__downgrade_write | ||
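All five fast paths above follow one count protocol: each reader adds 1, a writer adds a bias large enough that any other activity drives the count negative. A hedged C sketch of the two acquisition fast paths, using what we believe are the conventional rwsem-const.h values (assumed here, since that header is not part of this diff):

	#include <stdbool.h>

	#define RWSEM_ACTIVE_BIAS	0x00000001
	#define RWSEM_WAITING_BIAS	(-0x00010000)
	#define RWSEM_ACTIVE_WRITE_BIAS	(RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)

	static bool down_read_fast(int *count)
	{
		/* the lduw/add/cas loop is an atomic increment; a negative
		 * new count means a writer is active or waiting, so the
		 * caller must take the rwsem_down_read_failed() slow path */
		return __atomic_add_fetch(count, RWSEM_ACTIVE_BIAS,
					  __ATOMIC_ACQUIRE) >= 0;
	}

	static bool down_write_fast(int *count)
	{
		/* the bias is added unconditionally; only a previously idle
		 * semaphore (old count == 0) means the write lock was taken */
		return __atomic_fetch_add(count, RWSEM_ACTIVE_WRITE_BIAS,
					  __ATOMIC_ACQUIRE) == 0;
	}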
diff --git a/arch/sparc64/lib/strlen.S b/arch/sparc64/lib/strlen.S new file mode 100644 index 000000000000..e9ba1920d818 --- /dev/null +++ b/arch/sparc64/lib/strlen.S | |||
@@ -0,0 +1,80 @@ | |||
1 | /* strlen.S: Sparc64 optimized strlen code | ||
2 | * Hand optimized from GNU libc's strlen | ||
3 | * Copyright (C) 1991,1996 Free Software Foundation | ||
4 | * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) | ||
5 | * Copyright (C) 1996, 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz) | ||
6 | */ | ||
7 | |||
8 | #define LO_MAGIC 0x01010101 | ||
9 | #define HI_MAGIC 0x80808080 | ||
10 | |||
11 | .align 32 | ||
12 | .globl strlen | ||
13 | .type strlen,#function | ||
14 | strlen: | ||
15 | mov %o0, %o1 | ||
16 | andcc %o0, 3, %g0 | ||
17 | be,pt %icc, 9f | ||
18 | sethi %hi(HI_MAGIC), %o4 | ||
19 | ldub [%o0], %o5 | ||
20 | brz,pn %o5, 11f | ||
21 | add %o0, 1, %o0 | ||
22 | andcc %o0, 3, %g0 | ||
23 | be,pn %icc, 4f | ||
24 | or %o4, %lo(HI_MAGIC), %o3 | ||
25 | ldub [%o0], %o5 | ||
26 | brz,pn %o5, 12f | ||
27 | add %o0, 1, %o0 | ||
28 | andcc %o0, 3, %g0 | ||
29 | be,pt %icc, 5f | ||
30 | sethi %hi(LO_MAGIC), %o4 | ||
31 | ldub [%o0], %o5 | ||
32 | brz,pn %o5, 13f | ||
33 | add %o0, 1, %o0 | ||
34 | ba,pt %icc, 8f | ||
35 | or %o4, %lo(LO_MAGIC), %o2 | ||
36 | 9: | ||
37 | or %o4, %lo(HI_MAGIC), %o3 | ||
38 | 4: | ||
39 | sethi %hi(LO_MAGIC), %o4 | ||
40 | 5: | ||
41 | or %o4, %lo(LO_MAGIC), %o2 | ||
42 | 8: | ||
43 | ld [%o0], %o5 | ||
44 | 2: | ||
45 | sub %o5, %o2, %o4 | ||
46 | andcc %o4, %o3, %g0 | ||
47 | be,pt %icc, 8b | ||
48 | add %o0, 4, %o0 | ||
49 | |||
50 | /* Check every byte. */ | ||
51 | srl %o5, 24, %g7 | ||
52 | andcc %g7, 0xff, %g0 | ||
53 | be,pn %icc, 1f | ||
54 | add %o0, -4, %o4 | ||
55 | srl %o5, 16, %g7 | ||
56 | andcc %g7, 0xff, %g0 | ||
57 | be,pn %icc, 1f | ||
58 | add %o4, 1, %o4 | ||
59 | srl %o5, 8, %g7 | ||
60 | andcc %g7, 0xff, %g0 | ||
61 | be,pn %icc, 1f | ||
62 | add %o4, 1, %o4 | ||
63 | andcc %o5, 0xff, %g0 | ||
64 | bne,a,pt %icc, 2b | ||
65 | ld [%o0], %o5 | ||
66 | add %o4, 1, %o4 | ||
67 | 1: | ||
68 | retl | ||
69 | sub %o4, %o1, %o0 | ||
70 | 11: | ||
71 | retl | ||
72 | mov 0, %o0 | ||
73 | 12: | ||
74 | retl | ||
75 | mov 1, %o0 | ||
76 | 13: | ||
77 | retl | ||
78 | mov 2, %o0 | ||
79 | |||
80 | .size strlen, .-strlen | ||
diff --git a/arch/sparc64/lib/strlen_user.S b/arch/sparc64/lib/strlen_user.S new file mode 100644 index 000000000000..9ed54ba14fc6 --- /dev/null +++ b/arch/sparc64/lib/strlen_user.S | |||
@@ -0,0 +1,95 @@ | |||
1 | /* strlen_user.S: Sparc64 optimized strlen_user code | ||
2 | * | ||
3 | * Return the length of the string in userspace, including the | ||
4 | * terminating 0, or 0 on fault | ||
5 | * | ||
6 | * Copyright (C) 1991,1996 Free Software Foundation | ||
7 | * Copyright (C) 1996,1999 David S. Miller (davem@redhat.com) | ||
8 | * Copyright (C) 1996,1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz) | ||
9 | */ | ||
10 | |||
11 | #include <asm/asi.h> | ||
12 | |||
13 | #define LO_MAGIC 0x01010101 | ||
14 | #define HI_MAGIC 0x80808080 | ||
15 | |||
16 | .align 4 | ||
17 | .global __strlen_user, __strnlen_user | ||
18 | __strlen_user: | ||
19 | sethi %hi(32768), %o1 | ||
20 | __strnlen_user: | ||
21 | mov %o1, %g1 | ||
22 | mov %o0, %o1 | ||
23 | andcc %o0, 3, %g0 | ||
24 | be,pt %icc, 9f | ||
25 | sethi %hi(HI_MAGIC), %o4 | ||
26 | 10: lduba [%o0] %asi, %o5 | ||
27 | brz,pn %o5, 21f | ||
28 | add %o0, 1, %o0 | ||
29 | andcc %o0, 3, %g0 | ||
30 | be,pn %icc, 4f | ||
31 | or %o4, %lo(HI_MAGIC), %o3 | ||
32 | 11: lduba [%o0] %asi, %o5 | ||
33 | brz,pn %o5, 22f | ||
34 | add %o0, 1, %o0 | ||
35 | andcc %o0, 3, %g0 | ||
36 | be,pt %icc, 13f | ||
37 | srl %o3, 7, %o2 | ||
38 | 12: lduba [%o0] %asi, %o5 | ||
39 | brz,pn %o5, 23f | ||
40 | add %o0, 1, %o0 | ||
41 | ba,pt %icc, 2f | ||
42 | 15: lda [%o0] %asi, %o5 | ||
43 | 9: or %o4, %lo(HI_MAGIC), %o3 | ||
44 | 4: srl %o3, 7, %o2 | ||
45 | 13: lda [%o0] %asi, %o5 | ||
46 | 2: sub %o5, %o2, %o4 | ||
47 | andcc %o4, %o3, %g0 | ||
48 | bne,pn %icc, 82f | ||
49 | add %o0, 4, %o0 | ||
50 | sub %o0, %o1, %g2 | ||
51 | 81: cmp %g2, %g1 | ||
52 | blu,pt %icc, 13b | ||
53 | mov %o0, %o4 | ||
54 | ba,a,pt %xcc, 1f | ||
55 | |||
56 | /* Check every byte. */ | ||
57 | 82: srl %o5, 24, %g7 | ||
58 | andcc %g7, 0xff, %g0 | ||
59 | be,pn %icc, 1f | ||
60 | add %o0, -3, %o4 | ||
61 | srl %o5, 16, %g7 | ||
62 | andcc %g7, 0xff, %g0 | ||
63 | be,pn %icc, 1f | ||
64 | add %o4, 1, %o4 | ||
65 | srl %o5, 8, %g7 | ||
66 | andcc %g7, 0xff, %g0 | ||
67 | be,pn %icc, 1f | ||
68 | add %o4, 1, %o4 | ||
69 | andcc %o5, 0xff, %g0 | ||
70 | bne,pt %icc, 81b | ||
71 | sub %o0, %o1, %g2 | ||
72 | add %o4, 1, %o4 | ||
73 | 1: retl | ||
74 | sub %o4, %o1, %o0 | ||
75 | 21: retl | ||
76 | mov 1, %o0 | ||
77 | 22: retl | ||
78 | mov 2, %o0 | ||
79 | 23: retl | ||
80 | mov 3, %o0 | ||
81 | |||
82 | .section .fixup,#alloc,#execinstr | ||
83 | .align 4 | ||
84 | 30: | ||
85 | retl | ||
86 | clr %o0 | ||
87 | |||
88 | .section __ex_table,#alloc | ||
89 | .align 4 | ||
90 | |||
91 | .word 10b, 30b | ||
92 | .word 11b, 30b | ||
93 | .word 12b, 30b | ||
94 | .word 15b, 30b | ||
95 | .word 13b, 30b | ||
diff --git a/arch/sparc64/lib/strncmp.S b/arch/sparc64/lib/strncmp.S new file mode 100644 index 000000000000..6f14f53dbabe --- /dev/null +++ b/arch/sparc64/lib/strncmp.S | |||
@@ -0,0 +1,32 @@ | |||
1 | /* $Id: strncmp.S,v 1.2 1997/03/11 17:51:44 jj Exp $ | ||
2 | * Sparc64 optimized strncmp code. | ||
3 | * | ||
4 | * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz) | ||
5 | */ | ||
6 | |||
7 | #include <asm/asi.h> | ||
8 | |||
9 | .text | ||
10 | .align 32 | ||
11 | .globl strncmp | ||
12 | .type strncmp,#function | ||
13 | strncmp: | ||
14 | brlez,pn %o2, 3f | ||
15 | lduba [%o0] (ASI_PNF), %o3 | ||
16 | 1: | ||
17 | add %o0, 1, %o0 | ||
18 | ldub [%o1], %o4 | ||
19 | brz,pn %o3, 2f | ||
20 | add %o1, 1, %o1 | ||
21 | cmp %o3, %o4 | ||
22 | bne,pn %icc, 2f | ||
23 | subcc %o2, 1, %o2 | ||
24 | bne,a,pt %xcc, 1b | ||
25 | ldub [%o0], %o3 | ||
26 | 2: | ||
27 | retl | ||
28 | sub %o3, %o4, %o0 | ||
29 | 3: | ||
30 | retl | ||
31 | clr %o0 | ||
32 | .size strncmp, .-strncmp | ||
diff --git a/arch/sparc64/lib/strncpy_from_user.S b/arch/sparc64/lib/strncpy_from_user.S new file mode 100644 index 000000000000..09cbbaa0ebf4 --- /dev/null +++ b/arch/sparc64/lib/strncpy_from_user.S | |||
@@ -0,0 +1,139 @@ | |||
1 | /* $Id: strncpy_from_user.S,v 1.6 1999/05/25 16:53:05 jj Exp $ | ||
2 | * strncpy_from_user.S: Sparc64 strncpy from userspace. | ||
3 | * | ||
4 | * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz) | ||
5 | */ | ||
6 | |||
7 | #include <asm/asi.h> | ||
8 | #include <asm/errno.h> | ||
9 | |||
10 | .data | ||
11 | .align 8 | ||
12 | 0: .xword 0x0101010101010101 | ||
13 | |||
14 | .text | ||
15 | .align 32 | ||
16 | |||
17 | /* Must return: | ||
18 | * | ||
19 | * -EFAULT for an exception | ||
20 | * count if we hit the buffer limit | ||
21 | * bytes copied if we hit a null byte | ||
22 | * (without the null byte) | ||
23 | * | ||
24 | * This implementation assumes: | ||
25 | * %o1 is 8 aligned => !(%o2 & 7) | ||
26 | * %o0 is 8 aligned (if not, it will be slooooow, but will work) | ||
27 | * | ||
28 | * This is optimized for the common case: | ||
29 | * in my stats, 90% of src are 8 aligned (even on sparc32) | ||
30 | * and average length is 18 or so. | ||
31 | */ | ||
32 | |||
33 | .globl __strncpy_from_user | ||
34 | .type __strncpy_from_user,#function | ||
35 | __strncpy_from_user: | ||
36 | /* %o0=dest, %o1=src, %o2=count */ | ||
37 | andcc %o1, 7, %g0 ! IEU1 Group | ||
38 | bne,pn %icc, 30f ! CTI | ||
39 | add %o0, %o2, %g3 ! IEU0 | ||
40 | 60: ldxa [%o1] %asi, %g1 ! Load Group | ||
41 | brlez,pn %o2, 10f ! CTI | ||
42 | mov %o0, %o3 ! IEU0 | ||
43 | 50: sethi %hi(0b), %o4 ! IEU0 Group | ||
44 | ldx [%o4 + %lo(0b)], %o4 ! Load | ||
45 | sllx %o4, 7, %o5 ! IEU1 Group | ||
46 | 1: sub %g1, %o4, %g2 ! IEU0 Group | ||
47 | stx %g1, [%o0] ! Store | ||
48 | add %o0, 8, %o0 ! IEU1 | ||
49 | andcc %g2, %o5, %g0 ! IEU1 Group | ||
50 | bne,pn %xcc, 5f ! CTI | ||
51 | add %o1, 8, %o1 ! IEU0 | ||
52 | cmp %o0, %g3 ! IEU1 Group | ||
53 | bl,a,pt %xcc, 1b ! CTI | ||
54 | 61: ldxa [%o1] %asi, %g1 ! Load | ||
55 | 10: retl ! CTI Group | ||
56 | mov %o2, %o0 ! IEU0 | ||
57 | 5: srlx %g2, 32, %g7 ! IEU0 Group | ||
58 | sethi %hi(0xff00), %o4 ! IEU1 | ||
59 | andcc %g7, %o5, %g0 ! IEU1 Group | ||
60 | be,pn %icc, 2f ! CTI | ||
61 | or %o4, %lo(0xff00), %o4 ! IEU0 | ||
62 | srlx %g1, 48, %g7 ! IEU0 Group | ||
63 | andcc %g7, %o4, %g0 ! IEU1 Group | ||
64 | be,pn %icc, 50f ! CTI | ||
65 | andcc %g7, 0xff, %g0 ! IEU1 Group | ||
66 | be,pn %icc, 51f ! CTI | ||
67 | srlx %g1, 32, %g7 ! IEU0 | ||
68 | andcc %g7, %o4, %g0 ! IEU1 Group | ||
69 | be,pn %icc, 52f ! CTI | ||
70 | andcc %g7, 0xff, %g0 ! IEU1 Group | ||
71 | be,pn %icc, 53f ! CTI | ||
72 | 2: andcc %g2, %o5, %g0 ! IEU1 Group | ||
73 | be,pn %icc, 2f ! CTI | ||
74 | srl %g1, 16, %g7 ! IEU0 | ||
75 | andcc %g7, %o4, %g0 ! IEU1 Group | ||
76 | be,pn %icc, 54f ! CTI | ||
77 | andcc %g7, 0xff, %g0 ! IEU1 Group | ||
78 | be,pn %icc, 55f ! CTI | ||
79 | andcc %g1, %o4, %g0 ! IEU1 Group | ||
80 | be,pn %icc, 56f ! CTI | ||
81 | andcc %g1, 0xff, %g0 ! IEU1 Group | ||
82 | be,a,pn %icc, 57f ! CTI | ||
83 | sub %o0, %o3, %o0 ! IEU0 | ||
84 | 2: cmp %o0, %g3 ! IEU1 Group | ||
85 | bl,a,pt %xcc, 50b ! CTI | ||
86 | 62: ldxa [%o1] %asi, %g1 ! Load | ||
87 | retl ! CTI Group | ||
88 | mov %o2, %o0 ! IEU0 | ||
89 | 50: sub %o0, %o3, %o0 | ||
90 | retl | ||
91 | sub %o0, 8, %o0 | ||
92 | 51: sub %o0, %o3, %o0 | ||
93 | retl | ||
94 | sub %o0, 7, %o0 | ||
95 | 52: sub %o0, %o3, %o0 | ||
96 | retl | ||
97 | sub %o0, 6, %o0 | ||
98 | 53: sub %o0, %o3, %o0 | ||
99 | retl | ||
100 | sub %o0, 5, %o0 | ||
101 | 54: sub %o0, %o3, %o0 | ||
102 | retl | ||
103 | sub %o0, 4, %o0 | ||
104 | 55: sub %o0, %o3, %o0 | ||
105 | retl | ||
106 | sub %o0, 3, %o0 | ||
107 | 56: sub %o0, %o3, %o0 | ||
108 | retl | ||
109 | sub %o0, 2, %o0 | ||
110 | 57: retl | ||
111 | sub %o0, 1, %o0 | ||
112 | 30: brlez,pn %o2, 3f | ||
113 | sub %g0, %o2, %o3 | ||
114 | add %o0, %o2, %o0 | ||
115 | 63: lduba [%o1] %asi, %o4 | ||
116 | 1: add %o1, 1, %o1 | ||
117 | brz,pn %o4, 2f | ||
118 | stb %o4, [%o0 + %o3] | ||
119 | addcc %o3, 1, %o3 | ||
120 | bne,pt %xcc, 1b | ||
121 | 64: lduba [%o1] %asi, %o4 | ||
122 | 3: retl | ||
123 | mov %o2, %o0 | ||
124 | 2: retl | ||
125 | add %o2, %o3, %o0 | ||
126 | .size __strncpy_from_user, .-__strncpy_from_user | ||
127 | |||
128 | .section .fixup,#alloc,#execinstr | ||
129 | .align 4 | ||
130 | 4: retl | ||
131 | mov -EFAULT, %o0 | ||
132 | |||
133 | .section __ex_table,#alloc | ||
134 | .align 4 | ||
135 | .word 60b, 4b | ||
136 | .word 61b, 4b | ||
137 | .word 62b, 4b | ||
138 | .word 63b, 4b | ||
139 | .word 64b, 4b | ||
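The byte-at-a-time path starting at 30: implements the same contract as the comment at the top of the file. A hedged C reference of that contract; get_user_byte() is our stand-in for an lduba access with an exception-table fixup, not a real kernel helper:

	#include <errno.h>

	/* Returns the byte read, or a negative value on fault. */
	extern int get_user_byte(const char *uaddr);

	static long strncpy_from_user_ref(char *dst, const char *src, long count)
	{
		long copied;

		for (copied = 0; copied < count; copied++) {
			int c = get_user_byte(src + copied);

			if (c < 0)
				return -EFAULT;	/* exception */
			dst[copied] = (char)c;
			if (c == 0)
				return copied;	/* length without the null byte */
		}
		return count;			/* hit the buffer limit */
	}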
diff --git a/arch/sparc64/lib/user_fixup.c b/arch/sparc64/lib/user_fixup.c new file mode 100644 index 000000000000..0278e34125db --- /dev/null +++ b/arch/sparc64/lib/user_fixup.c | |||
@@ -0,0 +1,71 @@ | |||
1 | /* user_fixup.c: Fix up user copy faults. | ||
2 | * | ||
3 | * Copyright (C) 2004 David S. Miller <davem@redhat.com> | ||
4 | */ | ||
5 | |||
6 | #include <linux/compiler.h> | ||
7 | #include <linux/kernel.h> | ||
8 | #include <linux/string.h> | ||
9 | #include <linux/errno.h> | ||
10 | #include <asm/uaccess.h> | ||
11 | |||
12 | /* Calculating the exact fault address when using | ||
13 | * block loads and stores can be very complicated. | ||
14 | * Instead of trying to be clever and handling all | ||
15 | * of the cases, just fix things up simply here. | ||
16 | */ | ||
17 | |||
18 | unsigned long copy_from_user_fixup(void *to, const void __user *from, unsigned long size) | ||
19 | { | ||
20 | char *dst = to; | ||
21 | const char __user *src = from; | ||
22 | |||
23 | while (size) { | ||
24 | if (__get_user(*dst, src)) | ||
25 | break; | ||
26 | dst++; | ||
27 | src++; | ||
28 | size--; | ||
29 | } | ||
30 | |||
31 | if (size) | ||
32 | memset(dst, 0, size); | ||
33 | |||
34 | return size; | ||
35 | } | ||
36 | |||
37 | unsigned long copy_to_user_fixup(void __user *to, const void *from, unsigned long size) | ||
38 | { | ||
39 | char __user *dst = to; | ||
40 | const char *src = from; | ||
41 | |||
42 | while (size) { | ||
43 | if (__put_user(*src, dst)) | ||
44 | break; | ||
45 | dst++; | ||
46 | src++; | ||
47 | size--; | ||
48 | } | ||
49 | |||
50 | return size; | ||
51 | } | ||
52 | |||
53 | unsigned long copy_in_user_fixup(void __user *to, void __user *from, unsigned long size) | ||
54 | { | ||
55 | char __user *dst = to; | ||
56 | char __user *src = from; | ||
57 | |||
58 | while (size) { | ||
59 | char tmp; | ||
60 | |||
61 | if (__get_user(tmp, src)) | ||
62 | break; | ||
63 | if (__put_user(tmp, dst)) | ||
64 | break; | ||
65 | dst++; | ||
66 | src++; | ||
67 | size--; | ||
68 | } | ||
69 | |||
70 | return size; | ||
71 | } | ||
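For completeness, the caller-side view of this contract: a nonzero return from the copy routines means that many trailing bytes were not copied (and copy_from_user has already zeroed the kernel-side tail), which callers conventionally fold into -EFAULT. An illustrative sketch, not from the patch:

	#include <linux/errno.h>
	#include <asm/uaccess.h>

	static long fetch_from_user(void *kbuf, const void __user *ubuf,
				    unsigned long len)
	{
		if (copy_from_user(kbuf, ubuf, len))
			return -EFAULT;	/* some trailing bytes faulted */
		return 0;
	}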
diff --git a/arch/sparc64/lib/xor.S b/arch/sparc64/lib/xor.S new file mode 100644 index 000000000000..4cd5d2be1ae1 --- /dev/null +++ b/arch/sparc64/lib/xor.S | |||
@@ -0,0 +1,354 @@ | |||
1 | /* | ||
2 | * arch/sparc64/lib/xor.S | ||
3 | * | ||
4 | * High speed xor_block operation for RAID4/5 utilizing the | ||
5 | * UltraSparc Visual Instruction Set. | ||
6 | * | ||
7 | * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz) | ||
8 | */ | ||
9 | |||
10 | #include <asm/visasm.h> | ||
11 | #include <asm/asi.h> | ||
12 | #include <asm/dcu.h> | ||
13 | #include <asm/spitfire.h> | ||
14 | |||
15 | /* | ||
16 | * Requirements: | ||
17 | * !(((long)dest | (long)sourceN) & (64 - 1)) && | ||
18 | * !(len & 127) && len >= 256 | ||
19 | */ | ||
20 | .text | ||
21 | .align 32 | ||
22 | .globl xor_vis_2 | ||
23 | .type xor_vis_2,#function | ||
24 | xor_vis_2: | ||
25 | rd %fprs, %o5 | ||
26 | andcc %o5, FPRS_FEF|FPRS_DU, %g0 | ||
27 | be,pt %icc, 0f | ||
28 | sethi %hi(VISenter), %g1 | ||
29 | jmpl %g1 + %lo(VISenter), %g7 | ||
30 | add %g7, 8, %g7 | ||
31 | 0: wr %g0, FPRS_FEF, %fprs | ||
32 | rd %asi, %g1 | ||
33 | wr %g0, ASI_BLK_P, %asi | ||
34 | membar #LoadStore|#StoreLoad|#StoreStore | ||
35 | sub %o0, 128, %o0 | ||
36 | ldda [%o1] %asi, %f0 | ||
37 | ldda [%o2] %asi, %f16 | ||
38 | |||
39 | 2: ldda [%o1 + 64] %asi, %f32 | ||
40 | fxor %f0, %f16, %f16 | ||
41 | fxor %f2, %f18, %f18 | ||
42 | fxor %f4, %f20, %f20 | ||
43 | fxor %f6, %f22, %f22 | ||
44 | fxor %f8, %f24, %f24 | ||
45 | fxor %f10, %f26, %f26 | ||
46 | fxor %f12, %f28, %f28 | ||
47 | fxor %f14, %f30, %f30 | ||
48 | stda %f16, [%o1] %asi | ||
49 | ldda [%o2 + 64] %asi, %f48 | ||
50 | ldda [%o1 + 128] %asi, %f0 | ||
51 | fxor %f32, %f48, %f48 | ||
52 | fxor %f34, %f50, %f50 | ||
53 | add %o1, 128, %o1 | ||
54 | fxor %f36, %f52, %f52 | ||
55 | add %o2, 128, %o2 | ||
56 | fxor %f38, %f54, %f54 | ||
57 | subcc %o0, 128, %o0 | ||
58 | fxor %f40, %f56, %f56 | ||
59 | fxor %f42, %f58, %f58 | ||
60 | fxor %f44, %f60, %f60 | ||
61 | fxor %f46, %f62, %f62 | ||
62 | stda %f48, [%o1 - 64] %asi | ||
63 | bne,pt %xcc, 2b | ||
64 | ldda [%o2] %asi, %f16 | ||
65 | |||
66 | ldda [%o1 + 64] %asi, %f32 | ||
67 | fxor %f0, %f16, %f16 | ||
68 | fxor %f2, %f18, %f18 | ||
69 | fxor %f4, %f20, %f20 | ||
70 | fxor %f6, %f22, %f22 | ||
71 | fxor %f8, %f24, %f24 | ||
72 | fxor %f10, %f26, %f26 | ||
73 | fxor %f12, %f28, %f28 | ||
74 | fxor %f14, %f30, %f30 | ||
75 | stda %f16, [%o1] %asi | ||
76 | ldda [%o2 + 64] %asi, %f48 | ||
77 | membar #Sync | ||
78 | fxor %f32, %f48, %f48 | ||
79 | fxor %f34, %f50, %f50 | ||
80 | fxor %f36, %f52, %f52 | ||
81 | fxor %f38, %f54, %f54 | ||
82 | fxor %f40, %f56, %f56 | ||
83 | fxor %f42, %f58, %f58 | ||
84 | fxor %f44, %f60, %f60 | ||
85 | fxor %f46, %f62, %f62 | ||
86 | stda %f48, [%o1 + 64] %asi | ||
87 | membar #Sync|#StoreStore|#StoreLoad | ||
88 | wr %g1, %g0, %asi | ||
89 | retl | ||
90 | wr %g0, 0, %fprs | ||
91 | .size xor_vis_2, .-xor_vis_2 | ||
92 | |||
93 | .globl xor_vis_3 | ||
94 | .type xor_vis_3,#function | ||
95 | xor_vis_3: | ||
96 | rd %fprs, %o5 | ||
97 | andcc %o5, FPRS_FEF|FPRS_DU, %g0 | ||
98 | be,pt %icc, 0f | ||
99 | sethi %hi(VISenter), %g1 | ||
100 | jmpl %g1 + %lo(VISenter), %g7 | ||
101 | add %g7, 8, %g7 | ||
102 | 0: wr %g0, FPRS_FEF, %fprs | ||
103 | rd %asi, %g1 | ||
104 | wr %g0, ASI_BLK_P, %asi | ||
105 | membar #LoadStore|#StoreLoad|#StoreStore | ||
106 | sub %o0, 64, %o0 | ||
107 | ldda [%o1] %asi, %f0 | ||
108 | ldda [%o2] %asi, %f16 | ||
109 | |||
110 | 3: ldda [%o3] %asi, %f32 | ||
111 | fxor %f0, %f16, %f48 | ||
112 | fxor %f2, %f18, %f50 | ||
113 | add %o1, 64, %o1 | ||
114 | fxor %f4, %f20, %f52 | ||
115 | fxor %f6, %f22, %f54 | ||
116 | add %o2, 64, %o2 | ||
117 | fxor %f8, %f24, %f56 | ||
118 | fxor %f10, %f26, %f58 | ||
119 | fxor %f12, %f28, %f60 | ||
120 | fxor %f14, %f30, %f62 | ||
121 | ldda [%o1] %asi, %f0 | ||
122 | fxor %f48, %f32, %f48 | ||
123 | fxor %f50, %f34, %f50 | ||
124 | fxor %f52, %f36, %f52 | ||
125 | fxor %f54, %f38, %f54 | ||
126 | add %o3, 64, %o3 | ||
127 | fxor %f56, %f40, %f56 | ||
128 | fxor %f58, %f42, %f58 | ||
129 | subcc %o0, 64, %o0 | ||
130 | fxor %f60, %f44, %f60 | ||
131 | fxor %f62, %f46, %f62 | ||
132 | stda %f48, [%o1 - 64] %asi | ||
133 | bne,pt %xcc, 3b | ||
134 | ldda [%o2] %asi, %f16 | ||
135 | |||
136 | ldda [%o3] %asi, %f32 | ||
137 | fxor %f0, %f16, %f48 | ||
138 | fxor %f2, %f18, %f50 | ||
139 | fxor %f4, %f20, %f52 | ||
140 | fxor %f6, %f22, %f54 | ||
141 | fxor %f8, %f24, %f56 | ||
142 | fxor %f10, %f26, %f58 | ||
143 | fxor %f12, %f28, %f60 | ||
144 | fxor %f14, %f30, %f62 | ||
145 | membar #Sync | ||
146 | fxor %f48, %f32, %f48 | ||
147 | fxor %f50, %f34, %f50 | ||
148 | fxor %f52, %f36, %f52 | ||
149 | fxor %f54, %f38, %f54 | ||
150 | fxor %f56, %f40, %f56 | ||
151 | fxor %f58, %f42, %f58 | ||
152 | fxor %f60, %f44, %f60 | ||
153 | fxor %f62, %f46, %f62 | ||
154 | stda %f48, [%o1] %asi | ||
155 | membar #Sync|#StoreStore|#StoreLoad | ||
156 | wr %g1, %g0, %asi | ||
157 | retl | ||
158 | wr %g0, 0, %fprs | ||
159 | .size xor_vis_3, .-xor_vis_3 | ||
160 | |||
161 | .globl xor_vis_4 | ||
162 | .type xor_vis_4,#function | ||
163 | xor_vis_4: | ||
164 | rd %fprs, %o5 | ||
165 | andcc %o5, FPRS_FEF|FPRS_DU, %g0 | ||
166 | be,pt %icc, 0f | ||
167 | sethi %hi(VISenter), %g1 | ||
168 | jmpl %g1 + %lo(VISenter), %g7 | ||
169 | add %g7, 8, %g7 | ||
170 | 0: wr %g0, FPRS_FEF, %fprs | ||
171 | rd %asi, %g1 | ||
172 | wr %g0, ASI_BLK_P, %asi | ||
173 | membar #LoadStore|#StoreLoad|#StoreStore | ||
174 | sub %o0, 64, %o0 | ||
175 | ldda [%o1] %asi, %f0 | ||
176 | ldda [%o2] %asi, %f16 | ||
177 | |||
178 | 4: ldda [%o3] %asi, %f32 | ||
179 | fxor %f0, %f16, %f16 | ||
180 | fxor %f2, %f18, %f18 | ||
181 | add %o1, 64, %o1 | ||
182 | fxor %f4, %f20, %f20 | ||
183 | fxor %f6, %f22, %f22 | ||
184 | add %o2, 64, %o2 | ||
185 | fxor %f8, %f24, %f24 | ||
186 | fxor %f10, %f26, %f26 | ||
187 | fxor %f12, %f28, %f28 | ||
188 | fxor %f14, %f30, %f30 | ||
189 | ldda [%o4] %asi, %f48 | ||
190 | fxor %f16, %f32, %f32 | ||
191 | fxor %f18, %f34, %f34 | ||
192 | fxor %f20, %f36, %f36 | ||
193 | fxor %f22, %f38, %f38 | ||
194 | add %o3, 64, %o3 | ||
195 | fxor %f24, %f40, %f40 | ||
196 | fxor %f26, %f42, %f42 | ||
197 | fxor %f28, %f44, %f44 | ||
198 | fxor %f30, %f46, %f46 | ||
199 | ldda [%o1] %asi, %f0 | ||
200 | fxor %f32, %f48, %f48 | ||
201 | fxor %f34, %f50, %f50 | ||
202 | fxor %f36, %f52, %f52 | ||
203 | add %o4, 64, %o4 | ||
204 | fxor %f38, %f54, %f54 | ||
205 | fxor %f40, %f56, %f56 | ||
206 | fxor %f42, %f58, %f58 | ||
207 | subcc %o0, 64, %o0 | ||
208 | fxor %f44, %f60, %f60 | ||
209 | fxor %f46, %f62, %f62 | ||
210 | stda %f48, [%o1 - 64] %asi | ||
211 | bne,pt %xcc, 4b | ||
212 | ldda [%o2] %asi, %f16 | ||
213 | |||
214 | ldda [%o3] %asi, %f32 | ||
215 | fxor %f0, %f16, %f16 | ||
216 | fxor %f2, %f18, %f18 | ||
217 | fxor %f4, %f20, %f20 | ||
218 | fxor %f6, %f22, %f22 | ||
219 | fxor %f8, %f24, %f24 | ||
220 | fxor %f10, %f26, %f26 | ||
221 | fxor %f12, %f28, %f28 | ||
222 | fxor %f14, %f30, %f30 | ||
223 | ldda [%o4] %asi, %f48 | ||
224 | fxor %f16, %f32, %f32 | ||
225 | fxor %f18, %f34, %f34 | ||
226 | fxor %f20, %f36, %f36 | ||
227 | fxor %f22, %f38, %f38 | ||
228 | fxor %f24, %f40, %f40 | ||
229 | fxor %f26, %f42, %f42 | ||
230 | fxor %f28, %f44, %f44 | ||
231 | fxor %f30, %f46, %f46 | ||
232 | membar #Sync | ||
233 | fxor %f32, %f48, %f48 | ||
234 | fxor %f34, %f50, %f50 | ||
235 | fxor %f36, %f52, %f52 | ||
236 | fxor %f38, %f54, %f54 | ||
237 | fxor %f40, %f56, %f56 | ||
238 | fxor %f42, %f58, %f58 | ||
239 | fxor %f44, %f60, %f60 | ||
240 | fxor %f46, %f62, %f62 | ||
241 | stda %f48, [%o1] %asi | ||
242 | membar #Sync|#StoreStore|#StoreLoad | ||
243 | wr %g1, %g0, %asi | ||
244 | retl | ||
245 | wr %g0, 0, %fprs | ||
246 | .size xor_vis_4, .-xor_vis_4 | ||
247 | |||
248 | .globl xor_vis_5 | ||
249 | .type xor_vis_5,#function | ||
250 | xor_vis_5: | ||
251 | save %sp, -192, %sp | ||
252 | rd %fprs, %o5 | ||
253 | andcc %o5, FPRS_FEF|FPRS_DU, %g0 | ||
254 | be,pt %icc, 0f | ||
255 | sethi %hi(VISenter), %g1 | ||
256 | jmpl %g1 + %lo(VISenter), %g7 | ||
257 | add %g7, 8, %g7 | ||
258 | 0: wr %g0, FPRS_FEF, %fprs | ||
259 | rd %asi, %g1 | ||
260 | wr %g0, ASI_BLK_P, %asi | ||
261 | membar #LoadStore|#StoreLoad|#StoreStore | ||
262 | sub %i0, 64, %i0 | ||
263 | ldda [%i1] %asi, %f0 | ||
264 | ldda [%i2] %asi, %f16 | ||
265 | |||
266 | 5: ldda [%i3] %asi, %f32 | ||
267 | fxor %f0, %f16, %f48 | ||
268 | fxor %f2, %f18, %f50 | ||
269 | add %i1, 64, %i1 | ||
270 | fxor %f4, %f20, %f52 | ||
271 | fxor %f6, %f22, %f54 | ||
272 | add %i2, 64, %i2 | ||
273 | fxor %f8, %f24, %f56 | ||
274 | fxor %f10, %f26, %f58 | ||
275 | fxor %f12, %f28, %f60 | ||
276 | fxor %f14, %f30, %f62 | ||
277 | ldda [%i4] %asi, %f16 | ||
278 | fxor %f48, %f32, %f48 | ||
279 | fxor %f50, %f34, %f50 | ||
280 | fxor %f52, %f36, %f52 | ||
281 | fxor %f54, %f38, %f54 | ||
282 | add %i3, 64, %i3 | ||
283 | fxor %f56, %f40, %f56 | ||
284 | fxor %f58, %f42, %f58 | ||
285 | fxor %f60, %f44, %f60 | ||
286 | fxor %f62, %f46, %f62 | ||
287 | ldda [%i5] %asi, %f32 | ||
288 | fxor %f48, %f16, %f48 | ||
289 | fxor %f50, %f18, %f50 | ||
290 | add %i4, 64, %i4 | ||
291 | fxor %f52, %f20, %f52 | ||
292 | fxor %f54, %f22, %f54 | ||
293 | add %i5, 64, %i5 | ||
294 | fxor %f56, %f24, %f56 | ||
295 | fxor %f58, %f26, %f58 | ||
296 | fxor %f60, %f28, %f60 | ||
297 | fxor %f62, %f30, %f62 | ||
298 | ldda [%i1] %asi, %f0 | ||
299 | fxor %f48, %f32, %f48 | ||
300 | fxor %f50, %f34, %f50 | ||
301 | fxor %f52, %f36, %f52 | ||
302 | fxor %f54, %f38, %f54 | ||
303 | fxor %f56, %f40, %f56 | ||
304 | fxor %f58, %f42, %f58 | ||
305 | subcc %i0, 64, %i0 | ||
306 | fxor %f60, %f44, %f60 | ||
307 | fxor %f62, %f46, %f62 | ||
308 | stda %f48, [%i1 - 64] %asi | ||
309 | bne,pt %xcc, 5b | ||
310 | ldda [%i2] %asi, %f16 | ||
311 | |||
312 | ldda [%i3] %asi, %f32 | ||
313 | fxor %f0, %f16, %f48 | ||
314 | fxor %f2, %f18, %f50 | ||
315 | fxor %f4, %f20, %f52 | ||
316 | fxor %f6, %f22, %f54 | ||
317 | fxor %f8, %f24, %f56 | ||
318 | fxor %f10, %f26, %f58 | ||
319 | fxor %f12, %f28, %f60 | ||
320 | fxor %f14, %f30, %f62 | ||
321 | ldda [%i4] %asi, %f16 | ||
322 | fxor %f48, %f32, %f48 | ||
323 | fxor %f50, %f34, %f50 | ||
324 | fxor %f52, %f36, %f52 | ||
325 | fxor %f54, %f38, %f54 | ||
326 | fxor %f56, %f40, %f56 | ||
327 | fxor %f58, %f42, %f58 | ||
328 | fxor %f60, %f44, %f60 | ||
329 | fxor %f62, %f46, %f62 | ||
330 | ldda [%i5] %asi, %f32 | ||
331 | fxor %f48, %f16, %f48 | ||
332 | fxor %f50, %f18, %f50 | ||
333 | fxor %f52, %f20, %f52 | ||
334 | fxor %f54, %f22, %f54 | ||
335 | fxor %f56, %f24, %f56 | ||
336 | fxor %f58, %f26, %f58 | ||
337 | fxor %f60, %f28, %f60 | ||
338 | fxor %f62, %f30, %f62 | ||
339 | membar #Sync | ||
340 | fxor %f48, %f32, %f48 | ||
341 | fxor %f50, %f34, %f50 | ||
342 | fxor %f52, %f36, %f52 | ||
343 | fxor %f54, %f38, %f54 | ||
344 | fxor %f56, %f40, %f56 | ||
345 | fxor %f58, %f42, %f58 | ||
346 | fxor %f60, %f44, %f60 | ||
347 | fxor %f62, %f46, %f62 | ||
348 | stda %f48, [%i1] %asi | ||
349 | membar #Sync|#StoreStore|#StoreLoad | ||
350 | wr %g1, %g0, %asi | ||
351 | wr %g0, 0, %fprs | ||
352 | ret | ||
353 | restore | ||
354 | .size xor_vis_5, .-xor_vis_5 | ||
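Semantically, xor_vis_N xors the N-1 trailing source blocks into the first buffer argument; the VIS ldda/stda pairs just move 64 bytes at a time through the FPU register file. A hedged plain-C reference for the two- and three-source cases (names are ours; the alignment and length requirements quoted in the header are assumed to hold):

	#include <stddef.h>
	#include <stdint.h>

	static void xor_2_ref(unsigned long bytes, uint64_t *p1,
			      const uint64_t *p2)
	{
		size_t i;

		for (i = 0; i < bytes / sizeof(uint64_t); i++)
			p1[i] ^= p2[i];
	}

	static void xor_3_ref(unsigned long bytes, uint64_t *p1,
			      const uint64_t *p2, const uint64_t *p3)
	{
		size_t i;

		for (i = 0; i < bytes / sizeof(uint64_t); i++)
			p1[i] ^= p2[i] ^ p3[i];
	}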