diff options
Diffstat (limited to 'arch/sparc64/lib')
-rw-r--r-- | arch/sparc64/lib/Makefile | 2 | ||||
-rw-r--r-- | arch/sparc64/lib/NGbzero.S | 163 | ||||
-rw-r--r-- | arch/sparc64/lib/NGcopy_from_user.S | 37 | ||||
-rw-r--r-- | arch/sparc64/lib/NGcopy_to_user.S | 40 | ||||
-rw-r--r-- | arch/sparc64/lib/NGmemcpy.S | 368 | ||||
-rw-r--r-- | arch/sparc64/lib/NGpage.S | 96 | ||||
-rw-r--r-- | arch/sparc64/lib/NGpatch.S | 33 | ||||
-rw-r--r-- | arch/sparc64/lib/U3patch.S | 3 | ||||
-rw-r--r-- | arch/sparc64/lib/bzero.S | 18 | ||||
-rw-r--r-- | arch/sparc64/lib/clear_page.S | 12 | ||||
-rw-r--r-- | arch/sparc64/lib/copy_page.S | 7 | ||||
-rw-r--r-- | arch/sparc64/lib/delay.c | 19 | ||||
-rw-r--r-- | arch/sparc64/lib/xor.S | 300 |
13 files changed, 1064 insertions, 34 deletions
diff --git a/arch/sparc64/lib/Makefile b/arch/sparc64/lib/Makefile index c295806500f7..8812ded19f01 100644 --- a/arch/sparc64/lib/Makefile +++ b/arch/sparc64/lib/Makefile | |||
@@ -11,6 +11,8 @@ lib-y := PeeCeeI.o copy_page.o clear_page.o strlen.o strncmp.o \ | |||
11 | VISsave.o atomic.o bitops.o \ | 11 | VISsave.o atomic.o bitops.o \ |
12 | U1memcpy.o U1copy_from_user.o U1copy_to_user.o \ | 12 | U1memcpy.o U1copy_from_user.o U1copy_to_user.o \ |
13 | U3memcpy.o U3copy_from_user.o U3copy_to_user.o U3patch.o \ | 13 | U3memcpy.o U3copy_from_user.o U3copy_to_user.o U3patch.o \ |
14 | NGmemcpy.o NGcopy_from_user.o NGcopy_to_user.o NGpatch.o \ | ||
15 | NGpage.o NGbzero.o \ | ||
14 | copy_in_user.o user_fixup.o memmove.o \ | 16 | copy_in_user.o user_fixup.o memmove.o \ |
15 | mcount.o ipcsum.o rwsem.o xor.o find_bit.o delay.o | 17 | mcount.o ipcsum.o rwsem.o xor.o find_bit.o delay.o |
16 | 18 | ||
diff --git a/arch/sparc64/lib/NGbzero.S b/arch/sparc64/lib/NGbzero.S new file mode 100644 index 000000000000..e86baece5cc8 --- /dev/null +++ b/arch/sparc64/lib/NGbzero.S | |||
@@ -0,0 +1,163 @@ | |||
1 | /* NGbzero.S: Niagara optimized memset/clear_user. | ||
2 | * | ||
3 | * Copyright (C) 2006 David S. Miller (davem@davemloft.net) | ||
4 | */ | ||
5 | #include <asm/asi.h> | ||
6 | |||
7 | #define EX_ST(x,y) \ | ||
8 | 98: x,y; \ | ||
9 | .section .fixup; \ | ||
10 | .align 4; \ | ||
11 | 99: retl; \ | ||
12 | mov %o1, %o0; \ | ||
13 | .section __ex_table; \ | ||
14 | .align 4; \ | ||
15 | .word 98b, 99b; \ | ||
16 | .text; \ | ||
17 | .align 4; | ||
18 | |||
19 | .text | ||
20 | |||
21 | .globl NGmemset | ||
22 | .type NGmemset, #function | ||
23 | NGmemset: /* %o0=buf, %o1=pat, %o2=len */ | ||
24 | and %o1, 0xff, %o3 | ||
25 | mov %o2, %o1 | ||
26 | sllx %o3, 8, %g1 | ||
27 | or %g1, %o3, %o2 | ||
28 | sllx %o2, 16, %g1 | ||
29 | or %g1, %o2, %o2 | ||
30 | sllx %o2, 32, %g1 | ||
31 | ba,pt %xcc, 1f | ||
32 | or %g1, %o2, %o2 | ||
33 | |||
34 | .globl NGbzero | ||
35 | .type NGbzero, #function | ||
36 | NGbzero: | ||
37 | clr %o2 | ||
38 | 1: brz,pn %o1, NGbzero_return | ||
39 | mov %o0, %o3 | ||
40 | |||
41 | /* %o5: saved %asi, restored at NGbzero_done | ||
42 | * %g7: store-init %asi to use | ||
43 | * %o4: non-store-init %asi to use | ||
44 | */ | ||
45 | rd %asi, %o5 | ||
46 | mov ASI_BLK_INIT_QUAD_LDD_P, %g7 | ||
47 | mov ASI_P, %o4 | ||
48 | wr %o4, 0x0, %asi | ||
49 | |||
50 | NGbzero_from_clear_user: | ||
51 | cmp %o1, 15 | ||
52 | bl,pn %icc, NGbzero_tiny | ||
53 | andcc %o0, 0x7, %g1 | ||
54 | be,pt %xcc, 2f | ||
55 | mov 8, %g2 | ||
56 | sub %g2, %g1, %g1 | ||
57 | sub %o1, %g1, %o1 | ||
58 | 1: EX_ST(stba %o2, [%o0 + 0x00] %asi) | ||
59 | subcc %g1, 1, %g1 | ||
60 | bne,pt %xcc, 1b | ||
61 | add %o0, 1, %o0 | ||
62 | 2: cmp %o1, 128 | ||
63 | bl,pn %icc, NGbzero_medium | ||
64 | andcc %o0, (64 - 1), %g1 | ||
65 | be,pt %xcc, NGbzero_pre_loop | ||
66 | mov 64, %g2 | ||
67 | sub %g2, %g1, %g1 | ||
68 | sub %o1, %g1, %o1 | ||
69 | 1: EX_ST(stxa %o2, [%o0 + 0x00] %asi) | ||
70 | subcc %g1, 8, %g1 | ||
71 | bne,pt %xcc, 1b | ||
72 | add %o0, 8, %o0 | ||
73 | |||
74 | NGbzero_pre_loop: | ||
75 | wr %g7, 0x0, %asi | ||
76 | andn %o1, (64 - 1), %g1 | ||
77 | sub %o1, %g1, %o1 | ||
78 | NGbzero_loop: | ||
79 | EX_ST(stxa %o2, [%o0 + 0x00] %asi) | ||
80 | EX_ST(stxa %o2, [%o0 + 0x08] %asi) | ||
81 | EX_ST(stxa %o2, [%o0 + 0x10] %asi) | ||
82 | EX_ST(stxa %o2, [%o0 + 0x18] %asi) | ||
83 | EX_ST(stxa %o2, [%o0 + 0x20] %asi) | ||
84 | EX_ST(stxa %o2, [%o0 + 0x28] %asi) | ||
85 | EX_ST(stxa %o2, [%o0 + 0x30] %asi) | ||
86 | EX_ST(stxa %o2, [%o0 + 0x38] %asi) | ||
87 | subcc %g1, 64, %g1 | ||
88 | bne,pt %xcc, NGbzero_loop | ||
89 | add %o0, 64, %o0 | ||
90 | |||
91 | wr %o4, 0x0, %asi | ||
92 | brz,pn %o1, NGbzero_done | ||
93 | NGbzero_medium: | ||
94 | andncc %o1, 0x7, %g1 | ||
95 | be,pn %xcc, 2f | ||
96 | sub %o1, %g1, %o1 | ||
97 | 1: EX_ST(stxa %o2, [%o0 + 0x00] %asi) | ||
98 | subcc %g1, 8, %g1 | ||
99 | bne,pt %xcc, 1b | ||
100 | add %o0, 8, %o0 | ||
101 | 2: brz,pt %o1, NGbzero_done | ||
102 | nop | ||
103 | |||
104 | NGbzero_tiny: | ||
105 | 1: EX_ST(stba %o2, [%o0 + 0x00] %asi) | ||
106 | subcc %o1, 1, %o1 | ||
107 | bne,pt %icc, 1b | ||
108 | add %o0, 1, %o0 | ||
109 | |||
110 | /* fallthrough */ | ||
111 | |||
112 | NGbzero_done: | ||
113 | wr %o5, 0x0, %asi | ||
114 | |||
115 | NGbzero_return: | ||
116 | retl | ||
117 | mov %o3, %o0 | ||
118 | .size NGbzero, .-NGbzero | ||
119 | .size NGmemset, .-NGmemset | ||
120 | |||
121 | .globl NGclear_user | ||
122 | .type NGclear_user, #function | ||
123 | NGclear_user: /* %o0=buf, %o1=len */ | ||
124 | rd %asi, %o5 | ||
125 | brz,pn %o1, NGbzero_done | ||
126 | clr %o3 | ||
127 | cmp %o5, ASI_AIUS | ||
128 | bne,pn %icc, NGbzero | ||
129 | clr %o2 | ||
130 | mov ASI_BLK_INIT_QUAD_LDD_AIUS, %g7 | ||
131 | ba,pt %xcc, NGbzero_from_clear_user | ||
132 | mov ASI_AIUS, %o4 | ||
133 | .size NGclear_user, .-NGclear_user | ||
134 | |||
135 | #define BRANCH_ALWAYS 0x10680000 | ||
136 | #define NOP 0x01000000 | ||
137 | #define NG_DO_PATCH(OLD, NEW) \ | ||
138 | sethi %hi(NEW), %g1; \ | ||
139 | or %g1, %lo(NEW), %g1; \ | ||
140 | sethi %hi(OLD), %g2; \ | ||
141 | or %g2, %lo(OLD), %g2; \ | ||
142 | sub %g1, %g2, %g1; \ | ||
143 | sethi %hi(BRANCH_ALWAYS), %g3; \ | ||
144 | sll %g1, 11, %g1; \ | ||
145 | srl %g1, 11 + 2, %g1; \ | ||
146 | or %g3, %lo(BRANCH_ALWAYS), %g3; \ | ||
147 | or %g3, %g1, %g3; \ | ||
148 | stw %g3, [%g2]; \ | ||
149 | sethi %hi(NOP), %g3; \ | ||
150 | or %g3, %lo(NOP), %g3; \ | ||
151 | stw %g3, [%g2 + 0x4]; \ | ||
152 | flush %g2; | ||
153 | |||
154 | .globl niagara_patch_bzero | ||
155 | .type niagara_patch_bzero,#function | ||
156 | niagara_patch_bzero: | ||
157 | NG_DO_PATCH(memset, NGmemset) | ||
158 | NG_DO_PATCH(__bzero, NGbzero) | ||
159 | NG_DO_PATCH(__clear_user, NGclear_user) | ||
160 | NG_DO_PATCH(tsb_init, NGtsb_init) | ||
161 | retl | ||
162 | nop | ||
163 | .size niagara_patch_bzero,.-niagara_patch_bzero | ||
diff --git a/arch/sparc64/lib/NGcopy_from_user.S b/arch/sparc64/lib/NGcopy_from_user.S new file mode 100644 index 000000000000..2d93456f76dd --- /dev/null +++ b/arch/sparc64/lib/NGcopy_from_user.S | |||
@@ -0,0 +1,37 @@ | |||
1 | /* NGcopy_from_user.S: Niagara optimized copy from userspace. | ||
2 | * | ||
3 | * Copyright (C) 2006 David S. Miller (davem@davemloft.net) | ||
4 | */ | ||
5 | |||
6 | #define EX_LD(x) \ | ||
7 | 98: x; \ | ||
8 | .section .fixup; \ | ||
9 | .align 4; \ | ||
10 | 99: wr %g0, ASI_AIUS, %asi;\ | ||
11 | retl; \ | ||
12 | mov 1, %o0; \ | ||
13 | .section __ex_table,"a";\ | ||
14 | .align 4; \ | ||
15 | .word 98b, 99b; \ | ||
16 | .text; \ | ||
17 | .align 4; | ||
18 | |||
19 | #ifndef ASI_AIUS | ||
20 | #define ASI_AIUS 0x11 | ||
21 | #endif | ||
22 | |||
23 | #define FUNC_NAME NGcopy_from_user | ||
24 | #define LOAD(type,addr,dest) type##a [addr] ASI_AIUS, dest | ||
25 | #define LOAD_TWIN(addr_reg,dest0,dest1) \ | ||
26 | ldda [addr_reg] ASI_BLK_INIT_QUAD_LDD_AIUS, dest0 | ||
27 | #define EX_RETVAL(x) 0 | ||
28 | |||
29 | #ifdef __KERNEL__ | ||
30 | #define PREAMBLE \ | ||
31 | rd %asi, %g1; \ | ||
32 | cmp %g1, ASI_AIUS; \ | ||
33 | bne,pn %icc, memcpy_user_stub; \ | ||
34 | nop | ||
35 | #endif | ||
36 | |||
37 | #include "NGmemcpy.S" | ||
diff --git a/arch/sparc64/lib/NGcopy_to_user.S b/arch/sparc64/lib/NGcopy_to_user.S new file mode 100644 index 000000000000..34112d5054ef --- /dev/null +++ b/arch/sparc64/lib/NGcopy_to_user.S | |||
@@ -0,0 +1,40 @@ | |||
1 | /* NGcopy_to_user.S: Niagara optimized copy to userspace. | ||
2 | * | ||
3 | * Copyright (C) 2006 David S. Miller (davem@davemloft.net) | ||
4 | */ | ||
5 | |||
6 | #define EX_ST(x) \ | ||
7 | 98: x; \ | ||
8 | .section .fixup; \ | ||
9 | .align 4; \ | ||
10 | 99: wr %g0, ASI_AIUS, %asi;\ | ||
11 | retl; \ | ||
12 | mov 1, %o0; \ | ||
13 | .section __ex_table,"a";\ | ||
14 | .align 4; \ | ||
15 | .word 98b, 99b; \ | ||
16 | .text; \ | ||
17 | .align 4; | ||
18 | |||
19 | #ifndef ASI_AIUS | ||
20 | #define ASI_AIUS 0x11 | ||
21 | #endif | ||
22 | |||
23 | #define FUNC_NAME NGcopy_to_user | ||
24 | #define STORE(type,src,addr) type##a src, [addr] ASI_AIUS | ||
25 | #define STORE_ASI ASI_BLK_INIT_QUAD_LDD_AIUS | ||
26 | #define EX_RETVAL(x) 0 | ||
27 | |||
28 | #ifdef __KERNEL__ | ||
29 | /* Writing to %asi is _expensive_ so we hardcode it. | ||
30 | * Reading %asi to check for KERNEL_DS is comparatively | ||
31 | * cheap. | ||
32 | */ | ||
33 | #define PREAMBLE \ | ||
34 | rd %asi, %g1; \ | ||
35 | cmp %g1, ASI_AIUS; \ | ||
36 | bne,pn %icc, memcpy_user_stub; \ | ||
37 | nop | ||
38 | #endif | ||
39 | |||
40 | #include "NGmemcpy.S" | ||
diff --git a/arch/sparc64/lib/NGmemcpy.S b/arch/sparc64/lib/NGmemcpy.S new file mode 100644 index 000000000000..8e522b3dc095 --- /dev/null +++ b/arch/sparc64/lib/NGmemcpy.S | |||
@@ -0,0 +1,368 @@ | |||
1 | /* NGmemcpy.S: Niagara optimized memcpy. | ||
2 | * | ||
3 | * Copyright (C) 2006 David S. Miller (davem@davemloft.net) | ||
4 | */ | ||
5 | |||
6 | #ifdef __KERNEL__ | ||
7 | #include <asm/asi.h> | ||
8 | #include <asm/thread_info.h> | ||
9 | #define GLOBAL_SPARE %g7 | ||
10 | #define RESTORE_ASI(TMP) \ | ||
11 | ldub [%g6 + TI_CURRENT_DS], TMP; \ | ||
12 | wr TMP, 0x0, %asi; | ||
13 | #else | ||
14 | #define GLOBAL_SPARE %g5 | ||
15 | #define RESTORE_ASI(TMP) \ | ||
16 | wr %g0, ASI_PNF, %asi | ||
17 | #endif | ||
18 | |||
19 | #ifndef STORE_ASI | ||
20 | #define STORE_ASI ASI_BLK_INIT_QUAD_LDD_P | ||
21 | #endif | ||
22 | |||
23 | #ifndef EX_LD | ||
24 | #define EX_LD(x) x | ||
25 | #endif | ||
26 | |||
27 | #ifndef EX_ST | ||
28 | #define EX_ST(x) x | ||
29 | #endif | ||
30 | |||
31 | #ifndef EX_RETVAL | ||
32 | #define EX_RETVAL(x) x | ||
33 | #endif | ||
34 | |||
35 | #ifndef LOAD | ||
36 | #ifndef MEMCPY_DEBUG | ||
37 | #define LOAD(type,addr,dest) type [addr], dest | ||
38 | #else | ||
39 | #define LOAD(type,addr,dest) type##a [addr] 0x80, dest | ||
40 | #endif | ||
41 | #endif | ||
42 | |||
43 | #ifndef LOAD_TWIN | ||
44 | #define LOAD_TWIN(addr_reg,dest0,dest1) \ | ||
45 | ldda [addr_reg] ASI_BLK_INIT_QUAD_LDD_P, dest0 | ||
46 | #endif | ||
47 | |||
48 | #ifndef STORE | ||
49 | #define STORE(type,src,addr) type src, [addr] | ||
50 | #endif | ||
51 | |||
52 | #ifndef STORE_INIT | ||
53 | #define STORE_INIT(src,addr) stxa src, [addr] %asi | ||
54 | #endif | ||
55 | |||
56 | #ifndef FUNC_NAME | ||
57 | #define FUNC_NAME NGmemcpy | ||
58 | #endif | ||
59 | |||
60 | #ifndef PREAMBLE | ||
61 | #define PREAMBLE | ||
62 | #endif | ||
63 | |||
64 | #ifndef XCC | ||
65 | #define XCC xcc | ||
66 | #endif | ||
67 | |||
68 | .register %g2,#scratch | ||
69 | .register %g3,#scratch | ||
70 | |||
71 | .text | ||
72 | .align 64 | ||
73 | |||
74 | .globl FUNC_NAME | ||
75 | .type FUNC_NAME,#function | ||
76 | FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
77 | srlx %o2, 31, %g2 | ||
78 | cmp %g2, 0 | ||
79 | tne %xcc, 5 | ||
80 | PREAMBLE | ||
81 | mov %o0, GLOBAL_SPARE | ||
82 | cmp %o2, 0 | ||
83 | be,pn %XCC, 85f | ||
84 | or %o0, %o1, %o3 | ||
85 | cmp %o2, 16 | ||
86 | blu,a,pn %XCC, 80f | ||
87 | or %o3, %o2, %o3 | ||
88 | |||
89 | /* 2 blocks (128 bytes) is the minimum we can do the block | ||
90 | * copy with. We need to ensure that we'll iterate at least | ||
91 | * once in the block copy loop. At worst we'll need to align | ||
92 | * the destination to a 64-byte boundary which can chew up | ||
93 | * to (64 - 1) bytes from the length before we perform the | ||
94 | * block copy loop. | ||
95 | */ | ||
96 | cmp %o2, (2 * 64) | ||
97 | blu,pt %XCC, 70f | ||
98 | andcc %o3, 0x7, %g0 | ||
99 | |||
100 | /* %o0: dst | ||
101 | * %o1: src | ||
102 | * %o2: len (known to be >= 128) | ||
103 | * | ||
104 | * The block copy loops will use %o4/%o5,%g2/%g3 as | ||
105 | * temporaries while copying the data. | ||
106 | */ | ||
107 | |||
108 | LOAD(prefetch, %o1, #one_read) | ||
109 | wr %g0, STORE_ASI, %asi | ||
110 | |||
111 | /* Align destination on 64-byte boundary. */ | ||
112 | andcc %o0, (64 - 1), %o4 | ||
113 | be,pt %XCC, 2f | ||
114 | sub %o4, 64, %o4 | ||
115 | sub %g0, %o4, %o4 ! bytes to align dst | ||
116 | sub %o2, %o4, %o2 | ||
117 | 1: subcc %o4, 1, %o4 | ||
118 | EX_LD(LOAD(ldub, %o1, %g1)) | ||
119 | EX_ST(STORE(stb, %g1, %o0)) | ||
120 | add %o1, 1, %o1 | ||
121 | bne,pt %XCC, 1b | ||
122 | add %o0, 1, %o0 | ||
123 | |||
124 | /* If the source is on a 16-byte boundary we can do | ||
125 | * the direct block copy loop. If it is 8-byte aligned | ||
126 | * we can do the 16-byte loads offset by -8 bytes and the | ||
127 | * init stores offset by one register. | ||
128 | * | ||
129 | * If the source is not even 8-byte aligned, we need to do | ||
130 | * shifting and masking (basically integer faligndata). | ||
131 | * | ||
132 | * The careful bit with init stores is that if we store | ||
133 | * to any part of the cache line we have to store the whole | ||
134 | * cacheline else we can end up with corrupt L2 cache line | ||
135 | * contents. Since the loop works on 64-bytes of 64-byte | ||
136 | * aligned store data at a time, this is easy to ensure. | ||
137 | */ | ||
138 | 2: | ||
139 | andcc %o1, (16 - 1), %o4 | ||
140 | andn %o2, (64 - 1), %g1 ! block copy loop iterator | ||
141 | sub %o2, %g1, %o2 ! final sub-block copy bytes | ||
142 | be,pt %XCC, 50f | ||
143 | cmp %o4, 8 | ||
144 | be,a,pt %XCC, 10f | ||
145 | sub %o1, 0x8, %o1 | ||
146 | |||
147 | /* Neither 8-byte nor 16-byte aligned, shift and mask. */ | ||
148 | mov %g1, %o4 | ||
149 | and %o1, 0x7, %g1 | ||
150 | sll %g1, 3, %g1 | ||
151 | mov 64, %o3 | ||
152 | andn %o1, 0x7, %o1 | ||
153 | EX_LD(LOAD(ldx, %o1, %g2)) | ||
154 | sub %o3, %g1, %o3 | ||
155 | sllx %g2, %g1, %g2 | ||
156 | |||
157 | #define SWIVEL_ONE_DWORD(SRC, TMP1, TMP2, PRE_VAL, PRE_SHIFT, POST_SHIFT, DST)\ | ||
158 | EX_LD(LOAD(ldx, SRC, TMP1)); \ | ||
159 | srlx TMP1, PRE_SHIFT, TMP2; \ | ||
160 | or TMP2, PRE_VAL, TMP2; \ | ||
161 | EX_ST(STORE_INIT(TMP2, DST)); \ | ||
162 | sllx TMP1, POST_SHIFT, PRE_VAL; | ||
163 | |||
164 | 1: add %o1, 0x8, %o1 | ||
165 | SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x00) | ||
166 | add %o1, 0x8, %o1 | ||
167 | SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x08) | ||
168 | add %o1, 0x8, %o1 | ||
169 | SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x10) | ||
170 | add %o1, 0x8, %o1 | ||
171 | SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x18) | ||
172 | add %o1, 32, %o1 | ||
173 | LOAD(prefetch, %o1, #one_read) | ||
174 | sub %o1, 32 - 8, %o1 | ||
175 | SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x20) | ||
176 | add %o1, 8, %o1 | ||
177 | SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x28) | ||
178 | add %o1, 8, %o1 | ||
179 | SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x30) | ||
180 | add %o1, 8, %o1 | ||
181 | SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x38) | ||
182 | subcc %o4, 64, %o4 | ||
183 | bne,pt %XCC, 1b | ||
184 | add %o0, 64, %o0 | ||
185 | |||
186 | #undef SWIVEL_ONE_DWORD | ||
187 | |||
188 | srl %g1, 3, %g1 | ||
189 | ba,pt %XCC, 60f | ||
190 | add %o1, %g1, %o1 | ||
191 | |||
192 | 10: /* Destination is 64-byte aligned, source was only 8-byte | ||
193 | * aligned but it has been subtracted by 8 and we perform | ||
194 | * one twin load ahead, then add 8 back into source when | ||
195 | * we finish the loop. | ||
196 | */ | ||
197 | EX_LD(LOAD_TWIN(%o1, %o4, %o5)) | ||
198 | 1: add %o1, 16, %o1 | ||
199 | EX_LD(LOAD_TWIN(%o1, %g2, %g3)) | ||
200 | add %o1, 16 + 32, %o1 | ||
201 | LOAD(prefetch, %o1, #one_read) | ||
202 | sub %o1, 32, %o1 | ||
203 | EX_ST(STORE_INIT(%o5, %o0 + 0x00)) ! initializes cache line | ||
204 | EX_ST(STORE_INIT(%g2, %o0 + 0x08)) | ||
205 | EX_LD(LOAD_TWIN(%o1, %o4, %o5)) | ||
206 | add %o1, 16, %o1 | ||
207 | EX_ST(STORE_INIT(%g3, %o0 + 0x10)) | ||
208 | EX_ST(STORE_INIT(%o4, %o0 + 0x18)) | ||
209 | EX_LD(LOAD_TWIN(%o1, %g2, %g3)) | ||
210 | add %o1, 16, %o1 | ||
211 | EX_ST(STORE_INIT(%o5, %o0 + 0x20)) | ||
212 | EX_ST(STORE_INIT(%g2, %o0 + 0x28)) | ||
213 | EX_LD(LOAD_TWIN(%o1, %o4, %o5)) | ||
214 | EX_ST(STORE_INIT(%g3, %o0 + 0x30)) | ||
215 | EX_ST(STORE_INIT(%o4, %o0 + 0x38)) | ||
216 | subcc %g1, 64, %g1 | ||
217 | bne,pt %XCC, 1b | ||
218 | add %o0, 64, %o0 | ||
219 | |||
220 | ba,pt %XCC, 60f | ||
221 | add %o1, 0x8, %o1 | ||
222 | |||
223 | 50: /* Destination is 64-byte aligned, and source is 16-byte | ||
224 | * aligned. | ||
225 | */ | ||
226 | 1: EX_LD(LOAD_TWIN(%o1, %o4, %o5)) | ||
227 | add %o1, 16, %o1 | ||
228 | EX_LD(LOAD_TWIN(%o1, %g2, %g3)) | ||
229 | add %o1, 16 + 32, %o1 | ||
230 | LOAD(prefetch, %o1, #one_read) | ||
231 | sub %o1, 32, %o1 | ||
232 | EX_ST(STORE_INIT(%o4, %o0 + 0x00)) ! initializes cache line | ||
233 | EX_ST(STORE_INIT(%o5, %o0 + 0x08)) | ||
234 | EX_LD(LOAD_TWIN(%o1, %o4, %o5)) | ||
235 | add %o1, 16, %o1 | ||
236 | EX_ST(STORE_INIT(%g2, %o0 + 0x10)) | ||
237 | EX_ST(STORE_INIT(%g3, %o0 + 0x18)) | ||
238 | EX_LD(LOAD_TWIN(%o1, %g2, %g3)) | ||
239 | add %o1, 16, %o1 | ||
240 | EX_ST(STORE_INIT(%o4, %o0 + 0x20)) | ||
241 | EX_ST(STORE_INIT(%o5, %o0 + 0x28)) | ||
242 | EX_ST(STORE_INIT(%g2, %o0 + 0x30)) | ||
243 | EX_ST(STORE_INIT(%g3, %o0 + 0x38)) | ||
244 | subcc %g1, 64, %g1 | ||
245 | bne,pt %XCC, 1b | ||
246 | add %o0, 64, %o0 | ||
247 | /* fall through */ | ||
248 | |||
249 | 60: | ||
250 | /* %o2 contains any final bytes still needed to be copied | ||
251 | * over. If anything is left, we copy it one byte at a time. | ||
252 | */ | ||
253 | RESTORE_ASI(%o3) | ||
254 | brz,pt %o2, 85f | ||
255 | sub %o0, %o1, %o3 | ||
256 | ba,a,pt %XCC, 90f | ||
257 | |||
258 | .align 64 | ||
259 | 70: /* 16 < len <= 64 */ | ||
260 | bne,pn %XCC, 75f | ||
261 | sub %o0, %o1, %o3 | ||
262 | |||
263 | 72: | ||
264 | andn %o2, 0xf, %o4 | ||
265 | and %o2, 0xf, %o2 | ||
266 | 1: subcc %o4, 0x10, %o4 | ||
267 | EX_LD(LOAD(ldx, %o1, %o5)) | ||
268 | add %o1, 0x08, %o1 | ||
269 | EX_LD(LOAD(ldx, %o1, %g1)) | ||
270 | sub %o1, 0x08, %o1 | ||
271 | EX_ST(STORE(stx, %o5, %o1 + %o3)) | ||
272 | add %o1, 0x8, %o1 | ||
273 | EX_ST(STORE(stx, %g1, %o1 + %o3)) | ||
274 | bgu,pt %XCC, 1b | ||
275 | add %o1, 0x8, %o1 | ||
276 | 73: andcc %o2, 0x8, %g0 | ||
277 | be,pt %XCC, 1f | ||
278 | nop | ||
279 | sub %o2, 0x8, %o2 | ||
280 | EX_LD(LOAD(ldx, %o1, %o5)) | ||
281 | EX_ST(STORE(stx, %o5, %o1 + %o3)) | ||
282 | add %o1, 0x8, %o1 | ||
283 | 1: andcc %o2, 0x4, %g0 | ||
284 | be,pt %XCC, 1f | ||
285 | nop | ||
286 | sub %o2, 0x4, %o2 | ||
287 | EX_LD(LOAD(lduw, %o1, %o5)) | ||
288 | EX_ST(STORE(stw, %o5, %o1 + %o3)) | ||
289 | add %o1, 0x4, %o1 | ||
290 | 1: cmp %o2, 0 | ||
291 | be,pt %XCC, 85f | ||
292 | nop | ||
293 | ba,pt %xcc, 90f | ||
294 | nop | ||
295 | |||
296 | 75: | ||
297 | andcc %o0, 0x7, %g1 | ||
298 | sub %g1, 0x8, %g1 | ||
299 | be,pn %icc, 2f | ||
300 | sub %g0, %g1, %g1 | ||
301 | sub %o2, %g1, %o2 | ||
302 | |||
303 | 1: subcc %g1, 1, %g1 | ||
304 | EX_LD(LOAD(ldub, %o1, %o5)) | ||
305 | EX_ST(STORE(stb, %o5, %o1 + %o3)) | ||
306 | bgu,pt %icc, 1b | ||
307 | add %o1, 1, %o1 | ||
308 | |||
309 | 2: add %o1, %o3, %o0 | ||
310 | andcc %o1, 0x7, %g1 | ||
311 | bne,pt %icc, 8f | ||
312 | sll %g1, 3, %g1 | ||
313 | |||
314 | cmp %o2, 16 | ||
315 | bgeu,pt %icc, 72b | ||
316 | nop | ||
317 | ba,a,pt %xcc, 73b | ||
318 | |||
319 | 8: mov 64, %o3 | ||
320 | andn %o1, 0x7, %o1 | ||
321 | EX_LD(LOAD(ldx, %o1, %g2)) | ||
322 | sub %o3, %g1, %o3 | ||
323 | andn %o2, 0x7, %o4 | ||
324 | sllx %g2, %g1, %g2 | ||
325 | 1: add %o1, 0x8, %o1 | ||
326 | EX_LD(LOAD(ldx, %o1, %g3)) | ||
327 | subcc %o4, 0x8, %o4 | ||
328 | srlx %g3, %o3, %o5 | ||
329 | or %o5, %g2, %o5 | ||
330 | EX_ST(STORE(stx, %o5, %o0)) | ||
331 | add %o0, 0x8, %o0 | ||
332 | bgu,pt %icc, 1b | ||
333 | sllx %g3, %g1, %g2 | ||
334 | |||
335 | srl %g1, 3, %g1 | ||
336 | andcc %o2, 0x7, %o2 | ||
337 | be,pn %icc, 85f | ||
338 | add %o1, %g1, %o1 | ||
339 | ba,pt %xcc, 90f | ||
340 | sub %o0, %o1, %o3 | ||
341 | |||
342 | .align 64 | ||
343 | 80: /* 0 < len <= 16 */ | ||
344 | andcc %o3, 0x3, %g0 | ||
345 | bne,pn %XCC, 90f | ||
346 | sub %o0, %o1, %o3 | ||
347 | |||
348 | 1: | ||
349 | subcc %o2, 4, %o2 | ||
350 | EX_LD(LOAD(lduw, %o1, %g1)) | ||
351 | EX_ST(STORE(stw, %g1, %o1 + %o3)) | ||
352 | bgu,pt %XCC, 1b | ||
353 | add %o1, 4, %o1 | ||
354 | |||
355 | 85: retl | ||
356 | mov EX_RETVAL(GLOBAL_SPARE), %o0 | ||
357 | |||
358 | .align 32 | ||
359 | 90: | ||
360 | subcc %o2, 1, %o2 | ||
361 | EX_LD(LOAD(ldub, %o1, %g1)) | ||
362 | EX_ST(STORE(stb, %g1, %o1 + %o3)) | ||
363 | bgu,pt %XCC, 90b | ||
364 | add %o1, 1, %o1 | ||
365 | retl | ||
366 | mov EX_RETVAL(GLOBAL_SPARE), %o0 | ||
367 | |||
368 | .size FUNC_NAME, .-FUNC_NAME | ||
diff --git a/arch/sparc64/lib/NGpage.S b/arch/sparc64/lib/NGpage.S new file mode 100644 index 000000000000..7d7c3bb8dcbf --- /dev/null +++ b/arch/sparc64/lib/NGpage.S | |||
@@ -0,0 +1,96 @@ | |||
1 | /* NGpage.S: Niagara optimize clear and copy page. | ||
2 | * | ||
3 | * Copyright (C) 2006 (davem@davemloft.net) | ||
4 | */ | ||
5 | |||
6 | #include <asm/asi.h> | ||
7 | #include <asm/page.h> | ||
8 | |||
9 | .text | ||
10 | .align 32 | ||
11 | |||
12 | /* This is heavily simplified from the sun4u variants | ||
13 | * because Niagara does not have any D-cache aliasing issues | ||
14 | * and also we don't need to use the FPU in order to implement | ||
15 | * an optimal page copy/clear. | ||
16 | */ | ||
17 | |||
18 | NGcopy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */ | ||
19 | prefetch [%o1 + 0x00], #one_read | ||
20 | mov 8, %g1 | ||
21 | mov 16, %g2 | ||
22 | mov 24, %g3 | ||
23 | set PAGE_SIZE, %g7 | ||
24 | |||
25 | 1: ldda [%o1 + %g0] ASI_BLK_INIT_QUAD_LDD_P, %o2 | ||
26 | ldda [%o1 + %g2] ASI_BLK_INIT_QUAD_LDD_P, %o4 | ||
27 | prefetch [%o1 + 0x40], #one_read | ||
28 | add %o1, 32, %o1 | ||
29 | stxa %o2, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P | ||
30 | stxa %o3, [%o0 + %g1] ASI_BLK_INIT_QUAD_LDD_P | ||
31 | ldda [%o1 + %g0] ASI_BLK_INIT_QUAD_LDD_P, %o2 | ||
32 | stxa %o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P | ||
33 | stxa %o5, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P | ||
34 | ldda [%o1 + %g2] ASI_BLK_INIT_QUAD_LDD_P, %o4 | ||
35 | add %o1, 32, %o1 | ||
36 | add %o0, 32, %o0 | ||
37 | stxa %o2, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P | ||
38 | stxa %o3, [%o0 + %g1] ASI_BLK_INIT_QUAD_LDD_P | ||
39 | stxa %o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P | ||
40 | stxa %o5, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P | ||
41 | subcc %g7, 64, %g7 | ||
42 | bne,pt %xcc, 1b | ||
43 | add %o0, 32, %o0 | ||
44 | retl | ||
45 | nop | ||
46 | |||
47 | NGclear_page: /* %o0=dest */ | ||
48 | NGclear_user_page: /* %o0=dest, %o1=vaddr */ | ||
49 | mov 8, %g1 | ||
50 | mov 16, %g2 | ||
51 | mov 24, %g3 | ||
52 | set PAGE_SIZE, %g7 | ||
53 | |||
54 | 1: stxa %g0, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P | ||
55 | stxa %g0, [%o0 + %g1] ASI_BLK_INIT_QUAD_LDD_P | ||
56 | stxa %g0, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P | ||
57 | stxa %g0, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P | ||
58 | add %o0, 32, %o0 | ||
59 | stxa %g0, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P | ||
60 | stxa %g0, [%o0 + %g1] ASI_BLK_INIT_QUAD_LDD_P | ||
61 | stxa %g0, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P | ||
62 | stxa %g0, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P | ||
63 | subcc %g7, 64, %g7 | ||
64 | bne,pt %xcc, 1b | ||
65 | add %o0, 32, %o0 | ||
66 | retl | ||
67 | nop | ||
68 | |||
69 | #define BRANCH_ALWAYS 0x10680000 | ||
70 | #define NOP 0x01000000 | ||
71 | #define NG_DO_PATCH(OLD, NEW) \ | ||
72 | sethi %hi(NEW), %g1; \ | ||
73 | or %g1, %lo(NEW), %g1; \ | ||
74 | sethi %hi(OLD), %g2; \ | ||
75 | or %g2, %lo(OLD), %g2; \ | ||
76 | sub %g1, %g2, %g1; \ | ||
77 | sethi %hi(BRANCH_ALWAYS), %g3; \ | ||
78 | sll %g1, 11, %g1; \ | ||
79 | srl %g1, 11 + 2, %g1; \ | ||
80 | or %g3, %lo(BRANCH_ALWAYS), %g3; \ | ||
81 | or %g3, %g1, %g3; \ | ||
82 | stw %g3, [%g2]; \ | ||
83 | sethi %hi(NOP), %g3; \ | ||
84 | or %g3, %lo(NOP), %g3; \ | ||
85 | stw %g3, [%g2 + 0x4]; \ | ||
86 | flush %g2; | ||
87 | |||
88 | .globl niagara_patch_pageops | ||
89 | .type niagara_patch_pageops,#function | ||
90 | niagara_patch_pageops: | ||
91 | NG_DO_PATCH(copy_user_page, NGcopy_user_page) | ||
92 | NG_DO_PATCH(_clear_page, NGclear_page) | ||
93 | NG_DO_PATCH(clear_user_page, NGclear_user_page) | ||
94 | retl | ||
95 | nop | ||
96 | .size niagara_patch_pageops,.-niagara_patch_pageops | ||
diff --git a/arch/sparc64/lib/NGpatch.S b/arch/sparc64/lib/NGpatch.S new file mode 100644 index 000000000000..3b0674fc3366 --- /dev/null +++ b/arch/sparc64/lib/NGpatch.S | |||
@@ -0,0 +1,33 @@ | |||
1 | /* NGpatch.S: Patch Ultra-I routines with Niagara variant. | ||
2 | * | ||
3 | * Copyright (C) 2006 David S. Miller <davem@davemloft.net> | ||
4 | */ | ||
5 | |||
6 | #define BRANCH_ALWAYS 0x10680000 | ||
7 | #define NOP 0x01000000 | ||
8 | #define NG_DO_PATCH(OLD, NEW) \ | ||
9 | sethi %hi(NEW), %g1; \ | ||
10 | or %g1, %lo(NEW), %g1; \ | ||
11 | sethi %hi(OLD), %g2; \ | ||
12 | or %g2, %lo(OLD), %g2; \ | ||
13 | sub %g1, %g2, %g1; \ | ||
14 | sethi %hi(BRANCH_ALWAYS), %g3; \ | ||
15 | sll %g1, 11, %g1; \ | ||
16 | srl %g1, 11 + 2, %g1; \ | ||
17 | or %g3, %lo(BRANCH_ALWAYS), %g3; \ | ||
18 | or %g3, %g1, %g3; \ | ||
19 | stw %g3, [%g2]; \ | ||
20 | sethi %hi(NOP), %g3; \ | ||
21 | or %g3, %lo(NOP), %g3; \ | ||
22 | stw %g3, [%g2 + 0x4]; \ | ||
23 | flush %g2; | ||
24 | |||
25 | .globl niagara_patch_copyops | ||
26 | .type niagara_patch_copyops,#function | ||
27 | niagara_patch_copyops: | ||
28 | NG_DO_PATCH(memcpy, NGmemcpy) | ||
29 | NG_DO_PATCH(___copy_from_user, NGcopy_from_user) | ||
30 | NG_DO_PATCH(___copy_to_user, NGcopy_to_user) | ||
31 | retl | ||
32 | nop | ||
33 | .size niagara_patch_copyops,.-niagara_patch_copyops | ||
diff --git a/arch/sparc64/lib/U3patch.S b/arch/sparc64/lib/U3patch.S index e2b6c5e4b95a..ecc302619a6e 100644 --- a/arch/sparc64/lib/U3patch.S +++ b/arch/sparc64/lib/U3patch.S | |||
@@ -12,7 +12,8 @@ | |||
12 | or %g2, %lo(OLD), %g2; \ | 12 | or %g2, %lo(OLD), %g2; \ |
13 | sub %g1, %g2, %g1; \ | 13 | sub %g1, %g2, %g1; \ |
14 | sethi %hi(BRANCH_ALWAYS), %g3; \ | 14 | sethi %hi(BRANCH_ALWAYS), %g3; \ |
15 | srl %g1, 2, %g1; \ | 15 | sll %g1, 11, %g1; \ |
16 | srl %g1, 11 + 2, %g1; \ | ||
16 | or %g3, %lo(BRANCH_ALWAYS), %g3; \ | 17 | or %g3, %lo(BRANCH_ALWAYS), %g3; \ |
17 | or %g3, %g1, %g3; \ | 18 | or %g3, %g1, %g3; \ |
18 | stw %g3, [%g2]; \ | 19 | stw %g3, [%g2]; \ |
diff --git a/arch/sparc64/lib/bzero.S b/arch/sparc64/lib/bzero.S index 1d2abcfa4e52..c7bbae8c590f 100644 --- a/arch/sparc64/lib/bzero.S +++ b/arch/sparc64/lib/bzero.S | |||
@@ -98,12 +98,12 @@ __bzero_done: | |||
98 | .text; \ | 98 | .text; \ |
99 | .align 4; | 99 | .align 4; |
100 | 100 | ||
101 | .globl __bzero_noasi | 101 | .globl __clear_user |
102 | .type __bzero_noasi, #function | 102 | .type __clear_user, #function |
103 | __bzero_noasi: /* %o0=buf, %o1=len */ | 103 | __clear_user: /* %o0=buf, %o1=len */ |
104 | brz,pn %o1, __bzero_noasi_done | 104 | brz,pn %o1, __clear_user_done |
105 | cmp %o1, 16 | 105 | cmp %o1, 16 |
106 | bl,pn %icc, __bzero_noasi_tiny | 106 | bl,pn %icc, __clear_user_tiny |
107 | EX_ST(prefetcha [%o0 + 0x00] %asi, #n_writes) | 107 | EX_ST(prefetcha [%o0 + 0x00] %asi, #n_writes) |
108 | andcc %o0, 0x3, %g0 | 108 | andcc %o0, 0x3, %g0 |
109 | be,pt %icc, 2f | 109 | be,pt %icc, 2f |
@@ -145,14 +145,14 @@ __bzero_noasi: /* %o0=buf, %o1=len */ | |||
145 | subcc %g1, 8, %g1 | 145 | subcc %g1, 8, %g1 |
146 | bne,pt %icc, 5b | 146 | bne,pt %icc, 5b |
147 | add %o0, 0x8, %o0 | 147 | add %o0, 0x8, %o0 |
148 | 6: brz,pt %o1, __bzero_noasi_done | 148 | 6: brz,pt %o1, __clear_user_done |
149 | nop | 149 | nop |
150 | __bzero_noasi_tiny: | 150 | __clear_user_tiny: |
151 | 1: EX_ST(stba %g0, [%o0 + 0x00] %asi) | 151 | 1: EX_ST(stba %g0, [%o0 + 0x00] %asi) |
152 | subcc %o1, 1, %o1 | 152 | subcc %o1, 1, %o1 |
153 | bne,pt %icc, 1b | 153 | bne,pt %icc, 1b |
154 | add %o0, 1, %o0 | 154 | add %o0, 1, %o0 |
155 | __bzero_noasi_done: | 155 | __clear_user_done: |
156 | retl | 156 | retl |
157 | clr %o0 | 157 | clr %o0 |
158 | .size __bzero_noasi, .-__bzero_noasi | 158 | .size __clear_user, .-__clear_user |
diff --git a/arch/sparc64/lib/clear_page.S b/arch/sparc64/lib/clear_page.S index b59884ef051d..77e531f6c2a7 100644 --- a/arch/sparc64/lib/clear_page.S +++ b/arch/sparc64/lib/clear_page.S | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <asm/page.h> | 9 | #include <asm/page.h> |
10 | #include <asm/pgtable.h> | 10 | #include <asm/pgtable.h> |
11 | #include <asm/spitfire.h> | 11 | #include <asm/spitfire.h> |
12 | #include <asm/head.h> | ||
12 | 13 | ||
13 | /* What we used to do was lock a TLB entry into a specific | 14 | /* What we used to do was lock a TLB entry into a specific |
14 | * TLB slot, clear the page with interrupts disabled, then | 15 | * TLB slot, clear the page with interrupts disabled, then |
@@ -22,9 +23,6 @@ | |||
22 | * disable preemption during the clear. | 23 | * disable preemption during the clear. |
23 | */ | 24 | */ |
24 | 25 | ||
25 | #define TTE_BITS_TOP (_PAGE_VALID | _PAGE_SZBITS) | ||
26 | #define TTE_BITS_BOTTOM (_PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_L | _PAGE_W) | ||
27 | |||
28 | .text | 26 | .text |
29 | 27 | ||
30 | .globl _clear_page | 28 | .globl _clear_page |
@@ -43,12 +41,11 @@ clear_user_page: /* %o0=dest, %o1=vaddr */ | |||
43 | sethi %hi(PAGE_SIZE), %o4 | 41 | sethi %hi(PAGE_SIZE), %o4 |
44 | 42 | ||
45 | sllx %g2, 32, %g2 | 43 | sllx %g2, 32, %g2 |
46 | sethi %uhi(TTE_BITS_TOP), %g3 | 44 | sethi %hi(PAGE_KERNEL_LOCKED), %g3 |
47 | 45 | ||
48 | sllx %g3, 32, %g3 | 46 | ldx [%g3 + %lo(PAGE_KERNEL_LOCKED)], %g3 |
49 | sub %o0, %g2, %g1 ! paddr | 47 | sub %o0, %g2, %g1 ! paddr |
50 | 48 | ||
51 | or %g3, TTE_BITS_BOTTOM, %g3 | ||
52 | and %o1, %o4, %o0 ! vaddr D-cache alias bit | 49 | and %o1, %o4, %o0 ! vaddr D-cache alias bit |
53 | 50 | ||
54 | or %g1, %g3, %g1 ! TTE data | 51 | or %g1, %g3, %g1 ! TTE data |
@@ -66,7 +63,8 @@ clear_user_page: /* %o0=dest, %o1=vaddr */ | |||
66 | wrpr %o4, PSTATE_IE, %pstate | 63 | wrpr %o4, PSTATE_IE, %pstate |
67 | stxa %o0, [%g3] ASI_DMMU | 64 | stxa %o0, [%g3] ASI_DMMU |
68 | stxa %g1, [%g0] ASI_DTLB_DATA_IN | 65 | stxa %g1, [%g0] ASI_DTLB_DATA_IN |
69 | flush %g6 | 66 | sethi %hi(KERNBASE), %g1 |
67 | flush %g1 | ||
70 | wrpr %o4, 0x0, %pstate | 68 | wrpr %o4, 0x0, %pstate |
71 | 69 | ||
72 | mov 1, %o4 | 70 | mov 1, %o4 |
diff --git a/arch/sparc64/lib/copy_page.S b/arch/sparc64/lib/copy_page.S index feebb14fd27a..37460666a5c3 100644 --- a/arch/sparc64/lib/copy_page.S +++ b/arch/sparc64/lib/copy_page.S | |||
@@ -23,8 +23,6 @@ | |||
23 | * disable preemption during the clear. | 23 | * disable preemption during the clear. |
24 | */ | 24 | */ |
25 | 25 | ||
26 | #define TTE_BITS_TOP (_PAGE_VALID | _PAGE_SZBITS) | ||
27 | #define TTE_BITS_BOTTOM (_PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_L | _PAGE_W) | ||
28 | #define DCACHE_SIZE (PAGE_SIZE * 2) | 26 | #define DCACHE_SIZE (PAGE_SIZE * 2) |
29 | 27 | ||
30 | #if (PAGE_SHIFT == 13) || (PAGE_SHIFT == 19) | 28 | #if (PAGE_SHIFT == 13) || (PAGE_SHIFT == 19) |
@@ -52,13 +50,12 @@ copy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */ | |||
52 | sethi %hi(PAGE_SIZE), %o3 | 50 | sethi %hi(PAGE_SIZE), %o3 |
53 | 51 | ||
54 | sllx %g2, 32, %g2 | 52 | sllx %g2, 32, %g2 |
55 | sethi %uhi(TTE_BITS_TOP), %g3 | 53 | sethi %hi(PAGE_KERNEL_LOCKED), %g3 |
56 | 54 | ||
57 | sllx %g3, 32, %g3 | 55 | ldx [%g3 + %lo(PAGE_KERNEL_LOCKED)], %g3 |
58 | sub %o0, %g2, %g1 ! dest paddr | 56 | sub %o0, %g2, %g1 ! dest paddr |
59 | 57 | ||
60 | sub %o1, %g2, %g2 ! src paddr | 58 | sub %o1, %g2, %g2 ! src paddr |
61 | or %g3, TTE_BITS_BOTTOM, %g3 | ||
62 | 59 | ||
63 | and %o2, %o3, %o0 ! vaddr D-cache alias bit | 60 | and %o2, %o3, %o0 ! vaddr D-cache alias bit |
64 | or %g1, %g3, %g1 ! dest TTE data | 61 | or %g1, %g3, %g1 ! dest TTE data |
diff --git a/arch/sparc64/lib/delay.c b/arch/sparc64/lib/delay.c index e8808727617a..fb27e54a03ee 100644 --- a/arch/sparc64/lib/delay.c +++ b/arch/sparc64/lib/delay.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* delay.c: Delay loops for sparc64 | 1 | /* delay.c: Delay loops for sparc64 |
2 | * | 2 | * |
3 | * Copyright (C) 2004 David S. Miller <davem@redhat.com> | 3 | * Copyright (C) 2004, 2006 David S. Miller <davem@davemloft.net> |
4 | * | 4 | * |
5 | * Based heavily upon x86 variant which is: | 5 | * Based heavily upon x86 variant which is: |
6 | * Copyright (C) 1993 Linus Torvalds | 6 | * Copyright (C) 1993 Linus Torvalds |
@@ -8,19 +8,16 @@ | |||
8 | */ | 8 | */ |
9 | 9 | ||
10 | #include <linux/delay.h> | 10 | #include <linux/delay.h> |
11 | #include <asm/timer.h> | ||
11 | 12 | ||
12 | void __delay(unsigned long loops) | 13 | void __delay(unsigned long loops) |
13 | { | 14 | { |
14 | __asm__ __volatile__( | 15 | unsigned long bclock, now; |
15 | " b,pt %%xcc, 1f\n" | 16 | |
16 | " cmp %0, 0\n" | 17 | bclock = tick_ops->get_tick(); |
17 | " .align 32\n" | 18 | do { |
18 | "1:\n" | 19 | now = tick_ops->get_tick(); |
19 | " bne,pt %%xcc, 1b\n" | 20 | } while ((now-bclock) < loops); |
20 | " subcc %0, 1, %0\n" | ||
21 | : "=&r" (loops) | ||
22 | : "0" (loops) | ||
23 | : "cc"); | ||
24 | } | 21 | } |
25 | 22 | ||
26 | /* We used to multiply by HZ after shifting down by 32 bits | 23 | /* We used to multiply by HZ after shifting down by 32 bits |
diff --git a/arch/sparc64/lib/xor.S b/arch/sparc64/lib/xor.S index 4cd5d2be1ae1..a79c8888170d 100644 --- a/arch/sparc64/lib/xor.S +++ b/arch/sparc64/lib/xor.S | |||
@@ -2,9 +2,10 @@ | |||
2 | * arch/sparc64/lib/xor.S | 2 | * arch/sparc64/lib/xor.S |
3 | * | 3 | * |
4 | * High speed xor_block operation for RAID4/5 utilizing the | 4 | * High speed xor_block operation for RAID4/5 utilizing the |
5 | * UltraSparc Visual Instruction Set. | 5 | * UltraSparc Visual Instruction Set and Niagara store-init/twin-load. |
6 | * | 6 | * |
7 | * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz) | 7 | * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz) |
8 | * Copyright (C) 2006 David S. Miller <davem@davemloft.net> | ||
8 | */ | 9 | */ |
9 | 10 | ||
10 | #include <asm/visasm.h> | 11 | #include <asm/visasm.h> |
@@ -19,6 +20,8 @@ | |||
19 | */ | 20 | */ |
20 | .text | 21 | .text |
21 | .align 32 | 22 | .align 32 |
23 | |||
24 | /* VIS versions. */ | ||
22 | .globl xor_vis_2 | 25 | .globl xor_vis_2 |
23 | .type xor_vis_2,#function | 26 | .type xor_vis_2,#function |
24 | xor_vis_2: | 27 | xor_vis_2: |
@@ -352,3 +355,298 @@ xor_vis_5: | |||
352 | ret | 355 | ret |
353 | restore | 356 | restore |
354 | .size xor_vis_5, .-xor_vis_5 | 357 | .size xor_vis_5, .-xor_vis_5 |
358 | |||
359 | /* Niagara versions. */ | ||
	/* xor_niagara_2: dest[i] ^= src[i] over 'bytes' bytes.
	 *
	 * C equivalent:
	 *	void xor_niagara_2(unsigned long bytes, unsigned long *dest,
	 *			   unsigned long *src);
	 *
	 * In:  %o0 = bytes (loop count is bytes >> 6, and the loop body
	 *	always executes at least once, so bytes is assumed to be a
	 *	non-zero multiple of 64 -- TODO confirm against xor_block()
	 *	callers)
	 *	%o1 = dest, %o2 = src (accessed 16 bytes at a time through
	 *	the quad-load ASI; presumably 64-byte aligned -- verify)
	 *
	 * Uses the Niagara block-init / twin-load ASI
	 * (ASI_BLK_INIT_QUAD_LDD_P): each ldda fetches an aligned 16-byte
	 * pair into an even/odd register pair, and the init-store stxa
	 * avoids read-for-ownership on the destination lines.  The
	 * caller's %asi is saved in %g7 and restored before returning.
	 */
	.globl		xor_niagara_2
	.type		xor_niagara_2,#function
xor_niagara_2:		/* %o0=bytes, %o1=dest, %o2=src */
	save		%sp, -192, %sp
	/* Warm the first line of each buffer before switching %asi. */
	prefetch	[%i1], #n_writes
	prefetch	[%i2], #one_read
	rd		%asi, %g7		/* save caller's %asi */
	wr		%g0, ASI_BLK_INIT_QUAD_LDD_P, %asi
	srlx		%i0, 6, %g1		/* %g1 = number of 64-byte chunks */
	mov		%i1, %i0		/* %i0 = dest */
	mov		%i2, %i1		/* %i1 = src */
1:	ldda		[%i1 + 0x00] %asi, %i2	/* %i2/%i3 = src + 0x00 */
	ldda		[%i1 + 0x10] %asi, %i4	/* %i4/%i5 = src + 0x10 */
	ldda		[%i1 + 0x20] %asi, %g2	/* %g2/%g3 = src + 0x20 */
	ldda		[%i1 + 0x30] %asi, %l0	/* %l0/%l1 = src + 0x30 */
	prefetch	[%i1 + 0x40], #one_read
	ldda		[%i0 + 0x00] %asi, %o0	/* %o0/%o1 = dest + 0x00 */
	ldda		[%i0 + 0x10] %asi, %o2	/* %o2/%o3 = dest + 0x10 */
	ldda		[%i0 + 0x20] %asi, %o4	/* %o4/%o5 = dest + 0x20 */
	ldda		[%i0 + 0x30] %asi, %l2	/* %l2/%l3 = dest + 0x30 */
	prefetch	[%i0 + 0x40], #n_writes
	/* dest ^= src, 8 bytes per xor/stxa pair. */
	xor		%o0, %i2, %o0
	xor		%o1, %i3, %o1
	stxa		%o0, [%i0 + 0x00] %asi
	stxa		%o1, [%i0 + 0x08] %asi
	xor		%o2, %i4, %o2
	xor		%o3, %i5, %o3
	stxa		%o2, [%i0 + 0x10] %asi
	stxa		%o3, [%i0 + 0x18] %asi
	xor		%o4, %g2, %o4
	xor		%o5, %g3, %o5
	stxa		%o4, [%i0 + 0x20] %asi
	stxa		%o5, [%i0 + 0x28] %asi
	xor		%l2, %l0, %l2
	xor		%l3, %l1, %l3
	stxa		%l2, [%i0 + 0x30] %asi
	stxa		%l3, [%i0 + 0x38] %asi
	add		%i0, 0x40, %i0
	subcc		%g1, 1, %g1
	bne,pt		%xcc, 1b
	 add		%i1, 0x40, %i1		/* delay slot: advance src */
	membar		#Sync			/* drain init-stores before %asi restore */
	wr		%g7, 0x0, %asi		/* restore caller's %asi */
	ret
	 restore
	.size		xor_niagara_2, .-xor_niagara_2
406 | |||
	/* xor_niagara_3: dest[i] ^= src1[i] ^ src2[i] over 'bytes' bytes.
	 *
	 * C equivalent:
	 *	void xor_niagara_3(unsigned long bytes, unsigned long *dest,
	 *			   unsigned long *src1, unsigned long *src2);
	 *
	 * In:  %o0 = bytes (assumed a non-zero multiple of 64 -- the loop
	 *	count is bytes >> 6 and the body runs at least once)
	 *	%o1 = dest, %o2 = src1, %o3 = src2 (presumably 64-byte
	 *	aligned for the twin-load ASI -- verify with callers)
	 *
	 * Pointer roles after the setup moves:
	 *	%i0 = dest, %i1 = src1, %l7 = src2
	 * %asi is saved in %g7 and restored on exit.  Loads of the next
	 * sub-block are interleaved with the xor/store of the previous
	 * one to hide twin-load latency.
	 */
	.globl		xor_niagara_3
	.type		xor_niagara_3,#function
xor_niagara_3:	/* %o0=bytes, %o1=dest, %o2=src1, %o3=src2 */
	save		%sp, -192, %sp
	prefetch	[%i1], #n_writes
	prefetch	[%i2], #one_read
	prefetch	[%i3], #one_read
	rd		%asi, %g7		/* save caller's %asi */
	wr		%g0, ASI_BLK_INIT_QUAD_LDD_P, %asi
	srlx		%i0, 6, %g1		/* %g1 = 64-byte chunk count */
	mov		%i1, %i0		/* %i0 = dest */
	mov		%i2, %i1		/* %i1 = src1 */
	mov		%i3, %l7		/* %l7 = src2 */
1:	ldda		[%i1 + 0x00] %asi, %i2	/* %i2/%i3 = src1 + 0x00 */
	ldda		[%i1 + 0x10] %asi, %i4	/* %i4/%i5 = src1 + 0x10 */
	ldda		[%l7 + 0x00] %asi, %g2	/* %g2/%g3 = src2 + 0x00 */
	ldda		[%l7 + 0x10] %asi, %l0	/* %l0/%l1 = src2 + 0x10 */
	ldda		[%i0 + 0x00] %asi, %o0	/* %o0/%o1 = dest + 0x00 */
	ldda		[%i0 + 0x10] %asi, %o2	/* %o2/%o3 = dest + 0x10 */
	xor		%g2, %i2, %g2		/* src1 ^ src2 */
	xor		%g3, %i3, %g3
	xor		%o0, %g2, %o0		/* ^= dest */
	xor		%o1, %g3, %o1
	stxa		%o0, [%i0 + 0x00] %asi
	stxa		%o1, [%i0 + 0x08] %asi
	ldda		[%i1 + 0x20] %asi, %i2	/* %i2/%i3 = src1 + 0x20 */
	ldda		[%l7 + 0x20] %asi, %g2	/* %g2/%g3 = src2 + 0x20 */
	ldda		[%i0 + 0x20] %asi, %o0	/* %o0/%o1 = dest + 0x20 */
	xor		%l0, %i4, %l0
	xor		%l1, %i5, %l1
	xor		%o2, %l0, %o2
	xor		%o3, %l1, %o3
	stxa		%o2, [%i0 + 0x10] %asi
	stxa		%o3, [%i0 + 0x18] %asi
	ldda		[%i1 + 0x30] %asi, %i4	/* %i4/%i5 = src1 + 0x30 */
	ldda		[%l7 + 0x30] %asi, %l0	/* %l0/%l1 = src2 + 0x30 */
	ldda		[%i0 + 0x30] %asi, %o2	/* %o2/%o3 = dest + 0x30 */
	prefetch	[%i1 + 0x40], #one_read
	prefetch	[%l7 + 0x40], #one_read
	prefetch	[%i0 + 0x40], #n_writes
	xor		%g2, %i2, %g2
	xor		%g3, %i3, %g3
	xor		%o0, %g2, %o0
	xor		%o1, %g3, %o1
	stxa		%o0, [%i0 + 0x20] %asi
	stxa		%o1, [%i0 + 0x28] %asi
	xor		%l0, %i4, %l0
	xor		%l1, %i5, %l1
	xor		%o2, %l0, %o2
	xor		%o3, %l1, %o3
	stxa		%o2, [%i0 + 0x30] %asi
	stxa		%o3, [%i0 + 0x38] %asi
	add		%i0, 0x40, %i0
	add		%i1, 0x40, %i1
	subcc		%g1, 1, %g1
	bne,pt		%xcc, 1b
	 add		%l7, 0x40, %l7		/* delay slot: advance src2 */
	membar		#Sync			/* drain init-stores */
	wr		%g7, 0x0, %asi		/* restore caller's %asi */
	ret
	 restore
	.size		xor_niagara_3, .-xor_niagara_3
469 | |||
470 | .globl xor_niagara_4 | ||
471 | .type xor_niagara_4,#function | ||
472 | xor_niagara_4: /* %o0=bytes, %o1=dest, %o2=src1, %o3=src2, %o4=src3 */ | ||
473 | save %sp, -192, %sp | ||
474 | prefetch [%i1], #n_writes | ||
475 | prefetch [%i2], #one_read | ||
476 | prefetch [%i3], #one_read | ||
477 | prefetch [%i4], #one_read | ||
478 | rd %asi, %g7 | ||
479 | wr %g0, ASI_BLK_INIT_QUAD_LDD_P, %asi | ||
480 | srlx %i0, 6, %g1 | ||
481 | mov %i1, %i0 | ||
482 | mov %i2, %i1 | ||
483 | mov %i3, %l7 | ||
484 | mov %i4, %l6 | ||
485 | 1: ldda [%i1 + 0x00] %asi, %i2 /* %i2/%i3 = src1 + 0x00 */ | ||
486 | ldda [%l7 + 0x00] %asi, %i4 /* %i4/%i5 = src2 + 0x00 */ | ||
487 | ldda [%l6 + 0x00] %asi, %g2 /* %g2/%g3 = src3 + 0x00 */ | ||
488 | ldda [%i0 + 0x00] %asi, %l0 /* %l0/%l1 = dest + 0x00 */ | ||
489 | xor %i4, %i2, %i4 | ||
490 | xor %i5, %i3, %i5 | ||
491 | ldda [%i1 + 0x10] %asi, %i2 /* %i2/%i3 = src1 + 0x10 */ | ||
492 | xor %g2, %i4, %g2 | ||
493 | xor %g3, %i5, %g3 | ||
494 | ldda [%i7 + 0x10] %asi, %i4 /* %i4/%i5 = src2 + 0x10 */ | ||
495 | xor %l0, %g2, %l0 | ||
496 | xor %l1, %g3, %l1 | ||
497 | stxa %l0, [%i0 + 0x00] %asi | ||
498 | stxa %l1, [%i0 + 0x08] %asi | ||
499 | ldda [%i6 + 0x10] %asi, %g2 /* %g2/%g3 = src3 + 0x10 */ | ||
500 | ldda [%i0 + 0x10] %asi, %l0 /* %l0/%l1 = dest + 0x10 */ | ||
501 | |||
502 | xor %i4, %i2, %i4 | ||
503 | xor %i5, %i3, %i5 | ||
504 | ldda [%i1 + 0x20] %asi, %i2 /* %i2/%i3 = src1 + 0x20 */ | ||
505 | xor %g2, %i4, %g2 | ||
506 | xor %g3, %i5, %g3 | ||
507 | ldda [%i7 + 0x20] %asi, %i4 /* %i4/%i5 = src2 + 0x20 */ | ||
508 | xor %l0, %g2, %l0 | ||
509 | xor %l1, %g3, %l1 | ||
510 | stxa %l0, [%i0 + 0x10] %asi | ||
511 | stxa %l1, [%i0 + 0x18] %asi | ||
512 | ldda [%i6 + 0x20] %asi, %g2 /* %g2/%g3 = src3 + 0x20 */ | ||
513 | ldda [%i0 + 0x20] %asi, %l0 /* %l0/%l1 = dest + 0x20 */ | ||
514 | |||
515 | xor %i4, %i2, %i4 | ||
516 | xor %i5, %i3, %i5 | ||
517 | ldda [%i1 + 0x30] %asi, %i2 /* %i2/%i3 = src1 + 0x30 */ | ||
518 | xor %g2, %i4, %g2 | ||
519 | xor %g3, %i5, %g3 | ||
520 | ldda [%i7 + 0x30] %asi, %i4 /* %i4/%i5 = src2 + 0x30 */ | ||
521 | xor %l0, %g2, %l0 | ||
522 | xor %l1, %g3, %l1 | ||
523 | stxa %l0, [%i0 + 0x20] %asi | ||
524 | stxa %l1, [%i0 + 0x28] %asi | ||
525 | ldda [%i6 + 0x30] %asi, %g2 /* %g2/%g3 = src3 + 0x30 */ | ||
526 | ldda [%i0 + 0x30] %asi, %l0 /* %l0/%l1 = dest + 0x30 */ | ||
527 | |||
528 | prefetch [%i1 + 0x40], #one_read | ||
529 | prefetch [%l7 + 0x40], #one_read | ||
530 | prefetch [%l6 + 0x40], #one_read | ||
531 | prefetch [%i0 + 0x40], #n_writes | ||
532 | |||
533 | xor %i4, %i2, %i4 | ||
534 | xor %i5, %i3, %i5 | ||
535 | xor %g2, %i4, %g2 | ||
536 | xor %g3, %i5, %g3 | ||
537 | xor %l0, %g2, %l0 | ||
538 | xor %l1, %g3, %l1 | ||
539 | stxa %l0, [%i0 + 0x30] %asi | ||
540 | stxa %l1, [%i0 + 0x38] %asi | ||
541 | |||
542 | add %i0, 0x40, %i0 | ||
543 | add %i1, 0x40, %i1 | ||
544 | add %l7, 0x40, %l7 | ||
545 | subcc %g1, 1, %g1 | ||
546 | bne,pt %xcc, 1b | ||
547 | add %l6, 0x40, %l6 | ||
548 | membar #Sync | ||
549 | wr %g7, 0x0, %asi | ||
550 | ret | ||
551 | restore | ||
552 | .size xor_niagara_4, .-xor_niagara_4 | ||
553 | |||
	/* xor_niagara_5: dest[i] ^= src1[i] ^ src2[i] ^ src3[i] ^ src4[i].
	 *
	 * C equivalent:
	 *	void xor_niagara_5(unsigned long bytes, unsigned long *dest,
	 *			   unsigned long *src1, unsigned long *src2,
	 *			   unsigned long *src3, unsigned long *src4);
	 *
	 * In:  %o0 = bytes (assumed a non-zero multiple of 64 -- the loop
	 *	count is bytes >> 6 and the body runs at least once)
	 *	%o1 = dest, %o2 = src1, %o3 = src2, %o4 = src3, %o5 = src4
	 *
	 * Pointer roles after the setup moves:
	 *	%i0 = dest, %i1 = src1, %l7 = src2, %l6 = src3, %l5 = src4
	 * %asi is saved in %g7 and restored on exit.  Each 16-byte
	 * sub-block's loads are interleaved with the previous one's
	 * xor/store to hide twin-load latency.
	 */
	.globl		xor_niagara_5
	.type		xor_niagara_5,#function
xor_niagara_5:	/* %o0=bytes, %o1=dest, %o2=src1, %o3=src2, %o4=src3, %o5=src4 */
	save		%sp, -192, %sp
	prefetch	[%i1], #n_writes
	prefetch	[%i2], #one_read
	prefetch	[%i3], #one_read
	prefetch	[%i4], #one_read
	prefetch	[%i5], #one_read
	rd		%asi, %g7		/* save caller's %asi */
	wr		%g0, ASI_BLK_INIT_QUAD_LDD_P, %asi
	srlx		%i0, 6, %g1		/* %g1 = 64-byte chunk count */
	mov		%i1, %i0		/* %i0 = dest */
	mov		%i2, %i1		/* %i1 = src1 */
	mov		%i3, %l7		/* %l7 = src2 */
	mov		%i4, %l6		/* %l6 = src3 */
	mov		%i5, %l5		/* %l5 = src4 */
1:	ldda		[%i1 + 0x00] %asi, %i2	/* %i2/%i3 = src1 + 0x00 */
	ldda		[%l7 + 0x00] %asi, %i4	/* %i4/%i5 = src2 + 0x00 */
	ldda		[%l6 + 0x00] %asi, %g2	/* %g2/%g3 = src3 + 0x00 */
	ldda		[%l5 + 0x00] %asi, %l0	/* %l0/%l1 = src4 + 0x00 */
	ldda		[%i0 + 0x00] %asi, %l2	/* %l2/%l3 = dest + 0x00 */
	xor		%i4, %i2, %i4		/* src1 ^ src2 */
	xor		%i5, %i3, %i5
	ldda		[%i1 + 0x10] %asi, %i2	/* %i2/%i3 = src1 + 0x10 */
	xor		%g2, %i4, %g2		/* ^ src3 */
	xor		%g3, %i5, %g3
	ldda		[%l7 + 0x10] %asi, %i4	/* %i4/%i5 = src2 + 0x10 */
	xor		%l0, %g2, %l0		/* ^ src4 */
	xor		%l1, %g3, %l1
	ldda		[%l6 + 0x10] %asi, %g2	/* %g2/%g3 = src3 + 0x10 */
	xor		%l2, %l0, %l2		/* ^ dest */
	xor		%l3, %l1, %l3
	stxa		%l2, [%i0 + 0x00] %asi
	stxa		%l3, [%i0 + 0x08] %asi
	ldda		[%l5 + 0x10] %asi, %l0	/* %l0/%l1 = src4 + 0x10 */
	ldda		[%i0 + 0x10] %asi, %l2	/* %l2/%l3 = dest + 0x10 */

	xor		%i4, %i2, %i4
	xor		%i5, %i3, %i5
	ldda		[%i1 + 0x20] %asi, %i2	/* %i2/%i3 = src1 + 0x20 */
	xor		%g2, %i4, %g2
	xor		%g3, %i5, %g3
	ldda		[%l7 + 0x20] %asi, %i4	/* %i4/%i5 = src2 + 0x20 */
	xor		%l0, %g2, %l0
	xor		%l1, %g3, %l1
	ldda		[%l6 + 0x20] %asi, %g2	/* %g2/%g3 = src3 + 0x20 */
	xor		%l2, %l0, %l2
	xor		%l3, %l1, %l3
	stxa		%l2, [%i0 + 0x10] %asi
	stxa		%l3, [%i0 + 0x18] %asi
	ldda		[%l5 + 0x20] %asi, %l0	/* %l0/%l1 = src4 + 0x20 */
	ldda		[%i0 + 0x20] %asi, %l2	/* %l2/%l3 = dest + 0x20 */

	xor		%i4, %i2, %i4
	xor		%i5, %i3, %i5
	ldda		[%i1 + 0x30] %asi, %i2	/* %i2/%i3 = src1 + 0x30 */
	xor		%g2, %i4, %g2
	xor		%g3, %i5, %g3
	ldda		[%l7 + 0x30] %asi, %i4	/* %i4/%i5 = src2 + 0x30 */
	xor		%l0, %g2, %l0
	xor		%l1, %g3, %l1
	ldda		[%l6 + 0x30] %asi, %g2	/* %g2/%g3 = src3 + 0x30 */
	xor		%l2, %l0, %l2
	xor		%l3, %l1, %l3
	stxa		%l2, [%i0 + 0x20] %asi
	stxa		%l3, [%i0 + 0x28] %asi
	ldda		[%l5 + 0x30] %asi, %l0	/* %l0/%l1 = src4 + 0x30 */
	ldda		[%i0 + 0x30] %asi, %l2	/* %l2/%l3 = dest + 0x30 */

	prefetch	[%i1 + 0x40], #one_read
	prefetch	[%l7 + 0x40], #one_read
	prefetch	[%l6 + 0x40], #one_read
	prefetch	[%l5 + 0x40], #one_read
	prefetch	[%i0 + 0x40], #n_writes

	xor		%i4, %i2, %i4
	xor		%i5, %i3, %i5
	xor		%g2, %i4, %g2
	xor		%g3, %i5, %g3
	xor		%l0, %g2, %l0
	xor		%l1, %g3, %l1
	xor		%l2, %l0, %l2
	xor		%l3, %l1, %l3
	stxa		%l2, [%i0 + 0x30] %asi
	stxa		%l3, [%i0 + 0x38] %asi

	add		%i0, 0x40, %i0
	add		%i1, 0x40, %i1
	add		%l7, 0x40, %l7
	add		%l6, 0x40, %l6
	subcc		%g1, 1, %g1
	bne,pt		%xcc, 1b
	 add		%l5, 0x40, %l5		/* delay slot: advance src4 */
	membar		#Sync			/* drain init-stores */
	wr		%g7, 0x0, %asi		/* restore caller's %asi */
	ret
	 restore
	.size		xor_niagara_5, .-xor_niagara_5