diff options
author | Chris Zankel <czankel@tensilica.com> | 2005-06-24 01:01:20 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-06-24 03:05:21 -0400 |
commit | 249ac17e96811acc3c6402317dd5d5c89d2cbf68 (patch) | |
tree | 0a174065460de196861b85f1d9a48c88b2a2675a /arch/xtensa/lib/usercopy.S | |
parent | 5a0015d62668e64c8b6e02e360fbbea121bfd5e6 (diff) |
[PATCH] xtensa: Architecture support for Tensilica Xtensa Part 4
The attached patch provides part 4 of an architecture implementation for the
Tensilica Xtensa CPU series.
Signed-off-by: Chris Zankel <chris@zankel.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'arch/xtensa/lib/usercopy.S')
-rw-r--r-- | arch/xtensa/lib/usercopy.S | 321 |
1 files changed, 321 insertions, 0 deletions
diff --git a/arch/xtensa/lib/usercopy.S b/arch/xtensa/lib/usercopy.S new file mode 100644 index 000000000000..265db2693cbd --- /dev/null +++ b/arch/xtensa/lib/usercopy.S | |||
@@ -0,0 +1,321 @@ | |||
1 | /* | ||
2 | * arch/xtensa/lib/usercopy.S | ||
3 | * | ||
4 | * Copy to/from user space (derived from arch/xtensa/lib/hal/memcopy.S) | ||
5 | * | ||
6 | * DO NOT COMBINE this function with <arch/xtensa/lib/hal/memcopy.S>. | ||
7 | * It needs to remain separate and distinct. The hal files are part | ||
8 | * of the Xtensa link-time HAL, and those files may differ per | |
9 | * processor configuration. Patching the kernel for another | ||
10 | * processor configuration includes replacing the hal files, and we | ||
11 | * could lose the special functionality for accessing user-space | |
12 | * memory during such a patch. We sacrifice a little code space here | |
13 | * in favor of simpler code maintenance. | |
14 | * | ||
15 | * This file is subject to the terms and conditions of the GNU General | ||
16 | * Public License. See the file "COPYING" in the main directory of | ||
17 | * this archive for more details. | ||
18 | * | ||
19 | * Copyright (C) 2002 Tensilica Inc. | ||
20 | */ | ||
21 | |||
22 | |||
23 | /* | ||
24 | * size_t __xtensa_copy_user (void *dst, const void *src, size_t len); | ||
25 | * | ||
26 | * The returned value is the number of bytes not copied. Implies zero | ||
27 | * is success. | ||
28 | * | ||
29 | * The general case algorithm is as follows: | ||
30 | * If the destination and source are both aligned, | ||
31 | * do 16B chunks with a loop, and then finish up with | ||
32 | * 8B, 4B, 2B, and 1B copies conditional on the length. | ||
33 | * If destination is aligned and source unaligned, | ||
34 | * do the same, but use SRC to align the source data. | ||
35 | * If destination is unaligned, align it by conditionally | ||
36 | * copying 1B and 2B and then retest. | ||
37 | * This code tries to use fall-through branches for the common | |
38 | * case of aligned destinations (except for the branches to | ||
39 | * the alignment label). | ||
40 | * | ||
41 | * Register use: | ||
42 | * a0/ return address | ||
43 | * a1/ stack pointer | ||
44 | * a2/ return value | ||
45 | * a3/ src | ||
46 | * a4/ length | ||
47 | * a5/ dst | ||
48 | * a6/ tmp | ||
49 | * a7/ tmp | ||
50 | * a8/ tmp | ||
51 | * a9/ tmp | ||
52 | * a10/ tmp | ||
53 | * a11/ original length | ||
54 | */ | ||
55 | |||
56 | #include <xtensa/coreasm.h> | ||
57 | |||
58 | #ifdef __XTENSA_EB__ /* big-endian: ALIGN passes W0, W1 to SRC in order and SSA8 uses ssa8b */ | |
59 | #define ALIGN(R, W0, W1) src R, W0, W1 | |
60 | #define SSA8(R) ssa8b R | |
61 | #else /* little-endian: ALIGN swaps the two SRC source operands and SSA8 uses ssa8l */ | |
62 | #define ALIGN(R, W0, W1) src R, W1, W0 | |
63 | #define SSA8(R) ssa8l R | |
64 | #endif /* __XTENSA_EB__ */ | |
65 | |||
66 | /* Load or store instructions that may cause exceptions use the EX macro: it emits the instruction at local label 9, records the pair (address of that instruction, handler) as two words in the __ex_table section, and then switches back to the previous section. */ | |
67 | |||
68 | #define EX(insn,reg1,reg2,offset,handler) \ | |
69 | 9: insn reg1, reg2, offset; \ | |
70 | .section __ex_table, "a"; \ | |
71 | .word 9b, handler; \ | |
72 | .previous | |
73 | |||
74 | |||
75 | .text | |
76 | .align 4 | |
77 | .global __xtensa_copy_user | |
78 | .type __xtensa_copy_user,@function | |
79 | __xtensa_copy_user: # entry point; the C prototype is given in the header comment | |
80 | entry sp, 16 # minimal stack frame | |
81 | # on entry: a2/ dst, a3/ src, a4/ len | |
82 | mov a5, a2 # a5 = running dst; a2 keeps the original dst for the fixup code and later holds the return value | |
83 | mov a11, a4 # preserve original len for error case | |
84 | .Lcommon: # dispatch on the two low bits of dst | |
85 | bbsi.l a2, 0, .Ldst1mod2 # dst bit 0 set: dst is 1 mod 2 | |
86 | bbsi.l a2, 1, .Ldst2mod4 # dst bit 1 set: dst is 2 mod 4 | |
87 | .Ldstaligned: # .Ldst1mod2 and .Ldst2mod4 come back here once dst is word-aligned | |
88 | srli a7, a4, 4 # a7 = remaining len / 16 = number of loop iterations with 16B | |
89 | # per iteration | |
90 | movi a8, 3 # a8 = mask of the two low address bits; .Lsrcunaligned reuses this value | |
91 | bnone a3, a8, .Laligned # src has none of the low bits set: use word copy | |
92 | SSA8( a3) # set shift amount from byte offset of src | |
93 | bnez a4, .Lsrcunaligned # src unaligned and len != 0 | |
94 | movi a2, 0 # return success for len==0 | |
95 | retw | |
96 | |||
97 | /* | |
98 | * Destination is unaligned: copy one and/or two leading bytes so that dst becomes word-aligned and rejoin .Ldstaligned; copies too short for that go to .Lbytecopy. | |
99 | */ | |
100 | |||
101 | .Ldst1mod2: # dst is only byte aligned | |
102 | bltui a4, 7, .Lbytecopy # do short copies (len < 7) byte by byte | |
103 | |||
104 | # copy 1 byte | |
105 | EX(l8ui, a6, a3, 0, l_fixup) | |
106 | addi a3, a3, 1 | |
107 | EX(s8i, a6, a5, 0, s_fixup) | |
108 | addi a5, a5, 1 # a5 advances only after the store succeeded, so the fixup code counts completed bytes | |
109 | addi a4, a4, -1 # one byte less remaining | |
110 | bbci.l a5, 1, .Ldstaligned # if dst is now aligned (bit 1 clear), then | |
111 | # return to main algorithm | |
112 | .Ldst2mod4: # dst 16-bit aligned | |
113 | # copy 2 bytes | |
114 | bltui a4, 6, .Lbytecopy # do short copies (len < 6) byte by byte | |
115 | EX(l8ui, a6, a3, 0, l_fixup) | |
116 | EX(l8ui, a7, a3, 1, l_fixup) | |
117 | addi a3, a3, 2 | |
118 | EX(s8i, a6, a5, 0, s_fixup) | |
119 | EX(s8i, a7, a5, 1, s_fixup) | |
120 | addi a5, a5, 2 | |
121 | addi a4, a4, -2 # two bytes less remaining | |
122 | j .Ldstaligned # dst is now aligned, return to main algorithm | |
123 | |||
124 | /* | |
125 | * Byte by byte copy of the a4 remaining bytes from a3 (src) to a5 (dst); returns 0 in a2 when all bytes were copied. Faults are redirected to l_fixup (loads) and s_fixup (stores). | |
126 | */ | |
127 | .align 4 | |
128 | .byte 0 # 1 mod 4 alignment for LOOPNEZ | |
129 | # (0 mod 4 alignment for LBEG) | |
130 | .Lbytecopy: | |
131 | #if XCHAL_HAVE_LOOPS | |
132 | loopnez a4, .Lbytecopydone # zero-overhead loop, skipped entirely when a4 == 0 | |
133 | #else /* !XCHAL_HAVE_LOOPS */ | |
134 | beqz a4, .Lbytecopydone # nothing to copy | |
135 | add a7, a3, a4 # a7 = end address for source | |
136 | #endif /* !XCHAL_HAVE_LOOPS */ | |
137 | .Lnextbyte: | |
138 | EX(l8ui, a6, a3, 0, l_fixup) | |
139 | addi a3, a3, 1 | |
140 | EX(s8i, a6, a5, 0, s_fixup) | |
141 | addi a5, a5, 1 # a5 advances only after the store succeeded | |
142 | #if !XCHAL_HAVE_LOOPS | |
143 | bne a3, a7, .Lnextbyte # src advances by 1 and reaches a7 exactly; an equality test, unlike a signed compare, does not depend on where the buffer lies in the address space | |
144 | #endif /* !XCHAL_HAVE_LOOPS */ | |
145 | .Lbytecopydone: | |
146 | movi a2, 0 # return success for len bytes copied | |
147 | retw | |
148 | |||
149 | /* | |
150 | * Destination and source are word-aligned. On entry a7 = number of 16-byte chunks and a4 = remaining length; the tail is copied in 8, 4, 2 and 1 byte steps selected by bits 3..0 of a4. Returns 0 in a2 on success. | |
151 | */ | |
152 | # copy 16 bytes per iteration for word-aligned dst and word-aligned src | |
153 | .align 4 # 1 mod 4 alignment for LOOPNEZ | |
154 | .byte 0 # (0 mod 4 alignment for LBEG) | |
155 | .Laligned: | |
156 | #if XCHAL_HAVE_LOOPS | |
157 | loopnez a7, .Loop1done # zero-overhead loop, skipped when there is no full 16-byte chunk | |
158 | #else /* !XCHAL_HAVE_LOOPS */ | |
159 | beqz a7, .Loop1done # no full 16-byte chunk | |
160 | slli a8, a7, 4 # a8 = number of bytes covered by the 16-byte loop | |
161 | add a8, a8, a3 # a8 = end of last 16B source chunk | |
162 | #endif /* !XCHAL_HAVE_LOOPS */ | |
163 | .Loop1: # a6 and a7 alternate as data registers so loads and stores interleave | |
164 | EX(l32i, a6, a3, 0, l_fixup) | |
165 | EX(l32i, a7, a3, 4, l_fixup) | |
166 | EX(s32i, a6, a5, 0, s_fixup) | |
167 | EX(l32i, a6, a3, 8, l_fixup) | |
168 | EX(s32i, a7, a5, 4, s_fixup) | |
169 | EX(l32i, a7, a3, 12, l_fixup) | |
170 | EX(s32i, a6, a5, 8, s_fixup) | |
171 | addi a3, a3, 16 | |
172 | EX(s32i, a7, a5, 12, s_fixup) | |
173 | addi a5, a5, 16 # a5 advances only after all four stores succeeded | |
174 | #if !XCHAL_HAVE_LOOPS | |
175 | bne a3, a8, .Loop1 # src advances by 16 and reaches a8 exactly; an equality test, unlike a signed compare, does not depend on where the buffer lies in the address space | |
176 | #endif /* !XCHAL_HAVE_LOOPS */ | |
177 | .Loop1done: | |
178 | bbci.l a4, 3, .L2 # skip unless bit 3 of the length is set | |
179 | # copy 8 bytes | |
180 | EX(l32i, a6, a3, 0, l_fixup) | |
181 | EX(l32i, a7, a3, 4, l_fixup) | |
182 | addi a3, a3, 8 | |
183 | EX(s32i, a6, a5, 0, s_fixup) | |
184 | EX(s32i, a7, a5, 4, s_fixup) | |
185 | addi a5, a5, 8 | |
186 | .L2: | |
187 | bbci.l a4, 2, .L3 # skip unless bit 2 of the length is set | |
188 | # copy 4 bytes | |
189 | EX(l32i, a6, a3, 0, l_fixup) | |
190 | addi a3, a3, 4 | |
191 | EX(s32i, a6, a5, 0, s_fixup) | |
192 | addi a5, a5, 4 | |
193 | .L3: | |
194 | bbci.l a4, 1, .L4 # skip unless bit 1 of the length is set | |
195 | # copy 2 bytes | |
196 | EX(l16ui, a6, a3, 0, l_fixup) | |
197 | addi a3, a3, 2 | |
198 | EX(s16i, a6, a5, 0, s_fixup) | |
199 | addi a5, a5, 2 | |
200 | .L4: | |
201 | bbci.l a4, 0, .L5 # skip unless bit 0 of the length is set | |
202 | # copy 1 byte | |
203 | EX(l8ui, a6, a3, 0, l_fixup) | |
204 | EX(s8i, a6, a5, 0, s_fixup) | |
205 | .L5: | |
206 | movi a2, 0 # return success for len bytes copied | |
207 | retw | |
208 | |||
209 | /* | |
210 | * Destination is aligned, Source is unaligned: aligned source words are loaded and each neighbouring pair is merged into one destination word with the funnel shift whose amount was set from the src byte offset. Register roles here: a6..a9 data words, a10 src misalignment (0..3), a12 loop end address (only without zero-overhead loops). | |
211 | */ | |
212 | |||
213 | .align 4 | |
214 | .byte 0 # 1 mod 4 alignment for LOOPNEZ | |
215 | # (0 mod 4 alignment for LBEG) | |
216 | .Lsrcunaligned: | |
217 | # copy 16 bytes per iteration for word-aligned dst and unaligned src | |
218 | and a10, a3, a8 # save unalignment offset for below (a8 = 3 on entry); a10 must stay intact until .L13 | |
219 | sub a3, a3, a10 # align a3 (to avoid sim warnings only; not needed for hardware) | |
220 | EX(l32i, a6, a3, 0, l_fixup) # load first word | |
221 | #if XCHAL_HAVE_LOOPS | |
222 | loopnez a7, .Loop2done # zero-overhead loop, skipped when there is no full 16-byte chunk | |
223 | #else /* !XCHAL_HAVE_LOOPS */ | |
224 | beqz a7, .Loop2done # no full 16-byte chunk | |
225 | slli a12, a7, 4 # use a12 for the loop bound: a10 still holds the misalignment that .L13 adds back | |
226 | add a12, a12, a3 # a12 = end of last 16B source chunk | |
227 | #endif /* !XCHAL_HAVE_LOOPS */ | |
228 | .Loop2: # invariant: a6 = aligned source word at a3 + 0 | |
229 | EX(l32i, a7, a3, 4, l_fixup) | |
230 | EX(l32i, a8, a3, 8, l_fixup) | |
231 | ALIGN( a6, a6, a7) | |
232 | EX(s32i, a6, a5, 0, s_fixup) | |
233 | EX(l32i, a9, a3, 12, l_fixup) | |
234 | ALIGN( a7, a7, a8) | |
235 | EX(s32i, a7, a5, 4, s_fixup) | |
236 | EX(l32i, a6, a3, 16, l_fixup) # first word of the next chunk, re-establishes the invariant | |
237 | ALIGN( a8, a8, a9) | |
238 | EX(s32i, a8, a5, 8, s_fixup) | |
239 | addi a3, a3, 16 | |
240 | ALIGN( a9, a9, a6) | |
241 | EX(s32i, a9, a5, 12, s_fixup) | |
242 | addi a5, a5, 16 # a5 advances only after all four stores succeeded | |
243 | #if !XCHAL_HAVE_LOOPS | |
244 | bne a3, a12, .Loop2 # src advances by 16 and reaches a12 exactly; an equality test, unlike a signed compare, does not depend on where the buffer lies in the address space | |
245 | #endif /* !XCHAL_HAVE_LOOPS */ | |
246 | .Loop2done: | |
247 | bbci.l a4, 3, .L12 # skip unless bit 3 of the length is set | |
248 | # copy 8 bytes | |
249 | EX(l32i, a7, a3, 4, l_fixup) | |
250 | EX(l32i, a8, a3, 8, l_fixup) | |
251 | ALIGN( a6, a6, a7) | |
252 | EX(s32i, a6, a5, 0, s_fixup) | |
253 | addi a3, a3, 8 | |
254 | ALIGN( a7, a7, a8) | |
255 | EX(s32i, a7, a5, 4, s_fixup) | |
256 | addi a5, a5, 8 | |
257 | mov a6, a8 # last loaded word becomes the first word of the next pair | |
258 | .L12: | |
259 | bbci.l a4, 2, .L13 # skip unless bit 2 of the length is set | |
260 | # copy 4 bytes | |
261 | EX(l32i, a7, a3, 4, l_fixup) | |
262 | addi a3, a3, 4 | |
263 | ALIGN( a6, a6, a7) | |
264 | EX(s32i, a6, a5, 0, s_fixup) | |
265 | addi a5, a5, 4 | |
266 | mov a6, a7 # last loaded word becomes the first word of the next pair | |
267 | .L13: | |
268 | add a3, a3, a10 # readjust a3 with correct misalignment; the remaining bytes are read with byte loads | |
269 | bbci.l a4, 1, .L14 # skip unless bit 1 of the length is set | |
270 | # copy 2 bytes | |
271 | EX(l8ui, a6, a3, 0, l_fixup) | |
272 | EX(l8ui, a7, a3, 1, l_fixup) | |
273 | addi a3, a3, 2 | |
274 | EX(s8i, a6, a5, 0, s_fixup) | |
275 | EX(s8i, a7, a5, 1, s_fixup) | |
276 | addi a5, a5, 2 | |
277 | .L14: | |
278 | bbci.l a4, 0, .L15 # skip unless bit 0 of the length is set | |
279 | # copy 1 byte | |
280 | EX(l8ui, a6, a3, 0, l_fixup) | |
281 | EX(s8i, a6, a5, 0, s_fixup) | |
282 | .L15: | |
283 | movi a2, 0 # return success for len bytes copied | |
284 | retw | |
285 | |||
286 | |||
287 | .section .fixup, "ax" | |
288 | .align 4 | |
289 | |||
290 | /* a2 = original dst; a5 = current dst; a11= original len | |
291 | * bytes_copied = a5 - a2 | |
292 | * retval = bytes_not_copied = original len - bytes_copied | |
293 | * retval = a11 - (a5 - a2) | |
294 | * | |
295 | * Clearing the remaining pieces of kernel memory plugs security | |
296 | * holes. This functionality is the equivalent of the *_zeroing | |
297 | * functions that some architectures provide. | |
298 | */ | |
299 | |||
300 | .Lmemset: /* literal word holding the address of memset, fetched by the l32r in l_fixup */ | |
301 | .word memset | |
302 | |||
303 | s_fixup: /* handler registered for faulting stores: only reports the count */ | |
304 | sub a2, a5, a2 /* a2 <-- bytes copied */ | |
305 | sub a2, a11, a2 /* a2 <-- bytes not copied */ | |
306 | retw | |
307 | |||
308 | l_fixup: /* handler registered for faulting loads: reports the count and zero-fills the uncopied rest of dst */ | |
309 | sub a2, a5, a2 /* a2 <-- bytes copied */ | |
310 | sub a2, a11, a2 /* a2 <-- bytes not copied == return value */ | |
311 | |||
312 | /* void *memset(void *s, int c, size_t n); */ | |
313 | mov a6, a5 /* s = current dst */ | |
314 | movi a7, 0 /* c = 0 */ | |
315 | mov a8, a2 /* n = bytes not copied */ | |
316 | l32r a4, .Lmemset /* a4 = address of memset */ | |
317 | callx4 a4 /* arguments were placed in a6, a7, a8 for this call */ | |
318 | /* Ignore memset return value in a6. */ | |
319 | /* a2 still contains bytes not copied. */ | |
320 | retw | |
321 | |||