diff options
author | Chris Zankel <czankel@tensilica.com> | 2005-06-24 01:01:20 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-06-24 03:05:21 -0400 |
commit | 249ac17e96811acc3c6402317dd5d5c89d2cbf68 (patch) | |
tree | 0a174065460de196861b85f1d9a48c88b2a2675a | |
parent | 5a0015d62668e64c8b6e02e360fbbea121bfd5e6 (diff) |
[PATCH] xtensa: Architecture support for Tensilica Xtensa Part 4
The attached patch provides part 4 of an architecture implementation for the
Tensilica Xtensa CPU series.
Signed-off-by: Chris Zankel <chris@zankel.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | arch/xtensa/lib/Makefile | 7 | ||||
-rw-r--r-- | arch/xtensa/lib/checksum.S | 410 | ||||
-rw-r--r-- | arch/xtensa/lib/memcopy.S | 315 | ||||
-rw-r--r-- | arch/xtensa/lib/memset.S | 160 | ||||
-rw-r--r-- | arch/xtensa/lib/pci-auto.c | 352 | ||||
-rw-r--r-- | arch/xtensa/lib/strcasecmp.c | 32 | ||||
-rw-r--r-- | arch/xtensa/lib/strncpy_user.S | 224 | ||||
-rw-r--r-- | arch/xtensa/lib/strnlen_user.S | 147 | ||||
-rw-r--r-- | arch/xtensa/lib/usercopy.S | 321 |
9 files changed, 1968 insertions, 0 deletions
diff --git a/arch/xtensa/lib/Makefile b/arch/xtensa/lib/Makefile new file mode 100644 index 000000000000..ed935b58e8a4 --- /dev/null +++ b/arch/xtensa/lib/Makefile | |||
@@ -0,0 +1,7 @@ | |||
1 | # | ||
2 | # Library objects specific to the Xtensa architecture: always-built objects go to lib-y, pci-auto.o only when CONFIG_PCI is set. | ||
3 | # | ||
4 | |||
5 | lib-y += memcopy.o memset.o checksum.o strcasecmp.o | ||
6 | lib-y += usercopy.o strncpy_user.o strnlen_user.o | ||
7 | lib-$(CONFIG_PCI) += pci-auto.o | ||
diff --git a/arch/xtensa/lib/checksum.S b/arch/xtensa/lib/checksum.S new file mode 100644 index 000000000000..e2d64dfd530c --- /dev/null +++ b/arch/xtensa/lib/checksum.S | |||
@@ -0,0 +1,410 @@ | |||
1 | /* | ||
2 | * INET An implementation of the TCP/IP protocol suite for the LINUX | ||
3 | * operating system. INET is implemented using the BSD Socket | ||
4 | * interface as the means of communication with the user level. | ||
5 | * | ||
6 | * IP/TCP/UDP checksumming routines | ||
7 | * | ||
8 | * Xtensa version: Copyright (C) 2001 Tensilica, Inc. by Kevin Chea | ||
9 | * Optimized by Joe Taylor | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or | ||
12 | * modify it under the terms of the GNU General Public License | ||
13 | * as published by the Free Software Foundation; either version | ||
14 | * 2 of the License, or (at your option) any later version. | ||
15 | */ | ||
16 | |||
17 | #include <asm/errno.h> | ||
18 | #include <linux/linkage.h> | ||
19 | #define _ASMLANGUAGE | ||
20 | #include <xtensa/config/core.h> | ||
21 | |||
22 | /* | ||
23 | * computes a partial checksum, e.g. for TCP/UDP fragments | ||
24 | */ | ||
25 | |||
26 | /* | ||
27 | * unsigned int csum_partial(const unsigned char *buf, int len, | ||
28 | * unsigned int sum); | ||
29 | * a2 = buf | ||
30 | * a3 = len | ||
31 | * a4 = sum | ||
32 | * | ||
33 | * This function is optimized for 2- or 4-byte alignment. Odd addresses are handled by a slow byte-wise path. | ||
34 | */ | ||
35 | |||
36 | /* ONES_ADD(sum, val): sum += val with end-around carry (ones-complement add). If the 32-bit add wrapped (sum < val afterwards, unsigned), 1 is added back. Uses local label 99. */ | ||
37 | #define ONES_ADD(sum, val) \ | ||
38 | add sum, sum, val ; \ | ||
39 | bgeu sum, val, 99f ; /* no wrap-around: skip the carry fix-up */ \ | ||
40 | addi sum, sum, 1 ; /* fold the carry back into the sum */ \ | ||
41 | 99: ; | ||
42 | |||
43 | .text | ||
44 | ENTRY(csum_partial) | ||
45 | /* | ||
46 | * In: a2 = buf, a3 = len, a4 = sum. Out: a2 = updated 32-bit partial sum. Temporaries: a5-a8. | ||
47 | * Experiments with Ethernet and SLIP connections show that buf is aligned on either a 2-byte or 4-byte boundary; the 4-byte case is the fall-through fast path. | ||
48 | */ | ||
49 | entry sp, 32 | ||
50 | extui a5, a2, 0, 2 /* a5 = buf & 3 */ | ||
51 | bnez a5, 8f /* branch if buf is not 4-byte aligned */ | ||
52 | /* Fall-through on common case, 4-byte alignment */ | ||
53 | 1: | ||
54 | srli a5, a3, 5 /* 32-byte chunks */ | ||
55 | #if XCHAL_HAVE_LOOPS | ||
56 | loopgtz a5, 2f | ||
57 | #else | ||
58 | beqz a5, 2f | ||
59 | slli a5, a5, 5 | ||
60 | add a5, a5, a2 /* a5 = end of last 32-byte chunk */ | ||
61 | .Loop1: | ||
62 | #endif | ||
63 | l32i a6, a2, 0 | ||
64 | l32i a7, a2, 4 | ||
65 | ONES_ADD(a4, a6) | ||
66 | ONES_ADD(a4, a7) | ||
67 | l32i a6, a2, 8 | ||
68 | l32i a7, a2, 12 | ||
69 | ONES_ADD(a4, a6) | ||
70 | ONES_ADD(a4, a7) | ||
71 | l32i a6, a2, 16 | ||
72 | l32i a7, a2, 20 | ||
73 | ONES_ADD(a4, a6) | ||
74 | ONES_ADD(a4, a7) | ||
75 | l32i a6, a2, 24 | ||
76 | l32i a7, a2, 28 | ||
77 | ONES_ADD(a4, a6) | ||
78 | ONES_ADD(a4, a7) | ||
79 | addi a2, a2, 4*8 | ||
80 | #if !XCHAL_HAVE_LOOPS | ||
81 | bltu a2, a5, .Loop1 /* addresses are unsigned: a signed blt fails across 0x80000000 */ | ||
82 | #endif | ||
83 | 2: | ||
84 | extui a5, a3, 2, 3 /* remaining 4-byte chunks */ | ||
85 | #if XCHAL_HAVE_LOOPS | ||
86 | loopgtz a5, 3f | ||
87 | #else | ||
88 | beqz a5, 3f | ||
89 | slli a5, a5, 2 | ||
90 | add a5, a5, a2 /* a5 = end of last 4-byte chunk */ | ||
91 | .Loop2: | ||
92 | #endif | ||
93 | l32i a6, a2, 0 | ||
94 | ONES_ADD(a4, a6) | ||
95 | addi a2, a2, 4 | ||
96 | #if !XCHAL_HAVE_LOOPS | ||
97 | bltu a2, a5, .Loop2 /* unsigned pointer compare */ | ||
98 | #endif | ||
99 | 3: | ||
100 | _bbci.l a3, 1, 5f /* remaining 2-byte chunk */ | ||
101 | l16ui a6, a2, 0 | ||
102 | ONES_ADD(a4, a6) | ||
103 | addi a2, a2, 2 | ||
104 | 5: | ||
105 | _bbci.l a3, 0, 7f /* remaining 1-byte chunk */ | ||
106 | 6: l8ui a6, a2, 0 | ||
107 | #ifdef __XTENSA_EB__ | ||
108 | slli a6, a6, 8 /* load byte into bits 8..15 */ | ||
109 | #endif | ||
110 | ONES_ADD(a4, a6) | ||
111 | 7: | ||
112 | mov a2, a4 /* return the accumulated sum */ | ||
113 | retw | ||
114 | |||
115 | /* uncommon case, buf is not 4-byte aligned (2-byte aligned or odd) */ | ||
116 | 8: | ||
117 | beqz a3, 7b /* branch if len == 0 */ | ||
118 | beqi a3, 1, 6b /* branch if len == 1 */ | ||
119 | |||
120 | extui a5, a2, 0, 1 /* a5 = buf & 1 */ | ||
121 | bnez a5, 8f /* branch if buf is at an odd address */ | ||
122 | |||
123 | l16ui a6, a2, 0 /* common case, len >= 2 */ | ||
124 | ONES_ADD(a4, a6) | ||
125 | addi a2, a2, 2 /* adjust buf */ | ||
126 | addi a3, a3, -2 /* adjust len */ | ||
127 | j 1b /* now buf is 4-byte aligned */ | ||
128 | |||
129 | /* case: odd-byte aligned, len > 1 | ||
130 | * This case is dog slow, so don't give us an odd address. | ||
131 | * (I don't think this ever happens, but just in case.) | ||
132 | */ | ||
133 | 8: | ||
134 | srli a5, a3, 2 /* 4-byte chunks */ | ||
135 | #if XCHAL_HAVE_LOOPS | ||
136 | loopgtz a5, 2f | ||
137 | #else | ||
138 | beqz a5, 2f | ||
139 | slli a5, a5, 2 | ||
140 | add a5, a5, a2 /* a5 = end of last 4-byte chunk */ | ||
141 | .Loop3: | ||
142 | #endif | ||
143 | l8ui a6, a2, 0 /* first byte: bits 24..31 on big-endian, bits 0..7 on little-endian */ | ||
144 | l16ui a7, a2, 1 /* bits 8..23 */ | ||
145 | l8ui a8, a2, 3 /* last byte: bits 0..7 on big-endian, bits 24..31 on little-endian */ | ||
146 | #ifdef __XTENSA_EB__ | ||
147 | slli a6, a6, 24 | ||
148 | #else | ||
149 | slli a8, a8, 24 | ||
150 | #endif | ||
151 | slli a7, a7, 8 | ||
152 | or a7, a7, a6 | ||
153 | or a7, a7, a8 /* a7 = the 32-bit word as an aligned load would have read it */ | ||
154 | ONES_ADD(a4, a7) | ||
155 | addi a2, a2, 4 | ||
156 | #if !XCHAL_HAVE_LOOPS | ||
157 | bltu a2, a5, .Loop3 /* unsigned pointer compare */ | ||
158 | #endif | ||
159 | 2: | ||
160 | _bbci.l a3, 1, 3f /* remaining 2-byte chunk, still odd addr */ | ||
161 | l8ui a6, a2, 0 | ||
162 | l8ui a7, a2, 1 | ||
163 | #ifdef __XTENSA_EB__ | ||
164 | slli a6, a6, 8 | ||
165 | #else | ||
166 | slli a7, a7, 8 | ||
167 | #endif | ||
168 | or a7, a7, a6 | ||
169 | ONES_ADD(a4, a7) | ||
170 | addi a2, a2, 2 | ||
171 | 3: | ||
172 | j 5b /* branch to handle the remaining byte */ | ||
173 | |||
174 | |||
175 | |||
176 | /* | ||
177 | * Copy from src to dst while checksumming, otherwise like csum_partial | ||
178 | * | ||
179 | * The macros SRC and DST specify the type of access for the instruction. Each emits the instruction at local label 9999 and adds a (9999b, fixup) pair to __ex_table: | ||
180 | * SRC pairs the access with fixup label 6001, DST with fixup label 6002, thus we can call a custom exception handler for each access type. | ||
181 | */ | ||
182 | |||
183 | #define SRC(y...) \ | ||
184 | 9999: y; \ | ||
185 | .section __ex_table, "a"; \ | ||
186 | .long 9999b, 6001f ; \ | ||
187 | .previous | ||
188 | |||
189 | #define DST(y...) \ | ||
190 | 9999: y; \ | ||
191 | .section __ex_table, "a"; \ | ||
192 | .long 9999b, 6002f ; \ | ||
193 | .previous | ||
194 | |||
195 | /* | ||
196 | unsigned int csum_partial_copy_generic (const char *src, char *dst, int len, | ||
197 | int sum, int *src_err_ptr, int *dst_err_ptr) | ||
198 | a2 = src | ||
199 | a3 = dst | ||
200 | a4 = len | ||
201 | a5 = sum | ||
202 | a6 = src_err_ptr | ||
203 | a7 = dst_err_ptr | ||
204 | a8 = temp | ||
205 | a9 = temp | ||
206 | a10 = temp | ||
207 | a11 = original len for exception handling | ||
208 | a12 = original dst for exception handling | ||
209 | Returns the updated sum in a2. On a faulting source access -EFAULT is stored to *src_err_ptr, the whole destination is zeroed and 0 is returned; on a faulting destination access -EFAULT is stored to *dst_err_ptr and 0 is returned. | ||
210 | This function is optimized for 4-byte aligned addresses. Other | ||
211 | alignments work, but not nearly as efficiently. | ||
212 | */ | ||
213 | |||
214 | ENTRY(csum_partial_copy_generic) | ||
215 | entry sp, 32 | ||
216 | mov a12, a3 /* keep original dst for the source-fault fixup */ | ||
217 | mov a11, a4 /* keep original len for the source-fault fixup */ | ||
218 | or a10, a2, a3 /* low bits of a10 = combined misalignment of src and dst */ | ||
219 | |||
220 | /* We optimize the following alignment tests for the 4-byte | ||
221 | aligned case. Two bbsi.l instructions might seem more optimal | ||
222 | (commented out below). However, both labels 5: and 3: are out | ||
223 | of the imm8 range, so the assembler relaxes them into | ||
224 | equivalent bbci.l, j combinations, which is actually | ||
225 | slower. */ | ||
226 | |||
227 | extui a9, a10, 0, 2 | ||
228 | beqz a9, 1f /* branch if both are 4-byte aligned */ | ||
229 | bbsi.l a10, 0, 5f /* branch if one address is odd */ | ||
230 | j 3f /* one address is 2-byte aligned */ | ||
231 | |||
232 | /* _bbsi.l a10, 0, 5f */ /* branch if odd address */ | ||
233 | /* _bbsi.l a10, 1, 3f */ /* branch if 2-byte-aligned address */ | ||
234 | |||
235 | 1: | ||
236 | /* src and dst are both 4-byte aligned */ | ||
237 | srli a10, a4, 5 /* 32-byte chunks */ | ||
238 | #if XCHAL_HAVE_LOOPS | ||
239 | loopgtz a10, 2f | ||
240 | #else | ||
241 | beqz a10, 2f | ||
242 | slli a10, a10, 5 | ||
243 | add a10, a10, a2 /* a10 = end of last 32-byte src chunk */ | ||
244 | .Loop5: | ||
245 | #endif | ||
246 | SRC( l32i a9, a2, 0 ) | ||
247 | SRC( l32i a8, a2, 4 ) | ||
248 | DST( s32i a9, a3, 0 ) | ||
249 | DST( s32i a8, a3, 4 ) | ||
250 | ONES_ADD(a5, a9) | ||
251 | ONES_ADD(a5, a8) | ||
252 | SRC( l32i a9, a2, 8 ) | ||
253 | SRC( l32i a8, a2, 12 ) | ||
254 | DST( s32i a9, a3, 8 ) | ||
255 | DST( s32i a8, a3, 12 ) | ||
256 | ONES_ADD(a5, a9) | ||
257 | ONES_ADD(a5, a8) | ||
258 | SRC( l32i a9, a2, 16 ) | ||
259 | SRC( l32i a8, a2, 20 ) | ||
260 | DST( s32i a9, a3, 16 ) | ||
261 | DST( s32i a8, a3, 20 ) | ||
262 | ONES_ADD(a5, a9) | ||
263 | ONES_ADD(a5, a8) | ||
264 | SRC( l32i a9, a2, 24 ) | ||
265 | SRC( l32i a8, a2, 28 ) | ||
266 | DST( s32i a9, a3, 24 ) | ||
267 | DST( s32i a8, a3, 28 ) | ||
268 | ONES_ADD(a5, a9) | ||
269 | ONES_ADD(a5, a8) | ||
270 | addi a2, a2, 32 | ||
271 | addi a3, a3, 32 | ||
272 | #if !XCHAL_HAVE_LOOPS | ||
273 | bltu a2, a10, .Loop5 /* addresses are unsigned: a signed blt fails across 0x80000000 */ | ||
274 | #endif | ||
275 | 2: | ||
276 | extui a10, a4, 2, 3 /* remaining 4-byte chunks */ | ||
277 | extui a4, a4, 0, 2 /* reset len for general-case, 2-byte chunks */ | ||
278 | #if XCHAL_HAVE_LOOPS | ||
279 | loopgtz a10, 3f | ||
280 | #else | ||
281 | beqz a10, 3f | ||
282 | slli a10, a10, 2 | ||
283 | add a10, a10, a2 /* a10 = end of last 4-byte src chunk */ | ||
284 | .Loop6: | ||
285 | #endif | ||
286 | SRC( l32i a9, a2, 0 ) | ||
287 | DST( s32i a9, a3, 0 ) | ||
288 | ONES_ADD(a5, a9) | ||
289 | addi a2, a2, 4 | ||
290 | addi a3, a3, 4 | ||
291 | #if !XCHAL_HAVE_LOOPS | ||
292 | bltu a2, a10, .Loop6 /* unsigned pointer compare */ | ||
293 | #endif | ||
294 | 3: | ||
295 | /* | ||
296 | Control comes to here in two cases: (1) It may fall through | ||
297 | to here from the 4-byte alignment case to process, at most, | ||
298 | one 2-byte chunk. (2) It branches to here from above if | ||
299 | either src or dst is 2-byte aligned, and we process all bytes | ||
300 | here, except for perhaps a trailing odd byte. It's | ||
301 | inefficient, so align your addresses to 4-byte boundaries. | ||
302 | |||
303 | a2 = src | ||
304 | a3 = dst | ||
305 | a4 = len | ||
306 | a5 = sum | ||
307 | */ | ||
308 | srli a10, a4, 1 /* 2-byte chunks */ | ||
309 | #if XCHAL_HAVE_LOOPS | ||
310 | loopgtz a10, 4f | ||
311 | #else | ||
312 | beqz a10, 4f | ||
313 | slli a10, a10, 1 | ||
314 | add a10, a10, a2 /* a10 = end of last 2-byte src chunk */ | ||
315 | .Loop7: | ||
316 | #endif | ||
317 | SRC( l16ui a9, a2, 0 ) | ||
318 | DST( s16i a9, a3, 0 ) | ||
319 | ONES_ADD(a5, a9) | ||
320 | addi a2, a2, 2 | ||
321 | addi a3, a3, 2 | ||
322 | #if !XCHAL_HAVE_LOOPS | ||
323 | bltu a2, a10, .Loop7 /* unsigned pointer compare */ | ||
324 | #endif | ||
325 | 4: | ||
326 | /* This section processes a possible trailing odd byte. */ | ||
327 | _bbci.l a4, 0, 8f /* 1-byte chunk */ | ||
328 | SRC( l8ui a9, a2, 0 ) | ||
329 | DST( s8i a9, a3, 0 ) | ||
330 | #ifdef __XTENSA_EB__ | ||
331 | slli a9, a9, 8 /* shift byte to bits 8..15 */ | ||
332 | #endif | ||
333 | ONES_ADD(a5, a9) | ||
334 | 8: | ||
335 | mov a2, a5 /* return the accumulated sum */ | ||
336 | retw | ||
337 | |||
338 | 5: | ||
339 | /* Control branches to here when either src or dst is odd. We | ||
340 | process all bytes using 8-bit accesses. Grossly inefficient, | ||
341 | so don't feed us an odd address. */ | ||
342 | |||
343 | srli a10, a4, 1 /* handle in pairs for 16-bit csum */ | ||
344 | #if XCHAL_HAVE_LOOPS | ||
345 | loopgtz a10, 6f | ||
346 | #else | ||
347 | beqz a10, 6f | ||
348 | slli a10, a10, 1 | ||
349 | add a10, a10, a2 /* a10 = end of last odd-aligned, 2-byte src chunk */ | ||
350 | .Loop8: | ||
351 | #endif | ||
352 | SRC( l8ui a9, a2, 0 ) | ||
353 | SRC( l8ui a8, a2, 1 ) | ||
354 | DST( s8i a9, a3, 0 ) | ||
355 | DST( s8i a8, a3, 1 ) | ||
356 | #ifdef __XTENSA_EB__ | ||
357 | slli a9, a9, 8 /* combine into a single 16-bit value */ | ||
358 | #else /* for checksum computation */ | ||
359 | slli a8, a8, 8 | ||
360 | #endif | ||
361 | or a9, a9, a8 | ||
362 | ONES_ADD(a5, a9) | ||
363 | addi a2, a2, 2 | ||
364 | addi a3, a3, 2 | ||
365 | #if !XCHAL_HAVE_LOOPS | ||
366 | bltu a2, a10, .Loop8 /* unsigned pointer compare */ | ||
367 | #endif | ||
368 | 6: | ||
369 | j 4b /* process the possible trailing odd byte */ | ||
370 | |||
371 | |||
372 | # Exception handler: | ||
373 | .section .fixup, "ax" | ||
374 | /* | ||
375 | a6 = src_err_ptr | ||
376 | a7 = dst_err_ptr | ||
377 | a11 = original len for exception handling | ||
378 | a12 = original dst for exception handling | ||
379 | */ | ||
380 | |||
381 | 6001: | ||
382 | _movi a2, -EFAULT | ||
383 | s32i a2, a6, 0 /* src_err_ptr */ | ||
384 | |||
385 | # clear the complete destination - computing the rest | ||
386 | # is too much work | ||
387 | movi a2, 0 /* fill byte, and also the value returned below */ | ||
388 | #if XCHAL_HAVE_LOOPS | ||
389 | loopgtz a11, 2f | ||
390 | #else | ||
391 | beqz a11, 2f | ||
392 | add a11, a11, a12 /* a11 = ending address */ | ||
393 | .Leloop: | ||
394 | #endif | ||
395 | s8i a2, a12, 0 | ||
396 | addi a12, a12, 1 | ||
397 | #if !XCHAL_HAVE_LOOPS | ||
398 | bltu a12, a11, .Leloop /* unsigned pointer compare */ | ||
399 | #endif | ||
400 | 2: | ||
401 | retw | ||
402 | |||
403 | 6002: | ||
404 | movi a2, -EFAULT | ||
405 | s32i a2, a7, 0 /* dst_err_ptr */ | ||
406 | movi a2, 0 /* return 0 after a destination fault */ | ||
407 | retw | ||
408 | |||
409 | .previous | ||
410 | |||
diff --git a/arch/xtensa/lib/memcopy.S b/arch/xtensa/lib/memcopy.S new file mode 100644 index 000000000000..e8f6d7eb7222 --- /dev/null +++ b/arch/xtensa/lib/memcopy.S | |||
@@ -0,0 +1,315 @@ | |||
1 | /* | ||
2 | * arch/xtensa/lib/hal/memcopy.S -- Core HAL library functions | ||
3 | * xthal_memcpy and xthal_bcopy | ||
4 | * | ||
5 | * This file is subject to the terms and conditions of the GNU General Public | ||
6 | * License. See the file "COPYING" in the main directory of this archive | ||
7 | * for more details. | ||
8 | * | ||
9 | * Copyright (C) 2002 - 2005 Tensilica Inc. | ||
10 | */ | ||
11 | |||
12 | #include <xtensa/coreasm.h> | ||
13 | |||
14 | .macro src_b r, w0, w1 # endian-neutral wrapper around the src instruction | ||
15 | #ifdef __XTENSA_EB__ | ||
16 | src \r, \w0, \w1 | ||
17 | #else | ||
18 | src \r, \w1, \w0 | ||
19 | #endif | ||
20 | .endm | ||
21 | |||
22 | .macro ssa8 r # picks ssa8b on big-endian builds and ssa8l on little-endian builds | ||
23 | #ifdef __XTENSA_EB__ | ||
24 | ssa8b \r | ||
25 | #else | ||
26 | ssa8l \r | ||
27 | #endif | ||
28 | .endm | ||
29 | |||
30 | |||
31 | /* | ||
32 | * void *memcpy(void *dst, const void *src, size_t len); | ||
33 | * void *memmove(void *dst, const void *src, size_t len); | ||
34 | * void *bcopy(const void *src, void *dst, size_t len); | ||
35 | * | ||
36 | * This function is intended to do the same thing as the standard | ||
37 | * library function memcpy() (or bcopy()) for most cases. | ||
38 | * However, where the source and/or destination references | ||
39 | * an instruction RAM or ROM or a data RAM or ROM, that | ||
40 | * source and/or destination will always be accessed with | ||
41 | * 32-bit load and store instructions (as required for these | ||
42 | * types of devices). | ||
43 | * | ||
44 | * !!!!!!! XTFIXME: | ||
45 | * !!!!!!! Handling of IRAM/IROM has not yet | ||
46 | * !!!!!!! been implemented. | ||
47 | * | ||
48 | * The bcopy version is provided here to avoid the overhead | ||
49 | * of an extra call, for callers that require this convention. | ||
50 | * | ||
51 | * The (general case) algorithm is as follows: | ||
52 | * If destination is unaligned, align it by conditionally | ||
53 | * copying 1 and 2 bytes. | ||
54 | * If source is aligned, | ||
55 | * do 16 bytes with a loop, and then finish up with | ||
56 | * 8, 4, 2, and 1 byte copies conditional on the length; | ||
57 | * else (if source is unaligned), | ||
58 | * do the same, but use SRC to align the source data. | ||
59 | * This code tries to use fall-through branches for the common | ||
60 | * case of aligned source and destination and multiple | ||
61 | * of 4 (or 8) length. | ||
62 | * | ||
63 | * Register use: | ||
64 | * a0/ return address | ||
65 | * a1/ stack pointer | ||
66 | * a2/ return value | ||
67 | * a3/ src | ||
68 | * a4/ length | ||
69 | * a5/ dst | ||
70 | * a6/ tmp | ||
71 | * a7/ tmp | ||
72 | * a8/ tmp | ||
73 | * a9/ tmp | ||
74 | * a10/ tmp | ||
75 | * a11/ tmp | ||
76 | */ | ||
77 | |||
78 | .text | ||
79 | .align 4 | ||
80 | .global bcopy | ||
81 | .type bcopy,@function | ||
82 | bcopy: | ||
83 | entry sp, 16 # minimal stack frame | ||
84 | # a2=src, a3=dst, a4=len; the arguments are swapped into the memcpy register layout below | ||
85 | mov a5, a3 # copy dst so that a2 is return value | ||
86 | mov a3, a2 # a3 = src, as the common code expects | ||
87 | mov a2, a5 # a2 = dst | ||
88 | j .Lcommon # go to common code for memcpy+bcopy | ||
89 | |||
90 | |||
91 | /* | ||
92 | * Byte by byte copy of a4 bytes from a3 (src) to a5 (dst); returns to the caller of memcpy/bcopy. Clobbers a6 (and a7 without zero-overhead loops). | ||
93 | */ | ||
94 | .align 4 | ||
95 | .byte 0 # 1 mod 4 alignment for LOOPNEZ | ||
96 | # (0 mod 4 alignment for LBEG) | ||
97 | .Lbytecopy: | ||
98 | #if XCHAL_HAVE_LOOPS | ||
99 | loopnez a4, .Lbytecopydone | ||
100 | #else /* !XCHAL_HAVE_LOOPS */ | ||
101 | beqz a4, .Lbytecopydone | ||
102 | add a7, a3, a4 # a7 = end address for source | ||
103 | #endif /* !XCHAL_HAVE_LOOPS */ | ||
104 | .Lnextbyte: | ||
105 | l8ui a6, a3, 0 | ||
106 | addi a3, a3, 1 | ||
107 | s8i a6, a5, 0 | ||
108 | addi a5, a5, 1 | ||
109 | #if !XCHAL_HAVE_LOOPS | ||
110 | bltu a3, a7, .Lnextbyte # addresses are unsigned: a signed blt fails across 0x80000000 | ||
111 | #endif /* !XCHAL_HAVE_LOOPS */ | ||
112 | .Lbytecopydone: | ||
113 | retw | ||
114 | |||
115 | /* | ||
116 | * Destination is unaligned: copy 1 and/or 2 leading bytes so that dst (a5) becomes word-aligned, then rejoin the main algorithm at .Ldstaligned. Short copies are handed to .Lbytecopy instead. | ||
117 | */ | ||
118 | |||
119 | .align 4 | ||
120 | .Ldst1mod2: # dst is only byte aligned | ||
121 | _bltui a4, 7, .Lbytecopy # do short copies byte by byte | ||
122 | |||
123 | # copy 1 byte | ||
124 | l8ui a6, a3, 0 | ||
125 | addi a3, a3, 1 | ||
126 | addi a4, a4, -1 # one byte less left to copy | ||
127 | s8i a6, a5, 0 | ||
128 | addi a5, a5, 1 | ||
129 | _bbci.l a5, 1, .Ldstaligned # if dst is now aligned, then | ||
130 | # return to main algorithm | ||
131 | .Ldst2mod4: # dst 16-bit aligned | ||
132 | # copy 2 bytes | ||
133 | _bltui a4, 6, .Lbytecopy # do short copies byte by byte | ||
134 | l8ui a6, a3, 0 | ||
135 | l8ui a7, a3, 1 | ||
136 | addi a3, a3, 2 | ||
137 | addi a4, a4, -2 # two bytes less left to copy | ||
138 | s8i a6, a5, 0 | ||
139 | s8i a7, a5, 1 | ||
140 | addi a5, a5, 2 | ||
141 | j .Ldstaligned # dst is now aligned, return to main algorithm | ||
142 | |||
143 | .align 4 | ||
144 | .global memcpy | ||
145 | .type memcpy,@function | ||
146 | memcpy: | ||
147 | .global memmove | ||
148 | .type memmove,@function | ||
149 | memmove: # shares the memcpy body; the overlap check at .Lcommon makes that safe | ||
150 | |||
151 | entry sp, 16 # minimal stack frame | ||
152 | # a2/ dst, a3/ src, a4/ len | ||
153 | mov a5, a2 # copy dst so that a2 is return value | ||
154 | .Lcommon: | ||
 | sub a6, a5, a3 # a6 = dst - src as an unsigned distance | ||
 | bltu a6, a4, .Lbackward # src <= dst < src + len: a forward copy would overwrite unread source bytes | ||
155 | _bbsi.l a2, 0, .Ldst1mod2 # if dst is 1 mod 2 | ||
156 | _bbsi.l a2, 1, .Ldst2mod4 # if dst is 2 mod 4 | ||
157 | .Ldstaligned: # return here from .Ldst?mod? once dst is aligned | ||
158 | srli a7, a4, 4 # number of loop iterations with 16B | ||
159 | # per iteration | ||
160 | movi a8, 3 # if source is not aligned, | ||
161 | _bany a3, a8, .Lsrcunaligned # then use shifting copy | ||
162 | /* | ||
163 | * Destination and source are word-aligned, use word copy. | ||
164 | */ | ||
165 | # copy 16 bytes per iteration for word-aligned dst and word-aligned src | ||
166 | #if XCHAL_HAVE_LOOPS | ||
167 | loopnez a7, .Loop1done | ||
168 | #else /* !XCHAL_HAVE_LOOPS */ | ||
169 | beqz a7, .Loop1done | ||
170 | slli a8, a7, 4 | ||
171 | add a8, a8, a3 # a8 = end of last 16B source chunk | ||
172 | #endif /* !XCHAL_HAVE_LOOPS */ | ||
173 | .Loop1: | ||
174 | l32i a6, a3, 0 | ||
175 | l32i a7, a3, 4 | ||
176 | s32i a6, a5, 0 | ||
177 | l32i a6, a3, 8 | ||
178 | s32i a7, a5, 4 | ||
179 | l32i a7, a3, 12 | ||
180 | s32i a6, a5, 8 | ||
181 | addi a3, a3, 16 | ||
182 | s32i a7, a5, 12 | ||
183 | addi a5, a5, 16 | ||
184 | #if !XCHAL_HAVE_LOOPS | ||
185 | bltu a3, a8, .Loop1 # addresses are unsigned: a signed blt fails across 0x80000000 | ||
186 | #endif /* !XCHAL_HAVE_LOOPS */ | ||
187 | .Loop1done: | ||
188 | bbci.l a4, 3, .L2 | ||
189 | # copy 8 bytes | ||
190 | l32i a6, a3, 0 | ||
191 | l32i a7, a3, 4 | ||
192 | addi a3, a3, 8 | ||
193 | s32i a6, a5, 0 | ||
194 | s32i a7, a5, 4 | ||
195 | addi a5, a5, 8 | ||
196 | .L2: | ||
197 | bbsi.l a4, 2, .L3 | ||
198 | bbsi.l a4, 1, .L4 | ||
199 | bbsi.l a4, 0, .L5 | ||
200 | retw | ||
201 | .L3: | ||
202 | # copy 4 bytes | ||
203 | l32i a6, a3, 0 | ||
204 | addi a3, a3, 4 | ||
205 | s32i a6, a5, 0 | ||
206 | addi a5, a5, 4 | ||
207 | bbsi.l a4, 1, .L4 | ||
208 | bbsi.l a4, 0, .L5 | ||
209 | retw | ||
210 | .L4: | ||
211 | # copy 2 bytes | ||
212 | l16ui a6, a3, 0 | ||
213 | addi a3, a3, 2 | ||
214 | s16i a6, a5, 0 | ||
215 | addi a5, a5, 2 | ||
216 | bbsi.l a4, 0, .L5 | ||
217 | retw | ||
218 | .L5: | ||
219 | # copy 1 byte | ||
220 | l8ui a6, a3, 0 | ||
221 | s8i a6, a5, 0 | ||
222 | retw | ||
 | /* | ||
 | * Overlapping copy with dst at or above src (memmove/bcopy semantics): copy backward, one byte at a time, from the last byte down to the first. a4 >= 1 here because dst - src < len (unsigned). a2 still holds the return value. | ||
 | */ | ||
 | .Lbackward: | ||
 | add a3, a3, a4 # a3 = one past the last source byte | ||
 | add a5, a5, a4 # a5 = one past the last destination byte | ||
 | .Lbackbyte: | ||
 | addi a3, a3, -1 | ||
 | l8ui a6, a3, 0 | ||
 | addi a5, a5, -1 | ||
 | s8i a6, a5, 0 | ||
 | addi a4, a4, -1 | ||
 | bnez a4, .Lbackbyte # until all len bytes have been moved | ||
 | retw | ||
223 | |||
224 | /* | ||
225 | * Destination is aligned, Source is unaligned: a3 = src, a5 = dst, a4 = len, a7 = number of 16-byte iterations, a8 = 3. Each source word is loaded once; adjacent words are merged with src_b using the shift amount set by ssa8. | ||
226 | */ | ||
227 | |||
228 | .align 4 | ||
229 | .Lsrcunaligned: | ||
230 | _beqz a4, .Ldone # avoid loading anything for zero-length copies | ||
231 | # copy 16 bytes per iteration for word-aligned dst and unaligned src | ||
232 | ssa8 a3 # set shift amount from byte offset | ||
233 | #define SIM_CHECKS_ALIGNMENT 1 /* set to 1 when running on ISS (simulator) with the | ||
234 | lint or ferret client, or 0 to save a few cycles */ | ||
235 | #if XCHAL_UNALIGNED_LOAD_EXCEPTION || SIM_CHECKS_ALIGNMENT | ||
236 | and a11, a3, a8 # save unalignment offset for below | ||
237 | sub a3, a3, a11 # align a3 | ||
238 | #endif | ||
239 | l32i a6, a3, 0 # load first word | ||
240 | #if XCHAL_HAVE_LOOPS | ||
241 | loopnez a7, .Loop2done | ||
242 | #else /* !XCHAL_HAVE_LOOPS */ | ||
243 | beqz a7, .Loop2done | ||
244 | slli a10, a7, 4 | ||
245 | add a10, a10, a3 # a10 = end of last 16B source chunk | ||
246 | #endif /* !XCHAL_HAVE_LOOPS */ | ||
247 | .Loop2: | ||
248 | l32i a7, a3, 4 | ||
249 | l32i a8, a3, 8 | ||
250 | src_b a6, a6, a7 | ||
251 | s32i a6, a5, 0 | ||
252 | l32i a9, a3, 12 | ||
253 | src_b a7, a7, a8 | ||
254 | s32i a7, a5, 4 | ||
255 | l32i a6, a3, 16 # first word of the next chunk, carried over in a6 | ||
256 | src_b a8, a8, a9 | ||
257 | s32i a8, a5, 8 | ||
258 | addi a3, a3, 16 | ||
259 | src_b a9, a9, a6 | ||
260 | s32i a9, a5, 12 | ||
261 | addi a5, a5, 16 | ||
262 | #if !XCHAL_HAVE_LOOPS | ||
263 | bltu a3, a10, .Loop2 # addresses are unsigned: a signed blt fails across 0x80000000 | ||
264 | #endif /* !XCHAL_HAVE_LOOPS */ | ||
265 | .Loop2done: | ||
266 | bbci.l a4, 3, .L12 | ||
267 | # copy 8 bytes | ||
268 | l32i a7, a3, 4 | ||
269 | l32i a8, a3, 8 | ||
270 | src_b a6, a6, a7 | ||
271 | s32i a6, a5, 0 | ||
272 | addi a3, a3, 8 | ||
273 | src_b a7, a7, a8 | ||
274 | s32i a7, a5, 4 | ||
275 | addi a5, a5, 8 | ||
276 | mov a6, a8 # keep the last loaded word as the next first word | ||
277 | .L12: | ||
278 | bbci.l a4, 2, .L13 | ||
279 | # copy 4 bytes | ||
280 | l32i a7, a3, 4 | ||
281 | addi a3, a3, 4 | ||
282 | src_b a6, a6, a7 | ||
283 | s32i a6, a5, 0 | ||
284 | addi a5, a5, 4 | ||
285 | mov a6, a7 | ||
286 | .L13: | ||
287 | #if XCHAL_UNALIGNED_LOAD_EXCEPTION || SIM_CHECKS_ALIGNMENT | ||
288 | add a3, a3, a11 # readjust a3 with correct misalignment | ||
289 | #endif | ||
290 | bbsi.l a4, 1, .L14 | ||
291 | bbsi.l a4, 0, .L15 | ||
292 | .Ldone: retw | ||
293 | .L14: | ||
294 | # copy 2 bytes | ||
295 | l8ui a6, a3, 0 | ||
296 | l8ui a7, a3, 1 | ||
297 | addi a3, a3, 2 | ||
298 | s8i a6, a5, 0 | ||
299 | s8i a7, a5, 1 | ||
300 | addi a5, a5, 2 | ||
301 | bbsi.l a4, 0, .L15 | ||
302 | retw | ||
303 | .L15: | ||
304 | # copy 1 byte | ||
305 | l8ui a6, a3, 0 | ||
306 | s8i a6, a5, 0 | ||
307 | retw | ||
308 | |||
309 | /* | ||
310 | * Local Variables: | ||
311 | * mode:fundamental | ||
312 | * comment-start: "# " | ||
313 | * comment-start-skip: "# *" | ||
314 | * End: | ||
315 | */ | ||
diff --git a/arch/xtensa/lib/memset.S b/arch/xtensa/lib/memset.S new file mode 100644 index 000000000000..4de25134bc62 --- /dev/null +++ b/arch/xtensa/lib/memset.S | |||
@@ -0,0 +1,160 @@ | |||
1 | /* | ||
2 | * arch/xtensa/lib/memset.S | ||
3 | * | ||
4 | * ANSI C standard library function memset | ||
5 | * (Well, almost. .fixup code might return zero.) | ||
6 | * | ||
7 | * This file is subject to the terms and conditions of the GNU General | ||
8 | * Public License. See the file "COPYING" in the main directory of | ||
9 | * this archive for more details. | ||
10 | * | ||
11 | * Copyright (C) 2002 Tensilica Inc. | ||
12 | */ | ||
13 | |||
14 | #include <xtensa/coreasm.h> | ||
15 | |||
16 | /* | ||
17 | * void *memset(void *dst, int c, size_t length) | ||
18 | * | ||
19 | * The algorithm is as follows: | ||
20 | * Create a word with c in all byte positions | ||
21 | * If the destination is aligned, | ||
22 | * do 16B chunks with a loop, and then finish up with | ||
23 | * 8B, 4B, 2B, and 1B stores conditional on the length. | ||
24 | * If destination is unaligned, align it by conditionally | ||
25 | * setting 1B and 2B and then go to aligned case. | ||
26 | * This code tries to use fall-through branches for the common | ||
27 | * case of an aligned destination (except for the branches to | ||
28 | * the alignment labels). | ||
29 | */ | ||
30 | |||
31 | /* Load or store instructions that may cause exceptions use the EX macro. EX emits "insn reg1, reg2, offset" at local label 9 and adds the pair (9b, handler) to the __ex_table section. */ | ||
32 | |||
33 | #define EX(insn,reg1,reg2,offset,handler) \ | ||
34 | 9: insn reg1, reg2, offset; \ | ||
35 | .section __ex_table, "a"; \ | ||
36 | .word 9b, handler; \ | ||
37 | .previous | ||
38 | |||
39 | |||
40 | .text | ||
41 | .align 4 | ||
42 | .global memset | ||
43 | .type memset,@function | ||
44 | memset: | ||
45 | entry sp, 16 # minimal stack frame | ||
46 | # a2/ dst, a3/ c, a4/ length; returns dst in a2 (memset_fixup returns 0 on a faulting store) | ||
47 | extui a3, a3, 0, 8 # mask to just 8 bits | ||
48 | slli a7, a3, 8 # duplicate character in all bytes of word | ||
49 | or a3, a3, a7 # ... | ||
50 | slli a7, a3, 16 # ... | ||
51 | or a3, a3, a7 # ... | ||
52 | mov a5, a2 # copy dst so that a2 is return value | ||
53 | movi a6, 3 # for alignment tests | ||
54 | bany a2, a6, .Ldstunaligned # if dst is unaligned | ||
55 | .L0: # return here from .Ldstunaligned when dst is aligned | ||
56 | srli a7, a4, 4 # number of loop iterations with 16B | ||
57 | # per iteration | ||
58 | bnez a4, .Laligned | ||
59 | retw # length is zero: nothing to store | ||
60 | |||
61 | /* | ||
62 | * Destination is word-aligned. | ||
63 | */ | ||
64 | # set 16 bytes per iteration for word-aligned dst | ||
65 | .align 4 # 1 mod 4 alignment for LOOPNEZ | ||
66 | .byte 0 # (0 mod 4 alignment for LBEG) | ||
67 | .Laligned: | ||
68 | #if XCHAL_HAVE_LOOPS | ||
69 | loopnez a7, .Loop1done | ||
70 | #else /* !XCHAL_HAVE_LOOPS */ | ||
71 | beqz a7, .Loop1done | ||
72 | slli a6, a7, 4 | ||
73 | add a6, a6, a5 # a6 = end of last 16B chunk | ||
74 | #endif /* !XCHAL_HAVE_LOOPS */ | ||
75 | .Loop1: | ||
76 | EX(s32i, a3, a5, 0, memset_fixup) | ||
77 | EX(s32i, a3, a5, 4, memset_fixup) | ||
78 | EX(s32i, a3, a5, 8, memset_fixup) | ||
79 | EX(s32i, a3, a5, 12, memset_fixup) | ||
80 | addi a5, a5, 16 | ||
81 | #if !XCHAL_HAVE_LOOPS | ||
82 | bltu a5, a6, .Loop1 # addresses are unsigned: a signed blt fails across 0x80000000 | ||
83 | #endif /* !XCHAL_HAVE_LOOPS */ | ||
84 | .Loop1done: | ||
85 | bbci.l a4, 3, .L2 | ||
86 | # set 8 bytes | ||
87 | EX(s32i, a3, a5, 0, memset_fixup) | ||
88 | EX(s32i, a3, a5, 4, memset_fixup) | ||
89 | addi a5, a5, 8 | ||
90 | .L2: | ||
91 | bbci.l a4, 2, .L3 | ||
92 | # set 4 bytes | ||
93 | EX(s32i, a3, a5, 0, memset_fixup) | ||
94 | addi a5, a5, 4 | ||
95 | .L3: | ||
96 | bbci.l a4, 1, .L4 | ||
97 | # set 2 bytes | ||
98 | EX(s16i, a3, a5, 0, memset_fixup) | ||
99 | addi a5, a5, 2 | ||
100 | .L4: | ||
101 | bbci.l a4, 0, .L5 | ||
102 | # set 1 byte | ||
103 | EX(s8i, a3, a5, 0, memset_fixup) | ||
104 | .L5: | ||
105 | .Lret1: | ||
106 | retw | ||
107 | |||
108 | /* | ||
109 | * Destination is unaligned | ||
110 | */ | ||
111 | |||
112 | .Ldstunaligned: | ||
113 | bltui a4, 8, .Lbyteset # do short sets byte by byte | ||
114 | bbci.l a5, 0, .L20 # branch if dst alignment half-aligned | ||
115 | # dst is only byte aligned | ||
116 | # set 1 byte | ||
117 | EX(s8i, a3, a5, 0, memset_fixup) | ||
118 | addi a5, a5, 1 | ||
119 | addi a4, a4, -1 | ||
120 | # now retest if dst aligned | ||
121 | bbci.l a5, 1, .L0 # if now aligned, return to main algorithm | ||
122 | .L20: | ||
123 | # dst half-aligned | ||
124 | # set 2 bytes | ||
125 | EX(s16i, a3, a5, 0, memset_fixup) | ||
126 | addi a5, a5, 2 | ||
127 | addi a4, a4, -2 | ||
128 | j .L0 # dst is now aligned, return to main algorithm | ||
129 | |||
130 | /* | ||
131 | * Byte by byte set | ||
132 | */ | ||
133 | .align 4 | ||
134 | .byte 0 # 1 mod 4 alignment for LOOPNEZ | ||
135 | # (0 mod 4 alignment for LBEG) | ||
136 | .Lbyteset: | ||
137 | #if XCHAL_HAVE_LOOPS | ||
138 | loopnez a4, .Lbytesetdone | ||
139 | #else /* !XCHAL_HAVE_LOOPS */ | ||
140 | beqz a4, .Lbytesetdone | ||
141 | add a6, a5, a4 # a6 = ending address | ||
142 | #endif /* !XCHAL_HAVE_LOOPS */ | ||
143 | .Lbyteloop: | ||
144 | EX(s8i, a3, a5, 0, memset_fixup) | ||
145 | addi a5, a5, 1 | ||
146 | #if !XCHAL_HAVE_LOOPS | ||
147 | bltu a5, a6, .Lbyteloop # unsigned pointer compare | ||
148 | #endif /* !XCHAL_HAVE_LOOPS */ | ||
149 | .Lbytesetdone: | ||
150 | retw | ||
151 | |||
152 | |||
153 | .section .fixup, "ax" | ||
154 | .align 4 | ||
155 | |||
156 | /* We return zero if a failure occurred. memset_fixup is the handler named in every EX() store of memset. */ | ||
157 | |||
158 | memset_fixup: | ||
159 | movi a2, 0 # return value 0 instead of dst | ||
160 | retw | ||
diff --git a/arch/xtensa/lib/pci-auto.c b/arch/xtensa/lib/pci-auto.c new file mode 100644 index 000000000000..90c790f6123b --- /dev/null +++ b/arch/xtensa/lib/pci-auto.c | |||
@@ -0,0 +1,352 @@ | |||
1 | /* | ||
2 | * arch/xtensa/lib/pci-auto.c | ||
3 | * | ||
4 | * PCI autoconfiguration library | ||
5 | * | ||
6 | * Copyright (C) 2001 - 2005 Tensilica Inc. | ||
7 | * | ||
8 | * Chris Zankel <zankel@tensilica.com, cez@zankel.net> | ||
9 | * | ||
10 | * Based on work from Matt Porter <mporter@mvista.com> | ||
11 | * | ||
12 | * This program is free software; you can redistribute it and/or modify it | ||
13 | * under the terms of the GNU General Public License as published by the | ||
14 | * Free Software Foundation; either version 2 of the License, or (at your | ||
15 | * option) any later version. | ||
16 | */ | ||
17 | |||
18 | #include <linux/kernel.h> | ||
19 | #include <linux/init.h> | ||
20 | #include <linux/pci.h> | ||
21 | |||
22 | #include <asm/pci-bridge.h> | ||
23 | |||
24 | |||
25 | /* | ||
26 | * | ||
27 | * Setting up a PCI | ||
28 | * | ||
29 | * pci_ctrl->first_busno = <first bus number (0)> | ||
30 | * pci_ctrl->last_busno = <last bus number (0xff)> | ||
31 | * pci_ctrl->ops = <PCI config operations> | ||
32 | * pci_ctrl->map_irq = <function to return the interrupt number for a device> | ||
33 | * | ||
34 | * pci_ctrl->io_space.start = <IO space start address (PCI view)> | ||
35 | * pci_ctrl->io_space.end = <IO space end address (PCI view)> | ||
36 | * pci_ctrl->io_space.base = <IO space offset: address 0 from CPU space> | ||
37 | * pci_ctrl->mem_space.start = <MEM space start address (PCI view)> | ||
38 | * pci_ctrl->mem_space.end = <MEM space end address (PCI view)> | ||
39 | * pci_ctrl->mem_space.base = <MEM space offset: address 0 from CPU space> | ||
40 | * | ||
41 | * pcibios_init_resource(&pci_ctrl->io_resource, <IO space start>, | ||
42 | * <IO space end>, IORESOURCE_IO, "PCI host bridge"); | ||
43 | * pcibios_init_resource(&pci_ctrl->mem_resources[0], <MEM space start>, | ||
44 | * <MEM space end>, IORESOURCE_MEM, "PCI host bridge"); | ||
45 | * | ||
46 | * pci_ctrl->last_busno = pciauto_bus_scan(pci_ctrl,pci_ctrl->first_busno); | ||
47 | * | ||
48 | * int __init pciauto_bus_scan(struct pci_controller *pci_ctrl, int current_bus) | ||
49 | * | ||
50 | */ | ||
51 | |||
52 | |||
53 | /* define DEBUG to print some debugging messages. */ | ||
54 | |||
55 | #undef DEBUG | ||
56 | |||
57 | #ifdef DEBUG | ||
58 | # define DBG(x...) printk(x) | ||
59 | #else | ||
60 | # define DBG(x...) | ||
61 | #endif | ||
62 | |||
63 | static int pciauto_upper_iospc; | ||
64 | static int pciauto_upper_memspc; | ||
65 | |||
66 | static struct pci_dev pciauto_dev; | ||
67 | static struct pci_bus pciauto_bus; | ||
68 | |||
69 | /* | ||
70 | * Helper functions | ||
71 | */ | ||
72 | |||
73 | /* Initialize the bars of a PCI device. */ | ||
74 | |||
75 | static void __init | ||
76 | pciauto_setup_bars(struct pci_dev *dev, int bar_limit) | ||
77 | { | ||
78 | int bar_size; | ||
79 | int bar, bar_nr; | ||
80 | int *upper_limit; | ||
81 | int found_mem64 = 0; | ||
82 | |||
83 | for (bar = PCI_BASE_ADDRESS_0, bar_nr = 0; | ||
84 | bar <= bar_limit; | ||
85 | bar+=4, bar_nr++) | ||
86 | { | ||
87 | /* Tickle the BAR and get the size */ | ||
88 | pci_write_config_dword(dev, bar, 0xffffffff); | ||
89 | pci_read_config_dword(dev, bar, &bar_size); | ||
90 | |||
91 | /* If BAR is not implemented go to the next BAR */ | ||
92 | if (!bar_size) | ||
93 | continue; | ||
94 | |||
95 | /* Check the BAR type and set our address mask */ | ||
96 | if (bar_size & PCI_BASE_ADDRESS_SPACE_IO) | ||
97 | { | ||
98 | bar_size &= PCI_BASE_ADDRESS_IO_MASK; | ||
99 | upper_limit = &pciauto_upper_iospc; | ||
100 | DBG("PCI Autoconfig: BAR %d, I/O, ", bar_nr); | ||
101 | } | ||
102 | else | ||
103 | { | ||
104 | if ((bar_size & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == | ||
105 | PCI_BASE_ADDRESS_MEM_TYPE_64) | ||
106 | found_mem64 = 1; | ||
107 | |||
108 | bar_size &= PCI_BASE_ADDRESS_MEM_MASK; | ||
109 | upper_limit = &pciauto_upper_memspc; | ||
110 | DBG("PCI Autoconfig: BAR %d, Mem, ", bar_nr); | ||
111 | } | ||
112 | |||
113 | /* Allocate a base address (bar_size is negative!) */ | ||
114 | *upper_limit = (*upper_limit + bar_size) & bar_size; | ||
115 | |||
116 | /* Write it out and update our limit */ | ||
117 | pci_write_config_dword(dev, bar, *upper_limit); | ||
118 | |||
119 | /* | ||
120 | * If we are a 64-bit decoder then increment to the | ||
121 | * upper 32 bits of the bar and force it to locate | ||
122 | * in the lower 4GB of memory. | ||
123 | */ | ||
124 | |||
125 | if (found_mem64) | ||
126 | pci_write_config_dword(dev, (bar+=4), 0x00000000); | ||
127 | |||
128 | DBG("size=0x%x, address=0x%x\n", ~bar_size + 1, *upper_limit); | ||
129 | } | ||
130 | } | ||
131 | |||
132 | /* Initialize the interrupt number. */ | ||
133 | |||
134 | static void __init | ||
135 | pciauto_setup_irq(struct pci_controller* pci_ctrl,struct pci_dev *dev,int devfn) | ||
136 | { | ||
137 | u8 pin; | ||
138 | int irq = 0; | ||
139 | |||
140 | pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); | ||
141 | |||
142 | /* Fix illegal pin numbers. */ | ||
143 | |||
144 | if (pin == 0 || pin > 4) | ||
145 | pin = 1; | ||
146 | |||
147 | if (pci_ctrl->map_irq) | ||
148 | irq = pci_ctrl->map_irq(dev, PCI_SLOT(devfn), pin); | ||
149 | |||
150 | if (irq == -1) | ||
151 | irq = 0; | ||
152 | |||
153 | DBG("PCI Autoconfig: Interrupt %d, pin %d\n", irq, pin); | ||
154 | |||
155 | pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); | ||
156 | } | ||
157 | |||
158 | |||
159 | static void __init | ||
160 | pciauto_prescan_setup_bridge(struct pci_dev *dev, int current_bus, | ||
161 | int sub_bus, int *iosave, int *memsave) | ||
162 | { | ||
163 | /* Configure bus number registers */ | ||
164 | pci_write_config_byte(dev, PCI_PRIMARY_BUS, current_bus); | ||
165 | pci_write_config_byte(dev, PCI_SECONDARY_BUS, sub_bus + 1); | ||
166 | pci_write_config_byte(dev, PCI_SUBORDINATE_BUS, 0xff); | ||
167 | |||
168 | /* Round memory allocator to 1MB boundary */ | ||
169 | pciauto_upper_memspc &= ~(0x100000 - 1); | ||
170 | *memsave = pciauto_upper_memspc; | ||
171 | |||
172 | /* Round I/O allocator to 4KB boundary */ | ||
173 | pciauto_upper_iospc &= ~(0x1000 - 1); | ||
174 | *iosave = pciauto_upper_iospc; | ||
175 | |||
176 | /* Set up memory and I/O filter limits, assume 32-bit I/O space */ | ||
177 | pci_write_config_word(dev, PCI_MEMORY_LIMIT, | ||
178 | ((pciauto_upper_memspc - 1) & 0xfff00000) >> 16); | ||
179 | pci_write_config_byte(dev, PCI_IO_LIMIT, | ||
180 | ((pciauto_upper_iospc - 1) & 0x0000f000) >> 8); | ||
181 | pci_write_config_word(dev, PCI_IO_LIMIT_UPPER16, | ||
182 | ((pciauto_upper_iospc - 1) & 0xffff0000) >> 16); | ||
183 | } | ||
184 | |||
185 | static void __init | ||
186 | pciauto_postscan_setup_bridge(struct pci_dev *dev, int current_bus, int sub_bus, | ||
187 | int *iosave, int *memsave) | ||
188 | { | ||
189 | int cmdstat; | ||
190 | |||
191 | /* Configure bus number registers */ | ||
192 | pci_write_config_byte(dev, PCI_SUBORDINATE_BUS, sub_bus); | ||
193 | |||
194 | /* | ||
195 | * Round memory allocator to 1MB boundary. | ||
196 | * If no space used, allocate minimum. | ||
197 | */ | ||
198 | pciauto_upper_memspc &= ~(0x100000 - 1); | ||
199 | if (*memsave == pciauto_upper_memspc) | ||
200 | pciauto_upper_memspc -= 0x00100000; | ||
201 | |||
202 | pci_write_config_word(dev, PCI_MEMORY_BASE, pciauto_upper_memspc >> 16); | ||
203 | |||
204 | /* Allocate 1MB for pre-fretch */ | ||
205 | pci_write_config_word(dev, PCI_PREF_MEMORY_LIMIT, | ||
206 | ((pciauto_upper_memspc - 1) & 0xfff00000) >> 16); | ||
207 | |||
208 | pciauto_upper_memspc -= 0x100000; | ||
209 | |||
210 | pci_write_config_word(dev, PCI_PREF_MEMORY_BASE, | ||
211 | pciauto_upper_memspc >> 16); | ||
212 | |||
213 | /* Round I/O allocator to 4KB boundary */ | ||
214 | pciauto_upper_iospc &= ~(0x1000 - 1); | ||
215 | if (*iosave == pciauto_upper_iospc) | ||
216 | pciauto_upper_iospc -= 0x1000; | ||
217 | |||
218 | pci_write_config_byte(dev, PCI_IO_BASE, | ||
219 | (pciauto_upper_iospc & 0x0000f000) >> 8); | ||
220 | pci_write_config_word(dev, PCI_IO_BASE_UPPER16, | ||
221 | pciauto_upper_iospc >> 16); | ||
222 | |||
223 | /* Enable memory and I/O accesses, enable bus master */ | ||
224 | pci_read_config_dword(dev, PCI_COMMAND, &cmdstat); | ||
225 | pci_write_config_dword(dev, PCI_COMMAND, | ||
226 | cmdstat | | ||
227 | PCI_COMMAND_IO | | ||
228 | PCI_COMMAND_MEMORY | | ||
229 | PCI_COMMAND_MASTER); | ||
230 | } | ||
231 | |||
232 | /* | ||
233 | * Scan the current PCI bus. | ||
234 | */ | ||
235 | |||
236 | |||
237 | int __init pciauto_bus_scan(struct pci_controller *pci_ctrl, int current_bus) | ||
238 | { | ||
239 | int sub_bus, pci_devfn, pci_class, cmdstat, found_multi=0; | ||
240 | unsigned short vid; | ||
241 | unsigned char header_type; | ||
242 | struct pci_dev *dev = &pciauto_dev; | ||
243 | |||
244 | pciauto_dev.bus = &pciauto_bus; | ||
245 | pciauto_dev.sysdata = pci_ctrl; | ||
246 | pciauto_bus.ops = pci_ctrl->ops; | ||
247 | |||
248 | /* | ||
249 | * Fetch our I/O and memory space upper boundaries used | ||
250 | * to allocated base addresses on this pci_controller. | ||
251 | */ | ||
252 | |||
253 | if (current_bus == pci_ctrl->first_busno) | ||
254 | { | ||
255 | pciauto_upper_iospc = pci_ctrl->io_resource.end + 1; | ||
256 | pciauto_upper_memspc = pci_ctrl->mem_resources[0].end + 1; | ||
257 | } | ||
258 | |||
259 | sub_bus = current_bus; | ||
260 | |||
261 | for (pci_devfn = 0; pci_devfn < 0xff; pci_devfn++) | ||
262 | { | ||
263 | /* Skip our host bridge */ | ||
264 | if ((current_bus == pci_ctrl->first_busno) && (pci_devfn == 0)) | ||
265 | continue; | ||
266 | |||
267 | if (PCI_FUNC(pci_devfn) && !found_multi) | ||
268 | continue; | ||
269 | |||
270 | pciauto_bus.number = current_bus; | ||
271 | pciauto_dev.devfn = pci_devfn; | ||
272 | |||
273 | /* If config space read fails from this device, move on */ | ||
274 | if (pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type)) | ||
275 | continue; | ||
276 | |||
277 | if (!PCI_FUNC(pci_devfn)) | ||
278 | found_multi = header_type & 0x80; | ||
279 | pci_read_config_word(dev, PCI_VENDOR_ID, &vid); | ||
280 | |||
281 | if (vid == 0xffff || vid == 0x0000) { | ||
282 | found_multi = 0; | ||
283 | continue; | ||
284 | } | ||
285 | |||
286 | pci_read_config_dword(dev, PCI_CLASS_REVISION, &pci_class); | ||
287 | |||
288 | if ((pci_class >> 16) == PCI_CLASS_BRIDGE_PCI) { | ||
289 | |||
290 | int iosave, memsave; | ||
291 | |||
292 | DBG("PCI Autoconfig: Found P2P bridge, device %d\n", | ||
293 | PCI_SLOT(pci_devfn)); | ||
294 | |||
295 | /* Allocate PCI I/O and/or memory space */ | ||
296 | pciauto_setup_bars(dev, PCI_BASE_ADDRESS_1); | ||
297 | |||
298 | pciauto_prescan_setup_bridge(dev, current_bus, sub_bus, | ||
299 | &iosave, &memsave); | ||
300 | sub_bus = pciauto_bus_scan(pci_ctrl, sub_bus+1); | ||
301 | pciauto_postscan_setup_bridge(dev, current_bus, sub_bus, | ||
302 | &iosave, &memsave); | ||
303 | pciauto_bus.number = current_bus; | ||
304 | |||
305 | continue; | ||
306 | |||
307 | } | ||
308 | |||
309 | |||
310 | #if 0 | ||
311 | /* Skip legacy mode IDE controller */ | ||
312 | |||
313 | if ((pci_class >> 16) == PCI_CLASS_STORAGE_IDE) { | ||
314 | |||
315 | unsigned char prg_iface; | ||
316 | pci_read_config_byte(dev, PCI_CLASS_PROG, &prg_iface); | ||
317 | |||
318 | if (!(prg_iface & PCIAUTO_IDE_MODE_MASK)) { | ||
319 | DBG("PCI Autoconfig: Skipping legacy mode " | ||
320 | "IDE controller\n"); | ||
321 | continue; | ||
322 | } | ||
323 | } | ||
324 | #endif | ||
325 | |||
326 | /* | ||
327 | * Found a peripheral, enable some standard | ||
328 | * settings | ||
329 | */ | ||
330 | |||
331 | pci_read_config_dword(dev, PCI_COMMAND, &cmdstat); | ||
332 | pci_write_config_dword(dev, PCI_COMMAND, | ||
333 | cmdstat | | ||
334 | PCI_COMMAND_IO | | ||
335 | PCI_COMMAND_MEMORY | | ||
336 | PCI_COMMAND_MASTER); | ||
337 | pci_write_config_byte(dev, PCI_LATENCY_TIMER, 0x80); | ||
338 | |||
339 | /* Allocate PCI I/O and/or memory space */ | ||
340 | DBG("PCI Autoconfig: Found Bus %d, Device %d, Function %d\n", | ||
341 | current_bus, PCI_SLOT(pci_devfn), PCI_FUNC(pci_devfn) ); | ||
342 | |||
343 | pciauto_setup_bars(dev, PCI_BASE_ADDRESS_5); | ||
344 | pciauto_setup_irq(pci_ctrl, dev, pci_devfn); | ||
345 | } | ||
346 | return sub_bus; | ||
347 | } | ||
348 | |||
349 | |||
350 | |||
351 | |||
352 | |||
diff --git a/arch/xtensa/lib/strcasecmp.c b/arch/xtensa/lib/strcasecmp.c new file mode 100644 index 000000000000..165b2d6effa5 --- /dev/null +++ b/arch/xtensa/lib/strcasecmp.c | |||
@@ -0,0 +1,32 @@ | |||
1 | /* | ||
2 | * linux/arch/xtensa/lib/strcasecmp.c | ||
3 | * | ||
4 | * This file is subject to the terms and conditions of the GNU General | ||
5 | * Public License. See the file "COPYING" in the main directory of | ||
6 | * this archive for more details. | ||
7 | * | ||
8 | * Copyright (C) 2002 Tensilica Inc. | ||
9 | */ | ||
10 | |||
11 | #include <linux/string.h> | ||
12 | |||
13 | |||
14 | /* We handle nothing here except the C locale. Since this is used in | ||
15 | only one place, on strings known to contain only 7 bit ASCII, this | ||
16 | is ok. */ | ||
17 | |||
/* Map 'A'..'Z' to 'a'..'z'; every other value is returned unchanged. */
static int xt_ascii_tolower(int c)
{
	return ('A' <= c && c <= 'Z') ? c + ('a' - 'A') : c;
}

/*
 * Compare two NUL-terminated strings, ignoring the case of ASCII
 * letters.  Characters are compared as values 0..255.
 *
 * Returns zero if the strings are equal, otherwise the difference
 * between the first pair of case-folded characters that differ.
 */
int strcasecmp(const char *a, const char *b)
{
	int lhs, rhs;

	for (;;) {
		lhs = xt_ascii_tolower(*a++ & 0xff);
		rhs = xt_ascii_tolower(*b++ & 0xff);

		/* Stop at the first mismatch or at the common terminator. */
		if (lhs != rhs || lhs == '\0')
			break;
	}

	return lhs - rhs;
}
diff --git a/arch/xtensa/lib/strncpy_user.S b/arch/xtensa/lib/strncpy_user.S new file mode 100644 index 000000000000..71d55df43893 --- /dev/null +++ b/arch/xtensa/lib/strncpy_user.S | |||
@@ -0,0 +1,224 @@ | |||
1 | /* | ||
2 | * arch/xtensa/lib/strncpy_user.S | ||
3 | * | ||
4 | * This file is subject to the terms and conditions of the GNU General | ||
5 | * Public License. See the file "COPYING" in the main directory of | ||
6 | * this archive for more details. | ||
7 | * | ||
8 | * Returns: -EFAULT if exception before terminator, N if the entire | ||
9 | * buffer filled, else strlen. | ||
10 | * | ||
11 | * Copyright (C) 2002 Tensilica Inc. | ||
12 | */ | ||
13 | |||
14 | #include <xtensa/coreasm.h> | ||
15 | #include <linux/errno.h> | ||
16 | |||
17 | /* Load or store instructions that may cause exceptions use the EX macro. */ | ||
18 | |||
19 | #define EX(insn,reg1,reg2,offset,handler) \ | ||
20 | 9: insn reg1, reg2, offset; \ | ||
21 | .section __ex_table, "a"; \ | ||
22 | .word 9b, handler; \ | ||
23 | .previous | ||
24 | |||
25 | /* | ||
26 | * char *__strncpy_user(char *dst, const char *src, size_t len) | ||
27 | */ | ||
28 | .text | ||
29 | .begin literal | ||
30 | .align 4 | ||
31 | .Lmask0: | ||
32 | .byte 0xff, 0x00, 0x00, 0x00 | ||
33 | .Lmask1: | ||
34 | .byte 0x00, 0xff, 0x00, 0x00 | ||
35 | .Lmask2: | ||
36 | .byte 0x00, 0x00, 0xff, 0x00 | ||
37 | .Lmask3: | ||
38 | .byte 0x00, 0x00, 0x00, 0xff | ||
39 | .end literal | ||
40 | |||
41 | # Register use | ||
42 | # a0/ return address | ||
43 | # a1/ stack pointer | ||
44 | # a2/ return value | ||
45 | # a3/ src | ||
46 | # a4/ len | ||
47 | # a5/ mask0 | ||
48 | # a6/ mask1 | ||
49 | # a7/ mask2 | ||
50 | # a8/ mask3 | ||
51 | # a9/ tmp | ||
52 | # a10/ tmp | ||
53 | # a11/ dst | ||
54 | # a12/ tmp | ||
55 | |||
56 | .align 4 | ||
57 | .global __strncpy_user | ||
58 | .type __strncpy_user,@function | ||
/*
 * __strncpy_user
 *
 * In:   a2 = dst, a3 = src, a4 = len
 * Out:  a2 = number of bytes stored before the NUL terminator, computed
 *            as (final dst pointer - original dst); this equals len when
 *            len bytes were stored without meeting a terminator.
 *            -EFAULT if a load or store faults (fixup_l / fixup_s).
 *
 * a11 is the running dst pointer; a2 keeps the original dst so that the
 * length can be computed on exit.  a5..a8 hold the byte masks loaded
 * from .Lmask0...Lmask3, a9 is the data register, a12 a loop bound.
 * Every load/store that may fault is wrapped in EX(), which records the
 * instruction and its handler in __ex_table.
 *
 * NOTE(review): the prototype comment above the function declares a
 * "char *" return type, but the code returns a length or -EFAULT.
 */
59 | __strncpy_user: | ||
60 | entry sp, 16 # minimal stack frame | ||
61 | # a2/ dst, a3/ src, a4/ len | ||
62 | mov a11, a2 # leave dst in return value register | ||
63 | beqz a4, .Lret # if len is zero | ||
64 | l32r a5, .Lmask0 # mask for byte 0 | ||
65 | l32r a6, .Lmask1 # mask for byte 1 | ||
66 | l32r a7, .Lmask2 # mask for byte 2 | ||
67 | l32r a8, .Lmask3 # mask for byte 3 | ||
68 | bbsi.l a3, 0, .Lsrc1mod2 # if only 8-bit aligned | ||
69 | bbsi.l a3, 1, .Lsrc2mod4 # if only 16-bit aligned | ||
70 | .Lsrcaligned: # return here when src is word-aligned | ||
71 | srli a12, a4, 2 # number of loop iterations with 4B per loop | ||
/* Use the word loop only if dst is word-aligned too ((a11 & 3) == 0);
 * otherwise fall back to the byte loop at .Ldstunaligned. */
72 | movi a9, 3 | ||
73 | bnone a11, a9, .Laligned | ||
74 | j .Ldstunaligned | ||
75 | |||
/* Copy a single byte so that src becomes halfword-aligned. */
76 | .Lsrc1mod2: # src address is odd | ||
77 | EX(l8ui, a9, a3, 0, fixup_l) # get byte 0 | ||
78 | addi a3, a3, 1 # advance src pointer | ||
79 | EX(s8i, a9, a11, 0, fixup_s) # store byte 0 | ||
80 | beqz a9, .Lret # if byte 0 is zero | ||
81 | addi a11, a11, 1 # advance dst pointer | ||
82 | addi a4, a4, -1 # decrement len | ||
83 | beqz a4, .Lret # if len is zero | ||
84 | bbci.l a3, 1, .Lsrcaligned # if src is now word-aligned | ||
85 | |||
/* Copy two bytes, one at a time, so that src becomes word-aligned.
 * a3 is advanced only once, by 2, after the second load; the second
 * load therefore uses offset 1. */
86 | .Lsrc2mod4: # src address is 2 mod 4 | ||
87 | EX(l8ui, a9, a3, 0, fixup_l) # get byte 0 | ||
88 | /* 1-cycle interlock */ | ||
89 | EX(s8i, a9, a11, 0, fixup_s) # store byte 0 | ||
90 | beqz a9, .Lret # if byte 0 is zero | ||
91 | addi a11, a11, 1 # advance dst pointer | ||
92 | addi a4, a4, -1 # decrement len | ||
93 | beqz a4, .Lret # if len is zero | ||
94 | EX(l8ui, a9, a3, 1, fixup_l) # get byte 1 | ||
95 | addi a3, a3, 2 # advance src pointer | ||
96 | EX(s8i, a9, a11, 0, fixup_s) # store byte 1 | ||
97 | beqz a9, .Lret # if byte 1 is zero | ||
98 | addi a11, a11, 1 # advance dst pointer | ||
99 | addi a4, a4, -1 # decrement len | ||
100 | bnez a4, .Lsrcaligned # if len is nonzero | ||
/* Common exit: a11 points at the terminator just stored, or one past
 * the last byte stored when len ran out. */
101 | .Lret: | ||
102 | sub a2, a11, a2 # compute strlen | ||
103 | retw | ||
104 | |||
105 | /* | ||
106 | * dst is word-aligned, src is word-aligned | ||
107 | */ | ||
108 | .align 4 # 1 mod 4 alignment for LOOPNEZ | ||
109 | .byte 0 # (0 mod 4 alignment for LBEG) | ||
/* Word loop, a12 iterations.  Without the loop option a12 is turned
 * into the dst address at which the loop stops. */
110 | .Laligned: | ||
111 | #if XCHAL_HAVE_LOOPS | ||
112 | loopnez a12, .Loop1done | ||
113 | #else | ||
114 | beqz a12, .Loop1done | ||
115 | slli a12, a12, 2 | ||
116 | add a12, a12, a11 # a12 = end of last 4B chunk | ||
117 | #endif | ||
/* Bytes 0..2 are tested before the word is stored, so a terminator
 * there is handled by the partial stores at .Lz0/.Lz1/.Lz2.  Byte 3 is
 * tested after the store: at .Lz3 the whole word is already in dst. */
118 | .Loop1: | ||
119 | EX(l32i, a9, a3, 0, fixup_l) # get word from src | ||
120 | addi a3, a3, 4 # advance src pointer | ||
121 | bnone a9, a5, .Lz0 # if byte 0 is zero | ||
122 | bnone a9, a6, .Lz1 # if byte 1 is zero | ||
123 | bnone a9, a7, .Lz2 # if byte 2 is zero | ||
124 | EX(s32i, a9, a11, 0, fixup_s) # store word to dst | ||
125 | bnone a9, a8, .Lz3 # if byte 3 is zero | ||
126 | addi a11, a11, 4 # advance dst pointer | ||
127 | #if !XCHAL_HAVE_LOOPS | ||
128 | blt a11, a12, .Loop1 | ||
129 | #endif | ||
130 | |||
/* Tail: bit 1 of the remaining len selects a halfword copy, bit 0 a
 * final single byte. */
131 | .Loop1done: | ||
132 | bbci.l a4, 1, .L100 | ||
133 | # copy 2 bytes | ||
134 | EX(l16ui, a9, a3, 0, fixup_l) | ||
135 | addi a3, a3, 2 # advance src pointer | ||
136 | #ifdef __XTENSA_EB__ | ||
137 | bnone a9, a7, .Lz0 # if byte 2 is zero | ||
138 | bnone a9, a8, .Lz1 # if byte 3 is zero | ||
139 | #else | ||
140 | bnone a9, a5, .Lz0 # if byte 0 is zero | ||
141 | bnone a9, a6, .Lz1 # if byte 1 is zero | ||
142 | #endif | ||
143 | EX(s16i, a9, a11, 0, fixup_s) | ||
144 | addi a11, a11, 2 # advance dst pointer | ||
145 | .L100: | ||
146 | bbci.l a4, 0, .Lret | ||
147 | EX(l8ui, a9, a3, 0, fixup_l) | ||
148 | /* slot */ | ||
149 | EX(s8i, a9, a11, 0, fixup_s) | ||
150 | beqz a9, .Lret # if byte is zero | ||
151 | addi a11, a11, 1-3 # advance dst ptr 1, but also cancel | ||
152 | # the effect of adding 3 in .Lz3 code | ||
153 | /* fall thru to .Lz3 and "retw" */ | ||
154 | |||
/* Terminator handlers: store whatever part of a9 precedes and includes
 * the terminator, leave a11 pointing at the terminator, and return the
 * length. */
155 | .Lz3: # byte 3 is zero | ||
156 | addi a11, a11, 3 # advance dst pointer | ||
157 | sub a2, a11, a2 # compute strlen | ||
158 | retw | ||
/* On big-endian the byte that s8i stores (the low byte of a9) is not
 * the zero byte that was detected, so a9 is cleared first. */
159 | .Lz0: # byte 0 is zero | ||
160 | #ifdef __XTENSA_EB__ | ||
161 | movi a9, 0 | ||
162 | #endif /* __XTENSA_EB__ */ | ||
163 | EX(s8i, a9, a11, 0, fixup_s) | ||
164 | sub a2, a11, a2 # compute strlen | ||
165 | retw | ||
/* NOTE(review): .Lz1 is also reached from the halfword tail, where a9
 * was loaded with l16ui.  On big-endian the extui below takes bits
 * 31..16, which look to be zero in that case, so two zero bytes would
 * be stored instead of the first character plus terminator - verify. */
166 | .Lz1: # byte 1 is zero | ||
167 | #ifdef __XTENSA_EB__ | ||
168 | extui a9, a9, 16, 16 | ||
169 | #endif /* __XTENSA_EB__ */ | ||
170 | EX(s16i, a9, a11, 0, fixup_s) | ||
171 | addi a11, a11, 1 # advance dst pointer | ||
172 | sub a2, a11, a2 # compute strlen | ||
173 | retw | ||
/* Bytes 0 and 1 are stored as a halfword, then the terminator is
 * written separately at offset 2. */
174 | .Lz2: # byte 2 is zero | ||
175 | #ifdef __XTENSA_EB__ | ||
176 | extui a9, a9, 16, 16 | ||
177 | #endif /* __XTENSA_EB__ */ | ||
178 | EX(s16i, a9, a11, 0, fixup_s) | ||
179 | movi a9, 0 | ||
180 | EX(s8i, a9, a11, 2, fixup_s) | ||
181 | addi a11, a11, 2 # advance dst pointer | ||
182 | sub a2, a11, a2 # compute strlen | ||
183 | retw | ||
184 | |||
185 | .align 4 # 1 mod 4 alignment for LOOPNEZ | ||
186 | .byte 0 # (0 mod 4 alignment for LBEG) | ||
/* dst is not word-aligned: copy byte by byte, for at most a4 bytes or
 * until a zero byte has been stored. */
187 | .Ldstunaligned: | ||
188 | /* | ||
189 | * for now just use byte copy loop | ||
190 | */ | ||
191 | #if XCHAL_HAVE_LOOPS | ||
192 | loopnez a4, .Lunalignedend | ||
193 | #else | ||
194 | beqz a4, .Lunalignedend | ||
195 | add a12, a11, a4 # a12 = ending address | ||
196 | #endif /* XCHAL_HAVE_LOOPS */ | ||
197 | .Lnextbyte: | ||
198 | EX(l8ui, a9, a3, 0, fixup_l) | ||
199 | addi a3, a3, 1 | ||
200 | EX(s8i, a9, a11, 0, fixup_s) | ||
201 | beqz a9, .Lunalignedend | ||
202 | addi a11, a11, 1 | ||
203 | #if !XCHAL_HAVE_LOOPS | ||
204 | blt a11, a12, .Lnextbyte | ||
205 | #endif | ||
206 | |||
207 | .Lunalignedend: | ||
208 | sub a2, a11, a2 # compute strlen | ||
209 | retw | ||
210 | |||
211 | |||
212 | .section .fixup, "ax" | ||
213 | .align 4 | ||
214 | |||
215 | /* For now, just return -EFAULT. Future implementations might | ||
216 | * like to clear remaining kernel space, like the fixup | ||
217 | * implementation in memset(). Thus, we differentiate between | ||
218 | * load/store fixups. */ | ||
219 | |||
/* Both labels currently share one handler: the result register is
 * replaced by -EFAULT and the function returns. */
220 | fixup_s: | ||
221 | fixup_l: | ||
222 | movi a2, -EFAULT | ||
223 | retw | ||
224 | |||
diff --git a/arch/xtensa/lib/strnlen_user.S b/arch/xtensa/lib/strnlen_user.S new file mode 100644 index 000000000000..cdff4d670f3b --- /dev/null +++ b/arch/xtensa/lib/strnlen_user.S | |||
@@ -0,0 +1,147 @@ | |||
1 | /* | ||
2 | * arch/xtensa/lib/strnlen_user.S | ||
3 | * | ||
4 | * This file is subject to the terms and conditions of the GNU General | ||
5 | * Public License. See the file "COPYING" in the main directory of | ||
6 | * this archive for more details. | ||
7 | * | ||
8 | * Returns strnlen, including trailing zero terminator. | ||
9 | * Zero indicates error. | ||
10 | * | ||
11 | * Copyright (C) 2002 Tensilica Inc. | ||
12 | */ | ||
13 | |||
14 | #include <xtensa/coreasm.h> | ||
15 | |||
16 | /* Load or store instructions that may cause exceptions use the EX macro. */ | ||
17 | |||
18 | #define EX(insn,reg1,reg2,offset,handler) \ | ||
19 | 9: insn reg1, reg2, offset; \ | ||
20 | .section __ex_table, "a"; \ | ||
21 | .word 9b, handler; \ | ||
22 | .previous | ||
23 | |||
24 | /* | ||
25 | * size_t __strnlen_user(const char *s, size_t len) | ||
26 | */ | ||
27 | .text | ||
28 | .begin literal | ||
29 | .align 4 | ||
30 | .Lmask0: | ||
31 | .byte 0xff, 0x00, 0x00, 0x00 | ||
32 | .Lmask1: | ||
33 | .byte 0x00, 0xff, 0x00, 0x00 | ||
34 | .Lmask2: | ||
35 | .byte 0x00, 0x00, 0xff, 0x00 | ||
36 | .Lmask3: | ||
37 | .byte 0x00, 0x00, 0x00, 0xff | ||
38 | .end literal | ||
39 | |||
40 | # Register use: | ||
41 | # a2/ src | ||
42 | # a3/ len | ||
43 | # a4/ tmp | ||
44 | # a5/ mask0 | ||
45 | # a6/ mask1 | ||
46 | # a7/ mask2 | ||
47 | # a8/ mask3 | ||
48 | # a9/ tmp | ||
49 | # a10/ tmp | ||
50 | |||
51 | .align 4 | ||
52 | .global __strnlen_user | ||
53 | .type __strnlen_user,@function | ||
/*
 * __strnlen_user
 *
 * In:   a2 = s, a3 = len
 * Out:  a2 = length of the string including its NUL terminator when a
 *            terminator is found; 0 if a load faults (lenfixup).
 *            When no terminator is found, the value computed at .L101
 *            is returned.
 *
 * a4 is the running string pointer, biased by -4: the word loop loads
 * from a4 + 4 and then advances a4, so inside the loop a4 addresses the
 * word just examined.  a5..a8 hold the byte masks loaded from
 * .Lmask0...Lmask3 and a9 the data being tested.  Loads that may fault
 * are wrapped in EX(), which records them in __ex_table.
 */
54 | __strnlen_user: | ||
55 | entry sp, 16 # minimal stack frame | ||
56 | # a2/ s, a3/ len | ||
57 | addi a4, a2, -4 # because we overincrement at the end; | ||
58 | # we compensate with load offsets of 4 | ||
59 | l32r a5, .Lmask0 # mask for byte 0 | ||
60 | l32r a6, .Lmask1 # mask for byte 1 | ||
61 | l32r a7, .Lmask2 # mask for byte 2 | ||
62 | l32r a8, .Lmask3 # mask for byte 3 | ||
63 | bbsi.l a2, 0, .L1mod2 # if only 8-bit aligned | ||
64 | bbsi.l a2, 1, .L2mod4 # if only 16-bit aligned | ||
65 | |||
66 | /* | ||
67 | * String is word-aligned. | ||
68 | */ | ||
/* Word loop, len / 4 iterations.  Without the loop option a10 is
 * turned into the a4 value at which the loop stops.  The iteration
 * count always comes from the original len in a3, also when the
 * alignment code below has already consumed some bytes. */
69 | .Laligned: | ||
70 | srli a10, a3, 2 # number of loop iterations with 4B per loop | ||
71 | #if XCHAL_HAVE_LOOPS | ||
72 | loopnez a10, .Ldone | ||
73 | #else | ||
74 | beqz a10, .Ldone | ||
75 | slli a10, a10, 2 | ||
76 | add a10, a10, a4 # a10 = end of last 4B chunk | ||
77 | #endif /* XCHAL_HAVE_LOOPS */ | ||
78 | .Loop: | ||
79 | EX(l32i, a9, a4, 4, lenfixup) # get next word of string | ||
80 | addi a4, a4, 4 # advance string pointer | ||
81 | bnone a9, a5, .Lz0 # if byte 0 is zero | ||
82 | bnone a9, a6, .Lz1 # if byte 1 is zero | ||
83 | bnone a9, a7, .Lz2 # if byte 2 is zero | ||
84 | bnone a9, a8, .Lz3 # if byte 3 is zero | ||
85 | #if !XCHAL_HAVE_LOOPS | ||
86 | blt a4, a10, .Loop | ||
87 | #endif | ||
88 | |||
/* Tail for the low two bits of len.
 * NOTE(review): a4 still carries the -4 bias here (the load below uses
 * offset 4), and the addi's in this tail do not appear to remove it
 * before .Lz0/.Lz1/.L101 subtract the start address; for len < 4 the
 * result seems to come out below the start address.  Verify the tail
 * arithmetic against the word loop. */
89 | .Ldone: | ||
90 | EX(l32i, a9, a4, 4, lenfixup) # load 4 bytes for remaining checks | ||
91 | |||
92 | bbci.l a3, 1, .L100 | ||
93 | # check two more bytes (bytes 0, 1 of word) | ||
94 | addi a4, a4, 2 # advance string pointer | ||
95 | bnone a9, a5, .Lz0 # if byte 0 is zero | ||
96 | bnone a9, a6, .Lz1 # if byte 1 is zero | ||
97 | .L100: | ||
98 | bbci.l a3, 0, .L101 | ||
99 | # check one more byte (byte 2 of word) | ||
100 | # Actually, we don't need to check. Zero or nonzero, we'll add one. | ||
101 | # Do not add an extra one for the NULL terminator since we have | ||
102 | # exhausted the original len parameter. | ||
103 | addi a4, a4, 1 # advance string pointer | ||
104 | .L101: | ||
105 | sub a2, a4, a2 # compute length | ||
106 | retw | ||
107 | |||
108 | # NOTE that in several places below, we point to the byte just after | ||
109 | # the zero byte in order to include the NULL terminator in the count. | ||
110 | |||
/* .Lz3 falls through into .Lz0, giving a total adjustment of 3 + 1. */
111 | .Lz3: # byte 3 is zero | ||
112 | addi a4, a4, 3 # point to zero byte | ||
113 | .Lz0: # byte 0 is zero | ||
114 | addi a4, a4, 1 # point just beyond zero byte | ||
115 | sub a2, a4, a2 # subtract to get length | ||
116 | retw | ||
117 | .Lz1: # byte 1 is zero | ||
118 | addi a4, a4, 1+1 # point just beyond zero byte | ||
119 | sub a2, a4, a2 # subtract to get length | ||
120 | retw | ||
121 | .Lz2: # byte 2 is zero | ||
122 | addi a4, a4, 2+1 # point just beyond zero byte | ||
123 | sub a2, a4, a2 # subtract to get length | ||
124 | retw | ||
125 | |||
/* Check the first byte on its own.  With a4 = s - 3 afterwards, the
 * branch to .Lz3 adds 3 + 1 and so returns a length of 1. */
126 | .L1mod2: # address is odd | ||
127 | EX(l8ui, a9, a4, 4, lenfixup) # get byte 0 | ||
128 | addi a4, a4, 1 # advance string pointer | ||
129 | beqz a9, .Lz3 # if byte 0 is zero | ||
130 | bbci.l a4, 1, .Laligned # if string pointer is now word-aligned | ||
131 | |||
/* Load the aligned word whose bytes 2 and 3 are the next two string
 * bytes and test only those two bytes. */
132 | .L2mod4: # address is 2 mod 4 | ||
133 | addi a4, a4, 2 # advance ptr for aligned access | ||
134 | EX(l32i, a9, a4, 0, lenfixup) # get word with first two bytes of string | ||
135 | bnone a9, a7, .Lz2 # if byte 2 (of word, not string) is zero | ||
136 | bany a9, a8, .Laligned # if byte 3 (of word, not string) is nonzero | ||
137 | # byte 3 is zero | ||
138 | addi a4, a4, 3+1 # point just beyond zero byte | ||
139 | sub a2, a4, a2 # subtract to get length | ||
140 | retw | ||
141 | |||
142 | .section .fixup, "ax" | ||
143 | .align 4 | ||
/* Fault handler for all EX() loads above: return 0 to signal the error. */
144 | lenfixup: | ||
145 | movi a2, 0 | ||
146 | retw | ||
147 | |||
diff --git a/arch/xtensa/lib/usercopy.S b/arch/xtensa/lib/usercopy.S new file mode 100644 index 000000000000..265db2693cbd --- /dev/null +++ b/arch/xtensa/lib/usercopy.S | |||
@@ -0,0 +1,321 @@ | |||
1 | /* | ||
2 | * arch/xtensa/lib/usercopy.S | ||
3 | * | ||
4 | * Copy to/from user space (derived from arch/xtensa/lib/hal/memcopy.S) | ||
5 | * | ||
6 | * DO NOT COMBINE this function with <arch/xtensa/lib/hal/memcopy.S>. | ||
7 | * It needs to remain separate and distinct. The hal files are part | ||
8 | * of the Xtensa link-time HAL, and those files may differ per | ||
9 | * processor configuration. Patching the kernel for another | ||
10 | * processor configuration includes replacing the hal files, and we | ||
11 | * could lose the special functionality for accessing user-space | ||
12 | * memory during such a patch. We sacrifice a little code space here | ||
13 | * in favor of simpler code maintenance. | ||
14 | * | ||
15 | * This file is subject to the terms and conditions of the GNU General | ||
16 | * Public License. See the file "COPYING" in the main directory of | ||
17 | * this archive for more details. | ||
18 | * | ||
19 | * Copyright (C) 2002 Tensilica Inc. | ||
20 | */ | ||
21 | |||
22 | |||
23 | /* | ||
24 | * size_t __xtensa_copy_user (void *dst, const void *src, size_t len); | ||
25 | * | ||
26 | * The returned value is the number of bytes not copied. Implies zero | ||
27 | * is success. | ||
28 | * | ||
29 | * The general case algorithm is as follows: | ||
30 | * If the destination and source are both aligned, | ||
31 | * do 16B chunks with a loop, and then finish up with | ||
32 | * 8B, 4B, 2B, and 1B copies conditional on the length. | ||
33 | * If destination is aligned and source unaligned, | ||
34 | * do the same, but use SRC to align the source data. | ||
35 | * If destination is unaligned, align it by conditionally | ||
36 | * copying 1B and 2B and then retest. | ||
37 | * This code tries to use fall-through branches for the common | ||
38 | * case of aligned destinations (except for the branches to | ||
39 | * the alignment label). | ||
40 | * | ||
41 | * Register use: | ||
42 | * a0/ return address | ||
43 | * a1/ stack pointer | ||
44 | * a2/ return value | ||
45 | * a3/ src | ||
46 | * a4/ length | ||
47 | * a5/ dst | ||
48 | * a6/ tmp | ||
49 | * a7/ tmp | ||
50 | * a8/ tmp | ||
51 | * a9/ tmp | ||
52 | * a10/ tmp | ||
53 | * a11/ original length | ||
54 | */ | ||
55 | |||
56 | #include <xtensa/coreasm.h> | ||
57 | |||
58 | #ifdef __XTENSA_EB__ /* selects the ssa8b / (W0, W1) variants of the helpers below */ | ||
59 | #define ALIGN(R, W0, W1) src R, W0, W1 | ||
60 | #define SSA8(R) ssa8b R | ||
61 | #else /* !__XTENSA_EB__: ssa8l and swapped src operands */ | ||
62 | #define ALIGN(R, W0, W1) src R, W1, W0 | ||
63 | #define SSA8(R) ssa8l R | ||
64 | #endif /* __XTENSA_EB__ */ | ||
65 | |||
66 | /* Load or store instructions that may cause exceptions use the EX macro: it emits the instruction at local label 9 and records the pair (9b, handler) in the __ex_table section. */ | ||
67 | |||
68 | #define EX(insn,reg1,reg2,offset,handler) \ | ||
69 | 9: insn reg1, reg2, offset; \ | ||
70 | .section __ex_table, "a"; \ | ||
71 | .word 9b, handler; \ | ||
72 | .previous | ||
73 | |||
74 | |||
75 | .text | ||
76 | .align 4 | ||
77 | .global __xtensa_copy_user | ||
78 | .type __xtensa_copy_user,@function | ||
79 | __xtensa_copy_user: | ||
80 | entry sp, 16 # minimal stack frame | ||
81 | # a2/ dst, a3/ src, a4/ len | ||
82 | mov a5, a2 # a5 = running dst; a2 keeps the original dst (used by the fixup code) and becomes the return value | ||
83 | mov a11, a4 # preserve original len for error case (fixup computes bytes not copied from it) | ||
84 | .Lcommon: | ||
85 | bbsi.l a2, 0, .Ldst1mod2 # if dst is 1 mod 2 (a2 == a5 at this point) | ||
86 | bbsi.l a2, 1, .Ldst2mod4 # if dst is 2 mod 4 | ||
87 | .Ldstaligned: # return here from .Ldst1mod2/.Ldst2mod4 when dst is aligned | ||
88 | srli a7, a4, 4 # number of loop iterations with 16B | ||
89 | # per iteration | ||
90 | movi a8, 3 # if source is also aligned, | ||
91 | bnone a3, a8, .Laligned # then use word copy | ||
92 | SSA8( a3) # set shift amount from byte offset | ||
93 | bnez a4, .Lsrcunaligned # len != 0: copy with source realignment | ||
94 | movi a2, 0 # return success for len==0 | ||
95 | retw | ||
96 | |||
97 | /* | ||
98 | * Destination is unaligned: copy 1 and/or 2 leading bytes until dst (a5) is word-aligned, then rejoin .Ldstaligned | ||
99 | */ | ||
100 | |||
101 | .Ldst1mod2: # dst is only byte aligned | ||
102 | bltui a4, 7, .Lbytecopy # do short copies byte by byte | ||
103 | |||
104 | # copy 1 byte | ||
105 | EX(l8ui, a6, a3, 0, l_fixup) | ||
106 | addi a3, a3, 1 | ||
107 | EX(s8i, a6, a5, 0, s_fixup) | ||
108 | addi a5, a5, 1 | ||
109 | addi a4, a4, -1 # one byte less to copy | ||
110 | bbci.l a5, 1, .Ldstaligned # if dst is now aligned, then | ||
111 | # return to main algorithm | ||
112 | .Ldst2mod4: # dst 16-bit aligned | ||
113 | # copy 2 bytes | ||
114 | bltui a4, 6, .Lbytecopy # do short copies byte by byte | ||
115 | EX(l8ui, a6, a3, 0, l_fixup) | ||
116 | EX(l8ui, a7, a3, 1, l_fixup) | ||
117 | addi a3, a3, 2 | ||
118 | EX(s8i, a6, a5, 0, s_fixup) | ||
119 | EX(s8i, a7, a5, 1, s_fixup) | ||
120 | addi a5, a5, 2 | ||
121 | addi a4, a4, -2 # two bytes less to copy | ||
122 | j .Ldstaligned # dst is now aligned, return to main algorithm | ||
123 | |||
124 | /* | ||
125 | * Byte by byte copy of a4 bytes from a3 to a5 (used for short copies with an unaligned destination) | ||
126 | */ | ||
127 | .align 4 | ||
128 | .byte 0 # 1 mod 4 alignment for LOOPNEZ | ||
129 | # (0 mod 4 alignment for LBEG) | ||
130 | .Lbytecopy: | ||
131 | #if XCHAL_HAVE_LOOPS | ||
132 | loopnez a4, .Lbytecopydone # a4 iterations; loop body is skipped when len == 0 | ||
133 | #else /* !XCHAL_HAVE_LOOPS */ | ||
134 | beqz a4, .Lbytecopydone # nothing to copy | ||
135 | add a7, a3, a4 # a7 = end address for source | ||
136 | #endif /* !XCHAL_HAVE_LOOPS */ | ||
137 | .Lnextbyte: | ||
138 | EX(l8ui, a6, a3, 0, l_fixup) | ||
139 | addi a3, a3, 1 | ||
140 | EX(s8i, a6, a5, 0, s_fixup) | ||
141 | addi a5, a5, 1 | ||
142 | #if !XCHAL_HAVE_LOOPS | ||
143 | bltu a3, a7, .Lnextbyte # addresses are unsigned: loop while src < end | ||
144 | #endif /* !XCHAL_HAVE_LOOPS */ | ||
145 | .Lbytecopydone: | ||
146 | movi a2, 0 # return success for len bytes copied | ||
147 | retw | ||
148 | |||
149 | /* | ||
150 | * Destination and source are word-aligned. | ||
151 | */ | ||
152 | # copy 16 bytes per iteration for word-aligned dst and word-aligned src | ||
153 | .align 4 # 1 mod 4 alignment for LOOPNEZ | ||
154 | .byte 0 # (0 mod 4 alignment for LBEG) | ||
155 | .Laligned: | ||
156 | #if XCHAL_HAVE_LOOPS | ||
157 | loopnez a7, .Loop1done # a7 = len / 16 iterations (set at .Ldstaligned) | ||
158 | #else /* !XCHAL_HAVE_LOOPS */ | ||
159 | beqz a7, .Loop1done # fewer than 16 bytes: only the tail copies below | ||
160 | slli a8, a7, 4 | ||
161 | add a8, a8, a3 # a8 = end of last 16B source chunk | ||
162 | #endif /* !XCHAL_HAVE_LOOPS */ | ||
163 | .Loop1: | ||
164 | EX(l32i, a6, a3, 0, l_fixup) # loads and stores alternate between a6 and a7 | ||
165 | EX(l32i, a7, a3, 4, l_fixup) # a7 is free as a temp: the count was consumed above | ||
166 | EX(s32i, a6, a5, 0, s_fixup) | ||
167 | EX(l32i, a6, a3, 8, l_fixup) | ||
168 | EX(s32i, a7, a5, 4, s_fixup) | ||
169 | EX(l32i, a7, a3, 12, l_fixup) | ||
170 | EX(s32i, a6, a5, 8, s_fixup) | ||
171 | addi a3, a3, 16 | ||
172 | EX(s32i, a7, a5, 12, s_fixup) | ||
173 | addi a5, a5, 16 | ||
174 | #if !XCHAL_HAVE_LOOPS | ||
175 | bltu a3, a8, .Loop1 # addresses are unsigned: loop while src < end | ||
176 | #endif /* !XCHAL_HAVE_LOOPS */ | ||
177 | .Loop1done: | ||
178 | bbci.l a4, 3, .L2 # skip unless bit 3 of len is set | ||
179 | # copy 8 bytes | ||
180 | EX(l32i, a6, a3, 0, l_fixup) | ||
181 | EX(l32i, a7, a3, 4, l_fixup) | ||
182 | addi a3, a3, 8 | ||
183 | EX(s32i, a6, a5, 0, s_fixup) | ||
184 | EX(s32i, a7, a5, 4, s_fixup) | ||
185 | addi a5, a5, 8 | ||
186 | .L2: | ||
187 | bbci.l a4, 2, .L3 # skip unless bit 2 of len is set | ||
188 | # copy 4 bytes | ||
189 | EX(l32i, a6, a3, 0, l_fixup) | ||
190 | addi a3, a3, 4 | ||
191 | EX(s32i, a6, a5, 0, s_fixup) | ||
192 | addi a5, a5, 4 | ||
193 | .L3: | ||
194 | bbci.l a4, 1, .L4 # skip unless bit 1 of len is set | ||
195 | # copy 2 bytes | ||
196 | EX(l16ui, a6, a3, 0, l_fixup) | ||
197 | addi a3, a3, 2 | ||
198 | EX(s16i, a6, a5, 0, s_fixup) | ||
199 | addi a5, a5, 2 | ||
200 | .L4: | ||
201 | bbci.l a4, 0, .L5 # skip unless bit 0 of len is set | ||
202 | # copy 1 byte | ||
203 | EX(l8ui, a6, a3, 0, l_fixup) | ||
204 | EX(s8i, a6, a5, 0, s_fixup) | ||
205 | .L5: | ||
206 | movi a2, 0 # return success for len bytes copied | ||
207 | retw | ||
208 | |||
209 | /* | ||
210 | * Destination is aligned, Source is unaligned: load aligned source words and merge neighbouring pairs with ALIGN (shift amount set by SSA8 before entry) | ||
211 | */ | ||
212 | |||
213 | .align 4 | ||
214 | .byte 0 # 1 mod 4 alignment for LOOPNEZ | ||
215 | # (0 mod 4 alignment for LBEG) | ||
216 | .Lsrcunaligned: | ||
217 | # copy 16 bytes per iteration for word-aligned dst and unaligned src | ||
218 | and a10, a3, a8 # save unalignment offset for below (a10 must stay intact until .L13) | ||
219 | sub a3, a3, a10 # align a3 (to avoid sim warnings only; not needed for hardware) | ||
220 | EX(l32i, a6, a3, 0, l_fixup) # load first word | ||
221 | #if XCHAL_HAVE_LOOPS | ||
222 | loopnez a7, .Loop2done | ||
223 | #else /* !XCHAL_HAVE_LOOPS */ | ||
224 | beqz a7, .Loop2done | ||
225 | slli a12, a7, 4 # use a12, not a10: a10 still holds the misalignment offset | ||
226 | add a12, a12, a3 # a12 = end of last 16B source chunk | ||
227 | #endif /* !XCHAL_HAVE_LOOPS */ | ||
228 | .Loop2: | ||
229 | EX(l32i, a7, a3, 4, l_fixup) | ||
230 | EX(l32i, a8, a3, 8, l_fixup) | ||
231 | ALIGN( a6, a6, a7) | ||
232 | EX(s32i, a6, a5, 0, s_fixup) | ||
233 | EX(l32i, a9, a3, 12, l_fixup) | ||
234 | ALIGN( a7, a7, a8) | ||
235 | EX(s32i, a7, a5, 4, s_fixup) | ||
236 | EX(l32i, a6, a3, 16, l_fixup) # a6 = first word for the next iteration / tail | ||
237 | ALIGN( a8, a8, a9) | ||
238 | EX(s32i, a8, a5, 8, s_fixup) | ||
239 | addi a3, a3, 16 | ||
240 | ALIGN( a9, a9, a6) | ||
241 | EX(s32i, a9, a5, 12, s_fixup) | ||
242 | addi a5, a5, 16 | ||
243 | #if !XCHAL_HAVE_LOOPS | ||
244 | bltu a3, a12, .Loop2 # addresses are unsigned: loop while src < end | ||
245 | #endif /* !XCHAL_HAVE_LOOPS */ | ||
246 | .Loop2done: | ||
247 | bbci.l a4, 3, .L12 # skip unless bit 3 of len is set | ||
248 | # copy 8 bytes | ||
249 | EX(l32i, a7, a3, 4, l_fixup) | ||
250 | EX(l32i, a8, a3, 8, l_fixup) | ||
251 | ALIGN( a6, a6, a7) | ||
252 | EX(s32i, a6, a5, 0, s_fixup) | ||
253 | addi a3, a3, 8 | ||
254 | ALIGN( a7, a7, a8) | ||
255 | EX(s32i, a7, a5, 4, s_fixup) | ||
256 | addi a5, a5, 8 | ||
257 | mov a6, a8 # a6 = most recently loaded source word | ||
258 | .L12: | ||
259 | bbci.l a4, 2, .L13 # skip unless bit 2 of len is set | ||
260 | # copy 4 bytes | ||
261 | EX(l32i, a7, a3, 4, l_fixup) | ||
262 | addi a3, a3, 4 | ||
263 | ALIGN( a6, a6, a7) | ||
264 | EX(s32i, a6, a5, 0, s_fixup) | ||
265 | addi a5, a5, 4 | ||
266 | mov a6, a7 # a6 = most recently loaded source word | ||
267 | .L13: | ||
268 | add a3, a3, a10 # readjust a3 with correct misalignment | ||
269 | bbci.l a4, 1, .L14 # skip unless bit 1 of len is set | ||
270 | # copy 2 bytes | ||
271 | EX(l8ui, a6, a3, 0, l_fixup) | ||
272 | EX(l8ui, a7, a3, 1, l_fixup) | ||
273 | addi a3, a3, 2 | ||
274 | EX(s8i, a6, a5, 0, s_fixup) | ||
275 | EX(s8i, a7, a5, 1, s_fixup) | ||
276 | addi a5, a5, 2 | ||
277 | .L14: | ||
278 | bbci.l a4, 0, .L15 # skip unless bit 0 of len is set | ||
279 | # copy 1 byte | ||
280 | EX(l8ui, a6, a3, 0, l_fixup) | ||
281 | EX(s8i, a6, a5, 0, s_fixup) | ||
282 | .L15: | ||
283 | movi a2, 0 # return success for len bytes copied | ||
284 | retw | ||
285 | |||
286 | |||
287 | .section .fixup, "ax" | ||
288 | .align 4 | ||
289 | |||
290 | /* a2 = original dst; a5 = current dst; a11= original len | ||
291 | * bytes_copied = a5 - a2 (a5 is advanced only after each group of stores, so this can undercount) | ||
292 | * retval = bytes_not_copied = original len - bytes_copied | ||
293 | * retval = a11 - (a5 - a2) | ||
294 | * | ||
295 | * Clearing the remaining pieces of kernel memory plugs security | ||
296 | * holes. This functionality is the equivalent of the *_zeroing | ||
297 | * functions that some architectures provide. | ||
298 | */ | ||
299 | |||
300 | .Lmemset: /* literal with the address of memset, loaded by l32r in l_fixup */ | ||
301 | .word memset | ||
302 | |||
303 | s_fixup: /* handler named by EX() for the store instructions */ | ||
304 | sub a2, a5, a2 /* a2 <-- bytes copied */ | ||
305 | sub a2, a11, a2 /* a2 <-- bytes not copied */ | ||
306 | retw | ||
307 | |||
308 | l_fixup: /* handler named by EX() for the load instructions; also zeroes the uncopied part of dst */ | ||
309 | sub a2, a5, a2 /* a2 <-- bytes copied */ | ||
310 | sub a2, a11, a2 /* a2 <-- bytes not copied == return value */ | ||
311 | |||
312 | /* void *memset(void *s, int c, size_t n); */ | ||
313 | mov a6, a5 /* s = current dst */ | ||
314 | movi a7, 0 /* c */ | ||
315 | mov a8, a2 /* n = bytes not copied */ | ||
316 | l32r a4, .Lmemset /* a4 <-- address of memset */ | ||
317 | callx4 a4 /* memset(current dst, 0, bytes not copied) */ | ||
318 | /* Ignore memset return value in a6. */ | ||
319 | /* a2 still contains bytes not copied. */ | ||
320 | retw | ||
321 | |||