diff options
| author | Chris Zankel <czankel@tensilica.com> | 2005-06-24 01:01:20 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-06-24 03:05:21 -0400 |
| commit | 249ac17e96811acc3c6402317dd5d5c89d2cbf68 (patch) | |
| tree | 0a174065460de196861b85f1d9a48c88b2a2675a /arch/xtensa/lib | |
| parent | 5a0015d62668e64c8b6e02e360fbbea121bfd5e6 (diff) | |
[PATCH] xtensa: Architecture support for Tensilica Xtensa Part 4
The attached patch provides part 4 of an architecture implementation for the
Tensilica Xtensa CPU series.
Signed-off-by: Chris Zankel <chris@zankel.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'arch/xtensa/lib')
| -rw-r--r-- | arch/xtensa/lib/Makefile | 7 | ||||
| -rw-r--r-- | arch/xtensa/lib/checksum.S | 410 | ||||
| -rw-r--r-- | arch/xtensa/lib/memcopy.S | 315 | ||||
| -rw-r--r-- | arch/xtensa/lib/memset.S | 160 | ||||
| -rw-r--r-- | arch/xtensa/lib/pci-auto.c | 352 | ||||
| -rw-r--r-- | arch/xtensa/lib/strcasecmp.c | 32 | ||||
| -rw-r--r-- | arch/xtensa/lib/strncpy_user.S | 224 | ||||
| -rw-r--r-- | arch/xtensa/lib/strnlen_user.S | 147 | ||||
| -rw-r--r-- | arch/xtensa/lib/usercopy.S | 321 |
9 files changed, 1968 insertions, 0 deletions
diff --git a/arch/xtensa/lib/Makefile b/arch/xtensa/lib/Makefile new file mode 100644 index 000000000000..ed935b58e8a4 --- /dev/null +++ b/arch/xtensa/lib/Makefile | |||
| @@ -0,0 +1,7 @@ | |||
| 1 | # | ||
| 2 | # Makefile for Xtensa-specific library files. | ||
| 3 | # | ||
| 4 | |||
| 5 | lib-y += memcopy.o memset.o checksum.o strcasecmp.o \ | ||
| 6 | usercopy.o strncpy_user.o strnlen_user.o | ||
| 7 | lib-$(CONFIG_PCI) += pci-auto.o | ||
diff --git a/arch/xtensa/lib/checksum.S b/arch/xtensa/lib/checksum.S new file mode 100644 index 000000000000..e2d64dfd530c --- /dev/null +++ b/arch/xtensa/lib/checksum.S | |||
| @@ -0,0 +1,410 @@ | |||
| 1 | /* | ||
| 2 | * INET An implementation of the TCP/IP protocol suite for the LINUX | ||
| 3 | * operating system. INET is implemented using the BSD Socket | ||
| 4 | * interface as the means of communication with the user level. | ||
| 5 | * | ||
| 6 | * IP/TCP/UDP checksumming routines | ||
| 7 | * | ||
| 8 | * Xtensa version: Copyright (C) 2001 Tensilica, Inc. by Kevin Chea | ||
| 9 | * Optimized by Joe Taylor | ||
| 10 | * | ||
| 11 | * This program is free software; you can redistribute it and/or | ||
| 12 | * modify it under the terms of the GNU General Public License | ||
| 13 | * as published by the Free Software Foundation; either version | ||
| 14 | * 2 of the License, or (at your option) any later version. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include <asm/errno.h> | ||
| 18 | #include <linux/linkage.h> | ||
| 19 | #define _ASMLANGUAGE | ||
| 20 | #include <xtensa/config/core.h> | ||
| 21 | |||
| 22 | /* | ||
| 23 | * computes a partial checksum, e.g. for TCP/UDP fragments | ||
| 24 | */ | ||
| 25 | |||
| 26 | /* | ||
| 27 | * unsigned int csum_partial(const unsigned char *buf, int len, | ||
| 28 | * unsigned int sum); | ||
| 29 | * a2 = buf | ||
| 30 | * a3 = len | ||
| 31 | * a4 = sum | ||
| 32 | * | ||
| 33 | * This function assumes 2- or 4-byte alignment. Other alignments will fail! | ||
| 34 | */ | ||
| 35 | |||
| 36 | /* ONES_ADD converts twos-complement math to ones-complement. */ | ||
| 37 | #define ONES_ADD(sum, val) \ | ||
| 38 | add sum, sum, val ; \ | ||
| 39 | bgeu sum, val, 99f ; \ | ||
| 40 | addi sum, sum, 1 ; \ | ||
| 41 | 99: ; | ||
| 42 | |||
| 43 | .text | ||
| 44 | ENTRY(csum_partial) | ||
| 45 | /* | ||
| 46 | * Experiments with Ethernet and SLIP connections show that buf | ||
| 47 | * is aligned on either a 2-byte or 4-byte boundary. | ||
| 48 | */ | ||
| 49 | entry sp, 32 | ||
| 50 | extui a5, a2, 0, 2 | ||
| 51 | bnez a5, 8f /* branch if 2-byte aligned */ | ||
| 52 | /* Fall-through on common case, 4-byte alignment */ | ||
| 53 | 1: | ||
| 54 | srli a5, a3, 5 /* 32-byte chunks */ | ||
| 55 | #if XCHAL_HAVE_LOOPS | ||
| 56 | loopgtz a5, 2f | ||
| 57 | #else | ||
| 58 | beqz a5, 2f | ||
| 59 | slli a5, a5, 5 | ||
| 60 | add a5, a5, a2 /* a5 = end of last 32-byte chunk */ | ||
| 61 | .Loop1: | ||
| 62 | #endif | ||
| 63 | l32i a6, a2, 0 | ||
| 64 | l32i a7, a2, 4 | ||
| 65 | ONES_ADD(a4, a6) | ||
| 66 | ONES_ADD(a4, a7) | ||
| 67 | l32i a6, a2, 8 | ||
| 68 | l32i a7, a2, 12 | ||
| 69 | ONES_ADD(a4, a6) | ||
| 70 | ONES_ADD(a4, a7) | ||
| 71 | l32i a6, a2, 16 | ||
| 72 | l32i a7, a2, 20 | ||
| 73 | ONES_ADD(a4, a6) | ||
| 74 | ONES_ADD(a4, a7) | ||
| 75 | l32i a6, a2, 24 | ||
| 76 | l32i a7, a2, 28 | ||
| 77 | ONES_ADD(a4, a6) | ||
| 78 | ONES_ADD(a4, a7) | ||
| 79 | addi a2, a2, 4*8 | ||
| 80 | #if !XCHAL_HAVE_LOOPS | ||
| 81 | blt a2, a5, .Loop1 | ||
| 82 | #endif | ||
| 83 | 2: | ||
| 84 | extui a5, a3, 2, 3 /* remaining 4-byte chunks */ | ||
| 85 | #if XCHAL_HAVE_LOOPS | ||
| 86 | loopgtz a5, 3f | ||
| 87 | #else | ||
| 88 | beqz a5, 3f | ||
| 89 | slli a5, a5, 2 | ||
| 90 | add a5, a5, a2 /* a5 = end of last 4-byte chunk */ | ||
| 91 | .Loop2: | ||
| 92 | #endif | ||
| 93 | l32i a6, a2, 0 | ||
| 94 | ONES_ADD(a4, a6) | ||
| 95 | addi a2, a2, 4 | ||
| 96 | #if !XCHAL_HAVE_LOOPS | ||
| 97 | blt a2, a5, .Loop2 | ||
| 98 | #endif | ||
| 99 | 3: | ||
| 100 | _bbci.l a3, 1, 5f /* remaining 2-byte chunk */ | ||
| 101 | l16ui a6, a2, 0 | ||
| 102 | ONES_ADD(a4, a6) | ||
| 103 | addi a2, a2, 2 | ||
| 104 | 5: | ||
| 105 | _bbci.l a3, 0, 7f /* remaining 1-byte chunk */ | ||
| 106 | 6: l8ui a6, a2, 0 | ||
| 107 | #ifdef __XTENSA_EB__ | ||
| 108 | slli a6, a6, 8 /* load byte into bits 8..15 */ | ||
| 109 | #endif | ||
| 110 | ONES_ADD(a4, a6) | ||
| 111 | 7: | ||
| 112 | mov a2, a4 | ||
| 113 | retw | ||
| 114 | |||
| 115 | /* uncommon case, buf is 2-byte aligned */ | ||
| 116 | 8: | ||
| 117 | beqz a3, 7b /* branch if len == 0 */ | ||
| 118 | beqi a3, 1, 6b /* branch if len == 1 */ | ||
| 119 | |||
| 120 | extui a5, a2, 0, 1 | ||
| 121 | bnez a5, 8f /* branch if 1-byte aligned */ | ||
| 122 | |||
| 123 | l16ui a6, a2, 0 /* common case, len >= 2 */ | ||
| 124 | ONES_ADD(a4, a6) | ||
| 125 | addi a2, a2, 2 /* adjust buf */ | ||
| 126 | addi a3, a3, -2 /* adjust len */ | ||
| 127 | j 1b /* now buf is 4-byte aligned */ | ||
| 128 | |||
| 129 | /* case: odd-byte aligned, len > 1 | ||
| 130 | * This case is dog slow, so don't give us an odd address. | ||
| 131 | * (I don't think this ever happens, but just in case.) | ||
| 132 | */ | ||
| 133 | 8: | ||
| 134 | srli a5, a3, 2 /* 4-byte chunks */ | ||
| 135 | #if XCHAL_HAVE_LOOPS | ||
| 136 | loopgtz a5, 2f | ||
| 137 | #else | ||
| 138 | beqz a5, 2f | ||
| 139 | slli a5, a5, 2 | ||
| 140 | add a5, a5, a2 /* a5 = end of last 4-byte chunk */ | ||
| 141 | .Loop3: | ||
| 142 | #endif | ||
| 143 | l8ui a6, a2, 0 /* bits 24..31 */ | ||
| 144 | l16ui a7, a2, 1 /* bits 8..23 */ | ||
| 145 | l8ui a8, a2, 3 /* bits 0..7 */ | ||
| 146 | #ifdef __XTENSA_EB__ | ||
| 147 | slli a6, a6, 24 | ||
| 148 | #else | ||
| 149 | slli a8, a8, 24 | ||
| 150 | #endif | ||
| 151 | slli a7, a7, 8 | ||
| 152 | or a7, a7, a6 | ||
| 153 | or a7, a7, a8 | ||
| 154 | ONES_ADD(a4, a7) | ||
| 155 | addi a2, a2, 4 | ||
| 156 | #if !XCHAL_HAVE_LOOPS | ||
| 157 | blt a2, a5, .Loop3 | ||
| 158 | #endif | ||
| 159 | 2: | ||
| 160 | _bbci.l a3, 1, 3f /* remaining 2-byte chunk, still odd addr */ | ||
| 161 | l8ui a6, a2, 0 | ||
| 162 | l8ui a7, a2, 1 | ||
| 163 | #ifdef __XTENSA_EB__ | ||
| 164 | slli a6, a6, 8 | ||
| 165 | #else | ||
| 166 | slli a7, a7, 8 | ||
| 167 | #endif | ||
| 168 | or a7, a7, a6 | ||
| 169 | ONES_ADD(a4, a7) | ||
| 170 | addi a2, a2, 2 | ||
| 171 | 3: | ||
| 172 | j 5b /* branch to handle the remaining byte */ | ||
| 173 | |||
| 174 | |||
| 175 | |||
| 176 | /* | ||
| 177 | * Copy from src while checksumming, otherwise like csum_partial | ||
| 178 | * | ||
| 179 | * The macros SRC and DST specify the type of access for the instruction, | ||
| 180 | * thus we can call a custom exception handler for each access type. | ||
| 181 | */ | ||
| 182 | |||
| 183 | #define SRC(y...) \ | ||
| 184 | 9999: y; \ | ||
| 185 | .section __ex_table, "a"; \ | ||
| 186 | .long 9999b, 6001f ; \ | ||
| 187 | .previous | ||
| 188 | |||
| 189 | #define DST(y...) \ | ||
| 190 | 9999: y; \ | ||
| 191 | .section __ex_table, "a"; \ | ||
| 192 | .long 9999b, 6002f ; \ | ||
| 193 | .previous | ||
| 194 | |||
| 195 | /* | ||
| 196 | unsigned int csum_partial_copy_generic (const char *src, char *dst, int len, | ||
| 197 | int sum, int *src_err_ptr, int *dst_err_ptr) | ||
| 198 | a2 = src | ||
| 199 | a3 = dst | ||
| 200 | a4 = len | ||
| 201 | a5 = sum | ||
| 202 | a6 = src_err_ptr | ||
| 203 | a7 = dst_err_ptr | ||
| 204 | a8 = temp | ||
| 205 | a9 = temp | ||
| 206 | a10 = temp | ||
| 207 | a11 = original len for exception handling | ||
| 208 | a12 = original dst for exception handling | ||
| 209 | |||
| 210 | This function is optimized for 4-byte aligned addresses. Other | ||
| 211 | alignments work, but not nearly as efficiently. | ||
| 212 | */ | ||
| 213 | |||
| 214 | ENTRY(csum_partial_copy_generic) | ||
| 215 | entry sp, 32 | ||
| 216 | mov a12, a3 | ||
| 217 | mov a11, a4 | ||
| 218 | or a10, a2, a3 | ||
| 219 | |||
| 220 | /* We optimize the following alignment tests for the 4-byte | ||
| 221 | aligned case. Two bbsi.l instructions might seem more optimal | ||
| 222 | (commented out below). However, both labels 5: and 3: are out | ||
| 223 | of the imm8 range, so the assembler relaxes them into | ||
| 224 | equivalent bbci.l, j combinations, which is actually | ||
| 225 | slower. */ | ||
| 226 | |||
| 227 | extui a9, a10, 0, 2 | ||
| 228 | beqz a9, 1f /* branch if both are 4-byte aligned */ | ||
| 229 | bbsi.l a10, 0, 5f /* branch if one address is odd */ | ||
| 230 | j 3f /* one address is 2-byte aligned */ | ||
| 231 | |||
| 232 | /* _bbsi.l a10, 0, 5f */ /* branch if odd address */ | ||
| 233 | /* _bbsi.l a10, 1, 3f */ /* branch if 2-byte-aligned address */ | ||
| 234 | |||
| 235 | 1: | ||
| 236 | /* src and dst are both 4-byte aligned */ | ||
| 237 | srli a10, a4, 5 /* 32-byte chunks */ | ||
| 238 | #if XCHAL_HAVE_LOOPS | ||
| 239 | loopgtz a10, 2f | ||
| 240 | #else | ||
| 241 | beqz a10, 2f | ||
| 242 | slli a10, a10, 5 | ||
| 243 | add a10, a10, a2 /* a10 = end of last 32-byte src chunk */ | ||
| 244 | .Loop5: | ||
| 245 | #endif | ||
| 246 | SRC( l32i a9, a2, 0 ) | ||
| 247 | SRC( l32i a8, a2, 4 ) | ||
| 248 | DST( s32i a9, a3, 0 ) | ||
| 249 | DST( s32i a8, a3, 4 ) | ||
| 250 | ONES_ADD(a5, a9) | ||
| 251 | ONES_ADD(a5, a8) | ||
| 252 | SRC( l32i a9, a2, 8 ) | ||
| 253 | SRC( l32i a8, a2, 12 ) | ||
| 254 | DST( s32i a9, a3, 8 ) | ||
| 255 | DST( s32i a8, a3, 12 ) | ||
| 256 | ONES_ADD(a5, a9) | ||
| 257 | ONES_ADD(a5, a8) | ||
| 258 | SRC( l32i a9, a2, 16 ) | ||
| 259 | SRC( l32i a8, a2, 20 ) | ||
| 260 | DST( s32i a9, a3, 16 ) | ||
| 261 | DST( s32i a8, a3, 20 ) | ||
| 262 | ONES_ADD(a5, a9) | ||
| 263 | ONES_ADD(a5, a8) | ||
| 264 | SRC( l32i a9, a2, 24 ) | ||
| 265 | SRC( l32i a8, a2, 28 ) | ||
| 266 | DST( s32i a9, a3, 24 ) | ||
| 267 | DST( s32i a8, a3, 28 ) | ||
| 268 | ONES_ADD(a5, a9) | ||
| 269 | ONES_ADD(a5, a8) | ||
| 270 | addi a2, a2, 32 | ||
| 271 | addi a3, a3, 32 | ||
| 272 | #if !XCHAL_HAVE_LOOPS | ||
| 273 | blt a2, a10, .Loop5 | ||
| 274 | #endif | ||
| 275 | 2: | ||
| 276 | extui a10, a4, 2, 3 /* remaining 4-byte chunks */ | ||
| 277 | extui a4, a4, 0, 2 /* reset len for general-case, 2-byte chunks */ | ||
| 278 | #if XCHAL_HAVE_LOOPS | ||
| 279 | loopgtz a10, 3f | ||
| 280 | #else | ||
| 281 | beqz a10, 3f | ||
| 282 | slli a10, a10, 2 | ||
| 283 | add a10, a10, a2 /* a10 = end of last 4-byte src chunk */ | ||
| 284 | .Loop6: | ||
| 285 | #endif | ||
| 286 | SRC( l32i a9, a2, 0 ) | ||
| 287 | DST( s32i a9, a3, 0 ) | ||
| 288 | ONES_ADD(a5, a9) | ||
| 289 | addi a2, a2, 4 | ||
| 290 | addi a3, a3, 4 | ||
| 291 | #if !XCHAL_HAVE_LOOPS | ||
| 292 | blt a2, a10, .Loop6 | ||
| 293 | #endif | ||
| 294 | 3: | ||
| 295 | /* | ||
| 296 | Control comes to here in two cases: (1) It may fall through | ||
| 297 | to here from the 4-byte alignment case to process, at most, | ||
| 298 | one 2-byte chunk. (2) It branches to here from above if | ||
| 299 | either src or dst is 2-byte aligned, and we process all bytes | ||
| 300 | here, except for perhaps a trailing odd byte. It's | ||
| 301 | inefficient, so align your addresses to 4-byte boundaries. | ||
| 302 | |||
| 303 | a2 = src | ||
| 304 | a3 = dst | ||
| 305 | a4 = len | ||
| 306 | a5 = sum | ||
| 307 | */ | ||
| 308 | srli a10, a4, 1 /* 2-byte chunks */ | ||
| 309 | #if XCHAL_HAVE_LOOPS | ||
| 310 | loopgtz a10, 4f | ||
| 311 | #else | ||
| 312 | beqz a10, 4f | ||
| 313 | slli a10, a10, 1 | ||
| 314 | add a10, a10, a2 /* a10 = end of last 2-byte src chunk */ | ||
| 315 | .Loop7: | ||
| 316 | #endif | ||
| 317 | SRC( l16ui a9, a2, 0 ) | ||
| 318 | DST( s16i a9, a3, 0 ) | ||
| 319 | ONES_ADD(a5, a9) | ||
| 320 | addi a2, a2, 2 | ||
| 321 | addi a3, a3, 2 | ||
| 322 | #if !XCHAL_HAVE_LOOPS | ||
| 323 | blt a2, a10, .Loop7 | ||
| 324 | #endif | ||
| 325 | 4: | ||
| 326 | /* This section processes a possible trailing odd byte. */ | ||
| 327 | _bbci.l a4, 0, 8f /* 1-byte chunk */ | ||
| 328 | SRC( l8ui a9, a2, 0 ) | ||
| 329 | DST( s8i a9, a3, 0 ) | ||
| 330 | #ifdef __XTENSA_EB__ | ||
| 331 | slli a9, a9, 8 /* shift byte to bits 8..15 */ | ||
| 332 | #endif | ||
| 333 | ONES_ADD(a5, a9) | ||
| 334 | 8: | ||
| 335 | mov a2, a5 | ||
| 336 | retw | ||
| 337 | |||
| 338 | 5: | ||
| 339 | /* Control branches to here when either src or dst is odd. We | ||
| 340 | process all bytes using 8-bit accesses. Grossly inefficient, | ||
| 341 | so don't feed us an odd address. */ | ||
| 342 | |||
| 343 | srli a10, a4, 1 /* handle in pairs for 16-bit csum */ | ||
| 344 | #if XCHAL_HAVE_LOOPS | ||
| 345 | loopgtz a10, 6f | ||
| 346 | #else | ||
| 347 | beqz a10, 6f | ||
| 348 | slli a10, a10, 1 | ||
| 349 | add a10, a10, a2 /* a10 = end of last odd-aligned, 2-byte src chunk */ | ||
| 350 | .Loop8: | ||
| 351 | #endif | ||
| 352 | SRC( l8ui a9, a2, 0 ) | ||
| 353 | SRC( l8ui a8, a2, 1 ) | ||
| 354 | DST( s8i a9, a3, 0 ) | ||
| 355 | DST( s8i a8, a3, 1 ) | ||
| 356 | #ifdef __XTENSA_EB__ | ||
| 357 | slli a9, a9, 8 /* combine into a single 16-bit value */ | ||
| 358 | #else /* for checksum computation */ | ||
| 359 | slli a8, a8, 8 | ||
| 360 | #endif | ||
| 361 | or a9, a9, a8 | ||
| 362 | ONES_ADD(a5, a9) | ||
| 363 | addi a2, a2, 2 | ||
| 364 | addi a3, a3, 2 | ||
| 365 | #if !XCHAL_HAVE_LOOPS | ||
| 366 | blt a2, a10, .Loop8 | ||
| 367 | #endif | ||
| 368 | 6: | ||
| 369 | j 4b /* process the possible trailing odd byte */ | ||
| 370 | |||
| 371 | |||
| 372 | # Exception handler: | ||
| 373 | .section .fixup, "ax" | ||
| 374 | /* | ||
| 375 | a6 = src_err_ptr | ||
| 376 | a7 = dst_err_ptr | ||
| 377 | a11 = original len for exception handling | ||
| 378 | a12 = original dst for exception handling | ||
| 379 | */ | ||
| 380 | |||
| 381 | 6001: | ||
| 382 | _movi a2, -EFAULT | ||
| 383 | s32i a2, a6, 0 /* src_err_ptr */ | ||
| 384 | |||
| 385 | # clear the complete destination - computing the rest | ||
| 386 | # is too much work | ||
| 387 | movi a2, 0 | ||
| 388 | #if XCHAL_HAVE_LOOPS | ||
| 389 | loopgtz a11, 2f | ||
| 390 | #else | ||
| 391 | beqz a11, 2f | ||
| 392 | add a11, a11, a12 /* a11 = ending address */ | ||
| 393 | .Leloop: | ||
| 394 | #endif | ||
| 395 | s8i a2, a12, 0 | ||
| 396 | addi a12, a12, 1 | ||
| 397 | #if !XCHAL_HAVE_LOOPS | ||
| 398 | blt a12, a11, .Leloop | ||
| 399 | #endif | ||
| 400 | 2: | ||
| 401 | retw | ||
| 402 | |||
| 403 | 6002: | ||
| 404 | movi a2, -EFAULT | ||
| 405 | s32i a2, a7, 0 /* dst_err_ptr */ | ||
| 406 | movi a2, 0 | ||
| 407 | retw | ||
| 408 | |||
| 409 | .previous | ||
| 410 | |||
diff --git a/arch/xtensa/lib/memcopy.S b/arch/xtensa/lib/memcopy.S new file mode 100644 index 000000000000..e8f6d7eb7222 --- /dev/null +++ b/arch/xtensa/lib/memcopy.S | |||
| @@ -0,0 +1,315 @@ | |||
| 1 | /* | ||
| 2 | * arch/xtensa/lib/hal/memcopy.S -- Core HAL library functions | ||
| 3 | * xthal_memcpy and xthal_bcopy | ||
| 4 | * | ||
| 5 | * This file is subject to the terms and conditions of the GNU General Public | ||
| 6 | * License. See the file "COPYING" in the main directory of this archive | ||
| 7 | * for more details. | ||
| 8 | * | ||
| 9 | * Copyright (C) 2002 - 2005 Tensilica Inc. | ||
| 10 | */ | ||
| 11 | |||
| 12 | #include <xtensa/coreasm.h> | ||
| 13 | |||
| 14 | .macro src_b r, w0, w1 | ||
| 15 | #ifdef __XTENSA_EB__ | ||
| 16 | src \r, \w0, \w1 | ||
| 17 | #else | ||
| 18 | src \r, \w1, \w0 | ||
| 19 | #endif | ||
| 20 | .endm | ||
| 21 | |||
| 22 | .macro ssa8 r | ||
| 23 | #ifdef __XTENSA_EB__ | ||
| 24 | ssa8b \r | ||
| 25 | #else | ||
| 26 | ssa8l \r | ||
| 27 | #endif | ||
| 28 | .endm | ||
| 29 | |||
| 30 | |||
| 31 | /* | ||
| 32 | * void *memcpy(void *dst, const void *src, size_t len); | ||
| 33 | * void *memmove(void *dst, const void *src, size_t len); | ||
| 34 | * void *bcopy(const void *src, void *dst, size_t len); | ||
| 35 | * | ||
| 36 | * This function is intended to do the same thing as the standard | ||
| 37 | * library function memcpy() (or bcopy()) for most cases. | ||
| 38 | * However, where the source and/or destination references | ||
| 39 | * an instruction RAM or ROM or a data RAM or ROM, that | ||
| 40 | * source and/or destination will always be accessed with | ||
| 41 | * 32-bit load and store instructions (as required for these | ||
| 42 | * types of devices). | ||
| 43 | * | ||
| 44 | * !!!!!!! XTFIXME: | ||
| 45 | * !!!!!!! Handling of IRAM/IROM has not yet | ||
| 46 | * !!!!!!! been implemented. | ||
| 47 | * | ||
| 48 | * The bcopy version is provided here to avoid the overhead | ||
| 49 | * of an extra call, for callers that require this convention. | ||
| 50 | * | ||
| 51 | * The (general case) algorithm is as follows: | ||
| 52 | * If destination is unaligned, align it by conditionally | ||
| 53 | * copying 1 and 2 bytes. | ||
| 54 | * If source is aligned, | ||
| 55 | * do 16 bytes with a loop, and then finish up with | ||
| 56 | * 8, 4, 2, and 1 byte copies conditional on the length; | ||
| 57 | * else (if source is unaligned), | ||
| 58 | * do the same, but use SRC to align the source data. | ||
| 59 | * This code tries to use fall-through branches for the common | ||
| 60 | * case of aligned source and destination and multiple | ||
| 61 | * of 4 (or 8) length. | ||
| 62 | * | ||
| 63 | * Register use: | ||
| 64 | * a0/ return address | ||
| 65 | * a1/ stack pointer | ||
| 66 | * a2/ return value | ||
| 67 | * a3/ src | ||
| 68 | * a4/ length | ||
| 69 | * a5/ dst | ||
| 70 | * a6/ tmp | ||
| 71 | * a7/ tmp | ||
| 72 | * a8/ tmp | ||
| 73 | * a9/ tmp | ||
| 74 | * a10/ tmp | ||
| 75 | * a11/ tmp | ||
| 76 | */ | ||
| 77 | |||
| 78 | .text | ||
| 79 | .align 4 | ||
| 80 | .global bcopy | ||
| 81 | .type bcopy,@function | ||
| 82 | bcopy: | ||
| 83 | entry sp, 16 # minimal stack frame | ||
| 84 | # a2=src, a3=dst, a4=len | ||
| 85 | mov a5, a3 # copy dst so that a2 is return value | ||
| 86 | mov a3, a2 | ||
| 87 | mov a2, a5 | ||
| 88 | j .Lcommon # go to common code for memcpy+bcopy | ||
| 89 | |||
| 90 | |||
| 91 | /* | ||
| 92 | * Byte by byte copy | ||
| 93 | */ | ||
| 94 | .align 4 | ||
| 95 | .byte 0 # 1 mod 4 alignment for LOOPNEZ | ||
| 96 | # (0 mod 4 alignment for LBEG) | ||
| 97 | .Lbytecopy: | ||
| 98 | #if XCHAL_HAVE_LOOPS | ||
| 99 | loopnez a4, .Lbytecopydone | ||
| 100 | #else /* !XCHAL_HAVE_LOOPS */ | ||
| 101 | beqz a4, .Lbytecopydone | ||
| 102 | add a7, a3, a4 # a7 = end address for source | ||
| 103 | #endif /* !XCHAL_HAVE_LOOPS */ | ||
| 104 | .Lnextbyte: | ||
| 105 | l8ui a6, a3, 0 | ||
| 106 | addi a3, a3, 1 | ||
| 107 | s8i a6, a5, 0 | ||
| 108 | addi a5, a5, 1 | ||
| 109 | #if !XCHAL_HAVE_LOOPS | ||
| 110 | blt a3, a7, .Lnextbyte | ||
| 111 | #endif /* !XCHAL_HAVE_LOOPS */ | ||
| 112 | .Lbytecopydone: | ||
| 113 | retw | ||
| 114 | |||
| 115 | /* | ||
| 116 | * Destination is unaligned | ||
| 117 | */ | ||
| 118 | |||
| 119 | .align 4 | ||
| 120 | .Ldst1mod2: # dst is only byte aligned | ||
| 121 | _bltui a4, 7, .Lbytecopy # do short copies byte by byte | ||
| 122 | |||
| 123 | # copy 1 byte | ||
| 124 | l8ui a6, a3, 0 | ||
| 125 | addi a3, a3, 1 | ||
| 126 | addi a4, a4, -1 | ||
| 127 | s8i a6, a5, 0 | ||
| 128 | addi a5, a5, 1 | ||
| 129 | _bbci.l a5, 1, .Ldstaligned # if dst is now aligned, then | ||
| 130 | # return to main algorithm | ||
| 131 | .Ldst2mod4: # dst 16-bit aligned | ||
| 132 | # copy 2 bytes | ||
| 133 | _bltui a4, 6, .Lbytecopy # do short copies byte by byte | ||
| 134 | l8ui a6, a3, 0 | ||
| 135 | l8ui a7, a3, 1 | ||
| 136 | addi a3, a3, 2 | ||
| 137 | addi a4, a4, -2 | ||
| 138 | s8i a6, a5, 0 | ||
| 139 | s8i a7, a5, 1 | ||
| 140 | addi a5, a5, 2 | ||
| 141 | j .Ldstaligned # dst is now aligned, return to main algorithm | ||
| 142 | |||
| 143 | .align 4 | ||
| 144 | .global memcpy | ||
| 145 | .type memcpy,@function | ||
| 146 | memcpy: | ||
| 147 | .global memmove | ||
| 148 | .type memmove,@function | ||
| 149 | memmove: | ||
| 150 | |||
| 151 | entry sp, 16 # minimal stack frame | ||
| 152 | # a2/ dst, a3/ src, a4/ len | ||
| 153 | mov a5, a2 # copy dst so that a2 is return value | ||
| 154 | .Lcommon: | ||
| 155 | _bbsi.l a2, 0, .Ldst1mod2 # if dst is 1 mod 2 | ||
| 156 | _bbsi.l a2, 1, .Ldst2mod4 # if dst is 2 mod 4 | ||
| 157 | .Ldstaligned: # return here from .Ldst?mod? once dst is aligned | ||
| 158 | srli a7, a4, 4 # number of loop iterations with 16B | ||
| 159 | # per iteration | ||
| 160 | movi a8, 3 # if source is not aligned, | ||
| 161 | _bany a3, a8, .Lsrcunaligned # then use shifting copy | ||
| 162 | /* | ||
| 163 | * Destination and source are word-aligned, use word copy. | ||
| 164 | */ | ||
| 165 | # copy 16 bytes per iteration for word-aligned dst and word-aligned src | ||
| 166 | #if XCHAL_HAVE_LOOPS | ||
| 167 | loopnez a7, .Loop1done | ||
| 168 | #else /* !XCHAL_HAVE_LOOPS */ | ||
| 169 | beqz a7, .Loop1done | ||
| 170 | slli a8, a7, 4 | ||
| 171 | add a8, a8, a3 # a8 = end of last 16B source chunk | ||
| 172 | #endif /* !XCHAL_HAVE_LOOPS */ | ||
| 173 | .Loop1: | ||
| 174 | l32i a6, a3, 0 | ||
| 175 | l32i a7, a3, 4 | ||
| 176 | s32i a6, a5, 0 | ||
| 177 | l32i a6, a3, 8 | ||
| 178 | s32i a7, a5, 4 | ||
| 179 | l32i a7, a3, 12 | ||
| 180 | s32i a6, a5, 8 | ||
| 181 | addi a3, a3, 16 | ||
| 182 | s32i a7, a5, 12 | ||
| 183 | addi a5, a5, 16 | ||
| 184 | #if !XCHAL_HAVE_LOOPS | ||
| 185 | blt a3, a8, .Loop1 | ||
| 186 | #endif /* !XCHAL_HAVE_LOOPS */ | ||
| 187 | .Loop1done: | ||
| 188 | bbci.l a4, 3, .L2 | ||
| 189 | # copy 8 bytes | ||
| 190 | l32i a6, a3, 0 | ||
| 191 | l32i a7, a3, 4 | ||
| 192 | addi a3, a3, 8 | ||
| 193 | s32i a6, a5, 0 | ||
| 194 | s32i a7, a5, 4 | ||
| 195 | addi a5, a5, 8 | ||
| 196 | .L2: | ||
| 197 | bbsi.l a4, 2, .L3 | ||
| 198 | bbsi.l a4, 1, .L4 | ||
| 199 | bbsi.l a4, 0, .L5 | ||
| 200 | retw | ||
| 201 | .L3: | ||
| 202 | # copy 4 bytes | ||
| 203 | l32i a6, a3, 0 | ||
| 204 | addi a3, a3, 4 | ||
| 205 | s32i a6, a5, 0 | ||
| 206 | addi a5, a5, 4 | ||
| 207 | bbsi.l a4, 1, .L4 | ||
| 208 | bbsi.l a4, 0, .L5 | ||
| 209 | retw | ||
| 210 | .L4: | ||
| 211 | # copy 2 bytes | ||
| 212 | l16ui a6, a3, 0 | ||
| 213 | addi a3, a3, 2 | ||
| 214 | s16i a6, a5, 0 | ||
| 215 | addi a5, a5, 2 | ||
| 216 | bbsi.l a4, 0, .L5 | ||
| 217 | retw | ||
| 218 | .L5: | ||
| 219 | # copy 1 byte | ||
| 220 | l8ui a6, a3, 0 | ||
| 221 | s8i a6, a5, 0 | ||
| 222 | retw | ||
| 223 | |||
| 224 | /* | ||
| 225 | * Destination is aligned, Source is unaligned | ||
| 226 | */ | ||
| 227 | |||
| 228 | .align 4 | ||
| 229 | .Lsrcunaligned: | ||
| 230 | _beqz a4, .Ldone # avoid loading anything for zero-length copies | ||
| 231 | # copy 16 bytes per iteration for word-aligned dst and unaligned src | ||
| 232 | ssa8 a3 # set shift amount from byte offset | ||
| 233 | #define SIM_CHECKS_ALIGNMENT 1 /* set to 1 when running on ISS (simulator) with the | ||
| 234 | lint or ferret client, or 0 to save a few cycles */ | ||
| 235 | #if XCHAL_UNALIGNED_LOAD_EXCEPTION || SIM_CHECKS_ALIGNMENT | ||
| 236 | and a11, a3, a8 # save unalignment offset for below | ||
| 237 | sub a3, a3, a11 # align a3 | ||
| 238 | #endif | ||
| 239 | l32i a6, a3, 0 # load first word | ||
| 240 | #if XCHAL_HAVE_LOOPS | ||
| 241 | loopnez a7, .Loop2done | ||
| 242 | #else /* !XCHAL_HAVE_LOOPS */ | ||
| 243 | beqz a7, .Loop2done | ||
| 244 | slli a10, a7, 4 | ||
| 245 | add a10, a10, a3 # a10 = end of last 16B source chunk | ||
| 246 | #endif /* !XCHAL_HAVE_LOOPS */ | ||
| 247 | .Loop2: | ||
| 248 | l32i a7, a3, 4 | ||
| 249 | l32i a8, a3, 8 | ||
| 250 | src_b a6, a6, a7 | ||
| 251 | s32i a6, a5, 0 | ||
| 252 | l32i a9, a3, 12 | ||
| 253 | src_b a7, a7, a8 | ||
| 254 | s32i a7, a5, 4 | ||
| 255 | l32i a6, a3, 16 | ||
| 256 | src_b a8, a8, a9 | ||
| 257 | s32i a8, a5, 8 | ||
| 258 | addi a3, a3, 16 | ||
| 259 | src_b a9, a9, a6 | ||
| 260 | s32i a9, a5, 12 | ||
| 261 | addi a5, a5, 16 | ||
| 262 | #if !XCHAL_HAVE_LOOPS | ||
| 263 | blt a3, a10, .Loop2 | ||
| 264 | #endif /* !XCHAL_HAVE_LOOPS */ | ||
| 265 | .Loop2done: | ||
| 266 | bbci.l a4, 3, .L12 | ||
| 267 | # copy 8 bytes | ||
| 268 | l32i a7, a3, 4 | ||
| 269 | l32i a8, a3, 8 | ||
| 270 | src_b a6, a6, a7 | ||
| 271 | s32i a6, a5, 0 | ||
| 272 | addi a3, a3, 8 | ||
| 273 | src_b a7, a7, a8 | ||
| 274 | s32i a7, a5, 4 | ||
| 275 | addi a5, a5, 8 | ||
| 276 | mov a6, a8 | ||
| 277 | .L12: | ||
| 278 | bbci.l a4, 2, .L13 | ||
| 279 | # copy 4 bytes | ||
| 280 | l32i a7, a3, 4 | ||
| 281 | addi a3, a3, 4 | ||
| 282 | src_b a6, a6, a7 | ||
| 283 | s32i a6, a5, 0 | ||
| 284 | addi a5, a5, 4 | ||
| 285 | mov a6, a7 | ||
| 286 | .L13: | ||
| 287 | #if XCHAL_UNALIGNED_LOAD_EXCEPTION || SIM_CHECKS_ALIGNMENT | ||
| 288 | add a3, a3, a11 # readjust a3 with correct misalignment | ||
| 289 | #endif | ||
| 290 | bbsi.l a4, 1, .L14 | ||
| 291 | bbsi.l a4, 0, .L15 | ||
| 292 | .Ldone: retw | ||
| 293 | .L14: | ||
| 294 | # copy 2 bytes | ||
| 295 | l8ui a6, a3, 0 | ||
| 296 | l8ui a7, a3, 1 | ||
| 297 | addi a3, a3, 2 | ||
| 298 | s8i a6, a5, 0 | ||
| 299 | s8i a7, a5, 1 | ||
| 300 | addi a5, a5, 2 | ||
| 301 | bbsi.l a4, 0, .L15 | ||
| 302 | retw | ||
| 303 | .L15: | ||
| 304 | # copy 1 byte | ||
| 305 | l8ui a6, a3, 0 | ||
| 306 | s8i a6, a5, 0 | ||
| 307 | retw | ||
| 308 | |||
| 309 | /* | ||
| 310 | * Local Variables: | ||
| 311 | * mode:fundamental | ||
| 312 | * comment-start: "# " | ||
| 313 | * comment-start-skip: "# *" | ||
| 314 | * End: | ||
| 315 | */ | ||
diff --git a/arch/xtensa/lib/memset.S b/arch/xtensa/lib/memset.S new file mode 100644 index 000000000000..4de25134bc62 --- /dev/null +++ b/arch/xtensa/lib/memset.S | |||
| @@ -0,0 +1,160 @@ | |||
| 1 | /* | ||
| 2 | * arch/xtensa/lib/memset.S | ||
| 3 | * | ||
| 4 | * ANSI C standard library function memset | ||
| 5 | * (Well, almost. .fixup code might return zero.) | ||
| 6 | * | ||
| 7 | * This file is subject to the terms and conditions of the GNU General | ||
| 8 | * Public License. See the file "COPYING" in the main directory of | ||
| 9 | * this archive for more details. | ||
| 10 | * | ||
| 11 | * Copyright (C) 2002 Tensilica Inc. | ||
| 12 | */ | ||
| 13 | |||
| 14 | #include <xtensa/coreasm.h> | ||
| 15 | |||
| 16 | /* | ||
| 17 | * void *memset(void *dst, int c, size_t length) | ||
| 18 | * | ||
| 19 | * The algorithm is as follows: | ||
| 20 | * Create a word with c in all byte positions | ||
| 21 | * If the destination is aligned, | ||
| 22 | * do 16B chunks with a loop, and then finish up with | ||
| 23 | * 8B, 4B, 2B, and 1B stores conditional on the length. | ||
| 24 | * If destination is unaligned, align it by conditionally | ||
| 25 | * setting 1B and 2B and then go to aligned case. | ||
| 26 | * This code tries to use fall-through branches for the common | ||
| 27 | * case of an aligned destination (except for the branches to | ||
| 28 | * the alignment labels). | ||
| 29 | */ | ||
| 30 | |||
| 31 | /* Load or store instructions that may cause exceptions use the EX macro. */ | ||
| 32 | |||
| 33 | #define EX(insn,reg1,reg2,offset,handler) \ | ||
| 34 | 9: insn reg1, reg2, offset; \ | ||
| 35 | .section __ex_table, "a"; \ | ||
| 36 | .word 9b, handler; \ | ||
| 37 | .previous | ||
| 38 | |||
| 39 | |||
| 40 | .text | ||
| 41 | .align 4 | ||
| 42 | .global memset | ||
| 43 | .type memset,@function | ||
| 44 | memset: | ||
| 45 | entry sp, 16 # minimal stack frame | ||
| 46 | # a2/ dst, a3/ c, a4/ length | ||
| 47 | extui a3, a3, 0, 8 # mask to just 8 bits | ||
| 48 | slli a7, a3, 8 # duplicate character in all bytes of word | ||
| 49 | or a3, a3, a7 # ... | ||
| 50 | slli a7, a3, 16 # ... | ||
| 51 | or a3, a3, a7 # ... | ||
| 52 | mov a5, a2 # copy dst so that a2 is return value | ||
| 53 | movi a6, 3 # for alignment tests | ||
| 54 | bany a2, a6, .Ldstunaligned # if dst is unaligned | ||
| 55 | .L0: # return here from .Ldstunaligned when dst is aligned | ||
| 56 | srli a7, a4, 4 # number of loop iterations with 16B | ||
| 57 | # per iteration | ||
| 58 | bnez a4, .Laligned | ||
| 59 | retw | ||
| 60 | |||
| 61 | /* | ||
| 62 | * Destination is word-aligned. | ||
| 63 | */ | ||
| 64 | # set 16 bytes per iteration for word-aligned dst | ||
| 65 | .align 4 # 1 mod 4 alignment for LOOPNEZ | ||
| 66 | .byte 0 # (0 mod 4 alignment for LBEG) | ||
| 67 | .Laligned: | ||
| 68 | #if XCHAL_HAVE_LOOPS | ||
| 69 | loopnez a7, .Loop1done | ||
| 70 | #else /* !XCHAL_HAVE_LOOPS */ | ||
| 71 | beqz a7, .Loop1done | ||
| 72 | slli a6, a7, 4 | ||
| 73 | add a6, a6, a5 # a6 = end of last 16B chunk | ||
| 74 | #endif /* !XCHAL_HAVE_LOOPS */ | ||
| 75 | .Loop1: | ||
| 76 | EX(s32i, a3, a5, 0, memset_fixup) | ||
| 77 | EX(s32i, a3, a5, 4, memset_fixup) | ||
| 78 | EX(s32i, a3, a5, 8, memset_fixup) | ||
| 79 | EX(s32i, a3, a5, 12, memset_fixup) | ||
| 80 | addi a5, a5, 16 | ||
| 81 | #if !XCHAL_HAVE_LOOPS | ||
| 82 | blt a5, a6, .Loop1 | ||
| 83 | #endif /* !XCHAL_HAVE_LOOPS */ | ||
| 84 | .Loop1done: | ||
| 85 | bbci.l a4, 3, .L2 | ||
| 86 | # set 8 bytes | ||
| 87 | EX(s32i, a3, a5, 0, memset_fixup) | ||
| 88 | EX(s32i, a3, a5, 4, memset_fixup) | ||
| 89 | addi a5, a5, 8 | ||
| 90 | .L2: | ||
| 91 | bbci.l a4, 2, .L3 | ||
| 92 | # set 4 bytes | ||
| 93 | EX(s32i, a3, a5, 0, memset_fixup) | ||
| 94 | addi a5, a5, 4 | ||
| 95 | .L3: | ||
| 96 | bbci.l a4, 1, .L4 | ||
| 97 | # set 2 bytes | ||
| 98 | EX(s16i, a3, a5, 0, memset_fixup) | ||
| 99 | addi a5, a5, 2 | ||
| 100 | .L4: | ||
| 101 | bbci.l a4, 0, .L5 | ||
| 102 | # set 1 byte | ||
| 103 | EX(s8i, a3, a5, 0, memset_fixup) | ||
| 104 | .L5: | ||
| 105 | .Lret1: | ||
| 106 | retw | ||
| 107 | |||
| 108 | /* | ||
| 109 | * Destination is unaligned | ||
| 110 | */ | ||
| 111 | |||
| 112 | .Ldstunaligned: | ||
| 113 | bltui a4, 8, .Lbyteset # do short copies byte by byte | ||
| 114 | bbci.l a5, 0, .L20 # branch if dst alignment half-aligned | ||
| 115 | # dst is only byte aligned | ||
| 116 | # set 1 byte | ||
| 117 | EX(s8i, a3, a5, 0, memset_fixup) | ||
| 118 | addi a5, a5, 1 | ||
| 119 | addi a4, a4, -1 | ||
| 120 | # now retest if dst aligned | ||
| 121 | bbci.l a5, 1, .L0 # if now aligned, return to main algorithm | ||
| 122 | .L20: | ||
| 123 | # dst half-aligned | ||
| 124 | # set 2 bytes | ||
| 125 | EX(s16i, a3, a5, 0, memset_fixup) | ||
| 126 | addi a5, a5, 2 | ||
| 127 | addi a4, a4, -2 | ||
| 128 | j .L0 # dst is now aligned, return to main algorithm | ||
| 129 | |||
| 130 | /* | ||
| 131 | * Byte by byte set | ||
| 132 | */ | ||
| 133 | .align 4 | ||
| 134 | .byte 0 # 1 mod 4 alignment for LOOPNEZ | ||
| 135 | # (0 mod 4 alignment for LBEG) | ||
| 136 | .Lbyteset: | ||
| 137 | #if XCHAL_HAVE_LOOPS | ||
| 138 | loopnez a4, .Lbytesetdone | ||
| 139 | #else /* !XCHAL_HAVE_LOOPS */ | ||
| 140 | beqz a4, .Lbytesetdone | ||
| 141 | add a6, a5, a4 # a6 = ending address | ||
| 142 | #endif /* !XCHAL_HAVE_LOOPS */ | ||
| 143 | .Lbyteloop: | ||
| 144 | EX(s8i, a3, a5, 0, memset_fixup) | ||
| 145 | addi a5, a5, 1 | ||
| 146 | #if !XCHAL_HAVE_LOOPS | ||
| 147 | blt a5, a6, .Lbyteloop | ||
| 148 | #endif /* !XCHAL_HAVE_LOOPS */ | ||
| 149 | .Lbytesetdone: | ||
| 150 | retw | ||
| 151 | |||
| 152 | |||
| 153 | .section .fixup, "ax" | ||
| 154 | .align 4 | ||
| 155 | |||
| 156 | /* We return zero if a failure occurred. */ | ||
| 157 | |||
| 158 | memset_fixup: | ||
| 159 | movi a2, 0 | ||
| 160 | retw | ||
diff --git a/arch/xtensa/lib/pci-auto.c b/arch/xtensa/lib/pci-auto.c new file mode 100644 index 000000000000..90c790f6123b --- /dev/null +++ b/arch/xtensa/lib/pci-auto.c | |||
| @@ -0,0 +1,352 @@ | |||
| 1 | /* | ||
| 2 | * arch/xtensa/lib/pci-auto.c | ||
| 3 | * | ||
| 4 | * PCI autoconfiguration library | ||
| 5 | * | ||
| 6 | * Copyright (C) 2001 - 2005 Tensilica Inc. | ||
| 7 | * | ||
| 8 | * Chris Zankel <zankel@tensilica.com, cez@zankel.net> | ||
| 9 | * | ||
| 10 | * Based on work from Matt Porter <mporter@mvista.com> | ||
| 11 | * | ||
| 12 | * This program is free software; you can redistribute it and/or modify it | ||
| 13 | * under the terms of the GNU General Public License as published by the | ||
| 14 | * Free Software Foundation; either version 2 of the License, or (at your | ||
| 15 | * option) any later version. | ||
| 16 | */ | ||
| 17 | |||
| 18 | #include <linux/kernel.h> | ||
| 19 | #include <linux/init.h> | ||
| 20 | #include <linux/pci.h> | ||
| 21 | |||
| 22 | #include <asm/pci-bridge.h> | ||
| 23 | |||
| 24 | |||
| 25 | /* | ||
| 26 | * | ||
| 27 | * Setting up a PCI controller: | ||
| 28 | * | ||
| 29 | * pci_ctrl->first_busno = <first bus number (0)> | ||
| 30 | * pci_ctrl->last_busno = <last bus number (0xff)> | ||
| 31 | * pci_ctrl->ops = <PCI config operations> | ||
| 32 | * pci_ctrl->map_irq = <function to return the interrupt number for a device> | ||
| 33 | * | ||
| 34 | * pci_ctrl->io_space.start = <IO space start address (PCI view)> | ||
| 35 | * pci_ctrl->io_space.end = <IO space end address (PCI view)> | ||
| 36 | * pci_ctrl->io_space.base = <IO space offset: address 0 from CPU space> | ||
| 37 | * pci_ctrl->mem_space.start = <MEM space start address (PCI view)> | ||
| 38 | * pci_ctrl->mem_space.end = <MEM space end address (PCI view)> | ||
| 39 | * pci_ctrl->mem_space.base = <MEM space offset: address 0 from CPU space> | ||
| 40 | * | ||
| 41 | * pcibios_init_resource(&pci_ctrl->io_resource, <IO space start>, | ||
| 42 | * <IO space end>, IORESOURCE_IO, "PCI host bridge"); | ||
| 43 | * pcibios_init_resource(&pci_ctrl->mem_resources[0], <MEM space start>, | ||
| 44 | * <MEM space end>, IORESOURCE_MEM, "PCI host bridge"); | ||
| 45 | * | ||
| 46 | * pci_ctrl->last_busno = pciauto_bus_scan(pci_ctrl,pci_ctrl->first_busno); | ||
| 47 | * | ||
| 48 | * int __init pciauto_bus_scan(struct pci_controller *pci_ctrl, int current_bus) | ||
| 49 | * | ||
| 50 | */ | ||
| 51 | |||
| 52 | |||
| 53 | /* define DEBUG to print some debugging messages. */ | ||
| 54 | |||
| 55 | #undef DEBUG | ||
| 56 | |||
| 57 | #ifdef DEBUG | ||
| 58 | # define DBG(x...) printk(x) | ||
| 59 | #else | ||
| 60 | # define DBG(x...) | ||
| 61 | #endif | ||
| 62 | |||
| 63 | static int pciauto_upper_iospc; | ||
| 64 | static int pciauto_upper_memspc; | ||
| 65 | |||
| 66 | static struct pci_dev pciauto_dev; | ||
| 67 | static struct pci_bus pciauto_bus; | ||
| 68 | |||
| 69 | /* | ||
| 70 | * Helper functions | ||
| 71 | */ | ||
| 72 | |||
| 73 | /* Initialize the bars of a PCI device. */ | ||
| 74 | |||
| 75 | static void __init | ||
| 76 | pciauto_setup_bars(struct pci_dev *dev, int bar_limit) | ||
| 77 | { | ||
| 78 | int bar_size; | ||
| 79 | int bar, bar_nr; | ||
| 80 | int *upper_limit; | ||
| 81 | int found_mem64 = 0; | ||
| 82 | |||
| 83 | for (bar = PCI_BASE_ADDRESS_0, bar_nr = 0; | ||
| 84 | bar <= bar_limit; | ||
| 85 | bar+=4, bar_nr++) | ||
| 86 | { | ||
| 87 | /* Tickle the BAR and get the size */ | ||
| 88 | pci_write_config_dword(dev, bar, 0xffffffff); | ||
| 89 | pci_read_config_dword(dev, bar, &bar_size); | ||
| 90 | |||
| 91 | /* If BAR is not implemented go to the next BAR */ | ||
| 92 | if (!bar_size) | ||
| 93 | continue; | ||
| 94 | |||
| 95 | /* Check the BAR type and set our address mask */ | ||
| 96 | if (bar_size & PCI_BASE_ADDRESS_SPACE_IO) | ||
| 97 | { | ||
| 98 | bar_size &= PCI_BASE_ADDRESS_IO_MASK; | ||
| 99 | upper_limit = &pciauto_upper_iospc; | ||
| 100 | DBG("PCI Autoconfig: BAR %d, I/O, ", bar_nr); | ||
| 101 | } | ||
| 102 | else | ||
| 103 | { | ||
| 104 | if ((bar_size & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == | ||
| 105 | PCI_BASE_ADDRESS_MEM_TYPE_64) | ||
| 106 | found_mem64 = 1; | ||
| 107 | |||
| 108 | bar_size &= PCI_BASE_ADDRESS_MEM_MASK; | ||
| 109 | upper_limit = &pciauto_upper_memspc; | ||
| 110 | DBG("PCI Autoconfig: BAR %d, Mem, ", bar_nr); | ||
| 111 | } | ||
| 112 | |||
| 113 | /* Allocate a base address (bar_size is negative!) */ | ||
| 114 | *upper_limit = (*upper_limit + bar_size) & bar_size; | ||
| 115 | |||
| 116 | /* Write it out and update our limit */ | ||
| 117 | pci_write_config_dword(dev, bar, *upper_limit); | ||
| 118 | |||
| 119 | /* | ||
| 120 | * If we are a 64-bit decoder then increment to the | ||
| 121 | * upper 32 bits of the bar and force it to locate | ||
| 122 | * in the lower 4GB of memory. | ||
| 123 | */ | ||
| 124 | |||
| 125 | if (found_mem64) | ||
| 126 | pci_write_config_dword(dev, (bar+=4), 0x00000000); | ||
| 127 | |||
| 128 | DBG("size=0x%x, address=0x%x\n", ~bar_size + 1, *upper_limit); | ||
| 129 | } | ||
| 130 | } | ||
| 131 | |||
| 132 | /* Initialize the interrupt number. */ | ||
| 133 | |||
| 134 | static void __init | ||
| 135 | pciauto_setup_irq(struct pci_controller* pci_ctrl,struct pci_dev *dev,int devfn) | ||
| 136 | { | ||
| 137 | u8 pin; | ||
| 138 | int irq = 0; | ||
| 139 | |||
| 140 | pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); | ||
| 141 | |||
| 142 | /* Fix illegal pin numbers. */ | ||
| 143 | |||
| 144 | if (pin == 0 || pin > 4) | ||
| 145 | pin = 1; | ||
| 146 | |||
| 147 | if (pci_ctrl->map_irq) | ||
| 148 | irq = pci_ctrl->map_irq(dev, PCI_SLOT(devfn), pin); | ||
| 149 | |||
| 150 | if (irq == -1) | ||
| 151 | irq = 0; | ||
| 152 | |||
| 153 | DBG("PCI Autoconfig: Interrupt %d, pin %d\n", irq, pin); | ||
| 154 | |||
| 155 | pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); | ||
| 156 | } | ||
| 157 | |||
| 158 | |||
| 159 | static void __init | ||
| 160 | pciauto_prescan_setup_bridge(struct pci_dev *dev, int current_bus, | ||
| 161 | int sub_bus, int *iosave, int *memsave) | ||
| 162 | { | ||
| 163 | /* Configure bus number registers */ | ||
| 164 | pci_write_config_byte(dev, PCI_PRIMARY_BUS, current_bus); | ||
| 165 | pci_write_config_byte(dev, PCI_SECONDARY_BUS, sub_bus + 1); | ||
| 166 | pci_write_config_byte(dev, PCI_SUBORDINATE_BUS, 0xff); | ||
| 167 | |||
| 168 | /* Round memory allocator to 1MB boundary */ | ||
| 169 | pciauto_upper_memspc &= ~(0x100000 - 1); | ||
| 170 | *memsave = pciauto_upper_memspc; | ||
| 171 | |||
| 172 | /* Round I/O allocator to 4KB boundary */ | ||
| 173 | pciauto_upper_iospc &= ~(0x1000 - 1); | ||
| 174 | *iosave = pciauto_upper_iospc; | ||
| 175 | |||
| 176 | /* Set up memory and I/O filter limits, assume 32-bit I/O space */ | ||
| 177 | pci_write_config_word(dev, PCI_MEMORY_LIMIT, | ||
| 178 | ((pciauto_upper_memspc - 1) & 0xfff00000) >> 16); | ||
| 179 | pci_write_config_byte(dev, PCI_IO_LIMIT, | ||
| 180 | ((pciauto_upper_iospc - 1) & 0x0000f000) >> 8); | ||
| 181 | pci_write_config_word(dev, PCI_IO_LIMIT_UPPER16, | ||
| 182 | ((pciauto_upper_iospc - 1) & 0xffff0000) >> 16); | ||
| 183 | } | ||
| 184 | |||
| 185 | static void __init | ||
| 186 | pciauto_postscan_setup_bridge(struct pci_dev *dev, int current_bus, int sub_bus, | ||
| 187 | int *iosave, int *memsave) | ||
| 188 | { | ||
| 189 | int cmdstat; | ||
| 190 | |||
| 191 | /* Configure bus number registers */ | ||
| 192 | pci_write_config_byte(dev, PCI_SUBORDINATE_BUS, sub_bus); | ||
| 193 | |||
| 194 | /* | ||
| 195 | * Round memory allocator to 1MB boundary. | ||
| 196 | * If no space used, allocate minimum. | ||
| 197 | */ | ||
| 198 | pciauto_upper_memspc &= ~(0x100000 - 1); | ||
| 199 | if (*memsave == pciauto_upper_memspc) | ||
| 200 | pciauto_upper_memspc -= 0x00100000; | ||
| 201 | |||
| 202 | pci_write_config_word(dev, PCI_MEMORY_BASE, pciauto_upper_memspc >> 16); | ||
| 203 | |||
| 204 | /* Allocate 1MB for prefetch */ | ||
| 205 | pci_write_config_word(dev, PCI_PREF_MEMORY_LIMIT, | ||
| 206 | ((pciauto_upper_memspc - 1) & 0xfff00000) >> 16); | ||
| 207 | |||
| 208 | pciauto_upper_memspc -= 0x100000; | ||
| 209 | |||
| 210 | pci_write_config_word(dev, PCI_PREF_MEMORY_BASE, | ||
| 211 | pciauto_upper_memspc >> 16); | ||
| 212 | |||
| 213 | /* Round I/O allocator to 4KB boundary */ | ||
| 214 | pciauto_upper_iospc &= ~(0x1000 - 1); | ||
| 215 | if (*iosave == pciauto_upper_iospc) | ||
| 216 | pciauto_upper_iospc -= 0x1000; | ||
| 217 | |||
| 218 | pci_write_config_byte(dev, PCI_IO_BASE, | ||
| 219 | (pciauto_upper_iospc & 0x0000f000) >> 8); | ||
| 220 | pci_write_config_word(dev, PCI_IO_BASE_UPPER16, | ||
| 221 | pciauto_upper_iospc >> 16); | ||
| 222 | |||
| 223 | /* Enable memory and I/O accesses, enable bus master */ | ||
| 224 | pci_read_config_dword(dev, PCI_COMMAND, &cmdstat); | ||
| 225 | pci_write_config_dword(dev, PCI_COMMAND, | ||
| 226 | cmdstat | | ||
| 227 | PCI_COMMAND_IO | | ||
| 228 | PCI_COMMAND_MEMORY | | ||
| 229 | PCI_COMMAND_MASTER); | ||
| 230 | } | ||
| 231 | |||
| 232 | /* | ||
| 233 | * Scan the current PCI bus. | ||
| 234 | */ | ||
| 235 | |||
| 236 | |||
| 237 | int __init pciauto_bus_scan(struct pci_controller *pci_ctrl, int current_bus) | ||
| 238 | { | ||
| 239 | int sub_bus, pci_devfn, pci_class, cmdstat, found_multi=0; | ||
| 240 | unsigned short vid; | ||
| 241 | unsigned char header_type; | ||
| 242 | struct pci_dev *dev = &pciauto_dev; | ||
| 243 | |||
| 244 | pciauto_dev.bus = &pciauto_bus; | ||
| 245 | pciauto_dev.sysdata = pci_ctrl; | ||
| 246 | pciauto_bus.ops = pci_ctrl->ops; | ||
| 247 | |||
| 248 | /* | ||
| 249 | * Fetch our I/O and memory space upper boundaries used | ||
| 250 | * to allocate base addresses on this pci_controller. | ||
| 251 | */ | ||
| 252 | |||
| 253 | if (current_bus == pci_ctrl->first_busno) | ||
| 254 | { | ||
| 255 | pciauto_upper_iospc = pci_ctrl->io_resource.end + 1; | ||
| 256 | pciauto_upper_memspc = pci_ctrl->mem_resources[0].end + 1; | ||
| 257 | } | ||
| 258 | |||
| 259 | sub_bus = current_bus; | ||
| 260 | |||
| 261 | for (pci_devfn = 0; pci_devfn < 0xff; pci_devfn++) | ||
| 262 | { | ||
| 263 | /* Skip our host bridge */ | ||
| 264 | if ((current_bus == pci_ctrl->first_busno) && (pci_devfn == 0)) | ||
| 265 | continue; | ||
| 266 | |||
| 267 | if (PCI_FUNC(pci_devfn) && !found_multi) | ||
| 268 | continue; | ||
| 269 | |||
| 270 | pciauto_bus.number = current_bus; | ||
| 271 | pciauto_dev.devfn = pci_devfn; | ||
| 272 | |||
| 273 | /* If config space read fails from this device, move on */ | ||
| 274 | if (pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type)) | ||
| 275 | continue; | ||
| 276 | |||
| 277 | if (!PCI_FUNC(pci_devfn)) | ||
| 278 | found_multi = header_type & 0x80; | ||
| 279 | pci_read_config_word(dev, PCI_VENDOR_ID, &vid); | ||
| 280 | |||
| 281 | if (vid == 0xffff || vid == 0x0000) { | ||
| 282 | found_multi = 0; | ||
| 283 | continue; | ||
| 284 | } | ||
| 285 | |||
| 286 | pci_read_config_dword(dev, PCI_CLASS_REVISION, &pci_class); | ||
| 287 | |||
| 288 | if ((pci_class >> 16) == PCI_CLASS_BRIDGE_PCI) { | ||
| 289 | |||
| 290 | int iosave, memsave; | ||
| 291 | |||
| 292 | DBG("PCI Autoconfig: Found P2P bridge, device %d\n", | ||
| 293 | PCI_SLOT(pci_devfn)); | ||
| 294 | |||
| 295 | /* Allocate PCI I/O and/or memory space */ | ||
| 296 | pciauto_setup_bars(dev, PCI_BASE_ADDRESS_1); | ||
| 297 | |||
| 298 | pciauto_prescan_setup_bridge(dev, current_bus, sub_bus, | ||
| 299 | &iosave, &memsave); | ||
| 300 | sub_bus = pciauto_bus_scan(pci_ctrl, sub_bus+1); | ||
| 301 | pciauto_postscan_setup_bridge(dev, current_bus, sub_bus, | ||
| 302 | &iosave, &memsave); | ||
| 303 | pciauto_bus.number = current_bus; | ||
| 304 | |||
| 305 | continue; | ||
| 306 | |||
| 307 | } | ||
| 308 | |||
| 309 | |||
| 310 | #if 0 | ||
| 311 | /* Skip legacy mode IDE controller */ | ||
| 312 | |||
| 313 | if ((pci_class >> 16) == PCI_CLASS_STORAGE_IDE) { | ||
| 314 | |||
| 315 | unsigned char prg_iface; | ||
| 316 | pci_read_config_byte(dev, PCI_CLASS_PROG, &prg_iface); | ||
| 317 | |||
| 318 | if (!(prg_iface & PCIAUTO_IDE_MODE_MASK)) { | ||
| 319 | DBG("PCI Autoconfig: Skipping legacy mode " | ||
| 320 | "IDE controller\n"); | ||
| 321 | continue; | ||
| 322 | } | ||
| 323 | } | ||
| 324 | #endif | ||
| 325 | |||
| 326 | /* | ||
| 327 | * Found a peripheral, enable some standard | ||
| 328 | * settings | ||
| 329 | */ | ||
| 330 | |||
| 331 | pci_read_config_dword(dev, PCI_COMMAND, &cmdstat); | ||
| 332 | pci_write_config_dword(dev, PCI_COMMAND, | ||
| 333 | cmdstat | | ||
| 334 | PCI_COMMAND_IO | | ||
| 335 | PCI_COMMAND_MEMORY | | ||
| 336 | PCI_COMMAND_MASTER); | ||
| 337 | pci_write_config_byte(dev, PCI_LATENCY_TIMER, 0x80); | ||
| 338 | |||
| 339 | /* Allocate PCI I/O and/or memory space */ | ||
| 340 | DBG("PCI Autoconfig: Found Bus %d, Device %d, Function %d\n", | ||
| 341 | current_bus, PCI_SLOT(pci_devfn), PCI_FUNC(pci_devfn) ); | ||
| 342 | |||
| 343 | pciauto_setup_bars(dev, PCI_BASE_ADDRESS_5); | ||
| 344 | pciauto_setup_irq(pci_ctrl, dev, pci_devfn); | ||
| 345 | } | ||
| 346 | return sub_bus; | ||
| 347 | } | ||
| 348 | |||
| 349 | |||
| 350 | |||
| 351 | |||
| 352 | |||
diff --git a/arch/xtensa/lib/strcasecmp.c b/arch/xtensa/lib/strcasecmp.c new file mode 100644 index 000000000000..165b2d6effa5 --- /dev/null +++ b/arch/xtensa/lib/strcasecmp.c | |||
| @@ -0,0 +1,32 @@ | |||
| 1 | /* | ||
| 2 | * linux/arch/xtensa/lib/strcasecmp.c | ||
| 3 | * | ||
| 4 | * This file is subject to the terms and conditions of the GNU General | ||
| 5 | * Public License. See the file "COPYING" in the main directory of | ||
| 6 | * this archive for more details. | ||
| 7 | * | ||
| 8 | * Copyright (C) 2002 Tensilica Inc. | ||
| 9 | */ | ||
| 10 | |||
| 11 | #include <linux/string.h> | ||
| 12 | |||
| 13 | |||
| 14 | /* We handle nothing here except the C locale. Since this is used in | ||
| 15 | only one place, on strings known to contain only 7 bit ASCII, this | ||
| 16 | is ok. */ | ||
| 17 | |||
| 18 | int strcasecmp(const char *a, const char *b) | ||
| 19 | { | ||
| 20 | int ca, cb; | ||
| 21 | |||
| 22 | do { | ||
| 23 | ca = *a++ & 0xff; | ||
| 24 | cb = *b++ & 0xff; | ||
| 25 | if (ca >= 'A' && ca <= 'Z') | ||
| 26 | ca += 'a' - 'A'; | ||
| 27 | if (cb >= 'A' && cb <= 'Z') | ||
| 28 | cb += 'a' - 'A'; | ||
| 29 | } while (ca == cb && ca != '\0'); | ||
| 30 | |||
| 31 | return ca - cb; | ||
| 32 | } | ||
diff --git a/arch/xtensa/lib/strncpy_user.S b/arch/xtensa/lib/strncpy_user.S new file mode 100644 index 000000000000..71d55df43893 --- /dev/null +++ b/arch/xtensa/lib/strncpy_user.S | |||
| @@ -0,0 +1,224 @@ | |||
| 1 | /* | ||
| 2 | * arch/xtensa/lib/strncpy_user.S | ||
| 3 | * | ||
| 4 | * This file is subject to the terms and conditions of the GNU General | ||
| 5 | * Public License. See the file "COPYING" in the main directory of | ||
| 6 | * this archive for more details. | ||
| 7 | * | ||
| 8 | * Returns: -EFAULT if exception before terminator, N if the entire | ||
| 9 | * buffer filled, else strlen. | ||
| 10 | * | ||
| 11 | * Copyright (C) 2002 Tensilica Inc. | ||
| 12 | */ | ||
| 13 | |||
| 14 | #include <xtensa/coreasm.h> | ||
| 15 | #include <linux/errno.h> | ||
| 16 | |||
| 17 | /* Load or store instructions that may cause exceptions use the EX macro. */ | ||
| 18 | |||
| 19 | #define EX(insn,reg1,reg2,offset,handler) \ | ||
| 20 | 9: insn reg1, reg2, offset; \ | ||
| 21 | .section __ex_table, "a"; \ | ||
| 22 | .word 9b, handler; \ | ||
| 23 | .previous | ||
| 24 | |||
| 25 | /* | ||
| 26 | * char *__strncpy_user(char *dst, const char *src, size_t len) | ||
| 27 | */ | ||
| 28 | .text | ||
| 29 | .begin literal | ||
| 30 | .align 4 | ||
| 31 | .Lmask0: | ||
| 32 | .byte 0xff, 0x00, 0x00, 0x00 | ||
| 33 | .Lmask1: | ||
| 34 | .byte 0x00, 0xff, 0x00, 0x00 | ||
| 35 | .Lmask2: | ||
| 36 | .byte 0x00, 0x00, 0xff, 0x00 | ||
| 37 | .Lmask3: | ||
| 38 | .byte 0x00, 0x00, 0x00, 0xff | ||
| 39 | .end literal | ||
| 40 | |||
| 41 | # Register use | ||
| 42 | # a0/ return address | ||
| 43 | # a1/ stack pointer | ||
| 44 | # a2/ return value | ||
| 45 | # a3/ src | ||
| 46 | # a4/ len | ||
| 47 | # a5/ mask0 | ||
| 48 | # a6/ mask1 | ||
| 49 | # a7/ mask2 | ||
| 50 | # a8/ mask3 | ||
| 51 | # a9/ tmp | ||
| 52 | # a10/ tmp | ||
| 53 | # a11/ dst | ||
| 54 | # a12/ tmp | ||
| 55 | |||
| 56 | .align 4 | ||
| 57 | .global __strncpy_user | ||
| 58 | .type __strncpy_user,@function | ||
| 59 | __strncpy_user: | ||
| 60 | entry sp, 16 # minimal stack frame | ||
| 61 | # a2/ dst, a3/ src, a4/ len | ||
| 62 | mov a11, a2 # leave dst in return value register | ||
| 63 | beqz a4, .Lret # if len is zero | ||
| 64 | l32r a5, .Lmask0 # mask for byte 0 | ||
| 65 | l32r a6, .Lmask1 # mask for byte 1 | ||
| 66 | l32r a7, .Lmask2 # mask for byte 2 | ||
| 67 | l32r a8, .Lmask3 # mask for byte 3 | ||
| 68 | bbsi.l a3, 0, .Lsrc1mod2 # if only 8-bit aligned | ||
| 69 | bbsi.l a3, 1, .Lsrc2mod4 # if only 16-bit aligned | ||
| 70 | .Lsrcaligned: # return here when src is word-aligned | ||
| 71 | srli a12, a4, 2 # number of loop iterations with 4B per loop | ||
| 72 | movi a9, 3 | ||
| 73 | bnone a11, a9, .Laligned | ||
| 74 | j .Ldstunaligned | ||
| 75 | |||
| 76 | .Lsrc1mod2: # src address is odd | ||
| 77 | EX(l8ui, a9, a3, 0, fixup_l) # get byte 0 | ||
| 78 | addi a3, a3, 1 # advance src pointer | ||
| 79 | EX(s8i, a9, a11, 0, fixup_s) # store byte 0 | ||
| 80 | beqz a9, .Lret # if byte 0 is zero | ||
| 81 | addi a11, a11, 1 # advance dst pointer | ||
| 82 | addi a4, a4, -1 # decrement len | ||
| 83 | beqz a4, .Lret # if len is zero | ||
| 84 | bbci.l a3, 1, .Lsrcaligned # if src is now word-aligned | ||
| 85 | |||
| 86 | .Lsrc2mod4: # src address is 2 mod 4 | ||
| 87 | EX(l8ui, a9, a3, 0, fixup_l) # get byte 0 | ||
| 88 | /* 1-cycle interlock */ | ||
| 89 | EX(s8i, a9, a11, 0, fixup_s) # store byte 0 | ||
| 90 | beqz a9, .Lret # if byte 0 is zero | ||
| 91 | addi a11, a11, 1 # advance dst pointer | ||
| 92 | addi a4, a4, -1 # decrement len | ||
| 93 | beqz a4, .Lret # if len is zero | ||
| 94 | EX(l8ui, a9, a3, 1, fixup_l) # get byte 1 | ||
| 95 | addi a3, a3, 2 # advance src pointer | ||
| 96 | EX(s8i, a9, a11, 0, fixup_s) # store byte 1 | ||
| 97 | beqz a9, .Lret # if byte 1 is zero | ||
| 98 | addi a11, a11, 1 # advance dst pointer | ||
| 99 | addi a4, a4, -1 # decrement len | ||
| 100 | bnez a4, .Lsrcaligned # if len is nonzero | ||
| 101 | .Lret: | ||
| 102 | sub a2, a11, a2 # compute strlen | ||
| 103 | retw | ||
| 104 | |||
| 105 | /* | ||
| 106 | * dst is word-aligned, src is word-aligned | ||
| 107 | */ | ||
| 108 | .align 4 # 1 mod 4 alignment for LOOPNEZ | ||
| 109 | .byte 0 # (0 mod 4 alignment for LBEG) | ||
| 110 | .Laligned: | ||
| 111 | #if XCHAL_HAVE_LOOPS | ||
| 112 | loopnez a12, .Loop1done | ||
| 113 | #else | ||
| 114 | beqz a12, .Loop1done | ||
| 115 | slli a12, a12, 2 | ||
| 116 | add a12, a12, a11 # a12 = end of last 4B chunk | ||
| 117 | #endif | ||
| 118 | .Loop1: | ||
| 119 | EX(l32i, a9, a3, 0, fixup_l) # get word from src | ||
| 120 | addi a3, a3, 4 # advance src pointer | ||
| 121 | bnone a9, a5, .Lz0 # if byte 0 is zero | ||
| 122 | bnone a9, a6, .Lz1 # if byte 1 is zero | ||
| 123 | bnone a9, a7, .Lz2 # if byte 2 is zero | ||
| 124 | EX(s32i, a9, a11, 0, fixup_s) # store word to dst | ||
| 125 | bnone a9, a8, .Lz3 # if byte 3 is zero | ||
| 126 | addi a11, a11, 4 # advance dst pointer | ||
| 127 | #if !XCHAL_HAVE_LOOPS | ||
| 128 | blt a11, a12, .Loop1 | ||
| 129 | #endif | ||
| 130 | |||
| 131 | .Loop1done: | ||
| 132 | bbci.l a4, 1, .L100 | ||
| 133 | # copy 2 bytes | ||
| 134 | EX(l16ui, a9, a3, 0, fixup_l) | ||
| 135 | addi a3, a3, 2 # advance src pointer | ||
| 136 | #ifdef __XTENSA_EB__ | ||
| 137 | bnone a9, a7, .Lz0 # if byte 2 is zero | ||
| 138 | bnone a9, a8, .Lz1 # if byte 3 is zero | ||
| 139 | #else | ||
| 140 | bnone a9, a5, .Lz0 # if byte 0 is zero | ||
| 141 | bnone a9, a6, .Lz1 # if byte 1 is zero | ||
| 142 | #endif | ||
| 143 | EX(s16i, a9, a11, 0, fixup_s) | ||
| 144 | addi a11, a11, 2 # advance dst pointer | ||
| 145 | .L100: | ||
| 146 | bbci.l a4, 0, .Lret | ||
| 147 | EX(l8ui, a9, a3, 0, fixup_l) | ||
| 148 | /* slot */ | ||
| 149 | EX(s8i, a9, a11, 0, fixup_s) | ||
| 150 | beqz a9, .Lret # if byte is zero | ||
| 151 | addi a11, a11, 1-3 # advance dst ptr 1, but also cancel | ||
| 152 | # the effect of adding 3 in .Lz3 code | ||
| 153 | /* fall thru to .Lz3 and "retw" */ | ||
| 154 | |||
| 155 | .Lz3: # byte 3 is zero | ||
| 156 | addi a11, a11, 3 # advance dst pointer | ||
| 157 | sub a2, a11, a2 # compute strlen | ||
| 158 | retw | ||
| 159 | .Lz0: # byte 0 is zero | ||
| 160 | #ifdef __XTENSA_EB__ | ||
| 161 | movi a9, 0 | ||
| 162 | #endif /* __XTENSA_EB__ */ | ||
| 163 | EX(s8i, a9, a11, 0, fixup_s) | ||
| 164 | sub a2, a11, a2 # compute strlen | ||
| 165 | retw | ||
| 166 | .Lz1: # byte 1 is zero | ||
| 167 | #ifdef __XTENSA_EB__ | ||
| 168 | extui a9, a9, 16, 16 | ||
| 169 | #endif /* __XTENSA_EB__ */ | ||
| 170 | EX(s16i, a9, a11, 0, fixup_s) | ||
| 171 | addi a11, a11, 1 # advance dst pointer | ||
| 172 | sub a2, a11, a2 # compute strlen | ||
| 173 | retw | ||
| 174 | .Lz2: # byte 2 is zero | ||
| 175 | #ifdef __XTENSA_EB__ | ||
| 176 | extui a9, a9, 16, 16 | ||
| 177 | #endif /* __XTENSA_EB__ */ | ||
| 178 | EX(s16i, a9, a11, 0, fixup_s) | ||
| 179 | movi a9, 0 | ||
| 180 | EX(s8i, a9, a11, 2, fixup_s) | ||
| 181 | addi a11, a11, 2 # advance dst pointer | ||
| 182 | sub a2, a11, a2 # compute strlen | ||
| 183 | retw | ||
| 184 | |||
| 185 | .align 4 # 1 mod 4 alignment for LOOPNEZ | ||
| 186 | .byte 0 # (0 mod 4 alignment for LBEG) | ||
| 187 | .Ldstunaligned: | ||
| 188 | /* | ||
| 189 | * for now just use byte copy loop | ||
| 190 | */ | ||
| 191 | #if XCHAL_HAVE_LOOPS | ||
| 192 | loopnez a4, .Lunalignedend | ||
| 193 | #else | ||
| 194 | beqz a4, .Lunalignedend | ||
| 195 | add a12, a11, a4 # a12 = ending address | ||
| 196 | #endif /* XCHAL_HAVE_LOOPS */ | ||
| 197 | .Lnextbyte: | ||
| 198 | EX(l8ui, a9, a3, 0, fixup_l) | ||
| 199 | addi a3, a3, 1 | ||
| 200 | EX(s8i, a9, a11, 0, fixup_s) | ||
| 201 | beqz a9, .Lunalignedend | ||
| 202 | addi a11, a11, 1 | ||
| 203 | #if !XCHAL_HAVE_LOOPS | ||
| 204 | blt a11, a12, .Lnextbyte | ||
| 205 | #endif | ||
| 206 | |||
| 207 | .Lunalignedend: | ||
| 208 | sub a2, a11, a2 # compute strlen | ||
| 209 | retw | ||
| 210 | |||
| 211 | |||
| 212 | .section .fixup, "ax" | ||
| 213 | .align 4 | ||
| 214 | |||
| 215 | /* For now, just return -EFAULT. Future implementations might | ||
| 216 | * like to clear remaining kernel space, like the fixup | ||
| 217 | * implementation in memset(). Thus, we differentiate between | ||
| 218 | * load/store fixups. */ | ||
| 219 | |||
| 220 | fixup_s: | ||
| 221 | fixup_l: | ||
| 222 | movi a2, -EFAULT | ||
| 223 | retw | ||
| 224 | |||
diff --git a/arch/xtensa/lib/strnlen_user.S b/arch/xtensa/lib/strnlen_user.S new file mode 100644 index 000000000000..cdff4d670f3b --- /dev/null +++ b/arch/xtensa/lib/strnlen_user.S | |||
| @@ -0,0 +1,147 @@ | |||
| 1 | /* | ||
| 2 | * arch/xtensa/lib/strnlen_user.S | ||
| 3 | * | ||
| 4 | * This file is subject to the terms and conditions of the GNU General | ||
| 5 | * Public License. See the file "COPYING" in the main directory of | ||
| 6 | * this archive for more details. | ||
| 7 | * | ||
| 8 | * Returns strnlen, including trailing zero terminator. | ||
| 9 | * Zero indicates error. | ||
| 10 | * | ||
| 11 | * Copyright (C) 2002 Tensilica Inc. | ||
| 12 | */ | ||
| 13 | |||
| 14 | #include <xtensa/coreasm.h> | ||
| 15 | |||
| 16 | /* Load or store instructions that may cause exceptions use the EX macro. */ | ||
| 17 | |||
| 18 | #define EX(insn,reg1,reg2,offset,handler) \ | ||
| 19 | 9: insn reg1, reg2, offset; \ | ||
| 20 | .section __ex_table, "a"; \ | ||
| 21 | .word 9b, handler; \ | ||
| 22 | .previous | ||
| 23 | |||
| 24 | /* | ||
| 25 | * size_t __strnlen_user(const char *s, size_t len) | ||
| 26 | */ | ||
| 27 | .text | ||
| 28 | .begin literal | ||
| 29 | .align 4 | ||
| 30 | .Lmask0: | ||
| 31 | .byte 0xff, 0x00, 0x00, 0x00 | ||
| 32 | .Lmask1: | ||
| 33 | .byte 0x00, 0xff, 0x00, 0x00 | ||
| 34 | .Lmask2: | ||
| 35 | .byte 0x00, 0x00, 0xff, 0x00 | ||
| 36 | .Lmask3: | ||
| 37 | .byte 0x00, 0x00, 0x00, 0xff | ||
| 38 | .end literal | ||
| 39 | |||
| 40 | # Register use: | ||
| 41 | # a2/ src | ||
| 42 | # a3/ len | ||
| 43 | # a4/ tmp | ||
| 44 | # a5/ mask0 | ||
| 45 | # a6/ mask1 | ||
| 46 | # a7/ mask2 | ||
| 47 | # a8/ mask3 | ||
| 48 | # a9/ tmp | ||
| 49 | # a10/ tmp | ||
| 50 | |||
| 51 | .align 4 | ||
| 52 | .global __strnlen_user | ||
| 53 | .type __strnlen_user,@function | ||
| 54 | __strnlen_user: | ||
| 55 | entry sp, 16 # minimal stack frame | ||
| 56 | # a2/ s, a3/ len | ||
| 57 | addi a4, a2, -4 # because we overincrement at the end; | ||
| 58 | # we compensate with load offsets of 4 | ||
| 59 | l32r a5, .Lmask0 # mask for byte 0 | ||
| 60 | l32r a6, .Lmask1 # mask for byte 1 | ||
| 61 | l32r a7, .Lmask2 # mask for byte 2 | ||
| 62 | l32r a8, .Lmask3 # mask for byte 3 | ||
| 63 | bbsi.l a2, 0, .L1mod2 # if only 8-bit aligned | ||
| 64 | bbsi.l a2, 1, .L2mod4 # if only 16-bit aligned | ||
| 65 | |||
| 66 | /* | ||
| 67 | * String is word-aligned. | ||
| 68 | */ | ||
| 69 | .Laligned: | ||
| 70 | srli a10, a3, 2 # number of loop iterations with 4B per loop | ||
| 71 | #if XCHAL_HAVE_LOOPS | ||
| 72 | loopnez a10, .Ldone | ||
| 73 | #else | ||
| 74 | beqz a10, .Ldone | ||
| 75 | slli a10, a10, 2 | ||
| 76 | add a10, a10, a4 # a10 = end of last 4B chunk | ||
| 77 | #endif /* XCHAL_HAVE_LOOPS */ | ||
| 78 | .Loop: | ||
| 79 | EX(l32i, a9, a4, 4, lenfixup) # get next word of string | ||
| 80 | addi a4, a4, 4 # advance string pointer | ||
| 81 | bnone a9, a5, .Lz0 # if byte 0 is zero | ||
| 82 | bnone a9, a6, .Lz1 # if byte 1 is zero | ||
| 83 | bnone a9, a7, .Lz2 # if byte 2 is zero | ||
| 84 | bnone a9, a8, .Lz3 # if byte 3 is zero | ||
| 85 | #if !XCHAL_HAVE_LOOPS | ||
| 86 | blt a4, a10, .Loop | ||
| 87 | #endif | ||
| 88 | |||
| 89 | .Ldone: | ||
| 90 | EX(l32i, a9, a4, 4, lenfixup) # load 4 bytes for remaining checks | ||
| 91 | |||
| 92 | bbci.l a3, 1, .L100 | ||
| 93 | # check two more bytes (bytes 0, 1 of word) | ||
| 94 | addi a4, a4, 2 # advance string pointer | ||
| 95 | bnone a9, a5, .Lz0 # if byte 0 is zero | ||
| 96 | bnone a9, a6, .Lz1 # if byte 1 is zero | ||
| 97 | .L100: | ||
| 98 | bbci.l a3, 0, .L101 | ||
| 99 | # check one more byte (byte 2 of word) | ||
| 100 | # Actually, we don't need to check. Zero or nonzero, we'll add one. | ||
| 101 | # Do not add an extra one for the NULL terminator since we have | ||
| 102 | # exhausted the original len parameter. | ||
| 103 | addi a4, a4, 1 # advance string pointer | ||
| 104 | .L101: | ||
| 105 | sub a2, a4, a2 # compute length | ||
| 106 | retw | ||
| 107 | |||
| 108 | # NOTE that in several places below, we point to the byte just after | ||
| 109 | # the zero byte in order to include the NULL terminator in the count. | ||
| 110 | |||
| 111 | .Lz3: # byte 3 is zero | ||
| 112 | addi a4, a4, 3 # point to zero byte | ||
| 113 | .Lz0: # byte 0 is zero | ||
| 114 | addi a4, a4, 1 # point just beyond zero byte | ||
| 115 | sub a2, a4, a2 # subtract to get length | ||
| 116 | retw | ||
| 117 | .Lz1: # byte 1 is zero | ||
| 118 | addi a4, a4, 1+1 # point just beyond zero byte | ||
| 119 | sub a2, a4, a2 # subtract to get length | ||
| 120 | retw | ||
| 121 | .Lz2: # byte 2 is zero | ||
| 122 | addi a4, a4, 2+1 # point just beyond zero byte | ||
| 123 | sub a2, a4, a2 # subtract to get length | ||
| 124 | retw | ||
| 125 | |||
| 126 | .L1mod2: # address is odd | ||
| 127 | EX(l8ui, a9, a4, 4, lenfixup) # get byte 0 | ||
| 128 | addi a4, a4, 1 # advance string pointer | ||
| 129 | beqz a9, .Lz3 # if byte 0 is zero | ||
| 130 | bbci.l a4, 1, .Laligned # if string pointer is now word-aligned | ||
| 131 | |||
| 132 | .L2mod4: # address is 2 mod 4 | ||
| 133 | addi a4, a4, 2 # advance ptr for aligned access | ||
| 134 | EX(l32i, a9, a4, 0, lenfixup) # get word with first two bytes of string | ||
| 135 | bnone a9, a7, .Lz2 # if byte 2 (of word, not string) is zero | ||
| 136 | bany a9, a8, .Laligned # if byte 3 (of word, not string) is nonzero | ||
| 137 | # byte 3 is zero | ||
| 138 | addi a4, a4, 3+1 # point just beyond zero byte | ||
| 139 | sub a2, a4, a2 # subtract to get length | ||
| 140 | retw | ||
| 141 | |||
| 142 | .section .fixup, "ax" | ||
| 143 | .align 4 | ||
| 144 | lenfixup: | ||
| 145 | movi a2, 0 | ||
| 146 | retw | ||
| 147 | |||
diff --git a/arch/xtensa/lib/usercopy.S b/arch/xtensa/lib/usercopy.S new file mode 100644 index 000000000000..265db2693cbd --- /dev/null +++ b/arch/xtensa/lib/usercopy.S | |||
| @@ -0,0 +1,321 @@ | |||
| 1 | /* | ||
| 2 | * arch/xtensa/lib/usercopy.S | ||
| 3 | * | ||
| 4 | * Copy to/from user space (derived from arch/xtensa/lib/hal/memcopy.S) | ||
| 5 | * | ||
| 6 | * DO NOT COMBINE this function with <arch/xtensa/lib/hal/memcopy.S>. | ||
| 7 | * It needs to remain separate and distinct. The hal files are part | ||
| 8 | * of the Xtensa link-time HAL, and those files may differ per | ||
| 9 | * processor configuration. Patching the kernel for another | ||
| 10 | * processor configuration includes replacing the hal files, and we | ||
| 11 | * could lose the special functionality for accessing user-space | ||
| 12 | * memory during such a patch. We sacrifice a little code space here | ||
| 13 | * in favor to simplify code maintenance. | ||
| 14 | * | ||
| 15 | * This file is subject to the terms and conditions of the GNU General | ||
| 16 | * Public License. See the file "COPYING" in the main directory of | ||
| 17 | * this archive for more details. | ||
| 18 | * | ||
| 19 | * Copyright (C) 2002 Tensilica Inc. | ||
| 20 | */ | ||
| 21 | |||
| 22 | |||
| 23 | /* | ||
| 24 | * size_t __xtensa_copy_user (void *dst, const void *src, size_t len); | ||
| 25 | * | ||
| 26 | * The returned value is the number of bytes not copied. Implies zero | ||
| 27 | * is success. | ||
| 28 | * | ||
| 29 | * The general case algorithm is as follows: | ||
| 30 | * If the destination and source are both aligned, | ||
| 31 | * do 16B chunks with a loop, and then finish up with | ||
| 32 | * 8B, 4B, 2B, and 1B copies conditional on the length. | ||
| 33 | * If destination is aligned and source unaligned, | ||
| 34 | * do the same, but use SRC to align the source data. | ||
| 35 | * If destination is unaligned, align it by conditionally | ||
| 36 | * copying 1B and 2B and then retest. | ||
| 37 | * This code tries to use fall-through branches for the common | ||
| 38 | * case of aligned destinations (except for the branches to | ||
| 39 | * the alignment label). | ||
| 40 | * | ||
| 41 | * Register use: | ||
| 42 | * a0/ return address | ||
| 43 | * a1/ stack pointer | ||
| 44 | * a2/ return value | ||
| 45 | * a3/ src | ||
| 46 | * a4/ length | ||
| 47 | * a5/ dst | ||
| 48 | * a6/ tmp | ||
| 49 | * a7/ tmp | ||
| 50 | * a8/ tmp | ||
| 51 | * a9/ tmp | ||
| 52 | * a10/ tmp | ||
| 53 | * a11/ original length | ||
| 54 | */ | ||
| 55 | |||
| 56 | #include <xtensa/coreasm.h> | ||
| 57 | |||
| 58 | #ifdef __XTENSA_EB__ | ||
| 59 | #define ALIGN(R, W0, W1) src R, W0, W1 | ||
| 60 | #define SSA8(R) ssa8b R | ||
| 61 | #else | ||
| 62 | #define ALIGN(R, W0, W1) src R, W1, W0 | ||
| 63 | #define SSA8(R) ssa8l R | ||
| 64 | #endif | ||
| 65 | |||
/* Load or store instructions that may cause exceptions (i.e. that touch
 * user memory) use the EX macro.  It emits the access at a local label
 * and records a (faulting-instruction address, handler address) pair in
 * the __ex_table section; if the access raises an exception, the kernel
 * exception dispatcher looks the address up there and resumes execution
 * at 'handler' (s_fixup / l_fixup below).
 */
#define EX(insn,reg1,reg2,offset,handler)	\
9:	insn	reg1, reg2, offset;		\
	.section __ex_table, "a";		\
	.word	9b, handler;			\
	.previous
| 73 | |||
| 74 | |||
| 75 | .text | ||
| 76 | .align 4 | ||
| 77 | .global __xtensa_copy_user | ||
| 78 | .type __xtensa_copy_user,@function | ||
| 79 | __xtensa_copy_user: | ||
| 80 | entry sp, 16 # minimal stack frame | ||
| 81 | # a2/ dst, a3/ src, a4/ len | ||
| 82 | mov a5, a2 # copy dst so that a2 is return value | ||
| 83 | mov a11, a4 # preserve original len for error case | ||
| 84 | .Lcommon: | ||
| 85 | bbsi.l a2, 0, .Ldst1mod2 # if dst is 1 mod 2 | ||
| 86 | bbsi.l a2, 1, .Ldst2mod4 # if dst is 2 mod 4 | ||
| 87 | .Ldstaligned: # return here from .Ldstunaligned when dst is aligned | ||
| 88 | srli a7, a4, 4 # number of loop iterations with 16B | ||
| 89 | # per iteration | ||
| 90 | movi a8, 3 # if source is also aligned, | ||
| 91 | bnone a3, a8, .Laligned # then use word copy | ||
| 92 | SSA8( a3) # set shift amount from byte offset | ||
| 93 | bnez a4, .Lsrcunaligned | ||
| 94 | movi a2, 0 # return success for len==0 | ||
| 95 | retw | ||
| 96 | |||
| 97 | /* | ||
| 98 | * Destination is unaligned | ||
| 99 | */ | ||
| 100 | |||
| 101 | .Ldst1mod2: # dst is only byte aligned | ||
| 102 | bltui a4, 7, .Lbytecopy # do short copies byte by byte | ||
| 103 | |||
| 104 | # copy 1 byte | ||
| 105 | EX(l8ui, a6, a3, 0, l_fixup) | ||
| 106 | addi a3, a3, 1 | ||
| 107 | EX(s8i, a6, a5, 0, s_fixup) | ||
| 108 | addi a5, a5, 1 | ||
| 109 | addi a4, a4, -1 | ||
| 110 | bbci.l a5, 1, .Ldstaligned # if dst is now aligned, then | ||
| 111 | # return to main algorithm | ||
| 112 | .Ldst2mod4: # dst 16-bit aligned | ||
| 113 | # copy 2 bytes | ||
| 114 | bltui a4, 6, .Lbytecopy # do short copies byte by byte | ||
| 115 | EX(l8ui, a6, a3, 0, l_fixup) | ||
| 116 | EX(l8ui, a7, a3, 1, l_fixup) | ||
| 117 | addi a3, a3, 2 | ||
| 118 | EX(s8i, a6, a5, 0, s_fixup) | ||
| 119 | EX(s8i, a7, a5, 1, s_fixup) | ||
| 120 | addi a5, a5, 2 | ||
| 121 | addi a4, a4, -2 | ||
| 122 | j .Ldstaligned # dst is now aligned, return to main algorithm | ||
| 123 | |||
| 124 | /* | ||
| 125 | * Byte by byte copy | ||
| 126 | */ | ||
| 127 | .align 4 | ||
| 128 | .byte 0 # 1 mod 4 alignment for LOOPNEZ | ||
| 129 | # (0 mod 4 alignment for LBEG) | ||
| 130 | .Lbytecopy: | ||
| 131 | #if XCHAL_HAVE_LOOPS | ||
| 132 | loopnez a4, .Lbytecopydone | ||
| 133 | #else /* !XCHAL_HAVE_LOOPS */ | ||
| 134 | beqz a4, .Lbytecopydone | ||
| 135 | add a7, a3, a4 # a7 = end address for source | ||
| 136 | #endif /* !XCHAL_HAVE_LOOPS */ | ||
| 137 | .Lnextbyte: | ||
| 138 | EX(l8ui, a6, a3, 0, l_fixup) | ||
| 139 | addi a3, a3, 1 | ||
| 140 | EX(s8i, a6, a5, 0, s_fixup) | ||
| 141 | addi a5, a5, 1 | ||
| 142 | #if !XCHAL_HAVE_LOOPS | ||
| 143 | blt a3, a7, .Lnextbyte | ||
| 144 | #endif /* !XCHAL_HAVE_LOOPS */ | ||
| 145 | .Lbytecopydone: | ||
| 146 | movi a2, 0 # return success for len bytes copied | ||
| 147 | retw | ||
| 148 | |||
| 149 | /* | ||
| 150 | * Destination and source are word-aligned. | ||
| 151 | */ | ||
| 152 | # copy 16 bytes per iteration for word-aligned dst and word-aligned src | ||
| 153 | .align 4 # 1 mod 4 alignment for LOOPNEZ | ||
| 154 | .byte 0 # (0 mod 4 alignment for LBEG) | ||
| 155 | .Laligned: | ||
| 156 | #if XCHAL_HAVE_LOOPS | ||
| 157 | loopnez a7, .Loop1done | ||
| 158 | #else /* !XCHAL_HAVE_LOOPS */ | ||
| 159 | beqz a7, .Loop1done | ||
| 160 | slli a8, a7, 4 | ||
| 161 | add a8, a8, a3 # a8 = end of last 16B source chunk | ||
| 162 | #endif /* !XCHAL_HAVE_LOOPS */ | ||
| 163 | .Loop1: | ||
| 164 | EX(l32i, a6, a3, 0, l_fixup) | ||
| 165 | EX(l32i, a7, a3, 4, l_fixup) | ||
| 166 | EX(s32i, a6, a5, 0, s_fixup) | ||
| 167 | EX(l32i, a6, a3, 8, l_fixup) | ||
| 168 | EX(s32i, a7, a5, 4, s_fixup) | ||
| 169 | EX(l32i, a7, a3, 12, l_fixup) | ||
| 170 | EX(s32i, a6, a5, 8, s_fixup) | ||
| 171 | addi a3, a3, 16 | ||
| 172 | EX(s32i, a7, a5, 12, s_fixup) | ||
| 173 | addi a5, a5, 16 | ||
| 174 | #if !XCHAL_HAVE_LOOPS | ||
| 175 | blt a3, a8, .Loop1 | ||
| 176 | #endif /* !XCHAL_HAVE_LOOPS */ | ||
| 177 | .Loop1done: | ||
| 178 | bbci.l a4, 3, .L2 | ||
| 179 | # copy 8 bytes | ||
| 180 | EX(l32i, a6, a3, 0, l_fixup) | ||
| 181 | EX(l32i, a7, a3, 4, l_fixup) | ||
| 182 | addi a3, a3, 8 | ||
| 183 | EX(s32i, a6, a5, 0, s_fixup) | ||
| 184 | EX(s32i, a7, a5, 4, s_fixup) | ||
| 185 | addi a5, a5, 8 | ||
| 186 | .L2: | ||
| 187 | bbci.l a4, 2, .L3 | ||
| 188 | # copy 4 bytes | ||
| 189 | EX(l32i, a6, a3, 0, l_fixup) | ||
| 190 | addi a3, a3, 4 | ||
| 191 | EX(s32i, a6, a5, 0, s_fixup) | ||
| 192 | addi a5, a5, 4 | ||
| 193 | .L3: | ||
| 194 | bbci.l a4, 1, .L4 | ||
| 195 | # copy 2 bytes | ||
| 196 | EX(l16ui, a6, a3, 0, l_fixup) | ||
| 197 | addi a3, a3, 2 | ||
| 198 | EX(s16i, a6, a5, 0, s_fixup) | ||
| 199 | addi a5, a5, 2 | ||
| 200 | .L4: | ||
| 201 | bbci.l a4, 0, .L5 | ||
| 202 | # copy 1 byte | ||
| 203 | EX(l8ui, a6, a3, 0, l_fixup) | ||
| 204 | EX(s8i, a6, a5, 0, s_fixup) | ||
| 205 | .L5: | ||
| 206 | movi a2, 0 # return success for len bytes copied | ||
| 207 | retw | ||
| 208 | |||
| 209 | /* | ||
| 210 | * Destination is aligned, Source is unaligned | ||
| 211 | */ | ||
| 212 | |||
| 213 | .align 4 | ||
| 214 | .byte 0 # 1 mod 4 alignement for LOOPNEZ | ||
| 215 | # (0 mod 4 alignment for LBEG) | ||
| 216 | .Lsrcunaligned: | ||
| 217 | # copy 16 bytes per iteration for word-aligned dst and unaligned src | ||
| 218 | and a10, a3, a8 # save unalignment offset for below | ||
| 219 | sub a3, a3, a10 # align a3 (to avoid sim warnings only; not needed for hardware) | ||
| 220 | EX(l32i, a6, a3, 0, l_fixup) # load first word | ||
| 221 | #if XCHAL_HAVE_LOOPS | ||
| 222 | loopnez a7, .Loop2done | ||
| 223 | #else /* !XCHAL_HAVE_LOOPS */ | ||
| 224 | beqz a7, .Loop2done | ||
| 225 | slli a10, a7, 4 | ||
| 226 | add a10, a10, a3 # a10 = end of last 16B source chunk | ||
| 227 | #endif /* !XCHAL_HAVE_LOOPS */ | ||
| 228 | .Loop2: | ||
| 229 | EX(l32i, a7, a3, 4, l_fixup) | ||
| 230 | EX(l32i, a8, a3, 8, l_fixup) | ||
| 231 | ALIGN( a6, a6, a7) | ||
| 232 | EX(s32i, a6, a5, 0, s_fixup) | ||
| 233 | EX(l32i, a9, a3, 12, l_fixup) | ||
| 234 | ALIGN( a7, a7, a8) | ||
| 235 | EX(s32i, a7, a5, 4, s_fixup) | ||
| 236 | EX(l32i, a6, a3, 16, l_fixup) | ||
| 237 | ALIGN( a8, a8, a9) | ||
| 238 | EX(s32i, a8, a5, 8, s_fixup) | ||
| 239 | addi a3, a3, 16 | ||
| 240 | ALIGN( a9, a9, a6) | ||
| 241 | EX(s32i, a9, a5, 12, s_fixup) | ||
| 242 | addi a5, a5, 16 | ||
| 243 | #if !XCHAL_HAVE_LOOPS | ||
| 244 | blt a3, a10, .Loop2 | ||
| 245 | #endif /* !XCHAL_HAVE_LOOPS */ | ||
| 246 | .Loop2done: | ||
| 247 | bbci.l a4, 3, .L12 | ||
| 248 | # copy 8 bytes | ||
| 249 | EX(l32i, a7, a3, 4, l_fixup) | ||
| 250 | EX(l32i, a8, a3, 8, l_fixup) | ||
| 251 | ALIGN( a6, a6, a7) | ||
| 252 | EX(s32i, a6, a5, 0, s_fixup) | ||
| 253 | addi a3, a3, 8 | ||
| 254 | ALIGN( a7, a7, a8) | ||
| 255 | EX(s32i, a7, a5, 4, s_fixup) | ||
| 256 | addi a5, a5, 8 | ||
| 257 | mov a6, a8 | ||
| 258 | .L12: | ||
| 259 | bbci.l a4, 2, .L13 | ||
| 260 | # copy 4 bytes | ||
| 261 | EX(l32i, a7, a3, 4, l_fixup) | ||
| 262 | addi a3, a3, 4 | ||
| 263 | ALIGN( a6, a6, a7) | ||
| 264 | EX(s32i, a6, a5, 0, s_fixup) | ||
| 265 | addi a5, a5, 4 | ||
| 266 | mov a6, a7 | ||
| 267 | .L13: | ||
| 268 | add a3, a3, a10 # readjust a3 with correct misalignment | ||
| 269 | bbci.l a4, 1, .L14 | ||
| 270 | # copy 2 bytes | ||
| 271 | EX(l8ui, a6, a3, 0, l_fixup) | ||
| 272 | EX(l8ui, a7, a3, 1, l_fixup) | ||
| 273 | addi a3, a3, 2 | ||
| 274 | EX(s8i, a6, a5, 0, s_fixup) | ||
| 275 | EX(s8i, a7, a5, 1, s_fixup) | ||
| 276 | addi a5, a5, 2 | ||
| 277 | .L14: | ||
| 278 | bbci.l a4, 0, .L15 | ||
| 279 | # copy 1 byte | ||
| 280 | EX(l8ui, a6, a3, 0, l_fixup) | ||
| 281 | EX(s8i, a6, a5, 0, s_fixup) | ||
| 282 | .L15: | ||
| 283 | movi a2, 0 # return success for len bytes copied | ||
| 284 | retw | ||
| 285 | |||
| 286 | |||
	.section .fixup, "ax"
	.align	4

/* Exception fixup handlers, reached through the __ex_table entries that
 * EX() emitted for every user-space load/store above.
 *
 * a2 = original dst; a5 = current dst; a11= original len
 * bytes_copied = a5 - a2
 * retval = bytes_not_copied = original len - bytes_copied
 * retval = a11 - (a5 - a2)
 *
 * Clearing the remaining pieces of kernel memory plugs security
 * holes.  This functionality is the equivalent of the *_zeroing
 * functions that some architectures provide.
 */

/* Literal pool entry holding the address of memset(), loaded via l32r
 * below (.fixup may be too far from memset for a direct call). */
.Lmemset:
	.word	memset

/* A store to the destination faulted: just report the shortfall. */
s_fixup:
	sub	a2, a5, a2	/* a2 <-- bytes copied */
	sub	a2, a11, a2	/* a2 <-- bytes not copied */
	retw

/* A load from the source faulted: compute the shortfall, then zero the
 * uncopied tail of the destination buffer starting at the current dst
 * cursor, so stale kernel memory cannot leak to the caller.
 */
l_fixup:
	sub	a2, a5, a2	/* a2 <-- bytes copied */
	sub	a2, a11, a2	/* a2 <-- bytes not copied == return value */

	/* void *memset(void *s, int c, size_t n); */
	mov	a6, a5		/* s = current dst (start of uncopied tail) */
	movi	a7, 0		/* c */
	mov	a8, a2		/* n = bytes not copied */
	l32r	a4, .Lmemset
	callx4	a4		/* windowed call4: our a6..a8 become memset's a2..a4 */
	/* Ignore memset return value in a6. */
	/* a2 still contains bytes not copied. */
	retw
| 321 | |||
