Diffstat (limited to 'arch/microblaze/lib')

 -rw-r--r--  arch/microblaze/lib/Makefile   |  13
 -rw-r--r--  arch/microblaze/lib/checksum.c | 163
 -rw-r--r--  arch/microblaze/lib/fastcopy.S | 662
 -rw-r--r--  arch/microblaze/lib/memcpy.c   | 161
 -rw-r--r--  arch/microblaze/lib/memmove.c  | 175
 -rw-r--r--  arch/microblaze/lib/memset.c   |  82
 -rw-r--r--  arch/microblaze/lib/uaccess.c  |  41

 7 files changed, 1297 insertions(+), 0 deletions(-)
diff --git a/arch/microblaze/lib/Makefile b/arch/microblaze/lib/Makefile
new file mode 100644
index 000000000000..d27126bf306a
--- /dev/null
+++ b/arch/microblaze/lib/Makefile
@@ -0,0 +1,13 @@
#
# Makefile
#

lib-y := memset.o checksum.o

ifeq ($(CONFIG_OPT_LIB_ASM),y)
lib-y += fastcopy.o
else
lib-y += memcpy.o memmove.o
endif

lib-y += uaccess.o
diff --git a/arch/microblaze/lib/checksum.c b/arch/microblaze/lib/checksum.c
new file mode 100644
index 000000000000..809340070a13
--- /dev/null
+++ b/arch/microblaze/lib/checksum.c
@@ -0,0 +1,163 @@
/*
 *
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system. INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Tom May, <ftom@netcom.com>
 *		Andreas Schwab, <schwab@issan.informatik.uni-dortmund.de>
 *		Lots of code moved from tcp.c and ip.c; see those files
 *		for more names.
 *
 * 03/02/96	Jes Sorensen, Andreas Schwab, Roman Hodek:
 *		Fixed some nasty bugs, causing some horrible crashes.
 *		A: At some points, the sum (%0) was used as
 *		length-counter instead of the length counter
 *		(%1). Thanks to Roman Hodek for pointing this out.
 *		B: GCC seems to mess up if one uses too many
 *		data-registers to hold input values and one tries to
 *		specify d0 and d1 as scratch registers. Letting gcc
 *		choose these registers itself solves the problem.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

/* Revised by Kenneth Albanowski for m68knommu. Basic problem: unaligned access
   kills, so most of the assembly has to go. */

#include <net/checksum.h>
#include <asm/checksum.h>
#include <linux/module.h>

static inline unsigned short from32to16(unsigned long x)
{
	/* add up 16-bit and 16-bit for 16+c bit */
	x = (x & 0xffff) + (x >> 16);
	/* add up carry.. */
	x = (x & 0xffff) + (x >> 16);
	return x;
}

static unsigned int do_csum(const unsigned char *buff, int len)
{
	int odd, count;
	unsigned long result = 0;

	if (len <= 0)
		goto out;
	odd = 1 & (unsigned long) buff;
	if (odd) {
		result = *buff;
		len--;
		buff++;
	}
	count = len >> 1;	/* nr of 16-bit words.. */
	if (count) {
		if (2 & (unsigned long) buff) {
			result += *(unsigned short *) buff;
			count--;
			len -= 2;
			buff += 2;
		}
		count >>= 1;	/* nr of 32-bit words.. */
		if (count) {
			unsigned long carry = 0;
			do {
				unsigned long w = *(unsigned long *) buff;
				count--;
				buff += 4;
				result += carry;
				result += w;
				carry = (w > result);
			} while (count);
			result += carry;
			result = (result & 0xffff) + (result >> 16);
		}
		if (len & 2) {
			result += *(unsigned short *) buff;
			buff += 2;
		}
	}
	if (len & 1)
		result += (*buff << 8);
	result = from32to16(result);
	if (odd)
		result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
out:
	return result;
}

/*
 * This is a version of ip_compute_csum() optimized for IP headers,
 * which always checksum on 4 octet boundaries.
 */
__sum16 ip_fast_csum(const void *iph, unsigned int ihl)
{
	return (__force __sum16)~do_csum(iph, ihl*4);
}

/*
 * computes the checksum of a memory block at buff, length len,
 * and adds in "sum" (32-bit)
 *
 * returns a 32-bit number suitable for feeding into itself
 * or csum_tcpudp_magic
 *
 * this function must be called with even lengths, except
 * for the last fragment, which may be odd
 *
 * it's best to have buff aligned on a 32-bit boundary
 */
__wsum csum_partial(const void *buff, int len, __wsum sum)
{
	unsigned int result = do_csum(buff, len);

	/* add in old sum, and carry.. */
	result += sum;
	if (sum > result)
		result += 1;
	return result;
}
EXPORT_SYMBOL(csum_partial);

/*
 * this routine is used for miscellaneous IP-like checksums, mainly
 * in icmp.c
 */
__sum16 ip_compute_csum(const unsigned char *buff, int len)
{
	return ~do_csum(buff, len);
}
EXPORT_SYMBOL(ip_compute_csum);

/*
 * copy from fs while checksumming, otherwise like csum_partial
 */
__wsum
csum_partial_copy_from_user(const char __user *src, char *dst, int len,
			    int sum, int *csum_err)
{
	if (csum_err)
		*csum_err = 0;
	memcpy(dst, src, len);
	return csum_partial(dst, len, sum);
}
EXPORT_SYMBOL(csum_partial_copy_from_user);

/*
 * copy from ds while checksumming, otherwise like csum_partial
 */
__wsum
csum_partial_copy(const char *src, char *dst, int len, int sum)
{
	memcpy(dst, src, len);
	return csum_partial(dst, len, sum);
}
EXPORT_SYMBOL(csum_partial_copy);
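
As an aside, not part of the patch: do_csum() above computes the RFC 1071 one's-complement sum by accumulating 32-bit words and then folding the carries back into 16 bits, which is all from32to16() does. A minimal userspace sketch of the same arithmetic, with illustrative names (fold32 and csum_ref are not kernel APIs) and the byte order made explicit rather than relying on host endianness:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

static uint16_t fold32(uint32_t x)
{
	x = (x & 0xffff) + (x >> 16);	/* 16 bits plus accumulated carries */
	x = (x & 0xffff) + (x >> 16);	/* absorb the final carry */
	return (uint16_t)x;
}

/* Reference sum over big-endian 16-bit words; an odd trailing byte is
 * zero-padded, mirroring the (*buff << 8) case in do_csum() above. */
static uint16_t csum_ref(const uint8_t *p, size_t len)
{
	uint32_t sum = 0;

	for (; len > 1; p += 2, len -= 2)
		sum += (uint32_t)((p[0] << 8) | p[1]);
	if (len)
		sum += (uint32_t)p[0] << 8;
	return fold32(sum);
}

int main(void)
{
	/* a 20-byte IPv4 header with its checksum field zeroed */
	uint8_t hdr[20] = {
		0x45, 0x00, 0x00, 0x3c, 0x1c, 0x46, 0x40, 0x00,
		0x40, 0x06, 0x00, 0x00, 0xac, 0x10, 0x0a, 0x63,
		0xac, 0x10, 0x0a, 0x0c,
	};

	/* the complement is what would be stored in the header, cf. ip_fast_csum() */
	printf("checksum = 0x%04x\n", (uint16_t)~csum_ref(hdr, sizeof(hdr)));
	return 0;
}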
diff --git a/arch/microblaze/lib/fastcopy.S b/arch/microblaze/lib/fastcopy.S
new file mode 100644
index 000000000000..02e3ab4eddf3
--- /dev/null
+++ b/arch/microblaze/lib/fastcopy.S
@@ -0,0 +1,662 @@
/*
 * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
 * Copyright (C) 2008-2009 PetaLogix
 * Copyright (C) 2008 Jim Law - Iris LP  All rights reserved.
 *
 * This file is subject to the terms and conditions of the GNU General
 * Public License. See the file COPYING in the main directory of this
 * archive for more details.
 *
 * Written by Jim Law <jlaw@irispower.com>
 *
 * intended to replace:
 *	memcpy in memcpy.c and
 *	memmove in memmove.c
 * ... in arch/microblaze/lib
 *
 *
 * fastcopy.S
 *
 * Attempt at quicker memcpy and memmove for MicroBlaze
 *	Input : Operand1 in Reg r5 - destination address
 *		Operand2 in Reg r6 - source address
 *		Operand3 in Reg r7 - number of bytes to transfer
 *	Output: Result in Reg r3 - starting destination address
 *
 *
 * Explanation:
 *	Perform (possibly unaligned) copy of a block of memory
 *	between mem locations with size of xfer spec'd in bytes
 */

#include <linux/linkage.h>

	.globl	memcpy
	.ent	memcpy

memcpy:
fast_memcpy_ascending:
	/* move d to return register as value of function */
	addi	r3, r5, 0

	addi	r4, r0, 4	/* n = 4 */
	cmpu	r4, r4, r7	/* n = c - n (unsigned) */
	blti	r4, a_xfer_end	/* if n < 0, less than one word to transfer */

	/* transfer first 0~3 bytes to get aligned dest address */
	andi	r4, r5, 3	/* n = d & 3 */
	/* if zero, destination already aligned */
	beqi	r4, a_dalign_done
	/* n = 4 - n (yields 3, 2, 1 transfers for 1, 2, 3 addr offset) */
	rsubi	r4, r4, 4
	rsub	r7, r4, r7	/* c = c - n adjust c */

a_xfer_first_loop:
	/* if no bytes left to transfer, transfer the bulk */
	beqi	r4, a_dalign_done
	lbui	r11, r6, 0	/* h = *s */
	sbi	r11, r5, 0	/* *d = h */
	addi	r6, r6, 1	/* s++ */
	addi	r5, r5, 1	/* d++ */
	brid	a_xfer_first_loop	/* loop */
	addi	r4, r4, -1	/* n-- (IN DELAY SLOT) */

a_dalign_done:
	addi	r4, r0, 32	/* n = 32 */
	cmpu	r4, r4, r7	/* n = c - n (unsigned) */
	/* if n < 0, less than one block to transfer */
	blti	r4, a_block_done

a_block_xfer:
	andi	r4, r7, 0xffffffe0	/* n = c & ~31 */
	rsub	r7, r4, r7	/* c = c - n */

	andi	r9, r6, 3	/* t1 = s & 3 */
	/* if temp != 0, unaligned transfers needed */
	bnei	r9, a_block_unaligned

a_block_aligned:
	lwi	r9, r6, 0	/* t1 = *(s + 0) */
	lwi	r10, r6, 4	/* t2 = *(s + 4) */
	lwi	r11, r6, 8	/* t3 = *(s + 8) */
	lwi	r12, r6, 12	/* t4 = *(s + 12) */
	swi	r9, r5, 0	/* *(d + 0) = t1 */
	swi	r10, r5, 4	/* *(d + 4) = t2 */
	swi	r11, r5, 8	/* *(d + 8) = t3 */
	swi	r12, r5, 12	/* *(d + 12) = t4 */
	lwi	r9, r6, 16	/* t1 = *(s + 16) */
	lwi	r10, r6, 20	/* t2 = *(s + 20) */
	lwi	r11, r6, 24	/* t3 = *(s + 24) */
	lwi	r12, r6, 28	/* t4 = *(s + 28) */
	swi	r9, r5, 16	/* *(d + 16) = t1 */
	swi	r10, r5, 20	/* *(d + 20) = t2 */
	swi	r11, r5, 24	/* *(d + 24) = t3 */
	swi	r12, r5, 28	/* *(d + 28) = t4 */
	addi	r6, r6, 32	/* s = s + 32 */
	addi	r4, r4, -32	/* n = n - 32 */
	bneid	r4, a_block_aligned	/* while (n) loop */
	addi	r5, r5, 32	/* d = d + 32 (IN DELAY SLOT) */
	bri	a_block_done

a_block_unaligned:
	andi	r8, r6, 0xfffffffc	/* as = s & ~3 */
	add	r6, r6, r4	/* s = s + n */
	lwi	r11, r8, 0	/* h = *(as + 0) */

	addi	r9, r9, -1
	beqi	r9, a_block_u1	/* t1 was 1 => 1 byte offset */
	addi	r9, r9, -1
	beqi	r9, a_block_u2	/* t1 was 2 => 2 byte offset */

a_block_u3:
	bslli	r11, r11, 24	/* h = h << 24 */
a_bu3_loop:
	lwi	r12, r8, 4	/* v = *(as + 4) */
	bsrli	r9, r12, 8	/* t1 = v >> 8 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 0	/* *(d + 0) = t1 */
	bslli	r11, r12, 24	/* h = v << 24 */
	lwi	r12, r8, 8	/* v = *(as + 8) */
	bsrli	r9, r12, 8	/* t1 = v >> 8 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 4	/* *(d + 4) = t1 */
	bslli	r11, r12, 24	/* h = v << 24 */
	lwi	r12, r8, 12	/* v = *(as + 12) */
	bsrli	r9, r12, 8	/* t1 = v >> 8 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 8	/* *(d + 8) = t1 */
	bslli	r11, r12, 24	/* h = v << 24 */
	lwi	r12, r8, 16	/* v = *(as + 16) */
	bsrli	r9, r12, 8	/* t1 = v >> 8 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 12	/* *(d + 12) = t1 */
	bslli	r11, r12, 24	/* h = v << 24 */
	lwi	r12, r8, 20	/* v = *(as + 20) */
	bsrli	r9, r12, 8	/* t1 = v >> 8 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 16	/* *(d + 16) = t1 */
	bslli	r11, r12, 24	/* h = v << 24 */
	lwi	r12, r8, 24	/* v = *(as + 24) */
	bsrli	r9, r12, 8	/* t1 = v >> 8 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 20	/* *(d + 20) = t1 */
	bslli	r11, r12, 24	/* h = v << 24 */
	lwi	r12, r8, 28	/* v = *(as + 28) */
	bsrli	r9, r12, 8	/* t1 = v >> 8 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 24	/* *(d + 24) = t1 */
	bslli	r11, r12, 24	/* h = v << 24 */
	lwi	r12, r8, 32	/* v = *(as + 32) */
	bsrli	r9, r12, 8	/* t1 = v >> 8 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 28	/* *(d + 28) = t1 */
	bslli	r11, r12, 24	/* h = v << 24 */
	addi	r8, r8, 32	/* as = as + 32 */
	addi	r4, r4, -32	/* n = n - 32 */
	bneid	r4, a_bu3_loop	/* while (n) loop */
	addi	r5, r5, 32	/* d = d + 32 (IN DELAY SLOT) */
	bri	a_block_done

a_block_u1:
	bslli	r11, r11, 8	/* h = h << 8 */
a_bu1_loop:
	lwi	r12, r8, 4	/* v = *(as + 4) */
	bsrli	r9, r12, 24	/* t1 = v >> 24 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 0	/* *(d + 0) = t1 */
	bslli	r11, r12, 8	/* h = v << 8 */
	lwi	r12, r8, 8	/* v = *(as + 8) */
	bsrli	r9, r12, 24	/* t1 = v >> 24 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 4	/* *(d + 4) = t1 */
	bslli	r11, r12, 8	/* h = v << 8 */
	lwi	r12, r8, 12	/* v = *(as + 12) */
	bsrli	r9, r12, 24	/* t1 = v >> 24 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 8	/* *(d + 8) = t1 */
	bslli	r11, r12, 8	/* h = v << 8 */
	lwi	r12, r8, 16	/* v = *(as + 16) */
	bsrli	r9, r12, 24	/* t1 = v >> 24 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 12	/* *(d + 12) = t1 */
	bslli	r11, r12, 8	/* h = v << 8 */
	lwi	r12, r8, 20	/* v = *(as + 20) */
	bsrli	r9, r12, 24	/* t1 = v >> 24 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 16	/* *(d + 16) = t1 */
	bslli	r11, r12, 8	/* h = v << 8 */
	lwi	r12, r8, 24	/* v = *(as + 24) */
	bsrli	r9, r12, 24	/* t1 = v >> 24 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 20	/* *(d + 20) = t1 */
	bslli	r11, r12, 8	/* h = v << 8 */
	lwi	r12, r8, 28	/* v = *(as + 28) */
	bsrli	r9, r12, 24	/* t1 = v >> 24 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 24	/* *(d + 24) = t1 */
	bslli	r11, r12, 8	/* h = v << 8 */
	lwi	r12, r8, 32	/* v = *(as + 32) */
	bsrli	r9, r12, 24	/* t1 = v >> 24 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 28	/* *(d + 28) = t1 */
	bslli	r11, r12, 8	/* h = v << 8 */
	addi	r8, r8, 32	/* as = as + 32 */
	addi	r4, r4, -32	/* n = n - 32 */
	bneid	r4, a_bu1_loop	/* while (n) loop */
	addi	r5, r5, 32	/* d = d + 32 (IN DELAY SLOT) */
	bri	a_block_done

a_block_u2:
	bslli	r11, r11, 16	/* h = h << 16 */
a_bu2_loop:
	lwi	r12, r8, 4	/* v = *(as + 4) */
	bsrli	r9, r12, 16	/* t1 = v >> 16 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 0	/* *(d + 0) = t1 */
	bslli	r11, r12, 16	/* h = v << 16 */
	lwi	r12, r8, 8	/* v = *(as + 8) */
	bsrli	r9, r12, 16	/* t1 = v >> 16 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 4	/* *(d + 4) = t1 */
	bslli	r11, r12, 16	/* h = v << 16 */
	lwi	r12, r8, 12	/* v = *(as + 12) */
	bsrli	r9, r12, 16	/* t1 = v >> 16 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 8	/* *(d + 8) = t1 */
	bslli	r11, r12, 16	/* h = v << 16 */
	lwi	r12, r8, 16	/* v = *(as + 16) */
	bsrli	r9, r12, 16	/* t1 = v >> 16 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 12	/* *(d + 12) = t1 */
	bslli	r11, r12, 16	/* h = v << 16 */
	lwi	r12, r8, 20	/* v = *(as + 20) */
	bsrli	r9, r12, 16	/* t1 = v >> 16 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 16	/* *(d + 16) = t1 */
	bslli	r11, r12, 16	/* h = v << 16 */
	lwi	r12, r8, 24	/* v = *(as + 24) */
	bsrli	r9, r12, 16	/* t1 = v >> 16 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 20	/* *(d + 20) = t1 */
	bslli	r11, r12, 16	/* h = v << 16 */
	lwi	r12, r8, 28	/* v = *(as + 28) */
	bsrli	r9, r12, 16	/* t1 = v >> 16 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 24	/* *(d + 24) = t1 */
	bslli	r11, r12, 16	/* h = v << 16 */
	lwi	r12, r8, 32	/* v = *(as + 32) */
	bsrli	r9, r12, 16	/* t1 = v >> 16 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 28	/* *(d + 28) = t1 */
	bslli	r11, r12, 16	/* h = v << 16 */
	addi	r8, r8, 32	/* as = as + 32 */
	addi	r4, r4, -32	/* n = n - 32 */
	bneid	r4, a_bu2_loop	/* while (n) loop */
	addi	r5, r5, 32	/* d = d + 32 (IN DELAY SLOT) */

a_block_done:
	addi	r4, r0, 4	/* n = 4 */
	cmpu	r4, r4, r7	/* n = c - n (unsigned) */
	blti	r4, a_xfer_end	/* if n < 0, less than one word to transfer */

a_word_xfer:
	andi	r4, r7, 0xfffffffc	/* n = c & ~3 */
	addi	r10, r0, 0	/* offset = 0 */

	andi	r9, r6, 3	/* t1 = s & 3 */
	/* if temp != 0, unaligned transfers needed */
	bnei	r9, a_word_unaligned

a_word_aligned:
	lw	r9, r6, r10	/* t1 = *(s+offset) */
	sw	r9, r5, r10	/* *(d+offset) = t1 */
	addi	r4, r4, -4	/* n-- */
	bneid	r4, a_word_aligned	/* loop */
	addi	r10, r10, 4	/* offset++ (IN DELAY SLOT) */

	bri	a_word_done

a_word_unaligned:
	andi	r8, r6, 0xfffffffc	/* as = s & ~3 */
	lwi	r11, r8, 0	/* h = *(as + 0) */
	addi	r8, r8, 4	/* as = as + 4 */

	addi	r9, r9, -1
	beqi	r9, a_word_u1	/* t1 was 1 => 1 byte offset */
	addi	r9, r9, -1
	beqi	r9, a_word_u2	/* t1 was 2 => 2 byte offset */

a_word_u3:
	bslli	r11, r11, 24	/* h = h << 24 */
a_wu3_loop:
	lw	r12, r8, r10	/* v = *(as + offset) */
	bsrli	r9, r12, 8	/* t1 = v >> 8 */
	or	r9, r11, r9	/* t1 = h | t1 */
	sw	r9, r5, r10	/* *(d + offset) = t1 */
	bslli	r11, r12, 24	/* h = v << 24 */
	addi	r4, r4, -4	/* n = n - 4 */
	bneid	r4, a_wu3_loop	/* while (n) loop */
	addi	r10, r10, 4	/* offset = offset + 4 (IN DELAY SLOT) */

	bri	a_word_done

a_word_u1:
	bslli	r11, r11, 8	/* h = h << 8 */
a_wu1_loop:
	lw	r12, r8, r10	/* v = *(as + offset) */
	bsrli	r9, r12, 24	/* t1 = v >> 24 */
	or	r9, r11, r9	/* t1 = h | t1 */
	sw	r9, r5, r10	/* *(d + offset) = t1 */
	bslli	r11, r12, 8	/* h = v << 8 */
	addi	r4, r4, -4	/* n = n - 4 */
	bneid	r4, a_wu1_loop	/* while (n) loop */
	addi	r10, r10, 4	/* offset = offset + 4 (IN DELAY SLOT) */

	bri	a_word_done

a_word_u2:
	bslli	r11, r11, 16	/* h = h << 16 */
a_wu2_loop:
	lw	r12, r8, r10	/* v = *(as + offset) */
	bsrli	r9, r12, 16	/* t1 = v >> 16 */
	or	r9, r11, r9	/* t1 = h | t1 */
	sw	r9, r5, r10	/* *(d + offset) = t1 */
	bslli	r11, r12, 16	/* h = v << 16 */
	addi	r4, r4, -4	/* n = n - 4 */
	bneid	r4, a_wu2_loop	/* while (n) loop */
	addi	r10, r10, 4	/* offset = offset + 4 (IN DELAY SLOT) */

a_word_done:
	add	r5, r5, r10	/* d = d + offset */
	add	r6, r6, r10	/* s = s + offset */
	rsub	r7, r10, r7	/* c = c - offset */

a_xfer_end:
a_xfer_end_loop:
	beqi	r7, a_done	/* while (c) */
	lbui	r9, r6, 0	/* t1 = *s */
	addi	r6, r6, 1	/* s++ */
	sbi	r9, r5, 0	/* *d = t1 */
	addi	r7, r7, -1	/* c-- */
	brid	a_xfer_end_loop	/* loop */
	addi	r5, r5, 1	/* d++ (IN DELAY SLOT) */

a_done:
	rtsd	r15, 8
	nop

	.end	memcpy
/*----------------------------------------------------------------------------*/
	.globl	memmove
	.ent	memmove

memmove:
	cmpu	r4, r5, r6	/* n = s - d */
	bgei	r4, fast_memcpy_ascending

fast_memcpy_descending:
	/* move d to return register as value of function */
	addi	r3, r5, 0

	add	r5, r5, r7	/* d = d + c */
	add	r6, r6, r7	/* s = s + c */

	addi	r4, r0, 4	/* n = 4 */
	cmpu	r4, r4, r7	/* n = c - n (unsigned) */
	blti	r4, d_xfer_end	/* if n < 0, less than one word to transfer */

	/* transfer first 0~3 bytes to get aligned dest address */
	andi	r4, r5, 3	/* n = d & 3 */
	/* if zero, destination already aligned */
	beqi	r4, d_dalign_done
	rsub	r7, r4, r7	/* c = c - n adjust c */

d_xfer_first_loop:
	/* if no bytes left to transfer, transfer the bulk */
	beqi	r4, d_dalign_done
	addi	r6, r6, -1	/* s-- */
	addi	r5, r5, -1	/* d-- */
	lbui	r11, r6, 0	/* h = *s */
	sbi	r11, r5, 0	/* *d = h */
	brid	d_xfer_first_loop	/* loop */
	addi	r4, r4, -1	/* n-- (IN DELAY SLOT) */

d_dalign_done:
	addi	r4, r0, 32	/* n = 32 */
	cmpu	r4, r4, r7	/* n = c - n (unsigned) */
	/* if n < 0, less than one block to transfer */
	blti	r4, d_block_done

d_block_xfer:
	andi	r4, r7, 0xffffffe0	/* n = c & ~31 */
	rsub	r7, r4, r7	/* c = c - n */

	andi	r9, r6, 3	/* t1 = s & 3 */
	/* if temp != 0, unaligned transfers needed */
	bnei	r9, d_block_unaligned

d_block_aligned:
	addi	r6, r6, -32	/* s = s - 32 */
	addi	r5, r5, -32	/* d = d - 32 */
	lwi	r9, r6, 28	/* t1 = *(s + 28) */
	lwi	r10, r6, 24	/* t2 = *(s + 24) */
	lwi	r11, r6, 20	/* t3 = *(s + 20) */
	lwi	r12, r6, 16	/* t4 = *(s + 16) */
	swi	r9, r5, 28	/* *(d + 28) = t1 */
	swi	r10, r5, 24	/* *(d + 24) = t2 */
	swi	r11, r5, 20	/* *(d + 20) = t3 */
	swi	r12, r5, 16	/* *(d + 16) = t4 */
	lwi	r9, r6, 12	/* t1 = *(s + 12) */
	lwi	r10, r6, 8	/* t2 = *(s + 8) */
	lwi	r11, r6, 4	/* t3 = *(s + 4) */
	lwi	r12, r6, 0	/* t4 = *(s + 0) */
	swi	r9, r5, 12	/* *(d + 12) = t1 */
	swi	r10, r5, 8	/* *(d + 8) = t2 */
	swi	r11, r5, 4	/* *(d + 4) = t3 */
	addi	r4, r4, -32	/* n = n - 32 */
	bneid	r4, d_block_aligned	/* while (n) loop */
	swi	r12, r5, 0	/* *(d + 0) = t4 (IN DELAY SLOT) */
	bri	d_block_done

d_block_unaligned:
	andi	r8, r6, 0xfffffffc	/* as = s & ~3 */
	rsub	r6, r4, r6	/* s = s - n */
	lwi	r11, r8, 0	/* h = *(as + 0) */

	addi	r9, r9, -1
	beqi	r9, d_block_u1	/* t1 was 1 => 1 byte offset */
	addi	r9, r9, -1
	beqi	r9, d_block_u2	/* t1 was 2 => 2 byte offset */

d_block_u3:
	bsrli	r11, r11, 8	/* h = h >> 8 */
d_bu3_loop:
	addi	r8, r8, -32	/* as = as - 32 */
	addi	r5, r5, -32	/* d = d - 32 */
	lwi	r12, r8, 28	/* v = *(as + 28) */
	bslli	r9, r12, 24	/* t1 = v << 24 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 28	/* *(d + 28) = t1 */
	bsrli	r11, r12, 8	/* h = v >> 8 */
	lwi	r12, r8, 24	/* v = *(as + 24) */
	bslli	r9, r12, 24	/* t1 = v << 24 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 24	/* *(d + 24) = t1 */
	bsrli	r11, r12, 8	/* h = v >> 8 */
	lwi	r12, r8, 20	/* v = *(as + 20) */
	bslli	r9, r12, 24	/* t1 = v << 24 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 20	/* *(d + 20) = t1 */
	bsrli	r11, r12, 8	/* h = v >> 8 */
	lwi	r12, r8, 16	/* v = *(as + 16) */
	bslli	r9, r12, 24	/* t1 = v << 24 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 16	/* *(d + 16) = t1 */
	bsrli	r11, r12, 8	/* h = v >> 8 */
	lwi	r12, r8, 12	/* v = *(as + 12) */
	bslli	r9, r12, 24	/* t1 = v << 24 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 12	/* *(d + 12) = t1 */
	bsrli	r11, r12, 8	/* h = v >> 8 */
	lwi	r12, r8, 8	/* v = *(as + 8) */
	bslli	r9, r12, 24	/* t1 = v << 24 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 8	/* *(d + 8) = t1 */
	bsrli	r11, r12, 8	/* h = v >> 8 */
	lwi	r12, r8, 4	/* v = *(as + 4) */
	bslli	r9, r12, 24	/* t1 = v << 24 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 4	/* *(d + 4) = t1 */
	bsrli	r11, r12, 8	/* h = v >> 8 */
	lwi	r12, r8, 0	/* v = *(as + 0) */
	bslli	r9, r12, 24	/* t1 = v << 24 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 0	/* *(d + 0) = t1 */
	addi	r4, r4, -32	/* n = n - 32 */
	bneid	r4, d_bu3_loop	/* while (n) loop */
	bsrli	r11, r12, 8	/* h = v >> 8 (IN DELAY SLOT) */
	bri	d_block_done

d_block_u1:
	bsrli	r11, r11, 24	/* h = h >> 24 */
d_bu1_loop:
	addi	r8, r8, -32	/* as = as - 32 */
	addi	r5, r5, -32	/* d = d - 32 */
	lwi	r12, r8, 28	/* v = *(as + 28) */
	bslli	r9, r12, 8	/* t1 = v << 8 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 28	/* *(d + 28) = t1 */
	bsrli	r11, r12, 24	/* h = v >> 24 */
	lwi	r12, r8, 24	/* v = *(as + 24) */
	bslli	r9, r12, 8	/* t1 = v << 8 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 24	/* *(d + 24) = t1 */
	bsrli	r11, r12, 24	/* h = v >> 24 */
	lwi	r12, r8, 20	/* v = *(as + 20) */
	bslli	r9, r12, 8	/* t1 = v << 8 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 20	/* *(d + 20) = t1 */
	bsrli	r11, r12, 24	/* h = v >> 24 */
	lwi	r12, r8, 16	/* v = *(as + 16) */
	bslli	r9, r12, 8	/* t1 = v << 8 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 16	/* *(d + 16) = t1 */
	bsrli	r11, r12, 24	/* h = v >> 24 */
	lwi	r12, r8, 12	/* v = *(as + 12) */
	bslli	r9, r12, 8	/* t1 = v << 8 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 12	/* *(d + 12) = t1 */
	bsrli	r11, r12, 24	/* h = v >> 24 */
	lwi	r12, r8, 8	/* v = *(as + 8) */
	bslli	r9, r12, 8	/* t1 = v << 8 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 8	/* *(d + 8) = t1 */
	bsrli	r11, r12, 24	/* h = v >> 24 */
	lwi	r12, r8, 4	/* v = *(as + 4) */
	bslli	r9, r12, 8	/* t1 = v << 8 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 4	/* *(d + 4) = t1 */
	bsrli	r11, r12, 24	/* h = v >> 24 */
	lwi	r12, r8, 0	/* v = *(as + 0) */
	bslli	r9, r12, 8	/* t1 = v << 8 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 0	/* *(d + 0) = t1 */
	addi	r4, r4, -32	/* n = n - 32 */
	bneid	r4, d_bu1_loop	/* while (n) loop */
	bsrli	r11, r12, 24	/* h = v >> 24 (IN DELAY SLOT) */
	bri	d_block_done

d_block_u2:
	bsrli	r11, r11, 16	/* h = h >> 16 */
d_bu2_loop:
	addi	r8, r8, -32	/* as = as - 32 */
	addi	r5, r5, -32	/* d = d - 32 */
	lwi	r12, r8, 28	/* v = *(as + 28) */
	bslli	r9, r12, 16	/* t1 = v << 16 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 28	/* *(d + 28) = t1 */
	bsrli	r11, r12, 16	/* h = v >> 16 */
	lwi	r12, r8, 24	/* v = *(as + 24) */
	bslli	r9, r12, 16	/* t1 = v << 16 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 24	/* *(d + 24) = t1 */
	bsrli	r11, r12, 16	/* h = v >> 16 */
	lwi	r12, r8, 20	/* v = *(as + 20) */
	bslli	r9, r12, 16	/* t1 = v << 16 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 20	/* *(d + 20) = t1 */
	bsrli	r11, r12, 16	/* h = v >> 16 */
	lwi	r12, r8, 16	/* v = *(as + 16) */
	bslli	r9, r12, 16	/* t1 = v << 16 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 16	/* *(d + 16) = t1 */
	bsrli	r11, r12, 16	/* h = v >> 16 */
	lwi	r12, r8, 12	/* v = *(as + 12) */
	bslli	r9, r12, 16	/* t1 = v << 16 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 12	/* *(d + 12) = t1 */
	bsrli	r11, r12, 16	/* h = v >> 16 */
	lwi	r12, r8, 8	/* v = *(as + 8) */
	bslli	r9, r12, 16	/* t1 = v << 16 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 8	/* *(d + 8) = t1 */
	bsrli	r11, r12, 16	/* h = v >> 16 */
	lwi	r12, r8, 4	/* v = *(as + 4) */
	bslli	r9, r12, 16	/* t1 = v << 16 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 4	/* *(d + 4) = t1 */
	bsrli	r11, r12, 16	/* h = v >> 16 */
	lwi	r12, r8, 0	/* v = *(as + 0) */
	bslli	r9, r12, 16	/* t1 = v << 16 */
	or	r9, r11, r9	/* t1 = h | t1 */
	swi	r9, r5, 0	/* *(d + 0) = t1 */
	addi	r4, r4, -32	/* n = n - 32 */
	bneid	r4, d_bu2_loop	/* while (n) loop */
	bsrli	r11, r12, 16	/* h = v >> 16 (IN DELAY SLOT) */

d_block_done:
	addi	r4, r0, 4	/* n = 4 */
	cmpu	r4, r4, r7	/* n = c - n (unsigned) */
	blti	r4, d_xfer_end	/* if n < 0, less than one word to transfer */

d_word_xfer:
	andi	r4, r7, 0xfffffffc	/* n = c & ~3 */
	rsub	r5, r4, r5	/* d = d - n */
	rsub	r6, r4, r6	/* s = s - n */
	rsub	r7, r4, r7	/* c = c - n */

	andi	r9, r6, 3	/* t1 = s & 3 */
	/* if temp != 0, unaligned transfers needed */
	bnei	r9, d_word_unaligned

d_word_aligned:
	addi	r4, r4, -4	/* n-- */
	lw	r9, r6, r4	/* t1 = *(s+n) */
	bneid	r4, d_word_aligned	/* loop */
	sw	r9, r5, r4	/* *(d+n) = t1 (IN DELAY SLOT) */

	bri	d_word_done

d_word_unaligned:
	andi	r8, r6, 0xfffffffc	/* as = s & ~3 */
	lw	r11, r8, r4	/* h = *(as + n) */

	addi	r9, r9, -1
	beqi	r9, d_word_u1	/* t1 was 1 => 1 byte offset */
	addi	r9, r9, -1
	beqi	r9, d_word_u2	/* t1 was 2 => 2 byte offset */

d_word_u3:
	bsrli	r11, r11, 8	/* h = h >> 8 */
d_wu3_loop:
	addi	r4, r4, -4	/* n = n - 4 */
	lw	r12, r8, r4	/* v = *(as + n) */
	bslli	r9, r12, 24	/* t1 = v << 24 */
	or	r9, r11, r9	/* t1 = h | t1 */
	sw	r9, r5, r4	/* *(d + n) = t1 */
	bneid	r4, d_wu3_loop	/* while (n) loop */
	bsrli	r11, r12, 8	/* h = v >> 8 (IN DELAY SLOT) */

	bri	d_word_done

d_word_u1:
	bsrli	r11, r11, 24	/* h = h >> 24 */
d_wu1_loop:
	addi	r4, r4, -4	/* n = n - 4 */
	lw	r12, r8, r4	/* v = *(as + n) */
	bslli	r9, r12, 8	/* t1 = v << 8 */
	or	r9, r11, r9	/* t1 = h | t1 */
	sw	r9, r5, r4	/* *(d + n) = t1 */
	bneid	r4, d_wu1_loop	/* while (n) loop */
	bsrli	r11, r12, 24	/* h = v >> 24 (IN DELAY SLOT) */

	bri	d_word_done

d_word_u2:
	bsrli	r11, r11, 16	/* h = h >> 16 */
d_wu2_loop:
	addi	r4, r4, -4	/* n = n - 4 */
	lw	r12, r8, r4	/* v = *(as + n) */
	bslli	r9, r12, 16	/* t1 = v << 16 */
	or	r9, r11, r9	/* t1 = h | t1 */
	sw	r9, r5, r4	/* *(d + n) = t1 */
	bneid	r4, d_wu2_loop	/* while (n) loop */
	bsrli	r11, r12, 16	/* h = v >> 16 (IN DELAY SLOT) */

d_word_done:

d_xfer_end:
d_xfer_end_loop:
	beqi	r7, d_done	/* while (c) */
	addi	r6, r6, -1	/* s-- */
	lbui	r9, r6, 0	/* t1 = *s */
	addi	r5, r5, -1	/* d-- */
	sbi	r9, r5, 0	/* *d = t1 */
	brid	d_xfer_end_loop	/* loop */
	addi	r7, r7, -1	/* c-- (IN DELAY SLOT) */

d_done:
	rtsd	r15, 8
	nop

	.end	memmove
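
A hedged aside, not part of the patch: the memmove entry above decides the copy direction with a single unsigned compare (cmpu, then bgei on s - d); when the source does not start below the destination, an ascending copy is safe even for overlapping buffers, and otherwise it copies from the top down. The same decision in C, with an illustrative name:

#include <stddef.h>
#include <stdint.h>

/* Direction choice as in the memmove entry above: ascend when s >= d,
 * so overlapping bytes are read before they are overwritten; descend
 * otherwise. Byte-at-a-time for clarity only. */
static void *move_bytes(void *v_dst, const void *v_src, size_t c)
{
	unsigned char *d = v_dst;
	const unsigned char *s = v_src;

	if ((uintptr_t)s >= (uintptr_t)d) {	/* cmpu/bgei on s - d */
		while (c--)
			*d++ = *s++;
	} else {
		d += c;
		s += c;
		while (c--)
			*--d = *--s;
	}
	return v_dst;
}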
diff --git a/arch/microblaze/lib/memcpy.c b/arch/microblaze/lib/memcpy.c
new file mode 100644
index 000000000000..5880119c4487
--- /dev/null
+++ b/arch/microblaze/lib/memcpy.c
@@ -0,0 +1,161 @@
/*
 * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
 * Copyright (C) 2008-2009 PetaLogix
 * Copyright (C) 2007 John Williams
 *
 * Reasonably optimised generic C-code for memcpy on Microblaze
 * This is generic C code to do efficient, alignment-aware memcpy.
 *
 * It is based on demo code originally Copyright 2001 by Intel Corp, taken from
 * http://www.embedded.com/showArticle.jhtml?articleID=19205567
 *
 * Attempts were made, unsuccessfully, to contact the original
 * author of this code (Michael Morrow, Intel). Below is the original
 * copyright notice.
 *
 * This software has been developed by Intel Corporation.
 * Intel specifically disclaims all warranties, express or
 * implied, and all liability, including consequential and
 * other indirect damages, for the use of this program, including
 * liability for infringement of any proprietary rights,
 * and including the warranties of merchantability and fitness
 * for a particular purpose. Intel does not assume any
 * responsibility for any errors which may appear in this program
 * nor any responsibility to update it.
 */

#include <linux/types.h>
#include <linux/stddef.h>
#include <linux/compiler.h>
#include <linux/module.h>

#include <linux/string.h>
#include <asm/system.h>

#ifdef __HAVE_ARCH_MEMCPY
void *memcpy(void *v_dst, const void *v_src, __kernel_size_t c)
{
	const char *src = v_src;
	char *dst = v_dst;
#ifndef CONFIG_OPT_LIB_FUNCTION
	/* Simple, byte oriented memcpy. */
	while (c--)
		*dst++ = *src++;

	return v_dst;
#else
	/* The following code tries to optimize the copy by using unsigned
	 * alignment. This will work fine if both source and destination are
	 * aligned on the same boundary. However, if they are aligned on
	 * different boundaries shifts will be necessary. This might result in
	 * bad performance on MicroBlaze systems without a barrel shifter.
	 */
	const uint32_t *i_src;
	uint32_t *i_dst;

	if (c >= 4) {
		unsigned value, buf_hold;

		/* Align the destination to a word boundary. */
		/* This is done in an endian independent manner. */
		switch ((unsigned long)dst & 3) {
		case 1:
			*dst++ = *src++;
			--c;
		case 2:
			*dst++ = *src++;
			--c;
		case 3:
			*dst++ = *src++;
			--c;
		}

		i_dst = (void *)dst;

		/* Choose a copy scheme based on the source */
		/* alignment relative to destination. */
		switch ((unsigned long)src & 3) {
		case 0x0:	/* Both byte offsets are aligned */
			i_src = (const void *)src;

			for (; c >= 4; c -= 4)
				*i_dst++ = *i_src++;

			src = (const void *)i_src;
			break;
		case 0x1:	/* Unaligned - Off by 1 */
			/* Word align the source */
			i_src = (const void *) ((unsigned)src & ~3);

			/* Load the holding buffer */
			buf_hold = *i_src++ << 8;

			for (; c >= 4; c -= 4) {
				value = *i_src++;
				*i_dst++ = buf_hold | value >> 24;
				buf_hold = value << 8;
			}

			/* Realign the source */
			src = (const void *)i_src;
			src -= 3;
			break;
		case 0x2:	/* Unaligned - Off by 2 */
			/* Word align the source */
			i_src = (const void *) ((unsigned)src & ~3);

			/* Load the holding buffer */
			buf_hold = *i_src++ << 16;

			for (; c >= 4; c -= 4) {
				value = *i_src++;
				*i_dst++ = buf_hold | value >> 16;
				buf_hold = value << 16;
			}

			/* Realign the source */
			src = (const void *)i_src;
			src -= 2;
			break;
		case 0x3:	/* Unaligned - Off by 3 */
			/* Word align the source */
			i_src = (const void *) ((unsigned)src & ~3);

			/* Load the holding buffer */
			buf_hold = *i_src++ << 24;

			for (; c >= 4; c -= 4) {
				value = *i_src++;
				*i_dst++ = buf_hold | value >> 8;
				buf_hold = value << 24;
			}

			/* Realign the source */
			src = (const void *)i_src;
			src -= 1;
			break;
		}
		dst = (void *)i_dst;
	}

	/* Finish off any remaining bytes */
	/* simple fast copy, ... unless a cache boundary is crossed */
	switch (c) {
	case 3:
		*dst++ = *src++;
	case 2:
		*dst++ = *src++;
	case 1:
		*dst++ = *src++;
	}

	return v_dst;
#endif
}
EXPORT_SYMBOL(memcpy);
#endif /* __HAVE_ARCH_MEMCPY */

void *cacheable_memcpy(void *d, const void *s, __kernel_size_t c)
{
	return memcpy(d, s, c);
}
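
A hedged generalization, not part of the patch: the three "Unaligned - Off by k" cases above are instances of a single formula. On a big-endian machine, with the source k bytes past a word boundary, each output word is buf_hold | (value >> 8*(4-k)), where buf_hold carries the previous word shifted left by 8*k. A sketch with an illustrative name; the kernel routine unrolls k = 1, 2, 3 into separate switch cases instead of parameterizing:

#include <stddef.h>
#include <stdint.h>

/* Generic form of the ascending "Off by k" cases (k = 1, 2, 3), with
 * big-endian shifts as on MicroBlaze. i_src must already be the word
 * aligned pointer, i.e. (unsigned)src & ~3 from the code above. */
static void copy_words_src_off_by_k(uint32_t *i_dst, const uint32_t *i_src,
				    size_t words, unsigned int k)
{
	uint32_t buf_hold = *i_src++ << (8 * k);

	while (words--) {
		uint32_t value = *i_src++;

		*i_dst++ = buf_hold | (value >> (8 * (4 - k)));
		buf_hold = value << (8 * k);
	}
}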
diff --git a/arch/microblaze/lib/memmove.c b/arch/microblaze/lib/memmove.c
new file mode 100644
index 000000000000..d4e9f49a71f7
--- /dev/null
+++ b/arch/microblaze/lib/memmove.c
@@ -0,0 +1,175 @@
/*
 * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
 * Copyright (C) 2008-2009 PetaLogix
 * Copyright (C) 2007 John Williams
 *
 * Reasonably optimised generic C-code for memmove on Microblaze
 * This is generic C code to do efficient, alignment-aware memmove.
 *
 * It is based on demo code originally Copyright 2001 by Intel Corp, taken from
 * http://www.embedded.com/showArticle.jhtml?articleID=19205567
 *
 * Attempts were made, unsuccessfully, to contact the original
 * author of this code (Michael Morrow, Intel). Below is the original
 * copyright notice.
 *
 * This software has been developed by Intel Corporation.
 * Intel specifically disclaims all warranties, express or
 * implied, and all liability, including consequential and
 * other indirect damages, for the use of this program, including
 * liability for infringement of any proprietary rights,
 * and including the warranties of merchantability and fitness
 * for a particular purpose. Intel does not assume any
 * responsibility for any errors which may appear in this program
 * nor any responsibility to update it.
 */

#include <linux/types.h>
#include <linux/stddef.h>
#include <linux/compiler.h>
#include <linux/module.h>
#include <linux/string.h>

#ifdef __HAVE_ARCH_MEMMOVE
void *memmove(void *v_dst, const void *v_src, __kernel_size_t c)
{
	const char *src = v_src;
	char *dst = v_dst;

#ifdef CONFIG_OPT_LIB_FUNCTION
	const uint32_t *i_src;
	uint32_t *i_dst;
#endif

	if (!c)
		return v_dst;

	/* Use memcpy when source is higher than dest */
	if (v_dst <= v_src)
		return memcpy(v_dst, v_src, c);

#ifndef CONFIG_OPT_LIB_FUNCTION
	/* copy backwards, from end to beginning */
	src += c;
	dst += c;

	/* Simple, byte oriented memmove. */
	while (c--)
		*--dst = *--src;

	return v_dst;
#else
	/* The following code tries to optimize the copy by using unsigned
	 * alignment. This will work fine if both source and destination are
	 * aligned on the same boundary. However, if they are aligned on
	 * different boundaries shifts will be necessary. This might result in
	 * bad performance on MicroBlaze systems without a barrel shifter.
	 */
	/* FIXME this part needs more testing */
	/* Do a descending copy - this is a bit trickier! */
	dst += c;
	src += c;

	if (c >= 4) {
		unsigned value, buf_hold;

		/* Align the destination to a word boundary. */
		/* This is done in an endian independent manner. */

		switch ((unsigned long)dst & 3) {
		case 3:
			*--dst = *--src;
			--c;
		case 2:
			*--dst = *--src;
			--c;
		case 1:
			*--dst = *--src;
			--c;
		}

		i_dst = (void *)dst;
		/* Choose a copy scheme based on the source */
		/* alignment relative to destination. */
		switch ((unsigned long)src & 3) {
		case 0x0:	/* Both byte offsets are aligned */

			i_src = (const void *)src;

			for (; c >= 4; c -= 4)
				*--i_dst = *--i_src;

			src = (const void *)i_src;
			break;
		case 0x1:	/* Unaligned - Off by 1 */
			/* Word align the source */
			i_src = (const void *) (((unsigned)src + 4) & ~3);

			/* Load the holding buffer */
			buf_hold = *--i_src >> 24;

			for (; c >= 4; c -= 4) {
				value = *--i_src;
				*--i_dst = value << 8 | buf_hold;
				buf_hold = value >> 24;
			}

			/* Realign the source */
			src = (const void *)i_src;
			src += 1;
			break;
		case 0x2:	/* Unaligned - Off by 2 */
			/* Word align the source */
			i_src = (const void *) (((unsigned)src + 4) & ~3);

			/* Load the holding buffer */
			buf_hold = *--i_src >> 16;

			for (; c >= 4; c -= 4) {
				value = *--i_src;
				*--i_dst = value << 16 | buf_hold;
				buf_hold = value >> 16;
			}

			/* Realign the source */
			src = (const void *)i_src;
			src += 2;
			break;
		case 0x3:	/* Unaligned - Off by 3 */
			/* Word align the source */
			i_src = (const void *) (((unsigned)src + 4) & ~3);

			/* Load the holding buffer */
			buf_hold = *--i_src >> 8;

			for (; c >= 4; c -= 4) {
				value = *--i_src;
				*--i_dst = value << 24 | buf_hold;
				buf_hold = value >> 8;
			}

			/* Realign the source */
			src = (const void *)i_src;
			src += 3;
			break;
		}
		dst = (void *)i_dst;
	}

	/* simple fast copy, ... unless a cache boundary is crossed */
	/* Finish off any remaining bytes */
	switch (c) {
	case 4:
		*--dst = *--src;
	case 3:
		*--dst = *--src;
	case 2:
		*--dst = *--src;
	case 1:
		*--dst = *--src;
	}
	return v_dst;
#endif
}
EXPORT_SYMBOL(memmove);
#endif /* __HAVE_ARCH_MEMMOVE */
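
The FIXME above is fair warning: the descending unaligned paths are the easiest to get wrong (note that the shift merge must be value-first, matching the d_block_u1/u2/u3 loops in fastcopy.S). A userspace harness along these lines, illustrative and not part of the patch, exercises every source/destination misalignment, length, and overlap against a byte-wise reference:

#include <assert.h>
#include <string.h>

/* Check overlapping moves across all misalignment pairs and lengths,
 * against a copy of the source taken before the move. Swap the libc
 * memmove here for the routine under test when porting. */
static void check_memmove(void)
{
	unsigned char buf[64], ref[64];
	size_t src, dst, n, i;

	for (src = 0; src < 8; src++)
		for (dst = 0; dst < 8; dst++)
			for (n = 0; n <= 40; n++) {
				for (i = 0; i < sizeof(buf); i++)
					buf[i] = (unsigned char)i;
				memcpy(ref + dst, buf + src, n);  /* expected bytes */
				memmove(buf + dst, buf + src, n); /* possibly overlapping */
				assert(!memcmp(buf + dst, ref + dst, n));
			}
}

int main(void)
{
	check_memmove();
	return 0;
}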
diff --git a/arch/microblaze/lib/memset.c b/arch/microblaze/lib/memset.c
new file mode 100644
index 000000000000..941dc8f94b03
--- /dev/null
+++ b/arch/microblaze/lib/memset.c
@@ -0,0 +1,82 @@
/*
 * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
 * Copyright (C) 2008-2009 PetaLogix
 * Copyright (C) 2007 John Williams
 *
 * Reasonably optimised generic C-code for memset on Microblaze
 * This is generic C code to do efficient, alignment-aware memset.
 *
 * It is based on demo code originally Copyright 2001 by Intel Corp, taken from
 * http://www.embedded.com/showArticle.jhtml?articleID=19205567
 *
 * Attempts were made, unsuccessfully, to contact the original
 * author of this code (Michael Morrow, Intel). Below is the original
 * copyright notice.
 *
 * This software has been developed by Intel Corporation.
 * Intel specifically disclaims all warranties, express or
 * implied, and all liability, including consequential and
 * other indirect damages, for the use of this program, including
 * liability for infringement of any proprietary rights,
 * and including the warranties of merchantability and fitness
 * for a particular purpose. Intel does not assume any
 * responsibility for any errors which may appear in this program
 * nor any responsibility to update it.
 */

#include <linux/types.h>
#include <linux/stddef.h>
#include <linux/compiler.h>
#include <linux/module.h>
#include <linux/string.h>

#ifdef __HAVE_ARCH_MEMSET
void *memset(void *v_src, int c, __kernel_size_t n)
{
	char *src = v_src;
#ifdef CONFIG_OPT_LIB_FUNCTION
	uint32_t *i_src;
	uint32_t w32;
#endif
	/* Truncate c to 8 bits */
	c = (c & 0xFF);

#ifdef CONFIG_OPT_LIB_FUNCTION
	/* Make a repeating word out of it */
	w32 = c;
	w32 |= w32 << 8;
	w32 |= w32 << 16;

	if (n >= 4) {
		/* Align the destination to a word boundary */
		/* This is done in an endian independent manner */
		switch ((unsigned) src & 3) {
		case 1:
			*src++ = c;
			--n;
		case 2:
			*src++ = c;
			--n;
		case 3:
			*src++ = c;
			--n;
		}

		i_src = (void *)src;

		/* Do as many full-word copies as we can */
		for (; n >= 4; n -= 4)
			*i_src++ = w32;

		src = (void *)i_src;
	}
#endif
	/* Simple, byte oriented memset for the rest of count. */
	while (n--)
		*src++ = c;

	return v_src;
}
EXPORT_SYMBOL(memset);
#endif /* __HAVE_ARCH_MEMSET */
diff --git a/arch/microblaze/lib/uaccess.c b/arch/microblaze/lib/uaccess.c
new file mode 100644
index 000000000000..8eb9df5a26c9
--- /dev/null
+++ b/arch/microblaze/lib/uaccess.c
@@ -0,0 +1,41 @@
/*
 * Copyright (C) 2006 Atmark Techno, Inc.
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
 * for more details.
 */

#include <linux/string.h>
#include <asm/uaccess.h>

#include <asm/bug.h>

long strnlen_user(const char __user *src, long count)
{
	return strlen(src) + 1;
}

#define __do_strncpy_from_user(dst, src, count, res)			\
	do {								\
		char *tmp;						\
		strncpy(dst, src, count);				\
		for (tmp = dst; *tmp && count > 0; tmp++, count--)	\
			;						\
		res = (tmp - dst);					\
	} while (0)

long __strncpy_from_user(char *dst, const char __user *src, long count)
{
	long res;
	__do_strncpy_from_user(dst, src, count, res);
	return res;
}

long strncpy_from_user(char *dst, const char __user *src, long count)
{
	long res = -EFAULT;
	if (access_ok(VERIFY_READ, src, 1))
		__do_strncpy_from_user(dst, src, count, res);
	return res;
}