Diffstat (limited to 'arch/xtensa/lib/strncpy_user.S')
-rw-r--r--   arch/xtensa/lib/strncpy_user.S | 224
1 file changed, 224 insertions, 0 deletions
diff --git a/arch/xtensa/lib/strncpy_user.S b/arch/xtensa/lib/strncpy_user.S
new file mode 100644
index 000000000000..71d55df43893
--- /dev/null
+++ b/arch/xtensa/lib/strncpy_user.S
@@ -0,0 +1,224 @@
/*
 *  arch/xtensa/lib/strncpy_user.S
 *
 *  This file is subject to the terms and conditions of the GNU General
 *  Public License.  See the file "COPYING" in the main directory of
 *  this archive for more details.
 *
 *  Returns: -EFAULT if an exception occurs before the terminator,
 *  N if the entire buffer is filled, else strlen.
 *
 *  Copyright (C) 2002 Tensilica Inc.
 */

#include <xtensa/coreasm.h>
#include <linux/errno.h>

/* Load or store instructions that may cause exceptions use the EX macro. */

#define EX(insn,reg1,reg2,offset,handler)       \
9:      insn    reg1, reg2, offset;             \
        .section __ex_table, "a";               \
        .word   9b, handler;                    \
        .previous
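
/*
 * How the fixup works: the "9:" label marks an access that may fault,
 * and the ".word 9b, handler" pair records its address together with
 * the fixup handler in the kernel's __ex_table section.  On a fault,
 * the exception handler looks up the faulting PC in __ex_table and
 * resumes execution at the registered handler instead of oopsing.
 */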

/*
 * char *__strncpy_user(char *dst, const char *src, size_t len)
 */
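
/*
 * Rough C equivalent of the semantics (a sketch for orientation only,
 * not the implementation; the return type is widened to long here so
 * the -EFAULT error code, produced by the fixup code at the end of
 * this file, fits):
 *
 *      long __strncpy_user(char *dst, const char *src, size_t len)
 *      {
 *              size_t n;
 *
 *              for (n = 0; n < len; n++) {
 *                      dst[n] = src[n];        // may fault
 *                      if (dst[n] == 0)
 *                              return n;       // strlen of the copy
 *              }
 *              return len;     // buffer full, no terminator seen
 *      }
 */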
        .text
        .begin literal
        .align  4
.Lmask0:
        .byte   0xff, 0x00, 0x00, 0x00
.Lmask1:
        .byte   0x00, 0xff, 0x00, 0x00
.Lmask2:
        .byte   0x00, 0x00, 0xff, 0x00
.Lmask3:
        .byte   0x00, 0x00, 0x00, 0xff
        .end literal
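
/*
 * Each mask covers exactly one byte of a 32-bit word, so that
 * "bnone aX, maskN, label" (branch if none of the masked bits are
 * set) branches when byte N of aX is zero.  Defining the masks as
 * byte sequences and loading them with l32r makes byte N of a mask
 * line up with byte N of a loaded word on either endianness.
 */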

# Register use
#   a0/ return address
#   a1/ stack pointer
#   a2/ return value
#   a3/ src
#   a4/ len
#   a5/ mask0
#   a6/ mask1
#   a7/ mask2
#   a8/ mask3
#   a9/ tmp
#   a10/ tmp
#   a11/ dst
#   a12/ tmp

        .align  4
        .global __strncpy_user
        .type   __strncpy_user,@function
__strncpy_user:
        entry   sp, 16                  # minimal stack frame
        # a2/ dst, a3/ src, a4/ len
        mov     a11, a2                 # leave dst in return value register
        beqz    a4, .Lret               # if len is zero
        l32r    a5, .Lmask0             # mask for byte 0
        l32r    a6, .Lmask1             # mask for byte 1
        l32r    a7, .Lmask2             # mask for byte 2
        l32r    a8, .Lmask3             # mask for byte 3
        bbsi.l  a3, 0, .Lsrc1mod2       # if only 8-bit aligned
        bbsi.l  a3, 1, .Lsrc2mod4       # if only 16-bit aligned
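
/*
 * The two bbsi.l branches above test the low bits of the src address
 * and peel off one to three leading bytes until src is word-aligned;
 * both byte-copy prologues rejoin the fast path at .Lsrcaligned.
 */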
.Lsrcaligned:                           # return here when src is word-aligned
        srli    a12, a4, 2              # number of loop iterations with 4B per loop
        movi    a9, 3
        bnone   a11, a9, .Laligned      # if dst is word-aligned as well
        j       .Ldstunaligned

.Lsrc1mod2:                             # src address is odd
        EX(l8ui, a9, a3, 0, fixup_l)    # get byte 0
        addi    a3, a3, 1               # advance src pointer
        EX(s8i, a9, a11, 0, fixup_s)    # store byte 0
        beqz    a9, .Lret               # if byte 0 is zero
        addi    a11, a11, 1             # advance dst pointer
        addi    a4, a4, -1              # decrement len
        beqz    a4, .Lret               # if len is zero
        bbci.l  a3, 1, .Lsrcaligned     # if src is now word-aligned

.Lsrc2mod4:                             # src address is 2 mod 4
        EX(l8ui, a9, a3, 0, fixup_l)    # get byte 0
        /* 1-cycle interlock */
        EX(s8i, a9, a11, 0, fixup_s)    # store byte 0
        beqz    a9, .Lret               # if byte 0 is zero
        addi    a11, a11, 1             # advance dst pointer
        addi    a4, a4, -1              # decrement len
        beqz    a4, .Lret               # if len is zero
        EX(l8ui, a9, a3, 1, fixup_l)    # get byte 1
        addi    a3, a3, 2               # advance src pointer
        EX(s8i, a9, a11, 0, fixup_s)    # store byte 1
        beqz    a9, .Lret               # if byte 1 is zero
        addi    a11, a11, 1             # advance dst pointer
        addi    a4, a4, -1              # decrement len
        bnez    a4, .Lsrcaligned        # if len is nonzero
.Lret:
        sub     a2, a11, a2             # compute strlen
        retw

/*
 * dst is word-aligned, src is word-aligned
 */
        .align  4                       # 1 mod 4 alignment for LOOPNEZ
        .byte   0                       # (0 mod 4 alignment for LBEG)
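
/*
 * The ".byte 0" above shifts the loopnez opcode to a 1 mod 4 address;
 * loopnez is a 3-byte instruction, so the loop body that follows (the
 * LBEG target of the zero-overhead loop) then starts word-aligned, as
 * the two alignment comments indicate.
 */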
.Laligned:
#if XCHAL_HAVE_LOOPS
        loopnez a12, .Loop1done
#else
        beqz    a12, .Loop1done
        slli    a12, a12, 2
        add     a12, a12, a11           # a12 = end of last 4B chunk
#endif
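
/*
 * Bytes 0-2 are tested before the word is stored, but byte 3 only
 * afterwards: if byte 3 alone is zero, the stored word already
 * contains the terminator, so no extra store is needed.
 */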
.Loop1:
        EX(l32i, a9, a3, 0, fixup_l)    # get word from src
        addi    a3, a3, 4               # advance src pointer
        bnone   a9, a5, .Lz0            # if byte 0 is zero
        bnone   a9, a6, .Lz1            # if byte 1 is zero
        bnone   a9, a7, .Lz2            # if byte 2 is zero
        EX(s32i, a9, a11, 0, fixup_s)   # store word to dst
        bnone   a9, a8, .Lz3            # if byte 3 is zero
        addi    a11, a11, 4             # advance dst pointer
#if !XCHAL_HAVE_LOOPS
        blt     a11, a12, .Loop1
#endif

.Loop1done:
        bbci.l  a4, 1, .L100
        # copy 2 bytes
        EX(l16ui, a9, a3, 0, fixup_l)
        addi    a3, a3, 2               # advance src pointer
#ifdef __XTENSA_EB__
        bnone   a9, a7, .Lz0            # if byte 2 is zero
        bnone   a9, a8, .Lz1            # if byte 3 is zero
#else
        bnone   a9, a5, .Lz0            # if byte 0 is zero
        bnone   a9, a6, .Lz1            # if byte 1 is zero
#endif
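
/*
 * l16ui loads the halfword into bits 15..0 of a9.  On a big-endian
 * core those bits are the byte-2/byte-3 lanes of a word (hence the
 * mask2/mask3 tests via a7/a8 above); on a little-endian core they
 * are the byte-0/byte-1 lanes, tested with mask0/mask1 (a5/a6).
 */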
        EX(s16i, a9, a11, 0, fixup_s)
        addi    a11, a11, 2             # advance dst pointer
.L100:
        bbci.l  a4, 0, .Lret
        EX(l8ui, a9, a3, 0, fixup_l)
        /* slot */
        EX(s8i, a9, a11, 0, fixup_s)
        beqz    a9, .Lret               # if byte is zero
        addi    a11, a11, 1-3           # advance dst ptr 1, but also cancel
                                        # the effect of adding 3 in .Lz3 code
        /* fall thru to .Lz3 and "retw" */

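/*
 * Terminator handlers: .LzN is reached when byte N of the current
 * word or halfword is zero.  Each handler stores the bytes up to and
 * including the terminator, then computes the return value.  On
 * __XTENSA_EB__ the live bytes are first shifted into the low half of
 * the register, because the narrow stores (s8i/s16i) take their data
 * from the least significant bits.
 */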
.Lz3:                                   # byte 3 is zero
        addi    a11, a11, 3             # advance dst pointer
        sub     a2, a11, a2             # compute strlen
        retw
.Lz0:                                   # byte 0 is zero
#ifdef __XTENSA_EB__
        movi    a9, 0
#endif /* __XTENSA_EB__ */
        EX(s8i, a9, a11, 0, fixup_s)
        sub     a2, a11, a2             # compute strlen
        retw
.Lz1:                                   # byte 1 is zero
#ifdef __XTENSA_EB__
        extui   a9, a9, 16, 16
#endif /* __XTENSA_EB__ */
        EX(s16i, a9, a11, 0, fixup_s)
        addi    a11, a11, 1             # advance dst pointer
        sub     a2, a11, a2             # compute strlen
        retw
.Lz2:                                   # byte 2 is zero
#ifdef __XTENSA_EB__
        extui   a9, a9, 16, 16
#endif /* __XTENSA_EB__ */
        EX(s16i, a9, a11, 0, fixup_s)
        movi    a9, 0
        EX(s8i, a9, a11, 2, fixup_s)
        addi    a11, a11, 2             # advance dst pointer
        sub     a2, a11, a2             # compute strlen
        retw

        .align  4                       # 1 mod 4 alignment for LOOPNEZ
        .byte   0                       # (0 mod 4 alignment for LBEG)
.Ldstunaligned:
/*
 * for now just use byte copy loop
 */
#if XCHAL_HAVE_LOOPS
        loopnez a4, .Lunalignedend
#else
        beqz    a4, .Lunalignedend
        add     a12, a11, a4            # a12 = ending address
#endif /* XCHAL_HAVE_LOOPS */
.Lnextbyte:
        EX(l8ui, a9, a3, 0, fixup_l)
        addi    a3, a3, 1
        EX(s8i, a9, a11, 0, fixup_s)
        beqz    a9, .Lunalignedend
        addi    a11, a11, 1
#if !XCHAL_HAVE_LOOPS
        blt     a11, a12, .Lnextbyte
#endif

.Lunalignedend:
        sub     a2, a11, a2             # compute strlen
        retw

        .section .fixup, "ax"
        .align  4

/* For now, just return -EFAULT.  Future implementations might
 * want to clear the remaining kernel-space buffer, like the fixup
 * implementation in memset().  That is why load and store fixups
 * are kept separate. */

fixup_s:
fixup_l:
        movi    a2, -EFAULT
        retw
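
/*
 * Hedged usage sketch (illustrative assumption, not part of this
 * file): a wrapper along the lines of strncpy_from_user() would check
 * access_ok() first and then interpret the result; handle_overflow()
 * below is a hypothetical helper.
 *
 *      long res = __strncpy_user(dst, src, count);
 *
 *      if (res < 0)            // faulted: res == -EFAULT
 *              return res;
 *      if (res == count)       // buffer filled; maybe unterminated
 *              handle_overflow();
 *      // otherwise res == strlen(dst)
 */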