aboutsummaryrefslogtreecommitdiffstats
path: root/arch/sh/lib
diff options
context:
space:
mode:
Diffstat (limited to 'arch/sh/lib')
-rw-r--r--arch/sh/lib/Makefile3
-rw-r--r--arch/sh/lib/clear_page.S152
-rw-r--r--arch/sh/lib/copy_page.S388
3 files changed, 542 insertions, 1 deletions
diff --git a/arch/sh/lib/Makefile b/arch/sh/lib/Makefile
index 6f7ac9eeb54f..ebb55d1149f5 100644
--- a/arch/sh/lib/Makefile
+++ b/arch/sh/lib/Makefile
@@ -8,6 +8,7 @@ lib-y = delay.o io.o memset.o memmove.o memchr.o \
8memcpy-y := memcpy.o 8memcpy-y := memcpy.o
9memcpy-$(CONFIG_CPU_SH4) := memcpy-sh4.o 9memcpy-$(CONFIG_CPU_SH4) := memcpy-sh4.o
10 10
11lib-y += $(memcpy-y) 11lib-$(CONFIG_MMU) += copy_page.o clear_page.o
12lib-y += $(memcpy-y)
12 13
13EXTRA_CFLAGS += -Werror 14EXTRA_CFLAGS += -Werror
diff --git a/arch/sh/lib/clear_page.S b/arch/sh/lib/clear_page.S
new file mode 100644
index 000000000000..7a7c81ee3f01
--- /dev/null
+++ b/arch/sh/lib/clear_page.S
@@ -0,0 +1,152 @@
1/*
2 * __clear_user_page, __clear_user, clear_page implementation of SuperH
3 *
4 * Copyright (C) 2001 Kaz Kojima
5 * Copyright (C) 2001, 2002 Niibe Yutaka
6 * Copyright (C) 2006 Paul Mundt
7 */
8#include <linux/linkage.h>
9#include <asm/page.h>
10
11/*
12 * clear_page_slow
13 * @to: P1 address
14 *
15 * void clear_page_slow(void *to)
16 */
17
18/*
19 * r0 --- scratch
20 * r4 --- to
21 * r5 --- to + PAGE_SIZE
22 */
/*
 * Zero one PAGE_SIZE page at r4 (P1/kernel address), 32 bytes per
 * loop iteration.  On SH-4 each iteration allocates the destination
 * cache block with movca.l and writes it back with ocbwb, so the
 * page is cleared without first reading it into the cache.
 */
23ENTRY(clear_page_slow)
24 mov r4,r5 ! r5 = to
25 mov.l .Llimit,r0 ! r0 = PAGE_SIZE - 28
26 add r0,r5 ! r5 = loop-termination value for cmp/eq below
27 mov #0,r0 ! r0 = 0, the value stored everywhere
28 !
291: ! clear one 32-byte block per iteration
30#if defined(CONFIG_CPU_SH3)
31 mov.l r0,@r4
32#elif defined(CONFIG_CPU_SH4)
33 movca.l r0,@r4 ! SH-4: store with cache-block allocate (no line fill)
34 mov r4,r1 ! remember block start for the ocbwb below
35#endif
36 add #32,r4
37 mov.l r0,@-r4 ! pre-decrement stores cover offsets 28,24,...,4
38 mov.l r0,@-r4
39 mov.l r0,@-r4
40 mov.l r0,@-r4
41 mov.l r0,@-r4
42 mov.l r0,@-r4
43 mov.l r0,@-r4
44#if defined(CONFIG_CPU_SH4)
45 ocbwb @r1 ! write back the block just filled
46#endif
47 cmp/eq r5,r4 ! r4 == to + PAGE_SIZE - 28 once the last block is done
48 bf/s 1b
49 add #28,r4 ! delay slot: advance r4 to the next 32-byte block
50 !
51 rts
52 nop
53.Llimit: .long (PAGE_SIZE-28)
54
/*
 * __clear_user: zero r5 bytes at user address r4.
 * Returns r0 = 0 on success, or the number of bytes left unzeroed
 * if a store faults (see .Lbad_clear_user and the __ex_table below,
 * which covers the stores at numeric labels 0: through 9:).
 */
55ENTRY(__clear_user)
56 !
57 mov #0, r0
58 mov #0xe0, r1 ! 0xffffffe0 = ~0x1f, 32-byte alignment mask
59 !
60 ! r4..(r4+31)&~32 -------- not aligned [ Area 0 ]
61 ! (r4+31)&~32..(r4+r5)&~32 -------- aligned [ Area 1 ]
62 ! (r4+r5)&~32..r4+r5 -------- not aligned [ Area 2 ]
63 !
64 ! Clear area 0
65 mov r4, r2 ! r2 = running destination pointer
66 !
67 tst r1, r5 ! length < 32
68 bt .Larea2 ! skip to remainder
69 !
70 add #31, r2 ! r2 = (r4 + 31) & ~0x1f = first aligned boundary
71 and r1, r2
72 cmp/eq r4, r2 ! already aligned? skip the byte loop
73 bt .Larea1
74 mov r2, r3 ! r3 = number of leading unaligned bytes
75 sub r4, r3
76 mov r3, r7 ! r7 keeps that count to adjust r5 afterwards
77 mov r4, r2
78 !
79.L0: dt r3
800: mov.b r0, @r2 ! byte store, fixup-covered (label 0:)
81 bf/s .L0
82 add #1, r2
83 !
84 sub r7, r5 ! shrink remaining length by the bytes just cleared
85 mov r2, r4
86.Larea1:
87 mov r4, r3 ! r3 = (r4 + r5) & ~0x1f = end of aligned area
88 add r5, r3
89 and r1, r3
90 cmp/hi r2, r3 ! anything left in the aligned area?
91 bf .Larea2
92 !
93 ! Clear area 1: 32 bytes per pass, longword stores (labels 1:-8:)
94#if defined(CONFIG_CPU_SH4)
951: movca.l r0, @r2 ! SH-4: allocate cache block instead of plain store
96#else
971: mov.l r0, @r2
98#endif
99 add #4, r2
1002: mov.l r0, @r2
101 add #4, r2
1023: mov.l r0, @r2
103 add #4, r2
1044: mov.l r0, @r2
105 add #4, r2
1065: mov.l r0, @r2
107 add #4, r2
1086: mov.l r0, @r2
109 add #4, r2
1107: mov.l r0, @r2
111 add #4, r2
1128: mov.l r0, @r2
113 add #4, r2
114 cmp/hi r2, r3
115 bt/s 1b
116 nop
117 !
118 ! Clear area 2: trailing bytes past the last 32-byte boundary
119.Larea2:
120 mov r4, r3 ! r3 = r4 + r5 = one past the last byte
121 add r5, r3
122 cmp/hs r3, r2 ! nothing left? (unsigned r2 >= r3)
123 bt/s .Ldone
124 sub r2, r3 ! delay slot: r3 = bytes remaining
125.L2: dt r3
1269: mov.b r0, @r2 ! byte store, fixup-covered (label 9:)
127 bf/s .L2
128 add #1, r2
129 !
130.Ldone: rts
131 mov #0, r0 ! return 0 as normal return
132
133 ! return the number of bytes remained
134.Lbad_clear_user:
135 mov r4, r0 ! r0 = (r4 + r5) - r2 = end - faulting position
136 add r5, r0
137 rts
138 sub r2, r0
139
140.section __ex_table,"a"
141 .align 2
142 .long 0b, .Lbad_clear_user
143 .long 1b, .Lbad_clear_user
144 .long 2b, .Lbad_clear_user
145 .long 3b, .Lbad_clear_user
146 .long 4b, .Lbad_clear_user
147 .long 5b, .Lbad_clear_user
148 .long 6b, .Lbad_clear_user
149 .long 7b, .Lbad_clear_user
150 .long 8b, .Lbad_clear_user
151 .long 9b, .Lbad_clear_user
152.previous
diff --git a/arch/sh/lib/copy_page.S b/arch/sh/lib/copy_page.S
new file mode 100644
index 000000000000..b879545fa28b
--- /dev/null
+++ b/arch/sh/lib/copy_page.S
@@ -0,0 +1,388 @@
1/*
2 * copy_page, __copy_user_page, __copy_user implementation of SuperH
3 *
4 * Copyright (C) 2001 Niibe Yutaka & Kaz Kojima
5 * Copyright (C) 2002 Toshinobu Sugioka
6 * Copyright (C) 2006 Paul Mundt
7 */
8#include <linux/linkage.h>
9#include <asm/page.h>
10
11/*
12 * copy_page
13 * @to: P1 address
14 * @from: P1 address
15 *
16 * void copy_page(void *to, void *from)
17 */
18
19/*
20 * r0, r1, r2, r3, r4, r5, r6, r7 --- scratch
21 * r8 --- from + PAGE_SIZE
22 * r9 --- not used
23 * r10 --- to
24 * r11 --- from
25 */
/*
 * Copy one PAGE_SIZE page from r5 to r4 (both P1 addresses),
 * 32 bytes per loop iteration using r0-r7 as data scratch.
 * On SH-4 the destination block is allocated with movca.l and
 * flushed with ocbwb, avoiding a read-for-ownership of the
 * destination line.
 */
26ENTRY(copy_page)
27 mov.l r8,@-r15 ! save callee-saved r8/r10/r11
28 mov.l r10,@-r15
29 mov.l r11,@-r15
30 mov r4,r10 ! r10 = to
31 mov r5,r11 ! r11 = from
32 mov r5,r8 ! r8 = from + PAGE_SIZE (loop limit)
33 mov.l .Lpsz,r0
34 add r0,r8
35 !
361: mov.l @r11+,r0 ! load 32 source bytes into r0-r7
37 mov.l @r11+,r1
38 mov.l @r11+,r2
39 mov.l @r11+,r3
40 mov.l @r11+,r4
41 mov.l @r11+,r5
42 mov.l @r11+,r6
43 mov.l @r11+,r7
44#if defined(CONFIG_CPU_SH3)
45 mov.l r0,@r10
46#elif defined(CONFIG_CPU_SH4)
47 movca.l r0,@r10 ! allocate destination cache block
48 mov r10,r0 ! remember block start for ocbwb
49#endif
50 add #32,r10
51 mov.l r7,@-r10 ! pre-decrement stores fill offsets 28..4
52 mov.l r6,@-r10
53 mov.l r5,@-r10
54 mov.l r4,@-r10
55 mov.l r3,@-r10
56 mov.l r2,@-r10
57 mov.l r1,@-r10
58#if defined(CONFIG_CPU_SH4)
59 ocbwb @r0 ! write back the block just written
60#endif
61 cmp/eq r11,r8 ! source pointer reached from + PAGE_SIZE?
62 bf/s 1b
63 add #28,r10 ! delay slot: advance dest to the next block
64 !
65 mov.l @r15+,r11 ! restore callee-saved registers
66 mov.l @r15+,r10
67 mov.l @r15+,r8
68 rts
69 nop
70
71 .align 2
72.Lpsz: .long PAGE_SIZE
73/*
74 * __kernel_size_t __copy_user(void *to, const void *from, __kernel_size_t n);
75 * Return the number of bytes NOT copied
76 */
/*
 * EX() tags one user-space access: a fault at the tagged instruction
 * jumps to the local fixup handler at 6000f via __ex_table.
 */
77#define EX(...) \
78 9999: __VA_ARGS__ ; \
79 .section __ex_table, "a"; \
80 .long 9999b, 6000f ; \
81 .previous
/*
 * __copy_user: copy r6 bytes from r5 to r4 (user memory on at least
 * one side; every user access is EX()-tagged).
 * Returns r0 = 0 on success, or the number of bytes NOT copied on a
 * fault (fixup computes r3 - r4 = end-of-dest - faulting dest).
 * r3 holds the last destination address for the fixup path; r8-r11
 * are callee-saved and only pushed on the large-copy path.
 */
82ENTRY(__copy_user)
83 ! Check if small number of bytes
84 mov #11,r0
85 mov r4,r3
86 cmp/gt r0,r6 ! r6 (len) > r0 (11)
87 bf/s .L_cleanup_loop_no_pop
88 add r6,r3 ! last destination address
89
90 ! Calculate bytes needed to align to src
91 mov.l r11,@-r15
92 neg r5,r0
93 mov.l r10,@-r15
94 add #4,r0
95 mov.l r9,@-r15
96 and #3,r0 ! r0 = (4 - (r5 & 3)) & 3 leading bytes
97 mov.l r8,@-r15
98 tst r0,r0
99 bt 2f
100
1011:
102 ! Copy bytes to long word align src
103EX( mov.b @r5+,r1 )
104 dt r0
105 add #-1,r6
106EX( mov.b r1,@r4 )
107 bf/s 1b
108 add #1,r4
109
110 ! Jump to appropriate routine depending on dest
1112: mov #3,r1
112 mov r6, r2 ! r2 = remaining longwords
113 and r4,r1 ! r1 = dest alignment (0..3)
114 shlr2 r2
115 shll2 r1 ! scale to a longword table index
116 mova .L_jump_tbl,r0
117 mov.l @(r0,r1),r1
118 jmp @r1
119 nop
120
121 .align 2
122.L_jump_tbl:
123 .long .L_dest00
124 .long .L_dest01
125 .long .L_dest10
126 .long .L_dest11
127
128/*
129 * Come here if there are less than 12 bytes to copy
130 *
131 * Keep the branch target close, so the bf/s callee doesn't overflow
132 * and result in a more expensive branch being inserted. This is the
133 * fast-path for small copies, the jump via the jump table will hit the
134 * default slow-path cleanup. -PFM.
135 */
136.L_cleanup_loop_no_pop:
137 tst r6,r6 ! Check explicitly for zero
138 bt 1f
139
1402:
141EX( mov.b @r5+,r0 )
142 dt r6
143EX( mov.b r0,@r4 )
144 bf/s 2b
145 add #1,r4
146
1471: mov #0,r0 ! normal return
1485000:

# Exception handler: r0 = r3 - r4 = bytes not copied
# (this copy of the handler runs with no registers pushed)
150.section .fixup, "ax"
1516000: ! label numbers renumbered by the assembler per EX() site
152 mov.l 8000f,r1
153 mov r3,r0
154 jmp @r1
155 sub r4,r0
156 .align 2
1578000: .long 5000b

159.previous
160 rts
161 nop
162
163! Destination = 00 (dest longword-aligned)
164
166.L_dest00:
167 ! Skip the large copy for small transfers
168 mov #(32+32-4), r0
169 cmp/gt r6, r0 ! r0 (60) > r6 (len)
170 bt 1f

172 ! Align dest to a 32 byte boundary
173 neg r4,r0
174 add #0x20, r0
175 and #0x1f, r0
176 tst r0, r0
177 bt 2f

179 sub r0, r6
180 shlr2 r0 ! r0 = leading longwords up to the boundary
1813:
182EX( mov.l @r5+,r1 )
183 dt r0
184EX( mov.l r1,@r4 )
185 bf/s 3b
186 add #4,r4

1882: ! main loop: 32 bytes per pass through r0-r2,r7-r11
189EX( mov.l @r5+,r0 )
190EX( mov.l @r5+,r1 )
191EX( mov.l @r5+,r2 )
192EX( mov.l @r5+,r7 )
193EX( mov.l @r5+,r8 )
194EX( mov.l @r5+,r9 )
195EX( mov.l @r5+,r10 )
196EX( mov.l @r5+,r11 )
197#ifdef CONFIG_CPU_SH4
198EX( movca.l r0,@r4 ) ! SH-4: allocate dest cache block
199#else
200EX( mov.l r0,@r4 )
201#endif
202 add #-32, r6
203EX( mov.l r1,@(4,r4) )
204 mov #32, r0
205EX( mov.l r2,@(8,r4) )
206 cmp/gt r6, r0 ! r0 (32) > r6 (len)
207EX( mov.l r7,@(12,r4) )
208EX( mov.l r8,@(16,r4) )
209EX( mov.l r9,@(20,r4) )
210EX( mov.l r10,@(24,r4) )
211EX( mov.l r11,@(28,r4) )
212 bf/s 2b
213 add #32,r4

2151: mov r6, r0 ! tail: copy remaining longwords
216 shlr2 r0
217 tst r0, r0
218 bt .L_cleanup
2191:
220EX( mov.l @r5+,r1 )
221 dt r0
222EX( mov.l r1,@r4 )
223 bf/s 1b
224 add #4,r4

226 bra .L_cleanup
227 nop

229! Destination = 10 (dest 2-byte aligned; recombine with xtrct)

231.L_dest10:
232 mov r2,r7
233 shlr2 r7
234 shlr r7 ! r7 = 32-byte chunks (longwords / 8)
235 tst r7,r7
236 mov #7,r0
237 bt/s 1f
238 and r0,r2 ! r2 = leftover longwords (0..7)
2392:
240 dt r7
241#ifdef CONFIG_CPU_LITTLE_ENDIAN
242EX( mov.l @r5+,r0 )
243EX( mov.l @r5+,r1 )
244EX( mov.l @r5+,r8 )
245EX( mov.l @r5+,r9 )
246EX( mov.l @r5+,r10 )
247EX( mov.w r0,@r4 )
248 add #2,r4
249 xtrct r1,r0 ! shift 16-bit-offset data back into longwords
250 xtrct r8,r1
251 xtrct r9,r8
252 xtrct r10,r9

254EX( mov.l r0,@r4 )
255EX( mov.l r1,@(4,r4) )
256EX( mov.l r8,@(8,r4) )
257EX( mov.l r9,@(12,r4) )

259EX( mov.l @r5+,r1 )
260EX( mov.l @r5+,r8 )
261EX( mov.l @r5+,r0 )
262 xtrct r1,r10
263 xtrct r8,r1
264 xtrct r0,r8
265 shlr16 r0
266EX( mov.l r10,@(16,r4) )
267EX( mov.l r1,@(20,r4) )
268EX( mov.l r8,@(24,r4) )
269EX( mov.w r0,@(28,r4) )
270 bf/s 2b
271 add #30,r4
272#else
273EX( mov.l @(28,r5),r0 )
274EX( mov.l @(24,r5),r8 )
275EX( mov.l @(20,r5),r9 )
276EX( mov.l @(16,r5),r10 )
277EX( mov.w r0,@(30,r4) )
278 add #-2,r4
279 xtrct r8,r0
280 xtrct r9,r8
281 xtrct r10,r9
282EX( mov.l r0,@(28,r4) )
283EX( mov.l r8,@(24,r4) )
284EX( mov.l r9,@(20,r4) )

286EX( mov.l @(12,r5),r0 )
287EX( mov.l @(8,r5),r8 )
288 xtrct r0,r10
289EX( mov.l @(4,r5),r9 )
290EX( mov.l r10,@(16,r4) ) ! FIX: was missing EX(); a fault on this
			   ! user store had no __ex_table entry and
			   ! would oops instead of hitting the fixup
291EX( mov.l @r5,r10 )
292 xtrct r8,r0
293 xtrct r9,r8
294 xtrct r10,r9
295EX( mov.l r0,@(12,r4) )
296EX( mov.l r8,@(8,r4) )
297 swap.w r10,r0
298EX( mov.l r9,@(4,r4) )
299EX( mov.w r0,@(2,r4) )

301 add #32,r5
302 bf/s 2b
303 add #34,r4
304#endif
305 tst r2,r2
306 bt .L_cleanup

3081: ! Read longword, write two words per iteration
309EX( mov.l @r5+,r0 )
310 dt r2
311#ifdef CONFIG_CPU_LITTLE_ENDIAN
312EX( mov.w r0,@r4 )
313 shlr16 r0
314EX( mov.w r0,@(2,r4) )
315#else
316EX( mov.w r0,@(2,r4) )
317 shlr16 r0
318EX( mov.w r0,@r4 )
319#endif
320 bf/s 1b
321 add #4,r4

323 bra .L_cleanup
324 nop

326! Destination = 01 or 11 (odd dest address)

328.L_dest01:
329.L_dest11:
330 ! Read longword, write byte, word, byte per iteration
331EX( mov.l @r5+,r0 )
332 dt r2
333#ifdef CONFIG_CPU_LITTLE_ENDIAN
334EX( mov.b r0,@r4 )
335 shlr8 r0
336 add #1,r4
337EX( mov.w r0,@r4 )
338 shlr16 r0
339EX( mov.b r0,@(2,r4) )
340 bf/s .L_dest01
341 add #3,r4
342#else
343EX( mov.b r0,@(3,r4) )
344 shlr8 r0
345 swap.w r0,r7
346EX( mov.b r7,@r4 )
347 add #1,r4
348EX( mov.w r0,@r4 )
349 bf/s .L_dest01
350 add #3,r4
351#endif

353! Cleanup last few bytes
354.L_cleanup:
355 mov r6,r0
356 and #3,r0 ! r6 & 3 trailing bytes
357 tst r0,r0
358 bt .L_exit
359 mov r0,r6

361.L_cleanup_loop:
362EX( mov.b @r5+,r0 )
363 dt r6
364EX( mov.b r0,@r4 )
365 bf/s .L_cleanup_loop
366 add #1,r4

368.L_exit:
369 mov #0,r0 ! normal return

3715000:

# Exception handler: r0 = r3 - r4 = bytes not copied,
# then fall through past .previous to pop r8-r11 and return
373.section .fixup, "ax"
3756000:
376 mov.l 8000f,r1
377 mov r3,r0
378 jmp @r1
379 sub r4,r0
380 .align 2
3818000: .long 5000b

383.previous
384 mov.l @r15+,r8 ! restore callee-saved registers pushed at entry
385 mov.l @r15+,r9
386 mov.l @r15+,r10
387 rts
388 mov.l @r15+,r11