aboutsummaryrefslogtreecommitdiffstats
path: root/arch/parisc/lib
diff options
context:
space:
mode:
Diffstat (limited to 'arch/parisc/lib')
-rw-r--r--arch/parisc/lib/Makefile2
-rw-r--r--arch/parisc/lib/fixup.S98
-rw-r--r--arch/parisc/lib/lusercopy.S318
-rw-r--r--arch/parisc/lib/memcpy.c461
4 files changed, 322 insertions, 557 deletions
diff --git a/arch/parisc/lib/Makefile b/arch/parisc/lib/Makefile
index 8fa92b8d839a..f2dac4d73b1b 100644
--- a/arch/parisc/lib/Makefile
+++ b/arch/parisc/lib/Makefile
@@ -2,7 +2,7 @@
2# Makefile for parisc-specific library files 2# Makefile for parisc-specific library files
3# 3#
4 4
5lib-y := lusercopy.o bitops.o checksum.o io.o memset.o fixup.o memcpy.o \ 5lib-y := lusercopy.o bitops.o checksum.o io.o memset.o memcpy.o \
6 ucmpdi2.o delay.o 6 ucmpdi2.o delay.o
7 7
8obj-y := iomap.o 8obj-y := iomap.o
diff --git a/arch/parisc/lib/fixup.S b/arch/parisc/lib/fixup.S
deleted file mode 100644
index a5b72f22c7a6..000000000000
--- a/arch/parisc/lib/fixup.S
+++ /dev/null
@@ -1,98 +0,0 @@
1/*
2 * Linux/PA-RISC Project (http://www.parisc-linux.org/)
3 *
4 * Copyright (C) 2004 Randolph Chung <tausq@debian.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2, or (at your option)
9 * any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 * Fixup routines for kernel exception handling.
21 */
22#include <asm/asm-offsets.h>
23#include <asm/assembly.h>
24#include <asm/errno.h>
25#include <linux/linkage.h>
26
27#ifdef CONFIG_SMP
28 .macro get_fault_ip t1 t2
29 loadgp
30 addil LT%__per_cpu_offset,%r27
31 LDREG RT%__per_cpu_offset(%r1),\t1
32 /* t2 = smp_processor_id() */
33 mfctl 30,\t2
34 ldw TI_CPU(\t2),\t2
35#ifdef CONFIG_64BIT
36 extrd,u \t2,63,32,\t2
37#endif
38 /* t2 = &__per_cpu_offset[smp_processor_id()]; */
39 LDREGX \t2(\t1),\t2
40 addil LT%exception_data,%r27
41 LDREG RT%exception_data(%r1),\t1
42 /* t1 = this_cpu_ptr(&exception_data) */
43 add,l \t1,\t2,\t1
44 /* %r27 = t1->fault_gp - restore gp */
45 LDREG EXCDATA_GP(\t1), %r27
46 /* t1 = t1->fault_ip */
47 LDREG EXCDATA_IP(\t1), \t1
48 .endm
49#else
50 .macro get_fault_ip t1 t2
51 loadgp
52 /* t1 = this_cpu_ptr(&exception_data) */
53 addil LT%exception_data,%r27
54 LDREG RT%exception_data(%r1),\t2
55 /* %r27 = t2->fault_gp - restore gp */
56 LDREG EXCDATA_GP(\t2), %r27
57 /* t1 = t2->fault_ip */
58 LDREG EXCDATA_IP(\t2), \t1
59 .endm
60#endif
61
62 .level LEVEL
63
64 .text
65 .section .fixup, "ax"
66
67 /* get_user() fixups, store -EFAULT in r8, and 0 in r9 */
68ENTRY_CFI(fixup_get_user_skip_1)
69 get_fault_ip %r1,%r8
70 ldo 4(%r1), %r1
71 ldi -EFAULT, %r8
72 bv %r0(%r1)
73 copy %r0, %r9
74ENDPROC_CFI(fixup_get_user_skip_1)
75
76ENTRY_CFI(fixup_get_user_skip_2)
77 get_fault_ip %r1,%r8
78 ldo 8(%r1), %r1
79 ldi -EFAULT, %r8
80 bv %r0(%r1)
81 copy %r0, %r9
82ENDPROC_CFI(fixup_get_user_skip_2)
83
84 /* put_user() fixups, store -EFAULT in r8 */
85ENTRY_CFI(fixup_put_user_skip_1)
86 get_fault_ip %r1,%r8
87 ldo 4(%r1), %r1
88 bv %r0(%r1)
89 ldi -EFAULT, %r8
90ENDPROC_CFI(fixup_put_user_skip_1)
91
92ENTRY_CFI(fixup_put_user_skip_2)
93 get_fault_ip %r1,%r8
94 ldo 8(%r1), %r1
95 bv %r0(%r1)
96 ldi -EFAULT, %r8
97ENDPROC_CFI(fixup_put_user_skip_2)
98
diff --git a/arch/parisc/lib/lusercopy.S b/arch/parisc/lib/lusercopy.S
index 56845de6b5df..f01188c044ee 100644
--- a/arch/parisc/lib/lusercopy.S
+++ b/arch/parisc/lib/lusercopy.S
@@ -5,6 +5,8 @@
5 * Copyright (C) 2000 Richard Hirst <rhirst with parisc-linux.org> 5 * Copyright (C) 2000 Richard Hirst <rhirst with parisc-linux.org>
6 * Copyright (C) 2001 Matthieu Delahaye <delahaym at esiee.fr> 6 * Copyright (C) 2001 Matthieu Delahaye <delahaym at esiee.fr>
7 * Copyright (C) 2003 Randolph Chung <tausq with parisc-linux.org> 7 * Copyright (C) 2003 Randolph Chung <tausq with parisc-linux.org>
8 * Copyright (C) 2017 Helge Deller <deller@gmx.de>
9 * Copyright (C) 2017 John David Anglin <dave.anglin@bell.net>
8 * 10 *
9 * 11 *
10 * This program is free software; you can redistribute it and/or modify 12 * This program is free software; you can redistribute it and/or modify
@@ -132,4 +134,320 @@ ENDPROC_CFI(lstrnlen_user)
132 134
133 .procend 135 .procend
134 136
137
138
139/*
140 * unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len)
141 *
142 * Inputs:
143 * - sr1 already contains space of source region
144 * - sr2 already contains space of destination region
145 *
146 * Returns:
147 * - number of bytes that could not be copied.
148 * On success, this will be zero.
149 *
150 * This code is based on a C-implementation of a copy routine written by
151 * Randolph Chung, which in turn was derived from glibc.
152 *
153 * Several strategies are tried to get the best performance for various
154 * conditions. In the optimal case, we copy by loops that copy 32- or 16-bytes
155 * at a time using general registers. Unaligned copies are handled either by
156 * aligning the destination and then using shift-and-write method, or in a few
157 * cases by falling back to a byte-at-a-time copy.
158 *
159 * Testing with various alignments and buffer sizes shows that this code is
160 * often >10x faster than a simple byte-at-a-time copy, even for strangely
161 * aligned operands. It is interesting to note that the glibc version of memcpy
162 * (written in C) is actually quite fast already. This routine is able to beat
163 * it by 30-40% for aligned copies because of the loop unrolling, but in some
164 * cases the glibc version is still slightly faster. This lends more
165 * credibility that gcc can generate very good code as long as we are careful.
166 *
167 * Possible optimizations:
168 * - add cache prefetching
169 * - try not to use the post-increment address modifiers; they may create
170 * additional interlocks. Assumption is that those were only efficient on old
171 * machines (pre PA8000 processors)
172 */
173
174 dst = arg0
175 src = arg1
176 len = arg2
177 end = arg3
178 t1 = r19
179 t2 = r20
180 t3 = r21
181 t4 = r22
182 srcspc = sr1
183 dstspc = sr2
184 /* a0..a3 reuse t4, t1, t2, t3 as the word window of the shift-copy path */
185 t0 = r1
186 a1 = t1
187 a2 = t2
188 a3 = t3
189 a0 = t4
190 /* written at .Lcopy_dstaligned, read back at .Lcda_finish */
191 save_src = ret0
192 save_dst = ret1
193 save_len = r31
194
195ENTRY_CFI(pa_memcpy)
196 .proc
197 .callinfo NO_CALLS
198 .entry
199 /* All paths, including the fault fixups, finish at .Lcopy_done. */
200 /* Last destination address; .Lcopy_done returns end - dst */
201 add dst,len,end
202
203 /* short copy with less than 16 bytes? */
204 cmpib,COND(>>=),n 15,len,.Lbyte_loop /* COND() as on the other len compares */
205
206 /* same alignment? */
207 xor src,dst,t0
208 extru t0,31,2,t1
209 cmpib,<>,n 0,t1,.Lunaligned_copy
210
211#ifdef CONFIG_64BIT
212 /* only do 64-bit copies if we can get aligned. */
213 extru t0,31,3,t1
214 cmpib,<>,n 0,t1,.Lalign_loop32
215
216 /* loop until we are 64-bit aligned */
217.Lalign_loop64:
218 extru dst,31,3,t1
219 cmpib,=,n 0,t1,.Lcopy_loop_16
22020: ldb,ma 1(srcspc,src),t1
22121: stb,ma t1,1(dstspc,dst)
222 b .Lalign_loop64
223 ldo -1(len),len
224
225 ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
226 ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)
227 /* t0 still holds src^dst here, so the loop limit is loaded inside the loop */
228.Lcopy_loop_16:
229 ldi 31,t0 /* leave the 32-byte loop once len <= 31 */
230 cmpb,COND(>>=),n t0,len,.Lword_loop
231
23210: ldd 0(srcspc,src),t1
23311: ldd 8(srcspc,src),t2
234 ldo 16(src),src
23512: std,ma t1,8(dstspc,dst)
23613: std,ma t2,8(dstspc,dst)
23714: ldd 0(srcspc,src),t1
23815: ldd 8(srcspc,src),t2
239 ldo 16(src),src
24016: std,ma t1,8(dstspc,dst)
24117: std,ma t2,8(dstspc,dst)
242 /* 11b/15b: t1 was already loaded, so the fixup stores it before returning */
243 ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
244 ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy16_fault)
245 ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done)
246 ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done)
247 ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done)
248 ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy16_fault)
249 ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done)
250 ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done)
251
252 b .Lcopy_loop_16
253 ldo -32(len),len
254
255.Lword_loop:
256 cmpib,COND(>>=),n 3,len,.Lbyte_loop
25720: ldw,ma 4(srcspc,src),t1
25821: stw,ma t1,4(dstspc,dst)
259 b .Lword_loop
260 ldo -4(len),len
261
262 ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
263 ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)
264
265#endif /* CONFIG_64BIT */
266
267 /* loop until we are 32-bit aligned */
268.Lalign_loop32:
269 extru dst,31,2,t1
270 cmpib,=,n 0,t1,.Lcopy_loop_4
27120: ldb,ma 1(srcspc,src),t1
27221: stb,ma t1,1(dstspc,dst)
273 b .Lalign_loop32
274 ldo -1(len),len
275
276 ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
277 ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)
278
279 /* copy 16 bytes per iteration with word accesses while len > 15 */
280.Lcopy_loop_4:
281 cmpib,COND(>>=),n 15,len,.Lbyte_loop
282
28310: ldw 0(srcspc,src),t1
28411: ldw 4(srcspc,src),t2
28512: stw,ma t1,4(dstspc,dst)
28613: stw,ma t2,4(dstspc,dst)
28714: ldw 8(srcspc,src),t1
28815: ldw 12(srcspc,src),t2
289 ldo 16(src),src
29016: stw,ma t1,4(dstspc,dst)
29117: stw,ma t2,4(dstspc,dst)
292 /* 11b/15b: t1 was already loaded, so the fixup stores it before returning */
293 ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
294 ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy8_fault)
295 ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done)
296 ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done)
297 ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done)
298 ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy8_fault)
299 ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done)
300 ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done)
301
302 b .Lcopy_loop_4
303 ldo -16(len),len
304 /* copy the remaining len bytes one at a time */
305.Lbyte_loop:
306 cmpclr,COND(<>) len,%r0,%r0
307 b,n .Lcopy_done
30820: ldb 0(srcspc,src),t1
309 ldo 1(src),src
31021: stb,ma t1,1(dstspc,dst)
311 b .Lbyte_loop
312 ldo -1(len),len
313
314 ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
315 ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)
316 /* return value: end - dst, i.e. the number of bytes not stored */
317.Lcopy_done:
318 bv %r0(%r2)
319 sub end,dst,ret0
320
321
322 /* src and dst are not aligned the same way. */
323 /* need to go the hard way */
324.Lunaligned_copy:
325 /* align until dst is 32bit-word-aligned */
326 extru dst,31,2,t1
327 cmpib,COND(=),n 0,t1,.Lcopy_dstaligned
32820: ldb 0(srcspc,src),t1
329 ldo 1(src),src
33021: stb,ma t1,1(dstspc,dst)
331 b .Lunaligned_copy
332 ldo -1(len),len
333
334 ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
335 ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)
336
337.Lcopy_dstaligned:
338
339 /* store src, dst and len in safe place */
340 copy src,save_src
341 copy dst,save_dst
342 copy len,save_len
343
344 /* len now needs to give the number of words to copy */
345 SHRREG len,2,len
346
347 /*
348 * Copy from a not-aligned src to an aligned dst using shifts.
349 * Handles 4 words per loop.
350 */
351
352 depw,z src,28,2,t0
353 subi 32,t0,t0
354 mtsar t0
355 extru len,31,2,t0
356 cmpib,= 2,t0,.Lcase2
357 /* Make src aligned by rounding it down. */
358 depi 0,31,2,src
359
360 cmpiclr,<> 3,t0,%r0
361 b,n .Lcase3
362 cmpiclr,<> 1,t0,%r0
363 b,n .Lcase1
364.Lcase0:
365 cmpb,COND(=) %r0,len,.Lcda_finish
366 nop
367
3681: ldw,ma 4(srcspc,src), a3
369 ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
3701: ldw,ma 4(srcspc,src), a0
371 ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
372 b,n .Ldo3
373.Lcase1:
3741: ldw,ma 4(srcspc,src), a2
375 ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
3761: ldw,ma 4(srcspc,src), a3
377 ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
378 ldo -1(len),len
379 cmpb,COND(=),n %r0,len,.Ldo0
380.Ldo4:
3811: ldw,ma 4(srcspc,src), a0
382 ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
383 shrpw a2, a3, %sar, t0
3841: stw,ma t0, 4(dstspc,dst)
385 ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
386.Ldo3:
3871: ldw,ma 4(srcspc,src), a1
388 ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
389 shrpw a3, a0, %sar, t0
3901: stw,ma t0, 4(dstspc,dst)
391 ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
392.Ldo2:
3931: ldw,ma 4(srcspc,src), a2
394 ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
395 shrpw a0, a1, %sar, t0
3961: stw,ma t0, 4(dstspc,dst)
397 ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
398.Ldo1:
3991: ldw,ma 4(srcspc,src), a3
400 ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
401 shrpw a1, a2, %sar, t0
4021: stw,ma t0, 4(dstspc,dst)
403 ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
404 ldo -4(len),len
405 cmpb,COND(<>) %r0,len,.Ldo4
406 nop
407.Ldo0:
408 shrpw a2, a3, %sar, t0
4091: stw,ma t0, 4(dstspc,dst)
410 ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
411 /* fall through; a source read fault resumes here as well */
412.Lcda_rdfault:
413.Lcda_finish:
414 /* calculate new src, dst and len and jump to byte-copy loop */
415 sub dst,save_dst,t0 /* t0 = bytes stored since .Lcopy_dstaligned */
416 add save_src,t0,src
417 b .Lbyte_loop
418 sub save_len,t0,len
419
420.Lcase3:
4211: ldw,ma 4(srcspc,src), a0
422 ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
4231: ldw,ma 4(srcspc,src), a1
424 ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
425 b .Ldo2
426 ldo 1(len),len
427.Lcase2:
4281: ldw,ma 4(srcspc,src), a1
429 ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
4301: ldw,ma 4(srcspc,src), a2
431 ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
432 b .Ldo1
433 ldo 2(len),len
434
435 /* The 10: labels below mark the stores, the only instructions here that can fault. */
436 /* fault exception fixup handlers: */
437#ifdef CONFIG_64BIT
438.Lcopy16_fault:
439 b .Lcopy_done
44010: std,ma t1,8(dstspc,dst)
441 ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
442#endif
443
444.Lcopy8_fault:
445 b .Lcopy_done
44610: stw,ma t1,4(dstspc,dst)
447 ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
448
449 .exit
450ENDPROC_CFI(pa_memcpy)
451 .procend
452
135 .end 453 .end
diff --git a/arch/parisc/lib/memcpy.c b/arch/parisc/lib/memcpy.c
index f82ff10ed974..b3d47ec1d80a 100644
--- a/arch/parisc/lib/memcpy.c
+++ b/arch/parisc/lib/memcpy.c
@@ -2,7 +2,7 @@
2 * Optimized memory copy routines. 2 * Optimized memory copy routines.
3 * 3 *
4 * Copyright (C) 2004 Randolph Chung <tausq@debian.org> 4 * Copyright (C) 2004 Randolph Chung <tausq@debian.org>
5 * Copyright (C) 2013 Helge Deller <deller@gmx.de> 5 * Copyright (C) 2013-2017 Helge Deller <deller@gmx.de>
6 * 6 *
7 * This program is free software; you can redistribute it and/or modify 7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by 8 * it under the terms of the GNU General Public License as published by
@@ -21,474 +21,21 @@
21 * Portions derived from the GNU C Library 21 * Portions derived from the GNU C Library
22 * Copyright (C) 1991, 1997, 2003 Free Software Foundation, Inc. 22 * Copyright (C) 1991, 1997, 2003 Free Software Foundation, Inc.
23 * 23 *
24 * Several strategies are tried to try to get the best performance for various
25 * conditions. In the optimal case, we copy 64-bytes in an unrolled loop using
26 * fp regs. This is followed by loops that copy 32- or 16-bytes at a time using
27 * general registers. Unaligned copies are handled either by aligning the
28 * destination and then using shift-and-write method, or in a few cases by
29 * falling back to a byte-at-a-time copy.
30 *
31 * I chose to implement this in C because it is easier to maintain and debug,
32 * and in my experiments it appears that the C code generated by gcc (3.3/3.4
33 * at the time of writing) is fairly optimal. Unfortunately some of the
34 * semantics of the copy routine (exception handling) is difficult to express
35 * in C, so we have to play some tricks to get it to work.
36 *
37 * All the loads and stores are done via explicit asm() code in order to use
38 * the right space registers.
39 *
40 * Testing with various alignments and buffer sizes shows that this code is
41 * often >10x faster than a simple byte-at-a-time copy, even for strangely
42 * aligned operands. It is interesting to note that the glibc version
43 * of memcpy (written in C) is actually quite fast already. This routine is
44 * able to beat it by 30-40% for aligned copies because of the loop unrolling,
45 * but in some cases the glibc version is still slightly faster. This lends
46 * more credibility that gcc can generate very good code as long as we are
47 * careful.
48 *
49 * TODO:
50 * - cache prefetching needs more experimentation to get optimal settings
51 * - try not to use the post-increment address modifiers; they create additional
52 * interlocks
53 * - replace byte-copy loops with stybs sequences
54 */ 24 */
55 25
56#ifdef __KERNEL__
57#include <linux/module.h> 26#include <linux/module.h>
58#include <linux/compiler.h> 27#include <linux/compiler.h>
59#include <linux/uaccess.h> 28#include <linux/uaccess.h>
60#define s_space "%%sr1"
61#define d_space "%%sr2"
62#else
63#include "memcpy.h"
64#define s_space "%%sr0"
65#define d_space "%%sr0"
66#define pa_memcpy new2_copy
67#endif
68 29
69DECLARE_PER_CPU(struct exception_data, exception_data); 30DECLARE_PER_CPU(struct exception_data, exception_data);
70 31
71#define preserve_branch(label) do { \
72 volatile int dummy = 0; \
73 /* The following branch is never taken, it's just here to */ \
74 /* prevent gcc from optimizing away our exception code. */ \
75 if (unlikely(dummy != dummy)) \
76 goto label; \
77} while (0)
78
79#define get_user_space() (segment_eq(get_fs(), KERNEL_DS) ? 0 : mfsp(3)) 32#define get_user_space() (segment_eq(get_fs(), KERNEL_DS) ? 0 : mfsp(3))
80#define get_kernel_space() (0) 33#define get_kernel_space() (0)
81 34
82#define MERGE(w0, sh_1, w1, sh_2) ({ \
83 unsigned int _r; \
84 asm volatile ( \
85 "mtsar %3\n" \
86 "shrpw %1, %2, %%sar, %0\n" \
87 : "=r"(_r) \
88 : "r"(w0), "r"(w1), "r"(sh_2) \
89 ); \
90 _r; \
91})
92#define THRESHOLD 16
93
94#ifdef DEBUG_MEMCPY
95#define DPRINTF(fmt, args...) do { printk(KERN_DEBUG "%s:%d:%s ", __FILE__, __LINE__, __func__ ); printk(KERN_DEBUG fmt, ##args ); } while (0)
96#else
97#define DPRINTF(fmt, args...)
98#endif
99
100#define def_load_ai_insn(_insn,_sz,_tt,_s,_a,_t,_e) \
101 __asm__ __volatile__ ( \
102 "1:\t" #_insn ",ma " #_sz "(" _s ",%1), %0\n\t" \
103 ASM_EXCEPTIONTABLE_ENTRY(1b,_e) \
104 : _tt(_t), "+r"(_a) \
105 : \
106 : "r8")
107
108#define def_store_ai_insn(_insn,_sz,_tt,_s,_a,_t,_e) \
109 __asm__ __volatile__ ( \
110 "1:\t" #_insn ",ma %1, " #_sz "(" _s ",%0)\n\t" \
111 ASM_EXCEPTIONTABLE_ENTRY(1b,_e) \
112 : "+r"(_a) \
113 : _tt(_t) \
114 : "r8")
115
116#define ldbma(_s, _a, _t, _e) def_load_ai_insn(ldbs,1,"=r",_s,_a,_t,_e)
117#define stbma(_s, _t, _a, _e) def_store_ai_insn(stbs,1,"r",_s,_a,_t,_e)
118#define ldwma(_s, _a, _t, _e) def_load_ai_insn(ldw,4,"=r",_s,_a,_t,_e)
119#define stwma(_s, _t, _a, _e) def_store_ai_insn(stw,4,"r",_s,_a,_t,_e)
120#define flddma(_s, _a, _t, _e) def_load_ai_insn(fldd,8,"=f",_s,_a,_t,_e)
121#define fstdma(_s, _t, _a, _e) def_store_ai_insn(fstd,8,"f",_s,_a,_t,_e)
122
123#define def_load_insn(_insn,_tt,_s,_o,_a,_t,_e) \
124 __asm__ __volatile__ ( \
125 "1:\t" #_insn " " #_o "(" _s ",%1), %0\n\t" \
126 ASM_EXCEPTIONTABLE_ENTRY(1b,_e) \
127 : _tt(_t) \
128 : "r"(_a) \
129 : "r8")
130
131#define def_store_insn(_insn,_tt,_s,_t,_o,_a,_e) \
132 __asm__ __volatile__ ( \
133 "1:\t" #_insn " %0, " #_o "(" _s ",%1)\n\t" \
134 ASM_EXCEPTIONTABLE_ENTRY(1b,_e) \
135 : \
136 : _tt(_t), "r"(_a) \
137 : "r8")
138
139#define ldw(_s,_o,_a,_t,_e) def_load_insn(ldw,"=r",_s,_o,_a,_t,_e)
140#define stw(_s,_t,_o,_a,_e) def_store_insn(stw,"r",_s,_t,_o,_a,_e)
141
142#ifdef CONFIG_PREFETCH
143static inline void prefetch_src(const void *addr)
144{
145 __asm__("ldw 0(" s_space ",%0), %%r0" : : "r" (addr));
146}
147
148static inline void prefetch_dst(const void *addr)
149{
150 __asm__("ldd 0(" d_space ",%0), %%r0" : : "r" (addr));
151}
152#else
153#define prefetch_src(addr) do { } while(0)
154#define prefetch_dst(addr) do { } while(0)
155#endif
156
157#define PA_MEMCPY_OK 0
158#define PA_MEMCPY_LOAD_ERROR 1
159#define PA_MEMCPY_STORE_ERROR 2
160
161/* Copy from a not-aligned src to an aligned dst, using shifts. Handles 4 words
162 * per loop. This code is derived from glibc.
163 */
164static noinline unsigned long copy_dstaligned(unsigned long dst,
165 unsigned long src, unsigned long len)
166{
167 /* gcc complains that a2 and a3 may be uninitialized, but actually
168 * they cannot be. Initialize a2/a3 to shut gcc up.
169 */
170 register unsigned int a0, a1, a2 = 0, a3 = 0;
171 int sh_1, sh_2;
172
173 /* prefetch_src((const void *)src); */
174
175 /* Calculate how to shift a word read at the memory operation
176 aligned srcp to make it aligned for copy. */
177 sh_1 = 8 * (src % sizeof(unsigned int));
178 sh_2 = 8 * sizeof(unsigned int) - sh_1;
179
180 /* Make src aligned by rounding it down. */
181 src &= -sizeof(unsigned int);
182
183 switch (len % 4)
184 {
185 case 2:
186 /* a1 = ((unsigned int *) src)[0];
187 a2 = ((unsigned int *) src)[1]; */
188 ldw(s_space, 0, src, a1, cda_ldw_exc);
189 ldw(s_space, 4, src, a2, cda_ldw_exc);
190 src -= 1 * sizeof(unsigned int);
191 dst -= 3 * sizeof(unsigned int);
192 len += 2;
193 goto do1;
194 case 3:
195 /* a0 = ((unsigned int *) src)[0];
196 a1 = ((unsigned int *) src)[1]; */
197 ldw(s_space, 0, src, a0, cda_ldw_exc);
198 ldw(s_space, 4, src, a1, cda_ldw_exc);
199 src -= 0 * sizeof(unsigned int);
200 dst -= 2 * sizeof(unsigned int);
201 len += 1;
202 goto do2;
203 case 0:
204 if (len == 0)
205 return PA_MEMCPY_OK;
206 /* a3 = ((unsigned int *) src)[0];
207 a0 = ((unsigned int *) src)[1]; */
208 ldw(s_space, 0, src, a3, cda_ldw_exc);
209 ldw(s_space, 4, src, a0, cda_ldw_exc);
210 src -=-1 * sizeof(unsigned int);
211 dst -= 1 * sizeof(unsigned int);
212 len += 0;
213 goto do3;
214 case 1:
215 /* a2 = ((unsigned int *) src)[0];
216 a3 = ((unsigned int *) src)[1]; */
217 ldw(s_space, 0, src, a2, cda_ldw_exc);
218 ldw(s_space, 4, src, a3, cda_ldw_exc);
219 src -=-2 * sizeof(unsigned int);
220 dst -= 0 * sizeof(unsigned int);
221 len -= 1;
222 if (len == 0)
223 goto do0;
224 goto do4; /* No-op. */
225 }
226
227 do
228 {
229 /* prefetch_src((const void *)(src + 4 * sizeof(unsigned int))); */
230do4:
231 /* a0 = ((unsigned int *) src)[0]; */
232 ldw(s_space, 0, src, a0, cda_ldw_exc);
233 /* ((unsigned int *) dst)[0] = MERGE (a2, sh_1, a3, sh_2); */
234 stw(d_space, MERGE (a2, sh_1, a3, sh_2), 0, dst, cda_stw_exc);
235do3:
236 /* a1 = ((unsigned int *) src)[1]; */
237 ldw(s_space, 4, src, a1, cda_ldw_exc);
238 /* ((unsigned int *) dst)[1] = MERGE (a3, sh_1, a0, sh_2); */
239 stw(d_space, MERGE (a3, sh_1, a0, sh_2), 4, dst, cda_stw_exc);
240do2:
241 /* a2 = ((unsigned int *) src)[2]; */
242 ldw(s_space, 8, src, a2, cda_ldw_exc);
243 /* ((unsigned int *) dst)[2] = MERGE (a0, sh_1, a1, sh_2); */
244 stw(d_space, MERGE (a0, sh_1, a1, sh_2), 8, dst, cda_stw_exc);
245do1:
246 /* a3 = ((unsigned int *) src)[3]; */
247 ldw(s_space, 12, src, a3, cda_ldw_exc);
248 /* ((unsigned int *) dst)[3] = MERGE (a1, sh_1, a2, sh_2); */
249 stw(d_space, MERGE (a1, sh_1, a2, sh_2), 12, dst, cda_stw_exc);
250
251 src += 4 * sizeof(unsigned int);
252 dst += 4 * sizeof(unsigned int);
253 len -= 4;
254 }
255 while (len != 0);
256
257do0:
258 /* ((unsigned int *) dst)[0] = MERGE (a2, sh_1, a3, sh_2); */
259 stw(d_space, MERGE (a2, sh_1, a3, sh_2), 0, dst, cda_stw_exc);
260
261 preserve_branch(handle_load_error);
262 preserve_branch(handle_store_error);
263
264 return PA_MEMCPY_OK;
265
266handle_load_error:
267 __asm__ __volatile__ ("cda_ldw_exc:\n");
268 return PA_MEMCPY_LOAD_ERROR;
269
270handle_store_error:
271 __asm__ __volatile__ ("cda_stw_exc:\n");
272 return PA_MEMCPY_STORE_ERROR;
273}
274
275
276/* Returns PA_MEMCPY_OK, PA_MEMCPY_LOAD_ERROR or PA_MEMCPY_STORE_ERROR.
277 * In case of an access fault the faulty address can be read from the per_cpu
278 * exception data struct. */
279static noinline unsigned long pa_memcpy_internal(void *dstp, const void *srcp,
280 unsigned long len)
281{
282 register unsigned long src, dst, t1, t2, t3;
283 register unsigned char *pcs, *pcd;
284 register unsigned int *pws, *pwd;
285 register double *pds, *pdd;
286 unsigned long ret;
287
288 src = (unsigned long)srcp;
289 dst = (unsigned long)dstp;
290 pcs = (unsigned char *)srcp;
291 pcd = (unsigned char *)dstp;
292
293 /* prefetch_src((const void *)srcp); */
294
295 if (len < THRESHOLD)
296 goto byte_copy;
297
298 /* Check alignment */
299 t1 = (src ^ dst);
300 if (unlikely(t1 & (sizeof(double)-1)))
301 goto unaligned_copy;
302
303 /* src and dst have same alignment. */
304
305 /* Copy bytes till we are double-aligned. */
306 t2 = src & (sizeof(double) - 1);
307 if (unlikely(t2 != 0)) {
308 t2 = sizeof(double) - t2;
309 while (t2 && len) {
310 /* *pcd++ = *pcs++; */
311 ldbma(s_space, pcs, t3, pmc_load_exc);
312 len--;
313 stbma(d_space, t3, pcd, pmc_store_exc);
314 t2--;
315 }
316 }
317
318 pds = (double *)pcs;
319 pdd = (double *)pcd;
320
321#if 0
322 /* Copy 8 doubles at a time */
323 while (len >= 8*sizeof(double)) {
324 register double r1, r2, r3, r4, r5, r6, r7, r8;
325 /* prefetch_src((char *)pds + L1_CACHE_BYTES); */
326 flddma(s_space, pds, r1, pmc_load_exc);
327 flddma(s_space, pds, r2, pmc_load_exc);
328 flddma(s_space, pds, r3, pmc_load_exc);
329 flddma(s_space, pds, r4, pmc_load_exc);
330 fstdma(d_space, r1, pdd, pmc_store_exc);
331 fstdma(d_space, r2, pdd, pmc_store_exc);
332 fstdma(d_space, r3, pdd, pmc_store_exc);
333 fstdma(d_space, r4, pdd, pmc_store_exc);
334
335#if 0
336 if (L1_CACHE_BYTES <= 32)
337 prefetch_src((char *)pds + L1_CACHE_BYTES);
338#endif
339 flddma(s_space, pds, r5, pmc_load_exc);
340 flddma(s_space, pds, r6, pmc_load_exc);
341 flddma(s_space, pds, r7, pmc_load_exc);
342 flddma(s_space, pds, r8, pmc_load_exc);
343 fstdma(d_space, r5, pdd, pmc_store_exc);
344 fstdma(d_space, r6, pdd, pmc_store_exc);
345 fstdma(d_space, r7, pdd, pmc_store_exc);
346 fstdma(d_space, r8, pdd, pmc_store_exc);
347 len -= 8*sizeof(double);
348 }
349#endif
350
351 pws = (unsigned int *)pds;
352 pwd = (unsigned int *)pdd;
353
354word_copy:
355 while (len >= 8*sizeof(unsigned int)) {
356 register unsigned int r1,r2,r3,r4,r5,r6,r7,r8;
357 /* prefetch_src((char *)pws + L1_CACHE_BYTES); */
358 ldwma(s_space, pws, r1, pmc_load_exc);
359 ldwma(s_space, pws, r2, pmc_load_exc);
360 ldwma(s_space, pws, r3, pmc_load_exc);
361 ldwma(s_space, pws, r4, pmc_load_exc);
362 stwma(d_space, r1, pwd, pmc_store_exc);
363 stwma(d_space, r2, pwd, pmc_store_exc);
364 stwma(d_space, r3, pwd, pmc_store_exc);
365 stwma(d_space, r4, pwd, pmc_store_exc);
366
367 ldwma(s_space, pws, r5, pmc_load_exc);
368 ldwma(s_space, pws, r6, pmc_load_exc);
369 ldwma(s_space, pws, r7, pmc_load_exc);
370 ldwma(s_space, pws, r8, pmc_load_exc);
371 stwma(d_space, r5, pwd, pmc_store_exc);
372 stwma(d_space, r6, pwd, pmc_store_exc);
373 stwma(d_space, r7, pwd, pmc_store_exc);
374 stwma(d_space, r8, pwd, pmc_store_exc);
375 len -= 8*sizeof(unsigned int);
376 }
377
378 while (len >= 4*sizeof(unsigned int)) {
379 register unsigned int r1,r2,r3,r4;
380 ldwma(s_space, pws, r1, pmc_load_exc);
381 ldwma(s_space, pws, r2, pmc_load_exc);
382 ldwma(s_space, pws, r3, pmc_load_exc);
383 ldwma(s_space, pws, r4, pmc_load_exc);
384 stwma(d_space, r1, pwd, pmc_store_exc);
385 stwma(d_space, r2, pwd, pmc_store_exc);
386 stwma(d_space, r3, pwd, pmc_store_exc);
387 stwma(d_space, r4, pwd, pmc_store_exc);
388 len -= 4*sizeof(unsigned int);
389 }
390
391 pcs = (unsigned char *)pws;
392 pcd = (unsigned char *)pwd;
393
394byte_copy:
395 while (len) {
396 /* *pcd++ = *pcs++; */
397 ldbma(s_space, pcs, t3, pmc_load_exc);
398 stbma(d_space, t3, pcd, pmc_store_exc);
399 len--;
400 }
401
402 return PA_MEMCPY_OK;
403
404unaligned_copy:
405 /* possibly we are aligned on a word, but not on a double... */
406 if (likely((t1 & (sizeof(unsigned int)-1)) == 0)) {
407 t2 = src & (sizeof(unsigned int) - 1);
408
409 if (unlikely(t2 != 0)) {
410 t2 = sizeof(unsigned int) - t2;
411 while (t2) {
412 /* *pcd++ = *pcs++; */
413 ldbma(s_space, pcs, t3, pmc_load_exc);
414 stbma(d_space, t3, pcd, pmc_store_exc);
415 len--;
416 t2--;
417 }
418 }
419
420 pws = (unsigned int *)pcs;
421 pwd = (unsigned int *)pcd;
422 goto word_copy;
423 }
424
425 /* Align the destination. */
426 if (unlikely((dst & (sizeof(unsigned int) - 1)) != 0)) {
427 t2 = sizeof(unsigned int) - (dst & (sizeof(unsigned int) - 1));
428 while (t2) {
429 /* *pcd++ = *pcs++; */
430 ldbma(s_space, pcs, t3, pmc_load_exc);
431 stbma(d_space, t3, pcd, pmc_store_exc);
432 len--;
433 t2--;
434 }
435 dst = (unsigned long)pcd;
436 src = (unsigned long)pcs;
437 }
438
439 ret = copy_dstaligned(dst, src, len / sizeof(unsigned int));
440 if (ret)
441 return ret;
442
443 pcs += (len & -sizeof(unsigned int));
444 pcd += (len & -sizeof(unsigned int));
445 len %= sizeof(unsigned int);
446
447 preserve_branch(handle_load_error);
448 preserve_branch(handle_store_error);
449
450 goto byte_copy;
451
452handle_load_error:
453 __asm__ __volatile__ ("pmc_load_exc:\n");
454 return PA_MEMCPY_LOAD_ERROR;
455
456handle_store_error:
457 __asm__ __volatile__ ("pmc_store_exc:\n");
458 return PA_MEMCPY_STORE_ERROR;
459}
460
461
462/* Returns 0 for success, otherwise, returns number of bytes not transferred. */ 35/* Returns 0 for success, otherwise, returns number of bytes not transferred. */
463static unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len) 36extern unsigned long pa_memcpy(void *dst, const void *src,
464{ 37 unsigned long len);
465 unsigned long ret, fault_addr, reference;
466 struct exception_data *d;
467
468 ret = pa_memcpy_internal(dstp, srcp, len);
469 if (likely(ret == PA_MEMCPY_OK))
470 return 0;
471
472 /* if a load or store fault occured we can get the faulty addr */
473 d = this_cpu_ptr(&exception_data);
474 fault_addr = d->fault_addr;
475
476 /* error in load or store? */
477 if (ret == PA_MEMCPY_LOAD_ERROR)
478 reference = (unsigned long) srcp;
479 else
480 reference = (unsigned long) dstp;
481 38
482 DPRINTF("pa_memcpy: fault type = %lu, len=%lu fault_addr=%lu ref=%lu\n",
483 ret, len, fault_addr, reference);
484
485 if (fault_addr >= reference)
486 return len - (fault_addr - reference);
487 else
488 return len;
489}
490
491#ifdef __KERNEL__
492unsigned long __copy_to_user(void __user *dst, const void *src, 39unsigned long __copy_to_user(void __user *dst, const void *src,
493 unsigned long len) 40 unsigned long len)
494{ 41{
@@ -537,5 +84,3 @@ long probe_kernel_read(void *dst, const void *src, size_t size)
537 84
538 return __probe_kernel_read(dst, src, size); 85 return __probe_kernel_read(dst, src, size);
539} 86}
540
541#endif