author    Linus Torvalds <torvalds@linux-foundation.org>  2017-04-01 23:11:35 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2017-04-01 23:11:35 -0400
commit    346ce1d75c20ac74b131c28022ef814453d37564 (patch)
tree      61c8c4e0347b398e03d11acd43a010b511a8130a
parent    7d34ddbe476e2f00851002bfdec7cafde02b4311 (diff)
parent    476e75a44b56038bee9207242d4bc718f6b4de06 (diff)
Merge branch 'parisc-4.11-3' of git://git.kernel.org/pub/scm/linux/kernel/git/deller/parisc-linux
Pull parisc fixes from Helge Deller:

 "Al Viro reported that - in case of read faults - our copy_from_user()
  implementation may claim to have copied more bytes than it actually
  did. In order to fix this bug and because of the way how gcc optimizes
  register usage for inline assembly in C code, we had to replace our
  pa_memcpy() function with a pure assembler implementation.

  While fixing the memcpy bug we noticed some other issues with our
  get_user() and put_user() functions, e.g. nested faults may return
  wrong data. This is now fixed by a common fixup handler for
  get_user/put_user in the exception handler which additionally makes
  generated code smaller and faster.

  The third patch is a trivial one-line fix for a patch which went in
  during 4.11-rc and which avoids stalled CPU warnings after power
  shutdown (for parisc machines which can't plug power off themselves).

  Due to the rewrite of pa_memcpy() into assembly this patch got bigger
  than what I wanted to have sent at this stage.

  Those patches have been running in production during the last few
  days on our debian build servers without any further issues"

* 'parisc-4.11-3' of git://git.kernel.org/pub/scm/linux/kernel/git/deller/parisc-linux:
  parisc: Avoid stalled CPU warnings after system shutdown
  parisc: Clean up fixup routines for get_user()/put_user()
  parisc: Fix access fault handling in pa_memcpy()
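The copy_from_user() bug matters because callers treat the return value as the number of bytes that were *not* copied, and size their error handling around it. A minimal, hedged C sketch of that contract (illustration only, not part of this series; the helper and its error handling are made up for the example):

/*
 * Illustrative only: shows why an over-reported copy length is dangerous.
 * copy_from_user() must return exactly the number of bytes left uncopied;
 * if it under-reports, the zero-fill below misses uninitialized tail bytes.
 */
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/uaccess.h>

static int fetch_from_user(void *dst, const void __user *src, unsigned long len)
{
	unsigned long uncopied = copy_from_user(dst, src, len);

	if (uncopied) {
		/* clear whatever the faulting copy left behind, then report */
		memset(dst + (len - uncopied), 0, uncopied);
		return -EFAULT;
	}
	return 0;
}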
-rw-r--r--  arch/parisc/include/asm/uaccess.h |  59
-rw-r--r--  arch/parisc/kernel/parisc_ksyms.c |  10
-rw-r--r--  arch/parisc/kernel/process.c      |   2
-rw-r--r--  arch/parisc/lib/Makefile          |   2
-rw-r--r--  arch/parisc/lib/fixup.S           |  98
-rw-r--r--  arch/parisc/lib/lusercopy.S       | 318
-rw-r--r--  arch/parisc/lib/memcpy.c          | 461
-rw-r--r--  arch/parisc/mm/fault.c            |  17
8 files changed, 375 insertions(+), 592 deletions(-)
diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h
index edfbf9d6a6dd..8442727f28d2 100644
--- a/arch/parisc/include/asm/uaccess.h
+++ b/arch/parisc/include/asm/uaccess.h
@@ -65,6 +65,15 @@ struct exception_table_entry {
 	".previous\n"
 
 /*
+ * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() creates a special exception table entry
+ * (with lowest bit set) for which the fault handler in fixup_exception() will
+ * load -EFAULT into %r8 for a read or write fault, and zeroes the target
+ * register in case of a read fault in get_user().
+ */
+#define ASM_EXCEPTIONTABLE_ENTRY_EFAULT( fault_addr, except_addr )\
+	ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr + 1)
+
+/*
  * The page fault handler stores, in a per-cpu area, the following information
  * if a fixup routine is available.
  */
@@ -91,7 +100,7 @@ struct exception_data {
 #define __get_user(x, ptr) \
 ({ \
 	register long __gu_err __asm__ ("r8") = 0; \
-	register long __gu_val __asm__ ("r9") = 0; \
+	register long __gu_val; \
 	\
 	load_sr2(); \
 	switch (sizeof(*(ptr))) { \
@@ -107,22 +116,23 @@ struct exception_data {
 })
 
 #define __get_user_asm(ldx, ptr) \
-	__asm__("\n1:\t" ldx "\t0(%%sr2,%2),%0\n\t" \
-	ASM_EXCEPTIONTABLE_ENTRY(1b, fixup_get_user_skip_1)\
+	__asm__("1: " ldx " 0(%%sr2,%2),%0\n" \
+	"9:\n" \
+	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \
 	: "=r"(__gu_val), "=r"(__gu_err) \
-	: "r"(ptr), "1"(__gu_err) \
-	: "r1");
+	: "r"(ptr), "1"(__gu_err));
 
 #if !defined(CONFIG_64BIT)
 
 #define __get_user_asm64(ptr) \
-	__asm__("\n1:\tldw 0(%%sr2,%2),%0" \
-	"\n2:\tldw 4(%%sr2,%2),%R0\n\t" \
-	ASM_EXCEPTIONTABLE_ENTRY(1b, fixup_get_user_skip_2)\
-	ASM_EXCEPTIONTABLE_ENTRY(2b, fixup_get_user_skip_1)\
+	__asm__("	copy %%r0,%R0\n" \
+	"1: ldw 0(%%sr2,%2),%0\n" \
+	"2: ldw 4(%%sr2,%2),%R0\n" \
+	"9:\n" \
+	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \
+	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \
 	: "=r"(__gu_val), "=r"(__gu_err) \
-	: "r"(ptr), "1"(__gu_err) \
-	: "r1");
+	: "r"(ptr), "1"(__gu_err));
 
 #endif /* !defined(CONFIG_64BIT) */
 
@@ -148,32 +158,31 @@ struct exception_data {
  * The "__put_user/kernel_asm()" macros tell gcc they read from memory
  * instead of writing. This is because they do not write to any memory
  * gcc knows about, so there are no aliasing issues. These macros must
- * also be aware that "fixup_put_user_skip_[12]" are executed in the
- * context of the fault, and any registers used there must be listed
- * as clobbers. In this case only "r1" is used by the current routines.
- * r8/r9 are already listed as err/val.
+ * also be aware that fixups are executed in the context of the fault,
+ * and any registers used there must be listed as clobbers.
+ * r8 is already listed as err.
  */
 
 #define __put_user_asm(stx, x, ptr) \
 	__asm__ __volatile__ ( \
-	"\n1:\t" stx "\t%2,0(%%sr2,%1)\n\t" \
-	ASM_EXCEPTIONTABLE_ENTRY(1b, fixup_put_user_skip_1)\
+	"1: " stx " %2,0(%%sr2,%1)\n" \
+	"9:\n" \
+	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \
 	: "=r"(__pu_err) \
-	: "r"(ptr), "r"(x), "0"(__pu_err) \
-	: "r1")
+	: "r"(ptr), "r"(x), "0"(__pu_err))
 
 
 #if !defined(CONFIG_64BIT)
 
 #define __put_user_asm64(__val, ptr) do { \
 	__asm__ __volatile__ ( \
-	"\n1:\tstw %2,0(%%sr2,%1)" \
-	"\n2:\tstw %R2,4(%%sr2,%1)\n\t" \
-	ASM_EXCEPTIONTABLE_ENTRY(1b, fixup_put_user_skip_2)\
-	ASM_EXCEPTIONTABLE_ENTRY(2b, fixup_put_user_skip_1)\
+	"1: stw %2,0(%%sr2,%1)\n" \
+	"2: stw %R2,4(%%sr2,%1)\n" \
+	"9:\n" \
+	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \
+	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \
 	: "=r"(__pu_err) \
-	: "r"(ptr), "r"(__val), "0"(__pu_err) \
-	: "r1"); \
+	: "r"(ptr), "r"(__val), "0"(__pu_err)); \
 } while (0)
 
 #endif /* !defined(CONFIG_64BIT) */
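With the reworked macros above, a faulting access reaches the caller as -EFAULT in %r8 (the __gu_err/__pu_err register), and the common fixup now zeroes the destination register of a faulting read, so no stale register contents leak into the result. A hedged usage sketch of how generic kernel code (not from this series) observes that behaviour:

#include <linux/errno.h>
#include <linux/types.h>
#include <linux/uaccess.h>

static int copy_one_word(u32 __user *uptr, u32 *out)
{
	u32 val;

	/* a 4-byte access; on parisc this expands to __get_user_asm("ldw", ...) */
	if (get_user(val, uptr))
		return -EFAULT;		/* the fixup also left val == 0 */

	*out = val;

	/* write it back; a store fault likewise returns -EFAULT */
	return put_user(val, uptr);
}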
diff --git a/arch/parisc/kernel/parisc_ksyms.c b/arch/parisc/kernel/parisc_ksyms.c
index 7484b3d11e0d..c6d6272a934f 100644
--- a/arch/parisc/kernel/parisc_ksyms.c
+++ b/arch/parisc/kernel/parisc_ksyms.c
@@ -47,16 +47,6 @@ EXPORT_SYMBOL(__cmpxchg_u64);
 EXPORT_SYMBOL(lclear_user);
 EXPORT_SYMBOL(lstrnlen_user);
 
-/* Global fixups - defined as int to avoid creation of function pointers */
-extern int fixup_get_user_skip_1;
-extern int fixup_get_user_skip_2;
-extern int fixup_put_user_skip_1;
-extern int fixup_put_user_skip_2;
-EXPORT_SYMBOL(fixup_get_user_skip_1);
-EXPORT_SYMBOL(fixup_get_user_skip_2);
-EXPORT_SYMBOL(fixup_put_user_skip_1);
-EXPORT_SYMBOL(fixup_put_user_skip_2);
-
 #ifndef CONFIG_64BIT
 /* Needed so insmod can set dp value */
 extern int $global$;
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index b76f503eee4a..4516a5b53f38 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -143,6 +143,8 @@ void machine_power_off(void)
 	printk(KERN_EMERG "System shut down completed.\n"
 	       "Please power this system off now.");
 
+	/* prevent soft lockup/stalled CPU messages for endless loop. */
+	rcu_sysrq_start();
 	for (;;);
 }
 
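rcu_sysrq_start() marks the CPU as intentionally busy, so the endless for (;;) loop that waits for the operator to cut power no longer trips RCU stall or soft-lockup detectors. A hedged sketch of the same pattern in isolation (illustrative helper; the real change is just the two added lines above):

#include <linux/compiler.h>
#include <linux/printk.h>
#include <linux/rcupdate.h>

static void __noreturn wait_for_manual_power_off(void)
{
	pr_emerg("Please power this system off now.\n");

	/* tell RCU not to warn about this CPU: it will never report again */
	rcu_sysrq_start();
	for (;;)
		;	/* spin until the power is physically removed */
}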
diff --git a/arch/parisc/lib/Makefile b/arch/parisc/lib/Makefile
index 8fa92b8d839a..f2dac4d73b1b 100644
--- a/arch/parisc/lib/Makefile
+++ b/arch/parisc/lib/Makefile
@@ -2,7 +2,7 @@
 # Makefile for parisc-specific library files
 #
 
-lib-y	:= lusercopy.o bitops.o checksum.o io.o memset.o fixup.o memcpy.o \
+lib-y	:= lusercopy.o bitops.o checksum.o io.o memset.o memcpy.o \
 	   ucmpdi2.o delay.o
 
 obj-y	:= iomap.o
diff --git a/arch/parisc/lib/fixup.S b/arch/parisc/lib/fixup.S
deleted file mode 100644
index a5b72f22c7a6..000000000000
--- a/arch/parisc/lib/fixup.S
+++ /dev/null
@@ -1,98 +0,0 @@
1/*
2 * Linux/PA-RISC Project (http://www.parisc-linux.org/)
3 *
4 * Copyright (C) 2004 Randolph Chung <tausq@debian.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2, or (at your option)
9 * any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 * Fixup routines for kernel exception handling.
21 */
22#include <asm/asm-offsets.h>
23#include <asm/assembly.h>
24#include <asm/errno.h>
25#include <linux/linkage.h>
26
27#ifdef CONFIG_SMP
28 .macro get_fault_ip t1 t2
29 loadgp
30 addil LT%__per_cpu_offset,%r27
31 LDREG RT%__per_cpu_offset(%r1),\t1
32 /* t2 = smp_processor_id() */
33 mfctl 30,\t2
34 ldw TI_CPU(\t2),\t2
35#ifdef CONFIG_64BIT
36 extrd,u \t2,63,32,\t2
37#endif
38 /* t2 = &__per_cpu_offset[smp_processor_id()]; */
39 LDREGX \t2(\t1),\t2
40 addil LT%exception_data,%r27
41 LDREG RT%exception_data(%r1),\t1
42 /* t1 = this_cpu_ptr(&exception_data) */
43 add,l \t1,\t2,\t1
44 /* %r27 = t1->fault_gp - restore gp */
45 LDREG EXCDATA_GP(\t1), %r27
46 /* t1 = t1->fault_ip */
47 LDREG EXCDATA_IP(\t1), \t1
48 .endm
49#else
50 .macro get_fault_ip t1 t2
51 loadgp
52 /* t1 = this_cpu_ptr(&exception_data) */
53 addil LT%exception_data,%r27
54 LDREG RT%exception_data(%r1),\t2
55 /* %r27 = t2->fault_gp - restore gp */
56 LDREG EXCDATA_GP(\t2), %r27
57 /* t1 = t2->fault_ip */
58 LDREG EXCDATA_IP(\t2), \t1
59 .endm
60#endif
61
62 .level LEVEL
63
64 .text
65 .section .fixup, "ax"
66
67 /* get_user() fixups, store -EFAULT in r8, and 0 in r9 */
68ENTRY_CFI(fixup_get_user_skip_1)
69 get_fault_ip %r1,%r8
70 ldo 4(%r1), %r1
71 ldi -EFAULT, %r8
72 bv %r0(%r1)
73 copy %r0, %r9
74ENDPROC_CFI(fixup_get_user_skip_1)
75
76ENTRY_CFI(fixup_get_user_skip_2)
77 get_fault_ip %r1,%r8
78 ldo 8(%r1), %r1
79 ldi -EFAULT, %r8
80 bv %r0(%r1)
81 copy %r0, %r9
82ENDPROC_CFI(fixup_get_user_skip_2)
83
84 /* put_user() fixups, store -EFAULT in r8 */
85ENTRY_CFI(fixup_put_user_skip_1)
86 get_fault_ip %r1,%r8
87 ldo 4(%r1), %r1
88 bv %r0(%r1)
89 ldi -EFAULT, %r8
90ENDPROC_CFI(fixup_put_user_skip_1)
91
92ENTRY_CFI(fixup_put_user_skip_2)
93 get_fault_ip %r1,%r8
94 ldo 8(%r1), %r1
95 bv %r0(%r1)
96 ldi -EFAULT, %r8
97ENDPROC_CFI(fixup_put_user_skip_2)
98
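For contrast with the new scheme, the deleted routines above worked by reading the faulting instruction address out of the per-cpu exception_data, loading -EFAULT into %r8 (and 0 into %r9 for get_user), and branching 4 or 8 bytes past the fault. A hedged C rendering of that control flow (illustration only; the struct and resume_at() are placeholders, not kernel APIs):

/* Illustration of the removed fixup_{get,put}_user_skip_[12] logic. */
struct old_exception_data {
	unsigned long fault_ip;		/* saved by the page-fault handler */
	unsigned long fault_gp;		/* %r27 value to restore */
};

extern void resume_at(unsigned long ip);	/* stands in for "bv %r0(%r1)" */

static void old_style_fixup(struct old_exception_data *d,
			    unsigned long *r8, unsigned long *r9,
			    int insns_to_skip, int is_get_user)
{
	*r8 = (unsigned long)-14;	/* ldi -EFAULT, %r8 */
	if (is_get_user)
		*r9 = 0;		/* copy %r0, %r9 */

	/* ldo 4(%r1),%r1 or ldo 8(%r1),%r1: skip one or two instructions */
	resume_at(d->fault_ip + 4 * insns_to_skip);
}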
diff --git a/arch/parisc/lib/lusercopy.S b/arch/parisc/lib/lusercopy.S
index 56845de6b5df..f01188c044ee 100644
--- a/arch/parisc/lib/lusercopy.S
+++ b/arch/parisc/lib/lusercopy.S
@@ -5,6 +5,8 @@
5 * Copyright (C) 2000 Richard Hirst <rhirst with parisc-linux.org> 5 * Copyright (C) 2000 Richard Hirst <rhirst with parisc-linux.org>
6 * Copyright (C) 2001 Matthieu Delahaye <delahaym at esiee.fr> 6 * Copyright (C) 2001 Matthieu Delahaye <delahaym at esiee.fr>
7 * Copyright (C) 2003 Randolph Chung <tausq with parisc-linux.org> 7 * Copyright (C) 2003 Randolph Chung <tausq with parisc-linux.org>
8 * Copyright (C) 2017 Helge Deller <deller@gmx.de>
9 * Copyright (C) 2017 John David Anglin <dave.anglin@bell.net>
8 * 10 *
9 * 11 *
10 * This program is free software; you can redistribute it and/or modify 12 * This program is free software; you can redistribute it and/or modify
@@ -132,4 +134,320 @@ ENDPROC_CFI(lstrnlen_user)
132 134
133 .procend 135 .procend
134 136
137
138
139/*
140 * unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len)
141 *
142 * Inputs:
143 * - sr1 already contains space of source region
144 * - sr2 already contains space of destination region
145 *
146 * Returns:
147 * - number of bytes that could not be copied.
148 * On success, this will be zero.
149 *
150 * This code is based on a C-implementation of a copy routine written by
151 * Randolph Chung, which in turn was derived from the glibc.
152 *
153 * Several strategies are tried to try to get the best performance for various
154 * conditions. In the optimal case, we copy by loops that copy 32- or 16-bytes
155 * at a time using general registers. Unaligned copies are handled either by
156 * aligning the destination and then using shift-and-write method, or in a few
157 * cases by falling back to a byte-at-a-time copy.
158 *
159 * Testing with various alignments and buffer sizes shows that this code is
160 * often >10x faster than a simple byte-at-a-time copy, even for strangely
161 * aligned operands. It is interesting to note that the glibc version of memcpy
162 * (written in C) is actually quite fast already. This routine is able to beat
163 * it by 30-40% for aligned copies because of the loop unrolling, but in some
164 * cases the glibc version is still slightly faster. This lends more
165 * credibility that gcc can generate very good code as long as we are careful.
166 *
167 * Possible optimizations:
168 * - add cache prefetching
169 * - try not to use the post-increment address modifiers; they may create
170 * additional interlocks. Assumption is that those were only efficient on old
171 * machines (pre PA8000 processors)
172 */
173
174 dst = arg0
175 src = arg1
176 len = arg2
177 end = arg3
178 t1 = r19
179 t2 = r20
180 t3 = r21
181 t4 = r22
182 srcspc = sr1
183 dstspc = sr2
184
185 t0 = r1
186 a1 = t1
187 a2 = t2
188 a3 = t3
189 a0 = t4
190
191 save_src = ret0
192 save_dst = ret1
193 save_len = r31
194
195ENTRY_CFI(pa_memcpy)
196 .proc
197 .callinfo NO_CALLS
198 .entry
199
200 /* Last destination address */
201 add dst,len,end
202
203 /* short copy with less than 16 bytes? */
204 cmpib,>>=,n 15,len,.Lbyte_loop
205
206 /* same alignment? */
207 xor src,dst,t0
208 extru t0,31,2,t1
209 cmpib,<>,n 0,t1,.Lunaligned_copy
210
211#ifdef CONFIG_64BIT
212 /* only do 64-bit copies if we can get aligned. */
213 extru t0,31,3,t1
214 cmpib,<>,n 0,t1,.Lalign_loop32
215
216 /* loop until we are 64-bit aligned */
217.Lalign_loop64:
218 extru dst,31,3,t1
219 cmpib,=,n 0,t1,.Lcopy_loop_16
22020: ldb,ma 1(srcspc,src),t1
22121: stb,ma t1,1(dstspc,dst)
222 b .Lalign_loop64
223 ldo -1(len),len
224
225 ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
226 ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)
227
228 ldi 31,t0
229.Lcopy_loop_16:
230 cmpb,COND(>>=),n t0,len,.Lword_loop
231
23210: ldd 0(srcspc,src),t1
23311: ldd 8(srcspc,src),t2
234 ldo 16(src),src
23512: std,ma t1,8(dstspc,dst)
23613: std,ma t2,8(dstspc,dst)
23714: ldd 0(srcspc,src),t1
23815: ldd 8(srcspc,src),t2
239 ldo 16(src),src
24016: std,ma t1,8(dstspc,dst)
24117: std,ma t2,8(dstspc,dst)
242
243 ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
244 ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy16_fault)
245 ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done)
246 ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done)
247 ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done)
248 ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy16_fault)
249 ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done)
250 ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done)
251
252 b .Lcopy_loop_16
253 ldo -32(len),len
254
255.Lword_loop:
256 cmpib,COND(>>=),n 3,len,.Lbyte_loop
25720: ldw,ma 4(srcspc,src),t1
25821: stw,ma t1,4(dstspc,dst)
259 b .Lword_loop
260 ldo -4(len),len
261
262 ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
263 ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)
264
265#endif /* CONFIG_64BIT */
266
267 /* loop until we are 32-bit aligned */
268.Lalign_loop32:
269 extru dst,31,2,t1
270 cmpib,=,n 0,t1,.Lcopy_loop_4
27120: ldb,ma 1(srcspc,src),t1
27221: stb,ma t1,1(dstspc,dst)
273 b .Lalign_loop32
274 ldo -1(len),len
275
276 ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
277 ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)
278
279
280.Lcopy_loop_4:
281 cmpib,COND(>>=),n 15,len,.Lbyte_loop
282
28310: ldw 0(srcspc,src),t1
28411: ldw 4(srcspc,src),t2
28512: stw,ma t1,4(dstspc,dst)
28613: stw,ma t2,4(dstspc,dst)
28714: ldw 8(srcspc,src),t1
28815: ldw 12(srcspc,src),t2
289 ldo 16(src),src
29016: stw,ma t1,4(dstspc,dst)
29117: stw,ma t2,4(dstspc,dst)
292
293 ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
294 ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy8_fault)
295 ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done)
296 ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done)
297 ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done)
298 ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy8_fault)
299 ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done)
300 ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done)
301
302 b .Lcopy_loop_4
303 ldo -16(len),len
304
305.Lbyte_loop:
306 cmpclr,COND(<>) len,%r0,%r0
307 b,n .Lcopy_done
30820: ldb 0(srcspc,src),t1
309 ldo 1(src),src
31021: stb,ma t1,1(dstspc,dst)
311 b .Lbyte_loop
312 ldo -1(len),len
313
314 ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
315 ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)
316
317.Lcopy_done:
318 bv %r0(%r2)
319 sub end,dst,ret0
320
321
322 /* src and dst are not aligned the same way. */
323 /* need to go the hard way */
324.Lunaligned_copy:
325 /* align until dst is 32bit-word-aligned */
326 extru dst,31,2,t1
327 cmpib,COND(=),n 0,t1,.Lcopy_dstaligned
32820: ldb 0(srcspc,src),t1
329 ldo 1(src),src
33021: stb,ma t1,1(dstspc,dst)
331 b .Lunaligned_copy
332 ldo -1(len),len
333
334 ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
335 ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)
336
337.Lcopy_dstaligned:
338
339 /* store src, dst and len in safe place */
340 copy src,save_src
341 copy dst,save_dst
342 copy len,save_len
343
344 /* len now needs give number of words to copy */
345 SHRREG len,2,len
346
347 /*
348 * Copy from a not-aligned src to an aligned dst using shifts.
349 * Handles 4 words per loop.
350 */
351
352 depw,z src,28,2,t0
353 subi 32,t0,t0
354 mtsar t0
355 extru len,31,2,t0
356 cmpib,= 2,t0,.Lcase2
357 /* Make src aligned by rounding it down. */
358 depi 0,31,2,src
359
360 cmpiclr,<> 3,t0,%r0
361 b,n .Lcase3
362 cmpiclr,<> 1,t0,%r0
363 b,n .Lcase1
364.Lcase0:
365 cmpb,= %r0,len,.Lcda_finish
366 nop
367
3681: ldw,ma 4(srcspc,src), a3
369 ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
3701: ldw,ma 4(srcspc,src), a0
371 ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
372 b,n .Ldo3
373.Lcase1:
3741: ldw,ma 4(srcspc,src), a2
375 ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
3761: ldw,ma 4(srcspc,src), a3
377 ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
378 ldo -1(len),len
379 cmpb,=,n %r0,len,.Ldo0
380.Ldo4:
3811: ldw,ma 4(srcspc,src), a0
382 ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
383 shrpw a2, a3, %sar, t0
3841: stw,ma t0, 4(dstspc,dst)
385 ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
386.Ldo3:
3871: ldw,ma 4(srcspc,src), a1
388 ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
389 shrpw a3, a0, %sar, t0
3901: stw,ma t0, 4(dstspc,dst)
391 ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
392.Ldo2:
3931: ldw,ma 4(srcspc,src), a2
394 ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
395 shrpw a0, a1, %sar, t0
3961: stw,ma t0, 4(dstspc,dst)
397 ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
398.Ldo1:
3991: ldw,ma 4(srcspc,src), a3
400 ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
401 shrpw a1, a2, %sar, t0
4021: stw,ma t0, 4(dstspc,dst)
403 ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
404 ldo -4(len),len
405 cmpb,<> %r0,len,.Ldo4
406 nop
407.Ldo0:
408 shrpw a2, a3, %sar, t0
4091: stw,ma t0, 4(dstspc,dst)
410 ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
411
412.Lcda_rdfault:
413.Lcda_finish:
414 /* calculate new src, dst and len and jump to byte-copy loop */
415 sub dst,save_dst,t0
416 add save_src,t0,src
417 b .Lbyte_loop
418 sub save_len,t0,len
419
420.Lcase3:
4211: ldw,ma 4(srcspc,src), a0
422 ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
4231: ldw,ma 4(srcspc,src), a1
424 ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
425 b .Ldo2
426 ldo 1(len),len
427.Lcase2:
4281: ldw,ma 4(srcspc,src), a1
429 ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
4301: ldw,ma 4(srcspc,src), a2
431 ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
432 b .Ldo1
433 ldo 2(len),len
434
435
436 /* fault exception fixup handlers: */
437#ifdef CONFIG_64BIT
438.Lcopy16_fault:
43910: b .Lcopy_done
440 std,ma t1,8(dstspc,dst)
441 ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
442#endif
443
444.Lcopy8_fault:
44510: b .Lcopy_done
446 stw,ma t1,4(dstspc,dst)
447 ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
448
449 .exit
450ENDPROC_CFI(pa_memcpy)
451 .procend
452
135 .end 453 .end
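The header comment of pa_memcpy() above describes the strategy: take an aligned fast path where possible, fall back to a shift-merge or a byte-at-a-time copy otherwise, and on any fault return how many bytes were left. A hedged C sketch of that control flow (illustration only; the authoritative version is the assembly just added, whose loads and stores are each covered by exception-table entries):

static unsigned long pa_memcpy_sketch(void *dstp, const void *srcp,
				      unsigned long len)
{
	unsigned char *dst = dstp;
	const unsigned char *src = srcp;
	unsigned long end = (unsigned long)dstp + len;
	unsigned long mismatch = ((unsigned long)src ^ (unsigned long)dst) & 3;

	if (len >= 16 && mismatch == 0) {
		/* same word alignment: align dst, then copy word-wise
		 * (the assembly unrolls this 4x, or 2x 64-bit on 64-bit kernels) */
		while (((unsigned long)dst & 3) && len) {
			*dst++ = *src++;
			len--;
		}
		while (len >= 4) {
			*(unsigned int *)dst = *(const unsigned int *)src;
			dst += 4;
			src += 4;
			len -= 4;
		}
	}

	/* byte tail; also the simple fallback for mismatched alignment */
	while (len) {
		*dst++ = *src++;
		len--;
	}

	/* a fault would exit early: "end - dst" is the count not copied */
	return end - (unsigned long)dst;
}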
diff --git a/arch/parisc/lib/memcpy.c b/arch/parisc/lib/memcpy.c
index f82ff10ed974..b3d47ec1d80a 100644
--- a/arch/parisc/lib/memcpy.c
+++ b/arch/parisc/lib/memcpy.c
@@ -2,7 +2,7 @@
2 * Optimized memory copy routines. 2 * Optimized memory copy routines.
3 * 3 *
4 * Copyright (C) 2004 Randolph Chung <tausq@debian.org> 4 * Copyright (C) 2004 Randolph Chung <tausq@debian.org>
5 * Copyright (C) 2013 Helge Deller <deller@gmx.de> 5 * Copyright (C) 2013-2017 Helge Deller <deller@gmx.de>
6 * 6 *
7 * This program is free software; you can redistribute it and/or modify 7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by 8 * it under the terms of the GNU General Public License as published by
@@ -21,474 +21,21 @@
21 * Portions derived from the GNU C Library 21 * Portions derived from the GNU C Library
22 * Copyright (C) 1991, 1997, 2003 Free Software Foundation, Inc. 22 * Copyright (C) 1991, 1997, 2003 Free Software Foundation, Inc.
23 * 23 *
24 * Several strategies are tried to try to get the best performance for various
25 * conditions. In the optimal case, we copy 64-bytes in an unrolled loop using
26 * fp regs. This is followed by loops that copy 32- or 16-bytes at a time using
27 * general registers. Unaligned copies are handled either by aligning the
28 * destination and then using shift-and-write method, or in a few cases by
29 * falling back to a byte-at-a-time copy.
30 *
31 * I chose to implement this in C because it is easier to maintain and debug,
32 * and in my experiments it appears that the C code generated by gcc (3.3/3.4
33 * at the time of writing) is fairly optimal. Unfortunately some of the
34 * semantics of the copy routine (exception handling) is difficult to express
35 * in C, so we have to play some tricks to get it to work.
36 *
37 * All the loads and stores are done via explicit asm() code in order to use
38 * the right space registers.
39 *
40 * Testing with various alignments and buffer sizes shows that this code is
41 * often >10x faster than a simple byte-at-a-time copy, even for strangely
42 * aligned operands. It is interesting to note that the glibc version
43 * of memcpy (written in C) is actually quite fast already. This routine is
44 * able to beat it by 30-40% for aligned copies because of the loop unrolling,
45 * but in some cases the glibc version is still slightly faster. This lends
46 * more credibility that gcc can generate very good code as long as we are
47 * careful.
48 *
49 * TODO:
50 * - cache prefetching needs more experimentation to get optimal settings
51 * - try not to use the post-increment address modifiers; they create additional
52 * interlocks
53 * - replace byte-copy loops with stybs sequences
54 */ 24 */
55 25
56#ifdef __KERNEL__
57#include <linux/module.h> 26#include <linux/module.h>
58#include <linux/compiler.h> 27#include <linux/compiler.h>
59#include <linux/uaccess.h> 28#include <linux/uaccess.h>
60#define s_space "%%sr1"
61#define d_space "%%sr2"
62#else
63#include "memcpy.h"
64#define s_space "%%sr0"
65#define d_space "%%sr0"
66#define pa_memcpy new2_copy
67#endif
68 29
69DECLARE_PER_CPU(struct exception_data, exception_data); 30DECLARE_PER_CPU(struct exception_data, exception_data);
70 31
71#define preserve_branch(label) do { \
72 volatile int dummy = 0; \
73 /* The following branch is never taken, it's just here to */ \
74 /* prevent gcc from optimizing away our exception code. */ \
75 if (unlikely(dummy != dummy)) \
76 goto label; \
77} while (0)
78
79#define get_user_space() (segment_eq(get_fs(), KERNEL_DS) ? 0 : mfsp(3)) 32#define get_user_space() (segment_eq(get_fs(), KERNEL_DS) ? 0 : mfsp(3))
80#define get_kernel_space() (0) 33#define get_kernel_space() (0)
81 34
82#define MERGE(w0, sh_1, w1, sh_2) ({ \
83 unsigned int _r; \
84 asm volatile ( \
85 "mtsar %3\n" \
86 "shrpw %1, %2, %%sar, %0\n" \
87 : "=r"(_r) \
88 : "r"(w0), "r"(w1), "r"(sh_2) \
89 ); \
90 _r; \
91})
92#define THRESHOLD 16
93
94#ifdef DEBUG_MEMCPY
95#define DPRINTF(fmt, args...) do { printk(KERN_DEBUG "%s:%d:%s ", __FILE__, __LINE__, __func__ ); printk(KERN_DEBUG fmt, ##args ); } while (0)
96#else
97#define DPRINTF(fmt, args...)
98#endif
99
100#define def_load_ai_insn(_insn,_sz,_tt,_s,_a,_t,_e) \
101 __asm__ __volatile__ ( \
102 "1:\t" #_insn ",ma " #_sz "(" _s ",%1), %0\n\t" \
103 ASM_EXCEPTIONTABLE_ENTRY(1b,_e) \
104 : _tt(_t), "+r"(_a) \
105 : \
106 : "r8")
107
108#define def_store_ai_insn(_insn,_sz,_tt,_s,_a,_t,_e) \
109 __asm__ __volatile__ ( \
110 "1:\t" #_insn ",ma %1, " #_sz "(" _s ",%0)\n\t" \
111 ASM_EXCEPTIONTABLE_ENTRY(1b,_e) \
112 : "+r"(_a) \
113 : _tt(_t) \
114 : "r8")
115
116#define ldbma(_s, _a, _t, _e) def_load_ai_insn(ldbs,1,"=r",_s,_a,_t,_e)
117#define stbma(_s, _t, _a, _e) def_store_ai_insn(stbs,1,"r",_s,_a,_t,_e)
118#define ldwma(_s, _a, _t, _e) def_load_ai_insn(ldw,4,"=r",_s,_a,_t,_e)
119#define stwma(_s, _t, _a, _e) def_store_ai_insn(stw,4,"r",_s,_a,_t,_e)
120#define flddma(_s, _a, _t, _e) def_load_ai_insn(fldd,8,"=f",_s,_a,_t,_e)
121#define fstdma(_s, _t, _a, _e) def_store_ai_insn(fstd,8,"f",_s,_a,_t,_e)
122
123#define def_load_insn(_insn,_tt,_s,_o,_a,_t,_e) \
124 __asm__ __volatile__ ( \
125 "1:\t" #_insn " " #_o "(" _s ",%1), %0\n\t" \
126 ASM_EXCEPTIONTABLE_ENTRY(1b,_e) \
127 : _tt(_t) \
128 : "r"(_a) \
129 : "r8")
130
131#define def_store_insn(_insn,_tt,_s,_t,_o,_a,_e) \
132 __asm__ __volatile__ ( \
133 "1:\t" #_insn " %0, " #_o "(" _s ",%1)\n\t" \
134 ASM_EXCEPTIONTABLE_ENTRY(1b,_e) \
135 : \
136 : _tt(_t), "r"(_a) \
137 : "r8")
138
139#define ldw(_s,_o,_a,_t,_e) def_load_insn(ldw,"=r",_s,_o,_a,_t,_e)
140#define stw(_s,_t,_o,_a,_e) def_store_insn(stw,"r",_s,_t,_o,_a,_e)
141
142#ifdef CONFIG_PREFETCH
143static inline void prefetch_src(const void *addr)
144{
145 __asm__("ldw 0(" s_space ",%0), %%r0" : : "r" (addr));
146}
147
148static inline void prefetch_dst(const void *addr)
149{
150 __asm__("ldd 0(" d_space ",%0), %%r0" : : "r" (addr));
151}
152#else
153#define prefetch_src(addr) do { } while(0)
154#define prefetch_dst(addr) do { } while(0)
155#endif
156
157#define PA_MEMCPY_OK 0
158#define PA_MEMCPY_LOAD_ERROR 1
159#define PA_MEMCPY_STORE_ERROR 2
160
161/* Copy from a not-aligned src to an aligned dst, using shifts. Handles 4 words
162 * per loop. This code is derived from glibc.
163 */
164static noinline unsigned long copy_dstaligned(unsigned long dst,
165 unsigned long src, unsigned long len)
166{
167 /* gcc complains that a2 and a3 may be uninitialized, but actually
168 * they cannot be. Initialize a2/a3 to shut gcc up.
169 */
170 register unsigned int a0, a1, a2 = 0, a3 = 0;
171 int sh_1, sh_2;
172
173 /* prefetch_src((const void *)src); */
174
175 /* Calculate how to shift a word read at the memory operation
176 aligned srcp to make it aligned for copy. */
177 sh_1 = 8 * (src % sizeof(unsigned int));
178 sh_2 = 8 * sizeof(unsigned int) - sh_1;
179
180 /* Make src aligned by rounding it down. */
181 src &= -sizeof(unsigned int);
182
183 switch (len % 4)
184 {
185 case 2:
186 /* a1 = ((unsigned int *) src)[0];
187 a2 = ((unsigned int *) src)[1]; */
188 ldw(s_space, 0, src, a1, cda_ldw_exc);
189 ldw(s_space, 4, src, a2, cda_ldw_exc);
190 src -= 1 * sizeof(unsigned int);
191 dst -= 3 * sizeof(unsigned int);
192 len += 2;
193 goto do1;
194 case 3:
195 /* a0 = ((unsigned int *) src)[0];
196 a1 = ((unsigned int *) src)[1]; */
197 ldw(s_space, 0, src, a0, cda_ldw_exc);
198 ldw(s_space, 4, src, a1, cda_ldw_exc);
199 src -= 0 * sizeof(unsigned int);
200 dst -= 2 * sizeof(unsigned int);
201 len += 1;
202 goto do2;
203 case 0:
204 if (len == 0)
205 return PA_MEMCPY_OK;
206 /* a3 = ((unsigned int *) src)[0];
207 a0 = ((unsigned int *) src)[1]; */
208 ldw(s_space, 0, src, a3, cda_ldw_exc);
209 ldw(s_space, 4, src, a0, cda_ldw_exc);
210 src -=-1 * sizeof(unsigned int);
211 dst -= 1 * sizeof(unsigned int);
212 len += 0;
213 goto do3;
214 case 1:
215 /* a2 = ((unsigned int *) src)[0];
216 a3 = ((unsigned int *) src)[1]; */
217 ldw(s_space, 0, src, a2, cda_ldw_exc);
218 ldw(s_space, 4, src, a3, cda_ldw_exc);
219 src -=-2 * sizeof(unsigned int);
220 dst -= 0 * sizeof(unsigned int);
221 len -= 1;
222 if (len == 0)
223 goto do0;
224 goto do4; /* No-op. */
225 }
226
227 do
228 {
229 /* prefetch_src((const void *)(src + 4 * sizeof(unsigned int))); */
230do4:
231 /* a0 = ((unsigned int *) src)[0]; */
232 ldw(s_space, 0, src, a0, cda_ldw_exc);
233 /* ((unsigned int *) dst)[0] = MERGE (a2, sh_1, a3, sh_2); */
234 stw(d_space, MERGE (a2, sh_1, a3, sh_2), 0, dst, cda_stw_exc);
235do3:
236 /* a1 = ((unsigned int *) src)[1]; */
237 ldw(s_space, 4, src, a1, cda_ldw_exc);
238 /* ((unsigned int *) dst)[1] = MERGE (a3, sh_1, a0, sh_2); */
239 stw(d_space, MERGE (a3, sh_1, a0, sh_2), 4, dst, cda_stw_exc);
240do2:
241 /* a2 = ((unsigned int *) src)[2]; */
242 ldw(s_space, 8, src, a2, cda_ldw_exc);
243 /* ((unsigned int *) dst)[2] = MERGE (a0, sh_1, a1, sh_2); */
244 stw(d_space, MERGE (a0, sh_1, a1, sh_2), 8, dst, cda_stw_exc);
245do1:
246 /* a3 = ((unsigned int *) src)[3]; */
247 ldw(s_space, 12, src, a3, cda_ldw_exc);
248 /* ((unsigned int *) dst)[3] = MERGE (a1, sh_1, a2, sh_2); */
249 stw(d_space, MERGE (a1, sh_1, a2, sh_2), 12, dst, cda_stw_exc);
250
251 src += 4 * sizeof(unsigned int);
252 dst += 4 * sizeof(unsigned int);
253 len -= 4;
254 }
255 while (len != 0);
256
257do0:
258 /* ((unsigned int *) dst)[0] = MERGE (a2, sh_1, a3, sh_2); */
259 stw(d_space, MERGE (a2, sh_1, a3, sh_2), 0, dst, cda_stw_exc);
260
261 preserve_branch(handle_load_error);
262 preserve_branch(handle_store_error);
263
264 return PA_MEMCPY_OK;
265
266handle_load_error:
267 __asm__ __volatile__ ("cda_ldw_exc:\n");
268 return PA_MEMCPY_LOAD_ERROR;
269
270handle_store_error:
271 __asm__ __volatile__ ("cda_stw_exc:\n");
272 return PA_MEMCPY_STORE_ERROR;
273}
274
275
276/* Returns PA_MEMCPY_OK, PA_MEMCPY_LOAD_ERROR or PA_MEMCPY_STORE_ERROR.
277 * In case of an access fault the faulty address can be read from the per_cpu
278 * exception data struct. */
279static noinline unsigned long pa_memcpy_internal(void *dstp, const void *srcp,
280 unsigned long len)
281{
282 register unsigned long src, dst, t1, t2, t3;
283 register unsigned char *pcs, *pcd;
284 register unsigned int *pws, *pwd;
285 register double *pds, *pdd;
286 unsigned long ret;
287
288 src = (unsigned long)srcp;
289 dst = (unsigned long)dstp;
290 pcs = (unsigned char *)srcp;
291 pcd = (unsigned char *)dstp;
292
293 /* prefetch_src((const void *)srcp); */
294
295 if (len < THRESHOLD)
296 goto byte_copy;
297
298 /* Check alignment */
299 t1 = (src ^ dst);
300 if (unlikely(t1 & (sizeof(double)-1)))
301 goto unaligned_copy;
302
303 /* src and dst have same alignment. */
304
305 /* Copy bytes till we are double-aligned. */
306 t2 = src & (sizeof(double) - 1);
307 if (unlikely(t2 != 0)) {
308 t2 = sizeof(double) - t2;
309 while (t2 && len) {
310 /* *pcd++ = *pcs++; */
311 ldbma(s_space, pcs, t3, pmc_load_exc);
312 len--;
313 stbma(d_space, t3, pcd, pmc_store_exc);
314 t2--;
315 }
316 }
317
318 pds = (double *)pcs;
319 pdd = (double *)pcd;
320
321#if 0
322 /* Copy 8 doubles at a time */
323 while (len >= 8*sizeof(double)) {
324 register double r1, r2, r3, r4, r5, r6, r7, r8;
325 /* prefetch_src((char *)pds + L1_CACHE_BYTES); */
326 flddma(s_space, pds, r1, pmc_load_exc);
327 flddma(s_space, pds, r2, pmc_load_exc);
328 flddma(s_space, pds, r3, pmc_load_exc);
329 flddma(s_space, pds, r4, pmc_load_exc);
330 fstdma(d_space, r1, pdd, pmc_store_exc);
331 fstdma(d_space, r2, pdd, pmc_store_exc);
332 fstdma(d_space, r3, pdd, pmc_store_exc);
333 fstdma(d_space, r4, pdd, pmc_store_exc);
334
335#if 0
336 if (L1_CACHE_BYTES <= 32)
337 prefetch_src((char *)pds + L1_CACHE_BYTES);
338#endif
339 flddma(s_space, pds, r5, pmc_load_exc);
340 flddma(s_space, pds, r6, pmc_load_exc);
341 flddma(s_space, pds, r7, pmc_load_exc);
342 flddma(s_space, pds, r8, pmc_load_exc);
343 fstdma(d_space, r5, pdd, pmc_store_exc);
344 fstdma(d_space, r6, pdd, pmc_store_exc);
345 fstdma(d_space, r7, pdd, pmc_store_exc);
346 fstdma(d_space, r8, pdd, pmc_store_exc);
347 len -= 8*sizeof(double);
348 }
349#endif
350
351 pws = (unsigned int *)pds;
352 pwd = (unsigned int *)pdd;
353
354word_copy:
355 while (len >= 8*sizeof(unsigned int)) {
356 register unsigned int r1,r2,r3,r4,r5,r6,r7,r8;
357 /* prefetch_src((char *)pws + L1_CACHE_BYTES); */
358 ldwma(s_space, pws, r1, pmc_load_exc);
359 ldwma(s_space, pws, r2, pmc_load_exc);
360 ldwma(s_space, pws, r3, pmc_load_exc);
361 ldwma(s_space, pws, r4, pmc_load_exc);
362 stwma(d_space, r1, pwd, pmc_store_exc);
363 stwma(d_space, r2, pwd, pmc_store_exc);
364 stwma(d_space, r3, pwd, pmc_store_exc);
365 stwma(d_space, r4, pwd, pmc_store_exc);
366
367 ldwma(s_space, pws, r5, pmc_load_exc);
368 ldwma(s_space, pws, r6, pmc_load_exc);
369 ldwma(s_space, pws, r7, pmc_load_exc);
370 ldwma(s_space, pws, r8, pmc_load_exc);
371 stwma(d_space, r5, pwd, pmc_store_exc);
372 stwma(d_space, r6, pwd, pmc_store_exc);
373 stwma(d_space, r7, pwd, pmc_store_exc);
374 stwma(d_space, r8, pwd, pmc_store_exc);
375 len -= 8*sizeof(unsigned int);
376 }
377
378 while (len >= 4*sizeof(unsigned int)) {
379 register unsigned int r1,r2,r3,r4;
380 ldwma(s_space, pws, r1, pmc_load_exc);
381 ldwma(s_space, pws, r2, pmc_load_exc);
382 ldwma(s_space, pws, r3, pmc_load_exc);
383 ldwma(s_space, pws, r4, pmc_load_exc);
384 stwma(d_space, r1, pwd, pmc_store_exc);
385 stwma(d_space, r2, pwd, pmc_store_exc);
386 stwma(d_space, r3, pwd, pmc_store_exc);
387 stwma(d_space, r4, pwd, pmc_store_exc);
388 len -= 4*sizeof(unsigned int);
389 }
390
391 pcs = (unsigned char *)pws;
392 pcd = (unsigned char *)pwd;
393
394byte_copy:
395 while (len) {
396 /* *pcd++ = *pcs++; */
397 ldbma(s_space, pcs, t3, pmc_load_exc);
398 stbma(d_space, t3, pcd, pmc_store_exc);
399 len--;
400 }
401
402 return PA_MEMCPY_OK;
403
404unaligned_copy:
405 /* possibly we are aligned on a word, but not on a double... */
406 if (likely((t1 & (sizeof(unsigned int)-1)) == 0)) {
407 t2 = src & (sizeof(unsigned int) - 1);
408
409 if (unlikely(t2 != 0)) {
410 t2 = sizeof(unsigned int) - t2;
411 while (t2) {
412 /* *pcd++ = *pcs++; */
413 ldbma(s_space, pcs, t3, pmc_load_exc);
414 stbma(d_space, t3, pcd, pmc_store_exc);
415 len--;
416 t2--;
417 }
418 }
419
420 pws = (unsigned int *)pcs;
421 pwd = (unsigned int *)pcd;
422 goto word_copy;
423 }
424
425 /* Align the destination. */
426 if (unlikely((dst & (sizeof(unsigned int) - 1)) != 0)) {
427 t2 = sizeof(unsigned int) - (dst & (sizeof(unsigned int) - 1));
428 while (t2) {
429 /* *pcd++ = *pcs++; */
430 ldbma(s_space, pcs, t3, pmc_load_exc);
431 stbma(d_space, t3, pcd, pmc_store_exc);
432 len--;
433 t2--;
434 }
435 dst = (unsigned long)pcd;
436 src = (unsigned long)pcs;
437 }
438
439 ret = copy_dstaligned(dst, src, len / sizeof(unsigned int));
440 if (ret)
441 return ret;
442
443 pcs += (len & -sizeof(unsigned int));
444 pcd += (len & -sizeof(unsigned int));
445 len %= sizeof(unsigned int);
446
447 preserve_branch(handle_load_error);
448 preserve_branch(handle_store_error);
449
450 goto byte_copy;
451
452handle_load_error:
453 __asm__ __volatile__ ("pmc_load_exc:\n");
454 return PA_MEMCPY_LOAD_ERROR;
455
456handle_store_error:
457 __asm__ __volatile__ ("pmc_store_exc:\n");
458 return PA_MEMCPY_STORE_ERROR;
459}
460
461
462/* Returns 0 for success, otherwise, returns number of bytes not transferred. */ 35/* Returns 0 for success, otherwise, returns number of bytes not transferred. */
463static unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len) 36extern unsigned long pa_memcpy(void *dst, const void *src,
464{ 37 unsigned long len);
465 unsigned long ret, fault_addr, reference;
466 struct exception_data *d;
467
468 ret = pa_memcpy_internal(dstp, srcp, len);
469 if (likely(ret == PA_MEMCPY_OK))
470 return 0;
471
472 /* if a load or store fault occured we can get the faulty addr */
473 d = this_cpu_ptr(&exception_data);
474 fault_addr = d->fault_addr;
475
476 /* error in load or store? */
477 if (ret == PA_MEMCPY_LOAD_ERROR)
478 reference = (unsigned long) srcp;
479 else
480 reference = (unsigned long) dstp;
481 38
482 DPRINTF("pa_memcpy: fault type = %lu, len=%lu fault_addr=%lu ref=%lu\n",
483 ret, len, fault_addr, reference);
484
485 if (fault_addr >= reference)
486 return len - (fault_addr - reference);
487 else
488 return len;
489}
490
491#ifdef __KERNEL__
492unsigned long __copy_to_user(void __user *dst, const void *src, 39unsigned long __copy_to_user(void __user *dst, const void *src,
493 unsigned long len) 40 unsigned long len)
494{ 41{
@@ -537,5 +84,3 @@ long probe_kernel_read(void *dst, const void *src, size_t size)
537 84
538 return __probe_kernel_read(dst, src, size); 85 return __probe_kernel_read(dst, src, size);
539} 86}
540
541#endif
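The deleted C implementation above handled a source that is not word aligned with the MERGE() macro, the same shrpw shift-and-merge trick the new assembly keeps: two aligned source words are combined into the single word stored at an aligned destination. A hedged stand-alone sketch of that merge step (userspace C for illustration; sh_1 is always non-zero on this path, exactly as in copy_dstaligned()):

#include <stdint.h>

/* sh_1 = 8 * (src % 4); valid for sh_1 in {8, 16, 24} */
static uint32_t merge_words(uint32_t w0, uint32_t w1, unsigned int sh_1)
{
	unsigned int sh_2 = 32 - sh_1;

	/* big-endian PA-RISC: high bytes from w0, low bytes from w1,
	 * equivalent to "mtsar sh_2; shrpw w0, w1, %sar, result" */
	return (w0 << sh_1) | (w1 >> sh_2);
}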
diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c
index deab89a8915a..32ec22146141 100644
--- a/arch/parisc/mm/fault.c
+++ b/arch/parisc/mm/fault.c
@@ -150,6 +150,23 @@ int fixup_exception(struct pt_regs *regs)
 		d->fault_space = regs->isr;
 		d->fault_addr = regs->ior;
 
+		/*
+		 * Fix up get_user() and put_user().
+		 * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() sets the least-significant
+		 * bit in the relative address of the fixup routine to indicate
+		 * that %r8 should be loaded with -EFAULT to report a userspace
+		 * access error.
+		 */
+		if (fix->fixup & 1) {
+			regs->gr[8] = -EFAULT;
+
+			/* zero target register for get_user() */
+			if (parisc_acctyp(0, regs->iir) == VM_READ) {
+				int treg = regs->iir & 0x1f;
+				regs->gr[treg] = 0;
+			}
+		}
+
 		regs->iaoq[0] = (unsigned long)&fix->fixup + fix->fixup;
 		regs->iaoq[0] &= ~3;
 		/*
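The "except_addr + 1" added by ASM_EXCEPTIONTABLE_ENTRY_EFAULT() survives as bit 0 of the relative fixup offset; the test above reads it, and the existing "regs->iaoq[0] &= ~3" strips it (together with the PA-RISC privilege bits) before execution resumes at the fixup address. A hedged sketch of just that address arithmetic (illustrative helper, not kernel code):

static unsigned long decode_fixup(unsigned long fixup_field_addr,
				  long fixup_offset, int *load_efault)
{
	/* mirrors "(unsigned long)&fix->fixup + fix->fixup" */
	unsigned long target = fixup_field_addr + fixup_offset;

	/* bit 0 was set by the "+ 1" in the macro */
	*load_efault = fixup_offset & 1;

	/* mirrors "regs->iaoq[0] &= ~3" */
	return target & ~3UL;
}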