author		David S. Miller <davem@davemloft.net>	2015-03-23 12:22:10 -0400
committer	David S. Miller <davem@davemloft.net>	2015-03-23 12:22:10 -0400
commit		2077cef4d5c29cf886192ec32066f783d6a80db8 (patch)
tree		35fb6e4b390edcba5fa69fd639f467791356f90a
parent		31aaa98c248da766ece922bbbe8cc78cfd0bc920 (diff)
sparc64: Fix several bugs in memmove().
Firstly, handle zero length calls properly.  Believe it or not there
are a few of these happening during early boot.

Next, we can't just drop to a memcpy() call in the forward copy case
where dst <= src.  The reason is that the cache initializing stores
used in the Niagara memcpy() implementations can end up clearing out
cache lines before we've sourced their original contents completely.

For example, considering NG4memcpy, the main unrolled loop begins like
this:

     load   src + 0x00
     load   src + 0x08
     load   src + 0x10
     load   src + 0x18
     load   src + 0x20
     store  dst + 0x00

Assume dst is 64 byte aligned and let's say that dst is src - 8 for
this memcpy() call.  That store at the end there is the one to the
first word in the cache line, thus clearing the whole line, which thus
clobbers "src + 0x28" before it even gets loaded.

To avoid this, just fall through to a simple copy only mildly
optimized for the case where src and dst are 8 byte aligned and the
length is a multiple of 8 as well.  We could get fancy and call
GENmemcpy() but this is good enough for how this thing is actually
used.

Reported-by: David Ahern <david.ahern@oracle.com>
Reported-by: Bob Picco <bpicco@meloft.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
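As an illustration only, here is a rough user-space C sketch (not the kernel code) of the fallback copy the patch adds below: dst, src and len are OR'd together and tested against 0x7, then the copy proceeds 8 bytes at a time when everything is aligned, and byte at a time otherwise. The function name forward_copy_sketch is made up for this example.

#include <stddef.h>
#include <stdint.h>

/* Hypothetical sketch of the patch's fallback forward copy: plain
 * loads and stores only, so no cache-initializing stores can clobber
 * not-yet-read source bytes when dst and src overlap.
 */
static void forward_copy_sketch(void *dst, const void *src, size_t len)
{
	/* same alignment test as the assembly: (dst | src | len) & 0x7 */
	if ((((uintptr_t)dst | (uintptr_t)src | len) & 0x7) == 0) {
		uint64_t *d = dst;
		const uint64_t *s = src;

		for (; len; len -= 8)
			*d++ = *s++;	/* 8 bytes per iteration */
	} else {
		unsigned char *d = dst;
		const unsigned char *s = src;

		while (len--)
			*d++ = *s++;	/* byte-at-a-time fallback */
	}
}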
-rw-r--r--	arch/sparc/lib/memmove.S | 35
1 file changed, 32 insertions(+), 3 deletions(-)
diff --git a/arch/sparc/lib/memmove.S b/arch/sparc/lib/memmove.S
index b7f6334e159f..857ad4f8905f 100644
--- a/arch/sparc/lib/memmove.S
+++ b/arch/sparc/lib/memmove.S
@@ -8,9 +8,11 @@
 
 	.text
 ENTRY(memmove) /* o0=dst o1=src o2=len */
-	mov		%o0, %g1
+	brz,pn		%o2, 99f
+	 mov		%o0, %g1
+
 	cmp		%o0, %o1
-	bleu,pt		%xcc, memcpy
+	bleu,pt		%xcc, 2f
 	 add		%o1, %o2, %g7
 	cmp		%g7, %o0
 	bleu,pt		%xcc, memcpy
@@ -24,7 +26,34 @@ ENTRY(memmove) /* o0=dst o1=src o2=len */
 	 stb		%g7, [%o0]
 	bne,pt		%icc, 1b
 	 sub		%o0, 1, %o0
-
+99:
 	retl
 	 mov		%g1, %o0
+
+	/* We can't just call memcpy for these memmove cases.  On some
+	 * chips the memcpy uses cache initializing stores and when dst
+	 * and src are close enough, those can clobber the source data
+	 * before we've loaded it in.
+	 */
+2:	or		%o0, %o1, %g7
+	or		%o2, %g7, %g7
+	andcc		%g7, 0x7, %g0
+	bne,pn		%xcc, 4f
+	 nop
+
+3:	ldx		[%o1], %g7
+	add		%o1, 8, %o1
+	subcc		%o2, 8, %o2
+	add		%o0, 8, %o0
+	bne,pt		%icc, 3b
+	 stx		%g7, [%o0 - 0x8]
+	ba,a,pt		%xcc, 99b
+
+4:	ldub		[%o1], %g7
+	add		%o1, 1, %o1
+	subcc		%o2, 1, %o2
+	add		%o0, 1, %o0
+	bne,pt		%icc, 4b
+	 stb		%g7, [%o0 - 0x1]
+	ba,a,pt		%xcc, 99b
 ENDPROC(memmove)
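For completeness, a small user-space exercise of the overlap pattern from the commit message (dst = src - 8, a forward overlapping move). This is only a sketch under arbitrary assumptions about buffer size and contents; the actual failures were observed during early boot, not via a test like this.

#include <stdio.h>
#include <string.h>

/* Exercise the overlapping forward-copy case described above: dst is
 * src - 8 and the regions overlap, so memmove() must read each source
 * byte before anything can overwrite it.
 */
int main(void)
{
	unsigned char buf[128];
	int i, ok = 1;

	for (i = 0; i < (int)sizeof(buf); i++)
		buf[i] = (unsigned char)i;

	/* dst = src - 8; the 64-byte regions overlap by 56 bytes */
	memmove(buf, buf + 8, 64);

	for (i = 0; i < 64; i++)
		ok &= (buf[i] == (unsigned char)(i + 8));

	printf("overlapping forward memmove: %s\n", ok ? "ok" : "corrupted");
	return 0;
}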