path: root/arch/arm/lib/memset.S
author     Ivan Djelic <ivan.djelic@parrot.com>        2013-03-06 14:09:27 -0500
committer  Russell King <rmk+kernel@arm.linux.org.uk>  2013-03-07 11:14:22 -0500
commit     455bd4c430b0c0a361f38e8658a0d6cb469942b5 (patch)
tree       49b812d14d37c6df726cfb9ed3ed1eb1cd316e3c /arch/arm/lib/memset.S
parent     44d6b1fc3e3c6a3af8e599b724972e881c81e1c9 (diff)
ARM: 7668/1: fix memset-related crashes caused by recent GCC (4.7.2) optimizations
Recent GCC versions (e.g. GCC-4.7.2) perform optimizations based on
assumptions about the implementation of memset and similar functions.
The current ARM optimized memset code does not return the value of its
first argument, as is usually expected from standard implementations.

For instance in the following function:

void debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter)
{
        memset(waiter, MUTEX_DEBUG_INIT, sizeof(*waiter));
        waiter->magic = waiter;
        INIT_LIST_HEAD(&waiter->list);
}

compiled as:

800554d0 <debug_mutex_lock_common>:
800554d0:       e92d4008        push    {r3, lr}
800554d4:       e1a00001        mov     r0, r1
800554d8:       e3a02010        mov     r2, #16 ; 0x10
800554dc:       e3a01011        mov     r1, #17 ; 0x11
800554e0:       eb04426e        bl      80165ea0 <memset>
800554e4:       e1a03000        mov     r3, r0
800554e8:       e583000c        str     r0, [r3, #12]
800554ec:       e5830000        str     r0, [r3]
800554f0:       e5830004        str     r0, [r3, #4]
800554f4:       e8bd8008        pop     {r3, pc}

GCC assumes memset returns the value of pointer 'waiter' in register r0,
causing register/memory corruptions.

This patch fixes the return value of the assembly version of memset.
It adds a 'mov' instruction and merges an additional load+store into
existing load/store instructions.

For ease of review, here is a breakdown of the patch into 4 simple steps:

Step 1
======
Perform the following substitutions:
ip -> r8, then r0 -> ip,
and insert 'mov ip, r0' as the first statement of the function.
At this point, we have a memset() implementation returning the proper
result, but corrupting r8 on some paths (the ones that were using ip).

Step 2
======
Make sure r8 is saved and restored when (! CALGN(1)+0) == 1:

save r8:
-       str     lr, [sp, #-4]!
+       stmfd   sp!, {r8, lr}

and restore r8 on both exit paths:
-       ldmeqfd sp!, {pc}               @ Now <64 bytes to go.
+       ldmeqfd sp!, {r8, pc}           @ Now <64 bytes to go.
(...)
        tst     r2, #16
        stmneia ip!, {r1, r3, r8, lr}
-       ldr     lr, [sp], #4
+       ldmfd   sp!, {r8, lr}

Step 3
======
Make sure r8 is saved and restored when (! CALGN(1)+0) == 0:

save r8:
-       stmfd   sp!, {r4-r7, lr}
+       stmfd   sp!, {r4-r8, lr}

and restore r8 on both exit paths:
        bgt     3b
-       ldmeqfd sp!, {r4-r7, pc}
+       ldmeqfd sp!, {r4-r8, pc}
(...)
        tst     r2, #16
        stmneia ip!, {r4-r7}
-       ldmfd   sp!, {r4-r7, lr}
+       ldmfd   sp!, {r4-r8, lr}

Step 4
======
Rewrite register list "r4-r7, r8" as "r4-r8".

Signed-off-by: Ivan Djelic <ivan.djelic@parrot.com>
Reviewed-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Dirk Behme <dirk.behme@gmail.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
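As a quick illustration of the contract GCC relies on here, a minimal C sketch
(the name memset_ref is illustrative only and is not part of this patch): a
standard-conforming memset() returns its first argument unchanged, which is why
the compiler in the disassembly above reuses r0 after the call.

#include <stddef.h>

/* Minimal reference sketch of the C-standard memset() contract: the
 * destination pointer is returned unchanged, so the caller (or the
 * compiler) may keep using the returned value in place of 'dest'. */
static void *memset_ref(void *dest, int c, size_t n)
{
        unsigned char *p = dest;

        while (n--)
                *p++ = (unsigned char)c;

        return dest;    /* the value the ARM assembly must leave in r0 */
}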
Diffstat (limited to 'arch/arm/lib/memset.S')
-rw-r--r--   arch/arm/lib/memset.S   85
1 file changed, 44 insertions(+), 41 deletions(-)
diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S
index 650d5923ab83..d912e7397ecc 100644
--- a/arch/arm/lib/memset.S
+++ b/arch/arm/lib/memset.S
@@ -19,9 +19,9 @@
 1:     subs    r2, r2, #4              @ 1 do we have enough
        blt     5f                      @ 1 bytes to align with?
        cmp     r3, #2                  @ 1
-       strltb  r1, [r0], #1            @ 1
-       strleb  r1, [r0], #1            @ 1
-       strb    r1, [r0], #1            @ 1
+       strltb  r1, [ip], #1            @ 1
+       strleb  r1, [ip], #1            @ 1
+       strb    r1, [ip], #1            @ 1
        add     r2, r2, r3              @ 1 (r2 = r2 - (4 - r3))
 /*
  * The pointer is now aligned and the length is adjusted.  Try doing the
@@ -29,10 +29,14 @@
  */
 
 ENTRY(memset)
-       ands    r3, r0, #3              @ 1 unaligned?
+/*
+ * Preserve the contents of r0 for the return value.
+ */
+       mov     ip, r0
+       ands    r3, ip, #3              @ 1 unaligned?
        bne     1b                      @ 1
 /*
- * we know that the pointer in r0 is aligned to a word boundary.
+ * we know that the pointer in ip is aligned to a word boundary.
  */
        orr     r1, r1, r1, lsl #8
        orr     r1, r1, r1, lsl #16
@@ -43,29 +47,28 @@ ENTRY(memset)
 #if ! CALGN(1)+0
 
 /*
- * We need an extra register for this loop - save the return address and
- * use the LR
+ * We need 2 extra registers for this loop - use r8 and the LR
  */
-       str     lr, [sp, #-4]!
-       mov     ip, r1
+       stmfd   sp!, {r8, lr}
+       mov     r8, r1
        mov     lr, r1
 
 2:     subs    r2, r2, #64
-       stmgeia r0!, {r1, r3, ip, lr}   @ 64 bytes at a time.
-       stmgeia r0!, {r1, r3, ip, lr}
-       stmgeia r0!, {r1, r3, ip, lr}
-       stmgeia r0!, {r1, r3, ip, lr}
+       stmgeia ip!, {r1, r3, r8, lr}   @ 64 bytes at a time.
+       stmgeia ip!, {r1, r3, r8, lr}
+       stmgeia ip!, {r1, r3, r8, lr}
+       stmgeia ip!, {r1, r3, r8, lr}
        bgt     2b
-       ldmeqfd sp!, {pc}               @ Now <64 bytes to go.
+       ldmeqfd sp!, {r8, pc}           @ Now <64 bytes to go.
 /*
  * No need to correct the count; we're only testing bits from now on
  */
        tst     r2, #32
-       stmneia r0!, {r1, r3, ip, lr}
-       stmneia r0!, {r1, r3, ip, lr}
+       stmneia ip!, {r1, r3, r8, lr}
+       stmneia ip!, {r1, r3, r8, lr}
        tst     r2, #16
-       stmneia r0!, {r1, r3, ip, lr}
-       ldr     lr, [sp], #4
+       stmneia ip!, {r1, r3, r8, lr}
+       ldmfd   sp!, {r8, lr}
 
 #else
 
@@ -74,54 +77,54 @@ ENTRY(memset)
  * whole cache lines at once.
  */
 
-       stmfd   sp!, {r4-r7, lr}
+       stmfd   sp!, {r4-r8, lr}
        mov     r4, r1
        mov     r5, r1
        mov     r6, r1
        mov     r7, r1
-       mov     ip, r1
+       mov     r8, r1
        mov     lr, r1
 
        cmp     r2, #96
-       tstgt   r0, #31
+       tstgt   ip, #31
        ble     3f
 
-       and     ip, r0, #31
-       rsb     ip, ip, #32
-       sub     r2, r2, ip
-       movs    ip, ip, lsl #(32 - 4)
-       stmcsia r0!, {r4, r5, r6, r7}
-       stmmiia r0!, {r4, r5}
-       tst     ip, #(1 << 30)
-       mov     ip, r1
-       strne   r1, [r0], #4
+       and     r8, ip, #31
+       rsb     r8, r8, #32
+       sub     r2, r2, r8
+       movs    r8, r8, lsl #(32 - 4)
+       stmcsia ip!, {r4, r5, r6, r7}
+       stmmiia ip!, {r4, r5}
+       tst     r8, #(1 << 30)
+       mov     r8, r1
+       strne   r1, [ip], #4
 
 3:     subs    r2, r2, #64
-       stmgeia r0!, {r1, r3-r7, ip, lr}
-       stmgeia r0!, {r1, r3-r7, ip, lr}
+       stmgeia ip!, {r1, r3-r8, lr}
+       stmgeia ip!, {r1, r3-r8, lr}
        bgt     3b
-       ldmeqfd sp!, {r4-r7, pc}
+       ldmeqfd sp!, {r4-r8, pc}
 
        tst     r2, #32
-       stmneia r0!, {r1, r3-r7, ip, lr}
+       stmneia ip!, {r1, r3-r8, lr}
        tst     r2, #16
-       stmneia r0!, {r4-r7}
-       ldmfd   sp!, {r4-r7, lr}
+       stmneia ip!, {r4-r7}
+       ldmfd   sp!, {r4-r8, lr}
 
 #endif
 
 4:     tst     r2, #8
-       stmneia r0!, {r1, r3}
+       stmneia ip!, {r1, r3}
        tst     r2, #4
-       strne   r1, [r0], #4
+       strne   r1, [ip], #4
 /*
  * When we get here, we've got less than 4 bytes to zero.  We
  * may have an unaligned pointer as well.
  */
 5:     tst     r2, #2
-       strneb  r1, [r0], #1
-       strneb  r1, [r0], #1
+       strneb  r1, [ip], #1
+       strneb  r1, [ip], #1
        tst     r2, #1
-       strneb  r1, [r0], #1
+       strneb  r1, [ip], #1
        mov     pc, lr
 ENDPROC(memset)
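For completeness, a hedged userspace sanity check, not part of this patch,
that exercises the property the fix restores: memset() must hand back its
first argument.

#include <assert.h>
#include <string.h>

/* Hypothetical check, not from the kernel tree: with a conforming memset,
 * the returned pointer aliases the destination buffer, which is exactly
 * the value the patched assembly now preserves in r0. */
int main(void)
{
        char buf[64];

        assert(memset(buf, 0x11, sizeof(buf)) == (void *)buf);
        return 0;
}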