Diffstat (limited to 'arch/arm/lib/memset.S')
-rw-r--r--  arch/arm/lib/memset.S | 100 ++++++++++++++++++++++++++++++++++++++++++++++++----------------------------------------------------
1 file changed, 48 insertions(+), 52 deletions(-)
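
This commit reworks memset so that r0 survives as the return value: ip becomes the working destination pointer, r8 takes over ip's old job as a scratch fill register, and the unaligned-entry fixup moves from in front of ENTRY(memset) to a new label 6: at the end of the function. The C-level contract at stake, shown as a minimal sketch (the buffer, assertion, and main() harness are illustrative, not from this commit):

	#include <assert.h>
	#include <string.h>

	int main(void)
	{
		char buf[64];

		/*
		 * C99 7.21.6.1: memset() returns its first argument. A
		 * caller (or an optimizing compiler) may reuse the returned
		 * pointer instead of keeping its own copy, so an
		 * implementation that advances r0 while storing would hand
		 * back a pointer past the end of the buffer.
		 */
		char *p = memset(buf, 0, sizeof(buf));

		assert(p == buf);	/* holds only if r0 is preserved */
		p[0] = 1;		/* safe because p still points at buf */
		return 0;
	}

Optimizing compilers are entitled to rely on that returned pointer, which is why clobbering r0 inside the assembly is observable from C code.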
diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S
index 650d5923ab83..94b0650ea98f 100644
--- a/arch/arm/lib/memset.S
+++ b/arch/arm/lib/memset.S
@@ -14,27 +14,15 @@
 
 	.text
 	.align	5
-	.word	0
-
-1:	subs	r2, r2, #4		@ 1 do we have enough
-	blt	5f			@ 1 bytes to align with?
-	cmp	r3, #2			@ 1
-	strltb	r1, [r0], #1		@ 1
-	strleb	r1, [r0], #1		@ 1
-	strb	r1, [r0], #1		@ 1
-	add	r2, r2, r3		@ 1 (r2 = r2 - (4 - r3))
-/*
- * The pointer is now aligned and the length is adjusted. Try doing the
- * memset again.
- */
 
 ENTRY(memset)
 	ands	r3, r0, #3		@ 1 unaligned?
-	bne	1b			@ 1
+	mov	ip, r0			@ preserve r0 as return value
+	bne	6f			@ 1
 /*
- * we know that the pointer in r0 is aligned to a word boundary.
+ * we know that the pointer in ip is aligned to a word boundary.
  */
-	orr	r1, r1, r1, lsl #8
+1:	orr	r1, r1, r1, lsl #8
 	orr	r1, r1, r1, lsl #16
 	mov	r3, r1
 	cmp	r2, #16
@@ -43,29 +31,28 @@ ENTRY(memset)
 #if ! CALGN(1)+0
 
 /*
- * We need an extra register for this loop - save the return address and
- * use the LR
+ * We need 2 extra registers for this loop - use r8 and the LR
  */
-	str	lr, [sp, #-4]!
-	mov	ip, r1
+	stmfd	sp!, {r8, lr}
+	mov	r8, r1
 	mov	lr, r1
 
 2:	subs	r2, r2, #64
-	stmgeia	r0!, {r1, r3, ip, lr}	@ 64 bytes at a time.
-	stmgeia	r0!, {r1, r3, ip, lr}
-	stmgeia	r0!, {r1, r3, ip, lr}
-	stmgeia	r0!, {r1, r3, ip, lr}
+	stmgeia	ip!, {r1, r3, r8, lr}	@ 64 bytes at a time.
+	stmgeia	ip!, {r1, r3, r8, lr}
+	stmgeia	ip!, {r1, r3, r8, lr}
+	stmgeia	ip!, {r1, r3, r8, lr}
 	bgt	2b
-	ldmeqfd	sp!, {pc}		@ Now <64 bytes to go.
+	ldmeqfd	sp!, {r8, pc}		@ Now <64 bytes to go.
 /*
  * No need to correct the count; we're only testing bits from now on
  */
 	tst	r2, #32
-	stmneia	r0!, {r1, r3, ip, lr}
-	stmneia	r0!, {r1, r3, ip, lr}
+	stmneia	ip!, {r1, r3, r8, lr}
+	stmneia	ip!, {r1, r3, r8, lr}
 	tst	r2, #16
-	stmneia	r0!, {r1, r3, ip, lr}
-	ldr	lr, [sp], #4
+	stmneia	ip!, {r1, r3, r8, lr}
+	ldmfd	sp!, {r8, lr}
 
 #else
 
@@ -74,54 +61,63 @@ ENTRY(memset)
  * whole cache lines at once.
  */
 
-	stmfd	sp!, {r4-r7, lr}
+	stmfd	sp!, {r4-r8, lr}
 	mov	r4, r1
 	mov	r5, r1
 	mov	r6, r1
 	mov	r7, r1
-	mov	ip, r1
+	mov	r8, r1
 	mov	lr, r1
 
 	cmp	r2, #96
-	tstgt	r0, #31
+	tstgt	ip, #31
 	ble	3f
 
-	and	ip, r0, #31
-	rsb	ip, ip, #32
-	sub	r2, r2, ip
-	movs	ip, ip, lsl #(32 - 4)
-	stmcsia	r0!, {r4, r5, r6, r7}
-	stmmiia	r0!, {r4, r5}
-	tst	ip, #(1 << 30)
-	mov	ip, r1
-	strne	r1, [r0], #4
+	and	r8, ip, #31
+	rsb	r8, r8, #32
+	sub	r2, r2, r8
+	movs	r8, r8, lsl #(32 - 4)
+	stmcsia	ip!, {r4, r5, r6, r7}
+	stmmiia	ip!, {r4, r5}
+	tst	r8, #(1 << 30)
+	mov	r8, r1
+	strne	r1, [ip], #4
 
 3:	subs	r2, r2, #64
-	stmgeia	r0!, {r1, r3-r7, ip, lr}
-	stmgeia	r0!, {r1, r3-r7, ip, lr}
+	stmgeia	ip!, {r1, r3-r8, lr}
+	stmgeia	ip!, {r1, r3-r8, lr}
 	bgt	3b
-	ldmeqfd	sp!, {r4-r7, pc}
+	ldmeqfd	sp!, {r4-r8, pc}
 
 	tst	r2, #32
-	stmneia	r0!, {r1, r3-r7, ip, lr}
+	stmneia	ip!, {r1, r3-r8, lr}
 	tst	r2, #16
-	stmneia	r0!, {r4-r7}
-	ldmfd	sp!, {r4-r7, lr}
+	stmneia	ip!, {r4-r7}
+	ldmfd	sp!, {r4-r8, lr}
 
 #endif
 
 4:	tst	r2, #8
-	stmneia	r0!, {r1, r3}
+	stmneia	ip!, {r1, r3}
 	tst	r2, #4
-	strne	r1, [r0], #4
+	strne	r1, [ip], #4
 /*
  * When we get here, we've got less than 4 bytes to zero. We
  * may have an unaligned pointer as well.
  */
 5:	tst	r2, #2
-	strneb	r1, [r0], #1
-	strneb	r1, [r0], #1
+	strneb	r1, [ip], #1
+	strneb	r1, [ip], #1
 	tst	r2, #1
-	strneb	r1, [r0], #1
+	strneb	r1, [ip], #1
 	mov	pc, lr
+
+6:	subs	r2, r2, #4		@ 1 do we have enough
+	blt	5b			@ 1 bytes to align with?
+	cmp	r3, #2			@ 1
+	strltb	r1, [ip], #1		@ 1
+	strleb	r1, [ip], #1		@ 1
+	strb	r1, [ip], #1		@ 1
+	add	r2, r2, r3		@ 1 (r2 = r2 - (4 - r3))
+	b	1b
 ENDPROC(memset)
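
The arithmetic in the relocated fixup at label 6: is compact: subtracting 4 up front and adding back r3 leaves r2 = r2 - (4 - r3), the count remaining once the head bytes are stored, while the strltb/strleb/strb sequence emits exactly (4 - r3) bytes, between one and three. A behavioural C sketch of that prologue, not an instruction-for-instruction translation (memset_sketch, mis, and head are illustrative names, not from the kernel source):

	#include <stddef.h>
	#include <stdint.h>

	void *memset_sketch(void *s, int c, size_t n)
	{
		unsigned char *p = s;		/* "ip", the working pointer */
		size_t mis = (uintptr_t)p & 3;	/* "r3": ands r3, r0, #3 */

		if (mis != 0 && n >= 4) {	/* subs r2, r2, #4; blt 5b */
			size_t head = 4 - mis;	/* bytes up to the boundary */

			n -= head;		/* r2 = r2 - (4 - r3) */
			while (head--)		/* at most three byte stores */
				*p++ = (unsigned char)c;
			/* p is now word-aligned; "b 1b" rejoins the word loop */
		}
		/* stand-in for the word/burst body and the byte tail (4:/5:) */
		while (n--)
			*p++ = (unsigned char)c;
		return s;			/* the caller's pointer, i.e. r0 */
	}

Note that when n < 4 the assembly skips the head stores entirely and falls through to the byte tail at 5:, which still works because subtracting 4 does not change the low two bits that tst r2, #2 and tst r2, #1 examine.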