diff options
| -rw-r--r-- | arch/arm/lib/memset.S | 46 | ||||
| -rw-r--r-- | arch/arm/lib/memzero.S | 44 |
2 files changed, 90 insertions, 0 deletions
diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S
index 95b110b07a89..b477d4ac88ef 100644
--- a/arch/arm/lib/memset.S
+++ b/arch/arm/lib/memset.S
| @@ -39,6 +39,9 @@ ENTRY(memset) | |||
| 39 | mov r3, r1 | 39 | mov r3, r1 |
| 40 | cmp r2, #16 | 40 | cmp r2, #16 |
| 41 | blt 4f | 41 | blt 4f |
| 42 | |||
| 43 | #if ! CALGN(1)+0 | ||
| 44 | |||
| 42 | /* | 45 | /* |
| 43 | * We need an extra register for this loop - save the return address and | 46 | * We need an extra register for this loop - save the return address and |
| 44 | * use the LR | 47 | * use the LR |
| @@ -64,6 +67,49 @@ ENTRY(memset) | |||
| 64 | stmneia r0!, {r1, r3, ip, lr} | 67 | stmneia r0!, {r1, r3, ip, lr} |
| 65 | ldr lr, [sp], #4 | 68 | ldr lr, [sp], #4 |
| 66 | 69 | ||
| 70 | #else | ||
| 71 | |||
| 72 | /* | ||
| 73 | * This version aligns the destination pointer in order to write | ||
| 74 | * whole cache lines at once. | ||
| 75 | */ | ||
| 76 | |||
| 77 | stmfd sp!, {r4-r7, lr} | ||
| 78 | mov r4, r1 | ||
| 79 | mov r5, r1 | ||
| 80 | mov r6, r1 | ||
| 81 | mov r7, r1 | ||
| 82 | mov ip, r1 | ||
| 83 | mov lr, r1 | ||
| 84 | |||
| 85 | cmp r2, #96 | ||
| 86 | tstgt r0, #31 | ||
| 87 | ble 3f | ||
| 88 | |||
| 89 | and ip, r0, #31 | ||
| 90 | rsb ip, ip, #32 | ||
| 91 | sub r2, r2, ip | ||
| 92 | movs ip, ip, lsl #(32 - 4) | ||
| 93 | stmcsia r0!, {r4, r5, r6, r7} | ||
| 94 | stmmiia r0!, {r4, r5} | ||
| 95 | tst ip, #(1 << 30) | ||
| 96 | mov ip, r1 | ||
| 97 | strne r1, [r0], #4 | ||
| 98 | |||
| 99 | 3: subs r2, r2, #64 | ||
| 100 | stmgeia r0!, {r1, r3-r7, ip, lr} | ||
| 101 | stmgeia r0!, {r1, r3-r7, ip, lr} | ||
| 102 | bgt 3b | ||
| 103 | ldmeqfd sp!, {r4-r7, pc} | ||
| 104 | |||
| 105 | tst r2, #32 | ||
| 106 | stmneia r0!, {r1, r3-r7, ip, lr} | ||
| 107 | tst r2, #16 | ||
| 108 | stmneia r0!, {r4-r7} | ||
| 109 | ldmfd sp!, {r4-r7, lr} | ||
| 110 | |||
| 111 | #endif | ||
| 112 | |||
| 67 | 4: tst r2, #8 | 113 | 4: tst r2, #8 |
| 68 | stmneia r0!, {r1, r3} | 114 | stmneia r0!, {r1, r3} |
| 69 | tst r2, #4 | 115 | tst r2, #4 |
diff --git a/arch/arm/lib/memzero.S b/arch/arm/lib/memzero.S
index abf2508e8221..b8f79d80ee9b 100644
--- a/arch/arm/lib/memzero.S
+++ b/arch/arm/lib/memzero.S
| @@ -39,6 +39,9 @@ ENTRY(__memzero) | |||
| 39 | */ | 39 | */ |
| 40 | cmp r1, #16 @ 1 we can skip this chunk if we | 40 | cmp r1, #16 @ 1 we can skip this chunk if we |
| 41 | blt 4f @ 1 have < 16 bytes | 41 | blt 4f @ 1 have < 16 bytes |
| 42 | |||
| 43 | #if ! CALGN(1)+0 | ||
| 44 | |||
| 42 | /* | 45 | /* |
| 43 | * We need an extra register for this loop - save the return address and | 46 | * We need an extra register for this loop - save the return address and |
| 44 | * use the LR | 47 | * use the LR |
| @@ -64,6 +67,47 @@ ENTRY(__memzero) | |||
| 64 | stmneia r0!, {r2, r3, ip, lr} @ 4 | 67 | stmneia r0!, {r2, r3, ip, lr} @ 4 |
| 65 | ldr lr, [sp], #4 @ 1 | 68 | ldr lr, [sp], #4 @ 1 |
| 66 | 69 | ||
| 70 | #else | ||
| 71 | |||
| 72 | /* | ||
| 73 | * This version aligns the destination pointer in order to write | ||
| 74 | * whole cache lines at once. | ||
| 75 | */ | ||
| 76 | |||
| 77 | stmfd sp!, {r4-r7, lr} | ||
| 78 | mov r4, r2 | ||
| 79 | mov r5, r2 | ||
| 80 | mov r6, r2 | ||
| 81 | mov r7, r2 | ||
| 82 | mov ip, r2 | ||
| 83 | mov lr, r2 | ||
| 84 | |||
| 85 | cmp r1, #96 | ||
| 86 | andgts ip, r0, #31 | ||
| 87 | ble 3f | ||
| 88 | |||
| 89 | rsb ip, ip, #32 | ||
| 90 | sub r1, r1, ip | ||
| 91 | movs ip, ip, lsl #(32 - 4) | ||
| 92 | stmcsia r0!, {r4, r5, r6, r7} | ||
| 93 | stmmiia r0!, {r4, r5} | ||
| 94 | movs ip, ip, lsl #2 | ||
| 95 | strcs r2, [r0], #4 | ||
| 96 | |||
| 97 | 3: subs r1, r1, #64 | ||
| 98 | stmgeia r0!, {r2-r7, ip, lr} | ||
| 99 | stmgeia r0!, {r2-r7, ip, lr} | ||
| 100 | bgt 3b | ||
| 101 | ldmeqfd sp!, {r4-r7, pc} | ||
| 102 | |||
| 103 | tst r1, #32 | ||
| 104 | stmneia r0!, {r2-r7, ip, lr} | ||
| 105 | tst r1, #16 | ||
| 106 | stmneia r0!, {r4-r7} | ||
| 107 | ldmfd sp!, {r4-r7, lr} | ||
| 108 | |||
| 109 | #endif | ||
| 110 | |||
| 67 | 4: tst r1, #8 @ 1 8 bytes or more? | 111 | 4: tst r1, #8 @ 1 8 bytes or more? |
| 68 | stmneia r0!, {r2, r3} @ 2 | 112 | stmneia r0!, {r2, r3} @ 2 |
| 69 | tst r1, #4 @ 1 4 bytes or more? | 113 | tst r1, #4 @ 1 4 bytes or more? |
