diff options
author | Will Deacon <will.deacon@arm.com> | 2014-09-29 07:26:41 -0400 |
---|---|---|
committer | Will Deacon <will.deacon@arm.com> | 2014-11-14 05:42:21 -0500 |
commit | 63648dd20fa0780ab6c1e923b5c276d257422cb3 (patch) | |
tree | 4cf6d802916d0cbccd7409b2f1521eca6ce9f64b /arch/arm64/kernel | |
parent | d54e81f9af1d106e47ae8594903c43a80dae1a99 (diff) |
arm64: entry: use ldp/stp instead of push/pop when saving/restoring regs
The push/pop instructions can be suboptimal when saving/restoring large
amounts of data to/from the stack, for example on entry/exit from the
kernel. This is because:
(1) They act on descending addresses (i.e. the newly decremented sp),
which may defeat some hardware prefetchers
(2) They introduce an implicit dependency between each instruction, as
the sp has to be updated in order to resolve the address of the
next access.
This patch removes the push/pop instructions from our kernel entry/exit
macros in favour of ldp/stp plus offset.
Signed-off-by: Will Deacon <will.deacon@arm.com>
Diffstat (limited to 'arch/arm64/kernel')
-rw-r--r-- | arch/arm64/kernel/entry.S | 75 |
1 file changed, 37 insertions(+), 38 deletions(-)
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 2cebe56d650c..622a409916f3 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S | |||
@@ -64,25 +64,26 @@ | |||
64 | #define BAD_ERROR 3 | 64 | #define BAD_ERROR 3 |
65 | 65 | ||
66 | .macro kernel_entry, el, regsize = 64 | 66 | .macro kernel_entry, el, regsize = 64 |
67 | sub sp, sp, #S_FRAME_SIZE - S_LR // room for LR, SP, SPSR, ELR | 67 | sub sp, sp, #S_FRAME_SIZE |
68 | .if \regsize == 32 | 68 | .if \regsize == 32 |
69 | mov w0, w0 // zero upper 32 bits of x0 | 69 | mov w0, w0 // zero upper 32 bits of x0 |
70 | .endif | 70 | .endif |
71 | push x28, x29 | 71 | stp x0, x1, [sp, #16 * 0] |
72 | push x26, x27 | 72 | stp x2, x3, [sp, #16 * 1] |
73 | push x24, x25 | 73 | stp x4, x5, [sp, #16 * 2] |
74 | push x22, x23 | 74 | stp x6, x7, [sp, #16 * 3] |
75 | push x20, x21 | 75 | stp x8, x9, [sp, #16 * 4] |
76 | push x18, x19 | 76 | stp x10, x11, [sp, #16 * 5] |
77 | push x16, x17 | 77 | stp x12, x13, [sp, #16 * 6] |
78 | push x14, x15 | 78 | stp x14, x15, [sp, #16 * 7] |
79 | push x12, x13 | 79 | stp x16, x17, [sp, #16 * 8] |
80 | push x10, x11 | 80 | stp x18, x19, [sp, #16 * 9] |
81 | push x8, x9 | 81 | stp x20, x21, [sp, #16 * 10] |
82 | push x6, x7 | 82 | stp x22, x23, [sp, #16 * 11] |
83 | push x4, x5 | 83 | stp x24, x25, [sp, #16 * 12] |
84 | push x2, x3 | 84 | stp x26, x27, [sp, #16 * 13] |
85 | push x0, x1 | 85 | stp x28, x29, [sp, #16 * 14] |
86 | |||
86 | .if \el == 0 | 87 | .if \el == 0 |
87 | mrs x21, sp_el0 | 88 | mrs x21, sp_el0 |
88 | get_thread_info tsk // Ensure MDSCR_EL1.SS is clear, | 89 | get_thread_info tsk // Ensure MDSCR_EL1.SS is clear, |
@@ -118,33 +119,31 @@ | |||
118 | .if \el == 0 | 119 | .if \el == 0 |
119 | ct_user_enter | 120 | ct_user_enter |
120 | ldr x23, [sp, #S_SP] // load return stack pointer | 121 | ldr x23, [sp, #S_SP] // load return stack pointer |
122 | msr sp_el0, x23 | ||
121 | .endif | 123 | .endif |
124 | msr elr_el1, x21 // set up the return data | ||
125 | msr spsr_el1, x22 | ||
122 | .if \ret | 126 | .if \ret |
123 | ldr x1, [sp, #S_X1] // preserve x0 (syscall return) | 127 | ldr x1, [sp, #S_X1] // preserve x0 (syscall return) |
124 | add sp, sp, S_X2 | ||
125 | .else | 128 | .else |
126 | pop x0, x1 | 129 | ldp x0, x1, [sp, #16 * 0] |
127 | .endif | ||
128 | pop x2, x3 // load the rest of the registers | ||
129 | pop x4, x5 | ||
130 | pop x6, x7 | ||
131 | pop x8, x9 | ||
132 | msr elr_el1, x21 // set up the return data | ||
133 | msr spsr_el1, x22 | ||
134 | .if \el == 0 | ||
135 | msr sp_el0, x23 | ||
136 | .endif | 130 | .endif |
137 | pop x10, x11 | 131 | ldp x2, x3, [sp, #16 * 1] |
138 | pop x12, x13 | 132 | ldp x4, x5, [sp, #16 * 2] |
139 | pop x14, x15 | 133 | ldp x6, x7, [sp, #16 * 3] |
140 | pop x16, x17 | 134 | ldp x8, x9, [sp, #16 * 4] |
141 | pop x18, x19 | 135 | ldp x10, x11, [sp, #16 * 5] |
142 | pop x20, x21 | 136 | ldp x12, x13, [sp, #16 * 6] |
143 | pop x22, x23 | 137 | ldp x14, x15, [sp, #16 * 7] |
144 | pop x24, x25 | 138 | ldp x16, x17, [sp, #16 * 8] |
145 | pop x26, x27 | 139 | ldp x18, x19, [sp, #16 * 9] |
146 | pop x28, x29 | 140 | ldp x20, x21, [sp, #16 * 10] |
147 | ldr lr, [sp], #S_FRAME_SIZE - S_LR // load LR and restore SP | 141 | ldp x22, x23, [sp, #16 * 11] |
142 | ldp x24, x25, [sp, #16 * 12] | ||
143 | ldp x26, x27, [sp, #16 * 13] | ||
144 | ldp x28, x29, [sp, #16 * 14] | ||
145 | ldr lr, [sp, #S_LR] | ||
146 | add sp, sp, #S_FRAME_SIZE // restore sp | ||
148 | eret // return to kernel | 147 | eret // return to kernel |
149 | .endm | 148 | .endm |
150 | 149 | ||