about | summary | refs | log | tree | commit | diff | stats
path: root/arch/arm64/kernel
diff options
context:
space:
mode:
author: Will Deacon <will.deacon@arm.com> 2014-09-29 07:26:41 -0400
committer: Will Deacon <will.deacon@arm.com> 2014-11-14 05:42:21 -0500
commit: 63648dd20fa0780ab6c1e923b5c276d257422cb3 (patch)
tree: 4cf6d802916d0cbccd7409b2f1521eca6ce9f64b /arch/arm64/kernel
parent: d54e81f9af1d106e47ae8594903c43a80dae1a99 (diff)
arm64: entry: use ldp/stp instead of push/pop when saving/restoring regs
The push/pop instructions can be suboptimal when saving/restoring large
amounts of data to/from the stack, for example on entry/exit from the
kernel. This is because:

  (1) They act on descending addresses (i.e. the newly decremented sp),
      which may defeat some hardware prefetchers.

  (2) They introduce an implicit dependency between each instruction, as
      the sp has to be updated in order to resolve the address of the
      next access.

This patch removes the push/pop instructions from our kernel entry/exit
macros in favour of ldp/stp plus offset.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Diffstat (limited to 'arch/arm64/kernel')
-rw-r--r-- arch/arm64/kernel/entry.S | 75
1 file changed, 37 insertions, 38 deletions
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 2cebe56d650c..622a409916f3 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -64,25 +64,26 @@
64#define BAD_ERROR 3 64#define BAD_ERROR 3
65 65
66 .macro kernel_entry, el, regsize = 64 66 .macro kernel_entry, el, regsize = 64
67 sub sp, sp, #S_FRAME_SIZE - S_LR // room for LR, SP, SPSR, ELR 67 sub sp, sp, #S_FRAME_SIZE
68 .if \regsize == 32 68 .if \regsize == 32
69 mov w0, w0 // zero upper 32 bits of x0 69 mov w0, w0 // zero upper 32 bits of x0
70 .endif 70 .endif
71 push x28, x29 71 stp x0, x1, [sp, #16 * 0]
72 push x26, x27 72 stp x2, x3, [sp, #16 * 1]
73 push x24, x25 73 stp x4, x5, [sp, #16 * 2]
74 push x22, x23 74 stp x6, x7, [sp, #16 * 3]
75 push x20, x21 75 stp x8, x9, [sp, #16 * 4]
76 push x18, x19 76 stp x10, x11, [sp, #16 * 5]
77 push x16, x17 77 stp x12, x13, [sp, #16 * 6]
78 push x14, x15 78 stp x14, x15, [sp, #16 * 7]
79 push x12, x13 79 stp x16, x17, [sp, #16 * 8]
80 push x10, x11 80 stp x18, x19, [sp, #16 * 9]
81 push x8, x9 81 stp x20, x21, [sp, #16 * 10]
82 push x6, x7 82 stp x22, x23, [sp, #16 * 11]
83 push x4, x5 83 stp x24, x25, [sp, #16 * 12]
84 push x2, x3 84 stp x26, x27, [sp, #16 * 13]
85 push x0, x1 85 stp x28, x29, [sp, #16 * 14]
86
86 .if \el == 0 87 .if \el == 0
87 mrs x21, sp_el0 88 mrs x21, sp_el0
88 get_thread_info tsk // Ensure MDSCR_EL1.SS is clear, 89 get_thread_info tsk // Ensure MDSCR_EL1.SS is clear,
@@ -118,33 +119,31 @@
118 .if \el == 0 119 .if \el == 0
119 ct_user_enter 120 ct_user_enter
120 ldr x23, [sp, #S_SP] // load return stack pointer 121 ldr x23, [sp, #S_SP] // load return stack pointer
122 msr sp_el0, x23
121 .endif 123 .endif
124 msr elr_el1, x21 // set up the return data
125 msr spsr_el1, x22
122 .if \ret 126 .if \ret
123 ldr x1, [sp, #S_X1] // preserve x0 (syscall return) 127 ldr x1, [sp, #S_X1] // preserve x0 (syscall return)
124 add sp, sp, S_X2
125 .else 128 .else
126 pop x0, x1 129 ldp x0, x1, [sp, #16 * 0]
127 .endif
128 pop x2, x3 // load the rest of the registers
129 pop x4, x5
130 pop x6, x7
131 pop x8, x9
132 msr elr_el1, x21 // set up the return data
133 msr spsr_el1, x22
134 .if \el == 0
135 msr sp_el0, x23
136 .endif 130 .endif
137 pop x10, x11 131 ldp x2, x3, [sp, #16 * 1]
138 pop x12, x13 132 ldp x4, x5, [sp, #16 * 2]
139 pop x14, x15 133 ldp x6, x7, [sp, #16 * 3]
140 pop x16, x17 134 ldp x8, x9, [sp, #16 * 4]
141 pop x18, x19 135 ldp x10, x11, [sp, #16 * 5]
142 pop x20, x21 136 ldp x12, x13, [sp, #16 * 6]
143 pop x22, x23 137 ldp x14, x15, [sp, #16 * 7]
144 pop x24, x25 138 ldp x16, x17, [sp, #16 * 8]
145 pop x26, x27 139 ldp x18, x19, [sp, #16 * 9]
146 pop x28, x29 140 ldp x20, x21, [sp, #16 * 10]
147 ldr lr, [sp], #S_FRAME_SIZE - S_LR // load LR and restore SP 141 ldp x22, x23, [sp, #16 * 11]
142 ldp x24, x25, [sp, #16 * 12]
143 ldp x26, x27, [sp, #16 * 13]
144 ldp x28, x29, [sp, #16 * 14]
145 ldr lr, [sp, #S_LR]
146 add sp, sp, #S_FRAME_SIZE // restore sp
148 eret // return to kernel 147 eret // return to kernel
149 .endm 148 .endm
150 149