diff options
author | Catalin Marinas <catalin.marinas@arm.com> | 2014-05-16 05:05:11 -0400 |
---|---|---|
committer | Catalin Marinas <catalin.marinas@arm.com> | 2014-05-16 05:05:11 -0400 |
commit | cf5c95db57ffa02e430c3840c08d1ee0403849d4 (patch) | |
tree | b3b4df5e1edcde098cf45b7fa00c8450e6d665f8 /arch/arm64/kernel | |
parent | fd92d4a54a069953b4679958121317f2a25389cd (diff) | |
parent | 49788fe2a128217f78a21ee4edbe6e92e988f222 (diff) |
Merge tag 'for-3.16' of git://git.linaro.org/people/ard.biesheuvel/linux-arm into upstream
FPSIMD register bank context switching and crypto algorithms
optimisations for arm64 from Ard Biesheuvel.
* tag 'for-3.16' of git://git.linaro.org/people/ard.biesheuvel/linux-arm:
arm64/crypto: AES-ECB/CBC/CTR/XTS using ARMv8 NEON and Crypto Extensions
arm64: pull in <asm/simd.h> from asm-generic
arm64/crypto: AES in CCM mode using ARMv8 Crypto Extensions
arm64/crypto: AES using ARMv8 Crypto Extensions
arm64/crypto: GHASH secure hash using ARMv8 Crypto Extensions
arm64/crypto: SHA-224/SHA-256 using ARMv8 Crypto Extensions
arm64/crypto: SHA-1 using ARMv8 Crypto Extensions
arm64: add support for kernel mode NEON in interrupt context
arm64: defer reloading a task's FPSIMD state to userland resume
arm64: add abstractions for FPSIMD state manipulation
asm-generic: allow generic unaligned access if the arch supports it
Conflicts:
arch/arm64/include/asm/thread_info.h
Diffstat (limited to 'arch/arm64/kernel')
-rw-r--r-- | arch/arm64/kernel/entry-fpsimd.S | 24 | ||||
-rw-r--r-- | arch/arm64/kernel/entry.S | 2 | ||||
-rw-r--r-- | arch/arm64/kernel/fpsimd.c | 186 | ||||
-rw-r--r-- | arch/arm64/kernel/process.c | 2 | ||||
-rw-r--r-- | arch/arm64/kernel/ptrace.c | 2 | ||||
-rw-r--r-- | arch/arm64/kernel/signal.c | 13 | ||||
-rw-r--r-- | arch/arm64/kernel/signal32.c | 9 |
7 files changed, 205 insertions, 33 deletions
diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S index 6a27cd6dbfa6..d358ccacfc00 100644 --- a/arch/arm64/kernel/entry-fpsimd.S +++ b/arch/arm64/kernel/entry-fpsimd.S | |||
@@ -41,3 +41,27 @@ ENTRY(fpsimd_load_state) | |||
41 | fpsimd_restore x0, 8 | 41 | fpsimd_restore x0, 8 |
42 | ret | 42 | ret |
43 | ENDPROC(fpsimd_load_state) | 43 | ENDPROC(fpsimd_load_state) |
44 | |||
45 | #ifdef CONFIG_KERNEL_MODE_NEON | ||
46 | |||
47 | /* | ||
48 | * Save the bottom n FP registers. | ||
49 | * | ||
50 | * x0 - pointer to struct fpsimd_partial_state | ||
51 | */ | ||
52 | ENTRY(fpsimd_save_partial_state) | ||
53 | fpsimd_save_partial x0, 1, 8, 9 | ||
54 | ret | ||
55 | ENDPROC(fpsimd_load_partial_state) | ||
56 | |||
57 | /* | ||
58 | * Load the bottom n FP registers. | ||
59 | * | ||
60 | * x0 - pointer to struct fpsimd_partial_state | ||
61 | */ | ||
62 | ENTRY(fpsimd_load_partial_state) | ||
63 | fpsimd_restore_partial x0, 8, 9 | ||
64 | ret | ||
65 | ENDPROC(fpsimd_load_partial_state) | ||
66 | |||
67 | #endif | ||
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index a670d0a98c89..bf017f4ffb4f 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S | |||
@@ -562,7 +562,7 @@ fast_work_pending: | |||
562 | str x0, [sp, #S_X0] // returned x0 | 562 | str x0, [sp, #S_X0] // returned x0 |
563 | work_pending: | 563 | work_pending: |
564 | tbnz x1, #TIF_NEED_RESCHED, work_resched | 564 | tbnz x1, #TIF_NEED_RESCHED, work_resched |
565 | /* TIF_SIGPENDING or TIF_NOTIFY_RESUME case */ | 565 | /* TIF_SIGPENDING, TIF_NOTIFY_RESUME or TIF_FOREIGN_FPSTATE case */ |
566 | ldr x2, [sp, #S_PSTATE] | 566 | ldr x2, [sp, #S_PSTATE] |
567 | mov x0, sp // 'regs' | 567 | mov x0, sp // 'regs' |
568 | tst x2, #PSR_MODE_MASK // user mode regs? | 568 | tst x2, #PSR_MODE_MASK // user mode regs? |
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 4aef42a04bdc..ad8aebb1cdef 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c | |||
@@ -35,6 +35,60 @@ | |||
35 | #define FPEXC_IDF (1 << 7) | 35 | #define FPEXC_IDF (1 << 7) |
36 | 36 | ||
37 | /* | 37 | /* |
38 | * In order to reduce the number of times the FPSIMD state is needlessly saved | ||
39 | * and restored, we need to keep track of two things: | ||
40 | * (a) for each task, we need to remember which CPU was the last one to have | ||
41 | * the task's FPSIMD state loaded into its FPSIMD registers; | ||
42 | * (b) for each CPU, we need to remember which task's userland FPSIMD state has | ||
43 | * been loaded into its FPSIMD registers most recently, or whether it has | ||
44 | * been used to perform kernel mode NEON in the meantime. | ||
45 | * | ||
46 | * For (a), we add a 'cpu' field to struct fpsimd_state, which gets updated to | ||
47 | * the id of the current CPU every time the state is loaded onto a CPU. For (b), | ||
48 | * we add the per-cpu variable 'fpsimd_last_state' (below), which contains the | ||
49 | * address of the userland FPSIMD state of the task that was loaded onto the CPU | ||
50 | * most recently, or NULL if kernel mode NEON has been performed after that. | ||
51 | * | ||
52 | * With this in place, we no longer have to restore the next FPSIMD state right | ||
53 | * when switching between tasks. Instead, we can defer this check to userland | ||
54 | * resume, at which time we verify whether the CPU's fpsimd_last_state and the | ||
55 | * task's fpsimd_state.cpu are still mutually in sync. If this is the case, we | ||
56 | * can omit the FPSIMD restore. | ||
57 | * | ||
58 | * As an optimization, we use the thread_info flag TIF_FOREIGN_FPSTATE to | ||
59 | * indicate whether or not the userland FPSIMD state of the current task is | ||
60 | * present in the registers. The flag is set unless the FPSIMD registers of this | ||
61 | * CPU currently contain the most recent userland FPSIMD state of the current | ||
62 | * task. | ||
63 | * | ||
64 | * For a certain task, the sequence may look something like this: | ||
65 | * - the task gets scheduled in; if both the task's fpsimd_state.cpu field | ||
66 | * contains the id of the current CPU, and the CPU's fpsimd_last_state per-cpu | ||
67 | * variable points to the task's fpsimd_state, the TIF_FOREIGN_FPSTATE flag is | ||
68 | * cleared, otherwise it is set; | ||
69 | * | ||
70 | * - the task returns to userland; if TIF_FOREIGN_FPSTATE is set, the task's | ||
71 | * userland FPSIMD state is copied from memory to the registers, the task's | ||
72 | * fpsimd_state.cpu field is set to the id of the current CPU, the current | ||
73 | * CPU's fpsimd_last_state pointer is set to this task's fpsimd_state and the | ||
74 | * TIF_FOREIGN_FPSTATE flag is cleared; | ||
75 | * | ||
76 | * - the task executes an ordinary syscall; upon return to userland, the | ||
77 | * TIF_FOREIGN_FPSTATE flag will still be cleared, so no FPSIMD state is | ||
78 | * restored; | ||
79 | * | ||
80 | * - the task executes a syscall which executes some NEON instructions; this is | ||
81 | * preceded by a call to kernel_neon_begin(), which copies the task's FPSIMD | ||
82 | * register contents to memory, clears the fpsimd_last_state per-cpu variable | ||
83 | * and sets the TIF_FOREIGN_FPSTATE flag; | ||
84 | * | ||
85 | * - the task gets preempted after kernel_neon_end() is called; as we have not | ||
86 | * returned from the 2nd syscall yet, TIF_FOREIGN_FPSTATE is still set so | ||
87 | * whatever is in the FPSIMD registers is not saved to memory, but discarded. | ||
88 | */ | ||
89 | static DEFINE_PER_CPU(struct fpsimd_state *, fpsimd_last_state); | ||
90 | |||
91 | /* | ||
38 | * Trapped FP/ASIMD access. | 92 | * Trapped FP/ASIMD access. |
39 | */ | 93 | */ |
40 | void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs) | 94 | void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs) |
@@ -72,43 +126,137 @@ void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs) | |||
72 | 126 | ||
73 | void fpsimd_thread_switch(struct task_struct *next) | 127 | void fpsimd_thread_switch(struct task_struct *next) |
74 | { | 128 | { |
75 | /* check if not kernel threads */ | 129 | /* |
76 | if (current->mm) | 130 | * Save the current FPSIMD state to memory, but only if whatever is in |
131 | * the registers is in fact the most recent userland FPSIMD state of | ||
132 | * 'current'. | ||
133 | */ | ||
134 | if (current->mm && !test_thread_flag(TIF_FOREIGN_FPSTATE)) | ||
77 | fpsimd_save_state(&current->thread.fpsimd_state); | 135 | fpsimd_save_state(&current->thread.fpsimd_state); |
78 | if (next->mm) | 136 | |
79 | fpsimd_load_state(&next->thread.fpsimd_state); | 137 | if (next->mm) { |
138 | /* | ||
139 | * If we are switching to a task whose most recent userland | ||
140 | * FPSIMD state is already in the registers of *this* cpu, | ||
141 | * we can skip loading the state from memory. Otherwise, set | ||
142 | * the TIF_FOREIGN_FPSTATE flag so the state will be loaded | ||
143 | * upon the next return to userland. | ||
144 | */ | ||
145 | struct fpsimd_state *st = &next->thread.fpsimd_state; | ||
146 | |||
147 | if (__this_cpu_read(fpsimd_last_state) == st | ||
148 | && st->cpu == smp_processor_id()) | ||
149 | clear_ti_thread_flag(task_thread_info(next), | ||
150 | TIF_FOREIGN_FPSTATE); | ||
151 | else | ||
152 | set_ti_thread_flag(task_thread_info(next), | ||
153 | TIF_FOREIGN_FPSTATE); | ||
154 | } | ||
80 | } | 155 | } |
81 | 156 | ||
82 | void fpsimd_flush_thread(void) | 157 | void fpsimd_flush_thread(void) |
83 | { | 158 | { |
84 | preempt_disable(); | ||
85 | memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state)); | 159 | memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state)); |
86 | fpsimd_load_state(&current->thread.fpsimd_state); | 160 | set_thread_flag(TIF_FOREIGN_FPSTATE); |
161 | } | ||
162 | |||
163 | /* | ||
164 | * Save the userland FPSIMD state of 'current' to memory, but only if the state | ||
165 | * currently held in the registers does in fact belong to 'current' | ||
166 | */ | ||
167 | void fpsimd_preserve_current_state(void) | ||
168 | { | ||
169 | preempt_disable(); | ||
170 | if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) | ||
171 | fpsimd_save_state(&current->thread.fpsimd_state); | ||
172 | preempt_enable(); | ||
173 | } | ||
174 | |||
175 | /* | ||
176 | * Load the userland FPSIMD state of 'current' from memory, but only if the | ||
177 | * FPSIMD state already held in the registers is /not/ the most recent FPSIMD | ||
178 | * state of 'current' | ||
179 | */ | ||
180 | void fpsimd_restore_current_state(void) | ||
181 | { | ||
182 | preempt_disable(); | ||
183 | if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { | ||
184 | struct fpsimd_state *st = &current->thread.fpsimd_state; | ||
185 | |||
186 | fpsimd_load_state(st); | ||
187 | this_cpu_write(fpsimd_last_state, st); | ||
188 | st->cpu = smp_processor_id(); | ||
189 | } | ||
190 | preempt_enable(); | ||
191 | } | ||
192 | |||
193 | /* | ||
194 | * Load an updated userland FPSIMD state for 'current' from memory and set the | ||
195 | * flag that indicates that the FPSIMD register contents are the most recent | ||
196 | * FPSIMD state of 'current' | ||
197 | */ | ||
198 | void fpsimd_update_current_state(struct fpsimd_state *state) | ||
199 | { | ||
200 | preempt_disable(); | ||
201 | fpsimd_load_state(state); | ||
202 | if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { | ||
203 | struct fpsimd_state *st = &current->thread.fpsimd_state; | ||
204 | |||
205 | this_cpu_write(fpsimd_last_state, st); | ||
206 | st->cpu = smp_processor_id(); | ||
207 | } | ||
87 | preempt_enable(); | 208 | preempt_enable(); |
88 | } | 209 | } |
89 | 210 | ||
211 | /* | ||
212 | * Invalidate live CPU copies of task t's FPSIMD state | ||
213 | */ | ||
214 | void fpsimd_flush_task_state(struct task_struct *t) | ||
215 | { | ||
216 | t->thread.fpsimd_state.cpu = NR_CPUS; | ||
217 | } | ||
218 | |||
90 | #ifdef CONFIG_KERNEL_MODE_NEON | 219 | #ifdef CONFIG_KERNEL_MODE_NEON |
91 | 220 | ||
221 | static DEFINE_PER_CPU(struct fpsimd_partial_state, hardirq_fpsimdstate); | ||
222 | static DEFINE_PER_CPU(struct fpsimd_partial_state, softirq_fpsimdstate); | ||
223 | |||
92 | /* | 224 | /* |
93 | * Kernel-side NEON support functions | 225 | * Kernel-side NEON support functions |
94 | */ | 226 | */ |
95 | void kernel_neon_begin(void) | 227 | void kernel_neon_begin_partial(u32 num_regs) |
96 | { | 228 | { |
97 | /* Avoid using the NEON in interrupt context */ | 229 | if (in_interrupt()) { |
98 | BUG_ON(in_interrupt()); | 230 | struct fpsimd_partial_state *s = this_cpu_ptr( |
99 | preempt_disable(); | 231 | in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate); |
100 | 232 | ||
101 | if (current->mm) | 233 | BUG_ON(num_regs > 32); |
102 | fpsimd_save_state(&current->thread.fpsimd_state); | 234 | fpsimd_save_partial_state(s, roundup(num_regs, 2)); |
235 | } else { | ||
236 | /* | ||
237 | * Save the userland FPSIMD state if we have one and if we | ||
238 | * haven't done so already. Clear fpsimd_last_state to indicate | ||
239 | * that there is no longer userland FPSIMD state in the | ||
240 | * registers. | ||
241 | */ | ||
242 | preempt_disable(); | ||
243 | if (current->mm && | ||
244 | !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE)) | ||
245 | fpsimd_save_state(&current->thread.fpsimd_state); | ||
246 | this_cpu_write(fpsimd_last_state, NULL); | ||
247 | } | ||
103 | } | 248 | } |
104 | EXPORT_SYMBOL(kernel_neon_begin); | 249 | EXPORT_SYMBOL(kernel_neon_begin_partial); |
105 | 250 | ||
106 | void kernel_neon_end(void) | 251 | void kernel_neon_end(void) |
107 | { | 252 | { |
108 | if (current->mm) | 253 | if (in_interrupt()) { |
109 | fpsimd_load_state(&current->thread.fpsimd_state); | 254 | struct fpsimd_partial_state *s = this_cpu_ptr( |
110 | 255 | in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate); | |
111 | preempt_enable(); | 256 | fpsimd_load_partial_state(s); |
257 | } else { | ||
258 | preempt_enable(); | ||
259 | } | ||
112 | } | 260 | } |
113 | EXPORT_SYMBOL(kernel_neon_end); | 261 | EXPORT_SYMBOL(kernel_neon_end); |
114 | 262 | ||
@@ -120,12 +268,12 @@ static int fpsimd_cpu_pm_notifier(struct notifier_block *self, | |||
120 | { | 268 | { |
121 | switch (cmd) { | 269 | switch (cmd) { |
122 | case CPU_PM_ENTER: | 270 | case CPU_PM_ENTER: |
123 | if (current->mm) | 271 | if (current->mm && !test_thread_flag(TIF_FOREIGN_FPSTATE)) |
124 | fpsimd_save_state(&current->thread.fpsimd_state); | 272 | fpsimd_save_state(&current->thread.fpsimd_state); |
125 | break; | 273 | break; |
126 | case CPU_PM_EXIT: | 274 | case CPU_PM_EXIT: |
127 | if (current->mm) | 275 | if (current->mm) |
128 | fpsimd_load_state(&current->thread.fpsimd_state); | 276 | set_thread_flag(TIF_FOREIGN_FPSTATE); |
129 | break; | 277 | break; |
130 | case CPU_PM_ENTER_FAILED: | 278 | case CPU_PM_ENTER_FAILED: |
131 | default: | 279 | default: |
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index d04eb871cb0e..9f2d6020b6c2 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c | |||
@@ -206,7 +206,7 @@ void release_thread(struct task_struct *dead_task) | |||
206 | 206 | ||
207 | int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) | 207 | int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) |
208 | { | 208 | { |
209 | fpsimd_save_state(&current->thread.fpsimd_state); | 209 | fpsimd_preserve_current_state(); |
210 | *dst = *src; | 210 | *dst = *src; |
211 | return 0; | 211 | return 0; |
212 | } | 212 | } |
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 4b58e812cf67..32d52d3b079c 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c | |||
@@ -518,6 +518,7 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset, | |||
518 | return ret; | 518 | return ret; |
519 | 519 | ||
520 | target->thread.fpsimd_state.user_fpsimd = newstate; | 520 | target->thread.fpsimd_state.user_fpsimd = newstate; |
521 | fpsimd_flush_task_state(target); | ||
521 | return ret; | 522 | return ret; |
522 | } | 523 | } |
523 | 524 | ||
@@ -765,6 +766,7 @@ static int compat_vfp_set(struct task_struct *target, | |||
765 | uregs->fpcr = fpscr & VFP_FPSCR_CTRL_MASK; | 766 | uregs->fpcr = fpscr & VFP_FPSCR_CTRL_MASK; |
766 | } | 767 | } |
767 | 768 | ||
769 | fpsimd_flush_task_state(target); | ||
768 | return ret; | 770 | return ret; |
769 | } | 771 | } |
770 | 772 | ||
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 2ba72a11629f..6357b9c6c90e 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c | |||
@@ -51,7 +51,7 @@ static int preserve_fpsimd_context(struct fpsimd_context __user *ctx) | |||
51 | int err; | 51 | int err; |
52 | 52 | ||
53 | /* dump the hardware registers to the fpsimd_state structure */ | 53 | /* dump the hardware registers to the fpsimd_state structure */ |
54 | fpsimd_save_state(fpsimd); | 54 | fpsimd_preserve_current_state(); |
55 | 55 | ||
56 | /* copy the FP and status/control registers */ | 56 | /* copy the FP and status/control registers */ |
57 | err = __copy_to_user(ctx->vregs, fpsimd->vregs, sizeof(fpsimd->vregs)); | 57 | err = __copy_to_user(ctx->vregs, fpsimd->vregs, sizeof(fpsimd->vregs)); |
@@ -86,11 +86,8 @@ static int restore_fpsimd_context(struct fpsimd_context __user *ctx) | |||
86 | __get_user_error(fpsimd.fpcr, &ctx->fpcr, err); | 86 | __get_user_error(fpsimd.fpcr, &ctx->fpcr, err); |
87 | 87 | ||
88 | /* load the hardware registers from the fpsimd_state structure */ | 88 | /* load the hardware registers from the fpsimd_state structure */ |
89 | if (!err) { | 89 | if (!err) |
90 | preempt_disable(); | 90 | fpsimd_update_current_state(&fpsimd); |
91 | fpsimd_load_state(&fpsimd); | ||
92 | preempt_enable(); | ||
93 | } | ||
94 | 91 | ||
95 | return err ? -EFAULT : 0; | 92 | return err ? -EFAULT : 0; |
96 | } | 93 | } |
@@ -433,4 +430,8 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, | |||
433 | clear_thread_flag(TIF_NOTIFY_RESUME); | 430 | clear_thread_flag(TIF_NOTIFY_RESUME); |
434 | tracehook_notify_resume(regs); | 431 | tracehook_notify_resume(regs); |
435 | } | 432 | } |
433 | |||
434 | if (thread_flags & _TIF_FOREIGN_FPSTATE) | ||
435 | fpsimd_restore_current_state(); | ||
436 | |||
436 | } | 437 | } |
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index 050c1c2af777..3491c638f172 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c | |||
@@ -222,7 +222,7 @@ static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame) | |||
222 | * Note that this also saves V16-31, which aren't visible | 222 | * Note that this also saves V16-31, which aren't visible |
223 | * in AArch32. | 223 | * in AArch32. |
224 | */ | 224 | */ |
225 | fpsimd_save_state(fpsimd); | 225 | fpsimd_preserve_current_state(); |
226 | 226 | ||
227 | /* Place structure header on the stack */ | 227 | /* Place structure header on the stack */ |
228 | __put_user_error(magic, &frame->magic, err); | 228 | __put_user_error(magic, &frame->magic, err); |
@@ -285,11 +285,8 @@ static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame) | |||
285 | * We don't need to touch the exception register, so | 285 | * We don't need to touch the exception register, so |
286 | * reload the hardware state. | 286 | * reload the hardware state. |
287 | */ | 287 | */ |
288 | if (!err) { | 288 | if (!err) |
289 | preempt_disable(); | 289 | fpsimd_update_current_state(&fpsimd); |
290 | fpsimd_load_state(&fpsimd); | ||
291 | preempt_enable(); | ||
292 | } | ||
293 | 290 | ||
294 | return err ? -EFAULT : 0; | 291 | return err ? -EFAULT : 0; |
295 | } | 292 | } |