diff options
author | Ard Biesheuvel <ard.biesheuvel@linaro.org> | 2014-02-24 09:26:29 -0500 |
---|---|---|
committer | Ard Biesheuvel <ard.biesheuvel@linaro.org> | 2014-05-08 05:31:57 -0400 |
commit | 190f1ca85d071114930dd7abe6b5d103e9d5572f (patch) | |
tree | 27344d61659e775df00892c8a13ae2473aad29b1 /arch/arm64 | |
parent | 005f78cd88494457ed38ce817f4e3fe5d372f0cb (diff) |
arm64: add support for kernel mode NEON in interrupt context
This patch modifies kernel_neon_begin() and kernel_neon_end(), so
they may be called from any context. To address the case where only
a couple of registers are needed, kernel_neon_begin_partial(u32) is
introduced which takes as a parameter the number of bottom 'n' NEON
q-registers required. To mark the end of such a partial section, the
regular kernel_neon_end() should be used.
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Diffstat (limited to 'arch/arm64')
-rw-r--r-- | arch/arm64/include/asm/fpsimd.h | 15 | ||||
-rw-r--r-- | arch/arm64/include/asm/fpsimdmacros.h | 35 | ||||
-rw-r--r-- | arch/arm64/include/asm/neon.h | 6 | ||||
-rw-r--r-- | arch/arm64/kernel/entry-fpsimd.S | 24 | ||||
-rw-r--r-- | arch/arm64/kernel/fpsimd.c | 44 |
5 files changed, 109 insertions, 15 deletions
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 7a900142dbc8..50f559f574fe 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h | |||
@@ -41,6 +41,17 @@ struct fpsimd_state { | |||
41 | unsigned int cpu; | 41 | unsigned int cpu; |
42 | }; | 42 | }; |
43 | 43 | ||
44 | /* | ||
45 | * Struct for stacking the bottom 'n' FP/SIMD registers. | ||
46 | */ | ||
47 | struct fpsimd_partial_state { | ||
48 | u32 fpsr; | ||
49 | u32 fpcr; | ||
50 | u32 num_regs; | ||
51 | __uint128_t vregs[32]; | ||
52 | }; | ||
53 | |||
54 | |||
44 | #if defined(__KERNEL__) && defined(CONFIG_COMPAT) | 55 | #if defined(__KERNEL__) && defined(CONFIG_COMPAT) |
45 | /* Masks for extracting the FPSR and FPCR from the FPSCR */ | 56 | /* Masks for extracting the FPSR and FPCR from the FPSCR */ |
46 | #define VFP_FPSCR_STAT_MASK 0xf800009f | 57 | #define VFP_FPSCR_STAT_MASK 0xf800009f |
@@ -66,6 +77,10 @@ extern void fpsimd_update_current_state(struct fpsimd_state *state); | |||
66 | 77 | ||
67 | extern void fpsimd_flush_task_state(struct task_struct *target); | 78 | extern void fpsimd_flush_task_state(struct task_struct *target); |
68 | 79 | ||
80 | extern void fpsimd_save_partial_state(struct fpsimd_partial_state *state, | ||
81 | u32 num_regs); | ||
82 | extern void fpsimd_load_partial_state(struct fpsimd_partial_state *state); | ||
83 | |||
69 | #endif | 84 | #endif |
70 | 85 | ||
71 | #endif | 86 | #endif |
diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h index bbec599c96bd..768414d55e64 100644 --- a/arch/arm64/include/asm/fpsimdmacros.h +++ b/arch/arm64/include/asm/fpsimdmacros.h | |||
@@ -62,3 +62,38 @@ | |||
62 | ldr w\tmpnr, [\state, #16 * 2 + 4] | 62 | ldr w\tmpnr, [\state, #16 * 2 + 4] |
63 | msr fpcr, x\tmpnr | 63 | msr fpcr, x\tmpnr |
64 | .endm | 64 | .endm |
65 | |||
66 | .altmacro | ||
67 | .macro fpsimd_save_partial state, numnr, tmpnr1, tmpnr2 | ||
68 | mrs x\tmpnr1, fpsr | ||
69 | str w\numnr, [\state, #8] | ||
70 | mrs x\tmpnr2, fpcr | ||
71 | stp w\tmpnr1, w\tmpnr2, [\state] | ||
72 | adr x\tmpnr1, 0f | ||
73 | add \state, \state, x\numnr, lsl #4 | ||
74 | sub x\tmpnr1, x\tmpnr1, x\numnr, lsl #1 | ||
75 | br x\tmpnr1 | ||
76 | .irp qa, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0 | ||
77 | .irp qb, %(qa + 1) | ||
78 | stp q\qa, q\qb, [\state, # -16 * \qa - 16] | ||
79 | .endr | ||
80 | .endr | ||
81 | 0: | ||
82 | .endm | ||
83 | |||
84 | .macro fpsimd_restore_partial state, tmpnr1, tmpnr2 | ||
85 | ldp w\tmpnr1, w\tmpnr2, [\state] | ||
86 | msr fpsr, x\tmpnr1 | ||
87 | msr fpcr, x\tmpnr2 | ||
88 | adr x\tmpnr1, 0f | ||
89 | ldr w\tmpnr2, [\state, #8] | ||
90 | add \state, \state, x\tmpnr2, lsl #4 | ||
91 | sub x\tmpnr1, x\tmpnr1, x\tmpnr2, lsl #1 | ||
92 | br x\tmpnr1 | ||
93 | .irp qa, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0 | ||
94 | .irp qb, %(qa + 1) | ||
95 | ldp q\qa, q\qb, [\state, # -16 * \qa - 16] | ||
96 | .endr | ||
97 | .endr | ||
98 | 0: | ||
99 | .endm | ||
diff --git a/arch/arm64/include/asm/neon.h b/arch/arm64/include/asm/neon.h index b0cc58a97780..13ce4cc18e26 100644 --- a/arch/arm64/include/asm/neon.h +++ b/arch/arm64/include/asm/neon.h | |||
@@ -8,7 +8,11 @@ | |||
8 | * published by the Free Software Foundation. | 8 | * published by the Free Software Foundation. |
9 | */ | 9 | */ |
10 | 10 | ||
11 | #include <linux/types.h> | ||
12 | |||
11 | #define cpu_has_neon() (1) | 13 | #define cpu_has_neon() (1) |
12 | 14 | ||
13 | void kernel_neon_begin(void); | 15 | #define kernel_neon_begin() kernel_neon_begin_partial(32) |
16 | |||
17 | void kernel_neon_begin_partial(u32 num_regs); | ||
14 | void kernel_neon_end(void); | 18 | void kernel_neon_end(void); |
diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S index 6a27cd6dbfa6..d358ccacfc00 100644 --- a/arch/arm64/kernel/entry-fpsimd.S +++ b/arch/arm64/kernel/entry-fpsimd.S | |||
@@ -41,3 +41,27 @@ ENTRY(fpsimd_load_state) | |||
41 | fpsimd_restore x0, 8 | 41 | fpsimd_restore x0, 8 |
42 | ret | 42 | ret |
43 | ENDPROC(fpsimd_load_state) | 43 | ENDPROC(fpsimd_load_state) |
44 | |||
45 | #ifdef CONFIG_KERNEL_MODE_NEON | ||
46 | |||
47 | /* | ||
48 | * Save the bottom n FP registers. | ||
49 | * | ||
50 | * x0 - pointer to struct fpsimd_partial_state | ||
51 | */ | ||
52 | ENTRY(fpsimd_save_partial_state) | ||
53 | fpsimd_save_partial x0, 1, 8, 9 | ||
54 | ret | ||
55 | ENDPROC(fpsimd_load_partial_state) | ||
56 | |||
57 | /* | ||
58 | * Load the bottom n FP registers. | ||
59 | * | ||
60 | * x0 - pointer to struct fpsimd_partial_state | ||
61 | */ | ||
62 | ENTRY(fpsimd_load_partial_state) | ||
63 | fpsimd_restore_partial x0, 8, 9 | ||
64 | ret | ||
65 | ENDPROC(fpsimd_load_partial_state) | ||
66 | |||
67 | #endif | ||
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 5ae89303c3ab..ad8aebb1cdef 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c | |||
@@ -218,29 +218,45 @@ void fpsimd_flush_task_state(struct task_struct *t) | |||
218 | 218 | ||
219 | #ifdef CONFIG_KERNEL_MODE_NEON | 219 | #ifdef CONFIG_KERNEL_MODE_NEON |
220 | 220 | ||
221 | static DEFINE_PER_CPU(struct fpsimd_partial_state, hardirq_fpsimdstate); | ||
222 | static DEFINE_PER_CPU(struct fpsimd_partial_state, softirq_fpsimdstate); | ||
223 | |||
221 | /* | 224 | /* |
222 | * Kernel-side NEON support functions | 225 | * Kernel-side NEON support functions |
223 | */ | 226 | */ |
224 | void kernel_neon_begin(void) | 227 | void kernel_neon_begin_partial(u32 num_regs) |
225 | { | 228 | { |
226 | /* Avoid using the NEON in interrupt context */ | 229 | if (in_interrupt()) { |
227 | BUG_ON(in_interrupt()); | 230 | struct fpsimd_partial_state *s = this_cpu_ptr( |
228 | preempt_disable(); | 231 | in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate); |
229 | 232 | ||
230 | /* | 233 | BUG_ON(num_regs > 32); |
231 | * Save the userland FPSIMD state if we have one and if we haven't done | 234 | fpsimd_save_partial_state(s, roundup(num_regs, 2)); |
232 | * so already. Clear fpsimd_last_state to indicate that there is no | 235 | } else { |
233 | * longer userland FPSIMD state in the registers. | 236 | /* |
234 | */ | 237 | * Save the userland FPSIMD state if we have one and if we |
235 | if (current->mm && !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE)) | 238 | * haven't done so already. Clear fpsimd_last_state to indicate |
236 | fpsimd_save_state(&current->thread.fpsimd_state); | 239 | * that there is no longer userland FPSIMD state in the |
237 | this_cpu_write(fpsimd_last_state, NULL); | 240 | * registers. |
241 | */ | ||
242 | preempt_disable(); | ||
243 | if (current->mm && | ||
244 | !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE)) | ||
245 | fpsimd_save_state(&current->thread.fpsimd_state); | ||
246 | this_cpu_write(fpsimd_last_state, NULL); | ||
247 | } | ||
238 | } | 248 | } |
239 | EXPORT_SYMBOL(kernel_neon_begin); | 249 | EXPORT_SYMBOL(kernel_neon_begin_partial); |
240 | 250 | ||
241 | void kernel_neon_end(void) | 251 | void kernel_neon_end(void) |
242 | { | 252 | { |
243 | preempt_enable(); | 253 | if (in_interrupt()) { |
254 | struct fpsimd_partial_state *s = this_cpu_ptr( | ||
255 | in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate); | ||
256 | fpsimd_load_partial_state(s); | ||
257 | } else { | ||
258 | preempt_enable(); | ||
259 | } | ||
244 | } | 260 | } |
245 | EXPORT_SYMBOL(kernel_neon_end); | 261 | EXPORT_SYMBOL(kernel_neon_end); |
246 | 262 | ||