-rw-r--r--   arch/arm64/include/asm/fpsimd.h      |   5
-rw-r--r--   arch/arm64/include/asm/thread_info.h |   4
-rw-r--r--   arch/arm64/kernel/entry.S            |   2
-rw-r--r--   arch/arm64/kernel/fpsimd.c           | 144
-rw-r--r--   arch/arm64/kernel/ptrace.c           |   2
-rw-r--r--   arch/arm64/kernel/signal.c           |   4
6 files changed, 143 insertions(+), 18 deletions(-)
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index f4e524b67e91..7a900142dbc8 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -37,6 +37,8 @@ struct fpsimd_state {
 			u32 fpcr;
 		};
 	};
+	/* the id of the last cpu to have restored this state */
+	unsigned int cpu;
 };
 
 #if defined(__KERNEL__) && defined(CONFIG_COMPAT)
@@ -59,8 +61,11 @@ extern void fpsimd_thread_switch(struct task_struct *next);
 extern void fpsimd_flush_thread(void);
 
 extern void fpsimd_preserve_current_state(void);
+extern void fpsimd_restore_current_state(void);
 extern void fpsimd_update_current_state(struct fpsimd_state *state);
 
+extern void fpsimd_flush_task_state(struct task_struct *target);
+
 #endif
 
 #endif
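The hunk above shows only the tail of struct fpsimd_state. For context, a rough sketch of the resulting layout, reconstructed from the surrounding lines rather than quoted verbatim from the tree:

/* sketch of struct fpsimd_state after this patch (reconstruction, hedged) */
struct fpsimd_state {
	union {
		struct user_fpsimd_state user_fpsimd;
		struct {
			__uint128_t vregs[32];	/* 32 x 128-bit SIMD registers */
			u32 fpsr;
			u32 fpcr;
		};
	};
	/* the id of the last cpu to have restored this state */
	unsigned int cpu;
};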
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 720e70b66ffd..4a1ca1cfb2f8 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -100,6 +100,7 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_SIGPENDING		0
 #define TIF_NEED_RESCHED	1
 #define TIF_NOTIFY_RESUME	2	/* callback before returning to user */
+#define TIF_FOREIGN_FPSTATE	3	/* CPU's FP state is not current's */
 #define TIF_SYSCALL_TRACE	8
 #define TIF_POLLING_NRFLAG	16
 #define TIF_MEMDIE		18	/* is terminating due to OOM killer */
@@ -112,10 +113,11 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
 #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
+#define _TIF_FOREIGN_FPSTATE	(1 << TIF_FOREIGN_FPSTATE)
 #define _TIF_32BIT		(1 << TIF_32BIT)
 
 #define _TIF_WORK_MASK		(_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
-				 _TIF_NOTIFY_RESUME)
+				 _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE)
 
 #endif /* __KERNEL__ */
 #endif /* __ASM_THREAD_INFO_H */
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 39ac630d83de..80464e2fb1a5 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -576,7 +576,7 @@ fast_work_pending:
 	str	x0, [sp, #S_X0]			// returned x0
 work_pending:
 	tbnz	x1, #TIF_NEED_RESCHED, work_resched
-	/* TIF_SIGPENDING or TIF_NOTIFY_RESUME case */
+	/* TIF_SIGPENDING, TIF_NOTIFY_RESUME or TIF_FOREIGN_FPSTATE case */
 	ldr	x2, [sp, #S_PSTATE]
 	mov	x0, sp				// 'regs'
 	tst	x2, #PSR_MODE_MASK		// user mode regs?
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 8a97163debc7..5ae89303c3ab 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -35,6 +35,60 @@
 #define FPEXC_IDF	(1 << 7)
 
 /*
+ * In order to reduce the number of times the FPSIMD state is needlessly saved
+ * and restored, we need to keep track of two things:
+ * (a) for each task, we need to remember which CPU was the last one to have
+ *     the task's FPSIMD state loaded into its FPSIMD registers;
+ * (b) for each CPU, we need to remember which task's userland FPSIMD state has
+ *     been loaded into its FPSIMD registers most recently, or whether it has
+ *     been used to perform kernel mode NEON in the meantime.
+ *
+ * For (a), we add a 'cpu' field to struct fpsimd_state, which gets updated to
+ * the id of the current CPU everytime the state is loaded onto a CPU. For (b),
+ * we add the per-cpu variable 'fpsimd_last_state' (below), which contains the
+ * address of the userland FPSIMD state of the task that was loaded onto the CPU
+ * the most recently, or NULL if kernel mode NEON has been performed after that.
+ *
+ * With this in place, we no longer have to restore the next FPSIMD state right
+ * when switching between tasks. Instead, we can defer this check to userland
+ * resume, at which time we verify whether the CPU's fpsimd_last_state and the
+ * task's fpsimd_state.cpu are still mutually in sync. If this is the case, we
+ * can omit the FPSIMD restore.
+ *
+ * As an optimization, we use the thread_info flag TIF_FOREIGN_FPSTATE to
+ * indicate whether or not the userland FPSIMD state of the current task is
+ * present in the registers. The flag is set unless the FPSIMD registers of this
+ * CPU currently contain the most recent userland FPSIMD state of the current
+ * task.
+ *
+ * For a certain task, the sequence may look something like this:
+ * - the task gets scheduled in; if both the task's fpsimd_state.cpu field
+ *   contains the id of the current CPU, and the CPU's fpsimd_last_state per-cpu
+ *   variable points to the task's fpsimd_state, the TIF_FOREIGN_FPSTATE flag is
+ *   cleared, otherwise it is set;
+ *
+ * - the task returns to userland; if TIF_FOREIGN_FPSTATE is set, the task's
+ *   userland FPSIMD state is copied from memory to the registers, the task's
+ *   fpsimd_state.cpu field is set to the id of the current CPU, the current
+ *   CPU's fpsimd_last_state pointer is set to this task's fpsimd_state and the
+ *   TIF_FOREIGN_FPSTATE flag is cleared;
+ *
+ * - the task executes an ordinary syscall; upon return to userland, the
+ *   TIF_FOREIGN_FPSTATE flag will still be cleared, so no FPSIMD state is
+ *   restored;
+ *
+ * - the task executes a syscall which executes some NEON instructions; this is
+ *   preceded by a call to kernel_neon_begin(), which copies the task's FPSIMD
+ *   register contents to memory, clears the fpsimd_last_state per-cpu variable
+ *   and sets the TIF_FOREIGN_FPSTATE flag;
+ *
+ * - the task gets preempted after kernel_neon_end() is called; as we have not
+ *   returned from the 2nd syscall yet, TIF_FOREIGN_FPSTATE is still set so
+ *   whatever is in the FPSIMD registers is not saved to memory, but discarded.
+ */
+static DEFINE_PER_CPU(struct fpsimd_state *, fpsimd_last_state);
+
+/*
  * Trapped FP/ASIMD access.
  */
 void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs)
@@ -72,41 +126,96 @@ void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs)
 
 void fpsimd_thread_switch(struct task_struct *next)
 {
-	/* check if not kernel threads */
-	if (current->mm)
+	/*
+	 * Save the current FPSIMD state to memory, but only if whatever is in
+	 * the registers is in fact the most recent userland FPSIMD state of
+	 * 'current'.
+	 */
+	if (current->mm && !test_thread_flag(TIF_FOREIGN_FPSTATE))
 		fpsimd_save_state(&current->thread.fpsimd_state);
-	if (next->mm)
-		fpsimd_load_state(&next->thread.fpsimd_state);
+
+	if (next->mm) {
+		/*
+		 * If we are switching to a task whose most recent userland
+		 * FPSIMD state is already in the registers of *this* cpu,
+		 * we can skip loading the state from memory. Otherwise, set
+		 * the TIF_FOREIGN_FPSTATE flag so the state will be loaded
+		 * upon the next return to userland.
+		 */
+		struct fpsimd_state *st = &next->thread.fpsimd_state;
+
+		if (__this_cpu_read(fpsimd_last_state) == st
+		    && st->cpu == smp_processor_id())
+			clear_ti_thread_flag(task_thread_info(next),
+					     TIF_FOREIGN_FPSTATE);
+		else
+			set_ti_thread_flag(task_thread_info(next),
+					   TIF_FOREIGN_FPSTATE);
+	}
 }
 
 void fpsimd_flush_thread(void)
 {
-	preempt_disable();
 	memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state));
-	fpsimd_load_state(&current->thread.fpsimd_state);
-	preempt_enable();
+	set_thread_flag(TIF_FOREIGN_FPSTATE);
 }
 
 /*
- * Save the userland FPSIMD state of 'current' to memory
+ * Save the userland FPSIMD state of 'current' to memory, but only if the state
+ * currently held in the registers does in fact belong to 'current'
  */
 void fpsimd_preserve_current_state(void)
 {
 	preempt_disable();
-	fpsimd_save_state(&current->thread.fpsimd_state);
+	if (!test_thread_flag(TIF_FOREIGN_FPSTATE))
+		fpsimd_save_state(&current->thread.fpsimd_state);
 	preempt_enable();
 }
 
 /*
- * Load an updated userland FPSIMD state for 'current' from memory
+ * Load the userland FPSIMD state of 'current' from memory, but only if the
+ * FPSIMD state already held in the registers is /not/ the most recent FPSIMD
+ * state of 'current'
+ */
+void fpsimd_restore_current_state(void)
+{
+	preempt_disable();
+	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
+		struct fpsimd_state *st = &current->thread.fpsimd_state;
+
+		fpsimd_load_state(st);
+		this_cpu_write(fpsimd_last_state, st);
+		st->cpu = smp_processor_id();
+	}
+	preempt_enable();
+}
+
+/*
+ * Load an updated userland FPSIMD state for 'current' from memory and set the
+ * flag that indicates that the FPSIMD register contents are the most recent
+ * FPSIMD state of 'current'
  */
 void fpsimd_update_current_state(struct fpsimd_state *state)
 {
 	preempt_disable();
 	fpsimd_load_state(state);
+	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
+		struct fpsimd_state *st = &current->thread.fpsimd_state;
+
+		this_cpu_write(fpsimd_last_state, st);
+		st->cpu = smp_processor_id();
+	}
 	preempt_enable();
 }
 
+/*
+ * Invalidate live CPU copies of task t's FPSIMD state
+ */
+void fpsimd_flush_task_state(struct task_struct *t)
+{
+	t->thread.fpsimd_state.cpu = NR_CPUS;
+}
+
 #ifdef CONFIG_KERNEL_MODE_NEON
 
 /*
@@ -118,16 +227,19 @@ void kernel_neon_begin(void)
 	BUG_ON(in_interrupt());
 	preempt_disable();
 
-	if (current->mm)
+	/*
+	 * Save the userland FPSIMD state if we have one and if we haven't done
+	 * so already. Clear fpsimd_last_state to indicate that there is no
+	 * longer userland FPSIMD state in the registers.
+	 */
+	if (current->mm && !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
 		fpsimd_save_state(&current->thread.fpsimd_state);
+	this_cpu_write(fpsimd_last_state, NULL);
 }
 EXPORT_SYMBOL(kernel_neon_begin);
 
 void kernel_neon_end(void)
 {
-	if (current->mm)
-		fpsimd_load_state(&current->thread.fpsimd_state);
-
 	preempt_enable();
 }
 EXPORT_SYMBOL(kernel_neon_end);
@@ -140,12 +252,12 @@ static int fpsimd_cpu_pm_notifier(struct notifier_block *self,
 {
 	switch (cmd) {
 	case CPU_PM_ENTER:
-		if (current->mm)
+		if (current->mm && !test_thread_flag(TIF_FOREIGN_FPSTATE))
 			fpsimd_save_state(&current->thread.fpsimd_state);
 		break;
 	case CPU_PM_EXIT:
 		if (current->mm)
-			fpsimd_load_state(&current->thread.fpsimd_state);
+			set_thread_flag(TIF_FOREIGN_FPSTATE);
 		break;
 	case CPU_PM_ENTER_FAILED:
 	default:
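A minimal sketch, not part of the patch, of how a hypothetical in-kernel NEON user sits on top of this machinery: kernel_neon_begin() saves the task's userland FPSIMD state unless TIF_FOREIGN_FPSTATE shows it is already stale, and clears fpsimd_last_state so the next return to userland reloads the state from memory; kernel_neon_end() no longer reloads anything. The function name crunch_with_neon is illustration only.

#include <asm/neon.h>

static void crunch_with_neon(void)
{
	kernel_neon_begin();	/* saves current's FPSIMD state if still live */
	/* NEON instructions may be used here; preemption stays disabled */
	kernel_neon_end();	/* register contents will be discarded, not saved */
}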
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 6a8928bba03c..f8700eca24e7 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -517,6 +517,7 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset,
 		return ret;
 
 	target->thread.fpsimd_state.user_fpsimd = newstate;
+	fpsimd_flush_task_state(target);
 	return ret;
 }
 
@@ -764,6 +765,7 @@ static int compat_vfp_set(struct task_struct *target,
 		uregs->fpcr = fpscr & VFP_FPSCR_CTRL_MASK;
 	}
 
+	fpsimd_flush_task_state(target);
 	return ret;
 }
 
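Both hunks above follow the same pattern: after writing a task's FPSIMD image in memory, invalidate any live register copy so the update cannot be skipped or overwritten on the next switch. A hypothetical helper (update_task_fpsr is illustration only, not a kernel function) showing that pattern:

static void update_task_fpsr(struct task_struct *t, u32 new_fpsr)
{
	t->thread.fpsimd_state.fpsr = new_fpsr;	/* modify the memory image */
	fpsimd_flush_task_state(t);		/* mark live CPU copies stale, forcing a reload */
}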
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 06448a77ff53..882f01774365 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -413,4 +413,8 @@ asmlinkage void do_notify_resume(struct pt_regs *regs,
 		clear_thread_flag(TIF_NOTIFY_RESUME);
 		tracehook_notify_resume(regs);
 	}
+
+	if (thread_flags & _TIF_FOREIGN_FPSTATE)
+		fpsimd_restore_current_state();
+
 }
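For context, a rough reconstruction (assumed shape, not a verbatim quote) of do_notify_resume() after this patch; it runs on the slow return-to-userland path whenever a flag in _TIF_WORK_MASK is set, which now includes TIF_FOREIGN_FPSTATE:

asmlinkage void do_notify_resume(struct pt_regs *regs,
				 unsigned int thread_flags)
{
	if (thread_flags & _TIF_SIGPENDING)
		do_signal(regs);

	if (thread_flags & _TIF_NOTIFY_RESUME) {
		clear_thread_flag(TIF_NOTIFY_RESUME);
		tracehook_notify_resume(regs);
	}

	/* deferred FPSIMD reload added by this patch */
	if (thread_flags & _TIF_FOREIGN_FPSTATE)
		fpsimd_restore_current_state();
}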