diff options
Diffstat (limited to 'arch/arm64/kernel')
-rw-r--r-- | arch/arm64/kernel/entry.S | 2 | ||||
-rw-r--r-- | arch/arm64/kernel/fpsimd.c | 144 | ||||
-rw-r--r-- | arch/arm64/kernel/ptrace.c | 2 | ||||
-rw-r--r-- | arch/arm64/kernel/signal.c | 4 |
4 files changed, 135 insertions, 17 deletions
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 39ac630d83de..80464e2fb1a5 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S | |||
@@ -576,7 +576,7 @@ fast_work_pending: | |||
576 | str x0, [sp, #S_X0] // returned x0 | 576 | str x0, [sp, #S_X0] // returned x0 |
577 | work_pending: | 577 | work_pending: |
578 | tbnz x1, #TIF_NEED_RESCHED, work_resched | 578 | tbnz x1, #TIF_NEED_RESCHED, work_resched |
579 | /* TIF_SIGPENDING or TIF_NOTIFY_RESUME case */ | 579 | /* TIF_SIGPENDING, TIF_NOTIFY_RESUME or TIF_FOREIGN_FPSTATE case */ |
580 | ldr x2, [sp, #S_PSTATE] | 580 | ldr x2, [sp, #S_PSTATE] |
581 | mov x0, sp // 'regs' | 581 | mov x0, sp // 'regs' |
582 | tst x2, #PSR_MODE_MASK // user mode regs? | 582 | tst x2, #PSR_MODE_MASK // user mode regs? |
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 8a97163debc7..5ae89303c3ab 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c | |||
@@ -35,6 +35,60 @@ | |||
35 | #define FPEXC_IDF (1 << 7) | 35 | #define FPEXC_IDF (1 << 7) |
36 | 36 | ||
37 | /* | 37 | /* |
38 | * In order to reduce the number of times the FPSIMD state is needlessly saved | ||
39 | * and restored, we need to keep track of two things: | ||
40 | * (a) for each task, we need to remember which CPU was the last one to have | ||
41 | * the task's FPSIMD state loaded into its FPSIMD registers; | ||
42 | * (b) for each CPU, we need to remember which task's userland FPSIMD state has | ||
43 | * been loaded into its FPSIMD registers most recently, or whether it has | ||
44 | * been used to perform kernel mode NEON in the meantime. | ||
45 | * | ||
46 | * For (a), we add a 'cpu' field to struct fpsimd_state, which gets updated to | ||
47 | * the id of the current CPU everytime the state is loaded onto a CPU. For (b), | ||
48 | * we add the per-cpu variable 'fpsimd_last_state' (below), which contains the | ||
49 | * address of the userland FPSIMD state of the task that was loaded onto the CPU | ||
50 | * the most recently, or NULL if kernel mode NEON has been performed after that. | ||
51 | * | ||
52 | * With this in place, we no longer have to restore the next FPSIMD state right | ||
53 | * when switching between tasks. Instead, we can defer this check to userland | ||
54 | * resume, at which time we verify whether the CPU's fpsimd_last_state and the | ||
55 | * task's fpsimd_state.cpu are still mutually in sync. If this is the case, we | ||
56 | * can omit the FPSIMD restore. | ||
57 | * | ||
58 | * As an optimization, we use the thread_info flag TIF_FOREIGN_FPSTATE to | ||
59 | * indicate whether or not the userland FPSIMD state of the current task is | ||
60 | * present in the registers. The flag is set unless the FPSIMD registers of this | ||
61 | * CPU currently contain the most recent userland FPSIMD state of the current | ||
62 | * task. | ||
63 | * | ||
64 | * For a certain task, the sequence may look something like this: | ||
65 | * - the task gets scheduled in; if both the task's fpsimd_state.cpu field | ||
66 | * contains the id of the current CPU, and the CPU's fpsimd_last_state per-cpu | ||
67 | * variable points to the task's fpsimd_state, the TIF_FOREIGN_FPSTATE flag is | ||
68 | * cleared, otherwise it is set; | ||
69 | * | ||
70 | * - the task returns to userland; if TIF_FOREIGN_FPSTATE is set, the task's | ||
71 | * userland FPSIMD state is copied from memory to the registers, the task's | ||
72 | * fpsimd_state.cpu field is set to the id of the current CPU, the current | ||
73 | * CPU's fpsimd_last_state pointer is set to this task's fpsimd_state and the | ||
74 | * TIF_FOREIGN_FPSTATE flag is cleared; | ||
75 | * | ||
76 | * - the task executes an ordinary syscall; upon return to userland, the | ||
77 | * TIF_FOREIGN_FPSTATE flag will still be cleared, so no FPSIMD state is | ||
78 | * restored; | ||
79 | * | ||
80 | * - the task executes a syscall which executes some NEON instructions; this is | ||
81 | * preceded by a call to kernel_neon_begin(), which copies the task's FPSIMD | ||
82 | * register contents to memory, clears the fpsimd_last_state per-cpu variable | ||
83 | * and sets the TIF_FOREIGN_FPSTATE flag; | ||
84 | * | ||
85 | * - the task gets preempted after kernel_neon_end() is called; as we have not | ||
86 | * returned from the 2nd syscall yet, TIF_FOREIGN_FPSTATE is still set so | ||
87 | * whatever is in the FPSIMD registers is not saved to memory, but discarded. | ||
88 | */ | ||
89 | static DEFINE_PER_CPU(struct fpsimd_state *, fpsimd_last_state); | ||
90 | |||
91 | /* | ||
38 | * Trapped FP/ASIMD access. | 92 | * Trapped FP/ASIMD access. |
39 | */ | 93 | */ |
40 | void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs) | 94 | void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs) |
@@ -72,41 +126,96 @@ void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs) | |||
72 | 126 | ||
73 | void fpsimd_thread_switch(struct task_struct *next) | 127 | void fpsimd_thread_switch(struct task_struct *next) |
74 | { | 128 | { |
75 | /* check if not kernel threads */ | 129 | /* |
76 | if (current->mm) | 130 | * Save the current FPSIMD state to memory, but only if whatever is in |
131 | * the registers is in fact the most recent userland FPSIMD state of | ||
132 | * 'current'. | ||
133 | */ | ||
134 | if (current->mm && !test_thread_flag(TIF_FOREIGN_FPSTATE)) | ||
77 | fpsimd_save_state(&current->thread.fpsimd_state); | 135 | fpsimd_save_state(&current->thread.fpsimd_state); |
78 | if (next->mm) | 136 | |
79 | fpsimd_load_state(&next->thread.fpsimd_state); | 137 | if (next->mm) { |
138 | /* | ||
139 | * If we are switching to a task whose most recent userland | ||
140 | * FPSIMD state is already in the registers of *this* cpu, | ||
141 | * we can skip loading the state from memory. Otherwise, set | ||
142 | * the TIF_FOREIGN_FPSTATE flag so the state will be loaded | ||
143 | * upon the next return to userland. | ||
144 | */ | ||
145 | struct fpsimd_state *st = &next->thread.fpsimd_state; | ||
146 | |||
147 | if (__this_cpu_read(fpsimd_last_state) == st | ||
148 | && st->cpu == smp_processor_id()) | ||
149 | clear_ti_thread_flag(task_thread_info(next), | ||
150 | TIF_FOREIGN_FPSTATE); | ||
151 | else | ||
152 | set_ti_thread_flag(task_thread_info(next), | ||
153 | TIF_FOREIGN_FPSTATE); | ||
154 | } | ||
80 | } | 155 | } |
81 | 156 | ||
82 | void fpsimd_flush_thread(void) | 157 | void fpsimd_flush_thread(void) |
83 | { | 158 | { |
84 | preempt_disable(); | ||
85 | memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state)); | 159 | memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state)); |
86 | fpsimd_load_state(&current->thread.fpsimd_state); | 160 | set_thread_flag(TIF_FOREIGN_FPSTATE); |
87 | preempt_enable(); | ||
88 | } | 161 | } |
89 | 162 | ||
90 | /* | 163 | /* |
91 | * Save the userland FPSIMD state of 'current' to memory | 164 | * Save the userland FPSIMD state of 'current' to memory, but only if the state |
165 | * currently held in the registers does in fact belong to 'current' | ||
92 | */ | 166 | */ |
93 | void fpsimd_preserve_current_state(void) | 167 | void fpsimd_preserve_current_state(void) |
94 | { | 168 | { |
95 | preempt_disable(); | 169 | preempt_disable(); |
96 | fpsimd_save_state(&current->thread.fpsimd_state); | 170 | if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) |
171 | fpsimd_save_state(&current->thread.fpsimd_state); | ||
97 | preempt_enable(); | 172 | preempt_enable(); |
98 | } | 173 | } |
99 | 174 | ||
100 | /* | 175 | /* |
101 | * Load an updated userland FPSIMD state for 'current' from memory | 176 | * Load the userland FPSIMD state of 'current' from memory, but only if the |
177 | * FPSIMD state already held in the registers is /not/ the most recent FPSIMD | ||
178 | * state of 'current' | ||
179 | */ | ||
180 | void fpsimd_restore_current_state(void) | ||
181 | { | ||
182 | preempt_disable(); | ||
183 | if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { | ||
184 | struct fpsimd_state *st = &current->thread.fpsimd_state; | ||
185 | |||
186 | fpsimd_load_state(st); | ||
187 | this_cpu_write(fpsimd_last_state, st); | ||
188 | st->cpu = smp_processor_id(); | ||
189 | } | ||
190 | preempt_enable(); | ||
191 | } | ||
192 | |||
193 | /* | ||
194 | * Load an updated userland FPSIMD state for 'current' from memory and set the | ||
195 | * flag that indicates that the FPSIMD register contents are the most recent | ||
196 | * FPSIMD state of 'current' | ||
102 | */ | 197 | */ |
103 | void fpsimd_update_current_state(struct fpsimd_state *state) | 198 | void fpsimd_update_current_state(struct fpsimd_state *state) |
104 | { | 199 | { |
105 | preempt_disable(); | 200 | preempt_disable(); |
106 | fpsimd_load_state(state); | 201 | fpsimd_load_state(state); |
202 | if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { | ||
203 | struct fpsimd_state *st = &current->thread.fpsimd_state; | ||
204 | |||
205 | this_cpu_write(fpsimd_last_state, st); | ||
206 | st->cpu = smp_processor_id(); | ||
207 | } | ||
107 | preempt_enable(); | 208 | preempt_enable(); |
108 | } | 209 | } |
109 | 210 | ||
211 | /* | ||
212 | * Invalidate live CPU copies of task t's FPSIMD state | ||
213 | */ | ||
214 | void fpsimd_flush_task_state(struct task_struct *t) | ||
215 | { | ||
216 | t->thread.fpsimd_state.cpu = NR_CPUS; | ||
217 | } | ||
218 | |||
110 | #ifdef CONFIG_KERNEL_MODE_NEON | 219 | #ifdef CONFIG_KERNEL_MODE_NEON |
111 | 220 | ||
112 | /* | 221 | /* |
@@ -118,16 +227,19 @@ void kernel_neon_begin(void) | |||
118 | BUG_ON(in_interrupt()); | 227 | BUG_ON(in_interrupt()); |
119 | preempt_disable(); | 228 | preempt_disable(); |
120 | 229 | ||
121 | if (current->mm) | 230 | /* |
231 | * Save the userland FPSIMD state if we have one and if we haven't done | ||
232 | * so already. Clear fpsimd_last_state to indicate that there is no | ||
233 | * longer userland FPSIMD state in the registers. | ||
234 | */ | ||
235 | if (current->mm && !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE)) | ||
122 | fpsimd_save_state(&current->thread.fpsimd_state); | 236 | fpsimd_save_state(&current->thread.fpsimd_state); |
237 | this_cpu_write(fpsimd_last_state, NULL); | ||
123 | } | 238 | } |
124 | EXPORT_SYMBOL(kernel_neon_begin); | 239 | EXPORT_SYMBOL(kernel_neon_begin); |
125 | 240 | ||
126 | void kernel_neon_end(void) | 241 | void kernel_neon_end(void) |
127 | { | 242 | { |
128 | if (current->mm) | ||
129 | fpsimd_load_state(&current->thread.fpsimd_state); | ||
130 | |||
131 | preempt_enable(); | 243 | preempt_enable(); |
132 | } | 244 | } |
133 | EXPORT_SYMBOL(kernel_neon_end); | 245 | EXPORT_SYMBOL(kernel_neon_end); |
@@ -140,12 +252,12 @@ static int fpsimd_cpu_pm_notifier(struct notifier_block *self, | |||
140 | { | 252 | { |
141 | switch (cmd) { | 253 | switch (cmd) { |
142 | case CPU_PM_ENTER: | 254 | case CPU_PM_ENTER: |
143 | if (current->mm) | 255 | if (current->mm && !test_thread_flag(TIF_FOREIGN_FPSTATE)) |
144 | fpsimd_save_state(&current->thread.fpsimd_state); | 256 | fpsimd_save_state(&current->thread.fpsimd_state); |
145 | break; | 257 | break; |
146 | case CPU_PM_EXIT: | 258 | case CPU_PM_EXIT: |
147 | if (current->mm) | 259 | if (current->mm) |
148 | fpsimd_load_state(&current->thread.fpsimd_state); | 260 | set_thread_flag(TIF_FOREIGN_FPSTATE); |
149 | break; | 261 | break; |
150 | case CPU_PM_ENTER_FAILED: | 262 | case CPU_PM_ENTER_FAILED: |
151 | default: | 263 | default: |
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 6a8928bba03c..f8700eca24e7 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c | |||
@@ -517,6 +517,7 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset, | |||
517 | return ret; | 517 | return ret; |
518 | 518 | ||
519 | target->thread.fpsimd_state.user_fpsimd = newstate; | 519 | target->thread.fpsimd_state.user_fpsimd = newstate; |
520 | fpsimd_flush_task_state(target); | ||
520 | return ret; | 521 | return ret; |
521 | } | 522 | } |
522 | 523 | ||
@@ -764,6 +765,7 @@ static int compat_vfp_set(struct task_struct *target, | |||
764 | uregs->fpcr = fpscr & VFP_FPSCR_CTRL_MASK; | 765 | uregs->fpcr = fpscr & VFP_FPSCR_CTRL_MASK; |
765 | } | 766 | } |
766 | 767 | ||
768 | fpsimd_flush_task_state(target); | ||
767 | return ret; | 769 | return ret; |
768 | } | 770 | } |
769 | 771 | ||
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 06448a77ff53..882f01774365 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c | |||
@@ -413,4 +413,8 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, | |||
413 | clear_thread_flag(TIF_NOTIFY_RESUME); | 413 | clear_thread_flag(TIF_NOTIFY_RESUME); |
414 | tracehook_notify_resume(regs); | 414 | tracehook_notify_resume(regs); |
415 | } | 415 | } |
416 | |||
417 | if (thread_flags & _TIF_FOREIGN_FPSTATE) | ||
418 | fpsimd_restore_current_state(); | ||
419 | |||
416 | } | 420 | } |