aboutsummaryrefslogtreecommitdiffstats
path: root/arch/arm64
diff options
context:
space:
mode:
author Ard Biesheuvel <ard.biesheuvel@linaro.org> 2014-05-08 05:20:23 -0400
committer Ard Biesheuvel <ard.biesheuvel@linaro.org> 2014-05-08 05:31:57 -0400
commit 005f78cd88494457ed38ce817f4e3fe5d372f0cb (patch)
tree f41fff97609ae7c98bc9841bd85b60a3aa892c08 /arch/arm64
parent c51f92693c35c141cf7d9b7e2fcbb81128324eb4 (diff)
arm64: defer reloading a task's FPSIMD state to userland resume
If a task gets scheduled out and back in again and nothing has touched its FPSIMD state in the meantime, there is really no reason to reload it from memory. Similarly, repeated calls to kernel_neon_begin() and kernel_neon_end() currently preserve and restore the FPSIMD state every time.

This patch defers the FPSIMD state restore to the last possible moment, i.e., right before the task returns to userland. If a task does not return to userland at all (for any reason), the existing FPSIMD state is preserved and may be reused by the owning task if it gets scheduled in again on the same CPU.

This patch adds two more functions to abstract away from straight FPSIMD register file saves and restores:
- fpsimd_restore_current_state -> ensure current's FPSIMD state is loaded
- fpsimd_flush_task_state -> invalidate live copies of a task's FPSIMD state

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Diffstat (limited to 'arch/arm64')
-rw-r--r-- arch/arm64/include/asm/fpsimd.h 5
-rw-r--r-- arch/arm64/include/asm/thread_info.h 4
-rw-r--r-- arch/arm64/kernel/entry.S 2
-rw-r--r-- arch/arm64/kernel/fpsimd.c 144
-rw-r--r-- arch/arm64/kernel/ptrace.c 2
-rw-r--r-- arch/arm64/kernel/signal.c 4
6 files changed, 143 insertions, 18 deletions
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index f4e524b67e91..7a900142dbc8 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -37,6 +37,8 @@ struct fpsimd_state {
37 u32 fpcr; 37 u32 fpcr;
38 }; 38 };
39 }; 39 };
40 /* the id of the last cpu to have restored this state */
41 unsigned int cpu;
40}; 42};
41 43
42#if defined(__KERNEL__) && defined(CONFIG_COMPAT) 44#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
@@ -59,8 +61,11 @@ extern void fpsimd_thread_switch(struct task_struct *next);
59extern void fpsimd_flush_thread(void); 61extern void fpsimd_flush_thread(void);
60 62
61extern void fpsimd_preserve_current_state(void); 63extern void fpsimd_preserve_current_state(void);
64extern void fpsimd_restore_current_state(void);
62extern void fpsimd_update_current_state(struct fpsimd_state *state); 65extern void fpsimd_update_current_state(struct fpsimd_state *state);
63 66
67extern void fpsimd_flush_task_state(struct task_struct *target);
68
64#endif 69#endif
65 70
66#endif 71#endif
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 720e70b66ffd..4a1ca1cfb2f8 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -100,6 +100,7 @@ static inline struct thread_info *current_thread_info(void)
100#define TIF_SIGPENDING 0 100#define TIF_SIGPENDING 0
101#define TIF_NEED_RESCHED 1 101#define TIF_NEED_RESCHED 1
102#define TIF_NOTIFY_RESUME 2 /* callback before returning to user */ 102#define TIF_NOTIFY_RESUME 2 /* callback before returning to user */
103#define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */
103#define TIF_SYSCALL_TRACE 8 104#define TIF_SYSCALL_TRACE 8
104#define TIF_POLLING_NRFLAG 16 105#define TIF_POLLING_NRFLAG 16
105#define TIF_MEMDIE 18 /* is terminating due to OOM killer */ 106#define TIF_MEMDIE 18 /* is terminating due to OOM killer */
@@ -112,10 +113,11 @@ static inline struct thread_info *current_thread_info(void)
112#define _TIF_SIGPENDING (1 << TIF_SIGPENDING) 113#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
113#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) 114#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
114#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) 115#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
116#define _TIF_FOREIGN_FPSTATE (1 << TIF_FOREIGN_FPSTATE)
115#define _TIF_32BIT (1 << TIF_32BIT) 117#define _TIF_32BIT (1 << TIF_32BIT)
116 118
117#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ 119#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
118 _TIF_NOTIFY_RESUME) 120 _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE)
119 121
120#endif /* __KERNEL__ */ 122#endif /* __KERNEL__ */
121#endif /* __ASM_THREAD_INFO_H */ 123#endif /* __ASM_THREAD_INFO_H */
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 39ac630d83de..80464e2fb1a5 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -576,7 +576,7 @@ fast_work_pending:
576 str x0, [sp, #S_X0] // returned x0 576 str x0, [sp, #S_X0] // returned x0
577work_pending: 577work_pending:
578 tbnz x1, #TIF_NEED_RESCHED, work_resched 578 tbnz x1, #TIF_NEED_RESCHED, work_resched
579 /* TIF_SIGPENDING or TIF_NOTIFY_RESUME case */ 579 /* TIF_SIGPENDING, TIF_NOTIFY_RESUME or TIF_FOREIGN_FPSTATE case */
580 ldr x2, [sp, #S_PSTATE] 580 ldr x2, [sp, #S_PSTATE]
581 mov x0, sp // 'regs' 581 mov x0, sp // 'regs'
582 tst x2, #PSR_MODE_MASK // user mode regs? 582 tst x2, #PSR_MODE_MASK // user mode regs?
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 8a97163debc7..5ae89303c3ab 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -35,6 +35,60 @@
35#define FPEXC_IDF (1 << 7) 35#define FPEXC_IDF (1 << 7)
36 36
37/* 37/*
38 * In order to reduce the number of times the FPSIMD state is needlessly saved
39 * and restored, we need to keep track of two things:
40 * (a) for each task, we need to remember which CPU was the last one to have
41 * the task's FPSIMD state loaded into its FPSIMD registers;
42 * (b) for each CPU, we need to remember which task's userland FPSIMD state has
43 * been loaded into its FPSIMD registers most recently, or whether it has
44 * been used to perform kernel mode NEON in the meantime.
45 *
46 * For (a), we add a 'cpu' field to struct fpsimd_state, which gets updated to
47 * the id of the current CPU every time the state is loaded onto a CPU. For (b),
48 * we add the per-cpu variable 'fpsimd_last_state' (below), which contains the
49 * address of the userland FPSIMD state of the task that was loaded onto the CPU
50 * the most recently, or NULL if kernel mode NEON has been performed after that.
51 *
52 * With this in place, we no longer have to restore the next FPSIMD state right
53 * when switching between tasks. Instead, we can defer this check to userland
54 * resume, at which time we verify whether the CPU's fpsimd_last_state and the
55 * task's fpsimd_state.cpu are still mutually in sync. If this is the case, we
56 * can omit the FPSIMD restore.
57 *
58 * As an optimization, we use the thread_info flag TIF_FOREIGN_FPSTATE to
59 * indicate whether or not the userland FPSIMD state of the current task is
60 * present in the registers. The flag is set unless the FPSIMD registers of this
61 * CPU currently contain the most recent userland FPSIMD state of the current
62 * task.
63 *
64 * For a certain task, the sequence may look something like this:
65 * - the task gets scheduled in; if both the task's fpsimd_state.cpu field
66 * contains the id of the current CPU, and the CPU's fpsimd_last_state per-cpu
67 * variable points to the task's fpsimd_state, the TIF_FOREIGN_FPSTATE flag is
68 * cleared, otherwise it is set;
69 *
70 * - the task returns to userland; if TIF_FOREIGN_FPSTATE is set, the task's
71 * userland FPSIMD state is copied from memory to the registers, the task's
72 * fpsimd_state.cpu field is set to the id of the current CPU, the current
73 * CPU's fpsimd_last_state pointer is set to this task's fpsimd_state and the
74 * TIF_FOREIGN_FPSTATE flag is cleared;
75 *
76 * - the task executes an ordinary syscall; upon return to userland, the
77 * TIF_FOREIGN_FPSTATE flag will still be cleared, so no FPSIMD state is
78 * restored;
79 *
80 * - the task executes a syscall which executes some NEON instructions; this is
81 * preceded by a call to kernel_neon_begin(), which copies the task's FPSIMD
82 * register contents to memory, clears the fpsimd_last_state per-cpu variable
83 * and sets the TIF_FOREIGN_FPSTATE flag;
84 *
85 * - the task gets preempted after kernel_neon_end() is called; as we have not
86 * returned from the 2nd syscall yet, TIF_FOREIGN_FPSTATE is still set so
87 * whatever is in the FPSIMD registers is not saved to memory, but discarded.
88 */
89static DEFINE_PER_CPU(struct fpsimd_state *, fpsimd_last_state);
90
91/*
38 * Trapped FP/ASIMD access. 92 * Trapped FP/ASIMD access.
39 */ 93 */
40void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs) 94void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs)
@@ -72,41 +126,96 @@ void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs)
72 126
73void fpsimd_thread_switch(struct task_struct *next) 127void fpsimd_thread_switch(struct task_struct *next)
74{ 128{
75 /* check if not kernel threads */ 129 /*
76 if (current->mm) 130 * Save the current FPSIMD state to memory, but only if whatever is in
131 * the registers is in fact the most recent userland FPSIMD state of
132 * 'current'.
133 */
134 if (current->mm && !test_thread_flag(TIF_FOREIGN_FPSTATE))
77 fpsimd_save_state(&current->thread.fpsimd_state); 135 fpsimd_save_state(&current->thread.fpsimd_state);
78 if (next->mm) 136
79 fpsimd_load_state(&next->thread.fpsimd_state); 137 if (next->mm) {
138 /*
139 * If we are switching to a task whose most recent userland
140 * FPSIMD state is already in the registers of *this* cpu,
141 * we can skip loading the state from memory. Otherwise, set
142 * the TIF_FOREIGN_FPSTATE flag so the state will be loaded
143 * upon the next return to userland.
144 */
145 struct fpsimd_state *st = &next->thread.fpsimd_state;
146
147 if (__this_cpu_read(fpsimd_last_state) == st
148 && st->cpu == smp_processor_id())
149 clear_ti_thread_flag(task_thread_info(next),
150 TIF_FOREIGN_FPSTATE);
151 else
152 set_ti_thread_flag(task_thread_info(next),
153 TIF_FOREIGN_FPSTATE);
154 }
80} 155}
81 156
82void fpsimd_flush_thread(void) 157void fpsimd_flush_thread(void)
83{ 158{
84 preempt_disable();
85 memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state)); 159 memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state));
86 fpsimd_load_state(&current->thread.fpsimd_state); 160 set_thread_flag(TIF_FOREIGN_FPSTATE);
87 preempt_enable();
88} 161}
89 162
90/* 163/*
91 * Save the userland FPSIMD state of 'current' to memory 164 * Save the userland FPSIMD state of 'current' to memory, but only if the state
165 * currently held in the registers does in fact belong to 'current'
92 */ 166 */
93void fpsimd_preserve_current_state(void) 167void fpsimd_preserve_current_state(void)
94{ 168{
95 preempt_disable(); 169 preempt_disable();
96 fpsimd_save_state(&current->thread.fpsimd_state); 170 if (!test_thread_flag(TIF_FOREIGN_FPSTATE))
171 fpsimd_save_state(&current->thread.fpsimd_state);
97 preempt_enable(); 172 preempt_enable();
98} 173}
99 174
100/* 175/*
101 * Load an updated userland FPSIMD state for 'current' from memory 176 * Load the userland FPSIMD state of 'current' from memory, but only if the
177 * FPSIMD state already held in the registers is /not/ the most recent FPSIMD
178 * state of 'current'
179 */
180void fpsimd_restore_current_state(void)
181{
182 preempt_disable();
183 if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
184 struct fpsimd_state *st = &current->thread.fpsimd_state;
185
186 fpsimd_load_state(st);
187 this_cpu_write(fpsimd_last_state, st);
188 st->cpu = smp_processor_id();
189 }
190 preempt_enable();
191}
192
193/*
194 * Load an updated userland FPSIMD state for 'current' from memory and set the
195 * flag that indicates that the FPSIMD register contents are the most recent
196 * FPSIMD state of 'current'
102 */ 197 */
103void fpsimd_update_current_state(struct fpsimd_state *state) 198void fpsimd_update_current_state(struct fpsimd_state *state)
104{ 199{
105 preempt_disable(); 200 preempt_disable();
106 fpsimd_load_state(state); 201 fpsimd_load_state(state);
202 if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
203 struct fpsimd_state *st = &current->thread.fpsimd_state;
204
205 this_cpu_write(fpsimd_last_state, st);
206 st->cpu = smp_processor_id();
207 }
107 preempt_enable(); 208 preempt_enable();
108} 209}
109 210
211/*
212 * Invalidate live CPU copies of task t's FPSIMD state
213 */
214void fpsimd_flush_task_state(struct task_struct *t)
215{
216 t->thread.fpsimd_state.cpu = NR_CPUS;
217}
218
110#ifdef CONFIG_KERNEL_MODE_NEON 219#ifdef CONFIG_KERNEL_MODE_NEON
111 220
112/* 221/*
@@ -118,16 +227,19 @@ void kernel_neon_begin(void)
118 BUG_ON(in_interrupt()); 227 BUG_ON(in_interrupt());
119 preempt_disable(); 228 preempt_disable();
120 229
121 if (current->mm) 230 /*
231 * Save the userland FPSIMD state if we have one and if we haven't done
232 * so already. Clear fpsimd_last_state to indicate that there is no
233 * longer userland FPSIMD state in the registers.
234 */
235 if (current->mm && !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
122 fpsimd_save_state(&current->thread.fpsimd_state); 236 fpsimd_save_state(&current->thread.fpsimd_state);
237 this_cpu_write(fpsimd_last_state, NULL);
123} 238}
124EXPORT_SYMBOL(kernel_neon_begin); 239EXPORT_SYMBOL(kernel_neon_begin);
125 240
126void kernel_neon_end(void) 241void kernel_neon_end(void)
127{ 242{
128 if (current->mm)
129 fpsimd_load_state(&current->thread.fpsimd_state);
130
131 preempt_enable(); 243 preempt_enable();
132} 244}
133EXPORT_SYMBOL(kernel_neon_end); 245EXPORT_SYMBOL(kernel_neon_end);
@@ -140,12 +252,12 @@ static int fpsimd_cpu_pm_notifier(struct notifier_block *self,
140{ 252{
141 switch (cmd) { 253 switch (cmd) {
142 case CPU_PM_ENTER: 254 case CPU_PM_ENTER:
143 if (current->mm) 255 if (current->mm && !test_thread_flag(TIF_FOREIGN_FPSTATE))
144 fpsimd_save_state(&current->thread.fpsimd_state); 256 fpsimd_save_state(&current->thread.fpsimd_state);
145 break; 257 break;
146 case CPU_PM_EXIT: 258 case CPU_PM_EXIT:
147 if (current->mm) 259 if (current->mm)
148 fpsimd_load_state(&current->thread.fpsimd_state); 260 set_thread_flag(TIF_FOREIGN_FPSTATE);
149 break; 261 break;
150 case CPU_PM_ENTER_FAILED: 262 case CPU_PM_ENTER_FAILED:
151 default: 263 default:
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 6a8928bba03c..f8700eca24e7 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -517,6 +517,7 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset,
517 return ret; 517 return ret;
518 518
519 target->thread.fpsimd_state.user_fpsimd = newstate; 519 target->thread.fpsimd_state.user_fpsimd = newstate;
520 fpsimd_flush_task_state(target);
520 return ret; 521 return ret;
521} 522}
522 523
@@ -764,6 +765,7 @@ static int compat_vfp_set(struct task_struct *target,
764 uregs->fpcr = fpscr & VFP_FPSCR_CTRL_MASK; 765 uregs->fpcr = fpscr & VFP_FPSCR_CTRL_MASK;
765 } 766 }
766 767
768 fpsimd_flush_task_state(target);
767 return ret; 769 return ret;
768} 770}
769 771
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 06448a77ff53..882f01774365 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -413,4 +413,8 @@ asmlinkage void do_notify_resume(struct pt_regs *regs,
413 clear_thread_flag(TIF_NOTIFY_RESUME); 413 clear_thread_flag(TIF_NOTIFY_RESUME);
414 tracehook_notify_resume(regs); 414 tracehook_notify_resume(regs);
415 } 415 }
416
417 if (thread_flags & _TIF_FOREIGN_FPSTATE)
418 fpsimd_restore_current_state();
419
416} 420}