author    Linus Torvalds <torvalds@linux-foundation.org>   2012-02-18 15:56:35 -0500
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>  2012-02-29 19:34:26 -0500
commit    f4def3f88dc57648d1603656f1ffdf498bfce1ee (patch)
tree      a277f40828d1123325362c4d5823454de5e63e88 /arch/x86/kernel
parent    0a9d89d976531bd5ea7fce618cee886c79b43e07 (diff)
i387: re-introduce FPU state preloading at context switch time
commit 34ddc81a230b15c0e345b6b253049db731499f7e upstream.

After all the FPU state cleanups and finally finding the problem that caused all our FPU save/restore problems, this re-introduces the preloading of FPU state that was removed in commit b3b0870ef3ff ("i387: do not preload FPU state at task switch time").

However, instead of simply reverting the removal, this reimplements preloading with several fixes, most notably

 - properly abstracted as a true FPU state switch, rather than as open-coded save and restore with various hacks.

   In particular, implementing it as a proper FPU state switch allows us to optimize the CR0.TS flag accesses: there is no reason to set the TS bit only to then almost immediately clear it again. CR0 accesses are quite slow and expensive, don't flip the bit back and forth for no good reason.

 - Make sure that the same model works for both x86-32 and x86-64, so that there are no gratuitous differences between the two due to the way they save and restore segment state differently due to architectural differences that really don't matter to the FPU state.

 - Avoid exposing the "preload" state to the context switch routines, and in particular allow the concept of lazy state restore: if nothing else has used the FPU in the meantime, and the process is still on the same CPU, we can avoid restoring state from memory entirely, just re-expose the state that is still in the FPU unit.

   That optimized lazy restore isn't actually implemented here, but the infrastructure is set up for it. Of course, older CPU's that use 'fnsave' to save the state cannot take advantage of this, since the state saving also trashes the state.

In other words, there is now an actual _design_ to the FPU state saving, rather than just random historical baggage. Hopefully it's easier to follow as a result.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
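The two-phase switch the commit message describes is easiest to see as code. The sketch below is a simplified, self-contained model of the switch_fpu_prepare()/switch_fpu_finish() flow and of the CR0.TS optimization; it is NOT the i387.h code added by this commit (that header is outside the arch/x86/kernel diffstat), and the struct fields, the helper bodies, and the "more than five consecutive FPU-using switches" preload heuristic are illustrative assumptions only.

#include <stdbool.h>
#include <stdio.h>

/* Toy model of the two-phase FPU switch described above. Not the kernel's
 * i387.h implementation; names and fields are illustrative only. */

struct task {
	const char *name;
	bool used_math;		/* task has ever used the FPU */
	bool has_fpu;		/* task's state currently lives in the FPU registers */
	unsigned fpu_counter;	/* consecutive context switches that used the FPU */
};

typedef struct { bool preload; } fpu_switch_t;

static bool cr0_ts;		/* models the CR0.TS "FPU not available" bit */

/* First half, run early in __switch_to(): save the outgoing task's state and
 * decide whether the incoming task's state should be preloaded eagerly. */
static fpu_switch_t switch_fpu_prepare(struct task *old, struct task *new)
{
	fpu_switch_t fpu = { .preload = new->used_math && new->fpu_counter > 5 };

	if (old->has_fpu) {
		printf("  save %s's FPU registers to memory\n", old->name);
		old->has_fpu = false;
		/* The CR0 optimization: only set TS when we are NOT about to
		 * preload, so the bit is never set just to be cleared again. */
		if (!fpu.preload)
			cr0_ts = true;
	} else {
		old->fpu_counter = 0;
		if (fpu.preload)
			cr0_ts = false;	/* make the FPU usable for the restore */
	}
	return fpu;
}

/* Second half, run once the rest of the context switch is done. */
static void switch_fpu_finish(struct task *new, fpu_switch_t fpu)
{
	if (fpu.preload) {
		printf("  restore %s's FPU registers from memory\n", new->name);
		new->has_fpu = true;
		new->fpu_counter++;
	}
}

int main(void)
{
	struct task a = { "A", true, true, 9 };		/* heavy FPU user, owns the FPU */
	struct task b = { "B", true, false, 9 };	/* heavy FPU user, state in memory */

	printf("switch A -> B (both FPU-heavy, so B's state is preloaded):\n");
	fpu_switch_t fpu = switch_fpu_prepare(&a, &b);
	/* ... stack, segment and TLS switching happens here in the real __switch_to() ... */
	switch_fpu_finish(&b, fpu);
	printf("  CR0.TS = %d (never flipped on and back off)\n", (int)cr0_ts);
	return 0;
}

Compiled with any C99 compiler, switching from one FPU-heavy task to another prints a save immediately followed by a restore with CR0.TS left clear throughout, which is the round trip the commit message says is now avoided.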
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--   arch/x86/kernel/process_32.c    5
-rw-r--r--   arch/x86/kernel/process_64.c    5
-rw-r--r--   arch/x86/kernel/traps.c        55
3 files changed, 40 insertions(+), 25 deletions(-)
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 74aa377081f..fcdb1b34aa1 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -293,10 +293,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 			     *next = &next_p->thread;
 	int cpu = smp_processor_id();
 	struct tss_struct *tss = &per_cpu(init_tss, cpu);
+	fpu_switch_t fpu;
 
 	/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
 
-	__unlazy_fpu(prev_p);
+	fpu = switch_fpu_prepare(prev_p, next_p);
 
 	/*
 	 * Reload esp0.
@@ -351,6 +352,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	if (prev->gs | next->gs)
 		lazy_load_gs(next->gs);
 
+	switch_fpu_finish(next_p, fpu);
+
 	percpu_write(current_task, next_p);
 
 	return prev_p;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index edb791c02c8..b01898d2744 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -377,8 +377,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	int cpu = smp_processor_id();
 	struct tss_struct *tss = &per_cpu(init_tss, cpu);
 	unsigned fsindex, gsindex;
+	fpu_switch_t fpu;
 
-	__unlazy_fpu(prev_p);
+	fpu = switch_fpu_prepare(prev_p, next_p);
 
 	/*
 	 * Reload esp0, LDT and the page table pointer:
@@ -448,6 +449,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 		wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
 	prev->gsindex = gsindex;
 
+	switch_fpu_finish(next_p, fpu);
+
 	/*
 	 * Switch the PDA and FPU contexts.
 	 */
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 5622d4e115d..1b26e01047b 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -717,6 +717,37 @@ asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void)
 }
 
 /*
+ * This gets called with the process already owning the
+ * FPU state, and with CR0.TS cleared. It just needs to
+ * restore the FPU register state.
+ */
+void __math_state_restore(struct task_struct *tsk)
+{
+	/* We need a safe address that is cheap to find and that is already
+	   in L1. We've just brought in "tsk->thread.has_fpu", so use that */
+#define safe_address (tsk->thread.has_fpu)
+
+	/* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
+	   is pending. Clear the x87 state here by setting it to fixed
+	   values. safe_address is a random variable that should be in L1 */
+	alternative_input(
+		ASM_NOP8 ASM_NOP2,
+		"emms\n\t"		/* clear stack tags */
+		"fildl %P[addr]",	/* set F?P to defined value */
+		X86_FEATURE_FXSAVE_LEAK,
+		[addr] "m" (safe_address));
+
+	/*
+	 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
+	 */
+	if (unlikely(restore_fpu_checking(tsk))) {
+		__thread_fpu_end(tsk);
+		force_sig(SIGSEGV, tsk);
+		return;
+	}
+}
+
+/*
  * 'math_state_restore()' saves the current math information in the
  * old math state array, and gets the new ones from the current task
  *
@@ -730,10 +761,6 @@ void math_state_restore(void)
 {
 	struct task_struct *tsk = current;
 
-	/* We need a safe address that is cheap to find and that is already
-	   in L1. We're just bringing in "tsk->thread.has_fpu", so use that */
-#define safe_address (tsk->thread.has_fpu)
-
 	if (!tsk_used_math(tsk)) {
 		local_irq_enable();
 		/*
@@ -750,25 +777,7 @@ void math_state_restore(void)
 	}
 
 	__thread_fpu_begin(tsk);
-
-	/* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
-	   is pending. Clear the x87 state here by setting it to fixed
-	   values. safe_address is a random variable that should be in L1 */
-	alternative_input(
-		ASM_NOP8 ASM_NOP2,
-		"emms\n\t"		/* clear stack tags */
-		"fildl %P[addr]",	/* set F?P to defined value */
-		X86_FEATURE_FXSAVE_LEAK,
-		[addr] "m" (safe_address));
-
-	/*
-	 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
-	 */
-	if (unlikely(restore_fpu_checking(tsk))) {
-		__thread_fpu_end(tsk);
-		force_sig(SIGSEGV, tsk);
-		return;
-	}
+	__math_state_restore(tsk);
 
 	tsk->fpu_counter++;
 }