author		Anton Blanchard <anton@samba.org>	2012-04-15 16:56:45 -0400
committer	Benjamin Herrenschmidt <benh@kernel.crashing.org>	2012-04-30 01:37:17 -0400
commit		35000870fcfbb28757ad47de77b4645072d916b8 (patch)
tree		92e9fe4570f7239e514b1254090cc595827f0dff
parent		8cd3c23df79411f6b24ddb7d2ed58d26e3b06815 (diff)
powerpc: Optimise enable_kernel_altivec
Add two optimisations to enable_kernel_altivec:

- enable_kernel_altivec has already determined if we need to save the
  previous task's state but we call giveup_altivec in both cases,
  requiring an extra branch in giveup_altivec. Create
  giveup_altivec_notask which only turns on the VMX bit in the MSR.

- We write the VMX MSR bit each time we call enable_kernel_altivec
  even if it was already set. Check the bit and branch out if we have
  already set it. The classic case for this is vectored IO where we
  have to copy multiple buffers to or from userspace.

The following testcase was used to confirm this patch improves
performance:

http://ozlabs.org/~anton/junkcode/copy_to_user.c

Since the current breakpoint for using VMX in copy_tofrom_user is
4096 bytes, I'm using buffers of 4096 + 1 cacheline (4224) bytes.

A benchmark of 16 entry readvs (-s 16):

time copy_to_user -l 4224 -s 16 -i 1000000

completes 5.2% faster on a POWER7 PS700.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
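The linked testcase is not reproduced here; the sketch below is only a rough
userspace illustration of the vectored-IO pattern the message describes: a
16-entry readv() of 4224-byte buffers, sized just above the 4096-byte VMX
breakpoint in copy_tofrom_user. The temporary file, fill pattern and error
handling are invented for illustration and are not part of the benchmark.

/*
 * Illustrative sketch only (not the copy_to_user.c testcase above):
 * a 16-entry readv() where every buffer is 4224 bytes, i.e. just over
 * the 4096-byte threshold at which copy_tofrom_user switches to VMX.
 * Within one such syscall the kernel copies 16 buffers to userspace
 * back to back, so all but the first copy can skip the mtmsrd once
 * enable_kernel_altivec checks MSR_VEC before setting it.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/uio.h>

#define NBUFS	16	/* matches "-s 16" in the benchmark command */
#define BUFLEN	4224	/* 4096 bytes + one 128-byte cacheline */

int main(void)
{
	char path[] = "/tmp/vmx-readv-XXXXXX";	/* invented scratch file */
	struct iovec iov[NBUFS];
	char *src;
	int i, fd;

	fd = mkstemp(path);
	if (fd < 0) {
		perror("mkstemp");
		return 1;
	}

	/* Fill a regular file with enough data for one full readv. */
	src = malloc(NBUFS * BUFLEN);
	memset(src, 0xa5, NBUFS * BUFLEN);
	if (write(fd, src, NBUFS * BUFLEN) != NBUFS * BUFLEN) {
		perror("write");
		return 1;
	}
	lseek(fd, 0, SEEK_SET);

	for (i = 0; i < NBUFS; i++) {
		iov[i].iov_base = malloc(BUFLEN);
		iov[i].iov_len = BUFLEN;
	}

	/* One vectored read: 16 copies to userspace in a single syscall. */
	if (readv(fd, iov, NBUFS) < 0)
		perror("readv");

	unlink(path);
	close(fd);
	free(src);
	return 0;
}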
-rw-r--r--	arch/powerpc/include/asm/switch_to.h	1
-rw-r--r--	arch/powerpc/kernel/process.c	2
-rw-r--r--	arch/powerpc/kernel/vector.S	10
3 files changed, 12 insertions(+), 1 deletion(-)
diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h
index 2824609319c..1a6320290d2 100644
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -40,6 +40,7 @@ static inline void discard_lazy_cpu_state(void)
 #ifdef CONFIG_ALTIVEC
 extern void flush_altivec_to_thread(struct task_struct *);
 extern void giveup_altivec(struct task_struct *);
+extern void giveup_altivec_notask(void);
 #else
 static inline void flush_altivec_to_thread(struct task_struct *t)
 {
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 4937c969009..bb58f41fc04 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -124,7 +124,7 @@ void enable_kernel_altivec(void)
 	if (current->thread.regs && (current->thread.regs->msr & MSR_VEC))
 		giveup_altivec(current);
 	else
-		giveup_altivec(NULL);	/* just enable AltiVec for kernel - force */
+		giveup_altivec_notask();
 #else
 	giveup_altivec(last_task_used_altivec);
 #endif /* CONFIG_SMP */
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index 4d5a3edff49..e830289d2e4 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -89,6 +89,16 @@ _GLOBAL(load_up_altivec)
 	/* restore registers and return */
 	blr
 
+_GLOBAL(giveup_altivec_notask)
+	mfmsr	r3
+	andis.	r4,r3,MSR_VEC@h
+	bnelr				/* Already enabled? */
+	oris	r3,r3,MSR_VEC@h
+	SYNC
+	MTMSRD(r3)			/* enable use of VMX now */
+	isync
+	blr
+
 /*
  * giveup_altivec(tsk)
  * Disable VMX for the task given as the argument,