aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Vincent Whitchurch <vincent.whitchurch@axis.com> 2018-11-09 04:09:48 -0500
committer Russell King <rmk+kernel@armlinux.org.uk> 2018-11-12 05:51:59 -0500
commit f441882a5229ffaef61a47bccd4518f7e2749cbc (patch)
tree 551bb462ee92122edfbaf07c35b42791b651abbf
parent bc2eca9a682881f9da3cc7e2d75b752e549a134d (diff)
ARM: 8812/1: Optimise copy_{from/to}_user for !CPU_USE_DOMAINS
ARMv6+ processors do not use CONFIG_CPU_USE_DOMAINS and use privileged ldr/str instructions in copy_{from/to}_user. They are currently unnecessarily using single ldr/str instructions and can use ldm/stm instructions instead like memcpy does (but with appropriate fixup tables).

This speeds up a "dd if=foo of=bar bs=32k" on a tmpfs filesystem by about 4% on my Cortex-A9.

before:134217728 bytes (128.0MB) copied, 0.543848 seconds, 235.4MB/s
before:134217728 bytes (128.0MB) copied, 0.538610 seconds, 237.6MB/s
before:134217728 bytes (128.0MB) copied, 0.544356 seconds, 235.1MB/s
before:134217728 bytes (128.0MB) copied, 0.544364 seconds, 235.1MB/s
before:134217728 bytes (128.0MB) copied, 0.537130 seconds, 238.3MB/s
before:134217728 bytes (128.0MB) copied, 0.533443 seconds, 240.0MB/s
before:134217728 bytes (128.0MB) copied, 0.545691 seconds, 234.6MB/s
before:134217728 bytes (128.0MB) copied, 0.534695 seconds, 239.4MB/s
before:134217728 bytes (128.0MB) copied, 0.540561 seconds, 236.8MB/s
before:134217728 bytes (128.0MB) copied, 0.541025 seconds, 236.6MB/s

after:134217728 bytes (128.0MB) copied, 0.520445 seconds, 245.9MB/s
after:134217728 bytes (128.0MB) copied, 0.527846 seconds, 242.5MB/s
after:134217728 bytes (128.0MB) copied, 0.519510 seconds, 246.4MB/s
after:134217728 bytes (128.0MB) copied, 0.527231 seconds, 242.8MB/s
after:134217728 bytes (128.0MB) copied, 0.525030 seconds, 243.8MB/s
after:134217728 bytes (128.0MB) copied, 0.524236 seconds, 244.2MB/s
after:134217728 bytes (128.0MB) copied, 0.523659 seconds, 244.4MB/s
after:134217728 bytes (128.0MB) copied, 0.525018 seconds, 243.8MB/s
after:134217728 bytes (128.0MB) copied, 0.519249 seconds, 246.5MB/s
after:134217728 bytes (128.0MB) copied, 0.518527 seconds, 246.9MB/s

Reviewed-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
-rw-r--r-- arch/arm/include/asm/assembler.h 6
-rw-r--r-- arch/arm/lib/copy_from_user.S 23
-rw-r--r-- arch/arm/lib/copy_to_user.S 27
3 files changed, 48 insertions, 8 deletions
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 88286dd483ff..28a48e0d4cca 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -243,13 +243,15 @@
243 .endm 243 .endm
244#endif 244#endif
245 245
246#define USER(x...) \ 246#define USERL(l, x...) \
2479999: x; \ 2479999: x; \
248 .pushsection __ex_table,"a"; \ 248 .pushsection __ex_table,"a"; \
249 .align 3; \ 249 .align 3; \
250 .long 9999b,9001f; \ 250 .long 9999b,l; \
251 .popsection 251 .popsection
252 252
253#define USER(x...) USERL(9001f, x)
254
253#ifdef CONFIG_SMP 255#ifdef CONFIG_SMP
254#define ALT_SMP(instr...) \ 256#define ALT_SMP(instr...) \
2559998: instr 2579998: instr
diff --git a/arch/arm/lib/copy_from_user.S b/arch/arm/lib/copy_from_user.S
index 6709a8d33963..0d4c189c7f4f 100644
--- a/arch/arm/lib/copy_from_user.S
+++ b/arch/arm/lib/copy_from_user.S
@@ -34,12 +34,13 @@
34 * Number of bytes NOT copied. 34 * Number of bytes NOT copied.
35 */ 35 */
36 36
37#ifdef CONFIG_CPU_USE_DOMAINS
38
37#ifndef CONFIG_THUMB2_KERNEL 39#ifndef CONFIG_THUMB2_KERNEL
38#define LDR1W_SHIFT 0 40#define LDR1W_SHIFT 0
39#else 41#else
40#define LDR1W_SHIFT 1 42#define LDR1W_SHIFT 1
41#endif 43#endif
42#define STR1W_SHIFT 0
43 44
44 .macro ldr1w ptr reg abort 45 .macro ldr1w ptr reg abort
45 ldrusr \reg, \ptr, 4, abort=\abort 46 ldrusr \reg, \ptr, 4, abort=\abort
@@ -57,10 +58,30 @@
57 ldr4w \ptr, \reg5, \reg6, \reg7, \reg8, \abort 58 ldr4w \ptr, \reg5, \reg6, \reg7, \reg8, \abort
58 .endm 59 .endm
59 60
61#else
62
63#define LDR1W_SHIFT 0
64
65 .macro ldr1w ptr reg abort
66 USERL(\abort, W(ldr) \reg, [\ptr], #4)
67 .endm
68
69 .macro ldr4w ptr reg1 reg2 reg3 reg4 abort
70 USERL(\abort, ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4})
71 .endm
72
73 .macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
74 USERL(\abort, ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8})
75 .endm
76
77#endif /* CONFIG_CPU_USE_DOMAINS */
78
60 .macro ldr1b ptr reg cond=al abort 79 .macro ldr1b ptr reg cond=al abort
61 ldrusr \reg, \ptr, 1, \cond, abort=\abort 80 ldrusr \reg, \ptr, 1, \cond, abort=\abort
62 .endm 81 .endm
63 82
83#define STR1W_SHIFT 0
84
64 .macro str1w ptr reg abort 85 .macro str1w ptr reg abort
65 W(str) \reg, [\ptr], #4 86 W(str) \reg, [\ptr], #4
66 .endm 87 .endm
diff --git a/arch/arm/lib/copy_to_user.S b/arch/arm/lib/copy_to_user.S
index 970abe521197..97a6ff4b7e3c 100644
--- a/arch/arm/lib/copy_to_user.S
+++ b/arch/arm/lib/copy_to_user.S
@@ -35,11 +35,6 @@
35 */ 35 */
36 36
37#define LDR1W_SHIFT 0 37#define LDR1W_SHIFT 0
38#ifndef CONFIG_THUMB2_KERNEL
39#define STR1W_SHIFT 0
40#else
41#define STR1W_SHIFT 1
42#endif
43 38
44 .macro ldr1w ptr reg abort 39 .macro ldr1w ptr reg abort
45 W(ldr) \reg, [\ptr], #4 40 W(ldr) \reg, [\ptr], #4
@@ -57,6 +52,14 @@
57 ldr\cond\()b \reg, [\ptr], #1 52 ldr\cond\()b \reg, [\ptr], #1
58 .endm 53 .endm
59 54
55#ifdef CONFIG_CPU_USE_DOMAINS
56
57#ifndef CONFIG_THUMB2_KERNEL
58#define STR1W_SHIFT 0
59#else
60#define STR1W_SHIFT 1
61#endif
62
60 .macro str1w ptr reg abort 63 .macro str1w ptr reg abort
61 strusr \reg, \ptr, 4, abort=\abort 64 strusr \reg, \ptr, 4, abort=\abort
62 .endm 65 .endm
@@ -72,6 +75,20 @@
72 str1w \ptr, \reg8, \abort 75 str1w \ptr, \reg8, \abort
73 .endm 76 .endm
74 77
78#else
79
80#define STR1W_SHIFT 0
81
82 .macro str1w ptr reg abort
83 USERL(\abort, W(str) \reg, [\ptr], #4)
84 .endm
85
86 .macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
87 USERL(\abort, stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8})
88 .endm
89
90#endif /* CONFIG_CPU_USE_DOMAINS */
91
75 .macro str1b ptr reg cond=al abort 92 .macro str1b ptr reg cond=al abort
76 strusr \reg, \ptr, 1, \cond, abort=\abort 93 strusr \reg, \ptr, 1, \cond, abort=\abort
77 .endm 94 .endm