diff options
author | Vincent Whitchurch <vincent.whitchurch@axis.com> | 2018-11-09 04:09:48 -0500 |
---|---|---|
committer | Russell King <rmk+kernel@armlinux.org.uk> | 2018-11-12 05:51:59 -0500 |
commit | f441882a5229ffaef61a47bccd4518f7e2749cbc (patch) | |
tree | 551bb462ee92122edfbaf07c35b42791b651abbf | |
parent | bc2eca9a682881f9da3cc7e2d75b752e549a134d (diff) |
ARM: 8812/1: Optimise copy_{from/to}_user for !CPU_USE_DOMAINS
ARMv6+ processors do not use CONFIG_CPU_USE_DOMAINS, so on them
copy_{from/to}_user accesses user memory with privileged ldr/str
instructions. The copy loops currently use single ldr/str instructions
for this, which is unnecessary: they can use ldm/stm instructions
instead, like memcpy does (but with appropriate fixup tables).
This speeds up a "dd if=foo of=bar bs=32k" on a tmpfs filesystem by
about 4% on my Cortex-A9. The results of ten runs before and ten runs
after the change are listed below.
before:134217728 bytes (128.0MB) copied, 0.543848 seconds, 235.4MB/s
before:134217728 bytes (128.0MB) copied, 0.538610 seconds, 237.6MB/s
before:134217728 bytes (128.0MB) copied, 0.544356 seconds, 235.1MB/s
before:134217728 bytes (128.0MB) copied, 0.544364 seconds, 235.1MB/s
before:134217728 bytes (128.0MB) copied, 0.537130 seconds, 238.3MB/s
before:134217728 bytes (128.0MB) copied, 0.533443 seconds, 240.0MB/s
before:134217728 bytes (128.0MB) copied, 0.545691 seconds, 234.6MB/s
before:134217728 bytes (128.0MB) copied, 0.534695 seconds, 239.4MB/s
before:134217728 bytes (128.0MB) copied, 0.540561 seconds, 236.8MB/s
before:134217728 bytes (128.0MB) copied, 0.541025 seconds, 236.6MB/s
after:134217728 bytes (128.0MB) copied, 0.520445 seconds, 245.9MB/s
after:134217728 bytes (128.0MB) copied, 0.527846 seconds, 242.5MB/s
after:134217728 bytes (128.0MB) copied, 0.519510 seconds, 246.4MB/s
after:134217728 bytes (128.0MB) copied, 0.527231 seconds, 242.8MB/s
after:134217728 bytes (128.0MB) copied, 0.525030 seconds, 243.8MB/s
after:134217728 bytes (128.0MB) copied, 0.524236 seconds, 244.2MB/s
after:134217728 bytes (128.0MB) copied, 0.523659 seconds, 244.4MB/s
after:134217728 bytes (128.0MB) copied, 0.525018 seconds, 243.8MB/s
after:134217728 bytes (128.0MB) copied, 0.519249 seconds, 246.5MB/s
after:134217728 bytes (128.0MB) copied, 0.518527 seconds, 246.9MB/s
Reviewed-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
-rw-r--r-- | arch/arm/include/asm/assembler.h | 6 | ||||
-rw-r--r-- | arch/arm/lib/copy_from_user.S | 23 | ||||
-rw-r--r-- | arch/arm/lib/copy_to_user.S | 27 |
3 files changed, 48 insertions, 8 deletions
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 88286dd483ff..28a48e0d4cca 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h | |||
@@ -243,13 +243,15 @@ | |||
243 | .endm | 243 | .endm |
244 | #endif | 244 | #endif |
245 | 245 | ||
246 | #define USER(x...) \ | 246 | #define USERL(l, x...) \ |
247 | 9999: x; \ | 247 | 9999: x; \ |
248 | .pushsection __ex_table,"a"; \ | 248 | .pushsection __ex_table,"a"; \ |
249 | .align 3; \ | 249 | .align 3; \ |
250 | .long 9999b,9001f; \ | 250 | .long 9999b,l; \ |
251 | .popsection | 251 | .popsection |
252 | 252 | ||
253 | #define USER(x...) USERL(9001f, x) | ||
254 | |||
253 | #ifdef CONFIG_SMP | 255 | #ifdef CONFIG_SMP |
254 | #define ALT_SMP(instr...) \ | 256 | #define ALT_SMP(instr...) \ |
255 | 9998: instr | 257 | 9998: instr |
diff --git a/arch/arm/lib/copy_from_user.S b/arch/arm/lib/copy_from_user.S index 6709a8d33963..0d4c189c7f4f 100644 --- a/arch/arm/lib/copy_from_user.S +++ b/arch/arm/lib/copy_from_user.S | |||
@@ -34,12 +34,13 @@ | |||
34 | * Number of bytes NOT copied. | 34 | * Number of bytes NOT copied. |
35 | */ | 35 | */ |
36 | 36 | ||
37 | #ifdef CONFIG_CPU_USE_DOMAINS | ||
38 | |||
37 | #ifndef CONFIG_THUMB2_KERNEL | 39 | #ifndef CONFIG_THUMB2_KERNEL |
38 | #define LDR1W_SHIFT 0 | 40 | #define LDR1W_SHIFT 0 |
39 | #else | 41 | #else |
40 | #define LDR1W_SHIFT 1 | 42 | #define LDR1W_SHIFT 1 |
41 | #endif | 43 | #endif |
42 | #define STR1W_SHIFT 0 | ||
43 | 44 | ||
44 | .macro ldr1w ptr reg abort | 45 | .macro ldr1w ptr reg abort |
45 | ldrusr \reg, \ptr, 4, abort=\abort | 46 | ldrusr \reg, \ptr, 4, abort=\abort |
@@ -57,10 +58,30 @@ | |||
57 | ldr4w \ptr, \reg5, \reg6, \reg7, \reg8, \abort | 58 | ldr4w \ptr, \reg5, \reg6, \reg7, \reg8, \abort |
58 | .endm | 59 | .endm |
59 | 60 | ||
61 | #else | ||
62 | |||
63 | #define LDR1W_SHIFT 0 | ||
64 | |||
65 | .macro ldr1w ptr reg abort | ||
66 | USERL(\abort, W(ldr) \reg, [\ptr], #4) | ||
67 | .endm | ||
68 | |||
69 | .macro ldr4w ptr reg1 reg2 reg3 reg4 abort | ||
70 | USERL(\abort, ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4}) | ||
71 | .endm | ||
72 | |||
73 | .macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort | ||
74 | USERL(\abort, ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8}) | ||
75 | .endm | ||
76 | |||
77 | #endif /* CONFIG_CPU_USE_DOMAINS */ | ||
78 | |||
60 | .macro ldr1b ptr reg cond=al abort | 79 | .macro ldr1b ptr reg cond=al abort |
61 | ldrusr \reg, \ptr, 1, \cond, abort=\abort | 80 | ldrusr \reg, \ptr, 1, \cond, abort=\abort |
62 | .endm | 81 | .endm |
63 | 82 | ||
83 | #define STR1W_SHIFT 0 | ||
84 | |||
64 | .macro str1w ptr reg abort | 85 | .macro str1w ptr reg abort |
65 | W(str) \reg, [\ptr], #4 | 86 | W(str) \reg, [\ptr], #4 |
66 | .endm | 87 | .endm |
diff --git a/arch/arm/lib/copy_to_user.S b/arch/arm/lib/copy_to_user.S index 970abe521197..97a6ff4b7e3c 100644 --- a/arch/arm/lib/copy_to_user.S +++ b/arch/arm/lib/copy_to_user.S | |||
@@ -35,11 +35,6 @@ | |||
35 | */ | 35 | */ |
36 | 36 | ||
37 | #define LDR1W_SHIFT 0 | 37 | #define LDR1W_SHIFT 0 |
38 | #ifndef CONFIG_THUMB2_KERNEL | ||
39 | #define STR1W_SHIFT 0 | ||
40 | #else | ||
41 | #define STR1W_SHIFT 1 | ||
42 | #endif | ||
43 | 38 | ||
44 | .macro ldr1w ptr reg abort | 39 | .macro ldr1w ptr reg abort |
45 | W(ldr) \reg, [\ptr], #4 | 40 | W(ldr) \reg, [\ptr], #4 |
@@ -57,6 +52,14 @@ | |||
57 | ldr\cond\()b \reg, [\ptr], #1 | 52 | ldr\cond\()b \reg, [\ptr], #1 |
58 | .endm | 53 | .endm |
59 | 54 | ||
55 | #ifdef CONFIG_CPU_USE_DOMAINS | ||
56 | |||
57 | #ifndef CONFIG_THUMB2_KERNEL | ||
58 | #define STR1W_SHIFT 0 | ||
59 | #else | ||
60 | #define STR1W_SHIFT 1 | ||
61 | #endif | ||
62 | |||
60 | .macro str1w ptr reg abort | 63 | .macro str1w ptr reg abort |
61 | strusr \reg, \ptr, 4, abort=\abort | 64 | strusr \reg, \ptr, 4, abort=\abort |
62 | .endm | 65 | .endm |
@@ -72,6 +75,20 @@ | |||
72 | str1w \ptr, \reg8, \abort | 75 | str1w \ptr, \reg8, \abort |
73 | .endm | 76 | .endm |
74 | 77 | ||
78 | #else | ||
79 | |||
80 | #define STR1W_SHIFT 0 | ||
81 | |||
82 | .macro str1w ptr reg abort | ||
83 | USERL(\abort, W(str) \reg, [\ptr], #4) | ||
84 | .endm | ||
85 | |||
86 | .macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort | ||
87 | USERL(\abort, stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8}) | ||
88 | .endm | ||
89 | |||
90 | #endif /* CONFIG_CPU_USE_DOMAINS */ | ||
91 | |||
75 | .macro str1b ptr reg cond=al abort | 92 | .macro str1b ptr reg cond=al abort |
76 | strusr \reg, \ptr, 1, \cond, abort=\abort | 93 | strusr \reg, \ptr, 1, \cond, abort=\abort |
77 | .endm | 94 | .endm |