diff options
author | Robin Getz <robin.getz@analog.com> | 2010-05-04 10:59:21 -0400 |
---|---|---|
committer | Mike Frysinger <vapier@gentoo.org> | 2010-05-22 14:19:11 -0400 |
commit | 648eee52ccdc623e21b920d6048e93490a4860a7 (patch) | |
tree | 1d106f156abb6c7a0169a392f4856cd63893e079 /arch | |
parent | c70dcabc8eba18113a4735e7b1bd09f7493e38f0 (diff) |
Blackfin: optimize strncpy a bit
Add a small strncpy optimization that can easily cut boot time by 20%.
When the kernel boots with an initramfs, it builds up the filesystem
from a cpio archive by calling strncpy_from_user() via fs/namei.c's
do_getname() on every file in the archive (of which there can be many) with a
length of PATH_MAX (1024). This causes the destination of the strncpy to be
padded with many NUL bytes.
With this optimization, most of that NUL padding is written by a call
to memset(), which is already optimized for filling memory quickly; the
use of a hardware loop helps a little bit as well.
Boot time was measured with 'loglevel=0' so that UART speed doesn't get in the way.
Signed-off-by: Robin Getz <robin.getz@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Diffstat (limited to 'arch')
-rw-r--r-- | arch/blackfin/lib/memset.S | 1 | ||||
-rw-r--r-- | arch/blackfin/lib/strncpy.S | 59 |
2 files changed, 47 insertions, 13 deletions
diff --git a/arch/blackfin/lib/memset.S b/arch/blackfin/lib/memset.S index c30d99b10969..eab1bef3f5bf 100644 --- a/arch/blackfin/lib/memset.S +++ b/arch/blackfin/lib/memset.S | |||
@@ -20,6 +20,7 @@ | |||
20 | * R1 = filler byte | 20 | * R1 = filler byte |
21 | * R2 = count | 21 | * R2 = count |
22 | * Favours word aligned data. | 22 | * Favours word aligned data. |
23 | * The strncpy assumes that I0 and I1 are not used in this function | ||
23 | */ | 24 | */ |
24 | 25 | ||
25 | ENTRY(_memset) | 26 | ENTRY(_memset) |
diff --git a/arch/blackfin/lib/strncpy.S b/arch/blackfin/lib/strncpy.S index 39fbbe6523e5..f3931d50b4a7 100644 --- a/arch/blackfin/lib/strncpy.S +++ b/arch/blackfin/lib/strncpy.S | |||
@@ -5,12 +5,14 @@ | |||
5 | */ | 5 | */ |
6 | 6 | ||
7 | #include <linux/linkage.h> | 7 | #include <linux/linkage.h> |
8 | #include <asm/context.S> | ||
8 | 9 | ||
9 | /* void *strncpy(char *dest, const char *src, size_t n); | 10 | /* void *strncpy(char *dest, const char *src, size_t n); |
10 | * R0 = address (dest) | 11 | * R0 = address (dest) |
11 | * R1 = address (src) | 12 | * R1 = address (src) |
12 | * R2 = size | 13 | * R2 = size |
13 | * Returns a pointer to the destination string dest | 14 | * Returns a pointer (R0) to the destination string dest |
15 | * we do this by not changing R0 | ||
14 | */ | 16 | */ |
15 | 17 | ||
16 | #ifdef CONFIG_STRNCPY_L1 | 18 | #ifdef CONFIG_STRNCPY_L1 |
@@ -24,29 +26,60 @@ | |||
24 | ENTRY(_strncpy) | 26 | ENTRY(_strncpy) |
25 | CC = R2 == 0; | 27 | CC = R2 == 0; |
26 | if CC JUMP 4f; | 28 | if CC JUMP 4f; |
29 | |||
30 | P2 = R2 ; /* size */ | ||
27 | P0 = R0 ; /* dst*/ | 31 | P0 = R0 ; /* dst*/ |
28 | P1 = R1 ; /* src*/ | 32 | P1 = R1 ; /* src*/ |
29 | 33 | ||
34 | LSETUP (1f, 2f) LC0 = P2; | ||
30 | 1: | 35 | 1: |
31 | R1 = B [P1++] (Z); | 36 | R1 = B [P1++] (Z); |
32 | B [P0++] = R1; | 37 | B [P0++] = R1; |
33 | CC = R1; | 38 | CC = R1 == 0; |
34 | if ! cc jump 2f; | ||
35 | R2 += -1; | ||
36 | CC = R2 == 0; | ||
37 | if ! cc jump 1b (bp); | ||
38 | jump 4f; | ||
39 | 2: | 39 | 2: |
40 | /* if src is shorter than n, we need to null pad bytes in dest */ | 40 | if CC jump 3f; |
41 | R1 = 0; | 41 | |
42 | RTS; | ||
43 | |||
44 | /* if src is shorter than n, we need to null pad bytes in dest | ||
45 | * but, we can get here when the last byte is zero, and we don't | ||
46 | * want to copy an extra byte at the end, so we need to check | ||
47 | */ | ||
42 | 3: | 48 | 3: |
49 | R2 = LC0; | ||
50 | CC = R2 | ||
51 | if ! CC jump 6f; | ||
52 | |||
53 | /* if the required null padded portion is small, do it here, rather than | ||
54 | * handling the overhead of memset (which is OK when things are big). | ||
55 | */ | ||
56 | R3 = 0x20; | ||
57 | CC = R2 < R3; | ||
58 | IF CC jump 4f; | ||
59 | |||
43 | R2 += -1; | 60 | R2 += -1; |
44 | CC = R2 == 0; | 61 | |
45 | if cc jump 4f; | 62 | /* Set things up for memset |
46 | B [P0++] = R1; | 63 | * R0 = address |
47 | jump 3b; | 64 | * R1 = filler byte (this case it's zero, set above) |
65 | * R2 = count (set above) | ||
66 | */ | ||
67 | |||
68 | I1 = R0; | ||
69 | R0 = RETS; | ||
70 | I0 = R0; | ||
71 | R0 = P0; | ||
72 | pseudo_long_call _memset, p0; | ||
73 | R0 = I0; | ||
74 | RETS = R0; | ||
75 | R0 = I1; | ||
76 | RTS; | ||
48 | 77 | ||
49 | 4: | 78 | 4: |
79 | LSETUP(5f, 5f) LC0; | ||
80 | 5: | ||
81 | B [P0++] = R1; | ||
82 | 6: | ||
50 | RTS; | 83 | RTS; |
51 | 84 | ||
52 | ENDPROC(_strncpy) | 85 | ENDPROC(_strncpy) |