diff options
| author | Steven J. Hill <Steven.Hill@imgtec.com> | 2013-03-25 14:40:49 -0400 |
|---|---|---|
| committer | Ralf Baechle <ralf@linux-mips.org> | 2013-05-09 11:55:19 -0400 |
| commit | 26c5e07d1478021914801c8c7dd77c9268940e4f (patch) | |
| tree | a0594bbc2c6a5881e07381e4bfc15e6b71cc901d /arch/mips/lib | |
| parent | bce860833ab1e40113eb0efab34d0c8f3b0226b1 (diff) | |
MIPS: microMIPS: Optimise 'memset' core library function.
Optimise 'memset' to use microMIPS instructions and/or optimisations
for binary size reduction. When the microMIPS ISA is not being used,
the library function compiles to the original binary code.
Signed-off-by: Steven J. Hill <Steven.Hill@imgtec.com>
Diffstat (limited to 'arch/mips/lib')
| -rw-r--r-- | arch/mips/lib/memset.S | 84 |
1 file changed, 54 insertions, 30 deletions
diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S
index 053d3b0b0317..0580194e7402 100644
--- a/arch/mips/lib/memset.S
+++ b/arch/mips/lib/memset.S
| @@ -5,7 +5,8 @@ | |||
| 5 | * | 5 | * |
| 6 | * Copyright (C) 1998, 1999, 2000 by Ralf Baechle | 6 | * Copyright (C) 1998, 1999, 2000 by Ralf Baechle |
| 7 | * Copyright (C) 1999, 2000 Silicon Graphics, Inc. | 7 | * Copyright (C) 1999, 2000 Silicon Graphics, Inc. |
| 8 | * Copyright (C) 2007 Maciej W. Rozycki | 8 | * Copyright (C) 2007 by Maciej W. Rozycki |
| 9 | * Copyright (C) 2011, 2012 MIPS Technologies, Inc. | ||
| 9 | */ | 10 | */ |
| 10 | #include <asm/asm.h> | 11 | #include <asm/asm.h> |
| 11 | #include <asm/asm-offsets.h> | 12 | #include <asm/asm-offsets.h> |
| @@ -19,6 +20,20 @@ | |||
| 19 | #define LONG_S_R sdr | 20 | #define LONG_S_R sdr |
| 20 | #endif | 21 | #endif |
| 21 | 22 | ||
| 23 | #ifdef CONFIG_CPU_MICROMIPS | ||
| 24 | #define STORSIZE (LONGSIZE * 2) | ||
| 25 | #define STORMASK (STORSIZE - 1) | ||
| 26 | #define FILL64RG t8 | ||
| 27 | #define FILLPTRG t7 | ||
| 28 | #undef LONG_S | ||
| 29 | #define LONG_S LONG_SP | ||
| 30 | #else | ||
| 31 | #define STORSIZE LONGSIZE | ||
| 32 | #define STORMASK LONGMASK | ||
| 33 | #define FILL64RG a1 | ||
| 34 | #define FILLPTRG t0 | ||
| 35 | #endif | ||
| 36 | |||
| 22 | #define EX(insn,reg,addr,handler) \ | 37 | #define EX(insn,reg,addr,handler) \ |
| 23 | 9: insn reg, addr; \ | 38 | 9: insn reg, addr; \ |
| 24 | .section __ex_table,"a"; \ | 39 | .section __ex_table,"a"; \ |
| @@ -26,23 +41,25 @@ | |||
| 26 | .previous | 41 | .previous |
| 27 | 42 | ||
| 28 | .macro f_fill64 dst, offset, val, fixup | 43 | .macro f_fill64 dst, offset, val, fixup |
| 29 | EX(LONG_S, \val, (\offset + 0 * LONGSIZE)(\dst), \fixup) | 44 | EX(LONG_S, \val, (\offset + 0 * STORSIZE)(\dst), \fixup) |
| 30 | EX(LONG_S, \val, (\offset + 1 * LONGSIZE)(\dst), \fixup) | 45 | EX(LONG_S, \val, (\offset + 1 * STORSIZE)(\dst), \fixup) |
| 31 | EX(LONG_S, \val, (\offset + 2 * LONGSIZE)(\dst), \fixup) | 46 | EX(LONG_S, \val, (\offset + 2 * STORSIZE)(\dst), \fixup) |
| 32 | EX(LONG_S, \val, (\offset + 3 * LONGSIZE)(\dst), \fixup) | 47 | EX(LONG_S, \val, (\offset + 3 * STORSIZE)(\dst), \fixup) |
| 33 | EX(LONG_S, \val, (\offset + 4 * LONGSIZE)(\dst), \fixup) | 48 | #if ((defined(CONFIG_CPU_MICROMIPS) && (LONGSIZE == 4)) || !defined(CONFIG_CPU_MICROMIPS)) |
| 34 | EX(LONG_S, \val, (\offset + 5 * LONGSIZE)(\dst), \fixup) | 49 | EX(LONG_S, \val, (\offset + 4 * STORSIZE)(\dst), \fixup) |
| 35 | EX(LONG_S, \val, (\offset + 6 * LONGSIZE)(\dst), \fixup) | 50 | EX(LONG_S, \val, (\offset + 5 * STORSIZE)(\dst), \fixup) |
| 36 | EX(LONG_S, \val, (\offset + 7 * LONGSIZE)(\dst), \fixup) | 51 | EX(LONG_S, \val, (\offset + 6 * STORSIZE)(\dst), \fixup) |
| 37 | #if LONGSIZE == 4 | 52 | EX(LONG_S, \val, (\offset + 7 * STORSIZE)(\dst), \fixup) |
| 38 | EX(LONG_S, \val, (\offset + 8 * LONGSIZE)(\dst), \fixup) | 53 | #endif |
| 39 | EX(LONG_S, \val, (\offset + 9 * LONGSIZE)(\dst), \fixup) | 54 | #if (!defined(CONFIG_CPU_MICROMIPS) && (LONGSIZE == 4)) |
| 40 | EX(LONG_S, \val, (\offset + 10 * LONGSIZE)(\dst), \fixup) | 55 | EX(LONG_S, \val, (\offset + 8 * STORSIZE)(\dst), \fixup) |
| 41 | EX(LONG_S, \val, (\offset + 11 * LONGSIZE)(\dst), \fixup) | 56 | EX(LONG_S, \val, (\offset + 9 * STORSIZE)(\dst), \fixup) |
| 42 | EX(LONG_S, \val, (\offset + 12 * LONGSIZE)(\dst), \fixup) | 57 | EX(LONG_S, \val, (\offset + 10 * STORSIZE)(\dst), \fixup) |
| 43 | EX(LONG_S, \val, (\offset + 13 * LONGSIZE)(\dst), \fixup) | 58 | EX(LONG_S, \val, (\offset + 11 * STORSIZE)(\dst), \fixup) |
| 44 | EX(LONG_S, \val, (\offset + 14 * LONGSIZE)(\dst), \fixup) | 59 | EX(LONG_S, \val, (\offset + 12 * STORSIZE)(\dst), \fixup) |
| 45 | EX(LONG_S, \val, (\offset + 15 * LONGSIZE)(\dst), \fixup) | 60 | EX(LONG_S, \val, (\offset + 13 * STORSIZE)(\dst), \fixup) |
| 61 | EX(LONG_S, \val, (\offset + 14 * STORSIZE)(\dst), \fixup) | ||
| 62 | EX(LONG_S, \val, (\offset + 15 * STORSIZE)(\dst), \fixup) | ||
| 46 | #endif | 63 | #endif |
| 47 | .endm | 64 | .endm |
| 48 | 65 | ||
| @@ -71,16 +88,20 @@ LEAF(memset) | |||
| 71 | 1: | 88 | 1: |
| 72 | 89 | ||
| 73 | FEXPORT(__bzero) | 90 | FEXPORT(__bzero) |
| 74 | sltiu t0, a2, LONGSIZE /* very small region? */ | 91 | sltiu t0, a2, STORSIZE /* very small region? */ |
| 75 | bnez t0, .Lsmall_memset | 92 | bnez t0, .Lsmall_memset |
| 76 | andi t0, a0, LONGMASK /* aligned? */ | 93 | andi t0, a0, STORMASK /* aligned? */ |
| 77 | 94 | ||
| 95 | #ifdef CONFIG_CPU_MICROMIPS | ||
| 96 | move t8, a1 /* used by 'swp' instruction */ | ||
| 97 | move t9, a1 | ||
| 98 | #endif | ||
| 78 | #ifndef CONFIG_CPU_DADDI_WORKAROUNDS | 99 | #ifndef CONFIG_CPU_DADDI_WORKAROUNDS |
| 79 | beqz t0, 1f | 100 | beqz t0, 1f |
| 80 | PTR_SUBU t0, LONGSIZE /* alignment in bytes */ | 101 | PTR_SUBU t0, STORSIZE /* alignment in bytes */ |
| 81 | #else | 102 | #else |
| 82 | .set noat | 103 | .set noat |
| 83 | li AT, LONGSIZE | 104 | li AT, STORSIZE |
| 84 | beqz t0, 1f | 105 | beqz t0, 1f |
| 85 | PTR_SUBU t0, AT /* alignment in bytes */ | 106 | PTR_SUBU t0, AT /* alignment in bytes */ |
| 86 | .set at | 107 | .set at |
| @@ -99,24 +120,27 @@ FEXPORT(__bzero) | |||
| 99 | 1: ori t1, a2, 0x3f /* # of full blocks */ | 120 | 1: ori t1, a2, 0x3f /* # of full blocks */ |
| 100 | xori t1, 0x3f | 121 | xori t1, 0x3f |
| 101 | beqz t1, .Lmemset_partial /* no block to fill */ | 122 | beqz t1, .Lmemset_partial /* no block to fill */ |
| 102 | andi t0, a2, 0x40-LONGSIZE | 123 | andi t0, a2, 0x40-STORSIZE |
| 103 | 124 | ||
| 104 | PTR_ADDU t1, a0 /* end address */ | 125 | PTR_ADDU t1, a0 /* end address */ |
| 105 | .set reorder | 126 | .set reorder |
| 106 | 1: PTR_ADDIU a0, 64 | 127 | 1: PTR_ADDIU a0, 64 |
| 107 | R10KCBARRIER(0(ra)) | 128 | R10KCBARRIER(0(ra)) |
| 108 | f_fill64 a0, -64, a1, .Lfwd_fixup | 129 | f_fill64 a0, -64, FILL64RG, .Lfwd_fixup |
| 109 | bne t1, a0, 1b | 130 | bne t1, a0, 1b |
| 110 | .set noreorder | 131 | .set noreorder |
| 111 | 132 | ||
| 112 | .Lmemset_partial: | 133 | .Lmemset_partial: |
| 113 | R10KCBARRIER(0(ra)) | 134 | R10KCBARRIER(0(ra)) |
| 114 | PTR_LA t1, 2f /* where to start */ | 135 | PTR_LA t1, 2f /* where to start */ |
| 136 | #ifdef CONFIG_CPU_MICROMIPS | ||
| 137 | LONG_SRL t7, t0, 1 | ||
| 138 | #endif | ||
| 115 | #if LONGSIZE == 4 | 139 | #if LONGSIZE == 4 |
| 116 | PTR_SUBU t1, t0 | 140 | PTR_SUBU t1, FILLPTRG |
| 117 | #else | 141 | #else |
| 118 | .set noat | 142 | .set noat |
| 119 | LONG_SRL AT, t0, 1 | 143 | LONG_SRL AT, FILLPTRG, 1 |
| 120 | PTR_SUBU t1, AT | 144 | PTR_SUBU t1, AT |
| 121 | .set at | 145 | .set at |
| 122 | #endif | 146 | #endif |
| @@ -126,9 +150,9 @@ FEXPORT(__bzero) | |||
| 126 | .set push | 150 | .set push |
| 127 | .set noreorder | 151 | .set noreorder |
| 128 | .set nomacro | 152 | .set nomacro |
| 129 | f_fill64 a0, -64, a1, .Lpartial_fixup /* ... but first do longs ... */ | 153 | f_fill64 a0, -64, FILL64RG, .Lpartial_fixup /* ... but first do longs ... */ |
| 130 | 2: .set pop | 154 | 2: .set pop |
| 131 | andi a2, LONGMASK /* At most one long to go */ | 155 | andi a2, STORMASK /* At most one long to go */ |
| 132 | 156 | ||
| 133 | beqz a2, 1f | 157 | beqz a2, 1f |
| 134 | PTR_ADDU a0, a2 /* What's left */ | 158 | PTR_ADDU a0, a2 /* What's left */ |
| @@ -169,7 +193,7 @@ FEXPORT(__bzero) | |||
| 169 | 193 | ||
| 170 | .Lpartial_fixup: | 194 | .Lpartial_fixup: |
| 171 | PTR_L t0, TI_TASK($28) | 195 | PTR_L t0, TI_TASK($28) |
| 172 | andi a2, LONGMASK | 196 | andi a2, STORMASK |
| 173 | LONG_L t0, THREAD_BUADDR(t0) | 197 | LONG_L t0, THREAD_BUADDR(t0) |
| 174 | LONG_ADDU a2, t1 | 198 | LONG_ADDU a2, t1 |
| 175 | jr ra | 199 | jr ra |
| @@ -177,4 +201,4 @@ FEXPORT(__bzero) | |||
| 177 | 201 | ||
| 178 | .Llast_fixup: | 202 | .Llast_fixup: |
| 179 | jr ra | 203 | jr ra |
| 180 | andi v1, a2, LONGMASK | 204 | andi v1, a2, STORMASK |
