diff options
| author | Michal Simek <monstr@monstr.eu> | 2011-06-10 04:49:08 -0400 |
|---|---|---|
| committer | Michal Simek <monstr@monstr.eu> | 2011-10-14 06:24:26 -0400 |
| commit | ebe211254bfa6295f4ab0b33c7c881bdfabbab60 (patch) | |
| tree | 08345bf06ede9d07585342f1264a12a47f2a0c50 /arch | |
| parent | 782d491fc210fac03976d01071145728339b6887 (diff) | |
microblaze: Add loop unrolling for PAGE in copy_tofrom_user
Increase performance by loop unrolling.
Signed-off-by: Michal Simek <monstr@monstr.eu>
Diffstat (limited to 'arch')
| -rw-r--r-- | arch/microblaze/lib/uaccess_old.S | 84 |
1 files changed, 84 insertions, 0 deletions
diff --git a/arch/microblaze/lib/uaccess_old.S b/arch/microblaze/lib/uaccess_old.S index d09f2dce648d..142492ec270f 100644 --- a/arch/microblaze/lib/uaccess_old.S +++ b/arch/microblaze/lib/uaccess_old.S | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | 10 | ||
| 11 | #include <linux/errno.h> | 11 | #include <linux/errno.h> |
| 12 | #include <linux/linkage.h> | 12 | #include <linux/linkage.h> |
| 13 | #include <asm/page.h> | ||
| 13 | 14 | ||
| 14 | /* | 15 | /* |
| 15 | * int __strncpy_user(char *to, char *from, int len); | 16 | * int __strncpy_user(char *to, char *from, int len); |
| @@ -102,6 +103,49 @@ __strnlen_user: | |||
| 102 | .section __ex_table,"a" | 103 | .section __ex_table,"a" |
| 103 | .word 1b,4b | 104 | .word 1b,4b |
| 104 | 105 | ||
| 106 | /* Loop unrolling for __copy_tofrom_user */ | ||
| 107 | #define COPY(offset) \ | ||
| 108 | 1: lwi r4 , r6, 0x0000 + offset; \ | ||
| 109 | 2: lwi r19, r6, 0x0004 + offset; \ | ||
| 110 | 3: lwi r20, r6, 0x0008 + offset; \ | ||
| 111 | 4: lwi r21, r6, 0x000C + offset; \ | ||
| 112 | 5: lwi r22, r6, 0x0010 + offset; \ | ||
| 113 | 6: lwi r23, r6, 0x0014 + offset; \ | ||
| 114 | 7: lwi r24, r6, 0x0018 + offset; \ | ||
| 115 | 8: lwi r25, r6, 0x001C + offset; \ | ||
| 116 | 9: swi r4 , r5, 0x0000 + offset; \ | ||
| 117 | 10: swi r19, r5, 0x0004 + offset; \ | ||
| 118 | 11: swi r20, r5, 0x0008 + offset; \ | ||
| 119 | 12: swi r21, r5, 0x000C + offset; \ | ||
| 120 | 13: swi r22, r5, 0x0010 + offset; \ | ||
| 121 | 14: swi r23, r5, 0x0014 + offset; \ | ||
| 122 | 15: swi r24, r5, 0x0018 + offset; \ | ||
| 123 | 16: swi r25, r5, 0x001C + offset; \ | ||
| 124 | .section __ex_table,"a"; \ | ||
| 125 | .word 1b, 0f; \ | ||
| 126 | .word 2b, 0f; \ | ||
| 127 | .word 3b, 0f; \ | ||
| 128 | .word 4b, 0f; \ | ||
| 129 | .word 5b, 0f; \ | ||
| 130 | .word 6b, 0f; \ | ||
| 131 | .word 7b, 0f; \ | ||
| 132 | .word 8b, 0f; \ | ||
| 133 | .word 9b, 0f; \ | ||
| 134 | .word 10b, 0f; \ | ||
| 135 | .word 11b, 0f; \ | ||
| 136 | .word 12b, 0f; \ | ||
| 137 | .word 13b, 0f; \ | ||
| 138 | .word 14b, 0f; \ | ||
| 139 | .word 15b, 0f; \ | ||
| 140 | .word 16b, 0f; \ | ||
| 141 | .text | ||
| 142 | |||
| 143 | #define COPY_80(offset) \ | ||
| 144 | COPY(0x00 + offset);\ | ||
| 145 | COPY(0x20 + offset);\ | ||
| 146 | COPY(0x40 + offset);\ | ||
| 147 | COPY(0x60 + offset); | ||
| 148 | |||
| 105 | /* | 149 | /* |
| 106 | * int __copy_tofrom_user(char *to, char *from, int len) | 150 | * int __copy_tofrom_user(char *to, char *from, int len) |
| 107 | * Return: | 151 | * Return: |
| @@ -126,6 +170,10 @@ __copy_tofrom_user: | |||
| 126 | bneid r3, bu1 /* if r3 is not zero then byte copying */ | 170 | bneid r3, bu1 /* if r3 is not zero then byte copying */ |
| 127 | or r3, r0, r0 | 171 | or r3, r0, r0 |
| 128 | 172 | ||
| 173 | rsubi r3, r7, PAGE_SIZE /* r3 = PAGE_SIZE - r7; zero selects the unrolled page copy */ | ||
| 174 | beqid r3, page; | ||
| 175 | or r3, r0, r0 | ||
| 176 | |||
| 129 | w1: lw r4, r6, r3 /* at least one 4 byte copy */ | 177 | w1: lw r4, r6, r3 /* at least one 4 byte copy */ |
| 130 | w2: sw r4, r5, r3 | 178 | w2: sw r4, r5, r3 |
| 131 | addik r7, r7, -4 | 179 | addik r7, r7, -4 |
| @@ -140,6 +188,42 @@ w2: sw r4, r5, r3 | |||
| 140 | .word w2, 0f; | 188 | .word w2, 0f; |
| 141 | .text | 189 | .text |
| 142 | 190 | ||
| 191 | .align 4 /* Alignment is important to keep icache happy */ | ||
| 192 | page: /* Create room on stack and save registers for storing values */ | ||
| 193 | addik r1, r1, -32 | ||
| 194 | swi r19, r1, 4 | ||
| 195 | swi r20, r1, 8 | ||
| 196 | swi r21, r1, 12 | ||
| 197 | swi r22, r1, 16 | ||
| 198 | swi r23, r1, 20 | ||
| 199 | swi r24, r1, 24 | ||
| 200 | swi r25, r1, 28 | ||
| 201 | loop: /* r4, r19, r20, r21, r22, r23, r24, r25 are used for storing values */ | ||
| 202 | /* Loop unrolling to get performance boost */ | ||
| 203 | COPY_80(0x000); | ||
| 204 | COPY_80(0x080); | ||
| 205 | COPY_80(0x100); | ||
| 206 | COPY_80(0x180); | ||
| 207 | /* copy loop */ | ||
| 208 | addik r6, r6, 0x200 | ||
| 209 | addik r7, r7, -0x200 | ||
| 210 | bneid r7, loop | ||
| 211 | addik r5, r5, 0x200 | ||
| 212 | /* Restore register content */ | ||
| 213 | lwi r19, r1, 4 | ||
| 214 | lwi r20, r1, 8 | ||
| 215 | lwi r21, r1, 12 | ||
| 216 | lwi r22, r1, 16 | ||
| 217 | lwi r23, r1, 20 | ||
| 218 | lwi r24, r1, 24 | ||
| 219 | lwi r25, r1, 28 | ||
| 220 | addik r1, r1, 32 | ||
| 221 | /* return back */ | ||
| 222 | addik r3, r7, 0 | ||
| 223 | rtsd r15, 8 | ||
| 224 | nop | ||
| 225 | |||
| 226 | .align 4 /* Alignment is important to keep icache happy */ | ||
| 143 | bu1: lbu r4,r6,r3 | 227 | bu1: lbu r4,r6,r3 |
| 144 | bu2: sb r4,r5,r3 | 228 | bu2: sb r4,r5,r3 |
| 145 | addik r7,r7,-1 | 229 | addik r7,r7,-1 |
