aboutsummaryrefslogtreecommitdiffstats
path: root/arch/microblaze/lib
diff options
context:
space:
mode:
author: Michal Simek <monstr@monstr.eu> 2011-06-10 04:49:08 -0400
committer: Michal Simek <monstr@monstr.eu> 2011-10-14 06:24:26 -0400
commit: ebe211254bfa6295f4ab0b33c7c881bdfabbab60 (patch)
tree: 08345bf06ede9d07585342f1264a12a47f2a0c50 /arch/microblaze/lib
parent: 782d491fc210fac03976d01071145728339b6887 (diff)
microblaze: Add loop unrolling for PAGE in copy_tofrom_user
Increase performance by loop unrolling. Signed-off-by: Michal Simek <monstr@monstr.eu>
Diffstat (limited to 'arch/microblaze/lib')
-rw-r--r-- arch/microblaze/lib/uaccess_old.S 84
1 files changed, 84 insertions, 0 deletions
diff --git a/arch/microblaze/lib/uaccess_old.S b/arch/microblaze/lib/uaccess_old.S
index d09f2dce648d..142492ec270f 100644
--- a/arch/microblaze/lib/uaccess_old.S
+++ b/arch/microblaze/lib/uaccess_old.S
@@ -10,6 +10,7 @@
10 10
11#include <linux/errno.h> 11#include <linux/errno.h>
12#include <linux/linkage.h> 12#include <linux/linkage.h>
13#include <asm/page.h>
13 14
14/* 15/*
15 * int __strncpy_user(char *to, char *from, int len); 16 * int __strncpy_user(char *to, char *from, int len);
@@ -102,6 +103,49 @@ __strnlen_user:
102 .section __ex_table,"a" 103 .section __ex_table,"a"
103 .word 1b,4b 104 .word 1b,4b
104 105
106/* Loop unrolling for __copy_tofrom_user
 *
 * COPY(offset) moves one 32-byte chunk from r6 + offset to r5 + offset:
 * eight word loads (lwi) fill r4 and r19-r25, then eight word stores (swi)
 * write those registers back out at the same offsets relative to r5.
 * r5 and r6 themselves are not modified; r4 and r19-r25 are clobbered.
 *
 * Every one of the 16 memory accesses carries its own numeric local label
 * (1-16) and gets a two-word __ex_table entry pairing that label with 0f.
 * The macro finishes by switching back to .text.
 *
 * NOTE(review): label 0 is defined outside this hunk. The second word of
 * each entry is presumably the fixup address used when the access faults -
 * confirm against the full file what that handler does.
 */
107#define COPY(offset) \
1081: lwi r4 , r6, 0x0000 + offset; \
1092: lwi r19, r6, 0x0004 + offset; \
1103: lwi r20, r6, 0x0008 + offset; \
1114: lwi r21, r6, 0x000C + offset; \
1125: lwi r22, r6, 0x0010 + offset; \
1136: lwi r23, r6, 0x0014 + offset; \
1147: lwi r24, r6, 0x0018 + offset; \
1158: lwi r25, r6, 0x001C + offset; \
1169: swi r4 , r5, 0x0000 + offset; \
11710: swi r19, r5, 0x0004 + offset; \
11811: swi r20, r5, 0x0008 + offset; \
11912: swi r21, r5, 0x000C + offset; \
12013: swi r22, r5, 0x0010 + offset; \
12114: swi r23, r5, 0x0014 + offset; \
12215: swi r24, r5, 0x0018 + offset; \
12316: swi r25, r5, 0x001C + offset; \
124 .section __ex_table,"a"; \
125 .word 1b, 0f; \
126 .word 2b, 0f; \
127 .word 3b, 0f; \
128 .word 4b, 0f; \
129 .word 5b, 0f; \
130 .word 6b, 0f; \
131 .word 7b, 0f; \
132 .word 8b, 0f; \
133 .word 9b, 0f; \
134 .word 10b, 0f; \
135 .word 11b, 0f; \
136 .word 12b, 0f; \
137 .word 13b, 0f; \
138 .word 14b, 0f; \
139 .word 15b, 0f; \
140 .word 16b, 0f; \
141 .text
142
/*
 * COPY_80(offset) expands COPY four times at consecutive 0x20-byte steps,
 * so one expansion covers 0x80 bytes starting at the given offset.
 */
143#define COPY_80(offset) \
144 COPY(0x00 + offset);\
145 COPY(0x20 + offset);\
146 COPY(0x40 + offset);\
147 COPY(0x60 + offset);
148
105/* 149/*
106 * int __copy_tofrom_user(char *to, char *from, int len) 150 * int __copy_tofrom_user(char *to, char *from, int len)
107 * Return: 151 * Return:
@@ -126,6 +170,10 @@ __copy_tofrom_user:
126 bneid r3, bu1 /* if r3 is not zero then byte copying */ 170 bneid r3, bu1 /* if r3 is not zero then byte copying */
127 or r3, r0, r0 171 or r3, r0, r0
128 172
173 rsubi r3, r7, PAGE_SIZE /* detect PAGE_SIZE */
174 beqid r3, page;
175 or r3, r0, r0
176
129w1: lw r4, r6, r3 /* at least one 4 byte copy */ 177w1: lw r4, r6, r3 /* at least one 4 byte copy */
130w2: sw r4, r5, r3 178w2: sw r4, r5, r3
131 addik r7, r7, -4 179 addik r7, r7, -4
@@ -140,6 +188,42 @@ w2: sw r4, r5, r3
140 .word w2, 0f; 188 .word w2, 0f;
141 .text 189 .text
142 190
/*
 * Unrolled copy path, reached from __copy_tofrom_user when r7 equals
 * PAGE_SIZE. Each pass of the loop copies 0x200 bytes from r6 to r5 and
 * subtracts 0x200 from r7; the loop ends when r7 reaches zero, and r7 is
 * then returned in r3.
 * Assumes PAGE_SIZE is a multiple of 0x200, otherwise r7 would never hit
 * zero - TODO confirm.
 *
 * NOTE(review): the __ex_table entries emitted by COPY redirect a faulting
 * access to label 0, which is not visible in this hunk. If that handler
 * returns directly, r19-r25 and the 32-byte stack adjustment made below
 * would not be undone - confirm against the full file.
 */
191.align 4 /* Alignment is important to keep icache happy */
192page: /* Create room on stack and save registers for storing values */
193 addik r1, r1, -32 /* reserve 32 bytes; offset 0 is left unused */
194 swi r19, r1, 4
195 swi r20, r1, 8
196 swi r21, r1, 12
197 swi r22, r1, 16
198 swi r23, r1, 20
199 swi r24, r1, 24
200 swi r25, r1, 28
201loop: /* r4, r19, r20, r21, r22, r23, r24, r25 are used for storing values */
202 /* Loop unrolling to get performance boost */
203 COPY_80(0x000); /* 4 x 0x80 = 0x200 bytes per pass */
204 COPY_80(0x080);
205 COPY_80(0x100);
206 COPY_80(0x180);
207 /* copy loop */
208 addik r6, r6, 0x200 /* advance source pointer */
209 addik r7, r7, -0x200 /* bytes still to copy */
210 bneid r7, loop /* repeat while r7 is non-zero */
211 addik r5, r5, 0x200 /* delay slot: advance destination pointer */
212 /* Restore register content */
213 lwi r19, r1, 4
214 lwi r20, r1, 8
215 lwi r21, r1, 12
216 lwi r22, r1, 16
217 lwi r23, r1, 20
218 lwi r24, r1, 24
219 lwi r25, r1, 28
220 addik r1, r1, 32 /* release the 32 bytes reserved at page: */
221 /* return back */
222 addik r3, r7, 0 /* r3 = r7, which is zero once the loop has exited */
223 rtsd r15, 8
224 nop /* delay slot of rtsd */
225
226.align 4 /* Alignment is important to keep icache happy */
143bu1: lbu r4,r6,r3 227bu1: lbu r4,r6,r3
144bu2: sb r4,r5,r3 228bu2: sb r4,r5,r3
145 addik r7,r7,-1 229 addik r7,r7,-1