about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Atsushi Nemoto <anemo@mba.ocn.ne.jp> 2006-12-07 11:04:51 -0500
committer Ralf Baechle <ralf@linux-mips.org> 2006-12-08 20:03:59 -0500
commit ed99e2bc1dc5dc54eb5a019f4975562dbef20103 (patch)
tree c8ff52ab4a29fe842e34fd94d01e74082486391d
parent 773ff78838ca3c07245e45c06235e0baaa5f710a (diff)
[MIPS] Optimize csum_partial for 64bit kernel
Make csum_partial 64-bit powered.

Signed-off-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
-rw-r--r-- arch/mips/lib/csum_partial.S 76
1 files changed, 54 insertions, 22 deletions
diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S
index b04475d76f3c..9db357294be1 100644
--- a/arch/mips/lib/csum_partial.S
+++ b/arch/mips/lib/csum_partial.S
@@ -29,30 +29,49 @@
29#define t5 $13 29#define t5 $13
30#define t6 $14 30#define t6 $14
31#define t7 $15 31#define t7 $15
32
33#define USE_DOUBLE
32#endif 34#endif
33 35
36#ifdef USE_DOUBLE
37
38#define LOAD ld
39#define ADD daddu
40#define NBYTES 8
41
42#else
43
44#define LOAD lw
45#define ADD addu
46#define NBYTES 4
47
48#endif /* USE_DOUBLE */
49
50#define UNIT(unit) ((unit)*NBYTES)
51
34#define ADDC(sum,reg) \ 52#define ADDC(sum,reg) \
35 addu sum, reg; \ 53 ADD sum, reg; \
36 sltu v1, sum, reg; \ 54 sltu v1, sum, reg; \
37 addu sum, v1 55 ADD sum, v1
38 56
39#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3) \ 57#define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3) \
40 lw _t0, (offset + 0x00)(src); \ 58 LOAD _t0, (offset + UNIT(0))(src); \
41 lw _t1, (offset + 0x04)(src); \ 59 LOAD _t1, (offset + UNIT(1))(src); \
42 lw _t2, (offset + 0x08)(src); \ 60 LOAD _t2, (offset + UNIT(2))(src); \
43 lw _t3, (offset + 0x0c)(src); \ 61 LOAD _t3, (offset + UNIT(3))(src); \
44 ADDC(sum, _t0); \
45 ADDC(sum, _t1); \
46 ADDC(sum, _t2); \
47 ADDC(sum, _t3); \
48 lw _t0, (offset + 0x10)(src); \
49 lw _t1, (offset + 0x14)(src); \
50 lw _t2, (offset + 0x18)(src); \
51 lw _t3, (offset + 0x1c)(src); \
52 ADDC(sum, _t0); \ 62 ADDC(sum, _t0); \
53 ADDC(sum, _t1); \ 63 ADDC(sum, _t1); \
54 ADDC(sum, _t2); \ 64 ADDC(sum, _t2); \
55 ADDC(sum, _t3); \ 65 ADDC(sum, _t3)
66
67#ifdef USE_DOUBLE
68#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3) \
69 CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3)
70#else
71#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3) \
72 CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3); \
73 CSUM_BIGCHUNK1(src, offset + 0x10, sum, _t0, _t1, _t2, _t3)
74#endif
56 75
57/* 76/*
58 * a0: source address 77 * a0: source address
@@ -117,11 +136,17 @@ qword_align:
117 beqz t8, oword_align 136 beqz t8, oword_align
118 andi t8, src, 0x10 137 andi t8, src, 0x10
119 138
139#ifdef USE_DOUBLE
140 ld t0, 0x00(src)
141 LONG_SUBU a1, a1, 0x8
142 ADDC(sum, t0)
143#else
120 lw t0, 0x00(src) 144 lw t0, 0x00(src)
121 lw t1, 0x04(src) 145 lw t1, 0x04(src)
122 LONG_SUBU a1, a1, 0x8 146 LONG_SUBU a1, a1, 0x8
123 ADDC(sum, t0) 147 ADDC(sum, t0)
124 ADDC(sum, t1) 148 ADDC(sum, t1)
149#endif
125 PTR_ADDU src, src, 0x8 150 PTR_ADDU src, src, 0x8
126 andi t8, src, 0x10 151 andi t8, src, 0x10
127 152
@@ -129,14 +154,14 @@ oword_align:
129 beqz t8, begin_movement 154 beqz t8, begin_movement
130 LONG_SRL t8, a1, 0x7 155 LONG_SRL t8, a1, 0x7
131 156
132 lw t3, 0x08(src) 157#ifdef USE_DOUBLE
133 lw t4, 0x0c(src) 158 ld t0, 0x00(src)
134 lw t0, 0x00(src) 159 ld t1, 0x08(src)
135 lw t1, 0x04(src)
136 ADDC(sum, t3)
137 ADDC(sum, t4)
138 ADDC(sum, t0) 160 ADDC(sum, t0)
139 ADDC(sum, t1) 161 ADDC(sum, t1)
162#else
163 CSUM_BIGCHUNK1(src, 0x00, sum, t0, t1, t3, t4)
164#endif
140 LONG_SUBU a1, a1, 0x10 165 LONG_SUBU a1, a1, 0x10
141 PTR_ADDU src, src, 0x10 166 PTR_ADDU src, src, 0x10
142 LONG_SRL t8, a1, 0x7 167 LONG_SRL t8, a1, 0x7
@@ -219,6 +244,13 @@ small_csumcpy:
219 1: ADDC(sum, t1) 244 1: ADDC(sum, t1)
220 245
221 /* fold checksum */ 246 /* fold checksum */
247#ifdef USE_DOUBLE
248 dsll32 v1, sum, 0
249 daddu sum, v1
250 sltu v1, sum, v1
251 dsra32 sum, sum, 0
252 addu sum, v1
253#endif
222 sll v1, sum, 16 254 sll v1, sum, 16
223 addu sum, v1 255 addu sum, v1
224 sltu v1, sum, v1 256 sltu v1, sum, v1