aboutsummaryrefslogtreecommitdiffstats
path: root/arch/score/lib/checksum.S
diff options
context:
space:
mode:
authorChen Liqin <liqin.chen@sunplusct.com>2009-06-12 10:01:00 -0400
committerArnd Bergmann <arnd@arndb.de>2009-06-19 05:38:47 -0400
commit6bc9a3966f0395419b09b2ec90f89f7f00341b37 (patch)
tree9c0d9d5376020266f5602501c8376d4a4f13142d /arch/score/lib/checksum.S
parent0732f87761dbe417cb6e084b712d07e879e876ef (diff)
score: Add support for Sunplus S+core architecture
This is the complete set of new files for the Score architecture port of Linux. The Score instruction set supports 16-bit, 32-bit and 64-bit instructions; Score SoCs have been used in game machines and LCD TVs. Signed-off-by: Chen Liqin <liqin.chen@sunplusct.com> Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Diffstat (limited to 'arch/score/lib/checksum.S')
-rw-r--r--arch/score/lib/checksum.S255
1 files changed, 255 insertions, 0 deletions
diff --git a/arch/score/lib/checksum.S b/arch/score/lib/checksum.S
new file mode 100644
index 000000000000..706157edc7d5
--- /dev/null
+++ b/arch/score/lib/checksum.S
@@ -0,0 +1,255 @@
1/*
2 * arch/score/lib/checksum.S
3 *
4 * Score Processor version.
5 *
6 * Copyright (C) 2009 Sunplus Core Technology Co., Ltd.
7 * Lennox Wu <lennox.wu@sunplusct.com>
8 * Chen Liqin <liqin.chen@sunplusct.com>
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, see the file COPYING, or write
22 * to the Free Software Foundation, Inc.,
23 * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
24 */
25#include <linux/linkage.h>
26
/*
 * ADDC(sum, reg): sum += reg with end-around carry.
 *
 * After the add, reg is compared with the new sum. If reg <= sum the
 * branch skips the increment; otherwise the add wrapped around and 1 is
 * added back into sum.
 *
 * Side effects: changes the condition flags (cmp.c) and defines the
 * numeric local label 9 at the point of expansion.
 */
27#define ADDC(sum,reg) \
28 add sum, sum, reg; \
29 cmp.c reg, sum; \
30 bleu 9f; \
31 addi sum, 0x1; \
329:
33
/*
 * CSUM_BIGCHUNK(src, offset, sum): add the eight words located at
 * [src + offset + 0x00] .. [src + offset + 0x1c] (0x20 bytes) into sum
 * using ADDC.
 *
 * The words are loaded four at a time into r8-r11, so those registers
 * are overwritten. src is not advanced here; the users of this macro
 * adjust it after the last chunk.
 *
 * NOTE(review): the last line of the macro ends in a backslash, so the
 * blank line that follows it is part of the definition. Keep that blank
 * line, or the next #define would be spliced into this macro.
 */
34#define CSUM_BIGCHUNK(src, offset, sum) \
35 lw r8, [src, offset + 0x00]; \
36 lw r9, [src, offset + 0x04]; \
37 lw r10, [src, offset + 0x08]; \
38 lw r11, [src, offset + 0x0c]; \
39 ADDC(sum, r8); \
40 ADDC(sum, r9); \
41 ADDC(sum, r10); \
42 ADDC(sum, r11); \
43 lw r8, [src, offset + 0x10]; \
44 lw r9, [src, offset + 0x14]; \
45 lw r10, [src, offset + 0x18]; \
46 lw r11, [src, offset + 0x1c]; \
47 ADDC(sum, r8); \
48 ADDC(sum, r9); \
49 ADDC(sum, r10); \
50 ADDC(sum, r11); \
51
/*
 * Register aliases used by the code below.
 *
 * src (r4) is the current read address and sum (r27) is the accumulator.
 * r5 is used directly, by number, as the remaining byte count; the "dest"
 * alias for r5 is not referenced by any code visible in this file.
 */
52#define src r4
53#define dest r5
54#define sum r27
55
56 .text
57/* unknown src alignment and < 8 bytes to go */
/*
 * small_csumcpy: checksum the last few bytes, fold the accumulator to
 * 16 bits, add the value passed in r6 and branch to the address in r3.
 *
 * State expected on entry (as set up by csum_partial):
 *   r10  number of bytes still to process (copied into r5 below)
 *   r25  1 if csum_partial already consumed a leading odd-address byte,
 *        0 otherwise
 *   src  current read address
 *   sum  running total
 *   r6   value added to the folded sum at the very end
 *
 * After the optional odd leading byte, r5 is not decremented any more:
 * the remaining pieces are selected by testing bits 2, 1 and 0 of r5.
 * r8, r9 and r26 are used as scratch.
 */
58small_csumcpy:
59 mv r5, r10
60 ldi r9, 0x0
61 cmpi.c r25, 0x1
62 beq pass_small_set_t7 /* r25 == 1: odd byte was already handled */
63 andri.c r25,r4 , 0x1 /* r25 = src & 1: is src odd? */
64
/*
 * The branch below consumes the flags of whichever instruction ran last:
 * the cmpi.c (r25 was already 1) or the andri.c (src & 1 == 0). In both
 * cases "equal" means there is no odd leading byte to process here.
 */
65pass_small_set_t7:
66 beq aligned
67 cmpi.c r5, 0x0
68 beq fold
/* src is odd: take one byte so that src becomes 2-byte aligned */
69 lbu r9, [src]
70 slli r9,r9, 0x8 /* place the odd-address byte in bits 8-15 (little endian) */
71 ADDC(sum, r9)
72 addi src, 0x1
73 subi.c r5, 0x1
74
75 /* src is at least 2-byte aligned from here on */
76aligned:
77 andri.c r8, r5, 0x4 /* len < 8, so bit 2 set means len >= 4 */
78 beq len_less_4bytes
79
80 /* At least 4 bytes to go: read them as one word or as two halfwords. */
81 andri.c r8, src, 0x3 /* is src 4-byte aligned? */
82 beq four_byte_aligned
83 lhu r9, [src]
84 addi src, 2
85 ADDC(sum, r9)
86 lhu r9, [src]
87 addi src, 2
88 ADDC(sum, r9)
89 b len_less_4bytes
90
91four_byte_aligned: /* len >= 4 and src is 4-byte aligned */
92 lw r9, [src]
93 addi src, 4
94 ADDC(sum, r9)
95
96len_less_4bytes: /* 2-byte aligned, fewer than 4 bytes left */
97 andri.c r8, r5, 0x2
98 beq len_less_2bytes
99 lhu r9, [src]
100 addi src, 0x2 /* src+=2 */
101 ADDC(sum, r9)
102
103len_less_2bytes: /* at most 1 byte left */
104 andri.c r8, r5, 0x1
105 beq fold /* bit 0 clear: nothing left, go fold */
106 lbu r9, [src]
107
108fold_ADDC:
109 ADDC(sum, r9)
110fold:
111 /*
 * Fold the accumulator to 16 bits: add (sum << 16) into sum, shift the
 * result down by 16 and add 1 if that add wrapped. The srli sits between
 * the cmp.c and the bleu that tests it; this relies on srli (no .c
 * suffix) leaving the flags alone - TODO confirm against the ISA manual.
 */
112 slli r26, sum, 16
113 add sum, sum, r26
114 cmp.c r26, sum
115 srli sum, sum, 16
116 bleu 1f /* if r26<=sum */
117 addi sum, 0x1 /* r26>sum */
1181:
119 /* r25 != 0 (start address was odd): swap the two bytes of the folded sum */
120 cmpi.c r25, 0x0
121 beq 1f
122 slli r26, sum, 8
123 srli sum, sum, 8
124 or sum, sum, r26
125 andi sum, 0xffff
1261:
/* NOTE(review): the effect of .set optimize / .set volatile is not visible in this file */
127 .set optimize
128 /* Add the partial checksum passed in r6, leave the result in r4. */
129 ADDC(sum, r6)
130 mv r4, sum
131 br r3
132 .set volatile
133
/*
 * csum_partial
 *
 * Adds the data at src into sum with end-around carry (ADDC), using
 * progressively larger aligned accesses, then jumps to small_csumcpy,
 * which handles the last bytes, folds the sum to 16 bits, adds r6 and
 * leaves the result in r4.
 *
 * Registers as used by the code below:
 *   r4 (src)   in: address of the data; out: result
 *   r5         in: byte count
 *   r6         in: value added to the folded sum by small_csumcpy
 *   r3         address branched to on exit (presumably the return
 *              address - confirm against the Score ABI)
 *   r25        1 when the start address was odd, else 0
 *   r26        loop counter / scratch
 *   r27 (sum)  accumulator
 *   r8-r11     scratch
 *
 * r5 is only decremented while aligning src. Once the block loops start
 * it is left unchanged and the code selects the 64-byte, 32-byte, word
 * and byte tails by testing bits 0x40, 0x20, 0x1c and 0x3 of r5.
 */
134 .align 5
135ENTRY(csum_partial)
136 ldi sum, 0
137 ldi r25, 0
138 mv r10, r5
139 cmpi.c r5, 0x8
140 blt small_csumcpy /* fewer than 8 bytes (signed compare) */
/*
 * NOTE(review): this test looks unreachable - r5 == 0 would already have
 * taken the blt above. If "out" were ever reached it would return sum
 * without adding r6.
 */
141 cmpi.c r5, 0x0
142 beq out
143 andri.c r25, src, 0x1 /* odd buffer? */
144
145 beq word_align
146hword_align: /* src is odd: consume 1 byte */
147 lbu r8, [src]
148 subi r5, 0x1
149 slli r8, r8, 8
150 ADDC(sum, r8)
151 addi src, 0x1
152
153word_align: /* src is 2-byte aligned */
154 andri.c r8, src, 0x2 /* is src 4-byte aligned? */
155 beq dword_align /* bit 1 clear: already 4-byte aligned */
156 lhu r8, [src]
157 subi r5, 0x2
158 ADDC(sum, r8)
159 addi src, 0x2
160
161dword_align: /* src is 4-byte aligned */
162 mv r26, r5 /* only used on the short path; overwritten when len >= 56 */
163 ldi r8, 56
164 cmp.c r8, r5
165 bgtu do_end_words /* 56 > len (unsigned): skip the block loops */
166 andri.c r26, src, 0x4
167 beq qword_align
/* bit 2 of src set: consume one word to reach 8-byte alignment */
168 lw r8, [src]
169 subi r5, 0x4
170 ADDC(sum, r8)
171 addi src, 0x4
172
173qword_align: /* src is 8-byte aligned */
174 andri.c r26, src, 0x8
175 beq oword_align
/* bit 3 of src set: consume two words to reach 16-byte alignment */
176 lw r8, [src, 0x0]
177 lw r9, [src, 0x4]
178 subi r5, 0x8 /* len-=0x8 */
179 ADDC(sum, r8)
180 ADDC(sum, r9)
181 addi src, 0x8
182
183oword_align: /* src is 16-byte aligned */
184 andri.c r26, src, 0x10
185 beq begin_movement
/* bit 4 of src set: consume four words to reach 32-byte alignment */
186 lw r10, [src, 0x08]
187 lw r11, [src, 0x0c]
188 lw r8, [src, 0x00]
189 lw r9, [src, 0x04]
190 ADDC(sum, r10)
191 ADDC(sum, r11)
192 ADDC(sum, r8)
193 ADDC(sum, r9)
194 subi r5, 0x10
195 addi src, 0x10
196
197begin_movement:
198 srli.c r26, r5, 0x7 /* r26 = len >> 7 = number of 128-byte blocks */
199 beq 1f /* len<128 */
200
201/* r26 = len >> 7, computed by the srli.c in begin_movement */
202move_128bytes:
203 CSUM_BIGCHUNK(src, 0x00, sum)
204 CSUM_BIGCHUNK(src, 0x20, sum)
205 CSUM_BIGCHUNK(src, 0x40, sum)
206 CSUM_BIGCHUNK(src, 0x60, sum)
207 subi.c r26, 0x01 /* one 128-byte block done */
208 addi src, 0x80
209 bne move_128bytes
210
2111: /* fewer than 128 bytes left: one 64-byte block if bit 0x40 of len is set */
212 andri.c r10, r5, 0x40
213 beq 1f
214
215move_64bytes:
216 CSUM_BIGCHUNK(src, 0x00, sum)
217 CSUM_BIGCHUNK(src, 0x20, sum)
218 addi src, 0x40
219
2201: /* fewer than 64 bytes left */
221 andri r26, r5, 0x1c /* r26 = bytes left over in whole words (bits 2-4 of len) */
222 andri.c r10, r5, 0x20
223 beq do_end_words /* bit 0x20 clear: no 32-byte block */
224
225move_32bytes:
226 CSUM_BIGCHUNK(src, 0x00, sum)
227 andri r26, r5, 0x1c
228 addri src, src, 0x20
229
230do_end_words: /* r26 = byte count to consume as whole words */
231 /* r26 is len (short path from dword_align) or len & 0x1c (after the block loops) */
232 cmpi.c r26, 0x0
233 beq maybe_end_cruft /* no whole words left */
234 srli r26, r26, 0x2
235
236end_words:
237 lw r8, [src]
238 subi.c r26, 0x1 /* one word consumed */
239 ADDC(sum, r8)
240 addi src, 0x4
241 cmpi.c r26, 0x0
242 bne end_words /* r26!=0 */
243
244maybe_end_cruft: /* 0-3 trailing bytes */
245 andri r10, r5, 0x3
246
/* Despite the label name nothing is copied: hand the tail length to small_csumcpy in r10. */
247small_memcpy:
248 mv r5, r10
249 j small_csumcpy
250
251out:
252 mv r4, sum
253 br r3
254
255END(csum_partial)