diff options
Diffstat (limited to 'arch/score/lib/checksum.S')
-rw-r--r-- | arch/score/lib/checksum.S | 255 |
1 files changed, 255 insertions, 0 deletions
diff --git a/arch/score/lib/checksum.S b/arch/score/lib/checksum.S new file mode 100644 index 000000000000..706157edc7d5 --- /dev/null +++ b/arch/score/lib/checksum.S | |||
@@ -0,0 +1,255 @@ | |||
1 | /* | ||
2 | * arch/score/lib/checksum.S | |
3 | * | ||
4 | * Score Processor version. | ||
5 | * | ||
6 | * Copyright (C) 2009 Sunplus Core Technology Co., Ltd. | ||
7 | * Lennox Wu <lennox.wu@sunplusct.com> | ||
8 | * Chen Liqin <liqin.chen@sunplusct.com> | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License as published by | ||
12 | * the Free Software Foundation; either version 2 of the License, or | ||
13 | * (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
18 | * GNU General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License | ||
21 | * along with this program; if not, see the file COPYING, or write | ||
22 | * to the Free Software Foundation, Inc., | ||
23 | * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
24 | */ | ||
25 | #include <linux/linkage.h> | ||
26 | |||
/*
 * ADDC(acc, val): acc += val with end-around carry.
 *
 * After the add, val > acc (unsigned) can only mean the addition wrapped,
 * so 1 is added back; bleu skips that increment when val <= acc.
 *
 * The cmp.c overwrites the condition flags and the expansion defines the
 * numeric local label 9, so code around an ADDC must not depend on flags
 * set before it, nor use 9 for its own forward references across it.
 * acc and val have to be different registers, because val is compared
 * against acc after acc has been updated.
 */
#define ADDC(acc, val)			\
	add	acc, acc, val;		\
	cmp.c	val, acc;		\
	bleu	9f;			\
	addi	acc, 1;			\
9:
33 | |||
/*
 * CSUM_BIGCHUNK(src, offset, sum): add the eight 32-bit words found at
 * [src + offset] .. [src + offset + 0x1c] (32 bytes) into sum, using ADDC
 * for each word so carries are wrapped back in.
 *
 * The words are loaded four at a time into r8-r11, so those registers and
 * the condition flags are overwritten.  src itself is not advanced; the
 * caller adjusts it after the chunk(s).
 *
 * The final line deliberately has no line continuation, so the macro body
 * ends here and cannot absorb whatever source line follows it.
 */
#define CSUM_BIGCHUNK(src, offset, sum)		\
	lw	r8, [src, offset + 0x00];	\
	lw	r9, [src, offset + 0x04];	\
	lw	r10, [src, offset + 0x08];	\
	lw	r11, [src, offset + 0x0c];	\
	ADDC(sum, r8);				\
	ADDC(sum, r9);				\
	ADDC(sum, r10);				\
	ADDC(sum, r11);				\
	lw	r8, [src, offset + 0x10];	\
	lw	r9, [src, offset + 0x14];	\
	lw	r10, [src, offset + 0x18];	\
	lw	r11, [src, offset + 0x1c];	\
	ADDC(sum, r8);				\
	ADDC(sum, r9);				\
	ADDC(sum, r10);				\
	ADDC(sum, r11);
51 | |||
/* Register aliases used by the checksum code below. */
#define src	r4	/* buffer pointer, advanced as data is consumed */
#define dest	r5	/* alias only; the code shown uses r5 by name, as the byte count */
#define sum	r27	/* running checksum accumulator */
55 | |||
/*
 * small_csumcpy: sum the last few bytes of the buffer, fold the running
 * sum to 16 bits and return.
 *
 * Entered from csum_partial, either directly (length below 8) or after its
 * main path with the leftover byte count.  The source address may have any
 * alignment on the direct entry.
 *
 * In:   r4 (src)  = address of the next byte to sum
 *       r10       = bytes left (copied into r5 first thing)
 *       r25       = 1 if an odd leading byte was already consumed, else 0
 *       r27 (sum) = running sum
 *       r6        = partial checksum to add to the folded result
 *       r3        = address branched to on exit
 * Out:  r4        = folded sum plus r6 (end-around carry)
 * Uses: r5, r8, r9, r25, r26, condition flags
 */
	.text
small_csumcpy:
	mv	r5, r10			/* r5 = bytes left */
	ldi	r9, 0
	cmpi.c	r25, 1
	beq	pass_small_set_t7	/* odd start already dealt with */
	andri.c	r25, r4, 1		/* r25 = src & 1 */

pass_small_set_t7:
	/*
	 * The flags tested here come from the cmpi.c (r25 was 1) or from the
	 * andri.c (src is even); in both cases no leading byte is needed.
	 */
	beq	aligned
	cmpi.c	r5, 0
	beq	fold			/* odd address but nothing to sum */
	lbu	r9, [src]
	slli	r9, r9, 8		/* little endian: the odd byte goes in bits 15:8 */
	ADDC(sum, r9)
	addi	src, 1
	subi.c	r5, 1

	/* src is now halfword aligned; consume by the bits of the count */
aligned:
	andri.c	r8, r5, 4		/* count has its 4 bit set? */
	beq	len_less_4bytes

	/* Four bytes to add: one lw if src is word aligned, else two lhu. */
	andri.c	r8, src, 3
	beq	four_byte_aligned
	lhu	r9, [src]
	addi	src, 2
	ADDC(sum, r9)
	lhu	r9, [src]
	addi	src, 2
	ADDC(sum, r9)
	b	len_less_4bytes

four_byte_aligned:
	lw	r9, [src]
	addi	src, 4
	ADDC(sum, r9)

len_less_4bytes:
	andri.c	r8, r5, 2		/* count has its 2 bit set? */
	beq	len_less_2bytes
	lhu	r9, [src]
	addi	src, 2
	ADDC(sum, r9)

len_less_2bytes:
	andri.c	r8, r5, 1		/* one trailing byte? */
	beq	fold
	lbu	r9, [src]

fold_ADDC:
	ADDC(sum, r9)
fold:
	/*
	 * Fold 32 bits to 16: add the low half (shifted up) to the sum, keep
	 * the upper 16 bits of the result and add back the carry out of the
	 * 32-bit add.  The srli sits between the compare and the branch; the
	 * branch still acts on the cmp.c result.
	 */
	slli	r26, sum, 16
	add	sum, sum, r26
	cmp.c	r26, sum
	srli	sum, sum, 16
	bleu	1f			/* r26 <= sum: no carry */
	addi	sum, 1
1:
	/* Odd start address (r25 != 0): swap the two bytes of the result. */
	cmpi.c	r25, 0
	beq	1f
	slli	r26, sum, 8
	srli	sum, sum, 8
	or	sum, sum, r26
	andi	sum, 0xffff
1:
	.set	optimize
	/* Combine with the partial checksum handed in through r6. */
	ADDC(sum, r6)
	mv	r4, sum
	br	r3
	.set	volatile
133 | |||
/*
 * csum_partial: checksum a buffer.
 *
 * NOTE(review): the register roles below are read off this code; the C
 * prototype is presumably the kernel's csum_partial(buff, len, sum), to be
 * confirmed against the architecture's checksum header.
 *
 * In:   r4 (src) = buffer address
 *       r5       = length in bytes (compared as a signed value against 8)
 *       r6       = partial checksum, added in by small_csumcpy
 *       r3       = address small_csumcpy branches to on exit
 * Out:  r4       = result, produced by small_csumcpy
 * Uses: r8-r11, r25, r26, r27 (sum), condition flags
 *
 * Flow: lengths below 8 go straight to small_csumcpy.  Otherwise a leading
 * byte and halfword are consumed until src is word aligned; buffers with
 * at least 56 bytes left are then aligned further and summed in unrolled
 * 128/64/32-byte chunks, the remaining whole words are summed one at a
 * time, and the last 0-3 bytes are handed to small_csumcpy.  Every path
 * leaves through small_csumcpy, which folds the sum and adds r6.
 */
	.align	5
ENTRY(csum_partial)
	ldi	sum, 0
	ldi	r25, 0
	mv	r10, r5			/* small_csumcpy takes its count in r10 */
	cmpi.c	r5, 0x8
	/*
	 * Signed compare: anything below 8, including 0, leaves here, so no
	 * separate zero-length test is needed afterwards.
	 */
	blt	small_csumcpy
	andri.c	r25, src, 0x1		/* r25 = 1 for an odd start address */

	beq	word_align
hword_align:				/* consume 1 byte: src becomes even */
	lbu	r8, [src]
	subi	r5, 0x1
	slli	r8, r8, 8		/* odd byte goes in bits 15:8 */
	ADDC(sum, r8)
	addi	src, 0x1

word_align:				/* consume 2 bytes if src & 2 */
	andri.c	r8, src, 0x2
	beq	dword_align		/* already word aligned */
	lhu	r8, [src]
	subi	r5, 0x2
	ADDC(sum, r8)
	addi	src, 0x2

dword_align:				/* src is word aligned from here on */
	mv	r26, r5			/* word loop input when fewer than 56 bytes remain */
	ldi	r8, 56
	cmp.c	r8, r5
	bgtu	do_end_words		/* 56 > len (unsigned): skip the unrolled path */
	andri.c	r26, src, 0x4
	beq	qword_align
	lw	r8, [src]
	subi	r5, 0x4
	ADDC(sum, r8)
	addi	src, 0x4

qword_align:				/* consume 8 bytes if src & 8 */
	andri.c	r26, src, 0x8
	beq	oword_align
	lw	r8, [src, 0x0]
	lw	r9, [src, 0x4]
	subi	r5, 0x8			/* len -= 8 */
	ADDC(sum, r8)
	ADDC(sum, r9)
	addi	src, 0x8

oword_align:				/* consume 16 bytes if src & 0x10 */
	andri.c	r26, src, 0x10
	beq	begin_movement
	lw	r10, [src, 0x08]
	lw	r11, [src, 0x0c]
	lw	r8, [src, 0x00]
	lw	r9, [src, 0x04]
	ADDC(sum, r10)
	ADDC(sum, r11)
	ADDC(sum, r8)
	ADDC(sum, r9)
	subi	r5, 0x10
	addi	src, 0x10

begin_movement:
	srli.c	r26, r5, 0x7		/* r26 = len / 128 */
	beq	1f			/* no full 128-byte chunk */

	/* r26 counts the 128-byte chunks still to sum */
move_128bytes:
	CSUM_BIGCHUNK(src, 0x00, sum)
	CSUM_BIGCHUNK(src, 0x20, sum)
	CSUM_BIGCHUNK(src, 0x40, sum)
	CSUM_BIGCHUNK(src, 0x60, sum)
	subi.c	r26, 0x01
	addi	src, 0x80
	bne	move_128bytes		/* acts on the subi.c result */

1:					/* one 64-byte chunk if len has bit 0x40 set */
	andri.c	r10, r5, 0x40
	beq	1f

move_64bytes:
	CSUM_BIGCHUNK(src, 0x00, sum)
	CSUM_BIGCHUNK(src, 0x20, sum)
	addi	src, 0x40

1:					/* one 32-byte chunk if len has bit 0x20 set */
	andri	r26, r5, 0x1c		/* bytes belonging to the remaining whole words */
	andri.c	r10, r5, 0x20
	beq	do_end_words		/* decided by the andri.c */

move_32bytes:
	CSUM_BIGCHUNK(src, 0x00, sum)
	andri	r26, r5, 0x1c
	addri	src, src, 0x20

do_end_words:
	/*
	 * r26 is either the whole remaining length (short path from
	 * dword_align) or len & 0x1c (after the unrolled chunks); shifting it
	 * right by 2 gives the number of whole words left in both cases.
	 */
	cmpi.c	r26, 0x0
	beq	maybe_end_cruft
	srli	r26, r26, 0x2

end_words:				/* one word per iteration */
	lw	r8, [src]
	subi.c	r26, 0x1
	ADDC(sum, r8)
	addi	src, 0x4
	cmpi.c	r26, 0x0		/* ADDC overwrote the flags, so test again */
	bne	end_words

maybe_end_cruft:			/* 0-3 bytes left */
	andri	r10, r5, 0x3

small_memcpy:				/* label is not referenced in the code shown */
	mv	r5, r10
	j	small_csumcpy

END(csum_partial)