diff options
author | Richard Kuo <rkuo@codeaurora.org> | 2011-10-31 19:38:04 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-11-01 10:34:18 -0400 |
commit | 075a46a049d4ec16925139d69b4473499fd14122 (patch) | |
tree | 6e0f63f9e9e3f9fd22aa4db8fd843fda81a6691b /arch/hexagon | |
parent | b7840983e841e2870ff69f8e410e349c6b819b9f (diff) |
Hexagon: Add checksum functions
Signed-off-by: Richard Kuo <rkuo@codeaurora.org>
Signed-off-by: Linas Vepstas <linas@codeaurora.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'arch/hexagon')
-rw-r--r-- | arch/hexagon/include/asm/checksum.h | 50 | ||||
-rw-r--r-- | arch/hexagon/include/asm/intrinsics.h | 26 | ||||
-rw-r--r-- | arch/hexagon/lib/checksum.c | 203 |
3 files changed, 279 insertions, 0 deletions
diff --git a/arch/hexagon/include/asm/checksum.h b/arch/hexagon/include/asm/checksum.h new file mode 100644 index 000000000000..3ce4ecd44f82 --- /dev/null +++ b/arch/hexagon/include/asm/checksum.h | |||
@@ -0,0 +1,50 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 and | ||
6 | * only version 2 as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program; if not, write to the Free Software | ||
15 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA | ||
16 | * 02110-1301, USA. | ||
17 | */ | ||
18 | |||
#ifndef _ASM_CHECKSUM_H
#define _ASM_CHECKSUM_H

/*
 * Each "#define name name" below accompanies an architecture-specific
 * prototype and precedes the include of asm-generic/checksum.h at the
 * bottom of this header.
 * NOTE(review): presumably the generic code tests these macros to skip
 * its own fallback versions -- confirm against asm-generic/checksum.h.
 */

/*
 * sums the len bytes starting at voidptr
 */
#define do_csum do_csum
unsigned int do_csum(const void *voidptr, int len);

/*
 * the same as csum_partial, but copies from src to dst while it
 * checksums
 *
 * here it is even more important to align src and dst on a 32-bit
 * (or even better 64-bit) boundary
 */
#define csum_partial_copy_nocheck csum_partial_copy_nocheck
__wsum csum_partial_copy_nocheck(const void *src, void *dst,
					int len, __wsum sum);

/*
 * computes the checksum of the TCP/UDP pseudo-header
 * returns the running sum as a __wsum; it is not complemented and
 * not folded to 16 bits (csum_tcpudp_magic below does that)
 */
#define csum_tcpudp_nofold csum_tcpudp_nofold
__wsum csum_tcpudp_nofold(unsigned long saddr, unsigned long daddr,
	unsigned short len, unsigned short proto, __wsum sum);

/*
 * computes the checksum of the TCP/UDP pseudo-header
 * returns a 16-bit checksum, already complemented
 */
#define csum_tcpudp_magic csum_tcpudp_magic
__sum16 csum_tcpudp_magic(unsigned long saddr, unsigned long daddr,
	unsigned short len, unsigned short proto, __wsum sum);

#include <asm-generic/checksum.h>

#endif
diff --git a/arch/hexagon/include/asm/intrinsics.h b/arch/hexagon/include/asm/intrinsics.h new file mode 100644 index 000000000000..1c02186d2e9a --- /dev/null +++ b/arch/hexagon/include/asm/intrinsics.h | |||
@@ -0,0 +1,26 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 and | ||
6 | * only version 2 as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program; if not, write to the Free Software | ||
15 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA | ||
16 | * 02110-1301, USA. | ||
17 | */ | ||
18 | |||
#ifndef _ASM_HEXAGON_INTRINSICS_H
#define _ASM_HEXAGON_INTRINSICS_H

/*
 * Short aliases for the compiler's Hexagon builtins.
 *
 * As used by arch/hexagon/lib/checksum.c:
 *   HEXAGON_P_vrmpyhacc_PP(acc, a, b) - takes a 64-bit accumulator and
 *       two 64-bit operands and returns the updated accumulator.
 *   HEXAGON_P_vrmpyh_PP(a, b)         - same operation without an
 *       incoming accumulator.
 *   HEXAGON_R_cl0_R(x)                - takes a 32-bit value; its result
 *       is used as a shift count.
 *
 * NOTE(review): presumably vrmpyh multiplies the four 16-bit lanes of a
 * and b pairwise and sums the products, and cl0 counts leading zero
 * bits -- confirm against the Hexagon instruction reference.
 */
#define HEXAGON_P_vrmpyhacc_PP __builtin_HEXAGON_M2_vrmac_s0
#define HEXAGON_P_vrmpyh_PP __builtin_HEXAGON_M2_vrmpy_s0
#define HEXAGON_R_cl0_R __builtin_HEXAGON_S2_cl0

#endif
diff --git a/arch/hexagon/lib/checksum.c b/arch/hexagon/lib/checksum.c new file mode 100644 index 000000000000..93005522d52b --- /dev/null +++ b/arch/hexagon/lib/checksum.c | |||
@@ -0,0 +1,203 @@ | |||
1 | /* | ||
2 | * Checksum functions for Hexagon | ||
3 | * | ||
4 | * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License version 2 and | ||
8 | * only version 2 as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope that it will be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | * GNU General Public License for more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program; if not, write to the Free Software | ||
17 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA | ||
18 | * 02110-1301, USA. | ||
19 | */ | ||
20 | |||
21 | /* This was derived from arch/alpha/lib/checksum.c */ | ||
22 | |||
23 | |||
24 | #include <linux/module.h> | ||
25 | #include <linux/string.h> | ||
26 | |||
27 | #include <asm/byteorder.h> | ||
28 | #include <net/checksum.h> | ||
29 | #include <linux/uaccess.h> | ||
30 | #include <asm/intrinsics.h> | ||
31 | |||
32 | |||
/*
 * Vector value operations
 *
 * A 64-bit value is treated as four 16-bit lanes.  SIGN, CARRY and
 * SELECT each build the constant for one lane: x scales the lane value
 * (0x8000, 0x0002 or 0x0001) and y is the lane's bit offset.  The VR_*
 * forms combine four lanes, with a being the topmost (bits 48..63) and
 * d the lowest (bits 0..15).
 *
 * Arguments are fully parenthesized so that expression arguments such
 * as "a + b" or "m | n" expand with the intended precedence.
 */
#define SIGN(x, y)	((0x8000ULL * (x)) << (y))
#define CARRY(x, y)	((0x0002ULL * (x)) << (y))
#define SELECT(x, y)	((0x0001ULL * (x)) << (y))

#define VR_NEGATE(a, b, c, d)	(SIGN((a), 48) + SIGN((b), 32) \
				 + SIGN((c), 16) + SIGN((d), 0))
#define VR_CARRY(a, b, c, d)	(CARRY((a), 48) + CARRY((b), 32) \
				 + CARRY((c), 16) + CARRY((d), 0))
#define VR_SELECT(a, b, c, d)	(SELECT((a), 48) + SELECT((b), 32) \
				 + SELECT((c), 16) + SELECT((d), 0))
44 | |||
45 | |||
46 | /* optimized HEXAGON V3 intrinsic version */ | ||
47 | static inline unsigned short from64to16(u64 x) | ||
48 | { | ||
49 | u64 sum; | ||
50 | |||
51 | sum = HEXAGON_P_vrmpyh_PP(x^VR_NEGATE(1, 1, 1, 1), | ||
52 | VR_SELECT(1, 1, 1, 1)); | ||
53 | sum += VR_CARRY(0, 0, 1, 0); | ||
54 | sum = HEXAGON_P_vrmpyh_PP(sum, VR_SELECT(0, 0, 1, 1)); | ||
55 | |||
56 | return 0xFFFF & sum; | ||
57 | } | ||
58 | |||
59 | /* | ||
60 | * computes the checksum of the TCP/UDP pseudo-header | ||
61 | * returns a 16-bit checksum, already complemented. | ||
62 | */ | ||
63 | __sum16 csum_tcpudp_magic(unsigned long saddr, unsigned long daddr, | ||
64 | unsigned short len, unsigned short proto, | ||
65 | __wsum sum) | ||
66 | { | ||
67 | return (__force __sum16)~from64to16( | ||
68 | (__force u64)saddr + (__force u64)daddr + | ||
69 | (__force u64)sum + ((len + proto) << 8)); | ||
70 | } | ||
71 | |||
72 | __wsum csum_tcpudp_nofold(unsigned long saddr, unsigned long daddr, | ||
73 | unsigned short len, unsigned short proto, | ||
74 | __wsum sum) | ||
75 | { | ||
76 | u64 result; | ||
77 | |||
78 | result = (__force u64)saddr + (__force u64)daddr + | ||
79 | (__force u64)sum + ((len + proto) << 8); | ||
80 | |||
81 | /* Fold down to 32-bits so we don't lose in the typedef-less | ||
82 | network stack. */ | ||
83 | /* 64 to 33 */ | ||
84 | result = (result & 0xffffffffUL) + (result >> 32); | ||
85 | /* 33 to 32 */ | ||
86 | result = (result & 0xffffffffUL) + (result >> 32); | ||
87 | return (__force __wsum)result; | ||
88 | } | ||
89 | EXPORT_SYMBOL(csum_tcpudp_nofold); | ||
90 | |||
/*
 * do_csum - sum an arbitrary memory area into a folded 16-bit value
 * @voidptr: start of the area
 * @len:     number of bytes to sum; values <= 0 yield 0
 *
 * The area is split into three parts:
 *   head - up to 15 bytes that bring the pointer to a 16-byte boundary,
 *          consumed as 1-, 2-, 4- and 8-byte pieces chosen by the bits
 *          of "start";
 *   mid  - whole 16-byte chunks, accumulated as two 8-byte words per
 *          chunk into two independent accumulators (sum0 and sum1);
 *   tail - the remaining 0..15 bytes, consumed as 8-, 4-, 2- and 1-byte
 *          pieces chosen by the bits of "end".
 *
 * Wide pieces go through HEXAGON_P_vrmpyhacc_PP with every selected
 * 16-bit lane XORed with 0x8000; a matching multiple of 0x8000 is added
 * to the accumulator to cancel that bias.
 * NOTE(review): this implies the intrinsic sums signed 16-bit lanes --
 * confirm against the Hexagon instruction reference.
 *
 * Returns the sum folded to 16 bits, not complemented.
 */

/* optimized HEXAGON intrinsic version, with over read fixed */
unsigned int do_csum(const void *voidptr, int len)
{
	u64 sum0, sum1, x0, x1, *ptr8_o, *ptr8_e, *ptr8;
	int i, start, mid, end, mask;
	/*
	 * NOTE(review): bytes are read through a plain "char" pointer; if
	 * char is signed in this build, the single-byte head and tail loads
	 * below would sign-extend values >= 0x80 -- confirm char signedness.
	 */
	const char *ptr = voidptr;
	unsigned short *ptr2;
	unsigned int *ptr4;

	if (len <= 0)
		return 0;

	/*
	 * Bytes needed to reach the next 16-byte boundary (0..15).
	 * NOTE(review): the pointer is truncated to int for this.
	 */
	start = 0xF & (16-(((int) ptr) & 0xF)) ;
	/*
	 * NOTE(review): presumably cl0 counts the leading zero bits of len,
	 * which makes mask smaller than len so that the head can never be
	 * longer than the buffer -- confirm.
	 */
	mask = 0x7fffffffUL >> HEXAGON_R_cl0_R(len);
	start = start & mask ;

	mid = len - start;	/* bytes left after the head */
	end = mid & 0xF;	/* tail length, 0..15 */
	mid = mid>>4;		/* number of whole 16-byte chunks */
	/*
	 * mid << 18 equals mid * 2 * (4 * 0x8000): it looks like advance
	 * compensation for the 0x8000 bias applied to each of the four
	 * lanes in the two accumulate calls made per chunk below.
	 */
	sum0 = mid << 18;
	sum1 = 0;

	/* Head: one byte, placed in the upper half of a 16-bit lane. */
	if (start & 1)
		sum0 += (u64) (ptr[0] << 8);
	/* Head: two bytes, located after the optional single byte. */
	ptr2 = (unsigned short *) &ptr[start & 1];
	if (start & 2)
		sum1 += (u64) ptr2[0];
	/* Head: four bytes as two biased lanes; 2 * 0x8000 is added back. */
	ptr4 = (unsigned int *) &ptr[start & 3];
	if (start & 4) {
		sum0 = HEXAGON_P_vrmpyhacc_PP(sum0,
			VR_NEGATE(0, 0, 1, 1)^((u64)ptr4[0]),
			VR_SELECT(0, 0, 1, 1));
		sum0 += VR_SELECT(0, 0, 1, 0);
	}
	/* Head: eight bytes as four biased lanes; 4 * 0x8000 is added back. */
	ptr8 = (u64 *) &ptr[start & 7];
	if (start & 8) {
		sum1 = HEXAGON_P_vrmpyhacc_PP(sum1,
			VR_NEGATE(1, 1, 1, 1)^(ptr8[0]),
			VR_SELECT(1, 1, 1, 1));
		sum1 += VR_CARRY(0, 0, 1, 0);
	}
	/* First and second 8-byte word of the first 16-byte chunk. */
	ptr8_o = (u64 *) (ptr + start);
	ptr8_e = (u64 *) (ptr + start + 8);

	if (mid) {
		/*
		 * Software-pipelined: x0/x1 always hold a pair that has been
		 * loaded but not yet accumulated.  The loop body runs
		 * mid - 1 times and the final pair is accumulated after it,
		 * so exactly mid pairs are loaded and nothing is read beyond
		 * the last whole chunk.
		 */
		x0 = *ptr8_e; ptr8_e += 2;
		x1 = *ptr8_o; ptr8_o += 2;
		if (mid > 1)
			for (i = 0; i < mid-1; i++) {
				sum0 = HEXAGON_P_vrmpyhacc_PP(sum0,
					x0^VR_NEGATE(1, 1, 1, 1),
					VR_SELECT(1, 1, 1, 1));
				sum1 = HEXAGON_P_vrmpyhacc_PP(sum1,
					x1^VR_NEGATE(1, 1, 1, 1),
					VR_SELECT(1, 1, 1, 1));
				x0 = *ptr8_e; ptr8_e += 2;
				x1 = *ptr8_o; ptr8_o += 2;
			}
		sum0 = HEXAGON_P_vrmpyhacc_PP(sum0, x0^VR_NEGATE(1, 1, 1, 1),
			VR_SELECT(1, 1, 1, 1));
		sum1 = HEXAGON_P_vrmpyhacc_PP(sum1, x1^VR_NEGATE(1, 1, 1, 1),
			VR_SELECT(1, 1, 1, 1));
	}

	/* Tail: four bytes, located after the optional eight-byte piece. */
	ptr4 = (unsigned int *) &ptr[start + (mid * 16) + (end & 8)];
	if (end & 4) {
		sum1 = HEXAGON_P_vrmpyhacc_PP(sum1,
			VR_NEGATE(0, 0, 1, 1)^((u64)ptr4[0]),
			VR_SELECT(0, 0, 1, 1));
		sum1 += VR_SELECT(0, 0, 1, 0);
	}
	/* Tail: two bytes, located after the eight- and four-byte pieces. */
	ptr2 = (unsigned short *) &ptr[start + (mid * 16) + (end & 12)];
	if (end & 2)
		sum0 += (u64) ptr2[0];

	/* Tail: the final odd byte, added into the low half of a lane. */
	if (end & 1)
		sum1 += (u64) ptr[start + (mid * 16) + (end & 14)];

	/* Tail: eight bytes directly after the last whole chunk. */
	ptr8 = (u64 *) &ptr[start + (mid * 16)];
	if (end & 8) {
		sum0 = HEXAGON_P_vrmpyhacc_PP(sum0,
			VR_NEGATE(1, 1, 1, 1)^(ptr8[0]),
			VR_SELECT(1, 1, 1, 1));
		sum0 += VR_CARRY(0, 0, 1, 0);
	}
	/*
	 * Combine both accumulators and fold twice.  Only lanes 0 and 1 are
	 * selected, so just the low 32 bits of the combined value take part.
	 * The first fold biases lane 0 by 0x8000 and the following addition
	 * puts the 0x8000 back.
	 */
	sum0 = HEXAGON_P_vrmpyh_PP((sum0+sum1)^VR_NEGATE(0, 0, 0, 1),
		VR_SELECT(0, 0, 1, 1));
	sum0 += VR_NEGATE(0, 0, 0, 1);
	sum0 = HEXAGON_P_vrmpyh_PP(sum0, VR_SELECT(0, 0, 1, 1));

	/*
	 * If the head consumed a single leading byte, swap the two bytes of
	 * the result.
	 */
	if (start & 1)
		sum0 = (sum0 << 8) | (0xFF & (sum0 >> 8));

	return 0xFFFF & sum0;
}
194 | |||
195 | /* | ||
196 | * copy from ds while checksumming, otherwise like csum_partial | ||
197 | */ | ||
198 | __wsum | ||
199 | csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum) | ||
200 | { | ||
201 | memcpy(dst, src, len); | ||
202 | return csum_partial(dst, len, sum); | ||
203 | } | ||