diff options
Diffstat (limited to 'arch/x86/lib/csum-partial_64.c')
-rw-r--r-- | arch/x86/lib/csum-partial_64.c | 150 |
1 files changed, 150 insertions, 0 deletions
diff --git a/arch/x86/lib/csum-partial_64.c b/arch/x86/lib/csum-partial_64.c new file mode 100644 index 000000000000..bc503f506903 --- /dev/null +++ b/arch/x86/lib/csum-partial_64.c | |||
@@ -0,0 +1,150 @@ | |||
1 | /* | ||
2 | * arch/x86/lib/csum-partial_64.c | ||
3 | * | ||
4 | * This file contains network checksum routines that are better done | ||
5 | * in an architecture-specific manner due to speed. | ||
6 | */ | ||
7 | |||
8 | #include <linux/compiler.h> | ||
9 | #include <linux/module.h> | ||
10 | #include <asm/checksum.h> | ||
11 | |||
/*
 * Fold a 32-bit value into 16 bits using one's-complement addition:
 * the upper and lower 16-bit halves are added together and any carry
 * out of bit 15 is wrapped back into bit 0 (end-around carry).
 *
 * Written in plain C rather than inline asm so the compiler can
 * constant-fold and schedule it; the result is the same 16-bit add
 * followed by an add-with-carry of zero.
 */
static inline unsigned short from32to16(unsigned a)
{
	/* high half + low half: at most 0x1fffe, i.e. 16 bits plus a carry */
	unsigned folded = (a >> 16) + (a & 0xffff);

	/* wrap the carry back in; this second add can no longer overflow 16 bits */
	folded = (folded & 0xffff) + (folded >> 16);

	return (unsigned short)folded;
}
21 | |||
22 | /* | ||
23 | * Do a 64-bit checksum on an arbitrary memory area. | ||
24 | * Returns a 32bit checksum. | ||
25 | * | ||
26 | * This isn't as time critical as it used to be because many NICs | ||
27 | * do hardware checksumming these days. | ||
28 | * | ||
29 | * Things tried and found to not make it faster: | ||
30 | * Manual Prefetching | ||
31 | * Unrolling to an 128 bytes inner loop. | ||
32 | * Using interleaving with more registers to break the carry chains. | ||
33 | */ | ||
34 | static unsigned do_csum(const unsigned char *buff, unsigned len) | ||
35 | { | ||
36 | unsigned odd, count; | ||
37 | unsigned long result = 0; | ||
38 | |||
39 | if (unlikely(len == 0)) | ||
40 | return result; | ||
41 | odd = 1 & (unsigned long) buff; | ||
42 | if (unlikely(odd)) { | ||
43 | result = *buff << 8; | ||
44 | len--; | ||
45 | buff++; | ||
46 | } | ||
47 | count = len >> 1; /* nr of 16-bit words.. */ | ||
48 | if (count) { | ||
49 | if (2 & (unsigned long) buff) { | ||
50 | result += *(unsigned short *)buff; | ||
51 | count--; | ||
52 | len -= 2; | ||
53 | buff += 2; | ||
54 | } | ||
55 | count >>= 1; /* nr of 32-bit words.. */ | ||
56 | if (count) { | ||
57 | unsigned long zero; | ||
58 | unsigned count64; | ||
59 | if (4 & (unsigned long) buff) { | ||
60 | result += *(unsigned int *) buff; | ||
61 | count--; | ||
62 | len -= 4; | ||
63 | buff += 4; | ||
64 | } | ||
65 | count >>= 1; /* nr of 64-bit words.. */ | ||
66 | |||
67 | /* main loop using 64byte blocks */ | ||
68 | zero = 0; | ||
69 | count64 = count >> 3; | ||
70 | while (count64) { | ||
71 | asm("addq 0*8(%[src]),%[res]\n\t" | ||
72 | "adcq 1*8(%[src]),%[res]\n\t" | ||
73 | "adcq 2*8(%[src]),%[res]\n\t" | ||
74 | "adcq 3*8(%[src]),%[res]\n\t" | ||
75 | "adcq 4*8(%[src]),%[res]\n\t" | ||
76 | "adcq 5*8(%[src]),%[res]\n\t" | ||
77 | "adcq 6*8(%[src]),%[res]\n\t" | ||
78 | "adcq 7*8(%[src]),%[res]\n\t" | ||
79 | "adcq %[zero],%[res]" | ||
80 | : [res] "=r" (result) | ||
81 | : [src] "r" (buff), [zero] "r" (zero), | ||
82 | "[res]" (result)); | ||
83 | buff += 64; | ||
84 | count64--; | ||
85 | } | ||
86 | |||
87 | /* last upto 7 8byte blocks */ | ||
88 | count %= 8; | ||
89 | while (count) { | ||
90 | asm("addq %1,%0\n\t" | ||
91 | "adcq %2,%0\n" | ||
92 | : "=r" (result) | ||
93 | : "m" (*(unsigned long *)buff), | ||
94 | "r" (zero), "0" (result)); | ||
95 | --count; | ||
96 | buff += 8; | ||
97 | } | ||
98 | result = add32_with_carry(result>>32, | ||
99 | result&0xffffffff); | ||
100 | |||
101 | if (len & 4) { | ||
102 | result += *(unsigned int *) buff; | ||
103 | buff += 4; | ||
104 | } | ||
105 | } | ||
106 | if (len & 2) { | ||
107 | result += *(unsigned short *) buff; | ||
108 | buff += 2; | ||
109 | } | ||
110 | } | ||
111 | if (len & 1) | ||
112 | result += *buff; | ||
113 | result = add32_with_carry(result>>32, result & 0xffffffff); | ||
114 | if (unlikely(odd)) { | ||
115 | result = from32to16(result); | ||
116 | result = ((result >> 8) & 0xff) | ((result & 0xff) << 8); | ||
117 | } | ||
118 | return result; | ||
119 | } | ||
120 | |||
121 | /* | ||
122 | * computes the checksum of a memory block at buff, length len, | ||
123 | * and adds in "sum" (32-bit) | ||
124 | * | ||
125 | * returns a 32-bit number suitable for feeding into itself | ||
126 | * or csum_tcpudp_magic | ||
127 | * | ||
128 | * this function must be called with even lengths, except | ||
129 | * for the last fragment, which may be odd | ||
130 | * | ||
131 | * it's best to have buff aligned on a 64-bit boundary | ||
132 | */ | ||
133 | __wsum csum_partial(const void *buff, int len, __wsum sum) | ||
134 | { | ||
135 | return (__force __wsum)add32_with_carry(do_csum(buff, len), | ||
136 | (__force u32)sum); | ||
137 | } | ||
138 | |||
139 | EXPORT_SYMBOL(csum_partial); | ||
140 | |||
141 | /* | ||
142 | * this routine is used for miscellaneous IP-like checksums, mainly | ||
143 | * in icmp.c | ||
144 | */ | ||
145 | __sum16 ip_compute_csum(const void *buff, int len) | ||
146 | { | ||
147 | return csum_fold(csum_partial(buff,len,0)); | ||
148 | } | ||
149 | EXPORT_SYMBOL(ip_compute_csum); | ||
150 | |||