aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRussell King <rmk@dyn-67.arm.linux.org.uk>2006-12-21 15:59:37 -0500
committerRussell King <rmk+kernel@arm.linux.org.uk>2007-02-06 11:46:46 -0500
commit7ef416c4b878685a419a3b2f133ab5e7283f96b0 (patch)
treeb3a32e3e3f4f3329a624d553aed297e8fcbcf60e
parent10c03f69680e9e2acd8a9409a230aef37295ac49 (diff)
[ARM] Improve csum_fold, cleanup csum_tcpudp_magic()
csum_fold doesn't need two assembly instructions to perform its task, it can simply add the high and low parts together by rotating by 16 bits, and the carry into the upper-16 bits will automatically happen. Also, since csum_tcpudp_magic() is just csum_tcpudp_nofold + csum_fold, use those two functions to achieve this. Also note that there is a csum_fold() at the end of ip_fast_csum() as well, so use the real csum_fold() there as well. Boot tested on Versatile. Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
-rw-r--r--include/asm-arm/checksum.h56
1 files changed, 18 insertions, 38 deletions
diff --git a/include/asm-arm/checksum.h b/include/asm-arm/checksum.h
index 8c0bb5bb14ee..eaa0efd8d0d4 100644
--- a/include/asm-arm/checksum.h
+++ b/include/asm-arm/checksum.h
@@ -40,13 +40,27 @@ __wsum
40csum_partial_copy_from_user(const void __user *src, void *dst, int len, __wsum sum, int *err_ptr); 40csum_partial_copy_from_user(const void __user *src, void *dst, int len, __wsum sum, int *err_ptr);
41 41
42/* 42/*
43 * Fold a partial checksum without adding pseudo headers
44 */
45static inline __sum16 csum_fold(__wsum sum)
46{
47 __asm__(
48 "add %0, %1, %1, ror #16 @ csum_fold"
49 : "=r" (sum)
50 : "r" (sum)
51 : "cc");
52 return (__force __sum16)(~(__force u32)sum >> 16);
53}
54
55/*
43 * This is a version of ip_compute_csum() optimized for IP headers, 56 * This is a version of ip_compute_csum() optimized for IP headers,
44 * which always checksum on 4 octet boundaries. 57 * which always checksum on 4 octet boundaries.
45 */ 58 */
46static inline __sum16 59static inline __sum16
47ip_fast_csum(const void *iph, unsigned int ihl) 60ip_fast_csum(const void *iph, unsigned int ihl)
48{ 61{
49 unsigned int sum, tmp1; 62 unsigned int tmp1;
63 __wsum sum;
50 64
51 __asm__ __volatile__( 65 __asm__ __volatile__(
52 "ldr %0, [%1], #4 @ ip_fast_csum \n\ 66 "ldr %0, [%1], #4 @ ip_fast_csum \n\
@@ -62,29 +76,11 @@ ip_fast_csum(const void *iph, unsigned int ihl)
62 subne %2, %2, #1 @ without destroying \n\ 76 subne %2, %2, #1 @ without destroying \n\
63 bne 1b @ the carry flag \n\ 77 bne 1b @ the carry flag \n\
64 adcs %0, %0, %3 \n\ 78 adcs %0, %0, %3 \n\
65 adc %0, %0, #0 \n\ 79 adc %0, %0, #0"
66 adds %0, %0, %0, lsl #16 \n\
67 addcs %0, %0, #0x10000 \n\
68 mvn %0, %0 \n\
69 mov %0, %0, lsr #16"
70 : "=r" (sum), "=r" (iph), "=r" (ihl), "=r" (tmp1) 80 : "=r" (sum), "=r" (iph), "=r" (ihl), "=r" (tmp1)
71 : "1" (iph), "2" (ihl) 81 : "1" (iph), "2" (ihl)
72 : "cc", "memory"); 82 : "cc", "memory");
73 return (__force __sum16)sum; 83 return csum_fold(sum);
74}
75
76/*
77 * Fold a partial checksum without adding pseudo headers
78 */
79static inline __sum16 csum_fold(__wsum sum)
80{
81 __asm__(
82 "adds %0, %1, %1, lsl #16 @ csum_fold \n\
83 addcs %0, %0, #0x10000"
84 : "=r" (sum)
85 : "r" (sum)
86 : "cc");
87 return (__force __sum16)(~(__force u32)sum >> 16);
88} 84}
89 85
90static inline __wsum 86static inline __wsum
@@ -114,23 +110,7 @@ static inline __sum16
114csum_tcpudp_magic(__be32 saddr, __be32 daddr, unsigned short len, 110csum_tcpudp_magic(__be32 saddr, __be32 daddr, unsigned short len,
115 unsigned short proto, __wsum sum) 111 unsigned short proto, __wsum sum)
116{ 112{
117 __asm__( 113 return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
118 "adds %0, %1, %2 @ csum_tcpudp_magic \n\
119 adcs %0, %0, %3 \n"
120#ifdef __ARMEB__
121 "adcs %0, %0, %4 \n"
122#else
123 "adcs %0, %0, %4, lsl #8 \n"
124#endif
125 "adcs %0, %0, %5 \n\
126 adc %0, %0, #0 \n\
127 adds %0, %0, %0, lsl #16 \n\
128 addcs %0, %0, #0x10000 \n\
129 mvn %0, %0"
130 : "=&r"(sum)
131 : "r" (sum), "r" (daddr), "r" (saddr), "r" (len), "Ir" (htons(proto))
132 : "cc");
133 return (__force __sum16)((__force u32)sum >> 16);
134} 114}
135 115
136 116