diff options
author | Heiko Carstens <heiko.carstens@de.ibm.com> | 2009-09-11 04:28:32 -0400 |
---|---|---|
committer | Martin Schwidefsky <schwidefsky@de.ibm.com> | 2009-09-11 04:29:43 -0400 |
commit | 04efc3be767cfed0d348fd598eba8fe5f5bf5fe6 (patch) | |
tree | 53a6509f836e2f85c34e5a3af66fa9fbff910f77 /arch/s390/include | |
parent | 05e7ff7da78bad3edc1290ed86b4a37da72ced62 (diff) |
[S390] convert/optimize csum_fold() to C
In the meantime gcc generates better code than the old inline
assemblies do. Original inline assembly results in:
lr %r1,%r2
sr %r3,%r3
lr %r2,%r1
srdl %r2,16
alr %r2,%r3
alr %r1,%r2
srl %r1,16
xilf %r1,65535
llghr %r2,%r1
br %r14
Out of the C code gcc generates this:
rll %r1,%r2,16
ar %r1,%r2
srl %r1,16
xilf %r1,65535
llghr %r2,%r1
br %r14
In addition we don't have any static register allocations anymore and
gcc is free to shuffle instructions around for better pipeline usage.
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Diffstat (limited to 'arch/s390/include')
-rw-r--r-- | arch/s390/include/asm/checksum.h | 25 |
1 files changed, 4 insertions, 21 deletions
diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h index d5a8e7c1477c..6c00f6800a34 100644 --- a/arch/s390/include/asm/checksum.h +++ b/arch/s390/include/asm/checksum.h | |||
@@ -78,28 +78,11 @@ csum_partial_copy_nocheck (const void *src, void *dst, int len, __wsum sum) | |||
78 | */ | 78 | */ |
79 | static inline __sum16 csum_fold(__wsum sum) | 79 | static inline __sum16 csum_fold(__wsum sum) |
80 | { | 80 | { |
81 | #ifndef __s390x__ | 81 | u32 csum = (__force u32) sum; |
82 | register_pair rp; | ||
83 | 82 | ||
84 | asm volatile( | 83 | csum += (csum >> 16) + (csum << 16); |
85 | " slr %N1,%N1\n" /* %0 = H L */ | 84 | csum >>= 16; |
86 | " lr %1,%0\n" /* %0 = H L, %1 = H L 0 0 */ | 85 | return (__force __sum16) ~csum; |
87 | " srdl %1,16\n" /* %0 = H L, %1 = 0 H L 0 */ | ||
88 | " alr %1,%N1\n" /* %0 = H L, %1 = L H L 0 */ | ||
89 | " alr %0,%1\n" /* %0 = H+L+C L+H */ | ||
90 | " srl %0,16\n" /* %0 = H+L+C */ | ||
91 | : "+&d" (sum), "=d" (rp) : : "cc"); | ||
92 | #else /* __s390x__ */ | ||
93 | asm volatile( | ||
94 | " sr 3,3\n" /* %0 = H*65536 + L */ | ||
95 | " lr 2,%0\n" /* %0 = H L, 2/3 = H L / 0 0 */ | ||
96 | " srdl 2,16\n" /* %0 = H L, 2/3 = 0 H / L 0 */ | ||
97 | " alr 2,3\n" /* %0 = H L, 2/3 = L H / L 0 */ | ||
98 | " alr %0,2\n" /* %0 = H+L+C L+H */ | ||
99 | " srl %0,16\n" /* %0 = H+L+C */ | ||
100 | : "+&d" (sum) : : "cc", "2", "3"); | ||
101 | #endif /* __s390x__ */ | ||
102 | return (__force __sum16) ~sum; | ||
103 | } | 86 | } |
104 | 87 | ||
105 | /* | 88 | /* |