diff options
| author | Chen, Kenneth W <kenneth.w.chen@intel.com> | 2006-11-10 16:17:50 -0500 |
|---|---|---|
| committer | Tony Luck <tony.luck@intel.com> | 2006-12-07 14:17:26 -0500 |
| commit | 007d77d0c5eb36555443ff273ce2a27f90da8837 (patch) | |
| tree | 6d85a48be575cc8cda1bef7b7aa360ac8bcebde7 | |
| parent | 5b4d5681ffaa6e1bf3b085beb701d87c7c7404da (diff) | |
[IA64] implement csum_ipv6_magic for ia64.
The asm version is 4.4 times faster than the generic C version and
10X smaller in code size.
Signed-off-by: Ken Chen <kenneth.w.chen@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
| -rw-r--r-- | arch/ia64/lib/ip_fast_csum.S | 55 | ||||
| -rw-r--r-- | include/asm-ia64/checksum.h | 6 |
2 files changed, 59 insertions, 2 deletions
diff --git a/arch/ia64/lib/ip_fast_csum.S b/arch/ia64/lib/ip_fast_csum.S index 19674ca2acfc..4fb132ee207a 100644 --- a/arch/ia64/lib/ip_fast_csum.S +++ b/arch/ia64/lib/ip_fast_csum.S | |||
| @@ -8,8 +8,8 @@ | |||
| 8 | * in0: address of buffer to checksum (char *) | 8 | * in0: address of buffer to checksum (char *) |
| 9 | * in1: length of the buffer (int) | 9 | * in1: length of the buffer (int) |
| 10 | * | 10 | * |
| 11 | * Copyright (C) 2002 Intel Corp. | 11 | * Copyright (C) 2002, 2006 Intel Corp. |
| 12 | * Copyright (C) 2002 Ken Chen <kenneth.w.chen@intel.com> | 12 | * Copyright (C) 2002, 2006 Ken Chen <kenneth.w.chen@intel.com> |
| 13 | */ | 13 | */ |
| 14 | 14 | ||
| 15 | #include <asm/asmmacro.h> | 15 | #include <asm/asmmacro.h> |
| @@ -25,6 +25,9 @@ | |||
| 25 | 25 | ||
| 26 | #define in0 r32 | 26 | #define in0 r32 |
| 27 | #define in1 r33 | 27 | #define in1 r33 |
| 28 | #define in2 r34 | ||
| 29 | #define in3 r35 | ||
| 30 | #define in4 r36 | ||
| 28 | #define ret0 r8 | 31 | #define ret0 r8 |
| 29 | 32 | ||
| 30 | GLOBAL_ENTRY(ip_fast_csum) | 33 | GLOBAL_ENTRY(ip_fast_csum) |
| @@ -88,3 +91,51 @@ GLOBAL_ENTRY(ip_fast_csum) | |||
| 88 | mov b0=r34 | 91 | mov b0=r34 |
| 89 | br.ret.sptk.many b0 | 92 | br.ret.sptk.many b0 |
| 90 | END(ip_fast_csum) | 93 | END(ip_fast_csum) |
| 94 | |||
| 95 | GLOBAL_ENTRY(csum_ipv6_magic) | ||
| 96 | ld4 r20=[in0],4 | ||
| 97 | ld4 r21=[in1],4 | ||
| 98 | dep r15=in3,in2,32,16 | ||
| 99 | ;; | ||
| 100 | ld4 r22=[in0],4 | ||
| 101 | ld4 r23=[in1],4 | ||
| 102 | mux1 r15=r15,@rev | ||
| 103 | ;; | ||
| 104 | ld4 r24=[in0],4 | ||
| 105 | ld4 r25=[in1],4 | ||
| 106 | shr.u r15=r15,16 | ||
| 107 | add r16=r20,r21 | ||
| 108 | add r17=r22,r23 | ||
| 109 | ;; | ||
| 110 | ld4 r26=[in0],4 | ||
| 111 | ld4 r27=[in1],4 | ||
| 112 | add r18=r24,r25 | ||
| 113 | add r8=r16,r17 | ||
| 114 | ;; | ||
| 115 | add r19=r26,r27 | ||
| 116 | add r8=r8,r18 | ||
| 117 | ;; | ||
| 118 | add r8=r8,r19 | ||
| 119 | add r15=r15,in4 | ||
| 120 | ;; | ||
| 121 | add r8=r8,r15 | ||
| 122 | ;; | ||
| 123 | shr.u r10=r8,32 // now fold sum into short | ||
| 124 | zxt4 r11=r8 | ||
| 125 | ;; | ||
| 126 | add r8=r10,r11 | ||
| 127 | ;; | ||
| 128 | shr.u r10=r8,16 // yeah, keep it rolling | ||
| 129 | zxt2 r11=r8 | ||
| 130 | ;; | ||
| 131 | add r8=r10,r11 | ||
| 132 | ;; | ||
| 133 | shr.u r10=r8,16 // three times lucky | ||
| 134 | zxt2 r11=r8 | ||
| 135 | ;; | ||
| 136 | add r8=r10,r11 | ||
| 137 | mov r9=0xffff | ||
| 138 | ;; | ||
| 139 | andcm r8=r9,r8 | ||
| 140 | br.ret.sptk.many b0 | ||
| 141 | END(csum_ipv6_magic) | ||
diff --git a/include/asm-ia64/checksum.h b/include/asm-ia64/checksum.h index bd40f4756ce1..2b78582cbd61 100644 --- a/include/asm-ia64/checksum.h +++ b/include/asm-ia64/checksum.h | |||
| @@ -70,4 +70,10 @@ static inline __sum16 csum_fold(__wsum csum) | |||
| 70 | return (__force __sum16)~sum; | 70 | return (__force __sum16)~sum; |
| 71 | } | 71 | } |
| 72 | 72 | ||
| 73 | #define _HAVE_ARCH_IPV6_CSUM 1 | ||
| 74 | struct in6_addr; | ||
| 75 | extern unsigned short int csum_ipv6_magic(struct in6_addr *saddr, | ||
| 76 | struct in6_addr *daddr, __u32 len, unsigned short proto, | ||
| 77 | unsigned int csum); | ||
| 78 | |||
| 73 | #endif /* _ASM_IA64_CHECKSUM_H */ | 79 | #endif /* _ASM_IA64_CHECKSUM_H */ |
