diff options
author | Chen, Kenneth W <kenneth.w.chen@intel.com> | 2006-11-10 16:17:50 -0500 |
---|---|---|
committer | Tony Luck <tony.luck@intel.com> | 2006-12-07 14:17:26 -0500 |
commit | 007d77d0c5eb36555443ff273ce2a27f90da8837 (patch) | |
tree | 6d85a48be575cc8cda1bef7b7aa360ac8bcebde7 /arch/ia64/lib | |
parent | 5b4d5681ffaa6e1bf3b085beb701d87c7c7404da (diff) |
[IA64] implement csum_ipv6_magic for ia64.
The asm version is 4.4 times faster than the generic C version and
10X smaller in code size.
Signed-off-by: Ken Chen <kenneth.w.chen@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Diffstat (limited to 'arch/ia64/lib')
-rw-r--r-- | arch/ia64/lib/ip_fast_csum.S | 55 |
1 files changed, 53 insertions, 2 deletions
diff --git a/arch/ia64/lib/ip_fast_csum.S b/arch/ia64/lib/ip_fast_csum.S index 19674ca2acfc..4fb132ee207a 100644 --- a/arch/ia64/lib/ip_fast_csum.S +++ b/arch/ia64/lib/ip_fast_csum.S | |||
@@ -8,8 +8,8 @@ | |||
8 | * in0: address of buffer to checksum (char *) | 8 | * in0: address of buffer to checksum (char *) |
9 | * in1: length of the buffer (int) | 9 | * in1: length of the buffer (int) |
10 | * | 10 | * |
11 | * Copyright (C) 2002 Intel Corp. | 11 | * Copyright (C) 2002, 2006 Intel Corp. |
12 | * Copyright (C) 2002 Ken Chen <kenneth.w.chen@intel.com> | 12 | * Copyright (C) 2002, 2006 Ken Chen <kenneth.w.chen@intel.com> |
13 | */ | 13 | */ |
14 | 14 | ||
15 | #include <asm/asmmacro.h> | 15 | #include <asm/asmmacro.h> |
@@ -25,6 +25,9 @@ | |||
25 | 25 | ||
26 | #define in0 r32 | 26 | #define in0 r32 |
27 | #define in1 r33 | 27 | #define in1 r33 |
28 | #define in2 r34 | ||
29 | #define in3 r35 | ||
30 | #define in4 r36 | ||
28 | #define ret0 r8 | 31 | #define ret0 r8 |
29 | 32 | ||
30 | GLOBAL_ENTRY(ip_fast_csum) | 33 | GLOBAL_ENTRY(ip_fast_csum) |
@@ -88,3 +91,51 @@ GLOBAL_ENTRY(ip_fast_csum) | |||
88 | mov b0=r34 | 91 | mov b0=r34 |
89 | br.ret.sptk.many b0 | 92 | br.ret.sptk.many b0 |
90 | END(ip_fast_csum) | 93 | END(ip_fast_csum) |
94 | |||
/*
 * csum_ipv6_magic
 *
 * Sums two 16-byte buffers (each read as four 32-bit words), a
 * byte-reversed length/protocol word and an initial 32-bit sum, folds the
 * 64-bit total down to 16 bits with end-around carry and returns the
 * one's complement of that value.
 *
 *	in0: address of the first 16-byte buffer
 *	in1: address of the second 16-byte buffer
 *	in2: length, 32 bits wide
 *	in3: protocol; only the low 16 bits are used
 *	in4: initial sum, 32 bits wide
 *	NOTE(review): presumably saddr, daddr, len, proto and sum of the
 *	generic C csum_ipv6_magic() -- confirm against the C prototype.
 *
 *	ret0 (r8): inverted 16-bit folded sum, bits 16-63 clear
 *
 * Leaf routine, no alloc.  Writes r8-r11 and r15-r27; in0/in1 are advanced
 * by the post-increment loads and in2/in4 are zero-extended in place.
 */
GLOBAL_ENTRY(csum_ipv6_magic)
	ld4	r20=[in0],4
	ld4	r21=[in1],4
	zxt4	in2=in2			// 32-bit argument: drop whatever is in bits 32-63
	;;
	ld4	r22=[in0],4
	ld4	r23=[in1],4
	dep	r15=in3,in2,32,16	// r15 = low 16 bits of in3 placed at bits 32-47, in2 below
	;;
	ld4	r24=[in0],4
	ld4	r25=[in1],4
	mux1	r15=r15,@rev		// reverse all eight bytes of r15
	add	r16=r20,r21
	add	r17=r22,r23
	zxt4	in4=in4			// 32-bit argument: bits 32-63 must not be added into the sum
	;;
	ld4	r26=[in0],4
	ld4	r27=[in1],4
	shr.u	r15=r15,16		// discard the two (now lowest) bytes that sat above the in3 field
	add	r18=r24,r25
	add	r8=r16,r17
	;;
	add	r19=r26,r27
	add	r8=r8,r18
	;;
	add	r8=r8,r19
	add	r15=r15,in4
	;;
	add	r8=r8,r15		// r8 = full 64-bit total, no carry lost
	;;
	shr.u	r10=r8,32		// now fold sum into short
	zxt4	r11=r8
	;;
	add	r8=r10,r11		// may exceed 32 bits by a small carry
	;;
	shr.u	r10=r8,16		// yeah, keep it rolling
	zxt2	r11=r8
	;;
	add	r8=r10,r11
	;;
	shr.u	r10=r8,16		// three times lucky
	zxt2	r11=r8
	;;
	add	r8=r10,r11
	;;
	// Not quite lucky enough: 0x1ffff folds to 0x10000 above, which
	// still has bit 16 set.  One more fold guarantees a 16-bit value
	// and leaves an already-folded value unchanged.
	shr.u	r10=r8,16
	zxt2	r11=r8
	;;
	add	r8=r10,r11
	mov	r9=0xffff
	;;
	andcm	r8=r9,r8		// r8 = 0xffff & ~r8
	br.ret.sptk.many b0
END(csum_ipv6_magic)