aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Akira Takeuchi <takeuchi.akr@jp.panasonic.com> 2010-10-27 12:28:53 -0400
committer: David Howells <dhowells@redhat.com> 2010-10-27 12:28:53 -0400
commit: ab244c1a08a4e234cd3761a8aba3cb5a7bbe525a (patch)
tree: b2d72c3f6641591c8aabc12b225598b49c81a985
parent: 4f81ca13532981192c83da95ed8c8cb285817d99 (diff)
MN10300: Optimise do_csum()
Optimise do_csum() by ganging the loads together so that they are less likely to be interrupted in between.

Signed-off-by: Akira Takeuchi <takeuchi.akr@jp.panasonic.com>
Signed-off-by: Kiyoshi Owada <owada.kiyoshi@jp.panasonic.com>
Signed-off-by: David Howells <dhowells@redhat.com>
-rw-r--r-- arch/mn10300/lib/do_csum.S 49
1 file changed, 22 insertions, 27 deletions
diff --git a/arch/mn10300/lib/do_csum.S b/arch/mn10300/lib/do_csum.S
index e138994e1667..1d27bba0cd8f 100644
--- a/arch/mn10300/lib/do_csum.S
+++ b/arch/mn10300/lib/do_csum.S
@@ -10,26 +10,25 @@
10 */ 10 */
11#include <asm/cache.h> 11#include <asm/cache.h>
12 12
13 .section .text 13 .section .text
14 .balign L1_CACHE_BYTES 14 .balign L1_CACHE_BYTES
15 15
16############################################################################### 16###############################################################################
17# 17#
18# unsigned int do_csum(const unsigned char *buff, size_t len) 18# unsigned int do_csum(const unsigned char *buff, int len)
19# 19#
20############################################################################### 20###############################################################################
21 .globl do_csum 21 .globl do_csum
22 .type do_csum,@function 22 .type do_csum,@function
23do_csum: 23do_csum:
24 movm [d2,d3],(sp) 24 movm [d2,d3],(sp)
25 mov d0,(12,sp)
26 mov d1,(16,sp)
27 mov d1,d2 # count 25 mov d1,d2 # count
28 mov d0,a0 # buff 26 mov d0,a0 # buff
27 mov a0,a1
29 clr d1 # accumulator 28 clr d1 # accumulator
30 29
31 cmp +0,d2 30 cmp +0,d2
32 beq do_csum_done # return if zero-length buffer 31 ble do_csum_done # check for zero length or negative
33 32
34 # 4-byte align the buffer pointer 33 # 4-byte align the buffer pointer
35 btst +3,a0 34 btst +3,a0
@@ -41,17 +40,15 @@ do_csum:
41 inc a0 40 inc a0
42 asl +8,d0 41 asl +8,d0
43 add d0,d1 42 add d0,d1
44 addc +0,d1
45 add -1,d2 43 add -1,d2
46do_csum_addr_not_odd:
47 44
45do_csum_addr_not_odd:
48 cmp +2,d2 46 cmp +2,d2
49 bcs do_csum_fewer_than_4 47 bcs do_csum_fewer_than_4
50 btst +2,a0 48 btst +2,a0
51 beq do_csum_now_4b_aligned 49 beq do_csum_now_4b_aligned
52 movhu (a0+),d0 50 movhu (a0+),d0
53 add d0,d1 51 add d0,d1
54 addc +0,d1
55 add -2,d2 52 add -2,d2
56 cmp +4,d2 53 cmp +4,d2
57 bcs do_csum_fewer_than_4 54 bcs do_csum_fewer_than_4
@@ -66,20 +63,20 @@ do_csum_now_4b_aligned:
66 63
67do_csum_loop: 64do_csum_loop:
68 mov (a0+),d0 65 mov (a0+),d0
69 add d0,d1
70 mov (a0+),e0 66 mov (a0+),e0
71 addc e0,d1
72 mov (a0+),e1 67 mov (a0+),e1
73 addc e1,d1
74 mov (a0+),e3 68 mov (a0+),e3
69 add d0,d1
70 addc e0,d1
71 addc e1,d1
75 addc e3,d1 72 addc e3,d1
76 mov (a0+),d0 73 mov (a0+),d0
77 addc d0,d1
78 mov (a0+),e0 74 mov (a0+),e0
79 addc e0,d1
80 mov (a0+),e1 75 mov (a0+),e1
81 addc e1,d1
82 mov (a0+),e3 76 mov (a0+),e3
77 addc d0,d1
78 addc e0,d1
79 addc e1,d1
83 addc e3,d1 80 addc e3,d1
84 addc +0,d1 81 addc +0,d1
85 82
@@ -94,12 +91,12 @@ do_csum_remainder:
94 cmp +16,d2 91 cmp +16,d2
95 bcs do_csum_fewer_than_16 92 bcs do_csum_fewer_than_16
96 mov (a0+),d0 93 mov (a0+),d0
97 add d0,d1
98 mov (a0+),e0 94 mov (a0+),e0
99 addc e0,d1
100 mov (a0+),e1 95 mov (a0+),e1
101 addc e1,d1
102 mov (a0+),e3 96 mov (a0+),e3
97 add d0,d1
98 addc e0,d1
99 addc e1,d1
103 addc e3,d1 100 addc e3,d1
104 addc +0,d1 101 addc +0,d1
105 add -16,d2 102 add -16,d2
@@ -131,9 +128,9 @@ do_csum_fewer_than_4:
131 xor_cmp d0,d0,+2,d2 128 xor_cmp d0,d0,+2,d2
132 bcs do_csum_fewer_than_2 129 bcs do_csum_fewer_than_2
133 movhu (a0+),d0 130 movhu (a0+),d0
134do_csum_fewer_than_2:
135 and +1,d2 131 and +1,d2
136 beq do_csum_add_last_bit 132 beq do_csum_add_last_bit
133do_csum_fewer_than_2:
137 movbu (a0),d3 134 movbu (a0),d3
138 add d3,d0 135 add d3,d0
139do_csum_add_last_bit: 136do_csum_add_last_bit:
@@ -142,21 +139,19 @@ do_csum_add_last_bit:
142 139
143do_csum_done: 140do_csum_done:
144 # compress the checksum down to 16 bits 141 # compress the checksum down to 16 bits
145 mov +0xffff0000,d2 142 mov +0xffff0000,d0
146 and d1,d2 143 and d1,d0
147 asl +16,d1 144 asl +16,d1
148 add d2,d1,d0 145 add d1,d0
149 addc +0xffff,d0 146 addc +0xffff,d0
150 lsr +16,d0 147 lsr +16,d0
151 148
152 # flip the halves of the word result if the buffer was oddly aligned 149 # flip the halves of the word result if the buffer was oddly aligned
153 mov (12,sp),d1 150 and +1,a1
154 and +1,d1
155 beq do_csum_not_oddly_aligned 151 beq do_csum_not_oddly_aligned
156 swaph d0,d0 # exchange bits 15:8 with 7:0 152 swaph d0,d0 # exchange bits 15:8 with 7:0
157 153
158do_csum_not_oddly_aligned: 154do_csum_not_oddly_aligned:
159 ret [d2,d3],8 155 ret [d2,d3],8
160 156
161do_csum_end: 157 .size do_csum, .-do_csum
162 .size do_csum, do_csum_end-do_csum