aboutsummaryrefslogtreecommitdiffstats
path: root/arch/blackfin/lib
diff options
context:
space:
mode:
Diffstat (limited to 'arch/blackfin/lib')
-rw-r--r--arch/blackfin/lib/checksum.c4
-rw-r--r--arch/blackfin/lib/ins.S272
-rw-r--r--arch/blackfin/lib/muldi3.S68
-rw-r--r--arch/blackfin/lib/muldi3.c99
4 files changed, 156 insertions, 287 deletions
diff --git a/arch/blackfin/lib/checksum.c b/arch/blackfin/lib/checksum.c
index 5c87505165d3..762a7f02970a 100644
--- a/arch/blackfin/lib/checksum.c
+++ b/arch/blackfin/lib/checksum.c
@@ -29,6 +29,7 @@
29 * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 29 * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
30 */ 30 */
31 31
32#include <linux/module.h>
32#include <net/checksum.h> 33#include <net/checksum.h>
33#include <asm/checksum.h> 34#include <asm/checksum.h>
34 35
@@ -76,6 +77,7 @@ __sum16 ip_fast_csum(unsigned char *iph, unsigned int ihl)
76{ 77{
77 return (__force __sum16)~do_csum(iph, ihl * 4); 78 return (__force __sum16)~do_csum(iph, ihl * 4);
78} 79}
80EXPORT_SYMBOL(ip_fast_csum);
79 81
80/* 82/*
81 * computes the checksum of a memory block at buff, length len, 83 * computes the checksum of a memory block at buff, length len,
@@ -104,6 +106,7 @@ __wsum csum_partial(const void *buff, int len, __wsum sum)
104 106
105 return sum; 107 return sum;
106} 108}
109EXPORT_SYMBOL(csum_partial);
107 110
108/* 111/*
109 * this routine is used for miscellaneous IP-like checksums, mainly 112 * this routine is used for miscellaneous IP-like checksums, mainly
@@ -137,3 +140,4 @@ __wsum csum_partial_copy(const void *src, void *dst, int len, __wsum sum)
137 memcpy(dst, src, len); 140 memcpy(dst, src, len);
138 return csum_partial(dst, len, sum); 141 return csum_partial(dst, len, sum);
139} 142}
143EXPORT_SYMBOL(csum_partial_copy);
diff --git a/arch/blackfin/lib/ins.S b/arch/blackfin/lib/ins.S
index d60554dce87b..1863a6ba507c 100644
--- a/arch/blackfin/lib/ins.S
+++ b/arch/blackfin/lib/ins.S
@@ -1,31 +1,9 @@
1/* 1/*
2 * File: arch/blackfin/lib/ins.S 2 * arch/blackfin/lib/ins.S - ins{bwl} using hardware loops
3 * Based on:
4 * Author: Bas Vermeulen <bas@buyways.nl>
5 * 3 *
6 * Created: Tue Mar 22 15:27:24 CEST 2005 4 * Copyright 2004-2008 Analog Devices Inc.
7 * Description: Implementation of ins{bwl} for BlackFin processors using zero overhead loops. 5 * Copyright (C) 2005 Bas Vermeulen, BuyWays BV <bas@buyways.nl>
8 * 6 * Licensed under the GPL-2 or later.
9 * Modified:
10 * Copyright 2004-2008 Analog Devices Inc.
11 * Copyright (C) 2005 Bas Vermeulen, BuyWays BV <bas@buyways.nl>
12 *
13 * Bugs: Enter bugs at http://blackfin.uclinux.org/
14 *
15 * This program is free software; you can redistribute it and/or modify
16 * it under the terms of the GNU General Public License as published by
17 * the Free Software Foundation; either version 2 of the License, or
18 * (at your option) any later version.
19 *
20 * This program is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 * GNU General Public License for more details.
24 *
25 * You should have received a copy of the GNU General Public License
26 * along with this program; if not, see the file COPYING, or write
27 * to the Free Software Foundation, Inc.,
28 * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
29 */ 7 */
30 8
31#include <linux/linkage.h> 9#include <linux/linkage.h>
@@ -33,6 +11,46 @@
33 11
34.align 2 12.align 2
35 13
/*
 * DO_CLI: the "interrupts off" step used by the ins{bwl} routines.
 *  - CONFIG_IPIPE: push RETS and P5:0, reserve 12 bytes of stack, call
 *    ___ipipe_stall_root_raw, release the 12 bytes and pop P5:0 again.
 *    RETS is deliberately left on the stack here; the CONFIG_IPIPE
 *    variant of DO_STI pops it.
 *    NOTE(review): presumably ___ipipe_stall_root_raw stalls interrupts
 *    for the root domain -- its definition is not in this file, confirm.
 *  - otherwise: a single "cli R3".
 * CLI_INNER_NOP: three nops placed after DO_CLI when it is issued inside
 * the loop (see CLI_INNER); empty under CONFIG_IPIPE.
 */
14#ifdef CONFIG_IPIPE
15# define DO_CLI \
16 [--sp] = rets; \
17 [--sp] = (P5:0); \
18 sp += -12; \
19 call ___ipipe_stall_root_raw; \
20 sp += 12; \
21 (P5:0) = [sp++];
22# define CLI_INNER_NOP
23#else
24# define DO_CLI cli R3;
25# define CLI_INNER_NOP nop; nop; nop;
26#endif
27
/*
 * DO_STI: counterpart of DO_CLI. Both variants carry the local label "2:".
 *  - CONFIG_IPIPE: reserve 12 bytes of stack, call
 *    ___ipipe_unstall_root_raw, release the 12 bytes, then (at label 2)
 *    pop the RETS value that DO_CLI pushed.
 *  - otherwise: "2: sti R3", pairing with the "cli R3" in DO_CLI.
 */
28#ifdef CONFIG_IPIPE
29# define DO_STI \
30 sp += -12; \
31 call ___ipipe_unstall_root_raw; \
32 sp += 12; \
332: rets = [sp++];
34#else
35# define DO_STI 2: sti R3;
36#endif
37
/*
 * Choose where DO_CLI/DO_STI are emitted relative to the hardware loop.
 * The local labels 1 and 2 defined here are the ones COMMON_INS hands to
 * LSETUP(1f, 2f).
 *  - CONFIG_BFIN_INS_LOWOVERHEAD: CLI_OUTER/STI_OUTER expand to
 *    DO_CLI/DO_STI, i.e. once around the whole transfer. CLI_INNER is
 *    only the label "1:" and STI_INNER only the label "2:" (padded with
 *    a nop on each side of the label when ANOMALY_05000416 is non-zero).
 *  - otherwise: the OUTER macros are empty, and CLI_INNER/STI_INNER
 *    expand to DO_CLI (plus CLI_INNER_NOP) and DO_STI.
 */
38#ifdef CONFIG_BFIN_INS_LOWOVERHEAD
39# define CLI_OUTER DO_CLI;
40# define STI_OUTER DO_STI;
41# define CLI_INNER 1:
42# if ANOMALY_05000416
43# define STI_INNER nop; 2: nop;
44# else
45# define STI_INNER 2:
46# endif
47#else
48# define CLI_OUTER
49# define STI_OUTER
50# define CLI_INNER 1: DO_CLI; CLI_INNER_NOP;
51# define STI_INNER DO_STI;
52#endif
53
36/* 54/*
37 * Reads on the Blackfin are speculative. In Blackfin terms, this means they 55 * Reads on the Blackfin are speculative. In Blackfin terms, this means they
38 * can be interrupted at any time (even after they have been issued on to the 56 * can be interrupted at any time (even after they have been issued on to the
@@ -53,170 +71,48 @@
53 * buffers in/out of FIFOs. 71 * buffers in/out of FIFOs.
54 */ 72 */
55 73
56ENTRY(_insl) 74#define COMMON_INS(func, ops) \
57#ifdef CONFIG_BFIN_INS_LOWOVERHEAD 75ENTRY(_ins##func) \
58 P0 = R0; /* P0 = port */ 76 P0 = R0; /* P0 = port */ \
59 cli R3; 77 CLI_OUTER; /* 3 instructions before first read access */ \
60 P1 = R1; /* P1 = address */ 78 P1 = R1; /* P1 = address */ \
61 P2 = R2; /* P2 = count */ 79 P2 = R2; /* P2 = count */ \
62 SSYNC; 80 SSYNC; \
63 LSETUP( .Llong_loop_s, .Llong_loop_e) LC0 = P2; 81 \
64.Llong_loop_s: R0 = [P0]; 82 LSETUP(1f, 2f) LC0 = P2; \
65 [P1++] = R0; 83 CLI_INNER; \
66 NOP; 84 ops; \
67.Llong_loop_e: NOP; 85 STI_INNER; \
68 sti R3; 86 \
69 RTS; 87 STI_OUTER; \
70#else 88 RTS; \
71 P0 = R0; /* P0 = port */ 89ENDPROC(_ins##func)
72 P1 = R1; /* P1 = address */
73 P2 = R2; /* P2 = count */
74 SSYNC;
75 LSETUP( .Llong_loop_s, .Llong_loop_e) LC0 = P2;
76.Llong_loop_s:
77 CLI R3;
78 NOP; NOP; NOP;
79 R0 = [P0];
80 [P1++] = R0;
81.Llong_loop_e:
82 STI R3;
83 90
84 RTS; 91COMMON_INS(l, \
85#endif 92 R0 = [P0]; \
86ENDPROC(_insl) 93 [P1++] = R0; \
87 94)
88ENTRY(_insw)
89#ifdef CONFIG_BFIN_INS_LOWOVERHEAD
90 P0 = R0; /* P0 = port */
91 cli R3;
92 P1 = R1; /* P1 = address */
93 P2 = R2; /* P2 = count */
94 SSYNC;
95 LSETUP( .Lword_loop_s, .Lword_loop_e) LC0 = P2;
96.Lword_loop_s: R0 = W[P0];
97 W[P1++] = R0;
98 NOP;
99.Lword_loop_e: NOP;
100 sti R3;
101 RTS;
102#else
103 P0 = R0; /* P0 = port */
104 P1 = R1; /* P1 = address */
105 P2 = R2; /* P2 = count */
106 SSYNC;
107 LSETUP( .Lword_loop_s, .Lword_loop_e) LC0 = P2;
108.Lword_loop_s:
109 CLI R3;
110 NOP; NOP; NOP;
111 R0 = W[P0];
112 W[P1++] = R0;
113.Lword_loop_e:
114 STI R3;
115 RTS;
116
117#endif
118ENDPROC(_insw)
119
120ENTRY(_insw_8)
121#ifdef CONFIG_BFIN_INS_LOWOVERHEAD
122 P0 = R0; /* P0 = port */
123 cli R3;
124 P1 = R1; /* P1 = address */
125 P2 = R2; /* P2 = count */
126 SSYNC;
127 LSETUP( .Lword8_loop_s, .Lword8_loop_e) LC0 = P2;
128.Lword8_loop_s: R0 = W[P0];
129 B[P1++] = R0;
130 R0 = R0 >> 8;
131 B[P1++] = R0;
132 NOP;
133.Lword8_loop_e: NOP;
134 sti R3;
135 RTS;
136#else
137 P0 = R0; /* P0 = port */
138 P1 = R1; /* P1 = address */
139 P2 = R2; /* P2 = count */
140 SSYNC;
141 LSETUP( .Lword8_loop_s, .Lword8_loop_e) LC0 = P2;
142.Lword8_loop_s:
143 CLI R3;
144 NOP; NOP; NOP;
145 R0 = W[P0];
146 B[P1++] = R0;
147 R0 = R0 >> 8;
148 B[P1++] = R0;
149 NOP;
150.Lword8_loop_e:
151 STI R3;
152 95
153 RTS; 96COMMON_INS(w, \
154#endif 97 R0 = W[P0]; \
155ENDPROC(_insw_8) 98 W[P1++] = R0; \
99)
156 100
157ENTRY(_insb) 101COMMON_INS(w_8, \
158#ifdef CONFIG_BFIN_INS_LOWOVERHEAD 102 R0 = W[P0]; \
159 P0 = R0; /* P0 = port */ 103 B[P1++] = R0; \
160 cli R3; 104 R0 = R0 >> 8; \
161 P1 = R1; /* P1 = address */ 105 B[P1++] = R0; \
162 P2 = R2; /* P2 = count */ 106)
163 SSYNC;
164 LSETUP( .Lbyte_loop_s, .Lbyte_loop_e) LC0 = P2;
165.Lbyte_loop_s: R0 = B[P0];
166 B[P1++] = R0;
167 NOP;
168.Lbyte_loop_e: NOP;
169 sti R3;
170 RTS;
171#else
172 P0 = R0; /* P0 = port */
173 P1 = R1; /* P1 = address */
174 P2 = R2; /* P2 = count */
175 SSYNC;
176 LSETUP( .Lbyte_loop_s, .Lbyte_loop_e) LC0 = P2;
177.Lbyte_loop_s:
178 CLI R3;
179 NOP; NOP; NOP;
180 R0 = B[P0];
181 B[P1++] = R0;
182.Lbyte_loop_e:
183 STI R3;
184 107
185 RTS; 108COMMON_INS(b, \
186#endif 109 R0 = B[P0]; \
187ENDPROC(_insb) 110 B[P1++] = R0; \
111)
188 112
189ENTRY(_insl_16) 113COMMON_INS(l_16, \
190#ifdef CONFIG_BFIN_INS_LOWOVERHEAD 114 R0 = [P0]; \
191 P0 = R0; /* P0 = port */ 115 W[P1++] = R0; \
192 cli R3; 116 R0 = R0 >> 16; \
193 P1 = R1; /* P1 = address */ 117 W[P1++] = R0; \
194 P2 = R2; /* P2 = count */ 118)
195 SSYNC;
196 LSETUP( .Llong16_loop_s, .Llong16_loop_e) LC0 = P2;
197.Llong16_loop_s: R0 = [P0];
198 W[P1++] = R0;
199 R0 = R0 >> 16;
200 W[P1++] = R0;
201 NOP;
202.Llong16_loop_e: NOP;
203 sti R3;
204 RTS;
205#else
206 P0 = R0; /* P0 = port */
207 P1 = R1; /* P1 = address */
208 P2 = R2; /* P2 = count */
209 SSYNC;
210 LSETUP( .Llong16_loop_s, .Llong16_loop_e) LC0 = P2;
211.Llong16_loop_s:
212 CLI R3;
213 NOP; NOP; NOP;
214 R0 = [P0];
215 W[P1++] = R0;
216 R0 = R0 >> 16;
217 W[P1++] = R0;
218.Llong16_loop_e:
219 STI R3;
220 RTS;
221#endif
222ENDPROC(_insl_16)
diff --git a/arch/blackfin/lib/muldi3.S b/arch/blackfin/lib/muldi3.S
new file mode 100644
index 000000000000..abde120ee230
--- /dev/null
+++ b/arch/blackfin/lib/muldi3.S
@@ -0,0 +1,68 @@
/*
 * Declare ___muldi3 as a global symbol of type STT_FUNC and select its
 * section: .l1.text when CONFIG_ARITHMETIC_OPS_L1 is defined, the
 * default .text otherwise.
 */
1.align 2
2.global ___muldi3;
3.type ___muldi3, STT_FUNC;
4
5#ifdef CONFIG_ARITHMETIC_OPS_L1
6.section .l1.text
7#else
8.text
9#endif
10
11/*
12 R1:R0 * R3:R2
13 = R1.h:R1.l:R0.h:R0.l * R3.h:R3.l:R2.h:R2.l
14[X] = (R1.h * R3.h) * 2^96
15[X] + (R1.h * R3.l + R1.l * R3.h) * 2^80
16[X] + (R1.h * R2.h + R1.l * R3.l + R3.h * R0.h) * 2^64
17[T1] + (R1.h * R2.l + R3.h * R0.l + R1.l * R2.h + R3.l * R0.h) * 2^48
18[T2] + (R1.l * R2.l + R3.l * R0.l + R0.h * R2.h) * 2^32
19[T3] + (R0.l * R2.h + R2.l * R0.h) * 2^16
20[T4] + (R0.l * R2.l)
21
22 We can discard the first three lines marked "X" since we produce
23 only a 64-bit result. So, we need ten 16-bit multiplies.
24
25 Individual mul-acc results:
26[E1] = R1.h * R2.l + R3.h * R0.l + R1.l * R2.h + R3.l * R0.h
27[E2] = R1.l * R2.l + R3.l * R0.l + R0.h * R2.h
28[E3] = R0.l * R2.h + R2.l * R0.h
29[E4] = R0.l * R2.l
30
31 We also need to add high parts from lower-level results to higher ones:
32 E[n]c = E[n] + (E[n+1]c >> 16), where E4c := E4
33
34 One interesting property is that all parts of the result that depend
35 on the sign of the multiplication are discarded. Those would be the
36 multiplications involving R1.h and R3.h, but only the top 16 bits of
37 the 32-bit result depend on the sign, and since R1.h and R3.h only
38 occur in E1, the top half of these results is cut off.
39 So, we can just use FU mode for all of the 16-bit multiplies, and
40 ignore questions of when to use mixed mode. */
41
/*
 * ___muldi3: 64-bit multiply R1:R0 * R3:R2, leaving the low 64 bits of
 * the product in R1:R0 (R0 = low word, R1 = high word), following the
 * E1..E4 decomposition described in the comment above.
 *
 * R3 is not taken from a register on entry: the first instruction loads
 * it from [SP + 12].
 * NOTE(review): presumably that stack slot holds the high word of the
 * second operand under the Blackfin calling convention -- confirm.
 *
 * R4 is used as a temporary; its incoming value is stored to [SP] and
 * reloaded in parallel with the final add.
 */
42___muldi3:
43 /* [SP] technically is part of the caller's frame, but we can
44 use it as scratch space. */
45 A0 = R2.H * R1.L, A1 = R2.L * R1.H (FU) || R3 = [SP + 12]; /* E1 */
46 A0 += R3.H * R0.L, A1 += R3.L * R0.H (FU) || [SP] = R4; /* E1 */
47 A0 += A1; /* E1 */
48 R4 = A0.w; /* keep E1 in R4 until the final add */
49 A0 = R0.l * R3.l (FU); /* E2 */
50 A0 += R2.l * R1.l (FU); /* E2 */
51
52 A1 = R2.L * R0.L (FU); /* E4 */
53 R3 = A1.w; /* R3 = E4; its low half becomes bits 15:0 of the result */
54 A1 = A1 >> 16; /* E3c */
55 A0 += R2.H * R0.H, A1 += R2.L * R0.H (FU); /* E2, E3c */
56 A1 += R0.L * R2.H (FU); /* E3c */
57 R0 = A1.w; /* R0 = E3c; its low half becomes bits 31:16 of the result */
58 A1 = A1 >> 16; /* E2c */
59 A0 += A1; /* E2c */
60 R1 = A0.w; /* R1 = E2c */
61
62 /* low(result) = low(E3c):low(E4) */
63 R0 = PACK (R0.l, R3.l);
64 /* high(result) = E2c + (E1 << 16) */
65 R1.h = R1.h + R4.l (NS) || R4 = [SP]; /* add low half of E1 into R1.h; reload saved R4 */
66 RTS;
67
68.size ___muldi3, .-___muldi3
diff --git a/arch/blackfin/lib/muldi3.c b/arch/blackfin/lib/muldi3.c
deleted file mode 100644
index 303d0c6a6dba..000000000000
--- a/arch/blackfin/lib/muldi3.c
+++ /dev/null
@@ -1,99 +0,0 @@
1/*
2 * File: arch/blackfin/lib/muldi3.c
3 * Based on:
4 * Author:
5 *
6 * Created:
7 * Description:
8 *
9 * Modified:
10 * Copyright 2004-2006 Analog Devices Inc.
11 *
12 * Bugs: Enter bugs at http://blackfin.uclinux.org/
13 *
14 * This program is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License as published by
16 * the Free Software Foundation; either version 2 of the License, or
17 * (at your option) any later version.
18 *
19 * This program is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU General Public License for more details.
23 *
24 * You should have received a copy of the GNU General Public License
25 * along with this program; if not, see the file COPYING, or write
26 * to the Free Software Foundation, Inc.,
27 * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
28 */
29
/*
 * Half-word helpers for the software multiply below.
 *  SI_TYPE_SIZE     - width in bits of the single-word type; defaults to 32.
 *  __ll_b           - 2^(SI_TYPE_SIZE / 2), i.e. 65536 with the default.
 *  __ll_lowpart(t)  - low half of t  (t modulo __ll_b), as usitype.
 *  __ll_highpart(t) - high half of t (t divided by __ll_b), as usitype.
 *  BITS_PER_UNIT    - defined as 8; not referenced elsewhere in this file.
 */
30#ifndef SI_TYPE_SIZE
31#define SI_TYPE_SIZE 32
32#endif
33#define __ll_b (1L << (SI_TYPE_SIZE / 2))
34#define __ll_lowpart(t) ((usitype) (t) % __ll_b)
35#define __ll_highpart(t) ((usitype) (t) / __ll_b)
36#define BITS_PER_UNIT 8
37
/*
 * umul_ppmm(w1, w0, u, v): fallback (used only when umul_ppmm is not
 * already defined) that multiplies two single words u and v and stores
 * the double-word product as high word w1 and low word w0.
 *
 * Each operand is split into halves with __ll_lowpart/__ll_highpart and
 * the four half-by-half partial products __x0..__x3 are combined;
 * a carry out of the cross-term sum in __x1 is folded into __x3 by
 * adding __ll_b.
 *
 * u and v each appear twice in the expansion (once per half), so
 * arguments with side effects are evaluated twice.
 */
38#if !defined(umul_ppmm)
39#define umul_ppmm(w1, w0, u, v) \
40 do { \
41 usitype __x0, __x1, __x2, __x3; \
42 usitype __ul, __vl, __uh, __vh; \
43 \
44 __ul = __ll_lowpart (u); \
45 __uh = __ll_highpart (u); \
46 __vl = __ll_lowpart (v); \
47 __vh = __ll_highpart (v); \
48 \
49 __x0 = (usitype) __ul * __vl; \
50 __x1 = (usitype) __ul * __vh; \
51 __x2 = (usitype) __uh * __vl; \
52 __x3 = (usitype) __uh * __vh; \
53 \
54 __x1 += __ll_highpart (__x0);/* this can't give carry */ \
55 __x1 += __x2; /* but this indeed can */ \
56 if (__x1 < __x2) /* did we get it? */ \
57 __x3 += __ll_b; /* yes, add it in the proper pos. */ \
58 \
59 (w1) = __x3 + __ll_highpart (__x1); \
60 (w0) = __ll_lowpart (__x1) * __ll_b + __ll_lowpart (__x0); \
61 } while (0)
62#endif
63
/*
 * __umulsidi3(u, v): fallback (used only when __umulsidi3 is not already
 * defined). A statement expression that runs umul_ppmm on u and v,
 * writing the high and low words into a temporary diunion, and yields
 * that union's double-word member (.ll) as its value.
 */
64#if !defined(__umulsidi3)
65#define __umulsidi3(u, v) \
66 ({diunion __w; \
67 umul_ppmm (__w.s.high, __w.s.low, u, v); \
68 __w.ll; })
69#endif
70
/*
 * Integer types selected by GCC machine mode rather than by C type name:
 *  usitype / sitype - unsigned / signed integer in SI mode
 *  ditype           - signed integer in DI mode
 *  word_type        - signed integer in __word__ mode (not used in this file)
 */
71typedef unsigned int usitype __attribute__ ((mode(SI)));
72typedef int sitype __attribute__ ((mode(SI)));
73typedef int ditype __attribute__ ((mode(DI)));
74typedef int word_type __attribute__ ((mode(__word__)));
75
/*
 * distruct: the two single-word halves of a double word, low word first.
 * NOTE(review): the low-then-high order presumably matches the target's
 * little-endian layout of ditype -- confirm.
 */
76struct distruct {
77 sitype low, high;
78};
/* diunion: view a ditype either whole (.ll) or as its two halves (.s). */
79typedef union {
80 struct distruct s;
81 ditype ll;
82} diunion;
83
/*
 * When CONFIG_ARITHMETIC_OPS_L1 is defined, redeclare __muldi3 with the
 * l1_text attribute before its definition.
 */
84#ifdef CONFIG_ARITHMETIC_OPS_L1
85ditype __muldi3(ditype u, ditype v)__attribute__((l1_text));
86#endif
87
/*
 * __muldi3 - multiply two double-word integers.
 * @u, @v: the operands.
 *
 * Returns the product truncated to a double word: the full unsigned
 * product of the two low words, with the two cross products
 * (low(u) * high(v) and high(u) * low(v)) added into the high word.
 * The high(u) * high(v) term is never computed.
 */
88ditype __muldi3(ditype u, ditype v)
89{
90 diunion w;
91 diunion uu, vv;
92
93 uu.ll = u, vv.ll = v;
/* Full double-word product of the two low words. */
94 w.ll = __umulsidi3(uu.s.low, vv.s.low);
/* The cross terms only contribute to the high word of the result. */
95 w.s.high += ((usitype) uu.s.low * (usitype) vv.s.high
96 + (usitype) uu.s.high * (usitype) vv.s.low);
97
98 return w.ll;
99}