diff options
author | James Hogan <james.hogan@imgtec.com> | 2012-10-05 12:02:09 -0400 |
---|---|---|
committer | James Hogan <james.hogan@imgtec.com> | 2013-03-02 15:09:52 -0500 |
commit | 086e9dc0e2ca925b1b58caefd04ed2757d14790b (patch) | |
tree | cfe182f1d07d40d09d2cae09e337423462250cac | |
parent | f507758ccbed5c354cc1ce3b8f53ea072d7bc222 (diff) |
metag: Optimised library functions
Add optimised library functions for metag.
Signed-off-by: James Hogan <james.hogan@imgtec.com>
-rw-r--r-- | arch/metag/include/asm/checksum.h | 92 | ||||
-rw-r--r-- | arch/metag/include/asm/div64.h | 12 | ||||
-rw-r--r-- | arch/metag/include/asm/string.h | 13 | ||||
-rw-r--r-- | arch/metag/lib/ashldi3.S | 33 | ||||
-rw-r--r-- | arch/metag/lib/ashrdi3.S | 33 | ||||
-rw-r--r-- | arch/metag/lib/checksum.c | 168 | ||||
-rw-r--r-- | arch/metag/lib/clear_page.S | 17 | ||||
-rw-r--r-- | arch/metag/lib/cmpdi2.S | 32 | ||||
-rw-r--r-- | arch/metag/lib/copy_page.S | 20 | ||||
-rw-r--r-- | arch/metag/lib/delay.c | 56 | ||||
-rw-r--r-- | arch/metag/lib/div64.S | 108 | ||||
-rw-r--r-- | arch/metag/lib/divsi3.S | 100 | ||||
-rw-r--r-- | arch/metag/lib/ip_fast_csum.S | 32 | ||||
-rw-r--r-- | arch/metag/lib/lshrdi3.S | 33 | ||||
-rw-r--r-- | arch/metag/lib/memcpy.S | 185 | ||||
-rw-r--r-- | arch/metag/lib/memmove.S | 345 | ||||
-rw-r--r-- | arch/metag/lib/memset.S | 86 | ||||
-rw-r--r-- | arch/metag/lib/modsi3.S | 38 | ||||
-rw-r--r-- | arch/metag/lib/muldi3.S | 44 | ||||
-rw-r--r-- | arch/metag/lib/ucmpdi2.S | 27 |
20 files changed, 1474 insertions, 0 deletions
diff --git a/arch/metag/include/asm/checksum.h b/arch/metag/include/asm/checksum.h new file mode 100644 index 000000000000..999bf761a732 --- /dev/null +++ b/arch/metag/include/asm/checksum.h | |||
@@ -0,0 +1,92 @@ | |||
1 | #ifndef _METAG_CHECKSUM_H | ||
2 | #define _METAG_CHECKSUM_H | ||
3 | |||
4 | /* | ||
5 | * computes the checksum of a memory block at buff, length len, | ||
6 | * and adds in "sum" (32-bit) | ||
7 | * | ||
8 | * returns a 32-bit number suitable for feeding into itself | ||
9 | * or csum_tcpudp_magic | ||
10 | * | ||
11 | * this function must be called with even lengths, except | ||
12 | * for the last fragment, which may be odd | ||
13 | * | ||
14 | * it's best to have buff aligned on a 32-bit boundary | ||
15 | */ | ||
16 | extern __wsum csum_partial(const void *buff, int len, __wsum sum); | ||
17 | |||
18 | /* | ||
19 | * the same as csum_partial, but copies from src while it | ||
20 | * checksums | ||
21 | * | ||
22 | * here it is even more important to align src and dst on a 32-bit (or even | ||
23 | * better 64-bit) boundary | ||
24 | */ | ||
25 | extern __wsum csum_partial_copy(const void *src, void *dst, int len, | ||
26 | __wsum sum); | ||
27 | |||
28 | /* | ||
29 | * the same as csum_partial_copy, but copies from user space. | ||
30 | * | ||
31 | * here it is even more important to align src and dst on a 32-bit (or even | ||
32 | * better 64-bit) boundary | ||
33 | */ | ||
34 | extern __wsum csum_partial_copy_from_user(const void __user *src, void *dst, | ||
35 | int len, __wsum sum, int *csum_err); | ||
36 | |||
37 | #define csum_partial_copy_nocheck(src, dst, len, sum) \ | ||
38 | csum_partial_copy((src), (dst), (len), (sum)) | ||
39 | |||
40 | /* | ||
41 | * Fold a partial checksum | ||
42 | */ | ||
43 | static inline __sum16 csum_fold(__wsum csum) | ||
44 | { | ||
45 | u32 sum = (__force u32)csum; | ||
46 | sum = (sum & 0xffff) + (sum >> 16); | ||
47 | sum = (sum & 0xffff) + (sum >> 16); | ||
48 | return (__force __sum16)~sum; | ||
49 | } | ||
50 | |||
51 | /* | ||
52 | * This is a version of ip_compute_csum() optimized for IP headers, | ||
53 | * which always checksum on 4 octet boundaries. | ||
54 | */ | ||
55 | extern __sum16 ip_fast_csum(const void *iph, unsigned int ihl); | ||
56 | |||
57 | /* | ||
58 | * computes the checksum of the TCP/UDP pseudo-header | ||
59 | * returns the 32-bit unfolded sum; csum_tcpudp_magic() folds and complements it | ||
60 | */ | ||
61 | static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, | ||
62 | unsigned short len, | ||
63 | unsigned short proto, | ||
64 | __wsum sum) | ||
65 | { | ||
66 | unsigned long len_proto = (proto + len) << 8; | ||
67 | asm ("ADD %0, %0, %1\n" | ||
68 | "ADDS %0, %0, %2\n" | ||
69 | "ADDCS %0, %0, #1\n" | ||
70 | "ADDS %0, %0, %3\n" | ||
71 | "ADDCS %0, %0, #1\n" | ||
72 | : "=d" (sum) | ||
73 | : "d" (daddr), "d" (saddr), "d" (len_proto), | ||
74 | "0" (sum) | ||
75 | : "cc"); | ||
76 | return sum; | ||
77 | } | ||
78 | |||
79 | static inline __sum16 | ||
80 | csum_tcpudp_magic(__be32 saddr, __be32 daddr, unsigned short len, | ||
81 | unsigned short proto, __wsum sum) | ||
82 | { | ||
83 | return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum)); | ||
84 | } | ||
85 | |||
86 | /* | ||
87 | * this routine is used for miscellaneous IP-like checksums, mainly | ||
88 | * in icmp.c | ||
89 | */ | ||
90 | extern __sum16 ip_compute_csum(const void *buff, int len); | ||
91 | |||
92 | #endif /* _METAG_CHECKSUM_H */ | ||
diff --git a/arch/metag/include/asm/div64.h b/arch/metag/include/asm/div64.h new file mode 100644 index 000000000000..0fdd11676212 --- /dev/null +++ b/arch/metag/include/asm/div64.h | |||
@@ -0,0 +1,12 @@ | |||
1 | #ifndef __ASM_DIV64_H__ | ||
2 | #define __ASM_DIV64_H__ | ||
3 | |||
4 | #include <asm-generic/div64.h> | ||
5 | |||
6 | extern u64 div_u64(u64 dividend, u64 divisor); | ||
7 | extern s64 div_s64(s64 dividend, s64 divisor); | ||
8 | |||
9 | #define div_u64 div_u64 | ||
10 | #define div_s64 div_s64 | ||
11 | |||
12 | #endif | ||
diff --git a/arch/metag/include/asm/string.h b/arch/metag/include/asm/string.h new file mode 100644 index 000000000000..53e3806eee04 --- /dev/null +++ b/arch/metag/include/asm/string.h | |||
@@ -0,0 +1,13 @@ | |||
1 | #ifndef _METAG_STRING_H_ | ||
2 | #define _METAG_STRING_H_ | ||
3 | |||
4 | #define __HAVE_ARCH_MEMSET | ||
5 | extern void *memset(void *__s, int __c, size_t __count); | ||
6 | |||
7 | #define __HAVE_ARCH_MEMCPY | ||
8 | void *memcpy(void *__to, __const__ void *__from, size_t __n); | ||
9 | |||
10 | #define __HAVE_ARCH_MEMMOVE | ||
11 | extern void *memmove(void *__dest, __const__ void *__src, size_t __n); | ||
12 | |||
13 | #endif /* _METAG_STRING_H_ */ | ||
diff --git a/arch/metag/lib/ashldi3.S b/arch/metag/lib/ashldi3.S new file mode 100644 index 000000000000..78d6974cffef --- /dev/null +++ b/arch/metag/lib/ashldi3.S | |||
@@ -0,0 +1,33 @@ | |||
1 | ! Copyright (C) 2012 by Imagination Technologies Ltd. | ||
2 | ! | ||
3 | ! 64-bit arithmetic shift left routine. | ||
4 | ! | ||
5 | |||
6 | .text | ||
7 | .global ___ashldi3 | ||
8 | .type ___ashldi3,function | ||
9 | |||
10 | ___ashldi3: | ||
11 | MOV D0Re0,D0Ar2 | ||
12 | MOV D1Re0,D1Ar1 | ||
13 | CMP D1Ar3,#0 ! COUNT == 0 | ||
14 | MOVEQ PC,D1RtP ! Yes, return | ||
15 | |||
16 | SUBS D0Ar4,D1Ar3,#32 ! N = COUNT - 32 | ||
17 | BGE $L10 | ||
18 | |||
19 | !! Shift < 32 | ||
20 | NEG D0Ar4,D0Ar4 ! N = - N | ||
21 | LSL D1Re0,D1Re0,D1Ar3 ! HI = HI << COUNT | ||
22 | LSR D0Ar6,D0Re0,D0Ar4 ! TMP= LO >> -(COUNT - 32) | ||
23 | OR D1Re0,D1Re0,D0Ar6 ! HI = HI | TMP | ||
24 | SWAP D0Ar4,D1Ar3 | ||
25 | LSL D0Re0,D0Re0,D0Ar4 ! LO = LO << COUNT | ||
26 | MOV PC,D1RtP | ||
27 | |||
28 | $L10: | ||
29 | !! Shift >= 32 | ||
30 | LSL D1Re0,D0Re0,D0Ar4 ! HI = LO << N | ||
31 | MOV D0Re0,#0 ! LO = 0 | ||
32 | MOV PC,D1RtP | ||
33 | .size ___ashldi3,.-___ashldi3 | ||
diff --git a/arch/metag/lib/ashrdi3.S b/arch/metag/lib/ashrdi3.S new file mode 100644 index 000000000000..7cb7ed3bb1ad --- /dev/null +++ b/arch/metag/lib/ashrdi3.S | |||
@@ -0,0 +1,33 @@ | |||
1 | ! Copyright (C) 2012 by Imagination Technologies Ltd. | ||
2 | ! | ||
3 | ! 64-bit arithmetic shift right routine. | ||
4 | ! | ||
5 | |||
6 | .text | ||
7 | .global ___ashrdi3 | ||
8 | .type ___ashrdi3,function | ||
9 | |||
10 | ___ashrdi3: | ||
11 | MOV D0Re0,D0Ar2 | ||
12 | MOV D1Re0,D1Ar1 | ||
13 | CMP D1Ar3,#0 ! COUNT == 0 | ||
14 | MOVEQ PC,D1RtP ! Yes, return | ||
15 | |||
16 | MOV D0Ar4,D1Ar3 | ||
17 | SUBS D1Ar3,D1Ar3,#32 ! N = COUNT - 32 | ||
18 | BGE $L20 | ||
19 | |||
20 | !! Shift < 32 | ||
21 | NEG D1Ar3,D1Ar3 ! N = - N | ||
22 | LSR D0Re0,D0Re0,D0Ar4 ! LO = LO >> COUNT | ||
23 | LSL D0Ar6,D1Re0,D1Ar3 ! TMP= HI << -(COUNT - 32) | ||
24 | OR D0Re0,D0Re0,D0Ar6 ! LO = LO | TMP | ||
25 | SWAP D1Ar3,D0Ar4 | ||
26 | ASR D1Re0,D1Re0,D1Ar3 ! HI = HI >> COUNT | ||
27 | MOV PC,D1RtP | ||
28 | $L20: | ||
29 | !! Shift >= 32 | ||
30 | ASR D0Re0,D1Re0,D1Ar3 ! LO = HI >> N | ||
31 | ASR D1Re0,D1Re0,#31 ! HI = HI >> 31 | ||
32 | MOV PC,D1RtP | ||
33 | .size ___ashrdi3,.-___ashrdi3 | ||
diff --git a/arch/metag/lib/checksum.c b/arch/metag/lib/checksum.c new file mode 100644 index 000000000000..44d2e1913560 --- /dev/null +++ b/arch/metag/lib/checksum.c | |||
@@ -0,0 +1,168 @@ | |||
1 | /* | ||
2 | * | ||
3 | * INET An implementation of the TCP/IP protocol suite for the LINUX | ||
4 | * operating system. INET is implemented using the BSD Socket | ||
5 | * interface as the means of communication with the user level. | ||
6 | * | ||
7 | * IP/TCP/UDP checksumming routines | ||
8 | * | ||
9 | * Authors: Jorge Cwik, <jorge@laser.satlink.net> | ||
10 | * Arnt Gulbrandsen, <agulbra@nvg.unit.no> | ||
11 | * Tom May, <ftom@netcom.com> | ||
12 | * Andreas Schwab, <schwab@issan.informatik.uni-dortmund.de> | ||
13 | * Lots of code moved from tcp.c and ip.c; see those files | ||
14 | * for more names. | ||
15 | * | ||
16 | * 03/02/96 Jes Sorensen, Andreas Schwab, Roman Hodek: | ||
17 | * Fixed some nasty bugs, causing some horrible crashes. | ||
18 | * A: At some points, the sum (%0) was used as | ||
19 | * length-counter instead of the length counter | ||
20 | * (%1). Thanks to Roman Hodek for pointing this out. | ||
21 | * B: GCC seems to mess up if one uses too many | ||
22 | * data-registers to hold input values and one tries to | ||
23 | * specify d0 and d1 as scratch registers. Letting gcc | ||
24 | * choose these registers itself solves the problem. | ||
25 | * | ||
26 | * This program is free software; you can redistribute it and/or | ||
27 | * modify it under the terms of the GNU General Public License | ||
28 | * as published by the Free Software Foundation; either version | ||
29 | * 2 of the License, or (at your option) any later version. | ||
30 | */ | ||
31 | |||
32 | /* Revised by Kenneth Albanowski for m68knommu. Basic problem: unaligned access | ||
33 | kills, so most of the assembly has to go. */ | ||
34 | |||
35 | #include <linux/module.h> | ||
36 | #include <net/checksum.h> | ||
37 | |||
38 | #include <asm/byteorder.h> | ||
39 | |||
40 | static inline unsigned short from32to16(unsigned int x) | ||
41 | { | ||
42 | /* add up 16-bit and 16-bit for 16+c bit */ | ||
43 | x = (x & 0xffff) + (x >> 16); | ||
44 | /* add up carry.. */ | ||
45 | x = (x & 0xffff) + (x >> 16); | ||
46 | return x; | ||
47 | } | ||
48 | |||
49 | static unsigned int do_csum(const unsigned char *buff, int len) | ||
50 | { | ||
51 | int odd; | ||
52 | unsigned int result = 0; | ||
53 | |||
54 | if (len <= 0) | ||
55 | goto out; | ||
56 | odd = 1 & (unsigned long) buff; | ||
57 | if (odd) { | ||
58 | #ifdef __LITTLE_ENDIAN | ||
59 | result += (*buff << 8); | ||
60 | #else | ||
61 | result = *buff; | ||
62 | #endif | ||
63 | len--; | ||
64 | buff++; | ||
65 | } | ||
66 | if (len >= 2) { | ||
67 | if (2 & (unsigned long) buff) { | ||
68 | result += *(unsigned short *) buff; | ||
69 | len -= 2; | ||
70 | buff += 2; | ||
71 | } | ||
72 | if (len >= 4) { | ||
73 | const unsigned char *end = buff + ((unsigned)len & ~3); | ||
74 | unsigned int carry = 0; | ||
75 | do { | ||
76 | unsigned int w = *(unsigned int *) buff; | ||
77 | buff += 4; | ||
78 | result += carry; | ||
79 | result += w; | ||
80 | carry = (w > result); | ||
81 | } while (buff < end); | ||
82 | result += carry; | ||
83 | result = (result & 0xffff) + (result >> 16); | ||
84 | } | ||
85 | if (len & 2) { | ||
86 | result += *(unsigned short *) buff; | ||
87 | buff += 2; | ||
88 | } | ||
89 | } | ||
90 | if (len & 1) | ||
91 | #ifdef __LITTLE_ENDIAN | ||
92 | result += *buff; | ||
93 | #else | ||
94 | result += (*buff << 8); | ||
95 | #endif | ||
96 | result = from32to16(result); | ||
97 | if (odd) | ||
98 | result = ((result >> 8) & 0xff) | ((result & 0xff) << 8); | ||
99 | out: | ||
100 | return result; | ||
101 | } | ||
102 | EXPORT_SYMBOL(ip_fast_csum); | ||
103 | |||
104 | /* | ||
105 | * computes the checksum of a memory block at buff, length len, | ||
106 | * and adds in "sum" (32-bit) | ||
107 | * | ||
108 | * returns a 32-bit number suitable for feeding into itself | ||
109 | * or csum_tcpudp_magic | ||
110 | * | ||
111 | * this function must be called with even lengths, except | ||
112 | * for the last fragment, which may be odd | ||
113 | * | ||
114 | * it's best to have buff aligned on a 32-bit boundary | ||
115 | */ | ||
116 | __wsum csum_partial(const void *buff, int len, __wsum wsum) | ||
117 | { | ||
118 | unsigned int sum = (__force unsigned int)wsum; | ||
119 | unsigned int result = do_csum(buff, len); | ||
120 | |||
121 | /* add in old sum, and carry.. */ | ||
122 | result += sum; | ||
123 | if (sum > result) | ||
124 | result += 1; | ||
125 | return (__force __wsum)result; | ||
126 | } | ||
127 | EXPORT_SYMBOL(csum_partial); | ||
128 | |||
129 | /* | ||
130 | * this routine is used for miscellaneous IP-like checksums, mainly | ||
131 | * in icmp.c | ||
132 | */ | ||
133 | __sum16 ip_compute_csum(const void *buff, int len) | ||
134 | { | ||
135 | return (__force __sum16)~do_csum(buff, len); | ||
136 | } | ||
137 | EXPORT_SYMBOL(ip_compute_csum); | ||
138 | |||
139 | /* | ||
140 | * copy from user space while checksumming, otherwise like csum_partial | ||
141 | */ | ||
142 | __wsum | ||
143 | csum_partial_copy_from_user(const void __user *src, void *dst, int len, | ||
144 | __wsum sum, int *csum_err) | ||
145 | { | ||
146 | int missing; | ||
147 | |||
148 | missing = __copy_from_user(dst, src, len); | ||
149 | if (missing) { | ||
150 | memset(dst + len - missing, 0, missing); | ||
151 | *csum_err = -EFAULT; | ||
152 | } else | ||
153 | *csum_err = 0; | ||
154 | |||
155 | return csum_partial(dst, len, sum); | ||
156 | } | ||
157 | EXPORT_SYMBOL(csum_partial_copy_from_user); | ||
158 | |||
159 | /* | ||
160 | * copy from kernel space while checksumming, otherwise like csum_partial | ||
161 | */ | ||
162 | __wsum | ||
163 | csum_partial_copy(const void *src, void *dst, int len, __wsum sum) | ||
164 | { | ||
165 | memcpy(dst, src, len); | ||
166 | return csum_partial(dst, len, sum); | ||
167 | } | ||
168 | EXPORT_SYMBOL(csum_partial_copy); | ||
diff --git a/arch/metag/lib/clear_page.S b/arch/metag/lib/clear_page.S new file mode 100644 index 000000000000..43144eebec55 --- /dev/null +++ b/arch/metag/lib/clear_page.S | |||
@@ -0,0 +1,17 @@ | |||
1 | ! Copyright 2007,2008,2009 Imagination Technologies Ltd. | ||
2 | |||
3 | #include <asm/page.h> | ||
4 | |||
5 | .text | ||
6 | .global _clear_page | ||
7 | .type _clear_page,function | ||
8 | !! D1Ar1 - page | ||
9 | _clear_page: | ||
10 | MOV TXRPT,#((PAGE_SIZE / 8) - 1) | ||
11 | MOV D0Re0,#0 | ||
12 | MOV D1Re0,#0 | ||
13 | $Lclear_page_loop: | ||
14 | SETL [D1Ar1++],D0Re0,D1Re0 | ||
15 | BR $Lclear_page_loop | ||
16 | MOV PC,D1RtP | ||
17 | .size _clear_page,.-_clear_page | ||
diff --git a/arch/metag/lib/cmpdi2.S b/arch/metag/lib/cmpdi2.S new file mode 100644 index 000000000000..9c5c663c5aea --- /dev/null +++ b/arch/metag/lib/cmpdi2.S | |||
@@ -0,0 +1,32 @@ | |||
1 | ! Copyright (C) 2012 by Imagination Technologies Ltd. | ||
2 | ! | ||
3 | ! 64-bit signed compare routine. | ||
4 | ! | ||
5 | |||
6 | .text | ||
7 | .global ___cmpdi2 | ||
8 | .type ___cmpdi2,function | ||
9 | |||
10 | ! low high | ||
11 | ! s64 a (D0Ar2, D1Ar1) | ||
12 | ! s64 b (D0Ar4, D1Ar3) | ||
13 | ___cmpdi2: | ||
14 | ! start at 1 (equal) and conditionally increment or decrement | ||
15 | MOV D0Re0,#1 | ||
16 | |||
17 | ! high words differ? | ||
18 | CMP D1Ar1,D1Ar3 | ||
19 | BNE $Lhigh_differ | ||
20 | |||
21 | ! unsigned compare low words | ||
22 | CMP D0Ar2,D0Ar4 | ||
23 | SUBLO D0Re0,D0Re0,#1 | ||
24 | ADDHI D0Re0,D0Re0,#1 | ||
25 | MOV PC,D1RtP | ||
26 | |||
27 | $Lhigh_differ: | ||
28 | ! signed compare high words | ||
29 | SUBLT D0Re0,D0Re0,#1 | ||
30 | ADDGT D0Re0,D0Re0,#1 | ||
31 | MOV PC,D1RtP | ||
32 | .size ___cmpdi2,.-___cmpdi2 | ||
diff --git a/arch/metag/lib/copy_page.S b/arch/metag/lib/copy_page.S new file mode 100644 index 000000000000..91f7d461239c --- /dev/null +++ b/arch/metag/lib/copy_page.S | |||
@@ -0,0 +1,20 @@ | |||
1 | ! Copyright 2007,2008 Imagination Technologies Ltd. | ||
2 | |||
3 | #include <asm/page.h> | ||
4 | |||
5 | .text | ||
6 | .global _copy_page | ||
7 | .type _copy_page,function | ||
8 | !! D1Ar1 - to | ||
9 | !! D0Ar2 - from | ||
10 | _copy_page: | ||
11 | MOV D0FrT,#PAGE_SIZE | ||
12 | $Lcopy_page_loop: | ||
13 | GETL D0Re0,D1Re0,[D0Ar2++] | ||
14 | GETL D0Ar6,D1Ar5,[D0Ar2++] | ||
15 | SETL [D1Ar1++],D0Re0,D1Re0 | ||
16 | SETL [D1Ar1++],D0Ar6,D1Ar5 | ||
17 | SUBS D0FrT,D0FrT,#16 | ||
18 | BNZ $Lcopy_page_loop | ||
19 | MOV PC,D1RtP | ||
20 | .size _copy_page,.-_copy_page | ||
diff --git a/arch/metag/lib/delay.c b/arch/metag/lib/delay.c new file mode 100644 index 000000000000..0b308f48b37a --- /dev/null +++ b/arch/metag/lib/delay.c | |||
@@ -0,0 +1,56 @@ | |||
1 | /* | ||
2 | * Precise Delay Loops for Meta | ||
3 | * | ||
4 | * Copyright (C) 1993 Linus Torvalds | ||
5 | * Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz> | ||
6 | * Copyright (C) 2007,2009 Imagination Technologies Ltd. | ||
7 | * | ||
8 | */ | ||
9 | |||
10 | #include <linux/export.h> | ||
11 | #include <linux/sched.h> | ||
12 | #include <linux/delay.h> | ||
13 | |||
14 | #include <asm/core_reg.h> | ||
15 | #include <asm/processor.h> | ||
16 | |||
17 | /* | ||
18 | * TXTACTCYC is only 24 bits, so on chips with fast clocks it will wrap | ||
19 | * many times per-second. If it does wrap __delay will return prematurely, | ||
20 | * but this is only likely with large delay values. | ||
21 | * | ||
22 | * We also can't implement read_current_timer() with TXTACTCYC due to | ||
23 | * this wrapping behaviour. | ||
24 | */ | ||
25 | #define rdtimer(t) t = __core_reg_get(TXTACTCYC) | ||
26 | |||
27 | void __delay(unsigned long loops) | ||
28 | { | ||
29 | unsigned long bclock, now; | ||
30 | |||
31 | rdtimer(bclock); | ||
32 | do { | ||
33 | asm("NOP"); | ||
34 | rdtimer(now); | ||
35 | } while ((now-bclock) < loops); | ||
36 | } | ||
37 | EXPORT_SYMBOL(__delay); | ||
38 | |||
39 | inline void __const_udelay(unsigned long xloops) | ||
40 | { | ||
41 | u64 loops = (u64)xloops * (u64)loops_per_jiffy * HZ; | ||
42 | __delay(loops >> 32); | ||
43 | } | ||
44 | EXPORT_SYMBOL(__const_udelay); | ||
45 | |||
46 | void __udelay(unsigned long usecs) | ||
47 | { | ||
48 | __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */ | ||
49 | } | ||
50 | EXPORT_SYMBOL(__udelay); | ||
51 | |||
52 | void __ndelay(unsigned long nsecs) | ||
53 | { | ||
54 | __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ | ||
55 | } | ||
56 | EXPORT_SYMBOL(__ndelay); | ||
diff --git a/arch/metag/lib/div64.S b/arch/metag/lib/div64.S new file mode 100644 index 000000000000..1cfc93498f70 --- /dev/null +++ b/arch/metag/lib/div64.S | |||
@@ -0,0 +1,108 @@ | |||
1 | ! Copyright (C) 2012 Imagination Technologies Ltd. | ||
2 | ! | ||
3 | ! Signed/unsigned 64-bit division routines. | ||
4 | ! | ||
5 | |||
6 | .text | ||
7 | .global _div_u64 | ||
8 | .type _div_u64,function | ||
9 | |||
10 | _div_u64: | ||
11 | $L1: | ||
12 | ORS A0.3,D1Ar3,D0Ar4 | ||
13 | BNE $L3 | ||
14 | $L2: | ||
15 | MOV D0Re0,D0Ar2 | ||
16 | MOV D1Re0,D1Ar1 | ||
17 | MOV PC,D1RtP | ||
18 | $L3: | ||
19 | CMP D1Ar3,D1Ar1 | ||
20 | CMPEQ D0Ar4,D0Ar2 | ||
21 | MOV D0Re0,#1 | ||
22 | MOV D1Re0,#0 | ||
23 | BHS $L6 | ||
24 | $L4: | ||
25 | ADDS D0Ar6,D0Ar4,D0Ar4 | ||
26 | ADD D1Ar5,D1Ar3,D1Ar3 | ||
27 | ADDCS D1Ar5,D1Ar5,#1 | ||
28 | CMP D1Ar5,D1Ar3 | ||
29 | CMPEQ D0Ar6,D0Ar4 | ||
30 | BLO $L6 | ||
31 | $L5: | ||
32 | MOV D0Ar4,D0Ar6 | ||
33 | MOV D1Ar3,D1Ar5 | ||
34 | ADDS D0Re0,D0Re0,D0Re0 | ||
35 | ADD D1Re0,D1Re0,D1Re0 | ||
36 | ADDCS D1Re0,D1Re0,#1 | ||
37 | CMP D1Ar3,D1Ar1 | ||
38 | CMPEQ D0Ar4,D0Ar2 | ||
39 | BLO $L4 | ||
40 | $L6: | ||
41 | ORS A0.3,D1Re0,D0Re0 | ||
42 | MOV D0Ar6,#0 | ||
43 | MOV D1Ar5,D0Ar6 | ||
44 | BEQ $L10 | ||
45 | $L7: | ||
46 | CMP D1Ar1,D1Ar3 | ||
47 | CMPEQ D0Ar2,D0Ar4 | ||
48 | BLO $L9 | ||
49 | $L8: | ||
50 | ADDS D0Ar6,D0Ar6,D0Re0 | ||
51 | ADD D1Ar5,D1Ar5,D1Re0 | ||
52 | ADDCS D1Ar5,D1Ar5,#1 | ||
53 | |||
54 | SUBS D0Ar2,D0Ar2,D0Ar4 | ||
55 | SUB D1Ar1,D1Ar1,D1Ar3 | ||
56 | SUBCS D1Ar1,D1Ar1,#1 | ||
57 | $L9: | ||
58 | LSL A0.3,D1Re0,#31 | ||
59 | LSR D0Re0,D0Re0,#1 | ||
60 | LSR D1Re0,D1Re0,#1 | ||
61 | OR D0Re0,D0Re0,A0.3 | ||
62 | LSL A0.3,D1Ar3,#31 | ||
63 | LSR D0Ar4,D0Ar4,#1 | ||
64 | LSR D1Ar3,D1Ar3,#1 | ||
65 | OR D0Ar4,D0Ar4,A0.3 | ||
66 | ORS A0.3,D1Re0,D0Re0 | ||
67 | BNE $L7 | ||
68 | $L10: | ||
69 | MOV D0Re0,D0Ar6 | ||
70 | MOV D1Re0,D1Ar5 | ||
71 | MOV PC,D1RtP | ||
72 | .size _div_u64,.-_div_u64 | ||
73 | |||
74 | .text | ||
75 | .global _div_s64 | ||
76 | .type _div_s64,function | ||
77 | _div_s64: | ||
78 | MSETL [A0StP],D0FrT,D0.5 | ||
79 | XOR D0.5,D0Ar2,D0Ar4 | ||
80 | XOR D1.5,D1Ar1,D1Ar3 | ||
81 | TSTT D1Ar1,#HI(0x80000000) | ||
82 | BZ $L25 | ||
83 | |||
84 | NEGS D0Ar2,D0Ar2 | ||
85 | NEG D1Ar1,D1Ar1 | ||
86 | SUBCS D1Ar1,D1Ar1,#1 | ||
87 | $L25: | ||
88 | TSTT D1Ar3,#HI(0x80000000) | ||
89 | BZ $L27 | ||
90 | |||
91 | NEGS D0Ar4,D0Ar4 | ||
92 | NEG D1Ar3,D1Ar3 | ||
93 | SUBCS D1Ar3,D1Ar3,#1 | ||
94 | $L27: | ||
95 | CALLR D1RtP,_div_u64 | ||
96 | TSTT D1.5,#HI(0x80000000) | ||
97 | BZ $L29 | ||
98 | |||
99 | NEGS D0Re0,D0Re0 | ||
100 | NEG D1Re0,D1Re0 | ||
101 | SUBCS D1Re0,D1Re0,#1 | ||
102 | $L29: | ||
103 | |||
104 | GETL D0FrT,D1RtP,[A0StP+#(-16)] | ||
105 | GETL D0.5,D1.5,[A0StP+#(-8)] | ||
106 | SUB A0StP,A0StP,#16 | ||
107 | MOV PC,D1RtP | ||
108 | .size _div_s64,.-_div_s64 | ||
diff --git a/arch/metag/lib/divsi3.S b/arch/metag/lib/divsi3.S new file mode 100644 index 000000000000..7c8a8ae9a0a1 --- /dev/null +++ b/arch/metag/lib/divsi3.S | |||
@@ -0,0 +1,100 @@ | |||
1 | ! Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007 | ||
2 | ! Imagination Technologies Ltd | ||
3 | ! | ||
4 | ! Integer divide routines. | ||
5 | ! | ||
6 | |||
7 | .text | ||
8 | .global ___udivsi3 | ||
9 | .type ___udivsi3,function | ||
10 | .align 2 | ||
11 | ___udivsi3: | ||
12 | !! | ||
13 | !! Since core is signed divide case, just set control variable | ||
14 | !! | ||
15 | MOV D1Re0,D0Ar2 ! Au already in D1Ar1, Bu -> D1Re0 | ||
16 | MOV D0Re0,#0 ! Result is 0 | ||
17 | MOV D0Ar4,#0 ! Return positive result | ||
18 | B $LIDMCUStart | ||
19 | .size ___udivsi3,.-___udivsi3 | ||
20 | |||
21 | !! | ||
22 | !! 32-bit division signed i/p - passed signed 32-bit numbers | ||
23 | !! | ||
24 | .global ___divsi3 | ||
25 | .type ___divsi3,function | ||
26 | .align 2 | ||
27 | ___divsi3: | ||
28 | !! | ||
29 | !! A already in D1Ar1, B already in D0Ar2 -> make B abs(B) | ||
30 | !! | ||
31 | MOV D1Re0,D0Ar2 ! A already in D1Ar1, B -> D1Re0 | ||
32 | MOV D0Re0,#0 ! Result is 0 | ||
33 | XOR D0Ar4,D1Ar1,D1Re0 ! D0Ar4 -ive if result is -ive | ||
34 | ABS D1Ar1,D1Ar1 ! abs(A) -> Au | ||
35 | ABS D1Re0,D1Re0 ! abs(B) -> Bu | ||
36 | $LIDMCUStart: | ||
37 | CMP D1Ar1,D1Re0 ! Is ( Au > Bu )? | ||
38 | LSR D1Ar3,D1Ar1,#2 ! Calculate (Au & (~3)) >> 2 | ||
39 | CMPHI D1Re0,D1Ar3 ! OR ( (Au & (~3)) <= (Bu << 2) )? | ||
40 | LSLSHI D1Ar3,D1Re0,#1 ! Buq = Bu << 1 | ||
41 | BLS $LIDMCUSetup ! Yes: Do normal divide | ||
42 | !! | ||
43 | !! Quick divide setup can assume that CurBit only needs to start at 2 | ||
44 | !! | ||
45 | $LIDMCQuick: | ||
46 | CMP D1Ar1,D1Ar3 ! ( A >= Buq )? | ||
47 | ADDCC D0Re0,D0Re0,#2 ! If yes result += 2 | ||
48 | SUBCC D1Ar1,D1Ar1,D1Ar3 ! and A -= Buq | ||
49 | CMP D1Ar1,D1Re0 ! ( A >= Bu )? | ||
50 | ADDCC D0Re0,D0Re0,#1 ! If yes result += 1 | ||
51 | SUBCC D1Ar1,D1Ar1,D1Re0 ! and A -= Bu | ||
52 | ORS D0Ar4,D0Ar4,D0Ar4 ! Return neg result? | ||
53 | NEG D0Ar2,D0Re0 ! Calculate neg result | ||
54 | MOVMI D0Re0,D0Ar2 ! Yes: Take neg result | ||
55 | $LIDMCRet: | ||
56 | MOV PC,D1RtP | ||
57 | !! | ||
58 | !! Setup for general unsigned divide code | ||
59 | !! | ||
60 | !! D0Re0 is used to form the result, already set to Zero | ||
61 | !! D1Re0 is the input Bu value, this gets trashed | ||
62 | !! D0Ar6 is curbit which is set to 1 at the start and shifted up | ||
63 | !! D0Ar4 is negative if we should return a negative result | ||
64 | !! D1Ar1 is the input Au value, eventually this holds the remainder | ||
65 | !! | ||
66 | $LIDMCUSetup: | ||
67 | CMP D1Ar1,D1Re0 ! Is ( Au < Bu )? | ||
68 | MOV D0Ar6,#1 ! Set curbit to 1 | ||
69 | BCS $LIDMCRet ! Yes: Return 0 remainder Au | ||
70 | !! | ||
71 | !! Calculate alignment using FFB instruction | ||
72 | !! | ||
73 | FFB D1Ar5,D1Ar1 ! Find first bit of Au | ||
74 | ANDN D1Ar5,D1Ar5,#31 ! Handle exceptional case. | ||
75 | ORN D1Ar5,D1Ar5,#31 ! if N bit set, set to 31 | ||
76 | FFB D1Ar3,D1Re0 ! Find first bit of Bu | ||
77 | ANDN D1Ar3,D1Ar3,#31 ! Handle exceptional case. | ||
78 | ORN D1Ar3,D1Ar3,#31 ! if N bit set, set to 31 | ||
79 | SUBS D1Ar3,D1Ar5,D1Ar3 ! calculate diff, ffbA - ffbB | ||
80 | MOV D0Ar2,D1Ar3 ! copy into bank 0 | ||
81 | LSLGT D1Re0,D1Re0,D1Ar3 ! ( > 0) ? left shift B | ||
82 | LSLGT D0Ar6,D0Ar6,D0Ar2 ! ( > 0) ? left shift curbit | ||
83 | !! | ||
84 | !! Now we start the divide proper, logic is | ||
85 | !! | ||
86 | !! if ( A >= B ) add curbit to result and subtract B from A | ||
87 | !! shift curbit and B down by 1 in either case | ||
88 | !! | ||
89 | $LIDMCLoop: | ||
90 | CMP D1Ar1, D1Re0 ! ( A >= B )? | ||
91 | ADDCC D0Re0, D0Re0, D0Ar6 ! If yes result += curbit | ||
92 | SUBCC D1Ar1, D1Ar1, D1Re0 ! and A -= B | ||
93 | LSRS D0Ar6, D0Ar6, #1 ! Shift down curbit, is it zero? | ||
94 | LSR D1Re0, D1Re0, #1 ! Shift down B | ||
95 | BNZ $LIDMCLoop ! Was single bit in curbit lost? | ||
96 | ORS D0Ar4,D0Ar4,D0Ar4 ! Return neg result? | ||
97 | NEG D0Ar2,D0Re0 ! Calculate neg result | ||
98 | MOVMI D0Re0,D0Ar2 ! Yes: Take neg result | ||
99 | MOV PC,D1RtP | ||
100 | .size ___divsi3,.-___divsi3 | ||
diff --git a/arch/metag/lib/ip_fast_csum.S b/arch/metag/lib/ip_fast_csum.S new file mode 100644 index 000000000000..533b1e73deac --- /dev/null +++ b/arch/metag/lib/ip_fast_csum.S | |||
@@ -0,0 +1,32 @@ | |||
1 | |||
2 | .text | ||
3 | /* | ||
4 | * This is a version of ip_compute_csum() optimized for IP headers, | ||
5 | * which always checksum on 4 octet boundaries. | ||
6 | * | ||
7 | * extern __sum16 ip_fast_csum(const void *iph, unsigned int ihl); | ||
8 | * | ||
9 | */ | ||
10 | .global _ip_fast_csum | ||
11 | .type _ip_fast_csum,function | ||
12 | _ip_fast_csum: | ||
13 | !! TXRPT needs loops - 1 | ||
14 | SUBS TXRPT,D0Ar2,#1 | ||
15 | MOV D0Re0,#0 | ||
16 | BLO $Lfast_csum_exit | ||
17 | $Lfast_csum_loop: | ||
18 | GETD D1Ar3,[D1Ar1++] | ||
19 | ADDS D0Re0,D0Re0,D1Ar3 | ||
20 | ADDCS D0Re0,D0Re0,#1 | ||
21 | BR $Lfast_csum_loop | ||
22 | LSR D0Ar4,D0Re0,#16 | ||
23 | AND D0Re0,D0Re0,#0xffff | ||
24 | AND D0Ar4,D0Ar4,#0xffff | ||
25 | ADD D0Re0,D0Re0,D0Ar4 | ||
26 | LSR D0Ar4,D0Re0,#16 | ||
27 | ADD D0Re0,D0Re0,D0Ar4 | ||
28 | XOR D0Re0,D0Re0,#-1 | ||
29 | AND D0Re0,D0Re0,#0xffff | ||
30 | $Lfast_csum_exit: | ||
31 | MOV PC,D1RtP | ||
32 | .size _ip_fast_csum,.-_ip_fast_csum | ||
diff --git a/arch/metag/lib/lshrdi3.S b/arch/metag/lib/lshrdi3.S new file mode 100644 index 000000000000..47f720283077 --- /dev/null +++ b/arch/metag/lib/lshrdi3.S | |||
@@ -0,0 +1,33 @@ | |||
1 | ! Copyright (C) 2012 by Imagination Technologies Ltd. | ||
2 | ! | ||
3 | ! 64-bit logical shift right routine. | ||
4 | ! | ||
5 | |||
6 | .text | ||
7 | .global ___lshrdi3 | ||
8 | .type ___lshrdi3,function | ||
9 | |||
10 | ___lshrdi3: | ||
11 | MOV D0Re0,D0Ar2 | ||
12 | MOV D1Re0,D1Ar1 | ||
13 | CMP D1Ar3,#0 ! COUNT == 0 | ||
14 | MOVEQ PC,D1RtP ! Yes, return | ||
15 | |||
16 | MOV D0Ar4,D1Ar3 | ||
17 | SUBS D1Ar3,D1Ar3,#32 ! N = COUNT - 32 | ||
18 | BGE $L30 | ||
19 | |||
20 | !! Shift < 32 | ||
21 | NEG D1Ar3,D1Ar3 ! N = - N | ||
22 | LSR D0Re0,D0Re0,D0Ar4 ! LO = LO >> COUNT | ||
23 | LSL D0Ar6,D1Re0,D1Ar3 ! TMP= HI << -(COUNT - 32) | ||
24 | OR D0Re0,D0Re0,D0Ar6 ! LO = LO | TMP | ||
25 | SWAP D1Ar3,D0Ar4 | ||
26 | LSR D1Re0,D1Re0,D1Ar3 ! HI = HI >> COUNT | ||
27 | MOV PC,D1RtP | ||
28 | $L30: | ||
29 | !! Shift >= 32 | ||
30 | LSR D0Re0,D1Re0,D1Ar3 ! LO = HI >> N | ||
31 | MOV D1Re0,#0 ! HI = 0 | ||
32 | MOV PC,D1RtP | ||
33 | .size ___lshrdi3,.-___lshrdi3 | ||
diff --git a/arch/metag/lib/memcpy.S b/arch/metag/lib/memcpy.S new file mode 100644 index 000000000000..46b7a2b9479e --- /dev/null +++ b/arch/metag/lib/memcpy.S | |||
@@ -0,0 +1,185 @@ | |||
1 | ! Copyright (C) 2008-2012 Imagination Technologies Ltd. | ||
2 | |||
3 | .text | ||
4 | .global _memcpy | ||
5 | .type _memcpy,function | ||
6 | ! D1Ar1 dst | ||
7 | ! D0Ar2 src | ||
8 | ! D1Ar3 cnt | ||
9 | ! D0Re0 dst | ||
10 | _memcpy: | ||
11 | CMP D1Ar3, #16 | ||
12 | MOV A1.2, D0Ar2 ! source pointer | ||
13 | MOV A0.2, D1Ar1 ! destination pointer | ||
14 | MOV A0.3, D1Ar1 ! for return value | ||
15 | ! If there are less than 16 bytes to copy use the byte copy loop | ||
16 | BGE $Llong_copy | ||
17 | |||
18 | $Lbyte_copy: | ||
19 | ! Simply copy a byte at a time | ||
20 | SUBS TXRPT, D1Ar3, #1 ! TXRPT = count - 1 for the BR loop below | ||
21 | BLT $Lend ! count was 0: nothing to copy | ||
22 | $Lloop_byte: | ||
23 | GETB D1Re0, [A1.2++] | ||
24 | SETB [A0.2++], D1Re0 | ||
25 | BR $Lloop_byte ! repeat, counted by TXRPT | ||
26 | |||
27 | $Lend: | ||
28 | ! Finally set return value and return | ||
29 | MOV D0Re0, A0.3 | ||
30 | MOV PC, D1RtP | ||
31 | |||
32 | $Llong_copy: | ||
33 | ANDS D1Ar5, D1Ar1, #7 ! test destination alignment | ||
34 | BZ $Laligned_dst | ||
35 | |||
36 | ! The destination address is not 8 byte aligned. We will copy bytes from | ||
37 | ! the source to the destination until the remaining data has an 8 byte | ||
38 | ! destination address alignment (i.e we should never copy more than 7 | ||
39 | ! bytes here). | ||
40 | $Lalign_dst: | ||
41 | GETB D0Re0, [A1.2++] | ||
42 | ADD D1Ar5, D1Ar5, #1 ! dest is aligned when D1Ar5 reaches #8 | ||
43 | SUB D1Ar3, D1Ar3, #1 ! decrement count of remaining bytes | ||
44 | SETB [A0.2++], D0Re0 | ||
45 | CMP D1Ar5, #8 | ||
46 | BNE $Lalign_dst | ||
47 | |||
48 | ! We have at least (16 - 7) = 9 bytes to copy - calculate the number of 8 byte | ||
49 | ! blocks, then jump to the unaligned copy loop or fall through to the aligned | ||
50 | ! copy loop as appropriate. | ||
51 | $Laligned_dst: | ||
52 | MOV D0Ar4, A1.2 | ||
53 | LSR D1Ar5, D1Ar3, #3 ! D1Ar5 = number of 8 byte blocks | ||
54 | ANDS D0Ar4, D0Ar4, #7 ! test source alignment | ||
55 | BNZ $Lunaligned_copy ! if unaligned, use unaligned copy loop | ||
56 | |||
57 | ! Both source and destination are 8 byte aligned - the easy case. | ||
58 | $Laligned_copy: | ||
59 | LSRS D1Ar5, D1Ar3, #5 ! D1Ar5 = number of 32 byte blocks | ||
60 | BZ $Lbyte_copy ! fewer than 32 bytes: byte copy them all | ||
61 | SUB TXRPT, D1Ar5, #1 ! repeat count for the 32 byte loop | ||
62 | |||
63 | $Laligned_32: | ||
64 | GETL D0Re0, D1Re0, [A1.2++] | ||
65 | GETL D0Ar6, D1Ar5, [A1.2++] | ||
66 | SETL [A0.2++], D0Re0, D1Re0 | ||
67 | SETL [A0.2++], D0Ar6, D1Ar5 | ||
68 | GETL D0Re0, D1Re0, [A1.2++] | ||
69 | GETL D0Ar6, D1Ar5, [A1.2++] | ||
70 | SETL [A0.2++], D0Re0, D1Re0 | ||
71 | SETL [A0.2++], D0Ar6, D1Ar5 | ||
72 | BR $Laligned_32 ! repeat, counted by TXRPT | ||
73 | |||
74 | ! If there are any remaining bytes use the byte copy loop, otherwise we are done | ||
75 | ANDS D1Ar3, D1Ar3, #0x1f | ||
76 | BNZ $Lbyte_copy | ||
77 | B $Lend | ||
78 | |||
79 | ! The destination is 8 byte aligned but the source is not, and there are 8 | ||
80 | ! or more bytes to be copied. | ||
81 | $Lunaligned_copy: | ||
82 | ! Adjust the source pointer (A1.2) to the 8 byte boundary before its | ||
83 | ! current value | ||
84 | MOV D0Ar4, A1.2 | ||
85 | MOV D0Ar6, A1.2 | ||
86 | ANDMB D0Ar4, D0Ar4, #0xfff8 | ||
87 | MOV A1.2, D0Ar4 | ||
88 | ! Save the number of bytes of mis-alignment in D0Ar4 for use later | ||
89 | SUBS D0Ar6, D0Ar6, D0Ar4 | ||
90 | MOV D0Ar4, D0Ar6 | ||
91 | ! if there is no mis-alignment after all, use the aligned copy loop | ||
92 | BZ $Laligned_copy | ||
93 | |||
94 | ! prefetch 8 bytes | ||
95 | GETL D0Re0, D1Re0, [A1.2] | ||
96 | |||
97 | SUB TXRPT, D1Ar5, #1 ! D1Ar5 = number of 8 byte blocks (set at $Laligned_dst) | ||
98 | |||
99 | ! There are 3 mis-alignment cases to be considered. Less than 4 bytes, exactly | ||
100 | ! 4 bytes, and more than 4 bytes. | ||
101 | CMP D0Ar6, #4 | ||
102 | BLT $Lunaligned_1_2_3 ! use 1-3 byte mis-alignment loop | ||
103 | BZ $Lunaligned_4 ! use 4 byte mis-alignment loop | ||
104 | |||
105 | ! The mis-alignment is more than 4 bytes | ||
106 | $Lunaligned_5_6_7: | ||
107 | SUB D0Ar6, D0Ar6, #4 | ||
108 | ! Calculate the bit offsets required for the shift operations necessary | ||
109 | ! to align the data. | ||
110 | ! D0Ar6 = bit offset, D1Ar5 = (32 - bit offset) | ||
111 | MULW D0Ar6, D0Ar6, #8 | ||
112 | MOV D1Ar5, #32 | ||
113 | SUB D1Ar5, D1Ar5, D0Ar6 | ||
114 | ! Move data 4 bytes before we enter the main loop | ||
115 | MOV D0Re0, D1Re0 | ||
116 | |||
117 | $Lloop_5_6_7: | ||
118 | GETL D0Ar2, D1Ar1, [++A1.2] | ||
119 | ! form 64-bit data in D0Re0, D1Re0 | ||
120 | LSR D0Re0, D0Re0, D0Ar6 | ||
121 | MOV D1Re0, D0Ar2 | ||
122 | LSL D1Re0, D1Re0, D1Ar5 | ||
123 | ADD D0Re0, D0Re0, D1Re0 ! shifted parts occupy disjoint bits, so ADD acts as OR | ||
124 | |||
125 | LSR D0Ar2, D0Ar2, D0Ar6 | ||
126 | LSL D1Re0, D1Ar1, D1Ar5 | ||
127 | ADD D1Re0, D1Re0, D0Ar2 | ||
128 | |||
129 | SETL [A0.2++], D0Re0, D1Re0 | ||
130 | MOV D0Re0, D1Ar1 ! carry the last word loaded into the next iteration | ||
131 | BR $Lloop_5_6_7 ! repeat, counted by TXRPT | ||
132 | |||
133 | B $Lunaligned_end | ||
134 | |||
135 | $Lunaligned_1_2_3: | ||
136 | ! Calculate the bit offsets required for the shift operations necessary | ||
137 | ! to align the data. | ||
138 | ! D0Ar6 = bit offset, D1Ar5 = (32 - bit offset) | ||
139 | MULW D0Ar6, D0Ar6, #8 | ||
140 | MOV D1Ar5, #32 | ||
141 | SUB D1Ar5, D1Ar5, D0Ar6 | ||
142 | |||
143 | $Lloop_1_2_3: | ||
144 | ! form 64-bit data in D0Re0,D1Re0 | ||
145 | LSR D0Re0, D0Re0, D0Ar6 | ||
146 | LSL D1Ar1, D1Re0, D1Ar5 | ||
147 | ADD D0Re0, D0Re0, D1Ar1 | ||
148 | MOV D0Ar2, D1Re0 | ||
149 | LSR D0FrT, D0Ar2, D0Ar6 | ||
150 | GETL D0Ar2, D1Ar1, [++A1.2] | ||
151 | |||
152 | MOV D1Re0, D0Ar2 | ||
153 | LSL D1Re0, D1Re0, D1Ar5 | ||
154 | ADD D1Re0, D1Re0, D0FrT | ||
155 | |||
156 | SETL [A0.2++], D0Re0, D1Re0 | ||
157 | MOV D0Re0, D0Ar2 ! carry the 8 bytes just loaded into the next iteration | ||
158 | MOV D1Re0, D1Ar1 | ||
159 | BR $Lloop_1_2_3 ! repeat, counted by TXRPT | ||
160 | |||
161 | B $Lunaligned_end | ||
162 | |||
163 | ! The 4 byte mis-alignment case - this does not require any shifting, just a | ||
164 | ! shuffling of registers. | ||
165 | $Lunaligned_4: | ||
166 | MOV D0Re0, D1Re0 | ||
167 | $Lloop_4: | ||
168 | GETL D0Ar2, D1Ar1, [++A1.2] | ||
169 | MOV D1Re0, D0Ar2 | ||
170 | SETL [A0.2++], D0Re0, D1Re0 | ||
171 | MOV D0Re0, D1Ar1 | ||
172 | BR $Lloop_4 ! repeat, counted by TXRPT | ||
173 | |||
174 | $Lunaligned_end: | ||
175 | ! If there are no remaining bytes to copy, we are done. | ||
176 | ANDS D1Ar3, D1Ar3, #7 | ||
177 | BZ $Lend | ||
178 | ! Re-adjust the source pointer (A1.2) back to the actual (unaligned) byte | ||
179 | ! address of the remaining bytes, and fall through to the byte copy loop. | ||
180 | MOV D0Ar6, A1.2 | ||
181 | ADD D1Ar5, D0Ar4, D0Ar6 | ||
182 | MOV A1.2, D1Ar5 | ||
183 | B $Lbyte_copy | ||
184 | |||
185 | .size _memcpy,.-_memcpy | ||
diff --git a/arch/metag/lib/memmove.S b/arch/metag/lib/memmove.S new file mode 100644 index 000000000000..228ea04d7b39 --- /dev/null +++ b/arch/metag/lib/memmove.S | |||
@@ -0,0 +1,345 @@ | |||
1 | ! Copyright (C) 2008-2012 Imagination Technologies Ltd. | ||
2 | |||
3 | .text | ||
4 | .global _memmove | ||
5 | .type _memmove,function | ||
6 | ! D1Ar1 dst | ||
7 | ! D0Ar2 src | ||
8 | ! D1Ar3 cnt | ||
9 | ! D0Re0 dst | ||
10 | _memmove: | ||
11 | CMP D1Ar3, #0 ! cnt == 0? | ||
12 | MOV D0Re0, D1Ar1 ! return value = dst | ||
13 | BZ $LEND2 ! nothing to move; nothing saved yet, so skip the restore | ||
14 | MSETL [A0StP], D0.5, D0.6, D0.7 ! save scratch registers on the stack; restored at $LEND | ||
15 | MOV D1Ar5, D0Ar2 | ||
16 | CMP D1Ar1, D1Ar5 | ||
17 | BLT $Lforwards_copy ! dst < src | ||
18 | SUB D0Ar4, D1Ar1, D1Ar3 | ||
19 | ADD D0Ar4, D0Ar4, #1 ! D0Ar4 = dst - cnt + 1 | ||
20 | CMP D0Ar2, D0Ar4 | ||
21 | BLT $Lforwards_copy ! src < dst - cnt + 1, i.e. src + cnt <= dst | ||
22 | ! should copy backwards | ||
23 | MOV D1Re0, D0Ar2 | ||
24 | ! adjust pointer to the end of mem | ||
25 | ADD D0Ar2, D1Re0, D1Ar3 | ||
26 | ADD D1Ar1, D1Ar1, D1Ar3 | ||
27 | |||
28 | MOV A1.2, D0Ar2 | ||
29 | MOV A0.2, D1Ar1 | ||
30 | CMP D1Ar3, #8 | ||
31 | BLT $Lbbyte_loop | ||
32 | |||
33 | MOV D0Ar4, D0Ar2 | ||
34 | MOV D1Ar5, D1Ar1 | ||
35 | |||
36 | ! test 8 byte alignment | ||
37 | ANDS D1Ar5, D1Ar5, #7 | ||
38 | BNE $Lbdest_unaligned | ||
39 | |||
40 | ANDS D0Ar4, D0Ar4, #7 | ||
41 | BNE $Lbsrc_unaligned | ||
42 | |||
43 | LSR D1Ar5, D1Ar3, #3 ! D1Ar5 = number of 8 byte blocks | ||
44 | |||
45 | $Lbaligned_loop: | ||
46 | GETL D0Re0, D1Re0, [--A1.2] | ||
47 | SETL [--A0.2], D0Re0, D1Re0 | ||
48 | SUBS D1Ar5, D1Ar5, #1 | ||
49 | BNE $Lbaligned_loop | ||
50 | |||
51 | ANDS D1Ar3, D1Ar3, #7 ! bytes left over after the 8 byte blocks | ||
52 | BZ $Lbbyte_loop_exit | ||
53 | $Lbbyte_loop: | ||
54 | GETB D1Re0, [--A1.2] | ||
55 | SETB [--A0.2], D1Re0 | ||
56 | SUBS D1Ar3, D1Ar3, #1 | ||
57 | BNE $Lbbyte_loop | ||
58 | $Lbbyte_loop_exit: | ||
59 | MOV D0Re0, A0.2 ! return value: A0.2 has been decremented back to dst | ||
60 | $LEND: | ||
61 | SUB A0.2, A0StP, #24 ! address of the 24 bytes saved on entry | ||
62 | MGETL D0.5, D0.6, D0.7, [A0.2] ! restore the saved registers | ||
63 | SUB A0StP, A0StP, #24 ! release the stack space | ||
64 | $LEND2: | ||
65 | MOV PC, D1RtP | ||
66 | |||
67 | $Lbdest_unaligned: | ||
68 | GETB D0Re0, [--A1.2] | ||
69 | SETB [--A0.2], D0Re0 | ||
70 | SUBS D1Ar5, D1Ar5, #1 ! D1Ar5 counts down the bytes above the 8 byte boundary | ||
71 | SUB D1Ar3, D1Ar3, #1 | ||
72 | BNE $Lbdest_unaligned | ||
73 | CMP D1Ar3, #8 | ||
74 | BLT $Lbbyte_loop | ||
75 | $Lbsrc_unaligned: | ||
76 | LSR D1Ar5, D1Ar3, #3 ! D1Ar5 = number of 8 byte blocks | ||
77 | ! adjust A1.2 | ||
78 | MOV D0Ar4, A1.2 | ||
79 | ! save original address | ||
80 | MOV D0Ar6, A1.2 | ||
81 | |||
82 | ADD D0Ar4, D0Ar4, #7 | ||
83 | ANDMB D0Ar4, D0Ar4, #0xfff8 | ||
84 | ! new address is the 8-byte aligned one above the original | ||
85 | MOV A1.2, D0Ar4 | ||
86 | |||
87 | ! A0.2 dst 64-bit is aligned | ||
88 | ! measure the gap size | ||
89 | SUB D0Ar6, D0Ar4, D0Ar6 | ||
90 | MOVS D0Ar4, D0Ar6 | ||
91 | ! keep this information for the later adjustment | ||
92 | ! both aligned | ||
93 | BZ $Lbaligned_loop | ||
94 | |||
95 | ! prefetch | ||
96 | GETL D0Re0, D1Re0, [--A1.2] | ||
97 | |||
98 | CMP D0Ar6, #4 | ||
99 | BLT $Lbunaligned_1_2_3 | ||
100 | ! 32-bit aligned | ||
101 | BZ $Lbaligned_4 | ||
102 | |||
103 | SUB D0Ar6, D0Ar6, #4 | ||
104 | ! D1.6 stores the gap size in bits | ||
105 | MULW D1.6, D0Ar6, #8 | ||
106 | MOV D0.6, #32 | ||
107 | ! D0.6 stores the complement of the gap size | ||
108 | SUB D0.6, D0.6, D1.6 | ||
109 | |||
110 | $Lbunaligned_5_6_7: | ||
111 | GETL D0.7, D1.7, [--A1.2] | ||
112 | ! form 64-bit data in D0Re0, D1Re0 | ||
113 | MOV D1Re0, D0Re0 | ||
114 | ! D1Re0 << gap-size | ||
115 | LSL D1Re0, D1Re0, D1.6 | ||
116 | MOV D0Re0, D1.7 | ||
117 | ! D0Re0 >> complement | ||
118 | LSR D0Re0, D0Re0, D0.6 | ||
119 | MOV D1.5, D0Re0 | ||
120 | ! combine the both | ||
121 | ADD D1Re0, D1Re0, D1.5 | ||
122 | |||
123 | MOV D1.5, D1.7 | ||
124 | LSL D1.5, D1.5, D1.6 | ||
125 | MOV D0Re0, D0.7 | ||
126 | LSR D0Re0, D0Re0, D0.6 | ||
127 | MOV D0.5, D1.5 | ||
128 | ADD D0Re0, D0Re0, D0.5 | ||
129 | |||
130 | SETL [--A0.2], D0Re0, D1Re0 | ||
131 | MOV D0Re0, D0.7 ! carry the 8 bytes just loaded into the next iteration | ||
132 | MOV D1Re0, D1.7 | ||
133 | SUBS D1Ar5, D1Ar5, #1 | ||
134 | BNE $Lbunaligned_5_6_7 | ||
135 | |||
136 | ANDS D1Ar3, D1Ar3, #7 | ||
137 | BZ $Lbbyte_loop_exit | ||
138 | ! Adjust A1.2 | ||
139 | ! A1.2 <- A1.2 +8 - gapsize | ||
140 | ADD A1.2, A1.2, #8 | ||
141 | SUB A1.2, A1.2, D0Ar4 | ||
142 | B $Lbbyte_loop | ||
143 | |||
144 | $Lbunaligned_1_2_3: | ||
145 | MULW D1.6, D0Ar6, #8 ! D1.6 = gap size in bits | ||
146 | MOV D0.6, #32 | ||
147 | SUB D0.6, D0.6, D1.6 ! D0.6 = 32 - gap size in bits | ||
148 | |||
149 | $Lbunaligned_1_2_3_loop: | ||
150 | GETL D0.7, D1.7, [--A1.2] | ||
151 | ! form 64-bit data in D0Re0, D1Re0 | ||
152 | LSL D1Re0, D1Re0, D1.6 | ||
153 | ! save D0Re0 for later use | ||
154 | MOV D0.5, D0Re0 | ||
155 | LSR D0Re0, D0Re0, D0.6 | ||
156 | MOV D1.5, D0Re0 | ||
157 | ADD D1Re0, D1Re0, D1.5 | ||
158 | |||
159 | ! original data in D0Re0 | ||
160 | MOV D1.5, D0.5 | ||
161 | LSL D1.5, D1.5, D1.6 | ||
162 | MOV D0Re0, D1.7 | ||
163 | LSR D0Re0, D0Re0, D0.6 | ||
164 | MOV D0.5, D1.5 | ||
165 | ADD D0Re0, D0Re0, D0.5 | ||
166 | |||
167 | SETL [--A0.2], D0Re0, D1Re0 | ||
168 | MOV D0Re0, D0.7 ! carry the 8 bytes just loaded into the next iteration | ||
169 | MOV D1Re0, D1.7 | ||
170 | SUBS D1Ar5, D1Ar5, #1 | ||
171 | BNE $Lbunaligned_1_2_3_loop | ||
172 | |||
173 | ANDS D1Ar3, D1Ar3, #7 | ||
174 | BZ $Lbbyte_loop_exit | ||
175 | ! Adjust A1.2 | ||
176 | ADD A1.2, A1.2, #8 | ||
177 | SUB A1.2, A1.2, D0Ar4 | ||
178 | B $Lbbyte_loop | ||
179 | |||
180 | $Lbaligned_4: | ||
181 | GETL D0.7, D1.7, [--A1.2] | ||
182 | MOV D1Re0, D0Re0 | ||
183 | MOV D0Re0, D1.7 | ||
184 | SETL [--A0.2], D0Re0, D1Re0 | ||
185 | MOV D0Re0, D0.7 | ||
186 | MOV D1Re0, D1.7 | ||
187 | SUBS D1Ar5, D1Ar5, #1 | ||
188 | BNE $Lbaligned_4 | ||
189 | ANDS D1Ar3, D1Ar3, #7 | ||
190 | BZ $Lbbyte_loop_exit | ||
191 | ! Adjust A1.2 | ||
192 | ADD A1.2, A1.2, #8 | ||
193 | SUB A1.2, A1.2, D0Ar4 | ||
194 | B $Lbbyte_loop | ||
195 | |||
196 | $Lforwards_copy: | ||
197 | MOV A1.2, D0Ar2 | ||
198 | MOV A0.2, D1Ar1 | ||
199 | CMP D1Ar3, #8 | ||
200 | BLT $Lfbyte_loop | ||
201 | |||
202 | MOV D0Ar4, D0Ar2 | ||
203 | MOV D1Ar5, D1Ar1 | ||
204 | |||
205 | ANDS D1Ar5, D1Ar5, #7 ! test destination 8 byte alignment | ||
206 | BNE $Lfdest_unaligned | ||
207 | |||
208 | ANDS D0Ar4, D0Ar4, #7 ! test source 8 byte alignment | ||
209 | BNE $Lfsrc_unaligned | ||
210 | |||
211 | LSR D1Ar5, D1Ar3, #3 ! D1Ar5 = number of 8 byte blocks | ||
212 | |||
213 | $Lfaligned_loop: | ||
214 | GETL D0Re0, D1Re0, [A1.2++] | ||
215 | SUBS D1Ar5, D1Ar5, #1 | ||
216 | SETL [A0.2++], D0Re0, D1Re0 | ||
217 | BNE $Lfaligned_loop | ||
218 | |||
219 | ANDS D1Ar3, D1Ar3, #7 ! bytes left over after the 8 byte blocks | ||
220 | BZ $Lfbyte_loop_exit | ||
221 | $Lfbyte_loop: | ||
222 | GETB D1Re0, [A1.2++] | ||
223 | SETB [A0.2++], D1Re0 | ||
224 | SUBS D1Ar3, D1Ar3, #1 | ||
225 | BNE $Lfbyte_loop | ||
226 | $Lfbyte_loop_exit: | ||
227 | MOV D0Re0, D1Ar1 ! return value = dst (D1Ar1 is not written on the forwards path) | ||
228 | B $LEND | ||
229 | |||
230 | $Lfdest_unaligned: | ||
231 | GETB D0Re0, [A1.2++] | ||
232 | ADD D1Ar5, D1Ar5, #1 ! dest is aligned when D1Ar5 reaches #8 | ||
233 | SUB D1Ar3, D1Ar3, #1 | ||
234 | SETB [A0.2++], D0Re0 | ||
235 | CMP D1Ar5, #8 | ||
236 | BNE $Lfdest_unaligned | ||
237 | CMP D1Ar3, #8 | ||
238 | BLT $Lfbyte_loop | ||
239 | $Lfsrc_unaligned: | ||
240 | ! adjust A1.2 | ||
241 | LSR D1Ar5, D1Ar3, #3 ! D1Ar5 = number of 8 byte blocks | ||
242 | |||
243 | MOV D0Ar4, A1.2 | ||
244 | MOV D0Ar6, A1.2 | ||
245 | ANDMB D0Ar4, D0Ar4, #0xfff8 | ||
246 | MOV A1.2, D0Ar4 | ||
247 | |||
248 | ! A0.2 dst 64-bit is aligned | ||
249 | SUB D0Ar6, D0Ar6, D0Ar4 | ||
250 | ! keep the information for the later adjustment | ||
251 | MOVS D0Ar4, D0Ar6 | ||
252 | |||
253 | ! both aligned | ||
254 | BZ $Lfaligned_loop | ||
255 | |||
256 | ! prefetch | ||
257 | GETL D0Re0, D1Re0, [A1.2] | ||
258 | |||
259 | CMP D0Ar6, #4 | ||
260 | BLT $Lfunaligned_1_2_3 | ||
261 | BZ $Lfaligned_4 | ||
262 | |||
263 | SUB D0Ar6, D0Ar6, #4 | ||
264 | MULW D0.6, D0Ar6, #8 ! D0.6 = bit offset | ||
265 | MOV D1.6, #32 | ||
266 | SUB D1.6, D1.6, D0.6 ! D1.6 = 32 - bit offset | ||
267 | |||
268 | $Lfunaligned_5_6_7: | ||
269 | GETL D0.7, D1.7, [++A1.2] | ||
270 | ! form 64-bit data in D0Re0, D1Re0 | ||
271 | MOV D0Re0, D1Re0 | ||
272 | LSR D0Re0, D0Re0, D0.6 | ||
273 | MOV D1Re0, D0.7 | ||
274 | LSL D1Re0, D1Re0, D1.6 | ||
275 | MOV D0.5, D1Re0 | ||
276 | ADD D0Re0, D0Re0, D0.5 | ||
277 | |||
278 | MOV D0.5, D0.7 | ||
279 | LSR D0.5, D0.5, D0.6 | ||
280 | MOV D1Re0, D1.7 | ||
281 | LSL D1Re0, D1Re0, D1.6 | ||
282 | MOV D1.5, D0.5 | ||
283 | ADD D1Re0, D1Re0, D1.5 | ||
284 | |||
285 | SETL [A0.2++], D0Re0, D1Re0 | ||
286 | MOV D0Re0, D0.7 ! carry the 8 bytes just loaded into the next iteration | ||
287 | MOV D1Re0, D1.7 | ||
288 | SUBS D1Ar5, D1Ar5, #1 | ||
289 | BNE $Lfunaligned_5_6_7 | ||
290 | |||
291 | ANDS D1Ar3, D1Ar3, #7 | ||
292 | BZ $Lfbyte_loop_exit | ||
293 | ! Adjust A1.2 | ||
294 | ADD A1.2, A1.2, D0Ar4 | ||
295 | B $Lfbyte_loop | ||
296 | |||
297 | $Lfunaligned_1_2_3: | ||
298 | MULW D0.6, D0Ar6, #8 ! D0.6 = bit offset | ||
299 | MOV D1.6, #32 | ||
300 | SUB D1.6, D1.6, D0.6 ! D1.6 = 32 - bit offset | ||
301 | |||
302 | $Lfunaligned_1_2_3_loop: | ||
303 | GETL D0.7, D1.7, [++A1.2] | ||
304 | ! form 64-bit data in D0Re0, D1Re0 | ||
305 | LSR D0Re0, D0Re0, D0.6 | ||
306 | MOV D1.5, D1Re0 | ||
307 | LSL D1Re0, D1Re0, D1.6 | ||
308 | MOV D0.5, D1Re0 | ||
309 | ADD D0Re0, D0Re0, D0.5 | ||
310 | |||
311 | MOV D0.5, D1.5 | ||
312 | LSR D0.5, D0.5, D0.6 | ||
313 | MOV D1Re0, D0.7 | ||
314 | LSL D1Re0, D1Re0, D1.6 | ||
315 | MOV D1.5, D0.5 | ||
316 | ADD D1Re0, D1Re0, D1.5 | ||
317 | |||
318 | SETL [A0.2++], D0Re0, D1Re0 | ||
319 | MOV D0Re0, D0.7 ! carry the 8 bytes just loaded into the next iteration | ||
320 | MOV D1Re0, D1.7 | ||
321 | SUBS D1Ar5, D1Ar5, #1 | ||
322 | BNE $Lfunaligned_1_2_3_loop | ||
323 | |||
324 | ANDS D1Ar3, D1Ar3, #7 | ||
325 | BZ $Lfbyte_loop_exit | ||
326 | ! Adjust A1.2 | ||
327 | ADD A1.2, A1.2, D0Ar4 | ||
328 | B $Lfbyte_loop | ||
329 | |||
330 | $Lfaligned_4: | ||
331 | GETL D0.7, D1.7, [++A1.2] | ||
332 | MOV D0Re0, D1Re0 | ||
333 | MOV D1Re0, D0.7 | ||
334 | SETL [A0.2++], D0Re0, D1Re0 | ||
335 | MOV D0Re0, D0.7 | ||
336 | MOV D1Re0, D1.7 | ||
337 | SUBS D1Ar5, D1Ar5, #1 | ||
338 | BNE $Lfaligned_4 | ||
339 | ANDS D1Ar3, D1Ar3, #7 | ||
340 | BZ $Lfbyte_loop_exit | ||
341 | ! Adjust A1.2 | ||
342 | ADD A1.2, A1.2, D0Ar4 | ||
343 | B $Lfbyte_loop | ||
344 | |||
345 | .size _memmove,.-_memmove | ||
diff --git a/arch/metag/lib/memset.S b/arch/metag/lib/memset.S new file mode 100644 index 000000000000..721085bad1d2 --- /dev/null +++ b/arch/metag/lib/memset.S | |||
@@ -0,0 +1,86 @@ | |||
1 | ! Copyright (C) 2008-2012 Imagination Technologies Ltd. | ||
2 | |||
3 | .text | ||
4 | .global _memset | ||
5 | .type _memset,function | ||
6 | ! D1Ar1 dst | ||
7 | ! D0Ar2 c | ||
8 | ! D1Ar3 cnt | ||
9 | ! D0Re0 dst | ||
10 | _memset: | ||
11 | AND D0Ar2,D0Ar2,#0xFF ! Ensure a byte input value | ||
12 | MULW D0Ar2,D0Ar2,#0x0101 ! Duplicate byte value into 0-15 | ||
13 | ANDS D0Ar4,D1Ar1,#7 ! Extract bottom LSBs of dst | ||
14 | LSL D0Re0,D0Ar2,#16 ! Duplicate byte value into 16-31 | ||
15 | ADD A0.2,D0Ar2,D0Re0 ! Duplicate byte value into 4 (A0.2) | ||
16 | MOV D0Re0,D1Ar1 ! Return dst | ||
17 | BZ $LLongStub ! if start address is aligned | ||
18 | ! start address is not aligned on an 8 byte boundary, so we | ||
19 | ! need the number of bytes up to the next 8 byte address | ||
20 | ! boundary, or the length of the string if less than 8, in D1Ar5 | ||
21 | MOV D0Ar2,#8 ! Need 8 - N in D1Ar5 ... | ||
22 | SUB D1Ar5,D0Ar2,D0Ar4 ! ... subtract N | ||
23 | CMP D1Ar3,D1Ar5 | ||
24 | MOVMI D1Ar5,D1Ar3 ! cnt < 8 - N: burst is just cnt bytes | ||
25 | B $LByteStub ! dst is mis-aligned, do $LByteStub | ||
26 | |||
27 | ! | ||
28 | ! Preamble to LongLoop which generates 4*8 bytes per iteration (5 cycles) | ||
29 | ! | ||
30 | $LLongStub: | ||
31 | LSRS D0Ar2,D1Ar3,#5 ! D0Ar2 = number of 32 byte blocks | ||
32 | AND D1Ar3,D1Ar3,#0x1F ! D1Ar3 = bytes left after those blocks | ||
33 | MOV A1.2,A0.2 ! second copy of the pattern for the 64-bit SETL | ||
34 | BEQ $LLongishStub ! no 32 byte blocks | ||
35 | SUB TXRPT,D0Ar2,#1 | ||
36 | CMP D1Ar3,#0 ! sets Z for the BZ after the loop | ||
37 | $LLongLoop: | ||
38 | SETL [D1Ar1++],A0.2,A1.2 | ||
39 | SETL [D1Ar1++],A0.2,A1.2 | ||
40 | SETL [D1Ar1++],A0.2,A1.2 | ||
41 | SETL [D1Ar1++],A0.2,A1.2 | ||
42 | BR $LLongLoop ! repeat, counted by TXRPT | ||
43 | BZ $Lexit ! no bytes left | ||
44 | ! | ||
45 | ! Preamble to LongishLoop which generates 1*8 bytes per iteration (2 cycles) | ||
46 | ! | ||
47 | $LLongishStub: | ||
48 | LSRS D0Ar2,D1Ar3,#3 ! D0Ar2 = number of 8 byte blocks | ||
49 | AND D1Ar3,D1Ar3,#0x7 ! D1Ar3 = bytes left after those blocks | ||
50 | MOV D1Ar5,D1Ar3 ! burst size for $LByteStub = all remaining bytes | ||
51 | BEQ $LByteStub ! no 8 byte blocks | ||
52 | SUB TXRPT,D0Ar2,#1 | ||
53 | CMP D1Ar3,#0 ! sets Z for the BZ after the loop | ||
54 | $LLongishLoop: | ||
55 | SETL [D1Ar1++],A0.2,A1.2 | ||
56 | BR $LLongishLoop ! repeat, counted by TXRPT | ||
57 | BZ $Lexit ! no bytes left | ||
58 | ! | ||
59 | ! This does a byte structured burst of up to 7 bytes | ||
60 | ! | ||
61 | ! D1Ar1 should point to the location required | ||
62 | ! D1Ar3 should be the remaining total byte count | ||
63 | ! D1Ar5 should be burst size (<= D1Ar3) | ||
64 | ! | ||
65 | $LByteStub: | ||
66 | SUBS D1Ar3,D1Ar3,D1Ar5 ! Reduce count | ||
67 | ADD D1Ar1,D1Ar1,D1Ar5 ! Advance pointer to end of area | ||
68 | MULW D1Ar5,D1Ar5,#4 ! Scale to (1*4), (2*4), (3*4) | ||
69 | SUB D1Ar5,D1Ar5,#(8*4) ! Rebase to -(7*4), -(6*4), -(5*4), ... | ||
70 | MOV A1.2,D1Ar5 | ||
71 | SUB PC,CPC1,A1.2 ! Jump into table below | ||
72 | SETB [D1Ar1+#(-7)],A0.2 | ||
73 | SETB [D1Ar1+#(-6)],A0.2 | ||
74 | SETB [D1Ar1+#(-5)],A0.2 | ||
75 | SETB [D1Ar1+#(-4)],A0.2 | ||
76 | SETB [D1Ar1+#(-3)],A0.2 | ||
77 | SETB [D1Ar1+#(-2)],A0.2 | ||
78 | SETB [D1Ar1+#(-1)],A0.2 | ||
79 | ! | ||
80 | ! Return if all data has been output, otherwise do $LLongStub | ||
81 | ! | ||
82 | BNZ $LLongStub ! uses the flags from SUBS at the top of $LByteStub | ||
83 | $Lexit: | ||
84 | MOV PC,D1RtP | ||
85 | .size _memset,.-_memset | ||
86 | |||
diff --git a/arch/metag/lib/modsi3.S b/arch/metag/lib/modsi3.S new file mode 100644 index 000000000000..210cfa856593 --- /dev/null +++ b/arch/metag/lib/modsi3.S | |||
@@ -0,0 +1,38 @@ | |||
1 | ! Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007 | ||
2 | ! Imagination Technologies Ltd | ||
3 | ! | ||
4 | ! Integer modulus routines. | ||
5 | ! | ||
6 | !! | ||
7 | !! 32-bit modulus unsigned i/p - passed unsigned 32-bit numbers | ||
8 | !! | ||
9 | .text | ||
10 | .global ___umodsi3 | ||
11 | .type ___umodsi3,function | ||
12 | .align 2 | ||
13 | ___umodsi3: | ||
14 | MOV D0FrT,D1RtP ! Save original return address | ||
15 | CALLR D1RtP,___udivsi3 ! NOTE(review): relies on ___udivsi3 leaving the remainder in D1Ar1 and preserving D0FrT - confirm in divsi3.S | ||
16 | MOV D1RtP,D0FrT ! Recover return address | ||
17 | MOV D0Re0,D1Ar1 ! Return remainder | ||
18 | MOV PC,D1RtP | ||
19 | .size ___umodsi3,.-___umodsi3 | ||
20 | |||
21 | !! | ||
22 | !! 32-bit modulus signed i/p - passed signed 32-bit numbers | ||
23 | !! | ||
24 | .global ___modsi3 | ||
25 | .type ___modsi3,function | ||
26 | .align 2 | ||
27 | ___modsi3: | ||
28 | MOV D0FrT,D1RtP ! Save original return address | ||
29 | MOV A0.2,D1Ar1 ! Save A in A0.2 | ||
30 | CALLR D1RtP,___divsi3 ! NOTE(review): relies on ___divsi3 leaving the remainder in D1Ar1 and preserving D0FrT/A0.2 - confirm in divsi3.S | ||
31 | MOV D1RtP,D0FrT ! Recover return address | ||
32 | MOV D1Re0,A0.2 ! Recover A | ||
33 | MOV D0Re0,D1Ar1 ! Return remainder | ||
34 | ORS D1Re0,D1Re0,D1Re0 ! Was A negative? | ||
35 | NEG D1Ar1,D1Ar1 ! Negate remainder | ||
36 | MOVMI D0Re0,D1Ar1 ! Return neg remainder | ||
37 | MOV PC, D1RtP | ||
38 | .size ___modsi3,.-___modsi3 | ||
diff --git a/arch/metag/lib/muldi3.S b/arch/metag/lib/muldi3.S new file mode 100644 index 000000000000..ee66ca8644d0 --- /dev/null +++ b/arch/metag/lib/muldi3.S | |||
@@ -0,0 +1,44 @@ | |||
1 | ! Copyright (C) 2012 by Imagination Technologies Ltd. | ||
2 | ! | ||
3 | ! 64-bit multiply routine. | ||
4 | ! | ||
5 | |||
6 | ! | ||
7 | ! 64-bit signed/unsigned multiply | ||
8 | ! | ||
9 | ! A = D1Ar1:D0Ar2 = a 2^48 + b 2^32 + c 2^16 + d 2^0 | ||
10 | ! | ||
11 | ! B = D1Ar3:D0Ar4 = w 2^48 + x 2^32 + y 2^16 + z 2^0 | ||
12 | ! | ||
13 | .text | ||
14 | .global ___muldi3 | ||
15 | .type ___muldi3,function | ||
16 | |||
17 | ___muldi3: | ||
18 | MULD D1Re0,D1Ar1,D0Ar4 ! (a 2^48 + b 2^32)(y 2^16 + z 2^0) | ||
19 | MULD D0Re0,D0Ar2,D1Ar3 ! (w 2^48 + x 2^32)(c 2^16 + d 2^0) | ||
20 | ADD D1Re0,D1Re0,D0Re0 ! both cross terms land in the high word | ||
21 | |||
22 | MULW D0Re0,D0Ar2,D0Ar4 ! (d 2^0) * (z 2^0) | ||
23 | |||
24 | RTDW D0Ar2,D0Ar2 ! presumably swaps the 16-bit halves so MULW sees c - TODO confirm | ||
25 | MULW D0Ar6,D0Ar2,D0Ar4 ! (c 2^16)(z 2^0) | ||
26 | LSR D1Ar5,D0Ar6,#16 ! top 16 bits of the product belong to the high word | ||
27 | LSL D0Ar6,D0Ar6,#16 ! bottom 16 bits belong to bits 16-31 of the low word | ||
28 | ADDS D0Re0,D0Re0,D0Ar6 | ||
29 | ADDCS D1Re0,D1Re0,#1 ! propagate the carry out of the low word | ||
30 | RTDW D0Ar4,D0Ar4 ! presumably swaps the 16-bit halves so MULW sees y - TODO confirm | ||
31 | ADD D1Re0,D1Re0,D1Ar5 | ||
32 | |||
33 | MULW D0Ar6,D0Ar2,D0Ar4 ! (c 2^16)(y 2^16) | ||
34 | ADD D1Re0,D1Re0,D0Ar6 ! 2^32 term: added straight into the high word | ||
35 | |||
36 | RTDW D0Ar2,D0Ar2 ! presumably swaps the halves back so MULW sees d - TODO confirm | ||
37 | MULW D0Ar6,D0Ar2,D0Ar4 ! (d 2^0)(y 2^16) | ||
38 | LSR D1Ar5,D0Ar6,#16 ! top 16 bits of the product belong to the high word | ||
39 | LSL D0Ar6,D0Ar6,#16 ! bottom 16 bits belong to bits 16-31 of the low word | ||
40 | ADDS D0Re0,D0Re0,D0Ar6 | ||
41 | ADD D1Re0,D1Re0,D1Ar5 | ||
42 | ADDCS D1Re0,D1Re0,#1 ! propagate the carry from the ADDS above | ||
43 | MOV PC, D1RtP | ||
44 | .size ___muldi3,.-___muldi3 | ||
diff --git a/arch/metag/lib/ucmpdi2.S b/arch/metag/lib/ucmpdi2.S new file mode 100644 index 000000000000..6f3347f7daeb --- /dev/null +++ b/arch/metag/lib/ucmpdi2.S | |||
@@ -0,0 +1,27 @@ | |||
1 | ! Copyright (C) 2012 by Imagination Technologies Ltd. | ||
2 | ! | ||
3 | ! 64-bit unsigned compare routine. | ||
4 | ! | ||
5 | |||
6 | .text | ||
7 | .global ___ucmpdi2 | ||
8 | .type ___ucmpdi2,function | ||
9 | |||
10 | ! low high | ||
11 | ! u64 a (D0Ar2, D1Ar1) | ||
12 | ! u64 b (D0Ar4, D1Ar3) | ||
13 | ___ucmpdi2: | ||
14 | ! start at 1 (equal) and conditionally increment or decrement | ||
15 | MOV D0Re0,#1 | ||
16 | |||
17 | ! high words | ||
18 | CMP D1Ar1,D1Ar3 | ||
19 | ! or if equal, low words | ||
20 | CMPEQ D0Ar2,D0Ar4 | ||
21 | |||
22 | ! unsigned compare | ||
23 | SUBLO D0Re0,D0Re0,#1 ! a < b: result becomes 0 | ||
24 | ADDHI D0Re0,D0Re0,#1 ! a > b: result becomes 2 | ||
25 | |||
26 | MOV PC,D1RtP | ||
27 | .size ___ucmpdi2,.-___ucmpdi2 | ||