diff options
author | Ley Foon Tan <lftan@altera.com> | 2014-11-06 02:20:00 -0500 |
---|---|---|
committer | Ley Foon Tan <lftan@altera.com> | 2014-12-07 23:56:00 -0500 |
commit | eea9507a69d637d52705de8703b168bf6bfe5643 (patch) | |
tree | 7a75b11289b9d4b040468fbccbfbee65280adeba /arch/nios2/lib/memcpy.c | |
parent | b53e906d255d7bc3539c2729afb8a18c309cd41e (diff) |
nios2: Library functions
Add optimised library functions for nios2.
Signed-off-by: Ley Foon Tan <lftan@altera.com>
Diffstat (limited to 'arch/nios2/lib/memcpy.c')
-rw-r--r-- | arch/nios2/lib/memcpy.c | 202 |
1 files changed, 202 insertions, 0 deletions
diff --git a/arch/nios2/lib/memcpy.c b/arch/nios2/lib/memcpy.c new file mode 100644 index 000000000000..1715f5d28b11 --- /dev/null +++ b/arch/nios2/lib/memcpy.c | |||
@@ -0,0 +1,202 @@ | |||
1 | /* Extracted from GLIBC memcpy.c and memcopy.h, which is: | ||
2 | Copyright (C) 1991, 1992, 1993, 1997, 2004 Free Software Foundation, Inc. | ||
3 | This file is part of the GNU C Library. | ||
4 | Contributed by Torbjorn Granlund (tege@sics.se). | ||
5 | |||
6 | The GNU C Library is free software; you can redistribute it and/or | ||
7 | modify it under the terms of the GNU Lesser General Public | ||
8 | License as published by the Free Software Foundation; either | ||
9 | version 2.1 of the License, or (at your option) any later version. | ||
10 | |||
11 | The GNU C Library is distributed in the hope that it will be useful, | ||
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
14 | Lesser General Public License for more details. | ||
15 | |||
16 | You should have received a copy of the GNU Lesser General Public | ||
17 | License along with the GNU C Library; if not, see | ||
18 | <http://www.gnu.org/licenses/>. */ | ||
19 | |||
20 | #include <linux/types.h> | ||
21 | |||
/* Word type used for the bulk of the copy.  Ideally this is the widest
   type that a single load and a single store can move.  */
#define op_t	unsigned long
#define OPSIZ	(sizeof(op_t))

/* Type preferred for keeping a single byte in a register.  */
#define reg_char	char

/* Assemble one word out of two neighbouring words: W0 is shifted right by
   SH_1 bits, W1 is shifted left by SH_2 bits, and the two pieces are OR-ed
   together.  The shift counts are used as given; callers must keep each of
   them below the width of the operand type.  */
#define MERGE(w0, sh_1, w1, sh_2) \
	(((w1) << (sh_2)) | ((w0) >> (sh_1)))
32 | |||
/* Forward copy of exactly NBYTES bytes from SRC_BP to DST_BP, one byte at
   a time; neither address needs any particular alignment.
   NBYTES is evaluated once, before the first byte is moved, so it may be
   an expression involving DST_BP or SRC_BP.  Both DST_BP and SRC_BP must
   be modifiable lvalues; each is incremented once per byte copied.  */
#define BYTE_COPY_FWD(dst_bp, src_bp, nbytes) \
do { \
	size_t __todo; \
 \
	for (__todo = (nbytes); __todo != 0; __todo--) { \
		unsigned char __byte = *(unsigned char *) (src_bp); \
 \
		(src_bp) += 1; \
		*(unsigned char *) (dst_bp) = __byte; \
		(dst_bp) += 1; \
	} \
} while (0)
46 | |||
/* Forward copy, one `op_t' word at a time, of as many whole words as fit
   in NBYTES, from SRC_BP to DST_BP.  DST_BP is assumed to be a multiple of
   OPSIZ; SRC_BP may have any alignment, and the helper that is called is
   chosen from it.  On exit both addresses have been advanced past the
   words handed to the helper, and NBYTES_LEFT holds the count of trailing
   bytes (always less than OPSIZ) that the caller still has to copy.
   NBYTES is evaluated several times, and NBYTES_LEFT is written last, so
   the same lvalue may be passed for both.  */
#define WORD_COPY_FWD(dst_bp, src_bp, nbytes_left, nbytes) \
do { \
	if ((src_bp) % OPSIZ != 0) \
		_wordcopy_fwd_dest_aligned(dst_bp, src_bp, \
					   (nbytes) / OPSIZ); \
	else \
		_wordcopy_fwd_aligned(dst_bp, src_bp, \
				      (nbytes) / OPSIZ); \
	(src_bp) += (nbytes) & ~(OPSIZ - 1); \
	(dst_bp) += (nbytes) & ~(OPSIZ - 1); \
	(nbytes_left) = (nbytes) % OPSIZ; \
} while (0)
63 | |||
64 | |||
/* Smallest byte count for which memcpy takes the word-copy path; shorter
   copies are done entirely by the byte loop.  */
#define OP_T_THRES	16
67 | |||
68 | /* _wordcopy_fwd_aligned -- Copy block beginning at SRCP to | ||
69 | block beginning at DSTP with LEN `op_t' words (not LEN bytes!). | ||
70 | Both SRCP and DSTP should be aligned for memory operations on `op_t's. */ | ||
71 | /* stream-lined (read x8 + write x8) */ | ||
72 | static void _wordcopy_fwd_aligned(long int dstp, long int srcp, size_t len) | ||
73 | { | ||
74 | while (len > 7) { | ||
75 | register op_t a0, a1, a2, a3, a4, a5, a6, a7; | ||
76 | |||
77 | a0 = ((op_t *) srcp)[0]; | ||
78 | a1 = ((op_t *) srcp)[1]; | ||
79 | a2 = ((op_t *) srcp)[2]; | ||
80 | a3 = ((op_t *) srcp)[3]; | ||
81 | a4 = ((op_t *) srcp)[4]; | ||
82 | a5 = ((op_t *) srcp)[5]; | ||
83 | a6 = ((op_t *) srcp)[6]; | ||
84 | a7 = ((op_t *) srcp)[7]; | ||
85 | ((op_t *) dstp)[0] = a0; | ||
86 | ((op_t *) dstp)[1] = a1; | ||
87 | ((op_t *) dstp)[2] = a2; | ||
88 | ((op_t *) dstp)[3] = a3; | ||
89 | ((op_t *) dstp)[4] = a4; | ||
90 | ((op_t *) dstp)[5] = a5; | ||
91 | ((op_t *) dstp)[6] = a6; | ||
92 | ((op_t *) dstp)[7] = a7; | ||
93 | |||
94 | srcp += 8 * OPSIZ; | ||
95 | dstp += 8 * OPSIZ; | ||
96 | len -= 8; | ||
97 | } | ||
98 | while (len > 0) { | ||
99 | *(op_t *)dstp = *(op_t *)srcp; | ||
100 | |||
101 | srcp += OPSIZ; | ||
102 | dstp += OPSIZ; | ||
103 | len -= 1; | ||
104 | } | ||
105 | } | ||
106 | |||
107 | /* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to | ||
108 | block beginning at DSTP with LEN `op_t' words (not LEN bytes!). | ||
109 | DSTP should be aligned for memory operations on `op_t's, but SRCP must | ||
110 | *not* be aligned. */ | ||
111 | /* stream-lined (read x4 + write x4) */ | ||
112 | static void _wordcopy_fwd_dest_aligned(long int dstp, long int srcp, | ||
113 | size_t len) | ||
114 | { | ||
115 | op_t ap; | ||
116 | int sh_1, sh_2; | ||
117 | |||
118 | /* Calculate how to shift a word read at the memory operation | ||
119 | aligned srcp to make it aligned for copy. */ | ||
120 | |||
121 | sh_1 = 8 * (srcp % OPSIZ); | ||
122 | sh_2 = 8 * OPSIZ - sh_1; | ||
123 | |||
124 | /* Make SRCP aligned by rounding it down to the beginning of the `op_t' | ||
125 | it points in the middle of. */ | ||
126 | srcp &= -OPSIZ; | ||
127 | ap = ((op_t *) srcp)[0]; | ||
128 | srcp += OPSIZ; | ||
129 | |||
130 | while (len > 3) { | ||
131 | op_t a0, a1, a2, a3; | ||
132 | |||
133 | a0 = ((op_t *) srcp)[0]; | ||
134 | a1 = ((op_t *) srcp)[1]; | ||
135 | a2 = ((op_t *) srcp)[2]; | ||
136 | a3 = ((op_t *) srcp)[3]; | ||
137 | ((op_t *) dstp)[0] = MERGE(ap, sh_1, a0, sh_2); | ||
138 | ((op_t *) dstp)[1] = MERGE(a0, sh_1, a1, sh_2); | ||
139 | ((op_t *) dstp)[2] = MERGE(a1, sh_1, a2, sh_2); | ||
140 | ((op_t *) dstp)[3] = MERGE(a2, sh_1, a3, sh_2); | ||
141 | |||
142 | ap = a3; | ||
143 | srcp += 4 * OPSIZ; | ||
144 | dstp += 4 * OPSIZ; | ||
145 | len -= 4; | ||
146 | } | ||
147 | while (len > 0) { | ||
148 | register op_t a0; | ||
149 | |||
150 | a0 = ((op_t *) srcp)[0]; | ||
151 | ((op_t *) dstp)[0] = MERGE(ap, sh_1, a0, sh_2); | ||
152 | |||
153 | ap = a0; | ||
154 | srcp += OPSIZ; | ||
155 | dstp += OPSIZ; | ||
156 | len -= 1; | ||
157 | } | ||
158 | } | ||
159 | |||
160 | void *memcpy(void *dstpp, const void *srcpp, size_t len) | ||
161 | { | ||
162 | unsigned long int dstp = (long int) dstpp; | ||
163 | unsigned long int srcp = (long int) srcpp; | ||
164 | |||
165 | /* Copy from the beginning to the end. */ | ||
166 | |||
167 | /* If there not too few bytes to copy, use word copy. */ | ||
168 | if (len >= OP_T_THRES) { | ||
169 | /* Copy just a few bytes to make DSTP aligned. */ | ||
170 | len -= (-dstp) % OPSIZ; | ||
171 | BYTE_COPY_FWD(dstp, srcp, (-dstp) % OPSIZ); | ||
172 | |||
173 | /* Copy whole pages from SRCP to DSTP by virtual address | ||
174 | manipulation, as much as possible. */ | ||
175 | |||
176 | /* PAGE_COPY_FWD_MAYBE (dstp, srcp, len, len); */ | ||
177 | |||
178 | /* Copy from SRCP to DSTP taking advantage of the known | ||
179 | alignment of DSTP. Number of bytes remaining is put in the | ||
180 | third argument, i.e. in LEN. This number may vary from | ||
181 | machine to machine. */ | ||
182 | |||
183 | WORD_COPY_FWD(dstp, srcp, len, len); | ||
184 | |||
185 | /* Fall out and copy the tail. */ | ||
186 | } | ||
187 | |||
188 | /* There are just a few bytes to copy. Use byte memory operations. */ | ||
189 | BYTE_COPY_FWD(dstp, srcp, len); | ||
190 | |||
191 | return dstpp; | ||
192 | } | ||
193 | |||
/* memcpyb -- copy LEN bytes from SRCPP to DSTPP using byte accesses only,
   working from the lowest address upwards, and return DSTPP.  */
void *memcpyb(void *dstpp, const void *srcpp, unsigned len)
{
	unsigned long int dst_addr = (unsigned long int) dstpp;
	unsigned long int src_addr = (unsigned long int) srcpp;

	BYTE_COPY_FWD(dst_addr, src_addr, len);

	return dstpp;
}