path: root/arch/cris/arch-v10/lib
author    Linus Torvalds <torvalds@ppc970.osdl.org>  2005-04-16 18:20:36 -0400
committer Linus Torvalds <torvalds@ppc970.osdl.org>  2005-04-16 18:20:36 -0400
commit    1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree      0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/cris/arch-v10/lib
tag       Linux-2.6.12-rc2 (v2.6.12-rc2)
Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
Diffstat (limited to 'arch/cris/arch-v10/lib')
-rw-r--r--  arch/cris/arch-v10/lib/Makefile          9
-rw-r--r--  arch/cris/arch-v10/lib/checksum.S      124
-rw-r--r--  arch/cris/arch-v10/lib/checksumcopy.S  132
-rw-r--r--  arch/cris/arch-v10/lib/csumcpfruser.S   64
-rw-r--r--  arch/cris/arch-v10/lib/dmacopy.c        43
-rw-r--r--  arch/cris/arch-v10/lib/dram_init.S     205
-rw-r--r--  arch/cris/arch-v10/lib/hw_settings.S    62
-rw-r--r--  arch/cris/arch-v10/lib/memset.c        252
-rw-r--r--  arch/cris/arch-v10/lib/old_checksum.c   85
-rw-r--r--  arch/cris/arch-v10/lib/string.c        225
-rw-r--r--  arch/cris/arch-v10/lib/usercopy.c      523
11 files changed, 1724 insertions(+), 0 deletions(-)
diff --git a/arch/cris/arch-v10/lib/Makefile b/arch/cris/arch-v10/lib/Makefile
new file mode 100644
index 000000000000..36e9a9c5239b
--- /dev/null
+++ b/arch/cris/arch-v10/lib/Makefile
@@ -0,0 +1,9 @@
#
# Makefile for Etrax-specific library files.
#


EXTRA_AFLAGS := -traditional

lib-y  = checksum.o checksumcopy.o string.o usercopy.o memset.o csumcpfruser.o

diff --git a/arch/cris/arch-v10/lib/checksum.S b/arch/cris/arch-v10/lib/checksum.S
new file mode 100644
index 000000000000..85c48f0a9ec2
--- /dev/null
+++ b/arch/cris/arch-v10/lib/checksum.S
@@ -0,0 +1,124 @@
/* $Id: checksum.S,v 1.1 2001/12/17 13:59:27 bjornw Exp $
 * A fast checksum routine using movem
 * Copyright (c) 1998-2001 Axis Communications AB
 *
 * csum_partial(const unsigned char * buff, int len, unsigned int sum)
 */

	.globl	csum_partial
csum_partial:

	;; r10 - src
	;; r11 - length
	;; r12 - checksum

	;; check for breakeven length between movem and normal word looping versions
	;; we also do _NOT_ want to compute a checksum over more than the
	;; actual length when length < 40

	cmpu.w	80,$r11
	blo	_word_loop
	nop

	;; need to save the registers we use below in the movem loop
	;; this overhead is why we have a check above for breakeven length
	;; only r0 - r8 have to be saved, the other ones are clobber-able
	;; according to the ABI

	subq	9*4,$sp
	movem	$r8,[$sp]

	;; do a movem checksum

	subq	10*4,$r11	; update length for the first loop

_mloop:	movem	[$r10+],$r9	; read 10 longwords

	;; perform dword checksumming on the 10 longwords

	add.d	$r0,$r12
	ax
	add.d	$r1,$r12
	ax
	add.d	$r2,$r12
	ax
	add.d	$r3,$r12
	ax
	add.d	$r4,$r12
	ax
	add.d	$r5,$r12
	ax
	add.d	$r6,$r12
	ax
	add.d	$r7,$r12
	ax
	add.d	$r8,$r12
	ax
	add.d	$r9,$r12

	;; fold the carry into the checksum, to avoid having to loop the carry
	;; back into the top

	ax
	addq	0,$r12
	ax			; do it again, since we might have generated a carry
	addq	0,$r12

	subq	10*4,$r11
	bge	_mloop
	nop

	addq	10*4,$r11	; compensate for last loop underflowing length

	movem	[$sp+],$r8	; restore regs

_word_loop:
	;; only fold if there is anything to fold.

	cmpq	0,$r12
	beq	_no_fold

	;; fold 32-bit checksum into a 16-bit checksum, to avoid carries below.
	;; r9 and r13 can be used as temporaries.

	moveq	-1,$r9		; put 0xffff in r9, faster than move.d 0xffff,r9
	lsrq	16,$r9

	move.d	$r12,$r13
	lsrq	16,$r13		; r13 = checksum >> 16
	and.d	$r9,$r12	; checksum = checksum & 0xffff
	add.d	$r13,$r12	; checksum += r13
	move.d	$r12,$r13	; do the same again, maybe we got a carry last add
	lsrq	16,$r13
	and.d	$r9,$r12
	add.d	$r13,$r12

_no_fold:
	cmpq	2,$r11
	blt	_no_words
	nop

	;; checksum the rest of the words

	subq	2,$r11

_wloop:	subq	2,$r11
	bge	_wloop
	addu.w	[$r10+],$r12

	addq	2,$r11

_no_words:
	;; see if we have one odd byte more
	cmpq	1,$r11
	beq	_do_byte
	nop
	ret
	move.d	$r12, $r10

_do_byte:
	;; copy and checksum the last byte
	addu.b	[$r10],$r12
	ret
	move.d	$r12, $r10

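The double fold at _word_loop is the standard trick for reducing a 32-bit one's-complement accumulator to 16 bits. A minimal C model of it, for reference only (the kernel's equivalent helper is csum_fold() in the arch headers):

#include <stdint.h>
#include <stdio.h>

/* Model of the fold above: add the high halfword into the low one
   twice, because the first add can itself carry into bit 16.  After
   two rounds the value fits in 16 bits. */
static uint32_t fold_to_16(uint32_t sum)
{
	sum = (sum >> 16) + (sum & 0xffff);
	sum = (sum >> 16) + (sum & 0xffff);
	return sum;
}

int main(void)
{
	printf("0x%x\n", fold_to_16(0xffff0001u));	/* prints 0x1 */
	return 0;
}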
diff --git a/arch/cris/arch-v10/lib/checksumcopy.S b/arch/cris/arch-v10/lib/checksumcopy.S
new file mode 100644
index 000000000000..35cbffb306fd
--- /dev/null
+++ b/arch/cris/arch-v10/lib/checksumcopy.S
@@ -0,0 +1,132 @@
/* $Id: checksumcopy.S,v 1.1 2001/12/17 13:59:27 bjornw Exp $
 * A fast checksum+copy routine using movem
 * Copyright (c) 1998, 2001 Axis Communications AB
 *
 * Authors:	Bjorn Wesen
 *
 * csum_partial_copy_nocheck(const char *src, char *dst,
 *			     int len, unsigned int sum)
 */

	.globl	csum_partial_copy_nocheck
csum_partial_copy_nocheck:

	;; r10 - src
	;; r11 - dst
	;; r12 - length
	;; r13 - checksum

	;; check for breakeven length between movem and normal word looping versions
	;; we also do _NOT_ want to compute a checksum over more than the
	;; actual length when length < 40

	cmpu.w	80,$r12
	blo	_word_loop
	nop

	;; need to save the registers we use below in the movem loop
	;; this overhead is why we have a check above for breakeven length
	;; only r0 - r8 have to be saved, the other ones are clobber-able
	;; according to the ABI

	subq	9*4,$sp
	movem	$r8,[$sp]

	;; do a movem copy and checksum

	subq	10*4,$r12	; update length for the first loop

_mloop:	movem	[$r10+],$r9	; read 10 longwords
1:	;; A failing userspace access will have this as PC.
	movem	$r9,[$r11+]	; write 10 longwords

	;; perform dword checksumming on the 10 longwords

	add.d	$r0,$r13
	ax
	add.d	$r1,$r13
	ax
	add.d	$r2,$r13
	ax
	add.d	$r3,$r13
	ax
	add.d	$r4,$r13
	ax
	add.d	$r5,$r13
	ax
	add.d	$r6,$r13
	ax
	add.d	$r7,$r13
	ax
	add.d	$r8,$r13
	ax
	add.d	$r9,$r13

	;; fold the carry into the checksum, to avoid having to loop the carry
	;; back into the top

	ax
	addq	0,$r13
	ax			; do it again, since we might have generated a carry
	addq	0,$r13

	subq	10*4,$r12
	bge	_mloop
	nop

	addq	10*4,$r12	; compensate for last loop underflowing length

	movem	[$sp+],$r8	; restore regs

_word_loop:
	;; only fold if there is anything to fold.

	cmpq	0,$r13
	beq	_no_fold

	;; fold 32-bit checksum into a 16-bit checksum, to avoid carries below
	;; r9 can be used as temporary.

	move.d	$r13,$r9
	lsrq	16,$r9		; r9 = checksum >> 16
	and.d	0xffff,$r13	; checksum = checksum & 0xffff
	add.d	$r9,$r13	; checksum += r9
	move.d	$r13,$r9	; do the same again, maybe we got a carry last add
	lsrq	16,$r9
	and.d	0xffff,$r13
	add.d	$r9,$r13

_no_fold:
	cmpq	2,$r12
	blt	_no_words
	nop

	;; copy and checksum the rest of the words

	subq	2,$r12

_wloop:	move.w	[$r10+],$r9
2:	;; A failing userspace access will have this as PC.
	addu.w	$r9,$r13
	subq	2,$r12
	bge	_wloop
	move.w	$r9,[$r11+]

	addq	2,$r12

_no_words:
	;; see if we have one odd byte more
	cmpq	1,$r12
	beq	_do_byte
	nop
	ret
	move.d	$r13, $r10

_do_byte:
	;; copy and checksum the last byte
	move.b	[$r10],$r9
3:	;; A failing userspace access will have this as PC.
	addu.b	$r9,$r13
	move.b	$r9,[$r11]
	ret
	move.d	$r13, $r10
diff --git a/arch/cris/arch-v10/lib/csumcpfruser.S b/arch/cris/arch-v10/lib/csumcpfruser.S
new file mode 100644
index 000000000000..5f41ccd62754
--- /dev/null
+++ b/arch/cris/arch-v10/lib/csumcpfruser.S
@@ -0,0 +1,64 @@
/*
 * Add-on to transform csum_partial_copy_nocheck in checksumcopy.S into
 * csum_partial_copy_from_user by adding exception records.
 *
 * Copyright (C) 2001 Axis Communications AB.
 *
 * Author: Hans-Peter Nilsson.
 */

#include <asm/errno.h>

/* Same function body, but a different name.  If we just added exception
   records to _csum_partial_copy_nocheck and made it generic, we wouldn't
   know a user fault from a kernel fault and we would have overhead in
   each kernel caller for the error-pointer argument.

   unsigned int csum_partial_copy_from_user
     (const char *src, char *dst, int len, unsigned int sum, int *errptr);

   Note that the errptr argument is only set if we encounter an error.
   It is conveniently located on the stack, so the normal function body
   does not have to handle it.  */

#define csum_partial_copy_nocheck csum_partial_copy_from_user

/* There are local labels numbered 1, 2 and 3 present to mark the
   different from-user accesses.  */
#include "checksumcopy.S"

	.section .fixup,"ax"

;; Here from the movem loop; restore stack.
4:
	movem	[$sp+],$r8
;; r12 is already decremented.  Add back chunk_size-2.
	addq	40-2,$r12

;; Here from the word loop; r12 is off by 2; add it back.
5:
	addq	2,$r12

;; Here from a failing single byte.
6:

;; Signal in *errptr that we had a failing access.
	moveq	-EFAULT,$r9
	move.d	$r9,[[$sp]]

;; Clear the rest of the destination area using memset.  Preserve the
;; checksum for the readable bytes.
	push	$srp
	push	$r13
	move.d	$r11,$r10
	clear.d	$r11
	jsr	memset
	pop	$r10
	jump	[$sp+]

	.previous
	.section __ex_table,"a"
	.dword	1b,4b
	.dword	2b,5b
	.dword	3b,6b
	.previous
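Each .dword pair above maps a potentially faulting instruction address (labels 1, 2, 3 inside checksumcopy.S) to its fixup (labels 4, 5, 6). A rough C sketch of the lookup a fault handler performs against such a table; the names here are illustrative, not the actual CRIS handler API:

/* Hypothetical model of the exception-table search. */
struct ex_entry {
	unsigned long insn;	/* address that may fault (1b/2b/3b) */
	unsigned long fixup;	/* address to resume at (4b/5b/6b) */
};

static unsigned long find_fixup(const struct ex_entry *tab,
				unsigned long n, unsigned long pc)
{
	for (unsigned long i = 0; i < n; i++)
		if (tab[i].insn == pc)
			return tab[i].fixup;
	return 0;	/* no match: the fault is a genuine kernel bug */
}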
diff --git a/arch/cris/arch-v10/lib/dmacopy.c b/arch/cris/arch-v10/lib/dmacopy.c
new file mode 100644
index 000000000000..e5fb44f505c5
--- /dev/null
+++ b/arch/cris/arch-v10/lib/dmacopy.c
@@ -0,0 +1,43 @@
/* $Id: dmacopy.c,v 1.1 2001/12/17 13:59:27 bjornw Exp $
 *
 * memcpy for large blocks, using memory-memory DMA channels 6 and 7 in Etrax
 */

#include <asm/svinto.h>
#include <asm/io.h>

#define D(x)

void *dma_memcpy(void *pdst,
		 const void *psrc,
		 unsigned int pn)
{
	static etrax_dma_descr indma, outdma;

	D(printk("dma_memcpy %d bytes... ", pn));

#if 0
	*R_GEN_CONFIG = genconfig_shadow =
		(genconfig_shadow & ~0x3c0000) |
		IO_STATE(R_GEN_CONFIG, dma6, intdma7) |
		IO_STATE(R_GEN_CONFIG, dma7, intdma6);
#endif
	indma.sw_len = outdma.sw_len = pn;
	indma.ctrl = d_eol | d_eop;
	outdma.ctrl = d_eol;
	indma.buf = psrc;
	outdma.buf = pdst;

	*R_DMA_CH6_FIRST = &indma;
	*R_DMA_CH7_FIRST = &outdma;
	*R_DMA_CH6_CMD = IO_STATE(R_DMA_CH6_CMD, cmd, start);
	*R_DMA_CH7_CMD = IO_STATE(R_DMA_CH7_CMD, cmd, start);

	while (*R_DMA_CH7_CMD == 1)
		/* wait for completion */;

	D(printk("done\n"));

	return pdst;	/* declared void *; return the destination like memcpy */
}
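A minimal usage sketch, under the assumptions the code itself makes: kernel context, physically contiguous DMA-reachable buffers, and one caller at a time, since the descriptors are static:

/* The busy-wait on R_DMA_CH7_CMD means dma_memcpy() returns only
   after the copy has completed. */
static char dma_src[8192], dma_dst[8192];

static void dma_copy_example(void)
{
	dma_memcpy(dma_dst, dma_src, sizeof dma_dst);
}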
diff --git a/arch/cris/arch-v10/lib/dram_init.S b/arch/cris/arch-v10/lib/dram_init.S
new file mode 100644
index 000000000000..2ef4ad5706ef
--- /dev/null
+++ b/arch/cris/arch-v10/lib/dram_init.S
@@ -0,0 +1,205 @@
/* $Id: dram_init.S,v 1.4 2003/09/22 09:21:59 starvik Exp $
 *
 * DRAM/SDRAM initialization - alter with care
 * This file is intended to be included from other assembler files
 *
 * Note: This file may not modify r9 because r9 is used to carry
 *	 information from the decompressor to the kernel
 *
 * Copyright (C) 2000, 2001 Axis Communications AB
 *
 * Authors:  Mikael Starvik (starvik@axis.com)
 *
 * $Log: dram_init.S,v $
 * Revision 1.4  2003/09/22 09:21:59  starvik
 * Decompressor is linked to 0x407xxxxx and sdram commands are at 0x000xxxxx
 * so we need to mask off 12 bits.
 *
 * Revision 1.3  2003/03/31 09:38:37  starvik
 * Corrected calculation of end of sdram init commands
 *
 * Revision 1.2  2002/11/19 13:33:29  starvik
 * Changes from Linux 2.4
 *
 * Revision 1.13  2002/10/30 07:42:28  starvik
 * Always read SDRAM command sequence from flash
 *
 * Revision 1.12  2002/08/09 11:37:37  orjanf
 * Added double initialization work-around for Samsung SDRAMs.
 *
 * Revision 1.11  2002/06/04 11:43:21  starvik
 * Check if mrs_data is specified in kernelconfig (necessary for MCM)
 *
 * Revision 1.10  2001/10/04 12:00:21  martinnn
 * Added missing underscores.
 *
 * Revision 1.9  2001/10/01 14:47:35  bjornw
 * Added register prefixes and removed underscores
 *
 * Revision 1.8  2001/05/15 07:12:45  hp
 * Copy warning from head.S about r8 and r9
 *
 * Revision 1.7  2001/04/18 12:05:39  bjornw
 * Fixed comments, and explicitly include config.h to be sure it's there
 *
 * Revision 1.6  2001/04/10 06:20:16  starvik
 * Delay should be 200us, not 200ns
 *
 * Revision 1.5  2001/04/09 06:01:13  starvik
 * Added support for 100 MHz SDRAMs
 *
 * Revision 1.4  2001/03/26 14:24:01  bjornw
 * Namechange of some config options
 *
 * Revision 1.3  2001/03/23 08:29:41  starvik
 * Corrected calculation of mrs_data
 *
 * Revision 1.2  2001/02/08 15:20:00  starvik
 * Corrected SDRAM initialization
 * Should now be included as inline
 *
 * Revision 1.1  2001/01/29 13:08:02  starvik
 * Initial version
 * This file should be included from all assembler files that need to
 * initialize DRAM/SDRAM.
 *
 */

/* Just to be certain the config file is included, we include it here
 * explicitly instead of depending on it being included in the file that
 * uses this code.
 */

#include <linux/config.h>

	;; WARNING! The registers r8 and r9 are used as parameters carrying
	;; information from the decompressor (if the kernel was compressed).
	;; They should not be used in the code below.

#ifndef CONFIG_SVINTO_SIM
	move.d	CONFIG_ETRAX_DEF_R_WAITSTATES, $r0
	move.d	$r0, [R_WAITSTATES]

	move.d	CONFIG_ETRAX_DEF_R_BUS_CONFIG, $r0
	move.d	$r0, [R_BUS_CONFIG]

#ifndef CONFIG_ETRAX_SDRAM
	move.d	CONFIG_ETRAX_DEF_R_DRAM_CONFIG, $r0
	move.d	$r0, [R_DRAM_CONFIG]

	move.d	CONFIG_ETRAX_DEF_R_DRAM_TIMING, $r0
	move.d	$r0, [R_DRAM_TIMING]
#else
	;; Samsung SDRAMs seem to require to be initialized twice to work properly.
	moveq	2, $r6
_sdram_init:

	; Refer to the ETRAX 100LX Designer's Reference for a description of SDRAM initialization

	; Bank configuration
	move.d	CONFIG_ETRAX_DEF_R_SDRAM_CONFIG, $r0
	move.d	$r0, [R_SDRAM_CONFIG]

	; Calculate value of mrs_data
	; CAS latency = 2 && bus_width = 32 => 0x40
	; CAS latency = 3 && bus_width = 32 => 0x60
	; CAS latency = 2 && bus_width = 16 => 0x20
	; CAS latency = 3 && bus_width = 16 => 0x30

	; Check if value is already supplied in kernel config
	move.d	CONFIG_ETRAX_DEF_R_SDRAM_TIMING, $r2
	and.d	0x00ff0000, $r2
	bne	_set_timing
	lsrq	16, $r2

	move.d	0x40, $r2	; Assume 32 bits and CAS latency = 2
	move.d	CONFIG_ETRAX_DEF_R_SDRAM_TIMING, $r1
	move.d	$r1, $r3
	and.d	0x03, $r1	; Get CAS latency
	and.d	0x1000, $r3	; 50 or 100 MHz?
	beq	_speed_50
	nop
_speed_100:
	cmp.d	0x00, $r1	; CAS latency = 2?
	beq	_bw_check
	nop
	or.d	0x20, $r2	; CAS latency = 3
	ba	_bw_check
	nop
_speed_50:
	cmp.d	0x01, $r1	; CAS latency = 2?
	beq	_bw_check
	nop
	or.d	0x20, $r2	; CAS latency = 3
_bw_check:
	move.d	CONFIG_ETRAX_DEF_R_SDRAM_CONFIG, $r1
	and.d	0x800000, $r1	; DRAM width is bit 23
	bne	_set_timing
	nop
	lsrq	1, $r2		; 16 bits. Shift down value.

	; Set timing parameters. Starts master clock
_set_timing:
	move.d	CONFIG_ETRAX_DEF_R_SDRAM_TIMING, $r1
	and.d	0x8000f9ff, $r1	; Make sure mrs data and command is 0
	or.d	0x80000000, $r1	; Make sure sdram enable bit is set
	move.d	$r1, $r5
	or.d	0x0000c000, $r1	; ref = disable
	lslq	16, $r2		; mrs data starts at bit 16
	or.d	$r2, $r1
	move.d	$r1, [R_SDRAM_TIMING]

	; Wait 200us
	move.d	10000, $r2
1:	bne	1b
	subq	1, $r2

	; Issue initialization command sequence
	move.d	_sdram_commands_start, $r2
	and.d	0x000fffff, $r2	; Make sure commands are read from flash
	move.d	_sdram_commands_end, $r3
	and.d	0x000fffff, $r3
1:	clear.d	$r4
	move.b	[$r2+], $r4
	lslq	9, $r4		; Command starts at bit 9
	or.d	$r1, $r4
	move.d	$r4, [R_SDRAM_TIMING]
	nop			; Wait five nop cycles between each command
	nop
	nop
	nop
	nop
	cmp.d	$r2, $r3
	bne	1b
	nop
	move.d	$r5, [R_SDRAM_TIMING]
	subq	1, $r6
	bne	_sdram_init
	nop
	ba	_sdram_commands_end
	nop

_sdram_commands_start:
	.byte	3	; Precharge
	.byte	0	; nop
	.byte	2	; refresh
	.byte	0	; nop
	.byte	2	; refresh
	.byte	0	; nop
	.byte	2	; refresh
	.byte	0	; nop
	.byte	2	; refresh
	.byte	0	; nop
	.byte	2	; refresh
	.byte	0	; nop
	.byte	2	; refresh
	.byte	0	; nop
	.byte	2	; refresh
	.byte	0	; nop
	.byte	2	; refresh
	.byte	0	; nop
	.byte	1	; mrs
	.byte	0	; nop
_sdram_commands_end:
#endif
#endif
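The mrs_data table above is just CAS latency and bus width packed into the SDRAM mode register. The same selection as a small C model, under the same assumptions as the assembly:

/* Model of the mrs_data calculation in dram_init.S: start from 0x40
   (32-bit bus, CAS latency 2), set bit 5 for CAS latency 3, and shift
   right once for a 16-bit bus. */
static unsigned int mrs_data(int cas_latency, int bus_width_bits)
{
	unsigned int mrs = 0x40;

	if (cas_latency == 3)
		mrs |= 0x20;	/* 0x40 -> 0x60 */
	if (bus_width_bits == 16)
		mrs >>= 1;	/* 0x40 -> 0x20, 0x60 -> 0x30 */
	return mrs;
}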
diff --git a/arch/cris/arch-v10/lib/hw_settings.S b/arch/cris/arch-v10/lib/hw_settings.S
new file mode 100644
index 000000000000..56905aaa7b6e
--- /dev/null
+++ b/arch/cris/arch-v10/lib/hw_settings.S
@@ -0,0 +1,62 @@
/*
 * $Id: hw_settings.S,v 1.1 2001/12/17 13:59:27 bjornw Exp $
 *
 * This table is used by some tools to extract hardware parameters.
 * The table should be included in the kernel and the decompressor.
 * Don't forget to update the tools if you change this table.
 *
 * Copyright (C) 2001 Axis Communications AB
 *
 * Authors:  Mikael Starvik (starvik@axis.com)
 */

#define PA_SET_VALUE ((CONFIG_ETRAX_DEF_R_PORT_PA_DIR << 8) | \
		      (CONFIG_ETRAX_DEF_R_PORT_PA_DATA))
#define PB_SET_VALUE ((CONFIG_ETRAX_DEF_R_PORT_PB_CONFIG << 16) | \
		      (CONFIG_ETRAX_DEF_R_PORT_PB_DIR << 8) | \
		      (CONFIG_ETRAX_DEF_R_PORT_PB_DATA))

	.ascii	"HW_PARAM_MAGIC"	; Magic number
	.dword	0xc0004000		; Kernel start address

	; Debug port
#ifdef CONFIG_ETRAX_DEBUG_PORT0
	.dword	0
#elif defined(CONFIG_ETRAX_DEBUG_PORT1)
	.dword	1
#elif defined(CONFIG_ETRAX_DEBUG_PORT2)
	.dword	2
#elif defined(CONFIG_ETRAX_DEBUG_PORT3)
	.dword	3
#else
	.dword	4	; No debug
#endif

	; SDRAM or EDO DRAM?
#ifdef CONFIG_ETRAX_SDRAM
	.dword	1
#else
	.dword	0
#endif

	; Register values
	.dword	R_WAITSTATES
	.dword	CONFIG_ETRAX_DEF_R_WAITSTATES
	.dword	R_BUS_CONFIG
	.dword	CONFIG_ETRAX_DEF_R_BUS_CONFIG
#ifdef CONFIG_ETRAX_SDRAM
	.dword	R_SDRAM_CONFIG
	.dword	CONFIG_ETRAX_DEF_R_SDRAM_CONFIG
	.dword	R_SDRAM_TIMING
	.dword	CONFIG_ETRAX_DEF_R_SDRAM_TIMING
#else
	.dword	R_DRAM_CONFIG
	.dword	CONFIG_ETRAX_DEF_R_DRAM_CONFIG
	.dword	R_DRAM_TIMING
	.dword	CONFIG_ETRAX_DEF_R_DRAM_TIMING
#endif
	.dword	R_PORT_PA_SET
	.dword	PA_SET_VALUE
	.dword	R_PORT_PB_SET
	.dword	PB_SET_VALUE
	.dword	0	; No more register values
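A sketch of how an external tool might walk this table; hypothetical code, since the Axis tools themselves are not part of this commit. The layout is: 14-byte magic string, kernel start, debug-port word, SDRAM/EDO word, then (register, value) dword pairs terminated by a zero register address:

#include <stdint.h>
#include <string.h>

/* Hypothetical parser for the HW_PARAM table; field names invented
   for illustration.  Values are read with memcpy since nothing in
   the table guarantees alignment after the 14-byte magic. */
struct hw_params {
	uint32_t kernel_start;
	uint32_t debug_port;	/* 0..3, or 4 = no debug */
	uint32_t is_sdram;	/* 1 = SDRAM, 0 = EDO DRAM */
	const uint8_t *reg_pairs;	/* (addr, value) dwords; addr 0 ends */
};

static int parse_hw_params(const uint8_t *p, struct hw_params *out)
{
	if (memcmp(p, "HW_PARAM_MAGIC", 14) != 0)
		return -1;
	p += 14;
	memcpy(&out->kernel_start, p, 4); p += 4;
	memcpy(&out->debug_port, p, 4); p += 4;
	memcpy(&out->is_sdram, p, 4); p += 4;
	out->reg_pairs = p;
	return 0;
}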
diff --git a/arch/cris/arch-v10/lib/memset.c b/arch/cris/arch-v10/lib/memset.c
new file mode 100644
index 000000000000..82bb66839171
--- /dev/null
+++ b/arch/cris/arch-v10/lib/memset.c
@@ -0,0 +1,252 @@
/*#************************************************************************#*/
/*#-------------------------------------------------------------------------*/
/*#                                                                          */
/*# FUNCTION NAME: memset()                                                  */
/*#                                                                          */
/*# PARAMETERS:    void* dst;   Destination address.                         */
/*#                int   c;     Value of byte to write.                      */
/*#                int   len;   Number of bytes to write.                    */
/*#                                                                          */
/*# RETURNS:       dst.                                                      */
/*#                                                                          */
/*# DESCRIPTION:   Sets the memory dst of length len bytes to c, as standard.*/
/*#                Framework taken from memcpy.  This routine is             */
/*#                very sensitive to compiler changes in register allocation.*/
/*#                Should really be rewritten to avoid this problem.         */
/*#                                                                          */
/*#-------------------------------------------------------------------------*/
/*#                                                                          */
/*# HISTORY                                                                  */
/*#                                                                          */
/*# DATE      NAME            CHANGES                                        */
/*# ----      ----            -------                                        */
/*# 990713    HP              Tired of watching this function (or            */
/*#                           really, the nonoptimized generic               */
/*#                           implementation) take up 90% of simulator       */
/*#                           output.  Measurements needed.                  */
/*#                                                                          */
/*#-------------------------------------------------------------------------*/

#include <linux/types.h>

/* No, there's no macro saying 12*4, since it is "hard" to get it into
   the asm in a good way.  Thus better to expose the problem everywhere.
   */

/* Assuming 1 cycle per dword written or read (ok, not really true), and
   one per instruction, then 43+3*(n/48-1) <= 24+24*(n/48-1)
   so n >= 45.7; n >= 0.9; we win on the first full 48-byte block to set. */

#define ZERO_BLOCK_SIZE (1*12*4)

void *memset(void *pdst,
             int c,
             size_t plen)
{
  /* Ok.  Now we want the parameters put in special registers.
     Make sure the compiler is able to make something useful of this. */

  register char *return_dst __asm__ ("r10") = pdst;
  register int n __asm__ ("r12") = plen;
  register int lc __asm__ ("r11") = c;

  /* Most apps use memset sanely.  Only those memsetting about 3..4
     bytes or less get penalized compared to the generic implementation
     - and that's not really sane use. */

  /* Ugh.  This is fragile at best.  Check with newer GCC releases, if
     they compile cascaded "x |= x << 8" sanely! */
  __asm__("movu.b %0,$r13\n\t"
          "lslq 8,$r13\n\t"
          "move.b %0,$r13\n\t"
          "move.d $r13,%0\n\t"
          "lslq 16,$r13\n\t"
          "or.d $r13,%0"
          : "=r" (lc) : "0" (lc) : "r13");

  {
    register char *dst __asm__ ("r13") = pdst;

    /* This is NONPORTABLE, but since this whole routine is  */
    /* grossly nonportable that doesn't matter.              */

    if (((unsigned long) pdst & 3) != 0
        /* Oops! n=0 must be a legal call, regardless of alignment. */
        && n >= 3)
    {
      if ((unsigned long)dst & 1)
      {
        *dst = (char) lc;
        n--;
        dst++;
      }

      if ((unsigned long)dst & 2)
      {
        *(short *)dst = lc;
        n -= 2;
        dst += 2;
      }
    }

    /* Now the fun part.  For the threshold value of this, check the
       equation above. */
    /* Decide which copying method to use. */
    if (n >= ZERO_BLOCK_SIZE)
    {
      /* For large clears we use 'movem' */

      /* It is not optimal to tell the compiler about clobbering any
	 registers; that will move the saving/restoring of those registers
	 to the function prologue/epilogue, and make non-movem sizes
	 suboptimal.

	 This method is not foolproof; it assumes that the "asm reg"
	 declarations at the beginning of the function really are used
	 here (beware: they may be moved to temporary registers).
	 This way, we do not have to save/move the registers around into
	 temporaries; we can safely use them straight away.

	 If you want to check that the allocation was right; then
	 check the equalities in the first comment.  It should say
	 "r13=r13, r12=r12, r11=r11" */
      __asm__ volatile ("
	;; Check that the following is true (same register names on
	;; both sides of equal sign, as in r8=r8):
	;; %0=r13, %1=r12, %4=r11
	;;
	;; Save the registers we'll clobber in the movem process
	;; on the stack.  Don't mention them to gcc, it will only be
	;; upset.
	subq	11*4,$sp
	movem	$r10,[$sp]

	move.d	$r11,$r0
	move.d	$r11,$r1
	move.d	$r11,$r2
	move.d	$r11,$r3
	move.d	$r11,$r4
	move.d	$r11,$r5
	move.d	$r11,$r6
	move.d	$r11,$r7
	move.d	$r11,$r8
	move.d	$r11,$r9
	move.d	$r11,$r10

	;; Now we've got this:
	;; r13 - dst
	;; r12 - n

	;; Update n for the first loop
	subq	12*4,$r12
0:
	subq	12*4,$r12
	bge	0b
	movem	$r11,[$r13+]

	addq	12*4,$r12	;; compensate for last loop underflowing n

	;; Restore registers from stack
	movem	[$sp+],$r10"

      /* Outputs */ : "=r" (dst), "=r" (n)
      /* Inputs */ : "0" (dst), "1" (n), "r" (lc));

    }

    /* Either we directly start copying, using dword copying
       in a loop, or we copy as much as possible with 'movem'
       and then the last block (<48 bytes) is copied here.
       This will work since 'movem' will have updated dst and n. */

    while ( n >= 16 )
    {
      *((long*)dst)++ = lc;
      *((long*)dst)++ = lc;
      *((long*)dst)++ = lc;
      *((long*)dst)++ = lc;
      n -= 16;
    }

    /* A switch() is definitely the fastest although it takes a LOT of code.
     * Particularly if you inline code this.
     */
    switch (n)
    {
      case 0:
        break;
      case 1:
        *(char*)dst = (char) lc;
        break;
      case 2:
        *(short*)dst = (short) lc;
        break;
      case 3:
        *((short*)dst)++ = (short) lc;
        *(char*)dst = (char) lc;
        break;
      case 4:
        *((long*)dst)++ = lc;
        break;
      case 5:
        *((long*)dst)++ = lc;
        *(char*)dst = (char) lc;
        break;
      case 6:
        *((long*)dst)++ = lc;
        *(short*)dst = (short) lc;
        break;
      case 7:
        *((long*)dst)++ = lc;
        *((short*)dst)++ = (short) lc;
        *(char*)dst = (char) lc;
        break;
      case 8:
        *((long*)dst)++ = lc;
        *((long*)dst)++ = lc;
        break;
      case 9:
        *((long*)dst)++ = lc;
        *((long*)dst)++ = lc;
        *(char*)dst = (char) lc;
        break;
      case 10:
        *((long*)dst)++ = lc;
        *((long*)dst)++ = lc;
        *(short*)dst = (short) lc;
        break;
      case 11:
        *((long*)dst)++ = lc;
        *((long*)dst)++ = lc;
        *((short*)dst)++ = (short) lc;
        *(char*)dst = (char) lc;
        break;
      case 12:
        *((long*)dst)++ = lc;
        *((long*)dst)++ = lc;
        *((long*)dst)++ = lc;
        break;
      case 13:
        *((long*)dst)++ = lc;
        *((long*)dst)++ = lc;
        *((long*)dst)++ = lc;
        *(char*)dst = (char) lc;
        break;
      case 14:
        *((long*)dst)++ = lc;
        *((long*)dst)++ = lc;
        *((long*)dst)++ = lc;
        *(short*)dst = (short) lc;
        break;
      case 15:
        *((long*)dst)++ = lc;
        *((long*)dst)++ = lc;
        *((long*)dst)++ = lc;
        *((short*)dst)++ = (short) lc;
        *(char*)dst = (char) lc;
        break;
    }
  }

  return return_dst; /* destination pointer. */
} /* memset() */
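The inline-asm prologue in memset() above only splats the fill byte across a 32-bit word; it is kept in asm because, per the comment, old GCC compiled the cascaded-shift form poorly. The portable C equivalent, for reference:

#include <stdint.h>

/* Portable equivalent of the movu.b/lslq/move.b/or.d sequence:
   replicate the low byte of c into all four bytes of a word. */
static uint32_t splat_byte(int c)
{
	uint32_t w = (uint8_t)c;

	w |= w << 8;	/* 0x000000ab -> 0x0000abab */
	w |= w << 16;	/* 0x0000abab -> 0xabababab */
	return w;
}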
diff --git a/arch/cris/arch-v10/lib/old_checksum.c b/arch/cris/arch-v10/lib/old_checksum.c
new file mode 100644
index 000000000000..22a6f0aa9cef
--- /dev/null
+++ b/arch/cris/arch-v10/lib/old_checksum.c
@@ -0,0 +1,85 @@
/* $Id: old_checksum.c,v 1.3 2003/10/27 08:04:32 starvik Exp $
 *
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Tom May, <ftom@netcom.com>
 *		Lots of code moved from tcp.c and ip.c; see those files
 *		for more names.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#include <net/checksum.h>
#include <linux/module.h>	/* for EXPORT_SYMBOL */

#undef PROFILE_CHECKSUM

#ifdef PROFILE_CHECKSUM
/* these are just for profiling the checksum code with an oscilloscope.. uh */
#if 0
#define BITOFF *((unsigned char *)0xb0000030) = 0xff
#define BITON *((unsigned char *)0xb0000030) = 0x0
#endif
#include <asm/io.h>
#define CBITON LED_ACTIVE_SET(1)
#define CBITOFF LED_ACTIVE_SET(0)
#define BITOFF
#define BITON
#else
#define BITOFF
#define BITON
#define CBITOFF
#define CBITON
#endif

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

#include <asm/delay.h>

unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
{
	/*
	 * Experiments with ethernet and slip connections show that buff
	 * is aligned on either a 2-byte or 4-byte boundary.
	 */
	const unsigned char *endMarker = buff + len;
	const unsigned char *marker = endMarker - (len % 16);
#if 0
	if ((int)buff & 0x3)
		printk("unaligned buff %p\n", buff);
	__delay(900); /* extra delay of 90 us to test performance hit */
#endif
	BITON;
	while (buff < marker) {
		sum += *((unsigned short *)buff)++;
		sum += *((unsigned short *)buff)++;
		sum += *((unsigned short *)buff)++;
		sum += *((unsigned short *)buff)++;
		sum += *((unsigned short *)buff)++;
		sum += *((unsigned short *)buff)++;
		sum += *((unsigned short *)buff)++;
		sum += *((unsigned short *)buff)++;
	}
	marker = endMarker - (len % 2);
	while (buff < marker) {
		sum += *((unsigned short *)buff)++;
	}
	if (endMarker - buff > 0) {
		sum += *buff;	/* add extra byte separately */
	}
	BITOFF;
	return sum;
}

EXPORT_SYMBOL(csum_partial);
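csum_partial() returns a running 32-bit sum, not a finished Internet checksum; a caller folds and complements it at the end. A C model of that finishing step (the kernel's own helper for this is csum_fold()):

#include <stdint.h>

/* Model of finishing a checksum started with csum_partial(): fold the
   32-bit sum to 16 bits, then return its one's complement. */
static uint16_t finish_checksum(uint32_t sum)
{
	sum = (sum >> 16) + (sum & 0xffff);	/* fold high into low */
	sum += sum >> 16;			/* fold any new carry */
	return (uint16_t)~sum;
}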
diff --git a/arch/cris/arch-v10/lib/string.c b/arch/cris/arch-v10/lib/string.c
new file mode 100644
index 000000000000..8ffde4901b57
--- /dev/null
+++ b/arch/cris/arch-v10/lib/string.c
@@ -0,0 +1,225 @@
/*#************************************************************************#*/
/*#-------------------------------------------------------------------------*/
/*#                                                                          */
/*# FUNCTION NAME: memcpy()                                                  */
/*#                                                                          */
/*# PARAMETERS:    void* dst;   Destination address.                         */
/*#                void* src;   Source address.                              */
/*#                int   len;   Number of bytes to copy.                     */
/*#                                                                          */
/*# RETURNS:       dst.                                                      */
/*#                                                                          */
/*# DESCRIPTION:   Copies len bytes of memory from src to dst.  No guarantees*/
/*#                about copying of overlapping memory areas.  This routine  */
/*#                is very sensitive to compiler changes in register         */
/*#                allocation.  Should really be rewritten to avoid this     */
/*#                problem.                                                  */
/*#                                                                          */
/*#-------------------------------------------------------------------------*/
/*#                                                                          */
/*# HISTORY                                                                  */
/*#                                                                          */
/*# DATE      NAME            CHANGES                                        */
/*# ----      ----            -------                                        */
/*# 941007    Kenny R         Creation                                       */
/*# 941011    Kenny R         Lots of optimizations and inlining.            */
/*# 941129    Ulf A           Adapted for use in libc.                       */
/*# 950216    HP              N==0 forgotten if non-aligned src/dst.         */
/*#                           Added some optimizations.                      */
/*# 001025    HP              Make src and dst char *.  Align dst to         */
/*#                           dword, not just word-if-both-src-and-dst-      */
/*#                           are-misaligned.                                */
/*#                                                                          */
/*#-------------------------------------------------------------------------*/

#include <linux/types.h>

void *memcpy(void *pdst,
             const void *psrc,
             size_t pn)
{
  /* Ok.  Now we want the parameters put in special registers.
     Make sure the compiler is able to make something useful of this.
     As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).

     If gcc was all right, it really would need no temporaries, and no
     stack space to save stuff on. */

  register void *return_dst __asm__ ("r10") = pdst;
  register char *dst __asm__ ("r13") = pdst;
  register const char *src __asm__ ("r11") = psrc;
  register int n __asm__ ("r12") = pn;


  /* When src is aligned but not dst, this makes a few extra needless
     cycles.  I believe it would take as many to check that the
     re-alignment was unnecessary. */
  if (((unsigned long) dst & 3) != 0
      /* Don't align if we wouldn't copy more than a few bytes; so we
	 don't have to check further for overflows. */
      && n >= 3)
  {
    if ((unsigned long) dst & 1)
    {
      n--;
      *(char*)dst = *(char*)src;
      src++;
      dst++;
    }

    if ((unsigned long) dst & 2)
    {
      n -= 2;
      *(short*)dst = *(short*)src;
      src += 2;
      dst += 2;
    }
  }

  /* Decide which copying method to use. */
  if (n >= 44*2)		/* Break even between movem and
				   move16 is at 38.7*2, but modulo 44. */
  {
    /* For large copies we use 'movem' */

    /* It is not optimal to tell the compiler about clobbering any
       registers; that will move the saving/restoring of those registers
       to the function prologue/epilogue, and make non-movem sizes
       suboptimal.

       This method is not foolproof; it assumes that the "asm reg"
       declarations at the beginning of the function really are used
       here (beware: they may be moved to temporary registers).
       This way, we do not have to save/move the registers around into
       temporaries; we can safely use them straight away.

       If you want to check that the allocation was right; then
       check the equalities in the first comment.  It should say
       "r13=r13, r11=r11, r12=r12" */
    __asm__ volatile ("
	;; Check that the following is true (same register names on
	;; both sides of equal sign, as in r8=r8):
	;; %0=r13, %1=r11, %2=r12
	;;
	;; Save the registers we'll use in the movem process
	;; on the stack.
	subq	11*4,$sp
	movem	$r10,[$sp]

	;; Now we've got this:
	;; r11 - src
	;; r13 - dst
	;; r12 - n

	;; Update n for the first loop
	subq	44,$r12
0:
	movem	[$r11+],$r10
	subq	44,$r12
	bge	0b
	movem	$r10,[$r13+]

	addq	44,$r12	;; compensate for last loop underflowing n

	;; Restore registers from stack
	movem	[$sp+],$r10"

     /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n)
     /* Inputs */ : "0" (dst), "1" (src), "2" (n));

  }

  /* Either we directly start copying, using dword copying
     in a loop, or we copy as much as possible with 'movem'
     and then the last block (<44 bytes) is copied here.
     This will work since 'movem' will have updated src, dst and n. */

  while ( n >= 16 )
  {
    *((long*)dst)++ = *((long*)src)++;
    *((long*)dst)++ = *((long*)src)++;
    *((long*)dst)++ = *((long*)src)++;
    *((long*)dst)++ = *((long*)src)++;
    n -= 16;
  }

  /* A switch() is definitely the fastest although it takes a LOT of code.
   * Particularly if you inline code this.
   */
  switch (n)
  {
    case 0:
      break;
    case 1:
      *(char*)dst = *(char*)src;
      break;
    case 2:
      *(short*)dst = *(short*)src;
      break;
    case 3:
      *((short*)dst)++ = *((short*)src)++;
      *(char*)dst = *(char*)src;
      break;
    case 4:
      *((long*)dst)++ = *((long*)src)++;
      break;
    case 5:
      *((long*)dst)++ = *((long*)src)++;
      *(char*)dst = *(char*)src;
      break;
    case 6:
      *((long*)dst)++ = *((long*)src)++;
      *(short*)dst = *(short*)src;
      break;
    case 7:
      *((long*)dst)++ = *((long*)src)++;
      *((short*)dst)++ = *((short*)src)++;
      *(char*)dst = *(char*)src;
      break;
    case 8:
      *((long*)dst)++ = *((long*)src)++;
      *((long*)dst)++ = *((long*)src)++;
      break;
    case 9:
      *((long*)dst)++ = *((long*)src)++;
      *((long*)dst)++ = *((long*)src)++;
      *(char*)dst = *(char*)src;
      break;
    case 10:
      *((long*)dst)++ = *((long*)src)++;
      *((long*)dst)++ = *((long*)src)++;
      *(short*)dst = *(short*)src;
      break;
    case 11:
      *((long*)dst)++ = *((long*)src)++;
      *((long*)dst)++ = *((long*)src)++;
      *((short*)dst)++ = *((short*)src)++;
      *(char*)dst = *(char*)src;
      break;
    case 12:
      *((long*)dst)++ = *((long*)src)++;
      *((long*)dst)++ = *((long*)src)++;
      *((long*)dst)++ = *((long*)src)++;
      break;
    case 13:
      *((long*)dst)++ = *((long*)src)++;
      *((long*)dst)++ = *((long*)src)++;
      *((long*)dst)++ = *((long*)src)++;
      *(char*)dst = *(char*)src;
      break;
    case 14:
      *((long*)dst)++ = *((long*)src)++;
      *((long*)dst)++ = *((long*)src)++;
      *((long*)dst)++ = *((long*)src)++;
      *(short*)dst = *(short*)src;
      break;
    case 15:
      *((long*)dst)++ = *((long*)src)++;
      *((long*)dst)++ = *((long*)src)++;
      *((long*)dst)++ = *((long*)src)++;
      *((short*)dst)++ = *((short*)src)++;
      *(char*)dst = *(char*)src;
      break;
  }

  return return_dst; /* destination pointer. */
} /* memcpy() */
diff --git a/arch/cris/arch-v10/lib/usercopy.c b/arch/cris/arch-v10/lib/usercopy.c
new file mode 100644
index 000000000000..43778d53c254
--- /dev/null
+++ b/arch/cris/arch-v10/lib/usercopy.c
@@ -0,0 +1,523 @@
/*
 * User address space access functions.
 * The non-inlined parts of asm-cris/uaccess.h are here.
 *
 * Copyright (C) 2000, Axis Communications AB.
 *
 * Written by Hans-Peter Nilsson.
 * Pieces used from memcpy, originally by Kenny Ranerup long time ago.
 */

#include <asm/uaccess.h>

/* Asm:s have been tweaked (within the domain of correctness) to give
   satisfactory results for "gcc version 2.96 20000427 (experimental)".

   Check regularly...

   Note that the PC saved at a bus-fault is the address *after* the
   faulting instruction, which means the branch-target for instructions in
   delay-slots for taken branches.  Note also that the postincrement in
   the instruction is performed regardless of bus-fault; the register is
   seen updated in fault handlers.

   Oh, and on the code formatting issue, to whoever feels like "fixing
   it" to Conformity: I'm too "lazy", but why don't you go ahead and "fix"
   string.c too.  I just don't think too many people will hack this file
   for the code format to be an issue.  */


/* Copy to userspace.  This is based on the memcpy used for
   kernel-to-kernel copying; see "string.c". */

unsigned long
__copy_user (void __user *pdst, const void *psrc, unsigned long pn)
{
  /* We want the parameters put in special registers.
     Make sure the compiler is able to make something useful of this.
     As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).

     FIXME: Comment for old gcc version.  Check.
     If gcc was all right, it really would need no temporaries, and no
     stack space to save stuff on.  */

  register char *dst __asm__ ("r13") = pdst;
  register const char *src __asm__ ("r11") = psrc;
  register int n __asm__ ("r12") = pn;
  register int retn __asm__ ("r10") = 0;


  /* When src is aligned but not dst, this makes a few extra needless
     cycles.  I believe it would take as many to check that the
     re-alignment was unnecessary.  */
  if (((unsigned long) dst & 3) != 0
      /* Don't align if we wouldn't copy more than a few bytes; so we
	 don't have to check further for overflows.  */
      && n >= 3)
  {
    if ((unsigned long) dst & 1)
    {
      __asm_copy_to_user_1 (dst, src, retn);
      n--;
    }

    if ((unsigned long) dst & 2)
    {
      __asm_copy_to_user_2 (dst, src, retn);
      n -= 2;
    }
  }

  /* Decide which copying method to use. */
  if (n >= 44*2)		/* Break even between movem and
				   move16 is at 38.7*2, but modulo 44. */
  {
    /* For large copies we use 'movem'.  */

    /* It is not optimal to tell the compiler about clobbering any
       registers; that will move the saving/restoring of those registers
       to the function prologue/epilogue, and make non-movem sizes
       suboptimal.

       This method is not foolproof; it assumes that the "asm reg"
       declarations at the beginning of the function really are used
       here (beware: they may be moved to temporary registers).
       This way, we do not have to save/move the registers around into
       temporaries; we can safely use them straight away.

       If you want to check that the allocation was right; then
       check the equalities in the first comment.  It should say
       "r13=r13, r11=r11, r12=r12".  */
    __asm__ volatile ("\
	.ifnc %0%1%2%3,$r13$r11$r12$r10			\n\
	.err						\n\
	.endif						\n\

	;; Save the registers we'll use in the movem process
	;; on the stack.
	subq	11*4,$sp
	movem	$r10,[$sp]

	;; Now we've got this:
	;; r11 - src
	;; r13 - dst
	;; r12 - n

	;; Update n for the first loop
	subq	44,$r12

; Since the noted PC of a faulting instruction in a delay-slot of a taken
; branch is that of the branch target, we actually point at the from-movem
; for this case.  There is no ambiguity here; if there was a fault in that
; instruction (meaning a kernel oops), the faulted PC would be the address
; after *that* movem.

0:
	movem	[$r11+],$r10
	subq	44,$r12
	bge	0b
	movem	$r10,[$r13+]
1:
	addq	44,$r12	;; compensate for last loop underflowing n

	;; Restore registers from stack
	movem	[$sp+],$r10
2:
	.section .fixup,\"ax\"

; To provide a correct count in r10 of bytes that failed to be copied,
; we jump back into the loop if the loop-branch was taken.  There is no
; performance penalty for sane use; the program will segfault soon enough.

3:
	move.d	[$sp],$r10
	addq	44,$r10
	move.d	$r10,[$sp]
	jump	0b
4:
	movem	[$sp+],$r10
	addq	44,$r10
	addq	44,$r12
	jump	2b

	.previous
	.section __ex_table,\"a\"
	.dword	0b,3b
	.dword	1b,4b
	.previous"

     /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n), "=r" (retn)
     /* Inputs */ : "0" (dst), "1" (src), "2" (n), "3" (retn));

  }

  /* Either we directly start copying, using dword copying in a loop, or
     we copy as much as possible with 'movem' and then the last block (<44
     bytes) is copied here.  This will work since 'movem' will have
     updated SRC, DST and N.  */

  while (n >= 16)
  {
    __asm_copy_to_user_16 (dst, src, retn);
    n -= 16;
  }

  /* Having a separate by-four loop cuts down on cache footprint.
     FIXME:  Test with and without; increasing switch to be 0..15.  */
  while (n >= 4)
  {
    __asm_copy_to_user_4 (dst, src, retn);
    n -= 4;
  }

  switch (n)
  {
    case 0:
      break;
    case 1:
      __asm_copy_to_user_1 (dst, src, retn);
      break;
    case 2:
      __asm_copy_to_user_2 (dst, src, retn);
      break;
    case 3:
      __asm_copy_to_user_3 (dst, src, retn);
      break;
  }

  return retn;
}
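For context, a simplified sketch of how a copy_to_user()-style wrapper layers on top of __copy_user; this is not the verbatim CRIS asm-cris/uaccess.h definition, which also special-cases constant sizes:

/* Simplified sketch, assuming the 2.6-era access_ok(VERIFY_WRITE, ...)
   semantics; the return value is the number of bytes NOT copied. */
static inline unsigned long
copy_to_user_sketch (void __user *to, const void *from, unsigned long n)
{
  if (access_ok (VERIFY_WRITE, to, n))
    return __copy_user (to, from, n);
  return n;			/* nothing writable: all n bytes uncopied */
}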

/* Copy from user to kernel, zeroing the bytes that were inaccessible in
   userland.  The return-value is the number of bytes that were
   inaccessible.  */

unsigned long
__copy_user_zeroing (void __user *pdst, const void *psrc, unsigned long pn)
{
  /* We want the parameters put in special registers.
     Make sure the compiler is able to make something useful of this.
     As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).

     FIXME: Comment for old gcc version.  Check.
     If gcc was all right, it really would need no temporaries, and no
     stack space to save stuff on.  */

  register char *dst __asm__ ("r13") = pdst;
  register const char *src __asm__ ("r11") = psrc;
  register int n __asm__ ("r12") = pn;
  register int retn __asm__ ("r10") = 0;

  /* The best reason to align src is that we then know that a read-fault
     was for aligned bytes; there's no 1..3 remaining good bytes to
     pickle.  */
  if (((unsigned long) src & 3) != 0)
  {
    if (((unsigned long) src & 1) && n != 0)
    {
      __asm_copy_from_user_1 (dst, src, retn);
      n--;
    }

    if (((unsigned long) src & 2) && n >= 2)
    {
      __asm_copy_from_user_2 (dst, src, retn);
      n -= 2;
    }

    /* We only need one check after the unalignment-adjustments, because
       if both adjustments were done, either both or neither reference
       had an exception.  */
    if (retn != 0)
      goto copy_exception_bytes;
  }

  /* Decide which copying method to use. */
  if (n >= 44*2)		/* Break even between movem and
				   move16 is at 38.7*2, but modulo 44.
				   FIXME: We use move4 now.  */
  {
    /* For large copies we use 'movem' */

    /* It is not optimal to tell the compiler about clobbering any
       registers; that will move the saving/restoring of those registers
       to the function prologue/epilogue, and make non-movem sizes
       suboptimal.

       This method is not foolproof; it assumes that the "asm reg"
       declarations at the beginning of the function really are used
       here (beware: they may be moved to temporary registers).
       This way, we do not have to save/move the registers around into
       temporaries; we can safely use them straight away.

       If you want to check that the allocation was right; then
       check the equalities in the first comment.  It should say
       "r13=r13, r11=r11, r12=r12" */
    __asm__ volatile ("\
	.ifnc %0%1%2%3,$r13$r11$r12$r10			\n\
	.err						\n\
	.endif						\n\

	;; Save the registers we'll use in the movem process
	;; on the stack.
	subq	11*4,$sp
	movem	$r10,[$sp]

	;; Now we've got this:
	;; r11 - src
	;; r13 - dst
	;; r12 - n

	;; Update n for the first loop
	subq	44,$r12
0:
	movem	[$r11+],$r10
1:
	subq	44,$r12
	bge	0b
	movem	$r10,[$r13+]

	addq	44,$r12	;; compensate for last loop underflowing n

	;; Restore registers from stack
	movem	[$sp+],$r10
4:
	.section .fixup,\"ax\"

;; Do not jump back into the loop if we fail.  For some uses, we get a
;; page fault somewhere on the line.  Without checking for page limits,
;; we don't know where, but we need to copy accurately and keep an
;; accurate count; not just clear the whole line.  To do that, we fall
;; down in the code below, proceeding with smaller amounts.  It should
;; be kept in mind that we have to cater to code like what at one time
;; was in fs/super.c:
;;  i = size - copy_from_user((void *)page, data, size);
;; which would cause repeated faults while clearing the remainder of
;; the SIZE bytes at PAGE after the first fault.
;; A caveat here is that we must not fall through from a failing page
;; to a valid page.

3:
	movem	[$sp+],$r10
	addq	44,$r12	;; Get back count before faulting point.
	subq	44,$r11	;; Get back pointer to faulting movem-line.
	jump	4b	;; Fall through, pretending the fault didn't happen.

	.previous
	.section __ex_table,\"a\"
	.dword	1b,3b
	.previous"

     /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n), "=r" (retn)
     /* Inputs */ : "0" (dst), "1" (src), "2" (n), "3" (retn));

  }

  /* Either we directly start copying here, using dword copying in a loop,
     or we copy as much as possible with 'movem' and then the last block
     (<44 bytes) is copied here.  This will work since 'movem' will have
     updated src, dst and n.  (Except with failing src.)

     Since we want to keep src accurate, we can't use
     __asm_copy_from_user_N with N != (1, 2, 4); it updates dst and
     retn, but not src (by design; its value is ignored elsewhere).  */

  while (n >= 4)
  {
    __asm_copy_from_user_4 (dst, src, retn);
    n -= 4;

    if (retn)
      goto copy_exception_bytes;
  }

  /* If we get here, there were no memory read faults.  */
  switch (n)
  {
    /* These copies are at least "naturally aligned" (so we don't have
       to check each byte), due to the src alignment code before the
       movem loop.  The *_3 case *will* get the correct count for retn.  */
    case 0:
      /* This case deliberately left in (if you have doubts check the
	 generated assembly code).  */
      break;
    case 1:
      __asm_copy_from_user_1 (dst, src, retn);
      break;
    case 2:
      __asm_copy_from_user_2 (dst, src, retn);
      break;
    case 3:
      __asm_copy_from_user_3 (dst, src, retn);
      break;
  }

  /* If we get here, retn correctly reflects the number of failing
     bytes.  */
  return retn;

copy_exception_bytes:
  /* We already have "retn" bytes cleared, and need to clear the
     remaining "n" bytes.  A non-optimized simple byte-for-byte in-line
     memset is preferred here, since this isn't speed-critical code and
     we'd rather have this a leaf-function than calling memset.  */
  {
    char *endp;
    for (endp = dst + n; dst < endp; dst++)
      *dst = 0;
  }

  return retn + n;
}
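The fixup comment above quotes the caller pattern this function must support; spelled out schematically:

/* Schematic of the quoted pattern: copy_from_user() returns the number
   of bytes NOT copied, and __copy_user_zeroing zeroes the uncopied tail
   of the kernel buffer, so the result is a safe count of valid bytes. */
static unsigned long
bytes_actually_copied (void *page, const void __user *data, unsigned long size)
{
  return size - copy_from_user (page, data, size);
}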

/* Zero userspace.  */

unsigned long
__do_clear_user (void __user *pto, unsigned long pn)
{
  /* We want the parameters put in special registers.
     Make sure the compiler is able to make something useful of this.
     As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).

     FIXME: Comment for old gcc version.  Check.
     If gcc was all right, it really would need no temporaries, and no
     stack space to save stuff on.  */

  register char *dst __asm__ ("r13") = pto;
  register int n __asm__ ("r12") = pn;
  register int retn __asm__ ("r10") = 0;


  if (((unsigned long) dst & 3) != 0
      /* Don't align if we wouldn't copy more than a few bytes.  */
      && n >= 3)
  {
    if ((unsigned long) dst & 1)
    {
      __asm_clear_1 (dst, retn);
      n--;
    }

    if ((unsigned long) dst & 2)
    {
      __asm_clear_2 (dst, retn);
      n -= 2;
    }
  }

  /* Decide which copying method to use.
     FIXME: This number is from the "ordinary" kernel memset.  */
  if (n >= (1*48))
  {
    /* For large clears we use 'movem' */

    /* It is not optimal to tell the compiler about clobbering any
       call-saved registers; that will move the saving/restoring of
       those registers to the function prologue/epilogue, and make
       non-movem sizes suboptimal.

       This method is not foolproof; it assumes that the "asm reg"
       declarations at the beginning of the function really are used
       here (beware: they may be moved to temporary registers).
       This way, we do not have to save/move the registers around into
       temporaries; we can safely use them straight away.

       If you want to check that the allocation was right; then
       check the equalities in the first comment.  It should say
       something like "r13=r13, r11=r11, r12=r12".  */
    __asm__ volatile ("\
	.ifnc %0%1%2,$r13$r12$r10			\n\
	.err						\n\
	.endif						\n\

	;; Save the registers we'll clobber in the movem process
	;; on the stack.  Don't mention them to gcc, it will only be
	;; upset.
	subq	11*4,$sp
	movem	$r10,[$sp]

	clear.d	$r0
	clear.d	$r1
	clear.d	$r2
	clear.d	$r3
	clear.d	$r4
	clear.d	$r5
	clear.d	$r6
	clear.d	$r7
	clear.d	$r8
	clear.d	$r9
	clear.d	$r10
	clear.d	$r11

	;; Now we've got this:
	;; r13 - dst
	;; r12 - n

	;; Update n for the first loop
	subq	12*4,$r12
0:
	subq	12*4,$r12
	bge	0b
	movem	$r11,[$r13+]
1:
	addq	12*4,$r12	;; compensate for last loop underflowing n

	;; Restore registers from stack
	movem	[$sp+],$r10
2:
	.section .fixup,\"ax\"
3:
	move.d	[$sp],$r10
	addq	12*4,$r10
	move.d	$r10,[$sp]
	clear.d	$r10
	jump	0b

4:
	movem	[$sp+],$r10
	addq	12*4,$r10
	addq	12*4,$r12
	jump	2b

	.previous
	.section __ex_table,\"a\"
	.dword	0b,3b
	.dword	1b,4b
	.previous"

     /* Outputs */ : "=r" (dst), "=r" (n), "=r" (retn)
     /* Inputs */ : "0" (dst), "1" (n), "2" (retn)
     /* Clobber */ : "r11");
  }

  while (n >= 16)
  {
    __asm_clear_16 (dst, retn);
    n -= 16;
  }

  /* Having a separate by-four loop cuts down on cache footprint.
     FIXME:  Test with and without; increasing switch to be 0..15.  */
  while (n >= 4)
  {
    __asm_clear_4 (dst, retn);
    n -= 4;
  }

  switch (n)
  {
    case 0:
      break;
    case 1:
      __asm_clear_1 (dst, retn);
      break;
    case 2:
      __asm_clear_2 (dst, retn);
      break;
    case 3:
      __asm_clear_3 (dst, retn);
      break;
  }

  return retn;
}
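And the corresponding sketch for clear_user() over __do_clear_user, under the same access_ok() assumption as the copy_to_user sketch above:

/* Sketch of clear_user() layered over __do_clear_user; returns the
   number of bytes NOT cleared. */
static inline unsigned long
clear_user_sketch (void __user *to, unsigned long n)
{
  if (access_ok (VERIFY_WRITE, to, n))
    return __do_clear_user (to, n);
  return n;
}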