diff options
author | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 18:20:36 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 18:20:36 -0400 |
commit | 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch) | |
tree | 0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/cris/arch-v10/lib |
Linux-2.6.12-rc2v2.6.12-rc2
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.
Let it rip!
Diffstat (limited to 'arch/cris/arch-v10/lib')
-rw-r--r-- | arch/cris/arch-v10/lib/Makefile | 9 | ||||
-rw-r--r-- | arch/cris/arch-v10/lib/checksum.S | 124 | ||||
-rw-r--r-- | arch/cris/arch-v10/lib/checksumcopy.S | 132 | ||||
-rw-r--r-- | arch/cris/arch-v10/lib/csumcpfruser.S | 64 | ||||
-rw-r--r-- | arch/cris/arch-v10/lib/dmacopy.c | 43 | ||||
-rw-r--r-- | arch/cris/arch-v10/lib/dram_init.S | 205 | ||||
-rw-r--r-- | arch/cris/arch-v10/lib/hw_settings.S | 62 | ||||
-rw-r--r-- | arch/cris/arch-v10/lib/memset.c | 252 | ||||
-rw-r--r-- | arch/cris/arch-v10/lib/old_checksum.c | 85 | ||||
-rw-r--r-- | arch/cris/arch-v10/lib/string.c | 225 | ||||
-rw-r--r-- | arch/cris/arch-v10/lib/usercopy.c | 523 |
11 files changed, 1724 insertions, 0 deletions
diff --git a/arch/cris/arch-v10/lib/Makefile b/arch/cris/arch-v10/lib/Makefile new file mode 100644 index 000000000000..36e9a9c5239b --- /dev/null +++ b/arch/cris/arch-v10/lib/Makefile | |||
@@ -0,0 +1,9 @@ | |||
1 | # | ||
2 | # Makefile for Etrax-specific library files.. | ||
3 | # | ||
4 | |||
5 | |||
6 | EXTRA_AFLAGS := -traditional | ||
7 | |||
8 | lib-y = checksum.o checksumcopy.o string.o usercopy.o memset.o csumcpfruser.o | ||
9 | |||
diff --git a/arch/cris/arch-v10/lib/checksum.S b/arch/cris/arch-v10/lib/checksum.S new file mode 100644 index 000000000000..85c48f0a9ec2 --- /dev/null +++ b/arch/cris/arch-v10/lib/checksum.S | |||
@@ -0,0 +1,124 @@ | |||
1 | /* $Id: checksum.S,v 1.1 2001/12/17 13:59:27 bjornw Exp $ | ||
2 | * A fast checksum routine using movem | ||
3 | * Copyright (c) 1998-2001 Axis Communications AB | ||
4 | * | ||
5 | * csum_partial(const unsigned char * buff, int len, unsigned int sum) | ||
6 | */ | ||
7 | |||
8 | .globl csum_partial | ||
9 | csum_partial: | ||
10 | |||
11 | ;; r10 - src | ||
12 | ;; r11 - length | ||
13 | ;; r12 - checksum | ||
14 | |||
15 | ;; check for breakeven length between movem and normal word looping versions | ||
16 | ;; we also do _NOT_ want to compute a checksum over more than the | ||
17 | ;; actual length when length < 40 | ||
18 | |||
19 | cmpu.w 80,$r11 | ||
20 | blo _word_loop | ||
21 | nop | ||
22 | |||
23 | ;; need to save the registers we use below in the movem loop | ||
24 | ;; this overhead is why we have a check above for breakeven length | ||
25 | ;; only r0 - r8 have to be saved, the other ones are clobber-able | ||
26 | ;; according to the ABI | ||
27 | |||
28 | subq 9*4,$sp | ||
29 | movem $r8,[$sp] | ||
30 | |||
31 | ;; do a movem checksum | ||
32 | |||
33 | subq 10*4,$r11 ; update length for the first loop | ||
34 | |||
35 | _mloop: movem [$r10+],$r9 ; read 10 longwords | ||
36 | |||
37 | ;; perform dword checksumming on the 10 longwords | ||
38 | |||
39 | add.d $r0,$r12 | ||
40 | ax | ||
41 | add.d $r1,$r12 | ||
42 | ax | ||
43 | add.d $r2,$r12 | ||
44 | ax | ||
45 | add.d $r3,$r12 | ||
46 | ax | ||
47 | add.d $r4,$r12 | ||
48 | ax | ||
49 | add.d $r5,$r12 | ||
50 | ax | ||
51 | add.d $r6,$r12 | ||
52 | ax | ||
53 | add.d $r7,$r12 | ||
54 | ax | ||
55 | add.d $r8,$r12 | ||
56 | ax | ||
57 | add.d $r9,$r12 | ||
58 | |||
59 | ;; fold the carry into the checksum, to avoid having to loop the carry | ||
60 | ;; back into the top | ||
61 | |||
62 | ax | ||
63 | addq 0,$r12 | ||
64 | ax ; do it again, since we might have generated a carry | ||
65 | addq 0,$r12 | ||
66 | |||
67 | subq 10*4,$r11 | ||
68 | bge _mloop | ||
69 | nop | ||
70 | |||
71 | addq 10*4,$r11 ; compensate for last loop underflowing length | ||
72 | |||
73 | movem [$sp+],$r8 ; restore regs | ||
74 | |||
75 | _word_loop: | ||
76 | ;; only fold if there is anything to fold. | ||
77 | |||
78 | cmpq 0,$r12 | ||
79 | beq _no_fold | ||
80 | |||
81 | ;; fold 32-bit checksum into a 16-bit checksum, to avoid carries below. | ||
82 | ;; r9 and r13 can be used as temporaries. | ||
83 | |||
84 | moveq -1,$r9 ; put 0xffff in r9, faster than move.d 0xffff,r9 | ||
85 | lsrq 16,$r9 | ||
86 | |||
87 | move.d $r12,$r13 | ||
88 | lsrq 16,$r13 ; r13 = checksum >> 16 | ||
89 | and.d $r9,$r12 ; checksum = checksum & 0xffff | ||
90 | add.d $r13,$r12 ; checksum += r13 | ||
91 | move.d $r12,$r13 ; do the same again, maybe we got a carry last add | ||
92 | lsrq 16,$r13 | ||
93 | and.d $r9,$r12 | ||
94 | add.d $r13,$r12 | ||
95 | |||
96 | _no_fold: | ||
97 | cmpq 2,$r11 | ||
98 | blt _no_words | ||
99 | nop | ||
100 | |||
101 | ;; checksum the rest of the words | ||
102 | |||
103 | subq 2,$r11 | ||
104 | |||
105 | _wloop: subq 2,$r11 | ||
106 | bge _wloop | ||
107 | addu.w [$r10+],$r12 | ||
108 | |||
109 | addq 2,$r11 | ||
110 | |||
111 | _no_words: | ||
112 | ;; see if we have one odd byte more | ||
113 | cmpq 1,$r11 | ||
114 | beq _do_byte | ||
115 | nop | ||
116 | ret | ||
117 | move.d $r12, $r10 | ||
118 | |||
119 | _do_byte: | ||
120 | ;; copy and checksum the last byte | ||
121 | addu.b [$r10],$r12 | ||
122 | ret | ||
123 | move.d $r12, $r10 | ||
124 | |||
diff --git a/arch/cris/arch-v10/lib/checksumcopy.S b/arch/cris/arch-v10/lib/checksumcopy.S new file mode 100644 index 000000000000..35cbffb306fd --- /dev/null +++ b/arch/cris/arch-v10/lib/checksumcopy.S | |||
@@ -0,0 +1,132 @@ | |||
1 | /* $Id: checksumcopy.S,v 1.1 2001/12/17 13:59:27 bjornw Exp $ | ||
2 | * A fast checksum+copy routine using movem | ||
3 | * Copyright (c) 1998, 2001 Axis Communications AB | ||
4 | * | ||
5 | * Authors: Bjorn Wesen | ||
6 | * | ||
7 | * csum_partial_copy_nocheck(const char *src, char *dst, | ||
8 | * int len, unsigned int sum) | ||
9 | */ | ||
10 | |||
11 | .globl csum_partial_copy_nocheck | ||
12 | csum_partial_copy_nocheck: | ||
13 | |||
14 | ;; r10 - src | ||
15 | ;; r11 - dst | ||
16 | ;; r12 - length | ||
17 | ;; r13 - checksum | ||
18 | |||
19 | ;; check for breakeven length between movem and normal word looping versions | ||
20 | ;; we also do _NOT_ want to compute a checksum over more than the | ||
21 | ;; actual length when length < 40 | ||
22 | |||
23 | cmpu.w 80, $r12 | ||
24 | blo _word_loop | ||
25 | nop | ||
26 | |||
27 | ;; need to save the registers we use below in the movem loop | ||
28 | ;; this overhead is why we have a check above for breakeven length | ||
29 | ;; only r0 - r8 have to be saved, the other ones are clobber-able | ||
30 | ;; according to the ABI | ||
31 | |||
32 | subq 9*4, $sp | ||
33 | movem $r8, [$sp] | ||
34 | |||
35 | ;; do a movem copy and checksum | ||
36 | |||
37 | subq 10*4, $r12 ; update length for the first loop | ||
38 | |||
39 | _mloop: movem [$r10+],$r9 ; read 10 longwords | ||
40 | 1: ;; A failing userspace access will have this as PC. | ||
41 | movem $r9,[$r11+] ; write 10 longwords | ||
42 | |||
43 | ;; perform dword checksumming on the 10 longwords | ||
44 | |||
45 | add.d $r0,$r13 | ||
46 | ax | ||
47 | add.d $r1,$r13 | ||
48 | ax | ||
49 | add.d $r2,$r13 | ||
50 | ax | ||
51 | add.d $r3,$r13 | ||
52 | ax | ||
53 | add.d $r4,$r13 | ||
54 | ax | ||
55 | add.d $r5,$r13 | ||
56 | ax | ||
57 | add.d $r6,$r13 | ||
58 | ax | ||
59 | add.d $r7,$r13 | ||
60 | ax | ||
61 | add.d $r8,$r13 | ||
62 | ax | ||
63 | add.d $r9,$r13 | ||
64 | |||
65 | ;; fold the carry into the checksum, to avoid having to loop the carry | ||
66 | ;; back into the top | ||
67 | |||
68 | ax | ||
69 | addq 0,$r13 | ||
70 | ax ; do it again, since we might have generated a carry | ||
71 | addq 0,$r13 | ||
72 | |||
73 | subq 10*4,$r12 | ||
74 | bge _mloop | ||
75 | nop | ||
76 | |||
77 | addq 10*4,$r12 ; compensate for last loop underflowing length | ||
78 | |||
79 | movem [$sp+],$r8 ; restore regs | ||
80 | |||
81 | _word_loop: | ||
82 | ;; only fold if there is anything to fold. | ||
83 | |||
84 | cmpq 0,$r13 | ||
85 | beq _no_fold | ||
86 | |||
87 | ;; fold 32-bit checksum into a 16-bit checksum, to avoid carries below | ||
88 | ;; r9 can be used as temporary. | ||
89 | |||
90 | move.d $r13,$r9 | ||
91 | lsrq 16,$r9 ; r0 = checksum >> 16 | ||
92 | and.d 0xffff,$r13 ; checksum = checksum & 0xffff | ||
93 | add.d $r9,$r13 ; checksum += r0 | ||
94 | move.d $r13,$r9 ; do the same again, maybe we got a carry last add | ||
95 | lsrq 16,$r9 | ||
96 | and.d 0xffff,$r13 | ||
97 | add.d $r9,$r13 | ||
98 | |||
99 | _no_fold: | ||
100 | cmpq 2,$r12 | ||
101 | blt _no_words | ||
102 | nop | ||
103 | |||
104 | ;; copy and checksum the rest of the words | ||
105 | |||
106 | subq 2,$r12 | ||
107 | |||
108 | _wloop: move.w [$r10+],$r9 | ||
109 | 2: ;; A failing userspace access will have this as PC. | ||
110 | addu.w $r9,$r13 | ||
111 | subq 2,$r12 | ||
112 | bge _wloop | ||
113 | move.w $r9,[$r11+] | ||
114 | |||
115 | addq 2,$r12 | ||
116 | |||
117 | _no_words: | ||
118 | ;; see if we have one odd byte more | ||
119 | cmpq 1,$r12 | ||
120 | beq _do_byte | ||
121 | nop | ||
122 | ret | ||
123 | move.d $r13, $r10 | ||
124 | |||
125 | _do_byte: | ||
126 | ;; copy and checksum the last byte | ||
127 | move.b [$r10],$r9 | ||
128 | 3: ;; A failing userspace access will have this as PC. | ||
129 | addu.b $r9,$r13 | ||
130 | move.b $r9,[$r11] | ||
131 | ret | ||
132 | move.d $r13, $r10 | ||
diff --git a/arch/cris/arch-v10/lib/csumcpfruser.S b/arch/cris/arch-v10/lib/csumcpfruser.S new file mode 100644 index 000000000000..5f41ccd62754 --- /dev/null +++ b/arch/cris/arch-v10/lib/csumcpfruser.S | |||
@@ -0,0 +1,64 @@ | |||
1 | /* | ||
2 | * Add-on to transform csum_partial_copy_nocheck in checksumcopy.S into | ||
3 | * csum_partial_copy_from_user by adding exception records. | ||
4 | * | ||
5 | * Copyright (C) 2001 Axis Communications AB. | ||
6 | * | ||
7 | * Author: Hans-Peter Nilsson. | ||
8 | */ | ||
9 | |||
10 | #include <asm/errno.h> | ||
11 | |||
12 | /* Same function body, but a different name. If we just added exception | ||
13 | records to _csum_partial_copy_nocheck and made it generic, we wouldn't | ||
14 | know a user fault from a kernel fault and we would have overhead in | ||
15 | each kernel caller for the error-pointer argument. | ||
16 | |||
17 | unsigned int csum_partial_copy_from_user | ||
18 | (const char *src, char *dst, int len, unsigned int sum, int *errptr); | ||
19 | |||
20 | Note that the errptr argument is only set if we encounter an error. | ||
21 | It is conveniently located on the stack, so the normal function body | ||
22 | does not have to handle it. */ | ||
23 | |||
24 | #define csum_partial_copy_nocheck csum_partial_copy_from_user | ||
25 | |||
26 | /* There are local labels numbered 1, 2 and 3 present to mark the | ||
27 | different from-user accesses. */ | ||
28 | #include "checksumcopy.S" | ||
29 | |||
30 | .section .fixup,"ax" | ||
31 | |||
32 | ;; Here from the movem loop; restore stack. | ||
33 | 4: | ||
34 | movem [$sp+],$r8 | ||
35 | ;; r12 is already decremented. Add back chunk_size-2. | ||
36 | addq 40-2,$r12 | ||
37 | |||
38 | ;; Here from the word loop; r12 is off by 2; add it back. | ||
39 | 5: | ||
40 | addq 2,$r12 | ||
41 | |||
42 | ;; Here from a failing single byte. | ||
43 | 6: | ||
44 | |||
45 | ;; Signal in *errptr that we had a failing access. | ||
46 | moveq -EFAULT,$r9 | ||
47 | move.d $r9,[[$sp]] | ||
48 | |||
49 | ;; Clear the rest of the destination area using memset. Preserve the | ||
50 | ;; checksum for the readable bytes. | ||
51 | push $srp | ||
52 | push $r13 | ||
53 | move.d $r11,$r10 | ||
54 | clear.d $r11 | ||
55 | jsr memset | ||
56 | pop $r10 | ||
57 | jump [$sp+] | ||
58 | |||
59 | .previous | ||
60 | .section __ex_table,"a" | ||
61 | .dword 1b,4b | ||
62 | .dword 2b,5b | ||
63 | .dword 3b,6b | ||
64 | .previous | ||
diff --git a/arch/cris/arch-v10/lib/dmacopy.c b/arch/cris/arch-v10/lib/dmacopy.c new file mode 100644 index 000000000000..e5fb44f505c5 --- /dev/null +++ b/arch/cris/arch-v10/lib/dmacopy.c | |||
@@ -0,0 +1,43 @@ | |||
1 | /* $Id: dmacopy.c,v 1.1 2001/12/17 13:59:27 bjornw Exp $ | ||
2 | * | ||
3 | * memcpy for large blocks, using memory-memory DMA channels 6 and 7 in Etrax | ||
4 | */ | ||
5 | |||
6 | #include <asm/svinto.h> | ||
7 | #include <asm/io.h> | ||
8 | |||
9 | #define D(x) | ||
10 | |||
11 | void *dma_memcpy(void *pdst, | ||
12 | const void *psrc, | ||
13 | unsigned int pn) | ||
14 | { | ||
15 | static etrax_dma_descr indma, outdma; | ||
16 | |||
17 | D(printk("dma_memcpy %d bytes... ", pn)); | ||
18 | |||
19 | #if 0 | ||
20 | *R_GEN_CONFIG = genconfig_shadow = | ||
21 | (genconfig_shadow & ~0x3c0000) | | ||
22 | IO_STATE(R_GEN_CONFIG, dma6, intdma7) | | ||
23 | IO_STATE(R_GEN_CONFIG, dma7, intdma6); | ||
24 | #endif | ||
25 | indma.sw_len = outdma.sw_len = pn; | ||
26 | indma.ctrl = d_eol | d_eop; | ||
27 | outdma.ctrl = d_eol; | ||
28 | indma.buf = psrc; | ||
29 | outdma.buf = pdst; | ||
30 | |||
31 | *R_DMA_CH6_FIRST = &indma; | ||
32 | *R_DMA_CH7_FIRST = &outdma; | ||
33 | *R_DMA_CH6_CMD = IO_STATE(R_DMA_CH6_CMD, cmd, start); | ||
34 | *R_DMA_CH7_CMD = IO_STATE(R_DMA_CH7_CMD, cmd, start); | ||
35 | |||
36 | while(*R_DMA_CH7_CMD == 1) /* wait for completion */ ; | ||
37 | |||
38 | D(printk("done\n")); | ||
39 | |||
40 | } | ||
41 | |||
42 | |||
43 | |||
diff --git a/arch/cris/arch-v10/lib/dram_init.S b/arch/cris/arch-v10/lib/dram_init.S new file mode 100644 index 000000000000..2ef4ad5706ef --- /dev/null +++ b/arch/cris/arch-v10/lib/dram_init.S | |||
@@ -0,0 +1,205 @@ | |||
1 | /* $Id: dram_init.S,v 1.4 2003/09/22 09:21:59 starvik Exp $ | ||
2 | * | ||
3 | * DRAM/SDRAM initialization - alter with care | ||
4 | * This file is intended to be included from other assembler files | ||
5 | * | ||
6 | * Note: This file may not modify r9 because r9 is used to carry | ||
7 | * information from the decompressor to the kernel | ||
8 | * | ||
9 | * Copyright (C) 2000, 2001 Axis Communications AB | ||
10 | * | ||
11 | * Authors: Mikael Starvik (starvik@axis.com) | ||
12 | * | ||
13 | * $Log: dram_init.S,v $ | ||
14 | * Revision 1.4 2003/09/22 09:21:59 starvik | ||
15 | * Decompressor is linked to 0x407xxxxx and sdram commands are at 0x000xxxxx | ||
16 | * so we need to mask off 12 bits. | ||
17 | * | ||
18 | * Revision 1.3 2003/03/31 09:38:37 starvik | ||
19 | * Corrected calculation of end of sdram init commands | ||
20 | * | ||
21 | * Revision 1.2 2002/11/19 13:33:29 starvik | ||
22 | * Changes from Linux 2.4 | ||
23 | * | ||
24 | * Revision 1.13 2002/10/30 07:42:28 starvik | ||
25 | * Always read SDRAM command sequence from flash | ||
26 | * | ||
27 | * Revision 1.12 2002/08/09 11:37:37 orjanf | ||
28 | * Added double initialization work-around for Samsung SDRAMs. | ||
29 | * | ||
30 | * Revision 1.11 2002/06/04 11:43:21 starvik | ||
31 | * Check if mrs_data is specified in kernelconfig (necessary for MCM) | ||
32 | * | ||
33 | * Revision 1.10 2001/10/04 12:00:21 martinnn | ||
34 | * Added missing underscores. | ||
35 | * | ||
36 | * Revision 1.9 2001/10/01 14:47:35 bjornw | ||
37 | * Added register prefixes and removed underscores | ||
38 | * | ||
39 | * Revision 1.8 2001/05/15 07:12:45 hp | ||
40 | * Copy warning from head.S about r8 and r9 | ||
41 | * | ||
42 | * Revision 1.7 2001/04/18 12:05:39 bjornw | ||
43 | * Fixed comments, and explicitly include config.h to be sure it's there | ||
44 | * | ||
45 | * Revision 1.6 2001/04/10 06:20:16 starvik | ||
46 | * Delay should be 200us, not 200ns | ||
47 | * | ||
48 | * Revision 1.5 2001/04/09 06:01:13 starvik | ||
49 | * Added support for 100 MHz SDRAMs | ||
50 | * | ||
51 | * Revision 1.4 2001/03/26 14:24:01 bjornw | ||
52 | * Namechange of some config options | ||
53 | * | ||
54 | * Revision 1.3 2001/03/23 08:29:41 starvik | ||
55 | * Corrected calculation of mrs_data | ||
56 | * | ||
57 | * Revision 1.2 2001/02/08 15:20:00 starvik | ||
58 | * Corrected SDRAM initialization | ||
59 | * Should now be included as inline | ||
60 | * | ||
61 | * Revision 1.1 2001/01/29 13:08:02 starvik | ||
62 | * Initial version | ||
63 | * This file should be included from all assembler files that needs to | ||
64 | * initialize DRAM/SDRAM. | ||
65 | * | ||
66 | */ | ||
67 | |||
68 | /* Just to be certain the config file is included, we include it here | ||
69 | * explicitly instead of depending on it being included in the file that | ||
70 | * uses this code. | ||
71 | */ | ||
72 | |||
73 | #include <linux/config.h> | ||
74 | |||
75 | ;; WARNING! The registers r8 and r9 are used as parameters carrying | ||
76 | ;; information from the decompressor (if the kernel was compressed). | ||
77 | ;; They should not be used in the code below. | ||
78 | |||
79 | #ifndef CONFIG_SVINTO_SIM | ||
80 | move.d CONFIG_ETRAX_DEF_R_WAITSTATES, $r0 | ||
81 | move.d $r0, [R_WAITSTATES] | ||
82 | |||
83 | move.d CONFIG_ETRAX_DEF_R_BUS_CONFIG, $r0 | ||
84 | move.d $r0, [R_BUS_CONFIG] | ||
85 | |||
86 | #ifndef CONFIG_ETRAX_SDRAM | ||
87 | move.d CONFIG_ETRAX_DEF_R_DRAM_CONFIG, $r0 | ||
88 | move.d $r0, [R_DRAM_CONFIG] | ||
89 | |||
90 | move.d CONFIG_ETRAX_DEF_R_DRAM_TIMING, $r0 | ||
91 | move.d $r0, [R_DRAM_TIMING] | ||
92 | #else | ||
93 | ;; Samsung SDRAMs seem to require to be initialized twice to work properly. | ||
94 | moveq 2, $r6 | ||
95 | _sdram_init: | ||
96 | |||
97 | ; Refer to ETRAX 100LX Designers Reference for a description of SDRAM initialization | ||
98 | |||
99 | ; Bank configuration | ||
100 | move.d CONFIG_ETRAX_DEF_R_SDRAM_CONFIG, $r0 | ||
101 | move.d $r0, [R_SDRAM_CONFIG] | ||
102 | |||
103 | ; Calculate value of mrs_data | ||
104 | ; CAS latency = 2 && bus_width = 32 => 0x40 | ||
105 | ; CAS latency = 3 && bus_width = 32 => 0x60 | ||
106 | ; CAS latency = 2 && bus_width = 16 => 0x20 | ||
107 | ; CAS latency = 3 && bus_width = 16 => 0x30 | ||
108 | |||
109 | ; Check if value is already supplied in kernel config | ||
110 | move.d CONFIG_ETRAX_DEF_R_SDRAM_TIMING, $r2 | ||
111 | and.d 0x00ff0000, $r2 | ||
112 | bne _set_timing | ||
113 | lsrq 16, $r2 | ||
114 | |||
115 | move.d 0x40, $r2 ; Assume 32 bits and CAS latency = 2 | ||
116 | move.d CONFIG_ETRAX_DEF_R_SDRAM_TIMING, $r1 | ||
117 | move.d $r1, $r3 | ||
118 | and.d 0x03, $r1 ; Get CAS latency | ||
119 | and.d 0x1000, $r3 ; 50 or 100 MHz? | ||
120 | beq _speed_50 | ||
121 | nop | ||
122 | _speed_100: | ||
123 | cmp.d 0x00, $r1 ; CAS latency = 2? | ||
124 | beq _bw_check | ||
125 | nop | ||
126 | or.d 0x20, $r2 ; CAS latency = 3 | ||
127 | ba _bw_check | ||
128 | nop | ||
129 | _speed_50: | ||
130 | cmp.d 0x01, $r1 ; CAS latency = 2? | ||
131 | beq _bw_check | ||
132 | nop | ||
133 | or.d 0x20, $r2 ; CAS latency = 3 | ||
134 | _bw_check: | ||
135 | move.d CONFIG_ETRAX_DEF_R_SDRAM_CONFIG, $r1 | ||
136 | and.d 0x800000, $r1 ; DRAM width is bit 23 | ||
137 | bne _set_timing | ||
138 | nop | ||
139 | lsrq 1, $r2 ; 16 bits. Shift down value. | ||
140 | |||
141 | ; Set timing parameters. Starts master clock | ||
142 | _set_timing: | ||
143 | move.d CONFIG_ETRAX_DEF_R_SDRAM_TIMING, $r1 | ||
144 | and.d 0x8000f9ff, $r1 ; Make sure mrs data and command is 0 | ||
145 | or.d 0x80000000, $r1 ; Make sure sdram enable bit is set | ||
146 | move.d $r1, $r5 | ||
147 | or.d 0x0000c000, $r1 ; ref = disable | ||
148 | lslq 16, $r2 ; mrs data starts at bit 16 | ||
149 | or.d $r2, $r1 | ||
150 | move.d $r1, [R_SDRAM_TIMING] | ||
151 | |||
152 | ; Wait 200us | ||
153 | move.d 10000, $r2 | ||
154 | 1: bne 1b | ||
155 | subq 1, $r2 | ||
156 | |||
157 | ; Issue initialization command sequence | ||
158 | move.d _sdram_commands_start, $r2 | ||
159 | and.d 0x000fffff, $r2 ; Make sure commands are read from flash | ||
160 | move.d _sdram_commands_end, $r3 | ||
161 | and.d 0x000fffff, $r3 | ||
162 | 1: clear.d $r4 | ||
163 | move.b [$r2+], $r4 | ||
164 | lslq 9, $r4 ; Command starts at bit 9 | ||
165 | or.d $r1, $r4 | ||
166 | move.d $r4, [R_SDRAM_TIMING] | ||
167 | nop ; Wait five nop cycles between each command | ||
168 | nop | ||
169 | nop | ||
170 | nop | ||
171 | nop | ||
172 | cmp.d $r2, $r3 | ||
173 | bne 1b | ||
174 | nop | ||
175 | move.d $r5, [R_SDRAM_TIMING] | ||
176 | subq 1, $r6 | ||
177 | bne _sdram_init | ||
178 | nop | ||
179 | ba _sdram_commands_end | ||
180 | nop | ||
181 | |||
182 | _sdram_commands_start: | ||
183 | .byte 3 ; Precharge | ||
184 | .byte 0 ; nop | ||
185 | .byte 2 ; refresh | ||
186 | .byte 0 ; nop | ||
187 | .byte 2 ; refresh | ||
188 | .byte 0 ; nop | ||
189 | .byte 2 ; refresh | ||
190 | .byte 0 ; nop | ||
191 | .byte 2 ; refresh | ||
192 | .byte 0 ; nop | ||
193 | .byte 2 ; refresh | ||
194 | .byte 0 ; nop | ||
195 | .byte 2 ; refresh | ||
196 | .byte 0 ; nop | ||
197 | .byte 2 ; refresh | ||
198 | .byte 0 ; nop | ||
199 | .byte 2 ; refresh | ||
200 | .byte 0 ; nop | ||
201 | .byte 1 ; mrs | ||
202 | .byte 0 ; nop | ||
203 | _sdram_commands_end: | ||
204 | #endif | ||
205 | #endif | ||
diff --git a/arch/cris/arch-v10/lib/hw_settings.S b/arch/cris/arch-v10/lib/hw_settings.S new file mode 100644 index 000000000000..56905aaa7b6e --- /dev/null +++ b/arch/cris/arch-v10/lib/hw_settings.S | |||
@@ -0,0 +1,62 @@ | |||
1 | /* | ||
2 | * $Id: hw_settings.S,v 1.1 2001/12/17 13:59:27 bjornw Exp $ | ||
3 | * | ||
4 | * This table is used by some tools to extract hardware parameters. | ||
5 | * The table should be included in the kernel and the decompressor. | ||
6 | * Don't forget to update the tools if you change this table. | ||
7 | * | ||
8 | * Copyright (C) 2001 Axis Communications AB | ||
9 | * | ||
10 | * Authors: Mikael Starvik (starvik@axis.com) | ||
11 | */ | ||
12 | |||
13 | #define PA_SET_VALUE ((CONFIG_ETRAX_DEF_R_PORT_PA_DIR << 8) | \ | ||
14 | (CONFIG_ETRAX_DEF_R_PORT_PA_DATA)) | ||
15 | #define PB_SET_VALUE ((CONFIG_ETRAX_DEF_R_PORT_PB_CONFIG << 16) | \ | ||
16 | (CONFIG_ETRAX_DEF_R_PORT_PB_DIR << 8) | \ | ||
17 | (CONFIG_ETRAX_DEF_R_PORT_PB_DATA)) | ||
18 | |||
19 | .ascii "HW_PARAM_MAGIC" ; Magic number | ||
20 | .dword 0xc0004000 ; Kernel start address | ||
21 | |||
22 | ; Debug port | ||
23 | #ifdef CONFIG_ETRAX_DEBUG_PORT0 | ||
24 | .dword 0 | ||
25 | #elif defined(CONFIG_ETRAX_DEBUG_PORT1) | ||
26 | .dword 1 | ||
27 | #elif defined(CONFIG_ETRAX_DEBUG_PORT2) | ||
28 | .dword 2 | ||
29 | #elif defined(CONFIG_ETRAX_DEBUG_PORT3) | ||
30 | .dword 3 | ||
31 | #else | ||
32 | .dword 4 ; No debug | ||
33 | #endif | ||
34 | |||
35 | ; SDRAM or EDO DRAM? | ||
36 | #ifdef CONFIG_ETRAX_SDRAM | ||
37 | .dword 1 | ||
38 | #else | ||
39 | .dword 0 | ||
40 | #endif | ||
41 | |||
42 | ; Register values | ||
43 | .dword R_WAITSTATES | ||
44 | .dword CONFIG_ETRAX_DEF_R_WAITSTATES | ||
45 | .dword R_BUS_CONFIG | ||
46 | .dword CONFIG_ETRAX_DEF_R_BUS_CONFIG | ||
47 | #ifdef CONFIG_ETRAX_SDRAM | ||
48 | .dword R_SDRAM_CONFIG | ||
49 | .dword CONFIG_ETRAX_DEF_R_SDRAM_CONFIG | ||
50 | .dword R_SDRAM_TIMING | ||
51 | .dword CONFIG_ETRAX_DEF_R_SDRAM_TIMING | ||
52 | #else | ||
53 | .dword R_DRAM_CONFIG | ||
54 | .dword CONFIG_ETRAX_DEF_R_DRAM_CONFIG | ||
55 | .dword R_DRAM_TIMING | ||
56 | .dword CONFIG_ETRAX_DEF_R_DRAM_TIMING | ||
57 | #endif | ||
58 | .dword R_PORT_PA_SET | ||
59 | .dword PA_SET_VALUE | ||
60 | .dword R_PORT_PB_SET | ||
61 | .dword PB_SET_VALUE | ||
62 | .dword 0 ; No more register values | ||
diff --git a/arch/cris/arch-v10/lib/memset.c b/arch/cris/arch-v10/lib/memset.c new file mode 100644 index 000000000000..82bb66839171 --- /dev/null +++ b/arch/cris/arch-v10/lib/memset.c | |||
@@ -0,0 +1,252 @@ | |||
1 | /*#************************************************************************#*/ | ||
2 | /*#-------------------------------------------------------------------------*/ | ||
3 | /*# */ | ||
4 | /*# FUNCTION NAME: memset() */ | ||
5 | /*# */ | ||
6 | /*# PARAMETERS: void* dst; Destination address. */ | ||
7 | /*# int c; Value of byte to write. */ | ||
8 | /*# int len; Number of bytes to write. */ | ||
9 | /*# */ | ||
10 | /*# RETURNS: dst. */ | ||
11 | /*# */ | ||
12 | /*# DESCRIPTION: Sets the memory dst of length len bytes to c, as standard. */ | ||
13 | /*# Framework taken from memcpy. This routine is */ | ||
14 | /*# very sensitive to compiler changes in register allocation. */ | ||
15 | /*# Should really be rewritten to avoid this problem. */ | ||
16 | /*# */ | ||
17 | /*#-------------------------------------------------------------------------*/ | ||
18 | /*# */ | ||
19 | /*# HISTORY */ | ||
20 | /*# */ | ||
21 | /*# DATE NAME CHANGES */ | ||
22 | /*# ---- ---- ------- */ | ||
23 | /*# 990713 HP Tired of watching this function (or */ | ||
24 | /*# really, the nonoptimized generic */ | ||
25 | /*# implementation) take up 90% of simulator */ | ||
26 | /*# output. Measurements needed. */ | ||
27 | /*# */ | ||
28 | /*#-------------------------------------------------------------------------*/ | ||
29 | |||
30 | #include <linux/types.h> | ||
31 | |||
32 | /* No, there's no macro saying 12*4, since it is "hard" to get it into | ||
33 | the asm in a good way. Thus better to expose the problem everywhere. | ||
34 | */ | ||
35 | |||
36 | /* Assuming 1 cycle per dword written or read (ok, not really true), and | ||
37 | one per instruction, then 43+3*(n/48-1) <= 24+24*(n/48-1) | ||
38 | so n >= 45.7; n >= 0.9; we win on the first full 48-byte block to set. */ | ||
39 | |||
40 | #define ZERO_BLOCK_SIZE (1*12*4) | ||
41 | |||
42 | void *memset(void *pdst, | ||
43 | int c, | ||
44 | size_t plen) | ||
45 | { | ||
46 | /* Ok. Now we want the parameters put in special registers. | ||
47 | Make sure the compiler is able to make something useful of this. */ | ||
48 | |||
49 | register char *return_dst __asm__ ("r10") = pdst; | ||
50 | register int n __asm__ ("r12") = plen; | ||
51 | register int lc __asm__ ("r11") = c; | ||
52 | |||
53 | /* Most apps use memset sanely. Only those memsetting about 3..4 | ||
54 | bytes or less get penalized compared to the generic implementation | ||
55 | - and that's not really sane use. */ | ||
56 | |||
57 | /* Ugh. This is fragile at best. Check with newer GCC releases, if | ||
58 | they compile cascaded "x |= x << 8" sanely! */ | ||
59 | __asm__("movu.b %0,$r13\n\t" | ||
60 | "lslq 8,$r13\n\t" | ||
61 | "move.b %0,$r13\n\t" | ||
62 | "move.d $r13,%0\n\t" | ||
63 | "lslq 16,$r13\n\t" | ||
64 | "or.d $r13,%0" | ||
65 | : "=r" (lc) : "0" (lc) : "r13"); | ||
66 | |||
67 | { | ||
68 | register char *dst __asm__ ("r13") = pdst; | ||
69 | |||
70 | /* This is NONPORTABLE, but since this whole routine is */ | ||
71 | /* grossly nonportable that doesn't matter. */ | ||
72 | |||
73 | if (((unsigned long) pdst & 3) != 0 | ||
74 | /* Oops! n=0 must be a legal call, regardless of alignment. */ | ||
75 | && n >= 3) | ||
76 | { | ||
77 | if ((unsigned long)dst & 1) | ||
78 | { | ||
79 | *dst = (char) lc; | ||
80 | n--; | ||
81 | dst++; | ||
82 | } | ||
83 | |||
84 | if ((unsigned long)dst & 2) | ||
85 | { | ||
86 | *(short *)dst = lc; | ||
87 | n -= 2; | ||
88 | dst += 2; | ||
89 | } | ||
90 | } | ||
91 | |||
92 | /* Now the fun part. For the threshold value of this, check the equation | ||
93 | above. */ | ||
94 | /* Decide which copying method to use. */ | ||
95 | if (n >= ZERO_BLOCK_SIZE) | ||
96 | { | ||
97 | /* For large copies we use 'movem' */ | ||
98 | |||
99 | /* It is not optimal to tell the compiler about clobbering any | ||
100 | registers; that will move the saving/restoring of those registers | ||
101 | to the function prologue/epilogue, and make non-movem sizes | ||
102 | suboptimal. | ||
103 | |||
104 | This method is not foolproof; it assumes that the "asm reg" | ||
105 | declarations at the beginning of the function really are used | ||
106 | here (beware: they may be moved to temporary registers). | ||
107 | This way, we do not have to save/move the registers around into | ||
108 | temporaries; we can safely use them straight away. | ||
109 | |||
110 | If you want to check that the allocation was right; then | ||
111 | check the equalities in the first comment. It should say | ||
112 | "r13=r13, r12=r12, r11=r11" */ | ||
113 | __asm__ volatile (" | ||
114 | ;; Check that the following is true (same register names on | ||
115 | ;; both sides of equal sign, as in r8=r8): | ||
116 | ;; %0=r13, %1=r12, %4=r11 | ||
117 | ;; | ||
118 | ;; Save the registers we'll clobber in the movem process | ||
119 | ;; on the stack. Don't mention them to gcc, it will only be | ||
120 | ;; upset. | ||
121 | subq 11*4,$sp | ||
122 | movem $r10,[$sp] | ||
123 | |||
124 | move.d $r11,$r0 | ||
125 | move.d $r11,$r1 | ||
126 | move.d $r11,$r2 | ||
127 | move.d $r11,$r3 | ||
128 | move.d $r11,$r4 | ||
129 | move.d $r11,$r5 | ||
130 | move.d $r11,$r6 | ||
131 | move.d $r11,$r7 | ||
132 | move.d $r11,$r8 | ||
133 | move.d $r11,$r9 | ||
134 | move.d $r11,$r10 | ||
135 | |||
136 | ;; Now we've got this: | ||
137 | ;; r13 - dst | ||
138 | ;; r12 - n | ||
139 | |||
140 | ;; Update n for the first loop | ||
141 | subq 12*4,$r12 | ||
142 | 0: | ||
143 | subq 12*4,$r12 | ||
144 | bge 0b | ||
145 | movem $r11,[$r13+] | ||
146 | |||
147 | addq 12*4,$r12 ;; compensate for last loop underflowing n | ||
148 | |||
149 | ;; Restore registers from stack | ||
150 | movem [$sp+],$r10" | ||
151 | |||
152 | /* Outputs */ : "=r" (dst), "=r" (n) | ||
153 | /* Inputs */ : "0" (dst), "1" (n), "r" (lc)); | ||
154 | |||
155 | } | ||
156 | |||
157 | /* Either we directly start copying, using dword copying | ||
158 | in a loop, or we copy as much as possible with 'movem' | ||
159 | and then the last block (<44 bytes) is copied here. | ||
160 | This will work since 'movem' will have updated src,dst,n. */ | ||
161 | |||
162 | while ( n >= 16 ) | ||
163 | { | ||
164 | *((long*)dst)++ = lc; | ||
165 | *((long*)dst)++ = lc; | ||
166 | *((long*)dst)++ = lc; | ||
167 | *((long*)dst)++ = lc; | ||
168 | n -= 16; | ||
169 | } | ||
170 | |||
171 | /* A switch() is definitely the fastest although it takes a LOT of code. | ||
172 | * Particularly if you inline code this. | ||
173 | */ | ||
174 | switch (n) | ||
175 | { | ||
176 | case 0: | ||
177 | break; | ||
178 | case 1: | ||
179 | *(char*)dst = (char) lc; | ||
180 | break; | ||
181 | case 2: | ||
182 | *(short*)dst = (short) lc; | ||
183 | break; | ||
184 | case 3: | ||
185 | *((short*)dst)++ = (short) lc; | ||
186 | *(char*)dst = (char) lc; | ||
187 | break; | ||
188 | case 4: | ||
189 | *((long*)dst)++ = lc; | ||
190 | break; | ||
191 | case 5: | ||
192 | *((long*)dst)++ = lc; | ||
193 | *(char*)dst = (char) lc; | ||
194 | break; | ||
195 | case 6: | ||
196 | *((long*)dst)++ = lc; | ||
197 | *(short*)dst = (short) lc; | ||
198 | break; | ||
199 | case 7: | ||
200 | *((long*)dst)++ = lc; | ||
201 | *((short*)dst)++ = (short) lc; | ||
202 | *(char*)dst = (char) lc; | ||
203 | break; | ||
204 | case 8: | ||
205 | *((long*)dst)++ = lc; | ||
206 | *((long*)dst)++ = lc; | ||
207 | break; | ||
208 | case 9: | ||
209 | *((long*)dst)++ = lc; | ||
210 | *((long*)dst)++ = lc; | ||
211 | *(char*)dst = (char) lc; | ||
212 | break; | ||
213 | case 10: | ||
214 | *((long*)dst)++ = lc; | ||
215 | *((long*)dst)++ = lc; | ||
216 | *(short*)dst = (short) lc; | ||
217 | break; | ||
218 | case 11: | ||
219 | *((long*)dst)++ = lc; | ||
220 | *((long*)dst)++ = lc; | ||
221 | *((short*)dst)++ = (short) lc; | ||
222 | *(char*)dst = (char) lc; | ||
223 | break; | ||
224 | case 12: | ||
225 | *((long*)dst)++ = lc; | ||
226 | *((long*)dst)++ = lc; | ||
227 | *((long*)dst)++ = lc; | ||
228 | break; | ||
229 | case 13: | ||
230 | *((long*)dst)++ = lc; | ||
231 | *((long*)dst)++ = lc; | ||
232 | *((long*)dst)++ = lc; | ||
233 | *(char*)dst = (char) lc; | ||
234 | break; | ||
235 | case 14: | ||
236 | *((long*)dst)++ = lc; | ||
237 | *((long*)dst)++ = lc; | ||
238 | *((long*)dst)++ = lc; | ||
239 | *(short*)dst = (short) lc; | ||
240 | break; | ||
241 | case 15: | ||
242 | *((long*)dst)++ = lc; | ||
243 | *((long*)dst)++ = lc; | ||
244 | *((long*)dst)++ = lc; | ||
245 | *((short*)dst)++ = (short) lc; | ||
246 | *(char*)dst = (char) lc; | ||
247 | break; | ||
248 | } | ||
249 | } | ||
250 | |||
251 | return return_dst; /* destination pointer. */ | ||
252 | } /* memset() */ | ||
diff --git a/arch/cris/arch-v10/lib/old_checksum.c b/arch/cris/arch-v10/lib/old_checksum.c new file mode 100644 index 000000000000..22a6f0aa9cef --- /dev/null +++ b/arch/cris/arch-v10/lib/old_checksum.c | |||
@@ -0,0 +1,85 @@ | |||
1 | /* $Id: old_checksum.c,v 1.3 2003/10/27 08:04:32 starvik Exp $ | ||
2 | * | ||
3 | * INET An implementation of the TCP/IP protocol suite for the LINUX | ||
4 | * operating system. INET is implemented using the BSD Socket | ||
5 | * interface as the means of communication with the user level. | ||
6 | * | ||
7 | * IP/TCP/UDP checksumming routines | ||
8 | * | ||
9 | * Authors: Jorge Cwik, <jorge@laser.satlink.net> | ||
10 | * Arnt Gulbrandsen, <agulbra@nvg.unit.no> | ||
11 | * Tom May, <ftom@netcom.com> | ||
12 | * Lots of code moved from tcp.c and ip.c; see those files | ||
13 | * for more names. | ||
14 | * | ||
15 | * This program is free software; you can redistribute it and/or | ||
16 | * modify it under the terms of the GNU General Public License | ||
17 | * as published by the Free Software Foundation; either version | ||
18 | * 2 of the License, or (at your option) any later version. | ||
19 | */ | ||
20 | |||
21 | #include <net/checksum.h> | ||
22 | #include <net/module.h> | ||
23 | |||
24 | #undef PROFILE_CHECKSUM | ||
25 | |||
26 | #ifdef PROFILE_CHECKSUM | ||
27 | /* these are just for profiling the checksum code with an oscilloscope.. uh */ | ||
28 | #if 0 | ||
29 | #define BITOFF *((unsigned char *)0xb0000030) = 0xff | ||
30 | #define BITON *((unsigned char *)0xb0000030) = 0x0 | ||
31 | #endif | ||
32 | #include <asm/io.h> | ||
33 | #define CBITON LED_ACTIVE_SET(1) | ||
34 | #define CBITOFF LED_ACTIVE_SET(0) | ||
35 | #define BITOFF | ||
36 | #define BITON | ||
37 | #else | ||
38 | #define BITOFF | ||
39 | #define BITON | ||
40 | #define CBITOFF | ||
41 | #define CBITON | ||
42 | #endif | ||
43 | |||
44 | /* | ||
45 | * computes a partial checksum, e.g. for TCP/UDP fragments | ||
46 | */ | ||
47 | |||
48 | #include <asm/delay.h> | ||
49 | |||
50 | unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum) | ||
51 | { | ||
52 | /* | ||
53 | * Experiments with ethernet and slip connections show that buff | ||
54 | * is aligned on either a 2-byte or 4-byte boundary. | ||
55 | */ | ||
56 | const unsigned char *endMarker = buff + len; | ||
57 | const unsigned char *marker = endMarker - (len % 16); | ||
58 | #if 0 | ||
59 | if((int)buff & 0x3) | ||
60 | printk("unaligned buff %p\n", buff); | ||
61 | __delay(900); /* extra delay of 90 us to test performance hit */ | ||
62 | #endif | ||
63 | BITON; | ||
64 | while (buff < marker) { | ||
65 | sum += *((unsigned short *)buff)++; | ||
66 | sum += *((unsigned short *)buff)++; | ||
67 | sum += *((unsigned short *)buff)++; | ||
68 | sum += *((unsigned short *)buff)++; | ||
69 | sum += *((unsigned short *)buff)++; | ||
70 | sum += *((unsigned short *)buff)++; | ||
71 | sum += *((unsigned short *)buff)++; | ||
72 | sum += *((unsigned short *)buff)++; | ||
73 | } | ||
74 | marker = endMarker - (len % 2); | ||
75 | while(buff < marker) { | ||
76 | sum += *((unsigned short *)buff)++; | ||
77 | } | ||
78 | if(endMarker - buff > 0) { | ||
79 | sum += *buff; /* add extra byte separately */ | ||
80 | } | ||
81 | BITOFF; | ||
82 | return(sum); | ||
83 | } | ||
84 | |||
85 | EXPORT_SYMBOL(csum_partial); | ||
diff --git a/arch/cris/arch-v10/lib/string.c b/arch/cris/arch-v10/lib/string.c new file mode 100644 index 000000000000..8ffde4901b57 --- /dev/null +++ b/arch/cris/arch-v10/lib/string.c | |||
@@ -0,0 +1,225 @@ | |||
1 | /*#************************************************************************#*/ | ||
2 | /*#-------------------------------------------------------------------------*/ | ||
3 | /*# */ | ||
4 | /*# FUNCTION NAME: memcpy() */ | ||
5 | /*# */ | ||
6 | /*# PARAMETERS: void* dst; Destination address. */ | ||
7 | /*# void* src; Source address. */ | ||
8 | /*# int len; Number of bytes to copy. */ | ||
9 | /*# */ | ||
10 | /*# RETURNS: dst. */ | ||
11 | /*# */ | ||
12 | /*# DESCRIPTION: Copies len bytes of memory from src to dst. No guarantees */ | ||
13 | /*# about copying of overlapping memory areas. This routine is */ | ||
14 | /*# very sensitive to compiler changes in register allocation. */ | ||
15 | /*# Should really be rewritten to avoid this problem. */ | ||
16 | /*# */ | ||
17 | /*#-------------------------------------------------------------------------*/ | ||
18 | /*# */ | ||
19 | /*# HISTORY */ | ||
20 | /*# */ | ||
21 | /*# DATE NAME CHANGES */ | ||
22 | /*# ---- ---- ------- */ | ||
23 | /*# 941007 Kenny R Creation */ | ||
24 | /*# 941011 Kenny R Lots of optimizations and inlining. */ | ||
25 | /*# 941129 Ulf A Adapted for use in libc. */ | ||
26 | /*# 950216 HP N==0 forgotten if non-aligned src/dst. */ | ||
27 | /*# Added some optimizations. */ | ||
28 | /*# 001025 HP Make src and dst char *. Align dst to */ | ||
29 | /*# dword, not just word-if-both-src-and-dst- */ | ||
30 | /*# are-misaligned. */ | ||
31 | /*# */ | ||
32 | /*#-------------------------------------------------------------------------*/ | ||
33 | |||
34 | #include <linux/types.h> | ||
35 | |||
36 | void *memcpy(void *pdst, | ||
37 | const void *psrc, | ||
38 | size_t pn) | ||
39 | { | ||
40 | /* Ok. Now we want the parameters put in special registers. | ||
41 | Make sure the compiler is able to make something useful of this. | ||
42 | As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop). | ||
43 | |||
44 | If gcc was all right, it really would need no temporaries, and no | ||
45 | stack space to save stuff on. */ | ||
46 | |||
47 | register void *return_dst __asm__ ("r10") = pdst; | ||
48 | register char *dst __asm__ ("r13") = pdst; | ||
49 | register const char *src __asm__ ("r11") = psrc; | ||
50 | register int n __asm__ ("r12") = pn; | ||
51 | |||
52 | |||
53 | /* When src is aligned but not dst, this makes a few extra needless | ||
54 | cycles. I believe it would take as many to check that the | ||
55 | re-alignment was unnecessary. */ | ||
56 | if (((unsigned long) dst & 3) != 0 | ||
57 | /* Don't align if we wouldn't copy more than a few bytes; so we | ||
58 | don't have to check further for overflows. */ | ||
59 | && n >= 3) | ||
60 | { | ||
61 | if ((unsigned long) dst & 1) | ||
62 | { | ||
63 | n--; | ||
64 | *(char*)dst = *(char*)src; | ||
65 | src++; | ||
66 | dst++; | ||
67 | } | ||
68 | |||
69 | if ((unsigned long) dst & 2) | ||
70 | { | ||
71 | n -= 2; | ||
72 | *(short*)dst = *(short*)src; | ||
73 | src += 2; | ||
74 | dst += 2; | ||
75 | } | ||
76 | } | ||
77 | |||
78 | /* Decide which copying method to use. */ | ||
79 | if (n >= 44*2) /* Break even between movem and | ||
80 | move16 is at 38.7*2, but modulo 44. */ | ||
81 | { | ||
82 | /* For large copies we use 'movem' */ | ||
83 | |||
84 | /* It is not optimal to tell the compiler about clobbering any | ||
85 | registers; that will move the saving/restoring of those registers | ||
86 | to the function prologue/epilogue, and make non-movem sizes | ||
87 | suboptimal. | ||
88 | |||
89 | This method is not foolproof; it assumes that the "asm reg" | ||
90 | declarations at the beginning of the function really are used | ||
91 | here (beware: they may be moved to temporary registers). | ||
92 | This way, we do not have to save/move the registers around into | ||
93 | temporaries; we can safely use them straight away. | ||
94 | |||
95 | If you want to check that the allocation was right; then | ||
96 | check the equalities in the first comment. It should say | ||
97 | "r13=r13, r11=r11, r12=r12" */ | ||
98 | __asm__ volatile (" | ||
99 | ;; Check that the following is true (same register names on | ||
100 | ;; both sides of equal sign, as in r8=r8): | ||
101 | ;; %0=r13, %1=r11, %2=r12 | ||
102 | ;; | ||
103 | ;; Save the registers we'll use in the movem process | ||
104 | ;; on the stack. | ||
105 | subq 11*4,$sp | ||
106 | movem $r10,[$sp] | ||
107 | |||
108 | ;; Now we've got this: | ||
109 | ;; r11 - src | ||
110 | ;; r13 - dst | ||
111 | ;; r12 - n | ||
112 | |||
113 | ;; Update n for the first loop | ||
114 | subq 44,$r12 | ||
115 | 0: | ||
116 | movem [$r11+],$r10 | ||
117 | subq 44,$r12 | ||
118 | bge 0b | ||
119 | movem $r10,[$r13+] | ||
120 | |||
121 | addq 44,$r12 ;; compensate for last loop underflowing n | ||
122 | |||
123 | ;; Restore registers from stack | ||
124 | movem [$sp+],$r10" | ||
125 | |||
126 | /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n) | ||
127 | /* Inputs */ : "0" (dst), "1" (src), "2" (n)); | ||
128 | |||
129 | } | ||
130 | |||
131 | /* Either we directly starts copying, using dword copying | ||
132 | in a loop, or we copy as much as possible with 'movem' | ||
133 | and then the last block (<44 bytes) is copied here. | ||
134 | This will work since 'movem' will have updated src,dst,n. */ | ||
135 | |||
136 | while ( n >= 16 ) | ||
137 | { | ||
138 | *((long*)dst)++ = *((long*)src)++; | ||
139 | *((long*)dst)++ = *((long*)src)++; | ||
140 | *((long*)dst)++ = *((long*)src)++; | ||
141 | *((long*)dst)++ = *((long*)src)++; | ||
142 | n -= 16; | ||
143 | } | ||
144 | |||
145 | /* A switch() is definitely the fastest although it takes a LOT of code. | ||
146 | * Particularly if you inline code this. | ||
147 | */ | ||
148 | switch (n) | ||
149 | { | ||
150 | case 0: | ||
151 | break; | ||
152 | case 1: | ||
153 | *(char*)dst = *(char*)src; | ||
154 | break; | ||
155 | case 2: | ||
156 | *(short*)dst = *(short*)src; | ||
157 | break; | ||
158 | case 3: | ||
159 | *((short*)dst)++ = *((short*)src)++; | ||
160 | *(char*)dst = *(char*)src; | ||
161 | break; | ||
162 | case 4: | ||
163 | *((long*)dst)++ = *((long*)src)++; | ||
164 | break; | ||
165 | case 5: | ||
166 | *((long*)dst)++ = *((long*)src)++; | ||
167 | *(char*)dst = *(char*)src; | ||
168 | break; | ||
169 | case 6: | ||
170 | *((long*)dst)++ = *((long*)src)++; | ||
171 | *(short*)dst = *(short*)src; | ||
172 | break; | ||
173 | case 7: | ||
174 | *((long*)dst)++ = *((long*)src)++; | ||
175 | *((short*)dst)++ = *((short*)src)++; | ||
176 | *(char*)dst = *(char*)src; | ||
177 | break; | ||
178 | case 8: | ||
179 | *((long*)dst)++ = *((long*)src)++; | ||
180 | *((long*)dst)++ = *((long*)src)++; | ||
181 | break; | ||
182 | case 9: | ||
183 | *((long*)dst)++ = *((long*)src)++; | ||
184 | *((long*)dst)++ = *((long*)src)++; | ||
185 | *(char*)dst = *(char*)src; | ||
186 | break; | ||
187 | case 10: | ||
188 | *((long*)dst)++ = *((long*)src)++; | ||
189 | *((long*)dst)++ = *((long*)src)++; | ||
190 | *(short*)dst = *(short*)src; | ||
191 | break; | ||
192 | case 11: | ||
193 | *((long*)dst)++ = *((long*)src)++; | ||
194 | *((long*)dst)++ = *((long*)src)++; | ||
195 | *((short*)dst)++ = *((short*)src)++; | ||
196 | *(char*)dst = *(char*)src; | ||
197 | break; | ||
198 | case 12: | ||
199 | *((long*)dst)++ = *((long*)src)++; | ||
200 | *((long*)dst)++ = *((long*)src)++; | ||
201 | *((long*)dst)++ = *((long*)src)++; | ||
202 | break; | ||
203 | case 13: | ||
204 | *((long*)dst)++ = *((long*)src)++; | ||
205 | *((long*)dst)++ = *((long*)src)++; | ||
206 | *((long*)dst)++ = *((long*)src)++; | ||
207 | *(char*)dst = *(char*)src; | ||
208 | break; | ||
209 | case 14: | ||
210 | *((long*)dst)++ = *((long*)src)++; | ||
211 | *((long*)dst)++ = *((long*)src)++; | ||
212 | *((long*)dst)++ = *((long*)src)++; | ||
213 | *(short*)dst = *(short*)src; | ||
214 | break; | ||
215 | case 15: | ||
216 | *((long*)dst)++ = *((long*)src)++; | ||
217 | *((long*)dst)++ = *((long*)src)++; | ||
218 | *((long*)dst)++ = *((long*)src)++; | ||
219 | *((short*)dst)++ = *((short*)src)++; | ||
220 | *(char*)dst = *(char*)src; | ||
221 | break; | ||
222 | } | ||
223 | |||
224 | return return_dst; /* destination pointer. */ | ||
225 | } /* memcpy() */ | ||
diff --git a/arch/cris/arch-v10/lib/usercopy.c b/arch/cris/arch-v10/lib/usercopy.c new file mode 100644 index 000000000000..43778d53c254 --- /dev/null +++ b/arch/cris/arch-v10/lib/usercopy.c | |||
@@ -0,0 +1,523 @@ | |||
1 | /* | ||
2 | * User address space access functions. | ||
3 | * The non-inlined parts of asm-cris/uaccess.h are here. | ||
4 | * | ||
5 | * Copyright (C) 2000, Axis Communications AB. | ||
6 | * | ||
7 | * Written by Hans-Peter Nilsson. | ||
8 | * Pieces used from memcpy, originally by Kenny Ranerup long time ago. | ||
9 | */ | ||
10 | |||
11 | #include <asm/uaccess.h> | ||
12 | |||
13 | /* Asm:s have been tweaked (within the domain of correctness) to give | ||
14 | satisfactory results for "gcc version 2.96 20000427 (experimental)". | ||
15 | |||
16 | Check regularly... | ||
17 | |||
18 | Note that the PC saved at a bus-fault is the address *after* the | ||
19 | faulting instruction, which means the branch-target for instructions in | ||
20 | delay-slots for taken branches. Note also that the postincrement in | ||
21 | the instruction is performed regardless of bus-fault; the register is | ||
22 | seen updated in fault handlers. | ||
23 | |||
24 | Oh, and on the code formatting issue, to whomever feels like "fixing | ||
25 | it" to Conformity: I'm too "lazy", but why don't you go ahead and "fix" | ||
26 | string.c too. I just don't think too many people will hack this file | ||
27 | for the code format to be an issue. */ | ||
28 | |||
29 | |||
30 | /* Copy to userspace. This is based on the memcpy used for | ||
31 | kernel-to-kernel copying; see "string.c". */ | ||
32 | |||
33 | unsigned long | ||
34 | __copy_user (void __user *pdst, const void *psrc, unsigned long pn) | ||
35 | { | ||
36 | /* We want the parameters put in special registers. | ||
37 | Make sure the compiler is able to make something useful of this. | ||
38 | As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop). | ||
39 | |||
40 | FIXME: Comment for old gcc version. Check. | ||
41 | If gcc was all right, it really would need no temporaries, and no | ||
42 | stack space to save stuff on. */ | ||
43 | |||
44 | register char *dst __asm__ ("r13") = pdst; | ||
45 | register const char *src __asm__ ("r11") = psrc; | ||
46 | register int n __asm__ ("r12") = pn; | ||
47 | register int retn __asm__ ("r10") = 0; | ||
48 | |||
49 | |||
50 | /* When src is aligned but not dst, this makes a few extra needless | ||
51 | cycles. I believe it would take as many to check that the | ||
52 | re-alignment was unnecessary. */ | ||
53 | if (((unsigned long) dst & 3) != 0 | ||
54 | /* Don't align if we wouldn't copy more than a few bytes; so we | ||
55 | don't have to check further for overflows. */ | ||
56 | && n >= 3) | ||
57 | { | ||
58 | if ((unsigned long) dst & 1) | ||
59 | { | ||
60 | __asm_copy_to_user_1 (dst, src, retn); | ||
61 | n--; | ||
62 | } | ||
63 | |||
64 | if ((unsigned long) dst & 2) | ||
65 | { | ||
66 | __asm_copy_to_user_2 (dst, src, retn); | ||
67 | n -= 2; | ||
68 | } | ||
69 | } | ||
70 | |||
71 | /* Decide which copying method to use. */ | ||
72 | if (n >= 44*2) /* Break even between movem and | ||
73 | move16 is at 38.7*2, but modulo 44. */ | ||
74 | { | ||
75 | /* For large copies we use 'movem'. */ | ||
76 | |||
77 | /* It is not optimal to tell the compiler about clobbering any | ||
78 | registers; that will move the saving/restoring of those registers | ||
79 | to the function prologue/epilogue, and make non-movem sizes | ||
80 | suboptimal. | ||
81 | |||
82 | This method is not foolproof; it assumes that the "asm reg" | ||
83 | declarations at the beginning of the function really are used | ||
84 | here (beware: they may be moved to temporary registers). | ||
85 | This way, we do not have to save/move the registers around into | ||
86 | temporaries; we can safely use them straight away. | ||
87 | |||
88 | If you want to check that the allocation was right; then | ||
89 | check the equalities in the first comment. It should say | ||
90 | "r13=r13, r11=r11, r12=r12". */ | ||
91 | __asm__ volatile ("\ | ||
92 | .ifnc %0%1%2%3,$r13$r11$r12$r10 \n\ | ||
93 | .err \n\ | ||
94 | .endif \n\ | ||
95 | |||
96 | ;; Save the registers we'll use in the movem process | ||
97 | ;; on the stack. | ||
98 | subq 11*4,$sp | ||
99 | movem $r10,[$sp] | ||
100 | |||
101 | ;; Now we've got this: | ||
102 | ;; r11 - src | ||
103 | ;; r13 - dst | ||
104 | ;; r12 - n | ||
105 | |||
106 | ;; Update n for the first loop | ||
107 | subq 44,$r12 | ||
108 | |||
109 | ; Since the noted PC of a faulting instruction in a delay-slot of a taken | ||
110 | ; branch, is that of the branch target, we actually point at the from-movem | ||
111 | ; for this case. There is no ambiguity here; if there was a fault in that | ||
112 | ; instruction (meaning a kernel oops), the faulted PC would be the address | ||
113 | ; after *that* movem. | ||
114 | |||
115 | 0: | ||
116 | movem [$r11+],$r10 | ||
117 | subq 44,$r12 | ||
118 | bge 0b | ||
119 | movem $r10,[$r13+] | ||
120 | 1: | ||
121 | addq 44,$r12 ;; compensate for last loop underflowing n | ||
122 | |||
123 | ;; Restore registers from stack | ||
124 | movem [$sp+],$r10 | ||
125 | 2: | ||
126 | .section .fixup,\"ax\" | ||
127 | |||
128 | ; To provide a correct count in r10 of bytes that failed to be copied, | ||
129 | ; we jump back into the loop if the loop-branch was taken. There is no | ||
130 | ; performance penalty for any sane use; the program will segfault soon enough. | ||
131 | |||
132 | 3: | ||
133 | move.d [$sp],$r10 | ||
134 | addq 44,$r10 | ||
135 | move.d $r10,[$sp] | ||
136 | jump 0b | ||
137 | 4: | ||
138 | movem [$sp+],$r10 | ||
139 | addq 44,$r10 | ||
140 | addq 44,$r12 | ||
141 | jump 2b | ||
142 | |||
143 | .previous | ||
144 | .section __ex_table,\"a\" | ||
145 | .dword 0b,3b | ||
146 | .dword 1b,4b | ||
147 | .previous" | ||
148 | |||
149 | /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n), "=r" (retn) | ||
150 | /* Inputs */ : "0" (dst), "1" (src), "2" (n), "3" (retn)); | ||
151 | |||
152 | } | ||
153 | |||
154 | /* Either we directly start copying, using dword copying in a loop, or | ||
155 | we copy as much as possible with 'movem' and then the last block (<44 | ||
156 | bytes) is copied here. This will work since 'movem' will have | ||
157 | updated SRC, DST and N. */ | ||
158 | |||
159 | while (n >= 16) | ||
160 | { | ||
161 | __asm_copy_to_user_16 (dst, src, retn); | ||
162 | n -= 16; | ||
163 | } | ||
164 | |||
165 | /* Having a separate by-four loops cuts down on cache footprint. | ||
166 | FIXME: Test with and without; increasing switch to be 0..15. */ | ||
167 | while (n >= 4) | ||
168 | { | ||
169 | __asm_copy_to_user_4 (dst, src, retn); | ||
170 | n -= 4; | ||
171 | } | ||
172 | |||
173 | switch (n) | ||
174 | { | ||
175 | case 0: | ||
176 | break; | ||
177 | case 1: | ||
178 | __asm_copy_to_user_1 (dst, src, retn); | ||
179 | break; | ||
180 | case 2: | ||
181 | __asm_copy_to_user_2 (dst, src, retn); | ||
182 | break; | ||
183 | case 3: | ||
184 | __asm_copy_to_user_3 (dst, src, retn); | ||
185 | break; | ||
186 | } | ||
187 | |||
188 | return retn; | ||
189 | } | ||
190 | |||
191 | /* Copy from user to kernel, zeroing the bytes that were inaccessible in | ||
192 | userland. The return-value is the number of bytes that were | ||
193 | inaccessible. */ | ||
194 | |||
195 | unsigned long | ||
196 | __copy_user_zeroing (void __user *pdst, const void *psrc, unsigned long pn) | ||
197 | { | ||
198 | /* We want the parameters put in special registers. | ||
199 | Make sure the compiler is able to make something useful of this. | ||
200 | As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop). | ||
201 | |||
202 | FIXME: Comment for old gcc version. Check. | ||
203 | If gcc was all right, it really would need no temporaries, and no | ||
204 | stack space to save stuff on. */ | ||
205 | |||
206 | register char *dst __asm__ ("r13") = pdst; | ||
207 | register const char *src __asm__ ("r11") = psrc; | ||
208 | register int n __asm__ ("r12") = pn; | ||
209 | register int retn __asm__ ("r10") = 0; | ||
210 | |||
211 | /* The best reason to align src is that we then know that a read-fault | ||
212 | was for aligned bytes; there's no 1..3 remaining good bytes to | ||
213 | pickle. */ | ||
214 | if (((unsigned long) src & 3) != 0) | ||
215 | { | ||
216 | if (((unsigned long) src & 1) && n != 0) | ||
217 | { | ||
218 | __asm_copy_from_user_1 (dst, src, retn); | ||
219 | n--; | ||
220 | } | ||
221 | |||
222 | if (((unsigned long) src & 2) && n >= 2) | ||
223 | { | ||
224 | __asm_copy_from_user_2 (dst, src, retn); | ||
225 | n -= 2; | ||
226 | } | ||
227 | |||
228 | /* We only need one check after the unalignment-adjustments, because | ||
229 | if both adjustments were done, either both or neither reference | ||
230 | had an exception. */ | ||
231 | if (retn != 0) | ||
232 | goto copy_exception_bytes; | ||
233 | } | ||
234 | |||
235 | /* Decide which copying method to use. */ | ||
236 | if (n >= 44*2) /* Break even between movem and | ||
237 | move16 is at 38.7*2, but modulo 44. | ||
238 | FIXME: We use move4 now. */ | ||
239 | { | ||
240 | /* For large copies we use 'movem' */ | ||
241 | |||
242 | /* It is not optimal to tell the compiler about clobbering any | ||
243 | registers; that will move the saving/restoring of those registers | ||
244 | to the function prologue/epilogue, and make non-movem sizes | ||
245 | suboptimal. | ||
246 | |||
247 | This method is not foolproof; it assumes that the "asm reg" | ||
248 | declarations at the beginning of the function really are used | ||
249 | here (beware: they may be moved to temporary registers). | ||
250 | This way, we do not have to save/move the registers around into | ||
251 | temporaries; we can safely use them straight away. | ||
252 | |||
253 | If you want to check that the allocation was right; then | ||
254 | check the equalities in the first comment. It should say | ||
255 | "r13=r13, r11=r11, r12=r12" */ | ||
256 | __asm__ volatile (" | ||
257 | .ifnc %0%1%2%3,$r13$r11$r12$r10 \n\ | ||
258 | .err \n\ | ||
259 | .endif \n\ | ||
260 | |||
261 | ;; Save the registers we'll use in the movem process | ||
262 | ;; on the stack. | ||
263 | subq 11*4,$sp | ||
264 | movem $r10,[$sp] | ||
265 | |||
266 | ;; Now we've got this: | ||
267 | ;; r11 - src | ||
268 | ;; r13 - dst | ||
269 | ;; r12 - n | ||
270 | |||
271 | ;; Update n for the first loop | ||
272 | subq 44,$r12 | ||
273 | 0: | ||
274 | movem [$r11+],$r10 | ||
275 | 1: | ||
276 | subq 44,$r12 | ||
277 | bge 0b | ||
278 | movem $r10,[$r13+] | ||
279 | |||
280 | addq 44,$r12 ;; compensate for last loop underflowing n | ||
281 | |||
282 | ;; Restore registers from stack | ||
283 | movem [$sp+],$r10 | ||
284 | 4: | ||
285 | .section .fixup,\"ax\" | ||
286 | |||
287 | ;; Do not jump back into the loop if we fail. For some uses, we get a | ||
288 | ;; page fault somewhere on the line. Without checking for page limits, | ||
289 | ;; we don't know where, but we need to copy accurately and keep an | ||
290 | ;; accurate count; not just clear the whole line. To do that, we fall | ||
291 | ;; down in the code below, proceeding with smaller amounts. It should | ||
292 | ;; be kept in mind that we have to cater to code like what at one time | ||
293 | ;; was in fs/super.c: | ||
294 | ;; i = size - copy_from_user((void *)page, data, size); | ||
295 | ;; which would cause repeated faults while clearing the remainder of | ||
296 | ;; the SIZE bytes at PAGE after the first fault. | ||
297 | ;; A caveat here is that we must not fall through from a failing page | ||
298 | ;; to a valid page. | ||
299 | |||
300 | 3: | ||
301 | movem [$sp+],$r10 | ||
302 | addq 44,$r12 ;; Get back count before faulting point. | ||
303 | subq 44,$r11 ;; Get back pointer to faulting movem-line. | ||
304 | jump 4b ;; Fall through, pretending the fault didn't happen. | ||
305 | |||
306 | .previous | ||
307 | .section __ex_table,\"a\" | ||
308 | .dword 1b,3b | ||
309 | .previous" | ||
310 | |||
311 | /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n), "=r" (retn) | ||
312 | /* Inputs */ : "0" (dst), "1" (src), "2" (n), "3" (retn)); | ||
313 | |||
314 | } | ||
315 | |||
316 | /* Either we directly start copying here, using dword copying in a loop, | ||
317 | or we copy as much as possible with 'movem' and then the last block | ||
318 | (<44 bytes) is copied here. This will work since 'movem' will have | ||
319 | updated src, dst and n. (Except with failing src.) | ||
320 | |||
321 | Since we want to keep src accurate, we can't use | ||
322 | __asm_copy_from_user_N with N != (1, 2, 4); it updates dst and | ||
323 | retn, but not src (by design; its value is ignored elsewhere). */ | ||
324 | |||
325 | while (n >= 4) | ||
326 | { | ||
327 | __asm_copy_from_user_4 (dst, src, retn); | ||
328 | n -= 4; | ||
329 | |||
330 | if (retn) | ||
331 | goto copy_exception_bytes; | ||
332 | } | ||
333 | |||
334 | /* If we get here, there were no memory read faults. */ | ||
335 | switch (n) | ||
336 | { | ||
337 | /* These copies are at least "naturally aligned" (so we don't have | ||
338 | to check each byte), due to the src alignment code before the | ||
339 | movem loop. The *_3 case *will* get the correct count for retn. */ | ||
340 | case 0: | ||
341 | /* This case deliberately left in (if you have doubts check the | ||
342 | generated assembly code). */ | ||
343 | break; | ||
344 | case 1: | ||
345 | __asm_copy_from_user_1 (dst, src, retn); | ||
346 | break; | ||
347 | case 2: | ||
348 | __asm_copy_from_user_2 (dst, src, retn); | ||
349 | break; | ||
350 | case 3: | ||
351 | __asm_copy_from_user_3 (dst, src, retn); | ||
352 | break; | ||
353 | } | ||
354 | |||
355 | /* If we get here, retn correctly reflects the number of failing | ||
356 | bytes. */ | ||
357 | return retn; | ||
358 | |||
359 | copy_exception_bytes: | ||
360 | /* We already have "retn" bytes cleared, and need to clear the | ||
361 | remaining "n" bytes. A non-optimized simple byte-for-byte in-line | ||
362 | memset is preferred here, since this isn't speed-critical code and | ||
363 | we'd rather have this a leaf-function than calling memset. */ | ||
364 | { | ||
365 | char *endp; | ||
366 | for (endp = dst + n; dst < endp; dst++) | ||
367 | *dst = 0; | ||
368 | } | ||
369 | |||
370 | return retn + n; | ||
371 | } | ||
372 | |||
373 | /* Zero userspace. */ | ||
374 | |||
375 | unsigned long | ||
376 | __do_clear_user (void __user *pto, unsigned long pn) | ||
377 | { | ||
378 | /* We want the parameters put in special registers. | ||
379 | Make sure the compiler is able to make something useful of this. | ||
380 | As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop). | ||
381 | |||
382 | FIXME: Comment for old gcc version. Check. | ||
383 | If gcc was all right, it really would need no temporaries, and no | ||
384 | stack space to save stuff on. */ | ||
385 | |||
386 | register char *dst __asm__ ("r13") = pto; | ||
387 | register int n __asm__ ("r12") = pn; | ||
388 | register int retn __asm__ ("r10") = 0; | ||
389 | |||
390 | |||
391 | if (((unsigned long) dst & 3) != 0 | ||
392 | /* Don't align if we wouldn't copy more than a few bytes. */ | ||
393 | && n >= 3) | ||
394 | { | ||
395 | if ((unsigned long) dst & 1) | ||
396 | { | ||
397 | __asm_clear_1 (dst, retn); | ||
398 | n--; | ||
399 | } | ||
400 | |||
401 | if ((unsigned long) dst & 2) | ||
402 | { | ||
403 | __asm_clear_2 (dst, retn); | ||
404 | n -= 2; | ||
405 | } | ||
406 | } | ||
407 | |||
408 | /* Decide which copying method to use. | ||
409 | FIXME: This number is from the "ordinary" kernel memset. */ | ||
410 | if (n >= (1*48)) | ||
411 | { | ||
412 | /* For large clears we use 'movem' */ | ||
413 | |||
414 | /* It is not optimal to tell the compiler about clobbering any | ||
415 | call-saved registers; that will move the saving/restoring of | ||
416 | those registers to the function prologue/epilogue, and make | ||
417 | non-movem sizes suboptimal. | ||
418 | |||
419 | This method is not foolproof; it assumes that the "asm reg" | ||
420 | declarations at the beginning of the function really are used | ||
421 | here (beware: they may be moved to temporary registers). | ||
422 | This way, we do not have to save/move the registers around into | ||
423 | temporaries; we can safely use them straight away. | ||
424 | |||
425 | If you want to check that the allocation was right; then | ||
426 | check the equalities in the first comment. It should say | ||
427 | something like "r13=r13, r11=r11, r12=r12". */ | ||
428 | __asm__ volatile (" | ||
429 | .ifnc %0%1%2,$r13$r12$r10 \n\ | ||
430 | .err \n\ | ||
431 | .endif \n\ | ||
432 | |||
433 | ;; Save the registers we'll clobber in the movem process | ||
434 | ;; on the stack. Don't mention them to gcc, it will only be | ||
435 | ;; upset. | ||
436 | subq 11*4,$sp | ||
437 | movem $r10,[$sp] | ||
438 | |||
439 | clear.d $r0 | ||
440 | clear.d $r1 | ||
441 | clear.d $r2 | ||
442 | clear.d $r3 | ||
443 | clear.d $r4 | ||
444 | clear.d $r5 | ||
445 | clear.d $r6 | ||
446 | clear.d $r7 | ||
447 | clear.d $r8 | ||
448 | clear.d $r9 | ||
449 | clear.d $r10 | ||
450 | clear.d $r11 | ||
451 | |||
452 | ;; Now we've got this: | ||
453 | ;; r13 - dst | ||
454 | ;; r12 - n | ||
455 | |||
456 | ;; Update n for the first loop | ||
457 | subq 12*4,$r12 | ||
458 | 0: | ||
459 | subq 12*4,$r12 | ||
460 | bge 0b | ||
461 | movem $r11,[$r13+] | ||
462 | 1: | ||
463 | addq 12*4,$r12 ;; compensate for last loop underflowing n | ||
464 | |||
465 | ;; Restore registers from stack | ||
466 | movem [$sp+],$r10 | ||
467 | 2: | ||
468 | .section .fixup,\"ax\" | ||
469 | 3: | ||
470 | move.d [$sp],$r10 | ||
471 | addq 12*4,$r10 | ||
472 | move.d $r10,[$sp] | ||
473 | clear.d $r10 | ||
474 | jump 0b | ||
475 | |||
476 | 4: | ||
477 | movem [$sp+],$r10 | ||
478 | addq 12*4,$r10 | ||
479 | addq 12*4,$r12 | ||
480 | jump 2b | ||
481 | |||
482 | .previous | ||
483 | .section __ex_table,\"a\" | ||
484 | .dword 0b,3b | ||
485 | .dword 1b,4b | ||
486 | .previous" | ||
487 | |||
488 | /* Outputs */ : "=r" (dst), "=r" (n), "=r" (retn) | ||
489 | /* Inputs */ : "0" (dst), "1" (n), "2" (retn) | ||
490 | /* Clobber */ : "r11"); | ||
491 | } | ||
492 | |||
493 | while (n >= 16) | ||
494 | { | ||
495 | __asm_clear_16 (dst, retn); | ||
496 | n -= 16; | ||
497 | } | ||
498 | |||
499 | /* Having a separate by-four loops cuts down on cache footprint. | ||
500 | FIXME: Test with and without; increasing switch to be 0..15. */ | ||
501 | while (n >= 4) | ||
502 | { | ||
503 | __asm_clear_4 (dst, retn); | ||
504 | n -= 4; | ||
505 | } | ||
506 | |||
507 | switch (n) | ||
508 | { | ||
509 | case 0: | ||
510 | break; | ||
511 | case 1: | ||
512 | __asm_clear_1 (dst, retn); | ||
513 | break; | ||
514 | case 2: | ||
515 | __asm_clear_2 (dst, retn); | ||
516 | break; | ||
517 | case 3: | ||
518 | __asm_clear_3 (dst, retn); | ||
519 | break; | ||
520 | } | ||
521 | |||
522 | return retn; | ||
523 | } | ||