path: root/arch/cris/arch-v32/lib
author    Mikael Starvik <mikael.starvik@axis.com>  2005-07-27 14:44:44 -0400
committer Linus Torvalds <torvalds@g5.osdl.org>  2005-07-27 19:26:01 -0400
commit    51533b615e605d86154ec1b4e585c8ca1b0b15b7 (patch)
tree      4a6d7d8494d2017632d83624fb71b36031e0e7e5 /arch/cris/arch-v32/lib
parent    5d01e6ce785884a5db5792cd2e5bb36fa82fe23c (diff)

[PATCH] CRIS update: new subarchitecture v32

New CRIS subarchitecture named v32.

From: Dave Jones <davej@redhat.com>
  Fix swapped kmalloc args

Signed-off-by: Mikael Starvik <starvik@axis.com>
Signed-off-by: Dave Jones <davej@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'arch/cris/arch-v32/lib')
-rw-r--r--  arch/cris/arch-v32/lib/Makefile          6
-rw-r--r--  arch/cris/arch-v32/lib/checksum.S      111
-rw-r--r--  arch/cris/arch-v32/lib/checksumcopy.S  120
-rw-r--r--  arch/cris/arch-v32/lib/csumcpfruser.S   69
-rw-r--r--  arch/cris/arch-v32/lib/dram_init.S     120
-rw-r--r--  arch/cris/arch-v32/lib/hw_settings.S    73
-rw-r--r--  arch/cris/arch-v32/lib/memset.c        253
-rw-r--r--  arch/cris/arch-v32/lib/nand_init.S     179
-rw-r--r--  arch/cris/arch-v32/lib/spinlock.S       33
-rw-r--r--  arch/cris/arch-v32/lib/string.c        219
-rw-r--r--  arch/cris/arch-v32/lib/usercopy.c      470
11 files changed, 1653 insertions(+), 0 deletions(-)
diff --git a/arch/cris/arch-v32/lib/Makefile b/arch/cris/arch-v32/lib/Makefile
new file mode 100644
index 000000000000..05b3ec6978d6
--- /dev/null
+++ b/arch/cris/arch-v32/lib/Makefile
@@ -0,0 +1,6 @@
#
# Makefile for Etrax-specific library files.
#

lib-y  = checksum.o checksumcopy.o string.o usercopy.o memset.o csumcpfruser.o spinlock.o

diff --git a/arch/cris/arch-v32/lib/checksum.S b/arch/cris/arch-v32/lib/checksum.S
new file mode 100644
index 000000000000..32e66181b826
--- /dev/null
+++ b/arch/cris/arch-v32/lib/checksum.S
@@ -0,0 +1,111 @@
/*
 * A fast checksum routine using movem
 * Copyright (c) 1998-2001, 2003 Axis Communications AB
 *
 * csum_partial(const unsigned char * buff, int len, unsigned int sum)
 */

        .globl  csum_partial
csum_partial:

        ;; r10 - src
        ;; r11 - length
        ;; r12 - checksum

        ;; check for breakeven length between movem and normal word looping versions
        ;; we also do _NOT_ want to compute a checksum over more than the
        ;; actual length when length < 40

        cmpu.w  80,$r11
        blo     _word_loop
        nop

        ;; need to save the registers we use below in the movem loop
        ;; this overhead is why we have a check above for breakeven length
        ;; only r0 - r8 have to be saved, the other ones are clobber-able
        ;; according to the ABI

        subq    9*4,$sp
        subq    10*4,$r11       ; update length for the first loop
        movem   $r8,[$sp]

        ;; do a movem checksum

_mloop: movem   [$r10+],$r9     ; read 10 longwords

        ;; perform dword checksumming on the 10 longwords

        add.d   $r0,$r12
        addc    $r1,$r12
        addc    $r2,$r12
        addc    $r3,$r12
        addc    $r4,$r12
        addc    $r5,$r12
        addc    $r6,$r12
        addc    $r7,$r12
        addc    $r8,$r12
        addc    $r9,$r12

        ;; fold the carry into the checksum, to avoid having to loop the carry
        ;; back into the top

        addc    0,$r12
        addc    0,$r12          ; do it again, since we might have generated a carry

        subq    10*4,$r11
        bge     _mloop
        nop

        addq    10*4,$r11       ; compensate for last loop underflowing length

        movem   [$sp+],$r8      ; restore regs

_word_loop:
        ;; only fold if there is anything to fold.

        cmpq    0,$r12
        beq     _no_fold

        ;; fold 32-bit checksum into a 16-bit checksum, to avoid carries below.
        ;; r9 and r13 can be used as temporaries.

        moveq   -1,$r9          ; put 0xffff in r9, faster than move.d 0xffff,r9
        lsrq    16,$r9

        move.d  $r12,$r13
        lsrq    16,$r13         ; r13 = checksum >> 16
        and.d   $r9,$r12        ; checksum = checksum & 0xffff
        add.d   $r13,$r12       ; checksum += r13
        move.d  $r12,$r13       ; do the same again, maybe we got a carry last add
        lsrq    16,$r13
        and.d   $r9,$r12
        add.d   $r13,$r12

_no_fold:
        cmpq    2,$r11
        blt     _no_words
        nop

        ;; checksum the rest of the words

        subq    2,$r11

_wloop: subq    2,$r11
        bge     _wloop
        addu.w  [$r10+],$r12

        addq    2,$r11

_no_words:
        ;; see if we have one odd byte more
        cmpq    1,$r11
        beq     _do_byte
        nop
        ret
        move.d  $r12,$r10

_do_byte:
        ;; copy and checksum the last byte
        addu.b  [$r10],$r12
        ret
        move.d  $r12,$r10
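
For readers tracing the fold sequence above: the 32-to-16-bit reduction that csum_partial performs before its word loop is equivalent to this C sketch (function name and types are illustrative, not part of the patch):

#include <stdint.h>

/* Fold a 32-bit partial checksum into 16 bits, exactly as the assembly
   does: add the high half into the low half, then repeat once in case
   the first add carried into bit 16. */
static uint32_t csum_fold16(uint32_t sum)
{
        sum = (sum & 0xffff) + (sum >> 16);   /* first fold */
        sum = (sum & 0xffff) + (sum >> 16);   /* absorb a possible carry */
        return sum;
}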
diff --git a/arch/cris/arch-v32/lib/checksumcopy.S b/arch/cris/arch-v32/lib/checksumcopy.S
new file mode 100644
index 000000000000..9303ccbadc6d
--- /dev/null
+++ b/arch/cris/arch-v32/lib/checksumcopy.S
@@ -0,0 +1,120 @@
/*
 * A fast checksum+copy routine using movem
 * Copyright (c) 1998, 2001, 2003 Axis Communications AB
 *
 * Authors:     Bjorn Wesen
 *
 * csum_partial_copy_nocheck(const char *src, char *dst,
 *                           int len, unsigned int sum)
 */

        .globl  csum_partial_copy_nocheck
csum_partial_copy_nocheck:

        ;; r10 - src
        ;; r11 - dst
        ;; r12 - length
        ;; r13 - checksum

        ;; check for breakeven length between movem and normal word looping versions
        ;; we also do _NOT_ want to compute a checksum over more than the
        ;; actual length when length < 40

        cmpu.w  80,$r12
        blo     _word_loop
        nop

        ;; need to save the registers we use below in the movem loop
        ;; this overhead is why we have a check above for breakeven length
        ;; only r0 - r8 have to be saved, the other ones are clobber-able
        ;; according to the ABI

        subq    9*4,$sp
        subq    10*4,$r12       ; update length for the first loop
        movem   $r8,[$sp]

        ;; do a movem copy and checksum

1:      ;; A failing userspace access (the read) will have this as PC.
_mloop: movem   [$r10+],$r9     ; read 10 longwords
        movem   $r9,[$r11+]     ; write 10 longwords

        ;; perform dword checksumming on the 10 longwords

        add.d   $r0,$r13
        addc    $r1,$r13
        addc    $r2,$r13
        addc    $r3,$r13
        addc    $r4,$r13
        addc    $r5,$r13
        addc    $r6,$r13
        addc    $r7,$r13
        addc    $r8,$r13
        addc    $r9,$r13

        ;; fold the carry into the checksum, to avoid having to loop the carry
        ;; back into the top

        addc    0,$r13
        addc    0,$r13          ; do it again, since we might have generated a carry

        subq    10*4,$r12
        bge     _mloop
        nop

        addq    10*4,$r12       ; compensate for last loop underflowing length

        movem   [$sp+],$r8      ; restore regs

_word_loop:
        ;; only fold if there is anything to fold.

        cmpq    0,$r13
        beq     _no_fold

        ;; fold 32-bit checksum into a 16-bit checksum, to avoid carries below
        ;; r9 can be used as temporary.

        move.d  $r13,$r9
        lsrq    16,$r9          ; r9 = checksum >> 16
        and.d   0xffff,$r13     ; checksum = checksum & 0xffff
        add.d   $r9,$r13        ; checksum += r9
        move.d  $r13,$r9        ; do the same again, maybe we got a carry last add
        lsrq    16,$r9
        and.d   0xffff,$r13
        add.d   $r9,$r13

_no_fold:
        cmpq    2,$r12
        blt     _no_words
        nop

        ;; copy and checksum the rest of the words

        subq    2,$r12

2:      ;; A failing userspace access for the read below will have this as PC.
_wloop: move.w  [$r10+],$r9
        addu.w  $r9,$r13
        subq    2,$r12
        bge     _wloop
        move.w  $r9,[$r11+]

        addq    2,$r12

_no_words:
        ;; see if we have one odd byte more
        cmpq    1,$r12
        beq     _do_byte
        nop
        ret
        move.d  $r13,$r10

_do_byte:
        ;; copy and checksum the last byte
3:      ;; A failing userspace access for the read below will have this as PC.
        move.b  [$r10],$r9
        addu.b  $r9,$r13
        move.b  $r9,[$r11]
        ret
        move.d  $r13,$r10
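
The word loop above overlaps the copy with the checksum by placing the store in the branch delay slot. A plain C rendering of the same data flow, with assumed names and no delay-slot trickery:

#include <stddef.h>
#include <stdint.h>

/* Copy and checksum one 16-bit word at a time, mirroring _wloop.
   Illustrative sketch only; carries accumulate in the 32-bit sum and
   are folded separately, as in the assembly. */
static uint32_t copy_and_sum_words(uint16_t *dst, const uint16_t *src,
                                   size_t nwords, uint32_t sum)
{
        while (nwords--) {
                uint16_t w = *src++;
                sum += w;       /* addu.w: zero-extended add */
                *dst++ = w;     /* done in the delay slot above */
        }
        return sum;
}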
diff --git a/arch/cris/arch-v32/lib/csumcpfruser.S b/arch/cris/arch-v32/lib/csumcpfruser.S
new file mode 100644
index 000000000000..600ec16b9f28
--- /dev/null
+++ b/arch/cris/arch-v32/lib/csumcpfruser.S
@@ -0,0 +1,69 @@
/*
 * Add-on to transform csum_partial_copy_nocheck in checksumcopy.S into
 * csum_partial_copy_from_user by adding exception records.
 *
 * Copyright (C) 2001, 2003 Axis Communications AB.
 *
 * Author: Hans-Peter Nilsson.
 */

#include <asm/errno.h>

/* Same function body, but a different name.  If we just added exception
   records to _csum_partial_copy_nocheck and made it generic, we wouldn't
   know a user fault from a kernel fault and we would have overhead in
   each kernel caller for the error-pointer argument.

   unsigned int csum_partial_copy_from_user
     (const char *src, char *dst, int len, unsigned int sum, int *errptr);

   Note that the errptr argument is only set if we encounter an error.
   It is conveniently located on the stack, so the normal function body
   does not have to handle it.  */

#define csum_partial_copy_nocheck csum_partial_copy_from_user

/* There are local labels numbered 1, 2 and 3 present to mark the
   different from-user accesses.  */
#include "checksumcopy.S"

        .section .fixup,"ax"

;; Here from the movem loop; restore stack.
4:
        movem   [$sp+],$r8
;; r12 is already decremented.  Add back chunk_size-2.
        addq    40-2,$r12

;; Here from the word loop; r12 is off by 2; add it back.
5:
        addq    2,$r12

;; Here from a failing single byte.
6:

;; Signal in *errptr that we had a failing access.
        move.d  [$sp],$acr
        moveq   -EFAULT,$r9
        subq    4,$sp
        move.d  $r9,[$acr]

;; Clear the rest of the destination area using memset.  Preserve the
;; checksum for the readable bytes.
        move.d  $r13,[$sp]
        subq    4,$sp
        move.d  $r11,$r10
        move    $srp,[$sp]
        jsr     memset
        clear.d $r11

        move    [$sp+],$srp
        ret
        move.d  [$sp+],$r10

        .previous
        .section __ex_table,"a"
        .dword 1b,4b
        .dword 2b,5b
        .dword 3b,6b
        .previous
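
Since the fixup code stores -EFAULT through errptr only when a user access faults, a caller must preinitialize and test it afterwards. A hypothetical caller sketch (wrapper name assumed, not from this patch):

#include <errno.h>

/* Prototype as documented in the comment block above. */
unsigned int csum_partial_copy_from_user(const char *src, char *dst,
                                         int len, unsigned int sum,
                                         int *errptr);

static int checksum_user_buffer(const char *usrc, char *kdst, int len,
                                unsigned int *out_sum)
{
        int err = 0;    /* untouched unless a user access faults */

        *out_sum = csum_partial_copy_from_user(usrc, kdst, len, 0, &err);
        return err;     /* 0 on success, -EFAULT on a user fault */
}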
diff --git a/arch/cris/arch-v32/lib/dram_init.S b/arch/cris/arch-v32/lib/dram_init.S
new file mode 100644
index 000000000000..47b6cf5f4afd
--- /dev/null
+++ b/arch/cris/arch-v32/lib/dram_init.S
@@ -0,0 +1,120 @@
/* $Id: dram_init.S,v 1.4 2005/04/24 18:48:32 starvik Exp $
 *
 * DRAM/SDRAM initialization - alter with care
 * This file is intended to be included from other assembler files
 *
 * Note: This file may not modify r8 or r9 because they are used to
 * carry information from the decompressor to the kernel
 *
 * Copyright (C) 2000-2003 Axis Communications AB
 *
 * Authors:  Mikael Starvik (starvik@axis.com)
 */

/* Just to be certain the config file is included, we include it here
 * explicitly instead of depending on it being included in the file that
 * uses this code.
 */

#include <linux/config.h>
#include <asm/arch/hwregs/asm/reg_map_asm.h>
#include <asm/arch/hwregs/asm/bif_core_defs_asm.h>

        ;; WARNING! The registers r8 and r9 are used as parameters carrying
        ;; information from the decompressor (if the kernel was compressed).
        ;; They should not be used in the code below.

        ; Refer to BIF MDS for a description of SDRAM initialization

        ; Bank configuration
        move.d  REG_ADDR(bif_core, regi_bif_core, rw_sdram_cfg_grp0), $r0
        move.d  CONFIG_ETRAX_SDRAM_GRP0_CONFIG, $r1
        move.d  $r1, [$r0]
        move.d  REG_ADDR(bif_core, regi_bif_core, rw_sdram_cfg_grp1), $r0
        move.d  CONFIG_ETRAX_SDRAM_GRP1_CONFIG, $r1
        move.d  $r1, [$r0]

        ; Calculate value of mrs_data
        ; CAS latency = 2 && bus_width = 32 => 0x40
        ; CAS latency = 3 && bus_width = 32 => 0x60
        ; CAS latency = 2 && bus_width = 16 => 0x20
        ; CAS latency = 3 && bus_width = 16 => 0x30

        ; Check if value is already supplied in kernel config
        move.d  CONFIG_ETRAX_SDRAM_COMMAND, $r2
        bne     _set_timing
        nop

        move.d  0x40, $r4       ; Assume 32 bits and CAS latency = 2
        move.d  CONFIG_ETRAX_SDRAM_TIMING, $r1
        and.d   0x07, $r1       ; Get CAS latency
        cmpq    2, $r1          ; CL = 2 ?
        beq     _bw_check
        nop
        move.d  0x60, $r4

_bw_check:
        ; Assume that group 0 width is equal to group 1. This assumption
        ; is wrong for a group 1 only hardware (such as the grand old
        ; StorPoint+).
        move.d  CONFIG_ETRAX_SDRAM_GRP0_CONFIG, $r1
        and.d   0x200, $r1      ; DRAM width is bit 9
        beq     _set_timing
        lslq    2, $r4          ; mrs_data starts at bit 2
        lsrq    1, $r4          ; 16 bits. Shift down value.

        ; Set timing parameters (refresh off to avoid Guinness TR 83)
_set_timing:
        move.d  CONFIG_ETRAX_SDRAM_TIMING, $r1
        and.d   ~(3 << reg_bif_core_rw_sdram_timing___ref___lsb), $r1
        move.d  REG_ADDR(bif_core, regi_bif_core, rw_sdram_timing), $r0
        move.d  $r1, [$r0]

        ; Issue NOP command
        move.d  REG_ADDR(bif_core, regi_bif_core, rw_sdram_cmd), $r5
        moveq   regk_bif_core_nop, $r1
        move.d  $r1, [$r5]

        ; Wait 200us
        move.d  10000, $r2
1:      bne     1b
        subq    1, $r2

        ; Issue initialization command sequence
        move.d  _sdram_commands_start, $r2
        and.d   0x000fffff, $r2 ; Make sure commands are read from flash
        move.d  _sdram_commands_end, $r3
        and.d   0x000fffff, $r3
1:      clear.d $r6
        move.b  [$r2+], $r6     ; Load command
        or.d    $r4, $r6        ; Add calculated mrs
        move.d  $r6, [$r5]      ; Write rw_sdram_cmd
        ; Wait 80 ns between each command
        move.d  4000, $r7
2:      bne     2b
        subq    1, $r7
        cmp.d   $r2, $r3        ; Last command?
        bne     1b
        nop

        ; Start refresh
        move.d  CONFIG_ETRAX_SDRAM_TIMING, $r1
        move.d  REG_ADDR(bif_core, regi_bif_core, rw_sdram_timing), $r0
        move.d  $r1, [$r0]

        ; Initialization finished
        ba      _sdram_commands_end
        nop

_sdram_commands_start:
        .byte   regk_bif_core_pre       ; Precharge
        .byte   regk_bif_core_ref       ; refresh
        .byte   regk_bif_core_ref       ; refresh
        .byte   regk_bif_core_ref       ; refresh
        .byte   regk_bif_core_ref       ; refresh
        .byte   regk_bif_core_ref       ; refresh
        .byte   regk_bif_core_ref       ; refresh
        .byte   regk_bif_core_ref       ; refresh
        .byte   regk_bif_core_ref       ; refresh
        .byte   regk_bif_core_mrs       ; mrs
_sdram_commands_end:
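
The mrs_data table in the comments above maps CAS latency and bus width to the value or:ed into each SDRAM command. As a C lookup (a hedged sketch with assumed names; the real code additionally shifts the value into the mrs_data field at bit 2):

/* Values taken verbatim from the comment table in dram_init.S. */
static unsigned int sdram_mrs_data(unsigned int cas_latency,
                                   unsigned int bus_width)
{
        if (bus_width == 32)
                return (cas_latency == 2) ? 0x40 : 0x60;
        return (cas_latency == 2) ? 0x20 : 0x30;        /* 16-bit bus */
}

Note how the assembly gets the 16-bit case for free: it assumes 32 bits, then a single lsrq 1 halves the value when bit 9 of the group 0 config indicates a narrow bus.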
diff --git a/arch/cris/arch-v32/lib/hw_settings.S b/arch/cris/arch-v32/lib/hw_settings.S
new file mode 100644
index 000000000000..5182e8c2cff2
--- /dev/null
+++ b/arch/cris/arch-v32/lib/hw_settings.S
@@ -0,0 +1,73 @@
/*
 * $Id: hw_settings.S,v 1.3 2005/04/24 18:36:57 starvik Exp $
 *
 * This table is used by some tools to extract hardware parameters.
 * The table should be included in the kernel and the decompressor.
 * Don't forget to update the tools if you change this table.
 *
 * Copyright (C) 2001 Axis Communications AB
 *
 * Authors:  Mikael Starvik (starvik@axis.com)
 */

#include <linux/config.h>
#include <asm/arch/hwregs/asm/reg_map_asm.h>
#include <asm/arch/hwregs/asm/bif_core_defs_asm.h>
#include <asm/arch/hwregs/asm/gio_defs_asm.h>

        .ascii "HW_PARAM_MAGIC" ; Magic number
        .dword 0xc0004000       ; Kernel start address

        ; Debug port
#ifdef CONFIG_ETRAX_DEBUG_PORT0
        .dword 0
#elif defined(CONFIG_ETRAX_DEBUG_PORT1)
        .dword 1
#elif defined(CONFIG_ETRAX_DEBUG_PORT2)
        .dword 2
#elif defined(CONFIG_ETRAX_DEBUG_PORT3)
        .dword 3
#else
        .dword 4 ; No debug
#endif

        ; Register values
        .dword REG_ADDR(bif_core, regi_bif_core, rw_grp1_cfg)
        .dword CONFIG_ETRAX_MEM_GRP1_CONFIG
        .dword REG_ADDR(bif_core, regi_bif_core, rw_grp2_cfg)
        .dword CONFIG_ETRAX_MEM_GRP2_CONFIG
        .dword REG_ADDR(bif_core, regi_bif_core, rw_grp3_cfg)
        .dword CONFIG_ETRAX_MEM_GRP3_CONFIG
        .dword REG_ADDR(bif_core, regi_bif_core, rw_grp4_cfg)
        .dword CONFIG_ETRAX_MEM_GRP4_CONFIG
        .dword REG_ADDR(bif_core, regi_bif_core, rw_sdram_cfg_grp0)
        .dword CONFIG_ETRAX_SDRAM_GRP0_CONFIG
        .dword REG_ADDR(bif_core, regi_bif_core, rw_sdram_cfg_grp1)
        .dword CONFIG_ETRAX_SDRAM_GRP1_CONFIG
        .dword REG_ADDR(bif_core, regi_bif_core, rw_sdram_timing)
        .dword CONFIG_ETRAX_SDRAM_TIMING
        .dword REG_ADDR(bif_core, regi_bif_core, rw_sdram_cmd)
        .dword CONFIG_ETRAX_SDRAM_COMMAND

        .dword REG_ADDR(gio, regi_gio, rw_pa_dout)
        .dword CONFIG_ETRAX_DEF_GIO_PA_OUT
        .dword REG_ADDR(gio, regi_gio, rw_pa_oe)
        .dword CONFIG_ETRAX_DEF_GIO_PA_OE
        .dword REG_ADDR(gio, regi_gio, rw_pb_dout)
        .dword CONFIG_ETRAX_DEF_GIO_PB_OUT
        .dword REG_ADDR(gio, regi_gio, rw_pb_oe)
        .dword CONFIG_ETRAX_DEF_GIO_PB_OE
        .dword REG_ADDR(gio, regi_gio, rw_pc_dout)
        .dword CONFIG_ETRAX_DEF_GIO_PC_OUT
        .dword REG_ADDR(gio, regi_gio, rw_pc_oe)
        .dword CONFIG_ETRAX_DEF_GIO_PC_OE
        .dword REG_ADDR(gio, regi_gio, rw_pd_dout)
        .dword CONFIG_ETRAX_DEF_GIO_PD_OUT
        .dword REG_ADDR(gio, regi_gio, rw_pd_oe)
        .dword CONFIG_ETRAX_DEF_GIO_PD_OE
        .dword REG_ADDR(gio, regi_gio, rw_pe_dout)
        .dword CONFIG_ETRAX_DEF_GIO_PE_OUT
        .dword REG_ADDR(gio, regi_gio, rw_pe_oe)
        .dword CONFIG_ETRAX_DEF_GIO_PE_OE

        .dword 0 ; No more register values
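
External tools parse this table as a magic string, two header dwords, and then (register address, value) pairs terminated by a zero address. A sketch of such a parser (struct layout and names are assumptions for illustration; the packed attribute matters because the 14-byte magic leaves the dwords unaligned):

#include <stdint.h>
#include <stdio.h>

struct hw_param_header {
        char     magic[14];      /* "HW_PARAM_MAGIC", no terminating NUL */
        uint32_t kernel_start;   /* 0xc0004000 */
        uint32_t debug_port;     /* 0..3, or 4 for no debug port */
} __attribute__((packed));

static void walk_reg_values(const uint32_t *p)
{
        while (p[0] != 0) {      /* a zero address ends the list */
                printf("reg %#x <- %#x\n", p[0], p[1]);
                p += 2;
        }
}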
diff --git a/arch/cris/arch-v32/lib/memset.c b/arch/cris/arch-v32/lib/memset.c
new file mode 100644
index 000000000000..ffca1214674e
--- /dev/null
+++ b/arch/cris/arch-v32/lib/memset.c
@@ -0,0 +1,253 @@
/*#************************************************************************#*/
/*#-------------------------------------------------------------------------*/
/*#                                                                          */
/*# FUNCTION NAME: memset()                                                  */
/*#                                                                          */
/*# PARAMETERS:    void* dst;   Destination address.                         */
/*#                int   c;     Value of byte to write.                      */
/*#                int   len;   Number of bytes to write.                    */
/*#                                                                          */
/*# RETURNS:       dst.                                                      */
/*#                                                                          */
/*# DESCRIPTION:   Sets the memory dst of length len bytes to c, as standard.*/
/*#                Framework taken from memcpy.  This routine is             */
/*#                very sensitive to compiler changes in register allocation.*/
/*#                Should really be rewritten to avoid this problem.         */
/*#                                                                          */
/*#-------------------------------------------------------------------------*/
/*#                                                                          */
/*# HISTORY                                                                  */
/*#                                                                          */
/*# DATE      NAME            CHANGES                                        */
/*# ----      ----            -------                                        */
/*# 990713    HP              Tired of watching this function (or            */
/*#                           really, the nonoptimized generic               */
/*#                           implementation) take up 90% of simulator       */
/*#                           output.  Measurements needed.                  */
/*#                                                                          */
/*#-------------------------------------------------------------------------*/

#include <linux/types.h>

/* No, there's no macro saying 12*4, since it is "hard" to get it into
   the asm in a good way.  Thus better to expose the problem everywhere.  */

/* Assuming 1 cycle per dword written or read (ok, not really true), and
   one per instruction, then 43+3*(n/48-1) <= 24+24*(n/48-1)
   so n >= 45.7; n >= 0.9; we win on the first full 48-byte block to set.  */

#define ZERO_BLOCK_SIZE (1*12*4)

void *memset(void *pdst,
             int c,
             size_t plen)
{
  /* Ok.  Now we want the parameters put in special registers.
     Make sure the compiler is able to make something useful of this.  */

  register char *return_dst __asm__ ("r10") = pdst;
  register int n __asm__ ("r12") = plen;
  register int lc __asm__ ("r11") = c;

  /* Most apps use memset sanely.  Only those memsetting about 3..4
     bytes or less get penalized compared to the generic implementation
     - and that's not really sane use.  */

  /* Ugh.  This is fragile at best.  Check with newer GCC releases, if
     they compile cascaded "x |= x << 8" sanely!  */
  __asm__("movu.b %0,$r13                                            \n\
           lslq 8,$r13                                               \n\
           move.b %0,$r13                                            \n\
           move.d $r13,%0                                            \n\
           lslq 16,$r13                                              \n\
           or.d $r13,%0"
          : "=r" (lc) : "0" (lc) : "r13");

  {
    register char *dst __asm__ ("r13") = pdst;

    /* This is NONPORTABLE, but since this whole routine is     */
    /* grossly nonportable that doesn't matter.                 */

    if (((unsigned long) pdst & 3) != 0
        /* Oops! n=0 must be a legal call, regardless of alignment.  */
        && n >= 3)
      {
        if ((unsigned long)dst & 1)
          {
            *dst = (char) lc;
            n--;
            dst++;
          }

        if ((unsigned long)dst & 2)
          {
            *(short *)dst = lc;
            n -= 2;
            dst += 2;
          }
      }

    /* Now the fun part.  For the threshold value of this, check the
       equation above.  */
    /* Decide which copying method to use.  */
    if (n >= ZERO_BLOCK_SIZE)
      {
        /* For large copies we use 'movem' */

        /* It is not optimal to tell the compiler about clobbering any
           registers; that will move the saving/restoring of those registers
           to the function prologue/epilogue, and make non-movem sizes
           suboptimal.

           This method is not foolproof; it assumes that the "asm reg"
           declarations at the beginning of the function really are used
           here (beware: they may be moved to temporary registers).
           This way, we do not have to save/move the registers around into
           temporaries; we can safely use them straight away.

           If you want to check that the allocation was right; then
           check the equalities in the first comment.  It should say
           "r13=r13, r12=r12, r11=r11" */
        __asm__ volatile ("                                          \n\
        ;; Check that the register asm declaration got right.       \n\
        ;; The GCC manual says it will work, but there *has* been bugs. \n\
        .ifnc %0-%1-%4,$r13-$r12-$r11                                \n\
        .err                                                         \n\
        .endif                                                       \n\
                                                                     \n\
        ;; Save the registers we'll clobber in the movem process     \n\
        ;; on the stack.  Don't mention them to gcc, it will only be \n\
        ;; upset.                                                    \n\
        subq    11*4,$sp                                             \n\
        movem   $r10,[$sp]                                           \n\
                                                                     \n\
        move.d  $r11,$r0                                             \n\
        move.d  $r11,$r1                                             \n\
        move.d  $r11,$r2                                             \n\
        move.d  $r11,$r3                                             \n\
        move.d  $r11,$r4                                             \n\
        move.d  $r11,$r5                                             \n\
        move.d  $r11,$r6                                             \n\
        move.d  $r11,$r7                                             \n\
        move.d  $r11,$r8                                             \n\
        move.d  $r11,$r9                                             \n\
        move.d  $r11,$r10                                            \n\
                                                                     \n\
        ;; Now we've got this:                                       \n\
        ;; r13 - dst                                                 \n\
        ;; r12 - n                                                   \n\
                                                                     \n\
        ;; Update n for the first loop                               \n\
        subq    12*4,$r12                                            \n\
0:                                                                   \n\
        subq    12*4,$r12                                            \n\
        bge     0b                                                   \n\
        movem   $r11,[$r13+]                                         \n\
                                                                     \n\
        addq    12*4,$r12  ;; compensate for last loop underflowing n \n\
                                                                     \n\
        ;; Restore registers from stack                              \n\
        movem [$sp+],$r10"

        /* Outputs */ : "=r" (dst), "=r" (n)
        /* Inputs */ : "0" (dst), "1" (n), "r" (lc));
      }

    /* Either we directly start setting, using dword writes
       in a loop, or we set as much as possible with 'movem'
       and then the last block (<48 bytes) is handled here.
       This will work since 'movem' will have updated dst,n.  */

    while ( n >= 16 )
      {
        *((long*)dst)++ = lc;
        *((long*)dst)++ = lc;
        *((long*)dst)++ = lc;
        *((long*)dst)++ = lc;
        n -= 16;
      }

    /* A switch() is definitely the fastest although it takes a LOT of code.
     * Particularly if you inline code this.  */
    switch (n)
      {
        case 0:
          break;
        case 1:
          *(char*)dst = (char) lc;
          break;
        case 2:
          *(short*)dst = (short) lc;
          break;
        case 3:
          *((short*)dst)++ = (short) lc;
          *(char*)dst = (char) lc;
          break;
        case 4:
          *((long*)dst)++ = lc;
          break;
        case 5:
          *((long*)dst)++ = lc;
          *(char*)dst = (char) lc;
          break;
        case 6:
          *((long*)dst)++ = lc;
          *(short*)dst = (short) lc;
          break;
        case 7:
          *((long*)dst)++ = lc;
          *((short*)dst)++ = (short) lc;
          *(char*)dst = (char) lc;
          break;
        case 8:
          *((long*)dst)++ = lc;
          *((long*)dst)++ = lc;
          break;
        case 9:
          *((long*)dst)++ = lc;
          *((long*)dst)++ = lc;
          *(char*)dst = (char) lc;
          break;
        case 10:
          *((long*)dst)++ = lc;
          *((long*)dst)++ = lc;
          *(short*)dst = (short) lc;
          break;
        case 11:
          *((long*)dst)++ = lc;
          *((long*)dst)++ = lc;
          *((short*)dst)++ = (short) lc;
          *(char*)dst = (char) lc;
          break;
        case 12:
          *((long*)dst)++ = lc;
          *((long*)dst)++ = lc;
          *((long*)dst)++ = lc;
          break;
        case 13:
          *((long*)dst)++ = lc;
          *((long*)dst)++ = lc;
          *((long*)dst)++ = lc;
          *(char*)dst = (char) lc;
          break;
        case 14:
          *((long*)dst)++ = lc;
          *((long*)dst)++ = lc;
          *((long*)dst)++ = lc;
          *(short*)dst = (short) lc;
          break;
        case 15:
          *((long*)dst)++ = lc;
          *((long*)dst)++ = lc;
          *((long*)dst)++ = lc;
          *((short*)dst)++ = (short) lc;
          *(char*)dst = (char) lc;
          break;
      }
  }

  return return_dst; /* destination pointer.  */
} /* memset() */
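
The fragile inline asm near the top of memset() broadcasts the fill byte into all four bytes of a word (the cascaded "x |= x << 8" trick the comment refers to). A portable C equivalent, for reference only:

#include <stdint.h>

/* 0xAB -> 0xABABABAB; multiplying by 0x01010101 replicates the low
   byte into every byte of the word. */
static uint32_t broadcast_byte(int c)
{
        return (uint32_t)(uint8_t)c * 0x01010101u;
}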
diff --git a/arch/cris/arch-v32/lib/nand_init.S b/arch/cris/arch-v32/lib/nand_init.S
new file mode 100644
index 000000000000..aba5c751c282
--- /dev/null
+++ b/arch/cris/arch-v32/lib/nand_init.S
@@ -0,0 +1,179 @@
##=============================================================================
##
## nand_init.S
##
## The bootrom copies data from the NAND flash to the internal RAM but
## due to a bug/feature we can only trust the 256 first bytes. So this
## code copies more data from NAND flash to internal RAM. Obviously this
## code must fit in the first 256 bytes so alter with care.
##
## Some notes about the bug/feature for future reference:
##   The bootrom copies the first 127 KB from NAND flash to internal
##   memory. The problem is that it does a bytewise copy. NAND flashes
##   do autoincrement on the address so for a 16-bit device each
##   read/write increases the address by two. So the copy loop in the
##   bootrom will discard every second byte. This is solved by inserting
##   zeroes in every second byte in the first erase block.
##
##   The bootrom also incorrectly assumes that it can read the flash
##   linearly with only one read command but the flash will actually
##   switch between normal area and spare area if you do that so we
##   can't trust more than the first 256 bytes.
##
##=============================================================================

#include <asm/arch/hwregs/asm/reg_map_asm.h>
#include <asm/arch/hwregs/asm/gio_defs_asm.h>
#include <asm/arch/hwregs/asm/pinmux_defs_asm.h>
#include <asm/arch/hwregs/asm/bif_core_defs_asm.h>
#include <asm/arch/hwregs/asm/config_defs_asm.h>
#include <linux/config.h>

;; There are 8-bit NAND flashes and 16-bit NAND flashes.
;; We need to treat them slightly differently.
#if CONFIG_ETRAX_FLASH_BUSWIDTH==2
#define PAGE_SIZE 256
#else
#error 2
#define PAGE_SIZE 512
#endif
#define ERASE_BLOCK 16384

;; GPIO pins connected to NAND flash
#define CE 4
#define CLE 5
#define ALE 6
#define BY 7

;; Address space for NAND flash
#define NAND_RD_ADDR 0x90000000
#define NAND_WR_ADDR 0x94000000

#define READ_CMD 0x00

;; Readability macros
#define CSP_MASK \
        REG_MASK(bif_core, rw_grp3_cfg, gated_csp0) | \
        REG_MASK(bif_core, rw_grp3_cfg, gated_csp1)
#define CSP_VAL \
        REG_STATE(bif_core, rw_grp3_cfg, gated_csp0, rd) | \
        REG_STATE(bif_core, rw_grp3_cfg, gated_csp1, wr)

;;----------------------------------------------------------------------------
;; Macros to set/clear GPIO bits

.macro SET x
        or.b    (1<<\x),$r9
        move.d  $r9, [$r2]
.endm

.macro CLR x
        and.b   ~(1<<\x),$r9
        move.d  $r9, [$r2]
.endm

;;----------------------------------------------------------------------------

nand_boot:
        ;; Check if nand boot was selected
        move.d  REG_ADDR(config, regi_config, r_bootsel), $r0
        move.d  [$r0], $r0
        and.d   REG_MASK(config, r_bootsel, boot_mode), $r0
        cmp.d   REG_STATE(config, r_bootsel, boot_mode, nand), $r0
        bne     normal_boot ; No NAND boot
        nop

copy_nand_to_ram:
        ;; copy_nand_to_ram
        ;; Arguments
        ;;   r10 - destination
        ;;   r11 - source offset
        ;;   r12 - size
        ;;   r13 - Address to jump to after completion
        ;; Note : r10-r12 are clobbered on return
        ;; Registers used:
        ;;   r0 - NAND_RD_ADDR
        ;;   r1 - NAND_WR_ADDR
        ;;   r2 - reg_gio_rw_pa_dout
        ;;   r3 - reg_gio_r_pa_din
        ;;   r4 - tmp
        ;;   r5 - byte counter within a page
        ;;   r6 - reg_pinmux_rw_pa
        ;;   r7 - reg_gio_rw_pa_oe
        ;;   r8 - reg_bif_core_rw_grp3_cfg
        ;;   r9 - reg_gio_rw_pa_dout shadow
        move.d  0x90000000, $r0
        move.d  0x94000000, $r1
        move.d  REG_ADDR(gio, regi_gio, rw_pa_dout), $r2
        move.d  REG_ADDR(gio, regi_gio, r_pa_din), $r3
        move.d  REG_ADDR(pinmux, regi_pinmux, rw_pa), $r6
        move.d  REG_ADDR(gio, regi_gio, rw_pa_oe), $r7
        move.d  REG_ADDR(bif_core, regi_bif_core, rw_grp3_cfg), $r8

#if CONFIG_ETRAX_FLASH_BUSWIDTH==2
        lsrq    1, $r11
#endif
        ;; Set up GPIO
        move.d  [$r2], $r9
        move.d  [$r7], $r4
        or.b    (1<<ALE) | (1 << CLE) | (1<<CE), $r4
        move.d  $r4, [$r7]

        ;; Set up bif
        move.d  [$r8], $r4
        and.d   CSP_MASK, $r4
        or.d    CSP_VAL, $r4
        move.d  $r4, [$r8]

1:      ;; Copy one page
        CLR     CE
        SET     CLE
        moveq   READ_CMD, $r4
        move.b  $r4, [$r1]
        moveq   20, $r4
2:      bne     2b
        subq    1, $r4
        CLR     CLE
        SET     ALE
        clear.w [$r1]           ; Column address = 0
        move.d  $r11, $r4
        lsrq    8, $r4
        move.b  $r4, [$r1]      ; Row address
        lsrq    8, $r4
        move.b  $r4, [$r1]      ; Row address
        moveq   20, $r4
2:      bne     2b
        subq    1, $r4
        CLR     ALE
2:      move.d  [$r3], $r4
        and.d   1 << BY, $r4
        beq     2b
        movu.w  PAGE_SIZE, $r5
2:      ; Copy one byte/word
#if CONFIG_ETRAX_FLASH_BUSWIDTH==2
        move.w  [$r0], $r4
#else
        move.b  [$r0], $r4
#endif
        subq    1, $r5
        bne     2b
#if CONFIG_ETRAX_FLASH_BUSWIDTH==2
        move.w  $r4, [$r10+]
        subu.w  PAGE_SIZE*2, $r12
#else
        move.b  $r4, [$r10+]
        subu.w  PAGE_SIZE, $r12
#endif
        bpl     1b
        addu.w  PAGE_SIZE, $r11

        ;; End of copy
        jump    $r13
        nop

        ;; This will warn if the code above is too large. If you are
        ;; considering removing this, you don't understand the bug/feature.
        .org 256
        .org ERASE_BLOCK

normal_boot:
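
The zero-stuffing workaround described in the header comment is applied when the flash image is built, not at boot. A host-side sketch (tool code assumed for illustration; whether the padding byte precedes or follows each payload byte depends on the device wiring, shown here following it):

#include <stddef.h>
#include <stdint.h>

/* Interleave a zero after every payload byte of the first erase
   block, so the bootrom's bytewise copy loop, which skips every
   second byte on a 16-bit flash, still sees all the payload. */
static size_t pad_first_erase_block(const uint8_t *in, size_t n,
                                    uint8_t *out, size_t outcap)
{
        size_t o = 0;

        for (size_t i = 0; i < n && o + 2 <= outcap; i++) {
                out[o++] = in[i];
                out[o++] = 0;   /* discarded by the bootrom copy loop */
        }
        return o;
}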
diff --git a/arch/cris/arch-v32/lib/spinlock.S b/arch/cris/arch-v32/lib/spinlock.S
new file mode 100644
index 000000000000..2437ae7f6ed2
--- /dev/null
+++ b/arch/cris/arch-v32/lib/spinlock.S
@@ -0,0 +1,33 @@
;; Core of the spinlock implementation
;;
;; Copyright (C) 2004 Axis Communications AB.
;;
;; Author: Mikael Starvik


        .global cris_spin_lock
        .global cris_spin_trylock

        .text

cris_spin_lock:
        clearf  p
1:      test.d  [$r10]
        beq     1b
        clearf  p
        ax
        clear.d [$r10]
        bcs     1b
        clearf  p
        ret
        nop

cris_spin_trylock:
        clearf  p
1:      move.d  [$r10], $r11
        ax
        clear.d [$r10]
        bcs     1b
        clearf  p
        ret
        move.d  $r11,$r10
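
In pseudo-C, the two routines read as follows: the lock word is nonzero when free and zero when held, and the ax prefix makes the following clear.d atomic, which plain C cannot express. The __atomic_exchange_n below is a stand-in for that instruction pair, not what the kernel uses:

static int atomic_swap_zero(volatile int *lock)
{
        /* stand-in for "ax; clear.d [lock]" returning the old value */
        return __atomic_exchange_n(lock, 0, __ATOMIC_ACQUIRE);
}

int cris_spin_trylock_sketch(volatile int *lock)
{
        return atomic_swap_zero(lock);  /* nonzero: it was free, now ours */
}

void cris_spin_lock_sketch(volatile int *lock)
{
        for (;;) {
                while (*lock == 0)
                        ;               /* spin while the lock is held */
                if (atomic_swap_zero(lock))
                        return;         /* grabbed it before anyone else */
        }
}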
diff --git a/arch/cris/arch-v32/lib/string.c b/arch/cris/arch-v32/lib/string.c
new file mode 100644
index 000000000000..98e282ac824a
--- /dev/null
+++ b/arch/cris/arch-v32/lib/string.c
@@ -0,0 +1,219 @@
/*#************************************************************************#*/
/*#-------------------------------------------------------------------------*/
/*#                                                                          */
/*# FUNCTION NAME: memcpy()                                                  */
/*#                                                                          */
/*# PARAMETERS:    void* dst;   Destination address.                         */
/*#                void* src;   Source address.                              */
/*#                int   len;   Number of bytes to copy.                     */
/*#                                                                          */
/*# RETURNS:       dst.                                                      */
/*#                                                                          */
/*# DESCRIPTION:   Copies len bytes of memory from src to dst. No guarantees */
/*#                about copying of overlapping memory areas. This routine is*/
/*#                very sensitive to compiler changes in register allocation.*/
/*#                Should really be rewritten to avoid this problem.         */
/*#                                                                          */
/*#-------------------------------------------------------------------------*/
/*#                                                                          */
/*# HISTORY                                                                  */
/*#                                                                          */
/*# DATE      NAME            CHANGES                                        */
/*# ----      ----            -------                                        */
/*# 941007    Kenny R         Creation                                       */
/*# 941011    Kenny R         Lots of optimizations and inlining.            */
/*# 941129    Ulf A           Adapted for use in libc.                       */
/*# 950216    HP              N==0 forgotten if non-aligned src/dst.         */
/*#                           Added some optimizations.                      */
/*# 001025    HP              Make src and dst char *.  Align dst to         */
/*#                           dword, not just word-if-both-src-and-dst-      */
/*#                           are-misaligned.                                */
/*#                                                                          */
/*#-------------------------------------------------------------------------*/

#include <linux/types.h>

void *memcpy(void *pdst,
             const void *psrc,
             size_t pn)
{
  /* Ok.  Now we want the parameters put in special registers.
     Make sure the compiler is able to make something useful of this.
     As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).

     If gcc was all right, it really would need no temporaries, and no
     stack space to save stuff on.  */

  register void *return_dst __asm__ ("r10") = pdst;
  register char *dst __asm__ ("r13") = pdst;
  register const char *src __asm__ ("r11") = psrc;
  register int n __asm__ ("r12") = pn;


  /* When src is aligned but not dst, this makes a few extra needless
     cycles.  I believe it would take as many to check that the
     re-alignment was unnecessary.  */
  if (((unsigned long) dst & 3) != 0
      /* Don't align if we wouldn't copy more than a few bytes; so we
         don't have to check further for overflows.  */
      && n >= 3)
    {
      if ((unsigned long) dst & 1)
        {
          n--;
          *(char*)dst = *(char*)src;
          src++;
          dst++;
        }

      if ((unsigned long) dst & 2)
        {
          n -= 2;
          *(short*)dst = *(short*)src;
          src += 2;
          dst += 2;
        }
    }

  /* Decide which copying method to use.  Movem is dirt cheap, so the
     overhead is low enough to always use the minimum block size as the
     threshold.  */
  if (n >= 44)
    {
      /* For large copies we use 'movem' */

      /* It is not optimal to tell the compiler about clobbering any
         registers; that will move the saving/restoring of those registers
         to the function prologue/epilogue, and make non-movem sizes
         suboptimal.  */
      __asm__ volatile ("                                            \n\
        ;; Check that the register asm declaration got right.       \n\
        ;; The GCC manual explicitly says TRT will happen.          \n\
        .ifnc %0-%1-%2,$r13-$r11-$r12                                \n\
        .err                                                         \n\
        .endif                                                       \n\
                                                                     \n\
        ;; Save the registers we'll use in the movem process         \n\
        ;; on the stack.                                             \n\
        subq    11*4,$sp                                             \n\
        movem   $r10,[$sp]                                           \n\
                                                                     \n\
        ;; Now we've got this:                                       \n\
        ;; r11 - src                                                 \n\
        ;; r13 - dst                                                 \n\
        ;; r12 - n                                                   \n\
                                                                     \n\
        ;; Update n for the first loop                               \n\
        subq    44,$r12                                              \n\
0:                                                                   \n\
        movem   [$r11+],$r10                                         \n\
        subq    44,$r12                                              \n\
        bge     0b                                                   \n\
        movem   $r10,[$r13+]                                         \n\
                                                                     \n\
        addq    44,$r12  ;; compensate for last loop underflowing n  \n\
                                                                     \n\
        ;; Restore registers from stack                              \n\
        movem [$sp+],$r10"

      /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n)
      /* Inputs */ : "0" (dst), "1" (src), "2" (n));

    }

  /* Either we directly start copying, using dword copying
     in a loop, or we copy as much as possible with 'movem'
     and then the last block (<44 bytes) is copied here.
     This will work since 'movem' will have updated src,dst,n.  */

  while ( n >= 16 )
    {
      *((long*)dst)++ = *((long*)src)++;
      *((long*)dst)++ = *((long*)src)++;
      *((long*)dst)++ = *((long*)src)++;
      *((long*)dst)++ = *((long*)src)++;
      n -= 16;
    }

  /* A switch() is definitely the fastest although it takes a LOT of code.
   * Particularly if you inline code this.  */
  switch (n)
    {
      case 0:
        break;
      case 1:
        *(char*)dst = *(char*)src;
        break;
      case 2:
        *(short*)dst = *(short*)src;
        break;
      case 3:
        *((short*)dst)++ = *((short*)src)++;
        *(char*)dst = *(char*)src;
        break;
      case 4:
        *((long*)dst)++ = *((long*)src)++;
        break;
      case 5:
        *((long*)dst)++ = *((long*)src)++;
        *(char*)dst = *(char*)src;
        break;
      case 6:
        *((long*)dst)++ = *((long*)src)++;
        *(short*)dst = *(short*)src;
        break;
      case 7:
        *((long*)dst)++ = *((long*)src)++;
        *((short*)dst)++ = *((short*)src)++;
        *(char*)dst = *(char*)src;
        break;
      case 8:
        *((long*)dst)++ = *((long*)src)++;
        *((long*)dst)++ = *((long*)src)++;
        break;
      case 9:
        *((long*)dst)++ = *((long*)src)++;
        *((long*)dst)++ = *((long*)src)++;
        *(char*)dst = *(char*)src;
        break;
      case 10:
        *((long*)dst)++ = *((long*)src)++;
        *((long*)dst)++ = *((long*)src)++;
        *(short*)dst = *(short*)src;
        break;
      case 11:
        *((long*)dst)++ = *((long*)src)++;
        *((long*)dst)++ = *((long*)src)++;
        *((short*)dst)++ = *((short*)src)++;
        *(char*)dst = *(char*)src;
        break;
      case 12:
        *((long*)dst)++ = *((long*)src)++;
        *((long*)dst)++ = *((long*)src)++;
        *((long*)dst)++ = *((long*)src)++;
        break;
      case 13:
        *((long*)dst)++ = *((long*)src)++;
        *((long*)dst)++ = *((long*)src)++;
        *((long*)dst)++ = *((long*)src)++;
        *(char*)dst = *(char*)src;
        break;
      case 14:
        *((long*)dst)++ = *((long*)src)++;
        *((long*)dst)++ = *((long*)src)++;
        *((long*)dst)++ = *((long*)src)++;
        *(short*)dst = *(short*)src;
        break;
      case 15:
        *((long*)dst)++ = *((long*)src)++;
        *((long*)dst)++ = *((long*)src)++;
        *((long*)dst)++ = *((long*)src)++;
        *((short*)dst)++ = *((short*)src)++;
        *(char*)dst = *(char*)src;
        break;
    }

  return return_dst; /* destination pointer. */
} /* memcpy() */
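
The register-binding idiom memcpy() leans on, in isolation: GCC's explicit register variables pin operands to known registers so the inline asm can name them directly, and the .ifnc assembler check fails the build if the compiler ever moves them. A minimal CRIS-only sketch (assembles only for this target; the halving operation is just a placeholder):

#include <stddef.h>

static size_t halve(size_t pn)
{
        register size_t n __asm__ ("r12") = pn;

        __asm__ (".ifnc %0,$r12 \n\t"   /* abort build if not in r12 */
                 ".err \n\t"
                 ".endif \n\t"
                 "lsrq 1,$r12"          /* n >>= 1 */
                 : "=r" (n) : "0" (n));
        return n;
}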
diff --git a/arch/cris/arch-v32/lib/usercopy.c b/arch/cris/arch-v32/lib/usercopy.c
new file mode 100644
index 000000000000..f0b08460c1be
--- /dev/null
+++ b/arch/cris/arch-v32/lib/usercopy.c
@@ -0,0 +1,470 @@
/*
 * User address space access functions.
 * The non-inlined parts of asm-cris/uaccess.h are here.
 *
 * Copyright (C) 2000, 2003 Axis Communications AB.
 *
 * Written by Hans-Peter Nilsson.
 * Pieces used from memcpy, originally by Kenny Ranerup long time ago.
 */

#include <asm/uaccess.h>

/* Asm:s have been tweaked (within the domain of correctness) to give
   satisfactory results for "gcc version 3.2.1 Axis release R53/1.53-v32".

   Check regularly...

   Note that for CRISv32, the PC saved at a bus-fault is the address
   *at* the faulting instruction, with a special case for instructions
   in delay slots: then it's the address of the branch.  Note also that
   in contrast to v10, a postincrement in the instruction is *not*
   performed at a bus-fault; the register is seen having the original
   value in fault handlers.  */


/* Copy to userspace.  This is based on the memcpy used for
   kernel-to-kernel copying; see "string.c".  */

unsigned long
__copy_user (void __user *pdst, const void *psrc, unsigned long pn)
{
  /* We want the parameters put in special registers.
     Make sure the compiler is able to make something useful of this.
     As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).

     FIXME: Comment for old gcc version.  Check.
     If gcc was all right, it really would need no temporaries, and no
     stack space to save stuff on.  */

  register char *dst __asm__ ("r13") = pdst;
  register const char *src __asm__ ("r11") = psrc;
  register int n __asm__ ("r12") = pn;
  register int retn __asm__ ("r10") = 0;


  /* When src is aligned but not dst, this makes a few extra needless
     cycles.  I believe it would take as many to check that the
     re-alignment was unnecessary.  */
  if (((unsigned long) dst & 3) != 0
      /* Don't align if we wouldn't copy more than a few bytes; so we
         don't have to check further for overflows.  */
      && n >= 3)
    {
      if ((unsigned long) dst & 1)
        {
          __asm_copy_to_user_1 (dst, src, retn);
          n--;
        }

      if ((unsigned long) dst & 2)
        {
          __asm_copy_to_user_2 (dst, src, retn);
          n -= 2;
        }
    }

  /* Movem is dirt cheap.  The overhead is low enough to always use the
     minimum possible block size as the threshold.  */
  if (n >= 44)
    {
      /* For large copies we use 'movem'.  */

      /* It is not optimal to tell the compiler about clobbering any
         registers; that will move the saving/restoring of those registers
         to the function prologue/epilogue, and make non-movem sizes
         suboptimal.  */
      __asm__ volatile ("\
        ;; Check that the register asm declaration got right.       \n\
        ;; The GCC manual explicitly says TRT will happen.          \n\
        .ifnc %0%1%2%3,$r13$r11$r12$r10                              \n\
        .err                                                         \n\
        .endif                                                       \n\
                                                                     \n\
        ;; Save the registers we'll use in the movem process         \n\
        ;; on the stack.                                             \n\
        subq    11*4,$sp                                             \n\
        movem   $r10,[$sp]                                           \n\
                                                                     \n\
        ;; Now we've got this:                                       \n\
        ;; r11 - src                                                 \n\
        ;; r13 - dst                                                 \n\
        ;; r12 - n                                                   \n\
                                                                     \n\
        ;; Update n for the first loop                               \n\
        subq    44,$r12                                              \n\
0:                                                                   \n\
        movem   [$r11+],$r10                                         \n\
        subq    44,$r12                                              \n\
1:      bge     0b                                                   \n\
        movem   $r10,[$r13+]                                         \n\
3:                                                                   \n\
        addq    44,$r12  ;; compensate for last loop underflowing n  \n\
                                                                     \n\
        ;; Restore registers from stack                              \n\
        movem [$sp+],$r10                                            \n\
2:                                                                   \n\
        .section .fixup,\"ax\"                                       \n\
4:                                                                   \n\
; When failing on any of the 1..44 bytes in a chunk, we adjust back the \n\
; source pointer and just drop through to the by-16 and by-4 loops to   \n\
; get the correct number of failing bytes.  This necessarily means a    \n\
; few extra exceptions, but invalid user pointers shouldn't happen in   \n\
; time-critical code anyway.                                            \n\
        jump    3b                                                   \n\
        subq    44,$r11                                              \n\
                                                                     \n\
        .previous                                                    \n\
        .section __ex_table,\"a\"                                    \n\
        .dword 1b,4b                                                 \n\
        .previous"

     /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n), "=r" (retn)
     /* Inputs */ : "0" (dst), "1" (src), "2" (n), "3" (retn));

    }

  while (n >= 16)
    {
      __asm_copy_to_user_16 (dst, src, retn);
      n -= 16;
    }

  /* Having separate by-four loops cuts down on cache footprint.
     FIXME:  Test with and without; increasing switch to be 0..15.  */
  while (n >= 4)
    {
      __asm_copy_to_user_4 (dst, src, retn);
      n -= 4;
    }

  switch (n)
    {
      case 0:
        break;
      case 1:
        __asm_copy_to_user_1 (dst, src, retn);
        break;
      case 2:
        __asm_copy_to_user_2 (dst, src, retn);
        break;
      case 3:
        __asm_copy_to_user_3 (dst, src, retn);
        break;
    }

  return retn;
}

/* Copy from user to kernel, zeroing the bytes that were inaccessible in
   userland.  The return-value is the number of bytes that were
   inaccessible.  */

unsigned long
__copy_user_zeroing (void __user *pdst, const void *psrc, unsigned long pn)
{
  /* We want the parameters put in special registers.
     Make sure the compiler is able to make something useful of this.
     As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).

     FIXME: Comment for old gcc version.  Check.
     If gcc was all right, it really would need no temporaries, and no
     stack space to save stuff on.  */

  register char *dst __asm__ ("r13") = pdst;
  register const char *src __asm__ ("r11") = psrc;
  register int n __asm__ ("r12") = pn;
  register int retn __asm__ ("r10") = 0;

  /* The best reason to align src is that we then know that a read-fault
     was for aligned bytes; there are no 1..3 remaining good bytes to
     pickle.  */
  if (((unsigned long) src & 3) != 0)
    {
      if (((unsigned long) src & 1) && n != 0)
        {
          __asm_copy_from_user_1 (dst, src, retn);
          n--;
        }

      if (((unsigned long) src & 2) && n >= 2)
        {
          __asm_copy_from_user_2 (dst, src, retn);
          n -= 2;
        }

      /* We only need one check after the unalignment-adjustments, because
         if both adjustments were done, either both or neither reference
         had an exception.  */
      if (retn != 0)
        goto copy_exception_bytes;
    }

  /* Movem is dirt cheap.  The overhead is low enough to always use the
     minimum possible block size as the threshold.  */
  if (n >= 44)
    {
      /* It is not optimal to tell the compiler about clobbering any
         registers; that will move the saving/restoring of those registers
         to the function prologue/epilogue, and make non-movem sizes
         suboptimal.  */
      __asm__ volatile ("\
        .ifnc %0%1%2%3,$r13$r11$r12$r10                              \n\
        .err                                                         \n\
        .endif                                                       \n\
                                                                     \n\
        ;; Save the registers we'll use in the movem process         \n\
        ;; on the stack.                                             \n\
        subq    11*4,$sp                                             \n\
        movem   $r10,[$sp]                                           \n\
                                                                     \n\
        ;; Now we've got this:                                       \n\
        ;; r11 - src                                                 \n\
        ;; r13 - dst                                                 \n\
        ;; r12 - n                                                   \n\
                                                                     \n\
        ;; Update n for the first loop                               \n\
        subq    44,$r12                                              \n\
0:                                                                   \n\
        movem   [$r11+],$r10                                         \n\
                                                                     \n\
        subq    44,$r12                                              \n\
        bge     0b                                                   \n\
        movem   $r10,[$r13+]                                         \n\
                                                                     \n\
4:                                                                   \n\
        addq    44,$r12  ;; compensate for last loop underflowing n  \n\
                                                                     \n\
        ;; Restore registers from stack                              \n\
        movem [$sp+],$r10                                            \n\
        .section .fixup,\"ax\"                                       \n\
                                                                     \n\
;; Do not jump back into the loop if we fail.  For some uses, we get a  \n\
;; page fault somewhere on the line.  Without checking for page limits, \n\
;; we don't know where, but we need to copy accurately and keep an      \n\
;; accurate count; not just clear the whole line.  To do that, we fall  \n\
;; down in the code below, proceeding with smaller amounts.  It should  \n\
;; be kept in mind that we have to cater to code like what at one time  \n\
;; was in fs/super.c:                                                   \n\
;;  i = size - copy_from_user((void *)page, data, size);                \n\
;; which would cause repeated faults while clearing the remainder of    \n\
;; the SIZE bytes at PAGE after the first fault.                        \n\
;; A caveat here is that we must not fall through from a failing page   \n\
;; to a valid page.                                                     \n\
                                                                     \n\
3:                                                                   \n\
        jump    4b  ;; Fall through, pretending the fault didn't happen. \n\
        nop                                                          \n\
                                                                     \n\
        .previous                                                    \n\
        .section __ex_table,\"a\"                                    \n\
        .dword 0b,3b                                                 \n\
        .previous"

     /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n), "=r" (retn)
     /* Inputs */ : "0" (dst), "1" (src), "2" (n), "3" (retn));
    }

  /* Either we directly start copying here, using dword copying in a loop,
     or we copy as much as possible with 'movem' and then the last block
     (<44 bytes) is copied here.  This will work since 'movem' will have
     updated src, dst and n.  (Except with failing src.)

     Since we want to keep src accurate, we can't use
     __asm_copy_from_user_N with N != (1, 2, 4); it updates dst and
     retn, but not src (by design; its value is ignored elsewhere).  */

  while (n >= 4)
    {
      __asm_copy_from_user_4 (dst, src, retn);
      n -= 4;

      if (retn)
        goto copy_exception_bytes;
    }

  /* If we get here, there were no memory read faults.  */
  switch (n)
    {
      /* These copies are at least "naturally aligned" (so we don't have
         to check each byte), due to the src alignment code before the
         movem loop.  The *_3 case *will* get the correct count for retn.  */
      case 0:
        /* This case deliberately left in (if you have doubts check the
           generated assembly code).  */
        break;
      case 1:
        __asm_copy_from_user_1 (dst, src, retn);
        break;
      case 2:
        __asm_copy_from_user_2 (dst, src, retn);
        break;
      case 3:
        __asm_copy_from_user_3 (dst, src, retn);
        break;
    }

  /* If we get here, retn correctly reflects the number of failing
     bytes.  */
  return retn;

copy_exception_bytes:
  /* We already have "retn" bytes cleared, and need to clear the
     remaining "n" bytes.  A non-optimized simple byte-for-byte in-line
     memset is preferred here, since this isn't speed-critical code and
     we'd rather have this a leaf-function than calling memset.  */
  {
    char *endp;
    for (endp = dst + n; dst < endp; dst++)
      *dst = 0;
  }

  return retn + n;
}

/* Zero userspace.  */

unsigned long
__do_clear_user (void __user *pto, unsigned long pn)
{
  /* We want the parameters put in special registers.
     Make sure the compiler is able to make something useful of this.
     As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).

     FIXME: Comment for old gcc version.  Check.
     If gcc was all right, it really would need no temporaries, and no
     stack space to save stuff on.  */

  register char *dst __asm__ ("r13") = pto;
  register int n __asm__ ("r12") = pn;
  register int retn __asm__ ("r10") = 0;


  if (((unsigned long) dst & 3) != 0
      /* Don't align if we wouldn't copy more than a few bytes.  */
      && n >= 3)
    {
      if ((unsigned long) dst & 1)
        {
          __asm_clear_1 (dst, retn);
          n--;
        }

      if ((unsigned long) dst & 2)
        {
          __asm_clear_2 (dst, retn);
          n -= 2;
        }
    }

  /* Decide which copying method to use.
     FIXME: This number is from the "ordinary" kernel memset.  */
  if (n >= 48)
    {
      /* For large clears we use 'movem' */

      /* It is not optimal to tell the compiler about clobbering any
         call-saved registers; that will move the saving/restoring of
         those registers to the function prologue/epilogue, and make
         non-movem sizes suboptimal.

         This method is not foolproof; it assumes that the "asm reg"
         declarations at the beginning of the function really are used
         here (beware: they may be moved to temporary registers).
         This way, we do not have to save/move the registers around into
         temporaries; we can safely use them straight away.

         If you want to check that the allocation was right; then
         check the equalities in the first comment.  It should say
         something like "r13=r13, r11=r11, r12=r12".  */
      __asm__ volatile ("\
        .ifnc %0%1%2,$r13$r12$r10                                    \n\
        .err                                                         \n\
        .endif                                                       \n\
                                                                     \n\
        ;; Save the registers we'll clobber in the movem process     \n\
        ;; on the stack.  Don't mention them to gcc, it will only be \n\
        ;; upset.                                                    \n\
        subq    11*4,$sp                                             \n\
        movem   $r10,[$sp]                                           \n\
                                                                     \n\
        clear.d $r0                                                  \n\
        clear.d $r1                                                  \n\
        clear.d $r2                                                  \n\
        clear.d $r3                                                  \n\
        clear.d $r4                                                  \n\
        clear.d $r5                                                  \n\
        clear.d $r6                                                  \n\
        clear.d $r7                                                  \n\
        clear.d $r8                                                  \n\
        clear.d $r9                                                  \n\
        clear.d $r10                                                 \n\
        clear.d $r11                                                 \n\
                                                                     \n\
        ;; Now we've got this:                                       \n\
        ;; r13 - dst                                                 \n\
        ;; r12 - n                                                   \n\
                                                                     \n\
        ;; Update n for the first loop                               \n\
        subq    12*4,$r12                                            \n\
0:                                                                   \n\
        subq    12*4,$r12                                            \n\
1:                                                                   \n\
        bge     0b                                                   \n\
        movem   $r11,[$r13+]                                         \n\
                                                                     \n\
        addq    12*4,$r12  ;; compensate for last loop underflowing n \n\
                                                                     \n\
        ;; Restore registers from stack                              \n\
        movem [$sp+],$r10                                            \n\
2:                                                                   \n\
        .section .fixup,\"ax\"                                       \n\
3:                                                                   \n\
        movem [$sp],$r10                                             \n\
        addq    12*4,$r10                                            \n\
        addq    12*4,$r13                                            \n\
        movem $r10,[$sp]                                             \n\
        jump 0b                                                      \n\
        clear.d $r10                                                 \n\
                                                                     \n\
        .previous                                                    \n\
        .section __ex_table,\"a\"                                    \n\
        .dword 1b,3b                                                 \n\
        .previous"

     /* Outputs */ : "=r" (dst), "=r" (n), "=r" (retn)
     /* Inputs */ : "0" (dst), "1" (n), "2" (retn)
     /* Clobber */ : "r11");
    }

  while (n >= 16)
    {
      __asm_clear_16 (dst, retn);
      n -= 16;
    }

  /* Having separate by-four loops cuts down on cache footprint.
     FIXME:  Test with and without; increasing switch to be 0..15.  */
  while (n >= 4)
    {
      __asm_clear_4 (dst, retn);
      n -= 4;
    }

  switch (n)
    {
      case 0:
        break;
      case 1:
        __asm_clear_1 (dst, retn);
        break;
      case 2:
        __asm_clear_2 (dst, retn);
        break;
      case 3:
        __asm_clear_3 (dst, retn);
        break;
    }

  return retn;
}
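
Caller-side, the contract of __copy_user_zeroing is: the return value is the number of bytes left uncopied, and that tail of the kernel buffer has already been zeroed, so the buffer is always fully initialized. This is the fs/super.c idiom quoted in the fixup comments, as a sketch (wrapper name assumed; address-space annotations dropped for brevity):

/* Returns how many bytes were actually copied in; the zeroed tail
   means no uninitialized kernel memory is ever exposed. */
static unsigned long copied_ok(void *kbuf, const void *ubuf,
                               unsigned long size)
{
        unsigned long left = __copy_user_zeroing(kbuf, ubuf, size);

        return size - left;
}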