diff options
Diffstat (limited to 'arch/parisc/lib')
-rw-r--r-- | arch/parisc/lib/Makefile | 9 | ||||
-rw-r--r-- | arch/parisc/lib/bitops.c | 84 | ||||
-rw-r--r-- | arch/parisc/lib/checksum.c | 148 | ||||
-rw-r--r-- | arch/parisc/lib/debuglocks.c | 277 | ||||
-rw-r--r-- | arch/parisc/lib/fixup.S | 89 | ||||
-rw-r--r-- | arch/parisc/lib/io.c | 488 | ||||
-rw-r--r-- | arch/parisc/lib/iomap.c | 422 | ||||
-rw-r--r-- | arch/parisc/lib/lusercopy.S | 193 | ||||
-rw-r--r-- | arch/parisc/lib/memcpy.c | 522 | ||||
-rw-r--r-- | arch/parisc/lib/memset.c | 91 |
10 files changed, 2323 insertions, 0 deletions
diff --git a/arch/parisc/lib/Makefile b/arch/parisc/lib/Makefile new file mode 100644 index 000000000000..7bf705676297 --- /dev/null +++ b/arch/parisc/lib/Makefile | |||
@@ -0,0 +1,9 @@ | |||
1 | # | ||
2 | # Makefile for parisc-specific library files | ||
3 | # | ||
4 | |||
5 | lib-y := lusercopy.o bitops.o checksum.o io.o memset.o fixup.o memcpy.o | ||
6 | |||
7 | obj-y := iomap.o | ||
8 | |||
9 | lib-$(CONFIG_SMP) += debuglocks.o | ||
diff --git a/arch/parisc/lib/bitops.c b/arch/parisc/lib/bitops.c new file mode 100644 index 000000000000..2de182f6fe8a --- /dev/null +++ b/arch/parisc/lib/bitops.c | |||
@@ -0,0 +1,84 @@ | |||
1 | /* | ||
2 | * bitops.c: atomic operations which got too long to be inlined all over | ||
3 | * the place. | ||
4 | * | ||
5 | * Copyright 1999 Philipp Rumpf (prumpf@tux.org) | ||
6 | * Copyright 2000 Grant Grundler (grundler@cup.hp.com) | ||
7 | */ | ||
8 | |||
9 | #include <linux/config.h> | ||
10 | #include <linux/kernel.h> | ||
11 | #include <linux/spinlock.h> | ||
12 | #include <asm/system.h> | ||
13 | #include <asm/atomic.h> | ||
14 | |||
/*
 * Table of ATOMIC_HASH_SIZE spinlocks, every entry statically initialised to
 * SPIN_LOCK_UNLOCKED.  Only built when CONFIG_SMP is set.
 * NOTE(review): presumably this is the table that the
 * _atomic_spin_lock_irqsave()/_atomic_spin_unlock_irqrestore() macros used
 * by the functions below hash their pointer argument into -- confirm against
 * <asm/atomic.h>.
 */
15 | #ifdef CONFIG_SMP | ||
16 | spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned = { | ||
17 | [0 ... (ATOMIC_HASH_SIZE-1)] = SPIN_LOCK_UNLOCKED | ||
18 | }; | ||
19 | #endif | ||
20 | |||
#ifdef __LP64__
/*
 * __xchg64 - store @x into *@ptr and hand back what was there before.
 *
 * The load/store pair runs between _atomic_spin_lock_irqsave() and
 * _atomic_spin_unlock_irqrestore() keyed on @ptr.  Only built for
 * __LP64__ kernels.
 *
 * Returns the previous contents of *@ptr.
 */
unsigned long __xchg64(unsigned long x, unsigned long *ptr)
{
	unsigned long flags;
	unsigned long previous;

	_atomic_spin_lock_irqsave(ptr, flags);
	previous = *ptr;
	*ptr = x;
	_atomic_spin_unlock_irqrestore(ptr, flags);

	return previous;
}
#endif
33 | |||
/*
 * __xchg32 - store @x into the int at @ptr and return the old value.
 *
 * The old value is widened through a signed long before being returned as
 * unsigned long, i.e. it is sign-extended (the original author flagged this
 * as "XXX - sign extension wanted?").
 */
unsigned long __xchg32(int x, int *ptr)
{
	unsigned long flags;
	long previous;

	_atomic_spin_lock_irqsave(ptr, flags);
	previous = (long) *ptr;		/* sign-extending widen, see above */
	*ptr = x;
	_atomic_spin_unlock_irqrestore(ptr, flags);

	return (unsigned long)previous;
}
45 | |||
46 | |||
/*
 * __xchg8 - store @x into the char at @ptr and return the old value.
 *
 * As with __xchg32(), the old value is widened through a signed long, so
 * whether it is sign-extended follows the signedness of plain char (the
 * original author flagged this as "XXX - sign extension wanted?").
 */
unsigned long __xchg8(char x, char *ptr)
{
	unsigned long flags;
	long previous;

	_atomic_spin_lock_irqsave(ptr, flags);
	previous = (long) *ptr;
	*ptr = x;
	_atomic_spin_unlock_irqrestore(ptr, flags);

	return (unsigned long)previous;
}
58 | |||
59 | |||
#ifdef __LP64__
/*
 * __cmpxchg_u64 - compare-and-exchange on an unsigned long.
 *
 * Under the lock keyed on @ptr: read *@ptr once and, if it equals @old,
 * replace it with @new.  Only built for __LP64__ kernels.
 *
 * Returns the value that was read; the exchange took place iff that value
 * equals @old.
 */
unsigned long __cmpxchg_u64(volatile unsigned long *ptr, unsigned long old, unsigned long new)
{
	unsigned long flags;
	unsigned long seen;

	_atomic_spin_lock_irqsave(ptr, flags);
	seen = *ptr;
	if (seen == old)
		*ptr = new;
	_atomic_spin_unlock_irqrestore(ptr, flags);

	return seen;
}
#endif
73 | |||
/*
 * __cmpxchg_u32 - compare-and-exchange on an unsigned int.
 *
 * Under the lock keyed on @ptr: read *@ptr once and, if it equals @old,
 * replace it with @new.
 *
 * Returns the value that was read, zero-extended to unsigned long; the
 * exchange took place iff that value equals @old.
 */
unsigned long __cmpxchg_u32(volatile unsigned int *ptr, unsigned int old, unsigned int new)
{
	unsigned long flags;
	unsigned int seen;

	_atomic_spin_lock_irqsave(ptr, flags);
	seen = *ptr;
	if (seen == old)
		*ptr = new;
	_atomic_spin_unlock_irqrestore(ptr, flags);

	return (unsigned long)seen;
}
diff --git a/arch/parisc/lib/checksum.c b/arch/parisc/lib/checksum.c new file mode 100644 index 000000000000..8a1e08068e7d --- /dev/null +++ b/arch/parisc/lib/checksum.c | |||
@@ -0,0 +1,148 @@ | |||
1 | /* | ||
2 | * INET An implementation of the TCP/IP protocol suite for the LINUX | ||
3 | * operating system. INET is implemented using the BSD Socket | ||
4 | * interface as the means of communication with the user level. | ||
5 | * | ||
6 | * PA-RISC specific IP/TCP/UDP checksumming routines (header inherited from the MIPS version) | ||
7 | * | ||
8 | * Authors: Ralf Baechle, <ralf@waldorf-gmbh.de> | ||
9 | * Lots of code moved from tcp.c and ip.c; see those files | ||
10 | * for more names. | ||
11 | * | ||
12 | * This program is free software; you can redistribute it and/or | ||
13 | * modify it under the terms of the GNU General Public License | ||
14 | * as published by the Free Software Foundation; either version | ||
15 | * 2 of the License, or (at your option) any later version. | ||
16 | * | ||
17 | * $Id: checksum.c,v 1.3 1997/12/01 17:57:34 ralf Exp $ | ||
18 | */ | ||
19 | #include <linux/module.h> | ||
20 | #include <linux/types.h> | ||
21 | |||
22 | #include <net/checksum.h> | ||
23 | #include <asm/byteorder.h> | ||
24 | #include <asm/string.h> | ||
25 | #include <asm/uaccess.h> | ||
26 | |||
/*
 * addc(_t, _r) - accumulate _r into _t using an "add" followed by an "addc"
 * of %r0 into the same register.
 * NOTE(review): presumably the second instruction folds the carry out of the
 * first add back into the sum (end-around carry for the ones-complement
 * checksum) -- confirm against the PA-RISC instruction reference.
 *
 * _t must be an lvalue; it is both read and written.
 *
 * The expansion deliberately carries no trailing semicolon so that
 * "addc(a, b);" is exactly one statement and stays safe inside an unbraced
 * if/else.
 */
#define addc(_t,_r) \
	__asm__ __volatile__ ( \
" add %0, %1, %0\n" \
" addc %0, %%r0, %0\n" \
	: "=r"(_t) \
	: "r"(_r), "0"(_t))
33 | |||
/*
 * from32to16 - fold a 32-bit partial sum down to 16 bits.
 *
 * The upper half is added onto the lower half twice: the first pass can
 * leave a carry in bit 16, the second pass folds that carry back in.
 */
static inline unsigned short from32to16(unsigned int x)
{
	unsigned int folded;

	/* 32 bits --> 16 bits plus a possible carry */
	folded = (x >> 16) + (x & 0xffff);
	/* absorb that carry into the low 16 bits */
	folded = (folded >> 16) + (folded & 0xffff);

	return (unsigned short)folded;
}
42 | |||
/*
 * do_csum - sum @len bytes starting at @buff and fold the result to 16 bits.
 *
 * The buffer is consumed in the widest aligned units available: an optional
 * leading byte, an optional leading 16-bit word, a run of 32-bit words
 * (unrolled four at a time), then an optional trailing 16-bit word and byte.
 * When @buff starts on an odd address the folded result is byte-swapped
 * before it is returned.
 *
 * Returns 0 for len <= 0.
 */
static inline unsigned int do_csum(const unsigned char * buff, int len)
{
	unsigned int result = 0;
	int odd, count;

	if (len <= 0)
		return result;

	/* Peel off a leading byte so that buff becomes 16-bit aligned. */
	odd = 1 & (unsigned long) buff;
	if (odd) {
		result = be16_to_cpu(*buff);
		buff++;
		len--;
	}

	count = len >> 1;		/* number of 16-bit words */
	if (count) {
		/* Peel off a 16-bit word so that buff becomes 32-bit aligned. */
		if (2 & (unsigned long) buff) {
			result += *(unsigned short *) buff;
			buff += 2;
			len -= 2;
			count--;
		}

		count >>= 1;		/* number of 32-bit words */
		if (count) {
			/* main loop: four 32-bit words per iteration */
			for (; count >= 4; count -= 4, buff += 16) {
				unsigned int r1, r2, r3, r4;

				r1 = *(unsigned int *)(buff + 0);
				r2 = *(unsigned int *)(buff + 4);
				r3 = *(unsigned int *)(buff + 8);
				r4 = *(unsigned int *)(buff + 12);
				addc(result, r1);
				addc(result, r2);
				addc(result, r3);
				addc(result, r4);
			}
			/* remaining 32-bit words, one at a time */
			for (; count; count--, buff += 4) {
				unsigned int w = *(unsigned int *) buff;

				addc(result, w);
			}
			result = (result & 0xffff) + (result >> 16);
		}

		/* trailing 16-bit word */
		if (len & 2) {
			result += *(unsigned short *) buff;
			buff += 2;
		}
	}

	/* trailing byte */
	if (len & 1)
		result += le16_to_cpu(*buff);

	result = from32to16(result);
	if (odd)
		result = swab16(result);

	return result;
}
100 | |||
/*
 * csum_partial - compute a partial checksum, e.g. for TCP/UDP fragments.
 *
 * Checksums @len bytes at @buff with do_csum(), adds the caller's running
 * @sum with addc(), and returns the total folded to 16 bits.
 */
unsigned int csum_partial(const unsigned char *buff, int len, unsigned int sum)
{
	unsigned int total = do_csum(buff, len);

	addc(total, sum);

	return from32to16(total);
}
110 | |||
111 | EXPORT_SYMBOL(csum_partial); | ||
112 | |||
/*
 * csum_partial_copy_nocheck - copy @len bytes from @src to @dst and return
 * the checksum of the data combined with @sum, as csum_partial() would.
 *
 * This is the simple two-pass version (checksum first, then memcpy); the
 * original author notes it is a lot slower than a combined copy-and-sum.
 */
unsigned int csum_partial_copy_nocheck(const unsigned char *src, unsigned char *dst,
				       int len, unsigned int sum)
{
	unsigned int total = csum_partial(src, len, sum);

	memcpy(dst, src, len);

	return total;
}
128 | EXPORT_SYMBOL(csum_partial_copy_nocheck); | ||
129 | |||
130 | /* | ||
131 | * Copy from userspace and compute checksum. If we catch an exception | ||
132 | * then zero the rest of the buffer. | ||
133 | */ | ||
134 | unsigned int csum_partial_copy_from_user(const unsigned char __user *src, | ||
135 | unsigned char *dst, int len, | ||
136 | unsigned int sum, int *err_ptr) | ||
137 | { | ||
138 | int missing; | ||
139 | |||
140 | missing = copy_from_user(dst, src, len); | ||
141 | if (missing) { | ||
142 | memset(dst + len - missing, 0, missing); | ||
143 | *err_ptr = -EFAULT; | ||
144 | } | ||
145 | |||
146 | return csum_partial(dst, len, sum); | ||
147 | } | ||
148 | EXPORT_SYMBOL(csum_partial_copy_from_user); | ||
diff --git a/arch/parisc/lib/debuglocks.c b/arch/parisc/lib/debuglocks.c new file mode 100644 index 000000000000..1b33fe6e5b7a --- /dev/null +++ b/arch/parisc/lib/debuglocks.c | |||
@@ -0,0 +1,277 @@ | |||
1 | /* | ||
2 | * Debugging versions of SMP locking primitives. | ||
3 | * | ||
4 | * Copyright (C) 2004 Thibaut VARENE <varenet@parisc-linux.org> | ||
5 | * | ||
6 | * Some code stolen from alpha & sparc64 ;) | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License as published by | ||
10 | * the Free Software Foundation; either version 2 of the License, or | ||
11 | * (at your option) any later version. | ||
12 | * | ||
13 | * This program is distributed in the hope that it will be useful, | ||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 | * GNU General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License | ||
19 | * along with this program; if not, write to the Free Software | ||
20 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
21 | * | ||
22 | * We use pdc_printf() throughout the file for all output messages, to avoid | ||
23 | * losing messages because of disabled interrupts. Since we're using these | ||
24 | * messages for debugging purposes, it makes sense not to send them to the | ||
25 | * linux console. | ||
26 | */ | ||
27 | |||
28 | |||
29 | #include <linux/config.h> | ||
30 | #include <linux/kernel.h> | ||
31 | #include <linux/sched.h> | ||
32 | #include <linux/spinlock.h> | ||
33 | #include <linux/hardirq.h> /* in_interrupt() */ | ||
34 | #include <asm/system.h> | ||
35 | #include <asm/hardirq.h> /* in_interrupt() */ | ||
36 | #include <asm/pdc.h> | ||
37 | |||
/*
 * Number of polls a lock routine below makes before it reports that it is
 * stuck.  Parenthesised so the value survives being used inside a larger
 * expression (e.g. INIT_STUCK - 1).
 */
#undef INIT_STUCK
#define INIT_STUCK (1L << 30)
40 | |||
41 | #ifdef CONFIG_DEBUG_SPINLOCK | ||
42 | |||
43 | |||
44 | void _dbg_spin_lock(spinlock_t * lock, const char *base_file, int line_no) | ||
45 | { | ||
46 | volatile unsigned int *a; | ||
47 | long stuck = INIT_STUCK; | ||
48 | void *inline_pc = __builtin_return_address(0); | ||
49 | unsigned long started = jiffies; | ||
50 | int printed = 0; | ||
51 | int cpu = smp_processor_id(); | ||
52 | |||
53 | try_again: | ||
54 | |||
55 | /* Do the actual locking */ | ||
56 | /* <T-Bone> ggg: we can't get stuck on the outter loop? | ||
57 | * <ggg> T-Bone: We can hit the outer loop | ||
58 | * alot if multiple CPUs are constantly racing for a lock | ||
59 | * and the backplane is NOT fair about which CPU sees | ||
60 | * the update first. But it won't hang since every failed | ||
61 | * attempt will drop us back into the inner loop and | ||
62 | * decrement `stuck'. | ||
63 | * <ggg> K-class and some of the others are NOT fair in the HW | ||
64 | * implementation so we could see false positives. | ||
65 | * But fixing the lock contention is easier than | ||
66 | * fixing the HW to be fair. | ||
67 | * <tausq> __ldcw() returns 1 if we get the lock; otherwise we | ||
68 | * spin until the value of the lock changes, or we time out. | ||
69 | */ | ||
70 | mb(); | ||
71 | a = __ldcw_align(lock); | ||
72 | while (stuck && (__ldcw(a) == 0)) | ||
73 | while ((*a == 0) && --stuck); | ||
74 | mb(); | ||
75 | |||
76 | if (unlikely(stuck <= 0)) { | ||
77 | pdc_printf( | ||
78 | "%s:%d: spin_lock(%s/%p) stuck in %s at %p(%d)" | ||
79 | " owned by %s:%d in %s at %p(%d)\n", | ||
80 | base_file, line_no, lock->module, lock, | ||
81 | current->comm, inline_pc, cpu, | ||
82 | lock->bfile, lock->bline, lock->task->comm, | ||
83 | lock->previous, lock->oncpu); | ||
84 | stuck = INIT_STUCK; | ||
85 | printed = 1; | ||
86 | goto try_again; | ||
87 | } | ||
88 | |||
89 | /* Exiting. Got the lock. */ | ||
90 | lock->oncpu = cpu; | ||
91 | lock->previous = inline_pc; | ||
92 | lock->task = current; | ||
93 | lock->bfile = (char *)base_file; | ||
94 | lock->bline = line_no; | ||
95 | |||
96 | if (unlikely(printed)) { | ||
97 | pdc_printf( | ||
98 | "%s:%d: spin_lock grabbed in %s at %p(%d) %ld ticks\n", | ||
99 | base_file, line_no, current->comm, inline_pc, | ||
100 | cpu, jiffies - started); | ||
101 | } | ||
102 | } | ||
103 | |||
104 | void _dbg_spin_unlock(spinlock_t * lock, const char *base_file, int line_no) | ||
105 | { | ||
106 | CHECK_LOCK(lock); | ||
107 | volatile unsigned int *a; | ||
108 | mb(); | ||
109 | a = __ldcw_align(lock); | ||
110 | if (unlikely((*a != 0) && lock->babble)) { | ||
111 | lock->babble--; | ||
112 | pdc_printf( | ||
113 | "%s:%d: spin_unlock(%s:%p) not locked\n", | ||
114 | base_file, line_no, lock->module, lock); | ||
115 | } | ||
116 | *a = 1; | ||
117 | mb(); | ||
118 | } | ||
119 | |||
120 | int _dbg_spin_trylock(spinlock_t * lock, const char *base_file, int line_no) | ||
121 | { | ||
122 | int ret; | ||
123 | volatile unsigned int *a; | ||
124 | mb(); | ||
125 | a = __ldcw_align(lock); | ||
126 | ret = (__ldcw(a) != 0); | ||
127 | mb(); | ||
128 | if (ret) { | ||
129 | lock->oncpu = smp_processor_id(); | ||
130 | lock->previous = __builtin_return_address(0); | ||
131 | lock->task = current; | ||
132 | } else { | ||
133 | lock->bfile = (char *)base_file; | ||
134 | lock->bline = line_no; | ||
135 | } | ||
136 | return ret; | ||
137 | } | ||
138 | |||
139 | #endif /* CONFIG_DEBUG_SPINLOCK */ | ||
140 | |||
141 | #ifdef CONFIG_DEBUG_RWLOCK | ||
142 | |||
143 | /* Interrupts trouble detailed explanation, thx Grant: | ||
144 | * | ||
145 | * o writer (wants to modify data) attempts to acquire the rwlock | ||
146 | * o He gets the write lock. | ||
147 | * o Interrupts are still enabled, we take an interrupt with the | ||
148 | * writer still holding the lock. | ||
149 | * o interrupt handler tries to acquire the rwlock for read. | ||
150 | * o deadlock since the writer can't release it at this point. | ||
151 | * | ||
152 | * In general, any use of spinlocks that competes between "base" | ||
153 | * level and interrupt level code will risk deadlock. Interrupts | ||
154 | * need to be disabled in the base level routines to avoid it. | ||
155 | * Or more precisely, only the IRQ the base level routine | ||
156 | * is competing with for the lock. But it's more efficient/faster | ||
157 | * to just disable all interrupts on that CPU to guarantee | ||
158 | * once it gets the lock it can release it quickly too. | ||
159 | */ | ||
160 | |||
161 | void _dbg_write_lock(rwlock_t *rw, const char *bfile, int bline) | ||
162 | { | ||
163 | void *inline_pc = __builtin_return_address(0); | ||
164 | unsigned long started = jiffies; | ||
165 | long stuck = INIT_STUCK; | ||
166 | int printed = 0; | ||
167 | int cpu = smp_processor_id(); | ||
168 | |||
169 | if(unlikely(in_interrupt())) { /* acquiring write lock in interrupt context, bad idea */ | ||
170 | pdc_printf("write_lock caller: %s:%d, IRQs enabled,\n", bfile, bline); | ||
171 | BUG(); | ||
172 | } | ||
173 | |||
174 | /* Note: if interrupts are disabled (which is most likely), the printk | ||
175 | will never show on the console. We might need a polling method to flush | ||
176 | the dmesg buffer anyhow. */ | ||
177 | |||
178 | retry: | ||
179 | _raw_spin_lock(&rw->lock); | ||
180 | |||
181 | if(rw->counter != 0) { | ||
182 | /* this basically never happens */ | ||
183 | _raw_spin_unlock(&rw->lock); | ||
184 | |||
185 | stuck--; | ||
186 | if ((unlikely(stuck <= 0)) && (rw->counter < 0)) { | ||
187 | pdc_printf( | ||
188 | "%s:%d: write_lock stuck on writer" | ||
189 | " in %s at %p(%d) %ld ticks\n", | ||
190 | bfile, bline, current->comm, inline_pc, | ||
191 | cpu, jiffies - started); | ||
192 | stuck = INIT_STUCK; | ||
193 | printed = 1; | ||
194 | } | ||
195 | else if (unlikely(stuck <= 0)) { | ||
196 | pdc_printf( | ||
197 | "%s:%d: write_lock stuck on reader" | ||
198 | " in %s at %p(%d) %ld ticks\n", | ||
199 | bfile, bline, current->comm, inline_pc, | ||
200 | cpu, jiffies - started); | ||
201 | stuck = INIT_STUCK; | ||
202 | printed = 1; | ||
203 | } | ||
204 | |||
205 | while(rw->counter != 0); | ||
206 | |||
207 | goto retry; | ||
208 | } | ||
209 | |||
210 | /* got it. now leave without unlocking */ | ||
211 | rw->counter = -1; /* remember we are locked */ | ||
212 | |||
213 | if (unlikely(printed)) { | ||
214 | pdc_printf( | ||
215 | "%s:%d: write_lock grabbed in %s at %p(%d) %ld ticks\n", | ||
216 | bfile, bline, current->comm, inline_pc, | ||
217 | cpu, jiffies - started); | ||
218 | } | ||
219 | } | ||
220 | |||
221 | int _dbg_write_trylock(rwlock_t *rw, const char *bfile, int bline) | ||
222 | { | ||
223 | #if 0 | ||
224 | void *inline_pc = __builtin_return_address(0); | ||
225 | int cpu = smp_processor_id(); | ||
226 | #endif | ||
227 | |||
228 | if(unlikely(in_interrupt())) { /* acquiring write lock in interrupt context, bad idea */ | ||
229 | pdc_printf("write_lock caller: %s:%d, IRQs enabled,\n", bfile, bline); | ||
230 | BUG(); | ||
231 | } | ||
232 | |||
233 | /* Note: if interrupts are disabled (which is most likely), the printk | ||
234 | will never show on the console. We might need a polling method to flush | ||
235 | the dmesg buffer anyhow. */ | ||
236 | |||
237 | _raw_spin_lock(&rw->lock); | ||
238 | |||
239 | if(rw->counter != 0) { | ||
240 | /* this basically never happens */ | ||
241 | _raw_spin_unlock(&rw->lock); | ||
242 | return 0; | ||
243 | } | ||
244 | |||
245 | /* got it. now leave without unlocking */ | ||
246 | rw->counter = -1; /* remember we are locked */ | ||
247 | #if 0 | ||
248 | pdc_printf("%s:%d: try write_lock grabbed in %s at %p(%d)\n", | ||
249 | bfile, bline, current->comm, inline_pc, cpu); | ||
250 | #endif | ||
251 | return 1; | ||
252 | } | ||
253 | |||
254 | void _dbg_read_lock(rwlock_t * rw, const char *bfile, int bline) | ||
255 | { | ||
256 | #if 0 | ||
257 | void *inline_pc = __builtin_return_address(0); | ||
258 | unsigned long started = jiffies; | ||
259 | int cpu = smp_processor_id(); | ||
260 | #endif | ||
261 | unsigned long flags; | ||
262 | |||
263 | local_irq_save(flags); | ||
264 | _raw_spin_lock(&rw->lock); | ||
265 | |||
266 | rw->counter++; | ||
267 | #if 0 | ||
268 | pdc_printf( | ||
269 | "%s:%d: read_lock grabbed in %s at %p(%d) %ld ticks\n", | ||
270 | bfile, bline, current->comm, inline_pc, | ||
271 | cpu, jiffies - started); | ||
272 | #endif | ||
273 | _raw_spin_unlock(&rw->lock); | ||
274 | local_irq_restore(flags); | ||
275 | } | ||
276 | |||
277 | #endif /* CONFIG_DEBUG_RWLOCK */ | ||
diff --git a/arch/parisc/lib/fixup.S b/arch/parisc/lib/fixup.S new file mode 100644 index 000000000000..134f0cd240f5 --- /dev/null +++ b/arch/parisc/lib/fixup.S | |||
@@ -0,0 +1,89 @@ | |||
1 | /* | ||
2 | * Linux/PA-RISC Project (http://www.parisc-linux.org/) | ||
3 | * | ||
4 | * Copyright (C) 2004 Randolph Chung <tausq@debian.org> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2, or (at your option) | ||
9 | * any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
19 | * | ||
20 | * Fixup routines for kernel exception handling. | ||
21 | */ | ||
22 | #include <linux/config.h> | ||
23 | #include <asm/offsets.h> | ||
24 | #include <asm/assembly.h> | ||
25 | #include <asm/errno.h> | ||
26 | |||
/*
 * get_fault_ip t1 t2
 *
 * Leaves the fault_ip field (offset EXCDATA_IP) of this CPU's per-cpu
 * exception_data in \t1.  \t2 is used as scratch, and %r1 is also
 * overwritten: every use of the macro follows an addil, whose result is then
 * consumed through %r1 by the LDREG after it.
 *
 * SMP variant: reads the current CPU number (TI_CPU field of the structure
 * whose address is in control register 30), uses it to index
 * __per_cpu_offset, and adds the result to the address of
 * per_cpu__exception_data before loading fault_ip.
 *
 * UP variant: loads fault_ip straight from per_cpu__exception_data.
 * NOTE: in the UP variant the address of the exception data is held in \t2,
 * not \t1 as the first in-line comment there says.
 */
27 | #ifdef CONFIG_SMP | ||
28 | .macro get_fault_ip t1 t2 | ||
29 | addil LT%__per_cpu_offset,%r27 | ||
30 | LDREG RT%__per_cpu_offset(%r1),\t1 | ||
31 | /* t2 = smp_processor_id() */ | ||
32 | mfctl 30,\t2 | ||
33 | ldw TI_CPU(\t2),\t2 | ||
34 | #ifdef __LP64__ | ||
35 | extrd,u \t2,63,32,\t2 | ||
36 | #endif | ||
37 | /* t2 = &__per_cpu_offset[smp_processor_id()]; */ | ||
38 | LDREG,s \t2(\t1),\t2 | ||
39 | addil LT%per_cpu__exception_data,%r27 | ||
40 | LDREG RT%per_cpu__exception_data(%r1),\t1 | ||
41 | /* t1 = &__get_cpu_var(exception_data) */ | ||
42 | add,l \t1,\t2,\t1 | ||
43 | /* t1 = t1->fault_ip */ | ||
44 | LDREG EXCDATA_IP(\t1), \t1 | ||
45 | .endm | ||
46 | #else | ||
47 | .macro get_fault_ip t1 t2 | ||
48 | /* t1 = &__get_cpu_var(exception_data) */ | ||
49 | addil LT%per_cpu__exception_data,%r27 | ||
50 | LDREG RT%per_cpu__exception_data(%r1),\t2 | ||
51 | /* t1 = t2->fault_ip */ | ||
52 | LDREG EXCDATA_IP(\t2), \t1 | ||
53 | .endm | ||
54 | #endif | ||
55 | |||
/*
 * Exception fixup stubs, placed in the .fixup section.
 *
 * Each stub fetches the recorded fault address into %r1 with get_fault_ip
 * (using %r8 as the scratch register), advances it by 4 (skip_1) or 8
 * (skip_2) bytes, loads -EFAULT into %r8 and branches to the adjusted
 * address with "bv %r0(%r1)".
 * NOTE(review): the 4/8 byte steps presumably correspond to skipping one or
 * two 4-byte instructions after the faulting access -- confirm against the
 * get_user/put_user callers.
 *
 * The instruction written after each bv still contributes to the documented
 * result (the get_user stubs rely on "copy %r0, %r9" to zero %r9, the
 * put_user stubs on "ldi -EFAULT, %r8"), so it must stay directly behind the
 * branch.
 */
56 | .text | ||
57 | .section .fixup, "ax" | ||
58 | |||
59 | /* get_user() fixups, store -EFAULT in r8, and 0 in r9 */ | ||
60 | .export fixup_get_user_skip_1 | ||
61 | fixup_get_user_skip_1: | ||
62 | get_fault_ip %r1,%r8 | ||
63 | ldo 4(%r1), %r1 | ||
64 | ldi -EFAULT, %r8 | ||
65 | bv %r0(%r1) | ||
66 | copy %r0, %r9 | ||
67 | |||
68 | .export fixup_get_user_skip_2 | ||
69 | fixup_get_user_skip_2: | ||
70 | get_fault_ip %r1,%r8 | ||
71 | ldo 8(%r1), %r1 | ||
72 | ldi -EFAULT, %r8 | ||
73 | bv %r0(%r1) | ||
74 | copy %r0, %r9 | ||
75 | |||
76 | /* put_user() fixups, store -EFAULT in r8 */ | ||
77 | .export fixup_put_user_skip_1 | ||
78 | fixup_put_user_skip_1: | ||
79 | get_fault_ip %r1,%r8 | ||
80 | ldo 4(%r1), %r1 | ||
81 | bv %r0(%r1) | ||
82 | ldi -EFAULT, %r8 | ||
83 | |||
84 | .export fixup_put_user_skip_2 | ||
85 | fixup_put_user_skip_2: | ||
86 | get_fault_ip %r1,%r8 | ||
87 | ldo 8(%r1), %r1 | ||
88 | bv %r0(%r1) | ||
89 | ldi -EFAULT, %r8 | ||
diff --git a/arch/parisc/lib/io.c b/arch/parisc/lib/io.c new file mode 100644 index 000000000000..7c1406ff825e --- /dev/null +++ b/arch/parisc/lib/io.c | |||
@@ -0,0 +1,488 @@ | |||
1 | /* | ||
2 | * arch/parisc/lib/io.c | ||
3 | * | ||
4 | * Copyright (c) Matthew Wilcox 2001 for Hewlett-Packard | ||
5 | * Copyright (c) Randolph Chung 2001 <tausq@debian.org> | ||
6 | * | ||
7 | * IO accessing functions which shouldn't be inlined because they're too big | ||
8 | */ | ||
9 | |||
10 | #include <linux/kernel.h> | ||
11 | #include <linux/module.h> | ||
12 | #include <asm/io.h> | ||
13 | |||
14 | /* Copies a block of memory to a device in an efficient manner. | ||
15 | * Assumes the device can cope with 32-bit transfers. If it can't, | ||
16 | * don't use this function. | ||
17 | */ | ||
18 | void memcpy_toio(volatile void __iomem *dst, const void *src, int count) | ||
19 | { | ||
20 | if (((unsigned long)dst & 3) != ((unsigned long)src & 3)) | ||
21 | goto bytecopy; | ||
22 | while ((unsigned long)dst & 3) { | ||
23 | writeb(*(char *)src, dst++); | ||
24 | src++; | ||
25 | count--; | ||
26 | } | ||
27 | while (count > 3) { | ||
28 | __raw_writel(*(u32 *)src, dst); | ||
29 | src += 4; | ||
30 | dst += 4; | ||
31 | count -= 4; | ||
32 | } | ||
33 | bytecopy: | ||
34 | while (count--) { | ||
35 | writeb(*(char *)src, dst++); | ||
36 | src++; | ||
37 | } | ||
38 | } | ||
39 | |||
40 | /* | ||
41 | ** Copies a block of memory from a device in an efficient manner. | ||
42 | ** Assumes the device can cope with 32-bit transfers. If it can't, | ||
43 | ** don't use this function. | ||
44 | ** | ||
45 | ** CR16 counts on C3000 reading 256 bytes from Symbios 896 RAM: | ||
46 | ** 27341/64 = 427 cyc per int | ||
47 | ** 61311/128 = 478 cyc per short | ||
48 | ** 122637/256 = 479 cyc per byte | ||
49 | ** Ergo bus latencies dominant (not transfer size). | ||
50 | ** Minimize total number of transfers at cost of CPU cycles. | ||
51 | ** TODO: only look at src alignment and adjust the stores to dest. | ||
52 | */ | ||
53 | void memcpy_fromio(void *dst, const volatile void __iomem *src, int count) | ||
54 | { | ||
55 | /* first compare alignment of src/dst */ | ||
56 | if ( (((unsigned long)dst ^ (unsigned long)src) & 1) || (count < 2) ) | ||
57 | goto bytecopy; | ||
58 | |||
59 | if ( (((unsigned long)dst ^ (unsigned long)src) & 2) || (count < 4) ) | ||
60 | goto shortcopy; | ||
61 | |||
62 | /* Then check for misaligned start address */ | ||
63 | if ((unsigned long)src & 1) { | ||
64 | *(u8 *)dst = readb(src); | ||
65 | src++; | ||
66 | dst++; | ||
67 | count--; | ||
68 | if (count < 2) goto bytecopy; | ||
69 | } | ||
70 | |||
71 | if ((unsigned long)src & 2) { | ||
72 | *(u16 *)dst = __raw_readw(src); | ||
73 | src += 2; | ||
74 | dst += 2; | ||
75 | count -= 2; | ||
76 | } | ||
77 | |||
78 | while (count > 3) { | ||
79 | *(u32 *)dst = __raw_readl(src); | ||
80 | dst += 4; | ||
81 | src += 4; | ||
82 | count -= 4; | ||
83 | } | ||
84 | |||
85 | shortcopy: | ||
86 | while (count > 1) { | ||
87 | *(u16 *)dst = __raw_readw(src); | ||
88 | src += 2; | ||
89 | dst += 2; | ||
90 | count -= 2; | ||
91 | } | ||
92 | |||
93 | bytecopy: | ||
94 | while (count--) { | ||
95 | *(char *)dst = readb(src); | ||
96 | src++; | ||
97 | dst++; | ||
98 | } | ||
99 | } | ||
100 | |||
101 | /* Sets a block of memory on a device to a given value. | ||
102 | * Assumes the device can cope with 32-bit transfers. If it can't, | ||
103 | * don't use this function. | ||
104 | */ | ||
105 | void memset_io(volatile void __iomem *addr, unsigned char val, int count) | ||
106 | { | ||
107 | u32 val32 = (val << 24) | (val << 16) | (val << 8) | val; | ||
108 | while ((unsigned long)addr & 3) { | ||
109 | writeb(val, addr++); | ||
110 | count--; | ||
111 | } | ||
112 | while (count > 3) { | ||
113 | __raw_writel(val32, addr); | ||
114 | addr += 4; | ||
115 | count -= 4; | ||
116 | } | ||
117 | while (count--) { | ||
118 | writeb(val, addr++); | ||
119 | } | ||
120 | } | ||
121 | |||
/*
 * Read COUNT 8-bit bytes from port PORT into memory starting at
 * DST.
 *
 * Bytes are stored singly until DST is 32-bit aligned; after that four port
 * reads are packed, first byte in the most significant position, into one
 * 32-bit store.  Any remainder is stored byte by byte.
 */
void insb (unsigned long port, void *dst, unsigned long count)
{
	unsigned char *p;

	p = (unsigned char *)dst;

	while (((unsigned long)p) & 0x3) {
		if (!count)
			return;
		count--;
		*p = inb(port);
		p++;
	}

	while (count >= 4) {
		unsigned int w;

		count -= 4;
		/*
		 * Widen before shifting: the promoted int result of inb()
		 * must not be shifted into the sign bit.  The four reads are
		 * kept as separate statements so their order is fixed.
		 */
		w = (unsigned int)inb(port) << 24;
		w |= (unsigned int)inb(port) << 16;
		w |= (unsigned int)inb(port) << 8;
		w |= (unsigned int)inb(port);
		*(unsigned int *) p = w;
		p += 4;
	}

	while (count) {
		--count;
		*p = inb(port);
		p++;
	}
}
157 | |||
158 | |||
/*
 * Read COUNT 16-bit words from port PORT into memory starting at
 * DST.  Any alignment of DST is handled: 32-bit and 16-bit aligned buffers
 * are filled with paired 32-bit stores, byte-aligned buffers with 16-bit
 * stores plus a single byte at each end.  This is used by the IDE driver
 * to read disk sectors.  Performance is important, but the interfaces
 * seems to be slow: just using the inlined version of the inw() breaks
 * things.
 *
 * Every value read goes through cpu_to_le16() before it is stored.
 */
void insw (unsigned long port, void *dst, unsigned long count)
{
	unsigned int l = 0, l2;
	unsigned char *p;

	p = (unsigned char *)dst;

	if (!count)
		return;

	switch (((unsigned long)p) & 0x3)
	{
	 case 0x02:			/* Buffer 16-bit aligned */
		/* one leading word brings p up to 32-bit alignment */
		*(unsigned short *)p = cpu_to_le16(inw(port));
		p += 2;
		count--;
		/* fall through: the rest is the 32-bit aligned case */

	 case 0x00:			/* Buffer 32-bit aligned */
		while (count>=2) {
			count -= 2;
			/*
			 * Widen before shifting so the 16-bit value is not
			 * shifted into the sign bit of a promoted int.
			 */
			l = (unsigned int)cpu_to_le16(inw(port)) << 16;
			l |= cpu_to_le16(inw(port));
			*(unsigned int *)p = l;
			p += 4;
		}
		if (count) {
			*(unsigned short *)p = cpu_to_le16(inw(port));
		}
		break;

	 case 0x01:			/* Buffer 8-bit aligned */
	 case 0x03:
		/* I don't bother with 32bit transfers
		 * in this case, 16bit will have to do -- DE */
		--count;

		/* high byte of the first word goes out on its own ... */
		l = cpu_to_le16(inw(port));
		*p = l >> 8;
		p++;
		/* ... then each store pairs the previous low byte with the next high byte */
		while (count--)
		{
			l2 = cpu_to_le16(inw(port));
			*(unsigned short *)p = (l & 0xff) << 8 | (l2 >> 8);
			p += 2;
			l = l2;
		}
		/* ... and the low byte of the last word finishes the buffer */
		*p = l & 0xff;
		break;
	}
}
229 | |||
230 | |||
231 | |||
/*
 * Read COUNT 32-bit words from port PORT into memory starting at
 * DST.  Works with any alignment of DST.  Performance is important,
 * but the interfaces seems to be slow: just using the inlined version
 * of the inl() breaks things.
 *
 * Every value read goes through cpu_to_le32().  For an unaligned DST each
 * word is split: the leading part of the first word is stored with narrower
 * accesses until the pointer is 32-bit aligned, each following store joins
 * the remainder of the previous word with the start of the next one, and
 * the remainder of the last word is stored with narrower accesses again.
 */
void insl (unsigned long port, void *dst, unsigned long count)
{
	unsigned int cur = 0, next;
	unsigned char *p = (unsigned char *)dst;

	if (count == 0)
		return;

	switch (((unsigned long) dst) & 0x3) {
	case 0x00:			/* Buffer 32-bit aligned */
		for (; count != 0; count--) {
			*(unsigned int *)p = cpu_to_le32(inl(port));
			p += 4;
		}
		break;

	case 0x02:			/* Buffer 16-bit aligned */
		cur = cpu_to_le32(inl(port));
		*(unsigned short *)p = cur >> 16;
		p += 2;

		for (--count; count != 0; count--) {
			next = cpu_to_le32(inl(port));
			*(unsigned int *)p = (cur & 0xffff) << 16 | (next >> 16);
			p += 4;
			cur = next;
		}
		*(unsigned short *)p = cur & 0xffff;
		break;

	case 0x01:			/* Buffer 8-bit aligned */
		/* byte + short (3 bytes) reach the next 32-bit boundary */
		cur = cpu_to_le32(inl(port));
		*(unsigned char *)p = cur >> 24;
		p++;
		*(unsigned short *)p = (cur >> 8) & 0xffff;
		p += 2;

		for (--count; count != 0; count--) {
			next = cpu_to_le32(inl(port));
			*(unsigned int *)p = (cur & 0xff) << 24 | (next >> 8);
			p += 4;
			cur = next;
		}
		*p = cur & 0xff;
		break;

	case 0x03:			/* Buffer 8-bit aligned */
		/* a single byte reaches the next 32-bit boundary */
		cur = cpu_to_le32(inl(port));
		*p = cur >> 24;
		p++;

		for (--count; count != 0; count--) {
			next = cpu_to_le32(inl(port));
			*(unsigned int *)p = (cur & 0xffffff) << 8 | next >> 24;
			p += 4;
			cur = next;
		}
		*(unsigned short *)p = (cur >> 8) & 0xffff;
		p += 2;
		*p = cur & 0xff;
		break;
	}
}
310 | |||
311 | |||
312 | /* | ||
313 | * Like insb but in the opposite direction. | ||
314 | * Don't worry as much about doing aligned memory transfers: | ||
315 | * doing byte reads the "slow" way isn't nearly as slow as | ||
316 | * doing byte writes the slow way (no r-m-w cycle). | ||
317 | */ | ||
/*
 * outsb - write 'count' bytes from 'src' to I/O port 'port', one outb()
 * per byte, in buffer order.
 */
void outsb(unsigned long port, const void * src, unsigned long count)
{
	const unsigned char *bytes = (const unsigned char *)src;
	unsigned long i;

	for (i = 0; i < count; i++)
		outb(bytes[i], port);
}
329 | |||
330 | /* | ||
331 | * Like insw but in the opposite direction. This is used by the IDE | ||
332 | * driver to write disk sectors. Performance is important, but the | ||
333 | * interface seems to be slow: just using the inlined version of the | ||
334 | * outw() breaks things. | ||
335 | */ | ||
/*
 * outsw - write 'count' 16-bit words from 'src' to I/O port 'port'.
 *
 * 'src' may have any byte alignment; memory is only read with naturally
 * aligned loads.  Each 16-bit value is passed through le16_to_cpu()
 * before being handed to outw().
 *
 * Two defects of the earlier version are fixed here:
 *  - a buffer whose address is 3 modulo 4 matched no case, so nothing
 *    was written at all; every odd address now takes the byte-aligned
 *    path, which only requires that 'src + 1' be 16-bit aligned;
 *  - the final word of the byte-aligned path merged the trailing byte as
 *    'l2 >> 8', which is always 0 for a byte, so the last byte was lost.
 */
void outsw (unsigned long port, const void *src, unsigned long count)
{
	unsigned int l = 0, l2;
	const unsigned char *p;

	p = (const unsigned char *)src;

	if (!count)
		return;

	switch (((unsigned long)p) & 0x3)
	{
	case 0x00:			/* Buffer 32-bit aligned */
		while (count >= 2) {
			count -= 2;
			l = *(const unsigned int *)p;
			p += 4;
			outw(le16_to_cpu(l >> 16), port);
			outw(le16_to_cpu(l & 0xffff), port);
		}
		if (count) {
			outw(le16_to_cpu(*(const unsigned short *)p), port);
		}
		break;

	case 0x02:			/* Buffer 16-bit aligned */
		/* One halfword first brings p to a 32-bit boundary. */
		outw(le16_to_cpu(*(const unsigned short *)p), port);
		p += 2;
		count--;

		while (count >= 2) {
			count -= 2;
			l = *(const unsigned int *)p;
			p += 4;
			outw(le16_to_cpu(l >> 16), port);
			outw(le16_to_cpu(l & 0xffff), port);
		}
		if (count) {
			outw(le16_to_cpu(*(const unsigned short *)p), port);
		}
		break;

	case 0x01:			/* Buffer 8-bit aligned */
	case 0x03:
		/* I don't bother with 32bit transfers
		 * in this case, 16bit will have to do -- DE */

		/* l carries the pending high byte in bits 8..15. */
		l = *p << 8;
		p++;
		count--;
		while (count)
		{
			count--;
			l2 = *(const unsigned short *)p;
			p += 2;
			outw(le16_to_cpu((l | l2 >> 8) & 0xffff), port);
			l = l2 << 8;
		}
		/* The trailing byte supplies the low half of the last word. */
		l2 = *p;
		outw(le16_to_cpu((l | l2) & 0xffff), port);
		break;
	}
}
400 | |||
401 | |||
402 | /* | ||
403 | * Like insl but in the opposite direction. This is used by the IDE | ||
404 | * driver to write disk sectors. Works with any alignment in SRC. | ||
405 | * Performance is important, but the interface seems to be slow: | ||
406 | * just using the inlined version of the outl() breaks things. | ||
407 | */ | ||
/*
 * outsl - write 'count' 32-bit words from 'src' to I/O port 'port'.
 *
 * 'src' may have any byte alignment; memory is only read with naturally
 * aligned loads, and each output word is assembled from the tail of one
 * load and the head of the next.  Every word is passed through
 * le32_to_cpu() before being handed to outl().
 *
 * Fixes relative to the earlier version:
 *  - in the (addr & 3) == 3 case the trailing halfword was shifted left
 *    by 16 instead of 8, so it overlapped the pending top byte and left
 *    bits 8..15 of the final word zero;
 *  - byte values are widened to unsigned int before being shifted into
 *    bit 31, avoiding a left shift into the sign bit of a promoted int.
 */
void outsl (unsigned long port, const void *src, unsigned long count)
{
	unsigned int l = 0, l2;
	const unsigned char *p;

	p = (const unsigned char *)src;

	if (!count)
		return;

	switch (((unsigned long)p) & 0x3)
	{
	case 0x00:			/* Buffer 32-bit aligned */
		while (count--)
		{
			outl(le32_to_cpu(*(const unsigned int *)p), port);
			p += 4;
		}
		break;

	case 0x02:			/* Buffer 16-bit aligned */
		--count;

		l = *(const unsigned short *)p;
		p += 2;

		while (count--)
		{
			l2 = *(const unsigned int *)p;
			p += 4;
			outl (le32_to_cpu(l << 16 | l2 >> 16), port);
			l = l2;
		}
		l2 = *(const unsigned short *)p;
		outl (le32_to_cpu(l << 16 | l2), port);
		break;

	case 0x01:			/* Buffer 8-bit aligned */
		--count;

		/* byte + halfword fill the top 24 bits of the first word */
		l = (unsigned int)*p << 24;
		p++;
		l |= (unsigned int)*(const unsigned short *)p << 8;
		p += 2;

		while (count--)
		{
			l2 = *(const unsigned int *)p;
			p += 4;
			outl (le32_to_cpu(l | l2 >> 24), port);
			l = l2 << 8;
		}
		l2 = *p;
		outl (le32_to_cpu(l | l2), port);
		break;

	case 0x03:			/* Buffer 8-bit aligned */
		--count;

		/* a single byte fills the top 8 bits of the first word */
		l = (unsigned int)*p << 24;
		p++;

		while (count--)
		{
			l2 = *(const unsigned int *)p;
			p += 4;
			outl (le32_to_cpu(l | l2 >> 8), port);
			l = l2 << 24;
		}
		/* halfword goes in bits 8..23, the last byte in bits 0..7 */
		l2 = (unsigned int)*(const unsigned short *)p << 8;
		p += 2;
		l2 |= *p;
		outl (le32_to_cpu(l | l2), port);
		break;
	}
}
482 | |||
/* Export the string port-I/O routines: readers first, then writers. */
EXPORT_SYMBOL(insb);
EXPORT_SYMBOL(insw);
EXPORT_SYMBOL(insl);

EXPORT_SYMBOL(outsb);
EXPORT_SYMBOL(outsw);
EXPORT_SYMBOL(outsl);
diff --git a/arch/parisc/lib/iomap.c b/arch/parisc/lib/iomap.c new file mode 100644 index 000000000000..290a62e7120b --- /dev/null +++ b/arch/parisc/lib/iomap.c | |||
@@ -0,0 +1,422 @@ | |||
1 | /* | ||
2 | * iomap.c - Implement iomap interface for PA-RISC | ||
3 | * Copyright (c) 2004 Matthew Wilcox | ||
4 | */ | ||
5 | |||
6 | #include <linux/ioport.h> | ||
7 | #include <linux/pci.h> | ||
8 | #include <asm/io.h> | ||
9 | |||
10 | /* | ||
11 | * The iomap space on 32-bit PA-RISC is intended to look like this: | ||
12 | * 00000000-7fffffff virtual mapped IO | ||
13 | * 80000000-8fffffff ISA/EISA port space that can't be virtually mapped | ||
14 | * 90000000-9fffffff Dino port space | ||
15 | * a0000000-afffffff Astro port space | ||
16 | * b0000000-bfffffff PAT port space | ||
17 | * c0000000-cfffffff non-swapped memory IO | ||
18 | * f0000000-ffffffff legacy IO memory pointers | ||
19 | * | ||
20 | * For the moment, here's what it looks like: | ||
21 | * 80000000-8fffffff All ISA/EISA port space | ||
22 | * f0000000-ffffffff legacy IO memory pointers | ||
23 | * | ||
24 | * On 64-bit, everything is extended, so: | ||
25 | * 8000000000000000-8fffffffffffffff All ISA/EISA port space | ||
26 | * f000000000000000-ffffffffffffffff legacy IO memory pointers | ||
27 | */ | ||
28 | |||
29 | /* | ||
30 | * Technically, this should be 'if (VMALLOC_START < addr < VMALLOC_END)', | ||
31 | * but that's slow and we know it'll be within the first 2GB. | ||
32 | */ | ||
/*
 * INDIRECT_ADDR(addr)  - non-zero when the top bit of 'addr' is set, i.e.
 *                        the cookie is not a directly dereferenceable
 *                        virtual address.
 * ADDR_TO_REGION(addr) - the three bits below the top bit; used as the
 *                        index into iomap_ops[].
 * IOPORT_MAP_BASE      - base of the port-space cookies (top nybble 8).
 *
 * Every use of 'addr' is parenthesised so that an expression argument is
 * evaluated as a whole before the cast and shift are applied.
 */
#ifdef CONFIG_64BIT
#define INDIRECT_ADDR(addr)	(((unsigned long)(addr) & (1UL << 63)) != 0)
#define ADDR_TO_REGION(addr)	(((unsigned long)(addr) >> 60) & 7)
#define IOPORT_MAP_BASE		(8UL << 60)
#else
#define INDIRECT_ADDR(addr)	(((unsigned long)(addr) & (1UL << 31)) != 0)
#define ADDR_TO_REGION(addr)	(((unsigned long)(addr) >> 28) & 7)
#define IOPORT_MAP_BASE		(8UL << 28)
#endif
42 | |||
43 | struct iomap_ops { | ||
44 | unsigned int (*read8)(void __iomem *); | ||
45 | unsigned int (*read16)(void __iomem *); | ||
46 | unsigned int (*read32)(void __iomem *); | ||
47 | void (*write8)(u8, void __iomem *); | ||
48 | void (*write16)(u16, void __iomem *); | ||
49 | void (*write32)(u32, void __iomem *); | ||
50 | void (*read8r)(void __iomem *, void *, unsigned long); | ||
51 | void (*read16r)(void __iomem *, void *, unsigned long); | ||
52 | void (*read32r)(void __iomem *, void *, unsigned long); | ||
53 | void (*write8r)(void __iomem *, const void *, unsigned long); | ||
54 | void (*write16r)(void __iomem *, const void *, unsigned long); | ||
55 | void (*write32r)(void __iomem *, const void *, unsigned long); | ||
56 | }; | ||
57 | |||
58 | /* Generic ioport ops. To be replaced later by specific dino/elroy/wax code */ | ||
59 | |||
/* Port number carried in the low 24 bits of an iomap cookie. */
#define ADDR2PORT(addr)	(((unsigned long __force)(addr)) & 0xffffffUL)
61 | |||
62 | static unsigned int ioport_read8(void __iomem *addr) | ||
63 | { | ||
64 | return inb(ADDR2PORT(addr)); | ||
65 | } | ||
66 | |||
67 | static unsigned int ioport_read16(void __iomem *addr) | ||
68 | { | ||
69 | return inw(ADDR2PORT(addr)); | ||
70 | } | ||
71 | |||
72 | static unsigned int ioport_read32(void __iomem *addr) | ||
73 | { | ||
74 | return inl(ADDR2PORT(addr)); | ||
75 | } | ||
76 | |||
77 | static void ioport_write8(u8 datum, void __iomem *addr) | ||
78 | { | ||
79 | outb(datum, ADDR2PORT(addr)); | ||
80 | } | ||
81 | |||
82 | static void ioport_write16(u16 datum, void __iomem *addr) | ||
83 | { | ||
84 | outw(datum, ADDR2PORT(addr)); | ||
85 | } | ||
86 | |||
87 | static void ioport_write32(u32 datum, void __iomem *addr) | ||
88 | { | ||
89 | outl(datum, ADDR2PORT(addr)); | ||
90 | } | ||
91 | |||
92 | static void ioport_read8r(void __iomem *addr, void *dst, unsigned long count) | ||
93 | { | ||
94 | insb(ADDR2PORT(addr), dst, count); | ||
95 | } | ||
96 | |||
97 | static void ioport_read16r(void __iomem *addr, void *dst, unsigned long count) | ||
98 | { | ||
99 | insw(ADDR2PORT(addr), dst, count); | ||
100 | } | ||
101 | |||
102 | static void ioport_read32r(void __iomem *addr, void *dst, unsigned long count) | ||
103 | { | ||
104 | insl(ADDR2PORT(addr), dst, count); | ||
105 | } | ||
106 | |||
107 | static void ioport_write8r(void __iomem *addr, const void *s, unsigned long n) | ||
108 | { | ||
109 | outsb(ADDR2PORT(addr), s, n); | ||
110 | } | ||
111 | |||
112 | static void ioport_write16r(void __iomem *addr, const void *s, unsigned long n) | ||
113 | { | ||
114 | outsw(ADDR2PORT(addr), s, n); | ||
115 | } | ||
116 | |||
117 | static void ioport_write32r(void __iomem *addr, const void *s, unsigned long n) | ||
118 | { | ||
119 | outsl(ADDR2PORT(addr), s, n); | ||
120 | } | ||
121 | |||
122 | static const struct iomap_ops ioport_ops = { | ||
123 | ioport_read8, | ||
124 | ioport_read16, | ||
125 | ioport_read32, | ||
126 | ioport_write8, | ||
127 | ioport_write16, | ||
128 | ioport_write32, | ||
129 | ioport_read8r, | ||
130 | ioport_read16r, | ||
131 | ioport_read32r, | ||
132 | ioport_write8r, | ||
133 | ioport_write16r, | ||
134 | ioport_write32r, | ||
135 | }; | ||
136 | |||
137 | /* Legacy I/O memory ops */ | ||
138 | |||
139 | static unsigned int iomem_read8(void __iomem *addr) | ||
140 | { | ||
141 | return readb(addr); | ||
142 | } | ||
143 | |||
144 | static unsigned int iomem_read16(void __iomem *addr) | ||
145 | { | ||
146 | return readw(addr); | ||
147 | } | ||
148 | |||
149 | static unsigned int iomem_read32(void __iomem *addr) | ||
150 | { | ||
151 | return readl(addr); | ||
152 | } | ||
153 | |||
154 | static void iomem_write8(u8 datum, void __iomem *addr) | ||
155 | { | ||
156 | writeb(datum, addr); | ||
157 | } | ||
158 | |||
159 | static void iomem_write16(u16 datum, void __iomem *addr) | ||
160 | { | ||
161 | writew(datum, addr); | ||
162 | } | ||
163 | |||
164 | static void iomem_write32(u32 datum, void __iomem *addr) | ||
165 | { | ||
166 | writel(datum, addr); | ||
167 | } | ||
168 | |||
169 | static void iomem_read8r(void __iomem *addr, void *dst, unsigned long count) | ||
170 | { | ||
171 | while (count--) { | ||
172 | *(u8 *)dst = __raw_readb(addr); | ||
173 | dst++; | ||
174 | } | ||
175 | } | ||
176 | |||
177 | static void iomem_read16r(void __iomem *addr, void *dst, unsigned long count) | ||
178 | { | ||
179 | while (count--) { | ||
180 | *(u16 *)dst = __raw_readw(addr); | ||
181 | dst += 2; | ||
182 | } | ||
183 | } | ||
184 | |||
185 | static void iomem_read32r(void __iomem *addr, void *dst, unsigned long count) | ||
186 | { | ||
187 | while (count--) { | ||
188 | *(u32 *)dst = __raw_readl(addr); | ||
189 | dst += 4; | ||
190 | } | ||
191 | } | ||
192 | |||
193 | static void iomem_write8r(void __iomem *addr, const void *s, unsigned long n) | ||
194 | { | ||
195 | while (n--) { | ||
196 | __raw_writeb(*(u8 *)s, addr); | ||
197 | s++; | ||
198 | } | ||
199 | } | ||
200 | |||
201 | static void iomem_write16r(void __iomem *addr, const void *s, unsigned long n) | ||
202 | { | ||
203 | while (n--) { | ||
204 | __raw_writew(*(u16 *)s, addr); | ||
205 | s += 2; | ||
206 | } | ||
207 | } | ||
208 | |||
209 | static void iomem_write32r(void __iomem *addr, const void *s, unsigned long n) | ||
210 | { | ||
211 | while (n--) { | ||
212 | __raw_writel(*(u32 *)s, addr); | ||
213 | s += 4; | ||
214 | } | ||
215 | } | ||
216 | |||
217 | static const struct iomap_ops iomem_ops = { | ||
218 | iomem_read8, | ||
219 | iomem_read16, | ||
220 | iomem_read32, | ||
221 | iomem_write8, | ||
222 | iomem_write16, | ||
223 | iomem_write32, | ||
224 | iomem_read8r, | ||
225 | iomem_read16r, | ||
226 | iomem_read32r, | ||
227 | iomem_write8r, | ||
228 | iomem_write16r, | ||
229 | iomem_write32r, | ||
230 | }; | ||
231 | |||
232 | const struct iomap_ops *iomap_ops[8] = { | ||
233 | [0] = &ioport_ops, | ||
234 | #ifdef CONFIG_DEBUG_IOREMAP | ||
235 | [6] = &iomem_ops, | ||
236 | #else | ||
237 | [7] = &iomem_ops | ||
238 | #endif | ||
239 | }; | ||
240 | |||
241 | |||
242 | unsigned int ioread8(void __iomem *addr) | ||
243 | { | ||
244 | if (unlikely(INDIRECT_ADDR(addr))) | ||
245 | return iomap_ops[ADDR_TO_REGION(addr)]->read8(addr); | ||
246 | return *((u8 *)addr); | ||
247 | } | ||
248 | |||
249 | unsigned int ioread16(void __iomem *addr) | ||
250 | { | ||
251 | if (unlikely(INDIRECT_ADDR(addr))) | ||
252 | return iomap_ops[ADDR_TO_REGION(addr)]->read16(addr); | ||
253 | return le16_to_cpup((u16 *)addr); | ||
254 | } | ||
255 | |||
256 | unsigned int ioread32(void __iomem *addr) | ||
257 | { | ||
258 | if (unlikely(INDIRECT_ADDR(addr))) | ||
259 | return iomap_ops[ADDR_TO_REGION(addr)]->read32(addr); | ||
260 | return le32_to_cpup((u32 *)addr); | ||
261 | } | ||
262 | |||
263 | void iowrite8(u8 datum, void __iomem *addr) | ||
264 | { | ||
265 | if (unlikely(INDIRECT_ADDR(addr))) { | ||
266 | iomap_ops[ADDR_TO_REGION(addr)]->write8(datum, addr); | ||
267 | } else { | ||
268 | *((u8 *)addr) = datum; | ||
269 | } | ||
270 | } | ||
271 | |||
272 | void iowrite16(u16 datum, void __iomem *addr) | ||
273 | { | ||
274 | if (unlikely(INDIRECT_ADDR(addr))) { | ||
275 | iomap_ops[ADDR_TO_REGION(addr)]->write16(datum, addr); | ||
276 | } else { | ||
277 | *((u16 *)addr) = cpu_to_le16(datum); | ||
278 | } | ||
279 | } | ||
280 | |||
281 | void iowrite32(u32 datum, void __iomem *addr) | ||
282 | { | ||
283 | if (unlikely(INDIRECT_ADDR(addr))) { | ||
284 | iomap_ops[ADDR_TO_REGION(addr)]->write32(datum, addr); | ||
285 | } else { | ||
286 | *((u32 *)addr) = cpu_to_le32(datum); | ||
287 | } | ||
288 | } | ||
289 | |||
290 | /* Repeating interfaces */ | ||
291 | |||
292 | void ioread8_rep(void __iomem *addr, void *dst, unsigned long count) | ||
293 | { | ||
294 | if (unlikely(INDIRECT_ADDR(addr))) { | ||
295 | iomap_ops[ADDR_TO_REGION(addr)]->read8r(addr, dst, count); | ||
296 | } else { | ||
297 | while (count--) { | ||
298 | *(u8 *)dst = *(u8 *)addr; | ||
299 | dst++; | ||
300 | } | ||
301 | } | ||
302 | } | ||
303 | |||
304 | void ioread16_rep(void __iomem *addr, void *dst, unsigned long count) | ||
305 | { | ||
306 | if (unlikely(INDIRECT_ADDR(addr))) { | ||
307 | iomap_ops[ADDR_TO_REGION(addr)]->read16r(addr, dst, count); | ||
308 | } else { | ||
309 | while (count--) { | ||
310 | *(u16 *)dst = *(u16 *)addr; | ||
311 | dst += 2; | ||
312 | } | ||
313 | } | ||
314 | } | ||
315 | |||
316 | void ioread32_rep(void __iomem *addr, void *dst, unsigned long count) | ||
317 | { | ||
318 | if (unlikely(INDIRECT_ADDR(addr))) { | ||
319 | iomap_ops[ADDR_TO_REGION(addr)]->read32r(addr, dst, count); | ||
320 | } else { | ||
321 | while (count--) { | ||
322 | *(u32 *)dst = *(u32 *)addr; | ||
323 | dst += 4; | ||
324 | } | ||
325 | } | ||
326 | } | ||
327 | |||
328 | void iowrite8_rep(void __iomem *addr, const void *src, unsigned long count) | ||
329 | { | ||
330 | if (unlikely(INDIRECT_ADDR(addr))) { | ||
331 | iomap_ops[ADDR_TO_REGION(addr)]->write8r(addr, src, count); | ||
332 | } else { | ||
333 | while (count--) { | ||
334 | *(u8 *)addr = *(u8 *)src; | ||
335 | src++; | ||
336 | } | ||
337 | } | ||
338 | } | ||
339 | |||
340 | void iowrite16_rep(void __iomem *addr, const void *src, unsigned long count) | ||
341 | { | ||
342 | if (unlikely(INDIRECT_ADDR(addr))) { | ||
343 | iomap_ops[ADDR_TO_REGION(addr)]->write16r(addr, src, count); | ||
344 | } else { | ||
345 | while (count--) { | ||
346 | *(u16 *)addr = *(u16 *)src; | ||
347 | src += 2; | ||
348 | } | ||
349 | } | ||
350 | } | ||
351 | |||
352 | void iowrite32_rep(void __iomem *addr, const void *src, unsigned long count) | ||
353 | { | ||
354 | if (unlikely(INDIRECT_ADDR(addr))) { | ||
355 | iomap_ops[ADDR_TO_REGION(addr)]->write32r(addr, src, count); | ||
356 | } else { | ||
357 | while (count--) { | ||
358 | *(u32 *)addr = *(u32 *)src; | ||
359 | src += 4; | ||
360 | } | ||
361 | } | ||
362 | } | ||
363 | |||
364 | /* Mapping interfaces */ | ||
365 | |||
366 | void __iomem *ioport_map(unsigned long port, unsigned int nr) | ||
367 | { | ||
368 | return (void __iomem *)(IOPORT_MAP_BASE | port); | ||
369 | } | ||
370 | |||
371 | void ioport_unmap(void __iomem *addr) | ||
372 | { | ||
373 | if (!INDIRECT_ADDR(addr)) { | ||
374 | iounmap(addr); | ||
375 | } | ||
376 | } | ||
377 | |||
378 | /* Create a virtual mapping cookie for a PCI BAR (memory or IO) */ | ||
379 | void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) | ||
380 | { | ||
381 | unsigned long start = pci_resource_start(dev, bar); | ||
382 | unsigned long len = pci_resource_len(dev, bar); | ||
383 | unsigned long flags = pci_resource_flags(dev, bar); | ||
384 | |||
385 | if (!len || !start) | ||
386 | return NULL; | ||
387 | if (maxlen && len > maxlen) | ||
388 | len = maxlen; | ||
389 | if (flags & IORESOURCE_IO) | ||
390 | return ioport_map(start, len); | ||
391 | if (flags & IORESOURCE_MEM) { | ||
392 | if (flags & IORESOURCE_CACHEABLE) | ||
393 | return ioremap(start, len); | ||
394 | return ioremap_nocache(start, len); | ||
395 | } | ||
396 | /* What? */ | ||
397 | return NULL; | ||
398 | } | ||
399 | |||
400 | void pci_iounmap(struct pci_dev *dev, void __iomem * addr) | ||
401 | { | ||
402 | if (!INDIRECT_ADDR(addr)) { | ||
403 | iounmap(addr); | ||
404 | } | ||
405 | } | ||
406 | |||
/* Single-value accessors */
EXPORT_SYMBOL(ioread8);
EXPORT_SYMBOL(ioread16);
EXPORT_SYMBOL(ioread32);
EXPORT_SYMBOL(iowrite8);
EXPORT_SYMBOL(iowrite16);
EXPORT_SYMBOL(iowrite32);

/* Repeating accessors */
EXPORT_SYMBOL(ioread8_rep);
EXPORT_SYMBOL(ioread16_rep);
EXPORT_SYMBOL(ioread32_rep);
EXPORT_SYMBOL(iowrite8_rep);
EXPORT_SYMBOL(iowrite16_rep);
EXPORT_SYMBOL(iowrite32_rep);

/* Mapping interfaces */
EXPORT_SYMBOL(ioport_map);
EXPORT_SYMBOL(ioport_unmap);
EXPORT_SYMBOL(pci_iomap);
EXPORT_SYMBOL(pci_iounmap);
diff --git a/arch/parisc/lib/lusercopy.S b/arch/parisc/lib/lusercopy.S new file mode 100644 index 000000000000..a0509855c9a7 --- /dev/null +++ b/arch/parisc/lib/lusercopy.S | |||
@@ -0,0 +1,193 @@ | |||
1 | /* | ||
2 | * User Space Access Routines | ||
3 | * | ||
4 | * Copyright (C) 2000-2002 Hewlett-Packard (John Marvin) | ||
5 | * Copyright (C) 2000 Richard Hirst <rhirst with parisc-linux.org> | ||
6 | * Copyright (C) 2001 Matthieu Delahaye <delahaym at esiee.fr> | ||
7 | * Copyright (C) 2003 Randolph Chung <tausq with parisc-linux.org> | ||
8 | * | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License as published by | ||
12 | * the Free Software Foundation; either version 2, or (at your option) | ||
13 | * any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
18 | * GNU General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License | ||
21 | * along with this program; if not, write to the Free Software | ||
22 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
23 | */ | ||
24 | |||
25 | /* | ||
26 | * These routines still have plenty of room for optimization | ||
27 | * (word & doubleword load/store, dual issue, store hints, etc.). | ||
28 | */ | ||
29 | |||
30 | /* | ||
31 | * The following routines assume that space register 3 (sr3) contains | ||
32 | * the space id associated with the current users address space. | ||
33 | */ | ||
34 | |||
35 | |||
36 | .text | ||
37 | |||
38 | #include <asm/assembly.h> | ||
39 | #include <asm/errno.h> | ||
40 | |||
41 | /* | ||
42 | * get_sr gets the appropriate space value into | ||
43 | * sr1 for kernel/user space access, depending | ||
44 | * on the flag stored in the task structure. | ||
45 | */ | ||
46 | |||
47 | .macro get_sr | ||
48 | mfctl %cr30,%r1 /* r1 = cr30; presumably the current thread_info pointer -- confirm */ | ||
49 | ldw TI_SEGMENT(%r1),%r22 /* r22 = segment flag loaded from offset TI_SEGMENT */ | ||
50 | mfsp %sr3,%r1 /* r1 = sr3, the user space id (see the note above) */ | ||
51 | or,<> %r22,%r0,%r0 /* result discarded; skips the next insn when r22 != 0 */ | ||
52 | copy %r0,%r1 /* flag was 0: use space 0 instead of sr3 */ | ||
53 | mtsp %r1,%sr1 /* sr1 = selected space; clobbers r1 and r22 */ | ||
54 | .endm | ||
55 | |||
56 | .macro fixup_branch lbl | ||
57 | ldil L%\lbl, %r1 /* r1 = left (high) part of the address of \lbl */ | ||
58 | ldo R%\lbl(%r1), %r1 /* add the right (low) part: r1 = address of \lbl */ | ||
59 | bv %r0(%r1) /* jump to \lbl; the insn the user places after the macro fills the delay slot */ | ||
60 | .endm | ||
61 | |||
62 | /* | ||
63 | * long lstrncpy_from_user(char *dst, const char *src, long n) | ||
64 | * | ||
65 | * Returns -EFAULT if exception before terminator, | ||
66 | * N if the entire buffer filled, | ||
67 | * otherwise strlen (i.e. excludes zero byte) | ||
68 | */ | ||
69 | |||
70 | .export lstrncpy_from_user,code | ||
71 | lstrncpy_from_user: | ||
72 | .proc | ||
73 | .callinfo NO_CALLS | ||
74 | .entry | ||
75 | comib,= 0,%r24,$lsfu_done /* n (r24) == 0: nothing to copy */ | ||
76 | copy %r24,%r23 /* delay slot: r23 = original n, kept for the return value */ | ||
77 | get_sr | ||
78 | 1: ldbs,ma 1(%sr1,%r25),%r1 /* r1 = first source byte via sr1; r25 (src) advances by 1 */ | ||
79 | $lsfu_loop: | ||
80 | stbs,ma %r1,1(%r26) /* store the byte to dst (r26); r26 advances by 1 */ | ||
81 | comib,=,n 0,%r1,$lsfu_done /* terminator stored: stop without decrementing r24 */ | ||
82 | addib,<>,n -1,%r24,$lsfu_loop /* r24--; keep looping while bytes remain */ | ||
83 | 2: ldbs,ma 1(%sr1,%r25),%r1 /* delay slot: fetch the next source byte */ | ||
84 | $lsfu_done: | ||
85 | sub %r23,%r24,%r28 /* return value r28 = original n - remaining count */ | ||
86 | $lsfu_exit: | ||
87 | bv %r0(%r2) /* return to caller */ | ||
88 | nop | ||
89 | .exit | ||
90 | |||
91 | .section .fixup,"ax" | ||
92 | 3: fixup_branch $lsfu_exit /* fault handler: resume at the exit, skipping the sub */ | ||
93 | ldi -EFAULT,%r28 /* delay slot of the fixup branch: return -EFAULT */ | ||
94 | .previous | ||
95 | |||
96 | .section __ex_table,"aw" /* both user loads (1:, 2:) are paired with fixup 3: */ | ||
97 | #ifdef __LP64__ | ||
98 | .dword 1b,3b | ||
99 | .dword 2b,3b | ||
100 | #else | ||
101 | .word 1b,3b | ||
102 | .word 2b,3b | ||
103 | #endif | ||
104 | .previous | ||
105 | |||
106 | .procend | ||
107 | |||
108 | /* | ||
109 | * unsigned long lclear_user(void *to, unsigned long n) | ||
110 | * | ||
111 | * Returns 0 for success. | ||
112 | * otherwise, returns number of bytes not transferred. | ||
113 | */ | ||
114 | |||
115 | .export lclear_user,code | ||
116 | lclear_user: | ||
117 | .proc | ||
118 | .callinfo NO_CALLS | ||
119 | .entry | ||
120 | comib,=,n 0,%r25,$lclu_done /* n (r25) == 0: nothing to clear */ | ||
121 | get_sr | ||
122 | $lclu_loop: | ||
123 | addib,<> -1,%r25,$lclu_loop /* r25--; loop while bytes remain */ | ||
124 | 1: stbs,ma %r0,1(%sr1,%r26) /* delay slot: store a zero byte via sr1; r26 (to) advances by 1 */ | ||
125 | |||
126 | $lclu_done: | ||
127 | bv %r0(%r2) /* return to caller */ | ||
128 | copy %r25,%r28 /* delay slot: return value r28 = bytes not cleared */ | ||
129 | .exit | ||
130 | |||
131 | .section .fixup,"ax" | ||
132 | 2: fixup_branch $lclu_done /* fault handler: return through the normal exit */ | ||
133 | ldo 1(%r25),%r25 /* delay slot: r25 was already decremented for the faulting byte, add it back */ | ||
134 | .previous | ||
135 | |||
136 | .section __ex_table,"aw" /* the user store (1:) is paired with fixup 2: */ | ||
137 | #ifdef __LP64__ | ||
138 | .dword 1b,2b | ||
139 | #else | ||
140 | .word 1b,2b | ||
141 | #endif | ||
142 | .previous | ||
143 | |||
144 | .procend | ||
145 | |||
146 | /* | ||
147 | * long lstrnlen_user(char *s, long n) | ||
148 | * | ||
149 | * Returns 0 if exception before zero byte or reaching N, | ||
150 | * N+1 if N would be exceeded, | ||
151 | * else strlen + 1 (i.e. includes zero byte). | ||
152 | */ | ||
153 | |||
154 | .export lstrnlen_user,code | ||
155 | lstrnlen_user: | ||
156 | .proc | ||
157 | .callinfo NO_CALLS | ||
158 | .entry | ||
159 | comib,= 0,%r25,$lslen_nzero /* n (r25) == 0: take the special-case path */ | ||
160 | copy %r26,%r24 /* delay slot: r24 = start of the string, kept for the return value */ | ||
161 | get_sr | ||
162 | 1: ldbs,ma 1(%sr1,%r26),%r1 /* r1 = first byte via sr1; r26 (s) advances by 1 */ | ||
163 | $lslen_loop: | ||
164 | comib,=,n 0,%r1,$lslen_done /* zero byte found: stop scanning */ | ||
165 | addib,<> -1,%r25,$lslen_loop /* r25--; loop while the limit is not reached */ | ||
166 | 2: ldbs,ma 1(%sr1,%r26),%r1 /* delay slot: fetch the next byte; r26 advances by 1 */ | ||
167 | $lslen_done: | ||
168 | bv %r0(%r2) /* return to caller */ | ||
169 | sub %r26,%r24,%r28 /* delay slot: return value r28 = r26 - start */ | ||
170 | .exit | ||
171 | |||
172 | $lslen_nzero: | ||
173 | b $lslen_done | ||
174 | ldo 1(%r26),%r26 /* special case for N == 0 */ | ||
175 | |||
176 | .section .fixup,"ax" | ||
177 | 3: fixup_branch $lslen_done /* fault handler: return through the normal exit */ | ||
178 | copy %r24,%r26 /* reset r26 so 0 is returned on fault */ | ||
179 | .previous | ||
180 | |||
181 | .section __ex_table,"aw" /* both user loads (1:, 2:) are paired with fixup 3: */ | ||
182 | #ifdef __LP64__ | ||
183 | .dword 1b,3b | ||
184 | .dword 2b,3b | ||
185 | #else | ||
186 | .word 1b,3b | ||
187 | .word 2b,3b | ||
188 | #endif | ||
189 | .previous | ||
190 | |||
191 | .procend | ||
192 | |||
193 | .end | ||
diff --git a/arch/parisc/lib/memcpy.c b/arch/parisc/lib/memcpy.c new file mode 100644 index 000000000000..feb1b9f42c2b --- /dev/null +++ b/arch/parisc/lib/memcpy.c | |||
@@ -0,0 +1,522 @@ | |||
1 | /* | ||
2 | * Optimized memory copy routines. | ||
3 | * | ||
4 | * Copyright (C) 2004 Randolph Chung <tausq@debian.org> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2, or (at your option) | ||
9 | * any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
19 | * | ||
20 | * Portions derived from the GNU C Library | ||
21 | * Copyright (C) 1991, 1997, 2003 Free Software Foundation, Inc. | ||
22 | * | ||
23 | * Several strategies are used to get the best performance under various | ||
24 | * conditions. In the optimal case, we copy 64-bytes in an unrolled loop using | ||
25 | * fp regs. This is followed by loops that copy 32- or 16-bytes at a time using | ||
26 | * general registers. Unaligned copies are handled either by aligning the | ||
27 | * destination and then using shift-and-write method, or in a few cases by | ||
28 | * falling back to a byte-at-a-time copy. | ||
29 | * | ||
30 | * I chose to implement this in C because it is easier to maintain and debug, | ||
31 | * and in my experiments it appears that the C code generated by gcc (3.3/3.4 | ||
32 | * at the time of writing) is fairly optimal. Unfortunately some of the | ||
33 | * semantics of the copy routine (exception handling) is difficult to express | ||
34 | * in C, so we have to play some tricks to get it to work. | ||
35 | * | ||
36 | * All the loads and stores are done via explicit asm() code in order to use | ||
37 | * the right space registers. | ||
38 | * | ||
39 | * Testing with various alignments and buffer sizes shows that this code is | ||
40 | * often >10x faster than a simple byte-at-a-time copy, even for strangely | ||
41 | * aligned operands. It is interesting to note that the glibc version | ||
42 | * of memcpy (written in C) is actually quite fast already. This routine is | ||
43 | * able to beat it by 30-40% for aligned copies because of the loop unrolling, | ||
44 | * but in some cases the glibc version is still slightly faster. This lends | ||
45 | * more credibility to the claim that gcc can generate very good code as long as we are | ||
46 | * careful. | ||
47 | * | ||
48 | * TODO: | ||
49 | * - cache prefetching needs more experimentation to get optimal settings | ||
50 | * - try not to use the post-increment address modifiers; they create additional | ||
51 | * interlocks | ||
52 | * - replace byte-copy loops with stybs sequences | ||
53 | */ | ||
54 | |||
55 | #ifdef __KERNEL__ | ||
56 | #include <linux/config.h> | ||
57 | #include <linux/module.h> | ||
58 | #include <linux/compiler.h> | ||
59 | #include <asm/uaccess.h> | ||
60 | #define s_space "%%sr1" | ||
61 | #define d_space "%%sr2" | ||
62 | #else | ||
63 | #include "memcpy.h" | ||
64 | #define s_space "%%sr0" | ||
65 | #define d_space "%%sr0" | ||
66 | #define pa_memcpy new2_copy | ||
67 | #endif | ||
68 | |||
69 | DECLARE_PER_CPU(struct exception_data, exception_data); /* per-CPU record; the fault handlers in this file read its fault_addr field */ | ||
70 | |||
71 | #define preserve_branch(label) do { /* emits a C-level jump to `label` so the code behind it stays in the function */ \ | ||
72 | volatile int dummy; /* deliberately left uninitialized; volatile so both reads below are kept */ \ | ||
73 | /* The following branch is never taken, it's just here to */ \ | ||
74 | /* prevent gcc from optimizing away our exception code. */ \ | ||
75 | if (unlikely(dummy != dummy)) /* compares two reads of the same variable */ \ | ||
76 | goto label; \ | ||
77 | } while (0) | ||
78 | |||
79 | #define get_user_space() (segment_eq(get_fs(), KERNEL_DS) ? 0 : mfsp(3)) /* 0 when get_fs() equals KERNEL_DS, otherwise whatever mfsp(3) yields (presumably space register 3 -- confirm) */ | ||
80 | #define get_kernel_space() (0) /* kernel-side accesses always use space id 0 */ | ||
81 | |||
82 | #define MERGE(w0, sh_1, w1, sh_2) ({ /* combine w0 and w1 with one shrpw; note that sh_1 is not used by the expansion */ \ | ||
83 | unsigned int _r; \ | ||
84 | asm volatile ( \ | ||
85 | "mtsar %3\n" /* load the shift-amount register from sh_2 */ \ | ||
86 | "shrpw %1, %2, %%sar, %0\n" /* _r = shrpw of (w0, w1) by %sar */ \ | ||
87 | : "=r"(_r) \ | ||
88 | : "r"(w0), "r"(w1), "r"(sh_2) \ | ||
89 | ); \ | ||
90 | _r; /* value of the statement expression */ \ | ||
91 | }) | ||
92 | #define THRESHOLD 16 /* pa_memcpy() sends copies shorter than this straight to its byte loop */ | ||
93 | |||
94 | #ifdef DEBUG_MEMCPY /* debug build: DPRINTF prints file:line:function, then the formatted message, with two printk calls */ | ||
95 | #define DPRINTF(fmt, args...) do { printk(KERN_DEBUG "%s:%d:%s ", __FILE__, __LINE__, __FUNCTION__ ); printk(KERN_DEBUG fmt, ##args ); } while (0) | ||
96 | #else /* otherwise DPRINTF expands to nothing, so its arguments are never evaluated */ | ||
97 | #define DPRINTF(fmt, args...) | ||
98 | #endif | ||
99 | |||
100 | #ifndef __LP64__ /* EXC_WORD: assembler directive used for each exception-table entry below */ | ||
101 | #define EXC_WORD ".word" | ||
102 | #else /* __LP64__ builds emit the entries with .dword instead */ | ||
103 | #define EXC_WORD ".dword" | ||
104 | #endif | ||
105 | |||
106 | #define def_load_ai_insn(_insn,_sz,_tt,_s,_a,_t,_e) /* one ",ma" load: _t is loaded through _a in space _s, with displacement _sz; _tt is the output constraint for _t */ \ | ||
107 | __asm__ __volatile__ ( \ | ||
108 | "1:\t" #_insn ",ma " #_sz "(" _s ",%1), %0\n" /* local label 1 marks the access that may fault */ \ | ||
109 | "\t.section __ex_table,\"aw\"\n" \ | ||
110 | "\t" EXC_WORD "\t1b\n" /* table entry, first word: address of the access */ \ | ||
111 | "\t" EXC_WORD "\t" #_e "\n" /* second word: label _e supplied by the caller */ \ | ||
112 | "\t.previous\n" \ | ||
113 | : _tt(_t), "+r"(_a) /* "+r": the address operand is both read and updated */ \ | ||
114 | : \ | ||
115 | : "r8") /* NOTE(review): why r8 is clobbered is not visible here -- presumably used on the fault fixup path; confirm */ | ||
116 | |||
117 | #define def_store_ai_insn(_insn,_sz,_tt,_s,_a,_t,_e) /* store counterpart: _t is stored through _a in space _s, with displacement _sz */ \ | ||
118 | __asm__ __volatile__ ( \ | ||
119 | "1:\t" #_insn ",ma %1, " #_sz "(" _s ",%0)\n" /* local label 1 marks the access that may fault */ \ | ||
120 | "\t.section __ex_table,\"aw\"\n" \ | ||
121 | "\t" EXC_WORD "\t1b\n" /* table entry, first word: address of the access */ \ | ||
122 | "\t" EXC_WORD "\t" #_e "\n" /* second word: label _e supplied by the caller */ \ | ||
123 | "\t.previous\n" \ | ||
124 | : "+r"(_a) /* the address operand is both read and updated */ \ | ||
125 | : _tt(_t) \ | ||
126 | : "r8") /* NOTE(review): same unexplained r8 clobber as the load form -- confirm */ | ||
127 | |||
128 | #define ldbma(_s, _a, _t, _e) def_load_ai_insn(ldbs,1,"=r",_s,_a,_t,_e) /* byte load, displacement 1 */ | ||
129 | #define stbma(_s, _t, _a, _e) def_store_ai_insn(stbs,1,"r",_s,_a,_t,_e) /* byte store, displacement 1 */ | ||
130 | #define ldwma(_s, _a, _t, _e) def_load_ai_insn(ldw,4,"=r",_s,_a,_t,_e) /* word load, displacement 4 */ | ||
131 | #define stwma(_s, _t, _a, _e) def_store_ai_insn(stw,4,"r",_s,_a,_t,_e) /* word store, displacement 4 */ | ||
132 | #define flddma(_s, _a, _t, _e) def_load_ai_insn(fldd,8,"=f",_s,_a,_t,_e) /* fldd into an "f"-constrained operand, displacement 8 */ | ||
133 | #define fstdma(_s, _t, _a, _e) def_store_ai_insn(fstd,8,"f",_s,_a,_t,_e) /* fstd from an "f"-constrained operand, displacement 8 */ | ||
134 | |||
135 | #define def_load_insn(_insn,_tt,_s,_o,_a,_t,_e) /* load _t from offset _o off _a in space _s; _a is an input only and is not modified */ \ | ||
136 | __asm__ __volatile__ ( \ | ||
137 | "1:\t" #_insn " " #_o "(" _s ",%1), %0\n" /* local label 1 marks the access that may fault */ \ | ||
138 | "\t.section __ex_table,\"aw\"\n" \ | ||
139 | "\t" EXC_WORD "\t1b\n" /* table entry, first word: address of the access */ \ | ||
140 | "\t" EXC_WORD "\t" #_e "\n" /* second word: label _e supplied by the caller */ \ | ||
141 | "\t.previous\n" \ | ||
142 | : _tt(_t) \ | ||
143 | : "r"(_a) \ | ||
144 | : "r8") /* NOTE(review): reason for the r8 clobber is not visible here -- confirm */ | ||
145 | |||
146 | #define def_store_insn(_insn,_tt,_s,_t,_o,_a,_e) /* store _t to offset _o off _a in space _s; no outputs */ \ | ||
147 | __asm__ __volatile__ ( \ | ||
148 | "1:\t" #_insn " %0, " #_o "(" _s ",%1)\n" /* local label 1 marks the access that may fault */ \ | ||
149 | "\t.section __ex_table,\"aw\"\n" \ | ||
150 | "\t" EXC_WORD "\t1b\n" /* table entry, first word: address of the access */ \ | ||
151 | "\t" EXC_WORD "\t" #_e "\n" /* second word: label _e supplied by the caller */ \ | ||
152 | "\t.previous\n" \ | ||
153 | : \ | ||
154 | : _tt(_t), "r"(_a) \ | ||
155 | : "r8") /* NOTE(review): same unexplained r8 clobber -- confirm */ | ||
156 | |||
157 | #define ldw(_s,_o,_a,_t,_e) def_load_insn(ldw,"=r",_s,_o,_a,_t,_e) /* word load at a fixed offset */ | ||
158 | #define stw(_s,_t,_o,_a,_e) def_store_insn(stw,"r",_s,_t,_o,_a,_e) /* word store at a fixed offset */ | ||
159 | |||
160 | #ifdef CONFIG_PREFETCH /* note: every prefetch_src() call in this file is currently commented out or under #if 0, and prefetch_dst() is not called */ | ||
161 | extern inline void prefetch_src(const void *addr) | ||
162 | { | ||
163 | __asm__("ldw 0(" s_space ",%0), %%r0" : : "r" (addr)); /* word load from the source space with %r0 as the target register */ | ||
164 | } | ||
165 | |||
166 | extern inline void prefetch_dst(const void *addr) | ||
167 | { | ||
168 | __asm__("ldd 0(" d_space ",%0), %%r0" : : "r" (addr)); /* ldd from the destination space with %r0 as the target register */ | ||
169 | } | ||
170 | #else /* without CONFIG_PREFETCH both helpers expand to nothing */ | ||
171 | #define prefetch_src(addr) | ||
172 | #define prefetch_dst(addr) | ||
173 | #endif | ||
174 | |||
175 | /* Copy from a not-aligned src to an aligned dst, using shifts. Handles 4 words | ||
176 | * per loop. This code is derived from glibc. len counts words (unsigned int units); o_dst, o_src and o_len are the caller's original byte-based values and are only used to compute the result after a fault. | ||
177 | * Returns 0 on success, otherwise the number of bytes not transferred. */ | ||
178 | static inline unsigned long copy_dstaligned(unsigned long dst, unsigned long src, unsigned long len, unsigned long o_dst, unsigned long o_src, unsigned long o_len) | ||
179 | { | ||
180 | /* gcc complains that a2 and a3 may be uninitialized, but actually | ||
181 | * they cannot be. Initialize a2/a3 to shut gcc up. | ||
182 | */ | ||
183 | register unsigned int a0, a1, a2 = 0, a3 = 0; | ||
184 | int sh_1, sh_2; | ||
185 | struct exception_data *d; | ||
186 | |||
187 | /* prefetch_src((const void *)src); */ | ||
188 | |||
189 | /* Calculate how to shift a word read at the memory operation | ||
190 | aligned srcp to make it aligned for copy. */ | ||
191 | sh_1 = 8 * (src % sizeof(unsigned int)); /* bit offset of src inside its word */ | ||
192 | sh_2 = 8 * sizeof(unsigned int) - sh_1; /* complement; this is the value MERGE() actually uses */ | ||
193 | |||
194 | /* Make src aligned by rounding it down. */ | ||
195 | src &= -sizeof(unsigned int); | ||
196 | |||
197 | switch (len % 4) /* pick the entry point into the unrolled loop; src/dst/len are biased so the fixed offsets below line up */ | ||
198 | { | ||
199 | case 2: | ||
200 | /* a1 = ((unsigned int *) src)[0]; | ||
201 | a2 = ((unsigned int *) src)[1]; */ | ||
202 | ldw(s_space, 0, src, a1, cda_ldw_exc); | ||
203 | ldw(s_space, 4, src, a2, cda_ldw_exc); | ||
204 | src -= 1 * sizeof(unsigned int); | ||
205 | dst -= 3 * sizeof(unsigned int); | ||
206 | len += 2; | ||
207 | goto do1; | ||
208 | case 3: | ||
209 | /* a0 = ((unsigned int *) src)[0]; | ||
210 | a1 = ((unsigned int *) src)[1]; */ | ||
211 | ldw(s_space, 0, src, a0, cda_ldw_exc); | ||
212 | ldw(s_space, 4, src, a1, cda_ldw_exc); | ||
213 | src -= 0 * sizeof(unsigned int); | ||
214 | dst -= 2 * sizeof(unsigned int); | ||
215 | len += 1; | ||
216 | goto do2; | ||
217 | case 0: | ||
218 | if (len == 0) | ||
219 | return 0; /* nothing to copy */ | ||
220 | /* a3 = ((unsigned int *) src)[0]; | ||
221 | a0 = ((unsigned int *) src)[1]; */ | ||
222 | ldw(s_space, 0, src, a3, cda_ldw_exc); | ||
223 | ldw(s_space, 4, src, a0, cda_ldw_exc); | ||
224 | src -=-1 * sizeof(unsigned int); | ||
225 | dst -= 1 * sizeof(unsigned int); | ||
226 | len += 0; | ||
227 | goto do3; | ||
228 | case 1: | ||
229 | /* a2 = ((unsigned int *) src)[0]; | ||
230 | a3 = ((unsigned int *) src)[1]; */ | ||
231 | ldw(s_space, 0, src, a2, cda_ldw_exc); | ||
232 | ldw(s_space, 4, src, a3, cda_ldw_exc); | ||
233 | src -=-2 * sizeof(unsigned int); | ||
234 | dst -= 0 * sizeof(unsigned int); | ||
235 | len -= 1; | ||
236 | if (len == 0) | ||
237 | goto do0; /* single word: only the final store is needed */ | ||
238 | goto do4; /* No-op. */ | ||
239 | } | ||
240 | |||
241 | do /* each stage loads one new source word and stores one merged destination word */ | ||
242 | { | ||
243 | /* prefetch_src((const void *)(src + 4 * sizeof(unsigned int))); */ | ||
244 | do4: | ||
245 | /* a0 = ((unsigned int *) src)[0]; */ | ||
246 | ldw(s_space, 0, src, a0, cda_ldw_exc); | ||
247 | /* ((unsigned int *) dst)[0] = MERGE (a2, sh_1, a3, sh_2); */ | ||
248 | stw(d_space, MERGE (a2, sh_1, a3, sh_2), 0, dst, cda_stw_exc); | ||
249 | do3: | ||
250 | /* a1 = ((unsigned int *) src)[1]; */ | ||
251 | ldw(s_space, 4, src, a1, cda_ldw_exc); | ||
252 | /* ((unsigned int *) dst)[1] = MERGE (a3, sh_1, a0, sh_2); */ | ||
253 | stw(d_space, MERGE (a3, sh_1, a0, sh_2), 4, dst, cda_stw_exc); | ||
254 | do2: | ||
255 | /* a2 = ((unsigned int *) src)[2]; */ | ||
256 | ldw(s_space, 8, src, a2, cda_ldw_exc); | ||
257 | /* ((unsigned int *) dst)[2] = MERGE (a0, sh_1, a1, sh_2); */ | ||
258 | stw(d_space, MERGE (a0, sh_1, a1, sh_2), 8, dst, cda_stw_exc); | ||
259 | do1: | ||
260 | /* a3 = ((unsigned int *) src)[3]; */ | ||
261 | ldw(s_space, 12, src, a3, cda_ldw_exc); | ||
262 | /* ((unsigned int *) dst)[3] = MERGE (a1, sh_1, a2, sh_2); */ | ||
263 | stw(d_space, MERGE (a1, sh_1, a2, sh_2), 12, dst, cda_stw_exc); | ||
264 | |||
265 | src += 4 * sizeof(unsigned int); | ||
266 | dst += 4 * sizeof(unsigned int); | ||
267 | len -= 4; | ||
268 | } | ||
269 | while (len != 0); | ||
270 | |||
271 | do0: | ||
272 | /* ((unsigned int *) dst)[0] = MERGE (a2, sh_1, a3, sh_2); */ | ||
273 | stw(d_space, MERGE (a2, sh_1, a3, sh_2), 0, dst, cda_stw_exc); | ||
274 | |||
275 | preserve_branch(handle_load_error); /* keep both handlers below from being discarded as unreachable */ | ||
276 | preserve_branch(handle_store_error); | ||
277 | |||
278 | return 0; | ||
279 | |||
280 | handle_load_error: | ||
281 | __asm__ __volatile__ ("cda_ldw_exc:\n"); /* assembler label named by the ldw() exception-table entries above */ | ||
282 | d = &__get_cpu_var(exception_data); | ||
283 | DPRINTF("cda_ldw_exc: o_len=%lu fault_addr=%lu o_src=%lu ret=%lu\n", | ||
284 | o_len, d->fault_addr, o_src, o_len - d->fault_addr + o_src); | ||
285 | return o_len - d->fault_addr + o_src; /* o_len is already in bytes: remaining = o_len - (fault_addr - o_src) */ | ||
286 | |||
287 | handle_store_error: | ||
288 | __asm__ __volatile__ ("cda_stw_exc:\n"); /* assembler label named by the stw() exception-table entries above */ | ||
289 | d = &__get_cpu_var(exception_data); | ||
290 | DPRINTF("cda_stw_exc: o_len=%lu fault_addr=%lu o_dst=%lu ret=%lu\n", | ||
291 | o_len, d->fault_addr, o_dst, o_len - d->fault_addr + o_dst); | ||
292 | return o_len - d->fault_addr + o_dst; /* o_len is already in bytes: remaining = o_len - (fault_addr - o_dst) */ | ||
293 | } | ||
294 | |||
295 | |||
296 | /* Copy len bytes from srcp (read through s_space) to dstp (written through d_space). Returns 0 for success, otherwise, returns number of bytes not transferred. */ | ||
297 | unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len) | ||
298 | { | ||
299 | register unsigned long src, dst, t1, t2, t3; | ||
300 | register unsigned char *pcs, *pcd; | ||
301 | register unsigned int *pws, *pwd; | ||
302 | register double *pds, *pdd; | ||
303 | unsigned long ret = 0; | ||
304 | unsigned long o_dst, o_src, o_len; | ||
305 | struct exception_data *d; | ||
306 | |||
307 | src = (unsigned long)srcp; | ||
308 | dst = (unsigned long)dstp; | ||
309 | pcs = (unsigned char *)srcp; | ||
310 | pcd = (unsigned char *)dstp; | ||
311 | |||
312 | o_dst = dst; o_src = src; o_len = len; /* originals, needed by the fault handlers at the end */ | ||
313 | |||
314 | /* prefetch_src((const void *)srcp); */ | ||
315 | |||
316 | if (len < THRESHOLD) | ||
317 | goto byte_copy; /* short copy: skip all alignment work */ | ||
318 | |||
319 | /* Check alignment */ | ||
320 | t1 = (src ^ dst); /* low bits set where src and dst alignment differ */ | ||
321 | if (unlikely(t1 & (sizeof(double)-1))) | ||
322 | goto unaligned_copy; | ||
323 | |||
324 | /* src and dst have same alignment. */ | ||
325 | |||
326 | /* Copy bytes till we are double-aligned. */ | ||
327 | t2 = src & (sizeof(double) - 1); | ||
328 | if (unlikely(t2 != 0)) { | ||
329 | t2 = sizeof(double) - t2; | ||
330 | while (t2 && len) { | ||
331 | /* *pcd++ = *pcs++; */ | ||
332 | ldbma(s_space, pcs, t3, pmc_load_exc); | ||
333 | len--; | ||
334 | stbma(d_space, t3, pcd, pmc_store_exc); | ||
335 | t2--; | ||
336 | } | ||
337 | } | ||
338 | |||
339 | pds = (double *)pcs; | ||
340 | pdd = (double *)pcd; | ||
341 | |||
342 | /* Copy 8 doubles at a time */ | ||
343 | while (len >= 8*sizeof(double)) { | ||
344 | register double r1, r2, r3, r4, r5, r6, r7, r8; | ||
345 | /* prefetch_src((char *)pds + L1_CACHE_BYTES); */ | ||
346 | flddma(s_space, pds, r1, pmc_load_exc); | ||
347 | flddma(s_space, pds, r2, pmc_load_exc); | ||
348 | flddma(s_space, pds, r3, pmc_load_exc); | ||
349 | flddma(s_space, pds, r4, pmc_load_exc); | ||
350 | fstdma(d_space, r1, pdd, pmc_store_exc); | ||
351 | fstdma(d_space, r2, pdd, pmc_store_exc); | ||
352 | fstdma(d_space, r3, pdd, pmc_store_exc); | ||
353 | fstdma(d_space, r4, pdd, pmc_store_exc); | ||
354 | |||
355 | #if 0 | ||
356 | if (L1_CACHE_BYTES <= 32) | ||
357 | prefetch_src((char *)pds + L1_CACHE_BYTES); | ||
358 | #endif | ||
359 | flddma(s_space, pds, r5, pmc_load_exc); | ||
360 | flddma(s_space, pds, r6, pmc_load_exc); | ||
361 | flddma(s_space, pds, r7, pmc_load_exc); | ||
362 | flddma(s_space, pds, r8, pmc_load_exc); | ||
363 | fstdma(d_space, r5, pdd, pmc_store_exc); | ||
364 | fstdma(d_space, r6, pdd, pmc_store_exc); | ||
365 | fstdma(d_space, r7, pdd, pmc_store_exc); | ||
366 | fstdma(d_space, r8, pdd, pmc_store_exc); | ||
367 | len -= 8*sizeof(double); | ||
368 | } | ||
369 | |||
370 | pws = (unsigned int *)pds; | ||
371 | pwd = (unsigned int *)pdd; | ||
372 | |||
373 | word_copy: /* also entered from unaligned_copy when src and dst agree modulo a word */ | ||
374 | while (len >= 8*sizeof(unsigned int)) { | ||
375 | register unsigned int r1,r2,r3,r4,r5,r6,r7,r8; | ||
376 | /* prefetch_src((char *)pws + L1_CACHE_BYTES); */ | ||
377 | ldwma(s_space, pws, r1, pmc_load_exc); | ||
378 | ldwma(s_space, pws, r2, pmc_load_exc); | ||
379 | ldwma(s_space, pws, r3, pmc_load_exc); | ||
380 | ldwma(s_space, pws, r4, pmc_load_exc); | ||
381 | stwma(d_space, r1, pwd, pmc_store_exc); | ||
382 | stwma(d_space, r2, pwd, pmc_store_exc); | ||
383 | stwma(d_space, r3, pwd, pmc_store_exc); | ||
384 | stwma(d_space, r4, pwd, pmc_store_exc); | ||
385 | |||
386 | ldwma(s_space, pws, r5, pmc_load_exc); | ||
387 | ldwma(s_space, pws, r6, pmc_load_exc); | ||
388 | ldwma(s_space, pws, r7, pmc_load_exc); | ||
389 | ldwma(s_space, pws, r8, pmc_load_exc); | ||
390 | stwma(d_space, r5, pwd, pmc_store_exc); | ||
391 | stwma(d_space, r6, pwd, pmc_store_exc); | ||
392 | stwma(d_space, r7, pwd, pmc_store_exc); | ||
393 | stwma(d_space, r8, pwd, pmc_store_exc); | ||
394 | len -= 8*sizeof(unsigned int); | ||
395 | } | ||
396 | |||
397 | while (len >= 4*sizeof(unsigned int)) { | ||
398 | register unsigned int r1,r2,r3,r4; | ||
399 | ldwma(s_space, pws, r1, pmc_load_exc); | ||
400 | ldwma(s_space, pws, r2, pmc_load_exc); | ||
401 | ldwma(s_space, pws, r3, pmc_load_exc); | ||
402 | ldwma(s_space, pws, r4, pmc_load_exc); | ||
403 | stwma(d_space, r1, pwd, pmc_store_exc); | ||
404 | stwma(d_space, r2, pwd, pmc_store_exc); | ||
405 | stwma(d_space, r3, pwd, pmc_store_exc); | ||
406 | stwma(d_space, r4, pwd, pmc_store_exc); | ||
407 | len -= 4*sizeof(unsigned int); | ||
408 | } | ||
409 | |||
410 | pcs = (unsigned char *)pws; | ||
411 | pcd = (unsigned char *)pwd; | ||
412 | |||
413 | byte_copy: /* final stage of every path: whatever is left goes one byte at a time */ | ||
414 | while (len) { | ||
415 | /* *pcd++ = *pcs++; */ | ||
416 | ldbma(s_space, pcs, t3, pmc_load_exc); | ||
417 | stbma(d_space, t3, pcd, pmc_store_exc); | ||
418 | len--; | ||
419 | } | ||
420 | |||
421 | return 0; | ||
422 | |||
423 | unaligned_copy: | ||
424 | /* possibly we are aligned on a word, but not on a double... */ | ||
425 | if (likely((t1 & (sizeof(unsigned int)-1)) == 0)) { /* the hint now covers the whole comparison, not just the mask */ | ||
426 | t2 = src & (sizeof(unsigned int) - 1); | ||
427 | |||
428 | if (unlikely(t2 != 0)) { | ||
429 | t2 = sizeof(unsigned int) - t2; | ||
430 | while (t2) { /* len >= THRESHOLD here, so these few bytes cannot exhaust it */ | ||
431 | /* *pcd++ = *pcs++; */ | ||
432 | ldbma(s_space, pcs, t3, pmc_load_exc); | ||
433 | stbma(d_space, t3, pcd, pmc_store_exc); | ||
434 | len--; | ||
435 | t2--; | ||
436 | } | ||
437 | } | ||
438 | |||
439 | pws = (unsigned int *)pcs; | ||
440 | pwd = (unsigned int *)pcd; | ||
441 | goto word_copy; | ||
442 | } | ||
443 | |||
444 | /* Align the destination. */ | ||
445 | if (unlikely((dst & (sizeof(unsigned int) - 1)) != 0)) { | ||
446 | t2 = sizeof(unsigned int) - (dst & (sizeof(unsigned int) - 1)); | ||
447 | while (t2) { | ||
448 | /* *pcd++ = *pcs++; */ | ||
449 | ldbma(s_space, pcs, t3, pmc_load_exc); | ||
450 | stbma(d_space, t3, pcd, pmc_store_exc); | ||
451 | len--; | ||
452 | t2--; | ||
453 | } | ||
454 | dst = (unsigned long)pcd; | ||
455 | src = (unsigned long)pcs; | ||
456 | } | ||
457 | |||
458 | ret = copy_dstaligned(dst, src, len / sizeof(unsigned int), /* length is handed over as a word count */ | ||
459 | o_dst, o_src, o_len); | ||
460 | if (ret) | ||
461 | return ret; /* faulted inside copy_dstaligned(): pass its byte count on */ | ||
462 | |||
463 | pcs += (len & -sizeof(unsigned int)); /* skip the whole words just copied */ | ||
464 | pcd += (len & -sizeof(unsigned int)); | ||
465 | len %= sizeof(unsigned int); /* 0..3 trailing bytes remain */ | ||
466 | |||
467 | preserve_branch(handle_load_error); /* keep both handlers below from being discarded as unreachable */ | ||
468 | preserve_branch(handle_store_error); | ||
469 | |||
470 | goto byte_copy; | ||
471 | |||
472 | handle_load_error: | ||
473 | __asm__ __volatile__ ("pmc_load_exc:\n"); /* assembler label named by the load exception-table entries above */ | ||
474 | d = &__get_cpu_var(exception_data); | ||
475 | DPRINTF("pmc_load_exc: o_len=%lu fault_addr=%lu o_src=%lu ret=%lu\n", | ||
476 | o_len, d->fault_addr, o_src, o_len - d->fault_addr + o_src); | ||
477 | return o_len - d->fault_addr + o_src; /* remaining = o_len - (fault_addr - o_src) */ | ||
478 | |||
479 | handle_store_error: | ||
480 | __asm__ __volatile__ ("pmc_store_exc:\n"); /* assembler label named by the store exception-table entries above */ | ||
481 | d = &__get_cpu_var(exception_data); | ||
482 | DPRINTF("pmc_store_exc: o_len=%lu fault_addr=%lu o_dst=%lu ret=%lu\n", | ||
483 | o_len, d->fault_addr, o_dst, o_len - d->fault_addr + o_dst); | ||
484 | return o_len - d->fault_addr + o_dst; /* remaining = o_len - (fault_addr - o_dst) */ | ||
485 | } | ||
486 | |||
487 | #ifdef __KERNEL__ | ||
488 | unsigned long copy_to_user(void __user *dst, const void *src, unsigned long len) | ||
489 | { /* kernel source, user destination; the result is pa_memcpy()'s: 0 on success, else bytes not transferred */ | ||
490 | mtsp(get_kernel_space(), 1); /* source side: s_space is %sr1 (mtsp is defined elsewhere; presumably it loads that space register) */ | ||
491 | mtsp(get_user_space(), 2); /* destination side: d_space is %sr2 */ | ||
492 | return pa_memcpy((void __force *)dst, src, len); | ||
493 | } | ||
494 | |||
495 | unsigned long copy_from_user(void *dst, const void __user *src, unsigned long len) | ||
496 | { /* user source, kernel destination; the result is pa_memcpy()'s: 0 on success, else bytes not transferred */ | ||
497 | mtsp(get_user_space(), 1); /* source side: s_space is %sr1 */ | ||
498 | mtsp(get_kernel_space(), 2); /* destination side: d_space is %sr2 */ | ||
499 | return pa_memcpy(dst, (void __force *)src, len); | ||
500 | } | ||
501 | |||
502 | unsigned long copy_in_user(void __user *dst, const void __user *src, unsigned long len) | ||
503 | { /* user source and user destination; the result is pa_memcpy()'s: 0 on success, else bytes not transferred */ | ||
504 | mtsp(get_user_space(), 1); /* source side: s_space is %sr1 */ | ||
505 | mtsp(get_user_space(), 2); /* destination side: d_space is %sr2 */ | ||
506 | return pa_memcpy((void __force *)dst, (void __force *)src, len); | ||
507 | } | ||
508 | |||
509 | |||
510 | void * memcpy(void * dst,const void *src, size_t count) | ||
511 | { /* kernel-to-kernel copy built on pa_memcpy(); always returns dst */ | ||
512 | mtsp(get_kernel_space(), 1); /* source side: s_space is %sr1 */ | ||
513 | mtsp(get_kernel_space(), 2); /* destination side: d_space is %sr2 */ | ||
514 | pa_memcpy(dst, src, count); /* the "bytes not transferred" result is ignored here */ | ||
515 | return dst; | ||
516 | } | ||
517 | |||
518 | EXPORT_SYMBOL(copy_to_user); | ||
519 | EXPORT_SYMBOL(copy_from_user); | ||
520 | EXPORT_SYMBOL(copy_in_user); | ||
521 | EXPORT_SYMBOL(memcpy); | ||
522 | #endif | ||
diff --git a/arch/parisc/lib/memset.c b/arch/parisc/lib/memset.c new file mode 100644 index 000000000000..1d7929bd7642 --- /dev/null +++ b/arch/parisc/lib/memset.c | |||
@@ -0,0 +1,91 @@ | |||
1 | /* Copyright (C) 1991, 1997 Free Software Foundation, Inc. | ||
2 | This file is part of the GNU C Library. | ||
3 | |||
4 | The GNU C Library is free software; you can redistribute it and/or | ||
5 | modify it under the terms of the GNU Lesser General Public | ||
6 | License as published by the Free Software Foundation; either | ||
7 | version 2.1 of the License, or (at your option) any later version. | ||
8 | |||
9 | The GNU C Library is distributed in the hope that it will be useful, | ||
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
12 | Lesser General Public License for more details. | ||
13 | |||
14 | You should have received a copy of the GNU Lesser General Public | ||
15 | License along with the GNU C Library; if not, write to the Free | ||
16 | Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | ||
17 | 02111-1307 USA. */ | ||
18 | |||
19 | /* Slight modifications for pa-risc linux - Paul Bame <bame@debian.org> */ | ||
20 | |||
21 | #include <linux/types.h> | ||
22 | #include <asm/string.h> | ||
23 | |||
24 | #define OPSIZ (BITS_PER_LONG/8) | ||
25 | typedef unsigned long op_t; | ||
26 | |||
27 | /* memset: fill LEN bytes starting at DSTPP with the low byte of SC. | ||
28 |    Returns DSTPP unchanged. | ||
29 | |||
30 |    Requests of 8 bytes or more are first aligned to OPSIZ and then | ||
31 |    filled one op_t at a time; whatever is left is written byte by byte.  */ | ||
32 | void * | ||
33 | memset (void *dstpp, int sc, size_t len) | ||
34 | { | ||
35 |   /* Only the low 8 bits of SC are ever stored.  */ | ||
36 |   const unsigned char byte = (unsigned char) sc; | ||
37 |   long int addr = (long int) dstpp; | ||
38 | |||
39 |   if (len >= 8) | ||
40 |     { | ||
41 |       size_t count; | ||
42 |       op_t pattern = byte; | ||
43 | |||
44 |       /* Replicate the byte into every byte of an op_t.  */ | ||
45 |       pattern |= pattern << 8; | ||
46 |       pattern |= pattern << 16; | ||
47 |       if (OPSIZ > 4) | ||
48 |         /* Two 16-bit shifts rather than one 32-bit shift, to avoid a | ||
49 |            warning if long has 32 bits.  */ | ||
50 |         pattern |= (pattern << 16) << 16; | ||
51 | |||
52 |       /* Byte stores until ADDR is a multiple of OPSIZ.  LEN is at least | ||
53 |          8 here, so it need not be tested in this loop.  */ | ||
54 |       for (; addr % OPSIZ != 0; addr += 1, len -= 1) | ||
55 |         ((unsigned char *) addr)[0] = byte; | ||
56 | |||
57 |       /* Unrolled part: 8 op_t stores per pass.  */ | ||
58 |       for (count = len / (OPSIZ * 8); count > 0; count -= 1) | ||
59 |         { | ||
60 |           op_t *w = (op_t *) addr; | ||
61 | |||
62 |           w[0] = pattern; | ||
63 |           w[1] = pattern; | ||
64 |           w[2] = pattern; | ||
65 |           w[3] = pattern; | ||
66 |           w[4] = pattern; | ||
67 |           w[5] = pattern; | ||
68 |           w[6] = pattern; | ||
69 |           w[7] = pattern; | ||
70 |           addr += 8 * OPSIZ; | ||
71 |         } | ||
72 |       len %= OPSIZ * 8; | ||
73 | |||
74 |       /* One op_t per pass until fewer than OPSIZ bytes remain.  */ | ||
75 |       for (count = len / OPSIZ; count > 0; count -= 1) | ||
76 |         { | ||
77 |           *(op_t *) addr = pattern; | ||
78 |           addr += OPSIZ; | ||
79 |         } | ||
80 |       len %= OPSIZ; | ||
81 |     } | ||
82 | |||
83 |   /* Tail, or the whole request when LEN < 8: plain byte stores.  */ | ||
84 |   for (; len > 0; len -= 1) | ||
85 |     { | ||
86 |       ((unsigned char *) addr)[0] = byte; | ||
87 |       addr += 1; | ||
88 |     } | ||
89 | |||
90 |   return dstpp; | ||
91 | } | ||