Diffstat (limited to 'arch/parisc/lib')
-rw-r--r--  arch/parisc/lib/Makefile        9
-rw-r--r--  arch/parisc/lib/bitops.c       84
-rw-r--r--  arch/parisc/lib/checksum.c    148
-rw-r--r--  arch/parisc/lib/debuglocks.c  277
-rw-r--r--  arch/parisc/lib/fixup.S        89
-rw-r--r--  arch/parisc/lib/io.c          488
-rw-r--r--  arch/parisc/lib/iomap.c       422
-rw-r--r--  arch/parisc/lib/lusercopy.S   193
-rw-r--r--  arch/parisc/lib/memcpy.c      522
-rw-r--r--  arch/parisc/lib/memset.c       91
10 files changed, 2323 insertions(+), 0 deletions(-)
diff --git a/arch/parisc/lib/Makefile b/arch/parisc/lib/Makefile
new file mode 100644
index 000000000000..7bf705676297
--- /dev/null
+++ b/arch/parisc/lib/Makefile
@@ -0,0 +1,9 @@
1#
2# Makefile for parisc-specific library files
3#
4
5lib-y := lusercopy.o bitops.o checksum.o io.o memset.o fixup.o memcpy.o
6
7obj-y := iomap.o
8
9lib-$(CONFIG_SMP) += debuglocks.o
diff --git a/arch/parisc/lib/bitops.c b/arch/parisc/lib/bitops.c
new file mode 100644
index 000000000000..2de182f6fe8a
--- /dev/null
+++ b/arch/parisc/lib/bitops.c
@@ -0,0 +1,84 @@
1/*
2 * bitops.c: atomic operations which got too long to be inlined all over
3 * the place.
4 *
5 * Copyright 1999 Philipp Rumpf (prumpf@tux.org)
6 * Copyright 2000 Grant Grundler (grundler@cup.hp.com)
7 */
8
9#include <linux/config.h>
10#include <linux/kernel.h>
11#include <linux/spinlock.h>
12#include <asm/system.h>
13#include <asm/atomic.h>
14
15#ifdef CONFIG_SMP
16spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned = {
17 [0 ... (ATOMIC_HASH_SIZE-1)] = SPIN_LOCK_UNLOCKED
18};
19#endif
20
21#ifdef __LP64__
22unsigned long __xchg64(unsigned long x, unsigned long *ptr)
23{
24 unsigned long temp, flags;
25
26 _atomic_spin_lock_irqsave(ptr, flags);
27 temp = *ptr;
28 *ptr = x;
29 _atomic_spin_unlock_irqrestore(ptr, flags);
30 return temp;
31}
32#endif
33
34unsigned long __xchg32(int x, int *ptr)
35{
36 unsigned long flags;
37 long temp;
38
39 _atomic_spin_lock_irqsave(ptr, flags);
40 temp = (long) *ptr; /* XXX - sign extension wanted? */
41 *ptr = x;
42 _atomic_spin_unlock_irqrestore(ptr, flags);
43 return (unsigned long)temp;
44}
45
46
47unsigned long __xchg8(char x, char *ptr)
48{
49 unsigned long flags;
50 long temp;
51
52 _atomic_spin_lock_irqsave(ptr, flags);
53 temp = (long) *ptr; /* XXX - sign extension wanted? */
54 *ptr = x;
55 _atomic_spin_unlock_irqrestore(ptr, flags);
56 return (unsigned long)temp;
57}
58
59
60#ifdef __LP64__
61unsigned long __cmpxchg_u64(volatile unsigned long *ptr, unsigned long old, unsigned long new)
62{
63 unsigned long flags;
64 unsigned long prev;
65
66 _atomic_spin_lock_irqsave(ptr, flags);
67 if ((prev = *ptr) == old)
68 *ptr = new;
69 _atomic_spin_unlock_irqrestore(ptr, flags);
70 return prev;
71}
72#endif
73
74unsigned long __cmpxchg_u32(volatile unsigned int *ptr, unsigned int old, unsigned int new)
75{
76 unsigned long flags;
77 unsigned int prev;
78
79 _atomic_spin_lock_irqsave(ptr, flags);
80 if ((prev = *ptr) == old)
81 *ptr = new;
82 _atomic_spin_unlock_irqrestore(ptr, flags);
83 return (unsigned long)prev;
84}
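Since PA-RISC has no atomic compare-and-swap instruction, the routines above serialize through a spinlock picked from the __atomic_hash[] array defined at the top of the file. A minimal sketch of the hashing idea behind _atomic_spin_lock_irqsave() (the real macros come from the headers included above; the helper name here is hypothetical):

	/* Illustrative only: map the target address onto one of the
	 * ATOMIC_HASH_SIZE locks so unrelated variables rarely collide. */
	static inline spinlock_t *atomic_hash_lock(volatile void *ptr)
	{
		unsigned long idx = ((unsigned long)ptr / L1_CACHE_BYTES) &
				    (ATOMIC_HASH_SIZE - 1);
		return &__atomic_hash[idx];
	}

	/* Usage, mirroring __cmpxchg_u32() above:
	 *	spin_lock_irqsave(atomic_hash_lock(ptr), flags);
	 *	...read-modify-write *ptr...
	 *	spin_unlock_irqrestore(atomic_hash_lock(ptr), flags);
	 */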
diff --git a/arch/parisc/lib/checksum.c b/arch/parisc/lib/checksum.c
new file mode 100644
index 000000000000..8a1e08068e7d
--- /dev/null
+++ b/arch/parisc/lib/checksum.c
@@ -0,0 +1,148 @@
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * MIPS specific IP/TCP/UDP checksumming routines
7 *
8 * Authors: Ralf Baechle, <ralf@waldorf-gmbh.de>
9 * Lots of code moved from tcp.c and ip.c; see those files
10 * for more names.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * $Id: checksum.c,v 1.3 1997/12/01 17:57:34 ralf Exp $
18 */
19#include <linux/module.h>
20#include <linux/types.h>
21
22#include <net/checksum.h>
23#include <asm/byteorder.h>
24#include <asm/string.h>
25#include <asm/uaccess.h>
26
27#define addc(_t,_r) \
28 __asm__ __volatile__ ( \
29" add %0, %1, %0\n" \
30" addc %0, %%r0, %0\n" \
31 : "=r"(_t) \
32 : "r"(_r), "0"(_t));
33
34static inline unsigned short from32to16(unsigned int x)
35{
36 /* 32 bits --> 16 bits + carry */
37 x = (x & 0xffff) + (x >> 16);
38 /* 16 bits + carry --> 16 bits including carry */
39 x = (x & 0xffff) + (x >> 16);
40 return (unsigned short)x;
41}
42
43static inline unsigned int do_csum(const unsigned char * buff, int len)
44{
45 int odd, count;
46 unsigned int result = 0;
47
48 if (len <= 0)
49 goto out;
50 odd = 1 & (unsigned long) buff;
51 if (odd) {
52 result = be16_to_cpu(*buff);
53 len--;
54 buff++;
55 }
56 count = len >> 1; /* nr of 16-bit words.. */
57 if (count) {
58 if (2 & (unsigned long) buff) {
59 result += *(unsigned short *) buff;
60 count--;
61 len -= 2;
62 buff += 2;
63 }
64 count >>= 1; /* nr of 32-bit words.. */
65 if (count) {
66 while (count >= 4) {
67 unsigned int r1, r2, r3, r4;
68 r1 = *(unsigned int *)(buff + 0);
69 r2 = *(unsigned int *)(buff + 4);
70 r3 = *(unsigned int *)(buff + 8);
71 r4 = *(unsigned int *)(buff + 12);
72 addc(result, r1);
73 addc(result, r2);
74 addc(result, r3);
75 addc(result, r4);
76 count -= 4;
77 buff += 16;
78 }
79 while (count) {
80 unsigned int w = *(unsigned int *) buff;
81 count--;
82 buff += 4;
83 addc(result, w);
84 }
85 result = (result & 0xffff) + (result >> 16);
86 }
87 if (len & 2) {
88 result += *(unsigned short *) buff;
89 buff += 2;
90 }
91 }
92 if (len & 1)
93 result += le16_to_cpu(*buff);
94 result = from32to16(result);
95 if (odd)
96 result = swab16(result);
97out:
98 return result;
99}
100
101/*
102 * computes a partial checksum, e.g. for TCP/UDP fragments
103 */
104unsigned int csum_partial(const unsigned char *buff, int len, unsigned int sum)
105{
106 unsigned int result = do_csum(buff, len);
107 addc(result, sum);
108 return from32to16(result);
109}
110
111EXPORT_SYMBOL(csum_partial);
112
113/*
114 * copy while checksumming, otherwise like csum_partial
115 */
116unsigned int csum_partial_copy_nocheck(const unsigned char *src, unsigned char *dst,
117 int len, unsigned int sum)
118{
119 /*
120 * It's 2:30 am and I don't feel like doing it real ...
121 * This is lots slower than the real thing (tm)
122 */
123 sum = csum_partial(src, len, sum);
124 memcpy(dst, src, len);
125
126 return sum;
127}
128EXPORT_SYMBOL(csum_partial_copy_nocheck);
129
130/*
131 * Copy from userspace and compute checksum. If we catch an exception
132 * then zero the rest of the buffer.
133 */
134unsigned int csum_partial_copy_from_user(const unsigned char __user *src,
135 unsigned char *dst, int len,
136 unsigned int sum, int *err_ptr)
137{
138 int missing;
139
140 missing = copy_from_user(dst, src, len);
141 if (missing) {
142 memset(dst + len - missing, 0, missing);
143 *err_ptr = -EFAULT;
144 }
145
146 return csum_partial(dst, len, sum);
147}
148EXPORT_SYMBOL(csum_partial_copy_from_user);
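For reference, the end-around-carry fold done by from32to16() above can be checked with ordinary arithmetic; a minimal user-space sketch, independent of the patch:

	#include <assert.h>

	/* Same folding as from32to16(): add the carry back in, twice. */
	static unsigned short fold32to16(unsigned int x)
	{
		x = (x & 0xffff) + (x >> 16);	/* 32 bits -> 16 bits + carry  */
		x = (x & 0xffff) + (x >> 16);	/* fold the possible last carry */
		return (unsigned short)x;
	}

	int main(void)
	{
		/* 0xffff + 0xffff = 0x1fffe, which folds back to 0xffff */
		assert(fold32to16(0xffffu + 0xffffu) == 0xffff);
		return 0;
	}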
diff --git a/arch/parisc/lib/debuglocks.c b/arch/parisc/lib/debuglocks.c
new file mode 100644
index 000000000000..1b33fe6e5b7a
--- /dev/null
+++ b/arch/parisc/lib/debuglocks.c
@@ -0,0 +1,277 @@
1/*
2 * Debugging versions of SMP locking primitives.
3 *
4 * Copyright (C) 2004 Thibaut VARENE <varenet@parisc-linux.org>
5 *
6 * Some code stolen from alpha & sparc64 ;)
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 *
22 * We use pdc_printf() throughout the file for all output messages, to avoid
23 * losing messages because of disabled interrupts. Since we're using these
24 * messages for debugging purposes, it makes sense not to send them to the
25 * linux console.
26 */
27
28
29#include <linux/config.h>
30#include <linux/kernel.h>
31#include <linux/sched.h>
32#include <linux/spinlock.h>
33#include <linux/hardirq.h> /* in_interrupt() */
34#include <asm/system.h>
35#include <asm/hardirq.h> /* in_interrupt() */
36#include <asm/pdc.h>
37
38#undef INIT_STUCK
39#define INIT_STUCK 1L << 30
40
41#ifdef CONFIG_DEBUG_SPINLOCK
42
43
44void _dbg_spin_lock(spinlock_t * lock, const char *base_file, int line_no)
45{
46 volatile unsigned int *a;
47 long stuck = INIT_STUCK;
48 void *inline_pc = __builtin_return_address(0);
49 unsigned long started = jiffies;
50 int printed = 0;
51 int cpu = smp_processor_id();
52
53try_again:
54
55 /* Do the actual locking */
56 /* <T-Bone> ggg: we can't get stuck on the outer loop?
57 * <ggg> T-Bone: We can hit the outer loop
58 * a lot if multiple CPUs are constantly racing for a lock
59 * and the backplane is NOT fair about which CPU sees
60 * the update first. But it won't hang since every failed
61 * attempt will drop us back into the inner loop and
62 * decrement `stuck'.
63 * <ggg> K-class and some of the others are NOT fair in the HW
64 * implementation so we could see false positives.
65 * But fixing the lock contention is easier than
66 * fixing the HW to be fair.
67 * <tausq> __ldcw() returns 1 if we get the lock; otherwise we
68 * spin until the value of the lock changes, or we time out.
69 */
70 mb();
71 a = __ldcw_align(lock);
72 while (stuck && (__ldcw(a) == 0))
73 while ((*a == 0) && --stuck);
74 mb();
75
76 if (unlikely(stuck <= 0)) {
77 pdc_printf(
78 "%s:%d: spin_lock(%s/%p) stuck in %s at %p(%d)"
79 " owned by %s:%d in %s at %p(%d)\n",
80 base_file, line_no, lock->module, lock,
81 current->comm, inline_pc, cpu,
82 lock->bfile, lock->bline, lock->task->comm,
83 lock->previous, lock->oncpu);
84 stuck = INIT_STUCK;
85 printed = 1;
86 goto try_again;
87 }
88
89 /* Exiting. Got the lock. */
90 lock->oncpu = cpu;
91 lock->previous = inline_pc;
92 lock->task = current;
93 lock->bfile = (char *)base_file;
94 lock->bline = line_no;
95
96 if (unlikely(printed)) {
97 pdc_printf(
98 "%s:%d: spin_lock grabbed in %s at %p(%d) %ld ticks\n",
99 base_file, line_no, current->comm, inline_pc,
100 cpu, jiffies - started);
101 }
102}
103
104void _dbg_spin_unlock(spinlock_t * lock, const char *base_file, int line_no)
105{
106 CHECK_LOCK(lock);
107 volatile unsigned int *a;
108 mb();
109 a = __ldcw_align(lock);
110 if (unlikely((*a != 0) && lock->babble)) {
111 lock->babble--;
112 pdc_printf(
113 "%s:%d: spin_unlock(%s:%p) not locked\n",
114 base_file, line_no, lock->module, lock);
115 }
116 *a = 1;
117 mb();
118}
119
120int _dbg_spin_trylock(spinlock_t * lock, const char *base_file, int line_no)
121{
122 int ret;
123 volatile unsigned int *a;
124 mb();
125 a = __ldcw_align(lock);
126 ret = (__ldcw(a) != 0);
127 mb();
128 if (ret) {
129 lock->oncpu = smp_processor_id();
130 lock->previous = __builtin_return_address(0);
131 lock->task = current;
132 } else {
133 lock->bfile = (char *)base_file;
134 lock->bline = line_no;
135 }
136 return ret;
137}
138
139#endif /* CONFIG_DEBUG_SPINLOCK */
140
141#ifdef CONFIG_DEBUG_RWLOCK
142
143/* Interrupts trouble detailed explanation, thx Grant:
144 *
145 * o writer (wants to modify data) attempts to acquire the rwlock
146 * o He gets the write lock.
147 * o Interrupts are still enabled, we take an interrupt with the
148 * writer still holding the lock.
149 * o interrupt handler tries to acquire the rwlock for read.
150 * o deadlock since the writer can't release it at this point.
151 *
152 * In general, any use of spinlocks that competes between "base"
153 * level and interrupt level code will risk deadlock. Interrupts
154 * need to be disabled in the base level routines to avoid it.
155 * Or more precisely, only the IRQ the base level routine
156 * is competing with for the lock. But it's more efficient/faster
157 * to just disable all interrupts on that CPU to guarantee
158 * once it gets the lock it can release it quickly too.
159 */
160
161void _dbg_write_lock(rwlock_t *rw, const char *bfile, int bline)
162{
163 void *inline_pc = __builtin_return_address(0);
164 unsigned long started = jiffies;
165 long stuck = INIT_STUCK;
166 int printed = 0;
167 int cpu = smp_processor_id();
168
169 if(unlikely(in_interrupt())) { /* acquiring write lock in interrupt context, bad idea */
170 pdc_printf("write_lock caller: %s:%d, IRQs enabled,\n", bfile, bline);
171 BUG();
172 }
173
174 /* Note: if interrupts are disabled (which is most likely), the printk
175 will never show on the console. We might need a polling method to flush
176 the dmesg buffer anyhow. */
177
178retry:
179 _raw_spin_lock(&rw->lock);
180
181 if(rw->counter != 0) {
182 /* this basically never happens */
183 _raw_spin_unlock(&rw->lock);
184
185 stuck--;
186 if ((unlikely(stuck <= 0)) && (rw->counter < 0)) {
187 pdc_printf(
188 "%s:%d: write_lock stuck on writer"
189 " in %s at %p(%d) %ld ticks\n",
190 bfile, bline, current->comm, inline_pc,
191 cpu, jiffies - started);
192 stuck = INIT_STUCK;
193 printed = 1;
194 }
195 else if (unlikely(stuck <= 0)) {
196 pdc_printf(
197 "%s:%d: write_lock stuck on reader"
198 " in %s at %p(%d) %ld ticks\n",
199 bfile, bline, current->comm, inline_pc,
200 cpu, jiffies - started);
201 stuck = INIT_STUCK;
202 printed = 1;
203 }
204
205 while(rw->counter != 0);
206
207 goto retry;
208 }
209
210 /* got it. now leave without unlocking */
211 rw->counter = -1; /* remember we are locked */
212
213 if (unlikely(printed)) {
214 pdc_printf(
215 "%s:%d: write_lock grabbed in %s at %p(%d) %ld ticks\n",
216 bfile, bline, current->comm, inline_pc,
217 cpu, jiffies - started);
218 }
219}
220
221int _dbg_write_trylock(rwlock_t *rw, const char *bfile, int bline)
222{
223#if 0
224 void *inline_pc = __builtin_return_address(0);
225 int cpu = smp_processor_id();
226#endif
227
228 if(unlikely(in_interrupt())) { /* acquiring write lock in interrupt context, bad idea */
229 pdc_printf("write_lock caller: %s:%d, IRQs enabled,\n", bfile, bline);
230 BUG();
231 }
232
233 /* Note: if interrupts are disabled (which is most likely), the printk
234 will never show on the console. We might need a polling method to flush
235 the dmesg buffer anyhow. */
236
237 _raw_spin_lock(&rw->lock);
238
239 if(rw->counter != 0) {
240 /* this basically never happens */
241 _raw_spin_unlock(&rw->lock);
242 return 0;
243 }
244
245 /* got it. now leave without unlocking */
246 rw->counter = -1; /* remember we are locked */
247#if 0
248 pdc_printf("%s:%d: try write_lock grabbed in %s at %p(%d)\n",
249 bfile, bline, current->comm, inline_pc, cpu);
250#endif
251 return 1;
252}
253
254void _dbg_read_lock(rwlock_t * rw, const char *bfile, int bline)
255{
256#if 0
257 void *inline_pc = __builtin_return_address(0);
258 unsigned long started = jiffies;
259 int cpu = smp_processor_id();
260#endif
261 unsigned long flags;
262
263 local_irq_save(flags);
264 _raw_spin_lock(&rw->lock);
265
266 rw->counter++;
267#if 0
268 pdc_printf(
269 "%s:%d: read_lock grabbed in %s at %p(%d) %ld ticks\n",
270 bfile, bline, current->comm, inline_pc,
271 cpu, jiffies - started);
272#endif
273 _raw_spin_unlock(&rw->lock);
274 local_irq_restore(flags);
275}
276
277#endif /* CONFIG_DEBUG_RWLOCK */
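The interrupt deadlock that CONFIG_DEBUG_RWLOCK is meant to catch (described in the comment above _dbg_write_lock()) looks roughly like this; a hedged sketch, all names hypothetical:

	rwlock_t table_lock;		/* protects some shared table */

	/* Process context: takes the write lock with IRQs left enabled. */
	void update_table(void)
	{
		write_lock(&table_lock);	/* should be write_lock_irqsave() */
		/* ... a device interrupt fires on this CPU right here ... */
		write_unlock(&table_lock);
	}

	/* Interrupt context on the same CPU: spins forever, because the
	 * writer it preempted can never run again to release the lock. */
	void device_irq_handler(void)
	{
		read_lock(&table_lock);
		/* ... */
		read_unlock(&table_lock);
	}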
diff --git a/arch/parisc/lib/fixup.S b/arch/parisc/lib/fixup.S
new file mode 100644
index 000000000000..134f0cd240f5
--- /dev/null
+++ b/arch/parisc/lib/fixup.S
@@ -0,0 +1,89 @@
1/*
2 * Linux/PA-RISC Project (http://www.parisc-linux.org/)
3 *
4 * Copyright (C) 2004 Randolph Chung <tausq@debian.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2, or (at your option)
9 * any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 * Fixup routines for kernel exception handling.
21 */
22#include <linux/config.h>
23#include <asm/offsets.h>
24#include <asm/assembly.h>
25#include <asm/errno.h>
26
27#ifdef CONFIG_SMP
28 .macro get_fault_ip t1 t2
29 addil LT%__per_cpu_offset,%r27
30 LDREG RT%__per_cpu_offset(%r1),\t1
31 /* t2 = smp_processor_id() */
32 mfctl 30,\t2
33 ldw TI_CPU(\t2),\t2
34#ifdef __LP64__
35 extrd,u \t2,63,32,\t2
36#endif
37 /* t2 = &__per_cpu_offset[smp_processor_id()]; */
38 LDREG,s \t2(\t1),\t2
39 addil LT%per_cpu__exception_data,%r27
40 LDREG RT%per_cpu__exception_data(%r1),\t1
41 /* t1 = &__get_cpu_var(exception_data) */
42 add,l \t1,\t2,\t1
43 /* t1 = t1->fault_ip */
44 LDREG EXCDATA_IP(\t1), \t1
45 .endm
46#else
47 .macro get_fault_ip t1 t2
48 /* t1 = &__get_cpu_var(exception_data) */
49 addil LT%per_cpu__exception_data,%r27
50 LDREG RT%per_cpu__exception_data(%r1),\t2
51 /* t1 = t2->fault_ip */
52 LDREG EXCDATA_IP(\t2), \t1
53 .endm
54#endif
55
56 .text
57 .section .fixup, "ax"
58
59 /* get_user() fixups, store -EFAULT in r8, and 0 in r9 */
60 .export fixup_get_user_skip_1
61fixup_get_user_skip_1:
62 get_fault_ip %r1,%r8
63 ldo 4(%r1), %r1
64 ldi -EFAULT, %r8
65 bv %r0(%r1)
66 copy %r0, %r9
67
68 .export fixup_get_user_skip_2
69fixup_get_user_skip_2:
70 get_fault_ip %r1,%r8
71 ldo 8(%r1), %r1
72 ldi -EFAULT, %r8
73 bv %r0(%r1)
74 copy %r0, %r9
75
76 /* put_user() fixups, store -EFAULT in r8 */
77 .export fixup_put_user_skip_1
78fixup_put_user_skip_1:
79 get_fault_ip %r1,%r8
80 ldo 4(%r1), %r1
81 bv %r0(%r1)
82 ldi -EFAULT, %r8
83
84 .export fixup_put_user_skip_2
85fixup_put_user_skip_2:
86 get_fault_ip %r1,%r8
87 ldo 8(%r1), %r1
88 bv %r0(%r1)
89 ldi -EFAULT, %r8
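These stubs are reached through the kernel's exception-table mechanism: every guarded user-space access records the address of the potentially faulting instruction together with a fixup address, and the page-fault handler redirects execution to the fixup (which, above, skips the access and puts -EFAULT in %r8). A rough C-level sketch of the idea, not the exact parisc layout:

	/* Sketch only: one entry per guarded instruction. */
	struct exception_table_entry {
		unsigned long insn;	/* address that may fault */
		unsigned long fixup;	/* where to resume, e.g. fixup_get_user_skip_1 */
	};

	/* In the fault handler, roughly:
	 *	fix = search_exception_tables(regs->iaoq[0]);
	 *	if (fix) {
	 *		regs->iaoq[0] = fix->fixup;   // branch to the fixup stub
	 *		return;                       // stub sets %r8 = -EFAULT
	 *	}
	 */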
diff --git a/arch/parisc/lib/io.c b/arch/parisc/lib/io.c
new file mode 100644
index 000000000000..7c1406ff825e
--- /dev/null
+++ b/arch/parisc/lib/io.c
@@ -0,0 +1,488 @@
1/*
2 * arch/parisc/lib/io.c
3 *
4 * Copyright (c) Matthew Wilcox 2001 for Hewlett-Packard
5 * Copyright (c) Randolph Chung 2001 <tausq@debian.org>
6 *
7 * IO accessing functions which shouldn't be inlined because they're too big
8 */
9
10#include <linux/kernel.h>
11#include <linux/module.h>
12#include <asm/io.h>
13
14/* Copies a block of memory to a device in an efficient manner.
15 * Assumes the device can cope with 32-bit transfers. If it can't,
16 * don't use this function.
17 */
18void memcpy_toio(volatile void __iomem *dst, const void *src, int count)
19{
20 if (((unsigned long)dst & 3) != ((unsigned long)src & 3))
21 goto bytecopy;
22 while ((unsigned long)dst & 3) {
23 writeb(*(char *)src, dst++);
24 src++;
25 count--;
26 }
27 while (count > 3) {
28 __raw_writel(*(u32 *)src, dst);
29 src += 4;
30 dst += 4;
31 count -= 4;
32 }
33 bytecopy:
34 while (count--) {
35 writeb(*(char *)src, dst++);
36 src++;
37 }
38}
39
40/*
41** Copies a block of memory from a device in an efficient manner.
42** Assumes the device can cope with 32-bit transfers. If it can't,
43** don't use this function.
44**
45** CR16 counts on C3000 reading 256 bytes from Symbios 896 RAM:
46** 27341/64 = 427 cyc per int
47** 61311/128 = 478 cyc per short
48** 122637/256 = 479 cyc per byte
49** Ergo bus latencies dominant (not transfer size).
50** Minimize total number of transfers at cost of CPU cycles.
51** TODO: only look at src alignment and adjust the stores to dest.
52*/
53void memcpy_fromio(void *dst, const volatile void __iomem *src, int count)
54{
55 /* first compare alignment of src/dst */
56 if ( (((unsigned long)dst ^ (unsigned long)src) & 1) || (count < 2) )
57 goto bytecopy;
58
59 if ( (((unsigned long)dst ^ (unsigned long)src) & 2) || (count < 4) )
60 goto shortcopy;
61
62 /* Then check for misaligned start address */
63 if ((unsigned long)src & 1) {
64 *(u8 *)dst = readb(src);
65 src++;
66 dst++;
67 count--;
68 if (count < 2) goto bytecopy;
69 }
70
71 if ((unsigned long)src & 2) {
72 *(u16 *)dst = __raw_readw(src);
73 src += 2;
74 dst += 2;
75 count -= 2;
76 }
77
78 while (count > 3) {
79 *(u32 *)dst = __raw_readl(src);
80 dst += 4;
81 src += 4;
82 count -= 4;
83 }
84
85 shortcopy:
86 while (count > 1) {
87 *(u16 *)dst = __raw_readw(src);
88 src += 2;
89 dst += 2;
90 count -= 2;
91 }
92
93 bytecopy:
94 while (count--) {
95 *(char *)dst = readb(src);
96 src++;
97 dst++;
98 }
99}
100
101/* Sets a block of memory on a device to a given value.
102 * Assumes the device can cope with 32-bit transfers. If it can't,
103 * don't use this function.
104 */
105void memset_io(volatile void __iomem *addr, unsigned char val, int count)
106{
107 u32 val32 = (val << 24) | (val << 16) | (val << 8) | val;
108 while ((unsigned long)addr & 3) {
109 writeb(val, addr++);
110 count--;
111 }
112 while (count > 3) {
113 __raw_writel(val32, addr);
114 addr += 4;
115 count -= 4;
116 }
117 while (count--) {
118 writeb(val, addr++);
119 }
120}
121
122/*
123 * Read COUNT 8-bit bytes from port PORT into memory starting at
124 * SRC.
125 */
126void insb (unsigned long port, void *dst, unsigned long count)
127{
128 unsigned char *p;
129
130 p = (unsigned char *)dst;
131
132 while (((unsigned long)p) & 0x3) {
133 if (!count)
134 return;
135 count--;
136 *p = inb(port);
137 p++;
138 }
139
140 while (count >= 4) {
141 unsigned int w;
142 count -= 4;
143 w = inb(port) << 24;
144 w |= inb(port) << 16;
145 w |= inb(port) << 8;
146 w |= inb(port);
147 *(unsigned int *) p = w;
148 p += 4;
149 }
150
151 while (count) {
152 --count;
153 *p = inb(port);
154 p++;
155 }
156}
157
158
159/*
160 * Read COUNT 16-bit words from port PORT into memory starting at
161 * SRC. SRC must be at least short aligned. This is used by the
162 * IDE driver to read disk sectors. Performance is important, but
163 * the interface seems to be slow: just using the inlined version
164 * of the inw() breaks things.
165 */
166void insw (unsigned long port, void *dst, unsigned long count)
167{
168 unsigned int l = 0, l2;
169 unsigned char *p;
170
171 p = (unsigned char *)dst;
172
173 if (!count)
174 return;
175
176 switch (((unsigned long)p) & 0x3)
177 {
178 case 0x00: /* Buffer 32-bit aligned */
179 while (count>=2) {
180
181 count -= 2;
182 l = cpu_to_le16(inw(port)) << 16;
183 l |= cpu_to_le16(inw(port));
184 *(unsigned int *)p = l;
185 p += 4;
186 }
187 if (count) {
188 *(unsigned short *)p = cpu_to_le16(inw(port));
189 }
190 break;
191
192 case 0x02: /* Buffer 16-bit aligned */
193 *(unsigned short *)p = cpu_to_le16(inw(port));
194 p += 2;
195 count--;
196 while (count>=2) {
197
198 count -= 2;
199 l = cpu_to_le16(inw(port)) << 16;
200 l |= cpu_to_le16(inw(port));
201 *(unsigned int *)p = l;
202 p += 4;
203 }
204 if (count) {
205 *(unsigned short *)p = cpu_to_le16(inw(port));
206 }
207 break;
208
209 case 0x01: /* Buffer 8-bit aligned */
210 case 0x03:
211 /* I don't bother with 32bit transfers
212 * in this case, 16bit will have to do -- DE */
213 --count;
214
215 l = cpu_to_le16(inw(port));
216 *p = l >> 8;
217 p++;
218 while (count--)
219 {
220 l2 = cpu_to_le16(inw(port));
221 *(unsigned short *)p = (l & 0xff) << 8 | (l2 >> 8);
222 p += 2;
223 l = l2;
224 }
225 *p = l & 0xff;
226 break;
227 }
228}
229
230
231
232/*
233 * Read COUNT 32-bit words from port PORT into memory starting at
234 * SRC. Now works with any alignment in SRC. Performance is important,
235 * but the interface seems to be slow: just using the inlined version
236 * of the inl() breaks things.
237 */
238void insl (unsigned long port, void *dst, unsigned long count)
239{
240 unsigned int l = 0, l2;
241 unsigned char *p;
242
243 p = (unsigned char *)dst;
244
245 if (!count)
246 return;
247
248 switch (((unsigned long) dst) & 0x3)
249 {
250 case 0x00: /* Buffer 32-bit aligned */
251 while (count--)
252 {
253 *(unsigned int *)p = cpu_to_le32(inl(port));
254 p += 4;
255 }
256 break;
257
258 case 0x02: /* Buffer 16-bit aligned */
259 --count;
260
261 l = cpu_to_le32(inl(port));
262 *(unsigned short *)p = l >> 16;
263 p += 2;
264
265 while (count--)
266 {
267 l2 = cpu_to_le32(inl(port));
268 *(unsigned int *)p = (l & 0xffff) << 16 | (l2 >> 16);
269 p += 4;
270 l = l2;
271 }
272 *(unsigned short *)p = l & 0xffff;
273 break;
274 case 0x01: /* Buffer 8-bit aligned */
275 --count;
276
277 l = cpu_to_le32(inl(port));
278 *(unsigned char *)p = l >> 24;
279 p++;
280 *(unsigned short *)p = (l >> 8) & 0xffff;
281 p += 2;
282 while (count--)
283 {
284 l2 = cpu_to_le32(inl(port));
285 *(unsigned int *)p = (l & 0xff) << 24 | (l2 >> 8);
286 p += 4;
287 l = l2;
288 }
289 *p = l & 0xff;
290 break;
291 case 0x03: /* Buffer 8-bit aligned */
292 --count;
293
294 l = cpu_to_le32(inl(port));
295 *p = l >> 24;
296 p++;
297 while (count--)
298 {
299 l2 = cpu_to_le32(inl(port));
300 *(unsigned int *)p = (l & 0xffffff) << 8 | l2 >> 24;
301 p += 4;
302 l = l2;
303 }
304 *(unsigned short *)p = (l >> 8) & 0xffff;
305 p += 2;
306 *p = l & 0xff;
307 break;
308 }
309}
310
311
312/*
313 * Like insb but in the opposite direction.
314 * Don't worry as much about doing aligned memory transfers:
315 * doing byte reads the "slow" way isn't nearly as slow as
316 * doing byte writes the slow way (no r-m-w cycle).
317 */
318void outsb(unsigned long port, const void * src, unsigned long count)
319{
320 const unsigned char *p;
321
322 p = (const unsigned char *)src;
323 while (count) {
324 count--;
325 outb(*p, port);
326 p++;
327 }
328}
329
330/*
331 * Like insw but in the opposite direction. This is used by the IDE
332 * driver to write disk sectors. Performance is important, but the
333 * interface seems to be slow: just using the inlined version of the
334 * outw() breaks things.
335 */
336void outsw (unsigned long port, const void *src, unsigned long count)
337{
338 unsigned int l = 0, l2;
339 const unsigned char *p;
340
341 p = (const unsigned char *)src;
342
343 if (!count)
344 return;
345
346 switch (((unsigned long)p) & 0x3)
347 {
348 case 0x00: /* Buffer 32-bit aligned */
349 while (count>=2) {
350 count -= 2;
351 l = *(unsigned int *)p;
352 p += 4;
353 outw(le16_to_cpu(l >> 16), port);
354 outw(le16_to_cpu(l & 0xffff), port);
355 }
356 if (count) {
357 outw(le16_to_cpu(*(unsigned short*)p), port);
358 }
359 break;
360
361 case 0x02: /* Buffer 16-bit aligned */
362
363 outw(le16_to_cpu(*(unsigned short*)p), port);
364 p += 2;
365 count--;
366
367 while (count>=2) {
368 count -= 2;
369 l = *(unsigned int *)p;
370 p += 4;
371 outw(le16_to_cpu(l >> 16), port);
372 outw(le16_to_cpu(l & 0xffff), port);
373 }
374 if (count) {
375 outw(le16_to_cpu(*(unsigned short *)p), port);
376 }
377 break;
378
379 case 0x01: /* Buffer 8-bit aligned */
380 /* I don't bother with 32bit transfers
381 * in this case, 16bit will have to do -- DE */
382
383 l = *p << 8;
384 p++;
385 count--;
386 while (count)
387 {
388 count--;
389 l2 = *(unsigned short *)p;
390 p += 2;
391 outw(le16_to_cpu(l | l2 >> 8), port);
392 l = l2 << 8;
393 }
394 l2 = *(unsigned char *)p;
395 outw (le16_to_cpu(l | l2>>8), port);
396 break;
397
398 }
399}
400
401
402/*
403 * Like insl but in the opposite direction. This is used by the IDE
404 * driver to write disk sectors. Works with any alignment in SRC.
405 * Performance is important, but the interface seems to be slow:
406 * just using the inlined version of the outl() breaks things.
407 */
408void outsl (unsigned long port, const void *src, unsigned long count)
409{
410 unsigned int l = 0, l2;
411 const unsigned char *p;
412
413 p = (const unsigned char *)src;
414
415 if (!count)
416 return;
417
418 switch (((unsigned long)p) & 0x3)
419 {
420 case 0x00: /* Buffer 32-bit aligned */
421 while (count--)
422 {
423 outl(le32_to_cpu(*(unsigned int *)p), port);
424 p += 4;
425 }
426 break;
427
428 case 0x02: /* Buffer 16-bit aligned */
429 --count;
430
431 l = *(unsigned short *)p;
432 p += 2;
433
434 while (count--)
435 {
436 l2 = *(unsigned int *)p;
437 p += 4;
438 outl (le32_to_cpu(l << 16 | l2 >> 16), port);
439 l = l2;
440 }
441 l2 = *(unsigned short *)p;
442 outl (le32_to_cpu(l << 16 | l2), port);
443 break;
444 case 0x01: /* Buffer 8-bit aligned */
445 --count;
446
447 l = *p << 24;
448 p++;
449 l |= *(unsigned short *)p << 8;
450 p += 2;
451
452 while (count--)
453 {
454 l2 = *(unsigned int *)p;
455 p += 4;
456 outl (le32_to_cpu(l | l2 >> 24), port);
457 l = l2 << 8;
458 }
459 l2 = *p;
460 outl (le32_to_cpu(l | l2), port);
461 break;
462 case 0x03: /* Buffer 8-bit aligned */
463 --count;
464
465 l = *p << 24;
466 p++;
467
468 while (count--)
469 {
470 l2 = *(unsigned int *)p;
471 p += 4;
472 outl (le32_to_cpu(l | l2 >> 8), port);
473 l = l2 << 24;
474 }
475 l2 = *(unsigned short *)p << 16;
476 p += 2;
477 l2 |= *p;
478 outl (le32_to_cpu(l | l2), port);
479 break;
480 }
481}
482
483EXPORT_SYMBOL(insb);
484EXPORT_SYMBOL(insw);
485EXPORT_SYMBOL(insl);
486EXPORT_SYMBOL(outsb);
487EXPORT_SYMBOL(outsw);
488EXPORT_SYMBOL(outsl);
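Typical driver-side use of the copy helpers above, as a hedged sketch (the device, BAR number, offsets and lengths are made up for illustration):

	/* Copy a status block out of a memory-mapped BAR, then push a small
	 * command buffer back; both helpers assume the device tolerates the
	 * 32-bit accesses they use internally. */
	static int example_read_status(struct pci_dev *pdev, void *buf, const void *cmd)
	{
		void __iomem *regs = ioremap(pci_resource_start(pdev, 0), 4096);

		if (!regs)
			return -ENOMEM;
		memcpy_fromio(buf, regs + 0x100, 256);
		memcpy_toio(regs + 0x200, cmd, 64);
		iounmap(regs);
		return 0;
	}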
diff --git a/arch/parisc/lib/iomap.c b/arch/parisc/lib/iomap.c
new file mode 100644
index 000000000000..290a62e7120b
--- /dev/null
+++ b/arch/parisc/lib/iomap.c
@@ -0,0 +1,422 @@
1/*
2 * iomap.c - Implement iomap interface for PA-RISC
3 * Copyright (c) 2004 Matthew Wilcox
4 */
5
6#include <linux/ioport.h>
7#include <linux/pci.h>
8#include <asm/io.h>
9
10/*
11 * The iomap space on 32-bit PA-RISC is intended to look like this:
12 * 00000000-7fffffff virtual mapped IO
13 * 80000000-8fffffff ISA/EISA port space that can't be virtually mapped
14 * 90000000-9fffffff Dino port space
15 * a0000000-afffffff Astro port space
16 * b0000000-bfffffff PAT port space
17 * c0000000-cfffffff non-swapped memory IO
18 * f0000000-ffffffff legacy IO memory pointers
19 *
20 * For the moment, here's what it looks like:
21 * 80000000-8fffffff All ISA/EISA port space
22 * f0000000-ffffffff legacy IO memory pointers
23 *
24 * On 64-bit, everything is extended, so:
25 * 8000000000000000-8fffffffffffffff All ISA/EISA port space
26 * f000000000000000-ffffffffffffffff legacy IO memory pointers
27 */
28
29/*
30 * Technically, this should be 'if (VMALLOC_START < addr < VMALLOC_END)',
31 * but that's slow and we know it'll be within the first 2GB.
32 */
33#ifdef CONFIG_64BIT
34#define INDIRECT_ADDR(addr) (((unsigned long)(addr) & 1UL<<63) != 0)
35#define ADDR_TO_REGION(addr) (((unsigned long)addr >> 60) & 7)
36#define IOPORT_MAP_BASE (8UL << 60)
37#else
38#define INDIRECT_ADDR(addr) (((unsigned long)(addr) & 1UL<<31) != 0)
39#define ADDR_TO_REGION(addr) (((unsigned long)addr >> 28) & 7)
40#define IOPORT_MAP_BASE (8UL << 28)
41#endif
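/* Worked example (32-bit case, illustrative): ioport_map(0x3f8, 8) returns
 * the cookie 0x800003f8.  INDIRECT_ADDR() sees bit 31 set, ADDR_TO_REGION()
 * gives (0x800003f8 >> 28) & 7 = 0, so accesses go through iomap_ops[0],
 * i.e. ioport_ops, and ADDR2PORT() recovers port 0x3f8.  An ioremap()ed
 * address has the top bit clear and is dereferenced directly by the
 * ioread/iowrite helpers below.
 */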
42
43struct iomap_ops {
44 unsigned int (*read8)(void __iomem *);
45 unsigned int (*read16)(void __iomem *);
46 unsigned int (*read32)(void __iomem *);
47 void (*write8)(u8, void __iomem *);
48 void (*write16)(u16, void __iomem *);
49 void (*write32)(u32, void __iomem *);
50 void (*read8r)(void __iomem *, void *, unsigned long);
51 void (*read16r)(void __iomem *, void *, unsigned long);
52 void (*read32r)(void __iomem *, void *, unsigned long);
53 void (*write8r)(void __iomem *, const void *, unsigned long);
54 void (*write16r)(void __iomem *, const void *, unsigned long);
55 void (*write32r)(void __iomem *, const void *, unsigned long);
56};
57
58/* Generic ioport ops. To be replaced later by specific dino/elroy/wax code */
59
60#define ADDR2PORT(addr) ((unsigned long __force)(addr) & 0xffffff)
61
62static unsigned int ioport_read8(void __iomem *addr)
63{
64 return inb(ADDR2PORT(addr));
65}
66
67static unsigned int ioport_read16(void __iomem *addr)
68{
69 return inw(ADDR2PORT(addr));
70}
71
72static unsigned int ioport_read32(void __iomem *addr)
73{
74 return inl(ADDR2PORT(addr));
75}
76
77static void ioport_write8(u8 datum, void __iomem *addr)
78{
79 outb(datum, ADDR2PORT(addr));
80}
81
82static void ioport_write16(u16 datum, void __iomem *addr)
83{
84 outw(datum, ADDR2PORT(addr));
85}
86
87static void ioport_write32(u32 datum, void __iomem *addr)
88{
89 outl(datum, ADDR2PORT(addr));
90}
91
92static void ioport_read8r(void __iomem *addr, void *dst, unsigned long count)
93{
94 insb(ADDR2PORT(addr), dst, count);
95}
96
97static void ioport_read16r(void __iomem *addr, void *dst, unsigned long count)
98{
99 insw(ADDR2PORT(addr), dst, count);
100}
101
102static void ioport_read32r(void __iomem *addr, void *dst, unsigned long count)
103{
104 insl(ADDR2PORT(addr), dst, count);
105}
106
107static void ioport_write8r(void __iomem *addr, const void *s, unsigned long n)
108{
109 outsb(ADDR2PORT(addr), s, n);
110}
111
112static void ioport_write16r(void __iomem *addr, const void *s, unsigned long n)
113{
114 outsw(ADDR2PORT(addr), s, n);
115}
116
117static void ioport_write32r(void __iomem *addr, const void *s, unsigned long n)
118{
119 outsl(ADDR2PORT(addr), s, n);
120}
121
122static const struct iomap_ops ioport_ops = {
123 ioport_read8,
124 ioport_read16,
125 ioport_read32,
126 ioport_write8,
127 ioport_write16,
128 ioport_write32,
129 ioport_read8r,
130 ioport_read16r,
131 ioport_read32r,
132 ioport_write8r,
133 ioport_write16r,
134 ioport_write32r,
135};
136
137/* Legacy I/O memory ops */
138
139static unsigned int iomem_read8(void __iomem *addr)
140{
141 return readb(addr);
142}
143
144static unsigned int iomem_read16(void __iomem *addr)
145{
146 return readw(addr);
147}
148
149static unsigned int iomem_read32(void __iomem *addr)
150{
151 return readl(addr);
152}
153
154static void iomem_write8(u8 datum, void __iomem *addr)
155{
156 writeb(datum, addr);
157}
158
159static void iomem_write16(u16 datum, void __iomem *addr)
160{
161 writew(datum, addr);
162}
163
164static void iomem_write32(u32 datum, void __iomem *addr)
165{
166 writel(datum, addr);
167}
168
169static void iomem_read8r(void __iomem *addr, void *dst, unsigned long count)
170{
171 while (count--) {
172 *(u8 *)dst = __raw_readb(addr);
173 dst++;
174 }
175}
176
177static void iomem_read16r(void __iomem *addr, void *dst, unsigned long count)
178{
179 while (count--) {
180 *(u16 *)dst = __raw_readw(addr);
181 dst += 2;
182 }
183}
184
185static void iomem_read32r(void __iomem *addr, void *dst, unsigned long count)
186{
187 while (count--) {
188 *(u32 *)dst = __raw_readl(addr);
189 dst += 4;
190 }
191}
192
193static void iomem_write8r(void __iomem *addr, const void *s, unsigned long n)
194{
195 while (n--) {
196 __raw_writeb(*(u8 *)s, addr);
197 s++;
198 }
199}
200
201static void iomem_write16r(void __iomem *addr, const void *s, unsigned long n)
202{
203 while (n--) {
204 __raw_writew(*(u16 *)s, addr);
205 s += 2;
206 }
207}
208
209static void iomem_write32r(void __iomem *addr, const void *s, unsigned long n)
210{
211 while (n--) {
212 __raw_writel(*(u32 *)s, addr);
213 s += 4;
214 }
215}
216
217static const struct iomap_ops iomem_ops = {
218 iomem_read8,
219 iomem_read16,
220 iomem_read32,
221 iomem_write8,
222 iomem_write16,
223 iomem_write32,
224 iomem_read8r,
225 iomem_read16r,
226 iomem_read32r,
227 iomem_write8r,
228 iomem_write16r,
229 iomem_write32r,
230};
231
232const struct iomap_ops *iomap_ops[8] = {
233 [0] = &ioport_ops,
234#ifdef CONFIG_DEBUG_IOREMAP
235 [6] = &iomem_ops,
236#else
237 [7] = &iomem_ops
238#endif
239};
240
241
242unsigned int ioread8(void __iomem *addr)
243{
244 if (unlikely(INDIRECT_ADDR(addr)))
245 return iomap_ops[ADDR_TO_REGION(addr)]->read8(addr);
246 return *((u8 *)addr);
247}
248
249unsigned int ioread16(void __iomem *addr)
250{
251 if (unlikely(INDIRECT_ADDR(addr)))
252 return iomap_ops[ADDR_TO_REGION(addr)]->read16(addr);
253 return le16_to_cpup((u16 *)addr);
254}
255
256unsigned int ioread32(void __iomem *addr)
257{
258 if (unlikely(INDIRECT_ADDR(addr)))
259 return iomap_ops[ADDR_TO_REGION(addr)]->read32(addr);
260 return le32_to_cpup((u32 *)addr);
261}
262
263void iowrite8(u8 datum, void __iomem *addr)
264{
265 if (unlikely(INDIRECT_ADDR(addr))) {
266 iomap_ops[ADDR_TO_REGION(addr)]->write8(datum, addr);
267 } else {
268 *((u8 *)addr) = datum;
269 }
270}
271
272void iowrite16(u16 datum, void __iomem *addr)
273{
274 if (unlikely(INDIRECT_ADDR(addr))) {
275 iomap_ops[ADDR_TO_REGION(addr)]->write16(datum, addr);
276 } else {
277 *((u16 *)addr) = cpu_to_le16(datum);
278 }
279}
280
281void iowrite32(u32 datum, void __iomem *addr)
282{
283 if (unlikely(INDIRECT_ADDR(addr))) {
284 iomap_ops[ADDR_TO_REGION(addr)]->write32(datum, addr);
285 } else {
286 *((u32 *)addr) = cpu_to_le32(datum);
287 }
288}
289
290/* Repeating interfaces */
291
292void ioread8_rep(void __iomem *addr, void *dst, unsigned long count)
293{
294 if (unlikely(INDIRECT_ADDR(addr))) {
295 iomap_ops[ADDR_TO_REGION(addr)]->read8r(addr, dst, count);
296 } else {
297 while (count--) {
298 *(u8 *)dst = *(u8 *)addr;
299 dst++;
300 }
301 }
302}
303
304void ioread16_rep(void __iomem *addr, void *dst, unsigned long count)
305{
306 if (unlikely(INDIRECT_ADDR(addr))) {
307 iomap_ops[ADDR_TO_REGION(addr)]->read16r(addr, dst, count);
308 } else {
309 while (count--) {
310 *(u16 *)dst = *(u16 *)addr;
311 dst += 2;
312 }
313 }
314}
315
316void ioread32_rep(void __iomem *addr, void *dst, unsigned long count)
317{
318 if (unlikely(INDIRECT_ADDR(addr))) {
319 iomap_ops[ADDR_TO_REGION(addr)]->read32r(addr, dst, count);
320 } else {
321 while (count--) {
322 *(u32 *)dst = *(u32 *)addr;
323 dst += 4;
324 }
325 }
326}
327
328void iowrite8_rep(void __iomem *addr, const void *src, unsigned long count)
329{
330 if (unlikely(INDIRECT_ADDR(addr))) {
331 iomap_ops[ADDR_TO_REGION(addr)]->write8r(addr, src, count);
332 } else {
333 while (count--) {
334 *(u8 *)addr = *(u8 *)src;
335 src++;
336 }
337 }
338}
339
340void iowrite16_rep(void __iomem *addr, const void *src, unsigned long count)
341{
342 if (unlikely(INDIRECT_ADDR(addr))) {
343 iomap_ops[ADDR_TO_REGION(addr)]->write16r(addr, src, count);
344 } else {
345 while (count--) {
346 *(u16 *)addr = *(u16 *)src;
347 src += 2;
348 }
349 }
350}
351
352void iowrite32_rep(void __iomem *addr, const void *src, unsigned long count)
353{
354 if (unlikely(INDIRECT_ADDR(addr))) {
355 iomap_ops[ADDR_TO_REGION(addr)]->write32r(addr, src, count);
356 } else {
357 while (count--) {
358 *(u32 *)addr = *(u32 *)src;
359 src += 4;
360 }
361 }
362}
363
364/* Mapping interfaces */
365
366void __iomem *ioport_map(unsigned long port, unsigned int nr)
367{
368 return (void __iomem *)(IOPORT_MAP_BASE | port);
369}
370
371void ioport_unmap(void __iomem *addr)
372{
373 if (!INDIRECT_ADDR(addr)) {
374 iounmap(addr);
375 }
376}
377
378/* Create a virtual mapping cookie for a PCI BAR (memory or IO) */
379void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
380{
381 unsigned long start = pci_resource_start(dev, bar);
382 unsigned long len = pci_resource_len(dev, bar);
383 unsigned long flags = pci_resource_flags(dev, bar);
384
385 if (!len || !start)
386 return NULL;
387 if (maxlen && len > maxlen)
388 len = maxlen;
389 if (flags & IORESOURCE_IO)
390 return ioport_map(start, len);
391 if (flags & IORESOURCE_MEM) {
392 if (flags & IORESOURCE_CACHEABLE)
393 return ioremap(start, len);
394 return ioremap_nocache(start, len);
395 }
396 /* What? */
397 return NULL;
398}
399
400void pci_iounmap(struct pci_dev *dev, void __iomem * addr)
401{
402 if (!INDIRECT_ADDR(addr)) {
403 iounmap(addr);
404 }
405}
406
407EXPORT_SYMBOL(ioread8);
408EXPORT_SYMBOL(ioread16);
409EXPORT_SYMBOL(ioread32);
410EXPORT_SYMBOL(iowrite8);
411EXPORT_SYMBOL(iowrite16);
412EXPORT_SYMBOL(iowrite32);
413EXPORT_SYMBOL(ioread8_rep);
414EXPORT_SYMBOL(ioread16_rep);
415EXPORT_SYMBOL(ioread32_rep);
416EXPORT_SYMBOL(iowrite8_rep);
417EXPORT_SYMBOL(iowrite16_rep);
418EXPORT_SYMBOL(iowrite32_rep);
419EXPORT_SYMBOL(ioport_map);
420EXPORT_SYMBOL(ioport_unmap);
421EXPORT_SYMBOL(pci_iomap);
422EXPORT_SYMBOL(pci_iounmap);
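A short usage sketch of the interface this file implements: a driver maps a BAR without caring whether it is port or memory space, and the ioread/iowrite calls pick the right access method from the cookie (device and register offsets are hypothetical):

	static int example_probe(struct pci_dev *pdev)
	{
		void __iomem *base = pci_iomap(pdev, 0, 0);	/* BAR 0, full length */
		u32 id;

		if (!base)
			return -ENOMEM;
		id = ioread32(base + 0x00);	/* routed via iomap_ops[] if indirect */
		iowrite32(0x1, base + 0x04);	/* made-up "enable" register */
		pci_iounmap(pdev, base);
		return id ? 0 : -ENODEV;
	}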
diff --git a/arch/parisc/lib/lusercopy.S b/arch/parisc/lib/lusercopy.S
new file mode 100644
index 000000000000..a0509855c9a7
--- /dev/null
+++ b/arch/parisc/lib/lusercopy.S
@@ -0,0 +1,193 @@
1/*
2 * User Space Access Routines
3 *
4 * Copyright (C) 2000-2002 Hewlett-Packard (John Marvin)
5 * Copyright (C) 2000 Richard Hirst <rhirst with parisc-linux.org>
6 * Copyright (C) 2001 Matthieu Delahaye <delahaym at esiee.fr>
7 * Copyright (C) 2003 Randolph Chung <tausq with parisc-linux.org>
8 *
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2, or (at your option)
13 * any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 */
24
25/*
26 * These routines still have plenty of room for optimization
27 * (word & doubleword load/store, dual issue, store hints, etc.).
28 */
29
30/*
31 * The following routines assume that space register 3 (sr3) contains
32 * the space id associated with the current users address space.
33 */
34
35
36 .text
37
38#include <asm/assembly.h>
39#include <asm/errno.h>
40
41 /*
42 * get_sr gets the appropriate space value into
43 * sr1 for kernel/user space access, depending
44 * on the flag stored in the task structure.
45 */
46
47 .macro get_sr
48 mfctl %cr30,%r1
49 ldw TI_SEGMENT(%r1),%r22
50 mfsp %sr3,%r1
51 or,<> %r22,%r0,%r0
52 copy %r0,%r1
53 mtsp %r1,%sr1
54 .endm
55
56 .macro fixup_branch lbl
57 ldil L%\lbl, %r1
58 ldo R%\lbl(%r1), %r1
59 bv %r0(%r1)
60 .endm
61
62 /*
63 * long lstrncpy_from_user(char *dst, const char *src, long n)
64 *
65 * Returns -EFAULT if exception before terminator,
66 * N if the entire buffer filled,
67 * otherwise strlen (i.e. excludes zero byte)
68 */
69
70 .export lstrncpy_from_user,code
71lstrncpy_from_user:
72 .proc
73 .callinfo NO_CALLS
74 .entry
75 comib,= 0,%r24,$lsfu_done
76 copy %r24,%r23
77 get_sr
781: ldbs,ma 1(%sr1,%r25),%r1
79$lsfu_loop:
80 stbs,ma %r1,1(%r26)
81 comib,=,n 0,%r1,$lsfu_done
82 addib,<>,n -1,%r24,$lsfu_loop
832: ldbs,ma 1(%sr1,%r25),%r1
84$lsfu_done:
85 sub %r23,%r24,%r28
86$lsfu_exit:
87 bv %r0(%r2)
88 nop
89 .exit
90
91 .section .fixup,"ax"
923: fixup_branch $lsfu_exit
93 ldi -EFAULT,%r28
94 .previous
95
96 .section __ex_table,"aw"
97#ifdef __LP64__
98 .dword 1b,3b
99 .dword 2b,3b
100#else
101 .word 1b,3b
102 .word 2b,3b
103#endif
104 .previous
105
106 .procend
107
108 /*
109 * unsigned long lclear_user(void *to, unsigned long n)
110 *
111 * Returns 0 for success.
112 * otherwise, returns number of bytes not transferred.
113 */
114
115 .export lclear_user,code
116lclear_user:
117 .proc
118 .callinfo NO_CALLS
119 .entry
120 comib,=,n 0,%r25,$lclu_done
121 get_sr
122$lclu_loop:
123 addib,<> -1,%r25,$lclu_loop
1241: stbs,ma %r0,1(%sr1,%r26)
125
126$lclu_done:
127 bv %r0(%r2)
128 copy %r25,%r28
129 .exit
130
131 .section .fixup,"ax"
1322: fixup_branch $lclu_done
133 ldo 1(%r25),%r25
134 .previous
135
136 .section __ex_table,"aw"
137#ifdef __LP64__
138 .dword 1b,2b
139#else
140 .word 1b,2b
141#endif
142 .previous
143
144 .procend
145
146 /*
147 * long lstrnlen_user(char *s, long n)
148 *
149 * Returns 0 if exception before zero byte or reaching N,
150 * N+1 if N would be exceeded,
151 * else strlen + 1 (i.e. includes zero byte).
152 */
153
154 .export lstrnlen_user,code
155lstrnlen_user:
156 .proc
157 .callinfo NO_CALLS
158 .entry
159 comib,= 0,%r25,$lslen_nzero
160 copy %r26,%r24
161 get_sr
1621: ldbs,ma 1(%sr1,%r26),%r1
163$lslen_loop:
164 comib,=,n 0,%r1,$lslen_done
165 addib,<> -1,%r25,$lslen_loop
1662: ldbs,ma 1(%sr1,%r26),%r1
167$lslen_done:
168 bv %r0(%r2)
169 sub %r26,%r24,%r28
170 .exit
171
172$lslen_nzero:
173 b $lslen_done
174 ldo 1(%r26),%r26 /* special case for N == 0 */
175
176 .section .fixup,"ax"
1773: fixup_branch $lslen_done
178 copy %r24,%r26 /* reset r26 so 0 is returned on fault */
179 .previous
180
181 .section __ex_table,"aw"
182#ifdef __LP64__
183 .dword 1b,3b
184 .dword 2b,3b
185#else
186 .word 1b,3b
187 .word 2b,3b
188#endif
189 .previous
190
191 .procend
192
193 .end
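The return conventions documented above map onto callers roughly as in this hedged sketch (the wrapper and buffer names are hypothetical; PATH_MAX comes from <linux/limits.h>):

	/* Validate a user-supplied path using lstrnlen_user()'s convention:
	 * 0 = fault, N+1 = no NUL within N bytes, else strlen + 1. */
	static long check_user_path(char *upath)	/* upath points into user space */
	{
		long len = lstrnlen_user(upath, PATH_MAX);

		if (len == 0)
			return -EFAULT;		/* faulted before finding a NUL */
		if (len > PATH_MAX)
			return -ENAMETOOLONG;	/* no NUL in the first PATH_MAX bytes */
		return len;			/* length including the terminating NUL */
	}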
diff --git a/arch/parisc/lib/memcpy.c b/arch/parisc/lib/memcpy.c
new file mode 100644
index 000000000000..feb1b9f42c2b
--- /dev/null
+++ b/arch/parisc/lib/memcpy.c
@@ -0,0 +1,522 @@
1/*
2 * Optimized memory copy routines.
3 *
4 * Copyright (C) 2004 Randolph Chung <tausq@debian.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2, or (at your option)
9 * any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 * Portions derived from the GNU C Library
21 * Copyright (C) 1991, 1997, 2003 Free Software Foundation, Inc.
22 *
23 * Several strategies are tried to try to get the best performance for various
24 * conditions. In the optimal case, we copy 64-bytes in an unrolled loop using
25 * fp regs. This is followed by loops that copy 32- or 16-bytes at a time using
26 * general registers. Unaligned copies are handled either by aligning the
27 * destination and then using shift-and-write method, or in a few cases by
28 * falling back to a byte-at-a-time copy.
29 *
30 * I chose to implement this in C because it is easier to maintain and debug,
31 * and in my experiments it appears that the C code generated by gcc (3.3/3.4
32 * at the time of writing) is fairly optimal. Unfortunately some of the
33 * semantics of the copy routine (exception handling) is difficult to express
34 * in C, so we have to play some tricks to get it to work.
35 *
36 * All the loads and stores are done via explicit asm() code in order to use
37 * the right space registers.
38 *
39 * Testing with various alignments and buffer sizes shows that this code is
40 * often >10x faster than a simple byte-at-a-time copy, even for strangely
41 * aligned operands. It is interesting to note that the glibc version
42 * of memcpy (written in C) is actually quite fast already. This routine is
43 * able to beat it by 30-40% for aligned copies because of the loop unrolling,
44 * but in some cases the glibc version is still slightly faster. This lends
45 * more credibility that gcc can generate very good code as long as we are
46 * careful.
47 *
48 * TODO:
49 * - cache prefetching needs more experimentation to get optimal settings
50 * - try not to use the post-increment address modifiers; they create additional
51 * interlocks
52 * - replace byte-copy loops with stbys sequences
53 */
54
55#ifdef __KERNEL__
56#include <linux/config.h>
57#include <linux/module.h>
58#include <linux/compiler.h>
59#include <asm/uaccess.h>
60#define s_space "%%sr1"
61#define d_space "%%sr2"
62#else
63#include "memcpy.h"
64#define s_space "%%sr0"
65#define d_space "%%sr0"
66#define pa_memcpy new2_copy
67#endif
68
69DECLARE_PER_CPU(struct exception_data, exception_data);
70
71#define preserve_branch(label) do { \
72 volatile int dummy; \
73 /* The following branch is never taken, it's just here to */ \
74 /* prevent gcc from optimizing away our exception code. */ \
75 if (unlikely(dummy != dummy)) \
76 goto label; \
77} while (0)
78
79#define get_user_space() (segment_eq(get_fs(), KERNEL_DS) ? 0 : mfsp(3))
80#define get_kernel_space() (0)
81
82#define MERGE(w0, sh_1, w1, sh_2) ({ \
83 unsigned int _r; \
84 asm volatile ( \
85 "mtsar %3\n" \
86 "shrpw %1, %2, %%sar, %0\n" \
87 : "=r"(_r) \
88 : "r"(w0), "r"(w1), "r"(sh_2) \
89 ); \
90 _r; \
91})
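/* For reference: MERGE(w0, sh_1, w1, sh_2) above is the shift-and-merge step
 * used for misaligned copies.  shrpw concatenates the two source words and
 * extracts an aligned word, i.e. roughly the plain-C expression
 *
 *	(w0 << sh_1) | (w1 >> sh_2)	with sh_1 + sh_2 == 32
 *
 * except that the shift amount lives in %sar and the funnel shift happens in
 * a single instruction (sketch only, big-endian word order assumed).
 */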
92#define THRESHOLD 16
93
94#ifdef DEBUG_MEMCPY
95#define DPRINTF(fmt, args...) do { printk(KERN_DEBUG "%s:%d:%s ", __FILE__, __LINE__, __FUNCTION__ ); printk(KERN_DEBUG fmt, ##args ); } while (0)
96#else
97#define DPRINTF(fmt, args...)
98#endif
99
100#ifndef __LP64__
101#define EXC_WORD ".word"
102#else
103#define EXC_WORD ".dword"
104#endif
105
106#define def_load_ai_insn(_insn,_sz,_tt,_s,_a,_t,_e) \
107 __asm__ __volatile__ ( \
108 "1:\t" #_insn ",ma " #_sz "(" _s ",%1), %0\n" \
109 "\t.section __ex_table,\"aw\"\n" \
110 "\t" EXC_WORD "\t1b\n" \
111 "\t" EXC_WORD "\t" #_e "\n" \
112 "\t.previous\n" \
113 : _tt(_t), "+r"(_a) \
114 : \
115 : "r8")
116
117#define def_store_ai_insn(_insn,_sz,_tt,_s,_a,_t,_e) \
118 __asm__ __volatile__ ( \
119 "1:\t" #_insn ",ma %1, " #_sz "(" _s ",%0)\n" \
120 "\t.section __ex_table,\"aw\"\n" \
121 "\t" EXC_WORD "\t1b\n" \
122 "\t" EXC_WORD "\t" #_e "\n" \
123 "\t.previous\n" \
124 : "+r"(_a) \
125 : _tt(_t) \
126 : "r8")
127
128#define ldbma(_s, _a, _t, _e) def_load_ai_insn(ldbs,1,"=r",_s,_a,_t,_e)
129#define stbma(_s, _t, _a, _e) def_store_ai_insn(stbs,1,"r",_s,_a,_t,_e)
130#define ldwma(_s, _a, _t, _e) def_load_ai_insn(ldw,4,"=r",_s,_a,_t,_e)
131#define stwma(_s, _t, _a, _e) def_store_ai_insn(stw,4,"r",_s,_a,_t,_e)
132#define flddma(_s, _a, _t, _e) def_load_ai_insn(fldd,8,"=f",_s,_a,_t,_e)
133#define fstdma(_s, _t, _a, _e) def_store_ai_insn(fstd,8,"f",_s,_a,_t,_e)
134
135#define def_load_insn(_insn,_tt,_s,_o,_a,_t,_e) \
136 __asm__ __volatile__ ( \
137 "1:\t" #_insn " " #_o "(" _s ",%1), %0\n" \
138 "\t.section __ex_table,\"aw\"\n" \
139 "\t" EXC_WORD "\t1b\n" \
140 "\t" EXC_WORD "\t" #_e "\n" \
141 "\t.previous\n" \
142 : _tt(_t) \
143 : "r"(_a) \
144 : "r8")
145
146#define def_store_insn(_insn,_tt,_s,_t,_o,_a,_e) \
147 __asm__ __volatile__ ( \
148 "1:\t" #_insn " %0, " #_o "(" _s ",%1)\n" \
149 "\t.section __ex_table,\"aw\"\n" \
150 "\t" EXC_WORD "\t1b\n" \
151 "\t" EXC_WORD "\t" #_e "\n" \
152 "\t.previous\n" \
153 : \
154 : _tt(_t), "r"(_a) \
155 : "r8")
156
157#define ldw(_s,_o,_a,_t,_e) def_load_insn(ldw,"=r",_s,_o,_a,_t,_e)
158#define stw(_s,_t,_o,_a,_e) def_store_insn(stw,"r",_s,_t,_o,_a,_e)
159
160#ifdef CONFIG_PREFETCH
161extern inline void prefetch_src(const void *addr)
162{
163 __asm__("ldw 0(" s_space ",%0), %%r0" : : "r" (addr));
164}
165
166extern inline void prefetch_dst(const void *addr)
167{
168 __asm__("ldd 0(" d_space ",%0), %%r0" : : "r" (addr));
169}
170#else
171#define prefetch_src(addr)
172#define prefetch_dst(addr)
173#endif
174
175/* Copy from a not-aligned src to an aligned dst, using shifts. Handles 4 words
176 * per loop. This code is derived from glibc.
177 */
178static inline unsigned long copy_dstaligned(unsigned long dst, unsigned long src, unsigned long len, unsigned long o_dst, unsigned long o_src, unsigned long o_len)
179{
180 /* gcc complains that a2 and a3 may be uninitialized, but actually
181 * they cannot be. Initialize a2/a3 to shut gcc up.
182 */
183 register unsigned int a0, a1, a2 = 0, a3 = 0;
184 int sh_1, sh_2;
185 struct exception_data *d;
186
187 /* prefetch_src((const void *)src); */
188
189 /* Calculate how to shift a word read at the memory operation
190 aligned srcp to make it aligned for copy. */
191 sh_1 = 8 * (src % sizeof(unsigned int));
192 sh_2 = 8 * sizeof(unsigned int) - sh_1;
193
194 /* Make src aligned by rounding it down. */
195 src &= -sizeof(unsigned int);
196
197 switch (len % 4)
198 {
199 case 2:
200 /* a1 = ((unsigned int *) src)[0];
201 a2 = ((unsigned int *) src)[1]; */
202 ldw(s_space, 0, src, a1, cda_ldw_exc);
203 ldw(s_space, 4, src, a2, cda_ldw_exc);
204 src -= 1 * sizeof(unsigned int);
205 dst -= 3 * sizeof(unsigned int);
206 len += 2;
207 goto do1;
208 case 3:
209 /* a0 = ((unsigned int *) src)[0];
210 a1 = ((unsigned int *) src)[1]; */
211 ldw(s_space, 0, src, a0, cda_ldw_exc);
212 ldw(s_space, 4, src, a1, cda_ldw_exc);
213 src -= 0 * sizeof(unsigned int);
214 dst -= 2 * sizeof(unsigned int);
215 len += 1;
216 goto do2;
217 case 0:
218 if (len == 0)
219 return 0;
220 /* a3 = ((unsigned int *) src)[0];
221 a0 = ((unsigned int *) src)[1]; */
222 ldw(s_space, 0, src, a3, cda_ldw_exc);
223 ldw(s_space, 4, src, a0, cda_ldw_exc);
224 src -=-1 * sizeof(unsigned int);
225 dst -= 1 * sizeof(unsigned int);
226 len += 0;
227 goto do3;
228 case 1:
229 /* a2 = ((unsigned int *) src)[0];
230 a3 = ((unsigned int *) src)[1]; */
231 ldw(s_space, 0, src, a2, cda_ldw_exc);
232 ldw(s_space, 4, src, a3, cda_ldw_exc);
233 src -=-2 * sizeof(unsigned int);
234 dst -= 0 * sizeof(unsigned int);
235 len -= 1;
236 if (len == 0)
237 goto do0;
238 goto do4; /* No-op. */
239 }
240
241 do
242 {
243 /* prefetch_src((const void *)(src + 4 * sizeof(unsigned int))); */
244do4:
245 /* a0 = ((unsigned int *) src)[0]; */
246 ldw(s_space, 0, src, a0, cda_ldw_exc);
247 /* ((unsigned int *) dst)[0] = MERGE (a2, sh_1, a3, sh_2); */
248 stw(d_space, MERGE (a2, sh_1, a3, sh_2), 0, dst, cda_stw_exc);
249do3:
250 /* a1 = ((unsigned int *) src)[1]; */
251 ldw(s_space, 4, src, a1, cda_ldw_exc);
252 /* ((unsigned int *) dst)[1] = MERGE (a3, sh_1, a0, sh_2); */
253 stw(d_space, MERGE (a3, sh_1, a0, sh_2), 4, dst, cda_stw_exc);
254do2:
255 /* a2 = ((unsigned int *) src)[2]; */
256 ldw(s_space, 8, src, a2, cda_ldw_exc);
257 /* ((unsigned int *) dst)[2] = MERGE (a0, sh_1, a1, sh_2); */
258 stw(d_space, MERGE (a0, sh_1, a1, sh_2), 8, dst, cda_stw_exc);
259do1:
260 /* a3 = ((unsigned int *) src)[3]; */
261 ldw(s_space, 12, src, a3, cda_ldw_exc);
262 /* ((unsigned int *) dst)[3] = MERGE (a1, sh_1, a2, sh_2); */
263 stw(d_space, MERGE (a1, sh_1, a2, sh_2), 12, dst, cda_stw_exc);
264
265 src += 4 * sizeof(unsigned int);
266 dst += 4 * sizeof(unsigned int);
267 len -= 4;
268 }
269 while (len != 0);
270
271do0:
272 /* ((unsigned int *) dst)[0] = MERGE (a2, sh_1, a3, sh_2); */
273 stw(d_space, MERGE (a2, sh_1, a3, sh_2), 0, dst, cda_stw_exc);
274
275 preserve_branch(handle_load_error);
276 preserve_branch(handle_store_error);
277
278 return 0;
279
280handle_load_error:
281 __asm__ __volatile__ ("cda_ldw_exc:\n");
282 d = &__get_cpu_var(exception_data);
283 DPRINTF("cda_ldw_exc: o_len=%lu fault_addr=%lu o_src=%lu ret=%lu\n",
284 o_len, d->fault_addr, o_src, o_len - d->fault_addr + o_src);
285 return o_len * 4 - d->fault_addr + o_src;
286
287handle_store_error:
288 __asm__ __volatile__ ("cda_stw_exc:\n");
289 d = &__get_cpu_var(exception_data);
290 DPRINTF("cda_stw_exc: o_len=%lu fault_addr=%lu o_dst=%lu ret=%lu\n",
291 o_len, d->fault_addr, o_dst, o_len - d->fault_addr + o_dst);
292 return o_len * 4 - d->fault_addr + o_dst;
293}
294
295
296/* Returns 0 for success, otherwise, returns number of bytes not transferred. */
297unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len)
298{
299 register unsigned long src, dst, t1, t2, t3;
300 register unsigned char *pcs, *pcd;
301 register unsigned int *pws, *pwd;
302 register double *pds, *pdd;
303 unsigned long ret = 0;
304 unsigned long o_dst, o_src, o_len;
305 struct exception_data *d;
306
307 src = (unsigned long)srcp;
308 dst = (unsigned long)dstp;
309 pcs = (unsigned char *)srcp;
310 pcd = (unsigned char *)dstp;
311
312 o_dst = dst; o_src = src; o_len = len;
313
314 /* prefetch_src((const void *)srcp); */
315
316 if (len < THRESHOLD)
317 goto byte_copy;
318
319 /* Check alignment */
320 t1 = (src ^ dst);
321 if (unlikely(t1 & (sizeof(double)-1)))
322 goto unaligned_copy;
323
324 /* src and dst have same alignment. */
325
326 /* Copy bytes till we are double-aligned. */
327 t2 = src & (sizeof(double) - 1);
328 if (unlikely(t2 != 0)) {
329 t2 = sizeof(double) - t2;
330 while (t2 && len) {
331 /* *pcd++ = *pcs++; */
332 ldbma(s_space, pcs, t3, pmc_load_exc);
333 len--;
334 stbma(d_space, t3, pcd, pmc_store_exc);
335 t2--;
336 }
337 }
338
339 pds = (double *)pcs;
340 pdd = (double *)pcd;
341
342 /* Copy 8 doubles at a time */
343 while (len >= 8*sizeof(double)) {
344 register double r1, r2, r3, r4, r5, r6, r7, r8;
345 /* prefetch_src((char *)pds + L1_CACHE_BYTES); */
346 flddma(s_space, pds, r1, pmc_load_exc);
347 flddma(s_space, pds, r2, pmc_load_exc);
348 flddma(s_space, pds, r3, pmc_load_exc);
349 flddma(s_space, pds, r4, pmc_load_exc);
350 fstdma(d_space, r1, pdd, pmc_store_exc);
351 fstdma(d_space, r2, pdd, pmc_store_exc);
352 fstdma(d_space, r3, pdd, pmc_store_exc);
353 fstdma(d_space, r4, pdd, pmc_store_exc);
354
355#if 0
356 if (L1_CACHE_BYTES <= 32)
357 prefetch_src((char *)pds + L1_CACHE_BYTES);
358#endif
359 flddma(s_space, pds, r5, pmc_load_exc);
360 flddma(s_space, pds, r6, pmc_load_exc);
361 flddma(s_space, pds, r7, pmc_load_exc);
362 flddma(s_space, pds, r8, pmc_load_exc);
363 fstdma(d_space, r5, pdd, pmc_store_exc);
364 fstdma(d_space, r6, pdd, pmc_store_exc);
365 fstdma(d_space, r7, pdd, pmc_store_exc);
366 fstdma(d_space, r8, pdd, pmc_store_exc);
367 len -= 8*sizeof(double);
368 }
369
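	/* Fewer than eight doubles remain; finish the copy with
	 * word-sized loads and stores. */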
370 pws = (unsigned int *)pds;
371 pwd = (unsigned int *)pdd;
372
373word_copy:
374 while (len >= 8*sizeof(unsigned int)) {
375 register unsigned int r1,r2,r3,r4,r5,r6,r7,r8;
376 /* prefetch_src((char *)pws + L1_CACHE_BYTES); */
377 ldwma(s_space, pws, r1, pmc_load_exc);
378 ldwma(s_space, pws, r2, pmc_load_exc);
379 ldwma(s_space, pws, r3, pmc_load_exc);
380 ldwma(s_space, pws, r4, pmc_load_exc);
381 stwma(d_space, r1, pwd, pmc_store_exc);
382 stwma(d_space, r2, pwd, pmc_store_exc);
383 stwma(d_space, r3, pwd, pmc_store_exc);
384 stwma(d_space, r4, pwd, pmc_store_exc);
385
386 ldwma(s_space, pws, r5, pmc_load_exc);
387 ldwma(s_space, pws, r6, pmc_load_exc);
388 ldwma(s_space, pws, r7, pmc_load_exc);
389 ldwma(s_space, pws, r8, pmc_load_exc);
390 stwma(d_space, r5, pwd, pmc_store_exc);
391 stwma(d_space, r6, pwd, pmc_store_exc);
392 stwma(d_space, r7, pwd, pmc_store_exc);
393 stwma(d_space, r8, pwd, pmc_store_exc);
394 len -= 8*sizeof(unsigned int);
395 }
396
397 while (len >= 4*sizeof(unsigned int)) {
398 register unsigned int r1,r2,r3,r4;
399 ldwma(s_space, pws, r1, pmc_load_exc);
400 ldwma(s_space, pws, r2, pmc_load_exc);
401 ldwma(s_space, pws, r3, pmc_load_exc);
402 ldwma(s_space, pws, r4, pmc_load_exc);
403 stwma(d_space, r1, pwd, pmc_store_exc);
404 stwma(d_space, r2, pwd, pmc_store_exc);
405 stwma(d_space, r3, pwd, pmc_store_exc);
406 stwma(d_space, r4, pwd, pmc_store_exc);
407 len -= 4*sizeof(unsigned int);
408 }
409
410 pcs = (unsigned char *)pws;
411 pcd = (unsigned char *)pwd;
412
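	/* byte_copy handles short copies and whatever tail the word and
	 * double loops leave behind. */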
413byte_copy:
414 while (len) {
415 /* *pcd++ = *pcs++; */
416 ldbma(s_space, pcs, t3, pmc_load_exc);
417 stbma(d_space, t3, pcd, pmc_store_exc);
418 len--;
419 }
420
421 return 0;
422
423unaligned_copy:
424	/* We may be aligned on a word even though we are not aligned on a double. */
425	if (likely((t1 & (sizeof(unsigned int)-1)) == 0)) {
426 t2 = src & (sizeof(unsigned int) - 1);
427
428 if (unlikely(t2 != 0)) {
429 t2 = sizeof(unsigned int) - t2;
430 while (t2) {
431 /* *pcd++ = *pcs++; */
432 ldbma(s_space, pcs, t3, pmc_load_exc);
433 stbma(d_space, t3, pcd, pmc_store_exc);
434 len--;
435 t2--;
436 }
437 }
438
439 pws = (unsigned int *)pcs;
440 pwd = (unsigned int *)pcd;
441 goto word_copy;
442 }
443
444 /* Align the destination. */
445 if (unlikely((dst & (sizeof(unsigned int) - 1)) != 0)) {
446 t2 = sizeof(unsigned int) - (dst & (sizeof(unsigned int) - 1));
447 while (t2) {
448 /* *pcd++ = *pcs++; */
449 ldbma(s_space, pcs, t3, pmc_load_exc);
450 stbma(d_space, t3, pcd, pmc_store_exc);
451 len--;
452 t2--;
453 }
454 dst = (unsigned long)pcd;
455 src = (unsigned long)pcs;
456 }
457
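	/* dst is now word aligned while src is offset within its word, so
	 * copy_dstaligned() does the shift-and-merge word copy; a nonzero
	 * return is propagated as the number of bytes not transferred. */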
458 ret = copy_dstaligned(dst, src, len / sizeof(unsigned int),
459 o_dst, o_src, o_len);
460 if (ret)
461 return ret;
462
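	/* Step past the whole words handled by copy_dstaligned(); only the
	 * sub-word tail is left for the byte loop. */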
463 pcs += (len & -sizeof(unsigned int));
464 pcd += (len & -sizeof(unsigned int));
465 len %= sizeof(unsigned int);
466
467 preserve_branch(handle_load_error);
468 preserve_branch(handle_store_error);
469
470 goto byte_copy;
471
472handle_load_error:
473 __asm__ __volatile__ ("pmc_load_exc:\n");
474 d = &__get_cpu_var(exception_data);
475 DPRINTF("pmc_load_exc: o_len=%lu fault_addr=%lu o_src=%lu ret=%lu\n",
476 o_len, d->fault_addr, o_src, o_len - d->fault_addr + o_src);
477 return o_len - d->fault_addr + o_src;
478
479handle_store_error:
480 __asm__ __volatile__ ("pmc_store_exc:\n");
481 d = &__get_cpu_var(exception_data);
482 DPRINTF("pmc_store_exc: o_len=%lu fault_addr=%lu o_dst=%lu ret=%lu\n",
483 o_len, d->fault_addr, o_dst, o_len - d->fault_addr + o_dst);
484 return o_len - d->fault_addr + o_dst;
485}
486
487#ifdef __KERNEL__
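/*
 * The user-copy wrappers only have to select the address space for each
 * side of the copy: pa_memcpy() reads through the source space and writes
 * through the destination space, set up here with mtsp() on space
 * registers 1 and 2.
 */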
488unsigned long copy_to_user(void __user *dst, const void *src, unsigned long len)
489{
490 mtsp(get_kernel_space(), 1);
491 mtsp(get_user_space(), 2);
492 return pa_memcpy((void __force *)dst, src, len);
493}
494
495unsigned long copy_from_user(void *dst, const void __user *src, unsigned long len)
496{
497 mtsp(get_user_space(), 1);
498 mtsp(get_kernel_space(), 2);
499 return pa_memcpy(dst, (void __force *)src, len);
500}
501
502unsigned long copy_in_user(void __user *dst, const void __user *src, unsigned long len)
503{
504 mtsp(get_user_space(), 1);
505 mtsp(get_user_space(), 2);
506 return pa_memcpy((void __force *)dst, (void __force *)src, len);
507}
508
509
510void *memcpy(void *dst, const void *src, size_t count)
511{
512 mtsp(get_kernel_space(), 1);
513 mtsp(get_kernel_space(), 2);
514 pa_memcpy(dst, src, count);
515 return dst;
516}
517
518EXPORT_SYMBOL(copy_to_user);
519EXPORT_SYMBOL(copy_from_user);
520EXPORT_SYMBOL(copy_in_user);
521EXPORT_SYMBOL(memcpy);
522#endif
diff --git a/arch/parisc/lib/memset.c b/arch/parisc/lib/memset.c
new file mode 100644
index 000000000000..1d7929bd7642
--- /dev/null
+++ b/arch/parisc/lib/memset.c
@@ -0,0 +1,91 @@
1/* Copyright (C) 1991, 1997 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
8
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
13
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU C Library; if not, write to the Free
16 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
17 02111-1307 USA. */
18
19/* Slight modifications for pa-risc linux - Paul Bame <bame@debian.org> */
20
21#include <linux/types.h>
22#include <asm/string.h>
23
24#define OPSIZ (BITS_PER_LONG/8)
25typedef unsigned long op_t;
26
27void *
28memset (void *dstpp, int sc, size_t len)
29{
30 unsigned int c = sc;
31 long int dstp = (long int) dstpp;
32
33 if (len >= 8)
34 {
35 size_t xlen;
36 op_t cccc;
37
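      /* Replicate the fill byte into every byte of an op_t word.  */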
38 cccc = (unsigned char) c;
39 cccc |= cccc << 8;
40 cccc |= cccc << 16;
41 if (OPSIZ > 4)
42 /* Do the shift in two steps to avoid warning if long has 32 bits. */
43 cccc |= (cccc << 16) << 16;
44
45 /* There are at least some bytes to set.
46 No need to test for LEN == 0 in this alignment loop. */
47 while (dstp % OPSIZ != 0)
48 {
49 ((unsigned char *) dstp)[0] = c;
50 dstp += 1;
51 len -= 1;
52 }
53
54 /* Write 8 `op_t' per iteration until less than 8 `op_t' remain. */
55 xlen = len / (OPSIZ * 8);
56 while (xlen > 0)
57 {
58 ((op_t *) dstp)[0] = cccc;
59 ((op_t *) dstp)[1] = cccc;
60 ((op_t *) dstp)[2] = cccc;
61 ((op_t *) dstp)[3] = cccc;
62 ((op_t *) dstp)[4] = cccc;
63 ((op_t *) dstp)[5] = cccc;
64 ((op_t *) dstp)[6] = cccc;
65 ((op_t *) dstp)[7] = cccc;
66 dstp += 8 * OPSIZ;
67 xlen -= 1;
68 }
69 len %= OPSIZ * 8;
70
71 /* Write 1 `op_t' per iteration until less than OPSIZ bytes remain. */
72 xlen = len / OPSIZ;
73 while (xlen > 0)
74 {
75 ((op_t *) dstp)[0] = cccc;
76 dstp += OPSIZ;
77 xlen -= 1;
78 }
79 len %= OPSIZ;
80 }
81
82 /* Write the last few bytes. */
83 while (len > 0)
84 {
85 ((unsigned char *) dstp)[0] = c;
86 dstp += 1;
87 len -= 1;
88 }
89
90 return dstpp;
91}