Diffstat (limited to 'arch/alpha')
-rw-r--r--  arch/alpha/Kconfig                       |    2
-rw-r--r--  arch/alpha/include/asm/atomic.h          |    4
-rw-r--r--  arch/alpha/include/asm/fpu.h             |    2
-rw-r--r--  arch/alpha/include/asm/ptrace.h          |    5
-rw-r--r--  arch/alpha/include/asm/socket.h          |    2
-rw-r--r--  arch/alpha/include/asm/uaccess.h         |   34
-rw-r--r--  arch/alpha/include/asm/unistd.h          |    4
-rw-r--r--  arch/alpha/include/asm/word-at-a-time.h  |   55
-rw-r--r--  arch/alpha/kernel/alpha_ksyms.c          |    3
-rw-r--r--  arch/alpha/kernel/entry.S                |  161
-rw-r--r--  arch/alpha/kernel/osf_sys.c              |   49
-rw-r--r--  arch/alpha/kernel/process.c              |   19
-rw-r--r--  arch/alpha/kernel/systbls.S              |    4
-rw-r--r--  arch/alpha/lib/Makefile                  |    2
-rw-r--r--  arch/alpha/lib/ev6-strncpy_from_user.S   |  424
-rw-r--r--  arch/alpha/lib/ev67-strlen_user.S        |  107
-rw-r--r--  arch/alpha/lib/strlen_user.S             |   91
-rw-r--r--  arch/alpha/lib/strncpy_from_user.S       |  339
-rw-r--r--  arch/alpha/mm/fault.c                    |   36
-rw-r--r--  arch/alpha/oprofile/common.c             |    1
20 files changed, 177 insertions, 1167 deletions
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index d5b9b5e645cc..9944dedee5b1 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -18,6 +18,8 @@ config ALPHA
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select GENERIC_SMP_IDLE_THREAD
 	select GENERIC_CMOS_UPDATE
+	select GENERIC_STRNCPY_FROM_USER
+	select GENERIC_STRNLEN_USER
 	help
 	  The Alpha is a 64-bit general-purpose processor designed and
 	  marketed by the Digital Equipment Corporation of blessed memory,
diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h
index 3bb7ffeae3bc..c2cbe4fc391c 100644
--- a/arch/alpha/include/asm/atomic.h
+++ b/arch/alpha/include/asm/atomic.h
@@ -14,8 +14,8 @@
  */
 
 
-#define ATOMIC_INIT(i)		( (atomic_t) { (i) } )
-#define ATOMIC64_INIT(i)	( (atomic64_t) { (i) } )
+#define ATOMIC_INIT(i)		{ (i) }
+#define ATOMIC64_INIT(i)	{ (i) }
 
 #define atomic_read(v)		(*(volatile int *)&(v)->counter)
 #define atomic64_read(v)	(*(volatile long *)&(v)->counter)
diff --git a/arch/alpha/include/asm/fpu.h b/arch/alpha/include/asm/fpu.h
index db00f7885faa..e477bcd5b94a 100644
--- a/arch/alpha/include/asm/fpu.h
+++ b/arch/alpha/include/asm/fpu.h
@@ -1,7 +1,9 @@
 #ifndef __ASM_ALPHA_FPU_H
 #define __ASM_ALPHA_FPU_H
 
+#ifdef __KERNEL__
 #include <asm/special_insns.h>
+#endif
 
 /*
  * Alpha floating-point control register defines:
diff --git a/arch/alpha/include/asm/ptrace.h b/arch/alpha/include/asm/ptrace.h
index fd698a174f26..b87755a19554 100644
--- a/arch/alpha/include/asm/ptrace.h
+++ b/arch/alpha/include/asm/ptrace.h
@@ -76,7 +76,10 @@ struct switch_stack {
 #define task_pt_regs(task) \
   ((struct pt_regs *) (task_stack_page(task) + 2*PAGE_SIZE) - 1)
 
-#define force_successful_syscall_return() (task_pt_regs(current)->r0 = 0)
+#define current_pt_regs() \
+  ((struct pt_regs *) ((char *)current_thread_info() + 2*PAGE_SIZE) - 1)
+
+#define force_successful_syscall_return() (current_pt_regs()->r0 = 0)
 
 #endif
 
diff --git a/arch/alpha/include/asm/socket.h b/arch/alpha/include/asm/socket.h
index dcb221a4b5be..7d2f75be932e 100644
--- a/arch/alpha/include/asm/socket.h
+++ b/arch/alpha/include/asm/socket.h
@@ -76,9 +76,11 @@
 /* Instruct lower device to use last 4-bytes of skb data as FCS */
 #define SO_NOFCS		43
 
+#ifdef __KERNEL__
 /* O_NONBLOCK clashes with the bits used for socket types.  Therefore we
  * have to define SOCK_NONBLOCK to a different value here.
  */
 #define SOCK_NONBLOCK	0x40000000
+#endif /* __KERNEL__ */
 
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/alpha/include/asm/uaccess.h b/arch/alpha/include/asm/uaccess.h
index b49ec2f8d6e3..766fdfde2b7a 100644
--- a/arch/alpha/include/asm/uaccess.h
+++ b/arch/alpha/include/asm/uaccess.h
@@ -433,36 +433,12 @@ clear_user(void __user *to, long len)
 #undef __module_address
 #undef __module_call
 
-/* Returns: -EFAULT if exception before terminator, N if the entire
-   buffer filled, else strlen.  */
-
-extern long __strncpy_from_user(char *__to, const char __user *__from, long __to_len);
-
-extern inline long
-strncpy_from_user(char *to, const char __user *from, long n)
-{
-	long ret = -EFAULT;
-	if (__access_ok((unsigned long)from, 0, get_fs()))
-		ret = __strncpy_from_user(to, from, n);
-	return ret;
-}
-
-/* Returns: 0 if bad, string length+1 (memory size) of string if ok */
-extern long __strlen_user(const char __user *);
-
-extern inline long strlen_user(const char __user *str)
-{
-	return access_ok(VERIFY_READ,str,0) ? __strlen_user(str) : 0;
-}
-
-/* Returns: 0 if exception before NUL or reaching the supplied limit (N),
- * a value greater than N if the limit would be exceeded, else strlen.  */
-extern long __strnlen_user(const char __user *, long);
-
-extern inline long strnlen_user(const char __user *str, long n)
-{
-	return access_ok(VERIFY_READ,str,0) ? __strnlen_user(str, n) : 0;
-}
+#define user_addr_max() \
+	(segment_eq(get_fs(), USER_DS) ? TASK_SIZE : ~0UL)
+
+extern long strncpy_from_user(char *dest, const char __user *src, long count);
+extern __must_check long strlen_user(const char __user *str);
+extern __must_check long strnlen_user(const char __user *str, long n);
 
 /*
  * About the exception table:
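
The replacement declarations keep the long-standing contract that the removed comments described: strncpy_from_user() returns the string length on success, the full count if the buffer filled before a NUL was found, or -EFAULT on a fault before the terminator. A minimal, hypothetical caller of the generic routine (the function and buffer names here are invented for illustration, not part of this change):

	/* Hypothetical illustration of the strncpy_from_user() contract. */
	static int example_fetch_name(char kbuf[32], const char __user *uname)
	{
		long len = strncpy_from_user(kbuf, uname, 32);

		if (len < 0)
			return len;		/* -EFAULT: faulted before the NUL */
		if (len == 32)
			return -ENAMETOOLONG;	/* buffer filled, no NUL copied */
		return 0;			/* kbuf holds a NUL-terminated copy */
	}
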
diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h
index 633b23b0664a..a31a78eac9b9 100644
--- a/arch/alpha/include/asm/unistd.h
+++ b/arch/alpha/include/asm/unistd.h
@@ -465,10 +465,12 @@
 #define __NR_setns			501
 #define __NR_accept4			502
 #define __NR_sendmmsg			503
+#define __NR_process_vm_readv		504
+#define __NR_process_vm_writev		505
 
 #ifdef __KERNEL__
 
-#define NR_SYSCALLS			504
+#define NR_SYSCALLS			506
 
 #define __ARCH_WANT_OLD_READDIR
 #define __ARCH_WANT_STAT64
diff --git a/arch/alpha/include/asm/word-at-a-time.h b/arch/alpha/include/asm/word-at-a-time.h
new file mode 100644
index 000000000000..6b340d0f1521
--- /dev/null
+++ b/arch/alpha/include/asm/word-at-a-time.h
@@ -0,0 +1,55 @@
+#ifndef _ASM_WORD_AT_A_TIME_H
+#define _ASM_WORD_AT_A_TIME_H
+
+#include <asm/compiler.h>
+
+/*
+ * word-at-a-time interface for Alpha.
+ */
+
+/*
+ * We do not use the word_at_a_time struct on Alpha, but it needs to be
+ * implemented to humour the generic code.
+ */
+struct word_at_a_time {
+	const unsigned long unused;
+};
+
+#define WORD_AT_A_TIME_CONSTANTS { 0 }
+
+/* Return nonzero if val has a zero */
+static inline unsigned long has_zero(unsigned long val, unsigned long *bits, const struct word_at_a_time *c)
+{
+	unsigned long zero_locations = __kernel_cmpbge(0, val);
+	*bits = zero_locations;
+	return zero_locations;
+}
+
+static inline unsigned long prep_zero_mask(unsigned long val, unsigned long bits, const struct word_at_a_time *c)
+{
+	return bits;
+}
+
+#define create_zero_mask(bits) (bits)
+
+static inline unsigned long find_zero(unsigned long bits)
+{
+#if defined(CONFIG_ALPHA_EV6) && defined(CONFIG_ALPHA_EV67)
+	/* Simple if have CIX instructions */
+	return __kernel_cttz(bits);
+#else
+	unsigned long t1, t2, t3;
+	/* Retain lowest set bit only */
+	bits &= -bits;
+	/* Binary search for lowest set bit */
+	t1 = bits & 0xf0;
+	t2 = bits & 0xcc;
+	t3 = bits & 0xaa;
+	if (t1) t1 = 4;
+	if (t2) t2 = 2;
+	if (t3) t3 = 1;
+	return t1 + t2 + t3;
+#endif
+}
+
+#endif /* _ASM_WORD_AT_A_TIME_H */
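
The non-CIX find_zero() above is the classic fixed-point trick: cmpbge against zero yields a byte mask with bit i set iff byte i of the word is zero, and a three-step binary search turns the lowest set bit into a byte index. Below is a minimal userspace sketch (not kernel code) of how the generic word-at-a-time string routines drive these helpers; cmpbge_zero() is a portable stand-in for Alpha's __kernel_cmpbge(0, val), the other names are invented for illustration, and it assumes a little-endian machine (as Alpha is) with 8-byte words:

	#include <stdio.h>
	#include <string.h>

	/* Portable stand-in for __kernel_cmpbge(0, val): bit i set iff byte i == 0. */
	static unsigned long cmpbge_zero(unsigned long val)
	{
		unsigned long bits = 0;
		for (int i = 0; i < 8; i++)
			if (((val >> (8 * i)) & 0xff) == 0)
				bits |= 1ul << i;
		return bits;
	}

	/* The non-CIX binary search from the header above. */
	static unsigned long find_zero(unsigned long bits)
	{
		unsigned long t1, t2, t3;
		bits &= -bits;		/* retain lowest set bit only */
		t1 = bits & 0xf0;	/* binary search for its index */
		t2 = bits & 0xcc;
		t3 = bits & 0xaa;
		if (t1) t1 = 4;
		if (t2) t2 = 2;
		if (t3) t3 = 1;
		return t1 + t2 + t3;
	}

	/* One 8-byte probe per iteration instead of one byte at a time.
	   Safe here only because buf below is padded with zero bytes. */
	static size_t wordwise_strlen(const char *s)
	{
		size_t len = 0;
		for (;;) {
			unsigned long val, bits;
			memcpy(&val, s + len, 8);
			bits = cmpbge_zero(val);
			if (bits)		/* has_zero(): a NUL is in this word */
				return len + find_zero(bits);
			len += 8;
		}
	}

	int main(void)
	{
		char buf[32] = "alpha";			/* zero padding guarantees a NUL */
		printf("%zu\n", wordwise_strlen(buf));	/* prints 5 */
		return 0;
	}
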
diff --git a/arch/alpha/kernel/alpha_ksyms.c b/arch/alpha/kernel/alpha_ksyms.c
index d96e742d4dc2..15fa821d09cd 100644
--- a/arch/alpha/kernel/alpha_ksyms.c
+++ b/arch/alpha/kernel/alpha_ksyms.c
@@ -52,7 +52,6 @@ EXPORT_SYMBOL(alpha_write_fp_reg_s);
 
 /* entry.S */
 EXPORT_SYMBOL(kernel_thread);
-EXPORT_SYMBOL(kernel_execve);
 
 /* Networking helper routines. */
 EXPORT_SYMBOL(csum_tcpudp_magic);
@@ -74,8 +73,6 @@ EXPORT_SYMBOL(alpha_fp_emul);
  */
 EXPORT_SYMBOL(__copy_user);
 EXPORT_SYMBOL(__do_clear_user);
-EXPORT_SYMBOL(__strncpy_from_user);
-EXPORT_SYMBOL(__strnlen_user);
 
 /*
  * SMP-specific symbols.
diff --git a/arch/alpha/kernel/entry.S b/arch/alpha/kernel/entry.S
index 6d159cee5f2f..ec0da0567ab5 100644
--- a/arch/alpha/kernel/entry.S
+++ b/arch/alpha/kernel/entry.S
@@ -663,58 +663,6 @@ kernel_thread:
 	br	ret_to_kernel
 .end kernel_thread
 
-/*
- * kernel_execve(path, argv, envp)
- */
-	.align	4
-	.globl	kernel_execve
-	.ent	kernel_execve
-kernel_execve:
-	/* We can be called from a module.  */
-	ldgp	$gp, 0($27)
-	lda	$sp, -(32+SIZEOF_PT_REGS+8)($sp)
-	.frame	$sp, 32+SIZEOF_PT_REGS+8, $26, 0
-	stq	$26, 0($sp)
-	stq	$16, 8($sp)
-	stq	$17, 16($sp)
-	stq	$18, 24($sp)
-	.prologue 1
-
-	lda	$16, 32($sp)
-	lda	$17, 0
-	lda	$18, SIZEOF_PT_REGS
-	bsr	$26, memset			!samegp
-
-	/* Avoid the HAE being gratuitously wrong, which would cause us
-	   to do the whole turn off interrupts thing and restore it.  */
-	ldq	$2, alpha_mv+HAE_CACHE
-	stq	$2, 152+32($sp)
-
-	ldq	$16, 8($sp)
-	ldq	$17, 16($sp)
-	ldq	$18, 24($sp)
-	lda	$19, 32($sp)
-	bsr	$26, do_execve			!samegp
-
-	ldq	$26, 0($sp)
-	bne	$0, 1f				/* error! */
-
-	/* Move the temporary pt_regs struct from its current location
-	   to the top of the kernel stack frame.  See copy_thread for
-	   details for a normal process.  */
-	lda	$16, 0x4000 - SIZEOF_PT_REGS($8)
-	lda	$17, 32($sp)
-	lda	$18, SIZEOF_PT_REGS
-	bsr	$26, memmove			!samegp
-
-	/* Take that over as our new stack frame and visit userland!  */
-	lda	$sp, 0x4000 - SIZEOF_PT_REGS($8)
-	br	$31, ret_from_sys_call
-
-1:	lda	$sp, 32+SIZEOF_PT_REGS+8($sp)
-	ret
-.end kernel_execve
-
 
 /*
  * Special system calls.  Most of these are special in that they either
@@ -797,115 +745,6 @@ sys_rt_sigreturn:
 .end sys_rt_sigreturn
 
 	.align	4
-	.globl	sys_sethae
-	.ent	sys_sethae
-sys_sethae:
-	.prologue 0
-	stq	$16, 152($sp)
-	ret
-.end sys_sethae
-
-	.align	4
-	.globl	osf_getpriority
-	.ent	osf_getpriority
-osf_getpriority:
-	lda	$sp, -16($sp)
-	stq	$26, 0($sp)
-	.prologue 0
-
-	jsr	$26, sys_getpriority
-
-	ldq	$26, 0($sp)
-	blt	$0, 1f
-
-	/* Return value is the unbiased priority, i.e. 20 - prio.
-	   This does result in negative return values, so signal
-	   no error by writing into the R0 slot.  */
-	lda	$1, 20
-	stq	$31, 16($sp)
-	subl	$1, $0, $0
-	unop
-
-1:	lda	$sp, 16($sp)
-	ret
-.end osf_getpriority
-
-	.align	4
-	.globl	sys_getxuid
-	.ent	sys_getxuid
-sys_getxuid:
-	.prologue 0
-	ldq	$2, TI_TASK($8)
-	ldq	$3, TASK_CRED($2)
-	ldl	$0, CRED_UID($3)
-	ldl	$1, CRED_EUID($3)
-	stq	$1, 80($sp)
-	ret
-.end sys_getxuid
-
-	.align	4
-	.globl	sys_getxgid
-	.ent	sys_getxgid
-sys_getxgid:
-	.prologue 0
-	ldq	$2, TI_TASK($8)
-	ldq	$3, TASK_CRED($2)
-	ldl	$0, CRED_GID($3)
-	ldl	$1, CRED_EGID($3)
-	stq	$1, 80($sp)
-	ret
-.end sys_getxgid
-
-	.align	4
-	.globl	sys_getxpid
-	.ent	sys_getxpid
-sys_getxpid:
-	.prologue 0
-	ldq	$2, TI_TASK($8)
-
-	/* See linux/kernel/timer.c sys_getppid for discussion
-	   about this loop.  */
-	ldq	$3, TASK_GROUP_LEADER($2)
-	ldq	$4, TASK_REAL_PARENT($3)
-	ldl	$0, TASK_TGID($2)
-1:	ldl	$1, TASK_TGID($4)
-#ifdef CONFIG_SMP
-	mov	$4, $5
-	mb
-	ldq	$3, TASK_GROUP_LEADER($2)
-	ldq	$4, TASK_REAL_PARENT($3)
-	cmpeq	$4, $5, $5
-	beq	$5, 1b
-#endif
-	stq	$1, 80($sp)
-	ret
-.end sys_getxpid
-
-	.align	4
-	.globl	sys_alpha_pipe
-	.ent	sys_alpha_pipe
-sys_alpha_pipe:
-	lda	$sp, -16($sp)
-	stq	$26, 0($sp)
-	.prologue 0
-
-	mov	$31, $17
-	lda	$16, 8($sp)
-	jsr	$26, do_pipe_flags
-
-	ldq	$26, 0($sp)
-	bne	$0, 1f
-
-	/* The return values are in $0 and $20.  */
-	ldl	$1, 12($sp)
-	ldl	$0, 8($sp)
-
-	stq	$1, 80+16($sp)
-1:	lda	$sp, 16($sp)
-	ret
-.end sys_alpha_pipe
-
-	.align	4
 	.globl	sys_execve
 	.ent	sys_execve
 sys_execve:
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index 98a103621af6..bc1acdda7a5e 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -1404,3 +1404,52 @@ SYSCALL_DEFINE3(osf_writev, unsigned long, fd,
 }
 
 #endif
+
+SYSCALL_DEFINE2(osf_getpriority, int, which, int, who)
+{
+	int prio = sys_getpriority(which, who);
+	if (prio >= 0) {
+		/* Return value is the unbiased priority, i.e. 20 - prio.
+		   This does result in negative return values, so signal
+		   no error */
+		force_successful_syscall_return();
+		prio = 20 - prio;
+	}
+	return prio;
+}
+
+SYSCALL_DEFINE0(getxuid)
+{
+	current_pt_regs()->r20 = sys_geteuid();
+	return sys_getuid();
+}
+
+SYSCALL_DEFINE0(getxgid)
+{
+	current_pt_regs()->r20 = sys_getegid();
+	return sys_getgid();
+}
+
+SYSCALL_DEFINE0(getxpid)
+{
+	current_pt_regs()->r20 = sys_getppid();
+	return sys_getpid();
+}
+
+SYSCALL_DEFINE0(alpha_pipe)
+{
+	int fd[2];
+	int res = do_pipe_flags(fd, 0);
+	if (!res) {
+		/* The return values are in $0 and $20.  */
+		current_pt_regs()->r20 = fd[1];
+		res = fd[0];
+	}
+	return res;
+}
+
+SYSCALL_DEFINE1(sethae, unsigned long, val)
+{
+	current_pt_regs()->hae = val;
+	return 0;
+}
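
Each of these C wrappers reproduces what the deleted entry.S stubs did by hand: the primary result is returned normally (landing in $0), any secondary result is written into the saved register area via current_pt_regs() so the syscall exit path restores it into $20, and force_successful_syscall_return() clears the error indication when a legitimately negative value (the unbiased priority) is returned. A hypothetical two-value syscall following the same convention (the name and values are invented for illustration):

	/* Hypothetical example of the Alpha two-register return convention. */
	SYSCALL_DEFINE0(example_getxtwo)
	{
		current_pt_regs()->r20 = 2;	/* secondary result, restored into $20 */
		return 1;			/* primary result, returned in $0 */
	}
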
diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c
index 153d3fce3e8e..d6fde98b74b3 100644
--- a/arch/alpha/kernel/process.c
+++ b/arch/alpha/kernel/process.c
@@ -455,3 +455,22 @@ get_wchan(struct task_struct *p)
 	}
 	return pc;
 }
+
+int kernel_execve(const char *path, const char *const argv[], const char *const envp[])
+{
+	/* Avoid the HAE being gratuitously wrong, which would cause us
+	   to do the whole turn off interrupts thing and restore it.  */
+	struct pt_regs regs = {.hae = alpha_mv.hae_cache};
+	int err = do_execve(path, argv, envp, &regs);
+	if (!err) {
+		struct pt_regs *p = current_pt_regs();
+		/* copy regs to normal position and off to userland we go... */
+		*p = regs;
+		__asm__ __volatile__ (
+			"mov %0, $sp;"
+			"br $31, ret_from_sys_call"
+			: : "r"(p));
+	}
+	return err;
+}
+EXPORT_SYMBOL(kernel_execve);
diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S
index 87835235f114..2ac6b45c3e00 100644
--- a/arch/alpha/kernel/systbls.S
+++ b/arch/alpha/kernel/systbls.S
@@ -111,7 +111,7 @@ sys_call_table:
 	.quad sys_socket
 	.quad sys_connect
 	.quad sys_accept
-	.quad osf_getpriority			/* 100 */
+	.quad sys_osf_getpriority		/* 100 */
 	.quad sys_send
 	.quad sys_recv
 	.quad sys_sigreturn
@@ -522,6 +522,8 @@ sys_call_table:
 	.quad sys_setns
 	.quad sys_accept4
 	.quad sys_sendmmsg
+	.quad sys_process_vm_readv
+	.quad sys_process_vm_writev		/* 505 */
 
 	.size sys_call_table, . - sys_call_table
 	.type sys_call_table, @object
diff --git a/arch/alpha/lib/Makefile b/arch/alpha/lib/Makefile
index c0a83ab62b78..59660743237c 100644
--- a/arch/alpha/lib/Makefile
+++ b/arch/alpha/lib/Makefile
@@ -31,8 +31,6 @@ lib-y = __divqu.o __remqu.o __divlu.o __remlu.o \
 	$(ev6-y)memchr.o \
 	$(ev6-y)copy_user.o \
 	$(ev6-y)clear_user.o \
-	$(ev6-y)strncpy_from_user.o \
-	$(ev67-y)strlen_user.o \
 	$(ev6-y)csum_ipv6_magic.o \
 	$(ev6-y)clear_page.o \
 	$(ev6-y)copy_page.o \
diff --git a/arch/alpha/lib/ev6-strncpy_from_user.S b/arch/alpha/lib/ev6-strncpy_from_user.S
deleted file mode 100644
index d2e28178cacc..000000000000
--- a/arch/alpha/lib/ev6-strncpy_from_user.S
+++ /dev/null
@@ -1,424 +0,0 @@
-/*
- * arch/alpha/lib/ev6-strncpy_from_user.S
- * 21264 version contributed by Rick Gorton <rick.gorton@alpha-processor.com>
- *
- * Just like strncpy except in the return value:
- *
- *	-EFAULT if an exception occurs before the terminator is copied.
- *	N if the buffer filled.
- *
- * Otherwise the length of the string is returned.
- *
- * Much of the information about 21264 scheduling/coding comes from:
- *	Compiler Writer's Guide for the Alpha 21264
- *	abbreviated as 'CWG' in other comments here
- *	ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html
- * Scheduling notation:
- *	E	- either cluster
- *	U	- upper subcluster; U0 - subcluster U0; U1 - subcluster U1
- *	L	- lower subcluster; L0 - subcluster L0; L1 - subcluster L1
- * A bunch of instructions got moved and temp registers were changed
- * to aid in scheduling.  Control flow was also re-arranged to eliminate
- * branches, and to provide longer code sequences to enable better scheduling.
- * A total rewrite (using byte load/stores for start & tail sequences)
- * is desirable, but very difficult to do without a from-scratch rewrite.
- * Save that for the future.
- */
-
-
-#include <asm/errno.h>
-#include <asm/regdef.h>
-
-
-/* Allow an exception for an insn; exit if we get one.  */
-#define EX(x,y...)			\
-	99: x,##y;			\
-	.section __ex_table,"a";	\
-	.long 99b - .;			\
-	lda $31, $exception-99b($0);	\
-	.previous
-
-
-	.set noat
-	.set noreorder
-	.text
-
-	.globl __strncpy_from_user
-	.ent __strncpy_from_user
-	.frame $30, 0, $26
-	.prologue 0
-
-	.align 4
-__strncpy_from_user:
-	and a0, 7, t3		# E : find dest misalignment
-	beq a2, $zerolength	# U :
-
-	/* Are source and destination co-aligned?  */
-	mov a0, v0		# E : save the string start
-	xor a0, a1, t4		# E :
-	EX( ldq_u t1, 0(a1) )	# L : Latency=3 load first quadword
-	ldq_u t0, 0(a0)		# L : load first (partial) aligned dest quadword
-
-	addq a2, t3, a2		# E : bias count by dest misalignment
-	subq a2, 1, a3		# E :
-	addq zero, 1, t10	# E :
-	and t4, 7, t4		# E : misalignment between the two
-
-	and a3, 7, t6		# E : number of tail bytes
-	sll t10, t6, t10	# E : t10 = bitmask of last count byte
-	bne t4, $unaligned	# U :
-	lda t2, -1		# E : build a mask against false zero
-
-	/*
-	 * We are co-aligned; take care of a partial first word.
-	 * On entry to this basic block:
-	 * t0 == the first destination word for masking back in
-	 * t1 == the first source word.
-	 */
-
-	srl a3, 3, a2		# E : a2 = loop counter = (count - 1)/8
-	addq a1, 8, a1		# E :
-	mskqh t2, a1, t2	# U :   detection in the src word
-	nop
-
-	/* Create the 1st output word and detect 0's in the 1st input word.  */
-	mskqh t1, a1, t3	# U :
-	mskql t0, a1, t0	# U : assemble the first output word
-	ornot t1, t2, t2	# E :
-	nop
-
-	cmpbge zero, t2, t8	# E : bits set iff null found
-	or t0, t3, t0		# E :
-	beq a2, $a_eoc		# U :
-	bne t8, $a_eos		# U : 2nd branch in a quad. Bad.
-
-	/* On entry to this basic block:
-	 * t0 == a source quad not containing a null.
-	 * a0 - current aligned destination address
-	 * a1 - current aligned source address
-	 * a2 - count of quadwords to move.
-	 * NOTE: Loop improvement - unrolling this is going to be
-	 *	a huge win, since we're going to stall otherwise.
-	 *	Fix this later.  For _really_ large copies, look
-	 *	at using wh64 on a look-ahead basis.  See the code
-	 *	in clear_user.S and copy_user.S.
-	 * Presumably, since (a0) and (a1) do not overlap (by C definition)
-	 * Lots of nops here:
-	 *	- Separate loads from stores
-	 *	- Keep it to 1 branch/quadpack so the branch predictor
-	 *	  can train.
-	 */
-$a_loop:
-	stq_u t0, 0(a0)		# L :
-	addq a0, 8, a0		# E :
-	nop
-	subq a2, 1, a2		# E :
-
-	EX( ldq_u t0, 0(a1) )	# L :
-	addq a1, 8, a1		# E :
-	cmpbge zero, t0, t8	# E : Stall 2 cycles on t0
-	beq a2, $a_eoc		# U :
-
-	beq t8, $a_loop		# U :
-	nop
-	nop
-	nop
-
-	/* Take care of the final (partial) word store.  At this point
-	 * the end-of-count bit is set in t8 iff it applies.
-	 *
-	 * On entry to this basic block we have:
-	 * t0 == the source word containing the null
-	 * t8 == the cmpbge mask that found it.
-	 */
-$a_eos:
-	negq t8, t12		# E : find low bit set
-	and t8, t12, t12	# E :
-
-	/* We're doing a partial word store and so need to combine
-	   our source and original destination words.  */
-	ldq_u t1, 0(a0)		# L :
-	subq t12, 1, t6		# E :
-
-	or t12, t6, t8		# E :
-	zapnot t0, t8, t0	# U : clear src bytes > null
-	zap t1, t8, t1		# U : clear dst bytes <= null
-	or t0, t1, t0		# E :
-
-	stq_u t0, 0(a0)		# L :
-	br $finish_up		# L0 :
-	nop
-	nop
-
-	/* Add the end-of-count bit to the eos detection bitmask.  */
-	.align 4
-$a_eoc:
-	or t10, t8, t8
-	br $a_eos
-	nop
-	nop
-
-
-/* The source and destination are not co-aligned.  Align the destination
-   and cope.  We have to be very careful about not reading too much and
-   causing a SEGV.  */
-
-	.align 4
-$u_head:
-	/* We know just enough now to be able to assemble the first
-	   full source word.  We can still find a zero at the end of it
-	   that prevents us from outputting the whole thing.
-
-	   On entry to this basic block:
-	   t0 == the first dest word, unmasked
-	   t1 == the shifted low bits of the first source word
-	   t6 == bytemask that is -1 in dest word bytes */
-
-	EX( ldq_u t2, 8(a1) )	# L : load second src word
-	addq a1, 8, a1		# E :
-	mskql t0, a0, t0	# U : mask trailing garbage in dst
-	extqh t2, a1, t4	# U :
-
-	or t1, t4, t1		# E : first aligned src word complete
-	mskqh t1, a0, t1	# U : mask leading garbage in src
-	or t0, t1, t0		# E : first output word complete
-	or t0, t6, t6		# E : mask original data for zero test
-
-	cmpbge zero, t6, t8	# E :
-	beq a2, $u_eocfin	# U :
-	bne t8, $u_final	# U : bad news - 2nd branch in a quad
-	lda t6, -1		# E : mask out the bits we have
-
-	mskql t6, a1, t6	# U :   already seen
-	stq_u t0, 0(a0)		# L : store first output word
-	or t6, t2, t2		# E :
-	cmpbge zero, t2, t8	# E : find nulls in second partial
-
-	addq a0, 8, a0		# E :
-	subq a2, 1, a2		# E :
-	bne t8, $u_late_head_exit	# U :
-	nop
-
-	/* Finally, we've got all the stupid leading edge cases taken care
-	   of and we can set up to enter the main loop.  */
-
-	extql t2, a1, t1	# U : position hi-bits of lo word
-	EX( ldq_u t2, 8(a1) )	# L : read next high-order source word
-	addq a1, 8, a1		# E :
-	cmpbge zero, t2, t8	# E :
-
-	beq a2, $u_eoc		# U :
-	bne t8, $u_eos		# U :
-	nop
-	nop
-
-	/* Unaligned copy main loop.  In order to avoid reading too much,
-	   the loop is structured to detect zeros in aligned source words.
-	   This has, unfortunately, effectively pulled half of a loop
-	   iteration out into the head and half into the tail, but it does
-	   prevent nastiness from accumulating in the very thing we want
-	   to run as fast as possible.
-
-	   On entry to this basic block:
-	   t1 == the shifted high-order bits from the previous source word
-	   t2 == the unshifted current source word
-
-	   We further know that t2 does not contain a null terminator.  */
-
-	/*
-	 * Extra nops here:
-	 *	separate load quads from store quads
-	 *	only one branch/quad to permit predictor training
-	 */
-
-	.align 4
-$u_loop:
-	extqh t2, a1, t0	# U : extract high bits for current word
-	addq a1, 8, a1		# E :
-	extql t2, a1, t3	# U : extract low bits for next time
-	addq a0, 8, a0		# E :
-
-	or t0, t1, t0		# E : current dst word now complete
-	EX( ldq_u t2, 0(a1) )	# L : load high word for next time
-	subq a2, 1, a2		# E :
-	nop
-
-	stq_u t0, -8(a0)	# L : save the current word
-	mov t3, t1		# E :
-	cmpbge zero, t2, t8	# E : test new word for eos
-	beq a2, $u_eoc		# U :
-
-	beq t8, $u_loop		# U :
-	nop
-	nop
-	nop
-
-	/* We've found a zero somewhere in the source word we just read.
-	   If it resides in the lower half, we have one (probably partial)
-	   word to write out, and if it resides in the upper half, we
-	   have one full and one partial word left to write out.
-
-	   On entry to this basic block:
-	   t1 == the shifted high-order bits from the previous source word
-	   t2 == the unshifted current source word.  */
-	.align 4
-$u_eos:
-	extqh t2, a1, t0	# U :
-	or t0, t1, t0		# E : first (partial) source word complete
-	cmpbge zero, t0, t8	# E : is the null in this first bit?
-	nop
-
-	bne t8, $u_final	# U :
-	stq_u t0, 0(a0)		# L : the null was in the high-order bits
-	addq a0, 8, a0		# E :
-	subq a2, 1, a2		# E :
-
-	.align 4
-$u_late_head_exit:
-	extql t2, a1, t0	# U :
-	cmpbge zero, t0, t8	# E :
-	or t8, t10, t6		# E :
-	cmoveq a2, t6, t8	# E :
-
-	/* Take care of a final (probably partial) result word.
-	   On entry to this basic block:
-	   t0 == assembled source word
-	   t8 == cmpbge mask that found the null.  */
-	.align 4
-$u_final:
-	negq t8, t6		# E : isolate low bit set
-	and t6, t8, t12		# E :
-	ldq_u t1, 0(a0)		# L :
-	subq t12, 1, t6		# E :
-
-	or t6, t12, t8		# E :
-	zapnot t0, t8, t0	# U : kill source bytes > null
-	zap t1, t8, t1		# U : kill dest bytes <= null
-	or t0, t1, t0		# E :
-
-	stq_u t0, 0(a0)		# E :
-	br $finish_up		# U :
-	nop
-	nop
-
-	.align 4
-$u_eoc:				# end-of-count
-	extqh t2, a1, t0	# U :
-	or t0, t1, t0		# E :
-	cmpbge zero, t0, t8	# E :
-	nop
-
-	.align 4
-$u_eocfin:			# end-of-count, final word
-	or t10, t8, t8		# E :
-	br $u_final		# U :
-	nop
-	nop
-
-	/* Unaligned copy entry point.  */
-	.align 4
-$unaligned:
-
-	srl a3, 3, a2		# U : a2 = loop counter = (count - 1)/8
-	and a0, 7, t4		# E : find dest misalignment
-	and a1, 7, t5		# E : find src misalignment
-	mov zero, t0		# E :
-
-	/* Conditionally load the first destination word and a bytemask
-	   with 0xff indicating that the destination byte is sacrosanct.  */
-
-	mov zero, t6		# E :
-	beq t4, 1f		# U :
-	ldq_u t0, 0(a0)		# L :
-	lda t6, -1		# E :
-
-	mskql t6, a0, t6	# E :
-	nop
-	nop
-	nop
-
-	.align 4
-1:
-	subq a1, t4, a1		# E : sub dest misalignment from src addr
-	/* If source misalignment is larger than dest misalignment, we need
-	   extra startup checks to avoid SEGV.  */
-	cmplt t4, t5, t12	# E :
-	extql t1, a1, t1	# U : shift src into place
-	lda t2, -1		# E : for creating masks later
-
-	beq t12, $u_head	# U :
-	mskqh t2, t5, t2	# U : begin src byte validity mask
-	cmpbge zero, t1, t8	# E : is there a zero?
-	nop
-
-	extql t2, a1, t2	# U :
-	or t8, t10, t5		# E : test for end-of-count too
-	cmpbge zero, t2, t3	# E :
-	cmoveq a2, t5, t8	# E : Latency=2, extra map slot
-
-	nop			# E : goes with cmov
-	andnot t8, t3, t8	# E :
-	beq t8, $u_head		# U :
-	nop
-
-	/* At this point we've found a zero in the first partial word of
-	   the source.  We need to isolate the valid source data and mask
-	   it into the original destination data.  (Incidentally, we know
-	   that we'll need at least one byte of that original dest word.) */
-
-	ldq_u t0, 0(a0)		# L :
-	negq t8, t6		# E : build bitmask of bytes <= zero
-	mskqh t1, t4, t1	# U :
-	and t6, t8, t12		# E :
-
-	subq t12, 1, t6		# E :
-	or t6, t12, t8		# E :
-	zapnot t2, t8, t2	# U : prepare source word; mirror changes
-	zapnot t1, t8, t1	# U : to source validity mask
-
-	andnot t0, t2, t0	# E : zero place for source to reside
-	or t0, t1, t0		# E : and put it there
-	stq_u t0, 0(a0)		# L :
-	nop
-
-	.align 4
-$finish_up:
-	zapnot t0, t12, t4	# U : was last byte written null?
-	and t12, 0xf0, t3	# E : binary search for the address of the
-	cmovne t4, 1, t4	# E : Latency=2, extra map slot
-	nop			# E : with cmovne
-
-	and t12, 0xcc, t2	# E : last byte written
-	and t12, 0xaa, t1	# E :
-	cmovne t3, 4, t3	# E : Latency=2, extra map slot
-	nop			# E : with cmovne
-
-	bic a0, 7, t0
-	cmovne t2, 2, t2	# E : Latency=2, extra map slot
-	nop			# E : with cmovne
-	nop
-
-	cmovne t1, 1, t1	# E : Latency=2, extra map slot
-	nop			# E : with cmovne
-	addq t0, t3, t0		# E :
-	addq t1, t2, t1		# E :
-
-	addq t0, t1, t0		# E :
-	addq t0, t4, t0		# add one if we filled the buffer
-	subq t0, v0, v0		# find string length
-	ret			# L0 :
-
-	.align 4
-$zerolength:
-	nop
-	nop
-	nop
-	clr v0
-
-$exception:
-	nop
-	nop
-	nop
-	ret
-
-	.end __strncpy_from_user
diff --git a/arch/alpha/lib/ev67-strlen_user.S b/arch/alpha/lib/ev67-strlen_user.S
deleted file mode 100644
index 57e0d77b81a6..000000000000
--- a/arch/alpha/lib/ev67-strlen_user.S
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * arch/alpha/lib/ev67-strlen_user.S
- * 21264 version contributed by Rick Gorton <rick.gorton@api-networks.com>
- *
- * Return the length of the string including the NULL terminator
- * (strlen+1) or zero if an error occurred.
- *
- * In places where it is critical to limit the processing time,
- * and the data is not trusted, strnlen_user() should be used.
- * It will return a value greater than its second argument if
- * that limit would be exceeded. This implementation is allowed
- * to access memory beyond the limit, but will not cross a page
- * boundary when doing so.
- *
- * Much of the information about 21264 scheduling/coding comes from:
- *	Compiler Writer's Guide for the Alpha 21264
- *	abbreviated as 'CWG' in other comments here
- *	ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html
- * Scheduling notation:
- *	E	- either cluster
- *	U	- upper subcluster; U0 - subcluster U0; U1 - subcluster U1
- *	L	- lower subcluster; L0 - subcluster L0; L1 - subcluster L1
- * Try not to change the actual algorithm if possible for consistency.
- */
-
-#include <asm/regdef.h>
-
-
-/* Allow an exception for an insn; exit if we get one.  */
-#define EX(x,y...)			\
-	99: x,##y;			\
-	.section __ex_table,"a";	\
-	.long 99b - .;			\
-	lda v0, $exception-99b(zero);	\
-	.previous
-
-
-	.set noreorder
-	.set noat
-	.text
-
-	.globl __strlen_user
-	.ent __strlen_user
-	.frame sp, 0, ra
-
-	.align 4
-__strlen_user:
-	ldah a1, 32767(zero)	# do not use plain strlen_user() for strings
-				# that might be almost 2 GB long; you should
-				# be using strnlen_user() instead
-	nop
-	nop
-	nop
-
-	.globl __strnlen_user
-
-	.align 4
-__strnlen_user:
-	.prologue 0
-	EX( ldq_u t0, 0(a0) )	# L : load first quadword (a0 may be misaligned)
-	lda t1, -1(zero)	# E :
-
-	insqh t1, a0, t1	# U :
-	andnot a0, 7, v0	# E :
-	or t1, t0, t0		# E :
-	subq a0, 1, a0		# E : get our +1 for the return
-
-	cmpbge zero, t0, t1	# E : t1 <- bitmask: bit i == 1 <==> i-th byte == 0
-	subq a1, 7, t2		# E :
-	subq a0, v0, t0		# E :
-	bne t1, $found		# U :
-
-	addq t2, t0, t2		# E :
-	addq a1, 1, a1		# E :
-	nop			# E :
-	nop			# E :
-
-	.align 4
-$loop:	ble t2, $limit		# U :
-	EX( ldq t0, 8(v0) )	# L :
-	nop			# E :
-	nop			# E :
-
-	cmpbge zero, t0, t1	# E :
-	subq t2, 8, t2		# E :
-	addq v0, 8, v0		# E : addr += 8
-	beq t1, $loop		# U :
-
-$found:	cttz t1, t2		# U0 :
-	addq v0, t2, v0		# E :
-	subq v0, a0, v0		# E :
-	ret			# L0 :
-
-$exception:
-	nop
-	nop
-	nop
-	ret
-
-	.align 4		# currently redundant
-$limit:
-	nop
-	nop
-	subq a1, t2, v0
-	ret
-
-	.end __strlen_user
diff --git a/arch/alpha/lib/strlen_user.S b/arch/alpha/lib/strlen_user.S
deleted file mode 100644
index 508a18e96479..000000000000
--- a/arch/alpha/lib/strlen_user.S
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * arch/alpha/lib/strlen_user.S
- *
- * Return the length of the string including the NUL terminator
- * (strlen+1) or zero if an error occurred.
- *
- * In places where it is critical to limit the processing time,
- * and the data is not trusted, strnlen_user() should be used.
- * It will return a value greater than its second argument if
- * that limit would be exceeded. This implementation is allowed
- * to access memory beyond the limit, but will not cross a page
- * boundary when doing so.
- */
-
-#include <asm/regdef.h>
-
-
-/* Allow an exception for an insn; exit if we get one.  */
-#define EX(x,y...)			\
-	99: x,##y;			\
-	.section __ex_table,"a";	\
-	.long 99b - .;			\
-	lda v0, $exception-99b(zero);	\
-	.previous
-
-
-	.set noreorder
-	.set noat
-	.text
-
-	.globl __strlen_user
-	.ent __strlen_user
-	.frame sp, 0, ra
-
-	.align 3
-__strlen_user:
-	ldah a1, 32767(zero)	# do not use plain strlen_user() for strings
-				# that might be almost 2 GB long; you should
-				# be using strnlen_user() instead
-
-	.globl __strnlen_user
-
-	.align 3
-__strnlen_user:
-	.prologue 0
-
-	EX( ldq_u t0, 0(a0) )	# load first quadword (a0 may be misaligned)
-	lda t1, -1(zero)
-	insqh t1, a0, t1
-	andnot a0, 7, v0
-	or t1, t0, t0
-	subq a0, 1, a0		# get our +1 for the return
-	cmpbge zero, t0, t1	# t1 <- bitmask: bit i == 1 <==> i-th byte == 0
-	subq a1, 7, t2
-	subq a0, v0, t0
-	bne t1, $found
-
-	addq t2, t0, t2
-	addq a1, 1, a1
-
-	.align 3
-$loop:	ble t2, $limit
-	EX( ldq t0, 8(v0) )
-	subq t2, 8, t2
-	addq v0, 8, v0		# addr += 8
-	cmpbge zero, t0, t1
-	beq t1, $loop
-
-$found:	negq t1, t2		# clear all but least set bit
-	and t1, t2, t1
-
-	and t1, 0xf0, t2	# binary search for that set bit
-	and t1, 0xcc, t3
-	and t1, 0xaa, t4
-	cmovne t2, 4, t2
-	cmovne t3, 2, t3
-	cmovne t4, 1, t4
-	addq t2, t3, t2
-	addq v0, t4, v0
-	addq v0, t2, v0
-	nop			# dual issue next two on ev4 and ev5
-	subq v0, a0, v0
-$exception:
-	ret
-
-	.align 3		# currently redundant
-$limit:
-	subq a1, t2, v0
-	ret
-
-	.end __strlen_user
diff --git a/arch/alpha/lib/strncpy_from_user.S b/arch/alpha/lib/strncpy_from_user.S
deleted file mode 100644
index 73ee21160ff7..000000000000
--- a/arch/alpha/lib/strncpy_from_user.S
+++ /dev/null
@@ -1,339 +0,0 @@
-/*
- * arch/alpha/lib/strncpy_from_user.S
- * Contributed by Richard Henderson (rth@tamu.edu)
- *
- * Just like strncpy except in the return value:
- *
- *	-EFAULT if an exception occurs before the terminator is copied.
- *	N if the buffer filled.
- *
- * Otherwise the length of the string is returned.
- */
-
-
-#include <asm/errno.h>
-#include <asm/regdef.h>
-
-
-/* Allow an exception for an insn; exit if we get one.  */
-#define EX(x,y...)			\
-	99: x,##y;			\
-	.section __ex_table,"a";	\
-	.long 99b - .;			\
-	lda $31, $exception-99b($0);	\
-	.previous
-
-
-	.set noat
-	.set noreorder
-	.text
-
-	.globl __strncpy_from_user
-	.ent __strncpy_from_user
-	.frame $30, 0, $26
-	.prologue 0
-
-	.align 3
-$aligned:
-	/* On entry to this basic block:
-	   t0 == the first destination word for masking back in
-	   t1 == the first source word.  */
-
-	/* Create the 1st output word and detect 0's in the 1st input word.  */
-	lda t2, -1		# e1    : build a mask against false zero
-	mskqh t2, a1, t2	# e0    : detection in the src word
-	mskqh t1, a1, t3	# e0    :
-	ornot t1, t2, t2	# .. e1 :
-	mskql t0, a1, t0	# e0    : assemble the first output word
-	cmpbge zero, t2, t8	# .. e1 : bits set iff null found
-	or t0, t3, t0		# e0    :
-	beq a2, $a_eoc		# .. e1 :
-	bne t8, $a_eos		# .. e1 :
-
-	/* On entry to this basic block:
-	   t0 == a source word not containing a null.  */
-
-$a_loop:
-	stq_u t0, 0(a0)		# e0    :
-	addq a0, 8, a0		# .. e1 :
-	EX( ldq_u t0, 0(a1) )	# e0    :
-	addq a1, 8, a1		# .. e1 :
-	subq a2, 1, a2		# e0    :
-	cmpbge zero, t0, t8	# .. e1 (stall)
-	beq a2, $a_eoc		# e1    :
-	beq t8, $a_loop		# e1    :
-
-	/* Take care of the final (partial) word store.  At this point
-	   the end-of-count bit is set in t8 iff it applies.
-
-	   On entry to this basic block we have:
-	   t0 == the source word containing the null
-	   t8 == the cmpbge mask that found it.  */
-
-$a_eos:
-	negq t8, t12		# e0    : find low bit set
-	and t8, t12, t12	# e1 (stall)
-
-	/* For the sake of the cache, don't read a destination word
-	   if we're not going to need it.  */
-	and t12, 0x80, t6	# e0    :
-	bne t6, 1f		# .. e1 (zdb)
-
-	/* We're doing a partial word store and so need to combine
-	   our source and original destination words.  */
-	ldq_u t1, 0(a0)		# e0    :
-	subq t12, 1, t6		# .. e1 :
-	or t12, t6, t8		# e0    :
-	unop			#
-	zapnot t0, t8, t0	# e0    : clear src bytes > null
-	zap t1, t8, t1		# .. e1 : clear dst bytes <= null
-	or t0, t1, t0		# e1    :
-
-1:	stq_u t0, 0(a0)
-	br $finish_up
-
-	/* Add the end-of-count bit to the eos detection bitmask.  */
-$a_eoc:
-	or t10, t8, t8
-	br $a_eos
-
-	/*** The Function Entry Point ***/
-	.align 3
-__strncpy_from_user:
-	mov a0, v0		# save the string start
-	beq a2, $zerolength
-
-	/* Are source and destination co-aligned?  */
-	xor a0, a1, t1		# e0    :
-	and a0, 7, t0		# .. e1 : find dest misalignment
-	and t1, 7, t1		# e0    :
-	addq a2, t0, a2		# .. e1 : bias count by dest misalignment
-	subq a2, 1, a2		# e0    :
-	and a2, 7, t2		# e1    :
-	srl a2, 3, a2		# e0    : a2 = loop counter = (count - 1)/8
-	addq zero, 1, t10	# .. e1 :
-	sll t10, t2, t10	# e0    : t10 = bitmask of last count byte
-	bne t1, $unaligned	# .. e1 :
-
-	/* We are co-aligned; take care of a partial first word.  */
-
-	EX( ldq_u t1, 0(a1) )	# e0    : load first src word
-	addq a1, 8, a1		# .. e1 :
-
-	beq t0, $aligned	# avoid loading dest word if not needed
-	ldq_u t0, 0(a0)		# e0    :
-	br $aligned		# .. e1 :
-
-
-/* The source and destination are not co-aligned.  Align the destination
-   and cope.  We have to be very careful about not reading too much and
-   causing a SEGV.  */
-
-	.align 3
-$u_head:
-	/* We know just enough now to be able to assemble the first
-	   full source word.  We can still find a zero at the end of it
-	   that prevents us from outputting the whole thing.
-
-	   On entry to this basic block:
-	   t0 == the first dest word, unmasked
-	   t1 == the shifted low bits of the first source word
-	   t6 == bytemask that is -1 in dest word bytes */
-
-	EX( ldq_u t2, 8(a1) )	# e0    : load second src word
-	addq a1, 8, a1		# .. e1 :
-	mskql t0, a0, t0	# e0    : mask trailing garbage in dst
-	extqh t2, a1, t4	# e0    :
-	or t1, t4, t1		# e1    : first aligned src word complete
-	mskqh t1, a0, t1	# e0    : mask leading garbage in src
-	or t0, t1, t0		# e0    : first output word complete
-	or t0, t6, t6		# e1    : mask original data for zero test
-	cmpbge zero, t6, t8	# e0    :
-	beq a2, $u_eocfin	# .. e1 :
-	bne t8, $u_final	# e1    :
-
-	lda t6, -1		# e1    : mask out the bits we have
-	mskql t6, a1, t6	# e0    : already seen
-	stq_u t0, 0(a0)		# e0    : store first output word
-	or t6, t2, t2		# .. e1 :
-	cmpbge zero, t2, t8	# e0    : find nulls in second partial
-	addq a0, 8, a0		# .. e1 :
-	subq a2, 1, a2		# e0    :
-	bne t8, $u_late_head_exit	# .. e1 :
-
-	/* Finally, we've got all the stupid leading edge cases taken care
-	   of and we can set up to enter the main loop.  */
-
-	extql t2, a1, t1	# e0    : position hi-bits of lo word
-	EX( ldq_u t2, 8(a1) )	# .. e1 : read next high-order source word
-	addq a1, 8, a1		# e0    :
-	cmpbge zero, t2, t8	# e1 (stall)
-	beq a2, $u_eoc		# e1    :
-	bne t8, $u_eos		# e1    :
-
-	/* Unaligned copy main loop.  In order to avoid reading too much,
-	   the loop is structured to detect zeros in aligned source words.
-	   This has, unfortunately, effectively pulled half of a loop
-	   iteration out into the head and half into the tail, but it does
-	   prevent nastiness from accumulating in the very thing we want
-	   to run as fast as possible.
-
-	   On entry to this basic block:
-	   t1 == the shifted high-order bits from the previous source word
-	   t2 == the unshifted current source word
-
-	   We further know that t2 does not contain a null terminator.  */
-
-	.align 3
-$u_loop:
-	extqh t2, a1, t0	# e0    : extract high bits for current word
-	addq a1, 8, a1		# .. e1 :
-	extql t2, a1, t3	# e0    : extract low bits for next time
-	addq a0, 8, a0		# .. e1 :
-	or t0, t1, t0		# e0    : current dst word now complete
-	EX( ldq_u t2, 0(a1) )	# .. e1 : load high word for next time
-	stq_u t0, -8(a0)	# e0    : save the current word
-	mov t3, t1		# .. e1 :
-	subq a2, 1, a2		# e0    :
-	cmpbge zero, t2, t8	# .. e1 : test new word for eos
-	beq a2, $u_eoc		# e1    :
-	beq t8, $u_loop		# e1    :
-
-	/* We've found a zero somewhere in the source word we just read.
-	   If it resides in the lower half, we have one (probably partial)
-	   word to write out, and if it resides in the upper half, we
-	   have one full and one partial word left to write out.
-
-	   On entry to this basic block:
-	   t1 == the shifted high-order bits from the previous source word
-	   t2 == the unshifted current source word.  */
-$u_eos:
-	extqh t2, a1, t0	# e0    :
-	or t0, t1, t0		# e1    : first (partial) source word complete
-
-	cmpbge zero, t0, t8	# e0    : is the null in this first bit?
-	bne t8, $u_final	# .. e1 (zdb)
-
-	stq_u t0, 0(a0)		# e0    : the null was in the high-order bits
-	addq a0, 8, a0		# .. e1 :
-	subq a2, 1, a2		# e1    :
-
-$u_late_head_exit:
-	extql t2, a1, t0	# .. e0 :
-	cmpbge zero, t0, t8	# e0    :
-	or t8, t10, t6		# e1    :
-	cmoveq a2, t6, t8	# e0    :
-	nop			# .. e1 :
-
-	/* Take care of a final (probably partial) result word.
-	   On entry to this basic block:
-	   t0 == assembled source word
-	   t8 == cmpbge mask that found the null.  */
-$u_final:
-	negq t8, t6		# e0    : isolate low bit set
-	and t6, t8, t12		# e1    :
-
-	and t12, 0x80, t6	# e0    : avoid dest word load if we can
-	bne t6, 1f		# .. e1 (zdb)
-
-	ldq_u t1, 0(a0)		# e0    :
-	subq t12, 1, t6		# .. e1 :
-	or t6, t12, t8		# e0    :
-	zapnot t0, t8, t0	# .. e1 : kill source bytes > null
-	zap t1, t8, t1		# e0    : kill dest bytes <= null
-	or t0, t1, t0		# e1    :
-
-1:	stq_u t0, 0(a0)		# e0    :
-	br $finish_up
-
-$u_eoc:				# end-of-count
-	extqh t2, a1, t0
-	or t0, t1, t0
-	cmpbge zero, t0, t8
-
-$u_eocfin:			# end-of-count, final word
-	or t10, t8, t8
-	br $u_final
-
-	/* Unaligned copy entry point.  */
-	.align 3
-$unaligned:
-
-	EX( ldq_u t1, 0(a1) )	# e0    : load first source word
-
-	and a0, 7, t4		# .. e1 : find dest misalignment
-	and a1, 7, t5		# e0    : find src misalignment
-
-	/* Conditionally load the first destination word and a bytemask
-	   with 0xff indicating that the destination byte is sacrosanct.  */
-
-	mov zero, t0		# .. e1 :
-	mov zero, t6		# e0    :
-	beq t4, 1f		# .. e1 :
-	ldq_u t0, 0(a0)		# e0    :
-	lda t6, -1		# .. e1 :
-	mskql t6, a0, t6	# e0    :
-1:
-	subq a1, t4, a1		# .. e1 : sub dest misalignment from src addr
-
-	/* If source misalignment is larger than dest misalignment, we need
-	   extra startup checks to avoid SEGV.  */
-
-	cmplt t4, t5, t12	# e1    :
-	extql t1, a1, t1	# .. e0 : shift src into place
-	lda t2, -1		# e0    : for creating masks later
-	beq t12, $u_head	# e1    :
-
-	mskqh t2, t5, t2	# e0    : begin src byte validity mask
-	cmpbge zero, t1, t8	# .. e1 : is there a zero?
-	extql t2, a1, t2	# e0    :
-	or t8, t10, t5		# .. e1 : test for end-of-count too
-	cmpbge zero, t2, t3	# e0    :
-	cmoveq a2, t5, t8	# .. e1 :
-	andnot t8, t3, t8	# e0    :
-	beq t8, $u_head		# .. e1 (zdb)
-
-	/* At this point we've found a zero in the first partial word of
-	   the source.  We need to isolate the valid source data and mask
-	   it into the original destination data.  (Incidentally, we know
-	   that we'll need at least one byte of that original dest word.) */
-
-	ldq_u t0, 0(a0)		# e0    :
-	negq t8, t6		# .. e1 : build bitmask of bytes <= zero
-	mskqh t1, t4, t1	# e0    :
-	and t6, t8, t12		# .. e1 :
-	subq t12, 1, t6		# e0    :
-	or t6, t12, t8		# e1    :
-
-	zapnot t2, t8, t2	# e0    : prepare source word; mirror changes
-	zapnot t1, t8, t1	# .. e1 : to source validity mask
-
-	andnot t0, t2, t0	# e0    : zero place for source to reside
-	or t0, t1, t0		# e1    : and put it there
-	stq_u t0, 0(a0)		# e0    :
-
-$finish_up:
-	zapnot t0, t12, t4	# was last byte written null?
-	cmovne t4, 1, t4
-
-	and t12, 0xf0, t3	# binary search for the address of the
-	and t12, 0xcc, t2	# last byte written
-	and t12, 0xaa, t1
-	bic a0, 7, t0
-	cmovne t3, 4, t3
-	cmovne t2, 2, t2
-	cmovne t1, 1, t1
-	addq t0, t3, t0
-	addq t1, t2, t1
-	addq t0, t1, t0
-	addq t0, t4, t0		# add one if we filled the buffer
-
-	subq t0, v0, v0		# find string length
-	ret
-
-$zerolength:
-	clr v0
-$exception:
-	ret
-
-	.end __strncpy_from_user
diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c
index 5eecab1a84ef..0c4132dd3507 100644
--- a/arch/alpha/mm/fault.c
+++ b/arch/alpha/mm/fault.c
@@ -89,6 +89,8 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
 	const struct exception_table_entry *fixup;
 	int fault, si_code = SEGV_MAPERR;
 	siginfo_t info;
+	unsigned int flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
+			      (cause > 0 ? FAULT_FLAG_WRITE : 0));
 
 	/* As of EV6, a load into $31/$f31 is a prefetch, and never faults
 	   (or is suppressed by the PALcode).  Support that for older CPUs
@@ -114,6 +116,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
 		goto vmalloc_fault;
 #endif
 
+retry:
 	down_read(&mm->mmap_sem);
 	vma = find_vma(mm, address);
 	if (!vma)
@@ -144,8 +147,11 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
 	/* If for any reason at all we couldn't handle the fault,
 	   make sure we exit gracefully rather than endlessly redo
 	   the fault.  */
-	fault = handle_mm_fault(mm, vma, address, cause > 0 ? FAULT_FLAG_WRITE : 0);
-	up_read(&mm->mmap_sem);
+	fault = handle_mm_fault(mm, vma, address, flags);
+
+	if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
+		return;
+
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		if (fault & VM_FAULT_OOM)
 			goto out_of_memory;
@@ -153,10 +159,26 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
 			goto do_sigbus;
 		BUG();
 	}
-	if (fault & VM_FAULT_MAJOR)
-		current->maj_flt++;
-	else
-		current->min_flt++;
+
+	if (flags & FAULT_FLAG_ALLOW_RETRY) {
+		if (fault & VM_FAULT_MAJOR)
+			current->maj_flt++;
+		else
+			current->min_flt++;
+		if (fault & VM_FAULT_RETRY) {
+			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+
+			/* No need to up_read(&mm->mmap_sem) as we would
+			 * have already released it in __lock_page_or_retry
+			 * in mm/filemap.c.
+			 */
+
+			goto retry;
+		}
+	}
+
+	up_read(&mm->mmap_sem);
+
 	return;
 
 	/* Something tried to access memory that isn't in our memory map.
@@ -186,12 +208,14 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
 	/* We ran out of memory, or some other thing happened to us that
 	   made us unable to handle the page fault gracefully.  */
  out_of_memory:
+	up_read(&mm->mmap_sem);
 	if (!user_mode(regs))
 		goto no_context;
 	pagefault_out_of_memory();
 	return;
 
  do_sigbus:
+	up_read(&mm->mmap_sem);
 	/* Send a sigbus, regardless of whether we were in kernel
 	   or user mode.  */
 	info.si_signo = SIGBUS;
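
Taken together, the fault.c hunks adopt the now-standard page-fault retry protocol: a killable, retryable first attempt, then at most one synchronous retry after FAULT_FLAG_ALLOW_RETRY is cleared. A condensed sketch of the control flow introduced above (error paths and the vma lookup elided, not the literal Alpha code):

	/* Condensed sketch of the retry flow introduced above. */
	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
			     (cause > 0 ? FAULT_FLAG_WRITE : 0);
	retry:
		down_read(&mm->mmap_sem);
		/* ... find and validate the vma ... */
		fault = handle_mm_fault(mm, vma, address, flags);
		if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
			return;			/* mmap_sem was already dropped */
		if ((flags & FAULT_FLAG_ALLOW_RETRY) && (fault & VM_FAULT_RETRY)) {
			/* __lock_page_or_retry released mmap_sem for us. */
			flags &= ~FAULT_FLAG_ALLOW_RETRY;
			goto retry;		/* second attempt blocks on the page */
		}
		up_read(&mm->mmap_sem);
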
diff --git a/arch/alpha/oprofile/common.c b/arch/alpha/oprofile/common.c
index a0a5d27aa215..b8ce18f485d3 100644
--- a/arch/alpha/oprofile/common.c
+++ b/arch/alpha/oprofile/common.c
@@ -12,6 +12,7 @@
 #include <linux/smp.h>
 #include <linux/errno.h>
 #include <asm/ptrace.h>
+#include <asm/special_insns.h>
 
 #include "op_impl.h"
 