summary | refs | log | tree | commit | diff | stats
path: root/arch/alpha/lib
diff options
context:
space:
mode:
author: Al Viro <viro@zeniv.linux.org.uk>, 2016-12-24 20:26:18 -0500
committer: Al Viro <viro@zeniv.linux.org.uk>, 2017-03-28 18:23:16 -0400
commit: 8525023121de4848b5f0a7d867ffeadbc477774d (patch)
tree: f4b1c2d1575d752963bda2a8836b40becda1eaa2 /arch/alpha/lib
parent: d597580d373774b1bdab84b3d26ff0b55162b916 (diff)
alpha: switch __copy_user() and __do_clear_user() to normal calling conventions
They used to need odd calling conventions due to the old exception handling mechanism, the last remnants of which had disappeared back in 2002.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'arch/alpha/lib')
-rw-r--r-- arch/alpha/lib/clear_user.S 66
-rw-r--r-- arch/alpha/lib/copy_user.S 82
-rw-r--r-- arch/alpha/lib/ev6-clear_user.S 84
-rw-r--r-- arch/alpha/lib/ev6-copy_user.S 104
4 files changed, 140 insertions, 196 deletions
diff --git a/arch/alpha/lib/clear_user.S b/arch/alpha/lib/clear_user.S
index bf5b931866ba..006f469fef73 100644
--- a/arch/alpha/lib/clear_user.S
+++ b/arch/alpha/lib/clear_user.S
@@ -8,21 +8,6 @@
8 * right "bytes left to zero" value (and that it is updated only _after_ 8 * right "bytes left to zero" value (and that it is updated only _after_
9 * a successful copy). There is also some rather minor exception setup 9 * a successful copy). There is also some rather minor exception setup
10 * stuff. 10 * stuff.
11 *
12 * NOTE! This is not directly C-callable, because the calling semantics
13 * are different:
14 *
15 * Inputs:
16 * length in $0
17 * destination address in $6
18 * exception pointer in $7
19 * return address in $28 (exceptions expect it there)
20 *
21 * Outputs:
22 * bytes left to copy in $0
23 *
24 * Clobbers:
25 * $1,$2,$3,$4,$5,$6
26 */ 11 */
27#include <asm/export.h> 12#include <asm/export.h>
28 13
@@ -38,62 +23,63 @@
38 .set noreorder 23 .set noreorder
39 .align 4 24 .align 4
40 25
41 .globl __do_clear_user 26 .globl __clear_user
42 .ent __do_clear_user 27 .ent __clear_user
43 .frame $30, 0, $28 28 .frame $30, 0, $26
44 .prologue 0 29 .prologue 0
45 30
46$loop: 31$loop:
47 and $1, 3, $4 # e0 : 32 and $1, 3, $4 # e0 :
48 beq $4, 1f # .. e1 : 33 beq $4, 1f # .. e1 :
49 34
500: EX( stq_u $31, 0($6) ) # e0 : zero one word 350: EX( stq_u $31, 0($16) ) # e0 : zero one word
51 subq $0, 8, $0 # .. e1 : 36 subq $0, 8, $0 # .. e1 :
52 subq $4, 1, $4 # e0 : 37 subq $4, 1, $4 # e0 :
53 addq $6, 8, $6 # .. e1 : 38 addq $16, 8, $16 # .. e1 :
54 bne $4, 0b # e1 : 39 bne $4, 0b # e1 :
55 unop # : 40 unop # :
56 41
571: bic $1, 3, $1 # e0 : 421: bic $1, 3, $1 # e0 :
58 beq $1, $tail # .. e1 : 43 beq $1, $tail # .. e1 :
59 44
602: EX( stq_u $31, 0($6) ) # e0 : zero four words 452: EX( stq_u $31, 0($16) ) # e0 : zero four words
61 subq $0, 8, $0 # .. e1 : 46 subq $0, 8, $0 # .. e1 :
62 EX( stq_u $31, 8($6) ) # e0 : 47 EX( stq_u $31, 8($16) ) # e0 :
63 subq $0, 8, $0 # .. e1 : 48 subq $0, 8, $0 # .. e1 :
64 EX( stq_u $31, 16($6) ) # e0 : 49 EX( stq_u $31, 16($16) ) # e0 :
65 subq $0, 8, $0 # .. e1 : 50 subq $0, 8, $0 # .. e1 :
66 EX( stq_u $31, 24($6) ) # e0 : 51 EX( stq_u $31, 24($16) ) # e0 :
67 subq $0, 8, $0 # .. e1 : 52 subq $0, 8, $0 # .. e1 :
68 subq $1, 4, $1 # e0 : 53 subq $1, 4, $1 # e0 :
69 addq $6, 32, $6 # .. e1 : 54 addq $16, 32, $16 # .. e1 :
70 bne $1, 2b # e1 : 55 bne $1, 2b # e1 :
71 56
72$tail: 57$tail:
73 bne $2, 1f # e1 : is there a tail to do? 58 bne $2, 1f # e1 : is there a tail to do?
74 ret $31, ($28), 1 # .. e1 : 59 ret $31, ($26), 1 # .. e1 :
75 60
761: EX( ldq_u $5, 0($6) ) # e0 : 611: EX( ldq_u $5, 0($16) ) # e0 :
77 clr $0 # .. e1 : 62 clr $0 # .. e1 :
78 nop # e1 : 63 nop # e1 :
79 mskqh $5, $0, $5 # e0 : 64 mskqh $5, $0, $5 # e0 :
80 EX( stq_u $5, 0($6) ) # e0 : 65 EX( stq_u $5, 0($16) ) # e0 :
81 ret $31, ($28), 1 # .. e1 : 66 ret $31, ($26), 1 # .. e1 :
82 67
83__do_clear_user: 68__clear_user:
84 and $6, 7, $4 # e0 : find dest misalignment 69 and $17, $17, $0
70 and $16, 7, $4 # e0 : find dest misalignment
85 beq $0, $zerolength # .. e1 : 71 beq $0, $zerolength # .. e1 :
86 addq $0, $4, $1 # e0 : bias counter 72 addq $0, $4, $1 # e0 : bias counter
87 and $1, 7, $2 # e1 : number of bytes in tail 73 and $1, 7, $2 # e1 : number of bytes in tail
88 srl $1, 3, $1 # e0 : 74 srl $1, 3, $1 # e0 :
89 beq $4, $loop # .. e1 : 75 beq $4, $loop # .. e1 :
90 76
91 EX( ldq_u $5, 0($6) ) # e0 : load dst word to mask back in 77 EX( ldq_u $5, 0($16) ) # e0 : load dst word to mask back in
92 beq $1, $oneword # .. e1 : sub-word store? 78 beq $1, $oneword # .. e1 : sub-word store?
93 79
94 mskql $5, $6, $5 # e0 : take care of misaligned head 80 mskql $5, $16, $5 # e0 : take care of misaligned head
95 addq $6, 8, $6 # .. e1 : 81 addq $16, 8, $16 # .. e1 :
96 EX( stq_u $5, -8($6) ) # e0 : 82 EX( stq_u $5, -8($16) ) # e0 :
97 addq $0, $4, $0 # .. e1 : bytes left -= 8 - misalignment 83 addq $0, $4, $0 # .. e1 : bytes left -= 8 - misalignment
98 subq $1, 1, $1 # e0 : 84 subq $1, 1, $1 # e0 :
99 subq $0, 8, $0 # .. e1 : 85 subq $0, 8, $0 # .. e1 :
@@ -101,15 +87,15 @@ __do_clear_user:
101 unop # : 87 unop # :
102 88
103$oneword: 89$oneword:
104 mskql $5, $6, $4 # e0 : 90 mskql $5, $16, $4 # e0 :
105 mskqh $5, $2, $5 # e0 : 91 mskqh $5, $2, $5 # e0 :
106 or $5, $4, $5 # e1 : 92 or $5, $4, $5 # e1 :
107 EX( stq_u $5, 0($6) ) # e0 : 93 EX( stq_u $5, 0($16) ) # e0 :
108 clr $0 # .. e1 : 94 clr $0 # .. e1 :
109 95
110$zerolength: 96$zerolength:
111$exception: 97$exception:
112 ret $31, ($28), 1 # .. e1 : 98 ret $31, ($26), 1 # .. e1 :
113 99
114 .end __do_clear_user 100 .end __clear_user
115 EXPORT_SYMBOL(__do_clear_user) 101 EXPORT_SYMBOL(__clear_user)
diff --git a/arch/alpha/lib/copy_user.S b/arch/alpha/lib/copy_user.S
index 509f62b65311..159f1b7e6e49 100644
--- a/arch/alpha/lib/copy_user.S
+++ b/arch/alpha/lib/copy_user.S
@@ -9,21 +9,6 @@
9 * contains the right "bytes left to copy" value (and that it is updated 9 * contains the right "bytes left to copy" value (and that it is updated
10 * only _after_ a successful copy). There is also some rather minor 10 * only _after_ a successful copy). There is also some rather minor
11 * exception setup stuff.. 11 * exception setup stuff..
12 *
13 * NOTE! This is not directly C-callable, because the calling semantics are
14 * different:
15 *
16 * Inputs:
17 * length in $0
18 * destination address in $6
19 * source address in $7
20 * return address in $28
21 *
22 * Outputs:
23 * bytes left to copy in $0
24 *
25 * Clobbers:
26 * $1,$2,$3,$4,$5,$6,$7
27 */ 12 */
28 13
29#include <asm/export.h> 14#include <asm/export.h>
@@ -49,58 +34,59 @@
49 .ent __copy_user 34 .ent __copy_user
50__copy_user: 35__copy_user:
51 .prologue 0 36 .prologue 0
52 and $6,7,$3 37 and $18,$18,$0
38 and $16,7,$3
53 beq $0,$35 39 beq $0,$35
54 beq $3,$36 40 beq $3,$36
55 subq $3,8,$3 41 subq $3,8,$3
56 .align 4 42 .align 4
57$37: 43$37:
58 EXI( ldq_u $1,0($7) ) 44 EXI( ldq_u $1,0($17) )
59 EXO( ldq_u $2,0($6) ) 45 EXO( ldq_u $2,0($16) )
60 extbl $1,$7,$1 46 extbl $1,$17,$1
61 mskbl $2,$6,$2 47 mskbl $2,$16,$2
62 insbl $1,$6,$1 48 insbl $1,$16,$1
63 addq $3,1,$3 49 addq $3,1,$3
64 bis $1,$2,$1 50 bis $1,$2,$1
65 EXO( stq_u $1,0($6) ) 51 EXO( stq_u $1,0($16) )
66 subq $0,1,$0 52 subq $0,1,$0
67 addq $6,1,$6 53 addq $16,1,$16
68 addq $7,1,$7 54 addq $17,1,$17
69 beq $0,$41 55 beq $0,$41
70 bne $3,$37 56 bne $3,$37
71$36: 57$36:
72 and $7,7,$1 58 and $17,7,$1
73 bic $0,7,$4 59 bic $0,7,$4
74 beq $1,$43 60 beq $1,$43
75 beq $4,$48 61 beq $4,$48
76 EXI( ldq_u $3,0($7) ) 62 EXI( ldq_u $3,0($17) )
77 .align 4 63 .align 4
78$50: 64$50:
79 EXI( ldq_u $2,8($7) ) 65 EXI( ldq_u $2,8($17) )
80 subq $4,8,$4 66 subq $4,8,$4
81 extql $3,$7,$3 67 extql $3,$17,$3
82 extqh $2,$7,$1 68 extqh $2,$17,$1
83 bis $3,$1,$1 69 bis $3,$1,$1
84 EXO( stq $1,0($6) ) 70 EXO( stq $1,0($16) )
85 addq $7,8,$7 71 addq $17,8,$17
86 subq $0,8,$0 72 subq $0,8,$0
87 addq $6,8,$6 73 addq $16,8,$16
88 bis $2,$2,$3 74 bis $2,$2,$3
89 bne $4,$50 75 bne $4,$50
90$48: 76$48:
91 beq $0,$41 77 beq $0,$41
92 .align 4 78 .align 4
93$57: 79$57:
94 EXI( ldq_u $1,0($7) ) 80 EXI( ldq_u $1,0($17) )
95 EXO( ldq_u $2,0($6) ) 81 EXO( ldq_u $2,0($16) )
96 extbl $1,$7,$1 82 extbl $1,$17,$1
97 mskbl $2,$6,$2 83 mskbl $2,$16,$2
98 insbl $1,$6,$1 84 insbl $1,$16,$1
99 bis $1,$2,$1 85 bis $1,$2,$1
100 EXO( stq_u $1,0($6) ) 86 EXO( stq_u $1,0($16) )
101 subq $0,1,$0 87 subq $0,1,$0
102 addq $6,1,$6 88 addq $16,1,$16
103 addq $7,1,$7 89 addq $17,1,$17
104 bne $0,$57 90 bne $0,$57
105 br $31,$41 91 br $31,$41
106 .align 4 92 .align 4
@@ -108,27 +94,27 @@ $43:
108 beq $4,$65 94 beq $4,$65
109 .align 4 95 .align 4
110$66: 96$66:
111 EXI( ldq $1,0($7) ) 97 EXI( ldq $1,0($17) )
112 subq $4,8,$4 98 subq $4,8,$4
113 EXO( stq $1,0($6) ) 99 EXO( stq $1,0($16) )
114 addq $7,8,$7 100 addq $17,8,$17
115 subq $0,8,$0 101 subq $0,8,$0
116 addq $6,8,$6 102 addq $16,8,$16
117 bne $4,$66 103 bne $4,$66
118$65: 104$65:
119 beq $0,$41 105 beq $0,$41
120 EXI( ldq $2,0($7) ) 106 EXI( ldq $2,0($17) )
121 EXO( ldq $1,0($6) ) 107 EXO( ldq $1,0($16) )
122 mskql $2,$0,$2 108 mskql $2,$0,$2
123 mskqh $1,$0,$1 109 mskqh $1,$0,$1
124 bis $2,$1,$2 110 bis $2,$1,$2
125 EXO( stq $2,0($6) ) 111 EXO( stq $2,0($16) )
126 bis $31,$31,$0 112 bis $31,$31,$0
127$41: 113$41:
128$35: 114$35:
129$exitin: 115$exitin:
130$exitout: 116$exitout:
131 ret $31,($28),1 117 ret $31,($26),1
132 118
133 .end __copy_user 119 .end __copy_user
134EXPORT_SYMBOL(__copy_user) 120EXPORT_SYMBOL(__copy_user)
diff --git a/arch/alpha/lib/ev6-clear_user.S b/arch/alpha/lib/ev6-clear_user.S
index 05bef6b50598..e179e4757ef8 100644
--- a/arch/alpha/lib/ev6-clear_user.S
+++ b/arch/alpha/lib/ev6-clear_user.S
@@ -9,21 +9,6 @@
9 * a successful copy). There is also some rather minor exception setup 9 * a successful copy). There is also some rather minor exception setup
10 * stuff. 10 * stuff.
11 * 11 *
12 * NOTE! This is not directly C-callable, because the calling semantics
13 * are different:
14 *
15 * Inputs:
16 * length in $0
17 * destination address in $6
18 * exception pointer in $7
19 * return address in $28 (exceptions expect it there)
20 *
21 * Outputs:
22 * bytes left to copy in $0
23 *
24 * Clobbers:
25 * $1,$2,$3,$4,$5,$6
26 *
27 * Much of the information about 21264 scheduling/coding comes from: 12 * Much of the information about 21264 scheduling/coding comes from:
28 * Compiler Writer's Guide for the Alpha 21264 13 * Compiler Writer's Guide for the Alpha 21264
29 * abbreviated as 'CWG' in other comments here 14 * abbreviated as 'CWG' in other comments here
@@ -56,14 +41,15 @@
56 .set noreorder 41 .set noreorder
57 .align 4 42 .align 4
58 43
59 .globl __do_clear_user 44 .globl __clear_user
60 .ent __do_clear_user 45 .ent __clear_user
61 .frame $30, 0, $28 46 .frame $30, 0, $26
62 .prologue 0 47 .prologue 0
63 48
64 # Pipeline info : Slotting & Comments 49 # Pipeline info : Slotting & Comments
65__do_clear_user: 50__clear_user:
66 and $6, 7, $4 # .. E .. .. : find dest head misalignment 51 and $17, $17, $0
52 and $16, 7, $4 # .. E .. .. : find dest head misalignment
67 beq $0, $zerolength # U .. .. .. : U L U L 53 beq $0, $zerolength # U .. .. .. : U L U L
68 54
69 addq $0, $4, $1 # .. .. .. E : bias counter 55 addq $0, $4, $1 # .. .. .. E : bias counter
@@ -75,14 +61,14 @@ __do_clear_user:
75 61
76/* 62/*
77 * Head is not aligned. Write (8 - $4) bytes to head of destination 63 * Head is not aligned. Write (8 - $4) bytes to head of destination
78 * This means $6 is known to be misaligned 64 * This means $16 is known to be misaligned
79 */ 65 */
80 EX( ldq_u $5, 0($6) ) # .. .. .. L : load dst word to mask back in 66 EX( ldq_u $5, 0($16) ) # .. .. .. L : load dst word to mask back in
81 beq $1, $onebyte # .. .. U .. : sub-word store? 67 beq $1, $onebyte # .. .. U .. : sub-word store?
82 mskql $5, $6, $5 # .. U .. .. : take care of misaligned head 68 mskql $5, $16, $5 # .. U .. .. : take care of misaligned head
83 addq $6, 8, $6 # E .. .. .. : L U U L 69 addq $16, 8, $16 # E .. .. .. : L U U L
84 70
85 EX( stq_u $5, -8($6) ) # .. .. .. L : 71 EX( stq_u $5, -8($16) ) # .. .. .. L :
86 subq $1, 1, $1 # .. .. E .. : 72 subq $1, 1, $1 # .. .. E .. :
87 addq $0, $4, $0 # .. E .. .. : bytes left -= 8 - misalignment 73 addq $0, $4, $0 # .. E .. .. : bytes left -= 8 - misalignment
88 subq $0, 8, $0 # E .. .. .. : U L U L 74 subq $0, 8, $0 # E .. .. .. : U L U L
@@ -93,11 +79,11 @@ __do_clear_user:
93 * values upon initial entry to the loop 79 * values upon initial entry to the loop
94 * $1 is number of quadwords to clear (zero is a valid value) 80 * $1 is number of quadwords to clear (zero is a valid value)
95 * $2 is number of trailing bytes (0..7) ($2 never used...) 81 * $2 is number of trailing bytes (0..7) ($2 never used...)
96 * $6 is known to be aligned 0mod8 82 * $16 is known to be aligned 0mod8
97 */ 83 */
98$headalign: 84$headalign:
99 subq $1, 16, $4 # .. .. .. E : If < 16, we can not use the huge loop 85 subq $1, 16, $4 # .. .. .. E : If < 16, we can not use the huge loop
100 and $6, 0x3f, $2 # .. .. E .. : Forward work for huge loop 86 and $16, 0x3f, $2 # .. .. E .. : Forward work for huge loop
101 subq $2, 0x40, $3 # .. E .. .. : bias counter (huge loop) 87 subq $2, 0x40, $3 # .. E .. .. : bias counter (huge loop)
102 blt $4, $trailquad # U .. .. .. : U L U L 88 blt $4, $trailquad # U .. .. .. : U L U L
103 89
@@ -114,21 +100,21 @@ $headalign:
114 beq $3, $bigalign # U .. .. .. : U L U L : Aligned 0mod64 100 beq $3, $bigalign # U .. .. .. : U L U L : Aligned 0mod64
115 101
116$alignmod64: 102$alignmod64:
117 EX( stq_u $31, 0($6) ) # .. .. .. L 103 EX( stq_u $31, 0($16) ) # .. .. .. L
118 addq $3, 8, $3 # .. .. E .. 104 addq $3, 8, $3 # .. .. E ..
119 subq $0, 8, $0 # .. E .. .. 105 subq $0, 8, $0 # .. E .. ..
120 nop # E .. .. .. : U L U L 106 nop # E .. .. .. : U L U L
121 107
122 nop # .. .. .. E 108 nop # .. .. .. E
123 subq $1, 1, $1 # .. .. E .. 109 subq $1, 1, $1 # .. .. E ..
124 addq $6, 8, $6 # .. E .. .. 110 addq $16, 8, $16 # .. E .. ..
125 blt $3, $alignmod64 # U .. .. .. : U L U L 111 blt $3, $alignmod64 # U .. .. .. : U L U L
126 112
127$bigalign: 113$bigalign:
128/* 114/*
129 * $0 is the number of bytes left 115 * $0 is the number of bytes left
130 * $1 is the number of quads left 116 * $1 is the number of quads left
131 * $6 is aligned 0mod64 117 * $16 is aligned 0mod64
132 * we know that we'll be taking a minimum of one trip through 118 * we know that we'll be taking a minimum of one trip through
133 * CWG Section 3.7.6: do not expect a sustained store rate of > 1/cycle 119 * CWG Section 3.7.6: do not expect a sustained store rate of > 1/cycle
134 * We are _not_ going to update $0 after every single store. That 120 * We are _not_ going to update $0 after every single store. That
@@ -145,39 +131,39 @@ $bigalign:
145 nop # E : 131 nop # E :
146 nop # E : 132 nop # E :
147 nop # E : 133 nop # E :
148 bis $6,$6,$3 # E : U L U L : Initial wh64 address is dest 134 bis $16,$16,$3 # E : U L U L : Initial wh64 address is dest
149 /* This might actually help for the current trip... */ 135 /* This might actually help for the current trip... */
150 136
151$do_wh64: 137$do_wh64:
152 wh64 ($3) # .. .. .. L1 : memory subsystem hint 138 wh64 ($3) # .. .. .. L1 : memory subsystem hint
153 subq $1, 16, $4 # .. .. E .. : Forward calculation - repeat the loop? 139 subq $1, 16, $4 # .. .. E .. : Forward calculation - repeat the loop?
154 EX( stq_u $31, 0($6) ) # .. L .. .. 140 EX( stq_u $31, 0($16) ) # .. L .. ..
155 subq $0, 8, $0 # E .. .. .. : U L U L 141 subq $0, 8, $0 # E .. .. .. : U L U L
156 142
157 addq $6, 128, $3 # E : Target address of wh64 143 addq $16, 128, $3 # E : Target address of wh64
158 EX( stq_u $31, 8($6) ) # L : 144 EX( stq_u $31, 8($16) ) # L :
159 EX( stq_u $31, 16($6) ) # L : 145 EX( stq_u $31, 16($16) ) # L :
160 subq $0, 16, $0 # E : U L L U 146 subq $0, 16, $0 # E : U L L U
161 147
162 nop # E : 148 nop # E :
163 EX( stq_u $31, 24($6) ) # L : 149 EX( stq_u $31, 24($16) ) # L :
164 EX( stq_u $31, 32($6) ) # L : 150 EX( stq_u $31, 32($16) ) # L :
165 subq $0, 168, $5 # E : U L L U : two trips through the loop left? 151 subq $0, 168, $5 # E : U L L U : two trips through the loop left?
166 /* 168 = 192 - 24, since we've already completed some stores */ 152 /* 168 = 192 - 24, since we've already completed some stores */
167 153
168 subq $0, 16, $0 # E : 154 subq $0, 16, $0 # E :
169 EX( stq_u $31, 40($6) ) # L : 155 EX( stq_u $31, 40($16) ) # L :
170 EX( stq_u $31, 48($6) ) # L : 156 EX( stq_u $31, 48($16) ) # L :
171 cmovlt $5, $6, $3 # E : U L L U : Latency 2, extra mapping cycle 157 cmovlt $5, $16, $3 # E : U L L U : Latency 2, extra mapping cycle
172 158
173 subq $1, 8, $1 # E : 159 subq $1, 8, $1 # E :
174 subq $0, 16, $0 # E : 160 subq $0, 16, $0 # E :
175 EX( stq_u $31, 56($6) ) # L : 161 EX( stq_u $31, 56($16) ) # L :
176 nop # E : U L U L 162 nop # E : U L U L
177 163
178 nop # E : 164 nop # E :
179 subq $0, 8, $0 # E : 165 subq $0, 8, $0 # E :
180 addq $6, 64, $6 # E : 166 addq $16, 64, $16 # E :
181 bge $4, $do_wh64 # U : U L U L 167 bge $4, $do_wh64 # U : U L U L
182 168
183$trailquad: 169$trailquad:
@@ -190,14 +176,14 @@ $trailquad:
190 beq $1, $trailbytes # U .. .. .. : U L U L : Only 0..7 bytes to go 176 beq $1, $trailbytes # U .. .. .. : U L U L : Only 0..7 bytes to go
191 177
192$onequad: 178$onequad:
193 EX( stq_u $31, 0($6) ) # .. .. .. L 179 EX( stq_u $31, 0($16) ) # .. .. .. L
194 subq $1, 1, $1 # .. .. E .. 180 subq $1, 1, $1 # .. .. E ..
195 subq $0, 8, $0 # .. E .. .. 181 subq $0, 8, $0 # .. E .. ..
196 nop # E .. .. .. : U L U L 182 nop # E .. .. .. : U L U L
197 183
198 nop # .. .. .. E 184 nop # .. .. .. E
199 nop # .. .. E .. 185 nop # .. .. E ..
200 addq $6, 8, $6 # .. E .. .. 186 addq $16, 8, $16 # .. E .. ..
201 bgt $1, $onequad # U .. .. .. : U L U L 187 bgt $1, $onequad # U .. .. .. : U L U L
202 188
203 # We have an unknown number of bytes left to go. 189 # We have an unknown number of bytes left to go.
@@ -211,9 +197,9 @@ $trailbytes:
211 # so we will use $0 as the loop counter 197 # so we will use $0 as the loop counter
212 # We know for a fact that $0 > 0 zero due to previous context 198 # We know for a fact that $0 > 0 zero due to previous context
213$onebyte: 199$onebyte:
214 EX( stb $31, 0($6) ) # .. .. .. L 200 EX( stb $31, 0($16) ) # .. .. .. L
215 subq $0, 1, $0 # .. .. E .. : 201 subq $0, 1, $0 # .. .. E .. :
216 addq $6, 1, $6 # .. E .. .. : 202 addq $16, 1, $16 # .. E .. .. :
217 bgt $0, $onebyte # U .. .. .. : U L U L 203 bgt $0, $onebyte # U .. .. .. : U L U L
218 204
219$zerolength: 205$zerolength:
@@ -221,6 +207,6 @@ $exception: # Destination for exception recovery(?)
221 nop # .. .. .. E : 207 nop # .. .. .. E :
222 nop # .. .. E .. : 208 nop # .. .. E .. :
223 nop # .. E .. .. : 209 nop # .. E .. .. :
224 ret $31, ($28), 1 # L0 .. .. .. : L U L U 210 ret $31, ($26), 1 # L0 .. .. .. : L U L U
225 .end __do_clear_user 211 .end __clear_user
226 EXPORT_SYMBOL(__do_clear_user) 212 EXPORT_SYMBOL(__clear_user)
diff --git a/arch/alpha/lib/ev6-copy_user.S b/arch/alpha/lib/ev6-copy_user.S
index be720b518af9..35e6710d0700 100644
--- a/arch/alpha/lib/ev6-copy_user.S
+++ b/arch/alpha/lib/ev6-copy_user.S
@@ -12,21 +12,6 @@
12 * only _after_ a successful copy). There is also some rather minor 12 * only _after_ a successful copy). There is also some rather minor
13 * exception setup stuff.. 13 * exception setup stuff..
14 * 14 *
15 * NOTE! This is not directly C-callable, because the calling semantics are
16 * different:
17 *
18 * Inputs:
19 * length in $0
20 * destination address in $6
21 * source address in $7
22 * return address in $28
23 *
24 * Outputs:
25 * bytes left to copy in $0
26 *
27 * Clobbers:
28 * $1,$2,$3,$4,$5,$6,$7
29 *
30 * Much of the information about 21264 scheduling/coding comes from: 15 * Much of the information about 21264 scheduling/coding comes from:
31 * Compiler Writer's Guide for the Alpha 21264 16 * Compiler Writer's Guide for the Alpha 21264
32 * abbreviated as 'CWG' in other comments here 17 * abbreviated as 'CWG' in other comments here
@@ -60,10 +45,11 @@
60 # Pipeline info: Slotting & Comments 45 # Pipeline info: Slotting & Comments
61__copy_user: 46__copy_user:
62 .prologue 0 47 .prologue 0
63 subq $0, 32, $1 # .. E .. .. : Is this going to be a small copy? 48 andq $18, $18, $0
49 subq $18, 32, $1 # .. E .. .. : Is this going to be a small copy?
64 beq $0, $zerolength # U .. .. .. : U L U L 50 beq $0, $zerolength # U .. .. .. : U L U L
65 51
66 and $6,7,$3 # .. .. .. E : is leading dest misalignment 52 and $16,7,$3 # .. .. .. E : is leading dest misalignment
67 ble $1, $onebyteloop # .. .. U .. : 1st branch : small amount of data 53 ble $1, $onebyteloop # .. .. U .. : 1st branch : small amount of data
68 beq $3, $destaligned # .. U .. .. : 2nd (one cycle fetcher stall) 54 beq $3, $destaligned # .. U .. .. : 2nd (one cycle fetcher stall)
69 subq $3, 8, $3 # E .. .. .. : L U U L : trip counter 55 subq $3, 8, $3 # E .. .. .. : L U U L : trip counter
@@ -73,17 +59,17 @@ __copy_user:
73 * We know we have at least one trip through this loop 59 * We know we have at least one trip through this loop
74 */ 60 */
75$aligndest: 61$aligndest:
76 EXI( ldbu $1,0($7) ) # .. .. .. L : Keep loads separate from stores 62 EXI( ldbu $1,0($17) ) # .. .. .. L : Keep loads separate from stores
77 addq $6,1,$6 # .. .. E .. : Section 3.8 in the CWG 63 addq $16,1,$16 # .. .. E .. : Section 3.8 in the CWG
78 addq $3,1,$3 # .. E .. .. : 64 addq $3,1,$3 # .. E .. .. :
79 nop # E .. .. .. : U L U L 65 nop # E .. .. .. : U L U L
80 66
81/* 67/*
82 * the -1 is to compensate for the inc($6) done in a previous quadpack 68 * the -1 is to compensate for the inc($16) done in a previous quadpack
83 * which allows us zero dependencies within either quadpack in the loop 69 * which allows us zero dependencies within either quadpack in the loop
84 */ 70 */
85 EXO( stb $1,-1($6) ) # .. .. .. L : 71 EXO( stb $1,-1($16) ) # .. .. .. L :
86 addq $7,1,$7 # .. .. E .. : Section 3.8 in the CWG 72 addq $17,1,$17 # .. .. E .. : Section 3.8 in the CWG
87 subq $0,1,$0 # .. E .. .. : 73 subq $0,1,$0 # .. E .. .. :
88 bne $3, $aligndest # U .. .. .. : U L U L 74 bne $3, $aligndest # U .. .. .. : U L U L
89 75
@@ -92,29 +78,29 @@ $aligndest:
92 * If we arrived via branch, we have a minimum of 32 bytes 78 * If we arrived via branch, we have a minimum of 32 bytes
93 */ 79 */
94$destaligned: 80$destaligned:
95 and $7,7,$1 # .. .. .. E : Check _current_ source alignment 81 and $17,7,$1 # .. .. .. E : Check _current_ source alignment
96 bic $0,7,$4 # .. .. E .. : number bytes as a quadword loop 82 bic $0,7,$4 # .. .. E .. : number bytes as a quadword loop
97 EXI( ldq_u $3,0($7) ) # .. L .. .. : Forward fetch for fallthrough code 83 EXI( ldq_u $3,0($17) ) # .. L .. .. : Forward fetch for fallthrough code
98 beq $1,$quadaligned # U .. .. .. : U L U L 84 beq $1,$quadaligned # U .. .. .. : U L U L
99 85
100/* 86/*
101 * In the worst case, we've just executed an ldq_u here from 0($7) 87 * In the worst case, we've just executed an ldq_u here from 0($17)
102 * and we'll repeat it once if we take the branch 88 * and we'll repeat it once if we take the branch
103 */ 89 */
104 90
105/* Misaligned quadword loop - not unrolled. Leave it that way. */ 91/* Misaligned quadword loop - not unrolled. Leave it that way. */
106$misquad: 92$misquad:
107 EXI( ldq_u $2,8($7) ) # .. .. .. L : 93 EXI( ldq_u $2,8($17) ) # .. .. .. L :
108 subq $4,8,$4 # .. .. E .. : 94 subq $4,8,$4 # .. .. E .. :
109 extql $3,$7,$3 # .. U .. .. : 95 extql $3,$17,$3 # .. U .. .. :
110 extqh $2,$7,$1 # U .. .. .. : U U L L 96 extqh $2,$17,$1 # U .. .. .. : U U L L
111 97
112 bis $3,$1,$1 # .. .. .. E : 98 bis $3,$1,$1 # .. .. .. E :
113 EXO( stq $1,0($6) ) # .. .. L .. : 99 EXO( stq $1,0($16) ) # .. .. L .. :
114 addq $7,8,$7 # .. E .. .. : 100 addq $17,8,$17 # .. E .. .. :
115 subq $0,8,$0 # E .. .. .. : U L L U 101 subq $0,8,$0 # E .. .. .. : U L L U
116 102
117 addq $6,8,$6 # .. .. .. E : 103 addq $16,8,$16 # .. .. .. E :
118 bis $2,$2,$3 # .. .. E .. : 104 bis $2,$2,$3 # .. .. E .. :
119 nop # .. E .. .. : 105 nop # .. E .. .. :
120 bne $4,$misquad # U .. .. .. : U L U L 106 bne $4,$misquad # U .. .. .. : U L U L
@@ -125,8 +111,8 @@ $misquad:
125 beq $0,$zerolength # U .. .. .. : U L U L 111 beq $0,$zerolength # U .. .. .. : U L U L
126 112
127/* We know we have at least one trip through the byte loop */ 113/* We know we have at least one trip through the byte loop */
128 EXI ( ldbu $2,0($7) ) # .. .. .. L : No loads in the same quad 114 EXI ( ldbu $2,0($17) ) # .. .. .. L : No loads in the same quad
129 addq $6,1,$6 # .. .. E .. : as the store (Section 3.8 in CWG) 115 addq $16,1,$16 # .. .. E .. : as the store (Section 3.8 in CWG)
130 nop # .. E .. .. : 116 nop # .. E .. .. :
131 br $31, $dirtyentry # L0 .. .. .. : L U U L 117 br $31, $dirtyentry # L0 .. .. .. : L U U L
132/* Do the trailing byte loop load, then hop into the store part of the loop */ 118/* Do the trailing byte loop load, then hop into the store part of the loop */
@@ -136,8 +122,8 @@ $misquad:
136 * Based upon the usage context, it's worth the effort to unroll this loop 122 * Based upon the usage context, it's worth the effort to unroll this loop
137 * $0 - number of bytes to be moved 123 * $0 - number of bytes to be moved
138 * $4 - number of bytes to move as quadwords 124 * $4 - number of bytes to move as quadwords
139 * $6 is current destination address 125 * $16 is current destination address
140 * $7 is current source address 126 * $17 is current source address
141 */ 127 */
142$quadaligned: 128$quadaligned:
143 subq $4, 32, $2 # .. .. .. E : do not unroll for small stuff 129 subq $4, 32, $2 # .. .. .. E : do not unroll for small stuff
@@ -155,29 +141,29 @@ $quadaligned:
155 * instruction memory hint instruction). 141 * instruction memory hint instruction).
156 */ 142 */
157$unroll4: 143$unroll4:
158 EXI( ldq $1,0($7) ) # .. .. .. L 144 EXI( ldq $1,0($17) ) # .. .. .. L
159 EXI( ldq $2,8($7) ) # .. .. L .. 145 EXI( ldq $2,8($17) ) # .. .. L ..
160 subq $4,32,$4 # .. E .. .. 146 subq $4,32,$4 # .. E .. ..
161 nop # E .. .. .. : U U L L 147 nop # E .. .. .. : U U L L
162 148
163 addq $7,16,$7 # .. .. .. E 149 addq $17,16,$17 # .. .. .. E
164 EXO( stq $1,0($6) ) # .. .. L .. 150 EXO( stq $1,0($16) ) # .. .. L ..
165 EXO( stq $2,8($6) ) # .. L .. .. 151 EXO( stq $2,8($16) ) # .. L .. ..
166 subq $0,16,$0 # E .. .. .. : U L L U 152 subq $0,16,$0 # E .. .. .. : U L L U
167 153
168 addq $6,16,$6 # .. .. .. E 154 addq $16,16,$16 # .. .. .. E
169 EXI( ldq $1,0($7) ) # .. .. L .. 155 EXI( ldq $1,0($17) ) # .. .. L ..
170 EXI( ldq $2,8($7) ) # .. L .. .. 156 EXI( ldq $2,8($17) ) # .. L .. ..
171 subq $4, 32, $3 # E .. .. .. : U U L L : is there enough for another trip? 157 subq $4, 32, $3 # E .. .. .. : U U L L : is there enough for another trip?
172 158
173 EXO( stq $1,0($6) ) # .. .. .. L 159 EXO( stq $1,0($16) ) # .. .. .. L
174 EXO( stq $2,8($6) ) # .. .. L .. 160 EXO( stq $2,8($16) ) # .. .. L ..
175 subq $0,16,$0 # .. E .. .. 161 subq $0,16,$0 # .. E .. ..
176 addq $7,16,$7 # E .. .. .. : U L L U 162 addq $17,16,$17 # E .. .. .. : U L L U
177 163
178 nop # .. .. .. E 164 nop # .. .. .. E
179 nop # .. .. E .. 165 nop # .. .. E ..
180 addq $6,16,$6 # .. E .. .. 166 addq $16,16,$16 # .. E .. ..
181 bgt $3,$unroll4 # U .. .. .. : U L U L 167 bgt $3,$unroll4 # U .. .. .. : U L U L
182 168
183 nop 169 nop
@@ -186,14 +172,14 @@ $unroll4:
186 beq $4, $noquads 172 beq $4, $noquads
187 173
188$onequad: 174$onequad:
189 EXI( ldq $1,0($7) ) 175 EXI( ldq $1,0($17) )
190 subq $4,8,$4 176 subq $4,8,$4
191 addq $7,8,$7 177 addq $17,8,$17
192 nop 178 nop
193 179
194 EXO( stq $1,0($6) ) 180 EXO( stq $1,0($16) )
195 subq $0,8,$0 181 subq $0,8,$0
196 addq $6,8,$6 182 addq $16,8,$16
197 bne $4,$onequad 183 bne $4,$onequad
198 184
199$noquads: 185$noquads:
@@ -207,23 +193,23 @@ $noquads:
207 * There's no point in doing a lot of complex alignment calculations to try to 193 * There's no point in doing a lot of complex alignment calculations to try to
208 * to quadword stuff for a small amount of data. 194 * to quadword stuff for a small amount of data.
209 * $0 - remaining number of bytes left to copy 195 * $0 - remaining number of bytes left to copy
210 * $6 - current dest addr 196 * $16 - current dest addr
211 * $7 - current source addr 197 * $17 - current source addr
212 */ 198 */
213 199
214$onebyteloop: 200$onebyteloop:
215 EXI ( ldbu $2,0($7) ) # .. .. .. L : No loads in the same quad 201 EXI ( ldbu $2,0($17) ) # .. .. .. L : No loads in the same quad
216 addq $6,1,$6 # .. .. E .. : as the store (Section 3.8 in CWG) 202 addq $16,1,$16 # .. .. E .. : as the store (Section 3.8 in CWG)
217 nop # .. E .. .. : 203 nop # .. E .. .. :
218 nop # E .. .. .. : U L U L 204 nop # E .. .. .. : U L U L
219 205
220$dirtyentry: 206$dirtyentry:
221/* 207/*
222 * the -1 is to compensate for the inc($6) done in a previous quadpack 208 * the -1 is to compensate for the inc($16) done in a previous quadpack
223 * which allows us zero dependencies within either quadpack in the loop 209 * which allows us zero dependencies within either quadpack in the loop
224 */ 210 */
225 EXO ( stb $2,-1($6) ) # .. .. .. L : 211 EXO ( stb $2,-1($16) ) # .. .. .. L :
226 addq $7,1,$7 # .. .. E .. : quadpack as the load 212 addq $17,1,$17 # .. .. E .. : quadpack as the load
227 subq $0,1,$0 # .. E .. .. : change count _after_ copy 213 subq $0,1,$0 # .. E .. .. : change count _after_ copy
228 bgt $0,$onebyteloop # U .. .. .. : U L U L 214 bgt $0,$onebyteloop # U .. .. .. : U L U L
229 215
@@ -233,7 +219,7 @@ $exitout: # Destination for exception recovery(?)
233 nop # .. .. .. E 219 nop # .. .. .. E
234 nop # .. .. E .. 220 nop # .. .. E ..
235 nop # .. E .. .. 221 nop # .. E .. ..
236 ret $31,($28),1 # L0 .. .. .. : L U L U 222 ret $31,($26),1 # L0 .. .. .. : L U L U
237 223
238 .end __copy_user 224 .end __copy_user
239 EXPORT_SYMBOL(__copy_user) 225 EXPORT_SYMBOL(__copy_user)