diff options
author | Al Viro <viro@zeniv.linux.org.uk> | 2016-12-24 20:26:18 -0500 |
---|---|---|
committer | Al Viro <viro@zeniv.linux.org.uk> | 2017-03-28 18:23:16 -0400 |
commit | 8525023121de4848b5f0a7d867ffeadbc477774d (patch) | |
tree | f4b1c2d1575d752963bda2a8836b40becda1eaa2 /arch/alpha/lib | |
parent | d597580d373774b1bdab84b3d26ff0b55162b916 (diff) |
alpha: switch __copy_user() and __do_clear_user() to normal calling conventions
They used to need odd calling conventions due to the old exception handling
mechanism, the last remnants of which had disappeared back in 2002.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'arch/alpha/lib')
-rw-r--r-- | arch/alpha/lib/clear_user.S | 66 | ||||
-rw-r--r-- | arch/alpha/lib/copy_user.S | 82 | ||||
-rw-r--r-- | arch/alpha/lib/ev6-clear_user.S | 84 | ||||
-rw-r--r-- | arch/alpha/lib/ev6-copy_user.S | 104 |
4 files changed, 140 insertions, 196 deletions
diff --git a/arch/alpha/lib/clear_user.S b/arch/alpha/lib/clear_user.S index bf5b931866ba..006f469fef73 100644 --- a/arch/alpha/lib/clear_user.S +++ b/arch/alpha/lib/clear_user.S | |||
@@ -8,21 +8,6 @@ | |||
8 | * right "bytes left to zero" value (and that it is updated only _after_ | 8 | * right "bytes left to zero" value (and that it is updated only _after_ |
9 | * a successful copy). There is also some rather minor exception setup | 9 | * a successful copy). There is also some rather minor exception setup |
10 | * stuff. | 10 | * stuff. |
11 | * | ||
12 | * NOTE! This is not directly C-callable, because the calling semantics | ||
13 | * are different: | ||
14 | * | ||
15 | * Inputs: | ||
16 | * length in $0 | ||
17 | * destination address in $6 | ||
18 | * exception pointer in $7 | ||
19 | * return address in $28 (exceptions expect it there) | ||
20 | * | ||
21 | * Outputs: | ||
22 | * bytes left to copy in $0 | ||
23 | * | ||
24 | * Clobbers: | ||
25 | * $1,$2,$3,$4,$5,$6 | ||
26 | */ | 11 | */ |
27 | #include <asm/export.h> | 12 | #include <asm/export.h> |
28 | 13 | ||
@@ -38,62 +23,63 @@ | |||
38 | .set noreorder | 23 | .set noreorder |
39 | .align 4 | 24 | .align 4 |
40 | 25 | ||
41 | .globl __do_clear_user | 26 | .globl __clear_user |
42 | .ent __do_clear_user | 27 | .ent __clear_user |
43 | .frame $30, 0, $28 | 28 | .frame $30, 0, $26 |
44 | .prologue 0 | 29 | .prologue 0 |
45 | 30 | ||
46 | $loop: | 31 | $loop: |
47 | and $1, 3, $4 # e0 : | 32 | and $1, 3, $4 # e0 : |
48 | beq $4, 1f # .. e1 : | 33 | beq $4, 1f # .. e1 : |
49 | 34 | ||
50 | 0: EX( stq_u $31, 0($6) ) # e0 : zero one word | 35 | 0: EX( stq_u $31, 0($16) ) # e0 : zero one word |
51 | subq $0, 8, $0 # .. e1 : | 36 | subq $0, 8, $0 # .. e1 : |
52 | subq $4, 1, $4 # e0 : | 37 | subq $4, 1, $4 # e0 : |
53 | addq $6, 8, $6 # .. e1 : | 38 | addq $16, 8, $16 # .. e1 : |
54 | bne $4, 0b # e1 : | 39 | bne $4, 0b # e1 : |
55 | unop # : | 40 | unop # : |
56 | 41 | ||
57 | 1: bic $1, 3, $1 # e0 : | 42 | 1: bic $1, 3, $1 # e0 : |
58 | beq $1, $tail # .. e1 : | 43 | beq $1, $tail # .. e1 : |
59 | 44 | ||
60 | 2: EX( stq_u $31, 0($6) ) # e0 : zero four words | 45 | 2: EX( stq_u $31, 0($16) ) # e0 : zero four words |
61 | subq $0, 8, $0 # .. e1 : | 46 | subq $0, 8, $0 # .. e1 : |
62 | EX( stq_u $31, 8($6) ) # e0 : | 47 | EX( stq_u $31, 8($16) ) # e0 : |
63 | subq $0, 8, $0 # .. e1 : | 48 | subq $0, 8, $0 # .. e1 : |
64 | EX( stq_u $31, 16($6) ) # e0 : | 49 | EX( stq_u $31, 16($16) ) # e0 : |
65 | subq $0, 8, $0 # .. e1 : | 50 | subq $0, 8, $0 # .. e1 : |
66 | EX( stq_u $31, 24($6) ) # e0 : | 51 | EX( stq_u $31, 24($16) ) # e0 : |
67 | subq $0, 8, $0 # .. e1 : | 52 | subq $0, 8, $0 # .. e1 : |
68 | subq $1, 4, $1 # e0 : | 53 | subq $1, 4, $1 # e0 : |
69 | addq $6, 32, $6 # .. e1 : | 54 | addq $16, 32, $16 # .. e1 : |
70 | bne $1, 2b # e1 : | 55 | bne $1, 2b # e1 : |
71 | 56 | ||
72 | $tail: | 57 | $tail: |
73 | bne $2, 1f # e1 : is there a tail to do? | 58 | bne $2, 1f # e1 : is there a tail to do? |
74 | ret $31, ($28), 1 # .. e1 : | 59 | ret $31, ($26), 1 # .. e1 : |
75 | 60 | ||
76 | 1: EX( ldq_u $5, 0($6) ) # e0 : | 61 | 1: EX( ldq_u $5, 0($16) ) # e0 : |
77 | clr $0 # .. e1 : | 62 | clr $0 # .. e1 : |
78 | nop # e1 : | 63 | nop # e1 : |
79 | mskqh $5, $0, $5 # e0 : | 64 | mskqh $5, $0, $5 # e0 : |
80 | EX( stq_u $5, 0($6) ) # e0 : | 65 | EX( stq_u $5, 0($16) ) # e0 : |
81 | ret $31, ($28), 1 # .. e1 : | 66 | ret $31, ($26), 1 # .. e1 : |
82 | 67 | ||
83 | __do_clear_user: | 68 | __clear_user: |
84 | and $6, 7, $4 # e0 : find dest misalignment | 69 | and $17, $17, $0 |
70 | and $16, 7, $4 # e0 : find dest misalignment | ||
85 | beq $0, $zerolength # .. e1 : | 71 | beq $0, $zerolength # .. e1 : |
86 | addq $0, $4, $1 # e0 : bias counter | 72 | addq $0, $4, $1 # e0 : bias counter |
87 | and $1, 7, $2 # e1 : number of bytes in tail | 73 | and $1, 7, $2 # e1 : number of bytes in tail |
88 | srl $1, 3, $1 # e0 : | 74 | srl $1, 3, $1 # e0 : |
89 | beq $4, $loop # .. e1 : | 75 | beq $4, $loop # .. e1 : |
90 | 76 | ||
91 | EX( ldq_u $5, 0($6) ) # e0 : load dst word to mask back in | 77 | EX( ldq_u $5, 0($16) ) # e0 : load dst word to mask back in |
92 | beq $1, $oneword # .. e1 : sub-word store? | 78 | beq $1, $oneword # .. e1 : sub-word store? |
93 | 79 | ||
94 | mskql $5, $6, $5 # e0 : take care of misaligned head | 80 | mskql $5, $16, $5 # e0 : take care of misaligned head |
95 | addq $6, 8, $6 # .. e1 : | 81 | addq $16, 8, $16 # .. e1 : |
96 | EX( stq_u $5, -8($6) ) # e0 : | 82 | EX( stq_u $5, -8($16) ) # e0 : |
97 | addq $0, $4, $0 # .. e1 : bytes left -= 8 - misalignment | 83 | addq $0, $4, $0 # .. e1 : bytes left -= 8 - misalignment |
98 | subq $1, 1, $1 # e0 : | 84 | subq $1, 1, $1 # e0 : |
99 | subq $0, 8, $0 # .. e1 : | 85 | subq $0, 8, $0 # .. e1 : |
@@ -101,15 +87,15 @@ __do_clear_user: | |||
101 | unop # : | 87 | unop # : |
102 | 88 | ||
103 | $oneword: | 89 | $oneword: |
104 | mskql $5, $6, $4 # e0 : | 90 | mskql $5, $16, $4 # e0 : |
105 | mskqh $5, $2, $5 # e0 : | 91 | mskqh $5, $2, $5 # e0 : |
106 | or $5, $4, $5 # e1 : | 92 | or $5, $4, $5 # e1 : |
107 | EX( stq_u $5, 0($6) ) # e0 : | 93 | EX( stq_u $5, 0($16) ) # e0 : |
108 | clr $0 # .. e1 : | 94 | clr $0 # .. e1 : |
109 | 95 | ||
110 | $zerolength: | 96 | $zerolength: |
111 | $exception: | 97 | $exception: |
112 | ret $31, ($28), 1 # .. e1 : | 98 | ret $31, ($26), 1 # .. e1 : |
113 | 99 | ||
114 | .end __do_clear_user | 100 | .end __clear_user |
115 | EXPORT_SYMBOL(__do_clear_user) | 101 | EXPORT_SYMBOL(__clear_user) |
diff --git a/arch/alpha/lib/copy_user.S b/arch/alpha/lib/copy_user.S index 509f62b65311..159f1b7e6e49 100644 --- a/arch/alpha/lib/copy_user.S +++ b/arch/alpha/lib/copy_user.S | |||
@@ -9,21 +9,6 @@ | |||
9 | * contains the right "bytes left to copy" value (and that it is updated | 9 | * contains the right "bytes left to copy" value (and that it is updated |
10 | * only _after_ a successful copy). There is also some rather minor | 10 | * only _after_ a successful copy). There is also some rather minor |
11 | * exception setup stuff.. | 11 | * exception setup stuff.. |
12 | * | ||
13 | * NOTE! This is not directly C-callable, because the calling semantics are | ||
14 | * different: | ||
15 | * | ||
16 | * Inputs: | ||
17 | * length in $0 | ||
18 | * destination address in $6 | ||
19 | * source address in $7 | ||
20 | * return address in $28 | ||
21 | * | ||
22 | * Outputs: | ||
23 | * bytes left to copy in $0 | ||
24 | * | ||
25 | * Clobbers: | ||
26 | * $1,$2,$3,$4,$5,$6,$7 | ||
27 | */ | 12 | */ |
28 | 13 | ||
29 | #include <asm/export.h> | 14 | #include <asm/export.h> |
@@ -49,58 +34,59 @@ | |||
49 | .ent __copy_user | 34 | .ent __copy_user |
50 | __copy_user: | 35 | __copy_user: |
51 | .prologue 0 | 36 | .prologue 0 |
52 | and $6,7,$3 | 37 | and $18,$18,$0 |
38 | and $16,7,$3 | ||
53 | beq $0,$35 | 39 | beq $0,$35 |
54 | beq $3,$36 | 40 | beq $3,$36 |
55 | subq $3,8,$3 | 41 | subq $3,8,$3 |
56 | .align 4 | 42 | .align 4 |
57 | $37: | 43 | $37: |
58 | EXI( ldq_u $1,0($7) ) | 44 | EXI( ldq_u $1,0($17) ) |
59 | EXO( ldq_u $2,0($6) ) | 45 | EXO( ldq_u $2,0($16) ) |
60 | extbl $1,$7,$1 | 46 | extbl $1,$17,$1 |
61 | mskbl $2,$6,$2 | 47 | mskbl $2,$16,$2 |
62 | insbl $1,$6,$1 | 48 | insbl $1,$16,$1 |
63 | addq $3,1,$3 | 49 | addq $3,1,$3 |
64 | bis $1,$2,$1 | 50 | bis $1,$2,$1 |
65 | EXO( stq_u $1,0($6) ) | 51 | EXO( stq_u $1,0($16) ) |
66 | subq $0,1,$0 | 52 | subq $0,1,$0 |
67 | addq $6,1,$6 | 53 | addq $16,1,$16 |
68 | addq $7,1,$7 | 54 | addq $17,1,$17 |
69 | beq $0,$41 | 55 | beq $0,$41 |
70 | bne $3,$37 | 56 | bne $3,$37 |
71 | $36: | 57 | $36: |
72 | and $7,7,$1 | 58 | and $17,7,$1 |
73 | bic $0,7,$4 | 59 | bic $0,7,$4 |
74 | beq $1,$43 | 60 | beq $1,$43 |
75 | beq $4,$48 | 61 | beq $4,$48 |
76 | EXI( ldq_u $3,0($7) ) | 62 | EXI( ldq_u $3,0($17) ) |
77 | .align 4 | 63 | .align 4 |
78 | $50: | 64 | $50: |
79 | EXI( ldq_u $2,8($7) ) | 65 | EXI( ldq_u $2,8($17) ) |
80 | subq $4,8,$4 | 66 | subq $4,8,$4 |
81 | extql $3,$7,$3 | 67 | extql $3,$17,$3 |
82 | extqh $2,$7,$1 | 68 | extqh $2,$17,$1 |
83 | bis $3,$1,$1 | 69 | bis $3,$1,$1 |
84 | EXO( stq $1,0($6) ) | 70 | EXO( stq $1,0($16) ) |
85 | addq $7,8,$7 | 71 | addq $17,8,$17 |
86 | subq $0,8,$0 | 72 | subq $0,8,$0 |
87 | addq $6,8,$6 | 73 | addq $16,8,$16 |
88 | bis $2,$2,$3 | 74 | bis $2,$2,$3 |
89 | bne $4,$50 | 75 | bne $4,$50 |
90 | $48: | 76 | $48: |
91 | beq $0,$41 | 77 | beq $0,$41 |
92 | .align 4 | 78 | .align 4 |
93 | $57: | 79 | $57: |
94 | EXI( ldq_u $1,0($7) ) | 80 | EXI( ldq_u $1,0($17) ) |
95 | EXO( ldq_u $2,0($6) ) | 81 | EXO( ldq_u $2,0($16) ) |
96 | extbl $1,$7,$1 | 82 | extbl $1,$17,$1 |
97 | mskbl $2,$6,$2 | 83 | mskbl $2,$16,$2 |
98 | insbl $1,$6,$1 | 84 | insbl $1,$16,$1 |
99 | bis $1,$2,$1 | 85 | bis $1,$2,$1 |
100 | EXO( stq_u $1,0($6) ) | 86 | EXO( stq_u $1,0($16) ) |
101 | subq $0,1,$0 | 87 | subq $0,1,$0 |
102 | addq $6,1,$6 | 88 | addq $16,1,$16 |
103 | addq $7,1,$7 | 89 | addq $17,1,$17 |
104 | bne $0,$57 | 90 | bne $0,$57 |
105 | br $31,$41 | 91 | br $31,$41 |
106 | .align 4 | 92 | .align 4 |
@@ -108,27 +94,27 @@ $43: | |||
108 | beq $4,$65 | 94 | beq $4,$65 |
109 | .align 4 | 95 | .align 4 |
110 | $66: | 96 | $66: |
111 | EXI( ldq $1,0($7) ) | 97 | EXI( ldq $1,0($17) ) |
112 | subq $4,8,$4 | 98 | subq $4,8,$4 |
113 | EXO( stq $1,0($6) ) | 99 | EXO( stq $1,0($16) ) |
114 | addq $7,8,$7 | 100 | addq $17,8,$17 |
115 | subq $0,8,$0 | 101 | subq $0,8,$0 |
116 | addq $6,8,$6 | 102 | addq $16,8,$16 |
117 | bne $4,$66 | 103 | bne $4,$66 |
118 | $65: | 104 | $65: |
119 | beq $0,$41 | 105 | beq $0,$41 |
120 | EXI( ldq $2,0($7) ) | 106 | EXI( ldq $2,0($17) ) |
121 | EXO( ldq $1,0($6) ) | 107 | EXO( ldq $1,0($16) ) |
122 | mskql $2,$0,$2 | 108 | mskql $2,$0,$2 |
123 | mskqh $1,$0,$1 | 109 | mskqh $1,$0,$1 |
124 | bis $2,$1,$2 | 110 | bis $2,$1,$2 |
125 | EXO( stq $2,0($6) ) | 111 | EXO( stq $2,0($16) ) |
126 | bis $31,$31,$0 | 112 | bis $31,$31,$0 |
127 | $41: | 113 | $41: |
128 | $35: | 114 | $35: |
129 | $exitin: | 115 | $exitin: |
130 | $exitout: | 116 | $exitout: |
131 | ret $31,($28),1 | 117 | ret $31,($26),1 |
132 | 118 | ||
133 | .end __copy_user | 119 | .end __copy_user |
134 | EXPORT_SYMBOL(__copy_user) | 120 | EXPORT_SYMBOL(__copy_user) |
diff --git a/arch/alpha/lib/ev6-clear_user.S b/arch/alpha/lib/ev6-clear_user.S index 05bef6b50598..e179e4757ef8 100644 --- a/arch/alpha/lib/ev6-clear_user.S +++ b/arch/alpha/lib/ev6-clear_user.S | |||
@@ -9,21 +9,6 @@ | |||
9 | * a successful copy). There is also some rather minor exception setup | 9 | * a successful copy). There is also some rather minor exception setup |
10 | * stuff. | 10 | * stuff. |
11 | * | 11 | * |
12 | * NOTE! This is not directly C-callable, because the calling semantics | ||
13 | * are different: | ||
14 | * | ||
15 | * Inputs: | ||
16 | * length in $0 | ||
17 | * destination address in $6 | ||
18 | * exception pointer in $7 | ||
19 | * return address in $28 (exceptions expect it there) | ||
20 | * | ||
21 | * Outputs: | ||
22 | * bytes left to copy in $0 | ||
23 | * | ||
24 | * Clobbers: | ||
25 | * $1,$2,$3,$4,$5,$6 | ||
26 | * | ||
27 | * Much of the information about 21264 scheduling/coding comes from: | 12 | * Much of the information about 21264 scheduling/coding comes from: |
28 | * Compiler Writer's Guide for the Alpha 21264 | 13 | * Compiler Writer's Guide for the Alpha 21264 |
29 | * abbreviated as 'CWG' in other comments here | 14 | * abbreviated as 'CWG' in other comments here |
@@ -56,14 +41,15 @@ | |||
56 | .set noreorder | 41 | .set noreorder |
57 | .align 4 | 42 | .align 4 |
58 | 43 | ||
59 | .globl __do_clear_user | 44 | .globl __clear_user |
60 | .ent __do_clear_user | 45 | .ent __clear_user |
61 | .frame $30, 0, $28 | 46 | .frame $30, 0, $26 |
62 | .prologue 0 | 47 | .prologue 0 |
63 | 48 | ||
64 | # Pipeline info : Slotting & Comments | 49 | # Pipeline info : Slotting & Comments |
65 | __do_clear_user: | 50 | __clear_user: |
66 | and $6, 7, $4 # .. E .. .. : find dest head misalignment | 51 | and $17, $17, $0 |
52 | and $16, 7, $4 # .. E .. .. : find dest head misalignment | ||
67 | beq $0, $zerolength # U .. .. .. : U L U L | 53 | beq $0, $zerolength # U .. .. .. : U L U L |
68 | 54 | ||
69 | addq $0, $4, $1 # .. .. .. E : bias counter | 55 | addq $0, $4, $1 # .. .. .. E : bias counter |
@@ -75,14 +61,14 @@ __do_clear_user: | |||
75 | 61 | ||
76 | /* | 62 | /* |
77 | * Head is not aligned. Write (8 - $4) bytes to head of destination | 63 | * Head is not aligned. Write (8 - $4) bytes to head of destination |
78 | * This means $6 is known to be misaligned | 64 | * This means $16 is known to be misaligned |
79 | */ | 65 | */ |
80 | EX( ldq_u $5, 0($6) ) # .. .. .. L : load dst word to mask back in | 66 | EX( ldq_u $5, 0($16) ) # .. .. .. L : load dst word to mask back in |
81 | beq $1, $onebyte # .. .. U .. : sub-word store? | 67 | beq $1, $onebyte # .. .. U .. : sub-word store? |
82 | mskql $5, $6, $5 # .. U .. .. : take care of misaligned head | 68 | mskql $5, $16, $5 # .. U .. .. : take care of misaligned head |
83 | addq $6, 8, $6 # E .. .. .. : L U U L | 69 | addq $16, 8, $16 # E .. .. .. : L U U L |
84 | 70 | ||
85 | EX( stq_u $5, -8($6) ) # .. .. .. L : | 71 | EX( stq_u $5, -8($16) ) # .. .. .. L : |
86 | subq $1, 1, $1 # .. .. E .. : | 72 | subq $1, 1, $1 # .. .. E .. : |
87 | addq $0, $4, $0 # .. E .. .. : bytes left -= 8 - misalignment | 73 | addq $0, $4, $0 # .. E .. .. : bytes left -= 8 - misalignment |
88 | subq $0, 8, $0 # E .. .. .. : U L U L | 74 | subq $0, 8, $0 # E .. .. .. : U L U L |
@@ -93,11 +79,11 @@ __do_clear_user: | |||
93 | * values upon initial entry to the loop | 79 | * values upon initial entry to the loop |
94 | * $1 is number of quadwords to clear (zero is a valid value) | 80 | * $1 is number of quadwords to clear (zero is a valid value) |
95 | * $2 is number of trailing bytes (0..7) ($2 never used...) | 81 | * $2 is number of trailing bytes (0..7) ($2 never used...) |
96 | * $6 is known to be aligned 0mod8 | 82 | * $16 is known to be aligned 0mod8 |
97 | */ | 83 | */ |
98 | $headalign: | 84 | $headalign: |
99 | subq $1, 16, $4 # .. .. .. E : If < 16, we can not use the huge loop | 85 | subq $1, 16, $4 # .. .. .. E : If < 16, we can not use the huge loop |
100 | and $6, 0x3f, $2 # .. .. E .. : Forward work for huge loop | 86 | and $16, 0x3f, $2 # .. .. E .. : Forward work for huge loop |
101 | subq $2, 0x40, $3 # .. E .. .. : bias counter (huge loop) | 87 | subq $2, 0x40, $3 # .. E .. .. : bias counter (huge loop) |
102 | blt $4, $trailquad # U .. .. .. : U L U L | 88 | blt $4, $trailquad # U .. .. .. : U L U L |
103 | 89 | ||
@@ -114,21 +100,21 @@ $headalign: | |||
114 | beq $3, $bigalign # U .. .. .. : U L U L : Aligned 0mod64 | 100 | beq $3, $bigalign # U .. .. .. : U L U L : Aligned 0mod64 |
115 | 101 | ||
116 | $alignmod64: | 102 | $alignmod64: |
117 | EX( stq_u $31, 0($6) ) # .. .. .. L | 103 | EX( stq_u $31, 0($16) ) # .. .. .. L |
118 | addq $3, 8, $3 # .. .. E .. | 104 | addq $3, 8, $3 # .. .. E .. |
119 | subq $0, 8, $0 # .. E .. .. | 105 | subq $0, 8, $0 # .. E .. .. |
120 | nop # E .. .. .. : U L U L | 106 | nop # E .. .. .. : U L U L |
121 | 107 | ||
122 | nop # .. .. .. E | 108 | nop # .. .. .. E |
123 | subq $1, 1, $1 # .. .. E .. | 109 | subq $1, 1, $1 # .. .. E .. |
124 | addq $6, 8, $6 # .. E .. .. | 110 | addq $16, 8, $16 # .. E .. .. |
125 | blt $3, $alignmod64 # U .. .. .. : U L U L | 111 | blt $3, $alignmod64 # U .. .. .. : U L U L |
126 | 112 | ||
127 | $bigalign: | 113 | $bigalign: |
128 | /* | 114 | /* |
129 | * $0 is the number of bytes left | 115 | * $0 is the number of bytes left |
130 | * $1 is the number of quads left | 116 | * $1 is the number of quads left |
131 | * $6 is aligned 0mod64 | 117 | * $16 is aligned 0mod64 |
132 | * we know that we'll be taking a minimum of one trip through | 118 | * we know that we'll be taking a minimum of one trip through |
133 | * CWG Section 3.7.6: do not expect a sustained store rate of > 1/cycle | 119 | * CWG Section 3.7.6: do not expect a sustained store rate of > 1/cycle |
134 | * We are _not_ going to update $0 after every single store. That | 120 | * We are _not_ going to update $0 after every single store. That |
@@ -145,39 +131,39 @@ $bigalign: | |||
145 | nop # E : | 131 | nop # E : |
146 | nop # E : | 132 | nop # E : |
147 | nop # E : | 133 | nop # E : |
148 | bis $6,$6,$3 # E : U L U L : Initial wh64 address is dest | 134 | bis $16,$16,$3 # E : U L U L : Initial wh64 address is dest |
149 | /* This might actually help for the current trip... */ | 135 | /* This might actually help for the current trip... */ |
150 | 136 | ||
151 | $do_wh64: | 137 | $do_wh64: |
152 | wh64 ($3) # .. .. .. L1 : memory subsystem hint | 138 | wh64 ($3) # .. .. .. L1 : memory subsystem hint |
153 | subq $1, 16, $4 # .. .. E .. : Forward calculation - repeat the loop? | 139 | subq $1, 16, $4 # .. .. E .. : Forward calculation - repeat the loop? |
154 | EX( stq_u $31, 0($6) ) # .. L .. .. | 140 | EX( stq_u $31, 0($16) ) # .. L .. .. |
155 | subq $0, 8, $0 # E .. .. .. : U L U L | 141 | subq $0, 8, $0 # E .. .. .. : U L U L |
156 | 142 | ||
157 | addq $6, 128, $3 # E : Target address of wh64 | 143 | addq $16, 128, $3 # E : Target address of wh64 |
158 | EX( stq_u $31, 8($6) ) # L : | 144 | EX( stq_u $31, 8($16) ) # L : |
159 | EX( stq_u $31, 16($6) ) # L : | 145 | EX( stq_u $31, 16($16) ) # L : |
160 | subq $0, 16, $0 # E : U L L U | 146 | subq $0, 16, $0 # E : U L L U |
161 | 147 | ||
162 | nop # E : | 148 | nop # E : |
163 | EX( stq_u $31, 24($6) ) # L : | 149 | EX( stq_u $31, 24($16) ) # L : |
164 | EX( stq_u $31, 32($6) ) # L : | 150 | EX( stq_u $31, 32($16) ) # L : |
165 | subq $0, 168, $5 # E : U L L U : two trips through the loop left? | 151 | subq $0, 168, $5 # E : U L L U : two trips through the loop left? |
166 | /* 168 = 192 - 24, since we've already completed some stores */ | 152 | /* 168 = 192 - 24, since we've already completed some stores */ |
167 | 153 | ||
168 | subq $0, 16, $0 # E : | 154 | subq $0, 16, $0 # E : |
169 | EX( stq_u $31, 40($6) ) # L : | 155 | EX( stq_u $31, 40($16) ) # L : |
170 | EX( stq_u $31, 48($6) ) # L : | 156 | EX( stq_u $31, 48($16) ) # L : |
171 | cmovlt $5, $6, $3 # E : U L L U : Latency 2, extra mapping cycle | 157 | cmovlt $5, $16, $3 # E : U L L U : Latency 2, extra mapping cycle |
172 | 158 | ||
173 | subq $1, 8, $1 # E : | 159 | subq $1, 8, $1 # E : |
174 | subq $0, 16, $0 # E : | 160 | subq $0, 16, $0 # E : |
175 | EX( stq_u $31, 56($6) ) # L : | 161 | EX( stq_u $31, 56($16) ) # L : |
176 | nop # E : U L U L | 162 | nop # E : U L U L |
177 | 163 | ||
178 | nop # E : | 164 | nop # E : |
179 | subq $0, 8, $0 # E : | 165 | subq $0, 8, $0 # E : |
180 | addq $6, 64, $6 # E : | 166 | addq $16, 64, $16 # E : |
181 | bge $4, $do_wh64 # U : U L U L | 167 | bge $4, $do_wh64 # U : U L U L |
182 | 168 | ||
183 | $trailquad: | 169 | $trailquad: |
@@ -190,14 +176,14 @@ $trailquad: | |||
190 | beq $1, $trailbytes # U .. .. .. : U L U L : Only 0..7 bytes to go | 176 | beq $1, $trailbytes # U .. .. .. : U L U L : Only 0..7 bytes to go |
191 | 177 | ||
192 | $onequad: | 178 | $onequad: |
193 | EX( stq_u $31, 0($6) ) # .. .. .. L | 179 | EX( stq_u $31, 0($16) ) # .. .. .. L |
194 | subq $1, 1, $1 # .. .. E .. | 180 | subq $1, 1, $1 # .. .. E .. |
195 | subq $0, 8, $0 # .. E .. .. | 181 | subq $0, 8, $0 # .. E .. .. |
196 | nop # E .. .. .. : U L U L | 182 | nop # E .. .. .. : U L U L |
197 | 183 | ||
198 | nop # .. .. .. E | 184 | nop # .. .. .. E |
199 | nop # .. .. E .. | 185 | nop # .. .. E .. |
200 | addq $6, 8, $6 # .. E .. .. | 186 | addq $16, 8, $16 # .. E .. .. |
201 | bgt $1, $onequad # U .. .. .. : U L U L | 187 | bgt $1, $onequad # U .. .. .. : U L U L |
202 | 188 | ||
203 | # We have an unknown number of bytes left to go. | 189 | # We have an unknown number of bytes left to go. |
@@ -211,9 +197,9 @@ $trailbytes: | |||
211 | # so we will use $0 as the loop counter | 197 | # so we will use $0 as the loop counter |
212 | # We know for a fact that $0 > 0 due to previous context | 198 | # We know for a fact that $0 > 0 due to previous context |
213 | $onebyte: | 199 | $onebyte: |
214 | EX( stb $31, 0($6) ) # .. .. .. L | 200 | EX( stb $31, 0($16) ) # .. .. .. L |
215 | subq $0, 1, $0 # .. .. E .. : | 201 | subq $0, 1, $0 # .. .. E .. : |
216 | addq $6, 1, $6 # .. E .. .. : | 202 | addq $16, 1, $16 # .. E .. .. : |
217 | bgt $0, $onebyte # U .. .. .. : U L U L | 203 | bgt $0, $onebyte # U .. .. .. : U L U L |
218 | 204 | ||
219 | $zerolength: | 205 | $zerolength: |
@@ -221,6 +207,6 @@ $exception: # Destination for exception recovery(?) | |||
221 | nop # .. .. .. E : | 207 | nop # .. .. .. E : |
222 | nop # .. .. E .. : | 208 | nop # .. .. E .. : |
223 | nop # .. E .. .. : | 209 | nop # .. E .. .. : |
224 | ret $31, ($28), 1 # L0 .. .. .. : L U L U | 210 | ret $31, ($26), 1 # L0 .. .. .. : L U L U |
225 | .end __do_clear_user | 211 | .end __clear_user |
226 | EXPORT_SYMBOL(__do_clear_user) | 212 | EXPORT_SYMBOL(__clear_user) |
diff --git a/arch/alpha/lib/ev6-copy_user.S b/arch/alpha/lib/ev6-copy_user.S index be720b518af9..35e6710d0700 100644 --- a/arch/alpha/lib/ev6-copy_user.S +++ b/arch/alpha/lib/ev6-copy_user.S | |||
@@ -12,21 +12,6 @@ | |||
12 | * only _after_ a successful copy). There is also some rather minor | 12 | * only _after_ a successful copy). There is also some rather minor |
13 | * exception setup stuff.. | 13 | * exception setup stuff.. |
14 | * | 14 | * |
15 | * NOTE! This is not directly C-callable, because the calling semantics are | ||
16 | * different: | ||
17 | * | ||
18 | * Inputs: | ||
19 | * length in $0 | ||
20 | * destination address in $6 | ||
21 | * source address in $7 | ||
22 | * return address in $28 | ||
23 | * | ||
24 | * Outputs: | ||
25 | * bytes left to copy in $0 | ||
26 | * | ||
27 | * Clobbers: | ||
28 | * $1,$2,$3,$4,$5,$6,$7 | ||
29 | * | ||
30 | * Much of the information about 21264 scheduling/coding comes from: | 15 | * Much of the information about 21264 scheduling/coding comes from: |
31 | * Compiler Writer's Guide for the Alpha 21264 | 16 | * Compiler Writer's Guide for the Alpha 21264 |
32 | * abbreviated as 'CWG' in other comments here | 17 | * abbreviated as 'CWG' in other comments here |
@@ -60,10 +45,11 @@ | |||
60 | # Pipeline info: Slotting & Comments | 45 | # Pipeline info: Slotting & Comments |
61 | __copy_user: | 46 | __copy_user: |
62 | .prologue 0 | 47 | .prologue 0 |
63 | subq $0, 32, $1 # .. E .. .. : Is this going to be a small copy? | 48 | and $18, $18, $0 |
49 | subq $18, 32, $1 # .. E .. .. : Is this going to be a small copy? | ||
64 | beq $0, $zerolength # U .. .. .. : U L U L | 50 | beq $0, $zerolength # U .. .. .. : U L U L |
65 | 51 | ||
66 | and $6,7,$3 # .. .. .. E : is leading dest misalignment | 52 | and $16,7,$3 # .. .. .. E : is leading dest misalignment |
67 | ble $1, $onebyteloop # .. .. U .. : 1st branch : small amount of data | 53 | ble $1, $onebyteloop # .. .. U .. : 1st branch : small amount of data |
68 | beq $3, $destaligned # .. U .. .. : 2nd (one cycle fetcher stall) | 54 | beq $3, $destaligned # .. U .. .. : 2nd (one cycle fetcher stall) |
69 | subq $3, 8, $3 # E .. .. .. : L U U L : trip counter | 55 | subq $3, 8, $3 # E .. .. .. : L U U L : trip counter |
@@ -73,17 +59,17 @@ __copy_user: | |||
73 | * We know we have at least one trip through this loop | 59 | * We know we have at least one trip through this loop |
74 | */ | 60 | */ |
75 | $aligndest: | 61 | $aligndest: |
76 | EXI( ldbu $1,0($7) ) # .. .. .. L : Keep loads separate from stores | 62 | EXI( ldbu $1,0($17) ) # .. .. .. L : Keep loads separate from stores |
77 | addq $6,1,$6 # .. .. E .. : Section 3.8 in the CWG | 63 | addq $16,1,$16 # .. .. E .. : Section 3.8 in the CWG |
78 | addq $3,1,$3 # .. E .. .. : | 64 | addq $3,1,$3 # .. E .. .. : |
79 | nop # E .. .. .. : U L U L | 65 | nop # E .. .. .. : U L U L |
80 | 66 | ||
81 | /* | 67 | /* |
82 | * the -1 is to compensate for the inc($6) done in a previous quadpack | 68 | * the -1 is to compensate for the inc($16) done in a previous quadpack |
83 | * which allows us zero dependencies within either quadpack in the loop | 69 | * which allows us zero dependencies within either quadpack in the loop |
84 | */ | 70 | */ |
85 | EXO( stb $1,-1($6) ) # .. .. .. L : | 71 | EXO( stb $1,-1($16) ) # .. .. .. L : |
86 | addq $7,1,$7 # .. .. E .. : Section 3.8 in the CWG | 72 | addq $17,1,$17 # .. .. E .. : Section 3.8 in the CWG |
87 | subq $0,1,$0 # .. E .. .. : | 73 | subq $0,1,$0 # .. E .. .. : |
88 | bne $3, $aligndest # U .. .. .. : U L U L | 74 | bne $3, $aligndest # U .. .. .. : U L U L |
89 | 75 | ||
@@ -92,29 +78,29 @@ $aligndest: | |||
92 | * If we arrived via branch, we have a minimum of 32 bytes | 78 | * If we arrived via branch, we have a minimum of 32 bytes |
93 | */ | 79 | */ |
94 | $destaligned: | 80 | $destaligned: |
95 | and $7,7,$1 # .. .. .. E : Check _current_ source alignment | 81 | and $17,7,$1 # .. .. .. E : Check _current_ source alignment |
96 | bic $0,7,$4 # .. .. E .. : number bytes as a quadword loop | 82 | bic $0,7,$4 # .. .. E .. : number bytes as a quadword loop |
97 | EXI( ldq_u $3,0($7) ) # .. L .. .. : Forward fetch for fallthrough code | 83 | EXI( ldq_u $3,0($17) ) # .. L .. .. : Forward fetch for fallthrough code |
98 | beq $1,$quadaligned # U .. .. .. : U L U L | 84 | beq $1,$quadaligned # U .. .. .. : U L U L |
99 | 85 | ||
100 | /* | 86 | /* |
101 | * In the worst case, we've just executed an ldq_u here from 0($7) | 87 | * In the worst case, we've just executed an ldq_u here from 0($17) |
102 | * and we'll repeat it once if we take the branch | 88 | * and we'll repeat it once if we take the branch |
103 | */ | 89 | */ |
104 | 90 | ||
105 | /* Misaligned quadword loop - not unrolled. Leave it that way. */ | 91 | /* Misaligned quadword loop - not unrolled. Leave it that way. */ |
106 | $misquad: | 92 | $misquad: |
107 | EXI( ldq_u $2,8($7) ) # .. .. .. L : | 93 | EXI( ldq_u $2,8($17) ) # .. .. .. L : |
108 | subq $4,8,$4 # .. .. E .. : | 94 | subq $4,8,$4 # .. .. E .. : |
109 | extql $3,$7,$3 # .. U .. .. : | 95 | extql $3,$17,$3 # .. U .. .. : |
110 | extqh $2,$7,$1 # U .. .. .. : U U L L | 96 | extqh $2,$17,$1 # U .. .. .. : U U L L |
111 | 97 | ||
112 | bis $3,$1,$1 # .. .. .. E : | 98 | bis $3,$1,$1 # .. .. .. E : |
113 | EXO( stq $1,0($6) ) # .. .. L .. : | 99 | EXO( stq $1,0($16) ) # .. .. L .. : |
114 | addq $7,8,$7 # .. E .. .. : | 100 | addq $17,8,$17 # .. E .. .. : |
115 | subq $0,8,$0 # E .. .. .. : U L L U | 101 | subq $0,8,$0 # E .. .. .. : U L L U |
116 | 102 | ||
117 | addq $6,8,$6 # .. .. .. E : | 103 | addq $16,8,$16 # .. .. .. E : |
118 | bis $2,$2,$3 # .. .. E .. : | 104 | bis $2,$2,$3 # .. .. E .. : |
119 | nop # .. E .. .. : | 105 | nop # .. E .. .. : |
120 | bne $4,$misquad # U .. .. .. : U L U L | 106 | bne $4,$misquad # U .. .. .. : U L U L |
@@ -125,8 +111,8 @@ $misquad: | |||
125 | beq $0,$zerolength # U .. .. .. : U L U L | 111 | beq $0,$zerolength # U .. .. .. : U L U L |
126 | 112 | ||
127 | /* We know we have at least one trip through the byte loop */ | 113 | /* We know we have at least one trip through the byte loop */ |
128 | EXI ( ldbu $2,0($7) ) # .. .. .. L : No loads in the same quad | 114 | EXI ( ldbu $2,0($17) ) # .. .. .. L : No loads in the same quad |
129 | addq $6,1,$6 # .. .. E .. : as the store (Section 3.8 in CWG) | 115 | addq $16,1,$16 # .. .. E .. : as the store (Section 3.8 in CWG) |
130 | nop # .. E .. .. : | 116 | nop # .. E .. .. : |
131 | br $31, $dirtyentry # L0 .. .. .. : L U U L | 117 | br $31, $dirtyentry # L0 .. .. .. : L U U L |
132 | /* Do the trailing byte loop load, then hop into the store part of the loop */ | 118 | /* Do the trailing byte loop load, then hop into the store part of the loop */ |
@@ -136,8 +122,8 @@ $misquad: | |||
136 | * Based upon the usage context, it's worth the effort to unroll this loop | 122 | * Based upon the usage context, it's worth the effort to unroll this loop |
137 | * $0 - number of bytes to be moved | 123 | * $0 - number of bytes to be moved |
138 | * $4 - number of bytes to move as quadwords | 124 | * $4 - number of bytes to move as quadwords |
139 | * $6 is current destination address | 125 | * $16 is current destination address |
140 | * $7 is current source address | 126 | * $17 is current source address |
141 | */ | 127 | */ |
142 | $quadaligned: | 128 | $quadaligned: |
143 | subq $4, 32, $2 # .. .. .. E : do not unroll for small stuff | 129 | subq $4, 32, $2 # .. .. .. E : do not unroll for small stuff |
@@ -155,29 +141,29 @@ $quadaligned: | |||
155 | * instruction memory hint instruction). | 141 | * instruction memory hint instruction). |
156 | */ | 142 | */ |
157 | $unroll4: | 143 | $unroll4: |
158 | EXI( ldq $1,0($7) ) # .. .. .. L | 144 | EXI( ldq $1,0($17) ) # .. .. .. L |
159 | EXI( ldq $2,8($7) ) # .. .. L .. | 145 | EXI( ldq $2,8($17) ) # .. .. L .. |
160 | subq $4,32,$4 # .. E .. .. | 146 | subq $4,32,$4 # .. E .. .. |
161 | nop # E .. .. .. : U U L L | 147 | nop # E .. .. .. : U U L L |
162 | 148 | ||
163 | addq $7,16,$7 # .. .. .. E | 149 | addq $17,16,$17 # .. .. .. E |
164 | EXO( stq $1,0($6) ) # .. .. L .. | 150 | EXO( stq $1,0($16) ) # .. .. L .. |
165 | EXO( stq $2,8($6) ) # .. L .. .. | 151 | EXO( stq $2,8($16) ) # .. L .. .. |
166 | subq $0,16,$0 # E .. .. .. : U L L U | 152 | subq $0,16,$0 # E .. .. .. : U L L U |
167 | 153 | ||
168 | addq $6,16,$6 # .. .. .. E | 154 | addq $16,16,$16 # .. .. .. E |
169 | EXI( ldq $1,0($7) ) # .. .. L .. | 155 | EXI( ldq $1,0($17) ) # .. .. L .. |
170 | EXI( ldq $2,8($7) ) # .. L .. .. | 156 | EXI( ldq $2,8($17) ) # .. L .. .. |
171 | subq $4, 32, $3 # E .. .. .. : U U L L : is there enough for another trip? | 157 | subq $4, 32, $3 # E .. .. .. : U U L L : is there enough for another trip? |
172 | 158 | ||
173 | EXO( stq $1,0($6) ) # .. .. .. L | 159 | EXO( stq $1,0($16) ) # .. .. .. L |
174 | EXO( stq $2,8($6) ) # .. .. L .. | 160 | EXO( stq $2,8($16) ) # .. .. L .. |
175 | subq $0,16,$0 # .. E .. .. | 161 | subq $0,16,$0 # .. E .. .. |
176 | addq $7,16,$7 # E .. .. .. : U L L U | 162 | addq $17,16,$17 # E .. .. .. : U L L U |
177 | 163 | ||
178 | nop # .. .. .. E | 164 | nop # .. .. .. E |
179 | nop # .. .. E .. | 165 | nop # .. .. E .. |
180 | addq $6,16,$6 # .. E .. .. | 166 | addq $16,16,$16 # .. E .. .. |
181 | bgt $3,$unroll4 # U .. .. .. : U L U L | 167 | bgt $3,$unroll4 # U .. .. .. : U L U L |
182 | 168 | ||
183 | nop | 169 | nop |
@@ -186,14 +172,14 @@ $unroll4: | |||
186 | beq $4, $noquads | 172 | beq $4, $noquads |
187 | 173 | ||
188 | $onequad: | 174 | $onequad: |
189 | EXI( ldq $1,0($7) ) | 175 | EXI( ldq $1,0($17) ) |
190 | subq $4,8,$4 | 176 | subq $4,8,$4 |
191 | addq $7,8,$7 | 177 | addq $17,8,$17 |
192 | nop | 178 | nop |
193 | 179 | ||
194 | EXO( stq $1,0($6) ) | 180 | EXO( stq $1,0($16) ) |
195 | subq $0,8,$0 | 181 | subq $0,8,$0 |
196 | addq $6,8,$6 | 182 | addq $16,8,$16 |
197 | bne $4,$onequad | 183 | bne $4,$onequad |
198 | 184 | ||
199 | $noquads: | 185 | $noquads: |
@@ -207,23 +193,23 @@ $noquads: | |||
207 | * There's no point in doing a lot of complex alignment calculations to try to | 193 | * There's no point in doing a lot of complex alignment calculations to try to |
208 | * do quadword stuff for a small amount of data. | 194 | * do quadword stuff for a small amount of data. |
209 | * $0 - remaining number of bytes left to copy | 195 | * $0 - remaining number of bytes left to copy |
210 | * $6 - current dest addr | 196 | * $16 - current dest addr |
211 | * $7 - current source addr | 197 | * $17 - current source addr |
212 | */ | 198 | */ |
213 | 199 | ||
214 | $onebyteloop: | 200 | $onebyteloop: |
215 | EXI ( ldbu $2,0($7) ) # .. .. .. L : No loads in the same quad | 201 | EXI ( ldbu $2,0($17) ) # .. .. .. L : No loads in the same quad |
216 | addq $6,1,$6 # .. .. E .. : as the store (Section 3.8 in CWG) | 202 | addq $16,1,$16 # .. .. E .. : as the store (Section 3.8 in CWG) |
217 | nop # .. E .. .. : | 203 | nop # .. E .. .. : |
218 | nop # E .. .. .. : U L U L | 204 | nop # E .. .. .. : U L U L |
219 | 205 | ||
220 | $dirtyentry: | 206 | $dirtyentry: |
221 | /* | 207 | /* |
222 | * the -1 is to compensate for the inc($6) done in a previous quadpack | 208 | * the -1 is to compensate for the inc($16) done in a previous quadpack |
223 | * which allows us zero dependencies within either quadpack in the loop | 209 | * which allows us zero dependencies within either quadpack in the loop |
224 | */ | 210 | */ |
225 | EXO ( stb $2,-1($6) ) # .. .. .. L : | 211 | EXO ( stb $2,-1($16) ) # .. .. .. L : |
226 | addq $7,1,$7 # .. .. E .. : quadpack as the load | 212 | addq $17,1,$17 # .. .. E .. : quadpack as the load |
227 | subq $0,1,$0 # .. E .. .. : change count _after_ copy | 213 | subq $0,1,$0 # .. E .. .. : change count _after_ copy |
228 | bgt $0,$onebyteloop # U .. .. .. : U L U L | 214 | bgt $0,$onebyteloop # U .. .. .. : U L U L |
229 | 215 | ||
@@ -233,7 +219,7 @@ $exitout: # Destination for exception recovery(?) | |||
233 | nop # .. .. .. E | 219 | nop # .. .. .. E |
234 | nop # .. .. E .. | 220 | nop # .. .. E .. |
235 | nop # .. E .. .. | 221 | nop # .. E .. .. |
236 | ret $31,($28),1 # L0 .. .. .. : L U L U | 222 | ret $31,($26),1 # L0 .. .. .. : L U L U |
237 | 223 | ||
238 | .end __copy_user | 224 | .end __copy_user |
239 | EXPORT_SYMBOL(__copy_user) | 225 | EXPORT_SYMBOL(__copy_user) |