diff options
Diffstat (limited to 'arch/m68k/ifpsp060/src/ilsp.S')
-rw-r--r-- | arch/m68k/ifpsp060/src/ilsp.S | 932 |
1 files changed, 932 insertions, 0 deletions
diff --git a/arch/m68k/ifpsp060/src/ilsp.S b/arch/m68k/ifpsp060/src/ilsp.S new file mode 100644 index 00000000000..afa7422cddb --- /dev/null +++ b/arch/m68k/ifpsp060/src/ilsp.S | |||
@@ -0,0 +1,932 @@ | |||
1 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
2 | MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP | ||
3 | M68000 Hi-Performance Microprocessor Division | ||
4 | M68060 Software Package | ||
5 | Production Release P1.00 -- October 10, 1994 | ||
6 | |||
7 | M68060 Software Package Copyright © 1993, 1994 Motorola Inc. All rights reserved. | ||
8 | |||
9 | THE SOFTWARE is provided on an "AS IS" basis and without warranty. | ||
10 | To the maximum extent permitted by applicable law, | ||
11 | MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, | ||
12 | INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE | ||
13 | and any warranty against infringement with regard to the SOFTWARE | ||
14 | (INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials. | ||
15 | |||
16 | To the maximum extent permitted by applicable law, | ||
17 | IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER | ||
18 | (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, | ||
19 | BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS) | ||
20 | ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE. | ||
21 | Motorola assumes no responsibility for the maintenance and support of the SOFTWARE. | ||
22 | |||
23 | You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE | ||
24 | so long as this entire notice is retained without alteration in any modified and/or | ||
25 | redistributed versions, and that such modified versions are clearly identified as such. | ||
26 | No licenses are granted by implication, estoppel or otherwise under any patents | ||
27 | or trademarks of Motorola, Inc. | ||
28 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
29 | # litop.s: | ||
30 | # This file is appended to the top of the 060FPLSP package | ||
31 | # and contains the entry points into the package. The user, in | ||
32 | # effect, branches to one of the branch table entries located here. | ||
33 | # | ||
34 | |||
# Entry-point table.  Each slot is a "bra.l" to one library routine followed
# by a zero pad word.
# NOTE(review): callers presumably rely on the slot order and spacing --
# confirm before reordering or resizing entries.
# 64-bit divide: signed, then unsigned.
35 | bra.l _060LSP__idivs64_ | ||
36 | short 0x0000 | ||
37 | bra.l _060LSP__idivu64_ | ||
38 | short 0x0000 | ||
39 | |||
# 64-bit multiply: signed, then unsigned.
40 | bra.l _060LSP__imuls64_ | ||
41 | short 0x0000 | ||
42 | bra.l _060LSP__imulu64_ | ||
43 | short 0x0000 | ||
44 | |||
# cmp2: address-register forms (byte, word, long), then data-register forms.
45 | bra.l _060LSP__cmp2_Ab_ | ||
46 | short 0x0000 | ||
47 | bra.l _060LSP__cmp2_Aw_ | ||
48 | short 0x0000 | ||
49 | bra.l _060LSP__cmp2_Al_ | ||
50 | short 0x0000 | ||
51 | bra.l _060LSP__cmp2_Db_ | ||
52 | short 0x0000 | ||
53 | bra.l _060LSP__cmp2_Dw_ | ||
54 | short 0x0000 | ||
55 | bra.l _060LSP__cmp2_Dl_ | ||
56 | short 0x0000 | ||
57 | |||
58 | # leave room for possible future additions. | ||
59 | align 0x200 | ||
60 | |||
61 | ######################################################################### | ||
62 | # XDEF **************************************************************** # | ||
63 | # _060LSP__idivu64_(): Emulate 64-bit unsigned div instruction. # | ||
64 | # _060LSP__idivs64_(): Emulate 64-bit signed div instruction. # | ||
65 | # # | ||
66 | # This is the library version which is accessed as a subroutine # | ||
67 | # and therefore does not work exactly like the 680X0 div{s,u}.l # | ||
68 | # 64-bit divide instruction. # | ||
69 | # # | ||
70 | # XREF **************************************************************** # | ||
71 | # None. # | ||
72 | # # | ||
73 | # INPUT *************************************************************** # | ||
74 | # 0x4(sp) = divisor # | ||
75 | # 0x8(sp) = hi(dividend) # | ||
76 | # 0xc(sp) = lo(dividend) # | ||
77 | # 0x10(sp) = pointer to location to place quotient/remainder # | ||
78 | # # | ||
79 | # OUTPUT ************************************************************** # | ||
80 | # 0x10(sp) = points to location of remainder/quotient. # | ||
81 | # remainder is in first longword, quotient is in 2nd. # | ||
82 | # # | ||
83 | # ALGORITHM *********************************************************** # | ||
84 | # If the operands are signed, make them unsigned and save the # | ||
85 | # sign info for later. Separate out special cases like divide-by-zero # | ||
86 | # or 32-bit divides if possible. Else, use a special math algorithm # | ||
87 | # to calculate the result. # | ||
88 | # Restore sign info if signed instruction. Set the condition # | ||
89 | # codes before performing the final "rts". If the divisor was equal to # | ||
90 | # zero, then perform a divide-by-zero using a 16-bit implemented # | ||
91 | # divide instruction. This way, the operating system can record that # | ||
92 | # the event occurred even though it may not point to the correct place. # | ||
93 | # # | ||
94 | ######################################################################### | ||
95 | |||
# Stack-frame locals, addressed as negative offsets from %a6 after
# "link.w %a6,&-16":
#   POSNEG     (byte) set by the signed entry point, cleared by the unsigned one
#   NDIVISOR   (byte) set when the divisor was negative
#   NDIVIDEND  (byte) set when the dividend was negative
#   DDSECOND   (byte) set once the first quotient word has been produced
#   DDNORMAL   (long) number of normalization shifts applied in lddknuth
#   DDQUOTIENT (long) quotient, assembled one 16-bit word at a time
#   DIV64_CC   (word) condition codes captured on entry
96 | set POSNEG, -1 | ||
97 | set NDIVISOR, -2 | ||
98 | set NDIVIDEND, -3 | ||
99 | set DDSECOND, -4 | ||
100 | set DDNORMAL, -8 | ||
101 | set DDQUOTIENT, -12 | ||
102 | set DIV64_CC, -16 | ||
103 | |||
104 | ########## | ||
105 | # divs.l # | ||
106 | ########## | ||
# Signed entry point: builds the frame, saves d2-d7 and the incoming
# condition codes, marks the operation as signed and joins the shared body.
107 | global _060LSP__idivs64_ | ||
108 | _060LSP__idivs64_: | ||
109 | # PROLOGUE BEGIN ######################################################## | ||
110 | link.w %a6,&-16 | ||
111 | movm.l &0x3f00,-(%sp) # save d2-d7 | ||
112 | # fmovm.l &0x0,-(%sp) # save no fpregs | ||
113 | # PROLOGUE END ########################################################## | ||
114 | |||
115 | mov.w %cc,DIV64_CC(%a6) | ||
116 | st POSNEG(%a6) # signed operation | ||
117 | bra.b ldiv64_cont | ||
118 | |||
119 | ########## | ||
120 | # divu.l # | ||
121 | ########## | ||
# Unsigned entry point: same prologue as above, but POSNEG is cleared and
# execution falls through into the shared body.
122 | global _060LSP__idivu64_ | ||
123 | _060LSP__idivu64_: | ||
124 | # PROLOGUE BEGIN ######################################################## | ||
125 | link.w %a6,&-16 | ||
126 | movm.l &0x3f00,-(%sp) # save d2-d7 | ||
127 | # fmovm.l &0x0,-(%sp) # save no fpregs | ||
128 | # PROLOGUE END ########################################################## | ||
129 | |||
130 | mov.w %cc,DIV64_CC(%a6) | ||
131 | sf POSNEG(%a6) # unsigned operation | ||
132 | |||
# Shared body.  Arguments relative to %a6: 0x8 = divisor, 0xc = hi(dividend),
# 0x10 = lo(dividend), 0x14 = pointer to the two-longword result area.
# Register roles from here on: d7 = divisor, d5:d6 = dividend (hi:lo);
# on the way out d5 = remainder and d6 = quotient.
133 | ldiv64_cont: | ||
134 | mov.l 0x8(%a6),%d7 # fetch divisor | ||
135 | |||
136 | beq.w ldiv64eq0 # divisor is = 0!!! | ||
137 | |||
138 | mov.l 0xc(%a6), %d5 # get dividend hi | ||
139 | mov.l 0x10(%a6), %d6 # get dividend lo | ||
140 | |||
141 | # separate signed and unsigned divide | ||
142 | tst.b POSNEG(%a6) # signed or unsigned? | ||
143 | beq.b ldspecialcases # use positive divide | ||
144 | |||
145 | # save the sign of the divisor | ||
146 | # make divisor unsigned if it's negative | ||
147 | tst.l %d7 # chk sign of divisor | ||
148 | slt NDIVISOR(%a6) # save sign of divisor | ||
149 | bpl.b ldsgndividend | ||
150 | neg.l %d7 # complement negative divisor | ||
151 | |||
152 | # save the sign of the dividend | ||
153 | # make dividend unsigned if it's negative | ||
154 | ldsgndividend: | ||
155 | tst.l %d5 # chk sign of hi(dividend) | ||
156 | slt NDIVIDEND(%a6) # save sign of dividend | ||
157 | bpl.b ldspecialcases | ||
158 | |||
159 | mov.w &0x0, %cc # clear 'X' cc bit | ||
160 | negx.l %d6 # complement signed dividend | ||
161 | negx.l %d5 | ||
162 | |||
163 | # extract some special cases: | ||
164 | # - is (dividend == 0) ? | ||
165 | # - is (hi(dividend) == 0 && (divisor <= lo(dividend))) ? (32-bit div) | ||
166 | ldspecialcases: | ||
167 | tst.l %d5 # is (hi(dividend) == 0) | ||
168 | bne.b ldnormaldivide # no, so try it the long way | ||
169 | |||
170 | tst.l %d6 # is (lo(dividend) == 0), too | ||
171 | beq.w lddone # yes, so (dividend == 0) | ||
172 | |||
# NOTE(review): the cmp operand order in this assembler dialect appears to be
# reversed relative to standard Motorola syntax (compare the immediate-second
# form "cmpi.l %d6, &0x80000000" further down); the inline comments describe
# the resulting test -- confirm against the assembler documentation.
173 | cmp.l %d7,%d6 # is (divisor <= lo(dividend)) | ||
174 | bls.b ld32bitdivide # yes, so use 32 bit divide | ||
175 | |||
176 | exg %d5,%d6 # q = 0, r = dividend | ||
177 | bra.w ldivfinish # can't divide, we're done. | ||
178 | |||
179 | ld32bitdivide: | ||
180 | tdivu.l %d7, %d5:%d6 # it's only a 32/32 bit div! | ||
181 | |||
182 | bra.b ldivfinish | ||
183 | |||
184 | ldnormaldivide: | ||
185 | # last special case: | ||
186 | # - is hi(dividend) >= divisor ? if yes, then overflow | ||
187 | cmp.l %d7,%d5 | ||
188 | bls.b lddovf # answer won't fit in 32 bits | ||
189 | |||
190 | # perform the divide algorithm: | ||
191 | bsr.l ldclassical # do int divide | ||
192 | |||
193 | # separate into signed and unsigned finishes. | ||
194 | ldivfinish: | ||
195 | tst.b POSNEG(%a6) # do divs, divu separately | ||
196 | beq.b lddone # divu has no processing!!! | ||
197 | |||
198 | # it was a divs.l, so ccode setting is a little more complicated... | ||
199 | tst.b NDIVIDEND(%a6) # remainder has same sign | ||
200 | beq.b ldcc # as dividend. | ||
201 | neg.l %d5 # sgn(rem) = sgn(dividend) | ||
202 | ldcc: | ||
203 | mov.b NDIVISOR(%a6), %d0 | ||
204 | eor.b %d0, NDIVIDEND(%a6) # chk if quotient is negative | ||
205 | beq.b ldqpos # branch to quot positive | ||
206 | |||
207 | # 0x80000000 is the largest number representable as a 32-bit negative | ||
208 | # number. the negative of 0x80000000 is 0x80000000. | ||
209 | cmpi.l %d6, &0x80000000 # will (-quot) fit in 32 bits? | ||
210 | bhi.b lddovf | ||
211 | |||
212 | neg.l %d6 # make (-quot) 2's comp | ||
213 | |||
214 | bra.b lddone | ||
215 | |||
216 | ldqpos: | ||
217 | btst &0x1f, %d6 # will (+quot) fit in 32 bits? | ||
218 | bne.b lddovf | ||
219 | |||
220 | lddone: | ||
221 | # success exit: keep only the 'X' bit of the ccodes saved on entry, then | ||
222 | # let "tst.l" derive the remaining ccode bits from the quotient in d6. | ||
223 | andi.w &0x10,DIV64_CC(%a6) | ||
224 | mov.w DIV64_CC(%a6),%cc | ||
225 | tst.l %d6 # may set 'N' ccode bit | ||
226 | |||
227 | # here, the remainder is in d5 and the quotient is in d6. both are stored | ||
228 | # at the location whose address is held in 0x14(%a6), remainder first. | ||
229 | # use movm here to not disturb the condition codes. | ||
230 | ldexit: | ||
231 | movm.l &0x0060,([0x14,%a6]) # store d5 then d6 as the result | ||
232 | |||
233 | # EPILOGUE BEGIN ######################################################## | ||
234 | # fmovm.l (%sp)+,&0x0 # restore no fpregs | ||
235 | movm.l (%sp)+,&0x00fc # restore d2-d7 | ||
236 | unlk %a6 | ||
237 | # EPILOGUE END ########################################################## | ||
238 | |||
239 | rts | ||
240 | |||
241 | # the result should be the unchanged dividend | ||
# overflow exit: reload the original dividend into d5:d6, mask the saved
# ccodes with 0x1c, set the 'V' bit, and leave through the common store/exit.
242 | lddovf: | ||
243 | mov.l 0xc(%a6), %d5 # get dividend hi | ||
244 | mov.l 0x10(%a6), %d6 # get dividend lo | ||
245 | |||
246 | andi.w &0x1c,DIV64_CC(%a6) | ||
247 | ori.w &0x02,DIV64_CC(%a6) # set 'V' ccode bit | ||
248 | mov.w DIV64_CC(%a6),%cc | ||
249 | |||
250 | bra.b ldexit | ||
251 | |||
# divide-by-zero exit: copy the unchanged dividend to the result area,
# restore the ccodes saved on entry, tear down the frame, and then execute a
# 16-bit divide by zero so the exception is actually raised.
252 | ldiv64eq0: | ||
253 | mov.l 0xc(%a6),([0x14,%a6]) | ||
254 | mov.l 0x10(%a6),([0x14,%a6],0x4) | ||
255 | |||
256 | mov.w DIV64_CC(%a6),%cc | ||
257 | |||
258 | # EPILOGUE BEGIN ######################################################## | ||
259 | # fmovm.l (%sp)+,&0x0 # restore no fpregs | ||
260 | movm.l (%sp)+,&0x00fc # restore d2-d7 | ||
261 | unlk %a6 | ||
262 | # EPILOGUE END ########################################################## | ||
263 | |||
264 | divu.w &0x0,%d0 # force a divbyzero exception | ||
265 | rts | ||
266 | |||
267 | ########################################################################### | ||
268 | ######################################################################### | ||
269 | # This routine uses the 'classical' Algorithm D from Donald Knuth's # | ||
270 | # Art of Computer Programming, vol II, Seminumerical Algorithms. # | ||
271 | # For this implementation b=2**16, and the target is U1U2U3U4/V1V2, # | ||
272 | # where U,V are words of the quadword dividend and longword divisor, # | ||
273 | # and U1, V1 are the most significant words. # | ||
274 | # # | ||
275 | # The most sig. longword of the 64 bit dividend must be in %d5, least # | ||
276 | # in %d6. The divisor must be in %d7. All three values are treated as # | ||
277 | # unsigned (the signed entry point strips the signs beforehand). # | ||
278 | # The quotient is returned in %d6, remainder in %d5, unless the # | ||
279 | # v (overflow) bit is set in the saved %ccr. If overflow, the dividend # | ||
280 | # is unchanged. # | ||
281 | ######################################################################### | ||
# ldclassical: unsigned divide of the 64-bit value in d5:d6 by d7.
#   in:  d5 = hi(dividend), d6 = lo(dividend), d7 = divisor.  %a6 must be the
#        frame that provides DDNORMAL, DDSECOND and DDQUOTIENT.
#   out: d6 = quotient, d5 = remainder.
#   d0-d4 and d7 are used as scratch along the way.
282 | ldclassical: | ||
283 | # if the divisor msw is 0, use a simpler algorithm than the full blown | ||
284 | # one at lddknuth: | ||
285 | |||
286 | cmpi.l %d7, &0xffff | ||
287 | bhi.b lddknuth # go use D. Knuth algorithm | ||
288 | |||
289 | # Since the divisor is only a word (and larger than the mslw of the dividend), | ||
290 | # a simpler algorithm may be used : | ||
291 | # In the general case, four quotient words would be created by | ||
292 | # dividing the divisor word into each dividend word. In this case, | ||
293 | # the first two quotient words must be zero, or overflow would occur. | ||
294 | # Since we already checked this case above, we can treat the most significant | ||
295 | # longword of the dividend as (0) remainder (see Knuth) and merely complete | ||
296 | # the last two divisions to get a quotient longword and word remainder: | ||
297 | |||
298 | clr.l %d1 | ||
299 | swap %d5 # same as r*b if previous step rqd | ||
300 | swap %d6 # get u3 to lsw position | ||
301 | mov.w %d6, %d5 # rb + u3 | ||
302 | |||
303 | divu.w %d7, %d5 | ||
304 | |||
305 | mov.w %d5, %d1 # first quotient word | ||
306 | swap %d6 # get u4 | ||
307 | mov.w %d6, %d5 # rb + u4 | ||
308 | |||
309 | divu.w %d7, %d5 | ||
310 | |||
311 | swap %d1 | ||
312 | mov.w %d5, %d1 # 2nd quotient 'digit' | ||
313 | clr.w %d5 | ||
314 | swap %d5 # now remainder | ||
315 | mov.l %d1, %d6 # and quotient | ||
316 | |||
317 | rts | ||
318 | |||
319 | lddknuth: | ||
320 | # In this algorithm, the divisor is treated as a 2 digit (word) number | ||
321 | # which is divided into a 3 digit (word) dividend to get one quotient | ||
322 | # digit (word). After subtraction, the dividend is shifted and the | ||
323 | # process repeated. Before beginning, the divisor and quotient are | ||
324 | # 'normalized' so that the process of estimating the quotient digit | ||
325 | # will yield verifiably correct results.. | ||
326 | |||
327 | clr.l DDNORMAL(%a6) # count of shifts for normalization | ||
328 | clr.b DDSECOND(%a6) # clear flag for quotient digits | ||
329 | clr.l %d1 # %d1 will hold trial quotient | ||
# normalization loop: shift divisor and dividend left together until bit 31
# of the divisor is set, counting the shifts in DDNORMAL.
330 | lddnchk: | ||
331 | btst &31, %d7 # must we normalize? first word of | ||
332 | bne.b lddnormalized # divisor (V1) must be >= 65536/2 | ||
333 | addq.l &0x1, DDNORMAL(%a6) # count normalization shifts | ||
334 | lsl.l &0x1, %d7 # shift the divisor | ||
335 | lsl.l &0x1, %d6 # shift u4,u3 with overflow to u2 | ||
336 | roxl.l &0x1, %d5 # shift u1,u2 | ||
337 | bra.w lddnchk | ||
# lddnormalized is entered twice: once per 16-bit quotient word (see the
# DDSECOND test at ldd2nd).
338 | lddnormalized: | ||
339 | |||
340 | # Now calculate an estimate of the quotient words (msw first, then lsw). | ||
341 | # The comments use subscripts for the first quotient digit determination. | ||
342 | mov.l %d7, %d3 # divisor | ||
343 | mov.l %d5, %d2 # dividend mslw | ||
344 | swap %d2 | ||
345 | swap %d3 | ||
346 | cmp.w %d2, %d3 # V1 = U1 ? | ||
347 | bne.b lddqcalc1 | ||
348 | mov.w &0xffff, %d1 # use max trial quotient word | ||
349 | bra.b lddadj0 | ||
350 | lddqcalc1: | ||
351 | mov.l %d5, %d1 | ||
352 | |||
353 | divu.w %d3, %d1 # use quotient of mslw/msw | ||
354 | |||
355 | andi.l &0x0000ffff, %d1 # zero any remainder | ||
356 | lddadj0: | ||
357 | |||
358 | # now test the trial quotient and adjust. This step plus the | ||
359 | # normalization assures (according to Knuth) that the trial | ||
360 | # quotient will be at worst 1 too large. | ||
361 | mov.l %d6, -(%sp) | ||
362 | clr.w %d6 # word u3 left | ||
363 | swap %d6 # in lsw position | ||
364 | lddadj1: mov.l %d7, %d3 | ||
365 | mov.l %d1, %d2 | ||
366 | mulu.w %d7, %d2 # V2q | ||
367 | swap %d3 | ||
368 | mulu.w %d1, %d3 # V1q | ||
369 | mov.l %d5, %d4 # U1U2 | ||
370 | sub.l %d3, %d4 # U1U2 - V1q | ||
371 | |||
372 | swap %d4 | ||
373 | |||
374 | mov.w %d4,%d0 | ||
375 | mov.w %d6,%d4 # insert lower word (U3) | ||
376 | |||
377 | tst.w %d0 # is upper word set? | ||
378 | bne.w lddadjd1 | ||
379 | |||
380 | # add.l %d6, %d4 # (U1U2 - V1q) + U3 | ||
381 | |||
382 | cmp.l %d2, %d4 | ||
383 | bls.b lddadjd1 # is V2q > (U1U2-V1q) + U3 ? | ||
384 | subq.l &0x1, %d1 # yes, decrement and recheck | ||
385 | bra.b lddadj1 | ||
386 | lddadjd1: | ||
387 | # now test the word by multiplying it by the divisor (V1V2) and comparing | ||
388 | # the 3 digit (word) result with the current dividend words | ||
389 | mov.l %d5, -(%sp) # save %d5 (%d6 already saved) | ||
390 | mov.l %d1, %d6 | ||
391 | swap %d6 # shift answer to ms 3 words | ||
392 | mov.l %d7, %d5 | ||
393 | bsr.l ldmm2 | ||
394 | mov.l %d5, %d2 # now %d2,%d3 are trial*divisor | ||
395 | mov.l %d6, %d3 | ||
396 | mov.l (%sp)+, %d5 # restore dividend | ||
397 | mov.l (%sp)+, %d6 | ||
398 | sub.l %d3, %d6 | ||
399 | subx.l %d2, %d5 # subtract double precision | ||
400 | bcc ldd2nd # no carry, do next quotient digit | ||
401 | subq.l &0x1, %d1 # q is one too large | ||
402 | # need to add back divisor longword to current ms 3 digits of dividend | ||
403 | # - according to Knuth, this is done only 2 out of 65536 times for random | ||
404 | # divisor, dividend selection. | ||
405 | clr.l %d2 | ||
406 | mov.l %d7, %d3 | ||
407 | swap %d3 | ||
408 | clr.w %d3 # %d3 now ls word of divisor | ||
409 | add.l %d3, %d6 # aligned with 3rd word of dividend | ||
410 | addx.l %d2, %d5 | ||
411 | mov.l %d7, %d3 | ||
412 | clr.w %d3 # %d3 now ms word of divisor | ||
413 | swap %d3 # aligned with 2nd word of dividend | ||
414 | add.l %d3, %d5 | ||
415 | ldd2nd: | ||
416 | tst.b DDSECOND(%a6) # both q words done? | ||
417 | bne.b lddremain | ||
418 | # first quotient digit now correct. store digit and shift the | ||
419 | # (subtracted) dividend | ||
420 | mov.w %d1, DDQUOTIENT(%a6) | ||
421 | clr.l %d1 | ||
422 | swap %d5 | ||
423 | swap %d6 | ||
424 | mov.w %d6, %d5 | ||
425 | clr.w %d6 | ||
426 | st DDSECOND(%a6) # second digit | ||
427 | bra.w lddnormalized | ||
428 | lddremain: | ||
429 | # add 2nd word to quotient, get the remainder. | ||
430 | mov.w %d1, DDQUOTIENT+2(%a6) | ||
431 | # shift down one word/digit to renormalize remainder. | ||
432 | mov.w %d5, %d6 | ||
433 | swap %d6 | ||
434 | swap %d5 | ||
435 | mov.l DDNORMAL(%a6), %d7 # get norm shift count | ||
436 | beq.b lddrn | ||
437 | subq.l &0x1, %d7 # set for loop count | ||
# undo the normalization: shift d5:d6 right once per recorded shift.
438 | lddnlp: | ||
439 | lsr.l &0x1, %d5 # shift into %d6 | ||
440 | roxr.l &0x1, %d6 | ||
441 | dbf %d7, lddnlp | ||
442 | lddrn: | ||
443 | mov.l %d6, %d5 # remainder | ||
444 | mov.l DDQUOTIENT(%a6), %d6 # quotient | ||
445 | |||
446 | rts | ||
# ldmm2: unsigned 32x32->64 multiply built from four 16x16->32 "mulu.w"
# partial products.
#   in:  d5, d6 = the two 32-bit factors
#   out: d5 = high 32 bits of the product, d6 = low 32 bits
#   d2, d3 and d4 are overwritten.
# The carries out of the middle-word additions are folded into the msw*msw
# product with a longword "addx.l" (as the 64-bit multiply routines in this
# file do); a word-sized addx would lose the carry whenever the low word of
# that product is 0xffff.
447 | ldmm2: | ||
448 | # factors for the 32X32->64 multiplication are in %d5 and %d6. | ||
449 | # returns 64 bit result in %d5 (hi) %d6(lo). | ||
450 | # destroys %d2,%d3,%d4. | ||
451 | |||
452 | # multiply hi,lo words of each factor to get 4 intermediate products | ||
453 | mov.l %d6, %d2 | ||
454 | mov.l %d6, %d3 | ||
455 | mov.l %d5, %d4 | ||
456 | swap %d3 | ||
457 | swap %d4 | ||
458 | mulu.w %d5, %d6 # %d6 <- lsw*lsw | ||
459 | mulu.w %d3, %d5 # %d5 <- msw-dest*lsw-source | ||
460 | mulu.w %d4, %d2 # %d2 <- msw-source*lsw-dest | ||
461 | mulu.w %d4, %d3 # %d3 <- msw*msw | ||
462 | # now use swap and addx to consolidate to two longwords | ||
463 | clr.l %d4 | ||
464 | swap %d6 | ||
465 | add.w %d5, %d6 # add msw of l*l to lsw of m*l product | ||
466 | addx.l %d4, %d3 # add any carry to full m*m product | ||
467 | add.w %d2, %d6 # add in lsw of other m*l product | ||
468 | addx.l %d4, %d3 # add any carry to full m*m product | ||
469 | swap %d6 # %d6 is low 32 bits of final product | ||
470 | clr.w %d5 | ||
471 | clr.w %d2 # lsw of two mixed products used, | ||
472 | swap %d5 # now use msws of longwords | ||
473 | swap %d2 | ||
474 | add.l %d2, %d5 | ||
475 | add.l %d3, %d5 # %d5 now ms 32 bits of final product | ||
476 | rts | ||
477 | |||
478 | ######################################################################### | ||
479 | # XDEF **************************************************************** # | ||
480 | # _060LSP__imulu64_(): Emulate 64-bit unsigned mul instruction # | ||
481 | # _060LSP__imuls64_(): Emulate 64-bit signed mul instruction. # | ||
482 | # # | ||
483 | # This is the library version which is accessed as a subroutine # | ||
484 | # and therefore does not work exactly like the 680X0 mul{s,u}.l # | ||
485 | # 64-bit multiply instruction. # | ||
486 | # # | ||
487 | # XREF **************************************************************** # | ||
488 | # None # | ||
489 | # # | ||
490 | # INPUT *************************************************************** # | ||
491 | # 0x4(sp) = multiplier # | ||
492 | # 0x8(sp) = multiplicand # | ||
493 | # 0xc(sp) = pointer to location to place 64-bit result # | ||
494 | # # | ||
495 | # OUTPUT ************************************************************** # | ||
496 | # 0xc(sp) = points to location of 64-bit result # | ||
497 | # # | ||
498 | # ALGORITHM *********************************************************** # | ||
499 | # Perform the multiply in pieces using 16x16->32 unsigned # | ||
500 | # multiplies and "add" instructions. # | ||
501 | # Set the condition codes as appropriate before performing an # | ||
502 | # "rts". # | ||
503 | # # | ||
504 | ######################################################################### | ||
505 | |||
# MUL64_CC: frame-local word (offset from %a6) holding the condition codes
# captured on entry to the multiply routines.
506 | set MUL64_CC, -4 | ||
507 | |||
# _060LSP__imulu64_: unsigned 32x32->64 multiply.
# Arguments relative to %a6 after the link: 0x8 = multiplier,
# 0xc = multiplicand, 0x10 = pointer to the two-longword result area.
# d2-d4 are saved and restored; d0 and d1 carry the result.
508 | global _060LSP__imulu64_ | ||
509 | _060LSP__imulu64_: | ||
510 | |||
511 | # PROLOGUE BEGIN ######################################################## | ||
512 | link.w %a6,&-4 | ||
513 | movm.l &0x3800,-(%sp) # save d2-d4 | ||
514 | # fmovm.l &0x0,-(%sp) # save no fpregs | ||
515 | # PROLOGUE END ########################################################## | ||
516 | |||
517 | mov.w %cc,MUL64_CC(%a6) # save incoming ccodes | ||
518 | |||
519 | mov.l 0x8(%a6),%d0 # store multiplier in d0 | ||
520 | beq.w mulu64_zero # handle zero separately | ||
521 | |||
522 | mov.l 0xc(%a6),%d1 # get multiplicand in d1 | ||
523 | beq.w mulu64_zero # handle zero separately | ||
524 | |||
525 | ######################################################################### | ||
526 | # 63 32 0 # | ||
527 | # ---------------------------- # | ||
528 | # | hi(mplier) * hi(mplicand)| # | ||
529 | # ---------------------------- # | ||
530 | # ----------------------------- # | ||
531 | # | hi(mplier) * lo(mplicand) | # | ||
532 | # ----------------------------- # | ||
533 | # ----------------------------- # | ||
534 | # | lo(mplier) * hi(mplicand) | # | ||
535 | # ----------------------------- # | ||
536 | # | ----------------------------- # | ||
537 | # --|-- | lo(mplier) * lo(mplicand) | # | ||
538 | # | ----------------------------- # | ||
539 | # ======================================================== # | ||
540 | # -------------------------------------------------------- # | ||
541 | # | hi(result) | lo(result) | # | ||
542 | # -------------------------------------------------------- # | ||
543 | ######################################################################### | ||
544 | mulu64_alg: | ||
545 | # load temp registers with operands | ||
546 | mov.l %d0,%d2 # mr in d2 | ||
547 | mov.l %d0,%d3 # mr in d3 | ||
548 | mov.l %d1,%d4 # md in d4 | ||
549 | swap %d3 # hi(mr) in lo d3 | ||
550 | swap %d4 # hi(md) in lo d4 | ||
551 | |||
552 | # complete necessary multiplies: | ||
553 | mulu.w %d1,%d0 # [1] lo(mr) * lo(md) | ||
554 | mulu.w %d3,%d1 # [2] hi(mr) * lo(md) | ||
555 | mulu.w %d4,%d2 # [3] lo(mr) * hi(md) | ||
556 | mulu.w %d4,%d3 # [4] hi(mr) * hi(md) | ||
557 | |||
558 | # add lo portions of [2],[3] to hi portion of [1]. | ||
559 | # add carries produced from these adds to [4]. | ||
560 | # lo([1]) is the final lo 16 bits of the result. | ||
561 | clr.l %d4 # load d4 w/ zero value | ||
562 | swap %d0 # hi([1]) <==> lo([1]) | ||
563 | add.w %d1,%d0 # hi([1]) + lo([2]) | ||
564 | addx.l %d4,%d3 # [4] + carry | ||
565 | add.w %d2,%d0 # hi([1]) + lo([3]) | ||
566 | addx.l %d4,%d3 # [4] + carry | ||
567 | swap %d0 # lo([1]) <==> hi([1]) | ||
568 | |||
569 | # lo portions of [2],[3] have been added in to final result. | ||
570 | # now, clear lo, put hi in lo reg, and add to [4] | ||
571 | clr.w %d1 # clear lo([2]) | ||
572 | clr.w %d2 # clear lo([3]) | ||
573 | swap %d1 # hi([2]) in lo d1 | ||
574 | swap %d2 # hi([3]) in lo d2 | ||
575 | add.l %d2,%d1 # hi([2]) + hi([3]) | ||
576 | add.l %d3,%d1 # ... + [4] (with carries) = hi(result) | ||
577 | |||
578 | # now, grab the condition codes. only one that can be set is 'N'. | ||
579 | # 'N' CAN be set if the operation is unsigned if bit 63 is set. | ||
580 | mov.w MUL64_CC(%a6),%d4 | ||
581 | andi.b &0x10,%d4 # keep old 'X' bit | ||
582 | tst.l %d1 # may set 'N' bit | ||
583 | bpl.b mulu64_ddone | ||
584 | ori.b &0x8,%d4 # set 'N' bit | ||
585 | mulu64_ddone: | ||
586 | mov.w %d4,%cc | ||
587 | |||
588 | # here, the result is in d1 (hi) and d0 (lo). it is stored at the location | ||
589 | # whose address is held in 0x10(%a6), hi longword first (hence the exg). | ||
590 | # use movm here to not disturb the condition codes. | ||
591 | mulu64_end: | ||
592 | exg %d1,%d0 | ||
593 | movm.l &0x0003,([0x10,%a6]) # save result | ||
594 | |||
595 | # EPILOGUE BEGIN ######################################################## | ||
596 | # fmovm.l (%sp)+,&0x0 # restore no fpregs | ||
597 | movm.l (%sp)+,&0x001c # restore d2-d4 | ||
598 | unlk %a6 | ||
599 | # EPILOGUE END ########################################################## | ||
600 | |||
601 | rts | ||
602 | |||
603 | # one or both of the operands is zero so the result is also zero. | ||
604 | # store the zero result through the result pointer and set the 'Z' ccode bit. | ||
605 | mulu64_zero: | ||
606 | clr.l %d0 | ||
607 | clr.l %d1 | ||
608 | |||
609 | mov.w MUL64_CC(%a6),%d4 | ||
610 | andi.b &0x10,%d4 | ||
611 | ori.b &0x4,%d4 | ||
612 | mov.w %d4,%cc # set 'Z' ccode bit | ||
613 | |||
614 | bra.b mulu64_end | ||
615 | |||
616 | ########## | ||
617 | # muls.l # | ||
618 | ########## | ||
# _060LSP__imuls64_: signed 32x32->64 multiply.
# Arguments relative to %a6 after the link: 0x8 = multiplier,
# 0xc = multiplicand, 0x10 = pointer to the two-longword result area.
# d2-d5 are saved and restored; d5 carries the result-sign tag, d0 and d1
# carry the result.  Both operands are made positive, multiplied with the
# same unsigned 16x16 scheme as the unsigned routine, and the 64-bit product
# is negated at the end if exactly one operand was negative.
# The zero-operand case exits through muls64_zero/muls64_end so that the
# register restore (d2-d5) matches this routine's own prologue.
619 | global _060LSP__imuls64_ | ||
620 | _060LSP__imuls64_: | ||
621 | |||
622 | # PROLOGUE BEGIN ######################################################## | ||
623 | link.w %a6,&-4 | ||
624 | movm.l &0x3c00,-(%sp) # save d2-d5 | ||
625 | # fmovm.l &0x0,-(%sp) # save no fpregs | ||
626 | # PROLOGUE END ########################################################## | ||
627 | |||
628 | mov.w %cc,MUL64_CC(%a6) # save incoming ccodes | ||
629 | |||
630 | mov.l 0x8(%a6),%d0 # store multiplier in d0 | ||
631 | beq.w muls64_zero # handle zero separately | ||
632 | |||
633 | mov.l 0xc(%a6),%d1 # get multiplicand in d1 | ||
634 | beq.w muls64_zero # handle zero separately | ||
635 | |||
636 | clr.b %d5 # clear sign tag | ||
637 | tst.l %d0 # is multiplier negative? | ||
638 | bge.b muls64_chk_md_sgn # no | ||
639 | neg.l %d0 # make multiplier positive | ||
640 | |||
641 | ori.b &0x1,%d5 # save multiplier sgn | ||
642 | |||
643 | # the result sign is the exclusive or of the operand sign bits. | ||
644 | muls64_chk_md_sgn: | ||
645 | tst.l %d1 # is multiplicand negative? | ||
646 | bge.b muls64_alg # no | ||
647 | neg.l %d1 # make multiplicand positive | ||
648 | |||
649 | eori.b &0x1,%d5 # calculate correct sign | ||
650 | |||
651 | ######################################################################### | ||
652 | # 63 32 0 # | ||
653 | # ---------------------------- # | ||
654 | # | hi(mplier) * hi(mplicand)| # | ||
655 | # ---------------------------- # | ||
656 | # ----------------------------- # | ||
657 | # | hi(mplier) * lo(mplicand) | # | ||
658 | # ----------------------------- # | ||
659 | # ----------------------------- # | ||
660 | # | lo(mplier) * hi(mplicand) | # | ||
661 | # ----------------------------- # | ||
662 | # | ----------------------------- # | ||
663 | # --|-- | lo(mplier) * lo(mplicand) | # | ||
664 | # | ----------------------------- # | ||
665 | # ======================================================== # | ||
666 | # -------------------------------------------------------- # | ||
667 | # | hi(result) | lo(result) | # | ||
668 | # -------------------------------------------------------- # | ||
669 | ######################################################################### | ||
670 | muls64_alg: | ||
671 | # load temp registers with operands | ||
672 | mov.l %d0,%d2 # mr in d2 | ||
673 | mov.l %d0,%d3 # mr in d3 | ||
674 | mov.l %d1,%d4 # md in d4 | ||
675 | swap %d3 # hi(mr) in lo d3 | ||
676 | swap %d4 # hi(md) in lo d4 | ||
677 | |||
678 | # complete necessary multiplies: | ||
679 | mulu.w %d1,%d0 # [1] lo(mr) * lo(md) | ||
680 | mulu.w %d3,%d1 # [2] hi(mr) * lo(md) | ||
681 | mulu.w %d4,%d2 # [3] lo(mr) * hi(md) | ||
682 | mulu.w %d4,%d3 # [4] hi(mr) * hi(md) | ||
683 | |||
684 | # add lo portions of [2],[3] to hi portion of [1]. | ||
685 | # add carries produced from these adds to [4]. | ||
686 | # lo([1]) is the final lo 16 bits of the result. | ||
687 | clr.l %d4 # load d4 w/ zero value | ||
688 | swap %d0 # hi([1]) <==> lo([1]) | ||
689 | add.w %d1,%d0 # hi([1]) + lo([2]) | ||
690 | addx.l %d4,%d3 # [4] + carry | ||
691 | add.w %d2,%d0 # hi([1]) + lo([3]) | ||
692 | addx.l %d4,%d3 # [4] + carry | ||
693 | swap %d0 # lo([1]) <==> hi([1]) | ||
694 | |||
695 | # lo portions of [2],[3] have been added in to final result. | ||
696 | # now, clear lo, put hi in lo reg, and add to [4] | ||
697 | clr.w %d1 # clear lo([2]) | ||
698 | clr.w %d2 # clear lo([3]) | ||
699 | swap %d1 # hi([2]) in lo d1 | ||
700 | swap %d2 # hi([3]) in lo d2 | ||
701 | add.l %d2,%d1 # hi([2]) + hi([3]) | ||
702 | add.l %d3,%d1 # ... + [4] (with carries) = hi(result) | ||
703 | |||
704 | tst.b %d5 # should result be signed? | ||
705 | beq.b muls64_done # no | ||
706 | |||
707 | # result should be a signed negative number. | ||
708 | # compute 2's complement of the unsigned number: | ||
709 | # -negate all bits and add 1 | ||
710 | muls64_neg: | ||
711 | not.l %d0 # negate lo(result) bits | ||
712 | not.l %d1 # negate hi(result) bits | ||
713 | addq.l &1,%d0 # add 1 to lo(result) | ||
714 | addx.l %d4,%d1 # add carry to hi(result) (d4 is still zero) | ||
715 | |||
716 | muls64_done: | ||
717 | mov.w MUL64_CC(%a6),%d4 | ||
718 | andi.b &0x10,%d4 # keep old 'X' bit | ||
719 | tst.l %d1 # may set 'N' bit | ||
720 | bpl.b muls64_ddone | ||
721 | ori.b &0x8,%d4 # set 'N' bit | ||
722 | muls64_ddone: | ||
723 | mov.w %d4,%cc | ||
724 | |||
725 | # here, the result is in d1 (hi) and d0 (lo). it is stored at the location | ||
726 | # whose address is held in 0x10(%a6), hi longword first (hence the exg). | ||
727 | # use movm here to not disturb the condition codes. | ||
728 | muls64_end: | ||
729 | exg %d1,%d0 | ||
730 | movm.l &0x0003,([0x10,%a6]) # save result via pointer at 0x10(%a6) | ||
731 | |||
732 | # EPILOGUE BEGIN ######################################################## | ||
733 | # fmovm.l (%sp)+,&0x0 # restore no fpregs | ||
734 | movm.l (%sp)+,&0x003c # restore d2-d5 | ||
735 | unlk %a6 | ||
736 | # EPILOGUE END ########################################################## | ||
737 | |||
738 | rts | ||
739 | |||
740 | # one or both of the operands is zero so the result is also zero. | ||
741 | # store the zero result through the result pointer and set the 'Z' ccode bit. | ||
742 | muls64_zero: | ||
743 | clr.l %d0 | ||
744 | clr.l %d1 | ||
745 | |||
746 | mov.w MUL64_CC(%a6),%d4 | ||
747 | andi.b &0x10,%d4 | ||
748 | ori.b &0x4,%d4 | ||
749 | mov.w %d4,%cc # set 'Z' ccode bit | ||
750 | |||
751 | bra.b muls64_end | ||
752 | |||
753 | ######################################################################### | ||
754 | # XDEF **************************************************************** # | ||
755 | # _060LSP__cmp2_Ab_(): Emulate "cmp2.b An,<ea>". # | ||
756 | # _060LSP__cmp2_Aw_(): Emulate "cmp2.w An,<ea>". # | ||
757 | # _060LSP__cmp2_Al_(): Emulate "cmp2.l An,<ea>". # | ||
758 | # _060LSP__cmp2_Db_(): Emulate "cmp2.b Dn,<ea>". # | ||
759 | # _060LSP__cmp2_Dw_(): Emulate "cmp2.w Dn,<ea>". # | ||
760 | # _060LSP__cmp2_Dl_(): Emulate "cmp2.l Dn,<ea>". # | ||
761 | # # | ||
762 | # This is the library version which is accessed as a subroutine # | ||
763 | # and therefore does not work exactly like the 680X0 "cmp2" # | ||
764 | # instruction. # | ||
765 | # # | ||
766 | # XREF **************************************************************** # | ||
767 | # None # | ||
768 | # # | ||
769 | # INPUT *************************************************************** # | ||
770 | # 0x4(sp) = Rn # | ||
771 | # 0x8(sp) = pointer to boundary pair # | ||
772 | # # | ||
773 | # OUTPUT ************************************************************** # | ||
774 | # cc = condition codes are set correctly # | ||
775 | # # | ||
776 | # ALGORITHM *********************************************************** # | ||
777 | # In the interest of simplicity, all operands are converted to # | ||
778 | # longword size whether the operation is byte, word, or long. The # | ||
779 | # bounds are sign extended accordingly. If Rn is a data register, Rn is # | ||
780 | # also sign extended. If Rn is an address register, it need not be sign # | ||
781 | # extended since the full register is always used. # | ||
782 | # The condition codes are set correctly before the final "rts". # | ||
783 | # # | ||
784 | ######################################################################### | ||
785 | |||
# CMP2_CC: offset from a6 of the frame slot where each cmp2 entry point
# stores the ccodes it was called with (the frame is made by "link.w %a6,&-4").
786 | set CMP2_CC, -4 | ||
787 | |||
788 | global _060LSP__cmp2_Ab_ | ||
789 | _060LSP__cmp2_Ab_: | ||
# Library entry emulating "cmp2.b An,<ea>".
# Stack arguments, addressed through a6 once the frame is linked:
#   0x8(a6) = Rn (address register value)
#   0xc(a6) = pointer to the boundary pair (lo byte at +0, hi byte at +1)
# The byte bounds are sign extended to longwords; Rn is compared at full
# width and is therefore not extended.
# Leaves d0 = lo bound, d1 = hi bound, d2 = Rn and branches to l_cmp2_cmp.
790 | |||
791 | # PROLOGUE BEGIN ######################################################## | ||
792 | link.w %a6,&-4 | ||
793 | movm.l &0x3800,-(%sp) # save d2-d4 (restored in the l_cmp2_cmp epilogue) | ||
794 | # fmovm.l &0x0,-(%sp) # save no fpregs | ||
795 | # PROLOGUE END ########################################################## | ||
796 | |||
# stash the entry ccodes; l_cmp2_cmp re-inserts their 'X', 'N' and 'V' bits
797 | mov.w %cc,CMP2_CC(%a6) | ||
798 | mov.l 0x8(%a6), %d2 # d2 = Rn (first stack argument) | ||
799 | |||
# fetch both byte bounds through the pointer argument at 0xc(a6)
800 | mov.b ([0xc,%a6],0x0),%d0 | ||
801 | mov.b ([0xc,%a6],0x1),%d1 | ||
802 | |||
803 | extb.l %d0 # sign extend lo bnd to a longword | ||
804 | extb.l %d1 # sign extend hi bnd to a longword | ||
805 | bra.w l_cmp2_cmp # go do the compare emulation | ||
806 | |||
807 | global _060LSP__cmp2_Aw_ | ||
808 | _060LSP__cmp2_Aw_: | ||
# Library entry emulating "cmp2.w An,<ea>".
# Stack arguments, addressed through a6 once the frame is linked:
#   0x8(a6) = Rn (address register value)
#   0xc(a6) = pointer to the boundary pair (lo word at +0, hi word at +2)
# The word bounds are sign extended to longwords; Rn is compared at full
# width and is therefore not extended.
# Leaves d0 = lo bound, d1 = hi bound, d2 = Rn and branches to l_cmp2_cmp.
809 | |||
810 | # PROLOGUE BEGIN ######################################################## | ||
811 | link.w %a6,&-4 | ||
812 | movm.l &0x3800,-(%sp) # save d2-d4 (restored in the l_cmp2_cmp epilogue) | ||
813 | # fmovm.l &0x0,-(%sp) # save no fpregs | ||
814 | # PROLOGUE END ########################################################## | ||
815 | |||
# stash the entry ccodes; l_cmp2_cmp re-inserts their 'X', 'N' and 'V' bits
816 | mov.w %cc,CMP2_CC(%a6) | ||
817 | mov.l 0x8(%a6), %d2 # d2 = Rn (first stack argument) | ||
818 | |||
# fetch both word bounds through the pointer argument at 0xc(a6)
819 | mov.w ([0xc,%a6],0x0),%d0 | ||
820 | mov.w ([0xc,%a6],0x2),%d1 | ||
821 | |||
822 | ext.l %d0 # sign extend lo bnd to a longword | ||
823 | ext.l %d1 # sign extend hi bnd to a longword | ||
824 | bra.w l_cmp2_cmp # go do the compare emulation | ||
825 | |||
826 | global _060LSP__cmp2_Al_ | ||
827 | _060LSP__cmp2_Al_: | ||
# Library entry emulating "cmp2.l An,<ea>".
# Stack arguments, addressed through a6 once the frame is linked:
#   0x8(a6) = Rn (address register value)
#   0xc(a6) = pointer to the boundary pair (lo long at +0, hi long at +4)
# All operands are already longwords, so nothing is sign extended.
# Leaves d0 = lo bound, d1 = hi bound, d2 = Rn and branches to l_cmp2_cmp.
828 | |||
829 | # PROLOGUE BEGIN ######################################################## | ||
830 | link.w %a6,&-4 | ||
831 | movm.l &0x3800,-(%sp) # save d2-d4 (restored in the l_cmp2_cmp epilogue) | ||
832 | # fmovm.l &0x0,-(%sp) # save no fpregs | ||
833 | # PROLOGUE END ########################################################## | ||
834 | |||
# stash the entry ccodes; l_cmp2_cmp re-inserts their 'X', 'N' and 'V' bits
835 | mov.w %cc,CMP2_CC(%a6) | ||
836 | mov.l 0x8(%a6), %d2 # d2 = Rn (first stack argument) | ||
837 | |||
# fetch both longword bounds through the pointer argument at 0xc(a6)
838 | mov.l ([0xc,%a6],0x0),%d0 | ||
839 | mov.l ([0xc,%a6],0x4),%d1 | ||
840 | bra.w l_cmp2_cmp # go do the compare emulation | ||
841 | |||
842 | global _060LSP__cmp2_Db_ | ||
843 | _060LSP__cmp2_Db_: | ||
# Library entry emulating "cmp2.b Dn,<ea>".
# Stack arguments, addressed through a6 once the frame is linked:
#   0x8(a6) = Rn (data register value; only the low byte is compared)
#   0xc(a6) = pointer to the boundary pair (lo byte at +0, hi byte at +1)
# The bounds and Rn are all sign extended from byte to longword.
# Leaves d0 = lo bound, d1 = hi bound, d2 = Rn and branches to l_cmp2_cmp.
844 | |||
845 | # PROLOGUE BEGIN ######################################################## | ||
846 | link.w %a6,&-4 | ||
847 | movm.l &0x3800,-(%sp) # save d2-d4 (restored in the l_cmp2_cmp epilogue) | ||
848 | # fmovm.l &0x0,-(%sp) # save no fpregs | ||
849 | # PROLOGUE END ########################################################## | ||
850 | |||
# stash the entry ccodes; l_cmp2_cmp re-inserts their 'X', 'N' and 'V' bits
851 | mov.w %cc,CMP2_CC(%a6) | ||
852 | mov.l 0x8(%a6), %d2 # d2 = Rn (first stack argument) | ||
853 | |||
# fetch both byte bounds through the pointer argument at 0xc(a6)
854 | mov.b ([0xc,%a6],0x0),%d0 | ||
855 | mov.b ([0xc,%a6],0x1),%d1 | ||
856 | |||
857 | extb.l %d0 # sign extend lo bnd to a longword | ||
858 | extb.l %d1 # sign extend hi bnd to a longword | ||
859 | |||
860 | # operation is a data register compare. | ||
861 | # sign extend byte to long so we can do simple longword compares. | ||
862 | extb.l %d2 # sign extend the low byte of Rn | ||
863 | bra.w l_cmp2_cmp # go do the compare emulation | ||
864 | |||
865 | global _060LSP__cmp2_Dw_ | ||
866 | _060LSP__cmp2_Dw_: | ||
# Library entry emulating "cmp2.w Dn,<ea>".
# Stack arguments, addressed through a6 once the frame is linked:
#   0x8(a6) = Rn (data register value; only the low word is compared)
#   0xc(a6) = pointer to the boundary pair (lo word at +0, hi word at +2)
# The bounds and Rn are all sign extended from word to longword.
# Leaves d0 = lo bound, d1 = hi bound, d2 = Rn and branches to l_cmp2_cmp.
867 | |||
868 | # PROLOGUE BEGIN ######################################################## | ||
869 | link.w %a6,&-4 | ||
870 | movm.l &0x3800,-(%sp) # save d2-d4 (restored in the l_cmp2_cmp epilogue) | ||
871 | # fmovm.l &0x0,-(%sp) # save no fpregs | ||
872 | # PROLOGUE END ########################################################## | ||
873 | |||
# stash the entry ccodes; l_cmp2_cmp re-inserts their 'X', 'N' and 'V' bits
874 | mov.w %cc,CMP2_CC(%a6) | ||
875 | mov.l 0x8(%a6), %d2 # d2 = Rn (first stack argument) | ||
876 | |||
# fetch both word bounds through the pointer argument at 0xc(a6)
877 | mov.w ([0xc,%a6],0x0),%d0 | ||
878 | mov.w ([0xc,%a6],0x2),%d1 | ||
879 | |||
880 | ext.l %d0 # sign extend lo bnd to a longword | ||
881 | ext.l %d1 # sign extend hi bnd to a longword | ||
882 | |||
883 | # operation is a data register compare. | ||
884 | # sign extend word to long so we can do simple longword compares. | ||
885 | ext.l %d2 # sign extend the low word of Rn | ||
886 | bra.w l_cmp2_cmp # go emulate compare | ||
887 | |||
888 | global _060LSP__cmp2_Dl_ | ||
889 | _060LSP__cmp2_Dl_: | ||
# Library entry emulating "cmp2.l Dn,<ea>".
# Stack arguments, addressed through a6 once the frame is linked:
#   0x8(a6) = Rn (data register value)
#   0xc(a6) = pointer to the boundary pair (lo long at +0, hi long at +4)
# All operands are already longwords, so nothing is sign extended.
# There is no branch at the end of this entry: it falls straight through
# into l_cmp2_cmp with d0 = lo bound, d1 = hi bound, d2 = Rn.
890 | |||
891 | # PROLOGUE BEGIN ######################################################## | ||
892 | link.w %a6,&-4 | ||
893 | movm.l &0x3800,-(%sp) # save d2-d4 (restored in the epilogue below) | ||
894 | # fmovm.l &0x0,-(%sp) # save no fpregs | ||
895 | # PROLOGUE END ########################################################## | ||
896 | |||
# stash the entry ccodes; their 'X', 'N' and 'V' bits are re-inserted below
897 | mov.w %cc,CMP2_CC(%a6) | ||
898 | mov.l 0x8(%a6), %d2 # d2 = Rn (first stack argument) | ||
899 | |||
# fetch both longword bounds through the pointer argument at 0xc(a6)
900 | mov.l ([0xc,%a6],0x0),%d0 | ||
901 | mov.l ([0xc,%a6],0x4),%d1 | ||
902 | |||
903 | # | ||
# Shared tail for all six cmp2 entry points.
# On entry: d0 = lo bound, d1 = hi bound, d2 = Rn (all longwords),
# CMP2_CC(a6) = ccodes at call time, d2-d4 saved on the stack.
904 | # To set the ccodes correctly: | ||
905 | # (1) save 'Z' bit from (Rn - lo) | ||
906 | # (2) save 'Z' and 'C' bits from ((hi - lo) - (Rn - lo)) | ||
907 | # (3) keep 'X', 'N', and 'V' from before instruction | ||
908 | # (4) combine ccodes | ||
# The resulting 'Z' is the OR of the two 'Z' bits, so it is set when Rn
# equals either bound; 'C' comes from the second compare only.
909 | # | ||
910 | l_cmp2_cmp: | ||
911 | sub.l %d0, %d2 # d2 = (Rn - lo) | ||
912 | mov.w %cc, %d3 # fetch resulting ccodes | ||
913 | andi.b &0x4, %d3 # keep 'Z' bit (set if Rn == lo) | ||
914 | sub.l %d0, %d1 # d1 = (hi - lo) | ||
915 | cmp.l %d1,%d2 # ((hi - lo) - (Rn - lo)) | ||
916 | |||
917 | mov.w %cc, %d4 # fetch resulting ccodes | ||
918 | or.b %d4, %d3 # combine w/ earlier 'Z' bit | ||
919 | andi.b &0x5, %d3 # keep 'Z' (0x4) and 'C' (0x1) | ||
920 | |||
921 | mov.w CMP2_CC(%a6), %d4 # fetch old ccodes | ||
922 | andi.b &0x1a, %d4 # keep 'X','N','V' bits | ||
923 | or.b %d3, %d4 # insert new 'Z' and 'C' bits | ||
924 | mov.w %d4,%cc # install the combined ccodes | ||
925 | |||
# nothing between the ccode write above and the rts rewrites the ccodes
926 | # EPILOGUE BEGIN ######################################################## | ||
927 | # fmovm.l (%sp)+,&0x0 # restore no fpregs | ||
928 | movm.l (%sp)+,&0x001c # restore d2-d4 | ||
929 | unlk %a6 | ||
930 | # EPILOGUE END ########################################################## | ||
931 | |||
932 | rts | ||