1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
|
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/types.h>
#include <asm/byteorder.h>
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
__asm__ ("addcc %r4,%5,%1\n\t" \
"addx %r2,%3,%0\n" \
: "=r" ((USItype)(sh)), \
"=&r" ((USItype)(sl)) \
: "%rJ" ((USItype)(ah)), \
"rI" ((USItype)(bh)), \
"%rJ" ((USItype)(al)), \
"rI" ((USItype)(bl)) \
: "cc")
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
__asm__ ("subcc %r4,%5,%1\n\t" \
"subx %r2,%3,%0\n" \
: "=r" ((USItype)(sh)), \
"=&r" ((USItype)(sl)) \
: "rJ" ((USItype)(ah)), \
"rI" ((USItype)(bh)), \
"rJ" ((USItype)(al)), \
"rI" ((USItype)(bl)) \
: "cc")
#define umul_ppmm(w1, w0, u, v) \
__asm__ ("! Inlined umul_ppmm\n\t" \
"wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr\n\t" \
"sra %3,31,%%g2 ! Don't move this insn\n\t" \
"and %2,%%g2,%%g2 ! Don't move this insn\n\t" \
"andcc %%g0,0,%%g1 ! Don't move this insn\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,%3,%%g1\n\t" \
"mulscc %%g1,0,%%g1\n\t" \
"add %%g1,%%g2,%0\n\t" \
"rd %%y,%1\n" \
: "=r" ((USItype)(w1)), \
"=r" ((USItype)(w0)) \
: "%rI" ((USItype)(u)), \
"r" ((USItype)(v)) \
: "%g1", "%g2", "cc")
/* It's quite necessary to add this much assembler for the sparc.
The default udiv_qrnnd (in C) is more than 10 times slower! */
#define udiv_qrnnd(q, r, n1, n0, d) \
__asm__ ("! Inlined udiv_qrnnd\n\t" \
"mov 32,%%g1\n\t" \
"subcc %1,%2,%%g0\n\t" \
"1: bcs 5f\n\t" \
"addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n\t" \
"sub %1,%2,%1 ! this kills msb of n\n\t" \
"addx %1,%1,%1 ! so this can't give carry\n\t" \
"subcc %%g1,1,%%g1\n\t" \
"2: bne 1b\n\t" \
"subcc %1,%2,%%g0\n\t" \
"bcs 3f\n\t" \
"addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n\t" \
"b 3f\n\t" \
"sub %1,%2,%1 ! this kills msb of n\n\t" \
"4: sub %1,%2,%1\n\t" \
"5: addxcc %1,%1,%1\n\t" \
"bcc 2b\n\t" \
"subcc %%g1,1,%%g1\n\t" \
"! Got carry from n. Subtract next step to cancel this carry.\n\t" \
"bne 4b\n\t" \
"addcc %0,%0,%0 ! shift n1n0 and a 0-bit in lsb\n\t" \
"sub %1,%2,%1\n\t" \
"3: xnor %0,0,%0\n\t" \
"! End of inline udiv_qrnnd\n" \
: "=&r" ((USItype)(q)), \
"=&r" ((USItype)(r)) \
: "r" ((USItype)(d)), \
"1" ((USItype)(n1)), \
"0" ((USItype)(n0)) : "%g1", "cc")
#define UDIV_NEEDS_NORMALIZATION 0
#define abort() \
return 0
|