aboutsummaryrefslogtreecommitdiffstats
path: root/arch/sparc/kernel/visemul.c
diff options
context:
space:
mode:
authorSam Ravnborg <sam@ravnborg.org>2008-12-03 06:11:52 -0500
committerDavid S. Miller <davem@davemloft.net>2008-12-04 12:17:21 -0500
commita88b5ba8bd8ac18aad65ee6c6a254e2e74876db3 (patch)
treeeb3d0ffaf53c3f7ec6083752c2097cecd1cb892a /arch/sparc/kernel/visemul.c
parentd670bd4f803c8b646acd20f3ba21e65458293faf (diff)
sparc,sparc64: unify kernel/
o Move all files from sparc64/kernel/ to sparc/kernel - rename as appropriate o Update sparc/Makefile to the changes o Update sparc/kernel/Makefile to include the sparc64 files NOTE: This commit changes link order on sparc64! Link order had to change for either of sparc32 and sparc64. And assuming sparc64 see more testing than sparc32 change link order on sparc64 where issues will be caught faster. Signed-off-by: Sam Ravnborg <sam@ravnborg.org> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'arch/sparc/kernel/visemul.c')
-rw-r--r--arch/sparc/kernel/visemul.c890
1 files changed, 890 insertions, 0 deletions
diff --git a/arch/sparc/kernel/visemul.c b/arch/sparc/kernel/visemul.c
new file mode 100644
index 000000000000..b956fd71c131
--- /dev/null
+++ b/arch/sparc/kernel/visemul.c
@@ -0,0 +1,890 @@
1/* visemul.c: Emulation of VIS instructions.
2 *
3 * Copyright (C) 2006 David S. Miller (davem@davemloft.net)
4 */
5#include <linux/kernel.h>
6#include <linux/errno.h>
7#include <linux/thread_info.h>
8
9#include <asm/ptrace.h>
10#include <asm/pstate.h>
11#include <asm/system.h>
12#include <asm/fpumacro.h>
13#include <asm/uaccess.h>
14
15/* OPF field of various VIS instructions. */
16
17/* 000111011 - four 16-bit packs */
18#define FPACK16_OPF 0x03b
19
20/* 000111010 - two 32-bit packs */
21#define FPACK32_OPF 0x03a
22
23/* 000111101 - four 16-bit packs */
24#define FPACKFIX_OPF 0x03d
25
26/* 001001101 - four 16-bit expands */
27#define FEXPAND_OPF 0x04d
28
29/* 001001011 - two 32-bit merges */
30#define FPMERGE_OPF 0x04b
31
32/* 000110001 - 8-by-16-bit partitoned product */
33#define FMUL8x16_OPF 0x031
34
35/* 000110011 - 8-by-16-bit upper alpha partitioned product */
36#define FMUL8x16AU_OPF 0x033
37
38/* 000110101 - 8-by-16-bit lower alpha partitioned product */
39#define FMUL8x16AL_OPF 0x035
40
41/* 000110110 - upper 8-by-16-bit partitioned product */
42#define FMUL8SUx16_OPF 0x036
43
44/* 000110111 - lower 8-by-16-bit partitioned product */
45#define FMUL8ULx16_OPF 0x037
46
47/* 000111000 - upper 8-by-16-bit partitioned product */
48#define FMULD8SUx16_OPF 0x038
49
50/* 000111001 - lower unsigned 8-by-16-bit partitioned product */
51#define FMULD8ULx16_OPF 0x039
52
53/* 000101000 - four 16-bit compare; set rd if src1 > src2 */
54#define FCMPGT16_OPF 0x028
55
56/* 000101100 - two 32-bit compare; set rd if src1 > src2 */
57#define FCMPGT32_OPF 0x02c
58
59/* 000100000 - four 16-bit compare; set rd if src1 <= src2 */
60#define FCMPLE16_OPF 0x020
61
62/* 000100100 - two 32-bit compare; set rd if src1 <= src2 */
63#define FCMPLE32_OPF 0x024
64
65/* 000100010 - four 16-bit compare; set rd if src1 != src2 */
66#define FCMPNE16_OPF 0x022
67
68/* 000100110 - two 32-bit compare; set rd if src1 != src2 */
69#define FCMPNE32_OPF 0x026
70
71/* 000101010 - four 16-bit compare; set rd if src1 == src2 */
72#define FCMPEQ16_OPF 0x02a
73
74/* 000101110 - two 32-bit compare; set rd if src1 == src2 */
75#define FCMPEQ32_OPF 0x02e
76
77/* 000000000 - Eight 8-bit edge boundary processing */
78#define EDGE8_OPF 0x000
79
80/* 000000001 - Eight 8-bit edge boundary processing, no CC */
81#define EDGE8N_OPF 0x001
82
83/* 000000010 - Eight 8-bit edge boundary processing, little-endian */
84#define EDGE8L_OPF 0x002
85
86/* 000000011 - Eight 8-bit edge boundary processing, little-endian, no CC */
87#define EDGE8LN_OPF 0x003
88
89/* 000000100 - Four 16-bit edge boundary processing */
90#define EDGE16_OPF 0x004
91
92/* 000000101 - Four 16-bit edge boundary processing, no CC */
93#define EDGE16N_OPF 0x005
94
95/* 000000110 - Four 16-bit edge boundary processing, little-endian */
96#define EDGE16L_OPF 0x006
97
98/* 000000111 - Four 16-bit edge boundary processing, little-endian, no CC */
99#define EDGE16LN_OPF 0x007
100
101/* 000001000 - Two 32-bit edge boundary processing */
102#define EDGE32_OPF 0x008
103
104/* 000001001 - Two 32-bit edge boundary processing, no CC */
105#define EDGE32N_OPF 0x009
106
107/* 000001010 - Two 32-bit edge boundary processing, little-endian */
108#define EDGE32L_OPF 0x00a
109
110/* 000001011 - Two 32-bit edge boundary processing, little-endian, no CC */
111#define EDGE32LN_OPF 0x00b
112
113/* 000111110 - distance between 8 8-bit components */
114#define PDIST_OPF 0x03e
115
116/* 000010000 - convert 8-bit 3-D address to blocked byte address */
117#define ARRAY8_OPF 0x010
118
119/* 000010010 - convert 16-bit 3-D address to blocked byte address */
120#define ARRAY16_OPF 0x012
121
122/* 000010100 - convert 32-bit 3-D address to blocked byte address */
123#define ARRAY32_OPF 0x014
124
125/* 000011001 - Set the GSR.MASK field in preparation for a BSHUFFLE */
126#define BMASK_OPF 0x019
127
128/* 001001100 - Permute bytes as specified by GSR.MASK */
129#define BSHUFFLE_OPF 0x04c
130
131#define VIS_OPF_SHIFT 5
132#define VIS_OPF_MASK (0x1ff << VIS_OPF_SHIFT)
133
134#define RS1(INSN) (((INSN) >> 14) & 0x1f)
135#define RS2(INSN) (((INSN) >> 0) & 0x1f)
136#define RD(INSN) (((INSN) >> 25) & 0x1f)
137
138static inline void maybe_flush_windows(unsigned int rs1, unsigned int rs2,
139 unsigned int rd, int from_kernel)
140{
141 if (rs2 >= 16 || rs1 >= 16 || rd >= 16) {
142 if (from_kernel != 0)
143 __asm__ __volatile__("flushw");
144 else
145 flushw_user();
146 }
147}
148
149static unsigned long fetch_reg(unsigned int reg, struct pt_regs *regs)
150{
151 unsigned long value;
152
153 if (reg < 16)
154 return (!reg ? 0 : regs->u_regs[reg]);
155 if (regs->tstate & TSTATE_PRIV) {
156 struct reg_window *win;
157 win = (struct reg_window *)(regs->u_regs[UREG_FP] + STACK_BIAS);
158 value = win->locals[reg - 16];
159 } else if (test_thread_flag(TIF_32BIT)) {
160 struct reg_window32 __user *win32;
161 win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP]));
162 get_user(value, &win32->locals[reg - 16]);
163 } else {
164 struct reg_window __user *win;
165 win = (struct reg_window __user *)(regs->u_regs[UREG_FP] + STACK_BIAS);
166 get_user(value, &win->locals[reg - 16]);
167 }
168 return value;
169}
170
171static inline unsigned long __user *__fetch_reg_addr_user(unsigned int reg,
172 struct pt_regs *regs)
173{
174 BUG_ON(reg < 16);
175 BUG_ON(regs->tstate & TSTATE_PRIV);
176
177 if (test_thread_flag(TIF_32BIT)) {
178 struct reg_window32 __user *win32;
179 win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP]));
180 return (unsigned long __user *)&win32->locals[reg - 16];
181 } else {
182 struct reg_window __user *win;
183 win = (struct reg_window __user *)(regs->u_regs[UREG_FP] + STACK_BIAS);
184 return &win->locals[reg - 16];
185 }
186}
187
188static inline unsigned long *__fetch_reg_addr_kern(unsigned int reg,
189 struct pt_regs *regs)
190{
191 BUG_ON(reg >= 16);
192 BUG_ON(regs->tstate & TSTATE_PRIV);
193
194 return &regs->u_regs[reg];
195}
196
197static void store_reg(struct pt_regs *regs, unsigned long val, unsigned long rd)
198{
199 if (rd < 16) {
200 unsigned long *rd_kern = __fetch_reg_addr_kern(rd, regs);
201
202 *rd_kern = val;
203 } else {
204 unsigned long __user *rd_user = __fetch_reg_addr_user(rd, regs);
205
206 if (test_thread_flag(TIF_32BIT))
207 __put_user((u32)val, (u32 __user *)rd_user);
208 else
209 __put_user(val, rd_user);
210 }
211}
212
213static inline unsigned long fpd_regval(struct fpustate *f,
214 unsigned int insn_regnum)
215{
216 insn_regnum = (((insn_regnum & 1) << 5) |
217 (insn_regnum & 0x1e));
218
219 return *(unsigned long *) &f->regs[insn_regnum];
220}
221
222static inline unsigned long *fpd_regaddr(struct fpustate *f,
223 unsigned int insn_regnum)
224{
225 insn_regnum = (((insn_regnum & 1) << 5) |
226 (insn_regnum & 0x1e));
227
228 return (unsigned long *) &f->regs[insn_regnum];
229}
230
231static inline unsigned int fps_regval(struct fpustate *f,
232 unsigned int insn_regnum)
233{
234 return f->regs[insn_regnum];
235}
236
237static inline unsigned int *fps_regaddr(struct fpustate *f,
238 unsigned int insn_regnum)
239{
240 return &f->regs[insn_regnum];
241}
242
243struct edge_tab {
244 u16 left, right;
245};
246static struct edge_tab edge8_tab[8] = {
247 { 0xff, 0x80 },
248 { 0x7f, 0xc0 },
249 { 0x3f, 0xe0 },
250 { 0x1f, 0xf0 },
251 { 0x0f, 0xf8 },
252 { 0x07, 0xfc },
253 { 0x03, 0xfe },
254 { 0x01, 0xff },
255};
256static struct edge_tab edge8_tab_l[8] = {
257 { 0xff, 0x01 },
258 { 0xfe, 0x03 },
259 { 0xfc, 0x07 },
260 { 0xf8, 0x0f },
261 { 0xf0, 0x1f },
262 { 0xe0, 0x3f },
263 { 0xc0, 0x7f },
264 { 0x80, 0xff },
265};
266static struct edge_tab edge16_tab[4] = {
267 { 0xf, 0x8 },
268 { 0x7, 0xc },
269 { 0x3, 0xe },
270 { 0x1, 0xf },
271};
272static struct edge_tab edge16_tab_l[4] = {
273 { 0xf, 0x1 },
274 { 0xe, 0x3 },
275 { 0xc, 0x7 },
276 { 0x8, 0xf },
277};
278static struct edge_tab edge32_tab[2] = {
279 { 0x3, 0x2 },
280 { 0x1, 0x3 },
281};
282static struct edge_tab edge32_tab_l[2] = {
283 { 0x3, 0x1 },
284 { 0x2, 0x3 },
285};
286
287static void edge(struct pt_regs *regs, unsigned int insn, unsigned int opf)
288{
289 unsigned long orig_rs1, rs1, orig_rs2, rs2, rd_val;
290 u16 left, right;
291
292 maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0);
293 orig_rs1 = rs1 = fetch_reg(RS1(insn), regs);
294 orig_rs2 = rs2 = fetch_reg(RS2(insn), regs);
295
296 if (test_thread_flag(TIF_32BIT)) {
297 rs1 = rs1 & 0xffffffff;
298 rs2 = rs2 & 0xffffffff;
299 }
300 switch (opf) {
301 default:
302 case EDGE8_OPF:
303 case EDGE8N_OPF:
304 left = edge8_tab[rs1 & 0x7].left;
305 right = edge8_tab[rs2 & 0x7].right;
306 break;
307 case EDGE8L_OPF:
308 case EDGE8LN_OPF:
309 left = edge8_tab_l[rs1 & 0x7].left;
310 right = edge8_tab_l[rs2 & 0x7].right;
311 break;
312
313 case EDGE16_OPF:
314 case EDGE16N_OPF:
315 left = edge16_tab[(rs1 >> 1) & 0x3].left;
316 right = edge16_tab[(rs2 >> 1) & 0x3].right;
317 break;
318
319 case EDGE16L_OPF:
320 case EDGE16LN_OPF:
321 left = edge16_tab_l[(rs1 >> 1) & 0x3].left;
322 right = edge16_tab_l[(rs2 >> 1) & 0x3].right;
323 break;
324
325 case EDGE32_OPF:
326 case EDGE32N_OPF:
327 left = edge32_tab[(rs1 >> 2) & 0x1].left;
328 right = edge32_tab[(rs2 >> 2) & 0x1].right;
329 break;
330
331 case EDGE32L_OPF:
332 case EDGE32LN_OPF:
333 left = edge32_tab_l[(rs1 >> 2) & 0x1].left;
334 right = edge32_tab_l[(rs2 >> 2) & 0x1].right;
335 break;
336 };
337
338 if ((rs1 & ~0x7UL) == (rs2 & ~0x7UL))
339 rd_val = right & left;
340 else
341 rd_val = left;
342
343 store_reg(regs, rd_val, RD(insn));
344
345 switch (opf) {
346 case EDGE8_OPF:
347 case EDGE8L_OPF:
348 case EDGE16_OPF:
349 case EDGE16L_OPF:
350 case EDGE32_OPF:
351 case EDGE32L_OPF: {
352 unsigned long ccr, tstate;
353
354 __asm__ __volatile__("subcc %1, %2, %%g0\n\t"
355 "rd %%ccr, %0"
356 : "=r" (ccr)
357 : "r" (orig_rs1), "r" (orig_rs2)
358 : "cc");
359 tstate = regs->tstate & ~(TSTATE_XCC | TSTATE_ICC);
360 regs->tstate = tstate | (ccr << 32UL);
361 }
362 };
363}
364
365static void array(struct pt_regs *regs, unsigned int insn, unsigned int opf)
366{
367 unsigned long rs1, rs2, rd_val;
368 unsigned int bits, bits_mask;
369
370 maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0);
371 rs1 = fetch_reg(RS1(insn), regs);
372 rs2 = fetch_reg(RS2(insn), regs);
373
374 bits = (rs2 > 5 ? 5 : rs2);
375 bits_mask = (1UL << bits) - 1UL;
376
377 rd_val = ((((rs1 >> 11) & 0x3) << 0) |
378 (((rs1 >> 33) & 0x3) << 2) |
379 (((rs1 >> 55) & 0x1) << 4) |
380 (((rs1 >> 13) & 0xf) << 5) |
381 (((rs1 >> 35) & 0xf) << 9) |
382 (((rs1 >> 56) & 0xf) << 13) |
383 (((rs1 >> 17) & bits_mask) << 17) |
384 (((rs1 >> 39) & bits_mask) << (17 + bits)) |
385 (((rs1 >> 60) & 0xf) << (17 + (2*bits))));
386
387 switch (opf) {
388 case ARRAY16_OPF:
389 rd_val <<= 1;
390 break;
391
392 case ARRAY32_OPF:
393 rd_val <<= 2;
394 };
395
396 store_reg(regs, rd_val, RD(insn));
397}
398
399static void bmask(struct pt_regs *regs, unsigned int insn)
400{
401 unsigned long rs1, rs2, rd_val, gsr;
402
403 maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0);
404 rs1 = fetch_reg(RS1(insn), regs);
405 rs2 = fetch_reg(RS2(insn), regs);
406 rd_val = rs1 + rs2;
407
408 store_reg(regs, rd_val, RD(insn));
409
410 gsr = current_thread_info()->gsr[0] & 0xffffffff;
411 gsr |= rd_val << 32UL;
412 current_thread_info()->gsr[0] = gsr;
413}
414
415static void bshuffle(struct pt_regs *regs, unsigned int insn)
416{
417 struct fpustate *f = FPUSTATE;
418 unsigned long rs1, rs2, rd_val;
419 unsigned long bmask, i;
420
421 bmask = current_thread_info()->gsr[0] >> 32UL;
422
423 rs1 = fpd_regval(f, RS1(insn));
424 rs2 = fpd_regval(f, RS2(insn));
425
426 rd_val = 0UL;
427 for (i = 0; i < 8; i++) {
428 unsigned long which = (bmask >> (i * 4)) & 0xf;
429 unsigned long byte;
430
431 if (which < 8)
432 byte = (rs1 >> (which * 8)) & 0xff;
433 else
434 byte = (rs2 >> ((which-8)*8)) & 0xff;
435 rd_val |= (byte << (i * 8));
436 }
437
438 *fpd_regaddr(f, RD(insn)) = rd_val;
439}
440
441static void pdist(struct pt_regs *regs, unsigned int insn)
442{
443 struct fpustate *f = FPUSTATE;
444 unsigned long rs1, rs2, *rd, rd_val;
445 unsigned long i;
446
447 rs1 = fpd_regval(f, RS1(insn));
448 rs2 = fpd_regval(f, RS2(insn));
449 rd = fpd_regaddr(f, RD(insn));
450
451 rd_val = *rd;
452
453 for (i = 0; i < 8; i++) {
454 s16 s1, s2;
455
456 s1 = (rs1 >> (56 - (i * 8))) & 0xff;
457 s2 = (rs2 >> (56 - (i * 8))) & 0xff;
458
459 /* Absolute value of difference. */
460 s1 -= s2;
461 if (s1 < 0)
462 s1 = ~s1 + 1;
463
464 rd_val += s1;
465 }
466
467 *rd = rd_val;
468}
469
470static void pformat(struct pt_regs *regs, unsigned int insn, unsigned int opf)
471{
472 struct fpustate *f = FPUSTATE;
473 unsigned long rs1, rs2, gsr, scale, rd_val;
474
475 gsr = current_thread_info()->gsr[0];
476 scale = (gsr >> 3) & (opf == FPACK16_OPF ? 0xf : 0x1f);
477 switch (opf) {
478 case FPACK16_OPF: {
479 unsigned long byte;
480
481 rs2 = fpd_regval(f, RS2(insn));
482 rd_val = 0;
483 for (byte = 0; byte < 4; byte++) {
484 unsigned int val;
485 s16 src = (rs2 >> (byte * 16UL)) & 0xffffUL;
486 int scaled = src << scale;
487 int from_fixed = scaled >> 7;
488
489 val = ((from_fixed < 0) ?
490 0 :
491 (from_fixed > 255) ?
492 255 : from_fixed);
493
494 rd_val |= (val << (8 * byte));
495 }
496 *fps_regaddr(f, RD(insn)) = rd_val;
497 break;
498 }
499
500 case FPACK32_OPF: {
501 unsigned long word;
502
503 rs1 = fpd_regval(f, RS1(insn));
504 rs2 = fpd_regval(f, RS2(insn));
505 rd_val = (rs1 << 8) & ~(0x000000ff000000ffUL);
506 for (word = 0; word < 2; word++) {
507 unsigned long val;
508 s32 src = (rs2 >> (word * 32UL));
509 s64 scaled = src << scale;
510 s64 from_fixed = scaled >> 23;
511
512 val = ((from_fixed < 0) ?
513 0 :
514 (from_fixed > 255) ?
515 255 : from_fixed);
516
517 rd_val |= (val << (32 * word));
518 }
519 *fpd_regaddr(f, RD(insn)) = rd_val;
520 break;
521 }
522
523 case FPACKFIX_OPF: {
524 unsigned long word;
525
526 rs2 = fpd_regval(f, RS2(insn));
527
528 rd_val = 0;
529 for (word = 0; word < 2; word++) {
530 long val;
531 s32 src = (rs2 >> (word * 32UL));
532 s64 scaled = src << scale;
533 s64 from_fixed = scaled >> 16;
534
535 val = ((from_fixed < -32768) ?
536 -32768 :
537 (from_fixed > 32767) ?
538 32767 : from_fixed);
539
540 rd_val |= ((val & 0xffff) << (word * 16));
541 }
542 *fps_regaddr(f, RD(insn)) = rd_val;
543 break;
544 }
545
546 case FEXPAND_OPF: {
547 unsigned long byte;
548
549 rs2 = fps_regval(f, RS2(insn));
550
551 rd_val = 0;
552 for (byte = 0; byte < 4; byte++) {
553 unsigned long val;
554 u8 src = (rs2 >> (byte * 8)) & 0xff;
555
556 val = src << 4;
557
558 rd_val |= (val << (byte * 16));
559 }
560 *fpd_regaddr(f, RD(insn)) = rd_val;
561 break;
562 }
563
564 case FPMERGE_OPF: {
565 rs1 = fps_regval(f, RS1(insn));
566 rs2 = fps_regval(f, RS2(insn));
567
568 rd_val = (((rs2 & 0x000000ff) << 0) |
569 ((rs1 & 0x000000ff) << 8) |
570 ((rs2 & 0x0000ff00) << 8) |
571 ((rs1 & 0x0000ff00) << 16) |
572 ((rs2 & 0x00ff0000) << 16) |
573 ((rs1 & 0x00ff0000) << 24) |
574 ((rs2 & 0xff000000) << 24) |
575 ((rs1 & 0xff000000) << 32));
576 *fpd_regaddr(f, RD(insn)) = rd_val;
577 break;
578 }
579 };
580}
581
582static void pmul(struct pt_regs *regs, unsigned int insn, unsigned int opf)
583{
584 struct fpustate *f = FPUSTATE;
585 unsigned long rs1, rs2, rd_val;
586
587 switch (opf) {
588 case FMUL8x16_OPF: {
589 unsigned long byte;
590
591 rs1 = fps_regval(f, RS1(insn));
592 rs2 = fpd_regval(f, RS2(insn));
593
594 rd_val = 0;
595 for (byte = 0; byte < 4; byte++) {
596 u16 src1 = (rs1 >> (byte * 8)) & 0x00ff;
597 s16 src2 = (rs2 >> (byte * 16)) & 0xffff;
598 u32 prod = src1 * src2;
599 u16 scaled = ((prod & 0x00ffff00) >> 8);
600
601 /* Round up. */
602 if (prod & 0x80)
603 scaled++;
604 rd_val |= ((scaled & 0xffffUL) << (byte * 16UL));
605 }
606
607 *fpd_regaddr(f, RD(insn)) = rd_val;
608 break;
609 }
610
611 case FMUL8x16AU_OPF:
612 case FMUL8x16AL_OPF: {
613 unsigned long byte;
614 s16 src2;
615
616 rs1 = fps_regval(f, RS1(insn));
617 rs2 = fps_regval(f, RS2(insn));
618
619 rd_val = 0;
620 src2 = (rs2 >> (opf == FMUL8x16AU_OPF) ? 16 : 0);
621 for (byte = 0; byte < 4; byte++) {
622 u16 src1 = (rs1 >> (byte * 8)) & 0x00ff;
623 u32 prod = src1 * src2;
624 u16 scaled = ((prod & 0x00ffff00) >> 8);
625
626 /* Round up. */
627 if (prod & 0x80)
628 scaled++;
629 rd_val |= ((scaled & 0xffffUL) << (byte * 16UL));
630 }
631
632 *fpd_regaddr(f, RD(insn)) = rd_val;
633 break;
634 }
635
636 case FMUL8SUx16_OPF:
637 case FMUL8ULx16_OPF: {
638 unsigned long byte, ushift;
639
640 rs1 = fpd_regval(f, RS1(insn));
641 rs2 = fpd_regval(f, RS2(insn));
642
643 rd_val = 0;
644 ushift = (opf == FMUL8SUx16_OPF) ? 8 : 0;
645 for (byte = 0; byte < 4; byte++) {
646 u16 src1;
647 s16 src2;
648 u32 prod;
649 u16 scaled;
650
651 src1 = ((rs1 >> ((16 * byte) + ushift)) & 0x00ff);
652 src2 = ((rs2 >> (16 * byte)) & 0xffff);
653 prod = src1 * src2;
654 scaled = ((prod & 0x00ffff00) >> 8);
655
656 /* Round up. */
657 if (prod & 0x80)
658 scaled++;
659 rd_val |= ((scaled & 0xffffUL) << (byte * 16UL));
660 }
661
662 *fpd_regaddr(f, RD(insn)) = rd_val;
663 break;
664 }
665
666 case FMULD8SUx16_OPF:
667 case FMULD8ULx16_OPF: {
668 unsigned long byte, ushift;
669
670 rs1 = fps_regval(f, RS1(insn));
671 rs2 = fps_regval(f, RS2(insn));
672
673 rd_val = 0;
674 ushift = (opf == FMULD8SUx16_OPF) ? 8 : 0;
675 for (byte = 0; byte < 2; byte++) {
676 u16 src1;
677 s16 src2;
678 u32 prod;
679 u16 scaled;
680
681 src1 = ((rs1 >> ((16 * byte) + ushift)) & 0x00ff);
682 src2 = ((rs2 >> (16 * byte)) & 0xffff);
683 prod = src1 * src2;
684 scaled = ((prod & 0x00ffff00) >> 8);
685
686 /* Round up. */
687 if (prod & 0x80)
688 scaled++;
689 rd_val |= ((scaled & 0xffffUL) <<
690 ((byte * 32UL) + 7UL));
691 }
692 *fpd_regaddr(f, RD(insn)) = rd_val;
693 break;
694 }
695 };
696}
697
698static void pcmp(struct pt_regs *regs, unsigned int insn, unsigned int opf)
699{
700 struct fpustate *f = FPUSTATE;
701 unsigned long rs1, rs2, rd_val, i;
702
703 rs1 = fpd_regval(f, RS1(insn));
704 rs2 = fpd_regval(f, RS2(insn));
705
706 rd_val = 0;
707
708 switch (opf) {
709 case FCMPGT16_OPF:
710 for (i = 0; i < 4; i++) {
711 s16 a = (rs1 >> (i * 16)) & 0xffff;
712 s16 b = (rs2 >> (i * 16)) & 0xffff;
713
714 if (a > b)
715 rd_val |= 1 << i;
716 }
717 break;
718
719 case FCMPGT32_OPF:
720 for (i = 0; i < 2; i++) {
721 s32 a = (rs1 >> (i * 32)) & 0xffff;
722 s32 b = (rs2 >> (i * 32)) & 0xffff;
723
724 if (a > b)
725 rd_val |= 1 << i;
726 }
727 break;
728
729 case FCMPLE16_OPF:
730 for (i = 0; i < 4; i++) {
731 s16 a = (rs1 >> (i * 16)) & 0xffff;
732 s16 b = (rs2 >> (i * 16)) & 0xffff;
733
734 if (a <= b)
735 rd_val |= 1 << i;
736 }
737 break;
738
739 case FCMPLE32_OPF:
740 for (i = 0; i < 2; i++) {
741 s32 a = (rs1 >> (i * 32)) & 0xffff;
742 s32 b = (rs2 >> (i * 32)) & 0xffff;
743
744 if (a <= b)
745 rd_val |= 1 << i;
746 }
747 break;
748
749 case FCMPNE16_OPF:
750 for (i = 0; i < 4; i++) {
751 s16 a = (rs1 >> (i * 16)) & 0xffff;
752 s16 b = (rs2 >> (i * 16)) & 0xffff;
753
754 if (a != b)
755 rd_val |= 1 << i;
756 }
757 break;
758
759 case FCMPNE32_OPF:
760 for (i = 0; i < 2; i++) {
761 s32 a = (rs1 >> (i * 32)) & 0xffff;
762 s32 b = (rs2 >> (i * 32)) & 0xffff;
763
764 if (a != b)
765 rd_val |= 1 << i;
766 }
767 break;
768
769 case FCMPEQ16_OPF:
770 for (i = 0; i < 4; i++) {
771 s16 a = (rs1 >> (i * 16)) & 0xffff;
772 s16 b = (rs2 >> (i * 16)) & 0xffff;
773
774 if (a == b)
775 rd_val |= 1 << i;
776 }
777 break;
778
779 case FCMPEQ32_OPF:
780 for (i = 0; i < 2; i++) {
781 s32 a = (rs1 >> (i * 32)) & 0xffff;
782 s32 b = (rs2 >> (i * 32)) & 0xffff;
783
784 if (a == b)
785 rd_val |= 1 << i;
786 }
787 break;
788 };
789
790 maybe_flush_windows(0, 0, RD(insn), 0);
791 store_reg(regs, rd_val, RD(insn));
792}
793
794/* Emulate the VIS instructions which are not implemented in
795 * hardware on Niagara.
796 */
797int vis_emul(struct pt_regs *regs, unsigned int insn)
798{
799 unsigned long pc = regs->tpc;
800 unsigned int opf;
801
802 BUG_ON(regs->tstate & TSTATE_PRIV);
803
804 if (test_thread_flag(TIF_32BIT))
805 pc = (u32)pc;
806
807 if (get_user(insn, (u32 __user *) pc))
808 return -EFAULT;
809
810 save_and_clear_fpu();
811
812 opf = (insn & VIS_OPF_MASK) >> VIS_OPF_SHIFT;
813 switch (opf) {
814 default:
815 return -EINVAL;
816
817 /* Pixel Formatting Instructions. */
818 case FPACK16_OPF:
819 case FPACK32_OPF:
820 case FPACKFIX_OPF:
821 case FEXPAND_OPF:
822 case FPMERGE_OPF:
823 pformat(regs, insn, opf);
824 break;
825
826 /* Partitioned Multiply Instructions */
827 case FMUL8x16_OPF:
828 case FMUL8x16AU_OPF:
829 case FMUL8x16AL_OPF:
830 case FMUL8SUx16_OPF:
831 case FMUL8ULx16_OPF:
832 case FMULD8SUx16_OPF:
833 case FMULD8ULx16_OPF:
834 pmul(regs, insn, opf);
835 break;
836
837 /* Pixel Compare Instructions */
838 case FCMPGT16_OPF:
839 case FCMPGT32_OPF:
840 case FCMPLE16_OPF:
841 case FCMPLE32_OPF:
842 case FCMPNE16_OPF:
843 case FCMPNE32_OPF:
844 case FCMPEQ16_OPF:
845 case FCMPEQ32_OPF:
846 pcmp(regs, insn, opf);
847 break;
848
849 /* Edge Handling Instructions */
850 case EDGE8_OPF:
851 case EDGE8N_OPF:
852 case EDGE8L_OPF:
853 case EDGE8LN_OPF:
854 case EDGE16_OPF:
855 case EDGE16N_OPF:
856 case EDGE16L_OPF:
857 case EDGE16LN_OPF:
858 case EDGE32_OPF:
859 case EDGE32N_OPF:
860 case EDGE32L_OPF:
861 case EDGE32LN_OPF:
862 edge(regs, insn, opf);
863 break;
864
865 /* Pixel Component Distance */
866 case PDIST_OPF:
867 pdist(regs, insn);
868 break;
869
870 /* Three-Dimensional Array Addressing Instructions */
871 case ARRAY8_OPF:
872 case ARRAY16_OPF:
873 case ARRAY32_OPF:
874 array(regs, insn, opf);
875 break;
876
877 /* Byte Mask and Shuffle Instructions */
878 case BMASK_OPF:
879 bmask(regs, insn);
880 break;
881
882 case BSHUFFLE_OPF:
883 bshuffle(regs, insn);
884 break;
885 };
886
887 regs->tpc = regs->tnpc;
888 regs->tnpc += 4;
889 return 0;
890}