aboutsummaryrefslogtreecommitdiffstats
path: root/arch/sparc64/kernel/visemul.c
diff options
context:
space:
mode:
author: David S. Miller <davem@sunset.davemloft.net> 2006-03-13 04:27:34 -0500
committer: David S. Miller <davem@sunset.davemloft.net> 2006-03-20 04:14:26 -0500
commit: 0c51ed93ca0ecbf44ec096f4bd04c12a3e761e6b (patch)
tree: 1d51bfaf8d51063e71b3be5727040c563941389f /arch/sparc64/kernel/visemul.c
parent: 90a6646bf6a1ca821f32d5510e935855612904df (diff)
[SPARC64]: First cut at VIS simulator for Niagara.
Niagara does not implement some of the VIS instructions in hardware, so we have to emulate them. Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'arch/sparc64/kernel/visemul.c')
-rw-r--r-- arch/sparc64/kernel/visemul.c 894
1 files changed, 894 insertions, 0 deletions
diff --git a/arch/sparc64/kernel/visemul.c b/arch/sparc64/kernel/visemul.c
new file mode 100644
index 000000000000..84fedaa38aae
--- /dev/null
+++ b/arch/sparc64/kernel/visemul.c
@@ -0,0 +1,894 @@
1/* visemul.c: Emulation of VIS instructions.
2 *
3 * Copyright (C) 2006 David S. Miller (davem@davemloft.net)
4 */
5#include <linux/kernel.h>
6#include <linux/errno.h>
7#include <linux/thread_info.h>
8
9#include <asm/ptrace.h>
10#include <asm/pstate.h>
11#include <asm/system.h>
12#include <asm/fpumacro.h>
13#include <asm/uaccess.h>
14
/*
 * OPF field encodings of the VIS instructions handled in this file.
 * The nine-bit binary form of each value is given in its comment.
 */

/* Pixel formatting. */
#define FPACK16_OPF	0x03b	/* 000111011 - four 16-bit packs */
#define FPACK32_OPF	0x03a	/* 000111010 - two 32-bit packs */
#define FPACKFIX_OPF	0x03d	/* 000111101 - two 32-bit to 16-bit packs */
#define FEXPAND_OPF	0x04d	/* 001001101 - four 16-bit expands */
#define FPMERGE_OPF	0x04b	/* 001001011 - two 32-bit merges */

/* Partitioned multiply. */
#define FMUL8x16_OPF	0x031	/* 000110001 - 8-by-16-bit partitioned product */
#define FMUL8x16AU_OPF	0x033	/* 000110011 - 8-by-16-bit upper alpha partitioned product */
#define FMUL8x16AL_OPF	0x035	/* 000110101 - 8-by-16-bit lower alpha partitioned product */
#define FMUL8SUx16_OPF	0x036	/* 000110110 - upper 8-by-16-bit partitioned product */
#define FMUL8ULx16_OPF	0x037	/* 000110111 - lower 8-by-16-bit partitioned product */
#define FMULD8SUx16_OPF	0x038	/* 000111000 - upper 8-by-16-bit partitioned product */
#define FMULD8ULx16_OPF	0x039	/* 000111001 - lower unsigned 8-by-16-bit partitioned product */

/* Pixel compare; each sets rd bits according to the stated relation. */
#define FCMPGT16_OPF	0x028	/* 000101000 - four 16-bit compare; src1 > src2 */
#define FCMPGT32_OPF	0x02c	/* 000101100 - two 32-bit compare; src1 > src2 */
#define FCMPLE16_OPF	0x020	/* 000100000 - four 16-bit compare; src1 <= src2 */
#define FCMPLE32_OPF	0x024	/* 000100100 - two 32-bit compare; src1 <= src2 */
#define FCMPNE16_OPF	0x022	/* 000100010 - four 16-bit compare; src1 != src2 */
#define FCMPNE32_OPF	0x026	/* 000100110 - two 32-bit compare; src1 != src2 */
#define FCMPEQ16_OPF	0x02a	/* 000101010 - four 16-bit compare; src1 == src2 */
#define FCMPEQ32_OPF	0x02e	/* 000101110 - two 32-bit compare; src1 == src2 */

/*
 * Edge boundary processing.  An "L" suffix selects the little-endian
 * form, an "N" suffix the form that leaves the condition codes alone.
 */
#define EDGE8_OPF	0x000	/* 000000000 - eight 8-bit */
#define EDGE8N_OPF	0x001	/* 000000001 - eight 8-bit, no CC */
#define EDGE8L_OPF	0x002	/* 000000010 - eight 8-bit, little-endian */
#define EDGE8LN_OPF	0x003	/* 000000011 - eight 8-bit, little-endian, no CC */
#define EDGE16_OPF	0x004	/* 000000100 - four 16-bit */
#define EDGE16N_OPF	0x005	/* 000000101 - four 16-bit, no CC */
#define EDGE16L_OPF	0x006	/* 000000110 - four 16-bit, little-endian */
#define EDGE16LN_OPF	0x007	/* 000000111 - four 16-bit, little-endian, no CC */
#define EDGE32_OPF	0x008	/* 000001000 - two 32-bit */
#define EDGE32N_OPF	0x009	/* 000001001 - two 32-bit, no CC */
#define EDGE32L_OPF	0x00a	/* 000001010 - two 32-bit, little-endian */
#define EDGE32LN_OPF	0x00b	/* 000001011 - two 32-bit, little-endian, no CC */

/* Pixel component distance. */
#define PDIST_OPF	0x03e	/* 000111110 - distance between 8 8-bit components */

/* Three-dimensional array addressing. */
#define ARRAY8_OPF	0x010	/* 000010000 - 8-bit 3-D address to blocked byte address */
#define ARRAY16_OPF	0x012	/* 000010010 - 16-bit 3-D address to blocked byte address */
#define ARRAY32_OPF	0x014	/* 000010100 - 32-bit 3-D address to blocked byte address */

/* Byte mask and shuffle. */
#define BMASK_OPF	0x019	/* 000011001 - set GSR.MASK in preparation for a BSHUFFLE */
#define BSHUFFLE_OPF	0x04c	/* 001001100 - permute bytes as specified by GSR.MASK */
130
/*
 * Field layout of the 32-bit instruction word as decoded here:
 *   op  = insn[31:30]   rd  = insn[29:25]   op3 = insn[24:19]
 *   rs1 = insn[18:14]   opf = insn[13:5]    rs2 = insn[4:0]
 * An instruction is treated as VIS when op == 2 and op3 == 0x36.
 */
#define VIS_OPCODE_MASK	((0x3U << 30) | (0x3fU << 19))
#define VIS_OPCODE_VAL	((0x2U << 30) | (0x36U << 19))

#define VIS_OPF_SHIFT	5
#define VIS_OPF_MASK	(0x1ff << VIS_OPF_SHIFT)

/* Five-bit register number fields. */
#define RS1(INSN)	(((INSN) >> 14) & 0x1f)
#define RS2(INSN)	(((INSN) >>  0) & 0x1f)
#define RD(INSN)	(((INSN) >> 25) & 0x1f)
140
/*
 * Flush the register windows if any of the three operand registers is
 * numbered 16 or above.  fetch_reg() and store_reg() access those
 * registers through the window save area at the frame pointer, so the
 * windows are flushed before they are used.
 *
 * @rs1, @rs2, @rd: register numbers taken from the instruction.
 * @from_kernel: non-zero issues a "flushw" instruction directly,
 *               zero goes through flushw_user().
 */
static inline void maybe_flush_windows(unsigned int rs1, unsigned int rs2,
				       unsigned int rd, int from_kernel)
{
	/* Nothing to do when every operand is below register 16. */
	if (rs1 < 16 && rs2 < 16 && rd < 16)
		return;

	if (from_kernel)
		__asm__ __volatile__("flushw");
	else
		flushw_user();
}
151
152static unsigned long fetch_reg(unsigned int reg, struct pt_regs *regs)
153{
154 unsigned long value;
155
156 if (reg < 16)
157 return (!reg ? 0 : regs->u_regs[reg]);
158 if (regs->tstate & TSTATE_PRIV) {
159 struct reg_window *win;
160 win = (struct reg_window *)(regs->u_regs[UREG_FP] + STACK_BIAS);
161 value = win->locals[reg - 16];
162 } else if (test_thread_flag(TIF_32BIT)) {
163 struct reg_window32 __user *win32;
164 win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP]));
165 get_user(value, &win32->locals[reg - 16]);
166 } else {
167 struct reg_window __user *win;
168 win = (struct reg_window __user *)(regs->u_regs[UREG_FP] + STACK_BIAS);
169 get_user(value, &win->locals[reg - 16]);
170 }
171 return value;
172}
173
174static inline unsigned long __user *__fetch_reg_addr_user(unsigned int reg,
175 struct pt_regs *regs)
176{
177 BUG_ON(reg < 16);
178 BUG_ON(regs->tstate & TSTATE_PRIV);
179
180 if (test_thread_flag(TIF_32BIT)) {
181 struct reg_window32 __user *win32;
182 win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP]));
183 return (unsigned long __user *)&win32->locals[reg - 16];
184 } else {
185 struct reg_window __user *win;
186 win = (struct reg_window __user *)(regs->u_regs[UREG_FP] + STACK_BIAS);
187 return &win->locals[reg - 16];
188 }
189}
190
191static inline unsigned long *__fetch_reg_addr_kern(unsigned int reg,
192 struct pt_regs *regs)
193{
194 BUG_ON(reg >= 16);
195 BUG_ON(regs->tstate & TSTATE_PRIV);
196
197 return &regs->u_regs[reg];
198}
199
200static void store_reg(struct pt_regs *regs, unsigned long val, unsigned long rd)
201{
202 if (rd < 16) {
203 unsigned long *rd_kern = __fetch_reg_addr_kern(rd, regs);
204
205 *rd_kern = val;
206 } else {
207 unsigned long __user *rd_user = __fetch_reg_addr_user(rd, regs);
208
209 if (test_thread_flag(TIF_32BIT))
210 __put_user((u32)val, (u32 __user *)rd_user);
211 else
212 __put_user(val, rd_user);
213 }
214}
215
216static inline unsigned long fpd_regval(struct fpustate *f,
217 unsigned int insn_regnum)
218{
219 insn_regnum = (((insn_regnum & 1) << 5) |
220 (insn_regnum & 0x1e));
221
222 return *(unsigned long *) &f->regs[insn_regnum];
223}
224
225static inline unsigned long *fpd_regaddr(struct fpustate *f,
226 unsigned int insn_regnum)
227{
228 insn_regnum = (((insn_regnum & 1) << 5) |
229 (insn_regnum & 0x1e));
230
231 return (unsigned long *) &f->regs[insn_regnum];
232}
233
234static inline unsigned int fps_regval(struct fpustate *f,
235 unsigned int insn_regnum)
236{
237 return f->regs[insn_regnum];
238}
239
240static inline unsigned int *fps_regaddr(struct fpustate *f,
241 unsigned int insn_regnum)
242{
243 return &f->regs[insn_regnum];
244}
245
246struct edge_tab {
247 u16 left, right;
248};
249struct edge_tab edge8_tab[8] = {
250 { 0xff, 0x80 },
251 { 0x7f, 0xc0 },
252 { 0x3f, 0xe0 },
253 { 0x1f, 0xf0 },
254 { 0x0f, 0xf8 },
255 { 0x07, 0xfc },
256 { 0x03, 0xfe },
257 { 0x01, 0xff },
258};
259struct edge_tab edge8_tab_l[8] = {
260 { 0xff, 0x01 },
261 { 0xfe, 0x03 },
262 { 0xfc, 0x07 },
263 { 0xf8, 0x0f },
264 { 0xf0, 0x1f },
265 { 0xe0, 0x3f },
266 { 0xc0, 0x7f },
267 { 0x80, 0xff },
268};
269struct edge_tab edge16_tab[4] = {
270 { 0xf, 0x8 },
271 { 0x7, 0xc },
272 { 0x3, 0xe },
273 { 0x1, 0xf },
274};
275struct edge_tab edge16_tab_l[4] = {
276 { 0xf, 0x1 },
277 { 0xe, 0x3 },
278 { 0xc, 0x7 },
279 { 0x8, 0xf },
280};
281struct edge_tab edge32_tab[2] = {
282 { 0x3, 0x2 },
283 { 0x1, 0x3 },
284};
285struct edge_tab edge32_tab_l[2] = {
286 { 0x3, 0x1 },
287 { 0x2, 0x3 },
288};
289
290static void edge(struct pt_regs *regs, unsigned int insn, unsigned int opf)
291{
292 unsigned long orig_rs1, rs1, orig_rs2, rs2, rd_val;
293 u16 left, right;
294
295 maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0);
296 orig_rs1 = rs1 = fetch_reg(RS1(insn), regs);
297 orig_rs2 = rs2 = fetch_reg(RS2(insn), regs);
298
299 if (test_thread_flag(TIF_32BIT)) {
300 rs1 = rs1 & 0xffffffff;
301 rs2 = rs2 & 0xffffffff;
302 }
303 switch (opf) {
304 default:
305 case EDGE8_OPF:
306 case EDGE8N_OPF:
307 left = edge8_tab[rs1 & 0x7].left;
308 right = edge8_tab[rs2 & 0x7].right;
309 break;
310 case EDGE8L_OPF:
311 case EDGE8LN_OPF:
312 left = edge8_tab_l[rs1 & 0x7].left;
313 right = edge8_tab_l[rs2 & 0x7].right;
314 break;
315
316 case EDGE16_OPF:
317 case EDGE16N_OPF:
318 left = edge16_tab[(rs1 >> 1) & 0x3].left;
319 right = edge16_tab[(rs2 >> 1) & 0x3].right;
320 break;
321
322 case EDGE16L_OPF:
323 case EDGE16LN_OPF:
324 left = edge16_tab_l[(rs1 >> 1) & 0x3].left;
325 right = edge16_tab_l[(rs2 >> 1) & 0x3].right;
326 break;
327
328 case EDGE32_OPF:
329 case EDGE32N_OPF:
330 left = edge32_tab[(rs1 >> 2) & 0x1].left;
331 right = edge32_tab[(rs2 >> 2) & 0x1].right;
332 break;
333
334 case EDGE32L_OPF:
335 case EDGE32LN_OPF:
336 left = edge32_tab_l[(rs1 >> 2) & 0x1].left;
337 right = edge32_tab_l[(rs2 >> 2) & 0x1].right;
338 break;
339 };
340
341 if ((rs1 & ~0x7UL) == (rs2 & ~0x7UL))
342 rd_val = right & left;
343 else
344 rd_val = left;
345
346 store_reg(regs, rd_val, RD(insn));
347
348 switch (opf) {
349 case EDGE8_OPF:
350 case EDGE8L_OPF:
351 case EDGE16_OPF:
352 case EDGE16L_OPF:
353 case EDGE32_OPF:
354 case EDGE32L_OPF: {
355 unsigned long ccr, tstate;
356
357 __asm__ __volatile__("subcc %1, %2, %%g0\n\t"
358 "rd %%ccr, %0"
359 : "=r" (ccr)
360 : "r" (orig_rs1), "r" (orig_rs2)
361 : "cc");
362 tstate = regs->tstate & ~(TSTATE_XCC | TSTATE_ICC);
363 regs->tstate = tstate | (ccr << 32UL);
364 }
365 };
366}
367
368static void array(struct pt_regs *regs, unsigned int insn, unsigned int opf)
369{
370 unsigned long rs1, rs2, rd_val;
371 unsigned int bits, bits_mask;
372
373 maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0);
374 rs1 = fetch_reg(RS1(insn), regs);
375 rs2 = fetch_reg(RS2(insn), regs);
376
377 bits = (rs2 > 5 ? 5 : rs2);
378 bits_mask = (1UL << bits) - 1UL;
379
380 rd_val = ((((rs1 >> 11) & 0x3) << 0) |
381 (((rs1 >> 33) & 0x3) << 2) |
382 (((rs1 >> 55) & 0x1) << 4) |
383 (((rs1 >> 13) & 0xf) << 5) |
384 (((rs1 >> 35) & 0xf) << 9) |
385 (((rs1 >> 56) & 0xf) << 13) |
386 (((rs1 >> 17) & bits_mask) << 17) |
387 (((rs1 >> 39) & bits_mask) << (17 + bits)) |
388 (((rs1 >> 60) & 0xf) << (17 + (2*bits))));
389
390 switch (opf) {
391 case ARRAY16_OPF:
392 rd_val <<= 1;
393 break;
394
395 case ARRAY32_OPF:
396 rd_val <<= 2;
397 };
398
399 store_reg(regs, rd_val, RD(insn));
400}
401
402static void bmask(struct pt_regs *regs, unsigned int insn)
403{
404 unsigned long rs1, rs2, rd_val, gsr;
405
406 maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0);
407 rs1 = fetch_reg(RS1(insn), regs);
408 rs2 = fetch_reg(RS2(insn), regs);
409 rd_val = rs1 + rs2;
410
411 store_reg(regs, rd_val, RD(insn));
412
413 gsr = current_thread_info()->gsr[0] & 0xffffffff;
414 gsr |= rd_val << 32UL;
415 current_thread_info()->gsr[0] = gsr;
416}
417
418static void bshuffle(struct pt_regs *regs, unsigned int insn)
419{
420 struct fpustate *f = FPUSTATE;
421 unsigned long rs1, rs2, rd_val;
422 unsigned long bmask, i;
423
424 bmask = current_thread_info()->gsr[0] >> 32UL;
425
426 rs1 = fpd_regval(f, RS1(insn));
427 rs2 = fpd_regval(f, RS2(insn));
428
429 rd_val = 0UL;
430 for (i = 0; i < 8; i++) {
431 unsigned long which = (bmask >> (i * 4)) & 0xf;
432 unsigned long byte;
433
434 if (which < 8)
435 byte = (rs1 >> (which * 8)) & 0xff;
436 else
437 byte = (rs2 >> ((which-8)*8)) & 0xff;
438 rd_val |= (byte << (i * 8));
439 }
440
441 *fpd_regaddr(f, RD(insn)) = rd_val;
442}
443
444static void pdist(struct pt_regs *regs, unsigned int insn)
445{
446 struct fpustate *f = FPUSTATE;
447 unsigned long rs1, rs2, *rd, rd_val;
448 unsigned long i;
449
450 rs1 = fpd_regval(f, RS1(insn));
451 rs2 = fpd_regval(f, RS1(insn));
452 rd = fpd_regaddr(f, RD(insn));
453
454 rd_val = *rd;
455
456 for (i = 0; i < 8; i++) {
457 s16 s1, s2;
458
459 s1 = (rs1 >> (56 - (i * 8))) & 0xff;
460 s2 = (rs2 >> (56 - (i * 8))) & 0xff;
461
462 /* Absolute value of difference. */
463 s1 -= s2;
464 if (s1 < 0)
465 s1 = ~s1 + 1;
466
467 rd_val += s1;
468 }
469
470 *rd = rd_val;
471}
472
473static void pformat(struct pt_regs *regs, unsigned int insn, unsigned int opf)
474{
475 struct fpustate *f = FPUSTATE;
476 unsigned long rs1, rs2, gsr, scale, rd_val;
477
478 gsr = current_thread_info()->gsr[0];
479 scale = (gsr >> 3) & (opf == FPACK16_OPF ? 0xf : 0x1f);
480 switch (opf) {
481 case FPACK16_OPF: {
482 unsigned long byte;
483
484 rs2 = fpd_regval(f, RS2(insn));
485 rd_val = 0;
486 for (byte = 0; byte < 4; byte++) {
487 unsigned int val;
488 s16 src = (rs2 >> (byte * 16UL)) & 0xffffUL;
489 int scaled = src << scale;
490 int from_fixed = scaled >> 7;
491
492 val = ((from_fixed < 0) ?
493 0 :
494 (from_fixed > 255) ?
495 255 : from_fixed);
496
497 rd_val |= (val << (8 * byte));
498 }
499 *fps_regaddr(f, RD(insn)) = rd_val;
500 break;
501 }
502
503 case FPACK32_OPF: {
504 unsigned long word;
505
506 rs1 = fpd_regval(f, RS1(insn));
507 rs2 = fpd_regval(f, RS2(insn));
508 rd_val = (rs1 << 8) & ~(0x000000ff000000ffUL);
509 for (word = 0; word < 2; word++) {
510 unsigned long val;
511 s32 src = (rs2 >> (word * 32UL));
512 s64 scaled = src << scale;
513 s64 from_fixed = scaled >> 23;
514
515 val = ((from_fixed < 0) ?
516 0 :
517 (from_fixed > 255) ?
518 255 : from_fixed);
519
520 rd_val |= (val << (32 * word));
521 }
522 *fpd_regaddr(f, RD(insn)) = rd_val;
523 break;
524 }
525
526 case FPACKFIX_OPF: {
527 unsigned long word;
528
529 rs2 = fpd_regval(f, RS2(insn));
530
531 rd_val = 0;
532 for (word = 0; word < 2; word++) {
533 long val;
534 s32 src = (rs2 >> (word * 32UL));
535 s64 scaled = src << scale;
536 s64 from_fixed = scaled >> 16;
537
538 val = ((from_fixed < -32768) ?
539 -32768 :
540 (from_fixed > 32767) ?
541 32767 : from_fixed);
542
543 rd_val |= ((val & 0xffff) << (word * 16));
544 }
545 *fps_regaddr(f, RD(insn)) = rd_val;
546 break;
547 }
548
549 case FEXPAND_OPF: {
550 unsigned long byte;
551
552 rs2 = fps_regval(f, RS2(insn));
553
554 rd_val = 0;
555 for (byte = 0; byte < 4; byte++) {
556 unsigned long val;
557 u8 src = (rs2 >> (byte * 8)) & 0xff;
558
559 val = src << 4;
560
561 rd_val |= (val << (byte * 16));
562 }
563 *fpd_regaddr(f, RD(insn)) = rd_val;
564 break;
565 }
566
567 case FPMERGE_OPF: {
568 rs1 = fps_regval(f, RS1(insn));
569 rs2 = fps_regval(f, RS2(insn));
570
571 rd_val = (((rs2 & 0x000000ff) << 0) |
572 ((rs1 & 0x000000ff) << 8) |
573 ((rs2 & 0x0000ff00) << 8) |
574 ((rs1 & 0x0000ff00) << 16) |
575 ((rs2 & 0x00ff0000) << 16) |
576 ((rs1 & 0x00ff0000) << 24) |
577 ((rs2 & 0xff000000) << 24) |
578 ((rs1 & 0xff000000) << 32));
579 *fpd_regaddr(f, RD(insn)) = rd_val;
580 break;
581 }
582 };
583}
584
585static void pmul(struct pt_regs *regs, unsigned int insn, unsigned int opf)
586{
587 struct fpustate *f = FPUSTATE;
588 unsigned long rs1, rs2, rd_val;
589
590 switch (opf) {
591 case FMUL8x16_OPF: {
592 unsigned long byte;
593
594 rs1 = fps_regval(f, RS1(insn));
595 rs2 = fpd_regval(f, RS2(insn));
596
597 rd_val = 0;
598 for (byte = 0; byte < 4; byte++) {
599 u16 src1 = (rs1 >> (byte * 8)) & 0x00ff;
600 s16 src2 = (rs2 >> (byte * 16)) & 0xffff;
601 u32 prod = src1 * src2;
602 u16 scaled = ((prod & 0x00ffff00) >> 8);
603
604 /* Round up. */
605 if (prod & 0x80)
606 scaled++;
607 rd_val |= ((scaled & 0xffffUL) << (byte * 16UL));
608 }
609
610 *fpd_regaddr(f, RD(insn)) = rd_val;
611 break;
612 }
613
614 case FMUL8x16AU_OPF:
615 case FMUL8x16AL_OPF: {
616 unsigned long byte;
617 s16 src2;
618
619 rs1 = fps_regval(f, RS1(insn));
620 rs2 = fps_regval(f, RS2(insn));
621
622 rd_val = 0;
623 src2 = (rs2 >> (opf == FMUL8x16AU_OPF) ? 16 : 0);
624 for (byte = 0; byte < 4; byte++) {
625 u16 src1 = (rs1 >> (byte * 8)) & 0x00ff;
626 u32 prod = src1 * src2;
627 u16 scaled = ((prod & 0x00ffff00) >> 8);
628
629 /* Round up. */
630 if (prod & 0x80)
631 scaled++;
632 rd_val |= ((scaled & 0xffffUL) << (byte * 16UL));
633 }
634
635 *fpd_regaddr(f, RD(insn)) = rd_val;
636 break;
637 }
638
639 case FMUL8SUx16_OPF:
640 case FMUL8ULx16_OPF: {
641 unsigned long byte, ushift;
642
643 rs1 = fpd_regval(f, RS1(insn));
644 rs2 = fpd_regval(f, RS2(insn));
645
646 rd_val = 0;
647 ushift = (opf == FMUL8SUx16_OPF) ? 8 : 0;
648 for (byte = 0; byte < 4; byte++) {
649 u16 src1;
650 s16 src2;
651 u32 prod;
652 u16 scaled;
653
654 src1 = ((rs1 >> ((16 * byte) + ushift)) & 0x00ff);
655 src2 = ((rs2 >> (16 * byte)) & 0xffff);
656 prod = src1 * src2;
657 scaled = ((prod & 0x00ffff00) >> 8);
658
659 /* Round up. */
660 if (prod & 0x80)
661 scaled++;
662 rd_val |= ((scaled & 0xffffUL) << (byte * 16UL));
663 }
664
665 *fpd_regaddr(f, RD(insn)) = rd_val;
666 break;
667 }
668
669 case FMULD8SUx16_OPF:
670 case FMULD8ULx16_OPF: {
671 unsigned long byte, ushift;
672
673 rs1 = fps_regval(f, RS1(insn));
674 rs2 = fps_regval(f, RS2(insn));
675
676 rd_val = 0;
677 ushift = (opf == FMULD8SUx16_OPF) ? 8 : 0;
678 for (byte = 0; byte < 2; byte++) {
679 u16 src1;
680 s16 src2;
681 u32 prod;
682 u16 scaled;
683
684 src1 = ((rs1 >> ((16 * byte) + ushift)) & 0x00ff);
685 src2 = ((rs2 >> (16 * byte)) & 0xffff);
686 prod = src1 * src2;
687 scaled = ((prod & 0x00ffff00) >> 8);
688
689 /* Round up. */
690 if (prod & 0x80)
691 scaled++;
692 rd_val |= ((scaled & 0xffffUL) <<
693 ((byte * 32UL) + 7UL));
694 }
695 *fpd_regaddr(f, RD(insn)) = rd_val;
696 break;
697 }
698 };
699}
700
701static void pcmp(struct pt_regs *regs, unsigned int insn, unsigned int opf)
702{
703 struct fpustate *f = FPUSTATE;
704 unsigned long rs1, rs2, rd_val, i;
705
706 rs1 = fpd_regval(f, RS1(insn));
707 rs2 = fpd_regval(f, RS2(insn));
708
709 rd_val = 0;
710
711 switch (opf) {
712 case FCMPGT16_OPF:
713 for (i = 0; i < 4; i++) {
714 s16 a = (rs1 >> (i * 16)) & 0xffff;
715 s16 b = (rs2 >> (i * 16)) & 0xffff;
716
717 if (a > b)
718 rd_val |= 1 << i;
719 }
720 break;
721
722 case FCMPGT32_OPF:
723 for (i = 0; i < 2; i++) {
724 s32 a = (rs1 >> (i * 32)) & 0xffff;
725 s32 b = (rs2 >> (i * 32)) & 0xffff;
726
727 if (a > b)
728 rd_val |= 1 << i;
729 }
730 break;
731
732 case FCMPLE16_OPF:
733 for (i = 0; i < 4; i++) {
734 s16 a = (rs1 >> (i * 16)) & 0xffff;
735 s16 b = (rs2 >> (i * 16)) & 0xffff;
736
737 if (a <= b)
738 rd_val |= 1 << i;
739 }
740 break;
741
742 case FCMPLE32_OPF:
743 for (i = 0; i < 2; i++) {
744 s32 a = (rs1 >> (i * 32)) & 0xffff;
745 s32 b = (rs2 >> (i * 32)) & 0xffff;
746
747 if (a <= b)
748 rd_val |= 1 << i;
749 }
750 break;
751
752 case FCMPNE16_OPF:
753 for (i = 0; i < 4; i++) {
754 s16 a = (rs1 >> (i * 16)) & 0xffff;
755 s16 b = (rs2 >> (i * 16)) & 0xffff;
756
757 if (a != b)
758 rd_val |= 1 << i;
759 }
760 break;
761
762 case FCMPNE32_OPF:
763 for (i = 0; i < 2; i++) {
764 s32 a = (rs1 >> (i * 32)) & 0xffff;
765 s32 b = (rs2 >> (i * 32)) & 0xffff;
766
767 if (a != b)
768 rd_val |= 1 << i;
769 }
770 break;
771
772 case FCMPEQ16_OPF:
773 for (i = 0; i < 4; i++) {
774 s16 a = (rs1 >> (i * 16)) & 0xffff;
775 s16 b = (rs2 >> (i * 16)) & 0xffff;
776
777 if (a == b)
778 rd_val |= 1 << i;
779 }
780 break;
781
782 case FCMPEQ32_OPF:
783 for (i = 0; i < 2; i++) {
784 s32 a = (rs1 >> (i * 32)) & 0xffff;
785 s32 b = (rs2 >> (i * 32)) & 0xffff;
786
787 if (a == b)
788 rd_val |= 1 << i;
789 }
790 break;
791 };
792
793 maybe_flush_windows(0, 0, RD(insn), 0);
794 store_reg(regs, rd_val, RD(insn));
795}
796
797/* Emulate the VIS instructions which are not implemented in
798 * hardware on Niagara.
799 */
800int vis_emul(struct pt_regs *regs, unsigned int insn)
801{
802 unsigned long pc = regs->tpc;
803 unsigned int opf;
804
805 BUG_ON(regs->tstate & TSTATE_PRIV);
806
807 if (test_thread_flag(TIF_32BIT))
808 pc = (u32)pc;
809
810 if (get_user(insn, (u32 __user *) pc))
811 return -EFAULT;
812
813 if ((insn & VIS_OPCODE_MASK) != VIS_OPCODE_VAL)
814 return -EINVAL;
815
816 opf = (insn & VIS_OPF_MASK) >> VIS_OPF_SHIFT;
817 switch (opf) {
818 default:
819 return -EINVAL;
820
821 /* Pixel Formatting Instructions. */
822 case FPACK16_OPF:
823 case FPACK32_OPF:
824 case FPACKFIX_OPF:
825 case FEXPAND_OPF:
826 case FPMERGE_OPF:
827 pformat(regs, insn, opf);
828 break;
829
830 /* Partitioned Multiply Instructions */
831 case FMUL8x16_OPF:
832 case FMUL8x16AU_OPF:
833 case FMUL8x16AL_OPF:
834 case FMUL8SUx16_OPF:
835 case FMUL8ULx16_OPF:
836 case FMULD8SUx16_OPF:
837 case FMULD8ULx16_OPF:
838 pmul(regs, insn, opf);
839 break;
840
841 /* Pixel Compare Instructions */
842 case FCMPGT16_OPF:
843 case FCMPGT32_OPF:
844 case FCMPLE16_OPF:
845 case FCMPLE32_OPF:
846 case FCMPNE16_OPF:
847 case FCMPNE32_OPF:
848 case FCMPEQ16_OPF:
849 case FCMPEQ32_OPF:
850 pcmp(regs, insn, opf);
851 break;
852
853 /* Edge Handling Instructions */
854 case EDGE8_OPF:
855 case EDGE8N_OPF:
856 case EDGE8L_OPF:
857 case EDGE8LN_OPF:
858 case EDGE16_OPF:
859 case EDGE16N_OPF:
860 case EDGE16L_OPF:
861 case EDGE16LN_OPF:
862 case EDGE32_OPF:
863 case EDGE32N_OPF:
864 case EDGE32L_OPF:
865 case EDGE32LN_OPF:
866 edge(regs, insn, opf);
867 break;
868
869 /* Pixel Component Distance */
870 case PDIST_OPF:
871 pdist(regs, insn);
872 break;
873
874 /* Three-Dimensional Array Addressing Instructions */
875 case ARRAY8_OPF:
876 case ARRAY16_OPF:
877 case ARRAY32_OPF:
878 array(regs, insn, opf);
879 break;
880
881 /* Byte Mask and Shuffle Instructions */
882 case BMASK_OPF:
883 bmask(regs, insn);
884 break;
885
886 case BSHUFFLE_OPF:
887 bshuffle(regs, insn);
888 break;
889 };
890
891 regs->tpc = regs->tnpc;
892 regs->tnpc += 4;
893 return 0;
894}