diff options
| author | David S. Miller <davem@sunset.davemloft.net> | 2006-03-13 04:27:34 -0500 |
|---|---|---|
| committer | David S. Miller <davem@sunset.davemloft.net> | 2006-03-20 04:14:26 -0500 |
| commit | 0c51ed93ca0ecbf44ec096f4bd04c12a3e761e6b (patch) | |
| tree | 1d51bfaf8d51063e71b3be5727040c563941389f | |
| parent | 90a6646bf6a1ca821f32d5510e935855612904df (diff) | |
[SPARC64]: First cut at VIS simulator for Niagara.
Niagara does not implement some of the VIS instructions in
hardware, so we have to emulate them.
Signed-off-by: David S. Miller <davem@davemloft.net>
| -rw-r--r-- | arch/sparc64/kernel/Makefile | 3 | ||||
| -rw-r--r-- | arch/sparc64/kernel/traps.c | 5 | ||||
| -rw-r--r-- | arch/sparc64/kernel/visemul.c | 894 |
3 files changed, 901 insertions, 1 deletions
diff --git a/arch/sparc64/kernel/Makefile b/arch/sparc64/kernel/Makefile index fedfd9c6729d..6f6816488b04 100644 --- a/arch/sparc64/kernel/Makefile +++ b/arch/sparc64/kernel/Makefile | |||
| @@ -11,7 +11,8 @@ obj-y := process.o setup.o cpu.o idprom.o \ | |||
| 11 | traps.o devices.o auxio.o una_asm.o \ | 11 | traps.o devices.o auxio.o una_asm.o \ |
| 12 | irq.o ptrace.o time.o sys_sparc.o signal.o \ | 12 | irq.o ptrace.o time.o sys_sparc.o signal.o \ |
| 13 | unaligned.o central.o pci.o starfire.o semaphore.o \ | 13 | unaligned.o central.o pci.o starfire.o semaphore.o \ |
| 14 | power.o sbus.o iommu_common.o sparc64_ksyms.o chmc.o | 14 | power.o sbus.o iommu_common.o sparc64_ksyms.o chmc.o \ |
| 15 | visemul.o | ||
| 15 | 16 | ||
| 16 | obj-$(CONFIG_PCI) += ebus.o isa.o pci_common.o pci_iommu.o \ | 17 | obj-$(CONFIG_PCI) += ebus.o isa.o pci_common.o pci_iommu.o \ |
| 17 | pci_psycho.o pci_sabre.o pci_schizo.o \ | 18 | pci_psycho.o pci_sabre.o pci_schizo.o \ |
diff --git a/arch/sparc64/kernel/traps.c b/arch/sparc64/kernel/traps.c index 043a72658f6a..7f7dba0ca96a 100644 --- a/arch/sparc64/kernel/traps.c +++ b/arch/sparc64/kernel/traps.c | |||
| @@ -2273,6 +2273,11 @@ void do_illegal_instruction(struct pt_regs *regs) | |||
| 2273 | } else if ((insn & 0xc1580000) == 0xc1100000) /* LDQ/STQ */ { | 2273 | } else if ((insn & 0xc1580000) == 0xc1100000) /* LDQ/STQ */ { |
| 2274 | if (handle_ldf_stq(insn, regs)) | 2274 | if (handle_ldf_stq(insn, regs)) |
| 2275 | return; | 2275 | return; |
| 2276 | } else if (tlb_type == hypervisor) { | ||
| 2277 | extern int vis_emul(struct pt_regs *, unsigned int); | ||
| 2278 | |||
| 2279 | if (!vis_emul(regs, insn)) | ||
| 2280 | return; | ||
| 2276 | } | 2281 | } |
| 2277 | } | 2282 | } |
| 2278 | info.si_signo = SIGILL; | 2283 | info.si_signo = SIGILL; |
diff --git a/arch/sparc64/kernel/visemul.c b/arch/sparc64/kernel/visemul.c new file mode 100644 index 000000000000..84fedaa38aae --- /dev/null +++ b/arch/sparc64/kernel/visemul.c | |||
| @@ -0,0 +1,894 @@ | |||
| 1 | /* visemul.c: Emulation of VIS instructions. | ||
| 2 | * | ||
| 3 | * Copyright (C) 2006 David S. Miller (davem@davemloft.net) | ||
| 4 | */ | ||
| 5 | #include <linux/kernel.h> | ||
| 6 | #include <linux/errno.h> | ||
| 7 | #include <linux/thread_info.h> | ||
| 8 | |||
| 9 | #include <asm/ptrace.h> | ||
| 10 | #include <asm/pstate.h> | ||
| 11 | #include <asm/system.h> | ||
| 12 | #include <asm/fpumacro.h> | ||
| 13 | #include <asm/uaccess.h> | ||
| 14 | |||
| 15 | /* OPF field of various VIS instructions. */ | ||
| 16 | |||
| 17 | /* 000111011 - four 16-bit packs */ | ||
| 18 | #define FPACK16_OPF 0x03b | ||
| 19 | |||
| 20 | /* 000111010 - two 32-bit packs */ | ||
| 21 | #define FPACK32_OPF 0x03a | ||
| 22 | |||
| 23 | /* 000111101 - two 32-bit to 16-bit fixed packs */ | ||
| 24 | #define FPACKFIX_OPF 0x03d | ||
| 25 | |||
| 26 | /* 001001101 - four 16-bit expands */ | ||
| 27 | #define FEXPAND_OPF 0x04d | ||
| 28 | |||
| 29 | /* 001001011 - two 32-bit merges */ | ||
| 30 | #define FPMERGE_OPF 0x04b | ||
| 31 | |||
| 32 | /* 000110001 - 8-by-16-bit partitioned product */ | ||
| 33 | #define FMUL8x16_OPF 0x031 | ||
| 34 | |||
| 35 | /* 000110011 - 8-by-16-bit upper alpha partitioned product */ | ||
| 36 | #define FMUL8x16AU_OPF 0x033 | ||
| 37 | |||
| 38 | /* 000110101 - 8-by-16-bit lower alpha partitioned product */ | ||
| 39 | #define FMUL8x16AL_OPF 0x035 | ||
| 40 | |||
| 41 | /* 000110110 - upper 8-by-16-bit partitioned product */ | ||
| 42 | #define FMUL8SUx16_OPF 0x036 | ||
| 43 | |||
| 44 | /* 000110111 - lower 8-by-16-bit partitioned product */ | ||
| 45 | #define FMUL8ULx16_OPF 0x037 | ||
| 46 | |||
| 47 | /* 000111000 - upper 8-by-16-bit partitioned product */ | ||
| 48 | #define FMULD8SUx16_OPF 0x038 | ||
| 49 | |||
| 50 | /* 000111001 - lower unsigned 8-by-16-bit partitioned product */ | ||
| 51 | #define FMULD8ULx16_OPF 0x039 | ||
| 52 | |||
| 53 | /* 000101000 - four 16-bit compare; set rd if src1 > src2 */ | ||
| 54 | #define FCMPGT16_OPF 0x028 | ||
| 55 | |||
| 56 | /* 000101100 - two 32-bit compare; set rd if src1 > src2 */ | ||
| 57 | #define FCMPGT32_OPF 0x02c | ||
| 58 | |||
| 59 | /* 000100000 - four 16-bit compare; set rd if src1 <= src2 */ | ||
| 60 | #define FCMPLE16_OPF 0x020 | ||
| 61 | |||
| 62 | /* 000100100 - two 32-bit compare; set rd if src1 <= src2 */ | ||
| 63 | #define FCMPLE32_OPF 0x024 | ||
| 64 | |||
| 65 | /* 000100010 - four 16-bit compare; set rd if src1 != src2 */ | ||
| 66 | #define FCMPNE16_OPF 0x022 | ||
| 67 | |||
| 68 | /* 000100110 - two 32-bit compare; set rd if src1 != src2 */ | ||
| 69 | #define FCMPNE32_OPF 0x026 | ||
| 70 | |||
| 71 | /* 000101010 - four 16-bit compare; set rd if src1 == src2 */ | ||
| 72 | #define FCMPEQ16_OPF 0x02a | ||
| 73 | |||
| 74 | /* 000101110 - two 32-bit compare; set rd if src1 == src2 */ | ||
| 75 | #define FCMPEQ32_OPF 0x02e | ||
| 76 | |||
| 77 | /* 000000000 - Eight 8-bit edge boundary processing */ | ||
| 78 | #define EDGE8_OPF 0x000 | ||
| 79 | |||
| 80 | /* 000000001 - Eight 8-bit edge boundary processing, no CC */ | ||
| 81 | #define EDGE8N_OPF 0x001 | ||
| 82 | |||
| 83 | /* 000000010 - Eight 8-bit edge boundary processing, little-endian */ | ||
| 84 | #define EDGE8L_OPF 0x002 | ||
| 85 | |||
| 86 | /* 000000011 - Eight 8-bit edge boundary processing, little-endian, no CC */ | ||
| 87 | #define EDGE8LN_OPF 0x003 | ||
| 88 | |||
| 89 | /* 000000100 - Four 16-bit edge boundary processing */ | ||
| 90 | #define EDGE16_OPF 0x004 | ||
| 91 | |||
| 92 | /* 000000101 - Four 16-bit edge boundary processing, no CC */ | ||
| 93 | #define EDGE16N_OPF 0x005 | ||
| 94 | |||
| 95 | /* 000000110 - Four 16-bit edge boundary processing, little-endian */ | ||
| 96 | #define EDGE16L_OPF 0x006 | ||
| 97 | |||
| 98 | /* 000000111 - Four 16-bit edge boundary processing, little-endian, no CC */ | ||
| 99 | #define EDGE16LN_OPF 0x007 | ||
| 100 | |||
| 101 | /* 000001000 - Two 32-bit edge boundary processing */ | ||
| 102 | #define EDGE32_OPF 0x008 | ||
| 103 | |||
| 104 | /* 000001001 - Two 32-bit edge boundary processing, no CC */ | ||
| 105 | #define EDGE32N_OPF 0x009 | ||
| 106 | |||
| 107 | /* 000001010 - Two 32-bit edge boundary processing, little-endian */ | ||
| 108 | #define EDGE32L_OPF 0x00a | ||
| 109 | |||
| 110 | /* 000001011 - Two 32-bit edge boundary processing, little-endian, no CC */ | ||
| 111 | #define EDGE32LN_OPF 0x00b | ||
| 112 | |||
| 113 | /* 000111110 - distance between 8 8-bit components */ | ||
| 114 | #define PDIST_OPF 0x03e | ||
| 115 | |||
| 116 | /* 000010000 - convert 8-bit 3-D address to blocked byte address */ | ||
| 117 | #define ARRAY8_OPF 0x010 | ||
| 118 | |||
| 119 | /* 000010010 - convert 16-bit 3-D address to blocked byte address */ | ||
| 120 | #define ARRAY16_OPF 0x012 | ||
| 121 | |||
| 122 | /* 000010100 - convert 32-bit 3-D address to blocked byte address */ | ||
| 123 | #define ARRAY32_OPF 0x014 | ||
| 124 | |||
| 125 | /* 000011001 - Set the GSR.MASK field in preparation for a BSHUFFLE */ | ||
| 126 | #define BMASK_OPF 0x019 | ||
| 127 | |||
| 128 | /* 001001100 - Permute bytes as specified by GSR.MASK */ | ||
| 129 | #define BSHUFFLE_OPF 0x04c | ||
| 130 | |||
| 131 | #define VIS_OPCODE_MASK ((0x3 << 30) | (0x3f << 19)) | ||
| 132 | #define VIS_OPCODE_VAL ((0x2 << 30) | (0x36 << 19)) | ||
| 133 | |||
| 134 | #define VIS_OPF_SHIFT 5 | ||
| 135 | #define VIS_OPF_MASK (0x1ff << VIS_OPF_SHIFT) | ||
| 136 | |||
/* Register fields of the instruction word.  The opcode mask above places
 * op at bits 31:30 and op3 at bits 24:19, and OPF at bits 13:5; rd sits
 * at bits 29:25, rs1 at bits 18:14 and rs2 at bits 4:0.  (RS1 previously
 * shifted by 24, which read a mix of the op3 and rd fields.)
 */
#define RS1(INSN)	(((INSN) >> 14) & 0x1f)
#define RS2(INSN)	(((INSN) >>  0) & 0x1f)
#define RD(INSN)	(((INSN) >> 25) & 0x1f)
| 140 | |||
/* Flush the register windows when any operand register is numbered 16 or
 * above.  Such registers are accessed through the window save area on the
 * stack (see fetch_reg/store_reg), so the windows are flushed first.
 * A non-zero 'from_kernel' issues a raw "flushw"; otherwise flushw_user()
 * is called.
 */
static inline void maybe_flush_windows(unsigned int rs1, unsigned int rs2,
				       unsigned int rd, int from_kernel)
{
	/* Nothing to do when every operand is in registers 0-15. */
	if (rs1 < 16 && rs2 < 16 && rd < 16)
		return;

	if (from_kernel)
		__asm__ __volatile__("flushw");
	else
		flushw_user();
}
| 151 | |||
| 152 | static unsigned long fetch_reg(unsigned int reg, struct pt_regs *regs) | ||
| 153 | { | ||
| 154 | unsigned long value; | ||
| 155 | |||
| 156 | if (reg < 16) | ||
| 157 | return (!reg ? 0 : regs->u_regs[reg]); | ||
| 158 | if (regs->tstate & TSTATE_PRIV) { | ||
| 159 | struct reg_window *win; | ||
| 160 | win = (struct reg_window *)(regs->u_regs[UREG_FP] + STACK_BIAS); | ||
| 161 | value = win->locals[reg - 16]; | ||
| 162 | } else if (test_thread_flag(TIF_32BIT)) { | ||
| 163 | struct reg_window32 __user *win32; | ||
| 164 | win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP])); | ||
| 165 | get_user(value, &win32->locals[reg - 16]); | ||
| 166 | } else { | ||
| 167 | struct reg_window __user *win; | ||
| 168 | win = (struct reg_window __user *)(regs->u_regs[UREG_FP] + STACK_BIAS); | ||
| 169 | get_user(value, &win->locals[reg - 16]); | ||
| 170 | } | ||
| 171 | return value; | ||
| 172 | } | ||
| 173 | |||
| 174 | static inline unsigned long __user *__fetch_reg_addr_user(unsigned int reg, | ||
| 175 | struct pt_regs *regs) | ||
| 176 | { | ||
| 177 | BUG_ON(reg < 16); | ||
| 178 | BUG_ON(regs->tstate & TSTATE_PRIV); | ||
| 179 | |||
| 180 | if (test_thread_flag(TIF_32BIT)) { | ||
| 181 | struct reg_window32 __user *win32; | ||
| 182 | win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP])); | ||
| 183 | return (unsigned long __user *)&win32->locals[reg - 16]; | ||
| 184 | } else { | ||
| 185 | struct reg_window __user *win; | ||
| 186 | win = (struct reg_window __user *)(regs->u_regs[UREG_FP] + STACK_BIAS); | ||
| 187 | return &win->locals[reg - 16]; | ||
| 188 | } | ||
| 189 | } | ||
| 190 | |||
| 191 | static inline unsigned long *__fetch_reg_addr_kern(unsigned int reg, | ||
| 192 | struct pt_regs *regs) | ||
| 193 | { | ||
| 194 | BUG_ON(reg >= 16); | ||
| 195 | BUG_ON(regs->tstate & TSTATE_PRIV); | ||
| 196 | |||
| 197 | return ®s->u_regs[reg]; | ||
| 198 | } | ||
| 199 | |||
| 200 | static void store_reg(struct pt_regs *regs, unsigned long val, unsigned long rd) | ||
| 201 | { | ||
| 202 | if (rd < 16) { | ||
| 203 | unsigned long *rd_kern = __fetch_reg_addr_kern(rd, regs); | ||
| 204 | |||
| 205 | *rd_kern = val; | ||
| 206 | } else { | ||
| 207 | unsigned long __user *rd_user = __fetch_reg_addr_user(rd, regs); | ||
| 208 | |||
| 209 | if (test_thread_flag(TIF_32BIT)) | ||
| 210 | __put_user((u32)val, (u32 __user *)rd_user); | ||
| 211 | else | ||
| 212 | __put_user(val, rd_user); | ||
| 213 | } | ||
| 214 | } | ||
| 215 | |||
| 216 | static inline unsigned long fpd_regval(struct fpustate *f, | ||
| 217 | unsigned int insn_regnum) | ||
| 218 | { | ||
| 219 | insn_regnum = (((insn_regnum & 1) << 5) | | ||
| 220 | (insn_regnum & 0x1e)); | ||
| 221 | |||
| 222 | return *(unsigned long *) &f->regs[insn_regnum]; | ||
| 223 | } | ||
| 224 | |||
| 225 | static inline unsigned long *fpd_regaddr(struct fpustate *f, | ||
| 226 | unsigned int insn_regnum) | ||
| 227 | { | ||
| 228 | insn_regnum = (((insn_regnum & 1) << 5) | | ||
| 229 | (insn_regnum & 0x1e)); | ||
| 230 | |||
| 231 | return (unsigned long *) &f->regs[insn_regnum]; | ||
| 232 | } | ||
| 233 | |||
| 234 | static inline unsigned int fps_regval(struct fpustate *f, | ||
| 235 | unsigned int insn_regnum) | ||
| 236 | { | ||
| 237 | return f->regs[insn_regnum]; | ||
| 238 | } | ||
| 239 | |||
| 240 | static inline unsigned int *fps_regaddr(struct fpustate *f, | ||
| 241 | unsigned int insn_regnum) | ||
| 242 | { | ||
| 243 | return &f->regs[insn_regnum]; | ||
| 244 | } | ||
| 245 | |||
| 246 | struct edge_tab { | ||
| 247 | u16 left, right; | ||
| 248 | }; | ||
| 249 | struct edge_tab edge8_tab[8] = { | ||
| 250 | { 0xff, 0x80 }, | ||
| 251 | { 0x7f, 0xc0 }, | ||
| 252 | { 0x3f, 0xe0 }, | ||
| 253 | { 0x1f, 0xf0 }, | ||
| 254 | { 0x0f, 0xf8 }, | ||
| 255 | { 0x07, 0xfc }, | ||
| 256 | { 0x03, 0xfe }, | ||
| 257 | { 0x01, 0xff }, | ||
| 258 | }; | ||
| 259 | struct edge_tab edge8_tab_l[8] = { | ||
| 260 | { 0xff, 0x01 }, | ||
| 261 | { 0xfe, 0x03 }, | ||
| 262 | { 0xfc, 0x07 }, | ||
| 263 | { 0xf8, 0x0f }, | ||
| 264 | { 0xf0, 0x1f }, | ||
| 265 | { 0xe0, 0x3f }, | ||
| 266 | { 0xc0, 0x7f }, | ||
| 267 | { 0x80, 0xff }, | ||
| 268 | }; | ||
| 269 | struct edge_tab edge16_tab[4] = { | ||
| 270 | { 0xf, 0x8 }, | ||
| 271 | { 0x7, 0xc }, | ||
| 272 | { 0x3, 0xe }, | ||
| 273 | { 0x1, 0xf }, | ||
| 274 | }; | ||
| 275 | struct edge_tab edge16_tab_l[4] = { | ||
| 276 | { 0xf, 0x1 }, | ||
| 277 | { 0xe, 0x3 }, | ||
| 278 | { 0xc, 0x7 }, | ||
| 279 | { 0x8, 0xf }, | ||
| 280 | }; | ||
| 281 | struct edge_tab edge32_tab[2] = { | ||
| 282 | { 0x3, 0x2 }, | ||
| 283 | { 0x1, 0x3 }, | ||
| 284 | }; | ||
| 285 | struct edge_tab edge32_tab_l[2] = { | ||
| 286 | { 0x3, 0x1 }, | ||
| 287 | { 0x2, 0x3 }, | ||
| 288 | }; | ||
| 289 | |||
| 290 | static void edge(struct pt_regs *regs, unsigned int insn, unsigned int opf) | ||
| 291 | { | ||
| 292 | unsigned long orig_rs1, rs1, orig_rs2, rs2, rd_val; | ||
| 293 | u16 left, right; | ||
| 294 | |||
| 295 | maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0); | ||
| 296 | orig_rs1 = rs1 = fetch_reg(RS1(insn), regs); | ||
| 297 | orig_rs2 = rs2 = fetch_reg(RS2(insn), regs); | ||
| 298 | |||
| 299 | if (test_thread_flag(TIF_32BIT)) { | ||
| 300 | rs1 = rs1 & 0xffffffff; | ||
| 301 | rs2 = rs2 & 0xffffffff; | ||
| 302 | } | ||
| 303 | switch (opf) { | ||
| 304 | default: | ||
| 305 | case EDGE8_OPF: | ||
| 306 | case EDGE8N_OPF: | ||
| 307 | left = edge8_tab[rs1 & 0x7].left; | ||
| 308 | right = edge8_tab[rs2 & 0x7].right; | ||
| 309 | break; | ||
| 310 | case EDGE8L_OPF: | ||
| 311 | case EDGE8LN_OPF: | ||
| 312 | left = edge8_tab_l[rs1 & 0x7].left; | ||
| 313 | right = edge8_tab_l[rs2 & 0x7].right; | ||
| 314 | break; | ||
| 315 | |||
| 316 | case EDGE16_OPF: | ||
| 317 | case EDGE16N_OPF: | ||
| 318 | left = edge16_tab[(rs1 >> 1) & 0x3].left; | ||
| 319 | right = edge16_tab[(rs2 >> 1) & 0x3].right; | ||
| 320 | break; | ||
| 321 | |||
| 322 | case EDGE16L_OPF: | ||
| 323 | case EDGE16LN_OPF: | ||
| 324 | left = edge16_tab_l[(rs1 >> 1) & 0x3].left; | ||
| 325 | right = edge16_tab_l[(rs2 >> 1) & 0x3].right; | ||
| 326 | break; | ||
| 327 | |||
| 328 | case EDGE32_OPF: | ||
| 329 | case EDGE32N_OPF: | ||
| 330 | left = edge32_tab[(rs1 >> 2) & 0x1].left; | ||
| 331 | right = edge32_tab[(rs2 >> 2) & 0x1].right; | ||
| 332 | break; | ||
| 333 | |||
| 334 | case EDGE32L_OPF: | ||
| 335 | case EDGE32LN_OPF: | ||
| 336 | left = edge32_tab_l[(rs1 >> 2) & 0x1].left; | ||
| 337 | right = edge32_tab_l[(rs2 >> 2) & 0x1].right; | ||
| 338 | break; | ||
| 339 | }; | ||
| 340 | |||
| 341 | if ((rs1 & ~0x7UL) == (rs2 & ~0x7UL)) | ||
| 342 | rd_val = right & left; | ||
| 343 | else | ||
| 344 | rd_val = left; | ||
| 345 | |||
| 346 | store_reg(regs, rd_val, RD(insn)); | ||
| 347 | |||
| 348 | switch (opf) { | ||
| 349 | case EDGE8_OPF: | ||
| 350 | case EDGE8L_OPF: | ||
| 351 | case EDGE16_OPF: | ||
| 352 | case EDGE16L_OPF: | ||
| 353 | case EDGE32_OPF: | ||
| 354 | case EDGE32L_OPF: { | ||
| 355 | unsigned long ccr, tstate; | ||
| 356 | |||
| 357 | __asm__ __volatile__("subcc %1, %2, %%g0\n\t" | ||
| 358 | "rd %%ccr, %0" | ||
| 359 | : "=r" (ccr) | ||
| 360 | : "r" (orig_rs1), "r" (orig_rs2) | ||
| 361 | : "cc"); | ||
| 362 | tstate = regs->tstate & ~(TSTATE_XCC | TSTATE_ICC); | ||
| 363 | regs->tstate = tstate | (ccr << 32UL); | ||
| 364 | } | ||
| 365 | }; | ||
| 366 | } | ||
| 367 | |||
| 368 | static void array(struct pt_regs *regs, unsigned int insn, unsigned int opf) | ||
| 369 | { | ||
| 370 | unsigned long rs1, rs2, rd_val; | ||
| 371 | unsigned int bits, bits_mask; | ||
| 372 | |||
| 373 | maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0); | ||
| 374 | rs1 = fetch_reg(RS1(insn), regs); | ||
| 375 | rs2 = fetch_reg(RS2(insn), regs); | ||
| 376 | |||
| 377 | bits = (rs2 > 5 ? 5 : rs2); | ||
| 378 | bits_mask = (1UL << bits) - 1UL; | ||
| 379 | |||
| 380 | rd_val = ((((rs1 >> 11) & 0x3) << 0) | | ||
| 381 | (((rs1 >> 33) & 0x3) << 2) | | ||
| 382 | (((rs1 >> 55) & 0x1) << 4) | | ||
| 383 | (((rs1 >> 13) & 0xf) << 5) | | ||
| 384 | (((rs1 >> 35) & 0xf) << 9) | | ||
| 385 | (((rs1 >> 56) & 0xf) << 13) | | ||
| 386 | (((rs1 >> 17) & bits_mask) << 17) | | ||
| 387 | (((rs1 >> 39) & bits_mask) << (17 + bits)) | | ||
| 388 | (((rs1 >> 60) & 0xf) << (17 + (2*bits)))); | ||
| 389 | |||
| 390 | switch (opf) { | ||
| 391 | case ARRAY16_OPF: | ||
| 392 | rd_val <<= 1; | ||
| 393 | break; | ||
| 394 | |||
| 395 | case ARRAY32_OPF: | ||
| 396 | rd_val <<= 2; | ||
| 397 | }; | ||
| 398 | |||
| 399 | store_reg(regs, rd_val, RD(insn)); | ||
| 400 | } | ||
| 401 | |||
| 402 | static void bmask(struct pt_regs *regs, unsigned int insn) | ||
| 403 | { | ||
| 404 | unsigned long rs1, rs2, rd_val, gsr; | ||
| 405 | |||
| 406 | maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0); | ||
| 407 | rs1 = fetch_reg(RS1(insn), regs); | ||
| 408 | rs2 = fetch_reg(RS2(insn), regs); | ||
| 409 | rd_val = rs1 + rs2; | ||
| 410 | |||
| 411 | store_reg(regs, rd_val, RD(insn)); | ||
| 412 | |||
| 413 | gsr = current_thread_info()->gsr[0] & 0xffffffff; | ||
| 414 | gsr |= rd_val << 32UL; | ||
| 415 | current_thread_info()->gsr[0] = gsr; | ||
| 416 | } | ||
| 417 | |||
| 418 | static void bshuffle(struct pt_regs *regs, unsigned int insn) | ||
| 419 | { | ||
| 420 | struct fpustate *f = FPUSTATE; | ||
| 421 | unsigned long rs1, rs2, rd_val; | ||
| 422 | unsigned long bmask, i; | ||
| 423 | |||
| 424 | bmask = current_thread_info()->gsr[0] >> 32UL; | ||
| 425 | |||
| 426 | rs1 = fpd_regval(f, RS1(insn)); | ||
| 427 | rs2 = fpd_regval(f, RS2(insn)); | ||
| 428 | |||
| 429 | rd_val = 0UL; | ||
| 430 | for (i = 0; i < 8; i++) { | ||
| 431 | unsigned long which = (bmask >> (i * 4)) & 0xf; | ||
| 432 | unsigned long byte; | ||
| 433 | |||
| 434 | if (which < 8) | ||
| 435 | byte = (rs1 >> (which * 8)) & 0xff; | ||
| 436 | else | ||
| 437 | byte = (rs2 >> ((which-8)*8)) & 0xff; | ||
| 438 | rd_val |= (byte << (i * 8)); | ||
| 439 | } | ||
| 440 | |||
| 441 | *fpd_regaddr(f, RD(insn)) = rd_val; | ||
| 442 | } | ||
| 443 | |||
| 444 | static void pdist(struct pt_regs *regs, unsigned int insn) | ||
| 445 | { | ||
| 446 | struct fpustate *f = FPUSTATE; | ||
| 447 | unsigned long rs1, rs2, *rd, rd_val; | ||
| 448 | unsigned long i; | ||
| 449 | |||
| 450 | rs1 = fpd_regval(f, RS1(insn)); | ||
| 451 | rs2 = fpd_regval(f, RS1(insn)); | ||
| 452 | rd = fpd_regaddr(f, RD(insn)); | ||
| 453 | |||
| 454 | rd_val = *rd; | ||
| 455 | |||
| 456 | for (i = 0; i < 8; i++) { | ||
| 457 | s16 s1, s2; | ||
| 458 | |||
| 459 | s1 = (rs1 >> (56 - (i * 8))) & 0xff; | ||
| 460 | s2 = (rs2 >> (56 - (i * 8))) & 0xff; | ||
| 461 | |||
| 462 | /* Absolute value of difference. */ | ||
| 463 | s1 -= s2; | ||
| 464 | if (s1 < 0) | ||
| 465 | s1 = ~s1 + 1; | ||
| 466 | |||
| 467 | rd_val += s1; | ||
| 468 | } | ||
| 469 | |||
| 470 | *rd = rd_val; | ||
| 471 | } | ||
| 472 | |||
| 473 | static void pformat(struct pt_regs *regs, unsigned int insn, unsigned int opf) | ||
| 474 | { | ||
| 475 | struct fpustate *f = FPUSTATE; | ||
| 476 | unsigned long rs1, rs2, gsr, scale, rd_val; | ||
| 477 | |||
| 478 | gsr = current_thread_info()->gsr[0]; | ||
| 479 | scale = (gsr >> 3) & (opf == FPACK16_OPF ? 0xf : 0x1f); | ||
| 480 | switch (opf) { | ||
| 481 | case FPACK16_OPF: { | ||
| 482 | unsigned long byte; | ||
| 483 | |||
| 484 | rs2 = fpd_regval(f, RS2(insn)); | ||
| 485 | rd_val = 0; | ||
| 486 | for (byte = 0; byte < 4; byte++) { | ||
| 487 | unsigned int val; | ||
| 488 | s16 src = (rs2 >> (byte * 16UL)) & 0xffffUL; | ||
| 489 | int scaled = src << scale; | ||
| 490 | int from_fixed = scaled >> 7; | ||
| 491 | |||
| 492 | val = ((from_fixed < 0) ? | ||
| 493 | 0 : | ||
| 494 | (from_fixed > 255) ? | ||
| 495 | 255 : from_fixed); | ||
| 496 | |||
| 497 | rd_val |= (val << (8 * byte)); | ||
| 498 | } | ||
| 499 | *fps_regaddr(f, RD(insn)) = rd_val; | ||
| 500 | break; | ||
| 501 | } | ||
| 502 | |||
| 503 | case FPACK32_OPF: { | ||
| 504 | unsigned long word; | ||
| 505 | |||
| 506 | rs1 = fpd_regval(f, RS1(insn)); | ||
| 507 | rs2 = fpd_regval(f, RS2(insn)); | ||
| 508 | rd_val = (rs1 << 8) & ~(0x000000ff000000ffUL); | ||
| 509 | for (word = 0; word < 2; word++) { | ||
| 510 | unsigned long val; | ||
| 511 | s32 src = (rs2 >> (word * 32UL)); | ||
| 512 | s64 scaled = src << scale; | ||
| 513 | s64 from_fixed = scaled >> 23; | ||
| 514 | |||
| 515 | val = ((from_fixed < 0) ? | ||
| 516 | 0 : | ||
| 517 | (from_fixed > 255) ? | ||
| 518 | 255 : from_fixed); | ||
| 519 | |||
| 520 | rd_val |= (val << (32 * word)); | ||
| 521 | } | ||
| 522 | *fpd_regaddr(f, RD(insn)) = rd_val; | ||
| 523 | break; | ||
| 524 | } | ||
| 525 | |||
| 526 | case FPACKFIX_OPF: { | ||
| 527 | unsigned long word; | ||
| 528 | |||
| 529 | rs2 = fpd_regval(f, RS2(insn)); | ||
| 530 | |||
| 531 | rd_val = 0; | ||
| 532 | for (word = 0; word < 2; word++) { | ||
| 533 | long val; | ||
| 534 | s32 src = (rs2 >> (word * 32UL)); | ||
| 535 | s64 scaled = src << scale; | ||
| 536 | s64 from_fixed = scaled >> 16; | ||
| 537 | |||
| 538 | val = ((from_fixed < -32768) ? | ||
| 539 | -32768 : | ||
| 540 | (from_fixed > 32767) ? | ||
| 541 | 32767 : from_fixed); | ||
| 542 | |||
| 543 | rd_val |= ((val & 0xffff) << (word * 16)); | ||
| 544 | } | ||
| 545 | *fps_regaddr(f, RD(insn)) = rd_val; | ||
| 546 | break; | ||
| 547 | } | ||
| 548 | |||
| 549 | case FEXPAND_OPF: { | ||
| 550 | unsigned long byte; | ||
| 551 | |||
| 552 | rs2 = fps_regval(f, RS2(insn)); | ||
| 553 | |||
| 554 | rd_val = 0; | ||
| 555 | for (byte = 0; byte < 4; byte++) { | ||
| 556 | unsigned long val; | ||
| 557 | u8 src = (rs2 >> (byte * 8)) & 0xff; | ||
| 558 | |||
| 559 | val = src << 4; | ||
| 560 | |||
| 561 | rd_val |= (val << (byte * 16)); | ||
| 562 | } | ||
| 563 | *fpd_regaddr(f, RD(insn)) = rd_val; | ||
| 564 | break; | ||
| 565 | } | ||
| 566 | |||
| 567 | case FPMERGE_OPF: { | ||
| 568 | rs1 = fps_regval(f, RS1(insn)); | ||
| 569 | rs2 = fps_regval(f, RS2(insn)); | ||
| 570 | |||
| 571 | rd_val = (((rs2 & 0x000000ff) << 0) | | ||
| 572 | ((rs1 & 0x000000ff) << 8) | | ||
| 573 | ((rs2 & 0x0000ff00) << 8) | | ||
| 574 | ((rs1 & 0x0000ff00) << 16) | | ||
| 575 | ((rs2 & 0x00ff0000) << 16) | | ||
| 576 | ((rs1 & 0x00ff0000) << 24) | | ||
| 577 | ((rs2 & 0xff000000) << 24) | | ||
| 578 | ((rs1 & 0xff000000) << 32)); | ||
| 579 | *fpd_regaddr(f, RD(insn)) = rd_val; | ||
| 580 | break; | ||
| 581 | } | ||
| 582 | }; | ||
| 583 | } | ||
| 584 | |||
| 585 | static void pmul(struct pt_regs *regs, unsigned int insn, unsigned int opf) | ||
| 586 | { | ||
| 587 | struct fpustate *f = FPUSTATE; | ||
| 588 | unsigned long rs1, rs2, rd_val; | ||
| 589 | |||
| 590 | switch (opf) { | ||
| 591 | case FMUL8x16_OPF: { | ||
| 592 | unsigned long byte; | ||
| 593 | |||
| 594 | rs1 = fps_regval(f, RS1(insn)); | ||
| 595 | rs2 = fpd_regval(f, RS2(insn)); | ||
| 596 | |||
| 597 | rd_val = 0; | ||
| 598 | for (byte = 0; byte < 4; byte++) { | ||
| 599 | u16 src1 = (rs1 >> (byte * 8)) & 0x00ff; | ||
| 600 | s16 src2 = (rs2 >> (byte * 16)) & 0xffff; | ||
| 601 | u32 prod = src1 * src2; | ||
| 602 | u16 scaled = ((prod & 0x00ffff00) >> 8); | ||
| 603 | |||
| 604 | /* Round up. */ | ||
| 605 | if (prod & 0x80) | ||
| 606 | scaled++; | ||
| 607 | rd_val |= ((scaled & 0xffffUL) << (byte * 16UL)); | ||
| 608 | } | ||
| 609 | |||
| 610 | *fpd_regaddr(f, RD(insn)) = rd_val; | ||
| 611 | break; | ||
| 612 | } | ||
| 613 | |||
| 614 | case FMUL8x16AU_OPF: | ||
| 615 | case FMUL8x16AL_OPF: { | ||
| 616 | unsigned long byte; | ||
| 617 | s16 src2; | ||
| 618 | |||
| 619 | rs1 = fps_regval(f, RS1(insn)); | ||
| 620 | rs2 = fps_regval(f, RS2(insn)); | ||
| 621 | |||
| 622 | rd_val = 0; | ||
| 623 | src2 = (rs2 >> (opf == FMUL8x16AU_OPF) ? 16 : 0); | ||
| 624 | for (byte = 0; byte < 4; byte++) { | ||
| 625 | u16 src1 = (rs1 >> (byte * 8)) & 0x00ff; | ||
| 626 | u32 prod = src1 * src2; | ||
| 627 | u16 scaled = ((prod & 0x00ffff00) >> 8); | ||
| 628 | |||
| 629 | /* Round up. */ | ||
| 630 | if (prod & 0x80) | ||
| 631 | scaled++; | ||
| 632 | rd_val |= ((scaled & 0xffffUL) << (byte * 16UL)); | ||
| 633 | } | ||
| 634 | |||
| 635 | *fpd_regaddr(f, RD(insn)) = rd_val; | ||
| 636 | break; | ||
| 637 | } | ||
| 638 | |||
| 639 | case FMUL8SUx16_OPF: | ||
| 640 | case FMUL8ULx16_OPF: { | ||
| 641 | unsigned long byte, ushift; | ||
| 642 | |||
| 643 | rs1 = fpd_regval(f, RS1(insn)); | ||
| 644 | rs2 = fpd_regval(f, RS2(insn)); | ||
| 645 | |||
| 646 | rd_val = 0; | ||
| 647 | ushift = (opf == FMUL8SUx16_OPF) ? 8 : 0; | ||
| 648 | for (byte = 0; byte < 4; byte++) { | ||
| 649 | u16 src1; | ||
| 650 | s16 src2; | ||
| 651 | u32 prod; | ||
| 652 | u16 scaled; | ||
| 653 | |||
| 654 | src1 = ((rs1 >> ((16 * byte) + ushift)) & 0x00ff); | ||
| 655 | src2 = ((rs2 >> (16 * byte)) & 0xffff); | ||
| 656 | prod = src1 * src2; | ||
| 657 | scaled = ((prod & 0x00ffff00) >> 8); | ||
| 658 | |||
| 659 | /* Round up. */ | ||
| 660 | if (prod & 0x80) | ||
| 661 | scaled++; | ||
| 662 | rd_val |= ((scaled & 0xffffUL) << (byte * 16UL)); | ||
| 663 | } | ||
| 664 | |||
| 665 | *fpd_regaddr(f, RD(insn)) = rd_val; | ||
| 666 | break; | ||
| 667 | } | ||
| 668 | |||
| 669 | case FMULD8SUx16_OPF: | ||
| 670 | case FMULD8ULx16_OPF: { | ||
| 671 | unsigned long byte, ushift; | ||
| 672 | |||
| 673 | rs1 = fps_regval(f, RS1(insn)); | ||
| 674 | rs2 = fps_regval(f, RS2(insn)); | ||
| 675 | |||
| 676 | rd_val = 0; | ||
| 677 | ushift = (opf == FMULD8SUx16_OPF) ? 8 : 0; | ||
| 678 | for (byte = 0; byte < 2; byte++) { | ||
| 679 | u16 src1; | ||
| 680 | s16 src2; | ||
| 681 | u32 prod; | ||
| 682 | u16 scaled; | ||
| 683 | |||
| 684 | src1 = ((rs1 >> ((16 * byte) + ushift)) & 0x00ff); | ||
| 685 | src2 = ((rs2 >> (16 * byte)) & 0xffff); | ||
| 686 | prod = src1 * src2; | ||
| 687 | scaled = ((prod & 0x00ffff00) >> 8); | ||
| 688 | |||
| 689 | /* Round up. */ | ||
| 690 | if (prod & 0x80) | ||
| 691 | scaled++; | ||
| 692 | rd_val |= ((scaled & 0xffffUL) << | ||
| 693 | ((byte * 32UL) + 7UL)); | ||
| 694 | } | ||
| 695 | *fpd_regaddr(f, RD(insn)) = rd_val; | ||
| 696 | break; | ||
| 697 | } | ||
| 698 | }; | ||
| 699 | } | ||
| 700 | |||
| 701 | static void pcmp(struct pt_regs *regs, unsigned int insn, unsigned int opf) | ||
| 702 | { | ||
| 703 | struct fpustate *f = FPUSTATE; | ||
| 704 | unsigned long rs1, rs2, rd_val, i; | ||
| 705 | |||
| 706 | rs1 = fpd_regval(f, RS1(insn)); | ||
| 707 | rs2 = fpd_regval(f, RS2(insn)); | ||
| 708 | |||
| 709 | rd_val = 0; | ||
| 710 | |||
| 711 | switch (opf) { | ||
| 712 | case FCMPGT16_OPF: | ||
| 713 | for (i = 0; i < 4; i++) { | ||
| 714 | s16 a = (rs1 >> (i * 16)) & 0xffff; | ||
| 715 | s16 b = (rs2 >> (i * 16)) & 0xffff; | ||
| 716 | |||
| 717 | if (a > b) | ||
| 718 | rd_val |= 1 << i; | ||
| 719 | } | ||
| 720 | break; | ||
| 721 | |||
| 722 | case FCMPGT32_OPF: | ||
| 723 | for (i = 0; i < 2; i++) { | ||
| 724 | s32 a = (rs1 >> (i * 32)) & 0xffff; | ||
| 725 | s32 b = (rs2 >> (i * 32)) & 0xffff; | ||
| 726 | |||
| 727 | if (a > b) | ||
| 728 | rd_val |= 1 << i; | ||
| 729 | } | ||
| 730 | break; | ||
| 731 | |||
| 732 | case FCMPLE16_OPF: | ||
| 733 | for (i = 0; i < 4; i++) { | ||
| 734 | s16 a = (rs1 >> (i * 16)) & 0xffff; | ||
| 735 | s16 b = (rs2 >> (i * 16)) & 0xffff; | ||
| 736 | |||
| 737 | if (a <= b) | ||
| 738 | rd_val |= 1 << i; | ||
| 739 | } | ||
| 740 | break; | ||
| 741 | |||
| 742 | case FCMPLE32_OPF: | ||
| 743 | for (i = 0; i < 2; i++) { | ||
| 744 | s32 a = (rs1 >> (i * 32)) & 0xffff; | ||
| 745 | s32 b = (rs2 >> (i * 32)) & 0xffff; | ||
| 746 | |||
| 747 | if (a <= b) | ||
| 748 | rd_val |= 1 << i; | ||
| 749 | } | ||
| 750 | break; | ||
| 751 | |||
| 752 | case FCMPNE16_OPF: | ||
| 753 | for (i = 0; i < 4; i++) { | ||
| 754 | s16 a = (rs1 >> (i * 16)) & 0xffff; | ||
| 755 | s16 b = (rs2 >> (i * 16)) & 0xffff; | ||
| 756 | |||
| 757 | if (a != b) | ||
| 758 | rd_val |= 1 << i; | ||
| 759 | } | ||
| 760 | break; | ||
| 761 | |||
| 762 | case FCMPNE32_OPF: | ||
| 763 | for (i = 0; i < 2; i++) { | ||
| 764 | s32 a = (rs1 >> (i * 32)) & 0xffff; | ||
| 765 | s32 b = (rs2 >> (i * 32)) & 0xffff; | ||
| 766 | |||
| 767 | if (a != b) | ||
| 768 | rd_val |= 1 << i; | ||
| 769 | } | ||
| 770 | break; | ||
| 771 | |||
| 772 | case FCMPEQ16_OPF: | ||
| 773 | for (i = 0; i < 4; i++) { | ||
| 774 | s16 a = (rs1 >> (i * 16)) & 0xffff; | ||
| 775 | s16 b = (rs2 >> (i * 16)) & 0xffff; | ||
| 776 | |||
| 777 | if (a == b) | ||
| 778 | rd_val |= 1 << i; | ||
| 779 | } | ||
| 780 | break; | ||
| 781 | |||
| 782 | case FCMPEQ32_OPF: | ||
| 783 | for (i = 0; i < 2; i++) { | ||
| 784 | s32 a = (rs1 >> (i * 32)) & 0xffff; | ||
| 785 | s32 b = (rs2 >> (i * 32)) & 0xffff; | ||
| 786 | |||
| 787 | if (a == b) | ||
| 788 | rd_val |= 1 << i; | ||
| 789 | } | ||
| 790 | break; | ||
| 791 | }; | ||
| 792 | |||
| 793 | maybe_flush_windows(0, 0, RD(insn), 0); | ||
| 794 | store_reg(regs, rd_val, RD(insn)); | ||
| 795 | } | ||
| 796 | |||
| 797 | /* Emulate the VIS instructions which are not implemented in | ||
| 798 | * hardware on Niagara. | ||
| 799 | */ | ||
| 800 | int vis_emul(struct pt_regs *regs, unsigned int insn) | ||
| 801 | { | ||
| 802 | unsigned long pc = regs->tpc; | ||
| 803 | unsigned int opf; | ||
| 804 | |||
| 805 | BUG_ON(regs->tstate & TSTATE_PRIV); | ||
| 806 | |||
| 807 | if (test_thread_flag(TIF_32BIT)) | ||
| 808 | pc = (u32)pc; | ||
| 809 | |||
| 810 | if (get_user(insn, (u32 __user *) pc)) | ||
| 811 | return -EFAULT; | ||
| 812 | |||
| 813 | if ((insn & VIS_OPCODE_MASK) != VIS_OPCODE_VAL) | ||
| 814 | return -EINVAL; | ||
| 815 | |||
| 816 | opf = (insn & VIS_OPF_MASK) >> VIS_OPF_SHIFT; | ||
| 817 | switch (opf) { | ||
| 818 | default: | ||
| 819 | return -EINVAL; | ||
| 820 | |||
| 821 | /* Pixel Formatting Instructions. */ | ||
| 822 | case FPACK16_OPF: | ||
| 823 | case FPACK32_OPF: | ||
| 824 | case FPACKFIX_OPF: | ||
| 825 | case FEXPAND_OPF: | ||
| 826 | case FPMERGE_OPF: | ||
| 827 | pformat(regs, insn, opf); | ||
| 828 | break; | ||
| 829 | |||
| 830 | /* Partitioned Multiply Instructions */ | ||
| 831 | case FMUL8x16_OPF: | ||
| 832 | case FMUL8x16AU_OPF: | ||
| 833 | case FMUL8x16AL_OPF: | ||
| 834 | case FMUL8SUx16_OPF: | ||
| 835 | case FMUL8ULx16_OPF: | ||
| 836 | case FMULD8SUx16_OPF: | ||
| 837 | case FMULD8ULx16_OPF: | ||
| 838 | pmul(regs, insn, opf); | ||
| 839 | break; | ||
| 840 | |||
| 841 | /* Pixel Compare Instructions */ | ||
| 842 | case FCMPGT16_OPF: | ||
| 843 | case FCMPGT32_OPF: | ||
| 844 | case FCMPLE16_OPF: | ||
| 845 | case FCMPLE32_OPF: | ||
| 846 | case FCMPNE16_OPF: | ||
| 847 | case FCMPNE32_OPF: | ||
| 848 | case FCMPEQ16_OPF: | ||
| 849 | case FCMPEQ32_OPF: | ||
| 850 | pcmp(regs, insn, opf); | ||
| 851 | break; | ||
| 852 | |||
| 853 | /* Edge Handling Instructions */ | ||
| 854 | case EDGE8_OPF: | ||
| 855 | case EDGE8N_OPF: | ||
| 856 | case EDGE8L_OPF: | ||
| 857 | case EDGE8LN_OPF: | ||
| 858 | case EDGE16_OPF: | ||
| 859 | case EDGE16N_OPF: | ||
| 860 | case EDGE16L_OPF: | ||
| 861 | case EDGE16LN_OPF: | ||
| 862 | case EDGE32_OPF: | ||
| 863 | case EDGE32N_OPF: | ||
| 864 | case EDGE32L_OPF: | ||
| 865 | case EDGE32LN_OPF: | ||
| 866 | edge(regs, insn, opf); | ||
| 867 | break; | ||
| 868 | |||
| 869 | /* Pixel Component Distance */ | ||
| 870 | case PDIST_OPF: | ||
| 871 | pdist(regs, insn); | ||
| 872 | break; | ||
| 873 | |||
| 874 | /* Three-Dimensional Array Addressing Instructions */ | ||
| 875 | case ARRAY8_OPF: | ||
| 876 | case ARRAY16_OPF: | ||
| 877 | case ARRAY32_OPF: | ||
| 878 | array(regs, insn, opf); | ||
| 879 | break; | ||
| 880 | |||
| 881 | /* Byte Mask and Shuffle Instructions */ | ||
| 882 | case BMASK_OPF: | ||
| 883 | bmask(regs, insn); | ||
| 884 | break; | ||
| 885 | |||
| 886 | case BSHUFFLE_OPF: | ||
| 887 | bshuffle(regs, insn); | ||
| 888 | break; | ||
| 889 | }; | ||
| 890 | |||
| 891 | regs->tpc = regs->tnpc; | ||
| 892 | regs->tnpc += 4; | ||
| 893 | return 0; | ||
| 894 | } | ||
