1 files changed, 656 insertions, 0 deletions
diff --git a/arch/tile/kernel/single_step.c b/arch/tile/kernel/single_step.c
new file mode 100644
index 000000000000..266aae123632
--- /dev/null
+++ b/arch/tile/kernel/single_step.c
@@ -0,0 +1,656 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * A code-rewriter that enables instruction single-stepping.
+ * Derived from iLib's single-stepping code.
+ */
+#ifndef __tilegx__   /* No support for single-step yet. */
+/* These functions are only used on the TILE platform */
+#include <linux/slab.h>
+#include <linux/thread_info.h>
+#include <linux/uaccess.h>
+#include <linux/mman.h>
+#include <linux/types.h>
+#include <asm/cacheflush.h>
+#include <asm/opcode-tile.h>
+#include <asm/opcode_constants.h>
+#include <arch/abi.h>
+#define signExtend17(val) sign_extend((val), 17) /* sign_extend() with a width of 17; applied to the X1 branch offset field */
+#define TILE_X1_MASK (0xffffffffULL << 31) /* bits 31..62 of a bundle; cleared before a replacement X1 op is or'ed in */
+int unaligned_printk; /* nonzero: print a message for every unaligned fixup; set via "unaligned_printk=" */
+/*
+ * Parse the "unaligned_printk=" boot option.
+ *
+ * @str: the text supplied for the option.
+ *
+ * Stores the parsed number in unaligned_printk and reports the resulting
+ * setting.  Returns 1 once the value has been parsed and stored, or 0 if
+ * it could not be parsed as a number (unaligned_printk is then left
+ * untouched).
+ */
+static int __init setup_unaligned_printk(char *str)
+{
+        long val;
+        if (strict_strtol(str, 0, &val) != 0)
+                return 0;
+        unaligned_printk = val;
+        /* Purely informational, so give it an explicit log level. */
+        printk(KERN_INFO "Printk for each unaligned data accesses is %s\n",
+               unaligned_printk ? "enabled" : "disabled");
+        return 1;
+}
+__setup("unaligned_printk=", setup_unaligned_printk);
+unsigned int unaligned_fixup_count; /* incremented once for every unaligned access that is fixed up */
+enum mem_op { /* kind of memory access performed by the bundle being examined */
+        MEMOP_NONE, /* no load or store was recognized */
+        MEMOP_LOAD,
+        MEMOP_STORE,
+        MEMOP_LOAD_POSTINCR, /* load whose fixup leaves an addi on the address register */
+        MEMOP_STORE_POSTINCR /* store whose fixup leaves an addi on the address register */
+};
+/*
+ * Return bundle @n with its X1 branch-offset field replaced by @offset.
+ * Every bit outside that field is passed through unchanged.
+ */
+static inline tile_bundle_bits set_BrOff_X1(tile_bundle_bits n, int32_t offset)
+{
+        /* Encoding -1 yields all-ones in the field, i.e. the bits to clear. */
+        const tile_bundle_bits field_bits = create_BrOff_X1(-1);
+        const tile_bundle_bits cleared = n & ~field_bits;
+        return cleared | create_BrOff_X1(offset);
+}
+/*
+ * Return bundle @n with its X1 slot replaced by an "or" of register @src
+ * with the zero register into @dest, i.e. a register-to-register move.
+ * Bits outside TILE_X1_MASK are preserved.
+ */
+static inline tile_bundle_bits move_X1(tile_bundle_bits n, int dest, int src)
+{
+        const tile_bundle_bits move_op =
+                create_Opcode_X1(SPECIAL_0_OPCODE_X1) |
+                create_RRROpcodeExtension_X1(OR_SPECIAL_0_OPCODE_X1) |
+                create_Dest_X1(dest) |
+                create_SrcB_X1(TREG_ZERO) |
+                create_SrcA_X1(src);
+        return (n & ~TILE_X1_MASK) | move_op;
+}
+/*
+ * Return bundle @n with its X1 slot replaced by a move from the zero
+ * register to the zero register, which has no architectural effect.
+ */
+static inline tile_bundle_bits nop_X1(tile_bundle_bits n)
+{
+        const int zero_reg = TREG_ZERO;
+        return move_X1(n, zero_reg, zero_reg);
+}
+/*
+ * Return bundle @n with its X1 slot replaced by "addi dest, src, imm"
+ * (8-bit immediate form).  Bits outside TILE_X1_MASK are preserved.
+ */
+static inline tile_bundle_bits addi_X1(
+        tile_bundle_bits n, int dest, int src, int imm)
+{
+        const tile_bundle_bits addi_op =
+                create_SrcA_X1(src) |
+                create_Dest_X1(dest) |
+                create_Imm8_X1(imm) |
+                create_S_X1(0) |
+                create_Opcode_X1(IMM_0_OPCODE_X1) |
+                create_ImmOpcodeExtension_X1(ADDI_IMM_0_OPCODE_X1);
+        return (n & ~TILE_X1_MASK) | addi_op;
+}
+/*
+ * rewrite_load_store_unaligned() - emulate an unaligned load or store.
+ * @state: single-step state; for loads it receives the register number
+ *         and the loaded value that must be written back afterwards.
+ * @bundle: the instruction bundle containing the memory operation.
+ * @regs: register state of the interrupted code.
+ * @mem_op: which kind of access the bundle performs.
+ * @size: access width in bytes (2 or 4).
+ * @sign_ext: nonzero if a 2-byte load must be sign-extended.
+ *
+ * Returns the bundle to execute in place of the original.  The bundle is
+ * returned unchanged when the access is aligned or uses registers that
+ * are not handled here.  Otherwise the access is carried out with the
+ * user-copy routines and the memory operation inside the bundle is
+ * replaced by a harmless instruction (an addi for the post-increment
+ * forms).  Returns zero after a signal has been raised on the current
+ * task: SIGBUS if fixups are disabled, SIGSEGV if the copy faulted.
+ */
+static tile_bundle_bits rewrite_load_store_unaligned(
+        struct single_step_state *state,
+        tile_bundle_bits bundle,
+        struct pt_regs *regs,
+        enum mem_op mem_op,
+        int size, int sign_ext)
+{
+        unsigned char *addr;
+        int val_reg, addr_reg, err, val;
+        /* Get address and value registers */
+        if (bundle & TILE_BUNDLE_Y_ENCODING_MASK) {
+                addr_reg = get_SrcA_Y2(bundle);
+                val_reg = get_SrcBDest_Y2(bundle);
+        } else if (mem_op == MEMOP_LOAD || mem_op == MEMOP_LOAD_POSTINCR) {
+                addr_reg = get_SrcA_X1(bundle);
+                val_reg  = get_Dest_X1(bundle);
+        } else {
+                addr_reg = get_SrcA_X1(bundle);
+                val_reg  = get_SrcB_X1(bundle);
+        }
+        /*
+         * If registers are not GPRs, don't try to handle it.
+         *
+         * FIXME: we could handle non-GPR loads by getting the real value
+         * from memory, writing it to the single step buffer, using a
+         * temp_reg to hold a pointer to that memory, then executing that
+         * instruction and resetting temp_reg.  For non-GPR stores, it's a
+         * little trickier; we could use the single step buffer for that
+         * too, but we'd have to add some more state bits so that we could
+         * call back in here to copy that value to the real target.  For
+         * now, we just handle the simple case.
+         */
+        if ((val_reg >= PTREGS_NR_GPRS &&
+             (val_reg != TREG_ZERO ||
+              mem_op == MEMOP_LOAD ||
+              mem_op == MEMOP_LOAD_POSTINCR)) ||
+            addr_reg >= PTREGS_NR_GPRS)
+                return bundle;
+        /* If it's aligned, don't handle it specially */
+        addr = (void *)regs->regs[addr_reg];
+        if (((unsigned long)addr % size) == 0)
+                return bundle;
+        /*
+         * Fixups are disabled: report the misalignment before touching
+         * user memory or the single-step state, so the rejected access
+         * has no side effects by the time the task sees SIGBUS.
+         */
+        if (unaligned_fixup == 0) {
+                siginfo_t info = {
+                        .si_signo = SIGBUS,
+                        .si_code = BUS_ADRALN,
+                        .si_addr = (void __user *)addr
+                };
+                force_sig_info(info.si_signo, &info, current);
+                return (tile_bundle_bits) 0;
+        }
+#ifndef __LITTLE_ENDIAN
+# error We assume little-endian representation with copy_xx_user size 2 here
+#endif
+        /* Handle unaligned load/store */
+        if (mem_op == MEMOP_LOAD || mem_op == MEMOP_LOAD_POSTINCR) {
+                unsigned short val_16;
+                switch (size) {
+                case 2:
+                        err = copy_from_user(&val_16, addr, sizeof(val_16));
+                        val = sign_ext ? ((short)val_16) : val_16;
+                        break;
+                case 4:
+                        err = copy_from_user(&val, addr, sizeof(val));
+                        break;
+                default:
+                        BUG();
+                }
+                if (err == 0) {
+                        /* Ask the caller to install the loaded value. */
+                        state->update_reg = val_reg;
+                        state->update_value = val;
+                        state->update = 1;
+                }
+        } else {
+                /* Only the low "size" bytes of val are written out. */
+                val = (val_reg == TREG_ZERO) ? 0 : regs->regs[val_reg];
+                err = copy_to_user(addr, &val, size);
+        }
+        if (err) {
+                siginfo_t info = {
+                        .si_signo = SIGSEGV,
+                        .si_code = SEGV_MAPERR,
+                        .si_addr = (void __user *)addr
+                };
+                force_sig_info(info.si_signo, &info, current);
+                return (tile_bundle_bits) 0;
+        }
+        /* Always report the very first fixup; later ones only on request. */
+        if (unaligned_printk || unaligned_fixup_count == 0) {
+                printk("Process %d/%s: PC %#lx: Fixup of"
+                       " unaligned %s at %#lx.\n",
+                       current->pid, current->comm, regs->pc,
+                       (mem_op == MEMOP_LOAD || mem_op == MEMOP_LOAD_POSTINCR) ?
+                         "load" : "store",
+                       (unsigned long)addr);
+                if (!unaligned_printk) {
+                        printk("\n"
+"Unaligned fixups in the kernel will slow your application considerably.\n"
+"You can find them by writing \"1\" to /proc/sys/tile/unaligned_fixup/printk,\n"
+"which requests the kernel show all unaligned fixups, or writing a \"0\"\n"
+"to /proc/sys/tile/unaligned_fixup/enabled, in which case each unaligned\n"
+"access will become a SIGBUS you can debug. No further warnings will be\n"
+"shown so as to avoid additional slowdown, but you can track the number\n"
+"of fixups performed via /proc/sys/tile/unaligned_fixup/count.\n"
+"Use the tile-addr2line command (see \"info addr2line\") to decode PCs.\n"
+                                "\n");
+                }
+        }
+        ++unaligned_fixup_count;
+        /*
+         * The access itself has been done above, so strip it out of the
+         * bundle that will actually be executed.
+         */
+        if (bundle & TILE_BUNDLE_Y_ENCODING_MASK) {
+                /* Convert the Y2 instruction to a prefetch. */
+                bundle &= ~(create_SrcBDest_Y2(-1) |
+                            create_Opcode_Y2(-1));
+                bundle |= (create_SrcBDest_Y2(TREG_ZERO) |
+                           create_Opcode_Y2(LW_OPCODE_Y2));
+        /* Replace the load postincr with an addi */
+        } else if (mem_op == MEMOP_LOAD_POSTINCR) {
+                bundle = addi_X1(bundle, addr_reg, addr_reg,
+                                 get_Imm8_X1(bundle));
+        /* Replace the store postincr with an addi */
+        } else if (mem_op == MEMOP_STORE_POSTINCR) {
+                bundle = addi_X1(bundle, addr_reg, addr_reg,
+                                 get_Dest_Imm8_X1(bundle));
+        } else {
+                /* Convert the X1 instruction to a nop. */
+                bundle &= ~(create_Opcode_X1(-1) |
+                            create_UnShOpcodeExtension_X1(-1) |
+                            create_UnOpcodeExtension_X1(-1));
+                bundle |= (create_Opcode_X1(SHUN_0_OPCODE_X1) |
+                           create_UnShOpcodeExtension_X1(
+                                   UN_0_SHUN_0_OPCODE_X1) |
+                           create_UnOpcodeExtension_X1(
+                                   NOP_UN_0_SHUN_0_OPCODE_X1));
+        }
+        return bundle;
+}
+/**
+ * single_step_once() - entry point when single stepping has been triggered.
+ * @regs: The machine register state
+ *
+ *  When we arrive at this routine via a trampoline, the single step
+ *  engine copies the executing bundle to the single step buffer.
+ *  If the instruction is a conditional branch, then the target is
+ *  reset to one past the next instruction. If the instruction
+ *  sets the lr, then that is noted. If the instruction is a jump
+ *  or call, then the new target pc is preserved and the current
+ *  bundle instruction set to null.
+ *
+ *  The necessary post-single-step rewriting information is stored in
+ *  single_step_state.  We use data segment values because the
+ *  stack will be rewound when we run the rewritten single-stepped
+ *  instruction.
+ *
+ *  The same path is used to emulate unaligned loads and stores when
+ *  TIF_SINGLESTEP is not set; in that case the buffer ends with a jump
+ *  back to the following instruction instead of "ill" bundles.
+ *
+ *  On any failure (allocation, unreadable instruction, fault writing the
+ *  buffer, or a signal raised by the unaligned fixup) the function
+ *  returns without redirecting regs->pc into the buffer.
+ */
+void single_step_once(struct pt_regs *regs)
+{
+        extern tile_bundle_bits __single_step_ill_insn;
+        extern tile_bundle_bits __single_step_j_insn;
+        extern tile_bundle_bits __single_step_addli_insn;
+        extern tile_bundle_bits __single_step_auli_insn;
+        struct thread_info *info = (void *)current_thread_info();
+        struct single_step_state *state = info->step_state;
+        int is_single_step = test_ti_thread_flag(info, TIF_SINGLESTEP);
+        tile_bundle_bits *buffer, *pc;
+        tile_bundle_bits bundle;
+        int temp_reg;
+        int target_reg = TREG_LR;
+        int err;
+        enum mem_op mem_op = MEMOP_NONE;
+        int size = 0, sign_ext = 0;  /* happy compiler */
+        /* Template bundles that are patched and copied into the buffer. */
+        asm(
+"    .pushsection .rodata.single_step\n"
+"    .align 8\n"
+"    .globl    __single_step_ill_insn\n"
+"__single_step_ill_insn:\n"
+"    ill\n"
+"    .globl    __single_step_addli_insn\n"
+"__single_step_addli_insn:\n"
+"    { nop; addli r0, zero, 0 }\n"
+"    .globl    __single_step_auli_insn\n"
+"__single_step_auli_insn:\n"
+"    { nop; auli r0, r0, 0 }\n"
+"    .globl    __single_step_j_insn\n"
+"__single_step_j_insn:\n"
+"    j .\n"
+"    .popsection\n"
+        );
+        if (state == NULL) {
+                /* allocate the single-step bookkeeping for this thread */
+                state = kmalloc(sizeof(struct single_step_state), GFP_KERNEL);
+                if (state == NULL) {
+                        printk("Out of kernel memory trying to single-step\n");
+                        return;
+                }
+                /* allocate a cache line of writable, executable memory */
+                down_write(&current->mm->mmap_sem);
+                buffer = (void *) do_mmap(0, 0, 64,
+                                          PROT_EXEC | PROT_READ | PROT_WRITE,
+                                          MAP_PRIVATE | MAP_ANONYMOUS,
+                                          0);
+                up_write(&current->mm->mmap_sem);
+                /* do_mmap() reports failure as a small negative value */
+                if ((int)buffer < 0 && (int)buffer > -PAGE_SIZE) {
+                        kfree(state);
+                        printk("Out of kernel pages trying to single-step\n");
+                        return;
+                }
+                state->buffer = buffer;
+                state->is_enabled = 0;
+                info->step_state = state;
+                /* Validate our stored instruction patterns */
+                BUG_ON(get_Opcode_X1(__single_step_addli_insn) !=
+                       ADDLI_OPCODE_X1);
+                BUG_ON(get_Opcode_X1(__single_step_auli_insn) !=
+                       AULI_OPCODE_X1);
+                BUG_ON(get_SrcA_X1(__single_step_addli_insn) != TREG_ZERO);
+                BUG_ON(get_Dest_X1(__single_step_addli_insn) != 0);
+                BUG_ON(get_JOffLong_X1(__single_step_j_insn) != 0);
+        }
+        /*
+         * If we are returning from a syscall, we still haven't hit the
+         * "ill" for the swint1 instruction.  So back the PC up to be
+         * pointing at the swint1, but we'll actually return directly
+         * back to the "ill" so we come back in via SIGILL as if we
+         * had "executed" the swint1 without ever being in kernel space.
+         */
+        if (regs->faultnum == INT_SWINT_1)
+                regs->pc -= 8;
+        pc = (tile_bundle_bits *)(regs->pc);
+        /*
+         * The pc is a user address, so fetch the bundle with get_user()
+         * instead of dereferencing it directly, and give up if it can
+         * no longer be read.
+         */
+        if (get_user(bundle, pc) != 0) {
+                printk(KERN_ERR "Couldn't read instruction at %p"
+                       " trying to step\n", pc);
+                return;
+        }
+        /* We'll follow the instruction with 2 ill op bundles */
+        state->orig_pc = (unsigned long) pc;
+        state->next_pc = (unsigned long)(pc + 1);
+        state->branch_next_pc = 0;
+        state->update = 0;
+        if (!(bundle & TILE_BUNDLE_Y_ENCODING_MASK)) {
+                /* two wide, check for control flow */
+                int opcode = get_Opcode_X1(bundle);
+                switch (opcode) {
+                /* branches */
+                case BRANCH_OPCODE_X1:
+                {
+                        int32_t offset = signExtend17(get_BrOff_X1(bundle));
+                        /*
+                         * For branches, we use a rewriting trick to let the
+                         * hardware evaluate whether the branch is taken or
+                         * untaken.  We record the target offset and then
+                         * rewrite the branch instruction to target 1 insn
+                         * ahead if the branch is taken.  We then follow the
+                         * rewritten branch with two bundles, each containing
+                         * an "ill" instruction. The supervisor examines the
+                         * pc after the single step code is executed, and if
+                         * the pc is the first ill instruction, then the
+                         * branch (if any) was not taken.  If the pc is the
+                         * second ill instruction, then the branch was
+                         * taken. The new pc is computed for these cases, and
+                         * inserted into the registers for the thread.  If
+                         * the pc is the start of the single step code, then
+                         * an exception or interrupt was taken before the
+                         * code started processing, and the same "original"
+                         * pc is restored.  This change, different from the
+                         * original implementation, has the advantage of
+                         * executing a single user instruction.
+                         */
+                        state->branch_next_pc = (unsigned long)(pc + offset);
+                        /*
+                         * rewrite branch offset so a taken branch skips the
+                         * first "ill" bundle and lands on the second one
+                         */
+                        bundle = set_BrOff_X1(bundle, 2);
+                }
+                break;
+                /* jumps */
+                case JALB_OPCODE_X1:
+                case JALF_OPCODE_X1:
+                        state->update = 1;
+                        state->next_pc =
+                                (unsigned long) (pc + get_JOffLong_X1(bundle));
+                        break;
+                case JB_OPCODE_X1:
+                case JF_OPCODE_X1:
+                        state->next_pc =
+                                (unsigned long) (pc + get_JOffLong_X1(bundle));
+                        bundle = nop_X1(bundle);
+                        break;
+                case SPECIAL_0_OPCODE_X1:
+                        switch (get_RRROpcodeExtension_X1(bundle)) {
+                        /* jump-register */
+                        case JALRP_SPECIAL_0_OPCODE_X1:
+                        case JALR_SPECIAL_0_OPCODE_X1:
+                                state->update = 1;
+                                state->next_pc =
+                                        regs->regs[get_SrcA_X1(bundle)];
+                                break;
+                        case JRP_SPECIAL_0_OPCODE_X1:
+                        case JR_SPECIAL_0_OPCODE_X1:
+                                state->next_pc =
+                                        regs->regs[get_SrcA_X1(bundle)];
+                                bundle = nop_X1(bundle);
+                                break;
+                        case LNK_SPECIAL_0_OPCODE_X1:
+                                state->update = 1;
+                                target_reg = get_Dest_X1(bundle);
+                                break;
+                        /* stores */
+                        case SH_SPECIAL_0_OPCODE_X1:
+                                mem_op = MEMOP_STORE;
+                                size = 2;
+                                break;
+                        case SW_SPECIAL_0_OPCODE_X1:
+                                mem_op = MEMOP_STORE;
+                                size = 4;
+                                break;
+                        }
+                        break;
+                /* loads and iret */
+                case SHUN_0_OPCODE_X1:
+                        if (get_UnShOpcodeExtension_X1(bundle) ==
+                            UN_0_SHUN_0_OPCODE_X1) {
+                                switch (get_UnOpcodeExtension_X1(bundle)) {
+                                case LH_UN_0_SHUN_0_OPCODE_X1:
+                                        mem_op = MEMOP_LOAD;
+                                        size = 2;
+                                        sign_ext = 1;
+                                        break;
+                                case LH_U_UN_0_SHUN_0_OPCODE_X1:
+                                        mem_op = MEMOP_LOAD;
+                                        size = 2;
+                                        sign_ext = 0;
+                                        break;
+                                case LW_UN_0_SHUN_0_OPCODE_X1:
+                                        mem_op = MEMOP_LOAD;
+                                        size = 4;
+                                        break;
+                                case IRET_UN_0_SHUN_0_OPCODE_X1:
+                                {
+                                        unsigned long ex0_0 = __insn_mfspr(
+                                                SPR_EX_CONTEXT_0_0);
+                                        unsigned long ex0_1 = __insn_mfspr(
+                                                SPR_EX_CONTEXT_0_1);
+                                        /*
+                                         * Special-case it if we're iret'ing
+                                         * to PL0 again.  Otherwise just let
+                                         * it run and it will generate SIGILL.
+                                         */
+                                        if (EX1_PL(ex0_1) == USER_PL) {
+                                                state->next_pc = ex0_0;
+                                                regs->ex1 = ex0_1;
+                                                bundle = nop_X1(bundle);
+                                        }
+                                }
+                                }
+                        }
+                        break;
+#if CHIP_HAS_WH64()
+                /* postincrement operations */
+                case IMM_0_OPCODE_X1:
+                        switch (get_ImmOpcodeExtension_X1(bundle)) {
+                        case LWADD_IMM_0_OPCODE_X1:
+                                mem_op = MEMOP_LOAD_POSTINCR;
+                                size = 4;
+                                break;
+                        case LHADD_IMM_0_OPCODE_X1:
+                                mem_op = MEMOP_LOAD_POSTINCR;
+                                size = 2;
+                                sign_ext = 1;
+                                break;
+                        case LHADD_U_IMM_0_OPCODE_X1:
+                                mem_op = MEMOP_LOAD_POSTINCR;
+                                size = 2;
+                                sign_ext = 0;
+                                break;
+                        case SWADD_IMM_0_OPCODE_X1:
+                                mem_op = MEMOP_STORE_POSTINCR;
+                                size = 4;
+                                break;
+                        case SHADD_IMM_0_OPCODE_X1:
+                                mem_op = MEMOP_STORE_POSTINCR;
+                                size = 2;
+                                break;
+                        default:
+                                break;
+                        }
+                        break;
+#endif /* CHIP_HAS_WH64() */
+                }
+                if (state->update) {
+                        /*
+                         * Get an available register.  We start with a
+                         * bitmask with 1's for available registers.
+                         * We truncate to the low 32 registers since
+                         * we are guaranteed to have set bits in the
+                         * low 32 bits, then use ctz to pick the first.
+                         */
+                        u32 mask = (u32) ~((1ULL << get_Dest_X0(bundle)) |
+                                           (1ULL << get_SrcA_X0(bundle)) |
+                                           (1ULL << get_SrcB_X0(bundle)) |
+                                           (1ULL << target_reg));
+                        temp_reg = __builtin_ctz(mask);
+                        /*
+                         * Borrow temp_reg to carry the return address into
+                         * target_reg; its old value is restored afterwards
+                         * from update_reg/update_value.
+                         */
+                        state->update_reg = temp_reg;
+                        state->update_value = regs->regs[temp_reg];
+                        regs->regs[temp_reg] = (unsigned long) (pc+1);
+                        regs->flags |= PT_FLAGS_RESTORE_REGS;
+                        bundle = move_X1(bundle, target_reg, temp_reg);
+                }
+        } else {
+                int opcode = get_Opcode_Y2(bundle);
+                switch (opcode) {
+                /* loads */
+                case LH_OPCODE_Y2:
+                        mem_op = MEMOP_LOAD;
+                        size = 2;
+                        sign_ext = 1;
+                        break;
+                case LH_U_OPCODE_Y2:
+                        mem_op = MEMOP_LOAD;
+                        size = 2;
+                        sign_ext = 0;
+                        break;
+                case LW_OPCODE_Y2:
+                        mem_op = MEMOP_LOAD;
+                        size = 4;
+                        break;
+                /* stores */
+                case SH_OPCODE_Y2:
+                        mem_op = MEMOP_STORE;
+                        size = 2;
+                        break;
+                case SW_OPCODE_Y2:
+                        mem_op = MEMOP_STORE;
+                        size = 4;
+                        break;
+                }
+        }
+        /*
+         * Check if we need to rewrite an unaligned load/store.
+         * Returning zero is a special value meaning a signal has
+         * already been raised and there is nothing left to execute.
+         */
+        if (mem_op != MEMOP_NONE && unaligned_fixup >= 0) {
+                bundle = rewrite_load_store_unaligned(state, bundle, regs,
+                                                      mem_op, size, sign_ext);
+                if (bundle == 0)
+                        return;
+        }
+        /* write the bundle to our execution area */
+        buffer = state->buffer;
+        err = __put_user(bundle, buffer++);
+        /*
+         * If we're really single-stepping, we take an INT_ILL after.
+         * If we're just handling an unaligned access, we can just
+         * jump directly back to where we were in user code.
+         */
+        if (is_single_step) {
+                err |= __put_user(__single_step_ill_insn, buffer++);
+                err |= __put_user(__single_step_ill_insn, buffer++);
+        } else {
+                long delta;
+                if (state->update) {
+                        /* We have some state to update; do it inline */
+                        int ha16;
+                        /* addli supplies the low 16 bits of the value ... */
+                        bundle = __single_step_addli_insn;
+                        bundle |= create_Dest_X1(state->update_reg);
+                        bundle |= create_Imm16_X1(state->update_value);
+                        err |= __put_user(bundle, buffer++);
+                        /* ... and auli the (carry-adjusted) high 16 bits. */
+                        bundle = __single_step_auli_insn;
+                        bundle |= create_Dest_X1(state->update_reg);
+                        bundle |= create_SrcA_X1(state->update_reg);
+                        ha16 = (state->update_value + 0x8000) >> 16;
+                        bundle |= create_Imm16_X1(ha16);
+                        err |= __put_user(bundle, buffer++);
+                        state->update = 0;
+                }
+                /* End with a jump back to the next instruction */
+                delta = ((regs->pc + TILE_BUNDLE_SIZE_IN_BYTES) -
+                        (unsigned long)buffer) >>
+                        TILE_LOG2_BUNDLE_ALIGNMENT_IN_BYTES;
+                bundle = __single_step_j_insn;
+                bundle |= create_JOffLong_X1(delta);
+                err |= __put_user(bundle, buffer++);
+        }
+        if (err) {
+                printk("Fault when writing to single-step buffer\n");
+                return;
+        }
+        /*
+         * Flush the buffer.
+         * We do a local flush only, since this is a thread-specific buffer.
+         */
+        __flush_icache_range((unsigned long) state->buffer,
+                             (unsigned long) buffer);
+        /* Indicate enabled */
+        state->is_enabled = is_single_step;
+        regs->pc = (unsigned long) state->buffer;
+        /* Fault immediately if we are coming back from a syscall. */
+        if (regs->faultnum == INT_SWINT_1)
+                regs->pc += 8;
+}
+#endif /* !__tilegx__ */

diff --git a/arch/tile/kernel/single_step.c b/arch/tile/kernel/single_step.c new file mode 100644 index 000000000000..266aae123632 --- /dev/null +++ b/arch/tile/kernel/single_step.c
@@ -0,0 +1,656 @@
	1	/*
	2	* Copyright 2010 Tilera Corporation. All Rights Reserved.
	3	*
	4	* This program is free software; you can redistribute it and/or
	5	* modify it under the terms of the GNU General Public License
	6	* as published by the Free Software Foundation, version 2.
	7	*
	8	* This program is distributed in the hope that it will be useful, but
	9	* WITHOUT ANY WARRANTY; without even the implied warranty of
	10	* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
	11	* NON INFRINGEMENT. See the GNU General Public License for
	12	* more details.
	13	*
	14	* A code-rewriter that enables instruction single-stepping.
	15	* Derived from iLib's single-stepping code.
	16	*/
	17
	18	#ifndef __tilegx__ /* No support for single-step yet. */
	19
	20	/* These functions are only used on the TILE platform */
	21	#include <linux/slab.h>
	22	#include <linux/thread_info.h>
	23	#include <linux/uaccess.h>
	24	#include <linux/mman.h>
	25	#include <linux/types.h>
	26	#include <asm/cacheflush.h>
	27	#include <asm/opcode-tile.h>
	28	#include <asm/opcode_constants.h>
	29	#include <arch/abi.h>
	30
/*
 * Wrapper around sign_extend() with a width of 17; single_step_once()
 * applies it to the branch offset extracted from an X1 branch.
 */
#define signExtend17(val) sign_extend((val), 17)
/*
 * Bits 31..62 of a bundle.  move_X1() and addi_X1() clear these bits
 * before or-ing in a replacement X1 instruction.
 */
#define TILE_X1_MASK (0xffffffffULL << 31)
	33
/*
 * Non-zero requests a printk for every unaligned-access fixup; set from
 * the "unaligned_printk=" boot argument.
 */
int unaligned_printk;
	35
	36	static int __init setup_unaligned_printk(char *str)
	37	{
	38	long val;
	39	if (strict_strtol(str, 0, &val) != 0)
	40	return 0;
	41	unaligned_printk = val;
	42	printk("Printk for each unaligned data accesses is %s\n",
	43	unaligned_printk ? "enabled" : "disabled");
	44	return 1;
	45	}
	46	__setup("unaligned_printk=", setup_unaligned_printk);
	47
/*
 * Number of unaligned accesses fixed up so far; incremented by
 * rewrite_load_store_unaligned() each time it emulates an access.
 */
unsigned int unaligned_fixup_count;
	49
/*
 * Kind of memory access performed by the bundle being stepped, as
 * classified by single_step_once() and consumed by
 * rewrite_load_store_unaligned().
 */
enum mem_op {
	MEMOP_NONE,		/* bundle is not a load/store we classify */
	MEMOP_LOAD,		/* lh, lh_u, lw */
	MEMOP_STORE,		/* sh, sw */
	MEMOP_LOAD_POSTINCR,	/* lhadd, lhadd_u, lwadd */
	MEMOP_STORE_POSTINCR	/* shadd, swadd */
};
	57
	58	static inline tile_bundle_bits set_BrOff_X1(tile_bundle_bits n, int32_t offset)
	59	{
	60	tile_bundle_bits result;
	61
	62	/* mask out the old offset */
	63	tile_bundle_bits mask = create_BrOff_X1(-1);
	64	result = n & (~mask);
	65
	66	/* or in the new offset */
	67	result \|= create_BrOff_X1(offset);
	68
	69	return result;
	70	}
	71
	72	static inline tile_bundle_bits move_X1(tile_bundle_bits n, int dest, int src)
	73	{
	74	tile_bundle_bits result;
	75	tile_bundle_bits op;
	76
	77	result = n & (~TILE_X1_MASK);
	78
	79	op = create_Opcode_X1(SPECIAL_0_OPCODE_X1) \|
	80	create_RRROpcodeExtension_X1(OR_SPECIAL_0_OPCODE_X1) \|
	81	create_Dest_X1(dest) \|
	82	create_SrcB_X1(TREG_ZERO) \|
	83	create_SrcA_X1(src) ;
	84
	85	result \|= op;
	86	return result;
	87	}
	88
	89	static inline tile_bundle_bits nop_X1(tile_bundle_bits n)
	90	{
	91	return move_X1(n, TREG_ZERO, TREG_ZERO);
	92	}
	93
	94	static inline tile_bundle_bits addi_X1(
	95	tile_bundle_bits n, int dest, int src, int imm)
	96	{
	97	n &= ~TILE_X1_MASK;
	98
	99	n \|= (create_SrcA_X1(src) \|
	100	create_Dest_X1(dest) \|
	101	create_Imm8_X1(imm) \|
	102	create_S_X1(0) \|
	103	create_Opcode_X1(IMM_0_OPCODE_X1) \|
	104	create_ImmOpcodeExtension_X1(ADDI_IMM_0_OPCODE_X1));
	105
	106	return n;
	107	}
	108
	109	static tile_bundle_bits rewrite_load_store_unaligned(
	110	struct single_step_state *state,
	111	tile_bundle_bits bundle,
	112	struct pt_regs *regs,
	113	enum mem_op mem_op,
	114	int size, int sign_ext)
	115	{
	116	unsigned char *addr;
	117	int val_reg, addr_reg, err, val;
	118
	119	/* Get address and value registers */
	120	if (bundle & TILE_BUNDLE_Y_ENCODING_MASK) {
	121	addr_reg = get_SrcA_Y2(bundle);
	122	val_reg = get_SrcBDest_Y2(bundle);
	123	} else if (mem_op == MEMOP_LOAD \|\| mem_op == MEMOP_LOAD_POSTINCR) {
	124	addr_reg = get_SrcA_X1(bundle);
	125	val_reg = get_Dest_X1(bundle);
	126	} else {
	127	addr_reg = get_SrcA_X1(bundle);
	128	val_reg = get_SrcB_X1(bundle);
	129	}
	130
	131	/*
	132	* If registers are not GPRs, don't try to handle it.
	133	*
	134	* FIXME: we could handle non-GPR loads by getting the real value
	135	* from memory, writing it to the single step buffer, using a
	136	* temp_reg to hold a pointer to that memory, then executing that
	137	* instruction and resetting temp_reg. For non-GPR stores, it's a
	138	* little trickier; we could use the single step buffer for that
	139	* too, but we'd have to add some more state bits so that we could
	140	* call back in here to copy that value to the real target. For
	141	* now, we just handle the simple case.
	142	*/
	143	if ((val_reg >= PTREGS_NR_GPRS &&
	144	(val_reg != TREG_ZERO \|\|
	145	mem_op == MEMOP_LOAD \|\|
	146	mem_op == MEMOP_LOAD_POSTINCR)) \|\|
	147	addr_reg >= PTREGS_NR_GPRS)
	148	return bundle;
	149
	150	/* If it's aligned, don't handle it specially */
	151	addr = (void *)regs->regs[addr_reg];
	152	if (((unsigned long)addr % size) == 0)
	153	return bundle;
	154
	155	#ifndef __LITTLE_ENDIAN
	156	# error We assume little-endian representation with copy_xx_user size 2 here
	157	#endif
	158	/* Handle unaligned load/store */
	159	if (mem_op == MEMOP_LOAD \|\| mem_op == MEMOP_LOAD_POSTINCR) {
	160	unsigned short val_16;
	161	switch (size) {
	162	case 2:
	163	err = copy_from_user(&val_16, addr, sizeof(val_16));
	164	val = sign_ext ? ((short)val_16) : val_16;
	165	break;
	166	case 4:
	167	err = copy_from_user(&val, addr, sizeof(val));
	168	break;
	169	default:
	170	BUG();
	171	}
	172	if (err == 0) {
	173	state->update_reg = val_reg;
	174	state->update_value = val;
	175	state->update = 1;
	176	}
	177	} else {
	178	val = (val_reg == TREG_ZERO) ? 0 : regs->regs[val_reg];
	179	err = copy_to_user(addr, &val, size);
	180	}
	181
	182	if (err) {
	183	siginfo_t info = {
	184	.si_signo = SIGSEGV,
	185	.si_code = SEGV_MAPERR,
	186	.si_addr = (void __user *)addr
	187	};
	188	force_sig_info(info.si_signo, &info, current);
	189	return (tile_bundle_bits) 0;
	190	}
	191
	192	if (unaligned_fixup == 0) {
	193	siginfo_t info = {
	194	.si_signo = SIGBUS,
	195	.si_code = BUS_ADRALN,
	196	.si_addr = (void __user *)addr
	197	};
	198	force_sig_info(info.si_signo, &info, current);
	199	return (tile_bundle_bits) 0;
	200	}
	201
	202	if (unaligned_printk \|\| unaligned_fixup_count == 0) {
	203	printk("Process %d/%s: PC %#lx: Fixup of"
	204	" unaligned %s at %#lx.\n",
	205	current->pid, current->comm, regs->pc,
	206	(mem_op == MEMOP_LOAD \|\| mem_op == MEMOP_LOAD_POSTINCR) ?
	207	"load" : "store",
	208	(unsigned long)addr);
	209	if (!unaligned_printk) {
	210	printk("\n"
	211	"Unaligned fixups in the kernel will slow your application considerably.\n"
	212	"You can find them by writing \"1\" to /proc/sys/tile/unaligned_fixup/printk,\n"
	213	"which requests the kernel show all unaligned fixups, or writing a \"0\"\n"
	214	"to /proc/sys/tile/unaligned_fixup/enabled, in which case each unaligned\n"
	215	"access will become a SIGBUS you can debug. No further warnings will be\n"
	216	"shown so as to avoid additional slowdown, but you can track the number\n"
	217	"of fixups performed via /proc/sys/tile/unaligned_fixup/count.\n"
	218	"Use the tile-addr2line command (see \"info addr2line\") to decode PCs.\n"
	219	"\n");
	220	}
	221	}
	222	++unaligned_fixup_count;
	223
	224	if (bundle & TILE_BUNDLE_Y_ENCODING_MASK) {
	225	/* Convert the Y2 instruction to a prefetch. */
	226	bundle &= ~(create_SrcBDest_Y2(-1) \|
	227	create_Opcode_Y2(-1));
	228	bundle \|= (create_SrcBDest_Y2(TREG_ZERO) \|
	229	create_Opcode_Y2(LW_OPCODE_Y2));
	230	/* Replace the load postincr with an addi */
	231	} else if (mem_op == MEMOP_LOAD_POSTINCR) {
	232	bundle = addi_X1(bundle, addr_reg, addr_reg,
	233	get_Imm8_X1(bundle));
	234	/* Replace the store postincr with an addi */
	235	} else if (mem_op == MEMOP_STORE_POSTINCR) {
	236	bundle = addi_X1(bundle, addr_reg, addr_reg,
	237	get_Dest_Imm8_X1(bundle));
	238	} else {
	239	/* Convert the X1 instruction to a nop. */
	240	bundle &= ~(create_Opcode_X1(-1) \|
	241	create_UnShOpcodeExtension_X1(-1) \|
	242	create_UnOpcodeExtension_X1(-1));
	243	bundle \|= (create_Opcode_X1(SHUN_0_OPCODE_X1) \|
	244	create_UnShOpcodeExtension_X1(
	245	UN_0_SHUN_0_OPCODE_X1) \|
	246	create_UnOpcodeExtension_X1(
	247	NOP_UN_0_SHUN_0_OPCODE_X1));
	248	}
	249
	250	return bundle;
	251	}
	252
	253	/**
	254	* single_step_once() - entry point when single stepping has been triggered.
	255	* @regs: The machine register state
	256	*
	257	* When we arrive at this routine via a trampoline, the single step
	258	* engine copies the executing bundle to the single step buffer.
	259	* If the instruction is a condition branch, then the target is
	260	* reset to one past the next instruction. If the instruction
	261	* sets the lr, then that is noted. If the instruction is a jump
	262	* or call, then the new target pc is preserved and the current
	263	* bundle instruction set to null.
	264	*
	265	* The necessary post-single-step rewriting information is stored in
	266	* single_step_state-> We use data segment values because the
	267	* stack will be rewound when we run the rewritten single-stepped
	268	* instruction.
	269	*/
	270	void single_step_once(struct pt_regs *regs)
	271	{
	272	extern tile_bundle_bits __single_step_ill_insn;
	273	extern tile_bundle_bits __single_step_j_insn;
	274	extern tile_bundle_bits __single_step_addli_insn;
	275	extern tile_bundle_bits __single_step_auli_insn;
	276	struct thread_info info = (void )current_thread_info();
	277	struct single_step_state *state = info->step_state;
	278	int is_single_step = test_ti_thread_flag(info, TIF_SINGLESTEP);
	279	tile_bundle_bits buffer, pc;
	280	tile_bundle_bits bundle;
	281	int temp_reg;
	282	int target_reg = TREG_LR;
	283	int err;
	284	enum mem_op mem_op = MEMOP_NONE;
	285	int size = 0, sign_ext = 0; /* happy compiler */
	286
	287	asm(
	288	" .pushsection .rodata.single_step\n"
	289	" .align 8\n"
	290	" .globl __single_step_ill_insn\n"
	291	"__single_step_ill_insn:\n"
	292	" ill\n"
	293	" .globl __single_step_addli_insn\n"
	294	"__single_step_addli_insn:\n"
	295	" { nop; addli r0, zero, 0 }\n"
	296	" .globl __single_step_auli_insn\n"
	297	"__single_step_auli_insn:\n"
	298	" { nop; auli r0, r0, 0 }\n"
	299	" .globl __single_step_j_insn\n"
	300	"__single_step_j_insn:\n"
	301	" j .\n"
	302	" .popsection\n"
	303	);
	304
	305	if (state == NULL) {
	306	/* allocate a page of writable, executable memory */
	307	state = kmalloc(sizeof(struct single_step_state), GFP_KERNEL);
	308	if (state == NULL) {
	309	printk("Out of kernel memory trying to single-step\n");
	310	return;
	311	}
	312
	313	/* allocate a cache line of writable, executable memory */
	314	down_write(&current->mm->mmap_sem);
	315	buffer = (void *) do_mmap(0, 0, 64,
	316	PROT_EXEC \| PROT_READ \| PROT_WRITE,
	317	MAP_PRIVATE \| MAP_ANONYMOUS,
	318	0);
	319	up_write(&current->mm->mmap_sem);
	320
	321	if ((int)buffer < 0 && (int)buffer > -PAGE_SIZE) {
	322	kfree(state);
	323	printk("Out of kernel pages trying to single-step\n");
	324	return;
	325	}
	326
	327	state->buffer = buffer;
	328	state->is_enabled = 0;
	329
	330	info->step_state = state;
	331
	332	/* Validate our stored instruction patterns */
	333	BUG_ON(get_Opcode_X1(__single_step_addli_insn) !=
	334	ADDLI_OPCODE_X1);
	335	BUG_ON(get_Opcode_X1(__single_step_auli_insn) !=
	336	AULI_OPCODE_X1);
	337	BUG_ON(get_SrcA_X1(__single_step_addli_insn) != TREG_ZERO);
	338	BUG_ON(get_Dest_X1(__single_step_addli_insn) != 0);
	339	BUG_ON(get_JOffLong_X1(__single_step_j_insn) != 0);
	340	}
	341
	342	/*
	343	* If we are returning from a syscall, we still haven't hit the
	344	* "ill" for the swint1 instruction. So back the PC up to be
	345	* pointing at the swint1, but we'll actually return directly
	346	* back to the "ill" so we come back in via SIGILL as if we
	347	* had "executed" the swint1 without ever being in kernel space.
	348	*/
	349	if (regs->faultnum == INT_SWINT_1)
	350	regs->pc -= 8;
	351
	352	pc = (tile_bundle_bits *)(regs->pc);
	353	bundle = pc[0];
	354
	355	/* We'll follow the instruction with 2 ill op bundles */
	356	state->orig_pc = (unsigned long) pc;
	357	state->next_pc = (unsigned long)(pc + 1);
	358	state->branch_next_pc = 0;
	359	state->update = 0;
	360
	361	if (!(bundle & TILE_BUNDLE_Y_ENCODING_MASK)) {
	362	/* two wide, check for control flow */
	363	int opcode = get_Opcode_X1(bundle);
	364
	365	switch (opcode) {
	366	/* branches */
	367	case BRANCH_OPCODE_X1:
	368	{
	369	int32_t offset = signExtend17(get_BrOff_X1(bundle));
	370
	371	/*
	372	* For branches, we use a rewriting trick to let the
	373	* hardware evaluate whether the branch is taken or
	374	* untaken. We record the target offset and then
	375	* rewrite the branch instruction to target 1 insn
	376	* ahead if the branch is taken. We then follow the
	377	* rewritten branch with two bundles, each containing
	378	* an "ill" instruction. The supervisor examines the
	379	* pc after the single step code is executed, and if
	380	* the pc is the first ill instruction, then the
	381	* branch (if any) was not taken. If the pc is the
	382	* second ill instruction, then the branch was
	383	* taken. The new pc is computed for these cases, and
	384	* inserted into the registers for the thread. If
	385	* the pc is the start of the single step code, then
	386	* an exception or interrupt was taken before the
	387	* code started processing, and the same "original"
	388	* pc is restored. This change, different from the
	389	* original implementation, has the advantage of
	390	* executing a single user instruction.
	391	*/
	392	state->branch_next_pc = (unsigned long)(pc + offset);
	393
	394	/* rewrite branch offset to go forward one bundle */
	395	bundle = set_BrOff_X1(bundle, 2);
	396	}
	397	break;
	398
	399	/* jumps */
	400	case JALB_OPCODE_X1:
	401	case JALF_OPCODE_X1:
	402	state->update = 1;
	403	state->next_pc =
	404	(unsigned long) (pc + get_JOffLong_X1(bundle));
	405	break;
	406
	407	case JB_OPCODE_X1:
	408	case JF_OPCODE_X1:
	409	state->next_pc =
	410	(unsigned long) (pc + get_JOffLong_X1(bundle));
	411	bundle = nop_X1(bundle);
	412	break;
	413
	414	case SPECIAL_0_OPCODE_X1:
	415	switch (get_RRROpcodeExtension_X1(bundle)) {
	416	/* jump-register */
	417	case JALRP_SPECIAL_0_OPCODE_X1:
	418	case JALR_SPECIAL_0_OPCODE_X1:
	419	state->update = 1;
	420	state->next_pc =
	421	regs->regs[get_SrcA_X1(bundle)];
	422	break;
	423
	424	case JRP_SPECIAL_0_OPCODE_X1:
	425	case JR_SPECIAL_0_OPCODE_X1:
	426	state->next_pc =
	427	regs->regs[get_SrcA_X1(bundle)];
	428	bundle = nop_X1(bundle);
	429	break;
	430
	431	case LNK_SPECIAL_0_OPCODE_X1:
	432	state->update = 1;
	433	target_reg = get_Dest_X1(bundle);
	434	break;
	435
	436	/* stores */
	437	case SH_SPECIAL_0_OPCODE_X1:
	438	mem_op = MEMOP_STORE;
	439	size = 2;
	440	break;
	441
	442	case SW_SPECIAL_0_OPCODE_X1:
	443	mem_op = MEMOP_STORE;
	444	size = 4;
	445	break;
	446	}
	447	break;
	448
	449	/* loads and iret */
	450	case SHUN_0_OPCODE_X1:
	451	if (get_UnShOpcodeExtension_X1(bundle) ==
	452	UN_0_SHUN_0_OPCODE_X1) {
	453	switch (get_UnOpcodeExtension_X1(bundle)) {
	454	case LH_UN_0_SHUN_0_OPCODE_X1:
	455	mem_op = MEMOP_LOAD;
	456	size = 2;
	457	sign_ext = 1;
	458	break;
	459
	460	case LH_U_UN_0_SHUN_0_OPCODE_X1:
	461	mem_op = MEMOP_LOAD;
	462	size = 2;
	463	sign_ext = 0;
	464	break;
	465
	466	case LW_UN_0_SHUN_0_OPCODE_X1:
	467	mem_op = MEMOP_LOAD;
	468	size = 4;
	469	break;
	470
	471	case IRET_UN_0_SHUN_0_OPCODE_X1:
	472	{
	473	unsigned long ex0_0 = __insn_mfspr(
	474	SPR_EX_CONTEXT_0_0);
	475	unsigned long ex0_1 = __insn_mfspr(
	476	SPR_EX_CONTEXT_0_1);
	477	/*
	478	* Special-case it if we're iret'ing
	479	* to PL0 again. Otherwise just let
	480	* it run and it will generate SIGILL.
	481	*/
	482	if (EX1_PL(ex0_1) == USER_PL) {
	483	state->next_pc = ex0_0;
	484	regs->ex1 = ex0_1;
	485	bundle = nop_X1(bundle);
	486	}
	487	}
	488	}
	489	}
	490	break;
	491
	492	#if CHIP_HAS_WH64()
	493	/* postincrement operations */
	494	case IMM_0_OPCODE_X1:
	495	switch (get_ImmOpcodeExtension_X1(bundle)) {
	496	case LWADD_IMM_0_OPCODE_X1:
	497	mem_op = MEMOP_LOAD_POSTINCR;
	498	size = 4;
	499	break;
	500
	501	case LHADD_IMM_0_OPCODE_X1:
	502	mem_op = MEMOP_LOAD_POSTINCR;
	503	size = 2;
	504	sign_ext = 1;
	505	break;
	506
	507	case LHADD_U_IMM_0_OPCODE_X1:
	508	mem_op = MEMOP_LOAD_POSTINCR;
	509	size = 2;
	510	sign_ext = 0;
	511	break;
	512
	513	case SWADD_IMM_0_OPCODE_X1:
	514	mem_op = MEMOP_STORE_POSTINCR;
	515	size = 4;
	516	break;
	517
	518	case SHADD_IMM_0_OPCODE_X1:
	519	mem_op = MEMOP_STORE_POSTINCR;
	520	size = 2;
	521	break;
	522
	523	default:
	524	break;
	525	}
	526	break;
	527	#endif /* CHIP_HAS_WH64() */
	528	}
	529
	530	if (state->update) {
	531	/*
	532	* Get an available register. We start with a
	533	* bitmask with 1's for available registers.
	534	* We truncate to the low 32 registers since
	535	* we are guaranteed to have set bits in the
	536	* low 32 bits, then use ctz to pick the first.
	537	*/
	538	u32 mask = (u32) ~((1ULL << get_Dest_X0(bundle)) \|
	539	(1ULL << get_SrcA_X0(bundle)) \|
	540	(1ULL << get_SrcB_X0(bundle)) \|
	541	(1ULL << target_reg));
	542	temp_reg = __builtin_ctz(mask);
	543	state->update_reg = temp_reg;
	544	state->update_value = regs->regs[temp_reg];
	545	regs->regs[temp_reg] = (unsigned long) (pc+1);
	546	regs->flags \|= PT_FLAGS_RESTORE_REGS;
	547	bundle = move_X1(bundle, target_reg, temp_reg);
	548	}
	549	} else {
	550	int opcode = get_Opcode_Y2(bundle);
	551
	552	switch (opcode) {
	553	/* loads */
	554	case LH_OPCODE_Y2:
	555	mem_op = MEMOP_LOAD;
	556	size = 2;
	557	sign_ext = 1;
	558	break;
	559
	560	case LH_U_OPCODE_Y2:
	561	mem_op = MEMOP_LOAD;
	562	size = 2;
	563	sign_ext = 0;
	564	break;
	565
	566	case LW_OPCODE_Y2:
	567	mem_op = MEMOP_LOAD;
	568	size = 4;
	569	break;
	570
	571	/* stores */
	572	case SH_OPCODE_Y2:
	573	mem_op = MEMOP_STORE;
	574	size = 2;
	575	break;
	576
	577	case SW_OPCODE_Y2:
	578	mem_op = MEMOP_STORE;
	579	size = 4;
	580	break;
	581	}
	582	}
	583
	584	/*
	585	* Check if we need to rewrite an unaligned load/store.
	586	* Returning zero is a special value meaning we need to SIGSEGV.
	587	*/
	588	if (mem_op != MEMOP_NONE && unaligned_fixup >= 0) {
	589	bundle = rewrite_load_store_unaligned(state, bundle, regs,
	590	mem_op, size, sign_ext);
	591	if (bundle == 0)
	592	return;
	593	}
	594
	595	/* write the bundle to our execution area */
	596	buffer = state->buffer;
	597	err = __put_user(bundle, buffer++);
	598
	599	/*
	600	* If we're really single-stepping, we take an INT_ILL after.
	601	* If we're just handling an unaligned access, we can just
	602	* jump directly back to where we were in user code.
	603	*/
	604	if (is_single_step) {
	605	err \|= __put_user(__single_step_ill_insn, buffer++);
	606	err \|= __put_user(__single_step_ill_insn, buffer++);
	607	} else {
	608	long delta;
	609
	610	if (state->update) {
	611	/* We have some state to update; do it inline */
	612	int ha16;
	613	bundle = __single_step_addli_insn;
	614	bundle \|= create_Dest_X1(state->update_reg);
	615	bundle \|= create_Imm16_X1(state->update_value);
	616	err \|= __put_user(bundle, buffer++);
	617	bundle = __single_step_auli_insn;
	618	bundle \|= create_Dest_X1(state->update_reg);
	619	bundle \|= create_SrcA_X1(state->update_reg);
	620	ha16 = (state->update_value + 0x8000) >> 16;
	621	bundle \|= create_Imm16_X1(ha16);
	622	err \|= __put_user(bundle, buffer++);
	623	state->update = 0;
	624	}
	625
	626	/* End with a jump back to the next instruction */
	627	delta = ((regs->pc + TILE_BUNDLE_SIZE_IN_BYTES) -
	628	(unsigned long)buffer) >>
	629	TILE_LOG2_BUNDLE_ALIGNMENT_IN_BYTES;
	630	bundle = __single_step_j_insn;
	631	bundle \|= create_JOffLong_X1(delta);
	632	err \|= __put_user(bundle, buffer++);
	633	}
	634
	635	if (err) {
	636	printk("Fault when writing to single-step buffer\n");
	637	return;
	638	}
	639
	640	/*
	641	* Flush the buffer.
	642	* We do a local flush only, since this is a thread-specific buffer.
	643	*/
	644	__flush_icache_range((unsigned long) state->buffer,
	645	(unsigned long) buffer);
	646
	647	/* Indicate enabled */
	648	state->is_enabled = is_single_step;
	649	regs->pc = (unsigned long) state->buffer;
	650
	651	/* Fault immediately if we are coming back from a syscall. */
	652	if (regs->faultnum == INT_SWINT_1)
	653	regs->pc += 8;
	654	}
	655
	656	#endif /* !__tilegx__ */