author     Chris Metcalf <cmetcalf@tilera.com>   2013-08-06 16:04:13 -0400
committer  Chris Metcalf <cmetcalf@tilera.com>   2013-08-13 16:04:10 -0400
commit     2f9ac29eec71a696cb0dcc5fb82c0f8d4dac28c9 (patch)
tree       ee33ba7e452e8614130a811211eb2383a3133194
parent     f10da5472c6907a3fbd6886224b36d21925ce47b (diff)
tile: fast-path unaligned memory access for tilegx
This change enables unaligned userspace memory access via a kernel fast path on tilegx. The kernel tracks user PC/instruction pairs per-thread using a direct-mapped cache in userspace. The cache maps those PC/instruction pairs to JIT'ed instruction sequences that load or store using byte-wide load/store instructions and then synthesize 2-, 4- or 8-byte load or store results. Once an instruction has been seen to generate an unaligned access, subsequent hits on that instruction typically require an overhead of only around 50 cycles if the cache and TLB are hot.

We support the prctl() PR_GET_UNALIGN / PR_SET_UNALIGN syscall to enable or disable unaligned fixups on a per-process basis.

To do this we pull some of the tilepro unaligned support out of the single_step.c file; tilepro uses instruction disassembly for both single-step and unaligned access support. Since tilegx actually has hardware single-step support, though, it's cleaner to keep the tilegx unaligned access code in a separate file. While we're at it, properly rename the tilepro-specific types, etc., to have tilepro suffixes instead of generic tile suffixes.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
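The PR_SET_UNALIGN / PR_GET_UNALIGN controls mentioned above are driven from userspace via prctl(). A minimal usage sketch (not part of this patch; it assumes an architecture that implements SET_UNALIGN_CTL/GET_UNALIGN_CTL, such as tilegx after this change):

#include <stdio.h>
#include <sys/prctl.h>

int main(void)
{
	unsigned int mode;

	/* Request SIGBUS instead of kernel fixups for unaligned accesses. */
	if (prctl(PR_SET_UNALIGN, PR_UNALIGN_SIGBUS) != 0)
		perror("prctl(PR_SET_UNALIGN)");

	/* Read the current per-process setting back (the kernel fills it via put_user()). */
	if (prctl(PR_GET_UNALIGN, (unsigned long)&mode) == 0)
		printf("unalign mode: %u\n", mode);

	return 0;
}

Passing PR_UNALIGN_NOPRINT instead re-enables silent kernel fixups; per the patch below, a process that never calls prctl() (align_ctl == 0) follows the global unaligned_fixup setting.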
-rw-r--r--  arch/tile/include/asm/processor.h    |    7
-rw-r--r--  arch/tile/include/asm/ptrace.h       |    3
-rw-r--r--  arch/tile/include/asm/sections.h     |    4
-rw-r--r--  arch/tile/include/asm/thread_info.h  |    6
-rw-r--r--  arch/tile/include/asm/traps.h        |   11
-rw-r--r--  arch/tile/kernel/Makefile            |    3
-rw-r--r--  arch/tile/kernel/asm-offsets.c       |    6
-rw-r--r--  arch/tile/kernel/intvec_32.S         |    1
-rw-r--r--  arch/tile/kernel/intvec_64.S         |  231
-rw-r--r--  arch/tile/kernel/proc.c              |    2
-rw-r--r--  arch/tile/kernel/process.c           |   21
-rw-r--r--  arch/tile/kernel/ptrace.c            |    4
-rw-r--r--  arch/tile/kernel/single_step.c       |  116
-rw-r--r--  arch/tile/kernel/unaligned.c         | 1609
-rw-r--r--  arch/tile/mm/fault.c                 |   41
15 files changed, 1996 insertions(+), 69 deletions(-)
diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h
index b3f104953da2..cda27243fb09 100644
--- a/arch/tile/include/asm/processor.h
+++ b/arch/tile/include/asm/processor.h
@@ -247,6 +247,13 @@ unsigned long get_wchan(struct task_struct *p);
247#define KSTK_EIP(task) task_pc(task) 247#define KSTK_EIP(task) task_pc(task)
248#define KSTK_ESP(task) task_sp(task) 248#define KSTK_ESP(task) task_sp(task)
249 249
250/* Fine-grained unaligned JIT support */
251#define GET_UNALIGN_CTL(tsk, adr) get_unalign_ctl((tsk), (adr))
252#define SET_UNALIGN_CTL(tsk, val) set_unalign_ctl((tsk), (val))
253
254extern int get_unalign_ctl(struct task_struct *tsk, unsigned long adr);
255extern int set_unalign_ctl(struct task_struct *tsk, unsigned int val);
256
250/* Standard format for printing registers and other word-size data. */ 257/* Standard format for printing registers and other word-size data. */
251#ifdef __tilegx__ 258#ifdef __tilegx__
252# define REGFMT "0x%016lx" 259# define REGFMT "0x%016lx"
diff --git a/arch/tile/include/asm/ptrace.h b/arch/tile/include/asm/ptrace.h
index fd412260aff7..73b681b566f7 100644
--- a/arch/tile/include/asm/ptrace.h
+++ b/arch/tile/include/asm/ptrace.h
@@ -79,8 +79,7 @@ extern void single_step_execve(void);
79 79
80struct task_struct; 80struct task_struct;
81 81
82extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, 82extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs);
83 int error_code);
84 83
85#ifdef __tilegx__ 84#ifdef __tilegx__
86/* We need this since sigval_t has a user pointer in it, for GETSIGINFO etc. */ 85/* We need this since sigval_t has a user pointer in it, for GETSIGINFO etc. */
diff --git a/arch/tile/include/asm/sections.h b/arch/tile/include/asm/sections.h
index 7d8a935a9238..cc95276ef9c9 100644
--- a/arch/tile/include/asm/sections.h
+++ b/arch/tile/include/asm/sections.h
@@ -28,7 +28,9 @@ extern char __w1data_begin[], __w1data_end[];
28 28
29/* Not exactly sections, but PC comparison points in the code. */ 29/* Not exactly sections, but PC comparison points in the code. */
30extern char __rt_sigreturn[], __rt_sigreturn_end[]; 30extern char __rt_sigreturn[], __rt_sigreturn_end[];
31#ifndef __tilegx__ 31#ifdef __tilegx__
32extern char __start_unalign_asm_code[], __end_unalign_asm_code[];
33#else
32extern char sys_cmpxchg[], __sys_cmpxchg_end[]; 34extern char sys_cmpxchg[], __sys_cmpxchg_end[];
33extern char __sys_cmpxchg_grab_lock[]; 35extern char __sys_cmpxchg_grab_lock[];
34extern char __start_atomic_asm_code[], __end_atomic_asm_code[]; 36extern char __start_atomic_asm_code[], __end_atomic_asm_code[];
diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h
index d1733dee98a2..b8aa6df3e102 100644
--- a/arch/tile/include/asm/thread_info.h
+++ b/arch/tile/include/asm/thread_info.h
@@ -39,6 +39,11 @@ struct thread_info {
39 struct restart_block restart_block; 39 struct restart_block restart_block;
40 struct single_step_state *step_state; /* single step state 40 struct single_step_state *step_state; /* single step state
41 (if non-zero) */ 41 (if non-zero) */
42 int align_ctl; /* controls unaligned access */
43#ifdef __tilegx__
44 unsigned long unalign_jit_tmp[4]; /* temp r0..r3 storage */
45 void __user *unalign_jit_base; /* unalign fixup JIT base */
46#endif
42}; 47};
43 48
44/* 49/*
@@ -56,6 +61,7 @@ struct thread_info {
56 .fn = do_no_restart_syscall, \ 61 .fn = do_no_restart_syscall, \
57 }, \ 62 }, \
58 .step_state = NULL, \ 63 .step_state = NULL, \
64 .align_ctl = 0, \
59} 65}
60 66
61#define init_thread_info (init_thread_union.thread_info) 67#define init_thread_info (init_thread_union.thread_info)
diff --git a/arch/tile/include/asm/traps.h b/arch/tile/include/asm/traps.h
index e28c3df4176a..5f172b2403a6 100644
--- a/arch/tile/include/asm/traps.h
+++ b/arch/tile/include/asm/traps.h
@@ -15,6 +15,7 @@
15#ifndef _ASM_TILE_TRAPS_H 15#ifndef _ASM_TILE_TRAPS_H
16#define _ASM_TILE_TRAPS_H 16#define _ASM_TILE_TRAPS_H
17 17
18#ifndef __ASSEMBLY__
18#include <arch/chip.h> 19#include <arch/chip.h>
19 20
20/* mm/fault.c */ 21/* mm/fault.c */
@@ -69,6 +70,16 @@ void gx_singlestep_handle(struct pt_regs *, int fault_num);
69 70
70/* kernel/intvec_64.S */ 71/* kernel/intvec_64.S */
71void fill_ra_stack(void); 72void fill_ra_stack(void);
73
74/* Handle unalign data fixup. */
75extern void do_unaligned(struct pt_regs *regs, int vecnum);
76#endif
77
78#endif /* __ASSEMBLY__ */
79
80#ifdef __tilegx__
81/* 128 byte JIT per unalign fixup. */
82#define UNALIGN_JIT_SHIFT 7
72#endif 83#endif
73 84
74#endif /* _ASM_TILE_TRAPS_H */ 85#endif /* _ASM_TILE_TRAPS_H */
diff --git a/arch/tile/kernel/Makefile b/arch/tile/kernel/Makefile
index 5334be8e2538..6846c4ef5bf1 100644
--- a/arch/tile/kernel/Makefile
+++ b/arch/tile/kernel/Makefile
@@ -5,7 +5,8 @@
5extra-y := vmlinux.lds head_$(BITS).o 5extra-y := vmlinux.lds head_$(BITS).o
6obj-y := backtrace.o entry.o irq.o messaging.o \ 6obj-y := backtrace.o entry.o irq.o messaging.o \
7 pci-dma.o proc.o process.o ptrace.o reboot.o \ 7 pci-dma.o proc.o process.o ptrace.o reboot.o \
8 setup.o signal.o single_step.o stack.o sys.o sysfs.o time.o traps.o \ 8 setup.o signal.o single_step.o stack.o sys.o \
9 sysfs.o time.o traps.o unaligned.o \
9 intvec_$(BITS).o regs_$(BITS).o tile-desc_$(BITS).o 10 intvec_$(BITS).o regs_$(BITS).o tile-desc_$(BITS).o
10 11
11obj-$(CONFIG_HARDWALL) += hardwall.o 12obj-$(CONFIG_HARDWALL) += hardwall.o
diff --git a/arch/tile/kernel/asm-offsets.c b/arch/tile/kernel/asm-offsets.c
index 8fff4757fffe..8652b0be4685 100644
--- a/arch/tile/kernel/asm-offsets.c
+++ b/arch/tile/kernel/asm-offsets.c
@@ -60,6 +60,12 @@ void foo(void)
60 offsetof(struct thread_info, homecache_cpu)); 60 offsetof(struct thread_info, homecache_cpu));
61 DEFINE(THREAD_INFO_STEP_STATE_OFFSET, 61 DEFINE(THREAD_INFO_STEP_STATE_OFFSET,
62 offsetof(struct thread_info, step_state)); 62 offsetof(struct thread_info, step_state));
63#ifdef __tilegx__
64 DEFINE(THREAD_INFO_UNALIGN_JIT_BASE_OFFSET,
65 offsetof(struct thread_info, unalign_jit_base));
66 DEFINE(THREAD_INFO_UNALIGN_JIT_TMP_OFFSET,
67 offsetof(struct thread_info, unalign_jit_tmp));
68#endif
63 69
64 DEFINE(TASK_STRUCT_THREAD_KSP_OFFSET, 70 DEFINE(TASK_STRUCT_THREAD_KSP_OFFSET,
65 offsetof(struct task_struct, thread.ksp)); 71 offsetof(struct task_struct, thread.ksp));
diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S
index 25966af74a28..388061319c4c 100644
--- a/arch/tile/kernel/intvec_32.S
+++ b/arch/tile/kernel/intvec_32.S
@@ -1420,7 +1420,6 @@ handle_ill:
1420 { 1420 {
1421 lw r0, r0 /* indirect thru thread_info to get task_info*/ 1421 lw r0, r0 /* indirect thru thread_info to get task_info*/
1422 addi r1, sp, C_ABI_SAVE_AREA_SIZE /* put ptregs pointer into r1 */ 1422 addi r1, sp, C_ABI_SAVE_AREA_SIZE /* put ptregs pointer into r1 */
1423 move r2, zero /* load error code into r2 */
1424 } 1423 }
1425 1424
1426 jal send_sigtrap /* issue a SIGTRAP */ 1425 jal send_sigtrap /* issue a SIGTRAP */
diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S
index 85d483957027..884af9ea5bed 100644
--- a/arch/tile/kernel/intvec_64.S
+++ b/arch/tile/kernel/intvec_64.S
@@ -17,11 +17,13 @@
17#include <linux/linkage.h> 17#include <linux/linkage.h>
18#include <linux/errno.h> 18#include <linux/errno.h>
19#include <linux/unistd.h> 19#include <linux/unistd.h>
20#include <linux/init.h>
20#include <asm/ptrace.h> 21#include <asm/ptrace.h>
21#include <asm/thread_info.h> 22#include <asm/thread_info.h>
22#include <asm/irqflags.h> 23#include <asm/irqflags.h>
23#include <asm/asm-offsets.h> 24#include <asm/asm-offsets.h>
24#include <asm/types.h> 25#include <asm/types.h>
26#include <asm/traps.h>
25#include <asm/signal.h> 27#include <asm/signal.h>
26#include <hv/hypervisor.h> 28#include <hv/hypervisor.h>
27#include <arch/abi.h> 29#include <arch/abi.h>
@@ -98,6 +100,189 @@
98 } 100 }
99 .endm 101 .endm
100 102
103 /*
104 * Unalign data exception fast handling: In order to handle
105 * unaligned data access, a fast JIT version is generated and stored
106 * in a specific area in user space. We first need to do a quick poke
 107 * to see if the JIT is available. We use certain bits in the fault
 108 * PC (bits 3 to 9 for a 16KB page size) as an index into the JIT
 109 * code area. The first 64-bit word is the fault PC, and the 2nd one is
 110 * the fault bundle itself. If these 2 words both match, then we
 111 * directly "iret" to the JIT code. If not, a slow path is invoked to
 112 * generate new JIT code. Note: the current JIT code WILL be
 113 * overwritten if it exists. So, ideally we can handle 128 unalign
 114 * fixups via JIT. For lookup efficiency and to effectively support
 115 * tight loops with multiple unaligned references, a simple
 116 * direct-mapped cache is used.
117 *
118 * SPR_EX_CONTEXT_K_0 is modified to return to JIT code.
119 * SPR_EX_CONTEXT_K_1 has ICS set.
120 * SPR_EX_CONTEXT_0_0 is setup to user program's next PC.
121 * SPR_EX_CONTEXT_0_1 = 0.
122 */
123 .macro int_hand_unalign_fast vecnum, vecname
124 .org (\vecnum << 8)
125intvec_\vecname:
126 /* Put r3 in SPR_SYSTEM_SAVE_K_1. */
127 mtspr SPR_SYSTEM_SAVE_K_1, r3
128
129 mfspr r3, SPR_EX_CONTEXT_K_1
130 /*
131 * Examine if exception comes from user without ICS set.
132 * If not, just go directly to the slow path.
133 */
134 bnez r3, hand_unalign_slow_nonuser
135
136 mfspr r3, SPR_SYSTEM_SAVE_K_0
137
138 /* Get &thread_info->unalign_jit_tmp[0] in r3. */
139 mm r3, zero, LOG2_THREAD_SIZE, 63
140#if THREAD_SIZE < 65536
141 addli r3, r3, -(PAGE_SIZE - THREAD_INFO_UNALIGN_JIT_TMP_OFFSET)
142#else
143 addli r3, r3, -(PAGE_SIZE/2)
144 addli r3, r3, -(PAGE_SIZE/2 - THREAD_INFO_UNALIGN_JIT_TMP_OFFSET)
145#endif
146
147 /*
148 * Save r0, r1, r2 into thread_info array r3 points to
149 * from low to high memory in order.
150 */
151 st_add r3, r0, 8
152 st_add r3, r1, 8
153 {
154 st_add r3, r2, 8
155 andi r2, sp, 7
156 }
157
158 /* Save stored r3 value so we can revert it on a page fault. */
159 mfspr r1, SPR_SYSTEM_SAVE_K_1
160 st r3, r1
161
162 {
163 /* Generate a SIGBUS if sp is not 8-byte aligned. */
164 bnez r2, hand_unalign_slow_badsp
165 }
166
167 /*
168 * Get the thread_info in r0; load r1 with pc. Set the low bit of sp
169 * as an indicator to the page fault code in case we fault.
170 */
171 {
172 ori sp, sp, 1
173 mfspr r1, SPR_EX_CONTEXT_K_0
174 }
175
176 /* Add the jit_info offset in thread_info; extract r1 [3:9] into r2. */
177 {
178 addli r0, r3, THREAD_INFO_UNALIGN_JIT_BASE_OFFSET - \
179 (THREAD_INFO_UNALIGN_JIT_TMP_OFFSET + (3 * 8))
180 bfextu r2, r1, 3, (2 + PAGE_SHIFT - UNALIGN_JIT_SHIFT)
181 }
182
183 /* Load the jit_info; multiply r2 by 128. */
184 {
185 ld r0, r0
186 shli r2, r2, UNALIGN_JIT_SHIFT
187 }
188
189 /*
190 * If r0 is NULL, the JIT page is not mapped, so go to slow path;
191 * add offset r2 to r0 at the same time.
192 */
193 {
194 beqz r0, hand_unalign_slow
195 add r2, r0, r2
196 }
197
198 /*
199 * We are loading from userspace (both the JIT info PC and
200 * instruction word, and the instruction word we executed)
201 * and since either could fault while holding the interrupt
202 * critical section, we must tag this region and check it in
203 * do_page_fault() to handle it properly.
204 */
205ENTRY(__start_unalign_asm_code)
206
207 /* Load first word of JIT in r0 and increment r2 by 8. */
208 ld_add r0, r2, 8
209
210 /*
211 * Compare the PC with the 1st word in JIT; load the fault bundle
212 * into r1.
213 */
214 {
215 cmpeq r0, r0, r1
216 ld r1, r1
217 }
218
219 /* Go to slow path if PC doesn't match. */
220 beqz r0, hand_unalign_slow
221
222 /*
223 * Load the 2nd word of JIT, which is supposed to be the fault
224 * bundle for a cache hit. Increment r2; after this bundle r2 will
225 * point to the potential start of the JIT code we want to run.
226 */
227 ld_add r0, r2, 8
228
229 /* No further accesses to userspace are done after this point. */
230ENTRY(__end_unalign_asm_code)
231
232 /* Compare the real bundle with what is saved in the JIT area. */
233 {
234 cmpeq r0, r1, r0
235 mtspr SPR_EX_CONTEXT_0_1, zero
236 }
237
238 /* Go to slow path if the fault bundle does not match. */
239 beqz r0, hand_unalign_slow
240
241 /*
242 * A cache hit is found.
243 * r2 points to start of JIT code (3rd word).
244 * r0 is the fault pc.
245 * r1 is the fault bundle.
246 * Reset the low bit of sp.
247 */
248 {
249 mfspr r0, SPR_EX_CONTEXT_K_0
250 andi sp, sp, ~1
251 }
252
253 /* Write r2 into EX_CONTEXT_K_0 and increment PC. */
254 {
255 mtspr SPR_EX_CONTEXT_K_0, r2
256 addi r0, r0, 8
257 }
258
259 /*
260 * Set ICS on kernel EX_CONTEXT_K_1 in order to "iret" to
261 * user with ICS set. This way, if the JIT fixup causes another
262 * unalign exception (which shouldn't be possible) the user
263 * process will be terminated with SIGBUS. Also, our fixup will
264 * run without interleaving with external interrupts.
265 * Each fixup is at most 14 bundles, so it won't hold ICS for long.
266 */
267 {
268 movei r1, PL_ICS_EX1(USER_PL, 1)
269 mtspr SPR_EX_CONTEXT_0_0, r0
270 }
271
272 {
273 mtspr SPR_EX_CONTEXT_K_1, r1
274 addi r3, r3, -(3 * 8)
275 }
276
277 /* Restore r0..r3. */
278 ld_add r0, r3, 8
279 ld_add r1, r3, 8
280 ld_add r2, r3, 8
281 ld r3, r3
282
283 iret
284 ENDPROC(intvec_\vecname)
285 .endm
101 286
102#ifdef __COLLECT_LINKER_FEEDBACK__ 287#ifdef __COLLECT_LINKER_FEEDBACK__
103 .pushsection .text.intvec_feedback,"ax" 288 .pushsection .text.intvec_feedback,"ax"
@@ -118,15 +303,21 @@ intvec_feedback:
118 * The "processing" argument specifies the code for processing 303 * The "processing" argument specifies the code for processing
119 * the interrupt. Defaults to "handle_interrupt". 304 * the interrupt. Defaults to "handle_interrupt".
120 */ 305 */
121 .macro int_hand vecnum, vecname, c_routine, processing=handle_interrupt 306 .macro __int_hand vecnum, vecname, c_routine,processing=handle_interrupt
122 .org (\vecnum << 8)
123intvec_\vecname: 307intvec_\vecname:
124 /* Temporarily save a register so we have somewhere to work. */ 308 /* Temporarily save a register so we have somewhere to work. */
125 309
126 mtspr SPR_SYSTEM_SAVE_K_1, r0 310 mtspr SPR_SYSTEM_SAVE_K_1, r0
127 mfspr r0, SPR_EX_CONTEXT_K_1 311 mfspr r0, SPR_EX_CONTEXT_K_1
128 312
129 andi r0, r0, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */ 313 /*
314 * The unalign data fastpath code sets the low bit in sp to
315 * force us to reset it here on fault.
316 */
317 {
318 blbs sp, 2f
319 andi r0, r0, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */
320 }
130 321
131 .ifc \vecnum, INT_DOUBLE_FAULT 322 .ifc \vecnum, INT_DOUBLE_FAULT
132 /* 323 /*
@@ -176,7 +367,7 @@ intvec_\vecname:
176 } 367 }
177 .endif 368 .endif
178 369
179 3702:
180 /* 371 /*
181 * SYSTEM_SAVE_K_0 holds the cpu number in the low bits, and 372 * SYSTEM_SAVE_K_0 holds the cpu number in the low bits, and
182 * the current stack top in the higher bits. So we recover 373 * the current stack top in the higher bits. So we recover
@@ -1223,10 +1414,31 @@ STD_ENTRY(_sys_clone)
1223 j sys_clone 1414 j sys_clone
1224 STD_ENDPROC(_sys_clone) 1415 STD_ENDPROC(_sys_clone)
1225 1416
1226/* The single-step support may need to read all the registers. */ 1417 /*
1418 * Recover r3, r2, r1 and r0 here saved by unalign fast vector.
1419 * The vector area limit is 32 bundles, so we handle the reload here.
1420 * r0, r1, r2 are in thread_info from low to high memory in order.
1421 * r3 points to location the original r3 was saved.
1422 * We put this code in the __HEAD section so it can be reached
1423 * via a conditional branch from the fast path.
1424 */
1425 __HEAD
1426hand_unalign_slow:
1427 andi sp, sp, ~1
1428hand_unalign_slow_badsp:
1429 addi r3, r3, -(3 * 8)
1430 ld_add r0, r3, 8
1431 ld_add r1, r3, 8
1432 ld r2, r3
1433hand_unalign_slow_nonuser:
1434 mfspr r3, SPR_SYSTEM_SAVE_K_1
1435 __int_hand INT_UNALIGN_DATA, UNALIGN_DATA_SLOW, int_unalign
1436
1437/* The unaligned data support needs to read all the registers. */
1227int_unalign: 1438int_unalign:
1228 push_extra_callee_saves r0 1439 push_extra_callee_saves r0
1229 j do_trap 1440 j do_unaligned
1441ENDPROC(hand_unalign_slow)
1230 1442
1231/* Fill the return address stack with nonzero entries. */ 1443/* Fill the return address stack with nonzero entries. */
1232STD_ENTRY(fill_ra_stack) 1444STD_ENTRY(fill_ra_stack)
@@ -1240,6 +1452,11 @@ STD_ENTRY(fill_ra_stack)
12404: jrp r0 14524: jrp r0
1241 STD_ENDPROC(fill_ra_stack) 1453 STD_ENDPROC(fill_ra_stack)
1242 1454
1455 .macro int_hand vecnum, vecname, c_routine, processing=handle_interrupt
1456 .org (\vecnum << 8)
1457 __int_hand \vecnum, \vecname, \c_routine, \processing
1458 .endm
1459
1243/* Include .intrpt1 array of interrupt vectors */ 1460/* Include .intrpt1 array of interrupt vectors */
1244 .section ".intrpt1", "ax" 1461 .section ".intrpt1", "ax"
1245 1462
@@ -1272,7 +1489,7 @@ STD_ENTRY(fill_ra_stack)
1272 int_hand INT_SWINT_1, SWINT_1, SYSCALL, handle_syscall 1489 int_hand INT_SWINT_1, SWINT_1, SYSCALL, handle_syscall
1273 int_hand INT_SWINT_0, SWINT_0, do_trap 1490 int_hand INT_SWINT_0, SWINT_0, do_trap
1274 int_hand INT_ILL_TRANS, ILL_TRANS, do_trap 1491 int_hand INT_ILL_TRANS, ILL_TRANS, do_trap
1275 int_hand INT_UNALIGN_DATA, UNALIGN_DATA, int_unalign 1492 int_hand_unalign_fast INT_UNALIGN_DATA, UNALIGN_DATA
1276 int_hand INT_DTLB_MISS, DTLB_MISS, do_page_fault 1493 int_hand INT_DTLB_MISS, DTLB_MISS, do_page_fault
1277 int_hand INT_DTLB_ACCESS, DTLB_ACCESS, do_page_fault 1494 int_hand INT_DTLB_ACCESS, DTLB_ACCESS, do_page_fault
1278 int_hand INT_IDN_FIREWALL, IDN_FIREWALL, do_hardwall_trap 1495 int_hand INT_IDN_FIREWALL, IDN_FIREWALL, do_hardwall_trap
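To make the fast-path probe above easier to follow, here is a rough C rendering of the same direct-mapped lookup; the struct and function names are illustrative only (the kernel does this in the int_hand_unalign_fast assembly, and the slow path fills the entries):

#include <stdint.h>
#include <stddef.h>

#define UNALIGN_JIT_SHIFT 7	/* 128-byte slot per fixup, as defined in traps.h above */

/* One cache slot: fault PC, fault bundle, then up to 14 JIT'ed fixup bundles. */
struct unalign_jit_entry {
	uint64_t pc;
	uint64_t bundle;
	uint64_t insn[14];
};

/*
 * PC bits [3 .. 2 + page_shift - UNALIGN_JIT_SHIFT] select one of the
 * (page size / 128) slots; a hit requires both the fault PC and the
 * fault bundle stored in the slot to match the current fault.
 */
static struct unalign_jit_entry *
unalign_jit_lookup(struct unalign_jit_entry *base, uint64_t pc,
		   uint64_t bundle, unsigned int page_shift)
{
	uint64_t idx;
	struct unalign_jit_entry *e;

	if (base == NULL)	/* JIT page not mapped yet: take the slow path */
		return NULL;

	idx = (pc >> 3) & ((1ULL << (page_shift - UNALIGN_JIT_SHIFT)) - 1);
	e = &base[idx];

	if (e->pc != pc || e->bundle != bundle)
		return NULL;	/* miss: slow path regenerates (and overwrites) this slot */

	return e;		/* hit: the fast path "iret"s directly into e->insn[] */
}

With a 16KB page (page_shift = 14) this gives a 7-bit index, i.e. bits 3..9 of the fault PC and 128 slots, matching the comment in the handler.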
diff --git a/arch/tile/kernel/proc.c b/arch/tile/kernel/proc.c
index dafc447b5125..681100c59fda 100644
--- a/arch/tile/kernel/proc.c
+++ b/arch/tile/kernel/proc.c
@@ -113,7 +113,6 @@ arch_initcall(proc_tile_init);
113 * Support /proc/sys/tile directory 113 * Support /proc/sys/tile directory
114 */ 114 */
115 115
116#ifndef __tilegx__ /* FIXME: GX: no support for unaligned access yet */
117static ctl_table unaligned_subtable[] = { 116static ctl_table unaligned_subtable[] = {
118 { 117 {
119 .procname = "enabled", 118 .procname = "enabled",
@@ -160,4 +159,3 @@ static int __init proc_sys_tile_init(void)
160} 159}
161 160
162arch_initcall(proc_sys_tile_init); 161arch_initcall(proc_sys_tile_init);
163#endif
diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c
index 8d6c51d55762..25678b83b747 100644
--- a/arch/tile/kernel/process.c
+++ b/arch/tile/kernel/process.c
@@ -33,6 +33,7 @@
33#include <asm/syscalls.h> 33#include <asm/syscalls.h>
34#include <asm/traps.h> 34#include <asm/traps.h>
35#include <asm/setup.h> 35#include <asm/setup.h>
36#include <asm/uaccess.h>
36#ifdef CONFIG_HARDWALL 37#ifdef CONFIG_HARDWALL
37#include <asm/hardwall.h> 38#include <asm/hardwall.h>
38#endif 39#endif
@@ -147,6 +148,14 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
147 */ 148 */
148 task_thread_info(p)->step_state = NULL; 149 task_thread_info(p)->step_state = NULL;
149 150
151#ifdef __tilegx__
152 /*
153 * Do not clone unalign jit fixup from the parent; each thread
154 * must allocate its own on demand.
155 */
156 task_thread_info(p)->unalign_jit_base = NULL;
157#endif
158
150 /* 159 /*
151 * Copy the registers onto the kernel stack so the 160 * Copy the registers onto the kernel stack so the
152 * return-from-interrupt code will reload it into registers. 161 * return-from-interrupt code will reload it into registers.
@@ -205,6 +214,18 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
205 return 0; 214 return 0;
206} 215}
207 216
217int set_unalign_ctl(struct task_struct *tsk, unsigned int val)
218{
219 task_thread_info(tsk)->align_ctl = val;
220 return 0;
221}
222
223int get_unalign_ctl(struct task_struct *tsk, unsigned long adr)
224{
225 return put_user(task_thread_info(tsk)->align_ctl,
226 (unsigned int __user *)adr);
227}
228
208/* 229/*
209 * Return "current" if it looks plausible, or else a pointer to a dummy. 230 * Return "current" if it looks plausible, or else a pointer to a dummy.
210 * This can be helpful if we are just trying to emit a clean panic. 231 * This can be helpful if we are just trying to emit a clean panic.
diff --git a/arch/tile/kernel/ptrace.c b/arch/tile/kernel/ptrace.c
index 0f83ed4602b2..bac187498d61 100644
--- a/arch/tile/kernel/ptrace.c
+++ b/arch/tile/kernel/ptrace.c
@@ -272,7 +272,7 @@ void do_syscall_trace_exit(struct pt_regs *regs)
272 trace_sys_exit(regs, regs->regs[0]); 272 trace_sys_exit(regs, regs->regs[0]);
273} 273}
274 274
275void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) 275void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs)
276{ 276{
277 struct siginfo info; 277 struct siginfo info;
278 278
@@ -288,5 +288,5 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
288/* Handle synthetic interrupt delivered only by the simulator. */ 288/* Handle synthetic interrupt delivered only by the simulator. */
289void __kprobes do_breakpoint(struct pt_regs* regs, int fault_num) 289void __kprobes do_breakpoint(struct pt_regs* regs, int fault_num)
290{ 290{
291 send_sigtrap(current, regs, fault_num); 291 send_sigtrap(current, regs);
292} 292}
diff --git a/arch/tile/kernel/single_step.c b/arch/tile/kernel/single_step.c
index 27742e87e255..5ef2e9eae5c5 100644
--- a/arch/tile/kernel/single_step.c
+++ b/arch/tile/kernel/single_step.c
@@ -12,41 +12,30 @@
12 * more details. 12 * more details.
13 * 13 *
14 * A code-rewriter that enables instruction single-stepping. 14 * A code-rewriter that enables instruction single-stepping.
15 * Derived from iLib's single-stepping code.
16 */ 15 */
17 16
18#ifndef __tilegx__ /* Hardware support for single step unavailable. */ 17#include <linux/smp.h>
19 18#include <linux/ptrace.h>
20/* These functions are only used on the TILE platform */
21#include <linux/slab.h> 19#include <linux/slab.h>
22#include <linux/thread_info.h> 20#include <linux/thread_info.h>
23#include <linux/uaccess.h> 21#include <linux/uaccess.h>
24#include <linux/mman.h> 22#include <linux/mman.h>
25#include <linux/types.h> 23#include <linux/types.h>
26#include <linux/err.h> 24#include <linux/err.h>
25#include <linux/prctl.h>
27#include <asm/cacheflush.h> 26#include <asm/cacheflush.h>
27#include <asm/traps.h>
28#include <asm/uaccess.h>
28#include <asm/unaligned.h> 29#include <asm/unaligned.h>
29#include <arch/abi.h> 30#include <arch/abi.h>
31#include <arch/spr_def.h>
30#include <arch/opcode.h> 32#include <arch/opcode.h>
31 33
32#define signExtend17(val) sign_extend((val), 17)
33#define TILE_X1_MASK (0xffffffffULL << 31)
34
35int unaligned_printk;
36 34
37static int __init setup_unaligned_printk(char *str) 35#ifndef __tilegx__ /* Hardware support for single step unavailable. */
38{
39 long val;
40 if (strict_strtol(str, 0, &val) != 0)
41 return 0;
42 unaligned_printk = val;
43 pr_info("Printk for each unaligned data accesses is %s\n",
44 unaligned_printk ? "enabled" : "disabled");
45 return 1;
46}
47__setup("unaligned_printk=", setup_unaligned_printk);
48 36
49unsigned int unaligned_fixup_count; 37#define signExtend17(val) sign_extend((val), 17)
38#define TILE_X1_MASK (0xffffffffULL << 31)
50 39
51enum mem_op { 40enum mem_op {
52 MEMOP_NONE, 41 MEMOP_NONE,
@@ -56,12 +45,13 @@ enum mem_op {
56 MEMOP_STORE_POSTINCR 45 MEMOP_STORE_POSTINCR
57}; 46};
58 47
59static inline tile_bundle_bits set_BrOff_X1(tile_bundle_bits n, s32 offset) 48static inline tilepro_bundle_bits set_BrOff_X1(tilepro_bundle_bits n,
49 s32 offset)
60{ 50{
61 tile_bundle_bits result; 51 tilepro_bundle_bits result;
62 52
63 /* mask out the old offset */ 53 /* mask out the old offset */
64 tile_bundle_bits mask = create_BrOff_X1(-1); 54 tilepro_bundle_bits mask = create_BrOff_X1(-1);
65 result = n & (~mask); 55 result = n & (~mask);
66 56
67 /* or in the new offset */ 57 /* or in the new offset */
@@ -70,10 +60,11 @@ static inline tile_bundle_bits set_BrOff_X1(tile_bundle_bits n, s32 offset)
70 return result; 60 return result;
71} 61}
72 62
73static inline tile_bundle_bits move_X1(tile_bundle_bits n, int dest, int src) 63static inline tilepro_bundle_bits move_X1(tilepro_bundle_bits n, int dest,
64 int src)
74{ 65{
75 tile_bundle_bits result; 66 tilepro_bundle_bits result;
76 tile_bundle_bits op; 67 tilepro_bundle_bits op;
77 68
78 result = n & (~TILE_X1_MASK); 69 result = n & (~TILE_X1_MASK);
79 70
@@ -87,13 +78,13 @@ static inline tile_bundle_bits move_X1(tile_bundle_bits n, int dest, int src)
87 return result; 78 return result;
88} 79}
89 80
90static inline tile_bundle_bits nop_X1(tile_bundle_bits n) 81static inline tilepro_bundle_bits nop_X1(tilepro_bundle_bits n)
91{ 82{
92 return move_X1(n, TREG_ZERO, TREG_ZERO); 83 return move_X1(n, TREG_ZERO, TREG_ZERO);
93} 84}
94 85
95static inline tile_bundle_bits addi_X1( 86static inline tilepro_bundle_bits addi_X1(
96 tile_bundle_bits n, int dest, int src, int imm) 87 tilepro_bundle_bits n, int dest, int src, int imm)
97{ 88{
98 n &= ~TILE_X1_MASK; 89 n &= ~TILE_X1_MASK;
99 90
@@ -107,15 +98,26 @@ static inline tile_bundle_bits addi_X1(
107 return n; 98 return n;
108} 99}
109 100
110static tile_bundle_bits rewrite_load_store_unaligned( 101static tilepro_bundle_bits rewrite_load_store_unaligned(
111 struct single_step_state *state, 102 struct single_step_state *state,
112 tile_bundle_bits bundle, 103 tilepro_bundle_bits bundle,
113 struct pt_regs *regs, 104 struct pt_regs *regs,
114 enum mem_op mem_op, 105 enum mem_op mem_op,
115 int size, int sign_ext) 106 int size, int sign_ext)
116{ 107{
117 unsigned char __user *addr; 108 unsigned char __user *addr;
118 int val_reg, addr_reg, err, val; 109 int val_reg, addr_reg, err, val;
110 int align_ctl;
111
112 align_ctl = unaligned_fixup;
113 switch (task_thread_info(current)->align_ctl) {
114 case PR_UNALIGN_NOPRINT:
115 align_ctl = 1;
116 break;
117 case PR_UNALIGN_SIGBUS:
118 align_ctl = 0;
119 break;
120 }
119 121
120 /* Get address and value registers */ 122 /* Get address and value registers */
121 if (bundle & TILEPRO_BUNDLE_Y_ENCODING_MASK) { 123 if (bundle & TILEPRO_BUNDLE_Y_ENCODING_MASK) {
@@ -160,7 +162,7 @@ static tile_bundle_bits rewrite_load_store_unaligned(
160 * tilepro hardware would be doing, if it could provide us with the 162 * tilepro hardware would be doing, if it could provide us with the
161 * actual bad address in an SPR, which it doesn't. 163 * actual bad address in an SPR, which it doesn't.
162 */ 164 */
163 if (unaligned_fixup == 0) { 165 if (align_ctl == 0) {
164 siginfo_t info = { 166 siginfo_t info = {
165 .si_signo = SIGBUS, 167 .si_signo = SIGBUS,
166 .si_code = BUS_ADRALN, 168 .si_code = BUS_ADRALN,
@@ -209,14 +211,14 @@ static tile_bundle_bits rewrite_load_store_unaligned(
209 211
210 if (err) { 212 if (err) {
211 siginfo_t info = { 213 siginfo_t info = {
212 .si_signo = SIGSEGV, 214 .si_signo = SIGBUS,
213 .si_code = SEGV_MAPERR, 215 .si_code = BUS_ADRALN,
214 .si_addr = addr 216 .si_addr = addr
215 }; 217 };
216 trace_unhandled_signal("segfault", regs, 218 trace_unhandled_signal("bad address for unaligned fixup", regs,
217 (unsigned long)addr, SIGSEGV); 219 (unsigned long)addr, SIGBUS);
218 force_sig_info(info.si_signo, &info, current); 220 force_sig_info(info.si_signo, &info, current);
219 return (tile_bundle_bits) 0; 221 return (tilepro_bundle_bits) 0;
220 } 222 }
221 223
222 if (unaligned_printk || unaligned_fixup_count == 0) { 224 if (unaligned_printk || unaligned_fixup_count == 0) {
@@ -285,7 +287,7 @@ void single_step_execve(void)
285 ti->step_state = NULL; 287 ti->step_state = NULL;
286} 288}
287 289
288/** 290/*
289 * single_step_once() - entry point when single stepping has been triggered. 291 * single_step_once() - entry point when single stepping has been triggered.
290 * @regs: The machine register state 292 * @regs: The machine register state
291 * 293 *
@@ -304,20 +306,31 @@ void single_step_execve(void)
304 */ 306 */
305void single_step_once(struct pt_regs *regs) 307void single_step_once(struct pt_regs *regs)
306{ 308{
307 extern tile_bundle_bits __single_step_ill_insn; 309 extern tilepro_bundle_bits __single_step_ill_insn;
308 extern tile_bundle_bits __single_step_j_insn; 310 extern tilepro_bundle_bits __single_step_j_insn;
309 extern tile_bundle_bits __single_step_addli_insn; 311 extern tilepro_bundle_bits __single_step_addli_insn;
310 extern tile_bundle_bits __single_step_auli_insn; 312 extern tilepro_bundle_bits __single_step_auli_insn;
311 struct thread_info *info = (void *)current_thread_info(); 313 struct thread_info *info = (void *)current_thread_info();
312 struct single_step_state *state = info->step_state; 314 struct single_step_state *state = info->step_state;
313 int is_single_step = test_ti_thread_flag(info, TIF_SINGLESTEP); 315 int is_single_step = test_ti_thread_flag(info, TIF_SINGLESTEP);
314 tile_bundle_bits __user *buffer, *pc; 316 tilepro_bundle_bits __user *buffer, *pc;
315 tile_bundle_bits bundle; 317 tilepro_bundle_bits bundle;
316 int temp_reg; 318 int temp_reg;
317 int target_reg = TREG_LR; 319 int target_reg = TREG_LR;
318 int err; 320 int err;
319 enum mem_op mem_op = MEMOP_NONE; 321 enum mem_op mem_op = MEMOP_NONE;
320 int size = 0, sign_ext = 0; /* happy compiler */ 322 int size = 0, sign_ext = 0; /* happy compiler */
323 int align_ctl;
324
325 align_ctl = unaligned_fixup;
326 switch (task_thread_info(current)->align_ctl) {
327 case PR_UNALIGN_NOPRINT:
328 align_ctl = 1;
329 break;
330 case PR_UNALIGN_SIGBUS:
331 align_ctl = 0;
332 break;
333 }
321 334
322 asm( 335 asm(
323" .pushsection .rodata.single_step\n" 336" .pushsection .rodata.single_step\n"
@@ -390,7 +403,7 @@ void single_step_once(struct pt_regs *regs)
390 if (regs->faultnum == INT_SWINT_1) 403 if (regs->faultnum == INT_SWINT_1)
391 regs->pc -= 8; 404 regs->pc -= 8;
392 405
393 pc = (tile_bundle_bits __user *)(regs->pc); 406 pc = (tilepro_bundle_bits __user *)(regs->pc);
394 if (get_user(bundle, pc) != 0) { 407 if (get_user(bundle, pc) != 0) {
395 pr_err("Couldn't read instruction at %p trying to step\n", pc); 408 pr_err("Couldn't read instruction at %p trying to step\n", pc);
396 return; 409 return;
@@ -627,9 +640,9 @@ void single_step_once(struct pt_regs *regs)
627 640
628 /* 641 /*
629 * Check if we need to rewrite an unaligned load/store. 642 * Check if we need to rewrite an unaligned load/store.
630 * Returning zero is a special value meaning we need to SIGSEGV. 643 * Returning zero is a special value meaning we generated a signal.
631 */ 644 */
632 if (mem_op != MEMOP_NONE && unaligned_fixup >= 0) { 645 if (mem_op != MEMOP_NONE && align_ctl >= 0) {
633 bundle = rewrite_load_store_unaligned(state, bundle, regs, 646 bundle = rewrite_load_store_unaligned(state, bundle, regs,
634 mem_op, size, sign_ext); 647 mem_op, size, sign_ext);
635 if (bundle == 0) 648 if (bundle == 0)
@@ -668,9 +681,9 @@ void single_step_once(struct pt_regs *regs)
668 } 681 }
669 682
670 /* End with a jump back to the next instruction */ 683 /* End with a jump back to the next instruction */
671 delta = ((regs->pc + TILE_BUNDLE_SIZE_IN_BYTES) - 684 delta = ((regs->pc + TILEPRO_BUNDLE_SIZE_IN_BYTES) -
672 (unsigned long)buffer) >> 685 (unsigned long)buffer) >>
673 TILE_LOG2_BUNDLE_ALIGNMENT_IN_BYTES; 686 TILEPRO_LOG2_BUNDLE_ALIGNMENT_IN_BYTES;
674 bundle = __single_step_j_insn; 687 bundle = __single_step_j_insn;
675 bundle |= create_JOffLong_X1(delta); 688 bundle |= create_JOffLong_X1(delta);
676 err |= __put_user(bundle, buffer++); 689 err |= __put_user(bundle, buffer++);
@@ -698,9 +711,6 @@ void single_step_once(struct pt_regs *regs)
698} 711}
699 712
700#else 713#else
701#include <linux/smp.h>
702#include <linux/ptrace.h>
703#include <arch/spr_def.h>
704 714
705static DEFINE_PER_CPU(unsigned long, ss_saved_pc); 715static DEFINE_PER_CPU(unsigned long, ss_saved_pc);
706 716
@@ -743,10 +753,10 @@ void gx_singlestep_handle(struct pt_regs *regs, int fault_num)
743 } else if ((*ss_pc != regs->pc) || 753 } else if ((*ss_pc != regs->pc) ||
744 (!(control & SPR_SINGLE_STEP_CONTROL_1__CANCELED_MASK))) { 754 (!(control & SPR_SINGLE_STEP_CONTROL_1__CANCELED_MASK))) {
745 755
746 ptrace_notify(SIGTRAP);
747 control |= SPR_SINGLE_STEP_CONTROL_1__CANCELED_MASK; 756 control |= SPR_SINGLE_STEP_CONTROL_1__CANCELED_MASK;
748 control |= SPR_SINGLE_STEP_CONTROL_1__INHIBIT_MASK; 757 control |= SPR_SINGLE_STEP_CONTROL_1__INHIBIT_MASK;
749 __insn_mtspr(SPR_SINGLE_STEP_CONTROL_K, control); 758 __insn_mtspr(SPR_SINGLE_STEP_CONTROL_K, control);
759 send_sigtrap(current, regs);
750 } 760 }
751} 761}
752 762
diff --git a/arch/tile/kernel/unaligned.c b/arch/tile/kernel/unaligned.c
new file mode 100644
index 000000000000..b425fb6a480d
--- /dev/null
+++ b/arch/tile/kernel/unaligned.c
@@ -0,0 +1,1609 @@
1/*
2 * Copyright 2013 Tilera Corporation. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation, version 2.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11 * NON INFRINGEMENT. See the GNU General Public License for
12 * more details.
13 *
14 * A code-rewriter that handles unaligned exception.
15 */
16
17#include <linux/smp.h>
18#include <linux/ptrace.h>
19#include <linux/slab.h>
20#include <linux/thread_info.h>
21#include <linux/uaccess.h>
22#include <linux/mman.h>
23#include <linux/types.h>
24#include <linux/err.h>
25#include <linux/module.h>
26#include <linux/compat.h>
27#include <linux/prctl.h>
28#include <asm/cacheflush.h>
29#include <asm/traps.h>
30#include <asm/uaccess.h>
31#include <asm/unaligned.h>
32#include <arch/abi.h>
33#include <arch/spr_def.h>
34#include <arch/opcode.h>
35
36
37/*
 38 * This file handles unaligned exceptions for tilegx. The tilepro unaligned
 39 * exception is handled in single_step.c.
40 */
41
42int unaligned_printk;
43
44static int __init setup_unaligned_printk(char *str)
45{
46 long val;
47 if (kstrtol(str, 0, &val) != 0)
48 return 0;
49 unaligned_printk = val;
50 pr_info("Printk for each unaligned data accesses is %s\n",
51 unaligned_printk ? "enabled" : "disabled");
52 return 1;
53}
54__setup("unaligned_printk=", setup_unaligned_printk);
55
56unsigned int unaligned_fixup_count;
57
58#ifdef __tilegx__
59
60/*
 61 * Unalign data jit fixup code fragment. Reserved space is 128 bytes.
62 * The 1st 64-bit word saves fault PC address, 2nd word is the fault
63 * instruction bundle followed by 14 JIT bundles.
64 */
65
66struct unaligned_jit_fragment {
67 unsigned long pc;
68 tilegx_bundle_bits bundle;
69 tilegx_bundle_bits insn[14];
70};
71
72/*
73 * Check if a nop or fnop at bundle's pipeline X0.
74 */
75
76static bool is_bundle_x0_nop(tilegx_bundle_bits bundle)
77{
78 return (((get_UnaryOpcodeExtension_X0(bundle) ==
79 NOP_UNARY_OPCODE_X0) &&
80 (get_RRROpcodeExtension_X0(bundle) ==
81 UNARY_RRR_0_OPCODE_X0) &&
82 (get_Opcode_X0(bundle) ==
83 RRR_0_OPCODE_X0)) ||
84 ((get_UnaryOpcodeExtension_X0(bundle) ==
85 FNOP_UNARY_OPCODE_X0) &&
86 (get_RRROpcodeExtension_X0(bundle) ==
87 UNARY_RRR_0_OPCODE_X0) &&
88 (get_Opcode_X0(bundle) ==
89 RRR_0_OPCODE_X0)));
90}
91
92/*
93 * Check if nop or fnop at bundle's pipeline X1.
94 */
95
96static bool is_bundle_x1_nop(tilegx_bundle_bits bundle)
97{
98 return (((get_UnaryOpcodeExtension_X1(bundle) ==
99 NOP_UNARY_OPCODE_X1) &&
100 (get_RRROpcodeExtension_X1(bundle) ==
101 UNARY_RRR_0_OPCODE_X1) &&
102 (get_Opcode_X1(bundle) ==
103 RRR_0_OPCODE_X1)) ||
104 ((get_UnaryOpcodeExtension_X1(bundle) ==
105 FNOP_UNARY_OPCODE_X1) &&
106 (get_RRROpcodeExtension_X1(bundle) ==
107 UNARY_RRR_0_OPCODE_X1) &&
108 (get_Opcode_X1(bundle) ==
109 RRR_0_OPCODE_X1)));
110}
111
112/*
113 * Check if nop or fnop at bundle's Y0 pipeline.
114 */
115
116static bool is_bundle_y0_nop(tilegx_bundle_bits bundle)
117{
118 return (((get_UnaryOpcodeExtension_Y0(bundle) ==
119 NOP_UNARY_OPCODE_Y0) &&
120 (get_RRROpcodeExtension_Y0(bundle) ==
121 UNARY_RRR_1_OPCODE_Y0) &&
122 (get_Opcode_Y0(bundle) ==
123 RRR_1_OPCODE_Y0)) ||
124 ((get_UnaryOpcodeExtension_Y0(bundle) ==
125 FNOP_UNARY_OPCODE_Y0) &&
126 (get_RRROpcodeExtension_Y0(bundle) ==
127 UNARY_RRR_1_OPCODE_Y0) &&
128 (get_Opcode_Y0(bundle) ==
129 RRR_1_OPCODE_Y0)));
130}
131
132/*
133 * Check if nop or fnop at bundle's pipeline Y1.
134 */
135
136static bool is_bundle_y1_nop(tilegx_bundle_bits bundle)
137{
138 return (((get_UnaryOpcodeExtension_Y1(bundle) ==
139 NOP_UNARY_OPCODE_Y1) &&
140 (get_RRROpcodeExtension_Y1(bundle) ==
141 UNARY_RRR_1_OPCODE_Y1) &&
142 (get_Opcode_Y1(bundle) ==
143 RRR_1_OPCODE_Y1)) ||
144 ((get_UnaryOpcodeExtension_Y1(bundle) ==
145 FNOP_UNARY_OPCODE_Y1) &&
146 (get_RRROpcodeExtension_Y1(bundle) ==
147 UNARY_RRR_1_OPCODE_Y1) &&
148 (get_Opcode_Y1(bundle) ==
149 RRR_1_OPCODE_Y1)));
150}
151
152/*
153 * Test if a bundle's y0 and y1 pipelines are both nop or fnop.
154 */
155
156static bool is_y0_y1_nop(tilegx_bundle_bits bundle)
157{
158 return is_bundle_y0_nop(bundle) && is_bundle_y1_nop(bundle);
159}
160
161/*
162 * Test if a bundle's x0 and x1 pipelines are both nop or fnop.
163 */
164
165static bool is_x0_x1_nop(tilegx_bundle_bits bundle)
166{
167 return is_bundle_x0_nop(bundle) && is_bundle_x1_nop(bundle);
168}
169
170/*
171 * Find the destination, source registers of fault unalign access instruction
172 * at X1 or Y2. Also, allocate up to 3 scratch registers clob1, clob2 and
173 * clob3, which are guaranteed different from any register used in the fault
 174 * bundle. r_alias is used to return whether instructions other than the
 175 * unaligned load/store share a register with ra, rb or rd.
176 */
177
178static void find_regs(tilegx_bundle_bits bundle, uint64_t *rd, uint64_t *ra,
179 uint64_t *rb, uint64_t *clob1, uint64_t *clob2,
180 uint64_t *clob3, bool *r_alias)
181{
182 int i;
183 uint64_t reg;
184 uint64_t reg_map = 0, alias_reg_map = 0, map;
185 bool alias;
186
187 *ra = -1;
188 *rb = -1;
189
190 if (rd)
191 *rd = -1;
192
193 *clob1 = -1;
194 *clob2 = -1;
195 *clob3 = -1;
196 alias = false;
197
198 /*
199 * Parse fault bundle, find potential used registers and mark
200 * corresponding bits in reg_map and alias_map. These 2 bit maps
201 * are used to find the scratch registers and determine if there
202 * is register alais.
203 */
204 if (bundle & TILEGX_BUNDLE_MODE_MASK) { /* Y Mode Bundle. */
205
206 reg = get_SrcA_Y2(bundle);
207 reg_map |= 1ULL << reg;
208 *ra = reg;
209 reg = get_SrcBDest_Y2(bundle);
210 reg_map |= 1ULL << reg;
211
212 if (rd) {
213 /* Load. */
214 *rd = reg;
215 alias_reg_map = (1ULL << *rd) | (1ULL << *ra);
216 } else {
217 /* Store. */
218 *rb = reg;
219 alias_reg_map = (1ULL << *ra) | (1ULL << *rb);
220 }
221
222 if (!is_bundle_y1_nop(bundle)) {
223 reg = get_SrcA_Y1(bundle);
224 reg_map |= (1ULL << reg);
225 map = (1ULL << reg);
226
227 reg = get_SrcB_Y1(bundle);
228 reg_map |= (1ULL << reg);
229 map |= (1ULL << reg);
230
231 reg = get_Dest_Y1(bundle);
232 reg_map |= (1ULL << reg);
233 map |= (1ULL << reg);
234
235 if (map & alias_reg_map)
236 alias = true;
237 }
238
239 if (!is_bundle_y0_nop(bundle)) {
240 reg = get_SrcA_Y0(bundle);
241 reg_map |= (1ULL << reg);
242 map = (1ULL << reg);
243
244 reg = get_SrcB_Y0(bundle);
245 reg_map |= (1ULL << reg);
246 map |= (1ULL << reg);
247
248 reg = get_Dest_Y0(bundle);
249 reg_map |= (1ULL << reg);
250 map |= (1ULL << reg);
251
252 if (map & alias_reg_map)
253 alias = true;
254 }
255 } else { /* X Mode Bundle. */
256
257 reg = get_SrcA_X1(bundle);
258 reg_map |= (1ULL << reg);
259 *ra = reg;
260 if (rd) {
261 /* Load. */
262 reg = get_Dest_X1(bundle);
263 reg_map |= (1ULL << reg);
264 *rd = reg;
265 alias_reg_map = (1ULL << *rd) | (1ULL << *ra);
266 } else {
267 /* Store. */
268 reg = get_SrcB_X1(bundle);
269 reg_map |= (1ULL << reg);
270 *rb = reg;
271 alias_reg_map = (1ULL << *ra) | (1ULL << *rb);
272 }
273
274 if (!is_bundle_x0_nop(bundle)) {
275 reg = get_SrcA_X0(bundle);
276 reg_map |= (1ULL << reg);
277 map = (1ULL << reg);
278
279 reg = get_SrcB_X0(bundle);
280 reg_map |= (1ULL << reg);
281 map |= (1ULL << reg);
282
283 reg = get_Dest_X0(bundle);
284 reg_map |= (1ULL << reg);
285 map |= (1ULL << reg);
286
287 if (map & alias_reg_map)
288 alias = true;
289 }
290 }
291
292 /*
293 * "alias" indicates if the unalign access registers have collision
 294 * with others in the same bundle. We just test the all-register-
 295 * operands case (RRR) and ignore the case with an immediate. If a bundle
 296 * has no register alias, we may do the fixup in a simpler or faster manner.
 297 * So if an immediate field happens to collide with a register, we may
 298 * end up falling back to the generic handling.
299 */
300
301 *r_alias = alias;
302
303 /* Flip bits on reg_map. */
304 reg_map ^= -1ULL;
305
306 /* Scan reg_map lower 54(TREG_SP) bits to find 3 set bits. */
307 for (i = 0; i < TREG_SP; i++) {
308 if (reg_map & (0x1ULL << i)) {
309 if (*clob1 == -1) {
310 *clob1 = i;
311 } else if (*clob2 == -1) {
312 *clob2 = i;
313 } else if (*clob3 == -1) {
314 *clob3 = i;
315 return;
316 }
317 }
318 }
319}
320
321/*
322 * Sanity check for register ra, rb, rd, clob1/2/3. Return true if any of them
323 * is unexpected.
324 */
325
326static bool check_regs(uint64_t rd, uint64_t ra, uint64_t rb,
327 uint64_t clob1, uint64_t clob2, uint64_t clob3)
328{
329 bool unexpected = false;
330 if ((ra >= 56) && (ra != TREG_ZERO))
331 unexpected = true;
332
333 if ((clob1 >= 56) || (clob2 >= 56) || (clob3 >= 56))
334 unexpected = true;
335
336 if (rd != -1) {
337 if ((rd >= 56) && (rd != TREG_ZERO))
338 unexpected = true;
339 } else {
340 if ((rb >= 56) && (rb != TREG_ZERO))
341 unexpected = true;
342 }
343 return unexpected;
344}
345
346
347#define GX_INSN_X0_MASK ((1ULL << 31) - 1)
348#define GX_INSN_X1_MASK (((1ULL << 31) - 1) << 31)
349#define GX_INSN_Y0_MASK ((0xFULL << 27) | (0xFFFFFULL))
350#define GX_INSN_Y1_MASK (GX_INSN_Y0_MASK << 31)
351#define GX_INSN_Y2_MASK ((0x7FULL << 51) | (0x7FULL << 20))
352
353#ifdef __LITTLE_ENDIAN
354#define GX_INSN_BSWAP(_bundle_) (_bundle_)
355#else
356#define GX_INSN_BSWAP(_bundle_) swab64(_bundle_)
357#endif /* __LITTLE_ENDIAN */
358
359/*
360 * __JIT_CODE(.) creates template bundles in .rodata.unalign_data section.
361 * The corresponding static function jix_x#_###(.) generates partial or
362 * whole bundle based on the template and given arguments.
363 */
364
365#define __JIT_CODE(_X_) \
366 asm (".pushsection .rodata.unalign_data, \"a\"\n" \
367 _X_"\n" \
368 ".popsection\n")
369
370__JIT_CODE("__unalign_jit_x1_mtspr: {mtspr 0, r0}");
371static tilegx_bundle_bits jit_x1_mtspr(int spr, int reg)
372{
373 extern tilegx_bundle_bits __unalign_jit_x1_mtspr;
374 return (GX_INSN_BSWAP(__unalign_jit_x1_mtspr) & GX_INSN_X1_MASK) |
375 create_MT_Imm14_X1(spr) | create_SrcA_X1(reg);
376}
377
378__JIT_CODE("__unalign_jit_x1_mfspr: {mfspr r0, 0}");
379static tilegx_bundle_bits jit_x1_mfspr(int reg, int spr)
380{
381 extern tilegx_bundle_bits __unalign_jit_x1_mfspr;
382 return (GX_INSN_BSWAP(__unalign_jit_x1_mfspr) & GX_INSN_X1_MASK) |
383 create_MF_Imm14_X1(spr) | create_Dest_X1(reg);
384}
385
386__JIT_CODE("__unalign_jit_x0_addi: {addi r0, r0, 0; iret}");
387static tilegx_bundle_bits jit_x0_addi(int rd, int ra, int imm8)
388{
389 extern tilegx_bundle_bits __unalign_jit_x0_addi;
390 return (GX_INSN_BSWAP(__unalign_jit_x0_addi) & GX_INSN_X0_MASK) |
391 create_Dest_X0(rd) | create_SrcA_X0(ra) |
392 create_Imm8_X0(imm8);
393}
394
395__JIT_CODE("__unalign_jit_x1_ldna: {ldna r0, r0}");
396static tilegx_bundle_bits jit_x1_ldna(int rd, int ra)
397{
398 extern tilegx_bundle_bits __unalign_jit_x1_ldna;
399 return (GX_INSN_BSWAP(__unalign_jit_x1_ldna) & GX_INSN_X1_MASK) |
400 create_Dest_X1(rd) | create_SrcA_X1(ra);
401}
402
403__JIT_CODE("__unalign_jit_x0_dblalign: {dblalign r0, r0 ,r0}");
404static tilegx_bundle_bits jit_x0_dblalign(int rd, int ra, int rb)
405{
406 extern tilegx_bundle_bits __unalign_jit_x0_dblalign;
407 return (GX_INSN_BSWAP(__unalign_jit_x0_dblalign) & GX_INSN_X0_MASK) |
408 create_Dest_X0(rd) | create_SrcA_X0(ra) |
409 create_SrcB_X0(rb);
410}
411
412__JIT_CODE("__unalign_jit_x1_iret: {iret}");
413static tilegx_bundle_bits jit_x1_iret(void)
414{
415 extern tilegx_bundle_bits __unalign_jit_x1_iret;
416 return GX_INSN_BSWAP(__unalign_jit_x1_iret) & GX_INSN_X1_MASK;
417}
418
419__JIT_CODE("__unalign_jit_x01_fnop: {fnop;fnop}");
420static tilegx_bundle_bits jit_x0_fnop(void)
421{
422 extern tilegx_bundle_bits __unalign_jit_x01_fnop;
423 return GX_INSN_BSWAP(__unalign_jit_x01_fnop) & GX_INSN_X0_MASK;
424}
425
426static tilegx_bundle_bits jit_x1_fnop(void)
427{
428 extern tilegx_bundle_bits __unalign_jit_x01_fnop;
429 return GX_INSN_BSWAP(__unalign_jit_x01_fnop) & GX_INSN_X1_MASK;
430}
431
432__JIT_CODE("__unalign_jit_y2_dummy: {fnop; fnop; ld zero, sp}");
433static tilegx_bundle_bits jit_y2_dummy(void)
434{
435 extern tilegx_bundle_bits __unalign_jit_y2_dummy;
436 return GX_INSN_BSWAP(__unalign_jit_y2_dummy) & GX_INSN_Y2_MASK;
437}
438
439static tilegx_bundle_bits jit_y1_fnop(void)
440{
441 extern tilegx_bundle_bits __unalign_jit_y2_dummy;
442 return GX_INSN_BSWAP(__unalign_jit_y2_dummy) & GX_INSN_Y1_MASK;
443}
444
445__JIT_CODE("__unalign_jit_x1_st1_add: {st1_add r1, r0, 0}");
446static tilegx_bundle_bits jit_x1_st1_add(int ra, int rb, int imm8)
447{
448 extern tilegx_bundle_bits __unalign_jit_x1_st1_add;
449 return (GX_INSN_BSWAP(__unalign_jit_x1_st1_add) &
450 (~create_SrcA_X1(-1)) &
451 GX_INSN_X1_MASK) | create_SrcA_X1(ra) |
452 create_SrcB_X1(rb) | create_Dest_Imm8_X1(imm8);
453}
454
455__JIT_CODE("__unalign_jit_x1_st: {crc32_8 r1, r0, r0; st r0, r0}");
456static tilegx_bundle_bits jit_x1_st(int ra, int rb)
457{
458 extern tilegx_bundle_bits __unalign_jit_x1_st;
459 return (GX_INSN_BSWAP(__unalign_jit_x1_st) & GX_INSN_X1_MASK) |
460 create_SrcA_X1(ra) | create_SrcB_X1(rb);
461}
462
463__JIT_CODE("__unalign_jit_x1_st_add: {st_add r1, r0, 0}");
464static tilegx_bundle_bits jit_x1_st_add(int ra, int rb, int imm8)
465{
466 extern tilegx_bundle_bits __unalign_jit_x1_st_add;
467 return (GX_INSN_BSWAP(__unalign_jit_x1_st_add) &
468 (~create_SrcA_X1(-1)) &
469 GX_INSN_X1_MASK) | create_SrcA_X1(ra) |
470 create_SrcB_X1(rb) | create_Dest_Imm8_X1(imm8);
471}
472
473__JIT_CODE("__unalign_jit_x1_ld: {crc32_8 r1, r0, r0; ld r0, r0}");
474static tilegx_bundle_bits jit_x1_ld(int rd, int ra)
475{
476 extern tilegx_bundle_bits __unalign_jit_x1_ld;
477 return (GX_INSN_BSWAP(__unalign_jit_x1_ld) & GX_INSN_X1_MASK) |
478 create_Dest_X1(rd) | create_SrcA_X1(ra);
479}
480
481__JIT_CODE("__unalign_jit_x1_ld_add: {ld_add r1, r0, 0}");
482static tilegx_bundle_bits jit_x1_ld_add(int rd, int ra, int imm8)
483{
484 extern tilegx_bundle_bits __unalign_jit_x1_ld_add;
485 return (GX_INSN_BSWAP(__unalign_jit_x1_ld_add) &
486 (~create_Dest_X1(-1)) &
487 GX_INSN_X1_MASK) | create_Dest_X1(rd) |
488 create_SrcA_X1(ra) | create_Imm8_X1(imm8);
489}
490
491__JIT_CODE("__unalign_jit_x0_bfexts: {bfexts r0, r0, 0, 0}");
492static tilegx_bundle_bits jit_x0_bfexts(int rd, int ra, int bfs, int bfe)
493{
494 extern tilegx_bundle_bits __unalign_jit_x0_bfexts;
495 return (GX_INSN_BSWAP(__unalign_jit_x0_bfexts) &
496 GX_INSN_X0_MASK) |
497 create_Dest_X0(rd) | create_SrcA_X0(ra) |
498 create_BFStart_X0(bfs) | create_BFEnd_X0(bfe);
499}
500
501__JIT_CODE("__unalign_jit_x0_bfextu: {bfextu r0, r0, 0, 0}");
502static tilegx_bundle_bits jit_x0_bfextu(int rd, int ra, int bfs, int bfe)
503{
504 extern tilegx_bundle_bits __unalign_jit_x0_bfextu;
505 return (GX_INSN_BSWAP(__unalign_jit_x0_bfextu) &
506 GX_INSN_X0_MASK) |
507 create_Dest_X0(rd) | create_SrcA_X0(ra) |
508 create_BFStart_X0(bfs) | create_BFEnd_X0(bfe);
509}
510
511__JIT_CODE("__unalign_jit_x1_addi: {bfextu r1, r1, 0, 0; addi r0, r0, 0}");
512static tilegx_bundle_bits jit_x1_addi(int rd, int ra, int imm8)
513{
514 extern tilegx_bundle_bits __unalign_jit_x1_addi;
515 return (GX_INSN_BSWAP(__unalign_jit_x1_addi) & GX_INSN_X1_MASK) |
516 create_Dest_X1(rd) | create_SrcA_X1(ra) |
517 create_Imm8_X1(imm8);
518}
519
520__JIT_CODE("__unalign_jit_x0_shrui: {shrui r0, r0, 0; iret}");
521static tilegx_bundle_bits jit_x0_shrui(int rd, int ra, int imm6)
522{
523 extern tilegx_bundle_bits __unalign_jit_x0_shrui;
524 return (GX_INSN_BSWAP(__unalign_jit_x0_shrui) &
525 GX_INSN_X0_MASK) |
526 create_Dest_X0(rd) | create_SrcA_X0(ra) |
527 create_ShAmt_X0(imm6);
528}
529
530__JIT_CODE("__unalign_jit_x0_rotli: {rotli r0, r0, 0; iret}");
531static tilegx_bundle_bits jit_x0_rotli(int rd, int ra, int imm6)
532{
533 extern tilegx_bundle_bits __unalign_jit_x0_rotli;
534 return (GX_INSN_BSWAP(__unalign_jit_x0_rotli) &
535 GX_INSN_X0_MASK) |
536 create_Dest_X0(rd) | create_SrcA_X0(ra) |
537 create_ShAmt_X0(imm6);
538}
539
540__JIT_CODE("__unalign_jit_x1_bnezt: {bnezt r0, __unalign_jit_x1_bnezt}");
541static tilegx_bundle_bits jit_x1_bnezt(int ra, int broff)
542{
543 extern tilegx_bundle_bits __unalign_jit_x1_bnezt;
544 return (GX_INSN_BSWAP(__unalign_jit_x1_bnezt) &
545 GX_INSN_X1_MASK) |
546 create_SrcA_X1(ra) | create_BrOff_X1(broff);
547}
548
549#undef __JIT_CODE
550
551/*
552 * This function generates unalign fixup JIT.
553 *
 554 * We first find the unaligned load/store instruction's destination and
 555 * source registers (ra, rb and rd) and 3 scratch registers by calling
 556 * find_regs(...). The 3 scratch clobbers must not alias with any register
 557 * used in the fault bundle. Then we analyze the fault bundle to determine
 558 * if it's a load or store, the operand width, branch or address increment, etc.
 559 * At last, the generated JIT is copied into the JIT code area in user space.
560 */
561
562static
563void jit_bundle_gen(struct pt_regs *regs, tilegx_bundle_bits bundle,
564 int align_ctl)
565{
566 struct thread_info *info = current_thread_info();
567 struct unaligned_jit_fragment frag;
568 struct unaligned_jit_fragment *jit_code_area;
569 tilegx_bundle_bits bundle_2 = 0;
570 /* If bundle_2_enable = false, bundle_2 is fnop/nop operation. */
571 bool bundle_2_enable = true;
572 uint64_t ra, rb, rd = -1, clob1, clob2, clob3;
573 /*
574 * Indicate if the unalign access
575 * instruction's registers hit with
576 * others in the same bundle.
577 */
578 bool alias = false;
579 bool load_n_store = true;
580 bool load_store_signed = false;
581 unsigned int load_store_size = 8;
582 bool y1_br = false; /* True, for a branch in same bundle at Y1.*/
583 int y1_br_reg = 0;
584 /* True for link operation. i.e. jalr or lnk at Y1 */
585 bool y1_lr = false;
586 int y1_lr_reg = 0;
587 bool x1_add = false;/* True, for load/store ADD instruction at X1*/
588 int x1_add_imm8 = 0;
589 bool unexpected = false;
590 int n = 0, k;
591
592 jit_code_area =
593 (struct unaligned_jit_fragment *)(info->unalign_jit_base);
594
595 memset((void *)&frag, 0, sizeof(frag));
596
597 /* 0: X mode, Otherwise: Y mode. */
598 if (bundle & TILEGX_BUNDLE_MODE_MASK) {
599 unsigned int mod, opcode;
600
601 if (get_Opcode_Y1(bundle) == RRR_1_OPCODE_Y1 &&
602 get_RRROpcodeExtension_Y1(bundle) ==
603 UNARY_RRR_1_OPCODE_Y1) {
604
605 opcode = get_UnaryOpcodeExtension_Y1(bundle);
606
607 /*
608 * Test "jalr", "jalrp", "jr", "jrp" instruction at Y1
609 * pipeline.
610 */
611 switch (opcode) {
612 case JALR_UNARY_OPCODE_Y1:
613 case JALRP_UNARY_OPCODE_Y1:
614 y1_lr = true;
615 y1_lr_reg = 55; /* Link register. */
616 /* FALLTHROUGH */
617 case JR_UNARY_OPCODE_Y1:
618 case JRP_UNARY_OPCODE_Y1:
619 y1_br = true;
620 y1_br_reg = get_SrcA_Y1(bundle);
621 break;
622 case LNK_UNARY_OPCODE_Y1:
623 /* "lnk" at Y1 pipeline. */
624 y1_lr = true;
625 y1_lr_reg = get_Dest_Y1(bundle);
626 break;
627 }
628 }
629
630 opcode = get_Opcode_Y2(bundle);
631 mod = get_Mode(bundle);
632
633 /*
634 * bundle_2 is bundle after making Y2 as a dummy operation
635 * - ld zero, sp
636 */
637 bundle_2 = (bundle & (~GX_INSN_Y2_MASK)) | jit_y2_dummy();
638
639 /* Make Y1 as fnop if Y1 is a branch or lnk operation. */
640 if (y1_br || y1_lr) {
641 bundle_2 &= ~(GX_INSN_Y1_MASK);
642 bundle_2 |= jit_y1_fnop();
643 }
644
645 if (is_y0_y1_nop(bundle_2))
646 bundle_2_enable = false;
647
648 if (mod == MODE_OPCODE_YC2) {
649 /* Store. */
650 load_n_store = false;
651 load_store_size = 1 << opcode;
652 load_store_signed = false;
653 find_regs(bundle, 0, &ra, &rb, &clob1, &clob2,
654 &clob3, &alias);
655 if (load_store_size > 8)
656 unexpected = true;
657 } else {
658 /* Load. */
659 load_n_store = true;
660 if (mod == MODE_OPCODE_YB2) {
661 switch (opcode) {
662 case LD_OPCODE_Y2:
663 load_store_signed = false;
664 load_store_size = 8;
665 break;
666 case LD4S_OPCODE_Y2:
667 load_store_signed = true;
668 load_store_size = 4;
669 break;
670 case LD4U_OPCODE_Y2:
671 load_store_signed = false;
672 load_store_size = 4;
673 break;
674 default:
675 unexpected = true;
676 }
677 } else if (mod == MODE_OPCODE_YA2) {
678 if (opcode == LD2S_OPCODE_Y2) {
679 load_store_signed = true;
680 load_store_size = 2;
681 } else if (opcode == LD2U_OPCODE_Y2) {
682 load_store_signed = false;
683 load_store_size = 2;
684 } else
685 unexpected = true;
686 } else
687 unexpected = true;
688 find_regs(bundle, &rd, &ra, &rb, &clob1, &clob2,
689 &clob3, &alias);
690 }
691 } else {
692 unsigned int opcode;
693
694 /* bundle_2 is bundle after making X1 as "fnop". */
695 bundle_2 = (bundle & (~GX_INSN_X1_MASK)) | jit_x1_fnop();
696
697 if (is_x0_x1_nop(bundle_2))
698 bundle_2_enable = false;
699
700 if (get_Opcode_X1(bundle) == RRR_0_OPCODE_X1) {
701 opcode = get_UnaryOpcodeExtension_X1(bundle);
702
703 if (get_RRROpcodeExtension_X1(bundle) ==
704 UNARY_RRR_0_OPCODE_X1) {
705 load_n_store = true;
706 find_regs(bundle, &rd, &ra, &rb, &clob1,
707 &clob2, &clob3, &alias);
708
709 switch (opcode) {
710 case LD_UNARY_OPCODE_X1:
711 load_store_signed = false;
712 load_store_size = 8;
713 break;
714 case LD4S_UNARY_OPCODE_X1:
715 load_store_signed = true;
716 /* FALLTHROUGH */
717 case LD4U_UNARY_OPCODE_X1:
718 load_store_size = 4;
719 break;
720
721 case LD2S_UNARY_OPCODE_X1:
722 load_store_signed = true;
723 /* FALLTHROUGH */
724 case LD2U_UNARY_OPCODE_X1:
725 load_store_size = 2;
726 break;
727 default:
728 unexpected = true;
729 }
730 } else {
731 load_n_store = false;
732 load_store_signed = false;
733 find_regs(bundle, 0, &ra, &rb,
734 &clob1, &clob2, &clob3,
735 &alias);
736
737 opcode = get_RRROpcodeExtension_X1(bundle);
738 switch (opcode) {
739 case ST_RRR_0_OPCODE_X1:
740 load_store_size = 8;
741 break;
742 case ST4_RRR_0_OPCODE_X1:
743 load_store_size = 4;
744 break;
745 case ST2_RRR_0_OPCODE_X1:
746 load_store_size = 2;
747 break;
748 default:
749 unexpected = true;
750 }
751 }
752 } else if (get_Opcode_X1(bundle) == IMM8_OPCODE_X1) {
753 load_n_store = true;
754 opcode = get_Imm8OpcodeExtension_X1(bundle);
755 switch (opcode) {
756 case LD_ADD_IMM8_OPCODE_X1:
757 load_store_size = 8;
758 break;
759
760 case LD4S_ADD_IMM8_OPCODE_X1:
761 load_store_signed = true;
762 /* FALLTHROUGH */
763 case LD4U_ADD_IMM8_OPCODE_X1:
764 load_store_size = 4;
765 break;
766
767 case LD2S_ADD_IMM8_OPCODE_X1:
768 load_store_signed = true;
769 /* FALLTHROUGH */
770 case LD2U_ADD_IMM8_OPCODE_X1:
771 load_store_size = 2;
772 break;
773
774 case ST_ADD_IMM8_OPCODE_X1:
775 load_n_store = false;
776 load_store_size = 8;
777 break;
778 case ST4_ADD_IMM8_OPCODE_X1:
779 load_n_store = false;
780 load_store_size = 4;
781 break;
782 case ST2_ADD_IMM8_OPCODE_X1:
783 load_n_store = false;
784 load_store_size = 2;
785 break;
786 default:
787 unexpected = true;
788 }
789
790 if (!unexpected) {
791 x1_add = true;
792 if (load_n_store)
793 x1_add_imm8 = get_Imm8_X1(bundle);
794 else
795 x1_add_imm8 = get_Dest_Imm8_X1(bundle);
796 }
797
798 find_regs(bundle, load_n_store ? (&rd) : NULL,
799 &ra, &rb, &clob1, &clob2, &clob3, &alias);
800 } else
801 unexpected = true;
802 }
803
 804	/*
 805	 * Sanity-check the register numbers extracted from the fault bundle.
 806	 */
807 if (check_regs(rd, ra, rb, clob1, clob2, clob3) == true)
808 unexpected = true;
809
810 /* Give warning if register ra has an aligned address. */
811 if (!unexpected)
812 WARN_ON(!((load_store_size - 1) & (regs->regs[ra])));
813
814
 815	/*
 816	 * If the fault came from kernel space, we only need to take care of
 817	 * the unaligned "get_user/put_user" macros defined in "uaccess.h".
 818	 * Basically, we will handle a bundle like this:
 819	 * {ld/2u/4s rd, ra; movei rx, 0} or {st/2/4 ra, rb; movei rx, 0}
 820	 * (refer to "arch/tile/include/asm/uaccess.h" for details).
 821	 * For either load or store, the byte-wise operation is performed by
 822	 * calling get_user() or put_user(). If the macro returns a non-zero
 823	 * value, that value is placed in rx; otherwise rx is set to zero.
 824	 * Finally, make pc point to the next bundle and return.
 825	 */
826
827 if (EX1_PL(regs->ex1) != USER_PL) {
828
829 unsigned long rx = 0;
830 unsigned long x = 0, ret = 0;
831
832 if (y1_br || y1_lr || x1_add ||
833 (load_store_signed !=
834 (load_n_store && load_store_size == 4))) {
 835			/* Branches, links, load/store-adds and unexpected sign-extension are not handled here. */
836 unexpected = true;
837 } else if (!unexpected) {
838 if (bundle & TILEGX_BUNDLE_MODE_MASK) {
839 /*
840 * Fault bundle is Y mode.
841 * Check if the Y1 and Y0 is the form of
842 * { movei rx, 0; nop/fnop }, if yes,
843 * find the rx.
844 */
845
846 if ((get_Opcode_Y1(bundle) == ADDI_OPCODE_Y1)
847 && (get_SrcA_Y1(bundle) == TREG_ZERO) &&
848 (get_Imm8_Y1(bundle) == 0) &&
849 is_bundle_y0_nop(bundle)) {
850 rx = get_Dest_Y1(bundle);
851 } else if ((get_Opcode_Y0(bundle) ==
852 ADDI_OPCODE_Y0) &&
853 (get_SrcA_Y0(bundle) == TREG_ZERO) &&
854 (get_Imm8_Y0(bundle) == 0) &&
855 is_bundle_y1_nop(bundle)) {
856 rx = get_Dest_Y0(bundle);
857 } else {
858 unexpected = true;
859 }
860 } else {
861 /*
862 * Fault bundle is X mode.
863 * Check if the X0 is 'movei rx, 0',
864 * if yes, find the rx.
865 */
866
867 if ((get_Opcode_X0(bundle) == IMM8_OPCODE_X0)
868 && (get_Imm8OpcodeExtension_X0(bundle) ==
869 ADDI_IMM8_OPCODE_X0) &&
870 (get_SrcA_X0(bundle) == TREG_ZERO) &&
871 (get_Imm8_X0(bundle) == 0)) {
872 rx = get_Dest_X0(bundle);
873 } else {
874 unexpected = true;
875 }
876 }
877
878 /* rx should be less than 56. */
879 if (!unexpected && (rx >= 56))
880 unexpected = true;
881 }
882
883 if (!search_exception_tables(regs->pc)) {
884 /* No fixup in the exception tables for the pc. */
885 unexpected = true;
886 }
887
888 if (unexpected) {
889 /* Unexpected unalign kernel fault. */
890 struct task_struct *tsk = validate_current();
891
892 bust_spinlocks(1);
893
894 show_regs(regs);
895
896 if (unlikely(tsk->pid < 2)) {
897 panic("Kernel unalign fault running %s!",
898 tsk->pid ? "init" : "the idle task");
899 }
900#ifdef SUPPORT_DIE
901 die("Oops", regs);
902#endif
 903			bust_spinlocks(0);
904
905 do_group_exit(SIGKILL);
906
907 } else {
908 unsigned long i, b = 0;
909 unsigned char *ptr =
910 (unsigned char *)regs->regs[ra];
911 if (load_n_store) {
912 /* handle get_user(x, ptr) */
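				/*
				 * For example, for a 4-byte little-endian load
				 * of the bytes 0x44 0x33 0x22 0x11 (in ascending
				 * address order), x is assembled below as
				 * 0x11223344.
				 */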
913 for (i = 0; i < load_store_size; i++) {
914 ret = get_user(b, ptr++);
915 if (!ret) {
916 /* Success! update x. */
917#ifdef __LITTLE_ENDIAN
918 x |= (b << (8 * i));
919#else
920 x <<= 8;
921 x |= b;
922#endif /* __LITTLE_ENDIAN */
923 } else {
924 x = 0;
925 break;
926 }
927 }
928
929 /* Sign-extend 4-byte loads. */
930 if (load_store_size == 4)
931 x = (long)(int)x;
932
933 /* Set register rd. */
934 regs->regs[rd] = x;
935
936 /* Set register rx. */
937 regs->regs[rx] = ret;
938
939 /* Bump pc. */
940 regs->pc += 8;
941
942 } else {
943 /* Handle put_user(x, ptr) */
944 x = regs->regs[rb];
945#ifdef __LITTLE_ENDIAN
946 b = x;
947#else
 948				/*
 949				 * Byte-swap x so that it is stored from low
 950				 * to high memory, the same as in the
 951				 * little-endian case.
 952				 */
953 switch (load_store_size) {
954 case 8:
955 b = swab64(x);
956 break;
957 case 4:
958 b = swab32(x);
959 break;
960 case 2:
961 b = swab16(x);
962 break;
963 }
964#endif /* __LITTLE_ENDIAN */
965 for (i = 0; i < load_store_size; i++) {
966 ret = put_user(b, ptr++);
967 if (ret)
968 break;
969 /* Success! shift 1 byte. */
970 b >>= 8;
971 }
972 /* Set register rx. */
973 regs->regs[rx] = ret;
974
975 /* Bump pc. */
976 regs->pc += 8;
977 }
978 }
979
980 unaligned_fixup_count++;
981
982 if (unaligned_printk) {
983 pr_info("%s/%d. Unalign fixup for kernel access "
984 "to userspace %lx.",
985 current->comm, current->pid, regs->regs[ra]);
986 }
987
988 /* Done! Return to the exception handler. */
989 return;
990 }
991
992 if ((align_ctl == 0) || unexpected) {
993 siginfo_t info = {
994 .si_signo = SIGBUS,
995 .si_code = BUS_ADRALN,
996 .si_addr = (unsigned char __user *)0
997 };
998 if (unaligned_printk)
999 pr_info("Unalign bundle: unexp @%llx, %llx",
1000 (unsigned long long)regs->pc,
1001 (unsigned long long)bundle);
1002
1003 if (ra < 56) {
1004 unsigned long uaa = (unsigned long)regs->regs[ra];
1005			/* Set the bus address. */
1006 info.si_addr = (unsigned char __user *)uaa;
1007 }
1008
1009 unaligned_fixup_count++;
1010
1011 trace_unhandled_signal("unaligned fixup trap", regs,
1012 (unsigned long)info.si_addr, SIGBUS);
1013 force_sig_info(info.si_signo, &info, current);
1014 return;
1015 }
1016
1017#ifdef __LITTLE_ENDIAN
1018#define UA_FIXUP_ADDR_DELTA 1
1019#define UA_FIXUP_BFEXT_START(_B_) 0
1020#define UA_FIXUP_BFEXT_END(_B_) (8 * (_B_) - 1)
1021#else /* __BIG_ENDIAN */
1022#define UA_FIXUP_ADDR_DELTA -1
1023#define UA_FIXUP_BFEXT_START(_B_) (64 - 8 * (_B_))
1024#define UA_FIXUP_BFEXT_END(_B_) 63
1025#endif /* __LITTLE_ENDIAN */
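/*
 * For example, for a 4-byte access these extract bits [0,31] of the 64-bit
 * dblalign result on a little-endian kernel, and bits [32,63] on a
 * big-endian kernel.
 */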
1026
1027
1028
1029 if ((ra != rb) && (rd != TREG_SP) && !alias &&
1030 !y1_br && !y1_lr && !x1_add) {
1031		/*
1032		 * Simple case: ra != rb, no register alias found, and no
1033		 * branch, link or add. This will be the majority.
1034		 * We can do a little better for this simple case than the
1035		 * generic scheme below.
1036		 */
1037 if (!load_n_store) {
1038			/*
1039			 * Simple store: ra != rb, so no scratch register is needed.
1040			 * Just rotate and store one byte at a time.
1041			 */
1042#ifdef __BIG_ENDIAN
1043 frag.insn[n++] =
1044 jit_x0_addi(ra, ra, load_store_size - 1) |
1045 jit_x1_fnop();
1046#endif /* __BIG_ENDIAN */
1047 for (k = 0; k < load_store_size; k++) {
1048 /* Store a byte. */
1049 frag.insn[n++] =
1050 jit_x0_rotli(rb, rb, 56) |
1051 jit_x1_st1_add(ra, rb,
1052 UA_FIXUP_ADDR_DELTA);
1053 }
1054#ifdef __BIG_ENDIAN
1055 frag.insn[n] = jit_x1_addi(ra, ra, 1);
1056#else
1057 frag.insn[n] = jit_x1_addi(ra, ra,
1058 -1 * load_store_size);
1059#endif /* __LITTLE_ENDIAN */
1060
1061 if (load_store_size == 8) {
1062 frag.insn[n] |= jit_x0_fnop();
1063 } else if (load_store_size == 4) {
1064 frag.insn[n] |= jit_x0_rotli(rb, rb, 32);
1065 } else { /* = 2 */
1066 frag.insn[n] |= jit_x0_rotli(rb, rb, 16);
1067 }
1068 n++;
1069 if (bundle_2_enable)
1070 frag.insn[n++] = bundle_2;
1071 frag.insn[n++] = jit_x0_fnop() | jit_x1_iret();
1072 } else {
1073 if (rd == ra) {
1074 /* Use two clobber registers: clob1/2. */
1075 frag.insn[n++] =
1076 jit_x0_addi(TREG_SP, TREG_SP, -16) |
1077 jit_x1_fnop();
1078 frag.insn[n++] =
1079 jit_x0_addi(clob1, ra, 7) |
1080 jit_x1_st_add(TREG_SP, clob1, -8);
1081 frag.insn[n++] =
1082 jit_x0_addi(clob2, ra, 0) |
1083 jit_x1_st(TREG_SP, clob2);
1084 frag.insn[n++] =
1085 jit_x0_fnop() |
1086 jit_x1_ldna(rd, ra);
1087 frag.insn[n++] =
1088 jit_x0_fnop() |
1089 jit_x1_ldna(clob1, clob1);
1090				/*
1091				 * Note: rd must not be sp here.
1092				 * Recover clob1/2 from the stack.
1093				 */
1094 frag.insn[n++] =
1095 jit_x0_dblalign(rd, clob1, clob2) |
1096 jit_x1_ld_add(clob2, TREG_SP, 8);
1097 frag.insn[n++] =
1098 jit_x0_fnop() |
1099 jit_x1_ld_add(clob1, TREG_SP, 16);
1100 } else {
1101 /* Use one clobber register: clob1 only. */
1102 frag.insn[n++] =
1103 jit_x0_addi(TREG_SP, TREG_SP, -16) |
1104 jit_x1_fnop();
1105 frag.insn[n++] =
1106 jit_x0_addi(clob1, ra, 7) |
1107 jit_x1_st(TREG_SP, clob1);
1108 frag.insn[n++] =
1109 jit_x0_fnop() |
1110 jit_x1_ldna(rd, ra);
1111 frag.insn[n++] =
1112 jit_x0_fnop() |
1113 jit_x1_ldna(clob1, clob1);
1114				/*
1115				 * Note: rd must not be sp here.
1116				 * Recover clob1 from the stack.
1117				 */
1118 frag.insn[n++] =
1119 jit_x0_dblalign(rd, clob1, ra) |
1120 jit_x1_ld_add(clob1, TREG_SP, 16);
1121 }
1122
1123 if (bundle_2_enable)
1124 frag.insn[n++] = bundle_2;
1125			/*
1126			 * For non-8-byte loads, extract the relevant bytes, with
1127			 * or without sign extension.
1128			 */
1129 if (load_store_size == 4) {
1130 if (load_store_signed)
1131 frag.insn[n++] =
1132 jit_x0_bfexts(
1133 rd, rd,
1134 UA_FIXUP_BFEXT_START(4),
1135 UA_FIXUP_BFEXT_END(4)) |
1136 jit_x1_fnop();
1137 else
1138 frag.insn[n++] =
1139 jit_x0_bfextu(
1140 rd, rd,
1141 UA_FIXUP_BFEXT_START(4),
1142 UA_FIXUP_BFEXT_END(4)) |
1143 jit_x1_fnop();
1144 } else if (load_store_size == 2) {
1145 if (load_store_signed)
1146 frag.insn[n++] =
1147 jit_x0_bfexts(
1148 rd, rd,
1149 UA_FIXUP_BFEXT_START(2),
1150 UA_FIXUP_BFEXT_END(2)) |
1151 jit_x1_fnop();
1152 else
1153 frag.insn[n++] =
1154 jit_x0_bfextu(
1155 rd, rd,
1156 UA_FIXUP_BFEXT_START(2),
1157 UA_FIXUP_BFEXT_END(2)) |
1158 jit_x1_fnop();
1159 }
1160
1161 frag.insn[n++] =
1162 jit_x0_fnop() |
1163 jit_x1_iret();
1164 }
1165 } else if (!load_n_store) {
1166
1167	/*
1168	 * Generic memory store cases: use 3 clobber registers.
1169	 *
1170	 * Allocate space for saving clob2, clob1 and clob3 on the user's
1171	 * stack. Register clob3 points to where clob2 is saved, followed
1172	 * by clob1 and clob3 from high to low memory.
1173	 */
1174 frag.insn[n++] =
1175 jit_x0_addi(TREG_SP, TREG_SP, -32) |
1176 jit_x1_fnop();
1177 frag.insn[n++] =
1178 jit_x0_addi(clob3, TREG_SP, 16) |
1179 jit_x1_st_add(TREG_SP, clob3, 8);
1180#ifdef __LITTLE_ENDIAN
1181 frag.insn[n++] =
1182 jit_x0_addi(clob1, ra, 0) |
1183 jit_x1_st_add(TREG_SP, clob1, 8);
1184#else
1185 frag.insn[n++] =
1186 jit_x0_addi(clob1, ra, load_store_size - 1) |
1187 jit_x1_st_add(TREG_SP, clob1, 8);
1188#endif
1189 if (load_store_size == 8) {
1190		/*
1191		 * We store one byte at a time, not for speed but for compact
1192		 * code. After each store the source register is rotated right
1193		 * by one byte, leaving it unchanged after 8 stores.
1194		 */
1195 frag.insn[n++] =
1196 jit_x0_addi(clob2, TREG_ZERO, 7) |
1197 jit_x1_st_add(TREG_SP, clob2, 16);
1198 frag.insn[n++] =
1199 jit_x0_rotli(rb, rb, 56) |
1200 jit_x1_st1_add(clob1, rb, UA_FIXUP_ADDR_DELTA);
1201 frag.insn[n++] =
1202 jit_x0_addi(clob2, clob2, -1) |
1203 jit_x1_bnezt(clob2, -1);
1204 frag.insn[n++] =
1205 jit_x0_fnop() |
1206 jit_x1_addi(clob2, y1_br_reg, 0);
1207 } else if (load_store_size == 4) {
1208 frag.insn[n++] =
1209 jit_x0_addi(clob2, TREG_ZERO, 3) |
1210 jit_x1_st_add(TREG_SP, clob2, 16);
1211 frag.insn[n++] =
1212 jit_x0_rotli(rb, rb, 56) |
1213 jit_x1_st1_add(clob1, rb, UA_FIXUP_ADDR_DELTA);
1214 frag.insn[n++] =
1215 jit_x0_addi(clob2, clob2, -1) |
1216 jit_x1_bnezt(clob2, -1);
1217		/*
1218		 * Same as the 8-byte case, but rb must be rotated another
1219		 * 4 bytes to recover its original value after a 4-byte store.
1220		 */
1221 frag.insn[n++] = jit_x0_rotli(rb, rb, 32) |
1222 jit_x1_addi(clob2, y1_br_reg, 0);
1223 } else { /* =2 */
1224 frag.insn[n++] =
1225 jit_x0_addi(clob2, rb, 0) |
1226 jit_x1_st_add(TREG_SP, clob2, 16);
1227 for (k = 0; k < 2; k++) {
1228 frag.insn[n++] =
1229 jit_x0_shrui(rb, rb, 8) |
1230 jit_x1_st1_add(clob1, rb,
1231 UA_FIXUP_ADDR_DELTA);
1232 }
1233 frag.insn[n++] =
1234 jit_x0_addi(rb, clob2, 0) |
1235 jit_x1_addi(clob2, y1_br_reg, 0);
1236 }
1237
1238 if (bundle_2_enable)
1239 frag.insn[n++] = bundle_2;
1240
1241 if (y1_lr) {
1242 frag.insn[n++] =
1243 jit_x0_fnop() |
1244 jit_x1_mfspr(y1_lr_reg,
1245 SPR_EX_CONTEXT_0_0);
1246 }
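	/*
	 * If the original bundle had a Y1 branch, retarget the fragment's
	 * final "iret" at the branch target saved in clob2 above.
	 */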
1247 if (y1_br) {
1248 frag.insn[n++] =
1249 jit_x0_fnop() |
1250 jit_x1_mtspr(SPR_EX_CONTEXT_0_0,
1251 clob2);
1252 }
1253 if (x1_add) {
1254 frag.insn[n++] =
1255 jit_x0_addi(ra, ra, x1_add_imm8) |
1256 jit_x1_ld_add(clob2, clob3, -8);
1257 } else {
1258 frag.insn[n++] =
1259 jit_x0_fnop() |
1260 jit_x1_ld_add(clob2, clob3, -8);
1261 }
1262 frag.insn[n++] =
1263 jit_x0_fnop() |
1264 jit_x1_ld_add(clob1, clob3, -8);
1265 frag.insn[n++] = jit_x0_fnop() | jit_x1_ld(clob3, clob3);
1266 frag.insn[n++] = jit_x0_fnop() | jit_x1_iret();
1267
1268 } else {
1269	/*
1270	 * Generic memory load cases.
1271	 *
1272	 * Allocate space for saving clob1, clob2 and clob3 on the user's
1273	 * stack. Register clob3 points to where clob1 is saved, followed
1274	 * by clob2 and clob3 from high to low memory.
1275	 */
1276
1277 frag.insn[n++] =
1278 jit_x0_addi(TREG_SP, TREG_SP, -32) |
1279 jit_x1_fnop();
1280 frag.insn[n++] =
1281 jit_x0_addi(clob3, TREG_SP, 16) |
1282 jit_x1_st_add(TREG_SP, clob3, 8);
1283 frag.insn[n++] =
1284 jit_x0_addi(clob2, ra, 0) |
1285 jit_x1_st_add(TREG_SP, clob2, 8);
1286
1287 if (y1_br) {
1288 frag.insn[n++] =
1289 jit_x0_addi(clob1, y1_br_reg, 0) |
1290 jit_x1_st_add(TREG_SP, clob1, 16);
1291 } else {
1292 frag.insn[n++] =
1293 jit_x0_fnop() |
1294 jit_x1_st_add(TREG_SP, clob1, 16);
1295 }
1296
1297 if (bundle_2_enable)
1298 frag.insn[n++] = bundle_2;
1299
1300 if (y1_lr) {
1301 frag.insn[n++] =
1302 jit_x0_fnop() |
1303 jit_x1_mfspr(y1_lr_reg,
1304 SPR_EX_CONTEXT_0_0);
1305 }
1306
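	/*
	 * As in the store path, retarget the fragment's final "iret" at the
	 * Y1 branch target, which was saved in clob1 above.
	 */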
1307 if (y1_br) {
1308 frag.insn[n++] =
1309 jit_x0_fnop() |
1310 jit_x1_mtspr(SPR_EX_CONTEXT_0_0,
1311 clob1);
1312 }
1313
1314 frag.insn[n++] =
1315 jit_x0_addi(clob1, clob2, 7) |
1316 jit_x1_ldna(rd, clob2);
1317 frag.insn[n++] =
1318 jit_x0_fnop() |
1319 jit_x1_ldna(clob1, clob1);
1320 frag.insn[n++] =
1321 jit_x0_dblalign(rd, clob1, clob2) |
1322 jit_x1_ld_add(clob1, clob3, -8);
1323 if (x1_add) {
1324 frag.insn[n++] =
1325 jit_x0_addi(ra, ra, x1_add_imm8) |
1326 jit_x1_ld_add(clob2, clob3, -8);
1327 } else {
1328 frag.insn[n++] =
1329 jit_x0_fnop() |
1330 jit_x1_ld_add(clob2, clob3, -8);
1331 }
1332
1333 frag.insn[n++] =
1334 jit_x0_fnop() |
1335 jit_x1_ld(clob3, clob3);
1336
1337 if (load_store_size == 4) {
1338 if (load_store_signed)
1339 frag.insn[n++] =
1340 jit_x0_bfexts(
1341 rd, rd,
1342 UA_FIXUP_BFEXT_START(4),
1343 UA_FIXUP_BFEXT_END(4)) |
1344 jit_x1_fnop();
1345 else
1346 frag.insn[n++] =
1347 jit_x0_bfextu(
1348 rd, rd,
1349 UA_FIXUP_BFEXT_START(4),
1350 UA_FIXUP_BFEXT_END(4)) |
1351 jit_x1_fnop();
1352 } else if (load_store_size == 2) {
1353 if (load_store_signed)
1354 frag.insn[n++] =
1355 jit_x0_bfexts(
1356 rd, rd,
1357 UA_FIXUP_BFEXT_START(2),
1358 UA_FIXUP_BFEXT_END(2)) |
1359 jit_x1_fnop();
1360 else
1361 frag.insn[n++] =
1362 jit_x0_bfextu(
1363 rd, rd,
1364 UA_FIXUP_BFEXT_START(2),
1365 UA_FIXUP_BFEXT_END(2)) |
1366 jit_x1_fnop();
1367 }
1368
1369 frag.insn[n++] = jit_x0_fnop() | jit_x1_iret();
1370 }
1371
1372 /* Max JIT bundle count is 14. */
1373 WARN_ON(n > 14);
1374
1375 if (!unexpected) {
1376 int status = 0;
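		/*
		 * The JIT page is a direct-mapped cache of fragments: the low
		 * bits of the faulting bundle's PC select one of the
		 * (PAGE_SIZE >> UNALIGN_JIT_SHIFT) slots.
		 */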
1377 int idx = (regs->pc >> 3) &
1378 ((1ULL << (PAGE_SHIFT - UNALIGN_JIT_SHIFT)) - 1);
1379
1380 frag.pc = regs->pc;
1381 frag.bundle = bundle;
1382
1383 if (unaligned_printk) {
1384 pr_info("%s/%d, Unalign fixup: pc=%lx "
1385 "bundle=%lx %d %d %d %d %d %d %d %d.",
1386 current->comm, current->pid,
1387 (unsigned long)frag.pc,
1388 (unsigned long)frag.bundle,
1389 (int)alias, (int)rd, (int)ra,
1390 (int)rb, (int)bundle_2_enable,
1391 (int)y1_lr, (int)y1_br, (int)x1_add);
1392
1393 for (k = 0; k < n; k += 2)
1394 pr_info("[%d] %016llx %016llx", k,
1395 (unsigned long long)frag.insn[k],
1396 (unsigned long long)frag.insn[k+1]);
1397 }
1398
1399		/* Swap bundle byte order for big-endian systems. */
1400#ifdef __BIG_ENDIAN
1401 frag.bundle = GX_INSN_BSWAP(frag.bundle);
1402 for (k = 0; k < n; k++)
1403 frag.insn[k] = GX_INSN_BSWAP(frag.insn[k]);
1404#endif /* __BIG_ENDIAN */
1405
1406 status = copy_to_user((void __user *)&jit_code_area[idx],
1407 &frag, sizeof(frag));
1408 if (status) {
1409			/* Failed to copy the JIT fragment into userspace; send SIGSEGV. */
1410 siginfo_t info = {
1411 .si_signo = SIGSEGV,
1412 .si_code = SEGV_MAPERR,
1413 .si_addr = (void __user *)&jit_code_area[idx]
1414 };
1415
1416 pr_warn("Unalign fixup: pid=%d %s jit_code_area=%llx",
1417 current->pid, current->comm,
1418 (unsigned long long)&jit_code_area[idx]);
1419
1420 trace_unhandled_signal("segfault in unalign fixup",
1421 regs,
1422 (unsigned long)info.si_addr,
1423 SIGSEGV);
1424 force_sig_info(info.si_signo, &info, current);
1425 return;
1426 }
1427
1428
1429		/* Cheap, unlocked increment; the count need not be exact. */
1430 unaligned_fixup_count++;
1431 __flush_icache_range((unsigned long)&jit_code_area[idx],
1432 (unsigned long)&jit_code_area[idx] +
1433 sizeof(frag));
1434
1435		/* Set up SPR_EX_CONTEXT_0_0/1 for returning to the user program. */
1436 __insn_mtspr(SPR_EX_CONTEXT_0_0, regs->pc + 8);
1437 __insn_mtspr(SPR_EX_CONTEXT_0_1, PL_ICS_EX1(USER_PL, 0));
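		/*
		 * The fragment's trailing "iret" consumes these, resuming the
		 * user program at the bundle after the fault (unless the
		 * fragment itself retargets EX_CONTEXT_0_0 for a Y1 branch).
		 */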
1438
1439		/* Point pc at the start of the new JIT fragment. */
1440 regs->pc = (unsigned long)&jit_code_area[idx].insn[0];
1441 /* Set ICS in SPR_EX_CONTEXT_K_1. */
1442 regs->ex1 = PL_ICS_EX1(USER_PL, 1);
1443 }
1444}
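
/*
 * For reference, a sketch (not verbatim output) of the fragment emitted for
 * the common "simple" case above: an unaligned 4-byte store on a
 * little-endian kernel, with no branch, link, add or register aliasing:
 *
 *	{ rotli rb, rb, 56 ; st1_add ra, rb, 1 }   x4, stores bytes low to high
 *	{ rotli rb, rb, 32 ; addi ra, ra, -4 }     restores rb and ra
 *	{ remaining non-memory work, if any }      bundle_2, when enabled
 *	{ fnop ; iret }
 */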
1445
1446
1447/*
1448 * C function to generate the unaligned-data JIT. Called from the
1449 * unaligned-data interrupt handler.
1450 *
1451 * Kernel-space faults are handled first, via the exception-table fixup or
1452 * jit_bundle_gen(). For user faults, generate a SIGBUS if the fixup is
1453 * disabled, ICS was set, or the sp register holds an unaligned address.
1454 * Otherwise map a page into user space as the JIT area if one is not
1455 * mapped yet, generate the JIT code, and return to the exception handler.
1456 *
1457 * The exception handler will "iret" to the newly generated JIT code after
1458 * restoring caller-saved registers. The JIT code will then perform
1459 * another "iret" to resume the user's program.
1460 */
1461
1462void do_unaligned(struct pt_regs *regs, int vecnum)
1463{
1464 tilegx_bundle_bits __user *pc;
1465 tilegx_bundle_bits bundle;
1466 struct thread_info *info = current_thread_info();
1467 int align_ctl;
1468
1469	/* Check the per-process unaligned fixup control, set via prctl(). */
1470 align_ctl = unaligned_fixup;
1471 switch (task_thread_info(current)->align_ctl) {
1472 case PR_UNALIGN_NOPRINT:
1473 align_ctl = 1;
1474 break;
1475 case PR_UNALIGN_SIGBUS:
1476 align_ctl = 0;
1477 break;
1478 }
1479
1480	/* Enable interrupts in order to access user memory. */
1481 local_irq_enable();
1482
1483	/*
1484	 * If the fault came from kernel space, there are two choices:
1485	 * (a) unaligned_fixup < 1: apply the get_user/put_user exception-table
1486	 *     fixup so that -EFAULT is returned; if there is no fixup, panic.
1487	 * (b) unaligned_fixup >= 1: try to fix the unaligned access if it was
1488	 *     triggered by the get_user/put_user() macros; panic the kernel
1489	 *     if it is not fixable.
1490	 */
1491
1492 if (EX1_PL(regs->ex1) != USER_PL) {
1493
1494 if (align_ctl < 1) {
1495 unaligned_fixup_count++;
1496 /* If exception came from kernel, try fix it up. */
1497 if (fixup_exception(regs)) {
1498 if (unaligned_printk)
1499 pr_info("Unalign fixup: %d %llx @%llx",
1500 (int)unaligned_fixup,
1501 (unsigned long long)regs->ex1,
1502 (unsigned long long)regs->pc);
1503 return;
1504 }
1505 /* Not fixable. Go panic. */
1506 panic("Unalign exception in Kernel. pc=%lx",
1507 regs->pc);
1508 return;
1509 } else {
1510 /*
1511 * Try to fix the exception. If we can't, panic the
1512 * kernel.
1513 */
1514 bundle = GX_INSN_BSWAP(
1515 *((tilegx_bundle_bits *)(regs->pc)));
1516 jit_bundle_gen(regs, bundle, align_ctl);
1517 return;
1518 }
1519 }
1520
1521	/*
1522	 * If the fault came from user space with ICS set, the stack pointer is
1523	 * not aligned, or fixups are disabled, trigger SIGBUS.
1524	 */
1525 if ((regs->sp & 0x7) || (regs->ex1) || (align_ctl < 0)) {
1526 siginfo_t info = {
1527 .si_signo = SIGBUS,
1528 .si_code = BUS_ADRALN,
1529 .si_addr = (unsigned char __user *)0
1530 };
1531
1532 if (unaligned_printk)
1533 pr_info("Unalign fixup: %d %llx @%llx",
1534 (int)unaligned_fixup,
1535 (unsigned long long)regs->ex1,
1536 (unsigned long long)regs->pc);
1537
1538 unaligned_fixup_count++;
1539
1540 trace_unhandled_signal("unaligned fixup trap", regs, 0, SIGBUS);
1541 force_sig_info(info.si_signo, &info, current);
1542 return;
1543 }
1544
1545
1546	/* Read the bundle that caused the exception. */
1547 pc = (tilegx_bundle_bits __user *)(regs->pc);
1548 if (get_user(bundle, pc) != 0) {
1549		/* We should rarely get here, since pc is normally a valid user address. */
1550 siginfo_t info = {
1551 .si_signo = SIGSEGV,
1552 .si_code = SEGV_MAPERR,
1553 .si_addr = (void __user *)pc
1554 };
1555 pr_err("Couldn't read instruction at %p trying to step\n", pc);
1556 trace_unhandled_signal("segfault in unalign fixup", regs,
1557 (unsigned long)info.si_addr, SIGSEGV);
1558 force_sig_info(info.si_signo, &info, current);
1559 return;
1560 }
1561
1562 if (!info->unalign_jit_base) {
1563 void __user *user_page;
1564
1565 /*
1566 * Allocate a page in userland.
1567 * For 64-bit processes we try to place the mapping far
1568 * from anything else that might be going on (specifically
1569 * 64 GB below the top of the user address space). If it
1570 * happens not to be possible to put it there, it's OK;
1571 * the kernel will choose another location and we'll
1572 * remember it for later.
1573 */
1574 if (is_compat_task())
1575 user_page = NULL;
1576 else
1577 user_page = (void __user *)(TASK_SIZE - (1UL << 36)) +
1578 (current->pid << PAGE_SHIFT);
1579
1580 user_page = (void __user *) vm_mmap(NULL,
1581 (unsigned long)user_page,
1582 PAGE_SIZE,
1583 PROT_EXEC | PROT_READ |
1584 PROT_WRITE,
1585#ifdef CONFIG_HOMECACHE
1586 MAP_CACHE_HOME_TASK |
1587#endif
1588 MAP_PRIVATE |
1589 MAP_ANONYMOUS,
1590 0);
1591
1592 if (IS_ERR((void __force *)user_page)) {
1593 pr_err("Out of kernel pages trying do_mmap.\n");
1594 return;
1595 }
1596
1597 /* Save the address in the thread_info struct */
1598 info->unalign_jit_base = user_page;
1599 if (unaligned_printk)
1600 pr_info("Unalign bundle: %d:%d, allocate page @%llx",
1601 raw_smp_processor_id(), current->pid,
1602 (unsigned long long)user_page);
1603 }
1604
1605 /* Generate unalign JIT */
1606 jit_bundle_gen(regs, GX_INSN_BSWAP(bundle), align_ctl);
1607}
1608
1609#endif /* __tilegx__ */
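
For context, a minimal userspace sketch (not part of this patch) of selecting
the per-process behavior that do_unaligned() reads from align_ctl above, using
the generic prctl() unalign controls:

	#include <stdio.h>
	#include <sys/prctl.h>

	int main(void)
	{
		unsigned int cur;

		/* Request SIGBUS instead of the JIT fixup on unaligned access. */
		if (prctl(PR_SET_UNALIGN, PR_UNALIGN_SIGBUS) != 0)
			perror("PR_SET_UNALIGN");

		/* Read the current setting back. */
		if (prctl(PR_GET_UNALIGN, &cur) == 0)
			printf("unalign control: %#x\n", cur);

		return 0;
	}
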
diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c
index 6152819e555b..7863298dad4d 100644
--- a/arch/tile/mm/fault.c
+++ b/arch/tile/mm/fault.c
@@ -722,8 +722,49 @@ void do_page_fault(struct pt_regs *regs, int fault_num,
722{ 722{
723 int is_page_fault; 723 int is_page_fault;
724 724
725#ifdef __tilegx__
726 /*
727 * We don't need early do_page_fault_ics() support, since unlike
728 * Pro we don't need to worry about unlocking the atomic locks.
729 * There is only one current case in GX where we touch any memory
730 * under ICS other than our own kernel stack, and we handle that
731 * here. (If we crash due to trying to touch our own stack,
732 * we're in too much trouble for C code to help out anyway.)
733 */
734 if (write & ~1) {
735 unsigned long pc = write & ~1;
736 if (pc >= (unsigned long) __start_unalign_asm_code &&
737 pc < (unsigned long) __end_unalign_asm_code) {
738 struct thread_info *ti = current_thread_info();
739 /*
740 * Our EX_CONTEXT is still what it was from the
741 * initial unalign exception, but now we've faulted
742 * on the JIT page. We would like to complete the
 743			 * page fault as appropriate, and then retry
744 * the instruction that caused the unalign exception.
745 * Our state has been "corrupted" by setting the low
746 * bit in "sp", and stashing r0..r3 in the
747 * thread_info area, so we revert all of that, then
748 * continue as if this were a normal page fault.
749 */
750 regs->sp &= ~1UL;
751 regs->regs[0] = ti->unalign_jit_tmp[0];
752 regs->regs[1] = ti->unalign_jit_tmp[1];
753 regs->regs[2] = ti->unalign_jit_tmp[2];
754 regs->regs[3] = ti->unalign_jit_tmp[3];
755 write &= 1;
756 } else {
757 pr_alert("%s/%d: ICS set at page fault at %#lx: %#lx\n",
758 current->comm, current->pid, pc, address);
759 show_regs(regs);
760 do_group_exit(SIGKILL);
761 return;
762 }
763 }
764#else
725 /* This case should have been handled by do_page_fault_ics(). */ 765 /* This case should have been handled by do_page_fault_ics(). */
726 BUG_ON(write & ~1); 766 BUG_ON(write & ~1);
767#endif
727 768
728#if CHIP_HAS_TILE_DMA() 769#if CHIP_HAS_TILE_DMA()
729 /* 770 /*