aboutsummaryrefslogtreecommitdiffstats
path: root/arch/tile/mm/fault.c
diff options
context:
space:
mode:
authorChris Metcalf <cmetcalf@tilera.com>2010-05-28 23:09:12 -0400
committerChris Metcalf <cmetcalf@tilera.com>2010-06-04 17:11:18 -0400
commit867e359b97c970a60626d5d76bbe2a8fadbf38fb (patch)
treec5ccbb7f5172e8555977119608ecb1eee3cc37e3 /arch/tile/mm/fault.c
parent5360bd776f73d0a7da571d72a09a03f237e99900 (diff)
arch/tile: core support for Tilera 32-bit chips.
This change is the core kernel support for TILEPro and TILE64 chips. No driver support (except the console driver) is included yet. This includes the relevant Linux headers in asm/; the low-level "Tile architecture" headers in arch/, which are shared with the hypervisor, etc., and are build-system agnostic; and the relevant hypervisor headers in hv/. Signed-off-by: Chris Metcalf <cmetcalf@tilera.com> Acked-by: Arnd Bergmann <arnd@arndb.de> Acked-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp> Reviewed-by: Paul Mundt <lethal@linux-sh.org>
Diffstat (limited to 'arch/tile/mm/fault.c')
-rw-r--r--arch/tile/mm/fault.c905
1 files changed, 905 insertions, 0 deletions
diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c
new file mode 100644
index 000000000000..9b6b92f07def
--- /dev/null
+++ b/arch/tile/mm/fault.c
@@ -0,0 +1,905 @@
1/*
2 * Copyright 2010 Tilera Corporation. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation, version 2.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11 * NON INFRINGEMENT. See the GNU General Public License for
12 * more details.
13 *
14 * From i386 code copyright (C) 1995 Linus Torvalds
15 */
16
17#include <linux/signal.h>
18#include <linux/sched.h>
19#include <linux/kernel.h>
20#include <linux/errno.h>
21#include <linux/string.h>
22#include <linux/types.h>
23#include <linux/ptrace.h>
24#include <linux/mman.h>
25#include <linux/mm.h>
26#include <linux/smp.h>
27#include <linux/smp_lock.h>
28#include <linux/interrupt.h>
29#include <linux/init.h>
30#include <linux/tty.h>
31#include <linux/vt_kern.h> /* For unblank_screen() */
32#include <linux/highmem.h>
33#include <linux/module.h>
34#include <linux/kprobes.h>
35#include <linux/hugetlb.h>
36#include <linux/syscalls.h>
37#include <linux/uaccess.h>
38
39#include <asm/system.h>
40#include <asm/pgalloc.h>
41#include <asm/sections.h>
42
43#include <arch/interrupts.h>
44
45/*
46 * Unlock any spinlocks which will prevent us from getting the
47 * message out
48 */
49void bust_spinlocks(int yes)
50{
51 int loglevel_save = console_loglevel;
52
53 if (yes) {
54 oops_in_progress = 1;
55 return;
56 }
57 oops_in_progress = 0;
58 /*
59 * OK, the message is on the console. Now we call printk()
60 * without oops_in_progress set so that printk will give klogd
61 * a poke. Hold onto your hats...
62 */
63 console_loglevel = 15; /* NMI oopser may have shut the console up */
64 printk(" ");
65 console_loglevel = loglevel_save;
66}
67
68static noinline void force_sig_info_fault(int si_signo, int si_code,
69 unsigned long address, int fault_num, struct task_struct *tsk)
70{
71 siginfo_t info;
72
73 if (unlikely(tsk->pid < 2)) {
74 panic("Signal %d (code %d) at %#lx sent to %s!",
75 si_signo, si_code & 0xffff, address,
76 tsk->pid ? "init" : "the idle task");
77 }
78
79 info.si_signo = si_signo;
80 info.si_errno = 0;
81 info.si_code = si_code;
82 info.si_addr = (void __user *)address;
83 info.si_trapno = fault_num;
84 force_sig_info(si_signo, &info, tsk);
85}
86
87#ifndef __tilegx__
88/*
89 * Synthesize the fault a PL0 process would get by doing a word-load of
90 * an unaligned address or a high kernel address. Called indirectly
91 * from sys_cmpxchg() in kernel/intvec.S.
92 */
93int _sys_cmpxchg_badaddr(unsigned long address, struct pt_regs *regs)
94{
95 if (address >= PAGE_OFFSET)
96 force_sig_info_fault(SIGSEGV, SEGV_MAPERR, address,
97 INT_DTLB_MISS, current);
98 else
99 force_sig_info_fault(SIGBUS, BUS_ADRALN, address,
100 INT_UNALIGN_DATA, current);
101
102 /*
103 * Adjust pc to point at the actual instruction, which is unusual
104 * for syscalls normally, but is appropriate when we are claiming
105 * that a syscall swint1 caused a page fault or bus error.
106 */
107 regs->pc -= 8;
108
109 /*
110 * Mark this as a caller-save interrupt, like a normal page fault,
111 * so that when we go through the signal handler path we will
112 * properly restore r0, r1, and r2 for the signal handler arguments.
113 */
114 regs->flags |= PT_FLAGS_CALLER_SAVES;
115
116 return 0;
117}
118#endif
119
120static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
121{
122 unsigned index = pgd_index(address);
123 pgd_t *pgd_k;
124 pud_t *pud, *pud_k;
125 pmd_t *pmd, *pmd_k;
126
127 pgd += index;
128 pgd_k = init_mm.pgd + index;
129
130 if (!pgd_present(*pgd_k))
131 return NULL;
132
133 pud = pud_offset(pgd, address);
134 pud_k = pud_offset(pgd_k, address);
135 if (!pud_present(*pud_k))
136 return NULL;
137
138 pmd = pmd_offset(pud, address);
139 pmd_k = pmd_offset(pud_k, address);
140 if (!pmd_present(*pmd_k))
141 return NULL;
142 if (!pmd_present(*pmd)) {
143 set_pmd(pmd, *pmd_k);
144 arch_flush_lazy_mmu_mode();
145 } else
146 BUG_ON(pmd_ptfn(*pmd) != pmd_ptfn(*pmd_k));
147 return pmd_k;
148}
149
150/*
151 * Handle a fault on the vmalloc or module mapping area
152 */
153static inline int vmalloc_fault(pgd_t *pgd, unsigned long address)
154{
155 pmd_t *pmd_k;
156 pte_t *pte_k;
157
158 /* Make sure we are in vmalloc area */
159 if (!(address >= VMALLOC_START && address < VMALLOC_END))
160 return -1;
161
162 /*
163 * Synchronize this task's top level page-table
164 * with the 'reference' page table.
165 */
166 pmd_k = vmalloc_sync_one(pgd, address);
167 if (!pmd_k)
168 return -1;
169 if (pmd_huge(*pmd_k))
170 return 0; /* support TILE huge_vmap() API */
171 pte_k = pte_offset_kernel(pmd_k, address);
172 if (!pte_present(*pte_k))
173 return -1;
174 return 0;
175}
176
177/* Wait until this PTE has completed migration. */
178static void wait_for_migration(pte_t *pte)
179{
180 if (pte_migrating(*pte)) {
181 /*
182 * Wait until the migrater fixes up this pte.
183 * We scale the loop count by the clock rate so we'll wait for
184 * a few seconds here.
185 */
186 int retries = 0;
187 int bound = get_clock_rate();
188 while (pte_migrating(*pte)) {
189 barrier();
190 if (++retries > bound)
191 panic("Hit migrating PTE (%#llx) and"
192 " page PFN %#lx still migrating",
193 pte->val, pte_pfn(*pte));
194 }
195 }
196}
197
198/*
199 * It's not generally safe to use "current" to get the page table pointer,
200 * since we might be running an oprofile interrupt in the middle of a
201 * task switch.
202 */
203static pgd_t *get_current_pgd(void)
204{
205 HV_Context ctx = hv_inquire_context();
206 unsigned long pgd_pfn = ctx.page_table >> PAGE_SHIFT;
207 struct page *pgd_page = pfn_to_page(pgd_pfn);
208 BUG_ON(PageHighMem(pgd_page)); /* oops, HIGHPTE? */
209 return (pgd_t *) __va(ctx.page_table);
210}
211
212/*
213 * We can receive a page fault from a migrating PTE at any time.
214 * Handle it by just waiting until the fault resolves.
215 *
216 * It's also possible to get a migrating kernel PTE that resolves
217 * itself during the downcall from hypervisor to Linux. We just check
218 * here to see if the PTE seems valid, and if so we retry it.
219 *
220 * NOTE! We MUST NOT take any locks for this case. We may be in an
221 * interrupt or a critical region, and must do as little as possible.
222 * Similarly, we can't use atomic ops here, since we may be handling a
223 * fault caused by an atomic op access.
224 */
225static int handle_migrating_pte(pgd_t *pgd, int fault_num,
226 unsigned long address,
227 int is_kernel_mode, int write)
228{
229 pud_t *pud;
230 pmd_t *pmd;
231 pte_t *pte;
232 pte_t pteval;
233
234 if (pgd_addr_invalid(address))
235 return 0;
236
237 pgd += pgd_index(address);
238 pud = pud_offset(pgd, address);
239 if (!pud || !pud_present(*pud))
240 return 0;
241 pmd = pmd_offset(pud, address);
242 if (!pmd || !pmd_present(*pmd))
243 return 0;
244 pte = pmd_huge_page(*pmd) ? ((pte_t *)pmd) :
245 pte_offset_kernel(pmd, address);
246 pteval = *pte;
247 if (pte_migrating(pteval)) {
248 wait_for_migration(pte);
249 return 1;
250 }
251
252 if (!is_kernel_mode || !pte_present(pteval))
253 return 0;
254 if (fault_num == INT_ITLB_MISS) {
255 if (pte_exec(pteval))
256 return 1;
257 } else if (write) {
258 if (pte_write(pteval))
259 return 1;
260 } else {
261 if (pte_read(pteval))
262 return 1;
263 }
264
265 return 0;
266}
267
268/*
269 * This routine is responsible for faulting in user pages.
270 * It passes the work off to one of the appropriate routines.
271 * It returns true if the fault was successfully handled.
272 */
273static int handle_page_fault(struct pt_regs *regs,
274 int fault_num,
275 int is_page_fault,
276 unsigned long address,
277 int write)
278{
279 struct task_struct *tsk;
280 struct mm_struct *mm;
281 struct vm_area_struct *vma;
282 unsigned long stack_offset;
283 int fault;
284 int si_code;
285 int is_kernel_mode;
286 pgd_t *pgd;
287
288 /* on TILE, protection faults are always writes */
289 if (!is_page_fault)
290 write = 1;
291
292 is_kernel_mode = (EX1_PL(regs->ex1) != USER_PL);
293
294 tsk = validate_current();
295
296 /*
297 * Check to see if we might be overwriting the stack, and bail
298 * out if so. The page fault code is a relatively likely
299 * place to get trapped in an infinite regress, and once we
300 * overwrite the whole stack, it becomes very hard to recover.
301 */
302 stack_offset = stack_pointer & (THREAD_SIZE-1);
303 if (stack_offset < THREAD_SIZE / 8) {
304 printk(KERN_ALERT "Potential stack overrun: sp %#lx\n",
305 stack_pointer);
306 show_regs(regs);
307 printk(KERN_ALERT "Killing current process %d/%s\n",
308 tsk->pid, tsk->comm);
309 do_group_exit(SIGKILL);
310 }
311
312 /*
313 * Early on, we need to check for migrating PTE entries;
314 * see homecache.c. If we find a migrating PTE, we wait until
315 * the backing page claims to be done migrating, then we procede.
316 * For kernel PTEs, we rewrite the PTE and return and retry.
317 * Otherwise, we treat the fault like a normal "no PTE" fault,
318 * rather than trying to patch up the existing PTE.
319 */
320 pgd = get_current_pgd();
321 if (handle_migrating_pte(pgd, fault_num, address,
322 is_kernel_mode, write))
323 return 1;
324
325 si_code = SEGV_MAPERR;
326
327 /*
328 * We fault-in kernel-space virtual memory on-demand. The
329 * 'reference' page table is init_mm.pgd.
330 *
331 * NOTE! We MUST NOT take any locks for this case. We may
332 * be in an interrupt or a critical region, and should
333 * only copy the information from the master page table,
334 * nothing more.
335 *
336 * This verifies that the fault happens in kernel space
337 * and that the fault was not a protection fault.
338 */
339 if (unlikely(address >= TASK_SIZE &&
340 !is_arch_mappable_range(address, 0))) {
341 if (is_kernel_mode && is_page_fault &&
342 vmalloc_fault(pgd, address) >= 0)
343 return 1;
344 /*
345 * Don't take the mm semaphore here. If we fixup a prefetch
346 * fault we could otherwise deadlock.
347 */
348 mm = NULL; /* happy compiler */
349 vma = NULL;
350 goto bad_area_nosemaphore;
351 }
352
353 /*
354 * If we're trying to touch user-space addresses, we must
355 * be either at PL0, or else with interrupts enabled in the
356 * kernel, so either way we can re-enable interrupts here.
357 */
358 local_irq_enable();
359
360 mm = tsk->mm;
361
362 /*
363 * If we're in an interrupt, have no user context or are running in an
364 * atomic region then we must not take the fault.
365 */
366 if (in_atomic() || !mm) {
367 vma = NULL; /* happy compiler */
368 goto bad_area_nosemaphore;
369 }
370
371 /*
372 * When running in the kernel we expect faults to occur only to
373 * addresses in user space. All other faults represent errors in the
374 * kernel and should generate an OOPS. Unfortunately, in the case of an
375 * erroneous fault occurring in a code path which already holds mmap_sem
376 * we will deadlock attempting to validate the fault against the
377 * address space. Luckily the kernel only validly references user
378 * space from well defined areas of code, which are listed in the
379 * exceptions table.
380 *
381 * As the vast majority of faults will be valid we will only perform
382 * the source reference check when there is a possibility of a deadlock.
383 * Attempt to lock the address space, if we cannot we then validate the
384 * source. If this is invalid we can skip the address space check,
385 * thus avoiding the deadlock.
386 */
387 if (!down_read_trylock(&mm->mmap_sem)) {
388 if (is_kernel_mode &&
389 !search_exception_tables(regs->pc)) {
390 vma = NULL; /* happy compiler */
391 goto bad_area_nosemaphore;
392 }
393 down_read(&mm->mmap_sem);
394 }
395
396 vma = find_vma(mm, address);
397 if (!vma)
398 goto bad_area;
399 if (vma->vm_start <= address)
400 goto good_area;
401 if (!(vma->vm_flags & VM_GROWSDOWN))
402 goto bad_area;
403 if (regs->sp < PAGE_OFFSET) {
404 /*
405 * accessing the stack below sp is always a bug.
406 */
407 if (address < regs->sp)
408 goto bad_area;
409 }
410 if (expand_stack(vma, address))
411 goto bad_area;
412
413/*
414 * Ok, we have a good vm_area for this memory access, so
415 * we can handle it..
416 */
417good_area:
418 si_code = SEGV_ACCERR;
419 if (fault_num == INT_ITLB_MISS) {
420 if (!(vma->vm_flags & VM_EXEC))
421 goto bad_area;
422 } else if (write) {
423#ifdef TEST_VERIFY_AREA
424 if (!is_page_fault && regs->cs == KERNEL_CS)
425 printk("WP fault at "REGFMT"\n", regs->eip);
426#endif
427 if (!(vma->vm_flags & VM_WRITE))
428 goto bad_area;
429 } else {
430 if (!is_page_fault || !(vma->vm_flags & VM_READ))
431 goto bad_area;
432 }
433
434 survive:
435 /*
436 * If for any reason at all we couldn't handle the fault,
437 * make sure we exit gracefully rather than endlessly redo
438 * the fault.
439 */
440 fault = handle_mm_fault(mm, vma, address, write);
441 if (unlikely(fault & VM_FAULT_ERROR)) {
442 if (fault & VM_FAULT_OOM)
443 goto out_of_memory;
444 else if (fault & VM_FAULT_SIGBUS)
445 goto do_sigbus;
446 BUG();
447 }
448 if (fault & VM_FAULT_MAJOR)
449 tsk->maj_flt++;
450 else
451 tsk->min_flt++;
452
453 /*
454 * If this was an asynchronous fault,
455 * restart the appropriate engine.
456 */
457 switch (fault_num) {
458#if CHIP_HAS_TILE_DMA()
459 case INT_DMATLB_MISS:
460 case INT_DMATLB_MISS_DWNCL:
461 case INT_DMATLB_ACCESS:
462 case INT_DMATLB_ACCESS_DWNCL:
463 __insn_mtspr(SPR_DMA_CTR, SPR_DMA_CTR__REQUEST_MASK);
464 break;
465#endif
466#if CHIP_HAS_SN_PROC()
467 case INT_SNITLB_MISS:
468 case INT_SNITLB_MISS_DWNCL:
469 __insn_mtspr(SPR_SNCTL,
470 __insn_mfspr(SPR_SNCTL) &
471 ~SPR_SNCTL__FRZPROC_MASK);
472 break;
473#endif
474 }
475
476 up_read(&mm->mmap_sem);
477 return 1;
478
479/*
480 * Something tried to access memory that isn't in our memory map..
481 * Fix it, but check if it's kernel or user first..
482 */
483bad_area:
484 up_read(&mm->mmap_sem);
485
486bad_area_nosemaphore:
487 /* User mode accesses just cause a SIGSEGV */
488 if (!is_kernel_mode) {
489 /*
490 * It's possible to have interrupts off here.
491 */
492 local_irq_enable();
493
494 force_sig_info_fault(SIGSEGV, si_code, address,
495 fault_num, tsk);
496 return 0;
497 }
498
499no_context:
500 /* Are we prepared to handle this kernel fault? */
501 if (fixup_exception(regs))
502 return 0;
503
504/*
505 * Oops. The kernel tried to access some bad page. We'll have to
506 * terminate things with extreme prejudice.
507 */
508
509 bust_spinlocks(1);
510
511 /* FIXME: no lookup_address() yet */
512#ifdef SUPPORT_LOOKUP_ADDRESS
513 if (fault_num == INT_ITLB_MISS) {
514 pte_t *pte = lookup_address(address);
515
516 if (pte && pte_present(*pte) && !pte_exec_kernel(*pte))
517 printk(KERN_CRIT "kernel tried to execute"
518 " non-executable page - exploit attempt?"
519 " (uid: %d)\n", current->uid);
520 }
521#endif
522 if (address < PAGE_SIZE)
523 printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference\n");
524 else
525 printk(KERN_ALERT "Unable to handle kernel paging request\n");
526 printk(" at virtual address "REGFMT", pc "REGFMT"\n",
527 address, regs->pc);
528
529 show_regs(regs);
530
531 if (unlikely(tsk->pid < 2)) {
532 panic("Kernel page fault running %s!",
533 tsk->pid ? "init" : "the idle task");
534 }
535
536 /*
537 * More FIXME: we should probably copy the i386 here and
538 * implement a generic die() routine. Not today.
539 */
540#ifdef SUPPORT_DIE
541 die("Oops", regs);
542#endif
543 bust_spinlocks(1);
544
545 do_group_exit(SIGKILL);
546
547/*
548 * We ran out of memory, or some other thing happened to us that made
549 * us unable to handle the page fault gracefully.
550 */
551out_of_memory:
552 up_read(&mm->mmap_sem);
553 if (is_global_init(tsk)) {
554 yield();
555 down_read(&mm->mmap_sem);
556 goto survive;
557 }
558 printk("VM: killing process %s\n", tsk->comm);
559 if (!is_kernel_mode)
560 do_group_exit(SIGKILL);
561 goto no_context;
562
563do_sigbus:
564 up_read(&mm->mmap_sem);
565
566 /* Kernel mode? Handle exceptions or die */
567 if (is_kernel_mode)
568 goto no_context;
569
570 force_sig_info_fault(SIGBUS, BUS_ADRERR, address, fault_num, tsk);
571 return 0;
572}
573
574#ifndef __tilegx__
575
/*
 * Code-range marker symbols defined outside this file.  They are only
 * ever used here as addresses to compare a faulting pc against:
 *  - [sys_cmpxchg, __sys_cmpxchg_end) is the sys_cmpxchg fast path;
 *  - __sys_cmpxchg_grab_lock is the point within it from which the
 *    atomic lock may be held;
 *  - [__start_atomic_asm_code, __end_atomic_asm_code) is the atomic
 *    assembly code.
 */
extern char sys_cmpxchg[], __sys_cmpxchg_end[];
extern char __sys_cmpxchg_grab_lock[];
extern char __start_atomic_asm_code[], __end_atomic_asm_code[];
579
/*
 * We return this structure in registers to avoid having to write
 * additional save/restore code in the intvec.S caller.
 *
 * NOTE: do_page_fault_ics() fills the members positionally as
 * { do_page_fault, fault_num, address, write, 1 }, so as used there
 * "vecnum" carries the fault number, "fault_num" the faulting address
 * and "info" the write flag -- i.e. the members after "handler" line
 * up with the arguments of do_page_fault() that follow regs.
 */
struct intvec_state {
	void *handler;			/* routine that continues the fault */
	unsigned long vecnum;
	unsigned long fault_num;
	unsigned long info;
	unsigned long retval;		/* 1: fully handled; 0: carry on */
};
591
/*
 * We must release ICS before panicking or we won't get anywhere.
 *
 * Clears the interrupt-critical-section SPR and then calls panic()
 * with the given printf-style format and arguments.  "##" lets the
 * macro be used with a bare format string and no further arguments.
 */
#define ics_panic(fmt, ...) do { \
	__insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0); \
	panic(fmt, ##__VA_ARGS__); \
} while (0)
597
/*
 * Forward declaration: do_page_fault_ics() below hands this routine
 * back to its caller as the handler to continue with.
 */
void do_page_fault(struct pt_regs *regs, int fault_num,
		   unsigned long address, unsigned long write);
600
601/*
602 * When we take an ITLB or DTLB fault or access violation in the
603 * supervisor while the critical section bit is set, the hypervisor is
604 * reluctant to write new values into the EX_CONTEXT_1_x registers,
605 * since that might indicate we have not yet squirreled the SPR
606 * contents away and can thus safely take a recursive interrupt.
607 * Accordingly, the hypervisor passes us the PC via SYSTEM_SAVE_1_2.
608 */
609struct intvec_state do_page_fault_ics(struct pt_regs *regs, int fault_num,
610 unsigned long address,
611 unsigned long info)
612{
613 unsigned long pc = info & ~1;
614 int write = info & 1;
615 pgd_t *pgd = get_current_pgd();
616
617 /* Retval is 1 at first since we will handle the fault fully. */
618 struct intvec_state state = {
619 do_page_fault, fault_num, address, write, 1
620 };
621
622 /* Validate that we are plausibly in the right routine. */
623 if ((pc & 0x7) != 0 || pc < PAGE_OFFSET ||
624 (fault_num != INT_DTLB_MISS &&
625 fault_num != INT_DTLB_ACCESS)) {
626 unsigned long old_pc = regs->pc;
627 regs->pc = pc;
628 ics_panic("Bad ICS page fault args:"
629 " old PC %#lx, fault %d/%d at %#lx\n",
630 old_pc, fault_num, write, address);
631 }
632
633 /* We might be faulting on a vmalloc page, so check that first. */
634 if (fault_num != INT_DTLB_ACCESS && vmalloc_fault(pgd, address) >= 0)
635 return state;
636
637 /*
638 * If we faulted with ICS set in sys_cmpxchg, we are providing
639 * a user syscall service that should generate a signal on
640 * fault. We didn't set up a kernel stack on initial entry to
641 * sys_cmpxchg, but instead had one set up by the fault, which
642 * (because sys_cmpxchg never releases ICS) came to us via the
643 * SYSTEM_SAVE_1_2 mechanism, and thus EX_CONTEXT_1_[01] are
644 * still referencing the original user code. We release the
645 * atomic lock and rewrite pt_regs so that it appears that we
646 * came from user-space directly, and after we finish the
647 * fault we'll go back to user space and re-issue the swint.
648 * This way the backtrace information is correct if we need to
649 * emit a stack dump at any point while handling this.
650 *
651 * Must match register use in sys_cmpxchg().
652 */
653 if (pc >= (unsigned long) sys_cmpxchg &&
654 pc < (unsigned long) __sys_cmpxchg_end) {
655#ifdef CONFIG_SMP
656 /* Don't unlock before we could have locked. */
657 if (pc >= (unsigned long)__sys_cmpxchg_grab_lock) {
658 int *lock_ptr = (int *)(regs->regs[ATOMIC_LOCK_REG]);
659 __atomic_fault_unlock(lock_ptr);
660 }
661#endif
662 regs->sp = regs->regs[27];
663 }
664
665 /*
666 * We can also fault in the atomic assembly, in which
667 * case we use the exception table to do the first-level fixup.
668 * We may re-fixup again in the real fault handler if it
669 * turns out the faulting address is just bad, and not,
670 * for example, migrating.
671 */
672 else if (pc >= (unsigned long) __start_atomic_asm_code &&
673 pc < (unsigned long) __end_atomic_asm_code) {
674 const struct exception_table_entry *fixup;
675#ifdef CONFIG_SMP
676 /* Unlock the atomic lock. */
677 int *lock_ptr = (int *)(regs->regs[ATOMIC_LOCK_REG]);
678 __atomic_fault_unlock(lock_ptr);
679#endif
680 fixup = search_exception_tables(pc);
681 if (!fixup)
682 ics_panic("ICS atomic fault not in table:"
683 " PC %#lx, fault %d", pc, fault_num);
684 regs->pc = fixup->fixup;
685 regs->ex1 = PL_ICS_EX1(KERNEL_PL, 0);
686 }
687
688 /*
689 * NOTE: the one other type of access that might bring us here
690 * are the memory ops in __tns_atomic_acquire/__tns_atomic_release,
691 * but we don't have to check specially for them since we can
692 * always safely return to the address of the fault and retry,
693 * since no separate atomic locks are involved.
694 */
695
696 /*
697 * Now that we have released the atomic lock (if necessary),
698 * it's safe to spin if the PTE that caused the fault was migrating.
699 */
700 if (fault_num == INT_DTLB_ACCESS)
701 write = 1;
702 if (handle_migrating_pte(pgd, fault_num, address, 1, write))
703 return state;
704
705 /* Return zero so that we continue on with normal fault handling. */
706 state.retval = 0;
707 return state;
708}
709
710#endif /* !__tilegx__ */
711
712/*
713 * This routine handles page faults. It determines the address, and the
714 * problem, and then passes it handle_page_fault() for normal DTLB and
715 * ITLB issues, and for DMA or SN processor faults when we are in user
716 * space. For the latter, if we're in kernel mode, we just save the
717 * interrupt away appropriately and return immediately. We can't do
718 * page faults for user code while in kernel mode.
719 */
720void do_page_fault(struct pt_regs *regs, int fault_num,
721 unsigned long address, unsigned long write)
722{
723 int is_page_fault;
724
725 /* This case should have been handled by do_page_fault_ics(). */
726 BUG_ON(write & ~1);
727
728#if CHIP_HAS_TILE_DMA()
729 /*
730 * If it's a DMA fault, suspend the transfer while we're
731 * handling the miss; we'll restart after it's handled. If we
732 * don't suspend, it's possible that this process could swap
733 * out and back in, and restart the engine since the DMA is
734 * still 'running'.
735 */
736 if (fault_num == INT_DMATLB_MISS ||
737 fault_num == INT_DMATLB_ACCESS ||
738 fault_num == INT_DMATLB_MISS_DWNCL ||
739 fault_num == INT_DMATLB_ACCESS_DWNCL) {
740 __insn_mtspr(SPR_DMA_CTR, SPR_DMA_CTR__SUSPEND_MASK);
741 while (__insn_mfspr(SPR_DMA_USER_STATUS) &
742 SPR_DMA_STATUS__BUSY_MASK)
743 ;
744 }
745#endif
746
747 /* Validate fault num and decide if this is a first-time page fault. */
748 switch (fault_num) {
749 case INT_ITLB_MISS:
750 case INT_DTLB_MISS:
751#if CHIP_HAS_TILE_DMA()
752 case INT_DMATLB_MISS:
753 case INT_DMATLB_MISS_DWNCL:
754#endif
755#if CHIP_HAS_SN_PROC()
756 case INT_SNITLB_MISS:
757 case INT_SNITLB_MISS_DWNCL:
758#endif
759 is_page_fault = 1;
760 break;
761
762 case INT_DTLB_ACCESS:
763#if CHIP_HAS_TILE_DMA()
764 case INT_DMATLB_ACCESS:
765 case INT_DMATLB_ACCESS_DWNCL:
766#endif
767 is_page_fault = 0;
768 break;
769
770 default:
771 panic("Bad fault number %d in do_page_fault", fault_num);
772 }
773
774 if (EX1_PL(regs->ex1) != USER_PL) {
775 struct async_tlb *async;
776 switch (fault_num) {
777#if CHIP_HAS_TILE_DMA()
778 case INT_DMATLB_MISS:
779 case INT_DMATLB_ACCESS:
780 case INT_DMATLB_MISS_DWNCL:
781 case INT_DMATLB_ACCESS_DWNCL:
782 async = &current->thread.dma_async_tlb;
783 break;
784#endif
785#if CHIP_HAS_SN_PROC()
786 case INT_SNITLB_MISS:
787 case INT_SNITLB_MISS_DWNCL:
788 async = &current->thread.sn_async_tlb;
789 break;
790#endif
791 default:
792 async = NULL;
793 }
794 if (async) {
795
796 /*
797 * No vmalloc check required, so we can allow
798 * interrupts immediately at this point.
799 */
800 local_irq_enable();
801
802 set_thread_flag(TIF_ASYNC_TLB);
803 if (async->fault_num != 0) {
804 panic("Second async fault %d;"
805 " old fault was %d (%#lx/%ld)",
806 fault_num, async->fault_num,
807 address, write);
808 }
809 BUG_ON(fault_num == 0);
810 async->fault_num = fault_num;
811 async->is_fault = is_page_fault;
812 async->is_write = write;
813 async->address = address;
814 return;
815 }
816 }
817
818 handle_page_fault(regs, fault_num, is_page_fault, address, write);
819}
820
821
822#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC()
823/*
824 * Check an async_tlb structure to see if a deferred fault is waiting,
825 * and if so pass it to the page-fault code.
826 */
827static void handle_async_page_fault(struct pt_regs *regs,
828 struct async_tlb *async)
829{
830 if (async->fault_num) {
831 /*
832 * Clear async->fault_num before calling the page-fault
833 * handler so that if we re-interrupt before returning
834 * from the function we have somewhere to put the
835 * information from the new interrupt.
836 */
837 int fault_num = async->fault_num;
838 async->fault_num = 0;
839 handle_page_fault(regs, fault_num, async->is_fault,
840 async->address, async->is_write);
841 }
842}
843#endif /* CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC() */
844
845
/*
 * This routine effectively re-issues asynchronous page faults
 * when we are returning to user space.
 *
 * It replays whatever is pending in the current thread's DMA and SN
 * async_tlb slots, for whichever of those engines the chip has.
 */
void do_async_page_fault(struct pt_regs *regs)
{
	/*
	 * Clear thread flag early.  If we re-interrupt while processing
	 * code here, we will reset it and recall this routine before
	 * returning to user space.
	 */
	clear_thread_flag(TIF_ASYNC_TLB);

#if CHIP_HAS_TILE_DMA()
	handle_async_page_fault(regs, &current->thread.dma_async_tlb);
#endif
#if CHIP_HAS_SN_PROC()
	handle_async_page_fault(regs, &current->thread.sn_async_tlb);
#endif
}
866
/*
 * Propagate the kernel mappings at and above PAGE_OFFSET from the
 * reference page table into every page table on pgd_list.
 *
 * On tilegx nothing is copied; we only assert that the vmalloc area
 * lies within a single pgd slot.
 */
void vmalloc_sync_all(void)
{
#ifdef __tilegx__
	/* Currently all L1 kernel pmd's are static and shared. */
	BUG_ON(pgd_index(VMALLOC_END) != pgd_index(VMALLOC_START));
#else
	/*
	 * Note that races in the updates of insync and start aren't
	 * problematic: insync can only get set bits added, and updates to
	 * start are only improving performance (without affecting correctness
	 * if undone).
	 */
	static DECLARE_BITMAP(insync, PTRS_PER_PGD);
	static unsigned long start = PAGE_OFFSET;
	unsigned long address;

	BUILD_BUG_ON(PAGE_OFFSET & ~PGDIR_MASK);
	/*
	 * Step one pgd slot at a time.  There is no explicit upper
	 * bound: the loop ends once address wraps around and drops
	 * below PAGE_OFFSET.
	 */
	for (address = start; address >= PAGE_OFFSET; address += PGDIR_SIZE) {
		if (!test_bit(pgd_index(address), insync)) {
			unsigned long flags;
			struct list_head *pos;

			spin_lock_irqsave(&pgd_lock, flags);
			list_for_each(pos, &pgd_list)
				if (!vmalloc_sync_one(list_to_pgd(pos),
								address)) {
					/* Must be at first entry in list. */
					BUG_ON(pos != pgd_list.next);
					break;
				}
			spin_unlock_irqrestore(&pgd_lock, flags);
			/*
			 * pos is still the first entry only if we broke
			 * out above (nothing to copy for this slot);
			 * otherwise the slot counts as synced.
			 */
			if (pos != pgd_list.next)
				set_bit(pgd_index(address), insync);
		}
		/* Advance the cached start past a leading synced slot. */
		if (address == start && test_bit(pgd_index(address), insync))
			start = address + PGDIR_SIZE;
	}
#endif
}