author		Linus Torvalds <torvalds@linux-foundation.org>	2017-06-23 20:53:16 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2017-06-23 20:53:16 -0400
commit		94a6df251dd08c6436ebd6d10c68f03659148ce1 (patch)
tree		51aca5d25d4bffa9860d8c1b1d96d3a64cd28a38
parent		cd5545ae87ed9ca76dba7753b436713ce8df2872 (diff)
parent		34f19ff1b5a0d11e46df479623d6936460105c9f (diff)
Merge tag 'powerpc-4.12-7' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull powerpc fixes from Michael Ellerman:
"Some more powerpc fixes for 4.12. Most of these actually came in last
week but got held up for some more testing.
- Three fixes for kprobes/ftrace/livepatch interactions.
- Properly handle data breakpoints when using the Radix MMU.
- Fix perf sampling of registers during call_usermodehelper().
- Properly initialise the thread_info on our emergency stacks.
- Add an explicit flush when doing TLB invalidations for a process
  using NPU2.
Thanks to: Alistair Popple, Naveen N. Rao, Nicholas Piggin, Ravi
Bangoria, Masami Hiramatsu"
* tag 'powerpc-4.12-7' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux:
powerpc/64: Initialise thread_info for emergency stacks
powerpc/powernv/npu-dma: Add explicit flush when sending an ATSD
powerpc/perf: Fix oops when kthread execs user process
powerpc/64s: Handle data breakpoints in Radix mode
powerpc/kprobes: Skip livepatch_handler() for jprobes
powerpc/ftrace: Pass the correct stack pointer for DYNAMIC_FTRACE_WITH_REGS
powerpc/kprobes: Pause function_graph tracing during jprobes handling
-rw-r--r--	arch/powerpc/include/asm/kprobes.h		|  1
-rw-r--r--	arch/powerpc/kernel/exceptions-64s.S		| 11
-rw-r--r--	arch/powerpc/kernel/kprobes.c			| 17
-rw-r--r--	arch/powerpc/kernel/setup_64.c			| 31
-rw-r--r--	arch/powerpc/kernel/trace/ftrace_64_mprofile.S	| 59
-rw-r--r--	arch/powerpc/perf/perf_regs.c			|  3
-rw-r--r--	arch/powerpc/platforms/powernv/npu-dma.c	| 94
7 files changed, 166 insertions(+), 50 deletions(-)
diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h
index a83821f33ea3..8814a7249ceb 100644
--- a/arch/powerpc/include/asm/kprobes.h
+++ b/arch/powerpc/include/asm/kprobes.h
@@ -103,6 +103,7 @@ extern int kprobe_exceptions_notify(struct notifier_block *self,
 extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
 extern int kprobe_handler(struct pt_regs *regs);
 extern int kprobe_post_handler(struct pt_regs *regs);
+extern int is_current_kprobe_addr(unsigned long addr);
 #ifdef CONFIG_KPROBES_ON_FTRACE
 extern int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
 			   struct kprobe_ctlblk *kcb);
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index ae418b85c17c..b886795060fd 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1411,10 +1411,8 @@ USE_TEXT_SECTION()
 	.balign	IFETCH_ALIGN_BYTES
 do_hash_page:
 #ifdef CONFIG_PPC_STD_MMU_64
-	andis.	r0,r4,0xa410		/* weird error? */
+	andis.	r0,r4,0xa450		/* weird error? */
 	bne-	handle_page_fault	/* if not, try to insert a HPTE */
-	andis.	r0,r4,DSISR_DABRMATCH@h
-	bne-	handle_dabr_fault
 	CURRENT_THREAD_INFO(r11, r1)
 	lwz	r0,TI_PREEMPT(r11)	/* If we're in an "NMI" */
 	andis.	r0,r0,NMI_MASK@h	/* (i.e. an irq when soft-disabled) */
@@ -1438,11 +1436,16 @@ do_hash_page:
 
 	/* Error */
 	blt-	13f
+
+	/* Reload DSISR into r4 for the DABR check below */
+	ld	r4,_DSISR(r1)
 #endif /* CONFIG_PPC_STD_MMU_64 */
 
 	/* Here we have a page fault that hash_page can't handle. */
 handle_page_fault:
-11:	ld	r4,_DAR(r1)
+11:	andis.	r0,r4,DSISR_DABRMATCH@h
+	bne-	handle_dabr_fault
+	ld	r4,_DAR(r1)
 	ld	r5,_DSISR(r1)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	do_page_fault
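
The widened mask in do_hash_page is just the old filter plus the high half of DSISR_DABRMATCH, so data breakpoints are routed to handle_page_fault on both Hash and Radix. A standalone sketch checking the arithmetic, assuming DSISR_DABRMATCH is 0x00400000 as on 64-bit Book3S (andis. tests the high 16 bits of DSISR):

#include <assert.h>
#include <stdio.h>

/* assumption: DSISR_DABRMATCH is 0x00400000, i.e. 0x0040 in the high halfword */
#define DSISR_DABRMATCH 0x00400000u

int main(void)
{
	unsigned int old_mask = 0xa410;
	unsigned int dabr_hi  = DSISR_DABRMATCH >> 16;

	/* the new mask tested by "andis. r0,r4,0xa450" above */
	assert((old_mask | dabr_hi) == 0xa450);
	printf("0x%04x | 0x%04x = 0x%04x\n", old_mask, dabr_hi, old_mask | dabr_hi);
	return 0;
}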
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index fc4343514bed..01addfb0ed0a 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -43,6 +43,12 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
 
 struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}};
 
+int is_current_kprobe_addr(unsigned long addr)
+{
+	struct kprobe *p = kprobe_running();
+	return (p && (unsigned long)p->addr == addr) ? 1 : 0;
+}
+
 bool arch_within_kprobe_blacklist(unsigned long addr)
 {
 	return (addr >= (unsigned long)__kprobes_text_start &&
@@ -617,6 +623,15 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 	regs->gpr[2] = (unsigned long)(((func_descr_t *)jp->entry)->toc);
 #endif
 
+	/*
+	 * jprobes use jprobe_return() which skips the normal return
+	 * path of the function, and this messes up the accounting of the
+	 * function graph tracer.
+	 *
+	 * Pause function graph tracing while performing the jprobe function.
+	 */
+	pause_graph_tracing();
+
 	return 1;
 }
 NOKPROBE_SYMBOL(setjmp_pre_handler);
@@ -642,6 +657,8 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 	 * saved regs...
 	 */
 	memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs));
+	/* It's OK to start function graph tracing again */
+	unpause_graph_tracing();
 	preempt_enable_no_resched();
 	return 1;
 }
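
To see why the pause is needed: the function graph tracer pairs every traced entry with a traced exit, but jprobe_return() leaves the probed function through a trap rather than its normal return path, so the exit side never runs. A minimal userspace analogy (illustration only, not kernel code; the depth counter stands in for the tracer's per-task return stack):

#include <setjmp.h>
#include <stdio.h>

static jmp_buf jb;
static int trace_depth;   /* stands in for the graph tracer's return stack */
static int trace_paused;  /* stands in for pause_graph_tracing() */

static void trace_enter(void) { if (!trace_paused) trace_depth++; }
static void trace_exit(void)  { if (!trace_paused) trace_depth--; }

static void probed_function(void)
{
	trace_enter();
	longjmp(jb, 1);   /* jprobe_return() analogue: skips the normal return */
	trace_exit();     /* never runs, so depth would leak if not paused */
}

int main(void)
{
	if (setjmp(jb) == 0) {
		trace_paused = 1;   /* what setjmp_pre_handler() now does */
		probed_function();
	}
	trace_paused = 0;           /* what longjmp_break_handler() now does */
	printf("depth = %d\n", trace_depth); /* 0: balanced because we paused */
	return 0;
}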
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index a8c1f99e9607..4640f6d64f8b 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -616,6 +616,24 @@ void __init exc_lvl_early_init(void)
 #endif
 
 /*
+ * Emergency stacks are used for a range of things, from asynchronous
+ * NMIs (system reset, machine check) to synchronous, process context.
+ * We set preempt_count to zero, even though that isn't necessarily correct. To
+ * get the right value we'd need to copy it from the previous thread_info, but
+ * doing that might fault causing more problems.
+ * TODO: what to do with accounting?
+ */
+static void emerg_stack_init_thread_info(struct thread_info *ti, int cpu)
+{
+	ti->task = NULL;
+	ti->cpu = cpu;
+	ti->preempt_count = 0;
+	ti->local_flags = 0;
+	ti->flags = 0;
+	klp_init_thread_info(ti);
+}
+
+/*
  * Stack space used when we detect a bad kernel stack pointer, and
 * early in SMP boots before relocation is enabled. Exclusive emergency
 * stack for machine checks.
@@ -633,24 +651,31 @@
 	 * Since we use these as temporary stacks during secondary CPU
 	 * bringup, we need to get at them in real mode. This means they
 	 * must also be within the RMO region.
+	 *
+	 * The IRQ stacks allocated elsewhere in this file are zeroed and
+	 * initialized in kernel/irq.c. These are initialized here in order
+	 * to have emergency stacks available as early as possible.
 	 */
 	limit = min(safe_stack_limit(), ppc64_rma_size);
 
 	for_each_possible_cpu(i) {
 		struct thread_info *ti;
 		ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
-		klp_init_thread_info(ti);
+		memset(ti, 0, THREAD_SIZE);
+		emerg_stack_init_thread_info(ti, i);
 		paca[i].emergency_sp = (void *)ti + THREAD_SIZE;
 
 #ifdef CONFIG_PPC_BOOK3S_64
 		/* emergency stack for NMI exception handling. */
 		ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
-		klp_init_thread_info(ti);
+		memset(ti, 0, THREAD_SIZE);
+		emerg_stack_init_thread_info(ti, i);
 		paca[i].nmi_emergency_sp = (void *)ti + THREAD_SIZE;
 
 		/* emergency stack for machine check exception handling. */
 		ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
-		klp_init_thread_info(ti);
+		memset(ti, 0, THREAD_SIZE);
+		emerg_stack_init_thread_info(ti, i);
 		paca[i].mc_emergency_sp = (void *)ti + THREAD_SIZE;
 #endif
 	}
diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
index 7c933a99f5d5..c98e90b4ea7b 100644
--- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
+++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
@@ -45,10 +45,14 @@ _GLOBAL(ftrace_caller)
 	stdu	r1,-SWITCH_FRAME_SIZE(r1)
 
 	/* Save all gprs to pt_regs */
-	SAVE_8GPRS(0,r1)
-	SAVE_8GPRS(8,r1)
-	SAVE_8GPRS(16,r1)
-	SAVE_8GPRS(24,r1)
+	SAVE_GPR(0, r1)
+	SAVE_10GPRS(2, r1)
+	SAVE_10GPRS(12, r1)
+	SAVE_10GPRS(22, r1)
+
+	/* Save previous stack pointer (r1) */
+	addi	r8, r1, SWITCH_FRAME_SIZE
+	std	r8, GPR1(r1)
 
 	/* Load special regs for save below */
 	mfmsr	r8
@@ -95,18 +99,44 @@ ftrace_call:
 	bl	ftrace_stub
 	nop
 
-	/* Load ctr with the possibly modified NIP */
-	ld	r3, _NIP(r1)
-	mtctr	r3
+	/* Load the possibly modified NIP */
+	ld	r15, _NIP(r1)
+
 #ifdef CONFIG_LIVEPATCH
-	cmpd	r14,r3		/* has NIP been altered? */
+	cmpd	r14, r15	/* has NIP been altered? */
+#endif
+
+#if defined(CONFIG_LIVEPATCH) && defined(CONFIG_KPROBES_ON_FTRACE)
+	/* NIP has not been altered, skip over further checks */
+	beq	1f
+
+	/* Check if there is an active kprobe on us */
+	subi	r3, r14, 4
+	bl	is_current_kprobe_addr
+	nop
+
+	/*
+	 * If r3 == 1, then this is a kprobe/jprobe.
+	 * else, this is livepatched function.
+	 *
+	 * The conditional branch for livepatch_handler below will use the
+	 * result of this comparison. For kprobe/jprobe, we just need to branch to
+	 * the new NIP, not call livepatch_handler. The branch below is bne, so we
+	 * want CR0[EQ] to be true if this is a kprobe/jprobe. Which means we want
+	 * CR0[EQ] = (r3 == 1).
+	 */
+	cmpdi	r3, 1
+1:
 #endif
 
+	/* Load CTR with the possibly modified NIP */
+	mtctr	r15
+
 	/* Restore gprs */
-	REST_8GPRS(0,r1)
-	REST_8GPRS(8,r1)
-	REST_8GPRS(16,r1)
-	REST_8GPRS(24,r1)
+	REST_GPR(0,r1)
+	REST_10GPRS(2,r1)
+	REST_10GPRS(12,r1)
+	REST_10GPRS(22,r1)
 
 	/* Restore possibly modified LR */
 	ld	r0, _LINK(r1)
@@ -119,7 +149,10 @@ ftrace_call:
 	addi r1, r1, SWITCH_FRAME_SIZE
 
 #ifdef CONFIG_LIVEPATCH
-	/* Based on the cmpd above, if the NIP was altered handle livepatch */
+	/*
+	 * Based on the cmpd or cmpdi above, if the NIP was altered and we're
+	 * not on a kprobe/jprobe, then handle livepatch.
+	 */
 	bne-	livepatch_handler
 #endif
 
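
The combined branch logic is easier to follow in C. This is an illustrative sketch only, not kernel code: decide() and next_step are names invented for this example, and is_current_kprobe_addr() mirrors the helper added in kprobes.c above (stubbed here so the sketch compiles). r14 holds the unmodified NIP and r15 the possibly modified one, so the probed instruction sits at r14 - 4:

#include <stdio.h>

enum next_step { RETURN_VIA_CTR, BRANCH_TO_NEW_NIP, CALL_LIVEPATCH_HANDLER };

/* stub standing in for the kernel helper added in kprobes.c */
static int is_current_kprobe_addr(unsigned long addr)
{
	return addr == 0x1000; /* pretend a kprobe is active at 0x1000 */
}

static enum next_step decide(unsigned long orig_nip, unsigned long new_nip)
{
	if (new_nip == orig_nip)			/* cmpd r14, r15 */
		return RETURN_VIA_CTR;			/* beq 1f */
	if (is_current_kprobe_addr(orig_nip - 4))	/* subi r3, r14, 4 */
		return BRANCH_TO_NEW_NIP;		/* cmpdi r3, 1 sets CR0[EQ] */
	return CALL_LIVEPATCH_HANDLER;			/* bne- livepatch_handler */
}

int main(void)
{
	printf("%d %d %d\n",
	       decide(0x2000, 0x2000),	/* NIP untouched */
	       decide(0x1004, 0x3000),	/* kprobe at orig_nip - 4 */
	       decide(0x2000, 0x3000));	/* livepatched function */
	return 0;
}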
diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c
index cbd82fde5770..09ceea6175ba 100644
--- a/arch/powerpc/perf/perf_regs.c
+++ b/arch/powerpc/perf/perf_regs.c
@@ -101,5 +101,6 @@ void perf_get_regs_user(struct perf_regs *regs_user,
 			struct pt_regs *regs_user_copy)
 {
 	regs_user->regs = task_pt_regs(current);
-	regs_user->abi = perf_reg_abi(current);
+	regs_user->abi = (regs_user->regs) ? perf_reg_abi(current) :
+			 PERF_SAMPLE_REGS_ABI_NONE;
 }
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index e6f444b46207..b5d960d6db3d 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -449,7 +449,7 @@ static int mmio_launch_invalidate(struct npu *npu, unsigned long launch,
 	return mmio_atsd_reg;
 }
 
-static int mmio_invalidate_pid(struct npu *npu, unsigned long pid)
+static int mmio_invalidate_pid(struct npu *npu, unsigned long pid, bool flush)
 {
 	unsigned long launch;
 
@@ -465,12 +465,15 @@ static int mmio_invalidate_pid(struct npu *npu, unsigned long pid)
 	/* PID */
 	launch |= pid << PPC_BITLSHIFT(38);
 
+	/* No flush */
+	launch |= !flush << PPC_BITLSHIFT(39);
+
 	/* Invalidating the entire process doesn't use a va */
 	return mmio_launch_invalidate(npu, launch, 0);
 }
 
 static int mmio_invalidate_va(struct npu *npu, unsigned long va,
-			unsigned long pid)
+			unsigned long pid, bool flush)
 {
 	unsigned long launch;
 
@@ -486,26 +489,60 @@ static int mmio_invalidate_va(struct npu *npu, unsigned long va,
 	/* PID */
 	launch |= pid << PPC_BITLSHIFT(38);
 
+	/* No flush */
+	launch |= !flush << PPC_BITLSHIFT(39);
+
 	return mmio_launch_invalidate(npu, launch, va);
 }
 
 #define mn_to_npu_context(x) container_of(x, struct npu_context, mn)
 
+struct mmio_atsd_reg {
+	struct npu *npu;
+	int reg;
+};
+
+static void mmio_invalidate_wait(
+	struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], bool flush)
+{
+	struct npu *npu;
+	int i, reg;
+
+	/* Wait for all invalidations to complete */
+	for (i = 0; i <= max_npu2_index; i++) {
+		if (mmio_atsd_reg[i].reg < 0)
+			continue;
+
+		/* Wait for completion */
+		npu = mmio_atsd_reg[i].npu;
+		reg = mmio_atsd_reg[i].reg;
+		while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
+			cpu_relax();
+
+		put_mmio_atsd_reg(npu, reg);
+
+		/*
+		 * The GPU requires two flush ATSDs to ensure all entries have
+		 * been flushed. We use PID 0 as it will never be used for a
+		 * process on the GPU.
+		 */
+		if (flush)
+			mmio_invalidate_pid(npu, 0, true);
+	}
+}
+
 /*
  * Invalidate either a single address or an entire PID depending on
  * the value of va.
  */
 static void mmio_invalidate(struct npu_context *npu_context, int va,
-			unsigned long address)
+			unsigned long address, bool flush)
 {
-	int i, j, reg;
+	int i, j;
 	struct npu *npu;
 	struct pnv_phb *nphb;
 	struct pci_dev *npdev;
-	struct {
-		struct npu *npu;
-		int reg;
-	} mmio_atsd_reg[NV_MAX_NPUS];
+	struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS];
 	unsigned long pid = npu_context->mm->context.id;
 
 	/*
@@ -525,10 +562,11 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
 
 		if (va)
 			mmio_atsd_reg[i].reg =
-				mmio_invalidate_va(npu, address, pid);
+				mmio_invalidate_va(npu, address, pid,
+						flush);
 		else
 			mmio_atsd_reg[i].reg =
-				mmio_invalidate_pid(npu, pid);
+				mmio_invalidate_pid(npu, pid, flush);
 
 		/*
 		 * The NPU hardware forwards the shootdown to all GPUs
@@ -544,18 +582,10 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
 	 */
 	flush_tlb_mm(npu_context->mm);
 
-	/* Wait for all invalidations to complete */
-	for (i = 0; i <= max_npu2_index; i++) {
-		if (mmio_atsd_reg[i].reg < 0)
-			continue;
-
-		/* Wait for completion */
-		npu = mmio_atsd_reg[i].npu;
-		reg = mmio_atsd_reg[i].reg;
-		while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
-			cpu_relax();
-		put_mmio_atsd_reg(npu, reg);
-	}
+	mmio_invalidate_wait(mmio_atsd_reg, flush);
+	if (flush)
+		/* Wait for the flush to complete */
+		mmio_invalidate_wait(mmio_atsd_reg, false);
 }
 
 static void pnv_npu2_mn_release(struct mmu_notifier *mn,
@@ -571,7 +601,7 @@ static void pnv_npu2_mn_release(struct mmu_notifier *mn,
 	 * There should be no more translation requests for this PID, but we
 	 * need to ensure any entries for it are removed from the TLB.
 	 */
-	mmio_invalidate(npu_context, 0, 0);
+	mmio_invalidate(npu_context, 0, 0, true);
 }
 
 static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
@@ -581,7 +611,7 @@ static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
 {
 	struct npu_context *npu_context = mn_to_npu_context(mn);
 
-	mmio_invalidate(npu_context, 1, address);
+	mmio_invalidate(npu_context, 1, address, true);
 }
 
 static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn,
@@ -590,7 +620,7 @@ static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn,
 {
 	struct npu_context *npu_context = mn_to_npu_context(mn);
 
-	mmio_invalidate(npu_context, 1, address);
+	mmio_invalidate(npu_context, 1, address, true);
 }
 
 static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
@@ -600,8 +630,11 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
 	struct npu_context *npu_context = mn_to_npu_context(mn);
 	unsigned long address;
 
-	for (address = start; address <= end; address += PAGE_SIZE)
-		mmio_invalidate(npu_context, 1, address);
+	for (address = start; address < end; address += PAGE_SIZE)
+		mmio_invalidate(npu_context, 1, address, false);
+
+	/* Do the flush only on the final address == end */
+	mmio_invalidate(npu_context, 1, address, true);
 }
 
 static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
@@ -651,8 +684,11 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
 		/* No nvlink associated with this GPU device */
 		return ERR_PTR(-ENODEV);
 
-	if (!mm) {
-		/* kernel thread contexts are not supported */
+	if (!mm || mm->context.id == 0) {
+		/*
+		 * Kernel thread contexts are not supported and context id 0 is
+		 * reserved on the GPU.
+		 */
 		return ERR_PTR(-EINVAL);
 	}
 
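
A note on the launch-word arithmetic above: PPC_BITLSHIFT converts IBM big-endian bit numbering into a left-shift count, so the "no flush" field at IBM bit 39 lands at shift 24. A standalone sketch, assuming PPC_BITLSHIFT(be) expands to 63 - (be) as in the kernel's 64-bit bitops; atsd_launch is a made-up name mirroring only the two fields shown in the diff:

#include <stdint.h>
#include <stdio.h>

/* assumption: matches the kernel's BITS_PER_LONG - 1 - (be) on 64-bit */
#define PPC_BITLSHIFT(be) (63 - (be))

/* hypothetical helper mirroring the field packing in the diff above */
static uint64_t atsd_launch(uint64_t pid, int flush)
{
	uint64_t launch = 0;

	launch |= pid << PPC_BITLSHIFT(38);			/* PID field */
	launch |= (uint64_t)!flush << PPC_BITLSHIFT(39);	/* 1 = no flush */
	return launch;
}

int main(void)
{
	/* IBM bit 39 == shift 24, so "no flush" shows up as bit 0x1000000 */
	printf("launch = 0x%016llx\n", (unsigned long long)atsd_launch(7, 0));
	return 0;
}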