author		Linus Torvalds <torvalds@linux-foundation.org>	2017-06-23 20:53:16 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2017-06-23 20:53:16 -0400
commit		94a6df251dd08c6436ebd6d10c68f03659148ce1 (patch)
tree		51aca5d25d4bffa9860d8c1b1d96d3a64cd28a38
parent		cd5545ae87ed9ca76dba7753b436713ce8df2872 (diff)
parent		34f19ff1b5a0d11e46df479623d6936460105c9f (diff)
Merge tag 'powerpc-4.12-7' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux

Pull powerpc fixes from Michael Ellerman:
 "Some more powerpc fixes for 4.12. Most of these actually came in last
  week but got held up for some more testing.

   - three fixes for kprobes/ftrace/livepatch interactions.

   - properly handle data breakpoints when using the Radix MMU.

   - fix for perf sampling of registers during call_usermodehelper().

   - properly initialise the thread_info on our emergency stacks

   - add an explicit flush when doing TLB invalidations for a process
     using NPU2.

  Thanks to: Alistair Popple, Naveen N. Rao, Nicholas Piggin, Ravi
  Bangoria, Masami Hiramatsu"

* tag 'powerpc-4.12-7' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux:
  powerpc/64: Initialise thread_info for emergency stacks
  powerpc/powernv/npu-dma: Add explicit flush when sending an ATSD
  powerpc/perf: Fix oops when kthread execs user process
  powerpc/64s: Handle data breakpoints in Radix mode
  powerpc/kprobes: Skip livepatch_handler() for jprobes
  powerpc/ftrace: Pass the correct stack pointer for DYNAMIC_FTRACE_WITH_REGS
  powerpc/kprobes: Pause function_graph tracing during jprobes handling
-rw-r--r--  arch/powerpc/include/asm/kprobes.h              |  1
-rw-r--r--  arch/powerpc/kernel/exceptions-64s.S            | 11
-rw-r--r--  arch/powerpc/kernel/kprobes.c                   | 17
-rw-r--r--  arch/powerpc/kernel/setup_64.c                  | 31
-rw-r--r--  arch/powerpc/kernel/trace/ftrace_64_mprofile.S  | 59
-rw-r--r--  arch/powerpc/perf/perf_regs.c                   |  3
-rw-r--r--  arch/powerpc/platforms/powernv/npu-dma.c        | 94
7 files changed, 166 insertions(+), 50 deletions(-)
diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h
index a83821f33ea3..8814a7249ceb 100644
--- a/arch/powerpc/include/asm/kprobes.h
+++ b/arch/powerpc/include/asm/kprobes.h
@@ -103,6 +103,7 @@ extern int kprobe_exceptions_notify(struct notifier_block *self,
 extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
 extern int kprobe_handler(struct pt_regs *regs);
 extern int kprobe_post_handler(struct pt_regs *regs);
+extern int is_current_kprobe_addr(unsigned long addr);
 #ifdef CONFIG_KPROBES_ON_FTRACE
 extern int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
 			   struct kprobe_ctlblk *kcb);
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index ae418b85c17c..b886795060fd 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1411,10 +1411,8 @@ USE_TEXT_SECTION()
 	.balign	IFETCH_ALIGN_BYTES
 do_hash_page:
 #ifdef CONFIG_PPC_STD_MMU_64
-	andis.	r0,r4,0xa410		/* weird error? */
+	andis.	r0,r4,0xa450		/* weird error? */
 	bne-	handle_page_fault	/* if not, try to insert a HPTE */
-	andis.	r0,r4,DSISR_DABRMATCH@h
-	bne-	handle_dabr_fault
 	CURRENT_THREAD_INFO(r11, r1)
 	lwz	r0,TI_PREEMPT(r11)	/* If we're in an "NMI" */
 	andis.	r0,r0,NMI_MASK@h	/* (i.e. an irq when soft-disabled) */
@@ -1438,11 +1436,16 @@ do_hash_page:
 
 	/* Error */
 	blt-	13f
+
+	/* Reload DSISR into r4 for the DABR check below */
+	ld	r4,_DSISR(r1)
 #endif /* CONFIG_PPC_STD_MMU_64 */
 
 /* Here we have a page fault that hash_page can't handle. */
 handle_page_fault:
-11:	ld	r4,_DAR(r1)
+11:	andis.	r0,r4,DSISR_DABRMATCH@h
+	bne-	handle_dabr_fault
+	ld	r4,_DAR(r1)
 	ld	r5,_DSISR(r1)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	do_page_fault
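
A note on the widened mask: andis. tests the high halfword of DSISR, and the change folds the data-breakpoint bit into the existing "weird error" check so a DABR match now reaches handle_page_fault, which re-checks DSISR_DABRMATCH and branches to handle_dabr_fault. A sketch of the arithmetic, assuming the usual DSISR_DABRMATCH value of 0x00400000 (the macro names here are illustrative, not kernel ones):

/* andis. compares against the high 16 bits of DSISR. */
#define WEIRD_ERROR_BITS	0xa410			/* old mask */
#define DSISR_DABRMATCH_HI	(0x00400000 >> 16)	/* DSISR_DABRMATCH@h == 0x0040 */
#define NEW_MASK		(WEIRD_ERROR_BITS | DSISR_DABRMATCH_HI)
/* NEW_MASK == 0xa450, the constant used in the hunk above */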
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index fc4343514bed..01addfb0ed0a 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -43,6 +43,12 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
 
 struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}};
 
+int is_current_kprobe_addr(unsigned long addr)
+{
+	struct kprobe *p = kprobe_running();
+	return (p && (unsigned long)p->addr == addr) ? 1 : 0;
+}
+
 bool arch_within_kprobe_blacklist(unsigned long addr)
 {
 	return (addr >= (unsigned long)__kprobes_text_start &&
@@ -617,6 +623,15 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 	regs->gpr[2] = (unsigned long)(((func_descr_t *)jp->entry)->toc);
 #endif
 
+	/*
+	 * jprobes use jprobe_return() which skips the normal return
+	 * path of the function, and this messes up the accounting of the
+	 * function graph tracer.
+	 *
+	 * Pause function graph tracing while performing the jprobe function.
+	 */
+	pause_graph_tracing();
+
 	return 1;
 }
 NOKPROBE_SYMBOL(setjmp_pre_handler);
@@ -642,6 +657,8 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 	 * saved regs...
 	 */
 	memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs));
+	/* It's OK to start function graph tracing again */
+	unpause_graph_tracing();
 	preempt_enable_no_resched();
 	return 1;
 }
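
To illustrate the problem being fixed: a jprobe handler runs in place of the probed function and must exit through jprobe_return() rather than returning normally, so the function graph tracer sees an entry with no matching exit unless tracing is paused around it. A minimal 4.12-era jprobe module, modelled on samples/kprobes/jprobe_example.c (the probed symbol and messages are illustrative):

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kprobes.h>

/* Mirrors the 4.12 signature of _do_fork(); this body runs in place
 * of the probed function's first instructions. */
static long jdo_fork(unsigned long clone_flags, unsigned long stack_start,
		     unsigned long stack_size, int __user *parent_tidptr,
		     int __user *child_tidptr, unsigned long tls)
{
	pr_info("jprobe: _do_fork, clone_flags = 0x%lx\n", clone_flags);

	/* Must always end with this: it unwinds back into the probed
	 * function instead of returning, which is the "skipped return
	 * path" that confuses the function graph tracer. */
	jprobe_return();
	return 0;	/* never reached */
}

static struct jprobe my_jprobe = {
	.entry = jdo_fork,
	.kp = {
		.symbol_name	= "_do_fork",
	},
};

static int __init jprobe_init(void)
{
	return register_jprobe(&my_jprobe);
}

static void __exit jprobe_exit(void)
{
	unregister_jprobe(&my_jprobe);
}

module_init(jprobe_init);
module_exit(jprobe_exit);
MODULE_LICENSE("GPL");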
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index a8c1f99e9607..4640f6d64f8b 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -616,6 +616,24 @@ void __init exc_lvl_early_init(void)
 #endif
 
 /*
+ * Emergency stacks are used for a range of things, from asynchronous
+ * NMIs (system reset, machine check) to synchronous, process context.
+ * We set preempt_count to zero, even though that isn't necessarily correct. To
+ * get the right value we'd need to copy it from the previous thread_info, but
+ * doing that might fault causing more problems.
+ * TODO: what to do with accounting?
+ */
+static void emerg_stack_init_thread_info(struct thread_info *ti, int cpu)
+{
+	ti->task = NULL;
+	ti->cpu = cpu;
+	ti->preempt_count = 0;
+	ti->local_flags = 0;
+	ti->flags = 0;
+	klp_init_thread_info(ti);
+}
+
+/*
  * Stack space used when we detect a bad kernel stack pointer, and
  * early in SMP boots before relocation is enabled. Exclusive emergency
  * stack for machine checks.
@@ -633,24 +651,31 @@ void __init emergency_stack_init(void)
 	 * Since we use these as temporary stacks during secondary CPU
 	 * bringup, we need to get at them in real mode. This means they
 	 * must also be within the RMO region.
+	 *
+	 * The IRQ stacks allocated elsewhere in this file are zeroed and
+	 * initialized in kernel/irq.c. These are initialized here in order
+	 * to have emergency stacks available as early as possible.
 	 */
 	limit = min(safe_stack_limit(), ppc64_rma_size);
 
 	for_each_possible_cpu(i) {
 		struct thread_info *ti;
 		ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
-		klp_init_thread_info(ti);
+		memset(ti, 0, THREAD_SIZE);
+		emerg_stack_init_thread_info(ti, i);
 		paca[i].emergency_sp = (void *)ti + THREAD_SIZE;
 
 #ifdef CONFIG_PPC_BOOK3S_64
 		/* emergency stack for NMI exception handling. */
 		ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
-		klp_init_thread_info(ti);
+		memset(ti, 0, THREAD_SIZE);
+		emerg_stack_init_thread_info(ti, i);
 		paca[i].nmi_emergency_sp = (void *)ti + THREAD_SIZE;
 
 		/* emergency stack for machine check exception handling. */
 		ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
-		klp_init_thread_info(ti);
+		memset(ti, 0, THREAD_SIZE);
+		emerg_stack_init_thread_info(ti, i);
 		paca[i].mc_emergency_sp = (void *)ti + THREAD_SIZE;
 #endif
 	}
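
Why the explicit initialisation matters: on 4.12-era powerpc the thread_info sits at the base of each THREAD_SIZE-aligned kernel stack, and CURRENT_THREAD_INFO simply masks the stack pointer, so whatever bytes happen to be at the bottom of an emergency stack are interpreted as a thread_info the first time an exception runs on it. An illustrative, non-kernel rendering of that lookup:

/* Illustrative only: CURRENT_THREAD_INFO(dest, r1) is roughly this in C.
 * An uninitialised emergency stack base would hand back garbage fields
 * (preempt_count, flags, ...), hence the memset plus field setup above. */
static inline struct thread_info *stack_to_thread_info(unsigned long sp)
{
	return (struct thread_info *)(sp & ~(THREAD_SIZE - 1));
}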
diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
index 7c933a99f5d5..c98e90b4ea7b 100644
--- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
+++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
@@ -45,10 +45,14 @@ _GLOBAL(ftrace_caller)
 	stdu	r1,-SWITCH_FRAME_SIZE(r1)
 
 	/* Save all gprs to pt_regs */
-	SAVE_8GPRS(0,r1)
-	SAVE_8GPRS(8,r1)
-	SAVE_8GPRS(16,r1)
-	SAVE_8GPRS(24,r1)
+	SAVE_GPR(0, r1)
+	SAVE_10GPRS(2, r1)
+	SAVE_10GPRS(12, r1)
+	SAVE_10GPRS(22, r1)
+
+	/* Save previous stack pointer (r1) */
+	addi	r8, r1, SWITCH_FRAME_SIZE
+	std	r8, GPR1(r1)
 
 	/* Load special regs for save below */
 	mfmsr	r8
@@ -95,18 +99,44 @@ ftrace_call:
 	bl	ftrace_stub
 	nop
 
-	/* Load ctr with the possibly modified NIP */
-	ld	r3, _NIP(r1)
-	mtctr	r3
+	/* Load the possibly modified NIP */
+	ld	r15, _NIP(r1)
+
 #ifdef CONFIG_LIVEPATCH
-	cmpd	r14,r3	/* has NIP been altered? */
+	cmpd	r14, r15	/* has NIP been altered? */
+#endif
+
+#if defined(CONFIG_LIVEPATCH) && defined(CONFIG_KPROBES_ON_FTRACE)
+	/* NIP has not been altered, skip over further checks */
+	beq	1f
+
+	/* Check if there is an active kprobe on us */
+	subi	r3, r14, 4
+	bl	is_current_kprobe_addr
+	nop
+
+	/*
+	 * If r3 == 1, then this is a kprobe/jprobe.
+	 * else, this is livepatched function.
+	 *
+	 * The conditional branch for livepatch_handler below will use the
+	 * result of this comparison. For kprobe/jprobe, we just need to branch to
+	 * the new NIP, not call livepatch_handler. The branch below is bne, so we
+	 * want CR0[EQ] to be true if this is a kprobe/jprobe. Which means we want
+	 * CR0[EQ] = (r3 == 1).
+	 */
+	cmpdi	r3, 1
+1:
 #endif
 
+	/* Load CTR with the possibly modified NIP */
+	mtctr	r15
+
 	/* Restore gprs */
-	REST_8GPRS(0,r1)
-	REST_8GPRS(8,r1)
-	REST_8GPRS(16,r1)
-	REST_8GPRS(24,r1)
+	REST_GPR(0,r1)
+	REST_10GPRS(2,r1)
+	REST_10GPRS(12,r1)
+	REST_10GPRS(22,r1)
 
 	/* Restore possibly modified LR */
 	ld	r0, _LINK(r1)
@@ -119,7 +149,10 @@ ftrace_call:
 	addi	r1, r1, SWITCH_FRAME_SIZE
 
 #ifdef CONFIG_LIVEPATCH
-	/* Based on the cmpd above, if the NIP was altered handle livepatch */
+	/*
+	 * Based on the cmpd or cmpdi above, if the NIP was altered and we're
+	 * not on a kprobe/jprobe, then handle livepatch.
+	 */
 	bne-	livepatch_handler
 #endif
 
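
In rough C, the decision the trampoline now encodes in CR0[EQ] reads like this (a sketch, not kernel code; nip_entry stands for r14, nip_new for r15):

#include <stdbool.h>

/* Declared in asm/kprobes.h by this series. */
extern int is_current_kprobe_addr(unsigned long addr);

/* Sketch of the CR0 logic above. subi r3,r14,4 points at the ftrace
 * call site, which is where a KPROBES_ON_FTRACE probe is installed. */
static bool should_enter_livepatch_handler(unsigned long nip_entry,
					   unsigned long nip_new)
{
	if (nip_new == nip_entry)
		return false;	/* beq 1f: NIP untouched */
	if (is_current_kprobe_addr(nip_entry - 4))
		return false;	/* kprobe/jprobe: just branch to nip_new */
	return true;		/* bne- livepatch_handler */
}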
diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c
index cbd82fde5770..09ceea6175ba 100644
--- a/arch/powerpc/perf/perf_regs.c
+++ b/arch/powerpc/perf/perf_regs.c
@@ -101,5 +101,6 @@ void perf_get_regs_user(struct perf_regs *regs_user,
 			 struct pt_regs *regs_user_copy)
 {
 	regs_user->regs = task_pt_regs(current);
-	regs_user->abi = perf_reg_abi(current);
+	regs_user->abi = (regs_user->regs) ? perf_reg_abi(current) :
+					PERF_SAMPLE_REGS_ABI_NONE;
 }
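
The oops being fixed: when a kernel thread execs a user binary via call_usermodehelper(), a perf sample can arrive while task_pt_regs(current) is still NULL, and the old code reported a valid sampling ABI for those NULL regs, crashing later in the sampling path. Assembled from the diff (the inline comment is added here for clarity), the function now reads:

void perf_get_regs_user(struct perf_regs *regs_user,
			struct pt_regs *regs,
			struct pt_regs *regs_user_copy)
{
	regs_user->regs = task_pt_regs(current);
	/* No user pt_regs yet: report no ABI rather than sampling NULL regs */
	regs_user->abi = (regs_user->regs) ? perf_reg_abi(current) :
					PERF_SAMPLE_REGS_ABI_NONE;
}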
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index e6f444b46207..b5d960d6db3d 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -449,7 +449,7 @@ static int mmio_launch_invalidate(struct npu *npu, unsigned long launch,
 	return mmio_atsd_reg;
 }
 
-static int mmio_invalidate_pid(struct npu *npu, unsigned long pid)
+static int mmio_invalidate_pid(struct npu *npu, unsigned long pid, bool flush)
 {
 	unsigned long launch;
 
@@ -465,12 +465,15 @@ static int mmio_invalidate_pid(struct npu *npu, unsigned long pid)
 	/* PID */
 	launch |= pid << PPC_BITLSHIFT(38);
 
+	/* No flush */
+	launch |= !flush << PPC_BITLSHIFT(39);
+
 	/* Invalidating the entire process doesn't use a va */
 	return mmio_launch_invalidate(npu, launch, 0);
 }
 
 static int mmio_invalidate_va(struct npu *npu, unsigned long va,
-			unsigned long pid)
+			unsigned long pid, bool flush)
 {
 	unsigned long launch;
 
@@ -486,26 +489,60 @@ static int mmio_invalidate_va(struct npu *npu, unsigned long va,
 	/* PID */
 	launch |= pid << PPC_BITLSHIFT(38);
 
+	/* No flush */
+	launch |= !flush << PPC_BITLSHIFT(39);
+
 	return mmio_launch_invalidate(npu, launch, va);
 }
 
 #define mn_to_npu_context(x) container_of(x, struct npu_context, mn)
 
+struct mmio_atsd_reg {
+	struct npu *npu;
+	int reg;
+};
+
+static void mmio_invalidate_wait(
+	struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], bool flush)
+{
+	struct npu *npu;
+	int i, reg;
+
+	/* Wait for all invalidations to complete */
+	for (i = 0; i <= max_npu2_index; i++) {
+		if (mmio_atsd_reg[i].reg < 0)
+			continue;
+
+		/* Wait for completion */
+		npu = mmio_atsd_reg[i].npu;
+		reg = mmio_atsd_reg[i].reg;
+		while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
+			cpu_relax();
+
+		put_mmio_atsd_reg(npu, reg);
+
+		/*
+		 * The GPU requires two flush ATSDs to ensure all entries have
+		 * been flushed. We use PID 0 as it will never be used for a
+		 * process on the GPU.
+		 */
+		if (flush)
+			mmio_invalidate_pid(npu, 0, true);
+	}
+}
+
 /*
  * Invalidate either a single address or an entire PID depending on
  * the value of va.
  */
 static void mmio_invalidate(struct npu_context *npu_context, int va,
-		unsigned long address)
+		unsigned long address, bool flush)
 {
-	int i, j, reg;
+	int i, j;
 	struct npu *npu;
 	struct pnv_phb *nphb;
 	struct pci_dev *npdev;
-	struct {
-		struct npu *npu;
-		int reg;
-	} mmio_atsd_reg[NV_MAX_NPUS];
+	struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS];
 	unsigned long pid = npu_context->mm->context.id;
 
 	/*
@@ -525,10 +562,11 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
 
 		if (va)
 			mmio_atsd_reg[i].reg =
-				mmio_invalidate_va(npu, address, pid);
+				mmio_invalidate_va(npu, address, pid,
+						flush);
 		else
 			mmio_atsd_reg[i].reg =
-				mmio_invalidate_pid(npu, pid);
+				mmio_invalidate_pid(npu, pid, flush);
 
 		/*
 		 * The NPU hardware forwards the shootdown to all GPUs
@@ -544,18 +582,10 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
 	 */
 	flush_tlb_mm(npu_context->mm);
 
-	/* Wait for all invalidations to complete */
-	for (i = 0; i <= max_npu2_index; i++) {
-		if (mmio_atsd_reg[i].reg < 0)
-			continue;
-
-		/* Wait for completion */
-		npu = mmio_atsd_reg[i].npu;
-		reg = mmio_atsd_reg[i].reg;
-		while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
-			cpu_relax();
-		put_mmio_atsd_reg(npu, reg);
-	}
+	mmio_invalidate_wait(mmio_atsd_reg, flush);
+	if (flush)
+		/* Wait for the flush to complete */
+		mmio_invalidate_wait(mmio_atsd_reg, false);
 }
 
 static void pnv_npu2_mn_release(struct mmu_notifier *mn,
@@ -571,7 +601,7 @@ static void pnv_npu2_mn_release(struct mmu_notifier *mn,
 	 * There should be no more translation requests for this PID, but we
 	 * need to ensure any entries for it are removed from the TLB.
 	 */
-	mmio_invalidate(npu_context, 0, 0);
+	mmio_invalidate(npu_context, 0, 0, true);
 }
 
 static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
@@ -581,7 +611,7 @@ static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
 {
 	struct npu_context *npu_context = mn_to_npu_context(mn);
 
-	mmio_invalidate(npu_context, 1, address);
+	mmio_invalidate(npu_context, 1, address, true);
 }
 
 static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn,
@@ -590,7 +620,7 @@ static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn,
 {
 	struct npu_context *npu_context = mn_to_npu_context(mn);
 
-	mmio_invalidate(npu_context, 1, address);
+	mmio_invalidate(npu_context, 1, address, true);
 }
 
 static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
@@ -600,8 +630,11 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
 	struct npu_context *npu_context = mn_to_npu_context(mn);
 	unsigned long address;
 
-	for (address = start; address <= end; address += PAGE_SIZE)
-		mmio_invalidate(npu_context, 1, address);
+	for (address = start; address < end; address += PAGE_SIZE)
+		mmio_invalidate(npu_context, 1, address, false);
+
+	/* Do the flush only on the final address == end */
+	mmio_invalidate(npu_context, 1, address, true);
 }
 
 static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
@@ -651,8 +684,11 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
 		/* No nvlink associated with this GPU device */
 		return ERR_PTR(-ENODEV);
 
-	if (!mm) {
-		/* kernel thread contexts are not supported */
+	if (!mm || mm->context.id == 0) {
+		/*
+		 * Kernel thread contexts are not supported and context id 0 is
+		 * reserved on the GPU.
+		 */
 		return ERR_PTR(-EINVAL);
 	}
 
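
Taken together, the new flush flag turns a flushing shootdown into a launch / wait-and-flush / wait-again sequence. A standalone sketch of that ordering (all names illustrative; this is a model of the calls above, not the kernel code):

#include <stdbool.h>

#define NPUS 4

static void launch_atsd(int npu, unsigned long pid, bool flush)
{
	/* MMIO write to the ATSD launch register in the real code;
	 * flush == true clears the "no flush" bit set above. */
	(void)npu; (void)pid; (void)flush;
}

static void wait_atsd(int npu)
{
	/* Polls the XTS_ATSD_STAT equivalent in the real code. */
	(void)npu;
}

static void invalidate_and_flush(unsigned long pid)
{
	int i;

	/* 1. Launch the invalidate on every NPU, flush requested. */
	for (i = 0; i < NPUS; i++)
		launch_atsd(i, pid, true);

	/* 2. As each completes, fire the follow-up flush ATSD with
	 *    PID 0, which the GPU never assigns to a real process. */
	for (i = 0; i < NPUS; i++) {
		wait_atsd(i);
		launch_atsd(i, 0, true);
	}

	/* 3. Wait again so the flush ATSDs themselves complete. */
	for (i = 0; i < NPUS; i++)
		wait_atsd(i);
}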