author		Alistair Popple <alistair@popple.id.au>	2017-06-20 04:37:28 -0400
committer	Michael Ellerman <mpe@ellerman.id.au>	2017-06-22 07:21:08 -0400
commit		bbd5ff50afffcf4a01d05367524736c57607a478 (patch)
tree		bd2b3428a18f220daf30ba25a837f838e3070230
parent		bf05fc25f268cd62f147f368fe65ad3e5b04fe9f (diff)
powerpc/powernv/npu-dma: Add explicit flush when sending an ATSD
NPU2 requires an extra explicit flush to an active GPU PID when
sending address translation shoot downs (ATSDs) to reliably flush the
GPU TLB. This patch adds just such a flush at the end of each sequence
of ATSDs.

We can safely use PID 0, which is always reserved and active on the
GPU. PID 0 is only used for init_mm, which will never be a user mm on
the GPU. To enforce this we add a check in pnv_npu2_init_context()
just in case someone tries to use PID 0 on the GPU.

Signed-off-by: Alistair Popple <alistair@popple.id.au>
[mpe: Use true/false for bool literals]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
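[Editor's note: the launch-value encoding the patch relies on can be modelled
outside the kernel. The sketch below is a standalone illustration, not kernel
code: it assumes PPC_BITLSHIFT(be) expands to 63 - (be) on 64-bit (its
arch/powerpc definition is BITS_PER_LONG - 1 - (be)), takes the field
positions (PID at IBM bit 38, "no flush" at IBM bit 39) from the diff below,
and atsd_launch() is a hypothetical helper.]

    /* Standalone sketch of the ATSD launch-value encoding; illustrative only. */
    #include <inttypes.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Big-endian (IBM) bit numbering, assuming BITS_PER_LONG == 64 */
    #define PPC_BITLSHIFT(be) (63 - (be))

    static uint64_t atsd_launch(uint64_t pid, bool flush)
    {
            uint64_t launch = 0;

            /* PID field sits at IBM bit 38 */
            launch |= pid << PPC_BITLSHIFT(38);

            /* IBM bit 39 set means "no flush"; the patch clears it on the final ATSD */
            launch |= (uint64_t)!flush << PPC_BITLSHIFT(39);

            return launch;
    }

    int main(void)
    {
            printf("no-flush launch: 0x%016" PRIx64 "\n", atsd_launch(42, false));
            printf("flush launch:    0x%016" PRIx64 "\n", atsd_launch(42, true));
            return 0;
    }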
 arch/powerpc/platforms/powernv/npu-dma.c | 94 ++++++++++++++++++++---------
 1 file changed, 65 insertions(+), 29 deletions(-)
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index e6f444b46207..b5d960d6db3d 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -449,7 +449,7 @@ static int mmio_launch_invalidate(struct npu *npu, unsigned long launch,
         return mmio_atsd_reg;
 }
 
-static int mmio_invalidate_pid(struct npu *npu, unsigned long pid)
+static int mmio_invalidate_pid(struct npu *npu, unsigned long pid, bool flush)
 {
         unsigned long launch;
 
@@ -465,12 +465,15 @@ static int mmio_invalidate_pid(struct npu *npu, unsigned long pid)
         /* PID */
         launch |= pid << PPC_BITLSHIFT(38);
 
+        /* No flush */
+        launch |= !flush << PPC_BITLSHIFT(39);
+
         /* Invalidating the entire process doesn't use a va */
         return mmio_launch_invalidate(npu, launch, 0);
 }
 
 static int mmio_invalidate_va(struct npu *npu, unsigned long va,
-                        unsigned long pid)
+                        unsigned long pid, bool flush)
 {
         unsigned long launch;
 
@@ -486,26 +489,60 @@ static int mmio_invalidate_va(struct npu *npu, unsigned long va,
         /* PID */
         launch |= pid << PPC_BITLSHIFT(38);
 
+        /* No flush */
+        launch |= !flush << PPC_BITLSHIFT(39);
+
         return mmio_launch_invalidate(npu, launch, va);
 }
 
 #define mn_to_npu_context(x) container_of(x, struct npu_context, mn)
 
+struct mmio_atsd_reg {
+        struct npu *npu;
+        int reg;
+};
+
+static void mmio_invalidate_wait(
+        struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], bool flush)
+{
+        struct npu *npu;
+        int i, reg;
+
+        /* Wait for all invalidations to complete */
+        for (i = 0; i <= max_npu2_index; i++) {
+                if (mmio_atsd_reg[i].reg < 0)
+                        continue;
+
+                /* Wait for completion */
+                npu = mmio_atsd_reg[i].npu;
+                reg = mmio_atsd_reg[i].reg;
+                while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
+                        cpu_relax();
+
+                put_mmio_atsd_reg(npu, reg);
+
+                /*
+                 * The GPU requires two flush ATSDs to ensure all entries have
+                 * been flushed. We use PID 0 as it will never be used for a
+                 * process on the GPU.
+                 */
+                if (flush)
+                        mmio_invalidate_pid(npu, 0, true);
+        }
+}
+
 /*
  * Invalidate either a single address or an entire PID depending on
  * the value of va.
  */
 static void mmio_invalidate(struct npu_context *npu_context, int va,
-                        unsigned long address)
+                        unsigned long address, bool flush)
 {
-        int i, j, reg;
+        int i, j;
         struct npu *npu;
         struct pnv_phb *nphb;
         struct pci_dev *npdev;
-        struct {
-                struct npu *npu;
-                int reg;
-        } mmio_atsd_reg[NV_MAX_NPUS];
+        struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS];
         unsigned long pid = npu_context->mm->context.id;
 
         /*
@@ -525,10 +562,11 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
 
                 if (va)
                         mmio_atsd_reg[i].reg =
-                                mmio_invalidate_va(npu, address, pid);
+                                mmio_invalidate_va(npu, address, pid,
+                                                flush);
                 else
                         mmio_atsd_reg[i].reg =
-                                mmio_invalidate_pid(npu, pid);
+                                mmio_invalidate_pid(npu, pid, flush);
 
                 /*
                  * The NPU hardware forwards the shootdown to all GPUs
@@ -544,18 +582,10 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
          */
         flush_tlb_mm(npu_context->mm);
 
-        /* Wait for all invalidations to complete */
-        for (i = 0; i <= max_npu2_index; i++) {
-                if (mmio_atsd_reg[i].reg < 0)
-                        continue;
-
-                /* Wait for completion */
-                npu = mmio_atsd_reg[i].npu;
-                reg = mmio_atsd_reg[i].reg;
-                while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
-                        cpu_relax();
-                put_mmio_atsd_reg(npu, reg);
-        }
+        mmio_invalidate_wait(mmio_atsd_reg, flush);
+        if (flush)
+                /* Wait for the flush to complete */
+                mmio_invalidate_wait(mmio_atsd_reg, false);
 }
 
 static void pnv_npu2_mn_release(struct mmu_notifier *mn,
@@ -571,7 +601,7 @@ static void pnv_npu2_mn_release(struct mmu_notifier *mn,
          * There should be no more translation requests for this PID, but we
          * need to ensure any entries for it are removed from the TLB.
          */
-        mmio_invalidate(npu_context, 0, 0);
+        mmio_invalidate(npu_context, 0, 0, true);
 }
 
 static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
@@ -581,7 +611,7 @@ static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
 {
         struct npu_context *npu_context = mn_to_npu_context(mn);
 
-        mmio_invalidate(npu_context, 1, address);
+        mmio_invalidate(npu_context, 1, address, true);
 }
 
 static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn,
@@ -590,7 +620,7 @@ static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn,
 {
         struct npu_context *npu_context = mn_to_npu_context(mn);
 
-        mmio_invalidate(npu_context, 1, address);
+        mmio_invalidate(npu_context, 1, address, true);
 }
 
 static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
@@ -600,8 +630,11 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
         struct npu_context *npu_context = mn_to_npu_context(mn);
         unsigned long address;
 
-        for (address = start; address <= end; address += PAGE_SIZE)
-                mmio_invalidate(npu_context, 1, address);
+        for (address = start; address < end; address += PAGE_SIZE)
+                mmio_invalidate(npu_context, 1, address, false);
+
+        /* Do the flush only on the final address == end */
+        mmio_invalidate(npu_context, 1, address, true);
 }
 
 static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
@@ -651,8 +684,11 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
                 /* No nvlink associated with this GPU device */
                 return ERR_PTR(-ENODEV);
 
-        if (!mm) {
-                /* kernel thread contexts are not supported */
+        if (!mm || mm->context.id == 0) {
+                /*
+                 * Kernel thread contexts are not supported and context id 0 is
+                 * reserved on the GPU.
+                 */
                 return ERR_PTR(-EINVAL);
         }
 
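[Editor's note: taken together, the hunks turn a range invalidation into a
series of unflushed per-page ATSDs followed by a single flushed one, after
which mmio_invalidate_wait() issues the extra PID 0 flush ATSD and waits a
second time. The outline below is a standalone model of that ordering, not
kernel code: invalidate() is a hypothetical stand-in for mmio_invalidate(),
with a printf in place of the real MMIO launch, and the 64K PAGE_SIZE is an
assumption matching typical powerpc64 configs.]

    #include <stdbool.h>
    #include <stdio.h>

    #define PAGE_SIZE 0x10000UL     /* assuming 64K pages */

    /* Hypothetical stand-in for mmio_invalidate(); just logs the sequence */
    static void invalidate(unsigned long address, bool flush)
    {
            printf("ATSD va=0x%lx%s\n", address, flush ? " (flush)" : "");
            if (flush)
                    /* mmio_invalidate_wait() follows up with the PID 0 flush */
                    printf("ATSD pid=0 (flush), then wait again\n");
    }

    int main(void)
    {
            unsigned long start = 0x100000UL, end = 0x140000UL, address;

            /* Per-page shoot downs skip the flush... */
            for (address = start; address < end; address += PAGE_SIZE)
                    invalidate(address, false);

            /* ...and the final ATSD at address == end requests it */
            invalidate(address, true);
            return 0;
    }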