about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Alistair Popple <alistair@popple.id.au> 2018-04-17 05:11:28 -0400
committer Michael Ellerman <mpe@ellerman.id.au> 2018-04-23 19:46:57 -0400
commit d0cf9b561ca97d5245bb9e0c4774b7fadd897d67 (patch)
tree 4b991ef6e71000ab4f70fe6cce1096f6e32a3b90
parent a1409adac748f0db655e096521bbe6904aadeb98 (diff)
powerpc/powernv/npu: Do a PID GPU TLB flush when invalidating a large address range
The NPU has a limited number of address translation shootdown (ATSD) registers and the GPU has limited bandwidth to process ATSDs. This can result in contention of ATSD registers leading to soft lockups on some threads, particularly when invalidating a large address range in pnv_npu2_mn_invalidate_range().

At some threshold it becomes more efficient to flush the entire GPU TLB for the given MM context (PID) than individually flushing each address in the range. This patch will result in ranges greater than 2MB being converted from 32+ ATSDs into a single ATSD which will flush the TLB for the given PID on each GPU.

Fixes: 1ab66d1fbada ("powerpc/powernv: Introduce address translation services for Nvlink2")
Cc: stable@vger.kernel.org # v4.12+
Signed-off-by: Alistair Popple <alistair@popple.id.au>
Acked-by: Balbir Singh <bsingharora@gmail.com>
Tested-by: Balbir Singh <bsingharora@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
-rw-r--r-- arch/powerpc/platforms/powernv/npu-dma.c 23
1 files changed, 19 insertions, 4 deletions
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index ccd57d1b5bf8..525e966dce34 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -40,6 +40,13 @@
40static DEFINE_SPINLOCK(npu_context_lock); 40static DEFINE_SPINLOCK(npu_context_lock);
41 41
42/* 42/*
43 * When an address shootdown range exceeds this threshold we invalidate the
44 * entire TLB on the GPU for the given PID rather than each specific address in
45 * the range.
46 */
47#define ATSD_THRESHOLD (2*1024*1024)
48
49/*
43 * Other types of TCE cache invalidation are not functional in the 50 * Other types of TCE cache invalidation are not functional in the
44 * hardware. 51 * hardware.
45 */ 52 */
@@ -677,11 +684,19 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
677 struct npu_context *npu_context = mn_to_npu_context(mn); 684 struct npu_context *npu_context = mn_to_npu_context(mn);
678 unsigned long address; 685 unsigned long address;
679 686
680 for (address = start; address < end; address += PAGE_SIZE) 687 if (end - start > ATSD_THRESHOLD) {
681 mmio_invalidate(npu_context, 1, address, false); 688 /*
689 * Just invalidate the entire PID if the address range is too
690 * large.
691 */
692 mmio_invalidate(npu_context, 0, 0, true);
693 } else {
694 for (address = start; address < end; address += PAGE_SIZE)
695 mmio_invalidate(npu_context, 1, address, false);
682 696
683 /* Do the flush only on the final addess == end */ 697 /* Do the flush only on the final addess == end */
684 mmio_invalidate(npu_context, 1, address, true); 698 mmio_invalidate(npu_context, 1, address, true);
699 }
685} 700}
686 701
687static const struct mmu_notifier_ops nv_nmmu_notifier_ops = { 702static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {