gpu: nvgpu: Fix some barrier usage

Commit 81868a187fa3b217368206f17b19309846e8e7fb updated barrier usage to use the nvgpu wrappers and in doing so downgraded many plain barriers {mb(), wmb(), rmb()} to the SMP versions of these barriers. The SMP versions of the barriers in question are only issued when running on an SMP machine. In most of the cases mentioned above this is fine since the barriers are present to facilitate proper ordering across CPUs. A single CPU is always coherent with itself, so in the non-SMP case we don't need those barriers. However, there are a few places (GMMU page table programming, IO accessors, userd) where the barriers are used for communicating with, and establishing ordering for, the GPU. We need these barriers on both SMP and non-SMP machines. Therefore we must use the plain barrier versions. Change-Id: I376129840b7dc64af8f3f23f88057e4e81360f89 Signed-off-by: Alex Waterman <alexw@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1599744 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
author: Alex Waterman <alexw@nvidia.com> 2017-11-16 14:29:11 -0500
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2017-11-16 18:55:52 -0500
commit: b7cc3a2aa6c92a09eed43513287c9062f22ad127 (patch)
tree: 0943ccb3962312810ab236d98fb5ff09b8843119 /drivers
parent: 3590080109511b203e9a9187f83aef61513d3d1a (diff)
3 files changed, 9 insertions, 9 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/io.c b/drivers/gpu/nvgpu/common/linux/io.c
index 04a9fbe8..729825e7 100644
--- a/drivers/gpu/nvgpu/common/linux/io.c
+++ b/drivers/gpu/nvgpu/common/linux/io.c
@@ -26,7 +26,7 @@ void nvgpu_writel(struct gk20a *g, u32 r, u32 v)
                gk20a_dbg(gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v);
        } else {
                writel_relaxed(v, l->regs + r);
-                nvgpu_smp_wmb();
+                nvgpu_wmb();
                gk20a_dbg(gpu_dbg_reg, "r=0x%x v=0x%x", r, v);
        }
 }
@@ -57,7 +57,7 @@ void nvgpu_writel_check(struct gk20a *g, u32 r, u32 v)
                __gk20a_warn_on_no_regs();
                gk20a_dbg(gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v);
        } else {
-                nvgpu_smp_wmb();
+                nvgpu_wmb();
                do {
                        writel_relaxed(v, l->regs + r);
                } while (readl(l->regs + r) != v);
@@ -73,7 +73,7 @@ void nvgpu_bar1_writel(struct gk20a *g, u32 b, u32 v)
                __gk20a_warn_on_no_regs();
                gk20a_dbg(gpu_dbg_reg, "b=0x%x v=0x%x (failed)", b, v);
        } else {
-                nvgpu_smp_wmb();
+                nvgpu_wmb();
                writel_relaxed(v, l->bar1 + b);
                gk20a_dbg(gpu_dbg_reg, "b=0x%x v=0x%x", b, v);
        }
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
index 6cca8c2f..8ad7dac7 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -180,7 +180,7 @@ int nvgpu_gmmu_init_page_table(struct vm_gk20a *vm)
                return err;
        /*
-         * One nvgpu_smp_mb() is done after all mapping operations. Don't need
+         * One nvgpu_mb() is done after all mapping operations. Don't need
         * individual barriers for each PD write.
         */
        vm->pdb.mem->skip_wmb = true;
@@ -275,7 +275,7 @@ static int pd_allocate(struct vm_gk20a *vm,
        }
        /*
-         * One nvgpu_smp_mb() is done after all mapping operations. Don't need
+         * One nvgpu_mb() is done after all mapping operations. Don't need
         * individual barriers for each PD write.
         */
        pd->mem->skip_wmb = true;
@@ -639,7 +639,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
                                                attrs);
        unmap_gmmu_pages(g, &vm->pdb);
-        nvgpu_smp_mb();
+        nvgpu_mb();
        __gmmu_dbg(g, attrs, "%-5s Done!", sgt ? "MAP" : "UNMAP");
@@ -914,7 +914,7 @@ int __nvgpu_set_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte)
         * There probably also needs to be a TLB invalidate as well but we leave
         * that to the caller of this function.
         */
-        nvgpu_smp_wmb();
+        nvgpu_wmb();
        return 0;
 }
diff --git a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
index f87c6dea..0238ae6c 100644
--- a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
@@ -253,8 +253,8 @@ void gv11b_userd_gp_put(struct gk20a *g, struct channel_gk20a *c)
        nvgpu_mem_wr32(g, userd_mem, offset + ram_userd_gp_put_w(),
                                                        c->gpfifo.put);
-        /* commit everything to cpu */
+        /* Commit everything to GPU. */
-        nvgpu_smp_mb();
+        nvgpu_mb();
        gv11b_ring_channel_doorbell(c);
 }
author	Alex Waterman <alexw@nvidia.com>	2017-11-16 14:29:11 -0500
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2017-11-16 18:55:52 -0500
commit	b7cc3a2aa6c92a09eed43513287c9062f22ad127 (patch)
tree	0943ccb3962312810ab236d98fb5ff09b8843119 /drivers
parent	3590080109511b203e9a9187f83aef61513d3d1a (diff)

diff --git a/drivers/gpu/nvgpu/common/linux/io.c b/drivers/gpu/nvgpu/common/linux/io.c index 04a9fbe8..729825e7 100644 --- a/drivers/gpu/nvgpu/common/linux/io.c +++ b/drivers/gpu/nvgpu/common/linux/io.c
@@ -26,7 +26,7 @@ void nvgpu_writel(struct gk20a *g, u32 r, u32 v)
26	gk20a_dbg(gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v);	26	gk20a_dbg(gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v);
27	} else {	27	} else {
28	writel_relaxed(v, l->regs + r);	28	writel_relaxed(v, l->regs + r);
29	nvgpu_smp_wmb();	29	nvgpu_wmb();
30	gk20a_dbg(gpu_dbg_reg, "r=0x%x v=0x%x", r, v);	30	gk20a_dbg(gpu_dbg_reg, "r=0x%x v=0x%x", r, v);
31	}	31	}
32	}	32	}
@@ -57,7 +57,7 @@ void nvgpu_writel_check(struct gk20a *g, u32 r, u32 v)
57	__gk20a_warn_on_no_regs();	57	__gk20a_warn_on_no_regs();
58	gk20a_dbg(gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v);	58	gk20a_dbg(gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v);
59	} else {	59	} else {
60	nvgpu_smp_wmb();	60	nvgpu_wmb();
61	do {	61	do {
62	writel_relaxed(v, l->regs + r);	62	writel_relaxed(v, l->regs + r);
63	} while (readl(l->regs + r) != v);	63	} while (readl(l->regs + r) != v);
@@ -73,7 +73,7 @@ void nvgpu_bar1_writel(struct gk20a *g, u32 b, u32 v)
73	__gk20a_warn_on_no_regs();	73	__gk20a_warn_on_no_regs();
74	gk20a_dbg(gpu_dbg_reg, "b=0x%x v=0x%x (failed)", b, v);	74	gk20a_dbg(gpu_dbg_reg, "b=0x%x v=0x%x (failed)", b, v);
75	} else {	75	} else {
76	nvgpu_smp_wmb();	76	nvgpu_wmb();
77	writel_relaxed(v, l->bar1 + b);	77	writel_relaxed(v, l->bar1 + b);
78	gk20a_dbg(gpu_dbg_reg, "b=0x%x v=0x%x", b, v);	78	gk20a_dbg(gpu_dbg_reg, "b=0x%x v=0x%x", b, v);
79	}	79	}


diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c index 6cca8c2f..8ad7dac7 100644 --- a/drivers/gpu/nvgpu/common/mm/gmmu.c +++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -180,7 +180,7 @@ int nvgpu_gmmu_init_page_table(struct vm_gk20a *vm)
180	return err;	180	return err;
181		181
182	/*	182	/*
183	* One nvgpu_smp_mb() is done after all mapping operations. Don't need	183	* One nvgpu_mb() is done after all mapping operations. Don't need
184	* individual barriers for each PD write.	184	* individual barriers for each PD write.
185	*/	185	*/
186	vm->pdb.mem->skip_wmb = true;	186	vm->pdb.mem->skip_wmb = true;
@@ -275,7 +275,7 @@ static int pd_allocate(struct vm_gk20a *vm,
275	}	275	}
276		276
277	/*	277	/*
278	* One nvgpu_smp_mb() is done after all mapping operations. Don't need	278	* One nvgpu_mb() is done after all mapping operations. Don't need
279	* individual barriers for each PD write.	279	* individual barriers for each PD write.
280	*/	280	*/
281	pd->mem->skip_wmb = true;	281	pd->mem->skip_wmb = true;
@@ -639,7 +639,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
639	attrs);	639	attrs);
640		640
641	unmap_gmmu_pages(g, &vm->pdb);	641	unmap_gmmu_pages(g, &vm->pdb);
642	nvgpu_smp_mb();	642	nvgpu_mb();
643		643
644	__gmmu_dbg(g, attrs, "%-5s Done!", sgt ? "MAP" : "UNMAP");	644	__gmmu_dbg(g, attrs, "%-5s Done!", sgt ? "MAP" : "UNMAP");
645		645
@@ -914,7 +914,7 @@ int __nvgpu_set_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte)
914	* There probably also needs to be a TLB invalidate as well but we leave	914	* There probably also needs to be a TLB invalidate as well but we leave
915	* that to the caller of this function.	915	* that to the caller of this function.
916	*/	916	*/
917	nvgpu_smp_wmb();	917	nvgpu_wmb();
918		918
919	return 0;	919	return 0;
920	}	920	}


diff --git a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c index f87c6dea..0238ae6c 100644 --- a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
@@ -253,8 +253,8 @@ void gv11b_userd_gp_put(struct gk20a *g, struct channel_gk20a *c)
253		253
254	nvgpu_mem_wr32(g, userd_mem, offset + ram_userd_gp_put_w(),	254	nvgpu_mem_wr32(g, userd_mem, offset + ram_userd_gp_put_w(),
255	c->gpfifo.put);	255	c->gpfifo.put);
256	/* commit everything to cpu */	256	/* Commit everything to GPU. */
257	nvgpu_smp_mb();	257	nvgpu_mb();
258		258
259	gv11b_ring_channel_doorbell(c);	259	gv11b_ring_channel_doorbell(c);
260	}	260	}