author		Alex Waterman <alexw@nvidia.com>	2018-02-28 12:19:19 -0500
committer	Srikar Srimath Tirumala <srikars@nvidia.com>	2018-02-28 16:49:22 -0500
commit		5a35a95654d561fce09a3b9abf6b82bb7a29d74b (patch)
tree		119a07134188d8e06c29a570dd8c6b143f39c9e1
parent		3fdd8e38b280123fd13bcc4f3fd8928c15e94db6 (diff)
Revert "gpu: nvgpu: Get coherency on gv100 + NVLINK working"
Also revert other changes related to IO coherence. This may be the
culprit in a recent dev-kernel lockdown.

Bug 2070609

Change-Id: Ida178aef161fadbc6db9512521ea51c702c1564b
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1665914
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Srikar Srimath Tirumala <srikars@nvidia.com>
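For orientation, the net effect of the revert on the aperture helpers is visible in the diff below: they return to the two-mask form, where callers pass only a sysmem field value and a vidmem field value and coherent sysmem is no longer a distinct aperture. The following is a minimal, self-contained sketch of that selection logic using simplified stand-in types (not the driver's actual headers); the real helpers operate on `struct gk20a` and the `NVGPU_MM_HONORS_APERTURE` enabled flag.

```c
#include <stdio.h>

/* Simplified stand-in for enum nvgpu_aperture. */
enum aperture { APERTURE_INVALID = 0, APERTURE_SYSMEM, APERTURE_VIDMEM };

/*
 * Post-revert selection: one sysmem mask, one vidmem mask. On iGPUs that
 * do not honor the aperture field, sysmem is reported as vidmem.
 */
static unsigned int aperture_mask(enum aperture ap, int honors_aperture,
				  unsigned int sysmem_mask,
				  unsigned int vidmem_mask)
{
	switch (ap) {
	case APERTURE_SYSMEM:
		return honors_aperture ? sysmem_mask : vidmem_mask;
	case APERTURE_VIDMEM:
		return vidmem_mask;
	case APERTURE_INVALID:
	default:
		return 0;
	}
}

int main(void)
{
	/* dGPU-style: aperture honored, sysmem and vidmem fields differ. */
	printf("%#x\n", aperture_mask(APERTURE_SYSMEM, 1, 0x2, 0x1));
	/* iGPU-style: sysmem is treated as vidmem. */
	printf("%#x\n", aperture_mask(APERTURE_SYSMEM, 0, 0x2, 0x1));
	return 0;
}
```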
-rw-r--r--  drivers/gpu/nvgpu/common/linux/dma.c | 34
-rw-r--r--  drivers/gpu/nvgpu/common/linux/module.c | 8
-rw-r--r--  drivers/gpu/nvgpu/common/linux/nvgpu_mem.c | 51
-rw-r--r--  drivers/gpu/nvgpu/common/linux/pci.c | 17
-rw-r--r--  drivers/gpu/nvgpu/common/linux/vm.c | 3
-rw-r--r--  drivers/gpu/nvgpu/common/mm/gmmu.c | 16
-rw-r--r--  drivers/gpu/nvgpu/common/mm/nvgpu_mem.c | 46
-rw-r--r--  drivers/gpu/nvgpu/gk20a/bus_gk20a.c | 6
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fb_gk20a.c | 5
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c | 1
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 20
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 33
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 24
-rw-r--r--  drivers/gpu/nvgpu/gk20a/pramin_gk20a.c | 1
-rw-r--r--  drivers/gpu/nvgpu/gm20b/bus_gm20b.c | 6
-rw-r--r--  drivers/gpu/nvgpu/gm20b/fifo_gm20b.c | 12
-rw-r--r--  drivers/gpu/nvgpu/gp106/sec2_gp106.c | 2
-rw-r--r--  drivers/gpu/nvgpu/gp10b/fifo_gp10b.c | 6
-rw-r--r--  drivers/gpu/nvgpu/gp10b/mm_gp10b.c | 28
-rw-r--r--  drivers/gpu/nvgpu/gv11b/acr_gv11b.c | 7
-rw-r--r--  drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c | 7
-rw-r--r--  drivers/gpu/nvgpu/gv11b/dbg_gpu_gv11b.c | 11
-rw-r--r--  drivers/gpu/nvgpu/gv11b/fifo_gv11b.c | 10
-rw-r--r--  drivers/gpu/nvgpu/gv11b/mm_gv11b.c | 6
-rw-r--r--  drivers/gpu/nvgpu/gv11b/pmu_gv11b.c | 8
-rw-r--r--  drivers/gpu/nvgpu/gv11b/subctx_gv11b.c | 5
-rw-r--r--  drivers/gpu/nvgpu/include/nvgpu/enabled.h | 4
-rw-r--r--  drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h | 23
28 files changed, 126 insertions(+), 274 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/linux/dma.c b/drivers/gpu/nvgpu/common/linux/dma.c
index 81aebb7d..c13dae8b 100644
--- a/drivers/gpu/nvgpu/common/linux/dma.c
+++ b/drivers/gpu/nvgpu/common/linux/dma.c
@@ -222,16 +222,6 @@ int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags,
 	void *alloc_ret;
 
 	/*
-	 * WAR for IO coherent chips: the DMA API does not seem to generate
-	 * mappings that work correctly. Unclear why - Bug ID: 2040115.
-	 *
-	 * Basically we just tell the DMA API not to map with NO_KERNEL_MAPPING
-	 * and then make a vmap() ourselves.
-	 */
-	if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
-		flags |= NVGPU_DMA_NO_KERNEL_MAPPING;
-
-	/*
 	 * Before the debug print so we see this in the total. But during
 	 * cleanup in the fail path this has to be subtracted.
 	 */
@@ -265,17 +255,7 @@ int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags,
 				iova, size, flags);
 	}
 	if (err)
-		goto fail_free_dma;
-
-	if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) {
-		mem->cpu_va = vmap(mem->priv.pages,
-				   size >> PAGE_SHIFT,
-				   0, PAGE_KERNEL);
-		if (!mem->cpu_va) {
-			err = -ENOMEM;
-			goto fail_free_sgt;
-		}
-	}
+		goto fail_free;
 
 	mem->aligned_size = size;
 	mem->aperture = APERTURE_SYSMEM;
@@ -285,14 +265,12 @@ int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags,
 
 	return 0;
 
-fail_free_sgt:
-	nvgpu_free_sgtable(g, &mem->priv.sgt);
-fail_free_dma:
+fail_free:
+	g->dma_memory_used -= mem->aligned_size;
 	dma_free_attrs(d, size, alloc_ret, iova, NVGPU_DMA_ATTR(dma_attrs));
 	mem->cpu_va = NULL;
 	mem->priv.sgt = NULL;
 	mem->size = 0;
-	g->dma_memory_used -= mem->aligned_size;
 	return err;
 }
 
@@ -488,12 +466,6 @@ static void nvgpu_dma_free_sys(struct gk20a *g, struct nvgpu_mem *mem)
 	if (!(mem->mem_flags & NVGPU_MEM_FLAG_SHADOW_COPY) &&
 	    !(mem->mem_flags & __NVGPU_MEM_FLAG_NO_DMA) &&
 	    (mem->cpu_va || mem->priv.pages)) {
-		/*
-		 * Free side of WAR for bug 2040115.
-		 */
-		if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
-			vunmap(mem->cpu_va);
-
 		if (mem->priv.flags) {
 			NVGPU_DEFINE_DMA_ATTRS(dma_attrs);
 
diff --git a/drivers/gpu/nvgpu/common/linux/module.c b/drivers/gpu/nvgpu/common/linux/module.c
index 741c86e7..b103fcea 100644
--- a/drivers/gpu/nvgpu/common/linux/module.c
+++ b/drivers/gpu/nvgpu/common/linux/module.c
@@ -20,7 +20,6 @@
 #include <linux/of.h>
 #include <linux/of_device.h>
 #include <linux/of_platform.h>
-#include <linux/of_address.h>
 #include <linux/interrupt.h>
 #include <linux/pm_runtime.h>
 #include <linux/reset.h>
@@ -1108,7 +1107,6 @@ static int gk20a_probe(struct platform_device *dev)
 	struct gk20a *gk20a;
 	int err;
 	struct gk20a_platform *platform = NULL;
-	struct device_node *np;
 
 	if (dev->dev.of_node) {
 		const struct of_device_id *match;
@@ -1149,12 +1147,6 @@ static int gk20a_probe(struct platform_device *dev)
 	if (err)
 		goto return_err;
 
-	np = nvgpu_get_node(gk20a);
-	if (of_dma_is_coherent(np)) {
-		__nvgpu_set_enabled(gk20a, NVGPU_USE_COHERENT_SYSMEM, true);
-		__nvgpu_set_enabled(gk20a, NVGPU_SUPPORT_IO_COHERENCE, true);
-	}
-
 	if (nvgpu_platform_is_simulation(gk20a))
 		__nvgpu_set_enabled(gk20a, NVGPU_IS_FMODEL, true);
 
diff --git a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
index 69897694..206b83e1 100644
--- a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
+++ b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
@@ -34,25 +34,40 @@
 #include "gk20a/gk20a.h"
 #include "gk20a/mm_gk20a.h"
 
+u32 __nvgpu_aperture_mask(struct gk20a *g, enum nvgpu_aperture aperture,
+		u32 sysmem_mask, u32 vidmem_mask)
+{
+	switch (aperture) {
+	case APERTURE_SYSMEM:
+		/* some igpus consider system memory vidmem */
+		return nvgpu_is_enabled(g, NVGPU_MM_HONORS_APERTURE)
+			? sysmem_mask : vidmem_mask;
+	case APERTURE_VIDMEM:
+		/* for dgpus only */
+		return vidmem_mask;
+	case APERTURE_INVALID:
+		WARN_ON("Bad aperture");
+	}
+	return 0;
+}
+
+u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem,
+		u32 sysmem_mask, u32 vidmem_mask)
+{
+	return __nvgpu_aperture_mask(g, mem->aperture,
+			sysmem_mask, vidmem_mask);
+}
+
 int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem)
 {
 	void *cpu_va;
-	pgprot_t prot = nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) ?
-			PAGE_KERNEL :
+	pgprot_t prot = nvgpu_is_enabled(g, NVGPU_DMA_COHERENT) ? PAGE_KERNEL :
 			pgprot_writecombine(PAGE_KERNEL);
 
 	if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin)
 		return 0;
 
 	/*
-	 * WAR for bug 2040115: we already will always have a coherent vmap()
-	 * for all sysmem buffers. The prot settings are left alone since
-	 * eventually this should be deleted.
-	 */
-	if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
-		return 0;
-
-	/*
 	 * A CPU mapping is implicitly made for all SYSMEM DMA allocations that
 	 * don't have NVGPU_DMA_NO_KERNEL_MAPPING. Thus we don't need to make
 	 * another CPU mapping.
@@ -82,13 +97,6 @@ void nvgpu_mem_end(struct gk20a *g, struct nvgpu_mem *mem)
 		return;
 
 	/*
-	 * WAR for bug 2040115: skip this since the map will be taken care of
-	 * during the free in the DMA API.
-	 */
-	if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
-		return;
-
-	/*
 	 * Similar to nvgpu_mem_begin() we don't need to unmap the CPU mapping
 	 * already made by the DMA API.
 	 */
@@ -307,8 +315,7 @@ void nvgpu_memset(struct gk20a *g, struct nvgpu_mem *mem, u32 offset,
  */
 u64 nvgpu_mem_get_addr_sgl(struct gk20a *g, struct scatterlist *sgl)
 {
-	if (nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG) ||
-	    !nvgpu_iommuable(g))
+	if (!nvgpu_iommuable(g))
 		return g->ops.mm.gpu_phys_addr(g, NULL, sg_phys(sgl));
 
 	if (sg_dma_address(sgl) == 0)
@@ -408,12 +415,8 @@ int nvgpu_mem_create_from_mem(struct gk20a *g,
 
 	/*
 	 * Re-use the CPU mapping only if the mapping was made by the DMA API.
-	 *
-	 * Bug 2040115: the DMA API wrapper makes the mapping that we should
-	 * re-use.
 	 */
-	if (!(src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING) ||
-	    nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
+	if (!(src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING))
 		dest->cpu_va = src->cpu_va + (PAGE_SIZE * start_page);
 
 	dest->priv.pages = src->priv.pages + start_page;
diff --git a/drivers/gpu/nvgpu/common/linux/pci.c b/drivers/gpu/nvgpu/common/linux/pci.c
index 973da9ca..6ebe8dda 100644
--- a/drivers/gpu/nvgpu/common/linux/pci.c
+++ b/drivers/gpu/nvgpu/common/linux/pci.c
@@ -17,13 +17,13 @@
 #include <linux/pci.h>
 #include <linux/interrupt.h>
 #include <linux/pm_runtime.h>
-#include <linux/of_platform.h>
-#include <linux/of_address.h>
 
 #include <nvgpu/nvgpu_common.h>
 #include <nvgpu/kmem.h>
 #include <nvgpu/enabled.h>
 #include <nvgpu/nvlink.h>
+#include <linux/of_platform.h>
+#include <linux/of_address.h>
 
 #include "gk20a/gk20a.h"
 #include "clk/clk.h"
@@ -566,12 +566,6 @@ static int nvgpu_pci_probe(struct pci_dev *pdev,
 	platform->g = g;
 	l->dev = &pdev->dev;
 
-	np = nvgpu_get_node(g);
-	if (of_dma_is_coherent(np)) {
-		__nvgpu_set_enabled(g, NVGPU_USE_COHERENT_SYSMEM, true);
-		__nvgpu_set_enabled(g, NVGPU_SUPPORT_IO_COHERENCE, true);
-	}
-
 	err = pci_enable_device(pdev);
 	if (err)
 		return err;
@@ -650,6 +644,13 @@ static int nvgpu_pci_probe(struct pci_dev *pdev,
 
 	g->mm.has_physical_mode = false;
 
+	np = nvgpu_get_node(g);
+
+	if (of_dma_is_coherent(np)) {
+		__nvgpu_set_enabled(g, NVGPU_DMA_COHERENT, true);
+		__nvgpu_set_enabled(g, NVGPU_SUPPORT_IO_COHERENCE, true);
+	}
+
 	return 0;
 }
 
diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c
index 52b2f30c..e3ca4eda 100644
--- a/drivers/gpu/nvgpu/common/linux/vm.c
+++ b/drivers/gpu/nvgpu/common/linux/vm.c
@@ -166,8 +166,7 @@ struct nvgpu_mapped_buf *nvgpu_vm_find_mapping(struct vm_gk20a *vm,
 		  vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10,
 		  vm_aspace_id(vm),
 		  mapped_buffer->flags,
-		  nvgpu_aperture_str(g,
-				gk20a_dmabuf_aperture(g, os_buf->dmabuf)));
+		  nvgpu_aperture_str(gk20a_dmabuf_aperture(g, os_buf->dmabuf)));
 
 	return mapped_buffer;
 }
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
index 41343718..ffac324c 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -79,13 +79,6 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm,
 	if (!sgt)
 		return -ENOMEM;
 
-	/*
-	 * If the GPU is IO coherent and the DMA API is giving us IO coherent
-	 * CPU mappings then we gotta make sure we use the IO coherent aperture.
-	 */
-	if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
-		flags |= NVGPU_VM_MAP_IO_COHERENT;
-
 	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
 	vaddr = g->ops.mm.gmmu_map(vm, addr,
 				   sgt,    /* sg list */
@@ -634,7 +627,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
 		   page_size >> 10,
 		   nvgpu_gmmu_perm_str(attrs->rw_flag),
 		   attrs->kind_v,
-		   nvgpu_aperture_str(g, attrs->aperture),
+		   nvgpu_aperture_str(attrs->aperture),
 		   attrs->cacheable ? 'C' : '-',
 		   attrs->sparse ? 'S' : '-',
 		   attrs->priv ? 'P' : '-',
@@ -712,13 +705,6 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 	attrs.l3_alloc = (bool)(flags & NVGPU_VM_MAP_L3_ALLOC);
 
 	/*
-	 * Handle the IO coherency aperture: make sure the .aperture field is
-	 * correct based on the IO coherency flag.
-	 */
-	if (attrs.coherent && attrs.aperture == APERTURE_SYSMEM)
-		attrs.aperture = __APERTURE_SYSMEM_COH;
-
-	/*
 	 * Only allocate a new GPU VA range if we haven't already been passed a
 	 * GPU VA range. This facilitates fixed mappings.
 	 */
diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
index 78a57b4e..73b6b2a7 100644
--- a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
+++ b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
@@ -28,52 +28,6 @@
 
 #include "gk20a/gk20a.h"
 
-/*
- * Make sure to use the right coherency aperture if you use this function! This
- * will not add any checks. If you want to simply use the default coherency then
- * use nvgpu_aperture_mask().
- */
-u32 __nvgpu_aperture_mask(struct gk20a *g, enum nvgpu_aperture aperture,
-		u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask)
-{
-	/*
-	 * Some iGPUs treat sysmem (i.e SoC DRAM) as vidmem. In these cases the
-	 * "sysmem" aperture should really be translated to VIDMEM.
-	 */
-	if (!nvgpu_is_enabled(g, NVGPU_MM_HONORS_APERTURE))
-		aperture = APERTURE_VIDMEM;
-
-	switch (aperture) {
-	case __APERTURE_SYSMEM_COH:
-		return sysmem_coh_mask;
-	case APERTURE_SYSMEM:
-		return sysmem_mask;
-	case APERTURE_VIDMEM:
-		return vidmem_mask;
-	case APERTURE_INVALID:
-		WARN_ON("Bad aperture");
-	}
-	return 0;
-}
-
-u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem,
-		u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask)
-{
-	enum nvgpu_aperture ap = mem->aperture;
-
-	/*
-	 * Handle the coherent aperture: ideally most of the driver is not
-	 * aware of the difference between coherent and non-coherent sysmem so
-	 * we add this translation step here.
-	 */
-	if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) &&
-	    ap == APERTURE_SYSMEM)
-		ap = __APERTURE_SYSMEM_COH;
-
-	return __nvgpu_aperture_mask(g, ap,
-			sysmem_mask, sysmem_coh_mask, vidmem_mask);
-}
-
 void *nvgpu_sgt_get_next(struct nvgpu_sgt *sgt, void *sgl)
 {
 	return sgt->ops->sgl_next(sgl);
diff --git a/drivers/gpu/nvgpu/gk20a/bus_gk20a.c b/drivers/gpu/nvgpu/gk20a/bus_gk20a.c
index b2800772..7f0cfe58 100644
--- a/drivers/gpu/nvgpu/gk20a/bus_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/bus_gk20a.c
@@ -21,7 +21,6 @@
  */
 
 #include <nvgpu/page_allocator.h>
-#include <nvgpu/enabled.h>
#include <nvgpu/log.h>
 #include <nvgpu/soc.h>
 #include <nvgpu/bus.h>
@@ -156,9 +155,8 @@ int gk20a_bus_bar1_bind(struct gk20a *g, struct nvgpu_mem *bar1_inst)
 
 	gk20a_writel(g, bus_bar1_block_r(),
 		     nvgpu_aperture_mask(g, bar1_inst,
 				bus_bar1_block_target_sys_mem_ncoh_f(),
-				bus_bar1_block_target_sys_mem_coh_f(),
 				bus_bar1_block_target_vid_mem_f()) |
 		     bus_bar1_block_mode_virtual_f() |
 		     bus_bar1_block_ptr_f(ptr_v));
 
diff --git a/drivers/gpu/nvgpu/gk20a/fb_gk20a.c b/drivers/gpu/nvgpu/gk20a/fb_gk20a.c
index e3052701..a5a2cb51 100644
--- a/drivers/gpu/nvgpu/gk20a/fb_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fb_gk20a.c
@@ -98,9 +98,8 @@ void gk20a_fb_tlb_invalidate(struct gk20a *g, struct nvgpu_mem *pdb)
 	gk20a_writel(g, fb_mmu_invalidate_pdb_r(),
 		fb_mmu_invalidate_pdb_addr_f(addr_lo) |
 		nvgpu_aperture_mask(g, pdb,
 				fb_mmu_invalidate_pdb_aperture_sys_mem_f(),
-				fb_mmu_invalidate_pdb_aperture_sys_mem_f(),
 				fb_mmu_invalidate_pdb_aperture_vid_mem_f()));
 
 	gk20a_writel(g, fb_mmu_invalidate_r(),
 		fb_mmu_invalidate_all_va_true_f() |
diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
index 4fda0d2e..409661fc 100644
--- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
@@ -653,7 +653,6 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g,
 		return -ENOMEM;
 	aperture = nvgpu_aperture_mask(g, &trace->trace_buf,
 			ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_noncoherent_f(),
-			ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_coherent_f(),
 			ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_vid_mem_f());
 
 	if (nvgpu_mem_begin(g, mem))
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 247557aa..fea46a0e 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -28,7 +28,6 @@
 #include <nvgpu/dma.h>
 #include <nvgpu/timers.h>
 #include <nvgpu/semaphore.h>
-#include <nvgpu/enabled.h>
 #include <nvgpu/kmem.h>
 #include <nvgpu/log.h>
 #include <nvgpu/soc.h>
@@ -667,13 +666,11 @@ static void fifo_engine_exception_status(struct gk20a *g,
 static int init_runlist(struct gk20a *g, struct fifo_gk20a *f)
 {
 	struct fifo_runlist_info_gk20a *runlist;
-	struct fifo_engine_info_gk20a *engine_info;
 	unsigned int runlist_id;
 	u32 i;
 	size_t runlist_size;
 	u32 active_engine_id, pbdma_id, engine_id;
-	int flags = nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG) ?
-		NVGPU_DMA_FORCE_CONTIGUOUS : 0;
+	struct fifo_engine_info_gk20a *engine_info;
 
 	nvgpu_log_fn(g, " ");
 
@@ -708,9 +705,8 @@ static int init_runlist(struct gk20a *g, struct fifo_gk20a *f)
 		f->num_runlist_entries, runlist_size);
 
 	for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
-		int err = nvgpu_dma_alloc_flags_sys(g, flags,
-						    runlist_size,
-						    &runlist->mem[i]);
+		int err = nvgpu_dma_alloc_sys(g, runlist_size,
+					      &runlist->mem[i]);
 		if (err) {
 			nvgpu_err(g, "memory allocation failed");
 			goto clean_up_runlist;
@@ -3240,9 +3236,8 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
 		gk20a_writel(g, fifo_runlist_base_r(),
 			fifo_runlist_base_ptr_f(u64_lo32(runlist_iova >> 12)) |
 			nvgpu_aperture_mask(g, &runlist->mem[new_buf],
 				fifo_runlist_base_target_sys_mem_ncoh_f(),
-				fifo_runlist_base_target_sys_mem_coh_f(),
 				fifo_runlist_base_target_vid_mem_f()));
 	}
 
 	gk20a_writel(g, fifo_runlist_r(),
@@ -3764,9 +3759,8 @@ static int gk20a_fifo_commit_userd(struct channel_gk20a *c)
 	nvgpu_mem_wr32(g, &c->inst_block,
 		ram_in_ramfc_w() + ram_fc_userd_w(),
 		nvgpu_aperture_mask(g, &g->fifo.userd,
 			pbdma_userd_target_sys_mem_ncoh_f(),
-			pbdma_userd_target_sys_mem_coh_f(),
 			pbdma_userd_target_vid_mem_f()) |
 		pbdma_userd_addr_f(addr_lo));
 
 	nvgpu_mem_wr32(g, &c->inst_block,
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 2cde10ec..8db6b42f 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -742,14 +742,13 @@ void gr_gk20a_ctx_patch_write(struct gk20a *g,
 
 static u32 fecs_current_ctx_data(struct gk20a *g, struct nvgpu_mem *inst_block)
 {
-	u64 ptr = nvgpu_inst_block_addr(g, inst_block) >>
-		ram_in_base_shift_v();
+	u32 ptr = u64_lo32(nvgpu_inst_block_addr(g, inst_block)
+		>> ram_in_base_shift_v());
 	u32 aperture = nvgpu_aperture_mask(g, inst_block,
 			gr_fecs_current_ctx_target_sys_mem_ncoh_f(),
-			gr_fecs_current_ctx_target_sys_mem_coh_f(),
 			gr_fecs_current_ctx_target_vid_mem_f());
 
-	return gr_fecs_current_ctx_ptr_f(u64_lo32(ptr)) | aperture |
+	return gr_fecs_current_ctx_ptr_f(ptr) | aperture |
 		gr_fecs_current_ctx_valid_f(1);
 }
 
@@ -2172,18 +2171,16 @@ void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g)
 
 	inst_ptr = nvgpu_inst_block_addr(g, &ucode_info->inst_blk_desc);
 	gk20a_writel(g, gr_fecs_new_ctx_r(),
 		gr_fecs_new_ctx_ptr_f(inst_ptr >> 12) |
 		nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc,
 			gr_fecs_new_ctx_target_sys_mem_ncoh_f(),
-			gr_fecs_new_ctx_target_sys_mem_coh_f(),
 			gr_fecs_new_ctx_target_vid_mem_f()) |
 		gr_fecs_new_ctx_valid_m());
 
 	gk20a_writel(g, gr_fecs_arb_ctx_ptr_r(),
 		gr_fecs_arb_ctx_ptr_ptr_f(inst_ptr >> 12) |
 		nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc,
 			gr_fecs_arb_ctx_ptr_target_sys_mem_ncoh_f(),
-			gr_fecs_arb_ctx_ptr_target_sys_mem_coh_f(),
 			gr_fecs_arb_ctx_ptr_target_vid_mem_f()));
 
 	gk20a_writel(g, gr_fecs_arb_ctx_cmd_r(), 0x7);
@@ -4382,9 +4379,8 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g)
 
 	gk20a_writel(g, fb_mmu_debug_wr_r(),
 		nvgpu_aperture_mask(g, &gr->mmu_wr_mem,
 			fb_mmu_debug_wr_aperture_sys_mem_ncoh_f(),
-			fb_mmu_debug_wr_aperture_sys_mem_coh_f(),
 			fb_mmu_debug_wr_aperture_vid_mem_f()) |
 		fb_mmu_debug_wr_vol_false_f() |
 		fb_mmu_debug_wr_addr_f(addr));
 
@@ -4393,9 +4389,8 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g)
 
 	gk20a_writel(g, fb_mmu_debug_rd_r(),
 		nvgpu_aperture_mask(g, &gr->mmu_rd_mem,
 			fb_mmu_debug_wr_aperture_sys_mem_ncoh_f(),
-			fb_mmu_debug_wr_aperture_sys_mem_coh_f(),
 			fb_mmu_debug_rd_aperture_vid_mem_f()) |
 		fb_mmu_debug_rd_vol_false_f() |
 		fb_mmu_debug_rd_addr_f(addr));
 
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 4ff6125b..b27d1109 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -122,9 +122,8 @@ static inline u32 big_valid_pde0_bits(struct gk20a *g,
 {
 	u32 pde0_bits =
 		nvgpu_aperture_mask(g, pd->mem,
 			gmmu_pde_aperture_big_sys_mem_ncoh_f(),
-			gmmu_pde_aperture_big_sys_mem_coh_f(),
 			gmmu_pde_aperture_big_video_memory_f()) |
 		gmmu_pde_address_big_sys_f(
 			(u32)(addr >> gmmu_pde_address_shift_v()));
 
@@ -136,9 +135,8 @@ static inline u32 small_valid_pde1_bits(struct gk20a *g,
 {
 	u32 pde1_bits =
 		nvgpu_aperture_mask(g, pd->mem,
 			gmmu_pde_aperture_small_sys_mem_ncoh_f(),
-			gmmu_pde_aperture_small_sys_mem_coh_f(),
 			gmmu_pde_aperture_small_video_memory_f()) |
 		gmmu_pde_vol_small_true_f() | /* tbd: why? */
 		gmmu_pde_address_small_sys_f(
 			(u32)(addr >> gmmu_pde_address_shift_v()));
@@ -217,7 +215,6 @@ static void __update_pte(struct vm_gk20a *vm,
 
 	pte_w[1] = __nvgpu_aperture_mask(g, attrs->aperture,
 			gmmu_pte_aperture_sys_mem_ncoh_f(),
-			gmmu_pte_aperture_sys_mem_coh_f(),
 			gmmu_pte_aperture_video_memory_f()) |
 		gmmu_pte_kind_f(attrs->kind_v) |
 		gmmu_pte_comptagline_f((u32)(attrs->ctag >> ctag_shift));
@@ -271,7 +268,7 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm,
 		   page_size >> 10,
 		   nvgpu_gmmu_perm_str(attrs->rw_flag),
 		   attrs->kind_v,
-		   nvgpu_aperture_str(g, attrs->aperture),
+		   nvgpu_aperture_str(attrs->aperture),
 		   attrs->cacheable ? 'C' : '-',
 		   attrs->sparse ? 'S' : '-',
 		   attrs->priv ? 'P' : '-',
@@ -366,12 +363,11 @@ void gk20a_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block,
 	gk20a_dbg_info("pde pa=0x%llx", pdb_addr);
 
 	nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_lo_w(),
 		nvgpu_aperture_mask(g, vm->pdb.mem,
 			ram_in_page_dir_base_target_sys_mem_ncoh_f(),
-			ram_in_page_dir_base_target_sys_mem_coh_f(),
 			ram_in_page_dir_base_target_vid_mem_f()) |
 		ram_in_page_dir_base_vol_true_f() |
 		ram_in_page_dir_base_lo_f(pdb_addr_lo));
 
 	nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_hi_w(),
 		ram_in_page_dir_base_hi_f(pdb_addr_hi));
diff --git a/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c
index 711aeb0d..05d0473e 100644
--- a/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c
@@ -41,7 +41,6 @@ u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem,
 	u32 lo = (u32)(addr & 0xfffff);
 	u32 win = nvgpu_aperture_mask(g, mem,
 			bus_bar0_window_target_sys_mem_noncoherent_f(),
-			bus_bar0_window_target_sys_mem_coherent_f(),
 			bus_bar0_window_target_vid_mem_f()) |
 		bus_bar0_window_base_f(hi);
 
diff --git a/drivers/gpu/nvgpu/gm20b/bus_gm20b.c b/drivers/gpu/nvgpu/gm20b/bus_gm20b.c
index cdd70d5b..34c8d4b7 100644
--- a/drivers/gpu/nvgpu/gm20b/bus_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/bus_gm20b.c
@@ -25,7 +25,6 @@
 #include <nvgpu/timers.h>
 #include <nvgpu/bus.h>
 #include <nvgpu/mm.h>
-#include <nvgpu/enabled.h>
 
 #include "bus_gm20b.h"
 #include "gk20a/gk20a.h"
@@ -44,9 +43,8 @@ int gm20b_bus_bar1_bind(struct gk20a *g, struct nvgpu_mem *bar1_inst)
 
 	gk20a_writel(g, bus_bar1_block_r(),
 		     nvgpu_aperture_mask(g, bar1_inst,
				bus_bar1_block_target_sys_mem_ncoh_f(),
-				bus_bar1_block_target_sys_mem_coh_f(),
 				bus_bar1_block_target_vid_mem_f()) |
 		     bus_bar1_block_mode_virtual_f() |
 		     bus_bar1_block_ptr_f(ptr_v));
 	nvgpu_timeout_init(g, &timeout, 1000, NVGPU_TIMER_RETRY_TIMER);
diff --git a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
index 15612995..0762e8bd 100644
--- a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
@@ -32,7 +32,6 @@
 #include <nvgpu/atomic.h>
 #include <nvgpu/barrier.h>
 #include <nvgpu/mm.h>
-#include <nvgpu/enabled.h>
 
 #include <nvgpu/hw/gm20b/hw_ccsr_gm20b.h>
 #include <nvgpu/hw/gm20b/hw_ram_gm20b.h>
@@ -52,12 +51,11 @@ void channel_gm20b_bind(struct channel_gk20a *c)
 
 
 	gk20a_writel(g, ccsr_channel_inst_r(c->chid),
 		ccsr_channel_inst_ptr_f(inst_ptr) |
 		nvgpu_aperture_mask(g, &c->inst_block,
			ccsr_channel_inst_target_sys_mem_ncoh_f(),
-			ccsr_channel_inst_target_sys_mem_coh_f(),
 			ccsr_channel_inst_target_vid_mem_f()) |
 		ccsr_channel_inst_bind_true_f());
 
 	gk20a_writel(g, ccsr_channel_r(c->chid),
 		(gk20a_readl(g, ccsr_channel_r(c->chid)) &
diff --git a/drivers/gpu/nvgpu/gp106/sec2_gp106.c b/drivers/gpu/nvgpu/gp106/sec2_gp106.c
index 8e4e5900..29aceb7c 100644
--- a/drivers/gpu/nvgpu/gp106/sec2_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/sec2_gp106.c
@@ -99,7 +99,6 @@ int bl_bootstrap_sec2(struct nvgpu_pmu *pmu,
 		nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) |
 		pwr_pmu_new_instblk_valid_f(1) |
 		nvgpu_aperture_mask(g, &mm->pmu.inst_block,
-			pwr_pmu_new_instblk_target_sys_ncoh_f(),
 			pwr_pmu_new_instblk_target_sys_coh_f(),
 			pwr_pmu_new_instblk_target_fb_f()));
 
@@ -166,7 +165,6 @@ void init_pmu_setup_hw1(struct gk20a *g)
 		nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) |
 		pwr_pmu_new_instblk_valid_f(1) |
 		nvgpu_aperture_mask(g, &mm->pmu.inst_block,
-			pwr_pmu_new_instblk_target_sys_ncoh_f(),
 			pwr_pmu_new_instblk_target_sys_coh_f(),
 			pwr_pmu_new_instblk_target_fb_f()));
 
diff --git a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
index 1436a260..c82fb1cc 100644
--- a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
@@ -25,7 +25,6 @@
 #include <nvgpu/dma.h>
 #include <nvgpu/bug.h>
 #include <nvgpu/log2.h>
-#include <nvgpu/enabled.h>
 
 #include "fifo_gp10b.h"
 
@@ -79,9 +78,8 @@ int channel_gp10b_commit_userd(struct channel_gk20a *c)
 	nvgpu_mem_wr32(g, &c->inst_block,
 		ram_in_ramfc_w() + ram_fc_userd_w(),
 		nvgpu_aperture_mask(g, &g->fifo.userd,
			pbdma_userd_target_sys_mem_ncoh_f(),
-			pbdma_userd_target_sys_mem_coh_f(),
 			pbdma_userd_target_vid_mem_f()) |
 		pbdma_userd_addr_f(addr_lo));
 
 	nvgpu_mem_wr32(g, &c->inst_block,
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index 7ff5f6a6..0439dda9 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -124,9 +124,8 @@ int gp10b_init_bar2_mm_hw_setup(struct gk20a *g)
 
 	gk20a_writel(g, bus_bar2_block_r(),
 		     nvgpu_aperture_mask(g, inst_block,
				bus_bar2_block_target_sys_mem_ncoh_f(),
-				bus_bar2_block_target_sys_mem_coh_f(),
 				bus_bar2_block_target_vid_mem_f()) |
 		     bus_bar2_block_mode_virtual_f() |
 		     bus_bar2_block_ptr_f(inst_pa));
 
@@ -149,9 +148,8 @@ static void update_gmmu_pde3_locked(struct vm_gk20a *vm,
 	phys_addr >>= gmmu_new_pde_address_shift_v();
 
 	pde_v[0] |= nvgpu_aperture_mask(g, pd->mem,
			gmmu_new_pde_aperture_sys_mem_ncoh_f(),
-			gmmu_new_pde_aperture_sys_mem_coh_f(),
 			gmmu_new_pde_aperture_video_memory_f());
 	pde_v[0] |= gmmu_new_pde_address_sys_f(u64_lo32(phys_addr));
 	pde_v[0] |= gmmu_new_pde_vol_true_f();
 	pde_v[1] |= phys_addr >> 24;
@@ -196,7 +194,6 @@ static void update_gmmu_pde0_locked(struct vm_gk20a *vm,
 			gmmu_new_dual_pde_address_small_sys_f(small_addr);
 		pde_v[2] |= nvgpu_aperture_mask(g, pd->mem,
			gmmu_new_dual_pde_aperture_small_sys_mem_ncoh_f(),
-			gmmu_new_dual_pde_aperture_small_sys_mem_coh_f(),
 			gmmu_new_dual_pde_aperture_small_video_memory_f());
 		pde_v[2] |= gmmu_new_dual_pde_vol_small_true_f();
 		pde_v[3] |= small_addr >> 24;
@@ -207,7 +204,6 @@ static void update_gmmu_pde0_locked(struct vm_gk20a *vm,
 		pde_v[0] |= gmmu_new_dual_pde_vol_big_true_f();
 		pde_v[0] |= nvgpu_aperture_mask(g, pd->mem,
			gmmu_new_dual_pde_aperture_big_sys_mem_ncoh_f(),
-			gmmu_new_dual_pde_aperture_big_sys_mem_coh_f(),
 			gmmu_new_dual_pde_aperture_big_video_memory_f());
 		pde_v[1] |= big_addr >> 28;
 	}
@@ -244,10 +240,11 @@ static void __update_pte(struct vm_gk20a *vm,
 		gmmu_new_pte_address_sys_f(phys_shifted) :
 		gmmu_new_pte_address_vid_f(phys_shifted);
 	u32 pte_tgt = __nvgpu_aperture_mask(g,
 					attrs->aperture,
-					gmmu_new_pte_aperture_sys_mem_ncoh_f(),
-					gmmu_new_pte_aperture_sys_mem_coh_f(),
-					gmmu_new_pte_aperture_video_memory_f());
+					attrs->coherent ?
+					gmmu_new_pte_aperture_sys_mem_coh_f() :
+					gmmu_new_pte_aperture_sys_mem_ncoh_f(),
+					gmmu_new_pte_aperture_video_memory_f());
 
 	pte_w[0] = pte_valid | pte_addr | pte_tgt;
 
@@ -309,7 +306,7 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm,
 		   page_size >> 10,
 		   nvgpu_gmmu_perm_str(attrs->rw_flag),
 		   attrs->kind_v,
-		   nvgpu_aperture_str(g, attrs->aperture),
+		   nvgpu_aperture_str(attrs->aperture),
 		   attrs->cacheable ? 'C' : '-',
 		   attrs->sparse ? 'S' : '-',
 		   attrs->priv ? 'P' : '-',
@@ -431,9 +428,8 @@ void gp10b_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block,
 
 	nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_lo_w(),
 		nvgpu_aperture_mask(g, vm->pdb.mem,
			ram_in_page_dir_base_target_sys_mem_ncoh_f(),
-			ram_in_page_dir_base_target_sys_mem_coh_f(),
 			ram_in_page_dir_base_target_vid_mem_f()) |
 		ram_in_page_dir_base_vol_true_f() |
 		ram_in_big_page_size_64kb_f() |
 		ram_in_page_dir_base_lo_f(pdb_addr_lo) |
diff --git a/drivers/gpu/nvgpu/gv11b/acr_gv11b.c b/drivers/gpu/nvgpu/gv11b/acr_gv11b.c
index 4fa3f324..799b2db4 100644
--- a/drivers/gpu/nvgpu/gv11b/acr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/acr_gv11b.c
@@ -27,10 +27,9 @@
 #include <nvgpu/nvgpu_common.h>
 #include <nvgpu/kmem.h>
 #include <nvgpu/nvgpu_mem.h>
+#include <nvgpu/acr/nvgpu_acr.h>
 #include <nvgpu/firmware.h>
 #include <nvgpu/mm.h>
-#include <nvgpu/enabled.h>
-#include <nvgpu/acr/nvgpu_acr.h>
 
 #include "gk20a/gk20a.h"
 #include "acr_gv11b.h"
@@ -221,9 +220,7 @@ static int bl_bootstrap(struct nvgpu_pmu *pmu,
 		pwr_pmu_new_instblk_ptr_f(
 		nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) |
 		pwr_pmu_new_instblk_valid_f(1) |
-		(nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) ?
-		 pwr_pmu_new_instblk_target_sys_coh_f() :
-		 pwr_pmu_new_instblk_target_sys_ncoh_f())) ;
+		pwr_pmu_new_instblk_target_sys_ncoh_f());
 
 	/*copy bootloader interface structure to dmem*/
 	nvgpu_flcn_copy_to_dmem(pmu->flcn, 0, (u8 *)pbl_desc,
diff --git a/drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c
index 86977bb3..617ea61d 100644
--- a/drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c
@@ -31,14 +31,14 @@
 #include <nvgpu/dma.h>
 #include <nvgpu/mm.h>
 #include <nvgpu/sizes.h>
-#include <nvgpu/enabled.h>
-#include <nvgpu/log.h>
-#include <nvgpu/bug.h>
 
 #include "gk20a/gk20a.h"
 #include "gk20a/css_gr_gk20a.h"
 #include "css_gr_gv11b.h"
 
+#include <nvgpu/log.h>
+#include <nvgpu/bug.h>
+
 #include <nvgpu/hw/gv11b/hw_perf_gv11b.h>
 #include <nvgpu/hw/gv11b/hw_mc_gv11b.h>
 
@@ -144,7 +144,6 @@ int gv11b_css_hw_enable_snapshot(struct channel_gk20a *ch,
 			perf_pmasys_mem_block_valid_true_f() |
 			nvgpu_aperture_mask(g, &g->mm.hwpm.inst_block,
				perf_pmasys_mem_block_target_sys_ncoh_f(),
-				perf_pmasys_mem_block_target_sys_coh_f(),
 				perf_pmasys_mem_block_target_lfb_f()));
 
 
diff --git a/drivers/gpu/nvgpu/gv11b/dbg_gpu_gv11b.c b/drivers/gpu/nvgpu/gv11b/dbg_gpu_gv11b.c
index 562476ca..e5d88e8c 100644
--- a/drivers/gpu/nvgpu/gv11b/dbg_gpu_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/dbg_gpu_gv11b.c
@@ -59,12 +59,11 @@ int gv11b_perfbuf_enable_locked(struct gk20a *g, u64 offset, u32 size)
 	inst_pa_page = nvgpu_inst_block_addr(g, &mm->perfbuf.inst_block) >> 12;
 
 	gk20a_writel(g, perf_pmasys_mem_block_r(),
 		     perf_pmasys_mem_block_base_f(inst_pa_page) |
 		     perf_pmasys_mem_block_valid_true_f() |
 		     nvgpu_aperture_mask(g, &mm->perfbuf.inst_block,
				perf_pmasys_mem_block_target_sys_ncoh_f(),
-				perf_pmasys_mem_block_target_sys_coh_f(),
 				perf_pmasys_mem_block_target_lfb_f()));
 
 	gk20a_idle(g);
 	return 0;
diff --git a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
index 6b4b07a6..97ab7aab 100644
--- a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
@@ -101,14 +101,12 @@ void gv11b_get_ch_runlist_entry(struct channel_gk20a *c, u32 *runlist)
			c->runqueue_sel) |
 		ram_rl_entry_chan_userd_target_f(
			nvgpu_aperture_mask(g, &g->fifo.userd,
				ram_rl_entry_chan_userd_target_sys_mem_ncoh_v(),
-				ram_rl_entry_chan_userd_target_sys_mem_coh_v(),
 				ram_rl_entry_chan_userd_target_vid_mem_v())) |
 		ram_rl_entry_chan_inst_target_f(
			nvgpu_aperture_mask(g, &c->inst_block,
				ram_rl_entry_chan_inst_target_sys_mem_ncoh_v(),
-				ram_rl_entry_chan_inst_target_sys_mem_coh_v(),
 				ram_rl_entry_chan_inst_target_vid_mem_v()));
 
 	addr_lo = u64_lo32(c->userd_iova) >>
 			ram_rl_entry_chan_userd_ptr_align_shift_v();
diff --git a/drivers/gpu/nvgpu/gv11b/mm_gv11b.c b/drivers/gpu/nvgpu/gv11b/mm_gv11b.c
index b46ecb0a..ade1d9fe 100644
--- a/drivers/gpu/nvgpu/gv11b/mm_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/mm_gv11b.c
@@ -26,7 +26,6 @@
 #include <nvgpu/dma.h>
 #include <nvgpu/log.h>
 #include <nvgpu/mm.h>
-#include <nvgpu/enabled.h>
 
 #include "gk20a/gk20a.h"
 #include "gk20a/mm_gk20a.h"
@@ -293,9 +292,8 @@ int gv11b_init_bar2_mm_hw_setup(struct gk20a *g)
 
 	gk20a_writel(g, bus_bar2_block_r(),
 		     nvgpu_aperture_mask(g, inst_block,
				bus_bar2_block_target_sys_mem_ncoh_f(),
-				bus_bar2_block_target_sys_mem_coh_f(),
 				bus_bar2_block_target_vid_mem_f()) |
 		     bus_bar2_block_mode_virtual_f() |
 		     bus_bar2_block_ptr_f(inst_pa));
 
diff --git a/drivers/gpu/nvgpu/gv11b/pmu_gv11b.c b/drivers/gpu/nvgpu/gv11b/pmu_gv11b.c
index 13e70eca..7dd4f8f4 100644
--- a/drivers/gpu/nvgpu/gv11b/pmu_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/pmu_gv11b.c
@@ -195,11 +195,9 @@ int gv11b_pmu_bootstrap(struct nvgpu_pmu *pmu)
 
 	gk20a_writel(g, pwr_pmu_new_instblk_r(),
 		pwr_pmu_new_instblk_ptr_f(
-		nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> ALIGN_4KB) |
-		pwr_pmu_new_instblk_valid_f(1) |
-		(nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) ?
-		 pwr_pmu_new_instblk_target_sys_coh_f() :
-		 pwr_pmu_new_instblk_target_sys_ncoh_f()));
+		nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> ALIGN_4KB)
+		| pwr_pmu_new_instblk_valid_f(1)
+		| pwr_pmu_new_instblk_target_sys_ncoh_f());
 
 	/* TBD: load all other surfaces */
 	g->ops.pmu_ver.set_pmu_cmdline_args_trace_size(
diff --git a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c
index bda4c8e4..05d7dee0 100644
--- a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c
@@ -177,9 +177,8 @@ void gv11b_subctx_commit_pdb(struct vm_gk20a *vm,
 	u32 pdb_addr_lo, pdb_addr_hi;
 	u64 pdb_addr;
 	u32 aperture = nvgpu_aperture_mask(g, vm->pdb.mem,
			ram_in_sc_page_dir_base_target_sys_mem_ncoh_v(),
-			ram_in_sc_page_dir_base_target_sys_mem_coh_v(),
 			ram_in_sc_page_dir_base_target_vid_mem_v());
 
 	pdb_addr = nvgpu_mem_get_addr(g, vm->pdb.mem);
 	pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v());
diff --git a/drivers/gpu/nvgpu/include/nvgpu/enabled.h b/drivers/gpu/nvgpu/include/nvgpu/enabled.h
index 24748a19..a3d9df24 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/enabled.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/enabled.h
@@ -75,8 +75,8 @@ struct gk20a;
 #define NVGPU_SUPPORT_MAP_DIRECT_KIND_CTRL 24
 /* Support batch mapping */
 #define NVGPU_SUPPORT_MAP_BUFFER_BATCH 25
-/* Use coherent aperture for sysmem. */
-#define NVGPU_USE_COHERENT_SYSMEM 26
+/* Support DMA coherence */
+#define NVGPU_DMA_COHERENT 26
 /* Use physical scatter tables instead of IOMMU */
 #define NVGPU_MM_USE_PHYSICAL_SG 27
 
diff --git a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h
index f1ab8a6e..2b8b7015 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h
@@ -25,7 +25,6 @@
 
 #include <nvgpu/types.h>
 #include <nvgpu/list.h>
-#include <nvgpu/enabled.h>
 
 #ifdef __KERNEL__
 #include <nvgpu/linux/nvgpu_mem.h>
@@ -52,10 +51,6 @@ struct nvgpu_page_alloc;
 enum nvgpu_aperture {
 	APERTURE_INVALID = 0, /* unallocated or N/A */
 	APERTURE_SYSMEM,
-
-	/* Don't use directly. Use APERTURE_SYSMEM, this is used internally. */
-	__APERTURE_SYSMEM_COH,
-
 	APERTURE_VIDMEM
 };
 
@@ -192,18 +187,12 @@
				clear_list_entry));
 };
 
-static inline const char *nvgpu_aperture_str(struct gk20a *g,
-					     enum nvgpu_aperture aperture)
+static inline const char *nvgpu_aperture_str(enum nvgpu_aperture aperture)
 {
 	switch (aperture) {
-	case APERTURE_INVALID:
-		return "INVAL";
-	case APERTURE_SYSMEM:
-		return "SYSMEM";
-	case __APERTURE_SYSMEM_COH:
-		return "SYSCOH";
-	case APERTURE_VIDMEM:
-		return "VIDMEM";
+	case APERTURE_INVALID: return "INVAL";
+	case APERTURE_SYSMEM:  return "SYSMEM";
+	case APERTURE_VIDMEM:  return "VIDMEM";
 	};
 	return "UNKNOWN";
 }
@@ -333,9 +322,9 @@ u64 nvgpu_mem_get_addr(struct gk20a *g, struct nvgpu_mem *mem);
 u64 nvgpu_mem_get_phys_addr(struct gk20a *g, struct nvgpu_mem *mem);
 
 u32 __nvgpu_aperture_mask(struct gk20a *g, enum nvgpu_aperture aperture,
-		u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask);
+		u32 sysmem_mask, u32 vidmem_mask);
 u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem,
-		u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask);
+		u32 sysmem_mask, u32 vidmem_mask);
 
 u64 nvgpu_mem_iommu_translate(struct gk20a *g, u64 phys);
 