diff options
author | Alex Waterman <alexw@nvidia.com> | 2018-03-01 23:47:25 -0500 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2018-03-03 01:10:14 -0500 |
commit | 89fbf39a05483917c0a9f3453fd94c724bc37375 (patch) | |
tree | 55fdd147c0a7eb80b8fc50ecd9f4b0c80f1322f1 /drivers/gpu/nvgpu | |
parent | ef116a6e632522def7493921666c3241318ce100 (diff) |
Revert "Revert "gpu: nvgpu: Get coherency on gv100 + NVLINK working""
This reverts commit 5a35a95654d561fce09a3b9abf6b82bb7a29d74b.
JIRA EVLR-2333
Change-Id: I923c32496c343d39d34f6d406c38a9f6ce7dc6e0
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1667167
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu')
28 files changed, 275 insertions, 126 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/dma.c b/drivers/gpu/nvgpu/common/linux/dma.c index c13dae8b..81aebb7d 100644 --- a/drivers/gpu/nvgpu/common/linux/dma.c +++ b/drivers/gpu/nvgpu/common/linux/dma.c | |||
@@ -222,6 +222,16 @@ int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags, | |||
222 | void *alloc_ret; | 222 | void *alloc_ret; |
223 | 223 | ||
224 | /* | 224 | /* |
225 | * WAR for IO coherent chips: the DMA API does not seem to generate | ||
226 | * mappings that work correctly. Unclear why - Bug ID: 2040115. | ||
227 | * | ||
228 | * Basically we just tell the DMA API not to map with NO_KERNEL_MAPPING | ||
229 | * and then make a vmap() ourselves. | ||
230 | */ | ||
231 | if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) | ||
232 | flags |= NVGPU_DMA_NO_KERNEL_MAPPING; | ||
233 | |||
234 | /* | ||
225 | * Before the debug print so we see this in the total. But during | 235 | * Before the debug print so we see this in the total. But during |
226 | * cleanup in the fail path this has to be subtracted. | 236 | * cleanup in the fail path this has to be subtracted. |
227 | */ | 237 | */ |
@@ -255,7 +265,17 @@ int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags, | |||
255 | iova, size, flags); | 265 | iova, size, flags); |
256 | } | 266 | } |
257 | if (err) | 267 | if (err) |
258 | goto fail_free; | 268 | goto fail_free_dma; |
269 | |||
270 | if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) { | ||
271 | mem->cpu_va = vmap(mem->priv.pages, | ||
272 | size >> PAGE_SHIFT, | ||
273 | 0, PAGE_KERNEL); | ||
274 | if (!mem->cpu_va) { | ||
275 | err = -ENOMEM; | ||
276 | goto fail_free_sgt; | ||
277 | } | ||
278 | } | ||
259 | 279 | ||
260 | mem->aligned_size = size; | 280 | mem->aligned_size = size; |
261 | mem->aperture = APERTURE_SYSMEM; | 281 | mem->aperture = APERTURE_SYSMEM; |
@@ -265,12 +285,14 @@ int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags, | |||
265 | 285 | ||
266 | return 0; | 286 | return 0; |
267 | 287 | ||
268 | fail_free: | 288 | fail_free_sgt: |
269 | g->dma_memory_used -= mem->aligned_size; | 289 | nvgpu_free_sgtable(g, &mem->priv.sgt); |
290 | fail_free_dma: | ||
270 | dma_free_attrs(d, size, alloc_ret, iova, NVGPU_DMA_ATTR(dma_attrs)); | 291 | dma_free_attrs(d, size, alloc_ret, iova, NVGPU_DMA_ATTR(dma_attrs)); |
271 | mem->cpu_va = NULL; | 292 | mem->cpu_va = NULL; |
272 | mem->priv.sgt = NULL; | 293 | mem->priv.sgt = NULL; |
273 | mem->size = 0; | 294 | mem->size = 0; |
295 | g->dma_memory_used -= mem->aligned_size; | ||
274 | return err; | 296 | return err; |
275 | } | 297 | } |
276 | 298 | ||
@@ -466,6 +488,12 @@ static void nvgpu_dma_free_sys(struct gk20a *g, struct nvgpu_mem *mem) | |||
466 | if (!(mem->mem_flags & NVGPU_MEM_FLAG_SHADOW_COPY) && | 488 | if (!(mem->mem_flags & NVGPU_MEM_FLAG_SHADOW_COPY) && |
467 | !(mem->mem_flags & __NVGPU_MEM_FLAG_NO_DMA) && | 489 | !(mem->mem_flags & __NVGPU_MEM_FLAG_NO_DMA) && |
468 | (mem->cpu_va || mem->priv.pages)) { | 490 | (mem->cpu_va || mem->priv.pages)) { |
491 | /* | ||
492 | * Free side of WAR for bug 2040115. | ||
493 | */ | ||
494 | if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) | ||
495 | vunmap(mem->cpu_va); | ||
496 | |||
469 | if (mem->priv.flags) { | 497 | if (mem->priv.flags) { |
470 | NVGPU_DEFINE_DMA_ATTRS(dma_attrs); | 498 | NVGPU_DEFINE_DMA_ATTRS(dma_attrs); |
471 | 499 | ||
diff --git a/drivers/gpu/nvgpu/common/linux/module.c b/drivers/gpu/nvgpu/common/linux/module.c index b103fcea..741c86e7 100644 --- a/drivers/gpu/nvgpu/common/linux/module.c +++ b/drivers/gpu/nvgpu/common/linux/module.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/of.h> | 20 | #include <linux/of.h> |
21 | #include <linux/of_device.h> | 21 | #include <linux/of_device.h> |
22 | #include <linux/of_platform.h> | 22 | #include <linux/of_platform.h> |
23 | #include <linux/of_address.h> | ||
23 | #include <linux/interrupt.h> | 24 | #include <linux/interrupt.h> |
24 | #include <linux/pm_runtime.h> | 25 | #include <linux/pm_runtime.h> |
25 | #include <linux/reset.h> | 26 | #include <linux/reset.h> |
@@ -1107,6 +1108,7 @@ static int gk20a_probe(struct platform_device *dev) | |||
1107 | struct gk20a *gk20a; | 1108 | struct gk20a *gk20a; |
1108 | int err; | 1109 | int err; |
1109 | struct gk20a_platform *platform = NULL; | 1110 | struct gk20a_platform *platform = NULL; |
1111 | struct device_node *np; | ||
1110 | 1112 | ||
1111 | if (dev->dev.of_node) { | 1113 | if (dev->dev.of_node) { |
1112 | const struct of_device_id *match; | 1114 | const struct of_device_id *match; |
@@ -1147,6 +1149,12 @@ static int gk20a_probe(struct platform_device *dev) | |||
1147 | if (err) | 1149 | if (err) |
1148 | goto return_err; | 1150 | goto return_err; |
1149 | 1151 | ||
1152 | np = nvgpu_get_node(gk20a); | ||
1153 | if (of_dma_is_coherent(np)) { | ||
1154 | __nvgpu_set_enabled(gk20a, NVGPU_USE_COHERENT_SYSMEM, true); | ||
1155 | __nvgpu_set_enabled(gk20a, NVGPU_SUPPORT_IO_COHERENCE, true); | ||
1156 | } | ||
1157 | |||
1150 | if (nvgpu_platform_is_simulation(gk20a)) | 1158 | if (nvgpu_platform_is_simulation(gk20a)) |
1151 | __nvgpu_set_enabled(gk20a, NVGPU_IS_FMODEL, true); | 1159 | __nvgpu_set_enabled(gk20a, NVGPU_IS_FMODEL, true); |
1152 | 1160 | ||
diff --git a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c index 7406c4d7..d4549e1b 100644 --- a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c +++ b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c | |||
@@ -34,40 +34,25 @@ | |||
34 | #include "gk20a/gk20a.h" | 34 | #include "gk20a/gk20a.h" |
35 | #include "gk20a/mm_gk20a.h" | 35 | #include "gk20a/mm_gk20a.h" |
36 | 36 | ||
37 | u32 __nvgpu_aperture_mask(struct gk20a *g, enum nvgpu_aperture aperture, | ||
38 | u32 sysmem_mask, u32 vidmem_mask) | ||
39 | { | ||
40 | switch (aperture) { | ||
41 | case APERTURE_SYSMEM: | ||
42 | /* some igpus consider system memory vidmem */ | ||
43 | return nvgpu_is_enabled(g, NVGPU_MM_HONORS_APERTURE) | ||
44 | ? sysmem_mask : vidmem_mask; | ||
45 | case APERTURE_VIDMEM: | ||
46 | /* for dgpus only */ | ||
47 | return vidmem_mask; | ||
48 | case APERTURE_INVALID: | ||
49 | WARN_ON("Bad aperture"); | ||
50 | } | ||
51 | return 0; | ||
52 | } | ||
53 | |||
54 | u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem, | ||
55 | u32 sysmem_mask, u32 vidmem_mask) | ||
56 | { | ||
57 | return __nvgpu_aperture_mask(g, mem->aperture, | ||
58 | sysmem_mask, vidmem_mask); | ||
59 | } | ||
60 | |||
61 | int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem) | 37 | int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem) |
62 | { | 38 | { |
63 | void *cpu_va; | 39 | void *cpu_va; |
64 | pgprot_t prot = nvgpu_is_enabled(g, NVGPU_DMA_COHERENT) ? PAGE_KERNEL : | 40 | pgprot_t prot = nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) ? |
41 | PAGE_KERNEL : | ||
65 | pgprot_writecombine(PAGE_KERNEL); | 42 | pgprot_writecombine(PAGE_KERNEL); |
66 | 43 | ||
67 | if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin) | 44 | if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin) |
68 | return 0; | 45 | return 0; |
69 | 46 | ||
70 | /* | 47 | /* |
48 | * WAR for bug 2040115: we already will always have a coherent vmap() | ||
49 | * for all sysmem buffers. The prot settings are left alone since | ||
50 | * eventually this should be deleted. | ||
51 | */ | ||
52 | if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) | ||
53 | return 0; | ||
54 | |||
55 | /* | ||
71 | * A CPU mapping is implicitly made for all SYSMEM DMA allocations that | 56 | * A CPU mapping is implicitly made for all SYSMEM DMA allocations that |
72 | * don't have NVGPU_DMA_NO_KERNEL_MAPPING. Thus we don't need to make | 57 | * don't have NVGPU_DMA_NO_KERNEL_MAPPING. Thus we don't need to make |
73 | * another CPU mapping. | 58 | * another CPU mapping. |
@@ -97,6 +82,13 @@ void nvgpu_mem_end(struct gk20a *g, struct nvgpu_mem *mem) | |||
97 | return; | 82 | return; |
98 | 83 | ||
99 | /* | 84 | /* |
85 | * WAR for bug 2040115: skip this since the map will be taken care of | ||
86 | * during the free in the DMA API. | ||
87 | */ | ||
88 | if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) | ||
89 | return; | ||
90 | |||
91 | /* | ||
100 | * Similar to nvgpu_mem_begin() we don't need to unmap the CPU mapping | 92 | * Similar to nvgpu_mem_begin() we don't need to unmap the CPU mapping |
101 | * already made by the DMA API. | 93 | * already made by the DMA API. |
102 | */ | 94 | */ |
@@ -315,7 +307,8 @@ void nvgpu_memset(struct gk20a *g, struct nvgpu_mem *mem, u32 offset, | |||
315 | */ | 307 | */ |
316 | u64 nvgpu_mem_get_addr_sgl(struct gk20a *g, struct scatterlist *sgl) | 308 | u64 nvgpu_mem_get_addr_sgl(struct gk20a *g, struct scatterlist *sgl) |
317 | { | 309 | { |
318 | if (!nvgpu_iommuable(g)) | 310 | if (nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG) || |
311 | !nvgpu_iommuable(g)) | ||
319 | return g->ops.mm.gpu_phys_addr(g, NULL, sg_phys(sgl)); | 312 | return g->ops.mm.gpu_phys_addr(g, NULL, sg_phys(sgl)); |
320 | 313 | ||
321 | if (sg_dma_address(sgl) == 0) | 314 | if (sg_dma_address(sgl) == 0) |
@@ -415,8 +408,12 @@ int nvgpu_mem_create_from_mem(struct gk20a *g, | |||
415 | 408 | ||
416 | /* | 409 | /* |
417 | * Re-use the CPU mapping only if the mapping was made by the DMA API. | 410 | * Re-use the CPU mapping only if the mapping was made by the DMA API. |
411 | * | ||
412 | * Bug 2040115: the DMA API wrapper makes the mapping that we should | ||
413 | * re-use. | ||
418 | */ | 414 | */ |
419 | if (!(src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING)) | 415 | if (!(src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING) || |
416 | nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) | ||
420 | dest->cpu_va = src->cpu_va + (PAGE_SIZE * start_page); | 417 | dest->cpu_va = src->cpu_va + (PAGE_SIZE * start_page); |
421 | 418 | ||
422 | dest->priv.pages = src->priv.pages + start_page; | 419 | dest->priv.pages = src->priv.pages + start_page; |
diff --git a/drivers/gpu/nvgpu/common/linux/pci.c b/drivers/gpu/nvgpu/common/linux/pci.c index 6ebe8dda..973da9ca 100644 --- a/drivers/gpu/nvgpu/common/linux/pci.c +++ b/drivers/gpu/nvgpu/common/linux/pci.c | |||
@@ -17,13 +17,13 @@ | |||
17 | #include <linux/pci.h> | 17 | #include <linux/pci.h> |
18 | #include <linux/interrupt.h> | 18 | #include <linux/interrupt.h> |
19 | #include <linux/pm_runtime.h> | 19 | #include <linux/pm_runtime.h> |
20 | #include <linux/of_platform.h> | ||
21 | #include <linux/of_address.h> | ||
20 | 22 | ||
21 | #include <nvgpu/nvgpu_common.h> | 23 | #include <nvgpu/nvgpu_common.h> |
22 | #include <nvgpu/kmem.h> | 24 | #include <nvgpu/kmem.h> |
23 | #include <nvgpu/enabled.h> | 25 | #include <nvgpu/enabled.h> |
24 | #include <nvgpu/nvlink.h> | 26 | #include <nvgpu/nvlink.h> |
25 | #include <linux/of_platform.h> | ||
26 | #include <linux/of_address.h> | ||
27 | 27 | ||
28 | #include "gk20a/gk20a.h" | 28 | #include "gk20a/gk20a.h" |
29 | #include "clk/clk.h" | 29 | #include "clk/clk.h" |
@@ -566,6 +566,12 @@ static int nvgpu_pci_probe(struct pci_dev *pdev, | |||
566 | platform->g = g; | 566 | platform->g = g; |
567 | l->dev = &pdev->dev; | 567 | l->dev = &pdev->dev; |
568 | 568 | ||
569 | np = nvgpu_get_node(g); | ||
570 | if (of_dma_is_coherent(np)) { | ||
571 | __nvgpu_set_enabled(g, NVGPU_USE_COHERENT_SYSMEM, true); | ||
572 | __nvgpu_set_enabled(g, NVGPU_SUPPORT_IO_COHERENCE, true); | ||
573 | } | ||
574 | |||
569 | err = pci_enable_device(pdev); | 575 | err = pci_enable_device(pdev); |
570 | if (err) | 576 | if (err) |
571 | return err; | 577 | return err; |
@@ -644,13 +650,6 @@ static int nvgpu_pci_probe(struct pci_dev *pdev, | |||
644 | 650 | ||
645 | g->mm.has_physical_mode = false; | 651 | g->mm.has_physical_mode = false; |
646 | 652 | ||
647 | np = nvgpu_get_node(g); | ||
648 | |||
649 | if (of_dma_is_coherent(np)) { | ||
650 | __nvgpu_set_enabled(g, NVGPU_DMA_COHERENT, true); | ||
651 | __nvgpu_set_enabled(g, NVGPU_SUPPORT_IO_COHERENCE, true); | ||
652 | } | ||
653 | |||
654 | return 0; | 653 | return 0; |
655 | } | 654 | } |
656 | 655 | ||
diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c index e3ca4eda..52b2f30c 100644 --- a/drivers/gpu/nvgpu/common/linux/vm.c +++ b/drivers/gpu/nvgpu/common/linux/vm.c | |||
@@ -166,7 +166,8 @@ struct nvgpu_mapped_buf *nvgpu_vm_find_mapping(struct vm_gk20a *vm, | |||
166 | vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10, | 166 | vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10, |
167 | vm_aspace_id(vm), | 167 | vm_aspace_id(vm), |
168 | mapped_buffer->flags, | 168 | mapped_buffer->flags, |
169 | nvgpu_aperture_str(gk20a_dmabuf_aperture(g, os_buf->dmabuf))); | 169 | nvgpu_aperture_str(g, |
170 | gk20a_dmabuf_aperture(g, os_buf->dmabuf))); | ||
170 | 171 | ||
171 | return mapped_buffer; | 172 | return mapped_buffer; |
172 | } | 173 | } |
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c index e1942cbd..3b57e781 100644 --- a/drivers/gpu/nvgpu/common/mm/gmmu.c +++ b/drivers/gpu/nvgpu/common/mm/gmmu.c | |||
@@ -79,6 +79,13 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm, | |||
79 | if (!sgt) | 79 | if (!sgt) |
80 | return -ENOMEM; | 80 | return -ENOMEM; |
81 | 81 | ||
82 | /* | ||
83 | * If the GPU is IO coherent and the DMA API is giving us IO coherent | ||
84 | * CPU mappings then we gotta make sure we use the IO coherent aperture. | ||
85 | */ | ||
86 | if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) | ||
87 | flags |= NVGPU_VM_MAP_IO_COHERENT; | ||
88 | |||
82 | nvgpu_mutex_acquire(&vm->update_gmmu_lock); | 89 | nvgpu_mutex_acquire(&vm->update_gmmu_lock); |
83 | vaddr = g->ops.mm.gmmu_map(vm, addr, | 90 | vaddr = g->ops.mm.gmmu_map(vm, addr, |
84 | sgt, /* sg list */ | 91 | sgt, /* sg list */ |
@@ -627,7 +634,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm, | |||
627 | page_size >> 10, | 634 | page_size >> 10, |
628 | nvgpu_gmmu_perm_str(attrs->rw_flag), | 635 | nvgpu_gmmu_perm_str(attrs->rw_flag), |
629 | attrs->kind_v, | 636 | attrs->kind_v, |
630 | nvgpu_aperture_str(attrs->aperture), | 637 | nvgpu_aperture_str(g, attrs->aperture), |
631 | attrs->cacheable ? 'C' : '-', | 638 | attrs->cacheable ? 'C' : '-', |
632 | attrs->sparse ? 'S' : '-', | 639 | attrs->sparse ? 'S' : '-', |
633 | attrs->priv ? 'P' : '-', | 640 | attrs->priv ? 'P' : '-', |
@@ -705,6 +712,13 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, | |||
705 | attrs.l3_alloc = (bool)(flags & NVGPU_VM_MAP_L3_ALLOC); | 712 | attrs.l3_alloc = (bool)(flags & NVGPU_VM_MAP_L3_ALLOC); |
706 | 713 | ||
707 | /* | 714 | /* |
715 | * Handle the IO coherency aperture: make sure the .aperture field is | ||
716 | * correct based on the IO coherency flag. | ||
717 | */ | ||
718 | if (attrs.coherent && attrs.aperture == APERTURE_SYSMEM) | ||
719 | attrs.aperture = __APERTURE_SYSMEM_COH; | ||
720 | |||
721 | /* | ||
708 | * Only allocate a new GPU VA range if we haven't already been passed a | 722 | * Only allocate a new GPU VA range if we haven't already been passed a |
709 | * GPU VA range. This facilitates fixed mappings. | 723 | * GPU VA range. This facilitates fixed mappings. |
710 | */ | 724 | */ |
diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c index f7c51f42..2b32d869 100644 --- a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c +++ b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c | |||
@@ -28,6 +28,53 @@ | |||
28 | 28 | ||
29 | #include "gk20a/gk20a.h" | 29 | #include "gk20a/gk20a.h" |
30 | 30 | ||
31 | /* | ||
32 | * Make sure to use the right coherency aperture if you use this function! This | ||
33 | * will not add any checks. If you want to simply use the default coherency then | ||
34 | * use nvgpu_aperture_mask(). | ||
35 | */ | ||
36 | u32 __nvgpu_aperture_mask(struct gk20a *g, enum nvgpu_aperture aperture, | ||
37 | u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask) | ||
38 | { | ||
39 | /* | ||
40 | * Some iGPUs treat sysmem (i.e SoC DRAM) as vidmem. In these cases the | ||
41 | * "sysmem" aperture should really be translated to VIDMEM. | ||
42 | */ | ||
43 | if (!nvgpu_is_enabled(g, NVGPU_MM_HONORS_APERTURE)) | ||
44 | aperture = APERTURE_VIDMEM; | ||
45 | |||
46 | switch (aperture) { | ||
47 | case __APERTURE_SYSMEM_COH: | ||
48 | return sysmem_coh_mask; | ||
49 | case APERTURE_SYSMEM: | ||
50 | return sysmem_mask; | ||
51 | case APERTURE_VIDMEM: | ||
52 | return vidmem_mask; | ||
53 | case APERTURE_INVALID: | ||
54 | WARN_ON("Bad aperture"); | ||
55 | } | ||
56 | return 0; | ||
57 | } | ||
58 | |||
59 | u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem, | ||
60 | u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask) | ||
61 | { | ||
62 | enum nvgpu_aperture ap = mem->aperture; | ||
63 | |||
64 | /* | ||
65 | * Handle the coherent aperture: ideally most of the driver is not | ||
66 | * aware of the difference between coherent and non-coherent sysmem so | ||
67 | * we add this translation step here. | ||
68 | */ | ||
69 | if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) && | ||
70 | ap == APERTURE_SYSMEM) | ||
71 | ap = __APERTURE_SYSMEM_COH; | ||
72 | |||
73 | return __nvgpu_aperture_mask(g, ap, | ||
74 | sysmem_mask, sysmem_coh_mask, vidmem_mask); | ||
75 | } | ||
76 | |||
77 | |||
31 | struct nvgpu_sgl *nvgpu_sgt_get_next(struct nvgpu_sgt *sgt, | 78 | struct nvgpu_sgl *nvgpu_sgt_get_next(struct nvgpu_sgt *sgt, |
32 | struct nvgpu_sgl *sgl) | 79 | struct nvgpu_sgl *sgl) |
33 | { | 80 | { |
diff --git a/drivers/gpu/nvgpu/gk20a/bus_gk20a.c b/drivers/gpu/nvgpu/gk20a/bus_gk20a.c index 7f0cfe58..b2800772 100644 --- a/drivers/gpu/nvgpu/gk20a/bus_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/bus_gk20a.c | |||
@@ -21,6 +21,7 @@ | |||
21 | */ | 21 | */ |
22 | 22 | ||
23 | #include <nvgpu/page_allocator.h> | 23 | #include <nvgpu/page_allocator.h> |
24 | #include <nvgpu/enabled.h> | ||
24 | #include <nvgpu/log.h> | 25 | #include <nvgpu/log.h> |
25 | #include <nvgpu/soc.h> | 26 | #include <nvgpu/soc.h> |
26 | #include <nvgpu/bus.h> | 27 | #include <nvgpu/bus.h> |
@@ -155,8 +156,9 @@ int gk20a_bus_bar1_bind(struct gk20a *g, struct nvgpu_mem *bar1_inst) | |||
155 | 156 | ||
156 | gk20a_writel(g, bus_bar1_block_r(), | 157 | gk20a_writel(g, bus_bar1_block_r(), |
157 | nvgpu_aperture_mask(g, bar1_inst, | 158 | nvgpu_aperture_mask(g, bar1_inst, |
158 | bus_bar1_block_target_sys_mem_ncoh_f(), | 159 | bus_bar1_block_target_sys_mem_ncoh_f(), |
159 | bus_bar1_block_target_vid_mem_f()) | | 160 | bus_bar1_block_target_sys_mem_coh_f(), |
161 | bus_bar1_block_target_vid_mem_f()) | | ||
160 | bus_bar1_block_mode_virtual_f() | | 162 | bus_bar1_block_mode_virtual_f() | |
161 | bus_bar1_block_ptr_f(ptr_v)); | 163 | bus_bar1_block_ptr_f(ptr_v)); |
162 | 164 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/fb_gk20a.c b/drivers/gpu/nvgpu/gk20a/fb_gk20a.c index a5a2cb51..e3052701 100644 --- a/drivers/gpu/nvgpu/gk20a/fb_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fb_gk20a.c | |||
@@ -98,8 +98,9 @@ void gk20a_fb_tlb_invalidate(struct gk20a *g, struct nvgpu_mem *pdb) | |||
98 | gk20a_writel(g, fb_mmu_invalidate_pdb_r(), | 98 | gk20a_writel(g, fb_mmu_invalidate_pdb_r(), |
99 | fb_mmu_invalidate_pdb_addr_f(addr_lo) | | 99 | fb_mmu_invalidate_pdb_addr_f(addr_lo) | |
100 | nvgpu_aperture_mask(g, pdb, | 100 | nvgpu_aperture_mask(g, pdb, |
101 | fb_mmu_invalidate_pdb_aperture_sys_mem_f(), | 101 | fb_mmu_invalidate_pdb_aperture_sys_mem_f(), |
102 | fb_mmu_invalidate_pdb_aperture_vid_mem_f())); | 102 | fb_mmu_invalidate_pdb_aperture_sys_mem_f(), |
103 | fb_mmu_invalidate_pdb_aperture_vid_mem_f())); | ||
103 | 104 | ||
104 | gk20a_writel(g, fb_mmu_invalidate_r(), | 105 | gk20a_writel(g, fb_mmu_invalidate_r(), |
105 | fb_mmu_invalidate_all_va_true_f() | | 106 | fb_mmu_invalidate_all_va_true_f() | |
diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c index 409661fc..4fda0d2e 100644 --- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c | |||
@@ -653,6 +653,7 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g, | |||
653 | return -ENOMEM; | 653 | return -ENOMEM; |
654 | aperture = nvgpu_aperture_mask(g, &trace->trace_buf, | 654 | aperture = nvgpu_aperture_mask(g, &trace->trace_buf, |
655 | ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_noncoherent_f(), | 655 | ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_noncoherent_f(), |
656 | ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_coherent_f(), | ||
656 | ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_vid_mem_f()); | 657 | ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_vid_mem_f()); |
657 | 658 | ||
658 | if (nvgpu_mem_begin(g, mem)) | 659 | if (nvgpu_mem_begin(g, mem)) |
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index e12576d2..258006f9 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <nvgpu/dma.h> | 28 | #include <nvgpu/dma.h> |
29 | #include <nvgpu/timers.h> | 29 | #include <nvgpu/timers.h> |
30 | #include <nvgpu/semaphore.h> | 30 | #include <nvgpu/semaphore.h> |
31 | #include <nvgpu/enabled.h> | ||
31 | #include <nvgpu/kmem.h> | 32 | #include <nvgpu/kmem.h> |
32 | #include <nvgpu/log.h> | 33 | #include <nvgpu/log.h> |
33 | #include <nvgpu/soc.h> | 34 | #include <nvgpu/soc.h> |
@@ -666,11 +667,13 @@ static void fifo_engine_exception_status(struct gk20a *g, | |||
666 | static int init_runlist(struct gk20a *g, struct fifo_gk20a *f) | 667 | static int init_runlist(struct gk20a *g, struct fifo_gk20a *f) |
667 | { | 668 | { |
668 | struct fifo_runlist_info_gk20a *runlist; | 669 | struct fifo_runlist_info_gk20a *runlist; |
670 | struct fifo_engine_info_gk20a *engine_info; | ||
669 | unsigned int runlist_id; | 671 | unsigned int runlist_id; |
670 | u32 i; | 672 | u32 i; |
671 | size_t runlist_size; | 673 | size_t runlist_size; |
672 | u32 active_engine_id, pbdma_id, engine_id; | 674 | u32 active_engine_id, pbdma_id, engine_id; |
673 | struct fifo_engine_info_gk20a *engine_info; | 675 | int flags = nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG) ? |
676 | NVGPU_DMA_FORCE_CONTIGUOUS : 0; | ||
674 | 677 | ||
675 | nvgpu_log_fn(g, " "); | 678 | nvgpu_log_fn(g, " "); |
676 | 679 | ||
@@ -705,8 +708,9 @@ static int init_runlist(struct gk20a *g, struct fifo_gk20a *f) | |||
705 | f->num_runlist_entries, runlist_size); | 708 | f->num_runlist_entries, runlist_size); |
706 | 709 | ||
707 | for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) { | 710 | for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) { |
708 | int err = nvgpu_dma_alloc_sys(g, runlist_size, | 711 | int err = nvgpu_dma_alloc_flags_sys(g, flags, |
709 | &runlist->mem[i]); | 712 | runlist_size, |
713 | &runlist->mem[i]); | ||
710 | if (err) { | 714 | if (err) { |
711 | nvgpu_err(g, "memory allocation failed"); | 715 | nvgpu_err(g, "memory allocation failed"); |
712 | goto clean_up_runlist; | 716 | goto clean_up_runlist; |
@@ -3240,8 +3244,9 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, | |||
3240 | gk20a_writel(g, fifo_runlist_base_r(), | 3244 | gk20a_writel(g, fifo_runlist_base_r(), |
3241 | fifo_runlist_base_ptr_f(u64_lo32(runlist_iova >> 12)) | | 3245 | fifo_runlist_base_ptr_f(u64_lo32(runlist_iova >> 12)) | |
3242 | nvgpu_aperture_mask(g, &runlist->mem[new_buf], | 3246 | nvgpu_aperture_mask(g, &runlist->mem[new_buf], |
3243 | fifo_runlist_base_target_sys_mem_ncoh_f(), | 3247 | fifo_runlist_base_target_sys_mem_ncoh_f(), |
3244 | fifo_runlist_base_target_vid_mem_f())); | 3248 | fifo_runlist_base_target_sys_mem_coh_f(), |
3249 | fifo_runlist_base_target_vid_mem_f())); | ||
3245 | } | 3250 | } |
3246 | 3251 | ||
3247 | gk20a_writel(g, fifo_runlist_r(), | 3252 | gk20a_writel(g, fifo_runlist_r(), |
@@ -3763,8 +3768,9 @@ static int gk20a_fifo_commit_userd(struct channel_gk20a *c) | |||
3763 | nvgpu_mem_wr32(g, &c->inst_block, | 3768 | nvgpu_mem_wr32(g, &c->inst_block, |
3764 | ram_in_ramfc_w() + ram_fc_userd_w(), | 3769 | ram_in_ramfc_w() + ram_fc_userd_w(), |
3765 | nvgpu_aperture_mask(g, &g->fifo.userd, | 3770 | nvgpu_aperture_mask(g, &g->fifo.userd, |
3766 | pbdma_userd_target_sys_mem_ncoh_f(), | 3771 | pbdma_userd_target_sys_mem_ncoh_f(), |
3767 | pbdma_userd_target_vid_mem_f()) | | 3772 | pbdma_userd_target_sys_mem_coh_f(), |
3773 | pbdma_userd_target_vid_mem_f()) | | ||
3768 | pbdma_userd_addr_f(addr_lo)); | 3774 | pbdma_userd_addr_f(addr_lo)); |
3769 | 3775 | ||
3770 | nvgpu_mem_wr32(g, &c->inst_block, | 3776 | nvgpu_mem_wr32(g, &c->inst_block, |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 7160ab6f..bdb54325 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -742,13 +742,14 @@ void gr_gk20a_ctx_patch_write(struct gk20a *g, | |||
742 | 742 | ||
743 | static u32 fecs_current_ctx_data(struct gk20a *g, struct nvgpu_mem *inst_block) | 743 | static u32 fecs_current_ctx_data(struct gk20a *g, struct nvgpu_mem *inst_block) |
744 | { | 744 | { |
745 | u32 ptr = u64_lo32(nvgpu_inst_block_addr(g, inst_block) | 745 | u64 ptr = nvgpu_inst_block_addr(g, inst_block) >> |
746 | >> ram_in_base_shift_v()); | 746 | ram_in_base_shift_v(); |
747 | u32 aperture = nvgpu_aperture_mask(g, inst_block, | 747 | u32 aperture = nvgpu_aperture_mask(g, inst_block, |
748 | gr_fecs_current_ctx_target_sys_mem_ncoh_f(), | 748 | gr_fecs_current_ctx_target_sys_mem_ncoh_f(), |
749 | gr_fecs_current_ctx_target_vid_mem_f()); | 749 | gr_fecs_current_ctx_target_sys_mem_coh_f(), |
750 | gr_fecs_current_ctx_target_vid_mem_f()); | ||
750 | 751 | ||
751 | return gr_fecs_current_ctx_ptr_f(ptr) | aperture | | 752 | return gr_fecs_current_ctx_ptr_f(u64_lo32(ptr)) | aperture | |
752 | gr_fecs_current_ctx_valid_f(1); | 753 | gr_fecs_current_ctx_valid_f(1); |
753 | } | 754 | } |
754 | 755 | ||
@@ -2171,16 +2172,18 @@ void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g) | |||
2171 | 2172 | ||
2172 | inst_ptr = nvgpu_inst_block_addr(g, &ucode_info->inst_blk_desc); | 2173 | inst_ptr = nvgpu_inst_block_addr(g, &ucode_info->inst_blk_desc); |
2173 | gk20a_writel(g, gr_fecs_new_ctx_r(), | 2174 | gk20a_writel(g, gr_fecs_new_ctx_r(), |
2174 | gr_fecs_new_ctx_ptr_f(inst_ptr >> 12) | | 2175 | gr_fecs_new_ctx_ptr_f(inst_ptr >> 12) | |
2175 | nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc, | 2176 | nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc, |
2176 | gr_fecs_new_ctx_target_sys_mem_ncoh_f(), | 2177 | gr_fecs_new_ctx_target_sys_mem_ncoh_f(), |
2178 | gr_fecs_new_ctx_target_sys_mem_coh_f(), | ||
2177 | gr_fecs_new_ctx_target_vid_mem_f()) | | 2179 | gr_fecs_new_ctx_target_vid_mem_f()) | |
2178 | gr_fecs_new_ctx_valid_m()); | 2180 | gr_fecs_new_ctx_valid_m()); |
2179 | 2181 | ||
2180 | gk20a_writel(g, gr_fecs_arb_ctx_ptr_r(), | 2182 | gk20a_writel(g, gr_fecs_arb_ctx_ptr_r(), |
2181 | gr_fecs_arb_ctx_ptr_ptr_f(inst_ptr >> 12) | | 2183 | gr_fecs_arb_ctx_ptr_ptr_f(inst_ptr >> 12) | |
2182 | nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc, | 2184 | nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc, |
2183 | gr_fecs_arb_ctx_ptr_target_sys_mem_ncoh_f(), | 2185 | gr_fecs_arb_ctx_ptr_target_sys_mem_ncoh_f(), |
2186 | gr_fecs_arb_ctx_ptr_target_sys_mem_coh_f(), | ||
2184 | gr_fecs_arb_ctx_ptr_target_vid_mem_f())); | 2187 | gr_fecs_arb_ctx_ptr_target_vid_mem_f())); |
2185 | 2188 | ||
2186 | gk20a_writel(g, gr_fecs_arb_ctx_cmd_r(), 0x7); | 2189 | gk20a_writel(g, gr_fecs_arb_ctx_cmd_r(), 0x7); |
@@ -4384,8 +4387,9 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g) | |||
4384 | 4387 | ||
4385 | gk20a_writel(g, fb_mmu_debug_wr_r(), | 4388 | gk20a_writel(g, fb_mmu_debug_wr_r(), |
4386 | nvgpu_aperture_mask(g, &gr->mmu_wr_mem, | 4389 | nvgpu_aperture_mask(g, &gr->mmu_wr_mem, |
4387 | fb_mmu_debug_wr_aperture_sys_mem_ncoh_f(), | 4390 | fb_mmu_debug_wr_aperture_sys_mem_ncoh_f(), |
4388 | fb_mmu_debug_wr_aperture_vid_mem_f()) | | 4391 | fb_mmu_debug_wr_aperture_sys_mem_coh_f(), |
4392 | fb_mmu_debug_wr_aperture_vid_mem_f()) | | ||
4389 | fb_mmu_debug_wr_vol_false_f() | | 4393 | fb_mmu_debug_wr_vol_false_f() | |
4390 | fb_mmu_debug_wr_addr_f(addr)); | 4394 | fb_mmu_debug_wr_addr_f(addr)); |
4391 | 4395 | ||
@@ -4394,8 +4398,9 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g) | |||
4394 | 4398 | ||
4395 | gk20a_writel(g, fb_mmu_debug_rd_r(), | 4399 | gk20a_writel(g, fb_mmu_debug_rd_r(), |
4396 | nvgpu_aperture_mask(g, &gr->mmu_rd_mem, | 4400 | nvgpu_aperture_mask(g, &gr->mmu_rd_mem, |
4397 | fb_mmu_debug_wr_aperture_sys_mem_ncoh_f(), | 4401 | fb_mmu_debug_wr_aperture_sys_mem_ncoh_f(), |
4398 | fb_mmu_debug_rd_aperture_vid_mem_f()) | | 4402 | fb_mmu_debug_wr_aperture_sys_mem_coh_f(), |
4403 | fb_mmu_debug_rd_aperture_vid_mem_f()) | | ||
4399 | fb_mmu_debug_rd_vol_false_f() | | 4404 | fb_mmu_debug_rd_vol_false_f() | |
4400 | fb_mmu_debug_rd_addr_f(addr)); | 4405 | fb_mmu_debug_rd_addr_f(addr)); |
4401 | 4406 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index b27d1109..4ff6125b 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -122,8 +122,9 @@ static inline u32 big_valid_pde0_bits(struct gk20a *g, | |||
122 | { | 122 | { |
123 | u32 pde0_bits = | 123 | u32 pde0_bits = |
124 | nvgpu_aperture_mask(g, pd->mem, | 124 | nvgpu_aperture_mask(g, pd->mem, |
125 | gmmu_pde_aperture_big_sys_mem_ncoh_f(), | 125 | gmmu_pde_aperture_big_sys_mem_ncoh_f(), |
126 | gmmu_pde_aperture_big_video_memory_f()) | | 126 | gmmu_pde_aperture_big_sys_mem_coh_f(), |
127 | gmmu_pde_aperture_big_video_memory_f()) | | ||
127 | gmmu_pde_address_big_sys_f( | 128 | gmmu_pde_address_big_sys_f( |
128 | (u32)(addr >> gmmu_pde_address_shift_v())); | 129 | (u32)(addr >> gmmu_pde_address_shift_v())); |
129 | 130 | ||
@@ -135,8 +136,9 @@ static inline u32 small_valid_pde1_bits(struct gk20a *g, | |||
135 | { | 136 | { |
136 | u32 pde1_bits = | 137 | u32 pde1_bits = |
137 | nvgpu_aperture_mask(g, pd->mem, | 138 | nvgpu_aperture_mask(g, pd->mem, |
138 | gmmu_pde_aperture_small_sys_mem_ncoh_f(), | 139 | gmmu_pde_aperture_small_sys_mem_ncoh_f(), |
139 | gmmu_pde_aperture_small_video_memory_f()) | | 140 | gmmu_pde_aperture_small_sys_mem_coh_f(), |
141 | gmmu_pde_aperture_small_video_memory_f()) | | ||
140 | gmmu_pde_vol_small_true_f() | /* tbd: why? */ | 142 | gmmu_pde_vol_small_true_f() | /* tbd: why? */ |
141 | gmmu_pde_address_small_sys_f( | 143 | gmmu_pde_address_small_sys_f( |
142 | (u32)(addr >> gmmu_pde_address_shift_v())); | 144 | (u32)(addr >> gmmu_pde_address_shift_v())); |
@@ -215,6 +217,7 @@ static void __update_pte(struct vm_gk20a *vm, | |||
215 | 217 | ||
216 | pte_w[1] = __nvgpu_aperture_mask(g, attrs->aperture, | 218 | pte_w[1] = __nvgpu_aperture_mask(g, attrs->aperture, |
217 | gmmu_pte_aperture_sys_mem_ncoh_f(), | 219 | gmmu_pte_aperture_sys_mem_ncoh_f(), |
220 | gmmu_pte_aperture_sys_mem_coh_f(), | ||
218 | gmmu_pte_aperture_video_memory_f()) | | 221 | gmmu_pte_aperture_video_memory_f()) | |
219 | gmmu_pte_kind_f(attrs->kind_v) | | 222 | gmmu_pte_kind_f(attrs->kind_v) | |
220 | gmmu_pte_comptagline_f((u32)(attrs->ctag >> ctag_shift)); | 223 | gmmu_pte_comptagline_f((u32)(attrs->ctag >> ctag_shift)); |
@@ -268,7 +271,7 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm, | |||
268 | page_size >> 10, | 271 | page_size >> 10, |
269 | nvgpu_gmmu_perm_str(attrs->rw_flag), | 272 | nvgpu_gmmu_perm_str(attrs->rw_flag), |
270 | attrs->kind_v, | 273 | attrs->kind_v, |
271 | nvgpu_aperture_str(attrs->aperture), | 274 | nvgpu_aperture_str(g, attrs->aperture), |
272 | attrs->cacheable ? 'C' : '-', | 275 | attrs->cacheable ? 'C' : '-', |
273 | attrs->sparse ? 'S' : '-', | 276 | attrs->sparse ? 'S' : '-', |
274 | attrs->priv ? 'P' : '-', | 277 | attrs->priv ? 'P' : '-', |
@@ -363,11 +366,12 @@ void gk20a_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block, | |||
363 | gk20a_dbg_info("pde pa=0x%llx", pdb_addr); | 366 | gk20a_dbg_info("pde pa=0x%llx", pdb_addr); |
364 | 367 | ||
365 | nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_lo_w(), | 368 | nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_lo_w(), |
366 | nvgpu_aperture_mask(g, vm->pdb.mem, | 369 | nvgpu_aperture_mask(g, vm->pdb.mem, |
367 | ram_in_page_dir_base_target_sys_mem_ncoh_f(), | 370 | ram_in_page_dir_base_target_sys_mem_ncoh_f(), |
368 | ram_in_page_dir_base_target_vid_mem_f()) | | 371 | ram_in_page_dir_base_target_sys_mem_coh_f(), |
369 | ram_in_page_dir_base_vol_true_f() | | 372 | ram_in_page_dir_base_target_vid_mem_f()) | |
370 | ram_in_page_dir_base_lo_f(pdb_addr_lo)); | 373 | ram_in_page_dir_base_vol_true_f() | |
374 | ram_in_page_dir_base_lo_f(pdb_addr_lo)); | ||
371 | 375 | ||
372 | nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_hi_w(), | 376 | nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_hi_w(), |
373 | ram_in_page_dir_base_hi_f(pdb_addr_hi)); | 377 | ram_in_page_dir_base_hi_f(pdb_addr_hi)); |
diff --git a/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c index bb8831e0..67fd2480 100644 --- a/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c | |||
@@ -41,6 +41,7 @@ u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem, | |||
41 | u32 lo = (u32)(addr & 0xfffff); | 41 | u32 lo = (u32)(addr & 0xfffff); |
42 | u32 win = nvgpu_aperture_mask(g, mem, | 42 | u32 win = nvgpu_aperture_mask(g, mem, |
43 | bus_bar0_window_target_sys_mem_noncoherent_f(), | 43 | bus_bar0_window_target_sys_mem_noncoherent_f(), |
44 | bus_bar0_window_target_sys_mem_coherent_f(), | ||
44 | bus_bar0_window_target_vid_mem_f()) | | 45 | bus_bar0_window_target_vid_mem_f()) | |
45 | bus_bar0_window_base_f(hi); | 46 | bus_bar0_window_base_f(hi); |
46 | 47 | ||
diff --git a/drivers/gpu/nvgpu/gm20b/bus_gm20b.c b/drivers/gpu/nvgpu/gm20b/bus_gm20b.c index 34c8d4b7..cdd70d5b 100644 --- a/drivers/gpu/nvgpu/gm20b/bus_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/bus_gm20b.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <nvgpu/timers.h> | 25 | #include <nvgpu/timers.h> |
26 | #include <nvgpu/bus.h> | 26 | #include <nvgpu/bus.h> |
27 | #include <nvgpu/mm.h> | 27 | #include <nvgpu/mm.h> |
28 | #include <nvgpu/enabled.h> | ||
28 | 29 | ||
29 | #include "bus_gm20b.h" | 30 | #include "bus_gm20b.h" |
30 | #include "gk20a/gk20a.h" | 31 | #include "gk20a/gk20a.h" |
@@ -43,8 +44,9 @@ int gm20b_bus_bar1_bind(struct gk20a *g, struct nvgpu_mem *bar1_inst) | |||
43 | 44 | ||
44 | gk20a_writel(g, bus_bar1_block_r(), | 45 | gk20a_writel(g, bus_bar1_block_r(), |
45 | nvgpu_aperture_mask(g, bar1_inst, | 46 | nvgpu_aperture_mask(g, bar1_inst, |
46 | bus_bar1_block_target_sys_mem_ncoh_f(), | 47 | bus_bar1_block_target_sys_mem_ncoh_f(), |
47 | bus_bar1_block_target_vid_mem_f()) | | 48 | bus_bar1_block_target_sys_mem_coh_f(), |
49 | bus_bar1_block_target_vid_mem_f()) | | ||
48 | bus_bar1_block_mode_virtual_f() | | 50 | bus_bar1_block_mode_virtual_f() | |
49 | bus_bar1_block_ptr_f(ptr_v)); | 51 | bus_bar1_block_ptr_f(ptr_v)); |
50 | nvgpu_timeout_init(g, &timeout, 1000, NVGPU_TIMER_RETRY_TIMER); | 52 | nvgpu_timeout_init(g, &timeout, 1000, NVGPU_TIMER_RETRY_TIMER); |
diff --git a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c index 0762e8bd..15612995 100644 --- a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c | |||
@@ -32,6 +32,7 @@ | |||
32 | #include <nvgpu/atomic.h> | 32 | #include <nvgpu/atomic.h> |
33 | #include <nvgpu/barrier.h> | 33 | #include <nvgpu/barrier.h> |
34 | #include <nvgpu/mm.h> | 34 | #include <nvgpu/mm.h> |
35 | #include <nvgpu/enabled.h> | ||
35 | 36 | ||
36 | #include <nvgpu/hw/gm20b/hw_ccsr_gm20b.h> | 37 | #include <nvgpu/hw/gm20b/hw_ccsr_gm20b.h> |
37 | #include <nvgpu/hw/gm20b/hw_ram_gm20b.h> | 38 | #include <nvgpu/hw/gm20b/hw_ram_gm20b.h> |
@@ -51,11 +52,12 @@ void channel_gm20b_bind(struct channel_gk20a *c) | |||
51 | 52 | ||
52 | 53 | ||
53 | gk20a_writel(g, ccsr_channel_inst_r(c->chid), | 54 | gk20a_writel(g, ccsr_channel_inst_r(c->chid), |
54 | ccsr_channel_inst_ptr_f(inst_ptr) | | 55 | ccsr_channel_inst_ptr_f(inst_ptr) | |
55 | nvgpu_aperture_mask(g, &c->inst_block, | 56 | nvgpu_aperture_mask(g, &c->inst_block, |
56 | ccsr_channel_inst_target_sys_mem_ncoh_f(), | 57 | ccsr_channel_inst_target_sys_mem_ncoh_f(), |
57 | ccsr_channel_inst_target_vid_mem_f()) | | 58 | ccsr_channel_inst_target_sys_mem_coh_f(), |
58 | ccsr_channel_inst_bind_true_f()); | 59 | ccsr_channel_inst_target_vid_mem_f()) | |
60 | ccsr_channel_inst_bind_true_f()); | ||
59 | 61 | ||
60 | gk20a_writel(g, ccsr_channel_r(c->chid), | 62 | gk20a_writel(g, ccsr_channel_r(c->chid), |
61 | (gk20a_readl(g, ccsr_channel_r(c->chid)) & | 63 | (gk20a_readl(g, ccsr_channel_r(c->chid)) & |
diff --git a/drivers/gpu/nvgpu/gp106/sec2_gp106.c b/drivers/gpu/nvgpu/gp106/sec2_gp106.c index 29aceb7c..8e4e5900 100644 --- a/drivers/gpu/nvgpu/gp106/sec2_gp106.c +++ b/drivers/gpu/nvgpu/gp106/sec2_gp106.c | |||
@@ -99,6 +99,7 @@ int bl_bootstrap_sec2(struct nvgpu_pmu *pmu, | |||
99 | nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | | 99 | nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | |
100 | pwr_pmu_new_instblk_valid_f(1) | | 100 | pwr_pmu_new_instblk_valid_f(1) | |
101 | nvgpu_aperture_mask(g, &mm->pmu.inst_block, | 101 | nvgpu_aperture_mask(g, &mm->pmu.inst_block, |
102 | pwr_pmu_new_instblk_target_sys_ncoh_f(), | ||
102 | pwr_pmu_new_instblk_target_sys_coh_f(), | 103 | pwr_pmu_new_instblk_target_sys_coh_f(), |
103 | pwr_pmu_new_instblk_target_fb_f())); | 104 | pwr_pmu_new_instblk_target_fb_f())); |
104 | 105 | ||
@@ -165,6 +166,7 @@ void init_pmu_setup_hw1(struct gk20a *g) | |||
165 | nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | | 166 | nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | |
166 | pwr_pmu_new_instblk_valid_f(1) | | 167 | pwr_pmu_new_instblk_valid_f(1) | |
167 | nvgpu_aperture_mask(g, &mm->pmu.inst_block, | 168 | nvgpu_aperture_mask(g, &mm->pmu.inst_block, |
169 | pwr_pmu_new_instblk_target_sys_ncoh_f(), | ||
168 | pwr_pmu_new_instblk_target_sys_coh_f(), | 170 | pwr_pmu_new_instblk_target_sys_coh_f(), |
169 | pwr_pmu_new_instblk_target_fb_f())); | 171 | pwr_pmu_new_instblk_target_fb_f())); |
170 | 172 | ||
diff --git a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c index c82fb1cc..1436a260 100644 --- a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <nvgpu/dma.h> | 25 | #include <nvgpu/dma.h> |
26 | #include <nvgpu/bug.h> | 26 | #include <nvgpu/bug.h> |
27 | #include <nvgpu/log2.h> | 27 | #include <nvgpu/log2.h> |
28 | #include <nvgpu/enabled.h> | ||
28 | 29 | ||
29 | #include "fifo_gp10b.h" | 30 | #include "fifo_gp10b.h" |
30 | 31 | ||
@@ -78,8 +79,9 @@ int channel_gp10b_commit_userd(struct channel_gk20a *c) | |||
78 | nvgpu_mem_wr32(g, &c->inst_block, | 79 | nvgpu_mem_wr32(g, &c->inst_block, |
79 | ram_in_ramfc_w() + ram_fc_userd_w(), | 80 | ram_in_ramfc_w() + ram_fc_userd_w(), |
80 | nvgpu_aperture_mask(g, &g->fifo.userd, | 81 | nvgpu_aperture_mask(g, &g->fifo.userd, |
81 | pbdma_userd_target_sys_mem_ncoh_f(), | 82 | pbdma_userd_target_sys_mem_ncoh_f(), |
82 | pbdma_userd_target_vid_mem_f()) | | 83 | pbdma_userd_target_sys_mem_coh_f(), |
84 | pbdma_userd_target_vid_mem_f()) | | ||
83 | pbdma_userd_addr_f(addr_lo)); | 85 | pbdma_userd_addr_f(addr_lo)); |
84 | 86 | ||
85 | nvgpu_mem_wr32(g, &c->inst_block, | 87 | nvgpu_mem_wr32(g, &c->inst_block, |
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c index 0439dda9..7ff5f6a6 100644 --- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c | |||
@@ -124,8 +124,9 @@ int gp10b_init_bar2_mm_hw_setup(struct gk20a *g) | |||
124 | 124 | ||
125 | gk20a_writel(g, bus_bar2_block_r(), | 125 | gk20a_writel(g, bus_bar2_block_r(), |
126 | nvgpu_aperture_mask(g, inst_block, | 126 | nvgpu_aperture_mask(g, inst_block, |
127 | bus_bar2_block_target_sys_mem_ncoh_f(), | 127 | bus_bar2_block_target_sys_mem_ncoh_f(), |
128 | bus_bar2_block_target_vid_mem_f()) | | 128 | bus_bar2_block_target_sys_mem_coh_f(), |
129 | bus_bar2_block_target_vid_mem_f()) | | ||
129 | bus_bar2_block_mode_virtual_f() | | 130 | bus_bar2_block_mode_virtual_f() | |
130 | bus_bar2_block_ptr_f(inst_pa)); | 131 | bus_bar2_block_ptr_f(inst_pa)); |
131 | 132 | ||
@@ -148,8 +149,9 @@ static void update_gmmu_pde3_locked(struct vm_gk20a *vm, | |||
148 | phys_addr >>= gmmu_new_pde_address_shift_v(); | 149 | phys_addr >>= gmmu_new_pde_address_shift_v(); |
149 | 150 | ||
150 | pde_v[0] |= nvgpu_aperture_mask(g, pd->mem, | 151 | pde_v[0] |= nvgpu_aperture_mask(g, pd->mem, |
151 | gmmu_new_pde_aperture_sys_mem_ncoh_f(), | 152 | gmmu_new_pde_aperture_sys_mem_ncoh_f(), |
152 | gmmu_new_pde_aperture_video_memory_f()); | 153 | gmmu_new_pde_aperture_sys_mem_coh_f(), |
154 | gmmu_new_pde_aperture_video_memory_f()); | ||
153 | pde_v[0] |= gmmu_new_pde_address_sys_f(u64_lo32(phys_addr)); | 155 | pde_v[0] |= gmmu_new_pde_address_sys_f(u64_lo32(phys_addr)); |
154 | pde_v[0] |= gmmu_new_pde_vol_true_f(); | 156 | pde_v[0] |= gmmu_new_pde_vol_true_f(); |
155 | pde_v[1] |= phys_addr >> 24; | 157 | pde_v[1] |= phys_addr >> 24; |
@@ -194,6 +196,7 @@ static void update_gmmu_pde0_locked(struct vm_gk20a *vm, | |||
194 | gmmu_new_dual_pde_address_small_sys_f(small_addr); | 196 | gmmu_new_dual_pde_address_small_sys_f(small_addr); |
195 | pde_v[2] |= nvgpu_aperture_mask(g, pd->mem, | 197 | pde_v[2] |= nvgpu_aperture_mask(g, pd->mem, |
196 | gmmu_new_dual_pde_aperture_small_sys_mem_ncoh_f(), | 198 | gmmu_new_dual_pde_aperture_small_sys_mem_ncoh_f(), |
199 | gmmu_new_dual_pde_aperture_small_sys_mem_coh_f(), | ||
197 | gmmu_new_dual_pde_aperture_small_video_memory_f()); | 200 | gmmu_new_dual_pde_aperture_small_video_memory_f()); |
198 | pde_v[2] |= gmmu_new_dual_pde_vol_small_true_f(); | 201 | pde_v[2] |= gmmu_new_dual_pde_vol_small_true_f(); |
199 | pde_v[3] |= small_addr >> 24; | 202 | pde_v[3] |= small_addr >> 24; |
@@ -204,6 +207,7 @@ static void update_gmmu_pde0_locked(struct vm_gk20a *vm, | |||
204 | pde_v[0] |= gmmu_new_dual_pde_vol_big_true_f(); | 207 | pde_v[0] |= gmmu_new_dual_pde_vol_big_true_f(); |
205 | pde_v[0] |= nvgpu_aperture_mask(g, pd->mem, | 208 | pde_v[0] |= nvgpu_aperture_mask(g, pd->mem, |
206 | gmmu_new_dual_pde_aperture_big_sys_mem_ncoh_f(), | 209 | gmmu_new_dual_pde_aperture_big_sys_mem_ncoh_f(), |
210 | gmmu_new_dual_pde_aperture_big_sys_mem_coh_f(), | ||
207 | gmmu_new_dual_pde_aperture_big_video_memory_f()); | 211 | gmmu_new_dual_pde_aperture_big_video_memory_f()); |
208 | pde_v[1] |= big_addr >> 28; | 212 | pde_v[1] |= big_addr >> 28; |
209 | } | 213 | } |
@@ -240,11 +244,10 @@ static void __update_pte(struct vm_gk20a *vm, | |||
240 | gmmu_new_pte_address_sys_f(phys_shifted) : | 244 | gmmu_new_pte_address_sys_f(phys_shifted) : |
241 | gmmu_new_pte_address_vid_f(phys_shifted); | 245 | gmmu_new_pte_address_vid_f(phys_shifted); |
242 | u32 pte_tgt = __nvgpu_aperture_mask(g, | 246 | u32 pte_tgt = __nvgpu_aperture_mask(g, |
243 | attrs->aperture, | 247 | attrs->aperture, |
244 | attrs->coherent ? | 248 | gmmu_new_pte_aperture_sys_mem_ncoh_f(), |
245 | gmmu_new_pte_aperture_sys_mem_coh_f() : | 249 | gmmu_new_pte_aperture_sys_mem_coh_f(), |
246 | gmmu_new_pte_aperture_sys_mem_ncoh_f(), | 250 | gmmu_new_pte_aperture_video_memory_f()); |
247 | gmmu_new_pte_aperture_video_memory_f()); | ||
248 | 251 | ||
249 | pte_w[0] = pte_valid | pte_addr | pte_tgt; | 252 | pte_w[0] = pte_valid | pte_addr | pte_tgt; |
250 | 253 | ||
@@ -306,7 +309,7 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm, | |||
306 | page_size >> 10, | 309 | page_size >> 10, |
307 | nvgpu_gmmu_perm_str(attrs->rw_flag), | 310 | nvgpu_gmmu_perm_str(attrs->rw_flag), |
308 | attrs->kind_v, | 311 | attrs->kind_v, |
309 | nvgpu_aperture_str(attrs->aperture), | 312 | nvgpu_aperture_str(g, attrs->aperture), |
310 | attrs->cacheable ? 'C' : '-', | 313 | attrs->cacheable ? 'C' : '-', |
311 | attrs->sparse ? 'S' : '-', | 314 | attrs->sparse ? 'S' : '-', |
312 | attrs->priv ? 'P' : '-', | 315 | attrs->priv ? 'P' : '-', |
@@ -428,8 +431,9 @@ void gp10b_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block, | |||
428 | 431 | ||
429 | nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_lo_w(), | 432 | nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_lo_w(), |
430 | nvgpu_aperture_mask(g, vm->pdb.mem, | 433 | nvgpu_aperture_mask(g, vm->pdb.mem, |
431 | ram_in_page_dir_base_target_sys_mem_ncoh_f(), | 434 | ram_in_page_dir_base_target_sys_mem_ncoh_f(), |
432 | ram_in_page_dir_base_target_vid_mem_f()) | | 435 | ram_in_page_dir_base_target_sys_mem_coh_f(), |
436 | ram_in_page_dir_base_target_vid_mem_f()) | | ||
433 | ram_in_page_dir_base_vol_true_f() | | 437 | ram_in_page_dir_base_vol_true_f() | |
434 | ram_in_big_page_size_64kb_f() | | 438 | ram_in_big_page_size_64kb_f() | |
435 | ram_in_page_dir_base_lo_f(pdb_addr_lo) | | 439 | ram_in_page_dir_base_lo_f(pdb_addr_lo) | |
diff --git a/drivers/gpu/nvgpu/gv11b/acr_gv11b.c b/drivers/gpu/nvgpu/gv11b/acr_gv11b.c index 799b2db4..4fa3f324 100644 --- a/drivers/gpu/nvgpu/gv11b/acr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/acr_gv11b.c | |||
@@ -27,9 +27,10 @@ | |||
27 | #include <nvgpu/nvgpu_common.h> | 27 | #include <nvgpu/nvgpu_common.h> |
28 | #include <nvgpu/kmem.h> | 28 | #include <nvgpu/kmem.h> |
29 | #include <nvgpu/nvgpu_mem.h> | 29 | #include <nvgpu/nvgpu_mem.h> |
30 | #include <nvgpu/acr/nvgpu_acr.h> | ||
31 | #include <nvgpu/firmware.h> | 30 | #include <nvgpu/firmware.h> |
32 | #include <nvgpu/mm.h> | 31 | #include <nvgpu/mm.h> |
32 | #include <nvgpu/enabled.h> | ||
33 | #include <nvgpu/acr/nvgpu_acr.h> | ||
33 | 34 | ||
34 | #include "gk20a/gk20a.h" | 35 | #include "gk20a/gk20a.h" |
35 | #include "acr_gv11b.h" | 36 | #include "acr_gv11b.h" |
@@ -220,7 +221,9 @@ static int bl_bootstrap(struct nvgpu_pmu *pmu, | |||
220 | pwr_pmu_new_instblk_ptr_f( | 221 | pwr_pmu_new_instblk_ptr_f( |
221 | nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | | 222 | nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | |
222 | pwr_pmu_new_instblk_valid_f(1) | | 223 | pwr_pmu_new_instblk_valid_f(1) | |
223 | pwr_pmu_new_instblk_target_sys_ncoh_f()); | 224 | (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) ? |
225 | pwr_pmu_new_instblk_target_sys_coh_f() : | ||
226 | pwr_pmu_new_instblk_target_sys_ncoh_f())) ; | ||
224 | 227 | ||
225 | /*copy bootloader interface structure to dmem*/ | 228 | /*copy bootloader interface structure to dmem*/ |
226 | nvgpu_flcn_copy_to_dmem(pmu->flcn, 0, (u8 *)pbl_desc, | 229 | nvgpu_flcn_copy_to_dmem(pmu->flcn, 0, (u8 *)pbl_desc, |
diff --git a/drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c index 617ea61d..86977bb3 100644 --- a/drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c | |||
@@ -31,14 +31,14 @@ | |||
31 | #include <nvgpu/dma.h> | 31 | #include <nvgpu/dma.h> |
32 | #include <nvgpu/mm.h> | 32 | #include <nvgpu/mm.h> |
33 | #include <nvgpu/sizes.h> | 33 | #include <nvgpu/sizes.h> |
34 | #include <nvgpu/enabled.h> | ||
35 | #include <nvgpu/log.h> | ||
36 | #include <nvgpu/bug.h> | ||
34 | 37 | ||
35 | #include "gk20a/gk20a.h" | 38 | #include "gk20a/gk20a.h" |
36 | #include "gk20a/css_gr_gk20a.h" | 39 | #include "gk20a/css_gr_gk20a.h" |
37 | #include "css_gr_gv11b.h" | 40 | #include "css_gr_gv11b.h" |
38 | 41 | ||
39 | #include <nvgpu/log.h> | ||
40 | #include <nvgpu/bug.h> | ||
41 | |||
42 | #include <nvgpu/hw/gv11b/hw_perf_gv11b.h> | 42 | #include <nvgpu/hw/gv11b/hw_perf_gv11b.h> |
43 | #include <nvgpu/hw/gv11b/hw_mc_gv11b.h> | 43 | #include <nvgpu/hw/gv11b/hw_mc_gv11b.h> |
44 | 44 | ||
@@ -144,6 +144,7 @@ int gv11b_css_hw_enable_snapshot(struct channel_gk20a *ch, | |||
144 | perf_pmasys_mem_block_valid_true_f() | | 144 | perf_pmasys_mem_block_valid_true_f() | |
145 | nvgpu_aperture_mask(g, &g->mm.hwpm.inst_block, | 145 | nvgpu_aperture_mask(g, &g->mm.hwpm.inst_block, |
146 | perf_pmasys_mem_block_target_sys_ncoh_f(), | 146 | perf_pmasys_mem_block_target_sys_ncoh_f(), |
147 | perf_pmasys_mem_block_target_sys_coh_f(), | ||
147 | perf_pmasys_mem_block_target_lfb_f())); | 148 | perf_pmasys_mem_block_target_lfb_f())); |
148 | 149 | ||
149 | 150 | ||
diff --git a/drivers/gpu/nvgpu/gv11b/dbg_gpu_gv11b.c b/drivers/gpu/nvgpu/gv11b/dbg_gpu_gv11b.c index e5d88e8c..562476ca 100644 --- a/drivers/gpu/nvgpu/gv11b/dbg_gpu_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/dbg_gpu_gv11b.c | |||
@@ -59,11 +59,12 @@ int gv11b_perfbuf_enable_locked(struct gk20a *g, u64 offset, u32 size) | |||
59 | inst_pa_page = nvgpu_inst_block_addr(g, &mm->perfbuf.inst_block) >> 12; | 59 | inst_pa_page = nvgpu_inst_block_addr(g, &mm->perfbuf.inst_block) >> 12; |
60 | 60 | ||
61 | gk20a_writel(g, perf_pmasys_mem_block_r(), | 61 | gk20a_writel(g, perf_pmasys_mem_block_r(), |
62 | perf_pmasys_mem_block_base_f(inst_pa_page) | | 62 | perf_pmasys_mem_block_base_f(inst_pa_page) | |
63 | perf_pmasys_mem_block_valid_true_f() | | 63 | perf_pmasys_mem_block_valid_true_f() | |
64 | nvgpu_aperture_mask(g, &mm->perfbuf.inst_block, | 64 | nvgpu_aperture_mask(g, &mm->perfbuf.inst_block, |
65 | + perf_pmasys_mem_block_target_sys_ncoh_f(), | 65 | perf_pmasys_mem_block_target_sys_ncoh_f(), |
66 | + perf_pmasys_mem_block_target_lfb_f())); | 66 | perf_pmasys_mem_block_target_sys_coh_f(), |
67 | perf_pmasys_mem_block_target_lfb_f())); | ||
67 | 68 | ||
68 | gk20a_idle(g); | 69 | gk20a_idle(g); |
69 | return 0; | 70 | return 0; |
diff --git a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c index feed2002..9e60d9f7 100644 --- a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c | |||
@@ -101,12 +101,14 @@ void gv11b_get_ch_runlist_entry(struct channel_gk20a *c, u32 *runlist) | |||
101 | c->runqueue_sel) | | 101 | c->runqueue_sel) | |
102 | ram_rl_entry_chan_userd_target_f( | 102 | ram_rl_entry_chan_userd_target_f( |
103 | nvgpu_aperture_mask(g, &g->fifo.userd, | 103 | nvgpu_aperture_mask(g, &g->fifo.userd, |
104 | ram_rl_entry_chan_userd_target_sys_mem_ncoh_v(), | 104 | ram_rl_entry_chan_userd_target_sys_mem_ncoh_v(), |
105 | ram_rl_entry_chan_userd_target_vid_mem_v())) | | 105 | ram_rl_entry_chan_userd_target_sys_mem_coh_v(), |
106 | ram_rl_entry_chan_userd_target_vid_mem_v())) | | ||
106 | ram_rl_entry_chan_inst_target_f( | 107 | ram_rl_entry_chan_inst_target_f( |
107 | nvgpu_aperture_mask(g, &c->inst_block, | 108 | nvgpu_aperture_mask(g, &c->inst_block, |
108 | ram_rl_entry_chan_inst_target_sys_mem_ncoh_v(), | 109 | ram_rl_entry_chan_inst_target_sys_mem_ncoh_v(), |
109 | ram_rl_entry_chan_inst_target_vid_mem_v())); | 110 | ram_rl_entry_chan_inst_target_sys_mem_coh_v(), |
111 | ram_rl_entry_chan_inst_target_vid_mem_v())); | ||
110 | 112 | ||
111 | addr_lo = u64_lo32(c->userd_iova) >> | 113 | addr_lo = u64_lo32(c->userd_iova) >> |
112 | ram_rl_entry_chan_userd_ptr_align_shift_v(); | 114 | ram_rl_entry_chan_userd_ptr_align_shift_v(); |
diff --git a/drivers/gpu/nvgpu/gv11b/mm_gv11b.c b/drivers/gpu/nvgpu/gv11b/mm_gv11b.c index ade1d9fe..b46ecb0a 100644 --- a/drivers/gpu/nvgpu/gv11b/mm_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/mm_gv11b.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <nvgpu/dma.h> | 26 | #include <nvgpu/dma.h> |
27 | #include <nvgpu/log.h> | 27 | #include <nvgpu/log.h> |
28 | #include <nvgpu/mm.h> | 28 | #include <nvgpu/mm.h> |
29 | #include <nvgpu/enabled.h> | ||
29 | 30 | ||
30 | #include "gk20a/gk20a.h" | 31 | #include "gk20a/gk20a.h" |
31 | #include "gk20a/mm_gk20a.h" | 32 | #include "gk20a/mm_gk20a.h" |
@@ -292,8 +293,9 @@ int gv11b_init_bar2_mm_hw_setup(struct gk20a *g) | |||
292 | 293 | ||
293 | gk20a_writel(g, bus_bar2_block_r(), | 294 | gk20a_writel(g, bus_bar2_block_r(), |
294 | nvgpu_aperture_mask(g, inst_block, | 295 | nvgpu_aperture_mask(g, inst_block, |
295 | bus_bar2_block_target_sys_mem_ncoh_f(), | 296 | bus_bar2_block_target_sys_mem_ncoh_f(), |
296 | bus_bar2_block_target_vid_mem_f()) | | 297 | bus_bar2_block_target_sys_mem_coh_f(), |
298 | bus_bar2_block_target_vid_mem_f()) | | ||
297 | bus_bar2_block_mode_virtual_f() | | 299 | bus_bar2_block_mode_virtual_f() | |
298 | bus_bar2_block_ptr_f(inst_pa)); | 300 | bus_bar2_block_ptr_f(inst_pa)); |
299 | 301 | ||
diff --git a/drivers/gpu/nvgpu/gv11b/pmu_gv11b.c b/drivers/gpu/nvgpu/gv11b/pmu_gv11b.c index 7dd4f8f4..13e70eca 100644 --- a/drivers/gpu/nvgpu/gv11b/pmu_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/pmu_gv11b.c | |||
@@ -195,9 +195,11 @@ int gv11b_pmu_bootstrap(struct nvgpu_pmu *pmu) | |||
195 | 195 | ||
196 | gk20a_writel(g, pwr_pmu_new_instblk_r(), | 196 | gk20a_writel(g, pwr_pmu_new_instblk_r(), |
197 | pwr_pmu_new_instblk_ptr_f( | 197 | pwr_pmu_new_instblk_ptr_f( |
198 | nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> ALIGN_4KB) | 198 | nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> ALIGN_4KB) | |
199 | | pwr_pmu_new_instblk_valid_f(1) | 199 | pwr_pmu_new_instblk_valid_f(1) | |
200 | | pwr_pmu_new_instblk_target_sys_ncoh_f()); | 200 | (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) ? |
201 | pwr_pmu_new_instblk_target_sys_coh_f() : | ||
202 | pwr_pmu_new_instblk_target_sys_ncoh_f())); | ||
201 | 203 | ||
202 | /* TBD: load all other surfaces */ | 204 | /* TBD: load all other surfaces */ |
203 | g->ops.pmu_ver.set_pmu_cmdline_args_trace_size( | 205 | g->ops.pmu_ver.set_pmu_cmdline_args_trace_size( |
diff --git a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c index 05d7dee0..bda4c8e4 100644 --- a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c | |||
@@ -177,8 +177,9 @@ void gv11b_subctx_commit_pdb(struct vm_gk20a *vm, | |||
177 | u32 pdb_addr_lo, pdb_addr_hi; | 177 | u32 pdb_addr_lo, pdb_addr_hi; |
178 | u64 pdb_addr; | 178 | u64 pdb_addr; |
179 | u32 aperture = nvgpu_aperture_mask(g, vm->pdb.mem, | 179 | u32 aperture = nvgpu_aperture_mask(g, vm->pdb.mem, |
180 | ram_in_sc_page_dir_base_target_sys_mem_ncoh_v(), | 180 | ram_in_sc_page_dir_base_target_sys_mem_ncoh_v(), |
181 | ram_in_sc_page_dir_base_target_vid_mem_v()); | 181 | ram_in_sc_page_dir_base_target_sys_mem_coh_v(), |
182 | ram_in_sc_page_dir_base_target_vid_mem_v()); | ||
182 | 183 | ||
183 | pdb_addr = nvgpu_mem_get_addr(g, vm->pdb.mem); | 184 | pdb_addr = nvgpu_mem_get_addr(g, vm->pdb.mem); |
184 | pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v()); | 185 | pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v()); |
diff --git a/drivers/gpu/nvgpu/include/nvgpu/enabled.h b/drivers/gpu/nvgpu/include/nvgpu/enabled.h index a3d9df24..24748a19 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/enabled.h +++ b/drivers/gpu/nvgpu/include/nvgpu/enabled.h | |||
@@ -75,8 +75,8 @@ struct gk20a; | |||
75 | #define NVGPU_SUPPORT_MAP_DIRECT_KIND_CTRL 24 | 75 | #define NVGPU_SUPPORT_MAP_DIRECT_KIND_CTRL 24 |
76 | /* Support batch mapping */ | 76 | /* Support batch mapping */ |
77 | #define NVGPU_SUPPORT_MAP_BUFFER_BATCH 25 | 77 | #define NVGPU_SUPPORT_MAP_BUFFER_BATCH 25 |
78 | /* Support DMA coherence */ | 78 | /* Use coherent aperture for sysmem. */ |
79 | #define NVGPU_DMA_COHERENT 26 | 79 | #define NVGPU_USE_COHERENT_SYSMEM 26 |
80 | /* Use physical scatter tables instead of IOMMU */ | 80 | /* Use physical scatter tables instead of IOMMU */ |
81 | #define NVGPU_MM_USE_PHYSICAL_SG 27 | 81 | #define NVGPU_MM_USE_PHYSICAL_SG 27 |
82 | 82 | ||
diff --git a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h index 04e947e0..373c3eef 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h +++ b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h | |||
@@ -25,6 +25,7 @@ | |||
25 | 25 | ||
26 | #include <nvgpu/types.h> | 26 | #include <nvgpu/types.h> |
27 | #include <nvgpu/list.h> | 27 | #include <nvgpu/list.h> |
28 | #include <nvgpu/enabled.h> | ||
28 | 29 | ||
29 | #ifdef __KERNEL__ | 30 | #ifdef __KERNEL__ |
30 | #include <nvgpu/linux/nvgpu_mem.h> | 31 | #include <nvgpu/linux/nvgpu_mem.h> |
@@ -51,6 +52,10 @@ struct nvgpu_page_alloc; | |||
51 | enum nvgpu_aperture { | 52 | enum nvgpu_aperture { |
52 | APERTURE_INVALID = 0, /* unallocated or N/A */ | 53 | APERTURE_INVALID = 0, /* unallocated or N/A */ |
53 | APERTURE_SYSMEM, | 54 | APERTURE_SYSMEM, |
55 | |||
56 | /* Don't use directly. Use APERTURE_SYSMEM, this is used internally. */ | ||
57 | __APERTURE_SYSMEM_COH, | ||
58 | |||
54 | APERTURE_VIDMEM | 59 | APERTURE_VIDMEM |
55 | }; | 60 | }; |
56 | 61 | ||
@@ -195,12 +200,18 @@ nvgpu_mem_from_clear_list_entry(struct nvgpu_list_node *node) | |||
195 | clear_list_entry)); | 200 | clear_list_entry)); |
196 | }; | 201 | }; |
197 | 202 | ||
198 | static inline const char *nvgpu_aperture_str(enum nvgpu_aperture aperture) | 203 | static inline const char *nvgpu_aperture_str(struct gk20a *g, |
204 | enum nvgpu_aperture aperture) | ||
199 | { | 205 | { |
200 | switch (aperture) { | 206 | switch (aperture) { |
201 | case APERTURE_INVALID: return "INVAL"; | 207 | case APERTURE_INVALID: |
202 | case APERTURE_SYSMEM: return "SYSMEM"; | 208 | return "INVAL"; |
203 | case APERTURE_VIDMEM: return "VIDMEM"; | 209 | case APERTURE_SYSMEM: |
210 | return "SYSMEM"; | ||
211 | case __APERTURE_SYSMEM_COH: | ||
212 | return "SYSCOH"; | ||
213 | case APERTURE_VIDMEM: | ||
214 | return "VIDMEM"; | ||
204 | }; | 215 | }; |
205 | return "UNKNOWN"; | 216 | return "UNKNOWN"; |
206 | } | 217 | } |
@@ -332,9 +343,9 @@ u64 nvgpu_mem_get_addr(struct gk20a *g, struct nvgpu_mem *mem); | |||
332 | u64 nvgpu_mem_get_phys_addr(struct gk20a *g, struct nvgpu_mem *mem); | 343 | u64 nvgpu_mem_get_phys_addr(struct gk20a *g, struct nvgpu_mem *mem); |
333 | 344 | ||
334 | u32 __nvgpu_aperture_mask(struct gk20a *g, enum nvgpu_aperture aperture, | 345 | u32 __nvgpu_aperture_mask(struct gk20a *g, enum nvgpu_aperture aperture, |
335 | u32 sysmem_mask, u32 vidmem_mask); | 346 | u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask); |
336 | u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem, | 347 | u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem, |
337 | u32 sysmem_mask, u32 vidmem_mask); | 348 | u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask); |
338 | 349 | ||
339 | u64 nvgpu_mem_iommu_translate(struct gk20a *g, u64 phys); | 350 | u64 nvgpu_mem_iommu_translate(struct gk20a *g, u64 phys); |
340 | 351 | ||