diff options
author | Alex Waterman <alexw@nvidia.com> | 2018-02-08 16:41:29 -0500 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2018-02-27 19:03:30 -0500 |
commit | a885f682d6cf2476c0cee695942b5f4c2718aa70 (patch) | |
tree | d6f863a05aca47b069b46b7f4b1456475490d43f /drivers/gpu/nvgpu | |
parent | 84cbb3ad4717afd848fbed6e7c5c2b9d69b89789 (diff) |
gpu: nvgpu: Get coherency on gv100 + NVLINK working
This patch does a couple of things. First it renames
NVGPU_DMA_COHERENT to NVGPU_USE_COHERENT_SYSMEM since the former
is somewhat ambiguous in meaning. The latter clearly states what
must happen: nvgpu needs to treat sysmem as coherent. This flag
does simply follow the state of the DMA API but there's no reason
to expect a casual reader of the code to know that when the DMA
API is coherent nvgpu must treat sysmem as coherent.
One thing to note though: when the dGPU is using PCIe and the
PCIe controller is coherent, it doesn't actually matter what we
do. However, we use this flag for determining how to make CPU
mappings in nvgpu_mem_begin() so this flag is still relevant for
the CPU side of things.
Next this patch adds a check in the core kernel GMMU mapping
routine to make sure that when the NVGPU_USE_COHERENT_SYSMEM flag
is set that the IO coherent flag is passed into the mapping code.
This is the primary fix that made NVLINK start working.
Finally, the NVGPU_USE_COHERENT_SYSMEM flag and the
NVGPU_SUPPORT_IO_COHERENCE flag are now set both for PCIe and for
iGPUs. The iGPU also must correctly match its CPU mappings and
GPU mappings for proper operation.
JIRA EVLR-2333
Change-Id: Icd5f07167c9f48a0a2e8493e34c9cc6238e56907
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1654519
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu')
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/module.c | 8 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/nvgpu_mem.c | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/pci.c | 6 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/mm/gmmu.c | 7 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/include/nvgpu/enabled.h | 4 |
5 files changed, 22 insertions, 6 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/module.c b/drivers/gpu/nvgpu/common/linux/module.c index b103fcea..52348db0 100644 --- a/drivers/gpu/nvgpu/common/linux/module.c +++ b/drivers/gpu/nvgpu/common/linux/module.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/of.h> | 20 | #include <linux/of.h> |
21 | #include <linux/of_device.h> | 21 | #include <linux/of_device.h> |
22 | #include <linux/of_platform.h> | 22 | #include <linux/of_platform.h> |
23 | #include <linux/of_address.h> | ||
23 | #include <linux/interrupt.h> | 24 | #include <linux/interrupt.h> |
24 | #include <linux/pm_runtime.h> | 25 | #include <linux/pm_runtime.h> |
25 | #include <linux/reset.h> | 26 | #include <linux/reset.h> |
@@ -1107,6 +1108,7 @@ static int gk20a_probe(struct platform_device *dev) | |||
1107 | struct gk20a *gk20a; | 1108 | struct gk20a *gk20a; |
1108 | int err; | 1109 | int err; |
1109 | struct gk20a_platform *platform = NULL; | 1110 | struct gk20a_platform *platform = NULL; |
1111 | struct device_node *np; | ||
1110 | 1112 | ||
1111 | if (dev->dev.of_node) { | 1113 | if (dev->dev.of_node) { |
1112 | const struct of_device_id *match; | 1114 | const struct of_device_id *match; |
@@ -1206,6 +1208,12 @@ static int gk20a_probe(struct platform_device *dev) | |||
1206 | 1208 | ||
1207 | gk20a->mm.has_physical_mode = !nvgpu_is_hypervisor_mode(gk20a); | 1209 | gk20a->mm.has_physical_mode = !nvgpu_is_hypervisor_mode(gk20a); |
1208 | 1210 | ||
1211 | np = nvgpu_get_node(gk20a); | ||
1212 | if (of_dma_is_coherent(np)) { | ||
1213 | __nvgpu_set_enabled(gk20a, NVGPU_USE_COHERENT_SYSMEM, true); | ||
1214 | __nvgpu_set_enabled(gk20a, NVGPU_SUPPORT_IO_COHERENCE, true); | ||
1215 | } | ||
1216 | |||
1209 | return 0; | 1217 | return 0; |
1210 | 1218 | ||
1211 | return_err: | 1219 | return_err: |
diff --git a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c index 206b83e1..64f638e2 100644 --- a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c +++ b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c | |||
@@ -61,7 +61,8 @@ u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem, | |||
61 | int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem) | 61 | int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem) |
62 | { | 62 | { |
63 | void *cpu_va; | 63 | void *cpu_va; |
64 | pgprot_t prot = nvgpu_is_enabled(g, NVGPU_DMA_COHERENT) ? PAGE_KERNEL : | 64 | pgprot_t prot = nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) ? |
65 | PAGE_KERNEL : | ||
65 | pgprot_writecombine(PAGE_KERNEL); | 66 | pgprot_writecombine(PAGE_KERNEL); |
66 | 67 | ||
67 | if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin) | 68 | if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin) |
diff --git a/drivers/gpu/nvgpu/common/linux/pci.c b/drivers/gpu/nvgpu/common/linux/pci.c index 6ebe8dda..4ba839c4 100644 --- a/drivers/gpu/nvgpu/common/linux/pci.c +++ b/drivers/gpu/nvgpu/common/linux/pci.c | |||
@@ -17,13 +17,13 @@ | |||
17 | #include <linux/pci.h> | 17 | #include <linux/pci.h> |
18 | #include <linux/interrupt.h> | 18 | #include <linux/interrupt.h> |
19 | #include <linux/pm_runtime.h> | 19 | #include <linux/pm_runtime.h> |
20 | #include <linux/of_platform.h> | ||
21 | #include <linux/of_address.h> | ||
20 | 22 | ||
21 | #include <nvgpu/nvgpu_common.h> | 23 | #include <nvgpu/nvgpu_common.h> |
22 | #include <nvgpu/kmem.h> | 24 | #include <nvgpu/kmem.h> |
23 | #include <nvgpu/enabled.h> | 25 | #include <nvgpu/enabled.h> |
24 | #include <nvgpu/nvlink.h> | 26 | #include <nvgpu/nvlink.h> |
25 | #include <linux/of_platform.h> | ||
26 | #include <linux/of_address.h> | ||
27 | 27 | ||
28 | #include "gk20a/gk20a.h" | 28 | #include "gk20a/gk20a.h" |
29 | #include "clk/clk.h" | 29 | #include "clk/clk.h" |
@@ -647,7 +647,7 @@ static int nvgpu_pci_probe(struct pci_dev *pdev, | |||
647 | np = nvgpu_get_node(g); | 647 | np = nvgpu_get_node(g); |
648 | 648 | ||
649 | if (of_dma_is_coherent(np)) { | 649 | if (of_dma_is_coherent(np)) { |
650 | __nvgpu_set_enabled(g, NVGPU_DMA_COHERENT, true); | 650 | __nvgpu_set_enabled(g, NVGPU_USE_COHERENT_SYSMEM, true); |
651 | __nvgpu_set_enabled(g, NVGPU_SUPPORT_IO_COHERENCE, true); | 651 | __nvgpu_set_enabled(g, NVGPU_SUPPORT_IO_COHERENCE, true); |
652 | } | 652 | } |
653 | 653 | ||
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c index ffac324c..5abf5951 100644 --- a/drivers/gpu/nvgpu/common/mm/gmmu.c +++ b/drivers/gpu/nvgpu/common/mm/gmmu.c | |||
@@ -79,6 +79,13 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm, | |||
79 | if (!sgt) | 79 | if (!sgt) |
80 | return -ENOMEM; | 80 | return -ENOMEM; |
81 | 81 | ||
82 | /* | ||
83 | * If the GPU is IO coherent and the DMA API is giving us IO coherent | ||
84 | * CPU mappings then we gotta make sure we use the IO coherent aperture. | ||
85 | */ | ||
86 | if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) | ||
87 | flags |= NVGPU_VM_MAP_IO_COHERENT; | ||
88 | |||
82 | nvgpu_mutex_acquire(&vm->update_gmmu_lock); | 89 | nvgpu_mutex_acquire(&vm->update_gmmu_lock); |
83 | vaddr = g->ops.mm.gmmu_map(vm, addr, | 90 | vaddr = g->ops.mm.gmmu_map(vm, addr, |
84 | sgt, /* sg list */ | 91 | sgt, /* sg list */ |
diff --git a/drivers/gpu/nvgpu/include/nvgpu/enabled.h b/drivers/gpu/nvgpu/include/nvgpu/enabled.h index a3d9df24..24748a19 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/enabled.h +++ b/drivers/gpu/nvgpu/include/nvgpu/enabled.h | |||
@@ -75,8 +75,8 @@ struct gk20a; | |||
75 | #define NVGPU_SUPPORT_MAP_DIRECT_KIND_CTRL 24 | 75 | #define NVGPU_SUPPORT_MAP_DIRECT_KIND_CTRL 24 |
76 | /* Support batch mapping */ | 76 | /* Support batch mapping */ |
77 | #define NVGPU_SUPPORT_MAP_BUFFER_BATCH 25 | 77 | #define NVGPU_SUPPORT_MAP_BUFFER_BATCH 25 |
78 | /* Support DMA coherence */ | 78 | /* Use coherent aperture for sysmem. */ |
79 | #define NVGPU_DMA_COHERENT 26 | 79 | #define NVGPU_USE_COHERENT_SYSMEM 26 |
80 | /* Use physical scatter tables instead of IOMMU */ | 80 | /* Use physical scatter tables instead of IOMMU */ |
81 | #define NVGPU_MM_USE_PHYSICAL_SG 27 | 81 | #define NVGPU_MM_USE_PHYSICAL_SG 27 |
82 | 82 | ||