diff options
author:    Peter Daifuku <pdaifuku@nvidia.com>  2017-10-27 18:46:53 -0400
committer: mobile promotions <svcmobile_promotions@nvidia.com>  2017-11-03 02:27:12 -0400
commit:    de399ccb0019513a5f9e8f2bcadb02486f99bc80 (patch)
tree:      f3bc9e054f501fd4c9cf8c20b614ae160c8a6dd1 /drivers/gpu/nvgpu
parent:    566223689538531783a86091f052f70a6ebdef29 (diff)
gpu: nvgpu: fix patch buf count update for vidmem
gr_gk20a_ctx_patch_write_begin() updates the patch buffer data_count
when the associated graphics context memory buffer has been
CPU-mapped; it was doing so by looking for a non-null cpu_va.
However, if the graphics context has been allocated from vidmem,
cpu_va is always 0, so we can't tell if nvgpu_mem_begin() was called
for the context buffer or not.
Instead:
- add a cpu_accessible flag to the nvgpu_mem struct and set
it in nvgpu_mem_begin()
- return the value of that flag in nvgpu_mem_cpu_accessible()
- gr_gk20a_ctx_patch_write_begin() now calls this new function
instead of checking cpu_va.
Bug 2012077
JIRA ESRM-74
Change-Id: I8401699f30b4ae7154111721c25c7ec3ff95d329
Signed-off-by: Peter Daifuku <pdaifuku@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1587293
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu')
-rw-r--r--  drivers/gpu/nvgpu/common/linux/nvgpu_mem.c  | 19
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.c          |  4
-rw-r--r--  drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h | 10
3 files changed, 25 insertions, 8 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
index 1dbbd1a0..2bf26602 100644
--- a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
+++ b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
@@ -60,6 +60,14 @@ int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem)
60 | { | 60 | { |
61 | void *cpu_va; | 61 | void *cpu_va; |
62 | 62 | ||
63 | if (WARN_ON(mem->cpu_accessible)) { | ||
64 | nvgpu_warn(g, "nested"); | ||
65 | return -EBUSY; | ||
66 | } | ||
67 | |||
68 | /* flag that the intent is to allow CPU access to the memory. */ | ||
69 | mem->cpu_accessible = true; | ||
70 | |||
63 | if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin) | 71 | if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin) |
64 | return 0; | 72 | return 0; |
65 | 73 | ||
@@ -71,17 +79,14 @@ int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem)
71 | if (!(mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING)) | 79 | if (!(mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING)) |
72 | return 0; | 80 | return 0; |
73 | 81 | ||
74 | if (WARN_ON(mem->cpu_va)) { | ||
75 | nvgpu_warn(g, "nested"); | ||
76 | return -EBUSY; | ||
77 | } | ||
78 | |||
79 | cpu_va = vmap(mem->priv.pages, | 82 | cpu_va = vmap(mem->priv.pages, |
80 | PAGE_ALIGN(mem->size) >> PAGE_SHIFT, | 83 | PAGE_ALIGN(mem->size) >> PAGE_SHIFT, |
81 | 0, pgprot_writecombine(PAGE_KERNEL)); | 84 | 0, pgprot_writecombine(PAGE_KERNEL)); |
82 | 85 | ||
83 | if (WARN_ON(!cpu_va)) | 86 | if (WARN_ON(!cpu_va)) { |
87 | mem->cpu_accessible = false; | ||
84 | return -ENOMEM; | 88 | return -ENOMEM; |
89 | } | ||
85 | 90 | ||
86 | mem->cpu_va = cpu_va; | 91 | mem->cpu_va = cpu_va; |
87 | return 0; | 92 | return 0; |
@@ -89,6 +94,8 @@ int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem)
89 | 94 | ||
90 | void nvgpu_mem_end(struct gk20a *g, struct nvgpu_mem *mem) | 95 | void nvgpu_mem_end(struct gk20a *g, struct nvgpu_mem *mem) |
91 | { | 96 | { |
97 | mem->cpu_accessible = false; | ||
98 | |||
92 | if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin) | 99 | if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin) |
93 | return; | 100 | return; |
94 | 101 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 2a20c2d9..06fb5497 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -682,7 +682,7 @@ int gr_gk20a_ctx_patch_write_begin(struct gk20a *g,
682 | if (err) | 682 | if (err) |
683 | return err; | 683 | return err; |
684 | 684 | ||
685 | if (ch_ctx->gr_ctx->mem.cpu_va) { | 685 | if (nvgpu_mem_cpu_accessible(&ch_ctx->gr_ctx->mem)) { |
686 | /* reset patch count if ucode has already processed it */ | 686 | /* reset patch count if ucode has already processed it */ |
687 | ch_ctx->patch_ctx.data_count = nvgpu_mem_rd(g, | 687 | ch_ctx->patch_ctx.data_count = nvgpu_mem_rd(g, |
688 | &ch_ctx->gr_ctx->mem, | 688 | &ch_ctx->gr_ctx->mem, |
@@ -699,7 +699,7 @@ void gr_gk20a_ctx_patch_write_end(struct gk20a *g,
699 | nvgpu_mem_end(g, &ch_ctx->patch_ctx.mem); | 699 | nvgpu_mem_end(g, &ch_ctx->patch_ctx.mem); |
700 | 700 | ||
701 | /* Write context count to context image if it is mapped */ | 701 | /* Write context count to context image if it is mapped */ |
702 | if (ch_ctx->gr_ctx->mem.cpu_va) { | 702 | if (nvgpu_mem_cpu_accessible(&ch_ctx->gr_ctx->mem)) { |
703 | nvgpu_mem_wr(g, &ch_ctx->gr_ctx->mem, | 703 | nvgpu_mem_wr(g, &ch_ctx->gr_ctx->mem, |
704 | ctxsw_prog_main_image_patch_count_o(), | 704 | ctxsw_prog_main_image_patch_count_o(), |
705 | ch_ctx->patch_ctx.data_count); | 705 | ch_ctx->patch_ctx.data_count); |
diff --git a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h
index 6feacff7..bae50347 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h
@@ -122,6 +122,7 @@ struct nvgpu_mem {
122 | size_t aligned_size; | 122 | size_t aligned_size; |
123 | u64 gpu_va; | 123 | u64 gpu_va; |
124 | bool skip_wmb; | 124 | bool skip_wmb; |
125 | bool cpu_accessible; | ||
125 | 126 | ||
126 | /* | 127 | /* |
127 | * Set when a nvgpu_mem struct is not a "real" nvgpu_mem struct. Instead | 128 | * Set when a nvgpu_mem struct is not a "real" nvgpu_mem struct. Instead |
@@ -211,6 +212,15 @@ static inline bool nvgpu_mem_is_valid(struct nvgpu_mem *mem)
211 | } | 212 | } |
212 | 213 | ||
213 | /* | 214 | /* |
215 | * Returns true if the passed nvgpu_mem can be accessed by the CPU by virtue | ||
216 | * of having called nvgpu_mem_begin successfully. | ||
217 | */ | ||
218 | static inline bool nvgpu_mem_cpu_accessible(struct nvgpu_mem *mem) | ||
219 | { | ||
220 | return mem->cpu_accessible; | ||
221 | } | ||
222 | |||
223 | /* | ||
214 | * Create a nvgpu_sgt of the default implementation | 224 | * Create a nvgpu_sgt of the default implementation |
215 | */ | 225 | */ |
216 | struct nvgpu_sgt *nvgpu_sgt_create(struct gk20a *g); | 226 | struct nvgpu_sgt *nvgpu_sgt_create(struct gk20a *g); |