author | Alex Waterman <alexw@nvidia.com> | 2017-10-05 20:22:41 -0400
---|---|---
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-10-20 22:03:57 -0400
commit | e26ce10cc6b59314ccf5931a8c5b46a9e57b085a (patch) |
tree | 2bcafc57fcb8679c09195ba08ccf7a6fdfc91fe1 /drivers/gpu/nvgpu/common/linux |
parent | 8c5ea40ccaad022401e45e61d5b6ff3354ffa413 (diff) |
gpu: nvgpu: Convert VIDMEM work_struct to thread
Convert the work_struct used for vidmem background clearing to
a thread, making it more cross-platform. The thread waits on a
condition variable to determine when work needs to be done; the
signal comes from the DMA API when it enqueues a new nvgpu_mem that
needs clearing.
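The clearing thread itself lands in the common VIDMEM code, outside this Linux-only diffstat. As a rough sketch (not the commit's verbatim code), the wait loop might look like the following; the field names `clearing_thread` and `clearing_thread_cond` and the helper `nvgpu_vidmem_clear_pending_allocs()` are assumptions here, while `NVGPU_COND_WAIT()`, `nvgpu_thread_should_stop()`, and `nvgpu_list_empty()` are nvgpu's existing OS-abstraction primitives.

```c
/*
 * Sketch of the background clearing thread's main loop. Names flagged
 * in the lead-in are assumptions, not necessarily the commit's code.
 */
static int nvgpu_vidmem_clear_pending_allocs_thr(void *mm_ptr)
{
	struct mm_gk20a *mm = mm_ptr;

	while (!nvgpu_thread_should_stop(&mm->vidmem.clearing_thread)) {
		/*
		 * Sleep until the DMA API signals that a new nvgpu_mem was
		 * enqueued, or until the thread is told to stop. Timeout 0
		 * is assumed here to mean "wait indefinitely".
		 */
		NVGPU_COND_WAIT(&mm->vidmem.clearing_thread_cond,
			!nvgpu_list_empty(&mm->vidmem.clear_list_head) ||
			nvgpu_thread_should_stop(&mm->vidmem.clearing_thread),
			0);

		/* Drain the pending list; sketched at the end of this page. */
		nvgpu_vidmem_clear_pending_allocs(mm);
	}

	return 0;
}
```

Such a thread would be started once at MM init, e.g. via nvgpu_thread_create(&mm->vidmem.clearing_thread, mm, nvgpu_vidmem_clear_pending_allocs_thr, "vidmem_clear") — again an assumption about the exact call site.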
Add logic for handling suspend: the CE cannot be accessed while
the GPU is suspended, so the background thread must be paused for
as long as the GPU (and with it the CE) is unavailable.
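One plausible way to wire up the pause, sketched under assumptions: `nvgpu_vidmem_thread_pause()` is a hypothetical helper that parks the clearing thread and returns only once it no longer touches the CE, and the surrounding wrapper is illustrative rather than the commit's actual power-management path (gk20a_mm_suspend() and gk20a_ce_suspend() do exist in nvgpu).

```c
/*
 * Illustrative suspend ordering only; the wrapper function and the
 * pause helper are hypothetical names.
 */
static int gk20a_suspend_mm_and_ce(struct gk20a *g)
{
	int err;

	/* Park the clearing thread so nothing submits CE work anymore. */
	nvgpu_vidmem_thread_pause(&g->mm);

	err = gk20a_mm_suspend(g);
	if (err != 0)
		return err;

	/* Per the change list below, CE suspend now follows MM suspend. */
	gk20a_ce_suspend(g);

	return 0;
}
```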
Several other changes were also made:
o Move the code that enqueues an nvgpu_mem from the DMA API
code to a function in the VIDMEM code.
o Move nvgpu_vidmem_get_pending_alloc() to the Linux specific
code as this function is only used there. It's a trivial
function that QNX can easily implement as well.
o Remove the was_empty logic from the enqueue. Now just always
signal the condition variable when a new nvgpu_mem comes in
(a sketch of the enqueue helper follows this list).
o Move CE suspend to after MM suspend.
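The enqueue helper called from nvgpu_dma_free_vid() (see the dma.c hunk below) is not itself part of this Linux-only diffstat. A plausible sketch, reusing the list and counter fields from the code removed out of dma.c: the condition-variable field name is an assumption, and the unconditional `return 0` glosses over whatever failure cases make the caller check `err` (for example, the clearing thread not running).

```c
/*
 * Plausible body for nvgpu_vidmem_clear_list_enqueue(). The list and
 * counter fields come from the code removed from nvgpu_dma_free_vid();
 * the cond-var name and the error behavior are assumptions.
 */
int nvgpu_vidmem_clear_list_enqueue(struct gk20a *g, struct nvgpu_mem *mem)
{
	struct mm_gk20a *mm = &g->mm;

	nvgpu_mutex_acquire(&mm->vidmem.clear_list_mutex);
	nvgpu_list_add_tail(&mem->clear_list_entry,
			    &mm->vidmem.clear_list_head);
	nvgpu_atomic64_add(mem->aligned_size, &mm->vidmem.bytes_pending);
	nvgpu_mutex_release(&mm->vidmem.clear_list_mutex);

	/*
	 * No was_empty check anymore: wake the clearing thread on every
	 * enqueue and let it decide whether there is work.
	 */
	nvgpu_cond_signal(&mm->vidmem.clearing_thread_cond);

	return 0;
}
```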
JIRA NVGPU-30
JIRA NVGPU-138
Change-Id: Ie9286ae5a127c3fced86dfb9794e7d81eab0491c
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1574498
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/common/linux')

-rw-r--r-- | drivers/gpu/nvgpu/common/linux/dma.c | 26
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/vidmem.c | 43

2 files changed, 33 insertions(+), 36 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/linux/dma.c b/drivers/gpu/nvgpu/common/linux/dma.c
index b62c4593..9e9d1007 100644
--- a/drivers/gpu/nvgpu/common/linux/dma.c
+++ b/drivers/gpu/nvgpu/common/linux/dma.c
@@ -514,7 +514,6 @@ static void nvgpu_dma_free_sys(struct gk20a *g, struct nvgpu_mem *mem)
 static void nvgpu_dma_free_vid(struct gk20a *g, struct nvgpu_mem *mem)
 {
 #if defined(CONFIG_GK20A_VIDMEM)
-	bool was_empty;
 	size_t mem_size = mem->size;
 
 	dma_dbg_free(g, mem->size, mem->priv.flags, "vidmem");
@@ -523,18 +522,19 @@ static void nvgpu_dma_free_vid(struct gk20a *g, struct nvgpu_mem *mem)
 	WARN_ON(mem->priv.flags != NVGPU_DMA_NO_KERNEL_MAPPING);
 
 	if (mem->mem_flags & NVGPU_MEM_FLAG_USER_MEM) {
-		nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex);
-		was_empty = nvgpu_list_empty(&g->mm.vidmem.clear_list_head);
-		nvgpu_list_add_tail(&mem->clear_list_entry,
-				    &g->mm.vidmem.clear_list_head);
-		atomic64_add(mem->aligned_size,
-			     &g->mm.vidmem.bytes_pending.atomic_var);
-		nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex);
-
-		if (was_empty) {
-			cancel_work_sync(&g->mm.vidmem.clear_mem_worker);
-			schedule_work(&g->mm.vidmem.clear_mem_worker);
-		}
+		int err = nvgpu_vidmem_clear_list_enqueue(g, mem);
+
+		/*
+		 * If there's an error here then that means we can't clear the
+		 * vidmem. That's too bad; however, we still own the nvgpu_mem
+		 * buf so we have to free that.
+		 *
+		 * We don't need to worry about the vidmem allocator itself
+		 * since when that gets cleaned up in the driver shutdown path
+		 * all the outstanding allocs are force freed.
+		 */
+		if (err)
+			nvgpu_kfree(g, mem);
 	} else {
 		nvgpu_memset(g, mem, 0, 0, mem->aligned_size);
 		nvgpu_free(mem->allocator,
diff --git a/drivers/gpu/nvgpu/common/linux/vidmem.c b/drivers/gpu/nvgpu/common/linux/vidmem.c
index ea8e552f..92e7e504 100644
--- a/drivers/gpu/nvgpu/common/linux/vidmem.c
+++ b/drivers/gpu/nvgpu/common/linux/vidmem.c
@@ -84,6 +84,8 @@ static void gk20a_vidbuf_release(struct dma_buf *dmabuf)
 
 	nvgpu_kfree(g, linux_buf);
 	nvgpu_vidmem_buf_free(g, buf);
+
+	gk20a_put(g);
 }
 
 static void *gk20a_vidbuf_kmap(struct dma_buf *dmabuf, unsigned long page_num)
@@ -160,13 +162,21 @@ struct gk20a *nvgpu_vidmem_buf_owner(struct dma_buf *dmabuf)
 
 int nvgpu_vidmem_export_linux(struct gk20a *g, size_t bytes)
 {
-	struct nvgpu_vidmem_buf *buf;
+	struct nvgpu_vidmem_buf *buf = NULL;
 	struct nvgpu_vidmem_linux *priv;
 	int err, fd;
 
+	/*
+	 * This ref is released when the dma_buf is closed.
+	 */
+	if (!gk20a_get(g))
+		return -ENODEV;
+
 	priv = nvgpu_kzalloc(g, sizeof(*priv));
-	if (!priv)
-		return -ENOMEM;
+	if (!priv) {
+		err = -ENOMEM;
+		goto fail;
+	}
 
 	buf = nvgpu_vidmem_user_alloc(g, bytes);
 	if (!buf) {
@@ -195,8 +205,10 @@ int nvgpu_vidmem_export_linux(struct gk20a *g, size_t bytes)
 	return fd;
 
 fail:
-	nvgpu_kfree(g, priv);
 	nvgpu_vidmem_buf_free(g, buf);
+	nvgpu_kfree(g, priv);
+	gk20a_put(g);
+
 	return err;
 }
 
@@ -229,24 +241,9 @@ int nvgpu_vidmem_buf_access_memory(struct gk20a *g, struct dma_buf *dmabuf,
 	return err;
 }
 
-void nvgpu_vidmem_clear_mem_worker(struct work_struct *work)
+void __nvgpu_mem_free_vidmem_alloc(struct gk20a *g, struct nvgpu_mem *vidmem)
 {
-	struct mm_gk20a *mm = container_of(work, struct mm_gk20a,
-					   vidmem.clear_mem_worker);
-	struct gk20a *g = mm->g;
-	struct nvgpu_mem *mem;
-
-	while ((mem = nvgpu_vidmem_get_pending_alloc(mm)) != NULL) {
-		nvgpu_vidmem_clear(g, mem);
-		nvgpu_free(mem->allocator,
-			   (u64)nvgpu_vidmem_get_page_alloc(mem->priv.sgt->sgl));
-		nvgpu_free_sgtable(g, &mem->priv.sgt);
-
-		WARN_ON(nvgpu_atomic64_sub_return(mem->aligned_size,
-					&g->mm.vidmem.bytes_pending) < 0);
-		mem->size = 0;
-		mem->aperture = APERTURE_INVALID;
-
-		nvgpu_kfree(g, mem);
-	}
+	nvgpu_free(vidmem->allocator,
+		   (u64)nvgpu_vidmem_get_page_alloc(vidmem->priv.sgt->sgl));
+	nvgpu_free_sgtable(g, &vidmem->priv.sgt);
 }
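For completeness, the per-buffer teardown that used to live in the removed worker could now be driven from the clearing thread roughly as below. This is a reconstruction from the old worker body: every call here appears either in the removed code or in the new __nvgpu_mem_free_vidmem_alloc(); only the helper's name and placement are assumptions (it is the nvgpu_vidmem_clear_pending_allocs() referenced in the thread-loop sketch near the top of this page).

```c
/*
 * Reconstruction of the removed nvgpu_vidmem_clear_mem_worker() body as
 * a thread-driven helper. The allocator/sgtable teardown is now the new
 * __nvgpu_mem_free_vidmem_alloc(); everything else is the old logic.
 */
static void nvgpu_vidmem_clear_pending_allocs(struct mm_gk20a *mm)
{
	struct gk20a *g = mm->g;
	struct nvgpu_mem *mem;

	while ((mem = nvgpu_vidmem_get_pending_alloc(mm)) != NULL) {
		nvgpu_vidmem_clear(g, mem);
		__nvgpu_mem_free_vidmem_alloc(g, mem);

		WARN_ON(nvgpu_atomic64_sub_return(mem->aligned_size,
					&g->mm.vidmem.bytes_pending) < 0);
		mem->size = 0;
		mem->aperture = APERTURE_INVALID;

		nvgpu_kfree(g, mem);
	}
}
```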