summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDeepak Nibade <dnibade@nvidia.com>2017-03-14 04:43:35 -0400
committermobile promotions <svcmobile_promotions@nvidia.com>2017-03-23 10:13:55 -0400
commit7505a759542c2d95e5abb42d44b8fb6afda74a87 (patch)
tree4276ff74ca3cc8dbc39de4b9f35e0eca88992c92
parent978d13efe5719b28ea90b95d21bf25066df75ca8 (diff)
gpu: nvgpu: take power refcount in gk20a_cde_convert()
We have a gk20a_busy() call in gk20a_buffer_convert_gpu_to_cde(), and we again call gk20a_busy() in gk20a_submit_channel_gpfifo(). If gk20a_do_idle() is triggered in between these two calls, then this leads to a deadlock and results in idle failure. Hence, to avoid this, take the power refcount in a more fine-grained way, i.e. in gk20a_cde_convert() instead of taking it in gk20a_buffer_convert_gpu_to_cde(). Keep gk20a_cde_execute_buffer() out of the gk20a_busy()/ gk20a_idle() pair, since we take a power refcount in the submit path anyway. Add a correct error handling path in gk20a_cde_convert(). Bug 200287073 Change-Id: Iffea2d4c03f42b47dbf05e7fe8fe2994f9c6b37c Signed-off-by: Deepak Nibade <dnibade@nvidia.com> Reviewed-on: http://git-master/r/1324329 (cherry picked from commit ce057d784d40a6ce57e892d58e211ed2fd9826f8) Reviewed-on: http://git-master/r/1320408 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r--drivers/gpu/nvgpu/gk20a/cde_gk20a.c44
1 files changed, 25 insertions, 19 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
index 8f9d7831..df15cf51 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
@@ -949,12 +949,16 @@ __releases(&cde_app->mutex)
949 scatterbuffer_byte_offset < compbits_byte_offset) 949 scatterbuffer_byte_offset < compbits_byte_offset)
950 return -EINVAL; 950 return -EINVAL;
951 951
952 err = gk20a_busy(g->dev);
953 if (err)
954 return err;
955
952 nvgpu_mutex_acquire(&g->cde_app.mutex); 956 nvgpu_mutex_acquire(&g->cde_app.mutex);
953 cde_ctx = gk20a_cde_get_context(g); 957 cde_ctx = gk20a_cde_get_context(g);
954 nvgpu_mutex_release(&g->cde_app.mutex); 958 nvgpu_mutex_release(&g->cde_app.mutex);
955 if (IS_ERR(cde_ctx)) { 959 if (IS_ERR(cde_ctx)) {
956 err = PTR_ERR(cde_ctx); 960 err = PTR_ERR(cde_ctx);
957 goto exit_unlock; 961 goto exit_idle;
958 } 962 }
959 963
960 /* First, map the buffer to local va */ 964 /* First, map the buffer to local va */
@@ -962,7 +966,7 @@ __releases(&cde_app->mutex)
962 /* ensure that the compbits buffer has drvdata */ 966 /* ensure that the compbits buffer has drvdata */
963 err = gk20a_dmabuf_alloc_drvdata(compbits_scatter_buf, g->dev); 967 err = gk20a_dmabuf_alloc_drvdata(compbits_scatter_buf, g->dev);
964 if (err) 968 if (err)
965 goto exit_unlock; 969 goto exit_idle;
966 970
967 /* compbits don't start at page aligned offset, so we need to align 971 /* compbits don't start at page aligned offset, so we need to align
968 the region to be mapped */ 972 the region to be mapped */
@@ -1005,7 +1009,7 @@ __releases(&cde_app->mutex)
1005 if (!map_vaddr) { 1009 if (!map_vaddr) {
1006 dma_buf_put(compbits_scatter_buf); 1010 dma_buf_put(compbits_scatter_buf);
1007 err = -EINVAL; 1011 err = -EINVAL;
1008 goto exit_unlock; 1012 goto exit_idle;
1009 } 1013 }
1010 1014
1011 if (scatterbuffer_byte_offset && 1015 if (scatterbuffer_byte_offset &&
@@ -1019,7 +1023,7 @@ __releases(&cde_app->mutex)
1019 gk20a_warn(g->dev, 1023 gk20a_warn(g->dev,
1020 "dma_buf_vmap failed"); 1024 "dma_buf_vmap failed");
1021 err = -EINVAL; 1025 err = -EINVAL;
1022 goto exit_unlock; 1026 goto exit_unmap_vaddr;
1023 } 1027 }
1024 1028
1025 scatter_buffer = surface + scatterbuffer_byte_offset; 1029 scatter_buffer = surface + scatterbuffer_byte_offset;
@@ -1031,7 +1035,7 @@ __releases(&cde_app->mutex)
1031 gk20a_warn(g->dev, 1035 gk20a_warn(g->dev,
1032 "mm_pin failed"); 1036 "mm_pin failed");
1033 err = -EINVAL; 1037 err = -EINVAL;
1034 goto exit_unlock; 1038 goto exit_unmap_surface;
1035 } else { 1039 } else {
1036 err = g->ops.cde.populate_scatter_buffer(g, sgt, 1040 err = g->ops.cde.populate_scatter_buffer(g, sgt,
1037 compbits_byte_offset, scatter_buffer, 1041 compbits_byte_offset, scatter_buffer,
@@ -1041,7 +1045,7 @@ __releases(&cde_app->mutex)
1041 gk20a_mm_unpin(g->dev, compbits_scatter_buf, 1045 gk20a_mm_unpin(g->dev, compbits_scatter_buf,
1042 sgt); 1046 sgt);
1043 if (err) 1047 if (err)
1044 goto exit_unlock; 1048 goto exit_unmap_surface;
1045 } 1049 }
1046 1050
1047 __cpuc_flush_dcache_area(scatter_buffer, scatterbuffer_size); 1051 __cpuc_flush_dcache_area(scatter_buffer, scatterbuffer_size);
@@ -1078,7 +1082,7 @@ __releases(&cde_app->mutex)
1078 if (id < 0 || id >= MAX_CDE_USER_PARAMS) { 1082 if (id < 0 || id >= MAX_CDE_USER_PARAMS) {
1079 gk20a_warn(cde_ctx->dev, "cde: unknown user parameter"); 1083 gk20a_warn(cde_ctx->dev, "cde: unknown user parameter");
1080 err = -EINVAL; 1084 err = -EINVAL;
1081 goto exit_unlock; 1085 goto exit_unmap_surface;
1082 } 1086 }
1083 cde_ctx->user_param_values[id] = param->value; 1087 cde_ctx->user_param_values[id] = param->value;
1084 } 1088 }
@@ -1087,7 +1091,7 @@ __releases(&cde_app->mutex)
1087 err = gk20a_cde_patch_params(cde_ctx); 1091 err = gk20a_cde_patch_params(cde_ctx);
1088 if (err) { 1092 if (err) {
1089 gk20a_warn(cde_ctx->dev, "cde: failed to patch parameters"); 1093 gk20a_warn(cde_ctx->dev, "cde: failed to patch parameters");
1090 goto exit_unlock; 1094 goto exit_unmap_surface;
1091 } 1095 }
1092 1096
1093 gk20a_dbg(gpu_dbg_cde, "cde: buffer=cbc, size=%zu, gpuva=%llx\n", 1097 gk20a_dbg(gpu_dbg_cde, "cde: buffer=cbc, size=%zu, gpuva=%llx\n",
@@ -1097,11 +1101,13 @@ __releases(&cde_app->mutex)
1097 gk20a_dbg(gpu_dbg_cde, "cde: buffer=scatterbuffer, size=%llu, gpuva=%llx\n", 1101 gk20a_dbg(gpu_dbg_cde, "cde: buffer=scatterbuffer, size=%llu, gpuva=%llx\n",
1098 cde_ctx->scatterbuffer_size, cde_ctx->scatterbuffer_vaddr); 1102 cde_ctx->scatterbuffer_size, cde_ctx->scatterbuffer_vaddr);
1099 1103
1100
1101 /* take always the postfence as it is needed for protecting the 1104 /* take always the postfence as it is needed for protecting the
1102 * cde context */ 1105 * cde context */
1103 flags = __flags | NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET; 1106 flags = __flags | NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET;
1104 1107
1108 /* gk20a_cde_execute_buffer() will grab a power reference of its own */
1109 gk20a_idle(g->dev);
1110
1105 /* execute the conversion buffer, combined with init first if it's the 1111 /* execute the conversion buffer, combined with init first if it's the
1106 * first time */ 1112 * first time */
1107 err = gk20a_cde_execute_buffer(cde_ctx, 1113 err = gk20a_cde_execute_buffer(cde_ctx,
@@ -1112,16 +1118,21 @@ __releases(&cde_app->mutex)
1112 1118
1113 cde_ctx->init_cmd_executed = true; 1119 cde_ctx->init_cmd_executed = true;
1114 1120
1115exit_unlock:
1116
1117 /* unmap the buffers - channel holds references to them now */ 1121 /* unmap the buffers - channel holds references to them now */
1118 if (map_vaddr)
1119 gk20a_vm_unmap(cde_ctx->vm, map_vaddr);
1120
1121 if (surface) 1122 if (surface)
1122 dma_buf_vunmap(compbits_scatter_buf, surface); 1123 dma_buf_vunmap(compbits_scatter_buf, surface);
1124 gk20a_vm_unmap(cde_ctx->vm, map_vaddr);
1123 1125
1124 return err; 1126 return err;
1127
1128exit_unmap_surface:
1129 if (surface)
1130 dma_buf_vunmap(compbits_scatter_buf, surface);
1131exit_unmap_vaddr:
1132 gk20a_vm_unmap(cde_ctx->vm, map_vaddr);
1133exit_idle:
1134 gk20a_idle(g->dev);
1135 return err;
1125} 1136}
1126 1137
1127static void gk20a_cde_finished_ctx_cb(struct channel_gk20a *ch, void *data) 1138static void gk20a_cde_finished_ctx_cb(struct channel_gk20a *ch, void *data)
@@ -1520,10 +1531,6 @@ static int gk20a_buffer_convert_gpu_to_cde(
1520 if (!g->cde_app.initialised) 1531 if (!g->cde_app.initialised)
1521 return -ENOSYS; 1532 return -ENOSYS;
1522 1533
1523 err = gk20a_busy(g->dev);
1524 if (err)
1525 return err;
1526
1527 gk20a_dbg(gpu_dbg_cde, "firmware version = %d\n", 1534 gk20a_dbg(gpu_dbg_cde, "firmware version = %d\n",
1528 g->cde_app.firmware_version); 1535 g->cde_app.firmware_version);
1529 1536
@@ -1539,7 +1546,6 @@ static int gk20a_buffer_convert_gpu_to_cde(
1539 err = -EINVAL; 1546 err = -EINVAL;
1540 } 1547 }
1541 1548
1542 gk20a_idle(g->dev);
1543 return err; 1549 return err;
1544} 1550}
1545 1551