about | summary | refs | log | tree | commit | diff | stats
path: root/drivers/gpu/drm/vc4
diff options
context:
space:
mode:
author: Eric Anholt <eric@anholt.net>, 2016-09-27 12:03:13 -0400
committer: Eric Anholt <eric@anholt.net>, 2016-10-06 14:53:50 -0400
commit: 7edabee06a5622190d59689a64f5e17d1c343cc3 (patch)
tree: ae271f8f9fc2af7cdbf9b9bf84e17f99180a388d /drivers/gpu/drm/vc4
parent: 57b9f569447c4ab7b2a7e34a13e468311db4cd64 (diff)
drm/vc4: Fix races when the CS reads from render targets.
With the introduction of bin/render pipelining, the previous job may
not be completed when we start binning the next one.  If the previous
job wrote our VBO, IB, or CS textures, then the binning stage might
get stale or uninitialized results.

Fixes the major rendering failure in glmark2 -b terrain.

Signed-off-by: Eric Anholt <eric@anholt.net>
Fixes: ca26d28bbaa3 ("drm/vc4: improve throughput by pipelining binning and rendering jobs")
Cc: stable@vger.kernel.org
Diffstat (limited to 'drivers/gpu/drm/vc4')
-rw-r--r--  drivers/gpu/drm/vc4/vc4_drv.h        19
-rw-r--r--  drivers/gpu/drm/vc4/vc4_gem.c        13
-rw-r--r--  drivers/gpu/drm/vc4/vc4_render_cl.c  21
-rw-r--r--  drivers/gpu/drm/vc4/vc4_validate.c   17
4 files changed, 62 insertions, 8 deletions
diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h
index 61c1902168a1..7c1e4d97486f 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.h
+++ b/drivers/gpu/drm/vc4/vc4_drv.h
@@ -122,9 +122,16 @@ to_vc4_dev(struct drm_device *dev)
122struct vc4_bo { 122struct vc4_bo {
123 struct drm_gem_cma_object base; 123 struct drm_gem_cma_object base;
124 124
125 /* seqno of the last job to render to this BO. */ 125 /* seqno of the last job to render using this BO. */
126 uint64_t seqno; 126 uint64_t seqno;
127 127
128 /* seqno of the last job to use the RCL to write to this BO.
129 *
130 * Note that this doesn't include binner overflow memory
131 * writes.
132 */
133 uint64_t write_seqno;
134
128 /* List entry for the BO's position in either 135 /* List entry for the BO's position in either
129 * vc4_exec_info->unref_list or vc4_dev->bo_cache.time_list 136 * vc4_exec_info->unref_list or vc4_dev->bo_cache.time_list
130 */ 137 */
@@ -216,6 +223,9 @@ struct vc4_exec_info {
216 /* Sequence number for this bin/render job. */ 223 /* Sequence number for this bin/render job. */
217 uint64_t seqno; 224 uint64_t seqno;
218 225
226 /* Latest write_seqno of any BO that binning depends on. */
227 uint64_t bin_dep_seqno;
228
219 /* Last current addresses the hardware was processing when the 229 /* Last current addresses the hardware was processing when the
220 * hangcheck timer checked on us. 230 * hangcheck timer checked on us.
221 */ 231 */
@@ -230,6 +240,13 @@ struct vc4_exec_info {
230 struct drm_gem_cma_object **bo; 240 struct drm_gem_cma_object **bo;
231 uint32_t bo_count; 241 uint32_t bo_count;
232 242
243 /* List of BOs that are being written by the RCL. Other than
244 * the binner temporary storage, this is all the BOs written
245 * by the job.
246 */
247 struct drm_gem_cma_object *rcl_write_bo[4];
248 uint32_t rcl_write_bo_count;
249
233 /* Pointers for our position in vc4->job_list */ 250 /* Pointers for our position in vc4->job_list */
234 struct list_head head; 251 struct list_head head;
235 252
diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
index 77daea6cb866..47a095f392f8 100644
--- a/drivers/gpu/drm/vc4/vc4_gem.c
+++ b/drivers/gpu/drm/vc4/vc4_gem.c
@@ -467,6 +467,11 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
467 list_for_each_entry(bo, &exec->unref_list, unref_head) { 467 list_for_each_entry(bo, &exec->unref_list, unref_head) {
468 bo->seqno = seqno; 468 bo->seqno = seqno;
469 } 469 }
470
471 for (i = 0; i < exec->rcl_write_bo_count; i++) {
472 bo = to_vc4_bo(&exec->rcl_write_bo[i]->base);
473 bo->write_seqno = seqno;
474 }
470} 475}
471 476
472/* Queues a struct vc4_exec_info for execution. If no job is 477/* Queues a struct vc4_exec_info for execution. If no job is
@@ -669,6 +674,14 @@ vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec)
669 goto fail; 674 goto fail;
670 675
671 ret = vc4_validate_shader_recs(dev, exec); 676 ret = vc4_validate_shader_recs(dev, exec);
677 if (ret)
678 goto fail;
679
680 /* Block waiting on any previous rendering into the CS's VBO,
681 * IB, or textures, so that pixels are actually written by the
682 * time we try to read them.
683 */
684 ret = vc4_wait_for_seqno(dev, exec->bin_dep_seqno, ~0ull, true);
672 685
673fail: 686fail:
674 drm_free_large(temp); 687 drm_free_large(temp);
diff --git a/drivers/gpu/drm/vc4/vc4_render_cl.c b/drivers/gpu/drm/vc4/vc4_render_cl.c
index 0f12418725e5..08886a309757 100644
--- a/drivers/gpu/drm/vc4/vc4_render_cl.c
+++ b/drivers/gpu/drm/vc4/vc4_render_cl.c
@@ -45,6 +45,8 @@ struct vc4_rcl_setup {
45 45
46 struct drm_gem_cma_object *rcl; 46 struct drm_gem_cma_object *rcl;
47 u32 next_offset; 47 u32 next_offset;
48
49 u32 next_write_bo_index;
48}; 50};
49 51
50static inline void rcl_u8(struct vc4_rcl_setup *setup, u8 val) 52static inline void rcl_u8(struct vc4_rcl_setup *setup, u8 val)
@@ -407,6 +409,8 @@ static int vc4_rcl_msaa_surface_setup(struct vc4_exec_info *exec,
407 if (!*obj) 409 if (!*obj)
408 return -EINVAL; 410 return -EINVAL;
409 411
412 exec->rcl_write_bo[exec->rcl_write_bo_count++] = *obj;
413
410 if (surf->offset & 0xf) { 414 if (surf->offset & 0xf) {
411 DRM_ERROR("MSAA write must be 16b aligned.\n"); 415 DRM_ERROR("MSAA write must be 16b aligned.\n");
412 return -EINVAL; 416 return -EINVAL;
@@ -417,7 +421,8 @@ static int vc4_rcl_msaa_surface_setup(struct vc4_exec_info *exec,
417 421
418static int vc4_rcl_surface_setup(struct vc4_exec_info *exec, 422static int vc4_rcl_surface_setup(struct vc4_exec_info *exec,
419 struct drm_gem_cma_object **obj, 423 struct drm_gem_cma_object **obj,
420 struct drm_vc4_submit_rcl_surface *surf) 424 struct drm_vc4_submit_rcl_surface *surf,
425 bool is_write)
421{ 426{
422 uint8_t tiling = VC4_GET_FIELD(surf->bits, 427 uint8_t tiling = VC4_GET_FIELD(surf->bits,
423 VC4_LOADSTORE_TILE_BUFFER_TILING); 428 VC4_LOADSTORE_TILE_BUFFER_TILING);
@@ -440,6 +445,9 @@ static int vc4_rcl_surface_setup(struct vc4_exec_info *exec,
440 if (!*obj) 445 if (!*obj)
441 return -EINVAL; 446 return -EINVAL;
442 447
448 if (is_write)
449 exec->rcl_write_bo[exec->rcl_write_bo_count++] = *obj;
450
443 if (surf->flags & VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) { 451 if (surf->flags & VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {
444 if (surf == &exec->args->zs_write) { 452 if (surf == &exec->args->zs_write) {
445 DRM_ERROR("general zs write may not be a full-res.\n"); 453 DRM_ERROR("general zs write may not be a full-res.\n");
@@ -542,6 +550,8 @@ vc4_rcl_render_config_surface_setup(struct vc4_exec_info *exec,
542 if (!*obj) 550 if (!*obj)
543 return -EINVAL; 551 return -EINVAL;
544 552
553 exec->rcl_write_bo[exec->rcl_write_bo_count++] = *obj;
554
545 if (tiling > VC4_TILING_FORMAT_LT) { 555 if (tiling > VC4_TILING_FORMAT_LT) {
546 DRM_ERROR("Bad tiling format\n"); 556 DRM_ERROR("Bad tiling format\n");
547 return -EINVAL; 557 return -EINVAL;
@@ -599,15 +609,18 @@ int vc4_get_rcl(struct drm_device *dev, struct vc4_exec_info *exec)
599 if (ret) 609 if (ret)
600 return ret; 610 return ret;
601 611
602 ret = vc4_rcl_surface_setup(exec, &setup.color_read, &args->color_read); 612 ret = vc4_rcl_surface_setup(exec, &setup.color_read, &args->color_read,
613 false);
603 if (ret) 614 if (ret)
604 return ret; 615 return ret;
605 616
606 ret = vc4_rcl_surface_setup(exec, &setup.zs_read, &args->zs_read); 617 ret = vc4_rcl_surface_setup(exec, &setup.zs_read, &args->zs_read,
618 false);
607 if (ret) 619 if (ret)
608 return ret; 620 return ret;
609 621
610 ret = vc4_rcl_surface_setup(exec, &setup.zs_write, &args->zs_write); 622 ret = vc4_rcl_surface_setup(exec, &setup.zs_write, &args->zs_write,
623 true);
611 if (ret) 624 if (ret)
612 return ret; 625 return ret;
613 626
diff --git a/drivers/gpu/drm/vc4/vc4_validate.c b/drivers/gpu/drm/vc4/vc4_validate.c
index 9ce1d0adf882..26503e307438 100644
--- a/drivers/gpu/drm/vc4/vc4_validate.c
+++ b/drivers/gpu/drm/vc4/vc4_validate.c
@@ -267,6 +267,9 @@ validate_indexed_prim_list(VALIDATE_ARGS)
267 if (!ib) 267 if (!ib)
268 return -EINVAL; 268 return -EINVAL;
269 269
270 exec->bin_dep_seqno = max(exec->bin_dep_seqno,
271 to_vc4_bo(&ib->base)->write_seqno);
272
270 if (offset > ib->base.size || 273 if (offset > ib->base.size ||
271 (ib->base.size - offset) / index_size < length) { 274 (ib->base.size - offset) / index_size < length) {
272 DRM_ERROR("IB access overflow (%d + %d*%d > %zd)\n", 275 DRM_ERROR("IB access overflow (%d + %d*%d > %zd)\n",
@@ -555,8 +558,7 @@ static bool
555reloc_tex(struct vc4_exec_info *exec, 558reloc_tex(struct vc4_exec_info *exec,
556 void *uniform_data_u, 559 void *uniform_data_u,
557 struct vc4_texture_sample_info *sample, 560 struct vc4_texture_sample_info *sample,
558 uint32_t texture_handle_index) 561 uint32_t texture_handle_index, bool is_cs)
559
560{ 562{
561 struct drm_gem_cma_object *tex; 563 struct drm_gem_cma_object *tex;
562 uint32_t p0 = *(uint32_t *)(uniform_data_u + sample->p_offset[0]); 564 uint32_t p0 = *(uint32_t *)(uniform_data_u + sample->p_offset[0]);
@@ -714,6 +716,11 @@ reloc_tex(struct vc4_exec_info *exec,
714 716
715 *validated_p0 = tex->paddr + p0; 717 *validated_p0 = tex->paddr + p0;
716 718
719 if (is_cs) {
720 exec->bin_dep_seqno = max(exec->bin_dep_seqno,
721 to_vc4_bo(&tex->base)->write_seqno);
722 }
723
717 return true; 724 return true;
718 fail: 725 fail:
719 DRM_INFO("Texture p0 at %d: 0x%08x\n", sample->p_offset[0], p0); 726 DRM_INFO("Texture p0 at %d: 0x%08x\n", sample->p_offset[0], p0);
@@ -835,7 +842,8 @@ validate_gl_shader_rec(struct drm_device *dev,
835 if (!reloc_tex(exec, 842 if (!reloc_tex(exec,
836 uniform_data_u, 843 uniform_data_u,
837 &validated_shader->texture_samples[tex], 844 &validated_shader->texture_samples[tex],
838 texture_handles_u[tex])) { 845 texture_handles_u[tex],
846 i == 2)) {
839 return -EINVAL; 847 return -EINVAL;
840 } 848 }
841 } 849 }
@@ -867,6 +875,9 @@ validate_gl_shader_rec(struct drm_device *dev,
867 uint32_t stride = *(uint8_t *)(pkt_u + o + 5); 875 uint32_t stride = *(uint8_t *)(pkt_u + o + 5);
868 uint32_t max_index; 876 uint32_t max_index;
869 877
878 exec->bin_dep_seqno = max(exec->bin_dep_seqno,
879 to_vc4_bo(&vbo->base)->write_seqno);
880
870 if (state->addr & 0x8) 881 if (state->addr & 0x8)
871 stride |= (*(uint32_t *)(pkt_u + 100 + i * 4)) & ~0xff; 882 stride |= (*(uint32_t *)(pkt_u + 100 + i * 4)) & ~0xff;
872 883