author    Dave Airlie <airlied@redhat.com>  2019-02-10 23:04:05 -0500
committer Dave Airlie <airlied@redhat.com>  2019-02-10 23:04:20 -0500
commit    f4bc54b532a62d8bee421ca06adb6d1b3e7ffaa9 (patch)
tree      3b835f9bed6bd236fa1a6d5d0add836f25ca8262 /drivers/gpu/drm/amd/amdgpu
parent    5ea3998d56346975c2701df18fb5b6e3ab5c8d9e (diff)
parent    0461221316ec21e0a535a35fba3feb6ba75706e6 (diff)
Merge branch 'drm-next-5.1' of git://people.freedesktop.org/~agd5f/linux into drm-next
Updates for 5.1:
- GDS fixes
- Add AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES interface
- GPUVM fixes
- PCIE DPM switching fixes for vega20
- Vega10 uclk DPM regression fix
- DC Freesync fixes
- DC ABM fixes
- Various DC cleanups

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexdeucher@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190208210214.27666-1-alexander.deucher@amd.com
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c     | 13
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 58
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c    |  4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h    |  2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c    |  7
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 16
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    |  6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c     | 33
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c   |  2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c      | 19
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c      | 21
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c      | 40
12 files changed, 170 insertions, 51 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 1c49b8266d69..52a5e4fdc95b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -214,6 +214,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
 		case AMDGPU_CHUNK_ID_DEPENDENCIES:
 		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
 		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
+		case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
 			break;
 
 		default:
@@ -1090,6 +1091,15 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
 
 		fence = amdgpu_ctx_get_fence(ctx, entity,
 					     deps[i].handle);
+
+		if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {
+			struct drm_sched_fence *s_fence = to_drm_sched_fence(fence);
+			struct dma_fence *old = fence;
+
+			fence = dma_fence_get(&s_fence->scheduled);
+			dma_fence_put(old);
+		}
+
 		if (IS_ERR(fence)) {
 			r = PTR_ERR(fence);
 			amdgpu_ctx_put(ctx);
@@ -1177,7 +1187,8 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
 
 		chunk = &p->chunks[i];
 
-		if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES) {
+		if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES ||
+		    chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {
 			r = amdgpu_cs_process_fence_dep(p, chunk);
 			if (r)
 				return r;
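
For context, user space opts into this behaviour by attaching an extra chunk to the CS ioctl. Below is a minimal, hypothetical sketch against the amdgpu UAPI header; the IB and buffer-list chunks a real submission also carries are omitted, and the helper name is invented:

/* Hedged sketch: make a submission depend on job `dep_handle` of context
 * `dep_ctx` being *scheduled*, rather than finished. Assumes libdrm's
 * amdgpu_drm.h; error handling and the other mandatory chunks omitted. */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <amdgpu_drm.h>

static int cs_submit_with_scheduled_dep(int fd, uint32_t ctx_id,
					uint32_t dep_ctx, uint64_t dep_handle)
{
	struct drm_amdgpu_cs_chunk_dep dep;
	struct drm_amdgpu_cs_chunk chunk;
	uint64_t chunk_ptr;
	union drm_amdgpu_cs cs;

	memset(&dep, 0, sizeof(dep));
	dep.ip_type = AMDGPU_HW_IP_COMPUTE;	/* ring type of the fence */
	dep.ctx_id = dep_ctx;			/* context owning the fence */
	dep.handle = dep_handle;		/* sequence number of the job */

	memset(&chunk, 0, sizeof(chunk));
	chunk.chunk_id = AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES;
	chunk.length_dw = sizeof(dep) / 4;
	chunk.chunk_data = (uintptr_t)&dep;
	chunk_ptr = (uintptr_t)&chunk;

	memset(&cs, 0, sizeof(cs));
	cs.in.ctx_id = ctx_id;
	cs.in.num_chunks = 1;		/* plus IB/BO-list chunks in practice */
	cs.in.chunks = (uintptr_t)&chunk_ptr;

	return ioctl(fd, DRM_IOCTL_AMDGPU_CS, &cs);
}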
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 384272603b21..4f8fb4ecde34 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3618,6 +3618,38 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */
 	return r;
 }
 
+static void amdgpu_device_get_min_pci_speed_width(struct amdgpu_device *adev,
+						  enum pci_bus_speed *speed,
+						  enum pcie_link_width *width)
+{
+	struct pci_dev *pdev = adev->pdev;
+	enum pci_bus_speed cur_speed;
+	enum pcie_link_width cur_width;
+
+	*speed = PCI_SPEED_UNKNOWN;
+	*width = PCIE_LNK_WIDTH_UNKNOWN;
+
+	while (pdev) {
+		cur_speed = pcie_get_speed_cap(pdev);
+		cur_width = pcie_get_width_cap(pdev);
+
+		if (cur_speed != PCI_SPEED_UNKNOWN) {
+			if (*speed == PCI_SPEED_UNKNOWN)
+				*speed = cur_speed;
+			else if (cur_speed < *speed)
+				*speed = cur_speed;
+		}
+
+		if (cur_width != PCIE_LNK_WIDTH_UNKNOWN) {
+			if (*width == PCIE_LNK_WIDTH_UNKNOWN)
+				*width = cur_width;
+			else if (cur_width < *width)
+				*width = cur_width;
+		}
+		pdev = pci_upstream_bridge(pdev);
+	}
+}
+
 /**
  * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
  *
@@ -3630,8 +3662,8 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */
 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
 {
 	struct pci_dev *pdev;
-	enum pci_bus_speed speed_cap;
-	enum pcie_link_width link_width;
+	enum pci_bus_speed speed_cap, platform_speed_cap;
+	enum pcie_link_width platform_link_width;
 
 	if (amdgpu_pcie_gen_cap)
 		adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
@@ -3648,6 +3680,12 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
 		return;
 	}
 
+	if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
+		return;
+
+	amdgpu_device_get_min_pci_speed_width(adev, &platform_speed_cap,
+					      &platform_link_width);
+
 	if (adev->pm.pcie_gen_mask == 0) {
 		/* asic caps */
 		pdev = adev->pdev;
@@ -3673,22 +3711,20 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
 				adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
 		}
 		/* platform caps */
-		pdev = adev->ddev->pdev->bus->self;
-		speed_cap = pcie_get_speed_cap(pdev);
-		if (speed_cap == PCI_SPEED_UNKNOWN) {
+		if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
 			adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
 						   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
 		} else {
-			if (speed_cap == PCIE_SPEED_16_0GT)
+			if (platform_speed_cap == PCIE_SPEED_16_0GT)
 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
-			else if (speed_cap == PCIE_SPEED_8_0GT)
+			else if (platform_speed_cap == PCIE_SPEED_8_0GT)
 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
-			else if (speed_cap == PCIE_SPEED_5_0GT)
+			else if (platform_speed_cap == PCIE_SPEED_5_0GT)
 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
 			else
@@ -3697,12 +3733,10 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
 		}
 	}
 	if (adev->pm.pcie_mlw_mask == 0) {
-		pdev = adev->ddev->pdev->bus->self;
-		link_width = pcie_get_width_cap(pdev);
-		if (link_width == PCIE_LNK_WIDTH_UNKNOWN) {
+		if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
 			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
 		} else {
-			switch (link_width) {
+			switch (platform_link_width) {
 			case PCIE_LNK_X32:
 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
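
The point of the new helper is that DPM should never be asked to switch to a link state some bridge on the path cannot sustain: the effective capability of the chain is its weakest link. A stand-alone sketch of that minimum-over-the-chain walk, using hypothetical simplified types in place of struct pci_dev:

/* Illustrative only: `struct node` and the GEN* enum stand in for
 * struct pci_dev and enum pci_bus_speed; the traversal mirrors the
 * pci_upstream_bridge() loop above. */
#include <stdio.h>

enum speed { SPEED_UNKNOWN = 0, GEN1, GEN2, GEN3, GEN4 };

struct node {
	enum speed cap;
	struct node *upstream;		/* stand-in for pci_upstream_bridge() */
};

static enum speed min_chain_speed(const struct node *n)
{
	enum speed s = SPEED_UNKNOWN;

	for (; n; n = n->upstream)
		if (n->cap != SPEED_UNKNOWN &&
		    (s == SPEED_UNKNOWN || n->cap < s))
			s = n->cap;
	return s;
}

int main(void)
{
	struct node root = { GEN3, NULL };	/* root port capped at Gen3 */
	struct node sw   = { GEN4, &root };	/* switch capable of Gen4 */
	struct node gpu  = { GEN4, &sw };	/* endpoint capable of Gen4 */

	printf("%d\n", min_chain_speed(&gpu));	/* prints 3: Gen3 wins */
	return 0;
}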
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index a1bb3773087b..7f3aa7b7e1d8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -71,9 +71,11 @@
  * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk).
  * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE.
  * - 3.27.0 - Add new chunk to AMDGPU_CS to enable BO_LIST creation.
+ * - 3.28.0 - Add AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES
+ * - 3.29.0 - Add AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID
  */
 #define KMS_DRIVER_MAJOR	3
-#define KMS_DRIVER_MINOR	27
+#define KMS_DRIVER_MINOR	29
 #define KMS_DRIVER_PATCHLEVEL	0
 
 int amdgpu_vram_limit = 0;
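
Because the DRM version is the only capability handshake for these additions, user space is expected to gate on the bumped minor before using them. A sketch with libdrm's standard drmGetVersion()/drmFreeVersion():

/* Sketch: check for KMS version >= 3.28 before emitting
 * AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES (>= 3.29 for the GDS IB flag). */
#include <stdbool.h>
#include <xf86drm.h>

static bool supports_scheduled_deps(int fd)
{
	drmVersionPtr v = drmGetVersion(fd);
	bool ok;

	if (!v)
		return false;
	ok = v->version_major > 3 ||
	     (v->version_major == 3 && v->version_minor >= 28);
	drmFreeVersion(v);
	return ok;
}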
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
index ecbcefe49a98..f89f5734d985 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
@@ -37,6 +37,8 @@ struct amdgpu_gds {
 	struct amdgpu_gds_asic_info	mem;
 	struct amdgpu_gds_asic_info	gws;
 	struct amdgpu_gds_asic_info	oa;
+	uint32_t			gds_compute_max_wave_id;
+
 	/* At present, GDS, GWS and OA resources for gfx (graphics)
 	 * are always pre-allocated and available for graphics operation.
 	 * Such resources are shared between all gfx clients.
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index f4f00217546e..d21dd2f369da 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -54,10 +54,6 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
 
 	memset(&bp, 0, sizeof(bp));
 	*obj = NULL;
-	/* At least align on page size */
-	if (alignment < PAGE_SIZE) {
-		alignment = PAGE_SIZE;
-	}
 
 	bp.size = size;
 	bp.byte_align = alignment;
@@ -244,9 +240,6 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
 			return -EINVAL;
 		}
 		flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
-		/* GDS allocations must be DW aligned */
-		if (args->in.domains & AMDGPU_GEM_DOMAIN_GDS)
-			size = ALIGN(size, 4);
 	}
 
 	if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 728e15e5d68a..fd9c4beeaaa4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -426,12 +426,20 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
 	size_t acc_size;
 	int r;
 
-	page_align = roundup(bp->byte_align, PAGE_SIZE) >> PAGE_SHIFT;
-	if (bp->domain & (AMDGPU_GEM_DOMAIN_GDS | AMDGPU_GEM_DOMAIN_GWS |
-			  AMDGPU_GEM_DOMAIN_OA))
+	/* Note that GDS/GWS/OA allocates 1 page per byte/resource. */
+	if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA)) {
+		/* GWS and OA don't need any alignment. */
+		page_align = bp->byte_align;
 		size <<= PAGE_SHIFT;
-	else
+	} else if (bp->domain & AMDGPU_GEM_DOMAIN_GDS) {
+		/* Both size and alignment must be a multiple of 4. */
+		page_align = ALIGN(bp->byte_align, 4);
+		size = ALIGN(size, 4) << PAGE_SHIFT;
+	} else {
+		/* Memory should be aligned at least to a page size. */
+		page_align = ALIGN(bp->byte_align, PAGE_SIZE) >> PAGE_SHIFT;
 		size = ALIGN(size, PAGE_SIZE);
+	}
 
 	if (!amdgpu_bo_validate_size(adev, size, bp->domain))
 		return -ENOMEM;
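
A worked example of the sizing rules above, written as a hypothetical stand-alone mirror of the kernel logic (PAGE_SHIFT assumed to be 12):

/* Illustrative only; ALIGN() copies the kernel's power-of-two rounding. */
#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define ALIGN(x, a)	(((x) + (a) - 1) & ~((unsigned long)(a) - 1))

int main(void)
{
	unsigned long bytes = 1026;

	/* GDS: size padded to a multiple of 4 (1026 -> 1028); TTM then
	 * counts one "page" per byte, hence the << PAGE_SHIFT above. */
	printf("GDS:  %lu bytes\n", ALIGN(bytes, 4));

	/* Ordinary VRAM/GTT: padded to a whole page, 1026 -> 4096. */
	printf("VRAM: %lu bytes\n", ALIGN(bytes, PAGE_SIZE));
	return 0;
}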
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index b852abb9db0f..73e71e61dc99 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1756,7 +1756,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 	}
 
 	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
-				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
+				    4, AMDGPU_GEM_DOMAIN_GDS,
 				    &adev->gds.gds_gfx_bo, NULL, NULL);
 	if (r)
 		return r;
@@ -1769,7 +1769,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 	}
 
 	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
-				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
+				    1, AMDGPU_GEM_DOMAIN_GWS,
 				    &adev->gds.gws_gfx_bo, NULL, NULL);
 	if (r)
 		return r;
@@ -1782,7 +1782,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 	}
 
 	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
-				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
+				    1, AMDGPU_GEM_DOMAIN_OA,
 				    &adev->gds.oa_gfx_bo, NULL, NULL);
 	if (r)
 		return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 0bc6f553dc08..75481cf3348f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -107,14 +107,6 @@ struct amdgpu_pte_update_params {
 	 * DMA addresses to use for mapping, used during VM update by CPU
 	 */
 	dma_addr_t *pages_addr;
-
-	/**
-	 * @kptr:
-	 *
-	 * Kernel pointer of PD/PT BO that needs to be updated,
-	 * used during VM update by CPU
-	 */
-	void *kptr;
 };
 
 /**
@@ -1789,13 +1781,20 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 		if (pages_addr)
 			params.src = ~0;
 
-		/* Wait for PT BOs to be free. PTs share the same resv. object
+		/* Wait for PT BOs to be idle. PTs share the same resv. object
 		 * as the root PD BO
 		 */
 		r = amdgpu_vm_wait_pd(adev, vm, owner);
 		if (unlikely(r))
 			return r;
 
+		/* Wait for any BO move to be completed */
+		if (exclusive) {
+			r = dma_fence_wait(exclusive, true);
+			if (unlikely(r))
+				return r;
+		}
+
 		params.func = amdgpu_vm_cpu_set_ptes;
 		params.pages_addr = pages_addr;
 		return amdgpu_vm_update_ptes(&params, start, last + 1,
@@ -1809,13 +1808,12 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	/*
 	 * reserve space for two commands every (1 << BLOCK_SIZE)
 	 * entries or 2k dwords (whatever is smaller)
-	 *
-	 * The second command is for the shadow pagetables.
 	 */
+	ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1);
+
+	/* The second command is for the shadow pagetables. */
 	if (vm->root.base.bo->shadow)
-		ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1) * 2;
-	else
-		ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1);
+		ncmds *= 2;
 
 	/* padding, etc. */
 	ndw = 64;
@@ -1834,10 +1832,11 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 		ndw += ncmds * 10;
 
 		/* extra commands for begin/end fragments */
+		ncmds = 2 * adev->vm_manager.fragment_size;
 		if (vm->root.base.bo->shadow)
-			ndw += 2 * 10 * adev->vm_manager.fragment_size * 2;
-		else
-			ndw += 2 * 10 * adev->vm_manager.fragment_size;
+			ncmds *= 2;
+
+		ndw += 10 * ncmds;
 
 		params.func = amdgpu_vm_do_set_ptes;
 	}
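
The rework computes the same dword budget as the old duplicated branches, just through one shared ncmds term. A sketch with assumed values showing the arithmetic:

/* Assumed inputs: block_size = 9, fragment_size = 9, 4096 PTEs, shadow
 * page tables present. Mirrors the reservation math above. */
#include <stdio.h>

static unsigned int min_u(unsigned int a, unsigned int b)
{
	return a < b ? a : b;
}

int main(void)
{
	unsigned int block_size = 9, fragment_size = 9, nptes = 4096;
	int shadow = 1;
	unsigned int ncmds, ndw = 64;		/* padding, etc. */

	/* two commands every (1 << block_size) entries or 2k dwords */
	ncmds = (nptes >> min_u(block_size, 11u)) + 1;
	if (shadow)				/* second copy for shadows */
		ncmds *= 2;
	ndw += ncmds * 10;

	/* extra commands for begin/end fragments */
	ncmds = 2 * fragment_size;
	if (shadow)
		ncmds *= 2;
	ndw += 10 * ncmds;

	printf("ndw = %u\n", ndw);		/* 64 + 180 + 360 = 604 */
	return 0;
}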
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 0d90672d0e58..407dd16cc35c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -125,7 +125,7 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
 	if (!hive) {
 		ret = -EINVAL;
 		dev_err(adev->dev,
-			"XGMI: node 0x%llx, can not matech hive 0x%llx in the hive list.\n",
+			"XGMI: node 0x%llx, can not match hive 0x%llx in the hive list.\n",
 			adev->gmc.xgmi.node_id, adev->gmc.xgmi.hive_id);
 		goto exit;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 7984292f9282..a59e0fdf5a97 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -2264,6 +2264,22 @@ static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
 
+	/* Currently, there is a high possibility to get wave ID mismatch
+	 * between ME and GDS, leading to a hw deadlock, because ME generates
+	 * different wave IDs than the GDS expects. This situation happens
+	 * randomly when at least 5 compute pipes use GDS ordered append.
+	 * The wave IDs generated by ME are also wrong after suspend/resume.
+	 * Those are probably bugs somewhere else in the kernel driver.
+	 *
+	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
+	 * GDS to 0 for this ring (me/pipe).
+	 */
+	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
+		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
+		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
+	}
+
 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
 	amdgpu_ring_write(ring,
 #ifdef __BIG_ENDIAN
@@ -5000,7 +5016,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
 		7 + /* gfx_v7_0_ring_emit_pipeline_sync */
 		CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v7_0_ring_emit_vm_flush */
 		7 + 7 + 7, /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */
-	.emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_compute */
+	.emit_ib_size = 7, /* gfx_v7_0_ring_emit_ib_compute */
 	.emit_ib = gfx_v7_0_ring_emit_ib_compute,
 	.emit_fence = gfx_v7_0_ring_emit_fence_compute,
 	.emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync,
@@ -5057,6 +5073,7 @@ static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev)
 	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
 	adev->gds.gws.total_size = 64;
 	adev->gds.oa.total_size = 16;
+	adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
 
 	if (adev->gds.mem.total_size == 64 * 1024) {
 		adev->gds.mem.gfx_partition_size = 4096;
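
User space requests this reset per IB through the new flag. A hedged sketch of filling the IB chunk body (the surrounding CS ioctl plumbing matches the dependency sketch earlier; the helper name is invented):

/* Sketch: mark a compute IB so the ring emits the GDS_COMPUTE_MAX_WAVE_ID
 * write before the INDIRECT_BUFFER packet. Assumes libdrm's amdgpu_drm.h. */
#include <stdint.h>
#include <string.h>
#include <amdgpu_drm.h>

static void fill_ib_chunk(struct drm_amdgpu_cs_chunk_ib *ib,
			  uint64_t va, uint32_t size_bytes)
{
	memset(ib, 0, sizeof(*ib));
	ib->va_start = va;			/* GPU VA of the command buffer */
	ib->ib_bytes = size_bytes;
	ib->ip_type = AMDGPU_HW_IP_COMPUTE;	/* only compute rings honour it */
	ib->flags = AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID;
}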
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index a26747681ed6..b8e50a34bdb3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -6084,6 +6084,22 @@ static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
 
+	/* Currently, there is a high possibility to get wave ID mismatch
+	 * between ME and GDS, leading to a hw deadlock, because ME generates
+	 * different wave IDs than the GDS expects. This situation happens
+	 * randomly when at least 5 compute pipes use GDS ordered append.
+	 * The wave IDs generated by ME are also wrong after suspend/resume.
+	 * Those are probably bugs somewhere else in the kernel driver.
+	 *
+	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
+	 * GDS to 0 for this ring (me/pipe).
+	 */
+	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
+		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
+		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
+	}
+
 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
 	amdgpu_ring_write(ring,
 #ifdef __BIG_ENDIAN
@@ -6890,7 +6906,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
 		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
 		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
 		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
-	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
+	.emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
 	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
 	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
 	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
@@ -6920,7 +6936,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
 		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
 		17 + /* gfx_v8_0_ring_emit_vm_flush */
 		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
-	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
+	.emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
 	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
 	.test_ring = gfx_v8_0_ring_test_ring,
 	.insert_nop = amdgpu_ring_insert_nop,
@@ -6996,6 +7012,7 @@ static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
 	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
 	adev->gds.gws.total_size = 64;
 	adev->gds.oa.total_size = 16;
+	adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
 
 	if (adev->gds.mem.total_size == 64 * 1024) {
 		adev->gds.mem.gfx_partition_size = 4096;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 262ee3cf6f1c..5533f6e4f4a4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -4010,6 +4010,22 @@ static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
 
+	/* Currently, there is a high possibility to get wave ID mismatch
+	 * between ME and GDS, leading to a hw deadlock, because ME generates
+	 * different wave IDs than the GDS expects. This situation happens
+	 * randomly when at least 5 compute pipes use GDS ordered append.
+	 * The wave IDs generated by ME are also wrong after suspend/resume.
+	 * Those are probably bugs somewhere else in the kernel driver.
+	 *
+	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
+	 * GDS to 0 for this ring (me/pipe).
+	 */
+	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
+		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
+		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
+	}
+
 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
 	amdgpu_ring_write(ring,
@@ -4729,7 +4745,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
 		2 + /* gfx_v9_0_ring_emit_vm_flush */
 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
-	.emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */
+	.emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
 	.emit_fence = gfx_v9_0_ring_emit_fence,
 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
@@ -4764,7 +4780,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
 		2 + /* gfx_v9_0_ring_emit_vm_flush */
 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
-	.emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */
+	.emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
 	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
 	.test_ring = gfx_v9_0_ring_test_ring,
 	.insert_nop = amdgpu_ring_insert_nop,
@@ -4846,6 +4862,26 @@ static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
 		break;
 	}
 
+	switch (adev->asic_type) {
+	case CHIP_VEGA10:
+	case CHIP_VEGA20:
+		adev->gds.gds_compute_max_wave_id = 0x7ff;
+		break;
+	case CHIP_VEGA12:
+		adev->gds.gds_compute_max_wave_id = 0x27f;
+		break;
+	case CHIP_RAVEN:
+		if (adev->rev_id >= 0x8)
+			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
+		else
+			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
+		break;
+	default:
+		/* this really depends on the chip */
+		adev->gds.gds_compute_max_wave_id = 0x7ff;
+		break;
+	}
+
 	adev->gds.gws.total_size = 64;
 	adev->gds.oa.total_size = 16;
 
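
For reference, the emit_ib_size bumps from 4 to 7 in all three gfx files account for the worst case of the optional reset packet. A sketch of the dword arithmetic (not kernel code):

/* PACKET3 payload counts are N+1 dwords plus one header dword, so:
 * INDIRECT_BUFFER (count 2): 1 + 3 = 4 dwords (the old emit_ib_size)
 * SET_CONFIG_REG  (count 1): 1 + 2 = 3 dwords (the optional reset)  */
enum {
	IB_PACKET_DW	= 4,
	RESET_PACKET_DW	= 3,
	EMIT_IB_SIZE	= IB_PACKET_DW + RESET_PACKET_DW,	/* = 7 */
};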