author    Dave Airlie <airlied@redhat.com>  2019-02-10 23:04:05 -0500
committer Dave Airlie <airlied@redhat.com>  2019-02-10 23:04:20 -0500
commit    f4bc54b532a62d8bee421ca06adb6d1b3e7ffaa9 (patch)
tree      3b835f9bed6bd236fa1a6d5d0add836f25ca8262 /drivers/gpu/drm/amd/amdgpu
parent    5ea3998d56346975c2701df18fb5b6e3ab5c8d9e (diff)
parent    0461221316ec21e0a535a35fba3feb6ba75706e6 (diff)
Merge branch 'drm-next-5.1' of git://people.freedesktop.org/~agd5f/linux into drm-next
Updates for 5.1:
- GDS fixes
- Add AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES interface
- GPUVM fixes
- PCIE DPM switching fixes for vega20
- Vega10 uclk DPM regression fix
- DC Freesync fixes
- DC ABM fixes
- Various DC cleanups

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexdeucher@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190208210214.27666-1-alexander.deucher@amd.com
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c     | 13
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 58
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c    |  4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h    |  2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c    |  7
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 16
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    |  6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c     | 33
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c   |  2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c      | 19
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c      | 21
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c      | 40
12 files changed, 170 insertions, 51 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 1c49b8266d69..52a5e4fdc95b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -214,6 +214,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
 		case AMDGPU_CHUNK_ID_DEPENDENCIES:
 		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
 		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
+		case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
 			break;
 
 		default:
@@ -1090,6 +1091,15 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
 
 		fence = amdgpu_ctx_get_fence(ctx, entity,
 					     deps[i].handle);
+
+		if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {
+			struct drm_sched_fence *s_fence = to_drm_sched_fence(fence);
+			struct dma_fence *old = fence;
+
+			fence = dma_fence_get(&s_fence->scheduled);
+			dma_fence_put(old);
+		}
+
 		if (IS_ERR(fence)) {
 			r = PTR_ERR(fence);
 			amdgpu_ctx_put(ctx);
@@ -1177,7 +1187,8 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
 
 		chunk = &p->chunks[i];
 
-		if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES) {
+		if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES ||
+		    chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {
 			r = amdgpu_cs_process_fence_dep(p, chunk);
 			if (r)
 				return r;
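
For context, user space opts into this behaviour by attaching an extra chunk to the CS ioctl. Below is a minimal, hypothetical sketch against the amdgpu UAPI header; the IB and buffer-list chunks a real submission also carries are omitted, and the helper name is invented:

/* Hedged sketch: make a submission depend on job `dep_handle` of context
 * `dep_ctx` being *scheduled*, rather than finished. Assumes libdrm's
 * amdgpu_drm.h; error handling and the other mandatory chunks omitted. */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <amdgpu_drm.h>

static int cs_submit_with_scheduled_dep(int fd, uint32_t ctx_id,
					uint32_t dep_ctx, uint64_t dep_handle)
{
	struct drm_amdgpu_cs_chunk_dep dep;
	struct drm_amdgpu_cs_chunk chunk;
	uint64_t chunk_ptr;
	union drm_amdgpu_cs cs;

	memset(&dep, 0, sizeof(dep));
	dep.ip_type = AMDGPU_HW_IP_COMPUTE;	/* ring type of the fence */
	dep.ctx_id = dep_ctx;			/* context owning the fence */
	dep.handle = dep_handle;		/* sequence number of the job */

	memset(&chunk, 0, sizeof(chunk));
	chunk.chunk_id = AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES;
	chunk.length_dw = sizeof(dep) / 4;
	chunk.chunk_data = (uintptr_t)&dep;
	chunk_ptr = (uintptr_t)&chunk;

	memset(&cs, 0, sizeof(cs));
	cs.in.ctx_id = ctx_id;
	cs.in.num_chunks = 1;		/* plus IB/BO-list chunks in practice */
	cs.in.chunks = (uintptr_t)&chunk_ptr;

	return ioctl(fd, DRM_IOCTL_AMDGPU_CS, &cs);
}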
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 384272603b21..4f8fb4ecde34 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3618,6 +3618,38 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */
 	return r;
 }
 
+static void amdgpu_device_get_min_pci_speed_width(struct amdgpu_device *adev,
+						  enum pci_bus_speed *speed,
+						  enum pcie_link_width *width)
+{
+	struct pci_dev *pdev = adev->pdev;
+	enum pci_bus_speed cur_speed;
+	enum pcie_link_width cur_width;
+
+	*speed = PCI_SPEED_UNKNOWN;
+	*width = PCIE_LNK_WIDTH_UNKNOWN;
+
+	while (pdev) {
+		cur_speed = pcie_get_speed_cap(pdev);
+		cur_width = pcie_get_width_cap(pdev);
+
+		if (cur_speed != PCI_SPEED_UNKNOWN) {
+			if (*speed == PCI_SPEED_UNKNOWN)
+				*speed = cur_speed;
+			else if (cur_speed < *speed)
+				*speed = cur_speed;
+		}
+
+		if (cur_width != PCIE_LNK_WIDTH_UNKNOWN) {
+			if (*width == PCIE_LNK_WIDTH_UNKNOWN)
+				*width = cur_width;
+			else if (cur_width < *width)
+				*width = cur_width;
+		}
+		pdev = pci_upstream_bridge(pdev);
+	}
+}
+
 /**
  * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
  *
@@ -3630,8 +3662,8 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */
 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
 {
 	struct pci_dev *pdev;
-	enum pci_bus_speed speed_cap;
-	enum pcie_link_width link_width;
+	enum pci_bus_speed speed_cap, platform_speed_cap;
+	enum pcie_link_width platform_link_width;
 
 	if (amdgpu_pcie_gen_cap)
 		adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
@@ -3648,6 +3680,12 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
 		return;
 	}
 
+	if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
+		return;
+
+	amdgpu_device_get_min_pci_speed_width(adev, &platform_speed_cap,
+					      &platform_link_width);
+
 	if (adev->pm.pcie_gen_mask == 0) {
 		/* asic caps */
 		pdev = adev->pdev;
@@ -3673,22 +3711,20 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
 				adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
 		}
 		/* platform caps */
-		pdev = adev->ddev->pdev->bus->self;
-		speed_cap = pcie_get_speed_cap(pdev);
-		if (speed_cap == PCI_SPEED_UNKNOWN) {
+		if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
 			adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
 						   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
 		} else {
-			if (speed_cap == PCIE_SPEED_16_0GT)
+			if (platform_speed_cap == PCIE_SPEED_16_0GT)
 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
-			else if (speed_cap == PCIE_SPEED_8_0GT)
+			else if (platform_speed_cap == PCIE_SPEED_8_0GT)
 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
-			else if (speed_cap == PCIE_SPEED_5_0GT)
+			else if (platform_speed_cap == PCIE_SPEED_5_0GT)
 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
 			else
@@ -3697,12 +3733,10 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
 		}
 	}
 	if (adev->pm.pcie_mlw_mask == 0) {
-		pdev = adev->ddev->pdev->bus->self;
-		link_width = pcie_get_width_cap(pdev);
-		if (link_width == PCIE_LNK_WIDTH_UNKNOWN) {
+		if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
 			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
 		} else {
-			switch (link_width) {
+			switch (platform_link_width) {
 			case PCIE_LNK_X32:
 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
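
The point of the new helper is that DPM should never be asked to switch to a link state some bridge on the path cannot sustain: the effective capability of the chain is its weakest link. A stand-alone sketch of that minimum-over-the-chain walk, using hypothetical simplified types in place of struct pci_dev:

/* Illustrative only: `struct node` and the GEN* enum stand in for
 * struct pci_dev and enum pci_bus_speed; the traversal mirrors the
 * pci_upstream_bridge() loop above. */
#include <stdio.h>

enum speed { SPEED_UNKNOWN = 0, GEN1, GEN2, GEN3, GEN4 };

struct node {
	enum speed cap;
	struct node *upstream;		/* stand-in for pci_upstream_bridge() */
};

static enum speed min_chain_speed(const struct node *n)
{
	enum speed s = SPEED_UNKNOWN;

	for (; n; n = n->upstream)
		if (n->cap != SPEED_UNKNOWN &&
		    (s == SPEED_UNKNOWN || n->cap < s))
			s = n->cap;
	return s;
}

int main(void)
{
	struct node root = { GEN3, NULL };	/* root port capped at Gen3 */
	struct node sw   = { GEN4, &root };	/* switch capable of Gen4 */
	struct node gpu  = { GEN4, &sw };	/* endpoint capable of Gen4 */

	printf("%d\n", min_chain_speed(&gpu));	/* prints 3: Gen3 wins */
	return 0;
}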
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index a1bb3773087b..7f3aa7b7e1d8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -71,9 +71,11 @@
  * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk).
  * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE.
  * - 3.27.0 - Add new chunk to AMDGPU_CS to enable BO_LIST creation.
+ * - 3.28.0 - Add AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES
+ * - 3.29.0 - Add AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID
  */
 #define KMS_DRIVER_MAJOR	3
-#define KMS_DRIVER_MINOR	27
+#define KMS_DRIVER_MINOR	29
 #define KMS_DRIVER_PATCHLEVEL	0
 
 int amdgpu_vram_limit = 0;
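
Because the DRM version is the only capability handshake for these additions, user space is expected to gate on the bumped minor before using them. A sketch with libdrm's standard drmGetVersion()/drmFreeVersion():

/* Sketch: check for KMS version >= 3.28 before emitting
 * AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES (>= 3.29 for the GDS IB flag). */
#include <stdbool.h>
#include <xf86drm.h>

static bool supports_scheduled_deps(int fd)
{
	drmVersionPtr v = drmGetVersion(fd);
	bool ok;

	if (!v)
		return false;
	ok = v->version_major > 3 ||
	     (v->version_major == 3 && v->version_minor >= 28);
	drmFreeVersion(v);
	return ok;
}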
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
index ecbcefe49a98..f89f5734d985 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
@@ -37,6 +37,8 @@ struct amdgpu_gds {
 	struct amdgpu_gds_asic_info	mem;
 	struct amdgpu_gds_asic_info	gws;
 	struct amdgpu_gds_asic_info	oa;
+	uint32_t			gds_compute_max_wave_id;
+
 	/* At present, GDS, GWS and OA resources for gfx (graphics)
 	 * are always pre-allocated and available for graphics operation.
 	 * Such resources are shared between all gfx clients.
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index f4f00217546e..d21dd2f369da 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -54,10 +54,6 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
 
 	memset(&bp, 0, sizeof(bp));
 	*obj = NULL;
-	/* At least align on page size */
-	if (alignment < PAGE_SIZE) {
-		alignment = PAGE_SIZE;
-	}
 
 	bp.size = size;
 	bp.byte_align = alignment;
@@ -244,9 +240,6 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
 			return -EINVAL;
 		}
 		flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
-		/* GDS allocations must be DW aligned */
-		if (args->in.domains & AMDGPU_GEM_DOMAIN_GDS)
-			size = ALIGN(size, 4);
 	}
 
 	if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 728e15e5d68a..fd9c4beeaaa4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -426,12 +426,20 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
 	size_t acc_size;
 	int r;
 
-	page_align = roundup(bp->byte_align, PAGE_SIZE) >> PAGE_SHIFT;
-	if (bp->domain & (AMDGPU_GEM_DOMAIN_GDS | AMDGPU_GEM_DOMAIN_GWS |
-			  AMDGPU_GEM_DOMAIN_OA))
+	/* Note that GDS/GWS/OA allocates 1 page per byte/resource. */
+	if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA)) {
+		/* GWS and OA don't need any alignment. */
+		page_align = bp->byte_align;
 		size <<= PAGE_SHIFT;
-	else
+	} else if (bp->domain & AMDGPU_GEM_DOMAIN_GDS) {
+		/* Both size and alignment must be a multiple of 4. */
+		page_align = ALIGN(bp->byte_align, 4);
+		size = ALIGN(size, 4) << PAGE_SHIFT;
+	} else {
+		/* Memory should be aligned at least to a page size. */
+		page_align = ALIGN(bp->byte_align, PAGE_SIZE) >> PAGE_SHIFT;
 		size = ALIGN(size, PAGE_SIZE);
+	}
 
 	if (!amdgpu_bo_validate_size(adev, size, bp->domain))
 		return -ENOMEM;
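
A worked example of the sizing rules above, written as a hypothetical stand-alone mirror of the kernel logic (PAGE_SHIFT assumed to be 12):

/* Illustrative only; ALIGN() copies the kernel's power-of-two rounding. */
#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define ALIGN(x, a)	(((x) + (a) - 1) & ~((unsigned long)(a) - 1))

int main(void)
{
	unsigned long bytes = 1026;

	/* GDS: size padded to a multiple of 4 (1026 -> 1028); TTM then
	 * counts one "page" per byte, hence the << PAGE_SHIFT above. */
	printf("GDS:  %lu bytes\n", ALIGN(bytes, 4));

	/* Ordinary VRAM/GTT: padded to a whole page, 1026 -> 4096. */
	printf("VRAM: %lu bytes\n", ALIGN(bytes, PAGE_SIZE));
	return 0;
}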
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index b852abb9db0f..73e71e61dc99 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1756,7 +1756,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 	}
 
 	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
-				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
+				    4, AMDGPU_GEM_DOMAIN_GDS,
 				    &adev->gds.gds_gfx_bo, NULL, NULL);
 	if (r)
 		return r;
@@ -1769,7 +1769,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 	}
 
 	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
-				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
+				    1, AMDGPU_GEM_DOMAIN_GWS,
 				    &adev->gds.gws_gfx_bo, NULL, NULL);
 	if (r)
 		return r;
@@ -1782,7 +1782,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 	}
 
 	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
-				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
+				    1, AMDGPU_GEM_DOMAIN_OA,
 				    &adev->gds.oa_gfx_bo, NULL, NULL);
 	if (r)
 		return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 0bc6f553dc08..75481cf3348f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -107,14 +107,6 @@ struct amdgpu_pte_update_params {
 	 * DMA addresses to use for mapping, used during VM update by CPU
 	 */
 	dma_addr_t *pages_addr;
-
-	/**
-	 * @kptr:
-	 *
-	 * Kernel pointer of PD/PT BO that needs to be updated,
-	 * used during VM update by CPU
-	 */
-	void *kptr;
 };
 
 /**
@@ -1789,13 +1781,20 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 		if (pages_addr)
 			params.src = ~0;
 
-		/* Wait for PT BOs to be free. PTs share the same resv. object
+		/* Wait for PT BOs to be idle. PTs share the same resv. object
 		 * as the root PD BO
 		 */
 		r = amdgpu_vm_wait_pd(adev, vm, owner);
 		if (unlikely(r))
 			return r;
 
+		/* Wait for any BO move to be completed */
+		if (exclusive) {
+			r = dma_fence_wait(exclusive, true);
+			if (unlikely(r))
+				return r;
+		}
+
 		params.func = amdgpu_vm_cpu_set_ptes;
 		params.pages_addr = pages_addr;
 		return amdgpu_vm_update_ptes(&params, start, last + 1,
@@ -1809,13 +1808,12 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	/*
 	 * reserve space for two commands every (1 << BLOCK_SIZE)
 	 * entries or 2k dwords (whatever is smaller)
-	 *
-	 * The second command is for the shadow pagetables.
 	 */
+	ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1);
+
+	/* The second command is for the shadow pagetables. */
 	if (vm->root.base.bo->shadow)
-		ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1) * 2;
-	else
-		ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1);
+		ncmds *= 2;
 
 	/* padding, etc. */
 	ndw = 64;
@@ -1834,10 +1832,11 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 		ndw += ncmds * 10;
 
 		/* extra commands for begin/end fragments */
+		ncmds = 2 * adev->vm_manager.fragment_size;
 		if (vm->root.base.bo->shadow)
-			ndw += 2 * 10 * adev->vm_manager.fragment_size * 2;
-		else
-			ndw += 2 * 10 * adev->vm_manager.fragment_size;
+			ncmds *= 2;
+
+		ndw += 10 * ncmds;
 
 		params.func = amdgpu_vm_do_set_ptes;
 	}
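
The rework computes the same dword budget as the old duplicated branches, just through one shared ncmds term. A sketch with assumed values showing the arithmetic:

/* Assumed inputs: block_size = 9, fragment_size = 9, 4096 PTEs, shadow
 * page tables present. Mirrors the reservation math above. */
#include <stdio.h>

static unsigned int min_u(unsigned int a, unsigned int b)
{
	return a < b ? a : b;
}

int main(void)
{
	unsigned int block_size = 9, fragment_size = 9, nptes = 4096;
	int shadow = 1;
	unsigned int ncmds, ndw = 64;		/* padding, etc. */

	/* two commands every (1 << block_size) entries or 2k dwords */
	ncmds = (nptes >> min_u(block_size, 11u)) + 1;
	if (shadow)				/* second copy for shadows */
		ncmds *= 2;
	ndw += ncmds * 10;

	/* extra commands for begin/end fragments */
	ncmds = 2 * fragment_size;
	if (shadow)
		ncmds *= 2;
	ndw += 10 * ncmds;

	printf("ndw = %u\n", ndw);		/* 64 + 180 + 360 = 604 */
	return 0;
}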
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 0d90672d0e58..407dd16cc35c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -125,7 +125,7 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
 	if (!hive) {
 		ret = -EINVAL;
 		dev_err(adev->dev,
-			"XGMI: node 0x%llx, can not matech hive 0x%llx in the hive list.\n",
+			"XGMI: node 0x%llx, can not match hive 0x%llx in the hive list.\n",
 			adev->gmc.xgmi.node_id, adev->gmc.xgmi.hive_id);
 		goto exit;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 7984292f9282..a59e0fdf5a97 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -2264,6 +2264,22 @@ static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
 
+	/* Currently, there is a high possibility to get wave ID mismatch
+	 * between ME and GDS, leading to a hw deadlock, because ME generates
+	 * different wave IDs than the GDS expects. This situation happens
+	 * randomly when at least 5 compute pipes use GDS ordered append.
+	 * The wave IDs generated by ME are also wrong after suspend/resume.
+	 * Those are probably bugs somewhere else in the kernel driver.
+	 *
+	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
+	 * GDS to 0 for this ring (me/pipe).
+	 */
+	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
+		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
+		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
+	}
+
 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
 	amdgpu_ring_write(ring,
 #ifdef __BIG_ENDIAN
@@ -5000,7 +5016,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
 		7 + /* gfx_v7_0_ring_emit_pipeline_sync */
 		CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v7_0_ring_emit_vm_flush */
 		7 + 7 + 7, /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */
-	.emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_compute */
+	.emit_ib_size = 7, /* gfx_v7_0_ring_emit_ib_compute */
 	.emit_ib = gfx_v7_0_ring_emit_ib_compute,
 	.emit_fence = gfx_v7_0_ring_emit_fence_compute,
 	.emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync,
@@ -5057,6 +5073,7 @@ static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev)
 	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
 	adev->gds.gws.total_size = 64;
 	adev->gds.oa.total_size = 16;
+	adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
 
 	if (adev->gds.mem.total_size == 64 * 1024) {
 		adev->gds.mem.gfx_partition_size = 4096;
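
User space requests this reset per IB through the new flag. A hedged sketch of filling the IB chunk body (the surrounding CS ioctl plumbing matches the dependency sketch earlier; the helper name is invented):

/* Sketch: mark a compute IB so the ring emits the GDS_COMPUTE_MAX_WAVE_ID
 * write before the INDIRECT_BUFFER packet. Assumes libdrm's amdgpu_drm.h. */
#include <stdint.h>
#include <string.h>
#include <amdgpu_drm.h>

static void fill_ib_chunk(struct drm_amdgpu_cs_chunk_ib *ib,
			  uint64_t va, uint32_t size_bytes)
{
	memset(ib, 0, sizeof(*ib));
	ib->va_start = va;			/* GPU VA of the command buffer */
	ib->ib_bytes = size_bytes;
	ib->ip_type = AMDGPU_HW_IP_COMPUTE;	/* only compute rings honour it */
	ib->flags = AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID;
}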
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index a26747681ed6..b8e50a34bdb3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -6084,6 +6084,22 @@ static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
 
+	/* Currently, there is a high possibility to get wave ID mismatch
+	 * between ME and GDS, leading to a hw deadlock, because ME generates
+	 * different wave IDs than the GDS expects. This situation happens
+	 * randomly when at least 5 compute pipes use GDS ordered append.
+	 * The wave IDs generated by ME are also wrong after suspend/resume.
+	 * Those are probably bugs somewhere else in the kernel driver.
+	 *
+	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
+	 * GDS to 0 for this ring (me/pipe).
+	 */
+	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
+		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
+		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
+	}
+
 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
 	amdgpu_ring_write(ring,
 #ifdef __BIG_ENDIAN
@@ -6890,7 +6906,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
 		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
 		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
 		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
-	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
+	.emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
 	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
 	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
 	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
@@ -6920,7 +6936,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
 		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
 		17 + /* gfx_v8_0_ring_emit_vm_flush */
 		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
-	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
+	.emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
 	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
 	.test_ring = gfx_v8_0_ring_test_ring,
 	.insert_nop = amdgpu_ring_insert_nop,
@@ -6996,6 +7012,7 @@ static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
 	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
 	adev->gds.gws.total_size = 64;
 	adev->gds.oa.total_size = 16;
+	adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
 
 	if (adev->gds.mem.total_size == 64 * 1024) {
 		adev->gds.mem.gfx_partition_size = 4096;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 262ee3cf6f1c..5533f6e4f4a4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -4010,6 +4010,22 @@ static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
 
+	/* Currently, there is a high possibility to get wave ID mismatch
+	 * between ME and GDS, leading to a hw deadlock, because ME generates
+	 * different wave IDs than the GDS expects. This situation happens
+	 * randomly when at least 5 compute pipes use GDS ordered append.
+	 * The wave IDs generated by ME are also wrong after suspend/resume.
+	 * Those are probably bugs somewhere else in the kernel driver.
+	 *
+	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
+	 * GDS to 0 for this ring (me/pipe).
+	 */
+	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
+		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
+		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
+	}
+
 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
 	amdgpu_ring_write(ring,
@@ -4729,7 +4745,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
 		2 + /* gfx_v9_0_ring_emit_vm_flush */
 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
-	.emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */
+	.emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
 	.emit_fence = gfx_v9_0_ring_emit_fence,
 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
@@ -4764,7 +4780,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
 		2 + /* gfx_v9_0_ring_emit_vm_flush */
 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
-	.emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */
+	.emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
 	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
 	.test_ring = gfx_v9_0_ring_test_ring,
 	.insert_nop = amdgpu_ring_insert_nop,
@@ -4846,6 +4862,26 @@ static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
 		break;
 	}
 
+	switch (adev->asic_type) {
+	case CHIP_VEGA10:
+	case CHIP_VEGA20:
+		adev->gds.gds_compute_max_wave_id = 0x7ff;
+		break;
+	case CHIP_VEGA12:
+		adev->gds.gds_compute_max_wave_id = 0x27f;
+		break;
+	case CHIP_RAVEN:
+		if (adev->rev_id >= 0x8)
+			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
+		else
+			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
+		break;
+	default:
+		/* this really depends on the chip */
+		adev->gds.gds_compute_max_wave_id = 0x7ff;
+		break;
+	}
+
 	adev->gds.gws.total_size = 64;
 	adev->gds.oa.total_size = 16;
 
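
For reference, the emit_ib_size bumps from 4 to 7 in all three gfx files account for the worst case of the optional reset packet. A sketch of the dword arithmetic (not kernel code):

/* PACKET3 payload counts are N+1 dwords plus one header dword, so:
 * INDIRECT_BUFFER (count 2): 1 + 3 = 4 dwords (the old emit_ib_size)
 * SET_CONFIG_REG  (count 1): 1 + 2 = 3 dwords (the optional reset)  */
enum {
	IB_PACKET_DW	= 4,
	RESET_PACKET_DW	= 3,
	EMIT_IB_SIZE	= IB_PACKET_DW + RESET_PACKET_DW,	/* = 7 */
};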