author    Dave Airlie <airlied@redhat.com>  2017-10-19 20:47:19 -0400
committer Dave Airlie <airlied@redhat.com>  2017-10-19 20:47:19 -0400
commit    6585d4274b0baf1d09318539c4a726a96b51af34 (patch)
tree      179aacc9409db45966595893ae4842104b314442 /drivers/gpu/drm/amd/amdgpu
parent    40d86701a625eed9e644281b9af228d6a52d8ed9 (diff)
parent    96687ec0bb478088cb6941a7dca3bb6808a19313 (diff)
Merge branch 'drm-next-4.15' of git://people.freedesktop.org/~agd5f/linux into drm-next
Last set of features for 4.15. Highlights:
- Add a bo flag to allow buffers to opt out of implicit sync
- Add ctx priority setting interface (both illustrated in the userspace sketch after the commit list below)
- Lots more powerplay cleanups
- Start to plumb through vram lost infrastructure for gpu reset
- ttm support for huge pages
- misc cleanups and bug fixes
* 'drm-next-4.15' of git://people.freedesktop.org/~agd5f/linux: (73 commits)
drm/amd/powerplay: Place the constant on the right side of the test
drm/amd/powerplay: Remove useless variable
drm/amd/powerplay: Don't cast kzalloc() return value
drm/amdgpu: allow GTT overcommit during bind
drm/amdgpu: linear validate first then bind to GART
drm/amd/pp: Fix overflow when setup decf/pix/disp dpm table.
drm/amd/pp: thermal control not enabled on vega10.
drm/amdgpu: busywait KIQ register accessing (v4)
drm/amdgpu: report more amdgpu_fence_info
drm/amdgpu:don't check soft_reset for sriov
drm/amdgpu:fix duplicated setting job's vram_lost
drm/amdgpu:reduce wb to 512 slot
drm/amdgpu: fix regresstion on SR-IOV gpu reset failed
drm/amd/powerplay: Tidy up cz_dpm_powerup_vce()
drm/amd/powerplay: Tidy up cz_dpm_powerdown_vce()
drm/amd/powerplay: Tidy up cz_dpm_update_vce_dpm()
drm/amd/powerplay: Tidy up cz_dpm_update_uvd_dpm()
drm/amd/powerplay: Tidy up cz_dpm_powerup_uvd()
drm/amd/powerplay: Tidy up cz_dpm_powerdown_uvd()
drm/amd/powerplay: Tidy up cz_start_dpm()
...
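For orientation, here is a rough userspace-side sketch of the first two highlights. It is not part of this merge; it assumes the include/uapi/drm/amdgpu_drm.h names this series adds or extends (AMDGPU_GEM_CREATE_EXPLICIT_SYNC, the priority field of drm_amdgpu_ctx_in, AMDGPU_CTX_PRIORITY_HIGH), the usual libdrm header layout, and reduces error handling to -errno returns.

```c
/*
 * Illustrative sketch only -- not part of this merge. UAPI names are
 * assumed from include/uapi/drm/amdgpu_drm.h as extended by this series.
 */
#include <errno.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/amdgpu_drm.h>

/* Allocate a VRAM BO that opts out of implicit sync on command submission. */
static int alloc_explicit_sync_bo(int fd, __u64 size, __u32 *handle)
{
    union drm_amdgpu_gem_create req;

    memset(&req, 0, sizeof(req));
    req.in.bo_size = size;
    req.in.alignment = 4096;
    req.in.domains = AMDGPU_GEM_DOMAIN_VRAM;
    req.in.domain_flags = AMDGPU_GEM_CREATE_EXPLICIT_SYNC;

    if (ioctl(fd, DRM_IOCTL_AMDGPU_GEM_CREATE, &req))
        return -errno;

    *handle = req.out.handle;
    return 0;
}

/*
 * Allocate a context with elevated priority; anything above NORMAL needs
 * CAP_SYS_NICE or DRM master, see amdgpu_ctx_priority_permit() below.
 */
static int alloc_high_prio_ctx(int fd, __u32 *ctx_id)
{
    union drm_amdgpu_ctx req;

    memset(&req, 0, sizeof(req));
    req.in.op = AMDGPU_CTX_OP_ALLOC_CTX;
    req.in.priority = AMDGPU_CTX_PRIORITY_HIGH;

    if (ioctl(fd, DRM_IOCTL_AMDGPU_CTX, &req))
        return -errno;

    *ctx_id = req.out.alloc.ctx_id;
    return 0;
}
```

With the vram-lost plumbing below in place, submissions against a context created before a VRAM-losing GPU reset are rejected with -ECANCELED, so userspace is expected to recreate its contexts and buffers after observing a bumped VRAM lost counter.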
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
34 files changed, 1291 insertions, 258 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 25a95c95df14..ef9a3b6d7b62 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -25,7 +25,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ | |||
25 | amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \ | 25 | amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \ |
26 | amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \ | 26 | amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \ |
27 | amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \ | 27 | amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \ |
28 | amdgpu_queue_mgr.o amdgpu_vf_error.o | 28 | amdgpu_queue_mgr.o amdgpu_vf_error.o amdgpu_sched.o |
29 | 29 | ||
30 | # add asic specific block | 30 | # add asic specific block |
31 | amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \ | 31 | amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \ |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index a23b8af95319..cbcb6a153aba 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -732,10 +732,14 @@ struct amdgpu_ctx { | |||
732 | struct amdgpu_device *adev; | 732 | struct amdgpu_device *adev; |
733 | struct amdgpu_queue_mgr queue_mgr; | 733 | struct amdgpu_queue_mgr queue_mgr; |
734 | unsigned reset_counter; | 734 | unsigned reset_counter; |
735 | uint32_t vram_lost_counter; | ||
735 | spinlock_t ring_lock; | 736 | spinlock_t ring_lock; |
736 | struct dma_fence **fences; | 737 | struct dma_fence **fences; |
737 | struct amdgpu_ctx_ring rings[AMDGPU_MAX_RINGS]; | 738 | struct amdgpu_ctx_ring rings[AMDGPU_MAX_RINGS]; |
738 | bool preamble_presented; | 739 | bool preamble_presented; |
740 | enum amd_sched_priority init_priority; | ||
741 | enum amd_sched_priority override_priority; | ||
742 | struct mutex lock; | ||
739 | }; | 743 | }; |
740 | 744 | ||
741 | struct amdgpu_ctx_mgr { | 745 | struct amdgpu_ctx_mgr { |
@@ -752,13 +756,18 @@ int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, | |||
752 | struct dma_fence *fence, uint64_t *seq); | 756 | struct dma_fence *fence, uint64_t *seq); |
753 | struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, | 757 | struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, |
754 | struct amdgpu_ring *ring, uint64_t seq); | 758 | struct amdgpu_ring *ring, uint64_t seq); |
759 | void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx, | ||
760 | enum amd_sched_priority priority); | ||
755 | 761 | ||
756 | int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, | 762 | int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, |
757 | struct drm_file *filp); | 763 | struct drm_file *filp); |
758 | 764 | ||
765 | int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id); | ||
766 | |||
759 | void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr); | 767 | void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr); |
760 | void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr); | 768 | void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr); |
761 | 769 | ||
770 | |||
762 | /* | 771 | /* |
763 | * file private structure | 772 | * file private structure |
764 | */ | 773 | */ |
@@ -770,7 +779,6 @@ struct amdgpu_fpriv { | |||
770 | struct mutex bo_list_lock; | 779 | struct mutex bo_list_lock; |
771 | struct idr bo_list_handles; | 780 | struct idr bo_list_handles; |
772 | struct amdgpu_ctx_mgr ctx_mgr; | 781 | struct amdgpu_ctx_mgr ctx_mgr; |
773 | u32 vram_lost_counter; | ||
774 | }; | 782 | }; |
775 | 783 | ||
776 | /* | 784 | /* |
@@ -871,7 +879,7 @@ struct amdgpu_mec { | |||
871 | struct amdgpu_kiq { | 879 | struct amdgpu_kiq { |
872 | u64 eop_gpu_addr; | 880 | u64 eop_gpu_addr; |
873 | struct amdgpu_bo *eop_obj; | 881 | struct amdgpu_bo *eop_obj; |
874 | struct mutex ring_mutex; | 882 | spinlock_t ring_lock; |
875 | struct amdgpu_ring ring; | 883 | struct amdgpu_ring ring; |
876 | struct amdgpu_irq_src irq; | 884 | struct amdgpu_irq_src irq; |
877 | }; | 885 | }; |
@@ -1035,6 +1043,10 @@ struct amdgpu_gfx { | |||
1035 | bool in_suspend; | 1043 | bool in_suspend; |
1036 | /* NGG */ | 1044 | /* NGG */ |
1037 | struct amdgpu_ngg ngg; | 1045 | struct amdgpu_ngg ngg; |
1046 | |||
1047 | /* pipe reservation */ | ||
1048 | struct mutex pipe_reserve_mutex; | ||
1049 | DECLARE_BITMAP (pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); | ||
1038 | }; | 1050 | }; |
1039 | 1051 | ||
1040 | int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, | 1052 | int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, |
@@ -1113,6 +1125,7 @@ struct amdgpu_job { | |||
1113 | uint32_t gds_base, gds_size; | 1125 | uint32_t gds_base, gds_size; |
1114 | uint32_t gws_base, gws_size; | 1126 | uint32_t gws_base, gws_size; |
1115 | uint32_t oa_base, oa_size; | 1127 | uint32_t oa_base, oa_size; |
1128 | uint32_t vram_lost_counter; | ||
1116 | 1129 | ||
1117 | /* user fence handling */ | 1130 | /* user fence handling */ |
1118 | uint64_t uf_addr; | 1131 | uint64_t uf_addr; |
@@ -1138,7 +1151,7 @@ static inline void amdgpu_set_ib_value(struct amdgpu_cs_parser *p, | |||
1138 | /* | 1151 | /* |
1139 | * Writeback | 1152 | * Writeback |
1140 | */ | 1153 | */ |
1141 | #define AMDGPU_MAX_WB 1024 /* Reserve at most 1024 WB slots for amdgpu-owned rings. */ | 1154 | #define AMDGPU_MAX_WB 512 /* Reserve at most 512 WB slots for amdgpu-owned rings. */ |
1142 | 1155 | ||
1143 | struct amdgpu_wb { | 1156 | struct amdgpu_wb { |
1144 | struct amdgpu_bo *wb_obj; | 1157 | struct amdgpu_bo *wb_obj; |
@@ -1379,6 +1392,18 @@ struct amdgpu_atcs { | |||
1379 | }; | 1392 | }; |
1380 | 1393 | ||
1381 | /* | 1394 | /* |
1395 | * Firmware VRAM reservation | ||
1396 | */ | ||
1397 | struct amdgpu_fw_vram_usage { | ||
1398 | u64 start_offset; | ||
1399 | u64 size; | ||
1400 | struct amdgpu_bo *reserved_bo; | ||
1401 | void *va; | ||
1402 | }; | ||
1403 | |||
1404 | int amdgpu_fw_reserve_vram_init(struct amdgpu_device *adev); | ||
1405 | |||
1406 | /* | ||
1382 | * CGS | 1407 | * CGS |
1383 | */ | 1408 | */ |
1384 | struct cgs_device *amdgpu_cgs_create_device(struct amdgpu_device *adev); | 1409 | struct cgs_device *amdgpu_cgs_create_device(struct amdgpu_device *adev); |
@@ -1582,6 +1607,8 @@ struct amdgpu_device { | |||
1582 | struct delayed_work late_init_work; | 1607 | struct delayed_work late_init_work; |
1583 | 1608 | ||
1584 | struct amdgpu_virt virt; | 1609 | struct amdgpu_virt virt; |
1610 | /* firmware VRAM reservation */ | ||
1611 | struct amdgpu_fw_vram_usage fw_vram_usage; | ||
1585 | 1612 | ||
1586 | /* link all shadow bo */ | 1613 | /* link all shadow bo */ |
1587 | struct list_head shadow_list; | 1614 | struct list_head shadow_list; |
@@ -1833,8 +1860,6 @@ static inline bool amdgpu_has_atpx(void) { return false; } | |||
1833 | extern const struct drm_ioctl_desc amdgpu_ioctls_kms[]; | 1860 | extern const struct drm_ioctl_desc amdgpu_ioctls_kms[]; |
1834 | extern const int amdgpu_max_kms_ioctl; | 1861 | extern const int amdgpu_max_kms_ioctl; |
1835 | 1862 | ||
1836 | bool amdgpu_kms_vram_lost(struct amdgpu_device *adev, | ||
1837 | struct amdgpu_fpriv *fpriv); | ||
1838 | int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags); | 1863 | int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags); |
1839 | void amdgpu_driver_unload_kms(struct drm_device *dev); | 1864 | void amdgpu_driver_unload_kms(struct drm_device *dev); |
1840 | void amdgpu_driver_lastclose_kms(struct drm_device *dev); | 1865 | void amdgpu_driver_lastclose_kms(struct drm_device *dev); |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
index ce443586a0c7..f66d33e4baca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
@@ -1807,6 +1807,8 @@ int amdgpu_atombios_allocate_fb_scratch(struct amdgpu_device *adev) | |||
1807 | uint16_t data_offset; | 1807 | uint16_t data_offset; |
1808 | int usage_bytes = 0; | 1808 | int usage_bytes = 0; |
1809 | struct _ATOM_VRAM_USAGE_BY_FIRMWARE *firmware_usage; | 1809 | struct _ATOM_VRAM_USAGE_BY_FIRMWARE *firmware_usage; |
1810 | u64 start_addr; | ||
1811 | u64 size; | ||
1810 | 1812 | ||
1811 | if (amdgpu_atom_parse_data_header(ctx, index, NULL, NULL, NULL, &data_offset)) { | 1813 | if (amdgpu_atom_parse_data_header(ctx, index, NULL, NULL, NULL, &data_offset)) { |
1812 | firmware_usage = (struct _ATOM_VRAM_USAGE_BY_FIRMWARE *)(ctx->bios + data_offset); | 1814 | firmware_usage = (struct _ATOM_VRAM_USAGE_BY_FIRMWARE *)(ctx->bios + data_offset); |
@@ -1815,7 +1817,21 @@ int amdgpu_atombios_allocate_fb_scratch(struct amdgpu_device *adev) | |||
1815 | le32_to_cpu(firmware_usage->asFirmwareVramReserveInfo[0].ulStartAddrUsedByFirmware), | 1817 | le32_to_cpu(firmware_usage->asFirmwareVramReserveInfo[0].ulStartAddrUsedByFirmware), |
1816 | le16_to_cpu(firmware_usage->asFirmwareVramReserveInfo[0].usFirmwareUseInKb)); | 1818 | le16_to_cpu(firmware_usage->asFirmwareVramReserveInfo[0].usFirmwareUseInKb)); |
1817 | 1819 | ||
1818 | usage_bytes = le16_to_cpu(firmware_usage->asFirmwareVramReserveInfo[0].usFirmwareUseInKb) * 1024; | 1820 | start_addr = firmware_usage->asFirmwareVramReserveInfo[0].ulStartAddrUsedByFirmware; |
1821 | size = firmware_usage->asFirmwareVramReserveInfo[0].usFirmwareUseInKb; | ||
1822 | |||
1823 | if ((uint32_t)(start_addr & ATOM_VRAM_OPERATION_FLAGS_MASK) == | ||
1824 | (uint32_t)(ATOM_VRAM_BLOCK_SRIOV_MSG_SHARE_RESERVATION << | ||
1825 | ATOM_VRAM_OPERATION_FLAGS_SHIFT)) { | ||
1826 | /* Firmware request VRAM reservation for SR-IOV */ | ||
1827 | adev->fw_vram_usage.start_offset = (start_addr & | ||
1828 | (~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10; | ||
1829 | adev->fw_vram_usage.size = size << 10; | ||
1830 | /* Use the default scratch size */ | ||
1831 | usage_bytes = 0; | ||
1832 | } else { | ||
1833 | usage_bytes = le16_to_cpu(firmware_usage->asFirmwareVramReserveInfo[0].usFirmwareUseInKb) * 1024; | ||
1834 | } | ||
1819 | } | 1835 | } |
1820 | ctx->scratch_size_bytes = 0; | 1836 | ctx->scratch_size_bytes = 0; |
1821 | if (usage_bytes == 0) | 1837 | if (usage_bytes == 0) |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index ab83dfcabb41..f7fceb63413c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -90,12 +90,14 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) | |||
90 | goto free_chunk; | 90 | goto free_chunk; |
91 | } | 91 | } |
92 | 92 | ||
93 | mutex_lock(&p->ctx->lock); | ||
94 | |||
93 | /* get chunks */ | 95 | /* get chunks */ |
94 | chunk_array_user = u64_to_user_ptr(cs->in.chunks); | 96 | chunk_array_user = u64_to_user_ptr(cs->in.chunks); |
95 | if (copy_from_user(chunk_array, chunk_array_user, | 97 | if (copy_from_user(chunk_array, chunk_array_user, |
96 | sizeof(uint64_t)*cs->in.num_chunks)) { | 98 | sizeof(uint64_t)*cs->in.num_chunks)) { |
97 | ret = -EFAULT; | 99 | ret = -EFAULT; |
98 | goto put_ctx; | 100 | goto free_chunk; |
99 | } | 101 | } |
100 | 102 | ||
101 | p->nchunks = cs->in.num_chunks; | 103 | p->nchunks = cs->in.num_chunks; |
@@ -103,7 +105,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) | |||
103 | GFP_KERNEL); | 105 | GFP_KERNEL); |
104 | if (!p->chunks) { | 106 | if (!p->chunks) { |
105 | ret = -ENOMEM; | 107 | ret = -ENOMEM; |
106 | goto put_ctx; | 108 | goto free_chunk; |
107 | } | 109 | } |
108 | 110 | ||
109 | for (i = 0; i < p->nchunks; i++) { | 111 | for (i = 0; i < p->nchunks; i++) { |
@@ -170,6 +172,11 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) | |||
170 | if (ret) | 172 | if (ret) |
171 | goto free_all_kdata; | 173 | goto free_all_kdata; |
172 | 174 | ||
175 | if (p->ctx->vram_lost_counter != p->job->vram_lost_counter) { | ||
176 | ret = -ECANCELED; | ||
177 | goto free_all_kdata; | ||
178 | } | ||
179 | |||
173 | if (p->uf_entry.robj) | 180 | if (p->uf_entry.robj) |
174 | p->job->uf_addr = uf_offset; | 181 | p->job->uf_addr = uf_offset; |
175 | kfree(chunk_array); | 182 | kfree(chunk_array); |
@@ -183,8 +190,6 @@ free_partial_kdata: | |||
183 | kfree(p->chunks); | 190 | kfree(p->chunks); |
184 | p->chunks = NULL; | 191 | p->chunks = NULL; |
185 | p->nchunks = 0; | 192 | p->nchunks = 0; |
186 | put_ctx: | ||
187 | amdgpu_ctx_put(p->ctx); | ||
188 | free_chunk: | 193 | free_chunk: |
189 | kfree(chunk_array); | 194 | kfree(chunk_array); |
190 | 195 | ||
@@ -705,7 +710,8 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) | |||
705 | 710 | ||
706 | list_for_each_entry(e, &p->validated, tv.head) { | 711 | list_for_each_entry(e, &p->validated, tv.head) { |
707 | struct reservation_object *resv = e->robj->tbo.resv; | 712 | struct reservation_object *resv = e->robj->tbo.resv; |
708 | r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp); | 713 | r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp, |
714 | amdgpu_bo_explicit_sync(e->robj)); | ||
709 | 715 | ||
710 | if (r) | 716 | if (r) |
711 | return r; | 717 | return r; |
@@ -736,8 +742,10 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, | |||
736 | 742 | ||
737 | dma_fence_put(parser->fence); | 743 | dma_fence_put(parser->fence); |
738 | 744 | ||
739 | if (parser->ctx) | 745 | if (parser->ctx) { |
746 | mutex_unlock(&parser->ctx->lock); | ||
740 | amdgpu_ctx_put(parser->ctx); | 747 | amdgpu_ctx_put(parser->ctx); |
748 | } | ||
741 | if (parser->bo_list) | 749 | if (parser->bo_list) |
742 | amdgpu_bo_list_put(parser->bo_list); | 750 | amdgpu_bo_list_put(parser->bo_list); |
743 | 751 | ||
@@ -844,14 +852,58 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev, | |||
844 | struct amdgpu_fpriv *fpriv = p->filp->driver_priv; | 852 | struct amdgpu_fpriv *fpriv = p->filp->driver_priv; |
845 | struct amdgpu_vm *vm = &fpriv->vm; | 853 | struct amdgpu_vm *vm = &fpriv->vm; |
846 | struct amdgpu_ring *ring = p->job->ring; | 854 | struct amdgpu_ring *ring = p->job->ring; |
847 | int i, r; | 855 | int r; |
848 | 856 | ||
849 | /* Only for UVD/VCE VM emulation */ | 857 | /* Only for UVD/VCE VM emulation */ |
850 | if (ring->funcs->parse_cs) { | 858 | if (p->job->ring->funcs->parse_cs) { |
851 | for (i = 0; i < p->job->num_ibs; i++) { | 859 | unsigned i, j; |
852 | r = amdgpu_ring_parse_cs(ring, p, i); | 860 | |
861 | for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) { | ||
862 | struct drm_amdgpu_cs_chunk_ib *chunk_ib; | ||
863 | struct amdgpu_bo_va_mapping *m; | ||
864 | struct amdgpu_bo *aobj = NULL; | ||
865 | struct amdgpu_cs_chunk *chunk; | ||
866 | struct amdgpu_ib *ib; | ||
867 | uint64_t offset; | ||
868 | uint8_t *kptr; | ||
869 | |||
870 | chunk = &p->chunks[i]; | ||
871 | ib = &p->job->ibs[j]; | ||
872 | chunk_ib = chunk->kdata; | ||
873 | |||
874 | if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB) | ||
875 | continue; | ||
876 | |||
877 | r = amdgpu_cs_find_mapping(p, chunk_ib->va_start, | ||
878 | &aobj, &m); | ||
879 | if (r) { | ||
880 | DRM_ERROR("IB va_start is invalid\n"); | ||
881 | return r; | ||
882 | } | ||
883 | |||
884 | if ((chunk_ib->va_start + chunk_ib->ib_bytes) > | ||
885 | (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) { | ||
886 | DRM_ERROR("IB va_start+ib_bytes is invalid\n"); | ||
887 | return -EINVAL; | ||
888 | } | ||
889 | |||
890 | /* the IB should be reserved at this point */ | ||
891 | r = amdgpu_bo_kmap(aobj, (void **)&kptr); | ||
892 | if (r) { | ||
893 | return r; | ||
894 | } | ||
895 | |||
896 | offset = m->start * AMDGPU_GPU_PAGE_SIZE; | ||
897 | kptr += chunk_ib->va_start - offset; | ||
898 | |||
899 | memcpy(ib->ptr, kptr, chunk_ib->ib_bytes); | ||
900 | amdgpu_bo_kunmap(aobj); | ||
901 | |||
902 | r = amdgpu_ring_parse_cs(ring, p, j); | ||
853 | if (r) | 903 | if (r) |
854 | return r; | 904 | return r; |
905 | |||
906 | j++; | ||
855 | } | 907 | } |
856 | } | 908 | } |
857 | 909 | ||
@@ -918,54 +970,18 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, | |||
918 | 970 | ||
919 | parser->job->ring = ring; | 971 | parser->job->ring = ring; |
920 | 972 | ||
921 | if (ring->funcs->parse_cs) { | 973 | r = amdgpu_ib_get(adev, vm, |
922 | struct amdgpu_bo_va_mapping *m; | 974 | ring->funcs->parse_cs ? chunk_ib->ib_bytes : 0, |
923 | struct amdgpu_bo *aobj = NULL; | 975 | ib); |
924 | uint64_t offset; | 976 | if (r) { |
925 | uint8_t *kptr; | 977 | DRM_ERROR("Failed to get ib !\n"); |
926 | 978 | return r; | |
927 | r = amdgpu_cs_find_mapping(parser, chunk_ib->va_start, | ||
928 | &aobj, &m); | ||
929 | if (r) { | ||
930 | DRM_ERROR("IB va_start is invalid\n"); | ||
931 | return r; | ||
932 | } | ||
933 | |||
934 | if ((chunk_ib->va_start + chunk_ib->ib_bytes) > | ||
935 | (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) { | ||
936 | DRM_ERROR("IB va_start+ib_bytes is invalid\n"); | ||
937 | return -EINVAL; | ||
938 | } | ||
939 | |||
940 | /* the IB should be reserved at this point */ | ||
941 | r = amdgpu_bo_kmap(aobj, (void **)&kptr); | ||
942 | if (r) { | ||
943 | return r; | ||
944 | } | ||
945 | |||
946 | offset = m->start * AMDGPU_GPU_PAGE_SIZE; | ||
947 | kptr += chunk_ib->va_start - offset; | ||
948 | |||
949 | r = amdgpu_ib_get(adev, vm, chunk_ib->ib_bytes, ib); | ||
950 | if (r) { | ||
951 | DRM_ERROR("Failed to get ib !\n"); | ||
952 | return r; | ||
953 | } | ||
954 | |||
955 | memcpy(ib->ptr, kptr, chunk_ib->ib_bytes); | ||
956 | amdgpu_bo_kunmap(aobj); | ||
957 | } else { | ||
958 | r = amdgpu_ib_get(adev, vm, 0, ib); | ||
959 | if (r) { | ||
960 | DRM_ERROR("Failed to get ib !\n"); | ||
961 | return r; | ||
962 | } | ||
963 | |||
964 | } | 979 | } |
965 | 980 | ||
966 | ib->gpu_addr = chunk_ib->va_start; | 981 | ib->gpu_addr = chunk_ib->va_start; |
967 | ib->length_dw = chunk_ib->ib_bytes / 4; | 982 | ib->length_dw = chunk_ib->ib_bytes / 4; |
968 | ib->flags = chunk_ib->flags; | 983 | ib->flags = chunk_ib->flags; |
984 | |||
969 | j++; | 985 | j++; |
970 | } | 986 | } |
971 | 987 | ||
@@ -975,7 +991,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, | |||
975 | parser->job->ring->funcs->type == AMDGPU_RING_TYPE_VCE)) | 991 | parser->job->ring->funcs->type == AMDGPU_RING_TYPE_VCE)) |
976 | return -EINVAL; | 992 | return -EINVAL; |
977 | 993 | ||
978 | return 0; | 994 | return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->job->ring->idx); |
979 | } | 995 | } |
980 | 996 | ||
981 | static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p, | 997 | static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p, |
@@ -1176,6 +1192,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, | |||
1176 | job->uf_sequence = seq; | 1192 | job->uf_sequence = seq; |
1177 | 1193 | ||
1178 | amdgpu_job_free_resources(job); | 1194 | amdgpu_job_free_resources(job); |
1195 | amdgpu_ring_priority_get(job->ring, | ||
1196 | amd_sched_get_job_priority(&job->base)); | ||
1179 | 1197 | ||
1180 | trace_amdgpu_cs_ioctl(job); | 1198 | trace_amdgpu_cs_ioctl(job); |
1181 | amd_sched_entity_push_job(&job->base); | 1199 | amd_sched_entity_push_job(&job->base); |
@@ -1189,7 +1207,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, | |||
1189 | int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) | 1207 | int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) |
1190 | { | 1208 | { |
1191 | struct amdgpu_device *adev = dev->dev_private; | 1209 | struct amdgpu_device *adev = dev->dev_private; |
1192 | struct amdgpu_fpriv *fpriv = filp->driver_priv; | ||
1193 | union drm_amdgpu_cs *cs = data; | 1210 | union drm_amdgpu_cs *cs = data; |
1194 | struct amdgpu_cs_parser parser = {}; | 1211 | struct amdgpu_cs_parser parser = {}; |
1195 | bool reserved_buffers = false; | 1212 | bool reserved_buffers = false; |
@@ -1197,8 +1214,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) | |||
1197 | 1214 | ||
1198 | if (!adev->accel_working) | 1215 | if (!adev->accel_working) |
1199 | return -EBUSY; | 1216 | return -EBUSY; |
1200 | if (amdgpu_kms_vram_lost(adev, fpriv)) | ||
1201 | return -ENODEV; | ||
1202 | 1217 | ||
1203 | parser.adev = adev; | 1218 | parser.adev = adev; |
1204 | parser.filp = filp; | 1219 | parser.filp = filp; |
@@ -1209,6 +1224,10 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) | |||
1209 | goto out; | 1224 | goto out; |
1210 | } | 1225 | } |
1211 | 1226 | ||
1227 | r = amdgpu_cs_ib_fill(adev, &parser); | ||
1228 | if (r) | ||
1229 | goto out; | ||
1230 | |||
1212 | r = amdgpu_cs_parser_bos(&parser, data); | 1231 | r = amdgpu_cs_parser_bos(&parser, data); |
1213 | if (r) { | 1232 | if (r) { |
1214 | if (r == -ENOMEM) | 1233 | if (r == -ENOMEM) |
@@ -1219,9 +1238,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) | |||
1219 | } | 1238 | } |
1220 | 1239 | ||
1221 | reserved_buffers = true; | 1240 | reserved_buffers = true; |
1222 | r = amdgpu_cs_ib_fill(adev, &parser); | ||
1223 | if (r) | ||
1224 | goto out; | ||
1225 | 1241 | ||
1226 | r = amdgpu_cs_dependencies(adev, &parser); | 1242 | r = amdgpu_cs_dependencies(adev, &parser); |
1227 | if (r) { | 1243 | if (r) { |
@@ -1257,16 +1273,12 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, | |||
1257 | { | 1273 | { |
1258 | union drm_amdgpu_wait_cs *wait = data; | 1274 | union drm_amdgpu_wait_cs *wait = data; |
1259 | struct amdgpu_device *adev = dev->dev_private; | 1275 | struct amdgpu_device *adev = dev->dev_private; |
1260 | struct amdgpu_fpriv *fpriv = filp->driver_priv; | ||
1261 | unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout); | 1276 | unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout); |
1262 | struct amdgpu_ring *ring = NULL; | 1277 | struct amdgpu_ring *ring = NULL; |
1263 | struct amdgpu_ctx *ctx; | 1278 | struct amdgpu_ctx *ctx; |
1264 | struct dma_fence *fence; | 1279 | struct dma_fence *fence; |
1265 | long r; | 1280 | long r; |
1266 | 1281 | ||
1267 | if (amdgpu_kms_vram_lost(adev, fpriv)) | ||
1268 | return -ENODEV; | ||
1269 | |||
1270 | ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id); | 1282 | ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id); |
1271 | if (ctx == NULL) | 1283 | if (ctx == NULL) |
1272 | return -EINVAL; | 1284 | return -EINVAL; |
@@ -1284,6 +1296,8 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, | |||
1284 | r = PTR_ERR(fence); | 1296 | r = PTR_ERR(fence); |
1285 | else if (fence) { | 1297 | else if (fence) { |
1286 | r = dma_fence_wait_timeout(fence, true, timeout); | 1298 | r = dma_fence_wait_timeout(fence, true, timeout); |
1299 | if (r > 0 && fence->error) | ||
1300 | r = fence->error; | ||
1287 | dma_fence_put(fence); | 1301 | dma_fence_put(fence); |
1288 | } else | 1302 | } else |
1289 | r = 1; | 1303 | r = 1; |
@@ -1335,16 +1349,12 @@ int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data, | |||
1335 | struct drm_file *filp) | 1349 | struct drm_file *filp) |
1336 | { | 1350 | { |
1337 | struct amdgpu_device *adev = dev->dev_private; | 1351 | struct amdgpu_device *adev = dev->dev_private; |
1338 | struct amdgpu_fpriv *fpriv = filp->driver_priv; | ||
1339 | union drm_amdgpu_fence_to_handle *info = data; | 1352 | union drm_amdgpu_fence_to_handle *info = data; |
1340 | struct dma_fence *fence; | 1353 | struct dma_fence *fence; |
1341 | struct drm_syncobj *syncobj; | 1354 | struct drm_syncobj *syncobj; |
1342 | struct sync_file *sync_file; | 1355 | struct sync_file *sync_file; |
1343 | int fd, r; | 1356 | int fd, r; |
1344 | 1357 | ||
1345 | if (amdgpu_kms_vram_lost(adev, fpriv)) | ||
1346 | return -ENODEV; | ||
1347 | |||
1348 | fence = amdgpu_cs_get_fence(adev, filp, &info->in.fence); | 1358 | fence = amdgpu_cs_get_fence(adev, filp, &info->in.fence); |
1349 | if (IS_ERR(fence)) | 1359 | if (IS_ERR(fence)) |
1350 | return PTR_ERR(fence); | 1360 | return PTR_ERR(fence); |
@@ -1425,6 +1435,9 @@ static int amdgpu_cs_wait_all_fences(struct amdgpu_device *adev, | |||
1425 | 1435 | ||
1426 | if (r == 0) | 1436 | if (r == 0) |
1427 | break; | 1437 | break; |
1438 | |||
1439 | if (fence->error) | ||
1440 | return fence->error; | ||
1428 | } | 1441 | } |
1429 | 1442 | ||
1430 | memset(wait, 0, sizeof(*wait)); | 1443 | memset(wait, 0, sizeof(*wait)); |
@@ -1485,7 +1498,7 @@ out: | |||
1485 | wait->out.status = (r > 0); | 1498 | wait->out.status = (r > 0); |
1486 | wait->out.first_signaled = first; | 1499 | wait->out.first_signaled = first; |
1487 | /* set return value 0 to indicate success */ | 1500 | /* set return value 0 to indicate success */ |
1488 | r = 0; | 1501 | r = array[first]->error; |
1489 | 1502 | ||
1490 | err_free_fence_array: | 1503 | err_free_fence_array: |
1491 | for (i = 0; i < fence_count; i++) | 1504 | for (i = 0; i < fence_count; i++) |
@@ -1506,15 +1519,12 @@ int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data, | |||
1506 | struct drm_file *filp) | 1519 | struct drm_file *filp) |
1507 | { | 1520 | { |
1508 | struct amdgpu_device *adev = dev->dev_private; | 1521 | struct amdgpu_device *adev = dev->dev_private; |
1509 | struct amdgpu_fpriv *fpriv = filp->driver_priv; | ||
1510 | union drm_amdgpu_wait_fences *wait = data; | 1522 | union drm_amdgpu_wait_fences *wait = data; |
1511 | uint32_t fence_count = wait->in.fence_count; | 1523 | uint32_t fence_count = wait->in.fence_count; |
1512 | struct drm_amdgpu_fence *fences_user; | 1524 | struct drm_amdgpu_fence *fences_user; |
1513 | struct drm_amdgpu_fence *fences; | 1525 | struct drm_amdgpu_fence *fences; |
1514 | int r; | 1526 | int r; |
1515 | 1527 | ||
1516 | if (amdgpu_kms_vram_lost(adev, fpriv)) | ||
1517 | return -ENODEV; | ||
1518 | /* Get the fences from userspace */ | 1528 | /* Get the fences from userspace */ |
1519 | fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence), | 1529 | fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence), |
1520 | GFP_KERNEL); | 1530 | GFP_KERNEL); |
@@ -1572,14 +1582,14 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, | |||
1572 | if (READ_ONCE((*bo)->tbo.resv->lock.ctx) != &parser->ticket) | 1582 | if (READ_ONCE((*bo)->tbo.resv->lock.ctx) != &parser->ticket) |
1573 | return -EINVAL; | 1583 | return -EINVAL; |
1574 | 1584 | ||
1575 | r = amdgpu_ttm_bind(&(*bo)->tbo, &(*bo)->tbo.mem); | 1585 | if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) { |
1576 | if (unlikely(r)) | 1586 | (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; |
1577 | return r; | 1587 | amdgpu_ttm_placement_from_domain(*bo, (*bo)->allowed_domains); |
1578 | 1588 | r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, false, | |
1579 | if ((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) | 1589 | false); |
1580 | return 0; | 1590 | if (r) |
1591 | return r; | ||
1592 | } | ||
1581 | 1593 | ||
1582 | (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; | 1594 | return amdgpu_ttm_bind(&(*bo)->tbo, &(*bo)->tbo.mem); |
1583 | amdgpu_ttm_placement_from_domain(*bo, (*bo)->allowed_domains); | ||
1584 | return ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, false, false); | ||
1585 | } | 1595 | } |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 75c933b1a432..c184468e2b2b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -23,13 +23,41 @@ | |||
23 | */ | 23 | */ |
24 | 24 | ||
25 | #include <drm/drmP.h> | 25 | #include <drm/drmP.h> |
26 | #include <drm/drm_auth.h> | ||
26 | #include "amdgpu.h" | 27 | #include "amdgpu.h" |
28 | #include "amdgpu_sched.h" | ||
27 | 29 | ||
28 | static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx) | 30 | static int amdgpu_ctx_priority_permit(struct drm_file *filp, |
31 | enum amd_sched_priority priority) | ||
32 | { | ||
33 | /* NORMAL and below are accessible by everyone */ | ||
34 | if (priority <= AMD_SCHED_PRIORITY_NORMAL) | ||
35 | return 0; | ||
36 | |||
37 | if (capable(CAP_SYS_NICE)) | ||
38 | return 0; | ||
39 | |||
40 | if (drm_is_current_master(filp)) | ||
41 | return 0; | ||
42 | |||
43 | return -EACCES; | ||
44 | } | ||
45 | |||
46 | static int amdgpu_ctx_init(struct amdgpu_device *adev, | ||
47 | enum amd_sched_priority priority, | ||
48 | struct drm_file *filp, | ||
49 | struct amdgpu_ctx *ctx) | ||
29 | { | 50 | { |
30 | unsigned i, j; | 51 | unsigned i, j; |
31 | int r; | 52 | int r; |
32 | 53 | ||
54 | if (priority < 0 || priority >= AMD_SCHED_PRIORITY_MAX) | ||
55 | return -EINVAL; | ||
56 | |||
57 | r = amdgpu_ctx_priority_permit(filp, priority); | ||
58 | if (r) | ||
59 | return r; | ||
60 | |||
33 | memset(ctx, 0, sizeof(*ctx)); | 61 | memset(ctx, 0, sizeof(*ctx)); |
34 | ctx->adev = adev; | 62 | ctx->adev = adev; |
35 | kref_init(&ctx->refcount); | 63 | kref_init(&ctx->refcount); |
@@ -39,19 +67,24 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx) | |||
39 | if (!ctx->fences) | 67 | if (!ctx->fences) |
40 | return -ENOMEM; | 68 | return -ENOMEM; |
41 | 69 | ||
70 | mutex_init(&ctx->lock); | ||
71 | |||
42 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { | 72 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { |
43 | ctx->rings[i].sequence = 1; | 73 | ctx->rings[i].sequence = 1; |
44 | ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i]; | 74 | ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i]; |
45 | } | 75 | } |
46 | 76 | ||
47 | ctx->reset_counter = atomic_read(&adev->gpu_reset_counter); | 77 | ctx->reset_counter = atomic_read(&adev->gpu_reset_counter); |
78 | ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter); | ||
79 | ctx->init_priority = priority; | ||
80 | ctx->override_priority = AMD_SCHED_PRIORITY_UNSET; | ||
48 | 81 | ||
49 | /* create context entity for each ring */ | 82 | /* create context entity for each ring */ |
50 | for (i = 0; i < adev->num_rings; i++) { | 83 | for (i = 0; i < adev->num_rings; i++) { |
51 | struct amdgpu_ring *ring = adev->rings[i]; | 84 | struct amdgpu_ring *ring = adev->rings[i]; |
52 | struct amd_sched_rq *rq; | 85 | struct amd_sched_rq *rq; |
53 | 86 | ||
54 | rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL]; | 87 | rq = &ring->sched.sched_rq[priority]; |
55 | 88 | ||
56 | if (ring == &adev->gfx.kiq.ring) | 89 | if (ring == &adev->gfx.kiq.ring) |
57 | continue; | 90 | continue; |
@@ -96,10 +129,14 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx) | |||
96 | &ctx->rings[i].entity); | 129 | &ctx->rings[i].entity); |
97 | 130 | ||
98 | amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr); | 131 | amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr); |
132 | |||
133 | mutex_destroy(&ctx->lock); | ||
99 | } | 134 | } |
100 | 135 | ||
101 | static int amdgpu_ctx_alloc(struct amdgpu_device *adev, | 136 | static int amdgpu_ctx_alloc(struct amdgpu_device *adev, |
102 | struct amdgpu_fpriv *fpriv, | 137 | struct amdgpu_fpriv *fpriv, |
138 | struct drm_file *filp, | ||
139 | enum amd_sched_priority priority, | ||
103 | uint32_t *id) | 140 | uint32_t *id) |
104 | { | 141 | { |
105 | struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr; | 142 | struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr; |
@@ -117,8 +154,9 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev, | |||
117 | kfree(ctx); | 154 | kfree(ctx); |
118 | return r; | 155 | return r; |
119 | } | 156 | } |
157 | |||
120 | *id = (uint32_t)r; | 158 | *id = (uint32_t)r; |
121 | r = amdgpu_ctx_init(adev, ctx); | 159 | r = amdgpu_ctx_init(adev, priority, filp, ctx); |
122 | if (r) { | 160 | if (r) { |
123 | idr_remove(&mgr->ctx_handles, *id); | 161 | idr_remove(&mgr->ctx_handles, *id); |
124 | *id = 0; | 162 | *id = 0; |
@@ -193,6 +231,7 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, | |||
193 | { | 231 | { |
194 | int r; | 232 | int r; |
195 | uint32_t id; | 233 | uint32_t id; |
234 | enum amd_sched_priority priority; | ||
196 | 235 | ||
197 | union drm_amdgpu_ctx *args = data; | 236 | union drm_amdgpu_ctx *args = data; |
198 | struct amdgpu_device *adev = dev->dev_private; | 237 | struct amdgpu_device *adev = dev->dev_private; |
@@ -200,10 +239,16 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, | |||
200 | 239 | ||
201 | r = 0; | 240 | r = 0; |
202 | id = args->in.ctx_id; | 241 | id = args->in.ctx_id; |
242 | priority = amdgpu_to_sched_priority(args->in.priority); | ||
243 | |||
244 | /* For backwards compatibility reasons, we need to accept | ||
245 | * ioctls with garbage in the priority field */ | ||
246 | if (priority == AMD_SCHED_PRIORITY_INVALID) | ||
247 | priority = AMD_SCHED_PRIORITY_NORMAL; | ||
203 | 248 | ||
204 | switch (args->in.op) { | 249 | switch (args->in.op) { |
205 | case AMDGPU_CTX_OP_ALLOC_CTX: | 250 | case AMDGPU_CTX_OP_ALLOC_CTX: |
206 | r = amdgpu_ctx_alloc(adev, fpriv, &id); | 251 | r = amdgpu_ctx_alloc(adev, fpriv, filp, priority, &id); |
207 | args->out.alloc.ctx_id = id; | 252 | args->out.alloc.ctx_id = id; |
208 | break; | 253 | break; |
209 | case AMDGPU_CTX_OP_FREE_CTX: | 254 | case AMDGPU_CTX_OP_FREE_CTX: |
@@ -256,12 +301,8 @@ int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, | |||
256 | 301 | ||
257 | idx = seq & (amdgpu_sched_jobs - 1); | 302 | idx = seq & (amdgpu_sched_jobs - 1); |
258 | other = cring->fences[idx]; | 303 | other = cring->fences[idx]; |
259 | if (other) { | 304 | if (other) |
260 | signed long r; | 305 | BUG_ON(!dma_fence_is_signaled(other)); |
261 | r = dma_fence_wait_timeout(other, true, MAX_SCHEDULE_TIMEOUT); | ||
262 | if (r < 0) | ||
263 | return r; | ||
264 | } | ||
265 | 306 | ||
266 | dma_fence_get(fence); | 307 | dma_fence_get(fence); |
267 | 308 | ||
@@ -305,6 +346,51 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, | |||
305 | return fence; | 346 | return fence; |
306 | } | 347 | } |
307 | 348 | ||
349 | void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx, | ||
350 | enum amd_sched_priority priority) | ||
351 | { | ||
352 | int i; | ||
353 | struct amdgpu_device *adev = ctx->adev; | ||
354 | struct amd_sched_rq *rq; | ||
355 | struct amd_sched_entity *entity; | ||
356 | struct amdgpu_ring *ring; | ||
357 | enum amd_sched_priority ctx_prio; | ||
358 | |||
359 | ctx->override_priority = priority; | ||
360 | |||
361 | ctx_prio = (ctx->override_priority == AMD_SCHED_PRIORITY_UNSET) ? | ||
362 | ctx->init_priority : ctx->override_priority; | ||
363 | |||
364 | for (i = 0; i < adev->num_rings; i++) { | ||
365 | ring = adev->rings[i]; | ||
366 | entity = &ctx->rings[i].entity; | ||
367 | rq = &ring->sched.sched_rq[ctx_prio]; | ||
368 | |||
369 | if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) | ||
370 | continue; | ||
371 | |||
372 | amd_sched_entity_set_rq(entity, rq); | ||
373 | } | ||
374 | } | ||
375 | |||
376 | int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id) | ||
377 | { | ||
378 | struct amdgpu_ctx_ring *cring = &ctx->rings[ring_id]; | ||
379 | unsigned idx = cring->sequence & (amdgpu_sched_jobs - 1); | ||
380 | struct dma_fence *other = cring->fences[idx]; | ||
381 | |||
382 | if (other) { | ||
383 | signed long r; | ||
384 | r = dma_fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT); | ||
385 | if (r < 0) { | ||
386 | DRM_ERROR("Error (%ld) waiting for fence!\n", r); | ||
387 | return r; | ||
388 | } | ||
389 | } | ||
390 | |||
391 | return 0; | ||
392 | } | ||
393 | |||
308 | void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr) | 394 | void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr) |
309 | { | 395 | { |
310 | mutex_init(&mgr->lock); | 396 | mutex_init(&mgr->lock); |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 1949d8aedf49..0b9332e65a4c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -109,10 +109,8 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg, | |||
109 | { | 109 | { |
110 | uint32_t ret; | 110 | uint32_t ret; |
111 | 111 | ||
112 | if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) { | 112 | if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) |
113 | BUG_ON(in_interrupt()); | ||
114 | return amdgpu_virt_kiq_rreg(adev, reg); | 113 | return amdgpu_virt_kiq_rreg(adev, reg); |
115 | } | ||
116 | 114 | ||
117 | if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX)) | 115 | if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX)) |
118 | ret = readl(((void __iomem *)adev->rmmio) + (reg * 4)); | 116 | ret = readl(((void __iomem *)adev->rmmio) + (reg * 4)); |
@@ -137,10 +135,8 @@ void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, | |||
137 | adev->last_mm_index = v; | 135 | adev->last_mm_index = v; |
138 | } | 136 | } |
139 | 137 | ||
140 | if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) { | 138 | if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) |
141 | BUG_ON(in_interrupt()); | ||
142 | return amdgpu_virt_kiq_wreg(adev, reg, v); | 139 | return amdgpu_virt_kiq_wreg(adev, reg, v); |
143 | } | ||
144 | 140 | ||
145 | if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX)) | 141 | if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX)) |
146 | writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); | 142 | writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); |
@@ -658,6 +654,81 @@ void amdgpu_gart_location(struct amdgpu_device *adev, struct amdgpu_mc *mc) | |||
658 | } | 654 | } |
659 | 655 | ||
660 | /* | 656 | /* |
657 | * Firmware Reservation functions | ||
658 | */ | ||
659 | /** | ||
660 | * amdgpu_fw_reserve_vram_fini - free fw reserved vram | ||
661 | * | ||
662 | * @adev: amdgpu_device pointer | ||
663 | * | ||
664 | * free fw reserved vram if it has been reserved. | ||
665 | */ | ||
666 | void amdgpu_fw_reserve_vram_fini(struct amdgpu_device *adev) | ||
667 | { | ||
668 | amdgpu_bo_free_kernel(&adev->fw_vram_usage.reserved_bo, | ||
669 | NULL, &adev->fw_vram_usage.va); | ||
670 | } | ||
671 | |||
672 | /** | ||
673 | * amdgpu_fw_reserve_vram_init - create bo vram reservation from fw | ||
674 | * | ||
675 | * @adev: amdgpu_device pointer | ||
676 | * | ||
677 | * create bo vram reservation from fw. | ||
678 | */ | ||
679 | int amdgpu_fw_reserve_vram_init(struct amdgpu_device *adev) | ||
680 | { | ||
681 | int r = 0; | ||
682 | u64 gpu_addr; | ||
683 | u64 vram_size = adev->mc.visible_vram_size; | ||
684 | |||
685 | adev->fw_vram_usage.va = NULL; | ||
686 | adev->fw_vram_usage.reserved_bo = NULL; | ||
687 | |||
688 | if (adev->fw_vram_usage.size > 0 && | ||
689 | adev->fw_vram_usage.size <= vram_size) { | ||
690 | |||
691 | r = amdgpu_bo_create(adev, adev->fw_vram_usage.size, | ||
692 | PAGE_SIZE, true, 0, | ||
693 | AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | | ||
694 | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, NULL, NULL, 0, | ||
695 | &adev->fw_vram_usage.reserved_bo); | ||
696 | if (r) | ||
697 | goto error_create; | ||
698 | |||
699 | r = amdgpu_bo_reserve(adev->fw_vram_usage.reserved_bo, false); | ||
700 | if (r) | ||
701 | goto error_reserve; | ||
702 | r = amdgpu_bo_pin_restricted(adev->fw_vram_usage.reserved_bo, | ||
703 | AMDGPU_GEM_DOMAIN_VRAM, | ||
704 | adev->fw_vram_usage.start_offset, | ||
705 | (adev->fw_vram_usage.start_offset + | ||
706 | adev->fw_vram_usage.size), &gpu_addr); | ||
707 | if (r) | ||
708 | goto error_pin; | ||
709 | r = amdgpu_bo_kmap(adev->fw_vram_usage.reserved_bo, | ||
710 | &adev->fw_vram_usage.va); | ||
711 | if (r) | ||
712 | goto error_kmap; | ||
713 | |||
714 | amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo); | ||
715 | } | ||
716 | return r; | ||
717 | |||
718 | error_kmap: | ||
719 | amdgpu_bo_unpin(adev->fw_vram_usage.reserved_bo); | ||
720 | error_pin: | ||
721 | amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo); | ||
722 | error_reserve: | ||
723 | amdgpu_bo_unref(&adev->fw_vram_usage.reserved_bo); | ||
724 | error_create: | ||
725 | adev->fw_vram_usage.va = NULL; | ||
726 | adev->fw_vram_usage.reserved_bo = NULL; | ||
727 | return r; | ||
728 | } | ||
729 | |||
730 | |||
731 | /* | ||
661 | * GPU helpers function. | 732 | * GPU helpers function. |
662 | */ | 733 | */ |
663 | /** | 734 | /** |
@@ -1604,7 +1675,6 @@ static int amdgpu_init(struct amdgpu_device *adev) | |||
1604 | return r; | 1675 | return r; |
1605 | } | 1676 | } |
1606 | adev->ip_blocks[i].status.sw = true; | 1677 | adev->ip_blocks[i].status.sw = true; |
1607 | |||
1608 | /* need to do gmc hw init early so we can allocate gpu mem */ | 1678 | /* need to do gmc hw init early so we can allocate gpu mem */ |
1609 | if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { | 1679 | if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { |
1610 | r = amdgpu_vram_scratch_init(adev); | 1680 | r = amdgpu_vram_scratch_init(adev); |
@@ -1635,11 +1705,6 @@ static int amdgpu_init(struct amdgpu_device *adev) | |||
1635 | } | 1705 | } |
1636 | } | 1706 | } |
1637 | 1707 | ||
1638 | mutex_lock(&adev->firmware.mutex); | ||
1639 | if (amdgpu_ucode_init_bo(adev)) | ||
1640 | adev->firmware.load_type = AMDGPU_FW_LOAD_DIRECT; | ||
1641 | mutex_unlock(&adev->firmware.mutex); | ||
1642 | |||
1643 | for (i = 0; i < adev->num_ip_blocks; i++) { | 1708 | for (i = 0; i < adev->num_ip_blocks; i++) { |
1644 | if (!adev->ip_blocks[i].status.sw) | 1709 | if (!adev->ip_blocks[i].status.sw) |
1645 | continue; | 1710 | continue; |
@@ -1775,8 +1840,6 @@ static int amdgpu_fini(struct amdgpu_device *adev) | |||
1775 | 1840 | ||
1776 | adev->ip_blocks[i].status.hw = false; | 1841 | adev->ip_blocks[i].status.hw = false; |
1777 | } | 1842 | } |
1778 | if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT) | ||
1779 | amdgpu_ucode_fini_bo(adev); | ||
1780 | 1843 | ||
1781 | for (i = adev->num_ip_blocks - 1; i >= 0; i--) { | 1844 | for (i = adev->num_ip_blocks - 1; i >= 0; i--) { |
1782 | if (!adev->ip_blocks[i].status.sw) | 1845 | if (!adev->ip_blocks[i].status.sw) |
@@ -2019,6 +2082,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, | |||
2019 | adev->vm_manager.vm_pte_num_rings = 0; | 2082 | adev->vm_manager.vm_pte_num_rings = 0; |
2020 | adev->gart.gart_funcs = NULL; | 2083 | adev->gart.gart_funcs = NULL; |
2021 | adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS); | 2084 | adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS); |
2085 | bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); | ||
2022 | 2086 | ||
2023 | adev->smc_rreg = &amdgpu_invalid_rreg; | 2087 | adev->smc_rreg = &amdgpu_invalid_rreg; |
2024 | adev->smc_wreg = &amdgpu_invalid_wreg; | 2088 | adev->smc_wreg = &amdgpu_invalid_wreg; |
@@ -2047,6 +2111,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, | |||
2047 | mutex_init(&adev->pm.mutex); | 2111 | mutex_init(&adev->pm.mutex); |
2048 | mutex_init(&adev->gfx.gpu_clock_mutex); | 2112 | mutex_init(&adev->gfx.gpu_clock_mutex); |
2049 | mutex_init(&adev->srbm_mutex); | 2113 | mutex_init(&adev->srbm_mutex); |
2114 | mutex_init(&adev->gfx.pipe_reserve_mutex); | ||
2050 | mutex_init(&adev->grbm_idx_mutex); | 2115 | mutex_init(&adev->grbm_idx_mutex); |
2051 | mutex_init(&adev->mn_lock); | 2116 | mutex_init(&adev->mn_lock); |
2052 | mutex_init(&adev->virt.vf_errors.lock); | 2117 | mutex_init(&adev->virt.vf_errors.lock); |
@@ -2223,6 +2288,9 @@ int amdgpu_device_init(struct amdgpu_device *adev, | |||
2223 | if (r) | 2288 | if (r) |
2224 | DRM_ERROR("ib ring test failed (%d).\n", r); | 2289 | DRM_ERROR("ib ring test failed (%d).\n", r); |
2225 | 2290 | ||
2291 | if (amdgpu_sriov_vf(adev)) | ||
2292 | amdgpu_virt_init_data_exchange(adev); | ||
2293 | |||
2226 | amdgpu_fbdev_init(adev); | 2294 | amdgpu_fbdev_init(adev); |
2227 | 2295 | ||
2228 | r = amdgpu_pm_sysfs_init(adev); | 2296 | r = amdgpu_pm_sysfs_init(adev); |
@@ -2300,6 +2368,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev) | |||
2300 | /* evict vram memory */ | 2368 | /* evict vram memory */ |
2301 | amdgpu_bo_evict_vram(adev); | 2369 | amdgpu_bo_evict_vram(adev); |
2302 | amdgpu_ib_pool_fini(adev); | 2370 | amdgpu_ib_pool_fini(adev); |
2371 | amdgpu_fw_reserve_vram_fini(adev); | ||
2303 | amdgpu_fence_driver_fini(adev); | 2372 | amdgpu_fence_driver_fini(adev); |
2304 | amdgpu_fbdev_fini(adev); | 2373 | amdgpu_fbdev_fini(adev); |
2305 | r = amdgpu_fini(adev); | 2374 | r = amdgpu_fini(adev); |
@@ -2552,6 +2621,9 @@ static bool amdgpu_check_soft_reset(struct amdgpu_device *adev) | |||
2552 | int i; | 2621 | int i; |
2553 | bool asic_hang = false; | 2622 | bool asic_hang = false; |
2554 | 2623 | ||
2624 | if (amdgpu_sriov_vf(adev)) | ||
2625 | return true; | ||
2626 | |||
2555 | for (i = 0; i < adev->num_ip_blocks; i++) { | 2627 | for (i = 0; i < adev->num_ip_blocks; i++) { |
2556 | if (!adev->ip_blocks[i].status.valid) | 2628 | if (!adev->ip_blocks[i].status.valid) |
2557 | continue; | 2629 | continue; |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index ad02d3fbb44c..dd2f060d62a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -71,9 +71,11 @@ | |||
71 | * - 3.19.0 - Add support for UVD MJPEG decode | 71 | * - 3.19.0 - Add support for UVD MJPEG decode |
72 | * - 3.20.0 - Add support for local BOs | 72 | * - 3.20.0 - Add support for local BOs |
73 | * - 3.21.0 - Add DRM_AMDGPU_FENCE_TO_HANDLE ioctl | 73 | * - 3.21.0 - Add DRM_AMDGPU_FENCE_TO_HANDLE ioctl |
74 | * - 3.22.0 - Add DRM_AMDGPU_SCHED ioctl | ||
75 | * - 3.23.0 - Add query for VRAM lost counter | ||
74 | */ | 76 | */ |
75 | #define KMS_DRIVER_MAJOR 3 | 77 | #define KMS_DRIVER_MAJOR 3 |
76 | #define KMS_DRIVER_MINOR 21 | 78 | #define KMS_DRIVER_MINOR 23 |
77 | #define KMS_DRIVER_PATCHLEVEL 0 | 79 | #define KMS_DRIVER_PATCHLEVEL 0 |
78 | 80 | ||
79 | int amdgpu_vram_limit = 0; | 81 | int amdgpu_vram_limit = 0; |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 333bad749067..fb9f88ef6059 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -169,6 +169,32 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f) | |||
169 | } | 169 | } |
170 | 170 | ||
171 | /** | 171 | /** |
172 | * amdgpu_fence_emit_polling - emit a fence on the requeste ring | ||
173 | * | ||
174 | * @ring: ring the fence is associated with | ||
175 | * @s: resulting sequence number | ||
176 | * | ||
177 | * Emits a fence command on the requested ring (all asics). | ||
178 | * Used For polling fence. | ||
179 | * Returns 0 on success, -ENOMEM on failure. | ||
180 | */ | ||
181 | int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s) | ||
182 | { | ||
183 | uint32_t seq; | ||
184 | |||
185 | if (!s) | ||
186 | return -EINVAL; | ||
187 | |||
188 | seq = ++ring->fence_drv.sync_seq; | ||
189 | amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, | ||
190 | seq, AMDGPU_FENCE_FLAG_INT); | ||
191 | |||
192 | *s = seq; | ||
193 | |||
194 | return 0; | ||
195 | } | ||
196 | |||
197 | /** | ||
172 | * amdgpu_fence_schedule_fallback - schedule fallback check | 198 | * amdgpu_fence_schedule_fallback - schedule fallback check |
173 | * | 199 | * |
174 | * @ring: pointer to struct amdgpu_ring | 200 | * @ring: pointer to struct amdgpu_ring |
@@ -282,6 +308,30 @@ int amdgpu_fence_wait_empty(struct amdgpu_ring *ring) | |||
282 | } | 308 | } |
283 | 309 | ||
284 | /** | 310 | /** |
311 | * amdgpu_fence_wait_polling - busy wait for givn sequence number | ||
312 | * | ||
313 | * @ring: ring index the fence is associated with | ||
314 | * @wait_seq: sequence number to wait | ||
315 | * @timeout: the timeout for waiting in usecs | ||
316 | * | ||
317 | * Wait for all fences on the requested ring to signal (all asics). | ||
318 | * Returns left time if no timeout, 0 or minus if timeout. | ||
319 | */ | ||
320 | signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring, | ||
321 | uint32_t wait_seq, | ||
322 | signed long timeout) | ||
323 | { | ||
324 | uint32_t seq; | ||
325 | |||
326 | do { | ||
327 | seq = amdgpu_fence_read(ring); | ||
328 | udelay(5); | ||
329 | timeout -= 5; | ||
330 | } while ((int32_t)(wait_seq - seq) > 0 && timeout > 0); | ||
331 | |||
332 | return timeout > 0 ? timeout : 0; | ||
333 | } | ||
334 | /** | ||
285 | * amdgpu_fence_count_emitted - get the count of emitted fences | 335 | * amdgpu_fence_count_emitted - get the count of emitted fences |
286 | * | 336 | * |
287 | * @ring: ring the fence is associated with | 337 | * @ring: ring the fence is associated with |
@@ -641,6 +691,19 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data) | |||
641 | atomic_read(&ring->fence_drv.last_seq)); | 691 | atomic_read(&ring->fence_drv.last_seq)); |
642 | seq_printf(m, "Last emitted 0x%08x\n", | 692 | seq_printf(m, "Last emitted 0x%08x\n", |
643 | ring->fence_drv.sync_seq); | 693 | ring->fence_drv.sync_seq); |
694 | |||
695 | if (ring->funcs->type != AMDGPU_RING_TYPE_GFX) | ||
696 | continue; | ||
697 | |||
698 | /* set in CP_VMID_PREEMPT and preemption occurred */ | ||
699 | seq_printf(m, "Last preempted 0x%08x\n", | ||
700 | le32_to_cpu(*(ring->fence_drv.cpu_addr + 2))); | ||
701 | /* set in CP_VMID_RESET and reset occurred */ | ||
702 | seq_printf(m, "Last reset 0x%08x\n", | ||
703 | le32_to_cpu(*(ring->fence_drv.cpu_addr + 4))); | ||
704 | /* Both preemption and reset occurred */ | ||
705 | seq_printf(m, "Last both 0x%08x\n", | ||
706 | le32_to_cpu(*(ring->fence_drv.cpu_addr + 6))); | ||
644 | } | 707 | } |
645 | return 0; | 708 | return 0; |
646 | } | 709 | } |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index b0d45c8e6bb3..fb72edc4c026 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -212,7 +212,9 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, | |||
212 | AMDGPU_GEM_CREATE_NO_CPU_ACCESS | | 212 | AMDGPU_GEM_CREATE_NO_CPU_ACCESS | |
213 | AMDGPU_GEM_CREATE_CPU_GTT_USWC | | 213 | AMDGPU_GEM_CREATE_CPU_GTT_USWC | |
214 | AMDGPU_GEM_CREATE_VRAM_CLEARED | | 214 | AMDGPU_GEM_CREATE_VRAM_CLEARED | |
215 | AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)) | 215 | AMDGPU_GEM_CREATE_VM_ALWAYS_VALID | |
216 | AMDGPU_GEM_CREATE_EXPLICIT_SYNC)) | ||
217 | |||
216 | return -EINVAL; | 218 | return -EINVAL; |
217 | 219 | ||
218 | /* reject invalid gem domains */ | 220 | /* reject invalid gem domains */ |
@@ -577,11 +579,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, | |||
577 | args->operation); | 579 | args->operation); |
578 | return -EINVAL; | 580 | return -EINVAL; |
579 | } | 581 | } |
580 | if ((args->operation == AMDGPU_VA_OP_MAP) || | ||
581 | (args->operation == AMDGPU_VA_OP_REPLACE)) { | ||
582 | if (amdgpu_kms_vram_lost(adev, fpriv)) | ||
583 | return -ENODEV; | ||
584 | } | ||
585 | 582 | ||
586 | INIT_LIST_HEAD(&list); | 583 | INIT_LIST_HEAD(&list); |
587 | INIT_LIST_HEAD(&duplicates); | 584 | INIT_LIST_HEAD(&duplicates); |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 83435ccbad44..ef043361009f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -201,7 +201,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, | |||
201 | struct amdgpu_kiq *kiq = &adev->gfx.kiq; | 201 | struct amdgpu_kiq *kiq = &adev->gfx.kiq; |
202 | int r = 0; | 202 | int r = 0; |
203 | 203 | ||
204 | mutex_init(&kiq->ring_mutex); | 204 | spin_lock_init(&kiq->ring_lock); |
205 | 205 | ||
206 | r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs); | 206 | r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs); |
207 | if (r) | 207 | if (r) |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
index 0d15eb7d31d7..33535d347734 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -169,7 +169,8 @@ static int amdgpu_gtt_mgr_new(struct ttm_mem_type_manager *man, | |||
169 | int r; | 169 | int r; |
170 | 170 | ||
171 | spin_lock(&mgr->lock); | 171 | spin_lock(&mgr->lock); |
172 | if (atomic64_read(&mgr->available) < mem->num_pages) { | 172 | if ((&tbo->mem == mem || tbo->mem.mem_type != TTM_PL_TT) && |
173 | atomic64_read(&mgr->available) < mem->num_pages) { | ||
173 | spin_unlock(&mgr->lock); | 174 | spin_unlock(&mgr->lock); |
174 | return 0; | 175 | return 0; |
175 | } | 176 | } |
@@ -244,8 +245,9 @@ static void amdgpu_gtt_mgr_del(struct ttm_mem_type_manager *man, | |||
244 | uint64_t amdgpu_gtt_mgr_usage(struct ttm_mem_type_manager *man) | 245 | uint64_t amdgpu_gtt_mgr_usage(struct ttm_mem_type_manager *man) |
245 | { | 246 | { |
246 | struct amdgpu_gtt_mgr *mgr = man->priv; | 247 | struct amdgpu_gtt_mgr *mgr = man->priv; |
248 | s64 result = man->size - atomic64_read(&mgr->available); | ||
247 | 249 | ||
248 | return (u64)(man->size - atomic64_read(&mgr->available)) * PAGE_SIZE; | 250 | return (result > 0 ? result : 0) * PAGE_SIZE; |
249 | } | 251 | } |
250 | 252 | ||
251 | /** | 253 | /** |
@@ -265,7 +267,7 @@ static void amdgpu_gtt_mgr_debug(struct ttm_mem_type_manager *man, | |||
265 | drm_mm_print(&mgr->mm, printer); | 267 | drm_mm_print(&mgr->mm, printer); |
266 | spin_unlock(&mgr->lock); | 268 | spin_unlock(&mgr->lock); |
267 | 269 | ||
268 | drm_printf(printer, "man size:%llu pages, gtt available:%llu pages, usage:%lluMB\n", | 270 | drm_printf(printer, "man size:%llu pages, gtt available:%lld pages, usage:%lluMB\n", |
269 | man->size, (u64)atomic64_read(&mgr->available), | 271 | man->size, (u64)atomic64_read(&mgr->available), |
270 | amdgpu_gtt_mgr_usage(man) >> 20); | 272 | amdgpu_gtt_mgr_usage(man) >> 20); |
271 | } | 273 | } |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 4510627ae83e..0cfc68db575b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -65,6 +65,7 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, | |||
65 | amdgpu_sync_create(&(*job)->sync); | 65 | amdgpu_sync_create(&(*job)->sync); |
66 | amdgpu_sync_create(&(*job)->dep_sync); | 66 | amdgpu_sync_create(&(*job)->dep_sync); |
67 | amdgpu_sync_create(&(*job)->sched_sync); | 67 | amdgpu_sync_create(&(*job)->sched_sync); |
68 | (*job)->vram_lost_counter = atomic_read(&adev->vram_lost_counter); | ||
68 | 69 | ||
69 | return 0; | 70 | return 0; |
70 | } | 71 | } |
@@ -103,6 +104,7 @@ static void amdgpu_job_free_cb(struct amd_sched_job *s_job) | |||
103 | { | 104 | { |
104 | struct amdgpu_job *job = container_of(s_job, struct amdgpu_job, base); | 105 | struct amdgpu_job *job = container_of(s_job, struct amdgpu_job, base); |
105 | 106 | ||
107 | amdgpu_ring_priority_put(job->ring, amd_sched_get_job_priority(s_job)); | ||
106 | dma_fence_put(job->fence); | 108 | dma_fence_put(job->fence); |
107 | amdgpu_sync_free(&job->sync); | 109 | amdgpu_sync_free(&job->sync); |
108 | amdgpu_sync_free(&job->dep_sync); | 110 | amdgpu_sync_free(&job->dep_sync); |
@@ -139,6 +141,8 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring, | |||
139 | job->fence_ctx = entity->fence_context; | 141 | job->fence_ctx = entity->fence_context; |
140 | *f = dma_fence_get(&job->base.s_fence->finished); | 142 | *f = dma_fence_get(&job->base.s_fence->finished); |
141 | amdgpu_job_free_resources(job); | 143 | amdgpu_job_free_resources(job); |
144 | amdgpu_ring_priority_get(job->ring, | ||
145 | amd_sched_get_job_priority(&job->base)); | ||
142 | amd_sched_entity_push_job(&job->base); | 146 | amd_sched_entity_push_job(&job->base); |
143 | 147 | ||
144 | return 0; | 148 | return 0; |
@@ -177,8 +181,8 @@ static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job) | |||
177 | static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job) | 181 | static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job) |
178 | { | 182 | { |
179 | struct dma_fence *fence = NULL; | 183 | struct dma_fence *fence = NULL; |
184 | struct amdgpu_device *adev; | ||
180 | struct amdgpu_job *job; | 185 | struct amdgpu_job *job; |
181 | struct amdgpu_fpriv *fpriv = NULL; | ||
182 | int r; | 186 | int r; |
183 | 187 | ||
184 | if (!sched_job) { | 188 | if (!sched_job) { |
@@ -186,23 +190,25 @@ static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job) | |||
186 | return NULL; | 190 | return NULL; |
187 | } | 191 | } |
188 | job = to_amdgpu_job(sched_job); | 192 | job = to_amdgpu_job(sched_job); |
193 | adev = job->adev; | ||
189 | 194 | ||
190 | BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL)); | 195 | BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL)); |
191 | 196 | ||
192 | trace_amdgpu_sched_run_job(job); | 197 | trace_amdgpu_sched_run_job(job); |
193 | if (job->vm) | ||
194 | fpriv = container_of(job->vm, struct amdgpu_fpriv, vm); | ||
195 | /* skip ib schedule when vram is lost */ | 198 | /* skip ib schedule when vram is lost */ |
196 | if (fpriv && amdgpu_kms_vram_lost(job->adev, fpriv)) | 199 | if (job->vram_lost_counter != atomic_read(&adev->vram_lost_counter)) { |
200 | dma_fence_set_error(&job->base.s_fence->finished, -ECANCELED); | ||
197 | DRM_ERROR("Skip scheduling IBs!\n"); | 201 | DRM_ERROR("Skip scheduling IBs!\n"); |
198 | else { | 202 | } else { |
199 | r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job, &fence); | 203 | r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job, |
204 | &fence); | ||
200 | if (r) | 205 | if (r) |
201 | DRM_ERROR("Error scheduling IBs (%d)\n", r); | 206 | DRM_ERROR("Error scheduling IBs (%d)\n", r); |
202 | } | 207 | } |
203 | /* if gpu reset, hw fence will be replaced here */ | 208 | /* if gpu reset, hw fence will be replaced here */ |
204 | dma_fence_put(job->fence); | 209 | dma_fence_put(job->fence); |
205 | job->fence = dma_fence_get(fence); | 210 | job->fence = dma_fence_get(fence); |
211 | |||
206 | amdgpu_job_free_resources(job); | 212 | amdgpu_job_free_resources(job); |
207 | return fence; | 213 | return fence; |
208 | } | 214 | } |
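
The per-job vram_lost_counter snapshot replaces the old per-fpriv bookkeeping: each job records the device counter at allocation time, and amdgpu_job_run() cancels the job with -ECANCELED on its finished fence if the counter has moved, meaning a GPU reset lost VRAM contents after the job was created. A minimal standalone sketch of the generation-counter pattern (names are illustrative, not the driver's):

    #include <stdio.h>
    #include <stdatomic.h>

    static atomic_int vram_lost_counter;   /* device-wide generation count */

    struct job { int vram_lost_counter; };

    static void job_alloc(struct job *job)
    {
        /* snapshot the generation at submission time */
        job->vram_lost_counter = atomic_load(&vram_lost_counter);
    }

    static int job_run(struct job *job)
    {
        /* VRAM contents were lost after this job was created: cancel it */
        if (job->vram_lost_counter != atomic_load(&vram_lost_counter))
            return -1;   /* stands in for -ECANCELED on the finished fence */
        return 0;        /* safe to schedule the IBs */
    }

    int main(void)
    {
        struct job j;

        job_alloc(&j);
        printf("before reset: %d\n", job_run(&j));
        atomic_fetch_add(&vram_lost_counter, 1);   /* a GPU reset lost VRAM */
        printf("after reset:  %d\n", job_run(&j));
        return 0;
    }
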
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 51841259e23f..6f0b26dae3b0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <drm/drmP.h> | 28 | #include <drm/drmP.h> |
29 | #include "amdgpu.h" | 29 | #include "amdgpu.h" |
30 | #include <drm/amdgpu_drm.h> | 30 | #include <drm/amdgpu_drm.h> |
31 | #include "amdgpu_sched.h" | ||
31 | #include "amdgpu_uvd.h" | 32 | #include "amdgpu_uvd.h" |
32 | #include "amdgpu_vce.h" | 33 | #include "amdgpu_vce.h" |
33 | 34 | ||
@@ -269,7 +270,6 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info, | |||
269 | static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) | 270 | static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) |
270 | { | 271 | { |
271 | struct amdgpu_device *adev = dev->dev_private; | 272 | struct amdgpu_device *adev = dev->dev_private; |
272 | struct amdgpu_fpriv *fpriv = filp->driver_priv; | ||
273 | struct drm_amdgpu_info *info = data; | 273 | struct drm_amdgpu_info *info = data; |
274 | struct amdgpu_mode_info *minfo = &adev->mode_info; | 274 | struct amdgpu_mode_info *minfo = &adev->mode_info; |
275 | void __user *out = (void __user *)(uintptr_t)info->return_pointer; | 275 | void __user *out = (void __user *)(uintptr_t)info->return_pointer; |
@@ -282,8 +282,6 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file | |||
282 | 282 | ||
283 | if (!info->return_size || !info->return_pointer) | 283 | if (!info->return_size || !info->return_pointer) |
284 | return -EINVAL; | 284 | return -EINVAL; |
285 | if (amdgpu_kms_vram_lost(adev, fpriv)) | ||
286 | return -ENODEV; | ||
287 | 285 | ||
288 | switch (info->query) { | 286 | switch (info->query) { |
289 | case AMDGPU_INFO_ACCEL_WORKING: | 287 | case AMDGPU_INFO_ACCEL_WORKING: |
@@ -765,6 +763,9 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file | |||
765 | } | 763 | } |
766 | return copy_to_user(out, &ui32, min(size, 4u)) ? -EFAULT : 0; | 764 | return copy_to_user(out, &ui32, min(size, 4u)) ? -EFAULT : 0; |
767 | } | 765 | } |
766 | case AMDGPU_INFO_VRAM_LOST_COUNTER: | ||
767 | ui32 = atomic_read(&adev->vram_lost_counter); | ||
768 | return copy_to_user(out, &ui32, min(size, 4u)) ? -EFAULT : 0; | ||
768 | default: | 769 | default: |
769 | DRM_DEBUG_KMS("Invalid request %d\n", info->query); | 770 | DRM_DEBUG_KMS("Invalid request %d\n", info->query); |
770 | return -EINVAL; | 771 | return -EINVAL; |
@@ -791,12 +792,6 @@ void amdgpu_driver_lastclose_kms(struct drm_device *dev) | |||
791 | vga_switcheroo_process_delayed_switch(); | 792 | vga_switcheroo_process_delayed_switch(); |
792 | } | 793 | } |
793 | 794 | ||
794 | bool amdgpu_kms_vram_lost(struct amdgpu_device *adev, | ||
795 | struct amdgpu_fpriv *fpriv) | ||
796 | { | ||
797 | return fpriv->vram_lost_counter != atomic_read(&adev->vram_lost_counter); | ||
798 | } | ||
799 | |||
800 | /** | 795 | /** |
801 | * amdgpu_driver_open_kms - drm callback for open | 796 | * amdgpu_driver_open_kms - drm callback for open |
802 | * | 797 | * |
@@ -853,7 +848,6 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) | |||
853 | 848 | ||
854 | amdgpu_ctx_mgr_init(&fpriv->ctx_mgr); | 849 | amdgpu_ctx_mgr_init(&fpriv->ctx_mgr); |
855 | 850 | ||
856 | fpriv->vram_lost_counter = atomic_read(&adev->vram_lost_counter); | ||
857 | file_priv->driver_priv = fpriv; | 851 | file_priv->driver_priv = fpriv; |
858 | 852 | ||
859 | out_suspend: | 853 | out_suspend: |
@@ -1023,6 +1017,7 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = { | |||
1023 | DRM_IOCTL_DEF_DRV(AMDGPU_GEM_CREATE, amdgpu_gem_create_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), | 1017 | DRM_IOCTL_DEF_DRV(AMDGPU_GEM_CREATE, amdgpu_gem_create_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), |
1024 | DRM_IOCTL_DEF_DRV(AMDGPU_CTX, amdgpu_ctx_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), | 1018 | DRM_IOCTL_DEF_DRV(AMDGPU_CTX, amdgpu_ctx_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), |
1025 | DRM_IOCTL_DEF_DRV(AMDGPU_VM, amdgpu_vm_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), | 1019 | DRM_IOCTL_DEF_DRV(AMDGPU_VM, amdgpu_vm_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), |
1020 | DRM_IOCTL_DEF_DRV(AMDGPU_SCHED, amdgpu_sched_ioctl, DRM_MASTER), | ||
1026 | DRM_IOCTL_DEF_DRV(AMDGPU_BO_LIST, amdgpu_bo_list_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), | 1021 | DRM_IOCTL_DEF_DRV(AMDGPU_BO_LIST, amdgpu_bo_list_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), |
1027 | DRM_IOCTL_DEF_DRV(AMDGPU_FENCE_TO_HANDLE, amdgpu_cs_fence_to_handle_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), | 1022 | DRM_IOCTL_DEF_DRV(AMDGPU_FENCE_TO_HANDLE, amdgpu_cs_fence_to_handle_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), |
1028 | /* KMS */ | 1023 | /* KMS */ |
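
Instead of failing every info ioctl with -ENODEV after a reset, userspace can now poll the new AMDGPU_INFO_VRAM_LOST_COUNTER query and recreate its resources when the value changes. A hedged user-space sketch, assuming the query id and DRM_IOCTL_AMDGPU_INFO definitions from the matching uapi header (drm/amdgpu_drm.h):

    #include <stdint.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <drm/amdgpu_drm.h>

    /* fd is an open amdgpu DRM node, e.g. /dev/dri/renderD128 */
    static int query_vram_lost_counter(int fd, uint32_t *counter)
    {
        struct drm_amdgpu_info request;

        memset(&request, 0, sizeof(request));
        request.return_pointer = (uint64_t)(uintptr_t)counter;
        request.return_size = sizeof(*counter);
        request.query = AMDGPU_INFO_VRAM_LOST_COUNTER;

        return ioctl(fd, DRM_IOCTL_AMDGPU_INFO, &request);
    }

Comparing the value before and after a reset tells a context whether buffer contents in VRAM can still be trusted.
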
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 6982baeccd14..8b4ed8a98a18 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | |||
@@ -40,9 +40,7 @@ | |||
40 | static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo) | 40 | static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo) |
41 | { | 41 | { |
42 | struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev); | 42 | struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev); |
43 | struct amdgpu_bo *bo; | 43 | struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo); |
44 | |||
45 | bo = container_of(tbo, struct amdgpu_bo, tbo); | ||
46 | 44 | ||
47 | amdgpu_bo_kunmap(bo); | 45 | amdgpu_bo_kunmap(bo); |
48 | 46 | ||
@@ -884,7 +882,7 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, | |||
884 | if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) | 882 | if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) |
885 | return; | 883 | return; |
886 | 884 | ||
887 | abo = container_of(bo, struct amdgpu_bo, tbo); | 885 | abo = ttm_to_amdgpu_bo(bo); |
888 | amdgpu_vm_bo_invalidate(adev, abo, evict); | 886 | amdgpu_vm_bo_invalidate(adev, abo, evict); |
889 | 887 | ||
890 | amdgpu_bo_kunmap(abo); | 888 | amdgpu_bo_kunmap(abo); |
@@ -911,7 +909,7 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) | |||
911 | if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) | 909 | if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) |
912 | return 0; | 910 | return 0; |
913 | 911 | ||
914 | abo = container_of(bo, struct amdgpu_bo, tbo); | 912 | abo = ttm_to_amdgpu_bo(bo); |
915 | 913 | ||
916 | /* Remember that this BO was accessed by the CPU */ | 914 | /* Remember that this BO was accessed by the CPU */ |
917 | abo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; | 915 | abo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 39b6bf6fb051..428aae048f4b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | |||
@@ -94,6 +94,11 @@ struct amdgpu_bo { | |||
94 | }; | 94 | }; |
95 | }; | 95 | }; |
96 | 96 | ||
97 | static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo) | ||
98 | { | ||
99 | return container_of(tbo, struct amdgpu_bo, tbo); | ||
100 | } | ||
101 | |||
97 | /** | 102 | /** |
98 | * amdgpu_mem_type_to_domain - return domain corresponding to mem_type | 103 | * amdgpu_mem_type_to_domain - return domain corresponding to mem_type |
99 | * @mem_type: ttm memory type | 104 | * @mem_type: ttm memory type |
@@ -188,6 +193,14 @@ static inline bool amdgpu_bo_gpu_accessible(struct amdgpu_bo *bo) | |||
188 | } | 193 | } |
189 | } | 194 | } |
190 | 195 | ||
196 | /** | ||
197 | * amdgpu_bo_explicit_sync - return whether the bo is explicitly synced | ||
198 | */ | ||
199 | static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo) | ||
200 | { | ||
201 | return bo->flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC; | ||
202 | } | ||
203 | |||
191 | int amdgpu_bo_create(struct amdgpu_device *adev, | 204 | int amdgpu_bo_create(struct amdgpu_device *adev, |
192 | unsigned long size, int byte_align, | 205 | unsigned long size, int byte_align, |
193 | bool kernel, u32 domain, u64 flags, | 206 | bool kernel, u32 domain, u64 flags, |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c index 3b42f407971d..5f5aa5fddc16 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c | |||
@@ -145,6 +145,8 @@ static int amdgpu_pp_hw_init(void *handle) | |||
145 | int ret = 0; | 145 | int ret = 0; |
146 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 146 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
147 | 147 | ||
148 | if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) | ||
149 | amdgpu_ucode_init_bo(adev); | ||
148 | 150 | ||
149 | if (adev->powerplay.ip_funcs->hw_init) | 151 | if (adev->powerplay.ip_funcs->hw_init) |
150 | ret = adev->powerplay.ip_funcs->hw_init( | 152 | ret = adev->powerplay.ip_funcs->hw_init( |
@@ -162,6 +164,9 @@ static int amdgpu_pp_hw_fini(void *handle) | |||
162 | ret = adev->powerplay.ip_funcs->hw_fini( | 164 | ret = adev->powerplay.ip_funcs->hw_fini( |
163 | adev->powerplay.pp_handle); | 165 | adev->powerplay.pp_handle); |
164 | 166 | ||
167 | if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) | ||
168 | amdgpu_ucode_fini_bo(adev); | ||
169 | |||
165 | return ret; | 170 | return ret; |
166 | } | 171 | } |
167 | 172 | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index f1035a689d35..447d446b5015 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | |||
@@ -411,6 +411,13 @@ static int psp_hw_init(void *handle) | |||
411 | return 0; | 411 | return 0; |
412 | 412 | ||
413 | mutex_lock(&adev->firmware.mutex); | 413 | mutex_lock(&adev->firmware.mutex); |
414 | /* | ||
415 | * This sequence is just used on hw_init only once, no need on | ||
416 | * resume. | ||
417 | */ | ||
418 | ret = amdgpu_ucode_init_bo(adev); | ||
419 | if (ret) | ||
420 | goto failed; | ||
414 | 421 | ||
415 | ret = psp_load_fw(adev); | 422 | ret = psp_load_fw(adev); |
416 | if (ret) { | 423 | if (ret) { |
@@ -435,6 +442,8 @@ static int psp_hw_fini(void *handle) | |||
435 | if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) | 442 | if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) |
436 | return 0; | 443 | return 0; |
437 | 444 | ||
445 | amdgpu_ucode_fini_bo(adev); | ||
446 | |||
438 | psp_ring_destroy(psp, PSP_RING_TYPE__KM); | 447 | psp_ring_destroy(psp, PSP_RING_TYPE__KM); |
439 | 448 | ||
440 | amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf); | 449 | amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf); |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 019932a7ea3a..e5ece1fae149 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | |||
@@ -155,6 +155,75 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring) | |||
155 | } | 155 | } |
156 | 156 | ||
157 | /** | 157 | /** |
158 | * amdgpu_ring_priority_put - restore a ring's priority | ||
159 | * | ||
160 | * @ring: amdgpu_ring structure holding the information | ||
161 | * @priority: target priority | ||
162 | * | ||
163 | * Release a request for executing at @priority | ||
164 | */ | ||
165 | void amdgpu_ring_priority_put(struct amdgpu_ring *ring, | ||
166 | enum amd_sched_priority priority) | ||
167 | { | ||
168 | int i; | ||
169 | |||
170 | if (!ring->funcs->set_priority) | ||
171 | return; | ||
172 | |||
173 | if (atomic_dec_return(&ring->num_jobs[priority]) > 0) | ||
174 | return; | ||
175 | |||
176 | /* no need to restore if the job is already at the lowest priority */ | ||
177 | if (priority == AMD_SCHED_PRIORITY_NORMAL) | ||
178 | return; | ||
179 | |||
180 | mutex_lock(&ring->priority_mutex); | ||
181 | /* something higher prio is executing, no need to decay */ | ||
182 | if (ring->priority > priority) | ||
183 | goto out_unlock; | ||
184 | |||
185 | /* decay priority to the next level with a job available */ | ||
186 | for (i = priority; i >= AMD_SCHED_PRIORITY_MIN; i--) { | ||
187 | if (i == AMD_SCHED_PRIORITY_NORMAL | ||
188 | || atomic_read(&ring->num_jobs[i])) { | ||
189 | ring->priority = i; | ||
190 | ring->funcs->set_priority(ring, i); | ||
191 | break; | ||
192 | } | ||
193 | } | ||
194 | |||
195 | out_unlock: | ||
196 | mutex_unlock(&ring->priority_mutex); | ||
197 | } | ||
198 | |||
199 | /** | ||
200 | * amdgpu_ring_priority_get - change the ring's priority | ||
201 | * | ||
202 | * @ring: amdgpu_ring structure holding the information | ||
203 | * @priority: target priority | ||
204 | * | ||
205 | * Request a ring's priority to be raised to @priority (refcounted). | ||
206 | */ | ||
207 | void amdgpu_ring_priority_get(struct amdgpu_ring *ring, | ||
208 | enum amd_sched_priority priority) | ||
209 | { | ||
210 | if (!ring->funcs->set_priority) | ||
211 | return; | ||
212 | |||
213 | atomic_inc(&ring->num_jobs[priority]); | ||
214 | |||
215 | mutex_lock(&ring->priority_mutex); | ||
216 | if (priority <= ring->priority) | ||
217 | goto out_unlock; | ||
218 | |||
219 | ring->priority = priority; | ||
220 | ring->funcs->set_priority(ring, priority); | ||
221 | |||
222 | out_unlock: | ||
223 | mutex_unlock(&ring->priority_mutex); | ||
224 | } | ||
225 | |||
226 | /** | ||
158 | * amdgpu_ring_init - init driver ring struct. | 227 | * amdgpu_ring_init - init driver ring struct. |
159 | * | 228 | * |
160 | * @adev: amdgpu_device pointer | 229 | * @adev: amdgpu_device pointer |
@@ -169,7 +238,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, | |||
169 | unsigned max_dw, struct amdgpu_irq_src *irq_src, | 238 | unsigned max_dw, struct amdgpu_irq_src *irq_src, |
170 | unsigned irq_type) | 239 | unsigned irq_type) |
171 | { | 240 | { |
172 | int r; | 241 | int r, i; |
173 | int sched_hw_submission = amdgpu_sched_hw_submission; | 242 | int sched_hw_submission = amdgpu_sched_hw_submission; |
174 | 243 | ||
175 | /* Set the hw submission limit higher for KIQ because | 244 | /* Set the hw submission limit higher for KIQ because |
@@ -247,9 +316,14 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, | |||
247 | } | 316 | } |
248 | 317 | ||
249 | ring->max_dw = max_dw; | 318 | ring->max_dw = max_dw; |
319 | ring->priority = AMD_SCHED_PRIORITY_NORMAL; | ||
320 | mutex_init(&ring->priority_mutex); | ||
250 | INIT_LIST_HEAD(&ring->lru_list); | 321 | INIT_LIST_HEAD(&ring->lru_list); |
251 | amdgpu_ring_lru_touch(adev, ring); | 322 | amdgpu_ring_lru_touch(adev, ring); |
252 | 323 | ||
324 | for (i = 0; i < AMD_SCHED_PRIORITY_MAX; ++i) | ||
325 | atomic_set(&ring->num_jobs[i], 0); | ||
326 | |||
253 | if (amdgpu_debugfs_ring_init(adev, ring)) { | 327 | if (amdgpu_debugfs_ring_init(adev, ring)) { |
254 | DRM_ERROR("Failed to register debugfs file for rings !\n"); | 328 | DRM_ERROR("Failed to register debugfs file for rings !\n"); |
255 | } | 329 | } |
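
amdgpu_ring_priority_get()/put() keep a per-priority job count and only let the ring priority decay when the last elevated job retires, falling back to the highest level that still has work (or NORMAL). A runnable toy model of that decay logic (the enum values are illustrative; the real driver uses amd_sched_priority and holds priority_mutex):

    #include <stdio.h>

    /* toy priority levels standing in for amd_sched_priority */
    enum { PRIO_LOW, PRIO_NORMAL, PRIO_HIGH_SW, PRIO_HIGH_HW, PRIO_MAX };

    static int num_jobs[PRIO_MAX];
    static int ring_priority = PRIO_NORMAL;

    static void priority_get(int prio)
    {
        num_jobs[prio]++;
        if (prio > ring_priority)
            ring_priority = prio;   /* raise immediately */
    }

    static void priority_put(int prio)
    {
        int i;

        if (--num_jobs[prio] > 0 || prio == PRIO_NORMAL ||
            ring_priority > prio)
            return;

        /* decay to the next level that still has jobs, or back to NORMAL */
        for (i = prio; i >= PRIO_LOW; i--) {
            if (i == PRIO_NORMAL || num_jobs[i]) {
                ring_priority = i;
                break;
            }
        }
    }

    int main(void)
    {
        priority_get(PRIO_HIGH_HW);
        priority_get(PRIO_HIGH_SW);
        priority_put(PRIO_HIGH_HW);   /* HIGH_SW still has a job pending */
        printf("ring priority: %d (HIGH_SW=%d)\n", ring_priority, PRIO_HIGH_SW);
        priority_put(PRIO_HIGH_SW);   /* last elevated job done */
        printf("ring priority: %d (NORMAL=%d)\n", ring_priority, PRIO_NORMAL);
        return 0;
    }
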
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 491bd5512dcc..b18c2b96691f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | |||
@@ -24,6 +24,7 @@ | |||
24 | #ifndef __AMDGPU_RING_H__ | 24 | #ifndef __AMDGPU_RING_H__ |
25 | #define __AMDGPU_RING_H__ | 25 | #define __AMDGPU_RING_H__ |
26 | 26 | ||
27 | #include <drm/amdgpu_drm.h> | ||
27 | #include "gpu_scheduler.h" | 28 | #include "gpu_scheduler.h" |
28 | 29 | ||
29 | /* max number of rings */ | 30 | /* max number of rings */ |
@@ -56,6 +57,7 @@ struct amdgpu_device; | |||
56 | struct amdgpu_ring; | 57 | struct amdgpu_ring; |
57 | struct amdgpu_ib; | 58 | struct amdgpu_ib; |
58 | struct amdgpu_cs_parser; | 59 | struct amdgpu_cs_parser; |
60 | struct amdgpu_job; | ||
59 | 61 | ||
60 | /* | 62 | /* |
61 | * Fences. | 63 | * Fences. |
@@ -88,8 +90,12 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, | |||
88 | void amdgpu_fence_driver_suspend(struct amdgpu_device *adev); | 90 | void amdgpu_fence_driver_suspend(struct amdgpu_device *adev); |
89 | void amdgpu_fence_driver_resume(struct amdgpu_device *adev); | 91 | void amdgpu_fence_driver_resume(struct amdgpu_device *adev); |
90 | int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence); | 92 | int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence); |
93 | int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s); | ||
91 | void amdgpu_fence_process(struct amdgpu_ring *ring); | 94 | void amdgpu_fence_process(struct amdgpu_ring *ring); |
92 | int amdgpu_fence_wait_empty(struct amdgpu_ring *ring); | 95 | int amdgpu_fence_wait_empty(struct amdgpu_ring *ring); |
96 | signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring, | ||
97 | uint32_t wait_seq, | ||
98 | signed long timeout); | ||
93 | unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring); | 99 | unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring); |
94 | 100 | ||
95 | /* | 101 | /* |
@@ -147,6 +153,9 @@ struct amdgpu_ring_funcs { | |||
147 | void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg); | 153 | void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg); |
148 | void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); | 154 | void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); |
149 | void (*emit_tmz)(struct amdgpu_ring *ring, bool start); | 155 | void (*emit_tmz)(struct amdgpu_ring *ring, bool start); |
156 | /* priority functions */ | ||
157 | void (*set_priority) (struct amdgpu_ring *ring, | ||
158 | enum amd_sched_priority priority); | ||
150 | }; | 159 | }; |
151 | 160 | ||
152 | struct amdgpu_ring { | 161 | struct amdgpu_ring { |
@@ -187,6 +196,12 @@ struct amdgpu_ring { | |||
187 | volatile u32 *cond_exe_cpu_addr; | 196 | volatile u32 *cond_exe_cpu_addr; |
188 | unsigned vm_inv_eng; | 197 | unsigned vm_inv_eng; |
189 | bool has_compute_vm_bug; | 198 | bool has_compute_vm_bug; |
199 | |||
200 | atomic_t num_jobs[AMD_SCHED_PRIORITY_MAX]; | ||
201 | struct mutex priority_mutex; | ||
202 | /* protected by priority_mutex */ | ||
203 | int priority; | ||
204 | |||
190 | #if defined(CONFIG_DEBUG_FS) | 205 | #if defined(CONFIG_DEBUG_FS) |
191 | struct dentry *ent; | 206 | struct dentry *ent; |
192 | #endif | 207 | #endif |
@@ -197,6 +212,10 @@ void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count); | |||
197 | void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib); | 212 | void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib); |
198 | void amdgpu_ring_commit(struct amdgpu_ring *ring); | 213 | void amdgpu_ring_commit(struct amdgpu_ring *ring); |
199 | void amdgpu_ring_undo(struct amdgpu_ring *ring); | 214 | void amdgpu_ring_undo(struct amdgpu_ring *ring); |
215 | void amdgpu_ring_priority_get(struct amdgpu_ring *ring, | ||
216 | enum amd_sched_priority priority); | ||
217 | void amdgpu_ring_priority_put(struct amdgpu_ring *ring, | ||
218 | enum amd_sched_priority priority); | ||
200 | int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, | 219 | int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, |
201 | unsigned ring_size, struct amdgpu_irq_src *irq_src, | 220 | unsigned ring_size, struct amdgpu_irq_src *irq_src, |
202 | unsigned irq_type); | 221 | unsigned irq_type); |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c new file mode 100644 index 000000000000..290cc3f9c433 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | |||
@@ -0,0 +1,109 @@ | |||
1 | /* | ||
2 | * Copyright 2017 Valve Corporation | ||
3 | * | ||
4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
5 | * copy of this software and associated documentation files (the "Software"), | ||
6 | * to deal in the Software without restriction, including without limitation | ||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
9 | * Software is furnished to do so, subject to the following conditions: | ||
10 | * | ||
11 | * The above copyright notice and this permission notice shall be included in | ||
12 | * all copies or substantial portions of the Software. | ||
13 | * | ||
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | ||
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | ||
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | ||
20 | * OTHER DEALINGS IN THE SOFTWARE. | ||
21 | * | ||
22 | * Authors: Andres Rodriguez <andresx7@gmail.com> | ||
23 | */ | ||
24 | |||
25 | #include <linux/fdtable.h> | ||
26 | #include <linux/pid.h> | ||
27 | #include <drm/amdgpu_drm.h> | ||
28 | #include "amdgpu.h" | ||
29 | |||
30 | #include "amdgpu_vm.h" | ||
31 | |||
32 | enum amd_sched_priority amdgpu_to_sched_priority(int amdgpu_priority) | ||
33 | { | ||
34 | switch (amdgpu_priority) { | ||
35 | case AMDGPU_CTX_PRIORITY_VERY_HIGH: | ||
36 | return AMD_SCHED_PRIORITY_HIGH_HW; | ||
37 | case AMDGPU_CTX_PRIORITY_HIGH: | ||
38 | return AMD_SCHED_PRIORITY_HIGH_SW; | ||
39 | case AMDGPU_CTX_PRIORITY_NORMAL: | ||
40 | return AMD_SCHED_PRIORITY_NORMAL; | ||
41 | case AMDGPU_CTX_PRIORITY_LOW: | ||
42 | case AMDGPU_CTX_PRIORITY_VERY_LOW: | ||
43 | return AMD_SCHED_PRIORITY_LOW; | ||
44 | case AMDGPU_CTX_PRIORITY_UNSET: | ||
45 | return AMD_SCHED_PRIORITY_UNSET; | ||
46 | default: | ||
47 | WARN(1, "Invalid context priority %d\n", amdgpu_priority); | ||
48 | return AMD_SCHED_PRIORITY_INVALID; | ||
49 | } | ||
50 | } | ||
51 | |||
52 | static int amdgpu_sched_process_priority_override(struct amdgpu_device *adev, | ||
53 | int fd, | ||
54 | enum amd_sched_priority priority) | ||
55 | { | ||
56 | struct file *filp = fcheck(fd); | ||
57 | struct drm_file *file; | ||
58 | struct pid *pid; | ||
59 | struct amdgpu_fpriv *fpriv; | ||
60 | struct amdgpu_ctx *ctx; | ||
61 | uint32_t id; | ||
62 | |||
63 | if (!filp) | ||
64 | return -EINVAL; | ||
65 | |||
66 | pid = get_pid(((struct drm_file *)filp->private_data)->pid); | ||
67 | |||
68 | mutex_lock(&adev->ddev->filelist_mutex); | ||
69 | list_for_each_entry(file, &adev->ddev->filelist, lhead) { | ||
70 | if (file->pid != pid) | ||
71 | continue; | ||
72 | |||
73 | fpriv = file->driver_priv; | ||
74 | idr_for_each_entry(&fpriv->ctx_mgr.ctx_handles, ctx, id) | ||
75 | amdgpu_ctx_priority_override(ctx, priority); | ||
76 | } | ||
77 | mutex_unlock(&adev->ddev->filelist_mutex); | ||
78 | |||
79 | put_pid(pid); | ||
80 | |||
81 | return 0; | ||
82 | } | ||
83 | |||
84 | int amdgpu_sched_ioctl(struct drm_device *dev, void *data, | ||
85 | struct drm_file *filp) | ||
86 | { | ||
87 | union drm_amdgpu_sched *args = data; | ||
88 | struct amdgpu_device *adev = dev->dev_private; | ||
89 | enum amd_sched_priority priority; | ||
90 | int r; | ||
91 | |||
92 | priority = amdgpu_to_sched_priority(args->in.priority); | ||
93 | if (args->in.flags || priority == AMD_SCHED_PRIORITY_INVALID) | ||
94 | return -EINVAL; | ||
95 | |||
96 | switch (args->in.op) { | ||
97 | case AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE: | ||
98 | r = amdgpu_sched_process_priority_override(adev, | ||
99 | args->in.fd, | ||
100 | priority); | ||
101 | break; | ||
102 | default: | ||
103 | DRM_ERROR("Invalid sched op specified: %d\n", args->in.op); | ||
104 | r = -EINVAL; | ||
105 | break; | ||
106 | } | ||
107 | |||
108 | return r; | ||
109 | } | ||
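
amdgpu_sched_ioctl() lets a DRM master override the context priority of every context owned by another process, identified by that process' open amdgpu fd. A hedged user-space sketch, assuming the union drm_amdgpu_sched layout from the matching uapi change (the field names mirror the args->in accesses above):

    #include <stdint.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <drm/amdgpu_drm.h>

    /*
     * master_fd: a DRM master fd (the ioctl is registered DRM_MASTER only).
     * target_fd: the target process' amdgpu fd, handed over e.g. via SCM_RIGHTS.
     */
    static int override_process_priority(int master_fd, int target_fd)
    {
        union drm_amdgpu_sched args;

        memset(&args, 0, sizeof(args));
        args.in.op = AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE;
        args.in.fd = target_fd;
        args.in.priority = AMDGPU_CTX_PRIORITY_HIGH;

        return ioctl(master_fd, DRM_IOCTL_AMDGPU_SCHED, &args);
    }

Any non-zero flags or an unknown priority value makes the kernel side return -EINVAL, as seen in the handler above.
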
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h new file mode 100644 index 000000000000..b28c067d3822 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h | |||
@@ -0,0 +1,34 @@ | |||
1 | /* | ||
2 | * Copyright 2017 Valve Corporation | ||
3 | * | ||
4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
5 | * copy of this software and associated documentation files (the "Software"), | ||
6 | * to deal in the Software without restriction, including without limitation | ||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
9 | * Software is furnished to do so, subject to the following conditions: | ||
10 | * | ||
11 | * The above copyright notice and this permission notice shall be included in | ||
12 | * all copies or substantial portions of the Software. | ||
13 | * | ||
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | ||
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | ||
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | ||
20 | * OTHER DEALINGS IN THE SOFTWARE. | ||
21 | * | ||
22 | * Authors: Andres Rodriguez <andresx7@gmail.com> | ||
23 | */ | ||
24 | |||
25 | #ifndef __AMDGPU_SCHED_H__ | ||
26 | #define __AMDGPU_SCHED_H__ | ||
27 | |||
28 | #include <drm/drmP.h> | ||
29 | |||
30 | enum amd_sched_priority amdgpu_to_sched_priority(int amdgpu_priority); | ||
31 | int amdgpu_sched_ioctl(struct drm_device *dev, void *data, | ||
32 | struct drm_file *filp); | ||
33 | |||
34 | #endif // __AMDGPU_SCHED_H__ | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index c586f44312f9..a4bf21f8f1c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | |||
@@ -169,14 +169,14 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, | |||
169 | * | 169 | * |
170 | * @sync: sync object to add fences from reservation object to | 170 | * @sync: sync object to add fences from reservation object to |
171 | * @resv: reservation object with embedded fence | 171 | * @resv: reservation object with embedded fence |
172 | * @shared: true if we should only sync to the exclusive fence | 172 | * @explicit_sync: true if we should only sync to the exclusive fence |
173 | * | 173 | * |
174 | * Sync to the fence | 174 | * Sync to the fence |
175 | */ | 175 | */ |
176 | int amdgpu_sync_resv(struct amdgpu_device *adev, | 176 | int amdgpu_sync_resv(struct amdgpu_device *adev, |
177 | struct amdgpu_sync *sync, | 177 | struct amdgpu_sync *sync, |
178 | struct reservation_object *resv, | 178 | struct reservation_object *resv, |
179 | void *owner) | 179 | void *owner, bool explicit_sync) |
180 | { | 180 | { |
181 | struct reservation_object_list *flist; | 181 | struct reservation_object_list *flist; |
182 | struct dma_fence *f; | 182 | struct dma_fence *f; |
@@ -191,6 +191,9 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, | |||
191 | f = reservation_object_get_excl(resv); | 191 | f = reservation_object_get_excl(resv); |
192 | r = amdgpu_sync_fence(adev, sync, f); | 192 | r = amdgpu_sync_fence(adev, sync, f); |
193 | 193 | ||
194 | if (explicit_sync) | ||
195 | return r; | ||
196 | |||
194 | flist = reservation_object_get_list(resv); | 197 | flist = reservation_object_get_list(resv); |
195 | if (!flist || r) | 198 | if (!flist || r) |
196 | return r; | 199 | return r; |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h index dc7687993317..70d7e3a279a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h | |||
@@ -45,7 +45,8 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, | |||
45 | int amdgpu_sync_resv(struct amdgpu_device *adev, | 45 | int amdgpu_sync_resv(struct amdgpu_device *adev, |
46 | struct amdgpu_sync *sync, | 46 | struct amdgpu_sync *sync, |
47 | struct reservation_object *resv, | 47 | struct reservation_object *resv, |
48 | void *owner); | 48 | void *owner, |
49 | bool explicit_sync); | ||
49 | struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, | 50 | struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, |
50 | struct amdgpu_ring *ring); | 51 | struct amdgpu_ring *ring); |
51 | struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync); | 52 | struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync); |
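
Passing explicit_sync=true makes amdgpu_sync_resv() stop after the exclusive fence and skip all shared fences, which is how buffers created with the new AMDGPU_GEM_CREATE_EXPLICIT_SYNC flag opt out of implicit synchronization. A hedged user-space sketch of creating such a buffer, assuming the flag and union drm_amdgpu_gem_create from the uapi side of this series:

    #include <stdint.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <drm/amdgpu_drm.h>

    /* fd: an open amdgpu render node */
    static int create_explicitly_synced_bo(int fd, uint64_t size, uint32_t *handle)
    {
        union drm_amdgpu_gem_create args;
        int r;

        memset(&args, 0, sizeof(args));
        args.in.bo_size = size;
        args.in.alignment = 4096;
        args.in.domains = AMDGPU_GEM_DOMAIN_VRAM;
        /* opt this BO out of implicit sync: only the exclusive fence is honoured */
        args.in.domain_flags = AMDGPU_GEM_CREATE_EXPLICIT_SYNC;

        r = ioctl(fd, DRM_IOCTL_AMDGPU_GEM_CREATE, &args);
        if (r == 0)
            *handle = args.out.handle;
        return r;
    }

Callers that opt in take responsibility for ordering access themselves, for example with fences or syncobjs passed explicitly between processes.
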
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 15a28578d458..51eacefadea1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | |||
@@ -44,6 +44,7 @@ | |||
44 | #include <linux/debugfs.h> | 44 | #include <linux/debugfs.h> |
45 | #include <linux/iommu.h> | 45 | #include <linux/iommu.h> |
46 | #include "amdgpu.h" | 46 | #include "amdgpu.h" |
47 | #include "amdgpu_object.h" | ||
47 | #include "amdgpu_trace.h" | 48 | #include "amdgpu_trace.h" |
48 | #include "bif/bif_4_1_d.h" | 49 | #include "bif/bif_4_1_d.h" |
49 | 50 | ||
@@ -209,7 +210,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, | |||
209 | placement->num_busy_placement = 1; | 210 | placement->num_busy_placement = 1; |
210 | return; | 211 | return; |
211 | } | 212 | } |
212 | abo = container_of(bo, struct amdgpu_bo, tbo); | 213 | abo = ttm_to_amdgpu_bo(bo); |
213 | switch (bo->mem.mem_type) { | 214 | switch (bo->mem.mem_type) { |
214 | case TTM_PL_VRAM: | 215 | case TTM_PL_VRAM: |
215 | if (adev->mman.buffer_funcs && | 216 | if (adev->mman.buffer_funcs && |
@@ -257,7 +258,7 @@ gtt: | |||
257 | 258 | ||
258 | static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp) | 259 | static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp) |
259 | { | 260 | { |
260 | struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo); | 261 | struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo); |
261 | 262 | ||
262 | if (amdgpu_ttm_tt_get_usermm(bo->ttm)) | 263 | if (amdgpu_ttm_tt_get_usermm(bo->ttm)) |
263 | return -EPERM; | 264 | return -EPERM; |
@@ -289,97 +290,177 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo, | |||
289 | return addr; | 290 | return addr; |
290 | } | 291 | } |
291 | 292 | ||
292 | static int amdgpu_move_blit(struct ttm_buffer_object *bo, | 293 | /** |
293 | bool evict, bool no_wait_gpu, | 294 | * amdgpu_find_mm_node - Helper function finds the drm_mm_node |
294 | struct ttm_mem_reg *new_mem, | 295 | * corresponding to @offset. It also modifies the offset to be |
295 | struct ttm_mem_reg *old_mem) | 296 | * within the drm_mm_node returned |
297 | */ | ||
298 | static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem, | ||
299 | unsigned long *offset) | ||
296 | { | 300 | { |
297 | struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); | 301 | struct drm_mm_node *mm_node = mem->mm_node; |
298 | struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; | ||
299 | 302 | ||
300 | struct drm_mm_node *old_mm, *new_mm; | 303 | while (*offset >= (mm_node->size << PAGE_SHIFT)) { |
301 | uint64_t old_start, old_size, new_start, new_size; | 304 | *offset -= (mm_node->size << PAGE_SHIFT); |
302 | unsigned long num_pages; | 305 | ++mm_node; |
303 | struct dma_fence *fence = NULL; | 306 | } |
304 | int r; | 307 | return mm_node; |
308 | } | ||
305 | 309 | ||
306 | BUILD_BUG_ON((PAGE_SIZE % AMDGPU_GPU_PAGE_SIZE) != 0); | 310 | /** |
311 | * amdgpu_copy_ttm_mem_to_mem - Helper function for copy | ||
312 | * | ||
313 | * The function copies @size bytes from {src->mem + src->offset} to | ||
314 | * {dst->mem + dst->offset}. src->bo and dst->bo could be same BO for a | ||
315 | * move and different for a BO to BO copy. | ||
316 | * | ||
317 | * @f: Returns the last fence if multiple jobs are submitted. | ||
318 | */ | ||
319 | int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, | ||
320 | struct amdgpu_copy_mem *src, | ||
321 | struct amdgpu_copy_mem *dst, | ||
322 | uint64_t size, | ||
323 | struct reservation_object *resv, | ||
324 | struct dma_fence **f) | ||
325 | { | ||
326 | struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; | ||
327 | struct drm_mm_node *src_mm, *dst_mm; | ||
328 | uint64_t src_node_start, dst_node_start, src_node_size, | ||
329 | dst_node_size, src_page_offset, dst_page_offset; | ||
330 | struct dma_fence *fence = NULL; | ||
331 | int r = 0; | ||
332 | const uint64_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE * | ||
333 | AMDGPU_GPU_PAGE_SIZE); | ||
307 | 334 | ||
308 | if (!ring->ready) { | 335 | if (!ring->ready) { |
309 | DRM_ERROR("Trying to move memory with ring turned off.\n"); | 336 | DRM_ERROR("Trying to move memory with ring turned off.\n"); |
310 | return -EINVAL; | 337 | return -EINVAL; |
311 | } | 338 | } |
312 | 339 | ||
313 | old_mm = old_mem->mm_node; | 340 | src_mm = amdgpu_find_mm_node(src->mem, &src->offset); |
314 | old_size = old_mm->size; | 341 | src_node_start = amdgpu_mm_node_addr(src->bo, src_mm, src->mem) + |
315 | old_start = amdgpu_mm_node_addr(bo, old_mm, old_mem); | 342 | src->offset; |
343 | src_node_size = (src_mm->size << PAGE_SHIFT) - src->offset; | ||
344 | src_page_offset = src_node_start & (PAGE_SIZE - 1); | ||
316 | 345 | ||
317 | new_mm = new_mem->mm_node; | 346 | dst_mm = amdgpu_find_mm_node(dst->mem, &dst->offset); |
318 | new_size = new_mm->size; | 347 | dst_node_start = amdgpu_mm_node_addr(dst->bo, dst_mm, dst->mem) + |
319 | new_start = amdgpu_mm_node_addr(bo, new_mm, new_mem); | 348 | dst->offset; |
349 | dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst->offset; | ||
350 | dst_page_offset = dst_node_start & (PAGE_SIZE - 1); | ||
320 | 351 | ||
321 | num_pages = new_mem->num_pages; | ||
322 | mutex_lock(&adev->mman.gtt_window_lock); | 352 | mutex_lock(&adev->mman.gtt_window_lock); |
323 | while (num_pages) { | 353 | |
324 | unsigned long cur_pages = min(min(old_size, new_size), | 354 | while (size) { |
325 | (u64)AMDGPU_GTT_MAX_TRANSFER_SIZE); | 355 | unsigned long cur_size; |
326 | uint64_t from = old_start, to = new_start; | 356 | uint64_t from = src_node_start, to = dst_node_start; |
327 | struct dma_fence *next; | 357 | struct dma_fence *next; |
328 | 358 | ||
329 | if (old_mem->mem_type == TTM_PL_TT && | 359 | /* Copy size cannot exceed GTT_MAX_BYTES. So if src or dst |
330 | !amdgpu_gtt_mgr_is_allocated(old_mem)) { | 360 | * begins at an offset, then adjust the size accordingly |
331 | r = amdgpu_map_buffer(bo, old_mem, cur_pages, | 361 | */ |
332 | old_start, 0, ring, &from); | 362 | cur_size = min3(min(src_node_size, dst_node_size), size, |
363 | GTT_MAX_BYTES); | ||
364 | if (cur_size + src_page_offset > GTT_MAX_BYTES || | ||
365 | cur_size + dst_page_offset > GTT_MAX_BYTES) | ||
366 | cur_size -= max(src_page_offset, dst_page_offset); | ||
367 | |||
368 | /* Map only what needs to be accessed. Map src to window 0 and | ||
369 | * dst to window 1 | ||
370 | */ | ||
371 | if (src->mem->mem_type == TTM_PL_TT && | ||
372 | !amdgpu_gtt_mgr_is_allocated(src->mem)) { | ||
373 | r = amdgpu_map_buffer(src->bo, src->mem, | ||
374 | PFN_UP(cur_size + src_page_offset), | ||
375 | src_node_start, 0, ring, | ||
376 | &from); | ||
333 | if (r) | 377 | if (r) |
334 | goto error; | 378 | goto error; |
379 | /* Adjust the offset because amdgpu_map_buffer returns | ||
380 | * start of mapped page | ||
381 | */ | ||
382 | from += src_page_offset; | ||
335 | } | 383 | } |
336 | 384 | ||
337 | if (new_mem->mem_type == TTM_PL_TT && | 385 | if (dst->mem->mem_type == TTM_PL_TT && |
338 | !amdgpu_gtt_mgr_is_allocated(new_mem)) { | 386 | !amdgpu_gtt_mgr_is_allocated(dst->mem)) { |
339 | r = amdgpu_map_buffer(bo, new_mem, cur_pages, | 387 | r = amdgpu_map_buffer(dst->bo, dst->mem, |
340 | new_start, 1, ring, &to); | 388 | PFN_UP(cur_size + dst_page_offset), |
389 | dst_node_start, 1, ring, | ||
390 | &to); | ||
341 | if (r) | 391 | if (r) |
342 | goto error; | 392 | goto error; |
393 | to += dst_page_offset; | ||
343 | } | 394 | } |
344 | 395 | ||
345 | r = amdgpu_copy_buffer(ring, from, to, | 396 | r = amdgpu_copy_buffer(ring, from, to, cur_size, |
346 | cur_pages * PAGE_SIZE, | 397 | resv, &next, false, true); |
347 | bo->resv, &next, false, true); | ||
348 | if (r) | 398 | if (r) |
349 | goto error; | 399 | goto error; |
350 | 400 | ||
351 | dma_fence_put(fence); | 401 | dma_fence_put(fence); |
352 | fence = next; | 402 | fence = next; |
353 | 403 | ||
354 | num_pages -= cur_pages; | 404 | size -= cur_size; |
355 | if (!num_pages) | 405 | if (!size) |
356 | break; | 406 | break; |
357 | 407 | ||
358 | old_size -= cur_pages; | 408 | src_node_size -= cur_size; |
359 | if (!old_size) { | 409 | if (!src_node_size) { |
360 | old_start = amdgpu_mm_node_addr(bo, ++old_mm, old_mem); | 410 | src_node_start = amdgpu_mm_node_addr(src->bo, ++src_mm, |
361 | old_size = old_mm->size; | 411 | src->mem); |
412 | src_node_size = (src_mm->size << PAGE_SHIFT); | ||
362 | } else { | 413 | } else { |
363 | old_start += cur_pages * PAGE_SIZE; | 414 | src_node_start += cur_size; |
415 | src_page_offset = src_node_start & (PAGE_SIZE - 1); | ||
364 | } | 416 | } |
365 | 417 | dst_node_size -= cur_size; | |
366 | new_size -= cur_pages; | 418 | if (!dst_node_size) { |
367 | if (!new_size) { | 419 | dst_node_start = amdgpu_mm_node_addr(dst->bo, ++dst_mm, |
368 | new_start = amdgpu_mm_node_addr(bo, ++new_mm, new_mem); | 420 | dst->mem); |
369 | new_size = new_mm->size; | 421 | dst_node_size = (dst_mm->size << PAGE_SHIFT); |
370 | } else { | 422 | } else { |
371 | new_start += cur_pages * PAGE_SIZE; | 423 | dst_node_start += cur_size; |
424 | dst_page_offset = dst_node_start & (PAGE_SIZE - 1); | ||
372 | } | 425 | } |
373 | } | 426 | } |
427 | error: | ||
374 | mutex_unlock(&adev->mman.gtt_window_lock); | 428 | mutex_unlock(&adev->mman.gtt_window_lock); |
429 | if (f) | ||
430 | *f = dma_fence_get(fence); | ||
431 | dma_fence_put(fence); | ||
432 | return r; | ||
433 | } | ||
434 | |||
435 | |||
436 | static int amdgpu_move_blit(struct ttm_buffer_object *bo, | ||
437 | bool evict, bool no_wait_gpu, | ||
438 | struct ttm_mem_reg *new_mem, | ||
439 | struct ttm_mem_reg *old_mem) | ||
440 | { | ||
441 | struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); | ||
442 | struct amdgpu_copy_mem src, dst; | ||
443 | struct dma_fence *fence = NULL; | ||
444 | int r; | ||
445 | |||
446 | src.bo = bo; | ||
447 | dst.bo = bo; | ||
448 | src.mem = old_mem; | ||
449 | dst.mem = new_mem; | ||
450 | src.offset = 0; | ||
451 | dst.offset = 0; | ||
452 | |||
453 | r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst, | ||
454 | new_mem->num_pages << PAGE_SHIFT, | ||
455 | bo->resv, &fence); | ||
456 | if (r) | ||
457 | goto error; | ||
375 | 458 | ||
376 | r = ttm_bo_pipeline_move(bo, fence, evict, new_mem); | 459 | r = ttm_bo_pipeline_move(bo, fence, evict, new_mem); |
377 | dma_fence_put(fence); | 460 | dma_fence_put(fence); |
378 | return r; | 461 | return r; |
379 | 462 | ||
380 | error: | 463 | error: |
381 | mutex_unlock(&adev->mman.gtt_window_lock); | ||
382 | |||
383 | if (fence) | 464 | if (fence) |
384 | dma_fence_wait(fence, false); | 465 | dma_fence_wait(fence, false); |
385 | dma_fence_put(fence); | 466 | dma_fence_put(fence); |
@@ -484,7 +565,7 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, | |||
484 | int r; | 565 | int r; |
485 | 566 | ||
486 | /* Can't move a pinned BO */ | 567 | /* Can't move a pinned BO */ |
487 | abo = container_of(bo, struct amdgpu_bo, tbo); | 568 | abo = ttm_to_amdgpu_bo(bo); |
488 | if (WARN_ON_ONCE(abo->pin_count > 0)) | 569 | if (WARN_ON_ONCE(abo->pin_count > 0)) |
489 | return -EINVAL; | 570 | return -EINVAL; |
490 | 571 | ||
@@ -582,13 +663,12 @@ static void amdgpu_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_re | |||
582 | static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo, | 663 | static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo, |
583 | unsigned long page_offset) | 664 | unsigned long page_offset) |
584 | { | 665 | { |
585 | struct drm_mm_node *mm = bo->mem.mm_node; | 666 | struct drm_mm_node *mm; |
586 | uint64_t size = mm->size; | 667 | unsigned long offset = (page_offset << PAGE_SHIFT); |
587 | uint64_t offset = page_offset; | ||
588 | 668 | ||
589 | page_offset = do_div(offset, size); | 669 | mm = amdgpu_find_mm_node(&bo->mem, &offset); |
590 | mm += offset; | 670 | return (bo->mem.bus.base >> PAGE_SHIFT) + mm->start + |
591 | return (bo->mem.bus.base >> PAGE_SHIFT) + mm->start + page_offset; | 671 | (offset >> PAGE_SHIFT); |
592 | } | 672 | } |
593 | 673 | ||
594 | /* | 674 | /* |
@@ -1142,9 +1222,9 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, | |||
1142 | unsigned long offset, | 1222 | unsigned long offset, |
1143 | void *buf, int len, int write) | 1223 | void *buf, int len, int write) |
1144 | { | 1224 | { |
1145 | struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo); | 1225 | struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo); |
1146 | struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev); | 1226 | struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev); |
1147 | struct drm_mm_node *nodes = abo->tbo.mem.mm_node; | 1227 | struct drm_mm_node *nodes; |
1148 | uint32_t value = 0; | 1228 | uint32_t value = 0; |
1149 | int ret = 0; | 1229 | int ret = 0; |
1150 | uint64_t pos; | 1230 | uint64_t pos; |
@@ -1153,10 +1233,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, | |||
1153 | if (bo->mem.mem_type != TTM_PL_VRAM) | 1233 | if (bo->mem.mem_type != TTM_PL_VRAM) |
1154 | return -EIO; | 1234 | return -EIO; |
1155 | 1235 | ||
1156 | while (offset >= (nodes->size << PAGE_SHIFT)) { | 1236 | nodes = amdgpu_find_mm_node(&abo->tbo.mem, &offset); |
1157 | offset -= nodes->size << PAGE_SHIFT; | ||
1158 | ++nodes; | ||
1159 | } | ||
1160 | pos = (nodes->start << PAGE_SHIFT) + offset; | 1237 | pos = (nodes->start << PAGE_SHIFT) + offset; |
1161 | 1238 | ||
1162 | while (len && pos < adev->mc.mc_vram_size) { | 1239 | while (len && pos < adev->mc.mc_vram_size) { |
@@ -1255,6 +1332,15 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) | |||
1255 | /* Change the size here instead of the init above so only lpfn is affected */ | 1332 | /* Change the size here instead of the init above so only lpfn is affected */ |
1256 | amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size); | 1333 | amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size); |
1257 | 1334 | ||
1335 | /* | ||
1336 | * The reserved VRAM for firmware must be pinned to the specified | ||
1337 | * place in VRAM, so reserve it early. | ||
1338 | */ | ||
1339 | r = amdgpu_fw_reserve_vram_init(adev); | ||
1340 | if (r) { | ||
1341 | return r; | ||
1342 | } | ||
1343 | |||
1258 | r = amdgpu_bo_create_kernel(adev, adev->mc.stolen_size, PAGE_SIZE, | 1344 | r = amdgpu_bo_create_kernel(adev, adev->mc.stolen_size, PAGE_SIZE, |
1259 | AMDGPU_GEM_DOMAIN_VRAM, | 1345 | AMDGPU_GEM_DOMAIN_VRAM, |
1260 | &adev->stolen_vga_memory, | 1346 | &adev->stolen_vga_memory, |
@@ -1479,7 +1565,8 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, | |||
1479 | job->vm_needs_flush = vm_needs_flush; | 1565 | job->vm_needs_flush = vm_needs_flush; |
1480 | if (resv) { | 1566 | if (resv) { |
1481 | r = amdgpu_sync_resv(adev, &job->sync, resv, | 1567 | r = amdgpu_sync_resv(adev, &job->sync, resv, |
1482 | AMDGPU_FENCE_OWNER_UNDEFINED); | 1568 | AMDGPU_FENCE_OWNER_UNDEFINED, |
1569 | false); | ||
1483 | if (r) { | 1570 | if (r) { |
1484 | DRM_ERROR("sync failed (%d).\n", r); | 1571 | DRM_ERROR("sync failed (%d).\n", r); |
1485 | goto error_free; | 1572 | goto error_free; |
@@ -1571,7 +1658,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, | |||
1571 | 1658 | ||
1572 | if (resv) { | 1659 | if (resv) { |
1573 | r = amdgpu_sync_resv(adev, &job->sync, resv, | 1660 | r = amdgpu_sync_resv(adev, &job->sync, resv, |
1574 | AMDGPU_FENCE_OWNER_UNDEFINED); | 1661 | AMDGPU_FENCE_OWNER_UNDEFINED, false); |
1575 | if (r) { | 1662 | if (r) { |
1576 | DRM_ERROR("sync failed (%d).\n", r); | 1663 | DRM_ERROR("sync failed (%d).\n", r); |
1577 | goto error_free; | 1664 | goto error_free; |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 7abae6867339..abd4084982a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | |||
@@ -58,6 +58,12 @@ struct amdgpu_mman { | |||
58 | struct amd_sched_entity entity; | 58 | struct amd_sched_entity entity; |
59 | }; | 59 | }; |
60 | 60 | ||
61 | struct amdgpu_copy_mem { | ||
62 | struct ttm_buffer_object *bo; | ||
63 | struct ttm_mem_reg *mem; | ||
64 | unsigned long offset; | ||
65 | }; | ||
66 | |||
61 | extern const struct ttm_mem_type_manager_func amdgpu_gtt_mgr_func; | 67 | extern const struct ttm_mem_type_manager_func amdgpu_gtt_mgr_func; |
62 | extern const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func; | 68 | extern const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func; |
63 | 69 | ||
@@ -72,6 +78,12 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, | |||
72 | struct reservation_object *resv, | 78 | struct reservation_object *resv, |
73 | struct dma_fence **fence, bool direct_submit, | 79 | struct dma_fence **fence, bool direct_submit, |
74 | bool vm_needs_flush); | 80 | bool vm_needs_flush); |
81 | int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, | ||
82 | struct amdgpu_copy_mem *src, | ||
83 | struct amdgpu_copy_mem *dst, | ||
84 | uint64_t size, | ||
85 | struct reservation_object *resv, | ||
86 | struct dma_fence **f); | ||
75 | int amdgpu_fill_buffer(struct amdgpu_bo *bo, | 87 | int amdgpu_fill_buffer(struct amdgpu_bo *bo, |
76 | uint64_t src_data, | 88 | uint64_t src_data, |
77 | struct reservation_object *resv, | 89 | struct reservation_object *resv, |
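
struct amdgpu_copy_mem pairs a BO with a ttm_mem_reg and a byte offset, so amdgpu_ttm_copy_mem_to_mem() can copy an arbitrary byte range between two (possibly different) BOs rather than whole placements. A hedged in-kernel usage sketch; amdgpu_copy_range() is a hypothetical helper for illustration, not part of this series:

    /* sketch only: copy 'size' bytes from src_bo+src_off to dst_bo+dst_off */
    static int amdgpu_copy_range(struct amdgpu_device *adev,
                                 struct amdgpu_bo *src_bo, uint64_t src_off,
                                 struct amdgpu_bo *dst_bo, uint64_t dst_off,
                                 uint64_t size, struct dma_fence **fence)
    {
        struct amdgpu_copy_mem src = {
            .bo = &src_bo->tbo,
            .mem = &src_bo->tbo.mem,
            .offset = src_off,
        };
        struct amdgpu_copy_mem dst = {
            .bo = &dst_bo->tbo,
            .mem = &dst_bo->tbo.mem,
            .offset = dst_off,
        };

        /* both BOs are assumed reserved; sync against the destination's resv */
        return amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst, size,
                                          dst_bo->tbo.resv, fence);
    }
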
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index ab05121b9272..e97f80f86005 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | |||
@@ -22,7 +22,7 @@ | |||
22 | */ | 22 | */ |
23 | 23 | ||
24 | #include "amdgpu.h" | 24 | #include "amdgpu.h" |
25 | #define MAX_KIQ_REG_WAIT 100000 | 25 | #define MAX_KIQ_REG_WAIT 100000000 /* in usecs */ |
26 | 26 | ||
27 | int amdgpu_allocate_static_csa(struct amdgpu_device *adev) | 27 | int amdgpu_allocate_static_csa(struct amdgpu_device *adev) |
28 | { | 28 | { |
@@ -114,27 +114,24 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev) | |||
114 | uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) | 114 | uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) |
115 | { | 115 | { |
116 | signed long r; | 116 | signed long r; |
117 | uint32_t val; | 117 | uint32_t val, seq; |
118 | struct dma_fence *f; | ||
119 | struct amdgpu_kiq *kiq = &adev->gfx.kiq; | 118 | struct amdgpu_kiq *kiq = &adev->gfx.kiq; |
120 | struct amdgpu_ring *ring = &kiq->ring; | 119 | struct amdgpu_ring *ring = &kiq->ring; |
121 | 120 | ||
122 | BUG_ON(!ring->funcs->emit_rreg); | 121 | BUG_ON(!ring->funcs->emit_rreg); |
123 | 122 | ||
124 | mutex_lock(&kiq->ring_mutex); | 123 | spin_lock(&kiq->ring_lock); |
125 | amdgpu_ring_alloc(ring, 32); | 124 | amdgpu_ring_alloc(ring, 32); |
126 | amdgpu_ring_emit_rreg(ring, reg); | 125 | amdgpu_ring_emit_rreg(ring, reg); |
127 | amdgpu_fence_emit(ring, &f); | 126 | amdgpu_fence_emit_polling(ring, &seq); |
128 | amdgpu_ring_commit(ring); | 127 | amdgpu_ring_commit(ring); |
129 | mutex_unlock(&kiq->ring_mutex); | 128 | spin_unlock(&kiq->ring_lock); |
130 | 129 | ||
131 | r = dma_fence_wait_timeout(f, false, msecs_to_jiffies(MAX_KIQ_REG_WAIT)); | 130 | r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); |
132 | dma_fence_put(f); | ||
133 | if (r < 1) { | 131 | if (r < 1) { |
134 | DRM_ERROR("wait for kiq fence error: %ld.\n", r); | 132 | DRM_ERROR("wait for kiq fence error: %ld\n", r); |
135 | return ~0; | 133 | return ~0; |
136 | } | 134 | } |
137 | |||
138 | val = adev->wb.wb[adev->virt.reg_val_offs]; | 135 | val = adev->wb.wb[adev->virt.reg_val_offs]; |
139 | 136 | ||
140 | return val; | 137 | return val; |
@@ -143,23 +140,22 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) | |||
143 | void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) | 140 | void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) |
144 | { | 141 | { |
145 | signed long r; | 142 | signed long r; |
146 | struct dma_fence *f; | 143 | uint32_t seq; |
147 | struct amdgpu_kiq *kiq = &adev->gfx.kiq; | 144 | struct amdgpu_kiq *kiq = &adev->gfx.kiq; |
148 | struct amdgpu_ring *ring = &kiq->ring; | 145 | struct amdgpu_ring *ring = &kiq->ring; |
149 | 146 | ||
150 | BUG_ON(!ring->funcs->emit_wreg); | 147 | BUG_ON(!ring->funcs->emit_wreg); |
151 | 148 | ||
152 | mutex_lock(&kiq->ring_mutex); | 149 | spin_lock(&kiq->ring_lock); |
153 | amdgpu_ring_alloc(ring, 32); | 150 | amdgpu_ring_alloc(ring, 32); |
154 | amdgpu_ring_emit_wreg(ring, reg, v); | 151 | amdgpu_ring_emit_wreg(ring, reg, v); |
155 | amdgpu_fence_emit(ring, &f); | 152 | amdgpu_fence_emit_polling(ring, &seq); |
156 | amdgpu_ring_commit(ring); | 153 | amdgpu_ring_commit(ring); |
157 | mutex_unlock(&kiq->ring_mutex); | 154 | spin_unlock(&kiq->ring_lock); |
158 | 155 | ||
159 | r = dma_fence_wait_timeout(f, false, msecs_to_jiffies(MAX_KIQ_REG_WAIT)); | 156 | r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); |
160 | if (r < 1) | 157 | if (r < 1) |
161 | DRM_ERROR("wait for kiq fence error: %ld.\n", r); | 158 | DRM_ERROR("wait for kiq fence error: %ld\n", r); |
162 | dma_fence_put(f); | ||
163 | } | 159 | } |
164 | 160 | ||
165 | /** | 161 | /** |
@@ -274,3 +270,78 @@ void amdgpu_virt_free_mm_table(struct amdgpu_device *adev) | |||
274 | (void *)&adev->virt.mm_table.cpu_addr); | 270 | (void *)&adev->virt.mm_table.cpu_addr); |
275 | adev->virt.mm_table.gpu_addr = 0; | 271 | adev->virt.mm_table.gpu_addr = 0; |
276 | } | 272 | } |
273 | |||
274 | |||
275 | int amdgpu_virt_fw_reserve_get_checksum(void *obj, | ||
276 | unsigned long obj_size, | ||
277 | unsigned int key, | ||
278 | unsigned int chksum) | ||
279 | { | ||
280 | unsigned int ret = key; | ||
281 | unsigned long i = 0; | ||
282 | unsigned char *pos; | ||
283 | |||
284 | pos = (char *)obj; | ||
285 | /* calculate checksum */ | ||
286 | for (i = 0; i < obj_size; ++i) | ||
287 | ret += *(pos + i); | ||
288 | /* minus the chksum itself */ | ||
289 | pos = (char *)&chksum; | ||
290 | for (i = 0; i < sizeof(chksum); ++i) | ||
291 | ret -= *(pos + i); | ||
292 | return ret; | ||
293 | } | ||
294 | |||
295 | void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev) | ||
296 | { | ||
297 | uint32_t pf2vf_ver = 0; | ||
298 | uint32_t pf2vf_size = 0; | ||
299 | uint32_t checksum = 0; | ||
300 | uint32_t checkval; | ||
301 | char *str; | ||
302 | |||
303 | adev->virt.fw_reserve.p_pf2vf = NULL; | ||
304 | adev->virt.fw_reserve.p_vf2pf = NULL; | ||
305 | |||
306 | if (adev->fw_vram_usage.va != NULL) { | ||
307 | adev->virt.fw_reserve.p_pf2vf = | ||
308 | (struct amdgim_pf2vf_info_header *)( | ||
309 | adev->fw_vram_usage.va + AMDGIM_DATAEXCHANGE_OFFSET); | ||
310 | pf2vf_ver = adev->virt.fw_reserve.p_pf2vf->version; | ||
311 | AMDGPU_FW_VRAM_PF2VF_READ(adev, header.size, &pf2vf_size); | ||
312 | AMDGPU_FW_VRAM_PF2VF_READ(adev, checksum, &checksum); | ||
313 | |||
314 | /* pf2vf message must be in 4K */ | ||
315 | if (pf2vf_size > 0 && pf2vf_size < 4096) { | ||
316 | checkval = amdgpu_virt_fw_reserve_get_checksum( | ||
317 | adev->virt.fw_reserve.p_pf2vf, pf2vf_size, | ||
318 | adev->virt.fw_reserve.checksum_key, checksum); | ||
319 | if (checkval == checksum) { | ||
320 | adev->virt.fw_reserve.p_vf2pf = | ||
321 | ((void *)adev->virt.fw_reserve.p_pf2vf + | ||
322 | pf2vf_size); | ||
323 | memset((void *)adev->virt.fw_reserve.p_vf2pf, 0, | ||
324 | sizeof(amdgim_vf2pf_info)); | ||
325 | AMDGPU_FW_VRAM_VF2PF_WRITE(adev, header.version, | ||
326 | AMDGPU_FW_VRAM_VF2PF_VER); | ||
327 | AMDGPU_FW_VRAM_VF2PF_WRITE(adev, header.size, | ||
328 | sizeof(amdgim_vf2pf_info)); | ||
329 | AMDGPU_FW_VRAM_VF2PF_READ(adev, driver_version, | ||
330 | &str); | ||
331 | if (THIS_MODULE->version != NULL) | ||
332 | strcpy(str, THIS_MODULE->version); | ||
333 | else | ||
334 | strcpy(str, "N/A"); | ||
335 | AMDGPU_FW_VRAM_VF2PF_WRITE(adev, driver_cert, | ||
336 | 0); | ||
337 | AMDGPU_FW_VRAM_VF2PF_WRITE(adev, checksum, | ||
338 | amdgpu_virt_fw_reserve_get_checksum( | ||
339 | adev->virt.fw_reserve.p_vf2pf, | ||
340 | pf2vf_size, | ||
341 | adev->virt.fw_reserve.checksum_key, 0)); | ||
342 | } | ||
343 | } | ||
344 | } | ||
345 | } | ||
346 | |||
347 | |||
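
The pf2vf/vf2pf exchange is guarded by a simple additive checksum: the producer stores key plus the byte sum of the block (with the checksum field still zero), and the consumer recomputes over the received block while subtracting the stored checksum's own bytes, so a match confirms the rest of the block. A standalone sketch of that property (toy struct, not the real amdgim layout):

    #include <stdio.h>
    #include <string.h>

    /* mirrors amdgpu_virt_fw_reserve_get_checksum(): byte-wise sum plus key,
     * minus the bytes of the checksum value itself */
    static unsigned int get_checksum(void *obj, unsigned long size,
                                     unsigned int key, unsigned int chksum)
    {
        unsigned int ret = key;
        unsigned char *pos = obj;
        unsigned long i;

        for (i = 0; i < size; ++i)
            ret += pos[i];
        pos = (unsigned char *)&chksum;
        for (i = 0; i < sizeof(chksum); ++i)
            ret -= pos[i];
        return ret;
    }

    struct msg {                      /* toy stand-in for the pf2vf block */
        unsigned int payload[4];
        unsigned int checksum;
    };

    int main(void)
    {
        struct msg m = { .payload = { 1, 2, 3, 4 } };
        unsigned int key = 0x1234;

        /* producer: checksum over the block with the checksum field still zero */
        m.checksum = get_checksum(&m, sizeof(m), key, 0);

        /* consumer: recompute over the received block; the stored checksum's
         * own bytes cancel out, so a match means the payload is intact */
        printf("%s\n",
               get_checksum(&m, sizeof(m), key, m.checksum) == m.checksum
               ? "checksum OK" : "checksum mismatch");
        return 0;
    }
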
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index e5fd0ff6b29d..b89d37fc406f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | |||
@@ -58,6 +58,179 @@ struct amdgpu_virt_ops { | |||
58 | void (*trans_msg)(struct amdgpu_device *adev, u32 req, u32 data1, u32 data2, u32 data3); | 58 | void (*trans_msg)(struct amdgpu_device *adev, u32 req, u32 data1, u32 data2, u32 data3); |
59 | }; | 59 | }; |
60 | 60 | ||
61 | /* | ||
62 | * Firmware-reserved frame buffer | ||
63 | */ | ||
64 | struct amdgpu_virt_fw_reserve { | ||
65 | struct amdgim_pf2vf_info_header *p_pf2vf; | ||
66 | struct amdgim_vf2pf_info_header *p_vf2pf; | ||
67 | unsigned int checksum_key; | ||
68 | }; | ||
69 | /* | ||
70 | * Definitions shared between PF and VF. | ||
71 | * Structures are force-aligned to 4 bytes to match the PF-side layout. | ||
72 | */ | ||
73 | #define AMDGIM_DATAEXCHANGE_OFFSET (64 * 1024) | ||
74 | |||
75 | #define AMDGIM_GET_STRUCTURE_RESERVED_SIZE(total, u8, u16, u32, u64) \ | ||
76 | (total - (((u8)+3) / 4 + ((u16)+1) / 2 + (u32) + (u64)*2)) | ||
77 | |||
78 | enum AMDGIM_FEATURE_FLAG { | ||
79 | /* GIM supports feature of Error log collecting */ | ||
80 | AMDGIM_FEATURE_ERROR_LOG_COLLECT = 0x1, | ||
81 | /* GIM supports feature of loading uCodes */ | ||
82 | AMDGIM_FEATURE_GIM_LOAD_UCODES = 0x2, | ||
83 | }; | ||
84 | |||
85 | struct amdgim_pf2vf_info_header { | ||
86 | /* the total structure size in bytes. */ | ||
87 | uint32_t size; | ||
88 | /* version of this structure, written by the GIM */ | ||
89 | uint32_t version; | ||
90 | } __aligned(4); | ||
91 | struct amdgim_pf2vf_info_v1 { | ||
92 | /* header contains size and version */ | ||
93 | struct amdgim_pf2vf_info_header header; | ||
94 | /* max_width * max_height */ | ||
95 | unsigned int uvd_enc_max_pixels_count; | ||
96 | /* 16x16 pixels/sec, codec independent */ | ||
97 | unsigned int uvd_enc_max_bandwidth; | ||
98 | /* max_width * max_height */ | ||
99 | unsigned int vce_enc_max_pixels_count; | ||
100 | /* 16x16 pixels/sec, codec independent */ | ||
101 | unsigned int vce_enc_max_bandwidth; | ||
102 | /* MEC FW position in KB from the start of visible frame buffer */ | ||
103 | unsigned int mecfw_kboffset; | ||
104 | /* The feature flags that the GIM driver supports. */ | ||
105 | unsigned int feature_flags; | ||
106 | /* use private key from mailbox 2 to create checksum */ | ||
107 | unsigned int checksum; | ||
108 | } __aligned(4); | ||
109 | |||
110 | struct amdgim_pf2vf_info_v2 { | ||
111 | /* header contains size and version */ | ||
112 | struct amdgim_pf2vf_info_header header; | ||
113 | /* use private key from mailbox 2 to create checksum */ | ||
114 | uint32_t checksum; | ||
115 | /* The feature flags that the GIM driver supports. */ | ||
116 | uint32_t feature_flags; | ||
117 | /* max_width * max_height */ | ||
118 | uint32_t uvd_enc_max_pixels_count; | ||
119 | /* 16x16 pixels/sec, codec independent */ | ||
120 | uint32_t uvd_enc_max_bandwidth; | ||
121 | /* max_width * max_height */ | ||
122 | uint32_t vce_enc_max_pixels_count; | ||
123 | /* 16x16 pixels/sec, codec independent */ | ||
124 | uint32_t vce_enc_max_bandwidth; | ||
125 | /* MEC FW position in KB from the start of VF visible frame buffer */ | ||
126 | uint64_t mecfw_kboffset; | ||
127 | /* MEC FW size in KB */ | ||
128 | uint32_t mecfw_ksize; | ||
129 | /* UVD FW position in KB from the start of VF visible frame buffer */ | ||
130 | uint64_t uvdfw_kboffset; | ||
131 | /* UVD FW size in KB */ | ||
132 | uint32_t uvdfw_ksize; | ||
133 | /* VCE FW position in KB from the start of VF visible frame buffer */ | ||
134 | uint64_t vcefw_kboffset; | ||
135 | /* VCE FW size in KB */ | ||
136 | uint32_t vcefw_ksize; | ||
137 | uint32_t reserved[AMDGIM_GET_STRUCTURE_RESERVED_SIZE(256, 0, 0, (9 + sizeof(struct amdgim_pf2vf_info_header)/sizeof(uint32_t)), 3)]; | ||
138 | } __aligned(4); | ||
139 | |||
140 | |||
141 | struct amdgim_vf2pf_info_header { | ||
142 | /* the total structure size in bytes. */ | ||
143 | uint32_t size; | ||
144 | /* version of this structure, written by the guest */ | ||
145 | uint32_t version; | ||
146 | } __aligned(4); | ||
147 | |||
148 | struct amdgim_vf2pf_info_v1 { | ||
149 | /* header contains size and version */ | ||
150 | struct amdgim_vf2pf_info_header header; | ||
151 | /* driver version */ | ||
152 | char driver_version[64]; | ||
153 | /* driver certification, 1=WHQL, 0=None */ | ||
154 | unsigned int driver_cert; | ||
155 | /* guest OS type and version: need a define */ | ||
156 | unsigned int os_info; | ||
157 | /* in units of 1 MB */ | ||
158 | unsigned int fb_usage; | ||
159 | /* guest gfx engine usage percentage */ | ||
160 | unsigned int gfx_usage; | ||
161 | /* guest gfx engine health percentage */ | ||
162 | unsigned int gfx_health; | ||
163 | /* guest compute engine usage percentage */ | ||
164 | unsigned int compute_usage; | ||
165 | /* guest compute engine health percentage */ | ||
166 | unsigned int compute_health; | ||
167 | /* guest vce engine usage percentage. 0xffff means N/A. */ | ||
168 | unsigned int vce_enc_usage; | ||
169 | /* guest vce engine health percentage. 0xffff means N/A. */ | ||
170 | unsigned int vce_enc_health; | ||
171 | /* guest uvd engine usage percentage. 0xffff means N/A. */ | ||
172 | unsigned int uvd_enc_usage; | ||
173 | /* guest uvd engine health percentage. 0xffff means N/A. */ | ||
174 | unsigned int uvd_enc_health; | ||
175 | unsigned int checksum; | ||
176 | } __aligned(4); | ||
177 | |||
178 | struct amdgim_vf2pf_info_v2 { | ||
179 | /* header contains size and version */ | ||
180 | struct amdgim_vf2pf_info_header header; | ||
181 | uint32_t checksum; | ||
182 | /* driver version */ | ||
183 | uint8_t driver_version[64]; | ||
184 | /* driver certification, 1=WHQL, 0=None */ | ||
185 | uint32_t driver_cert; | ||
186 | /* guest OS type and version: need a define */ | ||
187 | uint32_t os_info; | ||
188 | /* in units of 1 MB */ | ||
189 | uint32_t fb_usage; | ||
190 | /* guest gfx engine usage percentage */ | ||
191 | uint32_t gfx_usage; | ||
192 | /* guest gfx engine health percentage */ | ||
193 | uint32_t gfx_health; | ||
194 | /* guest compute engine usage percentage */ | ||
195 | uint32_t compute_usage; | ||
196 | /* guest compute engine health percentage */ | ||
197 | uint32_t compute_health; | ||
198 | /* guest vce engine usage percentage. 0xffff means N/A. */ | ||
199 | uint32_t vce_enc_usage; | ||
200 | /* guest vce engine health percentage. 0xffff means N/A. */ | ||
201 | uint32_t vce_enc_health; | ||
202 | /* guest uvd engine usage percentage. 0xffff means N/A. */ | ||
203 | uint32_t uvd_enc_usage; | ||
204 | /* guest uvd engine health percentage. 0xffff means N/A. */ | ||
205 | uint32_t uvd_enc_health; | ||
206 | uint32_t reserved[AMDGIM_GET_STRUCTURE_RESERVED_SIZE(256, 64, 0, (12 + sizeof(struct amdgim_vf2pf_info_header)/sizeof(uint32_t)), 0)]; | ||
207 | } __aligned(4); | ||
208 | |||
209 | #define AMDGPU_FW_VRAM_VF2PF_VER 2 | ||
210 | typedef struct amdgim_vf2pf_info_v2 amdgim_vf2pf_info; | ||
211 | |||
212 | #define AMDGPU_FW_VRAM_VF2PF_WRITE(adev, field, val) \ | ||
213 | do { \ | ||
214 | ((amdgim_vf2pf_info *)adev->virt.fw_reserve.p_vf2pf)->field = (val); \ | ||
215 | } while (0) | ||
216 | |||
217 | #define AMDGPU_FW_VRAM_VF2PF_READ(adev, field, val) \ | ||
218 | do { \ | ||
219 | (*val) = ((amdgim_vf2pf_info *)adev->virt.fw_reserve.p_vf2pf)->field; \ | ||
220 | } while (0) | ||
221 | |||
222 | #define AMDGPU_FW_VRAM_PF2VF_READ(adev, field, val) \ | ||
223 | do { \ | ||
224 | if (!adev->virt.fw_reserve.p_pf2vf) \ | ||
225 | *(val) = 0; \ | ||
226 | else { \ | ||
227 | if (adev->virt.fw_reserve.p_pf2vf->version == 1) \ | ||
228 | *(val) = ((struct amdgim_pf2vf_info_v1 *)adev->virt.fw_reserve.p_pf2vf)->field; \ | ||
229 | if (adev->virt.fw_reserve.p_pf2vf->version == 2) \ | ||
230 | *(val) = ((struct amdgim_pf2vf_info_v2 *)adev->virt.fw_reserve.p_pf2vf)->field; \ | ||
231 | } \ | ||
232 | } while (0) | ||
233 | |||
61 | /* GPU virtualization */ | 234 | /* GPU virtualization */ |
62 | struct amdgpu_virt { | 235 | struct amdgpu_virt { |
63 | uint32_t caps; | 236 | uint32_t caps; |
@@ -72,6 +245,7 @@ struct amdgpu_virt { | |||
72 | struct amdgpu_mm_table mm_table; | 245 | struct amdgpu_mm_table mm_table; |
73 | const struct amdgpu_virt_ops *ops; | 246 | const struct amdgpu_virt_ops *ops; |
74 | struct amdgpu_vf_error_buffer vf_errors; | 247 | struct amdgpu_vf_error_buffer vf_errors; |
248 | struct amdgpu_virt_fw_reserve fw_reserve; | ||
75 | }; | 249 | }; |
76 | 250 | ||
77 | #define AMDGPU_CSA_SIZE (8 * 1024) | 251 | #define AMDGPU_CSA_SIZE (8 * 1024) |
@@ -114,5 +288,9 @@ int amdgpu_virt_reset_gpu(struct amdgpu_device *adev); | |||
114 | int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, struct amdgpu_job *job); | 288 | int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, struct amdgpu_job *job); |
115 | int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev); | 289 | int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev); |
116 | void amdgpu_virt_free_mm_table(struct amdgpu_device *adev); | 290 | void amdgpu_virt_free_mm_table(struct amdgpu_device *adev); |
291 | int amdgpu_virt_fw_reserve_get_checksum(void *obj, unsigned long obj_size, | ||
292 | unsigned int key, | ||
293 | unsigned int chksum); | ||
294 | void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev); | ||
117 | 295 | ||
118 | #endif | 296 | #endif |
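The accessor macros above hide the pf2vf version dispatch from callers, so driver code can read any field that exists in both the v1 and v2 layouts without caring which one the GIM wrote. A hedged sketch of a consumer, assuming the in-tree driver context; the helper name is hypothetical, while the macro, the field and the feature flag come from the header above.

/* Hypothetical helper, shown only to illustrate the macro usage. */
static bool amdgpu_virt_gim_supports_error_log(struct amdgpu_device *adev)
{
        uint32_t feature_flags = 0;

        /* Expands to a version check on p_pf2vf and yields 0 when the
         * exchange region was never mapped. */
        AMDGPU_FW_VRAM_PF2VF_READ(adev, feature_flags, &feature_flags);

        return !!(feature_flags & AMDGIM_FEATURE_ERROR_LOG_COLLECT);
}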
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index fee0a32ac56f..010d14195a5e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | |||
@@ -328,9 +328,10 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, | |||
328 | AMDGPU_GEM_CREATE_SHADOW); | 328 | AMDGPU_GEM_CREATE_SHADOW); |
329 | 329 | ||
330 | if (vm->pte_support_ats) { | 330 | if (vm->pte_support_ats) { |
331 | init_value = AMDGPU_PTE_SYSTEM; | 331 | init_value = AMDGPU_PTE_DEFAULT_ATC; |
332 | if (level != adev->vm_manager.num_level - 1) | 332 | if (level != adev->vm_manager.num_level - 1) |
333 | init_value |= AMDGPU_PDE_PTE; | 333 | init_value |= AMDGPU_PDE_PTE; |
334 | |||
334 | } | 335 | } |
335 | 336 | ||
336 | /* walk over the address space and allocate the page tables */ | 337 | /* walk over the address space and allocate the page tables */ |
@@ -1034,7 +1035,7 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm, | |||
1034 | int r; | 1035 | int r; |
1035 | 1036 | ||
1036 | amdgpu_sync_create(&sync); | 1037 | amdgpu_sync_create(&sync); |
1037 | amdgpu_sync_resv(adev, &sync, vm->root.base.bo->tbo.resv, owner); | 1038 | amdgpu_sync_resv(adev, &sync, vm->root.base.bo->tbo.resv, owner, false); |
1038 | r = amdgpu_sync_wait(&sync, true); | 1039 | r = amdgpu_sync_wait(&sync, true); |
1039 | amdgpu_sync_free(&sync); | 1040 | amdgpu_sync_free(&sync); |
1040 | 1041 | ||
@@ -1175,11 +1176,11 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, | |||
1175 | amdgpu_ring_pad_ib(ring, params.ib); | 1176 | amdgpu_ring_pad_ib(ring, params.ib); |
1176 | amdgpu_sync_resv(adev, &job->sync, | 1177 | amdgpu_sync_resv(adev, &job->sync, |
1177 | parent->base.bo->tbo.resv, | 1178 | parent->base.bo->tbo.resv, |
1178 | AMDGPU_FENCE_OWNER_VM); | 1179 | AMDGPU_FENCE_OWNER_VM, false); |
1179 | if (shadow) | 1180 | if (shadow) |
1180 | amdgpu_sync_resv(adev, &job->sync, | 1181 | amdgpu_sync_resv(adev, &job->sync, |
1181 | shadow->tbo.resv, | 1182 | shadow->tbo.resv, |
1182 | AMDGPU_FENCE_OWNER_VM); | 1183 | AMDGPU_FENCE_OWNER_VM, false); |
1183 | 1184 | ||
1184 | WARN_ON(params.ib->length_dw > ndw); | 1185 | WARN_ON(params.ib->length_dw > ndw); |
1185 | r = amdgpu_job_submit(job, ring, &vm->entity, | 1186 | r = amdgpu_job_submit(job, ring, &vm->entity, |
@@ -1643,7 +1644,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, | |||
1643 | goto error_free; | 1644 | goto error_free; |
1644 | 1645 | ||
1645 | r = amdgpu_sync_resv(adev, &job->sync, vm->root.base.bo->tbo.resv, | 1646 | r = amdgpu_sync_resv(adev, &job->sync, vm->root.base.bo->tbo.resv, |
1646 | owner); | 1647 | owner, false); |
1647 | if (r) | 1648 | if (r) |
1648 | goto error_free; | 1649 | goto error_free; |
1649 | 1650 | ||
@@ -1698,6 +1699,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, | |||
1698 | struct drm_mm_node *nodes, | 1699 | struct drm_mm_node *nodes, |
1699 | struct dma_fence **fence) | 1700 | struct dma_fence **fence) |
1700 | { | 1701 | { |
1702 | unsigned min_linear_pages = 1 << adev->vm_manager.fragment_size; | ||
1701 | uint64_t pfn, start = mapping->start; | 1703 | uint64_t pfn, start = mapping->start; |
1702 | int r; | 1704 | int r; |
1703 | 1705 | ||
@@ -1732,6 +1734,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, | |||
1732 | } | 1734 | } |
1733 | 1735 | ||
1734 | do { | 1736 | do { |
1737 | dma_addr_t *dma_addr = NULL; | ||
1735 | uint64_t max_entries; | 1738 | uint64_t max_entries; |
1736 | uint64_t addr, last; | 1739 | uint64_t addr, last; |
1737 | 1740 | ||
@@ -1745,15 +1748,32 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, | |||
1745 | } | 1748 | } |
1746 | 1749 | ||
1747 | if (pages_addr) { | 1750 | if (pages_addr) { |
1751 | uint64_t count; | ||
1752 | |||
1748 | max_entries = min(max_entries, 16ull * 1024ull); | 1753 | max_entries = min(max_entries, 16ull * 1024ull); |
1749 | addr = 0; | 1754 | for (count = 1; count < max_entries; ++count) { |
1755 | uint64_t idx = pfn + count; | ||
1756 | |||
1757 | if (pages_addr[idx] != | ||
1758 | (pages_addr[idx - 1] + PAGE_SIZE)) | ||
1759 | break; | ||
1760 | } | ||
1761 | |||
1762 | if (count < min_linear_pages) { | ||
1763 | addr = pfn << PAGE_SHIFT; | ||
1764 | dma_addr = pages_addr; | ||
1765 | } else { | ||
1766 | addr = pages_addr[pfn]; | ||
1767 | max_entries = count; | ||
1768 | } | ||
1769 | |||
1750 | } else if (flags & AMDGPU_PTE_VALID) { | 1770 | } else if (flags & AMDGPU_PTE_VALID) { |
1751 | addr += adev->vm_manager.vram_base_offset; | 1771 | addr += adev->vm_manager.vram_base_offset; |
1772 | addr += pfn << PAGE_SHIFT; | ||
1752 | } | 1773 | } |
1753 | addr += pfn << PAGE_SHIFT; | ||
1754 | 1774 | ||
1755 | last = min((uint64_t)mapping->last, start + max_entries - 1); | 1775 | last = min((uint64_t)mapping->last, start + max_entries - 1); |
1756 | r = amdgpu_vm_bo_update_mapping(adev, exclusive, pages_addr, vm, | 1776 | r = amdgpu_vm_bo_update_mapping(adev, exclusive, dma_addr, vm, |
1757 | start, last, flags, addr, | 1777 | start, last, flags, addr, |
1758 | fence); | 1778 | fence); |
1759 | if (r) | 1779 | if (r) |
@@ -2017,7 +2037,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, | |||
2017 | list_del(&mapping->list); | 2037 | list_del(&mapping->list); |
2018 | 2038 | ||
2019 | if (vm->pte_support_ats) | 2039 | if (vm->pte_support_ats) |
2020 | init_pte_value = AMDGPU_PTE_SYSTEM; | 2040 | init_pte_value = AMDGPU_PTE_DEFAULT_ATC; |
2021 | 2041 | ||
2022 | r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm, | 2042 | r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm, |
2023 | mapping->start, mapping->last, | 2043 | mapping->start, mapping->last, |
@@ -2629,7 +2649,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, | |||
2629 | 2649 | ||
2630 | if (adev->asic_type == CHIP_RAVEN) { | 2650 | if (adev->asic_type == CHIP_RAVEN) { |
2631 | vm->pte_support_ats = true; | 2651 | vm->pte_support_ats = true; |
2632 | init_pde_value = AMDGPU_PTE_SYSTEM | AMDGPU_PDE_PTE; | 2652 | init_pde_value = AMDGPU_PTE_DEFAULT_ATC |
2653 | | AMDGPU_PDE_PTE; | ||
2654 | |||
2633 | } | 2655 | } |
2634 | } else | 2656 | } else |
2635 | vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & | 2657 | vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & |
@@ -2737,8 +2759,9 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) | |||
2737 | { | 2759 | { |
2738 | struct amdgpu_bo_va_mapping *mapping, *tmp; | 2760 | struct amdgpu_bo_va_mapping *mapping, *tmp; |
2739 | bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt; | 2761 | bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt; |
2762 | struct amdgpu_bo *root; | ||
2740 | u64 fault; | 2763 | u64 fault; |
2741 | int i; | 2764 | int i, r; |
2742 | 2765 | ||
2743 | /* Clear pending page faults from IH when the VM is destroyed */ | 2766 | /* Clear pending page faults from IH when the VM is destroyed */ |
2744 | while (kfifo_get(&vm->faults, &fault)) | 2767 | while (kfifo_get(&vm->faults, &fault)) |
@@ -2773,7 +2796,15 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) | |||
2773 | amdgpu_vm_free_mapping(adev, vm, mapping, NULL); | 2796 | amdgpu_vm_free_mapping(adev, vm, mapping, NULL); |
2774 | } | 2797 | } |
2775 | 2798 | ||
2776 | amdgpu_vm_free_levels(&vm->root); | 2799 | root = amdgpu_bo_ref(vm->root.base.bo); |
2800 | r = amdgpu_bo_reserve(root, true); | ||
2801 | if (r) { | ||
2802 | dev_err(adev->dev, "Leaking page tables because BO reservation failed\n"); | ||
2803 | } else { | ||
2804 | amdgpu_vm_free_levels(&vm->root); | ||
2805 | amdgpu_bo_unreserve(root); | ||
2806 | } | ||
2807 | amdgpu_bo_unref(&root); | ||
2777 | dma_fence_put(vm->last_update); | 2808 | dma_fence_put(vm->last_update); |
2778 | for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) | 2809 | for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) |
2779 | amdgpu_vm_free_reserved_vmid(adev, vm, i); | 2810 | amdgpu_vm_free_reserved_vmid(adev, vm, i); |
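The amdgpu_vm_bo_split_mapping() change above scans the per-page DMA address table for runs of physically contiguous pages: a run of at least 1 << adev->vm_manager.fragment_size pages is mapped linearly (addr taken from pages_addr[pfn], dma_addr left NULL), while a shorter run falls back to per-page translation through the dma_addr table. A standalone sketch of the scan itself, with illustrative names; the real code also clamps max_entries to 16K entries per iteration.

#include <stdint.h>

#define DEMO_PAGE_SIZE 4096ULL

/* Count how many entries starting at 'pfn' form one contiguous bus-address
 * range; mirrors the loop added to amdgpu_vm_bo_split_mapping(). */
static uint64_t demo_contiguous_run(const uint64_t *pages_addr, uint64_t pfn,
                                    uint64_t max_entries)
{
        uint64_t count;

        for (count = 1; count < max_entries; ++count) {
                uint64_t idx = pfn + count;

                /* Stop as soon as the next page does not directly follow
                 * the previous one in bus-address space. */
                if (pages_addr[idx] != pages_addr[idx - 1] + DEMO_PAGE_SIZE)
                        break;
        }
        return count;
}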
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index d68f39b4e5e7..aa914256b4bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | |||
@@ -73,6 +73,16 @@ struct amdgpu_bo_list_entry; | |||
73 | #define AMDGPU_PTE_MTYPE(a) ((uint64_t)a << 57) | 73 | #define AMDGPU_PTE_MTYPE(a) ((uint64_t)a << 57) |
74 | #define AMDGPU_PTE_MTYPE_MASK AMDGPU_PTE_MTYPE(3ULL) | 74 | #define AMDGPU_PTE_MTYPE_MASK AMDGPU_PTE_MTYPE(3ULL) |
75 | 75 | ||
76 | /* For Raven */ | ||
77 | #define AMDGPU_MTYPE_CC 2 | ||
78 | |||
79 | #define AMDGPU_PTE_DEFAULT_ATC (AMDGPU_PTE_SYSTEM \ | ||
80 | | AMDGPU_PTE_SNOOPED \ | ||
81 | | AMDGPU_PTE_EXECUTABLE \ | ||
82 | | AMDGPU_PTE_READABLE \ | ||
83 | | AMDGPU_PTE_WRITEABLE \ | ||
84 | | AMDGPU_PTE_MTYPE(AMDGPU_MTYPE_CC)) | ||
85 | |||
76 | /* How to programm VM fault handling */ | 86 | /* How to programm VM fault handling */ |
77 | #define AMDGPU_VM_FAULT_STOP_NEVER 0 | 87 | #define AMDGPU_VM_FAULT_STOP_NEVER 0 |
78 | #define AMDGPU_VM_FAULT_STOP_FIRST 1 | 88 | #define AMDGPU_VM_FAULT_STOP_FIRST 1 |
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 147e92b3a959..b8002ac3e536 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | |||
@@ -20,6 +20,7 @@ | |||
20 | * OTHER DEALINGS IN THE SOFTWARE. | 20 | * OTHER DEALINGS IN THE SOFTWARE. |
21 | * | 21 | * |
22 | */ | 22 | */ |
23 | #include <linux/kernel.h> | ||
23 | #include <linux/firmware.h> | 24 | #include <linux/firmware.h> |
24 | #include <drm/drmP.h> | 25 | #include <drm/drmP.h> |
25 | #include "amdgpu.h" | 26 | #include "amdgpu.h" |
@@ -3952,10 +3953,10 @@ static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev) | |||
3952 | adev->gfx.rlc.reg_list_format_size_bytes >> 2, | 3953 | adev->gfx.rlc.reg_list_format_size_bytes >> 2, |
3953 | unique_indices, | 3954 | unique_indices, |
3954 | &indices_count, | 3955 | &indices_count, |
3955 | sizeof(unique_indices) / sizeof(int), | 3956 | ARRAY_SIZE(unique_indices), |
3956 | indirect_start_offsets, | 3957 | indirect_start_offsets, |
3957 | &offset_count, | 3958 | &offset_count, |
3958 | sizeof(indirect_start_offsets)/sizeof(int)); | 3959 | ARRAY_SIZE(indirect_start_offsets)); |
3959 | 3960 | ||
3960 | /* save and restore list */ | 3961 | /* save and restore list */ |
3961 | WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1); | 3962 | WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1); |
@@ -3977,14 +3978,14 @@ static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev) | |||
3977 | /* starting offsets starts */ | 3978 | /* starting offsets starts */ |
3978 | WREG32(mmRLC_GPM_SCRATCH_ADDR, | 3979 | WREG32(mmRLC_GPM_SCRATCH_ADDR, |
3979 | adev->gfx.rlc.starting_offsets_start); | 3980 | adev->gfx.rlc.starting_offsets_start); |
3980 | for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++) | 3981 | for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++) |
3981 | WREG32(mmRLC_GPM_SCRATCH_DATA, | 3982 | WREG32(mmRLC_GPM_SCRATCH_DATA, |
3982 | indirect_start_offsets[i]); | 3983 | indirect_start_offsets[i]); |
3983 | 3984 | ||
3984 | /* unique indices */ | 3985 | /* unique indices */ |
3985 | temp = mmRLC_SRM_INDEX_CNTL_ADDR_0; | 3986 | temp = mmRLC_SRM_INDEX_CNTL_ADDR_0; |
3986 | data = mmRLC_SRM_INDEX_CNTL_DATA_0; | 3987 | data = mmRLC_SRM_INDEX_CNTL_DATA_0; |
3987 | for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) { | 3988 | for (i = 0; i < ARRAY_SIZE(unique_indices); i++) { |
3988 | if (unique_indices[i] != 0) { | 3989 | if (unique_indices[i] != 0) { |
3989 | WREG32(temp + i, unique_indices[i] & 0x3FFFF); | 3990 | WREG32(temp + i, unique_indices[i] & 0x3FFFF); |
3990 | WREG32(data + i, unique_indices[i] >> 20); | 3991 | WREG32(data + i, unique_indices[i] >> 20); |
@@ -6394,6 +6395,104 @@ static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring) | |||
6394 | WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); | 6395 | WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); |
6395 | } | 6396 | } |
6396 | 6397 | ||
6398 | static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring, | ||
6399 | bool acquire) | ||
6400 | { | ||
6401 | struct amdgpu_device *adev = ring->adev; | ||
6402 | int pipe_num, tmp, reg; | ||
6403 | int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1; | ||
6404 | |||
6405 | pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe; | ||
6406 | |||
6407 | /* first me only has 2 entries, GFX and HP3D */ | ||
6408 | if (ring->me > 0) | ||
6409 | pipe_num -= 2; | ||
6410 | |||
6411 | reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num; | ||
6412 | tmp = RREG32(reg); | ||
6413 | tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent); | ||
6414 | WREG32(reg, tmp); | ||
6415 | } | ||
6416 | |||
6417 | static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev, | ||
6418 | struct amdgpu_ring *ring, | ||
6419 | bool acquire) | ||
6420 | { | ||
6421 | int i, pipe; | ||
6422 | bool reserve; | ||
6423 | struct amdgpu_ring *iring; | ||
6424 | |||
6425 | mutex_lock(&adev->gfx.pipe_reserve_mutex); | ||
6426 | pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0); | ||
6427 | if (acquire) | ||
6428 | set_bit(pipe, adev->gfx.pipe_reserve_bitmap); | ||
6429 | else | ||
6430 | clear_bit(pipe, adev->gfx.pipe_reserve_bitmap); | ||
6431 | |||
6432 | if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) { | ||
6433 | /* Clear all reservations - everyone reacquires all resources */ | ||
6434 | for (i = 0; i < adev->gfx.num_gfx_rings; ++i) | ||
6435 | gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i], | ||
6436 | true); | ||
6437 | |||
6438 | for (i = 0; i < adev->gfx.num_compute_rings; ++i) | ||
6439 | gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i], | ||
6440 | true); | ||
6441 | } else { | ||
6442 | /* Lower all pipes without a current reservation */ | ||
6443 | for (i = 0; i < adev->gfx.num_gfx_rings; ++i) { | ||
6444 | iring = &adev->gfx.gfx_ring[i]; | ||
6445 | pipe = amdgpu_gfx_queue_to_bit(adev, | ||
6446 | iring->me, | ||
6447 | iring->pipe, | ||
6448 | 0); | ||
6449 | reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); | ||
6450 | gfx_v8_0_ring_set_pipe_percent(iring, reserve); | ||
6451 | } | ||
6452 | |||
6453 | for (i = 0; i < adev->gfx.num_compute_rings; ++i) { | ||
6454 | iring = &adev->gfx.compute_ring[i]; | ||
6455 | pipe = amdgpu_gfx_queue_to_bit(adev, | ||
6456 | iring->me, | ||
6457 | iring->pipe, | ||
6458 | 0); | ||
6459 | reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); | ||
6460 | gfx_v8_0_ring_set_pipe_percent(iring, reserve); | ||
6461 | } | ||
6462 | } | ||
6463 | |||
6464 | mutex_unlock(&adev->gfx.pipe_reserve_mutex); | ||
6465 | } | ||
6466 | |||
6467 | static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev, | ||
6468 | struct amdgpu_ring *ring, | ||
6469 | bool acquire) | ||
6470 | { | ||
6471 | uint32_t pipe_priority = acquire ? 0x2 : 0x0; | ||
6472 | uint32_t queue_priority = acquire ? 0xf : 0x0; | ||
6473 | |||
6474 | mutex_lock(&adev->srbm_mutex); | ||
6475 | vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); | ||
6476 | |||
6477 | WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority); | ||
6478 | WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority); | ||
6479 | |||
6480 | vi_srbm_select(adev, 0, 0, 0, 0); | ||
6481 | mutex_unlock(&adev->srbm_mutex); | ||
6482 | } | ||
6483 | static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring, | ||
6484 | enum amd_sched_priority priority) | ||
6485 | { | ||
6486 | struct amdgpu_device *adev = ring->adev; | ||
6487 | bool acquire = priority == AMD_SCHED_PRIORITY_HIGH_HW; | ||
6488 | |||
6489 | if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE) | ||
6490 | return; | ||
6491 | |||
6492 | gfx_v8_0_hqd_set_priority(adev, ring, acquire); | ||
6493 | gfx_v8_0_pipe_reserve_resources(adev, ring, acquire); | ||
6494 | } | ||
6495 | |||
6397 | static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring, | 6496 | static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring, |
6398 | u64 addr, u64 seq, | 6497 | u64 addr, u64 seq, |
6399 | unsigned flags) | 6498 | unsigned flags) |
@@ -6839,6 +6938,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { | |||
6839 | .test_ib = gfx_v8_0_ring_test_ib, | 6938 | .test_ib = gfx_v8_0_ring_test_ib, |
6840 | .insert_nop = amdgpu_ring_insert_nop, | 6939 | .insert_nop = amdgpu_ring_insert_nop, |
6841 | .pad_ib = amdgpu_ring_generic_pad_ib, | 6940 | .pad_ib = amdgpu_ring_generic_pad_ib, |
6941 | .set_priority = gfx_v8_0_ring_set_priority_compute, | ||
6842 | }; | 6942 | }; |
6843 | 6943 | ||
6844 | static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = { | 6944 | static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = { |
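gfx_v8_0_ring_set_priority_compute() is wired into gfx_v8_0_ring_funcs_compute as the new .set_priority hook: raising a compute ring to AMD_SCHED_PRIORITY_HIGH_HW bumps its HQD pipe/queue priority and throttles every pipe without a reservation via SPI_WCL_PIPE_PERCENT. A sketch of how generic ring code might drive the hook; the wrapper name is hypothetical, only the callback and the priority enum come from the patch above.

/* Hypothetical wrapper, for illustration only. */
static void demo_ring_set_priority(struct amdgpu_ring *ring,
                                   enum amd_sched_priority priority)
{
        /* Rings that do not implement the hook keep their default
         * priority. */
        if (ring->funcs->set_priority)
                ring->funcs->set_priority(ring, priority);
}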
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 99a5b3b92e8e..7f15bb2c5233 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | |||
@@ -20,6 +20,7 @@ | |||
20 | * OTHER DEALINGS IN THE SOFTWARE. | 20 | * OTHER DEALINGS IN THE SOFTWARE. |
21 | * | 21 | * |
22 | */ | 22 | */ |
23 | #include <linux/kernel.h> | ||
23 | #include <linux/firmware.h> | 24 | #include <linux/firmware.h> |
24 | #include <drm/drmP.h> | 25 | #include <drm/drmP.h> |
25 | #include "amdgpu.h" | 26 | #include "amdgpu.h" |
@@ -1730,10 +1731,10 @@ static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev) | |||
1730 | adev->gfx.rlc.reg_list_format_size_bytes >> 2, | 1731 | adev->gfx.rlc.reg_list_format_size_bytes >> 2, |
1731 | unique_indirect_regs, | 1732 | unique_indirect_regs, |
1732 | &unique_indirect_reg_count, | 1733 | &unique_indirect_reg_count, |
1733 | sizeof(unique_indirect_regs)/sizeof(int), | 1734 | ARRAY_SIZE(unique_indirect_regs), |
1734 | indirect_start_offsets, | 1735 | indirect_start_offsets, |
1735 | &indirect_start_offsets_count, | 1736 | &indirect_start_offsets_count, |
1736 | sizeof(indirect_start_offsets)/sizeof(int)); | 1737 | ARRAY_SIZE(indirect_start_offsets)); |
1737 | 1738 | ||
1738 | /* enable auto inc in case it is disabled */ | 1739 | /* enable auto inc in case it is disabled */ |
1739 | tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL)); | 1740 | tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL)); |
@@ -1770,12 +1771,12 @@ static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev) | |||
1770 | /* write the starting offsets to RLC scratch ram */ | 1771 | /* write the starting offsets to RLC scratch ram */ |
1771 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), | 1772 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), |
1772 | adev->gfx.rlc.starting_offsets_start); | 1773 | adev->gfx.rlc.starting_offsets_start); |
1773 | for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++) | 1774 | for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++) |
1774 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), | 1775 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), |
1775 | indirect_start_offsets[i]); | 1776 | indirect_start_offsets[i]); |
1776 | 1777 | ||
1777 | /* load unique indirect regs*/ | 1778 | /* load unique indirect regs*/ |
1778 | for (i = 0; i < sizeof(unique_indirect_regs)/sizeof(int); i++) { | 1779 | for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) { |
1779 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) + i, | 1780 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) + i, |
1780 | unique_indirect_regs[i] & 0x3FFFF); | 1781 | unique_indirect_regs[i] & 0x3FFFF); |
1781 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) + i, | 1782 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) + i, |
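The ARRAY_SIZE() conversions in gfx_v8_0.c and gfx_v9_0.c (enabled by the new <linux/kernel.h> includes) replace the open-coded sizeof(arr)/sizeof(int) idiom, which silently miscounts if the element type ever changes; the kernel macro also rejects plain pointers at compile time. A minimal standalone illustration of the difference, using a local stand-in for the kernel macro.

#include <stdio.h>

#define DEMO_ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))

int main(void)
{
        /* Imagine the element type was later changed from int to long. */
        long indirect_start_offsets[32] = { 0 };

        /* Element-type independent: prints 32. */
        printf("%zu\n", DEMO_ARRAY_SIZE(indirect_start_offsets));
        /* The old idiom now reports 64 on LP64 targets. */
        printf("%zu\n", sizeof(indirect_start_offsets) / sizeof(int));
        return 0;
}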
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index 2812d88a8bdd..b4906d2f30d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | |||
@@ -183,6 +183,12 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev, | |||
183 | pr_err("Doesn't get READY_TO_ACCESS_GPU from pf, give up\n"); | 183 | pr_err("Doesn't get READY_TO_ACCESS_GPU from pf, give up\n"); |
184 | return r; | 184 | return r; |
185 | } | 185 | } |
186 | /* Retrieve the checksum key from mailbox 2 */ | ||
187 | if (req == IDH_REQ_GPU_INIT_ACCESS) { | ||
188 | adev->virt.fw_reserve.checksum_key = | ||
189 | RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, | ||
190 | mmBIF_BX_PF0_MAILBOX_MSGBUF_RCV_DW2)); | ||
191 | } | ||
186 | } | 192 | } |
187 | 193 | ||
188 | return 0; | 194 | return 0; |
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 1c006ba9d826..3ca9d114f630 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c | |||
@@ -279,10 +279,7 @@ static void soc15_init_golden_registers(struct amdgpu_device *adev) | |||
279 | } | 279 | } |
280 | static u32 soc15_get_xclk(struct amdgpu_device *adev) | 280 | static u32 soc15_get_xclk(struct amdgpu_device *adev) |
281 | { | 281 | { |
282 | if (adev->asic_type == CHIP_VEGA10) | 282 | return adev->clock.spll.reference_freq; |
283 | return adev->clock.spll.reference_freq/4; | ||
284 | else | ||
285 | return adev->clock.spll.reference_freq; | ||
286 | } | 283 | } |
287 | 284 | ||
288 | 285 | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 60af7310a234..71299c67c517 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | |||
@@ -268,8 +268,9 @@ err: | |||
268 | * | 268 | * |
269 | * Close up a stream for HW test or if userspace failed to do so | 269 | * Close up a stream for HW test or if userspace failed to do so |
270 | */ | 270 | */ |
271 | int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, | 271 | static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring, |
272 | bool direct, struct dma_fence **fence) | 272 | uint32_t handle, |
273 | bool direct, struct dma_fence **fence) | ||
273 | { | 274 | { |
274 | const unsigned ib_size_dw = 16; | 275 | const unsigned ib_size_dw = 16; |
275 | struct amdgpu_job *job; | 276 | struct amdgpu_job *job; |