Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
95 files changed, 5948 insertions, 1975 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 2ca2b5154d52..bfd332c95b61 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile | |||
| @@ -56,13 +56,18 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ | |||
| 56 | 56 | ||
| 57 | # add asic specific block | 57 | # add asic specific block |
| 58 | amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \ | 58 | amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \ |
| 59 | ci_smc.o ci_dpm.o dce_v8_0.o gfx_v7_0.o cik_sdma.o uvd_v4_2.o vce_v2_0.o \ | 59 | ci_smc.o ci_dpm.o dce_v8_0.o gfx_v7_0.o cik_sdma.o uvd_v4_2.o vce_v2_0.o |
| 60 | amdgpu_amdkfd_gfx_v7.o | ||
| 61 | 60 | ||
| 62 | amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce_v6_0.o si_dpm.o si_smc.o | 61 | amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce_v6_0.o si_dpm.o si_smc.o |
| 63 | 62 | ||
| 64 | amdgpu-y += \ | 63 | amdgpu-y += \ |
| 65 | vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o | 64 | vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \ |
| 65 | vega20_reg_init.o | ||
| 66 | |||
| 67 | # add DF block | ||
| 68 | amdgpu-y += \ | ||
| 69 | df_v1_7.o \ | ||
| 70 | df_v3_6.o | ||
| 66 | 71 | ||
| 67 | # add GMC block | 72 | # add GMC block |
| 68 | amdgpu-y += \ | 73 | amdgpu-y += \ |
| @@ -126,11 +131,20 @@ amdgpu-y += \ | |||
| 126 | vcn_v1_0.o | 131 | vcn_v1_0.o |
| 127 | 132 | ||
| 128 | # add amdkfd interfaces | 133 | # add amdkfd interfaces |
| 134 | amdgpu-y += amdgpu_amdkfd.o | ||
| 135 | |||
| 136 | ifneq ($(CONFIG_HSA_AMD),) | ||
| 129 | amdgpu-y += \ | 137 | amdgpu-y += \ |
| 130 | amdgpu_amdkfd.o \ | ||
| 131 | amdgpu_amdkfd_fence.o \ | 138 | amdgpu_amdkfd_fence.o \ |
| 132 | amdgpu_amdkfd_gpuvm.o \ | 139 | amdgpu_amdkfd_gpuvm.o \ |
| 133 | amdgpu_amdkfd_gfx_v8.o | 140 | amdgpu_amdkfd_gfx_v8.o \ |
| 141 | amdgpu_amdkfd_gfx_v9.o | ||
| 142 | |||
| 143 | ifneq ($(CONFIG_DRM_AMDGPU_CIK),) | ||
| 144 | amdgpu-y += amdgpu_amdkfd_gfx_v7.o | ||
| 145 | endif | ||
| 146 | |||
| 147 | endif | ||
| 134 | 148 | ||
| 135 | # add cgs | 149 | # add cgs |
| 136 | amdgpu-y += amdgpu_cgs.o | 150 | amdgpu-y += amdgpu_cgs.o |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index c8b605f3dc05..a59c07590cee 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h | |||
| @@ -129,6 +129,7 @@ extern int amdgpu_lbpw; | |||
| 129 | extern int amdgpu_compute_multipipe; | 129 | extern int amdgpu_compute_multipipe; |
| 130 | extern int amdgpu_gpu_recovery; | 130 | extern int amdgpu_gpu_recovery; |
| 131 | extern int amdgpu_emu_mode; | 131 | extern int amdgpu_emu_mode; |
| 132 | extern uint amdgpu_smu_memory_pool_size; | ||
| 132 | 133 | ||
| 133 | #ifdef CONFIG_DRM_AMDGPU_SI | 134 | #ifdef CONFIG_DRM_AMDGPU_SI |
| 134 | extern int amdgpu_si_support; | 135 | extern int amdgpu_si_support; |
| @@ -137,6 +138,7 @@ extern int amdgpu_si_support; | |||
| 137 | extern int amdgpu_cik_support; | 138 | extern int amdgpu_cik_support; |
| 138 | #endif | 139 | #endif |
| 139 | 140 | ||
| 141 | #define AMDGPU_SG_THRESHOLD (256*1024*1024) | ||
| 140 | #define AMDGPU_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */ | 142 | #define AMDGPU_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */ |
| 141 | #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000 | 143 | #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000 |
| 142 | #define AMDGPU_MAX_USEC_TIMEOUT 100000 /* 100 ms */ | 144 | #define AMDGPU_MAX_USEC_TIMEOUT 100000 /* 100 ms */ |
| @@ -222,10 +224,10 @@ enum amdgpu_kiq_irq { | |||
| 222 | AMDGPU_CP_KIQ_IRQ_LAST | 224 | AMDGPU_CP_KIQ_IRQ_LAST |
| 223 | }; | 225 | }; |
| 224 | 226 | ||
| 225 | int amdgpu_device_ip_set_clockgating_state(struct amdgpu_device *adev, | 227 | int amdgpu_device_ip_set_clockgating_state(void *dev, |
| 226 | enum amd_ip_block_type block_type, | 228 | enum amd_ip_block_type block_type, |
| 227 | enum amd_clockgating_state state); | 229 | enum amd_clockgating_state state); |
| 228 | int amdgpu_device_ip_set_powergating_state(struct amdgpu_device *adev, | 230 | int amdgpu_device_ip_set_powergating_state(void *dev, |
| 229 | enum amd_ip_block_type block_type, | 231 | enum amd_ip_block_type block_type, |
| 230 | enum amd_powergating_state state); | 232 | enum amd_powergating_state state); |
| 231 | void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev, | 233 | void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev, |
| @@ -681,6 +683,8 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, | |||
| 681 | int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id); | 683 | int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id); |
| 682 | 684 | ||
| 683 | void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr); | 685 | void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr); |
| 686 | void amdgpu_ctx_mgr_entity_cleanup(struct amdgpu_ctx_mgr *mgr); | ||
| 687 | void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr); | ||
| 684 | void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr); | 688 | void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr); |
| 685 | 689 | ||
| 686 | 690 | ||
| @@ -771,9 +775,18 @@ struct amdgpu_rlc { | |||
| 771 | u32 starting_offsets_start; | 775 | u32 starting_offsets_start; |
| 772 | u32 reg_list_format_size_bytes; | 776 | u32 reg_list_format_size_bytes; |
| 773 | u32 reg_list_size_bytes; | 777 | u32 reg_list_size_bytes; |
| 778 | u32 reg_list_format_direct_reg_list_length; | ||
| 779 | u32 save_restore_list_cntl_size_bytes; | ||
| 780 | u32 save_restore_list_gpm_size_bytes; | ||
| 781 | u32 save_restore_list_srm_size_bytes; | ||
| 774 | 782 | ||
| 775 | u32 *register_list_format; | 783 | u32 *register_list_format; |
| 776 | u32 *register_restore; | 784 | u32 *register_restore; |
| 785 | u8 *save_restore_list_cntl; | ||
| 786 | u8 *save_restore_list_gpm; | ||
| 787 | u8 *save_restore_list_srm; | ||
| 788 | |||
| 789 | bool is_rlc_v2_1; | ||
| 777 | }; | 790 | }; |
| 778 | 791 | ||
| 779 | #define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES | 792 | #define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES |
| @@ -867,6 +880,8 @@ struct amdgpu_gfx_config { | |||
| 867 | 880 | ||
| 868 | /* gfx configure feature */ | 881 | /* gfx configure feature */ |
| 869 | uint32_t double_offchip_lds_buf; | 882 | uint32_t double_offchip_lds_buf; |
| 883 | /* cached value of DB_DEBUG2 */ | ||
| 884 | uint32_t db_debug2; | ||
| 870 | }; | 885 | }; |
| 871 | 886 | ||
| 872 | struct amdgpu_cu_info { | 887 | struct amdgpu_cu_info { |
| @@ -938,6 +953,12 @@ struct amdgpu_gfx { | |||
| 938 | uint32_t ce_feature_version; | 953 | uint32_t ce_feature_version; |
| 939 | uint32_t pfp_feature_version; | 954 | uint32_t pfp_feature_version; |
| 940 | uint32_t rlc_feature_version; | 955 | uint32_t rlc_feature_version; |
| 956 | uint32_t rlc_srlc_fw_version; | ||
| 957 | uint32_t rlc_srlc_feature_version; | ||
| 958 | uint32_t rlc_srlg_fw_version; | ||
| 959 | uint32_t rlc_srlg_feature_version; | ||
| 960 | uint32_t rlc_srls_fw_version; | ||
| 961 | uint32_t rlc_srls_feature_version; | ||
| 941 | uint32_t mec_feature_version; | 962 | uint32_t mec_feature_version; |
| 942 | uint32_t mec2_feature_version; | 963 | uint32_t mec2_feature_version; |
| 943 | struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS]; | 964 | struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS]; |
| @@ -1204,6 +1225,8 @@ struct amdgpu_asic_funcs { | |||
| 1204 | /* invalidate hdp read cache */ | 1225 | /* invalidate hdp read cache */ |
| 1205 | void (*invalidate_hdp)(struct amdgpu_device *adev, | 1226 | void (*invalidate_hdp)(struct amdgpu_device *adev, |
| 1206 | struct amdgpu_ring *ring); | 1227 | struct amdgpu_ring *ring); |
| 1228 | /* check if the asic needs a full reset or if soft reset will work */ | ||
| 1229 | bool (*need_full_reset)(struct amdgpu_device *adev); | ||
| 1207 | }; | 1230 | }; |
| 1208 | 1231 | ||
| 1209 | /* | 1232 | /* |
| @@ -1368,7 +1391,19 @@ struct amdgpu_nbio_funcs { | |||
| 1368 | void (*detect_hw_virt)(struct amdgpu_device *adev); | 1391 | void (*detect_hw_virt)(struct amdgpu_device *adev); |
| 1369 | }; | 1392 | }; |
| 1370 | 1393 | ||
| 1371 | | 1394 | struct amdgpu_df_funcs { |
| 1395 | void (*init)(struct amdgpu_device *adev); | ||
| 1396 | void (*enable_broadcast_mode)(struct amdgpu_device *adev, | ||
| 1397 | bool enable); | ||
| 1398 | u32 (*get_fb_channel_number)(struct amdgpu_device *adev); | ||
| 1399 | u32 (*get_hbm_channel_number)(struct amdgpu_device *adev); | ||
| 1400 | void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev, | ||
| 1401 | bool enable); | ||
| 1402 | void (*get_clockgating_state)(struct amdgpu_device *adev, | ||
| 1403 | u32 *flags); | ||
| 1404 | void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev, | ||
| 1405 | bool enable); | ||
| 1406 | }; | ||
| 1372 | /* Define the HW IP blocks to be used in the driver, add more if necessary */ | 1407 | /* Define the HW IP blocks to be used in the driver, add more if necessary */ |
| 1373 | enum amd_hw_ip_block_type { | 1408 | enum amd_hw_ip_block_type { |
| 1374 | GC_HWIP = 1, | 1409 | GC_HWIP = 1, |
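Note: the new amdgpu_df_funcs table is reached through the adev->df_funcs pointer added to struct amdgpu_device later in this header. A minimal caller sketch, with a hypothetical helper name; only the function-pointer signatures come from the struct above:

    /* Hypothetical caller: query the Data Fabric for the number of
     * framebuffer channels through the new vtable. Assumes df_funcs was
     * assigned (df_v1_7 or df_v3_6) during early init. */
    static u32 example_df_fb_channels(struct amdgpu_device *adev)
    {
    	if (adev->df_funcs && adev->df_funcs->get_fb_channel_number)
    		return adev->df_funcs->get_fb_channel_number(adev);
    	return 0;
    }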
| @@ -1398,6 +1433,7 @@ enum amd_hw_ip_block_type { | |||
| 1398 | struct amd_powerplay { | 1433 | struct amd_powerplay { |
| 1399 | void *pp_handle; | 1434 | void *pp_handle; |
| 1400 | const struct amd_pm_funcs *pp_funcs; | 1435 | const struct amd_pm_funcs *pp_funcs; |
| 1436 | uint32_t pp_feature; | ||
| 1401 | }; | 1437 | }; |
| 1402 | 1438 | ||
| 1403 | #define AMDGPU_RESET_MAGIC_NUM 64 | 1439 | #define AMDGPU_RESET_MAGIC_NUM 64 |
| @@ -1590,6 +1626,7 @@ struct amdgpu_device { | |||
| 1590 | uint32_t *reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE]; | 1626 | uint32_t *reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE]; |
| 1591 | 1627 | ||
| 1592 | const struct amdgpu_nbio_funcs *nbio_funcs; | 1628 | const struct amdgpu_nbio_funcs *nbio_funcs; |
| 1629 | const struct amdgpu_df_funcs *df_funcs; | ||
| 1593 | 1630 | ||
| 1594 | /* delayed work_func for deferring clockgating during resume */ | 1631 | /* delayed work_func for deferring clockgating during resume */ |
| 1595 | struct delayed_work late_init_work; | 1632 | struct delayed_work late_init_work; |
| @@ -1764,6 +1801,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) | |||
| 1764 | #define amdgpu_asic_get_config_memsize(adev) (adev)->asic_funcs->get_config_memsize((adev)) | 1801 | #define amdgpu_asic_get_config_memsize(adev) (adev)->asic_funcs->get_config_memsize((adev)) |
| 1765 | #define amdgpu_asic_flush_hdp(adev, r) (adev)->asic_funcs->flush_hdp((adev), (r)) | 1802 | #define amdgpu_asic_flush_hdp(adev, r) (adev)->asic_funcs->flush_hdp((adev), (r)) |
| 1766 | #define amdgpu_asic_invalidate_hdp(adev, r) (adev)->asic_funcs->invalidate_hdp((adev), (r)) | 1803 | #define amdgpu_asic_invalidate_hdp(adev, r) (adev)->asic_funcs->invalidate_hdp((adev), (r)) |
| 1804 | #define amdgpu_asic_need_full_reset(adev) (adev)->asic_funcs->need_full_reset((adev)) | ||
| 1767 | #define amdgpu_gmc_flush_gpu_tlb(adev, vmid) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid)) | 1805 | #define amdgpu_gmc_flush_gpu_tlb(adev, vmid) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid)) |
| 1768 | #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr)) | 1806 | #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr)) |
| 1769 | #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid)) | 1807 | #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid)) |
| @@ -1790,6 +1828,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) | |||
| 1790 | #define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d)) | 1828 | #define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d)) |
| 1791 | #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v)) | 1829 | #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v)) |
| 1792 | #define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m)) | 1830 | #define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m)) |
| 1831 | #define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m)) | ||
| 1793 | #define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b)) | 1832 | #define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b)) |
| 1794 | #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib))) | 1833 | #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib))) |
| 1795 | #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r)) | 1834 | #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r)) |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c index 12558044acd4..428e5eb3444f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c | |||
| @@ -290,12 +290,11 @@ static int acp_hw_init(void *handle) | |||
| 290 | else if (r) | 290 | else if (r) |
| 291 | return r; | 291 | return r; |
| 292 | 292 | ||
| 293 | r = cgs_get_pci_resource(adev->acp.cgs_device, CGS_RESOURCE_TYPE_MMIO, | 293 | if (adev->rmmio_size == 0 || adev->rmmio_size < 0x5289) |
| 294 | 0x5289, 0, &acp_base); | 294 | return -EINVAL; |
| 295 | if (r == -ENODEV) | 295 | |
| 296 | return 0; | 296 | acp_base = adev->rmmio_base; |
| 297 | else if (r) | 297 | |
| 298 | return r; | ||
| 299 | if (adev->asic_type != CHIP_STONEY) { | 298 | if (adev->asic_type != CHIP_STONEY) { |
| 300 | adev->acp.acp_genpd = kzalloc(sizeof(struct acp_pm_domain), GFP_KERNEL); | 299 | adev->acp.acp_genpd = kzalloc(sizeof(struct acp_pm_domain), GFP_KERNEL); |
| 301 | if (adev->acp.acp_genpd == NULL) | 300 | if (adev->acp.acp_genpd == NULL) |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 4d36203ffb11..8f6f45567bfa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | |||
| @@ -50,15 +50,21 @@ int amdgpu_amdkfd_init(void) | |||
| 50 | kgd2kfd = NULL; | 50 | kgd2kfd = NULL; |
| 51 | } | 51 | } |
| 52 | 52 | ||
| 53 | |||
| 53 | #elif defined(CONFIG_HSA_AMD) | 54 | #elif defined(CONFIG_HSA_AMD) |
| 55 | |||
| 54 | ret = kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd); | 56 | ret = kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd); |
| 55 | if (ret) | 57 | if (ret) |
| 56 | kgd2kfd = NULL; | 58 | kgd2kfd = NULL; |
| 57 | 59 | ||
| 58 | #else | 60 | #else |
| 61 | kgd2kfd = NULL; | ||
| 59 | ret = -ENOENT; | 62 | ret = -ENOENT; |
| 60 | #endif | 63 | #endif |
| 64 | |||
| 65 | #if defined(CONFIG_HSA_AMD_MODULE) || defined(CONFIG_HSA_AMD) | ||
| 61 | amdgpu_amdkfd_gpuvm_init_mem_limits(); | 66 | amdgpu_amdkfd_gpuvm_init_mem_limits(); |
| 67 | #endif | ||
| 62 | 68 | ||
| 63 | return ret; | 69 | return ret; |
| 64 | } | 70 | } |
| @@ -92,8 +98,12 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) | |||
| 92 | case CHIP_POLARIS11: | 98 | case CHIP_POLARIS11: |
| 93 | kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions(); | 99 | kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions(); |
| 94 | break; | 100 | break; |
| 101 | case CHIP_VEGA10: | ||
| 102 | case CHIP_RAVEN: | ||
| 103 | kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions(); | ||
| 104 | break; | ||
| 95 | default: | 105 | default: |
| 96 | dev_dbg(adev->dev, "kfd not supported on this ASIC\n"); | 106 | dev_info(adev->dev, "kfd not supported on this ASIC\n"); |
| 97 | return; | 107 | return; |
| 98 | } | 108 | } |
| 99 | 109 | ||
| @@ -175,6 +185,28 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) | |||
| 175 | &gpu_resources.doorbell_physical_address, | 185 | &gpu_resources.doorbell_physical_address, |
| 176 | &gpu_resources.doorbell_aperture_size, | 186 | &gpu_resources.doorbell_aperture_size, |
| 177 | &gpu_resources.doorbell_start_offset); | 187 | &gpu_resources.doorbell_start_offset); |
| 188 | if (adev->asic_type >= CHIP_VEGA10) { | ||
| 189 | /* On SOC15 the BIF is involved in routing | ||
| 190 | * doorbells using the low 12 bits of the | ||
| 191 | * address. Communicate the assignments to | ||
| 192 | * KFD. KFD uses two doorbell pages per | ||
| 193 | * process in case of 64-bit doorbells so we | ||
| 194 | * can use each doorbell assignment twice. | ||
| 195 | */ | ||
| 196 | gpu_resources.sdma_doorbell[0][0] = | ||
| 197 | AMDGPU_DOORBELL64_sDMA_ENGINE0; | ||
| 198 | gpu_resources.sdma_doorbell[0][1] = | ||
| 199 | AMDGPU_DOORBELL64_sDMA_ENGINE0 + 0x200; | ||
| 200 | gpu_resources.sdma_doorbell[1][0] = | ||
| 201 | AMDGPU_DOORBELL64_sDMA_ENGINE1; | ||
| 202 | gpu_resources.sdma_doorbell[1][1] = | ||
| 203 | AMDGPU_DOORBELL64_sDMA_ENGINE1 + 0x200; | ||
| 204 | /* Doorbells 0x0f0-0ff and 0x2f0-2ff are reserved for | ||
| 205 | * SDMA, IH and VCN. So don't use them for the CP. | ||
| 206 | */ | ||
| 207 | gpu_resources.reserved_doorbell_mask = 0x1f0; | ||
| 208 | gpu_resources.reserved_doorbell_val = 0x0f0; | ||
| 209 | } | ||
| 178 | 210 | ||
| 179 | kgd2kfd->device_init(adev->kfd, &gpu_resources); | 211 | kgd2kfd->device_init(adev->kfd, &gpu_resources); |
| 180 | } | 212 | } |
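Note: the reserved_doorbell_mask/val pair set above is a compact encoding of the ranges named in the comment. A sketch of the test a consumer could apply, using a hypothetical helper name; doorbell indices 0x0f0-0x0ff and 0x2f0-0x2ff (SDMA, IH and VCN) satisfy it, so the CP scheduler can skip them:

    /* Sketch only: true for the doorbell indices reserved per the comment above. */
    static bool example_doorbell_is_reserved(unsigned int index)
    {
    	return (index & 0x1f0) == 0x0f0;	/* mask/val from gpu_resources */
    }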
| @@ -217,13 +249,19 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, | |||
| 217 | { | 249 | { |
| 218 | struct amdgpu_device *adev = (struct amdgpu_device *)kgd; | 250 | struct amdgpu_device *adev = (struct amdgpu_device *)kgd; |
| 219 | struct amdgpu_bo *bo = NULL; | 251 | struct amdgpu_bo *bo = NULL; |
| 252 | struct amdgpu_bo_param bp; | ||
| 220 | int r; | 253 | int r; |
| 221 | uint64_t gpu_addr_tmp = 0; | 254 | uint64_t gpu_addr_tmp = 0; |
| 222 | void *cpu_ptr_tmp = NULL; | 255 | void *cpu_ptr_tmp = NULL; |
| 223 | 256 | ||
| 224 | r = amdgpu_bo_create(adev, size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, | 257 | memset(&bp, 0, sizeof(bp)); |
| 225 | AMDGPU_GEM_CREATE_CPU_GTT_USWC, ttm_bo_type_kernel, | 258 | bp.size = size; |
| 226 | NULL, &bo); | 259 | bp.byte_align = PAGE_SIZE; |
| 260 | bp.domain = AMDGPU_GEM_DOMAIN_GTT; | ||
| 261 | bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC; | ||
| 262 | bp.type = ttm_bo_type_kernel; | ||
| 263 | bp.resv = NULL; | ||
| 264 | r = amdgpu_bo_create(adev, &bp, &bo); | ||
| 227 | if (r) { | 265 | if (r) { |
| 228 | dev_err(adev->dev, | 266 | dev_err(adev->dev, |
| 229 | "failed to allocate BO for amdkfd (%d)\n", r); | 267 | "failed to allocate BO for amdkfd (%d)\n", r); |
| @@ -432,3 +470,44 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid) | |||
| 432 | 470 | ||
| 433 | return false; | 471 | return false; |
| 434 | } | 472 | } |
| 473 | |||
| 474 | #if !defined(CONFIG_HSA_AMD_MODULE) && !defined(CONFIG_HSA_AMD) | ||
| 475 | bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm) | ||
| 476 | { | ||
| 477 | return false; | ||
| 478 | } | ||
| 479 | |||
| 480 | void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo) | ||
| 481 | { | ||
| 482 | } | ||
| 483 | |||
| 484 | void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, | ||
| 485 | struct amdgpu_vm *vm) | ||
| 486 | { | ||
| 487 | } | ||
| 488 | |||
| 489 | struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f) | ||
| 490 | { | ||
| 491 | return NULL; | ||
| 492 | } | ||
| 493 | |||
| 494 | int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm) | ||
| 495 | { | ||
| 496 | return 0; | ||
| 497 | } | ||
| 498 | |||
| 499 | struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void) | ||
| 500 | { | ||
| 501 | return NULL; | ||
| 502 | } | ||
| 503 | |||
| 504 | struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void) | ||
| 505 | { | ||
| 506 | return NULL; | ||
| 507 | } | ||
| 508 | |||
| 509 | struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void) | ||
| 510 | { | ||
| 511 | return NULL; | ||
| 512 | } | ||
| 513 | #endif | ||
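Note: the stubs above let the rest of amdgpu build and link when KFD support is not configured. A hedged sketch of the calling pattern this enables (hypothetical caller, not part of the patch):

    /* With the stubs compiled in, callers need no #ifdef CONFIG_HSA_AMD:
     * the getter simply returns NULL and KFD probing is skipped. */
    static void example_probe_kfd(struct amdgpu_device *adev)
    {
    	struct kfd2kgd_calls *f = amdgpu_amdkfd_gfx_9_0_get_functions();

    	if (!f)
    		dev_info(adev->dev, "kfd not supported in this build\n");
    }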
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index c2c2bea731e0..a8418a3f4e9d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | |||
| @@ -28,6 +28,7 @@ | |||
| 28 | #include <linux/types.h> | 28 | #include <linux/types.h> |
| 29 | #include <linux/mm.h> | 29 | #include <linux/mm.h> |
| 30 | #include <linux/mmu_context.h> | 30 | #include <linux/mmu_context.h> |
| 31 | #include <linux/workqueue.h> | ||
| 31 | #include <kgd_kfd_interface.h> | 32 | #include <kgd_kfd_interface.h> |
| 32 | #include <drm/ttm/ttm_execbuf_util.h> | 33 | #include <drm/ttm/ttm_execbuf_util.h> |
| 33 | #include "amdgpu_sync.h" | 34 | #include "amdgpu_sync.h" |
| @@ -59,7 +60,9 @@ struct kgd_mem { | |||
| 59 | 60 | ||
| 60 | uint32_t mapping_flags; | 61 | uint32_t mapping_flags; |
| 61 | 62 | ||
| 63 | atomic_t invalid; | ||
| 62 | struct amdkfd_process_info *process_info; | 64 | struct amdkfd_process_info *process_info; |
| 65 | struct page **user_pages; | ||
| 63 | 66 | ||
| 64 | struct amdgpu_sync sync; | 67 | struct amdgpu_sync sync; |
| 65 | 68 | ||
| @@ -84,6 +87,9 @@ struct amdkfd_process_info { | |||
| 84 | struct list_head vm_list_head; | 87 | struct list_head vm_list_head; |
| 85 | /* List head for all KFD BOs that belong to a KFD process. */ | 88 | /* List head for all KFD BOs that belong to a KFD process. */ |
| 86 | struct list_head kfd_bo_list; | 89 | struct list_head kfd_bo_list; |
| 90 | /* List of userptr BOs that are valid or invalid */ | ||
| 91 | struct list_head userptr_valid_list; | ||
| 92 | struct list_head userptr_inval_list; | ||
| 87 | /* Lock to protect kfd_bo_list */ | 93 | /* Lock to protect kfd_bo_list */ |
| 88 | struct mutex lock; | 94 | struct mutex lock; |
| 89 | 95 | ||
| @@ -91,6 +97,11 @@ struct amdkfd_process_info { | |||
| 91 | unsigned int n_vms; | 97 | unsigned int n_vms; |
| 92 | /* Eviction Fence */ | 98 | /* Eviction Fence */ |
| 93 | struct amdgpu_amdkfd_fence *eviction_fence; | 99 | struct amdgpu_amdkfd_fence *eviction_fence; |
| 100 | |||
| 101 | /* MMU-notifier related fields */ | ||
| 102 | atomic_t evicted_bos; | ||
| 103 | struct delayed_work restore_userptr_work; | ||
| 104 | struct pid *pid; | ||
| 94 | }; | 105 | }; |
| 95 | 106 | ||
| 96 | int amdgpu_amdkfd_init(void); | 107 | int amdgpu_amdkfd_init(void); |
| @@ -104,12 +115,14 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev); | |||
| 104 | void amdgpu_amdkfd_device_init(struct amdgpu_device *adev); | 115 | void amdgpu_amdkfd_device_init(struct amdgpu_device *adev); |
| 105 | void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev); | 116 | void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev); |
| 106 | 117 | ||
| 118 | int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm); | ||
| 107 | int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, | 119 | int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, |
| 108 | uint32_t vmid, uint64_t gpu_addr, | 120 | uint32_t vmid, uint64_t gpu_addr, |
| 109 | uint32_t *ib_cmd, uint32_t ib_len); | 121 | uint32_t *ib_cmd, uint32_t ib_len); |
| 110 | 122 | ||
| 111 | struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void); | 123 | struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void); |
| 112 | struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void); | 124 | struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void); |
| 125 | struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void); | ||
| 113 | 126 | ||
| 114 | bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid); | 127 | bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid); |
| 115 | 128 | ||
| @@ -143,14 +156,14 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd); | |||
| 143 | 156 | ||
| 144 | /* GPUVM API */ | 157 | /* GPUVM API */ |
| 145 | int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm, | 158 | int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm, |
| 146 | void **process_info, | 159 | void **process_info, |
| 147 | struct dma_fence **ef); | 160 | struct dma_fence **ef); |
| 148 | int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd, | 161 | int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd, |
| 149 | struct file *filp, | 162 | struct file *filp, |
| 150 | void **vm, void **process_info, | 163 | void **vm, void **process_info, |
| 151 | struct dma_fence **ef); | 164 | struct dma_fence **ef); |
| 152 | void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, | 165 | void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, |
| 153 | struct amdgpu_vm *vm); | 166 | struct amdgpu_vm *vm); |
| 154 | void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm); | 167 | void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm); |
| 155 | uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm); | 168 | uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm); |
| 156 | int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( | 169 | int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index ea54e53172b9..0ff36d45a597 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | |||
| @@ -98,8 +98,6 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, | |||
| 98 | static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, | 98 | static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, |
| 99 | unsigned int vmid); | 99 | unsigned int vmid); |
| 100 | 100 | ||
| 101 | static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, | ||
| 102 | uint32_t hpd_size, uint64_t hpd_gpu_addr); | ||
| 103 | static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); | 101 | static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); |
| 104 | static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, | 102 | static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, |
| 105 | uint32_t queue_id, uint32_t __user *wptr, | 103 | uint32_t queue_id, uint32_t __user *wptr, |
| @@ -183,7 +181,6 @@ static const struct kfd2kgd_calls kfd2kgd = { | |||
| 183 | .free_pasid = amdgpu_pasid_free, | 181 | .free_pasid = amdgpu_pasid_free, |
| 184 | .program_sh_mem_settings = kgd_program_sh_mem_settings, | 182 | .program_sh_mem_settings = kgd_program_sh_mem_settings, |
| 185 | .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, | 183 | .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, |
| 186 | .init_pipeline = kgd_init_pipeline, | ||
| 187 | .init_interrupts = kgd_init_interrupts, | 184 | .init_interrupts = kgd_init_interrupts, |
| 188 | .hqd_load = kgd_hqd_load, | 185 | .hqd_load = kgd_hqd_load, |
| 189 | .hqd_sdma_load = kgd_hqd_sdma_load, | 186 | .hqd_sdma_load = kgd_hqd_sdma_load, |
| @@ -309,13 +306,6 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, | |||
| 309 | return 0; | 306 | return 0; |
| 310 | } | 307 | } |
| 311 | 308 | ||
| 312 | static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, | ||
| 313 | uint32_t hpd_size, uint64_t hpd_gpu_addr) | ||
| 314 | { | ||
| 315 | /* amdgpu owns the per-pipe state */ | ||
| 316 | return 0; | ||
| 317 | } | ||
| 318 | |||
| 319 | static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) | 309 | static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) |
| 320 | { | 310 | { |
| 321 | struct amdgpu_device *adev = get_amdgpu_device(kgd); | 311 | struct amdgpu_device *adev = get_amdgpu_device(kgd); |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index 89264c9a5e9f..6ef9762b4b00 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | |||
| @@ -57,8 +57,6 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, | |||
| 57 | uint32_t sh_mem_bases); | 57 | uint32_t sh_mem_bases); |
| 58 | static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, | 58 | static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, |
| 59 | unsigned int vmid); | 59 | unsigned int vmid); |
| 60 | static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, | ||
| 61 | uint32_t hpd_size, uint64_t hpd_gpu_addr); | ||
| 62 | static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); | 60 | static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); |
| 63 | static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, | 61 | static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, |
| 64 | uint32_t queue_id, uint32_t __user *wptr, | 62 | uint32_t queue_id, uint32_t __user *wptr, |
| @@ -141,7 +139,6 @@ static const struct kfd2kgd_calls kfd2kgd = { | |||
| 141 | .free_pasid = amdgpu_pasid_free, | 139 | .free_pasid = amdgpu_pasid_free, |
| 142 | .program_sh_mem_settings = kgd_program_sh_mem_settings, | 140 | .program_sh_mem_settings = kgd_program_sh_mem_settings, |
| 143 | .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, | 141 | .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, |
| 144 | .init_pipeline = kgd_init_pipeline, | ||
| 145 | .init_interrupts = kgd_init_interrupts, | 142 | .init_interrupts = kgd_init_interrupts, |
| 146 | .hqd_load = kgd_hqd_load, | 143 | .hqd_load = kgd_hqd_load, |
| 147 | .hqd_sdma_load = kgd_hqd_sdma_load, | 144 | .hqd_sdma_load = kgd_hqd_sdma_load, |
| @@ -270,13 +267,6 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, | |||
| 270 | return 0; | 267 | return 0; |
| 271 | } | 268 | } |
| 272 | 269 | ||
| 273 | static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, | ||
| 274 | uint32_t hpd_size, uint64_t hpd_gpu_addr) | ||
| 275 | { | ||
| 276 | /* amdgpu owns the per-pipe state */ | ||
| 277 | return 0; | ||
| 278 | } | ||
| 279 | |||
| 280 | static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) | 270 | static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) |
| 281 | { | 271 | { |
| 282 | struct amdgpu_device *adev = get_amdgpu_device(kgd); | 272 | struct amdgpu_device *adev = get_amdgpu_device(kgd); |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c new file mode 100644 index 000000000000..f0c0d3953f69 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | |||
| @@ -0,0 +1,1043 @@ | |||
| 1 | /* | ||
| 2 | * Copyright 2014-2018 Advanced Micro Devices, Inc. | ||
| 3 | * | ||
| 4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
| 5 | * copy of this software and associated documentation files (the "Software"), | ||
| 6 | * to deal in the Software without restriction, including without limitation | ||
| 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
| 8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
| 9 | * Software is furnished to do so, subject to the following conditions: | ||
| 10 | * | ||
| 11 | * The above copyright notice and this permission notice shall be included in | ||
| 12 | * all copies or substantial portions of the Software. | ||
| 13 | * | ||
| 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
| 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
| 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
| 17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | ||
| 18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | ||
| 19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | ||
| 20 | * OTHER DEALINGS IN THE SOFTWARE. | ||
| 21 | */ | ||
| 22 | |||
| 23 | #define pr_fmt(fmt) "kfd2kgd: " fmt | ||
| 24 | |||
| 25 | #include <linux/module.h> | ||
| 26 | #include <linux/fdtable.h> | ||
| 27 | #include <linux/uaccess.h> | ||
| 28 | #include <linux/firmware.h> | ||
| 29 | #include <drm/drmP.h> | ||
| 30 | #include "amdgpu.h" | ||
| 31 | #include "amdgpu_amdkfd.h" | ||
| 32 | #include "amdgpu_ucode.h" | ||
| 33 | #include "soc15_hw_ip.h" | ||
| 34 | #include "gc/gc_9_0_offset.h" | ||
| 35 | #include "gc/gc_9_0_sh_mask.h" | ||
| 36 | #include "vega10_enum.h" | ||
| 37 | #include "sdma0/sdma0_4_0_offset.h" | ||
| 38 | #include "sdma0/sdma0_4_0_sh_mask.h" | ||
| 39 | #include "sdma1/sdma1_4_0_offset.h" | ||
| 40 | #include "sdma1/sdma1_4_0_sh_mask.h" | ||
| 41 | #include "athub/athub_1_0_offset.h" | ||
| 42 | #include "athub/athub_1_0_sh_mask.h" | ||
| 43 | #include "oss/osssys_4_0_offset.h" | ||
| 44 | #include "oss/osssys_4_0_sh_mask.h" | ||
| 45 | #include "soc15_common.h" | ||
| 46 | #include "v9_structs.h" | ||
| 47 | #include "soc15.h" | ||
| 48 | #include "soc15d.h" | ||
| 49 | |||
| 50 | /* HACK: MMHUB and GC both have VM-related registers with the same | ||
| 51 | * names but different offsets. Define the MMHUB register we need here | ||
| 52 | * with a prefix. A proper solution would be to move the functions | ||
| 53 | * programming these registers into gfx_v9_0.c and mmhub_v1_0.c | ||
| 54 | * respectively. | ||
| 55 | */ | ||
| 56 | #define mmMMHUB_VM_INVALIDATE_ENG16_REQ 0x06f3 | ||
| 57 | #define mmMMHUB_VM_INVALIDATE_ENG16_REQ_BASE_IDX 0 | ||
| 58 | |||
| 59 | #define mmMMHUB_VM_INVALIDATE_ENG16_ACK 0x0705 | ||
| 60 | #define mmMMHUB_VM_INVALIDATE_ENG16_ACK_BASE_IDX 0 | ||
| 61 | |||
| 62 | #define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32 0x072b | ||
| 63 | #define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32_BASE_IDX 0 | ||
| 64 | #define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32 0x072c | ||
| 65 | #define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32_BASE_IDX 0 | ||
| 66 | |||
| 67 | #define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32 0x074b | ||
| 68 | #define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32_BASE_IDX 0 | ||
| 69 | #define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32 0x074c | ||
| 70 | #define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32_BASE_IDX 0 | ||
| 71 | |||
| 72 | #define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32 0x076b | ||
| 73 | #define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32_BASE_IDX 0 | ||
| 74 | #define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32 0x076c | ||
| 75 | #define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32_BASE_IDX 0 | ||
| 76 | |||
| 77 | #define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32 0x0727 | ||
| 78 | #define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32_BASE_IDX 0 | ||
| 79 | #define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32 0x0728 | ||
| 80 | #define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32_BASE_IDX 0 | ||
| 81 | |||
| 82 | #define V9_PIPE_PER_MEC (4) | ||
| 83 | #define V9_QUEUES_PER_PIPE_MEC (8) | ||
| 84 | |||
| 85 | enum hqd_dequeue_request_type { | ||
| 86 | NO_ACTION = 0, | ||
| 87 | DRAIN_PIPE, | ||
| 88 | RESET_WAVES | ||
| 89 | }; | ||
| 90 | |||
| 91 | /* | ||
| 92 | * Register access functions | ||
| 93 | */ | ||
| 94 | |||
| 95 | static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, | ||
| 96 | uint32_t sh_mem_config, | ||
| 97 | uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, | ||
| 98 | uint32_t sh_mem_bases); | ||
| 99 | static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, | ||
| 100 | unsigned int vmid); | ||
| 101 | static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); | ||
| 102 | static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, | ||
| 103 | uint32_t queue_id, uint32_t __user *wptr, | ||
| 104 | uint32_t wptr_shift, uint32_t wptr_mask, | ||
| 105 | struct mm_struct *mm); | ||
| 106 | static int kgd_hqd_dump(struct kgd_dev *kgd, | ||
| 107 | uint32_t pipe_id, uint32_t queue_id, | ||
| 108 | uint32_t (**dump)[2], uint32_t *n_regs); | ||
| 109 | static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, | ||
| 110 | uint32_t __user *wptr, struct mm_struct *mm); | ||
| 111 | static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, | ||
| 112 | uint32_t engine_id, uint32_t queue_id, | ||
| 113 | uint32_t (**dump)[2], uint32_t *n_regs); | ||
| 114 | static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, | ||
| 115 | uint32_t pipe_id, uint32_t queue_id); | ||
| 116 | static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); | ||
| 117 | static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, | ||
| 118 | enum kfd_preempt_type reset_type, | ||
| 119 | unsigned int utimeout, uint32_t pipe_id, | ||
| 120 | uint32_t queue_id); | ||
| 121 | static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, | ||
| 122 | unsigned int utimeout); | ||
| 123 | static int kgd_address_watch_disable(struct kgd_dev *kgd); | ||
| 124 | static int kgd_address_watch_execute(struct kgd_dev *kgd, | ||
| 125 | unsigned int watch_point_id, | ||
| 126 | uint32_t cntl_val, | ||
| 127 | uint32_t addr_hi, | ||
| 128 | uint32_t addr_lo); | ||
| 129 | static int kgd_wave_control_execute(struct kgd_dev *kgd, | ||
| 130 | uint32_t gfx_index_val, | ||
| 131 | uint32_t sq_cmd); | ||
| 132 | static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, | ||
| 133 | unsigned int watch_point_id, | ||
| 134 | unsigned int reg_offset); | ||
| 135 | |||
| 136 | static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, | ||
| 137 | uint8_t vmid); | ||
| 138 | static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, | ||
| 139 | uint8_t vmid); | ||
| 140 | static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, | ||
| 141 | uint32_t page_table_base); | ||
| 142 | static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); | ||
| 143 | static void set_scratch_backing_va(struct kgd_dev *kgd, | ||
| 144 | uint64_t va, uint32_t vmid); | ||
| 145 | static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); | ||
| 146 | static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); | ||
| 147 | |||
| 148 | /* Because of REG_GET_FIELD() being used, we put this function in the | ||
| 149 | * asic specific file. | ||
| 150 | */ | ||
| 151 | static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd, | ||
| 152 | struct tile_config *config) | ||
| 153 | { | ||
| 154 | struct amdgpu_device *adev = (struct amdgpu_device *)kgd; | ||
| 155 | |||
| 156 | config->gb_addr_config = adev->gfx.config.gb_addr_config; | ||
| 157 | |||
| 158 | config->tile_config_ptr = adev->gfx.config.tile_mode_array; | ||
| 159 | config->num_tile_configs = | ||
| 160 | ARRAY_SIZE(adev->gfx.config.tile_mode_array); | ||
| 161 | config->macro_tile_config_ptr = | ||
| 162 | adev->gfx.config.macrotile_mode_array; | ||
| 163 | config->num_macro_tile_configs = | ||
| 164 | ARRAY_SIZE(adev->gfx.config.macrotile_mode_array); | ||
| 165 | |||
| 166 | return 0; | ||
| 167 | } | ||
| 168 | |||
| 169 | static const struct kfd2kgd_calls kfd2kgd = { | ||
| 170 | .init_gtt_mem_allocation = alloc_gtt_mem, | ||
| 171 | .free_gtt_mem = free_gtt_mem, | ||
| 172 | .get_local_mem_info = get_local_mem_info, | ||
| 173 | .get_gpu_clock_counter = get_gpu_clock_counter, | ||
| 174 | .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz, | ||
| 175 | .alloc_pasid = amdgpu_pasid_alloc, | ||
| 176 | .free_pasid = amdgpu_pasid_free, | ||
| 177 | .program_sh_mem_settings = kgd_program_sh_mem_settings, | ||
| 178 | .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, | ||
| 179 | .init_interrupts = kgd_init_interrupts, | ||
| 180 | .hqd_load = kgd_hqd_load, | ||
| 181 | .hqd_sdma_load = kgd_hqd_sdma_load, | ||
| 182 | .hqd_dump = kgd_hqd_dump, | ||
| 183 | .hqd_sdma_dump = kgd_hqd_sdma_dump, | ||
| 184 | .hqd_is_occupied = kgd_hqd_is_occupied, | ||
| 185 | .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, | ||
| 186 | .hqd_destroy = kgd_hqd_destroy, | ||
| 187 | .hqd_sdma_destroy = kgd_hqd_sdma_destroy, | ||
| 188 | .address_watch_disable = kgd_address_watch_disable, | ||
| 189 | .address_watch_execute = kgd_address_watch_execute, | ||
| 190 | .wave_control_execute = kgd_wave_control_execute, | ||
| 191 | .address_watch_get_offset = kgd_address_watch_get_offset, | ||
| 192 | .get_atc_vmid_pasid_mapping_pasid = | ||
| 193 | get_atc_vmid_pasid_mapping_pasid, | ||
| 194 | .get_atc_vmid_pasid_mapping_valid = | ||
| 195 | get_atc_vmid_pasid_mapping_valid, | ||
| 196 | .get_fw_version = get_fw_version, | ||
| 197 | .set_scratch_backing_va = set_scratch_backing_va, | ||
| 198 | .get_tile_config = amdgpu_amdkfd_get_tile_config, | ||
| 199 | .get_cu_info = get_cu_info, | ||
| 200 | .get_vram_usage = amdgpu_amdkfd_get_vram_usage, | ||
| 201 | .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm, | ||
| 202 | .acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm, | ||
| 203 | .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm, | ||
| 204 | .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir, | ||
| 205 | .set_vm_context_page_table_base = set_vm_context_page_table_base, | ||
| 206 | .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu, | ||
| 207 | .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu, | ||
| 208 | .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu, | ||
| 209 | .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu, | ||
| 210 | .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory, | ||
| 211 | .map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel, | ||
| 212 | .restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos, | ||
| 213 | .invalidate_tlbs = invalidate_tlbs, | ||
| 214 | .invalidate_tlbs_vmid = invalidate_tlbs_vmid, | ||
| 215 | .submit_ib = amdgpu_amdkfd_submit_ib, | ||
| 216 | }; | ||
| 217 | |||
| 218 | struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void) | ||
| 219 | { | ||
| 220 | return (struct kfd2kgd_calls *)&kfd2kgd; | ||
| 221 | } | ||
| 222 | |||
| 223 | static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) | ||
| 224 | { | ||
| 225 | return (struct amdgpu_device *)kgd; | ||
| 226 | } | ||
| 227 | |||
| 228 | static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe, | ||
| 229 | uint32_t queue, uint32_t vmid) | ||
| 230 | { | ||
| 231 | struct amdgpu_device *adev = get_amdgpu_device(kgd); | ||
| 232 | |||
| 233 | mutex_lock(&adev->srbm_mutex); | ||
| 234 | soc15_grbm_select(adev, mec, pipe, queue, vmid); | ||
| 235 | } | ||
| 236 | |||
| 237 | static void unlock_srbm(struct kgd_dev *kgd) | ||
| 238 | { | ||
| 239 | struct amdgpu_device *adev = get_amdgpu_device(kgd); | ||
| 240 | |||
| 241 | soc15_grbm_select(adev, 0, 0, 0, 0); | ||
| 242 | mutex_unlock(&adev->srbm_mutex); | ||
| 243 | } | ||
| 244 | |||
| 245 | static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, | ||
| 246 | uint32_t queue_id) | ||
| 247 | { | ||
| 248 | struct amdgpu_device *adev = get_amdgpu_device(kgd); | ||
| 249 | |||
| 250 | uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; | ||
| 251 | uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); | ||
| 252 | |||
| 253 | lock_srbm(kgd, mec, pipe, queue_id, 0); | ||
| 254 | } | ||
| 255 | |||
| 256 | static uint32_t get_queue_mask(struct amdgpu_device *adev, | ||
| 257 | uint32_t pipe_id, uint32_t queue_id) | ||
| 258 | { | ||
| 259 | unsigned int bit = (pipe_id * adev->gfx.mec.num_queue_per_pipe + | ||
| 260 | queue_id) & 31; | ||
| 261 | |||
| 262 | return ((uint32_t)1) << bit; | ||
| 263 | } | ||
| 264 | |||
| 265 | static void release_queue(struct kgd_dev *kgd) | ||
| 266 | { | ||
| 267 | unlock_srbm(kgd); | ||
| 268 | } | ||
| 269 | |||
| 270 | static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, | ||
| 271 | uint32_t sh_mem_config, | ||
| 272 | uint32_t sh_mem_ape1_base, | ||
| 273 | uint32_t sh_mem_ape1_limit, | ||
| 274 | uint32_t sh_mem_bases) | ||
| 275 | { | ||
| 276 | struct amdgpu_device *adev = get_amdgpu_device(kgd); | ||
| 277 | |||
| 278 | lock_srbm(kgd, 0, 0, 0, vmid); | ||
| 279 | |||
| 280 | WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config); | ||
| 281 | WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases); | ||
| 282 | /* APE1 no longer exists on GFX9 */ | ||
| 283 | |||
| 284 | unlock_srbm(kgd); | ||
| 285 | } | ||
| 286 | |||
| 287 | static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, | ||
| 288 | unsigned int vmid) | ||
| 289 | { | ||
| 290 | struct amdgpu_device *adev = get_amdgpu_device(kgd); | ||
| 291 | |||
| 292 | /* | ||
| 293 | * We have to assume that there is no outstanding mapping. | ||
| 294 | * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because | ||
| 295 | * a mapping is in progress or because a mapping finished | ||
| 296 | * and the SW cleared it. | ||
| 297 | * So the protocol is to always wait & clear. | ||
| 298 | */ | ||
| 299 | uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid | | ||
| 300 | ATC_VMID0_PASID_MAPPING__VALID_MASK; | ||
| 301 | |||
| 302 | /* | ||
| 303 | * need to do this twice, once for gfx and once for mmhub | ||
| 304 | * for ATC add 16 to VMID for mmhub, for IH different registers. | ||
| 305 | * ATC_VMID0..15 registers are separate from ATC_VMID16..31. | ||
| 306 | */ | ||
| 307 | |||
| 308 | WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid, | ||
| 309 | pasid_mapping); | ||
| 310 | |||
| 311 | while (!(RREG32(SOC15_REG_OFFSET( | ||
| 312 | ATHUB, 0, | ||
| 313 | mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) & | ||
| 314 | (1U << vmid))) | ||
| 315 | cpu_relax(); | ||
| 316 | |||
| 317 | WREG32(SOC15_REG_OFFSET(ATHUB, 0, | ||
| 318 | mmATC_VMID_PASID_MAPPING_UPDATE_STATUS), | ||
| 319 | 1U << vmid); | ||
| 320 | |||
| 321 | /* Mapping vmid to pasid also for IH block */ | ||
| 322 | WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid, | ||
| 323 | pasid_mapping); | ||
| 324 | |||
| 325 | WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID16_PASID_MAPPING) + vmid, | ||
| 326 | pasid_mapping); | ||
| 327 | |||
| 328 | while (!(RREG32(SOC15_REG_OFFSET( | ||
| 329 | ATHUB, 0, | ||
| 330 | mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) & | ||
| 331 | (1U << (vmid + 16)))) | ||
| 332 | cpu_relax(); | ||
| 333 | |||
| 334 | WREG32(SOC15_REG_OFFSET(ATHUB, 0, | ||
| 335 | mmATC_VMID_PASID_MAPPING_UPDATE_STATUS), | ||
| 336 | 1U << (vmid + 16)); | ||
| 337 | |||
| 338 | /* Mapping vmid to pasid also for IH block */ | ||
| 339 | WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid, | ||
| 340 | pasid_mapping); | ||
| 341 | return 0; | ||
| 342 | } | ||
| 343 | |||
| 344 | /* TODO - RING0 form of field is obsolete, seems to date back to SI | ||
| 345 | * but still works | ||
| 346 | */ | ||
| 347 | |||
| 348 | static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) | ||
| 349 | { | ||
| 350 | struct amdgpu_device *adev = get_amdgpu_device(kgd); | ||
| 351 | uint32_t mec; | ||
| 352 | uint32_t pipe; | ||
| 353 | |||
| 354 | mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; | ||
| 355 | pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); | ||
| 356 | |||
| 357 | lock_srbm(kgd, mec, pipe, 0, 0); | ||
| 358 | |||
| 359 | WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL), | ||
| 360 | CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK | | ||
| 361 | CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK); | ||
| 362 | |||
| 363 | unlock_srbm(kgd); | ||
| 364 | |||
| 365 | return 0; | ||
| 366 | } | ||
| 367 | |||
| 368 | static uint32_t get_sdma_base_addr(struct amdgpu_device *adev, | ||
| 369 | unsigned int engine_id, | ||
| 370 | unsigned int queue_id) | ||
| 371 | { | ||
| 372 | uint32_t base[2] = { | ||
| 373 | SOC15_REG_OFFSET(SDMA0, 0, | ||
| 374 | mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL, | ||
| 375 | SOC15_REG_OFFSET(SDMA1, 0, | ||
| 376 | mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL | ||
| 377 | }; | ||
| 378 | uint32_t retval; | ||
| 379 | |||
| 380 | retval = base[engine_id] + queue_id * (mmSDMA0_RLC1_RB_CNTL - | ||
| 381 | mmSDMA0_RLC0_RB_CNTL); | ||
| 382 | |||
| 383 | pr_debug("sdma base address: 0x%x\n", retval); | ||
| 384 | |||
| 385 | return retval; | ||
| 386 | } | ||
| 387 | |||
| 388 | static inline struct v9_mqd *get_mqd(void *mqd) | ||
| 389 | { | ||
| 390 | return (struct v9_mqd *)mqd; | ||
| 391 | } | ||
| 392 | |||
| 393 | static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd) | ||
| 394 | { | ||
| 395 | return (struct v9_sdma_mqd *)mqd; | ||
| 396 | } | ||
| 397 | |||
| 398 | static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, | ||
| 399 | uint32_t queue_id, uint32_t __user *wptr, | ||
| 400 | uint32_t wptr_shift, uint32_t wptr_mask, | ||
| 401 | struct mm_struct *mm) | ||
| 402 | { | ||
| 403 | struct amdgpu_device *adev = get_amdgpu_device(kgd); | ||
| 404 | struct v9_mqd *m; | ||
| 405 | uint32_t *mqd_hqd; | ||
| 406 | uint32_t reg, hqd_base, data; | ||
| 407 | |||
| 408 | m = get_mqd(mqd); | ||
| 409 | |||
| 410 | acquire_queue(kgd, pipe_id, queue_id); | ||
| 411 | |||
| 412 | /* HIQ is set during driver init period with vmid set to 0*/ | ||
| 413 | if (m->cp_hqd_vmid == 0) { | ||
| 414 | uint32_t value, mec, pipe; | ||
| 415 | |||
| 416 | mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; | ||
| 417 | pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); | ||
| 418 | |||
| 419 | pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", | ||
| 420 | mec, pipe, queue_id); | ||
| 421 | value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS)); | ||
| 422 | value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1, | ||
| 423 | ((mec << 5) | (pipe << 3) | queue_id | 0x80)); | ||
| 424 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value); | ||
| 425 | } | ||
| 426 | |||
| 427 | /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */ | ||
| 428 | mqd_hqd = &m->cp_mqd_base_addr_lo; | ||
| 429 | hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR); | ||
| 430 | |||
| 431 | for (reg = hqd_base; | ||
| 432 | reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++) | ||
| 433 | WREG32(reg, mqd_hqd[reg - hqd_base]); | ||
| 434 | |||
| 435 | |||
| 436 | /* Activate doorbell logic before triggering WPTR poll. */ | ||
| 437 | data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, | ||
| 438 | CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); | ||
| 439 | WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data); | ||
| 440 | |||
| 441 | if (wptr) { | ||
| 442 | /* Don't read wptr with get_user because the user | ||
| 443 | * context may not be accessible (if this function | ||
| 444 | * runs in a work queue). Instead trigger a one-shot | ||
| 445 | * polling read from memory in the CP. This assumes | ||
| 446 | * that wptr is GPU-accessible in the queue's VMID via | ||
| 447 | * ATC or SVM. WPTR==RPTR before starting the poll so | ||
| 448 | * the CP starts fetching new commands from the right | ||
| 449 | * place. | ||
| 450 | * | ||
| 451 | * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit | ||
| 452 | * tricky. Assume that the queue didn't overflow. The | ||
| 453 | * number of valid bits in the 32-bit RPTR depends on | ||
| 454 | * the queue size. The remaining bits are taken from | ||
| 455 | * the saved 64-bit WPTR. If the WPTR wrapped, add the | ||
| 456 | * queue size. | ||
| 457 | */ | ||
| 458 | uint32_t queue_size = | ||
| 459 | 2 << REG_GET_FIELD(m->cp_hqd_pq_control, | ||
| 460 | CP_HQD_PQ_CONTROL, QUEUE_SIZE); | ||
| 461 | uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1); | ||
| 462 | |||
| 463 | if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr) | ||
| 464 | guessed_wptr += queue_size; | ||
| 465 | guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1); | ||
| 466 | guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32; | ||
| 467 | |||
| 468 | WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), | ||
| 469 | lower_32_bits(guessed_wptr)); | ||
| 470 | WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), | ||
| 471 | upper_32_bits(guessed_wptr)); | ||
| 472 | WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR), | ||
| 473 | lower_32_bits((uintptr_t)wptr)); | ||
| 474 | WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), | ||
| 475 | upper_32_bits((uintptr_t)wptr)); | ||
| 476 | WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1), | ||
| 477 | get_queue_mask(adev, pipe_id, queue_id)); | ||
| 478 | } | ||
| 479 | |||
| 480 | /* Start the EOP fetcher */ | ||
| 481 | WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR), | ||
| 482 | REG_SET_FIELD(m->cp_hqd_eop_rptr, | ||
| 483 | CP_HQD_EOP_RPTR, INIT_FETCHER, 1)); | ||
| 484 | |||
| 485 | data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); | ||
| 486 | WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data); | ||
| 487 | |||
| 488 | release_queue(kgd); | ||
| 489 | |||
| 490 | return 0; | ||
| 491 | } | ||
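Note: the WPTR reconstruction in kgd_hqd_load above is easiest to follow with concrete numbers. A worked trace with hypothetical MQD values (not taken from the patch), following the code verbatim:

    /* Assume QUEUE_SIZE field = 11, so queue_size = 2 << 11 = 0x1000,
     * cp_hqd_pq_rptr = 0x0040, cp_hqd_pq_wptr_lo = 0x00023f80, wptr_hi = 0. */
    uint64_t guessed_wptr = 0x0040;			/* rptr & (queue_size - 1) */
    /* (0x00023f80 & 0xfff) = 0xf80 is not < 0x40, so no wrap correction */
    guessed_wptr += 0x00023f80 & ~0xfffULL;		/* += 0x23000 -> 0x23040 */
    guessed_wptr += (uint64_t)0 << 32;			/* wptr_hi contributes nothing */
    /* CP_HQD_PQ_WPTR_LO/HI are then programmed with 0x00023040 / 0x0 */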
| 492 | |||
| 493 | static int kgd_hqd_dump(struct kgd_dev *kgd, | ||
| 494 | uint32_t pipe_id, uint32_t queue_id, | ||
| 495 | uint32_t (**dump)[2], uint32_t *n_regs) | ||
| 496 | { | ||
| 497 | struct amdgpu_device *adev = get_amdgpu_device(kgd); | ||
| 498 | uint32_t i = 0, reg; | ||
| 499 | #define HQD_N_REGS 56 | ||
| 500 | #define DUMP_REG(addr) do { \ | ||
| 501 | if (WARN_ON_ONCE(i >= HQD_N_REGS)) \ | ||
| 502 | break; \ | ||
| 503 | (*dump)[i][0] = (addr) << 2; \ | ||
| 504 | (*dump)[i++][1] = RREG32(addr); \ | ||
| 505 | } while (0) | ||
| 506 | |||
| 507 | *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); | ||
| 508 | if (*dump == NULL) | ||
| 509 | return -ENOMEM; | ||
| 510 | |||
| 511 | acquire_queue(kgd, pipe_id, queue_id); | ||
| 512 | |||
| 513 | for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR); | ||
| 514 | reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++) | ||
| 515 | DUMP_REG(reg); | ||
| 516 | |||
| 517 | release_queue(kgd); | ||
| 518 | |||
| 519 | WARN_ON_ONCE(i != HQD_N_REGS); | ||
| 520 | *n_regs = i; | ||
| 521 | |||
| 522 | return 0; | ||
| 523 | } | ||
| 524 | |||
| 525 | static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, | ||
| 526 | uint32_t __user *wptr, struct mm_struct *mm) | ||
| 527 | { | ||
| 528 | struct amdgpu_device *adev = get_amdgpu_device(kgd); | ||
| 529 | struct v9_sdma_mqd *m; | ||
| 530 | uint32_t sdma_base_addr, sdmax_gfx_context_cntl; | ||
| 531 | unsigned long end_jiffies; | ||
| 532 | uint32_t data; | ||
| 533 | uint64_t data64; | ||
| 534 | uint64_t __user *wptr64 = (uint64_t __user *)wptr; | ||
| 535 | |||
| 536 | m = get_sdma_mqd(mqd); | ||
| 537 | sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, | ||
| 538 | m->sdma_queue_id); | ||
| 539 | sdmax_gfx_context_cntl = m->sdma_engine_id ? | ||
| 540 | SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GFX_CONTEXT_CNTL) : | ||
| 541 | SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_CONTEXT_CNTL); | ||
| 542 | |||
| 543 | WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, | ||
| 544 | m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)); | ||
| 545 | |||
| 546 | end_jiffies = msecs_to_jiffies(2000) + jiffies; | ||
| 547 | while (true) { | ||
| 548 | data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); | ||
| 549 | if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) | ||
| 550 | break; | ||
| 551 | if (time_after(jiffies, end_jiffies)) | ||
| 552 | return -ETIME; | ||
| 553 | usleep_range(500, 1000); | ||
| 554 | } | ||
| 555 | data = RREG32(sdmax_gfx_context_cntl); | ||
| 556 | data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL, | ||
| 557 | RESUME_CTX, 0); | ||
| 558 | WREG32(sdmax_gfx_context_cntl, data); | ||
| 559 | |||
| 560 | WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL_OFFSET, | ||
| 561 | m->sdmax_rlcx_doorbell_offset); | ||
| 562 | |||
| 563 | data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL, | ||
| 564 | ENABLE, 1); | ||
| 565 | WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data); | ||
| 566 | WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr); | ||
| 567 | WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI, | ||
| 568 | m->sdmax_rlcx_rb_rptr_hi); | ||
| 569 | |||
| 570 | WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1); | ||
| 571 | if (read_user_wptr(mm, wptr64, data64)) { | ||
| 572 | WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, | ||
| 573 | lower_32_bits(data64)); | ||
| 574 | WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI, | ||
| 575 | upper_32_bits(data64)); | ||
| 576 | } else { | ||
| 577 | WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, | ||
| 578 | m->sdmax_rlcx_rb_rptr); | ||
| 579 | WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI, | ||
| 580 | m->sdmax_rlcx_rb_rptr_hi); | ||
| 581 | } | ||
| 582 | WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0); | ||
| 583 | |||
| 584 | WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); | ||
| 585 | WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, | ||
| 586 | m->sdmax_rlcx_rb_base_hi); | ||
| 587 | WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, | ||
| 588 | m->sdmax_rlcx_rb_rptr_addr_lo); | ||
| 589 | WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, | ||
| 590 | m->sdmax_rlcx_rb_rptr_addr_hi); | ||
| 591 | |||
| 592 | data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL, | ||
| 593 | RB_ENABLE, 1); | ||
| 594 | WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data); | ||
| 595 | |||
| 596 | return 0; | ||
| 597 | } | ||
| 598 | |||
| 599 | static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, | ||
| 600 | uint32_t engine_id, uint32_t queue_id, | ||
| 601 | uint32_t (**dump)[2], uint32_t *n_regs) | ||
| 602 | { | ||
| 603 | struct amdgpu_device *adev = get_amdgpu_device(kgd); | ||
| 604 | uint32_t sdma_base_addr = get_sdma_base_addr(adev, engine_id, queue_id); | ||
| 605 | uint32_t i = 0, reg; | ||
| 606 | #undef HQD_N_REGS | ||
| 607 | #define HQD_N_REGS (19+6+7+10) | ||
| 608 | |||
| 609 | *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); | ||
| 610 | if (*dump == NULL) | ||
| 611 | return -ENOMEM; | ||
| 612 | |||
| 613 | for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++) | ||
| 614 | DUMP_REG(sdma_base_addr + reg); | ||
| 615 | for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++) | ||
| 616 | DUMP_REG(sdma_base_addr + reg); | ||
| 617 | for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN; | ||
| 618 | reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++) | ||
| 619 | DUMP_REG(sdma_base_addr + reg); | ||
| 620 | for (reg = mmSDMA0_RLC0_MIDCMD_DATA0; | ||
| 621 | reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++) | ||
| 622 | DUMP_REG(sdma_base_addr + reg); | ||
| 623 | |||
| 624 | WARN_ON_ONCE(i != HQD_N_REGS); | ||
| 625 | *n_regs = i; | ||
| 626 | |||
| 627 | return 0; | ||
| 628 | } | ||
| 629 | |||
| 630 | static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, | ||
| 631 | uint32_t pipe_id, uint32_t queue_id) | ||
| 632 | { | ||
| 633 | struct amdgpu_device *adev = get_amdgpu_device(kgd); | ||
| 634 | uint32_t act; | ||
| 635 | bool retval = false; | ||
| 636 | uint32_t low, high; | ||
| 637 | |||
| 638 | acquire_queue(kgd, pipe_id, queue_id); | ||
| 639 | act = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)); | ||
| 640 | if (act) { | ||
| 641 | low = lower_32_bits(queue_address >> 8); | ||
| 642 | high = upper_32_bits(queue_address >> 8); | ||
| 643 | |||
| 644 | if (low == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE)) && | ||
| 645 | high == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI))) | ||
| 646 | retval = true; | ||
| 647 | } | ||
| 648 | release_queue(kgd); | ||
| 649 | return retval; | ||
| 650 | } | ||
| 651 | |||
| 652 | static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) | ||
| 653 | { | ||
| 654 | struct amdgpu_device *adev = get_amdgpu_device(kgd); | ||
| 655 | struct v9_sdma_mqd *m; | ||
| 656 | uint32_t sdma_base_addr; | ||
| 657 | uint32_t sdma_rlc_rb_cntl; | ||
| 658 | |||
| 659 | m = get_sdma_mqd(mqd); | ||
| 660 | sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, | ||
| 661 | m->sdma_queue_id); | ||
| 662 | |||
| 663 | sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); | ||
| 664 | |||
| 665 | if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK) | ||
| 666 | return true; | ||
| 667 | |||
| 668 | return false; | ||
| 669 | } | ||
| 670 | |||
| 671 | static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, | ||
| 672 | enum kfd_preempt_type reset_type, | ||
| 673 | unsigned int utimeout, uint32_t pipe_id, | ||
| 674 | uint32_t queue_id) | ||
| 675 | { | ||
| 676 | struct amdgpu_device *adev = get_amdgpu_device(kgd); | ||
| 677 | enum hqd_dequeue_request_type type; | ||
| 678 | unsigned long end_jiffies; | ||
| 679 | uint32_t temp; | ||
| 680 | struct v9_mqd *m = get_mqd(mqd); | ||
| 681 | |||
| 682 | acquire_queue(kgd, pipe_id, queue_id); | ||
| 683 | |||
| 684 | if (m->cp_hqd_vmid == 0) | ||
| 685 | WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0); | ||
| 686 | |||
| 687 | switch (reset_type) { | ||
| 688 | case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN: | ||
| 689 | type = DRAIN_PIPE; | ||
| 690 | break; | ||
| 691 | case KFD_PREEMPT_TYPE_WAVEFRONT_RESET: | ||
| 692 | type = RESET_WAVES; | ||
| 693 | break; | ||
| 694 | default: | ||
| 695 | type = DRAIN_PIPE; | ||
| 696 | break; | ||
| 697 | } | ||
| 698 | |||
| 699 | WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type); | ||
| 700 | |||
| 701 | end_jiffies = (utimeout * HZ / 1000) + jiffies; | ||
| 702 | while (true) { | ||
| 703 | temp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)); | ||
| 704 | if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK)) | ||
| 705 | break; | ||
| 706 | if (time_after(jiffies, end_jiffies)) { | ||
| 707 | pr_err("cp queue preemption time out.\n"); | ||
| 708 | release_queue(kgd); | ||
| 709 | return -ETIME; | ||
| 710 | } | ||
| 711 | usleep_range(500, 1000); | ||
| 712 | } | ||
| 713 | |||
| 714 | release_queue(kgd); | ||
| 715 | return 0; | ||
| 716 | } | ||
| 717 | |||
| 718 | static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, | ||
| 719 | unsigned int utimeout) | ||
| 720 | { | ||
| 721 | struct amdgpu_device *adev = get_amdgpu_device(kgd); | ||
| 722 | struct v9_sdma_mqd *m; | ||
| 723 | uint32_t sdma_base_addr; | ||
| 724 | uint32_t temp; | ||
| 725 | unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies; | ||
| 726 | |||
| 727 | m = get_sdma_mqd(mqd); | ||
| 728 | sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, | ||
| 729 | m->sdma_queue_id); | ||
| 730 | |||
| 731 | temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); | ||
| 732 | temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK; | ||
| 733 | WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp); | ||
| 734 | |||
| 735 | while (true) { | ||
| 736 | temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); | ||
| 737 | if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) | ||
| 738 | break; | ||
| 739 | if (time_after(jiffies, end_jiffies)) | ||
| 740 | return -ETIME; | ||
| 741 | usleep_range(500, 1000); | ||
| 742 | } | ||
| 743 | |||
| 744 | WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0); | ||
| 745 | WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, | ||
| 746 | RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) | | ||
| 747 | SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK); | ||
| 748 | |||
| 749 | m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR); | ||
| 750 | m->sdmax_rlcx_rb_rptr_hi = | ||
| 751 | RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI); | ||
| 752 | |||
| 753 | return 0; | ||
| 754 | } | ||
| 755 | |||
| 756 | static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, | ||
| 757 | uint8_t vmid) | ||
| 758 | { | ||
| 759 | uint32_t reg; | ||
| 760 | struct amdgpu_device *adev = (struct amdgpu_device *) kgd; | ||
| 761 | |||
| 762 | reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) | ||
| 763 | + vmid); | ||
| 764 | return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; | ||
| 765 | } | ||
| 766 | |||
| 767 | static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, | ||
| 768 | uint8_t vmid) | ||
| 769 | { | ||
| 770 | uint32_t reg; | ||
| 771 | struct amdgpu_device *adev = (struct amdgpu_device *) kgd; | ||
| 772 | |||
| 773 | reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) | ||
| 774 | + vmid); | ||
| 775 | return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK; | ||
| 776 | } | ||
| 777 | |||
| 778 | static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) | ||
| 779 | { | ||
| 780 | struct amdgpu_device *adev = (struct amdgpu_device *) kgd; | ||
| 781 | uint32_t req = (1 << vmid) | | ||
| 782 | (0 << VM_INVALIDATE_ENG16_REQ__FLUSH_TYPE__SHIFT) | /* legacy */ | ||
| 783 | VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PTES_MASK | | ||
| 784 | VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE0_MASK | | ||
| 785 | VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE1_MASK | | ||
| 786 | VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE2_MASK | | ||
| 787 | VM_INVALIDATE_ENG16_REQ__INVALIDATE_L1_PTES_MASK; | ||
| 788 | |||
| 789 | mutex_lock(&adev->srbm_mutex); | ||
| 790 | |||
| 791 | /* Use legacy mode tlb invalidation. | ||
| 792 | * | ||
| 793 | * Currently on Raven the code below is broken for anything but | ||
| 794 | * legacy mode due to a MMHUB power gating problem. A workaround | ||
| 795 | * is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ | ||
| 796 | * == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack | ||
| 797 | * bit. | ||
| 798 | * | ||
| 799 | * TODO 1: agree on the right set of invalidation registers for | ||
| 800 | * KFD use. Use the last one for now. Invalidate both GC and | ||
| 801 | * MMHUB. | ||
| 802 | * | ||
| 803 | * TODO 2: support range-based invalidation, requires kfd2kgd | ||
| 804 | * interface change | ||
| 805 | */ | ||
| 806 | WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_LO32), | ||
| 807 | 0xffffffff); | ||
| 808 | WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_HI32), | ||
| 809 | 0x0000001f); | ||
| 810 | |||
| 811 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, | ||
| 812 | mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32), | ||
| 813 | 0xffffffff); | ||
| 814 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, | ||
| 815 | mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32), | ||
| 816 | 0x0000001f); | ||
| 817 | |||
| 818 | WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_REQ), req); | ||
| 819 | |||
| 820 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_INVALIDATE_ENG16_REQ), | ||
| 821 | req); | ||
| 822 | |||
| 823 | while (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ACK)) & | ||
| 824 | (1 << vmid))) | ||
| 825 | cpu_relax(); | ||
| 826 | |||
| 827 | while (!(RREG32(SOC15_REG_OFFSET(MMHUB, 0, | ||
| 828 | mmMMHUB_VM_INVALIDATE_ENG16_ACK)) & | ||
| 829 | (1 << vmid))) | ||
| 830 | cpu_relax(); | ||
| 831 | |||
| 832 | mutex_unlock(&adev->srbm_mutex); | ||
| 833 | |||
| 834 | } | ||
| 835 | |||
| 836 | static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid) | ||
| 837 | { | ||
| 838 | signed long r; | ||
| 839 | uint32_t seq; | ||
| 840 | struct amdgpu_ring *ring = &adev->gfx.kiq.ring; | ||
| 841 | |||
| 842 | spin_lock(&adev->gfx.kiq.ring_lock); | ||
| 843 | amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/ | ||
| 844 | amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); | ||
| 845 | amdgpu_ring_write(ring, | ||
| 846 | PACKET3_INVALIDATE_TLBS_DST_SEL(1) | | ||
| 847 | PACKET3_INVALIDATE_TLBS_ALL_HUB(1) | | ||
| 848 | PACKET3_INVALIDATE_TLBS_PASID(pasid) | | ||
| 849 | PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(0)); /* legacy */ | ||
| 850 | amdgpu_fence_emit_polling(ring, &seq); | ||
| 851 | amdgpu_ring_commit(ring); | ||
| 852 | spin_unlock(&adev->gfx.kiq.ring_lock); | ||
| 853 | |||
| 854 | r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); | ||
| 855 | if (r < 1) { | ||
| 856 | DRM_ERROR("wait for kiq fence error: %ld.\n", r); | ||
| 857 | return -ETIME; | ||
| 858 | } | ||
| 859 | |||
| 860 | return 0; | ||
| 861 | } | ||
| 862 | |||
| 863 | static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) | ||
| 864 | { | ||
| 865 | struct amdgpu_device *adev = (struct amdgpu_device *) kgd; | ||
| 866 | int vmid; | ||
| 867 | struct amdgpu_ring *ring = &adev->gfx.kiq.ring; | ||
| 868 | |||
| 869 | if (ring->ready) | ||
| 870 | return invalidate_tlbs_with_kiq(adev, pasid); | ||
| 871 | |||
| 872 | for (vmid = 0; vmid < 16; vmid++) { | ||
| 873 | if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) | ||
| 874 | continue; | ||
| 875 | if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) { | ||
| 876 | if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid) | ||
| 877 | == pasid) { | ||
| 878 | write_vmid_invalidate_request(kgd, vmid); | ||
| 879 | break; | ||
| 880 | } | ||
| 881 | } | ||
| 882 | } | ||
| 883 | |||
| 884 | return 0; | ||
| 885 | } | ||
| 886 | |||
| 887 | static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) | ||
| 888 | { | ||
| 889 | struct amdgpu_device *adev = (struct amdgpu_device *) kgd; | ||
| 890 | |||
| 891 | if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { | ||
| 892 | pr_err("non kfd vmid %d\n", vmid); | ||
| 893 | return 0; | ||
| 894 | } | ||
| 895 | |||
| 896 | write_vmid_invalidate_request(kgd, vmid); | ||
| 897 | return 0; | ||
| 898 | } | ||
| 899 | |||
| 900 | static int kgd_address_watch_disable(struct kgd_dev *kgd) | ||
| 901 | { | ||
| 902 | return 0; | ||
| 903 | } | ||
| 904 | |||
| 905 | static int kgd_address_watch_execute(struct kgd_dev *kgd, | ||
| 906 | unsigned int watch_point_id, | ||
| 907 | uint32_t cntl_val, | ||
| 908 | uint32_t addr_hi, | ||
| 909 | uint32_t addr_lo) | ||
| 910 | { | ||
| 911 | return 0; | ||
| 912 | } | ||
| 913 | |||
| 914 | static int kgd_wave_control_execute(struct kgd_dev *kgd, | ||
| 915 | uint32_t gfx_index_val, | ||
| 916 | uint32_t sq_cmd) | ||
| 917 | { | ||
| 918 | struct amdgpu_device *adev = get_amdgpu_device(kgd); | ||
| 919 | uint32_t data = 0; | ||
| 920 | |||
| 921 | mutex_lock(&adev->grbm_idx_mutex); | ||
| 922 | |||
| 923 | WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), gfx_index_val); | ||
| 924 | WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd); | ||
| 925 | |||
| 926 | data = REG_SET_FIELD(data, GRBM_GFX_INDEX, | ||
| 927 | INSTANCE_BROADCAST_WRITES, 1); | ||
| 928 | data = REG_SET_FIELD(data, GRBM_GFX_INDEX, | ||
| 929 | SH_BROADCAST_WRITES, 1); | ||
| 930 | data = REG_SET_FIELD(data, GRBM_GFX_INDEX, | ||
| 931 | SE_BROADCAST_WRITES, 1); | ||
| 932 | |||
| 933 | WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data); | ||
| 934 | mutex_unlock(&adev->grbm_idx_mutex); | ||
| 935 | |||
| 936 | return 0; | ||
| 937 | } | ||
| 938 | |||
| 939 | static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, | ||
| 940 | unsigned int watch_point_id, | ||
| 941 | unsigned int reg_offset) | ||
| 942 | { | ||
| 943 | return 0; | ||
| 944 | } | ||
| 945 | |||
| 946 | static void set_scratch_backing_va(struct kgd_dev *kgd, | ||
| 947 | uint64_t va, uint32_t vmid) | ||
| 948 | { | ||
| 949 | /* No longer needed on GFXv9. The scratch base address is | ||
| 950 | * passed to the shader by the CP. It's the user mode driver's | ||
| 951 | * responsibility. | ||
| 952 | */ | ||
| 953 | } | ||
| 954 | |||
| 955 | /* FIXME: Does this need to be ASIC-specific code? */ | ||
| 956 | static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) | ||
| 957 | { | ||
| 958 | struct amdgpu_device *adev = (struct amdgpu_device *) kgd; | ||
| 959 | const union amdgpu_firmware_header *hdr; | ||
| 960 | |||
| 961 | switch (type) { | ||
| 962 | case KGD_ENGINE_PFP: | ||
| 963 | hdr = (const union amdgpu_firmware_header *)adev->gfx.pfp_fw->data; | ||
| 964 | break; | ||
| 965 | |||
| 966 | case KGD_ENGINE_ME: | ||
| 967 | hdr = (const union amdgpu_firmware_header *)adev->gfx.me_fw->data; | ||
| 968 | break; | ||
| 969 | |||
| 970 | case KGD_ENGINE_CE: | ||
| 971 | hdr = (const union amdgpu_firmware_header *)adev->gfx.ce_fw->data; | ||
| 972 | break; | ||
| 973 | |||
| 974 | case KGD_ENGINE_MEC1: | ||
| 975 | hdr = (const union amdgpu_firmware_header *)adev->gfx.mec_fw->data; | ||
| 976 | break; | ||
| 977 | |||
| 978 | case KGD_ENGINE_MEC2: | ||
| 979 | hdr = (const union amdgpu_firmware_header *)adev->gfx.mec2_fw->data; | ||
| 980 | break; | ||
| 981 | |||
| 982 | case KGD_ENGINE_RLC: | ||
| 983 | hdr = (const union amdgpu_firmware_header *)adev->gfx.rlc_fw->data; | ||
| 984 | break; | ||
| 985 | |||
| 986 | case KGD_ENGINE_SDMA1: | ||
| 987 | hdr = (const union amdgpu_firmware_header *)adev->sdma.instance[0].fw->data; | ||
| 988 | break; | ||
| 989 | |||
| 990 | case KGD_ENGINE_SDMA2: | ||
| 991 | hdr = (const union amdgpu_firmware_header *)adev->sdma.instance[1].fw->data; | ||
| 992 | break; | ||
| 993 | |||
| 994 | default: | ||
| 995 | return 0; | ||
| 996 | } | ||
| 997 | |||
| 998 | if (hdr == NULL) | ||
| 999 | return 0; | ||
| 1000 | |||
| 1001 | /* Only 12 bits in use */ | ||
| 1002 | return hdr->common.ucode_version; | ||
| 1003 | } | ||
| 1004 | |||
| 1005 | static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, | ||
| 1006 | uint32_t page_table_base) | ||
| 1007 | { | ||
| 1008 | struct amdgpu_device *adev = get_amdgpu_device(kgd); | ||
| 1009 | uint64_t base = (uint64_t)page_table_base << PAGE_SHIFT | | ||
| 1010 | AMDGPU_PTE_VALID; | ||
| 1011 | |||
| 1012 | if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { | ||
| 1013 | pr_err("trying to set page table base for wrong VMID %u\n", | ||
| 1014 | vmid); | ||
| 1015 | return; | ||
| 1016 | } | ||
| 1017 | |||
| 1018 | /* TODO: take advantage of per-process address space size. For | ||
| 1019 | * now, all processes share the same address space size, like | ||
| 1020 | * on GFX8 and older. | ||
| 1021 | */ | ||
| 1022 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0); | ||
| 1023 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0); | ||
| 1024 | |||
| 1025 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2), | ||
| 1026 | lower_32_bits(adev->vm_manager.max_pfn - 1)); | ||
| 1027 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2), | ||
| 1028 | upper_32_bits(adev->vm_manager.max_pfn - 1)); | ||
| 1029 | |||
| 1030 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base)); | ||
| 1031 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base)); | ||
| 1032 | |||
| 1033 | WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0); | ||
| 1034 | WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0); | ||
| 1035 | |||
| 1036 | WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2), | ||
| 1037 | lower_32_bits(adev->vm_manager.max_pfn - 1)); | ||
| 1038 | WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2), | ||
| 1039 | upper_32_bits(adev->vm_manager.max_pfn - 1)); | ||
| 1040 | |||
| 1041 | WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base)); | ||
| 1042 | WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base)); | ||
| 1043 | } | ||
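set_vm_context_page_table_base() above expands the page frame number passed in by KFD into a 64-bit GPUVM page table address (shifted by PAGE_SHIFT and tagged with AMDGPU_PTE_VALID), then writes the low and high halves into the per-VMID LO32/HI32 register pairs of both the GC and MMHUB hubs, which is where the vmid*2 stride comes from. A stand-alone sketch of just that address arithmetic, assuming a 4 KiB page size; the register offsets below are placeholders, not the SOC15 values used above.

#include <stdint.h>
#include <stdio.h>

#define EXAMPLE_PAGE_SHIFT	12		/* assumption: 4 KiB pages */
#define EXAMPLE_PTE_VALID	(1ULL << 0)	/* stand-in for AMDGPU_PTE_VALID */

int main(void)
{
	uint32_t page_table_base = 0x00123456;	/* page frame number from KFD */
	uint32_t vmid = 9;			/* a KFD VMID */
	uint64_t base = ((uint64_t)page_table_base << EXAMPLE_PAGE_SHIFT) |
			EXAMPLE_PTE_VALID;
	/* hypothetical register offsets; each VMID owns a LO32/HI32 pair */
	uint32_t reg_lo32 = 0x0700 + vmid * 2;
	uint32_t reg_hi32 = reg_lo32 + 1;

	printf("VMID %u: [0x%04x] <- 0x%08x, [0x%04x] <- 0x%08x\n",
	       vmid, reg_lo32, (uint32_t)(base & 0xffffffffULL),
	       reg_hi32, (uint32_t)(base >> 32));
	return 0;
}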
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 1d6e1479da38..ff8fd75f7ca5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | |||
| @@ -23,6 +23,8 @@ | |||
| 23 | #define pr_fmt(fmt) "kfd2kgd: " fmt | 23 | #define pr_fmt(fmt) "kfd2kgd: " fmt |
| 24 | 24 | ||
| 25 | #include <linux/list.h> | 25 | #include <linux/list.h> |
| 26 | #include <linux/pagemap.h> | ||
| 27 | #include <linux/sched/mm.h> | ||
| 26 | #include <drm/drmP.h> | 28 | #include <drm/drmP.h> |
| 27 | #include "amdgpu_object.h" | 29 | #include "amdgpu_object.h" |
| 28 | #include "amdgpu_vm.h" | 30 | #include "amdgpu_vm.h" |
| @@ -33,10 +35,20 @@ | |||
| 33 | */ | 35 | */ |
| 34 | #define VI_BO_SIZE_ALIGN (0x8000) | 36 | #define VI_BO_SIZE_ALIGN (0x8000) |
| 35 | 37 | ||
| 38 | /* BO flag to indicate a KFD userptr BO */ | ||
| 39 | #define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63) | ||
| 40 | |||
| 41 | /* Userptr restore delay, just long enough to allow consecutive VM | ||
| 42 | * changes to accumulate | ||
| 43 | */ | ||
| 44 | #define AMDGPU_USERPTR_RESTORE_DELAY_MS 1 | ||
| 45 | |||
| 36 | /* Impose limit on how much memory KFD can use */ | 46 | /* Impose limit on how much memory KFD can use */ |
| 37 | static struct { | 47 | static struct { |
| 38 | uint64_t max_system_mem_limit; | 48 | uint64_t max_system_mem_limit; |
| 49 | uint64_t max_userptr_mem_limit; | ||
| 39 | int64_t system_mem_used; | 50 | int64_t system_mem_used; |
| 51 | int64_t userptr_mem_used; | ||
| 40 | spinlock_t mem_limit_lock; | 52 | spinlock_t mem_limit_lock; |
| 41 | } kfd_mem_limit; | 53 | } kfd_mem_limit; |
| 42 | 54 | ||
| @@ -57,6 +69,7 @@ static const char * const domain_bit_to_string[] = { | |||
| 57 | 69 | ||
| 58 | #define domain_string(domain) domain_bit_to_string[ffs(domain)-1] | 70 | #define domain_string(domain) domain_bit_to_string[ffs(domain)-1] |
| 59 | 71 | ||
| 72 | static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work); | ||
| 60 | 73 | ||
| 61 | 74 | ||
| 62 | static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) | 75 | static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) |
| @@ -78,6 +91,7 @@ static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm, | |||
| 78 | 91 | ||
| 79 | /* Set memory usage limits. Currently, limits are | 92 | /* Set memory usage limits. Currently, limits are |
| 80 | * System (kernel) memory - 3/8th System RAM | 93 | * System (kernel) memory - 3/8th System RAM |
| 94 | * Userptr memory - 3/4th System RAM | ||
| 81 | */ | 95 | */ |
| 82 | void amdgpu_amdkfd_gpuvm_init_mem_limits(void) | 96 | void amdgpu_amdkfd_gpuvm_init_mem_limits(void) |
| 83 | { | 97 | { |
| @@ -90,8 +104,10 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void) | |||
| 90 | 104 | ||
| 91 | spin_lock_init(&kfd_mem_limit.mem_limit_lock); | 105 | spin_lock_init(&kfd_mem_limit.mem_limit_lock); |
| 92 | kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3); | 106 | kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3); |
| 93 | pr_debug("Kernel memory limit %lluM\n", | 107 | kfd_mem_limit.max_userptr_mem_limit = mem - (mem >> 2); |
| 94 | (kfd_mem_limit.max_system_mem_limit >> 20)); | 108 | pr_debug("Kernel memory limit %lluM, userptr limit %lluM\n", |
| 109 | (kfd_mem_limit.max_system_mem_limit >> 20), | ||
| 110 | (kfd_mem_limit.max_userptr_mem_limit >> 20)); | ||
| 95 | } | 111 | } |
| 96 | 112 | ||
| 97 | static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev, | 113 | static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev, |
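The shift arithmetic in amdgpu_amdkfd_gpuvm_init_mem_limits() matches the limits stated in the comment above: (mem >> 1) - (mem >> 3) is 1/2 - 1/8 = 3/8 of system RAM for kernel (GTT) allocations, and the new mem - (mem >> 2) is 3/4 of system RAM for userptr memory. A stand-alone sketch of the same arithmetic; the 16 GiB total is only an example, not a value from the patch.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t mem = 16ULL << 30;	/* example only: 16 GiB of system RAM */
	uint64_t system_mem_limit = (mem >> 1) - (mem >> 3);	/* 3/8 of RAM */
	uint64_t userptr_mem_limit = mem - (mem >> 2);		/* 3/4 of RAM */

	printf("Kernel memory limit %lluM, userptr limit %lluM\n",
	       (unsigned long long)(system_mem_limit >> 20),
	       (unsigned long long)(userptr_mem_limit >> 20));
	return 0;
}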
| @@ -111,6 +127,16 @@ static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev, | |||
| 111 | goto err_no_mem; | 127 | goto err_no_mem; |
| 112 | } | 128 | } |
| 113 | kfd_mem_limit.system_mem_used += (acc_size + size); | 129 | kfd_mem_limit.system_mem_used += (acc_size + size); |
| 130 | } else if (domain == AMDGPU_GEM_DOMAIN_CPU) { | ||
| 131 | if ((kfd_mem_limit.system_mem_used + acc_size > | ||
| 132 | kfd_mem_limit.max_system_mem_limit) || | ||
| 133 | (kfd_mem_limit.userptr_mem_used + (size + acc_size) > | ||
| 134 | kfd_mem_limit.max_userptr_mem_limit)) { | ||
| 135 | ret = -ENOMEM; | ||
| 136 | goto err_no_mem; | ||
| 137 | } | ||
| 138 | kfd_mem_limit.system_mem_used += acc_size; | ||
| 139 | kfd_mem_limit.userptr_mem_used += size; | ||
| 114 | } | 140 | } |
| 115 | err_no_mem: | 141 | err_no_mem: |
| 116 | spin_unlock(&kfd_mem_limit.mem_limit_lock); | 142 | spin_unlock(&kfd_mem_limit.mem_limit_lock); |
| @@ -126,10 +152,16 @@ static void unreserve_system_mem_limit(struct amdgpu_device *adev, | |||
| 126 | sizeof(struct amdgpu_bo)); | 152 | sizeof(struct amdgpu_bo)); |
| 127 | 153 | ||
| 128 | spin_lock(&kfd_mem_limit.mem_limit_lock); | 154 | spin_lock(&kfd_mem_limit.mem_limit_lock); |
| 129 | if (domain == AMDGPU_GEM_DOMAIN_GTT) | 155 | if (domain == AMDGPU_GEM_DOMAIN_GTT) { |
| 130 | kfd_mem_limit.system_mem_used -= (acc_size + size); | 156 | kfd_mem_limit.system_mem_used -= (acc_size + size); |
| 157 | } else if (domain == AMDGPU_GEM_DOMAIN_CPU) { | ||
| 158 | kfd_mem_limit.system_mem_used -= acc_size; | ||
| 159 | kfd_mem_limit.userptr_mem_used -= size; | ||
| 160 | } | ||
| 131 | WARN_ONCE(kfd_mem_limit.system_mem_used < 0, | 161 | WARN_ONCE(kfd_mem_limit.system_mem_used < 0, |
| 132 | "kfd system memory accounting unbalanced"); | 162 | "kfd system memory accounting unbalanced"); |
| 163 | WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0, | ||
| 164 | "kfd userptr memory accounting unbalanced"); | ||
| 133 | 165 | ||
| 134 | spin_unlock(&kfd_mem_limit.mem_limit_lock); | 166 | spin_unlock(&kfd_mem_limit.mem_limit_lock); |
| 135 | } | 167 | } |
| @@ -138,12 +170,17 @@ void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo) | |||
| 138 | { | 170 | { |
| 139 | spin_lock(&kfd_mem_limit.mem_limit_lock); | 171 | spin_lock(&kfd_mem_limit.mem_limit_lock); |
| 140 | 172 | ||
| 141 | if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) { | 173 | if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) { |
| 174 | kfd_mem_limit.system_mem_used -= bo->tbo.acc_size; | ||
| 175 | kfd_mem_limit.userptr_mem_used -= amdgpu_bo_size(bo); | ||
| 176 | } else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) { | ||
| 142 | kfd_mem_limit.system_mem_used -= | 177 | kfd_mem_limit.system_mem_used -= |
| 143 | (bo->tbo.acc_size + amdgpu_bo_size(bo)); | 178 | (bo->tbo.acc_size + amdgpu_bo_size(bo)); |
| 144 | } | 179 | } |
| 145 | WARN_ONCE(kfd_mem_limit.system_mem_used < 0, | 180 | WARN_ONCE(kfd_mem_limit.system_mem_used < 0, |
| 146 | "kfd system memory accounting unbalanced"); | 181 | "kfd system memory accounting unbalanced"); |
| 182 | WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0, | ||
| 183 | "kfd userptr memory accounting unbalanced"); | ||
| 147 | 184 | ||
| 148 | spin_unlock(&kfd_mem_limit.mem_limit_lock); | 185 | spin_unlock(&kfd_mem_limit.mem_limit_lock); |
| 149 | } | 186 | } |
| @@ -506,7 +543,8 @@ static void remove_bo_from_vm(struct amdgpu_device *adev, | |||
| 506 | } | 543 | } |
| 507 | 544 | ||
| 508 | static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem, | 545 | static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem, |
| 509 | struct amdkfd_process_info *process_info) | 546 | struct amdkfd_process_info *process_info, |
| 547 | bool userptr) | ||
| 510 | { | 548 | { |
| 511 | struct ttm_validate_buffer *entry = &mem->validate_list; | 549 | struct ttm_validate_buffer *entry = &mem->validate_list; |
| 512 | struct amdgpu_bo *bo = mem->bo; | 550 | struct amdgpu_bo *bo = mem->bo; |
| @@ -515,10 +553,95 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem, | |||
| 515 | entry->shared = true; | 553 | entry->shared = true; |
| 516 | entry->bo = &bo->tbo; | 554 | entry->bo = &bo->tbo; |
| 517 | mutex_lock(&process_info->lock); | 555 | mutex_lock(&process_info->lock); |
| 518 | list_add_tail(&entry->head, &process_info->kfd_bo_list); | 556 | if (userptr) |
| 557 | list_add_tail(&entry->head, &process_info->userptr_valid_list); | ||
| 558 | else | ||
| 559 | list_add_tail(&entry->head, &process_info->kfd_bo_list); | ||
| 519 | mutex_unlock(&process_info->lock); | 560 | mutex_unlock(&process_info->lock); |
| 520 | } | 561 | } |
| 521 | 562 | ||
| 563 | /* Initializes user pages. It registers the MMU notifier and validates | ||
| 564 | * the userptr BO in the GTT domain. | ||
| 565 | * | ||
| 566 | * The BO must already be on the userptr_valid_list. Otherwise an | ||
| 567 | * eviction and restore may happen that leaves the new BO unmapped | ||
| 568 | * with the user mode queues running. | ||
| 569 | * | ||
| 570 | * Takes the process_info->lock to protect against concurrent restore | ||
| 571 | * workers. | ||
| 572 | * | ||
| 573 | * Returns 0 for success, negative errno for errors. | ||
| 574 | */ | ||
| 575 | static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm, | ||
| 576 | uint64_t user_addr) | ||
| 577 | { | ||
| 578 | struct amdkfd_process_info *process_info = mem->process_info; | ||
| 579 | struct amdgpu_bo *bo = mem->bo; | ||
| 580 | struct ttm_operation_ctx ctx = { true, false }; | ||
| 581 | int ret = 0; | ||
| 582 | |||
| 583 | mutex_lock(&process_info->lock); | ||
| 584 | |||
| 585 | ret = amdgpu_ttm_tt_set_userptr(bo->tbo.ttm, user_addr, 0); | ||
| 586 | if (ret) { | ||
| 587 | pr_err("%s: Failed to set userptr: %d\n", __func__, ret); | ||
| 588 | goto out; | ||
| 589 | } | ||
| 590 | |||
| 591 | ret = amdgpu_mn_register(bo, user_addr); | ||
| 592 | if (ret) { | ||
| 593 | pr_err("%s: Failed to register MMU notifier: %d\n", | ||
| 594 | __func__, ret); | ||
| 595 | goto out; | ||
| 596 | } | ||
| 597 | |||
| 598 | /* If no restore worker is running concurrently, user_pages | ||
| 599 | * should not be allocated | ||
| 600 | */ | ||
| 601 | WARN(mem->user_pages, "Leaking user_pages array"); | ||
| 602 | |||
| 603 | mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages, | ||
| 604 | sizeof(struct page *), | ||
| 605 | GFP_KERNEL | __GFP_ZERO); | ||
| 606 | if (!mem->user_pages) { | ||
| 607 | pr_err("%s: Failed to allocate pages array\n", __func__); | ||
| 608 | ret = -ENOMEM; | ||
| 609 | goto unregister_out; | ||
| 610 | } | ||
| 611 | |||
| 612 | ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages); | ||
| 613 | if (ret) { | ||
| 614 | pr_err("%s: Failed to get user pages: %d\n", __func__, ret); | ||
| 615 | goto free_out; | ||
| 616 | } | ||
| 617 | |||
| 618 | amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages); | ||
| 619 | |||
| 620 | ret = amdgpu_bo_reserve(bo, true); | ||
| 621 | if (ret) { | ||
| 622 | pr_err("%s: Failed to reserve BO\n", __func__); | ||
| 623 | goto release_out; | ||
| 624 | } | ||
| 625 | amdgpu_ttm_placement_from_domain(bo, mem->domain); | ||
| 626 | ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); | ||
| 627 | if (ret) | ||
| 628 | pr_err("%s: failed to validate BO\n", __func__); | ||
| 629 | amdgpu_bo_unreserve(bo); | ||
| 630 | |||
| 631 | release_out: | ||
| 632 | if (ret) | ||
| 633 | release_pages(mem->user_pages, bo->tbo.ttm->num_pages); | ||
| 634 | free_out: | ||
| 635 | kvfree(mem->user_pages); | ||
| 636 | mem->user_pages = NULL; | ||
| 637 | unregister_out: | ||
| 638 | if (ret) | ||
| 639 | amdgpu_mn_unregister(bo); | ||
| 640 | out: | ||
| 641 | mutex_unlock(&process_info->lock); | ||
| 642 | return ret; | ||
| 643 | } | ||
| 644 | |||
| 522 | /* Reserving a BO and its page table BOs must happen atomically to | 645 | /* Reserving a BO and its page table BOs must happen atomically to |
| 523 | * avoid deadlocks. Some operations update multiple VMs at once. Track | 646 | * avoid deadlocks. Some operations update multiple VMs at once. Track |
| 524 | * all the reservation info in a context structure. Optionally a sync | 647 | * all the reservation info in a context structure. Optionally a sync |
| @@ -748,7 +871,8 @@ static int update_gpuvm_pte(struct amdgpu_device *adev, | |||
| 748 | } | 871 | } |
| 749 | 872 | ||
| 750 | static int map_bo_to_gpuvm(struct amdgpu_device *adev, | 873 | static int map_bo_to_gpuvm(struct amdgpu_device *adev, |
| 751 | struct kfd_bo_va_list *entry, struct amdgpu_sync *sync) | 874 | struct kfd_bo_va_list *entry, struct amdgpu_sync *sync, |
| 875 | bool no_update_pte) | ||
| 752 | { | 876 | { |
| 753 | int ret; | 877 | int ret; |
| 754 | 878 | ||
| @@ -762,6 +886,9 @@ static int map_bo_to_gpuvm(struct amdgpu_device *adev, | |||
| 762 | return ret; | 886 | return ret; |
| 763 | } | 887 | } |
| 764 | 888 | ||
| 889 | if (no_update_pte) | ||
| 890 | return 0; | ||
| 891 | |||
| 765 | ret = update_gpuvm_pte(adev, entry, sync); | 892 | ret = update_gpuvm_pte(adev, entry, sync); |
| 766 | if (ret) { | 893 | if (ret) { |
| 767 | pr_err("update_gpuvm_pte() failed\n"); | 894 | pr_err("update_gpuvm_pte() failed\n"); |
| @@ -820,6 +947,8 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, | |||
| 820 | mutex_init(&info->lock); | 947 | mutex_init(&info->lock); |
| 821 | INIT_LIST_HEAD(&info->vm_list_head); | 948 | INIT_LIST_HEAD(&info->vm_list_head); |
| 822 | INIT_LIST_HEAD(&info->kfd_bo_list); | 949 | INIT_LIST_HEAD(&info->kfd_bo_list); |
| 950 | INIT_LIST_HEAD(&info->userptr_valid_list); | ||
| 951 | INIT_LIST_HEAD(&info->userptr_inval_list); | ||
| 823 | 952 | ||
| 824 | info->eviction_fence = | 953 | info->eviction_fence = |
| 825 | amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1), | 954 | amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1), |
| @@ -830,6 +959,11 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, | |||
| 830 | goto create_evict_fence_fail; | 959 | goto create_evict_fence_fail; |
| 831 | } | 960 | } |
| 832 | 961 | ||
| 962 | info->pid = get_task_pid(current->group_leader, PIDTYPE_PID); | ||
| 963 | atomic_set(&info->evicted_bos, 0); | ||
| 964 | INIT_DELAYED_WORK(&info->restore_userptr_work, | ||
| 965 | amdgpu_amdkfd_restore_userptr_worker); | ||
| 966 | |||
| 833 | *process_info = info; | 967 | *process_info = info; |
| 834 | *ef = dma_fence_get(&info->eviction_fence->base); | 968 | *ef = dma_fence_get(&info->eviction_fence->base); |
| 835 | } | 969 | } |
| @@ -872,6 +1006,7 @@ reserve_pd_fail: | |||
| 872 | dma_fence_put(*ef); | 1006 | dma_fence_put(*ef); |
| 873 | *ef = NULL; | 1007 | *ef = NULL; |
| 874 | *process_info = NULL; | 1008 | *process_info = NULL; |
| 1009 | put_pid(info->pid); | ||
| 875 | create_evict_fence_fail: | 1010 | create_evict_fence_fail: |
| 876 | mutex_destroy(&info->lock); | 1011 | mutex_destroy(&info->lock); |
| 877 | kfree(info); | 1012 | kfree(info); |
| @@ -967,8 +1102,12 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, | |||
| 967 | /* Release per-process resources when last compute VM is destroyed */ | 1102 | /* Release per-process resources when last compute VM is destroyed */ |
| 968 | if (!process_info->n_vms) { | 1103 | if (!process_info->n_vms) { |
| 969 | WARN_ON(!list_empty(&process_info->kfd_bo_list)); | 1104 | WARN_ON(!list_empty(&process_info->kfd_bo_list)); |
| 1105 | WARN_ON(!list_empty(&process_info->userptr_valid_list)); | ||
| 1106 | WARN_ON(!list_empty(&process_info->userptr_inval_list)); | ||
| 970 | 1107 | ||
| 971 | dma_fence_put(&process_info->eviction_fence->base); | 1108 | dma_fence_put(&process_info->eviction_fence->base); |
| 1109 | cancel_delayed_work_sync(&process_info->restore_userptr_work); | ||
| 1110 | put_pid(process_info->pid); | ||
| 972 | mutex_destroy(&process_info->lock); | 1111 | mutex_destroy(&process_info->lock); |
| 973 | kfree(process_info); | 1112 | kfree(process_info); |
| 974 | } | 1113 | } |
| @@ -1003,9 +1142,11 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( | |||
| 1003 | { | 1142 | { |
| 1004 | struct amdgpu_device *adev = get_amdgpu_device(kgd); | 1143 | struct amdgpu_device *adev = get_amdgpu_device(kgd); |
| 1005 | struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; | 1144 | struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; |
| 1145 | uint64_t user_addr = 0; | ||
| 1006 | struct amdgpu_bo *bo; | 1146 | struct amdgpu_bo *bo; |
| 1147 | struct amdgpu_bo_param bp; | ||
| 1007 | int byte_align; | 1148 | int byte_align; |
| 1008 | u32 alloc_domain; | 1149 | u32 domain, alloc_domain; |
| 1009 | u64 alloc_flags; | 1150 | u64 alloc_flags; |
| 1010 | uint32_t mapping_flags; | 1151 | uint32_t mapping_flags; |
| 1011 | int ret; | 1152 | int ret; |
| @@ -1014,14 +1155,21 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( | |||
| 1014 | * Check on which domain to allocate BO | 1155 | * Check on which domain to allocate BO |
| 1015 | */ | 1156 | */ |
| 1016 | if (flags & ALLOC_MEM_FLAGS_VRAM) { | 1157 | if (flags & ALLOC_MEM_FLAGS_VRAM) { |
| 1017 | alloc_domain = AMDGPU_GEM_DOMAIN_VRAM; | 1158 | domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM; |
| 1018 | alloc_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED; | 1159 | alloc_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED; |
| 1019 | alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ? | 1160 | alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ? |
| 1020 | AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : | 1161 | AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : |
| 1021 | AMDGPU_GEM_CREATE_NO_CPU_ACCESS; | 1162 | AMDGPU_GEM_CREATE_NO_CPU_ACCESS; |
| 1022 | } else if (flags & ALLOC_MEM_FLAGS_GTT) { | 1163 | } else if (flags & ALLOC_MEM_FLAGS_GTT) { |
| 1023 | alloc_domain = AMDGPU_GEM_DOMAIN_GTT; | 1164 | domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT; |
| 1165 | alloc_flags = 0; | ||
| 1166 | } else if (flags & ALLOC_MEM_FLAGS_USERPTR) { | ||
| 1167 | domain = AMDGPU_GEM_DOMAIN_GTT; | ||
| 1168 | alloc_domain = AMDGPU_GEM_DOMAIN_CPU; | ||
| 1024 | alloc_flags = 0; | 1169 | alloc_flags = 0; |
| 1170 | if (!offset || !*offset) | ||
| 1171 | return -EINVAL; | ||
| 1172 | user_addr = *offset; | ||
| 1025 | } else { | 1173 | } else { |
| 1026 | return -EINVAL; | 1174 | return -EINVAL; |
| 1027 | } | 1175 | } |
| @@ -1069,8 +1217,14 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( | |||
| 1069 | pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n", | 1217 | pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n", |
| 1070 | va, size, domain_string(alloc_domain)); | 1218 | va, size, domain_string(alloc_domain)); |
| 1071 | 1219 | ||
| 1072 | ret = amdgpu_bo_create(adev, size, byte_align, | 1220 | memset(&bp, 0, sizeof(bp)); |
| 1073 | alloc_domain, alloc_flags, ttm_bo_type_device, NULL, &bo); | 1221 | bp.size = size; |
| 1222 | bp.byte_align = byte_align; | ||
| 1223 | bp.domain = alloc_domain; | ||
| 1224 | bp.flags = alloc_flags; | ||
| 1225 | bp.type = ttm_bo_type_device; | ||
| 1226 | bp.resv = NULL; | ||
| 1227 | ret = amdgpu_bo_create(adev, &bp, &bo); | ||
| 1074 | if (ret) { | 1228 | if (ret) { |
| 1075 | pr_debug("Failed to create BO on domain %s. ret %d\n", | 1229 | pr_debug("Failed to create BO on domain %s. ret %d\n", |
| 1076 | domain_string(alloc_domain), ret); | 1230 | domain_string(alloc_domain), ret); |
| @@ -1078,18 +1232,34 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( | |||
| 1078 | } | 1232 | } |
| 1079 | bo->kfd_bo = *mem; | 1233 | bo->kfd_bo = *mem; |
| 1080 | (*mem)->bo = bo; | 1234 | (*mem)->bo = bo; |
| 1235 | if (user_addr) | ||
| 1236 | bo->flags |= AMDGPU_AMDKFD_USERPTR_BO; | ||
| 1081 | 1237 | ||
| 1082 | (*mem)->va = va; | 1238 | (*mem)->va = va; |
| 1083 | (*mem)->domain = alloc_domain; | 1239 | (*mem)->domain = domain; |
| 1084 | (*mem)->mapped_to_gpu_memory = 0; | 1240 | (*mem)->mapped_to_gpu_memory = 0; |
| 1085 | (*mem)->process_info = avm->process_info; | 1241 | (*mem)->process_info = avm->process_info; |
| 1086 | add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info); | 1242 | add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr); |
| 1243 | |||
| 1244 | if (user_addr) { | ||
| 1245 | ret = init_user_pages(*mem, current->mm, user_addr); | ||
| 1246 | if (ret) { | ||
| 1247 | mutex_lock(&avm->process_info->lock); | ||
| 1248 | list_del(&(*mem)->validate_list.head); | ||
| 1249 | mutex_unlock(&avm->process_info->lock); | ||
| 1250 | goto allocate_init_user_pages_failed; | ||
| 1251 | } | ||
| 1252 | } | ||
| 1087 | 1253 | ||
| 1088 | if (offset) | 1254 | if (offset) |
| 1089 | *offset = amdgpu_bo_mmap_offset(bo); | 1255 | *offset = amdgpu_bo_mmap_offset(bo); |
| 1090 | 1256 | ||
| 1091 | return 0; | 1257 | return 0; |
| 1092 | 1258 | ||
| 1259 | allocate_init_user_pages_failed: | ||
| 1260 | amdgpu_bo_unref(&bo); | ||
| 1261 | /* Don't unreserve system mem limit twice */ | ||
| 1262 | goto err_reserve_system_mem; | ||
| 1093 | err_bo_create: | 1263 | err_bo_create: |
| 1094 | unreserve_system_mem_limit(adev, size, alloc_domain); | 1264 | unreserve_system_mem_limit(adev, size, alloc_domain); |
| 1095 | err_reserve_system_mem: | 1265 | err_reserve_system_mem: |
| @@ -1122,12 +1292,24 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( | |||
| 1122 | * be freed anyway | 1292 | * be freed anyway |
| 1123 | */ | 1293 | */ |
| 1124 | 1294 | ||
| 1295 | /* No more MMU notifiers */ | ||
| 1296 | amdgpu_mn_unregister(mem->bo); | ||
| 1297 | |||
| 1125 | /* Make sure restore workers don't access the BO any more */ | 1298 | /* Make sure restore workers don't access the BO any more */ |
| 1126 | bo_list_entry = &mem->validate_list; | 1299 | bo_list_entry = &mem->validate_list; |
| 1127 | mutex_lock(&process_info->lock); | 1300 | mutex_lock(&process_info->lock); |
| 1128 | list_del(&bo_list_entry->head); | 1301 | list_del(&bo_list_entry->head); |
| 1129 | mutex_unlock(&process_info->lock); | 1302 | mutex_unlock(&process_info->lock); |
| 1130 | 1303 | ||
| 1304 | /* Free user pages if necessary */ | ||
| 1305 | if (mem->user_pages) { | ||
| 1306 | pr_debug("%s: Freeing user_pages array\n", __func__); | ||
| 1307 | if (mem->user_pages[0]) | ||
| 1308 | release_pages(mem->user_pages, | ||
| 1309 | mem->bo->tbo.ttm->num_pages); | ||
| 1310 | kvfree(mem->user_pages); | ||
| 1311 | } | ||
| 1312 | |||
| 1131 | ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx); | 1313 | ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx); |
| 1132 | if (unlikely(ret)) | 1314 | if (unlikely(ret)) |
| 1133 | return ret; | 1315 | return ret; |
| @@ -1173,21 +1355,32 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( | |||
| 1173 | struct kfd_bo_va_list *bo_va_entry = NULL; | 1355 | struct kfd_bo_va_list *bo_va_entry = NULL; |
| 1174 | struct kfd_bo_va_list *bo_va_entry_aql = NULL; | 1356 | struct kfd_bo_va_list *bo_va_entry_aql = NULL; |
| 1175 | unsigned long bo_size; | 1357 | unsigned long bo_size; |
| 1176 | 1358 | bool is_invalid_userptr = false; | |
| 1177 | /* Make sure restore is not running concurrently. | ||
| 1178 | */ | ||
| 1179 | mutex_lock(&mem->process_info->lock); | ||
| 1180 | |||
| 1181 | mutex_lock(&mem->lock); | ||
| 1182 | 1359 | ||
| 1183 | bo = mem->bo; | 1360 | bo = mem->bo; |
| 1184 | |||
| 1185 | if (!bo) { | 1361 | if (!bo) { |
| 1186 | pr_err("Invalid BO when mapping memory to GPU\n"); | 1362 | pr_err("Invalid BO when mapping memory to GPU\n"); |
| 1187 | ret = -EINVAL; | 1363 | return -EINVAL; |
| 1188 | goto out; | ||
| 1189 | } | 1364 | } |
| 1190 | 1365 | ||
| 1366 | /* Make sure restore is not running concurrently. Since we | ||
| 1367 | * don't map invalid userptr BOs, we rely on the next restore | ||
| 1368 | * worker to do the mapping | ||
| 1369 | */ | ||
| 1370 | mutex_lock(&mem->process_info->lock); | ||
| 1371 | |||
| 1372 | /* Lock mmap-sem. If we find an invalid userptr BO, we can be | ||
| 1373 | * sure that the MMU notifier is no longer running | ||
| 1374 | * concurrently and the queues are actually stopped | ||
| 1375 | */ | ||
| 1376 | if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) { | ||
| 1377 | down_write(¤t->mm->mmap_sem); | ||
| 1378 | is_invalid_userptr = atomic_read(&mem->invalid); | ||
| 1379 | up_write(¤t->mm->mmap_sem); | ||
| 1380 | } | ||
| 1381 | |||
| 1382 | mutex_lock(&mem->lock); | ||
| 1383 | |||
| 1191 | domain = mem->domain; | 1384 | domain = mem->domain; |
| 1192 | bo_size = bo->tbo.mem.size; | 1385 | bo_size = bo->tbo.mem.size; |
| 1193 | 1386 | ||
| @@ -1200,6 +1393,14 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( | |||
| 1200 | if (unlikely(ret)) | 1393 | if (unlikely(ret)) |
| 1201 | goto out; | 1394 | goto out; |
| 1202 | 1395 | ||
| 1396 | /* Userptr can be marked as "not invalid", but not actually be | ||
| 1397 | * validated yet (still in the system domain). In that case | ||
| 1398 | * the queues are still stopped and we can leave mapping for | ||
| 1399 | * the next restore worker | ||
| 1400 | */ | ||
| 1401 | if (bo->tbo.mem.mem_type == TTM_PL_SYSTEM) | ||
| 1402 | is_invalid_userptr = true; | ||
| 1403 | |||
| 1203 | if (check_if_add_bo_to_vm(avm, mem)) { | 1404 | if (check_if_add_bo_to_vm(avm, mem)) { |
| 1204 | ret = add_bo_to_vm(adev, mem, avm, false, | 1405 | ret = add_bo_to_vm(adev, mem, avm, false, |
| 1205 | &bo_va_entry); | 1406 | &bo_va_entry); |
| @@ -1217,7 +1418,8 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( | |||
| 1217 | goto add_bo_to_vm_failed; | 1418 | goto add_bo_to_vm_failed; |
| 1218 | } | 1419 | } |
| 1219 | 1420 | ||
| 1220 | if (mem->mapped_to_gpu_memory == 0) { | 1421 | if (mem->mapped_to_gpu_memory == 0 && |
| 1422 | !amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) { | ||
| 1221 | /* Validate BO only once. The eviction fence gets added to BO | 1423 | /* Validate BO only once. The eviction fence gets added to BO |
| 1222 | * the first time it is mapped. Validate will wait for all | 1424 | * the first time it is mapped. Validate will wait for all |
| 1223 | * background evictions to complete. | 1425 | * background evictions to complete. |
| @@ -1235,7 +1437,8 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( | |||
| 1235 | entry->va, entry->va + bo_size, | 1437 | entry->va, entry->va + bo_size, |
| 1236 | entry); | 1438 | entry); |
| 1237 | 1439 | ||
| 1238 | ret = map_bo_to_gpuvm(adev, entry, ctx.sync); | 1440 | ret = map_bo_to_gpuvm(adev, entry, ctx.sync, |
| 1441 | is_invalid_userptr); | ||
| 1239 | if (ret) { | 1442 | if (ret) { |
| 1240 | pr_err("Failed to map radeon bo to gpuvm\n"); | 1443 | pr_err("Failed to map radeon bo to gpuvm\n"); |
| 1241 | goto map_bo_to_gpuvm_failed; | 1444 | goto map_bo_to_gpuvm_failed; |
| @@ -1418,6 +1621,337 @@ bo_reserve_failed: | |||
| 1418 | return ret; | 1621 | return ret; |
| 1419 | } | 1622 | } |
| 1420 | 1623 | ||
| 1624 | /* Evict a userptr BO by stopping the queues if necessary | ||
| 1625 | * | ||
| 1626 | * Runs in MMU notifier, may be in RECLAIM_FS context. This means it | ||
| 1627 | * cannot do any memory allocations, and cannot take any locks that | ||
| 1628 | * are held elsewhere while allocating memory. Therefore this is as | ||
| 1629 | * simple as possible, using atomic counters. | ||
| 1630 | * | ||
| 1631 | * It doesn't do anything to the BO itself. The real work happens in | ||
| 1632 | * restore, where we get updated page addresses. This function only | ||
| 1633 | * ensures that GPU access to the BO is stopped. | ||
| 1634 | */ | ||
| 1635 | int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, | ||
| 1636 | struct mm_struct *mm) | ||
| 1637 | { | ||
| 1638 | struct amdkfd_process_info *process_info = mem->process_info; | ||
| 1639 | int invalid, evicted_bos; | ||
| 1640 | int r = 0; | ||
| 1641 | |||
| 1642 | invalid = atomic_inc_return(&mem->invalid); | ||
| 1643 | evicted_bos = atomic_inc_return(&process_info->evicted_bos); | ||
| 1644 | if (evicted_bos == 1) { | ||
| 1645 | /* First eviction, stop the queues */ | ||
| 1646 | r = kgd2kfd->quiesce_mm(mm); | ||
| 1647 | if (r) | ||
| 1648 | pr_err("Failed to quiesce KFD\n"); | ||
| 1649 | schedule_delayed_work(&process_info->restore_userptr_work, | ||
| 1650 | msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS)); | ||
| 1651 | } | ||
| 1652 | |||
| 1653 | return r; | ||
| 1654 | } | ||
| 1655 | |||
| 1656 | /* Update invalid userptr BOs | ||
| 1657 | * | ||
| 1658 | * Moves invalidated (evicted) userptr BOs from userptr_valid_list to | ||
| 1659 | * userptr_inval_list and updates user pages for all BOs that have | ||
| 1660 | * been invalidated since their last update. | ||
| 1661 | */ | ||
| 1662 | static int update_invalid_user_pages(struct amdkfd_process_info *process_info, | ||
| 1663 | struct mm_struct *mm) | ||
| 1664 | { | ||
| 1665 | struct kgd_mem *mem, *tmp_mem; | ||
| 1666 | struct amdgpu_bo *bo; | ||
| 1667 | struct ttm_operation_ctx ctx = { false, false }; | ||
| 1668 | int invalid, ret; | ||
| 1669 | |||
| 1670 | /* Move all invalidated BOs to the userptr_inval_list and | ||
| 1671 | * release their user pages by migration to the CPU domain | ||
| 1672 | */ | ||
| 1673 | list_for_each_entry_safe(mem, tmp_mem, | ||
| 1674 | &process_info->userptr_valid_list, | ||
| 1675 | validate_list.head) { | ||
| 1676 | if (!atomic_read(&mem->invalid)) | ||
| 1677 | continue; /* BO is still valid */ | ||
| 1678 | |||
| 1679 | bo = mem->bo; | ||
| 1680 | |||
| 1681 | if (amdgpu_bo_reserve(bo, true)) | ||
| 1682 | return -EAGAIN; | ||
| 1683 | amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); | ||
| 1684 | ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); | ||
| 1685 | amdgpu_bo_unreserve(bo); | ||
| 1686 | if (ret) { | ||
| 1687 | pr_err("%s: Failed to invalidate userptr BO\n", | ||
| 1688 | __func__); | ||
| 1689 | return -EAGAIN; | ||
| 1690 | } | ||
| 1691 | |||
| 1692 | list_move_tail(&mem->validate_list.head, | ||
| 1693 | &process_info->userptr_inval_list); | ||
| 1694 | } | ||
| 1695 | |||
| 1696 | if (list_empty(&process_info->userptr_inval_list)) | ||
| 1697 | return 0; /* All evicted userptr BOs were freed */ | ||
| 1698 | |||
| 1699 | /* Go through userptr_inval_list and update any invalid user_pages */ | ||
| 1700 | list_for_each_entry(mem, &process_info->userptr_inval_list, | ||
| 1701 | validate_list.head) { | ||
| 1702 | invalid = atomic_read(&mem->invalid); | ||
| 1703 | if (!invalid) | ||
| 1704 | /* BO hasn't been invalidated since the last | ||
| 1705 | * revalidation attempt. Keep its BO list. | ||
| 1706 | */ | ||
| 1707 | continue; | ||
| 1708 | |||
| 1709 | bo = mem->bo; | ||
| 1710 | |||
| 1711 | if (!mem->user_pages) { | ||
| 1712 | mem->user_pages = | ||
| 1713 | kvmalloc_array(bo->tbo.ttm->num_pages, | ||
| 1714 | sizeof(struct page *), | ||
| 1715 | GFP_KERNEL | __GFP_ZERO); | ||
| 1716 | if (!mem->user_pages) { | ||
| 1717 | pr_err("%s: Failed to allocate pages array\n", | ||
| 1718 | __func__); | ||
| 1719 | return -ENOMEM; | ||
| 1720 | } | ||
| 1721 | } else if (mem->user_pages[0]) { | ||
| 1722 | release_pages(mem->user_pages, bo->tbo.ttm->num_pages); | ||
| 1723 | } | ||
| 1724 | |||
| 1725 | /* Get updated user pages */ | ||
| 1726 | ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, | ||
| 1727 | mem->user_pages); | ||
| 1728 | if (ret) { | ||
| 1729 | mem->user_pages[0] = NULL; | ||
| 1730 | pr_info("%s: Failed to get user pages: %d\n", | ||
| 1731 | __func__, ret); | ||
| 1732 | /* Pretend it succeeded. It will fail later | ||
| 1733 | * with a VM fault if the GPU tries to access | ||
| 1734 | * it. Better than hanging indefinitely with | ||
| 1735 | * stalled user mode queues. | ||
| 1736 | */ | ||
| 1737 | } | ||
| 1738 | |||
| 1739 | /* Mark the BO as valid unless it was invalidated | ||
| 1740 | * again concurrently | ||
| 1741 | */ | ||
| 1742 | if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid) | ||
| 1743 | return -EAGAIN; | ||
| 1744 | } | ||
| 1745 | |||
| 1746 | return 0; | ||
| 1747 | } | ||
| 1748 | |||
| 1749 | /* Validate invalid userptr BOs | ||
| 1750 | * | ||
| 1751 | * Validates BOs on the userptr_inval_list, and moves them back to the | ||
| 1752 | * userptr_valid_list. Also updates GPUVM page tables with new page | ||
| 1753 | * addresses and waits for the page table updates to complete. | ||
| 1754 | */ | ||
| 1755 | static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) | ||
| 1756 | { | ||
| 1757 | struct amdgpu_bo_list_entry *pd_bo_list_entries; | ||
| 1758 | struct list_head resv_list, duplicates; | ||
| 1759 | struct ww_acquire_ctx ticket; | ||
| 1760 | struct amdgpu_sync sync; | ||
| 1761 | |||
| 1762 | struct amdgpu_vm *peer_vm; | ||
| 1763 | struct kgd_mem *mem, *tmp_mem; | ||
| 1764 | struct amdgpu_bo *bo; | ||
| 1765 | struct ttm_operation_ctx ctx = { false, false }; | ||
| 1766 | int i, ret; | ||
| 1767 | |||
| 1768 | pd_bo_list_entries = kcalloc(process_info->n_vms, | ||
| 1769 | sizeof(struct amdgpu_bo_list_entry), | ||
| 1770 | GFP_KERNEL); | ||
| 1771 | if (!pd_bo_list_entries) { | ||
| 1772 | pr_err("%s: Failed to allocate PD BO list entries\n", __func__); | ||
| 1773 | return -ENOMEM; | ||
| 1774 | } | ||
| 1775 | |||
| 1776 | INIT_LIST_HEAD(&resv_list); | ||
| 1777 | INIT_LIST_HEAD(&duplicates); | ||
| 1778 | |||
| 1779 | /* Get all the page directory BOs that need to be reserved */ | ||
| 1780 | i = 0; | ||
| 1781 | list_for_each_entry(peer_vm, &process_info->vm_list_head, | ||
| 1782 | vm_list_node) | ||
| 1783 | amdgpu_vm_get_pd_bo(peer_vm, &resv_list, | ||
| 1784 | &pd_bo_list_entries[i++]); | ||
| 1785 | /* Add the userptr_inval_list entries to resv_list */ | ||
| 1786 | list_for_each_entry(mem, &process_info->userptr_inval_list, | ||
| 1787 | validate_list.head) { | ||
| 1788 | list_add_tail(&mem->resv_list.head, &resv_list); | ||
| 1789 | mem->resv_list.bo = mem->validate_list.bo; | ||
| 1790 | mem->resv_list.shared = mem->validate_list.shared; | ||
| 1791 | } | ||
| 1792 | |||
| 1793 | /* Reserve all BOs and page tables for validation */ | ||
| 1794 | ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates); | ||
| 1795 | WARN(!list_empty(&duplicates), "Duplicates should be empty"); | ||
| 1796 | if (ret) | ||
| 1797 | goto out; | ||
| 1798 | |||
| 1799 | amdgpu_sync_create(&sync); | ||
| 1800 | |||
| 1801 | /* Avoid triggering eviction fences when unmapping invalid | ||
| 1802 | * userptr BOs (waits for all fences, doesn't use | ||
| 1803 | * FENCE_OWNER_VM) | ||
| 1804 | */ | ||
| 1805 | list_for_each_entry(peer_vm, &process_info->vm_list_head, | ||
| 1806 | vm_list_node) | ||
| 1807 | amdgpu_amdkfd_remove_eviction_fence(peer_vm->root.base.bo, | ||
| 1808 | process_info->eviction_fence, | ||
| 1809 | NULL, NULL); | ||
| 1810 | |||
| 1811 | ret = process_validate_vms(process_info); | ||
| 1812 | if (ret) | ||
| 1813 | goto unreserve_out; | ||
| 1814 | |||
| 1815 | /* Validate BOs and update GPUVM page tables */ | ||
| 1816 | list_for_each_entry_safe(mem, tmp_mem, | ||
| 1817 | &process_info->userptr_inval_list, | ||
| 1818 | validate_list.head) { | ||
| 1819 | struct kfd_bo_va_list *bo_va_entry; | ||
| 1820 | |||
| 1821 | bo = mem->bo; | ||
| 1822 | |||
| 1823 | /* Copy pages array and validate the BO if we got user pages */ | ||
| 1824 | if (mem->user_pages[0]) { | ||
| 1825 | amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, | ||
| 1826 | mem->user_pages); | ||
| 1827 | amdgpu_ttm_placement_from_domain(bo, mem->domain); | ||
| 1828 | ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); | ||
| 1829 | if (ret) { | ||
| 1830 | pr_err("%s: failed to validate BO\n", __func__); | ||
| 1831 | goto unreserve_out; | ||
| 1832 | } | ||
| 1833 | } | ||
| 1834 | |||
| 1835 | /* Validate succeeded, now the BO owns the pages, free | ||
| 1836 | * our copy of the pointer array. Put this BO back on | ||
| 1837 | * the userptr_valid_list. If we need to revalidate | ||
| 1838 | * it, we need to start from scratch. | ||
| 1839 | */ | ||
| 1840 | kvfree(mem->user_pages); | ||
| 1841 | mem->user_pages = NULL; | ||
| 1842 | list_move_tail(&mem->validate_list.head, | ||
| 1843 | &process_info->userptr_valid_list); | ||
| 1844 | |||
| 1845 | /* Update mapping. If the BO was not validated | ||
| 1846 | * (because we couldn't get user pages), this will | ||
| 1847 | * clear the page table entries, which will result in | ||
| 1848 | * VM faults if the GPU tries to access the invalid | ||
| 1849 | * memory. | ||
| 1850 | */ | ||
| 1851 | list_for_each_entry(bo_va_entry, &mem->bo_va_list, bo_list) { | ||
| 1852 | if (!bo_va_entry->is_mapped) | ||
| 1853 | continue; | ||
| 1854 | |||
| 1855 | ret = update_gpuvm_pte((struct amdgpu_device *) | ||
| 1856 | bo_va_entry->kgd_dev, | ||
| 1857 | bo_va_entry, &sync); | ||
| 1858 | if (ret) { | ||
| 1859 | pr_err("%s: update PTE failed\n", __func__); | ||
| 1860 | /* make sure this gets validated again */ | ||
| 1861 | atomic_inc(&mem->invalid); | ||
| 1862 | goto unreserve_out; | ||
| 1863 | } | ||
| 1864 | } | ||
| 1865 | } | ||
| 1866 | |||
| 1867 | /* Update page directories */ | ||
| 1868 | ret = process_update_pds(process_info, &sync); | ||
| 1869 | |||
| 1870 | unreserve_out: | ||
| 1871 | list_for_each_entry(peer_vm, &process_info->vm_list_head, | ||
| 1872 | vm_list_node) | ||
| 1873 | amdgpu_bo_fence(peer_vm->root.base.bo, | ||
| 1874 | &process_info->eviction_fence->base, true); | ||
| 1875 | ttm_eu_backoff_reservation(&ticket, &resv_list); | ||
| 1876 | amdgpu_sync_wait(&sync, false); | ||
| 1877 | amdgpu_sync_free(&sync); | ||
| 1878 | out: | ||
| 1879 | kfree(pd_bo_list_entries); | ||
| 1880 | |||
| 1881 | return ret; | ||
| 1882 | } | ||
| 1883 | |||
| 1884 | /* Worker callback to restore evicted userptr BOs | ||
| 1885 | * | ||
| 1886 | * Tries to update and validate all userptr BOs. If successful and no | ||
| 1887 | * concurrent evictions happened, the queues are restarted. Otherwise, | ||
| 1888 | * reschedule for another attempt later. | ||
| 1889 | */ | ||
| 1890 | static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work) | ||
| 1891 | { | ||
| 1892 | struct delayed_work *dwork = to_delayed_work(work); | ||
| 1893 | struct amdkfd_process_info *process_info = | ||
| 1894 | container_of(dwork, struct amdkfd_process_info, | ||
| 1895 | restore_userptr_work); | ||
| 1896 | struct task_struct *usertask; | ||
| 1897 | struct mm_struct *mm; | ||
| 1898 | int evicted_bos; | ||
| 1899 | |||
| 1900 | evicted_bos = atomic_read(&process_info->evicted_bos); | ||
| 1901 | if (!evicted_bos) | ||
| 1902 | return; | ||
| 1903 | |||
| 1904 | /* Reference task and mm in case of concurrent process termination */ | ||
| 1905 | usertask = get_pid_task(process_info->pid, PIDTYPE_PID); | ||
| 1906 | if (!usertask) | ||
| 1907 | return; | ||
| 1908 | mm = get_task_mm(usertask); | ||
| 1909 | if (!mm) { | ||
| 1910 | put_task_struct(usertask); | ||
| 1911 | return; | ||
| 1912 | } | ||
| 1913 | |||
| 1914 | mutex_lock(&process_info->lock); | ||
| 1915 | |||
| 1916 | if (update_invalid_user_pages(process_info, mm)) | ||
| 1917 | goto unlock_out; | ||
| 1918 | /* userptr_inval_list can be empty if all evicted userptr BOs | ||
| 1919 | * have been freed. In that case there is nothing to validate | ||
| 1920 | * and we can just restart the queues. | ||
| 1921 | */ | ||
| 1922 | if (!list_empty(&process_info->userptr_inval_list)) { | ||
| 1923 | if (atomic_read(&process_info->evicted_bos) != evicted_bos) | ||
| 1924 | goto unlock_out; /* Concurrent eviction, try again */ | ||
| 1925 | |||
| 1926 | if (validate_invalid_user_pages(process_info)) | ||
| 1927 | goto unlock_out; | ||
| 1928 | } | ||
| 1929 | /* Final check for concurrent eviction and atomic update. If | ||
| 1930 | * another eviction happens after successful update, it will | ||
| 1931 | * be a first eviction that calls quiesce_mm. The eviction | ||
| 1932 | * reference counting inside KFD will handle this case. | ||
| 1933 | */ | ||
| 1934 | if (atomic_cmpxchg(&process_info->evicted_bos, evicted_bos, 0) != | ||
| 1935 | evicted_bos) | ||
| 1936 | goto unlock_out; | ||
| 1937 | evicted_bos = 0; | ||
| 1938 | if (kgd2kfd->resume_mm(mm)) { | ||
| 1939 | pr_err("%s: Failed to resume KFD\n", __func__); | ||
| 1940 | /* No recovery from this failure. Probably the CP is | ||
| 1941 | * hanging. No point trying again. | ||
| 1942 | */ | ||
| 1943 | } | ||
| 1944 | unlock_out: | ||
| 1945 | mutex_unlock(&process_info->lock); | ||
| 1946 | mmput(mm); | ||
| 1947 | put_task_struct(usertask); | ||
| 1948 | |||
| 1949 | /* If validation failed, reschedule another attempt */ | ||
| 1950 | if (evicted_bos) | ||
| 1951 | schedule_delayed_work(&process_info->restore_userptr_work, | ||
| 1952 | msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS)); | ||
| 1953 | } | ||
| 1954 | |||
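The worker above is an instance of a snapshot/compare-and-swap retry loop: sample the eviction counter once, do the sleeping restore work without holding it, and only clear it with atomic_cmpxchg() if no new eviction raced in; otherwise the delayed work is re-armed. A stripped-down sketch of that control flow (the struct and field names here are illustrative stand-ins, not the driver's):

    #include <linux/atomic.h>
    #include <linux/jiffies.h>
    #include <linux/workqueue.h>

    struct proc_info_sketch {               /* stand-in for amdkfd_process_info */
            atomic_t evicted;
            struct delayed_work restore_work;
    };

    static void restore_worker_sketch(struct work_struct *work)
    {
            struct proc_info_sketch *info =
                    container_of(to_delayed_work(work),
                                 struct proc_info_sketch, restore_work);
            int snapshot = atomic_read(&info->evicted);

            if (!snapshot)
                    return;                 /* nothing evicted, nothing to do */

            /* ... sleeping revalidation work happens here ... */

            /* Clear the counter only if no eviction raced with us; otherwise
             * leave it set and retry later, as the worker above does.
             */
            if (atomic_cmpxchg(&info->evicted, snapshot, 0) != snapshot)
                    schedule_delayed_work(&info->restore_work,
                                          msecs_to_jiffies(100));
    }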
| 1421 | /** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given | 1955 | /** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given |
| 1422 | * KFD process identified by process_info | 1956 | * KFD process identified by process_info |
| 1423 | * | 1957 | * |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index a0f48cb9b8f0..236915849cfe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c | |||
| @@ -322,3 +322,47 @@ int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev) | |||
| 322 | 322 | ||
| 323 | return ret; | 323 | return ret; |
| 324 | } | 324 | } |
| 325 | |||
| 326 | union gfx_info { | ||
| 327 | struct atom_gfx_info_v2_4 v24; | ||
| 328 | }; | ||
| 329 | |||
| 330 | int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev) | ||
| 331 | { | ||
| 332 | struct amdgpu_mode_info *mode_info = &adev->mode_info; | ||
| 333 | int index; | ||
| 334 | uint8_t frev, crev; | ||
| 335 | uint16_t data_offset; | ||
| 336 | |||
| 337 | index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, | ||
| 338 | gfx_info); | ||
| 339 | if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, NULL, | ||
| 340 | &frev, &crev, &data_offset)) { | ||
| 341 | union gfx_info *gfx_info = (union gfx_info *) | ||
| 342 | (mode_info->atom_context->bios + data_offset); | ||
| 343 | switch (crev) { | ||
| 344 | case 4: | ||
| 345 | adev->gfx.config.max_shader_engines = gfx_info->v24.gc_num_se; | ||
| 346 | adev->gfx.config.max_cu_per_sh = gfx_info->v24.gc_num_cu_per_sh; | ||
| 347 | adev->gfx.config.max_sh_per_se = gfx_info->v24.gc_num_sh_per_se; | ||
| 348 | adev->gfx.config.max_backends_per_se = gfx_info->v24.gc_num_rb_per_se; | ||
| 349 | adev->gfx.config.max_texture_channel_caches = gfx_info->v24.gc_num_tccs; | ||
| 350 | adev->gfx.config.max_gprs = le16_to_cpu(gfx_info->v24.gc_num_gprs); | ||
| 351 | adev->gfx.config.max_gs_threads = gfx_info->v24.gc_num_max_gs_thds; | ||
| 352 | adev->gfx.config.gs_vgt_table_depth = gfx_info->v24.gc_gs_table_depth; | ||
| 353 | adev->gfx.config.gs_prim_buffer_depth = | ||
| 354 | le16_to_cpu(gfx_info->v24.gc_gsprim_buff_depth); | ||
| 355 | adev->gfx.config.double_offchip_lds_buf = | ||
| 356 | gfx_info->v24.gc_double_offchip_lds_buffer; | ||
| 357 | adev->gfx.cu_info.wave_front_size = le16_to_cpu(gfx_info->v24.gc_wave_size); | ||
| 358 | adev->gfx.cu_info.max_waves_per_simd = le16_to_cpu(gfx_info->v24.gc_max_waves_per_simd); | ||
| 359 | adev->gfx.cu_info.max_scratch_slots_per_cu = gfx_info->v24.gc_max_scratch_slots_per_cu; | ||
| 360 | adev->gfx.cu_info.lds_size = le16_to_cpu(gfx_info->v24.gc_lds_size); | ||
| 361 | return 0; | ||
| 362 | default: | ||
| 363 | return -EINVAL; | ||
| 364 | } | ||
| 365 | |||
| 366 | } | ||
| 367 | return -EINVAL; | ||
| 368 | } | ||
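The new helper follows the standard atomfirmware pattern: find the gfx_info entry in the master data table, parse its header for the content revision, overlay the versioned structure on the raw VBIOS image at data_offset, and dispatch on crev. A condensed sketch of that shape, reusing only the calls visible above (the field subset is trimmed for brevity):

    /* Sketch of the lookup/dispatch shape used by
     * amdgpu_atomfirmware_get_gfx_info() above.
     */
    static int parse_gfx_info_sketch(struct amdgpu_device *adev)
    {
            struct amdgpu_mode_info *mi = &adev->mode_info;
            uint16_t data_offset;
            uint8_t frev, crev;
            int index = get_index_into_master_table(
                            atom_master_list_of_data_tables_v2_1, gfx_info);

            if (!amdgpu_atom_parse_data_header(mi->atom_context, index, NULL,
                                               &frev, &crev, &data_offset))
                    return -EINVAL;         /* table missing from this VBIOS */

            if (crev == 4) {
                    struct atom_gfx_info_v2_4 *gi = (struct atom_gfx_info_v2_4 *)
                            (mi->atom_context->bios + data_offset);

                    /* byte-wide fields are copied directly ... */
                    adev->gfx.config.max_shader_engines = gi->gc_num_se;
                    /* ... 16-bit fields go through le16_to_cpu() */
                    adev->gfx.cu_info.lds_size = le16_to_cpu(gi->gc_lds_size);
                    return 0;
            }
            return -EINVAL;                 /* unknown revision, refuse to guess */
    }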
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h index 7689c961c4ef..20f158fd3b76 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h | |||
| @@ -30,5 +30,6 @@ int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev); | |||
| 30 | int amdgpu_atomfirmware_get_vram_width(struct amdgpu_device *adev); | 30 | int amdgpu_atomfirmware_get_vram_width(struct amdgpu_device *adev); |
| 31 | int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev); | 31 | int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev); |
| 32 | int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev); | 32 | int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev); |
| 33 | int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev); | ||
| 33 | 34 | ||
| 34 | #endif | 35 | #endif |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c index 1ae5ae8c45a4..1bcb2b247335 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c | |||
| @@ -550,7 +550,7 @@ static int amdgpu_atpx_init(void) | |||
| 550 | * look up whether we are the integrated or discrete GPU (all asics). | 550 | * look up whether we are the integrated or discrete GPU (all asics). |
| 551 | * Returns the client id. | 551 | * Returns the client id. |
| 552 | */ | 552 | */ |
| 553 | static int amdgpu_atpx_get_client_id(struct pci_dev *pdev) | 553 | static enum vga_switcheroo_client_id amdgpu_atpx_get_client_id(struct pci_dev *pdev) |
| 554 | { | 554 | { |
| 555 | if (amdgpu_atpx_priv.dhandle == ACPI_HANDLE(&pdev->dev)) | 555 | if (amdgpu_atpx_priv.dhandle == ACPI_HANDLE(&pdev->dev)) |
| 556 | return VGA_SWITCHEROO_IGD; | 556 | return VGA_SWITCHEROO_IGD; |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c index 02b849be083b..19cfff31f2e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c | |||
| @@ -75,13 +75,20 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size, | |||
| 75 | { | 75 | { |
| 76 | struct amdgpu_bo *dobj = NULL; | 76 | struct amdgpu_bo *dobj = NULL; |
| 77 | struct amdgpu_bo *sobj = NULL; | 77 | struct amdgpu_bo *sobj = NULL; |
| 78 | struct amdgpu_bo_param bp; | ||
| 78 | uint64_t saddr, daddr; | 79 | uint64_t saddr, daddr; |
| 79 | int r, n; | 80 | int r, n; |
| 80 | int time; | 81 | int time; |
| 81 | 82 | ||
| 83 | memset(&bp, 0, sizeof(bp)); | ||
| 84 | bp.size = size; | ||
| 85 | bp.byte_align = PAGE_SIZE; | ||
| 86 | bp.domain = sdomain; | ||
| 87 | bp.flags = 0; | ||
| 88 | bp.type = ttm_bo_type_kernel; | ||
| 89 | bp.resv = NULL; | ||
| 82 | n = AMDGPU_BENCHMARK_ITERATIONS; | 90 | n = AMDGPU_BENCHMARK_ITERATIONS; |
| 83 | r = amdgpu_bo_create(adev, size, PAGE_SIZE,sdomain, 0, | 91 | r = amdgpu_bo_create(adev, &bp, &sobj); |
| 84 | ttm_bo_type_kernel, NULL, &sobj); | ||
| 85 | if (r) { | 92 | if (r) { |
| 86 | goto out_cleanup; | 93 | goto out_cleanup; |
| 87 | } | 94 | } |
| @@ -93,8 +100,8 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size, | |||
| 93 | if (r) { | 100 | if (r) { |
| 94 | goto out_cleanup; | 101 | goto out_cleanup; |
| 95 | } | 102 | } |
| 96 | r = amdgpu_bo_create(adev, size, PAGE_SIZE, ddomain, 0, | 103 | bp.domain = ddomain; |
| 97 | ttm_bo_type_kernel, NULL, &dobj); | 104 | r = amdgpu_bo_create(adev, &bp, &dobj); |
| 98 | if (r) { | 105 | if (r) { |
| 99 | goto out_cleanup; | 106 | goto out_cleanup; |
| 100 | } | 107 | } |
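The two hunks above reflect the reworked amdgpu_bo_create() interface: instead of a long positional argument list, callers now fill a struct amdgpu_bo_param. A hedged sketch of the caller-side pattern, using the same field set this benchmark fills (the VRAM domain is just an example value):

    struct amdgpu_bo *bo = NULL;
    struct amdgpu_bo_param bp;
    int r;

    memset(&bp, 0, sizeof(bp));
    bp.size = size;                         /* bytes to allocate */
    bp.byte_align = PAGE_SIZE;              /* required alignment */
    bp.domain = AMDGPU_GEM_DOMAIN_VRAM;     /* initial placement domain */
    bp.flags = 0;                           /* no special creation flags */
    bp.type = ttm_bo_type_kernel;           /* kernel-owned BO */
    bp.resv = NULL;                         /* use a private reservation object */

    r = amdgpu_bo_create(adev, &bp, &bo);
    if (r)
            return r;
    /* a second BO with a different placement only needs bp.domain changed,
     * exactly as the benchmark does for the destination buffer */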
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index 71a57b2f7f04..e950730f1933 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c | |||
| @@ -23,7 +23,6 @@ | |||
| 23 | */ | 23 | */ |
| 24 | #include <linux/list.h> | 24 | #include <linux/list.h> |
| 25 | #include <linux/slab.h> | 25 | #include <linux/slab.h> |
| 26 | #include <linux/pci.h> | ||
| 27 | #include <drm/drmP.h> | 26 | #include <drm/drmP.h> |
| 28 | #include <linux/firmware.h> | 27 | #include <linux/firmware.h> |
| 29 | #include <drm/amdgpu_drm.h> | 28 | #include <drm/amdgpu_drm.h> |
| @@ -109,121 +108,6 @@ static void amdgpu_cgs_write_ind_register(struct cgs_device *cgs_device, | |||
| 109 | WARN(1, "Invalid indirect register space"); | 108 | WARN(1, "Invalid indirect register space"); |
| 110 | } | 109 | } |
| 111 | 110 | ||
| 112 | static int amdgpu_cgs_get_pci_resource(struct cgs_device *cgs_device, | ||
| 113 | enum cgs_resource_type resource_type, | ||
| 114 | uint64_t size, | ||
| 115 | uint64_t offset, | ||
| 116 | uint64_t *resource_base) | ||
| 117 | { | ||
| 118 | CGS_FUNC_ADEV; | ||
| 119 | |||
| 120 | if (resource_base == NULL) | ||
| 121 | return -EINVAL; | ||
| 122 | |||
| 123 | switch (resource_type) { | ||
| 124 | case CGS_RESOURCE_TYPE_MMIO: | ||
| 125 | if (adev->rmmio_size == 0) | ||
| 126 | return -ENOENT; | ||
| 127 | if ((offset + size) > adev->rmmio_size) | ||
| 128 | return -EINVAL; | ||
| 129 | *resource_base = adev->rmmio_base; | ||
| 130 | return 0; | ||
| 131 | case CGS_RESOURCE_TYPE_DOORBELL: | ||
| 132 | if (adev->doorbell.size == 0) | ||
| 133 | return -ENOENT; | ||
| 134 | if ((offset + size) > adev->doorbell.size) | ||
| 135 | return -EINVAL; | ||
| 136 | *resource_base = adev->doorbell.base; | ||
| 137 | return 0; | ||
| 138 | case CGS_RESOURCE_TYPE_FB: | ||
| 139 | case CGS_RESOURCE_TYPE_IO: | ||
| 140 | case CGS_RESOURCE_TYPE_ROM: | ||
| 141 | default: | ||
| 142 | return -EINVAL; | ||
| 143 | } | ||
| 144 | } | ||
| 145 | |||
| 146 | static const void *amdgpu_cgs_atom_get_data_table(struct cgs_device *cgs_device, | ||
| 147 | unsigned table, uint16_t *size, | ||
| 148 | uint8_t *frev, uint8_t *crev) | ||
| 149 | { | ||
| 150 | CGS_FUNC_ADEV; | ||
| 151 | uint16_t data_start; | ||
| 152 | |||
| 153 | if (amdgpu_atom_parse_data_header( | ||
| 154 | adev->mode_info.atom_context, table, size, | ||
| 155 | frev, crev, &data_start)) | ||
| 156 | return (uint8_t*)adev->mode_info.atom_context->bios + | ||
| 157 | data_start; | ||
| 158 | |||
| 159 | return NULL; | ||
| 160 | } | ||
| 161 | |||
| 162 | static int amdgpu_cgs_atom_get_cmd_table_revs(struct cgs_device *cgs_device, unsigned table, | ||
| 163 | uint8_t *frev, uint8_t *crev) | ||
| 164 | { | ||
| 165 | CGS_FUNC_ADEV; | ||
| 166 | |||
| 167 | if (amdgpu_atom_parse_cmd_header( | ||
| 168 | adev->mode_info.atom_context, table, | ||
| 169 | frev, crev)) | ||
| 170 | return 0; | ||
| 171 | |||
| 172 | return -EINVAL; | ||
| 173 | } | ||
| 174 | |||
| 175 | static int amdgpu_cgs_atom_exec_cmd_table(struct cgs_device *cgs_device, unsigned table, | ||
| 176 | void *args) | ||
| 177 | { | ||
| 178 | CGS_FUNC_ADEV; | ||
| 179 | |||
| 180 | return amdgpu_atom_execute_table( | ||
| 181 | adev->mode_info.atom_context, table, args); | ||
| 182 | } | ||
| 183 | |||
| 184 | static int amdgpu_cgs_set_clockgating_state(struct cgs_device *cgs_device, | ||
| 185 | enum amd_ip_block_type block_type, | ||
| 186 | enum amd_clockgating_state state) | ||
| 187 | { | ||
| 188 | CGS_FUNC_ADEV; | ||
| 189 | int i, r = -1; | ||
| 190 | |||
| 191 | for (i = 0; i < adev->num_ip_blocks; i++) { | ||
| 192 | if (!adev->ip_blocks[i].status.valid) | ||
| 193 | continue; | ||
| 194 | |||
| 195 | if (adev->ip_blocks[i].version->type == block_type) { | ||
| 196 | r = adev->ip_blocks[i].version->funcs->set_clockgating_state( | ||
| 197 | (void *)adev, | ||
| 198 | state); | ||
| 199 | break; | ||
| 200 | } | ||
| 201 | } | ||
| 202 | return r; | ||
| 203 | } | ||
| 204 | |||
| 205 | static int amdgpu_cgs_set_powergating_state(struct cgs_device *cgs_device, | ||
| 206 | enum amd_ip_block_type block_type, | ||
| 207 | enum amd_powergating_state state) | ||
| 208 | { | ||
| 209 | CGS_FUNC_ADEV; | ||
| 210 | int i, r = -1; | ||
| 211 | |||
| 212 | for (i = 0; i < adev->num_ip_blocks; i++) { | ||
| 213 | if (!adev->ip_blocks[i].status.valid) | ||
| 214 | continue; | ||
| 215 | |||
| 216 | if (adev->ip_blocks[i].version->type == block_type) { | ||
| 217 | r = adev->ip_blocks[i].version->funcs->set_powergating_state( | ||
| 218 | (void *)adev, | ||
| 219 | state); | ||
| 220 | break; | ||
| 221 | } | ||
| 222 | } | ||
| 223 | return r; | ||
| 224 | } | ||
| 225 | |||
| 226 | |||
| 227 | static uint32_t fw_type_convert(struct cgs_device *cgs_device, uint32_t fw_type) | 111 | static uint32_t fw_type_convert(struct cgs_device *cgs_device, uint32_t fw_type) |
| 228 | { | 112 | { |
| 229 | CGS_FUNC_ADEV; | 113 | CGS_FUNC_ADEV; |
| @@ -271,18 +155,6 @@ static uint32_t fw_type_convert(struct cgs_device *cgs_device, uint32_t fw_type) | |||
| 271 | return result; | 155 | return result; |
| 272 | } | 156 | } |
| 273 | 157 | ||
| 274 | static int amdgpu_cgs_rel_firmware(struct cgs_device *cgs_device, enum cgs_ucode_id type) | ||
| 275 | { | ||
| 276 | CGS_FUNC_ADEV; | ||
| 277 | if ((CGS_UCODE_ID_SMU == type) || (CGS_UCODE_ID_SMU_SK == type)) { | ||
| 278 | release_firmware(adev->pm.fw); | ||
| 279 | adev->pm.fw = NULL; | ||
| 280 | return 0; | ||
| 281 | } | ||
| 282 | /* cannot release other firmware because they are not created by cgs */ | ||
| 283 | return -EINVAL; | ||
| 284 | } | ||
| 285 | |||
| 286 | static uint16_t amdgpu_get_firmware_version(struct cgs_device *cgs_device, | 158 | static uint16_t amdgpu_get_firmware_version(struct cgs_device *cgs_device, |
| 287 | enum cgs_ucode_id type) | 159 | enum cgs_ucode_id type) |
| 288 | { | 160 | { |
| @@ -326,34 +198,6 @@ static uint16_t amdgpu_get_firmware_version(struct cgs_device *cgs_device, | |||
| 326 | return fw_version; | 198 | return fw_version; |
| 327 | } | 199 | } |
| 328 | 200 | ||
| 329 | static int amdgpu_cgs_enter_safe_mode(struct cgs_device *cgs_device, | ||
| 330 | bool en) | ||
| 331 | { | ||
| 332 | CGS_FUNC_ADEV; | ||
| 333 | |||
| 334 | if (adev->gfx.rlc.funcs->enter_safe_mode == NULL || | ||
| 335 | adev->gfx.rlc.funcs->exit_safe_mode == NULL) | ||
| 336 | return 0; | ||
| 337 | |||
| 338 | if (en) | ||
| 339 | adev->gfx.rlc.funcs->enter_safe_mode(adev); | ||
| 340 | else | ||
| 341 | adev->gfx.rlc.funcs->exit_safe_mode(adev); | ||
| 342 | |||
| 343 | return 0; | ||
| 344 | } | ||
| 345 | |||
| 346 | static void amdgpu_cgs_lock_grbm_idx(struct cgs_device *cgs_device, | ||
| 347 | bool lock) | ||
| 348 | { | ||
| 349 | CGS_FUNC_ADEV; | ||
| 350 | |||
| 351 | if (lock) | ||
| 352 | mutex_lock(&adev->grbm_idx_mutex); | ||
| 353 | else | ||
| 354 | mutex_unlock(&adev->grbm_idx_mutex); | ||
| 355 | } | ||
| 356 | |||
| 357 | static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, | 201 | static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, |
| 358 | enum cgs_ucode_id type, | 202 | enum cgs_ucode_id type, |
| 359 | struct cgs_firmware_info *info) | 203 | struct cgs_firmware_info *info) |
| @@ -541,6 +385,9 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, | |||
| 541 | case CHIP_POLARIS12: | 385 | case CHIP_POLARIS12: |
| 542 | strcpy(fw_name, "amdgpu/polaris12_smc.bin"); | 386 | strcpy(fw_name, "amdgpu/polaris12_smc.bin"); |
| 543 | break; | 387 | break; |
| 388 | case CHIP_VEGAM: | ||
| 389 | strcpy(fw_name, "amdgpu/vegam_smc.bin"); | ||
| 390 | break; | ||
| 544 | case CHIP_VEGA10: | 391 | case CHIP_VEGA10: |
| 545 | if ((adev->pdev->device == 0x687f) && | 392 | if ((adev->pdev->device == 0x687f) && |
| 546 | ((adev->pdev->revision == 0xc0) || | 393 | ((adev->pdev->revision == 0xc0) || |
| @@ -553,6 +400,9 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, | |||
| 553 | case CHIP_VEGA12: | 400 | case CHIP_VEGA12: |
| 554 | strcpy(fw_name, "amdgpu/vega12_smc.bin"); | 401 | strcpy(fw_name, "amdgpu/vega12_smc.bin"); |
| 555 | break; | 402 | break; |
| 403 | case CHIP_VEGA20: | ||
| 404 | strcpy(fw_name, "amdgpu/vega20_smc.bin"); | ||
| 405 | break; | ||
| 556 | default: | 406 | default: |
| 557 | DRM_ERROR("SMC firmware not supported\n"); | 407 | DRM_ERROR("SMC firmware not supported\n"); |
| 558 | return -EINVAL; | 408 | return -EINVAL; |
| @@ -598,97 +448,12 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, | |||
| 598 | return 0; | 448 | return 0; |
| 599 | } | 449 | } |
| 600 | 450 | ||
| 601 | static int amdgpu_cgs_is_virtualization_enabled(void *cgs_device) | ||
| 602 | { | ||
| 603 | CGS_FUNC_ADEV; | ||
| 604 | return amdgpu_sriov_vf(adev); | ||
| 605 | } | ||
| 606 | |||
| 607 | static int amdgpu_cgs_get_active_displays_info(struct cgs_device *cgs_device, | ||
| 608 | struct cgs_display_info *info) | ||
| 609 | { | ||
| 610 | CGS_FUNC_ADEV; | ||
| 611 | struct cgs_mode_info *mode_info; | ||
| 612 | |||
| 613 | if (info == NULL) | ||
| 614 | return -EINVAL; | ||
| 615 | |||
| 616 | mode_info = info->mode_info; | ||
| 617 | if (mode_info) | ||
| 618 | /* if the displays are off, vblank time is max */ | ||
| 619 | mode_info->vblank_time_us = 0xffffffff; | ||
| 620 | |||
| 621 | if (!amdgpu_device_has_dc_support(adev)) { | ||
| 622 | struct amdgpu_crtc *amdgpu_crtc; | ||
| 623 | struct drm_device *ddev = adev->ddev; | ||
| 624 | struct drm_crtc *crtc; | ||
| 625 | uint32_t line_time_us, vblank_lines; | ||
| 626 | |||
| 627 | if (adev->mode_info.num_crtc && adev->mode_info.mode_config_initialized) { | ||
| 628 | list_for_each_entry(crtc, | ||
| 629 | &ddev->mode_config.crtc_list, head) { | ||
| 630 | amdgpu_crtc = to_amdgpu_crtc(crtc); | ||
| 631 | if (crtc->enabled) { | ||
| 632 | info->active_display_mask |= (1 << amdgpu_crtc->crtc_id); | ||
| 633 | info->display_count++; | ||
| 634 | } | ||
| 635 | if (mode_info != NULL && | ||
| 636 | crtc->enabled && amdgpu_crtc->enabled && | ||
| 637 | amdgpu_crtc->hw_mode.clock) { | ||
| 638 | line_time_us = (amdgpu_crtc->hw_mode.crtc_htotal * 1000) / | ||
| 639 | amdgpu_crtc->hw_mode.clock; | ||
| 640 | vblank_lines = amdgpu_crtc->hw_mode.crtc_vblank_end - | ||
| 641 | amdgpu_crtc->hw_mode.crtc_vdisplay + | ||
| 642 | (amdgpu_crtc->v_border * 2); | ||
| 643 | mode_info->vblank_time_us = vblank_lines * line_time_us; | ||
| 644 | mode_info->refresh_rate = drm_mode_vrefresh(&amdgpu_crtc->hw_mode); | ||
| 645 | /* we have issues with mclk switching with refresh rates | ||
| 646 | * over 120 hz on the non-DC code. | ||
| 647 | */ | ||
| 648 | if (mode_info->refresh_rate > 120) | ||
| 649 | mode_info->vblank_time_us = 0; | ||
| 650 | mode_info = NULL; | ||
| 651 | } | ||
| 652 | } | ||
| 653 | } | ||
| 654 | } else { | ||
| 655 | info->display_count = adev->pm.pm_display_cfg.num_display; | ||
| 656 | if (mode_info != NULL) { | ||
| 657 | mode_info->vblank_time_us = adev->pm.pm_display_cfg.min_vblank_time; | ||
| 658 | mode_info->refresh_rate = adev->pm.pm_display_cfg.vrefresh; | ||
| 659 | } | ||
| 660 | } | ||
| 661 | return 0; | ||
| 662 | } | ||
| 663 | |||
| 664 | |||
| 665 | static int amdgpu_cgs_notify_dpm_enabled(struct cgs_device *cgs_device, bool enabled) | ||
| 666 | { | ||
| 667 | CGS_FUNC_ADEV; | ||
| 668 | |||
| 669 | adev->pm.dpm_enabled = enabled; | ||
| 670 | |||
| 671 | return 0; | ||
| 672 | } | ||
| 673 | |||
| 674 | static const struct cgs_ops amdgpu_cgs_ops = { | 451 | static const struct cgs_ops amdgpu_cgs_ops = { |
| 675 | .read_register = amdgpu_cgs_read_register, | 452 | .read_register = amdgpu_cgs_read_register, |
| 676 | .write_register = amdgpu_cgs_write_register, | 453 | .write_register = amdgpu_cgs_write_register, |
| 677 | .read_ind_register = amdgpu_cgs_read_ind_register, | 454 | .read_ind_register = amdgpu_cgs_read_ind_register, |
| 678 | .write_ind_register = amdgpu_cgs_write_ind_register, | 455 | .write_ind_register = amdgpu_cgs_write_ind_register, |
| 679 | .get_pci_resource = amdgpu_cgs_get_pci_resource, | ||
| 680 | .atom_get_data_table = amdgpu_cgs_atom_get_data_table, | ||
| 681 | .atom_get_cmd_table_revs = amdgpu_cgs_atom_get_cmd_table_revs, | ||
| 682 | .atom_exec_cmd_table = amdgpu_cgs_atom_exec_cmd_table, | ||
| 683 | .get_firmware_info = amdgpu_cgs_get_firmware_info, | 456 | .get_firmware_info = amdgpu_cgs_get_firmware_info, |
| 684 | .rel_firmware = amdgpu_cgs_rel_firmware, | ||
| 685 | .set_powergating_state = amdgpu_cgs_set_powergating_state, | ||
| 686 | .set_clockgating_state = amdgpu_cgs_set_clockgating_state, | ||
| 687 | .get_active_displays_info = amdgpu_cgs_get_active_displays_info, | ||
| 688 | .notify_dpm_enabled = amdgpu_cgs_notify_dpm_enabled, | ||
| 689 | .is_virtualization_enabled = amdgpu_cgs_is_virtualization_enabled, | ||
| 690 | .enter_safe_mode = amdgpu_cgs_enter_safe_mode, | ||
| 691 | .lock_grbm_idx = amdgpu_cgs_lock_grbm_idx, | ||
| 692 | }; | 457 | }; |
| 693 | 458 | ||
| 694 | struct cgs_device *amdgpu_cgs_create_device(struct amdgpu_device *adev) | 459 | struct cgs_device *amdgpu_cgs_create_device(struct amdgpu_device *adev) |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index 96501ff0e55b..8e66851eb427 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c | |||
| @@ -691,7 +691,7 @@ static int amdgpu_connector_lvds_get_modes(struct drm_connector *connector) | |||
| 691 | return ret; | 691 | return ret; |
| 692 | } | 692 | } |
| 693 | 693 | ||
| 694 | static int amdgpu_connector_lvds_mode_valid(struct drm_connector *connector, | 694 | static enum drm_mode_status amdgpu_connector_lvds_mode_valid(struct drm_connector *connector, |
| 695 | struct drm_display_mode *mode) | 695 | struct drm_display_mode *mode) |
| 696 | { | 696 | { |
| 697 | struct drm_encoder *encoder = amdgpu_connector_best_single_encoder(connector); | 697 | struct drm_encoder *encoder = amdgpu_connector_best_single_encoder(connector); |
| @@ -843,7 +843,7 @@ static int amdgpu_connector_vga_get_modes(struct drm_connector *connector) | |||
| 843 | return ret; | 843 | return ret; |
| 844 | } | 844 | } |
| 845 | 845 | ||
| 846 | static int amdgpu_connector_vga_mode_valid(struct drm_connector *connector, | 846 | static enum drm_mode_status amdgpu_connector_vga_mode_valid(struct drm_connector *connector, |
| 847 | struct drm_display_mode *mode) | 847 | struct drm_display_mode *mode) |
| 848 | { | 848 | { |
| 849 | struct drm_device *dev = connector->dev; | 849 | struct drm_device *dev = connector->dev; |
| @@ -1172,7 +1172,7 @@ static void amdgpu_connector_dvi_force(struct drm_connector *connector) | |||
| 1172 | amdgpu_connector->use_digital = true; | 1172 | amdgpu_connector->use_digital = true; |
| 1173 | } | 1173 | } |
| 1174 | 1174 | ||
| 1175 | static int amdgpu_connector_dvi_mode_valid(struct drm_connector *connector, | 1175 | static enum drm_mode_status amdgpu_connector_dvi_mode_valid(struct drm_connector *connector, |
| 1176 | struct drm_display_mode *mode) | 1176 | struct drm_display_mode *mode) |
| 1177 | { | 1177 | { |
| 1178 | struct drm_device *dev = connector->dev; | 1178 | struct drm_device *dev = connector->dev; |
| @@ -1448,7 +1448,7 @@ out: | |||
| 1448 | return ret; | 1448 | return ret; |
| 1449 | } | 1449 | } |
| 1450 | 1450 | ||
| 1451 | static int amdgpu_connector_dp_mode_valid(struct drm_connector *connector, | 1451 | static enum drm_mode_status amdgpu_connector_dp_mode_valid(struct drm_connector *connector, |
| 1452 | struct drm_display_mode *mode) | 1452 | struct drm_display_mode *mode) |
| 1453 | { | 1453 | { |
| 1454 | struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); | 1454 | struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index dc34b50e6b29..9c1d491d742e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | |||
| @@ -382,8 +382,7 @@ retry: | |||
| 382 | 382 | ||
| 383 | p->bytes_moved += ctx.bytes_moved; | 383 | p->bytes_moved += ctx.bytes_moved; |
| 384 | if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size && | 384 | if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size && |
| 385 | bo->tbo.mem.mem_type == TTM_PL_VRAM && | 385 | amdgpu_bo_in_cpu_visible_vram(bo)) |
| 386 | bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT) | ||
| 387 | p->bytes_moved_vis += ctx.bytes_moved; | 386 | p->bytes_moved_vis += ctx.bytes_moved; |
| 388 | 387 | ||
| 389 | if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) { | 388 | if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) { |
| @@ -411,7 +410,6 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p, | |||
| 411 | struct amdgpu_bo_list_entry *candidate = p->evictable; | 410 | struct amdgpu_bo_list_entry *candidate = p->evictable; |
| 412 | struct amdgpu_bo *bo = candidate->robj; | 411 | struct amdgpu_bo *bo = candidate->robj; |
| 413 | struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); | 412 | struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); |
| 414 | u64 initial_bytes_moved, bytes_moved; | ||
| 415 | bool update_bytes_moved_vis; | 413 | bool update_bytes_moved_vis; |
| 416 | uint32_t other; | 414 | uint32_t other; |
| 417 | 415 | ||
| @@ -435,18 +433,14 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p, | |||
| 435 | continue; | 433 | continue; |
| 436 | 434 | ||
| 437 | /* Good we can try to move this BO somewhere else */ | 435 | /* Good we can try to move this BO somewhere else */ |
| 438 | amdgpu_ttm_placement_from_domain(bo, other); | ||
| 439 | update_bytes_moved_vis = | 436 | update_bytes_moved_vis = |
| 440 | adev->gmc.visible_vram_size < adev->gmc.real_vram_size && | 437 | adev->gmc.visible_vram_size < adev->gmc.real_vram_size && |
| 441 | bo->tbo.mem.mem_type == TTM_PL_VRAM && | 438 | amdgpu_bo_in_cpu_visible_vram(bo); |
| 442 | bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT; | 439 | amdgpu_ttm_placement_from_domain(bo, other); |
| 443 | initial_bytes_moved = atomic64_read(&adev->num_bytes_moved); | ||
| 444 | r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); | 440 | r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); |
| 445 | bytes_moved = atomic64_read(&adev->num_bytes_moved) - | 441 | p->bytes_moved += ctx.bytes_moved; |
| 446 | initial_bytes_moved; | ||
| 447 | p->bytes_moved += bytes_moved; | ||
| 448 | if (update_bytes_moved_vis) | 442 | if (update_bytes_moved_vis) |
| 449 | p->bytes_moved_vis += bytes_moved; | 443 | p->bytes_moved_vis += ctx.bytes_moved; |
| 450 | 444 | ||
| 451 | if (unlikely(r)) | 445 | if (unlikely(r)) |
| 452 | break; | 446 | break; |
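Both command-submission hunks replace the same open-coded test with the new amdgpu_bo_in_cpu_visible_vram() helper. Based purely on the removed lines, the predicate is equivalent to the following sketch (the real helper lives in the BO header; this just spells the condition out):

    /* A BO counts as "CPU-visible VRAM" when it is placed in VRAM and its
     * start offset lies below the CPU-visible aperture.
     */
    static bool in_cpu_visible_vram_sketch(struct amdgpu_bo *bo)
    {
            struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);

            return bo->tbo.mem.mem_type == TTM_PL_VRAM &&
                   bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT;
    }

The second hunk also drops the manual num_bytes_moved bookkeeping in favour of the ctx.bytes_moved counter that ttm_bo_validate() already maintains.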
| @@ -536,7 +530,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, | |||
| 536 | if (p->bo_list) { | 530 | if (p->bo_list) { |
| 537 | amdgpu_bo_list_get_list(p->bo_list, &p->validated); | 531 | amdgpu_bo_list_get_list(p->bo_list, &p->validated); |
| 538 | if (p->bo_list->first_userptr != p->bo_list->num_entries) | 532 | if (p->bo_list->first_userptr != p->bo_list->num_entries) |
| 539 | p->mn = amdgpu_mn_get(p->adev); | 533 | p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX); |
| 540 | } | 534 | } |
| 541 | 535 | ||
| 542 | INIT_LIST_HEAD(&duplicates); | 536 | INIT_LIST_HEAD(&duplicates); |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 3fabf9f97022..c5bb36275e93 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | |||
| @@ -91,7 +91,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, | |||
| 91 | continue; | 91 | continue; |
| 92 | 92 | ||
| 93 | r = drm_sched_entity_init(&ring->sched, &ctx->rings[i].entity, | 93 | r = drm_sched_entity_init(&ring->sched, &ctx->rings[i].entity, |
| 94 | rq, amdgpu_sched_jobs, &ctx->guilty); | 94 | rq, &ctx->guilty); |
| 95 | if (r) | 95 | if (r) |
| 96 | goto failed; | 96 | goto failed; |
| 97 | } | 97 | } |
| @@ -111,8 +111,9 @@ failed: | |||
| 111 | return r; | 111 | return r; |
| 112 | } | 112 | } |
| 113 | 113 | ||
| 114 | static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx) | 114 | static void amdgpu_ctx_fini(struct kref *ref) |
| 115 | { | 115 | { |
| 116 | struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount); | ||
| 116 | struct amdgpu_device *adev = ctx->adev; | 117 | struct amdgpu_device *adev = ctx->adev; |
| 117 | unsigned i, j; | 118 | unsigned i, j; |
| 118 | 119 | ||
| @@ -125,13 +126,11 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx) | |||
| 125 | kfree(ctx->fences); | 126 | kfree(ctx->fences); |
| 126 | ctx->fences = NULL; | 127 | ctx->fences = NULL; |
| 127 | 128 | ||
| 128 | for (i = 0; i < adev->num_rings; i++) | ||
| 129 | drm_sched_entity_fini(&adev->rings[i]->sched, | ||
| 130 | &ctx->rings[i].entity); | ||
| 131 | |||
| 132 | amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr); | 129 | amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr); |
| 133 | 130 | ||
| 134 | mutex_destroy(&ctx->lock); | 131 | mutex_destroy(&ctx->lock); |
| 132 | |||
| 133 | kfree(ctx); | ||
| 135 | } | 134 | } |
| 136 | 135 | ||
| 137 | static int amdgpu_ctx_alloc(struct amdgpu_device *adev, | 136 | static int amdgpu_ctx_alloc(struct amdgpu_device *adev, |
| @@ -170,12 +169,20 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev, | |||
| 170 | static void amdgpu_ctx_do_release(struct kref *ref) | 169 | static void amdgpu_ctx_do_release(struct kref *ref) |
| 171 | { | 170 | { |
| 172 | struct amdgpu_ctx *ctx; | 171 | struct amdgpu_ctx *ctx; |
| 172 | u32 i; | ||
| 173 | 173 | ||
| 174 | ctx = container_of(ref, struct amdgpu_ctx, refcount); | 174 | ctx = container_of(ref, struct amdgpu_ctx, refcount); |
| 175 | 175 | ||
| 176 | amdgpu_ctx_fini(ctx); | 176 | for (i = 0; i < ctx->adev->num_rings; i++) { |
| 177 | 177 | ||
| 178 | kfree(ctx); | 178 | if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring) |
| 179 | continue; | ||
| 180 | |||
| 181 | drm_sched_entity_fini(&ctx->adev->rings[i]->sched, | ||
| 182 | &ctx->rings[i].entity); | ||
| 183 | } | ||
| 184 | |||
| 185 | amdgpu_ctx_fini(ref); | ||
| 179 | } | 186 | } |
| 180 | 187 | ||
| 181 | static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv, uint32_t id) | 188 | static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv, uint32_t id) |
| @@ -437,16 +444,72 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr) | |||
| 437 | idr_init(&mgr->ctx_handles); | 444 | idr_init(&mgr->ctx_handles); |
| 438 | } | 445 | } |
| 439 | 446 | ||
| 447 | void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr) | ||
| 448 | { | ||
| 449 | struct amdgpu_ctx *ctx; | ||
| 450 | struct idr *idp; | ||
| 451 | uint32_t id, i; | ||
| 452 | |||
| 453 | idp = &mgr->ctx_handles; | ||
| 454 | |||
| 455 | idr_for_each_entry(idp, ctx, id) { | ||
| 456 | |||
| 457 | if (!ctx->adev) | ||
| 458 | return; | ||
| 459 | |||
| 460 | for (i = 0; i < ctx->adev->num_rings; i++) { | ||
| 461 | |||
| 462 | if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring) | ||
| 463 | continue; | ||
| 464 | |||
| 465 | if (kref_read(&ctx->refcount) == 1) | ||
| 466 | drm_sched_entity_do_release(&ctx->adev->rings[i]->sched, | ||
| 467 | &ctx->rings[i].entity); | ||
| 468 | else | ||
| 469 | DRM_ERROR("ctx %p is still alive\n", ctx); | ||
| 470 | } | ||
| 471 | } | ||
| 472 | } | ||
| 473 | |||
| 474 | void amdgpu_ctx_mgr_entity_cleanup(struct amdgpu_ctx_mgr *mgr) | ||
| 475 | { | ||
| 476 | struct amdgpu_ctx *ctx; | ||
| 477 | struct idr *idp; | ||
| 478 | uint32_t id, i; | ||
| 479 | |||
| 480 | idp = &mgr->ctx_handles; | ||
| 481 | |||
| 482 | idr_for_each_entry(idp, ctx, id) { | ||
| 483 | |||
| 484 | if (!ctx->adev) | ||
| 485 | return; | ||
| 486 | |||
| 487 | for (i = 0; i < ctx->adev->num_rings; i++) { | ||
| 488 | |||
| 489 | if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring) | ||
| 490 | continue; | ||
| 491 | |||
| 492 | if (kref_read(&ctx->refcount) == 1) | ||
| 493 | drm_sched_entity_cleanup(&ctx->adev->rings[i]->sched, | ||
| 494 | &ctx->rings[i].entity); | ||
| 495 | else | ||
| 496 | DRM_ERROR("ctx %p is still alive\n", ctx); | ||
| 497 | } | ||
| 498 | } | ||
| 499 | } | ||
| 500 | |||
| 440 | void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr) | 501 | void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr) |
| 441 | { | 502 | { |
| 442 | struct amdgpu_ctx *ctx; | 503 | struct amdgpu_ctx *ctx; |
| 443 | struct idr *idp; | 504 | struct idr *idp; |
| 444 | uint32_t id; | 505 | uint32_t id; |
| 445 | 506 | ||
| 507 | amdgpu_ctx_mgr_entity_cleanup(mgr); | ||
| 508 | |||
| 446 | idp = &mgr->ctx_handles; | 509 | idp = &mgr->ctx_handles; |
| 447 | 510 | ||
| 448 | idr_for_each_entry(idp, ctx, id) { | 511 | idr_for_each_entry(idp, ctx, id) { |
| 449 | if (kref_put(&ctx->refcount, amdgpu_ctx_do_release) != 1) | 512 | if (kref_put(&ctx->refcount, amdgpu_ctx_fini) != 1) |
| 450 | DRM_ERROR("ctx %p is still alive\n", ctx); | 513 | DRM_ERROR("ctx %p is still alive\n", ctx); |
| 451 | } | 514 | } |
| 452 | 515 | ||
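With this change amdgpu_ctx_fini() is shaped as a kref release callback, so the final reference drop in amdgpu_ctx_mgr_fini() frees the context directly, while the scheduler entities are torn down earlier by the new entity_fini/entity_cleanup passes. A sketch of the release-callback shape, reduced to its essentials (per-ring and fence teardown omitted):

    /* Sketch of the kref release pattern now used for contexts: recover the
     * enclosing object from the embedded kref, tear down its state, free it.
     */
    static void ctx_release_sketch(struct kref *ref)
    {
            struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);

            /* ... per-context teardown (fences, queue manager) ... */
            mutex_destroy(&ctx->lock);
            kfree(ctx);
    }

The kref_put(&ctx->refcount, amdgpu_ctx_fini) call in the hunk above then frees the context only when this really was the last reference.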
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 448d69fe3756..f5fb93795a69 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | |||
| @@ -28,8 +28,13 @@ | |||
| 28 | #include <linux/debugfs.h> | 28 | #include <linux/debugfs.h> |
| 29 | #include "amdgpu.h" | 29 | #include "amdgpu.h" |
| 30 | 30 | ||
| 31 | /* | 31 | /** |
| 32 | * Debugfs | 32 | * amdgpu_debugfs_add_files - Add simple debugfs entries |
| 33 | * | ||
| 34 | * @adev: Device to attach debugfs entries to | ||
| 35 | * @files: Array of function callbacks that respond to reads | ||
| 36 | * @nfiles: Number of callbacks to register | ||
| 37 | * | ||
| 33 | */ | 38 | */ |
| 34 | int amdgpu_debugfs_add_files(struct amdgpu_device *adev, | 39 | int amdgpu_debugfs_add_files(struct amdgpu_device *adev, |
| 35 | const struct drm_info_list *files, | 40 | const struct drm_info_list *files, |
| @@ -64,7 +69,33 @@ int amdgpu_debugfs_add_files(struct amdgpu_device *adev, | |||
| 64 | 69 | ||
| 65 | #if defined(CONFIG_DEBUG_FS) | 70 | #if defined(CONFIG_DEBUG_FS) |
| 66 | 71 | ||
| 67 | 72 | /** | |
| 73 | * amdgpu_debugfs_process_reg_op - Handle MMIO register reads/writes | ||
| 74 | * | ||
| 75 | * @read: True if reading | ||
| 76 | * @f: open file handle | ||
| 77 | * @buf: User buffer to write/read to | ||
| 78 | * @size: Number of bytes to write/read | ||
| 79 | * @pos: Offset to seek to | ||
| 80 | * | ||
| 81 | * This debugfs entry has special meaning on the offset being sought. | ||
| 82 | * Various bits have different meanings: | ||
| 83 | * | ||
| 84 | * Bit 62: Indicates a GRBM bank switch is needed | ||
| 85 | * Bit 61: Indicates a SRBM bank switch is needed (implies bit 62 is | ||
| 86 | * zero) | ||
| 87 | * Bits 24..33: The SE or ME selector if needed | ||
| 88 | * Bits 34..43: The SH (or SA) or PIPE selector if needed | ||
| 89 | * Bits 44..53: The INSTANCE (or CU/WGP) or QUEUE selector if needed | ||
| 90 | * | ||
| 91 | * Bit 23: Indicates that the PM power gating lock should be held | ||
| 92 | * This is necessary to read registers that might be | ||
| 93 | * unreliable during a power gating transition. | ||
| 94 | * | ||
| 95 | * The lower bits are the BYTE offset of the register to read. This | ||
| 96 | * allows reading multiple registers in a single call and having | ||
| 97 | * the returned size reflect that. | ||
| 98 | */ | ||
| 68 | static int amdgpu_debugfs_process_reg_op(bool read, struct file *f, | 99 | static int amdgpu_debugfs_process_reg_op(bool read, struct file *f, |
| 69 | char __user *buf, size_t size, loff_t *pos) | 100 | char __user *buf, size_t size, loff_t *pos) |
| 70 | { | 101 | { |
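Because the file position carries all of the banking information described above, userspace can select a specific SE/SH/instance in the same pread() that fetches the register. A hypothetical userspace reader, assuming the file is exposed as amdgpu_regs under the device's DRI debugfs directory (path and register offset are placeholders):

    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            uint64_t pos = 0x1234;          /* BYTE offset of the register (placeholder) */

            pos |= 1ULL << 62;              /* bit 62: use GRBM banking */
            pos |= 0ULL << 24;              /* bits 24..33: SE selector */
            pos |= 0ULL << 34;              /* bits 34..43: SH/PIPE selector */
            pos |= 0ULL << 44;              /* bits 44..53: INSTANCE/QUEUE selector */

            int fd = open("/sys/kernel/debug/dri/0/amdgpu_regs", O_RDONLY);
            uint32_t val;

            if (fd < 0)
                    return 1;
            if (pread(fd, &val, sizeof(val), pos) == sizeof(val))
                    printf("reg = 0x%08x\n", val);
            close(fd);
            return 0;
    }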
| @@ -164,19 +195,37 @@ end: | |||
| 164 | return result; | 195 | return result; |
| 165 | } | 196 | } |
| 166 | 197 | ||
| 167 | 198 | /** | |
| 199 | * amdgpu_debugfs_regs_read - Callback for reading MMIO registers | ||
| 200 | */ | ||
| 168 | static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf, | 201 | static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf, |
| 169 | size_t size, loff_t *pos) | 202 | size_t size, loff_t *pos) |
| 170 | { | 203 | { |
| 171 | return amdgpu_debugfs_process_reg_op(true, f, buf, size, pos); | 204 | return amdgpu_debugfs_process_reg_op(true, f, buf, size, pos); |
| 172 | } | 205 | } |
| 173 | 206 | ||
| 207 | /** | ||
| 208 | * amdgpu_debugfs_regs_write - Callback for writing MMIO registers | ||
| 209 | */ | ||
| 174 | static ssize_t amdgpu_debugfs_regs_write(struct file *f, const char __user *buf, | 210 | static ssize_t amdgpu_debugfs_regs_write(struct file *f, const char __user *buf, |
| 175 | size_t size, loff_t *pos) | 211 | size_t size, loff_t *pos) |
| 176 | { | 212 | { |
| 177 | return amdgpu_debugfs_process_reg_op(false, f, (char __user *)buf, size, pos); | 213 | return amdgpu_debugfs_process_reg_op(false, f, (char __user *)buf, size, pos); |
| 178 | } | 214 | } |
| 179 | 215 | ||
| 216 | |||
| 217 | /** | ||
| 218 | * amdgpu_debugfs_regs_pcie_read - Read from a PCIE register | ||
| 219 | * | ||
| 220 | * @f: open file handle | ||
| 221 | * @buf: User buffer to store read data in | ||
| 222 | * @size: Number of bytes to read | ||
| 223 | * @pos: Offset to seek to | ||
| 224 | * | ||
| 225 | * The lower bits are the BYTE offset of the register to read. This | ||
| 226 | * allows reading multiple registers in a single call and having | ||
| 227 | * the returned size reflect that. | ||
| 228 | */ | ||
| 180 | static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf, | 229 | static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf, |
| 181 | size_t size, loff_t *pos) | 230 | size_t size, loff_t *pos) |
| 182 | { | 231 | { |
| @@ -204,6 +253,18 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf, | |||
| 204 | return result; | 253 | return result; |
| 205 | } | 254 | } |
| 206 | 255 | ||
| 256 | /** | ||
| 257 | * amdgpu_debugfs_regs_pcie_write - Write to a PCIE register | ||
| 258 | * | ||
| 259 | * @f: open file handle | ||
| 260 | * @buf: User buffer to write data from | ||
| 261 | * @size: Number of bytes to write | ||
| 262 | * @pos: Offset to seek to | ||
| 263 | * | ||
| 264 | * The lower bits are the BYTE offset of the register to write. This | ||
| 265 | * allows writing multiple registers in a single call and having | ||
| 266 | * the returned size reflect that. | ||
| 267 | */ | ||
| 207 | static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user *buf, | 268 | static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user *buf, |
| 208 | size_t size, loff_t *pos) | 269 | size_t size, loff_t *pos) |
| 209 | { | 270 | { |
| @@ -232,6 +293,18 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user | |||
| 232 | return result; | 293 | return result; |
| 233 | } | 294 | } |
| 234 | 295 | ||
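The PCIE, DIDT and SMC files documented in this block all use the plain encoding: the file position is simply the byte offset of the first register, and a larger read returns consecutive registers. A hypothetical userspace example reading two consecutive DWORDs in one call (the file name and offset are assumptions for illustration):

    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            uint32_t regs[2];               /* two consecutive registers */
            int fd = open("/sys/kernel/debug/dri/0/amdgpu_regs_pcie", O_RDONLY);

            if (fd < 0)
                    return 1;
            /* offset 0x100 is a placeholder; a size of 8 returns two DWORDs */
            if (pread(fd, regs, sizeof(regs), 0x100) == sizeof(regs))
                    printf("0x%08x 0x%08x\n", regs[0], regs[1]);
            close(fd);
            return 0;
    }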
| 296 | /** | ||
| 297 | * amdgpu_debugfs_regs_didt_read - Read from a DIDT register | ||
| 298 | * | ||
| 299 | * @f: open file handle | ||
| 300 | * @buf: User buffer to store read data in | ||
| 301 | * @size: Number of bytes to read | ||
| 302 | * @pos: Offset to seek to | ||
| 303 | * | ||
| 304 | * The lower bits are the BYTE offset of the register to read. This | ||
| 305 | * allows reading multiple registers in a single call and having | ||
| 306 | * the returned size reflect that. | ||
| 307 | */ | ||
| 235 | static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf, | 308 | static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf, |
| 236 | size_t size, loff_t *pos) | 309 | size_t size, loff_t *pos) |
| 237 | { | 310 | { |
| @@ -259,6 +332,18 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf, | |||
| 259 | return result; | 332 | return result; |
| 260 | } | 333 | } |
| 261 | 334 | ||
| 335 | /** | ||
| 336 | * amdgpu_debugfs_regs_didt_write - Write to a DIDT register | ||
| 337 | * | ||
| 338 | * @f: open file handle | ||
| 339 | * @buf: User buffer to write data from | ||
| 340 | * @size: Number of bytes to write | ||
| 341 | * @pos: Offset to seek to | ||
| 342 | * | ||
| 343 | * The lower bits are the BYTE offset of the register to write. This | ||
| 344 | * allows writing multiple registers in a single call and having | ||
| 345 | * the returned size reflect that. | ||
| 346 | */ | ||
| 262 | static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user *buf, | 347 | static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user *buf, |
| 263 | size_t size, loff_t *pos) | 348 | size_t size, loff_t *pos) |
| 264 | { | 349 | { |
| @@ -287,6 +372,18 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user | |||
| 287 | return result; | 372 | return result; |
| 288 | } | 373 | } |
| 289 | 374 | ||
| 375 | /** | ||
| 376 | * amdgpu_debugfs_regs_smc_read - Read from a SMC register | ||
| 377 | * | ||
| 378 | * @f: open file handle | ||
| 379 | * @buf: User buffer to store read data in | ||
| 380 | * @size: Number of bytes to read | ||
| 381 | * @pos: Offset to seek to | ||
| 382 | * | ||
| 383 | * The lower bits are the BYTE offset of the register to read. This | ||
| 384 | * allows reading multiple registers in a single call and having | ||
| 385 | * the returned size reflect that. | ||
| 386 | */ | ||
| 290 | static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf, | 387 | static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf, |
| 291 | size_t size, loff_t *pos) | 388 | size_t size, loff_t *pos) |
| 292 | { | 389 | { |
| @@ -314,6 +411,18 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf, | |||
| 314 | return result; | 411 | return result; |
| 315 | } | 412 | } |
| 316 | 413 | ||
| 414 | /** | ||
| 415 | * amdgpu_debugfs_regs_smc_write - Write to a SMC register | ||
| 416 | * | ||
| 417 | * @f: open file handle | ||
| 418 | * @buf: User buffer to write data from | ||
| 419 | * @size: Number of bytes to write | ||
| 420 | * @pos: Offset to seek to | ||
| 421 | * | ||
| 422 | * The lower bits are the BYTE offset of the register to write. This | ||
| 423 | * allows writing multiple registers in a single call and having | ||
| 424 | * the returned size reflect that. | ||
| 425 | */ | ||
| 317 | static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *buf, | 426 | static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *buf, |
| 318 | size_t size, loff_t *pos) | 427 | size_t size, loff_t *pos) |
| 319 | { | 428 | { |
| @@ -342,6 +451,20 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user * | |||
| 342 | return result; | 451 | return result; |
| 343 | } | 452 | } |
| 344 | 453 | ||
| 454 | /** | ||
| 455 | * amdgpu_debugfs_gca_config_read - Read from gfx config data | ||
| 456 | * | ||
| 457 | * @f: open file handle | ||
| 458 | * @buf: User buffer to store read data in | ||
| 459 | * @size: Number of bytes to read | ||
| 460 | * @pos: Offset to seek to | ||
| 461 | * | ||
| 462 | * This file is used to access configuration data in a somewhat | ||
| 463 | * stable fashion. The format is a series of DWORDs with the first | ||
| 464 | * indicating which revision it is. New content is appended to the | ||
| 465 | * end so that older software can still read the data. | ||
| 466 | */ | ||
| 467 | |||
| 345 | static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf, | 468 | static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf, |
| 346 | size_t size, loff_t *pos) | 469 | size_t size, loff_t *pos) |
| 347 | { | 470 | { |
| @@ -418,6 +541,19 @@ static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf, | |||
| 418 | return result; | 541 | return result; |
| 419 | } | 542 | } |
| 420 | 543 | ||
| 544 | /** | ||
| 545 | * amdgpu_debugfs_sensor_read - Read from the powerplay sensors | ||
| 546 | * | ||
| 547 | * @f: open file handle | ||
| 548 | * @buf: User buffer to store read data in | ||
| 549 | * @size: Number of bytes to read | ||
| 550 | * @pos: Offset to seek to | ||
| 551 | * | ||
| 552 | * The offset is treated as the BYTE address of one of the sensors | ||
| 553 | * enumerated in amd/include/kgd_pp_interface.h under the | ||
| 554 | * 'amd_pp_sensors' enumeration. For instance to read the UVD VCLK | ||
| 555 | * you would use the offset 3 * 4 = 12. | ||
| 556 | */ | ||
| 421 | static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf, | 557 | static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf, |
| 422 | size_t size, loff_t *pos) | 558 | size_t size, loff_t *pos) |
| 423 | { | 559 | { |
| @@ -428,7 +564,7 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf, | |||
| 428 | if (size & 3 || *pos & 0x3) | 564 | if (size & 3 || *pos & 0x3) |
| 429 | return -EINVAL; | 565 | return -EINVAL; |
| 430 | 566 | ||
| 431 | if (amdgpu_dpm == 0) | 567 | if (!adev->pm.dpm_enabled) |
| 432 | return -EINVAL; | 568 | return -EINVAL; |
| 433 | 569 | ||
| 434 | /* convert offset to sensor number */ | 570 | /* convert offset to sensor number */ |
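As the comment above notes, each sensor from the amd_pp_sensors enumeration is addressed at index * 4 bytes, so index 3 (the UVD VCLK example) lives at offset 12. A hypothetical userspace reader built on that rule (the debugfs file name is an assumption; some sensors return more than one DWORD, and the driver reports the actual size):

    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            int fd = open("/sys/kernel/debug/dri/0/amdgpu_sensors", O_RDONLY);
            uint32_t vclk;

            if (fd < 0)
                    return 1;
            /* sensor index 3 -> byte offset 3 * 4 = 12 */
            if (pread(fd, &vclk, sizeof(vclk), 3 * 4) == sizeof(vclk))
                    printf("UVD VCLK: %u\n", vclk);
            close(fd);
            return 0;
    }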
| @@ -457,6 +593,27 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf, | |||
| 457 | return !r ? outsize : r; | 593 | return !r ? outsize : r; |
| 458 | } | 594 | } |
| 459 | 595 | ||
| 596 | /** amdgpu_debugfs_wave_read - Read WAVE STATUS data | ||
| 597 | * | ||
| 598 | * @f: open file handle | ||
| 599 | * @buf: User buffer to store read data in | ||
| 600 | * @size: Number of bytes to read | ||
| 601 | * @pos: Offset to seek to | ||
| 602 | * | ||
| 603 | * The offset being sought changes which wave the status data | ||
| 604 | * will be returned for. The bits are used as follows: | ||
| 605 | * | ||
| 606 | * Bits 0..6: Byte offset into data | ||
| 607 | * Bits 7..14: SE selector | ||
| 608 | * Bits 15..22: SH/SA selector | ||
| 609 | * Bits 23..30: CU/{WGP+SIMD} selector | ||
| 610 | * Bits 31..36: WAVE ID selector | ||
| 611 | * Bits 37..44: SIMD ID selector | ||
| 612 | * | ||
| 613 | * The returned data begins with one DWORD of version information | ||
| 614 | * Followed by WAVE STATUS registers relevant to the GFX IP version | ||
| 615 | * being used. See gfx_v8_0_read_wave_data() for an example output. | ||
| 616 | */ | ||
| 460 | static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf, | 617 | static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf, |
| 461 | size_t size, loff_t *pos) | 618 | size_t size, loff_t *pos) |
| 462 | { | 619 | { |
| @@ -507,6 +664,28 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf, | |||
| 507 | return result; | 664 | return result; |
| 508 | } | 665 | } |
| 509 | 666 | ||
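A small helper makes the wave-status bit layout above concrete: the selectors are shifted into the positions the comment lists, and the resulting value is used as the file offset for the read. This is a sketch of how a tool might build the offset, not driver code:

    #include <stdint.h>

    /* Compose an amdgpu_wave file offset from the selectors documented above:
     * byte offset in bits 0..6, SE at 7, SH at 15, CU at 23, wave at 31,
     * SIMD at 37.
     */
    static uint64_t wave_pos(uint32_t se, uint32_t sh, uint32_t cu,
                             uint32_t wave, uint32_t simd)
    {
            return ((uint64_t)se   << 7)  |
                   ((uint64_t)sh   << 15) |
                   ((uint64_t)cu   << 23) |
                   ((uint64_t)wave << 31) |
                   ((uint64_t)simd << 37);
    }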
| 667 | /** amdgpu_debugfs_gpr_read - Read wave gprs | ||
| 668 | * | ||
| 669 | * @f: open file handle | ||
| 670 | * @buf: User buffer to store read data in | ||
| 671 | * @size: Number of bytes to read | ||
| 672 | * @pos: Offset to seek to | ||
| 673 | * | ||
| 674 | * The offset being sought changes which wave the status data | ||
| 675 | * will be returned for. The bits are used as follows: | ||
| 676 | * | ||
| 677 | * Bits 0..11: Byte offset into data | ||
| 678 | * Bits 12..19: SE selector | ||
| 679 | * Bits 20..27: SH/SA selector | ||
| 680 | * Bits 28..35: CU/{WGP+SIMD} selector | ||
| 681 | * Bits 36..43: WAVE ID selector | ||
| 682 | * Bits 44..51: SIMD ID selector | ||
| 683 | * Bits 52..59: Thread selector | ||
| 684 | * Bits 60..61: Bank selector (VGPR=0,SGPR=1) | ||
| 685 | * | ||
| 686 | * The return data comes from the SGPR or VGPR register bank for | ||
| 687 | * the selected operational unit. | ||
| 688 | */ | ||
| 510 | static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf, | 689 | static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf, |
| 511 | size_t size, loff_t *pos) | 690 | size_t size, loff_t *pos) |
| 512 | { | 691 | { |
| @@ -637,6 +816,12 @@ static const char *debugfs_regs_names[] = { | |||
| 637 | "amdgpu_gpr", | 816 | "amdgpu_gpr", |
| 638 | }; | 817 | }; |
| 639 | 818 | ||
| 819 | /** | ||
| 820 | * amdgpu_debugfs_regs_init - Initialize debugfs entries that provide | ||
| 821 | * register access. | ||
| 822 | * | ||
| 823 | * @adev: The device to attach the debugfs entries to | ||
| 824 | */ | ||
| 640 | int amdgpu_debugfs_regs_init(struct amdgpu_device *adev) | 825 | int amdgpu_debugfs_regs_init(struct amdgpu_device *adev) |
| 641 | { | 826 | { |
| 642 | struct drm_minor *minor = adev->ddev->primary; | 827 | struct drm_minor *minor = adev->ddev->primary; |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 34af664b9f93..290e279abf0d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | |||
| @@ -83,8 +83,10 @@ static const char *amdgpu_asic_name[] = { | |||
| 83 | "POLARIS10", | 83 | "POLARIS10", |
| 84 | "POLARIS11", | 84 | "POLARIS11", |
| 85 | "POLARIS12", | 85 | "POLARIS12", |
| 86 | "VEGAM", | ||
| 86 | "VEGA10", | 87 | "VEGA10", |
| 87 | "VEGA12", | 88 | "VEGA12", |
| 89 | "VEGA20", | ||
| 88 | "RAVEN", | 90 | "RAVEN", |
| 89 | "LAST", | 91 | "LAST", |
| 90 | }; | 92 | }; |
| @@ -690,6 +692,8 @@ void amdgpu_device_gart_location(struct amdgpu_device *adev, | |||
| 690 | { | 692 | { |
| 691 | u64 size_af, size_bf; | 693 | u64 size_af, size_bf; |
| 692 | 694 | ||
| 695 | mc->gart_size += adev->pm.smu_prv_buffer_size; | ||
| 696 | |||
| 693 | size_af = adev->gmc.mc_mask - mc->vram_end; | 697 | size_af = adev->gmc.mc_mask - mc->vram_end; |
| 694 | size_bf = mc->vram_start; | 698 | size_bf = mc->vram_start; |
| 695 | if (size_bf > size_af) { | 699 | if (size_bf > size_af) { |
| @@ -907,6 +911,46 @@ static void amdgpu_device_check_vm_size(struct amdgpu_device *adev) | |||
| 907 | } | 911 | } |
| 908 | } | 912 | } |
| 909 | 913 | ||
| 914 | static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev) | ||
| 915 | { | ||
| 916 | struct sysinfo si; | ||
| 917 | bool is_os_64 = (sizeof(void *) == 8) ? true : false; | ||
| 918 | uint64_t total_memory; | ||
| 919 | uint64_t dram_size_seven_GB = 0x1B8000000; | ||
| 920 | uint64_t dram_size_three_GB = 0xB8000000; | ||
| 921 | |||
| 922 | if (amdgpu_smu_memory_pool_size == 0) | ||
| 923 | return; | ||
| 924 | |||
| 925 | if (!is_os_64) { | ||
| 926 | DRM_WARN("Not 64-bit OS, feature not supported\n"); | ||
| 927 | goto def_value; | ||
| 928 | } | ||
| 929 | si_meminfo(&si); | ||
| 930 | total_memory = (uint64_t)si.totalram * si.mem_unit; | ||
| 931 | |||
| 932 | if ((amdgpu_smu_memory_pool_size == 1) || | ||
| 933 | (amdgpu_smu_memory_pool_size == 2)) { | ||
| 934 | if (total_memory < dram_size_three_GB) | ||
| 935 | goto def_value1; | ||
| 936 | } else if ((amdgpu_smu_memory_pool_size == 4) || | ||
| 937 | (amdgpu_smu_memory_pool_size == 8)) { | ||
| 938 | if (total_memory < dram_size_seven_GB) | ||
| 939 | goto def_value1; | ||
| 940 | } else { | ||
| 941 | DRM_WARN("Smu memory pool size not supported\n"); | ||
| 942 | goto def_value; | ||
| 943 | } | ||
| 944 | adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28; | ||
| 945 | |||
| 946 | return; | ||
| 947 | |||
| 948 | def_value1: | ||
| 949 | DRM_WARN("Not enough system memory\n"); | ||
| 950 | def_value: | ||
| 951 | adev->pm.smu_prv_buffer_size = 0; | ||
| 952 | } | ||
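The shift by 28 means the module parameter is expressed in 256 MB units, which also explains the RAM thresholds above: the small pools are allowed on roughly 3 GB systems, the large ones only on roughly 7 GB systems. Summarized:

    /* adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28
     *
     *   parameter 1 -> 1 << 28 = 256 MB   (requires ~3 GB of system RAM)
     *   parameter 2 -> 2 << 28 = 512 MB   (requires ~3 GB of system RAM)
     *   parameter 4 -> 4 << 28 =   1 GB   (requires ~7 GB of system RAM)
     *   parameter 8 -> 8 << 28 =   2 GB   (requires ~7 GB of system RAM)
     */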
| 953 | |||
| 910 | /** | 954 | /** |
| 911 | * amdgpu_device_check_arguments - validate module params | 955 | * amdgpu_device_check_arguments - validate module params |
| 912 | * | 956 | * |
| @@ -948,6 +992,8 @@ static void amdgpu_device_check_arguments(struct amdgpu_device *adev) | |||
| 948 | amdgpu_vm_fragment_size = -1; | 992 | amdgpu_vm_fragment_size = -1; |
| 949 | } | 993 | } |
| 950 | 994 | ||
| 995 | amdgpu_device_check_smu_prv_buffer_size(adev); | ||
| 996 | |||
| 951 | amdgpu_device_check_vm_size(adev); | 997 | amdgpu_device_check_vm_size(adev); |
| 952 | 998 | ||
| 953 | amdgpu_device_check_block_size(adev); | 999 | amdgpu_device_check_block_size(adev); |
| @@ -1039,10 +1085,11 @@ static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = { | |||
| 1039 | * the hardware IP specified. | 1085 | * the hardware IP specified. |
| 1040 | * Returns the error code from the last instance. | 1086 | * Returns the error code from the last instance. |
| 1041 | */ | 1087 | */ |
| 1042 | int amdgpu_device_ip_set_clockgating_state(struct amdgpu_device *adev, | 1088 | int amdgpu_device_ip_set_clockgating_state(void *dev, |
| 1043 | enum amd_ip_block_type block_type, | 1089 | enum amd_ip_block_type block_type, |
| 1044 | enum amd_clockgating_state state) | 1090 | enum amd_clockgating_state state) |
| 1045 | { | 1091 | { |
| 1092 | struct amdgpu_device *adev = dev; | ||
| 1046 | int i, r = 0; | 1093 | int i, r = 0; |
| 1047 | 1094 | ||
| 1048 | for (i = 0; i < adev->num_ip_blocks; i++) { | 1095 | for (i = 0; i < adev->num_ip_blocks; i++) { |
| @@ -1072,10 +1119,11 @@ int amdgpu_device_ip_set_clockgating_state(struct amdgpu_device *adev, | |||
| 1072 | * the hardware IP specified. | 1119 | * the hardware IP specified. |
| 1073 | * Returns the error code from the last instance. | 1120 | * Returns the error code from the last instance. |
| 1074 | */ | 1121 | */ |
| 1075 | int amdgpu_device_ip_set_powergating_state(struct amdgpu_device *adev, | 1122 | int amdgpu_device_ip_set_powergating_state(void *dev, |
| 1076 | enum amd_ip_block_type block_type, | 1123 | enum amd_ip_block_type block_type, |
| 1077 | enum amd_powergating_state state) | 1124 | enum amd_powergating_state state) |
| 1078 | { | 1125 | { |
| 1126 | struct amdgpu_device *adev = dev; | ||
| 1079 | int i, r = 0; | 1127 | int i, r = 0; |
| 1080 | 1128 | ||
| 1081 | for (i = 0; i < adev->num_ip_blocks; i++) { | 1129 | for (i = 0; i < adev->num_ip_blocks; i++) { |
| @@ -1320,9 +1368,10 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) | |||
| 1320 | case CHIP_TOPAZ: | 1368 | case CHIP_TOPAZ: |
| 1321 | case CHIP_TONGA: | 1369 | case CHIP_TONGA: |
| 1322 | case CHIP_FIJI: | 1370 | case CHIP_FIJI: |
| 1323 | case CHIP_POLARIS11: | ||
| 1324 | case CHIP_POLARIS10: | 1371 | case CHIP_POLARIS10: |
| 1372 | case CHIP_POLARIS11: | ||
| 1325 | case CHIP_POLARIS12: | 1373 | case CHIP_POLARIS12: |
| 1374 | case CHIP_VEGAM: | ||
| 1326 | case CHIP_CARRIZO: | 1375 | case CHIP_CARRIZO: |
| 1327 | case CHIP_STONEY: | 1376 | case CHIP_STONEY: |
| 1328 | #ifdef CONFIG_DRM_AMDGPU_SI | 1377 | #ifdef CONFIG_DRM_AMDGPU_SI |
| @@ -1339,6 +1388,7 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) | |||
| 1339 | case CHIP_KABINI: | 1388 | case CHIP_KABINI: |
| 1340 | case CHIP_MULLINS: | 1389 | case CHIP_MULLINS: |
| 1341 | #endif | 1390 | #endif |
| 1391 | case CHIP_VEGA20: | ||
| 1342 | default: | 1392 | default: |
| 1343 | return 0; | 1393 | return 0; |
| 1344 | case CHIP_VEGA10: | 1394 | case CHIP_VEGA10: |
| @@ -1428,9 +1478,10 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) | |||
| 1428 | case CHIP_TOPAZ: | 1478 | case CHIP_TOPAZ: |
| 1429 | case CHIP_TONGA: | 1479 | case CHIP_TONGA: |
| 1430 | case CHIP_FIJI: | 1480 | case CHIP_FIJI: |
| 1431 | case CHIP_POLARIS11: | ||
| 1432 | case CHIP_POLARIS10: | 1481 | case CHIP_POLARIS10: |
| 1482 | case CHIP_POLARIS11: | ||
| 1433 | case CHIP_POLARIS12: | 1483 | case CHIP_POLARIS12: |
| 1484 | case CHIP_VEGAM: | ||
| 1434 | case CHIP_CARRIZO: | 1485 | case CHIP_CARRIZO: |
| 1435 | case CHIP_STONEY: | 1486 | case CHIP_STONEY: |
| 1436 | if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY) | 1487 | if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY) |
| @@ -1472,6 +1523,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) | |||
| 1472 | #endif | 1523 | #endif |
| 1473 | case CHIP_VEGA10: | 1524 | case CHIP_VEGA10: |
| 1474 | case CHIP_VEGA12: | 1525 | case CHIP_VEGA12: |
| 1526 | case CHIP_VEGA20: | ||
| 1475 | case CHIP_RAVEN: | 1527 | case CHIP_RAVEN: |
| 1476 | if (adev->asic_type == CHIP_RAVEN) | 1528 | if (adev->asic_type == CHIP_RAVEN) |
| 1477 | adev->family = AMDGPU_FAMILY_RV; | 1529 | adev->family = AMDGPU_FAMILY_RV; |
| @@ -1499,6 +1551,8 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) | |||
| 1499 | return -EAGAIN; | 1551 | return -EAGAIN; |
| 1500 | } | 1552 | } |
| 1501 | 1553 | ||
| 1554 | adev->powerplay.pp_feature = amdgpu_pp_feature_mask; | ||
| 1555 | |||
| 1502 | for (i = 0; i < adev->num_ip_blocks; i++) { | 1556 | for (i = 0; i < adev->num_ip_blocks; i++) { |
| 1503 | if ((amdgpu_ip_block_mask & (1 << i)) == 0) { | 1557 | if ((amdgpu_ip_block_mask & (1 << i)) == 0) { |
| 1504 | DRM_ERROR("disabled ip block: %d <%s>\n", | 1558 | DRM_ERROR("disabled ip block: %d <%s>\n", |
| @@ -1654,12 +1708,17 @@ static int amdgpu_device_ip_late_set_cg_state(struct amdgpu_device *adev) | |||
| 1654 | if (amdgpu_emu_mode == 1) | 1708 | if (amdgpu_emu_mode == 1) |
| 1655 | return 0; | 1709 | return 0; |
| 1656 | 1710 | ||
| 1711 | r = amdgpu_ib_ring_tests(adev); | ||
| 1712 | if (r) | ||
| 1713 | DRM_ERROR("ib ring test failed (%d).\n", r); | ||
| 1714 | |||
| 1657 | for (i = 0; i < adev->num_ip_blocks; i++) { | 1715 | for (i = 0; i < adev->num_ip_blocks; i++) { |
| 1658 | if (!adev->ip_blocks[i].status.valid) | 1716 | if (!adev->ip_blocks[i].status.valid) |
| 1659 | continue; | 1717 | continue; |
| 1660 | /* skip CG for VCE/UVD, it's handled specially */ | 1718 | /* skip CG for VCE/UVD, it's handled specially */ |
| 1661 | if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD && | 1719 | if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD && |
| 1662 | adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE && | 1720 | adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE && |
| 1721 | adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN && | ||
| 1663 | adev->ip_blocks[i].version->funcs->set_clockgating_state) { | 1722 | adev->ip_blocks[i].version->funcs->set_clockgating_state) { |
| 1664 | /* enable clockgating to save power */ | 1723 | /* enable clockgating to save power */ |
| 1665 | r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev, | 1724 | r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev, |
| @@ -1704,8 +1763,8 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) | |||
| 1704 | } | 1763 | } |
| 1705 | } | 1764 | } |
| 1706 | 1765 | ||
| 1707 | mod_delayed_work(system_wq, &adev->late_init_work, | 1766 | queue_delayed_work(system_wq, &adev->late_init_work, |
| 1708 | msecs_to_jiffies(AMDGPU_RESUME_MS)); | 1767 | msecs_to_jiffies(AMDGPU_RESUME_MS)); |
| 1709 | 1768 | ||
| 1710 | amdgpu_device_fill_reset_magic(adev); | 1769 | amdgpu_device_fill_reset_magic(adev); |
| 1711 | 1770 | ||
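Note on the deferred IB ring tests: this pair of hunks moves the IB ring tests into the late-init work item (now queued with queue_delayed_work() rather than mod_delayed_work()), and the corresponding direct calls are dropped from device init and resume further down. Anything that wants to report ring readiness therefore has to wait for that work to finish, which is what the flush_delayed_work() added to the INFO ioctl later in this series does. A minimal sketch of the pattern, using only the helpers and fields visible in these hunks:

    /* producer: defer the IB ring tests to the late-init work */
    queue_delayed_work(system_wq, &adev->late_init_work,
                       msecs_to_jiffies(AMDGPU_RESUME_MS));

    /* consumer: make sure the deferred tests have run before
     * trusting ring->ready (e.g. in the INFO ioctl) */
    flush_delayed_work(&adev->late_init_work);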
| @@ -1759,6 +1818,7 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) | |||
| 1759 | 1818 | ||
| 1760 | if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD && | 1819 | if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD && |
| 1761 | adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE && | 1820 | adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE && |
| 1821 | adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN && | ||
| 1762 | adev->ip_blocks[i].version->funcs->set_clockgating_state) { | 1822 | adev->ip_blocks[i].version->funcs->set_clockgating_state) { |
| 1763 | /* ungate blocks before hw fini so that we can shutdown the blocks safely */ | 1823 | /* ungate blocks before hw fini so that we can shutdown the blocks safely */ |
| 1764 | r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev, | 1824 | r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev, |
| @@ -1850,6 +1910,12 @@ int amdgpu_device_ip_suspend(struct amdgpu_device *adev) | |||
| 1850 | if (amdgpu_sriov_vf(adev)) | 1910 | if (amdgpu_sriov_vf(adev)) |
| 1851 | amdgpu_virt_request_full_gpu(adev, false); | 1911 | amdgpu_virt_request_full_gpu(adev, false); |
| 1852 | 1912 | ||
| 1913 | /* ungate SMC block powergating */ | ||
| 1914 | if (adev->powerplay.pp_feature & PP_GFXOFF_MASK) | ||
| 1915 | amdgpu_device_ip_set_powergating_state(adev, | ||
| 1916 | AMD_IP_BLOCK_TYPE_SMC, | ||
| 1917 | AMD_CG_STATE_UNGATE); | ||
| 1918 | |||
| 1853 | /* ungate SMC block first */ | 1919 | /* ungate SMC block first */ |
| 1854 | r = amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_SMC, | 1920 | r = amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_SMC, |
| 1855 | AMD_CG_STATE_UNGATE); | 1921 | AMD_CG_STATE_UNGATE); |
| @@ -2086,16 +2152,15 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) | |||
| 2086 | case CHIP_MULLINS: | 2152 | case CHIP_MULLINS: |
| 2087 | case CHIP_CARRIZO: | 2153 | case CHIP_CARRIZO: |
| 2088 | case CHIP_STONEY: | 2154 | case CHIP_STONEY: |
| 2089 | case CHIP_POLARIS11: | ||
| 2090 | case CHIP_POLARIS10: | 2155 | case CHIP_POLARIS10: |
| 2156 | case CHIP_POLARIS11: | ||
| 2091 | case CHIP_POLARIS12: | 2157 | case CHIP_POLARIS12: |
| 2158 | case CHIP_VEGAM: | ||
| 2092 | case CHIP_TONGA: | 2159 | case CHIP_TONGA: |
| 2093 | case CHIP_FIJI: | 2160 | case CHIP_FIJI: |
| 2094 | #if defined(CONFIG_DRM_AMD_DC_PRE_VEGA) | ||
| 2095 | return amdgpu_dc != 0; | ||
| 2096 | #endif | ||
| 2097 | case CHIP_VEGA10: | 2161 | case CHIP_VEGA10: |
| 2098 | case CHIP_VEGA12: | 2162 | case CHIP_VEGA12: |
| 2163 | case CHIP_VEGA20: | ||
| 2099 | #if defined(CONFIG_DRM_AMD_DC_DCN1_0) | 2164 | #if defined(CONFIG_DRM_AMD_DC_DCN1_0) |
| 2100 | case CHIP_RAVEN: | 2165 | case CHIP_RAVEN: |
| 2101 | #endif | 2166 | #endif |
| @@ -2375,10 +2440,6 @@ fence_driver_init: | |||
| 2375 | goto failed; | 2440 | goto failed; |
| 2376 | } | 2441 | } |
| 2377 | 2442 | ||
| 2378 | r = amdgpu_ib_ring_tests(adev); | ||
| 2379 | if (r) | ||
| 2380 | DRM_ERROR("ib ring test failed (%d).\n", r); | ||
| 2381 | |||
| 2382 | if (amdgpu_sriov_vf(adev)) | 2443 | if (amdgpu_sriov_vf(adev)) |
| 2383 | amdgpu_virt_init_data_exchange(adev); | 2444 | amdgpu_virt_init_data_exchange(adev); |
| 2384 | 2445 | ||
| @@ -2539,7 +2600,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) | |||
| 2539 | /* unpin the front buffers and cursors */ | 2600 | /* unpin the front buffers and cursors */ |
| 2540 | list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { | 2601 | list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { |
| 2541 | struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); | 2602 | struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); |
| 2542 | struct amdgpu_framebuffer *rfb = to_amdgpu_framebuffer(crtc->primary->fb); | 2603 | struct drm_framebuffer *fb = crtc->primary->fb; |
| 2543 | struct amdgpu_bo *robj; | 2604 | struct amdgpu_bo *robj; |
| 2544 | 2605 | ||
| 2545 | if (amdgpu_crtc->cursor_bo) { | 2606 | if (amdgpu_crtc->cursor_bo) { |
| @@ -2551,10 +2612,10 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) | |||
| 2551 | } | 2612 | } |
| 2552 | } | 2613 | } |
| 2553 | 2614 | ||
| 2554 | if (rfb == NULL || rfb->obj == NULL) { | 2615 | if (fb == NULL || fb->obj[0] == NULL) { |
| 2555 | continue; | 2616 | continue; |
| 2556 | } | 2617 | } |
| 2557 | robj = gem_to_amdgpu_bo(rfb->obj); | 2618 | robj = gem_to_amdgpu_bo(fb->obj[0]); |
| 2558 | /* don't unpin kernel fb objects */ | 2619 | /* don't unpin kernel fb objects */ |
| 2559 | if (!amdgpu_fbdev_robj_is_fb(adev, robj)) { | 2620 | if (!amdgpu_fbdev_robj_is_fb(adev, robj)) { |
| 2560 | r = amdgpu_bo_reserve(robj, true); | 2621 | r = amdgpu_bo_reserve(robj, true); |
| @@ -2640,11 +2701,6 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) | |||
| 2640 | } | 2701 | } |
| 2641 | amdgpu_fence_driver_resume(adev); | 2702 | amdgpu_fence_driver_resume(adev); |
| 2642 | 2703 | ||
| 2643 | if (resume) { | ||
| 2644 | r = amdgpu_ib_ring_tests(adev); | ||
| 2645 | if (r) | ||
| 2646 | DRM_ERROR("ib ring test failed (%d).\n", r); | ||
| 2647 | } | ||
| 2648 | 2704 | ||
| 2649 | r = amdgpu_device_ip_late_init(adev); | 2705 | r = amdgpu_device_ip_late_init(adev); |
| 2650 | if (r) | 2706 | if (r) |
| @@ -2736,6 +2792,9 @@ static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev) | |||
| 2736 | if (amdgpu_sriov_vf(adev)) | 2792 | if (amdgpu_sriov_vf(adev)) |
| 2737 | return true; | 2793 | return true; |
| 2738 | 2794 | ||
| 2795 | if (amdgpu_asic_need_full_reset(adev)) | ||
| 2796 | return true; | ||
| 2797 | |||
| 2739 | for (i = 0; i < adev->num_ip_blocks; i++) { | 2798 | for (i = 0; i < adev->num_ip_blocks; i++) { |
| 2740 | if (!adev->ip_blocks[i].status.valid) | 2799 | if (!adev->ip_blocks[i].status.valid) |
| 2741 | continue; | 2800 | continue; |
| @@ -2792,6 +2851,9 @@ static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev) | |||
| 2792 | { | 2851 | { |
| 2793 | int i; | 2852 | int i; |
| 2794 | 2853 | ||
| 2854 | if (amdgpu_asic_need_full_reset(adev)) | ||
| 2855 | return true; | ||
| 2856 | |||
| 2795 | for (i = 0; i < adev->num_ip_blocks; i++) { | 2857 | for (i = 0; i < adev->num_ip_blocks; i++) { |
| 2796 | if (!adev->ip_blocks[i].status.valid) | 2858 | if (!adev->ip_blocks[i].status.valid) |
| 2797 | continue; | 2859 | continue; |
| @@ -3087,20 +3149,19 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, | |||
| 3087 | 3149 | ||
| 3088 | /* now we are okay to resume SMC/CP/SDMA */ | 3150 | /* now we are okay to resume SMC/CP/SDMA */ |
| 3089 | r = amdgpu_device_ip_reinit_late_sriov(adev); | 3151 | r = amdgpu_device_ip_reinit_late_sriov(adev); |
| 3090 | amdgpu_virt_release_full_gpu(adev, true); | ||
| 3091 | if (r) | 3152 | if (r) |
| 3092 | goto error; | 3153 | goto error; |
| 3093 | 3154 | ||
| 3094 | amdgpu_irq_gpu_reset_resume_helper(adev); | 3155 | amdgpu_irq_gpu_reset_resume_helper(adev); |
| 3095 | r = amdgpu_ib_ring_tests(adev); | 3156 | r = amdgpu_ib_ring_tests(adev); |
| 3096 | 3157 | ||
| 3158 | error: | ||
| 3159 | amdgpu_virt_release_full_gpu(adev, true); | ||
| 3097 | if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) { | 3160 | if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) { |
| 3098 | atomic_inc(&adev->vram_lost_counter); | 3161 | atomic_inc(&adev->vram_lost_counter); |
| 3099 | r = amdgpu_device_handle_vram_lost(adev); | 3162 | r = amdgpu_device_handle_vram_lost(adev); |
| 3100 | } | 3163 | } |
| 3101 | 3164 | ||
| 3102 | error: | ||
| 3103 | |||
| 3104 | return r; | 3165 | return r; |
| 3105 | } | 3166 | } |
| 3106 | 3167 | ||
| @@ -3117,7 +3178,6 @@ error: | |||
| 3117 | int amdgpu_device_gpu_recover(struct amdgpu_device *adev, | 3178 | int amdgpu_device_gpu_recover(struct amdgpu_device *adev, |
| 3118 | struct amdgpu_job *job, bool force) | 3179 | struct amdgpu_job *job, bool force) |
| 3119 | { | 3180 | { |
| 3120 | struct drm_atomic_state *state = NULL; | ||
| 3121 | int i, r, resched; | 3181 | int i, r, resched; |
| 3122 | 3182 | ||
| 3123 | if (!force && !amdgpu_device_ip_check_soft_reset(adev)) { | 3183 | if (!force && !amdgpu_device_ip_check_soft_reset(adev)) { |
| @@ -3140,10 +3200,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, | |||
| 3140 | /* block TTM */ | 3200 | /* block TTM */ |
| 3141 | resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev); | 3201 | resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev); |
| 3142 | 3202 | ||
| 3143 | /* store modesetting */ | ||
| 3144 | if (amdgpu_device_has_dc_support(adev)) | ||
| 3145 | state = drm_atomic_helper_suspend(adev->ddev); | ||
| 3146 | |||
| 3147 | /* block all schedulers and reset given job's ring */ | 3203 | /* block all schedulers and reset given job's ring */ |
| 3148 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { | 3204 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { |
| 3149 | struct amdgpu_ring *ring = adev->rings[i]; | 3205 | struct amdgpu_ring *ring = adev->rings[i]; |
| @@ -3183,10 +3239,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, | |||
| 3183 | kthread_unpark(ring->sched.thread); | 3239 | kthread_unpark(ring->sched.thread); |
| 3184 | } | 3240 | } |
| 3185 | 3241 | ||
| 3186 | if (amdgpu_device_has_dc_support(adev)) { | 3242 | if (!amdgpu_device_has_dc_support(adev)) { |
| 3187 | if (drm_atomic_helper_resume(adev->ddev, state)) | ||
| 3188 | dev_info(adev->dev, "drm resume failed:%d\n", r); | ||
| 3189 | } else { | ||
| 3190 | drm_helper_resume_force_mode(adev->ddev); | 3243 | drm_helper_resume_force_mode(adev->ddev); |
| 3191 | } | 3244 | } |
| 3192 | 3245 | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 93f700ab1bfb..76ee8e04ff11 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | |||
| @@ -35,6 +35,7 @@ | |||
| 35 | #include <linux/pm_runtime.h> | 35 | #include <linux/pm_runtime.h> |
| 36 | #include <drm/drm_crtc_helper.h> | 36 | #include <drm/drm_crtc_helper.h> |
| 37 | #include <drm/drm_edid.h> | 37 | #include <drm/drm_edid.h> |
| 38 | #include <drm/drm_gem_framebuffer_helper.h> | ||
| 38 | #include <drm/drm_fb_helper.h> | 39 | #include <drm/drm_fb_helper.h> |
| 39 | 40 | ||
| 40 | static void amdgpu_display_flip_callback(struct dma_fence *f, | 41 | static void amdgpu_display_flip_callback(struct dma_fence *f, |
| @@ -151,8 +152,6 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc, | |||
| 151 | struct drm_device *dev = crtc->dev; | 152 | struct drm_device *dev = crtc->dev; |
| 152 | struct amdgpu_device *adev = dev->dev_private; | 153 | struct amdgpu_device *adev = dev->dev_private; |
| 153 | struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); | 154 | struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); |
| 154 | struct amdgpu_framebuffer *old_amdgpu_fb; | ||
| 155 | struct amdgpu_framebuffer *new_amdgpu_fb; | ||
| 156 | struct drm_gem_object *obj; | 155 | struct drm_gem_object *obj; |
| 157 | struct amdgpu_flip_work *work; | 156 | struct amdgpu_flip_work *work; |
| 158 | struct amdgpu_bo *new_abo; | 157 | struct amdgpu_bo *new_abo; |
| @@ -174,15 +173,13 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc, | |||
| 174 | work->async = (page_flip_flags & DRM_MODE_PAGE_FLIP_ASYNC) != 0; | 173 | work->async = (page_flip_flags & DRM_MODE_PAGE_FLIP_ASYNC) != 0; |
| 175 | 174 | ||
| 176 | /* schedule unpin of the old buffer */ | 175 | /* schedule unpin of the old buffer */ |
| 177 | old_amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); | 176 | obj = crtc->primary->fb->obj[0]; |
| 178 | obj = old_amdgpu_fb->obj; | ||
| 179 | 177 | ||
| 180 | /* take a reference to the old object */ | 178 | /* take a reference to the old object */ |
| 181 | work->old_abo = gem_to_amdgpu_bo(obj); | 179 | work->old_abo = gem_to_amdgpu_bo(obj); |
| 182 | amdgpu_bo_ref(work->old_abo); | 180 | amdgpu_bo_ref(work->old_abo); |
| 183 | 181 | ||
| 184 | new_amdgpu_fb = to_amdgpu_framebuffer(fb); | 182 | obj = fb->obj[0]; |
| 185 | obj = new_amdgpu_fb->obj; | ||
| 186 | new_abo = gem_to_amdgpu_bo(obj); | 183 | new_abo = gem_to_amdgpu_bo(obj); |
| 187 | 184 | ||
| 188 | /* pin the new buffer */ | 185 | /* pin the new buffer */ |
| @@ -192,7 +189,7 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc, | |||
| 192 | goto cleanup; | 189 | goto cleanup; |
| 193 | } | 190 | } |
| 194 | 191 | ||
| 195 | r = amdgpu_bo_pin(new_abo, amdgpu_display_framebuffer_domains(adev), &base); | 192 | r = amdgpu_bo_pin(new_abo, amdgpu_display_supported_domains(adev), &base); |
| 196 | if (unlikely(r != 0)) { | 193 | if (unlikely(r != 0)) { |
| 197 | DRM_ERROR("failed to pin new abo buffer before flip\n"); | 194 | DRM_ERROR("failed to pin new abo buffer before flip\n"); |
| 198 | goto unreserve; | 195 | goto unreserve; |
| @@ -482,31 +479,12 @@ bool amdgpu_display_ddc_probe(struct amdgpu_connector *amdgpu_connector, | |||
| 482 | return true; | 479 | return true; |
| 483 | } | 480 | } |
| 484 | 481 | ||
| 485 | static void amdgpu_display_user_framebuffer_destroy(struct drm_framebuffer *fb) | ||
| 486 | { | ||
| 487 | struct amdgpu_framebuffer *amdgpu_fb = to_amdgpu_framebuffer(fb); | ||
| 488 | |||
| 489 | drm_gem_object_put_unlocked(amdgpu_fb->obj); | ||
| 490 | drm_framebuffer_cleanup(fb); | ||
| 491 | kfree(amdgpu_fb); | ||
| 492 | } | ||
| 493 | |||
| 494 | static int amdgpu_display_user_framebuffer_create_handle( | ||
| 495 | struct drm_framebuffer *fb, | ||
| 496 | struct drm_file *file_priv, | ||
| 497 | unsigned int *handle) | ||
| 498 | { | ||
| 499 | struct amdgpu_framebuffer *amdgpu_fb = to_amdgpu_framebuffer(fb); | ||
| 500 | |||
| 501 | return drm_gem_handle_create(file_priv, amdgpu_fb->obj, handle); | ||
| 502 | } | ||
| 503 | |||
| 504 | static const struct drm_framebuffer_funcs amdgpu_fb_funcs = { | 482 | static const struct drm_framebuffer_funcs amdgpu_fb_funcs = { |
| 505 | .destroy = amdgpu_display_user_framebuffer_destroy, | 483 | .destroy = drm_gem_fb_destroy, |
| 506 | .create_handle = amdgpu_display_user_framebuffer_create_handle, | 484 | .create_handle = drm_gem_fb_create_handle, |
| 507 | }; | 485 | }; |
| 508 | 486 | ||
| 509 | uint32_t amdgpu_display_framebuffer_domains(struct amdgpu_device *adev) | 487 | uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev) |
| 510 | { | 488 | { |
| 511 | uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM; | 489 | uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM; |
| 512 | 490 | ||
| @@ -526,11 +504,11 @@ int amdgpu_display_framebuffer_init(struct drm_device *dev, | |||
| 526 | struct drm_gem_object *obj) | 504 | struct drm_gem_object *obj) |
| 527 | { | 505 | { |
| 528 | int ret; | 506 | int ret; |
| 529 | rfb->obj = obj; | 507 | rfb->base.obj[0] = obj; |
| 530 | drm_helper_mode_fill_fb_struct(dev, &rfb->base, mode_cmd); | 508 | drm_helper_mode_fill_fb_struct(dev, &rfb->base, mode_cmd); |
| 531 | ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs); | 509 | ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs); |
| 532 | if (ret) { | 510 | if (ret) { |
| 533 | rfb->obj = NULL; | 511 | rfb->base.obj[0] = NULL; |
| 534 | return ret; | 512 | return ret; |
| 535 | } | 513 | } |
| 536 | return 0; | 514 | return 0; |
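With the amdgpu_framebuffer::obj field gone, the backing GEM object now lives in the generic drm_framebuffer::obj[] array, which is what allows the driver's custom destroy/create_handle callbacks to be replaced by drm_gem_fb_destroy() and drm_gem_fb_create_handle(). A minimal sketch of how callers now reach the buffer object, mirroring the suspend and page-flip paths changed above:

    struct drm_framebuffer *fb = crtc->primary->fb;
    struct amdgpu_bo *abo;

    if (!fb || !fb->obj[0])
        return;                 /* nothing bound to plane 0 */

    /* obj[0] is the single GEM object backing plane 0 */
    abo = gem_to_amdgpu_bo(fb->obj[0]);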
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h index 2b11d808f297..f66e3e3fef0a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h | |||
| @@ -23,7 +23,7 @@ | |||
| 23 | #ifndef __AMDGPU_DISPLAY_H__ | 23 | #ifndef __AMDGPU_DISPLAY_H__ |
| 24 | #define __AMDGPU_DISPLAY_H__ | 24 | #define __AMDGPU_DISPLAY_H__ |
| 25 | 25 | ||
| 26 | uint32_t amdgpu_display_framebuffer_domains(struct amdgpu_device *adev); | 26 | uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev); |
| 27 | struct drm_framebuffer * | 27 | struct drm_framebuffer * |
| 28 | amdgpu_display_user_framebuffer_create(struct drm_device *dev, | 28 | amdgpu_display_user_framebuffer_create(struct drm_device *dev, |
| 29 | struct drm_file *file_priv, | 29 | struct drm_file *file_priv, |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c index e997ebbe43ea..def1010ac05e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c | |||
| @@ -115,6 +115,26 @@ void amdgpu_dpm_print_ps_status(struct amdgpu_device *adev, | |||
| 115 | pr_cont("\n"); | 115 | pr_cont("\n"); |
| 116 | } | 116 | } |
| 117 | 117 | ||
| 118 | void amdgpu_dpm_get_active_displays(struct amdgpu_device *adev) | ||
| 119 | { | ||
| 120 | struct drm_device *ddev = adev->ddev; | ||
| 121 | struct drm_crtc *crtc; | ||
| 122 | struct amdgpu_crtc *amdgpu_crtc; | ||
| 123 | |||
| 124 | adev->pm.dpm.new_active_crtcs = 0; | ||
| 125 | adev->pm.dpm.new_active_crtc_count = 0; | ||
| 126 | if (adev->mode_info.num_crtc && adev->mode_info.mode_config_initialized) { | ||
| 127 | list_for_each_entry(crtc, | ||
| 128 | &ddev->mode_config.crtc_list, head) { | ||
| 129 | amdgpu_crtc = to_amdgpu_crtc(crtc); | ||
| 130 | if (amdgpu_crtc->enabled) { | ||
| 131 | adev->pm.dpm.new_active_crtcs |= (1 << amdgpu_crtc->crtc_id); | ||
| 132 | adev->pm.dpm.new_active_crtc_count++; | ||
| 133 | } | ||
| 134 | } | ||
| 135 | } | ||
| 136 | } | ||
| 137 | |||
| 118 | 138 | ||
| 119 | u32 amdgpu_dpm_get_vblank_time(struct amdgpu_device *adev) | 139 | u32 amdgpu_dpm_get_vblank_time(struct amdgpu_device *adev) |
| 120 | { | 140 | { |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h index 643d008410c6..dd6203a0a6b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h | |||
| @@ -52,8 +52,6 @@ enum amdgpu_dpm_event_src { | |||
| 52 | AMDGPU_DPM_EVENT_SRC_DIGIAL_OR_EXTERNAL = 4 | 52 | AMDGPU_DPM_EVENT_SRC_DIGIAL_OR_EXTERNAL = 4 |
| 53 | }; | 53 | }; |
| 54 | 54 | ||
| 55 | #define SCLK_DEEP_SLEEP_MASK 0x8 | ||
| 56 | |||
| 57 | struct amdgpu_ps { | 55 | struct amdgpu_ps { |
| 58 | u32 caps; /* vbios flags */ | 56 | u32 caps; /* vbios flags */ |
| 59 | u32 class; /* vbios flags */ | 57 | u32 class; /* vbios flags */ |
| @@ -349,12 +347,6 @@ enum amdgpu_pcie_gen { | |||
| 349 | ((adev)->powerplay.pp_funcs->set_clockgating_by_smu(\ | 347 | ((adev)->powerplay.pp_funcs->set_clockgating_by_smu(\ |
| 350 | (adev)->powerplay.pp_handle, msg_id)) | 348 | (adev)->powerplay.pp_handle, msg_id)) |
| 351 | 349 | ||
| 352 | #define amdgpu_dpm_notify_smu_memory_info(adev, virtual_addr_low, \ | ||
| 353 | virtual_addr_hi, mc_addr_low, mc_addr_hi, size) \ | ||
| 354 | ((adev)->powerplay.pp_funcs->notify_smu_memory_info)( \ | ||
| 355 | (adev)->powerplay.pp_handle, virtual_addr_low, \ | ||
| 356 | virtual_addr_hi, mc_addr_low, mc_addr_hi, size) | ||
| 357 | |||
| 358 | #define amdgpu_dpm_get_power_profile_mode(adev, buf) \ | 350 | #define amdgpu_dpm_get_power_profile_mode(adev, buf) \ |
| 359 | ((adev)->powerplay.pp_funcs->get_power_profile_mode(\ | 351 | ((adev)->powerplay.pp_funcs->get_power_profile_mode(\ |
| 360 | (adev)->powerplay.pp_handle, buf)) | 352 | (adev)->powerplay.pp_handle, buf)) |
| @@ -445,6 +437,8 @@ struct amdgpu_pm { | |||
| 445 | uint32_t pcie_gen_mask; | 437 | uint32_t pcie_gen_mask; |
| 446 | uint32_t pcie_mlw_mask; | 438 | uint32_t pcie_mlw_mask; |
| 447 | struct amd_pp_display_configuration pm_display_cfg;/* set by dc */ | 439 | struct amd_pp_display_configuration pm_display_cfg;/* set by dc */ |
| 440 | uint32_t smu_prv_buffer_size; | ||
| 441 | struct amdgpu_bo *smu_prv_buffer; | ||
| 448 | }; | 442 | }; |
| 449 | 443 | ||
| 450 | #define R600_SSTU_DFLT 0 | 444 | #define R600_SSTU_DFLT 0 |
| @@ -482,6 +476,7 @@ void amdgpu_dpm_print_ps_status(struct amdgpu_device *adev, | |||
| 482 | struct amdgpu_ps *rps); | 476 | struct amdgpu_ps *rps); |
| 483 | u32 amdgpu_dpm_get_vblank_time(struct amdgpu_device *adev); | 477 | u32 amdgpu_dpm_get_vblank_time(struct amdgpu_device *adev); |
| 484 | u32 amdgpu_dpm_get_vrefresh(struct amdgpu_device *adev); | 478 | u32 amdgpu_dpm_get_vrefresh(struct amdgpu_device *adev); |
| 479 | void amdgpu_dpm_get_active_displays(struct amdgpu_device *adev); | ||
| 485 | bool amdgpu_is_uvd_state(u32 class, u32 class2); | 480 | bool amdgpu_is_uvd_state(u32 class, u32 class2); |
| 486 | void amdgpu_calculate_u_and_p(u32 i, u32 r_c, u32 p_b, | 481 | void amdgpu_calculate_u_and_p(u32 i, u32 r_c, u32 p_b, |
| 487 | u32 *p, u32 *u); | 482 | u32 *p, u32 *u); |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 0b19482b36b8..b0bf2f24da48 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | |||
| @@ -75,9 +75,10 @@ | |||
| 75 | * - 3.23.0 - Add query for VRAM lost counter | 75 | * - 3.23.0 - Add query for VRAM lost counter |
| 76 | * - 3.24.0 - Add high priority compute support for gfx9 | 76 | * - 3.24.0 - Add high priority compute support for gfx9 |
| 77 | * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk). | 77 | * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk). |
| 78 | * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE. | ||
| 78 | */ | 79 | */ |
| 79 | #define KMS_DRIVER_MAJOR 3 | 80 | #define KMS_DRIVER_MAJOR 3 |
| 80 | #define KMS_DRIVER_MINOR 25 | 81 | #define KMS_DRIVER_MINOR 26 |
| 81 | #define KMS_DRIVER_PATCHLEVEL 0 | 82 | #define KMS_DRIVER_PATCHLEVEL 0 |
| 82 | 83 | ||
| 83 | int amdgpu_vram_limit = 0; | 84 | int amdgpu_vram_limit = 0; |
| @@ -121,7 +122,7 @@ uint amdgpu_pg_mask = 0xffffffff; | |||
| 121 | uint amdgpu_sdma_phase_quantum = 32; | 122 | uint amdgpu_sdma_phase_quantum = 32; |
| 122 | char *amdgpu_disable_cu = NULL; | 123 | char *amdgpu_disable_cu = NULL; |
| 123 | char *amdgpu_virtual_display = NULL; | 124 | char *amdgpu_virtual_display = NULL; |
| 124 | uint amdgpu_pp_feature_mask = 0xffffbfff; | 125 | uint amdgpu_pp_feature_mask = 0xffff3fff; /* gfxoff (bit 15) disabled by default */ |
| 125 | int amdgpu_ngg = 0; | 126 | int amdgpu_ngg = 0; |
| 126 | int amdgpu_prim_buf_per_se = 0; | 127 | int amdgpu_prim_buf_per_se = 0; |
| 127 | int amdgpu_pos_buf_per_se = 0; | 128 | int amdgpu_pos_buf_per_se = 0; |
| @@ -132,6 +133,7 @@ int amdgpu_lbpw = -1; | |||
| 132 | int amdgpu_compute_multipipe = -1; | 133 | int amdgpu_compute_multipipe = -1; |
| 133 | int amdgpu_gpu_recovery = -1; /* auto */ | 134 | int amdgpu_gpu_recovery = -1; /* auto */ |
| 134 | int amdgpu_emu_mode = 0; | 135 | int amdgpu_emu_mode = 0; |
| 136 | uint amdgpu_smu_memory_pool_size = 0; | ||
| 135 | 137 | ||
| 136 | MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes"); | 138 | MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes"); |
| 137 | module_param_named(vramlimit, amdgpu_vram_limit, int, 0600); | 139 | module_param_named(vramlimit, amdgpu_vram_limit, int, 0600); |
| @@ -316,6 +318,11 @@ MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled (default), 0 = disabled) | |||
| 316 | module_param_named(cik_support, amdgpu_cik_support, int, 0444); | 318 | module_param_named(cik_support, amdgpu_cik_support, int, 0444); |
| 317 | #endif | 319 | #endif |
| 318 | 320 | ||
| 321 | MODULE_PARM_DESC(smu_memory_pool_size, | ||
| 322 | "reserve gtt for smu debug usage, 0 = disable," | ||
| 323 | "0x1 = 256Mbyte, 0x2 = 512Mbyte, 0x4 = 1 Gbyte, 0x8 = 2GByte"); | ||
| 324 | module_param_named(smu_memory_pool_size, amdgpu_smu_memory_pool_size, uint, 0444); | ||
| 325 | |||
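The new smu_memory_pool_size parameter takes a small power-of-two selector rather than a byte count. The decode itself happens in amdgpu_device_check_smu_prv_buffer_size(), whose body is not part of this section, so the following is only a plausible sketch consistent with the description string (0x1 = 256 MB up to 0x8 = 2 GB) and with the smu_prv_buffer_size field added to struct amdgpu_pm below:

    /* hypothetical decode: selector * 256 MB (256 MB == 1 << 28) */
    if (amdgpu_smu_memory_pool_size)
        adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;

For example, loading the module with smu_memory_pool_size=0x2 would reserve a 512 MB GTT pool for SMU debugging.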
| 319 | static const struct pci_device_id pciidlist[] = { | 326 | static const struct pci_device_id pciidlist[] = { |
| 320 | #ifdef CONFIG_DRM_AMDGPU_SI | 327 | #ifdef CONFIG_DRM_AMDGPU_SI |
| 321 | {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, | 328 | {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, |
| @@ -534,6 +541,9 @@ static const struct pci_device_id pciidlist[] = { | |||
| 534 | {0x1002, 0x6995, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, | 541 | {0x1002, 0x6995, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, |
| 535 | {0x1002, 0x6997, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, | 542 | {0x1002, 0x6997, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, |
| 536 | {0x1002, 0x699F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, | 543 | {0x1002, 0x699F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, |
| 544 | /* VEGAM */ | ||
| 545 | {0x1002, 0x694C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGAM}, | ||
| 546 | {0x1002, 0x694E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGAM}, | ||
| 537 | /* Vega 10 */ | 547 | /* Vega 10 */ |
| 538 | {0x1002, 0x6860, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10}, | 548 | {0x1002, 0x6860, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10}, |
| 539 | {0x1002, 0x6861, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10}, | 549 | {0x1002, 0x6861, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10}, |
| @@ -550,6 +560,13 @@ static const struct pci_device_id pciidlist[] = { | |||
| 550 | {0x1002, 0x69A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12}, | 560 | {0x1002, 0x69A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12}, |
| 551 | {0x1002, 0x69A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12}, | 561 | {0x1002, 0x69A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12}, |
| 552 | {0x1002, 0x69AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12}, | 562 | {0x1002, 0x69AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12}, |
| 563 | /* Vega 20 */ | ||
| 564 | {0x1002, 0x66A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT}, | ||
| 565 | {0x1002, 0x66A1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT}, | ||
| 566 | {0x1002, 0x66A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT}, | ||
| 567 | {0x1002, 0x66A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT}, | ||
| 568 | {0x1002, 0x66A7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT}, | ||
| 569 | {0x1002, 0x66AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT}, | ||
| 553 | /* Raven */ | 570 | /* Raven */ |
| 554 | {0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU}, | 571 | {0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU}, |
| 555 | 572 | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c index 12063019751b..bc5fd8ebab5d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c | |||
| @@ -137,7 +137,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev, | |||
| 137 | /* need to align pitch with crtc limits */ | 137 | /* need to align pitch with crtc limits */ |
| 138 | mode_cmd->pitches[0] = amdgpu_align_pitch(adev, mode_cmd->width, cpp, | 138 | mode_cmd->pitches[0] = amdgpu_align_pitch(adev, mode_cmd->width, cpp, |
| 139 | fb_tiled); | 139 | fb_tiled); |
| 140 | domain = amdgpu_display_framebuffer_domains(adev); | 140 | domain = amdgpu_display_supported_domains(adev); |
| 141 | 141 | ||
| 142 | height = ALIGN(mode_cmd->height, 8); | 142 | height = ALIGN(mode_cmd->height, 8); |
| 143 | size = mode_cmd->pitches[0] * height; | 143 | size = mode_cmd->pitches[0] * height; |
| @@ -292,9 +292,9 @@ static int amdgpu_fbdev_destroy(struct drm_device *dev, struct amdgpu_fbdev *rfb | |||
| 292 | 292 | ||
| 293 | drm_fb_helper_unregister_fbi(&rfbdev->helper); | 293 | drm_fb_helper_unregister_fbi(&rfbdev->helper); |
| 294 | 294 | ||
| 295 | if (rfb->obj) { | 295 | if (rfb->base.obj[0]) { |
| 296 | amdgpufb_destroy_pinned_object(rfb->obj); | 296 | amdgpufb_destroy_pinned_object(rfb->base.obj[0]); |
| 297 | rfb->obj = NULL; | 297 | rfb->base.obj[0] = NULL; |
| 298 | drm_framebuffer_unregister_private(&rfb->base); | 298 | drm_framebuffer_unregister_private(&rfb->base); |
| 299 | drm_framebuffer_cleanup(&rfb->base); | 299 | drm_framebuffer_cleanup(&rfb->base); |
| 300 | } | 300 | } |
| @@ -377,7 +377,7 @@ int amdgpu_fbdev_total_size(struct amdgpu_device *adev) | |||
| 377 | if (!adev->mode_info.rfbdev) | 377 | if (!adev->mode_info.rfbdev) |
| 378 | return 0; | 378 | return 0; |
| 379 | 379 | ||
| 380 | robj = gem_to_amdgpu_bo(adev->mode_info.rfbdev->rfb.obj); | 380 | robj = gem_to_amdgpu_bo(adev->mode_info.rfbdev->rfb.base.obj[0]); |
| 381 | size += amdgpu_bo_size(robj); | 381 | size += amdgpu_bo_size(robj); |
| 382 | return size; | 382 | return size; |
| 383 | } | 383 | } |
| @@ -386,7 +386,7 @@ bool amdgpu_fbdev_robj_is_fb(struct amdgpu_device *adev, struct amdgpu_bo *robj) | |||
| 386 | { | 386 | { |
| 387 | if (!adev->mode_info.rfbdev) | 387 | if (!adev->mode_info.rfbdev) |
| 388 | return false; | 388 | return false; |
| 389 | if (robj == gem_to_amdgpu_bo(adev->mode_info.rfbdev->rfb.obj)) | 389 | if (robj == gem_to_amdgpu_bo(adev->mode_info.rfbdev->rfb.base.obj[0])) |
| 390 | return true; | 390 | return true; |
| 391 | return false; | 391 | return false; |
| 392 | } | 392 | } |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 97449e06a242..39ec6b8890a1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | |||
| @@ -131,7 +131,8 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring) | |||
| 131 | * Emits a fence command on the requested ring (all asics). | 131 | * Emits a fence command on the requested ring (all asics). |
| 132 | * Returns 0 on success, -ENOMEM on failure. | 132 | * Returns 0 on success, -ENOMEM on failure. |
| 133 | */ | 133 | */ |
| 134 | int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f) | 134 | int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, |
| 135 | unsigned flags) | ||
| 135 | { | 136 | { |
| 136 | struct amdgpu_device *adev = ring->adev; | 137 | struct amdgpu_device *adev = ring->adev; |
| 137 | struct amdgpu_fence *fence; | 138 | struct amdgpu_fence *fence; |
| @@ -149,7 +150,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f) | |||
| 149 | adev->fence_context + ring->idx, | 150 | adev->fence_context + ring->idx, |
| 150 | seq); | 151 | seq); |
| 151 | amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, | 152 | amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, |
| 152 | seq, AMDGPU_FENCE_FLAG_INT); | 153 | seq, flags | AMDGPU_FENCE_FLAG_INT); |
| 153 | 154 | ||
| 154 | ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask]; | 155 | ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask]; |
| 155 | /* This function can't be called concurrently anyway, otherwise | 156 | /* This function can't be called concurrently anyway, otherwise |
| @@ -375,14 +376,14 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, | |||
| 375 | struct amdgpu_device *adev = ring->adev; | 376 | struct amdgpu_device *adev = ring->adev; |
| 376 | uint64_t index; | 377 | uint64_t index; |
| 377 | 378 | ||
| 378 | if (ring != &adev->uvd.ring) { | 379 | if (ring != &adev->uvd.inst[ring->me].ring) { |
| 379 | ring->fence_drv.cpu_addr = &adev->wb.wb[ring->fence_offs]; | 380 | ring->fence_drv.cpu_addr = &adev->wb.wb[ring->fence_offs]; |
| 380 | ring->fence_drv.gpu_addr = adev->wb.gpu_addr + (ring->fence_offs * 4); | 381 | ring->fence_drv.gpu_addr = adev->wb.gpu_addr + (ring->fence_offs * 4); |
| 381 | } else { | 382 | } else { |
| 382 | /* put fence directly behind firmware */ | 383 | /* put fence directly behind firmware */ |
| 383 | index = ALIGN(adev->uvd.fw->size, 8); | 384 | index = ALIGN(adev->uvd.fw->size, 8); |
| 384 | ring->fence_drv.cpu_addr = adev->uvd.cpu_addr + index; | 385 | ring->fence_drv.cpu_addr = adev->uvd.inst[ring->me].cpu_addr + index; |
| 385 | ring->fence_drv.gpu_addr = adev->uvd.gpu_addr + index; | 386 | ring->fence_drv.gpu_addr = adev->uvd.inst[ring->me].gpu_addr + index; |
| 386 | } | 387 | } |
| 387 | amdgpu_fence_write(ring, atomic_read(&ring->fence_drv.last_seq)); | 388 | amdgpu_fence_write(ring, atomic_read(&ring->fence_drv.last_seq)); |
| 388 | amdgpu_irq_get(adev, irq_src, irq_type); | 389 | amdgpu_irq_get(adev, irq_src, irq_type); |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index cf0f186c6092..17d6b9fb6d77 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | |||
| @@ -113,12 +113,17 @@ int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev) | |||
| 113 | int r; | 113 | int r; |
| 114 | 114 | ||
| 115 | if (adev->gart.robj == NULL) { | 115 | if (adev->gart.robj == NULL) { |
| 116 | r = amdgpu_bo_create(adev, adev->gart.table_size, PAGE_SIZE, | 116 | struct amdgpu_bo_param bp; |
| 117 | AMDGPU_GEM_DOMAIN_VRAM, | 117 | |
| 118 | AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | | 118 | memset(&bp, 0, sizeof(bp)); |
| 119 | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, | 119 | bp.size = adev->gart.table_size; |
| 120 | ttm_bo_type_kernel, NULL, | 120 | bp.byte_align = PAGE_SIZE; |
| 121 | &adev->gart.robj); | 121 | bp.domain = AMDGPU_GEM_DOMAIN_VRAM; |
| 122 | bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | | ||
| 123 | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; | ||
| 124 | bp.type = ttm_bo_type_kernel; | ||
| 125 | bp.resv = NULL; | ||
| 126 | r = amdgpu_bo_create(adev, &bp, &adev->gart.robj); | ||
| 122 | if (r) { | 127 | if (r) { |
| 123 | return r; | 128 | return r; |
| 124 | } | 129 | } |
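amdgpu_bo_create() now takes a single struct amdgpu_bo_param instead of a long positional argument list; the GART table allocation above and the GEM object path in the next file follow the same fill-then-call pattern (the GEM path additionally sets preferred_domain). A minimal sketch of that pattern, restricted to the fields visible in this diff:

    struct amdgpu_bo_param bp;
    struct amdgpu_bo *bo;
    int r;

    memset(&bp, 0, sizeof(bp));         /* unset fields stay zero */
    bp.size = size;
    bp.byte_align = PAGE_SIZE;
    bp.domain = AMDGPU_GEM_DOMAIN_GTT;
    bp.flags = 0;
    bp.type = ttm_bo_type_kernel;
    bp.resv = NULL;

    r = amdgpu_bo_create(adev, &bp, &bo);
    if (r)
        return r;

Zero-initialising the struct keeps callers forward-compatible when new members are added, which is the main point of the conversion.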
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 46b9ea4e6103..2c8e27370284 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | |||
| @@ -48,17 +48,25 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, | |||
| 48 | struct drm_gem_object **obj) | 48 | struct drm_gem_object **obj) |
| 49 | { | 49 | { |
| 50 | struct amdgpu_bo *bo; | 50 | struct amdgpu_bo *bo; |
| 51 | struct amdgpu_bo_param bp; | ||
| 51 | int r; | 52 | int r; |
| 52 | 53 | ||
| 54 | memset(&bp, 0, sizeof(bp)); | ||
| 53 | *obj = NULL; | 55 | *obj = NULL; |
| 54 | /* At least align on page size */ | 56 | /* At least align on page size */ |
| 55 | if (alignment < PAGE_SIZE) { | 57 | if (alignment < PAGE_SIZE) { |
| 56 | alignment = PAGE_SIZE; | 58 | alignment = PAGE_SIZE; |
| 57 | } | 59 | } |
| 58 | 60 | ||
| 61 | bp.size = size; | ||
| 62 | bp.byte_align = alignment; | ||
| 63 | bp.type = type; | ||
| 64 | bp.resv = resv; | ||
| 65 | bp.preferred_domain = initial_domain; | ||
| 59 | retry: | 66 | retry: |
| 60 | r = amdgpu_bo_create(adev, size, alignment, initial_domain, | 67 | bp.flags = flags; |
| 61 | flags, type, resv, &bo); | 68 | bp.domain = initial_domain; |
| 69 | r = amdgpu_bo_create(adev, &bp, &bo); | ||
| 62 | if (r) { | 70 | if (r) { |
| 63 | if (r != -ERESTARTSYS) { | 71 | if (r != -ERESTARTSYS) { |
| 64 | if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) { | 72 | if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) { |
| @@ -221,12 +229,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, | |||
| 221 | return -EINVAL; | 229 | return -EINVAL; |
| 222 | 230 | ||
| 223 | /* reject invalid gem domains */ | 231 | /* reject invalid gem domains */ |
| 224 | if (args->in.domains & ~(AMDGPU_GEM_DOMAIN_CPU | | 232 | if (args->in.domains & ~AMDGPU_GEM_DOMAIN_MASK) |
| 225 | AMDGPU_GEM_DOMAIN_GTT | | ||
| 226 | AMDGPU_GEM_DOMAIN_VRAM | | ||
| 227 | AMDGPU_GEM_DOMAIN_GDS | | ||
| 228 | AMDGPU_GEM_DOMAIN_GWS | | ||
| 229 | AMDGPU_GEM_DOMAIN_OA)) | ||
| 230 | return -EINVAL; | 233 | return -EINVAL; |
| 231 | 234 | ||
| 232 | /* create a gem object to contain this object in */ | 235 | /* create a gem object to contain this object in */ |
| @@ -771,16 +774,23 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv, | |||
| 771 | } | 774 | } |
| 772 | 775 | ||
| 773 | #if defined(CONFIG_DEBUG_FS) | 776 | #if defined(CONFIG_DEBUG_FS) |
| 777 | |||
| 778 | #define amdgpu_debugfs_gem_bo_print_flag(m, bo, flag) \ | ||
| 779 | if (bo->flags & (AMDGPU_GEM_CREATE_ ## flag)) { \ | ||
| 780 | seq_printf((m), " " #flag); \ | ||
| 781 | } | ||
| 782 | |||
| 774 | static int amdgpu_debugfs_gem_bo_info(int id, void *ptr, void *data) | 783 | static int amdgpu_debugfs_gem_bo_info(int id, void *ptr, void *data) |
| 775 | { | 784 | { |
| 776 | struct drm_gem_object *gobj = ptr; | 785 | struct drm_gem_object *gobj = ptr; |
| 777 | struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj); | 786 | struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj); |
| 778 | struct seq_file *m = data; | 787 | struct seq_file *m = data; |
| 779 | 788 | ||
| 789 | struct dma_buf_attachment *attachment; | ||
| 790 | struct dma_buf *dma_buf; | ||
| 780 | unsigned domain; | 791 | unsigned domain; |
| 781 | const char *placement; | 792 | const char *placement; |
| 782 | unsigned pin_count; | 793 | unsigned pin_count; |
| 783 | uint64_t offset; | ||
| 784 | 794 | ||
| 785 | domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type); | 795 | domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type); |
| 786 | switch (domain) { | 796 | switch (domain) { |
| @@ -798,13 +808,27 @@ static int amdgpu_debugfs_gem_bo_info(int id, void *ptr, void *data) | |||
| 798 | seq_printf(m, "\t0x%08x: %12ld byte %s", | 808 | seq_printf(m, "\t0x%08x: %12ld byte %s", |
| 799 | id, amdgpu_bo_size(bo), placement); | 809 | id, amdgpu_bo_size(bo), placement); |
| 800 | 810 | ||
| 801 | offset = READ_ONCE(bo->tbo.mem.start); | ||
| 802 | if (offset != AMDGPU_BO_INVALID_OFFSET) | ||
| 803 | seq_printf(m, " @ 0x%010Lx", offset); | ||
| 804 | |||
| 805 | pin_count = READ_ONCE(bo->pin_count); | 811 | pin_count = READ_ONCE(bo->pin_count); |
| 806 | if (pin_count) | 812 | if (pin_count) |
| 807 | seq_printf(m, " pin count %d", pin_count); | 813 | seq_printf(m, " pin count %d", pin_count); |
| 814 | |||
| 815 | dma_buf = READ_ONCE(bo->gem_base.dma_buf); | ||
| 816 | attachment = READ_ONCE(bo->gem_base.import_attach); | ||
| 817 | |||
| 818 | if (attachment) | ||
| 819 | seq_printf(m, " imported from %p", dma_buf); | ||
| 820 | else if (dma_buf) | ||
| 821 | seq_printf(m, " exported as %p", dma_buf); | ||
| 822 | |||
| 823 | amdgpu_debugfs_gem_bo_print_flag(m, bo, CPU_ACCESS_REQUIRED); | ||
| 824 | amdgpu_debugfs_gem_bo_print_flag(m, bo, NO_CPU_ACCESS); | ||
| 825 | amdgpu_debugfs_gem_bo_print_flag(m, bo, CPU_GTT_USWC); | ||
| 826 | amdgpu_debugfs_gem_bo_print_flag(m, bo, VRAM_CLEARED); | ||
| 827 | amdgpu_debugfs_gem_bo_print_flag(m, bo, SHADOW); | ||
| 828 | amdgpu_debugfs_gem_bo_print_flag(m, bo, VRAM_CONTIGUOUS); | ||
| 829 | amdgpu_debugfs_gem_bo_print_flag(m, bo, VM_ALWAYS_VALID); | ||
| 830 | amdgpu_debugfs_gem_bo_print_flag(m, bo, EXPLICIT_SYNC); | ||
| 831 | |||
| 808 | seq_printf(m, "\n"); | 832 | seq_printf(m, "\n"); |
| 809 | 833 | ||
| 810 | return 0; | 834 | return 0; |
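The amdgpu_debugfs_gem_bo_print_flag() macro added above relies on token pasting (##) and stringification (#) so each creation flag only has to be named once. Expanded by hand for a single flag it is roughly equivalent to:

    /* amdgpu_debugfs_gem_bo_print_flag(m, bo, CPU_ACCESS_REQUIRED) */
    if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
        seq_printf(m, " CPU_ACCESS_REQUIRED");

A debugfs GEM line therefore now ends with the readable flag names, plus the dma-buf pointer when the object was imported or exported.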
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 311589e02d17..f70eeed9ed76 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | |||
| @@ -127,6 +127,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, | |||
| 127 | struct amdgpu_vm *vm; | 127 | struct amdgpu_vm *vm; |
| 128 | uint64_t fence_ctx; | 128 | uint64_t fence_ctx; |
| 129 | uint32_t status = 0, alloc_size; | 129 | uint32_t status = 0, alloc_size; |
| 130 | unsigned fence_flags = 0; | ||
| 130 | 131 | ||
| 131 | unsigned i; | 132 | unsigned i; |
| 132 | int r = 0; | 133 | int r = 0; |
| @@ -227,7 +228,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, | |||
| 227 | #endif | 228 | #endif |
| 228 | amdgpu_asic_invalidate_hdp(adev, ring); | 229 | amdgpu_asic_invalidate_hdp(adev, ring); |
| 229 | 230 | ||
| 230 | r = amdgpu_fence_emit(ring, f); | 231 | if (ib->flags & AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE) |
| 232 | fence_flags |= AMDGPU_FENCE_FLAG_TC_WB_ONLY; | ||
| 233 | |||
| 234 | r = amdgpu_fence_emit(ring, f, fence_flags); | ||
| 231 | if (r) { | 235 | if (r) { |
| 232 | dev_err(adev->dev, "failed to emit fence (%d)\n", r); | 236 | dev_err(adev->dev, "failed to emit fence (%d)\n", r); |
| 233 | if (job && job->vmid) | 237 | if (job && job->vmid) |
| @@ -242,7 +246,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, | |||
| 242 | /* wrap the last IB with fence */ | 246 | /* wrap the last IB with fence */ |
| 243 | if (job && job->uf_addr) { | 247 | if (job && job->uf_addr) { |
| 244 | amdgpu_ring_emit_fence(ring, job->uf_addr, job->uf_sequence, | 248 | amdgpu_ring_emit_fence(ring, job->uf_addr, job->uf_sequence, |
| 245 | AMDGPU_FENCE_FLAG_64BIT); | 249 | fence_flags | AMDGPU_FENCE_FLAG_64BIT); |
| 246 | } | 250 | } |
| 247 | 251 | ||
| 248 | if (patch_offset != ~0 && ring->funcs->patch_cond_exec) | 252 | if (patch_offset != ~0 && ring->funcs->patch_cond_exec) |
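amdgpu_fence_emit() now forwards caller-supplied flags into the fence emission, which is how the AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE IB flag (advertised by the 3.26.0 version bump above) reaches the ring. Condensed from the hunk above, the translation is:

    unsigned fence_flags = 0;

    if (ib->flags & AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE)
        fence_flags |= AMDGPU_FENCE_FLAG_TC_WB_ONLY;

    /* AMDGPU_FENCE_FLAG_INT is still OR'ed in inside amdgpu_fence_emit() */
    r = amdgpu_fence_emit(ring, f, fence_flags);

The same fence_flags value is also OR'ed into the trailing user fence, so both fences observe the TC writeback-only behaviour.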
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 4b7824d30e73..91517b166a3b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | |||
| @@ -31,6 +31,7 @@ | |||
| 31 | #include "amdgpu_sched.h" | 31 | #include "amdgpu_sched.h" |
| 32 | #include "amdgpu_uvd.h" | 32 | #include "amdgpu_uvd.h" |
| 33 | #include "amdgpu_vce.h" | 33 | #include "amdgpu_vce.h" |
| 34 | #include "atom.h" | ||
| 34 | 35 | ||
| 35 | #include <linux/vga_switcheroo.h> | 36 | #include <linux/vga_switcheroo.h> |
| 36 | #include <linux/slab.h> | 37 | #include <linux/slab.h> |
| @@ -214,6 +215,18 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info, | |||
| 214 | fw_info->ver = adev->gfx.rlc_fw_version; | 215 | fw_info->ver = adev->gfx.rlc_fw_version; |
| 215 | fw_info->feature = adev->gfx.rlc_feature_version; | 216 | fw_info->feature = adev->gfx.rlc_feature_version; |
| 216 | break; | 217 | break; |
| 218 | case AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_CNTL: | ||
| 219 | fw_info->ver = adev->gfx.rlc_srlc_fw_version; | ||
| 220 | fw_info->feature = adev->gfx.rlc_srlc_feature_version; | ||
| 221 | break; | ||
| 222 | case AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_GPM_MEM: | ||
| 223 | fw_info->ver = adev->gfx.rlc_srlg_fw_version; | ||
| 224 | fw_info->feature = adev->gfx.rlc_srlg_feature_version; | ||
| 225 | break; | ||
| 226 | case AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_SRM_MEM: | ||
| 227 | fw_info->ver = adev->gfx.rlc_srls_fw_version; | ||
| 228 | fw_info->feature = adev->gfx.rlc_srls_feature_version; | ||
| 229 | break; | ||
| 217 | case AMDGPU_INFO_FW_GFX_MEC: | 230 | case AMDGPU_INFO_FW_GFX_MEC: |
| 218 | if (query_fw->index == 0) { | 231 | if (query_fw->index == 0) { |
| 219 | fw_info->ver = adev->gfx.mec_fw_version; | 232 | fw_info->ver = adev->gfx.mec_fw_version; |
| @@ -273,12 +286,15 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file | |||
| 273 | struct drm_crtc *crtc; | 286 | struct drm_crtc *crtc; |
| 274 | uint32_t ui32 = 0; | 287 | uint32_t ui32 = 0; |
| 275 | uint64_t ui64 = 0; | 288 | uint64_t ui64 = 0; |
| 276 | int i, found; | 289 | int i, j, found; |
| 277 | int ui32_size = sizeof(ui32); | 290 | int ui32_size = sizeof(ui32); |
| 278 | 291 | ||
| 279 | if (!info->return_size || !info->return_pointer) | 292 | if (!info->return_size || !info->return_pointer) |
| 280 | return -EINVAL; | 293 | return -EINVAL; |
| 281 | 294 | ||
| 295 | /* Ensure IB tests are run on ring */ | ||
| 296 | flush_delayed_work(&adev->late_init_work); | ||
| 297 | |||
| 282 | switch (info->query) { | 298 | switch (info->query) { |
| 283 | case AMDGPU_INFO_ACCEL_WORKING: | 299 | case AMDGPU_INFO_ACCEL_WORKING: |
| 284 | ui32 = adev->accel_working; | 300 | ui32 = adev->accel_working; |
| @@ -332,7 +348,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file | |||
| 332 | break; | 348 | break; |
| 333 | case AMDGPU_HW_IP_UVD: | 349 | case AMDGPU_HW_IP_UVD: |
| 334 | type = AMD_IP_BLOCK_TYPE_UVD; | 350 | type = AMD_IP_BLOCK_TYPE_UVD; |
| 335 | ring_mask = adev->uvd.ring.ready ? 1 : 0; | 351 | for (i = 0; i < adev->uvd.num_uvd_inst; i++) |
| 352 | ring_mask |= ((adev->uvd.inst[i].ring.ready ? 1 : 0) << i); | ||
| 336 | ib_start_alignment = AMDGPU_GPU_PAGE_SIZE; | 353 | ib_start_alignment = AMDGPU_GPU_PAGE_SIZE; |
| 337 | ib_size_alignment = 16; | 354 | ib_size_alignment = 16; |
| 338 | break; | 355 | break; |
| @@ -345,8 +362,11 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file | |||
| 345 | break; | 362 | break; |
| 346 | case AMDGPU_HW_IP_UVD_ENC: | 363 | case AMDGPU_HW_IP_UVD_ENC: |
| 347 | type = AMD_IP_BLOCK_TYPE_UVD; | 364 | type = AMD_IP_BLOCK_TYPE_UVD; |
| 348 | for (i = 0; i < adev->uvd.num_enc_rings; i++) | 365 | for (i = 0; i < adev->uvd.num_uvd_inst; i++) |
| 349 | ring_mask |= ((adev->uvd.ring_enc[i].ready ? 1 : 0) << i); | 366 | for (j = 0; j < adev->uvd.num_enc_rings; j++) |
| 367 | ring_mask |= | ||
| 368 | ((adev->uvd.inst[i].ring_enc[j].ready ? 1 : 0) << | ||
| 369 | (j + i * adev->uvd.num_enc_rings)); | ||
| 350 | ib_start_alignment = AMDGPU_GPU_PAGE_SIZE; | 370 | ib_start_alignment = AMDGPU_GPU_PAGE_SIZE; |
| 351 | ib_size_alignment = 1; | 371 | ib_size_alignment = 1; |
| 352 | break; | 372 | break; |
| @@ -701,10 +721,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file | |||
| 701 | } | 721 | } |
| 702 | } | 722 | } |
| 703 | case AMDGPU_INFO_SENSOR: { | 723 | case AMDGPU_INFO_SENSOR: { |
| 704 | struct pp_gpu_power query = {0}; | 724 | if (!adev->pm.dpm_enabled) |
| 705 | int query_size = sizeof(query); | ||
| 706 | |||
| 707 | if (amdgpu_dpm == 0) | ||
| 708 | return -ENOENT; | 725 | return -ENOENT; |
| 709 | 726 | ||
| 710 | switch (info->sensor_info.type) { | 727 | switch (info->sensor_info.type) { |
| @@ -746,10 +763,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file | |||
| 746 | /* get average GPU power */ | 763 | /* get average GPU power */ |
| 747 | if (amdgpu_dpm_read_sensor(adev, | 764 | if (amdgpu_dpm_read_sensor(adev, |
| 748 | AMDGPU_PP_SENSOR_GPU_POWER, | 765 | AMDGPU_PP_SENSOR_GPU_POWER, |
| 749 | (void *)&query, &query_size)) { | 766 | (void *)&ui32, &ui32_size)) { |
| 750 | return -EINVAL; | 767 | return -EINVAL; |
| 751 | } | 768 | } |
| 752 | ui32 = query.average_gpu_power >> 8; | 769 | ui32 >>= 8; |
| 753 | break; | 770 | break; |
| 754 | case AMDGPU_INFO_SENSOR_VDDNB: | 771 | case AMDGPU_INFO_SENSOR_VDDNB: |
| 755 | /* get VDDNB in millivolts */ | 772 | /* get VDDNB in millivolts */ |
| @@ -913,8 +930,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, | |||
| 913 | return; | 930 | return; |
| 914 | 931 | ||
| 915 | pm_runtime_get_sync(dev->dev); | 932 | pm_runtime_get_sync(dev->dev); |
| 916 | 933 | amdgpu_ctx_mgr_entity_fini(&fpriv->ctx_mgr); | |
| 917 | amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr); | ||
| 918 | 934 | ||
| 919 | if (adev->asic_type != CHIP_RAVEN) { | 935 | if (adev->asic_type != CHIP_RAVEN) { |
| 920 | amdgpu_uvd_free_handles(adev, file_priv); | 936 | amdgpu_uvd_free_handles(adev, file_priv); |
| @@ -935,6 +951,8 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, | |||
| 935 | pd = amdgpu_bo_ref(fpriv->vm.root.base.bo); | 951 | pd = amdgpu_bo_ref(fpriv->vm.root.base.bo); |
| 936 | 952 | ||
| 937 | amdgpu_vm_fini(adev, &fpriv->vm); | 953 | amdgpu_vm_fini(adev, &fpriv->vm); |
| 954 | amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr); | ||
| 955 | |||
| 938 | if (pasid) | 956 | if (pasid) |
| 939 | amdgpu_pasid_free_delayed(pd->tbo.resv, pasid); | 957 | amdgpu_pasid_free_delayed(pd->tbo.resv, pasid); |
| 940 | amdgpu_bo_unref(&pd); | 958 | amdgpu_bo_unref(&pd); |
| @@ -1088,6 +1106,7 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data) | |||
| 1088 | struct amdgpu_device *adev = dev->dev_private; | 1106 | struct amdgpu_device *adev = dev->dev_private; |
| 1089 | struct drm_amdgpu_info_firmware fw_info; | 1107 | struct drm_amdgpu_info_firmware fw_info; |
| 1090 | struct drm_amdgpu_query_fw query_fw; | 1108 | struct drm_amdgpu_query_fw query_fw; |
| 1109 | struct atom_context *ctx = adev->mode_info.atom_context; | ||
| 1091 | int ret, i; | 1110 | int ret, i; |
| 1092 | 1111 | ||
| 1093 | /* VCE */ | 1112 | /* VCE */ |
| @@ -1146,6 +1165,30 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data) | |||
| 1146 | seq_printf(m, "RLC feature version: %u, firmware version: 0x%08x\n", | 1165 | seq_printf(m, "RLC feature version: %u, firmware version: 0x%08x\n", |
| 1147 | fw_info.feature, fw_info.ver); | 1166 | fw_info.feature, fw_info.ver); |
| 1148 | 1167 | ||
| 1168 | /* RLC SAVE RESTORE LIST CNTL */ | ||
| 1169 | query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_CNTL; | ||
| 1170 | ret = amdgpu_firmware_info(&fw_info, &query_fw, adev); | ||
| 1171 | if (ret) | ||
| 1172 | return ret; | ||
| 1173 | seq_printf(m, "RLC SRLC feature version: %u, firmware version: 0x%08x\n", | ||
| 1174 | fw_info.feature, fw_info.ver); | ||
| 1175 | |||
| 1176 | /* RLC SAVE RESTORE LIST GPM MEM */ | ||
| 1177 | query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_GPM_MEM; | ||
| 1178 | ret = amdgpu_firmware_info(&fw_info, &query_fw, adev); | ||
| 1179 | if (ret) | ||
| 1180 | return ret; | ||
| 1181 | seq_printf(m, "RLC SRLG feature version: %u, firmware version: 0x%08x\n", | ||
| 1182 | fw_info.feature, fw_info.ver); | ||
| 1183 | |||
| 1184 | /* RLC SAVE RESTORE LIST SRM MEM */ | ||
| 1185 | query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_SRM_MEM; | ||
| 1186 | ret = amdgpu_firmware_info(&fw_info, &query_fw, adev); | ||
| 1187 | if (ret) | ||
| 1188 | return ret; | ||
| 1189 | seq_printf(m, "RLC SRLS feature version: %u, firmware version: 0x%08x\n", | ||
| 1190 | fw_info.feature, fw_info.ver); | ||
| 1191 | |||
| 1149 | /* MEC */ | 1192 | /* MEC */ |
| 1150 | query_fw.fw_type = AMDGPU_INFO_FW_GFX_MEC; | 1193 | query_fw.fw_type = AMDGPU_INFO_FW_GFX_MEC; |
| 1151 | query_fw.index = 0; | 1194 | query_fw.index = 0; |
| @@ -1210,6 +1253,9 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data) | |||
| 1210 | seq_printf(m, "VCN feature version: %u, firmware version: 0x%08x\n", | 1253 | seq_printf(m, "VCN feature version: %u, firmware version: 0x%08x\n", |
| 1211 | fw_info.feature, fw_info.ver); | 1254 | fw_info.feature, fw_info.ver); |
| 1212 | 1255 | ||
| 1256 | |||
| 1257 | seq_printf(m, "VBIOS version: %s\n", ctx->vbios_version); | ||
| 1258 | |||
| 1213 | return 0; | 1259 | return 0; |
| 1214 | } | 1260 | } |
| 1215 | 1261 | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index bd67f4cb8e6c..83e344fbb50a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | |||
| @@ -36,12 +36,14 @@ | |||
| 36 | #include <drm/drm.h> | 36 | #include <drm/drm.h> |
| 37 | 37 | ||
| 38 | #include "amdgpu.h" | 38 | #include "amdgpu.h" |
| 39 | #include "amdgpu_amdkfd.h" | ||
| 39 | 40 | ||
| 40 | struct amdgpu_mn { | 41 | struct amdgpu_mn { |
| 41 | /* constant after initialisation */ | 42 | /* constant after initialisation */ |
| 42 | struct amdgpu_device *adev; | 43 | struct amdgpu_device *adev; |
| 43 | struct mm_struct *mm; | 44 | struct mm_struct *mm; |
| 44 | struct mmu_notifier mn; | 45 | struct mmu_notifier mn; |
| 46 | enum amdgpu_mn_type type; | ||
| 45 | 47 | ||
| 46 | /* only used on destruction */ | 48 | /* only used on destruction */ |
| 47 | struct work_struct work; | 49 | struct work_struct work; |
| @@ -185,7 +187,7 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node, | |||
| 185 | } | 187 | } |
| 186 | 188 | ||
| 187 | /** | 189 | /** |
| 188 | * amdgpu_mn_invalidate_range_start - callback to notify about mm change | 190 | * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change |
| 189 | * | 191 | * |
| 190 | * @mn: our notifier | 192 | * @mn: our notifier |
| 191 | * @mn: the mm this callback is about | 193 | * @mn: the mm this callback is about |
| @@ -195,10 +197,10 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node, | |||
| 195 | * We block for all BOs between start and end to be idle and | 197 | * We block for all BOs between start and end to be idle and |
| 196 | * unmap them by move them into system domain again. | 198 | * unmap them by move them into system domain again. |
| 197 | */ | 199 | */ |
| 198 | static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn, | 200 | static void amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn, |
| 199 | struct mm_struct *mm, | 201 | struct mm_struct *mm, |
| 200 | unsigned long start, | 202 | unsigned long start, |
| 201 | unsigned long end) | 203 | unsigned long end) |
| 202 | { | 204 | { |
| 203 | struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn); | 205 | struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn); |
| 204 | struct interval_tree_node *it; | 206 | struct interval_tree_node *it; |
| @@ -220,6 +222,49 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn, | |||
| 220 | } | 222 | } |
| 221 | 223 | ||
| 222 | /** | 224 | /** |
| 225 | * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change | ||
| 226 | * | ||
| 227 | * @mn: our notifier | ||
| 228 | * @mn: the mm this callback is about | ||
| 229 | * @start: start of updated range | ||
| 230 | * @end: end of updated range | ||
| 231 | * | ||
| 232 | * We temporarily evict all BOs between start and end. This | ||
| 233 | * necessitates evicting all user-mode queues of the process. The BOs | ||
| 234 | * are restored in amdgpu_mn_invalidate_range_end_hsa. | ||
| 235 | */ | ||
| 236 | static void amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn, | ||
| 237 | struct mm_struct *mm, | ||
| 238 | unsigned long start, | ||
| 239 | unsigned long end) | ||
| 240 | { | ||
| 241 | struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn); | ||
| 242 | struct interval_tree_node *it; | ||
| 243 | |||
| 244 | /* notification is exclusive, but interval is inclusive */ | ||
| 245 | end -= 1; | ||
| 246 | |||
| 247 | amdgpu_mn_read_lock(rmn); | ||
| 248 | |||
| 249 | it = interval_tree_iter_first(&rmn->objects, start, end); | ||
| 250 | while (it) { | ||
| 251 | struct amdgpu_mn_node *node; | ||
| 252 | struct amdgpu_bo *bo; | ||
| 253 | |||
| 254 | node = container_of(it, struct amdgpu_mn_node, it); | ||
| 255 | it = interval_tree_iter_next(it, start, end); | ||
| 256 | |||
| 257 | list_for_each_entry(bo, &node->bos, mn_list) { | ||
| 258 | struct kgd_mem *mem = bo->kfd_bo; | ||
| 259 | |||
| 260 | if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, | ||
| 261 | start, end)) | ||
| 262 | amdgpu_amdkfd_evict_userptr(mem, mm); | ||
| 263 | } | ||
| 264 | } | ||
| 265 | } | ||
| 266 | |||
| 267 | /** | ||
| 223 | * amdgpu_mn_invalidate_range_end - callback to notify about mm change | 268 | * amdgpu_mn_invalidate_range_end - callback to notify about mm change |
| 224 | * | 269 | * |
| 225 | * @mn: our notifier | 270 | * @mn: our notifier |
| @@ -239,23 +284,39 @@ static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn, | |||
| 239 | amdgpu_mn_read_unlock(rmn); | 284 | amdgpu_mn_read_unlock(rmn); |
| 240 | } | 285 | } |
| 241 | 286 | ||
| 242 | static const struct mmu_notifier_ops amdgpu_mn_ops = { | 287 | static const struct mmu_notifier_ops amdgpu_mn_ops[] = { |
| 243 | .release = amdgpu_mn_release, | 288 | [AMDGPU_MN_TYPE_GFX] = { |
| 244 | .invalidate_range_start = amdgpu_mn_invalidate_range_start, | 289 | .release = amdgpu_mn_release, |
| 245 | .invalidate_range_end = amdgpu_mn_invalidate_range_end, | 290 | .invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx, |
| 291 | .invalidate_range_end = amdgpu_mn_invalidate_range_end, | ||
| 292 | }, | ||
| 293 | [AMDGPU_MN_TYPE_HSA] = { | ||
| 294 | .release = amdgpu_mn_release, | ||
| 295 | .invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa, | ||
| 296 | .invalidate_range_end = amdgpu_mn_invalidate_range_end, | ||
| 297 | }, | ||
| 246 | }; | 298 | }; |
| 247 | 299 | ||
| 300 | /* Low bits of any reasonable mm pointer will be unused due to struct | ||
| 301 | * alignment. Use these bits to make a unique key from the mm pointer | ||
| 302 | * and notifier type. | ||
| 303 | */ | ||
| 304 | #define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type)) | ||
| 305 | |||
| 248 | /** | 306 | /** |
| 249 | * amdgpu_mn_get - create notifier context | 307 | * amdgpu_mn_get - create notifier context |
| 250 | * | 308 | * |
| 251 | * @adev: amdgpu device pointer | 309 | * @adev: amdgpu device pointer |
| 310 | * @type: type of MMU notifier context | ||
| 252 | * | 311 | * |
| 253 | * Creates a notifier context for current->mm. | 312 | * Creates a notifier context for current->mm. |
| 254 | */ | 313 | */ |
| 255 | struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev) | 314 | struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, |
| 315 | enum amdgpu_mn_type type) | ||
| 256 | { | 316 | { |
| 257 | struct mm_struct *mm = current->mm; | 317 | struct mm_struct *mm = current->mm; |
| 258 | struct amdgpu_mn *rmn; | 318 | struct amdgpu_mn *rmn; |
| 319 | unsigned long key = AMDGPU_MN_KEY(mm, type); | ||
| 259 | int r; | 320 | int r; |
| 260 | 321 | ||
| 261 | mutex_lock(&adev->mn_lock); | 322 | mutex_lock(&adev->mn_lock); |
| @@ -264,8 +325,8 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev) | |||
| 264 | return ERR_PTR(-EINTR); | 325 | return ERR_PTR(-EINTR); |
| 265 | } | 326 | } |
| 266 | 327 | ||
| 267 | hash_for_each_possible(adev->mn_hash, rmn, node, (unsigned long)mm) | 328 | hash_for_each_possible(adev->mn_hash, rmn, node, key) |
| 268 | if (rmn->mm == mm) | 329 | if (AMDGPU_MN_KEY(rmn->mm, rmn->type) == key) |
| 269 | goto release_locks; | 330 | goto release_locks; |
| 270 | 331 | ||
| 271 | rmn = kzalloc(sizeof(*rmn), GFP_KERNEL); | 332 | rmn = kzalloc(sizeof(*rmn), GFP_KERNEL); |
| @@ -276,8 +337,9 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev) | |||
| 276 | 337 | ||
| 277 | rmn->adev = adev; | 338 | rmn->adev = adev; |
| 278 | rmn->mm = mm; | 339 | rmn->mm = mm; |
| 279 | rmn->mn.ops = &amdgpu_mn_ops; | ||
| 280 | init_rwsem(&rmn->lock); | 340 | init_rwsem(&rmn->lock); |
| 341 | rmn->type = type; | ||
| 342 | rmn->mn.ops = &amdgpu_mn_ops[type]; | ||
| 281 | rmn->objects = RB_ROOT_CACHED; | 343 | rmn->objects = RB_ROOT_CACHED; |
| 282 | mutex_init(&rmn->read_lock); | 344 | mutex_init(&rmn->read_lock); |
| 283 | atomic_set(&rmn->recursion, 0); | 345 | atomic_set(&rmn->recursion, 0); |
| @@ -286,7 +348,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev) | |||
| 286 | if (r) | 348 | if (r) |
| 287 | goto free_rmn; | 349 | goto free_rmn; |
| 288 | 350 | ||
| 289 | hash_add(adev->mn_hash, &rmn->node, (unsigned long)mm); | 351 | hash_add(adev->mn_hash, &rmn->node, AMDGPU_MN_KEY(mm, type)); |
| 290 | 352 | ||
| 291 | release_locks: | 353 | release_locks: |
| 292 | up_write(&mm->mmap_sem); | 354 | up_write(&mm->mmap_sem); |
| @@ -315,15 +377,21 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) | |||
| 315 | { | 377 | { |
| 316 | unsigned long end = addr + amdgpu_bo_size(bo) - 1; | 378 | unsigned long end = addr + amdgpu_bo_size(bo) - 1; |
| 317 | struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); | 379 | struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); |
| 380 | enum amdgpu_mn_type type = | ||
| 381 | bo->kfd_bo ? AMDGPU_MN_TYPE_HSA : AMDGPU_MN_TYPE_GFX; | ||
| 318 | struct amdgpu_mn *rmn; | 382 | struct amdgpu_mn *rmn; |
| 319 | struct amdgpu_mn_node *node = NULL; | 383 | struct amdgpu_mn_node *node = NULL, *new_node; |
| 320 | struct list_head bos; | 384 | struct list_head bos; |
| 321 | struct interval_tree_node *it; | 385 | struct interval_tree_node *it; |
| 322 | 386 | ||
| 323 | rmn = amdgpu_mn_get(adev); | 387 | rmn = amdgpu_mn_get(adev, type); |
| 324 | if (IS_ERR(rmn)) | 388 | if (IS_ERR(rmn)) |
| 325 | return PTR_ERR(rmn); | 389 | return PTR_ERR(rmn); |
| 326 | 390 | ||
| 391 | new_node = kmalloc(sizeof(*new_node), GFP_KERNEL); | ||
| 392 | if (!new_node) | ||
| 393 | return -ENOMEM; | ||
| 394 | |||
| 327 | INIT_LIST_HEAD(&bos); | 395 | INIT_LIST_HEAD(&bos); |
| 328 | 396 | ||
| 329 | down_write(&rmn->lock); | 397 | down_write(&rmn->lock); |
| @@ -337,13 +405,10 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) | |||
| 337 | list_splice(&node->bos, &bos); | 405 | list_splice(&node->bos, &bos); |
| 338 | } | 406 | } |
| 339 | 407 | ||
| 340 | if (!node) { | 408 | if (!node) |
| 341 | node = kmalloc(sizeof(struct amdgpu_mn_node), GFP_KERNEL); | 409 | node = new_node; |
| 342 | if (!node) { | 410 | else |
| 343 | up_write(&rmn->lock); | 411 | kfree(new_node); |
| 344 | return -ENOMEM; | ||
| 345 | } | ||
| 346 | } | ||
| 347 | 412 | ||
| 348 | bo->mn = rmn; | 413 | bo->mn = rmn; |
| 349 | 414 | ||
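A brief illustration of the hashing change above (an editorial sketch, not part of the patch): because a struct mm_struct pointer is at least word-aligned, its low bits are free, so adding the small notifier type value keeps the GFX and HSA contexts for the same process distinct in adev->mn_hash.

    /* Illustrative fragment only -- mm and type values as used above. */
    struct mm_struct *mm = current->mm;
    unsigned long gfx_key = AMDGPU_MN_KEY(mm, AMDGPU_MN_TYPE_GFX); /* == (unsigned long)mm + 0 */
    unsigned long hsa_key = AMDGPU_MN_KEY(mm, AMDGPU_MN_TYPE_HSA); /* == (unsigned long)mm + 1 */
    /* gfx_key != hsa_key, so hash_for_each_possible() in amdgpu_mn_get()
     * can tell the two notifier contexts apart. */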
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h index d0095a3793b8..eb0f432f78fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h | |||
| @@ -29,16 +29,23 @@ | |||
| 29 | */ | 29 | */ |
| 30 | struct amdgpu_mn; | 30 | struct amdgpu_mn; |
| 31 | 31 | ||
| 32 | enum amdgpu_mn_type { | ||
| 33 | AMDGPU_MN_TYPE_GFX, | ||
| 34 | AMDGPU_MN_TYPE_HSA, | ||
| 35 | }; | ||
| 36 | |||
| 32 | #if defined(CONFIG_MMU_NOTIFIER) | 37 | #if defined(CONFIG_MMU_NOTIFIER) |
| 33 | void amdgpu_mn_lock(struct amdgpu_mn *mn); | 38 | void amdgpu_mn_lock(struct amdgpu_mn *mn); |
| 34 | void amdgpu_mn_unlock(struct amdgpu_mn *mn); | 39 | void amdgpu_mn_unlock(struct amdgpu_mn *mn); |
| 35 | struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev); | 40 | struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, |
| 41 | enum amdgpu_mn_type type); | ||
| 36 | int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr); | 42 | int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr); |
| 37 | void amdgpu_mn_unregister(struct amdgpu_bo *bo); | 43 | void amdgpu_mn_unregister(struct amdgpu_bo *bo); |
| 38 | #else | 44 | #else |
| 39 | static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {} | 45 | static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {} |
| 40 | static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {} | 46 | static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {} |
| 41 | static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev) | 47 | static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, |
| 48 | enum amdgpu_mn_type type) | ||
| 42 | { | 49 | { |
| 43 | return NULL; | 50 | return NULL; |
| 44 | } | 51 | } |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index d6416ee52e32..b9e9e8b02fb7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | |||
| @@ -308,7 +308,6 @@ struct amdgpu_display_funcs { | |||
| 308 | 308 | ||
| 309 | struct amdgpu_framebuffer { | 309 | struct amdgpu_framebuffer { |
| 310 | struct drm_framebuffer base; | 310 | struct drm_framebuffer base; |
| 311 | struct drm_gem_object *obj; | ||
| 312 | 311 | ||
| 313 | /* caching for later use */ | 312 | /* caching for later use */ |
| 314 | uint64_t address; | 313 | uint64_t address; |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 6d08cde8443c..6a9e46ae7f0a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | |||
| @@ -191,14 +191,21 @@ int amdgpu_bo_create_reserved(struct amdgpu_device *adev, | |||
| 191 | u32 domain, struct amdgpu_bo **bo_ptr, | 191 | u32 domain, struct amdgpu_bo **bo_ptr, |
| 192 | u64 *gpu_addr, void **cpu_addr) | 192 | u64 *gpu_addr, void **cpu_addr) |
| 193 | { | 193 | { |
| 194 | struct amdgpu_bo_param bp; | ||
| 194 | bool free = false; | 195 | bool free = false; |
| 195 | int r; | 196 | int r; |
| 196 | 197 | ||
| 198 | memset(&bp, 0, sizeof(bp)); | ||
| 199 | bp.size = size; | ||
| 200 | bp.byte_align = align; | ||
| 201 | bp.domain = domain; | ||
| 202 | bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | | ||
| 203 | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; | ||
| 204 | bp.type = ttm_bo_type_kernel; | ||
| 205 | bp.resv = NULL; | ||
| 206 | |||
| 197 | if (!*bo_ptr) { | 207 | if (!*bo_ptr) { |
| 198 | r = amdgpu_bo_create(adev, size, align, domain, | 208 | r = amdgpu_bo_create(adev, &bp, bo_ptr); |
| 199 | AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | | ||
| 200 | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, | ||
| 201 | ttm_bo_type_kernel, NULL, bo_ptr); | ||
| 202 | if (r) { | 209 | if (r) { |
| 203 | dev_err(adev->dev, "(%d) failed to allocate kernel bo\n", | 210 | dev_err(adev->dev, "(%d) failed to allocate kernel bo\n", |
| 204 | r); | 211 | r); |
| @@ -341,27 +348,25 @@ fail: | |||
| 341 | return false; | 348 | return false; |
| 342 | } | 349 | } |
| 343 | 350 | ||
| 344 | static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size, | 351 | static int amdgpu_bo_do_create(struct amdgpu_device *adev, |
| 345 | int byte_align, u32 domain, | 352 | struct amdgpu_bo_param *bp, |
| 346 | u64 flags, enum ttm_bo_type type, | ||
| 347 | struct reservation_object *resv, | ||
| 348 | struct amdgpu_bo **bo_ptr) | 353 | struct amdgpu_bo **bo_ptr) |
| 349 | { | 354 | { |
| 350 | struct ttm_operation_ctx ctx = { | 355 | struct ttm_operation_ctx ctx = { |
| 351 | .interruptible = (type != ttm_bo_type_kernel), | 356 | .interruptible = (bp->type != ttm_bo_type_kernel), |
| 352 | .no_wait_gpu = false, | 357 | .no_wait_gpu = false, |
| 353 | .resv = resv, | 358 | .resv = bp->resv, |
| 354 | .flags = TTM_OPT_FLAG_ALLOW_RES_EVICT | 359 | .flags = TTM_OPT_FLAG_ALLOW_RES_EVICT |
| 355 | }; | 360 | }; |
| 356 | struct amdgpu_bo *bo; | 361 | struct amdgpu_bo *bo; |
| 357 | unsigned long page_align; | 362 | unsigned long page_align, size = bp->size; |
| 358 | size_t acc_size; | 363 | size_t acc_size; |
| 359 | int r; | 364 | int r; |
| 360 | 365 | ||
| 361 | page_align = roundup(byte_align, PAGE_SIZE) >> PAGE_SHIFT; | 366 | page_align = roundup(bp->byte_align, PAGE_SIZE) >> PAGE_SHIFT; |
| 362 | size = ALIGN(size, PAGE_SIZE); | 367 | size = ALIGN(size, PAGE_SIZE); |
| 363 | 368 | ||
| 364 | if (!amdgpu_bo_validate_size(adev, size, domain)) | 369 | if (!amdgpu_bo_validate_size(adev, size, bp->domain)) |
| 365 | return -ENOMEM; | 370 | return -ENOMEM; |
| 366 | 371 | ||
| 367 | *bo_ptr = NULL; | 372 | *bo_ptr = NULL; |
| @@ -375,18 +380,14 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size, | |||
| 375 | drm_gem_private_object_init(adev->ddev, &bo->gem_base, size); | 380 | drm_gem_private_object_init(adev->ddev, &bo->gem_base, size); |
| 376 | INIT_LIST_HEAD(&bo->shadow_list); | 381 | INIT_LIST_HEAD(&bo->shadow_list); |
| 377 | INIT_LIST_HEAD(&bo->va); | 382 | INIT_LIST_HEAD(&bo->va); |
| 378 | bo->preferred_domains = domain & (AMDGPU_GEM_DOMAIN_VRAM | | 383 | bo->preferred_domains = bp->preferred_domain ? bp->preferred_domain : |
| 379 | AMDGPU_GEM_DOMAIN_GTT | | 384 | bp->domain; |
| 380 | AMDGPU_GEM_DOMAIN_CPU | | ||
| 381 | AMDGPU_GEM_DOMAIN_GDS | | ||
| 382 | AMDGPU_GEM_DOMAIN_GWS | | ||
| 383 | AMDGPU_GEM_DOMAIN_OA); | ||
| 384 | bo->allowed_domains = bo->preferred_domains; | 385 | bo->allowed_domains = bo->preferred_domains; |
| 385 | if (type != ttm_bo_type_kernel && | 386 | if (bp->type != ttm_bo_type_kernel && |
| 386 | bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM) | 387 | bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM) |
| 387 | bo->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT; | 388 | bo->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT; |
| 388 | 389 | ||
| 389 | bo->flags = flags; | 390 | bo->flags = bp->flags; |
| 390 | 391 | ||
| 391 | #ifdef CONFIG_X86_32 | 392 | #ifdef CONFIG_X86_32 |
| 392 | /* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit | 393 | /* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit |
| @@ -417,11 +418,13 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size, | |||
| 417 | #endif | 418 | #endif |
| 418 | 419 | ||
| 419 | bo->tbo.bdev = &adev->mman.bdev; | 420 | bo->tbo.bdev = &adev->mman.bdev; |
| 420 | amdgpu_ttm_placement_from_domain(bo, domain); | 421 | amdgpu_ttm_placement_from_domain(bo, bp->domain); |
| 422 | if (bp->type == ttm_bo_type_kernel) | ||
| 423 | bo->tbo.priority = 1; | ||
| 421 | 424 | ||
| 422 | r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type, | 425 | r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, bp->type, |
| 423 | &bo->placement, page_align, &ctx, acc_size, | 426 | &bo->placement, page_align, &ctx, acc_size, |
| 424 | NULL, resv, &amdgpu_ttm_bo_destroy); | 427 | NULL, bp->resv, &amdgpu_ttm_bo_destroy); |
| 425 | if (unlikely(r != 0)) | 428 | if (unlikely(r != 0)) |
| 426 | return r; | 429 | return r; |
| 427 | 430 | ||
| @@ -433,10 +436,7 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size, | |||
| 433 | else | 436 | else |
| 434 | amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved, 0); | 437 | amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved, 0); |
| 435 | 438 | ||
| 436 | if (type == ttm_bo_type_kernel) | 439 | if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED && |
| 437 | bo->tbo.priority = 1; | ||
| 438 | |||
| 439 | if (flags & AMDGPU_GEM_CREATE_VRAM_CLEARED && | ||
| 440 | bo->tbo.mem.placement & TTM_PL_FLAG_VRAM) { | 440 | bo->tbo.mem.placement & TTM_PL_FLAG_VRAM) { |
| 441 | struct dma_fence *fence; | 441 | struct dma_fence *fence; |
| 442 | 442 | ||
| @@ -449,20 +449,20 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size, | |||
| 449 | bo->tbo.moving = dma_fence_get(fence); | 449 | bo->tbo.moving = dma_fence_get(fence); |
| 450 | dma_fence_put(fence); | 450 | dma_fence_put(fence); |
| 451 | } | 451 | } |
| 452 | if (!resv) | 452 | if (!bp->resv) |
| 453 | amdgpu_bo_unreserve(bo); | 453 | amdgpu_bo_unreserve(bo); |
| 454 | *bo_ptr = bo; | 454 | *bo_ptr = bo; |
| 455 | 455 | ||
| 456 | trace_amdgpu_bo_create(bo); | 456 | trace_amdgpu_bo_create(bo); |
| 457 | 457 | ||
| 458 | /* Treat CPU_ACCESS_REQUIRED only as a hint if given by UMD */ | 458 | /* Treat CPU_ACCESS_REQUIRED only as a hint if given by UMD */ |
| 459 | if (type == ttm_bo_type_device) | 459 | if (bp->type == ttm_bo_type_device) |
| 460 | bo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; | 460 | bo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; |
| 461 | 461 | ||
| 462 | return 0; | 462 | return 0; |
| 463 | 463 | ||
| 464 | fail_unreserve: | 464 | fail_unreserve: |
| 465 | if (!resv) | 465 | if (!bp->resv) |
| 466 | ww_mutex_unlock(&bo->tbo.resv->lock); | 466 | ww_mutex_unlock(&bo->tbo.resv->lock); |
| 467 | amdgpu_bo_unref(&bo); | 467 | amdgpu_bo_unref(&bo); |
| 468 | return r; | 468 | return r; |
| @@ -472,16 +472,22 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev, | |||
| 472 | unsigned long size, int byte_align, | 472 | unsigned long size, int byte_align, |
| 473 | struct amdgpu_bo *bo) | 473 | struct amdgpu_bo *bo) |
| 474 | { | 474 | { |
| 475 | struct amdgpu_bo_param bp; | ||
| 475 | int r; | 476 | int r; |
| 476 | 477 | ||
| 477 | if (bo->shadow) | 478 | if (bo->shadow) |
| 478 | return 0; | 479 | return 0; |
| 479 | 480 | ||
| 480 | r = amdgpu_bo_do_create(adev, size, byte_align, AMDGPU_GEM_DOMAIN_GTT, | 481 | memset(&bp, 0, sizeof(bp)); |
| 481 | AMDGPU_GEM_CREATE_CPU_GTT_USWC | | 482 | bp.size = size; |
| 482 | AMDGPU_GEM_CREATE_SHADOW, | 483 | bp.byte_align = byte_align; |
| 483 | ttm_bo_type_kernel, | 484 | bp.domain = AMDGPU_GEM_DOMAIN_GTT; |
| 484 | bo->tbo.resv, &bo->shadow); | 485 | bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC | |
| 486 | AMDGPU_GEM_CREATE_SHADOW; | ||
| 487 | bp.type = ttm_bo_type_kernel; | ||
| 488 | bp.resv = bo->tbo.resv; | ||
| 489 | |||
| 490 | r = amdgpu_bo_do_create(adev, &bp, &bo->shadow); | ||
| 485 | if (!r) { | 491 | if (!r) { |
| 486 | bo->shadow->parent = amdgpu_bo_ref(bo); | 492 | bo->shadow->parent = amdgpu_bo_ref(bo); |
| 487 | mutex_lock(&adev->shadow_list_lock); | 493 | mutex_lock(&adev->shadow_list_lock); |
| @@ -492,28 +498,26 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev, | |||
| 492 | return r; | 498 | return r; |
| 493 | } | 499 | } |
| 494 | 500 | ||
| 495 | int amdgpu_bo_create(struct amdgpu_device *adev, unsigned long size, | 501 | int amdgpu_bo_create(struct amdgpu_device *adev, |
| 496 | int byte_align, u32 domain, | 502 | struct amdgpu_bo_param *bp, |
| 497 | u64 flags, enum ttm_bo_type type, | ||
| 498 | struct reservation_object *resv, | ||
| 499 | struct amdgpu_bo **bo_ptr) | 503 | struct amdgpu_bo **bo_ptr) |
| 500 | { | 504 | { |
| 501 | uint64_t parent_flags = flags & ~AMDGPU_GEM_CREATE_SHADOW; | 505 | u64 flags = bp->flags; |
| 502 | int r; | 506 | int r; |
| 503 | 507 | ||
| 504 | r = amdgpu_bo_do_create(adev, size, byte_align, domain, | 508 | bp->flags = bp->flags & ~AMDGPU_GEM_CREATE_SHADOW; |
| 505 | parent_flags, type, resv, bo_ptr); | 509 | r = amdgpu_bo_do_create(adev, bp, bo_ptr); |
| 506 | if (r) | 510 | if (r) |
| 507 | return r; | 511 | return r; |
| 508 | 512 | ||
| 509 | if ((flags & AMDGPU_GEM_CREATE_SHADOW) && amdgpu_need_backup(adev)) { | 513 | if ((flags & AMDGPU_GEM_CREATE_SHADOW) && amdgpu_need_backup(adev)) { |
| 510 | if (!resv) | 514 | if (!bp->resv) |
| 511 | WARN_ON(reservation_object_lock((*bo_ptr)->tbo.resv, | 515 | WARN_ON(reservation_object_lock((*bo_ptr)->tbo.resv, |
| 512 | NULL)); | 516 | NULL)); |
| 513 | 517 | ||
| 514 | r = amdgpu_bo_create_shadow(adev, size, byte_align, (*bo_ptr)); | 518 | r = amdgpu_bo_create_shadow(adev, bp->size, bp->byte_align, (*bo_ptr)); |
| 515 | 519 | ||
| 516 | if (!resv) | 520 | if (!bp->resv) |
| 517 | reservation_object_unlock((*bo_ptr)->tbo.resv); | 521 | reservation_object_unlock((*bo_ptr)->tbo.resv); |
| 518 | 522 | ||
| 519 | if (r) | 523 | if (r) |
| @@ -689,8 +693,21 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, | |||
| 689 | return -EINVAL; | 693 | return -EINVAL; |
| 690 | 694 | ||
| 691 | /* A shared bo cannot be migrated to VRAM */ | 695 | /* A shared bo cannot be migrated to VRAM */ |
| 692 | if (bo->prime_shared_count && (domain == AMDGPU_GEM_DOMAIN_VRAM)) | 696 | if (bo->prime_shared_count) { |
| 693 | return -EINVAL; | 697 | if (domain & AMDGPU_GEM_DOMAIN_GTT) |
| 698 | domain = AMDGPU_GEM_DOMAIN_GTT; | ||
| 699 | else | ||
| 700 | return -EINVAL; | ||
| 701 | } | ||
| 702 | |||
| 703 | /* This assumes only APU display buffers are pinned with (VRAM|GTT). | ||
| 704 | * See function amdgpu_display_supported_domains() | ||
| 705 | */ | ||
| 706 | if (domain == (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)) { | ||
| 707 | domain = AMDGPU_GEM_DOMAIN_VRAM; | ||
| 708 | if (adev->gmc.real_vram_size <= AMDGPU_SG_THRESHOLD) | ||
| 709 | domain = AMDGPU_GEM_DOMAIN_GTT; | ||
| 710 | } | ||
| 694 | 711 | ||
| 695 | if (bo->pin_count) { | 712 | if (bo->pin_count) { |
| 696 | uint32_t mem_type = bo->tbo.mem.mem_type; | 713 | uint32_t mem_type = bo->tbo.mem.mem_type; |
| @@ -838,6 +855,13 @@ int amdgpu_bo_init(struct amdgpu_device *adev) | |||
| 838 | return amdgpu_ttm_init(adev); | 855 | return amdgpu_ttm_init(adev); |
| 839 | } | 856 | } |
| 840 | 857 | ||
| 858 | int amdgpu_bo_late_init(struct amdgpu_device *adev) | ||
| 859 | { | ||
| 860 | amdgpu_ttm_late_init(adev); | ||
| 861 | |||
| 862 | return 0; | ||
| 863 | } | ||
| 864 | |||
| 841 | void amdgpu_bo_fini(struct amdgpu_device *adev) | 865 | void amdgpu_bo_fini(struct amdgpu_device *adev) |
| 842 | { | 866 | { |
| 843 | amdgpu_ttm_fini(adev); | 867 | amdgpu_ttm_fini(adev); |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 546f77cb7882..540e03fa159f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | |||
| @@ -33,6 +33,16 @@ | |||
| 33 | 33 | ||
| 34 | #define AMDGPU_BO_INVALID_OFFSET LONG_MAX | 34 | #define AMDGPU_BO_INVALID_OFFSET LONG_MAX |
| 35 | 35 | ||
| 36 | struct amdgpu_bo_param { | ||
| 37 | unsigned long size; | ||
| 38 | int byte_align; | ||
| 39 | u32 domain; | ||
| 40 | u32 preferred_domain; | ||
| 41 | u64 flags; | ||
| 42 | enum ttm_bo_type type; | ||
| 43 | struct reservation_object *resv; | ||
| 44 | }; | ||
| 45 | |||
| 36 | /* bo virtual addresses in a vm */ | 46 | /* bo virtual addresses in a vm */ |
| 37 | struct amdgpu_bo_va_mapping { | 47 | struct amdgpu_bo_va_mapping { |
| 38 | struct amdgpu_bo_va *bo_va; | 48 | struct amdgpu_bo_va *bo_va; |
| @@ -196,6 +206,27 @@ static inline bool amdgpu_bo_gpu_accessible(struct amdgpu_bo *bo) | |||
| 196 | } | 206 | } |
| 197 | 207 | ||
| 198 | /** | 208 | /** |
| 209 | * amdgpu_bo_in_cpu_visible_vram - check if BO is (partly) in visible VRAM | ||
| 210 | */ | ||
| 211 | static inline bool amdgpu_bo_in_cpu_visible_vram(struct amdgpu_bo *bo) | ||
| 212 | { | ||
| 213 | struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); | ||
| 214 | unsigned fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT; | ||
| 215 | struct drm_mm_node *node = bo->tbo.mem.mm_node; | ||
| 216 | unsigned long pages_left; | ||
| 217 | |||
| 218 | if (bo->tbo.mem.mem_type != TTM_PL_VRAM) | ||
| 219 | return false; | ||
| 220 | |||
| 221 | for (pages_left = bo->tbo.mem.num_pages; pages_left; | ||
| 222 | pages_left -= node->size, node++) | ||
| 223 | if (node->start < fpfn) | ||
| 224 | return true; | ||
| 225 | |||
| 226 | return false; | ||
| 227 | } | ||
| 228 | |||
| 229 | /** | ||
| 199 | * amdgpu_bo_explicit_sync - return whether the bo is explicitly synced | 230 | * amdgpu_bo_explicit_sync - return whether the bo is explicitly synced |
| 200 | */ | 231 | */ |
| 201 | static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo) | 232 | static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo) |
| @@ -203,10 +234,8 @@ static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo) | |||
| 203 | return bo->flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC; | 234 | return bo->flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC; |
| 204 | } | 235 | } |
| 205 | 236 | ||
| 206 | int amdgpu_bo_create(struct amdgpu_device *adev, unsigned long size, | 237 | int amdgpu_bo_create(struct amdgpu_device *adev, |
| 207 | int byte_align, u32 domain, | 238 | struct amdgpu_bo_param *bp, |
| 208 | u64 flags, enum ttm_bo_type type, | ||
| 209 | struct reservation_object *resv, | ||
| 210 | struct amdgpu_bo **bo_ptr); | 239 | struct amdgpu_bo **bo_ptr); |
| 211 | int amdgpu_bo_create_reserved(struct amdgpu_device *adev, | 240 | int amdgpu_bo_create_reserved(struct amdgpu_device *adev, |
| 212 | unsigned long size, int align, | 241 | unsigned long size, int align, |
| @@ -230,6 +259,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, | |||
| 230 | int amdgpu_bo_unpin(struct amdgpu_bo *bo); | 259 | int amdgpu_bo_unpin(struct amdgpu_bo *bo); |
| 231 | int amdgpu_bo_evict_vram(struct amdgpu_device *adev); | 260 | int amdgpu_bo_evict_vram(struct amdgpu_device *adev); |
| 232 | int amdgpu_bo_init(struct amdgpu_device *adev); | 261 | int amdgpu_bo_init(struct amdgpu_device *adev); |
| 262 | int amdgpu_bo_late_init(struct amdgpu_device *adev); | ||
| 233 | void amdgpu_bo_fini(struct amdgpu_device *adev); | 263 | void amdgpu_bo_fini(struct amdgpu_device *adev); |
| 234 | int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo, | 264 | int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo, |
| 235 | struct vm_area_struct *vma); | 265 | struct vm_area_struct *vma); |
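A minimal sketch of the new allocation interface defined above, mirroring how amdgpu_bo_create_reserved() and the prime import path fill the parameter structure. The function name, size, and placement are assumptions for illustration only.

    /* Hypothetical example; a single parameter struct replaces the old
     * long argument list of amdgpu_bo_create().
     */
    static int example_alloc_vram_bo(struct amdgpu_device *adev,
                                     struct amdgpu_bo **bo_ptr)
    {
        struct amdgpu_bo_param bp;

        memset(&bp, 0, sizeof(bp));
        bp.size = PAGE_SIZE;                        /* example size */
        bp.byte_align = PAGE_SIZE;
        bp.domain = AMDGPU_GEM_DOMAIN_VRAM;         /* example placement */
        bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
        bp.type = ttm_bo_type_kernel;
        bp.resv = NULL;

        return amdgpu_bo_create(adev, &bp, bo_ptr);
    }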
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 361975cf45a9..b455da487782 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | |||
| @@ -77,6 +77,37 @@ void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev) | |||
| 77 | } | 77 | } |
| 78 | } | 78 | } |
| 79 | 79 | ||
| 80 | /** | ||
| 81 | * DOC: power_dpm_state | ||
| 82 | * | ||
| 83 | * This is a legacy interface and is only provided for backwards compatibility. | ||
| 84 | * The amdgpu driver provides a sysfs API for adjusting certain power | ||
| 85 | * related parameters. The file power_dpm_state is used for this. | ||
| 86 | * It accepts the following arguments: | ||
| 87 | * - battery | ||
| 88 | * - balanced | ||
| 89 | * - performance | ||
| 90 | * | ||
| 91 | * battery | ||
| 92 | * | ||
| 93 | * On older GPUs, the vbios provided a special power state for battery | ||
| 94 | * operation. Selecting battery switched to this state. This is no | ||
| 95 | * longer provided on newer GPUs so the option does nothing in that case. | ||
| 96 | * | ||
| 97 | * balanced | ||
| 98 | * | ||
| 99 | * On older GPUs, the vbios provided a special power state for balanced | ||
| 100 | * operation. Selecting balanced switched to this state. This is no | ||
| 101 | * longer provided on newer GPUs so the option does nothing in that case. | ||
| 102 | * | ||
| 103 | * performance | ||
| 104 | * | ||
| 105 | * On older GPUs, the vbios provided a special power state for performance | ||
| 106 | * operation. Selecting performance switched to this state. This is no | ||
| 107 | * longer provided on newer GPUs so the option does nothing in that case. | ||
| 108 | * | ||
| 109 | */ | ||
| 110 | |||
| 80 | static ssize_t amdgpu_get_dpm_state(struct device *dev, | 111 | static ssize_t amdgpu_get_dpm_state(struct device *dev, |
| 81 | struct device_attribute *attr, | 112 | struct device_attribute *attr, |
| 82 | char *buf) | 113 | char *buf) |
| @@ -131,6 +162,59 @@ fail: | |||
| 131 | return count; | 162 | return count; |
| 132 | } | 163 | } |
| 133 | 164 | ||
| 165 | |||
| 166 | /** | ||
| 167 | * DOC: power_dpm_force_performance_level | ||
| 168 | * | ||
| 169 | * The amdgpu driver provides a sysfs API for adjusting certain power | ||
| 170 | * related parameters. The file power_dpm_force_performance_level is | ||
| 171 | * used for this. It accepts the following arguments: | ||
| 172 | * - auto | ||
| 173 | * - low | ||
| 174 | * - high | ||
| 175 | * - manual | ||
| 176 | * - GPU fan | ||
| 177 | * - profile_standard | ||
| 178 | * - profile_min_sclk | ||
| 179 | * - profile_min_mclk | ||
| 180 | * - profile_peak | ||
| 181 | * | ||
| 182 | * auto | ||
| 183 | * | ||
| 184 | * When auto is selected, the driver will attempt to dynamically select | ||
| 185 | * the optimal power profile for current conditions in the driver. | ||
| 186 | * | ||
| 187 | * low | ||
| 188 | * | ||
| 189 | * When low is selected, the clocks are forced to the lowest power state. | ||
| 190 | * | ||
| 191 | * high | ||
| 192 | * | ||
| 193 | * When high is selected, the clocks are forced to the highest power state. | ||
| 194 | * | ||
| 195 | * manual | ||
| 196 | * | ||
| 197 | * When manual is selected, the user can manually adjust which power states | ||
| 198 | * are enabled for each clock domain via the sysfs pp_dpm_mclk, pp_dpm_sclk, | ||
| 199 | * and pp_dpm_pcie files and adjust the power state transition heuristics | ||
| 200 | * via the pp_power_profile_mode sysfs file. | ||
| 201 | * | ||
| 202 | * profile_standard | ||
| 203 | * profile_min_sclk | ||
| 204 | * profile_min_mclk | ||
| 205 | * profile_peak | ||
| 206 | * | ||
| 207 | * When the profiling modes are selected, clock and power gating are | ||
| 208 | * disabled and the clocks are set for different profiling cases. This | ||
| 209 | * mode is recommended for profiling specific workloads where you do | ||
| 210 | * not want clock or power gating or clock fluctuations to interfere | ||
| 211 | * with your results. profile_standard sets the clocks to a fixed clock | ||
| 212 | * level which varies from asic to asic. profile_min_sclk forces the sclk | ||
| 213 | * to the lowest level. profile_min_mclk forces the mclk to the lowest level. | ||
| 214 | * profile_peak sets all clocks (mclk, sclk, pcie) to the highest levels. | ||
| 215 | * | ||
| 216 | */ | ||
| 217 | |||
| 134 | static ssize_t amdgpu_get_dpm_forced_performance_level(struct device *dev, | 218 | static ssize_t amdgpu_get_dpm_forced_performance_level(struct device *dev, |
| 135 | struct device_attribute *attr, | 219 | struct device_attribute *attr, |
| 136 | char *buf) | 220 | char *buf) |
| @@ -324,6 +408,17 @@ fail: | |||
| 324 | return count; | 408 | return count; |
| 325 | } | 409 | } |
| 326 | 410 | ||
| 411 | /** | ||
| 412 | * DOC: pp_table | ||
| 413 | * | ||
| 414 | * The amdgpu driver provides a sysfs API for uploading new powerplay | ||
| 415 | * tables. The file pp_table is used for this. Reading the file | ||
| 416 | * will dump the current powerplay table. Writing to the file | ||
| 417 | * will attempt to upload a new powerplay table and re-initialize | ||
| 418 | * powerplay using that new table. | ||
| 419 | * | ||
| 420 | */ | ||
| 421 | |||
| 327 | static ssize_t amdgpu_get_pp_table(struct device *dev, | 422 | static ssize_t amdgpu_get_pp_table(struct device *dev, |
| 328 | struct device_attribute *attr, | 423 | struct device_attribute *attr, |
| 329 | char *buf) | 424 | char *buf) |
| @@ -360,6 +455,29 @@ static ssize_t amdgpu_set_pp_table(struct device *dev, | |||
| 360 | return count; | 455 | return count; |
| 361 | } | 456 | } |
| 362 | 457 | ||
| 458 | /** | ||
| 459 | * DOC: pp_od_clk_voltage | ||
| 460 | * | ||
| 461 | * The amdgpu driver provides a sysfs API for adjusting the clocks and voltages | ||
| 462 | * in each power level within a power state. The pp_od_clk_voltage is used for | ||
| 463 | * this. | ||
| 464 | * | ||
| 465 | * Reading the file will display: | ||
| 466 | * - a list of engine clock levels and voltages labeled OD_SCLK | ||
| 467 | * - a list of memory clock levels and voltages labeled OD_MCLK | ||
| 468 | * - a list of valid ranges for sclk, mclk, and voltage labeled OD_RANGE | ||
| 469 | * | ||
| 470 | * To manually adjust these settings, first select manual using | ||
| 471 | * power_dpm_force_performance_level. Enter a new value for each | ||
| 472 | * level by writing a string that contains "s/m level clock voltage" to | ||
| 473 | * the file. E.g., "s 1 500 820" will update sclk level 1 to be 500 MHz | ||
| 474 | * at 820 mV; "m 0 350 810" will update mclk level 0 to be 350 MHz at | ||
| 475 | * 810 mV. When you have edited all of the states as needed, write | ||
| 476 | * "c" (commit) to the file to commit your changes. If you want to reset to the | ||
| 477 | * default power levels, write "r" (reset) to the file to reset them. | ||
| 478 | * | ||
| 479 | */ | ||
| 480 | |||
| 363 | static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev, | 481 | static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev, |
| 364 | struct device_attribute *attr, | 482 | struct device_attribute *attr, |
| 365 | const char *buf, | 483 | const char *buf, |
| @@ -437,6 +555,7 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev, | |||
| 437 | if (adev->powerplay.pp_funcs->print_clock_levels) { | 555 | if (adev->powerplay.pp_funcs->print_clock_levels) { |
| 438 | size = amdgpu_dpm_print_clock_levels(adev, OD_SCLK, buf); | 556 | size = amdgpu_dpm_print_clock_levels(adev, OD_SCLK, buf); |
| 439 | size += amdgpu_dpm_print_clock_levels(adev, OD_MCLK, buf+size); | 557 | size += amdgpu_dpm_print_clock_levels(adev, OD_MCLK, buf+size); |
| 558 | size += amdgpu_dpm_print_clock_levels(adev, OD_RANGE, buf+size); | ||
| 440 | return size; | 559 | return size; |
| 441 | } else { | 560 | } else { |
| 442 | return snprintf(buf, PAGE_SIZE, "\n"); | 561 | return snprintf(buf, PAGE_SIZE, "\n"); |
| @@ -444,6 +563,23 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev, | |||
| 444 | 563 | ||
| 445 | } | 564 | } |
| 446 | 565 | ||
| 566 | /** | ||
| 567 | * DOC: pp_dpm_sclk pp_dpm_mclk pp_dpm_pcie | ||
| 568 | * | ||
| 569 | * The amdgpu driver provides a sysfs API for adjusting what power levels | ||
| 570 | * are enabled for a given power state. The files pp_dpm_sclk, pp_dpm_mclk, | ||
| 571 | * and pp_dpm_pcie are used for this. | ||
| 572 | * | ||
| 573 | * Reading back the files will show you the available power levels within | ||
| 574 | * the power state and the clock information for those levels. | ||
| 575 | * | ||
| 576 | * To manually adjust these states, first select manual using | ||
| 577 | * power_dpm_force_performance_level. | ||
| 578 | * Secondly, enter the new values by writing a space-separated list of | ||
| 579 | * level indices to the file, e.g. "echo 4 5 6 > pp_dpm_sclk" | ||
| 580 | * will enable sclk levels 4, 5, and 6. | ||
| 581 | */ | ||
| 582 | |||
| 447 | static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev, | 583 | static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev, |
| 448 | struct device_attribute *attr, | 584 | struct device_attribute *attr, |
| 449 | char *buf) | 585 | char *buf) |
| @@ -466,23 +602,27 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev, | |||
| 466 | struct amdgpu_device *adev = ddev->dev_private; | 602 | struct amdgpu_device *adev = ddev->dev_private; |
| 467 | int ret; | 603 | int ret; |
| 468 | long level; | 604 | long level; |
| 469 | uint32_t i, mask = 0; | 605 | uint32_t mask = 0; |
| 470 | char sub_str[2]; | 606 | char *sub_str = NULL; |
| 607 | char *tmp; | ||
| 608 | char buf_cpy[count]; | ||
| 609 | const char delimiter[3] = {' ', '\n', '\0'}; | ||
| 471 | 610 | ||
| 472 | for (i = 0; i < strlen(buf); i++) { | 611 | memcpy(buf_cpy, buf, count+1); |
| 473 | if (*(buf + i) == '\n') | 612 | tmp = buf_cpy; |
| 474 | continue; | 613 | while (tmp[0]) { |
| 475 | sub_str[0] = *(buf + i); | 614 | sub_str = strsep(&tmp, delimiter); |
| 476 | sub_str[1] = '\0'; | 615 | if (strlen(sub_str)) { |
| 477 | ret = kstrtol(sub_str, 0, &level); | 616 | ret = kstrtol(sub_str, 0, &level); |
| 478 | 617 | ||
| 479 | if (ret) { | 618 | if (ret) { |
| 480 | count = -EINVAL; | 619 | count = -EINVAL; |
| 481 | goto fail; | 620 | goto fail; |
| 482 | } | 621 | } |
| 483 | mask |= 1 << level; | 622 | mask |= 1 << level; |
| 623 | } else | ||
| 624 | break; | ||
| 484 | } | 625 | } |
| 485 | |||
| 486 | if (adev->powerplay.pp_funcs->force_clock_level) | 626 | if (adev->powerplay.pp_funcs->force_clock_level) |
| 487 | amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask); | 627 | amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask); |
| 488 | 628 | ||
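The rewritten store handlers above switch from single-character parsing to strsep()-based tokenizing so that multi-digit level indices work. A self-contained sketch of that parsing pattern (a hypothetical helper, not part of the patch) could look like this:

    /* Hypothetical helper: turn a user string such as "4 5 6" into a level
     * bitmask (here 0x70), using the same strsep()/kstrtol() pattern as above.
     */
    static int example_parse_level_mask(const char *buf, size_t count, uint32_t *mask)
    {
        const char delimiter[3] = {' ', '\n', '\0'};
        char buf_cpy[64];
        char *tmp, *sub_str;
        long level;
        int ret;

        if (count >= sizeof(buf_cpy))
            return -EINVAL;
        memcpy(buf_cpy, buf, count);
        buf_cpy[count] = '\0';

        *mask = 0;
        tmp = buf_cpy;
        while (tmp && tmp[0]) {
            sub_str = strsep(&tmp, delimiter);
            if (!strlen(sub_str))
                break;
            ret = kstrtol(sub_str, 0, &level);
            if (ret || level < 0 || level > 31)
                return -EINVAL;
            *mask |= 1 << level;
        }
        return 0;
    }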
| @@ -512,21 +652,26 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev, | |||
| 512 | struct amdgpu_device *adev = ddev->dev_private; | 652 | struct amdgpu_device *adev = ddev->dev_private; |
| 513 | int ret; | 653 | int ret; |
| 514 | long level; | 654 | long level; |
| 515 | uint32_t i, mask = 0; | 655 | uint32_t mask = 0; |
| 516 | char sub_str[2]; | 656 | char *sub_str = NULL; |
| 657 | char *tmp; | ||
| 658 | char buf_cpy[count]; | ||
| 659 | const char delimiter[3] = {' ', '\n', '\0'}; | ||
| 517 | 660 | ||
| 518 | for (i = 0; i < strlen(buf); i++) { | 661 | memcpy(buf_cpy, buf, count+1); |
| 519 | if (*(buf + i) == '\n') | 662 | tmp = buf_cpy; |
| 520 | continue; | 663 | while (tmp[0]) { |
| 521 | sub_str[0] = *(buf + i); | 664 | sub_str = strsep(&tmp, delimiter); |
| 522 | sub_str[1] = '\0'; | 665 | if (strlen(sub_str)) { |
| 523 | ret = kstrtol(sub_str, 0, &level); | 666 | ret = kstrtol(sub_str, 0, &level); |
| 524 | 667 | ||
| 525 | if (ret) { | 668 | if (ret) { |
| 526 | count = -EINVAL; | 669 | count = -EINVAL; |
| 527 | goto fail; | 670 | goto fail; |
| 528 | } | 671 | } |
| 529 | mask |= 1 << level; | 672 | mask |= 1 << level; |
| 673 | } else | ||
| 674 | break; | ||
| 530 | } | 675 | } |
| 531 | if (adev->powerplay.pp_funcs->force_clock_level) | 676 | if (adev->powerplay.pp_funcs->force_clock_level) |
| 532 | amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask); | 677 | amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask); |
| @@ -557,21 +702,27 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev, | |||
| 557 | struct amdgpu_device *adev = ddev->dev_private; | 702 | struct amdgpu_device *adev = ddev->dev_private; |
| 558 | int ret; | 703 | int ret; |
| 559 | long level; | 704 | long level; |
| 560 | uint32_t i, mask = 0; | 705 | uint32_t mask = 0; |
| 561 | char sub_str[2]; | 706 | char *sub_str = NULL; |
| 707 | char *tmp; | ||
| 708 | char buf_cpy[count]; | ||
| 709 | const char delimiter[3] = {' ', '\n', '\0'}; | ||
| 562 | 710 | ||
| 563 | for (i = 0; i < strlen(buf); i++) { | 711 | memcpy(buf_cpy, buf, count+1); |
| 564 | if (*(buf + i) == '\n') | 712 | tmp = buf_cpy; |
| 565 | continue; | ||
| 566 | sub_str[0] = *(buf + i); | ||
| 567 | sub_str[1] = '\0'; | ||
| 568 | ret = kstrtol(sub_str, 0, &level); | ||
| 569 | 713 | ||
| 570 | if (ret) { | 714 | while (tmp[0]) { |
| 571 | count = -EINVAL; | 715 | sub_str = strsep(&tmp, delimiter); |
| 572 | goto fail; | 716 | if (strlen(sub_str)) { |
| 573 | } | 717 | ret = kstrtol(sub_str, 0, &level); |
| 574 | mask |= 1 << level; | 718 | |
| 719 | if (ret) { | ||
| 720 | count = -EINVAL; | ||
| 721 | goto fail; | ||
| 722 | } | ||
| 723 | mask |= 1 << level; | ||
| 724 | } else | ||
| 725 | break; | ||
| 575 | } | 726 | } |
| 576 | if (adev->powerplay.pp_funcs->force_clock_level) | 727 | if (adev->powerplay.pp_funcs->force_clock_level) |
| 577 | amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask); | 728 | amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask); |
| @@ -668,6 +819,26 @@ fail: | |||
| 668 | return count; | 819 | return count; |
| 669 | } | 820 | } |
| 670 | 821 | ||
| 822 | /** | ||
| 823 | * DOC: pp_power_profile_mode | ||
| 824 | * | ||
| 825 | * The amdgpu driver provides a sysfs API for adjusting the heuristics | ||
| 826 | * related to switching between power levels in a power state. The file | ||
| 827 | * pp_power_profile_mode is used for this. | ||
| 828 | * | ||
| 829 | * Reading this file outputs a list of all of the predefined power profiles | ||
| 830 | * and the relevant heuristics settings for that profile. | ||
| 831 | * | ||
| 832 | * To select a profile or create a custom profile, first select manual using | ||
| 833 | * power_dpm_force_performance_level. Writing the number of a predefined | ||
| 834 | * profile to pp_power_profile_mode will enable those heuristics. To | ||
| 835 | * create a custom set of heuristics, write a string of numbers to the file | ||
| 836 | * starting with the number of the custom profile along with a setting | ||
| 837 | * for each heuristic parameter. Due to differences across asic families | ||
| 838 | * the heuristic parameters vary from family to family. | ||
| 839 | * | ||
| 840 | */ | ||
| 841 | |||
| 671 | static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev, | 842 | static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev, |
| 672 | struct device_attribute *attr, | 843 | struct device_attribute *attr, |
| 673 | char *buf) | 844 | char *buf) |
| @@ -1020,8 +1191,8 @@ static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev, | |||
| 1020 | { | 1191 | { |
| 1021 | struct amdgpu_device *adev = dev_get_drvdata(dev); | 1192 | struct amdgpu_device *adev = dev_get_drvdata(dev); |
| 1022 | struct drm_device *ddev = adev->ddev; | 1193 | struct drm_device *ddev = adev->ddev; |
| 1023 | struct pp_gpu_power query = {0}; | 1194 | u32 query = 0; |
| 1024 | int r, size = sizeof(query); | 1195 | int r, size = sizeof(u32); |
| 1025 | unsigned uw; | 1196 | unsigned uw; |
| 1026 | 1197 | ||
| 1027 | /* Can't get power when the card is off */ | 1198 | /* Can't get power when the card is off */ |
| @@ -1041,7 +1212,7 @@ static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev, | |||
| 1041 | return r; | 1212 | return r; |
| 1042 | 1213 | ||
| 1043 | /* convert to microwatts */ | 1214 | /* convert to microwatts */ |
| 1044 | uw = (query.average_gpu_power >> 8) * 1000000; | 1215 | uw = (query >> 8) * 1000000 + (query & 0xff) * 1000; |
| 1045 | 1216 | ||
| 1046 | return snprintf(buf, PAGE_SIZE, "%u\n", uw); | 1217 | return snprintf(buf, PAGE_SIZE, "%u\n", uw); |
| 1047 | } | 1218 | } |
| @@ -1109,6 +1280,46 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev, | |||
| 1109 | return count; | 1280 | return count; |
| 1110 | } | 1281 | } |
| 1111 | 1282 | ||
| 1283 | |||
| 1284 | /** | ||
| 1285 | * DOC: hwmon | ||
| 1286 | * | ||
| 1287 | * The amdgpu driver exposes the following sensor interfaces: | ||
| 1288 | * - GPU temperature (via the on-die sensor) | ||
| 1289 | * - GPU voltage | ||
| 1290 | * - Northbridge voltage (APUs only) | ||
| 1291 | * - GPU power | ||
| 1292 | * - GPU fan | ||
| 1293 | * | ||
| 1294 | * hwmon interfaces for GPU temperature: | ||
| 1295 | * - temp1_input: the on die GPU temperature in millidegrees Celsius | ||
| 1296 | * - temp1_crit: temperature critical max value in millidegrees Celsius | ||
| 1297 | * - temp1_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius | ||
| 1298 | * | ||
| 1299 | * hwmon interfaces for GPU voltage: | ||
| 1300 | * - in0_input: the voltage on the GPU in millivolts | ||
| 1301 | * - in1_input: the voltage on the Northbridge in millivolts | ||
| 1302 | * | ||
| 1303 | * hwmon interfaces for GPU power: | ||
| 1304 | * - power1_average: average power used by the GPU in microWatts | ||
| 1305 | * - power1_cap_min: minimum cap supported in microWatts | ||
| 1306 | * - power1_cap_max: maximum cap supported in microWatts | ||
| 1307 | * - power1_cap: selected power cap in microWatts | ||
| 1308 | * | ||
| 1309 | * hwmon interfaces for GPU fan: | ||
| 1310 | * - pwm1: pulse width modulation fan level (0-255) | ||
| 1311 | * - pwm1_enable: pulse width modulation fan control method | ||
| 1312 | * 0: no fan speed control | ||
| 1313 | * 1: manual fan speed control using pwm interface | ||
| 1314 | * 2: automatic fan speed control | ||
| 1315 | * - pwm1_min: pulse width modulation fan control minimum level (0) | ||
| 1316 | * - pwm1_max: pulse width modulation fan control maximum level (255) | ||
| 1317 | * - fan1_input: fan speed in RPM | ||
| 1318 | * | ||
| 1319 | * You can use hwmon tools like sensors to view this information on your system. | ||
| 1320 | * | ||
| 1321 | */ | ||
| 1322 | |||
| 1112 | static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0); | 1323 | static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0); |
| 1113 | static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0); | 1324 | static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0); |
| 1114 | static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1); | 1325 | static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1); |
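As the hwmon documentation above notes, these attributes are ordinary sysfs files. A small userspace sketch reading the average GPU power follows; the hwmon index and path are illustrative and vary from system to system.

    #include <stdio.h>

    int main(void)
    {
        /* power1_average reports microwatts, per the hwmon documentation above. */
        FILE *f = fopen("/sys/class/hwmon/hwmon0/power1_average", "r");
        unsigned long uw;

        if (!f || fscanf(f, "%lu", &uw) != 1) {
            if (f)
                fclose(f);
            return 1;
        }
        fclose(f);
        printf("GPU average power: %lu.%06lu W\n", uw / 1000000, uw % 1000000);
        return 0;
    }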
| @@ -1153,19 +1364,14 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, | |||
| 1153 | struct amdgpu_device *adev = dev_get_drvdata(dev); | 1364 | struct amdgpu_device *adev = dev_get_drvdata(dev); |
| 1154 | umode_t effective_mode = attr->mode; | 1365 | umode_t effective_mode = attr->mode; |
| 1155 | 1366 | ||
| 1156 | /* handle non-powerplay limitations */ | 1367 | |
| 1157 | if (!adev->powerplay.pp_handle) { | 1368 | /* Skip fan attributes if fan is not present */ |
| 1158 | /* Skip fan attributes if fan is not present */ | 1369 | if (adev->pm.no_fan && (attr == &sensor_dev_attr_pwm1.dev_attr.attr || |
| 1159 | if (adev->pm.no_fan && | 1370 | attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr || |
| 1160 | (attr == &sensor_dev_attr_pwm1.dev_attr.attr || | 1371 | attr == &sensor_dev_attr_pwm1_max.dev_attr.attr || |
| 1161 | attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr || | 1372 | attr == &sensor_dev_attr_pwm1_min.dev_attr.attr || |
| 1162 | attr == &sensor_dev_attr_pwm1_max.dev_attr.attr || | 1373 | attr == &sensor_dev_attr_fan1_input.dev_attr.attr)) |
| 1163 | attr == &sensor_dev_attr_pwm1_min.dev_attr.attr)) | 1374 | return 0; |
| 1164 | return 0; | ||
| 1165 | /* requires powerplay */ | ||
| 1166 | if (attr == &sensor_dev_attr_fan1_input.dev_attr.attr) | ||
| 1167 | return 0; | ||
| 1168 | } | ||
| 1169 | 1375 | ||
| 1170 | /* Skip limit attributes if DPM is not enabled */ | 1376 | /* Skip limit attributes if DPM is not enabled */ |
| 1171 | if (!adev->pm.dpm_enabled && | 1377 | if (!adev->pm.dpm_enabled && |
| @@ -1658,9 +1864,6 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev) | |||
| 1658 | 1864 | ||
| 1659 | void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) | 1865 | void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) |
| 1660 | { | 1866 | { |
| 1661 | struct drm_device *ddev = adev->ddev; | ||
| 1662 | struct drm_crtc *crtc; | ||
| 1663 | struct amdgpu_crtc *amdgpu_crtc; | ||
| 1664 | int i = 0; | 1867 | int i = 0; |
| 1665 | 1868 | ||
| 1666 | if (!adev->pm.dpm_enabled) | 1869 | if (!adev->pm.dpm_enabled) |
| @@ -1676,21 +1879,25 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) | |||
| 1676 | } | 1879 | } |
| 1677 | 1880 | ||
| 1678 | if (adev->powerplay.pp_funcs->dispatch_tasks) { | 1881 | if (adev->powerplay.pp_funcs->dispatch_tasks) { |
| 1882 | if (!amdgpu_device_has_dc_support(adev)) { | ||
| 1883 | mutex_lock(&adev->pm.mutex); | ||
| 1884 | amdgpu_dpm_get_active_displays(adev); | ||
| 1885 | adev->pm.pm_display_cfg.num_display = adev->pm.dpm.new_active_crtcs; | ||
| 1886 | adev->pm.pm_display_cfg.vrefresh = amdgpu_dpm_get_vrefresh(adev); | ||
| 1887 | adev->pm.pm_display_cfg.min_vblank_time = amdgpu_dpm_get_vblank_time(adev); | ||
| 1888 | /* we have issues with mclk switching with refresh rates over 120 hz on the non-DC code. */ | ||
| 1889 | if (adev->pm.pm_display_cfg.vrefresh > 120) | ||
| 1890 | adev->pm.pm_display_cfg.min_vblank_time = 0; | ||
| 1891 | if (adev->powerplay.pp_funcs->display_configuration_change) | ||
| 1892 | adev->powerplay.pp_funcs->display_configuration_change( | ||
| 1893 | adev->powerplay.pp_handle, | ||
| 1894 | &adev->pm.pm_display_cfg); | ||
| 1895 | mutex_unlock(&adev->pm.mutex); | ||
| 1896 | } | ||
| 1679 | amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_DISPLAY_CONFIG_CHANGE, NULL); | 1897 | amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_DISPLAY_CONFIG_CHANGE, NULL); |
| 1680 | } else { | 1898 | } else { |
| 1681 | mutex_lock(&adev->pm.mutex); | 1899 | mutex_lock(&adev->pm.mutex); |
| 1682 | adev->pm.dpm.new_active_crtcs = 0; | 1900 | amdgpu_dpm_get_active_displays(adev); |
| 1683 | adev->pm.dpm.new_active_crtc_count = 0; | ||
| 1684 | if (adev->mode_info.num_crtc && adev->mode_info.mode_config_initialized) { | ||
| 1685 | list_for_each_entry(crtc, | ||
| 1686 | &ddev->mode_config.crtc_list, head) { | ||
| 1687 | amdgpu_crtc = to_amdgpu_crtc(crtc); | ||
| 1688 | if (amdgpu_crtc->enabled) { | ||
| 1689 | adev->pm.dpm.new_active_crtcs |= (1 << amdgpu_crtc->crtc_id); | ||
| 1690 | adev->pm.dpm.new_active_crtc_count++; | ||
| 1691 | } | ||
| 1692 | } | ||
| 1693 | } | ||
| 1694 | /* update battery/ac status */ | 1901 | /* update battery/ac status */ |
| 1695 | if (power_supply_is_system_supplied() > 0) | 1902 | if (power_supply_is_system_supplied() > 0) |
| 1696 | adev->pm.dpm.ac_power = true; | 1903 | adev->pm.dpm.ac_power = true; |
| @@ -1711,7 +1918,7 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) | |||
| 1711 | static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *adev) | 1918 | static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *adev) |
| 1712 | { | 1919 | { |
| 1713 | uint32_t value; | 1920 | uint32_t value; |
| 1714 | struct pp_gpu_power query = {0}; | 1921 | uint32_t query = 0; |
| 1715 | int size; | 1922 | int size; |
| 1716 | 1923 | ||
| 1717 | /* sanity check PP is enabled */ | 1924 | /* sanity check PP is enabled */ |
| @@ -1734,17 +1941,9 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a | |||
| 1734 | seq_printf(m, "\t%u mV (VDDGFX)\n", value); | 1941 | seq_printf(m, "\t%u mV (VDDGFX)\n", value); |
| 1735 | if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB, (void *)&value, &size)) | 1942 | if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB, (void *)&value, &size)) |
| 1736 | seq_printf(m, "\t%u mV (VDDNB)\n", value); | 1943 | seq_printf(m, "\t%u mV (VDDNB)\n", value); |
| 1737 | size = sizeof(query); | 1944 | size = sizeof(uint32_t); |
| 1738 | if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_POWER, (void *)&query, &size)) { | 1945 | if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_POWER, (void *)&query, &size)) |
| 1739 | seq_printf(m, "\t%u.%u W (VDDC)\n", query.vddc_power >> 8, | 1946 | seq_printf(m, "\t%u.%u W (average GPU)\n", query >> 8, query & 0xff); |
| 1740 | query.vddc_power & 0xff); | ||
| 1741 | seq_printf(m, "\t%u.%u W (VDDCI)\n", query.vddci_power >> 8, | ||
| 1742 | query.vddci_power & 0xff); | ||
| 1743 | seq_printf(m, "\t%u.%u W (max GPU)\n", query.max_gpu_power >> 8, | ||
| 1744 | query.max_gpu_power & 0xff); | ||
| 1745 | seq_printf(m, "\t%u.%u W (average GPU)\n", query.average_gpu_power >> 8, | ||
| 1746 | query.average_gpu_power & 0xff); | ||
| 1747 | } | ||
| 1748 | size = sizeof(value); | 1947 | size = sizeof(value); |
| 1749 | seq_printf(m, "\n"); | 1948 | seq_printf(m, "\n"); |
| 1750 | 1949 | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c index 4b584cb75bf4..4683626b065f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | |||
| @@ -102,12 +102,18 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev, | |||
| 102 | struct reservation_object *resv = attach->dmabuf->resv; | 102 | struct reservation_object *resv = attach->dmabuf->resv; |
| 103 | struct amdgpu_device *adev = dev->dev_private; | 103 | struct amdgpu_device *adev = dev->dev_private; |
| 104 | struct amdgpu_bo *bo; | 104 | struct amdgpu_bo *bo; |
| 105 | struct amdgpu_bo_param bp; | ||
| 105 | int ret; | 106 | int ret; |
| 106 | 107 | ||
| 108 | memset(&bp, 0, sizeof(bp)); | ||
| 109 | bp.size = attach->dmabuf->size; | ||
| 110 | bp.byte_align = PAGE_SIZE; | ||
| 111 | bp.domain = AMDGPU_GEM_DOMAIN_CPU; | ||
| 112 | bp.flags = 0; | ||
| 113 | bp.type = ttm_bo_type_sg; | ||
| 114 | bp.resv = resv; | ||
| 107 | ww_mutex_lock(&resv->lock, NULL); | 115 | ww_mutex_lock(&resv->lock, NULL); |
| 108 | ret = amdgpu_bo_create(adev, attach->dmabuf->size, PAGE_SIZE, | 116 | ret = amdgpu_bo_create(adev, &bp, &bo); |
| 109 | AMDGPU_GEM_DOMAIN_CPU, 0, ttm_bo_type_sg, | ||
| 110 | resv, &bo); | ||
| 111 | if (ret) | 117 | if (ret) |
| 112 | goto error; | 118 | goto error; |
| 113 | 119 | ||
| @@ -209,7 +215,7 @@ static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf, | |||
| 209 | struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv); | 215 | struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv); |
| 210 | struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); | 216 | struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); |
| 211 | struct ttm_operation_ctx ctx = { true, false }; | 217 | struct ttm_operation_ctx ctx = { true, false }; |
| 212 | u32 domain = amdgpu_display_framebuffer_domains(adev); | 218 | u32 domain = amdgpu_display_supported_domains(adev); |
| 213 | int ret; | 219 | int ret; |
| 214 | bool reads = (direction == DMA_BIDIRECTIONAL || | 220 | bool reads = (direction == DMA_BIDIRECTIONAL || |
| 215 | direction == DMA_FROM_DEVICE); | 221 | direction == DMA_FROM_DEVICE); |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index c7d43e064fc7..9f1a5bd39ae8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | |||
| @@ -52,6 +52,7 @@ static int psp_sw_init(void *handle) | |||
| 52 | switch (adev->asic_type) { | 52 | switch (adev->asic_type) { |
| 53 | case CHIP_VEGA10: | 53 | case CHIP_VEGA10: |
| 54 | case CHIP_VEGA12: | 54 | case CHIP_VEGA12: |
| 55 | case CHIP_VEGA20: | ||
| 55 | psp_v3_1_set_psp_funcs(psp); | 56 | psp_v3_1_set_psp_funcs(psp); |
| 56 | break; | 57 | break; |
| 57 | case CHIP_RAVEN: | 58 | case CHIP_RAVEN: |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c index 262c1267249e..8af16e81c7d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c | |||
| @@ -66,6 +66,8 @@ static int amdgpu_identity_map(struct amdgpu_device *adev, | |||
| 66 | u32 ring, | 66 | u32 ring, |
| 67 | struct amdgpu_ring **out_ring) | 67 | struct amdgpu_ring **out_ring) |
| 68 | { | 68 | { |
| 69 | u32 instance; | ||
| 70 | |||
| 69 | switch (mapper->hw_ip) { | 71 | switch (mapper->hw_ip) { |
| 70 | case AMDGPU_HW_IP_GFX: | 72 | case AMDGPU_HW_IP_GFX: |
| 71 | *out_ring = &adev->gfx.gfx_ring[ring]; | 73 | *out_ring = &adev->gfx.gfx_ring[ring]; |
| @@ -77,13 +79,16 @@ static int amdgpu_identity_map(struct amdgpu_device *adev, | |||
| 77 | *out_ring = &adev->sdma.instance[ring].ring; | 79 | *out_ring = &adev->sdma.instance[ring].ring; |
| 78 | break; | 80 | break; |
| 79 | case AMDGPU_HW_IP_UVD: | 81 | case AMDGPU_HW_IP_UVD: |
| 80 | *out_ring = &adev->uvd.ring; | 82 | instance = ring; |
| 83 | *out_ring = &adev->uvd.inst[instance].ring; | ||
| 81 | break; | 84 | break; |
| 82 | case AMDGPU_HW_IP_VCE: | 85 | case AMDGPU_HW_IP_VCE: |
| 83 | *out_ring = &adev->vce.ring[ring]; | 86 | *out_ring = &adev->vce.ring[ring]; |
| 84 | break; | 87 | break; |
| 85 | case AMDGPU_HW_IP_UVD_ENC: | 88 | case AMDGPU_HW_IP_UVD_ENC: |
| 86 | *out_ring = &adev->uvd.ring_enc[ring]; | 89 | instance = ring / adev->uvd.num_enc_rings; |
| 90 | *out_ring = | ||
| 91 | &adev->uvd.inst[instance].ring_enc[ring%adev->uvd.num_enc_rings]; | ||
| 87 | break; | 92 | break; |
| 88 | case AMDGPU_HW_IP_VCN_DEC: | 93 | case AMDGPU_HW_IP_VCN_DEC: |
| 89 | *out_ring = &adev->vcn.ring_dec; | 94 | *out_ring = &adev->vcn.ring_dec; |
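A worked example of the new UVD-ENC mapping above (values assumed for illustration): with two UVD instances and adev->uvd.num_enc_rings == 2, a user-visible ring index of 3 selects the second instance's second encode ring.

    /* Illustrative arithmetic only, matching the identity mapping above. */
    u32 ring = 3;                                    /* user-visible UVD_ENC ring index */
    u32 instance = ring / adev->uvd.num_enc_rings;   /* 3 / 2 == 1 -> second UVD instance */
    u32 enc_idx  = ring % adev->uvd.num_enc_rings;   /* 3 % 2 == 1 -> second encode ring  */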
| @@ -240,13 +245,14 @@ int amdgpu_queue_mgr_map(struct amdgpu_device *adev, | |||
| 240 | ip_num_rings = adev->sdma.num_instances; | 245 | ip_num_rings = adev->sdma.num_instances; |
| 241 | break; | 246 | break; |
| 242 | case AMDGPU_HW_IP_UVD: | 247 | case AMDGPU_HW_IP_UVD: |
| 243 | ip_num_rings = 1; | 248 | ip_num_rings = adev->uvd.num_uvd_inst; |
| 244 | break; | 249 | break; |
| 245 | case AMDGPU_HW_IP_VCE: | 250 | case AMDGPU_HW_IP_VCE: |
| 246 | ip_num_rings = adev->vce.num_rings; | 251 | ip_num_rings = adev->vce.num_rings; |
| 247 | break; | 252 | break; |
| 248 | case AMDGPU_HW_IP_UVD_ENC: | 253 | case AMDGPU_HW_IP_UVD_ENC: |
| 249 | ip_num_rings = adev->uvd.num_enc_rings; | 254 | ip_num_rings = |
| 255 | adev->uvd.num_enc_rings * adev->uvd.num_uvd_inst; | ||
| 250 | break; | 256 | break; |
| 251 | case AMDGPU_HW_IP_VCN_DEC: | 257 | case AMDGPU_HW_IP_VCN_DEC: |
| 252 | ip_num_rings = 1; | 258 | ip_num_rings = 1; |
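The queue-manager change above stops treating UVD as a single-instance block: user-visible UVD and UVD-ENC ring indices are now flat across all instances, so the identity map has to split a flat index into an instance number and a per-instance ring. A minimal sketch of that decode, assuming num_enc_rings encode rings per UVD instance (the helper name is illustrative, not part of the patch):

static inline void uvd_enc_ring_decode(u32 flat_ring, u32 num_enc_rings,
				       u32 *instance, u32 *subring)
{
	/* which UVD instance owns this user-visible ring */
	*instance = flat_ring / num_enc_rings;
	/* encode ring index within that instance */
	*subring = flat_ring % num_enc_rings;
}

This matches the ip_num_rings change in the same file, where the reported ring count becomes num_enc_rings * num_uvd_inst.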
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index d5f526f38e50..c6850b629d0e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | |||
| @@ -362,6 +362,7 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring) | |||
| 362 | 362 | ||
| 363 | dma_fence_put(ring->vmid_wait); | 363 | dma_fence_put(ring->vmid_wait); |
| 364 | ring->vmid_wait = NULL; | 364 | ring->vmid_wait = NULL; |
| 365 | ring->me = 0; | ||
| 365 | 366 | ||
| 366 | ring->adev->rings[ring->idx] = NULL; | 367 | ring->adev->rings[ring->idx] = NULL; |
| 367 | } | 368 | } |
| @@ -459,6 +460,26 @@ void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring) | |||
| 459 | spin_unlock(&adev->ring_lru_list_lock); | 460 | spin_unlock(&adev->ring_lru_list_lock); |
| 460 | } | 461 | } |
| 461 | 462 | ||
| 463 | /** | ||
| 464 | * amdgpu_ring_emit_reg_write_reg_wait_helper - ring helper | ||
| 465 | * | ||
| 466 | * @ring: ring used to emit the write/wait sequence | ||
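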
| 467 | * @reg0: register to write | ||
| 468 | * @reg1: register to wait on | ||
| 469 | * @ref: reference value to write/wait on | ||
| 470 | * @mask: mask to wait on | ||
| 471 | * | ||
| 472 | * Helper for rings that don't support write and wait in a | ||
| 473 | * single oneshot packet. | ||
| 474 | */ | ||
| 475 | void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring, | ||
| 476 | uint32_t reg0, uint32_t reg1, | ||
| 477 | uint32_t ref, uint32_t mask) | ||
| 478 | { | ||
| 479 | amdgpu_ring_emit_wreg(ring, reg0, ref); | ||
| 480 | amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask); | ||
| 481 | } | ||
| 482 | |||
| 462 | /* | 483 | /* |
| 463 | * Debugfs info | 484 | * Debugfs info |
| 464 | */ | 485 | */ |
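The helper added above simply chains the two existing per-ring callbacks for engines that have no combined write-then-wait packet. A hedged usage sketch, assuming a caller that prefers the per-ring callback added to amdgpu_ring_funcs below and falls back to the generic helper; the wrapper name is hypothetical and not taken from the patch:

static void example_reg_write_reg_wait(struct amdgpu_ring *ring,
				       uint32_t reg0, uint32_t reg1,
				       uint32_t ref, uint32_t mask)
{
	if (ring->funcs->emit_reg_write_reg_wait)
		/* engine provides a combined packet (or its own helper) */
		ring->funcs->emit_reg_write_reg_wait(ring, reg0, reg1,
						     ref, mask);
	else
		/* otherwise emit a register write followed by a polling wait */
		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
							   ref, mask);
}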
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 1a5911882657..1513124c5659 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | |||
| @@ -29,7 +29,7 @@ | |||
| 29 | #include <drm/drm_print.h> | 29 | #include <drm/drm_print.h> |
| 30 | 30 | ||
| 31 | /* max number of rings */ | 31 | /* max number of rings */ |
| 32 | #define AMDGPU_MAX_RINGS 18 | 32 | #define AMDGPU_MAX_RINGS 21 |
| 33 | #define AMDGPU_MAX_GFX_RINGS 1 | 33 | #define AMDGPU_MAX_GFX_RINGS 1 |
| 34 | #define AMDGPU_MAX_COMPUTE_RINGS 8 | 34 | #define AMDGPU_MAX_COMPUTE_RINGS 8 |
| 35 | #define AMDGPU_MAX_VCE_RINGS 3 | 35 | #define AMDGPU_MAX_VCE_RINGS 3 |
| @@ -42,6 +42,7 @@ | |||
| 42 | 42 | ||
| 43 | #define AMDGPU_FENCE_FLAG_64BIT (1 << 0) | 43 | #define AMDGPU_FENCE_FLAG_64BIT (1 << 0) |
| 44 | #define AMDGPU_FENCE_FLAG_INT (1 << 1) | 44 | #define AMDGPU_FENCE_FLAG_INT (1 << 1) |
| 45 | #define AMDGPU_FENCE_FLAG_TC_WB_ONLY (1 << 2) | ||
| 45 | 46 | ||
| 46 | enum amdgpu_ring_type { | 47 | enum amdgpu_ring_type { |
| 47 | AMDGPU_RING_TYPE_GFX, | 48 | AMDGPU_RING_TYPE_GFX, |
| @@ -90,7 +91,8 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, | |||
| 90 | unsigned irq_type); | 91 | unsigned irq_type); |
| 91 | void amdgpu_fence_driver_suspend(struct amdgpu_device *adev); | 92 | void amdgpu_fence_driver_suspend(struct amdgpu_device *adev); |
| 92 | void amdgpu_fence_driver_resume(struct amdgpu_device *adev); | 93 | void amdgpu_fence_driver_resume(struct amdgpu_device *adev); |
| 93 | int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence); | 94 | int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence, |
| 95 | unsigned flags); | ||
| 94 | int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s); | 96 | int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s); |
| 95 | void amdgpu_fence_process(struct amdgpu_ring *ring); | 97 | void amdgpu_fence_process(struct amdgpu_ring *ring); |
| 96 | int amdgpu_fence_wait_empty(struct amdgpu_ring *ring); | 98 | int amdgpu_fence_wait_empty(struct amdgpu_ring *ring); |
| @@ -154,6 +156,9 @@ struct amdgpu_ring_funcs { | |||
| 154 | void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); | 156 | void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); |
| 155 | void (*emit_reg_wait)(struct amdgpu_ring *ring, uint32_t reg, | 157 | void (*emit_reg_wait)(struct amdgpu_ring *ring, uint32_t reg, |
| 156 | uint32_t val, uint32_t mask); | 158 | uint32_t val, uint32_t mask); |
| 159 | void (*emit_reg_write_reg_wait)(struct amdgpu_ring *ring, | ||
| 160 | uint32_t reg0, uint32_t reg1, | ||
| 161 | uint32_t ref, uint32_t mask); | ||
| 157 | void (*emit_tmz)(struct amdgpu_ring *ring, bool start); | 162 | void (*emit_tmz)(struct amdgpu_ring *ring, bool start); |
| 158 | /* priority functions */ | 163 | /* priority functions */ |
| 159 | void (*set_priority) (struct amdgpu_ring *ring, | 164 | void (*set_priority) (struct amdgpu_ring *ring, |
| @@ -228,6 +233,10 @@ int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type, | |||
| 228 | int *blacklist, int num_blacklist, | 233 | int *blacklist, int num_blacklist, |
| 229 | bool lru_pipe_order, struct amdgpu_ring **ring); | 234 | bool lru_pipe_order, struct amdgpu_ring **ring); |
| 230 | void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring); | 235 | void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring); |
| 236 | void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring, | ||
| 237 | uint32_t reg0, uint32_t reg1, | ||
| 238 | uint32_t ref, uint32_t mask); | ||
| 239 | |||
| 231 | static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring) | 240 | static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring) |
| 232 | { | 241 | { |
| 233 | int i = 0; | 242 | int i = 0; |
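amdgpu_fence_emit() now takes a flags word, and AMDGPU_FENCE_FLAG_TC_WB_ONLY joins the existing fence flags. A hedged sketch of the new signature only; whether a given engine honours the TC-write-back-only hint is up to its fence-emit implementation, and this call site is illustrative rather than taken from the patch:

struct dma_fence *fence;
int r;

/* emit a fence, asking (where supported) for a texture-cache write-back
 * only; pass 0 to keep the previous behaviour */
r = amdgpu_fence_emit(ring, &fence, AMDGPU_FENCE_FLAG_TC_WB_ONLY);
if (!r)
	dma_fence_put(fence);	/* drop the reference once it is no longer needed */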
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c index 2dbe87591f81..d167e8ab76d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c | |||
| @@ -33,6 +33,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) | |||
| 33 | struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; | 33 | struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; |
| 34 | struct amdgpu_bo *vram_obj = NULL; | 34 | struct amdgpu_bo *vram_obj = NULL; |
| 35 | struct amdgpu_bo **gtt_obj = NULL; | 35 | struct amdgpu_bo **gtt_obj = NULL; |
| 36 | struct amdgpu_bo_param bp; | ||
| 36 | uint64_t gart_addr, vram_addr; | 37 | uint64_t gart_addr, vram_addr; |
| 37 | unsigned n, size; | 38 | unsigned n, size; |
| 38 | int i, r; | 39 | int i, r; |
| @@ -58,9 +59,15 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) | |||
| 58 | r = 1; | 59 | r = 1; |
| 59 | goto out_cleanup; | 60 | goto out_cleanup; |
| 60 | } | 61 | } |
| 61 | 62 | memset(&bp, 0, sizeof(bp)); | |
| 62 | r = amdgpu_bo_create(adev, size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, 0, | 63 | bp.size = size; |
| 63 | ttm_bo_type_kernel, NULL, &vram_obj); | 64 | bp.byte_align = PAGE_SIZE; |
| 65 | bp.domain = AMDGPU_GEM_DOMAIN_VRAM; | ||
| 66 | bp.flags = 0; | ||
| 67 | bp.type = ttm_bo_type_kernel; | ||
| 68 | bp.resv = NULL; | ||
| 69 | |||
| 70 | r = amdgpu_bo_create(adev, &bp, &vram_obj); | ||
| 64 | if (r) { | 71 | if (r) { |
| 65 | DRM_ERROR("Failed to create VRAM object\n"); | 72 | DRM_ERROR("Failed to create VRAM object\n"); |
| 66 | goto out_cleanup; | 73 | goto out_cleanup; |
| @@ -79,9 +86,8 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) | |||
| 79 | void **vram_start, **vram_end; | 86 | void **vram_start, **vram_end; |
| 80 | struct dma_fence *fence = NULL; | 87 | struct dma_fence *fence = NULL; |
| 81 | 88 | ||
| 82 | r = amdgpu_bo_create(adev, size, PAGE_SIZE, | 89 | bp.domain = AMDGPU_GEM_DOMAIN_GTT; |
| 83 | AMDGPU_GEM_DOMAIN_GTT, 0, | 90 | r = amdgpu_bo_create(adev, &bp, gtt_obj + i); |
| 84 | ttm_bo_type_kernel, NULL, gtt_obj + i); | ||
| 85 | if (r) { | 91 | if (r) { |
| 86 | DRM_ERROR("Failed to create GTT object %d\n", i); | 92 | DRM_ERROR("Failed to create GTT object %d\n", i); |
| 87 | goto out_lclean; | 93 | goto out_lclean; |
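amdgpu_bo_create() now takes a single amdgpu_bo_param block instead of a long argument list, which is what the test above is converted to. A condensed sketch of the new calling convention, using the same fields as the hunk; adev and error handling are assumed context:

struct amdgpu_bo_param bp;
struct amdgpu_bo *bo = NULL;
int r;

memset(&bp, 0, sizeof(bp));
bp.size = PAGE_SIZE;			/* allocation size in bytes */
bp.byte_align = PAGE_SIZE;		/* placement alignment */
bp.domain = AMDGPU_GEM_DOMAIN_GTT;	/* initial domain */
bp.flags = 0;				/* AMDGPU_GEM_CREATE_* flags */
bp.type = ttm_bo_type_kernel;		/* kernel-owned BO */
bp.resv = NULL;				/* no shared reservation object */

r = amdgpu_bo_create(adev, &bp, &bo);
if (r)
	DRM_ERROR("amdgpu_bo_create failed (%d)\n", r);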
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 532263ab6e16..e96e26d3f3b0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | |||
| @@ -275,7 +275,7 @@ TRACE_EVENT(amdgpu_vm_bo_unmap, | |||
| 275 | ), | 275 | ), |
| 276 | 276 | ||
| 277 | TP_fast_assign( | 277 | TP_fast_assign( |
| 278 | __entry->bo = bo_va->base.bo; | 278 | __entry->bo = bo_va ? bo_va->base.bo : NULL; |
| 279 | __entry->start = mapping->start; | 279 | __entry->start = mapping->start; |
| 280 | __entry->last = mapping->last; | 280 | __entry->last = mapping->last; |
| 281 | __entry->offset = mapping->offset; | 281 | __entry->offset = mapping->offset; |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 205da3ff9cd0..e93a0a237dc3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | |||
| @@ -63,16 +63,44 @@ static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev); | |||
| 63 | /* | 63 | /* |
| 64 | * Global memory. | 64 | * Global memory. |
| 65 | */ | 65 | */ |
| 66 | |||
| 67 | /** | ||
| 68 | * amdgpu_ttm_mem_global_init - Initialize and acquire reference to | ||
| 69 | * memory object | ||
| 70 | * | ||
| 71 | * @ref: Object for initialization. | ||
| 72 | * | ||
| 73 | * This is called by drm_global_item_ref() when an object is being | ||
| 74 | * initialized. | ||
| 75 | */ | ||
| 66 | static int amdgpu_ttm_mem_global_init(struct drm_global_reference *ref) | 76 | static int amdgpu_ttm_mem_global_init(struct drm_global_reference *ref) |
| 67 | { | 77 | { |
| 68 | return ttm_mem_global_init(ref->object); | 78 | return ttm_mem_global_init(ref->object); |
| 69 | } | 79 | } |
| 70 | 80 | ||
| 81 | /** | ||
| 82 | * amdgpu_ttm_mem_global_release - Drop reference to a memory object | ||
| 83 | * | ||
| 84 | * @ref: Object being removed | ||
| 85 | * | ||
| 86 | * This is called by drm_global_item_unref() when an object is being | ||
| 87 | * released. | ||
| 88 | */ | ||
| 71 | static void amdgpu_ttm_mem_global_release(struct drm_global_reference *ref) | 89 | static void amdgpu_ttm_mem_global_release(struct drm_global_reference *ref) |
| 72 | { | 90 | { |
| 73 | ttm_mem_global_release(ref->object); | 91 | ttm_mem_global_release(ref->object); |
| 74 | } | 92 | } |
| 75 | 93 | ||
| 94 | /** | ||
| 95 | * amdgpu_ttm_global_init - Initialize global TTM memory reference | ||
| 96 | * structures. | ||
| 97 | * | ||
| 98 | * @adev: AMDGPU device for which the global structures need to be | ||
| 99 | * registered. | ||
| 100 | * | ||
| 101 | * This is called as part of the AMDGPU ttm init from amdgpu_ttm_init() | ||
| 102 | * during bring up. | ||
| 103 | */ | ||
| 76 | static int amdgpu_ttm_global_init(struct amdgpu_device *adev) | 104 | static int amdgpu_ttm_global_init(struct amdgpu_device *adev) |
| 77 | { | 105 | { |
| 78 | struct drm_global_reference *global_ref; | 106 | struct drm_global_reference *global_ref; |
| @@ -80,7 +108,9 @@ static int amdgpu_ttm_global_init(struct amdgpu_device *adev) | |||
| 80 | struct drm_sched_rq *rq; | 108 | struct drm_sched_rq *rq; |
| 81 | int r; | 109 | int r; |
| 82 | 110 | ||
| 111 | /* ensure reference is false in case init fails */ | ||
| 83 | adev->mman.mem_global_referenced = false; | 112 | adev->mman.mem_global_referenced = false; |
| 113 | |||
| 84 | global_ref = &adev->mman.mem_global_ref; | 114 | global_ref = &adev->mman.mem_global_ref; |
| 85 | global_ref->global_type = DRM_GLOBAL_TTM_MEM; | 115 | global_ref->global_type = DRM_GLOBAL_TTM_MEM; |
| 86 | global_ref->size = sizeof(struct ttm_mem_global); | 116 | global_ref->size = sizeof(struct ttm_mem_global); |
| @@ -111,7 +141,7 @@ static int amdgpu_ttm_global_init(struct amdgpu_device *adev) | |||
| 111 | ring = adev->mman.buffer_funcs_ring; | 141 | ring = adev->mman.buffer_funcs_ring; |
| 112 | rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL]; | 142 | rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL]; |
| 113 | r = drm_sched_entity_init(&ring->sched, &adev->mman.entity, | 143 | r = drm_sched_entity_init(&ring->sched, &adev->mman.entity, |
| 114 | rq, amdgpu_sched_jobs, NULL); | 144 | rq, NULL); |
| 115 | if (r) { | 145 | if (r) { |
| 116 | DRM_ERROR("Failed setting up TTM BO move run queue.\n"); | 146 | DRM_ERROR("Failed setting up TTM BO move run queue.\n"); |
| 117 | goto error_entity; | 147 | goto error_entity; |
| @@ -146,6 +176,18 @@ static int amdgpu_invalidate_caches(struct ttm_bo_device *bdev, uint32_t flags) | |||
| 146 | return 0; | 176 | return 0; |
| 147 | } | 177 | } |
| 148 | 178 | ||
| 179 | /** | ||
| 180 | * amdgpu_init_mem_type - Initialize a memory manager for a specific | ||
| 181 | * type of memory request. | ||
| 182 | * | ||
| 183 | * @bdev: The TTM BO device object (contains a reference to | ||
| 184 | * amdgpu_device) | ||
| 185 | * @type: The type of memory requested | ||
| 186 | * @man: The TTM memory type manager to set up for @type | ||
| 187 | * | ||
| 188 | * This is called by ttm_bo_init_mm() when a new memory type is being | ||
| 189 | * initialized. | ||
| 190 | */ | ||
| 149 | static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type, | 191 | static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type, |
| 150 | struct ttm_mem_type_manager *man) | 192 | struct ttm_mem_type_manager *man) |
| 151 | { | 193 | { |
| @@ -161,6 +203,7 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type, | |||
| 161 | man->default_caching = TTM_PL_FLAG_CACHED; | 203 | man->default_caching = TTM_PL_FLAG_CACHED; |
| 162 | break; | 204 | break; |
| 163 | case TTM_PL_TT: | 205 | case TTM_PL_TT: |
| 206 | /* GTT memory */ | ||
| 164 | man->func = &amdgpu_gtt_mgr_func; | 207 | man->func = &amdgpu_gtt_mgr_func; |
| 165 | man->gpu_offset = adev->gmc.gart_start; | 208 | man->gpu_offset = adev->gmc.gart_start; |
| 166 | man->available_caching = TTM_PL_MASK_CACHING; | 209 | man->available_caching = TTM_PL_MASK_CACHING; |
| @@ -193,6 +236,14 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type, | |||
| 193 | return 0; | 236 | return 0; |
| 194 | } | 237 | } |
| 195 | 238 | ||
| 239 | /** | ||
| 240 | * amdgpu_evict_flags - Compute placement flags | ||
| 241 | * | ||
| 242 | * @bo: The buffer object to evict | ||
| 243 | * @placement: Possible destination(s) for evicted BO | ||
| 244 | * | ||
| 245 | * Fill in placement data when ttm_bo_evict() is called | ||
| 246 | */ | ||
| 196 | static void amdgpu_evict_flags(struct ttm_buffer_object *bo, | 247 | static void amdgpu_evict_flags(struct ttm_buffer_object *bo, |
| 197 | struct ttm_placement *placement) | 248 | struct ttm_placement *placement) |
| 198 | { | 249 | { |
| @@ -204,12 +255,14 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, | |||
| 204 | .flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM | 255 | .flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM |
| 205 | }; | 256 | }; |
| 206 | 257 | ||
| 258 | /* Don't handle scatter gather BOs */ | ||
| 207 | if (bo->type == ttm_bo_type_sg) { | 259 | if (bo->type == ttm_bo_type_sg) { |
| 208 | placement->num_placement = 0; | 260 | placement->num_placement = 0; |
| 209 | placement->num_busy_placement = 0; | 261 | placement->num_busy_placement = 0; |
| 210 | return; | 262 | return; |
| 211 | } | 263 | } |
| 212 | 264 | ||
| 265 | /* Object isn't an AMDGPU object so ignore */ | ||
| 213 | if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) { | 266 | if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) { |
| 214 | placement->placement = &placements; | 267 | placement->placement = &placements; |
| 215 | placement->busy_placement = &placements; | 268 | placement->busy_placement = &placements; |
| @@ -217,26 +270,16 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, | |||
| 217 | placement->num_busy_placement = 1; | 270 | placement->num_busy_placement = 1; |
| 218 | return; | 271 | return; |
| 219 | } | 272 | } |
| 273 | |||
| 220 | abo = ttm_to_amdgpu_bo(bo); | 274 | abo = ttm_to_amdgpu_bo(bo); |
| 221 | switch (bo->mem.mem_type) { | 275 | switch (bo->mem.mem_type) { |
| 222 | case TTM_PL_VRAM: | 276 | case TTM_PL_VRAM: |
| 223 | if (!adev->mman.buffer_funcs_enabled) { | 277 | if (!adev->mman.buffer_funcs_enabled) { |
| 278 | /* Move to system memory */ | ||
| 224 | amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU); | 279 | amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU); |
| 225 | } else if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size && | 280 | } else if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size && |
| 226 | !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) { | 281 | !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) && |
| 227 | unsigned fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT; | 282 | amdgpu_bo_in_cpu_visible_vram(abo)) { |
| 228 | struct drm_mm_node *node = bo->mem.mm_node; | ||
| 229 | unsigned long pages_left; | ||
| 230 | |||
| 231 | for (pages_left = bo->mem.num_pages; | ||
| 232 | pages_left; | ||
| 233 | pages_left -= node->size, node++) { | ||
| 234 | if (node->start < fpfn) | ||
| 235 | break; | ||
| 236 | } | ||
| 237 | |||
| 238 | if (!pages_left) | ||
| 239 | goto gtt; | ||
| 240 | 283 | ||
| 241 | /* Try evicting to the CPU inaccessible part of VRAM | 284 | /* Try evicting to the CPU inaccessible part of VRAM |
| 242 | * first, but only set GTT as busy placement, so this | 285 | * first, but only set GTT as busy placement, so this |
| @@ -245,12 +288,12 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, | |||
| 245 | */ | 288 | */ |
| 246 | amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM | | 289 | amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM | |
| 247 | AMDGPU_GEM_DOMAIN_GTT); | 290 | AMDGPU_GEM_DOMAIN_GTT); |
| 248 | abo->placements[0].fpfn = fpfn; | 291 | abo->placements[0].fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT; |
| 249 | abo->placements[0].lpfn = 0; | 292 | abo->placements[0].lpfn = 0; |
| 250 | abo->placement.busy_placement = &abo->placements[1]; | 293 | abo->placement.busy_placement = &abo->placements[1]; |
| 251 | abo->placement.num_busy_placement = 1; | 294 | abo->placement.num_busy_placement = 1; |
| 252 | } else { | 295 | } else { |
| 253 | gtt: | 296 | /* Move to GTT memory */ |
| 254 | amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT); | 297 | amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT); |
| 255 | } | 298 | } |
| 256 | break; | 299 | break; |
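The open-coded drm_mm walk that used to decide whether an evicted BO still touches CPU-visible VRAM is replaced above by a call to amdgpu_bo_in_cpu_visible_vram(). As a sketch of what that test amounts to, reconstructed from the removed loop (the real helper's name and definition are not part of this hunk):

static bool bo_touches_cpu_visible_vram(struct amdgpu_device *adev,
					struct ttm_buffer_object *bo)
{
	unsigned fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
	struct drm_mm_node *node = bo->mem.mm_node;
	unsigned long pages_left;

	for (pages_left = bo->mem.num_pages; pages_left;
	     pages_left -= node->size, node++) {
		/* any node starting below the visible limit is CPU accessible */
		if (node->start < fpfn)
			return true;
	}
	return false;
}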
| @@ -261,6 +304,15 @@ gtt: | |||
| 261 | *placement = abo->placement; | 304 | *placement = abo->placement; |
| 262 | } | 305 | } |
| 263 | 306 | ||
| 307 | /** | ||
| 308 | * amdgpu_verify_access - Verify access for a mmap call | ||
| 309 | * | ||
| 310 | * @bo: The buffer object to map | ||
| 311 | * @filp: The file pointer from the process performing the mmap | ||
| 312 | * | ||
| 313 | * This is called by ttm_bo_mmap() to verify whether a process | ||
| 314 | * has the right to mmap a BO to their process space. | ||
| 315 | */ | ||
| 264 | static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp) | 316 | static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp) |
| 265 | { | 317 | { |
| 266 | struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo); | 318 | struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo); |
| @@ -278,6 +330,15 @@ static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp) | |||
| 278 | filp->private_data); | 330 | filp->private_data); |
| 279 | } | 331 | } |
| 280 | 332 | ||
| 333 | /** | ||
| 334 | * amdgpu_move_null - Register memory for a buffer object | ||
| 335 | * | ||
| 336 | * @bo: The bo to assign the memory to | ||
| 337 | * @new_mem: The memory to be assigned. | ||
| 338 | * | ||
| 339 | * Assign the memory from new_mem to the memory of the buffer object | ||
| 340 | * bo. | ||
| 341 | */ | ||
| 281 | static void amdgpu_move_null(struct ttm_buffer_object *bo, | 342 | static void amdgpu_move_null(struct ttm_buffer_object *bo, |
| 282 | struct ttm_mem_reg *new_mem) | 343 | struct ttm_mem_reg *new_mem) |
| 283 | { | 344 | { |
| @@ -288,6 +349,10 @@ static void amdgpu_move_null(struct ttm_buffer_object *bo, | |||
| 288 | new_mem->mm_node = NULL; | 349 | new_mem->mm_node = NULL; |
| 289 | } | 350 | } |
| 290 | 351 | ||
| 352 | /** | ||
| 353 | * amdgpu_mm_node_addr - Compute the GPU relative offset of a GTT | ||
| 354 | * buffer. | ||
| 355 | */ | ||
| 291 | static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo, | 356 | static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo, |
| 292 | struct drm_mm_node *mm_node, | 357 | struct drm_mm_node *mm_node, |
| 293 | struct ttm_mem_reg *mem) | 358 | struct ttm_mem_reg *mem) |
| @@ -302,9 +367,10 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo, | |||
| 302 | } | 367 | } |
| 303 | 368 | ||
| 304 | /** | 369 | /** |
| 305 | * amdgpu_find_mm_node - Helper function finds the drm_mm_node | 370 | * amdgpu_find_mm_node - Helper function finds the drm_mm_node |
| 306 | * corresponding to @offset. It also modifies the offset to be | 371 | * corresponding to @offset. It also modifies |
| 307 | * within the drm_mm_node returned | 372 | * the offset to be within the drm_mm_node |
| 373 | * returned | ||
| 308 | */ | 374 | */ |
| 309 | static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem, | 375 | static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem, |
| 310 | unsigned long *offset) | 376 | unsigned long *offset) |
| @@ -443,7 +509,12 @@ error: | |||
| 443 | return r; | 509 | return r; |
| 444 | } | 510 | } |
| 445 | 511 | ||
| 446 | 512 | /** | |
| 513 | * amdgpu_move_blit - Copy an entire buffer to another buffer | ||
| 514 | * | ||
| 515 | * This is a helper called by amdgpu_bo_move() and | ||
| 516 | * amdgpu_move_vram_ram() to help move buffers to and from VRAM. | ||
| 517 | */ | ||
| 447 | static int amdgpu_move_blit(struct ttm_buffer_object *bo, | 518 | static int amdgpu_move_blit(struct ttm_buffer_object *bo, |
| 448 | bool evict, bool no_wait_gpu, | 519 | bool evict, bool no_wait_gpu, |
| 449 | struct ttm_mem_reg *new_mem, | 520 | struct ttm_mem_reg *new_mem, |
| @@ -478,6 +549,11 @@ error: | |||
| 478 | return r; | 549 | return r; |
| 479 | } | 550 | } |
| 480 | 551 | ||
| 552 | /** | ||
| 553 | * amdgpu_move_vram_ram - Copy VRAM buffer to RAM buffer | ||
| 554 | * | ||
| 555 | * Called by amdgpu_bo_move(). | ||
| 556 | */ | ||
| 481 | static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict, | 557 | static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict, |
| 482 | struct ttm_operation_ctx *ctx, | 558 | struct ttm_operation_ctx *ctx, |
| 483 | struct ttm_mem_reg *new_mem) | 559 | struct ttm_mem_reg *new_mem) |
| @@ -490,6 +566,8 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict, | |||
| 490 | int r; | 566 | int r; |
| 491 | 567 | ||
| 492 | adev = amdgpu_ttm_adev(bo->bdev); | 568 | adev = amdgpu_ttm_adev(bo->bdev); |
| 569 | |||
| 570 | /* create space/pages for new_mem in GTT space */ | ||
| 493 | tmp_mem = *new_mem; | 571 | tmp_mem = *new_mem; |
| 494 | tmp_mem.mm_node = NULL; | 572 | tmp_mem.mm_node = NULL; |
| 495 | placement.num_placement = 1; | 573 | placement.num_placement = 1; |
| @@ -504,25 +582,36 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict, | |||
| 504 | return r; | 582 | return r; |
| 505 | } | 583 | } |
| 506 | 584 | ||
| 585 | /* set caching flags */ | ||
| 507 | r = ttm_tt_set_placement_caching(bo->ttm, tmp_mem.placement); | 586 | r = ttm_tt_set_placement_caching(bo->ttm, tmp_mem.placement); |
| 508 | if (unlikely(r)) { | 587 | if (unlikely(r)) { |
| 509 | goto out_cleanup; | 588 | goto out_cleanup; |
| 510 | } | 589 | } |
| 511 | 590 | ||
| 591 | /* Bind the memory to the GTT space */ | ||
| 512 | r = ttm_tt_bind(bo->ttm, &tmp_mem, ctx); | 592 | r = ttm_tt_bind(bo->ttm, &tmp_mem, ctx); |
| 513 | if (unlikely(r)) { | 593 | if (unlikely(r)) { |
| 514 | goto out_cleanup; | 594 | goto out_cleanup; |
| 515 | } | 595 | } |
| 596 | |||
| 597 | /* blit VRAM to GTT */ | ||
| 516 | r = amdgpu_move_blit(bo, true, ctx->no_wait_gpu, &tmp_mem, old_mem); | 598 | r = amdgpu_move_blit(bo, true, ctx->no_wait_gpu, &tmp_mem, old_mem); |
| 517 | if (unlikely(r)) { | 599 | if (unlikely(r)) { |
| 518 | goto out_cleanup; | 600 | goto out_cleanup; |
| 519 | } | 601 | } |
| 602 | |||
| 603 | /* move BO (in tmp_mem) to new_mem */ | ||
| 520 | r = ttm_bo_move_ttm(bo, ctx, new_mem); | 604 | r = ttm_bo_move_ttm(bo, ctx, new_mem); |
| 521 | out_cleanup: | 605 | out_cleanup: |
| 522 | ttm_bo_mem_put(bo, &tmp_mem); | 606 | ttm_bo_mem_put(bo, &tmp_mem); |
| 523 | return r; | 607 | return r; |
| 524 | } | 608 | } |
| 525 | 609 | ||
| 610 | /** | ||
| 611 | * amdgpu_move_ram_vram - Copy buffer from RAM to VRAM | ||
| 612 | * | ||
| 613 | * Called by amdgpu_bo_move(). | ||
| 614 | */ | ||
| 526 | static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict, | 615 | static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict, |
| 527 | struct ttm_operation_ctx *ctx, | 616 | struct ttm_operation_ctx *ctx, |
| 528 | struct ttm_mem_reg *new_mem) | 617 | struct ttm_mem_reg *new_mem) |
| @@ -535,6 +624,8 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict, | |||
| 535 | int r; | 624 | int r; |
| 536 | 625 | ||
| 537 | adev = amdgpu_ttm_adev(bo->bdev); | 626 | adev = amdgpu_ttm_adev(bo->bdev); |
| 627 | |||
| 628 | /* make space in GTT for old_mem buffer */ | ||
| 538 | tmp_mem = *new_mem; | 629 | tmp_mem = *new_mem; |
| 539 | tmp_mem.mm_node = NULL; | 630 | tmp_mem.mm_node = NULL; |
| 540 | placement.num_placement = 1; | 631 | placement.num_placement = 1; |
| @@ -548,10 +639,14 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict, | |||
| 548 | if (unlikely(r)) { | 639 | if (unlikely(r)) { |
| 549 | return r; | 640 | return r; |
| 550 | } | 641 | } |
| 642 | |||
| 643 | /* move/bind old memory to GTT space */ | ||
| 551 | r = ttm_bo_move_ttm(bo, ctx, &tmp_mem); | 644 | r = ttm_bo_move_ttm(bo, ctx, &tmp_mem); |
| 552 | if (unlikely(r)) { | 645 | if (unlikely(r)) { |
| 553 | goto out_cleanup; | 646 | goto out_cleanup; |
| 554 | } | 647 | } |
| 648 | |||
| 649 | /* copy to VRAM */ | ||
| 555 | r = amdgpu_move_blit(bo, true, ctx->no_wait_gpu, new_mem, old_mem); | 650 | r = amdgpu_move_blit(bo, true, ctx->no_wait_gpu, new_mem, old_mem); |
| 556 | if (unlikely(r)) { | 651 | if (unlikely(r)) { |
| 557 | goto out_cleanup; | 652 | goto out_cleanup; |
| @@ -561,6 +656,11 @@ out_cleanup: | |||
| 561 | return r; | 656 | return r; |
| 562 | } | 657 | } |
| 563 | 658 | ||
| 659 | /** | ||
| 660 | * amdgpu_bo_move - Move a buffer object to a new memory location | ||
| 661 | * | ||
| 662 | * Called by ttm_bo_handle_move_mem() | ||
| 663 | */ | ||
| 564 | static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, | 664 | static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, |
| 565 | struct ttm_operation_ctx *ctx, | 665 | struct ttm_operation_ctx *ctx, |
| 566 | struct ttm_mem_reg *new_mem) | 666 | struct ttm_mem_reg *new_mem) |
| @@ -626,6 +726,11 @@ memcpy: | |||
| 626 | return 0; | 726 | return 0; |
| 627 | } | 727 | } |
| 628 | 728 | ||
| 729 | /** | ||
| 730 | * amdgpu_ttm_io_mem_reserve - Reserve a block of memory during a fault | ||
| 731 | * | ||
| 732 | * Called by ttm_mem_io_reserve() ultimately via ttm_bo_vm_fault() | ||
| 733 | */ | ||
| 629 | static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem) | 734 | static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem) |
| 630 | { | 735 | { |
| 631 | struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type]; | 736 | struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type]; |
| @@ -695,7 +800,7 @@ struct amdgpu_ttm_tt { | |||
| 695 | struct ttm_dma_tt ttm; | 800 | struct ttm_dma_tt ttm; |
| 696 | u64 offset; | 801 | u64 offset; |
| 697 | uint64_t userptr; | 802 | uint64_t userptr; |
| 698 | struct mm_struct *usermm; | 803 | struct task_struct *usertask; |
| 699 | uint32_t userflags; | 804 | uint32_t userflags; |
| 700 | spinlock_t guptasklock; | 805 | spinlock_t guptasklock; |
| 701 | struct list_head guptasks; | 806 | struct list_head guptasks; |
| @@ -703,17 +808,29 @@ struct amdgpu_ttm_tt { | |||
| 703 | uint32_t last_set_pages; | 808 | uint32_t last_set_pages; |
| 704 | }; | 809 | }; |
| 705 | 810 | ||
| 811 | /** | ||
| 812 | * amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to | ||
| 813 | * by a USERPTR pointer to memory | ||
| 814 | * | ||
| 815 | * Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos(). | ||
| 816 | * This provides a wrapper around the get_user_pages() call to provide | ||
| 817 | * device accessible pages that back user memory. | ||
| 818 | */ | ||
| 706 | int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) | 819 | int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) |
| 707 | { | 820 | { |
| 708 | struct amdgpu_ttm_tt *gtt = (void *)ttm; | 821 | struct amdgpu_ttm_tt *gtt = (void *)ttm; |
| 822 | struct mm_struct *mm = gtt->usertask->mm; | ||
| 709 | unsigned int flags = 0; | 823 | unsigned int flags = 0; |
| 710 | unsigned pinned = 0; | 824 | unsigned pinned = 0; |
| 711 | int r; | 825 | int r; |
| 712 | 826 | ||
| 827 | if (!mm) /* Happens during process shutdown */ | ||
| 828 | return -ESRCH; | ||
| 829 | |||
| 713 | if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY)) | 830 | if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY)) |
| 714 | flags |= FOLL_WRITE; | 831 | flags |= FOLL_WRITE; |
| 715 | 832 | ||
| 716 | down_read(¤t->mm->mmap_sem); | 833 | down_read(&mm->mmap_sem); |
| 717 | 834 | ||
| 718 | if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) { | 835 | if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) { |
| 719 | /* check that we only use anonymous memory | 836 | /* check that we only use anonymous memory |
| @@ -721,13 +838,14 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) | |||
| 721 | unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE; | 838 | unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE; |
| 722 | struct vm_area_struct *vma; | 839 | struct vm_area_struct *vma; |
| 723 | 840 | ||
| 724 | vma = find_vma(gtt->usermm, gtt->userptr); | 841 | vma = find_vma(mm, gtt->userptr); |
| 725 | if (!vma || vma->vm_file || vma->vm_end < end) { | 842 | if (!vma || vma->vm_file || vma->vm_end < end) { |
| 726 | up_read(¤t->mm->mmap_sem); | 843 | up_read(&mm->mmap_sem); |
| 727 | return -EPERM; | 844 | return -EPERM; |
| 728 | } | 845 | } |
| 729 | } | 846 | } |
| 730 | 847 | ||
| 848 | /* loop enough times using contiguous pages of memory */ | ||
| 731 | do { | 849 | do { |
| 732 | unsigned num_pages = ttm->num_pages - pinned; | 850 | unsigned num_pages = ttm->num_pages - pinned; |
| 733 | uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE; | 851 | uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE; |
| @@ -739,7 +857,12 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) | |||
| 739 | list_add(&guptask.list, >t->guptasks); | 857 | list_add(&guptask.list, >t->guptasks); |
| 740 | spin_unlock(>t->guptasklock); | 858 | spin_unlock(>t->guptasklock); |
| 741 | 859 | ||
| 742 | r = get_user_pages(userptr, num_pages, flags, p, NULL); | 860 | if (mm == current->mm) |
| 861 | r = get_user_pages(userptr, num_pages, flags, p, NULL); | ||
| 862 | else | ||
| 863 | r = get_user_pages_remote(gtt->usertask, | ||
| 864 | mm, userptr, num_pages, | ||
| 865 | flags, p, NULL, NULL); | ||
| 743 | 866 | ||
| 744 | spin_lock(>t->guptasklock); | 867 | spin_lock(>t->guptasklock); |
| 745 | list_del(&guptask.list); | 868 | list_del(&guptask.list); |
| @@ -752,15 +875,23 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) | |||
| 752 | 875 | ||
| 753 | } while (pinned < ttm->num_pages); | 876 | } while (pinned < ttm->num_pages); |
| 754 | 877 | ||
| 755 | up_read(¤t->mm->mmap_sem); | 878 | up_read(&mm->mmap_sem); |
| 756 | return 0; | 879 | return 0; |
| 757 | 880 | ||
| 758 | release_pages: | 881 | release_pages: |
| 759 | release_pages(pages, pinned); | 882 | release_pages(pages, pinned); |
| 760 | up_read(¤t->mm->mmap_sem); | 883 | up_read(&mm->mmap_sem); |
| 761 | return r; | 884 | return r; |
| 762 | } | 885 | } |
| 763 | 886 | ||
| 887 | /** | ||
| 888 | * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages | ||
| 889 | * as necessary. | ||
| 890 | * | ||
| 891 | * Called by amdgpu_cs_list_validate(). This creates the page list | ||
| 892 | * that backs user memory and will ultimately be mapped into the device | ||
| 893 | * address space. | ||
| 894 | */ | ||
| 764 | void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages) | 895 | void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages) |
| 765 | { | 896 | { |
| 766 | struct amdgpu_ttm_tt *gtt = (void *)ttm; | 897 | struct amdgpu_ttm_tt *gtt = (void *)ttm; |
| @@ -775,6 +906,11 @@ void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages) | |||
| 775 | } | 906 | } |
| 776 | } | 907 | } |
| 777 | 908 | ||
| 909 | /** | ||
| 910 | * amdgpu_ttm_tt_mark_user_pages - Mark pages as dirty | ||
| 911 | * | ||
| 912 | * Called while unpinning userptr pages | ||
| 913 | */ | ||
| 778 | void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm) | 914 | void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm) |
| 779 | { | 915 | { |
| 780 | struct amdgpu_ttm_tt *gtt = (void *)ttm; | 916 | struct amdgpu_ttm_tt *gtt = (void *)ttm; |
| @@ -793,7 +929,12 @@ void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm) | |||
| 793 | } | 929 | } |
| 794 | } | 930 | } |
| 795 | 931 | ||
| 796 | /* prepare the sg table with the user pages */ | 932 | /** |
| 933 | * amdgpu_ttm_tt_pin_userptr - prepare the sg table with the | ||
| 934 | * user pages | ||
| 935 | * | ||
| 936 | * Called by amdgpu_ttm_backend_bind() | ||
| 937 | **/ | ||
| 797 | static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm) | 938 | static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm) |
| 798 | { | 939 | { |
| 799 | struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); | 940 | struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); |
| @@ -805,17 +946,20 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm) | |||
| 805 | enum dma_data_direction direction = write ? | 946 | enum dma_data_direction direction = write ? |
| 806 | DMA_BIDIRECTIONAL : DMA_TO_DEVICE; | 947 | DMA_BIDIRECTIONAL : DMA_TO_DEVICE; |
| 807 | 948 | ||
| 949 | /* Allocate an SG array and squash pages into it */ | ||
| 808 | r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0, | 950 | r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0, |
| 809 | ttm->num_pages << PAGE_SHIFT, | 951 | ttm->num_pages << PAGE_SHIFT, |
| 810 | GFP_KERNEL); | 952 | GFP_KERNEL); |
| 811 | if (r) | 953 | if (r) |
| 812 | goto release_sg; | 954 | goto release_sg; |
| 813 | 955 | ||
| 956 | /* Map SG to device */ | ||
| 814 | r = -ENOMEM; | 957 | r = -ENOMEM; |
| 815 | nents = dma_map_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction); | 958 | nents = dma_map_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction); |
| 816 | if (nents != ttm->sg->nents) | 959 | if (nents != ttm->sg->nents) |
| 817 | goto release_sg; | 960 | goto release_sg; |
| 818 | 961 | ||
| 962 | /* convert SG to linear array of pages and dma addresses */ | ||
| 819 | drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages, | 963 | drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages, |
| 820 | gtt->ttm.dma_address, ttm->num_pages); | 964 | gtt->ttm.dma_address, ttm->num_pages); |
| 821 | 965 | ||
| @@ -826,6 +970,9 @@ release_sg: | |||
| 826 | return r; | 970 | return r; |
| 827 | } | 971 | } |
| 828 | 972 | ||
| 973 | /** | ||
| 974 | * amdgpu_ttm_tt_unpin_userptr - Unpin and unmap userptr pages | ||
| 975 | */ | ||
| 829 | static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm) | 976 | static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm) |
| 830 | { | 977 | { |
| 831 | struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); | 978 | struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); |
| @@ -839,14 +986,60 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm) | |||
| 839 | if (!ttm->sg->sgl) | 986 | if (!ttm->sg->sgl) |
| 840 | return; | 987 | return; |
| 841 | 988 | ||
| 842 | /* free the sg table and pages again */ | 989 | /* unmap the pages mapped to the device */ |
| 843 | dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction); | 990 | dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction); |
| 844 | 991 | ||
| 992 | /* mark the pages as dirty */ | ||
| 845 | amdgpu_ttm_tt_mark_user_pages(ttm); | 993 | amdgpu_ttm_tt_mark_user_pages(ttm); |
| 846 | 994 | ||
| 847 | sg_free_table(ttm->sg); | 995 | sg_free_table(ttm->sg); |
| 848 | } | 996 | } |
| 849 | 997 | ||
| 998 | int amdgpu_ttm_gart_bind(struct amdgpu_device *adev, | ||
| 999 | struct ttm_buffer_object *tbo, | ||
| 1000 | uint64_t flags) | ||
| 1001 | { | ||
| 1002 | struct amdgpu_bo *abo = ttm_to_amdgpu_bo(tbo); | ||
| 1003 | struct ttm_tt *ttm = tbo->ttm; | ||
| 1004 | struct amdgpu_ttm_tt *gtt = (void *)ttm; | ||
| 1005 | int r; | ||
| 1006 | |||
| 1007 | if (abo->flags & AMDGPU_GEM_CREATE_MQD_GFX9) { | ||
| 1008 | uint64_t page_idx = 1; | ||
| 1009 | |||
| 1010 | r = amdgpu_gart_bind(adev, gtt->offset, page_idx, | ||
| 1011 | ttm->pages, gtt->ttm.dma_address, flags); | ||
| 1012 | if (r) | ||
| 1013 | goto gart_bind_fail; | ||
| 1014 | |||
| 1015 | /* Patch mtype of the second part BO */ | ||
| 1016 | flags &= ~AMDGPU_PTE_MTYPE_MASK; | ||
| 1017 | flags |= AMDGPU_PTE_MTYPE(AMDGPU_MTYPE_NC); | ||
| 1018 | |||
| 1019 | r = amdgpu_gart_bind(adev, | ||
| 1020 | gtt->offset + (page_idx << PAGE_SHIFT), | ||
| 1021 | ttm->num_pages - page_idx, | ||
| 1022 | &ttm->pages[page_idx], | ||
| 1023 | &(gtt->ttm.dma_address[page_idx]), flags); | ||
| 1024 | } else { | ||
| 1025 | r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages, | ||
| 1026 | ttm->pages, gtt->ttm.dma_address, flags); | ||
| 1027 | } | ||
| 1028 | |||
| 1029 | gart_bind_fail: | ||
| 1030 | if (r) | ||
| 1031 | DRM_ERROR("failed to bind %lu pages at 0x%08llX\n", | ||
| 1032 | ttm->num_pages, gtt->offset); | ||
| 1033 | |||
| 1034 | return r; | ||
| 1035 | } | ||
| 1036 | |||
| 1037 | /** | ||
| 1038 | * amdgpu_ttm_backend_bind - Bind GTT memory | ||
| 1039 | * | ||
| 1040 | * Called by ttm_tt_bind() on behalf of ttm_bo_handle_move_mem(). | ||
| 1041 | * This handles binding GTT memory to the device address space. | ||
| 1042 | */ | ||
| 850 | static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm, | 1043 | static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm, |
| 851 | struct ttm_mem_reg *bo_mem) | 1044 | struct ttm_mem_reg *bo_mem) |
| 852 | { | 1045 | { |
| @@ -877,7 +1070,10 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm, | |||
| 877 | return 0; | 1070 | return 0; |
| 878 | } | 1071 | } |
| 879 | 1072 | ||
| 1073 | /* compute PTE flags relevant to this BO memory */ | ||
| 880 | flags = amdgpu_ttm_tt_pte_flags(adev, ttm, bo_mem); | 1074 | flags = amdgpu_ttm_tt_pte_flags(adev, ttm, bo_mem); |
| 1075 | |||
| 1076 | /* bind pages into GART page tables */ | ||
| 881 | gtt->offset = (u64)bo_mem->start << PAGE_SHIFT; | 1077 | gtt->offset = (u64)bo_mem->start << PAGE_SHIFT; |
| 882 | r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages, | 1078 | r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages, |
| 883 | ttm->pages, gtt->ttm.dma_address, flags); | 1079 | ttm->pages, gtt->ttm.dma_address, flags); |
| @@ -888,6 +1084,9 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm, | |||
| 888 | return r; | 1084 | return r; |
| 889 | } | 1085 | } |
| 890 | 1086 | ||
| 1087 | /** | ||
| 1088 | * amdgpu_ttm_alloc_gart - Allocate GART memory for buffer object | ||
| 1089 | */ | ||
| 891 | int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo) | 1090 | int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo) |
| 892 | { | 1091 | { |
| 893 | struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); | 1092 | struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); |
| @@ -903,6 +1102,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo) | |||
| 903 | amdgpu_gtt_mgr_has_gart_addr(&bo->mem)) | 1102 | amdgpu_gtt_mgr_has_gart_addr(&bo->mem)) |
| 904 | return 0; | 1103 | return 0; |
| 905 | 1104 | ||
| 1105 | /* allocate GTT space */ | ||
| 906 | tmp = bo->mem; | 1106 | tmp = bo->mem; |
| 907 | tmp.mm_node = NULL; | 1107 | tmp.mm_node = NULL; |
| 908 | placement.num_placement = 1; | 1108 | placement.num_placement = 1; |
| @@ -918,10 +1118,12 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo) | |||
| 918 | if (unlikely(r)) | 1118 | if (unlikely(r)) |
| 919 | return r; | 1119 | return r; |
| 920 | 1120 | ||
| 1121 | /* compute PTE flags for this buffer object */ | ||
| 921 | flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, &tmp); | 1122 | flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, &tmp); |
| 1123 | |||
| 1124 | /* Bind pages */ | ||
| 922 | gtt->offset = (u64)tmp.start << PAGE_SHIFT; | 1125 | gtt->offset = (u64)tmp.start << PAGE_SHIFT; |
| 923 | r = amdgpu_gart_bind(adev, gtt->offset, bo->ttm->num_pages, | 1126 | r = amdgpu_ttm_gart_bind(adev, bo, flags); |
| 924 | bo->ttm->pages, gtt->ttm.dma_address, flags); | ||
| 925 | if (unlikely(r)) { | 1127 | if (unlikely(r)) { |
| 926 | ttm_bo_mem_put(bo, &tmp); | 1128 | ttm_bo_mem_put(bo, &tmp); |
| 927 | return r; | 1129 | return r; |
| @@ -935,31 +1137,40 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo) | |||
| 935 | return 0; | 1137 | return 0; |
| 936 | } | 1138 | } |
| 937 | 1139 | ||
| 1140 | /** | ||
| 1141 | * amdgpu_ttm_recover_gart - Rebind GTT pages | ||
| 1142 | * | ||
| 1143 | * Called by amdgpu_gtt_mgr_recover() from amdgpu_device_reset() to | ||
| 1144 | * rebind GTT pages during a GPU reset. | ||
| 1145 | */ | ||
| 938 | int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo) | 1146 | int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo) |
| 939 | { | 1147 | { |
| 940 | struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev); | 1148 | struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev); |
| 941 | struct amdgpu_ttm_tt *gtt = (void *)tbo->ttm; | ||
| 942 | uint64_t flags; | 1149 | uint64_t flags; |
| 943 | int r; | 1150 | int r; |
| 944 | 1151 | ||
| 945 | if (!gtt) | 1152 | if (!tbo->ttm) |
| 946 | return 0; | 1153 | return 0; |
| 947 | 1154 | ||
| 948 | flags = amdgpu_ttm_tt_pte_flags(adev, >t->ttm.ttm, &tbo->mem); | 1155 | flags = amdgpu_ttm_tt_pte_flags(adev, tbo->ttm, &tbo->mem); |
| 949 | r = amdgpu_gart_bind(adev, gtt->offset, gtt->ttm.ttm.num_pages, | 1156 | r = amdgpu_ttm_gart_bind(adev, tbo, flags); |
| 950 | gtt->ttm.ttm.pages, gtt->ttm.dma_address, flags); | 1157 | |
| 951 | if (r) | ||
| 952 | DRM_ERROR("failed to bind %lu pages at 0x%08llX\n", | ||
| 953 | gtt->ttm.ttm.num_pages, gtt->offset); | ||
| 954 | return r; | 1158 | return r; |
| 955 | } | 1159 | } |
| 956 | 1160 | ||
| 1161 | /** | ||
| 1162 | * amdgpu_ttm_backend_unbind - Unbind GTT mapped pages | ||
| 1163 | * | ||
| 1164 | * Called by ttm_tt_unbind() on behalf of ttm_bo_move_ttm() and | ||
| 1165 | * ttm_tt_destroy(). | ||
| 1166 | */ | ||
| 957 | static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm) | 1167 | static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm) |
| 958 | { | 1168 | { |
| 959 | struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); | 1169 | struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); |
| 960 | struct amdgpu_ttm_tt *gtt = (void *)ttm; | 1170 | struct amdgpu_ttm_tt *gtt = (void *)ttm; |
| 961 | int r; | 1171 | int r; |
| 962 | 1172 | ||
| 1173 | /* if the pages have userptr pinning then clear that first */ | ||
| 963 | if (gtt->userptr) | 1174 | if (gtt->userptr) |
| 964 | amdgpu_ttm_tt_unpin_userptr(ttm); | 1175 | amdgpu_ttm_tt_unpin_userptr(ttm); |
| 965 | 1176 | ||
| @@ -978,6 +1189,9 @@ static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm) | |||
| 978 | { | 1189 | { |
| 979 | struct amdgpu_ttm_tt *gtt = (void *)ttm; | 1190 | struct amdgpu_ttm_tt *gtt = (void *)ttm; |
| 980 | 1191 | ||
| 1192 | if (gtt->usertask) | ||
| 1193 | put_task_struct(gtt->usertask); | ||
| 1194 | |||
| 981 | ttm_dma_tt_fini(>t->ttm); | 1195 | ttm_dma_tt_fini(>t->ttm); |
| 982 | kfree(gtt); | 1196 | kfree(gtt); |
| 983 | } | 1197 | } |
| @@ -988,6 +1202,13 @@ static struct ttm_backend_func amdgpu_backend_func = { | |||
| 988 | .destroy = &amdgpu_ttm_backend_destroy, | 1202 | .destroy = &amdgpu_ttm_backend_destroy, |
| 989 | }; | 1203 | }; |
| 990 | 1204 | ||
| 1205 | /** | ||
| 1206 | * amdgpu_ttm_tt_create - Create a ttm_tt object for a given BO | ||
| 1207 | * | ||
| 1208 | * @bo: The buffer object to create a GTT ttm_tt object around | ||
| 1209 | * | ||
| 1210 | * Called by ttm_tt_create(). | ||
| 1211 | */ | ||
| 991 | static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo, | 1212 | static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo, |
| 992 | uint32_t page_flags) | 1213 | uint32_t page_flags) |
| 993 | { | 1214 | { |
| @@ -1001,6 +1222,8 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo, | |||
| 1001 | return NULL; | 1222 | return NULL; |
| 1002 | } | 1223 | } |
| 1003 | gtt->ttm.ttm.func = &amdgpu_backend_func; | 1224 | gtt->ttm.ttm.func = &amdgpu_backend_func; |
| 1225 | |||
| 1226 | /* allocate space for the uninitialized page entries */ | ||
| 1004 | if (ttm_sg_tt_init(>t->ttm, bo, page_flags)) { | 1227 | if (ttm_sg_tt_init(>t->ttm, bo, page_flags)) { |
| 1005 | kfree(gtt); | 1228 | kfree(gtt); |
| 1006 | return NULL; | 1229 | return NULL; |
| @@ -1008,6 +1231,12 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo, | |||
| 1008 | return >t->ttm.ttm; | 1231 | return >t->ttm.ttm; |
| 1009 | } | 1232 | } |
| 1010 | 1233 | ||
| 1234 | /** | ||
| 1235 | * amdgpu_ttm_tt_populate - Map GTT pages visible to the device | ||
| 1236 | * | ||
| 1237 | * Map the pages of a ttm_tt object to an address space visible | ||
| 1238 | * to the underlying device. | ||
| 1239 | */ | ||
| 1011 | static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm, | 1240 | static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm, |
| 1012 | struct ttm_operation_ctx *ctx) | 1241 | struct ttm_operation_ctx *ctx) |
| 1013 | { | 1242 | { |
| @@ -1015,6 +1244,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm, | |||
| 1015 | struct amdgpu_ttm_tt *gtt = (void *)ttm; | 1244 | struct amdgpu_ttm_tt *gtt = (void *)ttm; |
| 1016 | bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG); | 1245 | bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG); |
| 1017 | 1246 | ||
| 1247 | /* user pages are bound by amdgpu_ttm_tt_pin_userptr() */ | ||
| 1018 | if (gtt && gtt->userptr) { | 1248 | if (gtt && gtt->userptr) { |
| 1019 | ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL); | 1249 | ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL); |
| 1020 | if (!ttm->sg) | 1250 | if (!ttm->sg) |
| @@ -1039,9 +1269,17 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm, | |||
| 1039 | } | 1269 | } |
| 1040 | #endif | 1270 | #endif |
| 1041 | 1271 | ||
| 1272 | /* fall back to generic helper to populate the page array | ||
| 1273 | * and map them to the device */ | ||
| 1042 | return ttm_populate_and_map_pages(adev->dev, >t->ttm, ctx); | 1274 | return ttm_populate_and_map_pages(adev->dev, >t->ttm, ctx); |
| 1043 | } | 1275 | } |
| 1044 | 1276 | ||
| 1277 | /** | ||
| 1278 | * amdgpu_ttm_tt_unpopulate - unmap GTT pages and unpopulate page arrays | ||
| 1279 | * | ||
| 1280 | * Unmaps pages of a ttm_tt object from the device address space and | ||
| 1281 | * unpopulates the page array backing it. | ||
| 1282 | */ | ||
| 1045 | static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm) | 1283 | static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm) |
| 1046 | { | 1284 | { |
| 1047 | struct amdgpu_device *adev; | 1285 | struct amdgpu_device *adev; |
| @@ -1067,9 +1305,21 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm) | |||
| 1067 | } | 1305 | } |
| 1068 | #endif | 1306 | #endif |
| 1069 | 1307 | ||
| 1308 | /* fall back to generic helper to unmap and unpopulate array */ | ||
| 1070 | ttm_unmap_and_unpopulate_pages(adev->dev, >t->ttm); | 1309 | ttm_unmap_and_unpopulate_pages(adev->dev, >t->ttm); |
| 1071 | } | 1310 | } |
| 1072 | 1311 | ||
| 1312 | /** | ||
| 1313 | * amdgpu_ttm_tt_set_userptr - Initialize userptr GTT ttm_tt | ||
| 1314 | * for the current task | ||
| 1315 | * | ||
| 1316 | * @ttm: The ttm_tt object to bind this userptr object to | ||
| 1317 | * @addr: The address in the current tasks VM space to use | ||
| 1318 | * @flags: Requirements of userptr object. | ||
| 1319 | * | ||
| 1320 | * Called by amdgpu_gem_userptr_ioctl() to bind userptr pages | ||
| 1321 | * to current task | ||
| 1322 | */ | ||
| 1073 | int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, | 1323 | int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, |
| 1074 | uint32_t flags) | 1324 | uint32_t flags) |
| 1075 | { | 1325 | { |
| @@ -1079,8 +1329,13 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, | |||
| 1079 | return -EINVAL; | 1329 | return -EINVAL; |
| 1080 | 1330 | ||
| 1081 | gtt->userptr = addr; | 1331 | gtt->userptr = addr; |
| 1082 | gtt->usermm = current->mm; | ||
| 1083 | gtt->userflags = flags; | 1332 | gtt->userflags = flags; |
| 1333 | |||
| 1334 | if (gtt->usertask) | ||
| 1335 | put_task_struct(gtt->usertask); | ||
| 1336 | gtt->usertask = current->group_leader; | ||
| 1337 | get_task_struct(gtt->usertask); | ||
| 1338 | |||
| 1084 | spin_lock_init(>t->guptasklock); | 1339 | spin_lock_init(>t->guptasklock); |
| 1085 | INIT_LIST_HEAD(>t->guptasks); | 1340 | INIT_LIST_HEAD(>t->guptasks); |
| 1086 | atomic_set(>t->mmu_invalidations, 0); | 1341 | atomic_set(>t->mmu_invalidations, 0); |
| @@ -1089,6 +1344,9 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, | |||
| 1089 | return 0; | 1344 | return 0; |
| 1090 | } | 1345 | } |
| 1091 | 1346 | ||
| 1347 | /** | ||
| 1348 | * amdgpu_ttm_tt_get_usermm - Return the mm_struct backing a userptr ttm_tt object | ||
| 1349 | */ | ||
| 1092 | struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm) | 1350 | struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm) |
| 1093 | { | 1351 | { |
| 1094 | struct amdgpu_ttm_tt *gtt = (void *)ttm; | 1352 | struct amdgpu_ttm_tt *gtt = (void *)ttm; |
| @@ -1096,9 +1354,18 @@ struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm) | |||
| 1096 | if (gtt == NULL) | 1354 | if (gtt == NULL) |
| 1097 | return NULL; | 1355 | return NULL; |
| 1098 | 1356 | ||
| 1099 | return gtt->usermm; | 1357 | if (gtt->usertask == NULL) |
| 1358 | return NULL; | ||
| 1359 | |||
| 1360 | return gtt->usertask->mm; | ||
| 1100 | } | 1361 | } |
| 1101 | 1362 | ||
| 1363 | /** | ||
| 1364 | * amdgpu_ttm_tt_affect_userptr - Determine if a ttm_tt object lies | ||
| 1365 | * inside an address range for the | ||
| 1366 | * current task. | ||
| 1367 | * | ||
| 1368 | */ | ||
| 1102 | bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, | 1369 | bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, |
| 1103 | unsigned long end) | 1370 | unsigned long end) |
| 1104 | { | 1371 | { |
| @@ -1109,10 +1376,16 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, | |||
| 1109 | if (gtt == NULL || !gtt->userptr) | 1376 | if (gtt == NULL || !gtt->userptr) |
| 1110 | return false; | 1377 | return false; |
| 1111 | 1378 | ||
| 1379 | /* Return false if no part of the ttm_tt object lies within | ||
| 1380 | * the range | ||
| 1381 | */ | ||
| 1112 | size = (unsigned long)gtt->ttm.ttm.num_pages * PAGE_SIZE; | 1382 | size = (unsigned long)gtt->ttm.ttm.num_pages * PAGE_SIZE; |
| 1113 | if (gtt->userptr > end || gtt->userptr + size <= start) | 1383 | if (gtt->userptr > end || gtt->userptr + size <= start) |
| 1114 | return false; | 1384 | return false; |
| 1115 | 1385 | ||
| 1386 | /* Search the lists of tasks that hold this mapping and see | ||
| 1387 | * if current is one of them. If it is return false. | ||
| 1388 | */ | ||
| 1116 | spin_lock(>t->guptasklock); | 1389 | spin_lock(>t->guptasklock); |
| 1117 | list_for_each_entry(entry, >t->guptasks, list) { | 1390 | list_for_each_entry(entry, >t->guptasks, list) { |
| 1118 | if (entry->task == current) { | 1391 | if (entry->task == current) { |
| @@ -1127,6 +1400,10 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, | |||
| 1127 | return true; | 1400 | return true; |
| 1128 | } | 1401 | } |
| 1129 | 1402 | ||
| 1403 | /** | ||
| 1404 | * amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been | ||
| 1405 | * invalidated? | ||
| 1406 | */ | ||
| 1130 | bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm, | 1407 | bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm, |
| 1131 | int *last_invalidated) | 1408 | int *last_invalidated) |
| 1132 | { | 1409 | { |
| @@ -1137,6 +1414,12 @@ bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm, | |||
| 1137 | return prev_invalidated != *last_invalidated; | 1414 | return prev_invalidated != *last_invalidated; |
| 1138 | } | 1415 | } |
| 1139 | 1416 | ||
| 1417 | /** | ||
| 1418 | * amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this | ||
| 1419 | * ttm_tt object been invalidated | ||
| 1420 | * since the last time they've | ||
| 1421 | * been set? | ||
| 1422 | */ | ||
| 1140 | bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm) | 1423 | bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm) |
| 1141 | { | 1424 | { |
| 1142 | struct amdgpu_ttm_tt *gtt = (void *)ttm; | 1425 | struct amdgpu_ttm_tt *gtt = (void *)ttm; |
| @@ -1147,6 +1430,9 @@ bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm) | |||
| 1147 | return atomic_read(>t->mmu_invalidations) != gtt->last_set_pages; | 1430 | return atomic_read(>t->mmu_invalidations) != gtt->last_set_pages; |
| 1148 | } | 1431 | } |
| 1149 | 1432 | ||
| 1433 | /** | ||
| 1434 | * amdgpu_ttm_tt_is_readonly - Is the ttm_tt object read only? | ||
| 1435 | */ | ||
| 1150 | bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm) | 1436 | bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm) |
| 1151 | { | 1437 | { |
| 1152 | struct amdgpu_ttm_tt *gtt = (void *)ttm; | 1438 | struct amdgpu_ttm_tt *gtt = (void *)ttm; |
| @@ -1157,6 +1443,12 @@ bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm) | |||
| 1157 | return !!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY); | 1443 | return !!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY); |
| 1158 | } | 1444 | } |
| 1159 | 1445 | ||
| 1446 | /** | ||
| 1447 | * amdgpu_ttm_tt_pte_flags - Compute PTE flags for ttm_tt object | ||
| 1448 | * | ||
| 1449 | * @ttm: The ttm_tt object to compute the flags for | ||
| 1450 | * @mem: The memory registry backing this ttm_tt object | ||
| 1451 | */ | ||
| 1160 | uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, | 1452 | uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, |
| 1161 | struct ttm_mem_reg *mem) | 1453 | struct ttm_mem_reg *mem) |
| 1162 | { | 1454 | { |
| @@ -1181,6 +1473,16 @@ uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, | |||
| 1181 | return flags; | 1473 | return flags; |
| 1182 | } | 1474 | } |
| 1183 | 1475 | ||
| 1476 | /** | ||
| 1477 | * amdgpu_ttm_bo_eviction_valuable - Check to see if we can evict | ||
| 1478 | * a buffer object. | ||
| 1479 | * | ||
| 1480 | * Return true if eviction is sensible. Called by | ||
| 1481 | * ttm_mem_evict_first() on behalf of ttm_bo_mem_force_space() | ||
| 1482 | * which tries to evict buffer objects until it can find space | ||
| 1483 | * for a new object and by ttm_bo_force_list_clean() which is | ||
| 1484 | * used to clean out a memory space. | ||
| 1485 | */ | ||
| 1184 | static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, | 1486 | static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, |
| 1185 | const struct ttm_place *place) | 1487 | const struct ttm_place *place) |
| 1186 | { | 1488 | { |
| @@ -1227,6 +1529,19 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, | |||
| 1227 | return ttm_bo_eviction_valuable(bo, place); | 1529 | return ttm_bo_eviction_valuable(bo, place); |
| 1228 | } | 1530 | } |
| 1229 | 1531 | ||
| 1532 | /** | ||
| 1533 | * amdgpu_ttm_access_memory - Read or write memory that backs a | ||
| 1534 | * buffer object. | ||
| 1535 | * | ||
| 1536 | * @bo: The buffer object to read/write | ||
| 1537 | * @offset: Offset into buffer object | ||
| 1538 | * @buf: Secondary buffer to write/read from | ||
| 1539 | * @len: Length in bytes of access | ||
| 1540 | * @write: true if writing | ||
| 1541 | * | ||
| 1542 | * This is used to access VRAM that backs a buffer object via MMIO | ||
| 1543 | * for debugging purposes. | ||
| 1544 | */ | ||
| 1230 | static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, | 1545 | static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, |
| 1231 | unsigned long offset, | 1546 | unsigned long offset, |
| 1232 | void *buf, int len, int write) | 1547 | void *buf, int len, int write) |
| @@ -1329,6 +1644,7 @@ static void amdgpu_ttm_fw_reserve_vram_fini(struct amdgpu_device *adev) | |||
| 1329 | static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev) | 1644 | static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev) |
| 1330 | { | 1645 | { |
| 1331 | struct ttm_operation_ctx ctx = { false, false }; | 1646 | struct ttm_operation_ctx ctx = { false, false }; |
| 1647 | struct amdgpu_bo_param bp; | ||
| 1332 | int r = 0; | 1648 | int r = 0; |
| 1333 | int i; | 1649 | int i; |
| 1334 | u64 vram_size = adev->gmc.visible_vram_size; | 1650 | u64 vram_size = adev->gmc.visible_vram_size; |
| @@ -1336,17 +1652,21 @@ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev) | |||
| 1336 | u64 size = adev->fw_vram_usage.size; | 1652 | u64 size = adev->fw_vram_usage.size; |
| 1337 | struct amdgpu_bo *bo; | 1653 | struct amdgpu_bo *bo; |
| 1338 | 1654 | ||
| 1655 | memset(&bp, 0, sizeof(bp)); | ||
| 1656 | bp.size = adev->fw_vram_usage.size; | ||
| 1657 | bp.byte_align = PAGE_SIZE; | ||
| 1658 | bp.domain = AMDGPU_GEM_DOMAIN_VRAM; | ||
| 1659 | bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | | ||
| 1660 | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; | ||
| 1661 | bp.type = ttm_bo_type_kernel; | ||
| 1662 | bp.resv = NULL; | ||
| 1339 | adev->fw_vram_usage.va = NULL; | 1663 | adev->fw_vram_usage.va = NULL; |
| 1340 | adev->fw_vram_usage.reserved_bo = NULL; | 1664 | adev->fw_vram_usage.reserved_bo = NULL; |
| 1341 | 1665 | ||
| 1342 | if (adev->fw_vram_usage.size > 0 && | 1666 | if (adev->fw_vram_usage.size > 0 && |
| 1343 | adev->fw_vram_usage.size <= vram_size) { | 1667 | adev->fw_vram_usage.size <= vram_size) { |
| 1344 | 1668 | ||
| 1345 | r = amdgpu_bo_create(adev, adev->fw_vram_usage.size, PAGE_SIZE, | 1669 | r = amdgpu_bo_create(adev, &bp, |
| 1346 | AMDGPU_GEM_DOMAIN_VRAM, | ||
| 1347 | AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | | ||
| 1348 | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, | ||
| 1349 | ttm_bo_type_kernel, NULL, | ||
| 1350 | &adev->fw_vram_usage.reserved_bo); | 1670 | &adev->fw_vram_usage.reserved_bo); |
| 1351 | if (r) | 1671 | if (r) |
| 1352 | goto error_create; | 1672 | goto error_create; |
| @@ -1398,13 +1718,22 @@ error_create: | |||
| 1398 | adev->fw_vram_usage.reserved_bo = NULL; | 1718 | adev->fw_vram_usage.reserved_bo = NULL; |
| 1399 | return r; | 1719 | return r; |
| 1400 | } | 1720 | } |
| 1401 | 1721 | /** | |
| 1722 | * amdgpu_ttm_init - Init the memory management (ttm) as well as | ||
| 1723 | * various gtt/vram related fields. | ||
| 1724 | * | ||
| 1725 | * This initializes all of the memory space pools that the TTM layer | ||
| 1726 | * will need such as the GTT space (system memory mapped to the device), | ||
| 1727 | * VRAM (on-board memory), and on-chip memories (GDS, GWS, OA) which | ||
| 1728 | * can be mapped per VMID. | ||
| 1729 | */ | ||
| 1402 | int amdgpu_ttm_init(struct amdgpu_device *adev) | 1730 | int amdgpu_ttm_init(struct amdgpu_device *adev) |
| 1403 | { | 1731 | { |
| 1404 | uint64_t gtt_size; | 1732 | uint64_t gtt_size; |
| 1405 | int r; | 1733 | int r; |
| 1406 | u64 vis_vram_limit; | 1734 | u64 vis_vram_limit; |
| 1407 | 1735 | ||
| 1736 | /* initialize global references for vram/gtt */ | ||
| 1408 | r = amdgpu_ttm_global_init(adev); | 1737 | r = amdgpu_ttm_global_init(adev); |
| 1409 | if (r) { | 1738 | if (r) { |
| 1410 | return r; | 1739 | return r; |
| @@ -1425,6 +1754,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) | |||
| 1425 | /* We opt to avoid OOM on system pages allocations */ | 1754 | /* We opt to avoid OOM on system pages allocations */ |
| 1426 | adev->mman.bdev.no_retry = true; | 1755 | adev->mman.bdev.no_retry = true; |
| 1427 | 1756 | ||
| 1757 | /* Initialize VRAM pool with all of VRAM divided into pages */ | ||
| 1428 | r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM, | 1758 | r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM, |
| 1429 | adev->gmc.real_vram_size >> PAGE_SHIFT); | 1759 | adev->gmc.real_vram_size >> PAGE_SHIFT); |
| 1430 | if (r) { | 1760 | if (r) { |
| @@ -1454,15 +1784,23 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) | |||
| 1454 | return r; | 1784 | return r; |
| 1455 | } | 1785 | } |
| 1456 | 1786 | ||
| 1457 | r = amdgpu_bo_create_kernel(adev, adev->gmc.stolen_size, PAGE_SIZE, | 1787 | /* allocate memory as required for VGA |
| 1458 | AMDGPU_GEM_DOMAIN_VRAM, | 1788 | * This is used for VGA emulation and pre-OS scanout buffers to |
| 1459 | &adev->stolen_vga_memory, | 1789 | * avoid display artifacts while transitioning between pre-OS |
| 1460 | NULL, NULL); | 1790 | * and driver. */ |
| 1461 | if (r) | 1791 | if (adev->gmc.stolen_size) { |
| 1462 | return r; | 1792 | r = amdgpu_bo_create_kernel(adev, adev->gmc.stolen_size, PAGE_SIZE, |
| 1793 | AMDGPU_GEM_DOMAIN_VRAM, | ||
| 1794 | &adev->stolen_vga_memory, | ||
| 1795 | NULL, NULL); | ||
| 1796 | if (r) | ||
| 1797 | return r; | ||
| 1798 | } | ||
| 1463 | DRM_INFO("amdgpu: %uM of VRAM memory ready\n", | 1799 | DRM_INFO("amdgpu: %uM of VRAM memory ready\n", |
| 1464 | (unsigned) (adev->gmc.real_vram_size / (1024 * 1024))); | 1800 | (unsigned) (adev->gmc.real_vram_size / (1024 * 1024))); |
| 1465 | 1801 | ||
| 1802 | /* Compute GTT size, either based on 3/4 of the RAM size | ||
| 1803 | * or whatever the user passed on module init */ | ||
| 1466 | if (amdgpu_gtt_size == -1) { | 1804 | if (amdgpu_gtt_size == -1) { |
| 1467 | struct sysinfo si; | 1805 | struct sysinfo si; |
| 1468 | 1806 | ||
| @@ -1473,6 +1811,8 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) | |||
| 1473 | } | 1811 | } |
| 1474 | else | 1812 | else |
| 1475 | gtt_size = (uint64_t)amdgpu_gtt_size << 20; | 1813 | gtt_size = (uint64_t)amdgpu_gtt_size << 20; |
| 1814 | |||
| 1815 | /* Initialize GTT memory pool */ | ||
| 1476 | r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_TT, gtt_size >> PAGE_SHIFT); | 1816 | r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_TT, gtt_size >> PAGE_SHIFT); |
| 1477 | if (r) { | 1817 | if (r) { |
| 1478 | DRM_ERROR("Failed initializing GTT heap.\n"); | 1818 | DRM_ERROR("Failed initializing GTT heap.\n"); |
| @@ -1481,6 +1821,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) | |||
| 1481 | DRM_INFO("amdgpu: %uM of GTT memory ready.\n", | 1821 | DRM_INFO("amdgpu: %uM of GTT memory ready.\n", |
| 1482 | (unsigned)(gtt_size / (1024 * 1024))); | 1822 | (unsigned)(gtt_size / (1024 * 1024))); |
| 1483 | 1823 | ||
| 1824 | /* Initialize various on-chip memory pools */ | ||
| 1484 | adev->gds.mem.total_size = adev->gds.mem.total_size << AMDGPU_GDS_SHIFT; | 1825 | adev->gds.mem.total_size = adev->gds.mem.total_size << AMDGPU_GDS_SHIFT; |
| 1485 | adev->gds.mem.gfx_partition_size = adev->gds.mem.gfx_partition_size << AMDGPU_GDS_SHIFT; | 1826 | adev->gds.mem.gfx_partition_size = adev->gds.mem.gfx_partition_size << AMDGPU_GDS_SHIFT; |
| 1486 | adev->gds.mem.cs_partition_size = adev->gds.mem.cs_partition_size << AMDGPU_GDS_SHIFT; | 1827 | adev->gds.mem.cs_partition_size = adev->gds.mem.cs_partition_size << AMDGPU_GDS_SHIFT; |
| @@ -1520,6 +1861,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) | |||
| 1520 | } | 1861 | } |
| 1521 | } | 1862 | } |
| 1522 | 1863 | ||
| 1864 | /* Register debugfs entries for amdgpu_ttm */ | ||
| 1523 | r = amdgpu_ttm_debugfs_init(adev); | 1865 | r = amdgpu_ttm_debugfs_init(adev); |
| 1524 | if (r) { | 1866 | if (r) { |
| 1525 | DRM_ERROR("Failed to init debugfs\n"); | 1867 | DRM_ERROR("Failed to init debugfs\n"); |
| @@ -1528,13 +1870,25 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) | |||
| 1528 | return 0; | 1870 | return 0; |
| 1529 | } | 1871 | } |
| 1530 | 1872 | ||
| 1873 | /** | ||
| 1874 | * amdgpu_ttm_late_init - Handle any late initialization for | ||
| 1875 | * amdgpu_ttm | ||
| 1876 | */ | ||
| 1877 | void amdgpu_ttm_late_init(struct amdgpu_device *adev) | ||
| 1878 | { | ||
| 1879 | /* return the VGA stolen memory (if any) back to VRAM */ | ||
| 1880 | amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL); | ||
| 1881 | } | ||
| 1882 | |||
| 1883 | /** | ||
| 1884 | * amdgpu_ttm_fini - De-initialize the TTM memory pools | ||
| 1885 | */ | ||
| 1531 | void amdgpu_ttm_fini(struct amdgpu_device *adev) | 1886 | void amdgpu_ttm_fini(struct amdgpu_device *adev) |
| 1532 | { | 1887 | { |
| 1533 | if (!adev->mman.initialized) | 1888 | if (!adev->mman.initialized) |
| 1534 | return; | 1889 | return; |
| 1535 | 1890 | ||
| 1536 | amdgpu_ttm_debugfs_fini(adev); | 1891 | amdgpu_ttm_debugfs_fini(adev); |
| 1537 | amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL); | ||
| 1538 | amdgpu_ttm_fw_reserve_vram_fini(adev); | 1892 | amdgpu_ttm_fw_reserve_vram_fini(adev); |
| 1539 | if (adev->mman.aper_base_kaddr) | 1893 | if (adev->mman.aper_base_kaddr) |
| 1540 | iounmap(adev->mman.aper_base_kaddr); | 1894 | iounmap(adev->mman.aper_base_kaddr); |
| @@ -1856,6 +2210,11 @@ static const struct drm_info_list amdgpu_ttm_debugfs_list[] = { | |||
| 1856 | #endif | 2210 | #endif |
| 1857 | }; | 2211 | }; |
| 1858 | 2212 | ||
| 2213 | /** | ||
| 2214 | * amdgpu_ttm_vram_read - Linear read access to VRAM | ||
| 2215 | * | ||
| 2216 | * Accesses VRAM via MMIO for debugging purposes. | ||
| 2217 | */ | ||
| 1859 | static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf, | 2218 | static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf, |
| 1860 | size_t size, loff_t *pos) | 2219 | size_t size, loff_t *pos) |
| 1861 | { | 2220 | { |
| @@ -1895,6 +2254,11 @@ static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf, | |||
| 1895 | return result; | 2254 | return result; |
| 1896 | } | 2255 | } |
| 1897 | 2256 | ||
| 2257 | /** | ||
| 2258 | * amdgpu_ttm_vram_write - Linear write access to VRAM | ||
| 2259 | * | ||
| 2260 | * Accesses VRAM via MMIO for debugging purposes. | ||
| 2261 | */ | ||
| 1898 | static ssize_t amdgpu_ttm_vram_write(struct file *f, const char __user *buf, | 2262 | static ssize_t amdgpu_ttm_vram_write(struct file *f, const char __user *buf, |
| 1899 | size_t size, loff_t *pos) | 2263 | size_t size, loff_t *pos) |
| 1900 | { | 2264 | { |
| @@ -1943,6 +2307,9 @@ static const struct file_operations amdgpu_ttm_vram_fops = { | |||
| 1943 | 2307 | ||
| 1944 | #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS | 2308 | #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS |
| 1945 | 2309 | ||
| 2310 | /** | ||
| 2311 | * amdgpu_ttm_gtt_read - Linear read access to GTT memory | ||
| 2312 | */ | ||
| 1946 | static ssize_t amdgpu_ttm_gtt_read(struct file *f, char __user *buf, | 2313 | static ssize_t amdgpu_ttm_gtt_read(struct file *f, char __user *buf, |
| 1947 | size_t size, loff_t *pos) | 2314 | size_t size, loff_t *pos) |
| 1948 | { | 2315 | { |
| @@ -1990,6 +2357,13 @@ static const struct file_operations amdgpu_ttm_gtt_fops = { | |||
| 1990 | 2357 | ||
| 1991 | #endif | 2358 | #endif |
| 1992 | 2359 | ||
| 2360 | /** | ||
| 2361 | * amdgpu_iomem_read - Virtual read access to GPU mapped memory | ||
| 2362 | * | ||
| 2363 | * This function is used to read memory that has been mapped to the | ||
| 2364 | * GPU. The addresses used are not physical addresses but rather | ||
| 2365 | * bus addresses (e.g., what you'd put in an IB or ring buffer). | ||
| 2366 | */ | ||
| 1993 | static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf, | 2367 | static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf, |
| 1994 | size_t size, loff_t *pos) | 2368 | size_t size, loff_t *pos) |
| 1995 | { | 2369 | { |
| @@ -1998,6 +2372,7 @@ static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf, | |||
| 1998 | ssize_t result = 0; | 2372 | ssize_t result = 0; |
| 1999 | int r; | 2373 | int r; |
| 2000 | 2374 | ||
| 2375 | /* retrieve the IOMMU domain if any for this device */ | ||
| 2001 | dom = iommu_get_domain_for_dev(adev->dev); | 2376 | dom = iommu_get_domain_for_dev(adev->dev); |
| 2002 | 2377 | ||
| 2003 | while (size) { | 2378 | while (size) { |
| @@ -2010,6 +2385,10 @@ static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf, | |||
| 2010 | 2385 | ||
| 2011 | bytes = bytes < size ? bytes : size; | 2386 | bytes = bytes < size ? bytes : size; |
| 2012 | 2387 | ||
| 2388 | /* Translate the bus address to a physical address. If | ||
| 2389 | * the domain is NULL it means there is no IOMMU active | ||
| 2390 | * and the address translation is the identity | ||
| 2391 | */ | ||
| 2013 | addr = dom ? iommu_iova_to_phys(dom, addr) : addr; | 2392 | addr = dom ? iommu_iova_to_phys(dom, addr) : addr; |
| 2014 | 2393 | ||
| 2015 | pfn = addr >> PAGE_SHIFT; | 2394 | pfn = addr >> PAGE_SHIFT; |
| @@ -2034,6 +2413,13 @@ static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf, | |||
| 2034 | return result; | 2413 | return result; |
| 2035 | } | 2414 | } |
| 2036 | 2415 | ||
| 2416 | /** | ||
| 2417 | * amdgpu_iomem_write - Virtual write access to GPU mapped memory | ||
| 2418 | * | ||
| 2419 | * This function is used to write memory that has been mapped to the | ||
| 2420 | * GPU. The addresses used are not physical addresses but rather | ||
| 2421 | * bus addresses (e.g., what you'd put in an IB or ring buffer). | ||
| 2422 | */ | ||
| 2037 | static ssize_t amdgpu_iomem_write(struct file *f, const char __user *buf, | 2423 | static ssize_t amdgpu_iomem_write(struct file *f, const char __user *buf, |
| 2038 | size_t size, loff_t *pos) | 2424 | size_t size, loff_t *pos) |
| 2039 | { | 2425 | { |
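
Note on the BO creation change in the amdgpu_ttm.c hunks above: amdgpu_bo_create() now takes a single struct amdgpu_bo_param instead of a long positional argument list, and the firmware-VRAM reservation is the caller converted here. A minimal sketch of the new calling pattern, using only the fields visible in this diff (size, byte_align, domain, flags, type, resv) and assuming adev and size are already in scope:

	struct amdgpu_bo_param bp;
	struct amdgpu_bo *bo = NULL;
	int r;

	/* zero the param block so unset fields keep a neutral default */
	memset(&bp, 0, sizeof(bp));
	bp.size = size;                            /* requested size in bytes */
	bp.byte_align = PAGE_SIZE;                 /* placement alignment */
	bp.domain = AMDGPU_GEM_DOMAIN_VRAM;        /* preferred placement */
	bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
	bp.type = ttm_bo_type_kernel;              /* kernel-owned BO */
	bp.resv = NULL;                            /* no shared reservation object */

	r = amdgpu_bo_create(adev, &bp, &bo);
	if (r)
		return r;

The benefit is that new placement parameters can be added to struct amdgpu_bo_param without touching every caller.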
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 6ea7de863041..e969c879d87e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | |||
| @@ -77,6 +77,7 @@ uint64_t amdgpu_vram_mgr_usage(struct ttm_mem_type_manager *man); | |||
| 77 | uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_mem_type_manager *man); | 77 | uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_mem_type_manager *man); |
| 78 | 78 | ||
| 79 | int amdgpu_ttm_init(struct amdgpu_device *adev); | 79 | int amdgpu_ttm_init(struct amdgpu_device *adev); |
| 80 | void amdgpu_ttm_late_init(struct amdgpu_device *adev); | ||
| 80 | void amdgpu_ttm_fini(struct amdgpu_device *adev); | 81 | void amdgpu_ttm_fini(struct amdgpu_device *adev); |
| 81 | void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, | 82 | void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, |
| 82 | bool enable); | 83 | bool enable); |
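
The amdgpu_ttm_late_init() declaration added here pairs with the amdgpu_ttm.c change above: the stolen VGA buffer is now allocated in amdgpu_ttm_init() (only when gmc.stolen_size is non-zero) and released in late init rather than in amdgpu_ttm_fini(). A rough sketch of the intended ordering from a caller's perspective; the real call sites are the GMC IP blocks, which are outside this diff, so the surrounding code below is an assumption:

	/* device bring-up: create VRAM/GTT pools, reserve stolen VGA memory */
	r = amdgpu_ttm_init(adev);
	if (r)
		return r;

	/* after the driver has fully taken over scanout from the pre-OS
	 * console, the stolen reservation can be returned to the VRAM pool */
	amdgpu_ttm_late_init(adev);

	/* device teardown: release the remaining TTM pools and debugfs files */
	amdgpu_ttm_fini(adev);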
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 5916cc25e28b..f55f72a37ca8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | |||
| @@ -161,8 +161,38 @@ void amdgpu_ucode_print_rlc_hdr(const struct common_firmware_header *hdr) | |||
| 161 | le32_to_cpu(rlc_hdr->reg_list_format_separate_array_offset_bytes)); | 161 | le32_to_cpu(rlc_hdr->reg_list_format_separate_array_offset_bytes)); |
| 162 | DRM_DEBUG("reg_list_separate_size_bytes: %u\n", | 162 | DRM_DEBUG("reg_list_separate_size_bytes: %u\n", |
| 163 | le32_to_cpu(rlc_hdr->reg_list_separate_size_bytes)); | 163 | le32_to_cpu(rlc_hdr->reg_list_separate_size_bytes)); |
| 164 | DRM_DEBUG("reg_list_separate_size_bytes: %u\n", | 164 | DRM_DEBUG("reg_list_separate_array_offset_bytes: %u\n", |
| 165 | le32_to_cpu(rlc_hdr->reg_list_separate_size_bytes)); | 165 | le32_to_cpu(rlc_hdr->reg_list_separate_array_offset_bytes)); |
| 166 | if (version_minor == 1) { | ||
| 167 | const struct rlc_firmware_header_v2_1 *v2_1 = | ||
| 168 | container_of(rlc_hdr, struct rlc_firmware_header_v2_1, v2_0); | ||
| 169 | DRM_DEBUG("reg_list_format_direct_reg_list_length: %u\n", | ||
| 170 | le32_to_cpu(v2_1->reg_list_format_direct_reg_list_length)); | ||
| 171 | DRM_DEBUG("save_restore_list_cntl_ucode_ver: %u\n", | ||
| 172 | le32_to_cpu(v2_1->save_restore_list_cntl_ucode_ver)); | ||
| 173 | DRM_DEBUG("save_restore_list_cntl_feature_ver: %u\n", | ||
| 174 | le32_to_cpu(v2_1->save_restore_list_cntl_feature_ver)); | ||
| 175 | DRM_DEBUG("save_restore_list_cntl_size_bytes %u\n", | ||
| 176 | le32_to_cpu(v2_1->save_restore_list_cntl_size_bytes)); | ||
| 177 | DRM_DEBUG("save_restore_list_cntl_offset_bytes: %u\n", | ||
| 178 | le32_to_cpu(v2_1->save_restore_list_cntl_offset_bytes)); | ||
| 179 | DRM_DEBUG("save_restore_list_gpm_ucode_ver: %u\n", | ||
| 180 | le32_to_cpu(v2_1->save_restore_list_gpm_ucode_ver)); | ||
| 181 | DRM_DEBUG("save_restore_list_gpm_feature_ver: %u\n", | ||
| 182 | le32_to_cpu(v2_1->save_restore_list_gpm_feature_ver)); | ||
| 183 | DRM_DEBUG("save_restore_list_gpm_size_bytes %u\n", | ||
| 184 | le32_to_cpu(v2_1->save_restore_list_gpm_size_bytes)); | ||
| 185 | DRM_DEBUG("save_restore_list_gpm_offset_bytes: %u\n", | ||
| 186 | le32_to_cpu(v2_1->save_restore_list_gpm_offset_bytes)); | ||
| 187 | DRM_DEBUG("save_restore_list_srm_ucode_ver: %u\n", | ||
| 188 | le32_to_cpu(v2_1->save_restore_list_srm_ucode_ver)); | ||
| 189 | DRM_DEBUG("save_restore_list_srm_feature_ver: %u\n", | ||
| 190 | le32_to_cpu(v2_1->save_restore_list_srm_feature_ver)); | ||
| 191 | DRM_DEBUG("save_restore_list_srm_size_bytes %u\n", | ||
| 192 | le32_to_cpu(v2_1->save_restore_list_srm_size_bytes)); | ||
| 193 | DRM_DEBUG("save_restore_list_srm_offset_bytes: %u\n", | ||
| 194 | le32_to_cpu(v2_1->save_restore_list_srm_offset_bytes)); | ||
| 195 | } | ||
| 166 | } else { | 196 | } else { |
| 167 | DRM_ERROR("Unknown RLC ucode version: %u.%u\n", version_major, version_minor); | 197 | DRM_ERROR("Unknown RLC ucode version: %u.%u\n", version_major, version_minor); |
| 168 | } | 198 | } |
| @@ -265,6 +295,7 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type) | |||
| 265 | case CHIP_POLARIS10: | 295 | case CHIP_POLARIS10: |
| 266 | case CHIP_POLARIS11: | 296 | case CHIP_POLARIS11: |
| 267 | case CHIP_POLARIS12: | 297 | case CHIP_POLARIS12: |
| 298 | case CHIP_VEGAM: | ||
| 268 | if (!load_type) | 299 | if (!load_type) |
| 269 | return AMDGPU_FW_LOAD_DIRECT; | 300 | return AMDGPU_FW_LOAD_DIRECT; |
| 270 | else | 301 | else |
| @@ -276,6 +307,8 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type) | |||
| 276 | return AMDGPU_FW_LOAD_DIRECT; | 307 | return AMDGPU_FW_LOAD_DIRECT; |
| 277 | else | 308 | else |
| 278 | return AMDGPU_FW_LOAD_PSP; | 309 | return AMDGPU_FW_LOAD_PSP; |
| 310 | case CHIP_VEGA20: | ||
| 311 | return AMDGPU_FW_LOAD_DIRECT; | ||
| 279 | default: | 312 | default: |
| 280 | DRM_ERROR("Unknown firmware load type\n"); | 313 | DRM_ERROR("Unknown firmware load type\n"); |
| 281 | } | 314 | } |
| @@ -307,7 +340,10 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev, | |||
| 307 | (ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1 && | 340 | (ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1 && |
| 308 | ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2 && | 341 | ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2 && |
| 309 | ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1_JT && | 342 | ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1_JT && |
| 310 | ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2_JT)) { | 343 | ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2_JT && |
| 344 | ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL && | ||
| 345 | ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM && | ||
| 346 | ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM)) { | ||
| 311 | ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes); | 347 | ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes); |
| 312 | 348 | ||
| 313 | memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data + | 349 | memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data + |
| @@ -329,6 +365,18 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev, | |||
| 329 | le32_to_cpu(header->ucode_array_offset_bytes) + | 365 | le32_to_cpu(header->ucode_array_offset_bytes) + |
| 330 | le32_to_cpu(cp_hdr->jt_offset) * 4), | 366 | le32_to_cpu(cp_hdr->jt_offset) * 4), |
| 331 | ucode->ucode_size); | 367 | ucode->ucode_size); |
| 368 | } else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL) { | ||
| 369 | ucode->ucode_size = adev->gfx.rlc.save_restore_list_cntl_size_bytes; | ||
| 370 | memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_cntl, | ||
| 371 | ucode->ucode_size); | ||
| 372 | } else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM) { | ||
| 373 | ucode->ucode_size = adev->gfx.rlc.save_restore_list_gpm_size_bytes; | ||
| 374 | memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_gpm, | ||
| 375 | ucode->ucode_size); | ||
| 376 | } else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM) { | ||
| 377 | ucode->ucode_size = adev->gfx.rlc.save_restore_list_srm_size_bytes; | ||
| 378 | memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_srm, | ||
| 379 | ucode->ucode_size); | ||
| 332 | } | 380 | } |
| 333 | 381 | ||
| 334 | return 0; | 382 | return 0; |
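
For readers following the RLC v2.1 additions: struct rlc_firmware_header_v2_1 embeds the v2.0 header as its first member (see the header change below), so code that already holds a rlc_firmware_header_v2_0 pointer can upcast with container_of() once the minor version says the extra fields exist. A condensed sketch of that pattern, with rlc_hdr, version_major and version_minor assumed to come from the same parsing as in amdgpu_ucode_print_rlc_hdr() above:

	if (version_major == 2 && version_minor == 1) {
		const struct rlc_firmware_header_v2_1 *v2_1 =
			container_of(rlc_hdr, struct rlc_firmware_header_v2_1, v2_0);

		/* sizes and offsets are stored little-endian in the image */
		u32 srm_size = le32_to_cpu(v2_1->save_restore_list_srm_size_bytes);
		u32 srm_off  = le32_to_cpu(v2_1->save_restore_list_srm_offset_bytes);

		/* the SRM payload itself presumably starts srm_off bytes into
		 * the firmware image and is srm_size bytes long */
	}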
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 30b5500dc152..08e38579af24 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h | |||
| @@ -98,6 +98,24 @@ struct rlc_firmware_header_v2_0 { | |||
| 98 | uint32_t reg_list_separate_array_offset_bytes; /* payload offset from the start of the header */ | 98 | uint32_t reg_list_separate_array_offset_bytes; /* payload offset from the start of the header */ |
| 99 | }; | 99 | }; |
| 100 | 100 | ||
| 101 | /* version_major=2, version_minor=1 */ | ||
| 102 | struct rlc_firmware_header_v2_1 { | ||
| 103 | struct rlc_firmware_header_v2_0 v2_0; | ||
| 104 | uint32_t reg_list_format_direct_reg_list_length; /* length of direct reg list format array */ | ||
| 105 | uint32_t save_restore_list_cntl_ucode_ver; | ||
| 106 | uint32_t save_restore_list_cntl_feature_ver; | ||
| 107 | uint32_t save_restore_list_cntl_size_bytes; | ||
| 108 | uint32_t save_restore_list_cntl_offset_bytes; | ||
| 109 | uint32_t save_restore_list_gpm_ucode_ver; | ||
| 110 | uint32_t save_restore_list_gpm_feature_ver; | ||
| 111 | uint32_t save_restore_list_gpm_size_bytes; | ||
| 112 | uint32_t save_restore_list_gpm_offset_bytes; | ||
| 113 | uint32_t save_restore_list_srm_ucode_ver; | ||
| 114 | uint32_t save_restore_list_srm_feature_ver; | ||
| 115 | uint32_t save_restore_list_srm_size_bytes; | ||
| 116 | uint32_t save_restore_list_srm_offset_bytes; | ||
| 117 | }; | ||
| 118 | |||
| 101 | /* version_major=1, version_minor=0 */ | 119 | /* version_major=1, version_minor=0 */ |
| 102 | struct sdma_firmware_header_v1_0 { | 120 | struct sdma_firmware_header_v1_0 { |
| 103 | struct common_firmware_header header; | 121 | struct common_firmware_header header; |
| @@ -148,6 +166,7 @@ union amdgpu_firmware_header { | |||
| 148 | struct gfx_firmware_header_v1_0 gfx; | 166 | struct gfx_firmware_header_v1_0 gfx; |
| 149 | struct rlc_firmware_header_v1_0 rlc; | 167 | struct rlc_firmware_header_v1_0 rlc; |
| 150 | struct rlc_firmware_header_v2_0 rlc_v2_0; | 168 | struct rlc_firmware_header_v2_0 rlc_v2_0; |
| 169 | struct rlc_firmware_header_v2_1 rlc_v2_1; | ||
| 151 | struct sdma_firmware_header_v1_0 sdma; | 170 | struct sdma_firmware_header_v1_0 sdma; |
| 152 | struct sdma_firmware_header_v1_1 sdma_v1_1; | 171 | struct sdma_firmware_header_v1_1 sdma_v1_1; |
| 153 | struct gpu_info_firmware_header_v1_0 gpu_info; | 172 | struct gpu_info_firmware_header_v1_0 gpu_info; |
| @@ -168,6 +187,9 @@ enum AMDGPU_UCODE_ID { | |||
| 168 | AMDGPU_UCODE_ID_CP_MEC2, | 187 | AMDGPU_UCODE_ID_CP_MEC2, |
| 169 | AMDGPU_UCODE_ID_CP_MEC2_JT, | 188 | AMDGPU_UCODE_ID_CP_MEC2_JT, |
| 170 | AMDGPU_UCODE_ID_RLC_G, | 189 | AMDGPU_UCODE_ID_RLC_G, |
| 190 | AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL, | ||
| 191 | AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM, | ||
| 192 | AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM, | ||
| 171 | AMDGPU_UCODE_ID_STORAGE, | 193 | AMDGPU_UCODE_ID_STORAGE, |
| 172 | AMDGPU_UCODE_ID_SMC, | 194 | AMDGPU_UCODE_ID_SMC, |
| 173 | AMDGPU_UCODE_ID_UVD, | 195 | AMDGPU_UCODE_ID_UVD, |
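
The three new AMDGPU_UCODE_ID_RLC_RESTORE_LIST_* entries let a GFX IP block hand the save/restore lists to the common firmware loader, which is why amdgpu_ucode_init_single_fw() above copies them from adev->gfx.rlc rather than from the raw header. A hypothetical registration snippet for the CNTL list (the GPM and SRM lists would follow the same pattern); the amdgpu_firmware_info field names are assumptions here, not taken from this diff:

	struct amdgpu_firmware_info *info;

	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
	info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
	info->fw = adev->gfx.rlc_fw;
	/* reserve space for the list in the shared firmware buffer */
	adev->firmware.fw_size +=
		ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);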
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 627542b22ae4..bcf68f80bbf0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | |||
| @@ -66,15 +66,18 @@ | |||
| 66 | #define FIRMWARE_POLARIS10 "amdgpu/polaris10_uvd.bin" | 66 | #define FIRMWARE_POLARIS10 "amdgpu/polaris10_uvd.bin" |
| 67 | #define FIRMWARE_POLARIS11 "amdgpu/polaris11_uvd.bin" | 67 | #define FIRMWARE_POLARIS11 "amdgpu/polaris11_uvd.bin" |
| 68 | #define FIRMWARE_POLARIS12 "amdgpu/polaris12_uvd.bin" | 68 | #define FIRMWARE_POLARIS12 "amdgpu/polaris12_uvd.bin" |
| 69 | #define FIRMWARE_VEGAM "amdgpu/vegam_uvd.bin" | ||
| 69 | 70 | ||
| 70 | #define FIRMWARE_VEGA10 "amdgpu/vega10_uvd.bin" | 71 | #define FIRMWARE_VEGA10 "amdgpu/vega10_uvd.bin" |
| 71 | #define FIRMWARE_VEGA12 "amdgpu/vega12_uvd.bin" | 72 | #define FIRMWARE_VEGA12 "amdgpu/vega12_uvd.bin" |
| 73 | #define FIRMWARE_VEGA20 "amdgpu/vega20_uvd.bin" | ||
| 72 | 74 | ||
| 73 | #define mmUVD_GPCOM_VCPU_DATA0_VEGA10 (0x03c4 + 0x7e00) | 75 | /* These are common relative offsets for all asics, from uvd_7_0_offset.h. */ |
| 74 | #define mmUVD_GPCOM_VCPU_DATA1_VEGA10 (0x03c5 + 0x7e00) | 76 | #define UVD_GPCOM_VCPU_CMD 0x03c3 |
| 75 | #define mmUVD_GPCOM_VCPU_CMD_VEGA10 (0x03c3 + 0x7e00) | 77 | #define UVD_GPCOM_VCPU_DATA0 0x03c4 |
| 76 | #define mmUVD_NO_OP_VEGA10 (0x03ff + 0x7e00) | 78 | #define UVD_GPCOM_VCPU_DATA1 0x03c5 |
| 77 | #define mmUVD_ENGINE_CNTL_VEGA10 (0x03c6 + 0x7e00) | 79 | #define UVD_NO_OP 0x03ff |
| 80 | #define UVD_BASE_SI 0x3800 | ||
| 78 | 81 | ||
| 79 | /** | 82 | /** |
| 80 | * amdgpu_uvd_cs_ctx - Command submission parser context | 83 | * amdgpu_uvd_cs_ctx - Command submission parser context |
| @@ -109,9 +112,11 @@ MODULE_FIRMWARE(FIRMWARE_STONEY); | |||
| 109 | MODULE_FIRMWARE(FIRMWARE_POLARIS10); | 112 | MODULE_FIRMWARE(FIRMWARE_POLARIS10); |
| 110 | MODULE_FIRMWARE(FIRMWARE_POLARIS11); | 113 | MODULE_FIRMWARE(FIRMWARE_POLARIS11); |
| 111 | MODULE_FIRMWARE(FIRMWARE_POLARIS12); | 114 | MODULE_FIRMWARE(FIRMWARE_POLARIS12); |
| 115 | MODULE_FIRMWARE(FIRMWARE_VEGAM); | ||
| 112 | 116 | ||
| 113 | MODULE_FIRMWARE(FIRMWARE_VEGA10); | 117 | MODULE_FIRMWARE(FIRMWARE_VEGA10); |
| 114 | MODULE_FIRMWARE(FIRMWARE_VEGA12); | 118 | MODULE_FIRMWARE(FIRMWARE_VEGA12); |
| 119 | MODULE_FIRMWARE(FIRMWARE_VEGA20); | ||
| 115 | 120 | ||
| 116 | static void amdgpu_uvd_idle_work_handler(struct work_struct *work); | 121 | static void amdgpu_uvd_idle_work_handler(struct work_struct *work); |
| 117 | 122 | ||
| @@ -123,9 +128,9 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) | |||
| 123 | const char *fw_name; | 128 | const char *fw_name; |
| 124 | const struct common_firmware_header *hdr; | 129 | const struct common_firmware_header *hdr; |
| 125 | unsigned version_major, version_minor, family_id; | 130 | unsigned version_major, version_minor, family_id; |
| 126 | int i, r; | 131 | int i, j, r; |
| 127 | 132 | ||
| 128 | INIT_DELAYED_WORK(&adev->uvd.idle_work, amdgpu_uvd_idle_work_handler); | 133 | INIT_DELAYED_WORK(&adev->uvd.inst->idle_work, amdgpu_uvd_idle_work_handler); |
| 129 | 134 | ||
| 130 | switch (adev->asic_type) { | 135 | switch (adev->asic_type) { |
| 131 | #ifdef CONFIG_DRM_AMDGPU_CIK | 136 | #ifdef CONFIG_DRM_AMDGPU_CIK |
| @@ -172,6 +177,12 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) | |||
| 172 | case CHIP_VEGA12: | 177 | case CHIP_VEGA12: |
| 173 | fw_name = FIRMWARE_VEGA12; | 178 | fw_name = FIRMWARE_VEGA12; |
| 174 | break; | 179 | break; |
| 180 | case CHIP_VEGAM: | ||
| 181 | fw_name = FIRMWARE_VEGAM; | ||
| 182 | break; | ||
| 183 | case CHIP_VEGA20: | ||
| 184 | fw_name = FIRMWARE_VEGA20; | ||
| 185 | break; | ||
| 175 | default: | 186 | default: |
| 176 | return -EINVAL; | 187 | return -EINVAL; |
| 177 | } | 188 | } |
| @@ -226,28 +237,30 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) | |||
| 226 | if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) | 237 | if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) |
| 227 | bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); | 238 | bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); |
| 228 | 239 | ||
| 229 | r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE, | 240 | for (j = 0; j < adev->uvd.num_uvd_inst; j++) { |
| 230 | AMDGPU_GEM_DOMAIN_VRAM, &adev->uvd.vcpu_bo, | ||
| 231 | &adev->uvd.gpu_addr, &adev->uvd.cpu_addr); | ||
| 232 | if (r) { | ||
| 233 | dev_err(adev->dev, "(%d) failed to allocate UVD bo\n", r); | ||
| 234 | return r; | ||
| 235 | } | ||
| 236 | 241 | ||
| 237 | ring = &adev->uvd.ring; | 242 | r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE, |
| 238 | rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; | 243 | AMDGPU_GEM_DOMAIN_VRAM, &adev->uvd.inst[j].vcpu_bo, |
| 239 | r = drm_sched_entity_init(&ring->sched, &adev->uvd.entity, | 244 | &adev->uvd.inst[j].gpu_addr, &adev->uvd.inst[j].cpu_addr); |
| 240 | rq, amdgpu_sched_jobs, NULL); | 245 | if (r) { |
| 241 | if (r != 0) { | 246 | dev_err(adev->dev, "(%d) failed to allocate UVD bo\n", r); |
| 242 | DRM_ERROR("Failed setting up UVD run queue.\n"); | 247 | return r; |
| 243 | return r; | 248 | } |
| 244 | } | ||
| 245 | 249 | ||
| 246 | for (i = 0; i < adev->uvd.max_handles; ++i) { | 250 | ring = &adev->uvd.inst[j].ring; |
| 247 | atomic_set(&adev->uvd.handles[i], 0); | 251 | rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; |
| 248 | adev->uvd.filp[i] = NULL; | 252 | r = drm_sched_entity_init(&ring->sched, &adev->uvd.inst[j].entity, |
| 249 | } | 253 | rq, NULL); |
| 254 | if (r != 0) { | ||
| 255 | DRM_ERROR("Failed setting up UVD(%d) run queue.\n", j); | ||
| 256 | return r; | ||
| 257 | } | ||
| 250 | 258 | ||
| 259 | for (i = 0; i < adev->uvd.max_handles; ++i) { | ||
| 260 | atomic_set(&adev->uvd.inst[j].handles[i], 0); | ||
| 261 | adev->uvd.inst[j].filp[i] = NULL; | ||
| 262 | } | ||
| 263 | } | ||
| 251 | /* from uvd v5.0 HW addressing capacity increased to 64 bits */ | 264 | /* from uvd v5.0 HW addressing capacity increased to 64 bits */ |
| 252 | if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0)) | 265 | if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0)) |
| 253 | adev->uvd.address_64_bit = true; | 266 | adev->uvd.address_64_bit = true; |
| @@ -274,20 +287,22 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) | |||
| 274 | 287 | ||
| 275 | int amdgpu_uvd_sw_fini(struct amdgpu_device *adev) | 288 | int amdgpu_uvd_sw_fini(struct amdgpu_device *adev) |
| 276 | { | 289 | { |
| 277 | int i; | 290 | int i, j; |
| 278 | kfree(adev->uvd.saved_bo); | ||
| 279 | 291 | ||
| 280 | drm_sched_entity_fini(&adev->uvd.ring.sched, &adev->uvd.entity); | 292 | for (j = 0; j < adev->uvd.num_uvd_inst; ++j) { |
| 293 | kfree(adev->uvd.inst[j].saved_bo); | ||
| 281 | 294 | ||
| 282 | amdgpu_bo_free_kernel(&adev->uvd.vcpu_bo, | 295 | drm_sched_entity_fini(&adev->uvd.inst[j].ring.sched, &adev->uvd.inst[j].entity); |
| 283 | &adev->uvd.gpu_addr, | ||
| 284 | (void **)&adev->uvd.cpu_addr); | ||
| 285 | 296 | ||
| 286 | amdgpu_ring_fini(&adev->uvd.ring); | 297 | amdgpu_bo_free_kernel(&adev->uvd.inst[j].vcpu_bo, |
| 298 | &adev->uvd.inst[j].gpu_addr, | ||
| 299 | (void **)&adev->uvd.inst[j].cpu_addr); | ||
| 287 | 300 | ||
| 288 | for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i) | 301 | amdgpu_ring_fini(&adev->uvd.inst[j].ring); |
| 289 | amdgpu_ring_fini(&adev->uvd.ring_enc[i]); | ||
| 290 | 302 | ||
| 303 | for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i) | ||
| 304 | amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]); | ||
| 305 | } | ||
| 291 | release_firmware(adev->uvd.fw); | 306 | release_firmware(adev->uvd.fw); |
| 292 | 307 | ||
| 293 | return 0; | 308 | return 0; |
| @@ -297,32 +312,33 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev) | |||
| 297 | { | 312 | { |
| 298 | unsigned size; | 313 | unsigned size; |
| 299 | void *ptr; | 314 | void *ptr; |
| 300 | int i; | 315 | int i, j; |
| 301 | |||
| 302 | if (adev->uvd.vcpu_bo == NULL) | ||
| 303 | return 0; | ||
| 304 | 316 | ||
| 305 | cancel_delayed_work_sync(&adev->uvd.idle_work); | 317 | for (j = 0; j < adev->uvd.num_uvd_inst; ++j) { |
| 318 | if (adev->uvd.inst[j].vcpu_bo == NULL) | ||
| 319 | continue; | ||
| 306 | 320 | ||
| 307 | /* only valid for physical mode */ | 321 | cancel_delayed_work_sync(&adev->uvd.inst[j].idle_work); |
| 308 | if (adev->asic_type < CHIP_POLARIS10) { | ||
| 309 | for (i = 0; i < adev->uvd.max_handles; ++i) | ||
| 310 | if (atomic_read(&adev->uvd.handles[i])) | ||
| 311 | break; | ||
| 312 | 322 | ||
| 313 | if (i == adev->uvd.max_handles) | 323 | /* only valid for physical mode */ |
| 314 | return 0; | 324 | if (adev->asic_type < CHIP_POLARIS10) { |
| 315 | } | 325 | for (i = 0; i < adev->uvd.max_handles; ++i) |
| 326 | if (atomic_read(&adev->uvd.inst[j].handles[i])) | ||
| 327 | break; | ||
| 316 | 328 | ||
| 317 | size = amdgpu_bo_size(adev->uvd.vcpu_bo); | 329 | if (i == adev->uvd.max_handles) |
| 318 | ptr = adev->uvd.cpu_addr; | 330 | continue; |
| 331 | } | ||
| 319 | 332 | ||
| 320 | adev->uvd.saved_bo = kmalloc(size, GFP_KERNEL); | 333 | size = amdgpu_bo_size(adev->uvd.inst[j].vcpu_bo); |
| 321 | if (!adev->uvd.saved_bo) | 334 | ptr = adev->uvd.inst[j].cpu_addr; |
| 322 | return -ENOMEM; | ||
| 323 | 335 | ||
| 324 | memcpy_fromio(adev->uvd.saved_bo, ptr, size); | 336 | adev->uvd.inst[j].saved_bo = kmalloc(size, GFP_KERNEL); |
| 337 | if (!adev->uvd.inst[j].saved_bo) | ||
| 338 | return -ENOMEM; | ||
| 325 | 339 | ||
| 340 | memcpy_fromio(adev->uvd.inst[j].saved_bo, ptr, size); | ||
| 341 | } | ||
| 326 | return 0; | 342 | return 0; |
| 327 | } | 343 | } |
| 328 | 344 | ||
| @@ -330,59 +346,65 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev) | |||
| 330 | { | 346 | { |
| 331 | unsigned size; | 347 | unsigned size; |
| 332 | void *ptr; | 348 | void *ptr; |
| 349 | int i; | ||
| 333 | 350 | ||
| 334 | if (adev->uvd.vcpu_bo == NULL) | 351 | for (i = 0; i < adev->uvd.num_uvd_inst; i++) { |
| 335 | return -EINVAL; | 352 | if (adev->uvd.inst[i].vcpu_bo == NULL) |
| 353 | return -EINVAL; | ||
| 336 | 354 | ||
| 337 | size = amdgpu_bo_size(adev->uvd.vcpu_bo); | 355 | size = amdgpu_bo_size(adev->uvd.inst[i].vcpu_bo); |
| 338 | ptr = adev->uvd.cpu_addr; | 356 | ptr = adev->uvd.inst[i].cpu_addr; |
| 339 | 357 | ||
| 340 | if (adev->uvd.saved_bo != NULL) { | 358 | if (adev->uvd.inst[i].saved_bo != NULL) { |
| 341 | memcpy_toio(ptr, adev->uvd.saved_bo, size); | 359 | memcpy_toio(ptr, adev->uvd.inst[i].saved_bo, size); |
| 342 | kfree(adev->uvd.saved_bo); | 360 | kfree(adev->uvd.inst[i].saved_bo); |
| 343 | adev->uvd.saved_bo = NULL; | 361 | adev->uvd.inst[i].saved_bo = NULL; |
| 344 | } else { | 362 | } else { |
| 345 | const struct common_firmware_header *hdr; | 363 | const struct common_firmware_header *hdr; |
| 346 | unsigned offset; | 364 | unsigned offset; |
| 347 | 365 | ||
| 348 | hdr = (const struct common_firmware_header *)adev->uvd.fw->data; | 366 | hdr = (const struct common_firmware_header *)adev->uvd.fw->data; |
| 349 | if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { | 367 | if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { |
| 350 | offset = le32_to_cpu(hdr->ucode_array_offset_bytes); | 368 | offset = le32_to_cpu(hdr->ucode_array_offset_bytes); |
| 351 | memcpy_toio(adev->uvd.cpu_addr, adev->uvd.fw->data + offset, | 369 | memcpy_toio(adev->uvd.inst[i].cpu_addr, adev->uvd.fw->data + offset, |
| 352 | le32_to_cpu(hdr->ucode_size_bytes)); | 370 | le32_to_cpu(hdr->ucode_size_bytes)); |
| 353 | size -= le32_to_cpu(hdr->ucode_size_bytes); | 371 | size -= le32_to_cpu(hdr->ucode_size_bytes); |
| 354 | ptr += le32_to_cpu(hdr->ucode_size_bytes); | 372 | ptr += le32_to_cpu(hdr->ucode_size_bytes); |
| 373 | } | ||
| 374 | memset_io(ptr, 0, size); | ||
| 375 | /* to restore uvd fence seq */ | ||
| 376 | amdgpu_fence_driver_force_completion(&adev->uvd.inst[i].ring); | ||
| 355 | } | 377 | } |
| 356 | memset_io(ptr, 0, size); | ||
| 357 | /* to restore uvd fence seq */ | ||
| 358 | amdgpu_fence_driver_force_completion(&adev->uvd.ring); | ||
| 359 | } | 378 | } |
| 360 | |||
| 361 | return 0; | 379 | return 0; |
| 362 | } | 380 | } |
| 363 | 381 | ||
| 364 | void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp) | 382 | void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp) |
| 365 | { | 383 | { |
| 366 | struct amdgpu_ring *ring = &adev->uvd.ring; | 384 | struct amdgpu_ring *ring; |
| 367 | int i, r; | 385 | int i, j, r; |
| 368 | 386 | ||
| 369 | for (i = 0; i < adev->uvd.max_handles; ++i) { | 387 | for (j = 0; j < adev->uvd.num_uvd_inst; j++) { |
| 370 | uint32_t handle = atomic_read(&adev->uvd.handles[i]); | 388 | ring = &adev->uvd.inst[j].ring; |
| 371 | if (handle != 0 && adev->uvd.filp[i] == filp) { | 389 | |
| 372 | struct dma_fence *fence; | 390 | for (i = 0; i < adev->uvd.max_handles; ++i) { |
| 373 | 391 | uint32_t handle = atomic_read(&adev->uvd.inst[j].handles[i]); | |
| 374 | r = amdgpu_uvd_get_destroy_msg(ring, handle, | 392 | if (handle != 0 && adev->uvd.inst[j].filp[i] == filp) { |
| 375 | false, &fence); | 393 | struct dma_fence *fence; |
| 376 | if (r) { | 394 | |
| 377 | DRM_ERROR("Error destroying UVD (%d)!\n", r); | 395 | r = amdgpu_uvd_get_destroy_msg(ring, handle, |
| 378 | continue; | 396 | false, &fence); |
| 379 | } | 397 | if (r) { |
| 398 | DRM_ERROR("Error destroying UVD(%d) %d!\n", j, r); | ||
| 399 | continue; | ||
| 400 | } | ||
| 380 | 401 | ||
| 381 | dma_fence_wait(fence, false); | 402 | dma_fence_wait(fence, false); |
| 382 | dma_fence_put(fence); | 403 | dma_fence_put(fence); |
| 383 | 404 | ||
| 384 | adev->uvd.filp[i] = NULL; | 405 | adev->uvd.inst[j].filp[i] = NULL; |
| 385 | atomic_set(&adev->uvd.handles[i], 0); | 406 | atomic_set(&adev->uvd.inst[j].handles[i], 0); |
| 407 | } | ||
| 386 | } | 408 | } |
| 387 | } | 409 | } |
| 388 | } | 410 | } |
| @@ -657,15 +679,16 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx, | |||
| 657 | void *ptr; | 679 | void *ptr; |
| 658 | long r; | 680 | long r; |
| 659 | int i; | 681 | int i; |
| 682 | uint32_t ip_instance = ctx->parser->job->ring->me; | ||
| 660 | 683 | ||
| 661 | if (offset & 0x3F) { | 684 | if (offset & 0x3F) { |
| 662 | DRM_ERROR("UVD messages must be 64 byte aligned!\n"); | 685 | DRM_ERROR("UVD(%d) messages must be 64 byte aligned!\n", ip_instance); |
| 663 | return -EINVAL; | 686 | return -EINVAL; |
| 664 | } | 687 | } |
| 665 | 688 | ||
| 666 | r = amdgpu_bo_kmap(bo, &ptr); | 689 | r = amdgpu_bo_kmap(bo, &ptr); |
| 667 | if (r) { | 690 | if (r) { |
| 668 | DRM_ERROR("Failed mapping the UVD message (%ld)!\n", r); | 691 | DRM_ERROR("Failed mapping the UVD(%d) message (%ld)!\n", ip_instance, r); |
| 669 | return r; | 692 | return r; |
| 670 | } | 693 | } |
| 671 | 694 | ||
| @@ -675,7 +698,7 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx, | |||
| 675 | handle = msg[2]; | 698 | handle = msg[2]; |
| 676 | 699 | ||
| 677 | if (handle == 0) { | 700 | if (handle == 0) { |
| 678 | DRM_ERROR("Invalid UVD handle!\n"); | 701 | DRM_ERROR("Invalid UVD(%d) handle!\n", ip_instance); |
| 679 | return -EINVAL; | 702 | return -EINVAL; |
| 680 | } | 703 | } |
| 681 | 704 | ||
| @@ -686,18 +709,18 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx, | |||
| 686 | 709 | ||
| 687 | /* try to alloc a new handle */ | 710 | /* try to alloc a new handle */ |
| 688 | for (i = 0; i < adev->uvd.max_handles; ++i) { | 711 | for (i = 0; i < adev->uvd.max_handles; ++i) { |
| 689 | if (atomic_read(&adev->uvd.handles[i]) == handle) { | 712 | if (atomic_read(&adev->uvd.inst[ip_instance].handles[i]) == handle) { |
| 690 | DRM_ERROR("Handle 0x%x already in use!\n", handle); | 713 | DRM_ERROR("(%d)Handle 0x%x already in use!\n", ip_instance, handle); |
| 691 | return -EINVAL; | 714 | return -EINVAL; |
| 692 | } | 715 | } |
| 693 | 716 | ||
| 694 | if (!atomic_cmpxchg(&adev->uvd.handles[i], 0, handle)) { | 717 | if (!atomic_cmpxchg(&adev->uvd.inst[ip_instance].handles[i], 0, handle)) { |
| 695 | adev->uvd.filp[i] = ctx->parser->filp; | 718 | adev->uvd.inst[ip_instance].filp[i] = ctx->parser->filp; |
| 696 | return 0; | 719 | return 0; |
| 697 | } | 720 | } |
| 698 | } | 721 | } |
| 699 | 722 | ||
| 700 | DRM_ERROR("No more free UVD handles!\n"); | 723 | DRM_ERROR("No more free UVD(%d) handles!\n", ip_instance); |
| 701 | return -ENOSPC; | 724 | return -ENOSPC; |
| 702 | 725 | ||
| 703 | case 1: | 726 | case 1: |
| @@ -709,27 +732,27 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx, | |||
| 709 | 732 | ||
| 710 | /* validate the handle */ | 733 | /* validate the handle */ |
| 711 | for (i = 0; i < adev->uvd.max_handles; ++i) { | 734 | for (i = 0; i < adev->uvd.max_handles; ++i) { |
| 712 | if (atomic_read(&adev->uvd.handles[i]) == handle) { | 735 | if (atomic_read(&adev->uvd.inst[ip_instance].handles[i]) == handle) { |
| 713 | if (adev->uvd.filp[i] != ctx->parser->filp) { | 736 | if (adev->uvd.inst[ip_instance].filp[i] != ctx->parser->filp) { |
| 714 | DRM_ERROR("UVD handle collision detected!\n"); | 737 | DRM_ERROR("UVD(%d) handle collision detected!\n", ip_instance); |
| 715 | return -EINVAL; | 738 | return -EINVAL; |
| 716 | } | 739 | } |
| 717 | return 0; | 740 | return 0; |
| 718 | } | 741 | } |
| 719 | } | 742 | } |
| 720 | 743 | ||
| 721 | DRM_ERROR("Invalid UVD handle 0x%x!\n", handle); | 744 | DRM_ERROR("Invalid UVD(%d) handle 0x%x!\n", ip_instance, handle); |
| 722 | return -ENOENT; | 745 | return -ENOENT; |
| 723 | 746 | ||
| 724 | case 2: | 747 | case 2: |
| 725 | /* it's a destroy msg, free the handle */ | 748 | /* it's a destroy msg, free the handle */ |
| 726 | for (i = 0; i < adev->uvd.max_handles; ++i) | 749 | for (i = 0; i < adev->uvd.max_handles; ++i) |
| 727 | atomic_cmpxchg(&adev->uvd.handles[i], handle, 0); | 750 | atomic_cmpxchg(&adev->uvd.inst[ip_instance].handles[i], handle, 0); |
| 728 | amdgpu_bo_kunmap(bo); | 751 | amdgpu_bo_kunmap(bo); |
| 729 | return 0; | 752 | return 0; |
| 730 | 753 | ||
| 731 | default: | 754 | default: |
| 732 | DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type); | 755 | DRM_ERROR("Illegal UVD(%d) message type (%d)!\n", ip_instance, msg_type); |
| 733 | return -EINVAL; | 756 | return -EINVAL; |
| 734 | } | 757 | } |
| 735 | BUG(); | 758 | BUG(); |
| @@ -800,7 +823,7 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx) | |||
| 800 | } | 823 | } |
| 801 | 824 | ||
| 802 | if ((cmd == 0 || cmd == 0x3) && | 825 | if ((cmd == 0 || cmd == 0x3) && |
| 803 | (start >> 28) != (ctx->parser->adev->uvd.gpu_addr >> 28)) { | 826 | (start >> 28) != (ctx->parser->adev->uvd.inst->gpu_addr >> 28)) { |
| 804 | DRM_ERROR("msg/fb buffer %LX-%LX out of 256MB segment!\n", | 827 | DRM_ERROR("msg/fb buffer %LX-%LX out of 256MB segment!\n", |
| 805 | start, end); | 828 | start, end); |
| 806 | return -EINVAL; | 829 | return -EINVAL; |
| @@ -968,6 +991,8 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, | |||
| 968 | uint64_t addr; | 991 | uint64_t addr; |
| 969 | long r; | 992 | long r; |
| 970 | int i; | 993 | int i; |
| 994 | unsigned offset_idx = 0; | ||
| 995 | unsigned offset[3] = { UVD_BASE_SI, 0, 0 }; | ||
| 971 | 996 | ||
| 972 | amdgpu_bo_kunmap(bo); | 997 | amdgpu_bo_kunmap(bo); |
| 973 | amdgpu_bo_unpin(bo); | 998 | amdgpu_bo_unpin(bo); |
| @@ -987,17 +1012,16 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, | |||
| 987 | goto err; | 1012 | goto err; |
| 988 | 1013 | ||
| 989 | if (adev->asic_type >= CHIP_VEGA10) { | 1014 | if (adev->asic_type >= CHIP_VEGA10) { |
| 990 | data[0] = PACKET0(mmUVD_GPCOM_VCPU_DATA0_VEGA10, 0); | 1015 | offset_idx = 1 + ring->me; |
| 991 | data[1] = PACKET0(mmUVD_GPCOM_VCPU_DATA1_VEGA10, 0); | 1016 | offset[1] = adev->reg_offset[UVD_HWIP][0][1]; |
| 992 | data[2] = PACKET0(mmUVD_GPCOM_VCPU_CMD_VEGA10, 0); | 1017 | offset[2] = adev->reg_offset[UVD_HWIP][1][1]; |
| 993 | data[3] = PACKET0(mmUVD_NO_OP_VEGA10, 0); | ||
| 994 | } else { | ||
| 995 | data[0] = PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0); | ||
| 996 | data[1] = PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0); | ||
| 997 | data[2] = PACKET0(mmUVD_GPCOM_VCPU_CMD, 0); | ||
| 998 | data[3] = PACKET0(mmUVD_NO_OP, 0); | ||
| 999 | } | 1018 | } |
| 1000 | 1019 | ||
| 1020 | data[0] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_DATA0, 0); | ||
| 1021 | data[1] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_DATA1, 0); | ||
| 1022 | data[2] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_CMD, 0); | ||
| 1023 | data[3] = PACKET0(offset[offset_idx] + UVD_NO_OP, 0); | ||
| 1024 | |||
| 1001 | ib = &job->ibs[0]; | 1025 | ib = &job->ibs[0]; |
| 1002 | addr = amdgpu_bo_gpu_offset(bo); | 1026 | addr = amdgpu_bo_gpu_offset(bo); |
| 1003 | ib->ptr[0] = data[0]; | 1027 | ib->ptr[0] = data[0]; |
| @@ -1033,7 +1057,7 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, | |||
| 1033 | if (r) | 1057 | if (r) |
| 1034 | goto err_free; | 1058 | goto err_free; |
| 1035 | 1059 | ||
| 1036 | r = amdgpu_job_submit(job, ring, &adev->uvd.entity, | 1060 | r = amdgpu_job_submit(job, ring, &adev->uvd.inst[ring->me].entity, |
| 1037 | AMDGPU_FENCE_OWNER_UNDEFINED, &f); | 1061 | AMDGPU_FENCE_OWNER_UNDEFINED, &f); |
| 1038 | if (r) | 1062 | if (r) |
| 1039 | goto err_free; | 1063 | goto err_free; |
| @@ -1121,8 +1145,15 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, | |||
| 1121 | static void amdgpu_uvd_idle_work_handler(struct work_struct *work) | 1145 | static void amdgpu_uvd_idle_work_handler(struct work_struct *work) |
| 1122 | { | 1146 | { |
| 1123 | struct amdgpu_device *adev = | 1147 | struct amdgpu_device *adev = |
| 1124 | container_of(work, struct amdgpu_device, uvd.idle_work.work); | 1148 | container_of(work, struct amdgpu_device, uvd.inst->idle_work.work); |
| 1125 | unsigned fences = amdgpu_fence_count_emitted(&adev->uvd.ring); | 1149 | unsigned fences = 0, i, j; |
| 1150 | |||
| 1151 | for (i = 0; i < adev->uvd.num_uvd_inst; ++i) { | ||
| 1152 | fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring); | ||
| 1153 | for (j = 0; j < adev->uvd.num_enc_rings; ++j) { | ||
| 1154 | fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring_enc[j]); | ||
| 1155 | } | ||
| 1156 | } | ||
| 1126 | 1157 | ||
| 1127 | if (fences == 0) { | 1158 | if (fences == 0) { |
| 1128 | if (adev->pm.dpm_enabled) { | 1159 | if (adev->pm.dpm_enabled) { |
| @@ -1136,7 +1167,7 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work) | |||
| 1136 | AMD_CG_STATE_GATE); | 1167 | AMD_CG_STATE_GATE); |
| 1137 | } | 1168 | } |
| 1138 | } else { | 1169 | } else { |
| 1139 | schedule_delayed_work(&adev->uvd.idle_work, UVD_IDLE_TIMEOUT); | 1170 | schedule_delayed_work(&adev->uvd.inst->idle_work, UVD_IDLE_TIMEOUT); |
| 1140 | } | 1171 | } |
| 1141 | } | 1172 | } |
| 1142 | 1173 | ||
| @@ -1148,7 +1179,7 @@ void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring) | |||
| 1148 | if (amdgpu_sriov_vf(adev)) | 1179 | if (amdgpu_sriov_vf(adev)) |
| 1149 | return; | 1180 | return; |
| 1150 | 1181 | ||
| 1151 | set_clocks = !cancel_delayed_work_sync(&adev->uvd.idle_work); | 1182 | set_clocks = !cancel_delayed_work_sync(&adev->uvd.inst->idle_work); |
| 1152 | if (set_clocks) { | 1183 | if (set_clocks) { |
| 1153 | if (adev->pm.dpm_enabled) { | 1184 | if (adev->pm.dpm_enabled) { |
| 1154 | amdgpu_dpm_enable_uvd(adev, true); | 1185 | amdgpu_dpm_enable_uvd(adev, true); |
| @@ -1165,7 +1196,7 @@ void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring) | |||
| 1165 | void amdgpu_uvd_ring_end_use(struct amdgpu_ring *ring) | 1196 | void amdgpu_uvd_ring_end_use(struct amdgpu_ring *ring) |
| 1166 | { | 1197 | { |
| 1167 | if (!amdgpu_sriov_vf(ring->adev)) | 1198 | if (!amdgpu_sriov_vf(ring->adev)) |
| 1168 | schedule_delayed_work(&ring->adev->uvd.idle_work, UVD_IDLE_TIMEOUT); | 1199 | schedule_delayed_work(&ring->adev->uvd.inst->idle_work, UVD_IDLE_TIMEOUT); |
| 1169 | } | 1200 | } |
| 1170 | 1201 | ||
| 1171 | /** | 1202 | /** |
| @@ -1179,27 +1210,28 @@ int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout) | |||
| 1179 | { | 1210 | { |
| 1180 | struct dma_fence *fence; | 1211 | struct dma_fence *fence; |
| 1181 | long r; | 1212 | long r; |
| 1213 | uint32_t ip_instance = ring->me; | ||
| 1182 | 1214 | ||
| 1183 | r = amdgpu_uvd_get_create_msg(ring, 1, NULL); | 1215 | r = amdgpu_uvd_get_create_msg(ring, 1, NULL); |
| 1184 | if (r) { | 1216 | if (r) { |
| 1185 | DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r); | 1217 | DRM_ERROR("amdgpu: (%d)failed to get create msg (%ld).\n", ip_instance, r); |
| 1186 | goto error; | 1218 | goto error; |
| 1187 | } | 1219 | } |
| 1188 | 1220 | ||
| 1189 | r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence); | 1221 | r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence); |
| 1190 | if (r) { | 1222 | if (r) { |
| 1191 | DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r); | 1223 | DRM_ERROR("amdgpu: (%d)failed to get destroy ib (%ld).\n", ip_instance, r); |
| 1192 | goto error; | 1224 | goto error; |
| 1193 | } | 1225 | } |
| 1194 | 1226 | ||
| 1195 | r = dma_fence_wait_timeout(fence, false, timeout); | 1227 | r = dma_fence_wait_timeout(fence, false, timeout); |
| 1196 | if (r == 0) { | 1228 | if (r == 0) { |
| 1197 | DRM_ERROR("amdgpu: IB test timed out.\n"); | 1229 | DRM_ERROR("amdgpu: (%d)IB test timed out.\n", ip_instance); |
| 1198 | r = -ETIMEDOUT; | 1230 | r = -ETIMEDOUT; |
| 1199 | } else if (r < 0) { | 1231 | } else if (r < 0) { |
| 1200 | DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); | 1232 | DRM_ERROR("amdgpu: (%d)fence wait failed (%ld).\n", ip_instance, r); |
| 1201 | } else { | 1233 | } else { |
| 1202 | DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); | 1234 | DRM_DEBUG("ib test on (%d)ring %d succeeded\n", ip_instance, ring->idx); |
| 1203 | r = 0; | 1235 | r = 0; |
| 1204 | } | 1236 | } |
| 1205 | 1237 | ||
| @@ -1227,7 +1259,7 @@ uint32_t amdgpu_uvd_used_handles(struct amdgpu_device *adev) | |||
| 1227 | * necessarily linear. So we need to count | 1259 | * necessarily linear. So we need to count |
| 1228 | * all non-zero handles. | 1260 | * all non-zero handles. |
| 1229 | */ | 1261 | */ |
| 1230 | if (atomic_read(&adev->uvd.handles[i])) | 1262 | if (atomic_read(&adev->uvd.inst->handles[i])) |
| 1231 | used_handles++; | 1263 | used_handles++; |
| 1232 | } | 1264 | } |
| 1233 | 1265 | ||
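
Most of the amdgpu_uvd.c churn above is mechanical: every piece of UVD state that used to live directly in adev->uvd now lives in adev->uvd.inst[] and is walked per instance. A minimal sketch of the resulting access pattern, modeled on the idle-work handler in this diff and assuming num_uvd_inst has been set by ASIC-specific code not shown here:

	unsigned fences = 0;
	int i, j;

	/* per-instance state: one ring, one set of enc rings, one handle table */
	for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
		fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring);
		for (j = 0; j < adev->uvd.num_enc_rings; ++j)
			fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring_enc[j]);
	}

	/* shared state (firmware, handle limit, feature flags) stays on adev->uvd */
	DRM_DEBUG("UVD fw version %u, %u outstanding fences\n",
		  adev->uvd.fw_version, fences);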
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h index 32ea20b99e53..b1579fba134c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h | |||
| @@ -31,30 +31,37 @@ | |||
| 31 | #define AMDGPU_UVD_SESSION_SIZE (50*1024) | 31 | #define AMDGPU_UVD_SESSION_SIZE (50*1024) |
| 32 | #define AMDGPU_UVD_FIRMWARE_OFFSET 256 | 32 | #define AMDGPU_UVD_FIRMWARE_OFFSET 256 |
| 33 | 33 | ||
| 34 | #define AMDGPU_MAX_UVD_INSTANCES 2 | ||
| 35 | |||
| 34 | #define AMDGPU_UVD_FIRMWARE_SIZE(adev) \ | 36 | #define AMDGPU_UVD_FIRMWARE_SIZE(adev) \ |
| 35 | (AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(((const struct common_firmware_header *)(adev)->uvd.fw->data)->ucode_size_bytes) + \ | 37 | (AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(((const struct common_firmware_header *)(adev)->uvd.fw->data)->ucode_size_bytes) + \ |
| 36 | 8) - AMDGPU_UVD_FIRMWARE_OFFSET) | 38 | 8) - AMDGPU_UVD_FIRMWARE_OFFSET) |
| 37 | 39 | ||
| 38 | struct amdgpu_uvd { | 40 | struct amdgpu_uvd_inst { |
| 39 | struct amdgpu_bo *vcpu_bo; | 41 | struct amdgpu_bo *vcpu_bo; |
| 40 | void *cpu_addr; | 42 | void *cpu_addr; |
| 41 | uint64_t gpu_addr; | 43 | uint64_t gpu_addr; |
| 42 | unsigned fw_version; | ||
| 43 | void *saved_bo; | 44 | void *saved_bo; |
| 44 | unsigned max_handles; | ||
| 45 | atomic_t handles[AMDGPU_MAX_UVD_HANDLES]; | 45 | atomic_t handles[AMDGPU_MAX_UVD_HANDLES]; |
| 46 | struct drm_file *filp[AMDGPU_MAX_UVD_HANDLES]; | 46 | struct drm_file *filp[AMDGPU_MAX_UVD_HANDLES]; |
| 47 | struct delayed_work idle_work; | 47 | struct delayed_work idle_work; |
| 48 | const struct firmware *fw; /* UVD firmware */ | ||
| 49 | struct amdgpu_ring ring; | 48 | struct amdgpu_ring ring; |
| 50 | struct amdgpu_ring ring_enc[AMDGPU_MAX_UVD_ENC_RINGS]; | 49 | struct amdgpu_ring ring_enc[AMDGPU_MAX_UVD_ENC_RINGS]; |
| 51 | struct amdgpu_irq_src irq; | 50 | struct amdgpu_irq_src irq; |
| 52 | bool address_64_bit; | ||
| 53 | bool use_ctx_buf; | ||
| 54 | struct drm_sched_entity entity; | 51 | struct drm_sched_entity entity; |
| 55 | struct drm_sched_entity entity_enc; | 52 | struct drm_sched_entity entity_enc; |
| 56 | uint32_t srbm_soft_reset; | 53 | uint32_t srbm_soft_reset; |
| 54 | }; | ||
| 55 | |||
| 56 | struct amdgpu_uvd { | ||
| 57 | const struct firmware *fw; /* UVD firmware */ | ||
| 58 | unsigned fw_version; | ||
| 59 | unsigned max_handles; | ||
| 57 | unsigned num_enc_rings; | 60 | unsigned num_enc_rings; |
| 61 | uint8_t num_uvd_inst; | ||
| 62 | bool address_64_bit; | ||
| 63 | bool use_ctx_buf; | ||
| 64 | struct amdgpu_uvd_inst inst[AMDGPU_MAX_UVD_INSTANCES]; | ||
| 58 | }; | 65 | }; |
| 59 | 66 | ||
| 60 | int amdgpu_uvd_sw_init(struct amdgpu_device *adev); | 67 | int amdgpu_uvd_sw_init(struct amdgpu_device *adev); |
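
The split of struct amdgpu_uvd into a shared part and struct amdgpu_uvd_inst also changes how a ring names its owner: the ring's me field selects the instance, and on VEGA-class parts the per-instance register aperture is looked up through adev->reg_offset instead of the old hard-coded VEGA10 offsets. A sketch of the addressing used by the send-msg path above, with ring, adev and the relative UVD_* offsets taken from that hunk and UVD_BASE_SI as the pre-VEGA fallback:

	uint32_t data[4];
	unsigned offset_idx = 0;
	unsigned offset[3] = { UVD_BASE_SI, 0, 0 };

	if (adev->asic_type >= CHIP_VEGA10) {
		/* instance 0 and 1 each have their own register aperture */
		offset_idx = 1 + ring->me;
		offset[1] = adev->reg_offset[UVD_HWIP][0][1];
		offset[2] = adev->reg_offset[UVD_HWIP][1][1];
	}

	/* relative offsets from uvd_7_0_offset.h are added to the aperture base */
	data[0] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_DATA0, 0);
	data[3] = PACKET0(offset[offset_idx] + UVD_NO_OP, 0);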
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index a33804bd3314..23d960ec1cf2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | |||
| @@ -51,11 +51,13 @@ | |||
| 51 | #define FIRMWARE_FIJI "amdgpu/fiji_vce.bin" | 51 | #define FIRMWARE_FIJI "amdgpu/fiji_vce.bin" |
| 52 | #define FIRMWARE_STONEY "amdgpu/stoney_vce.bin" | 52 | #define FIRMWARE_STONEY "amdgpu/stoney_vce.bin" |
| 53 | #define FIRMWARE_POLARIS10 "amdgpu/polaris10_vce.bin" | 53 | #define FIRMWARE_POLARIS10 "amdgpu/polaris10_vce.bin" |
| 54 | #define FIRMWARE_POLARIS11 "amdgpu/polaris11_vce.bin" | 54 | #define FIRMWARE_POLARIS11 "amdgpu/polaris11_vce.bin" |
| 55 | #define FIRMWARE_POLARIS12 "amdgpu/polaris12_vce.bin" | 55 | #define FIRMWARE_POLARIS12 "amdgpu/polaris12_vce.bin" |
| 56 | #define FIRMWARE_VEGAM "amdgpu/vegam_vce.bin" | ||
| 56 | 57 | ||
| 57 | #define FIRMWARE_VEGA10 "amdgpu/vega10_vce.bin" | 58 | #define FIRMWARE_VEGA10 "amdgpu/vega10_vce.bin" |
| 58 | #define FIRMWARE_VEGA12 "amdgpu/vega12_vce.bin" | 59 | #define FIRMWARE_VEGA12 "amdgpu/vega12_vce.bin" |
| 60 | #define FIRMWARE_VEGA20 "amdgpu/vega20_vce.bin" | ||
| 59 | 61 | ||
| 60 | #ifdef CONFIG_DRM_AMDGPU_CIK | 62 | #ifdef CONFIG_DRM_AMDGPU_CIK |
| 61 | MODULE_FIRMWARE(FIRMWARE_BONAIRE); | 63 | MODULE_FIRMWARE(FIRMWARE_BONAIRE); |
| @@ -71,9 +73,11 @@ MODULE_FIRMWARE(FIRMWARE_STONEY); | |||
| 71 | MODULE_FIRMWARE(FIRMWARE_POLARIS10); | 73 | MODULE_FIRMWARE(FIRMWARE_POLARIS10); |
| 72 | MODULE_FIRMWARE(FIRMWARE_POLARIS11); | 74 | MODULE_FIRMWARE(FIRMWARE_POLARIS11); |
| 73 | MODULE_FIRMWARE(FIRMWARE_POLARIS12); | 75 | MODULE_FIRMWARE(FIRMWARE_POLARIS12); |
| 76 | MODULE_FIRMWARE(FIRMWARE_VEGAM); | ||
| 74 | 77 | ||
| 75 | MODULE_FIRMWARE(FIRMWARE_VEGA10); | 78 | MODULE_FIRMWARE(FIRMWARE_VEGA10); |
| 76 | MODULE_FIRMWARE(FIRMWARE_VEGA12); | 79 | MODULE_FIRMWARE(FIRMWARE_VEGA12); |
| 80 | MODULE_FIRMWARE(FIRMWARE_VEGA20); | ||
| 77 | 81 | ||
| 78 | static void amdgpu_vce_idle_work_handler(struct work_struct *work); | 82 | static void amdgpu_vce_idle_work_handler(struct work_struct *work); |
| 79 | 83 | ||
| @@ -132,12 +136,18 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) | |||
| 132 | case CHIP_POLARIS12: | 136 | case CHIP_POLARIS12: |
| 133 | fw_name = FIRMWARE_POLARIS12; | 137 | fw_name = FIRMWARE_POLARIS12; |
| 134 | break; | 138 | break; |
| 139 | case CHIP_VEGAM: | ||
| 140 | fw_name = FIRMWARE_VEGAM; | ||
| 141 | break; | ||
| 135 | case CHIP_VEGA10: | 142 | case CHIP_VEGA10: |
| 136 | fw_name = FIRMWARE_VEGA10; | 143 | fw_name = FIRMWARE_VEGA10; |
| 137 | break; | 144 | break; |
| 138 | case CHIP_VEGA12: | 145 | case CHIP_VEGA12: |
| 139 | fw_name = FIRMWARE_VEGA12; | 146 | fw_name = FIRMWARE_VEGA12; |
| 140 | break; | 147 | break; |
| 148 | case CHIP_VEGA20: | ||
| 149 | fw_name = FIRMWARE_VEGA20; | ||
| 150 | break; | ||
| 141 | 151 | ||
| 142 | default: | 152 | default: |
| 143 | return -EINVAL; | 153 | return -EINVAL; |
| @@ -181,7 +191,7 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) | |||
| 181 | ring = &adev->vce.ring[0]; | 191 | ring = &adev->vce.ring[0]; |
| 182 | rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; | 192 | rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; |
| 183 | r = drm_sched_entity_init(&ring->sched, &adev->vce.entity, | 193 | r = drm_sched_entity_init(&ring->sched, &adev->vce.entity, |
| 184 | rq, amdgpu_sched_jobs, NULL); | 194 | rq, NULL); |
| 185 | if (r != 0) { | 195 | if (r != 0) { |
| 186 | DRM_ERROR("Failed setting up VCE run queue.\n"); | 196 | DRM_ERROR("Failed setting up VCE run queue.\n"); |
| 187 | return r; | 197 | return r; |
| @@ -755,6 +765,18 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) | |||
| 755 | if (r) | 765 | if (r) |
| 756 | goto out; | 766 | goto out; |
| 757 | break; | 767 | break; |
| 768 | |||
| 769 | case 0x0500000d: /* MV buffer */ | ||
| 770 | r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3, | ||
| 771 | idx + 2, 0, 0); | ||
| 772 | if (r) | ||
| 773 | goto out; | ||
| 774 | |||
| 775 | r = amdgpu_vce_validate_bo(p, ib_idx, idx + 8, | ||
| 776 | idx + 7, 0, 0); | ||
| 777 | if (r) | ||
| 778 | goto out; | ||
| 779 | break; | ||
| 758 | } | 780 | } |
| 759 | 781 | ||
| 760 | idx += len / 4; | 782 | idx += len / 4; |
| @@ -860,6 +882,18 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) | |||
| 860 | goto out; | 882 | goto out; |
| 861 | break; | 883 | break; |
| 862 | 884 | ||
| 885 | case 0x0500000d: /* MV buffer */ | ||
| 886 | r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, | ||
| 887 | idx + 2, *size, 0); | ||
| 888 | if (r) | ||
| 889 | goto out; | ||
| 890 | |||
| 891 | r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 8, | ||
| 892 | idx + 7, *size / 12, 0); | ||
| 893 | if (r) | ||
| 894 | goto out; | ||
| 895 | break; | ||
| 896 | |||
| 863 | default: | 897 | default: |
| 864 | DRM_ERROR("invalid VCE command (0x%x)!\n", cmd); | 898 | DRM_ERROR("invalid VCE command (0x%x)!\n", cmd); |
| 865 | r = -EINVAL; | 899 | r = -EINVAL; |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 58e495330b38..8851bcdfc260 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | |||
| @@ -105,7 +105,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) | |||
| 105 | ring = &adev->vcn.ring_dec; | 105 | ring = &adev->vcn.ring_dec; |
| 106 | rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; | 106 | rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; |
| 107 | r = drm_sched_entity_init(&ring->sched, &adev->vcn.entity_dec, | 107 | r = drm_sched_entity_init(&ring->sched, &adev->vcn.entity_dec, |
| 108 | rq, amdgpu_sched_jobs, NULL); | 108 | rq, NULL); |
| 109 | if (r != 0) { | 109 | if (r != 0) { |
| 110 | DRM_ERROR("Failed setting up VCN dec run queue.\n"); | 110 | DRM_ERROR("Failed setting up VCN dec run queue.\n"); |
| 111 | return r; | 111 | return r; |
| @@ -114,7 +114,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) | |||
| 114 | ring = &adev->vcn.ring_enc[0]; | 114 | ring = &adev->vcn.ring_enc[0]; |
| 115 | rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; | 115 | rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; |
| 116 | r = drm_sched_entity_init(&ring->sched, &adev->vcn.entity_enc, | 116 | r = drm_sched_entity_init(&ring->sched, &adev->vcn.entity_enc, |
| 117 | rq, amdgpu_sched_jobs, NULL); | 117 | rq, NULL); |
| 118 | if (r != 0) { | 118 | if (r != 0) { |
| 119 | DRM_ERROR("Failed setting up VCN enc run queue.\n"); | 119 | DRM_ERROR("Failed setting up VCN enc run queue.\n"); |
| 120 | return r; | 120 | return r; |
| @@ -205,13 +205,18 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) | |||
| 205 | struct amdgpu_device *adev = | 205 | struct amdgpu_device *adev = |
| 206 | container_of(work, struct amdgpu_device, vcn.idle_work.work); | 206 | container_of(work, struct amdgpu_device, vcn.idle_work.work); |
| 207 | unsigned fences = amdgpu_fence_count_emitted(&adev->vcn.ring_dec); | 207 | unsigned fences = amdgpu_fence_count_emitted(&adev->vcn.ring_dec); |
| 208 | unsigned i; | ||
| 209 | |||
| 210 | for (i = 0; i < adev->vcn.num_enc_rings; ++i) { | ||
| 211 | fences += amdgpu_fence_count_emitted(&adev->vcn.ring_enc[i]); | ||
| 212 | } | ||
| 208 | 213 | ||
| 209 | if (fences == 0) { | 214 | if (fences == 0) { |
| 210 | if (adev->pm.dpm_enabled) { | 215 | if (adev->pm.dpm_enabled) |
| 211 | /* might be used when with pg/cg | ||
| 212 | amdgpu_dpm_enable_uvd(adev, false); | 216 | amdgpu_dpm_enable_uvd(adev, false); |
| 213 | */ | 217 | else |
| 214 | } | 218 | amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, |
| 219 | AMD_PG_STATE_GATE); | ||
| 215 | } else { | 220 | } else { |
| 216 | schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT); | 221 | schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT); |
| 217 | } | 222 | } |
| @@ -223,9 +228,11 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) | |||
| 223 | bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work); | 228 | bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work); |
| 224 | 229 | ||
| 225 | if (set_clocks && adev->pm.dpm_enabled) { | 230 | if (set_clocks && adev->pm.dpm_enabled) { |
| 226 | /* might be used when with pg/cg | 231 | if (adev->pm.dpm_enabled) |
| 227 | amdgpu_dpm_enable_uvd(adev, true); | 232 | amdgpu_dpm_enable_uvd(adev, true); |
| 228 | */ | 233 | else |
| 234 | amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, | ||
| 235 | AMD_PG_STATE_UNGATE); | ||
| 229 | } | 236 | } |
| 230 | } | 237 | } |
| 231 | 238 | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 2fd7db891689..181e6afa9847 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | |||
| @@ -45,6 +45,17 @@ | |||
| 45 | #define VCN_ENC_CMD_REG_WRITE 0x0000000b | 45 | #define VCN_ENC_CMD_REG_WRITE 0x0000000b |
| 46 | #define VCN_ENC_CMD_REG_WAIT 0x0000000c | 46 | #define VCN_ENC_CMD_REG_WAIT 0x0000000c |
| 47 | 47 | ||
| 48 | enum engine_status_constants { | ||
| 49 | UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON = 0x2AAAA0, | ||
| 50 | UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON = 0x00000002, | ||
| 51 | UVD_STATUS__UVD_BUSY = 0x00000004, | ||
| 52 | GB_ADDR_CONFIG_DEFAULT = 0x26010011, | ||
| 53 | UVD_STATUS__IDLE = 0x2, | ||
| 54 | UVD_STATUS__BUSY = 0x5, | ||
| 55 | UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF = 0x1, | ||
| 56 | UVD_STATUS__RBC_BUSY = 0x1, | ||
| 57 | }; | ||
| 58 | |||
| 48 | struct amdgpu_vcn { | 59 | struct amdgpu_vcn { |
| 49 | struct amdgpu_bo *vcpu_bo; | 60 | struct amdgpu_bo *vcpu_bo; |
| 50 | void *cpu_addr; | 61 | void *cpu_addr; |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index da55a78d7380..ccba88cc8c54 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | |||
| @@ -94,6 +94,34 @@ struct amdgpu_prt_cb { | |||
| 94 | struct dma_fence_cb cb; | 94 | struct dma_fence_cb cb; |
| 95 | }; | 95 | }; |
| 96 | 96 | ||
| 97 | static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, | ||
| 98 | struct amdgpu_vm *vm, | ||
| 99 | struct amdgpu_bo *bo) | ||
| 100 | { | ||
| 101 | base->vm = vm; | ||
| 102 | base->bo = bo; | ||
| 103 | INIT_LIST_HEAD(&base->bo_list); | ||
| 104 | INIT_LIST_HEAD(&base->vm_status); | ||
| 105 | |||
| 106 | if (!bo) | ||
| 107 | return; | ||
| 108 | list_add_tail(&base->bo_list, &bo->va); | ||
| 109 | |||
| 110 | if (bo->tbo.resv != vm->root.base.bo->tbo.resv) | ||
| 111 | return; | ||
| 112 | |||
| 113 | if (bo->preferred_domains & | ||
| 114 | amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type)) | ||
| 115 | return; | ||
| 116 | |||
| 117 | /* | ||
| 118 | * we checked all the prerequisites, but it looks like this per vm bo | ||
| 119 | * is currently evicted. add the bo to the evicted list to make sure it | ||
| 120 | * is validated on next vm use to avoid fault. | ||
| 121 | */ | ||
| 122 | list_move_tail(&base->vm_status, &vm->evicted); | ||
| 123 | } | ||
| 124 | |||
| 97 | /** | 125 | /** |
| 98 | * amdgpu_vm_level_shift - return the addr shift for each level | 126 | * amdgpu_vm_level_shift - return the addr shift for each level |
| 99 | * | 127 | * |
| @@ -196,24 +224,16 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, | |||
| 196 | void *param) | 224 | void *param) |
| 197 | { | 225 | { |
| 198 | struct ttm_bo_global *glob = adev->mman.bdev.glob; | 226 | struct ttm_bo_global *glob = adev->mman.bdev.glob; |
| 199 | int r; | 227 | struct amdgpu_vm_bo_base *bo_base, *tmp; |
| 228 | int r = 0; | ||
| 200 | 229 | ||
| 201 | spin_lock(&vm->status_lock); | 230 | list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) { |
| 202 | while (!list_empty(&vm->evicted)) { | 231 | struct amdgpu_bo *bo = bo_base->bo; |
| 203 | struct amdgpu_vm_bo_base *bo_base; | ||
| 204 | struct amdgpu_bo *bo; | ||
| 205 | 232 | ||
| 206 | bo_base = list_first_entry(&vm->evicted, | ||
| 207 | struct amdgpu_vm_bo_base, | ||
| 208 | vm_status); | ||
| 209 | spin_unlock(&vm->status_lock); | ||
| 210 | |||
| 211 | bo = bo_base->bo; | ||
| 212 | BUG_ON(!bo); | ||
| 213 | if (bo->parent) { | 233 | if (bo->parent) { |
| 214 | r = validate(param, bo); | 234 | r = validate(param, bo); |
| 215 | if (r) | 235 | if (r) |
| 216 | return r; | 236 | break; |
| 217 | 237 | ||
| 218 | spin_lock(&glob->lru_lock); | 238 | spin_lock(&glob->lru_lock); |
| 219 | ttm_bo_move_to_lru_tail(&bo->tbo); | 239 | ttm_bo_move_to_lru_tail(&bo->tbo); |
| @@ -222,22 +242,29 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, | |||
| 222 | spin_unlock(&glob->lru_lock); | 242 | spin_unlock(&glob->lru_lock); |
| 223 | } | 243 | } |
| 224 | 244 | ||
| 225 | if (bo->tbo.type == ttm_bo_type_kernel && | 245 | if (bo->tbo.type != ttm_bo_type_kernel) { |
| 226 | vm->use_cpu_for_update) { | 246 | spin_lock(&vm->moved_lock); |
| 227 | r = amdgpu_bo_kmap(bo, NULL); | ||
| 228 | if (r) | ||
| 229 | return r; | ||
| 230 | } | ||
| 231 | |||
| 232 | spin_lock(&vm->status_lock); | ||
| 233 | if (bo->tbo.type != ttm_bo_type_kernel) | ||
| 234 | list_move(&bo_base->vm_status, &vm->moved); | 247 | list_move(&bo_base->vm_status, &vm->moved); |
| 235 | else | 248 | spin_unlock(&vm->moved_lock); |
| 249 | } else { | ||
| 236 | list_move(&bo_base->vm_status, &vm->relocated); | 250 | list_move(&bo_base->vm_status, &vm->relocated); |
| 251 | } | ||
| 237 | } | 252 | } |
| 238 | spin_unlock(&vm->status_lock); | ||
| 239 | 253 | ||
| 240 | return 0; | 254 | spin_lock(&glob->lru_lock); |
| 255 | list_for_each_entry(bo_base, &vm->idle, vm_status) { | ||
| 256 | struct amdgpu_bo *bo = bo_base->bo; | ||
| 257 | |||
| 258 | if (!bo->parent) | ||
| 259 | continue; | ||
| 260 | |||
| 261 | ttm_bo_move_to_lru_tail(&bo->tbo); | ||
| 262 | if (bo->shadow) | ||
| 263 | ttm_bo_move_to_lru_tail(&bo->shadow->tbo); | ||
| 264 | } | ||
| 265 | spin_unlock(&glob->lru_lock); | ||
| 266 | |||
| 267 | return r; | ||
| 241 | } | 268 | } |
| 242 | 269 | ||
| 243 | /** | 270 | /** |
| @@ -249,13 +276,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, | |||
| 249 | */ | 276 | */ |
| 250 | bool amdgpu_vm_ready(struct amdgpu_vm *vm) | 277 | bool amdgpu_vm_ready(struct amdgpu_vm *vm) |
| 251 | { | 278 | { |
| 252 | bool ready; | 279 | return list_empty(&vm->evicted); |
| 253 | |||
| 254 | spin_lock(&vm->status_lock); | ||
| 255 | ready = list_empty(&vm->evicted); | ||
| 256 | spin_unlock(&vm->status_lock); | ||
| 257 | |||
| 258 | return ready; | ||
| 259 | } | 280 | } |
| 260 | 281 | ||
| 261 | /** | 282 | /** |
| @@ -412,11 +433,16 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, | |||
| 412 | struct amdgpu_bo *pt; | 433 | struct amdgpu_bo *pt; |
| 413 | 434 | ||
| 414 | if (!entry->base.bo) { | 435 | if (!entry->base.bo) { |
| 415 | r = amdgpu_bo_create(adev, | 436 | struct amdgpu_bo_param bp; |
| 416 | amdgpu_vm_bo_size(adev, level), | 437 | |
| 417 | AMDGPU_GPU_PAGE_SIZE, | 438 | memset(&bp, 0, sizeof(bp)); |
| 418 | AMDGPU_GEM_DOMAIN_VRAM, flags, | 439 | bp.size = amdgpu_vm_bo_size(adev, level); |
| 419 | ttm_bo_type_kernel, resv, &pt); | 440 | bp.byte_align = AMDGPU_GPU_PAGE_SIZE; |
| 441 | bp.domain = AMDGPU_GEM_DOMAIN_VRAM; | ||
| 442 | bp.flags = flags; | ||
| 443 | bp.type = ttm_bo_type_kernel; | ||
| 444 | bp.resv = resv; | ||
| 445 | r = amdgpu_bo_create(adev, &bp, &pt); | ||
| 420 | if (r) | 446 | if (r) |
| 421 | return r; | 447 | return r; |
| 422 | 448 | ||
| @@ -441,12 +467,8 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, | |||
| 441 | */ | 467 | */ |
| 442 | pt->parent = amdgpu_bo_ref(parent->base.bo); | 468 | pt->parent = amdgpu_bo_ref(parent->base.bo); |
| 443 | 469 | ||
| 444 | entry->base.vm = vm; | 470 | amdgpu_vm_bo_base_init(&entry->base, vm, pt); |
| 445 | entry->base.bo = pt; | 471 | list_move(&entry->base.vm_status, &vm->relocated); |
| 446 | list_add_tail(&entry->base.bo_list, &pt->va); | ||
| 447 | spin_lock(&vm->status_lock); | ||
| 448 | list_add(&entry->base.vm_status, &vm->relocated); | ||
| 449 | spin_unlock(&vm->status_lock); | ||
| 450 | } | 472 | } |
| 451 | 473 | ||
| 452 | if (level < AMDGPU_VM_PTB) { | 474 | if (level < AMDGPU_VM_PTB) { |
| @@ -628,7 +650,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_ | |||
| 628 | amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid); | 650 | amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid); |
| 629 | 651 | ||
| 630 | if (vm_flush_needed || pasid_mapping_needed) { | 652 | if (vm_flush_needed || pasid_mapping_needed) { |
| 631 | r = amdgpu_fence_emit(ring, &fence); | 653 | r = amdgpu_fence_emit(ring, &fence, 0); |
| 632 | if (r) | 654 | if (r) |
| 633 | return r; | 655 | return r; |
| 634 | } | 656 | } |
| @@ -893,10 +915,8 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_device *adev, | |||
| 893 | if (!entry->base.bo) | 915 | if (!entry->base.bo) |
| 894 | continue; | 916 | continue; |
| 895 | 917 | ||
| 896 | spin_lock(&vm->status_lock); | 918 | if (!entry->base.moved) |
| 897 | if (list_empty(&entry->base.vm_status)) | 919 | list_move(&entry->base.vm_status, &vm->relocated); |
| 898 | list_add(&entry->base.vm_status, &vm->relocated); | ||
| 899 | spin_unlock(&vm->status_lock); | ||
| 900 | amdgpu_vm_invalidate_level(adev, vm, entry, level + 1); | 920 | amdgpu_vm_invalidate_level(adev, vm, entry, level + 1); |
| 901 | } | 921 | } |
| 902 | } | 922 | } |
| @@ -926,6 +946,14 @@ restart: | |||
| 926 | params.adev = adev; | 946 | params.adev = adev; |
| 927 | 947 | ||
| 928 | if (vm->use_cpu_for_update) { | 948 | if (vm->use_cpu_for_update) { |
| 949 | struct amdgpu_vm_bo_base *bo_base; | ||
| 950 | |||
| 951 | list_for_each_entry(bo_base, &vm->relocated, vm_status) { | ||
| 952 | r = amdgpu_bo_kmap(bo_base->bo, NULL); | ||
| 953 | if (unlikely(r)) | ||
| 954 | return r; | ||
| 955 | } | ||
| 956 | |||
| 929 | r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM); | 957 | r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM); |
| 930 | if (unlikely(r)) | 958 | if (unlikely(r)) |
| 931 | return r; | 959 | return r; |
| @@ -941,7 +969,6 @@ restart: | |||
| 941 | params.func = amdgpu_vm_do_set_ptes; | 969 | params.func = amdgpu_vm_do_set_ptes; |
| 942 | } | 970 | } |
| 943 | 971 | ||
| 944 | spin_lock(&vm->status_lock); | ||
| 945 | while (!list_empty(&vm->relocated)) { | 972 | while (!list_empty(&vm->relocated)) { |
| 946 | struct amdgpu_vm_bo_base *bo_base, *parent; | 973 | struct amdgpu_vm_bo_base *bo_base, *parent; |
| 947 | struct amdgpu_vm_pt *pt, *entry; | 974 | struct amdgpu_vm_pt *pt, *entry; |
| @@ -950,14 +977,12 @@ restart: | |||
| 950 | bo_base = list_first_entry(&vm->relocated, | 977 | bo_base = list_first_entry(&vm->relocated, |
| 951 | struct amdgpu_vm_bo_base, | 978 | struct amdgpu_vm_bo_base, |
| 952 | vm_status); | 979 | vm_status); |
| 953 | list_del_init(&bo_base->vm_status); | 980 | bo_base->moved = false; |
| 954 | spin_unlock(&vm->status_lock); | 981 | list_move(&bo_base->vm_status, &vm->idle); |
| 955 | 982 | ||
| 956 | bo = bo_base->bo->parent; | 983 | bo = bo_base->bo->parent; |
| 957 | if (!bo) { | 984 | if (!bo) |
| 958 | spin_lock(&vm->status_lock); | ||
| 959 | continue; | 985 | continue; |
| 960 | } | ||
| 961 | 986 | ||
| 962 | parent = list_first_entry(&bo->va, struct amdgpu_vm_bo_base, | 987 | parent = list_first_entry(&bo->va, struct amdgpu_vm_bo_base, |
| 963 | bo_list); | 988 | bo_list); |
| @@ -966,12 +991,10 @@ restart: | |||
| 966 | 991 | ||
| 967 | amdgpu_vm_update_pde(¶ms, vm, pt, entry); | 992 | amdgpu_vm_update_pde(¶ms, vm, pt, entry); |
| 968 | 993 | ||
| 969 | spin_lock(&vm->status_lock); | ||
| 970 | if (!vm->use_cpu_for_update && | 994 | if (!vm->use_cpu_for_update && |
| 971 | (ndw - params.ib->length_dw) < 32) | 995 | (ndw - params.ib->length_dw) < 32) |
| 972 | break; | 996 | break; |
| 973 | } | 997 | } |
| 974 | spin_unlock(&vm->status_lock); | ||
| 975 | 998 | ||
| 976 | if (vm->use_cpu_for_update) { | 999 | if (vm->use_cpu_for_update) { |
| 977 | /* Flush HDP */ | 1000 | /* Flush HDP */ |
| @@ -1074,9 +1097,7 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p, | |||
| 1074 | if (entry->huge) { | 1097 | if (entry->huge) { |
| 1075 | /* Add the entry to the relocated list to update it. */ | 1098 | /* Add the entry to the relocated list to update it. */ |
| 1076 | entry->huge = false; | 1099 | entry->huge = false; |
| 1077 | spin_lock(&p->vm->status_lock); | ||
| 1078 | list_move(&entry->base.vm_status, &p->vm->relocated); | 1100 | list_move(&entry->base.vm_status, &p->vm->relocated); |
| 1079 | spin_unlock(&p->vm->status_lock); | ||
| 1080 | } | 1101 | } |
| 1081 | return; | 1102 | return; |
| 1082 | } | 1103 | } |
| @@ -1555,9 +1576,22 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, | |||
| 1555 | amdgpu_asic_flush_hdp(adev, NULL); | 1576 | amdgpu_asic_flush_hdp(adev, NULL); |
| 1556 | } | 1577 | } |
| 1557 | 1578 | ||
| 1558 | spin_lock(&vm->status_lock); | 1579 | spin_lock(&vm->moved_lock); |
| 1559 | list_del_init(&bo_va->base.vm_status); | 1580 | list_del_init(&bo_va->base.vm_status); |
| 1560 | spin_unlock(&vm->status_lock); | 1581 | spin_unlock(&vm->moved_lock); |
| 1582 | |||
| 1583 | /* If the BO is not in its preferred location add it back to | ||
| 1584 | * the evicted list so that it gets validated again on the | ||
| 1585 | * next command submission. | ||
| 1586 | */ | ||
| 1587 | if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) { | ||
| 1588 | uint32_t mem_type = bo->tbo.mem.mem_type; | ||
| 1589 | |||
| 1590 | if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(mem_type))) | ||
| 1591 | list_add_tail(&bo_va->base.vm_status, &vm->evicted); | ||
| 1592 | else | ||
| 1593 | list_add(&bo_va->base.vm_status, &vm->idle); | ||
| 1594 | } | ||
| 1561 | 1595 | ||
| 1562 | list_splice_init(&bo_va->invalids, &bo_va->valids); | 1596 | list_splice_init(&bo_va->invalids, &bo_va->valids); |
| 1563 | bo_va->cleared = clear; | 1597 | bo_va->cleared = clear; |
| @@ -1766,19 +1800,18 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, | |||
| 1766 | int amdgpu_vm_handle_moved(struct amdgpu_device *adev, | 1800 | int amdgpu_vm_handle_moved(struct amdgpu_device *adev, |
| 1767 | struct amdgpu_vm *vm) | 1801 | struct amdgpu_vm *vm) |
| 1768 | { | 1802 | { |
| 1803 | struct amdgpu_bo_va *bo_va, *tmp; | ||
| 1804 | struct list_head moved; | ||
| 1769 | bool clear; | 1805 | bool clear; |
| 1770 | int r = 0; | 1806 | int r; |
| 1771 | |||
| 1772 | spin_lock(&vm->status_lock); | ||
| 1773 | while (!list_empty(&vm->moved)) { | ||
| 1774 | struct amdgpu_bo_va *bo_va; | ||
| 1775 | struct reservation_object *resv; | ||
| 1776 | 1807 | ||
| 1777 | bo_va = list_first_entry(&vm->moved, | 1808 | INIT_LIST_HEAD(&moved); |
| 1778 | struct amdgpu_bo_va, base.vm_status); | 1809 | spin_lock(&vm->moved_lock); |
| 1779 | spin_unlock(&vm->status_lock); | 1810 | list_splice_init(&vm->moved, &moved); |
| 1811 | spin_unlock(&vm->moved_lock); | ||
| 1780 | 1812 | ||
| 1781 | resv = bo_va->base.bo->tbo.resv; | 1813 | list_for_each_entry_safe(bo_va, tmp, &moved, base.vm_status) { |
| 1814 | struct reservation_object *resv = bo_va->base.bo->tbo.resv; | ||
| 1782 | 1815 | ||
| 1783 | /* Per VM BOs never need to be cleared in the page tables */ | 1816 | /* Per VM BOs never need to be cleared in the page tables */ |
| 1784 | if (resv == vm->root.base.bo->tbo.resv) | 1817 | if (resv == vm->root.base.bo->tbo.resv) |
| @@ -1791,17 +1824,19 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, | |||
| 1791 | clear = true; | 1824 | clear = true; |
| 1792 | 1825 | ||
| 1793 | r = amdgpu_vm_bo_update(adev, bo_va, clear); | 1826 | r = amdgpu_vm_bo_update(adev, bo_va, clear); |
| 1794 | if (r) | 1827 | if (r) { |
| 1828 | spin_lock(&vm->moved_lock); | ||
| 1829 | list_splice(&moved, &vm->moved); | ||
| 1830 | spin_unlock(&vm->moved_lock); | ||
| 1795 | return r; | 1831 | return r; |
| 1832 | } | ||
| 1796 | 1833 | ||
| 1797 | if (!clear && resv != vm->root.base.bo->tbo.resv) | 1834 | if (!clear && resv != vm->root.base.bo->tbo.resv) |
| 1798 | reservation_object_unlock(resv); | 1835 | reservation_object_unlock(resv); |
| 1799 | 1836 | ||
| 1800 | spin_lock(&vm->status_lock); | ||
| 1801 | } | 1837 | } |
| 1802 | spin_unlock(&vm->status_lock); | ||
| 1803 | 1838 | ||
| 1804 | return r; | 1839 | return 0; |
| 1805 | } | 1840 | } |
| 1806 | 1841 | ||
| 1807 | /** | 1842 | /** |
| @@ -1827,36 +1862,12 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, | |||
| 1827 | if (bo_va == NULL) { | 1862 | if (bo_va == NULL) { |
| 1828 | return NULL; | 1863 | return NULL; |
| 1829 | } | 1864 | } |
| 1830 | bo_va->base.vm = vm; | 1865 | amdgpu_vm_bo_base_init(&bo_va->base, vm, bo); |
| 1831 | bo_va->base.bo = bo; | ||
| 1832 | INIT_LIST_HEAD(&bo_va->base.bo_list); | ||
| 1833 | INIT_LIST_HEAD(&bo_va->base.vm_status); | ||
| 1834 | 1866 | ||
| 1835 | bo_va->ref_count = 1; | 1867 | bo_va->ref_count = 1; |
| 1836 | INIT_LIST_HEAD(&bo_va->valids); | 1868 | INIT_LIST_HEAD(&bo_va->valids); |
| 1837 | INIT_LIST_HEAD(&bo_va->invalids); | 1869 | INIT_LIST_HEAD(&bo_va->invalids); |
| 1838 | 1870 | ||
| 1839 | if (!bo) | ||
| 1840 | return bo_va; | ||
| 1841 | |||
| 1842 | list_add_tail(&bo_va->base.bo_list, &bo->va); | ||
| 1843 | |||
| 1844 | if (bo->tbo.resv != vm->root.base.bo->tbo.resv) | ||
| 1845 | return bo_va; | ||
| 1846 | |||
| 1847 | if (bo->preferred_domains & | ||
| 1848 | amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type)) | ||
| 1849 | return bo_va; | ||
| 1850 | |||
| 1851 | /* | ||
| 1852 | * We checked all the prerequisites, but it looks like this per VM BO | ||
| 1853 | * is currently evicted. add the BO to the evicted list to make sure it | ||
| 1854 | * is validated on next VM use to avoid fault. | ||
| 1855 | * */ | ||
| 1856 | spin_lock(&vm->status_lock); | ||
| 1857 | list_move_tail(&bo_va->base.vm_status, &vm->evicted); | ||
| 1858 | spin_unlock(&vm->status_lock); | ||
| 1859 | |||
| 1860 | return bo_va; | 1871 | return bo_va; |
| 1861 | } | 1872 | } |
| 1862 | 1873 | ||
| @@ -1884,11 +1895,11 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev, | |||
| 1884 | if (mapping->flags & AMDGPU_PTE_PRT) | 1895 | if (mapping->flags & AMDGPU_PTE_PRT) |
| 1885 | amdgpu_vm_prt_get(adev); | 1896 | amdgpu_vm_prt_get(adev); |
| 1886 | 1897 | ||
| 1887 | if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) { | 1898 | if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv && |
| 1888 | spin_lock(&vm->status_lock); | 1899 | !bo_va->base.moved) { |
| 1889 | if (list_empty(&bo_va->base.vm_status)) | 1900 | spin_lock(&vm->moved_lock); |
| 1890 | list_add(&bo_va->base.vm_status, &vm->moved); | 1901 | list_move(&bo_va->base.vm_status, &vm->moved); |
| 1891 | spin_unlock(&vm->status_lock); | 1902 | spin_unlock(&vm->moved_lock); |
| 1892 | } | 1903 | } |
| 1893 | trace_amdgpu_vm_bo_map(bo_va, mapping); | 1904 | trace_amdgpu_vm_bo_map(bo_va, mapping); |
| 1894 | } | 1905 | } |
| @@ -2198,9 +2209,9 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, | |||
| 2198 | 2209 | ||
| 2199 | list_del(&bo_va->base.bo_list); | 2210 | list_del(&bo_va->base.bo_list); |
| 2200 | 2211 | ||
| 2201 | spin_lock(&vm->status_lock); | 2212 | spin_lock(&vm->moved_lock); |
| 2202 | list_del(&bo_va->base.vm_status); | 2213 | list_del(&bo_va->base.vm_status); |
| 2203 | spin_unlock(&vm->status_lock); | 2214 | spin_unlock(&vm->moved_lock); |
| 2204 | 2215 | ||
| 2205 | list_for_each_entry_safe(mapping, next, &bo_va->valids, list) { | 2216 | list_for_each_entry_safe(mapping, next, &bo_va->valids, list) { |
| 2206 | list_del(&mapping->list); | 2217 | list_del(&mapping->list); |
| @@ -2234,33 +2245,34 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, | |||
| 2234 | { | 2245 | { |
| 2235 | struct amdgpu_vm_bo_base *bo_base; | 2246 | struct amdgpu_vm_bo_base *bo_base; |
| 2236 | 2247 | ||
| 2248 | /* shadow bo doesn't have bo base, its validation needs its parent */ | ||
| 2249 | if (bo->parent && bo->parent->shadow == bo) | ||
| 2250 | bo = bo->parent; | ||
| 2251 | |||
| 2237 | list_for_each_entry(bo_base, &bo->va, bo_list) { | 2252 | list_for_each_entry(bo_base, &bo->va, bo_list) { |
| 2238 | struct amdgpu_vm *vm = bo_base->vm; | 2253 | struct amdgpu_vm *vm = bo_base->vm; |
| 2254 | bool was_moved = bo_base->moved; | ||
| 2239 | 2255 | ||
| 2240 | bo_base->moved = true; | 2256 | bo_base->moved = true; |
| 2241 | if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) { | 2257 | if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) { |
| 2242 | spin_lock(&bo_base->vm->status_lock); | ||
| 2243 | if (bo->tbo.type == ttm_bo_type_kernel) | 2258 | if (bo->tbo.type == ttm_bo_type_kernel) |
| 2244 | list_move(&bo_base->vm_status, &vm->evicted); | 2259 | list_move(&bo_base->vm_status, &vm->evicted); |
| 2245 | else | 2260 | else |
| 2246 | list_move_tail(&bo_base->vm_status, | 2261 | list_move_tail(&bo_base->vm_status, |
| 2247 | &vm->evicted); | 2262 | &vm->evicted); |
| 2248 | spin_unlock(&bo_base->vm->status_lock); | ||
| 2249 | continue; | 2263 | continue; |
| 2250 | } | 2264 | } |
| 2251 | 2265 | ||
| 2252 | if (bo->tbo.type == ttm_bo_type_kernel) { | 2266 | if (was_moved) |
| 2253 | spin_lock(&bo_base->vm->status_lock); | ||
| 2254 | if (list_empty(&bo_base->vm_status)) | ||
| 2255 | list_add(&bo_base->vm_status, &vm->relocated); | ||
| 2256 | spin_unlock(&bo_base->vm->status_lock); | ||
| 2257 | continue; | 2267 | continue; |
| 2258 | } | ||
| 2259 | 2268 | ||
| 2260 | spin_lock(&bo_base->vm->status_lock); | 2269 | if (bo->tbo.type == ttm_bo_type_kernel) { |
| 2261 | if (list_empty(&bo_base->vm_status)) | 2270 | list_move(&bo_base->vm_status, &vm->relocated); |
| 2262 | list_add(&bo_base->vm_status, &vm->moved); | 2271 | } else { |
| 2263 | spin_unlock(&bo_base->vm->status_lock); | 2272 | spin_lock(&bo_base->vm->moved_lock); |
| 2273 | list_move(&bo_base->vm_status, &vm->moved); | ||
| 2274 | spin_unlock(&bo_base->vm->moved_lock); | ||
| 2275 | } | ||
| 2264 | } | 2276 | } |
| 2265 | } | 2277 | } |
| 2266 | 2278 | ||
| @@ -2355,6 +2367,8 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t vm_size, | |||
| 2355 | int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, | 2367 | int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, |
| 2356 | int vm_context, unsigned int pasid) | 2368 | int vm_context, unsigned int pasid) |
| 2357 | { | 2369 | { |
| 2370 | struct amdgpu_bo_param bp; | ||
| 2371 | struct amdgpu_bo *root; | ||
| 2358 | const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE, | 2372 | const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE, |
| 2359 | AMDGPU_VM_PTE_COUNT(adev) * 8); | 2373 | AMDGPU_VM_PTE_COUNT(adev) * 8); |
| 2360 | unsigned ring_instance; | 2374 | unsigned ring_instance; |
| @@ -2367,10 +2381,11 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, | |||
| 2367 | vm->va = RB_ROOT_CACHED; | 2381 | vm->va = RB_ROOT_CACHED; |
| 2368 | for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) | 2382 | for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) |
| 2369 | vm->reserved_vmid[i] = NULL; | 2383 | vm->reserved_vmid[i] = NULL; |
| 2370 | spin_lock_init(&vm->status_lock); | ||
| 2371 | INIT_LIST_HEAD(&vm->evicted); | 2384 | INIT_LIST_HEAD(&vm->evicted); |
| 2372 | INIT_LIST_HEAD(&vm->relocated); | 2385 | INIT_LIST_HEAD(&vm->relocated); |
| 2386 | spin_lock_init(&vm->moved_lock); | ||
| 2373 | INIT_LIST_HEAD(&vm->moved); | 2387 | INIT_LIST_HEAD(&vm->moved); |
| 2388 | INIT_LIST_HEAD(&vm->idle); | ||
| 2374 | INIT_LIST_HEAD(&vm->freed); | 2389 | INIT_LIST_HEAD(&vm->freed); |
| 2375 | 2390 | ||
| 2376 | /* create scheduler entity for page table updates */ | 2391 | /* create scheduler entity for page table updates */ |
| @@ -2380,7 +2395,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, | |||
| 2380 | ring = adev->vm_manager.vm_pte_rings[ring_instance]; | 2395 | ring = adev->vm_manager.vm_pte_rings[ring_instance]; |
| 2381 | rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL]; | 2396 | rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL]; |
| 2382 | r = drm_sched_entity_init(&ring->sched, &vm->entity, | 2397 | r = drm_sched_entity_init(&ring->sched, &vm->entity, |
| 2383 | rq, amdgpu_sched_jobs, NULL); | 2398 | rq, NULL); |
| 2384 | if (r) | 2399 | if (r) |
| 2385 | return r; | 2400 | return r; |
| 2386 | 2401 | ||
| @@ -2409,24 +2424,28 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, | |||
| 2409 | flags |= AMDGPU_GEM_CREATE_SHADOW; | 2424 | flags |= AMDGPU_GEM_CREATE_SHADOW; |
| 2410 | 2425 | ||
| 2411 | size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level); | 2426 | size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level); |
| 2412 | r = amdgpu_bo_create(adev, size, align, AMDGPU_GEM_DOMAIN_VRAM, flags, | 2427 | memset(&bp, 0, sizeof(bp)); |
| 2413 | ttm_bo_type_kernel, NULL, &vm->root.base.bo); | 2428 | bp.size = size; |
| 2429 | bp.byte_align = align; | ||
| 2430 | bp.domain = AMDGPU_GEM_DOMAIN_VRAM; | ||
| 2431 | bp.flags = flags; | ||
| 2432 | bp.type = ttm_bo_type_kernel; | ||
| 2433 | bp.resv = NULL; | ||
| 2434 | r = amdgpu_bo_create(adev, &bp, &root); | ||
| 2414 | if (r) | 2435 | if (r) |
| 2415 | goto error_free_sched_entity; | 2436 | goto error_free_sched_entity; |
| 2416 | 2437 | ||
| 2417 | r = amdgpu_bo_reserve(vm->root.base.bo, true); | 2438 | r = amdgpu_bo_reserve(root, true); |
| 2418 | if (r) | 2439 | if (r) |
| 2419 | goto error_free_root; | 2440 | goto error_free_root; |
| 2420 | 2441 | ||
| 2421 | r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo, | 2442 | r = amdgpu_vm_clear_bo(adev, vm, root, |
| 2422 | adev->vm_manager.root_level, | 2443 | adev->vm_manager.root_level, |
| 2423 | vm->pte_support_ats); | 2444 | vm->pte_support_ats); |
| 2424 | if (r) | 2445 | if (r) |
| 2425 | goto error_unreserve; | 2446 | goto error_unreserve; |
| 2426 | 2447 | ||
| 2427 | vm->root.base.vm = vm; | 2448 | amdgpu_vm_bo_base_init(&vm->root.base, vm, root); |
| 2428 | list_add_tail(&vm->root.base.bo_list, &vm->root.base.bo->va); | ||
| 2429 | list_add_tail(&vm->root.base.vm_status, &vm->evicted); | ||
| 2430 | amdgpu_bo_unreserve(vm->root.base.bo); | 2449 | amdgpu_bo_unreserve(vm->root.base.bo); |
| 2431 | 2450 | ||
| 2432 | if (pasid) { | 2451 | if (pasid) { |
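Both call sites above now fill a struct amdgpu_bo_param instead of passing individual arguments to amdgpu_bo_create(). A condensed sketch of the pattern, with the field values taken from the page-table allocation in this hunk (adev, level, flags and resv are placeholders from the surrounding code):

	struct amdgpu_bo_param bp;
	struct amdgpu_bo *pt;
	int r;

	memset(&bp, 0, sizeof(bp));
	bp.size = amdgpu_vm_bo_size(adev, level); /* bytes for this PT level */
	bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
	bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
	bp.flags = flags;
	bp.type = ttm_bo_type_kernel;
	bp.resv = resv;                           /* share the root PD reservation */
	r = amdgpu_bo_create(adev, &bp, &pt);
	if (r)
		return r;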
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 30f080364c97..061b99a18cb8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | |||
| @@ -75,11 +75,12 @@ struct amdgpu_bo_list_entry; | |||
| 75 | /* PDE Block Fragment Size for VEGA10 */ | 75 | /* PDE Block Fragment Size for VEGA10 */ |
| 76 | #define AMDGPU_PDE_BFS(a) ((uint64_t)a << 59) | 76 | #define AMDGPU_PDE_BFS(a) ((uint64_t)a << 59) |
| 77 | 77 | ||
| 78 | /* VEGA10 only */ | 78 | |
| 79 | /* For GFX9 */ | ||
| 79 | #define AMDGPU_PTE_MTYPE(a) ((uint64_t)a << 57) | 80 | #define AMDGPU_PTE_MTYPE(a) ((uint64_t)a << 57) |
| 80 | #define AMDGPU_PTE_MTYPE_MASK AMDGPU_PTE_MTYPE(3ULL) | 81 | #define AMDGPU_PTE_MTYPE_MASK AMDGPU_PTE_MTYPE(3ULL) |
| 81 | 82 | ||
| 82 | /* For Raven */ | 83 | #define AMDGPU_MTYPE_NC 0 |
| 83 | #define AMDGPU_MTYPE_CC 2 | 84 | #define AMDGPU_MTYPE_CC 2 |
| 84 | 85 | ||
| 85 | #define AMDGPU_PTE_DEFAULT_ATC (AMDGPU_PTE_SYSTEM \ | 86 | #define AMDGPU_PTE_DEFAULT_ATC (AMDGPU_PTE_SYSTEM \ |
| @@ -167,9 +168,6 @@ struct amdgpu_vm { | |||
| 167 | /* tree of virtual addresses mapped */ | 168 | /* tree of virtual addresses mapped */ |
| 168 | struct rb_root_cached va; | 169 | struct rb_root_cached va; |
| 169 | 170 | ||
| 170 | /* protecting invalidated */ | ||
| 171 | spinlock_t status_lock; | ||
| 172 | |||
| 173 | /* BOs who needs a validation */ | 171 | /* BOs who needs a validation */ |
| 174 | struct list_head evicted; | 172 | struct list_head evicted; |
| 175 | 173 | ||
| @@ -178,6 +176,10 @@ struct amdgpu_vm { | |||
| 178 | 176 | ||
| 179 | /* BOs moved, but not yet updated in the PT */ | 177 | /* BOs moved, but not yet updated in the PT */ |
| 180 | struct list_head moved; | 178 | struct list_head moved; |
| 179 | spinlock_t moved_lock; | ||
| 180 | |||
| 181 | /* All BOs of this VM not currently in the state machine */ | ||
| 182 | struct list_head idle; | ||
| 181 | 183 | ||
| 182 | /* BO mappings freed, but not yet updated in the PT */ | 184 | /* BO mappings freed, but not yet updated in the PT */ |
| 183 | struct list_head freed; | 185 | struct list_head freed; |
| @@ -186,9 +188,6 @@ struct amdgpu_vm { | |||
| 186 | struct amdgpu_vm_pt root; | 188 | struct amdgpu_vm_pt root; |
| 187 | struct dma_fence *last_update; | 189 | struct dma_fence *last_update; |
| 188 | 190 | ||
| 189 | /* protecting freed */ | ||
| 190 | spinlock_t freed_lock; | ||
| 191 | |||
| 192 | /* Scheduler entity for page table updates */ | 191 | /* Scheduler entity for page table updates */ |
| 193 | struct drm_sched_entity entity; | 192 | struct drm_sched_entity entity; |
| 194 | 193 | ||
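The header change above drops status_lock in favour of a narrower moved_lock: only vm->moved still needs a spinlock, while the evicted, relocated and new idle lists are walked under the VM reservation. A trimmed sketch of the splice pattern amdgpu_vm_handle_moved() uses with the new lock (see the amdgpu_vm.c hunk earlier in this patch):

	struct list_head moved;

	INIT_LIST_HEAD(&moved);
	spin_lock(&vm->moved_lock);
	list_splice_init(&vm->moved, &moved);  /* take the whole list atomically */
	spin_unlock(&vm->moved_lock);

	/* entries on the private 'moved' list can now be processed
	 * without holding the spinlock */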
diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c index 47ef3e6e7178..a266dcf5daed 100644 --- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c | |||
| @@ -5903,7 +5903,7 @@ static int ci_dpm_init(struct amdgpu_device *adev) | |||
| 5903 | pi->pcie_dpm_key_disabled = 0; | 5903 | pi->pcie_dpm_key_disabled = 0; |
| 5904 | pi->thermal_sclk_dpm_enabled = 0; | 5904 | pi->thermal_sclk_dpm_enabled = 0; |
| 5905 | 5905 | ||
| 5906 | if (amdgpu_pp_feature_mask & SCLK_DEEP_SLEEP_MASK) | 5906 | if (adev->powerplay.pp_feature & PP_SCLK_DEEP_SLEEP_MASK) |
| 5907 | pi->caps_sclk_ds = true; | 5907 | pi->caps_sclk_ds = true; |
| 5908 | else | 5908 | else |
| 5909 | pi->caps_sclk_ds = false; | 5909 | pi->caps_sclk_ds = false; |
| @@ -6255,7 +6255,7 @@ static int ci_dpm_late_init(void *handle) | |||
| 6255 | int ret; | 6255 | int ret; |
| 6256 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 6256 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 6257 | 6257 | ||
| 6258 | if (!amdgpu_dpm) | 6258 | if (!adev->pm.dpm_enabled) |
| 6259 | return 0; | 6259 | return 0; |
| 6260 | 6260 | ||
| 6261 | /* init the sysfs and debugfs files late */ | 6261 | /* init the sysfs and debugfs files late */ |
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 0df22030e713..8ff4c60d1b59 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c | |||
| @@ -1735,6 +1735,12 @@ static void cik_invalidate_hdp(struct amdgpu_device *adev, | |||
| 1735 | } | 1735 | } |
| 1736 | } | 1736 | } |
| 1737 | 1737 | ||
| 1738 | static bool cik_need_full_reset(struct amdgpu_device *adev) | ||
| 1739 | { | ||
| 1740 | /* change this when we support soft reset */ | ||
| 1741 | return true; | ||
| 1742 | } | ||
| 1743 | |||
| 1738 | static const struct amdgpu_asic_funcs cik_asic_funcs = | 1744 | static const struct amdgpu_asic_funcs cik_asic_funcs = |
| 1739 | { | 1745 | { |
| 1740 | .read_disabled_bios = &cik_read_disabled_bios, | 1746 | .read_disabled_bios = &cik_read_disabled_bios, |
| @@ -1748,6 +1754,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs = | |||
| 1748 | .get_config_memsize = &cik_get_config_memsize, | 1754 | .get_config_memsize = &cik_get_config_memsize, |
| 1749 | .flush_hdp = &cik_flush_hdp, | 1755 | .flush_hdp = &cik_flush_hdp, |
| 1750 | .invalidate_hdp = &cik_invalidate_hdp, | 1756 | .invalidate_hdp = &cik_invalidate_hdp, |
| 1757 | .need_full_reset = &cik_need_full_reset, | ||
| 1751 | }; | 1758 | }; |
| 1752 | 1759 | ||
| 1753 | static int cik_common_early_init(void *handle) | 1760 | static int cik_common_early_init(void *handle) |
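cik_need_full_reset() is wired into amdgpu_asic_funcs above, so callers reach it through the asic_funcs pointer. A hedged sketch of a caller; the wrapper name and call site are assumptions, not shown in this hunk:

	/* assumed accessor; the real wrapper is expected to live in amdgpu.h */
	static bool amdgpu_device_asic_needs_full_reset(struct amdgpu_device *adev)
	{
		return adev->asic_funcs->need_full_reset(adev);
	}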
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 452f88ea46a2..ada241bfeee9 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c | |||
| @@ -1823,7 +1823,6 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc, | |||
| 1823 | struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); | 1823 | struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); |
| 1824 | struct drm_device *dev = crtc->dev; | 1824 | struct drm_device *dev = crtc->dev; |
| 1825 | struct amdgpu_device *adev = dev->dev_private; | 1825 | struct amdgpu_device *adev = dev->dev_private; |
| 1826 | struct amdgpu_framebuffer *amdgpu_fb; | ||
| 1827 | struct drm_framebuffer *target_fb; | 1826 | struct drm_framebuffer *target_fb; |
| 1828 | struct drm_gem_object *obj; | 1827 | struct drm_gem_object *obj; |
| 1829 | struct amdgpu_bo *abo; | 1828 | struct amdgpu_bo *abo; |
| @@ -1842,18 +1841,15 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc, | |||
| 1842 | return 0; | 1841 | return 0; |
| 1843 | } | 1842 | } |
| 1844 | 1843 | ||
| 1845 | if (atomic) { | 1844 | if (atomic) |
| 1846 | amdgpu_fb = to_amdgpu_framebuffer(fb); | ||
| 1847 | target_fb = fb; | 1845 | target_fb = fb; |
| 1848 | } else { | 1846 | else |
| 1849 | amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); | ||
| 1850 | target_fb = crtc->primary->fb; | 1847 | target_fb = crtc->primary->fb; |
| 1851 | } | ||
| 1852 | 1848 | ||
| 1853 | /* If atomic, assume fb object is pinned & idle & fenced and | 1849 | /* If atomic, assume fb object is pinned & idle & fenced and |
| 1854 | * just update base pointers | 1850 | * just update base pointers |
| 1855 | */ | 1851 | */ |
| 1856 | obj = amdgpu_fb->obj; | 1852 | obj = target_fb->obj[0]; |
| 1857 | abo = gem_to_amdgpu_bo(obj); | 1853 | abo = gem_to_amdgpu_bo(obj); |
| 1858 | r = amdgpu_bo_reserve(abo, false); | 1854 | r = amdgpu_bo_reserve(abo, false); |
| 1859 | if (unlikely(r != 0)) | 1855 | if (unlikely(r != 0)) |
| @@ -2043,8 +2039,7 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc, | |||
| 2043 | WREG32(mmMASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0); | 2039 | WREG32(mmMASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0); |
| 2044 | 2040 | ||
| 2045 | if (!atomic && fb && fb != crtc->primary->fb) { | 2041 | if (!atomic && fb && fb != crtc->primary->fb) { |
| 2046 | amdgpu_fb = to_amdgpu_framebuffer(fb); | 2042 | abo = gem_to_amdgpu_bo(fb->obj[0]); |
| 2047 | abo = gem_to_amdgpu_bo(amdgpu_fb->obj); | ||
| 2048 | r = amdgpu_bo_reserve(abo, true); | 2043 | r = amdgpu_bo_reserve(abo, true); |
| 2049 | if (unlikely(r != 0)) | 2044 | if (unlikely(r != 0)) |
| 2050 | return r; | 2045 | return r; |
| @@ -2526,11 +2521,9 @@ static void dce_v10_0_crtc_disable(struct drm_crtc *crtc) | |||
| 2526 | dce_v10_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); | 2521 | dce_v10_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); |
| 2527 | if (crtc->primary->fb) { | 2522 | if (crtc->primary->fb) { |
| 2528 | int r; | 2523 | int r; |
| 2529 | struct amdgpu_framebuffer *amdgpu_fb; | ||
| 2530 | struct amdgpu_bo *abo; | 2524 | struct amdgpu_bo *abo; |
| 2531 | 2525 | ||
| 2532 | amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); | 2526 | abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]); |
| 2533 | abo = gem_to_amdgpu_bo(amdgpu_fb->obj); | ||
| 2534 | r = amdgpu_bo_reserve(abo, true); | 2527 | r = amdgpu_bo_reserve(abo, true); |
| 2535 | if (unlikely(r)) | 2528 | if (unlikely(r)) |
| 2536 | DRM_ERROR("failed to reserve abo before unpin\n"); | 2529 | DRM_ERROR("failed to reserve abo before unpin\n"); |
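The DCE changes here (and in the dce_v11_0, dce_v6_0, dce_v8_0 and dce_virtual hunks below) all follow the same conversion: the driver-private amdgpu_framebuffer::obj lookup is dropped and the GEM object is taken from the core drm_framebuffer instead, so the backing buffer object is obtained directly:

	/* old: abo = gem_to_amdgpu_bo(to_amdgpu_framebuffer(fb)->obj);      */
	/* new: the backing GEM object sits in the core framebuffer's obj[0] */
	struct drm_gem_object *obj = fb->obj[0];
	struct amdgpu_bo *abo = gem_to_amdgpu_bo(obj);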
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index a7c1c584a191..a5b96eac3033 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c | |||
| @@ -173,6 +173,7 @@ static void dce_v11_0_init_golden_registers(struct amdgpu_device *adev) | |||
| 173 | ARRAY_SIZE(polaris11_golden_settings_a11)); | 173 | ARRAY_SIZE(polaris11_golden_settings_a11)); |
| 174 | break; | 174 | break; |
| 175 | case CHIP_POLARIS10: | 175 | case CHIP_POLARIS10: |
| 176 | case CHIP_VEGAM: | ||
| 176 | amdgpu_device_program_register_sequence(adev, | 177 | amdgpu_device_program_register_sequence(adev, |
| 177 | polaris10_golden_settings_a11, | 178 | polaris10_golden_settings_a11, |
| 178 | ARRAY_SIZE(polaris10_golden_settings_a11)); | 179 | ARRAY_SIZE(polaris10_golden_settings_a11)); |
| @@ -473,6 +474,7 @@ static int dce_v11_0_get_num_crtc (struct amdgpu_device *adev) | |||
| 473 | num_crtc = 2; | 474 | num_crtc = 2; |
| 474 | break; | 475 | break; |
| 475 | case CHIP_POLARIS10: | 476 | case CHIP_POLARIS10: |
| 477 | case CHIP_VEGAM: | ||
| 476 | num_crtc = 6; | 478 | num_crtc = 6; |
| 477 | break; | 479 | break; |
| 478 | case CHIP_POLARIS11: | 480 | case CHIP_POLARIS11: |
| @@ -1445,6 +1447,7 @@ static int dce_v11_0_audio_init(struct amdgpu_device *adev) | |||
| 1445 | adev->mode_info.audio.num_pins = 7; | 1447 | adev->mode_info.audio.num_pins = 7; |
| 1446 | break; | 1448 | break; |
| 1447 | case CHIP_POLARIS10: | 1449 | case CHIP_POLARIS10: |
| 1450 | case CHIP_VEGAM: | ||
| 1448 | adev->mode_info.audio.num_pins = 8; | 1451 | adev->mode_info.audio.num_pins = 8; |
| 1449 | break; | 1452 | break; |
| 1450 | case CHIP_POLARIS11: | 1453 | case CHIP_POLARIS11: |
| @@ -1862,7 +1865,6 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc, | |||
| 1862 | struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); | 1865 | struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); |
| 1863 | struct drm_device *dev = crtc->dev; | 1866 | struct drm_device *dev = crtc->dev; |
| 1864 | struct amdgpu_device *adev = dev->dev_private; | 1867 | struct amdgpu_device *adev = dev->dev_private; |
| 1865 | struct amdgpu_framebuffer *amdgpu_fb; | ||
| 1866 | struct drm_framebuffer *target_fb; | 1868 | struct drm_framebuffer *target_fb; |
| 1867 | struct drm_gem_object *obj; | 1869 | struct drm_gem_object *obj; |
| 1868 | struct amdgpu_bo *abo; | 1870 | struct amdgpu_bo *abo; |
| @@ -1881,18 +1883,15 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc, | |||
| 1881 | return 0; | 1883 | return 0; |
| 1882 | } | 1884 | } |
| 1883 | 1885 | ||
| 1884 | if (atomic) { | 1886 | if (atomic) |
| 1885 | amdgpu_fb = to_amdgpu_framebuffer(fb); | ||
| 1886 | target_fb = fb; | 1887 | target_fb = fb; |
| 1887 | } else { | 1888 | else |
| 1888 | amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); | ||
| 1889 | target_fb = crtc->primary->fb; | 1889 | target_fb = crtc->primary->fb; |
| 1890 | } | ||
| 1891 | 1890 | ||
| 1892 | /* If atomic, assume fb object is pinned & idle & fenced and | 1891 | /* If atomic, assume fb object is pinned & idle & fenced and |
| 1893 | * just update base pointers | 1892 | * just update base pointers |
| 1894 | */ | 1893 | */ |
| 1895 | obj = amdgpu_fb->obj; | 1894 | obj = target_fb->obj[0]; |
| 1896 | abo = gem_to_amdgpu_bo(obj); | 1895 | abo = gem_to_amdgpu_bo(obj); |
| 1897 | r = amdgpu_bo_reserve(abo, false); | 1896 | r = amdgpu_bo_reserve(abo, false); |
| 1898 | if (unlikely(r != 0)) | 1897 | if (unlikely(r != 0)) |
| @@ -2082,8 +2081,7 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc, | |||
| 2082 | WREG32(mmCRTC_MASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0); | 2081 | WREG32(mmCRTC_MASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0); |
| 2083 | 2082 | ||
| 2084 | if (!atomic && fb && fb != crtc->primary->fb) { | 2083 | if (!atomic && fb && fb != crtc->primary->fb) { |
| 2085 | amdgpu_fb = to_amdgpu_framebuffer(fb); | 2084 | abo = gem_to_amdgpu_bo(fb->obj[0]); |
| 2086 | abo = gem_to_amdgpu_bo(amdgpu_fb->obj); | ||
| 2087 | r = amdgpu_bo_reserve(abo, true); | 2085 | r = amdgpu_bo_reserve(abo, true); |
| 2088 | if (unlikely(r != 0)) | 2086 | if (unlikely(r != 0)) |
| 2089 | return r; | 2087 | return r; |
| @@ -2253,7 +2251,8 @@ static u32 dce_v11_0_pick_pll(struct drm_crtc *crtc) | |||
| 2253 | 2251 | ||
| 2254 | if ((adev->asic_type == CHIP_POLARIS10) || | 2252 | if ((adev->asic_type == CHIP_POLARIS10) || |
| 2255 | (adev->asic_type == CHIP_POLARIS11) || | 2253 | (adev->asic_type == CHIP_POLARIS11) || |
| 2256 | (adev->asic_type == CHIP_POLARIS12)) { | 2254 | (adev->asic_type == CHIP_POLARIS12) || |
| 2255 | (adev->asic_type == CHIP_VEGAM)) { | ||
| 2257 | struct amdgpu_encoder *amdgpu_encoder = | 2256 | struct amdgpu_encoder *amdgpu_encoder = |
| 2258 | to_amdgpu_encoder(amdgpu_crtc->encoder); | 2257 | to_amdgpu_encoder(amdgpu_crtc->encoder); |
| 2259 | struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; | 2258 | struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; |
| @@ -2601,11 +2600,9 @@ static void dce_v11_0_crtc_disable(struct drm_crtc *crtc) | |||
| 2601 | dce_v11_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); | 2600 | dce_v11_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); |
| 2602 | if (crtc->primary->fb) { | 2601 | if (crtc->primary->fb) { |
| 2603 | int r; | 2602 | int r; |
| 2604 | struct amdgpu_framebuffer *amdgpu_fb; | ||
| 2605 | struct amdgpu_bo *abo; | 2603 | struct amdgpu_bo *abo; |
| 2606 | 2604 | ||
| 2607 | amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); | 2605 | abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]); |
| 2608 | abo = gem_to_amdgpu_bo(amdgpu_fb->obj); | ||
| 2609 | r = amdgpu_bo_reserve(abo, true); | 2606 | r = amdgpu_bo_reserve(abo, true); |
| 2610 | if (unlikely(r)) | 2607 | if (unlikely(r)) |
| 2611 | DRM_ERROR("failed to reserve abo before unpin\n"); | 2608 | DRM_ERROR("failed to reserve abo before unpin\n"); |
| @@ -2673,7 +2670,8 @@ static int dce_v11_0_crtc_mode_set(struct drm_crtc *crtc, | |||
| 2673 | 2670 | ||
| 2674 | if ((adev->asic_type == CHIP_POLARIS10) || | 2671 | if ((adev->asic_type == CHIP_POLARIS10) || |
| 2675 | (adev->asic_type == CHIP_POLARIS11) || | 2672 | (adev->asic_type == CHIP_POLARIS11) || |
| 2676 | (adev->asic_type == CHIP_POLARIS12)) { | 2673 | (adev->asic_type == CHIP_POLARIS12) || |
| 2674 | (adev->asic_type == CHIP_VEGAM)) { | ||
| 2677 | struct amdgpu_encoder *amdgpu_encoder = | 2675 | struct amdgpu_encoder *amdgpu_encoder = |
| 2678 | to_amdgpu_encoder(amdgpu_crtc->encoder); | 2676 | to_amdgpu_encoder(amdgpu_crtc->encoder); |
| 2679 | int encoder_mode = | 2677 | int encoder_mode = |
| @@ -2830,6 +2828,7 @@ static int dce_v11_0_early_init(void *handle) | |||
| 2830 | adev->mode_info.num_dig = 9; | 2828 | adev->mode_info.num_dig = 9; |
| 2831 | break; | 2829 | break; |
| 2832 | case CHIP_POLARIS10: | 2830 | case CHIP_POLARIS10: |
| 2831 | case CHIP_VEGAM: | ||
| 2833 | adev->mode_info.num_hpd = 6; | 2832 | adev->mode_info.num_hpd = 6; |
| 2834 | adev->mode_info.num_dig = 6; | 2833 | adev->mode_info.num_dig = 6; |
| 2835 | break; | 2834 | break; |
| @@ -2949,7 +2948,8 @@ static int dce_v11_0_hw_init(void *handle) | |||
| 2949 | amdgpu_atombios_encoder_init_dig(adev); | 2948 | amdgpu_atombios_encoder_init_dig(adev); |
| 2950 | if ((adev->asic_type == CHIP_POLARIS10) || | 2949 | if ((adev->asic_type == CHIP_POLARIS10) || |
| 2951 | (adev->asic_type == CHIP_POLARIS11) || | 2950 | (adev->asic_type == CHIP_POLARIS11) || |
| 2952 | (adev->asic_type == CHIP_POLARIS12)) { | 2951 | (adev->asic_type == CHIP_POLARIS12) || |
| 2952 | (adev->asic_type == CHIP_VEGAM)) { | ||
| 2953 | amdgpu_atombios_crtc_set_dce_clock(adev, adev->clock.default_dispclk, | 2953 | amdgpu_atombios_crtc_set_dce_clock(adev, adev->clock.default_dispclk, |
| 2954 | DCE_CLOCK_TYPE_DISPCLK, ATOM_GCK_DFS); | 2954 | DCE_CLOCK_TYPE_DISPCLK, ATOM_GCK_DFS); |
| 2955 | amdgpu_atombios_crtc_set_dce_clock(adev, 0, | 2955 | amdgpu_atombios_crtc_set_dce_clock(adev, 0, |
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 9f67b7fd3487..394cc1e8fe20 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | |||
| @@ -1780,7 +1780,6 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc, | |||
| 1780 | struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); | 1780 | struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); |
| 1781 | struct drm_device *dev = crtc->dev; | 1781 | struct drm_device *dev = crtc->dev; |
| 1782 | struct amdgpu_device *adev = dev->dev_private; | 1782 | struct amdgpu_device *adev = dev->dev_private; |
| 1783 | struct amdgpu_framebuffer *amdgpu_fb; | ||
| 1784 | struct drm_framebuffer *target_fb; | 1783 | struct drm_framebuffer *target_fb; |
| 1785 | struct drm_gem_object *obj; | 1784 | struct drm_gem_object *obj; |
| 1786 | struct amdgpu_bo *abo; | 1785 | struct amdgpu_bo *abo; |
| @@ -1798,18 +1797,15 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc, | |||
| 1798 | return 0; | 1797 | return 0; |
| 1799 | } | 1798 | } |
| 1800 | 1799 | ||
| 1801 | if (atomic) { | 1800 | if (atomic) |
| 1802 | amdgpu_fb = to_amdgpu_framebuffer(fb); | ||
| 1803 | target_fb = fb; | 1801 | target_fb = fb; |
| 1804 | } else { | 1802 | else |
| 1805 | amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); | ||
| 1806 | target_fb = crtc->primary->fb; | 1803 | target_fb = crtc->primary->fb; |
| 1807 | } | ||
| 1808 | 1804 | ||
| 1809 | /* If atomic, assume fb object is pinned & idle & fenced and | 1805 | /* If atomic, assume fb object is pinned & idle & fenced and |
| 1810 | * just update base pointers | 1806 | * just update base pointers |
| 1811 | */ | 1807 | */ |
| 1812 | obj = amdgpu_fb->obj; | 1808 | obj = target_fb->obj[0]; |
| 1813 | abo = gem_to_amdgpu_bo(obj); | 1809 | abo = gem_to_amdgpu_bo(obj); |
| 1814 | r = amdgpu_bo_reserve(abo, false); | 1810 | r = amdgpu_bo_reserve(abo, false); |
| 1815 | if (unlikely(r != 0)) | 1811 | if (unlikely(r != 0)) |
| @@ -1978,8 +1974,7 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc, | |||
| 1978 | WREG32(mmMASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0); | 1974 | WREG32(mmMASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0); |
| 1979 | 1975 | ||
| 1980 | if (!atomic && fb && fb != crtc->primary->fb) { | 1976 | if (!atomic && fb && fb != crtc->primary->fb) { |
| 1981 | amdgpu_fb = to_amdgpu_framebuffer(fb); | 1977 | abo = gem_to_amdgpu_bo(fb->obj[0]); |
| 1982 | abo = gem_to_amdgpu_bo(amdgpu_fb->obj); | ||
| 1983 | r = amdgpu_bo_reserve(abo, true); | 1978 | r = amdgpu_bo_reserve(abo, true); |
| 1984 | if (unlikely(r != 0)) | 1979 | if (unlikely(r != 0)) |
| 1985 | return r; | 1980 | return r; |
| @@ -2414,11 +2409,9 @@ static void dce_v6_0_crtc_disable(struct drm_crtc *crtc) | |||
| 2414 | dce_v6_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); | 2409 | dce_v6_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); |
| 2415 | if (crtc->primary->fb) { | 2410 | if (crtc->primary->fb) { |
| 2416 | int r; | 2411 | int r; |
| 2417 | struct amdgpu_framebuffer *amdgpu_fb; | ||
| 2418 | struct amdgpu_bo *abo; | 2412 | struct amdgpu_bo *abo; |
| 2419 | 2413 | ||
| 2420 | amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); | 2414 | abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]); |
| 2421 | abo = gem_to_amdgpu_bo(amdgpu_fb->obj); | ||
| 2422 | r = amdgpu_bo_reserve(abo, true); | 2415 | r = amdgpu_bo_reserve(abo, true); |
| 2423 | if (unlikely(r)) | 2416 | if (unlikely(r)) |
| 2424 | DRM_ERROR("failed to reserve abo before unpin\n"); | 2417 | DRM_ERROR("failed to reserve abo before unpin\n"); |
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index f55422cbd77a..c9b9ab8f1b05 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c | |||
| @@ -1754,7 +1754,6 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc, | |||
| 1754 | struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); | 1754 | struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); |
| 1755 | struct drm_device *dev = crtc->dev; | 1755 | struct drm_device *dev = crtc->dev; |
| 1756 | struct amdgpu_device *adev = dev->dev_private; | 1756 | struct amdgpu_device *adev = dev->dev_private; |
| 1757 | struct amdgpu_framebuffer *amdgpu_fb; | ||
| 1758 | struct drm_framebuffer *target_fb; | 1757 | struct drm_framebuffer *target_fb; |
| 1759 | struct drm_gem_object *obj; | 1758 | struct drm_gem_object *obj; |
| 1760 | struct amdgpu_bo *abo; | 1759 | struct amdgpu_bo *abo; |
| @@ -1773,18 +1772,15 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc, | |||
| 1773 | return 0; | 1772 | return 0; |
| 1774 | } | 1773 | } |
| 1775 | 1774 | ||
| 1776 | if (atomic) { | 1775 | if (atomic) |
| 1777 | amdgpu_fb = to_amdgpu_framebuffer(fb); | ||
| 1778 | target_fb = fb; | 1776 | target_fb = fb; |
| 1779 | } else { | 1777 | else |
| 1780 | amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); | ||
| 1781 | target_fb = crtc->primary->fb; | 1778 | target_fb = crtc->primary->fb; |
| 1782 | } | ||
| 1783 | 1779 | ||
| 1784 | /* If atomic, assume fb object is pinned & idle & fenced and | 1780 | /* If atomic, assume fb object is pinned & idle & fenced and |
| 1785 | * just update base pointers | 1781 | * just update base pointers |
| 1786 | */ | 1782 | */ |
| 1787 | obj = amdgpu_fb->obj; | 1783 | obj = target_fb->obj[0]; |
| 1788 | abo = gem_to_amdgpu_bo(obj); | 1784 | abo = gem_to_amdgpu_bo(obj); |
| 1789 | r = amdgpu_bo_reserve(abo, false); | 1785 | r = amdgpu_bo_reserve(abo, false); |
| 1790 | if (unlikely(r != 0)) | 1786 | if (unlikely(r != 0)) |
| @@ -1955,8 +1951,7 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc, | |||
| 1955 | WREG32(mmMASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0); | 1951 | WREG32(mmMASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0); |
| 1956 | 1952 | ||
| 1957 | if (!atomic && fb && fb != crtc->primary->fb) { | 1953 | if (!atomic && fb && fb != crtc->primary->fb) { |
| 1958 | amdgpu_fb = to_amdgpu_framebuffer(fb); | 1954 | abo = gem_to_amdgpu_bo(fb->obj[0]); |
| 1959 | abo = gem_to_amdgpu_bo(amdgpu_fb->obj); | ||
| 1960 | r = amdgpu_bo_reserve(abo, true); | 1955 | r = amdgpu_bo_reserve(abo, true); |
| 1961 | if (unlikely(r != 0)) | 1956 | if (unlikely(r != 0)) |
| 1962 | return r; | 1957 | return r; |
| @@ -2430,11 +2425,9 @@ static void dce_v8_0_crtc_disable(struct drm_crtc *crtc) | |||
| 2430 | dce_v8_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); | 2425 | dce_v8_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); |
| 2431 | if (crtc->primary->fb) { | 2426 | if (crtc->primary->fb) { |
| 2432 | int r; | 2427 | int r; |
| 2433 | struct amdgpu_framebuffer *amdgpu_fb; | ||
| 2434 | struct amdgpu_bo *abo; | 2428 | struct amdgpu_bo *abo; |
| 2435 | 2429 | ||
| 2436 | amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); | 2430 | abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]); |
| 2437 | abo = gem_to_amdgpu_bo(amdgpu_fb->obj); | ||
| 2438 | r = amdgpu_bo_reserve(abo, true); | 2431 | r = amdgpu_bo_reserve(abo, true); |
| 2439 | if (unlikely(r)) | 2432 | if (unlikely(r)) |
| 2440 | DRM_ERROR("failed to reserve abo before unpin\n"); | 2433 | DRM_ERROR("failed to reserve abo before unpin\n"); |
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c index b51f05dc9582..dbf2ccd0c744 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c | |||
| @@ -168,11 +168,9 @@ static void dce_virtual_crtc_disable(struct drm_crtc *crtc) | |||
| 168 | dce_virtual_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); | 168 | dce_virtual_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); |
| 169 | if (crtc->primary->fb) { | 169 | if (crtc->primary->fb) { |
| 170 | int r; | 170 | int r; |
| 171 | struct amdgpu_framebuffer *amdgpu_fb; | ||
| 172 | struct amdgpu_bo *abo; | 171 | struct amdgpu_bo *abo; |
| 173 | 172 | ||
| 174 | amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); | 173 | abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]); |
| 175 | abo = gem_to_amdgpu_bo(amdgpu_fb->obj); | ||
| 176 | r = amdgpu_bo_reserve(abo, true); | 174 | r = amdgpu_bo_reserve(abo, true); |
| 177 | if (unlikely(r)) | 175 | if (unlikely(r)) |
| 178 | DRM_ERROR("failed to reserve abo before unpin\n"); | 176 | DRM_ERROR("failed to reserve abo before unpin\n"); |
| @@ -329,7 +327,7 @@ static int dce_virtual_get_modes(struct drm_connector *connector) | |||
| 329 | return 0; | 327 | return 0; |
| 330 | } | 328 | } |
| 331 | 329 | ||
| 332 | static int dce_virtual_mode_valid(struct drm_connector *connector, | 330 | static enum drm_mode_status dce_virtual_mode_valid(struct drm_connector *connector, |
| 333 | struct drm_display_mode *mode) | 331 | struct drm_display_mode *mode) |
| 334 | { | 332 | { |
| 335 | return MODE_OK; | 333 | return MODE_OK; |
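The return-type change makes the callback match the prototype the DRM core declares for connector helpers, so MODE_OK is no longer returned through an implicit int conversion. The relevant member of struct drm_connector_helper_funcs is, roughly as declared in the modeset helper vtables header:

    enum drm_mode_status (*mode_valid)(struct drm_connector *connector,
                                       struct drm_display_mode *mode);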
| @@ -462,8 +460,9 @@ static int dce_virtual_hw_init(void *handle) | |||
| 462 | break; | 460 | break; |
| 463 | case CHIP_CARRIZO: | 461 | case CHIP_CARRIZO: |
| 464 | case CHIP_STONEY: | 462 | case CHIP_STONEY: |
| 465 | case CHIP_POLARIS11: | ||
| 466 | case CHIP_POLARIS10: | 463 | case CHIP_POLARIS10: |
| 464 | case CHIP_POLARIS11: | ||
| 465 | case CHIP_VEGAM: | ||
| 467 | dce_v11_0_disable_dce(adev); | 466 | dce_v11_0_disable_dce(adev); |
| 468 | break; | 467 | break; |
| 469 | case CHIP_TOPAZ: | 468 | case CHIP_TOPAZ: |
| @@ -474,6 +473,7 @@ static int dce_virtual_hw_init(void *handle) | |||
| 474 | break; | 473 | break; |
| 475 | case CHIP_VEGA10: | 474 | case CHIP_VEGA10: |
| 476 | case CHIP_VEGA12: | 475 | case CHIP_VEGA12: |
| 476 | case CHIP_VEGA20: | ||
| 477 | break; | 477 | break; |
| 478 | default: | 478 | default: |
| 479 | DRM_ERROR("Virtual display unsupported ASIC type: 0x%X\n", adev->asic_type); | 479 | DRM_ERROR("Virtual display unsupported ASIC type: 0x%X\n", adev->asic_type); |
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c new file mode 100644 index 000000000000..9935371db7ce --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c | |||
| @@ -0,0 +1,120 @@ | |||
| 1 | /* | ||
| 2 | * Copyright 2018 Advanced Micro Devices, Inc. | ||
| 3 | * | ||
| 4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
| 5 | * copy of this software and associated documentation files (the "Software"), | ||
| 6 | * to deal in the Software without restriction, including without limitation | ||
| 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
| 8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
| 9 | * Software is furnished to do so, subject to the following conditions: | ||
| 10 | * | ||
| 11 | * The above copyright notice and this permission notice shall be included in | ||
| 12 | * all copies or substantial portions of the Software. | ||
| 13 | * | ||
| 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
| 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
| 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
| 17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | ||
| 18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | ||
| 19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | ||
| 20 | * OTHER DEALINGS IN THE SOFTWARE. | ||
| 21 | * | ||
| 22 | */ | ||
| 23 | #include "amdgpu.h" | ||
| 24 | #include "df_v1_7.h" | ||
| 25 | |||
| 26 | #include "df/df_1_7_default.h" | ||
| 27 | #include "df/df_1_7_offset.h" | ||
| 28 | #include "df/df_1_7_sh_mask.h" | ||
| 29 | |||
| 30 | static u32 df_v1_7_channel_number[] = {1, 2, 0, 4, 0, 8, 0, 16, 2}; | ||
| 31 | |||
| 32 | static void df_v1_7_init (struct amdgpu_device *adev) | ||
| 33 | { | ||
| 34 | } | ||
| 35 | |||
| 36 | static void df_v1_7_enable_broadcast_mode(struct amdgpu_device *adev, | ||
| 37 | bool enable) | ||
| 38 | { | ||
| 39 | u32 tmp; | ||
| 40 | |||
| 41 | if (enable) { | ||
| 42 | tmp = RREG32_SOC15(DF, 0, mmFabricConfigAccessControl); | ||
| 43 | tmp &= ~FabricConfigAccessControl__CfgRegInstAccEn_MASK; | ||
| 44 | WREG32_SOC15(DF, 0, mmFabricConfigAccessControl, tmp); | ||
| 45 | } else | ||
| 46 | WREG32_SOC15(DF, 0, mmFabricConfigAccessControl, | ||
| 47 | mmFabricConfigAccessControl_DEFAULT); | ||
| 48 | } | ||
| 49 | |||
| 50 | static u32 df_v1_7_get_fb_channel_number(struct amdgpu_device *adev) | ||
| 51 | { | ||
| 52 | u32 tmp; | ||
| 53 | |||
| 54 | tmp = RREG32_SOC15(DF, 0, mmDF_CS_AON0_DramBaseAddress0); | ||
| 55 | tmp &= DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK; | ||
| 56 | tmp >>= DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT; | ||
| 57 | |||
| 58 | return tmp; | ||
| 59 | } | ||
| 60 | |||
| 61 | static u32 df_v1_7_get_hbm_channel_number(struct amdgpu_device *adev) | ||
| 62 | { | ||
| 63 | int fb_channel_number; | ||
| 64 | |||
| 65 | fb_channel_number = adev->df_funcs->get_fb_channel_number(adev); | ||
| 66 | |||
| 67 | return df_v1_7_channel_number[fb_channel_number]; | ||
| 68 | } | ||
| 69 | |||
| 70 | static void df_v1_7_update_medium_grain_clock_gating(struct amdgpu_device *adev, | ||
| 71 | bool enable) | ||
| 72 | { | ||
| 73 | u32 tmp; | ||
| 74 | |||
| 75 | /* Put DF on broadcast mode */ | ||
| 76 | adev->df_funcs->enable_broadcast_mode(adev, true); | ||
| 77 | |||
| 78 | if (enable && (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG)) { | ||
| 79 | tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater); | ||
| 80 | tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK; | ||
| 81 | tmp |= DF_V1_7_MGCG_ENABLE_15_CYCLE_DELAY; | ||
| 82 | WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp); | ||
| 83 | } else { | ||
| 84 | tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater); | ||
| 85 | tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK; | ||
| 86 | tmp |= DF_V1_7_MGCG_DISABLE; | ||
| 87 | WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp); | ||
| 88 | } | ||
| 89 | |||
| 90 | /* Exit broadcast mode */ | ||
| 91 | adev->df_funcs->enable_broadcast_mode(adev, false); | ||
| 92 | } | ||
| 93 | |||
| 94 | static void df_v1_7_get_clockgating_state(struct amdgpu_device *adev, | ||
| 95 | u32 *flags) | ||
| 96 | { | ||
| 97 | u32 tmp; | ||
| 98 | |||
| 99 | /* AMD_CG_SUPPORT_DF_MGCG */ | ||
| 100 | tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater); | ||
| 101 | if (tmp & DF_V1_7_MGCG_ENABLE_15_CYCLE_DELAY) | ||
| 102 | *flags |= AMD_CG_SUPPORT_DF_MGCG; | ||
| 103 | } | ||
| 104 | |||
| 105 | static void df_v1_7_enable_ecc_force_par_wr_rmw(struct amdgpu_device *adev, | ||
| 106 | bool enable) | ||
| 107 | { | ||
| 108 | WREG32_FIELD15(DF, 0, DF_CS_AON0_CoherentSlaveModeCtrlA0, | ||
| 109 | ForceParWrRMW, enable); | ||
| 110 | } | ||
| 111 | |||
| 112 | const struct amdgpu_df_funcs df_v1_7_funcs = { | ||
| 113 | .init = df_v1_7_init, | ||
| 114 | .enable_broadcast_mode = df_v1_7_enable_broadcast_mode, | ||
| 115 | .get_fb_channel_number = df_v1_7_get_fb_channel_number, | ||
| 116 | .get_hbm_channel_number = df_v1_7_get_hbm_channel_number, | ||
| 117 | .update_medium_grain_clock_gating = df_v1_7_update_medium_grain_clock_gating, | ||
| 118 | .get_clockgating_state = df_v1_7_get_clockgating_state, | ||
| 119 | .enable_ecc_force_par_wr_rmw = df_v1_7_enable_ecc_force_par_wr_rmw, | ||
| 120 | }; | ||
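The interleave encoding read out of DF_CS_AON0_DramBaseAddress0 (the IntLvNumChan field) is used as an index into df_v1_7_channel_number[] to turn it into an HBM channel count. A standalone sketch of the lookup; the table is copied from the file above, the index value is made up for illustration:

    #include <stdio.h>

    /* copied from df_v1_7.c above */
    static const unsigned int df_v1_7_channel_number[] = {1, 2, 0, 4, 0, 8, 0, 16, 2};

    int main(void)
    {
            unsigned int intlv_num_chan = 3;        /* hypothetical IntLvNumChan field value */

            /* encoding 3 maps to 4 channels in the table above */
            printf("channels = %u\n", df_v1_7_channel_number[intlv_num_chan]);
            return 0;
    }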
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v1_7.h b/drivers/gpu/drm/amd/amdgpu/df_v1_7.h new file mode 100644 index 000000000000..74621104c487 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/df_v1_7.h | |||
| @@ -0,0 +1,40 @@ | |||
| 1 | /* | ||
| 2 | * Copyright 2018 Advanced Micro Devices, Inc. | ||
| 3 | * | ||
| 4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
| 5 | * copy of this software and associated documentation files (the "Software"), | ||
| 6 | * to deal in the Software without restriction, including without limitation | ||
| 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
| 8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
| 9 | * Software is furnished to do so, subject to the following conditions: | ||
| 10 | * | ||
| 11 | * The above copyright notice and this permission notice shall be included in | ||
| 12 | * all copies or substantial portions of the Software. | ||
| 13 | * | ||
| 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
| 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
| 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
| 17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | ||
| 18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | ||
| 19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | ||
| 20 | * OTHER DEALINGS IN THE SOFTWARE. | ||
| 21 | * | ||
| 22 | */ | ||
| 23 | |||
| 24 | #ifndef __DF_V1_7_H__ | ||
| 25 | #define __DF_V1_7_H__ | ||
| 26 | |||
| 27 | #include "soc15_common.h" | ||
| 28 | enum DF_V1_7_MGCG | ||
| 29 | { | ||
| 30 | DF_V1_7_MGCG_DISABLE = 0, | ||
| 31 | DF_V1_7_MGCG_ENABLE_00_CYCLE_DELAY =1, | ||
| 32 | DF_V1_7_MGCG_ENABLE_01_CYCLE_DELAY =2, | ||
| 33 | DF_V1_7_MGCG_ENABLE_15_CYCLE_DELAY =13, | ||
| 34 | DF_V1_7_MGCG_ENABLE_31_CYCLE_DELAY =14, | ||
| 35 | DF_V1_7_MGCG_ENABLE_63_CYCLE_DELAY =15 | ||
| 36 | }; | ||
| 37 | |||
| 38 | extern const struct amdgpu_df_funcs df_v1_7_funcs; | ||
| 39 | |||
| 40 | #endif | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c new file mode 100644 index 000000000000..60608b3df881 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c | |||
| @@ -0,0 +1,116 @@ | |||
| 1 | /* | ||
| 2 | * Copyright 2018 Advanced Micro Devices, Inc. | ||
| 3 | * | ||
| 4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
| 5 | * copy of this software and associated documentation files (the "Software"), | ||
| 6 | * to deal in the Software without restriction, including without limitation | ||
| 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
| 8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
| 9 | * Software is furnished to do so, subject to the following conditions: | ||
| 10 | * | ||
| 11 | * The above copyright notice and this permission notice shall be included in | ||
| 12 | * all copies or substantial portions of the Software. | ||
| 13 | * | ||
| 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
| 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
| 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
| 17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | ||
| 18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | ||
| 19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | ||
| 20 | * OTHER DEALINGS IN THE SOFTWARE. | ||
| 21 | * | ||
| 22 | */ | ||
| 23 | #include "amdgpu.h" | ||
| 24 | #include "df_v3_6.h" | ||
| 25 | |||
| 26 | #include "df/df_3_6_default.h" | ||
| 27 | #include "df/df_3_6_offset.h" | ||
| 28 | #include "df/df_3_6_sh_mask.h" | ||
| 29 | |||
| 30 | static u32 df_v3_6_channel_number[] = {1, 2, 0, 4, 0, 8, 0, | ||
| 31 | 16, 32, 0, 0, 0, 2, 4, 8}; | ||
| 32 | |||
| 33 | static void df_v3_6_init(struct amdgpu_device *adev) | ||
| 34 | { | ||
| 35 | } | ||
| 36 | |||
| 37 | static void df_v3_6_enable_broadcast_mode(struct amdgpu_device *adev, | ||
| 38 | bool enable) | ||
| 39 | { | ||
| 40 | u32 tmp; | ||
| 41 | |||
| 42 | if (enable) { | ||
| 43 | tmp = RREG32_SOC15(DF, 0, mmFabricConfigAccessControl); | ||
| 44 | tmp &= ~FabricConfigAccessControl__CfgRegInstAccEn_MASK; | ||
| 45 | WREG32_SOC15(DF, 0, mmFabricConfigAccessControl, tmp); | ||
| 46 | } else | ||
| 47 | WREG32_SOC15(DF, 0, mmFabricConfigAccessControl, | ||
| 48 | mmFabricConfigAccessControl_DEFAULT); | ||
| 49 | } | ||
| 50 | |||
| 51 | static u32 df_v3_6_get_fb_channel_number(struct amdgpu_device *adev) | ||
| 52 | { | ||
| 53 | u32 tmp; | ||
| 54 | |||
| 55 | tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DramBaseAddress0); | ||
| 56 | tmp &= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK; | ||
| 57 | tmp >>= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan__SHIFT; | ||
| 58 | |||
| 59 | return tmp; | ||
| 60 | } | ||
| 61 | |||
| 62 | static u32 df_v3_6_get_hbm_channel_number(struct amdgpu_device *adev) | ||
| 63 | { | ||
| 64 | int fb_channel_number; | ||
| 65 | |||
| 66 | fb_channel_number = adev->df_funcs->get_fb_channel_number(adev); | ||
| 67 | if (fb_channel_number > ARRAY_SIZE(df_v3_6_channel_number)) | ||
| 68 | fb_channel_number = 0; | ||
| 69 | |||
| 70 | return df_v3_6_channel_number[fb_channel_number]; | ||
| 71 | } | ||
| 72 | |||
| 73 | static void df_v3_6_update_medium_grain_clock_gating(struct amdgpu_device *adev, | ||
| 74 | bool enable) | ||
| 75 | { | ||
| 76 | u32 tmp; | ||
| 77 | |||
| 78 | /* Put DF on broadcast mode */ | ||
| 79 | adev->df_funcs->enable_broadcast_mode(adev, true); | ||
| 80 | |||
| 81 | if (enable && (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG)) { | ||
| 82 | tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater); | ||
| 83 | tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK; | ||
| 84 | tmp |= DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY; | ||
| 85 | WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp); | ||
| 86 | } else { | ||
| 87 | tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater); | ||
| 88 | tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK; | ||
| 89 | tmp |= DF_V3_6_MGCG_DISABLE; | ||
| 90 | WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp); | ||
| 91 | } | ||
| 92 | |||
| 93 | /* Exit broadcast mode */ | ||
| 94 | adev->df_funcs->enable_broadcast_mode(adev, false); | ||
| 95 | } | ||
| 96 | |||
| 97 | static void df_v3_6_get_clockgating_state(struct amdgpu_device *adev, | ||
| 98 | u32 *flags) | ||
| 99 | { | ||
| 100 | u32 tmp; | ||
| 101 | |||
| 102 | /* AMD_CG_SUPPORT_DF_MGCG */ | ||
| 103 | tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater); | ||
| 104 | if (tmp & DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY) | ||
| 105 | *flags |= AMD_CG_SUPPORT_DF_MGCG; | ||
| 106 | } | ||
| 107 | |||
| 108 | const struct amdgpu_df_funcs df_v3_6_funcs = { | ||
| 109 | .init = df_v3_6_init, | ||
| 110 | .enable_broadcast_mode = df_v3_6_enable_broadcast_mode, | ||
| 111 | .get_fb_channel_number = df_v3_6_get_fb_channel_number, | ||
| 112 | .get_hbm_channel_number = df_v3_6_get_hbm_channel_number, | ||
| 113 | .update_medium_grain_clock_gating = | ||
| 114 | df_v3_6_update_medium_grain_clock_gating, | ||
| 115 | .get_clockgating_state = df_v3_6_get_clockgating_state, | ||
| 116 | }; | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.h b/drivers/gpu/drm/amd/amdgpu/df_v3_6.h new file mode 100644 index 000000000000..e79c58e5efcb --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.h | |||
| @@ -0,0 +1,40 @@ | |||
| 1 | /* | ||
| 2 | * Copyright 2018 Advanced Micro Devices, Inc. | ||
| 3 | * | ||
| 4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
| 5 | * copy of this software and associated documentation files (the "Software"), | ||
| 6 | * to deal in the Software without restriction, including without limitation | ||
| 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
| 8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
| 9 | * Software is furnished to do so, subject to the following conditions: | ||
| 10 | * | ||
| 11 | * The above copyright notice and this permission notice shall be included in | ||
| 12 | * all copies or substantial portions of the Software. | ||
| 13 | * | ||
| 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
| 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
| 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
| 17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | ||
| 18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | ||
| 19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | ||
| 20 | * OTHER DEALINGS IN THE SOFTWARE. | ||
| 21 | * | ||
| 22 | */ | ||
| 23 | |||
| 24 | #ifndef __DF_V3_6_H__ | ||
| 25 | #define __DF_V3_6_H__ | ||
| 26 | |||
| 27 | #include "soc15_common.h" | ||
| 28 | |||
| 29 | enum DF_V3_6_MGCG { | ||
| 30 | DF_V3_6_MGCG_DISABLE = 0, | ||
| 31 | DF_V3_6_MGCG_ENABLE_00_CYCLE_DELAY = 1, | ||
| 32 | DF_V3_6_MGCG_ENABLE_01_CYCLE_DELAY = 2, | ||
| 33 | DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY = 13, | ||
| 34 | DF_V3_6_MGCG_ENABLE_31_CYCLE_DELAY = 14, | ||
| 35 | DF_V3_6_MGCG_ENABLE_63_CYCLE_DELAY = 15 | ||
| 36 | }; | ||
| 37 | |||
| 38 | extern const struct amdgpu_df_funcs df_v3_6_funcs; | ||
| 39 | |||
| 40 | #endif | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index e14263fca1c9..818874b13c99 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | |||
| @@ -125,18 +125,6 @@ MODULE_FIRMWARE("amdgpu/fiji_mec.bin"); | |||
| 125 | MODULE_FIRMWARE("amdgpu/fiji_mec2.bin"); | 125 | MODULE_FIRMWARE("amdgpu/fiji_mec2.bin"); |
| 126 | MODULE_FIRMWARE("amdgpu/fiji_rlc.bin"); | 126 | MODULE_FIRMWARE("amdgpu/fiji_rlc.bin"); |
| 127 | 127 | ||
| 128 | MODULE_FIRMWARE("amdgpu/polaris11_ce.bin"); | ||
| 129 | MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin"); | ||
| 130 | MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin"); | ||
| 131 | MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin"); | ||
| 132 | MODULE_FIRMWARE("amdgpu/polaris11_me.bin"); | ||
| 133 | MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin"); | ||
| 134 | MODULE_FIRMWARE("amdgpu/polaris11_mec.bin"); | ||
| 135 | MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin"); | ||
| 136 | MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin"); | ||
| 137 | MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin"); | ||
| 138 | MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin"); | ||
| 139 | |||
| 140 | MODULE_FIRMWARE("amdgpu/polaris10_ce.bin"); | 128 | MODULE_FIRMWARE("amdgpu/polaris10_ce.bin"); |
| 141 | MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin"); | 129 | MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin"); |
| 142 | MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin"); | 130 | MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin"); |
| @@ -149,6 +137,18 @@ MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin"); | |||
| 149 | MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin"); | 137 | MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin"); |
| 150 | MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin"); | 138 | MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin"); |
| 151 | 139 | ||
| 140 | MODULE_FIRMWARE("amdgpu/polaris11_ce.bin"); | ||
| 141 | MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin"); | ||
| 142 | MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin"); | ||
| 143 | MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin"); | ||
| 144 | MODULE_FIRMWARE("amdgpu/polaris11_me.bin"); | ||
| 145 | MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin"); | ||
| 146 | MODULE_FIRMWARE("amdgpu/polaris11_mec.bin"); | ||
| 147 | MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin"); | ||
| 148 | MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin"); | ||
| 149 | MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin"); | ||
| 150 | MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin"); | ||
| 151 | |||
| 152 | MODULE_FIRMWARE("amdgpu/polaris12_ce.bin"); | 152 | MODULE_FIRMWARE("amdgpu/polaris12_ce.bin"); |
| 153 | MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin"); | 153 | MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin"); |
| 154 | MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin"); | 154 | MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin"); |
| @@ -161,6 +161,13 @@ MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin"); | |||
| 161 | MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin"); | 161 | MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin"); |
| 162 | MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin"); | 162 | MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin"); |
| 163 | 163 | ||
| 164 | MODULE_FIRMWARE("amdgpu/vegam_ce.bin"); | ||
| 165 | MODULE_FIRMWARE("amdgpu/vegam_pfp.bin"); | ||
| 166 | MODULE_FIRMWARE("amdgpu/vegam_me.bin"); | ||
| 167 | MODULE_FIRMWARE("amdgpu/vegam_mec.bin"); | ||
| 168 | MODULE_FIRMWARE("amdgpu/vegam_mec2.bin"); | ||
| 169 | MODULE_FIRMWARE("amdgpu/vegam_rlc.bin"); | ||
| 170 | |||
| 164 | static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] = | 171 | static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] = |
| 165 | { | 172 | { |
| 166 | {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0}, | 173 | {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0}, |
| @@ -292,6 +299,37 @@ static const u32 tonga_mgcg_cgcg_init[] = | |||
| 292 | mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001, | 299 | mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001, |
| 293 | }; | 300 | }; |
| 294 | 301 | ||
| 302 | static const u32 golden_settings_vegam_a11[] = | ||
| 303 | { | ||
| 304 | mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208, | ||
| 305 | mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000, | ||
| 306 | mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040, | ||
| 307 | mmDB_DEBUG2, 0xf00fffff, 0x00000400, | ||
| 308 | mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, | ||
| 309 | mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, | ||
| 310 | mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a, | ||
| 311 | mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e, | ||
| 312 | mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c, | ||
| 313 | mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c, | ||
| 314 | mmSQ_CONFIG, 0x07f80000, 0x01180000, | ||
| 315 | mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, | ||
| 316 | mmTCC_CTRL, 0x00100000, 0xf31fff7f, | ||
| 317 | mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7, | ||
| 318 | mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000, | ||
| 319 | mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054, | ||
| 320 | mmVGT_RESET_DEBUG, 0x00000004, 0x00000004, | ||
| 321 | }; | ||
| 322 | |||
| 323 | static const u32 vegam_golden_common_all[] = | ||
| 324 | { | ||
| 325 | mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, | ||
| 326 | mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003, | ||
| 327 | mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, | ||
| 328 | mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, | ||
| 329 | mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF, | ||
| 330 | mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF, | ||
| 331 | }; | ||
| 332 | |||
| 295 | static const u32 golden_settings_polaris11_a11[] = | 333 | static const u32 golden_settings_polaris11_a11[] = |
| 296 | { | 334 | { |
| 297 | mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208, | 335 | mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208, |
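The golden_settings_* and *_golden_common_all arrays used by gfx_v8 are flat u32 lists consumed three entries at a time by amdgpu_device_program_register_sequence(): register offset, field mask, value. The exact write semantics (including the fast path for a full 0xffffffff mask) live in amdgpu_device.c; the fragment below is only a rough sketch of the read-modify-write implied by each triplet, with read_reg/write_reg as illustrative stand-ins:

    static void apply_golden_triplet(unsigned int reg, unsigned int mask, unsigned int value)
    {
            unsigned int tmp;

            if (mask == 0xffffffff) {       /* whole register: just write the value */
                    write_reg(reg, value);
                    return;
            }

            tmp = read_reg(reg);            /* keep bits outside the mask           */
            tmp &= ~mask;                   /* clear the field selected by the mask */
            tmp |= value;                   /* program the golden value             */
            write_reg(reg, tmp);
    }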
| @@ -712,6 +750,14 @@ static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev) | |||
| 712 | tonga_golden_common_all, | 750 | tonga_golden_common_all, |
| 713 | ARRAY_SIZE(tonga_golden_common_all)); | 751 | ARRAY_SIZE(tonga_golden_common_all)); |
| 714 | break; | 752 | break; |
| 753 | case CHIP_VEGAM: | ||
| 754 | amdgpu_device_program_register_sequence(adev, | ||
| 755 | golden_settings_vegam_a11, | ||
| 756 | ARRAY_SIZE(golden_settings_vegam_a11)); | ||
| 757 | amdgpu_device_program_register_sequence(adev, | ||
| 758 | vegam_golden_common_all, | ||
| 759 | ARRAY_SIZE(vegam_golden_common_all)); | ||
| 760 | break; | ||
| 715 | case CHIP_POLARIS11: | 761 | case CHIP_POLARIS11: |
| 716 | case CHIP_POLARIS12: | 762 | case CHIP_POLARIS12: |
| 717 | amdgpu_device_program_register_sequence(adev, | 763 | amdgpu_device_program_register_sequence(adev, |
| @@ -918,17 +964,20 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) | |||
| 918 | case CHIP_FIJI: | 964 | case CHIP_FIJI: |
| 919 | chip_name = "fiji"; | 965 | chip_name = "fiji"; |
| 920 | break; | 966 | break; |
| 921 | case CHIP_POLARIS11: | 967 | case CHIP_STONEY: |
| 922 | chip_name = "polaris11"; | 968 | chip_name = "stoney"; |
| 923 | break; | 969 | break; |
| 924 | case CHIP_POLARIS10: | 970 | case CHIP_POLARIS10: |
| 925 | chip_name = "polaris10"; | 971 | chip_name = "polaris10"; |
| 926 | break; | 972 | break; |
| 973 | case CHIP_POLARIS11: | ||
| 974 | chip_name = "polaris11"; | ||
| 975 | break; | ||
| 927 | case CHIP_POLARIS12: | 976 | case CHIP_POLARIS12: |
| 928 | chip_name = "polaris12"; | 977 | chip_name = "polaris12"; |
| 929 | break; | 978 | break; |
| 930 | case CHIP_STONEY: | 979 | case CHIP_VEGAM: |
| 931 | chip_name = "stoney"; | 980 | chip_name = "vegam"; |
| 932 | break; | 981 | break; |
| 933 | default: | 982 | default: |
| 934 | BUG(); | 983 | BUG(); |
| @@ -1770,6 +1819,7 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) | |||
| 1770 | gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN; | 1819 | gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN; |
| 1771 | break; | 1820 | break; |
| 1772 | case CHIP_POLARIS10: | 1821 | case CHIP_POLARIS10: |
| 1822 | case CHIP_VEGAM: | ||
| 1773 | ret = amdgpu_atombios_get_gfx_info(adev); | 1823 | ret = amdgpu_atombios_get_gfx_info(adev); |
| 1774 | if (ret) | 1824 | if (ret) |
| 1775 | return ret; | 1825 | return ret; |
| @@ -1957,12 +2007,13 @@ static int gfx_v8_0_sw_init(void *handle) | |||
| 1957 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 2007 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 1958 | 2008 | ||
| 1959 | switch (adev->asic_type) { | 2009 | switch (adev->asic_type) { |
| 1960 | case CHIP_FIJI: | ||
| 1961 | case CHIP_TONGA: | 2010 | case CHIP_TONGA: |
| 2011 | case CHIP_CARRIZO: | ||
| 2012 | case CHIP_FIJI: | ||
| 2013 | case CHIP_POLARIS10: | ||
| 1962 | case CHIP_POLARIS11: | 2014 | case CHIP_POLARIS11: |
| 1963 | case CHIP_POLARIS12: | 2015 | case CHIP_POLARIS12: |
| 1964 | case CHIP_POLARIS10: | 2016 | case CHIP_VEGAM: |
| 1965 | case CHIP_CARRIZO: | ||
| 1966 | adev->gfx.mec.num_mec = 2; | 2017 | adev->gfx.mec.num_mec = 2; |
| 1967 | break; | 2018 | break; |
| 1968 | case CHIP_TOPAZ: | 2019 | case CHIP_TOPAZ: |
| @@ -2323,6 +2374,7 @@ static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev) | |||
| 2323 | 2374 | ||
| 2324 | break; | 2375 | break; |
| 2325 | case CHIP_FIJI: | 2376 | case CHIP_FIJI: |
| 2377 | case CHIP_VEGAM: | ||
| 2326 | modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | | 2378 | modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | |
| 2327 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | | 2379 | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | |
| 2328 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | | 2380 | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | |
| @@ -3504,6 +3556,7 @@ gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1) | |||
| 3504 | { | 3556 | { |
| 3505 | switch (adev->asic_type) { | 3557 | switch (adev->asic_type) { |
| 3506 | case CHIP_FIJI: | 3558 | case CHIP_FIJI: |
| 3559 | case CHIP_VEGAM: | ||
| 3507 | *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) | | 3560 | *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) | |
| 3508 | RB_XSEL2(1) | PKR_MAP(2) | | 3561 | RB_XSEL2(1) | PKR_MAP(2) | |
| 3509 | PKR_XSEL(1) | PKR_YSEL(1) | | 3562 | PKR_XSEL(1) | PKR_YSEL(1) | |
| @@ -4071,7 +4124,8 @@ static void gfx_v8_0_init_pg(struct amdgpu_device *adev) | |||
| 4071 | gfx_v8_0_init_power_gating(adev); | 4124 | gfx_v8_0_init_power_gating(adev); |
| 4072 | WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask); | 4125 | WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask); |
| 4073 | } else if ((adev->asic_type == CHIP_POLARIS11) || | 4126 | } else if ((adev->asic_type == CHIP_POLARIS11) || |
| 4074 | (adev->asic_type == CHIP_POLARIS12)) { | 4127 | (adev->asic_type == CHIP_POLARIS12) || |
| 4128 | (adev->asic_type == CHIP_VEGAM)) { | ||
| 4075 | gfx_v8_0_init_csb(adev); | 4129 | gfx_v8_0_init_csb(adev); |
| 4076 | gfx_v8_0_init_save_restore_list(adev); | 4130 | gfx_v8_0_init_save_restore_list(adev); |
| 4077 | gfx_v8_0_enable_save_restore_machine(adev); | 4131 | gfx_v8_0_enable_save_restore_machine(adev); |
| @@ -4146,7 +4200,8 @@ static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev) | |||
| 4146 | WREG32(mmRLC_CGCG_CGLS_CTRL, tmp); | 4200 | WREG32(mmRLC_CGCG_CGLS_CTRL, tmp); |
| 4147 | if (adev->asic_type == CHIP_POLARIS11 || | 4201 | if (adev->asic_type == CHIP_POLARIS11 || |
| 4148 | adev->asic_type == CHIP_POLARIS10 || | 4202 | adev->asic_type == CHIP_POLARIS10 || |
| 4149 | adev->asic_type == CHIP_POLARIS12) { | 4203 | adev->asic_type == CHIP_POLARIS12 || |
| 4204 | adev->asic_type == CHIP_VEGAM) { | ||
| 4150 | tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D); | 4205 | tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D); |
| 4151 | tmp &= ~0x3; | 4206 | tmp &= ~0x3; |
| 4152 | WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp); | 4207 | WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp); |
| @@ -5498,7 +5553,8 @@ static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *ade | |||
| 5498 | bool enable) | 5553 | bool enable) |
| 5499 | { | 5554 | { |
| 5500 | if ((adev->asic_type == CHIP_POLARIS11) || | 5555 | if ((adev->asic_type == CHIP_POLARIS11) || |
| 5501 | (adev->asic_type == CHIP_POLARIS12)) | 5556 | (adev->asic_type == CHIP_POLARIS12) || |
| 5557 | (adev->asic_type == CHIP_VEGAM)) | ||
| 5502 | /* Send msg to SMU via Powerplay */ | 5558 | /* Send msg to SMU via Powerplay */ |
| 5503 | amdgpu_device_ip_set_powergating_state(adev, | 5559 | amdgpu_device_ip_set_powergating_state(adev, |
| 5504 | AMD_IP_BLOCK_TYPE_SMC, | 5560 | AMD_IP_BLOCK_TYPE_SMC, |
| @@ -5588,6 +5644,7 @@ static int gfx_v8_0_set_powergating_state(void *handle, | |||
| 5588 | break; | 5644 | break; |
| 5589 | case CHIP_POLARIS11: | 5645 | case CHIP_POLARIS11: |
| 5590 | case CHIP_POLARIS12: | 5646 | case CHIP_POLARIS12: |
| 5647 | case CHIP_VEGAM: | ||
| 5591 | if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) | 5648 | if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) |
| 5592 | gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true); | 5649 | gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true); |
| 5593 | else | 5650 | else |
| @@ -6154,6 +6211,7 @@ static int gfx_v8_0_set_clockgating_state(void *handle, | |||
| 6154 | case CHIP_POLARIS10: | 6211 | case CHIP_POLARIS10: |
| 6155 | case CHIP_POLARIS11: | 6212 | case CHIP_POLARIS11: |
| 6156 | case CHIP_POLARIS12: | 6213 | case CHIP_POLARIS12: |
| 6214 | case CHIP_VEGAM: | ||
| 6157 | gfx_v8_0_polaris_update_gfx_clock_gating(adev, state); | 6215 | gfx_v8_0_polaris_update_gfx_clock_gating(adev, state); |
| 6158 | break; | 6216 | break; |
| 6159 | default: | 6217 | default: |
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 9d39fd5b1822..d7530fdfaad5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | |||
| @@ -27,6 +27,7 @@ | |||
| 27 | #include "amdgpu_gfx.h" | 27 | #include "amdgpu_gfx.h" |
| 28 | #include "soc15.h" | 28 | #include "soc15.h" |
| 29 | #include "soc15d.h" | 29 | #include "soc15d.h" |
| 30 | #include "amdgpu_atomfirmware.h" | ||
| 30 | 31 | ||
| 31 | #include "gc/gc_9_0_offset.h" | 32 | #include "gc/gc_9_0_offset.h" |
| 32 | #include "gc/gc_9_0_sh_mask.h" | 33 | #include "gc/gc_9_0_sh_mask.h" |
| @@ -41,7 +42,6 @@ | |||
| 41 | #define GFX9_MEC_HPD_SIZE 2048 | 42 | #define GFX9_MEC_HPD_SIZE 2048 |
| 42 | #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L | 43 | #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L |
| 43 | #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L | 44 | #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L |
| 44 | #define GFX9_RLC_FORMAT_DIRECT_REG_LIST_LENGTH 34 | ||
| 45 | 45 | ||
| 46 | #define mmPWR_MISC_CNTL_STATUS 0x0183 | 46 | #define mmPWR_MISC_CNTL_STATUS 0x0183 |
| 47 | #define mmPWR_MISC_CNTL_STATUS_BASE_IDX 0 | 47 | #define mmPWR_MISC_CNTL_STATUS_BASE_IDX 0 |
| @@ -64,6 +64,13 @@ MODULE_FIRMWARE("amdgpu/vega12_mec.bin"); | |||
| 64 | MODULE_FIRMWARE("amdgpu/vega12_mec2.bin"); | 64 | MODULE_FIRMWARE("amdgpu/vega12_mec2.bin"); |
| 65 | MODULE_FIRMWARE("amdgpu/vega12_rlc.bin"); | 65 | MODULE_FIRMWARE("amdgpu/vega12_rlc.bin"); |
| 66 | 66 | ||
| 67 | MODULE_FIRMWARE("amdgpu/vega20_ce.bin"); | ||
| 68 | MODULE_FIRMWARE("amdgpu/vega20_pfp.bin"); | ||
| 69 | MODULE_FIRMWARE("amdgpu/vega20_me.bin"); | ||
| 70 | MODULE_FIRMWARE("amdgpu/vega20_mec.bin"); | ||
| 71 | MODULE_FIRMWARE("amdgpu/vega20_mec2.bin"); | ||
| 72 | MODULE_FIRMWARE("amdgpu/vega20_rlc.bin"); | ||
| 73 | |||
| 67 | MODULE_FIRMWARE("amdgpu/raven_ce.bin"); | 74 | MODULE_FIRMWARE("amdgpu/raven_ce.bin"); |
| 68 | MODULE_FIRMWARE("amdgpu/raven_pfp.bin"); | 75 | MODULE_FIRMWARE("amdgpu/raven_pfp.bin"); |
| 69 | MODULE_FIRMWARE("amdgpu/raven_me.bin"); | 76 | MODULE_FIRMWARE("amdgpu/raven_me.bin"); |
| @@ -73,29 +80,22 @@ MODULE_FIRMWARE("amdgpu/raven_rlc.bin"); | |||
| 73 | 80 | ||
| 74 | static const struct soc15_reg_golden golden_settings_gc_9_0[] = | 81 | static const struct soc15_reg_golden golden_settings_gc_9_0[] = |
| 75 | { | 82 | { |
| 76 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080), | ||
| 77 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080), | ||
| 78 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080), | ||
| 79 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420), | 83 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420), |
| 80 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000), | 84 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000), |
| 81 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080), | ||
| 82 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024), | 85 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024), |
| 83 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), | 86 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), |
| 84 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), | 87 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), |
| 85 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080), | ||
| 86 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080), | ||
| 87 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080), | ||
| 88 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080), | ||
| 89 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080), | ||
| 90 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000), | 88 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000), |
| 91 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107), | 89 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800), |
| 90 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800), | ||
| 91 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87), | ||
| 92 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f), | ||
| 92 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000), | 93 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000), |
| 93 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), | 94 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), |
| 94 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68), | 95 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68), |
| 95 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197), | 96 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197), |
| 96 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), | 97 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), |
| 97 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff), | 98 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff) |
| 98 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080) | ||
| 99 | }; | 99 | }; |
| 100 | 100 | ||
| 101 | static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] = | 101 | static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] = |
| @@ -109,6 +109,20 @@ static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] = | |||
| 109 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800) | 109 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800) |
| 110 | }; | 110 | }; |
| 111 | 111 | ||
| 112 | static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] = | ||
| 113 | { | ||
| 114 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000), | ||
| 115 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000), | ||
| 116 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042), | ||
| 117 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042), | ||
| 118 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400), | ||
| 119 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000), | ||
| 120 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000), | ||
| 121 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107), | ||
| 122 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000), | ||
| 123 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000) | ||
| 124 | }; | ||
| 125 | |||
| 112 | static const struct soc15_reg_golden golden_settings_gc_9_1[] = | 126 | static const struct soc15_reg_golden golden_settings_gc_9_1[] = |
| 113 | { | 127 | { |
| 114 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), | 128 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), |
| @@ -185,6 +199,30 @@ static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] = | |||
| 185 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000) | 199 | SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000) |
| 186 | }; | 200 | }; |
| 187 | 201 | ||
| 202 | static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] = | ||
| 203 | { | ||
| 204 | mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0, | ||
| 205 | mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0, | ||
| 206 | mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0, | ||
| 207 | mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0, | ||
| 208 | mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0, | ||
| 209 | mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0, | ||
| 210 | mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0, | ||
| 211 | mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0, | ||
| 212 | }; | ||
| 213 | |||
| 214 | static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] = | ||
| 215 | { | ||
| 216 | mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0, | ||
| 217 | mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0, | ||
| 218 | mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0, | ||
| 219 | mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0, | ||
| 220 | mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0, | ||
| 221 | mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0, | ||
| 222 | mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0, | ||
| 223 | mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0, | ||
| 224 | }; | ||
| 225 | |||
| 188 | #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042 | 226 | #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042 |
| 189 | #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041 | 227 | #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041 |
| 190 | #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042 | 228 | #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042 |
| @@ -218,6 +256,14 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) | |||
| 218 | golden_settings_gc_9_2_1_vg12, | 256 | golden_settings_gc_9_2_1_vg12, |
| 219 | ARRAY_SIZE(golden_settings_gc_9_2_1_vg12)); | 257 | ARRAY_SIZE(golden_settings_gc_9_2_1_vg12)); |
| 220 | break; | 258 | break; |
| 259 | case CHIP_VEGA20: | ||
| 260 | soc15_program_register_sequence(adev, | ||
| 261 | golden_settings_gc_9_0, | ||
| 262 | ARRAY_SIZE(golden_settings_gc_9_0)); | ||
| 263 | soc15_program_register_sequence(adev, | ||
| 264 | golden_settings_gc_9_0_vg20, | ||
| 265 | ARRAY_SIZE(golden_settings_gc_9_0_vg20)); | ||
| 266 | break; | ||
| 221 | case CHIP_RAVEN: | 267 | case CHIP_RAVEN: |
| 222 | soc15_program_register_sequence(adev, | 268 | soc15_program_register_sequence(adev, |
| 223 | golden_settings_gc_9_1, | 269 | golden_settings_gc_9_1, |
| @@ -401,6 +447,27 @@ static void gfx_v9_0_free_microcode(struct amdgpu_device *adev) | |||
| 401 | kfree(adev->gfx.rlc.register_list_format); | 447 | kfree(adev->gfx.rlc.register_list_format); |
| 402 | } | 448 | } |
| 403 | 449 | ||
| 450 | static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev) | ||
| 451 | { | ||
| 452 | const struct rlc_firmware_header_v2_1 *rlc_hdr; | ||
| 453 | |||
| 454 | rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data; | ||
| 455 | adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver); | ||
| 456 | adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver); | ||
| 457 | adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes); | ||
| 458 | adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes); | ||
| 459 | adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver); | ||
| 460 | adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver); | ||
| 461 | adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes); | ||
| 462 | adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes); | ||
| 463 | adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver); | ||
| 464 | adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver); | ||
| 465 | adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes); | ||
| 466 | adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes); | ||
| 467 | adev->gfx.rlc.reg_list_format_direct_reg_list_length = | ||
| 468 | le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length); | ||
| 469 | } | ||
| 470 | |||
| 404 | static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) | 471 | static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) |
| 405 | { | 472 | { |
| 406 | const char *chip_name; | 473 | const char *chip_name; |
| @@ -412,6 +479,8 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) | |||
| 412 | const struct rlc_firmware_header_v2_0 *rlc_hdr; | 479 | const struct rlc_firmware_header_v2_0 *rlc_hdr; |
| 413 | unsigned int *tmp = NULL; | 480 | unsigned int *tmp = NULL; |
| 414 | unsigned int i = 0; | 481 | unsigned int i = 0; |
| 482 | uint16_t version_major; | ||
| 483 | uint16_t version_minor; | ||
| 415 | 484 | ||
| 416 | DRM_DEBUG("\n"); | 485 | DRM_DEBUG("\n"); |
| 417 | 486 | ||
| @@ -422,6 +491,9 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) | |||
| 422 | case CHIP_VEGA12: | 491 | case CHIP_VEGA12: |
| 423 | chip_name = "vega12"; | 492 | chip_name = "vega12"; |
| 424 | break; | 493 | break; |
| 494 | case CHIP_VEGA20: | ||
| 495 | chip_name = "vega20"; | ||
| 496 | break; | ||
| 425 | case CHIP_RAVEN: | 497 | case CHIP_RAVEN: |
| 426 | chip_name = "raven"; | 498 | chip_name = "raven"; |
| 427 | break; | 499 | break; |
| @@ -468,6 +540,12 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) | |||
| 468 | goto out; | 540 | goto out; |
| 469 | err = amdgpu_ucode_validate(adev->gfx.rlc_fw); | 541 | err = amdgpu_ucode_validate(adev->gfx.rlc_fw); |
| 470 | rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; | 542 | rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; |
| 543 | |||
| 544 | version_major = le16_to_cpu(rlc_hdr->header.header_version_major); | ||
| 545 | version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); | ||
| 546 | if (version_major == 2 && version_minor == 1) | ||
| 547 | adev->gfx.rlc.is_rlc_v2_1 = true; | ||
| 548 | |||
| 471 | adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); | 549 | adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); |
| 472 | adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); | 550 | adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); |
| 473 | adev->gfx.rlc.save_and_restore_offset = | 551 | adev->gfx.rlc.save_and_restore_offset = |
| @@ -508,6 +586,9 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) | |||
| 508 | for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++) | 586 | for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++) |
| 509 | adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); | 587 | adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); |
| 510 | 588 | ||
| 589 | if (adev->gfx.rlc.is_rlc_v2_1) | ||
| 590 | gfx_v9_0_init_rlc_ext_microcode(adev); | ||
| 591 | |||
| 511 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); | 592 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); |
| 512 | err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); | 593 | err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); |
| 513 | if (err) | 594 | if (err) |
| @@ -566,6 +647,26 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) | |||
| 566 | adev->firmware.fw_size += | 647 | adev->firmware.fw_size += |
| 567 | ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); | 648 | ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); |
| 568 | 649 | ||
| 650 | if (adev->gfx.rlc.is_rlc_v2_1) { | ||
| 651 | info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL]; | ||
| 652 | info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL; | ||
| 653 | info->fw = adev->gfx.rlc_fw; | ||
| 654 | adev->firmware.fw_size += | ||
| 655 | ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE); | ||
| 656 | |||
| 657 | info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM]; | ||
| 658 | info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM; | ||
| 659 | info->fw = adev->gfx.rlc_fw; | ||
| 660 | adev->firmware.fw_size += | ||
| 661 | ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE); | ||
| 662 | |||
| 663 | info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM]; | ||
| 664 | info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM; | ||
| 665 | info->fw = adev->gfx.rlc_fw; | ||
| 666 | adev->firmware.fw_size += | ||
| 667 | ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE); | ||
| 668 | } | ||
| 669 | |||
| 569 | info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; | 670 | info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; |
| 570 | info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; | 671 | info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; |
| 571 | info->fw = adev->gfx.mec_fw; | 672 | info->fw = adev->gfx.mec_fw; |
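Each ucode entry's contribution to adev->firmware.fw_size is rounded up to a whole page with ALIGN(), so a save/restore list of, say, 5000 bytes accounts for 8192 bytes with 4 KiB pages. A standalone illustration of the rounding; the kernel's ALIGN macro is reproduced here in simplified form:

    #include <stdio.h>

    #define PAGE_SIZE 4096u
    #define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))      /* round x up to a multiple of a */

    int main(void)
    {
            printf("%u\n", ALIGN(5000u, PAGE_SIZE));        /* prints 8192 */
            return 0;
    }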
| @@ -1013,9 +1114,10 @@ static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = { | |||
| 1013 | .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q | 1114 | .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q |
| 1014 | }; | 1115 | }; |
| 1015 | 1116 | ||
| 1016 | static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) | 1117 | static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) |
| 1017 | { | 1118 | { |
| 1018 | u32 gb_addr_config; | 1119 | u32 gb_addr_config; |
| 1120 | int err; | ||
| 1019 | 1121 | ||
| 1020 | adev->gfx.funcs = &gfx_v9_0_gfx_funcs; | 1122 | adev->gfx.funcs = &gfx_v9_0_gfx_funcs; |
| 1021 | 1123 | ||
| @@ -1037,6 +1139,20 @@ static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) | |||
| 1037 | gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN; | 1139 | gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN; |
| 1038 | DRM_INFO("fix gfx.config for vega12\n"); | 1140 | DRM_INFO("fix gfx.config for vega12\n"); |
| 1039 | break; | 1141 | break; |
| 1142 | case CHIP_VEGA20: | ||
| 1143 | adev->gfx.config.max_hw_contexts = 8; | ||
| 1144 | adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; | ||
| 1145 | adev->gfx.config.sc_prim_fifo_size_backend = 0x100; | ||
| 1146 | adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; | ||
| 1147 | adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; | ||
| 1148 | gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); | ||
| 1149 | gb_addr_config &= ~0xf3e777ff; | ||
| 1150 | gb_addr_config |= 0x22014042; | ||
| 1151 | /* check vbios table if gpu info is not available */ | ||
| 1152 | err = amdgpu_atomfirmware_get_gfx_info(adev); | ||
| 1153 | if (err) | ||
| 1154 | return err; | ||
| 1155 | break; | ||
| 1040 | case CHIP_RAVEN: | 1156 | case CHIP_RAVEN: |
| 1041 | adev->gfx.config.max_hw_contexts = 8; | 1157 | adev->gfx.config.max_hw_contexts = 8; |
| 1042 | adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; | 1158 | adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; |
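For VEGA20, gb_addr_config is produced by a read-modify-write on mmGB_ADDR_CONFIG: bits outside the 0xf3e777ff mask keep whatever the hardware reported, while the fields inside the mask are forced to the golden value 0x22014042 (which itself lies entirely inside the mask). A standalone illustration with a made-up readback:

    #include <stdio.h>

    int main(void)
    {
            unsigned int gb_addr_config = 0x08100000;       /* made-up readback: only bits outside the mask set */

            gb_addr_config &= ~0xf3e777ffu;                 /* clear the masked fields            */
            gb_addr_config |= 0x22014042u;                  /* force the golden value inside them */

            /* prints 0x2a114042: out-of-mask bits preserved, masked fields now golden */
            printf("0x%08x\n", gb_addr_config);
            return 0;
    }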
| @@ -1086,6 +1202,8 @@ static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) | |||
| 1086 | adev->gfx.config.gb_addr_config, | 1202 | adev->gfx.config.gb_addr_config, |
| 1087 | GB_ADDR_CONFIG, | 1203 | GB_ADDR_CONFIG, |
| 1088 | PIPE_INTERLEAVE_SIZE)); | 1204 | PIPE_INTERLEAVE_SIZE)); |
| 1205 | |||
| 1206 | return 0; | ||
| 1089 | } | 1207 | } |
| 1090 | 1208 | ||
| 1091 | static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev, | 1209 | static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev, |
| @@ -1319,6 +1437,7 @@ static int gfx_v9_0_sw_init(void *handle) | |||
| 1319 | switch (adev->asic_type) { | 1437 | switch (adev->asic_type) { |
| 1320 | case CHIP_VEGA10: | 1438 | case CHIP_VEGA10: |
| 1321 | case CHIP_VEGA12: | 1439 | case CHIP_VEGA12: |
| 1440 | case CHIP_VEGA20: | ||
| 1322 | case CHIP_RAVEN: | 1441 | case CHIP_RAVEN: |
| 1323 | adev->gfx.mec.num_mec = 2; | 1442 | adev->gfx.mec.num_mec = 2; |
| 1324 | break; | 1443 | break; |
| @@ -1446,7 +1565,9 @@ static int gfx_v9_0_sw_init(void *handle) | |||
| 1446 | 1565 | ||
| 1447 | adev->gfx.ce_ram_size = 0x8000; | 1566 | adev->gfx.ce_ram_size = 0x8000; |
| 1448 | 1567 | ||
| 1449 | gfx_v9_0_gpu_early_init(adev); | 1568 | r = gfx_v9_0_gpu_early_init(adev); |
| 1569 | if (r) | ||
| 1570 | return r; | ||
| 1450 | 1571 | ||
| 1451 | r = gfx_v9_0_ngg_init(adev); | 1572 | r = gfx_v9_0_ngg_init(adev); |
| 1452 | if (r) | 1573 | if (r) |
| @@ -1600,6 +1721,7 @@ static void gfx_v9_0_gpu_init(struct amdgpu_device *adev) | |||
| 1600 | 1721 | ||
| 1601 | gfx_v9_0_setup_rb(adev); | 1722 | gfx_v9_0_setup_rb(adev); |
| 1602 | gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info); | 1723 | gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info); |
| 1724 | adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2); | ||
| 1603 | 1725 | ||
| 1604 | /* XXX SH_MEM regs */ | 1726 | /* XXX SH_MEM regs */ |
| 1605 | /* where to put LDS, scratch, GPUVM in FSA64 space */ | 1727 | /* where to put LDS, scratch, GPUVM in FSA64 space */ |
| @@ -1616,7 +1738,10 @@ static void gfx_v9_0_gpu_init(struct amdgpu_device *adev) | |||
| 1616 | tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, | 1738 | tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, |
| 1617 | SH_MEM_ALIGNMENT_MODE_UNALIGNED); | 1739 | SH_MEM_ALIGNMENT_MODE_UNALIGNED); |
| 1618 | WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp); | 1740 | WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp); |
| 1619 | tmp = adev->gmc.shared_aperture_start >> 48; | 1741 | tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, |
| 1742 | (adev->gmc.private_aperture_start >> 48)); | ||
| 1743 | tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, | ||
| 1744 | (adev->gmc.shared_aperture_start >> 48)); | ||
| 1620 | WREG32_SOC15(GC, 0, mmSH_MEM_BASES, tmp); | 1745 | WREG32_SOC15(GC, 0, mmSH_MEM_BASES, tmp); |
| 1621 | } | 1746 | } |
| 1622 | } | 1747 | } |
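SH_MEM_BASES only has room for the top 16 bits of each aperture, which is why both the private and the shared aperture start addresses are shifted right by 48 before being packed into the PRIVATE_BASE and SHARED_BASE fields. A standalone illustration with hypothetical aperture addresses (the real ones come from the GMC setup):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint64_t private_aperture_start = 0x1000000000000ULL;   /* hypothetical */
            uint64_t shared_aperture_start  = 0x2000000000000ULL;   /* hypothetical */

            /* only bits 63:48 fit into the register fields */
            printf("PRIVATE_BASE = 0x%llx\n", (unsigned long long)(private_aperture_start >> 48));  /* 0x1 */
            printf("SHARED_BASE  = 0x%llx\n", (unsigned long long)(shared_aperture_start >> 48));   /* 0x2 */
            return 0;
    }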
| @@ -1708,55 +1833,42 @@ static void gfx_v9_0_init_csb(struct amdgpu_device *adev) | |||
| 1708 | adev->gfx.rlc.clear_state_size); | 1833 | adev->gfx.rlc.clear_state_size); |
| 1709 | } | 1834 | } |
| 1710 | 1835 | ||
| 1711 | static void gfx_v9_0_parse_ind_reg_list(int *register_list_format, | 1836 | static void gfx_v9_1_parse_ind_reg_list(int *register_list_format, |
| 1712 | int indirect_offset, | 1837 | int indirect_offset, |
| 1713 | int list_size, | 1838 | int list_size, |
| 1714 | int *unique_indirect_regs, | 1839 | int *unique_indirect_regs, |
| 1715 | int *unique_indirect_reg_count, | 1840 | int *unique_indirect_reg_count, |
| 1716 | int max_indirect_reg_count, | ||
| 1717 | int *indirect_start_offsets, | 1841 | int *indirect_start_offsets, |
| 1718 | int *indirect_start_offsets_count, | 1842 | int *indirect_start_offsets_count) |
| 1719 | int max_indirect_start_offsets_count) | ||
| 1720 | { | 1843 | { |
| 1721 | int idx; | 1844 | int idx; |
| 1722 | bool new_entry = true; | ||
| 1723 | 1845 | ||
| 1724 | for (; indirect_offset < list_size; indirect_offset++) { | 1846 | for (; indirect_offset < list_size; indirect_offset++) { |
| 1847 | indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset; | ||
| 1848 | *indirect_start_offsets_count = *indirect_start_offsets_count + 1; | ||
| 1725 | 1849 | ||
| 1726 | if (new_entry) { | 1850 | while (register_list_format[indirect_offset] != 0xFFFFFFFF) { |
| 1727 | new_entry = false; | 1851 | indirect_offset += 2; |
| 1728 | indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset; | ||
| 1729 | *indirect_start_offsets_count = *indirect_start_offsets_count + 1; | ||
| 1730 | BUG_ON(*indirect_start_offsets_count >= max_indirect_start_offsets_count); | ||
| 1731 | } | ||
| 1732 | 1852 | ||
| 1733 | if (register_list_format[indirect_offset] == 0xFFFFFFFF) { | 1853 | /* look for the matching index */ |
| 1734 | new_entry = true; | 1854 | for (idx = 0; idx < *unique_indirect_reg_count; idx++) { |
| 1735 | continue; | 1855 | if (unique_indirect_regs[idx] == |
| 1736 | } | 1856 | register_list_format[indirect_offset] || |
| 1857 | !unique_indirect_regs[idx]) | ||
| 1858 | break; | ||
| 1859 | } | ||
| 1737 | 1860 | ||
| 1738 | indirect_offset += 2; | 1861 | BUG_ON(idx >= *unique_indirect_reg_count); |
| 1739 | 1862 | ||
| 1740 | /* look for the matching indice */ | 1863 | if (!unique_indirect_regs[idx]) |
| 1741 | for (idx = 0; idx < *unique_indirect_reg_count; idx++) { | 1864 | unique_indirect_regs[idx] = register_list_format[indirect_offset]; |
| 1742 | if (unique_indirect_regs[idx] == | ||
| 1743 | register_list_format[indirect_offset]) | ||
| 1744 | break; | ||
| 1745 | } | ||
| 1746 | 1865 | ||
| 1747 | if (idx >= *unique_indirect_reg_count) { | 1866 | indirect_offset++; |
| 1748 | unique_indirect_regs[*unique_indirect_reg_count] = | ||
| 1749 | register_list_format[indirect_offset]; | ||
| 1750 | idx = *unique_indirect_reg_count; | ||
| 1751 | *unique_indirect_reg_count = *unique_indirect_reg_count + 1; | ||
| 1752 | BUG_ON(*unique_indirect_reg_count >= max_indirect_reg_count); | ||
| 1753 | } | 1867 | } |
| 1754 | |||
| 1755 | register_list_format[indirect_offset] = idx; | ||
| 1756 | } | 1868 | } |
| 1757 | } | 1869 | } |
| 1758 | 1870 | ||
| 1759 | static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev) | 1871 | static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev) |
| 1760 | { | 1872 | { |
| 1761 | int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; | 1873 | int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; |
| 1762 | int unique_indirect_reg_count = 0; | 1874 | int unique_indirect_reg_count = 0; |
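The rewritten parser records where each 0xFFFFFFFF-delimited group in register_list_format[] starts and collects the distinct indirect register offsets it references into the eight-entry unique_indirect_regs[] array, reusing a slot when the same offset appears again. A small standalone sketch of just that slot-reuse step (the surrounding list layout is taken from the loop above; this is not an authoritative firmware format description):

    /* insert reg into the first matching or free slot of a small dedup table */
    static int dedup_insert(unsigned int *table, int table_size, unsigned int reg)
    {
            int idx;

            for (idx = 0; idx < table_size; idx++) {
                    if (table[idx] == reg || table[idx] == 0)
                            break;          /* matching entry, or first free slot */
            }
            if (idx >= table_size)
                    return -1;              /* table full */
            if (table[idx] == 0)
                    table[idx] = reg;       /* claim the free slot */
            return idx;
    }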
| @@ -1765,7 +1877,7 @@ static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev) | |||
| 1765 | int indirect_start_offsets_count = 0; | 1877 | int indirect_start_offsets_count = 0; |
| 1766 | 1878 | ||
| 1767 | int list_size = 0; | 1879 | int list_size = 0; |
| 1768 | int i = 0; | 1880 | int i = 0, j = 0; |
| 1769 | u32 tmp = 0; | 1881 | u32 tmp = 0; |
| 1770 | 1882 | ||
| 1771 | u32 *register_list_format = | 1883 | u32 *register_list_format = |
| @@ -1776,15 +1888,14 @@ static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev) | |||
| 1776 | adev->gfx.rlc.reg_list_format_size_bytes); | 1888 | adev->gfx.rlc.reg_list_format_size_bytes); |
| 1777 | 1889 | ||
| 1778 | /* setup unique_indirect_regs array and indirect_start_offsets array */ | 1890 | /* setup unique_indirect_regs array and indirect_start_offsets array */ |
| 1779 | gfx_v9_0_parse_ind_reg_list(register_list_format, | 1891 | unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs); |
| 1780 | GFX9_RLC_FORMAT_DIRECT_REG_LIST_LENGTH, | 1892 | gfx_v9_1_parse_ind_reg_list(register_list_format, |
| 1781 | adev->gfx.rlc.reg_list_format_size_bytes >> 2, | 1893 | adev->gfx.rlc.reg_list_format_direct_reg_list_length, |
| 1782 | unique_indirect_regs, | 1894 | adev->gfx.rlc.reg_list_format_size_bytes >> 2, |
| 1783 | &unique_indirect_reg_count, | 1895 | unique_indirect_regs, |
| 1784 | ARRAY_SIZE(unique_indirect_regs), | 1896 | &unique_indirect_reg_count, |
| 1785 | indirect_start_offsets, | 1897 | indirect_start_offsets, |
| 1786 | &indirect_start_offsets_count, | 1898 | &indirect_start_offsets_count); |
| 1787 | ARRAY_SIZE(indirect_start_offsets)); | ||
| 1788 | 1899 | ||
| 1789 | /* enable auto inc in case it is disabled */ | 1900 | /* enable auto inc in case it is disabled */ |
| 1790 | tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL)); | 1901 | tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL)); |
| @@ -1798,19 +1909,37 @@ static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev) | |||
| 1798 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA), | 1909 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA), |
| 1799 | adev->gfx.rlc.register_restore[i]); | 1910 | adev->gfx.rlc.register_restore[i]); |
| 1800 | 1911 | ||
| 1801 | /* load direct register */ | ||
| 1802 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR), 0); | ||
| 1803 | for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++) | ||
| 1804 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA), | ||
| 1805 | adev->gfx.rlc.register_restore[i]); | ||
| 1806 | |||
| 1807 | /* load indirect register */ | 1912 | /* load indirect register */ |
| 1808 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), | 1913 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), |
| 1809 | adev->gfx.rlc.reg_list_format_start); | 1914 | adev->gfx.rlc.reg_list_format_start); |
| 1810 | for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++) | 1915 | |
| 1916 | /* direct register portion */ | ||
| 1917 | for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++) | ||
| 1811 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), | 1918 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), |
| 1812 | register_list_format[i]); | 1919 | register_list_format[i]); |
| 1813 | 1920 | ||
| 1921 | /* indirect register portion */ | ||
| 1922 | while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) { | ||
| 1923 | if (register_list_format[i] == 0xFFFFFFFF) { | ||
| 1924 | WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); | ||
| 1925 | continue; | ||
| 1926 | } | ||
| 1927 | |||
| 1928 | WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); | ||
| 1929 | WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); | ||
| 1930 | |||
| 1931 | for (j = 0; j < unique_indirect_reg_count; j++) { | ||
| 1932 | if (register_list_format[i] == unique_indirect_regs[j]) { | ||
| 1933 | WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j); | ||
| 1934 | break; | ||
| 1935 | } | ||
| 1936 | } | ||
| 1937 | |||
| 1938 | BUG_ON(j >= unique_indirect_reg_count); | ||
| 1939 | |||
| 1940 | i++; | ||
| 1941 | } | ||
| 1942 | |||
| 1814 | /* set save/restore list size */ | 1943 | /* set save/restore list size */ |
| 1815 | list_size = adev->gfx.rlc.reg_list_size_bytes >> 2; | 1944 | list_size = adev->gfx.rlc.reg_list_size_bytes >> 2; |
| 1816 | list_size = list_size >> 1; | 1945 | list_size = list_size >> 1; |
| @@ -1823,14 +1952,19 @@ static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev) | |||
| 1823 | adev->gfx.rlc.starting_offsets_start); | 1952 | adev->gfx.rlc.starting_offsets_start); |
| 1824 | for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++) | 1953 | for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++) |
| 1825 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), | 1954 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), |
| 1826 | indirect_start_offsets[i]); | 1955 | indirect_start_offsets[i]); |
| 1827 | 1956 | ||
| 1828 | /* load unique indirect regs*/ | 1957 | /* load unique indirect regs*/ |
| 1829 | for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) { | 1958 | for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) { |
| 1830 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) + i, | 1959 | if (unique_indirect_regs[i] != 0) { |
| 1831 | unique_indirect_regs[i] & 0x3FFFF); | 1960 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) |
| 1832 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) + i, | 1961 | + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i], |
| 1833 | unique_indirect_regs[i] >> 20); | 1962 | unique_indirect_regs[i] & 0x3FFFF); |
| 1963 | |||
| 1964 | WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) | ||
| 1965 | + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i], | ||
| 1966 | unique_indirect_regs[i] >> 20); | ||
| 1967 | } | ||
| 1834 | } | 1968 | } |
| 1835 | 1969 | ||
| 1836 | kfree(register_list_format); | 1970 | kfree(register_list_format); |
| @@ -2010,6 +2144,9 @@ static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *ad | |||
| 2010 | 2144 | ||
| 2011 | static void gfx_v9_0_init_pg(struct amdgpu_device *adev) | 2145 | static void gfx_v9_0_init_pg(struct amdgpu_device *adev) |
| 2012 | { | 2146 | { |
| 2147 | if (!adev->gfx.rlc.is_rlc_v2_1) | ||
| 2148 | return; | ||
| 2149 | |||
| 2013 | if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | | 2150 | if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | |
| 2014 | AMD_PG_SUPPORT_GFX_SMG | | 2151 | AMD_PG_SUPPORT_GFX_SMG | |
| 2015 | AMD_PG_SUPPORT_GFX_DMG | | 2152 | AMD_PG_SUPPORT_GFX_DMG | |
| @@ -2017,27 +2154,12 @@ static void gfx_v9_0_init_pg(struct amdgpu_device *adev) | |||
| 2017 | AMD_PG_SUPPORT_GDS | | 2154 | AMD_PG_SUPPORT_GDS | |
| 2018 | AMD_PG_SUPPORT_RLC_SMU_HS)) { | 2155 | AMD_PG_SUPPORT_RLC_SMU_HS)) { |
| 2019 | gfx_v9_0_init_csb(adev); | 2156 | gfx_v9_0_init_csb(adev); |
| 2020 | gfx_v9_0_init_rlc_save_restore_list(adev); | 2157 | gfx_v9_1_init_rlc_save_restore_list(adev); |
| 2021 | gfx_v9_0_enable_save_restore_machine(adev); | 2158 | gfx_v9_0_enable_save_restore_machine(adev); |
| 2022 | 2159 | ||
| 2023 | if (adev->asic_type == CHIP_RAVEN) { | 2160 | WREG32(mmRLC_JUMP_TABLE_RESTORE, |
| 2024 | WREG32(mmRLC_JUMP_TABLE_RESTORE, | 2161 | adev->gfx.rlc.cp_table_gpu_addr >> 8); |
| 2025 | adev->gfx.rlc.cp_table_gpu_addr >> 8); | 2162 | gfx_v9_0_init_gfx_power_gating(adev); |
| 2026 | gfx_v9_0_init_gfx_power_gating(adev); | ||
| 2027 | |||
| 2028 | if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { | ||
| 2029 | gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true); | ||
| 2030 | gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true); | ||
| 2031 | } else { | ||
| 2032 | gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false); | ||
| 2033 | gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false); | ||
| 2034 | } | ||
| 2035 | |||
| 2036 | if (adev->pg_flags & AMD_PG_SUPPORT_CP) | ||
| 2037 | gfx_v9_0_enable_cp_power_gating(adev, true); | ||
| 2038 | else | ||
| 2039 | gfx_v9_0_enable_cp_power_gating(adev, false); | ||
| 2040 | } | ||
| 2041 | } | 2163 | } |
| 2042 | } | 2164 | } |
| 2043 | 2165 | ||
| @@ -3061,6 +3183,9 @@ static int gfx_v9_0_hw_fini(void *handle) | |||
| 3061 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 3183 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 3062 | int i; | 3184 | int i; |
| 3063 | 3185 | ||
| 3186 | amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_GFX, | ||
| 3187 | AMD_PG_STATE_UNGATE); | ||
| 3188 | |||
| 3064 | amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); | 3189 | amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); |
| 3065 | amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); | 3190 | amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); |
| 3066 | 3191 | ||
| @@ -3279,6 +3404,11 @@ static int gfx_v9_0_late_init(void *handle) | |||
| 3279 | if (r) | 3404 | if (r) |
| 3280 | return r; | 3405 | return r; |
| 3281 | 3406 | ||
| 3407 | r = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_GFX, | ||
| 3408 | AMD_PG_STATE_GATE); | ||
| 3409 | if (r) | ||
| 3410 | return r; | ||
| 3411 | |||
| 3282 | return 0; | 3412 | return 0; |
| 3283 | } | 3413 | } |
| 3284 | 3414 | ||
| @@ -3339,8 +3469,7 @@ static void gfx_v9_0_exit_rlc_safe_mode(struct amdgpu_device *adev) | |||
| 3339 | static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, | 3469 | static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, |
| 3340 | bool enable) | 3470 | bool enable) |
| 3341 | { | 3471 | { |
| 3342 | /* TODO: double check if we need to perform under safe mode */ | 3472 | gfx_v9_0_enter_rlc_safe_mode(adev);
| 3343 | /* gfx_v9_0_enter_rlc_safe_mode(adev); */ | ||
| 3344 | 3473 | ||
| 3345 | if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { | 3474 | if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { |
| 3346 | gfx_v9_0_enable_gfx_cg_power_gating(adev, true); | 3475 | gfx_v9_0_enable_gfx_cg_power_gating(adev, true); |
| @@ -3351,7 +3480,7 @@ static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, | |||
| 3351 | gfx_v9_0_enable_gfx_pipeline_powergating(adev, false); | 3480 | gfx_v9_0_enable_gfx_pipeline_powergating(adev, false); |
| 3352 | } | 3481 | } |
| 3353 | 3482 | ||
| 3354 | /* gfx_v9_0_exit_rlc_safe_mode(adev); */ | 3483 | gfx_v9_0_exit_rlc_safe_mode(adev); |
| 3355 | } | 3484 | } |
| 3356 | 3485 | ||
| 3357 | static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev, | 3486 | static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev, |
| @@ -3605,6 +3734,7 @@ static int gfx_v9_0_set_clockgating_state(void *handle, | |||
| 3605 | switch (adev->asic_type) { | 3734 | switch (adev->asic_type) { |
| 3606 | case CHIP_VEGA10: | 3735 | case CHIP_VEGA10: |
| 3607 | case CHIP_VEGA12: | 3736 | case CHIP_VEGA12: |
| 3737 | case CHIP_VEGA20: | ||
| 3608 | case CHIP_RAVEN: | 3738 | case CHIP_RAVEN: |
| 3609 | gfx_v9_0_update_gfx_clock_gating(adev, | 3739 | gfx_v9_0_update_gfx_clock_gating(adev, |
| 3610 | state == AMD_CG_STATE_GATE ? true : false); | 3740 | state == AMD_CG_STATE_GATE ? true : false); |
| @@ -3742,7 +3872,7 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, | |||
| 3742 | } | 3872 | } |
| 3743 | 3873 | ||
| 3744 | amdgpu_ring_write(ring, header); | 3874 | amdgpu_ring_write(ring, header); |
| 3745 | BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ | 3875 | BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ |
| 3746 | amdgpu_ring_write(ring, | 3876 | amdgpu_ring_write(ring, |
| 3747 | #ifdef __BIG_ENDIAN | 3877 | #ifdef __BIG_ENDIAN |
| 3748 | (2 << 0) | | 3878 | (2 << 0) | |
| @@ -3774,13 +3904,16 @@ static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, | |||
| 3774 | { | 3904 | { |
| 3775 | bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; | 3905 | bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; |
| 3776 | bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; | 3906 | bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; |
| 3907 | bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY; | ||
| 3777 | 3908 | ||
| 3778 | /* RELEASE_MEM - flush caches, send int */ | 3909 | /* RELEASE_MEM - flush caches, send int */ |
| 3779 | amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); | 3910 | amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); |
| 3780 | amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | | 3911 | amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN | |
| 3781 | EOP_TC_ACTION_EN | | 3912 | EOP_TC_NC_ACTION_EN) : |
| 3782 | EOP_TC_WB_ACTION_EN | | 3913 | (EOP_TCL1_ACTION_EN | |
| 3783 | EOP_TC_MD_ACTION_EN | | 3914 | EOP_TC_ACTION_EN | |
| 3915 | EOP_TC_WB_ACTION_EN | | ||
| 3916 | EOP_TC_MD_ACTION_EN)) | | ||
| 3784 | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | | 3917 | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | |
| 3785 | EVENT_INDEX(5))); | 3918 | EVENT_INDEX(5))); |
| 3786 | amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); | 3919 | amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); |
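The hunk above makes the RELEASE_MEM cache actions depend on AMDGPU_FENCE_FLAG_TC_WB_ONLY: a writeback-only fence asks for TC writeback plus the non-coherent action, while the default path keeps the full L1/TC/metadata flush. A hedged sketch of that selection follows; the bit values are placeholders, not the real GFX9 packet encoding.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Illustrative selection of the EOP cache-action bits for the RELEASE_MEM
 * fence packet.  The bit positions below are placeholders; only the
 * writeback-only vs. full-flush decision mirrors the patch.
 */
#define EOP_TCL1_ACTION_EN   (1u << 0)
#define EOP_TC_ACTION_EN     (1u << 1)
#define EOP_TC_WB_ACTION_EN  (1u << 2)
#define EOP_TC_MD_ACTION_EN  (1u << 3)
#define EOP_TC_NC_ACTION_EN  (1u << 4)

static uint32_t release_mem_cache_policy(bool tc_wb_only)
{
    if (tc_wb_only)
        /* write back TC only, and treat the surface as non-coherent */
        return EOP_TC_WB_ACTION_EN | EOP_TC_NC_ACTION_EN;

    /* full flush: L1, TC, TC writeback and metadata */
    return EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN |
           EOP_TC_WB_ACTION_EN | EOP_TC_MD_ACTION_EN;
}

int main(void)
{
    printf("TC_WB_ONLY fence: 0x%x\n", release_mem_cache_policy(true));
    printf("default fence:    0x%x\n", release_mem_cache_policy(false));
    return 0;
}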
| @@ -4137,6 +4270,20 @@ static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, | |||
| 4137 | gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); | 4270 | gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); |
| 4138 | } | 4271 | } |
| 4139 | 4272 | ||
| 4273 | static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, | ||
| 4274 | uint32_t reg0, uint32_t reg1, | ||
| 4275 | uint32_t ref, uint32_t mask) | ||
| 4276 | { | ||
| 4277 | int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); | ||
| 4278 | |||
| 4279 | if (amdgpu_sriov_vf(ring->adev)) | ||
| 4280 | gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, | ||
| 4281 | ref, mask, 0x20); | ||
| 4282 | else | ||
| 4283 | amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1, | ||
| 4284 | ref, mask); | ||
| 4285 | } | ||
| 4286 | |||
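The new emit_reg_write_reg_wait callback pairs a register write with a masked poll on a second register, which the GMC9 TLB flush later uses as "write the invalidation request, then wait for bit vmid in the ack register". Below is a toy, single-process model of that pairing; the instant acknowledgement is simulated and stands in for the hardware hub, and on real hardware the two halves are emitted as ring packets so that (under SR-IOV) a world switch cannot separate them.

#include <stdint.h>
#include <stdio.h>

/* Toy model only: a two-field struct stands in for the req/ack registers. */
struct inv_eng {
    uint32_t req;
    uint32_t ack;
};

static void reg_write_reg_wait(struct inv_eng *eng, uint32_t req, unsigned vmid)
{
    uint32_t mask = 1u << vmid;

    eng->req = req;       /* "write reg0" half of the packet */
    eng->ack |= mask;     /* pretend the hub acknowledged immediately */

    while (!(eng->ack & mask))
        ;                 /* "wait on reg1" half of the packet */
}

int main(void)
{
    struct inv_eng eng = { 0, 0 };

    reg_write_reg_wait(&eng, 0x3ff, 5);
    printf("req=0x%x ack=0x%x\n", eng.req, eng.ack);
    return 0;
}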
| 4140 | static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, | 4287 | static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, |
| 4141 | enum amdgpu_interrupt_state state) | 4288 | enum amdgpu_interrupt_state state) |
| 4142 | { | 4289 | { |
| @@ -4458,6 +4605,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { | |||
| 4458 | .emit_tmz = gfx_v9_0_ring_emit_tmz, | 4605 | .emit_tmz = gfx_v9_0_ring_emit_tmz, |
| 4459 | .emit_wreg = gfx_v9_0_ring_emit_wreg, | 4606 | .emit_wreg = gfx_v9_0_ring_emit_wreg, |
| 4460 | .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, | 4607 | .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, |
| 4608 | .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, | ||
| 4461 | }; | 4609 | }; |
| 4462 | 4610 | ||
| 4463 | static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { | 4611 | static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { |
| @@ -4492,6 +4640,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { | |||
| 4492 | .set_priority = gfx_v9_0_ring_set_priority_compute, | 4640 | .set_priority = gfx_v9_0_ring_set_priority_compute, |
| 4493 | .emit_wreg = gfx_v9_0_ring_emit_wreg, | 4641 | .emit_wreg = gfx_v9_0_ring_emit_wreg, |
| 4494 | .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, | 4642 | .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, |
| 4643 | .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, | ||
| 4495 | }; | 4644 | }; |
| 4496 | 4645 | ||
| 4497 | static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { | 4646 | static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { |
| @@ -4522,6 +4671,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { | |||
| 4522 | .emit_rreg = gfx_v9_0_ring_emit_rreg, | 4671 | .emit_rreg = gfx_v9_0_ring_emit_rreg, |
| 4523 | .emit_wreg = gfx_v9_0_ring_emit_wreg, | 4672 | .emit_wreg = gfx_v9_0_ring_emit_wreg, |
| 4524 | .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, | 4673 | .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, |
| 4674 | .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, | ||
| 4525 | }; | 4675 | }; |
| 4526 | 4676 | ||
| 4527 | static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev) | 4677 | static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev) |
| @@ -4577,6 +4727,7 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev) | |||
| 4577 | switch (adev->asic_type) { | 4727 | switch (adev->asic_type) { |
| 4578 | case CHIP_VEGA10: | 4728 | case CHIP_VEGA10: |
| 4579 | case CHIP_VEGA12: | 4729 | case CHIP_VEGA12: |
| 4730 | case CHIP_VEGA20: | ||
| 4580 | case CHIP_RAVEN: | 4731 | case CHIP_RAVEN: |
| 4581 | adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs; | 4732 | adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs; |
| 4582 | break; | 4733 | break; |
| @@ -4686,6 +4837,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, | |||
| 4686 | 4837 | ||
| 4687 | cu_info->number = active_cu_number; | 4838 | cu_info->number = active_cu_number; |
| 4688 | cu_info->ao_cu_mask = ao_cu_mask; | 4839 | cu_info->ao_cu_mask = ao_cu_mask; |
| 4840 | cu_info->simd_per_cu = NUM_SIMD_PER_CU; | ||
| 4689 | 4841 | ||
| 4690 | return 0; | 4842 | return 0; |
| 4691 | } | 4843 | } |
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index 5617cf62c566..79f9ac29019b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | |||
| @@ -819,12 +819,33 @@ static int gmc_v6_0_late_init(void *handle) | |||
| 819 | { | 819 | { |
| 820 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 820 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 821 | 821 | ||
| 822 | amdgpu_bo_late_init(adev); | ||
| 823 | |||
| 822 | if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS) | 824 | if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS) |
| 823 | return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0); | 825 | return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0); |
| 824 | else | 826 | else |
| 825 | return 0; | 827 | return 0; |
| 826 | } | 828 | } |
| 827 | 829 | ||
| 830 | static unsigned gmc_v6_0_get_vbios_fb_size(struct amdgpu_device *adev) | ||
| 831 | { | ||
| 832 | u32 d1vga_control = RREG32(mmD1VGA_CONTROL); | ||
| 833 | unsigned size; | ||
| 834 | |||
| 835 | if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) { | ||
| 836 | size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */ | ||
| 837 | } else { | ||
| 838 | u32 viewport = RREG32(mmVIEWPORT_SIZE); | ||
| 839 | size = (REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_HEIGHT) * | ||
| 840 | REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_WIDTH) * | ||
| 841 | 4); | ||
| 842 | } | ||
| 843 | /* return 0 if the pre-OS buffer uses up most of vram */ | ||
| 844 | if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024)) | ||
| 845 | return 0; | ||
| 846 | return size; | ||
| 847 | } | ||
| 848 | |||
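The GMC v6/v7/v8 variants in this patch all gain the same get_vbios_fb_size() policy: reserve 9 MB when VGA emulation is active, otherwise viewport width x height x 4 bytes (32 bpp scanout), and reserve nothing if that would leave under 8 MB of VRAM. A standalone sketch with the register reads replaced by plain parameters:

#include <stdint.h>
#include <stdio.h>

/*
 * Sketch of the pre-OS framebuffer reservation policy; values are passed in
 * instead of being read from D1VGA_CONTROL / VIEWPORT_SIZE.
 */
static unsigned vbios_fb_size(int vga_mode_enable, unsigned width,
                              unsigned height, uint64_t real_vram_size)
{
    unsigned size;

    if (vga_mode_enable)
        size = 9 * 1024 * 1024;   /* 8 MB VGA emulator + 1 MB FB */
    else
        size = width * height * 4;

    if (real_vram_size - size < 8 * 1024 * 1024)
        return 0;                 /* pre-OS buffer eats most of VRAM */
    return size;
}

int main(void)
{
    printf("1920x1080 scanout: %u bytes\n",
           vbios_fb_size(0, 1920, 1080, 256ULL << 20));
    printf("VGA emulation:     %u bytes\n",
           vbios_fb_size(1, 0, 0, 256ULL << 20));
    return 0;
}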
| 828 | static int gmc_v6_0_sw_init(void *handle) | 849 | static int gmc_v6_0_sw_init(void *handle) |
| 829 | { | 850 | { |
| 830 | int r; | 851 | int r; |
| @@ -851,8 +872,6 @@ static int gmc_v6_0_sw_init(void *handle) | |||
| 851 | 872 | ||
| 852 | adev->gmc.mc_mask = 0xffffffffffULL; | 873 | adev->gmc.mc_mask = 0xffffffffffULL; |
| 853 | 874 | ||
| 854 | adev->gmc.stolen_size = 256 * 1024; | ||
| 855 | |||
| 856 | adev->need_dma32 = false; | 875 | adev->need_dma32 = false; |
| 857 | dma_bits = adev->need_dma32 ? 32 : 40; | 876 | dma_bits = adev->need_dma32 ? 32 : 40; |
| 858 | r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); | 877 | r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); |
| @@ -878,6 +897,8 @@ static int gmc_v6_0_sw_init(void *handle) | |||
| 878 | if (r) | 897 | if (r) |
| 879 | return r; | 898 | return r; |
| 880 | 899 | ||
| 900 | adev->gmc.stolen_size = gmc_v6_0_get_vbios_fb_size(adev); | ||
| 901 | |||
| 881 | r = amdgpu_bo_init(adev); | 902 | r = amdgpu_bo_init(adev); |
| 882 | if (r) | 903 | if (r) |
| 883 | return r; | 904 | return r; |
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 80054f36e487..7147bfe25a23 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | |||
| @@ -958,12 +958,33 @@ static int gmc_v7_0_late_init(void *handle) | |||
| 958 | { | 958 | { |
| 959 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 959 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 960 | 960 | ||
| 961 | amdgpu_bo_late_init(adev); | ||
| 962 | |||
| 961 | if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS) | 963 | if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS) |
| 962 | return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0); | 964 | return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0); |
| 963 | else | 965 | else |
| 964 | return 0; | 966 | return 0; |
| 965 | } | 967 | } |
| 966 | 968 | ||
| 969 | static unsigned gmc_v7_0_get_vbios_fb_size(struct amdgpu_device *adev) | ||
| 970 | { | ||
| 971 | u32 d1vga_control = RREG32(mmD1VGA_CONTROL); | ||
| 972 | unsigned size; | ||
| 973 | |||
| 974 | if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) { | ||
| 975 | size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */ | ||
| 976 | } else { | ||
| 977 | u32 viewport = RREG32(mmVIEWPORT_SIZE); | ||
| 978 | size = (REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_HEIGHT) * | ||
| 979 | REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_WIDTH) * | ||
| 980 | 4); | ||
| 981 | } | ||
| 982 | /* return 0 if the pre-OS buffer uses up most of vram */ | ||
| 983 | if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024)) | ||
| 984 | return 0; | ||
| 985 | return size; | ||
| 986 | } | ||
| 987 | |||
| 967 | static int gmc_v7_0_sw_init(void *handle) | 988 | static int gmc_v7_0_sw_init(void *handle) |
| 968 | { | 989 | { |
| 969 | int r; | 990 | int r; |
| @@ -998,8 +1019,6 @@ static int gmc_v7_0_sw_init(void *handle) | |||
| 998 | */ | 1019 | */ |
| 999 | adev->gmc.mc_mask = 0xffffffffffULL; /* 40 bit MC */ | 1020 | adev->gmc.mc_mask = 0xffffffffffULL; /* 40 bit MC */ |
| 1000 | 1021 | ||
| 1001 | adev->gmc.stolen_size = 256 * 1024; | ||
| 1002 | |||
| 1003 | /* set DMA mask + need_dma32 flags. | 1022 | /* set DMA mask + need_dma32 flags. |
| 1004 | * PCIE - can handle 40-bits. | 1023 | * PCIE - can handle 40-bits. |
| 1005 | * IGP - can handle 40-bits | 1024 | * IGP - can handle 40-bits |
| @@ -1030,6 +1049,8 @@ static int gmc_v7_0_sw_init(void *handle) | |||
| 1030 | if (r) | 1049 | if (r) |
| 1031 | return r; | 1050 | return r; |
| 1032 | 1051 | ||
| 1052 | adev->gmc.stolen_size = gmc_v7_0_get_vbios_fb_size(adev); | ||
| 1053 | |||
| 1033 | /* Memory manager */ | 1054 | /* Memory manager */ |
| 1034 | r = amdgpu_bo_init(adev); | 1055 | r = amdgpu_bo_init(adev); |
| 1035 | if (r) | 1056 | if (r) |
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index d71d4cb68f9c..1edbe6b477b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | |||
| @@ -138,6 +138,7 @@ static void gmc_v8_0_init_golden_registers(struct amdgpu_device *adev) | |||
| 138 | break; | 138 | break; |
| 139 | case CHIP_POLARIS11: | 139 | case CHIP_POLARIS11: |
| 140 | case CHIP_POLARIS12: | 140 | case CHIP_POLARIS12: |
| 141 | case CHIP_VEGAM: | ||
| 141 | amdgpu_device_program_register_sequence(adev, | 142 | amdgpu_device_program_register_sequence(adev, |
| 142 | golden_settings_polaris11_a11, | 143 | golden_settings_polaris11_a11, |
| 143 | ARRAY_SIZE(golden_settings_polaris11_a11)); | 144 | ARRAY_SIZE(golden_settings_polaris11_a11)); |
| @@ -231,6 +232,7 @@ static int gmc_v8_0_init_microcode(struct amdgpu_device *adev) | |||
| 231 | case CHIP_FIJI: | 232 | case CHIP_FIJI: |
| 232 | case CHIP_CARRIZO: | 233 | case CHIP_CARRIZO: |
| 233 | case CHIP_STONEY: | 234 | case CHIP_STONEY: |
| 235 | case CHIP_VEGAM: | ||
| 234 | return 0; | 236 | return 0; |
| 235 | default: BUG(); | 237 | default: BUG(); |
| 236 | } | 238 | } |
| @@ -567,9 +569,10 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) | |||
| 567 | /* set the gart size */ | 569 | /* set the gart size */ |
| 568 | if (amdgpu_gart_size == -1) { | 570 | if (amdgpu_gart_size == -1) { |
| 569 | switch (adev->asic_type) { | 571 | switch (adev->asic_type) { |
| 570 | case CHIP_POLARIS11: /* all engines support GPUVM */ | ||
| 571 | case CHIP_POLARIS10: /* all engines support GPUVM */ | 572 | case CHIP_POLARIS10: /* all engines support GPUVM */ |
| 573 | case CHIP_POLARIS11: /* all engines support GPUVM */ | ||
| 572 | case CHIP_POLARIS12: /* all engines support GPUVM */ | 574 | case CHIP_POLARIS12: /* all engines support GPUVM */ |
| 575 | case CHIP_VEGAM: /* all engines support GPUVM */ | ||
| 573 | default: | 576 | default: |
| 574 | adev->gmc.gart_size = 256ULL << 20; | 577 | adev->gmc.gart_size = 256ULL << 20; |
| 575 | break; | 578 | break; |
| @@ -1049,12 +1052,33 @@ static int gmc_v8_0_late_init(void *handle) | |||
| 1049 | { | 1052 | { |
| 1050 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 1053 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 1051 | 1054 | ||
| 1055 | amdgpu_bo_late_init(adev); | ||
| 1056 | |||
| 1052 | if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS) | 1057 | if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS) |
| 1053 | return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0); | 1058 | return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0); |
| 1054 | else | 1059 | else |
| 1055 | return 0; | 1060 | return 0; |
| 1056 | } | 1061 | } |
| 1057 | 1062 | ||
| 1063 | static unsigned gmc_v8_0_get_vbios_fb_size(struct amdgpu_device *adev) | ||
| 1064 | { | ||
| 1065 | u32 d1vga_control = RREG32(mmD1VGA_CONTROL); | ||
| 1066 | unsigned size; | ||
| 1067 | |||
| 1068 | if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) { | ||
| 1069 | size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */ | ||
| 1070 | } else { | ||
| 1071 | u32 viewport = RREG32(mmVIEWPORT_SIZE); | ||
| 1072 | size = (REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_HEIGHT) * | ||
| 1073 | REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_WIDTH) * | ||
| 1074 | 4); | ||
| 1075 | } | ||
| 1076 | /* return 0 if the pre-OS buffer uses up most of vram */ | ||
| 1077 | if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024)) | ||
| 1078 | return 0; | ||
| 1079 | return size; | ||
| 1080 | } | ||
| 1081 | |||
| 1058 | #define mmMC_SEQ_MISC0_FIJI 0xA71 | 1082 | #define mmMC_SEQ_MISC0_FIJI 0xA71 |
| 1059 | 1083 | ||
| 1060 | static int gmc_v8_0_sw_init(void *handle) | 1084 | static int gmc_v8_0_sw_init(void *handle) |
| @@ -1068,7 +1092,8 @@ static int gmc_v8_0_sw_init(void *handle) | |||
| 1068 | } else { | 1092 | } else { |
| 1069 | u32 tmp; | 1093 | u32 tmp; |
| 1070 | 1094 | ||
| 1071 | if (adev->asic_type == CHIP_FIJI) | 1095 | if ((adev->asic_type == CHIP_FIJI) || |
| 1096 | (adev->asic_type == CHIP_VEGAM)) | ||
| 1072 | tmp = RREG32(mmMC_SEQ_MISC0_FIJI); | 1097 | tmp = RREG32(mmMC_SEQ_MISC0_FIJI); |
| 1073 | else | 1098 | else |
| 1074 | tmp = RREG32(mmMC_SEQ_MISC0); | 1099 | tmp = RREG32(mmMC_SEQ_MISC0); |
| @@ -1096,8 +1121,6 @@ static int gmc_v8_0_sw_init(void *handle) | |||
| 1096 | */ | 1121 | */ |
| 1097 | adev->gmc.mc_mask = 0xffffffffffULL; /* 40 bit MC */ | 1122 | adev->gmc.mc_mask = 0xffffffffffULL; /* 40 bit MC */ |
| 1098 | 1123 | ||
| 1099 | adev->gmc.stolen_size = 256 * 1024; | ||
| 1100 | |||
| 1101 | /* set DMA mask + need_dma32 flags. | 1124 | /* set DMA mask + need_dma32 flags. |
| 1102 | * PCIE - can handle 40-bits. | 1125 | * PCIE - can handle 40-bits. |
| 1103 | * IGP - can handle 40-bits | 1126 | * IGP - can handle 40-bits |
| @@ -1128,6 +1151,8 @@ static int gmc_v8_0_sw_init(void *handle) | |||
| 1128 | if (r) | 1151 | if (r) |
| 1129 | return r; | 1152 | return r; |
| 1130 | 1153 | ||
| 1154 | adev->gmc.stolen_size = gmc_v8_0_get_vbios_fb_size(adev); | ||
| 1155 | |||
| 1131 | /* Memory manager */ | 1156 | /* Memory manager */ |
| 1132 | r = amdgpu_bo_init(adev); | 1157 | r = amdgpu_bo_init(adev); |
| 1133 | if (r) | 1158 | if (r) |
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index e687363900bb..3c0a85d4e4ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | |||
| @@ -43,19 +43,13 @@ | |||
| 43 | #include "gfxhub_v1_0.h" | 43 | #include "gfxhub_v1_0.h" |
| 44 | #include "mmhub_v1_0.h" | 44 | #include "mmhub_v1_0.h" |
| 45 | 45 | ||
| 46 | #define mmDF_CS_AON0_DramBaseAddress0 0x0044 | 46 | /* add these here since we already include dce12 headers and these are for DCN */ |
| 47 | #define mmDF_CS_AON0_DramBaseAddress0_BASE_IDX 0 | 47 | #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION 0x055d |
| 48 | //DF_CS_AON0_DramBaseAddress0 | 48 | #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_BASE_IDX 2 |
| 49 | #define DF_CS_AON0_DramBaseAddress0__AddrRngVal__SHIFT 0x0 | 49 | #define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH__SHIFT 0x0 |
| 50 | #define DF_CS_AON0_DramBaseAddress0__LgcyMmioHoleEn__SHIFT 0x1 | 50 | #define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT__SHIFT 0x10 |
| 51 | #define DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT 0x4 | 51 | #define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH_MASK 0x00003FFFL |
| 52 | #define DF_CS_AON0_DramBaseAddress0__IntLvAddrSel__SHIFT 0x8 | 52 | #define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT_MASK 0x3FFF0000L |
| 53 | #define DF_CS_AON0_DramBaseAddress0__DramBaseAddr__SHIFT 0xc | ||
| 54 | #define DF_CS_AON0_DramBaseAddress0__AddrRngVal_MASK 0x00000001L | ||
| 55 | #define DF_CS_AON0_DramBaseAddress0__LgcyMmioHoleEn_MASK 0x00000002L | ||
| 56 | #define DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK 0x000000F0L | ||
| 57 | #define DF_CS_AON0_DramBaseAddress0__IntLvAddrSel_MASK 0x00000700L | ||
| 58 | #define DF_CS_AON0_DramBaseAddress0__DramBaseAddr_MASK 0xFFFFF000L | ||
| 59 | 53 | ||
| 60 | /* XXX Move this macro to VEGA10 header file, which is like vid.h for VI.*/ | 54 | /* XXX Move this macro to VEGA10 header file, which is like vid.h for VI.*/ |
| 61 | #define AMDGPU_NUM_OF_VMIDS 8 | 55 | #define AMDGPU_NUM_OF_VMIDS 8 |
| @@ -385,11 +379,9 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, | |||
| 385 | amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid), | 379 | amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid), |
| 386 | upper_32_bits(pd_addr)); | 380 | upper_32_bits(pd_addr)); |
| 387 | 381 | ||
| 388 | amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req); | 382 | amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng, |
| 389 | 383 | hub->vm_inv_eng0_ack + eng, | |
| 390 | /* wait for the invalidate to complete */ | 384 | req, 1 << vmid); |
| 391 | amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng, | ||
| 392 | 1 << vmid, 1 << vmid); | ||
| 393 | 385 | ||
| 394 | return pd_addr; | 386 | return pd_addr; |
| 395 | } | 387 | } |
| @@ -556,8 +548,7 @@ static int gmc_v9_0_early_init(void *handle) | |||
| 556 | adev->gmc.shared_aperture_start = 0x2000000000000000ULL; | 548 | adev->gmc.shared_aperture_start = 0x2000000000000000ULL; |
| 557 | adev->gmc.shared_aperture_end = | 549 | adev->gmc.shared_aperture_end = |
| 558 | adev->gmc.shared_aperture_start + (4ULL << 30) - 1; | 550 | adev->gmc.shared_aperture_start + (4ULL << 30) - 1; |
| 559 | adev->gmc.private_aperture_start = | 551 | adev->gmc.private_aperture_start = 0x1000000000000000ULL; |
| 560 | adev->gmc.shared_aperture_end + 1; | ||
| 561 | adev->gmc.private_aperture_end = | 552 | adev->gmc.private_aperture_end = |
| 562 | adev->gmc.private_aperture_start + (4ULL << 30) - 1; | 553 | adev->gmc.private_aperture_start + (4ULL << 30) - 1; |
| 563 | 554 | ||
| @@ -659,6 +650,11 @@ static int gmc_v9_0_late_init(void *handle) | |||
| 659 | unsigned i; | 650 | unsigned i; |
| 660 | int r; | 651 | int r; |
| 661 | 652 | ||
| 653 | /* | ||
| 654 | * TODO - Uncomment once GART corruption issue is fixed. | ||
| 655 | */ | ||
| 656 | /* amdgpu_bo_late_init(adev); */ | ||
| 657 | |||
| 662 | for(i = 0; i < adev->num_rings; ++i) { | 658 | for(i = 0; i < adev->num_rings; ++i) { |
| 663 | struct amdgpu_ring *ring = adev->rings[i]; | 659 | struct amdgpu_ring *ring = adev->rings[i]; |
| 664 | unsigned vmhub = ring->funcs->vmhub; | 660 | unsigned vmhub = ring->funcs->vmhub; |
| @@ -679,6 +675,7 @@ static int gmc_v9_0_late_init(void *handle) | |||
| 679 | DRM_INFO("ECC is active.\n"); | 675 | DRM_INFO("ECC is active.\n"); |
| 680 | } else if (r == 0) { | 676 | } else if (r == 0) { |
| 681 | DRM_INFO("ECC is not present.\n"); | 677 | DRM_INFO("ECC is not present.\n"); |
| 678 | adev->df_funcs->enable_ecc_force_par_wr_rmw(adev, false); | ||
| 682 | } else { | 679 | } else { |
| 683 | DRM_ERROR("gmc_v9_0_ecc_available() failed. r: %d\n", r); | 680 | DRM_ERROR("gmc_v9_0_ecc_available() failed. r: %d\n", r); |
| 684 | return r; | 681 | return r; |
| @@ -697,10 +694,7 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev, | |||
| 697 | amdgpu_device_vram_location(adev, &adev->gmc, base); | 694 | amdgpu_device_vram_location(adev, &adev->gmc, base); |
| 698 | amdgpu_device_gart_location(adev, mc); | 695 | amdgpu_device_gart_location(adev, mc); |
| 699 | /* base offset of vram pages */ | 696 | /* base offset of vram pages */ |
| 700 | if (adev->flags & AMD_IS_APU) | 697 | adev->vm_manager.vram_base_offset = gfxhub_v1_0_get_mc_fb_offset(adev); |
| 701 | adev->vm_manager.vram_base_offset = gfxhub_v1_0_get_mc_fb_offset(adev); | ||
| 702 | else | ||
| 703 | adev->vm_manager.vram_base_offset = 0; | ||
| 704 | } | 698 | } |
| 705 | 699 | ||
| 706 | /** | 700 | /** |
| @@ -714,7 +708,6 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev, | |||
| 714 | */ | 708 | */ |
| 715 | static int gmc_v9_0_mc_init(struct amdgpu_device *adev) | 709 | static int gmc_v9_0_mc_init(struct amdgpu_device *adev) |
| 716 | { | 710 | { |
| 717 | u32 tmp; | ||
| 718 | int chansize, numchan; | 711 | int chansize, numchan; |
| 719 | int r; | 712 | int r; |
| 720 | 713 | ||
| @@ -727,39 +720,7 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) | |||
| 727 | else | 720 | else |
| 728 | chansize = 128; | 721 | chansize = 128; |
| 729 | 722 | ||
| 730 | tmp = RREG32_SOC15(DF, 0, mmDF_CS_AON0_DramBaseAddress0); | 723 | numchan = adev->df_funcs->get_hbm_channel_number(adev); |
| 731 | tmp &= DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK; | ||
| 732 | tmp >>= DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT; | ||
| 733 | switch (tmp) { | ||
| 734 | case 0: | ||
| 735 | default: | ||
| 736 | numchan = 1; | ||
| 737 | break; | ||
| 738 | case 1: | ||
| 739 | numchan = 2; | ||
| 740 | break; | ||
| 741 | case 2: | ||
| 742 | numchan = 0; | ||
| 743 | break; | ||
| 744 | case 3: | ||
| 745 | numchan = 4; | ||
| 746 | break; | ||
| 747 | case 4: | ||
| 748 | numchan = 0; | ||
| 749 | break; | ||
| 750 | case 5: | ||
| 751 | numchan = 8; | ||
| 752 | break; | ||
| 753 | case 6: | ||
| 754 | numchan = 0; | ||
| 755 | break; | ||
| 756 | case 7: | ||
| 757 | numchan = 16; | ||
| 758 | break; | ||
| 759 | case 8: | ||
| 760 | numchan = 2; | ||
| 761 | break; | ||
| 762 | } | ||
| 763 | adev->gmc.vram_width = numchan * chansize; | 724 | adev->gmc.vram_width = numchan * chansize; |
| 764 | } | 725 | } |
| 765 | 726 | ||
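The switch removed above decoded the IntLvNumChan field of DF_CS_AON0_DramBaseAddress0 into an HBM channel count; the patch moves that decode behind adev->df_funcs->get_hbm_channel_number(). A table-driven sketch of the same 0..8 mapping, reusing the mask and shift values that were dropped from gmc_v9_0.c:

#include <stdint.h>
#include <stdio.h>

#define IntLvNumChan_MASK  0x000000F0u
#define IntLvNumChan_SHIFT 4

/* Map the interleave field to a channel count; unknown values fall back to 1. */
static int hbm_channel_number(uint32_t dram_base_address0)
{
    static const int map[] = { 1, 2, 0, 4, 0, 8, 0, 16, 2 };
    uint32_t field = (dram_base_address0 & IntLvNumChan_MASK) >> IntLvNumChan_SHIFT;

    return field < sizeof(map) / sizeof(map[0]) ? map[field] : 1;
}

int main(void)
{
    /* e.g. a register value whose IntLvNumChan field is 5 -> 8 channels */
    printf("channels = %d\n", hbm_channel_number(5u << IntLvNumChan_SHIFT));
    return 0;
}

The vram_width computed just above is then this channel count times the per-channel bus width (chansize).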
| @@ -792,6 +753,7 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) | |||
| 792 | switch (adev->asic_type) { | 753 | switch (adev->asic_type) { |
| 793 | case CHIP_VEGA10: /* all engines support GPUVM */ | 754 | case CHIP_VEGA10: /* all engines support GPUVM */ |
| 794 | case CHIP_VEGA12: /* all engines support GPUVM */ | 755 | case CHIP_VEGA12: /* all engines support GPUVM */ |
| 756 | case CHIP_VEGA20: | ||
| 795 | default: | 757 | default: |
| 796 | adev->gmc.gart_size = 512ULL << 20; | 758 | adev->gmc.gart_size = 512ULL << 20; |
| 797 | break; | 759 | break; |
| @@ -826,6 +788,52 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev) | |||
| 826 | return amdgpu_gart_table_vram_alloc(adev); | 788 | return amdgpu_gart_table_vram_alloc(adev); |
| 827 | } | 789 | } |
| 828 | 790 | ||
| 791 | static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev) | ||
| 792 | { | ||
| 793 | #if 0 | ||
| 794 | u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL); | ||
| 795 | #endif | ||
| 796 | unsigned size; | ||
| 797 | |||
| 798 | /* | ||
| 799 | * TODO Remove once GART corruption is resolved | ||
| 800 | * Check related code in gmc_v9_0_sw_fini | ||
| 801 | */ | ||
| 802 | size = 9 * 1024 * 1024; | ||
| 803 | |||
| 804 | #if 0 | ||
| 805 | if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) { | ||
| 806 | size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */ | ||
| 807 | } else { | ||
| 808 | u32 viewport; | ||
| 809 | |||
| 810 | switch (adev->asic_type) { | ||
| 811 | case CHIP_RAVEN: | ||
| 812 | viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION); | ||
| 813 | size = (REG_GET_FIELD(viewport, | ||
| 814 | HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) * | ||
| 815 | REG_GET_FIELD(viewport, | ||
| 816 | HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_WIDTH) * | ||
| 817 | 4); | ||
| 818 | break; | ||
| 819 | case CHIP_VEGA10: | ||
| 820 | case CHIP_VEGA12: | ||
| 821 | default: | ||
| 822 | viewport = RREG32_SOC15(DCE, 0, mmSCL0_VIEWPORT_SIZE); | ||
| 823 | size = (REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_HEIGHT) * | ||
| 824 | REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_WIDTH) * | ||
| 825 | 4); | ||
| 826 | break; | ||
| 827 | } | ||
| 828 | } | ||
| 829 | /* return 0 if the pre-OS buffer uses up most of vram */ | ||
| 830 | if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024)) | ||
| 831 | return 0; | ||
| 832 | |||
| 833 | #endif | ||
| 834 | return size; | ||
| 835 | } | ||
| 836 | |||
| 829 | static int gmc_v9_0_sw_init(void *handle) | 837 | static int gmc_v9_0_sw_init(void *handle) |
| 830 | { | 838 | { |
| 831 | int r; | 839 | int r; |
| @@ -851,6 +859,7 @@ static int gmc_v9_0_sw_init(void *handle) | |||
| 851 | break; | 859 | break; |
| 852 | case CHIP_VEGA10: | 860 | case CHIP_VEGA10: |
| 853 | case CHIP_VEGA12: | 861 | case CHIP_VEGA12: |
| 862 | case CHIP_VEGA20: | ||
| 854 | /* | 863 | /* |
| 855 | * To fulfill 4-level page support, | 864 | * To fulfill 4-level page support, |
| 856 | * vm size is 256TB (48bit), maximum size of Vega10, | 865 | * vm size is 256TB (48bit), maximum size of Vega10, |
| @@ -877,12 +886,6 @@ static int gmc_v9_0_sw_init(void *handle) | |||
| 877 | */ | 886 | */ |
| 878 | adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */ | 887 | adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */ |
| 879 | 888 | ||
| 880 | /* | ||
| 881 | * It needs to reserve 8M stolen memory for vega10 | ||
| 882 | * TODO: Figure out how to avoid that... | ||
| 883 | */ | ||
| 884 | adev->gmc.stolen_size = 8 * 1024 * 1024; | ||
| 885 | |||
| 886 | /* set DMA mask + need_dma32 flags. | 889 | /* set DMA mask + need_dma32 flags. |
| 887 | * PCIE - can handle 44-bits. | 890 | * PCIE - can handle 44-bits. |
| 888 | * IGP - can handle 44-bits | 891 | * IGP - can handle 44-bits |
| @@ -907,6 +910,8 @@ static int gmc_v9_0_sw_init(void *handle) | |||
| 907 | if (r) | 910 | if (r) |
| 908 | return r; | 911 | return r; |
| 909 | 912 | ||
| 913 | adev->gmc.stolen_size = gmc_v9_0_get_vbios_fb_size(adev); | ||
| 914 | |||
| 910 | /* Memory manager */ | 915 | /* Memory manager */ |
| 911 | r = amdgpu_bo_init(adev); | 916 | r = amdgpu_bo_init(adev); |
| 912 | if (r) | 917 | if (r) |
| @@ -950,6 +955,18 @@ static int gmc_v9_0_sw_fini(void *handle) | |||
| 950 | amdgpu_gem_force_release(adev); | 955 | amdgpu_gem_force_release(adev); |
| 951 | amdgpu_vm_manager_fini(adev); | 956 | amdgpu_vm_manager_fini(adev); |
| 952 | gmc_v9_0_gart_fini(adev); | 957 | gmc_v9_0_gart_fini(adev); |
| 958 | |||
| 959 | /* | ||
| 960 | * TODO: | ||
| 961 | * Currently there is a bug where some memory client outside | ||
| 962 | * of the driver writes to first 8M of VRAM on S3 resume, | ||
| 963 | * this overwrites GART, which by default gets placed in the first 8M, and | ||
| 964 | * causes VM_FAULTS once GTT is accessed. | ||
| 965 | * Keep the stolen memory reservation while this is not solved. | ||
| 966 | * Also check code in gmc_v9_0_get_vbios_fb_size and gmc_v9_0_late_init | ||
| 967 | */ | ||
| 968 | amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL); | ||
| 969 | |||
| 953 | amdgpu_bo_fini(adev); | 970 | amdgpu_bo_fini(adev); |
| 954 | 971 | ||
| 955 | return 0; | 972 | return 0; |
| @@ -960,6 +977,7 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev) | |||
| 960 | 977 | ||
| 961 | switch (adev->asic_type) { | 978 | switch (adev->asic_type) { |
| 962 | case CHIP_VEGA10: | 979 | case CHIP_VEGA10: |
| 980 | case CHIP_VEGA20: | ||
| 963 | soc15_program_register_sequence(adev, | 981 | soc15_program_register_sequence(adev, |
| 964 | golden_settings_mmhub_1_0_0, | 982 | golden_settings_mmhub_1_0_0, |
| 965 | ARRAY_SIZE(golden_settings_mmhub_1_0_0)); | 983 | ARRAY_SIZE(golden_settings_mmhub_1_0_0)); |
diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c index 26ba984ab2b7..17f7f074cedc 100644 --- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c | |||
| @@ -2817,7 +2817,7 @@ static int kv_dpm_init(struct amdgpu_device *adev) | |||
| 2817 | pi->caps_tcp_ramping = true; | 2817 | pi->caps_tcp_ramping = true; |
| 2818 | } | 2818 | } |
| 2819 | 2819 | ||
| 2820 | if (amdgpu_pp_feature_mask & SCLK_DEEP_SLEEP_MASK) | 2820 | if (adev->powerplay.pp_feature & PP_SCLK_DEEP_SLEEP_MASK) |
| 2821 | pi->caps_sclk_ds = true; | 2821 | pi->caps_sclk_ds = true; |
| 2822 | else | 2822 | else |
| 2823 | pi->caps_sclk_ds = false; | 2823 | pi->caps_sclk_ds = false; |
| @@ -2974,7 +2974,7 @@ static int kv_dpm_late_init(void *handle) | |||
| 2974 | /* powerdown unused blocks for now */ | 2974 | /* powerdown unused blocks for now */ |
| 2975 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 2975 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 2976 | 2976 | ||
| 2977 | if (!amdgpu_dpm) | 2977 | if (!adev->pm.dpm_enabled) |
| 2978 | return 0; | 2978 | return 0; |
| 2979 | 2979 | ||
| 2980 | kv_dpm_powergate_acp(adev, true); | 2980 | kv_dpm_powergate_acp(adev, true); |
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index 43f925773b57..3d53c4413f13 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | |||
| @@ -734,6 +734,7 @@ int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev, | |||
| 734 | switch (adev->asic_type) { | 734 | switch (adev->asic_type) { |
| 735 | case CHIP_VEGA10: | 735 | case CHIP_VEGA10: |
| 736 | case CHIP_VEGA12: | 736 | case CHIP_VEGA12: |
| 737 | case CHIP_VEGA20: | ||
| 737 | case CHIP_RAVEN: | 738 | case CHIP_RAVEN: |
| 738 | mmhub_v1_0_update_medium_grain_clock_gating(adev, | 739 | mmhub_v1_0_update_medium_grain_clock_gating(adev, |
| 739 | state == AMD_CG_STATE_GATE ? true : false); | 740 | state == AMD_CG_STATE_GATE ? true : false); |
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index 493348672475..078f70faedcb 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | |||
| @@ -260,8 +260,10 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work) | |||
| 260 | } while (timeout > 1); | 260 | } while (timeout > 1); |
| 261 | 261 | ||
| 262 | flr_done: | 262 | flr_done: |
| 263 | if (locked) | 263 | if (locked) { |
| 264 | adev->in_gpu_reset = 0; | ||
| 264 | mutex_unlock(&adev->lock_reset); | 265 | mutex_unlock(&adev->lock_reset); |
| 266 | } | ||
| 265 | 267 | ||
| 266 | /* Trigger recovery for world switch failure if no TDR */ | 268 | /* Trigger recovery for world switch failure if no TDR */ |
| 267 | if (amdgpu_lockup_timeout == 0) | 269 | if (amdgpu_lockup_timeout == 0) |
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c index df34dc79d444..365517c0121e 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c | |||
| @@ -34,10 +34,19 @@ | |||
| 34 | #define smnCPM_CONTROL 0x11180460 | 34 | #define smnCPM_CONTROL 0x11180460 |
| 35 | #define smnPCIE_CNTL2 0x11180070 | 35 | #define smnPCIE_CNTL2 0x11180070 |
| 36 | 36 | ||
| 37 | /* vega20 */ | ||
| 38 | #define mmRCC_DEV0_EPF0_STRAP0_VG20 0x0011 | ||
| 39 | #define mmRCC_DEV0_EPF0_STRAP0_VG20_BASE_IDX 2 | ||
| 40 | |||
| 37 | static u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev) | 41 | static u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev) |
| 38 | { | 42 | { |
| 39 | u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); | 43 | u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); |
| 40 | 44 | ||
| 45 | if (adev->asic_type == CHIP_VEGA20) | ||
| 46 | tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0_VG20); | ||
| 47 | else | ||
| 48 | tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); | ||
| 49 | |||
| 41 | tmp &= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK; | 50 | tmp &= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK; |
| 42 | tmp >>= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT; | 51 | tmp >>= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT; |
| 43 | 52 | ||
| @@ -75,10 +84,14 @@ static void nbio_v7_0_sdma_doorbell_range(struct amdgpu_device *adev, int instan | |||
| 75 | SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE); | 84 | SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE); |
| 76 | 85 | ||
| 77 | u32 doorbell_range = RREG32(reg); | 86 | u32 doorbell_range = RREG32(reg); |
| 87 | u32 range = 2; | ||
| 88 | |||
| 89 | if (adev->asic_type == CHIP_VEGA20) | ||
| 90 | range = 8; | ||
| 78 | 91 | ||
| 79 | if (use_doorbell) { | 92 | if (use_doorbell) { |
| 80 | doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index); | 93 | doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index); |
| 81 | doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 2); | 94 | doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, range); |
| 82 | } else | 95 | } else |
| 83 | doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 0); | 96 | doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 0); |
| 84 | 97 | ||
| @@ -133,6 +146,9 @@ static void nbio_v7_0_update_medium_grain_clock_gating(struct amdgpu_device *ade | |||
| 133 | { | 146 | { |
| 134 | uint32_t def, data; | 147 | uint32_t def, data; |
| 135 | 148 | ||
| 149 | if (adev->asic_type == CHIP_VEGA20) | ||
| 150 | return; | ||
| 151 | |||
| 136 | /* NBIF_MGCG_CTRL_LCLK */ | 152 | /* NBIF_MGCG_CTRL_LCLK */ |
| 137 | def = data = RREG32_PCIE(smnNBIF_MGCG_CTRL_LCLK); | 153 | def = data = RREG32_PCIE(smnNBIF_MGCG_CTRL_LCLK); |
| 138 | 154 | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h index 8da6da90b1c9..0cf48d26c676 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h | |||
| @@ -40,11 +40,20 @@ enum psp_gfx_crtl_cmd_id | |||
| 40 | GFX_CTRL_CMD_ID_INIT_GPCOM_RING = 0x00020000, /* initialize GPCOM ring */ | 40 | GFX_CTRL_CMD_ID_INIT_GPCOM_RING = 0x00020000, /* initialize GPCOM ring */ |
| 41 | GFX_CTRL_CMD_ID_DESTROY_RINGS = 0x00030000, /* destroy rings */ | 41 | GFX_CTRL_CMD_ID_DESTROY_RINGS = 0x00030000, /* destroy rings */ |
| 42 | GFX_CTRL_CMD_ID_CAN_INIT_RINGS = 0x00040000, /* is it allowed to initialized the rings */ | 42 | GFX_CTRL_CMD_ID_CAN_INIT_RINGS = 0x00040000, /* is it allowed to initialized the rings */ |
| 43 | GFX_CTRL_CMD_ID_ENABLE_INT = 0x00050000, /* enable PSP-to-Gfx interrupt */ | ||
| 44 | GFX_CTRL_CMD_ID_DISABLE_INT = 0x00060000, /* disable PSP-to-Gfx interrupt */ | ||
| 45 | GFX_CTRL_CMD_ID_MODE1_RST = 0x00070000, /* trigger the Mode 1 reset */ | ||
| 43 | 46 | ||
| 44 | GFX_CTRL_CMD_ID_MAX = 0x000F0000, /* max command ID */ | 47 | GFX_CTRL_CMD_ID_MAX = 0x000F0000, /* max command ID */ |
| 45 | }; | 48 | }; |
| 46 | 49 | ||
| 47 | 50 | ||
| 51 | /*----------------------------------------------------------------------------- | ||
| 52 | NOTE: All physical addresses used in this interface are actually | ||
| 53 | GPU Virtual Addresses. | ||
| 54 | */ | ||
| 55 | |||
| 56 | |||
| 48 | /* Control registers of the TEE Gfx interface. These are located in | 57 | /* Control registers of the TEE Gfx interface. These are located in |
| 49 | * SRBM-to-PSP mailbox registers (total 8 registers). | 58 | * SRBM-to-PSP mailbox registers (total 8 registers). |
| 50 | */ | 59 | */ |
| @@ -55,8 +64,8 @@ struct psp_gfx_ctrl | |||
| 55 | volatile uint32_t rbi_rptr; /* +8 Read pointer (index) of RBI ring */ | 64 | volatile uint32_t rbi_rptr; /* +8 Read pointer (index) of RBI ring */ |
| 56 | volatile uint32_t gpcom_wptr; /* +12 Write pointer (index) of GPCOM ring */ | 65 | volatile uint32_t gpcom_wptr; /* +12 Write pointer (index) of GPCOM ring */ |
| 57 | volatile uint32_t gpcom_rptr; /* +16 Read pointer (index) of GPCOM ring */ | 66 | volatile uint32_t gpcom_rptr; /* +16 Read pointer (index) of GPCOM ring */ |
| 58 | volatile uint32_t ring_addr_lo; /* +20 bits [31:0] of physical address of ring buffer */ | 67 | volatile uint32_t ring_addr_lo; /* +20 bits [31:0] of GPU Virtual address of ring buffer (VMID=0) */
| 59 | volatile uint32_t ring_addr_hi; /* +24 bits [63:32] of physical address of ring buffer */ | 68 | volatile uint32_t ring_addr_hi; /* +24 bits [63:32] of GPU Virtual address of ring buffer (VMID=0) */
| 60 | volatile uint32_t ring_buf_size; /* +28 Ring buffer size (in bytes) */ | 69 | volatile uint32_t ring_buf_size; /* +28 Ring buffer size (in bytes) */ |
| 61 | 70 | ||
| 62 | }; | 71 | }; |
| @@ -78,6 +87,8 @@ enum psp_gfx_cmd_id | |||
| 78 | GFX_CMD_ID_LOAD_ASD = 0x00000004, /* load ASD Driver */ | 87 | GFX_CMD_ID_LOAD_ASD = 0x00000004, /* load ASD Driver */ |
| 79 | GFX_CMD_ID_SETUP_TMR = 0x00000005, /* setup TMR region */ | 88 | GFX_CMD_ID_SETUP_TMR = 0x00000005, /* setup TMR region */ |
| 80 | GFX_CMD_ID_LOAD_IP_FW = 0x00000006, /* load HW IP FW */ | 89 | GFX_CMD_ID_LOAD_IP_FW = 0x00000006, /* load HW IP FW */ |
| 90 | GFX_CMD_ID_DESTROY_TMR = 0x00000007, /* destroy TMR region */ | ||
| 91 | GFX_CMD_ID_SAVE_RESTORE = 0x00000008, /* save/restore HW IP FW */ | ||
| 81 | 92 | ||
| 82 | }; | 93 | }; |
| 83 | 94 | ||
| @@ -85,11 +96,11 @@ enum psp_gfx_cmd_id | |||
| 85 | /* Command to load Trusted Application binary into PSP OS. */ | 96 | /* Command to load Trusted Application binary into PSP OS. */ |
| 86 | struct psp_gfx_cmd_load_ta | 97 | struct psp_gfx_cmd_load_ta |
| 87 | { | 98 | { |
| 88 | uint32_t app_phy_addr_lo; /* bits [31:0] of the physical address of the TA binary (must be 4 KB aligned) */ | 99 | uint32_t app_phy_addr_lo; /* bits [31:0] of the GPU Virtual address of the TA binary (must be 4 KB aligned) */ |
| 89 | uint32_t app_phy_addr_hi; /* bits [63:32] of the physical address of the TA binary */ | 100 | uint32_t app_phy_addr_hi; /* bits [63:32] of the GPU Virtual address of the TA binary */ |
| 90 | uint32_t app_len; /* length of the TA binary in bytes */ | 101 | uint32_t app_len; /* length of the TA binary in bytes */ |
| 91 | uint32_t cmd_buf_phy_addr_lo; /* bits [31:0] of the physical address of CMD buffer (must be 4 KB aligned) */ | 102 | uint32_t cmd_buf_phy_addr_lo; /* bits [31:0] of the GPU Virtual address of CMD buffer (must be 4 KB aligned) */ |
| 92 | uint32_t cmd_buf_phy_addr_hi; /* bits [63:32] of the physical address of CMD buffer */ | 103 | uint32_t cmd_buf_phy_addr_hi; /* bits [63:32] of the GPU Virtual address of CMD buffer */ |
| 93 | uint32_t cmd_buf_len; /* length of the CMD buffer in bytes; must be multiple of 4 KB */ | 104 | uint32_t cmd_buf_len; /* length of the CMD buffer in bytes; must be multiple of 4 KB */ |
| 94 | 105 | ||
| 95 | /* Note: CmdBufLen can be set to 0. In this case no persistent CMD buffer is provided | 106 | /* Note: CmdBufLen can be set to 0. In this case no persistent CMD buffer is provided |
| @@ -111,8 +122,8 @@ struct psp_gfx_cmd_unload_ta | |||
| 111 | */ | 122 | */ |
| 112 | struct psp_gfx_buf_desc | 123 | struct psp_gfx_buf_desc |
| 113 | { | 124 | { |
| 114 | uint32_t buf_phy_addr_lo; /* bits [31:0] of physical address of the buffer (must be 4 KB aligned) */ | 125 | uint32_t buf_phy_addr_lo; /* bits [31:0] of GPU Virtual address of the buffer (must be 4 KB aligned) */ |
| 115 | uint32_t buf_phy_addr_hi; /* bits [63:32] of physical address of the buffer */ | 126 | uint32_t buf_phy_addr_hi; /* bits [63:32] of GPU Virtual address of the buffer */ |
| 116 | uint32_t buf_size; /* buffer size in bytes (must be multiple of 4 KB and no bigger than 64 MB) */ | 127 | uint32_t buf_size; /* buffer size in bytes (must be multiple of 4 KB and no bigger than 64 MB) */ |
| 117 | 128 | ||
| 118 | }; | 129 | }; |
| @@ -145,8 +156,8 @@ struct psp_gfx_cmd_invoke_cmd | |||
| 145 | /* Command to setup TMR region. */ | 156 | /* Command to setup TMR region. */ |
| 146 | struct psp_gfx_cmd_setup_tmr | 157 | struct psp_gfx_cmd_setup_tmr |
| 147 | { | 158 | { |
| 148 | uint32_t buf_phy_addr_lo; /* bits [31:0] of physical address of TMR buffer (must be 4 KB aligned) */ | 159 | uint32_t buf_phy_addr_lo; /* bits [31:0] of GPU Virtual address of TMR buffer (must be 4 KB aligned) */ |
| 149 | uint32_t buf_phy_addr_hi; /* bits [63:32] of physical address of TMR buffer */ | 160 | uint32_t buf_phy_addr_hi; /* bits [63:32] of GPU Virtual address of TMR buffer */ |
| 150 | uint32_t buf_size; /* buffer size in bytes (must be multiple of 4 KB) */ | 161 | uint32_t buf_size; /* buffer size in bytes (must be multiple of 4 KB) */ |
| 151 | 162 | ||
| 152 | }; | 163 | }; |
| @@ -174,18 +185,32 @@ enum psp_gfx_fw_type | |||
| 174 | GFX_FW_TYPE_ISP = 16, | 185 | GFX_FW_TYPE_ISP = 16, |
| 175 | GFX_FW_TYPE_ACP = 17, | 186 | GFX_FW_TYPE_ACP = 17, |
| 176 | GFX_FW_TYPE_SMU = 18, | 187 | GFX_FW_TYPE_SMU = 18, |
| 188 | GFX_FW_TYPE_MMSCH = 19, | ||
| 189 | GFX_FW_TYPE_RLC_RESTORE_LIST_GPM_MEM = 20, | ||
| 190 | GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_MEM = 21, | ||
| 191 | GFX_FW_TYPE_RLC_RESTORE_LIST_CNTL = 22, | ||
| 192 | GFX_FW_TYPE_MAX = 23 | ||
| 177 | }; | 193 | }; |
| 178 | 194 | ||
| 179 | /* Command to load HW IP FW. */ | 195 | /* Command to load HW IP FW. */ |
| 180 | struct psp_gfx_cmd_load_ip_fw | 196 | struct psp_gfx_cmd_load_ip_fw |
| 181 | { | 197 | { |
| 182 | uint32_t fw_phy_addr_lo; /* bits [31:0] of physical address of FW location (must be 4 KB aligned) */ | 198 | uint32_t fw_phy_addr_lo; /* bits [31:0] of GPU Virtual address of FW location (must be 4 KB aligned) */ |
| 183 | uint32_t fw_phy_addr_hi; /* bits [63:32] of physical address of FW location */ | 199 | uint32_t fw_phy_addr_hi; /* bits [63:32] of GPU Virtual address of FW location */ |
| 184 | uint32_t fw_size; /* FW buffer size in bytes */ | 200 | uint32_t fw_size; /* FW buffer size in bytes */ |
| 185 | enum psp_gfx_fw_type fw_type; /* FW type */ | 201 | enum psp_gfx_fw_type fw_type; /* FW type */ |
| 186 | 202 | ||
| 187 | }; | 203 | }; |
| 188 | 204 | ||
| 205 | /* Command to save/restore HW IP FW. */ | ||
| 206 | struct psp_gfx_cmd_save_restore_ip_fw | ||
| 207 | { | ||
| 208 | uint32_t save_fw; /* if set, command is used for saving fw, otherwise for restoring */ | ||
| 209 | uint32_t save_restore_addr_lo; /* bits [31:0] of FB address of GART memory used as save/restore buffer (must be 4 KB aligned) */ | ||
| 210 | uint32_t save_restore_addr_hi; /* bits [63:32] of FB address of GART memory used as save/restore buffer */ | ||
| 211 | uint32_t buf_size; /* Size of the save/restore buffer in bytes */ | ||
| 212 | enum psp_gfx_fw_type fw_type; /* FW type */ | ||
| 213 | }; | ||
| 189 | 214 | ||
| 190 | /* All GFX ring buffer commands. */ | 215 | /* All GFX ring buffer commands. */ |
| 191 | union psp_gfx_commands | 216 | union psp_gfx_commands |
| @@ -195,7 +220,7 @@ union psp_gfx_commands | |||
| 195 | struct psp_gfx_cmd_invoke_cmd cmd_invoke_cmd; | 220 | struct psp_gfx_cmd_invoke_cmd cmd_invoke_cmd; |
| 196 | struct psp_gfx_cmd_setup_tmr cmd_setup_tmr; | 221 | struct psp_gfx_cmd_setup_tmr cmd_setup_tmr; |
| 197 | struct psp_gfx_cmd_load_ip_fw cmd_load_ip_fw; | 222 | struct psp_gfx_cmd_load_ip_fw cmd_load_ip_fw; |
| 198 | 223 | struct psp_gfx_cmd_save_restore_ip_fw cmd_save_restore_ip_fw; | |
| 199 | }; | 224 | }; |
| 200 | 225 | ||
| 201 | 226 | ||
| @@ -226,8 +251,8 @@ struct psp_gfx_cmd_resp | |||
| 226 | 251 | ||
| 227 | /* These fields are used for RBI only. They are all 0 in GPCOM commands | 252 | /* These fields are used for RBI only. They are all 0 in GPCOM commands |
| 228 | */ | 253 | */ |
| 229 | uint32_t resp_buf_addr_lo; /* +12 bits [31:0] of physical address of response buffer (must be 4 KB aligned) */ | 254 | uint32_t resp_buf_addr_lo; /* +12 bits [31:0] of GPU Virtual address of response buffer (must be 4 KB aligned) */ |
| 230 | uint32_t resp_buf_addr_hi; /* +16 bits [63:32] of physical address of response buffer */ | 255 | uint32_t resp_buf_addr_hi; /* +16 bits [63:32] of GPU Virtual address of response buffer */ |
| 231 | uint32_t resp_offset; /* +20 offset within response buffer */ | 256 | uint32_t resp_offset; /* +20 offset within response buffer */ |
| 232 | uint32_t resp_buf_size; /* +24 total size of the response buffer in bytes */ | 257 | uint32_t resp_buf_size; /* +24 total size of the response buffer in bytes */ |
| 233 | 258 | ||
| @@ -251,19 +276,19 @@ struct psp_gfx_cmd_resp | |||
| 251 | /* Structure of the Ring Buffer Frame */ | 276 | /* Structure of the Ring Buffer Frame */ |
| 252 | struct psp_gfx_rb_frame | 277 | struct psp_gfx_rb_frame |
| 253 | { | 278 | { |
| 254 | uint32_t cmd_buf_addr_lo; /* +0 bits [31:0] of physical address of command buffer (must be 4 KB aligned) */ | 279 | uint32_t cmd_buf_addr_lo; /* +0 bits [31:0] of GPU Virtual address of command buffer (must be 4 KB aligned) */ |
| 255 | uint32_t cmd_buf_addr_hi; /* +4 bits [63:32] of physical address of command buffer */ | 280 | uint32_t cmd_buf_addr_hi; /* +4 bits [63:32] of GPU Virtual address of command buffer */ |
| 256 | uint32_t cmd_buf_size; /* +8 command buffer size in bytes */ | 281 | uint32_t cmd_buf_size; /* +8 command buffer size in bytes */ |
| 257 | uint32_t fence_addr_lo; /* +12 bits [31:0] of physical address of Fence for this frame */ | 282 | uint32_t fence_addr_lo; /* +12 bits [31:0] of GPU Virtual address of Fence for this frame */ |
| 258 | uint32_t fence_addr_hi; /* +16 bits [63:32] of physical address of Fence for this frame */ | 283 | uint32_t fence_addr_hi; /* +16 bits [63:32] of GPU Virtual address of Fence for this frame */ |
| 259 | uint32_t fence_value; /* +20 Fence value */ | 284 | uint32_t fence_value; /* +20 Fence value */ |
| 260 | uint32_t sid_lo; /* +24 bits [31:0] of SID value (used only for RBI frames) */ | 285 | uint32_t sid_lo; /* +24 bits [31:0] of SID value (used only for RBI frames) */ |
| 261 | uint32_t sid_hi; /* +28 bits [63:32] of SID value (used only for RBI frames) */ | 286 | uint32_t sid_hi; /* +28 bits [63:32] of SID value (used only for RBI frames) */ |
| 262 | uint8_t vmid; /* +32 VMID value used for mapping of all addresses for this frame */ | 287 | uint8_t vmid; /* +32 VMID value used for mapping of all addresses for this frame */ |
| 263 uint8_t frame_type; /* +33 1: destroy context frame, 0: all other frames; used only for RBI frames */ | 288 uint8_t frame_type; /* +33 1: destroy context frame, 0: all other frames; used only for RBI frames */ |
| 264 | uint8_t reserved1[2]; /* +34 reserved, must be 0 */ | 289 | uint8_t reserved1[2]; /* +34 reserved, must be 0 */ |
| 265 | uint32_t reserved2[7]; /* +40 reserved, must be 0 */ | 290 | uint32_t reserved2[7]; /* +36 reserved, must be 0 */ |
| 266 | /* total 64 bytes */ | 291 | /* total 64 bytes */ |
| 267 | }; | 292 | }; |
| 268 | 293 | ||
| 269 | #endif /* _PSP_TEE_GFX_IF_H_ */ | 294 | #endif /* _PSP_TEE_GFX_IF_H_ */ |
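Editor's note: the reserved2 offset comment above is corrected from +40 to +36, which is what makes the documented 64-byte total add up — 36 bytes of fields followed by seven reserved dwords. A compile-time guard for that layout could look like the following sketch (not part of the patch; it assumes BUILD_BUG_ON and offsetof are available via the usual kernel headers):

        /* Sketch: compile-time check of the layout documented in the comments above. */
        BUILD_BUG_ON(offsetof(struct psp_gfx_rb_frame, reserved2) != 36);
        BUILD_BUG_ON(sizeof(struct psp_gfx_rb_frame) != 64);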
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c index 8873d833a7f7..0ff136d02d9b 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c | |||
| @@ -70,6 +70,15 @@ psp_v10_0_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type * | |||
| 70 | case AMDGPU_UCODE_ID_RLC_G: | 70 | case AMDGPU_UCODE_ID_RLC_G: |
| 71 | *type = GFX_FW_TYPE_RLC_G; | 71 | *type = GFX_FW_TYPE_RLC_G; |
| 72 | break; | 72 | break; |
| 73 | case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL: | ||
| 74 | *type = GFX_FW_TYPE_RLC_RESTORE_LIST_CNTL; | ||
| 75 | break; | ||
| 76 | case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM: | ||
| 77 | *type = GFX_FW_TYPE_RLC_RESTORE_LIST_GPM_MEM; | ||
| 78 | break; | ||
| 79 | case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM: | ||
| 80 | *type = GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_MEM; | ||
| 81 | break; | ||
| 73 | case AMDGPU_UCODE_ID_SMC: | 82 | case AMDGPU_UCODE_ID_SMC: |
| 74 | *type = GFX_FW_TYPE_SMU; | 83 | *type = GFX_FW_TYPE_SMU; |
| 75 | break; | 84 | break; |
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c index 196e75def1f2..0c768e388ace 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c | |||
| @@ -41,6 +41,9 @@ MODULE_FIRMWARE("amdgpu/vega10_sos.bin"); | |||
| 41 | MODULE_FIRMWARE("amdgpu/vega10_asd.bin"); | 41 | MODULE_FIRMWARE("amdgpu/vega10_asd.bin"); |
| 42 | MODULE_FIRMWARE("amdgpu/vega12_sos.bin"); | 42 | MODULE_FIRMWARE("amdgpu/vega12_sos.bin"); |
| 43 | MODULE_FIRMWARE("amdgpu/vega12_asd.bin"); | 43 | MODULE_FIRMWARE("amdgpu/vega12_asd.bin"); |
| 44 | MODULE_FIRMWARE("amdgpu/vega20_sos.bin"); | ||
| 45 | MODULE_FIRMWARE("amdgpu/vega20_asd.bin"); | ||
| 46 | |||
| 44 | 47 | ||
| 45 | #define smnMP1_FIRMWARE_FLAGS 0x3010028 | 48 | #define smnMP1_FIRMWARE_FLAGS 0x3010028 |
| 46 | 49 | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index be20a387d961..aa9ab299fd32 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | |||
| @@ -62,6 +62,8 @@ MODULE_FIRMWARE("amdgpu/polaris11_sdma.bin"); | |||
| 62 | MODULE_FIRMWARE("amdgpu/polaris11_sdma1.bin"); | 62 | MODULE_FIRMWARE("amdgpu/polaris11_sdma1.bin"); |
| 63 | MODULE_FIRMWARE("amdgpu/polaris12_sdma.bin"); | 63 | MODULE_FIRMWARE("amdgpu/polaris12_sdma.bin"); |
| 64 | MODULE_FIRMWARE("amdgpu/polaris12_sdma1.bin"); | 64 | MODULE_FIRMWARE("amdgpu/polaris12_sdma1.bin"); |
| 65 | MODULE_FIRMWARE("amdgpu/vegam_sdma.bin"); | ||
| 66 | MODULE_FIRMWARE("amdgpu/vegam_sdma1.bin"); | ||
| 65 | 67 | ||
| 66 | 68 | ||
| 67 | static const u32 sdma_offsets[SDMA_MAX_INSTANCE] = | 69 | static const u32 sdma_offsets[SDMA_MAX_INSTANCE] = |
| @@ -209,6 +211,7 @@ static void sdma_v3_0_init_golden_registers(struct amdgpu_device *adev) | |||
| 209 | break; | 211 | break; |
| 210 | case CHIP_POLARIS11: | 212 | case CHIP_POLARIS11: |
| 211 | case CHIP_POLARIS12: | 213 | case CHIP_POLARIS12: |
| 214 | case CHIP_VEGAM: | ||
| 212 | amdgpu_device_program_register_sequence(adev, | 215 | amdgpu_device_program_register_sequence(adev, |
| 213 | golden_settings_polaris11_a11, | 216 | golden_settings_polaris11_a11, |
| 214 | ARRAY_SIZE(golden_settings_polaris11_a11)); | 217 | ARRAY_SIZE(golden_settings_polaris11_a11)); |
| @@ -275,15 +278,18 @@ static int sdma_v3_0_init_microcode(struct amdgpu_device *adev) | |||
| 275 | case CHIP_FIJI: | 278 | case CHIP_FIJI: |
| 276 | chip_name = "fiji"; | 279 | chip_name = "fiji"; |
| 277 | break; | 280 | break; |
| 278 | case CHIP_POLARIS11: | ||
| 279 | chip_name = "polaris11"; | ||
| 280 | break; | ||
| 281 | case CHIP_POLARIS10: | 281 | case CHIP_POLARIS10: |
| 282 | chip_name = "polaris10"; | 282 | chip_name = "polaris10"; |
| 283 | break; | 283 | break; |
| 284 | case CHIP_POLARIS11: | ||
| 285 | chip_name = "polaris11"; | ||
| 286 | break; | ||
| 284 | case CHIP_POLARIS12: | 287 | case CHIP_POLARIS12: |
| 285 | chip_name = "polaris12"; | 288 | chip_name = "polaris12"; |
| 286 | break; | 289 | break; |
| 290 | case CHIP_VEGAM: | ||
| 291 | chip_name = "vegam"; | ||
| 292 | break; | ||
| 287 | case CHIP_CARRIZO: | 293 | case CHIP_CARRIZO: |
| 288 | chip_name = "carrizo"; | 294 | chip_name = "carrizo"; |
| 289 | break; | 295 | break; |
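Editor's note: each chip_name chosen above only selects which firmware file gets requested; moving the POLARIS11 case below POLARIS10 and adding vegam changes nothing for existing parts. Judging from the MODULE_FIRMWARE lines added earlier in this file, the request built further down in this function follows roughly this convention (a sketch, not the literal code of the hunk):

        /* Sketch of the firmware naming convention implied by the MODULE_FIRMWARE
         * entries above; the buffer size and format strings are illustrative. */
        char fw_name[30];

        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);  /* SDMA instance 0 */
        /* and "amdgpu/%s_sdma1.bin" for the second instance, where present */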
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 399f876f9cad..ca53b3fba422 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | |||
| @@ -42,6 +42,8 @@ MODULE_FIRMWARE("amdgpu/vega10_sdma.bin"); | |||
| 42 | MODULE_FIRMWARE("amdgpu/vega10_sdma1.bin"); | 42 | MODULE_FIRMWARE("amdgpu/vega10_sdma1.bin"); |
| 43 | MODULE_FIRMWARE("amdgpu/vega12_sdma.bin"); | 43 | MODULE_FIRMWARE("amdgpu/vega12_sdma.bin"); |
| 44 | MODULE_FIRMWARE("amdgpu/vega12_sdma1.bin"); | 44 | MODULE_FIRMWARE("amdgpu/vega12_sdma1.bin"); |
| 45 | MODULE_FIRMWARE("amdgpu/vega20_sdma.bin"); | ||
| 46 | MODULE_FIRMWARE("amdgpu/vega20_sdma1.bin"); | ||
| 45 | MODULE_FIRMWARE("amdgpu/raven_sdma.bin"); | 47 | MODULE_FIRMWARE("amdgpu/raven_sdma.bin"); |
| 46 | 48 | ||
| 47 | #define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK 0x000000F8L | 49 | #define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK 0x000000F8L |
| @@ -107,6 +109,28 @@ static const struct soc15_reg_golden golden_settings_sdma_4_1[] = | |||
| 107 | SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0) | 109 | SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0) |
| 108 | }; | 110 | }; |
| 109 | 111 | ||
| 112 | static const struct soc15_reg_golden golden_settings_sdma_4_2[] = | ||
| 113 | { | ||
| 114 | SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07), | ||
| 115 | SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xffffffff, 0x3f000100), | ||
| 116 | SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), | ||
| 117 | SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), | ||
| 118 | SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), | ||
| 119 | SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), | ||
| 120 | SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff0, 0x00403000), | ||
| 121 | SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), | ||
| 122 | SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0), | ||
| 123 | SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831d07), | ||
| 124 | SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CLK_CTRL, 0xffffffff, 0x3f000100), | ||
| 125 | SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), | ||
| 126 | SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), | ||
| 127 | SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), | ||
| 128 | SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), | ||
| 129 | SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), | ||
| 130 | SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), | ||
| 131 | SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_PAGE, 0x000003ff, 0x000003c0) | ||
| 132 | }; | ||
| 133 | |||
| 110 | static const struct soc15_reg_golden golden_settings_sdma_rv1[] = | 134 | static const struct soc15_reg_golden golden_settings_sdma_rv1[] = |
| 111 | { | 135 | { |
| 112 | SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00000002), | 136 | SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00000002), |
| @@ -139,6 +163,11 @@ static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev) | |||
| 139 | golden_settings_sdma_vg12, | 163 | golden_settings_sdma_vg12, |
| 140 | ARRAY_SIZE(golden_settings_sdma_vg12)); | 164 | ARRAY_SIZE(golden_settings_sdma_vg12)); |
| 141 | break; | 165 | break; |
| 166 | case CHIP_VEGA20: | ||
| 167 | soc15_program_register_sequence(adev, | ||
| 168 | golden_settings_sdma_4_2, | ||
| 169 | ARRAY_SIZE(golden_settings_sdma_4_2)); | ||
| 170 | break; | ||
| 142 | case CHIP_RAVEN: | 171 | case CHIP_RAVEN: |
| 143 | soc15_program_register_sequence(adev, | 172 | soc15_program_register_sequence(adev, |
| 144 | golden_settings_sdma_4_1, | 173 | golden_settings_sdma_4_1, |
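Editor's note: each SOC15_REG_GOLDEN_VALUE entry in the new vega20 table pairs an and-mask with an or-value for one register. As understood from the helper's name and the mask/value pairs above, applying an entry amounts to a masked read-modify-write, roughly as in this sketch (variable names approximate, not the literal body of soc15_program_register_sequence):

        /* Sketch of what applying one golden-register entry boils down to. */
        u32 tmp = RREG32(reg_offset);

        tmp &= ~and_mask;   /* clear the bits the entry owns */
        tmp |= or_value;    /* then program the golden value */
        WREG32(reg_offset, tmp);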
| @@ -182,6 +211,9 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev) | |||
| 182 | case CHIP_VEGA12: | 211 | case CHIP_VEGA12: |
| 183 | chip_name = "vega12"; | 212 | chip_name = "vega12"; |
| 184 | break; | 213 | break; |
| 214 | case CHIP_VEGA20: | ||
| 215 | chip_name = "vega20"; | ||
| 216 | break; | ||
| 185 | case CHIP_RAVEN: | 217 | case CHIP_RAVEN: |
| 186 | chip_name = "raven"; | 218 | chip_name = "raven"; |
| 187 | break; | 219 | break; |
| @@ -360,6 +392,31 @@ static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring, | |||
| 360 | 392 | ||
| 361 | } | 393 | } |
| 362 | 394 | ||
| 395 | static void sdma_v4_0_wait_reg_mem(struct amdgpu_ring *ring, | ||
| 396 | int mem_space, int hdp, | ||
| 397 | uint32_t addr0, uint32_t addr1, | ||
| 398 | uint32_t ref, uint32_t mask, | ||
| 399 | uint32_t inv) | ||
| 400 | { | ||
| 401 | amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | | ||
| 402 | SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(hdp) | | ||
| 403 | SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(mem_space) | | ||
| 404 | SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */ | ||
| 405 | if (mem_space) { | ||
| 406 | /* memory */ | ||
| 407 | amdgpu_ring_write(ring, addr0); | ||
| 408 | amdgpu_ring_write(ring, addr1); | ||
| 409 | } else { | ||
| 410 | /* registers */ | ||
| 411 | amdgpu_ring_write(ring, addr0 << 2); | ||
| 412 | amdgpu_ring_write(ring, addr1 << 2); | ||
| 413 | } | ||
| 414 | amdgpu_ring_write(ring, ref); /* reference */ | ||
| 415 | amdgpu_ring_write(ring, mask); /* mask */ | ||
| 416 | amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | | ||
| 417 | SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(inv)); /* retry count, poll interval */ | ||
| 418 | } | ||
| 419 | |||
| 363 | /** | 420 | /** |
| 364 | * sdma_v4_0_ring_emit_hdp_flush - emit an hdp flush on the DMA ring | 421 | * sdma_v4_0_ring_emit_hdp_flush - emit an hdp flush on the DMA ring |
| 365 | * | 422 | * |
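Editor's note: the new sdma_v4_0_wait_reg_mem() helper folds the three hand-rolled POLL_REGMEM sequences below into one place; only the header flags and the address interpretation differ between the register and memory variants. For reference, the six dwords it emits (derived directly from the function above) are:

        /* dw0: SDMA_OP_POLL_REGMEM header with FUNC(3) ("equal"), plus the
         *      HDP_FLUSH and MEM_POLL flags taken from the hdp/mem_space arguments
         * dw1: addr0  (register offset << 2, or low 32 bits of the memory address)
         * dw2: addr1  (register offset << 2, or high 32 bits of the memory address)
         * dw3: reference value to compare against
         * dw4: compare mask
         * dw5: RETRY_COUNT(0xfff) | INTERVAL(inv)
         */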
| @@ -378,15 +435,10 @@ static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) | |||
| 378 | else | 435 | else |
| 379 | ref_and_mask = nbio_hf_reg->ref_and_mask_sdma1; | 436 | ref_and_mask = nbio_hf_reg->ref_and_mask_sdma1; |
| 380 | 437 | ||
| 381 | amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | | 438 | sdma_v4_0_wait_reg_mem(ring, 0, 1, |
| 382 | SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) | | 439 | adev->nbio_funcs->get_hdp_flush_done_offset(adev), |
| 383 | SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */ | 440 | adev->nbio_funcs->get_hdp_flush_req_offset(adev), |
| 384 | amdgpu_ring_write(ring, (adev->nbio_funcs->get_hdp_flush_done_offset(adev)) << 2); | 441 | ref_and_mask, ref_and_mask, 10); |
| 385 | amdgpu_ring_write(ring, (adev->nbio_funcs->get_hdp_flush_req_offset(adev)) << 2); | ||
| 386 | amdgpu_ring_write(ring, ref_and_mask); /* reference */ | ||
| 387 | amdgpu_ring_write(ring, ref_and_mask); /* mask */ | ||
| 388 | amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | | ||
| 389 | SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */ | ||
| 390 | } | 442 | } |
| 391 | 443 | ||
| 392 | /** | 444 | /** |
| @@ -1114,16 +1166,10 @@ static void sdma_v4_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) | |||
| 1114 | uint64_t addr = ring->fence_drv.gpu_addr; | 1166 | uint64_t addr = ring->fence_drv.gpu_addr; |
| 1115 | 1167 | ||
| 1116 | /* wait for idle */ | 1168 | /* wait for idle */ |
| 1117 | amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | | 1169 | sdma_v4_0_wait_reg_mem(ring, 1, 0, |
| 1118 | SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) | | 1170 | addr & 0xfffffffc, |
| 1119 | SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */ | 1171 | upper_32_bits(addr) & 0xffffffff, |
| 1120 | SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1)); | 1172 | seq, 0xffffffff, 4); |
| 1121 | amdgpu_ring_write(ring, addr & 0xfffffffc); | ||
| 1122 | amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff); | ||
| 1123 | amdgpu_ring_write(ring, seq); /* reference */ | ||
| 1124 | amdgpu_ring_write(ring, 0xffffffff); /* mask */ | ||
| 1125 | amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | | ||
| 1126 | SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */ | ||
| 1127 | } | 1173 | } |
| 1128 | 1174 | ||
| 1129 | 1175 | ||
| @@ -1154,15 +1200,7 @@ static void sdma_v4_0_ring_emit_wreg(struct amdgpu_ring *ring, | |||
| 1154 | static void sdma_v4_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, | 1200 | static void sdma_v4_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, |
| 1155 | uint32_t val, uint32_t mask) | 1201 | uint32_t val, uint32_t mask) |
| 1156 | { | 1202 | { |
| 1157 | amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | | 1203 | sdma_v4_0_wait_reg_mem(ring, 0, 0, reg, 0, val, mask, 10); |
| 1158 | SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) | | ||
| 1159 | SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */ | ||
| 1160 | amdgpu_ring_write(ring, reg << 2); | ||
| 1161 | amdgpu_ring_write(ring, 0); | ||
| 1162 | amdgpu_ring_write(ring, val); /* reference */ | ||
| 1163 | amdgpu_ring_write(ring, mask); /* mask */ | ||
| 1164 | amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | | ||
| 1165 | SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); | ||
| 1166 | } | 1204 | } |
| 1167 | 1205 | ||
| 1168 | static int sdma_v4_0_early_init(void *handle) | 1206 | static int sdma_v4_0_early_init(void *handle) |
| @@ -1510,6 +1548,7 @@ static int sdma_v4_0_set_clockgating_state(void *handle, | |||
| 1510 | switch (adev->asic_type) { | 1548 | switch (adev->asic_type) { |
| 1511 | case CHIP_VEGA10: | 1549 | case CHIP_VEGA10: |
| 1512 | case CHIP_VEGA12: | 1550 | case CHIP_VEGA12: |
| 1551 | case CHIP_VEGA20: | ||
| 1513 | case CHIP_RAVEN: | 1552 | case CHIP_RAVEN: |
| 1514 | sdma_v4_0_update_medium_grain_clock_gating(adev, | 1553 | sdma_v4_0_update_medium_grain_clock_gating(adev, |
| 1515 | state == AMD_CG_STATE_GATE ? true : false); | 1554 | state == AMD_CG_STATE_GATE ? true : false); |
| @@ -1605,6 +1644,7 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = { | |||
| 1605 | .pad_ib = sdma_v4_0_ring_pad_ib, | 1644 | .pad_ib = sdma_v4_0_ring_pad_ib, |
| 1606 | .emit_wreg = sdma_v4_0_ring_emit_wreg, | 1645 | .emit_wreg = sdma_v4_0_ring_emit_wreg, |
| 1607 | .emit_reg_wait = sdma_v4_0_ring_emit_reg_wait, | 1646 | .emit_reg_wait = sdma_v4_0_ring_emit_reg_wait, |
| 1647 | .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, | ||
| 1608 | }; | 1648 | }; |
| 1609 | 1649 | ||
| 1610 | static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev) | 1650 | static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev) |
diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index a675ec6d2811..c364ef94cc36 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c | |||
| @@ -1252,6 +1252,12 @@ static void si_invalidate_hdp(struct amdgpu_device *adev, | |||
| 1252 | } | 1252 | } |
| 1253 | } | 1253 | } |
| 1254 | 1254 | ||
| 1255 | static bool si_need_full_reset(struct amdgpu_device *adev) | ||
| 1256 | { | ||
| 1257 | /* change this when we support soft reset */ | ||
| 1258 | return true; | ||
| 1259 | } | ||
| 1260 | |||
| 1255 | static int si_get_pcie_lanes(struct amdgpu_device *adev) | 1261 | static int si_get_pcie_lanes(struct amdgpu_device *adev) |
| 1256 | { | 1262 | { |
| 1257 | u32 link_width_cntl; | 1263 | u32 link_width_cntl; |
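Editor's note: returning true unconditionally simply tells the reset path that SI parts always need a full ASIC reset until per-IP soft reset is supported. Presumably the core consults the callback through the asic_funcs table, along the lines of this hedged sketch (the wrapper and both reset helpers below are placeholders, not code from this patch):

        /* Hedged sketch of a consumer; the names below are placeholders. */
        if (adev->asic_funcs->need_full_reset(adev))
                r = do_full_asic_reset(adev);    /* placeholder for the full-reset path */
        else
                r = do_per_ip_soft_reset(adev);  /* placeholder for the soft-reset path */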
| @@ -1332,6 +1338,7 @@ static const struct amdgpu_asic_funcs si_asic_funcs = | |||
| 1332 | .get_config_memsize = &si_get_config_memsize, | 1338 | .get_config_memsize = &si_get_config_memsize, |
| 1333 | .flush_hdp = &si_flush_hdp, | 1339 | .flush_hdp = &si_flush_hdp, |
| 1334 | .invalidate_hdp = &si_invalidate_hdp, | 1340 | .invalidate_hdp = &si_invalidate_hdp, |
| 1341 | .need_full_reset = &si_need_full_reset, | ||
| 1335 | }; | 1342 | }; |
| 1336 | 1343 | ||
| 1337 | static uint32_t si_get_rev_id(struct amdgpu_device *adev) | 1344 | static uint32_t si_get_rev_id(struct amdgpu_device *adev) |
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c index 797d505bf9ee..b12d7c9d42a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c | |||
| @@ -7580,7 +7580,7 @@ static int si_dpm_late_init(void *handle) | |||
| 7580 | int ret; | 7580 | int ret; |
| 7581 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 7581 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 7582 | 7582 | ||
| 7583 | if (!amdgpu_dpm) | 7583 | if (!adev->pm.dpm_enabled) |
| 7584 | return 0; | 7584 | return 0; |
| 7585 | 7585 | ||
| 7586 | ret = si_set_temperature_range(adev); | 7586 | ret = si_set_temperature_range(adev); |
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 51cf8a30f6c2..68b4a22a8892 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c | |||
| @@ -41,8 +41,6 @@ | |||
| 41 | #include "sdma1/sdma1_4_0_offset.h" | 41 | #include "sdma1/sdma1_4_0_offset.h" |
| 42 | #include "hdp/hdp_4_0_offset.h" | 42 | #include "hdp/hdp_4_0_offset.h" |
| 43 | #include "hdp/hdp_4_0_sh_mask.h" | 43 | #include "hdp/hdp_4_0_sh_mask.h" |
| 44 | #include "mp/mp_9_0_offset.h" | ||
| 45 | #include "mp/mp_9_0_sh_mask.h" | ||
| 46 | #include "smuio/smuio_9_0_offset.h" | 44 | #include "smuio/smuio_9_0_offset.h" |
| 47 | #include "smuio/smuio_9_0_sh_mask.h" | 45 | #include "smuio/smuio_9_0_sh_mask.h" |
| 48 | 46 | ||
| @@ -52,6 +50,8 @@ | |||
| 52 | #include "gmc_v9_0.h" | 50 | #include "gmc_v9_0.h" |
| 53 | #include "gfxhub_v1_0.h" | 51 | #include "gfxhub_v1_0.h" |
| 54 | #include "mmhub_v1_0.h" | 52 | #include "mmhub_v1_0.h" |
| 53 | #include "df_v1_7.h" | ||
| 54 | #include "df_v3_6.h" | ||
| 55 | #include "vega10_ih.h" | 55 | #include "vega10_ih.h" |
| 56 | #include "sdma_v4_0.h" | 56 | #include "sdma_v4_0.h" |
| 57 | #include "uvd_v7_0.h" | 57 | #include "uvd_v7_0.h" |
| @@ -60,33 +60,6 @@ | |||
| 60 | #include "dce_virtual.h" | 60 | #include "dce_virtual.h" |
| 61 | #include "mxgpu_ai.h" | 61 | #include "mxgpu_ai.h" |
| 62 | 62 | ||
| 63 | #define mmFabricConfigAccessControl 0x0410 | ||
| 64 | #define mmFabricConfigAccessControl_BASE_IDX 0 | ||
| 65 | #define mmFabricConfigAccessControl_DEFAULT 0x00000000 | ||
| 66 | //FabricConfigAccessControl | ||
| 67 | #define FabricConfigAccessControl__CfgRegInstAccEn__SHIFT 0x0 | ||
| 68 | #define FabricConfigAccessControl__CfgRegInstAccRegLock__SHIFT 0x1 | ||
| 69 | #define FabricConfigAccessControl__CfgRegInstID__SHIFT 0x10 | ||
| 70 | #define FabricConfigAccessControl__CfgRegInstAccEn_MASK 0x00000001L | ||
| 71 | #define FabricConfigAccessControl__CfgRegInstAccRegLock_MASK 0x00000002L | ||
| 72 | #define FabricConfigAccessControl__CfgRegInstID_MASK 0x00FF0000L | ||
| 73 | |||
| 74 | |||
| 75 | #define mmDF_PIE_AON0_DfGlobalClkGater 0x00fc | ||
| 76 | #define mmDF_PIE_AON0_DfGlobalClkGater_BASE_IDX 0 | ||
| 77 | //DF_PIE_AON0_DfGlobalClkGater | ||
| 78 | #define DF_PIE_AON0_DfGlobalClkGater__MGCGMode__SHIFT 0x0 | ||
| 79 | #define DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK 0x0000000FL | ||
| 80 | |||
| 81 | enum { | ||
| 82 | DF_MGCG_DISABLE = 0, | ||
| 83 | DF_MGCG_ENABLE_00_CYCLE_DELAY =1, | ||
| 84 | DF_MGCG_ENABLE_01_CYCLE_DELAY =2, | ||
| 85 | DF_MGCG_ENABLE_15_CYCLE_DELAY =13, | ||
| 86 | DF_MGCG_ENABLE_31_CYCLE_DELAY =14, | ||
| 87 | DF_MGCG_ENABLE_63_CYCLE_DELAY =15 | ||
| 88 | }; | ||
| 89 | |||
| 90 | #define mmMP0_MISC_CGTT_CTRL0 0x01b9 | 63 | #define mmMP0_MISC_CGTT_CTRL0 0x01b9 |
| 91 | #define mmMP0_MISC_CGTT_CTRL0_BASE_IDX 0 | 64 | #define mmMP0_MISC_CGTT_CTRL0_BASE_IDX 0 |
| 92 | #define mmMP0_MISC_LIGHT_SLEEP_CTRL 0x01ba | 65 | #define mmMP0_MISC_LIGHT_SLEEP_CTRL 0x01ba |
| @@ -313,6 +286,7 @@ static struct soc15_allowed_register_entry soc15_allowed_read_registers[] = { | |||
| 313 | { SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STALLED_STAT1)}, | 286 | { SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STALLED_STAT1)}, |
| 314 | { SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STATUS)}, | 287 | { SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STATUS)}, |
| 315 | { SOC15_REG_ENTRY(GC, 0, mmGB_ADDR_CONFIG)}, | 288 | { SOC15_REG_ENTRY(GC, 0, mmGB_ADDR_CONFIG)}, |
| 289 | { SOC15_REG_ENTRY(GC, 0, mmDB_DEBUG2)}, | ||
| 316 | }; | 290 | }; |
| 317 | 291 | ||
| 318 | static uint32_t soc15_read_indexed_register(struct amdgpu_device *adev, u32 se_num, | 292 | static uint32_t soc15_read_indexed_register(struct amdgpu_device *adev, u32 se_num, |
| @@ -341,6 +315,8 @@ static uint32_t soc15_get_register_value(struct amdgpu_device *adev, | |||
| 341 | } else { | 315 | } else { |
| 342 | if (reg_offset == SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG)) | 316 | if (reg_offset == SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG)) |
| 343 | return adev->gfx.config.gb_addr_config; | 317 | return adev->gfx.config.gb_addr_config; |
| 318 | else if (reg_offset == SOC15_REG_OFFSET(GC, 0, mmDB_DEBUG2)) | ||
| 319 | return adev->gfx.config.db_debug2; | ||
| 344 | return RREG32(reg_offset); | 320 | return RREG32(reg_offset); |
| 345 | } | 321 | } |
| 346 | } | 322 | } |
| @@ -512,15 +488,24 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) | |||
| 512 | case CHIP_RAVEN: | 488 | case CHIP_RAVEN: |
| 513 | vega10_reg_base_init(adev); | 489 | vega10_reg_base_init(adev); |
| 514 | break; | 490 | break; |
| 491 | case CHIP_VEGA20: | ||
| 492 | vega20_reg_base_init(adev); | ||
| 493 | break; | ||
| 515 | default: | 494 | default: |
| 516 | return -EINVAL; | 495 | return -EINVAL; |
| 517 | } | 496 | } |
| 518 | 497 | ||
| 519 | if (adev->flags & AMD_IS_APU) | 498 | if (adev->flags & AMD_IS_APU) |
| 520 | adev->nbio_funcs = &nbio_v7_0_funcs; | 499 | adev->nbio_funcs = &nbio_v7_0_funcs; |
| 500 | else if (adev->asic_type == CHIP_VEGA20) | ||
| 501 | adev->nbio_funcs = &nbio_v7_0_funcs; | ||
| 521 | else | 502 | else |
| 522 | adev->nbio_funcs = &nbio_v6_1_funcs; | 503 | adev->nbio_funcs = &nbio_v6_1_funcs; |
| 523 | 504 | ||
| 505 | if (adev->asic_type == CHIP_VEGA20) | ||
| 506 | adev->df_funcs = &df_v3_6_funcs; | ||
| 507 | else | ||
| 508 | adev->df_funcs = &df_v1_7_funcs; | ||
| 524 | adev->nbio_funcs->detect_hw_virt(adev); | 509 | adev->nbio_funcs->detect_hw_virt(adev); |
| 525 | 510 | ||
| 526 | if (amdgpu_sriov_vf(adev)) | 511 | if (amdgpu_sriov_vf(adev)) |
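Editor's note: the new per-ASIC df_funcs pointer replaces the open-coded DF register handling removed from this file further down. Based purely on the call sites in this patch, the interface the common code relies on looks at least like the following sketch (the real df_v1_7/df_v3_6 tables live in the new df_v*.c files and may define more hooks):

        /* Minimum DF interface implied by the call sites in this file. */
        struct amdgpu_df_funcs {
                void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev,
                                                         bool enable);
                void (*get_clockgating_state)(struct amdgpu_device *adev, u32 *flags);
                /* ... further DF ops as defined in df_v1_7.c / df_v3_6.c ... */
        };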
| @@ -529,12 +514,15 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) | |||
| 529 | switch (adev->asic_type) { | 514 | switch (adev->asic_type) { |
| 530 | case CHIP_VEGA10: | 515 | case CHIP_VEGA10: |
| 531 | case CHIP_VEGA12: | 516 | case CHIP_VEGA12: |
| 517 | case CHIP_VEGA20: | ||
| 532 | amdgpu_device_ip_block_add(adev, &vega10_common_ip_block); | 518 | amdgpu_device_ip_block_add(adev, &vega10_common_ip_block); |
| 533 | amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block); | 519 | amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block); |
| 534 | amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); | 520 | amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); |
| 535 | amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block); | 521 | if (adev->asic_type != CHIP_VEGA20) { |
| 536 | if (!amdgpu_sriov_vf(adev)) | 522 | amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block); |
| 537 | amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); | 523 | if (!amdgpu_sriov_vf(adev)) |
| 524 | amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); | ||
| 525 | } | ||
| 538 | if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) | 526 | if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) |
| 539 | amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); | 527 | amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); |
| 540 | #if defined(CONFIG_DRM_AMD_DC) | 528 | #if defined(CONFIG_DRM_AMD_DC) |
| @@ -593,6 +581,12 @@ static void soc15_invalidate_hdp(struct amdgpu_device *adev, | |||
| 593 | HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1); | 581 | HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1); |
| 594 | } | 582 | } |
| 595 | 583 | ||
| 584 | static bool soc15_need_full_reset(struct amdgpu_device *adev) | ||
| 585 | { | ||
| 586 | /* change this when we implement soft reset */ | ||
| 587 | return true; | ||
| 588 | } | ||
| 589 | |||
| 596 | static const struct amdgpu_asic_funcs soc15_asic_funcs = | 590 | static const struct amdgpu_asic_funcs soc15_asic_funcs = |
| 597 | { | 591 | { |
| 598 | .read_disabled_bios = &soc15_read_disabled_bios, | 592 | .read_disabled_bios = &soc15_read_disabled_bios, |
| @@ -606,6 +600,7 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs = | |||
| 606 | .get_config_memsize = &soc15_get_config_memsize, | 600 | .get_config_memsize = &soc15_get_config_memsize, |
| 607 | .flush_hdp = &soc15_flush_hdp, | 601 | .flush_hdp = &soc15_flush_hdp, |
| 608 | .invalidate_hdp = &soc15_invalidate_hdp, | 602 | .invalidate_hdp = &soc15_invalidate_hdp, |
| 603 | .need_full_reset = &soc15_need_full_reset, | ||
| 609 | }; | 604 | }; |
| 610 | 605 | ||
| 611 | static int soc15_common_early_init(void *handle) | 606 | static int soc15_common_early_init(void *handle) |
| @@ -675,6 +670,27 @@ static int soc15_common_early_init(void *handle) | |||
| 675 | adev->pg_flags = 0; | 670 | adev->pg_flags = 0; |
| 676 | adev->external_rev_id = adev->rev_id + 0x14; | 671 | adev->external_rev_id = adev->rev_id + 0x14; |
| 677 | break; | 672 | break; |
| 673 | case CHIP_VEGA20: | ||
| 674 | adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | | ||
| 675 | AMD_CG_SUPPORT_GFX_MGLS | | ||
| 676 | AMD_CG_SUPPORT_GFX_CGCG | | ||
| 677 | AMD_CG_SUPPORT_GFX_CGLS | | ||
| 678 | AMD_CG_SUPPORT_GFX_3D_CGCG | | ||
| 679 | AMD_CG_SUPPORT_GFX_3D_CGLS | | ||
| 680 | AMD_CG_SUPPORT_GFX_CP_LS | | ||
| 681 | AMD_CG_SUPPORT_MC_LS | | ||
| 682 | AMD_CG_SUPPORT_MC_MGCG | | ||
| 683 | AMD_CG_SUPPORT_SDMA_MGCG | | ||
| 684 | AMD_CG_SUPPORT_SDMA_LS | | ||
| 685 | AMD_CG_SUPPORT_BIF_MGCG | | ||
| 686 | AMD_CG_SUPPORT_BIF_LS | | ||
| 687 | AMD_CG_SUPPORT_HDP_MGCG | | ||
| 688 | AMD_CG_SUPPORT_ROM_MGCG | | ||
| 689 | AMD_CG_SUPPORT_VCE_MGCG | | ||
| 690 | AMD_CG_SUPPORT_UVD_MGCG; | ||
| 691 | adev->pg_flags = 0; | ||
| 692 | adev->external_rev_id = adev->rev_id + 0x28; | ||
| 693 | break; | ||
| 678 | case CHIP_RAVEN: | 694 | case CHIP_RAVEN: |
| 679 | adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | | 695 | adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | |
| 680 | AMD_CG_SUPPORT_GFX_MGLS | | 696 | AMD_CG_SUPPORT_GFX_MGLS | |
| @@ -694,8 +710,15 @@ static int soc15_common_early_init(void *handle) | |||
| 694 | AMD_CG_SUPPORT_MC_MGCG | | 710 | AMD_CG_SUPPORT_MC_MGCG | |
| 695 | AMD_CG_SUPPORT_MC_LS | | 711 | AMD_CG_SUPPORT_MC_LS | |
| 696 | AMD_CG_SUPPORT_SDMA_MGCG | | 712 | AMD_CG_SUPPORT_SDMA_MGCG | |
| 697 | AMD_CG_SUPPORT_SDMA_LS; | 713 | AMD_CG_SUPPORT_SDMA_LS | |
| 698 | adev->pg_flags = AMD_PG_SUPPORT_SDMA; | 714 | AMD_CG_SUPPORT_VCN_MGCG; |
| 715 | |||
| 716 | adev->pg_flags = AMD_PG_SUPPORT_SDMA | AMD_PG_SUPPORT_VCN; | ||
| 717 | |||
| 718 | if (adev->powerplay.pp_feature & PP_GFXOFF_MASK) | ||
| 719 | adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | | ||
| 720 | AMD_PG_SUPPORT_CP | | ||
| 721 | AMD_PG_SUPPORT_RLC_SMU_HS; | ||
| 699 | 722 | ||
| 700 | adev->external_rev_id = 0x1; | 723 | adev->external_rev_id = 0x1; |
| 701 | break; | 724 | break; |
| @@ -871,32 +894,6 @@ static void soc15_update_rom_medium_grain_clock_gating(struct amdgpu_device *ade | |||
| 871 | WREG32(SOC15_REG_OFFSET(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0), data); | 894 | WREG32(SOC15_REG_OFFSET(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0), data); |
| 872 | } | 895 | } |
| 873 | 896 | ||
| 874 | static void soc15_update_df_medium_grain_clock_gating(struct amdgpu_device *adev, | ||
| 875 | bool enable) | ||
| 876 | { | ||
| 877 | uint32_t data; | ||
| 878 | |||
| 879 | /* Put DF on broadcast mode */ | ||
| 880 | data = RREG32(SOC15_REG_OFFSET(DF, 0, mmFabricConfigAccessControl)); | ||
| 881 | data &= ~FabricConfigAccessControl__CfgRegInstAccEn_MASK; | ||
| 882 | WREG32(SOC15_REG_OFFSET(DF, 0, mmFabricConfigAccessControl), data); | ||
| 883 | |||
| 884 | if (enable && (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG)) { | ||
| 885 | data = RREG32(SOC15_REG_OFFSET(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater)); | ||
| 886 | data &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK; | ||
| 887 | data |= DF_MGCG_ENABLE_15_CYCLE_DELAY; | ||
| 888 | WREG32(SOC15_REG_OFFSET(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater), data); | ||
| 889 | } else { | ||
| 890 | data = RREG32(SOC15_REG_OFFSET(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater)); | ||
| 891 | data &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK; | ||
| 892 | data |= DF_MGCG_DISABLE; | ||
| 893 | WREG32(SOC15_REG_OFFSET(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater), data); | ||
| 894 | } | ||
| 895 | |||
| 896 | WREG32(SOC15_REG_OFFSET(DF, 0, mmFabricConfigAccessControl), | ||
| 897 | mmFabricConfigAccessControl_DEFAULT); | ||
| 898 | } | ||
| 899 | |||
| 900 | static int soc15_common_set_clockgating_state(void *handle, | 897 | static int soc15_common_set_clockgating_state(void *handle, |
| 901 | enum amd_clockgating_state state) | 898 | enum amd_clockgating_state state) |
| 902 | { | 899 | { |
| @@ -908,6 +905,7 @@ static int soc15_common_set_clockgating_state(void *handle, | |||
| 908 | switch (adev->asic_type) { | 905 | switch (adev->asic_type) { |
| 909 | case CHIP_VEGA10: | 906 | case CHIP_VEGA10: |
| 910 | case CHIP_VEGA12: | 907 | case CHIP_VEGA12: |
| 908 | case CHIP_VEGA20: | ||
| 911 | adev->nbio_funcs->update_medium_grain_clock_gating(adev, | 909 | adev->nbio_funcs->update_medium_grain_clock_gating(adev, |
| 912 | state == AMD_CG_STATE_GATE ? true : false); | 910 | state == AMD_CG_STATE_GATE ? true : false); |
| 913 | adev->nbio_funcs->update_medium_grain_light_sleep(adev, | 911 | adev->nbio_funcs->update_medium_grain_light_sleep(adev, |
| @@ -920,7 +918,7 @@ static int soc15_common_set_clockgating_state(void *handle, | |||
| 920 | state == AMD_CG_STATE_GATE ? true : false); | 918 | state == AMD_CG_STATE_GATE ? true : false); |
| 921 | soc15_update_rom_medium_grain_clock_gating(adev, | 919 | soc15_update_rom_medium_grain_clock_gating(adev, |
| 922 | state == AMD_CG_STATE_GATE ? true : false); | 920 | state == AMD_CG_STATE_GATE ? true : false); |
| 923 | soc15_update_df_medium_grain_clock_gating(adev, | 921 | adev->df_funcs->update_medium_grain_clock_gating(adev, |
| 924 | state == AMD_CG_STATE_GATE ? true : false); | 922 | state == AMD_CG_STATE_GATE ? true : false); |
| 925 | break; | 923 | break; |
| 926 | case CHIP_RAVEN: | 924 | case CHIP_RAVEN: |
| @@ -973,10 +971,7 @@ static void soc15_common_get_clockgating_state(void *handle, u32 *flags) | |||
| 973 | if (!(data & CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK)) | 971 | if (!(data & CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK)) |
| 974 | *flags |= AMD_CG_SUPPORT_ROM_MGCG; | 972 | *flags |= AMD_CG_SUPPORT_ROM_MGCG; |
| 975 | 973 | ||
| 976 | /* AMD_CG_SUPPORT_DF_MGCG */ | 974 | adev->df_funcs->get_clockgating_state(adev, flags); |
| 977 | data = RREG32(SOC15_REG_OFFSET(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater)); | ||
| 978 | if (data & DF_MGCG_ENABLE_15_CYCLE_DELAY) | ||
| 979 | *flags |= AMD_CG_SUPPORT_DF_MGCG; | ||
| 980 | } | 975 | } |
| 981 | 976 | ||
| 982 | static int soc15_common_set_powergating_state(void *handle, | 977 | static int soc15_common_set_powergating_state(void *handle, |
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h index f70da8a29f86..1f714b7af520 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15.h | |||
| @@ -55,5 +55,6 @@ void soc15_program_register_sequence(struct amdgpu_device *adev, | |||
| 55 | const u32 array_size); | 55 | const u32 array_size); |
| 56 | 56 | ||
| 57 | int vega10_reg_base_init(struct amdgpu_device *adev); | 57 | int vega10_reg_base_init(struct amdgpu_device *adev); |
| 58 | int vega20_reg_base_init(struct amdgpu_device *adev); | ||
| 58 | 59 | ||
| 59 | #endif | 60 | #endif |
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h index def865067edd..0942f492d2e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h | |||
| @@ -47,6 +47,21 @@ | |||
| 47 | #define WREG32_SOC15_OFFSET(ip, inst, reg, offset, value) \ | 47 | #define WREG32_SOC15_OFFSET(ip, inst, reg, offset, value) \ |
| 48 | WREG32((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset, value) | 48 | WREG32((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset, value) |
| 49 | 49 | ||
| 50 | #define SOC15_WAIT_ON_RREG(ip, inst, reg, expected_value, mask, ret) \ | ||
| 51 | do { \ | ||
| 52 | uint32_t tmp_ = RREG32(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg); \ | ||
| 53 | uint32_t loop = adev->usec_timeout; \ | ||
| 54 | while ((tmp_ & (mask)) != (expected_value)) { \ | ||
| 55 | udelay(2); \ | ||
| 56 | tmp_ = RREG32(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg); \ | ||
| 57 | loop--; \ | ||
| 58 | if (!loop) { \ | ||
| 59 | ret = -ETIMEDOUT; \ | ||
| 60 | break; \ | ||
| 61 | } \ | ||
| 62 | } \ | ||
| 63 | } while (0) | ||
| 64 | |||
| 50 | #endif | 65 | #endif |
| 51 | 66 | ||
| 52 | 67 | ||
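Editor's note: SOC15_WAIT_ON_RREG polls the register in 2 µs steps for up to adev->usec_timeout iterations and reports a timeout through the caller-supplied ret lvalue, which is only written on failure, so it must be pre-initialised. A usage sketch with made-up register and mask names:

        /* Illustrative only: mmSOME_STATUS and SOME_STATUS__IDLE_MASK are placeholders. */
        int r = 0;

        SOC15_WAIT_ON_RREG(GC, 0, mmSOME_STATUS,
                           SOME_STATUS__IDLE_MASK,   /* expected value */
                           SOME_STATUS__IDLE_MASK,   /* bits to compare */
                           r);
        if (r == -ETIMEDOUT)
                dev_err(adev->dev, "timed out waiting for idle\n");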
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h index 7f408f85fdb6..8dc29107228f 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15d.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h | |||
| @@ -159,6 +159,7 @@ | |||
| 159 | #define EOP_TC_WB_ACTION_EN (1 << 15) /* L2 */ | 159 | #define EOP_TC_WB_ACTION_EN (1 << 15) /* L2 */ |
| 160 | #define EOP_TCL1_ACTION_EN (1 << 16) | 160 | #define EOP_TCL1_ACTION_EN (1 << 16) |
| 161 | #define EOP_TC_ACTION_EN (1 << 17) /* L2 */ | 161 | #define EOP_TC_ACTION_EN (1 << 17) /* L2 */ |
| 162 | #define EOP_TC_NC_ACTION_EN (1 << 19) | ||
| 162 | #define EOP_TC_MD_ACTION_EN (1 << 21) /* L2 metadata */ | 163 | #define EOP_TC_MD_ACTION_EN (1 << 21) /* L2 metadata */ |
| 163 | 164 | ||
| 164 | #define DATA_SEL(x) ((x) << 29) | 165 | #define DATA_SEL(x) ((x) << 29) |
| @@ -268,6 +269,11 @@ | |||
| 268 | * x=1: tmz_end | 269 | * x=1: tmz_end |
| 269 | */ | 270 | */ |
| 270 | 271 | ||
| 272 | #define PACKET3_INVALIDATE_TLBS 0x98 | ||
| 273 | # define PACKET3_INVALIDATE_TLBS_DST_SEL(x) ((x) << 0) | ||
| 274 | # define PACKET3_INVALIDATE_TLBS_ALL_HUB(x) ((x) << 4) | ||
| 275 | # define PACKET3_INVALIDATE_TLBS_PASID(x) ((x) << 5) | ||
| 276 | # define PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(x) ((x) << 29) | ||
| 271 | #define PACKET3_SET_RESOURCES 0xA0 | 277 | #define PACKET3_SET_RESOURCES 0xA0 |
| 272 | /* 1. header | 278 | /* 1. header |
| 273 | * 2. CONTROL | 279 | * 2. CONTROL |
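Editor's note: the new PACKET3_INVALIDATE_TLBS opcode and its field encoders are enough to assemble the two-dword PM4 packet; a hedged sketch of how a ring might emit it (the consumer is not part of this hunk, and pasid/flush_type below are example values):

        /* Sketch; pasid and flush_type are illustrative variables. */
        amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
        amdgpu_ring_write(ring,
                          PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
                          PACKET3_INVALIDATE_TLBS_ALL_HUB(1) |
                          PACKET3_INVALIDATE_TLBS_PASID(pasid) |
                          PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));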
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index 948bb9437757..6fed3d7797a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c | |||
| @@ -93,6 +93,7 @@ static void uvd_v4_2_ring_set_wptr(struct amdgpu_ring *ring) | |||
| 93 | static int uvd_v4_2_early_init(void *handle) | 93 | static int uvd_v4_2_early_init(void *handle) |
| 94 | { | 94 | { |
| 95 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 95 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 96 | adev->uvd.num_uvd_inst = 1; | ||
| 96 | 97 | ||
| 97 | uvd_v4_2_set_ring_funcs(adev); | 98 | uvd_v4_2_set_ring_funcs(adev); |
| 98 | uvd_v4_2_set_irq_funcs(adev); | 99 | uvd_v4_2_set_irq_funcs(adev); |
| @@ -107,7 +108,7 @@ static int uvd_v4_2_sw_init(void *handle) | |||
| 107 | int r; | 108 | int r; |
| 108 | 109 | ||
| 109 | /* UVD TRAP */ | 110 | /* UVD TRAP */ |
| 110 | r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 124, &adev->uvd.irq); | 111 | r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 124, &adev->uvd.inst->irq); |
| 111 | if (r) | 112 | if (r) |
| 112 | return r; | 113 | return r; |
| 113 | 114 | ||
| @@ -119,9 +120,9 @@ static int uvd_v4_2_sw_init(void *handle) | |||
| 119 | if (r) | 120 | if (r) |
| 120 | return r; | 121 | return r; |
| 121 | 122 | ||
| 122 | ring = &adev->uvd.ring; | 123 | ring = &adev->uvd.inst->ring; |
| 123 | sprintf(ring->name, "uvd"); | 124 | sprintf(ring->name, "uvd"); |
| 124 | r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0); | 125 | r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0); |
| 125 | 126 | ||
| 126 | return r; | 127 | return r; |
| 127 | } | 128 | } |
| @@ -150,7 +151,7 @@ static void uvd_v4_2_enable_mgcg(struct amdgpu_device *adev, | |||
| 150 | static int uvd_v4_2_hw_init(void *handle) | 151 | static int uvd_v4_2_hw_init(void *handle) |
| 151 | { | 152 | { |
| 152 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 153 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 153 | struct amdgpu_ring *ring = &adev->uvd.ring; | 154 | struct amdgpu_ring *ring = &adev->uvd.inst->ring; |
| 154 | uint32_t tmp; | 155 | uint32_t tmp; |
| 155 | int r; | 156 | int r; |
| 156 | 157 | ||
| @@ -208,7 +209,7 @@ done: | |||
| 208 | static int uvd_v4_2_hw_fini(void *handle) | 209 | static int uvd_v4_2_hw_fini(void *handle) |
| 209 | { | 210 | { |
| 210 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 211 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 211 | struct amdgpu_ring *ring = &adev->uvd.ring; | 212 | struct amdgpu_ring *ring = &adev->uvd.inst->ring; |
| 212 | 213 | ||
| 213 | if (RREG32(mmUVD_STATUS) != 0) | 214 | if (RREG32(mmUVD_STATUS) != 0) |
| 214 | uvd_v4_2_stop(adev); | 215 | uvd_v4_2_stop(adev); |
| @@ -251,7 +252,7 @@ static int uvd_v4_2_resume(void *handle) | |||
| 251 | */ | 252 | */ |
| 252 | static int uvd_v4_2_start(struct amdgpu_device *adev) | 253 | static int uvd_v4_2_start(struct amdgpu_device *adev) |
| 253 | { | 254 | { |
| 254 | struct amdgpu_ring *ring = &adev->uvd.ring; | 255 | struct amdgpu_ring *ring = &adev->uvd.inst->ring; |
| 255 | uint32_t rb_bufsz; | 256 | uint32_t rb_bufsz; |
| 256 | int i, j, r; | 257 | int i, j, r; |
| 257 | u32 tmp; | 258 | u32 tmp; |
| @@ -523,6 +524,18 @@ static void uvd_v4_2_ring_emit_ib(struct amdgpu_ring *ring, | |||
| 523 | amdgpu_ring_write(ring, ib->length_dw); | 524 | amdgpu_ring_write(ring, ib->length_dw); |
| 524 | } | 525 | } |
| 525 | 526 | ||
| 527 | static void uvd_v4_2_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) | ||
| 528 | { | ||
| 529 | int i; | ||
| 530 | |||
| 531 | WARN_ON(ring->wptr % 2 || count % 2); | ||
| 532 | |||
| 533 | for (i = 0; i < count / 2; i++) { | ||
| 534 | amdgpu_ring_write(ring, PACKET0(mmUVD_NO_OP, 0)); | ||
| 535 | amdgpu_ring_write(ring, 0); | ||
| 536 | } | ||
| 537 | } | ||
| 538 | |||
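Editor's note: each UVD NOP is a two-dword register write (a PACKET0(mmUVD_NO_OP, 0) header plus a zero payload), so the helper above insists on even counts and emits count/2 pairs; that is also why the generic .nop entry is dropped from the ring funcs below in favour of this override. A quick usage sketch:

        /* Pad by four dwords, i.e. two PACKET0(mmUVD_NO_OP, 0) / 0 pairs. */
        uvd_v4_2_ring_insert_nop(ring, 4);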
| 526 | /** | 539 | /** |
| 527 | * uvd_v4_2_mc_resume - memory controller programming | 540 | * uvd_v4_2_mc_resume - memory controller programming |
| 528 | * | 541 | * |
| @@ -536,7 +549,7 @@ static void uvd_v4_2_mc_resume(struct amdgpu_device *adev) | |||
| 536 | uint32_t size; | 549 | uint32_t size; |
| 537 | 550 | ||
| 538 /* program the VCPU memory controller bits 0-27 */ | 551 /* program the VCPU memory controller bits 0-27 */ |
| 539 | addr = (adev->uvd.gpu_addr + AMDGPU_UVD_FIRMWARE_OFFSET) >> 3; | 552 | addr = (adev->uvd.inst->gpu_addr + AMDGPU_UVD_FIRMWARE_OFFSET) >> 3; |
| 540 | size = AMDGPU_UVD_FIRMWARE_SIZE(adev) >> 3; | 553 | size = AMDGPU_UVD_FIRMWARE_SIZE(adev) >> 3; |
| 541 | WREG32(mmUVD_VCPU_CACHE_OFFSET0, addr); | 554 | WREG32(mmUVD_VCPU_CACHE_OFFSET0, addr); |
| 542 | WREG32(mmUVD_VCPU_CACHE_SIZE0, size); | 555 | WREG32(mmUVD_VCPU_CACHE_SIZE0, size); |
| @@ -553,11 +566,11 @@ static void uvd_v4_2_mc_resume(struct amdgpu_device *adev) | |||
| 553 | WREG32(mmUVD_VCPU_CACHE_SIZE2, size); | 566 | WREG32(mmUVD_VCPU_CACHE_SIZE2, size); |
| 554 | 567 | ||
| 555 | /* bits 28-31 */ | 568 | /* bits 28-31 */ |
| 556 | addr = (adev->uvd.gpu_addr >> 28) & 0xF; | 569 | addr = (adev->uvd.inst->gpu_addr >> 28) & 0xF; |
| 557 | WREG32(mmUVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0)); | 570 | WREG32(mmUVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0)); |
| 558 | 571 | ||
| 559 | /* bits 32-39 */ | 572 | /* bits 32-39 */ |
| 560 | addr = (adev->uvd.gpu_addr >> 32) & 0xFF; | 573 | addr = (adev->uvd.inst->gpu_addr >> 32) & 0xFF; |
| 561 | WREG32(mmUVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31)); | 574 | WREG32(mmUVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31)); |
| 562 | 575 | ||
| 563 | WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config); | 576 | WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config); |
| @@ -664,7 +677,7 @@ static int uvd_v4_2_process_interrupt(struct amdgpu_device *adev, | |||
| 664 | struct amdgpu_iv_entry *entry) | 677 | struct amdgpu_iv_entry *entry) |
| 665 | { | 678 | { |
| 666 | DRM_DEBUG("IH: UVD TRAP\n"); | 679 | DRM_DEBUG("IH: UVD TRAP\n"); |
| 667 | amdgpu_fence_process(&adev->uvd.ring); | 680 | amdgpu_fence_process(&adev->uvd.inst->ring); |
| 668 | return 0; | 681 | return 0; |
| 669 | } | 682 | } |
| 670 | 683 | ||
| @@ -688,7 +701,7 @@ static int uvd_v4_2_set_powergating_state(void *handle, | |||
| 688 | 701 | ||
| 689 | if (state == AMD_PG_STATE_GATE) { | 702 | if (state == AMD_PG_STATE_GATE) { |
| 690 | uvd_v4_2_stop(adev); | 703 | uvd_v4_2_stop(adev); |
| 691 | if (adev->pg_flags & AMD_PG_SUPPORT_UVD && amdgpu_dpm == 0) { | 704 | if (adev->pg_flags & AMD_PG_SUPPORT_UVD && !adev->pm.dpm_enabled) { |
| 692 | if (!(RREG32_SMC(ixCURRENT_PG_STATUS) & | 705 | if (!(RREG32_SMC(ixCURRENT_PG_STATUS) & |
| 693 | CURRENT_PG_STATUS__UVD_PG_STATUS_MASK)) { | 706 | CURRENT_PG_STATUS__UVD_PG_STATUS_MASK)) { |
| 694 | WREG32(mmUVD_PGFSM_CONFIG, (UVD_PGFSM_CONFIG__UVD_PGFSM_FSM_ADDR_MASK | | 707 | WREG32(mmUVD_PGFSM_CONFIG, (UVD_PGFSM_CONFIG__UVD_PGFSM_FSM_ADDR_MASK | |
| @@ -699,7 +712,7 @@ static int uvd_v4_2_set_powergating_state(void *handle, | |||
| 699 | } | 712 | } |
| 700 | return 0; | 713 | return 0; |
| 701 | } else { | 714 | } else { |
| 702 | if (adev->pg_flags & AMD_PG_SUPPORT_UVD && amdgpu_dpm == 0) { | 715 | if (adev->pg_flags & AMD_PG_SUPPORT_UVD && !adev->pm.dpm_enabled) { |
| 703 | if (RREG32_SMC(ixCURRENT_PG_STATUS) & | 716 | if (RREG32_SMC(ixCURRENT_PG_STATUS) & |
| 704 | CURRENT_PG_STATUS__UVD_PG_STATUS_MASK) { | 717 | CURRENT_PG_STATUS__UVD_PG_STATUS_MASK) { |
| 705 | WREG32(mmUVD_PGFSM_CONFIG, (UVD_PGFSM_CONFIG__UVD_PGFSM_FSM_ADDR_MASK | | 718 | WREG32(mmUVD_PGFSM_CONFIG, (UVD_PGFSM_CONFIG__UVD_PGFSM_FSM_ADDR_MASK | |
| @@ -732,7 +745,6 @@ static const struct amd_ip_funcs uvd_v4_2_ip_funcs = { | |||
| 732 | static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = { | 745 | static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = { |
| 733 | .type = AMDGPU_RING_TYPE_UVD, | 746 | .type = AMDGPU_RING_TYPE_UVD, |
| 734 | .align_mask = 0xf, | 747 | .align_mask = 0xf, |
| 735 | .nop = PACKET0(mmUVD_NO_OP, 0), | ||
| 736 | .support_64bit_ptrs = false, | 748 | .support_64bit_ptrs = false, |
| 737 | .get_rptr = uvd_v4_2_ring_get_rptr, | 749 | .get_rptr = uvd_v4_2_ring_get_rptr, |
| 738 | .get_wptr = uvd_v4_2_ring_get_wptr, | 750 | .get_wptr = uvd_v4_2_ring_get_wptr, |
| @@ -745,7 +757,7 @@ static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = { | |||
| 745 | .emit_fence = uvd_v4_2_ring_emit_fence, | 757 | .emit_fence = uvd_v4_2_ring_emit_fence, |
| 746 | .test_ring = uvd_v4_2_ring_test_ring, | 758 | .test_ring = uvd_v4_2_ring_test_ring, |
| 747 | .test_ib = amdgpu_uvd_ring_test_ib, | 759 | .test_ib = amdgpu_uvd_ring_test_ib, |
| 748 | .insert_nop = amdgpu_ring_insert_nop, | 760 | .insert_nop = uvd_v4_2_ring_insert_nop, |
| 749 | .pad_ib = amdgpu_ring_generic_pad_ib, | 761 | .pad_ib = amdgpu_ring_generic_pad_ib, |
| 750 | .begin_use = amdgpu_uvd_ring_begin_use, | 762 | .begin_use = amdgpu_uvd_ring_begin_use, |
| 751 | .end_use = amdgpu_uvd_ring_end_use, | 763 | .end_use = amdgpu_uvd_ring_end_use, |
| @@ -753,7 +765,7 @@ static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = { | |||
| 753 | 765 | ||
| 754 | static void uvd_v4_2_set_ring_funcs(struct amdgpu_device *adev) | 766 | static void uvd_v4_2_set_ring_funcs(struct amdgpu_device *adev) |
| 755 | { | 767 | { |
| 756 | adev->uvd.ring.funcs = &uvd_v4_2_ring_funcs; | 768 | adev->uvd.inst->ring.funcs = &uvd_v4_2_ring_funcs; |
| 757 | } | 769 | } |
| 758 | 770 | ||
| 759 | static const struct amdgpu_irq_src_funcs uvd_v4_2_irq_funcs = { | 771 | static const struct amdgpu_irq_src_funcs uvd_v4_2_irq_funcs = { |
| @@ -763,8 +775,8 @@ static const struct amdgpu_irq_src_funcs uvd_v4_2_irq_funcs = { | |||
| 763 | 775 | ||
| 764 | static void uvd_v4_2_set_irq_funcs(struct amdgpu_device *adev) | 776 | static void uvd_v4_2_set_irq_funcs(struct amdgpu_device *adev) |
| 765 | { | 777 | { |
| 766 | adev->uvd.irq.num_types = 1; | 778 | adev->uvd.inst->irq.num_types = 1; |
| 767 | adev->uvd.irq.funcs = &uvd_v4_2_irq_funcs; | 779 | adev->uvd.inst->irq.funcs = &uvd_v4_2_irq_funcs; |
| 768 | } | 780 | } |
| 769 | 781 | ||
| 770 | const struct amdgpu_ip_block_version uvd_v4_2_ip_block = | 782 | const struct amdgpu_ip_block_version uvd_v4_2_ip_block = |
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index 6445d55e7d5a..341ee6d55ce8 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c | |||
| @@ -89,6 +89,7 @@ static void uvd_v5_0_ring_set_wptr(struct amdgpu_ring *ring) | |||
| 89 | static int uvd_v5_0_early_init(void *handle) | 89 | static int uvd_v5_0_early_init(void *handle) |
| 90 | { | 90 | { |
| 91 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 91 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 92 | adev->uvd.num_uvd_inst = 1; | ||
| 92 | 93 | ||
| 93 | uvd_v5_0_set_ring_funcs(adev); | 94 | uvd_v5_0_set_ring_funcs(adev); |
| 94 | uvd_v5_0_set_irq_funcs(adev); | 95 | uvd_v5_0_set_irq_funcs(adev); |
| @@ -103,7 +104,7 @@ static int uvd_v5_0_sw_init(void *handle) | |||
| 103 | int r; | 104 | int r; |
| 104 | 105 | ||
| 105 | /* UVD TRAP */ | 106 | /* UVD TRAP */ |
| 106 | r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 124, &adev->uvd.irq); | 107 | r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 124, &adev->uvd.inst->irq); |
| 107 | if (r) | 108 | if (r) |
| 108 | return r; | 109 | return r; |
| 109 | 110 | ||
| @@ -115,9 +116,9 @@ static int uvd_v5_0_sw_init(void *handle) | |||
| 115 | if (r) | 116 | if (r) |
| 116 | return r; | 117 | return r; |
| 117 | 118 | ||
| 118 | ring = &adev->uvd.ring; | 119 | ring = &adev->uvd.inst->ring; |
| 119 | sprintf(ring->name, "uvd"); | 120 | sprintf(ring->name, "uvd"); |
| 120 | r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0); | 121 | r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0); |
| 121 | 122 | ||
| 122 | return r; | 123 | return r; |
| 123 | } | 124 | } |
| @@ -144,7 +145,7 @@ static int uvd_v5_0_sw_fini(void *handle) | |||
| 144 | static int uvd_v5_0_hw_init(void *handle) | 145 | static int uvd_v5_0_hw_init(void *handle) |
| 145 | { | 146 | { |
| 146 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 147 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 147 | struct amdgpu_ring *ring = &adev->uvd.ring; | 148 | struct amdgpu_ring *ring = &adev->uvd.inst->ring; |
| 148 | uint32_t tmp; | 149 | uint32_t tmp; |
| 149 | int r; | 150 | int r; |
| 150 | 151 | ||
| @@ -204,7 +205,7 @@ done: | |||
| 204 | static int uvd_v5_0_hw_fini(void *handle) | 205 | static int uvd_v5_0_hw_fini(void *handle) |
| 205 | { | 206 | { |
| 206 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 207 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 207 | struct amdgpu_ring *ring = &adev->uvd.ring; | 208 | struct amdgpu_ring *ring = &adev->uvd.inst->ring; |
| 208 | 209 | ||
| 209 | if (RREG32(mmUVD_STATUS) != 0) | 210 | if (RREG32(mmUVD_STATUS) != 0) |
| 210 | uvd_v5_0_stop(adev); | 211 | uvd_v5_0_stop(adev); |
| @@ -253,9 +254,9 @@ static void uvd_v5_0_mc_resume(struct amdgpu_device *adev) | |||
| 253 | 254 | ||
| 254 /* program memory controller bits 0-27 */ | 255 /* program memory controller bits 0-27 */ |
| 255 | WREG32(mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, | 256 | WREG32(mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, |
| 256 | lower_32_bits(adev->uvd.gpu_addr)); | 257 | lower_32_bits(adev->uvd.inst->gpu_addr)); |
| 257 | WREG32(mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, | 258 | WREG32(mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, |
| 258 | upper_32_bits(adev->uvd.gpu_addr)); | 259 | upper_32_bits(adev->uvd.inst->gpu_addr)); |
| 259 | 260 | ||
| 260 | offset = AMDGPU_UVD_FIRMWARE_OFFSET; | 261 | offset = AMDGPU_UVD_FIRMWARE_OFFSET; |
| 261 | size = AMDGPU_UVD_FIRMWARE_SIZE(adev); | 262 | size = AMDGPU_UVD_FIRMWARE_SIZE(adev); |
| @@ -287,7 +288,7 @@ static void uvd_v5_0_mc_resume(struct amdgpu_device *adev) | |||
| 287 | */ | 288 | */ |
| 288 | static int uvd_v5_0_start(struct amdgpu_device *adev) | 289 | static int uvd_v5_0_start(struct amdgpu_device *adev) |
| 289 | { | 290 | { |
| 290 | struct amdgpu_ring *ring = &adev->uvd.ring; | 291 | struct amdgpu_ring *ring = &adev->uvd.inst->ring; |
| 291 | uint32_t rb_bufsz, tmp; | 292 | uint32_t rb_bufsz, tmp; |
| 292 | uint32_t lmi_swap_cntl; | 293 | uint32_t lmi_swap_cntl; |
| 293 | uint32_t mp_swap_cntl; | 294 | uint32_t mp_swap_cntl; |
| @@ -540,6 +541,18 @@ static void uvd_v5_0_ring_emit_ib(struct amdgpu_ring *ring, | |||
| 540 | amdgpu_ring_write(ring, ib->length_dw); | 541 | amdgpu_ring_write(ring, ib->length_dw); |
| 541 | } | 542 | } |
| 542 | 543 | ||
| 544 | static void uvd_v5_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) | ||
| 545 | { | ||
| 546 | int i; | ||
| 547 | |||
| 548 | WARN_ON(ring->wptr % 2 || count % 2); | ||
| 549 | |||
| 550 | for (i = 0; i < count / 2; i++) { | ||
| 551 | amdgpu_ring_write(ring, PACKET0(mmUVD_NO_OP, 0)); | ||
| 552 | amdgpu_ring_write(ring, 0); | ||
| 553 | } | ||
| 554 | } | ||
| 555 | |||
| 543 | static bool uvd_v5_0_is_idle(void *handle) | 556 | static bool uvd_v5_0_is_idle(void *handle) |
| 544 | { | 557 | { |
| 545 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 558 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| @@ -586,7 +599,7 @@ static int uvd_v5_0_process_interrupt(struct amdgpu_device *adev, | |||
| 586 | struct amdgpu_iv_entry *entry) | 599 | struct amdgpu_iv_entry *entry) |
| 587 | { | 600 | { |
| 588 | DRM_DEBUG("IH: UVD TRAP\n"); | 601 | DRM_DEBUG("IH: UVD TRAP\n"); |
| 589 | amdgpu_fence_process(&adev->uvd.ring); | 602 | amdgpu_fence_process(&adev->uvd.inst->ring); |
| 590 | return 0; | 603 | return 0; |
| 591 | } | 604 | } |
| 592 | 605 | ||
| @@ -840,7 +853,6 @@ static const struct amd_ip_funcs uvd_v5_0_ip_funcs = { | |||
| 840 | static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = { | 853 | static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = { |
| 841 | .type = AMDGPU_RING_TYPE_UVD, | 854 | .type = AMDGPU_RING_TYPE_UVD, |
| 842 | .align_mask = 0xf, | 855 | .align_mask = 0xf, |
| 843 | .nop = PACKET0(mmUVD_NO_OP, 0), | ||
| 844 | .support_64bit_ptrs = false, | 856 | .support_64bit_ptrs = false, |
| 845 | .get_rptr = uvd_v5_0_ring_get_rptr, | 857 | .get_rptr = uvd_v5_0_ring_get_rptr, |
| 846 | .get_wptr = uvd_v5_0_ring_get_wptr, | 858 | .get_wptr = uvd_v5_0_ring_get_wptr, |
| @@ -853,7 +865,7 @@ static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = { | |||
| 853 | .emit_fence = uvd_v5_0_ring_emit_fence, | 865 | .emit_fence = uvd_v5_0_ring_emit_fence, |
| 854 | .test_ring = uvd_v5_0_ring_test_ring, | 866 | .test_ring = uvd_v5_0_ring_test_ring, |
| 855 | .test_ib = amdgpu_uvd_ring_test_ib, | 867 | .test_ib = amdgpu_uvd_ring_test_ib, |
| 856 | .insert_nop = amdgpu_ring_insert_nop, | 868 | .insert_nop = uvd_v5_0_ring_insert_nop, |
| 857 | .pad_ib = amdgpu_ring_generic_pad_ib, | 869 | .pad_ib = amdgpu_ring_generic_pad_ib, |
| 858 | .begin_use = amdgpu_uvd_ring_begin_use, | 870 | .begin_use = amdgpu_uvd_ring_begin_use, |
| 859 | .end_use = amdgpu_uvd_ring_end_use, | 871 | .end_use = amdgpu_uvd_ring_end_use, |
| @@ -861,7 +873,7 @@ static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = { | |||
| 861 | 873 | ||
| 862 | static void uvd_v5_0_set_ring_funcs(struct amdgpu_device *adev) | 874 | static void uvd_v5_0_set_ring_funcs(struct amdgpu_device *adev) |
| 863 | { | 875 | { |
| 864 | adev->uvd.ring.funcs = &uvd_v5_0_ring_funcs; | 876 | adev->uvd.inst->ring.funcs = &uvd_v5_0_ring_funcs; |
| 865 | } | 877 | } |
| 866 | 878 | ||
| 867 | static const struct amdgpu_irq_src_funcs uvd_v5_0_irq_funcs = { | 879 | static const struct amdgpu_irq_src_funcs uvd_v5_0_irq_funcs = { |
| @@ -871,8 +883,8 @@ static const struct amdgpu_irq_src_funcs uvd_v5_0_irq_funcs = { | |||
| 871 | 883 | ||
| 872 | static void uvd_v5_0_set_irq_funcs(struct amdgpu_device *adev) | 884 | static void uvd_v5_0_set_irq_funcs(struct amdgpu_device *adev) |
| 873 | { | 885 | { |
| 874 | adev->uvd.irq.num_types = 1; | 886 | adev->uvd.inst->irq.num_types = 1; |
| 875 | adev->uvd.irq.funcs = &uvd_v5_0_irq_funcs; | 887 | adev->uvd.inst->irq.funcs = &uvd_v5_0_irq_funcs; |
| 876 | } | 888 | } |
| 877 | 889 | ||
| 878 | const struct amdgpu_ip_block_version uvd_v5_0_ip_block = | 890 | const struct amdgpu_ip_block_version uvd_v5_0_ip_block = |
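The uvd_v5_0.c hunks above carry the two changes that repeat throughout this series: NOP padding moves from a static .nop entry in the ring funcs to a per-ring insert_nop callback that emits PACKET0(mmUVD_NO_OP, 0)/0 pairs, and the formerly flat UVD state (adev->uvd.ring, adev->uvd.irq, adev->uvd.gpu_addr, ...) is now reached through adev->uvd.inst, the first element of a new per-instance array. The following is only a hedged sketch of that split as implied by the hunks; field names beyond the ones visible here, and the array bound, are assumptions rather than the actual amdgpu_uvd.h contents.

struct amdgpu_uvd_instance {			/* assumed name; sketch only */
	struct amdgpu_ring	ring;		/* decode ring */
	struct amdgpu_ring	ring_enc[2];	/* encode rings, sized by num_enc_rings */
	struct amdgpu_irq_src	irq;
	struct drm_sched_entity	entity_enc;
	uint64_t		gpu_addr;	/* firmware/stack/heap BO address */
	u32			srbm_soft_reset;
};

struct amdgpu_uvd {
	uint32_t			num_uvd_inst;	/* 1 everywhere except Vega20 */
	uint32_t			num_enc_rings;
	uint32_t			fw_version;
	uint32_t			max_handles;
	struct amdgpu_uvd_instance	inst[2];	/* assumed bound: two blocks on Vega20 */
};

On single-instance parts such as UVD 5.x and 6.x, adev->uvd.inst (the array decaying to a pointer to element 0) keeps the old single-engine call sites readable; UVD 7.x on Vega20 instead indexes inst[ring->me] or loops over num_uvd_inst, as the later hunks show.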
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index f26f515db2fb..bfddf97dd13e 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | |||
| @@ -62,7 +62,7 @@ static void uvd_v6_0_enable_mgcg(struct amdgpu_device *adev, | |||
| 62 | static inline bool uvd_v6_0_enc_support(struct amdgpu_device *adev) | 62 | static inline bool uvd_v6_0_enc_support(struct amdgpu_device *adev) |
| 63 | { | 63 | { |
| 64 | return ((adev->asic_type >= CHIP_POLARIS10) && | 64 | return ((adev->asic_type >= CHIP_POLARIS10) && |
| 65 | (adev->asic_type <= CHIP_POLARIS12) && | 65 | (adev->asic_type <= CHIP_VEGAM) && |
| 66 | (!adev->uvd.fw_version || adev->uvd.fw_version >= FW_1_130_16)); | 66 | (!adev->uvd.fw_version || adev->uvd.fw_version >= FW_1_130_16)); |
| 67 | } | 67 | } |
| 68 | 68 | ||
| @@ -91,7 +91,7 @@ static uint64_t uvd_v6_0_enc_ring_get_rptr(struct amdgpu_ring *ring) | |||
| 91 | { | 91 | { |
| 92 | struct amdgpu_device *adev = ring->adev; | 92 | struct amdgpu_device *adev = ring->adev; |
| 93 | 93 | ||
| 94 | if (ring == &adev->uvd.ring_enc[0]) | 94 | if (ring == &adev->uvd.inst->ring_enc[0]) |
| 95 | return RREG32(mmUVD_RB_RPTR); | 95 | return RREG32(mmUVD_RB_RPTR); |
| 96 | else | 96 | else |
| 97 | return RREG32(mmUVD_RB_RPTR2); | 97 | return RREG32(mmUVD_RB_RPTR2); |
| @@ -121,7 +121,7 @@ static uint64_t uvd_v6_0_enc_ring_get_wptr(struct amdgpu_ring *ring) | |||
| 121 | { | 121 | { |
| 122 | struct amdgpu_device *adev = ring->adev; | 122 | struct amdgpu_device *adev = ring->adev; |
| 123 | 123 | ||
| 124 | if (ring == &adev->uvd.ring_enc[0]) | 124 | if (ring == &adev->uvd.inst->ring_enc[0]) |
| 125 | return RREG32(mmUVD_RB_WPTR); | 125 | return RREG32(mmUVD_RB_WPTR); |
| 126 | else | 126 | else |
| 127 | return RREG32(mmUVD_RB_WPTR2); | 127 | return RREG32(mmUVD_RB_WPTR2); |
| @@ -152,7 +152,7 @@ static void uvd_v6_0_enc_ring_set_wptr(struct amdgpu_ring *ring) | |||
| 152 | { | 152 | { |
| 153 | struct amdgpu_device *adev = ring->adev; | 153 | struct amdgpu_device *adev = ring->adev; |
| 154 | 154 | ||
| 155 | if (ring == &adev->uvd.ring_enc[0]) | 155 | if (ring == &adev->uvd.inst->ring_enc[0]) |
| 156 | WREG32(mmUVD_RB_WPTR, | 156 | WREG32(mmUVD_RB_WPTR, |
| 157 | lower_32_bits(ring->wptr)); | 157 | lower_32_bits(ring->wptr)); |
| 158 | else | 158 | else |
| @@ -375,6 +375,7 @@ error: | |||
| 375 | static int uvd_v6_0_early_init(void *handle) | 375 | static int uvd_v6_0_early_init(void *handle) |
| 376 | { | 376 | { |
| 377 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 377 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 378 | adev->uvd.num_uvd_inst = 1; | ||
| 378 | 379 | ||
| 379 | if (!(adev->flags & AMD_IS_APU) && | 380 | if (!(adev->flags & AMD_IS_APU) && |
| 380 | (RREG32_SMC(ixCC_HARVEST_FUSES) & CC_HARVEST_FUSES__UVD_DISABLE_MASK)) | 381 | (RREG32_SMC(ixCC_HARVEST_FUSES) & CC_HARVEST_FUSES__UVD_DISABLE_MASK)) |
| @@ -399,14 +400,14 @@ static int uvd_v6_0_sw_init(void *handle) | |||
| 399 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 400 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 400 | 401 | ||
| 401 | /* UVD TRAP */ | 402 | /* UVD TRAP */ |
| 402 | r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 124, &adev->uvd.irq); | 403 | r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 124, &adev->uvd.inst->irq); |
| 403 | if (r) | 404 | if (r) |
| 404 | return r; | 405 | return r; |
| 405 | 406 | ||
| 406 | /* UVD ENC TRAP */ | 407 | /* UVD ENC TRAP */ |
| 407 | if (uvd_v6_0_enc_support(adev)) { | 408 | if (uvd_v6_0_enc_support(adev)) { |
| 408 | for (i = 0; i < adev->uvd.num_enc_rings; ++i) { | 409 | for (i = 0; i < adev->uvd.num_enc_rings; ++i) { |
| 409 | r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i + 119, &adev->uvd.irq); | 410 | r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i + 119, &adev->uvd.inst->irq); |
| 410 | if (r) | 411 | if (r) |
| 411 | return r; | 412 | return r; |
| 412 | } | 413 | } |
| @@ -418,18 +419,18 @@ static int uvd_v6_0_sw_init(void *handle) | |||
| 418 | 419 | ||
| 419 | if (!uvd_v6_0_enc_support(adev)) { | 420 | if (!uvd_v6_0_enc_support(adev)) { |
| 420 | for (i = 0; i < adev->uvd.num_enc_rings; ++i) | 421 | for (i = 0; i < adev->uvd.num_enc_rings; ++i) |
| 421 | adev->uvd.ring_enc[i].funcs = NULL; | 422 | adev->uvd.inst->ring_enc[i].funcs = NULL; |
| 422 | 423 | ||
| 423 | adev->uvd.irq.num_types = 1; | 424 | adev->uvd.inst->irq.num_types = 1; |
| 424 | adev->uvd.num_enc_rings = 0; | 425 | adev->uvd.num_enc_rings = 0; |
| 425 | 426 | ||
| 426 | DRM_INFO("UVD ENC is disabled\n"); | 427 | DRM_INFO("UVD ENC is disabled\n"); |
| 427 | } else { | 428 | } else { |
| 428 | struct drm_sched_rq *rq; | 429 | struct drm_sched_rq *rq; |
| 429 | ring = &adev->uvd.ring_enc[0]; | 430 | ring = &adev->uvd.inst->ring_enc[0]; |
| 430 | rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; | 431 | rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; |
| 431 | r = drm_sched_entity_init(&ring->sched, &adev->uvd.entity_enc, | 432 | r = drm_sched_entity_init(&ring->sched, &adev->uvd.inst->entity_enc, |
| 432 | rq, amdgpu_sched_jobs, NULL); | 433 | rq, NULL); |
| 433 | if (r) { | 434 | if (r) { |
| 434 | DRM_ERROR("Failed setting up UVD ENC run queue.\n"); | 435 | DRM_ERROR("Failed setting up UVD ENC run queue.\n"); |
| 435 | return r; | 436 | return r; |
| @@ -440,17 +441,17 @@ static int uvd_v6_0_sw_init(void *handle) | |||
| 440 | if (r) | 441 | if (r) |
| 441 | return r; | 442 | return r; |
| 442 | 443 | ||
| 443 | ring = &adev->uvd.ring; | 444 | ring = &adev->uvd.inst->ring; |
| 444 | sprintf(ring->name, "uvd"); | 445 | sprintf(ring->name, "uvd"); |
| 445 | r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0); | 446 | r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0); |
| 446 | if (r) | 447 | if (r) |
| 447 | return r; | 448 | return r; |
| 448 | 449 | ||
| 449 | if (uvd_v6_0_enc_support(adev)) { | 450 | if (uvd_v6_0_enc_support(adev)) { |
| 450 | for (i = 0; i < adev->uvd.num_enc_rings; ++i) { | 451 | for (i = 0; i < adev->uvd.num_enc_rings; ++i) { |
| 451 | ring = &adev->uvd.ring_enc[i]; | 452 | ring = &adev->uvd.inst->ring_enc[i]; |
| 452 | sprintf(ring->name, "uvd_enc%d", i); | 453 | sprintf(ring->name, "uvd_enc%d", i); |
| 453 | r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0); | 454 | r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0); |
| 454 | if (r) | 455 | if (r) |
| 455 | return r; | 456 | return r; |
| 456 | } | 457 | } |
| @@ -469,10 +470,10 @@ static int uvd_v6_0_sw_fini(void *handle) | |||
| 469 | return r; | 470 | return r; |
| 470 | 471 | ||
| 471 | if (uvd_v6_0_enc_support(adev)) { | 472 | if (uvd_v6_0_enc_support(adev)) { |
| 472 | drm_sched_entity_fini(&adev->uvd.ring_enc[0].sched, &adev->uvd.entity_enc); | 473 | drm_sched_entity_fini(&adev->uvd.inst->ring_enc[0].sched, &adev->uvd.inst->entity_enc); |
| 473 | 474 | ||
| 474 | for (i = 0; i < adev->uvd.num_enc_rings; ++i) | 475 | for (i = 0; i < adev->uvd.num_enc_rings; ++i) |
| 475 | amdgpu_ring_fini(&adev->uvd.ring_enc[i]); | 476 | amdgpu_ring_fini(&adev->uvd.inst->ring_enc[i]); |
| 476 | } | 477 | } |
| 477 | 478 | ||
| 478 | return amdgpu_uvd_sw_fini(adev); | 479 | return amdgpu_uvd_sw_fini(adev); |
| @@ -488,7 +489,7 @@ static int uvd_v6_0_sw_fini(void *handle) | |||
| 488 | static int uvd_v6_0_hw_init(void *handle) | 489 | static int uvd_v6_0_hw_init(void *handle) |
| 489 | { | 490 | { |
| 490 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 491 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 491 | struct amdgpu_ring *ring = &adev->uvd.ring; | 492 | struct amdgpu_ring *ring = &adev->uvd.inst->ring; |
| 492 | uint32_t tmp; | 493 | uint32_t tmp; |
| 493 | int i, r; | 494 | int i, r; |
| 494 | 495 | ||
| @@ -532,7 +533,7 @@ static int uvd_v6_0_hw_init(void *handle) | |||
| 532 | 533 | ||
| 533 | if (uvd_v6_0_enc_support(adev)) { | 534 | if (uvd_v6_0_enc_support(adev)) { |
| 534 | for (i = 0; i < adev->uvd.num_enc_rings; ++i) { | 535 | for (i = 0; i < adev->uvd.num_enc_rings; ++i) { |
| 535 | ring = &adev->uvd.ring_enc[i]; | 536 | ring = &adev->uvd.inst->ring_enc[i]; |
| 536 | ring->ready = true; | 537 | ring->ready = true; |
| 537 | r = amdgpu_ring_test_ring(ring); | 538 | r = amdgpu_ring_test_ring(ring); |
| 538 | if (r) { | 539 | if (r) { |
| @@ -563,7 +564,7 @@ done: | |||
| 563 | static int uvd_v6_0_hw_fini(void *handle) | 564 | static int uvd_v6_0_hw_fini(void *handle) |
| 564 | { | 565 | { |
| 565 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 566 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 566 | struct amdgpu_ring *ring = &adev->uvd.ring; | 567 | struct amdgpu_ring *ring = &adev->uvd.inst->ring; |
| 567 | 568 | ||
| 568 | if (RREG32(mmUVD_STATUS) != 0) | 569 | if (RREG32(mmUVD_STATUS) != 0) |
| 569 | uvd_v6_0_stop(adev); | 570 | uvd_v6_0_stop(adev); |
| @@ -611,9 +612,9 @@ static void uvd_v6_0_mc_resume(struct amdgpu_device *adev) | |||
| 611 | 612 | ||
| 612 | /* programm memory controller bits 0-27 */ | 613 | /* programm memory controller bits 0-27 */ |
| 613 | WREG32(mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, | 614 | WREG32(mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, |
| 614 | lower_32_bits(adev->uvd.gpu_addr)); | 615 | lower_32_bits(adev->uvd.inst->gpu_addr)); |
| 615 | WREG32(mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, | 616 | WREG32(mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, |
| 616 | upper_32_bits(adev->uvd.gpu_addr)); | 617 | upper_32_bits(adev->uvd.inst->gpu_addr)); |
| 617 | 618 | ||
| 618 | offset = AMDGPU_UVD_FIRMWARE_OFFSET; | 619 | offset = AMDGPU_UVD_FIRMWARE_OFFSET; |
| 619 | size = AMDGPU_UVD_FIRMWARE_SIZE(adev); | 620 | size = AMDGPU_UVD_FIRMWARE_SIZE(adev); |
| @@ -726,7 +727,7 @@ static void cz_set_uvd_clock_gating_branches(struct amdgpu_device *adev, | |||
| 726 | */ | 727 | */ |
| 727 | static int uvd_v6_0_start(struct amdgpu_device *adev) | 728 | static int uvd_v6_0_start(struct amdgpu_device *adev) |
| 728 | { | 729 | { |
| 729 | struct amdgpu_ring *ring = &adev->uvd.ring; | 730 | struct amdgpu_ring *ring = &adev->uvd.inst->ring; |
| 730 | uint32_t rb_bufsz, tmp; | 731 | uint32_t rb_bufsz, tmp; |
| 731 | uint32_t lmi_swap_cntl; | 732 | uint32_t lmi_swap_cntl; |
| 732 | uint32_t mp_swap_cntl; | 733 | uint32_t mp_swap_cntl; |
| @@ -866,14 +867,14 @@ static int uvd_v6_0_start(struct amdgpu_device *adev) | |||
| 866 | WREG32_FIELD(UVD_RBC_RB_CNTL, RB_NO_FETCH, 0); | 867 | WREG32_FIELD(UVD_RBC_RB_CNTL, RB_NO_FETCH, 0); |
| 867 | 868 | ||
| 868 | if (uvd_v6_0_enc_support(adev)) { | 869 | if (uvd_v6_0_enc_support(adev)) { |
| 869 | ring = &adev->uvd.ring_enc[0]; | 870 | ring = &adev->uvd.inst->ring_enc[0]; |
| 870 | WREG32(mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); | 871 | WREG32(mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); |
| 871 | WREG32(mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); | 872 | WREG32(mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); |
| 872 | WREG32(mmUVD_RB_BASE_LO, ring->gpu_addr); | 873 | WREG32(mmUVD_RB_BASE_LO, ring->gpu_addr); |
| 873 | WREG32(mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); | 874 | WREG32(mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); |
| 874 | WREG32(mmUVD_RB_SIZE, ring->ring_size / 4); | 875 | WREG32(mmUVD_RB_SIZE, ring->ring_size / 4); |
| 875 | 876 | ||
| 876 | ring = &adev->uvd.ring_enc[1]; | 877 | ring = &adev->uvd.inst->ring_enc[1]; |
| 877 | WREG32(mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); | 878 | WREG32(mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); |
| 878 | WREG32(mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); | 879 | WREG32(mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); |
| 879 | WREG32(mmUVD_RB_BASE_LO2, ring->gpu_addr); | 880 | WREG32(mmUVD_RB_BASE_LO2, ring->gpu_addr); |
| @@ -964,6 +965,16 @@ static void uvd_v6_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, | |||
| 964 | } | 965 | } |
| 965 | 966 | ||
| 966 | /** | 967 | /** |
| 968 | * uvd_v6_0_ring_emit_hdp_flush - skip HDP flushing | ||
| 969 | * | ||
| 970 | * @ring: amdgpu_ring pointer | ||
| 971 | */ | ||
| 972 | static void uvd_v6_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) | ||
| 973 | { | ||
| 974 | /* The firmware doesn't seem to like touching registers at this point. */ | ||
| 975 | } | ||
| 976 | |||
| 977 | /** | ||
| 967 | * uvd_v6_0_ring_test_ring - register write test | 978 | * uvd_v6_0_ring_test_ring - register write test |
| 968 | * | 979 | * |
| 969 | * @ring: amdgpu_ring pointer | 980 | * @ring: amdgpu_ring pointer |
| @@ -1089,6 +1100,18 @@ static void uvd_v6_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) | |||
| 1089 | amdgpu_ring_write(ring, 0xE); | 1100 | amdgpu_ring_write(ring, 0xE); |
| 1090 | } | 1101 | } |
| 1091 | 1102 | ||
| 1103 | static void uvd_v6_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) | ||
| 1104 | { | ||
| 1105 | int i; | ||
| 1106 | |||
| 1107 | WARN_ON(ring->wptr % 2 || count % 2); | ||
| 1108 | |||
| 1109 | for (i = 0; i < count / 2; i++) { | ||
| 1110 | amdgpu_ring_write(ring, PACKET0(mmUVD_NO_OP, 0)); | ||
| 1111 | amdgpu_ring_write(ring, 0); | ||
| 1112 | } | ||
| 1113 | } | ||
| 1114 | |||
| 1092 | static void uvd_v6_0_enc_ring_emit_pipeline_sync(struct amdgpu_ring *ring) | 1115 | static void uvd_v6_0_enc_ring_emit_pipeline_sync(struct amdgpu_ring *ring) |
| 1093 | { | 1116 | { |
| 1094 | uint32_t seq = ring->fence_drv.sync_seq; | 1117 | uint32_t seq = ring->fence_drv.sync_seq; |
| @@ -1148,10 +1171,10 @@ static bool uvd_v6_0_check_soft_reset(void *handle) | |||
| 1148 | srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_UVD, 1); | 1171 | srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_UVD, 1); |
| 1149 | 1172 | ||
| 1150 | if (srbm_soft_reset) { | 1173 | if (srbm_soft_reset) { |
| 1151 | adev->uvd.srbm_soft_reset = srbm_soft_reset; | 1174 | adev->uvd.inst->srbm_soft_reset = srbm_soft_reset; |
| 1152 | return true; | 1175 | return true; |
| 1153 | } else { | 1176 | } else { |
| 1154 | adev->uvd.srbm_soft_reset = 0; | 1177 | adev->uvd.inst->srbm_soft_reset = 0; |
| 1155 | return false; | 1178 | return false; |
| 1156 | } | 1179 | } |
| 1157 | } | 1180 | } |
| @@ -1160,7 +1183,7 @@ static int uvd_v6_0_pre_soft_reset(void *handle) | |||
| 1160 | { | 1183 | { |
| 1161 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 1184 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 1162 | 1185 | ||
| 1163 | if (!adev->uvd.srbm_soft_reset) | 1186 | if (!adev->uvd.inst->srbm_soft_reset) |
| 1164 | return 0; | 1187 | return 0; |
| 1165 | 1188 | ||
| 1166 | uvd_v6_0_stop(adev); | 1189 | uvd_v6_0_stop(adev); |
| @@ -1172,9 +1195,9 @@ static int uvd_v6_0_soft_reset(void *handle) | |||
| 1172 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 1195 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 1173 | u32 srbm_soft_reset; | 1196 | u32 srbm_soft_reset; |
| 1174 | 1197 | ||
| 1175 | if (!adev->uvd.srbm_soft_reset) | 1198 | if (!adev->uvd.inst->srbm_soft_reset) |
| 1176 | return 0; | 1199 | return 0; |
| 1177 | srbm_soft_reset = adev->uvd.srbm_soft_reset; | 1200 | srbm_soft_reset = adev->uvd.inst->srbm_soft_reset; |
| 1178 | 1201 | ||
| 1179 | if (srbm_soft_reset) { | 1202 | if (srbm_soft_reset) { |
| 1180 | u32 tmp; | 1203 | u32 tmp; |
| @@ -1202,7 +1225,7 @@ static int uvd_v6_0_post_soft_reset(void *handle) | |||
| 1202 | { | 1225 | { |
| 1203 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 1226 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 1204 | 1227 | ||
| 1205 | if (!adev->uvd.srbm_soft_reset) | 1228 | if (!adev->uvd.inst->srbm_soft_reset) |
| 1206 | return 0; | 1229 | return 0; |
| 1207 | 1230 | ||
| 1208 | mdelay(5); | 1231 | mdelay(5); |
| @@ -1228,17 +1251,17 @@ static int uvd_v6_0_process_interrupt(struct amdgpu_device *adev, | |||
| 1228 | 1251 | ||
| 1229 | switch (entry->src_id) { | 1252 | switch (entry->src_id) { |
| 1230 | case 124: | 1253 | case 124: |
| 1231 | amdgpu_fence_process(&adev->uvd.ring); | 1254 | amdgpu_fence_process(&adev->uvd.inst->ring); |
| 1232 | break; | 1255 | break; |
| 1233 | case 119: | 1256 | case 119: |
| 1234 | if (likely(uvd_v6_0_enc_support(adev))) | 1257 | if (likely(uvd_v6_0_enc_support(adev))) |
| 1235 | amdgpu_fence_process(&adev->uvd.ring_enc[0]); | 1258 | amdgpu_fence_process(&adev->uvd.inst->ring_enc[0]); |
| 1236 | else | 1259 | else |
| 1237 | int_handled = false; | 1260 | int_handled = false; |
| 1238 | break; | 1261 | break; |
| 1239 | case 120: | 1262 | case 120: |
| 1240 | if (likely(uvd_v6_0_enc_support(adev))) | 1263 | if (likely(uvd_v6_0_enc_support(adev))) |
| 1241 | amdgpu_fence_process(&adev->uvd.ring_enc[1]); | 1264 | amdgpu_fence_process(&adev->uvd.inst->ring_enc[1]); |
| 1242 | else | 1265 | else |
| 1243 | int_handled = false; | 1266 | int_handled = false; |
| 1244 | break; | 1267 | break; |
| @@ -1521,22 +1544,22 @@ static const struct amd_ip_funcs uvd_v6_0_ip_funcs = { | |||
| 1521 | static const struct amdgpu_ring_funcs uvd_v6_0_ring_phys_funcs = { | 1544 | static const struct amdgpu_ring_funcs uvd_v6_0_ring_phys_funcs = { |
| 1522 | .type = AMDGPU_RING_TYPE_UVD, | 1545 | .type = AMDGPU_RING_TYPE_UVD, |
| 1523 | .align_mask = 0xf, | 1546 | .align_mask = 0xf, |
| 1524 | .nop = PACKET0(mmUVD_NO_OP, 0), | ||
| 1525 | .support_64bit_ptrs = false, | 1547 | .support_64bit_ptrs = false, |
| 1526 | .get_rptr = uvd_v6_0_ring_get_rptr, | 1548 | .get_rptr = uvd_v6_0_ring_get_rptr, |
| 1527 | .get_wptr = uvd_v6_0_ring_get_wptr, | 1549 | .get_wptr = uvd_v6_0_ring_get_wptr, |
| 1528 | .set_wptr = uvd_v6_0_ring_set_wptr, | 1550 | .set_wptr = uvd_v6_0_ring_set_wptr, |
| 1529 | .parse_cs = amdgpu_uvd_ring_parse_cs, | 1551 | .parse_cs = amdgpu_uvd_ring_parse_cs, |
| 1530 | .emit_frame_size = | 1552 | .emit_frame_size = |
| 1531 | 6 + 6 + /* hdp flush / invalidate */ | 1553 | 6 + /* hdp invalidate */ |
| 1532 | 10 + /* uvd_v6_0_ring_emit_pipeline_sync */ | 1554 | 10 + /* uvd_v6_0_ring_emit_pipeline_sync */ |
| 1533 | 14, /* uvd_v6_0_ring_emit_fence x1 no user fence */ | 1555 | 14, /* uvd_v6_0_ring_emit_fence x1 no user fence */ |
| 1534 | .emit_ib_size = 8, /* uvd_v6_0_ring_emit_ib */ | 1556 | .emit_ib_size = 8, /* uvd_v6_0_ring_emit_ib */ |
| 1535 | .emit_ib = uvd_v6_0_ring_emit_ib, | 1557 | .emit_ib = uvd_v6_0_ring_emit_ib, |
| 1536 | .emit_fence = uvd_v6_0_ring_emit_fence, | 1558 | .emit_fence = uvd_v6_0_ring_emit_fence, |
| 1559 | .emit_hdp_flush = uvd_v6_0_ring_emit_hdp_flush, | ||
| 1537 | .test_ring = uvd_v6_0_ring_test_ring, | 1560 | .test_ring = uvd_v6_0_ring_test_ring, |
| 1538 | .test_ib = amdgpu_uvd_ring_test_ib, | 1561 | .test_ib = amdgpu_uvd_ring_test_ib, |
| 1539 | .insert_nop = amdgpu_ring_insert_nop, | 1562 | .insert_nop = uvd_v6_0_ring_insert_nop, |
| 1540 | .pad_ib = amdgpu_ring_generic_pad_ib, | 1563 | .pad_ib = amdgpu_ring_generic_pad_ib, |
| 1541 | .begin_use = amdgpu_uvd_ring_begin_use, | 1564 | .begin_use = amdgpu_uvd_ring_begin_use, |
| 1542 | .end_use = amdgpu_uvd_ring_end_use, | 1565 | .end_use = amdgpu_uvd_ring_end_use, |
| @@ -1552,7 +1575,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_vm_funcs = { | |||
| 1552 | .get_wptr = uvd_v6_0_ring_get_wptr, | 1575 | .get_wptr = uvd_v6_0_ring_get_wptr, |
| 1553 | .set_wptr = uvd_v6_0_ring_set_wptr, | 1576 | .set_wptr = uvd_v6_0_ring_set_wptr, |
| 1554 | .emit_frame_size = | 1577 | .emit_frame_size = |
| 1555 | 6 + 6 + /* hdp flush / invalidate */ | 1578 | 6 + /* hdp invalidate */ |
| 1556 | 10 + /* uvd_v6_0_ring_emit_pipeline_sync */ | 1579 | 10 + /* uvd_v6_0_ring_emit_pipeline_sync */ |
| 1557 | VI_FLUSH_GPU_TLB_NUM_WREG * 6 + 8 + /* uvd_v6_0_ring_emit_vm_flush */ | 1580 | VI_FLUSH_GPU_TLB_NUM_WREG * 6 + 8 + /* uvd_v6_0_ring_emit_vm_flush */ |
| 1558 | 14 + 14, /* uvd_v6_0_ring_emit_fence x2 vm fence */ | 1581 | 14 + 14, /* uvd_v6_0_ring_emit_fence x2 vm fence */ |
| @@ -1561,6 +1584,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_vm_funcs = { | |||
| 1561 | .emit_fence = uvd_v6_0_ring_emit_fence, | 1584 | .emit_fence = uvd_v6_0_ring_emit_fence, |
| 1562 | .emit_vm_flush = uvd_v6_0_ring_emit_vm_flush, | 1585 | .emit_vm_flush = uvd_v6_0_ring_emit_vm_flush, |
| 1563 | .emit_pipeline_sync = uvd_v6_0_ring_emit_pipeline_sync, | 1586 | .emit_pipeline_sync = uvd_v6_0_ring_emit_pipeline_sync, |
| 1587 | .emit_hdp_flush = uvd_v6_0_ring_emit_hdp_flush, | ||
| 1564 | .test_ring = uvd_v6_0_ring_test_ring, | 1588 | .test_ring = uvd_v6_0_ring_test_ring, |
| 1565 | .test_ib = amdgpu_uvd_ring_test_ib, | 1589 | .test_ib = amdgpu_uvd_ring_test_ib, |
| 1566 | .insert_nop = amdgpu_ring_insert_nop, | 1590 | .insert_nop = amdgpu_ring_insert_nop, |
| @@ -1600,10 +1624,10 @@ static const struct amdgpu_ring_funcs uvd_v6_0_enc_ring_vm_funcs = { | |||
| 1600 | static void uvd_v6_0_set_ring_funcs(struct amdgpu_device *adev) | 1624 | static void uvd_v6_0_set_ring_funcs(struct amdgpu_device *adev) |
| 1601 | { | 1625 | { |
| 1602 | if (adev->asic_type >= CHIP_POLARIS10) { | 1626 | if (adev->asic_type >= CHIP_POLARIS10) { |
| 1603 | adev->uvd.ring.funcs = &uvd_v6_0_ring_vm_funcs; | 1627 | adev->uvd.inst->ring.funcs = &uvd_v6_0_ring_vm_funcs; |
| 1604 | DRM_INFO("UVD is enabled in VM mode\n"); | 1628 | DRM_INFO("UVD is enabled in VM mode\n"); |
| 1605 | } else { | 1629 | } else { |
| 1606 | adev->uvd.ring.funcs = &uvd_v6_0_ring_phys_funcs; | 1630 | adev->uvd.inst->ring.funcs = &uvd_v6_0_ring_phys_funcs; |
| 1607 | DRM_INFO("UVD is enabled in physical mode\n"); | 1631 | DRM_INFO("UVD is enabled in physical mode\n"); |
| 1608 | } | 1632 | } |
| 1609 | } | 1633 | } |
| @@ -1613,7 +1637,7 @@ static void uvd_v6_0_set_enc_ring_funcs(struct amdgpu_device *adev) | |||
| 1613 | int i; | 1637 | int i; |
| 1614 | 1638 | ||
| 1615 | for (i = 0; i < adev->uvd.num_enc_rings; ++i) | 1639 | for (i = 0; i < adev->uvd.num_enc_rings; ++i) |
| 1616 | adev->uvd.ring_enc[i].funcs = &uvd_v6_0_enc_ring_vm_funcs; | 1640 | adev->uvd.inst->ring_enc[i].funcs = &uvd_v6_0_enc_ring_vm_funcs; |
| 1617 | 1641 | ||
| 1618 | DRM_INFO("UVD ENC is enabled in VM mode\n"); | 1642 | DRM_INFO("UVD ENC is enabled in VM mode\n"); |
| 1619 | } | 1643 | } |
| @@ -1626,11 +1650,11 @@ static const struct amdgpu_irq_src_funcs uvd_v6_0_irq_funcs = { | |||
| 1626 | static void uvd_v6_0_set_irq_funcs(struct amdgpu_device *adev) | 1650 | static void uvd_v6_0_set_irq_funcs(struct amdgpu_device *adev) |
| 1627 | { | 1651 | { |
| 1628 | if (uvd_v6_0_enc_support(adev)) | 1652 | if (uvd_v6_0_enc_support(adev)) |
| 1629 | adev->uvd.irq.num_types = adev->uvd.num_enc_rings + 1; | 1653 | adev->uvd.inst->irq.num_types = adev->uvd.num_enc_rings + 1; |
| 1630 | else | 1654 | else |
| 1631 | adev->uvd.irq.num_types = 1; | 1655 | adev->uvd.inst->irq.num_types = 1; |
| 1632 | 1656 | ||
| 1633 | adev->uvd.irq.funcs = &uvd_v6_0_irq_funcs; | 1657 | adev->uvd.inst->irq.funcs = &uvd_v6_0_irq_funcs; |
| 1634 | } | 1658 | } |
| 1635 | 1659 | ||
| 1636 | const struct amdgpu_ip_block_version uvd_v6_0_ip_block = | 1660 | const struct amdgpu_ip_block_version uvd_v6_0_ip_block = |
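uvd_v6_0.c gets the same insert_nop override in place of the .nop entry, and additionally hooks an intentionally empty uvd_v6_0_ring_emit_hdp_flush so the generic HDP flush, which the new comment says the firmware does not tolerate at that point, is skipped; the emit_frame_size bookkeeping drops from 6 + 6 to 6 to match. The snippet below is illustrative only, not kernel code: it shows how a commit-time pad would exercise the new callback, assuming only the ring fields visible in the hunks and a write pointer that advances in even steps, which is exactly what the WARN_ON(ring->wptr % 2 || count % 2) guards.

/* Illustrative only: pad a UVD ring up to its 16-dword fetch boundary.
 * Each NOP is now a two-dword register write (PACKET0(mmUVD_NO_OP, 0)
 * followed by a zero payload), so the pad count must stay even. */
static void uvd_pad_to_fetch_size(struct amdgpu_ring *ring)
{
	uint32_t mask = ring->funcs->align_mask;	/* 0xf for these rings */
	uint32_t count = (mask + 1 - (ring->wptr & mask)) & mask;

	ring->funcs->insert_nop(ring, count);		/* emits count / 2 NOP pairs */
}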
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index eddc57f3b72a..57d32f21b3a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | |||
| @@ -40,6 +40,8 @@ | |||
| 40 | #include "mmhub/mmhub_1_0_offset.h" | 40 | #include "mmhub/mmhub_1_0_offset.h" |
| 41 | #include "mmhub/mmhub_1_0_sh_mask.h" | 41 | #include "mmhub/mmhub_1_0_sh_mask.h" |
| 42 | 42 | ||
| 43 | #define UVD7_MAX_HW_INSTANCES_VEGA20 2 | ||
| 44 | |||
| 43 | static void uvd_v7_0_set_ring_funcs(struct amdgpu_device *adev); | 45 | static void uvd_v7_0_set_ring_funcs(struct amdgpu_device *adev); |
| 44 | static void uvd_v7_0_set_enc_ring_funcs(struct amdgpu_device *adev); | 46 | static void uvd_v7_0_set_enc_ring_funcs(struct amdgpu_device *adev); |
| 45 | static void uvd_v7_0_set_irq_funcs(struct amdgpu_device *adev); | 47 | static void uvd_v7_0_set_irq_funcs(struct amdgpu_device *adev); |
| @@ -47,6 +49,11 @@ static int uvd_v7_0_start(struct amdgpu_device *adev); | |||
| 47 | static void uvd_v7_0_stop(struct amdgpu_device *adev); | 49 | static void uvd_v7_0_stop(struct amdgpu_device *adev); |
| 48 | static int uvd_v7_0_sriov_start(struct amdgpu_device *adev); | 50 | static int uvd_v7_0_sriov_start(struct amdgpu_device *adev); |
| 49 | 51 | ||
| 52 | static int amdgpu_ih_clientid_uvds[] = { | ||
| 53 | SOC15_IH_CLIENTID_UVD, | ||
| 54 | SOC15_IH_CLIENTID_UVD1 | ||
| 55 | }; | ||
| 56 | |||
| 50 | /** | 57 | /** |
| 51 | * uvd_v7_0_ring_get_rptr - get read pointer | 58 | * uvd_v7_0_ring_get_rptr - get read pointer |
| 52 | * | 59 | * |
| @@ -58,7 +65,7 @@ static uint64_t uvd_v7_0_ring_get_rptr(struct amdgpu_ring *ring) | |||
| 58 | { | 65 | { |
| 59 | struct amdgpu_device *adev = ring->adev; | 66 | struct amdgpu_device *adev = ring->adev; |
| 60 | 67 | ||
| 61 | return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR); | 68 | return RREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_RPTR); |
| 62 | } | 69 | } |
| 63 | 70 | ||
| 64 | /** | 71 | /** |
| @@ -72,10 +79,10 @@ static uint64_t uvd_v7_0_enc_ring_get_rptr(struct amdgpu_ring *ring) | |||
| 72 | { | 79 | { |
| 73 | struct amdgpu_device *adev = ring->adev; | 80 | struct amdgpu_device *adev = ring->adev; |
| 74 | 81 | ||
| 75 | if (ring == &adev->uvd.ring_enc[0]) | 82 | if (ring == &adev->uvd.inst[ring->me].ring_enc[0]) |
| 76 | return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR); | 83 | return RREG32_SOC15(UVD, ring->me, mmUVD_RB_RPTR); |
| 77 | else | 84 | else |
| 78 | return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2); | 85 | return RREG32_SOC15(UVD, ring->me, mmUVD_RB_RPTR2); |
| 79 | } | 86 | } |
| 80 | 87 | ||
| 81 | /** | 88 | /** |
| @@ -89,7 +96,7 @@ static uint64_t uvd_v7_0_ring_get_wptr(struct amdgpu_ring *ring) | |||
| 89 | { | 96 | { |
| 90 | struct amdgpu_device *adev = ring->adev; | 97 | struct amdgpu_device *adev = ring->adev; |
| 91 | 98 | ||
| 92 | return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR); | 99 | return RREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_WPTR); |
| 93 | } | 100 | } |
| 94 | 101 | ||
| 95 | /** | 102 | /** |
| @@ -106,10 +113,10 @@ static uint64_t uvd_v7_0_enc_ring_get_wptr(struct amdgpu_ring *ring) | |||
| 106 | if (ring->use_doorbell) | 113 | if (ring->use_doorbell) |
| 107 | return adev->wb.wb[ring->wptr_offs]; | 114 | return adev->wb.wb[ring->wptr_offs]; |
| 108 | 115 | ||
| 109 | if (ring == &adev->uvd.ring_enc[0]) | 116 | if (ring == &adev->uvd.inst[ring->me].ring_enc[0]) |
| 110 | return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR); | 117 | return RREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR); |
| 111 | else | 118 | else |
| 112 | return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2); | 119 | return RREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR2); |
| 113 | } | 120 | } |
| 114 | 121 | ||
| 115 | /** | 122 | /** |
| @@ -123,7 +130,7 @@ static void uvd_v7_0_ring_set_wptr(struct amdgpu_ring *ring) | |||
| 123 | { | 130 | { |
| 124 | struct amdgpu_device *adev = ring->adev; | 131 | struct amdgpu_device *adev = ring->adev; |
| 125 | 132 | ||
| 126 | WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); | 133 | WREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); |
| 127 | } | 134 | } |
| 128 | 135 | ||
| 129 | /** | 136 | /** |
| @@ -144,11 +151,11 @@ static void uvd_v7_0_enc_ring_set_wptr(struct amdgpu_ring *ring) | |||
| 144 | return; | 151 | return; |
| 145 | } | 152 | } |
| 146 | 153 | ||
| 147 | if (ring == &adev->uvd.ring_enc[0]) | 154 | if (ring == &adev->uvd.inst[ring->me].ring_enc[0]) |
| 148 | WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, | 155 | WREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR, |
| 149 | lower_32_bits(ring->wptr)); | 156 | lower_32_bits(ring->wptr)); |
| 150 | else | 157 | else |
| 151 | WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, | 158 | WREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR2, |
| 152 | lower_32_bits(ring->wptr)); | 159 | lower_32_bits(ring->wptr)); |
| 153 | } | 160 | } |
| 154 | 161 | ||
| @@ -170,8 +177,8 @@ static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring) | |||
| 170 | 177 | ||
| 171 | r = amdgpu_ring_alloc(ring, 16); | 178 | r = amdgpu_ring_alloc(ring, 16); |
| 172 | if (r) { | 179 | if (r) { |
| 173 | DRM_ERROR("amdgpu: uvd enc failed to lock ring %d (%d).\n", | 180 | DRM_ERROR("amdgpu: uvd enc failed to lock (%d)ring %d (%d).\n", |
| 174 | ring->idx, r); | 181 | ring->me, ring->idx, r); |
| 175 | return r; | 182 | return r; |
| 176 | } | 183 | } |
| 177 | amdgpu_ring_write(ring, HEVC_ENC_CMD_END); | 184 | amdgpu_ring_write(ring, HEVC_ENC_CMD_END); |
| @@ -184,11 +191,11 @@ static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring) | |||
| 184 | } | 191 | } |
| 185 | 192 | ||
| 186 | if (i < adev->usec_timeout) { | 193 | if (i < adev->usec_timeout) { |
| 187 | DRM_DEBUG("ring test on %d succeeded in %d usecs\n", | 194 | DRM_DEBUG("(%d)ring test on %d succeeded in %d usecs\n", |
| 188 | ring->idx, i); | 195 | ring->me, ring->idx, i); |
| 189 | } else { | 196 | } else { |
| 190 | DRM_ERROR("amdgpu: ring %d test failed\n", | 197 | DRM_ERROR("amdgpu: (%d)ring %d test failed\n", |
| 191 | ring->idx); | 198 | ring->me, ring->idx); |
| 192 | r = -ETIMEDOUT; | 199 | r = -ETIMEDOUT; |
| 193 | } | 200 | } |
| 194 | 201 | ||
| @@ -342,24 +349,24 @@ static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) | |||
| 342 | 349 | ||
| 343 | r = uvd_v7_0_enc_get_create_msg(ring, 1, NULL); | 350 | r = uvd_v7_0_enc_get_create_msg(ring, 1, NULL); |
| 344 | if (r) { | 351 | if (r) { |
| 345 | DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r); | 352 | DRM_ERROR("amdgpu: (%d)failed to get create msg (%ld).\n", ring->me, r); |
| 346 | goto error; | 353 | goto error; |
| 347 | } | 354 | } |
| 348 | 355 | ||
| 349 | r = uvd_v7_0_enc_get_destroy_msg(ring, 1, true, &fence); | 356 | r = uvd_v7_0_enc_get_destroy_msg(ring, 1, true, &fence); |
| 350 | if (r) { | 357 | if (r) { |
| 351 | DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r); | 358 | DRM_ERROR("amdgpu: (%d)failed to get destroy ib (%ld).\n", ring->me, r); |
| 352 | goto error; | 359 | goto error; |
| 353 | } | 360 | } |
| 354 | 361 | ||
| 355 | r = dma_fence_wait_timeout(fence, false, timeout); | 362 | r = dma_fence_wait_timeout(fence, false, timeout); |
| 356 | if (r == 0) { | 363 | if (r == 0) { |
| 357 | DRM_ERROR("amdgpu: IB test timed out.\n"); | 364 | DRM_ERROR("amdgpu: (%d)IB test timed out.\n", ring->me); |
| 358 | r = -ETIMEDOUT; | 365 | r = -ETIMEDOUT; |
| 359 | } else if (r < 0) { | 366 | } else if (r < 0) { |
| 360 | DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); | 367 | DRM_ERROR("amdgpu: (%d)fence wait failed (%ld).\n", ring->me, r); |
| 361 | } else { | 368 | } else { |
| 362 | DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); | 369 | DRM_DEBUG("ib test on (%d)ring %d succeeded\n", ring->me, ring->idx); |
| 363 | r = 0; | 370 | r = 0; |
| 364 | } | 371 | } |
| 365 | error: | 372 | error: |
| @@ -370,6 +377,10 @@ error: | |||
| 370 | static int uvd_v7_0_early_init(void *handle) | 377 | static int uvd_v7_0_early_init(void *handle) |
| 371 | { | 378 | { |
| 372 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 379 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 380 | if (adev->asic_type == CHIP_VEGA20) | ||
| 381 | adev->uvd.num_uvd_inst = UVD7_MAX_HW_INSTANCES_VEGA20; | ||
| 382 | else | ||
| 383 | adev->uvd.num_uvd_inst = 1; | ||
| 373 | 384 | ||
| 374 | if (amdgpu_sriov_vf(adev)) | 385 | if (amdgpu_sriov_vf(adev)) |
| 375 | adev->uvd.num_enc_rings = 1; | 386 | adev->uvd.num_enc_rings = 1; |
| @@ -386,19 +397,21 @@ static int uvd_v7_0_sw_init(void *handle) | |||
| 386 | { | 397 | { |
| 387 | struct amdgpu_ring *ring; | 398 | struct amdgpu_ring *ring; |
| 388 | struct drm_sched_rq *rq; | 399 | struct drm_sched_rq *rq; |
| 389 | int i, r; | 400 | int i, j, r; |
| 390 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 401 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 391 | 402 | ||
| 392 | /* UVD TRAP */ | 403 | for (j = 0; j < adev->uvd.num_uvd_inst; j++) { |
| 393 | r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UVD, 124, &adev->uvd.irq); | 404 | /* UVD TRAP */ |
| 394 | if (r) | 405 | r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_uvds[j], 124, &adev->uvd.inst[j].irq); |
| 395 | return r; | ||
| 396 | |||
| 397 | /* UVD ENC TRAP */ | ||
| 398 | for (i = 0; i < adev->uvd.num_enc_rings; ++i) { | ||
| 399 | r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UVD, i + 119, &adev->uvd.irq); | ||
| 400 | if (r) | 406 | if (r) |
| 401 | return r; | 407 | return r; |
| 408 | |||
| 409 | /* UVD ENC TRAP */ | ||
| 410 | for (i = 0; i < adev->uvd.num_enc_rings; ++i) { | ||
| 411 | r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_uvds[j], i + 119, &adev->uvd.inst[j].irq); | ||
| 412 | if (r) | ||
| 413 | return r; | ||
| 414 | } | ||
| 402 | } | 415 | } |
| 403 | 416 | ||
| 404 | r = amdgpu_uvd_sw_init(adev); | 417 | r = amdgpu_uvd_sw_init(adev); |
| @@ -415,43 +428,48 @@ static int uvd_v7_0_sw_init(void *handle) | |||
| 415 | DRM_INFO("PSP loading UVD firmware\n"); | 428 | DRM_INFO("PSP loading UVD firmware\n"); |
| 416 | } | 429 | } |
| 417 | 430 | ||
| 418 | ring = &adev->uvd.ring_enc[0]; | 431 | for (j = 0; j < adev->uvd.num_uvd_inst; j++) { |
| 419 | rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; | 432 | ring = &adev->uvd.inst[j].ring_enc[0]; |
| 420 | r = drm_sched_entity_init(&ring->sched, &adev->uvd.entity_enc, | 433 | rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; |
| 421 | rq, amdgpu_sched_jobs, NULL); | 434 | r = drm_sched_entity_init(&ring->sched, &adev->uvd.inst[j].entity_enc, |
| 422 | if (r) { | 435 | rq, NULL); |
| 423 | DRM_ERROR("Failed setting up UVD ENC run queue.\n"); | 436 | if (r) { |
| 424 | return r; | 437 | DRM_ERROR("(%d)Failed setting up UVD ENC run queue.\n", j); |
| 438 | return r; | ||
| 439 | } | ||
| 425 | } | 440 | } |
| 426 | 441 | ||
| 427 | r = amdgpu_uvd_resume(adev); | 442 | r = amdgpu_uvd_resume(adev); |
| 428 | if (r) | 443 | if (r) |
| 429 | return r; | 444 | return r; |
| 430 | if (!amdgpu_sriov_vf(adev)) { | ||
| 431 | ring = &adev->uvd.ring; | ||
| 432 | sprintf(ring->name, "uvd"); | ||
| 433 | r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0); | ||
| 434 | if (r) | ||
| 435 | return r; | ||
| 436 | } | ||
| 437 | 445 | ||
| 438 | for (i = 0; i < adev->uvd.num_enc_rings; ++i) { | 446 | for (j = 0; j < adev->uvd.num_uvd_inst; j++) { |
| 439 | ring = &adev->uvd.ring_enc[i]; | 447 | if (!amdgpu_sriov_vf(adev)) { |
| 440 | sprintf(ring->name, "uvd_enc%d", i); | 448 | ring = &adev->uvd.inst[j].ring; |
| 441 | if (amdgpu_sriov_vf(adev)) { | 449 | sprintf(ring->name, "uvd<%d>", j); |
| 442 | ring->use_doorbell = true; | 450 | r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst[j].irq, 0); |
| 443 | 451 | if (r) | |
| 444 | /* currently only use the first enconding ring for | 452 | return r; |
| 445 | * sriov, so set unused location for other unused rings. | 453 | } |
| 446 | */ | 454 | |
| 447 | if (i == 0) | 455 | for (i = 0; i < adev->uvd.num_enc_rings; ++i) { |
| 448 | ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING0_1 * 2; | 456 | ring = &adev->uvd.inst[j].ring_enc[i]; |
| 449 | else | 457 | sprintf(ring->name, "uvd_enc%d<%d>", i, j); |
| 450 | ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING2_3 * 2 + 1; | 458 | if (amdgpu_sriov_vf(adev)) { |
| 459 | ring->use_doorbell = true; | ||
| 460 | |||
| 461 | /* currently only use the first enconding ring for | ||
| 462 | * sriov, so set unused location for other unused rings. | ||
| 463 | */ | ||
| 464 | if (i == 0) | ||
| 465 | ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING0_1 * 2; | ||
| 466 | else | ||
| 467 | ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING2_3 * 2 + 1; | ||
| 468 | } | ||
| 469 | r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst[j].irq, 0); | ||
| 470 | if (r) | ||
| 471 | return r; | ||
| 451 | } | 472 | } |
| 452 | r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0); | ||
| 453 | if (r) | ||
| 454 | return r; | ||
| 455 | } | 473 | } |
| 456 | 474 | ||
| 457 | r = amdgpu_virt_alloc_mm_table(adev); | 475 | r = amdgpu_virt_alloc_mm_table(adev); |
| @@ -463,7 +481,7 @@ static int uvd_v7_0_sw_init(void *handle) | |||
| 463 | 481 | ||
| 464 | static int uvd_v7_0_sw_fini(void *handle) | 482 | static int uvd_v7_0_sw_fini(void *handle) |
| 465 | { | 483 | { |
| 466 | int i, r; | 484 | int i, j, r; |
| 467 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 485 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 468 | 486 | ||
| 469 | amdgpu_virt_free_mm_table(adev); | 487 | amdgpu_virt_free_mm_table(adev); |
| @@ -472,11 +490,12 @@ static int uvd_v7_0_sw_fini(void *handle) | |||
| 472 | if (r) | 490 | if (r) |
| 473 | return r; | 491 | return r; |
| 474 | 492 | ||
| 475 | drm_sched_entity_fini(&adev->uvd.ring_enc[0].sched, &adev->uvd.entity_enc); | 493 | for (j = 0; j < adev->uvd.num_uvd_inst; ++j) { |
| 476 | 494 | drm_sched_entity_fini(&adev->uvd.inst[j].ring_enc[0].sched, &adev->uvd.inst[j].entity_enc); | |
| 477 | for (i = 0; i < adev->uvd.num_enc_rings; ++i) | ||
| 478 | amdgpu_ring_fini(&adev->uvd.ring_enc[i]); | ||
| 479 | 495 | ||
| 496 | for (i = 0; i < adev->uvd.num_enc_rings; ++i) | ||
| 497 | amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]); | ||
| 498 | } | ||
| 480 | return amdgpu_uvd_sw_fini(adev); | 499 | return amdgpu_uvd_sw_fini(adev); |
| 481 | } | 500 | } |
| 482 | 501 | ||
| @@ -490,9 +509,9 @@ static int uvd_v7_0_sw_fini(void *handle) | |||
| 490 | static int uvd_v7_0_hw_init(void *handle) | 509 | static int uvd_v7_0_hw_init(void *handle) |
| 491 | { | 510 | { |
| 492 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 511 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 493 | struct amdgpu_ring *ring = &adev->uvd.ring; | 512 | struct amdgpu_ring *ring; |
| 494 | uint32_t tmp; | 513 | uint32_t tmp; |
| 495 | int i, r; | 514 | int i, j, r; |
| 496 | 515 | ||
| 497 | if (amdgpu_sriov_vf(adev)) | 516 | if (amdgpu_sriov_vf(adev)) |
| 498 | r = uvd_v7_0_sriov_start(adev); | 517 | r = uvd_v7_0_sriov_start(adev); |
| @@ -501,57 +520,60 @@ static int uvd_v7_0_hw_init(void *handle) | |||
| 501 | if (r) | 520 | if (r) |
| 502 | goto done; | 521 | goto done; |
| 503 | 522 | ||
| 504 | if (!amdgpu_sriov_vf(adev)) { | 523 | for (j = 0; j < adev->uvd.num_uvd_inst; ++j) { |
| 505 | ring->ready = true; | 524 | ring = &adev->uvd.inst[j].ring; |
| 506 | r = amdgpu_ring_test_ring(ring); | 525 | |
| 507 | if (r) { | 526 | if (!amdgpu_sriov_vf(adev)) { |
| 508 | ring->ready = false; | 527 | ring->ready = true; |
| 509 | goto done; | 528 | r = amdgpu_ring_test_ring(ring); |
| 529 | if (r) { | ||
| 530 | ring->ready = false; | ||
| 531 | goto done; | ||
| 532 | } | ||
| 533 | |||
| 534 | r = amdgpu_ring_alloc(ring, 10); | ||
| 535 | if (r) { | ||
| 536 | DRM_ERROR("amdgpu: (%d)ring failed to lock UVD ring (%d).\n", j, r); | ||
| 537 | goto done; | ||
| 538 | } | ||
| 539 | |||
| 540 | tmp = PACKET0(SOC15_REG_OFFSET(UVD, j, | ||
| 541 | mmUVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL), 0); | ||
| 542 | amdgpu_ring_write(ring, tmp); | ||
| 543 | amdgpu_ring_write(ring, 0xFFFFF); | ||
| 544 | |||
| 545 | tmp = PACKET0(SOC15_REG_OFFSET(UVD, j, | ||
| 546 | mmUVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL), 0); | ||
| 547 | amdgpu_ring_write(ring, tmp); | ||
| 548 | amdgpu_ring_write(ring, 0xFFFFF); | ||
| 549 | |||
| 550 | tmp = PACKET0(SOC15_REG_OFFSET(UVD, j, | ||
| 551 | mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL), 0); | ||
| 552 | amdgpu_ring_write(ring, tmp); | ||
| 553 | amdgpu_ring_write(ring, 0xFFFFF); | ||
| 554 | |||
| 555 | /* Clear timeout status bits */ | ||
| 556 | amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, j, | ||
| 557 | mmUVD_SEMA_TIMEOUT_STATUS), 0)); | ||
| 558 | amdgpu_ring_write(ring, 0x8); | ||
| 559 | |||
| 560 | amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, j, | ||
| 561 | mmUVD_SEMA_CNTL), 0)); | ||
| 562 | amdgpu_ring_write(ring, 3); | ||
| 563 | |||
| 564 | amdgpu_ring_commit(ring); | ||
| 510 | } | 565 | } |
| 511 | 566 | ||
| 512 | r = amdgpu_ring_alloc(ring, 10); | 567 | for (i = 0; i < adev->uvd.num_enc_rings; ++i) { |
| 513 | if (r) { | 568 | ring = &adev->uvd.inst[j].ring_enc[i]; |
| 514 | DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r); | 569 | ring->ready = true; |
| 515 | goto done; | 570 | r = amdgpu_ring_test_ring(ring); |
| 571 | if (r) { | ||
| 572 | ring->ready = false; | ||
| 573 | goto done; | ||
| 574 | } | ||
| 516 | } | 575 | } |
| 517 | |||
| 518 | tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0, | ||
| 519 | mmUVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL), 0); | ||
| 520 | amdgpu_ring_write(ring, tmp); | ||
| 521 | amdgpu_ring_write(ring, 0xFFFFF); | ||
| 522 | |||
| 523 | tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0, | ||
| 524 | mmUVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL), 0); | ||
| 525 | amdgpu_ring_write(ring, tmp); | ||
| 526 | amdgpu_ring_write(ring, 0xFFFFF); | ||
| 527 | |||
| 528 | tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0, | ||
| 529 | mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL), 0); | ||
| 530 | amdgpu_ring_write(ring, tmp); | ||
| 531 | amdgpu_ring_write(ring, 0xFFFFF); | ||
| 532 | |||
| 533 | /* Clear timeout status bits */ | ||
| 534 | amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, | ||
| 535 | mmUVD_SEMA_TIMEOUT_STATUS), 0)); | ||
| 536 | amdgpu_ring_write(ring, 0x8); | ||
| 537 | |||
| 538 | amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, | ||
| 539 | mmUVD_SEMA_CNTL), 0)); | ||
| 540 | amdgpu_ring_write(ring, 3); | ||
| 541 | |||
| 542 | amdgpu_ring_commit(ring); | ||
| 543 | } | 576 | } |
| 544 | |||
| 545 | for (i = 0; i < adev->uvd.num_enc_rings; ++i) { | ||
| 546 | ring = &adev->uvd.ring_enc[i]; | ||
| 547 | ring->ready = true; | ||
| 548 | r = amdgpu_ring_test_ring(ring); | ||
| 549 | if (r) { | ||
| 550 | ring->ready = false; | ||
| 551 | goto done; | ||
| 552 | } | ||
| 553 | } | ||
| 554 | |||
| 555 | done: | 577 | done: |
| 556 | if (!r) | 578 | if (!r) |
| 557 | DRM_INFO("UVD and UVD ENC initialized successfully.\n"); | 579 | DRM_INFO("UVD and UVD ENC initialized successfully.\n"); |
| @@ -569,7 +591,7 @@ done: | |||
| 569 | static int uvd_v7_0_hw_fini(void *handle) | 591 | static int uvd_v7_0_hw_fini(void *handle) |
| 570 | { | 592 | { |
| 571 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 593 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 572 | struct amdgpu_ring *ring = &adev->uvd.ring; | 594 | int i; |
| 573 | 595 | ||
| 574 | if (!amdgpu_sriov_vf(adev)) | 596 | if (!amdgpu_sriov_vf(adev)) |
| 575 | uvd_v7_0_stop(adev); | 597 | uvd_v7_0_stop(adev); |
| @@ -578,7 +600,8 @@ static int uvd_v7_0_hw_fini(void *handle) | |||
| 578 | DRM_DEBUG("For SRIOV client, shouldn't do anything.\n"); | 600 | DRM_DEBUG("For SRIOV client, shouldn't do anything.\n"); |
| 579 | } | 601 | } |
| 580 | 602 | ||
| 581 | ring->ready = false; | 603 | for (i = 0; i < adev->uvd.num_uvd_inst; ++i) |
| 604 | adev->uvd.inst[i].ring.ready = false; | ||
| 582 | 605 | ||
| 583 | return 0; | 606 | return 0; |
| 584 | } | 607 | } |
| @@ -618,48 +641,51 @@ static void uvd_v7_0_mc_resume(struct amdgpu_device *adev) | |||
| 618 | { | 641 | { |
| 619 | uint32_t size = AMDGPU_UVD_FIRMWARE_SIZE(adev); | 642 | uint32_t size = AMDGPU_UVD_FIRMWARE_SIZE(adev); |
| 620 | uint32_t offset; | 643 | uint32_t offset; |
| 644 | int i; | ||
| 621 | 645 | ||
| 622 | if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { | 646 | for (i = 0; i < adev->uvd.num_uvd_inst; ++i) { |
| 623 | WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, | 647 | if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { |
| 624 | lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); | 648 | WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, |
| 625 | WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, | 649 | lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); |
| 626 | upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); | 650 | WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, |
| 627 | offset = 0; | 651 | upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); |
| 628 | } else { | 652 | offset = 0; |
| 629 | WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, | 653 | } else { |
| 630 | lower_32_bits(adev->uvd.gpu_addr)); | 654 | WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, |
| 631 | WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, | 655 | lower_32_bits(adev->uvd.inst[i].gpu_addr)); |
| 632 | upper_32_bits(adev->uvd.gpu_addr)); | 656 | WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, |
| 633 | offset = size; | 657 | upper_32_bits(adev->uvd.inst[i].gpu_addr)); |
| 634 | } | 658 | offset = size; |
| 659 | } | ||
| 635 | 660 | ||
| 636 | WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, | 661 | WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0, |
| 637 | AMDGPU_UVD_FIRMWARE_OFFSET >> 3); | 662 | AMDGPU_UVD_FIRMWARE_OFFSET >> 3); |
| 638 | WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE0, size); | 663 | WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE0, size); |
| 639 | 664 | ||
| 640 | WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, | 665 | WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, |
| 641 | lower_32_bits(adev->uvd.gpu_addr + offset)); | 666 | lower_32_bits(adev->uvd.inst[i].gpu_addr + offset)); |
| 642 | WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, | 667 | WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, |
| 643 | upper_32_bits(adev->uvd.gpu_addr + offset)); | 668 | upper_32_bits(adev->uvd.inst[i].gpu_addr + offset)); |
| 644 | WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, (1 << 21)); | 669 | WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET1, (1 << 21)); |
| 645 | WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_UVD_HEAP_SIZE); | 670 | WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_UVD_HEAP_SIZE); |
| 646 | 671 | ||
| 647 | WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, | 672 | WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, |
| 648 | lower_32_bits(adev->uvd.gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE)); | 673 | lower_32_bits(adev->uvd.inst[i].gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE)); |
| 649 | WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, | 674 | WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, |
| 650 | upper_32_bits(adev->uvd.gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE)); | 675 | upper_32_bits(adev->uvd.inst[i].gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE)); |
| 651 | WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, (2 << 21)); | 676 | WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET2, (2 << 21)); |
| 652 | WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, | 677 | WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE2, |
| 653 | AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40)); | 678 | AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40)); |
| 654 | 679 | ||
| 655 | WREG32_SOC15(UVD, 0, mmUVD_UDEC_ADDR_CONFIG, | 680 | WREG32_SOC15(UVD, i, mmUVD_UDEC_ADDR_CONFIG, |
| 656 | adev->gfx.config.gb_addr_config); | 681 | adev->gfx.config.gb_addr_config); |
| 657 | WREG32_SOC15(UVD, 0, mmUVD_UDEC_DB_ADDR_CONFIG, | 682 | WREG32_SOC15(UVD, i, mmUVD_UDEC_DB_ADDR_CONFIG, |
| 658 | adev->gfx.config.gb_addr_config); | 683 | adev->gfx.config.gb_addr_config); |
| 659 | WREG32_SOC15(UVD, 0, mmUVD_UDEC_DBW_ADDR_CONFIG, | 684 | WREG32_SOC15(UVD, i, mmUVD_UDEC_DBW_ADDR_CONFIG, |
| 660 | adev->gfx.config.gb_addr_config); | 685 | adev->gfx.config.gb_addr_config); |
| 661 | 686 | ||
| 662 | WREG32_SOC15(UVD, 0, mmUVD_GP_SCRATCH4, adev->uvd.max_handles); | 687 | WREG32_SOC15(UVD, i, mmUVD_GP_SCRATCH4, adev->uvd.max_handles); |
| 688 | } | ||
| 663 | } | 689 | } |
| 664 | 690 | ||
| 665 | static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev, | 691 | static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev, |
| @@ -669,6 +695,7 @@ static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev, | |||
| 669 | uint64_t addr = table->gpu_addr; | 695 | uint64_t addr = table->gpu_addr; |
| 670 | struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr; | 696 | struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr; |
| 671 | uint32_t size; | 697 | uint32_t size; |
| 698 | int i; | ||
| 672 | 699 | ||
| 673 | size = header->header_size + header->vce_table_size + header->uvd_table_size; | 700 | size = header->header_size + header->vce_table_size + header->uvd_table_size; |
| 674 | 701 | ||
| @@ -688,11 +715,12 @@ static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev, | |||
| 688 | /* 4, set resp to zero */ | 715 | /* 4, set resp to zero */ |
| 689 | WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP, 0); | 716 | WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP, 0); |
| 690 | 717 | ||
| 691 | WDOORBELL32(adev->uvd.ring_enc[0].doorbell_index, 0); | 718 | for (i = 0; i < adev->uvd.num_uvd_inst; ++i) { |
| 692 | adev->wb.wb[adev->uvd.ring_enc[0].wptr_offs] = 0; | 719 | WDOORBELL32(adev->uvd.inst[i].ring_enc[0].doorbell_index, 0); |
| 693 | adev->uvd.ring_enc[0].wptr = 0; | 720 | adev->wb.wb[adev->uvd.inst[i].ring_enc[0].wptr_offs] = 0; |
| 694 | adev->uvd.ring_enc[0].wptr_old = 0; | 721 | adev->uvd.inst[i].ring_enc[0].wptr = 0; |
| 695 | 722 | adev->uvd.inst[i].ring_enc[0].wptr_old = 0; | |
| 723 | } | ||
| 696 | /* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */ | 724 | /* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */ |
| 697 | WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST, 0x10000001); | 725 | WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST, 0x10000001); |
| 698 | 726 | ||
| @@ -725,6 +753,7 @@ static int uvd_v7_0_sriov_start(struct amdgpu_device *adev) | |||
| 725 | struct mmsch_v1_0_cmd_end end = { {0} }; | 753 | struct mmsch_v1_0_cmd_end end = { {0} }; |
| 726 | uint32_t *init_table = adev->virt.mm_table.cpu_addr; | 754 | uint32_t *init_table = adev->virt.mm_table.cpu_addr; |
| 727 | struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table; | 755 | struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table; |
| 756 | uint8_t i = 0; | ||
| 728 | 757 | ||
| 729 | direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE; | 758 | direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE; |
| 730 | direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE; | 759 | direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE; |
| @@ -742,120 +771,121 @@ static int uvd_v7_0_sriov_start(struct amdgpu_device *adev) | |||
| 742 | 771 | ||
| 743 | init_table += header->uvd_table_offset; | 772 | init_table += header->uvd_table_offset; |
| 744 | 773 | ||
| 745 | ring = &adev->uvd.ring; | 774 | for (i = 0; i < adev->uvd.num_uvd_inst; ++i) { |
| 746 | ring->wptr = 0; | 775 | ring = &adev->uvd.inst[i].ring; |
| 747 | size = AMDGPU_GPU_PAGE_ALIGN(adev->uvd.fw->size + 4); | 776 | ring->wptr = 0; |
| 748 | 777 | size = AMDGPU_GPU_PAGE_ALIGN(adev->uvd.fw->size + 4); | |
| 749 | MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), | 778 | |
| 750 | 0xFFFFFFFF, 0x00000004); | 779 | MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS), |
| 751 | /* mc resume*/ | 780 | 0xFFFFFFFF, 0x00000004); |
| 752 | if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { | 781 | /* mc resume*/ |
| 753 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), | 782 | if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { |
| 754 | lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); | 783 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), |
| 755 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), | 784 | lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); |
| 756 | upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); | 785 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), |
| 757 | offset = 0; | 786 | upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); |
| 758 | } else { | 787 | offset = 0; |
| 759 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), | 788 | } else { |
| 760 | lower_32_bits(adev->uvd.gpu_addr)); | 789 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), |
| 761 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), | 790 | lower_32_bits(adev->uvd.inst[i].gpu_addr)); |
| 762 | upper_32_bits(adev->uvd.gpu_addr)); | 791 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), |
| 763 | offset = size; | 792 | upper_32_bits(adev->uvd.inst[i].gpu_addr)); |
| 793 | offset = size; | ||
| 794 | } | ||
| 795 | |||
| 796 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET0), | ||
| 797 | AMDGPU_UVD_FIRMWARE_OFFSET >> 3); | ||
| 798 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE0), size); | ||
| 799 | |||
| 800 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), | ||
| 801 | lower_32_bits(adev->uvd.inst[i].gpu_addr + offset)); | ||
| 802 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), | ||
| 803 | upper_32_bits(adev->uvd.inst[i].gpu_addr + offset)); | ||
| 804 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET1), (1 << 21)); | ||
| 805 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_UVD_HEAP_SIZE); | ||
| 806 | |||
| 807 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), | ||
| 808 | lower_32_bits(adev->uvd.inst[i].gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE)); | ||
| 809 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), | ||
| 810 | upper_32_bits(adev->uvd.inst[i].gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE)); | ||
| 811 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET2), (2 << 21)); | ||
| 812 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE2), | ||
| 813 | AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40)); | ||
| 814 | |||
| 815 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_GP_SCRATCH4), adev->uvd.max_handles); | ||
| 816 | /* mc resume end*/ | ||
| 817 | |||
| 818 | /* disable clock gating */ | ||
| 819 | MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_CGC_CTRL), | ||
| 820 | ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK, 0); | ||
| 821 | |||
| 822 | /* disable interupt */ | ||
| 823 | MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_MASTINT_EN), | ||
| 824 | ~UVD_MASTINT_EN__VCPU_EN_MASK, 0); | ||
| 825 | |||
| 826 | /* stall UMC and register bus before resetting VCPU */ | ||
| 827 | MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2), | ||
| 828 | ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, | ||
| 829 | UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); | ||
| 830 | |||
| 831 | /* put LMI, VCPU, RBC etc... into reset */ | ||
| 832 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_SOFT_RESET), | ||
| 833 | (uint32_t)(UVD_SOFT_RESET__LMI_SOFT_RESET_MASK | | ||
| 834 | UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK | | ||
| 835 | UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK | | ||
| 836 | UVD_SOFT_RESET__RBC_SOFT_RESET_MASK | | ||
| 837 | UVD_SOFT_RESET__CSM_SOFT_RESET_MASK | | ||
| 838 | UVD_SOFT_RESET__CXW_SOFT_RESET_MASK | | ||
| 839 | UVD_SOFT_RESET__TAP_SOFT_RESET_MASK | | ||
| 840 | UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK)); | ||
| 841 | |||
| 842 | /* initialize UVD memory controller */ | ||
| 843 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL), | ||
| 844 | (uint32_t)((0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | | ||
| 845 | UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | | ||
| 846 | UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | | ||
| 847 | UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK | | ||
| 848 | UVD_LMI_CTRL__REQ_MODE_MASK | | ||
| 849 | 0x00100000L)); | ||
| 850 | |||
| 851 | /* take all subblocks out of reset, except VCPU */ | ||
| 852 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_SOFT_RESET), | ||
| 853 | UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); | ||
| 854 | |||
| 855 | /* enable VCPU clock */ | ||
| 856 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), | ||
| 857 | UVD_VCPU_CNTL__CLK_EN_MASK); | ||
| 858 | |||
| 859 | /* enable master interrupt */ | ||
| 860 | MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_MASTINT_EN), | ||
| 861 | ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK), | ||
| 862 | (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK)); | ||
| 863 | |||
| 864 | /* clear the bit 4 of UVD_STATUS */ | ||
| 865 | MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS), | ||
| 866 | ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT), 0); | ||
| 867 | |||
| 868 | /* force RBC into idle state */ | ||
| 869 | size = order_base_2(ring->ring_size); | ||
| 870 | tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, size); | ||
| 871 | tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); | ||
| 872 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_RBC_RB_CNTL), tmp); | ||
| 873 | |||
| 874 | ring = &adev->uvd.inst[i].ring_enc[0]; | ||
| 875 | ring->wptr = 0; | ||
| 876 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_BASE_LO), ring->gpu_addr); | ||
| 877 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_BASE_HI), upper_32_bits(ring->gpu_addr)); | ||
| 878 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_SIZE), ring->ring_size / 4); | ||
| 879 | |||
| 880 | /* boot up the VCPU */ | ||
| 881 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_SOFT_RESET), 0); | ||
| 882 | |||
| 883 | /* enable UMC */ | ||
| 884 | MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2), | ||
| 885 | ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, 0); | ||
| 886 | |||
| 887 | MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS), 0x02, 0x02); | ||
| 764 | } | 888 | } |
| 765 | |||
| 766 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), | ||
| 767 | AMDGPU_UVD_FIRMWARE_OFFSET >> 3); | ||
| 768 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE0), size); | ||
| 769 | |||
| 770 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), | ||
| 771 | lower_32_bits(adev->uvd.gpu_addr + offset)); | ||
| 772 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), | ||
| 773 | upper_32_bits(adev->uvd.gpu_addr + offset)); | ||
| 774 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), (1 << 21)); | ||
| 775 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_UVD_HEAP_SIZE); | ||
| 776 | |||
| 777 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), | ||
| 778 | lower_32_bits(adev->uvd.gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE)); | ||
| 779 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), | ||
| 780 | upper_32_bits(adev->uvd.gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE)); | ||
| 781 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2), (2 << 21)); | ||
| 782 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE2), | ||
| 783 | AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40)); | ||
| 784 | |||
| 785 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH4), adev->uvd.max_handles); | ||
| 786 | /* mc resume end*/ | ||
| 787 | |||
| 788 | /* disable clock gating */ | ||
| 789 | MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_CGC_CTRL), | ||
| 790 | ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK, 0); | ||
| 791 | |||
| 792 | /* disable interupt */ | ||
| 793 | MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), | ||
| 794 | ~UVD_MASTINT_EN__VCPU_EN_MASK, 0); | ||
| 795 | |||
| 796 | /* stall UMC and register bus before resetting VCPU */ | ||
| 797 | MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), | ||
| 798 | ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, | ||
| 799 | UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); | ||
| 800 | |||
| 801 | /* put LMI, VCPU, RBC etc... into reset */ | ||
| 802 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), | ||
| 803 | (uint32_t)(UVD_SOFT_RESET__LMI_SOFT_RESET_MASK | | ||
| 804 | UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK | | ||
| 805 | UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK | | ||
| 806 | UVD_SOFT_RESET__RBC_SOFT_RESET_MASK | | ||
| 807 | UVD_SOFT_RESET__CSM_SOFT_RESET_MASK | | ||
| 808 | UVD_SOFT_RESET__CXW_SOFT_RESET_MASK | | ||
| 809 | UVD_SOFT_RESET__TAP_SOFT_RESET_MASK | | ||
| 810 | UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK)); | ||
| 811 | |||
| 812 | /* initialize UVD memory controller */ | ||
| 813 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL), | ||
| 814 | (uint32_t)((0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | | ||
| 815 | UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | | ||
| 816 | UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | | ||
| 817 | UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK | | ||
| 818 | UVD_LMI_CTRL__REQ_MODE_MASK | | ||
| 819 | 0x00100000L)); | ||
| 820 | |||
| 821 | /* take all subblocks out of reset, except VCPU */ | ||
| 822 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), | ||
| 823 | UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); | ||
| 824 | |||
| 825 | /* enable VCPU clock */ | ||
| 826 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL), | ||
| 827 | UVD_VCPU_CNTL__CLK_EN_MASK); | ||
| 828 | |||
| 829 | /* enable master interrupt */ | ||
| 830 | MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), | ||
| 831 | ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK), | ||
| 832 | (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK)); | ||
| 833 | |||
| 834 | /* clear the bit 4 of UVD_STATUS */ | ||
| 835 | MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), | ||
| 836 | ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT), 0); | ||
| 837 | |||
| 838 | /* force RBC into idle state */ | ||
| 839 | size = order_base_2(ring->ring_size); | ||
| 840 | tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, size); | ||
| 841 | tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); | ||
| 842 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), tmp); | ||
| 843 | |||
| 844 | ring = &adev->uvd.ring_enc[0]; | ||
| 845 | ring->wptr = 0; | ||
| 846 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_LO), ring->gpu_addr); | ||
| 847 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_HI), upper_32_bits(ring->gpu_addr)); | ||
| 848 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_SIZE), ring->ring_size / 4); | ||
| 849 | |||
| 850 | /* boot up the VCPU */ | ||
| 851 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0); | ||
| 852 | |||
| 853 | /* enable UMC */ | ||
| 854 | MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), | ||
| 855 | ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, 0); | ||
| 856 | |||
| 857 | MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), 0x02, 0x02); | ||
| 858 | |||
| 859 | /* add end packet */ | 889 | /* add end packet */ |
| 860 | memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end)); | 890 | memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end)); |
| 861 | table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4; | 891 | table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4; |
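Under SR-IOV, uvd_v7_0_sriov_start() cannot poke the UVD registers directly; the hunk above instead appends direct-write (MMSCH_V1_0_INSERT_DIRECT_WT), read-modify-write and poll commands to an MMSCH v1.0 init table that the MMSCH later executes, now once per UVD instance. The sketch below only illustrates the idea behind such an insert helper; the struct layout, field names and opcode value are assumptions for illustration (the real definitions live in mmsch_v1_0.h), not the actual macro expansion.

        #include <stdint.h>
        #include <string.h>

        /* Assumed layout of one "direct write" table entry (illustrative only). */
        struct sketch_mmsch_direct_wt {
                uint32_t cmd_header;    /* assumed opcode for "direct register write" */
                uint32_t reg_offset;    /* register index, e.g. SOC15_REG_OFFSET(...) */
                uint32_t reg_value;     /* value written when the table is replayed */
        };

        /* Append one entry and advance the table pointer; sizes are kept in
         * dwords, matching the "table_size += sizeof(...) / 4" pattern above. */
        static uint32_t *sketch_insert_direct_wt(uint32_t *table, uint32_t *table_size,
                                                 uint32_t reg, uint32_t val)
        {
                struct sketch_mmsch_direct_wt cmd = {
                        .cmd_header = 0x1,      /* assumed value */
                        .reg_offset = reg,
                        .reg_value = val,
                };

                memcpy(table, &cmd, sizeof(cmd));
                *table_size += sizeof(cmd) / 4;
                return table + sizeof(cmd) / 4;
        }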
| @@ -874,15 +904,17 @@ static int uvd_v7_0_sriov_start(struct amdgpu_device *adev) | |||
| 874 | */ | 904 | */ |
| 875 | static int uvd_v7_0_start(struct amdgpu_device *adev) | 905 | static int uvd_v7_0_start(struct amdgpu_device *adev) |
| 876 | { | 906 | { |
| 877 | struct amdgpu_ring *ring = &adev->uvd.ring; | 907 | struct amdgpu_ring *ring; |
| 878 | uint32_t rb_bufsz, tmp; | 908 | uint32_t rb_bufsz, tmp; |
| 879 | uint32_t lmi_swap_cntl; | 909 | uint32_t lmi_swap_cntl; |
| 880 | uint32_t mp_swap_cntl; | 910 | uint32_t mp_swap_cntl; |
| 881 | int i, j, r; | 911 | int i, j, k, r; |
| 882 | 912 | ||
| 883 | /* disable DPG */ | 913 | for (k = 0; k < adev->uvd.num_uvd_inst; ++k) { |
| 884 | WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), 0, | 914 | /* disable DPG */ |
| 885 | ~UVD_POWER_STATUS__UVD_PG_MODE_MASK); | 915 | WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_POWER_STATUS), 0, |
| 916 | ~UVD_POWER_STATUS__UVD_PG_MODE_MASK); | ||
| 917 | } | ||
| 886 | 918 | ||
| 887 | /* disable byte swapping */ | 919 | /* disable byte swapping */ |
| 888 | lmi_swap_cntl = 0; | 920 | lmi_swap_cntl = 0; |
| @@ -890,157 +922,159 @@ static int uvd_v7_0_start(struct amdgpu_device *adev) | |||
| 890 | 922 | ||
| 891 | uvd_v7_0_mc_resume(adev); | 923 | uvd_v7_0_mc_resume(adev); |
| 892 | 924 | ||
| 893 | /* disable clock gating */ | 925 | for (k = 0; k < adev->uvd.num_uvd_inst; ++k) { |
| 894 | WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_CGC_CTRL), 0, | 926 | ring = &adev->uvd.inst[k].ring; |
| 895 | ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK); | 927 | /* disable clock gating */ |
| 896 | 928 | WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_CGC_CTRL), 0, | |
| 897 | /* disable interupt */ | 929 | ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK); |
| 898 | WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), 0, | ||
| 899 | ~UVD_MASTINT_EN__VCPU_EN_MASK); | ||
| 900 | |||
| 901 | /* stall UMC and register bus before resetting VCPU */ | ||
| 902 | WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), | ||
| 903 | UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, | ||
| 904 | ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); | ||
| 905 | mdelay(1); | ||
| 906 | |||
| 907 | /* put LMI, VCPU, RBC etc... into reset */ | ||
| 908 | WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET, | ||
| 909 | UVD_SOFT_RESET__LMI_SOFT_RESET_MASK | | ||
| 910 | UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK | | ||
| 911 | UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK | | ||
| 912 | UVD_SOFT_RESET__RBC_SOFT_RESET_MASK | | ||
| 913 | UVD_SOFT_RESET__CSM_SOFT_RESET_MASK | | ||
| 914 | UVD_SOFT_RESET__CXW_SOFT_RESET_MASK | | ||
| 915 | UVD_SOFT_RESET__TAP_SOFT_RESET_MASK | | ||
| 916 | UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK); | ||
| 917 | mdelay(5); | ||
| 918 | 930 | ||
| 919 | /* initialize UVD memory controller */ | 931 | /* disable interupt */ |
| 920 | WREG32_SOC15(UVD, 0, mmUVD_LMI_CTRL, | 932 | WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_MASTINT_EN), 0, |
| 921 | (0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | | 933 | ~UVD_MASTINT_EN__VCPU_EN_MASK); |
| 922 | UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | | 934 | |
| 923 | UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | | 935 | /* stall UMC and register bus before resetting VCPU */ |
| 924 | UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK | | 936 | WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_LMI_CTRL2), |
| 925 | UVD_LMI_CTRL__REQ_MODE_MASK | | 937 | UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, |
| 926 | 0x00100000L); | 938 | ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); |
| 939 | mdelay(1); | ||
| 940 | |||
| 941 | /* put LMI, VCPU, RBC etc... into reset */ | ||
| 942 | WREG32_SOC15(UVD, k, mmUVD_SOFT_RESET, | ||
| 943 | UVD_SOFT_RESET__LMI_SOFT_RESET_MASK | | ||
| 944 | UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK | | ||
| 945 | UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK | | ||
| 946 | UVD_SOFT_RESET__RBC_SOFT_RESET_MASK | | ||
| 947 | UVD_SOFT_RESET__CSM_SOFT_RESET_MASK | | ||
| 948 | UVD_SOFT_RESET__CXW_SOFT_RESET_MASK | | ||
| 949 | UVD_SOFT_RESET__TAP_SOFT_RESET_MASK | | ||
| 950 | UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK); | ||
| 951 | mdelay(5); | ||
| 952 | |||
| 953 | /* initialize UVD memory controller */ | ||
| 954 | WREG32_SOC15(UVD, k, mmUVD_LMI_CTRL, | ||
| 955 | (0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | | ||
| 956 | UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | | ||
| 957 | UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | | ||
| 958 | UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK | | ||
| 959 | UVD_LMI_CTRL__REQ_MODE_MASK | | ||
| 960 | 0x00100000L); | ||
| 927 | 961 | ||
| 928 | #ifdef __BIG_ENDIAN | 962 | #ifdef __BIG_ENDIAN |
| 929 | /* swap (8 in 32) RB and IB */ | 963 | /* swap (8 in 32) RB and IB */ |
| 930 | lmi_swap_cntl = 0xa; | 964 | lmi_swap_cntl = 0xa; |
| 931 | mp_swap_cntl = 0; | 965 | mp_swap_cntl = 0; |
| 932 | #endif | 966 | #endif |
| 933 | WREG32_SOC15(UVD, 0, mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl); | 967 | WREG32_SOC15(UVD, k, mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl); |
| 934 | WREG32_SOC15(UVD, 0, mmUVD_MP_SWAP_CNTL, mp_swap_cntl); | 968 | WREG32_SOC15(UVD, k, mmUVD_MP_SWAP_CNTL, mp_swap_cntl); |
| 935 | |||
| 936 | WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXA0, 0x40c2040); | ||
| 937 | WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXA1, 0x0); | ||
| 938 | WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXB0, 0x40c2040); | ||
| 939 | WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXB1, 0x0); | ||
| 940 | WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_ALU, 0); | ||
| 941 | WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUX, 0x88); | ||
| 942 | |||
| 943 | /* take all subblocks out of reset, except VCPU */ | ||
| 944 | WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET, | ||
| 945 | UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); | ||
| 946 | mdelay(5); | ||
| 947 | 969 | ||
| 948 | /* enable VCPU clock */ | 970 | WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUXA0, 0x40c2040); |
| 949 | WREG32_SOC15(UVD, 0, mmUVD_VCPU_CNTL, | 971 | WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUXA1, 0x0); |
| 950 | UVD_VCPU_CNTL__CLK_EN_MASK); | 972 | WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUXB0, 0x40c2040); |
| 973 | WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUXB1, 0x0); | ||
| 974 | WREG32_SOC15(UVD, k, mmUVD_MPC_SET_ALU, 0); | ||
| 975 | WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUX, 0x88); | ||
| 951 | 976 | ||
| 952 | /* enable UMC */ | 977 | /* take all subblocks out of reset, except VCPU */ |
| 953 | WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0, | 978 | WREG32_SOC15(UVD, k, mmUVD_SOFT_RESET, |
| 954 | ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); | 979 | UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); |
| 980 | mdelay(5); | ||
| 955 | 981 | ||
| 956 | /* boot up the VCPU */ | 982 | /* enable VCPU clock */ |
| 957 | WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET, 0); | 983 | WREG32_SOC15(UVD, k, mmUVD_VCPU_CNTL, |
| 958 | mdelay(10); | 984 | UVD_VCPU_CNTL__CLK_EN_MASK); |
| 959 | 985 | ||
| 960 | for (i = 0; i < 10; ++i) { | 986 | /* enable UMC */ |
| 961 | uint32_t status; | 987 | WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_LMI_CTRL2), 0, |
| 988 | ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); | ||
| 962 | 989 | ||
| 963 | for (j = 0; j < 100; ++j) { | 990 | /* boot up the VCPU */ |
| 964 | status = RREG32_SOC15(UVD, 0, mmUVD_STATUS); | 991 | WREG32_SOC15(UVD, k, mmUVD_SOFT_RESET, 0); |
| 992 | mdelay(10); | ||
| 993 | |||
| 994 | for (i = 0; i < 10; ++i) { | ||
| 995 | uint32_t status; | ||
| 996 | |||
| 997 | for (j = 0; j < 100; ++j) { | ||
| 998 | status = RREG32_SOC15(UVD, k, mmUVD_STATUS); | ||
| 999 | if (status & 2) | ||
| 1000 | break; | ||
| 1001 | mdelay(10); | ||
| 1002 | } | ||
| 1003 | r = 0; | ||
| 965 | if (status & 2) | 1004 | if (status & 2) |
| 966 | break; | 1005 | break; |
| 1006 | |||
| 1007 | DRM_ERROR("UVD(%d) not responding, trying to reset the VCPU!!!\n", k); | ||
| 1008 | WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_SOFT_RESET), | ||
| 1009 | UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK, | ||
| 1010 | ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); | ||
| 967 | mdelay(10); | 1011 | mdelay(10); |
| 1012 | WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_SOFT_RESET), 0, | ||
| 1013 | ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); | ||
| 1014 | mdelay(10); | ||
| 1015 | r = -1; | ||
| 968 | } | 1016 | } |
| 969 | r = 0; | ||
| 970 | if (status & 2) | ||
| 971 | break; | ||
| 972 | |||
| 973 | DRM_ERROR("UVD not responding, trying to reset the VCPU!!!\n"); | ||
| 974 | WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), | ||
| 975 | UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK, | ||
| 976 | ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); | ||
| 977 | mdelay(10); | ||
| 978 | WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0, | ||
| 979 | ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); | ||
| 980 | mdelay(10); | ||
| 981 | r = -1; | ||
| 982 | } | ||
| 983 | |||
| 984 | if (r) { | ||
| 985 | DRM_ERROR("UVD not responding, giving up!!!\n"); | ||
| 986 | return r; | ||
| 987 | } | ||
| 988 | /* enable master interrupt */ | ||
| 989 | WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), | ||
| 990 | (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK), | ||
| 991 | ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK)); | ||
| 992 | |||
| 993 | /* clear the bit 4 of UVD_STATUS */ | ||
| 994 | WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), 0, | ||
| 995 | ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); | ||
| 996 | |||
| 997 | /* force RBC into idle state */ | ||
| 998 | rb_bufsz = order_base_2(ring->ring_size); | ||
| 999 | tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz); | ||
| 1000 | tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1); | ||
| 1001 | tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); | ||
| 1002 | tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_WPTR_POLL_EN, 0); | ||
| 1003 | tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); | ||
| 1004 | tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); | ||
| 1005 | WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, tmp); | ||
| 1006 | |||
| 1007 | /* set the write pointer delay */ | ||
| 1008 | WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR_CNTL, 0); | ||
| 1009 | |||
| 1010 | /* set the wb address */ | ||
| 1011 | WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR_ADDR, | ||
| 1012 | (upper_32_bits(ring->gpu_addr) >> 2)); | ||
| 1013 | |||
| 1014 | /* programm the RB_BASE for ring buffer */ | ||
| 1015 | WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, | ||
| 1016 | lower_32_bits(ring->gpu_addr)); | ||
| 1017 | WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH, | ||
| 1018 | upper_32_bits(ring->gpu_addr)); | ||
| 1019 | |||
| 1020 | /* Initialize the ring buffer's read and write pointers */ | ||
| 1021 | WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR, 0); | ||
| 1022 | |||
| 1023 | ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR); | ||
| 1024 | WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, | ||
| 1025 | lower_32_bits(ring->wptr)); | ||
| 1026 | 1017 | ||
| 1027 | WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), 0, | 1018 | if (r) { |
| 1028 | ~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK); | 1019 | DRM_ERROR("UVD(%d) not responding, giving up!!!\n", k); |
| 1029 | 1020 | return r; | |
| 1030 | ring = &adev->uvd.ring_enc[0]; | 1021 | } |
| 1031 | WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); | 1022 | /* enable master interrupt */ |
| 1032 | WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); | 1023 | WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_MASTINT_EN), |
| 1033 | WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr); | 1024 | (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK), |
| 1034 | WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); | 1025 | ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK)); |
| 1035 | WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4); | ||
| 1036 | 1026 | ||
| 1037 | ring = &adev->uvd.ring_enc[1]; | 1027 | /* clear the bit 4 of UVD_STATUS */ |
| 1038 | WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); | 1028 | WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_STATUS), 0, |
| 1039 | WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); | 1029 | ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); |
| 1040 | WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr); | ||
| 1041 | WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); | ||
| 1042 | WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4); | ||
| 1043 | 1030 | ||
| 1031 | /* force RBC into idle state */ | ||
| 1032 | rb_bufsz = order_base_2(ring->ring_size); | ||
| 1033 | tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz); | ||
| 1034 | tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1); | ||
| 1035 | tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); | ||
| 1036 | tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_WPTR_POLL_EN, 0); | ||
| 1037 | tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); | ||
| 1038 | tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); | ||
| 1039 | WREG32_SOC15(UVD, k, mmUVD_RBC_RB_CNTL, tmp); | ||
| 1040 | |||
| 1041 | /* set the write pointer delay */ | ||
| 1042 | WREG32_SOC15(UVD, k, mmUVD_RBC_RB_WPTR_CNTL, 0); | ||
| 1043 | |||
| 1044 | /* set the wb address */ | ||
| 1045 | WREG32_SOC15(UVD, k, mmUVD_RBC_RB_RPTR_ADDR, | ||
| 1046 | (upper_32_bits(ring->gpu_addr) >> 2)); | ||
| 1047 | |||
| 1048 | /* programm the RB_BASE for ring buffer */ | ||
| 1049 | WREG32_SOC15(UVD, k, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, | ||
| 1050 | lower_32_bits(ring->gpu_addr)); | ||
| 1051 | WREG32_SOC15(UVD, k, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH, | ||
| 1052 | upper_32_bits(ring->gpu_addr)); | ||
| 1053 | |||
| 1054 | /* Initialize the ring buffer's read and write pointers */ | ||
| 1055 | WREG32_SOC15(UVD, k, mmUVD_RBC_RB_RPTR, 0); | ||
| 1056 | |||
| 1057 | ring->wptr = RREG32_SOC15(UVD, k, mmUVD_RBC_RB_RPTR); | ||
| 1058 | WREG32_SOC15(UVD, k, mmUVD_RBC_RB_WPTR, | ||
| 1059 | lower_32_bits(ring->wptr)); | ||
| 1060 | |||
| 1061 | WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_RBC_RB_CNTL), 0, | ||
| 1062 | ~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK); | ||
| 1063 | |||
| 1064 | ring = &adev->uvd.inst[k].ring_enc[0]; | ||
| 1065 | WREG32_SOC15(UVD, k, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); | ||
| 1066 | WREG32_SOC15(UVD, k, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); | ||
| 1067 | WREG32_SOC15(UVD, k, mmUVD_RB_BASE_LO, ring->gpu_addr); | ||
| 1068 | WREG32_SOC15(UVD, k, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); | ||
| 1069 | WREG32_SOC15(UVD, k, mmUVD_RB_SIZE, ring->ring_size / 4); | ||
| 1070 | |||
| 1071 | ring = &adev->uvd.inst[k].ring_enc[1]; | ||
| 1072 | WREG32_SOC15(UVD, k, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); | ||
| 1073 | WREG32_SOC15(UVD, k, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); | ||
| 1074 | WREG32_SOC15(UVD, k, mmUVD_RB_BASE_LO2, ring->gpu_addr); | ||
| 1075 | WREG32_SOC15(UVD, k, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); | ||
| 1076 | WREG32_SOC15(UVD, k, mmUVD_RB_SIZE2, ring->ring_size / 4); | ||
| 1077 | } | ||
| 1044 | return 0; | 1078 | return 0; |
| 1045 | } | 1079 | } |
| 1046 | 1080 | ||
| @@ -1053,26 +1087,30 @@ static int uvd_v7_0_start(struct amdgpu_device *adev) | |||
| 1053 | */ | 1087 | */ |
| 1054 | static void uvd_v7_0_stop(struct amdgpu_device *adev) | 1088 | static void uvd_v7_0_stop(struct amdgpu_device *adev) |
| 1055 | { | 1089 | { |
| 1056 | /* force RBC into idle state */ | 1090 | uint8_t i = 0; |
| 1057 | WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, 0x11010101); | ||
| 1058 | |||
| 1059 | /* Stall UMC and register bus before resetting VCPU */ | ||
| 1060 | WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), | ||
| 1061 | UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, | ||
| 1062 | ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); | ||
| 1063 | mdelay(1); | ||
| 1064 | |||
| 1065 | /* put VCPU into reset */ | ||
| 1066 | WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET, | ||
| 1067 | UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); | ||
| 1068 | mdelay(5); | ||
| 1069 | 1091 | ||
| 1070 | /* disable VCPU clock */ | 1092 | for (i = 0; i < adev->uvd.num_uvd_inst; ++i) { |
| 1071 | WREG32_SOC15(UVD, 0, mmUVD_VCPU_CNTL, 0x0); | 1093 | /* force RBC into idle state */ |
| 1094 | WREG32_SOC15(UVD, i, mmUVD_RBC_RB_CNTL, 0x11010101); | ||
| 1072 | 1095 | ||
| 1073 | /* Unstall UMC and register bus */ | 1096 | /* Stall UMC and register bus before resetting VCPU */ |
| 1074 | WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0, | 1097 | WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2), |
| 1075 | ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); | 1098 | UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, |
| 1099 | ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); | ||
| 1100 | mdelay(1); | ||
| 1101 | |||
| 1102 | /* put VCPU into reset */ | ||
| 1103 | WREG32_SOC15(UVD, i, mmUVD_SOFT_RESET, | ||
| 1104 | UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); | ||
| 1105 | mdelay(5); | ||
| 1106 | |||
| 1107 | /* disable VCPU clock */ | ||
| 1108 | WREG32_SOC15(UVD, i, mmUVD_VCPU_CNTL, 0x0); | ||
| 1109 | |||
| 1110 | /* Unstall UMC and register bus */ | ||
| 1111 | WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2), 0, | ||
| 1112 | ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); | ||
| 1113 | } | ||
| 1076 | } | 1114 | } |
| 1077 | 1115 | ||
| 1078 | /** | 1116 | /** |
| @@ -1091,26 +1129,26 @@ static void uvd_v7_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq | |||
| 1091 | WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); | 1129 | WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); |
| 1092 | 1130 | ||
| 1093 | amdgpu_ring_write(ring, | 1131 | amdgpu_ring_write(ring, |
| 1094 | PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0)); | 1132 | PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_CONTEXT_ID), 0)); |
| 1095 | amdgpu_ring_write(ring, seq); | 1133 | amdgpu_ring_write(ring, seq); |
| 1096 | amdgpu_ring_write(ring, | 1134 | amdgpu_ring_write(ring, |
| 1097 | PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0)); | 1135 | PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA0), 0)); |
| 1098 | amdgpu_ring_write(ring, addr & 0xffffffff); | 1136 | amdgpu_ring_write(ring, addr & 0xffffffff); |
| 1099 | amdgpu_ring_write(ring, | 1137 | amdgpu_ring_write(ring, |
| 1100 | PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0)); | 1138 | PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA1), 0)); |
| 1101 | amdgpu_ring_write(ring, upper_32_bits(addr) & 0xff); | 1139 | amdgpu_ring_write(ring, upper_32_bits(addr) & 0xff); |
| 1102 | amdgpu_ring_write(ring, | 1140 | amdgpu_ring_write(ring, |
| 1103 | PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0)); | 1141 | PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_CMD), 0)); |
| 1104 | amdgpu_ring_write(ring, 0); | 1142 | amdgpu_ring_write(ring, 0); |
| 1105 | 1143 | ||
| 1106 | amdgpu_ring_write(ring, | 1144 | amdgpu_ring_write(ring, |
| 1107 | PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0)); | 1145 | PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA0), 0)); |
| 1108 | amdgpu_ring_write(ring, 0); | 1146 | amdgpu_ring_write(ring, 0); |
| 1109 | amdgpu_ring_write(ring, | 1147 | amdgpu_ring_write(ring, |
| 1110 | PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0)); | 1148 | PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA1), 0)); |
| 1111 | amdgpu_ring_write(ring, 0); | 1149 | amdgpu_ring_write(ring, 0); |
| 1112 | amdgpu_ring_write(ring, | 1150 | amdgpu_ring_write(ring, |
| 1113 | PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0)); | 1151 | PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_CMD), 0)); |
| 1114 | amdgpu_ring_write(ring, 2); | 1152 | amdgpu_ring_write(ring, 2); |
| 1115 | } | 1153 | } |
| 1116 | 1154 | ||
| @@ -1136,6 +1174,16 @@ static void uvd_v7_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, | |||
| 1136 | } | 1174 | } |
| 1137 | 1175 | ||
| 1138 | /** | 1176 | /** |
| 1177 | * uvd_v7_0_ring_emit_hdp_flush - skip HDP flushing | ||
| 1178 | * | ||
| 1179 | * @ring: amdgpu_ring pointer | ||
| 1180 | */ | ||
| 1181 | static void uvd_v7_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) | ||
| 1182 | { | ||
| 1183 | /* The firmware doesn't seem to like touching registers at this point. */ | ||
| 1184 | } | ||
| 1185 | |||
| 1186 | /** | ||
| 1139 | * uvd_v7_0_ring_test_ring - register write test | 1187 | * uvd_v7_0_ring_test_ring - register write test |
| 1140 | * | 1188 | * |
| 1141 | * @ring: amdgpu_ring pointer | 1189 | * @ring: amdgpu_ring pointer |
| @@ -1149,30 +1197,30 @@ static int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring) | |||
| 1149 | unsigned i; | 1197 | unsigned i; |
| 1150 | int r; | 1198 | int r; |
| 1151 | 1199 | ||
| 1152 | WREG32_SOC15(UVD, 0, mmUVD_CONTEXT_ID, 0xCAFEDEAD); | 1200 | WREG32_SOC15(UVD, ring->me, mmUVD_CONTEXT_ID, 0xCAFEDEAD); |
| 1153 | r = amdgpu_ring_alloc(ring, 3); | 1201 | r = amdgpu_ring_alloc(ring, 3); |
| 1154 | if (r) { | 1202 | if (r) { |
| 1155 | DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", | 1203 | DRM_ERROR("amdgpu: (%d)cp failed to lock ring %d (%d).\n", |
| 1156 | ring->idx, r); | 1204 | ring->me, ring->idx, r); |
| 1157 | return r; | 1205 | return r; |
| 1158 | } | 1206 | } |
| 1159 | amdgpu_ring_write(ring, | 1207 | amdgpu_ring_write(ring, |
| 1160 | PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0)); | 1208 | PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_CONTEXT_ID), 0)); |
| 1161 | amdgpu_ring_write(ring, 0xDEADBEEF); | 1209 | amdgpu_ring_write(ring, 0xDEADBEEF); |
| 1162 | amdgpu_ring_commit(ring); | 1210 | amdgpu_ring_commit(ring); |
| 1163 | for (i = 0; i < adev->usec_timeout; i++) { | 1211 | for (i = 0; i < adev->usec_timeout; i++) { |
| 1164 | tmp = RREG32_SOC15(UVD, 0, mmUVD_CONTEXT_ID); | 1212 | tmp = RREG32_SOC15(UVD, ring->me, mmUVD_CONTEXT_ID); |
| 1165 | if (tmp == 0xDEADBEEF) | 1213 | if (tmp == 0xDEADBEEF) |
| 1166 | break; | 1214 | break; |
| 1167 | DRM_UDELAY(1); | 1215 | DRM_UDELAY(1); |
| 1168 | } | 1216 | } |
| 1169 | 1217 | ||
| 1170 | if (i < adev->usec_timeout) { | 1218 | if (i < adev->usec_timeout) { |
| 1171 | DRM_DEBUG("ring test on %d succeeded in %d usecs\n", | 1219 | DRM_DEBUG("(%d)ring test on %d succeeded in %d usecs\n", |
| 1172 | ring->idx, i); | 1220 | ring->me, ring->idx, i); |
| 1173 | } else { | 1221 | } else { |
| 1174 | DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", | 1222 | DRM_ERROR("(%d)amdgpu: ring %d test failed (0x%08X)\n", |
| 1175 | ring->idx, tmp); | 1223 | ring->me, ring->idx, tmp); |
| 1176 | r = -EINVAL; | 1224 | r = -EINVAL; |
| 1177 | } | 1225 | } |
| 1178 | return r; | 1226 | return r; |
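uvd_v7_0_ring_test_ring() now reports which instance failed by printing ring->me alongside the ring index; the test itself is unchanged: seed UVD_CONTEXT_ID with 0xCAFEDEAD, push a PACKET0 write of 0xDEADBEEF to the same register through the ring, then poll until the value flips or adev->usec_timeout expires. A minimal sketch of that poll pattern, with read_context_id() standing in for RREG32_SOC15():

        #include <stdint.h>

        static uint32_t read_context_id(int inst) { (void)inst; return 0xDEADBEEF; } /* stand-in */

        /* Returns 0 if the ring wrote the magic value back within 'timeout_us' polls. */
        static int sketch_ring_test_poll(int inst, unsigned int timeout_us)
        {
                unsigned int i;
                uint32_t tmp = 0xCAFEDEAD;

                for (i = 0; i < timeout_us; i++) {
                        tmp = read_context_id(inst);
                        if (tmp == 0xDEADBEEF)
                                break;
                        /* DRM_UDELAY(1) in the driver */
                }
                return (tmp == 0xDEADBEEF) ? 0 : -22;   /* -EINVAL on timeout */
        }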
| @@ -1193,17 +1241,17 @@ static void uvd_v7_0_ring_emit_ib(struct amdgpu_ring *ring, | |||
| 1193 | struct amdgpu_device *adev = ring->adev; | 1241 | struct amdgpu_device *adev = ring->adev; |
| 1194 | 1242 | ||
| 1195 | amdgpu_ring_write(ring, | 1243 | amdgpu_ring_write(ring, |
| 1196 | PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_VMID), 0)); | 1244 | PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_LMI_RBC_IB_VMID), 0)); |
| 1197 | amdgpu_ring_write(ring, vmid); | 1245 | amdgpu_ring_write(ring, vmid); |
| 1198 | 1246 | ||
| 1199 | amdgpu_ring_write(ring, | 1247 | amdgpu_ring_write(ring, |
| 1200 | PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_64BIT_BAR_LOW), 0)); | 1248 | PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_LMI_RBC_IB_64BIT_BAR_LOW), 0)); |
| 1201 | amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); | 1249 | amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); |
| 1202 | amdgpu_ring_write(ring, | 1250 | amdgpu_ring_write(ring, |
| 1203 | PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH), 0)); | 1251 | PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH), 0)); |
| 1204 | amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); | 1252 | amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); |
| 1205 | amdgpu_ring_write(ring, | 1253 | amdgpu_ring_write(ring, |
| 1206 | PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_IB_SIZE), 0)); | 1254 | PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_RBC_IB_SIZE), 0)); |
| 1207 | amdgpu_ring_write(ring, ib->length_dw); | 1255 | amdgpu_ring_write(ring, ib->length_dw); |
| 1208 | } | 1256 | } |
| 1209 | 1257 | ||
| @@ -1231,13 +1279,13 @@ static void uvd_v7_0_ring_emit_wreg(struct amdgpu_ring *ring, | |||
| 1231 | struct amdgpu_device *adev = ring->adev; | 1279 | struct amdgpu_device *adev = ring->adev; |
| 1232 | 1280 | ||
| 1233 | amdgpu_ring_write(ring, | 1281 | amdgpu_ring_write(ring, |
| 1234 | PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0)); | 1282 | PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA0), 0)); |
| 1235 | amdgpu_ring_write(ring, reg << 2); | 1283 | amdgpu_ring_write(ring, reg << 2); |
| 1236 | amdgpu_ring_write(ring, | 1284 | amdgpu_ring_write(ring, |
| 1237 | PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0)); | 1285 | PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA1), 0)); |
| 1238 | amdgpu_ring_write(ring, val); | 1286 | amdgpu_ring_write(ring, val); |
| 1239 | amdgpu_ring_write(ring, | 1287 | amdgpu_ring_write(ring, |
| 1240 | PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0)); | 1288 | PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_CMD), 0)); |
| 1241 | amdgpu_ring_write(ring, 8); | 1289 | amdgpu_ring_write(ring, 8); |
| 1242 | } | 1290 | } |
| 1243 | 1291 | ||
| @@ -1247,16 +1295,16 @@ static void uvd_v7_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, | |||
| 1247 | struct amdgpu_device *adev = ring->adev; | 1295 | struct amdgpu_device *adev = ring->adev; |
| 1248 | 1296 | ||
| 1249 | amdgpu_ring_write(ring, | 1297 | amdgpu_ring_write(ring, |
| 1250 | PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0)); | 1298 | PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA0), 0)); |
| 1251 | amdgpu_ring_write(ring, reg << 2); | 1299 | amdgpu_ring_write(ring, reg << 2); |
| 1252 | amdgpu_ring_write(ring, | 1300 | amdgpu_ring_write(ring, |
| 1253 | PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0)); | 1301 | PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA1), 0)); |
| 1254 | amdgpu_ring_write(ring, val); | 1302 | amdgpu_ring_write(ring, val); |
| 1255 | amdgpu_ring_write(ring, | 1303 | amdgpu_ring_write(ring, |
| 1256 | PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH8), 0)); | 1304 | PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GP_SCRATCH8), 0)); |
| 1257 | amdgpu_ring_write(ring, mask); | 1305 | amdgpu_ring_write(ring, mask); |
| 1258 | amdgpu_ring_write(ring, | 1306 | amdgpu_ring_write(ring, |
| 1259 | PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0)); | 1307 | PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_CMD), 0)); |
| 1260 | amdgpu_ring_write(ring, 12); | 1308 | amdgpu_ring_write(ring, 12); |
| 1261 | } | 1309 | } |
| 1262 | 1310 | ||
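Both uvd_v7_0_ring_emit_wreg() and uvd_v7_0_ring_emit_reg_wait() drive the VCPU through the same GPCOM mailbox, now addressed per instance via ring->me: DATA0 carries the byte address (reg << 2), DATA1 the value, and the final CMD selects the operation (8 for a write, 12 for a masked wait, with the mask staged in GP_SCRATCH8). A compact sketch of assembling such a packet stream into a dword buffer; emit_dw() and the SKETCH_PACKET0() encoding are simplified stand-ins, not the driver's PACKET0 macro or amdgpu_ring_write().

        #include <stdint.h>

        #define SKETCH_PACKET0(reg) ((uint32_t)(reg))   /* real PACKET0 also encodes a count field */

        static uint32_t *emit_dw(uint32_t *buf, uint32_t dw) { *buf++ = dw; return buf; }

        /* Queue "write 'val' to 'reg'" for the VCPU: DATA0 = byte address, DATA1 = value, CMD = 8. */
        static uint32_t *sketch_emit_wreg(uint32_t *buf, uint32_t data0_reg, uint32_t data1_reg,
                                          uint32_t cmd_reg, uint32_t reg, uint32_t val)
        {
                buf = emit_dw(buf, SKETCH_PACKET0(data0_reg));
                buf = emit_dw(buf, reg << 2);
                buf = emit_dw(buf, SKETCH_PACKET0(data1_reg));
                buf = emit_dw(buf, val);
                buf = emit_dw(buf, SKETCH_PACKET0(cmd_reg));
                buf = emit_dw(buf, 8);          /* 8 = write, 12 = wait-for-value */
                return buf;
        }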
| @@ -1277,12 +1325,15 @@ static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring, | |||
| 1277 | 1325 | ||
| 1278 | static void uvd_v7_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) | 1326 | static void uvd_v7_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) |
| 1279 | { | 1327 | { |
| 1280 | int i; | ||
| 1281 | struct amdgpu_device *adev = ring->adev; | 1328 | struct amdgpu_device *adev = ring->adev; |
| 1329 | int i; | ||
| 1282 | 1330 | ||
| 1283 | for (i = 0; i < count; i++) | 1331 | WARN_ON(ring->wptr % 2 || count % 2); |
| 1284 | amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0)); | ||
| 1285 | 1332 | ||
| 1333 | for (i = 0; i < count / 2; i++) { | ||
| 1334 | amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_NO_OP), 0)); | ||
| 1335 | amdgpu_ring_write(ring, 0); | ||
| 1336 | } | ||
| 1286 | } | 1337 | } |
| 1287 | 1338 | ||
| 1288 | static void uvd_v7_0_enc_ring_insert_end(struct amdgpu_ring *ring) | 1339 | static void uvd_v7_0_enc_ring_insert_end(struct amdgpu_ring *ring) |
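uvd_v7_0_ring_insert_nop() no longer emits 'count' bare NOP packets; it emits count/2 pairs of a PACKET0(UVD_NO_OP) header followed by a zero data dword, and WARNs if either the write pointer or the requested count is odd. That keeps the decode ring aligned to two-dword packets, presumably also why the .nop field (PACKET0(0x81ff, 0)) is dropped from uvd_v7_0_ring_vm_funcs further down. The pairing is easier to see spelled out; emit_dw2() is the same kind of illustrative stand-in as above, not amdgpu_ring_write().

        #include <assert.h>
        #include <stdint.h>

        static uint32_t *emit_dw2(uint32_t *buf, uint32_t dw) { *buf++ = dw; return buf; }

        /* Pad the ring with 'count' dwords of NOPs, two dwords (header + 0) at a time. */
        static uint32_t *sketch_insert_nop(uint32_t *buf, uint32_t nop_packet,
                                           uint32_t wptr, uint32_t count)
        {
                uint32_t i;

                assert(!(wptr % 2) && !(count % 2));    /* WARN_ON() in the driver */

                for (i = 0; i < count / 2; i++) {
                        buf = emit_dw2(buf, nop_packet);        /* PACKET0(mmUVD_NO_OP, 0) */
                        buf = emit_dw2(buf, 0);
                }
                return buf;
        }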
| @@ -1349,16 +1400,16 @@ static bool uvd_v7_0_check_soft_reset(void *handle) | |||
| 1349 | 1400 | ||
| 1350 | if (REG_GET_FIELD(tmp, SRBM_STATUS, UVD_RQ_PENDING) || | 1401 | if (REG_GET_FIELD(tmp, SRBM_STATUS, UVD_RQ_PENDING) || |
| 1351 | REG_GET_FIELD(tmp, SRBM_STATUS, UVD_BUSY) || | 1402 | REG_GET_FIELD(tmp, SRBM_STATUS, UVD_BUSY) || |
| 1352 | (RREG32_SOC15(UVD, 0, mmUVD_STATUS) & | 1403 | (RREG32_SOC15(UVD, ring->me, mmUVD_STATUS) & |
| 1353 | AMDGPU_UVD_STATUS_BUSY_MASK)) | 1404 | AMDGPU_UVD_STATUS_BUSY_MASK)) |
| 1354 | srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, | 1405 | srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, |
| 1355 | SRBM_SOFT_RESET, SOFT_RESET_UVD, 1); | 1406 | SRBM_SOFT_RESET, SOFT_RESET_UVD, 1); |
| 1356 | 1407 | ||
| 1357 | if (srbm_soft_reset) { | 1408 | if (srbm_soft_reset) { |
| 1358 | adev->uvd.srbm_soft_reset = srbm_soft_reset; | 1409 | adev->uvd.inst[ring->me].srbm_soft_reset = srbm_soft_reset; |
| 1359 | return true; | 1410 | return true; |
| 1360 | } else { | 1411 | } else { |
| 1361 | adev->uvd.srbm_soft_reset = 0; | 1412 | adev->uvd.inst[ring->me].srbm_soft_reset = 0; |
| 1362 | return false; | 1413 | return false; |
| 1363 | } | 1414 | } |
| 1364 | } | 1415 | } |
| @@ -1367,7 +1418,7 @@ static int uvd_v7_0_pre_soft_reset(void *handle) | |||
| 1367 | { | 1418 | { |
| 1368 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 1419 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 1369 | 1420 | ||
| 1370 | if (!adev->uvd.srbm_soft_reset) | 1421 | if (!adev->uvd.inst[ring->me].srbm_soft_reset) |
| 1371 | return 0; | 1422 | return 0; |
| 1372 | 1423 | ||
| 1373 | uvd_v7_0_stop(adev); | 1424 | uvd_v7_0_stop(adev); |
| @@ -1379,9 +1430,9 @@ static int uvd_v7_0_soft_reset(void *handle) | |||
| 1379 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 1430 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 1380 | u32 srbm_soft_reset; | 1431 | u32 srbm_soft_reset; |
| 1381 | 1432 | ||
| 1382 | if (!adev->uvd.srbm_soft_reset) | 1433 | if (!adev->uvd.inst[ring->me].srbm_soft_reset) |
| 1383 | return 0; | 1434 | return 0; |
| 1384 | srbm_soft_reset = adev->uvd.srbm_soft_reset; | 1435 | srbm_soft_reset = adev->uvd.inst[ring->me].srbm_soft_reset; |
| 1385 | 1436 | ||
| 1386 | if (srbm_soft_reset) { | 1437 | if (srbm_soft_reset) { |
| 1387 | u32 tmp; | 1438 | u32 tmp; |
| @@ -1409,7 +1460,7 @@ static int uvd_v7_0_post_soft_reset(void *handle) | |||
| 1409 | { | 1460 | { |
| 1410 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 1461 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 1411 | 1462 | ||
| 1412 | if (!adev->uvd.srbm_soft_reset) | 1463 | if (!adev->uvd.inst[ring->me].srbm_soft_reset) |
| 1413 | return 0; | 1464 | return 0; |
| 1414 | 1465 | ||
| 1415 | mdelay(5); | 1466 | mdelay(5); |
| @@ -1431,17 +1482,32 @@ static int uvd_v7_0_process_interrupt(struct amdgpu_device *adev, | |||
| 1431 | struct amdgpu_irq_src *source, | 1482 | struct amdgpu_irq_src *source, |
| 1432 | struct amdgpu_iv_entry *entry) | 1483 | struct amdgpu_iv_entry *entry) |
| 1433 | { | 1484 | { |
| 1485 | uint32_t ip_instance; | ||
| 1486 | |||
| 1487 | switch (entry->client_id) { | ||
| 1488 | case SOC15_IH_CLIENTID_UVD: | ||
| 1489 | ip_instance = 0; | ||
| 1490 | break; | ||
| 1491 | case SOC15_IH_CLIENTID_UVD1: | ||
| 1492 | ip_instance = 1; | ||
| 1493 | break; | ||
| 1494 | default: | ||
| 1495 | DRM_ERROR("Unhandled client id: %d\n", entry->client_id); | ||
| 1496 | return 0; | ||
| 1497 | } | ||
| 1498 | |||
| 1434 | DRM_DEBUG("IH: UVD TRAP\n"); | 1499 | DRM_DEBUG("IH: UVD TRAP\n"); |
| 1500 | |||
| 1435 | switch (entry->src_id) { | 1501 | switch (entry->src_id) { |
| 1436 | case 124: | 1502 | case 124: |
| 1437 | amdgpu_fence_process(&adev->uvd.ring); | 1503 | amdgpu_fence_process(&adev->uvd.inst[ip_instance].ring); |
| 1438 | break; | 1504 | break; |
| 1439 | case 119: | 1505 | case 119: |
| 1440 | amdgpu_fence_process(&adev->uvd.ring_enc[0]); | 1506 | amdgpu_fence_process(&adev->uvd.inst[ip_instance].ring_enc[0]); |
| 1441 | break; | 1507 | break; |
| 1442 | case 120: | 1508 | case 120: |
| 1443 | if (!amdgpu_sriov_vf(adev)) | 1509 | if (!amdgpu_sriov_vf(adev)) |
| 1444 | amdgpu_fence_process(&adev->uvd.ring_enc[1]); | 1510 | amdgpu_fence_process(&adev->uvd.inst[ip_instance].ring_enc[1]); |
| 1445 | break; | 1511 | break; |
| 1446 | default: | 1512 | default: |
| 1447 | DRM_ERROR("Unhandled interrupt: %d %d\n", | 1513 | DRM_ERROR("Unhandled interrupt: %d %d\n", |
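uvd_v7_0_process_interrupt() now routes fences to the right instance by translating the IH client id into an instance index before switching on src_id (124 for the decode ring, 119/120 for the two encode rings, the latter skipped under SR-IOV). A small sketch of that translation; the numeric client ids below are placeholders, the driver compares against the SOC15_IH_CLIENTID_UVD/UVD1 enum values.

        /* Placeholder client ids for illustration; the driver uses
         * SOC15_IH_CLIENTID_UVD and SOC15_IH_CLIENTID_UVD1. */
        enum { SKETCH_CLIENT_UVD = 0x10, SKETCH_CLIENT_UVD1 = 0x11 };

        /* Map an IH client id to a UVD instance, or -1 for "not ours". */
        static int sketch_uvd_ip_instance(unsigned int client_id)
        {
                switch (client_id) {
                case SKETCH_CLIENT_UVD:
                        return 0;
                case SKETCH_CLIENT_UVD1:
                        return 1;
                default:
                        return -1;      /* the driver logs "Unhandled client id" and bails */
                }
        }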
| @@ -1457,9 +1523,9 @@ static void uvd_v7_0_set_sw_clock_gating(struct amdgpu_device *adev) | |||
| 1457 | { | 1523 | { |
| 1458 | uint32_t data, data1, data2, suvd_flags; | 1524 | uint32_t data, data1, data2, suvd_flags; |
| 1459 | 1525 | ||
| 1460 | data = RREG32_SOC15(UVD, 0, mmUVD_CGC_CTRL); | 1526 | data = RREG32_SOC15(UVD, ring->me, mmUVD_CGC_CTRL); |
| 1461 | data1 = RREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE); | 1527 | data1 = RREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE); |
| 1462 | data2 = RREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_CTRL); | 1528 | data2 = RREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_CTRL); |
| 1463 | 1529 | ||
| 1464 | data &= ~(UVD_CGC_CTRL__CLK_OFF_DELAY_MASK | | 1530 | data &= ~(UVD_CGC_CTRL__CLK_OFF_DELAY_MASK | |
| 1465 | UVD_CGC_CTRL__CLK_GATE_DLY_TIMER_MASK); | 1531 | UVD_CGC_CTRL__CLK_GATE_DLY_TIMER_MASK); |
| @@ -1503,18 +1569,18 @@ static void uvd_v7_0_set_sw_clock_gating(struct amdgpu_device *adev) | |||
| 1503 | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK); | 1569 | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK); |
| 1504 | data1 |= suvd_flags; | 1570 | data1 |= suvd_flags; |
| 1505 | 1571 | ||
| 1506 | WREG32_SOC15(UVD, 0, mmUVD_CGC_CTRL, data); | 1572 | WREG32_SOC15(UVD, ring->me, mmUVD_CGC_CTRL, data); |
| 1507 | WREG32_SOC15(UVD, 0, mmUVD_CGC_GATE, 0); | 1573 | WREG32_SOC15(UVD, ring->me, mmUVD_CGC_GATE, 0); |
| 1508 | WREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE, data1); | 1574 | WREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE, data1); |
| 1509 | WREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_CTRL, data2); | 1575 | WREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_CTRL, data2); |
| 1510 | } | 1576 | } |
| 1511 | 1577 | ||
| 1512 | static void uvd_v7_0_set_hw_clock_gating(struct amdgpu_device *adev) | 1578 | static void uvd_v7_0_set_hw_clock_gating(struct amdgpu_device *adev) |
| 1513 | { | 1579 | { |
| 1514 | uint32_t data, data1, cgc_flags, suvd_flags; | 1580 | uint32_t data, data1, cgc_flags, suvd_flags; |
| 1515 | 1581 | ||
| 1516 | data = RREG32_SOC15(UVD, 0, mmUVD_CGC_GATE); | 1582 | data = RREG32_SOC15(UVD, ring->me, mmUVD_CGC_GATE); |
| 1517 | data1 = RREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE); | 1583 | data1 = RREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE); |
| 1518 | 1584 | ||
| 1519 | cgc_flags = UVD_CGC_GATE__SYS_MASK | | 1585 | cgc_flags = UVD_CGC_GATE__SYS_MASK | |
| 1520 | UVD_CGC_GATE__UDEC_MASK | | 1586 | UVD_CGC_GATE__UDEC_MASK | |
| @@ -1546,8 +1612,8 @@ static void uvd_v7_0_set_hw_clock_gating(struct amdgpu_device *adev) | |||
| 1546 | data |= cgc_flags; | 1612 | data |= cgc_flags; |
| 1547 | data1 |= suvd_flags; | 1613 | data1 |= suvd_flags; |
| 1548 | 1614 | ||
| 1549 | WREG32_SOC15(UVD, 0, mmUVD_CGC_GATE, data); | 1615 | WREG32_SOC15(UVD, ring->me, mmUVD_CGC_GATE, data); |
| 1550 | WREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE, data1); | 1616 | WREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE, data1); |
| 1551 | } | 1617 | } |
| 1552 | 1618 | ||
| 1553 | static void uvd_v7_0_set_bypass_mode(struct amdgpu_device *adev, bool enable) | 1619 | static void uvd_v7_0_set_bypass_mode(struct amdgpu_device *adev, bool enable) |
| @@ -1606,7 +1672,7 @@ static int uvd_v7_0_set_powergating_state(void *handle, | |||
| 1606 | if (!(adev->pg_flags & AMD_PG_SUPPORT_UVD)) | 1672 | if (!(adev->pg_flags & AMD_PG_SUPPORT_UVD)) |
| 1607 | return 0; | 1673 | return 0; |
| 1608 | 1674 | ||
| 1609 | WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, UVD_POWER_STATUS__UVD_PG_EN_MASK); | 1675 | WREG32_SOC15(UVD, ring->me, mmUVD_POWER_STATUS, UVD_POWER_STATUS__UVD_PG_EN_MASK); |
| 1610 | 1676 | ||
| 1611 | if (state == AMD_PG_STATE_GATE) { | 1677 | if (state == AMD_PG_STATE_GATE) { |
| 1612 | uvd_v7_0_stop(adev); | 1678 | uvd_v7_0_stop(adev); |
| @@ -1647,14 +1713,13 @@ const struct amd_ip_funcs uvd_v7_0_ip_funcs = { | |||
| 1647 | static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = { | 1713 | static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = { |
| 1648 | .type = AMDGPU_RING_TYPE_UVD, | 1714 | .type = AMDGPU_RING_TYPE_UVD, |
| 1649 | .align_mask = 0xf, | 1715 | .align_mask = 0xf, |
| 1650 | .nop = PACKET0(0x81ff, 0), | ||
| 1651 | .support_64bit_ptrs = false, | 1716 | .support_64bit_ptrs = false, |
| 1652 | .vmhub = AMDGPU_MMHUB, | 1717 | .vmhub = AMDGPU_MMHUB, |
| 1653 | .get_rptr = uvd_v7_0_ring_get_rptr, | 1718 | .get_rptr = uvd_v7_0_ring_get_rptr, |
| 1654 | .get_wptr = uvd_v7_0_ring_get_wptr, | 1719 | .get_wptr = uvd_v7_0_ring_get_wptr, |
| 1655 | .set_wptr = uvd_v7_0_ring_set_wptr, | 1720 | .set_wptr = uvd_v7_0_ring_set_wptr, |
| 1656 | .emit_frame_size = | 1721 | .emit_frame_size = |
| 1657 | 6 + 6 + /* hdp flush / invalidate */ | 1722 | 6 + /* hdp invalidate */ |
| 1658 | SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + | 1723 | SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + |
| 1659 | SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + | 1724 | SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + |
| 1660 | 8 + /* uvd_v7_0_ring_emit_vm_flush */ | 1725 | 8 + /* uvd_v7_0_ring_emit_vm_flush */ |
| @@ -1663,6 +1728,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = { | |||
| 1663 | .emit_ib = uvd_v7_0_ring_emit_ib, | 1728 | .emit_ib = uvd_v7_0_ring_emit_ib, |
| 1664 | .emit_fence = uvd_v7_0_ring_emit_fence, | 1729 | .emit_fence = uvd_v7_0_ring_emit_fence, |
| 1665 | .emit_vm_flush = uvd_v7_0_ring_emit_vm_flush, | 1730 | .emit_vm_flush = uvd_v7_0_ring_emit_vm_flush, |
| 1731 | .emit_hdp_flush = uvd_v7_0_ring_emit_hdp_flush, | ||
| 1666 | .test_ring = uvd_v7_0_ring_test_ring, | 1732 | .test_ring = uvd_v7_0_ring_test_ring, |
| 1667 | .test_ib = amdgpu_uvd_ring_test_ib, | 1733 | .test_ib = amdgpu_uvd_ring_test_ib, |
| 1668 | .insert_nop = uvd_v7_0_ring_insert_nop, | 1734 | .insert_nop = uvd_v7_0_ring_insert_nop, |
| @@ -1671,6 +1737,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = { | |||
| 1671 | .end_use = amdgpu_uvd_ring_end_use, | 1737 | .end_use = amdgpu_uvd_ring_end_use, |
| 1672 | .emit_wreg = uvd_v7_0_ring_emit_wreg, | 1738 | .emit_wreg = uvd_v7_0_ring_emit_wreg, |
| 1673 | .emit_reg_wait = uvd_v7_0_ring_emit_reg_wait, | 1739 | .emit_reg_wait = uvd_v7_0_ring_emit_reg_wait, |
| 1740 | .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, | ||
| 1674 | }; | 1741 | }; |
| 1675 | 1742 | ||
| 1676 | static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = { | 1743 | static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = { |
| @@ -1702,22 +1769,32 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = { | |||
| 1702 | .end_use = amdgpu_uvd_ring_end_use, | 1769 | .end_use = amdgpu_uvd_ring_end_use, |
| 1703 | .emit_wreg = uvd_v7_0_enc_ring_emit_wreg, | 1770 | .emit_wreg = uvd_v7_0_enc_ring_emit_wreg, |
| 1704 | .emit_reg_wait = uvd_v7_0_enc_ring_emit_reg_wait, | 1771 | .emit_reg_wait = uvd_v7_0_enc_ring_emit_reg_wait, |
| 1772 | .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, | ||
| 1705 | }; | 1773 | }; |
| 1706 | 1774 | ||
| 1707 | static void uvd_v7_0_set_ring_funcs(struct amdgpu_device *adev) | 1775 | static void uvd_v7_0_set_ring_funcs(struct amdgpu_device *adev) |
| 1708 | { | 1776 | { |
| 1709 | adev->uvd.ring.funcs = &uvd_v7_0_ring_vm_funcs; | 1777 | int i; |
| 1710 | DRM_INFO("UVD is enabled in VM mode\n"); | 1778 | |
| 1779 | for (i = 0; i < adev->uvd.num_uvd_inst; i++) { | ||
| 1780 | adev->uvd.inst[i].ring.funcs = &uvd_v7_0_ring_vm_funcs; | ||
| 1781 | adev->uvd.inst[i].ring.me = i; | ||
| 1782 | DRM_INFO("UVD(%d) is enabled in VM mode\n", i); | ||
| 1783 | } | ||
| 1711 | } | 1784 | } |
| 1712 | 1785 | ||
| 1713 | static void uvd_v7_0_set_enc_ring_funcs(struct amdgpu_device *adev) | 1786 | static void uvd_v7_0_set_enc_ring_funcs(struct amdgpu_device *adev) |
| 1714 | { | 1787 | { |
| 1715 | int i; | 1788 | int i, j; |
| 1716 | 1789 | ||
| 1717 | for (i = 0; i < adev->uvd.num_enc_rings; ++i) | 1790 | for (j = 0; j < adev->uvd.num_uvd_inst; j++) { |
| 1718 | adev->uvd.ring_enc[i].funcs = &uvd_v7_0_enc_ring_vm_funcs; | 1791 | for (i = 0; i < adev->uvd.num_enc_rings; ++i) { |
| 1792 | adev->uvd.inst[j].ring_enc[i].funcs = &uvd_v7_0_enc_ring_vm_funcs; | ||
| 1793 | adev->uvd.inst[j].ring_enc[i].me = j; | ||
| 1794 | } | ||
| 1719 | 1795 | ||
| 1720 | DRM_INFO("UVD ENC is enabled in VM mode\n"); | 1796 | DRM_INFO("UVD(%d) ENC is enabled in VM mode\n", j); |
| 1797 | } | ||
| 1721 | } | 1798 | } |
| 1722 | 1799 | ||
| 1723 | static const struct amdgpu_irq_src_funcs uvd_v7_0_irq_funcs = { | 1800 | static const struct amdgpu_irq_src_funcs uvd_v7_0_irq_funcs = { |
| @@ -1727,8 +1804,12 @@ static const struct amdgpu_irq_src_funcs uvd_v7_0_irq_funcs = { | |||
| 1727 | 1804 | ||
| 1728 | static void uvd_v7_0_set_irq_funcs(struct amdgpu_device *adev) | 1805 | static void uvd_v7_0_set_irq_funcs(struct amdgpu_device *adev) |
| 1729 | { | 1806 | { |
| 1730 | adev->uvd.irq.num_types = adev->uvd.num_enc_rings + 1; | 1807 | int i; |
| 1731 | adev->uvd.irq.funcs = &uvd_v7_0_irq_funcs; | 1808 | |
| 1809 | for (i = 0; i < adev->uvd.num_uvd_inst; i++) { | ||
| 1810 | adev->uvd.inst[i].irq.num_types = adev->uvd.num_enc_rings + 1; | ||
| 1811 | adev->uvd.inst[i].irq.funcs = &uvd_v7_0_irq_funcs; | ||
| 1812 | } | ||
| 1732 | } | 1813 | } |
| 1733 | 1814 | ||
| 1734 | const struct amdgpu_ip_block_version uvd_v7_0_ip_block = | 1815 | const struct amdgpu_ip_block_version uvd_v7_0_ip_block = |
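The tail of uvd_v7_0.c follows the same pattern as the rest of the patch: uvd_v7_0_set_ring_funcs(), uvd_v7_0_set_enc_ring_funcs() and uvd_v7_0_set_irq_funcs() all loop over adev->uvd.num_uvd_inst, install the same funcs tables as before, and record the owning instance in ring->me so the emit/test paths above can index the right register block. A reduced sketch of the ring-funcs part of that wiring, using mock types rather than the amdgpu structures.

        #define SKETCH_NUM_ENC 2

        struct sketch_ring { int me; const void *funcs; };

        struct sketch_uvd_inst {
                struct sketch_ring ring;                        /* decode ring */
                struct sketch_ring ring_enc[SKETCH_NUM_ENC];    /* encode rings */
        };

        /* Install the shared funcs tables and tag each ring with its instance. */
        static void sketch_set_ring_funcs(struct sketch_uvd_inst *inst, int num_inst,
                                          const void *dec_funcs, const void *enc_funcs)
        {
                int i, j;

                for (j = 0; j < num_inst; j++) {
                        inst[j].ring.funcs = dec_funcs;
                        inst[j].ring.me = j;
                        for (i = 0; i < SKETCH_NUM_ENC; ++i) {
                                inst[j].ring_enc[i].funcs = enc_funcs;
                                inst[j].ring_enc[i].me = j;
                        }
                }
        }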
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 428d1928e44e..0999c843f623 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | |||
| @@ -388,7 +388,8 @@ static unsigned vce_v3_0_get_harvest_config(struct amdgpu_device *adev) | |||
| 388 | default: | 388 | default: |
| 389 | if ((adev->asic_type == CHIP_POLARIS10) || | 389 | if ((adev->asic_type == CHIP_POLARIS10) || |
| 390 | (adev->asic_type == CHIP_POLARIS11) || | 390 | (adev->asic_type == CHIP_POLARIS11) || |
| 391 | (adev->asic_type == CHIP_POLARIS12)) | 391 | (adev->asic_type == CHIP_POLARIS12) || |
| 392 | (adev->asic_type == CHIP_VEGAM)) | ||
| 392 | return AMDGPU_VCE_HARVEST_VCE1; | 393 | return AMDGPU_VCE_HARVEST_VCE1; |
| 393 | 394 | ||
| 394 | return 0; | 395 | return 0; |
| @@ -467,8 +468,8 @@ static int vce_v3_0_hw_init(void *handle) | |||
| 467 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 468 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 468 | 469 | ||
| 469 | vce_v3_0_override_vce_clock_gating(adev, true); | 470 | vce_v3_0_override_vce_clock_gating(adev, true); |
| 470 | if (!(adev->flags & AMD_IS_APU)) | 471 | |
| 471 | amdgpu_asic_set_vce_clocks(adev, 10000, 10000); | 472 | amdgpu_asic_set_vce_clocks(adev, 10000, 10000); |
| 472 | 473 | ||
| 473 | for (i = 0; i < adev->vce.num_rings; i++) | 474 | for (i = 0; i < adev->vce.num_rings; i++) |
| 474 | adev->vce.ring[i].ready = false; | 475 | adev->vce.ring[i].ready = false; |
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index 73fd48d6c756..8fd1b742985a 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | |||
| @@ -1081,6 +1081,7 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = { | |||
| 1081 | .end_use = amdgpu_vce_ring_end_use, | 1081 | .end_use = amdgpu_vce_ring_end_use, |
| 1082 | .emit_wreg = vce_v4_0_emit_wreg, | 1082 | .emit_wreg = vce_v4_0_emit_wreg, |
| 1083 | .emit_reg_wait = vce_v4_0_emit_reg_wait, | 1083 | .emit_reg_wait = vce_v4_0_emit_reg_wait, |
| 1084 | .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, | ||
| 1084 | }; | 1085 | }; |
| 1085 | 1086 | ||
| 1086 | static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev) | 1087 | static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev) |
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 8c132673bc79..110b294ebed3 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | |||
| @@ -35,7 +35,6 @@ | |||
| 35 | #include "mmhub/mmhub_9_1_offset.h" | 35 | #include "mmhub/mmhub_9_1_offset.h" |
| 36 | #include "mmhub/mmhub_9_1_sh_mask.h" | 36 | #include "mmhub/mmhub_9_1_sh_mask.h" |
| 37 | 37 | ||
| 38 | static int vcn_v1_0_start(struct amdgpu_device *adev); | ||
| 39 | static int vcn_v1_0_stop(struct amdgpu_device *adev); | 38 | static int vcn_v1_0_stop(struct amdgpu_device *adev); |
| 40 | static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev); | 39 | static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev); |
| 41 | static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev); | 40 | static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev); |
| @@ -146,10 +145,6 @@ static int vcn_v1_0_hw_init(void *handle) | |||
| 146 | struct amdgpu_ring *ring = &adev->vcn.ring_dec; | 145 | struct amdgpu_ring *ring = &adev->vcn.ring_dec; |
| 147 | int i, r; | 146 | int i, r; |
| 148 | 147 | ||
| 149 | r = vcn_v1_0_start(adev); | ||
| 150 | if (r) | ||
| 151 | goto done; | ||
| 152 | |||
| 153 | ring->ready = true; | 148 | ring->ready = true; |
| 154 | r = amdgpu_ring_test_ring(ring); | 149 | r = amdgpu_ring_test_ring(ring); |
| 155 | if (r) { | 150 | if (r) { |
| @@ -185,11 +180,9 @@ static int vcn_v1_0_hw_fini(void *handle) | |||
| 185 | { | 180 | { |
| 186 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 181 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 187 | struct amdgpu_ring *ring = &adev->vcn.ring_dec; | 182 | struct amdgpu_ring *ring = &adev->vcn.ring_dec; |
| 188 | int r; | ||
| 189 | 183 | ||
| 190 | r = vcn_v1_0_stop(adev); | 184 | if (RREG32_SOC15(VCN, 0, mmUVD_STATUS)) |
| 191 | if (r) | 185 | vcn_v1_0_stop(adev); |
| 192 | return r; | ||
| 193 | 186 | ||
| 194 | ring->ready = false; | 187 | ring->ready = false; |
| 195 | 188 | ||
| @@ -288,14 +281,14 @@ static void vcn_v1_0_mc_resume(struct amdgpu_device *adev) | |||
| 288 | * | 281 | * |
| 289 | * Disable clock gating for VCN block | 282 | * Disable clock gating for VCN block |
| 290 | */ | 283 | */ |
| 291 | static void vcn_v1_0_disable_clock_gating(struct amdgpu_device *adev, bool sw) | 284 | static void vcn_v1_0_disable_clock_gating(struct amdgpu_device *adev) |
| 292 | { | 285 | { |
| 293 | uint32_t data; | 286 | uint32_t data; |
| 294 | 287 | ||
| 295 | /* JPEG disable CGC */ | 288 | /* JPEG disable CGC */ |
| 296 | data = RREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL); | 289 | data = RREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL); |
| 297 | 290 | ||
| 298 | if (sw) | 291 | if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) |
| 299 | data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; | 292 | data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; |
| 300 | else | 293 | else |
| 301 | data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE_MASK; | 294 | data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE_MASK; |
| @@ -310,7 +303,7 @@ static void vcn_v1_0_disable_clock_gating(struct amdgpu_device *adev, bool sw) | |||
| 310 | 303 | ||
| 311 | /* UVD disable CGC */ | 304 | /* UVD disable CGC */ |
| 312 | data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL); | 305 | data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL); |
| 313 | if (sw) | 306 | if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) |
| 314 | data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; | 307 | data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; |
| 315 | else | 308 | else |
| 316 | data &= ~ UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK; | 309 | data &= ~ UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK; |
| @@ -415,13 +408,13 @@ static void vcn_v1_0_disable_clock_gating(struct amdgpu_device *adev, bool sw) | |||
| 415 | * | 408 | * |
| 416 | * Enable clock gating for VCN block | 409 | * Enable clock gating for VCN block |
| 417 | */ | 410 | */ |
| 418 | static void vcn_v1_0_enable_clock_gating(struct amdgpu_device *adev, bool sw) | 411 | static void vcn_v1_0_enable_clock_gating(struct amdgpu_device *adev) |
| 419 | { | 412 | { |
| 420 | uint32_t data = 0; | 413 | uint32_t data = 0; |
| 421 | 414 | ||
| 422 | /* enable JPEG CGC */ | 415 | /* enable JPEG CGC */ |
| 423 | data = RREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL); | 416 | data = RREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL); |
| 424 | if (sw) | 417 | if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) |
| 425 | data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; | 418 | data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; |
| 426 | else | 419 | else |
| 427 | data |= 0 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; | 420 | data |= 0 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; |
| @@ -435,7 +428,7 @@ static void vcn_v1_0_enable_clock_gating(struct amdgpu_device *adev, bool sw) | |||
| 435 | 428 | ||
| 436 | /* enable UVD CGC */ | 429 | /* enable UVD CGC */ |
| 437 | data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL); | 430 | data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL); |
| 438 | if (sw) | 431 | if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) |
| 439 | data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; | 432 | data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; |
| 440 | else | 433 | else |
| 441 | data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; | 434 | data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; |
| @@ -480,6 +473,94 @@ static void vcn_v1_0_enable_clock_gating(struct amdgpu_device *adev, bool sw) | |||
| 480 | WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL, data); | 473 | WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL, data); |
| 481 | } | 474 | } |
| 482 | 475 | ||
| 476 | static void vcn_1_0_disable_static_power_gating(struct amdgpu_device *adev) | ||
| 477 | { | ||
| 478 | uint32_t data = 0; | ||
| 479 | int ret; | ||
| 480 | |||
| 481 | if (adev->pg_flags & AMD_PG_SUPPORT_VCN) { | ||
| 482 | data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT | ||
| 483 | | 1 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT | ||
| 484 | | 2 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT | ||
| 485 | | 2 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT | ||
| 486 | | 2 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT | ||
| 487 | | 2 << UVD_PGFSM_CONFIG__UVDIL_PWR_CONFIG__SHIFT | ||
| 488 | | 2 << UVD_PGFSM_CONFIG__UVDIR_PWR_CONFIG__SHIFT | ||
| 489 | | 2 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT | ||
| 490 | | 2 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT | ||
| 491 | | 2 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT | ||
| 492 | | 2 << UVD_PGFSM_CONFIG__UVDW_PWR_CONFIG__SHIFT); | ||
| 493 | |||
| 494 | WREG32_SOC15(VCN, 0, mmUVD_PGFSM_CONFIG, data); | ||
| 495 | SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON, 0xFFFFFF, ret); | ||
| 496 | } else { | ||
| 497 | data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT | ||
| 498 | | 1 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT | ||
| 499 | | 1 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT | ||
| 500 | | 1 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT | ||
| 501 | | 1 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT | ||
| 502 | | 1 << UVD_PGFSM_CONFIG__UVDIL_PWR_CONFIG__SHIFT | ||
| 503 | | 1 << UVD_PGFSM_CONFIG__UVDIR_PWR_CONFIG__SHIFT | ||
| 504 | | 1 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT | ||
| 505 | | 1 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT | ||
| 506 | | 1 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT | ||
| 507 | | 1 << UVD_PGFSM_CONFIG__UVDW_PWR_CONFIG__SHIFT); | ||
| 508 | WREG32_SOC15(VCN, 0, mmUVD_PGFSM_CONFIG, data); | ||
| 509 | SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, 0, 0xFFFFFFFF, ret); | ||
| 510 | } | ||
| 511 | |||
| 512 | /* polling UVD_PGFSM_STATUS to confirm UVDM_PWR_STATUS and UVDU_PWR_STATUS are 0 (power on) */ | ||
| 513 | |||
| 514 | data = RREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS); | ||
| 515 | data &= ~0x103; | ||
| 516 | if (adev->pg_flags & AMD_PG_SUPPORT_VCN) | ||
| 517 | data |= UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON | UVD_POWER_STATUS__UVD_PG_EN_MASK; | ||
| 518 | |||
| 519 | WREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS, data); | ||
| 520 | } | ||
| 521 | |||
| 522 | static void vcn_1_0_enable_static_power_gating(struct amdgpu_device *adev) | ||
| 523 | { | ||
| 524 | uint32_t data = 0; | ||
| 525 | int ret; | ||
| 526 | |||
| 527 | if (adev->pg_flags & AMD_PG_SUPPORT_VCN) { | ||
| 528 | /* Before power off, this indicator has to be turned on */ | ||
| 529 | data = RREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS); | ||
| 530 | data &= ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK; | ||
| 531 | data |= UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF; | ||
| 532 | WREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS, data); | ||
| 533 | |||
| 534 | |||
| 535 | data = (2 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT | ||
| 536 | | 2 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT | ||
| 537 | | 2 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT | ||
| 538 | | 2 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT | ||
| 539 | | 2 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT | ||
| 540 | | 2 << UVD_PGFSM_CONFIG__UVDIL_PWR_CONFIG__SHIFT | ||
| 541 | | 2 << UVD_PGFSM_CONFIG__UVDIR_PWR_CONFIG__SHIFT | ||
| 542 | | 2 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT | ||
| 543 | | 2 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT | ||
| 544 | | 2 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT | ||
| 545 | | 2 << UVD_PGFSM_CONFIG__UVDW_PWR_CONFIG__SHIFT); | ||
| 546 | |||
| 547 | WREG32_SOC15(VCN, 0, mmUVD_PGFSM_CONFIG, data); | ||
| 548 | |||
| 549 | data = (2 << UVD_PGFSM_STATUS__UVDM_PWR_STATUS__SHIFT | ||
| 550 | | 2 << UVD_PGFSM_STATUS__UVDU_PWR_STATUS__SHIFT | ||
| 551 | | 2 << UVD_PGFSM_STATUS__UVDF_PWR_STATUS__SHIFT | ||
| 552 | | 2 << UVD_PGFSM_STATUS__UVDC_PWR_STATUS__SHIFT | ||
| 553 | | 2 << UVD_PGFSM_STATUS__UVDB_PWR_STATUS__SHIFT | ||
| 554 | | 2 << UVD_PGFSM_STATUS__UVDIL_PWR_STATUS__SHIFT | ||
| 555 | | 2 << UVD_PGFSM_STATUS__UVDIR_PWR_STATUS__SHIFT | ||
| 556 | | 2 << UVD_PGFSM_STATUS__UVDTD_PWR_STATUS__SHIFT | ||
| 557 | | 2 << UVD_PGFSM_STATUS__UVDTE_PWR_STATUS__SHIFT | ||
| 558 | | 2 << UVD_PGFSM_STATUS__UVDE_PWR_STATUS__SHIFT | ||
| 559 | | 2 << UVD_PGFSM_STATUS__UVDW_PWR_STATUS__SHIFT); | ||
| 560 | SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, data, 0xFFFFFFFF, ret); | ||
| 561 | } | ||
| 562 | } | ||
| 563 | |||
| 483 | /** | 564 | /** |
| 484 | * vcn_v1_0_start - start VCN block | 565 | * vcn_v1_0_start - start VCN block |
| 485 | * | 566 | * |
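Note on the two static power-gating helpers added above: they program UVD_PGFSM_CONFIG with per-tile values of 1 on the power-up path and 2 on the power-down path. The encoding below is inferred from the patch itself, not from register documentation, and the enum is purely illustrative:

	/* Sketch only: inferred meaning of the per-tile PWR_CONFIG values used
	 * in vcn_1_0_disable/enable_static_power_gating() above. */
	enum vcn_pgfsm_req_sketch {
		VCN_PGFSM_REQ_POWER_UP   = 1,	/* written on the ungate path */
		VCN_PGFSM_REQ_POWER_DOWN = 2,	/* written on the gate path   */
	};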
| @@ -499,8 +580,9 @@ static int vcn_v1_0_start(struct amdgpu_device *adev) | |||
| 499 | 580 | ||
| 500 | vcn_v1_0_mc_resume(adev); | 581 | vcn_v1_0_mc_resume(adev); |
| 501 | 582 | ||
| 583 | vcn_1_0_disable_static_power_gating(adev); | ||
| 502 | /* disable clock gating */ | 584 | /* disable clock gating */ |
| 503 | vcn_v1_0_disable_clock_gating(adev, true); | 585 | vcn_v1_0_disable_clock_gating(adev); |
| 504 | 586 | ||
| 505 | /* disable interrupt */ | 587 |
| 506 | WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), 0, | 588 | WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), 0, |
| @@ -680,16 +762,45 @@ static int vcn_v1_0_stop(struct amdgpu_device *adev) | |||
| 680 | WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0, | 762 | WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0, |
| 681 | ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); | 763 | ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); |
| 682 | 764 | ||
| 683 | /* enable clock gating */ | 765 | WREG32_SOC15(VCN, 0, mmUVD_STATUS, 0); |
| 684 | vcn_v1_0_enable_clock_gating(adev, true); | ||
| 685 | 766 | ||
| 767 | vcn_v1_0_enable_clock_gating(adev); | ||
| 768 | vcn_1_0_enable_static_power_gating(adev); | ||
| 686 | return 0; | 769 | return 0; |
| 687 | } | 770 | } |
| 688 | 771 | ||
| 772 | bool vcn_v1_0_is_idle(void *handle) | ||
| 773 | { | ||
| 774 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
| 775 | |||
| 776 | return (RREG32_SOC15(VCN, 0, mmUVD_STATUS) == 0x2); | ||
| 777 | } | ||
| 778 | |||
| 779 | int vcn_v1_0_wait_for_idle(void *handle) | ||
| 780 | { | ||
| 781 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
| 782 | int ret = 0; | ||
| 783 | |||
| 784 | SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, 0x2, 0x2, ret); | ||
| 785 | |||
| 786 | return ret; | ||
| 787 | } | ||
| 788 | |||
| 689 | static int vcn_v1_0_set_clockgating_state(void *handle, | 789 | static int vcn_v1_0_set_clockgating_state(void *handle, |
| 690 | enum amd_clockgating_state state) | 790 | enum amd_clockgating_state state) |
| 691 | { | 791 | { |
| 692 | /* needed for driver unload*/ | 792 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
| 793 | bool enable = (state == AMD_CG_STATE_GATE); | ||
| 794 | |||
| 795 | if (enable) { | ||
| 796 | /* wait for STATUS to clear */ | ||
| 797 | if (!vcn_v1_0_is_idle(handle)) | ||
| 798 | return -EBUSY; | ||
| 799 | vcn_v1_0_enable_clock_gating(adev); | ||
| 800 | } else { | ||
| 801 | /* disable HW gating and enable SW gating */ | ||
| 802 | vcn_v1_0_disable_clock_gating(adev); | ||
| 803 | } | ||
| 693 | return 0; | 804 | return 0; |
| 694 | } | 805 | } |
| 695 | 806 | ||
| @@ -1048,16 +1159,36 @@ static int vcn_v1_0_process_interrupt(struct amdgpu_device *adev, | |||
| 1048 | return 0; | 1159 | return 0; |
| 1049 | } | 1160 | } |
| 1050 | 1161 | ||
| 1051 | static void vcn_v1_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) | 1162 | static void vcn_v1_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) |
| 1052 | { | 1163 | { |
| 1053 | int i; | ||
| 1054 | struct amdgpu_device *adev = ring->adev; | 1164 | struct amdgpu_device *adev = ring->adev; |
| 1165 | int i; | ||
| 1055 | 1166 | ||
| 1056 | for (i = 0; i < count; i++) | 1167 | WARN_ON(ring->wptr % 2 || count % 2); |
| 1057 | amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0)); | ||
| 1058 | 1168 | ||
| 1169 | for (i = 0; i < count / 2; i++) { | ||
| 1170 | amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0)); | ||
| 1171 | amdgpu_ring_write(ring, 0); | ||
| 1172 | } | ||
| 1059 | } | 1173 | } |
| 1060 | 1174 | ||
| 1175 | static int vcn_v1_0_set_powergating_state(void *handle, | ||
| 1176 | enum amd_powergating_state state) | ||
| 1177 | { | ||
| 1178 | /* This doesn't actually powergate the VCN block. | ||
| 1179 | * That's done in the dpm code via the SMC. This | ||
| 1180 | * just re-inits the block as necessary. The actual | ||
| 1181 | * gating still happens in the dpm code. We should | ||
| 1182 | * revisit this when there is a cleaner line between | ||
| 1183 | * the smc and the hw blocks | ||
| 1184 | */ | ||
| 1185 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | ||
| 1186 | |||
| 1187 | if (state == AMD_PG_STATE_GATE) | ||
| 1188 | return vcn_v1_0_stop(adev); | ||
| 1189 | else | ||
| 1190 | return vcn_v1_0_start(adev); | ||
| 1191 | } | ||
| 1061 | 1192 | ||
| 1062 | static const struct amd_ip_funcs vcn_v1_0_ip_funcs = { | 1193 | static const struct amd_ip_funcs vcn_v1_0_ip_funcs = { |
| 1063 | .name = "vcn_v1_0", | 1194 | .name = "vcn_v1_0", |
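For context, the powergating hook added above is normally driven from common IP code rather than called directly; a hedged sketch of such a call site (the wrapper function is illustrative, only amdgpu_device_ip_set_powergating_state() and the AMD_* enums are taken as existing interfaces):

	static int vcn_request_powergate_sketch(struct amdgpu_device *adev, bool gate)
	{
		/* Gating ends up in vcn_v1_0_stop(), ungating in vcn_v1_0_start(),
		 * via the .set_powergating_state callback in vcn_v1_0_ip_funcs. */
		return amdgpu_device_ip_set_powergating_state(adev,
				AMD_IP_BLOCK_TYPE_VCN,
				gate ? AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
	}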
| @@ -1069,20 +1200,19 @@ static const struct amd_ip_funcs vcn_v1_0_ip_funcs = { | |||
| 1069 | .hw_fini = vcn_v1_0_hw_fini, | 1200 | .hw_fini = vcn_v1_0_hw_fini, |
| 1070 | .suspend = vcn_v1_0_suspend, | 1201 | .suspend = vcn_v1_0_suspend, |
| 1071 | .resume = vcn_v1_0_resume, | 1202 | .resume = vcn_v1_0_resume, |
| 1072 | .is_idle = NULL /* vcn_v1_0_is_idle */, | 1203 | .is_idle = vcn_v1_0_is_idle, |
| 1073 | .wait_for_idle = NULL /* vcn_v1_0_wait_for_idle */, | 1204 | .wait_for_idle = vcn_v1_0_wait_for_idle, |
| 1074 | .check_soft_reset = NULL /* vcn_v1_0_check_soft_reset */, | 1205 | .check_soft_reset = NULL /* vcn_v1_0_check_soft_reset */, |
| 1075 | .pre_soft_reset = NULL /* vcn_v1_0_pre_soft_reset */, | 1206 | .pre_soft_reset = NULL /* vcn_v1_0_pre_soft_reset */, |
| 1076 | .soft_reset = NULL /* vcn_v1_0_soft_reset */, | 1207 | .soft_reset = NULL /* vcn_v1_0_soft_reset */, |
| 1077 | .post_soft_reset = NULL /* vcn_v1_0_post_soft_reset */, | 1208 | .post_soft_reset = NULL /* vcn_v1_0_post_soft_reset */, |
| 1078 | .set_clockgating_state = vcn_v1_0_set_clockgating_state, | 1209 | .set_clockgating_state = vcn_v1_0_set_clockgating_state, |
| 1079 | .set_powergating_state = NULL /* vcn_v1_0_set_powergating_state */, | 1210 | .set_powergating_state = vcn_v1_0_set_powergating_state, |
| 1080 | }; | 1211 | }; |
| 1081 | 1212 | ||
| 1082 | static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = { | 1213 | static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = { |
| 1083 | .type = AMDGPU_RING_TYPE_VCN_DEC, | 1214 | .type = AMDGPU_RING_TYPE_VCN_DEC, |
| 1084 | .align_mask = 0xf, | 1215 | .align_mask = 0xf, |
| 1085 | .nop = PACKET0(0x81ff, 0), | ||
| 1086 | .support_64bit_ptrs = false, | 1216 | .support_64bit_ptrs = false, |
| 1087 | .vmhub = AMDGPU_MMHUB, | 1217 | .vmhub = AMDGPU_MMHUB, |
| 1088 | .get_rptr = vcn_v1_0_dec_ring_get_rptr, | 1218 | .get_rptr = vcn_v1_0_dec_ring_get_rptr, |
| @@ -1101,7 +1231,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = { | |||
| 1101 | .emit_vm_flush = vcn_v1_0_dec_ring_emit_vm_flush, | 1231 | .emit_vm_flush = vcn_v1_0_dec_ring_emit_vm_flush, |
| 1102 | .test_ring = amdgpu_vcn_dec_ring_test_ring, | 1232 | .test_ring = amdgpu_vcn_dec_ring_test_ring, |
| 1103 | .test_ib = amdgpu_vcn_dec_ring_test_ib, | 1233 | .test_ib = amdgpu_vcn_dec_ring_test_ib, |
| 1104 | .insert_nop = vcn_v1_0_ring_insert_nop, | 1234 | .insert_nop = vcn_v1_0_dec_ring_insert_nop, |
| 1105 | .insert_start = vcn_v1_0_dec_ring_insert_start, | 1235 | .insert_start = vcn_v1_0_dec_ring_insert_start, |
| 1106 | .insert_end = vcn_v1_0_dec_ring_insert_end, | 1236 | .insert_end = vcn_v1_0_dec_ring_insert_end, |
| 1107 | .pad_ib = amdgpu_ring_generic_pad_ib, | 1237 | .pad_ib = amdgpu_ring_generic_pad_ib, |
| @@ -1109,6 +1239,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = { | |||
| 1109 | .end_use = amdgpu_vcn_ring_end_use, | 1239 | .end_use = amdgpu_vcn_ring_end_use, |
| 1110 | .emit_wreg = vcn_v1_0_dec_ring_emit_wreg, | 1240 | .emit_wreg = vcn_v1_0_dec_ring_emit_wreg, |
| 1111 | .emit_reg_wait = vcn_v1_0_dec_ring_emit_reg_wait, | 1241 | .emit_reg_wait = vcn_v1_0_dec_ring_emit_reg_wait, |
| 1242 | .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, | ||
| 1112 | }; | 1243 | }; |
| 1113 | 1244 | ||
| 1114 | static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = { | 1245 | static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = { |
| @@ -1139,6 +1270,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = { | |||
| 1139 | .end_use = amdgpu_vcn_ring_end_use, | 1270 | .end_use = amdgpu_vcn_ring_end_use, |
| 1140 | .emit_wreg = vcn_v1_0_enc_ring_emit_wreg, | 1271 | .emit_wreg = vcn_v1_0_enc_ring_emit_wreg, |
| 1141 | .emit_reg_wait = vcn_v1_0_enc_ring_emit_reg_wait, | 1272 | .emit_reg_wait = vcn_v1_0_enc_ring_emit_reg_wait, |
| 1273 | .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, | ||
| 1142 | }; | 1274 | }; |
| 1143 | 1275 | ||
| 1144 | static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev) | 1276 | static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev) |
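Both ring-funcs tables above now wire .emit_reg_write_reg_wait to the generic helper; its behaviour is essentially a register write immediately followed by a masked wait, composed from the ring's own emitters. A sketch of that composition (an approximation of amdgpu_ring_emit_reg_write_reg_wait_helper(), not a verbatim copy):

	static void reg_write_reg_wait_sketch(struct amdgpu_ring *ring,
					      uint32_t reg0, uint32_t reg1,
					      uint32_t ref, uint32_t mask)
	{
		amdgpu_ring_emit_wreg(ring, reg0, ref);		  /* write ref into reg0 */
		amdgpu_ring_emit_reg_wait(ring, reg1, ref, mask); /* wait for reg1 & mask == ref */
	}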
diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c new file mode 100644 index 000000000000..52778de93ab0 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c | |||
| @@ -0,0 +1,53 @@ | |||
| 1 | /* | ||
| 2 | * Copyright 2018 Advanced Micro Devices, Inc. | ||
| 3 | * | ||
| 4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
| 5 | * copy of this software and associated documentation files (the "Software"), | ||
| 6 | * to deal in the Software without restriction, including without limitation | ||
| 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
| 8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
| 9 | * Software is furnished to do so, subject to the following conditions: | ||
| 10 | * | ||
| 11 | * The above copyright notice and this permission notice shall be included in | ||
| 12 | * all copies or substantial portions of the Software. | ||
| 13 | * | ||
| 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
| 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
| 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
| 17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | ||
| 18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | ||
| 19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | ||
| 20 | * OTHER DEALINGS IN THE SOFTWARE. | ||
| 21 | * | ||
| 22 | */ | ||
| 23 | #include "amdgpu.h" | ||
| 24 | #include "soc15.h" | ||
| 25 | |||
| 26 | #include "soc15_common.h" | ||
| 27 | #include "soc15_hw_ip.h" | ||
| 28 | #include "vega20_ip_offset.h" | ||
| 29 | |||
| 30 | int vega20_reg_base_init(struct amdgpu_device *adev) | ||
| 31 | { | ||
| 32 | /* HW has more IP blocks; only initialize the blocks needed by our driver */ | ||
| 33 | uint32_t i; | ||
| 34 | for (i = 0; i < MAX_INSTANCE; ++i) { | ||
| 35 | adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); | ||
| 36 | adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i])); | ||
| 37 | adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i])); | ||
| 38 | adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i])); | ||
| 39 | adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i])); | ||
| 40 | adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i])); | ||
| 41 | adev->reg_offset[UVD_HWIP][i] = (uint32_t *)(&(UVD_BASE.instance[i])); | ||
| 42 | adev->reg_offset[VCE_HWIP][i] = (uint32_t *)(&(VCE_BASE.instance[i])); | ||
| 43 | adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i])); | ||
| 44 | adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DCE_BASE.instance[i])); | ||
| 45 | adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i])); | ||
| 46 | adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(SDMA0_BASE.instance[i])); | ||
| 47 | adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(SDMA1_BASE.instance[i])); | ||
| 48 | adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i])); | ||
| 49 | } | ||
| 50 | return 0; | ||
| 51 | } | ||
| 52 | |||
| 53 | |||
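The per-IP base tables filled in by vega20_reg_base_init() feed the SOC15 register helpers; a sketch of how a register offset is then resolved (this approximates the SOC15_REG_OFFSET()/RREG32_SOC15() macros in soc15_common.h, with the segment index naming assumed):

	/* adev->reg_offset[HWIP][instance] points at that instance's segment-base
	 * array, so an absolute offset is segment base + per-segment register offset. */
	#define SKETCH_SOC15_REG_OFFSET(adev, ip, inst, reg) \
		((adev)->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + (reg))

	/* usage sketch: RREG32(SKETCH_SOC15_REG_OFFSET(adev, VCN, 0, mmUVD_STATUS)); */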
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 126f1276d347..4ac1288ab7df 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c | |||
| @@ -305,9 +305,10 @@ static void vi_init_golden_registers(struct amdgpu_device *adev) | |||
| 305 | stoney_mgcg_cgcg_init, | 305 | stoney_mgcg_cgcg_init, |
| 306 | ARRAY_SIZE(stoney_mgcg_cgcg_init)); | 306 | ARRAY_SIZE(stoney_mgcg_cgcg_init)); |
| 307 | break; | 307 | break; |
| 308 | case CHIP_POLARIS11: | ||
| 309 | case CHIP_POLARIS10: | 308 | case CHIP_POLARIS10: |
| 309 | case CHIP_POLARIS11: | ||
| 310 | case CHIP_POLARIS12: | 310 | case CHIP_POLARIS12: |
| 311 | case CHIP_VEGAM: | ||
| 311 | default: | 312 | default: |
| 312 | break; | 313 | break; |
| 313 | } | 314 | } |
| @@ -728,33 +729,59 @@ static int vi_set_uvd_clock(struct amdgpu_device *adev, u32 clock, | |||
| 728 | return r; | 729 | return r; |
| 729 | 730 | ||
| 730 | tmp = RREG32_SMC(cntl_reg); | 731 | tmp = RREG32_SMC(cntl_reg); |
| 731 | tmp &= ~(CG_DCLK_CNTL__DCLK_DIR_CNTL_EN_MASK | | 732 | |
| 732 | CG_DCLK_CNTL__DCLK_DIVIDER_MASK); | 733 | if (adev->flags & AMD_IS_APU) |
| 734 | tmp &= ~CG_DCLK_CNTL__DCLK_DIVIDER_MASK; | ||
| 735 | else | ||
| 736 | tmp &= ~(CG_DCLK_CNTL__DCLK_DIR_CNTL_EN_MASK | | ||
| 737 | CG_DCLK_CNTL__DCLK_DIVIDER_MASK); | ||
| 733 | tmp |= dividers.post_divider; | 738 | tmp |= dividers.post_divider; |
| 734 | WREG32_SMC(cntl_reg, tmp); | 739 | WREG32_SMC(cntl_reg, tmp); |
| 735 | 740 | ||
| 736 | for (i = 0; i < 100; i++) { | 741 | for (i = 0; i < 100; i++) { |
| 737 | if (RREG32_SMC(status_reg) & CG_DCLK_STATUS__DCLK_STATUS_MASK) | 742 | tmp = RREG32_SMC(status_reg); |
| 738 | break; | 743 | if (adev->flags & AMD_IS_APU) { |
| 744 | if (tmp & 0x10000) | ||
| 745 | break; | ||
| 746 | } else { | ||
| 747 | if (tmp & CG_DCLK_STATUS__DCLK_STATUS_MASK) | ||
| 748 | break; | ||
| 749 | } | ||
| 739 | mdelay(10); | 750 | mdelay(10); |
| 740 | } | 751 | } |
| 741 | if (i == 100) | 752 | if (i == 100) |
| 742 | return -ETIMEDOUT; | 753 | return -ETIMEDOUT; |
| 743 | |||
| 744 | return 0; | 754 | return 0; |
| 745 | } | 755 | } |
| 746 | 756 | ||
| 757 | #define ixGNB_CLK1_DFS_CNTL 0xD82200F0 | ||
| 758 | #define ixGNB_CLK1_STATUS 0xD822010C | ||
| 759 | #define ixGNB_CLK2_DFS_CNTL 0xD8220110 | ||
| 760 | #define ixGNB_CLK2_STATUS 0xD822012C | ||
| 761 | #define ixGNB_CLK3_DFS_CNTL 0xD8220130 | ||
| 762 | #define ixGNB_CLK3_STATUS 0xD822014C | ||
| 763 | |||
| 747 | static int vi_set_uvd_clocks(struct amdgpu_device *adev, u32 vclk, u32 dclk) | 764 | static int vi_set_uvd_clocks(struct amdgpu_device *adev, u32 vclk, u32 dclk) |
| 748 | { | 765 | { |
| 749 | int r; | 766 | int r; |
| 750 | 767 | ||
| 751 | r = vi_set_uvd_clock(adev, vclk, ixCG_VCLK_CNTL, ixCG_VCLK_STATUS); | 768 | if (adev->flags & AMD_IS_APU) { |
| 752 | if (r) | 769 | r = vi_set_uvd_clock(adev, vclk, ixGNB_CLK2_DFS_CNTL, ixGNB_CLK2_STATUS); |
| 753 | return r; | 770 | if (r) |
| 771 | return r; | ||
| 754 | 772 | ||
| 755 | r = vi_set_uvd_clock(adev, dclk, ixCG_DCLK_CNTL, ixCG_DCLK_STATUS); | 773 | r = vi_set_uvd_clock(adev, dclk, ixGNB_CLK1_DFS_CNTL, ixGNB_CLK1_STATUS); |
| 756 | if (r) | 774 | if (r) |
| 757 | return r; | 775 | return r; |
| 776 | } else { | ||
| 777 | r = vi_set_uvd_clock(adev, vclk, ixCG_VCLK_CNTL, ixCG_VCLK_STATUS); | ||
| 778 | if (r) | ||
| 779 | return r; | ||
| 780 | |||
| 781 | r = vi_set_uvd_clock(adev, dclk, ixCG_DCLK_CNTL, ixCG_DCLK_STATUS); | ||
| 782 | if (r) | ||
| 783 | return r; | ||
| 784 | } | ||
| 758 | 785 | ||
| 759 | return 0; | 786 | return 0; |
| 760 | } | 787 | } |
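The APU path above programs the UVD clocks through the GNB DFS registers instead of the CG_*CLK pair. As a design note, the same selection could be expressed as a small table; the sketch below only illustrates which register pair each path uses (the ixGNB_* defines come from this patch, the struct and arrays are hypothetical):

	struct vi_uvd_clk_regs_sketch {
		u32 cntl;
		u32 status;
	};

	/* [0] = dGPU (SMC CG registers), [1] = APU (GNB DFS registers) */
	static const struct vi_uvd_clk_regs_sketch vi_vclk_regs[2] = {
		{ ixCG_VCLK_CNTL,      ixCG_VCLK_STATUS },
		{ ixGNB_CLK2_DFS_CNTL, ixGNB_CLK2_STATUS },
	};

	static const struct vi_uvd_clk_regs_sketch vi_dclk_regs[2] = {
		{ ixCG_DCLK_CNTL,      ixCG_DCLK_STATUS },
		{ ixGNB_CLK1_DFS_CNTL, ixGNB_CLK1_STATUS },
	};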
| @@ -764,6 +791,22 @@ static int vi_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk) | |||
| 764 | int r, i; | 791 | int r, i; |
| 765 | struct atom_clock_dividers dividers; | 792 | struct atom_clock_dividers dividers; |
| 766 | u32 tmp; | 793 | u32 tmp; |
| 794 | u32 reg_ctrl; | ||
| 795 | u32 reg_status; | ||
| 796 | u32 status_mask; | ||
| 797 | u32 reg_mask; | ||
| 798 | |||
| 799 | if (adev->flags & AMD_IS_APU) { | ||
| 800 | reg_ctrl = ixGNB_CLK3_DFS_CNTL; | ||
| 801 | reg_status = ixGNB_CLK3_STATUS; | ||
| 802 | status_mask = 0x00010000; | ||
| 803 | reg_mask = CG_ECLK_CNTL__ECLK_DIVIDER_MASK; | ||
| 804 | } else { | ||
| 805 | reg_ctrl = ixCG_ECLK_CNTL; | ||
| 806 | reg_status = ixCG_ECLK_STATUS; | ||
| 807 | status_mask = CG_ECLK_STATUS__ECLK_STATUS_MASK; | ||
| 808 | reg_mask = CG_ECLK_CNTL__ECLK_DIR_CNTL_EN_MASK | CG_ECLK_CNTL__ECLK_DIVIDER_MASK; | ||
| 809 | } | ||
| 767 | 810 | ||
| 768 | r = amdgpu_atombios_get_clock_dividers(adev, | 811 | r = amdgpu_atombios_get_clock_dividers(adev, |
| 769 | COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK, | 812 | COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK, |
| @@ -772,24 +815,25 @@ static int vi_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk) | |||
| 772 | return r; | 815 | return r; |
| 773 | 816 | ||
| 774 | for (i = 0; i < 100; i++) { | 817 | for (i = 0; i < 100; i++) { |
| 775 | if (RREG32_SMC(ixCG_ECLK_STATUS) & CG_ECLK_STATUS__ECLK_STATUS_MASK) | 818 | if (RREG32_SMC(reg_status) & status_mask) |
| 776 | break; | 819 | break; |
| 777 | mdelay(10); | 820 | mdelay(10); |
| 778 | } | 821 | } |
| 822 | |||
| 779 | if (i == 100) | 823 | if (i == 100) |
| 780 | return -ETIMEDOUT; | 824 | return -ETIMEDOUT; |
| 781 | 825 | ||
| 782 | tmp = RREG32_SMC(ixCG_ECLK_CNTL); | 826 | tmp = RREG32_SMC(reg_ctrl); |
| 783 | tmp &= ~(CG_ECLK_CNTL__ECLK_DIR_CNTL_EN_MASK | | 827 | tmp &= ~reg_mask; |
| 784 | CG_ECLK_CNTL__ECLK_DIVIDER_MASK); | ||
| 785 | tmp |= dividers.post_divider; | 828 | tmp |= dividers.post_divider; |
| 786 | WREG32_SMC(ixCG_ECLK_CNTL, tmp); | 829 | WREG32_SMC(reg_ctrl, tmp); |
| 787 | 830 | ||
| 788 | for (i = 0; i < 100; i++) { | 831 | for (i = 0; i < 100; i++) { |
| 789 | if (RREG32_SMC(ixCG_ECLK_STATUS) & CG_ECLK_STATUS__ECLK_STATUS_MASK) | 832 | if (RREG32_SMC(reg_status) & status_mask) |
| 790 | break; | 833 | break; |
| 791 | mdelay(10); | 834 | mdelay(10); |
| 792 | } | 835 | } |
| 836 | |||
| 793 | if (i == 100) | 837 | if (i == 100) |
| 794 | return -ETIMEDOUT; | 838 | return -ETIMEDOUT; |
| 795 | 839 | ||
| @@ -876,6 +920,27 @@ static void vi_invalidate_hdp(struct amdgpu_device *adev, | |||
| 876 | } | 920 | } |
| 877 | } | 921 | } |
| 878 | 922 | ||
| 923 | static bool vi_need_full_reset(struct amdgpu_device *adev) | ||
| 924 | { | ||
| 925 | switch (adev->asic_type) { | ||
| 926 | case CHIP_CARRIZO: | ||
| 927 | case CHIP_STONEY: | ||
| 928 | /* CZ has hang issues with full reset at the moment */ | ||
| 929 | return false; | ||
| 930 | case CHIP_FIJI: | ||
| 931 | case CHIP_TONGA: | ||
| 932 | /* XXX: soft reset should work on fiji and tonga */ | ||
| 933 | return true; | ||
| 934 | case CHIP_POLARIS10: | ||
| 935 | case CHIP_POLARIS11: | ||
| 936 | case CHIP_POLARIS12: | ||
| 937 | case CHIP_TOPAZ: | ||
| 938 | default: | ||
| 939 | /* change this when we support soft reset */ | ||
| 940 | return true; | ||
| 941 | } | ||
| 942 | } | ||
| 943 | |||
| 879 | static const struct amdgpu_asic_funcs vi_asic_funcs = | 944 | static const struct amdgpu_asic_funcs vi_asic_funcs = |
| 880 | { | 945 | { |
| 881 | .read_disabled_bios = &vi_read_disabled_bios, | 946 | .read_disabled_bios = &vi_read_disabled_bios, |
| @@ -889,6 +954,7 @@ static const struct amdgpu_asic_funcs vi_asic_funcs = | |||
| 889 | .get_config_memsize = &vi_get_config_memsize, | 954 | .get_config_memsize = &vi_get_config_memsize, |
| 890 | .flush_hdp = &vi_flush_hdp, | 955 | .flush_hdp = &vi_flush_hdp, |
| 891 | .invalidate_hdp = &vi_invalidate_hdp, | 956 | .invalidate_hdp = &vi_invalidate_hdp, |
| 957 | .need_full_reset = &vi_need_full_reset, | ||
| 892 | }; | 958 | }; |
| 893 | 959 | ||
| 894 | #define CZ_REV_BRISTOL(rev) \ | 960 | #define CZ_REV_BRISTOL(rev) \ |
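vi_need_full_reset() is a new ASIC callback meant to be consulted from the reset/recovery path; a hedged sketch of how a caller would use it (the surrounding function is illustrative, only the .need_full_reset member added here and the existing amdgpu_asic_reset() wrapper are assumed):

	static int gpu_recover_sketch(struct amdgpu_device *adev)
	{
		if (adev->asic_funcs->need_full_reset(adev))
			return amdgpu_asic_reset(adev);	/* whole-ASIC reset */

		/* otherwise the recovery code would try per-IP soft resets first */
		return 0;
	}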
| @@ -1031,6 +1097,30 @@ static int vi_common_early_init(void *handle) | |||
| 1031 | adev->pg_flags = 0; | 1097 | adev->pg_flags = 0; |
| 1032 | adev->external_rev_id = adev->rev_id + 0x64; | 1098 | adev->external_rev_id = adev->rev_id + 0x64; |
| 1033 | break; | 1099 | break; |
| 1100 | case CHIP_VEGAM: | ||
| 1101 | adev->cg_flags = 0; | ||
| 1102 | /*AMD_CG_SUPPORT_GFX_MGCG | | ||
| 1103 | AMD_CG_SUPPORT_GFX_RLC_LS | | ||
| 1104 | AMD_CG_SUPPORT_GFX_CP_LS | | ||
| 1105 | AMD_CG_SUPPORT_GFX_CGCG | | ||
| 1106 | AMD_CG_SUPPORT_GFX_CGLS | | ||
| 1107 | AMD_CG_SUPPORT_GFX_3D_CGCG | | ||
| 1108 | AMD_CG_SUPPORT_GFX_3D_CGLS | | ||
| 1109 | AMD_CG_SUPPORT_SDMA_MGCG | | ||
| 1110 | AMD_CG_SUPPORT_SDMA_LS | | ||
| 1111 | AMD_CG_SUPPORT_BIF_MGCG | | ||
| 1112 | AMD_CG_SUPPORT_BIF_LS | | ||
| 1113 | AMD_CG_SUPPORT_HDP_MGCG | | ||
| 1114 | AMD_CG_SUPPORT_HDP_LS | | ||
| 1115 | AMD_CG_SUPPORT_ROM_MGCG | | ||
| 1116 | AMD_CG_SUPPORT_MC_MGCG | | ||
| 1117 | AMD_CG_SUPPORT_MC_LS | | ||
| 1118 | AMD_CG_SUPPORT_DRM_LS | | ||
| 1119 | AMD_CG_SUPPORT_UVD_MGCG | | ||
| 1120 | AMD_CG_SUPPORT_VCE_MGCG;*/ | ||
| 1121 | adev->pg_flags = 0; | ||
| 1122 | adev->external_rev_id = adev->rev_id + 0x6E; | ||
| 1123 | break; | ||
| 1034 | case CHIP_CARRIZO: | 1124 | case CHIP_CARRIZO: |
| 1035 | adev->cg_flags = AMD_CG_SUPPORT_UVD_MGCG | | 1125 | adev->cg_flags = AMD_CG_SUPPORT_UVD_MGCG | |
| 1036 | AMD_CG_SUPPORT_GFX_MGCG | | 1126 | AMD_CG_SUPPORT_GFX_MGCG | |
| @@ -1422,6 +1512,7 @@ static int vi_common_set_clockgating_state(void *handle, | |||
| 1422 | case CHIP_POLARIS10: | 1512 | case CHIP_POLARIS10: |
| 1423 | case CHIP_POLARIS11: | 1513 | case CHIP_POLARIS11: |
| 1424 | case CHIP_POLARIS12: | 1514 | case CHIP_POLARIS12: |
| 1515 | case CHIP_VEGAM: | ||
| 1425 | vi_common_set_clockgating_state_by_smu(adev, state); | 1516 | vi_common_set_clockgating_state_by_smu(adev, state); |
| 1426 | default: | 1517 | default: |
| 1427 | break; | 1518 | break; |
| @@ -1551,9 +1642,10 @@ int vi_set_ip_blocks(struct amdgpu_device *adev) | |||
| 1551 | amdgpu_device_ip_block_add(adev, &vce_v3_0_ip_block); | 1642 | amdgpu_device_ip_block_add(adev, &vce_v3_0_ip_block); |
| 1552 | } | 1643 | } |
| 1553 | break; | 1644 | break; |
| 1554 | case CHIP_POLARIS11: | ||
| 1555 | case CHIP_POLARIS10: | 1645 | case CHIP_POLARIS10: |
| 1646 | case CHIP_POLARIS11: | ||
| 1556 | case CHIP_POLARIS12: | 1647 | case CHIP_POLARIS12: |
| 1648 | case CHIP_VEGAM: | ||
| 1557 | amdgpu_device_ip_block_add(adev, &vi_common_ip_block); | 1649 | amdgpu_device_ip_block_add(adev, &vi_common_ip_block); |
| 1558 | amdgpu_device_ip_block_add(adev, &gmc_v8_1_ip_block); | 1650 | amdgpu_device_ip_block_add(adev, &gmc_v8_1_ip_block); |
| 1559 | amdgpu_device_ip_block_add(adev, &tonga_ih_ip_block); | 1651 | amdgpu_device_ip_block_add(adev, &tonga_ih_ip_block); |
